Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/.clang-format80
-rw-r--r--src/jit/.gitmirror1
-rw-r--r--src/jit/CMakeLists.txt212
-rw-r--r--src/jit/ClrJit.PAL.exports3
-rw-r--r--src/jit/ClrJit.exports3
-rw-r--r--src/jit/DIRS.proj50
-rw-r--r--src/jit/Native.rc8
-rwxr-xr-xsrc/jit/_typeinfo.h764
-rw-r--r--src/jit/alloc.cpp590
-rw-r--r--src/jit/alloc.h99
-rw-r--r--src/jit/arraystack.h146
-rw-r--r--src/jit/assertionprop.cpp5142
-rw-r--r--src/jit/bitset.cpp185
-rw-r--r--src/jit/bitset.h452
-rw-r--r--src/jit/bitsetasshortlong.h792
-rw-r--r--src/jit/bitsetasuint64.h236
-rw-r--r--src/jit/bitsetasuint64inclass.h500
-rw-r--r--src/jit/bitsetops.h34
-rw-r--r--src/jit/bitvec.h56
-rw-r--r--src/jit/block.cpp771
-rw-r--r--src/jit/block.h1313
-rw-r--r--src/jit/blockset.h77
-rwxr-xr-xsrc/jit/codegen.h967
-rw-r--r--src/jit/codegenarm.cpp2106
-rw-r--r--src/jit/codegenarm64.cpp9723
-rw-r--r--src/jit/codegenclassic.h606
-rwxr-xr-xsrc/jit/codegencommon.cpp11779
-rw-r--r--src/jit/codegeninterface.h440
-rw-r--r--src/jit/codegenlegacy.cpp22057
-rw-r--r--src/jit/codegenlinear.h224
-rw-r--r--src/jit/codegenxarch.cpp9388
-rw-r--r--src/jit/compiler.cpp10380
-rw-r--r--src/jit/compiler.h9301
-rw-r--r--src/jit/compiler.hpp4742
-rw-r--r--src/jit/compilerbitsettraits.h130
-rw-r--r--src/jit/compilerbitsettraits.hpp181
-rw-r--r--src/jit/compmemkind.h56
-rw-r--r--src/jit/compphases.h91
-rw-r--r--src/jit/conventions.txt81
-rw-r--r--src/jit/copyprop.cpp463
-rw-r--r--src/jit/cpp.hint27
-rw-r--r--src/jit/crossgen/.gitmirror1
-rw-r--r--src/jit/crossgen/CMakeLists.txt7
-rw-r--r--src/jit/crossgen/jit_crossgen.nativeproj20
-rw-r--r--src/jit/dataflow.h81
-rw-r--r--src/jit/decomposelongs.cpp1028
-rw-r--r--src/jit/decomposelongs.h67
-rw-r--r--src/jit/delayload.cpp10
-rw-r--r--src/jit/disasm.cpp1568
-rw-r--r--src/jit/disasm.h226
-rw-r--r--src/jit/dll/.gitmirror1
-rw-r--r--src/jit/dll/CMakeLists.txt35
-rw-r--r--src/jit/dll/clrjit.def7
-rw-r--r--src/jit/dll/jit.nativeproj84
-rw-r--r--src/jit/earlyprop.cpp671
-rwxr-xr-xsrc/jit/ee_il_dll.cpp1552
-rw-r--r--src/jit/ee_il_dll.hpp204
-rw-r--r--src/jit/eeinterface.cpp212
-rw-r--r--src/jit/emit.cpp7158
-rw-r--r--src/jit/emit.h2742
-rw-r--r--src/jit/emitarm.cpp7623
-rw-r--r--src/jit/emitarm.h414
-rw-r--r--src/jit/emitarm64.cpp11167
-rw-r--r--src/jit/emitarm64.h909
-rw-r--r--src/jit/emitdef.h22
-rw-r--r--src/jit/emitfmts.h14
-rw-r--r--src/jit/emitfmtsarm.h153
-rw-r--r--src/jit/emitfmtsarm64.h210
-rw-r--r--src/jit/emitfmtsxarch.h240
-rw-r--r--src/jit/emitinl.h508
-rw-r--r--src/jit/emitjmps.h58
-rw-r--r--src/jit/emitpub.h162
-rw-r--r--src/jit/emitxarch.cpp11398
-rw-r--r--src/jit/emitxarch.h437
-rw-r--r--src/jit/error.cpp536
-rw-r--r--src/jit/error.h295
-rw-r--r--src/jit/flowgraph.cpp22276
-rw-r--r--src/jit/fp.h73
-rw-r--r--src/jit/gcdecode.cpp15
-rw-r--r--src/jit/gcencode.cpp4725
-rw-r--r--src/jit/gcinfo.cpp867
-rw-r--r--src/jit/gentree.cpp16748
-rw-r--r--src/jit/gentree.h5124
-rw-r--r--src/jit/gschecks.cpp583
-rw-r--r--src/jit/gtlist.h255
-rw-r--r--src/jit/gtstructs.h112
-rw-r--r--src/jit/hashbv.cpp2028
-rw-r--r--src/jit/hashbv.h363
-rw-r--r--src/jit/host.h68
-rw-r--r--src/jit/hostallocator.cpp40
-rw-r--r--src/jit/hostallocator.h22
-rw-r--r--src/jit/importer.cpp17997
-rw-r--r--src/jit/inline.cpp1640
-rw-r--r--src/jit/inline.def176
-rw-r--r--src/jit/inline.h894
-rw-r--r--src/jit/inlinepolicy.cpp2857
-rw-r--r--src/jit/inlinepolicy.h479
-rw-r--r--src/jit/instr.cpp4086
-rw-r--r--src/jit/instr.h301
-rw-r--r--src/jit/instrs.h13
-rw-r--r--src/jit/instrsarm.h557
-rw-r--r--src/jit/instrsarm64.h954
-rw-r--r--src/jit/instrsxarch.h540
-rw-r--r--src/jit/jit.h891
-rw-r--r--src/jit/jit.settings.targets136
-rw-r--r--src/jit/jitconfig.cpp344
-rw-r--r--src/jit/jitconfig.h97
-rw-r--r--src/jit/jitconfigvalues.h255
-rw-r--r--src/jit/jiteh.cpp4056
-rw-r--r--src/jit/jiteh.h180
-rw-r--r--src/jit/jitgcinfo.h452
-rw-r--r--src/jit/jitpch.cpp6
-rw-r--r--src/jit/jitpch.h36
-rw-r--r--src/jit/jitstd.h10
-rw-r--r--src/jit/jitstd/.gitmirror1
-rw-r--r--src/jit/jitstd/algorithm.h49
-rw-r--r--src/jit/jitstd/allocator.h211
-rw-r--r--src/jit/jitstd/functional.h62
-rw-r--r--src/jit/jitstd/hash.h103
-rw-r--r--src/jit/jitstd/hashtable.h822
-rw-r--r--src/jit/jitstd/iterator.h144
-rw-r--r--src/jit/jitstd/jitstd.cpp34
-rw-r--r--src/jit/jitstd/jitstd.sln20
-rw-r--r--src/jit/jitstd/jitstd.vcxproj103
-rw-r--r--src/jit/jitstd/list.h1243
-rw-r--r--src/jit/jitstd/new.h16
-rw-r--r--src/jit/jitstd/pair.h57
-rw-r--r--src/jit/jitstd/stdafx.cpp14
-rw-r--r--src/jit/jitstd/stdafx.h20
-rw-r--r--src/jit/jitstd/targetver.h14
-rw-r--r--src/jit/jitstd/type_traits.h196
-rw-r--r--src/jit/jitstd/unordered_map.h179
-rw-r--r--src/jit/jitstd/unordered_set.h156
-rw-r--r--src/jit/jitstd/utility.h108
-rw-r--r--src/jit/jitstd/vector.h1254
-rw-r--r--src/jit/jittelemetry.cpp390
-rw-r--r--src/jit/jittelemetry.h78
-rw-r--r--src/jit/lclvars.cpp6788
-rw-r--r--src/jit/lir.cpp1640
-rw-r--r--src/jit/lir.h310
-rw-r--r--src/jit/liveness.cpp3133
-rw-r--r--src/jit/loopcloning.cpp845
-rw-r--r--src/jit/loopcloning.h667
-rw-r--r--src/jit/loopcloningopts.h16
-rw-r--r--src/jit/lower.cpp4196
-rw-r--r--src/jit/lower.h280
-rw-r--r--src/jit/lowerarm.cpp71
-rw-r--r--src/jit/lowerarm64.cpp2063
-rw-r--r--src/jit/lowerxarch.cpp4192
-rw-r--r--src/jit/lsra.cpp11578
-rw-r--r--src/jit/lsra.h1608
-rw-r--r--src/jit/lsra_reftypes.h23
-rw-r--r--src/jit/morph.cpp18245
-rw-r--r--src/jit/nodeinfo.h161
-rw-r--r--src/jit/objectalloc.cpp207
-rw-r--r--src/jit/objectalloc.h82
-rw-r--r--src/jit/opcode.h29
-rw-r--r--src/jit/optcse.cpp2582
-rw-r--r--src/jit/optimizer.cpp8540
-rw-r--r--src/jit/phase.h77
-rw-r--r--src/jit/protojit/.gitmirror1
-rw-r--r--src/jit/protojit/CMakeLists.txt51
-rw-r--r--src/jit/protojit/SOURCES10
-rw-r--r--src/jit/protojit/makefile7
-rw-r--r--src/jit/protojit/protojit.def7
-rw-r--r--src/jit/protojit/protojit.nativeproj88
-rw-r--r--src/jit/rangecheck.cpp1388
-rw-r--r--src/jit/rangecheck.h603
-rw-r--r--src/jit/rationalize.cpp1056
-rw-r--r--src/jit/rationalize.h67
-rw-r--r--src/jit/regalloc.cpp6841
-rw-r--r--src/jit/regalloc.h111
-rw-r--r--src/jit/register.h124
-rw-r--r--src/jit/register_arg_convention.cpp123
-rw-r--r--src/jit/register_arg_convention.h111
-rw-r--r--src/jit/registerarm.h86
-rw-r--r--src/jit/registerarm64.h114
-rw-r--r--src/jit/registerfp.cpp1522
-rw-r--r--src/jit/registerfp.h26
-rw-r--r--src/jit/registerxmm.h48
-rw-r--r--src/jit/reglist.h18
-rw-r--r--src/jit/regpair.h357
-rw-r--r--src/jit/regset.cpp3777
-rw-r--r--src/jit/regset.h460
-rw-r--r--src/jit/scopeinfo.cpp1271
-rw-r--r--src/jit/sharedfloat.cpp498
-rw-r--r--src/jit/sideeffects.cpp549
-rw-r--r--src/jit/sideeffects.h158
-rw-r--r--src/jit/simd.cpp2556
-rw-r--r--src/jit/simd.h43
-rw-r--r--src/jit/simdcodegenxarch.cpp2143
-rw-r--r--src/jit/simdintrinsiclist.h145
-rw-r--r--src/jit/sm.cpp190
-rw-r--r--src/jit/sm.h75
-rw-r--r--src/jit/smallhash.h592
-rw-r--r--src/jit/smcommon.cpp166
-rw-r--r--src/jit/smcommon.h50
-rw-r--r--src/jit/smdata.cpp705
-rw-r--r--src/jit/smopcode.def205
-rw-r--r--src/jit/smopcodemap.def323
-rw-r--r--src/jit/smopenum.h17
-rw-r--r--src/jit/smweights.cpp274
-rw-r--r--src/jit/ssabuilder.cpp1903
-rw-r--r--src/jit/ssabuilder.h212
-rw-r--r--src/jit/ssaconfig.h49
-rw-r--r--src/jit/ssarenamestate.cpp244
-rw-r--r--src/jit/ssarenamestate.h129
-rw-r--r--src/jit/stackfp.cpp4494
-rw-r--r--src/jit/standalone/.gitmirror1
-rw-r--r--src/jit/standalone/CMakeLists.txt58
-rw-r--r--src/jit/target.h2320
-rw-r--r--src/jit/targetamd64.cpp19
-rw-r--r--src/jit/targetarm.cpp19
-rw-r--r--src/jit/targetarm64.cpp19
-rw-r--r--src/jit/targetx86.cpp19
-rw-r--r--src/jit/tinyarray.h79
-rw-r--r--src/jit/titypes.h15
-rw-r--r--src/jit/typeinfo.cpp405
-rw-r--r--src/jit/typelist.h81
-rw-r--r--src/jit/unwind.cpp171
-rw-r--r--src/jit/unwind.h852
-rw-r--r--src/jit/unwindamd64.cpp1056
-rw-r--r--src/jit/unwindarm.cpp2320
-rw-r--r--src/jit/unwindarm64.cpp802
-rw-r--r--src/jit/utils.cpp1767
-rw-r--r--src/jit/utils.h710
-rw-r--r--src/jit/valuenum.cpp7518
-rw-r--r--src/jit/valuenum.h1378
-rw-r--r--src/jit/valuenumfuncs.h141
-rw-r--r--src/jit/valuenumtype.h101
-rw-r--r--src/jit/varset.h211
-rw-r--r--src/jit/vartype.h285
-rw-r--r--src/jit/x86_instrs.h10
233 files changed, 361283 insertions, 0 deletions
diff --git a/src/jit/.clang-format b/src/jit/.clang-format
new file mode 100644
index 0000000000..1e3930f737
--- /dev/null
+++ b/src/jit/.clang-format
@@ -0,0 +1,80 @@
+---
+Language: Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: true
+AlignEscapedNewlinesLeft: false
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: false
+BraceWrapping:
+ AfterClass: true
+ AfterControlStatement: true
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: true
+ AfterUnion: true
+ BeforeCatch: true
+ BeforeElse: true
+ IndentBraces: false
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Allman
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: true
+ColumnLimit: 120
+CommentPragmas: '^ IWYU pragma:'
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+ForEachMacros: [ ]
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: true
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: true
+PenaltyBreakBeforeFirstCallParameter: 400
+PenaltyBreakComment: 50
+PenaltyBreakFirstLessLess: 500
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 100000
+PointerAlignment: Left
+ReflowComments: true
+SortIncludes: false
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 1
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Cpp11
+TabWidth: 4
+UseTab: Never
+...
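
For reference, code formatted under the settings above comes out roughly like the following sketch (Allman braces, 4-space indents, left-aligned pointers, aligned consecutive declarations); the function and variable names are purely illustrative and are not taken from the JIT sources:

    // Illustrative only: shows the shape clang-format produces with the configuration above.
    static int computeTotal(const int* values, int count)
    {
        int total = 0; // AlignConsecutiveDeclarations/Assignments line up names and '='
        int index = 0;

        while (index < count)
        {
            total += values[index];
            index += 1;
        }

        return total;
    }
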
diff --git a/src/jit/.gitmirror b/src/jit/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
new file mode 100644
index 0000000000..6372e37852
--- /dev/null
+++ b/src/jit/CMakeLists.txt
@@ -0,0 +1,212 @@
+set(CMAKE_INCLUDE_CURRENT_DIR ON)
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+
+include_directories("./jitstd")
+include_directories("../inc")
+
+# Enable the following for UNIX altjit on Windows
+# add_definitions(-DALT_JIT)
+
+if (CLR_CMAKE_TARGET_ARCH_AMD64)
+ add_definitions(-DFEATURE_SIMD)
+ add_definitions(-DFEATURE_AVX_SUPPORT)
+endif ()
+
+
+if(WIN32)
+ set(JIT_RESOURCES Native.rc)
+endif(WIN32)
+
+set( JIT_SOURCES
+ alloc.cpp
+ assertionprop.cpp
+ bitset.cpp
+ block.cpp
+ codegencommon.cpp
+ compiler.cpp
+ copyprop.cpp
+ disasm.cpp
+ earlyprop.cpp
+ ee_il_dll.cpp
+ eeinterface.cpp
+ emit.cpp
+ error.cpp
+ flowgraph.cpp
+ gcdecode.cpp
+ gcencode.cpp
+ gcinfo.cpp
+ gentree.cpp
+ gschecks.cpp
+ hashbv.cpp
+ hostallocator.cpp
+ importer.cpp
+ inline.cpp
+ inlinepolicy.cpp
+ instr.cpp
+ jitconfig.cpp
+ jiteh.cpp
+ jittelemetry.cpp
+ lclvars.cpp
+ lir.cpp
+ liveness.cpp
+ loopcloning.cpp
+ lower.cpp
+ lsra.cpp
+ morph.cpp
+ objectalloc.cpp
+ optcse.cpp
+ optimizer.cpp
+ rangecheck.cpp
+ rationalize.cpp
+ regalloc.cpp
+ register_arg_convention.cpp
+ regset.cpp
+ scopeinfo.cpp
+ sharedfloat.cpp
+ sideeffects.cpp
+ sm.cpp
+ smdata.cpp
+ smweights.cpp
+ ssabuilder.cpp
+ ssarenamestate.cpp
+ typeinfo.cpp
+ unwind.cpp
+ utils.cpp
+ valuenum.cpp
+)
+
+if(CLR_CMAKE_TARGET_ARCH_AMD64)
+ set( ARCH_SOURCES
+ codegenxarch.cpp
+ emitxarch.cpp
+ lowerxarch.cpp
+ simd.cpp
+ simdcodegenxarch.cpp
+ targetamd64.cpp
+ unwindamd64.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM)
+ set( ARCH_SOURCES
+ codegenarm.cpp
+ decomposelongs.cpp
+ emitarm.cpp
+ lowerarm.cpp
+ targetarm.cpp
+ unwindarm.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_I386)
+ set( ARCH_SOURCES
+ codegenxarch.cpp
+ decomposelongs.cpp
+ emitxarch.cpp
+ lowerxarch.cpp
+ simd.cpp
+ simdcodegenxarch.cpp
+ targetx86.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
+ set( ARCH_SOURCES
+ codegenarm64.cpp
+ emitarm64.cpp
+ lowerarm64.cpp
+ targetarm64.cpp
+ unwindarm.cpp
+ unwindarm64.cpp
+ )
+else()
+ clr_unknown_arch()
+endif()
+
+# The following defines all the source files used by the "legacy" back-end (#ifdef LEGACY_BACKEND).
+# It is always safe to include both legacy and non-legacy files in the build, as everything is properly
+# #ifdef'ed, though it makes the build slightly slower to do so. Note there is only a legacy backend for
+# x86 and ARM.
+
+if(CLR_CMAKE_TARGET_ARCH_AMD64)
+ set( ARCH_LEGACY_SOURCES
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM)
+ set( ARCH_LEGACY_SOURCES
+ codegenlegacy.cpp
+ registerfp.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_I386)
+ set( ARCH_LEGACY_SOURCES
+ codegenlegacy.cpp
+ stackfp.cpp
+ )
+elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
+ set( ARCH_LEGACY_SOURCES
+ )
+else()
+ clr_unknown_arch()
+endif()
+
+set( SOURCES
+ ${JIT_SOURCES}
+ ${ARCH_SOURCES}
+ ${ARCH_LEGACY_SOURCES}
+ ${JIT_RESOURCES}
+)
+
+convert_to_absolute_path(SOURCES ${SOURCES})
+
+if(WIN32)
+ add_precompiled_header(jitpch.h ../jitpch.cpp SOURCES)
+
+    # Create .def file containing a list of exports preceded by
+ # 'EXPORTS'. The file "ClrJit.exports" already contains the list, so we
+ # massage it into the correct format here to create "ClrJit.exports.def".
+ set(JIT_EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/ClrJit.exports.def)
+ set(JIT_EXPORTS_FILE_TEMP ${JIT_EXPORTS_FILE}.txt)
+ file(READ "ClrJit.exports" exports_list)
+ file(WRITE ${JIT_EXPORTS_FILE_TEMP} "LIBRARY CLRJIT\n")
+ file(APPEND ${JIT_EXPORTS_FILE_TEMP} "EXPORTS\n")
+ file(APPEND ${JIT_EXPORTS_FILE_TEMP} ${exports_list})
+
+ # Copy the file only if it has changed.
+ execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ ${JIT_EXPORTS_FILE_TEMP} ${JIT_EXPORTS_FILE})
+
+ set(SHARED_LIB_SOURCES ${SOURCES} ${JIT_EXPORTS_FILE})
+else()
+ set(JIT_EXPORTS_IN_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrjit.exports.in)
+ file(READ "${CMAKE_CURRENT_LIST_DIR}/ClrJit.exports" jit_exports)
+ file(READ "${CMAKE_CURRENT_LIST_DIR}/ClrJit.PAL.exports" pal_exports)
+ file(WRITE ${JIT_EXPORTS_IN_FILE} ${jit_exports})
+ file(APPEND ${JIT_EXPORTS_IN_FILE} "\n")
+ file(APPEND ${JIT_EXPORTS_IN_FILE} ${pal_exports})
+
+ set(JIT_EXPORTS_FILE ${CMAKE_CURRENT_BINARY_DIR}/clrjit.exports)
+ generate_exports_file(${JIT_EXPORTS_IN_FILE} ${JIT_EXPORTS_FILE})
+
+ if(CMAKE_SYSTEM_NAME STREQUAL Linux OR CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL NetBSD)
+ # This is required to force using our own PAL, not one that we are loaded with.
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Xlinker -Bsymbolic -Bsymbolic-functions")
+
+ set(JIT_EXPORTS_LINKER_OPTION -Wl,--version-script=${JIT_EXPORTS_FILE})
+ elseif(CMAKE_SYSTEM_NAME STREQUAL Darwin)
+ set(JIT_EXPORTS_LINKER_OPTION -Wl,-exported_symbols_list,${JIT_EXPORTS_FILE})
+ endif()
+
+ set(SHARED_LIB_SOURCES ${SOURCES})
+endif()
+
+add_custom_target(jit_exports DEPENDS ${JIT_EXPORTS_FILE})
+
+set(JIT_BASE_NAME clrjit)
+if (CLR_BUILD_JIT32)
+ set(JIT_BASE_NAME ryujit)
+endif()
+
+if(WIN32)
+ add_definitions(-DFX_VER_INTERNALNAME_STR=${JIT_BASE_NAME}.dll)
+endif(WIN32)
+
+add_subdirectory(dll)
+add_subdirectory(crossgen)
+add_subdirectory(standalone)
+
+if (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+ add_subdirectory(protojit)
+endif (CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
diff --git a/src/jit/ClrJit.PAL.exports b/src/jit/ClrJit.PAL.exports
new file mode 100644
index 0000000000..c6b4e8ec57
--- /dev/null
+++ b/src/jit/ClrJit.PAL.exports
@@ -0,0 +1,3 @@
+DllMain
+PAL_RegisterModule
+PAL_UnregisterModule
diff --git a/src/jit/ClrJit.exports b/src/jit/ClrJit.exports
new file mode 100644
index 0000000000..0126e63b4d
--- /dev/null
+++ b/src/jit/ClrJit.exports
@@ -0,0 +1,3 @@
+getJit
+jitStartup
+sxsJitStartup
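
Taken together with the CMakeLists.txt logic above, on Windows this export list would be massaged into a ClrJit.exports.def module-definition file along the lines of the following (a sketch of the expected generated output, not a file that is part of this diff):

    LIBRARY CLRJIT
    EXPORTS
    getJit
    jitStartup
    sxsJitStartup

On the non-Windows path, the same list is instead concatenated with ClrJit.PAL.exports and passed to generate_exports_file to produce a linker version script (Linux/FreeBSD/NetBSD) or an exported-symbols list (Darwin).
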
diff --git a/src/jit/DIRS.proj b/src/jit/DIRS.proj
new file mode 100644
index 0000000000..6d1c06d3f0
--- /dev/null
+++ b/src/jit/DIRS.proj
@@ -0,0 +1,50 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+ <!--Import the settings-->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup>
+ <BuildInPhase1>true</BuildInPhase1>
+ <BuildInPhaseDefault>false</BuildInPhaseDefault>
+ <BuildCoreBinaries>true</BuildCoreBinaries>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(BuildProjectName)' != 'CoreSys' and '$(BuildArchitecture)' == 'amd64'">
+ <BuildSysBinaries>false</BuildSysBinaries>
+ </PropertyGroup>
+
+ <PropertyGroup Condition="'$(BuildProjectName)' == 'CoreSys' or '$(BuildArchitecture)' != 'amd64'">
+ <BuildSysBinaries>true</BuildSysBinaries>
+ </PropertyGroup>
+
+ <ItemGroup Condition="'$(BuildExePhase)' == '1'">
+ <!-- x86 and ARM clrjit.dll are built in the JIT32 directory; we build FrankenJit here -->
+ <ProjectFile Condition="'$(BuildArchitecture)' != 'i386' and '$(BuildArchitecture)' != 'arm'" Include="dll\jit.nativeproj" />
+ </ItemGroup>
+
+ <!-- Only the main JIT gets built for CoreSys. The other jits (e.g., altjits) do not. -->
+ <ItemGroup Condition="'$(BuildExePhase)' == '1' and '$(BuildProjectName)' != 'CoreSys'">
+
+ <!-- Build the "FrankenJit" (RyuJIT front-end, legacy back-end) and "FrankenAltjit". These can't conflict with the names of the JIT32 directory outputs. -->
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386' or '$(BuildArchitecture)' == 'arm'" Include="frankenjit\frankenjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="frankenaltjit\frankenaltjit.nativeproj" />
+
+ <!-- This might be useful, to help make sure JIT devs build all configurations of the JIT (including crossgen), but
+ it appears to cause problems with the build system, and it slows down normal JIT developer productivity by adding a seldom-useful build.
+ <ProjectFile Include="crossgen\jit_crossgen.nativeproj" />
+ -->
+
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'arm'" Include="protojit\protojit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="protojit\protojit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="ctp\ctpjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'amd64'" Include="arm64altjit\arm64altjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protojit\protojit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="protononjit\protononjit.nativeproj" />
+
+ <!-- We could build skipjit for all architectures, but we only need it for x86 currently -->
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="skipjit\skipjit.nativeproj" />
+ </ItemGroup>
+
+ <!--Import the targets-->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\tools\Microsoft.DevDiv.Traversal.targets" />
+</Project>
diff --git a/src/jit/Native.rc b/src/jit/Native.rc
new file mode 100644
index 0000000000..9e01bcd6cc
--- /dev/null
+++ b/src/jit/Native.rc
@@ -0,0 +1,8 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#define FX_VER_FILEDESCRIPTION_STR "Microsoft .NET Runtime Just-In-Time Compiler\0"
+
+#include <fxver.h>
+#include <fxver.rc>
diff --git a/src/jit/_typeinfo.h b/src/jit/_typeinfo.h
new file mode 100755
index 0000000000..08273adc8d
--- /dev/null
+++ b/src/jit/_typeinfo.h
@@ -0,0 +1,764 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX _typeInfo XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ This header file is named _typeInfo.h to be distinguished from typeinfo.h
+ in the NT SDK
+******************************************************************************/
+
+/*****************************************************************************/
+#ifndef _TYPEINFO_H_
+#define _TYPEINFO_H_
+/*****************************************************************************/
+
+enum ti_types
+{
+#define DEF_TI(ti, nm) ti,
+#include "titypes.h"
+#undef DEF_TI
+ TI_ONLY_ENUM = TI_METHOD, // Enum values above this are completely described by the enumeration
+ TI_COUNT
+};
+
+#if defined(_TARGET_64BIT_)
+#define TI_I_IMPL TI_LONG
+#else
+#define TI_I_IMPL TI_INT
+#endif
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+#define TI_DUMP_PADDING " "
+#ifdef _MSC_VER
+namespace
+{
+#endif // _MSC_VER
+SELECTANY const char* g_ti_type_names_map[] = {
+#define DEF_TI(ti, nm) nm,
+#include "titypes.h"
+#undef DEF_TI
+};
+#ifdef _MSC_VER
+}
+#endif // _MSC_VER
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+#ifdef _MSC_VER
+namespace
+{
+#endif // _MSC_VER
+SELECTANY const ti_types g_jit_types_map[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) verType,
+#include "typelist.h"
+#undef DEF_TP
+};
+#ifdef _MSC_VER
+}
+#endif // _MSC_VER
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+inline const char* tiType2Str(ti_types type)
+{
+ return g_ti_type_names_map[type];
+}
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+// typeInfo does not care about the distinction between signed/unsigned
+// This routine converts all unsigned types to signed ones
+inline ti_types varType2tiType(var_types type)
+{
+ assert(g_jit_types_map[TYP_BYTE] == TI_BYTE);
+ assert(g_jit_types_map[TYP_INT] == TI_INT);
+ assert(g_jit_types_map[TYP_UINT] == TI_INT);
+ assert(g_jit_types_map[TYP_FLOAT] == TI_FLOAT);
+ assert(g_jit_types_map[TYP_BYREF] == TI_ERROR);
+ assert(g_jit_types_map[type] != TI_ERROR);
+ return g_jit_types_map[type];
+}
+
+#ifdef _MSC_VER
+namespace
+{
+#endif // _MSC_VER
+SELECTANY const ti_types g_ti_types_map[CORINFO_TYPE_COUNT] = {
+ // see the definition of enum CorInfoType in file inc/corinfo.h
+ TI_ERROR, // CORINFO_TYPE_UNDEF = 0x0,
+ TI_ERROR, // CORINFO_TYPE_VOID = 0x1,
+ TI_BYTE, // CORINFO_TYPE_BOOL = 0x2,
+ TI_SHORT, // CORINFO_TYPE_CHAR = 0x3,
+ TI_BYTE, // CORINFO_TYPE_BYTE = 0x4,
+ TI_BYTE, // CORINFO_TYPE_UBYTE = 0x5,
+ TI_SHORT, // CORINFO_TYPE_SHORT = 0x6,
+ TI_SHORT, // CORINFO_TYPE_USHORT = 0x7,
+ TI_INT, // CORINFO_TYPE_INT = 0x8,
+ TI_INT, // CORINFO_TYPE_UINT = 0x9,
+ TI_LONG, // CORINFO_TYPE_LONG = 0xa,
+ TI_LONG, // CORINFO_TYPE_ULONG = 0xb,
+ TI_I_IMPL, // CORINFO_TYPE_NATIVEINT = 0xc,
+ TI_I_IMPL, // CORINFO_TYPE_NATIVEUINT = 0xd,
+ TI_FLOAT, // CORINFO_TYPE_FLOAT = 0xe,
+ TI_DOUBLE, // CORINFO_TYPE_DOUBLE = 0xf,
+ TI_REF, // CORINFO_TYPE_STRING = 0x10,
+ TI_ERROR, // CORINFO_TYPE_PTR = 0x11,
+ TI_ERROR, // CORINFO_TYPE_BYREF = 0x12,
+ TI_STRUCT, // CORINFO_TYPE_VALUECLASS = 0x13,
+ TI_REF, // CORINFO_TYPE_CLASS = 0x14,
+ TI_STRUCT, // CORINFO_TYPE_REFANY = 0x15,
+ TI_REF, // CORINFO_TYPE_VAR = 0x16,
+};
+#ifdef _MSC_VER
+}
+#endif // _MSC_VER
+
+// Convert the type returned from the VM to a ti_type.
+
+inline ti_types JITtype2tiType(CorInfoType type)
+{
+ // spot check to make certain enumerations have not changed
+
+ assert(g_ti_types_map[CORINFO_TYPE_CLASS] == TI_REF);
+ assert(g_ti_types_map[CORINFO_TYPE_BYREF] == TI_ERROR);
+ assert(g_ti_types_map[CORINFO_TYPE_DOUBLE] == TI_DOUBLE);
+ assert(g_ti_types_map[CORINFO_TYPE_VALUECLASS] == TI_STRUCT);
+ assert(g_ti_types_map[CORINFO_TYPE_STRING] == TI_REF);
+
+ type = CorInfoType(type & CORINFO_TYPE_MASK); // strip off modifiers
+
+ assert(type < CORINFO_TYPE_COUNT);
+
+ assert(g_ti_types_map[type] != TI_ERROR || type == CORINFO_TYPE_VOID);
+ return g_ti_types_map[type];
+};
+
+/*****************************************************************************
+ * Declares the typeInfo class, which represents the type of an entity on the
+ * stack, in a local variable or an argument.
+ *
+ * Flags: LLLLLLLLLLLLLLLLffffffffffTTTTTT
+ *
+ * L = local var # or instance field #
+ * x = unused
+ * f = flags
+ * T = type
+ *
+ * The lower bits are used to store the type component, and may be one of:
+ *
+ * TI_* (primitive)   - see typelist.h for enumeration (BYTE, SHORT, INT..)
+ * TI_REF - OBJREF / ARRAY use m_cls for the type
+ * (including arrays and null objref)
+ * TI_STRUCT - VALUE type, use m_cls for the actual type
+ *
+ * NOTE carefully that BYREF info is not stored here. You will never see a
+ * TI_BYREF in this component. For example, the type component
+ * of a "byref TI_INT" is TI_FLAG_BYREF | TI_INT.
+ *
+ * NOTE carefully that Generic Type Variable info is
+ * only stored here in part. Values of type "T" (e.g "!0" in ILASM syntax),
+ * i.e. some generic variable type, appear only when verifying generic
+ * code. They come in two flavours: unboxed and boxed. Unboxed
+ * is the norm, e.g. a local, field or argument of type T. Boxed
+ * values arise from an IL instruction such as "box !0".
+ * The EE provides type handles for each different type
+ * variable and the EE's "canCast" operation decides casting
+ * for boxed type variable. Thus:
+ *
+ * (TI_REF, <type-variable-type-handle>) == boxed type variable
+ *
+ * (TI_REF, <type-variable-type-handle>)
+ * + TI_FLAG_GENERIC_TYPE_VAR == unboxed type variable
+ *
+ * Using TI_REF for these may seem odd but using TI_STRUCT means the
+ * code-generation parts of the importer get confused when they
+ * can't work out the size, GC-ness etc. of the "struct". So using TI_REF
+ * just tricks these backend parts into generating pseudo-trees for
+ * the generic code we're verifying. These trees then get thrown away
+ * anyway as we do verification of generic code in import-only mode.
+ *
+ */
+
+// TI_COUNT is less than or equal to TI_FLAG_DATA_MASK
+
+#define TI_FLAG_DATA_BITS 6
+#define TI_FLAG_DATA_MASK ((1 << TI_FLAG_DATA_BITS) - 1)
+
+// Flag indicating this item is uninitialized
+// Note that if UNINIT and BYREF are both set,
+// it means byref (uninit x) - i.e. we are pointing to an uninit <something>
+
+#define TI_FLAG_UNINIT_OBJREF 0x00000040
+
+// Flag indicating this item is a byref <something>
+
+#define TI_FLAG_BYREF 0x00000080
+
+// This item is a byref generated using the readonly. prefix
+// to a ldelema or Address function on an array type. The
+// runtime type check is ignored in these cases, but the
+// resulting byref can only be used in order to perform a
+// constraint call.
+
+#define TI_FLAG_BYREF_READONLY 0x00000100
+
+// This item is the MSIL 'I' type which is pointer-sized
+// (different size depending on platform) but which on ALL platforms
+// is implicitly convertible with a 32-bit int but not with a 64-bit one.
+
+// Note: this flag is currently used only in 64-bit systems to annotate
+// native int types. In 32 bits, since you can transparently coalesce int32
+// and native-int and both are the same size, JIT32 had no need to model
+// native-ints as a separate entity. For 64-bit though, since they have
+// different sizes, it's important to discern between a long and a native int
+// since conversions between them are not verifiable.
+#define TI_FLAG_NATIVE_INT 0x00000200
+
+// This item contains the 'this' pointer (used for tracking)
+
+#define TI_FLAG_THIS_PTR 0x00001000
+
+// This item is a byref to something which has a permanent home
+// (e.g. a static field, or instance field of an object in GC heap, as
+// opposed to the stack or a local variable). TI_FLAG_BYREF must also be
+// set. This information is useful for tail calls and return byrefs.
+//
+// Instructions that generate a permanent home byref:
+//
+// ldelema
+// ldflda of a ref object or another permanent home byref
+// array element address Get() helper
+// call or calli to a method that returns a byref and is verifiable or SkipVerify
+// dup
+// unbox
+
+#define TI_FLAG_BYREF_PERMANENT_HOME 0x00002000
+
+// This is for use when verifying generic code.
+// This indicates that the type handle is really an unboxed
+// generic type variable (e.g. the result of loading an argument
+// of type T in a class List<T>). Without this flag
+// the same type handle indicates a boxed generic value,
+// e.g. the result of a "box T" instruction.
+#define TI_FLAG_GENERIC_TYPE_VAR 0x00004000
+
+// Number of bits local var # is shifted
+
+#define TI_FLAG_LOCAL_VAR_SHIFT 16
+#define TI_FLAG_LOCAL_VAR_MASK 0xFFFF0000
+
+// Field info uses the same space as the local info
+
+#define TI_FLAG_FIELD_SHIFT TI_FLAG_LOCAL_VAR_SHIFT
+#define TI_FLAG_FIELD_MASK TI_FLAG_LOCAL_VAR_MASK
+
+#define TI_ALL_BYREF_FLAGS (TI_FLAG_BYREF | TI_FLAG_BYREF_READONLY | TI_FLAG_BYREF_PERMANENT_HOME)
+
+/*****************************************************************************
+ * A typeInfo can be one of several types:
+ * - A primitive type (I4,I8,R4,R8,I)
+ * - A type (ref, array, value type) (m_cls describes the type)
+ * - An array (m_cls describes the array type)
+ * - A byref (byref flag set, otherwise the same as the above),
+ * - A Function Pointer (m_method)
+ * - A byref local variable (byref and byref local flags set), can be
+ * uninitialized
+ *
+ * The reason that there can be 2 types of byrefs (general byrefs, and byref
+ * locals) is that byref locals initially point to uninitialized items.
+ * Therefore these byrefs must be tracked specially.
+ */
+
+class typeInfo
+{
+
+private:
+ union {
+ struct
+ {
+ ti_types type : 6;
+ unsigned uninitobj : 1; // used
+ unsigned byref : 1; // used
+ unsigned byref_readonly : 1; // used
+ unsigned nativeInt : 1; // used
+ unsigned : 2; // unused
+ unsigned thisPtr : 1; // used
+ unsigned thisPermHome : 1; // used
+ unsigned generic_type_var : 1; // used
+ } m_bits;
+
+ DWORD m_flags;
+ };
+
+ union {
+ CORINFO_CLASS_HANDLE m_cls;
+ // Valid only for type TI_METHOD
+ CORINFO_METHOD_HANDLE m_method;
+ };
+
+ template <typename T>
+ static bool isInvalidHandle(const T handle)
+ {
+ static_assert(std::is_same<T, CORINFO_CLASS_HANDLE>::value || std::is_same<T, CORINFO_METHOD_HANDLE>::value,
+ "");
+#ifdef _HOST_64BIT_
+ return handle == reinterpret_cast<T>(0xcccccccccccccccc);
+#else
+ return handle == reinterpret_cast<T>(0xcccccccc);
+#endif
+ }
+
+public:
+ typeInfo() : m_flags(TI_ERROR)
+ {
+ m_cls = NO_CLASS_HANDLE;
+ }
+
+ typeInfo(ti_types tiType)
+ {
+ assert((tiType >= TI_BYTE) && (tiType <= TI_NULL));
+ assert(tiType <= TI_FLAG_DATA_MASK);
+
+ m_flags = (DWORD)tiType;
+ m_cls = NO_CLASS_HANDLE;
+ }
+
+ typeInfo(var_types varType)
+ {
+ m_flags = (DWORD)varType2tiType(varType);
+ m_cls = NO_CLASS_HANDLE;
+ }
+
+ static typeInfo nativeInt()
+ {
+ typeInfo result = typeInfo(TI_I_IMPL);
+#ifdef _TARGET_64BIT_
+ result.m_flags |= TI_FLAG_NATIVE_INT;
+#endif
+ return result;
+ }
+
+ typeInfo(ti_types tiType, CORINFO_CLASS_HANDLE cls, bool typeVar = false)
+ {
+ assert(tiType == TI_STRUCT || tiType == TI_REF);
+ assert(cls != nullptr && !isInvalidHandle(cls));
+ m_flags = tiType;
+ if (typeVar)
+ {
+ m_flags |= TI_FLAG_GENERIC_TYPE_VAR;
+ }
+ m_cls = cls;
+ }
+
+ typeInfo(CORINFO_METHOD_HANDLE method)
+ {
+ assert(method != nullptr && !isInvalidHandle(method));
+ m_flags = TI_METHOD;
+ m_method = method;
+ }
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ void Dump() const;
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+public:
+ // Note that we specifically ignore the permanent byref here. The rationale is that
+    // the type system doesn't know about this (it's jit only), i.e., signatures don't specify if
+    // a byref is safe, so they are fully equivalent for the jit, except for the RET instruction,
+    // instructions that load safe byrefs, and the stack merging logic, which need to know about
+    // the bit.
+ static bool AreEquivalent(const typeInfo& li, const typeInfo& ti)
+ {
+ DWORD allFlags = TI_FLAG_DATA_MASK | TI_FLAG_BYREF | TI_FLAG_BYREF_READONLY | TI_FLAG_GENERIC_TYPE_VAR |
+ TI_FLAG_UNINIT_OBJREF;
+#ifdef _TARGET_64BIT_
+ allFlags |= TI_FLAG_NATIVE_INT;
+#endif // _TARGET_64BIT_
+
+ if ((li.m_flags & allFlags) != (ti.m_flags & allFlags))
+ {
+ return false;
+ }
+
+ unsigned type = li.m_flags & TI_FLAG_DATA_MASK;
+ assert(TI_ERROR <
+ TI_ONLY_ENUM); // TI_ERROR looks like it needs more than enum. This optimises the success case a bit
+ if (type > TI_ONLY_ENUM)
+ {
+ return true;
+ }
+ if (type == TI_ERROR)
+ {
+ return false; // TI_ERROR != TI_ERROR
+ }
+ assert(li.m_cls != NO_CLASS_HANDLE && ti.m_cls != NO_CLASS_HANDLE);
+ return li.m_cls == ti.m_cls;
+ }
+
+#ifdef DEBUG
+ // On 64-bit systems, nodes whose "proper" type is "native int" get labeled TYP_LONG.
+ // In the verification type system, we always transform "native int" to "TI_LONG" with the
+ // native int flag set.
+ // Ideally, we would keep track of which nodes labeled "TYP_LONG" are really "native int", but
+ // attempts to do that have proved too difficult. So in situations where we try to compare the
+ // verification type system and the node type system, we use this method, which allows the specific
+ // mismatch where "verTi" is TI_LONG with the native int flag and "nodeTi" is TI_LONG without the
+ // native int flag set.
+ static bool AreEquivalentModuloNativeInt(const typeInfo& verTi, const typeInfo& nodeTi)
+ {
+ if (AreEquivalent(verTi, nodeTi))
+ {
+ return true;
+ }
+#ifdef _TARGET_64BIT_
+ return (nodeTi.IsType(TI_I_IMPL) && tiCompatibleWith(nullptr, verTi, typeInfo::nativeInt(), true)) ||
+ (verTi.IsType(TI_I_IMPL) && tiCompatibleWith(nullptr, typeInfo::nativeInt(), nodeTi, true));
+#else // _TARGET_64BIT_
+ return false;
+#endif // !_TARGET_64BIT_
+ }
+#endif // DEBUG
+
+ static BOOL tiMergeToCommonParent(COMP_HANDLE CompHnd, typeInfo* pDest, const typeInfo* pSrc, bool* changed);
+ static BOOL tiCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack);
+
+ static BOOL tiMergeCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack);
+
+ /////////////////////////////////////////////////////////////////////////
+ // Operations
+ /////////////////////////////////////////////////////////////////////////
+
+ void SetIsThisPtr()
+ {
+ m_flags |= TI_FLAG_THIS_PTR;
+ assert(m_bits.thisPtr);
+ }
+
+ void ClearThisPtr()
+ {
+ m_flags &= ~(TI_FLAG_THIS_PTR);
+ }
+
+ void SetIsPermanentHomeByRef()
+ {
+ assert(IsByRef());
+ m_flags |= TI_FLAG_BYREF_PERMANENT_HOME;
+ }
+
+ void SetIsReadonlyByRef()
+ {
+ assert(IsByRef());
+ m_flags |= TI_FLAG_BYREF_READONLY;
+ }
+
+ // Set that this item is uninitialized.
+ void SetUninitialisedObjRef()
+ {
+ assert((IsObjRef() && IsThisPtr()));
+ // For now, this is used only to track uninit this ptrs in ctors
+
+ m_flags |= TI_FLAG_UNINIT_OBJREF;
+ assert(m_bits.uninitobj);
+ }
+
+ // Set that this item is initialised.
+ void SetInitialisedObjRef()
+ {
+ assert((IsObjRef() && IsThisPtr()));
+ // For now, this is used only to track uninit this ptrs in ctors
+
+ m_flags &= ~TI_FLAG_UNINIT_OBJREF;
+ }
+
+ typeInfo& DereferenceByRef()
+ {
+ if (!IsByRef())
+ {
+ m_flags = TI_ERROR;
+ INDEBUG(m_cls = NO_CLASS_HANDLE);
+ }
+ m_flags &= ~(TI_FLAG_THIS_PTR | TI_ALL_BYREF_FLAGS);
+ return *this;
+ }
+
+ typeInfo& MakeByRef()
+ {
+ assert(!IsByRef());
+ m_flags &= ~(TI_FLAG_THIS_PTR);
+ m_flags |= TI_FLAG_BYREF;
+ return *this;
+ }
+
+ // I1,I2 --> I4
+ // FLOAT --> DOUBLE
+ // objref, arrays, byrefs, value classes are unchanged
+ //
+ typeInfo& NormaliseForStack()
+ {
+ switch (GetType())
+ {
+ case TI_BYTE:
+ case TI_SHORT:
+ m_flags = TI_INT;
+ break;
+
+ case TI_FLOAT:
+ m_flags = TI_DOUBLE;
+ break;
+ default:
+ break;
+ }
+ return (*this);
+ }
+
+ /////////////////////////////////////////////////////////////////////////
+ // Getters
+ /////////////////////////////////////////////////////////////////////////
+
+ CORINFO_CLASS_HANDLE GetClassHandle() const
+ {
+ return m_cls;
+ }
+
+ CORINFO_CLASS_HANDLE GetClassHandleForValueClass() const
+ {
+ assert(IsType(TI_STRUCT));
+ assert(m_cls != NO_CLASS_HANDLE);
+ return m_cls;
+ }
+
+ CORINFO_CLASS_HANDLE GetClassHandleForObjRef() const
+ {
+ assert(IsType(TI_REF));
+ assert(m_cls != NO_CLASS_HANDLE);
+ return m_cls;
+ }
+
+ CORINFO_METHOD_HANDLE GetMethod() const
+ {
+ assert(GetType() == TI_METHOD);
+ return m_method;
+ }
+
+ // If FEATURE_CORECLR is enabled, GetMethod can be called
+ // before the pointer type is known to be a method pointer type.
+ CORINFO_METHOD_HANDLE GetMethod2() const
+ {
+ return m_method;
+ }
+
+ // Get this item's type
+ // If primitive, returns the primitive type (TI_*)
+ // If not primitive, returns:
+ // - TI_ERROR if a byref anything
+ // - TI_REF if a class or array or null or a generic type variable
+ // - TI_STRUCT if a value class
+ ti_types GetType() const
+ {
+ if (m_flags & TI_FLAG_BYREF)
+ {
+ return TI_ERROR;
+ }
+
+ // objref/array/null (objref), value class, ptr, primitive
+ return (ti_types)(m_flags & TI_FLAG_DATA_MASK);
+ }
+
+ BOOL IsType(ti_types type) const
+ {
+ assert(type != TI_ERROR);
+ return (m_flags & (TI_FLAG_DATA_MASK | TI_FLAG_BYREF | TI_FLAG_BYREF_READONLY | TI_FLAG_BYREF_PERMANENT_HOME |
+ TI_FLAG_GENERIC_TYPE_VAR)) == DWORD(type);
+ }
+
+ // Returns whether this is an objref
+ BOOL IsObjRef() const
+ {
+ return IsType(TI_REF) || IsType(TI_NULL);
+ }
+
+ // Returns whether this is a by-ref
+ BOOL IsByRef() const
+ {
+ return (m_flags & TI_FLAG_BYREF);
+ }
+
+ // Returns whether this is the this pointer
+ BOOL IsThisPtr() const
+ {
+ return (m_flags & TI_FLAG_THIS_PTR);
+ }
+
+ BOOL IsUnboxedGenericTypeVar() const
+ {
+ return !IsByRef() && (m_flags & TI_FLAG_GENERIC_TYPE_VAR);
+ }
+
+ BOOL IsReadonlyByRef() const
+ {
+ return IsByRef() && (m_flags & TI_FLAG_BYREF_READONLY);
+ }
+
+ BOOL IsPermanentHomeByRef() const
+ {
+ return IsByRef() && (m_flags & TI_FLAG_BYREF_PERMANENT_HOME);
+ }
+
+ // Returns whether this is a method desc
+ BOOL IsMethod() const
+ {
+ return (GetType() == TI_METHOD);
+ }
+
+ BOOL IsStruct() const
+ {
+ return IsType(TI_STRUCT);
+ }
+
+ // A byref value class is NOT a value class
+ BOOL IsValueClass() const
+ {
+ return (IsStruct() || IsPrimitiveType());
+ }
+
+ // Does not return true for primitives. Will return true for value types that behave
+ // as primitives
+ BOOL IsValueClassWithClsHnd() const
+ {
+ if ((GetType() == TI_STRUCT) ||
+ (m_cls && GetType() != TI_REF && GetType() != TI_METHOD &&
+ GetType() != TI_ERROR)) // necessary because if byref bit is set, we return TI_ERROR)
+ {
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+ }
+
+ // Returns whether this is an integer or real number
+ // NOTE: Use NormaliseToPrimitiveType() if you think you may have a
+ // System.Int32 etc., because those types are not considered number
+ // types by this function.
+ BOOL IsNumberType() const
+ {
+ ti_types Type = GetType();
+
+ // I1, I2, Boolean, character etc. cannot exist plainly -
+ // everything is at least an I4
+
+ return (Type == TI_INT || Type == TI_LONG || Type == TI_DOUBLE);
+ }
+
+ // Returns whether this is an integer
+ // NOTE: Use NormaliseToPrimitiveType() if you think you may have a
+ // System.Int32 etc., because those types are not considered number
+ // types by this function.
+ BOOL IsIntegerType() const
+ {
+ ti_types Type = GetType();
+
+ // I1, I2, Boolean, character etc. cannot exist plainly -
+ // everything is at least an I4
+
+ return (Type == TI_INT || Type == TI_LONG);
+ }
+
+    // Returns whether this is an integer or a native int.
+ BOOL IsIntOrNativeIntType() const
+ {
+#ifdef _TARGET_64BIT_
+ return (GetType() == TI_INT) || AreEquivalent(*this, nativeInt());
+#else
+ return IsType(TI_INT);
+#endif
+ }
+
+ BOOL IsNativeIntType() const
+ {
+ return AreEquivalent(*this, nativeInt());
+ }
+
+ // Returns whether this is a primitive type (not a byref, objref,
+ // array, null, value class, invalid value)
+ // May Need to normalise first (m/r/I4 --> I4)
+ BOOL IsPrimitiveType() const
+ {
+ DWORD Type = GetType();
+
+ // boolean, char, u1,u2 never appear on the operand stack
+ return (Type == TI_BYTE || Type == TI_SHORT || Type == TI_INT || Type == TI_LONG || Type == TI_FLOAT ||
+ Type == TI_DOUBLE);
+ }
+
+ // Returns whether this is the null objref
+ BOOL IsNullObjRef() const
+ {
+ return (IsType(TI_NULL));
+ }
+
+ // must be for a local which is an object type (i.e. has a slot >= 0)
+ // for primitive locals, use the liveness bitmap instead
+ // Note that this works if the error is 'Byref'
+ BOOL IsDead() const
+ {
+ return (m_flags & (TI_FLAG_DATA_MASK)) == TI_ERROR;
+ }
+
+ BOOL IsUninitialisedObjRef() const
+ {
+ return (m_flags & TI_FLAG_UNINIT_OBJREF);
+ }
+
+private:
+ // used to make functions that return typeinfo efficient.
+ typeInfo(DWORD flags, CORINFO_CLASS_HANDLE cls)
+ {
+ m_cls = cls;
+ m_flags = flags;
+ }
+
+ friend typeInfo ByRef(const typeInfo& ti);
+ friend typeInfo DereferenceByRef(const typeInfo& ti);
+ friend typeInfo NormaliseForStack(const typeInfo& ti);
+};
+
+inline typeInfo NormaliseForStack(const typeInfo& ti)
+{
+ return typeInfo(ti).NormaliseForStack();
+}
+
+// given ti make a byref to that type.
+inline typeInfo ByRef(const typeInfo& ti)
+{
+ return typeInfo(ti).MakeByRef();
+}
+
+// given ti which is a byref, return the type it points at
+inline typeInfo DereferenceByRef(const typeInfo& ti)
+{
+ return typeInfo(ti).DereferenceByRef();
+}
+/*****************************************************************************/
+#endif // _TYPEINFO_H_
+/*****************************************************************************/
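
To make the flag encoding described in this header concrete, here is a small usage sketch built only from the helpers declared above; it assumes the TI_* enumerators supplied by titypes.h (not part of this diff) and is illustrative rather than code from the JIT:

    // Sketch: composing the typeInfo flag encoding with the helpers declared above.
    void typeInfoFlagSketch()
    {
        typeInfo intTi(TI_INT);          // type component TI_INT, no flag bits set
        typeInfo byrefTi = ByRef(intTi); // adds TI_FLAG_BYREF; GetType() now reports TI_ERROR

        assert(byrefTi.IsByRef());
        assert(byrefTi.GetType() == TI_ERROR);
        assert(DereferenceByRef(byrefTi).IsType(TI_INT)); // stripping the byref recovers TI_INT

        typeInfo byteTi(TI_BYTE);
        assert(NormaliseForStack(byteTi).IsType(TI_INT)); // I1/I2 widen to I4 on the IL stack
    }
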
diff --git a/src/jit/alloc.cpp b/src/jit/alloc.cpp
new file mode 100644
index 0000000000..5c5f712a3f
--- /dev/null
+++ b/src/jit/alloc.cpp
@@ -0,0 +1,590 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+
+#if defined(_MSC_VER)
+#pragma hdrstop
+#endif // defined(_MSC_VER)
+
+//------------------------------------------------------------------------
+// PooledAllocator:
+// This subclass of `ArenaAllocator` is a singleton that always keeps
+// a single default-sized page allocated. We try to use the singleton
+// allocator as often as possible (i.e. for all non-concurrent
+// method compilations).
+class PooledAllocator : public ArenaAllocator
+{
+private:
+ enum
+ {
+ POOLED_ALLOCATOR_NOTINITIALIZED = 0,
+ POOLED_ALLOCATOR_IN_USE = 1,
+ POOLED_ALLOCATOR_AVAILABLE = 2,
+ POOLED_ALLOCATOR_SHUTDOWN = 3,
+ };
+
+ static PooledAllocator s_pooledAllocator;
+ static LONG s_pooledAllocatorState;
+
+ PooledAllocator() : ArenaAllocator()
+ {
+ }
+ PooledAllocator(IEEMemoryManager* memoryManager);
+
+ PooledAllocator(const PooledAllocator& other) = delete;
+ PooledAllocator& operator=(const PooledAllocator& other) = delete;
+
+public:
+ PooledAllocator& operator=(PooledAllocator&& other);
+
+ void destroy() override;
+
+ static void shutdown();
+
+ static ArenaAllocator* getPooledAllocator(IEEMemoryManager* memoryManager);
+};
+
+size_t ArenaAllocator::s_defaultPageSize = 0;
+
+//------------------------------------------------------------------------
+// ArenaAllocator::bypassHostAllocator:
+// Indicates whether or not the ArenaAllocator should bypass the JIT
+// host when allocating memory for arena pages.
+//
+// Return Value:
+// True if the JIT should bypass the JIT host; false otherwise.
+bool ArenaAllocator::bypassHostAllocator()
+{
+#if defined(DEBUG)
+    // When JitDirectAlloc is set, all JIT allocation requests are forwarded
+ // directly to the OS. This allows taking advantage of pageheap and other gflag
+ // knobs for ensuring that we do not have buffer overruns in the JIT.
+
+ return JitConfig.JitDirectAlloc() != 0;
+#else // defined(DEBUG)
+ return false;
+#endif // !defined(DEBUG)
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getDefaultPageSize:
+// Returns the default size of an arena page.
+//
+// Return Value:
+// The default size of an arena page.
+size_t ArenaAllocator::getDefaultPageSize()
+{
+ return s_defaultPageSize;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::ArenaAllocator:
+// Default-constructs an arena allocator.
+ArenaAllocator::ArenaAllocator()
+ : m_memoryManager(nullptr)
+ , m_firstPage(nullptr)
+ , m_lastPage(nullptr)
+ , m_nextFreeByte(nullptr)
+ , m_lastFreeByte(nullptr)
+{
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::ArenaAllocator:
+// Constructs an arena allocator.
+//
+// Arguments:
+// memoryManager - The `IEEMemoryManager` instance that will be used to
+// allocate memory for arena pages.
+ArenaAllocator::ArenaAllocator(IEEMemoryManager* memoryManager)
+ : m_memoryManager(memoryManager)
+ , m_firstPage(nullptr)
+ , m_lastPage(nullptr)
+ , m_nextFreeByte(nullptr)
+ , m_lastFreeByte(nullptr)
+{
+ assert(getDefaultPageSize() != 0);
+ assert(isInitialized());
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::operator=:
+// Move-assigns a `ArenaAllocator`.
+ArenaAllocator& ArenaAllocator::operator=(ArenaAllocator&& other)
+{
+ assert(!isInitialized());
+
+ m_memoryManager = other.m_memoryManager;
+ m_firstPage = other.m_firstPage;
+ m_lastPage = other.m_lastPage;
+ m_nextFreeByte = other.m_nextFreeByte;
+ m_lastFreeByte = other.m_lastFreeByte;
+
+ other.m_memoryManager = nullptr;
+ other.m_firstPage = nullptr;
+ other.m_lastPage = nullptr;
+ other.m_nextFreeByte = nullptr;
+ other.m_lastFreeByte = nullptr;
+
+ return *this;
+}
+
+bool ArenaAllocator::isInitialized()
+{
+ return m_memoryManager != nullptr;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::allocateNewPage:
+// Allocates a new arena page.
+//
+// Arguments:
+// size - The number of bytes that were requested by the allocation
+// that triggered this request to allocate a new arena page.
+//
+// Return Value:
+// A pointer to the first usable byte of the newly allocated page.
+void* ArenaAllocator::allocateNewPage(size_t size, bool canThrow)
+{
+ assert(isInitialized());
+
+ size_t pageSize = sizeof(PageDescriptor) + size;
+
+ // Check for integer overflow
+ if (pageSize < size)
+ {
+ if (canThrow)
+ {
+ NOMEM();
+ }
+
+ return nullptr;
+ }
+
+ // If the current page is now full, update a few statistics
+ if (m_lastPage != nullptr)
+ {
+ // Undo the "+=" done in allocateMemory()
+ m_nextFreeByte -= size;
+
+ // Save the actual used size of the page
+ m_lastPage->m_usedBytes = m_nextFreeByte - m_lastPage->m_contents;
+ }
+
+ // Round up to a default-sized page if necessary
+ if (pageSize <= s_defaultPageSize)
+ {
+ pageSize = s_defaultPageSize;
+ }
+
+ // Round to the nearest multiple of OS page size if necessary
+ if (!bypassHostAllocator())
+ {
+ pageSize = roundUp(pageSize, DEFAULT_PAGE_SIZE);
+ }
+
+ // Allocate the new page
+ PageDescriptor* newPage = (PageDescriptor*)allocateHostMemory(pageSize);
+ if (newPage == nullptr)
+ {
+ if (canThrow)
+ {
+ NOMEM();
+ }
+
+ return nullptr;
+ }
+
+ // Append the new page to the end of the list
+ newPage->m_next = nullptr;
+ newPage->m_pageBytes = pageSize;
+ newPage->m_previous = m_lastPage;
+ newPage->m_usedBytes = 0; // m_usedBytes is meaningless until a new page is allocated.
+                              // Instead of letting it contain garbage (which could confuse us),
+ // set it to zero.
+
+ if (m_lastPage != nullptr)
+ {
+ m_lastPage->m_next = newPage;
+ }
+ else
+ {
+ m_firstPage = newPage;
+ }
+
+ m_lastPage = newPage;
+
+ // Adjust the next/last free byte pointers
+ m_nextFreeByte = newPage->m_contents + size;
+ m_lastFreeByte = (BYTE*)newPage + pageSize;
+ assert((m_lastFreeByte - m_nextFreeByte) >= 0);
+
+ return newPage->m_contents;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::destroy:
+// Performs any necessary teardown for an `ArenaAllocator`.
+void ArenaAllocator::destroy()
+{
+ assert(isInitialized());
+
+ // Free all of the allocated pages
+ for (PageDescriptor *page = m_firstPage, *next; page != nullptr; page = next)
+ {
+ next = page->m_next;
+ freeHostMemory(page);
+ }
+
+ // Clear out the allocator's fields
+ m_memoryManager = nullptr;
+ m_firstPage = nullptr;
+ m_lastPage = nullptr;
+ m_nextFreeByte = nullptr;
+ m_lastFreeByte = nullptr;
+}
+
+// The debug version of the allocator may allocate directly from the
+// OS rather than going through the hosting APIs. In order to do so,
+// it must undef the macros that are usually in place to prevent
+// accidental uses of the OS allocator.
+#if defined(DEBUG)
+#undef GetProcessHeap
+#undef HeapAlloc
+#undef HeapFree
+#endif
+
+//------------------------------------------------------------------------
+// ArenaAllocator::allocateHostMemory:
+// Allocates memory from the host (or the OS if `bypassHostAllocator()`
+// returns `true`).
+//
+// Arguments:
+// size - The number of bytes to allocate.
+//
+// Return Value:
+// A pointer to the allocated memory.
+void* ArenaAllocator::allocateHostMemory(size_t size)
+{
+ assert(isInitialized());
+
+#if defined(DEBUG)
+ if (bypassHostAllocator())
+ {
+ return ::HeapAlloc(GetProcessHeap(), 0, size);
+ }
+ else
+ {
+ return ClrAllocInProcessHeap(0, S_SIZE_T(size));
+ }
+#else // defined(DEBUG)
+ return m_memoryManager->ClrVirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_READWRITE);
+#endif // !defined(DEBUG)
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::freeHostMemory:
+// Frees memory allocated by a previous call to `allocateHostMemory`.
+//
+// Arguments:
+// block - A pointer to the memory to free.
+void ArenaAllocator::freeHostMemory(void* block)
+{
+ assert(isInitialized());
+
+#if defined(DEBUG)
+ if (bypassHostAllocator())
+ {
+ ::HeapFree(GetProcessHeap(), 0, block);
+ }
+ else
+ {
+ ClrFreeInProcessHeap(0, block);
+ }
+#else // defined(DEBUG)
+ m_memoryManager->ClrVirtualFree(block, 0, MEM_RELEASE);
+#endif // !defined(DEBUG)
+}
+
+#if defined(DEBUG)
+//------------------------------------------------------------------------
+// ArenaAllocator::allocateMemory:
+// Allocates memory using an `ArenaAllocator`.
+//
+// Arguments:
+// size - The number of bytes to allocate.
+//
+// Return Value:
+// A pointer to the allocated memory.
+//
+// Note:
+// This is the DEBUG-only version of `allocateMemory`; the release
+// version of this method is defined in the corresponding header file.
+// This version of the method has some abilities that the release
+//    version does not: it may inject faults into the allocator, and it
+//    seeds all allocations with a specified pattern to help catch
+// use-before-init problems.
+void* ArenaAllocator::allocateMemory(size_t size)
+{
+ assert(isInitialized());
+ assert(size != 0 && (size & (sizeof(int) - 1)) == 0);
+
+ // Ensure that we always allocate in pointer sized increments.
+ size = (size_t)roundUp(size, sizeof(size_t));
+
+ if (JitConfig.ShouldInjectFault() != 0)
+ {
+        // Force the underlying memory allocator (either the OS or the CLR host)
+ // to allocate the memory. Any fault injection will kick in.
+ void* p = ClrAllocInProcessHeap(0, S_SIZE_T(1));
+ if (p != nullptr)
+ {
+ ClrFreeInProcessHeap(0, p);
+ }
+ else
+ {
+ NOMEM(); // Throw!
+ }
+ }
+
+ void* block = m_nextFreeByte;
+ m_nextFreeByte += size;
+
+ if (m_nextFreeByte > m_lastFreeByte)
+ {
+ block = allocateNewPage(size, true);
+ }
+
+ memset(block, UninitializedWord<char>(), size);
+ return block;
+}
+#endif // defined(DEBUG)
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getTotalBytesAllocated:
+// Gets the total number of bytes allocated for all of the arena pages
+// for an `ArenaAllocator`.
+//
+// Return Value:
+// See above.
+size_t ArenaAllocator::getTotalBytesAllocated()
+{
+ assert(isInitialized());
+
+ size_t bytes = 0;
+ for (PageDescriptor* page = m_firstPage; page != nullptr; page = page->m_next)
+ {
+ bytes += page->m_pageBytes;
+ }
+
+ return bytes;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getTotalBytesUsed:
+// Gets the total number of bytes used in all of the arena pages for
+// an `ArenaAllocator`.
+//
+// Return Value:
+// See above.
+//
+// Notes:
+// An arena page may have unused space at the very end. This happens
+// when an allocation request comes in (via a call to `allocateMemory`)
+// that will not fit in the remaining bytes for the current page.
+// Another way to understand this method is as returning the total
+// number of bytes allocated for arena pages minus the number of bytes
+//    that are unused across all arena pages.
+size_t ArenaAllocator::getTotalBytesUsed()
+{
+ assert(isInitialized());
+
+ if (m_lastPage != nullptr)
+ {
+ m_lastPage->m_usedBytes = m_nextFreeByte - m_lastPage->m_contents;
+ }
+
+ size_t bytes = 0;
+ for (PageDescriptor* page = m_firstPage; page != nullptr; page = page->m_next)
+ {
+ bytes += page->m_usedBytes;
+ }
+
+ return bytes;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::startup:
+// Performs any necessary initialization for the arena allocator
+// subsystem.
+void ArenaAllocator::startup()
+{
+ s_defaultPageSize = bypassHostAllocator() ? (size_t)MIN_PAGE_SIZE : (size_t)DEFAULT_PAGE_SIZE;
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::shutdown:
+// Performs any necessary teardown for the arena allocator subsystem.
+void ArenaAllocator::shutdown()
+{
+ PooledAllocator::shutdown();
+}
+
+PooledAllocator PooledAllocator::s_pooledAllocator;
+LONG PooledAllocator::s_pooledAllocatorState = POOLED_ALLOCATOR_NOTINITIALIZED;
+
+//------------------------------------------------------------------------
+// PooledAllocator::PooledAllocator:
+// Constructs a `PooledAllocator`.
+PooledAllocator::PooledAllocator(IEEMemoryManager* memoryManager) : ArenaAllocator(memoryManager)
+{
+}
+
+//------------------------------------------------------------------------
+// PooledAllocator::operator=:
+// Move-assigns a `PooledAllocator`.
+PooledAllocator& PooledAllocator::operator=(PooledAllocator&& other)
+{
+ *((ArenaAllocator*)this) = std::move((ArenaAllocator &&)other);
+ return *this;
+}
+
+//------------------------------------------------------------------------
+// PooledAllocator::shutdown:
+// Performs any necessary teardown for the pooled allocator.
+//
+// Notes:
+// If the allocator has been initialized and is in use when this method is called,
+// it is up to whatever is using the pooled allocator to call `destroy` in order
+// to free its memory.
+void PooledAllocator::shutdown()
+{
+ LONG oldState = InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_SHUTDOWN);
+ switch (oldState)
+ {
+ case POOLED_ALLOCATOR_NOTINITIALIZED:
+ case POOLED_ALLOCATOR_SHUTDOWN:
+ case POOLED_ALLOCATOR_IN_USE:
+ return;
+
+ case POOLED_ALLOCATOR_AVAILABLE:
+ // The pooled allocator was initialized and not in use; we must destroy it.
+ s_pooledAllocator.destroy();
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// PooledAllocator::getPooledAllocator:
+// Returns the pooled allocator if it is not already in use.
+//
+// Arguments:
+// memoryManager: The `IEEMemoryManager` instance in use by the caller.
+//
+// Return Value:
+// A pointer to the pooled allocator if it is available or `nullptr`
+// if it is already in use.
+//
+// Notes:
+// Calling `destroy` on the returned allocator will return it to the
+// pool.
+ArenaAllocator* PooledAllocator::getPooledAllocator(IEEMemoryManager* memoryManager)
+{
+ LONG oldState = InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_IN_USE);
+ switch (oldState)
+ {
+ case POOLED_ALLOCATOR_IN_USE:
+ case POOLED_ALLOCATOR_SHUTDOWN:
+ // Either the allocator is in use or this call raced with a call to `shutdown`.
+ // Return `nullptr`.
+ return nullptr;
+
+ case POOLED_ALLOCATOR_AVAILABLE:
+ if (s_pooledAllocator.m_memoryManager != memoryManager)
+ {
+ // The allocator is available, but it was initialized with a different
+ // memory manager. Release it and return `nullptr`.
+ InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_AVAILABLE);
+ return nullptr;
+ }
+
+ return &s_pooledAllocator;
+
+ case POOLED_ALLOCATOR_NOTINITIALIZED:
+ {
+ PooledAllocator allocator(memoryManager);
+ if (allocator.allocateNewPage(0, false) == nullptr)
+ {
+ // Failed to grab the initial memory page.
+ InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_NOTINITIALIZED);
+ return nullptr;
+ }
+
+ s_pooledAllocator = std::move(allocator);
+ }
+
+ return &s_pooledAllocator;
+
+ default:
+ assert(!"Unknown pooled allocator state");
+ unreached();
+ }
+}
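+
+// A minimal caller sketch (illustrative only; `memoryManager` is assumed to be a
+// valid IEEMemoryManager* supplied by the EE):
+//
+//     ArenaAllocator* allocator = PooledAllocator::getPooledAllocator(memoryManager);
+//     if (allocator == nullptr)
+//     {
+//         // The pool is in use, shut down, or bound to a different memory manager;
+//         // the caller must fall back to an allocator of its own.
+//     }
+//     else
+//     {
+//         void* memory = allocator->allocateMemory(64); // bump-pointer allocation
+//         allocator->destroy();                         // returns the allocator to the pool
+//     }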
+
+//------------------------------------------------------------------------
+// PooledAllocator::destroy:
+// Performs any necessary teardown for a `PooledAllocator` and returns the allocator
+// to the pool.
+void PooledAllocator::destroy()
+{
+ assert(isInitialized());
+ assert(this == &s_pooledAllocator);
+ assert(s_pooledAllocatorState == POOLED_ALLOCATOR_IN_USE || s_pooledAllocatorState == POOLED_ALLOCATOR_SHUTDOWN);
+ assert(m_firstPage != nullptr);
+
+ // Free all but the first allocated page
+ for (PageDescriptor *page = m_firstPage->m_next, *next; page != nullptr; page = next)
+ {
+ next = page->m_next;
+ freeHostMemory(page);
+ }
+
+ // Reset the relevant state to point back to the first byte of the first page
+ m_firstPage->m_next = nullptr;
+ m_lastPage = m_firstPage;
+ m_nextFreeByte = m_firstPage->m_contents;
+ m_lastFreeByte = (BYTE*)m_firstPage + m_firstPage->m_pageBytes;
+
+ assert(getTotalBytesAllocated() == s_defaultPageSize);
+
+ // If we've already been shut down, free the first page. Otherwise, return the allocator to the pool.
+ if (s_pooledAllocatorState == POOLED_ALLOCATOR_SHUTDOWN)
+ {
+ ArenaAllocator::destroy();
+ }
+ else
+ {
+ InterlockedExchange(&s_pooledAllocatorState, POOLED_ALLOCATOR_AVAILABLE);
+ }
+}
+
+//------------------------------------------------------------------------
+// ArenaAllocator::getPooledAllocator:
+// Returns the pooled allocator if it is not already in use.
+//
+// Arguments:
+// memoryManager: The `IEEMemoryManager` instance in use by the caller.
+//
+// Return Value:
+// A pointer to the pooled allocator if it is available or `nullptr`
+// if it is already in use.
+//
+// Notes:
+// Calling `destroy` on the returned allocator will return it to the
+// pool.
+ArenaAllocator* ArenaAllocator::getPooledAllocator(IEEMemoryManager* memoryManager)
+{
+ return PooledAllocator::getPooledAllocator(memoryManager);
+}
diff --git a/src/jit/alloc.h b/src/jit/alloc.h
new file mode 100644
index 0000000000..a769341378
--- /dev/null
+++ b/src/jit/alloc.h
@@ -0,0 +1,99 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _ALLOC_H_
+#define _ALLOC_H_
+
+#if !defined(_HOST_H_)
+#include "host.h"
+#endif // defined(_HOST_H_)
+
+class ArenaAllocator
+{
+private:
+ ArenaAllocator(const ArenaAllocator& other) = delete;
+ ArenaAllocator& operator=(const ArenaAllocator& other) = delete;
+
+protected:
+ struct PageDescriptor
+ {
+ PageDescriptor* m_next;
+ PageDescriptor* m_previous;
+
+ size_t m_pageBytes; // # of bytes allocated
+ size_t m_usedBytes; // # of bytes actually used. (This is only valid when we've allocated a new page.)
+ // See ArenaAllocator::allocateNewPage.
+
+ BYTE m_contents[];
+ };
+
+ // Anything less than 64K leaves VM holes since the OS allocates address space in this size.
+ // Thus if we want to make this smaller, we need to do a reserve / commit scheme
+ enum
+ {
+ DEFAULT_PAGE_SIZE = 16 * OS_page_size,
+ MIN_PAGE_SIZE = sizeof(PageDescriptor)
+ };
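+
+ // For example, with the usual 4 KB OS page size, DEFAULT_PAGE_SIZE is
+ // 16 * 4 KB = 64 KB, matching the 64K allocation granularity noted above.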
+
+ static size_t s_defaultPageSize;
+
+ IEEMemoryManager* m_memoryManager;
+
+ PageDescriptor* m_firstPage;
+ PageDescriptor* m_lastPage;
+
+ // These two pointers (when non-null) will always point into 'm_lastPage'.
+ BYTE* m_nextFreeByte;
+ BYTE* m_lastFreeByte;
+
+ bool isInitialized();
+
+ void* allocateNewPage(size_t size, bool canThrow);
+
+ void* allocateHostMemory(size_t size);
+ void freeHostMemory(void* block);
+
+public:
+ ArenaAllocator();
+ ArenaAllocator(IEEMemoryManager* memoryManager);
+ ArenaAllocator& operator=(ArenaAllocator&& other);
+
+ // NOTE: it would be nice to have a destructor on this type to ensure that any value that
+ // goes out of scope is either uninitialized or has been torn down via a call to
+ // destroy(), but this interacts badly in methods that use SEH. #3058 tracks
+ // revisiting EH in the JIT; such a destructor could be added if SEH is removed
+ // as part of that work.
+
+ virtual void destroy();
+
+#if defined(DEBUG)
+ void* allocateMemory(size_t sz);
+#else // defined(DEBUG)
+ inline void* allocateMemory(size_t size)
+ {
+ void* block = m_nextFreeByte;
+ m_nextFreeByte += size;
+
+ if (m_nextFreeByte > m_lastFreeByte)
+ {
+ block = allocateNewPage(size, true);
+ }
+
+ return block;
+ }
+#endif // !defined(DEBUG)
+
+ size_t getTotalBytesAllocated();
+ size_t getTotalBytesUsed();
+
+ static bool bypassHostAllocator();
+ static size_t getDefaultPageSize();
+
+ static void startup();
+ static void shutdown();
+
+ static ArenaAllocator* getPooledAllocator(IEEMemoryManager* memoryManager);
+};
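+
+// A rough lifecycle sketch (illustrative only; assumes a valid IEEMemoryManager*
+// and that the one-argument constructor leaves the allocator ready for use):
+//
+//     ArenaAllocator::startup();                   // once per process: picks the default page size
+//
+//     ArenaAllocator allocator(memoryManager);
+//     void* block = allocator.allocateMemory(128); // bump-pointer allocation; grows by whole pages
+//     size_t used  = allocator.getTotalBytesUsed();
+//     allocator.destroy();                         // releases all pages
+//
+//     ArenaAllocator::shutdown();                  // once per process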
+
+#endif // _ALLOC_H_
diff --git a/src/jit/arraystack.h b/src/jit/arraystack.h
new file mode 100644
index 0000000000..1692294fcb
--- /dev/null
+++ b/src/jit/arraystack.h
@@ -0,0 +1,146 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// ArrayStack: A stack, implemented as a growable array
+
+template <class T>
+class ArrayStack
+{
+ static const int builtinSize = 8;
+
+public:
+ ArrayStack(Compiler* comp, int initialSize = builtinSize)
+ {
+ compiler = comp;
+
+ if (initialSize > builtinSize)
+ {
+ maxIndex = initialSize;
+ data = new (compiler, CMK_ArrayStack) T[initialSize];
+ }
+ else
+ {
+ maxIndex = builtinSize;
+ data = builtinData;
+ }
+
+ tosIndex = 0;
+ }
+
+ void Push(T item)
+ {
+ if (tosIndex == maxIndex)
+ {
+ Realloc();
+ }
+
+ data[tosIndex] = item;
+ tosIndex++;
+ }
+
+ void Realloc()
+ {
+ // get a new chunk 2x the size of the old one
+ // and copy over
+ T* oldData = data;
+ noway_assert(maxIndex * 2 > maxIndex);
+ data = new (compiler, CMK_ArrayStack) T[maxIndex * 2];
+ for (int i = 0; i < maxIndex; i++)
+ {
+ data[i] = oldData[i];
+ }
+ maxIndex *= 2;
+ }
+
+ // reverse the top N in the stack
+ void ReverseTop(int number)
+ {
+ if (number < 2)
+ {
+ return;
+ }
+
+ assert(number <= tosIndex);
+
+ int start = tosIndex - number;
+ int offset = 0;
+ while (offset < number / 2)
+ {
+ T temp;
+ int index = start + offset;
+ int otherIndex = tosIndex - 1 - offset;
+ temp = data[index];
+ data[index] = data[otherIndex];
+ data[otherIndex] = temp;
+
+ offset++;
+ }
+ }
+
+ T Pop()
+ {
+ assert(tosIndex > 0);
+ tosIndex--;
+ return data[tosIndex];
+ }
+
+ T Top()
+ {
+ assert(tosIndex > 0);
+ return data[tosIndex - 1];
+ }
+
+ T& TopRef()
+ {
+ assert(tosIndex > 0);
+ return data[tosIndex - 1];
+ }
+
+ // return the i'th from the top
+ T Index(int idx)
+ {
+ assert(tosIndex > idx);
+ return data[tosIndex - 1 - idx];
+ }
+
+ // return a reference to the i'th from the top
+ T& IndexRef(int idx)
+ {
+ assert(tosIndex > idx);
+ return data[tosIndex - 1 - idx];
+ }
+
+ int Height()
+ {
+ return tosIndex;
+ }
+
+ // return the bottom of the stack
+ T Bottom()
+ {
+ assert(tosIndex > 0);
+ return data[0];
+ }
+
+ // return the i'th from the bottom
+ T Bottom(int indx)
+ {
+ assert(tosIndex > indx);
+ return data[indx];
+ }
+
+ void Reset()
+ {
+ tosIndex = 0;
+ }
+
+private:
+ Compiler* compiler; // needed for allocation
+ int tosIndex; // first free location
+ int maxIndex;
+ T* data;
+ // initial allocation
+ T builtinData[builtinSize];
+};
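+
+// A usage sketch (illustrative only; `comp` is assumed to be a valid Compiler*):
+//
+//     ArrayStack<GenTreePtr> stack(comp);
+//     stack.Push(tree1);
+//     stack.Push(tree2);
+//     assert(stack.Height() == 2);
+//     GenTreePtr top = stack.Top(); // tree2, without removing it
+//     GenTreePtr t   = stack.Pop(); // removes and returns tree2
+//     stack.Reset();                // empties the stack without shrinking its storage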
diff --git a/src/jit/assertionprop.cpp b/src/jit/assertionprop.cpp
new file mode 100644
index 0000000000..fe35c3b780
--- /dev/null
+++ b/src/jit/assertionprop.cpp
@@ -0,0 +1,5142 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX AssertionProp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkTreePre() to find the Asgn node for optAddCopies()
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::optAddCopiesCallback(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->OperKind() & GTK_ASGOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ Compiler* comp = data->compiler;
+
+ if ((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == comp->optAddCopyLclNum))
+ {
+ comp->optAddCopyAsgnNode = tree;
+ return WALK_ABORT;
+ }
+ }
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Add new copies before Assertion Prop.
+ */
+
+void Compiler::optAddCopies()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optAddCopies()\n\n");
+ }
+ if (verboseTrees)
+ {
+ printf("Blocks/Trees at start of phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif
+
+ // Don't add any copies if we have reached the tracking limit.
+ if (lvaHaveManyLocals())
+ {
+ return;
+ }
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ var_types typ = varDsc->TypeGet();
+
+ // We only add copies for non temp local variables
+ // that have a single def and that can possibly be enregistered
+
+ if (varDsc->lvIsTemp || !varDsc->lvSingleDef || !varTypeCanReg(typ))
+ {
+ continue;
+ }
+
+ /* For lvNormalizeOnLoad(), we need to add a cast to the copy-assignment
+ like "copyLclNum = int(varDsc)" and optAssertionGen() only
+ tracks simple assignments. The same goes for lvNormalizeOnStore() as
+ the cast is generated in fgMorphSmpOpAsg. This boils down to not having
+ a copy until optAssertionGen handles this. */
+ if (varDsc->lvNormalizeOnLoad() || varDsc->lvNormalizeOnStore() || typ == TYP_BOOL)
+ {
+ continue;
+ }
+
+ if (varTypeIsSmall(varDsc->TypeGet()) || typ == TYP_BOOL)
+ {
+ continue;
+ }
+
+ // If locals must be initialized to zero, that initialization counts as a second definition.
+ // VB in particular allows usage of variables not explicitly initialized.
+ // Note that this effectively disables this optimization for all local variables
+ // as C# sets InitLocals all the time starting in Whidbey.
+
+ if (!varDsc->lvIsParam && info.compInitMem)
+ {
+ continue;
+ }
+
+ // On x86 we may want to add a copy for an incoming double parameter
+ // because we can ensure that the copy we make is double aligned
+ // whereas we can never ensure the alignment of an incoming double parameter
+ //
+ // On all other platforms we will never need to make a copy
+ // for an incoming double parameter
+
+ bool isFloatParam = false;
+
+#ifdef _TARGET_X86_
+ isFloatParam = varDsc->lvIsParam && varTypeIsFloating(typ);
+#endif
+
+ if (!isFloatParam && !varDsc->lvVolatileHint)
+ {
+ continue;
+ }
+
+ // We don't want to add a copy for a variable that is part of a struct
+ if (varDsc->lvIsStructField)
+ {
+ continue;
+ }
+
+ // We require that the weighted ref count be significant.
+ if (varDsc->lvRefCntWtd <= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT / 2))
+ {
+ continue;
+ }
+
+ // For parameters, we only want to add a copy for the heavier-than-average
+ // uses instead of adding a copy to cover every single use.
+ // 'paramImportantUseDom' is the set of blocks that dominate the
+ // heavier-than-average uses of a parameter.
+ // Initial value is all blocks.
+
+ BlockSet BLOCKSET_INIT_NOCOPY(paramImportantUseDom, BlockSetOps::MakeFull(this));
+
+ // This will be the threshold for determining heavier-than-average uses
+ unsigned paramAvgWtdRefDiv2 = (varDsc->lvRefCntWtd + varDsc->lvRefCnt / 2) / (varDsc->lvRefCnt * 2);
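+
+ // Worked example (illustrative numbers): with lvRefCntWtd = 800 and lvRefCnt = 4,
+ // paramAvgWtdRefDiv2 = (800 + 2) / 8 = 100, i.e. half of the average weighted
+ // ref count of 200.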
+
+ bool paramFoundImportantUse = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Trying to add a copy for V%02u %s, avg_wtd = %s\n", lclNum,
+ varDsc->lvIsParam ? "an arg" : "a local", refCntWtd2str(paramAvgWtdRefDiv2));
+ }
+#endif
+
+ //
+ // We must have a ref in a block that is dominated only by the entry block
+ //
+
+ if (BlockSetOps::MayBeUninit(varDsc->lvRefBlks))
+ {
+ // No references
+ continue;
+ }
+
+ bool isDominatedByFirstBB = false;
+
+ BLOCKSET_ITER_INIT(this, iter, varDsc->lvRefBlks, blkNum);
+ while (iter.NextElem(this, &blkNum))
+ {
+ /* Find the block 'blkNum' */
+ BasicBlock* block = fgFirstBB;
+ while (block && (block->bbNum != blkNum))
+ {
+ block = block->bbNext;
+ }
+ noway_assert(block && (block->bbNum == blkNum));
+
+ bool importantUseInBlock = (varDsc->lvIsParam) && (block->getBBWeight(this) > paramAvgWtdRefDiv2);
+ bool isPreHeaderBlock = ((block->bbFlags & BBF_LOOP_PREHEADER) != 0);
+ BlockSet BLOCKSET_INIT_NOCOPY(blockDom, BlockSetOps::UninitVal());
+ BlockSet BLOCKSET_INIT_NOCOPY(blockDomSub0, BlockSetOps::UninitVal());
+
+ if (block->bbIDom == nullptr && isPreHeaderBlock)
+ {
+ // Loop preheader blocks that we insert will have bbIDom set to nullptr,
+ // but we can instead use the bbNext successor block's dominator information
+ noway_assert(block->bbNext != nullptr);
+ BlockSetOps::AssignNoCopy(this, blockDom, fgGetDominatorSet(block->bbNext));
+ }
+ else
+ {
+ BlockSetOps::AssignNoCopy(this, blockDom, fgGetDominatorSet(block));
+ }
+
+ if (!BlockSetOps::IsEmpty(this, blockDom))
+ {
+ BlockSetOps::Assign(this, blockDomSub0, blockDom);
+ if (isPreHeaderBlock)
+ {
+ // We must clear bbNext block number from the dominator set
+ BlockSetOps::RemoveElemD(this, blockDomSub0, block->bbNext->bbNum);
+ }
+ /* Is this block dominated by fgFirstBB? */
+ if (BlockSetOps::IsMember(this, blockDomSub0, fgFirstBB->bbNum))
+ {
+ isDominatedByFirstBB = true;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Referenced in BB%02u, bbWeight is %s", blkNum, refCntWtd2str(block->getBBWeight(this)));
+
+ if (isDominatedByFirstBB)
+ {
+ printf(", which is dominated by BB01");
+ }
+
+ if (importantUseInBlock)
+ {
+ printf(", ImportantUse");
+ }
+
+ printf("\n");
+ }
+#endif
+
+ /* If this is a heavier-than-average block, then track which
+ blocks dominate this use of the parameter. */
+ if (importantUseInBlock)
+ {
+ paramFoundImportantUse = true;
+ BlockSetOps::IntersectionD(this, paramImportantUseDom,
+ blockDomSub0); // Clear blocks that do not dominate
+ }
+ }
+
+ // We should have found at least one block whose weight exceeds paramAvgWtdRefDiv2.
+ if (varDsc->lvIsParam)
+ {
+ if (!paramFoundImportantUse)
+ {
+ continue;
+ }
+ }
+
+ // For us to add a new copy:
+ // we require that we have a floating point parameter
+ // or a lvVolatile variable that is always reached from the first BB
+ // and we have at least one block available in paramImportantUseDom
+ //
+ bool doCopy = (isFloatParam || (isDominatedByFirstBB && varDsc->lvVolatileHint)) &&
+ !BlockSetOps::IsEmpty(this, paramImportantUseDom);
+
+ // Under stress mode we expand the number of candidates
+ // to include parameters of any type
+ // or any variable that is always reached from the first BB
+ //
+ if (compStressCompile(STRESS_GENERIC_VARN, 30))
+ {
+ // Ensure that we preserve the invariants required by the subsequent code.
+ if (varDsc->lvIsParam || isDominatedByFirstBB)
+ {
+ doCopy = true;
+ }
+ }
+
+ if (!doCopy)
+ {
+ continue;
+ }
+
+ GenTreePtr stmt;
+ unsigned copyLclNum = lvaGrabTemp(false DEBUGARG("optAddCopies"));
+
+ // Because lvaGrabTemp may have reallocated the lvaTable, ensure varDsc
+ // is still in sync with lvaTable[lclNum];
+ varDsc = &lvaTable[lclNum];
+
+ // Set lvType on the new Temp Lcl Var
+ lvaTable[copyLclNum].lvType = typ;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n Finding the best place to insert the assignment V%02i=V%02i\n", copyLclNum, lclNum);
+ }
+#endif
+
+ if (varDsc->lvIsParam)
+ {
+ noway_assert(varDsc->lvDefStmt == nullptr || varDsc->lvIsStructField);
+
+ // Create a new copy assignment tree
+ GenTreePtr copyAsgn = gtNewTempAssign(copyLclNum, gtNewLclvNode(lclNum, typ));
+
+ /* Find the best block to insert the new assignment */
+ /* We will choose the lowest weighted block, and within */
+ /* those blocks, the highest numbered block which */
+ /* dominates all the uses of the local variable */
+
+ /* Our default is to use the first block */
+ BasicBlock* bestBlock = fgFirstBB;
+ unsigned bestWeight = bestBlock->getBBWeight(this);
+ BasicBlock* block = bestBlock;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Starting at BB%02u, bbWeight is %s", block->bbNum,
+ refCntWtd2str(block->getBBWeight(this)));
+
+ printf(", bestWeight is %s\n", refCntWtd2str(bestWeight));
+ }
+#endif
+
+ /* We have already calculated paramImportantUseDom above. */
+
+ BLOCKSET_ITER_INIT(this, iter, paramImportantUseDom, blkNum);
+ while (iter.NextElem(this, &blkNum))
+ {
+ /* Advance block to point to 'blkNum' */
+ /* This assumes that the iterator returns block numbers in increasing lexical order. */
+ while (block && (block->bbNum != blkNum))
+ {
+ block = block->bbNext;
+ }
+ noway_assert(block && (block->bbNum == blkNum));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Considering BB%02u, bbWeight is %s", block->bbNum,
+ refCntWtd2str(block->getBBWeight(this)));
+
+ printf(", bestWeight is %s\n", refCntWtd2str(bestWeight));
+ }
+#endif
+
+ // Does this block have a smaller bbWeight value?
+ if (block->getBBWeight(this) > bestWeight)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("bbWeight too high\n");
+ }
+#endif
+ continue;
+ }
+
+ // Don't use blocks that are exception handlers because
+ // inserting a new first statement will interfere with
+ // the CATCHARG
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Catch block\n");
+ }
+#endif
+ continue;
+ }
+
+ // Don't use the BBJ_ALWAYS block marked with BBF_KEEP_BBJ_ALWAYS. These
+ // are used by EH code. The JIT cannot generate code for such a block.
+
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+#if FEATURE_EH_FUNCLETS
+ // With funclets, this is only used for BBJ_CALLFINALLY/BBJ_ALWAYS pairs. For x86, it is also used
+ // as the "final step" block for leaving finallys.
+ assert((block->bbPrev != nullptr) && block->bbPrev->isBBCallAlwaysPair());
+#endif // FEATURE_EH_FUNCLETS
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Internal EH BBJ_ALWAYS block\n");
+ }
+#endif
+ continue;
+ }
+
+ // This block will be the new candidate for the insert point
+ // for the new assignment
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("new bestBlock\n");
+ }
+#endif
+
+ bestBlock = block;
+ bestWeight = block->getBBWeight(this);
+ }
+
+ // If there is a use of the variable in this block
+ // then we insert the assignment at the beginning
+ // otherwise we insert the statement at the end
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Insert copy at the %s of BB%02u\n",
+ (BlockSetOps::IsEmpty(this, paramImportantUseDom) ||
+ BlockSetOps::IsMember(this, varDsc->lvRefBlks, bestBlock->bbNum))
+ ? "start"
+ : "end",
+ bestBlock->bbNum);
+ }
+#endif
+
+ if (BlockSetOps::IsEmpty(this, paramImportantUseDom) ||
+ BlockSetOps::IsMember(this, varDsc->lvRefBlks, bestBlock->bbNum))
+ {
+ stmt = fgInsertStmtAtBeg(bestBlock, copyAsgn);
+ }
+ else
+ {
+ stmt = fgInsertStmtNearEnd(bestBlock, copyAsgn);
+ }
+
+ /* Increment its lvRefCnt and lvRefCntWtd */
+ lvaTable[lclNum].incRefCnts(fgFirstBB->getBBWeight(this), this);
+
+ /* Increment its lvRefCnt and lvRefCntWtd */
+ lvaTable[copyLclNum].incRefCnts(fgFirstBB->getBBWeight(this), this);
+ }
+ else
+ {
+ noway_assert(varDsc->lvDefStmt != nullptr);
+
+ /* Locate the assignment to varDsc in the lvDefStmt */
+ stmt = varDsc->lvDefStmt;
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ optAddCopyLclNum = lclNum; // in
+ optAddCopyAsgnNode = nullptr; // out
+
+ fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, Compiler::optAddCopiesCallback, (void*)this, false);
+
+ noway_assert(optAddCopyAsgnNode);
+
+ GenTreePtr tree = optAddCopyAsgnNode;
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ noway_assert(tree && op1 && (tree->OperKind() & GTK_ASGOP) && (op1->gtOper == GT_LCL_VAR) &&
+ (op1->gtLclVarCommon.gtLclNum == lclNum));
+
+ /* TODO-Review: BB_UNITY_WEIGHT is not the correct block weight */
+ unsigned blockWeight = BB_UNITY_WEIGHT;
+
+ /* Increment its lvRefCnt and lvRefCntWtd twice */
+ lvaTable[copyLclNum].incRefCnts(blockWeight, this);
+ lvaTable[copyLclNum].incRefCnts(blockWeight, this);
+
+ /* Assign the old expression into the new temp */
+
+ GenTreePtr newAsgn = gtNewTempAssign(copyLclNum, tree->gtOp.gtOp2);
+
+ /* Copy the new temp to op1 */
+
+ GenTreePtr copyAsgn = gtNewAssignNode(op1, gtNewLclvNode(copyLclNum, typ));
+
+ /* Change the tree to a GT_COMMA with the two assignments as child nodes */
+
+ tree->gtBashToNOP();
+ tree->ChangeOper(GT_COMMA);
+
+ tree->gtOp.gtOp1 = newAsgn;
+ tree->gtOp.gtOp2 = copyAsgn;
+
+ tree->gtFlags |= (newAsgn->gtFlags & GTF_ALL_EFFECT);
+ tree->gtFlags |= (copyAsgn->gtFlags & GTF_ALL_EFFECT);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nIntroducing a new copy for V%02u\n", lclNum);
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ }
+#endif
+ }
+}
+
+//------------------------------------------------------------------------------
+// GetAssertionDep: Retrieve the assertions on this local variable
+//
+// Arguments:
+// lclNum - The local var id.
+//
+// Return Value:
+// The dependent assertions (assertions using the value of the local var)
+// of the local var.
+//
+
+ASSERT_TP& Compiler::GetAssertionDep(unsigned lclNum)
+{
+ ExpandArray<ASSERT_TP>& dep = *optAssertionDep;
+ if (dep[lclNum] == nullptr)
+ {
+ dep[lclNum] = optNewEmptyAssertSet();
+ }
+ return dep[lclNum];
+}
+
+/*****************************************************************************
+ *
+ * Initialize the assertion prop bitset traits and the default bitsets.
+ */
+
+void Compiler::optAssertionTraitsInit(AssertionIndex assertionCount)
+{
+ apTraits = new (getAllocator()) BitVecTraits(assertionCount, this);
+ apFull = BitVecOps::UninitVal();
+ apEmpty = BitVecOps::UninitVal();
+ BitVecOps::AssignNoCopy(apTraits, apFull, BitVecOps::MakeFull(apTraits));
+ BitVecOps::AssignNoCopy(apTraits, apEmpty, BitVecOps::MakeEmpty(apTraits));
+}
+
+/*****************************************************************************
+ *
+ * Initialize the assertion prop tracking logic.
+ */
+
+void Compiler::optAssertionInit(bool isLocalProp)
+{
+ // Use the countFunc table to determine a proper maximum assertion count for the
+ // method being compiled. The count grows with the IL size for small and
+ // moderate methods. For large methods, considering throughput impact, we track no
+ // more than 64 assertions.
+ // Note that this tracks at most 256 assertions.
+ static const AssertionIndex countFunc[] = {64, 128, 256, 64};
+ static const unsigned lowerBound = 0;
+ static const unsigned upperBound = sizeof(countFunc) / sizeof(countFunc[0]) - 1;
+ const unsigned codeSize = info.compILCodeSize / 512;
+ optMaxAssertionCount = countFunc[isLocalProp ? lowerBound : min(upperBound, codeSize)];
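+
+ // Worked example: for global assertion prop, an IL size below 512 bytes maps to
+ // countFunc[0] = 64 assertions, 512-1023 bytes to 128, 1024-1535 bytes to 256,
+ // and anything larger clamps to countFunc[3] = 64. Local assertion prop always
+ // uses countFunc[0] = 64.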
+
+ optLocalAssertionProp = isLocalProp;
+ optAssertionTabPrivate = new (getAllocator()) AssertionDsc[optMaxAssertionCount];
+ optComplementaryAssertionMap =
+ new (getAllocator()) AssertionIndex[optMaxAssertionCount](); // zero-inited (NO_ASSERTION_INDEX.)
+ assert(NO_ASSERTION_INDEX == 0);
+
+ if (!isLocalProp)
+ {
+ optValueNumToAsserts = new (getAllocator()) ValueNumToAssertsMap(getAllocator());
+ }
+
+ if (optAssertionDep == nullptr)
+ {
+ optAssertionDep = new (getAllocator()) ExpandArray<ASSERT_TP>(getAllocator(), max(1, lvaCount));
+ }
+
+ optAssertionTraitsInit(optMaxAssertionCount);
+ optAssertionCount = 0;
+ optAssertionPropagated = false;
+ bbJtrueAssertionOut = nullptr;
+}
+
+#ifdef DEBUG
+void Compiler::optPrintAssertion(AssertionDsc* curAssertion, AssertionIndex assertionIndex /* =0 */)
+{
+ if (curAssertion->op1.kind == O1K_EXACT_TYPE)
+ {
+ printf("Type ");
+ }
+ else if (curAssertion->op1.kind == O1K_ARR_BND)
+ {
+ printf("ArrBnds ");
+ }
+ else if (curAssertion->op1.kind == O1K_SUBTYPE)
+ {
+ printf("Subtype ");
+ }
+ else if (curAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ printf("Copy ");
+ }
+ else if ((curAssertion->op2.kind == O2K_CONST_INT) || (curAssertion->op2.kind == O2K_CONST_LONG) ||
+ (curAssertion->op2.kind == O2K_CONST_DOUBLE))
+ {
+ printf("Constant ");
+ }
+ else if (curAssertion->op2.kind == O2K_SUBRANGE)
+ {
+ printf("Subrange ");
+ }
+ else
+ {
+ printf("?assertion classification? ");
+ }
+ printf("Assertion: ");
+ if (!optLocalAssertionProp)
+ {
+ printf("(%d, %d) ", curAssertion->op1.vn, curAssertion->op2.vn);
+ printf("(" STR_VN "%x," STR_VN "%x) ", curAssertion->op1.vn, curAssertion->op2.vn);
+ }
+
+ if ((curAssertion->op1.kind == O1K_LCLVAR) || (curAssertion->op1.kind == O1K_EXACT_TYPE) ||
+ (curAssertion->op1.kind == O1K_SUBTYPE))
+ {
+ printf("V%02u", curAssertion->op1.lcl.lclNum);
+ if (curAssertion->op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ printf(".%02u", curAssertion->op1.lcl.ssaNum);
+ }
+ }
+ else if (curAssertion->op1.kind == O1K_ARR_BND)
+ {
+ printf("[idx:");
+ vnStore->vnDump(this, curAssertion->op1.bnd.vnIdx);
+ printf(";len:");
+ vnStore->vnDump(this, curAssertion->op1.bnd.vnLen);
+ printf("]");
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_OPER_BND)
+ {
+ printf("Oper_Bnd");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_LOOP_BND)
+ {
+ printf("Loop_Bnd");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_CONSTANT_LOOP_BND)
+ {
+ printf("Loop_Bnd");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_VALUE_NUMBER)
+ {
+ printf("Value_Number");
+ vnStore->vnDump(this, curAssertion->op1.vn);
+ }
+ else
+ {
+ printf("?op1.kind?");
+ }
+
+ if (curAssertion->assertionKind == OAK_SUBRANGE)
+ {
+ printf(" in ");
+ }
+ else if (curAssertion->assertionKind == OAK_EQUAL)
+ {
+ if (curAssertion->op1.kind == O1K_LCLVAR)
+ {
+ printf(" == ");
+ }
+ else
+ {
+ printf(" is ");
+ }
+ }
+ else if (curAssertion->assertionKind == OAK_NO_THROW)
+ {
+ printf(" in range ");
+ }
+ else if (curAssertion->assertionKind == OAK_NOT_EQUAL)
+ {
+ if (curAssertion->op1.kind == O1K_LCLVAR)
+ {
+ printf(" != ");
+ }
+ else
+ {
+ printf(" is not ");
+ }
+ }
+ else
+ {
+ printf(" ?assertionKind? ");
+ }
+
+ if (curAssertion->op1.kind != O1K_ARR_BND)
+ {
+ switch (curAssertion->op2.kind)
+ {
+ case O2K_LCLVAR_COPY:
+ printf("V%02u", curAssertion->op2.lcl.lclNum);
+ if (curAssertion->op1.lcl.ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ printf(".%02u", curAssertion->op1.lcl.ssaNum);
+ }
+ break;
+
+ case O2K_CONST_INT:
+ case O2K_IND_CNS_INT:
+ if (curAssertion->op1.kind == O1K_EXACT_TYPE)
+ {
+ printf("Exact Type MT(%08X)", dspPtr(curAssertion->op2.u1.iconVal));
+ assert(curAssertion->op2.u1.iconFlags != 0);
+ }
+ else if (curAssertion->op1.kind == O1K_SUBTYPE)
+ {
+ printf("MT(%08X)", dspPtr(curAssertion->op2.u1.iconVal));
+ assert(curAssertion->op2.u1.iconFlags != 0);
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_OPER_BND)
+ {
+ assert(!optLocalAssertionProp);
+ vnStore->vnDump(this, curAssertion->op2.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_ARRLEN_LOOP_BND)
+ {
+ assert(!optLocalAssertionProp);
+ vnStore->vnDump(this, curAssertion->op2.vn);
+ }
+ else if (curAssertion->op1.kind == O1K_CONSTANT_LOOP_BND)
+ {
+ assert(!optLocalAssertionProp);
+ vnStore->vnDump(this, curAssertion->op2.vn);
+ }
+ else
+ {
+ var_types op1Type;
+
+ if (curAssertion->op1.kind == O1K_VALUE_NUMBER)
+ {
+ op1Type = vnStore->TypeOfVN(curAssertion->op1.vn);
+ }
+ else
+ {
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ op1Type = varDsc->lvType;
+ }
+
+ if (op1Type == TYP_REF)
+ {
+ assert(curAssertion->op2.u1.iconVal == 0);
+ printf("null");
+ }
+ else
+ {
+ if ((curAssertion->op2.u1.iconFlags & GTF_ICON_HDL_MASK) != 0)
+ {
+ printf("[%08p]", dspPtr(curAssertion->op2.u1.iconVal));
+ }
+ else
+ {
+ printf("%d", curAssertion->op2.u1.iconVal);
+ }
+ }
+ }
+ break;
+
+ case O2K_CONST_LONG:
+ printf("0x%016llx", curAssertion->op2.lconVal);
+ break;
+
+ case O2K_CONST_DOUBLE:
+ if (*((__int64*)&curAssertion->op2.dconVal) == (__int64)I64(0x8000000000000000))
+ {
+ printf("-0.00000");
+ }
+ else
+ {
+ printf("%#lg", curAssertion->op2.dconVal);
+ }
+ break;
+
+ case O2K_SUBRANGE:
+ printf("[%d..%d]", curAssertion->op2.u2.loBound, curAssertion->op2.u2.hiBound);
+ break;
+
+ default:
+ printf("?op2.kind?");
+ break;
+ }
+ }
+
+ if (assertionIndex > 0)
+ {
+ printf(" index=#%02u, mask=", assertionIndex);
+
+ // This is a hack to reuse a known empty set in order to display
+ // a single bit mask.
+ BitVecOps::AddElemD(apTraits, apEmpty, assertionIndex - 1);
+ printf("%s", BitVecOps::ToString(apTraits, apEmpty));
+ BitVecOps::RemoveElemD(apTraits, apEmpty, assertionIndex - 1);
+ }
+ printf("\n");
+}
+#endif // DEBUG
+
+/******************************************************************************
+ *
+ * Helper to retrieve the "assertIndex" assertion. Note that assertIndex 0
+ * is NO_ASSERTION_INDEX and "optAssertionCount" is the last valid index.
+ *
+ */
+Compiler::AssertionDsc* Compiler::optGetAssertion(AssertionIndex assertIndex)
+{
+ assert(NO_ASSERTION_INDEX == 0);
+ noway_assert(assertIndex != NO_ASSERTION_INDEX);
+ noway_assert(assertIndex <= optAssertionCount);
+ AssertionDsc* assertion = &optAssertionTabPrivate[assertIndex - 1];
+#ifdef DEBUG
+ optDebugCheckAssertion(assertion);
+#endif
+
+ return assertion;
+}
+
+/*****************************************************************************
+ *
+ * A simple helper routine so that not all callers need to supply an AssertionDsc*
+ * if they don't care about it. Refer overloaded method optCreateAssertion.
+ *
+ */
+Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind)
+{
+ AssertionDsc assertionDsc;
+ return optCreateAssertion(op1, op2, assertionKind, &assertionDsc);
+}
+
+/*****************************************************************************
+ *
+ * We attempt to create the following assertion:
+ *
+ * op1 assertionKind op2
+ *
+ * If we can create the assertion then we update 'assertion'; if we are
+ * unsuccessful, assertion->assertionKind will be OAK_INVALID. If we are
+ * successful in creating the assertion we call optAddAssertion, which adds
+ * the assertion to our assertion table.
+ *
+ * If we are able to create the assertion, the return value is the
+ * assertionIndex for this assertion; otherwise the return value is
+ * NO_ASSERTION_INDEX and we could not create the assertion.
+ *
+ */
+Compiler::AssertionIndex Compiler::optCreateAssertion(GenTreePtr op1,
+ GenTreePtr op2,
+ optAssertionKind assertionKind,
+ AssertionDsc* assertion)
+{
+ memset(assertion, 0, sizeof(AssertionDsc));
+ //
+ // If we cannot create an assertion using op1 and op2 then the assertionKind
+ // must be OAK_INVALID, so we initialize it to OAK_INVALID and only change it
+ // to a valid assertion when everything is good.
+ //
+ assertion->assertionKind = OAK_INVALID;
+ bool haveArgs = false;
+ var_types toType;
+
+ if (op1->gtOper == GT_ARR_BOUNDS_CHECK)
+ {
+ if (assertionKind == OAK_NO_THROW)
+ {
+ GenTreeBoundsChk* arrBndsChk = op1->AsBoundsChk();
+ assertion->assertionKind = assertionKind;
+ assertion->op1.kind = O1K_ARR_BND;
+ assertion->op1.bnd.vnIdx = arrBndsChk->gtIndex->gtVNPair.GetConservative();
+ assertion->op1.bnd.vnLen = arrBndsChk->gtArrLen->gtVNPair.GetConservative();
+ goto DONE_ASSERTION;
+ }
+ }
+
+ //
+ // Did we receive Helper call args?
+ //
+ if (op1->gtOper == GT_LIST)
+ {
+ if (op2->gtOper != GT_LIST)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ op1 = op1->gtOp.gtOp1;
+ op2 = op2->gtOp.gtOp1;
+ haveArgs = true;
+ }
+
+ //
+ // Are we trying to make a non-null assertion?
+ //
+ if (op2 == nullptr)
+ {
+ assert(haveArgs == false);
+ //
+ // Must be an OAK_NOT_EQUAL assertion
+ //
+ noway_assert(assertionKind == OAK_NOT_EQUAL);
+
+ //
+ // Set op1 to the instance pointer of the indirection
+ //
+
+ ssize_t offset = 0;
+ while ((op1->gtOper == GT_ADD) && (op1->gtType == TYP_BYREF))
+ {
+ if (op1->gtGetOp2()->IsCnsIntOrI())
+ {
+ offset += op1->gtGetOp2()->gtIntCon.gtIconVal;
+ op1 = op1->gtGetOp1();
+ }
+ else if (op1->gtGetOp1()->IsCnsIntOrI())
+ {
+ offset += op1->gtGetOp1()->gtIntCon.gtIconVal;
+ op1 = op1->gtGetOp2();
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if (fgIsBigOffset(offset) || op1->gtOper != GT_LCL_VAR)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* lclVar = &lvaTable[lclNum];
+
+ ValueNum vn;
+
+ //
+ // We only perform null-checks on GC refs
+ // so only make non-null assertions about GC refs
+ //
+ if (lclVar->TypeGet() != TYP_REF)
+ {
+ if (optLocalAssertionProp || (lclVar->TypeGet() != TYP_BYREF))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ vn = op1->gtVNPair.GetConservative();
+ VNFuncApp funcAttr;
+
+ // Try to get value number corresponding to the GC ref of the indirection
+ while (vnStore->GetVNFunc(vn, &funcAttr) && (funcAttr.m_func == (VNFunc)GT_ADD) &&
+ (vnStore->TypeOfVN(vn) == TYP_BYREF))
+ {
+ if (vnStore->IsVNConstant(funcAttr.m_args[1]))
+ {
+ offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[1]);
+ vn = funcAttr.m_args[0];
+ }
+ else if (vnStore->IsVNConstant(funcAttr.m_args[0]))
+ {
+ offset += vnStore->CoercedConstantValue<ssize_t>(funcAttr.m_args[0]);
+ vn = funcAttr.m_args[1];
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if (fgIsBigOffset(offset) || (vnStore->TypeOfVN(vn) != TYP_REF))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op1.kind = O1K_VALUE_NUMBER;
+ }
+ else
+ {
+ // If the local variable has its address exposed then bail
+ if (lclVar->lvAddrExposed)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op1.kind = O1K_LCLVAR;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+ vn = op1->gtVNPair.GetConservative();
+ }
+
+ assertion->op1.vn = vn;
+ assertion->assertionKind = assertionKind;
+ assertion->op2.kind = O2K_CONST_INT;
+ assertion->op2.vn = ValueNumStore::VNForNull();
+ assertion->op2.u1.iconVal = 0;
+ assertion->op2.u1.iconFlags = 0;
+#ifdef _TARGET_64BIT_
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+#endif // _TARGET_64BIT_
+ }
+ //
+ // Are we making an assertion about a local variable?
+ //
+ else if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* lclVar = &lvaTable[lclNum];
+
+ // If the local variable has its address exposed then bail
+ if (lclVar->lvAddrExposed)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ if (haveArgs)
+ {
+ //
+ // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion
+ //
+ if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ if (op2->gtOper == GT_IND)
+ {
+ op2 = op2->gtOp.gtOp1;
+ assertion->op2.kind = O2K_IND_CNS_INT;
+ }
+ else
+ {
+ assertion->op2.kind = O2K_CONST_INT;
+ }
+
+ if (op2->gtOper != GT_CNS_INT)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ //
+ // TODO-CQ: Check for Sealed class and change kind to O1K_EXACT_TYPE
+ // And consider the special cases, like CORINFO_FLG_SHAREDINST or CORINFO_FLG_VARIANCE
+ // where a class can be sealed, but they don't behave as exact types because casts to
+ // non-base types sometimes still succeed.
+ //
+ assertion->op1.kind = O1K_SUBTYPE;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.vn = op1->gtVNPair.GetConservative();
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+ assertion->op2.u1.iconVal = op2->gtIntCon.gtIconVal;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+ assertion->op2.u1.iconFlags = op2->GetIconHandleFlag();
+
+ //
+ // Ok everything has been set and the assertion looks good
+ //
+ assertion->assertionKind = assertionKind;
+ }
+ else // !haveArgs
+ {
+ /* Skip over a GT_COMMA node(s), if necessary */
+ while (op2->gtOper == GT_COMMA)
+ {
+ op2 = op2->gtOp.gtOp2;
+ }
+
+ assertion->op1.kind = O1K_LCLVAR;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.vn = op1->gtVNPair.GetConservative();
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+
+ switch (op2->gtOper)
+ {
+ optOp2Kind op2Kind;
+ //
+ // No Assertion
+ //
+ default:
+ goto DONE_ASSERTION; // Don't make an assertion
+
+ //
+ // Constant Assertions
+ //
+ case GT_CNS_INT:
+ op2Kind = O2K_CONST_INT;
+ goto CNS_COMMON;
+
+ case GT_CNS_LNG:
+ op2Kind = O2K_CONST_LONG;
+ goto CNS_COMMON;
+
+ case GT_CNS_DBL:
+ op2Kind = O2K_CONST_DOUBLE;
+ goto CNS_COMMON;
+
+ CNS_COMMON:
+ {
+ // TODO-1stClassStructs: handle constant propagation to struct types.
+ if (varTypeIsStruct(lclVar))
+ {
+ goto DONE_ASSERTION;
+ }
+ //
+ // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion
+ //
+ if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ // If the LclVar is a TYP_LONG then we only make
+ // assertions where op2 is also TYP_LONG
+ //
+ if ((lclVar->TypeGet() == TYP_LONG) && (op2->TypeGet() != TYP_LONG))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op2.kind = op2Kind;
+ assertion->op2.lconVal = 0;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+#ifdef _TARGET_ARM_
+ // Do not Constant-Prop large constants for ARM
+ if (!codeGen->validImmForMov(op2->gtIntCon.gtIconVal))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+#endif // _TARGET_ARM_
+ assertion->op2.u1.iconVal = op2->gtIntCon.gtIconVal;
+ assertion->op2.u1.iconFlags = op2->GetIconHandleFlag();
+#ifdef _TARGET_64BIT_
+ if (op2->TypeGet() == TYP_LONG || op2->TypeGet() == TYP_BYREF)
+ {
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+ }
+#endif // _TARGET_64BIT_
+ }
+ else if (op2->gtOper == GT_CNS_LNG)
+ {
+ assertion->op2.lconVal = op2->gtLngCon.gtLconVal;
+ }
+ else
+ {
+ noway_assert(op2->gtOper == GT_CNS_DBL);
+ /* If we have a NaN value then don't record it */
+ if (_isnan(op2->gtDblCon.gtDconVal))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ assertion->op2.dconVal = op2->gtDblCon.gtDconVal;
+ }
+
+ //
+ // Ok everything has been set and the assertion looks good
+ //
+ assertion->assertionKind = assertionKind;
+ }
+ break;
+
+ //
+ // Copy Assertions
+ //
+ case GT_LCL_VAR:
+ {
+ //
+ // Must either be an OAK_EQUAL or an OAK_NOT_EQUAL assertion
+ //
+ if ((assertionKind != OAK_EQUAL) && (assertionKind != OAK_NOT_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ unsigned lclNum2 = op2->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum2 < lvaCount);
+ LclVarDsc* lclVar2 = &lvaTable[lclNum2];
+
+ // If the two locals are the same then bail
+ if (lclNum == lclNum2)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ // If the types are different then bail
+ if (lclVar->lvType != lclVar2->lvType)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ // If the local variable has its address exposed then bail
+ if (lclVar2->lvAddrExposed)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op2.kind = O2K_LCLVAR_COPY;
+ assertion->op2.lcl.lclNum = lclNum2;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+ assertion->op2.lcl.ssaNum = op2->AsLclVarCommon()->GetSsaNum();
+
+ //
+ // Ok everything has been set and the assertion looks good
+ //
+ assertion->assertionKind = assertionKind;
+ }
+ break;
+
+ // Subrange Assertions
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ /* Assigning the result of a RELOP, we can add a boolean subrange assertion */
+
+ toType = TYP_BOOL;
+ goto SUBRANGE_COMMON;
+
+ case GT_CLS_VAR:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_ARR_ELEM:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_LCL_FLD:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_IND:
+
+ /* Assigning the result of an indirection into a LCL_VAR, see if we can add a subrange assertion */
+
+ toType = op2->gtType;
+ goto SUBRANGE_COMMON;
+
+ case GT_CAST:
+ {
+ if (lvaTable[lclNum].lvIsStructField && lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ // Keep the cast on small struct fields.
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ toType = op2->CastToType();
+ SUBRANGE_COMMON:
+ if ((assertionKind != OAK_SUBRANGE) && (assertionKind != OAK_EQUAL))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ // We don't make assertions on a cast from floating point
+ goto DONE_ASSERTION;
+ }
+
+ switch (toType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_CHAR:
+#ifdef _TARGET_64BIT_
+ case TYP_UINT:
+ case TYP_INT:
+#endif // _TARGET_64BIT_
+ assertion->op2.u2.loBound = AssertionDsc::GetLowerBoundForIntegralType(toType);
+ assertion->op2.u2.hiBound = AssertionDsc::GetUpperBoundForIntegralType(toType);
+ break;
+
+ default:
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ assertion->op2.kind = O2K_SUBRANGE;
+ assertion->assertionKind = OAK_SUBRANGE;
+ }
+ break;
+ }
+ } // else // !haveArgs
+ } // if (op1->gtOper == GT_LCL_VAR)
+
+ //
+ // Are we making an IsType assertion?
+ //
+ else if (op1->gtOper == GT_IND)
+ {
+ op1 = op1->gtOp.gtOp1;
+ //
+ // Is this an indirection of a local variable?
+ //
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* lclVar = &lvaTable[lclNum];
+
+ // If the local variable is excluded from SSA then bail
+ if (fgExcludeFromSsa(lclNum))
+ {
+ goto DONE_ASSERTION;
+ }
+
+ // If we have a typeHnd indirection then op1 must be a TYP_REF
+ // and the indirection must produce a TYP_I
+ //
+ if (op1->gtType != TYP_REF)
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->op1.kind = O1K_EXACT_TYPE;
+ assertion->op1.lcl.lclNum = lclNum;
+ assertion->op1.vn = op1->gtVNPair.GetConservative();
+ assertion->op1.lcl.ssaNum = op1->AsLclVarCommon()->GetSsaNum();
+ assert(assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM ||
+ assertion->op1.vn ==
+ lvaTable[lclNum].GetPerSsaData(assertion->op1.lcl.ssaNum)->m_vnPair.GetConservative());
+
+ ssize_t cnsValue = 0;
+ unsigned iconFlags = 0;
+ // Ngen case
+ if (op2->gtOper == GT_IND)
+ {
+ if (!optIsTreeKnownIntValue(!optLocalAssertionProp, op2->gtOp.gtOp1, &cnsValue, &iconFlags))
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+
+ assertion->assertionKind = assertionKind;
+ assertion->op2.kind = O2K_IND_CNS_INT;
+ assertion->op2.u1.iconVal = cnsValue;
+ assertion->op2.vn = op2->gtOp.gtOp1->gtVNPair.GetConservative();
+ /* iconFlags should only contain bits in GTF_ICON_HDL_MASK */
+ assert((iconFlags & ~GTF_ICON_HDL_MASK) == 0);
+ assertion->op2.u1.iconFlags = iconFlags;
+#ifdef _TARGET_64BIT_
+ if (op2->gtOp.gtOp1->TypeGet() == TYP_LONG)
+ {
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+ }
+#endif // _TARGET_64BIT_
+ }
+ // JIT case
+ else if (optIsTreeKnownIntValue(!optLocalAssertionProp, op2, &cnsValue, &iconFlags))
+ {
+ assertion->assertionKind = assertionKind;
+ assertion->op2.kind = O2K_IND_CNS_INT;
+ assertion->op2.u1.iconVal = cnsValue;
+ assertion->op2.vn = op2->gtVNPair.GetConservative();
+ /* iconFlags should only contain bits in GTF_ICON_HDL_MASK */
+ assert((iconFlags & ~GTF_ICON_HDL_MASK) == 0);
+ assertion->op2.u1.iconFlags = iconFlags;
+#ifdef _TARGET_64BIT_
+ if (op2->TypeGet() == TYP_LONG)
+ {
+ assertion->op2.u1.iconFlags |= 1; // Signify that this is really TYP_LONG
+ }
+#endif // _TARGET_64BIT_
+ }
+ else
+ {
+ goto DONE_ASSERTION; // Don't make an assertion
+ }
+ }
+ }
+
+DONE_ASSERTION:
+ if (assertion->assertionKind == OAK_INVALID)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ if (!optLocalAssertionProp)
+ {
+ if ((assertion->op1.vn == ValueNumStore::NoVN) || (assertion->op2.vn == ValueNumStore::NoVN) ||
+ (assertion->op1.vn == ValueNumStore::VNForVoid()) || (assertion->op2.vn == ValueNumStore::VNForVoid()))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ // TODO: only copy assertions rely on valid SSA number so we could generate more assertions here
+ if ((assertion->op1.kind != O1K_VALUE_NUMBER) && (assertion->op1.lcl.ssaNum == SsaConfig::RESERVED_SSA_NUM))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ }
+
+ // Now add the assertion to our assertion table
+ noway_assert(assertion->op1.kind != O1K_INVALID);
+ noway_assert(assertion->op1.kind == O1K_ARR_BND || assertion->op2.kind != O2K_INVALID);
+ return optAddAssertion(assertion);
+}
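+
+// For example (illustrative): for an assignment whose op1 is a GT_LCL_VAR "V03" and
+// whose op2 is a GT_CNS_INT 0, an OAK_EQUAL request produces the constant assertion
+// "V03 == 0", while a null check (op2 == nullptr with OAK_NOT_EQUAL) on a TYP_REF
+// local produces the non-null assertion "V03 != null".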
+
+/*****************************************************************************
+ *
+ * If tree is a constant node holding an integral value, retrieve the value in
+ * pConstant. If the method returns true, pConstant holds the appropriate
+ * constant. Set "vnBased" to true to indicate local or global assertion prop.
+ * "pFlags" indicates if the constant is a handle marked by GTF_ICON_HDL_MASK.
+ */
+bool Compiler::optIsTreeKnownIntValue(bool vnBased, GenTreePtr tree, ssize_t* pConstant, unsigned* pFlags)
+{
+ // Is Local assertion prop?
+ if (!vnBased)
+ {
+ if (tree->OperGet() == GT_CNS_INT)
+ {
+ *pConstant = tree->gtIntCon.IconValue();
+ *pFlags = tree->GetIconHandleFlag();
+ return true;
+ }
+#ifdef _TARGET_64BIT_
+ // Just to be clear, get it from gtLconVal rather than
+ // overlapping gtIconVal.
+ else if (tree->OperGet() == GT_CNS_LNG)
+ {
+ *pConstant = tree->gtLngCon.gtLconVal;
+ *pFlags = tree->GetIconHandleFlag();
+ return true;
+ }
+#endif
+ return false;
+ }
+
+ // Global assertion prop
+ if (!vnStore->IsVNConstant(tree->gtVNPair.GetConservative()))
+ {
+ return false;
+ }
+
+ ValueNum vn = tree->gtVNPair.GetConservative();
+ var_types vnType = vnStore->TypeOfVN(vn);
+ if (vnType == TYP_INT)
+ {
+ *pConstant = vnStore->ConstantValue<int>(vn);
+ *pFlags = vnStore->IsVNHandle(vn) ? vnStore->GetHandleFlags(vn) : 0;
+ return true;
+ }
+#ifdef _TARGET_64BIT_
+ else if (vnType == TYP_LONG)
+ {
+ *pConstant = vnStore->ConstantValue<INT64>(vn);
+ *pFlags = vnStore->IsVNHandle(vn) ? vnStore->GetHandleFlags(vn) : 0;
+ return true;
+ }
+#endif
+ return false;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Print the assertions related to a VN for all VNs.
+ *
+ */
+void Compiler::optPrintVnAssertionMapping()
+{
+ printf("\nVN Assertion Mapping\n");
+ printf("---------------------\n");
+ for (ValueNumToAssertsMap::KeyIterator ki = optValueNumToAsserts->Begin(); !ki.Equal(optValueNumToAsserts->End());
+ ++ki)
+ {
+ printf("(%d => ", ki.Get());
+ printf("%s)\n", BitVecOps::ToString(apTraits, ki.GetValue()));
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Maintain a map "optValueNumToAsserts" i.e., vn -> to set of assertions
+ * about that VN. Given "assertions" about a "vn" add it to the previously
+ * mapped assertions about that "vn."
+ */
+void Compiler::optAddVnAssertionMapping(ValueNum vn, AssertionIndex index)
+{
+ ASSERT_TP cur;
+ if (!optValueNumToAsserts->Lookup(vn, &cur))
+ {
+ cur = optNewEmptyAssertSet();
+ optValueNumToAsserts->Set(vn, cur);
+ }
+ BitVecOps::AddElemD(apTraits, cur, index - 1);
+}
+
+/*****************************************************************************
+ * If we know statically that this assertion's VN involves a NaN, don't bother
+ * wasting an assertion table slot.
+ */
+bool Compiler::optAssertionVnInvolvesNan(AssertionDsc* assertion)
+{
+ if (optLocalAssertionProp)
+ {
+ return false;
+ }
+
+ static const int SZ = 2;
+ ValueNum vns[SZ] = {assertion->op1.vn, assertion->op2.vn};
+ for (int i = 0; i < SZ; ++i)
+ {
+ if (vnStore->IsVNConstant(vns[i]))
+ {
+ var_types type = vnStore->TypeOfVN(vns[i]);
+ if ((type == TYP_FLOAT && _isnan(vnStore->ConstantValue<float>(vns[i])) != 0) ||
+ (type == TYP_DOUBLE && _isnan(vnStore->ConstantValue<double>(vns[i])) != 0))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Given an assertion add it to the assertion table
+ *
+ * If it is already in the assertion table, return the assertionIndex that
+ * we use to refer to this element.
+ * Otherwise add it to the assertion table and return the assertionIndex that
+ * we use to refer to this element.
+ * If we need to add to the table and the table is full, return the value zero.
+ */
+Compiler::AssertionIndex Compiler::optAddAssertion(AssertionDsc* newAssertion)
+{
+ noway_assert(newAssertion->assertionKind != OAK_INVALID);
+
+ // Even though the propagation step takes care of NaN, just a check
+ // to make sure there is no slot involving a NaN.
+ if (optAssertionVnInvolvesNan(newAssertion))
+ {
+ JITDUMP("Assertion involved Nan not adding\n");
+ return NO_ASSERTION_INDEX;
+ }
+
+ // Check if exists already, so we can skip adding new one. Search backwards.
+ for (AssertionIndex index = optAssertionCount; index >= 1; index--)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (curAssertion->Equals(newAssertion, !optLocalAssertionProp))
+ {
+ return index;
+ }
+ }
+
+ // Check if we are within max count.
+ if (optAssertionCount >= optMaxAssertionCount)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ optAssertionTabPrivate[optAssertionCount] = *newAssertion;
+ optAssertionCount++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("GenTreeNode creates assertion:\n");
+ gtDispTree(optAssertionPropCurrentTree, nullptr, nullptr, true);
+ printf(optLocalAssertionProp ? "In BB%02u New Local " : "In BB%02u New Global ", compCurBB->bbNum);
+ optPrintAssertion(newAssertion, optAssertionCount);
+ }
+#endif // DEBUG
+
+ // Assertion mask bit n corresponds to assertion index n + 1 (the bit used is index - 1).
+ if (optLocalAssertionProp)
+ {
+ assert(newAssertion->op1.kind == O1K_LCLVAR);
+
+ // Mark the variables this index depends on
+ unsigned lclNum = newAssertion->op1.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1);
+ if (newAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ lclNum = newAssertion->op2.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), optAssertionCount - 1);
+ }
+ }
+ else
+ // If global assertion prop, then add it to the dependents map.
+ {
+ optAddVnAssertionMapping(newAssertion->op1.vn, optAssertionCount);
+ if (newAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ optAddVnAssertionMapping(newAssertion->op2.vn, optAssertionCount);
+ }
+ }
+
+#ifdef DEBUG
+ optDebugCheckAssertions(optAssertionCount);
+#endif
+ return optAssertionCount;
+}
+
+#ifdef DEBUG
+void Compiler::optDebugCheckAssertion(AssertionDsc* assertion)
+{
+ assert(assertion->assertionKind < OAK_COUNT);
+ assert(assertion->op1.kind < O1K_COUNT);
+ assert(assertion->op2.kind < O2K_COUNT);
+ // It would be good to check that op1.vn and op2.vn are valid value numbers.
+
+ switch (assertion->op1.kind)
+ {
+ case O1K_LCLVAR:
+ case O1K_EXACT_TYPE:
+ case O1K_SUBTYPE:
+ assert(assertion->op1.lcl.lclNum < lvaCount);
+ assert(optLocalAssertionProp || ((assertion->op1.lcl.ssaNum - SsaConfig::UNINIT_SSA_NUM) <
+ lvaTable[assertion->op1.lcl.lclNum].lvNumSsaNames));
+ break;
+ case O1K_ARR_BND:
+ // It would be good to check that bnd.vnIdx and bnd.vnLen are valid value numbers.
+ break;
+ case O1K_ARRLEN_OPER_BND:
+ case O1K_ARRLEN_LOOP_BND:
+ case O1K_CONSTANT_LOOP_BND:
+ case O1K_VALUE_NUMBER:
+ assert(!optLocalAssertionProp);
+ break;
+ default:
+ break;
+ }
+ switch (assertion->op2.kind)
+ {
+ case O2K_IND_CNS_INT:
+ case O2K_CONST_INT:
+ {
+ // The only flags that can be set are those in the GTF_ICON_HDL_MASK, or bit 0, which is
+ // used to indicate a long constant.
+ assert((assertion->op2.u1.iconFlags & ~(GTF_ICON_HDL_MASK | 1)) == 0);
+ switch (assertion->op1.kind)
+ {
+ case O1K_EXACT_TYPE:
+ case O1K_SUBTYPE:
+ assert(assertion->op2.u1.iconFlags != 0);
+ break;
+ case O1K_LCLVAR:
+ case O1K_ARR_BND:
+ assert((lvaTable[assertion->op1.lcl.lclNum].lvType != TYP_REF) || (assertion->op2.u1.iconVal == 0));
+ break;
+ case O1K_VALUE_NUMBER:
+ assert((vnStore->TypeOfVN(assertion->op1.vn) != TYP_REF) || (assertion->op2.u1.iconVal == 0));
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ default:
+ // for all other 'assertion->op2.kind' values we don't check anything
+ break;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Verify that assertion prop related assumptions are valid. If "index"
+ * is 0 (i.e., NO_ASSERTION_INDEX) then verify all assertions in the table.
+ * If "index" is between 1 and optAssertionCount, then verify the assertion
+ * desc corresponding to "index."
+ */
+void Compiler::optDebugCheckAssertions(AssertionIndex index)
+{
+ AssertionIndex start = (index == NO_ASSERTION_INDEX) ? 1 : index;
+ AssertionIndex end = (index == NO_ASSERTION_INDEX) ? optAssertionCount : index;
+ for (AssertionIndex ind = start; ind <= end; ++ind)
+ {
+ AssertionDsc* assertion = optGetAssertion(ind);
+ optDebugCheckAssertion(assertion);
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Given a "candidateAssertion", and the assertion operands op1 and op2,
+ * create a complementary assertion and add it to the assertion table,
+ * which can be retrieved using optFindComplementary(index)
+ *
+ */
+
+void Compiler::optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTreePtr op1, GenTreePtr op2)
+{
+ if (assertionIndex == NO_ASSERTION_INDEX)
+ {
+ return;
+ }
+
+ AssertionDsc& candidateAssertion = *optGetAssertion(assertionIndex);
+ if (candidateAssertion.op1.kind == O1K_ARRLEN_OPER_BND || candidateAssertion.op1.kind == O1K_ARRLEN_LOOP_BND ||
+ candidateAssertion.op1.kind == O1K_CONSTANT_LOOP_BND)
+ {
+ AssertionDsc dsc = candidateAssertion;
+ dsc.assertionKind = dsc.assertionKind == OAK_EQUAL ? OAK_NOT_EQUAL : OAK_EQUAL;
+ optAddAssertion(&dsc);
+ return;
+ }
+
+ if (candidateAssertion.assertionKind == OAK_EQUAL)
+ {
+ AssertionIndex index = optCreateAssertion(op1, op2, OAK_NOT_EQUAL);
+ optMapComplementary(index, assertionIndex);
+ }
+ else if (candidateAssertion.assertionKind == OAK_NOT_EQUAL)
+ {
+ AssertionIndex index = optCreateAssertion(op1, op2, OAK_EQUAL);
+ optMapComplementary(index, assertionIndex);
+ }
+
+ // Are we making a subtype or exact type assertion?
+ if ((candidateAssertion.op1.kind == O1K_SUBTYPE) || (candidateAssertion.op1.kind == O1K_EXACT_TYPE))
+ {
+ // Did we receive helper call args?
+ if (op1->gtOper == GT_LIST)
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+ optCreateAssertion(op1, nullptr, OAK_NOT_EQUAL);
+ }
+}
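+
+// For example (illustrative): if the candidate assertion is "V02 == 5" (OAK_EQUAL),
+// a complementary "V02 != 5" (OAK_NOT_EQUAL) assertion is created and the pair is
+// recorded via optMapComplementary so that optFindComplementary can retrieve it.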
+
+/*****************************************************************************
+ *
+ * Create assertions for jtrue operands. Given operands "op1" and "op2" that
+ * are used in a conditional evaluation of a jtrue stmt, create assertions
+ * for the operands.
+ */
+
+Compiler::AssertionIndex Compiler::optCreateJtrueAssertions(GenTreePtr op1,
+ GenTreePtr op2,
+ Compiler::optAssertionKind assertionKind)
+{
+ AssertionDsc candidateAssertion;
+ AssertionIndex assertionIndex = optCreateAssertion(op1, op2, assertionKind, &candidateAssertion);
+    // Don't bother if we don't have an assertion on the JTrue False path. The current implementation
+    // allows for a complementary assertion only if there is an assertion on the False path (tree->HasAssertion()).
+ if (assertionIndex != NO_ASSERTION_INDEX)
+ {
+ optCreateComplementaryAssertion(assertionIndex, op1, op2);
+ }
+ return assertionIndex;
+}
+
+Compiler::AssertionIndex Compiler::optCreateJTrueBoundsAssertion(GenTreePtr tree)
+{
+ GenTreePtr relop = tree->gtGetOp1();
+ if ((relop->OperKind() & GTK_RELOP) == 0)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ GenTreePtr op1 = relop->gtGetOp1();
+ GenTreePtr op2 = relop->gtGetOp2();
+
+ ValueNum vn = op1->gtVNPair.GetConservative();
+ // Cases where op1 holds the condition with array arithmetic and op2 is 0.
+ // Loop condition like: "i < a.len +/-k == 0"
+ // Assertion: "i < a.len +/- k == 0"
+ if (vnStore->IsVNArrLenArithBound(vn) &&
+ op2->gtVNPair.GetConservative() == vnStore->VNZeroForType(op2->TypeGet()) &&
+ (relop->gtOper == GT_EQ || relop->gtOper == GT_NE))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = relop->gtOper == GT_EQ ? OAK_EQUAL : OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_ARRLEN_OPER_BND;
+ dsc.op1.vn = vn;
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet());
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the array length condition and op2 is 0.
+ // Loop condition like: "i < a.len == 0"
+ // Assertion: "i < a.len == false"
+ else if (vnStore->IsVNArrLenBound(vn) &&
+ (op2->gtVNPair.GetConservative() == vnStore->VNZeroForType(op2->TypeGet())) &&
+ (relop->gtOper == GT_EQ || relop->gtOper == GT_NE))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = relop->gtOper == GT_EQ ? OAK_EQUAL : OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_ARRLEN_LOOP_BND;
+ dsc.op1.vn = vn;
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet());
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the lhs of the condition and op2 holds the rhs.
+ // Loop condition like "i < a.len"
+ // Assertion: "i < a.len != 0"
+ else if (vnStore->IsVNArrLenBound(relop->gtVNPair.GetConservative()))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_ARRLEN_LOOP_BND;
+ dsc.op1.vn = relop->gtVNPair.GetConservative();
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(TYP_INT);
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the constant bound condition and op2 is 0.
+ // Loop condition like: "i < 100 == 0"
+ // Assertion: "i < 100 == false"
+ else if (vnStore->IsVNConstantBound(vn) &&
+ (op2->gtVNPair.GetConservative() == vnStore->VNZeroForType(op2->TypeGet())) &&
+ (relop->gtOper == GT_EQ || relop->gtOper == GT_NE))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = relop->gtOper == GT_EQ ? OAK_EQUAL : OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_CONSTANT_LOOP_BND;
+ dsc.op1.vn = vn;
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(op2->TypeGet());
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+    // Cases where op1 holds the lhs of the condition and op2 holds the rhs.
+ // Loop condition like "i < 100"
+ // Assertion: "i < 100 != 0"
+ else if (vnStore->IsVNConstantBound(relop->gtVNPair.GetConservative()))
+ {
+ AssertionDsc dsc;
+ dsc.assertionKind = OAK_NOT_EQUAL;
+ dsc.op1.kind = O1K_CONSTANT_LOOP_BND;
+ dsc.op1.vn = relop->gtVNPair.GetConservative();
+ dsc.op2.kind = O2K_CONST_INT;
+ dsc.op2.vn = vnStore->VNZeroForType(TYP_INT);
+ dsc.op2.u1.iconVal = 0;
+ dsc.op2.u1.iconFlags = 0;
+ AssertionIndex index = optAddAssertion(&dsc);
+ optCreateComplementaryAssertion(index, nullptr, nullptr);
+ return index;
+ }
+
+ return NO_ASSERTION_INDEX;
+}
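+
+// Rough shape of the JTRUE trees handled above, using conservative VNs
+// (the loop variable and bounds are illustrative):
+//
+//   JTRUE(EQ/NE("i < a.len +/- k", 0))  ->  O1K_ARRLEN_OPER_BND on the VN of "i < a.len +/- k"
+//   JTRUE(EQ/NE("i < a.len", 0))        ->  O1K_ARRLEN_LOOP_BND on the VN of "i < a.len"
+//   JTRUE("i < a.len")                  ->  O1K_ARRLEN_LOOP_BND asserting "relop != 0"
+//   JTRUE(EQ/NE("i < 100", 0))          ->  O1K_CONSTANT_LOOP_BND on the VN of "i < 100"
+//   JTRUE("i < 100")                    ->  O1K_CONSTANT_LOOP_BND asserting "relop != 0"
+//
+// In each case op2 is the integer constant 0, and a complementary assertion is
+// added so that both edges of the JTRUE carry usable information.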
+
+/*****************************************************************************
+ *
+ * Compute assertions for the JTrue node.
+ */
+Compiler::AssertionIndex Compiler::optAssertionGenJtrue(GenTreePtr tree)
+{
+ // Only create assertions for JTRUE when we are in the global phase
+ if (optLocalAssertionProp)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ GenTreePtr relop = tree->gtOp.gtOp1;
+ if ((relop->OperKind() & GTK_RELOP) == 0)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ Compiler::optAssertionKind assertionKind = OAK_INVALID;
+
+ GenTreePtr op1 = relop->gtOp.gtOp1;
+ GenTreePtr op2 = relop->gtOp.gtOp2;
+
+ AssertionIndex index = optCreateJTrueBoundsAssertion(tree);
+ if (index != NO_ASSERTION_INDEX)
+ {
+ return index;
+ }
+
+ // Find assertion kind.
+ switch (relop->gtOper)
+ {
+ case GT_EQ:
+ assertionKind = OAK_EQUAL;
+ break;
+ case GT_NE:
+ assertionKind = OAK_NOT_EQUAL;
+ break;
+ default:
+ // TODO-CQ: add other relop operands. Disabled for now to measure perf
+ // and not occupy assertion table slots. We'll add them when used.
+ return NO_ASSERTION_INDEX;
+ }
+
+ // Check for op1 or op2 to be lcl var and if so, keep it in op1.
+ if ((op1->gtOper != GT_LCL_VAR) && (op2->gtOper == GT_LCL_VAR))
+ {
+ jitstd::swap(op1, op2);
+ }
+ // If op1 is lcl and op2 is const or lcl, create assertion.
+ if ((op1->gtOper == GT_LCL_VAR) &&
+ ((op2->OperKind() & GTK_CONST) || (op2->gtOper == GT_LCL_VAR))) // Fix for Dev10 851483
+ {
+ return optCreateJtrueAssertions(op1, op2, assertionKind);
+ }
+
+ // Check op1 and op2 for an indirection of a GT_LCL_VAR and keep it in op1.
+ if (((op1->gtOper != GT_IND) || (op1->gtOp.gtOp1->gtOper != GT_LCL_VAR)) &&
+ ((op2->gtOper == GT_IND) && (op2->gtOp.gtOp1->gtOper == GT_LCL_VAR)))
+ {
+ jitstd::swap(op1, op2);
+ }
+    // If op1 is an indirection of a GT_LCL_VAR, create the assertions.
+ if ((op1->gtOper == GT_IND) && (op1->gtOp.gtOp1->gtOper == GT_LCL_VAR))
+ {
+ return optCreateJtrueAssertions(op1, op2, assertionKind);
+ }
+
+ // Look for a call to an IsInstanceOf helper compared to a nullptr
+ if ((op2->gtOper != GT_CNS_INT) && (op1->gtOper == GT_CNS_INT))
+ {
+ jitstd::swap(op1, op2);
+ }
+ // Validate op1 and op2
+ if ((op1->gtOper != GT_CALL) || (op1->gtCall.gtCallType != CT_HELPER) || (op1->TypeGet() != TYP_REF) || // op1
+ (op2->gtOper != GT_CNS_INT) || (op2->gtIntCon.gtIconVal != 0)) // op2
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ if (op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE) &&
+ op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY) &&
+ op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS) &&
+ op1->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ op2 = op1->gtCall.gtCallLateArgs->gtOp.gtOp2;
+ op1 = op1->gtCall.gtCallLateArgs;
+
+ // Reverse the assertion
+ assert(assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL);
+ assertionKind = (assertionKind == OAK_EQUAL) ? OAK_NOT_EQUAL : OAK_EQUAL;
+
+ if (op1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ return optCreateJtrueAssertions(op1, op2, assertionKind);
+ }
+
+ return NO_ASSERTION_INDEX;
+}
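+
+// Illustrative example of the isinst pattern recognized above (helper and
+// local names are hypothetical):
+//
+//   JTRUE(NE(CALL(CORINFO_HELP_ISINSTANCEOFCLASS, clsHnd, obj), 0))
+//
+// op1/op2 are re-pointed at the helper call's late args (the object and the
+// class handle), and the assertion kind is reversed relative to the compare:
+// on the path where the helper result is non-null we can record an OAK_EQUAL
+// subtype assertion for the object, and OAK_NOT_EQUAL on the other path.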
+
+/*****************************************************************************
+ *
+ * Create an assertion on the phi node if some information can be gleaned
+ * from all of the constituent phi operands.
+ *
+ */
+Compiler::AssertionIndex Compiler::optAssertionGenPhiDefn(GenTreePtr tree)
+{
+ if (!tree->IsPhiDefn())
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ GenTreePtr phi = tree->gtOp.gtOp2;
+
+ // Try to find if all phi arguments are known to be non-null.
+ bool isNonNull = true;
+ for (GenTreeArgList* args = phi->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ if (!vnStore->IsKnownNonNull(args->Current()->gtVNPair.GetConservative()))
+ {
+ isNonNull = false;
+ break;
+ }
+ }
+
+ // All phi arguments are non-null implies phi rhs is non-null.
+ if (isNonNull)
+ {
+ return optCreateAssertion(tree->gtOp.gtOp1, nullptr, OAK_NOT_EQUAL);
+ }
+ return NO_ASSERTION_INDEX;
+}
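+
+// For example (SSA names are illustrative): for a phi definition
+// "V05d3 = phi(V05d1, V05d2)", if the conservative VNs of both V05d1 and
+// V05d2 are known non-null, an OAK_NOT_EQUAL (non-null) assertion is created
+// for the defined local on the left-hand side of the phi assignment.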
+
+/*****************************************************************************
+ *
+ * If this statement creates a value assignment or assertion
+ * then assign an index to the given value assignment by adding
+ * it to the lookup table, if necessary.
+ */
+void Compiler::optAssertionGen(GenTreePtr tree)
+{
+ tree->ClearAssertion();
+
+ if (tree->gtFlags & GTF_COLON_COND)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ optAssertionPropCurrentTree = tree;
+#endif
+
+ // For most of the assertions that we create below
+ // the assertion is true after the tree is processed
+ bool assertionProven = true;
+ AssertionIndex assertionIndex = NO_ASSERTION_INDEX;
+ switch (tree->gtOper)
+ {
+ case GT_ASG:
+ // VN takes care of non local assertions for assignments and data flow.
+ // TODO-1stClassStructs: Enable assertion prop for struct types.
+ if (varTypeIsStruct(tree))
+ {
+ // Do nothing.
+ }
+ else if (optLocalAssertionProp)
+ {
+ assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree->gtOp.gtOp2, OAK_EQUAL);
+ }
+ else
+ {
+ assertionIndex = optAssertionGenPhiDefn(tree);
+ }
+ break;
+
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ // TODO-1stClassStructs: These should always be considered to create a non-null
+ // assertion, but previously, when these indirections were implicit due to a block
+ // copy or init, they were not being considered to do so.
+ break;
+ case GT_IND:
+ // TODO-1stClassStructs: All indirections should be considered to create a non-null
+ // assertion, but previously, when these indirections were implicit due to a block
+ // copy or init, they were not being considered to do so.
+ if (tree->gtType == TYP_STRUCT)
+ {
+ GenTree* parent = tree->gtGetParent(nullptr);
+ if ((parent != nullptr) && (parent->gtOper == GT_ASG))
+ {
+ break;
+ }
+ }
+ case GT_NULLCHECK:
+ case GT_ARR_LENGTH:
+ // An array length can create a non-null assertion
+ assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, nullptr, OAK_NOT_EQUAL);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ if (!optLocalAssertionProp)
+ {
+ assertionIndex = optCreateAssertion(tree, nullptr, OAK_NO_THROW);
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // An array element reference can create a non-null assertion
+ assertionIndex = optCreateAssertion(tree->gtArrElem.gtArrObj, nullptr, OAK_NOT_EQUAL);
+ break;
+
+ case GT_CALL:
+ // A virtual call can create a non-null assertion. We transform some virtual calls into non-virtual calls
+ // with a GTF_CALL_NULLCHECK flag set.
+ if ((tree->gtFlags & GTF_CALL_NULLCHECK) || ((tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT))
+ {
+ // Retrieve the 'this' arg
+ GenTreePtr thisArg = gtGetThisArg(tree);
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
+ if (thisArg == nullptr)
+ {
+ // For tail calls we lose the this pointer in the argument list but that's OK because a null check
+ // was made explicit, so we get the assertion when we walk the GT_IND in the argument list.
+ noway_assert(tree->gtCall.IsTailCall());
+ break;
+ }
+#endif // _TARGET_X86_ || _TARGET_AMD64_ || _TARGET_ARM_
+ noway_assert(thisArg != nullptr);
+ assertionIndex = optCreateAssertion(thisArg, nullptr, OAK_NOT_EQUAL);
+ }
+ break;
+
+ case GT_CAST:
+ // We only create this assertion for global assertion prop
+ if (!optLocalAssertionProp)
+ {
+                // This represents an assertion that we would like to prove to be true. It is not actually a true
+ // assertion.
+ // If we can prove this assertion true then we can eliminate this cast.
+ assertionIndex = optCreateAssertion(tree->gtOp.gtOp1, tree, OAK_SUBRANGE);
+ assertionProven = false;
+ }
+ break;
+
+ case GT_JTRUE:
+ assertionIndex = optAssertionGenJtrue(tree);
+ break;
+
+ default:
+ // All other gtOper node kinds, leave 'assertionIndex' = NO_ASSERTION_INDEX
+ break;
+ }
+
+ // For global assertion prop we must store the assertion number in the tree node
+ if ((assertionIndex != NO_ASSERTION_INDEX) && assertionProven && !optLocalAssertionProp)
+ {
+ tree->SetAssertion(assertionIndex);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Maps a complementary assertion to its original assertion so it can be
+ * retrieved faster.
+ */
+void Compiler::optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index)
+{
+ if (assertionIndex == NO_ASSERTION_INDEX || index == NO_ASSERTION_INDEX)
+ {
+ return;
+ }
+ optComplementaryAssertionMap[assertionIndex] = index;
+ optComplementaryAssertionMap[index] = assertionIndex;
+}
+
+/*****************************************************************************
+ *
+ * Given an assertion index, return the assertion index of the complementary
+ * assertion or 0 if one does not exist.
+ */
+Compiler::AssertionIndex Compiler::optFindComplementary(AssertionIndex assertIndex)
+{
+ if (assertIndex == NO_ASSERTION_INDEX)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ AssertionDsc* inputAssertion = optGetAssertion(assertIndex);
+
+ // Must be an equal or not equal assertion.
+ if (inputAssertion->assertionKind != OAK_EQUAL && inputAssertion->assertionKind != OAK_NOT_EQUAL)
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ AssertionIndex index = optComplementaryAssertionMap[assertIndex];
+ if (index != NO_ASSERTION_INDEX && index <= optAssertionCount)
+ {
+ return index;
+ }
+
+ optAssertionKind complementaryAssertionKind =
+ (inputAssertion->assertionKind == OAK_EQUAL) ? OAK_NOT_EQUAL : OAK_EQUAL;
+ for (AssertionIndex index = 1; index <= optAssertionCount; ++index)
+ {
+ // Make sure assertion kinds are complementary and op1, op2 kinds match.
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (curAssertion->Complementary(inputAssertion, !optLocalAssertionProp))
+ {
+ optMapComplementary(assertIndex, index);
+ return index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a lclNum and a toType, return the index of an assertion that
+ * claims that the variable's value is always a valid subrange of toType.
+ * Thus we can discard or omit a cast to toType. Returns NO_ASSERTION_INDEX
+ * if no such assertion can be found in "assertions."
+ */
+
+Compiler::AssertionIndex Compiler::optAssertionIsSubrange(GenTreePtr tree,
+ var_types toType,
+ ASSERT_VALARG_TP assertions)
+{
+ if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ for (AssertionIndex index = 1; index <= optAssertionCount; index++)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if ((optLocalAssertionProp ||
+ BitVecOps::IsMember(apTraits, assertions, index - 1)) && // either local prop or use propagated assertions
+ (curAssertion->assertionKind == OAK_SUBRANGE) &&
+ (curAssertion->op1.kind == O1K_LCLVAR))
+ {
+ // For local assertion prop use comparison on locals, and use comparison on vns for global prop.
+ bool isEqual = optLocalAssertionProp ? (curAssertion->op1.lcl.lclNum == tree->AsLclVarCommon()->GetLclNum())
+ : (curAssertion->op1.vn == tree->gtVNPair.GetConservative());
+ if (!isEqual)
+ {
+ continue;
+ }
+
+ // Make sure the toType is within current assertion's bounds.
+ switch (toType)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_CHAR:
+ if ((curAssertion->op2.u2.loBound < AssertionDsc::GetLowerBoundForIntegralType(toType)) ||
+ (curAssertion->op2.u2.hiBound > AssertionDsc::GetUpperBoundForIntegralType(toType)))
+ {
+ continue;
+ }
+ break;
+
+ case TYP_UINT:
+ if (curAssertion->op2.u2.loBound < AssertionDsc::GetLowerBoundForIntegralType(toType))
+ {
+ continue;
+ }
+ break;
+
+ case TYP_INT:
+ break;
+
+ default:
+ continue;
+ }
+ return index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
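+
+// Illustrative queries (local and range are hypothetical): given an
+// OAK_SUBRANGE assertion recording that V07 always lies in [0..200], a query
+// with toType == TYP_UBYTE succeeds (the range fits in [0..255]) and the cast
+// can be dropped, while a query with toType == TYP_BYTE fails (200 > 127) and
+// NO_ASSERTION_INDEX is returned if no other assertion matches.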
+
+/**********************************************************************************
+ *
+ * Given a "tree" that is usually arg1 of a isinst/cast kind of GT_CALL (a class
+ * handle), and "methodTableArg" which is a const int (a class handle), then search
+ * if there is an assertion in "assertions", that asserts the equality of the two
+ * class handles and then returns the index of the assertion. If one such assertion
+ * could not be found, then it returns NO_ASSERTION_INDEX.
+ *
+ */
+Compiler::AssertionIndex Compiler::optAssertionIsSubtype(GenTreePtr tree,
+ GenTreePtr methodTableArg,
+ ASSERT_VALARG_TP assertions)
+{
+ if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ for (AssertionIndex index = 1; index <= optAssertionCount; index++)
+ {
+ if (!optLocalAssertionProp && !BitVecOps::IsMember(apTraits, assertions, index - 1))
+ {
+ continue;
+ }
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (curAssertion->assertionKind != OAK_EQUAL ||
+ (curAssertion->op1.kind != O1K_SUBTYPE && curAssertion->op1.kind != O1K_EXACT_TYPE))
+ {
+ continue;
+ }
+
+ // If local assertion prop use "lcl" based comparison, if global assertion prop use vn based comparison.
+ if ((optLocalAssertionProp) ? (curAssertion->op1.lcl.lclNum != tree->AsLclVarCommon()->GetLclNum())
+ : (curAssertion->op1.vn != tree->gtVNPair.GetConservative()))
+ {
+ continue;
+ }
+
+ if (curAssertion->op2.kind == O2K_IND_CNS_INT)
+ {
+ if (methodTableArg->gtOper != GT_IND)
+ {
+ continue;
+ }
+ methodTableArg = methodTableArg->gtOp.gtOp1;
+ }
+ else if (curAssertion->op2.kind != O2K_CONST_INT)
+ {
+ continue;
+ }
+
+ ssize_t methodTableVal = 0;
+ unsigned iconFlags = 0;
+ if (!optIsTreeKnownIntValue(!optLocalAssertionProp, methodTableArg, &methodTableVal, &iconFlags))
+ {
+ continue;
+ }
+
+ if (curAssertion->op2.u1.iconVal == methodTableVal)
+ {
+ return index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+//------------------------------------------------------------------------------
+// optVNConstantPropOnTree: Substitutes tree with an evaluated constant while
+// managing ref-counts and side-effects.
+//
+// Arguments:
+// block - The block containing the tree.
+// stmt - The statement in the block containing the tree.
+// tree - The tree node whose value is known at compile time.
+// The tree should have a constant value number.
+//
+// Return Value:
+// Returns a potentially new or a transformed tree node.
+// Returns nullptr when no transformation is possible.
+//
+// Description:
+// Transforms a tree node if its result evaluates to a constant. The
+// transformation can be a "ChangeOper" to a constant or a new constant node
+// with extracted side-effects.
+//
+// Before replacing or substituting the "tree" with a constant, extracts any
+// side effects from the "tree" and creates a comma separated side effect list
+// and then appends the transformed node at the end of the list.
+// This comma separated list is then returned.
+//
+// For JTrue nodes, side effects are not put into a comma separated list. If
+// the relop will evaluate to "true" or "false" statically, then the side-effects
+// will be put into new statements, presuming the JTrue will be folded away.
+//
+// The ref-counts of any variables in the tree being replaced, will be
+// appropriately decremented. The ref-counts of variables in the side-effect
+// nodes will be retained.
+//
+GenTreePtr Compiler::optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_JTRUE)
+ {
+ // Treat JTRUE separately to extract side effects into respective statements rather
+ // than using a COMMA separated op1.
+ return optVNConstantPropOnJTrue(block, stmt, tree);
+ }
+ // If relop is part of JTRUE, this should be optimized as part of the parent JTRUE.
+ // Or if relop is part of QMARK or anything else, we simply bail here.
+ else if (tree->OperIsCompare() && (tree->gtFlags & GTF_RELOP_JMP_USED))
+ {
+ return nullptr;
+ }
+
+ ValueNum vnCns = tree->gtVNPair.GetConservative();
+ ValueNum vnLib = tree->gtVNPair.GetLiberal();
+
+ // Check if node evaluates to a constant.
+ if (!vnStore->IsVNConstant(vnCns))
+ {
+ return nullptr;
+ }
+
+ GenTreePtr newTree = tree;
+ GenTreePtr sideEffList = nullptr;
+ switch (vnStore->TypeOfVN(vnCns))
+ {
+ case TYP_FLOAT:
+ {
+ float value = vnStore->ConstantValue<float>(vnCns);
+
+ if (tree->TypeGet() == TYP_INT)
+ {
+ // Same sized reinterpretation of bits to integer
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = *(reinterpret_cast<int*>(&value));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ else
+ {
+ // Implicit assignment conversion to float or double
+ assert(varTypeIsFloating(tree->TypeGet()));
+
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = value;
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ break;
+ }
+
+ case TYP_DOUBLE:
+ {
+ double value = vnStore->ConstantValue<double>(vnCns);
+
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ // Same sized reinterpretation of bits to long
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(*(reinterpret_cast<INT64*>(&value)));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ else
+ {
+ // Implicit assignment conversion to float or double
+ assert(varTypeIsFloating(tree->TypeGet()));
+
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = value;
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ }
+ break;
+ }
+
+ case TYP_LONG:
+ {
+ INT64 value = vnStore->ConstantValue<INT64>(vnCns);
+#ifdef _TARGET_64BIT_
+ if (vnStore->IsVNHandle(vnCns))
+ {
+#ifdef RELOC_SUPPORT
+ // Don't perform constant folding that involves a handle that needs
+ // to be recorded as a relocation with the VM.
+ if (!opts.compReloc)
+#endif
+ {
+ newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns));
+ newTree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ newTree = optPrepareTreeForReplacement(tree, newTree);
+ }
+ }
+ else
+#endif
+ {
+ switch (tree->TypeGet())
+ {
+ case TYP_INT:
+ // Implicit assignment conversion to smaller integer
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = (int)value;
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_LONG:
+ // Same type no conversion required
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(value);
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_FLOAT:
+ // No implicit conversions from long to float and value numbering will
+ // not propagate through memory reinterpretations of different size.
+ unreached();
+ break;
+
+ case TYP_DOUBLE:
+ // Same sized reinterpretation of bits to double
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = *(reinterpret_cast<double*>(&value));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ default:
+ return nullptr;
+ }
+ }
+ }
+ break;
+
+ case TYP_REF:
+ if (tree->TypeGet() != TYP_REF)
+ {
+ return nullptr;
+ }
+
+ assert(vnStore->ConstantValue<size_t>(vnCns) == 0);
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = 0;
+ tree->ClearIconHandleMask();
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_INT:
+ {
+ int value = vnStore->ConstantValue<int>(vnCns);
+#ifndef _TARGET_64BIT_
+ if (vnStore->IsVNHandle(vnCns))
+ {
+#ifdef RELOC_SUPPORT
+ // Don't perform constant folding that involves a handle that needs
+ // to be recorded as a relocation with the VM.
+ if (!opts.compReloc)
+#endif
+ {
+ newTree = gtNewIconHandleNode(value, vnStore->GetHandleFlags(vnCns));
+ newTree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ newTree = optPrepareTreeForReplacement(tree, newTree);
+ }
+ }
+ else
+#endif
+ {
+ switch (tree->TypeGet())
+ {
+ case TYP_REF:
+ case TYP_INT:
+ // Same type no conversion required
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtIntCon.gtIconVal = value;
+ tree->ClearIconHandleMask();
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_LONG:
+ // Implicit assignment conversion to larger integer
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(value);
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_FLOAT:
+ // Same sized reinterpretation of bits to float
+ newTree = optPrepareTreeForReplacement(tree, tree);
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = *(reinterpret_cast<float*>(&value));
+ tree->gtVNPair = ValueNumPair(vnLib, vnCns);
+ break;
+
+ case TYP_DOUBLE:
+ // No implicit conversions from int to double and value numbering will
+ // not propagate through memory reinterpretations of different size.
+ unreached();
+ break;
+
+ default:
+ return nullptr;
+ }
+ }
+ }
+ break;
+
+ default:
+ return nullptr;
+ }
+ return newTree;
+}
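+
+// Worked example of the bit-reinterpretation cases above (values illustrative):
+// a TYP_INT tree whose conservative VN is the float constant 1.0f becomes
+// GT_CNS_INT with gtIconVal == 0x3f800000 (the IEEE-754 bits of 1.0f), and a
+// TYP_DOUBLE tree whose VN is a TYP_LONG constant becomes GT_CNS_DBL carrying
+// the same 64 bits reinterpreted; no numeric conversion is performed, and any
+// side effects of the original tree are preserved in a leading comma list.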
+
+/*******************************************************************************************************
+ *
+ * Perform constant propagation on a tree given the "curAssertion" is true at the point of the "tree."
+ *
+ */
+GenTreePtr Compiler::optConstantAssertionProp(AssertionDsc* curAssertion,
+ GenTreePtr tree,
+ GenTreePtr stmt DEBUGARG(AssertionIndex index))
+{
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ if (lclNumIsCSE(lclNum))
+ {
+ return nullptr;
+ }
+
+ GenTreePtr newTree = tree;
+
+ // Update 'newTree' with the new value from our table
+ // Typically newTree == tree and we are updating the node in place
+ switch (curAssertion->op2.kind)
+ {
+ case O2K_CONST_DOUBLE:
+ // There could be a positive zero and a negative zero, so don't propagate zeroes.
+ if (curAssertion->op2.dconVal == 0.0)
+ {
+ return nullptr;
+ }
+ newTree->ChangeOperConst(GT_CNS_DBL);
+ newTree->gtDblCon.gtDconVal = curAssertion->op2.dconVal;
+ break;
+
+ case O2K_CONST_LONG:
+ if (newTree->gtType == TYP_LONG)
+ {
+ newTree->ChangeOperConst(GT_CNS_NATIVELONG);
+ newTree->gtIntConCommon.SetLngValue(curAssertion->op2.lconVal);
+ }
+ else
+ {
+ newTree->ChangeOperConst(GT_CNS_INT);
+ newTree->gtIntCon.gtIconVal = (int)curAssertion->op2.lconVal;
+ newTree->gtType = TYP_INT;
+ }
+ break;
+
+ case O2K_CONST_INT:
+ if (curAssertion->op2.u1.iconFlags & GTF_ICON_HDL_MASK)
+ {
+ // Here we have to allocate a new 'large' node to replace the old one
+ newTree = gtNewIconHandleNode(curAssertion->op2.u1.iconVal,
+ curAssertion->op2.u1.iconFlags & GTF_ICON_HDL_MASK);
+ }
+ else
+ {
+ bool isArrIndex = ((tree->gtFlags & GTF_VAR_ARR_INDEX) != 0);
+ newTree->ChangeOperConst(GT_CNS_INT);
+ newTree->gtIntCon.gtIconVal = curAssertion->op2.u1.iconVal;
+ newTree->ClearIconHandleMask();
+ // If we're doing an array index address, assume any constant propagated contributes to the index.
+ if (isArrIndex)
+ {
+ newTree->gtIntCon.gtFieldSeq =
+ GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ }
+ newTree->gtFlags &= ~GTF_VAR_ARR_INDEX;
+ }
+
+ // Constant ints are of type TYP_INT, not any of the short forms.
+ if (varTypeIsIntegral(newTree->TypeGet()))
+ {
+#ifdef _TARGET_64BIT_
+ var_types newType = (var_types)((curAssertion->op2.u1.iconFlags & 1) ? TYP_LONG : TYP_INT);
+ if (newTree->TypeGet() != newType)
+ {
+ noway_assert(newTree->gtType != TYP_REF);
+ newTree->gtType = newType;
+ }
+#else
+ if (newTree->TypeGet() != TYP_INT)
+ {
+ noway_assert(newTree->gtType != TYP_REF && newTree->gtType != TYP_LONG);
+ newTree->gtType = TYP_INT;
+ }
+#endif
+ }
+ break;
+
+ default:
+ return nullptr;
+ }
+
+ if (!optLocalAssertionProp)
+ {
+ assert(newTree->OperIsConst()); // We should have a simple Constant node for newTree
+ assert(vnStore->IsVNConstant(curAssertion->op2.vn)); // The value number stored for op2 should be a valid
+ // VN representing the constant
+ newTree->gtVNPair.SetBoth(curAssertion->op2.vn); // Set the ValueNumPair to the constant VN from op2
+ // of the assertion
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAssertion prop in BB%02u:\n", compCurBB->bbNum);
+ optPrintAssertion(curAssertion, index);
+ gtDispTree(newTree, nullptr, nullptr, true);
+ }
+#endif
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[lclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+ }
+
+ return optAssertionProp_Update(newTree, tree, stmt);
+}
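+
+// Illustrative substitution (local and value hypothetical): with an assertion
+// "V06 == 42" available, a use of GT_LCL_VAR V06 is rewritten in place to
+// GT_CNS_INT 42. For a handle constant a fresh "large" icon node is allocated
+// instead, and under global assertion prop the new constant's ValueNumPair is
+// set to the constant VN recorded in op2 of the assertion.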
+
+/*******************************************************************************************************
+ *
+ * Called in the context of an existing copy assertion which makes an "==" assertion on "lclVar" and
+ * "copyVar." Before substituting "copyVar" for "lclVar", we make sure using "copy" doesn't widen access.
+ *
+ */
+bool Compiler::optAssertionProp_LclVarTypeCheck(GenTreePtr tree, LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc)
+{
+ /*
+ Small struct field locals are stored using the exact width and loaded widened
+ (i.e. lvNormalizeOnStore==false lvNormalizeOnLoad==true),
+ because the field locals might end up embedded in the parent struct local with the exact width.
+
+    In other words, a store to a short field local should always be done using an exact width store.
+
+ [00254538] 0x0009 ------------ const int 0x1234
+ [002545B8] 0x000B -A--G--NR--- = short
+ [00254570] 0x000A D------N---- lclVar short V43 tmp40
+
+ mov word ptr [L_043], 0x1234
+
+ Now, if we copy prop, say a short field local V43, to another short local V34
+ for the following tree:
+
+ [04E18650] 0x0001 ------------ lclVar int V34 tmp31
+ [04E19714] 0x0002 -A---------- = int
+ [04E196DC] 0x0001 D------N---- lclVar int V36 tmp33
+
+ We will end with this tree:
+
+ [04E18650] 0x0001 ------------ lclVar int V43 tmp40
+ [04E19714] 0x0002 -A-----NR--- = int
+ [04E196DC] 0x0001 D------N---- lclVar int V36 tmp33 EAX
+
+    And eventually causing a 4-byte fetch from [L_043] :(
+ mov EAX, dword ptr [L_043]
+
+ The following check is to make sure we only perform the copy prop
+ when we don't retrieve the wider value.
+ */
+
+ if (copyVarDsc->lvIsStructField)
+ {
+ var_types varType = (var_types)copyVarDsc->lvType;
+ // Make sure we don't retrieve the wider value.
+ return !varTypeIsSmall(varType) || (varType == tree->TypeGet());
+ }
+ // Called in the context of a single copy assertion, so the types should have been
+    // taken care of by the assertion gen logic for other cases. Just return true.
+ return true;
+}
+
+/**********************************************************************************
+ *
+ * Perform copy assertion propagation when the lclNum and ssaNum of the "tree" match
+ * the "curAssertion."
+ *
+ */
+GenTreePtr Compiler::optCopyAssertionProp(AssertionDsc* curAssertion,
+ GenTreePtr tree,
+ GenTreePtr stmt DEBUGARG(AssertionIndex index))
+{
+ const AssertionDsc::AssertionDscOp1& op1 = curAssertion->op1;
+ const AssertionDsc::AssertionDscOp2& op2 = curAssertion->op2;
+
+ noway_assert(op1.lcl.lclNum != op2.lcl.lclNum);
+
+ unsigned lclNum = tree->gtLclVarCommon.GetLclNum();
+
+ // Make sure one of the lclNum of the assertion matches with that of the tree.
+ if (op1.lcl.lclNum != lclNum && op2.lcl.lclNum != lclNum)
+ {
+ return nullptr;
+ }
+
+ // Extract the matching lclNum and ssaNum.
+ unsigned copyLclNum = (op1.lcl.lclNum == lclNum) ? op2.lcl.lclNum : op1.lcl.lclNum;
+ unsigned copySsaNum = BAD_VAR_NUM;
+ if (!optLocalAssertionProp)
+ {
+ // Extract the ssaNum of the matching lclNum.
+ unsigned ssaNum = (op1.lcl.lclNum == lclNum) ? op1.lcl.ssaNum : op2.lcl.ssaNum;
+ copySsaNum = (op1.lcl.lclNum == lclNum) ? op2.lcl.ssaNum : op1.lcl.ssaNum;
+
+ if (ssaNum != tree->AsLclVarCommon()->GetSsaNum())
+ {
+ return nullptr;
+ }
+ }
+
+ LclVarDsc* copyVarDsc = &lvaTable[copyLclNum];
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+
+ // Make sure the types are compatible.
+ if (!optAssertionProp_LclVarTypeCheck(tree, lclVarDsc, copyVarDsc))
+ {
+ return nullptr;
+ }
+
+ // Make sure we can perform this copy prop.
+ if (optCopyProp_LclVarScore(lclVarDsc, copyVarDsc, curAssertion->op1.lcl.lclNum == lclNum) <= 0)
+ {
+ return nullptr;
+ }
+
+ // If global assertion prop, by now we should have ref counts, fix them.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[lclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[copyLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ tree->gtLclVarCommon.SetSsaNum(copySsaNum);
+ }
+ tree->gtLclVarCommon.SetLclNum(copyLclNum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAssertion prop in BB%02u:\n", compCurBB->bbNum);
+ optPrintAssertion(curAssertion, index);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ // Update and morph the tree.
+ return optAssertionProp_Update(tree, tree, stmt);
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of just a LclVar and a set of available assertions
+ * we try to propagate an assertion and modify the LclVar tree if we can.
+ * We pass in the root of the tree via 'stmt', for local copy prop 'stmt' will
+ * be nullptr. Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+
+GenTreePtr Compiler::optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_LCL_VAR);
+    // If we have a var definition then bail, or
+    // if this is the address of the var then it will have the GTF_DONT_CSE
+    // flag set and we don't want to do assertion prop on it.
+ if (tree->gtFlags & (GTF_VAR_DEF | GTF_DONT_CSE))
+ {
+ return nullptr;
+ }
+
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ // See if the variable is equal to a constant or another variable.
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if (curAssertion->assertionKind != OAK_EQUAL || curAssertion->op1.kind != O1K_LCLVAR)
+ {
+ continue;
+ }
+
+ // Copy prop.
+ if (curAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+            // Cannot do copy prop during global assertion prop because we have no knowledge
+            // of kill sets. We will still make "a == b" copy assertions during the global phase to allow
+ // for any implied assertions that can be retrieved. Because implied assertions look for
+ // matching SSA numbers (i.e., if a0 == b1 and b1 == c0 then a0 == c0) they don't need kill sets.
+ if (optLocalAssertionProp)
+ {
+ // Perform copy assertion prop.
+ GenTreePtr newTree = optCopyAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index));
+ if (newTree == nullptr)
+ {
+ // Skip and try next assertion.
+ continue;
+ }
+ return newTree;
+ }
+ }
+ // Constant prop (for local assertion prop.)
+ // The case where the tree type could be different than the LclVar type is caused by
+ // gtFoldExpr, specifically the case of a cast, where the fold operation changes the type of the LclVar
+        // node. In such a case it is not safe to perform the substitution since later on the JIT will assert mismatching
+ // types between trees.
+ else if (curAssertion->op1.lcl.lclNum == tree->gtLclVarCommon.GetLclNum() &&
+ tree->gtType == lvaTable[tree->gtLclVarCommon.GetLclNum()].lvType)
+ {
+            // If local assertion prop, just perform constant prop.
+ if (optLocalAssertionProp)
+ {
+ return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index));
+ }
+ // If global assertion, perform constant propagation only if the VN's match and the lcl is non-CSE.
+ else if (curAssertion->op1.vn == tree->gtVNPair.GetConservative())
+ {
+#if FEATURE_ANYCSE
+ // Don't perform constant prop for CSE LclVars
+ if (!lclNumIsCSE(tree->AsLclVarCommon()->GetLclNum()))
+#endif
+ {
+ return optConstantAssertionProp(curAssertion, tree, stmt DEBUGARG((AssertionIndex)index));
+ }
+ }
+ }
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a set of "assertions" to search, find an assertion that matches
+ * op1Kind and lclNum, op2Kind and the constant value, and is either an equal
+ * or a not-equal assertion.
+ */
+Compiler::AssertionIndex Compiler::optLocalAssertionIsEqualOrNotEqual(
+ optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions)
+{
+ noway_assert((op1Kind == O1K_LCLVAR) || (op1Kind == O1K_EXACT_TYPE) || (op1Kind == O1K_SUBTYPE));
+ noway_assert((op2Kind == O2K_CONST_INT) || (op2Kind == O2K_IND_CNS_INT));
+ if (!optLocalAssertionProp && BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ for (AssertionIndex index = 1; index <= optAssertionCount; ++index)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if (optLocalAssertionProp || BitVecOps::IsMember(apTraits, assertions, index - 1))
+ {
+ if ((curAssertion->assertionKind != OAK_EQUAL) && (curAssertion->assertionKind != OAK_NOT_EQUAL))
+ {
+ continue;
+ }
+
+ if ((curAssertion->op1.kind == op1Kind) && (curAssertion->op1.lcl.lclNum == lclNum) &&
+ (curAssertion->op2.kind == op2Kind))
+ {
+ bool constantIsEqual = (curAssertion->op2.u1.iconVal == cnsVal);
+ bool assertionIsEqual = (curAssertion->assertionKind == OAK_EQUAL);
+
+ if (constantIsEqual || assertionIsEqual)
+ {
+ return index;
+ }
+ }
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a set of "assertions" to search for, find an assertion that is either
+ * "op1" == "op2" or "op1" != "op2." Does a value number based comparison.
+ *
+ */
+Compiler::AssertionIndex Compiler::optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions,
+ GenTreePtr op1,
+ GenTreePtr op2)
+{
+ if (BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if ((curAssertion->assertionKind != OAK_EQUAL && curAssertion->assertionKind != OAK_NOT_EQUAL))
+ {
+ continue;
+ }
+
+ if (curAssertion->op1.vn == op1->gtVNPair.GetConservative() &&
+ curAssertion->op2.vn == op2->gtVNPair.GetConservative())
+ {
+ return (AssertionIndex)index;
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a RelOp and a set of available assertions
+ * we try to propagate an assertion and modify the RelOp tree if we can.
+ * We pass in the root of the tree via 'stmt'; for local copy prop 'stmt' will be nullptr.
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+
+GenTreePtr Compiler::optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->OperKind() & GTK_RELOP);
+
+ //
+ // Currently only GT_EQ or GT_NE are supported Relops for AssertionProp
+ //
+ if ((tree->gtOper != GT_EQ) && (tree->gtOper != GT_NE))
+ {
+ return nullptr;
+ }
+
+ if (!optLocalAssertionProp)
+ {
+ // If global assertion prop then use value numbering.
+ return optAssertionPropGlobal_RelOp(assertions, tree, stmt);
+ }
+ else
+ {
+ // If local assertion prop then use variable based prop.
+ return optAssertionPropLocal_RelOp(assertions, tree, stmt);
+ }
+}
+
+/*************************************************************************************
+ *
+ * Given the set of "assertions" to look up a relop assertion about the relop "tree",
+ * perform Value numbering based relop assertion propagation on the tree.
+ *
+ */
+GenTreePtr Compiler::optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions,
+ const GenTreePtr tree,
+ const GenTreePtr stmt)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+
+ GenTreePtr newTree = tree;
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ // Find an equal or not equal assertion involving "op1" and "op2".
+ AssertionIndex index = optGlobalAssertionIsEqualOrNotEqual(assertions, op1, op2);
+ if (index == NO_ASSERTION_INDEX)
+ {
+ return nullptr;
+ }
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+    // Whether or not to allow reversing the condition for OAK_NOT_EQUAL assertions.
+ bool allowReverse = true;
+
+ // If the assertion involves "op2" and it is a constant, then check if "op1" also has a constant value.
+ if (vnStore->IsVNConstant(op2->gtVNPair.GetConservative()))
+ {
+ ValueNum vnCns = op2->gtVNPair.GetConservative();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nVN relop based constant assertion prop in BB%02u:\n", compCurBB->bbNum);
+ printf("Assertion index=#%02u: ", index);
+ printTreeID(op1);
+ printf(" %s ", (curAssertion->assertionKind == OAK_EQUAL) ? "==" : "!=");
+ if (genActualType(op1->TypeGet()) == TYP_INT)
+ {
+ printf("%d\n", vnStore->ConstantValue<int>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_LONG)
+ {
+ printf("%I64d\n", vnStore->ConstantValue<INT64>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_DOUBLE)
+ {
+ printf("%f\n", vnStore->ConstantValue<double>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ printf("%f\n", vnStore->ConstantValue<float>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_REF)
+ {
+ // The only constant of TYP_REF that ValueNumbering supports is 'null'
+ assert(vnStore->ConstantValue<size_t>(vnCns) == 0);
+ printf("null\n");
+ }
+ else
+ {
+ printf("??unknown\n");
+ }
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ // Decrement the ref counts, before we change the oper.
+ lvaTable[op1->gtLclVar.gtLclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+
+ // Change the oper to const.
+ if (genActualType(op1->TypeGet()) == TYP_INT)
+ {
+ op1->ChangeOperConst(GT_CNS_INT);
+ op1->gtIntCon.gtIconVal = vnStore->ConstantValue<int>(vnCns);
+ }
+ else if (op1->TypeGet() == TYP_LONG)
+ {
+ op1->ChangeOperConst(GT_CNS_NATIVELONG);
+ op1->gtIntConCommon.SetLngValue(vnStore->ConstantValue<INT64>(vnCns));
+ }
+ else if (op1->TypeGet() == TYP_DOUBLE)
+ {
+ double constant = vnStore->ConstantValue<double>(vnCns);
+ op1->ChangeOperConst(GT_CNS_DBL);
+ op1->gtDblCon.gtDconVal = constant;
+
+ // Nothing can be equal to NaN. So if IL had "op1 == NaN", then we already made op1 NaN,
+ // which will yield a false correctly. Instead if IL had "op1 != NaN", then we already
+ // made op1 NaN which will yield a true correctly. Note that this is irrespective of the
+ // assertion we have made.
+ allowReverse = (_isnan(constant) == 0);
+ }
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ float constant = vnStore->ConstantValue<float>(vnCns);
+ op1->ChangeOperConst(GT_CNS_DBL);
+ op1->gtDblCon.gtDconVal = constant;
+ // See comments for TYP_DOUBLE.
+ allowReverse = (_isnan(constant) == 0);
+ }
+ else if (op1->TypeGet() == TYP_REF)
+ {
+ op1->ChangeOperConst(GT_CNS_INT);
+ // The only constant of TYP_REF that ValueNumbering supports is 'null'
+ noway_assert(vnStore->ConstantValue<size_t>(vnCns) == 0);
+ op1->gtIntCon.gtIconVal = 0;
+ }
+ else
+ {
+ noway_assert(!"unknown type in Global_RelOp");
+ }
+
+ op1->gtVNPair.SetBoth(vnCns); // Preserve the ValueNumPair, as ChangeOperConst/SetOper will clear it.
+ }
+ // If the assertion involves "op2" and "op1" is also a local var, then just morph the tree.
+ else if (op2->gtOper == GT_LCL_VAR)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nVN relop based copy assertion prop in BB%02u:\n", compCurBB->bbNum);
+ printf("Assertion index=#%02u: V%02d.%02d %s V%02d.%02d\n", index, op1->gtLclVar.gtLclNum,
+ op1->gtLclVar.gtSsaNum, (curAssertion->assertionKind == OAK_EQUAL) ? "==" : "!=",
+ op2->gtLclVar.gtLclNum, op2->gtLclVar.gtSsaNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ lvaTable[op1->gtLclVar.gtLclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+
+ // If floating point, don't just substitute op1 with op2, this won't work if
+ // op2 is NaN. Just turn it into a "true" or "false" yielding expression.
+ if (op1->TypeGet() == TYP_DOUBLE || op1->TypeGet() == TYP_FLOAT)
+ {
+ // Note we can't trust the OAK_EQUAL as the value could end up being a NaN
+ // violating the assertion. However, we create OAK_EQUAL assertions for floating
+ // point only on JTrue nodes, so if the condition held earlier, it will hold
+ // now. We don't create OAK_EQUAL assertion on floating point from GT_ASG
+ // because we depend on value num which would constant prop the NaN.
+ lvaTable[op2->gtLclVar.gtLclNum].decRefCnts(compCurBB->getBBWeight(this), this);
+ op1->ChangeOperConst(GT_CNS_DBL);
+ op1->gtDblCon.gtDconVal = 0;
+ op2->ChangeOperConst(GT_CNS_DBL);
+ op2->gtDblCon.gtDconVal = 0;
+ }
+ // Change the op1 LclVar to the op2 LclVar
+ else
+ {
+ noway_assert(varTypeIsIntegralOrI(op1->TypeGet()));
+ lvaTable[op2->gtLclVar.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ op1->AsLclVarCommon()->SetLclNum(op2->AsLclVarCommon()->GetLclNum());
+ op1->AsLclVarCommon()->SetSsaNum(op2->AsLclVarCommon()->GetSsaNum());
+ }
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ // Finally reverse the condition, if we have a not equal assertion.
+ if (allowReverse && curAssertion->assertionKind == OAK_NOT_EQUAL)
+ {
+ gtReverseCond(tree);
+ }
+
+ newTree = fgMorphTree(tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(newTree, nullptr, nullptr, true);
+ }
+#endif
+
+ return optAssertionProp_Update(newTree, tree, stmt);
+}
+
+/*************************************************************************************
+ *
+ * Given the set of "assertions" to look up a relop assertion about the relop "tree",
+ * perform local variable name based relop assertion propagation on the tree.
+ *
+ */
+GenTreePtr Compiler::optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions,
+ const GenTreePtr tree,
+ const GenTreePtr stmt)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+    // For Local AssertionProp we can only fold when op1 is a GT_LCL_VAR
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+    // For Local AssertionProp we can only fold when op2 is a GT_CNS_INT
+ if (op2->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ optOp1Kind op1Kind = O1K_LCLVAR;
+ optOp2Kind op2Kind = O2K_CONST_INT;
+ ssize_t cnsVal = op2->gtIntCon.gtIconVal;
+ var_types cmpType = op1->TypeGet();
+
+ // Don't try to fold/optimize Floating Compares; there are multiple zero values.
+ if (varTypeIsFloating(cmpType))
+ {
+ return nullptr;
+ }
+
+ // Find an equal or not equal assertion about op1 var.
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ AssertionIndex index = optLocalAssertionIsEqualOrNotEqual(op1Kind, lclNum, op2Kind, cnsVal, assertions);
+
+ if (index == NO_ASSERTION_INDEX)
+ {
+ return nullptr;
+ }
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+ bool assertionKindIsEqual = (curAssertion->assertionKind == OAK_EQUAL);
+ bool constantIsEqual = false;
+
+ if (genTypeSize(cmpType) == TARGET_POINTER_SIZE)
+ {
+ constantIsEqual = (curAssertion->op2.u1.iconVal == cnsVal);
+ }
+#ifdef _TARGET_64BIT_
+ else if (genTypeSize(cmpType) == sizeof(INT32))
+ {
+ // Compare the low 32-bits only
+ constantIsEqual = (((INT32)curAssertion->op2.u1.iconVal) == ((INT32)cnsVal));
+ }
+#endif
+ else
+ {
+        // We currently don't fold/optimize when the GT_LCL_VAR has been cast to a small type.
+ return nullptr;
+ }
+
+ noway_assert(constantIsEqual || assertionKindIsEqual);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAssertion prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ // Return either CNS_INT 0 or CNS_INT 1.
+ bool foldResult = (constantIsEqual == assertionKindIsEqual);
+ if (tree->gtOper == GT_NE)
+ {
+ foldResult = !foldResult;
+ }
+
+ op2->gtIntCon.gtIconVal = foldResult;
+ op2->gtType = TYP_INT;
+
+ return optAssertionProp_Update(op2, tree, stmt);
+}
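+
+// Illustrative folds (locals and values hypothetical): with "V02 == 3"
+// available, GT_EQ(V02, 3) folds to 1 and GT_NE(V02, 3) folds to 0; with
+// "V02 != 3" available, GT_EQ(V02, 3) folds to 0. The relop is replaced by
+// op2 rewritten as a TYP_INT 0/1 constant via optAssertionProp_Update.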
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a Cast and a set of available assertions
+ * we try to propagate an assertion and modify the Cast tree if we can.
+ * We pass in the root of the tree via 'stmt', for local copy prop 'stmt'
+ * will be nullptr.
+ *
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+GenTreePtr Compiler::optAssertionProp_Cast(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_CAST);
+
+ var_types toType = tree->gtCast.gtCastType;
+ GenTreePtr op1 = tree->gtCast.CastOp();
+
+ // If we have a cast involving floating point types, then bail.
+ if (varTypeIsFloating(toType) || varTypeIsFloating(op1->TypeGet()))
+ {
+ return nullptr;
+ }
+
+ // Skip over a GT_COMMA node(s), if necessary to get to the lcl.
+ GenTreePtr lcl = op1;
+ while (lcl->gtOper == GT_COMMA)
+ {
+ lcl = lcl->gtOp.gtOp2;
+ }
+
+ // If we don't have a cast of a LCL_VAR then bail.
+ if (lcl->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ unsigned index = optAssertionIsSubrange(lcl, toType, assertions);
+ if (index != NO_ASSERTION_INDEX)
+ {
+ LclVarDsc* varDsc = &lvaTable[lcl->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvNormalizeOnLoad() || varTypeIsLong(varDsc->TypeGet()))
+ {
+            // For normalize on load variables, the cast must be a narrowing cast in order to be removed.
+ if (genTypeSize(toType) > genTypeSize(varDsc->TypeGet()))
+ {
+ // Can we just remove the GTF_OVERFLOW flag?
+ if ((tree->gtFlags & GTF_OVERFLOW) == 0)
+ {
+ return nullptr;
+ }
+ else
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSubrange prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ tree->gtFlags &= ~GTF_OVERFLOW; // This cast cannot overflow
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+ }
+
+ // GT_CAST long -> uint -> int
+ // |
+ // GT_LCL_VAR long
+ //
+ // Where the lclvar is known to be in the range of [0..MAX_UINT]
+ //
+ // A load of a 32-bit unsigned int is the same as a load of a 32-bit signed int
+ //
+ if (toType == TYP_UINT)
+ {
+ toType = TYP_INT;
+ }
+
+ // Change the "lcl" type to match what the cast wanted, by propagating the type
+ // change down the comma nodes leading to the "lcl", if we skipped them earlier.
+ GenTreePtr tmp = op1;
+ while (tmp->gtOper == GT_COMMA)
+ {
+ tmp->gtType = toType;
+ tmp = tmp->gtOp.gtOp2;
+ }
+ noway_assert(tmp == lcl);
+ tmp->gtType = toType;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSubrange prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ return optAssertionProp_Update(op1, tree, stmt);
+ }
+ return nullptr;
+}
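+
+// Illustrative case (local and range hypothetical): for
+//
+//   GT_CAST(int <- long)      with a subrange assertion that V09 is in [0..10]
+//      |
+//   GT_LCL_VAR long V09
+//
+// the cast is removed and the lclvar (plus any interposed GT_COMMA nodes) is
+// retyped to TYP_INT. For a widening cast of a normalize-on-load variable the
+// cast must stay, but a redundant GTF_OVERFLOW flag can still be cleared.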
+
+/*****************************************************************************
+ *
+ * Given a tree with an array bounds check node, eliminate the check because
+ * it has already been performed earlier in the program.
+ */
+GenTreePtr Compiler::optAssertionProp_Comma(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+    // Remove the bounds check as part of the GT_COMMA node since we need the parent pointer to remove nodes.
+    // When the processing visits the bounds check, it sets the throw kind to None if the check is redundant.
+ if ((tree->gtGetOp1()->OperGet() == GT_ARR_BOUNDS_CHECK) &&
+ ((tree->gtGetOp1()->gtFlags & GTF_ARR_BOUND_INBND) != 0))
+ {
+ optRemoveRangeCheck(tree, stmt, true, GTF_ASG, true /* force remove */);
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a Ind and a set of available assertions, we try
+ * to propagate an assertion and modify the Ind tree if we can. We pass in the
+ * root of the tree via 'stmt', for local copy prop 'stmt' will be nullptr.
+ *
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ *
+ */
+
+GenTreePtr Compiler::optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->OperIsIndir());
+
+ // TODO-1stClassStructs: All indirections should be handled here, but
+ // previously, when these indirections were GT_OBJ, or implicit due to a block
+ // copy or init, they were not being handled.
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ if (tree->OperIsBlk())
+ {
+ return nullptr;
+ }
+ else
+ {
+ GenTree* parent = tree->gtGetParent(nullptr);
+ if ((parent != nullptr) && parent->OperIsBlkOp())
+ {
+ return nullptr;
+ }
+ }
+ }
+
+ if (!(tree->gtFlags & GTF_EXCEPT))
+ {
+ return nullptr;
+ }
+
+ // Check for add of a constant.
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ if ((op1->gtOper == GT_ADD) && (op1->gtOp.gtOp2->gtOper == GT_CNS_INT))
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+
+#ifdef DEBUG
+ bool vnBased = false;
+ AssertionIndex index = NO_ASSERTION_INDEX;
+#endif
+ if (optAssertionIsNonNull(op1, assertions DEBUGARG(&vnBased) DEBUGARG(&index)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ (vnBased) ? printf("\nVN based non-null prop in BB%02u:\n", compCurBB->bbNum)
+ : printf("\nNon-null prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ tree->gtFlags &= ~GTF_EXCEPT;
+
+ // Set this flag to prevent reordering
+ tree->gtFlags |= GTF_ORDER_SIDEEFF;
+
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+
+ return nullptr;
+}
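+
+// For example (field names hypothetical): once "a.f = 1;" has established a
+// non-null assertion for 'a', the indirection in a following "x = a.g;" can
+// have GTF_EXCEPT cleared; GTF_ORDER_SIDEEFF is set so the now non-faulting
+// load is still not reordered past other side effects.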
+
+/*****************************************************************************
+ * Check if a non-null assertion can be made about the input operand "op"
+ * from the set of "assertions," or implicitly from the value number on "op."
+ *
+ * Sets "pVnBased" if the assertion is value number based. If no matching
+ * assertions are found from the table, then returns "NO_ASSERTION_INDEX."
+ *
+ * Note: If both VN and assertion table yield a matching assertion, "pVnBased"
+ * is only set and the return value is "NO_ASSERTION_INDEX."
+ */
+bool Compiler::optAssertionIsNonNull(GenTreePtr op,
+ ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased)
+ DEBUGARG(AssertionIndex* pIndex))
+{
+ bool vnBased = (!optLocalAssertionProp && vnStore->IsKnownNonNull(op->gtVNPair.GetConservative()));
+#ifdef DEBUG
+ *pVnBased = vnBased;
+#endif
+
+ if (vnBased)
+ {
+#ifdef DEBUG
+ *pIndex = NO_ASSERTION_INDEX;
+#endif
+ return true;
+ }
+
+ AssertionIndex index = optAssertionIsNonNullInternal(op, assertions);
+#ifdef DEBUG
+ *pIndex = index;
+#endif
+ return index != NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ * Check if a non-null assertion can be made about the input operand "op"
+ * from the set of "assertions."
+ *
+ */
+Compiler::AssertionIndex Compiler::optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions)
+{
+ // If local assertion prop use lcl comparison, else use VN comparison.
+ if (!optLocalAssertionProp)
+ {
+ ValueNum vn = op->gtVNPair.GetConservative();
+
+ if (BitVecOps::IsEmpty(apTraits, assertions))
+ {
+ return NO_ASSERTION_INDEX;
+ }
+
+ // Check each assertion to find if we have a vn == or != null assertion.
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if (curAssertion->assertionKind != OAK_NOT_EQUAL)
+ {
+ continue;
+ }
+ if (curAssertion->op1.vn != vn || curAssertion->op2.vn != ValueNumStore::VNForNull())
+ {
+ continue;
+ }
+ return (AssertionIndex)index;
+ }
+ }
+ else
+ {
+ unsigned lclNum = op->AsLclVarCommon()->GetLclNum();
+ // Check each assertion to find if we have a variable == or != null assertion.
+ for (AssertionIndex index = 1; index <= optAssertionCount; index++)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ if ((curAssertion->assertionKind == OAK_NOT_EQUAL) && // kind
+ (curAssertion->op1.kind == O1K_LCLVAR) && // op1
+ (curAssertion->op2.kind == O2K_CONST_INT) && // op2
+ (curAssertion->op1.lcl.lclNum == lclNum) && (curAssertion->op2.u1.iconVal == 0))
+ {
+ return index;
+ }
+ }
+ }
+ return NO_ASSERTION_INDEX;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a call and a set of available assertions, we
+ * try to propagate a non-null assertion and modify the Call tree if we can.
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ *
+ */
+GenTreePtr Compiler::optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions,
+ const GenTreePtr tree,
+ const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_CALL);
+ if ((tree->gtFlags & GTF_CALL_NULLCHECK) == 0)
+ {
+ return nullptr;
+ }
+ GenTreePtr op1 = gtGetThisArg(tree);
+ noway_assert(op1 != nullptr);
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+#ifdef DEBUG
+ bool vnBased = false;
+ AssertionIndex index = NO_ASSERTION_INDEX;
+#endif
+ if (optAssertionIsNonNull(op1, assertions DEBUGARG(&vnBased) DEBUGARG(&index)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ (vnBased) ? printf("\nVN based non-null prop in BB%02u:\n", compCurBB->bbNum)
+ : printf("\nNon-null prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ tree->gtFlags &= ~GTF_CALL_NULLCHECK;
+ tree->gtFlags &= ~GTF_EXCEPT;
+ noway_assert(tree->gtFlags & GTF_SIDE_EFFECT);
+ return tree;
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a call and a set of available assertions, we
+ * try to propagate an assertion and modify the call tree if we can. Our
+ * current modifications are limited to removing the GTF_CALL_NULLCHECK flag
+ * from the call and to replacing redundant cast/isinst helper calls.
+ * We pass in the root of the tree via 'stmt'; for local assertion prop 'stmt'
+ * will be nullptr. Returns the modified tree, or nullptr if no assertion prop
+ * took place.
+ *
+ */
+
+GenTreePtr Compiler::optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ assert(tree->gtOper == GT_CALL);
+
+ if (optNonNullAssertionProp_Call(assertions, tree, stmt))
+ {
+ return optAssertionProp_Update(tree, tree, stmt);
+ }
+ else if (!optLocalAssertionProp && (tree->gtCall.gtCallType == CT_HELPER))
+ {
+ if (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFINTERFACE) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFARRAY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFCLASS) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_ISINSTANCEOFANY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTINTERFACE) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTARRAY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTANY) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_CHKCASTCLASS_SPECIAL))
+ {
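+ // For these helpers (with the usual signature of class handle, object), argument 1
+ // is the object being tested and argument 0 is the class handle. The object must be
+ // a local variable so that a subtype assertion can be matched against it; when one
+ // matches, the helper call is redundant and is replaced by the object itself plus
+ // any side effects extracted below.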
+ GenTreePtr arg1 = gtArgEntryByArgNum(tree->AsCall(), 1)->node;
+ if (arg1->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ GenTreePtr arg2 = gtArgEntryByArgNum(tree->AsCall(), 0)->node;
+
+ unsigned index = optAssertionIsSubtype(arg1, arg2, assertions);
+ if (index != NO_ASSERTION_INDEX)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDid VN based subtype prop for index #%02u in BB%02u:\n", index, compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ GenTreePtr list = nullptr;
+ gtExtractSideEffList(tree, &list, GTF_SIDE_EFFECT, true);
+ if (list != nullptr)
+ {
+ arg1 = gtNewOperNode(GT_COMMA, tree->TypeGet(), list, arg1);
+ fgSetTreeSeq(arg1);
+ }
+
+ return optAssertionProp_Update(arg1, tree, stmt);
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree consisting of a comma node with a bounds check, remove any
+ * redundant bounds check that has already been checked in the program flow.
+ */
+GenTreePtr Compiler::optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ if (optLocalAssertionProp)
+ {
+ return nullptr;
+ }
+
+ assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
+
+ BitVecOps::Iter iter(apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(apTraits, &index))
+ {
+ index++;
+ if (index > optAssertionCount)
+ {
+ break;
+ }
+ // If it is not a nothrow assertion, skip.
+ AssertionDsc* curAssertion = optGetAssertion((AssertionIndex)index);
+ if (!curAssertion->IsBoundsCheckNoThrow())
+ {
+ continue;
+ }
+
+ GenTreeBoundsChk* arrBndsChk = tree->AsBoundsChk();
+
+ // Set 'isRedundant' to true if we can determine that 'arrBndsChk' can be
+ // classified as a redundant bounds check using 'curAssertion'
+ bool isRedundant = false;
+#ifdef DEBUG
+ const char* dbgMsg = "Not Set";
+#endif
+
+ // Do we have a previous range check involving the same 'vnLen' upper bound?
+ if (curAssertion->op1.bnd.vnLen == arrBndsChk->gtArrLen->gtVNPair.GetConservative())
+ {
+ ValueNum vnCurIdx = arrBndsChk->gtIndex->gtVNPair.GetConservative();
+
+ // Do we have the exact same lower bound 'vnIdx'?
+ // a[i] followed by a[i]
+ if (curAssertion->op1.bnd.vnIdx == vnCurIdx)
+ {
+ isRedundant = true;
+#ifdef DEBUG
+ dbgMsg = "a[i] followed by a[i]";
+#endif
+ }
+ // Are we using zero as the index?
+ // It can always be considered as redundant with any previous value
+ // a[*] followed by a[0]
+ else if (vnCurIdx == vnStore->VNZeroForType(arrBndsChk->gtIndex->TypeGet()))
+ {
+ isRedundant = true;
+#ifdef DEBUG
+ dbgMsg = "a[*] followed by a[0]";
+#endif
+ }
+ // Do we have two constant indexes?
+ else if (vnStore->IsVNConstant(curAssertion->op1.bnd.vnIdx) && vnStore->IsVNConstant(vnCurIdx))
+ {
+ // Make sure the types match.
+ var_types type1 = vnStore->TypeOfVN(curAssertion->op1.bnd.vnIdx);
+ var_types type2 = vnStore->TypeOfVN(vnCurIdx);
+
+ if (type1 == type2 && type1 == TYP_INT)
+ {
+ int index1 = vnStore->ConstantValue<int>(curAssertion->op1.bnd.vnIdx);
+ int index2 = vnStore->ConstantValue<int>(vnCurIdx);
+
+ // the case where index1 == index2 should have been handled above
+ assert(index1 != index2);
+
+ // It can always be considered as redundant with any previous higher constant value
+ // a[K1] followed by a[K2], with K2 >= 0 and K1 >= K2
+ if (index2 >= 0 && index1 >= index2)
+ {
+ isRedundant = true;
+#ifdef DEBUG
+ dbgMsg = "a[K1] followed by a[K2], with K2 >= 0 and K1 >= K2";
+#endif
+ }
+ }
+ }
+ // Extend this to remove additional redundant bounds checks:
+ // i.e. a[i+1] followed by a[i] by using the VN(i+1) >= VN(i)
+ // a[i] followed by a[j] when j is known to be >= i
+ // a[i] followed by a[5] when i is known to be >= 5
+ }
+
+ if (!isRedundant)
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nVN based redundant (%s) bounds check assertion prop for index #%02u in BB%02u:\n", dbgMsg, index,
+ compCurBB->bbNum);
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ // Defer actually removing the tree until processing reaches its parent comma, since
+ // optRemoveRangeCheck needs to rewrite the whole comma tree.
+ arrBndsChk->gtFlags |= GTF_ARR_BOUND_INBND;
+ return nullptr;
+ }
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Called when we have successfully performed an assertion prop and have the
+ * newTree in hand. This method replaces the existing tree in the stmt with
+ * the newTree.
+ *
+ */
+
+GenTreePtr Compiler::optAssertionProp_Update(const GenTreePtr newTree, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ noway_assert(newTree != nullptr);
+
+ if (stmt == nullptr)
+ {
+ noway_assert(optLocalAssertionProp);
+ }
+ else
+ {
+ noway_assert(!optLocalAssertionProp);
+
+ // If newTree == tree then we modified the tree in-place otherwise we have to
+ // locate our parent node and update it so that it points to newTree
+ if (newTree != tree)
+ {
+ GenTreePtr* link = gtFindLink(stmt, tree);
+#ifdef DEBUG
+ if (link == nullptr)
+ {
+ noway_assert(!"gtFindLink failed!");
+ printf("\nCould not find parent of:\n");
+ gtDispTree(tree);
+ printf("\nIn this stmt:\n");
+ gtDispTree(stmt);
+ }
+#endif
+ noway_assert(link != nullptr);
+ noway_assert(tree != nullptr);
+ if (link != nullptr)
+ {
+ // Replace the old operand with the newTree
+ *link = newTree;
+
+ // We only need to ensure that the gtNext field is set as it is used to traverse
+ // to the next node in the tree. We will re-morph this entire statement in
+ // optAssertionPropMain(). It will reset the gtPrev and gtNext links for all nodes.
+
+ newTree->gtNext = tree->gtNext;
+ }
+ }
+ }
+
+ // Record that we propagated the assertion.
+ optAssertionPropagated = true;
+ optAssertionPropagatedCurrentStmt = true;
+
+ return newTree;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree and a set of available assertions, we try to propagate an
+ * assertion and modify 'tree' if we can. We pass in the root of the tree
+ * via 'stmt'; for local assertion prop 'stmt' will be nullptr.
+ *
+ * Returns the modified tree, or nullptr if no assertion prop took place.
+ */
+
+GenTreePtr Compiler::optAssertionProp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt)
+{
+ switch (tree->gtOper)
+ {
+ case GT_LCL_VAR:
+ return optAssertionProp_LclVar(assertions, tree, stmt);
+
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_IND:
+ case GT_NULLCHECK:
+ return optAssertionProp_Ind(assertions, tree, stmt);
+
+ case GT_ARR_BOUNDS_CHECK:
+ return optAssertionProp_BndsChk(assertions, tree, stmt);
+
+ case GT_COMMA:
+ return optAssertionProp_Comma(assertions, tree, stmt);
+
+ case GT_CAST:
+ return optAssertionProp_Cast(assertions, tree, stmt);
+
+ case GT_CALL:
+ return optAssertionProp_Call(assertions, tree, stmt);
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ return optAssertionProp_RelOp(assertions, tree, stmt);
+
+ default:
+ return nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// optImpliedAssertions: Given an assertion that has just become true, this
+//                       method computes the set of implied assertions that
+//                       are also true and adds them to "activeAssertions".
+//
+// Arguments:
+//      assertionIndex   : The id of the assertion.
+//      activeAssertions : The assertions that are already true at this point;
+//                         implied assertions are added to this set.
+
+void Compiler::optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions)
+{
+ noway_assert(!optLocalAssertionProp);
+ noway_assert(assertionIndex != 0);
+ noway_assert(assertionIndex <= optAssertionCount);
+
+ AssertionDsc* curAssertion = optGetAssertion(assertionIndex);
+ if (!BitVecOps::IsEmpty(apTraits, activeAssertions))
+ {
+ const ASSERT_TP mappedAssertions = optGetVnMappedAssertions(curAssertion->op1.vn);
+ if (mappedAssertions == nullptr)
+ {
+ return;
+ }
+
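+ // Work on a private copy of the VN-mapped set so that the union/intersection below
+ // does not disturb the set stored in the VN-to-assertions map.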
+ ASSERT_TP chkAssertions = BitVecOps::MakeCopy(apTraits, mappedAssertions);
+
+ if (curAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ const ASSERT_TP op2Assertions = optGetVnMappedAssertions(curAssertion->op2.vn);
+ if (op2Assertions != nullptr)
+ {
+ BitVecOps::UnionD(apTraits, chkAssertions, op2Assertions);
+ }
+ }
+ BitVecOps::IntersectionD(apTraits, chkAssertions, activeAssertions);
+
+ if (BitVecOps::IsEmpty(apTraits, chkAssertions))
+ {
+ return;
+ }
+
+ // Check each assertion in chkAssertions to see if it can be applied to curAssertion
+ BitVecOps::Iter chkIter(apTraits, chkAssertions);
+ unsigned chkIndex = 0;
+ while (chkIter.NextElem(apTraits, &chkIndex))
+ {
+ chkIndex++;
+ if (chkIndex > optAssertionCount)
+ {
+ break;
+ }
+ if (chkIndex == assertionIndex)
+ {
+ continue;
+ }
+
+ // Determine which one is a copy assertion and use the other to check for implied assertions.
+ AssertionDsc* iterAssertion = optGetAssertion((AssertionIndex)chkIndex);
+ if (curAssertion->IsCopyAssertion())
+ {
+ optImpliedByCopyAssertion(curAssertion, iterAssertion, activeAssertions);
+ }
+ else if (iterAssertion->IsCopyAssertion())
+ {
+ optImpliedByCopyAssertion(iterAssertion, curAssertion, activeAssertions);
+ }
+ }
+ }
+ // Is curAssertion a constant assignment of a 32-bit integer?
+ // (i.e. GT_LCL_VAR X == GT_CNS_INT)
+ else if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_CONST_INT))
+ {
+ optImpliedByConstAssertion(curAssertion, activeAssertions);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Given a set of active assertions, this method computes the set of
+ * implied non-null assertions that are also true and adds them to the set.
+ */
+
+void Compiler::optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions)
+{
+ if (BitVecOps::IsEmpty(apTraits, activeAssertions))
+ {
+ return;
+ }
+
+ // Check each assertion in activeAssertions to see if it is an exact-type or subtype equality assertion
+ BitVecOps::Iter chkIter(apTraits, activeAssertions);
+ unsigned chkIndex = 0;
+ while (chkIter.NextElem(apTraits, &chkIndex))
+ {
+ chkIndex++;
+ if (chkIndex > optAssertionCount)
+ {
+ break;
+ }
+ // chkAssertion must be an OAK_EQUAL assertion of kind exact-type or subtype
+ AssertionDsc* chkAssertion = optGetAssertion((AssertionIndex)chkIndex);
+ if ((chkAssertion->op1.kind != O1K_SUBTYPE && chkAssertion->op1.kind != O1K_EXACT_TYPE) ||
+ (chkAssertion->assertionKind != OAK_EQUAL))
+ {
+ continue;
+ }
+
+ // Search the assertion table for a non-null assertion on op1 that matches chkAssertion
+ for (unsigned impIndex = 1; impIndex <= optAssertionCount; impIndex++)
+ {
+ AssertionDsc* impAssertion = optGetAssertion((AssertionIndex)impIndex);
+
+ // The impAssertion must be different from the chkAssertion
+ if (impIndex == chkIndex)
+ {
+ continue;
+ }
+
+ // impAssertion must be a non-null assertion on the same value number as chkAssertion's op1
+ if ((impAssertion->assertionKind != OAK_NOT_EQUAL) ||
+ ((impAssertion->op1.kind != O1K_LCLVAR) && (impAssertion->op1.kind != O1K_VALUE_NUMBER)) ||
+ (impAssertion->op2.kind != O2K_CONST_INT) || (impAssertion->op1.vn != chkAssertion->op1.vn))
+ {
+ continue;
+ }
+
+ // The bit may already be in the result set
+ if (!BitVecOps::IsMember(apTraits, activeAssertions, impIndex - 1))
+ {
+ BitVecOps::AddElemD(apTraits, activeAssertions, impIndex - 1);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCompiler::optImpliedByTypeOfAssertions: %s Assertion #%02d, implies assertion #%02d",
+ (chkAssertion->op1.kind == O1K_SUBTYPE) ? "Subtype" : "Exact-type", chkIndex, impIndex);
+ }
+#endif
+ }
+
+ // There is at most one non-null assertion that is implied by the current chkIndex assertion
+ break;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// optGetVnMappedAssertions: Given a value number, get the assertions
+// we have about the value number.
+//
+// Arguments:
+// vn - The given value number.
+//
+// Return Value:
+//    The assertions we have about the value number, or BitVecOps::UninitVal() if there are none.
+//
+
+ASSERT_VALRET_TP Compiler::optGetVnMappedAssertions(ValueNum vn)
+{
+ ASSERT_TP set = BitVecOps::UninitVal();
+ if (optValueNumToAsserts->Lookup(vn, &set))
+ {
+ return set;
+ }
+ return BitVecOps::UninitVal();
+}
+
+/*****************************************************************************
+ *
+ * Given a const assertion, this method computes the set of implied assertions
+ * that are also true.
+ */
+
+void Compiler::optImpliedByConstAssertion(AssertionDsc* constAssertion, ASSERT_TP& result)
+{
+ noway_assert(constAssertion->assertionKind == OAK_EQUAL);
+ noway_assert(constAssertion->op1.kind == O1K_LCLVAR);
+ noway_assert(constAssertion->op2.kind == O2K_CONST_INT);
+
+ ssize_t iconVal = constAssertion->op2.u1.iconVal;
+
+ const ASSERT_TP chkAssertions = optGetVnMappedAssertions(constAssertion->op1.vn);
+ if (chkAssertions == nullptr || BitVecOps::IsEmpty(apTraits, chkAssertions))
+ {
+ return;
+ }
+
+ // Check each assertion in chkAssertions to see if it can be applied to constAssertion
+ BitVecOps::Iter chkIter(apTraits, chkAssertions);
+ unsigned chkIndex = 0;
+ while (chkIter.NextElem(apTraits, &chkIndex))
+ {
+ chkIndex++;
+ if (chkIndex > optAssertionCount)
+ {
+ break;
+ }
+ // The impAssertion must be different from the const assertion.
+ AssertionDsc* impAssertion = optGetAssertion((AssertionIndex)chkIndex);
+ if (impAssertion == constAssertion)
+ {
+ continue;
+ }
+
+ // The impAssertion must be an assertion about the same local var.
+ if (impAssertion->op1.vn != constAssertion->op1.vn)
+ {
+ continue;
+ }
+
+ bool usable = false;
+ switch (impAssertion->op2.kind)
+ {
+ case O2K_SUBRANGE:
+ // Is the const assertion's constant within the implied assertion's bounds?
+ usable = ((iconVal >= impAssertion->op2.u2.loBound) && (iconVal <= impAssertion->op2.u2.hiBound));
+ break;
+
+ case O2K_CONST_INT:
+ // Is the const assertion's constant equal/not equal to the implied assertion?
+ usable = ((impAssertion->assertionKind == OAK_EQUAL) && (impAssertion->op2.u1.iconVal == iconVal)) ||
+ ((impAssertion->assertionKind == OAK_NOT_EQUAL) && (impAssertion->op2.u1.iconVal != iconVal));
+ break;
+
+ default:
+ // leave 'usable' = false;
+ break;
+ }
+
+ if (usable)
+ {
+ BitVecOps::AddElemD(apTraits, result, chkIndex - 1);
+#ifdef DEBUG
+ if (verbose)
+ {
+ AssertionDsc* firstAssertion = optGetAssertion(1);
+ printf("\nCompiler::optImpliedByConstAssertion: constAssertion #%02d , implies assertion #%02d",
+ (constAssertion - firstAssertion) + 1, (impAssertion - firstAssertion) + 1);
+ }
+#endif
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Given a copy assertion and a dependent assertion, this method computes the
+ * set of implied assertions that are also true.
+ * For copy assertions the exact SSA num and lcl num must match, because we
+ * don't have kill sets and we depend on their value numbers for dataflow.
+ */
+
+void Compiler::optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result)
+{
+ noway_assert(copyAssertion->IsCopyAssertion());
+
+ // Get the copyAssert's lcl/ssa nums.
+ unsigned copyAssertLclNum = BAD_VAR_NUM;
+ unsigned copyAssertSsaNum = SsaConfig::RESERVED_SSA_NUM;
+
+ // Check if copyAssertion's op1 or op2 matches the depAssertion's op1.
+ if (depAssertion->op1.lcl.lclNum == copyAssertion->op1.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op2.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op2.lcl.ssaNum;
+ }
+ else if (depAssertion->op1.lcl.lclNum == copyAssertion->op2.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op1.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op1.lcl.ssaNum;
+ }
+ // Check if copyAssertion's op1 or op2 matches the depAssertion's op2.
+ else if (depAssertion->op2.kind == O2K_LCLVAR_COPY)
+ {
+ if (depAssertion->op2.lcl.lclNum == copyAssertion->op1.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op2.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op2.lcl.ssaNum;
+ }
+ else if (depAssertion->op2.lcl.lclNum == copyAssertion->op2.lcl.lclNum)
+ {
+ copyAssertLclNum = copyAssertion->op1.lcl.lclNum;
+ copyAssertSsaNum = copyAssertion->op1.lcl.ssaNum;
+ }
+ }
+
+ if (copyAssertLclNum == BAD_VAR_NUM || copyAssertSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+
+ // Get the depAssert's lcl/ssa nums.
+ unsigned depAssertLclNum = BAD_VAR_NUM;
+ unsigned depAssertSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ if ((depAssertion->op1.kind == O1K_LCLVAR) && (depAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ if ((depAssertion->op1.lcl.lclNum == copyAssertion->op1.lcl.lclNum) ||
+ (depAssertion->op1.lcl.lclNum == copyAssertion->op2.lcl.lclNum))
+ {
+ depAssertLclNum = depAssertion->op2.lcl.lclNum;
+ depAssertSsaNum = depAssertion->op2.lcl.ssaNum;
+ }
+ else if ((depAssertion->op2.lcl.lclNum == copyAssertion->op1.lcl.lclNum) ||
+ (depAssertion->op2.lcl.lclNum == copyAssertion->op2.lcl.lclNum))
+ {
+ depAssertLclNum = depAssertion->op1.lcl.lclNum;
+ depAssertSsaNum = depAssertion->op1.lcl.ssaNum;
+ }
+ }
+
+ if (depAssertLclNum == BAD_VAR_NUM || depAssertSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+
+ // Is depAssertion a constant assignment of a 32-bit integer?
+ // (i.e. GT_LCL_VAR X == GT_CNS_INT)
+ bool depIsConstAssertion = ((depAssertion->assertionKind == OAK_EQUAL) && (depAssertion->op1.kind == O1K_LCLVAR) &&
+ (depAssertion->op2.kind == O2K_CONST_INT));
+
+ // Search the assertion table for an assertion on op1 that matches depAssertion
+ // The matching assertion is the implied assertion.
+ for (AssertionIndex impIndex = 1; impIndex <= optAssertionCount; impIndex++)
+ {
+ AssertionDsc* impAssertion = optGetAssertion(impIndex);
+
+ // The impAssertion must be different from the copy and dependent assertions
+ if (impAssertion == copyAssertion || impAssertion == depAssertion)
+ {
+ continue;
+ }
+
+ if (!AssertionDsc::SameKind(depAssertion, impAssertion))
+ {
+ continue;
+ }
+
+ bool op1MatchesCopy =
+ (copyAssertLclNum == impAssertion->op1.lcl.lclNum) && (copyAssertSsaNum == impAssertion->op1.lcl.ssaNum);
+
+ bool usable = false;
+ switch (impAssertion->op2.kind)
+ {
+ case O2K_SUBRANGE:
+ usable = op1MatchesCopy && ((impAssertion->op2.u2.loBound <= depAssertion->op2.u2.loBound) &&
+ (impAssertion->op2.u2.hiBound >= depAssertion->op2.u2.hiBound));
+ break;
+
+ case O2K_CONST_LONG:
+ usable = op1MatchesCopy && (impAssertion->op2.lconVal == depAssertion->op2.lconVal);
+ break;
+
+ case O2K_CONST_DOUBLE:
+ // Exact memory match because of positive and negative zero
+ usable = op1MatchesCopy &&
+ (memcmp(&impAssertion->op2.dconVal, &depAssertion->op2.dconVal, sizeof(double)) == 0);
+ break;
+
+ case O2K_IND_CNS_INT:
+ // This is the ngen case where we have an indirection of an address.
+ noway_assert((impAssertion->op1.kind == O1K_EXACT_TYPE) || (impAssertion->op1.kind == O1K_SUBTYPE));
+
+ __fallthrough;
+
+ case O2K_CONST_INT:
+ usable = op1MatchesCopy && (impAssertion->op2.u1.iconVal == depAssertion->op2.u1.iconVal);
+ break;
+
+ case O2K_LCLVAR_COPY:
+ // Check if op1 of impAssertion matches copyAssertion and also op2 of impAssertion matches depAssertion.
+ if (op1MatchesCopy && (depAssertLclNum == impAssertion->op2.lcl.lclNum &&
+ depAssertSsaNum == impAssertion->op2.lcl.ssaNum))
+ {
+ usable = true;
+ }
+ else
+ {
+ // Otherwise, op2 of impAssertion should match copyAssertion and also op1 of impAssertion matches
+ // depAssertion.
+ usable = ((copyAssertLclNum == impAssertion->op2.lcl.lclNum &&
+ copyAssertSsaNum == impAssertion->op2.lcl.ssaNum) &&
+ (depAssertLclNum == impAssertion->op1.lcl.lclNum &&
+ depAssertSsaNum == impAssertion->op1.lcl.ssaNum));
+ }
+ break;
+
+ default:
+ // leave 'usable' = false;
+ break;
+ }
+
+ if (usable)
+ {
+ BitVecOps::AddElemD(apTraits, result, impIndex - 1);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ AssertionDsc* firstAssertion = optGetAssertion(1);
+ printf("\nCompiler::optImpliedByCopyAssertion: copyAssertion #%02d and depAssertion #%02d, implies "
+ "assertion #%02d",
+ (copyAssertion - firstAssertion) + 1, (depAssertion - firstAssertion) + 1,
+ (impAssertion - firstAssertion) + 1);
+ }
+#endif
+ // If the depAssertion is a const assertion then any other assertions that it implies could also imply a
+ // subrange assertion.
+ if (depIsConstAssertion)
+ {
+ optImpliedByConstAssertion(impAssertion, result);
+ }
+ }
+ }
+}
+
+#include "dataflow.h"
+
+/*****************************************************************************
+ *
+ * Dataflow visitor-like callback, so that all dataflow logic is in a single place.
+ *
+ */
+class AssertionPropFlowCallback
+{
+private:
+ ASSERT_TP preMergeOut;
+ ASSERT_TP preMergeJumpDestOut;
+
+ ASSERT_TP* mJumpDestOut;
+ ASSERT_TP* mJumpDestGen;
+
+ Compiler* m_pCompiler;
+ BitVecTraits* apTraits;
+
+public:
+ AssertionPropFlowCallback(Compiler* pCompiler, ASSERT_TP* jumpDestOut, ASSERT_TP* jumpDestGen)
+ : preMergeOut(BitVecOps::UninitVal())
+ , preMergeJumpDestOut(BitVecOps::UninitVal())
+ , mJumpDestOut(jumpDestOut)
+ , mJumpDestGen(jumpDestGen)
+ , m_pCompiler(pCompiler)
+ , apTraits(pCompiler->apTraits)
+ {
+ }
+
+ // At the start of the merge function of the dataflow equations, initialize the premerge state (used to detect changes).
+ void StartMerge(BasicBlock* block)
+ {
+ JITDUMP("AssertionPropCallback::StartMerge: BB%02d in -> %s\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionIn));
+ BitVecOps::Assign(apTraits, preMergeOut, block->bbAssertionOut);
+ BitVecOps::Assign(apTraits, preMergeJumpDestOut, mJumpDestOut[block->bbNum]);
+ }
+
+ // During merge, merge in the dataflow flags of each predecessor (predecessors, since this is a forward analysis).
+ void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
+ {
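+ // If the predecessor reaches this block via its conditional jump, use the OUT set
+ // computed for that jump edge; otherwise use the predecessor's ordinary OUT set.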
+ ASSERT_TP pAssertionOut = ((predBlock->bbJumpKind == BBJ_COND) && (predBlock->bbJumpDest == block))
+ ? mJumpDestOut[predBlock->bbNum]
+ : predBlock->bbAssertionOut;
+ JITDUMP("AssertionPropCallback::Merge : BB%02d in -> %s, predBlock BB%02d out -> %s\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionIn), predBlock->bbNum,
+ BitVecOps::ToString(apTraits, predBlock->bbAssertionOut));
+ BitVecOps::IntersectionD(apTraits, block->bbAssertionIn, pAssertionOut);
+ }
+
+ // At the end of the merge, store the results of the dataflow equations in the postmerge state.
+ bool EndMerge(BasicBlock* block)
+ {
+ JITDUMP("AssertionPropCallback::EndMerge : BB%02d in -> %s\n\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionIn));
+
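+ // Apply the forward dataflow equations: out &= (in | gen), and similarly
+ // jumpDestOut &= (in | jumpDestGen) for the conditional-jump edge.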
+ // PERF: eliminate this tmp by passing in an OperationTree (AST) to the bitset,
+ // so the expression tree is evaluated at the bit level. See "expression templates."
+ ASSERT_TP tmp = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn);
+ BitVecOps::UnionD(apTraits, tmp, block->bbAssertionGen);
+ BitVecOps::IntersectionD(apTraits, block->bbAssertionOut, tmp);
+
+ BitVecOps::Assign(apTraits, tmp, block->bbAssertionIn);
+ BitVecOps::UnionD(apTraits, tmp, mJumpDestGen[block->bbNum]);
+ BitVecOps::IntersectionD(apTraits, mJumpDestOut[block->bbNum], tmp);
+
+ bool changed = (!BitVecOps::Equal(apTraits, preMergeOut, block->bbAssertionOut) ||
+ !BitVecOps::Equal(apTraits, preMergeJumpDestOut, mJumpDestOut[block->bbNum]));
+
+ if (changed)
+ {
+ JITDUMP("AssertionPropCallback::Changed : BB%02d before out -> %s; after out -> %s;\n"
+ "\t\tjumpDest before out -> %s; jumpDest after out -> %s;\n\n",
+ block->bbNum, BitVecOps::ToString(apTraits, preMergeOut),
+ BitVecOps::ToString(apTraits, block->bbAssertionOut),
+ BitVecOps::ToString(apTraits, preMergeJumpDestOut),
+ BitVecOps::ToString(apTraits, mJumpDestOut[block->bbNum]));
+ }
+ else
+ {
+ JITDUMP("AssertionPropCallback::Unchanged : BB%02d out -> %s; \t\tjumpDest out -> %s\n\n", block->bbNum,
+ BitVecOps::ToString(apTraits, block->bbAssertionOut),
+ BitVecOps::ToString(apTraits, mJumpDestOut[block->bbNum]));
+ }
+
+ return changed;
+ }
+};
+
+ASSERT_VALRET_TP Compiler::optNewFullAssertSet()
+{
+ return BitVecOps::MakeCopy(apTraits, apFull);
+}
+
+ASSERT_VALRET_TP Compiler::optNewEmptyAssertSet()
+{
+ return BitVecOps::MakeCopy(apTraits, apEmpty);
+}
+
+/*****************************************************************************
+ *
+ * Compute the assertions generated by each block.
+ */
+ASSERT_TP* Compiler::optComputeAssertionGen()
+{
+ ASSERT_TP* jumpDestGen = fgAllocateTypeForEachBlk<ASSERT_TP>();
+
+ ASSERT_TP valueGen = BitVecOps::MakeEmpty(apTraits);
+ ASSERT_TP jumpDestValueGen = BitVecOps::MakeEmpty(apTraits);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ jumpDestGen[block->bbNum] = BitVecOps::MakeEmpty(apTraits);
+
+ BitVecOps::ClearD(apTraits, valueGen);
+ BitVecOps::ClearD(apTraits, jumpDestValueGen);
+
+ // Walk the statement trees in this basic block.
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ // Store whatever we have accumulated into jumpDest edge's valueGen.
+ if (tree->gtOper == GT_JTRUE)
+ {
+ BitVecOps::Assign(apTraits, jumpDestValueGen, valueGen);
+ }
+ if (!tree->HasAssertion())
+ {
+ continue;
+ }
+
+ // For regular trees, just update valueGen. For GT_JTRUE, the false (fall-through)
+ // part updates valueGen and the true (taken) part updates jumpDestValueGen.
+ AssertionIndex assertionIndex[2] = {(AssertionIndex)tree->GetAssertion(),
+ (tree->OperGet() == GT_JTRUE)
+ ? optFindComplementary((AssertionIndex)tree->GetAssertion())
+ : 0};
+
+ for (unsigned i = 0; i < 2; ++i)
+ {
+ if (assertionIndex[i] > 0)
+ {
+ // For GT_JTRUE, the true part (i == 0) uses jumpDestValueGen.
+ ASSERT_TP& gen = (i == 0 && tree->OperGet() == GT_JTRUE) ? jumpDestValueGen : valueGen;
+ optImpliedAssertions(assertionIndex[i], gen);
+ BitVecOps::AddElemD(apTraits, gen, assertionIndex[i] - 1);
+ }
+ }
+ }
+ }
+
+ BitVecOps::Assign(apTraits, block->bbAssertionGen, valueGen);
+ BitVecOps::Assign(apTraits, jumpDestGen[block->bbNum], jumpDestValueGen);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBB%02u valueGen = %s", block->bbNum, BitVecOps::ToString(apTraits, valueGen));
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ printf(" => BB%02u valueGen = %s,", block->bbJumpDest->bbNum,
+ BitVecOps::ToString(apTraits, jumpDestValueGen));
+ }
+ }
+#endif
+ }
+ return jumpDestGen;
+}
+
+/*****************************************************************************
+ *
+ * Initialize the assertion data flow flags that will be propagated.
+ */
+
+ASSERT_TP* Compiler::optInitAssertionDataflowFlags()
+{
+ ASSERT_TP* jumpDestOut = fgAllocateTypeForEachBlk<ASSERT_TP>();
+
+ // The local assertion gen phase may have created unreachable blocks.
+ // They will never be visited in the dataflow propagation phase, so they need to
+ // be initialized correctly. This means that instead of setting their sets to
+ // apFull (i.e. all possible bits set), we need to set the bits only for valid
+ // assertions (note that at this point we are not creating any new assertions).
+ // Also note that assertion indices start from 1.
+ ASSERT_TP apValidFull = optNewEmptyAssertSet();
+ for (int i = 1; i <= optAssertionCount; i++)
+ {
+ BitVecOps::AddElemD(apTraits, apValidFull, i - 1);
+ }
+
+ // Initially estimate the OUT sets to everything except killed expressions.
+ // Also set the IN sets to all ones, so that we can perform the intersection.
+ // Finally, zero out the flags for handler blocks, as we could be in the
+ // handler due to an exception bypassing the regular program flow which
+ // actually generates assertions along the bbAssertionOut/jumpDestOut
+ // edges.
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbAssertionIn = optNewEmptyAssertSet();
+ if (!bbIsHandlerBeg(block))
+ {
+ BitVecOps::Assign(apTraits, block->bbAssertionIn, apValidFull);
+ }
+ block->bbAssertionGen = optNewEmptyAssertSet();
+ block->bbAssertionOut = optNewEmptyAssertSet();
+ BitVecOps::Assign(apTraits, block->bbAssertionOut, apValidFull);
+ jumpDestOut[block->bbNum] = optNewEmptyAssertSet();
+ BitVecOps::Assign(apTraits, jumpDestOut[block->bbNum], apValidFull);
+ }
+ // Compute the data flow values for all tracked expressions
+ // IN and OUT never change for the initial basic block B1
+ BitVecOps::Assign(apTraits, fgFirstBB->bbAssertionIn, apEmpty);
+ return jumpDestOut;
+}
+
+// Callback data for the VN based constant prop visitor.
+struct VNAssertionPropVisitorInfo
+{
+ Compiler* pThis;
+ GenTreePtr stmt;
+ BasicBlock* block;
+ VNAssertionPropVisitorInfo(Compiler* pThis, BasicBlock* block, GenTreePtr stmt)
+ : pThis(pThis), stmt(stmt), block(block)
+ {
+ }
+};
+
+//------------------------------------------------------------------------------
+// optPrepareTreeForReplacement
+// Updates ref counts and extracts side effects from a tree so it can be
+// replaced with a comma separated list of side effects + a new tree.
+//
+// Note:
+// The old and new trees may be the same. In this case, the tree will be
+// appended to the side-effect list (if present) and returned.
+//
+// Arguments:
+// oldTree - The tree node to be dropped from the stmt expr.
+// newTree - The tree node to append to the side effect list from "oldTree".
+//
+// Return Value:
+// Returns a comma separated list of side-effects present in the "oldTree".
+// When "newTree" is non-null:
+// 1. When side-effects are present in oldTree, newTree will be appended to the
+// comma separated list.
+// 2. When no side effects are present, then returns the "newTree" without
+// any list.
+// When "newTree" is null:
+// 1. Returns the extracted side-effects from "oldTree"
+// 2. When no side-effects are present, returns null.
+//
+// Description:
+// Decrements ref counts for the "oldTree" that is going to be replaced. If there
+// are side effects in the tree, then ref counts for variables in the side effects
+// are incremented because they need to be kept in the stmt expr.
+//
+// Either the "newTree" is returned when no side effects are present or a comma
+// separated side effect list with "newTree" is returned.
+//
+GenTreePtr Compiler::optPrepareTreeForReplacement(GenTreePtr oldTree, GenTreePtr newTree)
+{
+ // If we have side effects, extract them and append newTree to the list.
+ GenTreePtr sideEffList = nullptr;
+ if (oldTree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
+ {
+ gtExtractSideEffList(oldTree, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
+ }
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+
+ // Increment the ref counts as we want to keep the side effects.
+ lvaRecursiveIncRefCounts(sideEffList);
+
+ if (newTree)
+ {
+ newTree = gtNewOperNode(GT_COMMA, newTree->TypeGet(), sideEffList, newTree);
+ }
+ else
+ {
+ newTree = sideEffList;
+ }
+ }
+
+ // Decrement the ref counts as the oldTree is going to be dropped.
+ lvaRecursiveDecRefCounts(oldTree);
+ return newTree;
+}
+
+//------------------------------------------------------------------------------
+// optVNConstantPropOnJTrue
+// Constant propagate on the JTrue node by extracting side effects and moving
+// them into their own statements. The relop node is then modified to yield
+// true or false, so the branch can be folded.
+//
+// Arguments:
+// block - The block that contains the JTrue.
+// stmt - The JTrue stmt which can be evaluated to a constant.
+// tree - The JTrue node whose relop evaluates to 0 or non-zero value.
+//
+// Return Value:
+// The jmpTrue tree node that has relop of the form "0 =/!= 0".
+// If "tree" evaluates to "true" relop is "0 == 0". Else relop is "0 != 0".
+//
+// Description:
+// Special treatment for JTRUE nodes' constant propagation. This is because
+// for JTRUE(1) or JTRUE(0), if there are side effects they need to be put
+// in separate statements. This is to prevent relop's constant
+//  propagation from doing a simple-minded conversion from
+// (1) STMT(JTRUE(RELOP(COMMA(sideEffect, OP1), OP2)), S.T. op1 =/!= op2 to
+// (2) STMT(JTRUE(COMMA(sideEffect, 1/0)).
+//
+//  fgFoldConditional doesn't fold (2), a JTRUE whose op1 carries side effects. So here
+//  we convert (1) into two statements: STMT(sideEffect), STMT(JTRUE(1/0)),
+// so that the JTRUE will get folded by fgFoldConditional.
+//
+// Note: fgFoldConditional is called from other places as well, which may be
+// sensitive to adding new statements. Hence the change is not made directly
+// into fgFoldConditional.
+//
+GenTreePtr Compiler::optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt, GenTreePtr test)
+{
+ GenTreePtr relop = test->gtGetOp1();
+
+ // VN based assertion non-null on this relop has been performed.
+ if (!relop->OperIsCompare())
+ {
+ return nullptr;
+ }
+
+ //
+ // Make sure GTF_RELOP_JMP_USED flag is set so that we can later skip constant
+ // prop'ing a JTRUE's relop child node for a second time in the pre-order
+ // tree walk.
+ //
+ assert((relop->gtFlags & GTF_RELOP_JMP_USED) != 0);
+
+ if (!vnStore->IsVNConstant(relop->gtVNPair.GetConservative()))
+ {
+ return nullptr;
+ }
+
+ // Prepare the tree for replacement so any side effects can be extracted.
+ GenTreePtr sideEffList = optPrepareTreeForReplacement(test, nullptr);
+
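+ // Peel the comma-chained side effects off one at a time, inserting each as its own
+ // statement near the end of the block, so the JTRUE left behind is free of side effects.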
+ while (sideEffList)
+ {
+ GenTreePtr newStmt;
+ if (sideEffList->OperGet() == GT_COMMA)
+ {
+ newStmt = fgInsertStmtNearEnd(block, sideEffList->gtGetOp1());
+ sideEffList = sideEffList->gtGetOp2();
+ }
+ else
+ {
+ newStmt = fgInsertStmtNearEnd(block, sideEffList);
+ sideEffList = nullptr;
+ }
+ fgMorphBlockStmt(block, newStmt DEBUGARG(__FUNCTION__));
+ gtSetStmtInfo(newStmt);
+ fgSetStmtSeq(newStmt);
+ }
+
+ // Transform the relop's operands to be both zeroes.
+ ValueNum vnZero = vnStore->VNZeroForType(TYP_INT);
+ relop->gtOp.gtOp1 = gtNewIconNode(0);
+ relop->gtOp.gtOp1->gtVNPair = ValueNumPair(vnZero, vnZero);
+ relop->gtOp.gtOp2 = gtNewIconNode(0);
+ relop->gtOp.gtOp2->gtVNPair = ValueNumPair(vnZero, vnZero);
+
+ // Update the oper and restore the value numbers.
+ ValueNum vnCns = relop->gtVNPair.GetConservative();
+ ValueNum vnLib = relop->gtVNPair.GetLiberal();
+ bool evalsToTrue = vnStore->CoercedConstantValue<INT64>(vnCns) != 0;
+ relop->SetOper(evalsToTrue ? GT_EQ : GT_NE);
+ relop->gtVNPair = ValueNumPair(vnLib, vnCns);
+
+ return test;
+}
+
+//------------------------------------------------------------------------------
+// optVNConstantPropCurStmt
+// Performs constant prop on the current statement's tree nodes.
+//
+// Assumption:
+// This function is called as part of a pre-order tree walk.
+//
+// Arguments:
+// tree - The currently visited tree node.
+// stmt - The statement node in which the "tree" is present.
+// block - The block that contains the statement that contains the tree.
+//
+// Return Value:
+// Returns the standard visitor walk result.
+//
+// Description:
+// Checks if a node is an R-value and evaluates to a constant. If the node
+// evaluates to constant, then the tree is replaced by its side effects and
+// the constant node.
+//
+Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree)
+{
+ // Don't propagate floating-point constants into a TYP_STRUCT LclVar
+ // This can occur for HFA return values (see hfa_sf3E_r.exe)
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ return WALK_CONTINUE;
+ }
+
+ switch (tree->OperGet())
+ {
+ // Make sure we have an R-value.
+ case GT_ADD:
+ case GT_SUB:
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ case GT_MULHI:
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_NEG:
+ case GT_CHS:
+ case GT_CAST:
+ case GT_INTRINSIC:
+ break;
+
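+ // GT_JTRUE is allowed through so the branch can be folded when its condition evaluates
+ // to a constant; see optVNConstantPropOnJTrue above for the special side-effect handling.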
+ case GT_JTRUE:
+ break;
+
+ case GT_MUL:
+ // Don't transform long multiplies.
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ return WALK_SKIP_SUBTREES;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ // Make sure the local variable is an R-value.
+ if ((tree->gtFlags & (GTF_VAR_DEF | GTF_DONT_CSE)))
+ {
+ return WALK_CONTINUE;
+ }
+#if FEATURE_ANYCSE
+ // Let's not conflict with CSE (to save the movw/movt).
+ if (lclNumIsCSE(tree->AsLclVarCommon()->GetLclNum()))
+ {
+ return WALK_CONTINUE;
+ }
+#endif
+ break;
+
+ default:
+ // Unknown node, continue to walk.
+ return WALK_CONTINUE;
+ }
+
+ // Perform the constant propagation
+ GenTreePtr newTree = optVNConstantPropOnTree(block, stmt, tree);
+ if (newTree == nullptr)
+ {
+ // Not propagated, keep going.
+ return WALK_CONTINUE;
+ }
+
+ // Successful propagation, mark as assertion propagated and skip
+ // sub-tree (with side-effects) visits.
+ optAssertionProp_Update(newTree, tree, stmt);
+
+ JITDUMP("After constant propagation on [%06u]:\n", tree->gtTreeID);
+ DBEXEC(VERBOSE, gtDispTree(stmt));
+
+ return WALK_SKIP_SUBTREES;
+}
+
+//------------------------------------------------------------------------------
+// optVnNonNullPropCurStmt
+// Performs VN based non-null propagation on the tree node.
+//
+// Assumption:
+// This function is called as part of a pre-order tree walk.
+//
+// Arguments:
+// block - The block that contains the statement that contains the tree.
+// stmt - The statement node in which the "tree" is present.
+// tree - The currently visited tree node.
+//
+// Return Value:
+// None.
+//
+// Description:
+// Performs value number based non-null propagation on GT_CALL and
+// indirections. This is different from flow based assertions and helps
+// unify VN based constant prop and non-null prop in a single pre-order walk.
+//
+void Compiler::optVnNonNullPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree)
+{
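+ // Pass an empty assertion set: with no flow-based assertions available, the callees
+ // below can only draw non-null conclusions from value numbers, which is the intent
+ // of this pre-order walk.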
+ ASSERT_TP empty = BitVecOps::MakeEmpty(apTraits);
+ GenTreePtr newTree = nullptr;
+ if (tree->OperGet() == GT_CALL)
+ {
+ newTree = optNonNullAssertionProp_Call(empty, tree, stmt);
+ }
+ else if (tree->OperIsIndir())
+ {
+ newTree = optAssertionProp_Ind(empty, tree, stmt);
+ }
+ if (newTree)
+ {
+ assert(newTree == tree);
+ optAssertionProp_Update(newTree, tree, stmt);
+ }
+}
+
+//------------------------------------------------------------------------------
+// optVNAssertionPropCurStmtVisitor
+// Unified Value Numbering based assertion propagation visitor.
+//
+// Assumption:
+// This function is called as part of a pre-order tree walk.
+//
+// Return Value:
+// WALK_RESULTs.
+//
+// Description:
+// An unified value numbering based assertion prop visitor that
+// performs non-null and constant assertion propagation based on
+// value numbers.
+//
+/* static */
+Compiler::fgWalkResult Compiler::optVNAssertionPropCurStmtVisitor(GenTreePtr* ppTree, fgWalkData* data)
+{
+ VNAssertionPropVisitorInfo* pData = (VNAssertionPropVisitorInfo*)data->pCallbackData;
+ Compiler* pThis = pData->pThis;
+
+ pThis->optVnNonNullPropCurStmt(pData->block, pData->stmt, *ppTree);
+
+ return pThis->optVNConstantPropCurStmt(pData->block, pData->stmt, *ppTree);
+}
+
+/*****************************************************************************
+ *
+ * Perform VN based (as opposed to flow based) assertion prop on the current
+ * statement first, because even if we don't generate any new control flow
+ * assertions, we still want to propagate these value number facts.
+ *
+ * Returns the skipped next stmt if the current statement or next few
+ * statements got removed, else just returns the incoming stmt.
+ */
+GenTreePtr Compiler::optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stmt)
+{
+ // TODO-Review: EH successor/predecessor iteration seems broken.
+ // See: SELF_HOST_TESTS_ARM\jit\Directed\ExcepFilters\fault\fault.exe
+ if (block->bbCatchTyp == BBCT_FAULT)
+ {
+ return stmt;
+ }
+
+ // Preserve the prev link before the propagation and morph.
+ GenTreePtr prev = (stmt == block->firstStmt()) ? nullptr : stmt->gtPrev;
+
+ // Perform VN based assertion prop first, in case we don't find
+ // anything in assertion gen.
+ optAssertionPropagatedCurrentStmt = false;
+
+ VNAssertionPropVisitorInfo data(this, block, stmt);
+ fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, Compiler::optVNAssertionPropCurStmtVisitor, &data);
+
+ if (optAssertionPropagatedCurrentStmt)
+ {
+ fgMorphBlockStmt(block, stmt DEBUGARG("optVNAssertionPropCurStmt"));
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+ }
+
+ // Check if propagation removed statements starting from current stmt.
+ // If so, advance to the next good statement.
+ GenTreePtr nextStmt = (prev == nullptr) ? block->firstStmt() : prev->gtNext;
+ return nextStmt;
+}
+
+/*****************************************************************************
+ *
+ * The entry point for assertion propagation
+ */
+
+void Compiler::optAssertionPropMain()
+{
+ if (fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optAssertionPropMain()\n");
+ printf("Blocks/Trees at start of phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif
+
+ optAssertionInit(false);
+
+ noway_assert(optAssertionCount == 0);
+
+ // First discover all value assignments and record them in the table.
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+
+ fgRemoveRestOfBlock = false;
+
+ GenTreePtr stmt = block->bbTreeList;
+ while (stmt)
+ {
+ // We need to remove the rest of the block.
+ if (fgRemoveRestOfBlock)
+ {
+ fgRemoveStmt(block, stmt);
+ stmt = stmt->gtNext;
+ continue;
+ }
+ else
+ {
+ // Perform VN based assertion prop before assertion gen.
+ GenTreePtr nextStmt = optVNAssertionPropCurStmt(block, stmt);
+
+ // Propagation requested removal of the remaining stmts; advance so the check at the top of the loop removes them.
+ if (fgRemoveRestOfBlock)
+ {
+ stmt = stmt->gtNext;
+ continue;
+ }
+
+ // Propagation removed the current stmt or next few stmts, so skip them.
+ if (stmt != nextStmt)
+ {
+ stmt = nextStmt;
+ continue;
+ }
+ }
+
+ // Perform assertion gen for control flow based assertions.
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ optAssertionGen(tree);
+ }
+
+ // Advance the iterator
+ stmt = stmt->gtNext;
+ }
+ }
+
+ if (!optAssertionCount)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ fgDebugCheckLinks();
+#endif
+
+ // Allocate the bits for the predicate sensitive dataflow analysis
+ bbJtrueAssertionOut = optInitAssertionDataflowFlags();
+ ASSERT_TP* jumpDestGen = optComputeAssertionGen();
+
+ // Modified dataflow algorithm for available expressions.
+ DataFlow flow(this);
+ AssertionPropFlowCallback ap(this, bbJtrueAssertionOut, jumpDestGen);
+ flow.ForwardAnalysis(ap);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Compute any implied non-Null assertions for block->bbAssertionIn
+ optImpliedByTypeOfAssertions(block->bbAssertionIn);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ printf("\nBB%02u", block->bbNum);
+ printf(" valueIn = %s", BitVecOps::ToString(apTraits, block->bbAssertionIn));
+ printf(" valueOut = %s", BitVecOps::ToString(apTraits, block->bbAssertionOut));
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ printf(" => BB%02u", block->bbJumpDest->bbNum);
+ printf(" valueOut= %s", BitVecOps::ToString(apTraits, bbJtrueAssertionOut[block->bbNum]));
+ }
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Perform assertion propagation (and constant folding)
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ ASSERT_TP assertions = BitVecOps::MakeCopy(apTraits, block->bbAssertionIn);
+
+ // TODO-Review: EH successor/predecessor iteration seems broken.
+ // SELF_HOST_TESTS_ARM\jit\Directed\ExcepFilters\fault\fault.exe
+ if (block->bbCatchTyp == BBCT_FAULT)
+ {
+ continue;
+ }
+
+ // Make the current basic block address available globally.
+ compCurBB = block;
+ fgRemoveRestOfBlock = false;
+
+ // Walk the statement trees in this basic block
+ GenTreePtr stmt = block->FirstNonPhiDef();
+ while (stmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ // Propagation tells us to remove the rest of the block. Remove it.
+ if (fgRemoveRestOfBlock)
+ {
+ fgRemoveStmt(block, stmt);
+ stmt = stmt->gtNext;
+ continue;
+ }
+
+ // Preserve the prev link before the propagation and morph, to check if propagation
+ // removes the current stmt.
+ GenTreePtr prev = (stmt == block->firstStmt()) ? nullptr : stmt->gtPrev;
+
+ optAssertionPropagatedCurrentStmt = false; // set to true if an assertion propagation took place
+ // and thus we must morph, set order, re-link
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ JITDUMP("Propagating %s assertions for BB%02d, stmt [%06d], tree [%06d], tree -> %d\n",
+ BitVecOps::ToString(apTraits, assertions), block->bbNum, dspTreeID(stmt), dspTreeID(tree),
+ tree->GetAssertion());
+
+ GenTreePtr newTree = optAssertionProp(assertions, tree, stmt);
+ if (newTree)
+ {
+ assert(optAssertionPropagatedCurrentStmt == true);
+ tree = newTree;
+ }
+
+ // Is this an assignment to a local variable
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+
+ // If this tree makes an assertion - make it available.
+ if (tree->HasAssertion())
+ {
+ BitVecOps::AddElemD(apTraits, assertions, tree->GetAssertion() - 1);
+
+ // Also include any implied assertions for the tree node.
+ optImpliedAssertions((AssertionIndex)tree->GetAssertion(), assertions);
+ }
+ }
+
+ if (optAssertionPropagatedCurrentStmt)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Re-morphing this stmt:\n");
+ gtDispTree(stmt);
+ printf("\n");
+ }
+#endif
+ // Re-morph the statement.
+ fgMorphBlockStmt(block, stmt DEBUGARG("optAssertionPropMain"));
+
+ // Recalculate the gtCostSz, etc...
+ gtSetStmtInfo(stmt);
+
+ // Re-thread the nodes
+ fgSetStmtSeq(stmt);
+ }
+
+ // Check if propagation removed statements starting from current stmt.
+ // If so, advance to the next good statement.
+ GenTreePtr nextStmt = (prev == nullptr) ? block->firstStmt() : prev->gtNext;
+ stmt = (stmt == nextStmt) ? stmt->gtNext : nextStmt;
+ }
+ optAssertionPropagatedCurrentStmt = false; // clear it back as we are done with stmts.
+ }
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+ fgDebugCheckLinks();
+#endif
+
+ // Assertion propagation may have changed the reference counts.
+ // We need to re-sort the variable table.
+
+ if (optAssertionPropagated)
+ {
+ lvaSortAgain = true;
+ }
+}
diff --git a/src/jit/bitset.cpp b/src/jit/bitset.cpp
new file mode 100644
index 0000000000..90ef253199
--- /dev/null
+++ b/src/jit/bitset.cpp
@@ -0,0 +1,185 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "bitset.h"
+#include "bitsetasuint64.h"
+#include "bitsetasshortlong.h"
+#include "bitsetasuint64inclass.h"
+
+// clang-format off
+unsigned BitSetSupport::BitCountTable[16] = { 0, 1, 1, 2,
+ 1, 2, 2, 3,
+ 1, 2, 2, 3,
+ 2, 3, 3, 4 };
+// clang-format on
+
+#ifdef DEBUG
+template <typename BitSetType, unsigned Uniq, typename Env, typename BitSetTraits>
+void BitSetSupport::RunTests(Env env)
+{
+
+ typedef BitSetOps<BitSetType, Uniq, Env, BitSetTraits> LclBitSetOps;
+
+ // The tests require that the Size is at least 52...
+ assert(BitSetTraits::GetSize(env) > 51);
+
+ BitSetType bs1;
+ LclBitSetOps::AssignNoCopy(env, bs1, LclBitSetOps::MakeEmpty(env));
+ unsigned bs1bits[] = {0, 10, 44, 45};
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[0]);
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[1]);
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[2]);
+ LclBitSetOps::AddElemD(env, bs1, bs1bits[3]);
+
+ typename LclBitSetOps::Iter bsi(env, bs1);
+ unsigned bitNum = 0;
+ unsigned k = 0;
+ while (bsi.NextElem(env, &bitNum))
+ {
+ assert(bitNum == bs1bits[k]);
+ k++;
+ }
+ assert(k == 4);
+
+ assert(LclBitSetOps::Equal(env, bs1, LclBitSetOps::Union(env, bs1, bs1)));
+ assert(LclBitSetOps::Equal(env, bs1, LclBitSetOps::Intersection(env, bs1, bs1)));
+ assert(LclBitSetOps::IsSubset(env, bs1, bs1));
+
+ BitSetType bs2;
+ LclBitSetOps::AssignNoCopy(env, bs2, LclBitSetOps::MakeEmpty(env));
+ unsigned bs2bits[] = {0, 10, 50, 51};
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[0]);
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[1]);
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[2]);
+ LclBitSetOps::AddElemD(env, bs2, bs2bits[3]);
+
+ unsigned unionBits[] = {0, 10, 44, 45, 50, 51};
+ BitSetType bsU12;
+ LclBitSetOps::AssignNoCopy(env, bsU12, LclBitSetOps::Union(env, bs1, bs2));
+ k = 0;
+ bsi = typename LclBitSetOps::Iter(env, bsU12);
+ bitNum = 0;
+ while (bsi.NextElem(env, &bitNum))
+ {
+ assert(bitNum == unionBits[k]);
+ k++;
+ }
+ assert(k == 6);
+
+ k = 0;
+ typename LclBitSetOps::Iter bsiL = typename LclBitSetOps::Iter(env, bsU12);
+ bitNum = 0;
+ while (bsiL.NextElem(env, &bitNum))
+ {
+ assert(bitNum == unionBits[k]);
+ k++;
+ }
+ assert(k == 6);
+
+ unsigned intersectionBits[] = {0, 10};
+ BitSetType bsI12;
+ LclBitSetOps::AssignNoCopy(env, bsI12, LclBitSetOps::Intersection(env, bs1, bs2));
+ k = 0;
+ bsi = typename LclBitSetOps::Iter(env, bsI12);
+ bitNum = 0;
+ while (bsi.NextElem(env, &bitNum))
+ {
+ assert(bitNum == intersectionBits[k]);
+ k++;
+ }
+ assert(k == 2);
+}
+
+class TestBitSetTraits
+{
+public:
+ static IAllocator* GetAllocator(IAllocator* alloc)
+ {
+ return alloc;
+ }
+ static unsigned GetSize(IAllocator* alloc)
+ {
+ return 64;
+ }
+ static unsigned GetArrSize(IAllocator* alloc, unsigned elemSize)
+ {
+ assert(elemSize == sizeof(size_t));
+ return (64 / 8) / sizeof(size_t);
+ }
+ static unsigned GetEpoch(IAllocator* alloc)
+ {
+ return 0;
+ }
+};
+
+void BitSetSupport::TestSuite(IAllocator* env)
+{
+ BitSetSupport::RunTests<UINT64, BSUInt64, IAllocator*, TestBitSetTraits>(env);
+ BitSetSupport::RunTests<BitSetShortLongRep, BSShortLong, IAllocator*, TestBitSetTraits>(env);
+ BitSetSupport::RunTests<BitSetUint64<IAllocator*, TestBitSetTraits>, BSUInt64Class, IAllocator*, TestBitSetTraits>(
+ env);
+}
+#endif
+
+const char* BitSetSupport::OpNames[BitSetSupport::BSOP_NUMOPS] = {
+#define BSOPNAME(x) #x,
+#include "bitsetops.h"
+#undef BSOPNAME
+};
+
+void BitSetSupport::BitSetOpCounter::RecordOp(BitSetSupport::Operation op)
+{
+ OpCounts[op]++;
+ TotalOps++;
+
+ if ((TotalOps % 1000000) == 0)
+ {
+ if (OpOutputFile == nullptr)
+ {
+ OpOutputFile = fopen(m_fileName, "a");
+ }
+ fprintf(OpOutputFile, "@ %d total ops.\n", TotalOps);
+
+ unsigned OpOrder[BSOP_NUMOPS];
+ bool OpOrdered[BSOP_NUMOPS];
+
+ // First sort by total operations (into an index permutation array, using a simple n^2 sort).
+ for (unsigned k = 0; k < BitSetSupport::BSOP_NUMOPS; k++)
+ {
+ OpOrdered[k] = false;
+ }
+ for (unsigned k = 0; k < BitSetSupport::BSOP_NUMOPS; k++)
+ {
+ bool candSet = false;
+ unsigned cand = 0;
+ unsigned candInd = 0;
+ for (unsigned j = 0; j < BitSetSupport::BSOP_NUMOPS; j++)
+ {
+ if (OpOrdered[j])
+ {
+ continue;
+ }
+ if (!candSet || OpCounts[j] > cand)
+ {
+ candInd = j;
+ cand = OpCounts[j];
+ candSet = true;
+ }
+ }
+ assert(candSet);
+ OpOrder[k] = candInd;
+ OpOrdered[candInd] = true;
+ }
+
+ for (unsigned ii = 0; ii < BitSetSupport::BSOP_NUMOPS; ii++)
+ {
+ unsigned i = OpOrder[ii];
+ fprintf(OpOutputFile, " Op %40s: %8d\n", OpNames[i], OpCounts[i]);
+ }
+ }
+}
diff --git a/src/jit/bitset.h b/src/jit/bitset.h
new file mode 100644
index 0000000000..4ecb2fc0d4
--- /dev/null
+++ b/src/jit/bitset.h
@@ -0,0 +1,452 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// A set of integers in the range [0..N], for some given N.
+
+/*****************************************************************************/
+#ifndef _BITSET_H_
+#define _BITSET_H_
+/*****************************************************************************/
+
+// This class provides some constant declarations and some static utility methods useful
+// for bitset implementations.
+class BitSetSupport
+{
+#ifdef DEBUG
+ template <typename BitSetType, unsigned Brand, typename Env, typename BitSetTraits>
+ static void RunTests(Env env);
+#endif
+
+public:
+ static const unsigned BitsInByte = 8;
+
+ // This maps 4-bit ("nibble") values into the number of 1 bits they contain.
+ static unsigned BitCountTable[16];
+
+ // Returns the number of 1 bits in the binary representation of "u".
+ template <typename T>
+ static unsigned CountBitsInIntegral(T u)
+ {
+ unsigned res = 0;
+ // We process "u" in 4-bit nibbles, hence the "*2" below.
+ for (int i = 0; i < sizeof(T) * 2; i++)
+ {
+ res += BitCountTable[u & 0xf];
+ u >>= 4;
+ }
+ return res;
+ }
+
+#ifdef DEBUG
+ // This runs the "TestSuite" method for a few important instantiations of BitSet.
+ static void TestSuite(IAllocator* env);
+#endif
+
+ enum Operation
+ {
+#define BSOPNAME(x) x,
+#include "bitsetops.h"
+#undef BSOPNAME
+ BSOP_NUMOPS
+ };
+ static const char* OpNames[BSOP_NUMOPS];
+
+ class BitSetOpCounter
+ {
+ unsigned TotalOps;
+ unsigned OpCounts[BSOP_NUMOPS];
+ const char* m_fileName;
+ FILE* OpOutputFile;
+
+ public:
+ BitSetOpCounter(const char* fileName) : TotalOps(0), m_fileName(fileName), OpOutputFile(nullptr)
+ {
+ for (unsigned i = 0; i < BSOP_NUMOPS; i++)
+ {
+ OpCounts[i] = 0;
+ }
+ }
+
+ void RecordOp(Operation op);
+ };
+};
+
+template <>
+FORCEINLINE unsigned BitSetSupport::CountBitsInIntegral<unsigned>(unsigned c)
+{
+ // Make sure we're 32 bit.
+ assert(sizeof(unsigned) == 4);
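+ // Classic parallel ("SWAR") popcount: each step sums adjacent groups of bits in place,
+ // doubling the group width (1 -> 2 -> 4 -> 8 -> 16 bits) until the full count remains.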
+ c = (c & 0x55555555) + ((c >> 1) & 0x55555555);
+ c = (c & 0x33333333) + ((c >> 2) & 0x33333333);
+ c = (c & 0x0f0f0f0f) + ((c >> 4) & 0x0f0f0f0f);
+ c = (c & 0x00ff00ff) + ((c >> 8) & 0x00ff00ff);
+ c = (c & 0x0000ffff) + ((c >> 16) & 0x0000ffff);
+ return c;
+}
+
+// A "BitSet" represents a set of integers from a "universe" [0..N-1]. This implementation assumes that "N"
+// (the "Size") is provided by the "Env" template argument type discussed below, and accessed from the Env
+// via a static method of the BitSetTraits type discussed below. The intent of "BitSet" is that the set is
+// represented as a bit array. Various binary operations therefore only make sense if the operands are
+// subsets of the same universe. Further, the integers in the set that the BitSet represents may have
+// different interpretations at a higher level, so even if the range of the universe stays the same,
+// the higher-level meaning of those bits may change. For these reasons, we assume the Env can provide
+// (again, via static methods of the BitSetTraits) the current "epoch" number. The Env must keep the
+// Size the same while the epoch has a given value; a BitSet implementation may legally stamp BitSets
+// with the current epoch, and assert that BitSets from different epochs are not intermixed.
+
+// Some implementations may use a representation that (at least sometimes) is a pointer to a
+// heap-allocated data structure. (The operations of BitSetOps are static methods, rather than
+// declaring a BitSet class type with multiple subtypes, to allow maximally efficient raw
+// primitive type representations.) Therefore, we must be careful about assignment and
+// initialization. We often want to reason about BitSets as immutable values, and just copying
+// the representation would introduce sharing in the indirect case, which is usually not what's
+// desired. On the other hand, there are many cases in which the RHS value has just been
+// created functionally, and the initialization/assignment is obviously its last use. In these
+// cases, allocating a new indirect representation for the lhs (if it does not already have one)
+// would be unnecessary and wasteful. Thus, for assignment, we have a "normal" assignment
+// function, which makes a copy of the referent data structure in the indirect case, and an
+// "AssignNoCopy" version, which does not, and instead introduces sharing in the indirect case.
+// Obviously, the latter should be used with care.
+//
+// (Orthogonally, there are also further versions of assignment that differ in whether the "rhs"
+// argument may be uninitialized. The normal assignment operation requires the "rhs" argument not be
+// uninitialized; "AssignNoCopy" has the same requirement. The "AssignAllowUninitRhs" version allows
+// the "rhs" to be the uninit value, and sets the "lhs" to be uninitialized in that case.)
+
+// This class has static methods that provide the operations on BitSets.
+//
+// An instantiation requires:
+// typename BitSetType: the representation type of this kind of BitSet.
+//
+// unsigned Brand: an integer constant. This is unused by the implementation; it exists
+// *only* to ensure that we can have, if desired, multiple distinct BitSetOps
+// implementations for the same BitSetType, by instantiating these with different
+// values for Brand (thus "branding" them so that they are distinct from one another.)
+//
+// typename Env: a type that determines the (current) size of the given BitSet type, as well
+// as an allocation function, and the current epoch (integer that changes when
+// "universe" of the BitSet changes) -- all via static methods of the "BitSetTraits"
+// type.
+//
+// typename BitSetTraits:
+// An "adapter" class that provides methods that retrieves things from the Env:
+// static IAllocator* GetAllococator(Env): yields an "IAllocator*" that the BitSet implementation can use.
+// static unsigned GetSize(Env): the current size (= # of bits) of this bitset type.
+// static unsigned GetArrSize(Env, unsigned elemSize): The number of "elemSize" chunks sufficient to hold
+// "GetSize". A given BitSet implementation must call
+// this with only one constant value. Thus, an "Env"
+// may compute this result when GetSize changes.
+//
+// static unsigned GetEpoch(Env): the current epoch.
+//
+// (For many instantiations, BitSetValueArgType and BitSetValueRetType will be the same as BitSetType; in cases where
+// BitSetType is a class type, BitSetValueArgType may need to be "const BitSetType&", for example.)
+//
+// In addition to implementing the method signatures here, an instantiation of BitSetOps must also export a
+// BitSetOps::Iter type, which supports the following operations:
+// Iter(BitSetValueArgType): a constructor
+// bool NextElem(unsigned* pElem): returns true if the iteration is not complete, and sets *pElem to the next
+// yielded member.
+//
+// Finally, it should export two further types:
+//
+// ValArgType: the type used to pass a BitSet as a by-value argument.
+// RetValType: the type that should be used to return a BitSet.
+//
+// For many instantiations, these can be identical to BitSetType. When the representation type is a class,
+// however, ValArgType may need to be "const BitSetType&", and RetValType may need to be a helper class, if the
+// class hides default copy constructors and assignment operators to detect erroneous usage.
+//
+template <typename BitSetType, unsigned Brand, typename Env, typename BitSetTraits>
+class BitSetOps
+{
+#if 0
+ // Below are the set of methods that an instantiation of BitSetOps should provide. This is
+ // #if'd out because it doesn't make any difference; C++ has no mechanism for checking that
+ // the methods of an instantiation are consistent with these signatures, other than the expectations
+ // embodied in the program that uses the instantiation(s). But it's useful documentation, and
+ // we should try to keep it up to date.
+
+ public:
+
+ // The uninitialized value -- not a real bitset (if possible).
+ static BitSetValueRetType UninitVal();
+
+ // Returns "true" iff "bs" may be the uninit value.
+ static bool MayBeUninit(BitSetValueArgType bs);
+
+ // Returns a new BitSet that is empty. Uses the Allocator of "env" to allocate memory for
+ // the representation, if necessary.
+ static BitSetValueRetType MakeEmpty(Env env);
+
+ // Returns the a new BitSet that is "full" -- represents all the integers in the current range.
+ // Uses the Allocator of "env" to allocate memory for the representation, if necessary.
+ static BitSetValueRetType MakeFull(Env env);
+
+ // Returns the set containing the single element "bitNum" (which is required to be within the
+ // BitSet's current range). Uses the Allocator of "env" to allocate memory for the representation,
+ // if necessary.
+ static BitSetValueRetType MakeSingleton(Env env, unsigned bitNum);
+
+ // Assign "rhs" to "lhs". "rhs" must not be the uninitialized value. "lhs" may be, in which case
+ // "rhs" will be copied if necessary.
+ static void Assign(Env env, BitSetType& lhs, BitSetValueArgType rhs);
+
+ // Assign "rhs" to "lhs"...*even* if "rhs" is the uninitialized value.
+ static void AssignAllowUninitRhs(Env env, BitSetType& lhs, BitSetValueArgType rhs);
+
+ // This is a "destructive" assignment -- it should only be used if the rhs is "dead" after the assignment.
+ // In particular, if the rhs has a level of indirection to a heap-allocated data structure, that pointer will
+ // be copied into the lhs.
+ static void AssignNoCopy(Env env, BitSetType& lhs, BitSetValueArgType rhs);
+
+ // Destructively set "bs" to be the empty set. This method is unique, in that it does *not*
+ // require "bs" to be a bitset of the current epoch. It ensures that it is after, however.
+ // (If the representation is indirect, this requires allocating a new, empty representation.
+ // If this is a performance issue, we could provide a new version of ClearD that assumes/asserts
+ // that the rep is for the current epoch -- this would be useful if a given bitset were repeatedly
+ // cleared within an epoch.)
+ static void ClearD(Env env, BitSetType& bs);
+
+ // Returns a copy of "bs". If the representation of "bs" involves a level of indirection, the data
+ // structure is copied and a pointer to the copy is returned.
+ static BitSetValueRetType MakeCopy(Env env, BitSetValueArgType bs);
+
+ // Returns "true" iff ""bs" represents the empty set.
+ static bool IsEmpty(Env env, BitSetValueArgType bs);
+
+ // Returns the number of members in "bs".
+ static unsigned Count(Env env, BitSetValueArgType bs);
+
+ // Returns "true" iff "i" is a member of "bs".
+ static bool IsMember(Env env, const BitSetValueArgType bs, unsigned i);
+
+ // Destructively modify "bs" to ensure that "i" is a member.
+ static void AddElemD(Env env, BitSetType& bs, unsigned i);
+ // Returns a BitSet that is a copy of "bs" with "i" added.
+ static BitSetValueRetType AddElem(Env env, BitSetValueArgType bs, unsigned i);
+
+ // Destructively modify "bs" to ensure that "i" is not a member.
+ static void RemoveElemD(Env env, BitSetType& bs, unsigned i);
+ // Returns a BitSet that is a copy of "bs" with "i" removed.
+ static BitSetValueRetType RemoveElem(Env env, BitSetValueArgType bs1, unsigned i);
+
+ // Destructively modify "bs1" to be the union of "bs1" and "bs2".
+ static void UnionD(Env env, BitSetType& bs1, BitSetValueArgType bs2);
+ // Returns a new BitSet that is the union of "bs1" and "bs2".
+ static BitSetValueRetType Union(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Destructively modify "bs1" to be the intersection of "bs1" and "bs2".
+ static void IntersectionD(Env env, BitSetType& bs1, BitSetValueArgType bs2);
+ // Returns a new BitSet that is the intersection of "bs1" and "bs2".
+ static BitSetValueRetType Intersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Returns true iff "bs1" and "bs2" have an empty intersection.
+ static bool IsEmptyIntersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Destructively modify "bs1" to be the set difference of "bs1" and "bs2".
+ static void DiffD(Env env, BitSetType& bs1, BitSetValueArgType bs2);
+ // Returns a new BitSet that is the set difference of "bs1" and "bs2".
+ static BitSetValueRetType Diff(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Returns true iff "bs2" is a subset of "bs1."
+ static bool IsSubset(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+ // Returns true iff "bs1" and "bs2" are equal.
+ static bool Equal(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);
+
+#ifdef DEBUG
+ // Returns a string representing the contents of "bs". Allocates memory for the representation
+ // using the Allocator of "env".
+ static const char* ToString(Env env, BitSetValueArgType bs);
+#endif
+
+ // Declare this as a type -- will be a real class in real instantiations.
+ class Iter {
+ public:
+ Iter(Env env, BitSetValueArgType bs) {}
+ bool NextElem(Env env, unsigned* pElem) { return false; }
+ };
+
+ typename ValArgType;
+ typename RetValType;
+#endif // 0 -- the above is #if'd out, since it's really just an extended comment on what an instantiation
+ // should provide.
+};
+
+template <typename BitSetType,
+ unsigned Brand,
+ typename Env,
+ typename BitSetTraits,
+ typename BitSetValueArgType,
+ typename BitSetValueRetType,
+ typename BaseIter>
+class BitSetOpsWithCounter
+{
+ typedef BitSetOps<BitSetType, Brand, Env, BitSetTraits> BSO;
+
+public:
+ static BitSetValueRetType UninitVal()
+ {
+ return BSO::UninitVal();
+ }
+ static bool MayBeUninit(BitSetValueArgType bs)
+ {
+ return BSO::MayBeUninit(bs);
+ }
+ static BitSetValueRetType MakeEmpty(Env env)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeEmpty);
+ return BSO::MakeEmpty(env);
+ }
+ static BitSetValueRetType MakeFull(Env env)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeFull);
+ return BSO::MakeFull(env);
+ }
+ static BitSetValueRetType MakeSingleton(Env env, unsigned bitNum)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeSingleton);
+ return BSO::MakeSingleton(env, bitNum);
+ }
+ static void Assign(Env env, BitSetType& lhs, BitSetValueArgType rhs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Assign);
+ BSO::Assign(env, lhs, rhs);
+ }
+ static void AssignAllowUninitRhs(Env env, BitSetType& lhs, BitSetValueArgType rhs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignAllowUninitRhs);
+ BSO::AssignAllowUninitRhs(env, lhs, rhs);
+ }
+ static void AssignNoCopy(Env env, BitSetType& lhs, BitSetValueArgType rhs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignNocopy);
+ BSO::AssignNoCopy(env, lhs, rhs);
+ }
+ static void ClearD(Env env, BitSetType& bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ClearD);
+ BSO::ClearD(env, bs);
+ }
+ static BitSetValueRetType MakeCopy(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_MakeCopy);
+ return BSO::MakeCopy(env, bs);
+ }
+ static bool IsEmpty(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsEmpty);
+ return BSO::IsEmpty(env, bs);
+ }
+ static unsigned Count(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Count);
+ return BSO::Count(env, bs);
+ }
+ static bool IsMember(Env env, const BitSetValueArgType bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsMember);
+ return BSO::IsMember(env, bs, i);
+ }
+ static void AddElemD(Env env, BitSetType& bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AddElemD);
+ BSO::AddElemD(env, bs, i);
+ }
+ static BitSetValueRetType AddElem(Env env, BitSetValueArgType bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AddElem);
+ return BSO::AddElem(env, bs, i);
+ }
+ static void RemoveElemD(Env env, BitSetType& bs, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_RemoveElemD);
+ BSO::RemoveElemD(env, bs, i);
+ }
+ static BitSetValueRetType RemoveElem(Env env, BitSetValueArgType bs1, unsigned i)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_RemoveElem);
+ return BSO::RemoveElem(env, bs1, i);
+ }
+ static void UnionD(Env env, BitSetType& bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_UnionD);
+ BSO::UnionD(env, bs1, bs2);
+ }
+ static BitSetValueRetType Union(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Union);
+ return BSO::Union(env, bs1, bs2);
+ }
+ static void IntersectionD(Env env, BitSetType& bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IntersectionD);
+ BSO::IntersectionD(env, bs1, bs2);
+ }
+ static BitSetValueRetType Intersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Intersection);
+ return BSO::Intersection(env, bs1, bs2);
+ }
+ static bool IsEmptyIntersection(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsEmptyIntersection);
+ return BSO::IsEmptyIntersection(env, bs1, bs2);
+ }
+ static void DiffD(Env env, BitSetType& bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_DiffD);
+ BSO::DiffD(env, bs1, bs2);
+ }
+ static BitSetValueRetType Diff(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Diff);
+ return BSO::Diff(env, bs1, bs2);
+ }
+ static bool IsSubset(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_IsSubset);
+ return BSO::IsSubset(env, bs1, bs2);
+ }
+ static bool Equal(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Equal);
+ return BSO::Equal(env, bs1, bs2);
+ }
+#ifdef DEBUG
+ static const char* ToString(Env env, BitSetValueArgType bs)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ToString);
+ return BSO::ToString(env, bs);
+ }
+#endif
+
+ class Iter
+ {
+ BaseIter m_iter;
+
+ public:
+ Iter(Env env, BitSetValueArgType bs) : m_iter(env, bs)
+ {
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+ BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_NextBit);
+ return m_iter.NextElem(env, pElem);
+ }
+ };
+};
+
+// We define symbolic names for the various bitset implementations available, to allow choices between them.
+
+#define BSUInt64 0
+#define BSShortLong 1
+#define BSUInt64Class 2
+
+/*****************************************************************************/
+#endif // _BITSET_H_
+/*****************************************************************************/
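A quick sanity check of the bit-counting helpers above: the generic nibble-table template and the 32-bit parallel-add specialization must agree on every input. A minimal sketch, not part of the patch itself, assuming the usual JIT build environment so that bitset.h compiles (only the specialized unsigned overload is exercised, so no table definition is needed):

    #include "bitset.h"

    void CheckPopCount()
    {
        // 0xF0F00001 has 4 + 4 + 1 = 9 set bits.
        assert(BitSetSupport::CountBitsInIntegral<unsigned>(0x00000000u) == 0);
        assert(BitSetSupport::CountBitsInIntegral<unsigned>(0xF0F00001u) == 9);
        assert(BitSetSupport::CountBitsInIntegral<unsigned>(0xFFFFFFFFu) == 32);
    }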
diff --git a/src/jit/bitsetasshortlong.h b/src/jit/bitsetasshortlong.h
new file mode 100644
index 0000000000..ec437e189c
--- /dev/null
+++ b/src/jit/bitsetasshortlong.h
@@ -0,0 +1,792 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// A set of integers in the range [0..N], for some N defined by the "Env" (via "BitSetTraits").
+//
+// Represented as a pointer-sized item. If N bits can fit in this item, the representation is "direct"; otherwise,
+// the item is a pointer to an array of K size_t's, where K is the number of size_t's necessary to hold N bits.
+
+#ifndef bitSetAsShortLong_DEFINED
+#define bitSetAsShortLong_DEFINED 1
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+
+typedef size_t* BitSetShortLongRep;
+
+template <typename Env, typename BitSetTraits>
+class BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>
+{
+public:
+ typedef BitSetShortLongRep Rep;
+
+private:
+ static const unsigned BitsInSizeT = sizeof(size_t) * BitSetSupport::BitsInByte;
+
+ inline static bool IsShort(Env env)
+ {
+ return BitSetTraits::GetArrSize(env, sizeof(size_t)) <= 1;
+ }
+
+ // The operations on the "long" (pointer-to-array-of-size_t) versions of the representation.
+ static void AssignLong(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs);
+ static BitSetShortLongRep MakeSingletonLong(Env env, unsigned bitNum);
+ static BitSetShortLongRep MakeCopyLong(Env env, BitSetShortLongRep bs);
+ static bool IsEmptyLong(Env env, BitSetShortLongRep bs);
+ static unsigned CountLong(Env env, BitSetShortLongRep bs);
+ static void UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+ static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+ static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+ static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+ static void ClearDLong(Env env, BitSetShortLongRep& bs);
+ static BitSetShortLongRep MakeUninitArrayBits(Env env);
+ static BitSetShortLongRep MakeEmptyArrayBits(Env env);
+ static BitSetShortLongRep MakeFullArrayBits(Env env);
+ static bool IsMemberLong(Env env, BitSetShortLongRep bs, unsigned i);
+ static bool EqualLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+ static bool IsSubsetLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+ static bool IsEmptyIntersectionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
+ static void IntersectionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
+#ifdef DEBUG
+ static const char* ToStringLong(Env env, BitSetShortLongRep bs);
+#endif
+
+public:
+ inline static BitSetShortLongRep UninitVal()
+ {
+ return nullptr;
+ }
+
+ static bool MayBeUninit(BitSetShortLongRep bs)
+ {
+ return bs == UninitVal();
+ }
+
+ static void Assign(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+ {
+ // We can't assert that rhs != UninitVal in the Short case, because in that
+ // case it's a legal value.
+ if (IsShort(env))
+ {
+ // Both are short.
+ lhs = rhs;
+ }
+ else if (lhs == UninitVal())
+ {
+ assert(rhs != UninitVal());
+ lhs = MakeCopy(env, rhs);
+ }
+ else
+ {
+ AssignLong(env, lhs, rhs);
+ }
+ }
+
+ static void AssignAllowUninitRhs(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+ {
+ if (IsShort(env))
+ {
+ // Both are short.
+ lhs = rhs;
+ }
+ else if (rhs == UninitVal())
+ {
+ lhs = rhs;
+ }
+ else if (lhs == UninitVal())
+ {
+ lhs = MakeCopy(env, rhs);
+ }
+ else
+ {
+ AssignLong(env, lhs, rhs);
+ }
+ }
+
+ static void AssignNoCopy(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void ClearD(Env env, BitSetShortLongRep& bs)
+ {
+ if (IsShort(env))
+ {
+ bs = (BitSetShortLongRep) nullptr;
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ ClearDLong(env, bs);
+ }
+ }
+
+ static BitSetShortLongRep MakeSingleton(Env env, unsigned bitNum)
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ return BitSetShortLongRep(((size_t)1) << bitNum);
+ }
+ else
+ {
+ return MakeSingletonLong(env, bitNum);
+ }
+ }
+
+ static BitSetShortLongRep MakeCopy(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ return bs;
+ }
+ else
+ {
+ return MakeCopyLong(env, bs);
+ }
+ }
+
+ static bool IsEmpty(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ return bs == nullptr;
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ return IsEmptyLong(env, bs);
+ }
+ }
+
+ static unsigned Count(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ return BitSetSupport::CountBitsInIntegral(size_t(bs));
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ return CountLong(env, bs);
+ }
+ }
+
+ static void UnionD(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ bs1 = (BitSetShortLongRep)(((size_t)bs1) | ((size_t)bs2));
+ }
+ else
+ {
+ UnionDLong(env, bs1, bs2);
+ }
+ }
+ static BitSetShortLongRep Union(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs1);
+ UnionD(env, res, bs2);
+ return res;
+ }
+
+ static void DiffD(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ bs1 = (BitSetShortLongRep)(((size_t)bs1) & (~(size_t)bs2));
+ }
+ else
+ {
+ DiffDLong(env, bs1, bs2);
+ }
+ }
+ static BitSetShortLongRep Diff(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs1);
+ DiffD(env, res, bs2);
+ return res;
+ }
+
+ static void RemoveElemD(Env env, BitSetShortLongRep& bs, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ size_t mask = ((size_t)1) << i;
+ mask = ~mask;
+ bs = (BitSetShortLongRep)(((size_t)bs) & mask);
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ RemoveElemDLong(env, bs, i);
+ }
+ }
+ static BitSetShortLongRep RemoveElem(Env env, BitSetShortLongRep bs, unsigned i)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs);
+ RemoveElemD(env, res, i);
+ return res;
+ }
+
+ static void AddElemD(Env env, BitSetShortLongRep& bs, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ size_t mask = ((size_t)1) << i;
+ bs = (BitSetShortLongRep)(((size_t)bs) | mask);
+ }
+ else
+ {
+ AddElemDLong(env, bs, i);
+ }
+ }
+ static BitSetShortLongRep AddElem(Env env, BitSetShortLongRep bs, unsigned i)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs);
+ AddElemD(env, res, i);
+ return res;
+ }
+
+ static bool IsMember(Env env, const BitSetShortLongRep bs, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ if (IsShort(env))
+ {
+ size_t mask = ((size_t)1) << i;
+ return (((size_t)bs) & mask) != 0;
+ }
+ else
+ {
+ assert(bs != UninitVal());
+ return IsMemberLong(env, bs, i);
+ }
+ }
+
+ static void IntersectionD(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ (size_t&)bs1 &= (size_t)bs2;
+ }
+ else
+ {
+ IntersectionDLong(env, bs1, bs2);
+ }
+ }
+
+ static BitSetShortLongRep Intersection(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ BitSetShortLongRep res = MakeCopy(env, bs1);
+ IntersectionD(env, res, bs2);
+ return res;
+ }
+ static bool IsEmptyIntersection(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ return (((size_t)bs1) & ((size_t)bs2)) == 0;
+ }
+ else
+ {
+ return IsEmptyIntersectionLong(env, bs1, bs2);
+ }
+ }
+
+ static bool IsSubset(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ size_t u1 = (size_t)bs1;
+ size_t u2 = (size_t)bs2;
+ return (u1 & u2) == u1;
+ }
+ else
+ {
+ return IsSubsetLong(env, bs1, bs2);
+ }
+ }
+
+ static bool Equal(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+ {
+ if (IsShort(env))
+ {
+ return (size_t)bs1 == (size_t)bs2;
+ }
+ else
+ {
+ return EqualLong(env, bs1, bs2);
+ }
+ }
+
+#ifdef DEBUG
+ // Returns a string valid until the allocator releases the memory.
+ static const char* ToString(Env env, BitSetShortLongRep bs)
+ {
+ if (IsShort(env))
+ {
+ assert(sizeof(BitSetShortLongRep) == sizeof(size_t));
+ IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
+ const int CharsForSizeT = sizeof(size_t) * 2;
+ char* res = nullptr;
+ const int ShortAllocSize = CharsForSizeT + 4;
+ res = (char*)alloc->Alloc(ShortAllocSize);
+ size_t bits = (size_t)bs;
+ unsigned remaining = ShortAllocSize;
+ char* ptr = res;
+ if (sizeof(size_t) == sizeof(int64_t))
+ {
+ sprintf_s(ptr, remaining, "%016llX", bits);
+ }
+ else
+ {
+ assert(sizeof(size_t) == sizeof(int));
+ sprintf_s(ptr, remaining, "%08X", bits);
+ }
+ return res;
+ }
+ else
+ {
+ return ToStringLong(env, bs);
+ }
+ }
+#endif
+
+ static BitSetShortLongRep MakeEmpty(Env env)
+ {
+ if (IsShort(env))
+ {
+ return nullptr;
+ }
+ else
+ {
+ return MakeEmptyArrayBits(env);
+ }
+ }
+
+ static BitSetShortLongRep MakeFull(Env env)
+ {
+ if (IsShort(env))
+ {
+ // Can't just shift by numBits, since that might equal BitsInSizeT (and, e.g., (1 << 32) == 1 for a 32-bit unsigned).
+ unsigned numBits = BitSetTraits::GetSize(env);
+ if (numBits == BitsInSizeT)
+ {
+ // Can't use the implementation below to get all 1's...
+ return BitSetShortLongRep(size_t(-1));
+ }
+ else
+ {
+ return BitSetShortLongRep((size_t(1) << numBits) - 1);
+ }
+ }
+ else
+ {
+ return MakeFullArrayBits(env);
+ }
+ }
+
+ class Iter
+ {
+ BitSetShortLongRep m_bs; // The BitSet that we're iterating over.
+ size_t m_bits; // The "current" bits remaining to be iterated over.
+ // In the "short" case, these are all the remaining bits.
+ // In the "long" case, these are remaining bits in element "m_index";
+ // these and the bits in the remaining elements comprise the remaining bits.
+ unsigned m_index; // If "m_bs" uses the long (indirect) representation, the current index in the array.
+ // the index of the element in A(bs) that is currently being iterated.
+ unsigned m_bitNum; // The number of bits that have already been iterated over (set or clear). If you
+ // add this to the bit number of the next bit in "m_bits", you get the proper bit number of that
+ // bit in "m_bs".
+
+ public:
+ Iter(Env env, const BitSetShortLongRep& bs) : m_bs(bs), m_bitNum(0)
+ {
+ if (BitSetOps::IsShort(env))
+ {
+ m_index = 0;
+ m_bits = (size_t)bs;
+ }
+ else
+ {
+ assert(bs != BitSetOps::UninitVal());
+ m_index = 0;
+ m_bits = bs[0];
+ }
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+#if BITSET_TRACK_OPCOUNTS
+ BitSetStaticsImpl::RecordOp(BitSetStaticsImpl::BSOP_NextBit);
+#endif
+ for (;;)
+ {
+ DWORD nextBit;
+ BOOL hasBit;
+#ifdef _HOST_64BIT_
+ static_assert_no_msg(sizeof(size_t) == 8);
+ hasBit = BitScanForward64(&nextBit, m_bits);
+#else
+ static_assert_no_msg(sizeof(size_t) == 4);
+ hasBit = BitScanForward(&nextBit, m_bits);
+#endif
+
+ // If there's a bit, doesn't matter if we're short or long.
+ if (hasBit)
+ {
+ *pElem = m_bitNum + nextBit;
+ m_bitNum += nextBit + 1;
+ m_bits >>= nextBit;
+ m_bits >>= 1; // Have to do these separately -- if we have 0x80000000, nextBit == 31, and shifting
+ // by 32 bits does nothing.
+ return true;
+ }
+ else
+ {
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ if (len <= 1)
+ {
+ return false;
+ }
+ else
+ {
+ m_index++;
+ if (m_index == len)
+ {
+ return false;
+ }
+ // Otherwise...
+ m_bitNum = m_index * sizeof(size_t) * BitSetSupport::BitsInByte;
+ m_bits = m_bs[m_index];
+ continue;
+ }
+ }
+ }
+ }
+ };
+
+ friend class Iter;
+
+ typedef size_t* ValArgType;
+ typedef size_t* RetValType;
+};
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::AssignLong(Env env, BitSetShortLongRep& lhs, BitSetShortLongRep rhs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ lhs[i] = rhs[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeSingletonLong(Env env, unsigned bitNum)
+{
+ assert(!IsShort(env));
+ BitSetShortLongRep res = MakeEmptyArrayBits(env);
+ unsigned index = bitNum / BitsInSizeT;
+ res[index] = ((size_t)1) << (bitNum % BitsInSizeT);
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeCopyLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ BitSetShortLongRep res = MakeUninitArrayBits(env);
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ res[i] = bs[i];
+ }
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsEmptyLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if (bs[i] != 0)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename Env, typename BitSetTraits>
+unsigned BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::CountLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ unsigned res = 0;
+ for (unsigned i = 0; i < len; i++)
+ {
+ res += BitSetSupport::CountBitsInIntegral(bs[i]);
+ }
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ bs1[i] |= bs2[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ bs1[i] &= ~bs2[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i)
+{
+ assert(!IsShort(env));
+ unsigned index = i / BitsInSizeT;
+ size_t mask = ((size_t)1) << (i % BitsInSizeT);
+ bs[index] |= mask;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i)
+{
+ assert(!IsShort(env));
+ unsigned index = i / BitsInSizeT;
+ size_t mask = ((size_t)1) << (i % BitsInSizeT);
+ mask = ~mask;
+ bs[index] &= mask;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
+{
+ assert(!IsShort(env));
+ // Recall that ClearD does *not* require "bs" to be of the current epoch.
+ // Therefore, we must allocate a new representation.
+ bs = MakeEmptyArrayBits(env);
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeUninitArrayBits(Env env)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ assert(len > 1); // Or else would not require an array.
+ return (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeEmptyArrayBits(Env env)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ assert(len > 1); // Or else would not require an array.
+ BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+ for (unsigned i = 0; i < len; i++)
+ {
+ res[i] = 0;
+ }
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::MakeFullArrayBits(Env env)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ assert(len > 1); // Or else would not require an array.
+ BitSetShortLongRep res = (BitSetShortLongRep)(BitSetTraits::GetAllocator(env)->Alloc(len * sizeof(size_t)));
+ for (unsigned i = 0; i < len - 1; i++)
+ {
+ res[i] = size_t(-1);
+ }
+ // Start with all ones, shift in zeros in the last elem.
+ unsigned lastElemBits = (BitSetTraits::GetSize(env) - 1) % BitsInSizeT + 1;
+ res[len - 1] = (size_t(-1) >> (BitsInSizeT - lastElemBits));
+ return res;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsMemberLong(Env env, BitSetShortLongRep bs, unsigned i)
+{
+ assert(!IsShort(env));
+ unsigned index = i / BitsInSizeT;
+ unsigned bitInElem = (i % BitsInSizeT);
+ size_t mask = ((size_t)1) << bitInElem;
+ return (bs[index] & mask) != 0;
+}
+
+template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IntersectionDLong(Env env,
+ BitSetShortLongRep& bs1,
+ BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ bs1[i] &= bs2[i];
+ }
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsEmptyIntersectionLong(Env env,
+ BitSetShortLongRep bs1,
+ BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if ((bs1[i] & bs2[i]) != 0)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::EqualLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if (bs1[i] != bs2[i])
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <typename Env, typename BitSetTraits>
+bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::IsSubsetLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ for (unsigned i = 0; i < len; i++)
+ {
+ if ((bs1[i] & bs2[i]) != bs1[i])
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+#ifdef DEBUG
+template <typename Env, typename BitSetTraits>
+const char* BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>::ToStringLong(Env env, BitSetShortLongRep bs)
+{
+ assert(!IsShort(env));
+ unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+ const int CharsForSizeT = sizeof(size_t) * 2;
+ unsigned allocSz = len * CharsForSizeT + 4;
+ unsigned remaining = allocSz;
+ IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
+ char* res = (char*)alloc->Alloc(allocSz);
+ char* temp = res;
+ for (unsigned i = len; 0 < i; i--)
+ {
+ size_t bits = bs[i - 1];
+ for (unsigned bytesDone = 0; bytesDone < sizeof(size_t); bytesDone += sizeof(unsigned))
+ {
+ unsigned bits0 = (unsigned)bits;
+ sprintf_s(temp, remaining, "%08X", bits0);
+ temp += 8;
+ remaining -= 8;
+ bytesDone += 4;
+ assert(sizeof(unsigned) == 4);
+ // Doing this twice by 16, rather than once by 32, avoids warnings when size_t == unsigned.
+ bits = bits >> 16;
+ bits = bits >> 16;
+ }
+ }
+ return res;
+}
+#endif
+
+#endif // bitSetAsShortLong_DEFINED
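The short/long split above is driven entirely by BitSetTraits::GetArrSize: if one size_t chunk suffices, the set is stored directly in the pointer-sized value and no allocation ever happens; otherwise the value points to an allocated size_t array. A minimal sketch of a traits adapter satisfying that contract, with invented names (DummyEnv, DummyTraits); the JIT's real adapters live in compilerbitsettraits.h:

    struct DummyEnv
    {
        IAllocator* m_alloc; // used only for the "long" (array) representation
        unsigned    m_size;  // number of bits in the universe
    };

    struct DummyTraits
    {
        static IAllocator* GetAllocator(DummyEnv* env)
        {
            return env->m_alloc;
        }
        static unsigned GetSize(DummyEnv* env)
        {
            return env->m_size;
        }
        static unsigned GetArrSize(DummyEnv* env, unsigned elemSize)
        {
            // Number of "elemSize"-byte chunks needed to hold GetSize() bits.
            return (env->m_size + elemSize * 8 - 1) / (elemSize * 8);
        }
        static unsigned GetEpoch(DummyEnv* env)
        {
            return 0; // the universe never changes in this sketch
        }
    };

    // With m_size == 32 this instantiation stays "short" (IsShort is true, sets live in the
    // pointer value itself); with m_size == 200 on a 64-bit host every set is a 4-element array.
    typedef BitSetOps<BitSetShortLongRep, BSShortLong, DummyEnv*, DummyTraits> DummyBitSetOps;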
diff --git a/src/jit/bitsetasuint64.h b/src/jit/bitsetasuint64.h
new file mode 100644
index 0000000000..150f7e9d61
--- /dev/null
+++ b/src/jit/bitsetasuint64.h
@@ -0,0 +1,236 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef bitSetAsUint64_DEFINED
+#define bitSetAsUint64_DEFINED 1
+
+#include "bitset.h"
+
+template <typename Env, typename BitSetTraits>
+class BitSetOps</*BitSetType*/ UINT64,
+ /*Brand*/ BSUInt64,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>
+{
+public:
+ typedef UINT64 Rep;
+
+private:
+ static UINT64 Singleton(unsigned bitNum)
+ {
+ assert(bitNum < sizeof(UINT64) * BitSetSupport::BitsInByte);
+ return (UINT64)1 << bitNum;
+ }
+
+public:
+ static void Assign(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNouninit(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignAllowUninitRhs(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNoCopy(Env env, UINT64& lhs, UINT64 rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void ClearD(Env env, UINT64& bs)
+ {
+ bs = 0;
+ }
+
+ static UINT64 MakeSingleton(Env env, unsigned bitNum)
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ return Singleton(bitNum);
+ }
+
+ static UINT64 MakeCopy(Env env, UINT64 bs)
+ {
+ return bs;
+ }
+
+ static bool IsEmpty(Env env, UINT64 bs)
+ {
+ return bs == 0;
+ }
+
+ static unsigned Count(Env env, UINT64 bs)
+ {
+ return BitSetSupport::CountBitsInIntegral(bs);
+ }
+
+ static void UnionD(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ bs1 |= bs2;
+ }
+
+ static UINT64 Union(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ return bs1 | bs2;
+ }
+
+ static void DiffD(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ bs1 = bs1 & ~bs2;
+ }
+
+ static UINT64 Diff(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return bs1 & ~bs2;
+ }
+
+ static void RemoveElemD(Env env, UINT64& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1 &= ~Singleton(i);
+ }
+
+ static UINT64 RemoveElem(Env env, UINT64 bs1, unsigned i)
+ {
+ return bs1 & ~Singleton(i);
+ }
+
+ static void AddElemD(Env env, UINT64& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1 |= Singleton(i);
+ }
+
+ static UINT64 AddElem(Env env, UINT64 bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1 | Singleton(i);
+ }
+
+ static bool IsMember(Env env, const UINT64 bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return (bs1 & Singleton(i)) != 0;
+ }
+
+ static void IntersectionD(Env env, UINT64& bs1, UINT64 bs2)
+ {
+ bs1 &= bs2;
+ }
+
+ static UINT64 Intersection(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return bs1 & bs2;
+ }
+
+ static bool IsEmptyIntersection(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return (bs1 & bs2) == 0;
+ }
+
+ static bool IsSubset(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return ((bs1 & bs2) == bs1);
+ }
+
+ static bool Equal(Env env, UINT64 bs1, UINT64 bs2)
+ {
+ return bs1 == bs2;
+ }
+
+ static UINT64 MakeEmpty(Env env)
+ {
+ return 0;
+ }
+
+ static UINT64 MakeFull(Env env)
+ {
+ unsigned sz = BitSetTraits::GetSize(env);
+ if (sz == sizeof(UINT64) * 8)
+ {
+ return UINT64(-1);
+ }
+ else
+ {
+ return (UINT64(1) << sz) - 1;
+ }
+ }
+
+#ifdef DEBUG
+ static const char* ToString(Env env, UINT64 bs)
+ {
+ IAllocator* alloc = BitSetTraits::GetDebugOnlyAllocator(env);
+ const int CharsForUINT64 = sizeof(UINT64) * 2;
+ char* res = NULL;
+ const int AllocSize = CharsForUINT64 + 4;
+ res = (char*)alloc->Alloc(AllocSize);
+ UINT64 bits = bs;
+ unsigned remaining = AllocSize;
+ char* ptr = res;
+ for (unsigned bytesDone = 0; bytesDone < sizeof(UINT64); bytesDone += sizeof(unsigned))
+ {
+ unsigned bits0 = (unsigned)bits;
+ sprintf_s(ptr, remaining, "%08X", bits0);
+ ptr += 8;
+ remaining -= 8;
+ bytesDone += 4;
+ assert(sizeof(unsigned) == 4);
+ // Doing this twice by 16, rather than once by 32, avoids warnings when size_t == unsigned.
+ bits = bits >> 16;
+ bits = bits >> 16;
+ }
+ return res;
+ }
+#endif
+
+ static UINT64 UninitVal()
+ {
+ return 0;
+ }
+
+ static bool MayBeUninit(UINT64 bs)
+ {
+ return bs == UninitVal();
+ }
+
+ class Iter
+ {
+ UINT64 m_bits;
+
+ public:
+ Iter(Env env, const UINT64& bits) : m_bits(bits)
+ {
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+ if (m_bits)
+ {
+ unsigned bitNum = *pElem;
+ while ((m_bits & 0x1) == 0)
+ {
+ bitNum++;
+ m_bits >>= 1;
+ }
+ *pElem = bitNum;
+ m_bits &= ~0x1;
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ };
+
+ typedef UINT64 ValArgType;
+ typedef UINT64 RetValType;
+};
+
+#endif // bitSetAsUint64_DEFINED
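For universes of at most 64 bits the raw-UINT64 implementation above can be used directly. A minimal sketch, reusing the hypothetical DummyEnv/DummyTraits adapter from the bitsetasshortlong.h notes (with m_size set to 64 so that bit 63 is legal):

    typedef BitSetOps<UINT64, BSUInt64, DummyEnv*, DummyTraits> Dummy64Ops;

    void Demo64(DummyEnv* env)
    {
        UINT64 s = Dummy64Ops::MakeEmpty(env);
        Dummy64Ops::AddElemD(env, s, 5);
        Dummy64Ops::AddElemD(env, s, 63);
        assert(Dummy64Ops::IsMember(env, s, 63));
        assert(Dummy64Ops::Count(env, s) == 2);
        assert(Dummy64Ops::IsSubset(env, s, Dummy64Ops::MakeFull(env))); // s is a subset of the full set
    }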
diff --git a/src/jit/bitsetasuint64inclass.h b/src/jit/bitsetasuint64inclass.h
new file mode 100644
index 0000000000..be92624613
--- /dev/null
+++ b/src/jit/bitsetasuint64inclass.h
@@ -0,0 +1,500 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef bitSetAsUint64InClass_DEFINED
+#define bitSetAsUint64InClass_DEFINED 1
+
+#include "bitset.h"
+#include "bitsetasuint64.h"
+#include "stdmacros.h"
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64ValueRetType;
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64Iter;
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64
+{
+public:
+ typedef BitSetUint64<Env, BitSetTraits> Rep;
+
+private:
+ friend class BitSetOps</*BitSetType*/ BitSetUint64<Env, BitSetTraits>,
+ /*Brand*/ BSUInt64Class,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>;
+
+ friend class BitSetUint64ValueRetType<Env, BitSetTraits>;
+ friend class BitSetUint64Iter<Env, BitSetTraits>;
+
+ UINT64 m_bits;
+
+#ifdef DEBUG
+ unsigned m_epoch;
+#endif
+
+ typedef BitSetOps<UINT64, BSUInt64, Env, BitSetTraits> Uint64BitSetOps;
+
+ void CheckEpoch(Env env) const
+ {
+#ifdef DEBUG
+ assert(m_epoch == BitSetTraits::GetEpoch(env));
+#endif
+ }
+
+#ifdef DEBUG
+ // In debug, make sure we don't have any public assignment, by making this private.
+ BitSetUint64& operator=(const BitSetUint64& bs)
+ {
+ m_bits = bs.m_bits;
+ m_epoch = bs.m_epoch;
+ return (*this);
+ }
+#endif // DEBUG
+
+ bool operator==(const BitSetUint64& bs) const
+ {
+ return m_bits == bs.m_bits
+#ifdef DEBUG
+ && m_epoch == bs.m_epoch
+#endif
+ ;
+ }
+
+#ifndef DEBUG
+ // In debug we also want the default copy constructor to be private, to make inadvertent
+ // copies via it illegal. Debug builds therefore arrange to use the
+ // non-default constructor defined below that takes an extra argument where one would
+ // otherwise use a copy constructor. In non-debug builds, we don't pass the extra dummy
+ // int argument, and just make the copy constructor defined here visible.
+public:
+#endif
+ BitSetUint64(const BitSetUint64& bs)
+ : m_bits(bs.m_bits)
+#ifdef DEBUG
+ , m_epoch(bs.m_epoch)
+#endif
+ {
+ }
+
+#ifdef DEBUG
+public:
+ // But we add a public constructor that's *almost* the copy constructor (it takes an extra dummy argument).
+ BitSetUint64(const BitSetUint64& bs, int xxx) : m_bits(bs.m_bits), m_epoch(bs.m_epoch)
+ {
+ }
+#endif
+
+private:
+ // Return the number of bits set in the BitSet.
+ inline unsigned Count(Env env) const
+ {
+ CheckEpoch(env);
+ return Uint64BitSetOps::Count(env, m_bits);
+ }
+
+ inline void DiffD(Env env, const BitSetUint64& bs2)
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ Uint64BitSetOps::DiffD(env, m_bits, bs2.m_bits);
+ }
+
+ inline BitSetUint64 Diff(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::DiffD(env, res.m_bits, bs2.m_bits);
+ return res;
+ }
+
+ inline void RemoveElemD(Env env, unsigned i)
+ {
+ CheckEpoch(env);
+ Uint64BitSetOps::RemoveElemD(env, m_bits, i);
+ }
+
+ inline BitSetUint64 RemoveElem(Env env, unsigned i) const
+ {
+ CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::RemoveElemD(env, res.m_bits, i);
+ return res;
+ }
+
+ inline void AddElemD(Env env, unsigned i)
+ {
+ CheckEpoch(env);
+ Uint64BitSetOps::AddElemD(env, m_bits, i);
+ }
+
+ inline BitSetUint64 AddElem(Env env, unsigned i) const
+ {
+ CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::AddElemD(env, res.m_bits, i);
+ return res;
+ }
+
+ inline bool IsMember(Env env, unsigned i) const
+ {
+ CheckEpoch(env);
+ return Uint64BitSetOps::IsMember(env, m_bits, i);
+ }
+
+ inline void IntersectionD(Env env, const BitSetUint64& bs2)
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ m_bits = m_bits & bs2.m_bits;
+ }
+
+ inline BitSetUint64 Intersection(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::IntersectionD(env, res.m_bits, bs2.m_bits);
+ return res;
+ }
+
+ inline void UnionD(Env env, const BitSetUint64& bs2)
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ Uint64BitSetOps::UnionD(env, m_bits, bs2.m_bits);
+ }
+
+ inline BitSetUint64 Union(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ BitSetUint64 res(*this);
+ Uint64BitSetOps::UnionD(env, res.m_bits, bs2.m_bits);
+ return res;
+ }
+
+ inline void ClearD(Env env)
+ {
+ // Recall that ClearD does *not* require "*this" to be of the current epoch.
+ Uint64BitSetOps::ClearD(env, m_bits);
+#ifdef DEBUG
+ // But it does update "m_epoch" to the current epoch.
+ m_epoch = BitSetTraits::GetEpoch(env);
+#endif
+ }
+
+ inline bool IsEmpty(Env env) const
+ {
+ CheckEpoch(env);
+ return Uint64BitSetOps::IsEmpty(env, m_bits);
+ }
+
+ inline bool IsSubset(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ return Uint64BitSetOps::IsSubset(env, m_bits, bs2.m_bits);
+ }
+
+ inline bool IsEmptyIntersection(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ return Uint64BitSetOps::IsEmptyIntersection(env, m_bits, bs2.m_bits);
+ }
+
+ inline bool Equal(Env env, const BitSetUint64& bs2) const
+ {
+ CheckEpoch(env);
+ bs2.CheckEpoch(env);
+ return Uint64BitSetOps::Equal(env, m_bits, bs2.m_bits);
+ }
+
+ const char* ToString(Env env) const
+ {
+ return Uint64BitSetOps::ToString(env, m_bits);
+ }
+
+public:
+ // Uninit: constructs the uninitialized value.
+ BitSetUint64()
+ : m_bits(0)
+#ifdef DEBUG
+ , m_epoch(UINT32_MAX) // Undefined.
+#endif
+ {
+ }
+
+ BitSetUint64(Env env, bool full = false)
+ : m_bits(0)
+#ifdef DEBUG
+ , m_epoch(BitSetTraits::GetEpoch(env))
+#endif
+ {
+ if (full)
+ {
+ m_bits = Uint64BitSetOps::MakeFull(env);
+ }
+ }
+
+ inline BitSetUint64(const BitSetUint64ValueRetType<Env, BitSetTraits>& rt);
+
+ BitSetUint64(Env env, unsigned bitNum)
+ : m_bits(Uint64BitSetOps::MakeSingleton(env, bitNum))
+#ifdef DEBUG
+ , m_epoch(BitSetTraits::GetEpoch(env))
+#endif
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ }
+};
+
+template <typename Env, typename BitSetTraits>
+class BitSetUint64ValueRetType
+{
+ friend class BitSetUint64<Env, BitSetTraits>;
+
+ BitSetUint64<Env, BitSetTraits> m_bs;
+
+public:
+ BitSetUint64ValueRetType(const BitSetUint64<Env, BitSetTraits>& bs) : m_bs(bs)
+ {
+ }
+};
+
+template <typename Env, typename BitSetTraits>
+BitSetUint64<Env, BitSetTraits>::BitSetUint64(const BitSetUint64ValueRetType<Env, BitSetTraits>& rt)
+ : m_bits(rt.m_bs.m_bits)
+#ifdef DEBUG
+ , m_epoch(rt.m_bs.m_epoch)
+#endif
+{
+}
+
+// You *can* clear a bit after it's been iterated. But you shouldn't otherwise mutate the
+// bitset during bit iteration.
+template <typename Env, typename BitSetTraits>
+class BitSetUint64Iter
+{
+ UINT64 m_bits;
+ unsigned m_bitNum;
+
+public:
+ BitSetUint64Iter(Env env, const BitSetUint64<Env, BitSetTraits>& bs) : m_bits(bs.m_bits), m_bitNum(0)
+ {
+ }
+
+ bool NextElem(Env env, unsigned* pElem)
+ {
+ static const unsigned UINT64_SIZE = 64;
+
+ if ((m_bits & 0x1) != 0)
+ {
+ *pElem = m_bitNum;
+ m_bitNum++;
+ m_bits >>= 1;
+ return true;
+ }
+ else
+ {
+ // Skip groups of 4 zeros -- an optimization for sparse bitsets.
+ while (m_bitNum < UINT64_SIZE && (m_bits & 0xf) == 0)
+ {
+ m_bitNum += 4;
+ m_bits >>= 4;
+ }
+ while (m_bitNum < UINT64_SIZE && (m_bits & 0x1) == 0)
+ {
+ m_bitNum += 1;
+ m_bits >>= 1;
+ }
+ if (m_bitNum < UINT64_SIZE)
+ {
+ *pElem = m_bitNum;
+ m_bitNum++;
+ m_bits >>= 1;
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+};
+
+template <typename Env, typename BitSetTraits>
+class BitSetOps</*BitSetType*/ BitSetUint64<Env, BitSetTraits>,
+ /*Brand*/ BSUInt64Class,
+ /*Env*/ Env,
+ /*BitSetTraits*/ BitSetTraits>
+{
+ typedef BitSetUint64<Env, BitSetTraits> BST;
+ typedef const BitSetUint64<Env, BitSetTraits>& BSTValArg;
+ typedef BitSetUint64ValueRetType<Env, BitSetTraits> BSTRetVal;
+
+public:
+ static BSTRetVal UninitVal()
+ {
+ return BitSetUint64<Env, BitSetTraits>();
+ }
+
+ static bool MayBeUninit(BSTValArg bs)
+ {
+ return bs == UninitVal();
+ }
+
+ static void Assign(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNouninit(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignAllowUninitRhs(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void AssignNoCopy(Env env, BST& lhs, BSTValArg rhs)
+ {
+ lhs = rhs;
+ }
+
+ static void ClearD(Env env, BST& bs)
+ {
+ bs.ClearD(env);
+ }
+
+ static BSTRetVal MakeSingleton(Env env, unsigned bitNum)
+ {
+ assert(bitNum < BitSetTraits::GetSize(env));
+ return BST(env, bitNum);
+ }
+
+ static BSTRetVal MakeCopy(Env env, BSTValArg bs)
+ {
+ return bs;
+ }
+
+ static bool IsEmpty(Env env, BSTValArg bs)
+ {
+ return bs.IsEmpty(env);
+ }
+
+ static unsigned Count(Env env, BSTValArg bs)
+ {
+ return bs.Count(env);
+ }
+
+ static void UnionD(Env env, BST& bs1, BSTValArg bs2)
+ {
+ bs1.UnionD(env, bs2);
+ }
+
+ static BSTRetVal Union(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Union(env, bs2);
+ }
+
+ static void DiffD(Env env, BST& bs1, BSTValArg bs2)
+ {
+ bs1.DiffD(env, bs2);
+ }
+
+ static BSTRetVal Diff(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Diff(env, bs2);
+ }
+
+ static void RemoveElemD(Env env, BST& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1.RemoveElemD(env, i);
+ }
+
+ static BSTRetVal RemoveElem(Env env, BSTValArg bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1.RemoveElem(env, i);
+ }
+
+ static void AddElemD(Env env, BST& bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ bs1.AddElemD(env, i);
+ }
+
+ static BSTRetVal AddElem(Env env, BSTValArg bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1.AddElem(env, i);
+ }
+
+ static bool IsMember(Env env, BSTValArg bs1, unsigned i)
+ {
+ assert(i < BitSetTraits::GetSize(env));
+ return bs1.IsMember(env, i);
+ }
+
+ static void IntersectionD(Env env, BST& bs1, BSTValArg bs2)
+ {
+ bs1.IntersectionD(env, bs2);
+ }
+
+ static BSTRetVal Intersection(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Intersection(env, bs2);
+ }
+
+ static bool IsEmptyIntersection(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.IsEmptyIntersection(env, bs2);
+ }
+
+ static bool IsSubset(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.IsSubset(env, bs2);
+ }
+
+ static bool Equal(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return bs1.Equal(env, bs2);
+ }
+
+ static bool NotEqual(Env env, BSTValArg bs1, BSTValArg bs2)
+ {
+ return !bs1.Equal(env, bs2);
+ }
+
+ static BSTRetVal MakeEmpty(Env env)
+ {
+ return BST(env);
+ }
+
+ static BSTRetVal MakeFull(Env env)
+ {
+ return BST(env, /*full*/ true);
+ }
+
+#ifdef DEBUG
+ static const char* ToString(Env env, BSTValArg bs)
+ {
+ return bs.ToString(env);
+ }
+#endif
+
+ typedef BitSetUint64Iter<Env, BitSetTraits> Iter;
+
+ typedef const BitSetUint64<Env, BitSetTraits>& ValArgType;
+ typedef BitSetUint64ValueRetType<Env, BitSetTraits> RetValType;
+};
+#endif // bitSetAsUint64InClass_DEFINED
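The main payoff of this class-wrapped variant is the debug-only epoch stamp: mixing bitsets created against different universes trips the CheckEpoch assert instead of silently computing garbage. A hypothetical misuse it would catch -- MyEnv, MyTraits, and BumpUniverse are invented names standing in for any Env/Traits pair whose GetEpoch changes when the universe does:

    typedef BitSetUint64<MyEnv*, MyTraits>                       MyBitSet;
    typedef BitSetOps<MyBitSet, BSUInt64Class, MyEnv*, MyTraits> MyBitSetOps;

    void EpochMisuse(MyEnv* env)
    {
        MyBitSet before(env);                    // stamped with the current epoch
        BumpUniverse(env);                       // hypothetical: grows the universe, bumps GetEpoch
        MyBitSet after(env);                     // stamped with the new epoch
        MyBitSetOps::UnionD(env, after, before); // DEBUG: CheckEpoch asserts; release: unchecked
    }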
diff --git a/src/jit/bitsetops.h b/src/jit/bitsetops.h
new file mode 100644
index 0000000000..edf39eaf56
--- /dev/null
+++ b/src/jit/bitsetops.h
@@ -0,0 +1,34 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+BSOPNAME(BSOP_Assign)
+BSOPNAME(BSOP_AssignAllowUninitRhs)
+BSOPNAME(BSOP_AssignNocopy)
+BSOPNAME(BSOP_ClearD)
+BSOPNAME(BSOP_MakeSingleton)
+BSOPNAME(BSOP_MakeEmpty)
+BSOPNAME(BSOP_MakeFull)
+BSOPNAME(BSOP_MakeCopy)
+BSOPNAME(BSOP_IsEmpty)
+BSOPNAME(BSOP_Count)
+BSOPNAME(BSOP_RemoveElemD)
+BSOPNAME(BSOP_RemoveElem)
+BSOPNAME(BSOP_AddElemD)
+BSOPNAME(BSOP_AddElem)
+BSOPNAME(BSOP_UnionD)
+BSOPNAME(BSOP_Union)
+BSOPNAME(BSOP_IntersectionD)
+BSOPNAME(BSOP_Intersection)
+BSOPNAME(BSOP_IsEmptyIntersection)
+BSOPNAME(BSOP_DiffD)
+BSOPNAME(BSOP_Diff)
+BSOPNAME(BSOP_IsMember)
+BSOPNAME(BSOP_IsNotMember)
+BSOPNAME(BSOP_NoBitsAbove)
+BSOPNAME(BSOP_LeftShiftSingletonByOneD)
+BSOPNAME(BSOP_IsSubset)
+BSOPNAME(BSOP_Equal)
+BSOPNAME(BSOP_NotEqual)
+BSOPNAME(BSOP_NextBit)
+BSOPNAME(BSOP_ToString)
diff --git a/src/jit/bitvec.h b/src/jit/bitvec.h
new file mode 100644
index 0000000000..4db211ba0a
--- /dev/null
+++ b/src/jit/bitvec.h
@@ -0,0 +1,56 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This include file determines how BitVec is implemented.
+//
+#ifndef _BITVEC_INCLUDED_
+#define _BITVEC_INCLUDED_ 1
+
+// This class simplifies creation and usage of "ShortLong" bitsets.
+//
+// Create new bitsets like so:
+//
+// BitVecTraits traits(size, pCompiler);
+// BitVec bitvec = BitVecOps::MakeEmpty(&traits);
+//
+// and call functions like so:
+//
+// BitVecOps::AddElemD(&traits, bitvec, 10);
+// BitVecOps::IsMember(&traits, bitvec, 10));
+//
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+#include "bitsetasshortlong.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ BitVecTraits*,
+ /*BitSetTraits*/ BitVecTraits>
+ BitVecOps;
+
+typedef BitSetShortLongRep BitVec;
+
+// These types should be used as the types for BitVec arguments and return values, respectively.
+typedef BitVecOps::ValArgType BitVec_ValArg_T;
+typedef BitVecOps::RetValType BitVec_ValRet_T;
+
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// set for it (using "_traits" for any necessary allocation), and copies the contents of "_initVal" into it.
+#define BITVEC_INIT(_traits, _varName, _initVal) _varName(BitVecOps::MakeCopy(_traits, _initVal))
+
+// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
+// pointer into "_varName".
+#define BITVEC_INIT_NOCOPY(_varName, _initVal) _varName(_initVal)
+
+// The iterator pattern.
+
+// Use this to initialize an iterator "_iterName" to iterate over a BitVec "_bitVec".
+// "_bitNum" will be an unsigned variable to which we assign the elements of "_bitVec".
+#define BITVEC_ITER_INIT(_traits, _iterName, _bitVec, _bitNum) \
+ unsigned _bitNum = 0; \
+ BitVecOps::Iter _iterName(_traits, _bitVec)
+
+#endif // _BITVEC_INCLUDED_
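Putting this header's pieces together, a typical create/populate/iterate sequence looks roughly like the sketch below. It assumes a Compiler* is at hand and uses the two-argument BitVecTraits constructor shown in the comment at the top of the file; the function and variable names are illustrative only.

    void BitVecDemo(Compiler* pCompiler, unsigned size)
    {
        BitVecTraits traits(size, pCompiler);
        BitVec       vec = BitVecOps::MakeEmpty(&traits);

        BitVecOps::AddElemD(&traits, vec, 3);
        BitVecOps::AddElemD(&traits, vec, 10);
        assert(BitVecOps::Count(&traits, vec) == 2);

        BITVEC_ITER_INIT(&traits, iter, vec, elemNum);
        while (iter.NextElem(&traits, &elemNum))
        {
            // Visits members in increasing order: 3, then 10.
        }
    }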
diff --git a/src/jit/block.cpp b/src/jit/block.cpp
new file mode 100644
index 0000000000..2d37754ec5
--- /dev/null
+++ b/src/jit/block.cpp
@@ -0,0 +1,771 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX BasicBlock XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef DEBUG
+flowList* ShuffleHelper(unsigned hash, flowList* res)
+{
+ flowList* head = res;
+ for (flowList *prev = nullptr; res != nullptr; prev = res, res = res->flNext)
+ {
+ unsigned blkHash = (hash ^ (res->flBlock->bbNum << 16) ^ res->flBlock->bbNum);
+ if (((blkHash % 1879) & 1) && prev != nullptr)
+ {
+ // Swap res with head.
+ prev->flNext = head;
+ jitstd::swap(head->flNext, res->flNext);
+ jitstd::swap(head, res);
+ }
+ }
+ return head;
+}
+
+unsigned SsaStressHashHelper()
+{
+ // hash = 0: turned off, hash = 1: use method hash, hash = *: use custom hash.
+ unsigned hash = JitConfig.JitSsaStress();
+
+ if (hash == 0)
+ {
+ return hash;
+ }
+ if (hash == 1)
+ {
+ return JitTls::GetCompiler()->info.compMethodHash();
+ }
+ return ((hash >> 16) == 0) ? ((hash << 16) | hash) : hash;
+}
+#endif
+
+EHSuccessorIter::EHSuccessorIter(Compiler* comp, BasicBlock* block)
+ : m_comp(comp)
+ , m_block(block)
+ , m_curRegSucc(nullptr)
+ , m_curTry(comp->ehGetBlockExnFlowDsc(block))
+ , m_remainingRegSuccs(block->NumSucc(comp))
+{
+ // If "block" is a "leave helper" block (the empty BBJ_ALWAYS block that pairs with a
+ // preceding BBJ_CALLFINALLY block to implement a "leave" IL instruction), then no exceptions
+ // can occur within it, so clear m_curTry if it's non-null.
+ if (m_curTry != nullptr)
+ {
+ BasicBlock* beforeBlock = block->bbPrev;
+ if (beforeBlock != nullptr && beforeBlock->isBBCallAlwaysPair())
+ {
+ m_curTry = nullptr;
+ }
+ }
+
+ if (m_curTry == nullptr && m_remainingRegSuccs > 0)
+ {
+ // Examine the successors to see if any are the start of try blocks.
+ FindNextRegSuccTry();
+ }
+}
+
+void EHSuccessorIter::FindNextRegSuccTry()
+{
+ assert(m_curTry == nullptr);
+
+ // Must now consider the next regular successor, if any.
+ while (m_remainingRegSuccs > 0)
+ {
+ m_remainingRegSuccs--;
+ m_curRegSucc = m_block->GetSucc(m_remainingRegSuccs, m_comp);
+ if (m_comp->bbIsTryBeg(m_curRegSucc))
+ {
+ assert(m_curRegSucc->hasTryIndex()); // Since it is a try begin.
+ unsigned newTryIndex = m_curRegSucc->getTryIndex();
+
+ // If the try region started by "m_curRegSucc" (represented by newTryIndex) contains m_block,
+ // we've already yielded its handler, as one of the EH handler successors of m_block itself.
+ if (m_comp->bbInExnFlowRegions(newTryIndex, m_block))
+ {
+ continue;
+ }
+
+ // Otherwise, consider this try.
+ m_curTry = m_comp->ehGetDsc(newTryIndex);
+ break;
+ }
+ }
+}
+
+void EHSuccessorIter::operator++(void)
+{
+ assert(m_curTry != nullptr);
+ if (m_curTry->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ m_curTry = m_comp->ehGetDsc(m_curTry->ebdEnclosingTryIndex);
+
+ // If we've gone over into considering tries containing successors,
+ // then the enclosing try must have the successor as its first block.
+ if (m_curRegSucc == nullptr || m_curTry->ebdTryBeg == m_curRegSucc)
+ {
+ return;
+ }
+
+ // Otherwise, give up, try the next regular successor.
+ m_curTry = nullptr;
+ }
+ else
+ {
+ m_curTry = nullptr;
+ }
+
+ // We've exhausted all try blocks.
+ // See if there are any remaining regular successors that start try blocks.
+ FindNextRegSuccTry();
+}
+
+BasicBlock* EHSuccessorIter::operator*()
+{
+ assert(m_curTry != nullptr);
+ return m_curTry->ExFlowBlock();
+}
+
+flowList* Compiler::BlockPredsWithEH(BasicBlock* blk)
+{
+ BlockToFlowListMap* ehPreds = GetBlockToEHPreds();
+ flowList* res;
+ if (ehPreds->Lookup(blk, &res))
+ {
+ return res;
+ }
+
+ res = blk->bbPreds;
+ unsigned tryIndex;
+ if (bbIsExFlowBlock(blk, &tryIndex))
+ {
+ // Find the first block of the try.
+ EHblkDsc* ehblk = ehGetDsc(tryIndex);
+ BasicBlock* tryStart = ehblk->ebdTryBeg;
+ for (flowList* tryStartPreds = tryStart->bbPreds; tryStartPreds != nullptr;
+ tryStartPreds = tryStartPreds->flNext)
+ {
+ res = new (this, CMK_FlowList) flowList(tryStartPreds->flBlock, res);
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+ }
+
+ // Now add all blocks handled by this handler (except for second blocks of BBJ_CALLFINALLY/BBJ_ALWAYS pairs;
+ // these cannot cause transfer to the handler...)
+ BasicBlock* prevBB = nullptr;
+
+ // TODO-Throughput: It would be nice if we could iterate just over the blocks in the try, via
+ // something like:
+ // for (BasicBlock* bb = ehblk->ebdTryBeg; bb != ehblk->ebdTryLast->bbNext; bb = bb->bbNext)
+ // (plus adding in any filter blocks outside the try whose exceptions are handled here).
+ // That doesn't work, however: funclets have caused us to sometimes split the body of a try into
+ // more than one sequence of contiguous blocks. We need to find a better way to do this.
+ for (BasicBlock *bb = fgFirstBB; bb != nullptr; prevBB = bb, bb = bb->bbNext)
+ {
+ if (bbInExnFlowRegions(tryIndex, bb) && (prevBB == nullptr || !prevBB->isBBCallAlwaysPair()))
+ {
+ res = new (this, CMK_FlowList) flowList(bb, res);
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+ }
+ }
+
+#ifdef DEBUG
+ unsigned hash = SsaStressHashHelper();
+ if (hash != 0)
+ {
+ res = ShuffleHelper(hash, res);
+ }
+#endif // DEBUG
+
+ ehPreds->Set(blk, res);
+ }
+ return res;
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// dspBlockILRange(): Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for BAD_IL_OFFSET.
+//
+void BasicBlock::dspBlockILRange()
+{
+ if (bbCodeOffs != BAD_IL_OFFSET)
+ {
+ printf("[%03X..", bbCodeOffs);
+ }
+ else
+ {
+ printf("[???"
+ "..");
+ }
+
+ if (bbCodeOffsEnd != BAD_IL_OFFSET)
+ {
+ // brace-matching editor workaround for following line: (
+ printf("%03X)", bbCodeOffsEnd);
+ }
+ else
+ {
+ // brace-matching editor workaround for following line: (
+ printf("???"
+ ")");
+ }
+}
+
+//------------------------------------------------------------------------
+// dspFlags: Print out the block's flags
+//
+void BasicBlock::dspFlags()
+{
+ if (bbFlags & BBF_VISITED)
+ {
+ printf("v ");
+ }
+ if (bbFlags & BBF_MARKED)
+ {
+ printf("m ");
+ }
+ if (bbFlags & BBF_CHANGED)
+ {
+ printf("! ");
+ }
+ if (bbFlags & BBF_REMOVED)
+ {
+ printf("del ");
+ }
+ if (bbFlags & BBF_DONT_REMOVE)
+ {
+ printf("keep ");
+ }
+ if (bbFlags & BBF_IMPORTED)
+ {
+ printf("i ");
+ }
+ if (bbFlags & BBF_INTERNAL)
+ {
+ printf("internal ");
+ }
+ if (bbFlags & BBF_FAILED_VERIFICATION)
+ {
+ printf("failV ");
+ }
+ if (bbFlags & BBF_TRY_BEG)
+ {
+ printf("try ");
+ }
+ if (bbFlags & BBF_NEEDS_GCPOLL)
+ {
+ printf("poll ");
+ }
+ if (bbFlags & BBF_RUN_RARELY)
+ {
+ printf("rare ");
+ }
+ if (bbFlags & BBF_LOOP_HEAD)
+ {
+ printf("Loop ");
+ }
+ if (bbFlags & BBF_LOOP_CALL0)
+ {
+ printf("Loop0 ");
+ }
+ if (bbFlags & BBF_LOOP_CALL1)
+ {
+ printf("Loop1 ");
+ }
+ if (bbFlags & BBF_HAS_LABEL)
+ {
+ printf("label ");
+ }
+ if (bbFlags & BBF_JMP_TARGET)
+ {
+ printf("target ");
+ }
+ if (bbFlags & BBF_HAS_JMP)
+ {
+ printf("jmp ");
+ }
+ if (bbFlags & BBF_GC_SAFE_POINT)
+ {
+ printf("gcsafe ");
+ }
+ if (bbFlags & BBF_FUNCLET_BEG)
+ {
+ printf("flet ");
+ }
+ if (bbFlags & BBF_HAS_IDX_LEN)
+ {
+ printf("idxlen ");
+ }
+ if (bbFlags & BBF_HAS_NEWARRAY)
+ {
+ printf("new[] ");
+ }
+ if (bbFlags & BBF_HAS_NEWOBJ)
+ {
+ printf("newobj ");
+ }
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (bbFlags & BBF_FINALLY_TARGET)
+ {
+ printf("ftarget ");
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (bbFlags & BBF_BACKWARD_JUMP)
+ {
+ printf("bwd ");
+ }
+ if (bbFlags & BBF_RETLESS_CALL)
+ {
+ printf("retless ");
+ }
+ if (bbFlags & BBF_LOOP_PREHEADER)
+ {
+ printf("LoopPH ");
+ }
+ if (bbFlags & BBF_COLD)
+ {
+ printf("cold ");
+ }
+ if (bbFlags & BBF_PROF_WEIGHT)
+ {
+ printf("IBC ");
+ }
+#ifdef LEGACY_BACKEND
+ if (bbFlags & BBF_FORWARD_SWITCH)
+ {
+ printf("fswitch ");
+ }
+#else // !LEGACY_BACKEND
+ if (bbFlags & BBF_IS_LIR)
+ {
+ printf("LIR ");
+ }
+#endif // LEGACY_BACKEND
+ if (bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ printf("KEEP ");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display the bbPreds basic block list (the block predecessors).
+ * Returns the number of characters printed.
+ */
+
+unsigned BasicBlock::dspPreds()
+{
+ unsigned count = 0;
+ for (flowList* pred = bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (count != 0)
+ {
+ printf(",");
+ count += 1;
+ }
+ printf("BB%02u", pred->flBlock->bbNum);
+ count += 4;
+
+ // Account for %02u only handling 2 digits, but we can display more than that.
+ unsigned digits = CountDigits(pred->flBlock->bbNum);
+ if (digits > 2)
+ {
+ count += digits - 2;
+ }
+
+ // Does this predecessor have an interesting dup count? If so, display it.
+ if (pred->flDupCount > 1)
+ {
+ printf("(%u)", pred->flDupCount);
+ count += 2 + CountDigits(pred->flDupCount);
+ }
+ }
+ return count;
+}
+
+/*****************************************************************************
+ *
+ * Display the bbCheapPreds basic block list (the block predecessors).
+ * Returns the number of characters printed.
+ */
+
+unsigned BasicBlock::dspCheapPreds()
+{
+ unsigned count = 0;
+ for (BasicBlockList* pred = bbCheapPreds; pred != nullptr; pred = pred->next)
+ {
+ if (count != 0)
+ {
+ printf(",");
+ count += 1;
+ }
+ printf("BB%02u", pred->block->bbNum);
+ count += 4;
+
+ // Account for %02u only handling 2 digits, but we can display more than that.
+ unsigned digits = CountDigits(pred->block->bbNum);
+ if (digits > 2)
+ {
+ count += digits - 2;
+ }
+ }
+ return count;
+}
+
+/*****************************************************************************
+ *
+ * Display the basic block successors.
+ * Returns the count of successors.
+ */
+
+unsigned BasicBlock::dspSuccs(Compiler* compiler)
+{
+ unsigned numSuccs = NumSucc(compiler);
+ unsigned count = 0;
+ for (unsigned i = 0; i < numSuccs; i++)
+ {
+ printf("%s", (count == 0) ? "" : ",");
+ printf("BB%02u", GetSucc(i, compiler)->bbNum);
+ count++;
+ }
+ return count;
+}
+
+// Display a compact representation of the bbJumpKind, that is, where this block branches.
+// This is similar to code in Compiler::fgTableDispBasicBlock(), but doesn't have that code's requirements to align
+// things strictly.
+void BasicBlock::dspJumpKind()
+{
+ switch (bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ printf(" (finret)");
+ break;
+
+ case BBJ_EHFILTERRET:
+ printf(" (fltret)");
+ break;
+
+ case BBJ_EHCATCHRET:
+ printf(" -> BB%02u (cret)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_THROW:
+ printf(" (throw)");
+ break;
+
+ case BBJ_RETURN:
+ printf(" (return)");
+ break;
+
+ case BBJ_NONE:
+ // For fall-through blocks, print nothing.
+ break;
+
+ case BBJ_ALWAYS:
+ if (bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ printf(" -> BB%02u (ALWAYS)", bbJumpDest->bbNum);
+ }
+ else
+ {
+ printf(" -> BB%02u (always)", bbJumpDest->bbNum);
+ }
+ break;
+
+ case BBJ_LEAVE:
+ printf(" -> BB%02u (leave)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_CALLFINALLY:
+ printf(" -> BB%02u (callf)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_COND:
+ printf(" -> BB%02u (cond)", bbJumpDest->bbNum);
+ break;
+
+ case BBJ_SWITCH:
+ printf(" ->");
+
+ unsigned jumpCnt;
+ jumpCnt = bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = bbJumpSwt->bbsDstTab;
+ do
+ {
+ printf("%cBB%02u", (jumpTab == bbJumpSwt->bbsDstTab) ? ' ' : ',', (*jumpTab)->bbNum);
+ } while (++jumpTab, --jumpCnt);
+
+ printf(" (switch)");
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+}
+
+void BasicBlock::dspBlockHeader(Compiler* compiler,
+ bool showKind /*= true*/,
+ bool showFlags /*= false*/,
+ bool showPreds /*= true*/)
+{
+ printf("BB%02u ", bbNum);
+ dspBlockILRange();
+ if (showKind)
+ {
+ dspJumpKind();
+ }
+ if (showPreds)
+ {
+ printf(", preds={");
+ if (compiler->fgCheapPredsValid)
+ {
+ dspCheapPreds();
+ }
+ else
+ {
+ dspPreds();
+ }
+ printf("} succs={");
+ dspSuccs(compiler);
+ printf("}");
+ }
+ if (showFlags)
+ {
+ printf(" flags=0x%08x: ", bbFlags);
+ dspFlags();
+ }
+ printf("\n");
+}
+
+#endif // DEBUG
+
+// Allocation function for HeapPhiArg.
+void* BasicBlock::HeapPhiArg::operator new(size_t sz, Compiler* comp)
+{
+ return comp->compGetMem(sz, CMK_HeapPhiArg);
+}
+
+void BasicBlock::CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from)
+{
+ assert(to->bbTreeList == nullptr);
+
+ to->bbFlags = from->bbFlags;
+ to->bbWeight = from->bbWeight;
+ BlockSetOps::AssignAllowUninitRhs(compiler, to->bbReach, from->bbReach);
+ to->copyEHRegion(from);
+ to->bbCatchTyp = from->bbCatchTyp;
+ to->bbRefs = from->bbRefs;
+ to->bbStkTempsIn = from->bbStkTempsIn;
+ to->bbStkTempsOut = from->bbStkTempsOut;
+ to->bbStkDepth = from->bbStkDepth;
+ to->bbCodeOffs = from->bbCodeOffs;
+ to->bbCodeOffsEnd = from->bbCodeOffsEnd;
+ VarSetOps::AssignAllowUninitRhs(compiler, to->bbScope, from->bbScope);
+#if FEATURE_STACK_FP_X87
+ to->bbFPStateX87 = from->bbFPStateX87;
+#endif // FEATURE_STACK_FP_X87
+ to->bbNatLoopNum = from->bbNatLoopNum;
+#ifdef DEBUG
+ to->bbLoopNum = from->bbLoopNum;
+ to->bbTgtStkDepth = from->bbTgtStkDepth;
+#endif // DEBUG
+
+ for (GenTreePtr fromStmt = from->bbTreeList; fromStmt != nullptr; fromStmt = fromStmt->gtNext)
+ {
+ compiler->fgInsertStmtAtEnd(to,
+ compiler->fgNewStmtFromTree(compiler->gtCloneExpr(fromStmt->gtStmt.gtStmtExpr)));
+ }
+}
+
+// LIR helpers
+void BasicBlock::MakeLIR(GenTree* firstNode, GenTree* lastNode)
+{
+#ifdef LEGACY_BACKEND
+ unreached();
+#else // !LEGACY_BACKEND
+ assert(!IsLIR());
+ assert((firstNode == nullptr) == (lastNode == nullptr));
+ assert((firstNode == lastNode) || firstNode->Precedes(lastNode));
+
+ m_firstNode = firstNode;
+ m_lastNode = lastNode;
+ bbFlags |= BBF_IS_LIR;
+#endif // LEGACY_BACKEND
+}
+
+bool BasicBlock::IsLIR()
+{
+#ifdef LEGACY_BACKEND
+ return false;
+#else // !LEGACY_BACKEND
+ const bool isLIR = (bbFlags & BBF_IS_LIR) != 0;
+ assert((bbTreeList == nullptr) || ((isLIR) == !bbTreeList->IsStatement()));
+ return isLIR;
+#endif // LEGACY_BACKEND
+}
+
+//------------------------------------------------------------------------
+// firstStmt: Returns the first statement in the block
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The first statement in the block's bbTreeList.
+//
+GenTreeStmt* BasicBlock::firstStmt()
+{
+ if (bbTreeList == nullptr)
+ {
+ return nullptr;
+ }
+
+ return bbTreeList->AsStmt();
+}
+
+//------------------------------------------------------------------------
+// lastStmt: Returns the last statement in the block
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The last statement in the block's bbTreeList.
+//
+GenTreeStmt* BasicBlock::lastStmt()
+{
+ if (bbTreeList == nullptr)
+ {
+ return nullptr;
+ }
+
+ GenTree* result = bbTreeList->gtPrev;
+ assert(result && result->gtNext == nullptr);
+ return result->AsStmt();
+}
+
+//------------------------------------------------------------------------
+// BasicBlock::firstNode: Returns the first node in the block.
+//
+GenTree* BasicBlock::firstNode()
+{
+ return IsLIR() ? bbTreeList : Compiler::fgGetFirstNode(firstStmt()->gtStmtExpr);
+}
+
+//------------------------------------------------------------------------
+// BasicBlock::lastNode: Returns the last node in the block.
+//
+GenTree* BasicBlock::lastNode()
+{
+ return IsLIR() ? m_lastNode : lastStmt()->gtStmtExpr;
+}
+
+//------------------------------------------------------------------------
+// GetUniquePred: Returns the unique predecessor of a block, if one exists.
+// The predecessor lists must be accurate.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The unique predecessor of a block, or nullptr if there is no unique predecessor.
+//
+// Notes:
+// If the first block has a predecessor (which it may have, if it is the target of
+// a backedge), we never want to consider it "unique" because the prolog is an
+// implicit predecessor.
+
+BasicBlock* BasicBlock::GetUniquePred(Compiler* compiler)
+{
+ if ((bbPreds == nullptr) || (bbPreds->flNext != nullptr) || (this == compiler->fgFirstBB))
+ {
+ return nullptr;
+ }
+ else
+ {
+ return bbPreds->flBlock;
+ }
+}
+
+//------------------------------------------------------------------------
+// GetUniqueSucc: Returns the unique successor of a block, if one exists.
+// Only considers BBJ_ALWAYS and BBJ_NONE block types.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The unique successor of a block, or nullptr if there is no unique successor.
+
+BasicBlock* BasicBlock::GetUniqueSucc()
+{
+ if (bbJumpKind == BBJ_ALWAYS)
+ {
+ return bbJumpDest;
+ }
+ else if (bbJumpKind == BBJ_NONE)
+ {
+ return bbNext;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+// Static vars.
+BasicBlock::HeapPhiArg* BasicBlock::EmptyHeapPhiDef = (BasicBlock::HeapPhiArg*)0x1;
+
+unsigned PtrKeyFuncs<BasicBlock>::GetHashCode(const BasicBlock* ptr)
+{
+#ifdef DEBUG
+ unsigned hash = SsaStressHashHelper();
+ if (hash != 0)
+ {
+ return (hash ^ (ptr->bbNum << 16) ^ ptr->bbNum);
+ }
+#endif
+ return ptr->bbNum;
+}
+
+bool BasicBlock::isEmpty()
+{
+ if (!IsLIR())
+ {
+ return (this->FirstNonPhiDef() == nullptr);
+ }
+
+ for (GenTree* node : LIR::AsRange(this).NonPhiNodes())
+ {
+ if (node->OperGet() != GT_IL_OFFSET)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/src/jit/block.h b/src/jit/block.h
new file mode 100644
index 0000000000..ecfbb620a1
--- /dev/null
+++ b/src/jit/block.h
@@ -0,0 +1,1313 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX BasicBlock XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _BLOCK_H_
+#define _BLOCK_H_
+/*****************************************************************************/
+
+#include "vartype.h" // For "var_types.h"
+#include "_typeinfo.h"
+/*****************************************************************************/
+
+// Defines VARSET_TP
+#include "varset.h"
+
+#include "blockset.h"
+#include "jitstd.h"
+#include "bitvec.h"
+#include "simplerhash.h"
+
+/*****************************************************************************/
+
+#if LARGE_EXPSET
+typedef unsigned __int64 EXPSET_TP;
+#define EXPSET_SZ 64
+#else
+typedef unsigned int EXPSET_TP;
+#define EXPSET_SZ 32
+#endif
+
+#define EXPSET_ALL ((EXPSET_TP)0 - 1)
+
+typedef BitVec ASSERT_TP;
+typedef BitVec_ValArg_T ASSERT_VALARG_TP;
+typedef BitVec_ValRet_T ASSERT_VALRET_TP;
+
+/*****************************************************************************
+ *
+ * Each basic block ends with a jump which is described as a value
+ * of the following enumeration.
+ */
+
+DECLARE_TYPED_ENUM(BBjumpKinds, BYTE)
+{
+ BBJ_EHFINALLYRET, // block ends with 'endfinally' (for finally or fault)
+ BBJ_EHFILTERRET, // block ends with 'endfilter'
+ BBJ_EHCATCHRET, // block ends with a leave out of a catch (only #if FEATURE_EH_FUNCLETS)
+ BBJ_THROW, // block ends with 'throw'
+ BBJ_RETURN, // block ends with 'ret'
+
+ BBJ_NONE, // block flows into the next one (no jump)
+
+ BBJ_ALWAYS, // block always jumps to the target
+ BBJ_LEAVE, // block always jumps to the target, maybe out of guarded
+ // region. Used temporarily until importing
+ BBJ_CALLFINALLY, // block always calls the target finally
+ BBJ_COND, // block conditionally jumps to the target
+ BBJ_SWITCH, // block ends with a switch statement
+
+ BBJ_COUNT
+}
+END_DECLARE_TYPED_ENUM(BBjumpKinds, BYTE)
+
+struct GenTree;
+struct GenTreeStmt;
+struct BasicBlock;
+class Compiler;
+class typeInfo;
+struct BasicBlockList;
+struct flowList;
+struct EHblkDsc;
+
+#if FEATURE_STACK_FP_X87
+struct FlatFPStateX87;
+#endif
+
+/*****************************************************************************
+ *
+ * The following describes a switch block.
+ *
+ * Things to know:
+ * 1. If bbsHasDefault is true, the default case is the last one in the array of basic block addresses
+ * namely bbsDstTab[bbsCount - 1].
+ * 2. bbsCount must be at least 1, for the default case. bbsCount cannot be zero. It appears that the ECMA spec
+ * allows for a degenerate switch with zero cases. Normally, the optimizer will optimize degenerate
+ * switches with just a default case to a BBJ_ALWAYS branch, and a switch with just two cases to a BBJ_COND.
+ * However, in debuggable code, we might not do that, so bbsCount might be 1.
+ */
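+// A sketch of how the default case is typically fetched (using the accessors defined just below;
+// "block" is assumed to be a BBJ_SWITCH block):
+//
+//     if (block->bbJumpSwt->bbsHasDefault)
+//     {
+//         BasicBlock* defaultDst = block->bbJumpSwt->getDefault(); // == bbsDstTab[bbsCount - 1]
+//     }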
+struct BBswtDesc
+{
+ unsigned bbsCount; // count of cases (includes 'default' if bbsHasDefault)
+ BasicBlock** bbsDstTab; // case label table address
+ bool bbsHasDefault;
+
+ BBswtDesc() : bbsHasDefault(true)
+ {
+ }
+
+ void removeDefault()
+ {
+ assert(bbsHasDefault);
+ assert(bbsCount > 0);
+ bbsHasDefault = false;
+ bbsCount--;
+ }
+
+ BasicBlock* getDefault()
+ {
+ assert(bbsHasDefault);
+ assert(bbsCount > 0);
+ return bbsDstTab[bbsCount - 1];
+ }
+};
+
+struct StackEntry
+{
+ GenTree* val;
+ typeInfo seTypeInfo;
+};
+/*****************************************************************************/
+
+enum ThisInitState
+{
+ TIS_Bottom, // We don't know anything about the 'this' pointer.
+ TIS_Uninit, // The 'this' pointer for this constructor is known to be uninitialized.
+ TIS_Init, // The 'this' pointer for this constructor is known to be initialized.
+ TIS_Top, // This results from merging the state of two blocks, one with TIS_Uninit and the other with TIS_Init.
+ // We use this in fault blocks to prevent us from accessing the 'this' pointer, while otherwise
+ // allowing the fault block to generate code.
+};
+
+struct EntryState
+{
+ ThisInitState thisInitialized : 8; // used to track whether the this ptr is initialized (we could use
+ // fewer bits here)
+ unsigned esStackDepth : 24; // size of esStack
+ StackEntry* esStack; // ptr to stack
+};
+
+// This encapsulates the "exception handling" successors of a block. That is,
+// if a basic block BB1 occurs in a try block, we consider the first basic block
+// BB2 of the corresponding handler to be an "EH successor" of BB1. Because we
+// make the conservative assumption that control flow can jump from a try block
+// to its handler at any time, the immediate (regular control flow)
+// predecessor(s) of the first block of a try block are also considered to
+// have the first block of the handler as an EH successor. This makes variables that
+// are "live-in" to the handler become "live-out" for these try-predecessor blocks,
+// so that they become live-in to the try -- which we require.
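+//
+// A sketch of a typical traversal (assuming the caller already has a Compiler* "comp" and a
+// BasicBlock* "block" in hand), using the EHSuccs collection returned by BasicBlock::GetEHSuccs()
+// further below:
+//
+//     for (BasicBlock* ehSucc : block->GetEHSuccs(comp))
+//     {
+//         // ... ehSucc is the exception-flow block (handler or filter entry) of one EH successor ...
+//     }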
+class EHSuccessorIter
+{
+ // The current compilation.
+ Compiler* m_comp;
+
+ // The block whose EH successors we are iterating over.
+ BasicBlock* m_block;
+
+ // The current "regular" successor of "m_block" that we're considering.
+ BasicBlock* m_curRegSucc;
+
+ // The current try block. If non-null, the EH successor currently being yielded is the first
+ // block of this try's handler (and, when "m_curRegSucc" is non-null, "m_curRegSucc" is the
+ // first block of this try). While this try has enclosing tries that also start with
+ // "m_curRegSucc", the corresponding handlers will be further EH successors.
+ EHblkDsc* m_curTry;
+
+ // The number of "regular" (i.e., non-exceptional) successors that remain to
+ // be considered. If BB1 has successor BB2, and BB2 is the first block of a
+ // try block, then we consider the catch block of BB2's try to be an EH
+ // successor of BB1. This captures the iteration over the successors of BB1
+ // for this purpose. (In reverse order; we're done when this field is 0).
+ int m_remainingRegSuccs;
+
+ // Requires that "m_curTry" is NULL. Determines whether there is, as
+ // discussed just above, a regular successor that's the first block of a
+ // try; if so, sets "m_curTry" to that try block. (As noted above, selecting
+ // the try containing the current regular successor as the "current try" may cause
+ // multiple first-blocks of catches to be yielded as EH successors: tries enclosing
+ // the current try are also included if they also start with the current EH successor.)
+ void FindNextRegSuccTry();
+
+public:
+ // Returns the standard "end" iterator.
+ EHSuccessorIter()
+ : m_comp(nullptr), m_block(nullptr), m_curRegSucc(nullptr), m_curTry(nullptr), m_remainingRegSuccs(0)
+ {
+ }
+
+ // Initializes the iterator to represent the EH successors of "block".
+ EHSuccessorIter(Compiler* comp, BasicBlock* block);
+
+ // Go on to the next EH successor.
+ void operator++(void);
+
+ // Requires that "this" is not equal to the standard "end" iterator. Returns the
+ // current EH successor.
+ BasicBlock* operator*();
+
+ // Returns "true" iff "*this" is equal to "ehsi" -- ignoring the "m_comp"
+ // and "m_block" fields.
+ bool operator==(const EHSuccessorIter& ehsi)
+ {
+ // Ignore the compiler; we'll assume that's the same.
+ return m_curTry == ehsi.m_curTry && m_remainingRegSuccs == ehsi.m_remainingRegSuccs;
+ }
+
+ bool operator!=(const EHSuccessorIter& ehsi)
+ {
+ return !((*this) == ehsi);
+ }
+};
+
+// Yields both normal and EH successors (in that order) in one iteration.
+class AllSuccessorIter
+{
+ // Normal succ state.
+ Compiler* m_comp;
+ BasicBlock* m_blk;
+ unsigned m_normSucc;
+ unsigned m_numNormSuccs;
+ EHSuccessorIter m_ehIter;
+
+ // True iff m_blk is a BBJ_CALLFINALLY block, and the current try block of m_ehIter,
+ // the first block of whose handler would be next yielded, is the jump target of m_blk.
+ inline bool CurTryIsBlkCallFinallyTarget();
+
+public:
+ inline AllSuccessorIter()
+ {
+ }
+
+ // Initializes "this" to iterate over all successors of "block."
+ inline AllSuccessorIter(Compiler* comp, BasicBlock* block);
+
+ // Used for constructing an appropriate "end" iter. Should be called with
+ // the number of normal successors of the block being iterated.
+ AllSuccessorIter(unsigned numSuccs) : m_normSucc(numSuccs), m_numNormSuccs(numSuccs), m_ehIter()
+ {
+ }
+
+ // Go on to the next successor.
+ inline void operator++(void);
+
+ // Requires that "this" is not equal to the standard "end" iterator. Returns the
+ // current successor.
+ inline BasicBlock* operator*();
+
+ // Returns "true" iff "*this" is equal to "asi" -- ignoring the "m_comp"
+ // and "m_block" fields.
+ bool operator==(const AllSuccessorIter& asi)
+ {
+ return m_normSucc == asi.m_normSucc && m_ehIter == asi.m_ehIter;
+ }
+
+ bool operator!=(const AllSuccessorIter& asi)
+ {
+ return !((*this) == asi);
+ }
+};
+
+//------------------------------------------------------------------------
+// BasicBlock: describes a basic block in the flowgraph.
+//
+// Note that this type derives from LIR::Range in order to make the LIR
+// utilities that are polymorphic over basic block and scratch ranges
+// faster and simpler.
+//
+struct BasicBlock : private LIR::Range
+{
+ friend class LIR;
+
+ BasicBlock* bbNext; // next BB in ascending PC offset order
+ BasicBlock* bbPrev;
+
+ void setNext(BasicBlock* next)
+ {
+ bbNext = next;
+ if (next)
+ {
+ next->bbPrev = this;
+ }
+ }
+
+ unsigned bbNum; // the block's number
+
+ unsigned bbPostOrderNum; // the block's post order number in the graph.
+ unsigned bbRefs; // number of blocks that can reach here, either by fall-through or a branch. If this falls to zero,
+ // the block is unreachable.
+
+ unsigned bbFlags; // see BBF_xxxx below
+
+#define BBF_VISITED 0x00000001 // BB visited during optimizations
+#define BBF_MARKED 0x00000002 // BB marked during optimizations
+#define BBF_CHANGED 0x00000004 // input/output of this block has changed
+#define BBF_REMOVED 0x00000008 // BB has been removed from bb-list
+
+#define BBF_DONT_REMOVE 0x00000010 // BB should not be removed during flow graph optimizations
+#define BBF_IMPORTED 0x00000020 // BB byte-code has been imported
+#define BBF_INTERNAL 0x00000040 // BB has been added by the compiler
+#define BBF_FAILED_VERIFICATION 0x00000080 // BB has verification exception
+
+#define BBF_TRY_BEG 0x00000100 // BB starts a 'try' block
+#define BBF_FUNCLET_BEG 0x00000200 // BB is the beginning of a funclet
+#define BBF_HAS_NULLCHECK 0x00000400 // BB contains a null check
+#define BBF_NEEDS_GCPOLL 0x00000800 // This BB is the source of a back edge and needs a GC Poll
+
+#define BBF_RUN_RARELY 0x00001000 // BB is rarely run (catch clauses, blocks with throws etc)
+#define BBF_LOOP_HEAD 0x00002000 // BB is the head of a loop
+#define BBF_LOOP_CALL0 0x00004000 // BB starts a loop that sometimes won't call
+#define BBF_LOOP_CALL1 0x00008000 // BB starts a loop that will always call
+
+#define BBF_HAS_LABEL 0x00010000 // BB needs a label
+#define BBF_JMP_TARGET 0x00020000 // BB is a target of an implicit/explicit jump
+#define BBF_HAS_JMP 0x00040000 // BB executes a JMP instruction (instead of return)
+#define BBF_GC_SAFE_POINT 0x00080000 // BB has a GC safe point (a call). More abstractly, BB does not
+ // require a (further) poll -- this may be because this BB has a
+ // call, or, in some cases, because the BB occurs in a loop, and
+ // we've determined that all paths in the loop body leading to BB
+ // include a call.
+#define BBF_HAS_VTABREF 0x00100000 // BB contains reference of vtable
+#define BBF_HAS_IDX_LEN 0x00200000 // BB contains simple index or length expressions on an array local var.
+#define BBF_HAS_NEWARRAY 0x00400000 // BB contains 'new' of an array
+#define BBF_HAS_NEWOBJ 0x00800000 // BB contains 'new' of an object type.
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define BBF_FINALLY_TARGET 0x01000000 // BB is the target of a finally return: where a finally will return during
+ // non-exceptional flow. Because the ARM calling sequence for calling a
+ // finally explicitly sets the return address to the finally target and jumps
+ // to the finally, instead of using a call instruction, ARM needs this to
+ // generate correct code at the finally target, to allow for proper stack
+ // unwind from within a non-exceptional call to a finally.
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define BBF_BACKWARD_JUMP 0x02000000 // BB is surrounded by a backward jump/switch arc
+#define BBF_RETLESS_CALL 0x04000000 // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
+ // BBJ_ALWAYS); see isBBCallAlwaysPair().
+#define BBF_LOOP_PREHEADER 0x08000000 // BB is a loop preheader block
+
+#define BBF_COLD 0x10000000 // BB is cold
+#define BBF_PROF_WEIGHT 0x20000000 // BB weight is computed from profile data
+#ifdef LEGACY_BACKEND
+#define BBF_FORWARD_SWITCH 0x40000000 // Aux flag used in FP codegen to know if a jmptable entry has been forwarded
+#else // !LEGACY_BACKEND
+#define BBF_IS_LIR 0x40000000 // Set if the basic block contains LIR (as opposed to HIR)
+#endif // LEGACY_BACKEND
+#define BBF_KEEP_BBJ_ALWAYS 0x80000000 // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
+ // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
+ // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
+ // finally.
+
+ bool isRunRarely()
+ {
+ return ((bbFlags & BBF_RUN_RARELY) != 0);
+ }
+ bool isLoopHead()
+ {
+ return ((bbFlags & BBF_LOOP_HEAD) != 0);
+ }
+
+// Flags to update when two blocks are compacted
+
+#define BBF_COMPACT_UPD \
+ (BBF_CHANGED | BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_NEEDS_GCPOLL | BBF_HAS_IDX_LEN | BBF_BACKWARD_JUMP | \
+ BBF_HAS_NEWARRAY | BBF_HAS_NEWOBJ)
+
+// Flags that a block should not have before it is split.
+
+#ifdef LEGACY_BACKEND
+#define BBF_SPLIT_NONEXIST \
+ (BBF_CHANGED | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_RETLESS_CALL | BBF_LOOP_PREHEADER | \
+ BBF_COLD | BBF_FORWARD_SWITCH)
+#else // !LEGACY_BACKEND
+#define BBF_SPLIT_NONEXIST \
+ (BBF_CHANGED | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_RETLESS_CALL | BBF_LOOP_PREHEADER | BBF_COLD)
+#endif // LEGACY_BACKEND
+
+// Flags lost by the top block when a block is split.
+// Note, this is a conservative guess.
+// For example, the top block might or might not have BBF_GC_SAFE_POINT,
+// but we assume it does not have BBF_GC_SAFE_POINT any more.
+
+#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS)
+
+// Flags gained by the bottom block when a block is split.
+// Note, this is a conservative guess.
+// For example, the bottom block might or might not have BBF_HAS_NEWARRAY,
+// but we assume it has BBF_HAS_NEWARRAY.
+
+// TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ?
+
+#define BBF_SPLIT_GAINED \
+ (BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | \
+ BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS)
+
+#ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope
+ static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0);
+ static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_GAINED) == 0);
+#endif
+
+#ifdef DEBUG
+ void dspFlags(); // Print the flags
+ unsigned dspCheapPreds(); // Print the predecessors (bbCheapPreds)
+ unsigned dspPreds(); // Print the predecessors (bbPreds)
+ unsigned dspSuccs(Compiler* compiler); // Print the successors. The 'compiler' argument determines whether EH
+ // regions are printed: see NumSucc() for details.
+ void dspJumpKind(); // Print the block jump kind (e.g., BBJ_NONE, BBJ_COND, etc.).
+ void dspBlockHeader(Compiler* compiler,
+ bool showKind = true,
+ bool showFlags = false,
+ bool showPreds = true); // Print a simple basic block header for various output, including a
+ // list of predecessors and successors.
+#endif // DEBUG
+
+ typedef unsigned weight_t; // Type used to hold block and edge weights
+ // Note that for CLR v2.0 and earlier our
+ // block weights were stored using unsigned shorts
+
+#define BB_UNITY_WEIGHT 100 // how much a normal execute-once block weighs
+#define BB_LOOP_WEIGHT 8 // how much more loops are weighted
+#define BB_ZERO_WEIGHT 0
+#define BB_MAX_WEIGHT ULONG_MAX // we're using an 'unsigned' for the weight
+#define BB_VERY_HOT_WEIGHT 256 // how many average hits a BB has (per BBT scenario run) for this block
+ // to be considered as very hot
+
+ weight_t bbWeight; // The dynamic execution weight of this block
+
+ // getBBWeight -- get the normalized weight of this block
+ unsigned getBBWeight(Compiler* comp);
+
+ // setBBWeight -- if the block weight is not derived from a profile, then set the weight to the input
+ // weight, but make sure to not overflow BB_MAX_WEIGHT
+ void setBBWeight(unsigned weight)
+ {
+ if (!(this->bbFlags & BBF_PROF_WEIGHT))
+ {
+ this->bbWeight = min(weight, BB_MAX_WEIGHT);
+ }
+ }
+
+ // modifyBBWeight -- same as setBBWeight, but also make sure that if the block is rarely run, it stays that
+ // way, and if it's not rarely run then its weight never drops below 1.
+ void modifyBBWeight(unsigned weight)
+ {
+ if (this->bbWeight != BB_ZERO_WEIGHT)
+ {
+ setBBWeight(max(weight, 1));
+ }
+ }
+
+ // setBBProfileWeight -- Set the profile-derived weight for a basic block
+ void setBBProfileWeight(unsigned weight)
+ {
+ this->bbFlags |= BBF_PROF_WEIGHT;
+ // Check if the multiplication by BB_UNITY_WEIGHT will overflow.
+ this->bbWeight = (weight <= BB_MAX_WEIGHT / BB_UNITY_WEIGHT) ? weight * BB_UNITY_WEIGHT : BB_MAX_WEIGHT;
+ }
+
+ // this block will inherit the same weight and relevant bbFlags as bSrc
+ void inheritWeight(BasicBlock* bSrc)
+ {
+ this->bbWeight = bSrc->bbWeight;
+
+ if (bSrc->bbFlags & BBF_PROF_WEIGHT)
+ {
+ this->bbFlags |= BBF_PROF_WEIGHT;
+ }
+ else
+ {
+ this->bbFlags &= ~BBF_PROF_WEIGHT;
+ }
+
+ if (this->bbWeight == 0)
+ {
+ this->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ this->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+
+ // Similar to inheritWeight(), but we're splitting a block (such as creating blocks for qmark removal).
+ // So, specify a percentage (0 to 99; if it's 100, just use inheritWeight()) of the weight that we're
+ // going to inherit. Since the number isn't exact, clear the BBF_PROF_WEIGHT flag.
+ void inheritWeightPercentage(BasicBlock* bSrc, unsigned percentage)
+ {
+ assert(0 <= percentage && percentage < 100);
+
+ // Check for overflow
+ if (bSrc->bbWeight * 100 <= bSrc->bbWeight)
+ {
+ this->bbWeight = bSrc->bbWeight;
+ }
+ else
+ {
+ this->bbWeight = bSrc->bbWeight * percentage / 100;
+ }
+
+ this->bbFlags &= ~BBF_PROF_WEIGHT;
+
+ if (this->bbWeight == 0)
+ {
+ this->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ this->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+
+ // makeBlockHot()
+ // This is used to override any profiling data
+ // and force a block to be in the hot region.
+ // We only call this method for handler entry point
+ // and only when HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION is 1.
+ // Doing this helps fgReorderBlocks() by telling
+ // it to try to move these blocks into the hot region.
+ // Note that we do this strictly as an optimization,
+ // not for correctness. fgDetermineFirstColdBlock()
+ // will find all handler entry points and ensure that
+ // for now we don't place them in the cold section.
+ //
+ void makeBlockHot()
+ {
+ if (this->bbWeight == BB_ZERO_WEIGHT)
+ {
+ this->bbFlags &= ~BBF_RUN_RARELY; // Clear any RarelyRun flag
+ this->bbFlags &= ~BBF_PROF_WEIGHT; // Clear any profile-derived flag
+ this->bbWeight = 1;
+ }
+ }
+
+ bool isMaxBBWeight()
+ {
+ return (bbWeight == BB_MAX_WEIGHT);
+ }
+
+ // Returns "true" if the block is empty. Empty here means there are no statement
+ // trees *except* PHI definitions.
+ bool isEmpty();
+
+ // Returns "true" iff "this" is the first block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair --
+ // a block corresponding to an exit from the try of a try/finally. In the flow graph,
+ // this becomes a block that calls the finally, and a second, immediately
+ // following empty block (in the bbNext chain) to which the finally will return, and which
+ // branches unconditionally to the next block to be executed outside the try/finally.
+ // Note that code is often generated differently than this description. For example, on ARM,
+ // the target of the BBJ_ALWAYS is loaded in LR (the return register), and a direct jump is
+ // made to the 'finally'. The effect is that the 'finally' returns directly to the target of
+ // the BBJ_ALWAYS. A "retless" BBJ_CALLFINALLY is one that has no corresponding BBJ_ALWAYS.
+ // This can happen if the finally is known to not return (e.g., it contains a 'throw'). In
+ // that case, the BBJ_CALLFINALLY block's flags have BBF_RETLESS_CALL set. Note that ARM never has
+ // "retless" BBJ_CALLFINALLY blocks due to a requirement to use the BBJ_ALWAYS for
+ // generating code.
+ bool isBBCallAlwaysPair()
+ {
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (this->bbJumpKind == BBJ_CALLFINALLY)
+#else
+ if ((this->bbJumpKind == BBJ_CALLFINALLY) && !(this->bbFlags & BBF_RETLESS_CALL))
+#endif
+ {
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // On ARM, there are no retless BBJ_CALLFINALLY.
+ assert(!(this->bbFlags & BBF_RETLESS_CALL));
+#endif
+ // Assert that the next block is a BBJ_ALWAYS of the proper form.
+ assert(this->bbNext != nullptr);
+ assert(this->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(this->bbNext->bbFlags & BBF_KEEP_BBJ_ALWAYS);
+ assert(this->bbNext->isEmpty());
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ BBjumpKinds bbJumpKind; // jump (if any) at the end of this block
+
+ /* The following union describes the jump target(s) of this block */
+ union {
+ unsigned bbJumpOffs; // PC offset (temporary only)
+ BasicBlock* bbJumpDest; // basic block
+ BBswtDesc* bbJumpSwt; // switch descriptor
+ };
+
+ // NumSucc() gives the number of successors, and GetSucc() allows one to iterate over them.
+ //
+ // The behavior of both for blocks that end in BBJ_EHFINALLYRET (a return from a finally or fault block)
+ // depends on whether "comp" is non-null. If it is null, then the block is considered to have no
+ // successor. If it is non-null, we figure out the actual successors. Some cases will want one behavior,
+ // other cases the other. For example, IL verification requires that these blocks end in an empty operand
+ // stack, and since the dataflow analysis of IL verification is concerned only with the contents of the
+ // operand stack, we can consider the finally block to have no successors. But a more general dataflow
+ // analysis that is tracking the contents of local variables might want to consider *all* successors,
+ // and would pass the current Compiler object.
+ //
+ // Similarly, BBJ_EHFILTERRET blocks are assumed to have no successors if "comp" is null; if non-null,
+ // NumSucc/GetSucc yields the first block of the try block's handler.
+ //
+ // Also, the behavior for switches changes depending on the value of "comp". If it is null, then all
+ // switch successors are returned. If it is non-null, then only unique switch successors are returned;
+ // the duplicate successors are omitted.
+ //
+ // Note that for BBJ_COND, which has two successors (fall through and condition true branch target),
+ // only the unique targets are returned. Thus, if both targets are the same, NumSucc() will only return 1
+ // instead of 2.
+ //
+ // Returns the number of successors of "this".
+ unsigned NumSucc(Compiler* comp = nullptr);
+
+ // Returns the "i"th successor. Requires (0 <= i < NumSucc()).
+ BasicBlock* GetSucc(unsigned i, Compiler* comp = nullptr);
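+
+ // A sketch of the usual way these two are combined (mirroring BasicBlock::dspSuccs() in block.cpp;
+ // pass nullptr for "comp" to get the comp-independent behavior described above):
+ //
+ //     unsigned numSuccs = block->NumSucc(comp);
+ //     for (unsigned i = 0; i < numSuccs; i++)
+ //     {
+ //         BasicBlock* succ = block->GetSucc(i, comp);
+ //         // ... process succ ...
+ //     }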
+
+ BasicBlock* GetUniquePred(Compiler* comp);
+
+ BasicBlock* GetUniqueSucc();
+
+ unsigned countOfInEdges() const
+ {
+ return bbRefs;
+ }
+
+ __declspec(property(get = getBBTreeList, put = setBBTreeList)) GenTree* bbTreeList; // the body of the block.
+
+ GenTree* getBBTreeList() const
+ {
+ return m_firstNode;
+ }
+
+ void setBBTreeList(GenTree* tree)
+ {
+ m_firstNode = tree;
+ }
+
+ EntryState* bbEntryState; // verifier tracked state of all entries in stack.
+
+#define NO_BASE_TMP UINT_MAX // base# to use when we have none
+ unsigned bbStkTempsIn; // base# for input stack temps
+ unsigned bbStkTempsOut; // base# for output stack temps
+
+#define MAX_XCPTN_INDEX (USHRT_MAX - 1)
+
+ // It would be nice to make bbTryIndex and bbHndIndex private, but there is still code that uses them directly,
+ // especially Compiler::fgNewBBinRegion() and friends.
+
+ // index, into the compHndBBtab table, of innermost 'try' clause containing the BB (used for raising exceptions).
+ // Stored as index + 1; 0 means "no try index".
+ unsigned short bbTryIndex;
+
+ // index, into the compHndBBtab table, of innermost handler (filter, catch, fault/finally) containing the BB.
+ // Stored as index + 1; 0 means "no handler index".
+ unsigned short bbHndIndex;
+
+ // Given two EH indices that are either bbTryIndex or bbHndIndex (or related), determine if index1 might be more
+ // deeply nested than index2. Both index1 and index2 are in the range [0..compHndBBtabCount], where 0 means
+ // "main function" and otherwise the value is an index into compHndBBtab[]. Note that "sibling" EH regions will
+ // have a numeric index relationship that doesn't indicate nesting, whereas a more deeply nested region must have
+ // a lower index than the region it is nested within. Note that if you compare a single block's bbTryIndex and
+ // bbHndIndex, there is guaranteed to be a nesting relationship, since that block can't be simultaneously in two
+ // sibling EH regions. In that case, "maybe" is actually "definitely".
+ static bool ehIndexMaybeMoreNested(unsigned index1, unsigned index2)
+ {
+ if (index1 == 0)
+ {
+ // index1 is in the main method. It can't be more deeply nested than index2.
+ return false;
+ }
+ else if (index2 == 0)
+ {
+ // index1 represents an EH region, whereas index2 is the main method. Thus, index1 is more deeply nested.
+ assert(index1 > 0);
+ return true;
+ }
+ else
+ {
+ // If index1 has a smaller index, it might be more deeply nested than index2.
+ assert(index1 > 0);
+ assert(index2 > 0);
+ return index1 < index2;
+ }
+ }
+
+ // catch type: class token of handler, or one of BBCT_*. Only set on first block of catch handler.
+ unsigned bbCatchTyp;
+
+ bool hasTryIndex() const
+ {
+ return bbTryIndex != 0;
+ }
+ bool hasHndIndex() const
+ {
+ return bbHndIndex != 0;
+ }
+ unsigned getTryIndex() const
+ {
+ assert(bbTryIndex != 0);
+ return bbTryIndex - 1;
+ }
+ unsigned getHndIndex() const
+ {
+ assert(bbHndIndex != 0);
+ return bbHndIndex - 1;
+ }
+ void setTryIndex(unsigned val)
+ {
+ bbTryIndex = (unsigned short)(val + 1);
+ assert(bbTryIndex != 0);
+ }
+ void setHndIndex(unsigned val)
+ {
+ bbHndIndex = (unsigned short)(val + 1);
+ assert(bbHndIndex != 0);
+ }
+ void clearTryIndex()
+ {
+ bbTryIndex = 0;
+ }
+ void clearHndIndex()
+ {
+ bbHndIndex = 0;
+ }
+
+ void copyEHRegion(const BasicBlock* from)
+ {
+ bbTryIndex = from->bbTryIndex;
+ bbHndIndex = from->bbHndIndex;
+ }
+
+ static bool sameTryRegion(const BasicBlock* blk1, const BasicBlock* blk2)
+ {
+ return blk1->bbTryIndex == blk2->bbTryIndex;
+ }
+ static bool sameHndRegion(const BasicBlock* blk1, const BasicBlock* blk2)
+ {
+ return blk1->bbHndIndex == blk2->bbHndIndex;
+ }
+ static bool sameEHRegion(const BasicBlock* blk1, const BasicBlock* blk2)
+ {
+ return sameTryRegion(blk1, blk2) && sameHndRegion(blk1, blk2);
+ }
+
+// Some non-zero value that will not collide with real tokens for bbCatchTyp
+#define BBCT_NONE 0x00000000
+#define BBCT_FAULT 0xFFFFFFFC
+#define BBCT_FINALLY 0xFFFFFFFD
+#define BBCT_FILTER 0xFFFFFFFE
+#define BBCT_FILTER_HANDLER 0xFFFFFFFF
+#define handlerGetsXcptnObj(hndTyp) ((hndTyp) != BBCT_NONE && (hndTyp) != BBCT_FAULT && (hndTyp) != BBCT_FINALLY)
+
+ // TODO-Cleanup: Get rid of bbStkDepth and use bbStackDepthOnEntry() instead
+ union {
+ unsigned short bbStkDepth; // stack depth on entry
+ unsigned short bbFPinVars; // number of inner enregistered FP vars
+ };
+
+ // Basic block predecessor lists. Early in compilation, some phases might need to compute "cheap" predecessor
+ // lists. These are stored in bbCheapPreds, computed by fgComputeCheapPreds(). If bbCheapPreds is valid,
+ // 'fgCheapPredsValid' will be 'true'. Later, the "full" predecessor lists are created by fgComputePreds(), stored
+ // in 'bbPreds', and then maintained throughout compilation. 'fgComputePredsDone' will be 'true' after the
+ // full predecessor lists are created. See the comment at fgComputeCheapPreds() to see how those differ from
+ // the "full" variant.
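+ //
+ // A sketch of walking whichever predecessor list is currently valid (this mirrors what
+ // dspBlockHeader()/dspPreds()/dspCheapPreds() do in block.cpp; "compiler" stands for whatever
+ // Compiler* the caller has):
+ //
+ //     if (compiler->fgCheapPredsValid)
+ //         for (BasicBlockList* p = block->bbCheapPreds; p != nullptr; p = p->next) { /* p->block */ }
+ //     else
+ //         for (flowList* p = block->bbPreds; p != nullptr; p = p->flNext) { /* p->flBlock */ }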
+ union {
+ BasicBlockList* bbCheapPreds; // ptr to list of cheap predecessors (used before normal preds are computed)
+ flowList* bbPreds; // ptr to list of predecessors
+ };
+
+ BlockSet bbReach; // Set of all blocks that can reach this one
+ BasicBlock* bbIDom; // Represent the closest dominator to this block (called the Immediate
+ // Dominator) used to compute the dominance tree.
+ unsigned bbDfsNum; // The index of this block in DFS reverse post order
+ // relative to the flow graph.
+
+#if ASSERTION_PROP
+ // A set of blocks which dominate this one *except* the normal entry block. This is lazily initialized
+ // and used only by Assertion Prop, intersected with fgEnterBlks!
+ BlockSet bbDoms;
+#endif
+
+ IL_OFFSET bbCodeOffs; // IL offset of the beginning of the block
+ IL_OFFSET bbCodeOffsEnd; // IL offset past the end of the block. Thus, the [bbCodeOffs..bbCodeOffsEnd)
+ // range is not inclusive of the end offset. The count of IL bytes in the block
+ // is bbCodeOffsEnd - bbCodeOffs, assuming neither are BAD_IL_OFFSET.
+
+#ifdef DEBUG
+ void dspBlockILRange(); // Display the block's IL range as [XXX...YYY), where XXX and YYY might be "???" for
+ // BAD_IL_OFFSET.
+#endif // DEBUG
+
+ VARSET_TP bbVarUse; // variables used by block (before an assignment)
+ VARSET_TP bbVarDef; // variables assigned by block (before a use)
+ VARSET_TP bbVarTmp; // TEMP: only used by FP enregistering code!
+
+ VARSET_TP bbLiveIn; // variables live on entry
+ VARSET_TP bbLiveOut; // variables live on exit
+
+ // Use, def, live in/out information for the implicit "Heap" variable.
+ unsigned bbHeapUse : 1;
+ unsigned bbHeapDef : 1;
+ unsigned bbHeapLiveIn : 1;
+ unsigned bbHeapLiveOut : 1;
+ unsigned bbHeapHavoc : 1; // If true, at some point the block does an operation that leaves the heap
+ // in an unknown state. (E.g., unanalyzed call, store through unknown
+ // pointer...)
+
+ // We want to make phi functions for the special implicit var "Heap". But since this is not a real
+ // lclVar, and thus has no local #, we can't use a GenTreePhiArg. Instead, we use this struct.
+ struct HeapPhiArg
+ {
+ bool m_isSsaNum; // If true, the phi arg is an SSA # for an internal try block heap state, being
+ // added to the phi of a catch block. If false, it's a pred block.
+ union {
+ BasicBlock* m_predBB; // Predecessor block from which the SSA # flows.
+ unsigned m_ssaNum; // SSA# for internal block heap state.
+ };
+ HeapPhiArg* m_nextArg; // Next arg in the list, else NULL.
+
+ unsigned GetSsaNum()
+ {
+ if (m_isSsaNum)
+ {
+ return m_ssaNum;
+ }
+ else
+ {
+ assert(m_predBB != nullptr);
+ return m_predBB->bbHeapSsaNumOut;
+ }
+ }
+
+ HeapPhiArg(BasicBlock* predBB, HeapPhiArg* nextArg = nullptr)
+ : m_isSsaNum(false), m_predBB(predBB), m_nextArg(nextArg)
+ {
+ }
+ HeapPhiArg(unsigned ssaNum, HeapPhiArg* nextArg = nullptr)
+ : m_isSsaNum(true), m_ssaNum(ssaNum), m_nextArg(nextArg)
+ {
+ }
+
+ void* operator new(size_t sz, class Compiler* comp);
+ };
+ static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
+ // for Heap.
+ HeapPhiArg* bbHeapSsaPhiFunc; // If the "in" Heap SSA var is not a phi definition, this value is NULL.
+ // Otherwise, it is either the special value EmptyHeapPhiDef, to indicate
+ // that Heap needs a phi definition on entry, or else it is the linked list
+ // of the phi arguments.
+ unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block.
+ unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block.
+
+#ifdef DEBUGGING_SUPPORT
+ VARSET_TP bbScope; // variables in scope over the block
+#endif
+
+ void InitVarSets(class Compiler* comp);
+
+ /* The following are the standard bit sets for dataflow analysis.
+ * We perform CSE and range-checks at the same time
+ * and assertion propagation separately,
+ * thus we can union them since the two operations are completely disjoint.
+ */
+
+ union {
+ EXPSET_TP bbCseGen; // CSEs computed by block
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionGen; // value assignments computed by block
+#endif
+ };
+
+ union {
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionKill; // value assignments killed by block
+#endif
+ };
+
+ union {
+ EXPSET_TP bbCseIn; // CSEs available on entry
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionIn; // value assignments available on entry
+#endif
+ };
+
+ union {
+ EXPSET_TP bbCseOut; // CSEs available on exit
+#if ASSERTION_PROP
+ ASSERT_TP bbAssertionOut; // value assignments available on exit
+#endif
+ };
+
+ void* bbEmitCookie;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ void* bbUnwindNopEmitCookie;
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef VERIFIER
+ stackDesc bbStackIn; // stack descriptor for input
+ stackDesc bbStackOut; // stack descriptor for output
+
+ verTypeVal* bbTypesIn; // list of variable types on input
+ verTypeVal* bbTypesOut; // list of variable types on output
+#endif // VERIFIER
+
+#if FEATURE_STACK_FP_X87
+ FlatFPStateX87* bbFPStateX87; // State of FP stack on entry to the basic block
+#endif // FEATURE_STACK_FP_X87
+
+ /* The following fields used for loop detection */
+ /* The following fields are used for loop detection */
+ typedef unsigned char loopNumber;
+ static const unsigned NOT_IN_LOOP = UCHAR_MAX;
+
+#ifdef DEBUG
+ // This is the label a loop gets as part of the second, reachability-based
+ // loop discovery mechanism. This is apparently only used for debugging.
+ // We hope we'll eventually just have one loop-discovery mechanism, and this will go away.
+ loopNumber bbLoopNum; // set to 'n' for a loop #n header
+#endif // DEBUG
+
+ loopNumber bbNatLoopNum; // Index, in optLoopTable, of most-nested loop that contains this block,
+ // or else NOT_IN_LOOP if this block is not in a loop.
+
+#define MAX_LOOP_NUM 16 // we're using a 'short' for the mask
+#define LOOP_MASK_TP unsigned // must be big enough for a mask
+
+//-------------------------------------------------------------------------
+
+#if MEASURE_BLOCK_SIZE
+ static size_t s_Size;
+ static size_t s_Count;
+#endif // MEASURE_BLOCK_SIZE
+
+ bool bbFallsThrough();
+
+ // Our slop fraction is 1/128 of the block weight rounded off
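+ // (for example, a block of weight BB_UNITY_WEIGHT == 100 gets a slop of (100 + 64) / 128 == 1)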
+ static weight_t GetSlopFraction(weight_t weightBlk)
+ {
+ return ((weightBlk + 64) / 128);
+ }
+
+ // Given an edge b1 -> b2, calculate the slop fraction by
+ // using the higher of the two block weights
+ static weight_t GetSlopFraction(BasicBlock* b1, BasicBlock* b2)
+ {
+ return GetSlopFraction(max(b1->bbWeight, b2->bbWeight));
+ }
+
+#ifdef DEBUG
+ unsigned bbTgtStkDepth; // Native stack depth on entry (for throw-blocks)
+ static unsigned s_nMaxTrees; // The max # of tree nodes in any BB
+
+ unsigned bbStmtNum; // The statement number of the first stmt in this block
+
+ // This is used in integrity checks. We semi-randomly pick a traversal stamp, label all blocks
+ // in the BB list with that stamp (in this field); then we can tell if (e.g.) predecessors are
+ // still in the BB list by whether they have the same stamp (with high probability).
+ unsigned bbTraversalStamp;
+#endif // DEBUG
+
+ ThisInitState bbThisOnEntry();
+ unsigned bbStackDepthOnEntry();
+ void bbSetStack(void* stackBuffer);
+ StackEntry* bbStackOnEntry();
+ void bbSetRunRarely();
+
+ // "bbNum" is one-based (for unknown reasons); it is sometimes useful to have the corresponding
+ // zero-based number for use as an array index.
+ unsigned bbInd()
+ {
+ assert(bbNum > 0);
+ return bbNum - 1;
+ }
+
+ GenTreeStmt* firstStmt();
+ GenTreeStmt* lastStmt();
+ GenTreeStmt* lastTopLevelStmt();
+
+ GenTree* firstNode();
+ GenTree* lastNode();
+
+ bool containsStatement(GenTree* statement);
+
+ bool endsWithJmpMethod(Compiler* comp);
+
+ bool endsWithTailCall(Compiler* comp,
+ bool fastTailCallsOnly,
+ bool tailCallsConvertibleToLoopOnly,
+ GenTree** tailCall);
+
+ bool endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly = false);
+
+ bool endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall);
+
+ // Returns the first statement in the statement list of "this" that is
+ // not an SSA definition (a lcl = phi(...) assignment).
+ GenTreeStmt* FirstNonPhiDef();
+ GenTree* FirstNonPhiDefOrCatchArgAsg();
+
+ BasicBlock()
+ :
+#if ASSERTION_PROP
+ BLOCKSET_INIT_NOCOPY(bbDoms, BlockSetOps::UninitVal())
+ ,
+#endif // ASSERTION_PROP
+ VARSET_INIT_NOCOPY(bbLiveIn, VarSetOps::UninitVal())
+ , VARSET_INIT_NOCOPY(bbLiveOut, VarSetOps::UninitVal())
+ {
+ }
+
+private:
+ EHSuccessorIter StartEHSuccs(Compiler* comp)
+ {
+ return EHSuccessorIter(comp, this);
+ }
+ EHSuccessorIter EndEHSuccs()
+ {
+ return EHSuccessorIter();
+ }
+
+ friend struct EHSuccs;
+
+ AllSuccessorIter StartAllSuccs(Compiler* comp)
+ {
+ return AllSuccessorIter(comp, this);
+ }
+ AllSuccessorIter EndAllSuccs(Compiler* comp)
+ {
+ return AllSuccessorIter(NumSucc(comp));
+ }
+
+ friend struct AllSuccs;
+
+public:
+ // Iteratable collection of the EH successors of a block.
+ class EHSuccs
+ {
+ Compiler* m_comp;
+ BasicBlock* m_block;
+
+ public:
+ EHSuccs(Compiler* comp, BasicBlock* block) : m_comp(comp), m_block(block)
+ {
+ }
+
+ EHSuccessorIter begin()
+ {
+ return m_block->StartEHSuccs(m_comp);
+ }
+ EHSuccessorIter end()
+ {
+ return EHSuccessorIter();
+ }
+ };
+
+ EHSuccs GetEHSuccs(Compiler* comp)
+ {
+ return EHSuccs(comp, this);
+ }
+
+ class AllSuccs
+ {
+ Compiler* m_comp;
+ BasicBlock* m_block;
+
+ public:
+ AllSuccs(Compiler* comp, BasicBlock* block) : m_comp(comp), m_block(block)
+ {
+ }
+
+ AllSuccessorIter begin()
+ {
+ return m_block->StartAllSuccs(m_comp);
+ }
+ AllSuccessorIter end()
+ {
+ return AllSuccessorIter(m_block->NumSucc(m_comp));
+ }
+ };
+
+ AllSuccs GetAllSuccs(Compiler* comp)
+ {
+ return AllSuccs(comp, this);
+ }
+
+ // Clone block state and statements from 'from' block to 'to' block.
+ // Assumes that "to" is an empty block.
+ static void CloneBlockState(Compiler* compiler, BasicBlock* to, const BasicBlock* from);
+
+ void MakeLIR(GenTree* firstNode, GenTree* lastNode);
+ bool IsLIR();
+};
+
+template <>
+struct PtrKeyFuncs<BasicBlock> : public KeyFuncsDefEquals<const BasicBlock*>
+{
+public:
+ // Make sure the hash is deterministic and does not depend on the pointer value "ptr".
+ static unsigned GetHashCode(const BasicBlock* ptr);
+};
+
+// A set of blocks.
+typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, bool, JitSimplerHashBehavior> BlkSet;
+
+// A map of block -> set of blocks, can be used as sparse block trees.
+typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, BlkSet*, JitSimplerHashBehavior> BlkToBlkSetMap;
+
+// Map from Block to Block. Used for a variety of purposes.
+typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, BasicBlock*, JitSimplerHashBehavior> BlockToBlockMap;
+
+// In compiler terminology the control flow between two BasicBlocks
+// is typically referred to as an "edge". Most well known are the
+// backward branches for loops, which are often called "back-edges".
+//
+// "struct flowList" is the type that represents our control flow edges.
+// This type is a linked list of zero or more "edges".
+// (The list of zero edges is represented by NULL.)
+// Every BasicBlock has a field called bbPreds of this type. This field
+// represents the list of "edges" that flow into this BasicBlock.
+// The flowList type only stores the BasicBlock* of the source for the
+// control flow edge. The destination block for the control flow edge
+// is implied to be the block which contained the bbPreds field.
+//
+// For a switch branch target there may be multiple "edges" that have
+// the same source block (and destination block). We need to count the
+// number of these edges so that during optimization we will know when
+// we have zero of them. Rather than have extra flowList entries we
+// increment the flDupCount field.
+//
+// When we have Profile weight for the BasicBlocks we can usually compute
+// the number of times each edge was executed by examining the adjacent
+// BasicBlock weights. As we are doing for BasicBlocks, we call the number
+// of times that a control flow edge was executed the "edge weight".
+// In order to compute the edge weights we need to use a bounded range
+// for every edge weight. These two fields, 'flEdgeWeightMin' and 'flEdgeWeightMax'
+// are used to hold a bounded range. Most often these will converge such
+// that both values are the same and that value is the exact edge weight.
+// Sometimes we are left with a range of possible values between [Min..Max]
+// which represents an inexact edge weight.
+//
+// The bbPreds list is initially created by Compiler::fgComputePreds()
+// and is incrementally kept up to date.
+//
+// The edge weights are computed by Compiler::fgComputeEdgeWeights().
+// The edge weights are used by Compiler::fgReorderBlocks() to straighten
+// conditional branches.
+//
+// We have a simpler struct, BasicBlockList, which is simply a singly-linked
+// list of blocks. This is used for various purposes, but one is as a "cheap"
+// predecessor list, computed by fgComputeCheapPreds(), and stored as a list
+// on BasicBlock pointed to by bbCheapPreds.
+
+struct BasicBlockList
+{
+ BasicBlockList* next; // The next BasicBlock in the list, nullptr for end of list.
+ BasicBlock* block; // The BasicBlock of interest.
+
+ BasicBlockList() : next(nullptr), block(nullptr)
+ {
+ }
+
+ BasicBlockList(BasicBlock* blk, BasicBlockList* rest) : next(rest), block(blk)
+ {
+ }
+};
+
+struct flowList
+{
+ flowList* flNext; // The next flowList entry in the list, nullptr for end of list.
+ BasicBlock* flBlock; // The BasicBlock of interest.
+
+ BasicBlock::weight_t flEdgeWeightMin;
+ BasicBlock::weight_t flEdgeWeightMax;
+
+ unsigned flDupCount; // The count of duplicate "edges" (use only for switch stmts)
+
+ // These two methods are used to set new values for flEdgeWeightMin and flEdgeWeightMax;
+ // they are used only during the computation of the edge weights.
+ // They return false if the newWeight is not within the current [min..max] range;
+ // when slop is non-zero we allow for the case where our weights might be off by 'slop'.
+ //
+ bool setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop);
+ bool setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop);
+
+ flowList() : flNext(nullptr), flBlock(nullptr), flEdgeWeightMin(0), flEdgeWeightMax(0), flDupCount(0)
+ {
+ }
+
+ flowList(BasicBlock* blk, flowList* rest)
+ : flNext(rest), flBlock(blk), flEdgeWeightMin(0), flEdgeWeightMax(0), flDupCount(0)
+ {
+ }
+};
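+
+// A minimal sketch of walking the predecessor edges of a block (illustrative only;
+// 'block' is assumed to be a BasicBlock* whose bbPreds list has already been built
+// by Compiler::fgComputePreds()):
+//
+//     for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+//     {
+//         BasicBlock* predBlock = pred->flBlock;    // source block of the edge predBlock -> block
+//         unsigned    dupCount  = pred->flDupCount; // > 1 only for duplicated switch edges
+//     }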
+
+// This enum represents a pre/post-visit action state to emulate a depth-first
+// spanning tree traversal of a tree or graph.
+enum DfsStackState
+{
+ DSS_Invalid, // The initialized, invalid error state
+ DSS_Pre, // The DFS pre-order (first visit) traversal state
+ DSS_Post // The DFS post-order (last visit) traversal state
+};
+
+// These structs represent an entry in a stack used to emulate a non-recursive
+// depth-first spanning tree traversal of a graph. The entry contains either a
+// block pointer or a block number depending on which is more useful.
+struct DfsBlockEntry
+{
+ DfsStackState dfsStackState; // The pre/post traversal action for this entry
+ BasicBlock* dfsBlock; // The corresponding block for the action
+
+ DfsBlockEntry() : dfsStackState(DSS_Invalid), dfsBlock(nullptr)
+ {
+ }
+
+ DfsBlockEntry(DfsStackState state, BasicBlock* basicBlock) : dfsStackState(state), dfsBlock(basicBlock)
+ {
+ }
+};
+
+struct DfsNumEntry
+{
+ DfsStackState dfsStackState; // The pre/post traversal action for this entry
+ unsigned dfsNum; // The corresponding block number for the action
+
+ DfsNumEntry() : dfsStackState(DSS_Invalid), dfsNum(0)
+ {
+ }
+
+ DfsNumEntry(DfsStackState state, unsigned bbNum) : dfsStackState(state), dfsNum(bbNum)
+ {
+ }
+};
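+
+// A minimal sketch of how such entries can drive a non-recursive DFS (illustrative only;
+// 'stack' is assumed to be some LIFO container of DfsBlockEntry offering Push/Pop/Empty,
+// and 'root' an arbitrary starting BasicBlock*):
+//
+//     stack.Push(DfsBlockEntry(DSS_Pre, root));
+//     while (!stack.Empty())
+//     {
+//         DfsBlockEntry entry = stack.Pop();
+//         if (entry.dfsStackState == DSS_Pre)
+//         {
+//             // first (pre-order) visit: schedule the matching post-visit,
+//             // then push DSS_Pre entries for the not-yet-visited successors
+//             stack.Push(DfsBlockEntry(DSS_Post, entry.dfsBlock));
+//         }
+//         else
+//         {
+//             // last (post-order) visit: all successors of entry.dfsBlock are done
+//         }
+//     }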
+
+/*****************************************************************************/
+
+extern BasicBlock* __cdecl verAllocBasicBlock();
+
+#ifdef DEBUG
+extern void __cdecl verDispBasicBlocks();
+#endif
+
+/*****************************************************************************
+ *
+ * The following call-back is supplied by the client; it is used by the code
+ * emitter to convert a basic block to its corresponding emitter cookie.
+ */
+
+void* emitCodeGetCookie(BasicBlock* block);
+
+AllSuccessorIter::AllSuccessorIter(Compiler* comp, BasicBlock* block)
+ : m_comp(comp), m_blk(block), m_normSucc(0), m_numNormSuccs(block->NumSucc(comp)), m_ehIter(comp, block)
+{
+ if (CurTryIsBlkCallFinallyTarget())
+ {
+ ++m_ehIter;
+ }
+}
+
+bool AllSuccessorIter::CurTryIsBlkCallFinallyTarget()
+{
+ return (m_blk->bbJumpKind == BBJ_CALLFINALLY) && (m_ehIter != EHSuccessorIter()) &&
+ (m_blk->bbJumpDest == (*m_ehIter));
+}
+
+void AllSuccessorIter::operator++(void)
+{
+ if (m_normSucc < m_numNormSuccs)
+ {
+ m_normSucc++;
+ }
+ else
+ {
+ ++m_ehIter;
+
+ // If the original block whose successors we're iterating over
+ // is a BBJ_CALLFINALLY, that finally clause's first block
+ // will be yielded as a normal successor. Don't also yield as
+ // an exceptional successor.
+ if (CurTryIsBlkCallFinallyTarget())
+ {
+ ++m_ehIter;
+ }
+ }
+}
+
+// Requires that "this" is not equal to the standard "end" iterator. Returns the
+// current successor.
+BasicBlock* AllSuccessorIter::operator*()
+{
+ if (m_normSucc < m_numNormSuccs)
+ {
+ return m_blk->GetSucc(m_normSucc, m_comp);
+ }
+ else
+ {
+ return *m_ehIter;
+ }
+}
+/*****************************************************************************/
+#endif // _BLOCK_H_
+/*****************************************************************************/
diff --git a/src/jit/blockset.h b/src/jit/blockset.h
new file mode 100644
index 0000000000..c8e27eabe8
--- /dev/null
+++ b/src/jit/blockset.h
@@ -0,0 +1,77 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This include file determines how BlockSet is implemented.
+//
+#ifndef _BLOCKSET_INCLUDED_
+#define _BLOCKSET_INCLUDED_ 1
+
+// A BlockSet is a set of BasicBlocks, represented by the BasicBlock number (bbNum).
+// Unlike VARSET_TP, we only support a single implementation: the bitset "shortlong"
+// implementation.
+//
+// Note that BasicBlocks in the JIT are numbered starting at 1. We always just waste the
+// 0th bit to avoid having to do "bbNum - 1" calculations everywhere (at the BlockSet call
+// sites). This makes reading the code easier, and avoids potential problems of forgetting
+// to do a "- 1" somewhere.
+//
+// Basic blocks can be renumbered during compilation, so it is important to not mix
+// BlockSets created before and after a renumbering. Each renumbering of the blocks creates
+// a different "epoch", during which the basic block numbers are stable.
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+#include "bitsetasshortlong.h"
+
+class BlockSetOps : public BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ BasicBlockBitSetTraits>
+{
+public:
+ // Specialize BlockSetOps::MakeFull(). Since we number basic blocks from one, we remove bit zero from
+ // the block set. Otherwise, IsEmpty() would never return true.
+ static BitSetShortLongRep MakeFull(Compiler* env)
+ {
+ BitSetShortLongRep retval;
+
+ // First, make a full set using the BitSetOps::MakeFull
+
+ retval = BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ BSShortLong,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ BasicBlockBitSetTraits>::MakeFull(env);
+
+ // Now, remove element zero, since we number basic blocks starting at one, and index the set with the
+ // basic block number. If we left this, then IsEmpty() would never return true.
+ BlockSetOps::RemoveElemD(env, retval, 0);
+
+ return retval;
+ }
+};
+
+typedef BitSetShortLongRep BlockSet;
+
+// These types should be used as the types for BlockSet arguments and return values, respectively.
+typedef BlockSetOps::ValArgType BlockSet_ValArg_T;
+typedef BlockSetOps::RetValType BlockSet_ValRet_T;
+
+// Initialize "_varName" to "_initVal." Copies contents, not references; if "_varName" is uninitialized, allocates a
+// var set for it (using "_comp" for any necessary allocation), and copies the contents of "_initVal" into it.
+#define BLOCKSET_INIT(_comp, _varName, _initVal) _varName(BlockSetOps::MakeCopy(_comp, _initVal))
+
+// Initializes "_varName" to "_initVal", without copying: if "_initVal" is an indirect representation, copies its
+// pointer into "_varName".
+#define BLOCKSET_INIT_NOCOPY(_varName, _initVal) _varName(_initVal)
+
+// The iterator pattern.
+
+// Use this to initialize an iterator "_iterName" to iterate over a BlockSet "_blockSet".
+// "_blockNum" will be an unsigned variable to which we assign the elements of "_blockSet".
+#define BLOCKSET_ITER_INIT(_comp, _iterName, _blockSet, _blockNum) \
+ unsigned _blockNum = 0; \
+ BlockSetOps::Iter _iterName(_comp, _blockSet)
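+
+// A minimal usage sketch (illustrative only; 'comp' and 'blockSet' are assumed to be a
+// Compiler* and an initialized BlockSet already in scope):
+//
+//     BLOCKSET_ITER_INIT(comp, iter, blockSet, blkNum);
+//     while (iter.NextElem(comp, &blkNum))
+//     {
+//         // 'blkNum' is the bbNum of the next block in the set
+//     }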
+
+#endif // _BLOCKSET_INCLUDED_
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
new file mode 100755
index 0000000000..0c4a311186
--- /dev/null
+++ b/src/jit/codegen.h
@@ -0,0 +1,967 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This class contains all the data & functionality for code generation
+// of a method, except for the target-specific elements, which are
+// primarily in the Target class.
+//
+
+#ifndef _CODEGEN_H_
+#define _CODEGEN_H_
+#include "compiler.h" // temporary??
+#include "codegeninterface.h"
+#include "regset.h"
+#include "jitgcinfo.h"
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_ARM_)
+#define FOREACH_REGISTER_FILE(file) \
+ for ((file) = &(this->intRegState); (file) != NULL; \
+ (file) = ((file) == &(this->intRegState)) ? &(this->floatRegState) : NULL)
+#else
+#define FOREACH_REGISTER_FILE(file) (file) = &(this->intRegState);
+#endif
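+
+// A minimal usage sketch (illustrative only; must appear inside a CodeGen member function,
+// since the macro refers to 'this'): visit the integer register state and, on the targets
+// listed above, the floating-point register state as well:
+//
+//     RegState* regState;
+//     FOREACH_REGISTER_FILE(regState)
+//     {
+//         // inspect or update *regState here
+//     }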
+
+class CodeGen : public CodeGenInterface
+{
+ friend class emitter;
+ friend class DisAssembler;
+
+public:
+ // This could use further abstraction
+ CodeGen(Compiler* theCompiler);
+
+ virtual void genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode);
+ // TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and
+ // move it to Lower
+ virtual bool genCreateAddrMode(GenTreePtr addr,
+ int mode,
+ bool fold,
+ regMaskTP regMask,
+ bool* revPtr,
+ GenTreePtr* rv1Ptr,
+ GenTreePtr* rv2Ptr,
+#if SCALED_ADDR_MODES
+ unsigned* mulPtr,
+#endif
+ unsigned* cnsPtr,
+ bool nogen = false);
+
+
+private:
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Bit masks used in negating a float or double number.
+ // The below gentrees encapsulate the data offset to the bitmasks as GT_CLS_VAR nodes.
+ // This is to avoid creating more than one data constant for these bitmasks when a
+ // method has more than one GT_NEG operation on floating point values.
+ GenTreePtr negBitmaskFlt;
+ GenTreePtr negBitmaskDbl;
+
+ // Bit masks used in computing Math.Abs() of a float or double number.
+ GenTreePtr absBitmaskFlt;
+ GenTreePtr absBitmaskDbl;
+
+ // Bit mask used in U8 -> double conversion to adjust the result.
+ GenTreePtr u8ToDblBitmask;
+
+ // Generates SSE2 code for the given tree as "Operand BitWiseOp BitMask"
+ void genSSE2BitwiseOp(GenTreePtr treeNode);
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ void genPrepForCompiler();
+
+ void genPrepForEHCodegen();
+
+ inline RegState* regStateForType(var_types t)
+ {
+ return varTypeIsFloating(t) ? &floatRegState : &intRegState;
+ }
+ inline RegState* regStateForReg(regNumber reg)
+ {
+ return genIsValidFloatReg(reg) ? &floatRegState : &intRegState;
+ }
+
+ regNumber genFramePointerReg()
+ {
+ if (isFramePointerUsed())
+ {
+ return REG_FPBASE;
+ }
+ else
+ {
+ return REG_SPBASE;
+ }
+ }
+
+ enum CompareKind
+ {
+ CK_SIGNED,
+ CK_UNSIGNED,
+ CK_LOGICAL
+ };
+ static emitJumpKind genJumpKindForOper(genTreeOps cmp, CompareKind compareKind);
+
+ // For a given compare oper tree, returns the conditions to use with jmp/set in 'jmpKind' array.
+ // The corresponding elements of jmpToTrueLabel indicate whether the target of the jump is to the
+ // 'true' label or a 'false' label.
+ //
+ // The 'true' label corresponds to the jump target of the current basic block, i.e. the target to
+ // branch to when the compare condition is true. The 'false' label corresponds to the target to
+ // branch to when the condition is false.
+ static void genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2]);
+
+#if !defined(_TARGET_64BIT_)
+ static void genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2]);
+#endif //! defined(_TARGET_64BIT_)
+
+ static bool genShouldRoundFP();
+
+ GenTreeIndir indirForm(var_types type, GenTree* base);
+
+ GenTreeIntCon intForm(var_types type, ssize_t value);
+
+ void genRangeCheck(GenTree* node);
+
+ void genLockedInstructions(GenTree* node);
+
+ //-------------------------------------------------------------------------
+ // Register-related methods
+
+ void rsInit();
+
+#ifdef REG_OPT_RSVD
+ // On some targets such as the ARM we may need to have an extra reserved register
+ // that is used when addressing stack based locals and stack based temps.
+ // This method returns the regNumber that should be used when an extra register
+ // is needed to access the stack based locals and stack based temps.
+ //
+ regNumber rsGetRsvdReg()
+ {
+ // We should have already added this register to the mask
+ // of reserved registers in regSet.rsMaskResvd
+ noway_assert((regSet.rsMaskResvd & RBM_OPT_RSVD) != 0);
+
+ return REG_OPT_RSVD;
+ }
+#endif // REG_OPT_RSVD
+
+ regNumber findStkLclInReg(unsigned lclNum)
+ {
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+ return regTracker.rsLclIsInReg(lclNum);
+ }
+
+ //-------------------------------------------------------------------------
+
+ bool genUseBlockInit; // true if we plan to block-initialize the local stack frame
+ unsigned genInitStkLclCnt; // The count of local variables that we need to zero init
+
+ // Keeps track of how many bytes we've pushed on the processor's stack.
+ //
+ unsigned genStackLevel;
+
+#if STACK_PROBES
+ // Stack Probes
+ bool genNeedPrologStackProbe;
+
+ void genGenerateStackProbe();
+#endif
+
+#ifdef LEGACY_BACKEND
+ regMaskTP genNewLiveRegMask(GenTreePtr first, GenTreePtr second);
+
+ // During codegen, determine the LiveSet after tree.
+ // Preconditions: must be called during codegen, when compCurLife and
+ // compCurLifeTree are being maintained, and tree must occur in the current
+ // statement.
+ VARSET_VALRET_TP genUpdateLiveSetForward(GenTreePtr tree);
+#endif
+
+ //-------------------------------------------------------------------------
+
+ void genReportEH();
+
+ // Allocates storage for the GC info, writes the GC info into that storage, records the address of the
+ // GC info of the method with the EE, and returns a pointer to the "info" portion (just post-header) of
+ // the GC info. Requires "codeSize" to be the size of the generated code, "prologSize" and "epilogSize"
+ // to be the sizes of the prolog and epilog, respectively. In DEBUG, makes a check involving the
+ // "codePtr", assumed to be a pointer to the start of the generated code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef JIT32_GCENCODER
+ void* genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
+ void* genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr));
+#else // !JIT32_GCENCODER
+ void genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr));
+ void genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr));
+#endif // !JIT32_GCENCODER
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ // the current (pending) label ref, a label which has been referenced but not yet seen
+ BasicBlock* genPendingCallLabel;
+
+#ifdef DEBUG
+ // Last instr we have displayed for dspInstrs
+ unsigned genCurDispOffset;
+
+ static const char* genInsName(instruction ins);
+#endif // DEBUG
+
+ //-------------------------------------------------------------------------
+
+ // JIT-time constants for use in multi-dimensional array code generation.
+ unsigned genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension);
+ unsigned genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension);
+
+#ifdef DEBUG
+ static const char* genSizeStr(emitAttr size);
+
+ void genStressRegs(GenTreePtr tree);
+#endif // DEBUG
+
+ void genCodeForBBlist();
+
+public:
+#ifndef LEGACY_BACKEND
+ // genSpillVar is called by compUpdateLifeVar in the !LEGACY_BACKEND case
+ void genSpillVar(GenTreePtr tree);
+#endif // !LEGACY_BACKEND
+
+protected:
+#ifndef LEGACY_BACKEND
+ void genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTarget = REG_NA);
+#else
+ void genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize);
+#endif
+
+ void genGCWriteBarrier(GenTreePtr tree, GCInfo::WriteBarrierForm wbf);
+
+ BasicBlock* genCreateTempLabel();
+
+ void genDefineTempLabel(BasicBlock* label);
+
+ void genAdjustSP(ssize_t delta);
+
+ void genExitCode(BasicBlock* block);
+
+ //-------------------------------------------------------------------------
+
+ GenTreePtr genMakeConst(const void* cnsAddr, var_types cnsType, GenTreePtr cnsTree, bool dblAlign);
+
+ //-------------------------------------------------------------------------
+
+ void genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTreePtr failBlk = nullptr);
+
+ void genCheckOverflow(GenTreePtr tree);
+
+ //-------------------------------------------------------------------------
+ //
+ // Prolog/epilog generation
+ //
+ //-------------------------------------------------------------------------
+
+ //
+ // Prolog functions and data (there are a few exceptions for more generally used things)
+ //
+
+ void genEstablishFramePointer(int delta, bool reportUnwindData);
+ void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState);
+ void genEnregisterIncomingStackArgs();
+ void genCheckUseBlockInit();
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ void genClearStackVec3ArgUpperBits();
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
+
+#if defined(_TARGET_ARM64_)
+ bool genInstrWithConstant(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ ssize_t imm,
+ regNumber tmpReg,
+ bool inUnwindRegion = false);
+
+ void genStackPointerAdjustment(ssize_t spAdjustment, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genPrologSaveRegPair(regNumber reg1,
+ regNumber reg2,
+ int spOffset,
+ int spDelta,
+ bool lastSavedWasPreviousPair,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero);
+
+ void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genEpilogRestoreRegPair(
+ regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
+
+ void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta);
+
+ void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta);
+
+ void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed);
+#else
+ void genPushCalleeSavedRegisters();
+#endif
+
+ void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);
+
+#if defined(_TARGET_ARM_)
+
+ void genPushFltRegs(regMaskTP regMask);
+ void genPopFltRegs(regMaskTP regMask);
+ regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat);
+
+ regMaskTP genJmpCallArgMask();
+
+ void genFreeLclFrame(unsigned frameSize,
+ /* IN OUT */ bool* pUnwindStarted,
+ bool jmpEpilog);
+
+ bool genUsedPopToReturn; // True if we use the pop into PC to return;
+ // false if we do not and must branch to LR to return.
+
+ // A set of information that is used by funclet prolog and epilog generation. It is collected once, before
+ // funclet prologs and epilogs are generated, and used by all funclet prologs and epilogs, which must all be the
+ // same.
+ struct FuncletFrameInfoDsc
+ {
+ regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR)
+ unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer
+ unsigned fiSpDelta; // Stack pointer delta
+ unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP
+ int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP
+ };
+
+ FuncletFrameInfoDsc genFuncletInfo;
+
+#elif defined(_TARGET_ARM64_)
+
+ // A set of information that is used by funclet prolog and epilog generation. It is collected once, before
+ // funclet prologs and epilogs are generated, and used by all funclet prologs and epilogs, which must all be the
+ // same.
+ struct FuncletFrameInfoDsc
+ {
+ regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR)
+ int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function
+ // (negative)
+ int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive)
+ int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive)
+ int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive)
+ int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative)
+ int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details.
+ int fiSpDelta1; // Stack pointer delta 1 (negative)
+ int fiSpDelta2; // Stack pointer delta 2 (negative)
+ };
+
+ FuncletFrameInfoDsc genFuncletInfo;
+
+#elif defined(_TARGET_AMD64_)
+
+ // A set of information that is used by funclet prolog and epilog generation. It is collected once, before
+ // funclet prologs and epilogs are generated, and used by all funclet prologs and epilogs, which must all be the
+ // same.
+ struct FuncletFrameInfoDsc
+ {
+ unsigned fiFunction_InitialSP_to_FP_delta; // Delta between Initial-SP and the frame pointer
+ unsigned fiSpDelta; // Stack pointer delta
+ int fiPSP_slot_InitialSP_offset; // PSP slot offset from Initial-SP
+ };
+
+ FuncletFrameInfoDsc genFuncletInfo;
+
+#endif // _TARGET_AMD64_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ // Save/Restore callee saved float regs to stack
+ void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize);
+ void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize);
+
+#endif // _TARGET_XARCH_ && !FEATURE_STACK_FP_X87
+
+#if !FEATURE_STACK_FP_X87
+ void genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg);
+#endif // !FEATURE_STACK_FP_X87
+
+ regNumber genGetZeroReg(regNumber initReg, bool* pInitRegZeroed);
+
+ void genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed);
+
+ void genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed);
+
+ void genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed);
+
+ void genFinalizeFrame();
+
+#ifdef PROFILING_SUPPORTED
+ void genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed);
+ void genProfilingLeaveCallback(unsigned helper = CORINFO_HELP_PROF_FCN_LEAVE);
+#endif // PROFILING_SUPPORTED
+
+ void genPrologPadForReJit();
+
+ void genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base = REG_NA,
+ bool isJump = false,
+ bool isNoGC = false);
+
+ void genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset);
+
+ //
+ // Epilog functions
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+ bool genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog);
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+ void genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog);
+
+#else // !defined(_TARGET_ARM64_)
+
+ void genPopCalleeSavedRegisters(bool jmpEpilog = false);
+
+#endif // !defined(_TARGET_ARM64_)
+
+ //
+ // Common or driving functions
+ //
+
+ void genReserveProlog(BasicBlock* block); // currently unused
+ void genReserveEpilog(BasicBlock* block);
+ void genFnProlog();
+ void genFnEpilog(BasicBlock* block);
+
+#if FEATURE_EH_FUNCLETS
+
+ void genReserveFuncletProlog(BasicBlock* block);
+ void genReserveFuncletEpilog(BasicBlock* block);
+ void genFuncletProlog(BasicBlock* block);
+ void genFuncletEpilog();
+ void genCaptureFuncletPrologEpilogInfo();
+
+ void genSetPSPSym(regNumber initReg, bool* pInitRegZeroed);
+
+ void genUpdateCurrentFunclet(BasicBlock* block);
+
+#else // FEATURE_EH_FUNCLETS
+
+ // This is a no-op when there are no funclets!
+ void genUpdateCurrentFunclet(BasicBlock* block)
+ {
+ return;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ void genGeneratePrologsAndEpilogs();
+
+#if defined(DEBUG) && defined(_TARGET_ARM64_)
+ void genArm64EmitterUnitTests();
+#endif
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+ void genAmd64EmitterUnitTests();
+#endif
+
+//-------------------------------------------------------------------------
+//
+// End prolog/epilog generation
+//
+//-------------------------------------------------------------------------
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+#ifdef DEBUG
+ void genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping);
+ void genIPmappingListDisp();
+#endif // DEBUG
+
+ void genIPmappingAdd(IL_OFFSETX offset, bool isLabel);
+ void genIPmappingAddToFront(IL_OFFSETX offset);
+ void genIPmappingGen();
+
+ void genEnsureCodeEmitted(IL_OFFSETX offsx);
+
+ //-------------------------------------------------------------------------
+ // scope info for the variables
+
+ void genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& loc);
+
+ void genSetScopeInfo();
+
+ void genRemoveBBsection(BasicBlock* head, BasicBlock* tail);
+
+protected:
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX ScopeInfo XX
+ XX XX
+ XX Keeps track of the scopes during code-generation. XX
+ XX This is used to translate the local-variable debugging information XX
+ XX from IL offsets to native code offsets. XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ /*****************************************************************************/
+ /*****************************************************************************
+ * ScopeInfo
+ *
+ * This class is called during code gen at block-boundaries, and when the
+ * set of live variables changes. It keeps track of the scope of the variables
+ * in terms of the native code PC.
+ */
+
+public:
+ void siInit();
+
+ void siBeginBlock(BasicBlock* block);
+
+ void siEndBlock(BasicBlock* block);
+
+ virtual void siUpdate();
+
+ void siCheckVarScope(unsigned varNum, IL_OFFSET offs);
+
+ void siCloseAllOpenScopes();
+
+#ifdef DEBUG
+ void siDispOpenScopes();
+#endif
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ struct siScope
+ {
+ emitLocation scStartLoc; // emitter location of start of scope
+ emitLocation scEndLoc; // emitter location of end of scope
+
+ unsigned scVarNum; // index into lvaTable
+ unsigned scLVnum; // 'which' in eeGetLVinfo()
+
+ unsigned scStackLevel; // Only for stk-vars
+ bool scAvailable : 1; // It has a home / Home recycled - TODO-Cleanup: it appears this is unused (always true)
+
+ siScope* scPrev;
+ siScope* scNext;
+ };
+
+ siScope siOpenScopeList, siScopeList, *siOpenScopeLast, *siScopeLast;
+
+ unsigned siScopeCnt;
+
+ VARSET_TP siLastLife; // Life at last call to siUpdate()
+
+ // Tracks the last entry for each tracked register variable
+
+ siScope* siLatestTrackedScopes[lclMAX_TRACKED];
+
+ IL_OFFSET siLastEndOffs; // IL offset of the (exclusive) end of the last block processed
+
+#if FEATURE_EH_FUNCLETS
+ bool siInFuncletRegion; // Have we seen the start of the funclet region?
+#endif // FEATURE_EH_FUNCLETS
+
+ // Functions
+
+ siScope* siNewScope(unsigned LVnum, unsigned varNum);
+
+ void siRemoveFromOpenScopeList(siScope* scope);
+
+ void siEndTrackedScope(unsigned varIndex);
+
+ void siEndScope(unsigned varNum);
+
+ void siEndScope(siScope* scope);
+
+#ifdef DEBUG
+ bool siVerifyLocalVarTab();
+#endif
+
+#ifdef LATE_DISASM
+public:
+ /* virtual */
+ const char* siRegVarName(size_t offs, size_t size, unsigned reg);
+
+ /* virtual */
+ const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs);
+#endif // LATE_DISASM
+
+public:
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX PrologScopeInfo XX
+ XX XX
+ XX We need special handling in the prolog block, as the parameter variables XX
+ XX may not be in the same position described by genLclVarTable - they all XX
+ XX start out on the stack XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void psiBegProlog();
+
+ void psiAdjustStackLevel(unsigned size);
+
+ void psiMoveESPtoEBP();
+
+ void psiMoveToReg(unsigned varNum, regNumber reg = REG_NA, regNumber otherReg = REG_NA);
+
+ void psiMoveToStack(unsigned varNum);
+
+ void psiEndProlog();
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ struct psiScope
+ {
+ emitLocation scStartLoc; // emitter location of start of scope
+ emitLocation scEndLoc; // emitter location of end of scope
+
+ unsigned scSlotNum; // index into lclVarTab
+ unsigned scLVnum; // 'which' in eeGetLVinfo()
+
+ bool scRegister;
+
+ union {
+ struct
+ {
+ regNumberSmall scRegNum;
+
+ // Used for:
+ // - "other half" of long var on architectures with 32 bit size registers - x86.
+ // - for System V structs it stores the second register
+ // used to pass a register passed struct.
+ regNumberSmall scOtherReg;
+ } u1;
+
+ struct
+ {
+ regNumberSmall scBaseReg;
+ NATIVE_OFFSET scOffset;
+ } u2;
+ };
+
+ psiScope* scPrev;
+ psiScope* scNext;
+ };
+
+ psiScope psiOpenScopeList, psiScopeList, *psiOpenScopeLast, *psiScopeLast;
+
+ unsigned psiScopeCnt;
+
+ // Implementation Functions
+
+ psiScope* psiNewPrologScope(unsigned LVnum, unsigned slotNum);
+
+ void psiEndPrologScope(psiScope* scope);
+
+ void psSetScopeOffset(psiScope* newScope, LclVarDsc* lclVarDsc1);
+
+/*****************************************************************************
+ * TrnslLocalVarInfo
+ *
+ * This struct holds the LocalVarInfo in terms of the generated native code
+ * after a call to genSetScopeInfo()
+ */
+
+#ifdef DEBUG
+
+ struct TrnslLocalVarInfo
+ {
+ unsigned tlviVarNum;
+ unsigned tlviLVnum;
+ VarName tlviName;
+ UNATIVE_OFFSET tlviStartPC;
+ size_t tlviLength;
+ bool tlviAvailable;
+ Compiler::siVarLoc tlviVarLoc;
+ };
+
+ // Array of scopes of LocalVars in terms of native code
+
+ TrnslLocalVarInfo* genTrnslLocalVarInfo;
+ unsigned genTrnslLocalVarCount;
+#endif
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+#ifndef LEGACY_BACKEND
+#include "codegenlinear.h"
+#else // LEGACY_BACKEND
+#include "codegenclassic.h"
+#endif // LEGACY_BACKEND
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Instruction XX
+ XX XX
+ XX The interface to generate a machine-instruction. XX
+ XX Currently specific to x86 XX
+ XX TODO-Cleanup: Consider factoring this out of CodeGen XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void instInit();
+
+ regNumber genGetZeroRegister();
+
+ void instGen(instruction ins);
+#ifdef _TARGET_XARCH_
+ void instNop(unsigned size);
+#endif
+
+ void inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock);
+
+ void inst_SET(emitJumpKind condition, regNumber reg);
+
+ void inst_RV(instruction ins, regNumber reg, var_types type, emitAttr size = EA_UNKNOWN);
+
+ void inst_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ var_types type = TYP_I_IMPL,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_RV_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ emitAttr size,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_IV(instruction ins, int val);
+ void inst_IV_handle(instruction ins, int val);
+ void inst_FS(instruction ins, unsigned stk = 0);
+
+ void inst_RV_IV(instruction ins, regNumber reg, ssize_t val, emitAttr size, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_ST_RV(instruction ins, TempDsc* tmp, unsigned ofs, regNumber reg, var_types type);
+ void inst_ST_IV(instruction ins, TempDsc* tmp, unsigned ofs, int val, var_types type);
+
+ void inst_SA_RV(instruction ins, unsigned ofs, regNumber reg, var_types type);
+ void inst_SA_IV(instruction ins, unsigned ofs, int val, var_types type);
+
+ void inst_RV_ST(
+ instruction ins, regNumber reg, TempDsc* tmp, unsigned ofs, var_types type, emitAttr size = EA_UNKNOWN);
+ void inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs);
+
+ void instEmit_indCall(GenTreePtr call,
+ size_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
+
+ void instEmit_RM(instruction ins, GenTreePtr tree, GenTreePtr addr, unsigned offs);
+
+ void instEmit_RM_RV(instruction ins, emitAttr size, GenTreePtr tree, regNumber reg, unsigned offs);
+
+ void instEmit_RV_RM(instruction ins, emitAttr size, regNumber reg, GenTreePtr tree, unsigned offs);
+
+ void instEmit_RV_RIA(instruction ins, regNumber reg1, regNumber reg2, unsigned offs);
+
+ void inst_TT(instruction ins, GenTreePtr tree, unsigned offs = 0, int shfv = 0, emitAttr size = EA_UNKNOWN);
+
+ void inst_TT_RV(instruction ins,
+ GenTreePtr tree,
+ regNumber reg,
+ unsigned offs = 0,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_TT_IV(instruction ins,
+ GenTreePtr tree,
+ ssize_t val,
+ unsigned offs = 0,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_RV_AT(instruction ins,
+ emitAttr size,
+ var_types type,
+ regNumber reg,
+ GenTreePtr tree,
+ unsigned offs = 0,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_AT_IV(instruction ins, emitAttr size, GenTreePtr baseTree, int icon, unsigned offs = 0);
+
+ void inst_RV_TT(instruction ins,
+ regNumber reg,
+ GenTreePtr tree,
+ unsigned offs = 0,
+ emitAttr size = EA_UNKNOWN,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_RV_TT_IV(instruction ins, regNumber reg, GenTreePtr tree, int val);
+
+ void inst_FS_TT(instruction ins, GenTreePtr tree);
+
+ void inst_RV_SH(instruction ins, emitAttr size, regNumber reg, unsigned val, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void inst_TT_SH(instruction ins, GenTreePtr tree, unsigned val, unsigned offs = 0);
+
+ void inst_RV_CL(instruction ins, regNumber reg, var_types type = TYP_I_IMPL);
+
+ void inst_TT_CL(instruction ins, GenTreePtr tree, unsigned offs = 0);
+
+#if defined(_TARGET_XARCH_)
+ void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival);
+#endif
+
+ void inst_RV_RR(instruction ins, emitAttr size, regNumber reg1, regNumber reg2);
+
+ void inst_RV_ST(instruction ins, emitAttr size, regNumber reg, GenTreePtr tree);
+
+ void inst_mov_RV_ST(regNumber reg, GenTreePtr tree);
+
+ void instGetAddrMode(GenTreePtr addr, regNumber* baseReg, unsigned* indScale, regNumber* indReg, unsigned* cns);
+
+ void inst_set_SV_var(GenTreePtr tree);
+
+#ifdef _TARGET_ARM_
+ bool arm_Valid_Imm_For_Instr(instruction ins, ssize_t imm, insFlags flags);
+ bool arm_Valid_Disp_For_LdSt(ssize_t disp, var_types type);
+ bool arm_Valid_Imm_For_Alu(ssize_t imm);
+ bool arm_Valid_Imm_For_Mov(ssize_t imm);
+ bool arm_Valid_Imm_For_Small_Mov(regNumber reg, ssize_t imm, insFlags flags);
+ bool arm_Valid_Imm_For_Add(ssize_t imm, insFlags flag);
+ bool arm_Valid_Imm_For_Add_SP(ssize_t imm);
+ bool arm_Valid_Imm_For_BL(ssize_t addr);
+
+ bool ins_Writes_Dest(instruction ins);
+#endif
+
+ bool isMoveIns(instruction ins);
+ instruction ins_Move_Extend(var_types srcType, bool srcInReg);
+
+ instruction ins_Copy(var_types dstType);
+ instruction ins_CopyIntToFloat(var_types srcType, var_types dstTyp);
+ instruction ins_CopyFloatToInt(var_types srcType, var_types dstTyp);
+ static instruction ins_FloatStore(var_types type = TYP_DOUBLE);
+ static instruction ins_FloatCopy(var_types type = TYP_DOUBLE);
+ instruction ins_FloatConv(var_types to, var_types from);
+ instruction ins_FloatCompare(var_types type);
+ instruction ins_MathOp(genTreeOps oper, var_types type);
+ instruction ins_FloatSqrt(var_types type);
+
+ void instGen_Return(unsigned stkArgSize);
+
+ void instGen_MemoryBarrier();
+
+ void instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags = INS_FLAGS_DONT_CARE);
+
+ void instGen_Compare_Reg_To_Zero(emitAttr size, regNumber reg);
+
+ void instGen_Compare_Reg_To_Reg(emitAttr size, regNumber reg1, regNumber reg2);
+
+ void instGen_Compare_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm);
+
+ void instGen_Load_Reg_From_Lcl(var_types srcType, regNumber dstReg, int varNum, int offs);
+
+ void instGen_Store_Reg_Into_Lcl(var_types dstType, regNumber srcReg, int varNum, int offs);
+
+ void instGen_Store_Imm_Into_Lcl(
+ var_types dstType, emitAttr sizeAttr, ssize_t imm, int varNum, int offs, regNumber regToUse = REG_NA);
+
+#ifdef DEBUG
+ void __cdecl instDisp(instruction ins, bool noNL, const char* fmt, ...);
+#endif
+
+#ifdef _TARGET_XARCH_
+ instruction genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue);
+#endif // _TARGET_XARCH_
+};
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Instruction XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef _TARGET_XARCH_
+/*****************************************************************************
+ *
+ * Generate a floating-point instruction that has one operand given by
+ * a tree (which has been made addressable).
+ */
+
+inline void CodeGen::inst_FS_TT(instruction ins, GenTreePtr tree)
+{
+ assert(instIsFP(ins));
+
+ assert(varTypeIsFloating(tree->gtType));
+
+ inst_TT(ins, tree, 0);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Generate a "shift reg, cl" instruction.
+ */
+
+inline void CodeGen::inst_RV_CL(instruction ins, regNumber reg, var_types type)
+{
+ inst_RV(ins, reg, type);
+}
+
+#endif // _CODEGEN_H_
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
new file mode 100644
index 0000000000..4ce82307f9
--- /dev/null
+++ b/src/jit/codegenarm.cpp
@@ -0,0 +1,2106 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ARM Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM_
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ regMaskTP regMask;
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet());
+
+ if (varTypeIsMultiReg(tree))
+ {
+ assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
+ assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
+ regNumber regLo = genRegPairLo(tree->gtRegPair);
+ regNumber regHi = genRegPairHi(tree->gtRegPair);
+ inst_TT_RV(storeIns, tree, regLo);
+ inst_TT_RV(storeIns, tree, regHi, 4);
+ }
+ else
+ {
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum);
+ }
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come this path is a managed 'null' since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+ else
+ {
+ // TODO-CQ: needs all the optimized cases
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't trashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around the code sequence.
+ * Otherwise, ECX may be modified.
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ NYI("ARM genEmitGSCookieCheck is not yet implemented for protojit");
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ if (compiler->opts.compDbgEnC)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+ }
+ else if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+#if defined(_TARGET_ARM_)
+ // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
+ // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
+ // calls the funclet during non-exceptional control flow.
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ assert(block->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
+ }
+#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
+
+ block->bbUnwindNopEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ instGen(INS_nop);
+ }
+#endif // defined(_TARGET_ARM_)
+
+ genUpdateCurrentFunclet(block);
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ /* Check for inserted throw blocks and adjust genStackLevel */
+
+ if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
+ {
+ noway_assert(block->bbFlags & BBF_JMP_TARGET);
+
+ genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
+
+ if (genStackLevel)
+ {
+ NYI("Need emitMarkStackLvl()");
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+#ifdef DEBUG
+ bool pastProfileUpdate = false;
+#endif
+
+// Traverse the block in linear order, generating code for each node as we
+// encounter it.
+#ifdef DEBUGGING_SUPPORT
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+#endif
+ for (GenTree* node : LIR::AsRange(block))
+ {
+#ifdef DEBUGGING_SUPPORT
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+
+#ifdef DEBUG
+ regSet.rsSpillChk();
+
+ assert((node->gtFlags & GTF_SPILL) == 0);
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ if (node->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
+ (node->gtOp.gtOp1 != 0 && varTypeIsGC(node->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ // When profiling, the first few nodes in a catch block will be an update of
+ // the profile count (does not interfere with the exception object).
+ if (((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) != 0) && handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ pastProfileUpdate = pastProfileUpdate || node->OperGet() == GT_CATCH_ARG;
+ if (!pastProfileUpdate)
+ {
+ nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
+ }
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after node=");
+ Compiler::printTreeID(node);
+ printf(" BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == 0);
+#endif // DEBUG
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have an xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region from the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
+ // We handle case #1 here, and case #2 in the emitter.
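+ // For illustration only (hypothetical layout for case #1):
+ //
+ //     call FOO         ; last instruction of a try region
+ //   L_next:            ; first instruction of a following, different EH region
+ //
+ // The return address pushed by the call is the address of L_next, so a stack walk started
+ // inside FOO would attribute this frame to the wrong EH region; emitting a NOP after the
+ // call keeps the return address inside the call's region.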
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+ // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif //_TARGET_AMD64_
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
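+ // For illustration only (hypothetical code), if a throw helper call is the last
+ // instruction of a funclet:
+ //
+ //     call ThrowHelper     ; last instruction of the funclet
+ //     breakpoint           ; never executed (INS_BREAKPOINT below); keeps the unwinder's
+ //                          ; reported instruction pointer inside the funclet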
+ if ((block->bbNext == NULL)
+#if FEATURE_EH_FUNCLETS
+ || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
+#endif // FEATURE_EH_FUNCLETS
+#ifdef _TARGET_AMD64_
+ || !BasicBlock::sameEHRegion(block, block->bbNext)
+#endif // _TARGET_AMD64_
+ || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+ // Now set REG_LR to the address of where the finally funclet should
+ // return to directly.
+
+ BasicBlock* bbFinallyRet;
+ bbFinallyRet = NULL;
+
+ // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
+ // we would have otherwise created retless calls.
+ assert(block->isBBCallAlwaysPair());
+
+ assert(block->bbNext != NULL);
+ assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbNext->bbJumpDest != NULL);
+ assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
+
+ bbFinallyRet = block->bbNext->bbJumpDest;
+ bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
+
+#if 0
+ // TODO-ARM-CQ:
+ // We don't know the address of the finally funclet yet, and adr requires that the offset
+ // from the current IP to the finally funclet be within 4095 bytes. So this code is disabled
+ // for now.
+ getEmitter()->emitIns_J_R (INS_adr,
+ EA_4BYTE,
+ bbFinallyRet,
+ REG_LR);
+#else // !0
+ // Load the address where the finally funclet should return into LR.
+ // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
+ // the return.
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+#endif // !0
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case BBJ_EHCATCHRET:
+ // set r0 to the address the VM should return to after the catch
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#elif defined(_TARGET_AMD64_)
+
+ case BBJ_EHCATCHRET:
+ // Set EAX to the address the VM should return to after the catch.
+ // Generate a RIP-relative
+ // lea reg, [rip + disp32] ; the RIP is implicit
+ // which will be position-independent.
+ // TODO-ARM-Bug?: For ngen, we need to generate a reloc for the displacement (maybe EA_PTR_DSP_RELOC).
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#endif // _TARGET_AMD64_
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate, compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// Return the child that has the same register as the dst (if any).
+// The other child is returned through the 'other' out parameter.
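+//
+// A hypothetical usage sketch (illustrative, not an actual call site):
+//
+//     GenTree* other;
+//     GenTree* same = sameRegAsDst(tree, other);
+//     if (same != nullptr)
+//     {
+//         // dst already holds 'same', so a two-operand form "ins dstReg, other" suffices
+//     }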
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return NULL;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
+ GenTreePtr op2 = tree->gtOp.gtOp2->gtEffectiveVal();
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return NULL;
+ }
+}
+
+// move an immediate value into an integer register
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if ((imm == 0) && !EA_IS_RELOC(size))
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ if (AddrShouldUsePCRel(imm))
+ {
+ getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/*****************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+ // relocatable values tend to come down as a CNS_INT of native int type
+ // so the line between these two opcodes is kind of blurry
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ NYI("GT_CNS_DBL");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ JITDUMP("Generating: ");
+ DISPNODE(treeNode);
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_CNS_INT:
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NEG:
+ case GT_NOT:
+ {
+ NYI("GT_NEG and GT_NOT");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+
+ case GT_ADD:
+ case GT_SUB:
+ {
+ const genTreeOps oper = treeNode->OperGet();
+ if ((oper == GT_ADD || oper == GT_SUB) && treeNode->gtOverflow())
+ {
+ // This is also checked in the importer.
+ NYI("Overflow not yet implemented");
+ }
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ regNumber op1reg = op1->gtRegNum;
+ regNumber op2reg = op2->gtRegNum;
+
+ GenTreePtr dst;
+ GenTreePtr src;
+
+ genConsumeIfReg(op1);
+ genConsumeIfReg(op2);
+
+ // This is the case of reg1 = reg1 op reg2
+ // We're ready to emit the instruction without any moves
+ if (op1reg == targetReg)
+ {
+ dst = op1;
+ src = op2;
+ }
+ // We have reg1 = reg2 op reg1
+ // In order for this operation to be correct
+ // we need that op is a commutative operation so
+ // we can convert it into reg1 = reg1 op reg2 and emit
+ // the same code as above
+ else if (op2reg == targetReg)
+ {
+ noway_assert(GenTree::OperIsCommutative(treeNode->OperGet()));
+ dst = op2;
+ src = op1;
+ }
+ // dest, op1 and op2 registers are different:
+ // reg3 = reg1 op reg2
+ // We can implement this by issuing a mov:
+ // reg3 = reg1
+ // reg3 = reg3 op reg2
+ else
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
+ regTracker.rsTrackRegCopy(targetReg, op1reg);
+ gcInfo.gcMarkRegPtrVal(targetReg, targetType);
+ dst = treeNode;
+ src = op2;
+ }
+
+ regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ noway_assert(r == targetReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ genCodeForShift(treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+ // Cast is never contained (?)
+ noway_assert(targetReg != REG_NA);
+
+ // Overflow conversions from float/double --> int types go through helper calls.
+ if (treeNode->gtOverflow() && !varTypeIsFloating(treeNode->gtOp.gtOp1))
+ NYI("Unimplmented GT_CAST:int <--> int with overflow");
+
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_VAR:
+ {
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+ emit->emitIns_R_S(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode->gtRegNum,
+ lcl->gtLclNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ {
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_LCL_FLD: struct load local field not supported");
+ NYI_IF(treeNode->gtRegNum == REG_NA, "GT_LCL_FLD: load local field not into a register is not supported");
+
+ emitAttr size = emitTypeSize(targetType);
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_STORE_LCL_FLD: struct store local field not supported");
+ noway_assert(!treeNode->InReg());
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeIfReg(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "struct store local not supported");
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeIfReg(op1);
+ if (treeNode->gtRegNum == REG_NA)
+ {
+ // stack store
+ emit->emitInsMov(ins_Store(targetType), emitTypeSize(treeNode), treeNode);
+ compiler->lvaTable[treeNode->AsLclVarCommon()->gtLclNum].lvRegNum = REG_STK;
+ }
+ else if (op1->isContained())
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
+ if (treeNode->gtRegNum != REG_NA)
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ {
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ break;
+ }
+ assert(op1 != nullptr);
+ op1 = op1->gtEffectiveVal();
+
+ NYI_IF(op1->gtRegNum == REG_NA, "GT_RETURN: return of a value not in register");
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(op1) ? REG_FLOATRET : REG_INTRET;
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), retReg, op1->gtRegNum, targetType);
+ }
+ }
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode* lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+ emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MUL:
+ {
+ NYI("GT_MUL");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
+ // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
+ // on float/double args.
+ noway_assert(!varTypeIsFloating(treeNode));
+ __fallthrough;
+
+ case GT_DIV:
+ {
+ NYI("GT_DIV");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_INTRINSIC:
+ {
+ NYI("GT_INTRINSIC");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-ARM-CQ: Check if we can use the currently set flags.
+ // TODO-ARM-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1->gtEffectiveVal();
+ GenTreePtr op2 = tree->gtOp2->gtEffectiveVal();
+
+ genConsumeIfReg(op1);
+ genConsumeIfReg(op2);
+
+ instruction ins = INS_cmp;
+ emitAttr cmpAttr;
+ if (varTypeIsFloating(op1))
+ {
+ NYI("Floating point compare");
+
+ bool isUnordered = ((treeNode->gtFlags & GTF_RELOP_NAN_UN) != 0);
+ switch (tree->OperGet())
+ {
+ case GT_EQ:
+ ins = INS_beq;
+ break;
+ case GT_NE:
+ ins = INS_bne;
+ break;
+ case GT_LT:
+ ins = isUnordered ? INS_blt : INS_blo;
+ break;
+ case GT_LE:
+ ins = isUnordered ? INS_ble : INS_bls;
+ break;
+ case GT_GE:
+ ins = isUnordered ? INS_bpl : INS_bge;
+ break;
+ case GT_GT:
+ ins = isUnordered ? INS_bhi : INS_bgt;
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+ assert(!varTypeIsFloating(op2Type));
+ ins = INS_cmp;
+ if (op1Type == op2Type)
+ {
+ cmpAttr = emitTypeSize(op1Type);
+ }
+ else
+ {
+ var_types cmpType = TYP_INT;
+ bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF);
+ bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF);
+ NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare");
+ assert(!op1->isContainedMemoryOp() || op1Type == op2Type);
+ assert(!op2->isContainedMemoryOp() || op1Type == op2Type);
+ cmpAttr = emitTypeSize(cmpType);
+ }
+ }
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree* cmp = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ // TODO-ARM-CQ: Check if we can use the currently set flags.
+ CompareKind compareKind = ((cmp->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ emitJumpKind jmpKind = genJumpKindForOper(cmp->gtOper, compareKind);
+ BasicBlock* jmpTarget = compiler->compCurBB->bbJumpDest;
+
+ inst_JMP(jmpKind, jmpTarget);
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
+
+ GenTree* data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeIfReg(data);
+ GenTreeIntCon cns = intForm(TYP_INT, 0);
+ emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ NYI("GT_STOREIND");
+ }
+ break;
+
+ case GT_COPY:
+ {
+ assert(treeNode->gtOp.gtOp1->IsLocal());
+ GenTreeLclVarCommon* lcl = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+ inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, genConsumeReg(treeNode->gtOp.gtOp1), targetType,
+ emitTypeSize(targetType));
+
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(treeNode->gtOp.gtOp1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(treeNode->gtOp.gtOp1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_PUTARG_STK: struct support not implemented");
+
+ // Get argument offset on stack.
+ // Here we cross-check that the argument offset hasn't changed from lowering to codegen,
+ // since the arg slot number is stored in the GT_PUTARG_STK node during lowering.
+ int argOffset = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ if (data->isContained())
+ {
+ emit->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), compiler->lvaOutgoingArgSpaceVar,
+ argOffset, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ genConsumeReg(data);
+ emit->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ }
+ }
+ break;
+
+ case GT_PUTARG_REG:
+ {
+ NYI_IF(targetType == TYP_STRUCT, "GT_PUTARG_REG: struct support not implemented");
+
+ // commas show up here commonly, as part of a nullchk operation
+ GenTree* op1 = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), treeNode->gtRegNum, op1->gtRegNum, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_CMPXCHG:
+ {
+ NYI("GT_CMPXCHG");
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ NYI("GT_NO_OP");
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
+ emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not as used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+ emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum);
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "NYI: Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+ notYetImplemented(message, __FILE__, __LINE__);
+#else
+ NYI("unimplemented node");
+#endif
+ }
+ break;
+ }
+}
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void CodeGen::genLockedInstructions(GenTree* treeNode)
+{
+ NYI("genLockedInstructions");
+}
+
+// generate code for GT_ARR_BOUNDS_CHECK node
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal();
+ GenTreePtr arrIdx = bndsChk->gtIndex->gtEffectiveVal();
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ GenTree * src1, *src2;
+ emitJumpKind jmpKind;
+
+ if (arrIdx->isContainedIntOrIImmed())
+ {
+ // To encode using a cmp immediate, we place the
+ // constant operand in the second position
+ src1 = arrLen;
+ src2 = arrIdx;
+ jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ }
+ else
+ {
+ src1 = arrIdx;
+ src2 = arrLen;
+ jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ }
+
+ genConsumeIfReg(src1);
+ genConsumeIfReg(src2);
+
+ getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), src1, src2);
+ genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+}
+
+// make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that happen
+//
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+// make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate
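+// For example, the GT_RETURNTRAP handling above builds "GenTreeIntCon cns = intForm(TYP_INT, 0)"
+// on the stack and passes "&cns" to emitInsBinary for the compare, without allocating a node.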
+//
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins;
+
+ if (varTypeIsFloating(type))
+ return CodeGen::ins_MathOp(oper, type);
+
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_AND;
+ break;
+ case GT_MUL:
+ ins = INS_MUL;
+ break;
+ case GT_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_NEG:
+ ins = INS_rsb;
+ break;
+ case GT_NOT:
+ ins = INS_NOT;
+ break;
+ case GT_OR:
+ ins = INS_OR;
+ break;
+ case GT_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_XOR;
+ break;
+ default:
+ unreached();
+ break;
+ }
+ return ins;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ NYI("genCodeForShift");
+}
+
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+
+ GenTree* unspillTree = tree;
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+ if (unspillTree->gtFlags & GTF_SPILLED)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins_Load(unspillTree->gtType), dstReg, unspillTree);
+
+ unspillTree->SetInReg();
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
+
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ Compiler::printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ compiler->tmpRlsTemp(t);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->gtType), dstReg,
+ t->tdTempNum(), 0);
+
+ unspillTree->SetInReg();
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+ assert(tree->gtRegNum != REG_NA);
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+
+ if (varDsc->lvRegNum == tree->gtRegNum && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ else if (!varDsc->lvLRACandidate)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+ else
+ {
+ assert(!addr->isContained());
+ genConsumeReg(addr);
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ if (addr->Base())
+ genConsumeReg(addr->Base());
+ if (addr->Index())
+ genConsumeReg(addr->Index());
+}
+
+// do liveness update for register produced by the current node in codegen
+void CodeGen::genProduceReg(GenTree* tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(tree->gtType), tree, tree->gtRegNum);
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ // Except in the case of a dead definition of a lclVar.
+ if (tree->gtHasReg() && (!tree->IsLocal() || (tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// Produce code for a GT_CALL node
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ NYI("Call not implemented");
+}
+
+// produce code for a GT_LEA subnode
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ if (lea->Base() && lea->Index())
+ {
+ regNumber baseReg = genConsumeReg(lea->Base());
+ regNumber indexReg = genConsumeReg(lea->Index());
+ getEmitter()->emitIns_R_ARX(INS_lea, EA_BYREF, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
+ }
+ else if (lea->Base())
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_BYREF, lea->gtRegNum, genConsumeReg(lea->Base()), lea->gtOffset);
+ }
+
+ genProduceReg(lea);
+}
+
+// Generate code to materialize a condition into a register
+// (the condition codes must already have been appropriately set)
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ NYI("genSetRegToCond");
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
+void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
+void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+ NYI("Cast");
+}
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef JIT32_GCENCODER
+void*
+#else
+void
+#endif
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif
+}
+
+// TODO-ARM-Cleanup: It seems that the ARM JIT (classic and otherwise) uses this method, so it seems to be
+// inappropriately named?
+
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+ gcInfoEncoder->Build();
+
+ // The GC encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t).
+ // Let's save the values anyway for debugging purposes.
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ */
+
+void CodeGen::genEmitHelperCall(unsigned helper,
+ int argSize,
+ emitAttr retSize
+#ifndef LEGACY_BACKEND
+ ,
+ regNumber callTargetReg /*= REG_NA */
+#endif // !LEGACY_BACKEND
+ )
+{
+ NYI("Helper call");
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called for every scope info piece to record by the main genSetScopeInfo()
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+ // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+#endif // _TARGET_ARM_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
new file mode 100644
index 0000000000..ca0df53a34
--- /dev/null
+++ b/src/jit/codegenarm64.cpp
@@ -0,0 +1,9723 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Arm64 Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM64_
+#include "emit.h"
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+//------------------------------------------------------------------------
+// genInstrWithConstant: we will typically generate one instruction
+//
+// ins reg1, reg2, imm
+//
+// However the imm might not fit as a directly encodable immediate,
+// when it doesn't fit, we generate extra instruction(s) that set up
+// the 'regTmp' with the proper immediate value.
+//
+// mov regTmp, imm
+// ins reg1, reg2, regTmp
+//
+// Arguments:
+// ins - instruction
+// attr - operation size and GC attribute
+// reg1, reg2 - first and second register operands
+// imm - immediate value (third operand when it fits)
+// tmpReg - temp register to use when the 'imm' doesn't fit
+// inUnwindRegion - true if we are in a prolog/epilog region with unwind codes
+//
+// Return Value:
+// returns true if the immediate was too large and tmpReg was used and modified.
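+//
+// For illustration only (hypothetical operands): a small immediate encodes directly,
+//
+//     add reg1, reg2, #0x10
+//
+// while an immediate such as 0x12345678 does not, so we first materialize it,
+//
+//     mov tmpReg, #0x12345678    ; via instGen_Set_Reg_To_Imm (may take several instructions)
+//     add reg1, reg2, tmpReg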
+//
+bool CodeGen::genInstrWithConstant(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ ssize_t imm,
+ regNumber tmpReg,
+ bool inUnwindRegion /* = false */)
+{
+ bool immFitsInIns = false;
+ emitAttr size = EA_SIZE(attr);
+
+ // reg1 is usually a dest register
+ // reg2 is always source register
+ assert(tmpReg != reg2); // regTmp can not match any source register
+
+ switch (ins)
+ {
+ case INS_add:
+ case INS_sub:
+ if (imm < 0)
+ {
+ imm = -imm;
+ ins = (ins == INS_add) ? INS_sub : INS_add;
+ }
+ immFitsInIns = emitter::emitIns_valid_imm_for_add(imm, size);
+ break;
+
+ case INS_strb:
+ case INS_strh:
+ case INS_str:
+ // reg1 is a source register for store instructions
+ assert(tmpReg != reg1); // regTmp can not match any source register
+ immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
+ break;
+
+ case INS_ldrsb:
+ case INS_ldrsh:
+ case INS_ldrsw:
+ case INS_ldrb:
+ case INS_ldrh:
+ case INS_ldr:
+ immFitsInIns = emitter::emitIns_valid_imm_for_ldst_offset(imm, size);
+ break;
+
+ default:
+ assert(!"Unexpected instruction in genInstrWithConstant");
+ break;
+ }
+
+ if (immFitsInIns)
+ {
+ // generate a single instruction that encodes the immediate directly
+ getEmitter()->emitIns_R_R_I(ins, attr, reg1, reg2, imm);
+ }
+ else
+ {
+ // caller can specify REG_NA for tmpReg, when it "knows" that the immediate will always fit
+ assert(tmpReg != REG_NA);
+
+ // generate two or more instructions
+
+ // first we load the immediate into tmpReg
+ instGen_Set_Reg_To_Imm(size, tmpReg, imm);
+ regTracker.rsTrackRegTrash(tmpReg);
+
+ // when we are in an unwind code region
+ // we record the extra instructions using unwindPadding()
+ if (inUnwindRegion)
+ {
+ compiler->unwindPadding();
+ }
+
+ // generate the instruction using a three register encoding with the immediate in tmpReg
+ getEmitter()->emitIns_R_R_R(ins, attr, reg1, reg2, tmpReg);
+ }
+ return immFitsInIns;
+}
+
+//------------------------------------------------------------------------
+// genStackPointerAdjustment: add a specified constant value to the stack pointer in either the prolog
+// or the epilog. The unwind codes for the generated instructions are produced. An available temporary
+// register is required to be specified, in case the constant is too large to encode in an "add"
+// instruction (or "sub" instruction if we choose to use one), such that we need to load the constant
+// into a register first, before using it.
+//
+// Arguments:
+// spDelta - the value to add to SP (can be negative)
+// tmpReg - an available temporary register
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
+
+void CodeGen::genStackPointerAdjustment(ssize_t spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ // Even though INS_add is specified here, the encoder will choose either
+ // an INS_add or an INS_sub and encode the immediate as a positive value
+ //
+ if (genInstrWithConstant(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, spDelta, tmpReg, true))
+ {
+ if (pTmpRegIsZero != nullptr)
+ {
+ *pTmpRegIsZero = false;
+ }
+ }
+
+ // spDelta is negative in the prolog, positive in the epilog, but we always tell the unwind codes the positive
+ // value.
+ ssize_t spDeltaAbs = abs(spDelta);
+ unsigned unwindSpDelta = (unsigned)spDeltaAbs;
+ assert((ssize_t)unwindSpDelta == spDeltaAbs); // make sure that it fits in an unsigned
+
+ compiler->unwindAllocStack(unwindSpDelta);
+}
+
+//------------------------------------------------------------------------
+// genPrologSaveRegPair: Save a pair of general-purpose or floating-point/SIMD registers in a function or funclet
+// prolog. If possible, we use pre-indexed addressing to adjust SP and store the registers with a single instruction.
+// The caller must ensure that we can use the STP instruction, and that spOffset will be in the legal range for that
+// instruction.
+//
+// Arguments:
+// reg1 - First register of pair to save.
+// reg2 - Second register of pair to save.
+// spOffset - The offset from SP to store reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+// lastSavedWasPreviousPair - True if the last prolog instruction was to save the previous register pair. This
+// allows us to emit the "save_next" unwind code.
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
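+//
+// For illustration only (hypothetical registers and offsets): a prolog that establishes a
+// 48-byte save area and saves two register pairs might emit
+//
+//     stp fp, lr, [sp, #-48]!      ; pre-indexed: the SP adjustment folds into the first save
+//     stp x19, x20, [sp, #16]      ; reported via unwindSaveRegPair (or unwindSaveNext)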
+
+void CodeGen::genPrologSaveRegPair(regNumber reg1,
+ regNumber reg2,
+ int spOffset,
+ int spDelta,
+ bool lastSavedWasPreviousPair,
+ regNumber tmpReg,
+ bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta <= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+ assert(genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2)); // registers must be both general-purpose, or both
+ // FP/SIMD
+
+ bool needToSaveRegs = true;
+ if (spDelta != 0)
+ {
+ if ((spOffset == 0) && (spDelta >= -512))
+ {
+ // We can use pre-indexed addressing.
+ // stp REG, REG + 1, [SP, #spDelta]!
+ // 64-bit STP offset range: -512 to 504, multiple of 8.
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(reg1, reg2, spDelta);
+
+ needToSaveRegs = false;
+ }
+ else // (spDelta < -512)
+ {
+ // We need to do SP adjustment separately from the store; we can't fold in a pre-indexed addressing and the
+ // non-zero offset.
+
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+ }
+
+ if (needToSaveRegs)
+ {
+ // stp REG, REG + 1, [SP, #offset]
+ // 64-bit STP offset range: -512 to 504, multiple of 8.
+ assert(spOffset <= 504);
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+
+ if (lastSavedWasPreviousPair)
+ {
+ // This works as long as we've only been saving pairs, in order, and we've saved the previous one just
+ // before this one.
+ compiler->unwindSaveNext();
+ }
+ else
+ {
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genPrologSaveReg: Like genPrologSaveRegPair, but for a single register. Save a single general-purpose or
+// floating-point/SIMD register in a function or funclet prolog. Note that if we wish to change SP (i.e., spDelta != 0),
+// then spOffset must be 8. This is because otherwise we would create an alignment hole above the saved register, not
+// below it, which we currently don't support. This restriction could be loosened if the callers change to handle it
+// (and this function changes to support using pre-indexed STR addressing). The caller must ensure that we can use the
+// STR instruction, and that spOffset will be in the legal range for that instruction.
+//
+// Arguments:
+// reg1 - Register to save.
+// spOffset - The offset from SP to store reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
+// Return Value:
+// None.
+
+void CodeGen::genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta <= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ if (spDelta != 0)
+ {
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+
+ // str REG, [SP, #offset]
+ // 64-bit STR offset range: 0 to 32760, multiple of 8.
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+ compiler->unwindSaveReg(reg1, spOffset);
+}
+
+//------------------------------------------------------------------------
+// genEpilogRestoreRegPair: This is the opposite of genPrologSaveRegPair(), run in the epilog instead of the prolog.
+// The stack pointer adjustment, if requested, is done after the register restore, using post-index addressing.
+// The caller must ensure that we can use the LDP instruction, and that spOffset will be in the legal range for that
+// instruction.
+//
+// Arguments:
+// reg1 - First register of pair to restore.
+// reg2 - Second register of pair to restore.
+// spOffset - The offset from SP to load reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
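+// For illustration (registers and sizes are arbitrary): restoring x19/x20 while folding a +64 SP adjustment
+// (spOffset 0, spDelta 64) emits
+//     ldp x19, x20, [sp], #64
+// whereas a delta that can't be folded uses a separate add after the load:
+//     ldp x19, x20, [sp,#offset]
+//     add sp, sp, #spDelta
+//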
+// Return Value:
+// None.
+
+void CodeGen::genEpilogRestoreRegPair(
+ regNumber reg1, regNumber reg2, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta >= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ if (spDelta != 0)
+ {
+ if ((spOffset == 0) && (spDelta <= 504))
+ {
+ // Fold the SP change into this instruction.
+ // ldp reg1, reg2, [SP], #spDelta
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spDelta, INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(reg1, reg2, -spDelta);
+ }
+ else // (spOffset != 0) || (spDelta > 504)
+ {
+ // Can't fold in the SP change; need to use a separate ADD instruction.
+
+ // ldp reg1, reg2, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+ }
+ else
+ {
+ // ldp reg1, reg2, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, spOffset);
+ compiler->unwindSaveRegPair(reg1, reg2, spOffset);
+ }
+}
+
+//------------------------------------------------------------------------
+// genEpilogRestoreReg: The opposite of genPrologSaveReg(), run in the epilog instead of the prolog.
+//
+// Arguments:
+// reg1 - Register to restore.
+// spOffset - The offset from SP to restore reg1 (must be positive or zero).
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
+// tmpReg - An available temporary register. Needed for the case of large frames.
+// pTmpRegIsZero - If we use tmpReg, and pTmpRegIsZero is non-null, we set *pTmpRegIsZero to 'false'.
+// Otherwise, we don't touch it.
+//
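+// For illustration (register and sizes are arbitrary): restoring x27 from spOffset 8 and then releasing a
+// 96-byte frame (spDelta 96) emits:
+//     ldr x27, [sp,#8]
+//     add sp, sp, #96
+//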
+// Return Value:
+// None.
+
+void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero)
+{
+ assert(spOffset >= 0);
+ assert(spDelta >= 0);
+ assert((spDelta % 16) == 0); // SP changes must be 16-byte aligned
+
+ // ldr reg1, [SP, #offset]
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, reg1, REG_SPBASE, spOffset);
+ compiler->unwindSaveReg(reg1, spOffset);
+
+ if (spDelta != 0)
+ {
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(spDelta, tmpReg, pTmpRegIsZero);
+ }
+}
+
+//------------------------------------------------------------------------
+// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
+// in the function or funclet prolog. The save set does not contain FP, since that is
+// guaranteed to be saved separately, so we can set up chaining. We can only use the instructions
+// that are allowed by the unwind codes. Integer registers are stored at lower addresses,
+// FP/SIMD registers are stored at higher addresses. There are no gaps. The caller ensures that
+// there is enough space on the frame to store these registers, and that the store instructions
+// we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to
+// use. Note that the save set can contain LR if this is a frame without a frame pointer, in
+// which case LR is saved along with the other callee-saved registers. The caller can tell us
+// to fold in a stack pointer adjustment, which we will do with the first instruction. Note that
+// the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
+// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
+// it below (at a lower address than) the callee-saved registers, as that is currently how we
+// do frame layout. This means that the first stack offset will be 8, and the stack pointer
+// adjustment must be done by a SUB rather than folded into a pre-indexed store.
+//
+// Arguments:
+// regsToSaveMask - The mask of callee-saved registers to save. If empty, this function does nothing.
+// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area. Note that
+// if non-zero spDelta, then this is the offset of the first save *after* that
+// SP adjustment.
+// spDelta - If non-zero, the amount to add to SP before the register saves (must be negative or
+// zero).
+//
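+// Here's an illustrative save sequence (the mirror of the restore example shown for
+// genRestoreCalleeSavedRegistersHelp; actual registers and offsets depend on the save mask and frame layout):
+//      stp x19, x20, [sp,#32]
+//      stp x21, x22, [sp,#48]
+//      stp x23, x24, [sp,#64]
+//      stp x25, x26, [sp,#80]
+//      stp x27, x28, [sp,#96]
+//
+// For the case of non-zero spDelta with an even count of saved registers, the first save folds the SP
+// adjustment, for example:
+//      stp x19, x20, [sp,#-80]!
+//      stp x21, x22, [sp,#16]
+//      stp x23, x24, [sp,#32]
+//      stp x25, x26, [sp,#48]
+//      stp x27, x28, [sp,#64]
+//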
+// Return Value:
+// None.
+
+void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
+{
+ assert(spDelta <= 0);
+ unsigned regsToSaveCount = genCountBits(regsToSaveMask);
+ if (regsToSaveCount == 0)
+ {
+ if (spDelta != 0)
+ {
+ // Currently this is the case for varargs only
+ // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+ genStackPointerAdjustment(spDelta, REG_NA, nullptr);
+ }
+ return;
+ }
+
+ assert((spDelta % 16) == 0);
+ assert((regsToSaveMask & RBM_FP) == 0); // we never save FP here
+ assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in
+ // RBM_CALLEE_SAVED.
+
+ regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
+
+ int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP.
+
+ unsigned intRegsToSaveCount = genCountBits(maskSaveRegsInt);
+ unsigned floatRegsToSaveCount = genCountBits(maskSaveRegsFloat);
+ bool isPairSave = false;
+#ifdef DEBUG
+ bool isRegsToSaveCountOdd = ((intRegsToSaveCount + floatRegsToSaveCount) % 2 != 0);
+#endif
+
+ // Save the integer registers
+
+ bool lastSavedWasPair = false;
+
+ while (maskSaveRegsInt != RBM_NONE)
+ {
+ // If this is the first store that needs to change SP (spDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
+ (!isRegsToSaveCountOdd && spOffset == 0));
+
+ isPairSave = (intRegsToSaveCount >= 2);
+ regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsInt);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskSaveRegsInt &= ~reg1Mask;
+ intRegsToSaveCount -= 1;
+
+ if (isPairSave)
+ {
+ // We can use a STP instruction.
+
+ regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsInt);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ assert((reg2 == REG_NEXT(reg1)) || (reg2 == REG_LR));
+ maskSaveRegsInt &= ~reg2Mask;
+ intRegsToSaveCount -= 1;
+
+ genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
+
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
+ // lastSavedWasPair = true;
+
+ spOffset += 2 * REGSIZE_BYTES;
+ }
+ else
+ {
+ // No register pair; we use a STR instruction.
+
+ genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
+
+ lastSavedWasPair = false;
+ spOffset += REGSIZE_BYTES;
+ }
+
+ spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+ }
+
+ assert(intRegsToSaveCount == 0);
+
+ // Save the floating-point/SIMD registers
+
+ lastSavedWasPair = false;
+
+ while (maskSaveRegsFloat != RBM_NONE)
+ {
+ // If this is the first store that needs to change SP (spDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((spDelta == 0) || (isRegsToSaveCountOdd && spOffset == REGSIZE_BYTES) ||
+ (!isRegsToSaveCountOdd && spOffset == 0));
+
+ isPairSave = (floatRegsToSaveCount >= 2);
+ regMaskTP reg1Mask = genFindLowestBit(maskSaveRegsFloat);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskSaveRegsFloat &= ~reg1Mask;
+ floatRegsToSaveCount -= 1;
+
+ if (isPairSave)
+ {
+ // We can use a STP instruction.
+
+ regMaskTP reg2Mask = genFindLowestBit(maskSaveRegsFloat);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ assert(reg2 == REG_NEXT(reg1));
+ maskSaveRegsFloat &= ~reg2Mask;
+ floatRegsToSaveCount -= 1;
+
+ genPrologSaveRegPair(reg1, reg2, spOffset, spDelta, lastSavedWasPair, REG_IP0, nullptr);
+
+ // TODO-ARM64-CQ: this code works in the prolog, but it's a bit weird to think about "next" when generating
+ // this epilog, to get the codes to match. Turn this off until that is better understood.
+ // lastSavedWasPair = true;
+
+ spOffset += 2 * FPSAVE_REGSIZE_BYTES;
+ }
+ else
+ {
+ // No register pair; we use a STR instruction.
+
+ genPrologSaveReg(reg1, spOffset, spDelta, REG_IP0, nullptr);
+
+ lastSavedWasPair = false;
+ spOffset += FPSAVE_REGSIZE_BYTES;
+ }
+
+ spDelta = 0; // We've now changed SP already, if necessary; don't do it again.
+ }
+
+ assert(floatRegsToSaveCount == 0);
+}
+
+//------------------------------------------------------------------------
+// genRestoreCalleeSavedRegistersHelp: Restore the callee-saved registers in 'regsToRestoreMask' from the stack frame
+// in the function or funclet epilog. This exactly reverses the actions of genSaveCalleeSavedRegistersHelp().
+//
+// Arguments:
+// regsToRestoreMask - The mask of callee-saved registers to restore. If empty, this function does nothing.
+// lowestCalleeSavedOffset - The offset from SP that is the beginning of the callee-saved register area.
+// spDelta - If non-zero, the amount to add to SP after the register restores (must be positive or
+// zero).
+//
+// Here's an example restore sequence:
+// ldp x27, x28, [sp,#96]
+// ldp x25, x26, [sp,#80]
+// ldp x23, x24, [sp,#64]
+// ldp x21, x22, [sp,#48]
+// ldp x19, x20, [sp,#32]
+//
+// For the case of non-zero spDelta, we assume the base of the callee-save registers to restore is at SP, and
+// the last restore adjusts SP by the specified amount. For example:
+// ldp x27, x28, [sp,#64]
+// ldp x25, x26, [sp,#48]
+// ldp x23, x24, [sp,#32]
+// ldp x21, x22, [sp,#16]
+// ldp x19, x20, [sp], #80
+//
+// Note that the unwind functions are called with the prolog operation that is being undone. So, for example, when
+// generating a post-indexed load, we call the unwind function for the corresponding pre-indexed store.
+//
+// Return Value:
+// None.
+
+void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta)
+{
+ assert(spDelta >= 0);
+ unsigned regsToRestoreCount = genCountBits(regsToRestoreMask);
+ if (regsToRestoreCount == 0)
+ {
+ if (spDelta != 0)
+ {
+ // Currently this is the case for varargs only
+ // whose size is MAX_REG_ARG * REGSIZE_BYTES = 64 bytes.
+ genStackPointerAdjustment(spDelta, REG_NA, nullptr);
+ }
+ return;
+ }
+
+ assert((spDelta % 16) == 0);
+ assert((regsToRestoreMask & RBM_FP) == 0); // we never restore FP here
+ assert(regsToRestoreCount <=
+ genCountBits(RBM_CALLEE_SAVED | RBM_LR)); // We also save LR, even though it is not in RBM_CALLEE_SAVED.
+
+ regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
+ regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
+
+ assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+ int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We
+ // predecrement to find the offset to
+ // load from.
+
+ unsigned floatRegsToRestoreCount = genCountBits(maskRestoreRegsFloat);
+ unsigned intRegsToRestoreCount = genCountBits(maskRestoreRegsInt);
+ int stackDelta = 0;
+ bool isPairRestore = false;
+ bool thisIsTheLastRestoreInstruction = false;
+#ifdef DEBUG
+ bool isRegsToRestoreCountOdd = ((floatRegsToRestoreCount + intRegsToRestoreCount) % 2 != 0);
+#endif
+
+ // We want to restore in the opposite order we saved, so the unwind codes match. Be careful to handle odd numbers of
+ // callee-saved registers properly.
+
+ // Restore the floating-point/SIMD registers
+
+ while (maskRestoreRegsFloat != RBM_NONE)
+ {
+ thisIsTheLastRestoreInstruction = (floatRegsToRestoreCount <= 2) && (maskRestoreRegsInt == RBM_NONE);
+ isPairRestore = (floatRegsToRestoreCount % 2) == 0;
+
+ // Update stack delta only if it is the last restore (the first save).
+ if (thisIsTheLastRestoreInstruction)
+ {
+ assert(stackDelta == 0);
+ stackDelta = spDelta;
+ }
+
+ // Update stack offset.
+ if (isPairRestore)
+ {
+ spOffset -= 2 * FPSAVE_REGSIZE_BYTES;
+ }
+ else
+ {
+ spOffset -= FPSAVE_REGSIZE_BYTES;
+ }
+
+ // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == FPSAVE_REGSIZE_BYTES) ||
+ (!isRegsToRestoreCountOdd && spOffset == 0));
+
+ regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsFloat);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ maskRestoreRegsFloat &= ~reg2Mask;
+ floatRegsToRestoreCount -= 1;
+
+ if (isPairRestore)
+ {
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsFloat);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsFloat &= ~reg1Mask;
+ floatRegsToRestoreCount -= 1;
+
+ genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ else
+ {
+ genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ }
+
+ assert(floatRegsToRestoreCount == 0);
+
+ // Restore the integer registers
+
+ while (maskRestoreRegsInt != RBM_NONE)
+ {
+ thisIsTheLastRestoreInstruction = (intRegsToRestoreCount <= 2);
+ isPairRestore = (intRegsToRestoreCount % 2) == 0;
+
+ // Update stack delta only if it is the last restore (the first save).
+ if (thisIsTheLastRestoreInstruction)
+ {
+ assert(stackDelta == 0);
+ stackDelta = spDelta;
+ }
+
+ // Update stack offset.
+ spOffset -= REGSIZE_BYTES;
+ if (isPairRestore)
+ {
+ spOffset -= REGSIZE_BYTES;
+ }
+
+ // If this is the last restore (the first save) that needs to change SP (stackDelta != 0),
+ // then the offset must be 8 to account for alignment for the odd count
+ // or it must be 0 for the even count.
+ assert((stackDelta == 0) || (isRegsToRestoreCountOdd && spOffset == REGSIZE_BYTES) ||
+ (!isRegsToRestoreCountOdd && spOffset == 0));
+
+ regMaskTP reg2Mask = genFindHighestBit(maskRestoreRegsInt);
+ regNumber reg2 = genRegNumFromMask(reg2Mask);
+ maskRestoreRegsInt &= ~reg2Mask;
+ intRegsToRestoreCount -= 1;
+
+ if (isPairRestore)
+ {
+ regMaskTP reg1Mask = genFindHighestBit(maskRestoreRegsInt);
+ regNumber reg1 = genRegNumFromMask(reg1Mask);
+ maskRestoreRegsInt &= ~reg1Mask;
+ intRegsToRestoreCount -= 1;
+
+ genEpilogRestoreRegPair(reg1, reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ else
+ {
+ genEpilogRestoreReg(reg2, spOffset, stackDelta, REG_IP0, nullptr);
+ }
+ }
+
+ assert(intRegsToRestoreCount == 0);
+}
+
+// clang-format off
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch: x0 = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function
+ * finally/fault: none
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch: x0 = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: x0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The ARM64 funclet prolog sequence is one of the following (Note: #framesz is total funclet frame size,
+ * including everything; #outsz is outgoing argument space. #framesz must be a multiple of 16):
+ *
+ * Frame type 1:
+ * For #outsz == 0 and #framesz <= 512:
+ * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Frame type 2:
+ * For #outsz != 0 and #framesz <= 512:
+ * sub sp,sp,#framesz ; establish the frame
+ * stp fp,lr,[sp,#outsz] ; save FP/LR.
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Frame type 3:
+ * For #framesz > 512:
+ * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ * sub sp,sp,#outsz ; create space for outgoing argument space
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3,
+ * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack
+ * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes:
+ * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a
+ * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
+ * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call).
+ * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym,
+ * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the
+ * outgoing argument space. Both changes to SP might need to add alignment padding.
+ *
+ * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP
+ * as in the main function.
+ *
+ * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
+ *
+ * if (this is a filter funclet)
+ * {
+ * // x1 on entry to a filter funclet is CallerSP of the containing function:
+ * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
+ * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
+ * // a funclet. Consider:
+ * //
+ * // try {
+ * // try {
+ * // throw new Exception();
+ * // } catch(Exception) {
+ * // throw new Exception(); // The exception thrown here ...
+ * // }
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack
+ * // } filter-handler {
+ * // }
+ * //
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will
+ * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always
+ * // create a main function PSP for any function with a filter.
+ *
+ * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
+ * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP
+ * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer
+ * }
+ * else
+ * {
+ * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
+ * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction.
+ *
+ * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch.
+ * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP
+ * }
+ *
+ * An example epilog sequence is then:
+ *
+ * add sp,sp,#outsz ; if any outgoing argument space
+ * ... ; restore callee-saved registers
+ * ldp x19,x20,[sp,#xxx]
+ * ldp fp,lr,[sp],#framesz
+ * ret lr
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes
+ * |-----------------------|
+ * | Saved FP, LR | // 16 bytes
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ */
+// clang-format on
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletProlog()\n");
+#endif
+
+ assert(block != NULL);
+ assert((block->bbFlags & BBF_FUNCLET_BEG) != 0);
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ regMaskTP maskSaveRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = genFuncletInfo.fiSaveRegs & ~maskSaveRegsFloat;
+
+ // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+ assert((maskSaveRegsInt & RBM_FP) != 0);
+
+ bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
+
+ regMaskTP maskArgRegsLiveIn;
+ if (isFilter)
+ {
+ maskArgRegsLiveIn = RBM_R0 | RBM_R1;
+ }
+ else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_NONE;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_R0;
+ }
+
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
+
+ if (genFuncletInfo.fiFrameType == 1)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
+ INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
+ }
+ else if (genFuncletInfo.fiFrameType == 2)
+ {
+ // fiFrameType==2 constraints:
+ assert(genFuncletInfo.fiSpDelta1 < 0);
+ assert(genFuncletInfo.fiSpDelta1 >= -512);
+
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ genFuncletInfo.fiSP_to_FPLR_save_delta);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 3);
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
+ INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet.
+ }
+ maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
+
+ genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
+
+ if (genFuncletInfo.fiFrameType == 3)
+ {
+ // Note that genFuncletInfo.fiSpDelta2 is always a negative value
+ assert(genFuncletInfo.fiSpDelta2 < 0);
+
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ if (isFilter)
+ {
+ // This is the first block of a filter
+ // Note that register x1 = CallerSP of the containing function
+ // X1 is overwritten by the first Load (new callerSP)
+ // X2 is scratch when we have a large constant offset
+
+ // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
+ genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
+ genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false);
+ regTracker.rsTrackRegTrash(REG_R1);
+
+ // Store the PSP value (aka CallerSP)
+ genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
+ genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
+
+ // re-establish the frame pointer
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
+ REG_R2, false);
+ }
+ else // This is a non-filter funclet
+ {
+ // X3 is scratch, X2 can also become scratch
+
+ // compute the CallerSP, given the frame pointer. x3 is scratch.
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
+ REG_R2, false);
+ regTracker.rsTrackRegTrash(REG_R3);
+
+ genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
+ genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ bool unwindStarted = false;
+
+ if (!unwindStarted)
+ {
+ // We can delay this until we know we'll generate an unwindable instruction, if necessary.
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ regMaskTP maskRestoreRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskRestoreRegsInt = genFuncletInfo.fiSaveRegs & ~maskRestoreRegsFloat;
+
+ // Funclets must always save LR and FP, since when we have funclets we must have an FP frame.
+ assert((maskRestoreRegsInt & RBM_LR) != 0);
+ assert((maskRestoreRegsInt & RBM_FP) != 0);
+
+ maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
+
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
+
+ if (genFuncletInfo.fiFrameType == 3)
+ {
+ // Note that genFuncletInfo.fiSpDelta2 is always a negative value
+ assert(genFuncletInfo.fiSpDelta2 < 0);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
+
+ lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
+ }
+
+ regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
+
+ if (genFuncletInfo.fiFrameType == 1)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
+ INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
+ }
+ else if (genFuncletInfo.fiFrameType == 2)
+ {
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ genFuncletInfo.fiSP_to_FPLR_save_delta);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
+
+ // fiFrameType==2 constraints:
+ assert(genFuncletInfo.fiSpDelta1 < 0);
+ assert(genFuncletInfo.fiSpDelta1 >= -512);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 3);
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
+ INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+ }
+
+ inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
+ compiler->unwindReturn(REG_LR);
+
+ compiler->unwindEndEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ *
+ * See genFuncletProlog() for more information about the prolog/epilog sequences.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ return;
+
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
+ // finalized
+
+ genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
+
+ regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
+ assert((rsMaskSaveRegs & RBM_LR) != 0);
+ assert((rsMaskSaveRegs & RBM_FP) != 0);
+
+ unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
+ unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + /* PSPSym */ REGSIZE_BYTES;
+ if (compiler->info.compIsVarArgs)
+ {
+ // For varargs we always save all of the integer register arguments
+ // so that they are contiguous with the incoming stack arguments.
+ saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES;
+ }
+ unsigned saveRegsPlusPSPSizeAligned = (unsigned)roundUp(saveRegsPlusPSPSize, STACK_ALIGN);
+
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+ unsigned outgoingArgSpaceAligned = (unsigned)roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
+
+ unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
+ assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);
+
+ int SP_to_FPLR_save_delta;
+ int SP_to_PSP_slot_delta;
+ int CallerSP_to_PSP_slot_delta;
+
+ if (maxFuncletFrameSizeAligned <= 512)
+ {
+ unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
+ unsigned funcletFrameSizeAligned = (unsigned)roundUp(funcletFrameSize, STACK_ALIGN);
+ assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
+
+ unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
+ assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
+
+ SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
+ CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
+
+ if (compiler->lvaOutgoingArgSpaceSize == 0)
+ {
+ genFuncletInfo.fiFrameType = 1;
+ }
+ else
+ {
+ genFuncletInfo.fiFrameType = 2;
+ }
+ genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
+ genFuncletInfo.fiSpDelta2 = 0;
+
+ assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)funcletFrameSizeAligned);
+ }
+ else
+ {
+ unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
+ assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
+
+ SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
+ CallerSP_to_PSP_slot_delta =
+ -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
+
+ genFuncletInfo.fiFrameType = 3;
+ genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
+ genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
+
+ assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
+ }
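+
+ // Illustrative numbers for the "maxFuncletFrameSizeAligned <= 512" path above: with 10 saved registers
+ // (including FP/LR) and no varargs, saveRegsPlusPSPSize = 10*8 + 8 = 88, which rounds up to 96. With
+ // lvaOutgoingArgSpaceSize = 32, maxFuncletFrameSizeAligned = 96 + 32 = 128 <= 512, so we get frame type 2
+ // with fiSpDelta1 = -128 (the 120-byte frame rounded up to 16), SP_to_FPLR_save_delta = 32,
+ // SP_to_PSP_slot_delta = 32 + 16 + 8 = 56 (including the 8-byte alignment pad), and
+ // CallerSP_to_PSP_slot_delta = -(88 - 16) = -72.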
+
+ /* Now save it for future use */
+
+ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta;
+ genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta;
+ genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + REGSIZE_BYTES;
+ genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Save regs: ");
+ dspRegMask(genFuncletInfo.fiSaveRegs);
+ printf("\n");
+ printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
+ printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta);
+ printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta);
+ printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta);
+ printf(" Caller SP to PSP slot delta: %d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta);
+ printf(" Frame type: %d\n", genFuncletInfo.fiFrameType);
+ printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1);
+ printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2);
+
+ if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ {
+ printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
+ }
+#endif // DEBUG
+
+ assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0);
+ assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0);
+ assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0);
+ assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0);
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta ==
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+ // funclet!
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX End Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
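+//
+// Notes:
+// For illustration (register and frame offset are hypothetical): spilling V03 of type TYP_LONG that currently
+// lives in x5 emits a store of x5 to V03's stack home, e.g.
+//     str x5, [fp,#0x18]
+// after which liveness/GC tracking is updated and lvRegNum is set to REG_STK.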
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ assert(varDsc->lvIsRegCandidate());
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ lclTyp = genActualType(lclTyp);
+ emitAttr size = emitTypeSize(lclTyp);
+
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
+
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
+
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come this path is a managed 'null' since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+}
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. On ARM64 we always use REG_TMP_0 and REG_TMP_1 as temp registers
+ * and this works fine in the case of tail calls
+ * Implementation Note: pushReg = true, in case of tail calls.
+ */
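+// A sketch of the sequence this emits for the non-ngen case (the label name is illustrative, and materializing
+// the cookie constant may take more than one instruction):
+//     mov REG_TMP_0, #gsGlobalSecurityCookieVal
+//     ldr REG_TMP_1, [<cookie slot on the frame>]
+//     cmp REG_TMP_0, REG_TMP_1
+//     b.eq L_cookieOk
+//     bl CORINFO_HELP_FAIL_FAST
+//   L_cookieOk: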
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ // Make sure that the return register is reported as live GC-ref so that any GC that kicks in while
+ // executing GS cookie check will not collect the object pointed to by REG_INTRET (R0).
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ regNumber regGSConst = REG_TMP_0;
+ regNumber regGSValue = REG_TMP_1;
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+ // load the GS cookie constant into a reg
+ //
+ genSetRegToIcon(regGSConst, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+ }
+ else
+ {
+ // Ngen case - GS cookie constant needs to be accessed through an indirection.
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSConst, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSConst, regGSConst, 0);
+ }
+ // Load this method's GS value from the stack frame
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSValue, compiler->lvaGSSecurityCookie, 0);
+ // Compare with the GC cookie constant
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regGSConst, regGSValue);
+
+ BasicBlock* gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif // DEBUG
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ // The current implementation of switch tables requires the first block to have a label so it
+ // can generate offsets to the switch label targets.
+ // TODO-ARM64-CQ: remove this when switches have been re-implemented to not use this.
+ if (compiler->fgHasSwitch)
+ {
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
+#endif
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+#ifdef DEBUG
+ if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (!VarSetOps::IsEmpty(compiler, addedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tAdded GCVars: ");
+ dumpConvertedVarSet(compiler, addedGCVars);
+ printf("\n");
+ }
+ if (!VarSetOps::IsEmpty(compiler, removedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tRemoved GCVars: ");
+ dumpConvertedVarSet(compiler, removedGCVars);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+
+ genUpdateCurrentFunclet(block);
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, FALSE);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
+ !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
+ // emit a NO_MAPPING entry, immediately after the prolog.
+ {
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+ }
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+ // Traverse the block in linear order, generating code for each node as we encounter it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUGGING_SUPPORT
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+#endif
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+#ifdef DEBUGGING_SUPPORT
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+ } // end for each node in block
+
+#ifdef DEBUG
+ // The following set of register spill checks and GC pointer tracking checks used to be
+ // performed at statement boundaries. Now, with LIR, there are no statements, so they are
+ // performed at the end of each block.
+ // TODO: could these checks be performed more frequently? E.g., at each location where
+ // the register allocator says there are no live non-variable registers. Perhaps this could
+ // be done by (a) keeping a running count of live non-variable registers by using
+ // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
+ // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
+ // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
+ // there will be no live non-variable registers.
+
+ regSet.rsSpillChk();
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ GenTree* blockLastNode = block->lastNode();
+ if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
+ (varTypeIsGC(compiler->info.compRetType) ||
+ (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == RBM_NONE);
+#endif // DEBUG
+
+#if defined(DEBUG) && defined(_TARGET_ARM64_)
+ if (block->bbNext == nullptr)
+ {
+ // Unit testing of the ARM64 emitter: generate a bunch of instructions into the last block
+ // (it's as good as any, but better than the prolog, which can only be a single instruction
+ // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
+ // thinks the instructions are the same as we do.
+ genArm64EmitterUnitTests();
+ }
+#endif // defined(DEBUG) && defined(_TARGET_ARM64_)
+
+#ifdef DEBUGGING_SUPPORT
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have a xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+
+#if 0
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region as the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters here.)
+ // We handle case #1 here, and case #2 in the emitter.
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+ // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif // 0
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
+ if ((block->bbNext == NULL) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext) ||
+ (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+ // Generate a call to the finally, like this:
+ // mov x0,qword ptr [fp + 10H] // Load x0 with PSPSym
+ // bl finally-funclet
+ // b finally-return // Only for non-retless finally calls
+ // The 'b' can be a NOP if we're going to the next block.
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0);
+ getEmitter()->emitIns_J(INS_bl_local, block->bbJumpDest);
+
+ if (block->bbFlags & BBF_RETLESS_CALL)
+ {
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
+
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ }
+ else
+ {
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (cannot be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
+
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
+ {
+ // Fall-through.
+ // TODO-ARM64-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
+ }
+
+ getEmitter()->emitEnableGC();
+ }
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+ case BBJ_EHCATCHRET:
+ // For long address (default): `adrp + add` will be emitted.
+ // For short address (proven later): `adr` will be emitted.
+ getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, block->bbJumpDest, REG_INTRET);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
+ compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// Returns the child that uses the same register as the dst (if any);
+// the other child is returned through the 'other' out parameter.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return nullptr;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return nullptr;
+ }
+}
+
+// Move an immediate value into an integer register.
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if (EA_IS_RELOC(size))
+ {
+ // This emits a pair of adrp/add (two instructions) with fix-ups.
+ getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm);
+ }
+ else if (imm == 0)
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+ if (emitter::emitIns_valid_imm_for_mov(imm, size))
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, (imm & 0xffff));
+ getEmitter()->emitIns_R_I_I(INS_movk, size, reg, ((imm >> 16) & 0xffff), 16, INS_OPTS_LSL);
+
+ if ((size == EA_8BYTE) &&
+ ((imm >> 32) != 0)) // Sometimes the upper 32 bits are zero and the first mov has zeroed them
+ {
+ getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 32) & 0xffff), 32, INS_OPTS_LSL);
+ if ((imm >> 48) != 0) // Frequently the upper 16 bits are zero and the first mov has zeroed them
+ {
+ getEmitter()->emitIns_R_I_I(INS_movk, EA_8BYTE, reg, ((imm >> 48) & 0xffff), 48, INS_OPTS_LSL);
+ }
+ }
+ }
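+ // Illustrative example (hypothetical immediate): for a 64-bit imm of 0x00000003456789AB with
+ // size EA_8BYTE, the sequence above emits roughly:
+ //   mov  reg, #0x89AB
+ //   movk reg, #0x4567, lsl #16
+ //   movk reg, #0x0003, lsl #32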
+ // The caller may have requested that the flags be set on this mov (rarely/never)
+ if (flags == INS_FLAGS_SET)
+ {
+ getEmitter()->emitIns_R_I(INS_tst, size, reg, 0);
+ }
+ }
+
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/***********************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the GT_CNS_INT or GT_CNS_DBL node in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+ // relocatable values tend to come down as a CNS_INT of native int type
+ // so the line between these two opcodes is kind of blurry
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ emitter* emit = getEmitter();
+ emitAttr size = emitTypeSize(tree);
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+
+ // Make sure we use "movi reg, 0x00" only for positive zero (0.0) and not for negative zero (-0.0)
+ if (*(__int64*)&constValue == 0)
+ {
+ // A faster/smaller way to generate 0.0
+ // We will just zero out the entire vector register for both float and double
+ emit->emitIns_R_I(INS_movi, EA_16BYTE, targetReg, 0x00, INS_OPTS_16B);
+ }
+ else if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // We can load the FP constant using the fmov FP-immediate for this constValue
+ emit->emitIns_R_F(INS_fmov, size, targetReg, constValue);
+ }
+ else
+ {
+ // Get a temp integer register to compute long address.
+ regMaskTP addrRegMask = tree->gtRsvdRegs;
+ regNumber addrReg = genRegNumFromMask(addrRegMask);
+ noway_assert(addrReg != REG_NA);
+
+ // We must load the FP constant from the constant pool
+ // Emit a data section constant for the float or double constant.
+ CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst);
+ // For long address (default): `adrp + ldr + fmov` will be emitted.
+ // For short address (proven later), `ldr` will be emitted.
+ emit->emitIns_R_C(INS_ldr, size, targetReg, addrReg, hnd, 0);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// Generate code to get the high N bits of a N*N=2N bit multiplication result
+void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
+{
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ assert(!treeNode->gtOverflowEx());
+
+#if 0
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter *emit = getEmitter();
+ emitAttr size = emitTypeSize(treeNode);
+ GenTree *op1 = treeNode->gtOp.gtOp1;
+ GenTree *op2 = treeNode->gtOp.gtOp2;
+
+ // to get the high bits of the multiply, we are constrained to using the
+ // 1-op form: RDX:RAX = RAX * rm
+ // The 3-op form (Rx=Ry*Rz) does not support it.
+
+ genConsumeOperands(treeNode->AsOp());
+
+ GenTree* regOp = op1;
+ GenTree* rmOp = op2;
+
+ // Set rmOp to the contained memory operand (if any)
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+ // Setup targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+#else // !0
+ NYI("genCodeForMulHi");
+#endif // !0
+}
+
+// generate code for a DIV or MOD operation
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
+{
+ // unused on ARM64
+}
+
+// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR
+// The caller is expected to have called genConsumeOperands() before calling this method.
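+// For example (illustrative): a 32-bit GT_ADD with both operands in registers typically becomes
+// a single 'add wD, wN, wM' produced by emitInsTernary below.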
+void CodeGen::genCodeForBinary(GenTree* treeNode)
+{
+ const genTreeOps oper = treeNode->OperGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV || oper == GT_UDIV || oper == GT_AND ||
+ oper == GT_OR || oper == GT_XOR);
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ regNumber r = emit->emitInsTernary(ins, emitTypeSize(treeNode), treeNode, op1, op2);
+ noway_assert(r == targetReg);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// isStructReturn: Returns whether the 'treeNode' is returning a struct.
+//
+// Arguments:
+// treeNode - The tree node to evaluate whether is a struct return.
+//
+// Return Value:
+// Returns true if the 'treeNode' is a GT_RETURN node of type struct.
+// Otherwise returns false.
+//
+bool CodeGen::isStructReturn(GenTreePtr treeNode)
+{
+ // This method could be called for a 'treeNode' of GT_RETFILT or GT_RETURN.
+ // For GT_RETFILT, the return is always
+ // a bool or a void, marking the end of a finally block.
+ noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+
+ return varTypeIsStruct(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genStructReturn: Generates code for returning a struct.
+//
+// Arguments:
+// treeNode - The GT_RETURN tree node.
+//
+// Return Value:
+// None
+//
+// Assumption:
+// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
+void CodeGen::genStructReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN);
+ assert(isStructReturn(treeNode));
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
+ var_types lclType = genActualType(varDsc->TypeGet());
+
+ // Currently only multireg TYP_STRUCT types such as HFAs and 16-byte structs are supported
+ // In the future we could have FEATURE_SIMD types like TYP_SIMD16
+ assert(lclType == TYP_STRUCT);
+ assert(varDsc->lvIsMultiRegRet);
+
+ ReturnTypeDesc retTypeDesc;
+ unsigned regCount;
+
+ retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
+ regCount = retTypeDesc.GetReturnRegCount();
+
+ assert(regCount >= 2);
+ assert(op1->isContained());
+
+ // Copy var on stack into ABI return registers
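+ // For example (illustrative): a 16-byte non-HFA struct is returned in {x0, x1}, so two
+ // 8-byte loads are emitted below from the local's stack home at offsets 0 and 8.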
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc.GetReturnRegType(i);
+ regNumber reg = retTypeDesc.GetABIReturnReg(i);
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
+ offset += genTypeSize(type);
+ }
+ }
+ else // op1 must be multi-reg GT_CALL
+ {
+ assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
+
+ genConsumeRegs(op1);
+
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+
+ ReturnTypeDesc* pRetTypeDesc;
+ unsigned regCount;
+ unsigned matchingCount = 0;
+
+ pRetTypeDesc = call->GetReturnTypeDesc();
+ regCount = pRetTypeDesc->GetReturnRegCount();
+
+ var_types regType[MAX_RET_REG_COUNT];
+ regNumber returnReg[MAX_RET_REG_COUNT];
+ regNumber allocatedReg[MAX_RET_REG_COUNT];
+ regMaskTP srcRegsMask = 0;
+ regMaskTP dstRegsMask = 0;
+ bool needToShuffleRegs = false; // Set to true if we have to move any registers
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regType[i] = pRetTypeDesc->GetReturnRegType(i);
+ returnReg[i] = pRetTypeDesc->GetABIReturnReg(i);
+
+ regNumber reloadReg = REG_NA;
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ }
+
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg[i] = reloadReg;
+ }
+ else
+ {
+ allocatedReg[i] = call->GetRegNumByIdx(i);
+ }
+
+ if (returnReg[i] == allocatedReg[i])
+ {
+ matchingCount++;
+ }
+ else // We need to move this value
+ {
+ // We want to move the value from allocatedReg[i] into returnReg[i]
+ // so record these two registers in the src and dst masks
+ //
+ srcRegsMask |= genRegMask(allocatedReg[i]);
+ dstRegsMask |= genRegMask(returnReg[i]);
+
+ needToShuffleRegs = true;
+ }
+ }
+
+ if (needToShuffleRegs)
+ {
+ assert(matchingCount < regCount);
+
+ unsigned remainingRegCount = regCount - matchingCount;
+ regMaskTP extraRegMask = treeNode->gtRsvdRegs;
+
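+ // Illustrative example (hypothetical registers): if the call produced its two results in
+ // {x1, x0} but the ABI return registers are {x0, x1}, then dstRegsMask == srcRegsMask
+ // (a circular dependency); the loop below first parks the value currently in x0 in the
+ // reserved 'extra' register, then moves x1 -> x0, and finally moves the parked value into x1.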
+ while (remainingRegCount > 0)
+ {
+ // set 'available' to the 'dst' registers that are not currently holding 'src' registers
+ //
+ regMaskTP availableMask = dstRegsMask & ~srcRegsMask;
+
+ regMaskTP dstMask;
+ regNumber srcReg;
+ regNumber dstReg;
+ var_types curType = TYP_UNKNOWN;
+ regNumber freeUpReg = REG_NA;
+
+ if (availableMask == 0)
+ {
+ // Circular register dependencies
+ // So just free up the lowest register in dstRegsMask by moving it to the 'extra' register
+
+ assert(dstRegsMask == srcRegsMask); // this has to be true for us to reach here
+ assert(extraRegMask != 0); // we require an 'extra' register
+ assert((extraRegMask & ~dstRegsMask) != 0); // it can't be part of dstRegsMask
+
+ availableMask = extraRegMask & ~dstRegsMask;
+
+ regMaskTP srcMask = genFindLowestBit(srcRegsMask);
+ freeUpReg = genRegNumFromMask(srcMask);
+ }
+
+ dstMask = genFindLowestBit(availableMask);
+ dstReg = genRegNumFromMask(dstMask);
+ srcReg = REG_NA;
+
+ if (freeUpReg != REG_NA)
+ {
+ // We will free up the srcReg by moving it to dstReg which is an extra register
+ //
+ srcReg = freeUpReg;
+
+ // Find the 'srcReg' and set 'curType', change allocatedReg[] to dstReg
+ // and add the new register mask bit to srcRegsMask
+ //
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ if (allocatedReg[i] == srcReg)
+ {
+ curType = regType[i];
+ allocatedReg[i] = dstReg;
+ srcRegsMask |= genRegMask(dstReg);
+ }
+ }
+ }
+ else // The normal case
+ {
+ // Find the 'srcReg' and set 'curType'
+ //
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ if (returnReg[i] == dstReg)
+ {
+ srcReg = allocatedReg[i];
+ curType = regType[i];
+ }
+ }
+ // After we perform this move we will have one fewer register to set up
+ remainingRegCount--;
+ }
+ assert(curType != TYP_UNKNOWN);
+
+ inst_RV_RV(ins_Copy(curType), dstReg, srcReg, curType);
+
+ // Clear the appropriate bits in srcRegsMask and dstRegsMask
+ srcRegsMask &= ~genRegMask(srcReg);
+ dstRegsMask &= ~genRegMask(dstReg);
+
+ } // while (remainingRegCount > 0)
+
+ } // (needToShuffleRegs)
+
+ } // op1 must be multi-reg GT_CALL
+}
+
+//------------------------------------------------------------------------
+// genReturn: Generates code for return statement.
+// In case of struct return, delegates to the genStructReturn method.
+//
+// Arguments:
+// treeNode - The GT_RETURN or GT_RETFILT tree node.
+//
+// Return Value:
+// None
+//
+void CodeGen::genReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types targetType = treeNode->TypeGet();
+
+#ifdef DEBUG
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ }
+#endif
+
+ if (isStructReturn(treeNode))
+ {
+ genStructReturn(treeNode);
+ }
+ else if (targetType != TYP_VOID)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+
+ bool movRequired = (op1->gtRegNum != retReg);
+
+ if (!movRequired)
+ {
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+ if (isRegCandidate && ((op1->gtFlags & GTF_SPILLED) == 0))
+ {
+ assert(op1->InReg());
+
+ // We may need to generate a zero-extending mov instruction to load the value from this GT_LCL_VAR
+
+ unsigned lclNum = lcl->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ var_types op1Type = genActualType(op1->TypeGet());
+ var_types lclType = genActualType(varDsc->TypeGet());
+
+ if (genTypeSize(op1Type) < genTypeSize(lclType))
+ {
+ movRequired = true;
+ }
+ }
+ }
+ }
+
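+ // For example (illustrative): returning a TYP_INT value that lives in a TYP_LONG local causes
+ // movRequired to be set back to true, so a 32-bit 'mov' (e.g. 'mov w0, w0') is emitted below;
+ // on ARM64 a 32-bit mov zero-extends into the upper half of the register.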
+ if (movRequired)
+ {
+ emitAttr movSize = EA_ATTR(genTypeSize(targetType));
+ getEmitter()->emitIns_R_R(INS_mov, movSize, retReg, op1->gtRegNum);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // There will be a single return block while generating profiler ELT callbacks.
+ //
+ // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
+ // In flowgraph and other places assert that the last node of a block marked as
+ // GT_RETURN is either a GT_RETURN or GT_JMP or a tail call. It would be nice to
+ // maintain such an invariant irrespective of whether profiler hook needed or not.
+ // Also, there is not much to be gained by materializing it as an explicit node.
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ printf("Generating: ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // Is this a node whose value is already in a register? LSRA denotes this by
+ // setting the GTF_REUSE_REG_VAL flag.
+ if (treeNode->IsReuseRegVal())
+ {
+ // For now, this is only used for constant nodes.
+ assert((treeNode->OperGet() == GT_CNS_INT) || (treeNode->OperGet() == GT_CNS_DBL));
+ JITDUMP(" TreeNode is marked ReuseReg\n");
+ return;
+ }
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_START_NONGC:
+ getEmitter()->emitDisableGC();
+ break;
+
+ case GT_PROF_HOOK:
+ // We should be seeing this only if profiler hook is needed
+ noway_assert(compiler->compIsProfilerHookNeeded());
+
+#ifdef PROFILING_SUPPORTED
+ // Right now this node is used only for tail calls. In future if
+ // we intend to use it for Enter or Leave hooks, add a data member
+ // to this node indicating the kind of profiler hook. For example,
+ // helper number can be used.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif // PROFILING_SUPPORTED
+ break;
+
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
+ case GT_CNS_INT:
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NOT:
+ assert(!varTypeIsFloating(targetType));
+
+ __fallthrough;
+
+ case GT_NEG:
+ {
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ assert(!treeNode->isContained());
+ // The dst can only be a register.
+ assert(targetReg != REG_NA);
+
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_DIV:
+ case GT_UDIV:
+ genConsumeOperands(treeNode->AsOp());
+
+ if (varTypeIsFloating(targetType))
+ {
+ // Floating point divide never raises an exception
+ genCodeForBinary(treeNode);
+ }
+ else // an integer divide operation
+ {
+ GenTreePtr divisorOp = treeNode->gtGetOp2();
+ emitAttr size = EA_ATTR(genTypeSize(genActualType(treeNode->TypeGet())));
+
+ if (divisorOp->IsIntegralConst(0))
+ {
+ // We unconditionally throw a divide by zero exception
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_DIV_BY_ZERO);
+
+ // We still need to call genProduceReg
+ genProduceReg(treeNode);
+ }
+ else // the divisor is not the constant zero
+ {
+ regNumber divisorReg = divisorOp->gtRegNum;
+
+ // Generate the required runtime checks for GT_DIV or GT_UDIV
+ if (treeNode->gtOper == GT_DIV)
+ {
+ BasicBlock* sdivLabel = genCreateTempLabel();
+
+ // Two possible exceptions:
+ // (AnyVal / 0) => DivideByZeroException
+ // (MinInt / -1) => ArithmeticException
+ //
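+ // Illustrative sketch (hypothetical labels) of the checks emitted below when the divisor
+ // is not a constant:
+ //   cmp  divisorReg, #0
+ //   beq  throwDivByZero
+ //   cmp  divisorReg, #-1
+ //   bne  sdivLabel
+ //   adds zr, dividendReg, dividendReg // Z and V both set only when dividend == MinInt
+ //   bne  sdivLabel
+ //   bvs  throwArithExcpn
+ // sdivLabel:
+ //   sdiv ...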
+ bool checkDividend = true;
+
+ // Do we have an immediate for the 'divisorOp'?
+ //
+ if (divisorOp->IsCnsIntOrI())
+ {
+ GenTreeIntConCommon* intConstTree = divisorOp->AsIntConCommon();
+ ssize_t intConstValue = intConstTree->IconValue();
+ assert(intConstValue != 0); // already checked above by the IsIntegralConst(0) test
+ if (intConstValue != -1)
+ {
+ checkDividend = false; // We statically know that the divisor is not -1
+ }
+ }
+ else // insert check for division by zero
+ {
+ // If the divisor is zero, throw a DivideByZeroException
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
+ }
+
+ if (checkDividend)
+ {
+ // If the divisor is not -1, branch to 'sdivLabel'
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, -1);
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, sdivLabel);
+ // If control flow continues past here the 'divisorReg' is known to be -1
+
+ regNumber dividendReg = treeNode->gtGetOp1()->gtRegNum;
+ // At this point the divisor is known to be -1
+ //
+ // Issue the 'adds zr, dividendReg, dividendReg' instruction
+ // this will set both the Z and V flags only when dividendReg is MinInt
+ //
+ emit->emitIns_R_R_R(INS_adds, size, REG_ZR, dividendReg, dividendReg);
+ inst_JMP(jmpNotEqual, sdivLabel); // goto sdiv if the Z flag is clear
+ genJumpToThrowHlpBlk(EJ_vs, SCK_ARITH_EXCPN); // if the V flag is set, throw
+ // ArithmeticException
+
+ genDefineTempLabel(sdivLabel);
+ }
+ genCodeForBinary(treeNode); // Generate the sdiv instruction
+ }
+ else // (treeNode->gtOper == GT_UDIV)
+ {
+ // Only one possible exception
+ // (AnyVal / 0) => DivideByZeroException
+ //
+ // Note that division by the constant 0 was already checked for above by the
+ // op2->IsIntegralConst(0) check
+ //
+ if (!divisorOp->IsCnsIntOrI())
+ {
+ // divisorOp is not a constant, so it could be zero
+ //
+ emit->emitIns_R_I(INS_cmp, size, divisorReg, 0);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_DIV_BY_ZERO);
+ }
+ genCodeForBinary(treeNode);
+ }
+ }
+ }
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genConsumeOperands(treeNode->AsOp());
+ genCodeForBinary(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ genCodeForShift(treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+ assert(varNode->gtLclNum < compiler->lvaCount);
+ unsigned varNum = varNode->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (targetType == TYP_STRUCT)
+ {
+ NYI("GT_LCL_FLD with TYP_STRUCT");
+ }
+ emitAttr size = emitTypeSize(targetType);
+
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(targetReg != REG_NA);
+
+ unsigned offset = treeNode->gtLclFld.gtLclOffs;
+
+ if (varTypeIsFloating(targetType))
+ {
+ if (treeNode->InReg())
+ {
+ NYI("GT_LCL_FLD with register to register Floating point move");
+ }
+ else
+ {
+ emit->emitIns_R_S(ins_Load(targetType), size, targetReg, varNum, offset);
+ }
+ }
+ else
+ {
+ size = EA_SET_SIZE(size, EA_8BYTE);
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offset);
+ }
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ bool isRegCandidate = varDsc->lvIsRegCandidate();
+
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+
+ // targetType must be a normal scalar type and not a TYP_STRUCT
+ assert(targetType != TYP_STRUCT);
+
+ instruction ins = ins_Load(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_R_S(ins, attr, targetReg, varNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+
+ // record the offset
+ unsigned offset = treeNode->gtLclFld.gtLclOffs;
+
+ // We must have a stack store with GT_STORE_LCL_FLD
+ noway_assert(!treeNode->InReg());
+ noway_assert(targetReg == REG_NA);
+
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else
+ {
+ assert(!data->isContained());
+ genConsumeReg(data);
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ instruction ins = ins_Store(targetType);
+
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
+
+ genUpdateLife(varNode);
+
+ varDsc->lvRegNum = REG_STK;
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = treeNode->AsLclVarCommon();
+
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ unsigned offset = 0;
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || targetType == genActualType(varDsc->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1->gtEffectiveVal();
+
+ // var = call, where call returns a multi-reg return value
+ // case is handled separately.
+ if (data->gtSkipReloadOrCopy()->IsMultiRegCall())
+ {
+ genMultiRegCallStoreToLocal(treeNode);
+ }
+ else
+ {
+ genConsumeRegs(data);
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else
+ {
+ assert(!data->isContained());
+ genConsumeReg(data);
+ dataReg = data->gtRegNum;
+ }
+ assert(dataReg != REG_NA);
+
+ if (targetReg == REG_NA) // store into stack based LclVar
+ {
+ inst_set_SV_var(varNode);
+
+ instruction ins = ins_Store(targetType);
+ emitAttr attr = emitTypeSize(targetType);
+
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ emit->emitIns_S_R(ins, attr, dataReg, varNum, offset);
+
+ genUpdateLife(varNode);
+
+ varDsc->lvRegNum = REG_STK;
+ }
+ else // store into register (i.e move into register)
+ {
+ if (dataReg != targetReg)
+ {
+ // Assign into targetReg when dataReg (from op1) is not the same register
+ inst_RV_RV(ins_Copy(targetType), targetReg, dataReg, targetType);
+ }
+ genProduceReg(treeNode);
+ }
+ }
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ genReturn(treeNode);
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode* lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+ genConsumeAddress(treeNode->AsIndir()->Addr());
+ emit->emitInsLoadStoreOp(ins_Load(targetType), emitTypeSize(treeNode), targetReg, treeNode->AsIndir());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MULHI:
+ genCodeForMulHi(treeNode->AsOp());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ // Integer MOD should have been morphed into a sequence of sub, mul, div in fgMorph.
+ //
+ // We shouldn't be seeing GT_MOD on float/double as it is morphed into a helper call by front-end.
+ noway_assert(!"Codegen for GT_MOD/GT_UMOD");
+ break;
+
+ case GT_INTRINSIC:
+ genIntrinsic(treeNode);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ genSIMDIntrinsic(treeNode->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CKFINITE:
+ genCkfinite(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-ARM64-CQ: Check if we can use the currently set flags.
+ // TODO-ARM64-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ assert(!op1->isContainedMemoryOp());
+ assert(!op2->isContainedMemoryOp());
+
+ genConsumeOperands(tree);
+
+ emitAttr cmpSize = EA_UNKNOWN;
+
+ if (varTypeIsFloating(op1Type))
+ {
+ assert(varTypeIsFloating(op2Type));
+ assert(!op1->isContained());
+ assert(op1Type == op2Type);
+ cmpSize = EA_ATTR(genTypeSize(op1Type));
+
+ if (op2->IsIntegralConst(0))
+ {
+ emit->emitIns_R_F(INS_fcmp, cmpSize, op1->gtRegNum, 0.0);
+ }
+ else
+ {
+ assert(!op2->isContained());
+ emit->emitIns_R_R(INS_fcmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+ else
+ {
+ assert(!varTypeIsFloating(op2Type));
+ // We don't support swapping op1 and op2 to generate cmp reg, imm
+ assert(!op1->isContainedIntOrIImmed());
+
+ // TODO-ARM64-CQ: the second register argument of a CMP can be sign/zero
+ // extended as part of the instruction (using "CMP (extended register)").
+ // We should use that if possible, swapping operands
+ // (and reversing the condition) if necessary.
+ unsigned op1Size = genTypeSize(op1Type);
+ unsigned op2Size = genTypeSize(op2Type);
+
+ if ((op1Size < 4) || (op1Size < op2Size))
+ {
+ // We need to sign/zero extend op1 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(ins, op1->gtRegNum, op1->gtRegNum);
+ }
+
+ if (!op2->isContainedIntOrIImmed())
+ {
+ if ((op2Size < 4) || (op2Size < op1Size))
+ {
+ // We need to sign/zero extend op2 up to 32 or 64 bits.
+ instruction ins = ins_Move_Extend(op2Type, true);
+ inst_RV_RV(ins, op2->gtRegNum, op2->gtRegNum);
+ }
+ }
+ cmpSize = EA_4BYTE;
+ if ((op1Size == EA_8BYTE) || (op2Size == EA_8BYTE))
+ {
+ cmpSize = EA_8BYTE;
+ }
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ GenTreeIntConCommon* intConst = op2->AsIntConCommon();
+ emit->emitIns_R_I(INS_cmp, cmpSize, op1->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_cmp, cmpSize, op1->gtRegNum, op2->gtRegNum);
+ }
+ }
+
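+ // Illustrative (hypothetical registers): a signed 32-bit GT_LT evaluated into a register
+ // ends up as 'cmp w1, w2' (emitted above) followed by a conditional set such as 'cset w0, lt'
+ // produced by genSetRegToCond below.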
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree* cmp = treeNode->gtOp.gtOp1->gtEffectiveVal();
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
+ assert(jumpKind[0] != EJ_NONE);
+
+ // On Arm64 the branches will always branch to the true label
+ assert(branchToTrueLabel[0]);
+ inst_JMP(jumpKind[0], compiler->compCurBB->bbJumpDest);
+
+ if (jumpKind[1] != EJ_NONE)
+ {
+ // the second conditional branch always has to be to the true label
+ assert(branchToTrueLabel[1]);
+ inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+ }
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
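+ //
+ // Illustrative shape of the code emitted below (hypothetical register):
+ //   cmp wData, #0
+ //   beq skipLabel
+ //   bl  CORINFO_HELP_STOP_FOR_GC
+ // skipLabel: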
+
+ GenTree* data = treeNode->gtOp.gtOp1;
+ genConsumeRegs(data);
+ emit->emitIns_R_I(INS_cmp, EA_4BYTE, data->gtRegNum, 0);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering
+ // registers are taken care of.
+ genConsumeOperands(treeNode->AsOp());
+
+#if NOGC_WRITE_BARRIERS
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_WRITE_BARRIER_DST_BYREF,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_WRITE_BARRIER_DST_BYREF);
+
+ // 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ if (addr->gtRegNum != REG_WRITE_BARRIER_DST_BYREF)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER_DST_BYREF, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // 'data' goes into x15 (REG_WRITE_BARRIER)
+ if (data->gtRegNum != REG_WRITE_BARRIER)
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER, data->gtRegNum, data->TypeGet());
+ }
+#else
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+#endif // NOGC_WRITE_BARRIERS
+
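+ // Illustrative sketch (NOGC write-barrier path; helper name omitted on purpose): after the
+ // moves above, the code emitted here looks roughly like:
+ //   mov x14, <addr> // REG_WRITE_BARRIER_DST_BYREF
+ //   mov x15, <data> // REG_WRITE_BARRIER
+ //   bl  <write-barrier helper>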
+ genGCWriteBarrier(treeNode, writeBarrierForm);
+ }
+ else // A normal store, not a WriteBarrier store
+ {
+ bool reverseOps = ((treeNode->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+ GenTree* nonRMWsrc = nullptr;
+ // We must consume the operands in the proper execution order,
+ // so that liveness is updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (!data->isContained())
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ regNumber dataReg = REG_NA;
+ if (data->isContainedIntOrIImmed())
+ {
+ assert(data->IsIntegralConst(0));
+ dataReg = REG_ZR;
+ }
+ else // data is not contained, so evaluate it into a register
+ {
+ assert(!data->isContained());
+ dataReg = data->gtRegNum;
+ }
+
+ emit->emitInsLoadStoreOp(ins_Store(targetType), emitTypeSize(treeNode), dataReg, treeNode->AsIndir());
+ }
+ }
+ break;
+
+ case GT_COPY:
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
+ break;
+
+ case GT_SWAP:
+ {
+ // Swap is only supported for lclVar operands that are enregistered
+ // We do not consume or produce any registers. Both operands remain enregistered.
+ // However, the gc-ness may change.
+ assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
+
+ GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
+ var_types type1 = varDsc1->TypeGet();
+ GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
+ LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
+ var_types type2 = varDsc2->TypeGet();
+
+ // We must have both int or both fp regs
+ assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
+
+ // FP swap is not yet implemented (and should have NYI'd in LSRA)
+ assert(!varTypeIsFloating(type1));
+
+ regNumber oldOp1Reg = lcl1->gtRegNum;
+ regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->gtRegNum;
+ regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg;
+ varDsc2->lvRegNum = oldOp1Reg;
+
+ // Do the xchg
+ emitAttr size = EA_PTRSIZE;
+ if (varTypeGCtype(type1) != varTypeGCtype(type2))
+ {
+ // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
+ // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
+ size = EA_GCREF;
+ }
+
+ NYI("register swap");
+ // inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
+
+ // Update the gcInfo.
+ // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
+ gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+ gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+
+ // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
+ // It will also dump the updates.
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
+ gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
+ }
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ genPutArgStk(treeNode);
+ break;
+
+ case GT_PUTARG_REG:
+ assert(targetType != TYP_STRUCT); // Any TYP_STRUCT register args should have been removed by
+ // fgMorphMultiregStructArg
+ // We have a normal non-Struct targetType
+ {
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_JMP:
+ genJmpMethod(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
+ case GT_CMPXCHG:
+ NYI("GT_CMPXCHG");
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (targetReg != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(targetReg, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
+ emit->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, reg, 0);
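+ // This emits 'ldr wzr, [reg]', which faults if 'reg' is null.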
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+
+ // For long address (default): `adrp + add` will be emitted.
+ // For short address (proven later): `adr` will be emitted.
+ emit->emitIns_R_L(INS_adr, EA_PTRSIZE, genPendingCallLabel, targetReg);
+ break;
+
+ case GT_STORE_OBJ:
+ if (treeNode->OperIsCopyBlkOp())
+ {
+ assert(treeNode->AsObj()->gtGcPtrCount != 0);
+ genCodeForCpObj(treeNode->AsObj());
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_DYN_BLK:
+ case GT_STORE_BLK:
+ {
+ GenTreeBlk* blkOp = treeNode->AsBlk();
+ if (blkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+ bool isCopyBlk = blkOp->OperIsCopyBlkOp();
+
+ switch (blkOp->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlk(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlk(blkOp);
+ }
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkUnroll(blkOp);
+ }
+ else
+ {
+ genCodeForInitBlkUnroll(blkOp);
+ }
+ break;
+ default:
+ unreached();
+ }
+ if (blkOp->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitEnableGC();
+ }
+ }
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ NYI("GT_CLS_VAR_ADDR");
+ break;
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+#endif
+ assert(!"Unknown node in codegen");
+ }
+ break;
+ }
+}
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+// treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+// None
+//
+// Assumption:
+// The child of store is a multi-reg call node.
+// genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+ // On ARM64, structs of size >= 9 and <= 16 bytes are returned in two return registers, as are HFAs.
+ assert(varTypeIsStruct(treeNode));
+
+ // Assumption: current ARM64 implementation requires that a multi-reg struct
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being struct promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ // Right now the only enregistrable structs supported are SIMD types.
+ assert(varTypeIsSIMD(treeNode));
+ NYI("GT_STORE_LCL_VAR of a SIMD enregisterable struct");
+ }
+ else
+ {
+ // Stack store
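+ // Illustrative example (hypothetical 16-byte non-HFA struct): the call returns in {x0, x1},
+ // so two 8-byte stores are emitted to the local's stack home at offsets 0 and 8.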
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = pRetTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+ }
+}
+
+/***********************************************************************************************
+ * Generate code for localloc
+ */
+void CodeGen::genLclHeap(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_LCLHEAP);
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ regNumber targetReg = tree->gtRegNum;
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regNumber regCnt = REG_NA;
+ regNumber pspSymReg = REG_NA;
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+ BasicBlock* endLabel = nullptr;
+ BasicBlock* loop = nullptr;
+ unsigned stackAdjustment = 0;
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ // Whether method has PSPSym.
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+ // Compute the amount of memory to allocate, properly rounded up to STACK_ALIGN.
+ size_t amount = 0;
+ if (size->IsCnsIntOrI())
+ {
+ // If size is a constant, then it must be contained.
+ assert(size->isContained());
+
+ // If amount is zero then return null in targetReg
+ amount = size->gtIntCon.gtIconVal;
+ if (amount == 0)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
+ goto BAILOUT;
+ }
+
+ // 'amount' is the total number of bytes to localloc, rounded up to be properly STACK_ALIGNed
+ amount = AlignUp(amount, STACK_ALIGN);
+ }
+ else
+ {
+ // If the size is 0, bail out by returning null in targetReg
+ genConsumeRegAndCopy(size, targetReg);
+ endLabel = genCreateTempLabel();
+ getEmitter()->emitIns_R_R(INS_TEST, easz, targetReg, targetReg);
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, endLabel);
+
+ // Compute the size of the block to allocate and perform alignment.
+ // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt,
+ // since we don't need any internal registers.
+ if (!hasPspSym && compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ if (regCnt != targetReg)
+ inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
+ }
+
+ // Align to STACK_ALIGN
+ // regCnt will be the total number of bytes to localloc
+ inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
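+ // For example (illustrative, assuming STACK_ALIGN == 16): a requested size of 24 becomes
+ // (24 + 15) & ~15 == 32.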
+ }
+
+ stackAdjustment = 0;
+#if FEATURE_EH_FUNCLETS
+ // If we have PSPsym, then need to re-locate it after localloc.
+ if (hasPspSym)
+ {
+ stackAdjustment += STACK_ALIGN;
+
+ // Save a copy of PSPSym
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP pspSymRegMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~pspSymRegMask;
+ pspSymReg = genRegNumFromMask(pspSymRegMask);
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP by popping off the
+ // outgoing arg area. We will restore it right before we return from this method.
+ //
+ // Localloc is supposed to return stack space that is STACK_ALIGN'ed. The following
+ // are the cases that need to be handled:
+ // i) Method has PSPSym + out-going arg area.
+ // It is guaranteed that size of out-going arg area is STACK_ALIGNED (see fgMorphArgs).
+ // Therefore, we will pop off SP up to the out-going arg area before locallocating.
+ // We need to add padding to ensure SP is STACK_ALIGN'ed while re-locating PSPSym + arg area.
+ // ii) Method has no PSPSym but has an out-going arg area.
+ // Almost the same case as above, without the requirement to pad for the final SP to be STACK_ALIGN'ed.
+ // iii) Method has PSPSym but no out-going arg area.
+ // Nothing to pop off from the stack, but PSPSym needs to be relocated with SP padded.
+ // iv) Method has neither PSPSym nor out-going arg area.
+ // Nothing needs to be popped off from the stack nor relocated.
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
+ }
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+ // We should reach here only for non-zero, constant size allocations.
+ assert(amount > 0);
+
+ // For small allocations we will generate up to four stp instructions
+ size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ while (cntStackAlignedWidthItems != 0)
+ {
+ // We can use pre-indexed addressing.
+ // stp ZR, ZR, [SP, #-16]!
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
+ cntStackAlignedWidthItems -= 1;
+ }
+
+ goto ALLOC_DONE;
+ }
+ else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust the SP value
+ // The SP might already be in the guard page, must touch it BEFORE
+ // the alloc, not after.
+ // ldr wz, [SP, #0]
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SP, 0);
+
+ inst_RV_IV(INS_sub, REG_SP, amount, EA_PTRSIZE);
+
+ goto ALLOC_DONE;
+ }
+
+ // else, "mov regCnt, amount"
+ // If the method has no PSPSym and compInitMem=true, we can reuse targetReg as regcnt.
+ // Since size is a constant, regCnt is not yet initialized.
+ assert(regCnt == REG_NA);
+ if (!hasPspSym && compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ }
+ genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
+ }
+
+ if (compiler->info.compInitMem)
+ {
+ BasicBlock* loop = genCreateTempLabel();
+
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+ // Since we have to zero out the allocated memory AND ensure that RSP is always valid
+ // by tickling the pages, we will just push 0's on the stack.
+ //
+ // Note: regCnt is guaranteed to be even on Arm64 since STACK_ALIGN/TARGET_POINTER_SIZE = 2
+ // and localloc size is a multiple of STACK_ALIGN.
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+ // We can use pre-indexed addressing.
+ // stp ZR, ZR, [SP, #-16]!
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
+
+ // If not done, loop
+ // Note that regCnt is the number of bytes to stack allocate.
+ // Therefore we need to subtract 16 from regcnt here.
+ assert(genIsValidIntReg(regCnt));
+ inst_RV_IV(INS_subs, regCnt, 16, emitActualTypeSize(type));
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, loop);
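+ // Because SP moves down only 16 bytes per store and each store touches the newly
+ // allocated memory, this loop also serves as the stack probe for the compInitMem case.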
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc.
+ //
+ // We don't need to zero out the allocated memory. However, we do have
+ // to tickle the pages to ensure that SP is always valid and is
+ // in sync with the "stack guard page". Note that in the worst
+ // case SP is on the last byte of the guard page. Thus you must
+ // touch SP+0 first, not SP-0x1000 (one page below).
+ //
+ // Another subtlety is that you don't want SP to be exactly on the
+ // boundary of the guard page because PUSH is predecrement, thus
+ // call setup would not touch the guard page but just beyond it
+ //
+ // Note that we go through a few hoops so that SP never points to
+ // illegal pages at any time during the ticking process
+ //
+ // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
+ // jb Loop // result is smaller than original SP (no wrap around)
+ // mov regCnt, #0 // Overflow, pick lowest possible value
+ //
+ // Loop:
+ // ldr wzr, [SP + 0] // tickle the page - read from the page
+ // sub regTmp, SP, PAGE_SIZE // decrement SP by PAGE_SIZE
+ // cmp regTmp, regCnt
+ // jb Done
+ // mov SP, regTmp
+ // j Loop
+ //
+ // Done:
+ // mov SP, regCnt
+ //
+
+ // Setup the regTmp
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) == 1);
+ regNumber regTmp = genRegNumFromMask(tmpRegsMask);
+
+ BasicBlock* loop = genCreateTempLabel();
+ BasicBlock* done = genCreateTempLabel();
+
+ // subs regCnt, SP, regCnt // regCnt now holds ultimate SP
+ getEmitter()->emitIns_R_R_R(INS_subs, EA_PTRSIZE, regCnt, REG_SPBASE, regCnt);
+
+ inst_JMP(EJ_vc, loop); // branch if the V flag is not set
+
+ // Overflow, set regCnt to lowest possible value
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, 0);
+
+ // decrement SP by PAGE_SIZE
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, regTmp, REG_SPBASE, compiler->eeGetPageSize());
+
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regCnt);
+ emitJumpKind jmpLTU = genJumpKindForOper(GT_LT, CK_UNSIGNED);
+ inst_JMP(jmpLTU, done);
+
+ // Update SP to be at the next page of stack that we will tickle
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
+
+ // Jump to loop and tickle new stack address
+ inst_JMP(EJ_jmp, loop);
+
+ // Done with stack tickle loop
+ genDefineTempLabel(done);
+
+ // Now just move the final value to SP
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, regCnt);
+ }
+
+ALLOC_DONE:
+ // Re-adjust SP to allocate PSPSym and out-going arg area
+ if (stackAdjustment != 0)
+ {
+ assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ assert(stackAdjustment > 0);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, (int)stackAdjustment);
+
+#if FEATURE_EH_FUNCLETS
+ // Write PSPSym to its new location.
+ if (hasPspSym)
+ {
+ assert(genIsValidIntReg(pspSymReg));
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, pspSymReg, compiler->lvaPSPSym, 0);
+ }
+#endif
+ // Return the stackalloc'ed address in result register.
+ // TargetReg = RSP + stackAdjustment.
+ //
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, targetReg, REG_SPBASE, (int)stackAdjustment);
+ }
+ else // stackAdjustment == 0
+ {
+ // Move the final value of SP to targetReg
+ inst_RV_RV(INS_mov, targetReg, REG_SPBASE);
+ }
+
+BAILOUT:
+ if (endLabel != nullptr)
+ genDefineTempLabel(endLabel);
+
+ // Write the lvaLocAllocSPvar stack frame slot
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Save the new SP value for the stack check on return
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, targetReg, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ genProduceReg(tree);
+}
+
+// Generate code for InitBlk by performing a loop unroll
+// Preconditions:
+// a) Both the size and fill byte value are integer constants.
+// b) The size of the struct to initialize is smaller than INITBLK_UNROLL_LIMIT bytes.
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
+{
+#if 0
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(size != 0);
+ assert(size <= INITBLK_UNROLL_LIMIT);
+ assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
+
+ emitter *emit = getEmitter();
+
+ genConsumeReg(initVal);
+ genConsumeReg(dstAddr);
+
+ // If the initVal was moved, or spilled and reloaded to a different register,
+ // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
+ // which needs to be the new register.
+ regNumber valReg = initVal->gtRegNum;
+ initVal = initVal->gtSkipReloadOrCopy();
+#else // !0
+ NYI("genCodeForInitBlkUnroll");
+#endif // !0
+}
+
+ // Generates code for InitBlk by calling the VM memset helper function.
+ // Preconditions (one of):
+ // a) The size argument of the InitBlk is not an integer constant, or
+ // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(initBlkNode->gtRsvdRegs == RBM_ARG_2);
+
+ if (size == 0)
+ {
+ noway_assert(initBlkNode->gtOper == GT_DYN_BLK);
+ genConsumeRegAndCopy(initBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2);
+ }
+ else
+ {
+// TODO-ARM64-CQ: When initblk loop unrolling is implemented
+// put this assert back on.
+#if 0
+ assert(size >= INITBLK_UNROLL_LIMIT);
+#endif // 0
+ genSetRegToIcon(REG_ARG_2, size);
+ }
+ genConsumeRegAndCopy(initVal, REG_ARG_1);
+ genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+
+ genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+}
+
+// Generate code for a load from some address + offset
+// base: tree node which can be either a local address or arbitrary node
+// offset: distance from the base from which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+ emit->emitIns_R_S(ins, size, dst, base->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_R_R_I(ins, size, dst, base->gtRegNum, offset);
+ }
+}
+
+// Generate code for a store to some address + offset
+// base: tree node which can be either a local address or arbitrary node
+ // offset: distance from the base at which to store
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* base, unsigned offset)
+{
+#if 0
+ emitter *emit = getEmitter();
+
+ if (base->OperIsLocalAddr())
+ {
+ if (base->gtOper == GT_LCL_FLD_ADDR)
+ offset += base->gtLclFld.gtLclOffs;
+ emit->emitIns_S_R(ins, size, src, base->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, base->gtRegNum, offset);
+ }
+#else // !0
+ NYI("genCodeForStoreOffset");
+#endif // !0
+}
+
+// Generates CpBlk code by performing a loop unroll
+// Preconditions:
+// The size argument of the CpBlk node is a constant and <= 64 bytes.
+// This may seem small but covers >95% of the cases in several framework assemblies.
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+#if 0
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ noway_assert(source->gtOper == GT_IND);
+ GenTreePtr srcAddr = source->gtGetOp1();
+
+ assert((size != 0 ) && (size <= CPBLK_UNROLL_LIMIT));
+
+ emitter *emit = getEmitter();
+
+ if (!srcAddr->isContained())
+ genConsumeReg(srcAddr);
+
+ if (!dstAddr->isContained())
+ genConsumeReg(dstAddr);
+
+ unsigned offset = 0;
+
+ // If the size of this struct is larger than 16 bytes
+ // let's use SSE2 to be able to do 16 byte at a time
+ // loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(cpBlkNode->gtRsvdRegs) == 1);
+ regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+#else // !0
+ NYI("genCodeForCpBlkUnroll");
+#endif // !0
+}
+
+ // Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// For this case we'll generate a sequence of loads/stores in the case of struct
+// slots that don't contain GC pointers. The generated code will look like:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+//
+// In the case of a GC-Pointer we'll call the ByRef write barrier helper
+ // which happens to use the same registers as the previous call to maintain
+// the same register requirements and register killsets:
+// bl CORINFO_HELP_ASSIGN_BYREF
+//
+// So finally an example would look like this:
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+// bl CORINFO_HELP_ASSIGN_BYREF
+// ldr tempReg, [R13, #8]
+// str tempReg, [R14, #8]
+void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
+{
+ // Make sure we got the arguments of the cpobj operation in the right registers
+ GenTreePtr dstAddr = cpObjNode->Addr();
+ GenTreePtr source = cpObjNode->Data();
+ noway_assert(source->gtOper == GT_IND);
+ GenTreePtr srcAddr = source->gtGetOp1();
+
+ bool dstOnStack = dstAddr->OperIsLocalAddr();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!srcAddr->isContained());
+
+ // This GenTree node has data about GC pointers, which means we're dealing
+ // with CpObj.
+ assert(cpObjNode->gtGcPtrCount > 0);
+#endif // DEBUG
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumeRegAndCopy(srcAddr, REG_WRITE_BARRIER_SRC_BYREF);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_SRC_BYREF, srcAddr->TypeGet());
+
+ genConsumeRegAndCopy(dstAddr, REG_WRITE_BARRIER_DST_BYREF);
+ gcInfo.gcMarkRegPtrVal(REG_WRITE_BARRIER_DST_BYREF, dstAddr->TypeGet());
+
+ // Temp register used to perform the sequence of loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs);
+
+#ifdef DEBUG
+ assert(cpObjNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(cpObjNode->gtRsvdRegs) == 1);
+ assert(genIsValidIntReg(tmpReg));
+#endif // DEBUG
+
+ unsigned slots = cpObjNode->gtSlots;
+ emitter* emit = getEmitter();
+
+ // If we can prove it's on the stack we don't need to use the write barrier.
+ if (dstOnStack)
+ {
+ // TODO-ARM64-CQ: Consider using LDP/STP to save codesize.
+ while (slots > 0)
+ {
+ emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ slots--;
+ }
+ }
+ else
+ {
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+ unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
+
+ unsigned i = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // TODO-ARM64-CQ: Consider using LDP/STP to save codesize in case of contiguous NON-GC slots.
+ emit->emitIns_R_R_I(INS_ldr, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_SRC_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ emit->emitIns_R_R_I(INS_str, EA_8BYTE, tmpReg, REG_WRITE_BARRIER_DST_BYREF, TARGET_POINTER_SIZE,
+ INS_OPTS_POST_INDEX);
+ break;
+
+ default:
+ // We have a GC pointer; call the byref write barrier helper.
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
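+ // The byref write barrier helper copies the GC slot and advances both
+ // byref registers, so no explicit post-increment is needed here.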
+ gcPtrCount--;
+ break;
+ }
+ ++i;
+ }
+ assert(gcPtrCount == 0);
+ }
+
+ // Clear the gcInfo for REG_WRITE_BARRIER_SRC_BYREF and REG_WRITE_BARRIER_DST_BYREF.
+ // While we normally update GC info prior to the last instruction that uses them,
+ // these actually live into the helper call.
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER_SRC_BYREF | RBM_WRITE_BARRIER_DST_BYREF);
+}
+
+ // Generate code for a CpBlk node by means of the VM memcpy helper call
+ // Preconditions (one of):
+ // a) The size argument of the CpBlk is not an integer constant, or
+ // b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ noway_assert(source->gtOper == GT_IND);
+ GenTreePtr srcAddr = source->gtGetOp1();
+
+ assert(!dstAddr->isContained());
+ assert(!srcAddr->isContained());
+ assert(cpBlkNode->gtRsvdRegs == RBM_ARG_2);
+
+ if (blockSize != 0)
+ {
+#if 0
+ // Enable this when we support cpblk loop unrolling.
+
+ assert(blockSize->gtIntCon.gtIconVal >= CPBLK_UNROLL_LIMIT);
+
+#endif // 0
+ genSetRegToIcon(REG_ARG_2, blockSize);
+ }
+ else
+ {
+ noway_assert(cpBlkNode->gtOper == GT_DYN_BLK);
+ genConsumeRegAndCopy(cpBlkNode->AsDynBlk()->gtDynamicSize, REG_ARG_2);
+ }
+ genConsumeRegAndCopy(srcAddr, REG_ARG_1);
+ genConsumeRegAndCopy(dstAddr, REG_ARG_0);
+
+ genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+}
+
+ // Generate code to do a switch statement based on a table of ip-relative offsets
+void CodeGen::genTableBasedSwitch(GenTree* treeNode)
+{
+ genConsumeOperands(treeNode->AsOp());
+ regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
+ regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
+
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // load the ip-relative offset (which is relative to start of fgFirstBB)
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, baseReg, baseReg, idxReg, INS_OPTS_LSL);
+
+ // add it to the absolute address of fgFirstBB
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_L(INS_adr, EA_PTRSIZE, compiler->fgFirstBB, tmpReg);
+ getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, baseReg, baseReg, tmpReg);
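+ // baseReg now holds the absolute address of the switch target.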
+
+ // br baseReg
+ getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), baseReg);
+}
+
+// emits the table and an instruction to get the address of the first element
+void CodeGen::genJumpTable(GenTree* treeNode)
+{
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+ assert(treeNode->OperGet() == GT_JMPTABLE);
+
+ unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+ unsigned jmpTabOffs;
+ unsigned jmpTabBase;
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
+
+ jmpTabOffs = 0;
+
+ JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* target = *jumpTable++;
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+ JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+
+ getEmitter()->emitDataGenData(i, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ getEmitter()->emitIns_R_C(INS_adr, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum, REG_NA,
+ compiler->eeFindJitDataOffs(jmpTabBase), 0);
+ genProduceReg(treeNode);
+}
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void CodeGen::genLockedInstructions(GenTree* treeNode)
+{
+#if 0
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber dataReg = data->gtRegNum;
+ regNumber addrReg = addr->gtRegNum;
+ instruction ins;
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
+ genConsumeReg(addr);
+
+ // The register allocator should have extended the lifetime of the address
+ // so that it is not used as the target.
+ noway_assert(addrReg != targetReg);
+
+ // If data is a lclVar that's not a last use, we'd better have allocated a register
+ // for the result (except in the case of GT_LOCKADD which does not produce a register result).
+ assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) || (data->gtFlags & GTF_VAR_DEATH) != 0);
+
+ genConsumeIfReg(data);
+ if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
+ data->gtRegNum = targetReg;
+
+ // TODO-ARM64-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
+ // original gtRegNum on data, after calling emitInsBinary below.
+ }
+ switch (treeNode->OperGet())
+ {
+ case GT_LOCKADD:
+ instGen(INS_lock);
+ ins = INS_add;
+ break;
+ case GT_XCHG:
+ // lock is implied by xchg
+ ins = INS_xchg;
+ break;
+ case GT_XADD:
+ instGen(INS_lock);
+ ins = INS_xadd;
+ break;
+ default:
+ unreached();
+ }
+ getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+#else // !0
+ NYI("genLockedInstructions");
+#endif // !0
+}
+
+// generate code for BoundsCheck nodes
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+#ifdef FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
+#else // !FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+#endif // !FEATURE_SIMD
+
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ GenTree * src1, *src2;
+ emitJumpKind jmpKind;
+
+ genConsumeRegs(arrLen);
+ genConsumeRegs(arrIndex);
+
+ if (arrIndex->isContainedIntOrIImmed())
+ {
+ // To encode using a cmp immediate, we place the
+ // constant operand in the second position
+ src1 = arrLen;
+ src2 = arrIndex;
+ jmpKind = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ }
+ else
+ {
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ }
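+ // In both cases we take the throw path when the (unsigned) index is not strictly less
+ // than the array length; swapping the operand order for the contained-constant case is
+ // what requires flipping GE to LE.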
+
+ GenTreeIntConCommon* intConst = nullptr;
+ if (src2->isContainedIntOrIImmed())
+ {
+ intConst = src2->AsIntConCommon();
+ }
+
+ if (intConst != nullptr)
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, src1->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, src1->gtRegNum, src2->gtRegNum);
+ }
+
+ genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
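+ // Together with genOffsetOfMDArrayDimensionSize below, this layout places the 'rank'
+ // dimension sizes first, followed by the 'rank' lower bounds; e.g., for rank == 2 the
+ // lower bound of dimension 0 lives at the array data offset + 4 * 2.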
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+ // genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+// size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+ // dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+// The offset.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+// static
+unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+// producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+// arrIndex - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+
+void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+ emitter* emit = getEmitter();
+ GenTreePtr arrObj = arrIndex->ArrObj();
+ GenTreePtr indexNode = arrIndex->IndexExpr();
+ regNumber arrReg = genConsumeReg(arrObj);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber tgtReg = arrIndex->gtRegNum;
+ noway_assert(tgtReg != REG_NA);
+
+ // We will use a temp register to load the lower bound and dimension size values
+ //
+ regMaskTP tmpRegsMask = arrIndex->gtRsvdRegs; // there will be two bits set
+ tmpRegsMask &= ~genRegMask(tgtReg); // remove the bit for 'tgtReg' from 'tmpRegsMask'
+
+ regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask); // set tmpReg from that mask
+ noway_assert(tmpReg != REG_NA);
+
+ assert(tgtReg != tmpReg);
+
+ unsigned dim = arrIndex->gtCurrDim;
+ unsigned rank = arrIndex->gtArrRank;
+ var_types elemType = arrIndex->gtArrElemType;
+ unsigned offset;
+
+ offset = genOffsetOfMDArrayLowerBound(elemType, rank, dim);
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tgtReg, indexReg, tmpReg);
+
+ offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+ emit->emitIns_R_R(INS_cmp, EA_4BYTE, tgtReg, tmpReg);
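+ // tgtReg now holds the zero-based index; it must be (unsigned) less than the
+ // dimension size in tmpReg, otherwise we throw.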
+
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+ genProduceReg(arrIndex);
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+// one dimension of an array reference:
+// result = (prevDimOffset * dimSize) + effectiveIndex
+// where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+// arrOffset - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+// Notes:
+// dimSize and effectiveIndex are always non-negative, the former by design,
+// and the latter because it has been normalized to be zero-based.
+
+void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
+{
+ GenTreePtr offsetNode = arrOffset->gtOffset;
+ GenTreePtr indexNode = arrOffset->gtIndex;
+ regNumber tgtReg = arrOffset->gtRegNum;
+
+ noway_assert(tgtReg != REG_NA);
+
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ emitter* emit = getEmitter();
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+ regNumber arrReg = genConsumeReg(arrObj);
+ noway_assert(arrReg != REG_NA);
+ regNumber offsetReg = genConsumeReg(offsetNode);
+ noway_assert(offsetReg != REG_NA);
+ regNumber indexReg = genConsumeReg(indexNode);
+ noway_assert(indexReg != REG_NA);
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+ unsigned dim = arrOffset->gtCurrDim;
+ unsigned rank = arrOffset->gtArrRank;
+ var_types elemType = arrOffset->gtArrElemType;
+ unsigned offset = genOffsetOfMDArrayDimensionSize(elemType, rank, dim);
+
+ // Load tmpReg with the dimension size
+ emit->emitIns_R_R_I(ins_Load(TYP_INT), EA_8BYTE, tmpReg, arrReg, offset); // a 4 BYTE sign extending load
+
+ // Evaluate tgtReg = offsetReg*dim_size + indexReg.
+ emit->emitIns_R_R_R_R(INS_madd, EA_4BYTE, tgtReg, tmpReg, offsetReg, indexReg);
+ }
+ else
+ {
+ regNumber indexReg = genConsumeReg(indexNode);
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
+ }
+ }
+ genProduceReg(arrOffset);
+}
+
+ // make a temporary indir that we can feed to the pattern matching routines
+ // in cases where we don't want to instantiate all the indirs that would otherwise be created
+//
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+ // make a temporary int constant that we can feed to the pattern matching routines
+ // in cases where we don't want to instantiate an actual node
+//
+// TODO-Cleanup: move to CodeGenCommon.cpp
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // has to be nonnull (because contained nodes can't be the last in block)
+ // but don't want it to be a valid pointer
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins = INS_brk;
+
+ if (varTypeIsFloating(type))
+ {
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_fadd;
+ break;
+ case GT_SUB:
+ ins = INS_fsub;
+ break;
+ case GT_MUL:
+ ins = INS_fmul;
+ break;
+ case GT_DIV:
+ ins = INS_fdiv;
+ break;
+ case GT_NEG:
+ ins = INS_fneg;
+ break;
+
+ default:
+ NYI("Unhandled oper in genGetInsForOper() - float");
+ unreached();
+ break;
+ }
+ }
+ else
+ {
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_and;
+ break;
+ case GT_DIV:
+ ins = INS_sdiv;
+ break;
+ case GT_UDIV:
+ ins = INS_udiv;
+ break;
+ case GT_MUL:
+ ins = INS_mul;
+ break;
+ case GT_LSH:
+ ins = INS_lsl;
+ break;
+ case GT_NEG:
+ ins = INS_neg;
+ break;
+ case GT_NOT:
+ ins = INS_mvn;
+ break;
+ case GT_OR:
+ ins = INS_orr;
+ break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
+ case GT_RSH:
+ ins = INS_asr;
+ break;
+ case GT_RSZ:
+ ins = INS_lsr;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_eor;
+ break;
+
+ default:
+ NYI("Unhandled oper in genGetInsForOper() - integer");
+ unreached();
+ break;
+ }
+ }
+ return ins;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ var_types targetType = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr size = emitTypeSize(tree);
+
+ assert(tree->gtRegNum != REG_NA);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ genConsumeReg(operand);
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (!shiftBy->IsCnsIntOrI())
+ {
+ genConsumeReg(shiftBy);
+ getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+ }
+ else
+ {
+ unsigned immWidth = emitter::getBitWidth(size); // immWidth will be set to 32 or 64
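+ // Mask the shift amount down to the 0..immWidth-1 range that the immediate form encodes.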
+ ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
+
+ getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
+ }
+
+ genProduceReg(tree);
+}
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+
+ GenTree* unspillTree = tree;
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+
+ if (unspillTree->gtFlags & GTF_SPILLED)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ var_types targetType = unspillTree->gtType;
+ instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum));
+ emitAttr attr = emitTypeSize(targetType);
+ emitter* emit = getEmitter();
+
+ // Fixes Issue #3326
+ attr = emit->emitInsAdjustLoadStoreAttr(ins, attr);
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
+
+ unspillTree->SetInReg();
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // This code exists in other CodeGen*.cpp files.
+
+ // Don't update the variable's location if we are just re-spilling it again.
+
+ if ((unspillTree->gtFlags & GTF_SPILL) == 0)
+ {
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ compiler->printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ else if (unspillTree->IsMultiRegCall())
+ {
+ GenTreeCall* call = unspillTree->AsCall();
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+ GenTreeCopyOrReload* reloadTree = nullptr;
+ if (tree->OperGet() == GT_RELOAD)
+ {
+ reloadTree = tree->AsCopyOrReload();
+ }
+
+ // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
+ // one or more of its result regs are spilled. Call node needs to be
+ // queried to know which specific result regs to be unspilled.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILLED) != 0)
+ {
+ var_types dstType = pRetTypeDesc->GetReturnRegType(i);
+ regNumber unspillTreeReg = call->GetRegNumByIdx(i);
+
+ if (reloadTree != nullptr)
+ {
+ dstReg = reloadTree->GetRegNumByIdx(i);
+ if (dstReg == REG_NA)
+ {
+ dstReg = unspillTreeReg;
+ }
+ }
+ else
+ {
+ dstReg = unspillTreeReg;
+ }
+
+ TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
+ getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
+ 0);
+ compiler->tmpRlsTemp(t);
+ gcInfo.gcMarkRegPtrVal(dstReg, dstType);
+ }
+ }
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
+ t->tdTempNum(), 0);
+ compiler->tmpRlsTemp(t);
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ }
+}
+
+ // Do a liveness update for a subnode that is being consumed by codegen,
+ // including the logic to reload it if needed, and also take care of
+ // placing the value in the desired register.
+void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
+{
+ regNumber treeReg = genConsumeReg(tree);
+ if (treeReg != needReg)
+ {
+ var_types targetType = tree->TypeGet();
+ inst_RV_RV(ins_Copy(targetType), needReg, treeReg, targetType);
+ }
+}
+
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_COPY);
+
+ var_types targetType = treeNode->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+
+ // Check whether this node and the node from which we're copying the value have the same
+ // register type.
+ // This can happen if (currently iff) we have a SIMD vector type that fits in an integer
+ // register, in which case it is passed as an argument, or returned from a call,
+ // in an integer register and must be copied if it's in a floating point register.
+
+ if (varTypeIsFloating(treeNode) != varTypeIsFloating(op1))
+ {
+ inst_RV_RV(INS_fmov, targetReg, genConsumeReg(op1), targetType);
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+ }
+
+ if (op1->IsLocal())
+ {
+ // The lclVar will never be a def.
+ // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+ // appropriately set the gcInfo for the copied value.
+ // If not, there are two cases we need to handle:
+ // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+ // will remain live in its original register.
+ // genProduceReg() will appropriately set the gcInfo for the copied value,
+ // and genConsumeReg will reset it.
+ // - Otherwise, we need to update register info for the lclVar.
+
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+ if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // If we didn't just spill it (in genConsumeReg, above), then update the register info
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ }
+ }
+ genProduceReg(treeNode);
+}
+
+// Do liveness update for a subnode that is being consumed by codegen.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ if (tree->OperGet() == GT_COPY)
+ {
+ genRegCopy(tree);
+ }
+ // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
+ // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
+ // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
+ // always using GT_COPY to make the lclVar location explicit.
+ // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
+ // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
+ // the lclVar (normally when a lclVar is spilled it is then used from its former register
+ // location, which matches the gtRegNum on the node).
+ // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
+ // because if it's on the stack it will always get reloaded into tree->gtRegNum).
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ if ((varDsc->lvRegNum != REG_STK) && (varDsc->lvRegNum != tree->gtRegNum))
+ {
+ inst_RV_RV(ins_Copy(tree->TypeGet()), tree->gtRegNum, varDsc->lvRegNum);
+ }
+ }
+
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+ assert(tree->gtRegNum != REG_NA);
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ assert(varDsc->lvLRACandidate);
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // We have loaded this into a register only temporarily
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+ else if (!addr->isContained())
+ {
+ genConsumeReg(addr);
+ }
+}
+
+// do liveness update for a subnode that is being consumed by codegen
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ if (addr->Base())
+ genConsumeReg(addr->Base());
+ if (addr->Index())
+ genConsumeReg(addr->Index());
+}
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genConsumeRegs(GenTree* tree)
+{
+ if (tree->isContained())
+ {
+ if (tree->isIndir())
+ {
+ genConsumeAddress(tree->AsIndir()->Addr());
+ }
+ else if (tree->OperGet() == GT_AND)
+ {
+ // This is the special contained GT_AND that we created in Lowering::LowerCmp()
+ // Now we need to consume the operands of the GT_AND node.
+ genConsumeOperands(tree->AsOp());
+ }
+ else
+ {
+ assert(tree->OperIsLeaf());
+ }
+ }
+ else
+ {
+ genConsumeReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
+//
+// Arguments:
+// tree - the GenTreeOp whose operands will have their liveness updated.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Note that this logic is localized here because we must do the liveness update in
+// the correct execution order. This is important because we may have two operands
+// that involve the same lclVar, and if one is marked "lastUse" we must handle it
+// after the first.
+// TODO-Cleanup: move to CodeGenCommon.cpp
+
+void CodeGen::genConsumeOperands(GenTreeOp* tree)
+{
+ GenTree* firstOp = tree->gtOp1;
+ GenTree* secondOp = tree->gtOp2;
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ assert(secondOp != nullptr);
+ firstOp = secondOp;
+ secondOp = tree->gtOp1;
+ }
+ if (firstOp != nullptr)
+ {
+ genConsumeRegs(firstOp);
+ }
+ if (secondOp != nullptr)
+ {
+ genConsumeRegs(secondOp);
+ }
+}
+
+// do liveness update for register produced by the current node in codegen
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genProduceReg(GenTree* tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(tree->gtLclVarCommon.gtLclNum)), tree,
+ tree->gtRegNum);
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ if (tree->gtHasReg())
+ {
+ // We only mark the register in the following cases:
+ // 1. It is not a register candidate local. In this case, we're producing a
+ // register from a local, but the local is not a register candidate. Thus,
+ // we must be loading it as a temp register, and any "last use" flag on
+ // the register wouldn't be relevant.
+ // 2. The register candidate local is going dead. There's no point to mark
+ // the register as live, with a GC pointer, if the variable is dead.
+ if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// generates an ip-relative call or indirect call via reg ('call reg')
+ // pass in 'addr' for a relative call or 'base' for an indirect register call
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
+{
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, 0,
+ retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
+ emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
+}
+
+// generates an indirect call via addressing mode (call []) given an indir node
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE(not GC)
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ IL_OFFSETX ilOffset)
+{
+ genConsumeAddress(indir->Addr());
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr, 0,
+ retSize, secondRetSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indir->Base() ? indir->Base()->gtRegNum : REG_NA,
+ indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
+}
+
+// Produce code for a GT_CALL node
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ GenTreeCall* call = node->AsCall();
+
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ regNumber argReg = curArgTabEntry->regNum;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ genConsumeReg(putArgRegNode);
+
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+ // In the case of a varargs call,
+ // the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the
+ // integer and floating point registers, so let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ NYI_ARM64("CodeGen - IsVarargs");
+ }
+ }
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_ZR, regThis, 0);
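+ // Loading into the zero register discards the value; the load simply faults if 'this' is null.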
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+ // If fast tail call, then we are done. In this case we setup the args (both reg args
+ // and stack args in incoming arg area) and call target in IP0. Epilog sequence would
+ // generate "br IP0".
+ if (call->IsFastTailCall())
+ {
+ // Don't support fast tail calling JIT helpers
+ assert(callType != CT_HELPER);
+
+ // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+ assert(target != nullptr);
+
+ genConsumeReg(target);
+
+ if (target->gtRegNum != REG_IP0)
+ {
+ inst_RV_RV(INS_mov, REG_IP0, target->gtRegNum);
+ }
+ return;
+ }
+
+ // For a pinvoke to unmanaged code we emit a label to clear
+ // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+ emitAttr secondRetSize = EA_UNKNOWN;
+
+ if (call->HasMultiRegRetVal())
+ {
+ retSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(0));
+ secondRetSize = emitTypeSize(pRetTypeDesc->GetReturnRegType(1));
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif // DEBUGGING_SUPPORT
+
+ if (target != nullptr)
+ {
+ // For Arm64 a call target can not be a contained indirection
+ assert(!target->isContainedIndir());
+
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ //
+ assert(genIsValidIntReg(target->gtRegNum));
+
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+ retSize, secondRetSize, ilOffset, genConsumeReg(target));
+ }
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if ((call->NeedsNullCheck()) == 0)
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ addr = addrInfo.addr;
+ }
+#if 0
+ // Use this path if you want to load an absolute call target using
+ // a sequence of movs followed by an indirect call (blr instruction)
+
+ // Load the call target address in x16
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_IP0, (ssize_t) addr);
+
+ // indirect call to constant address in IP0
+ genEmitCall(emitter::EC_INDIR_R,
+ methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)
+ nullptr, //addr
+ retSize,
+ secondRetSize,
+ ilOffset,
+ REG_IP0);
+#else
+ // Non-virtual direct call to known addresses
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, secondRetSize,
+ ilOffset);
+#endif
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+ // TODO-ARM64-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
+ // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
+ // registers from RBM_CALLEE_TRASH
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(pRetTypeDesc != nullptr);
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ // If regs allocated to call node are different from ABI return
+ // regs in which the call has returned its result, move the result
+ // to regs allocated to call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = pRetTypeDesc->GetReturnRegType(i);
+ returnReg = pRetTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+ }
+ else
+ {
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
+}
+
+// Produce code for a GT_JMP node.
+ // The arguments of the caller need to be transferred to the callee before exiting the caller.
+ // The actual jump to the callee is generated as part of the caller's epilog sequence.
+ // Therefore the codegen for GT_JMP ensures that the callee arguments are correctly set up.
+void CodeGen::genJmpMethod(GenTreePtr jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+ assert(compiler->compJmpOpUsed);
+
+ // If no arguments, nothing to do
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // First move any enregistered stack arguments back to the stack.
+ // At the same time, any reg arg not in its correct register is moved back to its stack location.
+ //
+ // We are not strictly required to spill reg args that are not in the desired register for a jmp call,
+ // but that would require us to deal with circularity while moving values around. Spilling
+ // to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
+ // are not frequent.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
+ {
+            // Skip reg args that are already in the right register for the jmp call.
+ // If not, we will spill such args to their stack locations.
+ //
+ // If we need to generate a tail call profiler hook, then spill all
+ // arg regs to free them up for the callback.
+ if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
+ continue;
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+            // Skip args which are currently living on the stack.
+ continue;
+ }
+
+ // If we came here it means either a reg argument not in the right register or
+ // a stack argument currently living in a register. In either case the following
+ // assert should hold.
+ assert(varDsc->lvRegNum != REG_STK);
+ assert(varDsc->TypeGet() != TYP_STRUCT);
+ var_types storeType = genActualType(varDsc->TypeGet());
+ emitAttr storeSize = emitActualTypeSize(storeType);
+
+ getEmitter()->emitIns_S_R(ins_Store(storeType), storeSize, varDsc->lvRegNum, varNum, 0);
+
+ // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of varDsc->lvRegNum.
+ regMaskTP tempMask = genRegMask(varDsc->lvRegNum);
+ regSet.RemoveMaskVars(tempMask);
+ gcInfo.gcMarkRegSetNpt(tempMask);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // At this point all arg regs are free.
+ // Emit tail call profiler callback.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+    // Next, move any register arguments that are not currently in their argument registers back into them.
+ regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ // Skip if arg not passed in a register.
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+        regNumber argRegNext = REG_NA; // second register of a multi-reg struct arg, if used
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ var_types loadType = TYP_UNDEF;
+ if (varTypeIsStruct(varDsc))
+ {
+ // Must be <= 16 bytes or else it wouldn't be passed in registers
+ noway_assert(EA_SIZE_IN_BYTES(varDsc->lvSize()) <= MAX_PASS_MULTIREG_BYTES);
+ loadType = compiler->getJitGCType(varDsc->lvGcLayout[0]);
+ }
+ else
+ {
+ loadType = compiler->mangleVarArgsType(genActualType(varDsc->TypeGet()));
+ }
+ emitAttr loadSize = emitActualTypeSize(loadType);
+ getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.AddMaskVars(genRegMask(argReg));
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+
+ if (compiler->lvaIsMultiregStruct(varDsc))
+ {
+ if (varDsc->lvIsHfa())
+ {
+ NYI_ARM64("CodeGen::genJmpMethod with multireg HFA arg");
+ }
+
+ // Restore the second register.
+ argRegNext = genRegArgNext(argReg);
+
+ loadType = compiler->getJitGCType(varDsc->lvGcLayout[1]);
+ loadSize = emitActualTypeSize(loadType);
+ getEmitter()->emitIns_R_S(ins_Load(loadType), loadSize, argRegNext, varNum, TARGET_POINTER_SIZE);
+
+ regSet.AddMaskVars(genRegMask(argRegNext));
+ gcInfo.gcMarkRegPtrVal(argRegNext, loadType);
+ }
+
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varNum);
+ }
+ }
+
+ // In case of a jmp call to a vararg method ensure only integer registers are passed.
+ if (compiler->info.compIsVarArgs)
+ {
+ assert((genRegMask(argReg) & RBM_ARG_REGS) != RBM_NONE);
+
+ fixedIntArgMask |= genRegMask(argReg);
+
+ if (compiler->lvaIsMultiregStruct(varDsc))
+ {
+ assert(argRegNext != REG_NA);
+ fixedIntArgMask |= genRegMask(argRegNext);
+ }
+
+ if (argReg == REG_ARG_0)
+ {
+ assert(firstArgVarNum == BAD_VAR_NUM);
+ firstArgVarNum = varNum;
+ }
+ }
+ }
+
+    // Jmp call to a vararg method - if the method has fewer than 8 fixed arguments,
+    // load the remaining integer arg registers from the corresponding
+    // shadow stack slots. This is because we don't know the number and type
+    // of non-fixed params passed by the caller, so we have to assume the worst case
+    // of the caller passing all 8 integer arg regs.
+    //
+    // The caller could have passed gc-ref/byref type var args. Since these are var args,
+    // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
+    // the remaining arg registers from shadow stack slots as non-gc interruptible.
+ if (fixedIntArgMask != RBM_NONE)
+ {
+ assert(compiler->info.compIsVarArgs);
+ assert(firstArgVarNum != BAD_VAR_NUM);
+
+ regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ if (remainingIntArgMask != RBM_NONE)
+ {
+ getEmitter()->emitDisableGC();
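+            // Walk all of the integer arg registers; any register that did not carry a fixed
+            // arg is reloaded from its shadow stack slot relative to the first argument.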
+ for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+ regMaskTP argRegMask = genRegMask(argReg);
+
+ if ((remainingIntArgMask & argRegMask) != 0)
+ {
+ remainingIntArgMask &= ~argRegMask;
+ getEmitter()->emitIns_R_S(INS_ldr, EA_8BYTE, argReg, firstArgVarNum, argOffset);
+ }
+
+ argOffset += REGSIZE_BYTES;
+ }
+ getEmitter()->emitEnableGC();
+ }
+ }
+}
+
+// Produce code for a GT_LEA subnode.
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ genConsumeOperands(lea);
+ emitter* emit = getEmitter();
+ emitAttr size = emitTypeSize(lea);
+ unsigned offset = lea->gtOffset;
+
+ // In ARM64 we can only load addresses of the form:
+ //
+ // [Base + index*scale]
+ // [Base + Offset]
+ // [Literal] (PC-Relative)
+ //
+ // So for the case of a LEA node of the form [Base + Index*Scale + Offset] we will generate:
+ // destReg = baseReg + indexReg * scale;
+ // destReg = destReg + offset;
+ //
+ // TODO-ARM64-CQ: The purpose of the GT_LEA node is to directly reflect a single target architecture
+ // addressing mode instruction. Currently we're 'cheating' by producing one or more
+ // instructions to generate the addressing mode so we need to modify lowering to
+ // produce LEAs that are a 1:1 relationship to the ARM64 architecture.
+ if (lea->Base() && lea->Index())
+ {
+ GenTree* memBase = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned offset = lea->gtOffset;
+
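+        // The scale must be a power of two; its log2 becomes the left-shift (lsl) amount
+        // used by the scaled-register form of the add instruction below.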
+ DWORD lsl;
+
+ assert(isPow2(lea->gtScale));
+ BitScanForward(&lsl, lea->gtScale);
+
+ assert(lsl <= 4);
+
+ if (offset != 0)
+ {
+ regMaskTP tmpRegMask = lea->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE))
+ {
+ if (lsl > 0)
+ {
+ // Generate code to set tmpReg = base + index*scale
+ emit->emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
+ INS_OPTS_LSL);
+ }
+ else // no scale
+ {
+ // Generate code to set tmpReg = base + index
+ emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
+ }
+
+ // Then compute target reg from [tmpReg + offset]
+ emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, offset);
+ }
+ else // large offset
+ {
+ // First load/store tmpReg with the large offset constant
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+ // Then add the base register
+ // rd = rd + base
+ emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+
+ noway_assert(tmpReg != index->gtRegNum);
+
+ // Then compute target reg from [tmpReg + index*scale]
+ emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL);
+ }
+ }
+ else
+ {
+ if (lsl > 0)
+ {
+ // Then compute target reg from [base + index*scale]
+ emit->emitIns_R_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum, lsl,
+ INS_OPTS_LSL);
+ }
+ else
+ {
+ // Then compute target reg from [base + index]
+ emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, index->gtRegNum);
+ }
+ }
+ }
+ else if (lea->Base())
+ {
+ GenTree* memBase = lea->Base();
+
+ if (emitter::emitIns_valid_imm_for_add(offset, EA_8BYTE))
+ {
+ if (offset != 0)
+ {
+ // Then compute target reg from [memBase + offset]
+ emit->emitIns_R_R_I(INS_add, size, lea->gtRegNum, memBase->gtRegNum, offset);
+ }
+ else // offset is zero
+ {
+ emit->emitIns_R_R(INS_mov, size, lea->gtRegNum, memBase->gtRegNum);
+ }
+ }
+ else
+ {
+ // We require a tmpReg to hold the offset
+ regMaskTP tmpRegMask = lea->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ // First load tmpReg with the large offset constant
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+
+ // Then compute target reg from [memBase + tmpReg]
+ emit->emitIns_R_R_R(INS_add, size, lea->gtRegNum, memBase->gtRegNum, tmpReg);
+ }
+ }
+ else if (lea->Index())
+ {
+        // If we encounter a GT_LEA node without a base it means it came about while
+        // attempting to optimize an arbitrary arithmetic expression during lowering.
+        // This is currently disabled in ARM64 since we need to adjust lowering to account
+        // for the simpler instructions ARM64 supports.
+ // TODO-ARM64-CQ: Fix this and let LEA optimize arithmetic trees too.
+ assert(!"We shouldn't see a baseless address computation during CodeGen for ARM64");
+ }
+
+ genProduceReg(lea);
+}
+
+//-------------------------------------------------------------------------------------------
+// genJumpKindsForTree: Determine the number and kinds of conditional branches
+// necessary to implement the given GT_CMP node
+//
+// Arguments:
+//    cmpTree           - (input) The GenTree Relop node that is used to set the Condition codes
+// jmpKind[2] - (output) One or two conditional branch instructions
+// jmpToTrueLabel[2] - (output) On Arm64 both branches will always branch to the true label
+//
+// Return Value:
+// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
+//
+// Assumptions:
+// At least one conditional branch instruction will be returned.
+// Typically only one conditional branch is needed
+// and the second jmpKind[] value is set to EJ_NONE
+//-------------------------------------------------------------------------------------------
+
+// static
+void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
+{
+ // On Arm64 both branches will always branch to the true label
+ jmpToTrueLabel[0] = true;
+ jmpToTrueLabel[1] = true;
+
+ // For integer comparisons just use genJumpKindForOper
+ if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
+ {
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
+ jmpKind[1] = EJ_NONE;
+ }
+ else // We have a Floating Point Compare operation
+ {
+ assert(cmpTree->OperIsCompare());
+
+ // For details on this mapping, see the ARM64 Condition Code
+ // table at section C1.2.3 in the ARMV8 architecture manual
+ //
+
+ // We must check the GTF_RELOP_NAN_UN to find out
+ // if we need to branch when we have a NaN operand.
+ //
+ if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+            // Must branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_EQ:
+ jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
+ jmpKind[1] = EJ_vs; // branch or set when we have a NaN
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_ne; // branch or set when not equal (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LT:
+ jmpKind[0] = EJ_lt; // branch or set when less than (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ jmpKind[0] = EJ_le; // branch or set when less than or equal (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GT:
+ jmpKind[0] = EJ_hi; // branch or set when greater than (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GE:
+ jmpKind[0] = EJ_hs; // branch or set when greater than or equal (or have NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
+ {
+            // Do not branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_EQ:
+ jmpKind[0] = EJ_eq; // branch or set when equal (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
+ jmpKind[1] = EJ_lo; // branch or set when less than (and no NaN's)
+ break;
+
+ case GT_LT:
+ jmpKind[0] = EJ_lo; // branch or set when less than (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ jmpKind[0] = EJ_ls; // branch or set when less than or equal (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GT:
+ jmpKind[0] = EJ_gt; // branch or set when greater than (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_GE:
+ jmpKind[0] = EJ_ge; // branch or set when greater than or equal (and no NaN's)
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
+// corresponding to a binary Relational operator result.
+//
+// Arguments:
+// dstReg - The target register to set to 1 or 0
+// tree - The GenTree Relop node that was used to set the Condition codes
+//
+// Return Value: none
+//
+// Notes:
+// A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
+//-------------------------------------------------------------------------------------------
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
+ assert(jumpKind[0] != EJ_NONE);
+
+ // Set the reg according to the flags
+ inst_SET(jumpKind[0], dstReg);
+
+    // Do we need to use two operations to set the flags?
+ //
+ if (jumpKind[1] != EJ_NONE)
+ {
+ emitter* emit = getEmitter();
+ bool ordered = ((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
+ insCond secondCond;
+
+ // The only ones that require two operations are the
+ // floating point compare operations of BEQ or BNE.UN
+ //
+ if (tree->gtOper == GT_EQ)
+ {
+ // This must be an ordered comparison.
+ assert(ordered);
+ assert(jumpKind[1] == EJ_vs); // We complement this value
+ secondCond = INS_COND_VC; // for the secondCond
+ }
+ else // gtOper == GT_NE
+ {
+ // This must be BNE.UN (unordered comparison)
+ assert((tree->gtOper == GT_NE) && !ordered);
+ assert(jumpKind[1] == EJ_lo); // We complement this value
+ secondCond = INS_COND_HS; // for the secondCond
+ }
+
+        // The second instruction is a 'csinc' that either keeps the previous dstReg value
+        // (when the second condition holds) or selects ZR incremented by one, which produces a 1.
+
+ emit->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, dstReg, dstReg, REG_ZR, secondCond);
+ }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+// This method handles integer overflow checking casts
+// as well as ordinary integer casts.
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode is not a contained node and must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+// TODO-ARM64-CQ: Allow castOp to be a contained node without an assigned register.
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ emitter* emit = getEmitter();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = genActualType(castOp->TypeGet());
+ emitAttr movSize = emitActualTypeSize(dstType);
+ bool movRequired = false;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+
+ // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
+ regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+
+ genConsumeReg(castOp);
+ Lowering::CastInfo castInfo;
+
+ // Get information about the cast.
+ Lowering::getCastDescription(treeNode, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
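+        // For overflow-checking casts we verify that the source value fits in the destination
+        // type (via a sign test, a bit-mask test, or min/max compares) and branch to the
+        // overflow throw helper block if it does not.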
+ if (castInfo.signCheckOnly)
+ {
+ // We only need to check for a negative value in sourceReg
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, 0);
+ emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+ noway_assert(genTypeSize(srcType) == 4 || genTypeSize(srcType) == 8);
+            // This is the only interesting case for ensuring the upper bits are zero.
+ if ((srcType == TYP_INT) && (dstType == TYP_ULONG))
+ {
+ // cast to TYP_ULONG:
+ // We use a mov with size=EA_4BYTE
+ // which will zero out the upper bits
+ movSize = EA_4BYTE;
+ movRequired = true;
+ }
+ }
+ else if (castInfo.unsignedSource || castInfo.unsignedDest)
+ {
+ // When we are converting from/to unsigned,
+ // we only have to check for any bits set in 'typeMask'
+
+ noway_assert(castInfo.typeMask != 0);
+ emit->emitIns_R_I(INS_tst, cmpSize, sourceReg, castInfo.typeMask);
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ }
+ else
+ {
+ // For a narrowing signed cast
+ //
+ // We must check the value is in a signed range.
+
+ // Compare with the MAX
+
+ noway_assert((castInfo.typeMin != 0) && (castInfo.typeMax != 0));
+
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize))
+ {
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMax);
+ }
+ else
+ {
+ noway_assert(tmpReg != REG_NA);
+ instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMax);
+ emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+ }
+
+ emitJumpKind jmpGT = genJumpKindForOper(GT_GT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpGT, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ if (emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize))
+ {
+ emit->emitIns_R_I(INS_cmp, cmpSize, sourceReg, castInfo.typeMin);
+ }
+ else
+ {
+ noway_assert(tmpReg != REG_NA);
+ instGen_Set_Reg_To_Imm(cmpSize, tmpReg, castInfo.typeMin);
+ emit->emitIns_R_R(INS_cmp, cmpSize, sourceReg, tmpReg);
+ }
+
+ emitJumpKind jmpLT = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLT, SCK_OVERFLOW);
+ }
+ ins = INS_mov;
+ }
+ else // Non-overflow checking cast.
+ {
+ if (genTypeSize(srcType) == genTypeSize(dstType))
+ {
+ ins = INS_mov;
+ }
+ else
+ {
+ var_types extendType = TYP_UNKNOWN;
+
+ // If we need to treat a signed type as unsigned
+ if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ extendType = genUnsignedType(srcType);
+ movSize = emitTypeSize(extendType);
+ movRequired = true;
+ }
+ else
+ {
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ extendType = srcType;
+ if (srcType == TYP_UINT)
+ {
+ // If we are casting from a smaller type to
+ // a larger type, then we need to make sure the
+                        // upper 4 bytes are zero to guarantee the correct value.
+                        // Therefore using a mov with EA_4BYTE in place of EA_8BYTE
+                        // will zero the upper bits.
+ movSize = EA_4BYTE;
+ movRequired = true;
+ }
+ }
+ else // (genTypeSize(srcType) > genTypeSize(dstType))
+ {
+ extendType = dstType;
+ if (dstType == TYP_INT)
+ {
+ movSize = EA_8BYTE; // a sxtw instruction requires EA_8BYTE
+ }
+ }
+ }
+
+ ins = ins_Move_Extend(extendType, castOp->InReg());
+ }
+ }
+
+ // We should never be generating a load from memory instruction here!
+ assert(!emit->emitInsIsLoad(ins));
+
+ if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+ {
+ emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double or vice versa.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // treeNode must be a reg
+ assert(!treeNode->isContained());
+
+ if (srcType != dstType)
+ {
+ insOpts cvtOption = (srcType == TYP_FLOAT) ? INS_OPTS_S_TO_D // convert Single to Double
+ : INS_OPTS_D_TO_S; // convert Double to Single
+
+ getEmitter()->emitIns_R_R(INS_fcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+ }
+ else if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ // If double to double cast or float to float cast. Emit a move instruction.
+ getEmitter()->emitIns_R_R(INS_mov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
+void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ // int type --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+    // We should never see a srcType whose size is neither EA_4BYTE nor EA_8BYTE.
+ // For conversions from small types (byte/sbyte/int16/uint16) to float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+ noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
+
+ instruction ins = varTypeIsUnsigned(srcType) ? INS_ucvtf : INS_scvtf;
+ insOpts cvtOption = INS_OPTS_NONE; // invalid value
+
+ if (dstType == TYP_DOUBLE)
+ {
+ if (srcSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_4BYTE_TO_D;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ cvtOption = INS_OPTS_8BYTE_TO_D;
+ }
+ }
+ else
+ {
+ assert(dstType == TYP_FLOAT);
+ if (srcSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_4BYTE_TO_S;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ cvtOption = INS_OPTS_8BYTE_TO_S;
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(dstType), treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
+void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+    // We don't expect to see overflow-detecting float/double --> int type conversions here,
+    // as they should have been converted into helper calls by the front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+    // We should never see a dstType whose size is neither EA_4BYTE nor EA_8BYTE.
+ // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+ noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
+
+ instruction ins = INS_fcvtzs; // default to sign converts
+ insOpts cvtOption = INS_OPTS_NONE; // invalid value
+
+ if (varTypeIsUnsigned(dstType))
+ {
+ ins = INS_fcvtzu; // use unsigned converts
+ }
+
+ if (srcType == TYP_DOUBLE)
+ {
+ if (dstSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_D_TO_4BYTE;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ cvtOption = INS_OPTS_D_TO_8BYTE;
+ }
+ }
+ else
+ {
+ assert(srcType == TYP_FLOAT);
+ if (dstSize == EA_4BYTE)
+ {
+ cvtOption = INS_OPTS_S_TO_4BYTE;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ cvtOption = INS_OPTS_S_TO_8BYTE;
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ getEmitter()->emitIns_R_R(ins, dstSize, treeNode->gtRegNum, op1->gtRegNum, cvtOption);
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCkfinite: Generate code for ckfinite opcode.
+//
+// Arguments:
+// treeNode - The GT_CKFINITE node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// GT_CKFINITE node has reserved an internal register.
+//
+// TODO-ARM64-CQ - mark the operand as contained if known to be in
+// memory (e.g. field or an array element).
+//
+void CodeGen::genCkfinite(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CKFINITE);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ var_types targetType = treeNode->TypeGet();
+ int expMask = (targetType == TYP_FLOAT) ? 0x7F8 : 0x7FF; // Bit mask to extract exponent.
+ int shiftAmount = targetType == TYP_FLOAT ? 20 : 52;
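+    // Shifting the raw bits right by 20 (float) or 52 (double) and masking with expMask
+    // isolates the biased exponent; an all-ones exponent means the value is a NaN or infinity.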
+
+ emitter* emit = getEmitter();
+
+ // Extract exponent into a register.
+ regNumber intReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ regNumber fpReg = genConsumeReg(op1);
+ assert(intReg != REG_NA);
+
+ emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), intReg, fpReg);
+ emit->emitIns_R_R_I(INS_lsr, emitTypeSize(targetType), intReg, intReg, shiftAmount);
+
+    // Mask off the exponent bits and check whether the exponent is all 1's
+ emit->emitIns_R_R_I(INS_and, EA_4BYTE, intReg, intReg, expMask);
+ emit->emitIns_R_I(INS_cmp, EA_4BYTE, intReg, expMask);
+
+ // If exponent is all 1's, throw ArithmeticException
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN);
+
+    // If it is a finite value, copy it to targetReg.
+ if (treeNode->gtRegNum != fpReg)
+ {
+ emit->emitIns_R_R(ins_Copy(targetType), emitTypeSize(treeNode), treeNode->gtRegNum, fpReg);
+ }
+ genProduceReg(treeNode);
+}
+
+int CodeGenInterface::genSPtoFPdelta()
+{
+ int delta;
+
+ // We place the saved frame pointer immediately above the outgoing argument space.
+ delta = (int)compiler->lvaOutgoingArgSpaceSize;
+
+ assert(delta >= 0);
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the total size of the stack frame, including local size,
+// callee-saved register size, etc.
+//
+// Return value:
+// Total frame size
+//
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ // For varargs functions, we home all the incoming register arguments. They are not
+ // included in the compCalleeRegsPushed count. This is like prespill on ARM32, but
+ // since we don't use "push" instructions to save them, we don't have to do the
+ // save of these varargs register arguments as the first thing in the prolog.
+
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) +
+ compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta;
+
+ callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+ callerSPtoSPdelta -= genTotalFrameSize();
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Both operand and its result must be of the same floating point type.
+ GenTreePtr srcNode = treeNode->gtOp.gtOp1;
+ assert(varTypeIsFloating(srcNode));
+ assert(srcNode->TypeGet() == treeNode->TypeGet());
+
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics.
+ //
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Abs:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_fabs, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Round:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_frintn, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Sqrt:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_fsqrt, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for a GT_PUTARG_STK node
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+ var_types targetType = treeNode->TypeGet();
+ GenTreePtr source = treeNode->gtOp.gtOp1;
+ emitter* emit = getEmitter();
+
+ // This is the varNum for our store operations,
+ // typically this is the varNum for the Outgoing arg space
+ // When we are generating a tail call it will be the varNum for arg0
+ unsigned varNumOut;
+ unsigned argOffsetMax; // Records the maximum size of this area for assert checks
+
+ // This is the varNum for our load operations,
+ // only used when we have a multireg struct with a LclVar source
+ unsigned varNumInp = BAD_VAR_NUM;
+
+ // Get argument offset to use with 'varNumOut'
+    // Here we cross-check that the argument offset hasn't changed from lowering to codegen, since
+    // we store the arg slot number in the GT_PUTARG_STK node during the lowering phase.
+ unsigned argOffsetOut = treeNode->AsPutArgStk()->gtSlotNum * TARGET_POINTER_SIZE;
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffsetOut == (curArgTabEntry->slotNum * TARGET_POINTER_SIZE));
+#endif // DEBUG
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+    // Decide whether to set up the stk arg in the incoming or the outgoing arg area.
+    // Fast tail calls are implemented as epilog+jmp, so the stk arg is set up in the incoming arg area.
+    // All other calls - the stk arg is set up in the outgoing arg area.
+ if (putInIncomingArgArea)
+ {
+ varNumOut = getFirstArgWithStackSlot();
+ argOffsetMax = compiler->compArgSize;
+#if FEATURE_FASTTAILCALL
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+        // Since it is a fast tail call, the existence of the first incoming arg is guaranteed
+        // because a fast tail call requires that the caller's incoming arg area is >= the outgoing
+        // arg area required for the tail call.
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNumOut]);
+ assert(varDsc != nullptr);
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ varNumOut = compiler->lvaOutgoingArgSpaceVar;
+ argOffsetMax = compiler->lvaOutgoingArgSpaceSize;
+ }
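+    // A struct argument is identified either by its TYP_STRUCT type or by a GT_LIST source
+    // (the multi-reg struct case).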
+ bool isStruct = (targetType == TYP_STRUCT) || (source->OperGet() == GT_LIST);
+
+ if (!isStruct) // a normal non-Struct argument
+ {
+ instruction storeIns = ins_Store(targetType);
+ emitAttr storeAttr = emitTypeSize(targetType);
+
+ // If it is contained then source must be the integer constant zero
+ if (source->isContained())
+ {
+ assert(source->OperGet() == GT_CNS_INT);
+ assert(source->AsIntConCommon()->IconValue() == 0);
+ emit->emitIns_S_R(storeIns, storeAttr, REG_ZR, varNumOut, argOffsetOut);
+ }
+ else
+ {
+ genConsumeReg(source);
+ emit->emitIns_S_R(storeIns, storeAttr, source->gtRegNum, varNumOut, argOffsetOut);
+ }
+ argOffsetOut += EA_SIZE_IN_BYTES(storeAttr);
+        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ else // We have some kind of a struct argument
+ {
+ assert(source->isContained()); // We expect that this node was marked as contained in LowerArm64
+
+ if (source->OperGet() == GT_LIST)
+ {
+ // Deal with the multi register passed struct args.
+ GenTreeArgList* argListPtr = source->AsArgList();
+
+ // Evaluate each of the GT_LIST items into their register
+ // and store their register into the outgoing argument area
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr nextArgNode = argListPtr->gtOp.gtOp1;
+ genConsumeReg(nextArgNode);
+
+ regNumber reg = nextArgNode->gtRegNum;
+ var_types type = nextArgNode->TypeGet();
+ emitAttr attr = emitTypeSize(type);
+
+ // Emit store instructions to store the registers produced by the GT_LIST into the outgoing argument
+ // area
+ emit->emitIns_S_R(ins_Store(type), attr, reg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(attr);
+                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+ else // We must have a GT_OBJ or a GT_LCL_VAR
+ {
+ noway_assert((source->OperGet() == GT_LCL_VAR) || (source->OperGet() == GT_OBJ));
+
+ var_types targetType = source->TypeGet();
+ noway_assert(varTypeIsStruct(targetType));
+
+ // We will copy this struct to the stack, possibly using a ldp instruction
+            // Set up loReg and hiReg from the internal registers that we reserved in lowering.
+ //
+ regNumber loReg = REG_NA;
+ regNumber hiReg = REG_NA;
+ regNumber addrReg = REG_NA;
+
+ // In lowerArm64/TreeNodeInfoInitPutArgStk we have reserved two internal integer registers
+ genGetRegPairFromMask(treeNode->gtRsvdRegs, &loReg, &hiReg);
+
+ GenTreeLclVarCommon* varNode = nullptr;
+ GenTreePtr addrNode = nullptr;
+
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ varNode = source->AsLclVarCommon();
+ }
+ else // we must have a GT_OBJ
+ {
+ assert(source->OperGet() == GT_OBJ);
+
+ addrNode = source->gtOp.gtOp1;
+
+ // addrNode can either be a GT_LCL_VAR_ADDR or an address expression
+ //
+ if (addrNode->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We have a GT_OBJ(GT_LCL_VAR_ADDR)
+ //
+                    // We will treat this case the same as above
+                    // (i.e. if we just had this GT_LCL_VAR directly as the source),
+                    // so set 'varNode' to this GT_LCL_VAR_ADDR node
+                    // and continue to the codegen for the LCL_VAR node below.
+ //
+ varNode = addrNode->AsLclVarCommon();
+ addrNode = nullptr;
+ }
+ }
+
+            // Either varNode or addrNode must have been set up above;
+            // the xor ensures that only one of the two is set up, not both.
+ assert((varNode != nullptr) ^ (addrNode != nullptr));
+
+ BYTE gcPtrs[MAX_ARG_REG_COUNT] = {}; // TYPE_GC_NONE = 0
+ BYTE* structGcLayout = &gcPtrs[0]; // The GC layout for the struct
+ unsigned gcPtrCount; // The count of GC pointers in the struct
+ int structSize;
+ bool isHfa;
+
+            // Set up structSize, isHfa, and gcPtrCount.
+ if (varNode != nullptr)
+ {
+ varNumInp = varNode->gtLclNum;
+ assert(varNumInp < compiler->lvaCount);
+ LclVarDsc* varDsc = &compiler->lvaTable[varNumInp];
+
+ assert(varDsc->lvType == TYP_STRUCT);
+ assert(varDsc->lvOnFrame); // This struct also must live in the stack frame
+ assert(!varDsc->lvRegister); // And it can't live in a register (SIMD)
+
+ structSize = varDsc->lvSize(); // This yields the roundUp size, but that is fine
+ // as that is how much stack is allocated for this LclVar
+ isHfa = varDsc->lvIsHfa();
+ gcPtrCount = varDsc->lvStructGcCount;
+ structGcLayout = varDsc->lvGcLayout;
+ }
+ else // addrNode is used
+ {
+ assert(addrNode != nullptr);
+
+ // Generate code to load the address that we need into a register
+ genConsumeAddress(addrNode);
+ addrReg = addrNode->gtRegNum;
+
+ CORINFO_CLASS_HANDLE objClass = source->gtObj.gtClass;
+
+ structSize = compiler->info.compCompHnd->getClassSize(objClass);
+ isHfa = compiler->IsHfa(objClass);
+ gcPtrCount = compiler->info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+ }
+
+ bool hasGCpointers = (gcPtrCount > 0); // true if there are any GC pointers in the struct
+
+            // If we have an HFA we can't have any GC pointers;
+            // if not, then the max size for the struct is 16 bytes.
+ if (isHfa)
+ {
+ noway_assert(gcPtrCount == 0);
+ }
+ else
+ {
+ noway_assert(structSize <= 2 * TARGET_POINTER_SIZE);
+ }
+
+ noway_assert(structSize <= MAX_PASS_MULTIREG_BYTES);
+
+ // For a 16-byte structSize with GC pointers we will use two ldr and two str instructions
+ // ldr x2, [x0]
+ // ldr x3, [x0, #8]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+ // For a 16-byte structSize with no GC pointers we will use a ldp and two str instructions
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+ // For a 32-byte structSize with no GC pointers we will use two ldp and four str instructions
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ // ldp x2, x3, [x0]
+ // str x2, [sp, #32]
+ // str x3, [sp, #40]
+ //
+ // Note that when loading from a varNode we currently can't use the ldp instruction
+ // TODO-ARM64-CQ: Implement support for using a ldp instruction with a varNum (see emitIns_R_S)
+ //
+
+ int remainingSize = structSize;
+ unsigned structOffset = 0;
+ unsigned nextIndex = 0;
+
+ while (remainingSize >= 2 * TARGET_POINTER_SIZE)
+ {
+ var_types type0 = compiler->getJitGCType(gcPtrs[nextIndex + 0]);
+ var_types type1 = compiler->getJitGCType(gcPtrs[nextIndex + 1]);
+
+ if (hasGCpointers)
+ {
+ // We have GC pointers, so use two ldr instructions
+ //
+ // We must do it this way because we can't currently pass or track
+ // two different emitAttr values for a ldp instruction.
+
+ // Make sure that the first load instruction does not overwrite the addrReg.
+ //
+ if (loReg != addrReg)
+ {
+ if (varNode != nullptr)
+ {
+                            // Load from our varNumInp source
+ emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
+ emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp,
+ TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
+ emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
+ structOffset + TARGET_POINTER_SIZE);
+ }
+ }
+ else // loReg == addrReg
+ {
+ assert(varNode == nullptr); // because addrReg is REG_NA when varNode is non-null
+ assert(hiReg != addrReg);
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(type1), emitTypeSize(type1), hiReg, addrReg,
+ structOffset + TARGET_POINTER_SIZE);
+ emit->emitIns_R_R_I(ins_Load(type0), emitTypeSize(type0), loReg, addrReg, structOffset);
+ }
+ }
+ else // our struct has no GC pointers
+ {
+ if (varNode != nullptr)
+ {
+                        // Load from our varNumInp source, currently we can't use a ldp instruction to do this
+ emit->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), loReg, varNumInp, 0);
+ emit->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), hiReg, varNumInp, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ // Use a ldp instruction
+
+ // Load from our address expression source
+ emit->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, loReg, hiReg, addrReg, structOffset);
+ }
+ }
+
+ // Emit two store instructions to store the two registers into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(type0), emitTypeSize(type0), loReg, varNumOut, argOffsetOut);
+ emit->emitIns_S_R(ins_Store(type1), emitTypeSize(type1), hiReg, varNumOut,
+ argOffsetOut + TARGET_POINTER_SIZE);
+                argOffsetOut += (2 * TARGET_POINTER_SIZE); // We stored 16 bytes of the struct
+                assert(argOffsetOut <= argOffsetMax);      // We can't write beyond the outgoing arg area
+
+                remainingSize -= (2 * TARGET_POINTER_SIZE); // We loaded 16 bytes of the struct
+ structOffset += (2 * TARGET_POINTER_SIZE);
+ nextIndex += 2;
+ }
+
+            // For a 12-byte structSize we will generate two load instructions
+ // ldr x2, [x0]
+ // ldr w3, [x0, #8]
+ // str x2, [sp, #16]
+ // str w3, [sp, #24]
+ //
+ // When the first instruction has a loReg that is the same register as the addrReg,
+            // we set deferLoad to true and issue the instructions in the reverse order
+ // ldr x3, [x2, #8]
+ // ldr x2, [x2]
+ // str x2, [sp, #16]
+ // str x3, [sp, #24]
+ //
+
+ var_types nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+ emitAttr nextAttr = emitTypeSize(nextType);
+ regNumber curReg = loReg;
+
+ bool deferLoad = false;
+ var_types deferType = TYP_UNKNOWN;
+ emitAttr deferAttr = EA_PTRSIZE;
+ int deferOffset = 0;
+
+ while (remainingSize > 0)
+ {
+ if (remainingSize >= TARGET_POINTER_SIZE)
+ {
+ remainingSize -= TARGET_POINTER_SIZE;
+
+ if ((curReg == addrReg) && (remainingSize != 0))
+ {
+ deferLoad = true;
+ deferType = nextType;
+ deferAttr = emitTypeSize(nextType);
+ deferOffset = structOffset;
+ }
+ else // the typical case
+ {
+ if (varNode != nullptr)
+ {
+                                // Load from our varNumInp source
+ emit->emitIns_R_S(ins_Load(nextType), nextAttr, curReg, varNumInp, structOffset);
+ }
+ else
+ {
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(nextType), nextAttr, curReg, addrReg, structOffset);
+ }
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+                        assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ curReg = hiReg;
+ structOffset += TARGET_POINTER_SIZE;
+ nextIndex++;
+ nextType = compiler->getJitGCType(gcPtrs[nextIndex]);
+ nextAttr = emitTypeSize(nextType);
+ }
+ else // (remainingSize < TARGET_POINTER_SIZE)
+ {
+ int loadSize = remainingSize;
+ remainingSize = 0;
+
+ // We should never have to do a non-pointer sized load when we have a LclVar source
+ assert(varNode == nullptr);
+
+ // the left over size is smaller than a pointer and thus can never be a GC type
+ assert(varTypeIsGC(nextType) == false);
+
+ var_types loadType = TYP_UINT;
+ if (loadSize == 1)
+ {
+ loadType = TYP_UBYTE;
+ }
+ else if (loadSize == 2)
+ {
+ loadType = TYP_USHORT;
+ }
+ else
+ {
+ // Need to handle additional loadSize cases here
+ noway_assert(loadSize == 4);
+ }
+
+ instruction loadIns = ins_Load(loadType);
+ emitAttr loadAttr = emitAttr(loadSize);
+
+ // When deferLoad is false, curReg can be the same as addrReg
+ // because the last instruction is allowed to overwrite addrReg.
+ //
+ noway_assert(!deferLoad || (curReg != addrReg));
+
+ emit->emitIns_R_R_I(loadIns, loadAttr, curReg, addrReg, structOffset);
+
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(loadType), loadAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(loadAttr);
+                    assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+
+ if (deferLoad)
+ {
+ // We should never have to do a deferred load when we have a LclVar source
+ assert(varNode == nullptr);
+
+ curReg = addrReg;
+
+ // Load from our address expression source
+ emit->emitIns_R_R_I(ins_Load(deferType), deferAttr, curReg, addrReg, deferOffset);
+
+ // Emit a store instruction to store the register into the outgoing argument area
+ emit->emitIns_S_R(ins_Store(nextType), nextAttr, curReg, varNumOut, argOffsetOut);
+ argOffsetOut += EA_SIZE_IN_BYTES(nextAttr);
+                assert(argOffsetOut <= argOffsetMax); // We can't write beyond the outgoing arg area
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+}
+
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder != nullptr);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+#if defined(DEBUGGING_SUPPORT)
+ if (compiler->opts.compDbgEnC)
+ {
+ // what we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp)
+ // which is:
+ // -return address
+ // -saved off RBP
+ // -saved 'this' pointer and bool for synchronized methods
+
+ // 4 slots for RBP + return address + RSI + RDI
+ int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+ preservedAreaSize += REGSIZE_BYTES;
+
+ preservedAreaSize += 1; // bool for synchronized methods
+ }
+
+ // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+ // frame
+ gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+ }
+#endif
+
+ gcInfoEncoder->Build();
+
+ // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+ // let's save the values anyway for debugging purposes
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ *
+ */
+
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
+{
+ void* addr = nullptr;
+ void* pAddr = nullptr;
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
+ regNumber callTarget = REG_NA;
+
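+    // If the VM returned a direct address we can emit a direct call; otherwise pAddr is the
+    // address of an indirection cell, so load the target into a register and call indirectly.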
+ if (addr == nullptr)
+ {
+ // This is call to a runtime helper.
+ // adrp x, [reloc:rel page addr]
+ // add x, x, [reloc:page offset]
+ // ldr x, [x]
+ // br x
+
+ if (callTargetReg == REG_NA)
+ {
+ // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
+ // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ }
+
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ regMaskTP callKillSet = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+
+ // assert that all registers in callTargetMask are in the callKillSet
+ noway_assert((callTargetMask & callKillSet) == callTargetMask);
+
+ callTarget = callTargetReg;
+
+ // adrp + add with relocations will be emitted
+ getEmitter()->emitIns_R_AI(INS_adrp, EA_PTR_DSP_RELOC, callTarget, (ssize_t)pAddr);
+ getEmitter()->emitIns_R_R(INS_ldr, EA_PTRSIZE, callTarget, callTarget);
+ callType = emitter::EC_INDIR_R;
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
+ retSize, EA_UNKNOWN, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, /* IL offset */
+ callTarget, /* ireg */
+ REG_NA, 0, 0, /* xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper));
+
+ regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+ regTracker.rsTrashRegSet(killMask);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called for every scope info piece to record by the main genSetScopeInfo()
+ */
+
+// TODO-Cleanup: move to CodeGenCommon.cpp
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+    // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ * Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
+ * (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
+ * disassembler thinks the instructions are the same as we do.
+ */
+
+// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
+// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
+//#define ALL_ARM64_EMITTER_UNIT_TESTS
+
+#if defined(DEBUG)
+void CodeGen::genArm64EmitterUnitTests()
+{
+ if (!verbose)
+ {
+ return;
+ }
+
+ if (!compiler->opts.altJit)
+ {
+ // No point doing this in a "real" JIT.
+ return;
+ }
+
+ // Mark the "fake" instructions in the output.
+ printf("*************** In genArm64EmitterUnitTests()\n");
+
+ emitter* theEmitter = getEmitter();
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ // We use this:
+ // genDefineTempLabel(genCreateTempLabel());
+ // to create artificial labels to help separate groups of tests.
+
+ //
+ // Loads/Stores basic general register
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ldr/str Xt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
+
+ // ldr/str Wt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_ldrh, EA_2BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strb, EA_1BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_strh, EA_2BYTE, REG_R8, REG_R9);
+
+ theEmitter->emitIns_R_R(INS_ldrsb, EA_4BYTE, REG_R8, REG_R9); // target Wt
+ theEmitter->emitIns_R_R(INS_ldrsh, EA_4BYTE, REG_R8, REG_R9); // target Wt
+ theEmitter->emitIns_R_R(INS_ldrsb, EA_8BYTE, REG_R8, REG_R9); // target Xt
+ theEmitter->emitIns_R_R(INS_ldrsh, EA_8BYTE, REG_R8, REG_R9); // target Xt
+ theEmitter->emitIns_R_R(INS_ldrsw, EA_8BYTE, REG_R8, REG_R9); // target Xt
+
+ theEmitter->emitIns_R_R_I(INS_ldurb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sturb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sturh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursb, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursb, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursh, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursh, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldursw, EA_8BYTE, REG_R8, REG_R9, 1);
+
+ // SP and ZR tests
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_R8, REG_SP, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurb, EA_8BYTE, REG_ZR, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldurh, EA_8BYTE, REG_ZR, REG_SP, 1);
+
+ // scaled
+ theEmitter->emitIns_R_R_I(INS_ldrb, EA_1BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldrh, EA_2BYTE, REG_R8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 8);
+
+ // pre-/post-indexed (unscaled)
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // Compares
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // cmp reg, reg
+ theEmitter->emitIns_R_R(INS_cmp, EA_8BYTE, REG_R8, REG_R9);
+ theEmitter->emitIns_R_R(INS_cmn, EA_8BYTE, REG_R8, REG_R9);
+
+ // cmp reg, imm
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 1 << 12);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 4095 << 12);
+
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 1 << 12);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 4095 << 12);
+
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -1);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, -0xfff);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_I(INS_cmp, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -1);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, -0xfff);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_I(INS_cmn, EA_8BYTE, REG_R8, 0xffffffffff800000LL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_R1, REG_R12);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_R2, REG_R13);
+ theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_R3, REG_R14);
+ theEmitter->emitIns_R_R(INS_rev, EA_8BYTE, REG_R4, REG_R15);
+ theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_R5, REG_R0);
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_R6, REG_R1);
+
+ theEmitter->emitIns_R_R(INS_cls, EA_4BYTE, REG_R7, REG_R2);
+ theEmitter->emitIns_R_R(INS_clz, EA_4BYTE, REG_R8, REG_R3);
+ theEmitter->emitIns_R_R(INS_rbit, EA_4BYTE, REG_R9, REG_R4);
+ theEmitter->emitIns_R_R(INS_rev, EA_4BYTE, REG_R10, REG_R5);
+ theEmitter->emitIns_R_R(INS_rev16, EA_4BYTE, REG_R11, REG_R6);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // mov reg, imm(i16,hw)
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000000001234);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000000043210000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0000567800000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765000000000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFFFFFF1234);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFFFFFF4321FFFF);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xFFFF5678FFFFFFFF);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x8765FFFFFFFFFFFF);
+
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00001234);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x87650000);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xFFFF1234);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x4567FFFF);
+
+ // mov reg, imm(N,r,s)
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x6666666666666666);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_SP, 0x7FFF00007FFF0000);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x5555555555555555);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0xE003E003E003E003);
+ theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_R8, 0x0707070707070707);
+
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x00FFFFF0);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x66666666);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x03FFC000);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x55555555);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0xE003E003);
+ theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_R8, 0x07070707);
+
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0xE003E003E003E003);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x00FFFFF000000000);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x6666666666666666);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x0707070707070707);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x7FFF00007FFF0000);
+ theEmitter->emitIns_R_I(INS_tst, EA_8BYTE, REG_R8, 0x5555555555555555);
+
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xE003E003);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x00FFFFF0);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x66666666);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x07070707);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0xFFF00000);
+ theEmitter->emitIns_R_I(INS_tst, EA_4BYTE, REG_R8, 0x55555555);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // tst reg, reg
+ theEmitter->emitIns_R_R(INS_tst, EA_8BYTE, REG_R7, REG_R10);
+
+ // mov reg, reg
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R8, REG_SP);
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_SP, REG_R9);
+
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_negs, EA_8BYTE, REG_R3, REG_R13);
+
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_mvn, EA_4BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_neg, EA_4BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_negs, EA_4BYTE, REG_R3, REG_R13);
+
+ theEmitter->emitIns_R_R(INS_sxtb, EA_8BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_sxth, EA_8BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_sxtw, EA_8BYTE, REG_R4, REG_R12);
+ theEmitter->emitIns_R_R(INS_uxtb, EA_8BYTE, REG_R3, REG_R13); // map to Wt
+ theEmitter->emitIns_R_R(INS_uxth, EA_8BYTE, REG_R2, REG_R14); // map to Wt
+
+ theEmitter->emitIns_R_R(INS_sxtb, EA_4BYTE, REG_R7, REG_R10);
+ theEmitter->emitIns_R_R(INS_sxth, EA_4BYTE, REG_R5, REG_R11);
+ theEmitter->emitIns_R_R(INS_uxtb, EA_4BYTE, REG_R3, REG_R13);
+ theEmitter->emitIns_R_R(INS_uxth, EA_4BYTE, REG_R2, REG_R14);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // mov reg, imm(i16,hw)
+ theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x1234, 0, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_mov, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+
+ theEmitter->emitIns_R_I_I(INS_movk, EA_8BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movn, EA_8BYTE, REG_R8, 0x5678, 32, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movz, EA_8BYTE, REG_R8, 0x8765, 48, INS_OPTS_LSL);
+
+ theEmitter->emitIns_R_I_I(INS_movk, EA_4BYTE, REG_R8, 0x4321, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movn, EA_4BYTE, REG_R8, 0x5678, 16, INS_OPTS_LSL);
+ theEmitter->emitIns_R_I_I(INS_movz, EA_4BYTE, REG_R8, 0x8765, 16, INS_OPTS_LSL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_I(INS_lsl, EA_8BYTE, REG_R0, REG_R0, 1);
+ theEmitter->emitIns_R_R_I(INS_lsl, EA_4BYTE, REG_R9, REG_R3, 18);
+ theEmitter->emitIns_R_R_I(INS_lsr, EA_8BYTE, REG_R7, REG_R0, 37);
+ theEmitter->emitIns_R_R_I(INS_lsr, EA_4BYTE, REG_R0, REG_R1, 2);
+ theEmitter->emitIns_R_R_I(INS_asr, EA_8BYTE, REG_R2, REG_R3, 53);
+ theEmitter->emitIns_R_R_I(INS_asr, EA_4BYTE, REG_R9, REG_R3, 18);
+
+ theEmitter->emitIns_R_R_I(INS_and, EA_8BYTE, REG_R2, REG_R3, 0x5555555555555555);
+ theEmitter->emitIns_R_R_I(INS_ands, EA_8BYTE, REG_R1, REG_R5, 0x6666666666666666);
+ theEmitter->emitIns_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, 0x0707070707070707);
+ theEmitter->emitIns_R_R_I(INS_orr, EA_8BYTE, REG_SP, REG_R3, 0xFFFC000000000000);
+ theEmitter->emitIns_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, 0xE003E003);
+
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 31);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 32);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_8BYTE, REG_R8, REG_R9, 63);
+
+ theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ror, EA_4BYTE, REG_R8, REG_R9, 31);
+
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_add, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_adds, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_8BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0); // == mov
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -1);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, -0xfff);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0x1000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfff000);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xfffffffffffff000LL);
+ theEmitter->emitIns_R_R_I(INS_subs, EA_4BYTE, REG_R8, REG_R9, 0xffffffffff800000LL);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I cmp/tst
+ //
+
+ // cmp
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0);
+
+ // CMP (shifted register)
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
+
+ // TST (shifted register)
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 33, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_8BYTE, REG_R8, REG_R9, 34, INS_OPTS_ROR);
+
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 21, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 22, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 23, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_I(INS_tst, EA_4BYTE, REG_R8, REG_R9, 24, INS_OPTS_ROR);
+
+ // CMP (extended register)
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTW); // "cmp x8, x9, UXTW"; msdis
+ // disassembles this "cmp x8,x9",
+ // which looks like an msdis issue.
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTX);
+
+ // CMP 64-bit (extended register) and left shift
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 1, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 3, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_8BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTX);
+
+ // CMP 32-bit (extended register) and left shift
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_UXTW);
+
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 2, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_I(INS_cmp, EA_4BYTE, REG_R8, REG_R9, 4, INS_OPTS_SXTW);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_lsl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asr, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_ror, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbc, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbcs, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_udiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sdiv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mneg, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umnegl, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umulh, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lslv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asrv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_rorv, EA_8BYTE, REG_R8, REG_R9, REG_R10);
+
+ theEmitter->emitIns_R_R_R(INS_lsl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asr, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_ror, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_adcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbc, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sbcs, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_udiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_mneg, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_smulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umull, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umnegl, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_umulh, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lslv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_lsrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_asrv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+ theEmitter->emitIns_R_R_R(INS_rorv, EA_4BYTE, REG_R8, REG_R9, REG_R10);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_8BYTE, REG_R2, REG_R3, 4, 39);
+ theEmitter->emitIns_R_R_I_I(INS_bfm, EA_8BYTE, REG_R1, REG_R5, 20, 23);
+ theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_8BYTE, REG_R8, REG_R9, 36, 7);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_8BYTE, REG_R2, REG_R3, 7, 37);
+ theEmitter->emitIns_R_R_I_I(INS_bfi, EA_8BYTE, REG_R1, REG_R5, 23, 21);
+ theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_8BYTE, REG_R8, REG_R9, 39, 5);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_8BYTE, REG_R2, REG_R3, 10, 24);
+ theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_8BYTE, REG_R1, REG_R5, 26, 16);
+ theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, REG_R8, REG_R9, 42, 8);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfm, EA_4BYTE, REG_R2, REG_R3, 4, 19);
+ theEmitter->emitIns_R_R_I_I(INS_bfm, EA_4BYTE, REG_R1, REG_R5, 10, 13);
+ theEmitter->emitIns_R_R_I_I(INS_ubfm, EA_4BYTE, REG_R8, REG_R9, 16, 7);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfiz, EA_4BYTE, REG_R2, REG_R3, 5, 17);
+ theEmitter->emitIns_R_R_I_I(INS_bfi, EA_4BYTE, REG_R1, REG_R5, 13, 11);
+ theEmitter->emitIns_R_R_I_I(INS_ubfiz, EA_4BYTE, REG_R8, REG_R9, 19, 5);
+
+ theEmitter->emitIns_R_R_I_I(INS_sbfx, EA_4BYTE, REG_R2, REG_R3, 3, 14);
+ theEmitter->emitIns_R_R_I_I(INS_bfxil, EA_4BYTE, REG_R1, REG_R5, 11, 9);
+ theEmitter->emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R8, REG_R9, 22, 8);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_I
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ADD (extended register)
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
+
+ // ADD (extended register) and left shift
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
+
+ // ADD (shifted register)
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_add, EA_8BYTE, REG_R8, REG_R9, REG_R10, 33, INS_OPTS_ASR);
+
+ // EXTR (extract field from register pair)
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 31);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 32);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 63);
+
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1);
+ theEmitter->emitIns_R_R_R_I(INS_extr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 31);
+
+ // SUB (extended register)
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0, INS_OPTS_SXTX);
+
+ // SUB (extended register) and left shift
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTB);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTH);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_SXTX);
+
+ // SUB (shifted register)
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 27, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 28, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_sub, EA_4BYTE, REG_R8, REG_R9, REG_R10, 29, INS_OPTS_ASR);
+
+ // bit operations
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_8BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_8BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_8BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_8BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_8BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_8BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_8BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+
+ theEmitter->emitIns_R_R_R_I(INS_and, EA_4BYTE, REG_R8, REG_R9, REG_R10, 1, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_ands, EA_4BYTE, REG_R8, REG_R9, REG_R10, 2, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eor, EA_4BYTE, REG_R8, REG_R9, REG_R10, 3, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orr, EA_4BYTE, REG_R8, REG_R9, REG_R10, 4, INS_OPTS_ROR);
+ theEmitter->emitIns_R_R_R_I(INS_bic, EA_4BYTE, REG_R8, REG_R9, REG_R10, 5, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_I(INS_bics, EA_4BYTE, REG_R8, REG_R9, REG_R10, 6, INS_OPTS_LSR);
+ theEmitter->emitIns_R_R_R_I(INS_eon, EA_4BYTE, REG_R8, REG_R9, REG_R10, 7, INS_OPTS_ASR);
+ theEmitter->emitIns_R_R_R_I(INS_orn, EA_4BYTE, REG_R8, REG_R9, REG_R10, 8, INS_OPTS_ROR);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_I -- load/store pair
+ //
+
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 8);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 8);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldpsw, EA_4BYTE, REG_R8, REG_R9, REG_R10, 16, INS_OPTS_PRE_INDEX);
+
+ // SP and ZR tests
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_R1, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_R0, REG_ZR, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_SP, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_ZR, REG_ZR, REG_R8, 16, INS_OPTS_PRE_INDEX);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_Ext -- load/store shifted/extend
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // LDR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsw, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsh, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldrsb, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+ // STR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_8BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_str, EA_4BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strh, EA_2BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_strb, EA_1BYTE, REG_R8, REG_SP, REG_R9, INS_OPTS_UXTX);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_R
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R_R(INS_madd, EA_4BYTE, REG_R0, REG_R12, REG_R27, REG_R10);
+ theEmitter->emitIns_R_R_R_R(INS_msub, EA_4BYTE, REG_R1, REG_R13, REG_R28, REG_R11);
+ theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_4BYTE, REG_R2, REG_R14, REG_R0, REG_R12);
+ theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_4BYTE, REG_R3, REG_R15, REG_R1, REG_R13);
+ theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_4BYTE, REG_R4, REG_R19, REG_R2, REG_R14);
+ theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_4BYTE, REG_R5, REG_R20, REG_R3, REG_R15);
+
+ theEmitter->emitIns_R_R_R_R(INS_madd, EA_8BYTE, REG_R6, REG_R21, REG_R4, REG_R19);
+ theEmitter->emitIns_R_R_R_R(INS_msub, EA_8BYTE, REG_R7, REG_R22, REG_R5, REG_R20);
+ theEmitter->emitIns_R_R_R_R(INS_smaddl, EA_8BYTE, REG_R8, REG_R23, REG_R6, REG_R21);
+ theEmitter->emitIns_R_R_R_R(INS_smsubl, EA_8BYTE, REG_R9, REG_R24, REG_R7, REG_R22);
+ theEmitter->emitIns_R_R_R_R(INS_umaddl, EA_8BYTE, REG_R10, REG_R25, REG_R8, REG_R23);
+ theEmitter->emitIns_R_R_R_R(INS_umsubl, EA_8BYTE, REG_R11, REG_R26, REG_R9, REG_R24);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_COND
+ //
+
+ // cset reg, cond
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R9, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R8, INS_COND_NE); // ne
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R7, INS_COND_HS); // hs
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R6, INS_COND_LO); // lo
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R5, INS_COND_MI); // mi
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R4, INS_COND_PL); // pl
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R3, INS_COND_VS); // vs
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R2, INS_COND_VC); // vc
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R1, INS_COND_HI); // hi
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R0, INS_COND_LS); // ls
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R9, INS_COND_GE); // ge
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R8, INS_COND_LT); // lt
+ theEmitter->emitIns_R_COND(INS_cset, EA_8BYTE, REG_R7, INS_COND_GT); // gt
+ theEmitter->emitIns_R_COND(INS_cset, EA_4BYTE, REG_R6, INS_COND_LE); // le
+
+ // csetm reg, cond
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R9, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R8, INS_COND_NE); // ne
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R7, INS_COND_HS); // hs
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R6, INS_COND_LO); // lo
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R5, INS_COND_MI); // mi
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R4, INS_COND_PL); // pl
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R3, INS_COND_VS); // vs
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R2, INS_COND_VC); // vc
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R1, INS_COND_HI); // hi
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R0, INS_COND_LS); // ls
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R9, INS_COND_GE); // ge
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R8, INS_COND_LT); // lt
+ theEmitter->emitIns_R_COND(INS_csetm, EA_4BYTE, REG_R7, INS_COND_GT); // gt
+ theEmitter->emitIns_R_COND(INS_csetm, EA_8BYTE, REG_R6, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_COND
+ //
+
+ // cinc reg, reg, cond
+ // cinv reg, reg, cond
+ // cneg reg, reg, cond
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R0, REG_R4, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R1, REG_R5, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_4BYTE, REG_R2, REG_R6, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R3, REG_R7, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R4, REG_R8, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R5, REG_R9, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_8BYTE, REG_R6, REG_R0, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R7, REG_R1, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R8, REG_R2, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R9, REG_R3, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_4BYTE, REG_R0, REG_R4, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_COND(INS_cneg, EA_8BYTE, REG_R2, REG_R5, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_COND(INS_cinc, EA_4BYTE, REG_R2, REG_R6, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_COND(INS_cinv, EA_8BYTE, REG_R3, REG_R7, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_COND
+ //
+
+ // csel reg, reg, reg, cond
+ // csinc reg, reg, reg, cond
+ // csinv reg, reg, reg, cond
+ // csneg reg, reg, reg, cond
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R0, REG_R4, REG_R8, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R1, REG_R5, REG_R9, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R4, REG_R8, REG_R2, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R5, REG_R9, REG_R3, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_8BYTE, REG_R6, REG_R0, REG_R4, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_4BYTE, REG_R7, REG_R1, REG_R5, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_8BYTE, REG_R8, REG_R2, REG_R6, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_4BYTE, REG_R9, REG_R3, REG_R7, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_R_COND(INS_csinv, EA_4BYTE, REG_R0, REG_R4, REG_R8, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_R_COND(INS_csneg, EA_8BYTE, REG_R2, REG_R5, REG_R9, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_R_COND(INS_csel, EA_4BYTE, REG_R2, REG_R6, REG_R0, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_R_COND(INS_csinc, EA_8BYTE, REG_R3, REG_R7, REG_R1, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_FLAGS_COND
+ //
+
+ // ccmp reg1, reg2, nzcv, cond
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmp reg1, imm, nzcv, cond
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmp reg1, imm, nzcv, cond -- encoded as ccmn
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R9, -3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R8, -2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R7, -1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R6, -5, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R5, -31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R4, -28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R3, -25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R2, -22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R1, -19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R0, -16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R9, -13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R8, -10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_8BYTE, REG_R7, -7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmp, EA_4BYTE, REG_R6, -4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmn reg1, reg2, nzcv, cond
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, REG_R3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, REG_R2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, REG_R1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, REG_R0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, REG_R3, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, REG_R2, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, REG_R1, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, REG_R0, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, REG_R3, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, REG_R2, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, REG_R1, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, REG_R0, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, REG_R3, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_R_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, REG_R2, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+ // ccmn reg1, imm, nzcv, cond
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R9, 3, INS_FLAGS_V, INS_COND_EQ); // eq
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R8, 2, INS_FLAGS_C, INS_COND_NE); // ne
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R7, 1, INS_FLAGS_Z, INS_COND_HS); // hs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R6, 0, INS_FLAGS_N, INS_COND_LO); // lo
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R5, 31, INS_FLAGS_CV, INS_COND_MI); // mi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R4, 28, INS_FLAGS_ZV, INS_COND_PL); // pl
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R3, 25, INS_FLAGS_ZC, INS_COND_VS); // vs
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R2, 22, INS_FLAGS_NV, INS_COND_VC); // vc
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R1, 19, INS_FLAGS_NC, INS_COND_HI); // hi
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R0, 16, INS_FLAGS_NZ, INS_COND_LS); // ls
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R9, 13, INS_FLAGS_NONE, INS_COND_GE); // ge
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R8, 10, INS_FLAGS_NZV, INS_COND_LT); // lt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_8BYTE, REG_R7, 7, INS_FLAGS_NZC, INS_COND_GT); // gt
+ theEmitter->emitIns_R_I_FLAGS_COND(INS_ccmn, EA_4BYTE, REG_R6, 4, INS_FLAGS_NZCV, INS_COND_LE); // le
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // Branch to register
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R(INS_br, EA_PTRSIZE, REG_R8);
+ theEmitter->emitIns_R(INS_blr, EA_PTRSIZE, REG_R9);
+ theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_R8);
+ theEmitter->emitIns_R(INS_ret, EA_PTRSIZE, REG_LR);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // Misc
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 0);
+ theEmitter->emitIns_I(INS_brk, EA_PTRSIZE, 65535);
+
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_OSHLD);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_OSHST);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_OSH);
+
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_NSHLD);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_NSHST);
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_NSH);
+
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_ISHLD);
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_ISHST);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
+
+ theEmitter->emitIns_BARR(INS_dsb, INS_BARRIER_LD);
+ theEmitter->emitIns_BARR(INS_dmb, INS_BARRIER_ST);
+ theEmitter->emitIns_BARR(INS_isb, INS_BARRIER_SY);
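+ // dmb/dsb accept the full barrier option set (SY/LD/ST crossed with the inner-, outer-
+ // and non-shareable domains); isb architecturally defines only SY, so the non-SY isb
+ // forms above presumably just exercise the option encoder.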
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ ////////////////////////////////////////////////////////////////////////////////
+ //
+ // SIMD and Floating point
+ //
+ ////////////////////////////////////////////////////////////////////////////////
+
+ //
+ // Load/Stores vector register
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // ldr/str Vt, [reg]
+ theEmitter->emitIns_R_R(INS_ldr, EA_8BYTE, REG_V1, REG_R9);
+ theEmitter->emitIns_R_R(INS_str, EA_8BYTE, REG_V2, REG_R8);
+ theEmitter->emitIns_R_R(INS_ldr, EA_4BYTE, REG_V3, REG_R7);
+ theEmitter->emitIns_R_R(INS_str, EA_4BYTE, REG_V4, REG_R6);
+ theEmitter->emitIns_R_R(INS_ldr, EA_2BYTE, REG_V5, REG_R5);
+ theEmitter->emitIns_R_R(INS_str, EA_2BYTE, REG_V6, REG_R4);
+ theEmitter->emitIns_R_R(INS_ldr, EA_1BYTE, REG_V7, REG_R3);
+ theEmitter->emitIns_R_R(INS_str, EA_1BYTE, REG_V8, REG_R2);
+ theEmitter->emitIns_R_R(INS_ldr, EA_16BYTE, REG_V9, REG_R1);
+ theEmitter->emitIns_R_R(INS_str, EA_16BYTE, REG_V10, REG_R0);
+
+ // ldr/str Vt, [reg+cns] -- scaled
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 8);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 16);
+
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V7, REG_R10, 1);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V7, REG_R10, 2);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V7, REG_R10, 4);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V7, REG_R10, 8);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V7, REG_R10, 16);
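+ // The unsigned-offset form scales the immediate by the access size, so each offset
+ // above (1, 2, 4, 8, 16) is the element size times a scaled index of 1.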
+
+ // ldr/str Vt, [reg],cns -- post-indexed (unscaled)
+ // ldr/str Vt, [reg+cns]! -- pre-indexed (unscaled)
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_ldr, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_POST_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_str, EA_1BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_2BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_4BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_8BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_I(INS_str, EA_16BYTE, REG_V8, REG_R9, 1, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_1BYTE, REG_V8, REG_R9, 2);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_2BYTE, REG_V8, REG_R9, 3);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_4BYTE, REG_V8, REG_R9, 5);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_8BYTE, REG_V8, REG_R9, 9);
+ theEmitter->emitIns_R_R_I(INS_ldur, EA_16BYTE, REG_V8, REG_R9, 17);
+
+ theEmitter->emitIns_R_R_I(INS_stur, EA_1BYTE, REG_V7, REG_R10, 2);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_2BYTE, REG_V7, REG_R10, 3);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_4BYTE, REG_V7, REG_R10, 5);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_8BYTE, REG_V7, REG_R10, 9);
+ theEmitter->emitIns_R_R_I(INS_stur, EA_16BYTE, REG_V7, REG_R10, 17);
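+ // ldur/stur take a signed 9-bit unscaled offset (-256..255), so unlike the scaled
+ // ldr/str form the immediate need not be a multiple of the access size.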
+
+ // load/store pair
+ theEmitter->emitIns_R_R_R(INS_ldnp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V1, REG_V2, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_8BYTE, REG_V2, REG_V3, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 24);
+
+ theEmitter->emitIns_R_R_R(INS_ldnp, EA_4BYTE, REG_V4, REG_V5, REG_SP);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 4);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_4BYTE, REG_V7, REG_V8, REG_SP, 12);
+
+ theEmitter->emitIns_R_R_R(INS_ldnp, EA_16BYTE, REG_V8, REG_V9, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V9, REG_V10, REG_R10, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldnp, EA_16BYTE, REG_V10, REG_V11, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stnp, EA_16BYTE, REG_V11, REG_V12, REG_R10, 48);
+
+ theEmitter->emitIns_R_R_R(INS_ldp, EA_8BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V2, REG_V3, REG_SP, 8);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V3, REG_V4, REG_R10, 16);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V4, REG_V5, REG_R10, 24, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V5, REG_V6, REG_SP, 32, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_8BYTE, REG_V6, REG_V7, REG_SP, 40, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_8BYTE, REG_V7, REG_V8, REG_R10, 48, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R(INS_ldp, EA_4BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V2, REG_V3, REG_SP, 4);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V3, REG_V4, REG_R10, 8);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V4, REG_V5, REG_R10, 12, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V5, REG_V6, REG_SP, 16, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_4BYTE, REG_V6, REG_V7, REG_SP, 20, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_4BYTE, REG_V7, REG_V8, REG_R10, 24, INS_OPTS_PRE_INDEX);
+
+ theEmitter->emitIns_R_R_R(INS_ldp, EA_16BYTE, REG_V0, REG_V1, REG_R10);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V1, REG_V2, REG_SP, 0);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V2, REG_V3, REG_SP, 16);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V3, REG_V4, REG_R10, 32);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V4, REG_V5, REG_R10, 48, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V5, REG_V6, REG_SP, 64, INS_OPTS_POST_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_ldp, EA_16BYTE, REG_V6, REG_V7, REG_SP, 80, INS_OPTS_PRE_INDEX);
+ theEmitter->emitIns_R_R_R_I(INS_stp, EA_16BYTE, REG_V7, REG_V8, REG_R10, 96, INS_OPTS_PRE_INDEX);
+
+ // LDR (register)
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 3);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 3);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 2);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_4BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 2);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 4);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_16BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 4);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V1, REG_SP, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V2, REG_R7, REG_R9, INS_OPTS_LSL);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_LSL, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V4, REG_R7, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_SXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V6, REG_SP, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V7, REG_R7, REG_R9, INS_OPTS_UXTW, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V8, REG_R7, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V9, REG_R7, REG_R9, INS_OPTS_SXTX, 1);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V10, REG_R7, REG_R9, INS_OPTS_UXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_2BYTE, REG_V11, REG_SP, REG_R9, INS_OPTS_UXTX, 1);
+
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V1, REG_R7, REG_R9);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V2, REG_SP, REG_R9, INS_OPTS_SXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V3, REG_R7, REG_R9, INS_OPTS_UXTW);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V4, REG_SP, REG_R9, INS_OPTS_SXTX);
+ theEmitter->emitIns_R_R_R_Ext(INS_ldr, EA_1BYTE, REG_V5, REG_R7, REG_R9, INS_OPTS_UXTX);
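+ // For these register-offset forms an explicit shift amount must equal log2 of the
+ // access size (1, 2, 3 or 4 above), which is presumably why the byte-sized accesses
+ // are emitted with extend options only and no shift.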
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R mov and its aliases
+ //
+
+ // mov vector to vector
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_mov, EA_16BYTE, REG_V2, REG_V3);
+
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V12, REG_V13);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V14, REG_V15);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V16, REG_V17);
+
+ // mov vector to general
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_R0, REG_V4);
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_R1, REG_V5);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_R2, REG_V6);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_R3, REG_V7);
+
+ // mov general to vector
+ theEmitter->emitIns_R_R(INS_mov, EA_8BYTE, REG_V8, REG_R4);
+ theEmitter->emitIns_R_R(INS_mov, EA_4BYTE, REG_V9, REG_R5);
+ theEmitter->emitIns_R_R(INS_mov, EA_2BYTE, REG_V10, REG_R6);
+ theEmitter->emitIns_R_R(INS_mov, EA_1BYTE, REG_V11, REG_R7);
+
+ // mov vector[index] to vector
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V2, REG_V3, 3);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V4, REG_V5, 7);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V6, REG_V7, 15);
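+ // The indices 1, 3, 7 and 15 are the largest valid lane numbers for 8-, 4-, 2- and
+ // 1-byte elements in a 128-bit vector register.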
+
+ // mov to general from vector[index]
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_R8, REG_V16, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_R9, REG_V17, 2);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_R10, REG_V18, 3);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_R11, REG_V19, 4);
+
+ // mov to vector[index] from general
+ theEmitter->emitIns_R_R_I(INS_mov, EA_8BYTE, REG_V20, REG_R12, 1);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_4BYTE, REG_V21, REG_R13, 2);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_2BYTE, REG_V22, REG_R14, 6);
+ theEmitter->emitIns_R_R_I(INS_mov, EA_1BYTE, REG_V23, REG_R15, 8);
+
+ // mov vector[index] to vector[index2]
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_8BYTE, REG_V8, REG_V9, 1, 0);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_4BYTE, REG_V10, REG_V11, 2, 1);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_2BYTE, REG_V12, REG_V13, 5, 2);
+ theEmitter->emitIns_R_R_I_I(INS_mov, EA_1BYTE, REG_V14, REG_V15, 12, 3);
+
+ //////////////////////////////////////////////////////////////////////////////////
+
+ // mov/dup scalar
+ theEmitter->emitIns_R_R_I(INS_dup, EA_8BYTE, REG_V24, REG_V25, 1);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_4BYTE, REG_V26, REG_V27, 3);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_2BYTE, REG_V28, REG_V29, 7);
+ theEmitter->emitIns_R_R_I(INS_dup, EA_1BYTE, REG_V30, REG_V31, 15);
+
+ // mov/ins vector element
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_8BYTE, REG_V0, REG_V1, 0, 1);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_4BYTE, REG_V2, REG_V3, 2, 2);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_2BYTE, REG_V4, REG_V5, 4, 3);
+ theEmitter->emitIns_R_R_I_I(INS_ins, EA_1BYTE, REG_V6, REG_V7, 8, 4);
+
+ // umov to general from vector element
+ theEmitter->emitIns_R_R_I(INS_umov, EA_8BYTE, REG_R0, REG_V8, 1);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_4BYTE, REG_R1, REG_V9, 2);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_2BYTE, REG_R2, REG_V10, 4);
+ theEmitter->emitIns_R_R_I(INS_umov, EA_1BYTE, REG_R3, REG_V11, 8);
+
+ // ins to vector element from general
+ theEmitter->emitIns_R_R_I(INS_ins, EA_8BYTE, REG_V12, REG_R4, 1);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_4BYTE, REG_V13, REG_R5, 3);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_2BYTE, REG_V14, REG_R6, 7);
+ theEmitter->emitIns_R_R_I(INS_ins, EA_1BYTE, REG_V15, REG_R7, 15);
+
+ // smov to general from vector element
+ theEmitter->emitIns_R_R_I(INS_smov, EA_4BYTE, REG_R5, REG_V17, 2);
+ theEmitter->emitIns_R_R_I(INS_smov, EA_2BYTE, REG_R6, REG_V18, 4);
+ theEmitter->emitIns_R_R_I(INS_smov, EA_1BYTE, REG_R7, REG_V19, 8);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I movi and mvni
+ //
+
+ // movi imm8 (vector)
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V0, 0x00, INS_OPTS_8B);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V1, 0xFF, INS_OPTS_8B);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V2, 0x00, INS_OPTS_16B);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V3, 0xFF, INS_OPTS_16B);
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V4, 0x007F, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V5, 0x7F00, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V6, 0x003F, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V7, 0x3F00, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V8, 0x1F, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V9, 0x1F00, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V10, 0x1F0000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V11, 0x1F000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V12, 0x1FFF, INS_OPTS_2S); // MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V13, 0x1FFFFF, INS_OPTS_2S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V14, 0x37, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V15, 0x3700, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V16, 0x370000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V17, 0x37000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V18, 0x37FF, INS_OPTS_4S); // MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V19, 0x37FFFF, INS_OPTS_4S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V20, 0xFF80, INS_OPTS_4H); // mvni
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V21, 0xFFC0, INS_OPTS_8H); // mvni
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V22, 0xFFFFFFE0, INS_OPTS_2S); // mvni
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V23, 0xFFFFF0FF, INS_OPTS_4S); // mvni LSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V24, 0xFFF8FFFF, INS_OPTS_2S); // mvni LSL 16
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V25, 0xFCFFFFFF, INS_OPTS_4S); // mvni LSL 24
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V26, 0xFFFFFE00, INS_OPTS_2S); // mvni MSL 8
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V27, 0xFFFC0000, INS_OPTS_4S); // mvni MSL 16
+
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V28, 0x00FF00FF00FF00FF, INS_OPTS_1D);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V29, 0x00FFFF0000FFFF00, INS_OPTS_2D);
+ theEmitter->emitIns_R_I(INS_movi, EA_8BYTE, REG_V30, 0xFF000000FF000000);
+ theEmitter->emitIns_R_I(INS_movi, EA_16BYTE, REG_V31, 0x0, INS_OPTS_2D);
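+ // The 64-bit (1D/2D) movi form can only materialize immediates in which every byte
+ // is 0x00 or 0xFF, as in the four values above.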
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V8, 0x42FF, INS_OPTS_2S); // MSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_8BYTE, REG_V9, 0x42FFFF, INS_OPTS_2S); // MSL 16
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V14, 0x5DFF, INS_OPTS_4S); // MSL 8
+ theEmitter->emitIns_R_I(INS_mvni, EA_16BYTE, REG_V15, 0x5DFFFF, INS_OPTS_4S); // MSL 16
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_I orr/bic vector immediate
+ //
+
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_orr, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_orr, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V0, 0x0022, INS_OPTS_4H);
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V1, 0x2200, INS_OPTS_4H); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V2, 0x0033, INS_OPTS_8H);
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V3, 0x3300, INS_OPTS_8H); // LSL 8
+
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V4, 0x42, INS_OPTS_2S);
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V5, 0x4200, INS_OPTS_2S); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V6, 0x420000, INS_OPTS_2S); // LSL 16
+ theEmitter->emitIns_R_I(INS_bic, EA_8BYTE, REG_V7, 0x42000000, INS_OPTS_2S); // LSL 24
+
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V10, 0x5D, INS_OPTS_4S);
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V11, 0x5D00, INS_OPTS_4S); // LSL 8
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V12, 0x5D0000, INS_OPTS_4S); // LSL 16
+ theEmitter->emitIns_R_I(INS_bic, EA_16BYTE, REG_V13, 0x5D000000, INS_OPTS_4S); // LSL 24
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_F fmov/fcmp immediate
+ //
+
+ // fmov imm8 (scalar)
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V14, 1.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V15, -1.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V0, 2.0); // encodes imm8 == 0
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V16, 10.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V17, -10.0);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V18, 31); // Largest encodable value
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V19, -31);
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V20, 1.25);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V21, -1.25);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V22, 0.125); // Smallest encodable value
+ theEmitter->emitIns_R_F(INS_fmov, EA_4BYTE, REG_V23, -0.125);
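+ // The fmov imm8 form encodes +/-(16..31)/16 * 2^e with e in -3..4, which gives the
+ // 0.125 .. 31.0 magnitude range noted above; 0.0 itself is not representable as imm8.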
+
+ // fmov imm8 (vector)
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V0, 2.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V24, 1.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V25, 1.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V26, 1.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V27, -10.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V28, -10.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V29, -10.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V30, 31.0, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V31, 31.0, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V0, 31.0, INS_OPTS_2D);
+ theEmitter->emitIns_R_F(INS_fmov, EA_8BYTE, REG_V1, -0.125, INS_OPTS_2S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V2, -0.125, INS_OPTS_4S);
+ theEmitter->emitIns_R_F(INS_fmov, EA_16BYTE, REG_V3, -0.125, INS_OPTS_2D);
+
+ // fcmp with 0.0
+ theEmitter->emitIns_R_F(INS_fcmp, EA_8BYTE, REG_V12, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmp, EA_4BYTE, REG_V13, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmpe, EA_8BYTE, REG_V14, 0.0);
+ theEmitter->emitIns_R_F(INS_fcmpe, EA_4BYTE, REG_V15, 0.0);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R fmov/fcmp/fcvt
+ //
+
+ // fmov vector to vector
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V0, REG_V2);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V1, REG_V3);
+
+ // fmov vector to general
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R0, REG_V4);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R1, REG_V5);
+ // using the optional conversion specifier
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_D_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_R3, REG_V7, INS_OPTS_S_TO_4BYTE);
+
+ // fmov general to vector
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V8, REG_R4);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V9, REG_R5);
+ // using the optional conversion specifier
+ theEmitter->emitIns_R_R(INS_fmov, EA_8BYTE, REG_V10, REG_R6, INS_OPTS_8BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_fmov, EA_4BYTE, REG_V11, REG_R7, INS_OPTS_4BYTE_TO_S);
+
+ // fcmp/fcmpe
+ theEmitter->emitIns_R_R(INS_fcmp, EA_8BYTE, REG_V8, REG_V16);
+ theEmitter->emitIns_R_R(INS_fcmp, EA_4BYTE, REG_V9, REG_V17);
+ theEmitter->emitIns_R_R(INS_fcmpe, EA_8BYTE, REG_V10, REG_V18);
+ theEmitter->emitIns_R_R(INS_fcmpe, EA_4BYTE, REG_V11, REG_V19);
+
+ // fcvt
+ theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V24, REG_V25, INS_OPTS_S_TO_D); // Single to Double
+ theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V26, REG_V27, INS_OPTS_D_TO_S); // Double to Single
+
+ theEmitter->emitIns_R_R(INS_fcvt, EA_4BYTE, REG_V1, REG_V2, INS_OPTS_H_TO_S);
+ theEmitter->emitIns_R_R(INS_fcvt, EA_8BYTE, REG_V3, REG_V4, INS_OPTS_H_TO_D);
+
+ theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V5, REG_V6, INS_OPTS_S_TO_H);
+ theEmitter->emitIns_R_R(INS_fcvt, EA_2BYTE, REG_V7, REG_V8, INS_OPTS_D_TO_H);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R floating point conversions
+ //
+
+ // fcvtas scalar
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtas scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtas vector
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtas, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtau scalar
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtau scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtau vector
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtau, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtms scalar
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtms scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtms vector
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtms, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtmu scalar
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtmu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtmu vector
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtmu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtns scalar
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtns scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtns vector
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtns, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtnu scalar
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtnu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtnu vector
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtnu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtps scalar
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtps scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtps vector
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtps, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtpu scalar
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtpu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtpu vector
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtpu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // fcvtzs scalar
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtzs scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtzs vector
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtzs, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // fcvtzu scalar
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V2, REG_V3);
+
+ // fcvtzu scalar to general
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R0, REG_V4, INS_OPTS_S_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_4BYTE, REG_R1, REG_V5, INS_OPTS_D_TO_4BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R2, REG_V6, INS_OPTS_S_TO_8BYTE);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_R3, REG_V7, INS_OPTS_D_TO_8BYTE);
+
+ // fcvtzu vector
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fcvtzu, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ ////////////////////////////////////////////////////////////////////////////////
+
+ // scvtf scalar
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V2, REG_V3);
+
+ // scvtf scalar from general
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
+
+ // scvtf vector
+ theEmitter->emitIns_R_R(INS_scvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_scvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+ // ucvtf scalar
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V2, REG_V3);
+
+ // ucvtf scalar from general
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V4, REG_R0, INS_OPTS_4BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_4BYTE, REG_V5, REG_R1, INS_OPTS_8BYTE_TO_S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V6, REG_R2, INS_OPTS_4BYTE_TO_D);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V7, REG_R3, INS_OPTS_8BYTE_TO_D);
+
+ // ucvtf vector
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_ucvtf, EA_16BYTE, REG_V12, REG_V13, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R floating point operations, one dest, one source
+ //
+
+ // fabs scalar
+ theEmitter->emitIns_R_R(INS_fabs, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V2, REG_V3);
+
+ // fabs vector
+ theEmitter->emitIns_R_R(INS_fabs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fabs, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // fneg scalar
+ theEmitter->emitIns_R_R(INS_fneg, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V2, REG_V3);
+
+ // fneg vector
+ theEmitter->emitIns_R_R(INS_fneg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fneg, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // fsqrt scalar
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V2, REG_V3);
+
+ // fsqrt vector
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_fsqrt, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // abs scalar
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V2, REG_V3);
+
+ // abs vector
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_abs, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_abs, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
+
+ // neg scalar
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V2, REG_V3);
+
+ // neg vector
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_neg, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_neg, EA_16BYTE, REG_V16, REG_V17, INS_OPTS_2D);
+
+ // mvn vector
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V4, REG_V5);
+ theEmitter->emitIns_R_R(INS_mvn, EA_8BYTE, REG_V6, REG_V7, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V8, REG_V9);
+ theEmitter->emitIns_R_R(INS_mvn, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_16B);
+
+ // cnt vector
+ theEmitter->emitIns_R_R(INS_cnt, EA_8BYTE, REG_V22, REG_V23, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_cnt, EA_16BYTE, REG_V24, REG_V25, INS_OPTS_16B);
+
+ // not vector (the same encoding as mvn)
+ theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V12, REG_V13);
+ theEmitter->emitIns_R_R(INS_not, EA_8BYTE, REG_V14, REG_V15, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V16, REG_V17);
+ theEmitter->emitIns_R_R(INS_not, EA_16BYTE, REG_V18, REG_V19, INS_OPTS_16B);
+
+ // cls vector
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_cls, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_cls, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+ // clz vector
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_clz, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_clz, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+ // rbit vector
+ theEmitter->emitIns_R_R(INS_rbit, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rbit, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+
+ // rev16 vector
+ theEmitter->emitIns_R_R(INS_rev16, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev16, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+
+ // rev32 vector
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_rev32, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_rev32, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+
+ // rev64 vector
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_rev64, EA_8BYTE, REG_V12, REG_V13, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_rev64, EA_16BYTE, REG_V14, REG_V15, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R floating point round to int, one dest, one source
+ //
+
+ // frinta scalar
+ theEmitter->emitIns_R_R(INS_frinta, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V2, REG_V3);
+
+ // frinta vector
+ theEmitter->emitIns_R_R(INS_frinta, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frinta, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frinti scalar
+ theEmitter->emitIns_R_R(INS_frinti, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V2, REG_V3);
+
+ // frinti vector
+ theEmitter->emitIns_R_R(INS_frinti, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frinti, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintm scalar
+ theEmitter->emitIns_R_R(INS_frintm, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintm vector
+ theEmitter->emitIns_R_R(INS_frintm, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintm, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintn scalar
+ theEmitter->emitIns_R_R(INS_frintn, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintn vector
+ theEmitter->emitIns_R_R(INS_frintn, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintn, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintp scalar
+ theEmitter->emitIns_R_R(INS_frintp, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintp vector
+ theEmitter->emitIns_R_R(INS_frintp, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintp, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintx scalar
+ theEmitter->emitIns_R_R(INS_frintx, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintx vector
+ theEmitter->emitIns_R_R(INS_frintx, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintx, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+ // frintz scalar
+ theEmitter->emitIns_R_R(INS_frintz, EA_4BYTE, REG_V0, REG_V1);
+ theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V2, REG_V3);
+
+ // frintz vector
+ theEmitter->emitIns_R_R(INS_frintz, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_4S);
+ theEmitter->emitIns_R_R(INS_frintz, EA_16BYTE, REG_V8, REG_V9, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R floating point operations, one dest, two sources
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fadd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fsub, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fdiv, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmax, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmin, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ // fabd
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fabd, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmul, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
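+ // For the by-element forms the trailing immediate is the lane index of the last
+ // source operand and must stay within the lane count for the element size
+ // (0..3 for S elements, 0..1 for D elements).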
+
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmulx, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmulx, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fnmul, EA_4BYTE, REG_V0, REG_V1, REG_V2); // scalar 4BYTE
+ theEmitter->emitIns_R_R_R(INS_fnmul, EA_8BYTE, REG_V3, REG_V4, REG_V5); // scalar 8BYTE
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_I vector operations, one dest, one source reg, one immed
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // 'sshr' scalar
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sshr' vector
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
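+ // The immediate right shifts (sshr/ssra/srshr/srsra and their unsigned forms) accept
+ // shift amounts from 1 up to the element size in bits, while shl (further below)
+ // accepts 0 up to the element size minus one; the shift counts above stay within
+ // those per-element-size limits.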
+
+ // 'ssra' scalar
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ssra' vector
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ssra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'srshr' scalar
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'srshr' vector
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_srshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'srsra' scalar
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'srsra' vector
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_srsra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'shl' scalar
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'shl' vector
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_shl, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'ushr' scalar
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ushr' vector
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ushr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'usra' scalar
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'usra' vector
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_usra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'urshr' scalar
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'urshr' vector
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_urshr, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'ursra' scalar
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'ursra' vector
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_ursra, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'sri' scalar
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sri' vector
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sri, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'sli' scalar
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V2, REG_V3, 14);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 27);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V6, REG_V7, 40);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 63);
+
+ // 'sli' vector
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V12, REG_V13, 33, INS_OPTS_2D);
+ theEmitter->emitIns_R_R_I(INS_sli, EA_16BYTE, REG_V14, REG_V15, 63, INS_OPTS_2D);
+
+ // 'sshll' vector
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_sshll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_sshll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'ushll' vector
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_ushll, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_ushll2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'shrn' vector
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_shrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_shrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'rshrn' vector
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V0, REG_V1, 1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V2, REG_V3, 7, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V4, REG_V5, 9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V6, REG_V7, 15, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_I(INS_rshrn, EA_8BYTE, REG_V8, REG_V9, 17, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_I(INS_rshrn2, EA_16BYTE, REG_V10, REG_V11, 31, INS_OPTS_4S);
+
+ // 'sxtl' vector
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_sxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_sxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+ // 'uxtl' vector
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V0, REG_V1, INS_OPTS_8B);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V2, REG_V3, INS_OPTS_16B);
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V6, REG_V7, INS_OPTS_8H);
+ theEmitter->emitIns_R_R(INS_uxtl, EA_8BYTE, REG_V8, REG_V9, INS_OPTS_2S);
+ theEmitter->emitIns_R_R(INS_uxtl2, EA_16BYTE, REG_V10, REG_V11, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R vector operations, one dest, two source
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // Specifying an Arrangement is optional
+ //
+ theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20);
+ theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3);
+
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12);
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21);
+
+ // Default Arrangement as per the ARM64 manual
+ //
+ theEmitter->emitIns_R_R_R(INS_and, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_8BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_and, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bic, EA_16BYTE, REG_V24, REG_V25, REG_V26, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_eor, EA_16BYTE, REG_V27, REG_V28, REG_V29, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_orr, EA_16BYTE, REG_V30, REG_V31, REG_V0, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_orn, EA_16BYTE, REG_V1, REG_V2, REG_V3, INS_OPTS_16B);
+
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_bsl, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bit, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_bif, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_16B);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V0, REG_V1, REG_V2); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_add, EA_8BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_add, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V1, REG_V2, REG_V3); // scalar 8BYTE
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V4, REG_V5, REG_V6, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V7, REG_V8, REG_V9, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_8BYTE, REG_V10, REG_V11, REG_V12, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V13, REG_V14, REG_V15, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V16, REG_V17, REG_V18, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V19, REG_V20, REG_V21, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_sub, EA_16BYTE, REG_V22, REG_V23, REG_V24, INS_OPTS_2D);
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ // saba vector
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_saba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // sabd vector
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_sabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uaba vector
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uaba, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ // uabd vector
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_8BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_uabd, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R vector multiply
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V2, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V3, REG_V4, REG_V5, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_16B);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R(INS_mul, EA_16BYTE, REG_V15, REG_V16, REG_V17, INS_OPTS_4S);
+
+ theEmitter->emitIns_R_R_R(INS_pmul, EA_8BYTE, REG_V18, REG_V19, REG_V20, INS_OPTS_8B);
+ theEmitter->emitIns_R_R_R(INS_pmul, EA_16BYTE, REG_V21, REG_V22, REG_V23, INS_OPTS_16B);
+
+ // 'mul' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mul, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+ // 'mla' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mla, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+ // 'mls' vector by elem
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V0, REG_V1, REG_V16, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V2, REG_V3, REG_V15, 1, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V4, REG_V5, REG_V17, 3, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V6, REG_V7, REG_V0, 0, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V8, REG_V9, REG_V1, 3, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_8BYTE, REG_V10, REG_V11, REG_V2, 7, INS_OPTS_4H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V12, REG_V13, REG_V14, 0, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V14, REG_V15, REG_V18, 1, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V16, REG_V17, REG_V13, 3, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V18, REG_V19, REG_V3, 0, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V20, REG_V21, REG_V4, 3, INS_OPTS_8H);
+ theEmitter->emitIns_R_R_R_I(INS_mls, EA_16BYTE, REG_V22, REG_V23, REG_V5, 7, INS_OPTS_8H);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R floating point operations, one source/dest, and two source
+ //
+
+ genDefineTempLabel(genCreateTempLabel());
+
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmla, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmla, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_8BYTE, REG_V6, REG_V7, REG_V8, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V9, REG_V10, REG_V11, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R(INS_fmls, EA_16BYTE, REG_V12, REG_V13, REG_V14, INS_OPTS_2D);
+
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_4BYTE, REG_V15, REG_V16, REG_V17, 3); // scalar by elem 4BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V18, REG_V19, REG_V20, 1); // scalar by elem 8BYTE
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_8BYTE, REG_V21, REG_V22, REG_V23, 0, INS_OPTS_2S);
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V24, REG_V25, REG_V26, 2, INS_OPTS_4S);
+ theEmitter->emitIns_R_R_R_I(INS_fmls, EA_16BYTE, REG_V27, REG_V28, REG_V29, 0, INS_OPTS_2D);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+ //
+ // R_R_R_R floating point operations, one dest, and three source
+ //
+
+ theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_4BYTE, REG_V0, REG_V8, REG_V16, REG_V24);
+ theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_4BYTE, REG_V1, REG_V9, REG_V17, REG_V25);
+ theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_4BYTE, REG_V2, REG_V10, REG_V18, REG_V26);
+ theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_4BYTE, REG_V3, REG_V11, REG_V19, REG_V27);
+
+ theEmitter->emitIns_R_R_R_R(INS_fmadd, EA_8BYTE, REG_V4, REG_V12, REG_V20, REG_V28);
+ theEmitter->emitIns_R_R_R_R(INS_fmsub, EA_8BYTE, REG_V5, REG_V13, REG_V21, REG_V29);
+ theEmitter->emitIns_R_R_R_R(INS_fnmadd, EA_8BYTE, REG_V6, REG_V14, REG_V22, REG_V30);
+ theEmitter->emitIns_R_R_R_R(INS_fnmsub, EA_8BYTE, REG_V7, REG_V15, REG_V23, REG_V31);
+
+#endif
+
+#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
+
+ BasicBlock* label = genCreateTempLabel();
+ genDefineTempLabel(label);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ instGen(INS_nop);
+ theEmitter->emitIns_R_L(INS_adr, EA_4BYTE_DSP_RELOC, label, REG_R0);
+
+#endif // ALL_ARM64_EMITTER_UNIT_TESTS
+
+ printf("*************** End of genArm64EmitterUnitTests()\n");
+}
+#endif // defined(DEBUG)
+
+#endif // _TARGET_ARM64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenclassic.h b/src/jit/codegenclassic.h
new file mode 100644
index 0000000000..81b7b34194
--- /dev/null
+++ b/src/jit/codegenclassic.h
@@ -0,0 +1,606 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file contains the members of CodeGen that are defined and used
+// only by the "classic" JIT backend. It is included by CodeGen.h in the
+// definition of the CodeGen class.
+//
+
+#ifndef _CODEGENCLASSIC_H_
+#define _CODEGENCLASSIC_H_
+
+#ifdef LEGACY_BACKEND // Not necessary (it's this way in the #include location), but helpful to IntelliSense
+
+public:
+regNumber genIsEnregisteredIntVariable(GenTreePtr tree);
+
+void sched_AM(instruction ins,
+ emitAttr size,
+ regNumber ireg,
+ bool rdst,
+ GenTreePtr tree,
+ unsigned offs,
+ bool cons = false,
+ int cval = 0,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+protected:
+#if FEATURE_STACK_FP_X87
+VARSET_TP genFPregVars; // mask corresponding to genFPregCnt
+unsigned genFPdeadRegCnt; // The dead unpopped part of genFPregCnt
+#endif // FEATURE_STACK_FP_X87
+
+//-------------------------------------------------------------------------
+
+void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE);
+
+regNumber genGetRegSetToIcon(ssize_t val, regMaskTP regBest = 0, var_types type = TYP_INT);
+void genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree);
+void genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType = TYP_INT, bool ovfl = false);
+
+void genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType = TYP_INT, bool ovfl = false);
+
+//-------------------------------------------------------------------------
+
+bool genRegTrashable(regNumber reg, GenTreePtr tree);
+
+//
+// Prolog functions and data (there are a few exceptions for more generally used things)
+//
+
+regMaskTP genPInvokeMethodProlog(regMaskTP initRegs);
+
+void genPInvokeMethodEpilog();
+
+regNumber genPInvokeCallProlog(LclVarDsc* varDsc,
+ int argSize,
+ CORINFO_METHOD_HANDLE methodToken,
+ BasicBlock* returnLabel);
+
+void genPInvokeCallEpilog(LclVarDsc* varDsc, regMaskTP retVal);
+
+regNumber genLclHeap(GenTreePtr size);
+
+void genSinglePush();
+
+void genSinglePop();
+
+void genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet);
+
+bool genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum);
+
+void genComputeReg(
+ GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly = false);
+
+void genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg);
+
+void genReleaseReg(GenTreePtr tree);
+
+void genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg);
+
+void genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off = 0);
+
+void genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair);
+
+void genComputeRegPair(
+ GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly = false);
+
+void genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg);
+
+void genComputeAddressable(GenTreePtr tree,
+ regMaskTP addrReg,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly = false);
+
+void genReleaseRegPair(GenTreePtr tree);
+
+void genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg);
+
+void genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg);
+
+void genMakeRegPairAvailable(regPairNo regPair);
+
+bool genMakeIndAddrMode(GenTreePtr addr,
+ GenTreePtr oper,
+ bool forLea,
+ regMaskTP regMask,
+ RegSet::KeepReg keepReg,
+ regMaskTP* useMaskPtr,
+ bool deferOp = false);
+
+regMaskTP genMakeRvalueAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK = false);
+
+regMaskTP genMakeAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK = false, bool deferOK = false);
+
+regMaskTP genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg);
+
+regMaskTP genMakeAddressable2(GenTreePtr tree,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool forLoadStore,
+ bool smallOK = false,
+ bool deferOK = false,
+ bool evalSideEffs = false);
+
+bool genStillAddressable(GenTreePtr tree);
+
+regMaskTP genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase);
+
+regMaskTP genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask);
+
+regMaskTP genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask = RBM_NONE);
+
+void genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg);
+
+GenTreePtr genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult);
+
+void genEmitGSCookieCheck(bool pushReg);
+
+void genEvalSideEffects(GenTreePtr tree);
+
+void genCondJump(GenTreePtr cond, BasicBlock* destTrue = NULL, BasicBlock* destFalse = NULL, bool bStackFPFixup = true);
+
+emitJumpKind genCondSetFlags(GenTreePtr cond);
+
+void genJCC(genTreeOps cmp, BasicBlock* block, var_types type);
+
+void genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool unsOper = false);
+
+void genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse);
+
+void genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition = false);
+
+bool genUse_fcomip();
+
+void genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab);
+
+regMaskTP WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP addrReg);
+
+void genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+// If "tree" is a comma node, generates code for the left comma arguments,
+// in order, returning the first right argument in the list that is not
+// a comma node.
+GenTreePtr genCodeForCommaTree(GenTreePtr tree);
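+// Illustrative sketch (not from the original source): for a tree of the form
+// GT_COMMA(s1, GT_COMMA(s2, x)), genCodeForCommaTree generates code for s1 and s2
+// in order and returns x, which the caller then evaluates as usual.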
+
+void genCodeForTreeLeaf_GT_JMP(GenTreePtr tree);
+
+static Compiler::fgWalkPreFn fgIsVarAssignedTo;
+
+void genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+bool genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+#ifdef _TARGET_XARCH_
+void genCodeForMultEAX(GenTreePtr tree);
+#endif
+#ifdef _TARGET_ARM_
+void genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+#endif
+
+void genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg);
+
+void genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg);
+
+void genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+regNumber genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg);
+
+void genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg);
+
+void genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTreeSmpOpAsg(GenTreePtr tree);
+
+void genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl);
+
+void genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg = RBM_NONE);
+
+void genCodeForTree_DONE_LIFE(GenTreePtr tree, regNumber reg)
+{
+ /* We've computed the value of 'tree' into 'reg' */
+
+ assert(reg != 0xFEEFFAAFu);
+ assert(!IsUninitialized(reg));
+
+ genMarkTreeInReg(tree, reg);
+}
+
+void genCodeForTree_DONE_LIFE(GenTreePtr tree, regPairNo regPair)
+{
+ /* We've computed the value of 'tree' into 'regPair' */
+
+ genMarkTreeInRegPair(tree, regPair);
+}
+
+void genCodeForTree_DONE(GenTreePtr tree, regNumber reg)
+{
+ /* Check whether this subtree has freed up any variables */
+
+ genUpdateLife(tree);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+}
+
+void genCodeForTree_REG_VAR1(GenTreePtr tree)
+{
+ /* Value is already in a register */
+
+ regNumber reg = tree->gtRegNum;
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+void genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg);
+
+regPairNo genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg);
+
+unsigned genRegCountForLiveIntEnregVars(GenTreePtr tree);
+
+#ifdef _TARGET_ARM_
+void genStoreFromFltRetRegs(GenTreePtr tree);
+void genLoadIntoFltRetRegs(GenTreePtr tree);
+void genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFltReg);
+#endif
+
+#if CPU_HAS_FP_SUPPORT
+void genRoundFpExpression(GenTreePtr op, var_types type = TYP_UNDEF);
+void genCodeForTreeFlt(GenTreePtr tree, regMaskTP needReg = RBM_ALLFLOAT, regMaskTP bestReg = RBM_NONE);
+#endif
+
+// FP stuff
+#include "fp.h"
+
+void genCodeForJumpTable(GenTreePtr tree);
+void genCodeForSwitchTable(GenTreePtr tree);
+void genCodeForSwitch(GenTreePtr tree);
+
+regMaskTP genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs);
+void genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs);
+
+size_t genPushArgList(GenTreePtr call);
+
+#ifdef _TARGET_ARM_
+// We are generating code for a promoted struct local variable. Fill the next slot (register or
+// 4-byte stack slot) with one or more field variables of the promoted struct local -- or 2 such slots
+// if the next field is a 64-bit value.
+// The arguments are:
+// "arg" is the current argument node.
+//
+// "curArgTabEntry" arg table entry pointer for "arg".
+//
+// "promotedStructLocalVarDesc" describes the struct local being copied, assumed non-NULL.
+//
+// "fieldSize" is somewhat misnamed; it must be the element in the struct's GC layout describing the next slot
+// of the struct -- it will be EA_4BYTE, EA_GCREF, or EA_BYREF.
+//
+// "*pNextPromotedStructFieldVar" must be the the local variable number of the next field variable to copy;
+// this location will be updated by the call to reflect the bytes that are copied.
+//
+// "*pBytesOfNextSlotOfCurPromotedStruct" must be the number of bytes within the struct local at which the next
+// slot to be copied starts. This location will be updated by the call to reflect the bytes that are copied.
+//
+// "*pCurRegNum" must be the current argument register number, and will be updated if argument registers are filled.
+//
+// "argOffset" must be the offset of the next slot to be filled in the outgoing argument area, if the argument is to
+// be
+// put in the outgoing arg area of the stack (or else should be INT_MAX if the next slot to be filled is a
+// register).
+// (Strictly speaking, after the addition of "argOffsetOfFirstStackSlot", this arg is redundant, and is only used
+// in assertions, and could be removed.)
+//
+// "fieldOffsetOfFirstStackSlot" must be the offset within the promoted struct local of the first slot that should be
+// copied to the outgoing argument area -- non-zero only in the case of a struct that spans registers and stack
+// slots.
+//
+// "argOffsetOfFirstStackSlot" must be the 4-byte-aligned offset of the first offset in the outgoing argument area
+// which could
+// contain part of the struct. (Explicit alignment may mean it doesn't actually contain part of the struct.)
+//
+// "*deadFieldVarRegs" is an out parameter, the set of registers containing promoted field variables that become dead
+// after
+// this (implicit) use.
+//
+// "*pRegTmp" -- if a temporary register is needed, and this is not REG_STK, uses that register. Otherwise, if it is
+// REG_STK,
+// allocates a register, uses it, and sets "*pRegTmp" to the allocated register.
+//
+// Returns "true" iff it filled two slots with an 8-byte value.
+bool genFillSlotFromPromotedStruct(GenTreePtr arg,
+ fgArgTabEntryPtr curArgTabEntry,
+ LclVarDsc* promotedStructLocalVarDesc,
+ emitAttr fieldSize,
+ unsigned* pNextPromotedStructFieldVar, // IN/OUT
+ unsigned* pBytesOfNextSlotOfCurPromotedStruct, // IN/OUT
+ regNumber* pCurRegNum, // IN/OUT
+ int argOffset,
+ int fieldOffsetOfFirstStackSlot,
+ int argOffsetOfFirstStackSlot,
+ regMaskTP* deadFieldVarRegs, // OUT
+ regNumber* pRegTmp); // IN/OUT
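+// Illustrative call pattern (an assumption, not taken from the source): for a promoted
+// struct local { int f0; int f1; int f2; } whose first two slots are passed in r2/r3 and
+// whose last slot goes to the stack, three successive calls would copy f0 into r2, f1 into
+// r3 (updating *pCurRegNum each time), and f2 into the first outgoing stack slot, with
+// *pNextPromotedStructFieldVar and *pBytesOfNextSlotOfCurPromotedStruct advancing by one
+// field variable / 4 bytes per call.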
+
+#endif // _TARGET_ARM_
+// Requires that "curr" is a cpblk. If the RHS is a promoted struct local,
+// then returns a regMaskTP representing the set of registers holding
+// fieldVars of the RHS that go dead with this use (as determined by the live set
+// of cpBlk).
+regMaskTP genFindDeadFieldRegs(GenTreePtr cpBlk);
+
+void SetupLateArgs(GenTreePtr call);
+
+#ifdef _TARGET_ARM_
+void PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask);
+#endif // _TARGET_ARM_
+
+regMaskTP genLoadIndirectCallTarget(GenTreePtr call);
+
+regMaskTP genCodeForCall(GenTreePtr call, bool valUsed);
+
+GenTreePtr genGetAddrModeBase(GenTreePtr tree);
+
+GenTreePtr genIsAddrMode(GenTreePtr tree, GenTreePtr* indxPtr);
+
+private:
+bool genIsLocalLastUse(GenTreePtr tree);
+
+bool genIsRegCandidateLocal(GenTreePtr tree);
+
+//=========================================================================
+// Debugging support
+//=========================================================================
+
+#if FEATURE_STACK_FP_X87
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Flat FP model XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+bool StackFPIsSameAsFloat(double d);
+bool FlatFPSameRegisters(FlatFPStateX87* pState, regMaskTP mask);
+
+// The FlatFPX87_ functions are the actual verbs for doing things
+// like performing a transition, loading a register, etc. They are also
+// responsible for emitting the x87 code to do so. We keep
+// them in Compiler because we don't want to store a pointer to the
+// emitter.
+void FlatFPX87_Kill(FlatFPStateX87* pState, unsigned iVirtual);
+void FlatFPX87_PushVirtual(FlatFPStateX87* pState, unsigned iRegister, bool bEmitCode = true);
+unsigned FlatFPX87_Pop(FlatFPStateX87* pState, bool bEmitCode = true);
+unsigned FlatFPX87_Top(FlatFPStateX87* pState, bool bEmitCode = true);
+void FlatFPX87_Unload(FlatFPStateX87* pState, unsigned iVirtual, bool bEmitCode = true);
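+// Illustrative flow (an assumption, not from the source): to evaluate virtual FP register
+// iVirtual, codegen would call FlatFPX87_PushVirtual(pState, iVirtual) to model (and emit)
+// the load onto the x87 stack top, and later FlatFPX87_Pop(pState) or
+// FlatFPX87_Unload(pState, iVirtual) once the value has been consumed, keeping pState in
+// sync with the physical x87 stack.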
+#endif
+
+// Codegen functions. This is the API that codegen will use
+regMaskTP genPushArgumentStackFP(GenTreePtr arg);
+void genRoundFpExpressionStackFP(GenTreePtr op, var_types type = TYP_UNDEF);
+void genCodeForTreeStackFP_Const(GenTreePtr tree);
+void genCodeForTreeStackFP_Leaf(GenTreePtr tree);
+void genCodeForTreeStackFP_SmpOp(GenTreePtr tree);
+void genCodeForTreeStackFP_Special(GenTreePtr tree);
+void genCodeForTreeStackFP_Cast(GenTreePtr tree);
+void genCodeForTreeStackFP(GenTreePtr tree);
+void genCondJumpFltStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bDoTransition = true);
+void genCondJumpFloat(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse);
+void genCondJumpLngStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse);
+
+void genFloatConst(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatLeaf(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatSimple(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatMath(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatCheckFinite(GenTree* tree, RegSet::RegisterPreference* pref);
+void genLoadFloat(GenTreePtr tree, regNumber reg);
+void genFloatAssign(GenTree* tree);
+void genFloatArith(GenTree* tree, RegSet::RegisterPreference* pref);
+void genFloatAsgArith(GenTree* tree);
+
+regNumber genAssignArithFloat(genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
+
+GenTreePtr genMakeAddressableFloat(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles = true);
+
+void genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref = NULL);
+
+void genCodeForTreeFloat(GenTreePtr tree, regMaskTP needReg, regMaskTP bestReg);
+
+regNumber genArithmFloat(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse);
+void genCodeForTreeCastFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+void genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+void genCodeForTreeCastFromFloat(GenTreePtr tree, RegSet::RegisterPreference* pref);
+void genKeepAddressableFloat(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
+void genDoneAddressableFloat(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
+void genComputeAddressableFloat(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly = false);
+void genRoundFloatExpression(GenTreePtr op, var_types type);
+
+#if FEATURE_STACK_FP_X87
+// Assumes then block will be generated before else block.
+struct QmarkStateStackFP
+{
+ FlatFPStateX87 stackState;
+};
+
+void genQMarkRegVarTransition(GenTreePtr nextNode, VARSET_VALARG_TP liveset);
+void genQMarkBeforeElseStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode);
+void genQMarkAfterElseBlockStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode);
+void genQMarkAfterThenBlockStackFP(QmarkStateStackFP* pState);
+
+#endif
+
+GenTreePtr genMakeAddressableStackFP(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles = true);
+void genKeepAddressableStackFP(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr);
+void genDoneAddressableStackFP(GenTreePtr tree, regMaskTP addrRegInt, regMaskTP addrRegFlt, RegSet::KeepReg keptReg);
+
+void genCodeForTreeStackFP_Asg(GenTreePtr tree);
+void genCodeForTreeStackFP_AsgArithm(GenTreePtr tree);
+void genCodeForTreeStackFP_Arithm(GenTreePtr tree);
+void genCodeForTreeStackFP_DONE(GenTreePtr tree, regNumber reg);
+void genCodeForTreeFloat_DONE(GenTreePtr tree, regNumber reg);
+
+void genSetupStateStackFP(BasicBlock* block);
+regMaskTP genRegMaskFromLivenessStackFP(VARSET_VALARG_TP varset);
+
+// bReverse means make op1 addressable and generate code for op2.
+// If op1 or op2 is a comma expression, this will generate code for its non-last comma parts
+// and set op1 and op2 to the remaining non-comma expressions.
+void genSetupForOpStackFP(
+ GenTreePtr& op1, GenTreePtr& op2, bool bReverse, bool bMakeOp1Addressable, bool bOp1ReadOnly, bool bOp2ReadOnly);
+
+#if FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+bool ConsistentAfterStatementStackFP();
+#endif
+
+private:
+void SpillTempsStackFP(regMaskTP canSpillMask);
+void SpillForCallStackFP();
+void UnspillRegVarsStackFp();
+
+// Transition API. Takes care of the stack matching of basicblock boundaries
+void genCodeForPrologStackFP();
+void genCodeForEndBlockTransitionStackFP(BasicBlock* block);
+
+void genCodeForBBTransitionStackFP(BasicBlock* pDst);
+void genCodeForTransitionStackFP(FlatFPStateX87* pSrc, FlatFPStateX87* pDst);
+void genCodeForTransitionFromMask(FlatFPStateX87* pSrc, regMaskTP mask, bool bEmitCode = true);
+BasicBlock* genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget);
+
+// This is the API codegen will use to emit virtual fp code. In theory, nobody above this API
+// should know about x87 instructions.
+
+int genNumberTemps();
+void genDiscardStackFP(GenTreePtr tree);
+void genRegRenameWithMasks(regNumber dstReg, regNumber srcReg);
+void genRegVarBirthStackFP(GenTreePtr tree);
+void genRegVarBirthStackFP(LclVarDsc* varDsc);
+void genRegVarDeathStackFP(GenTreePtr tree);
+void genRegVarDeathStackFP(LclVarDsc* varDsc);
+void genLoadStackFP(GenTreePtr tree, regNumber reg);
+void genMovStackFP(GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
+bool genCompInsStackFP(GenTreePtr tos, GenTreePtr other);
+regNumber genArithmStackFP(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse);
+regNumber genAsgArithmStackFP(genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg);
+void genCondJmpInsStackFP(emitJumpKind jumpKind,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
+ bool bDoTransition = true);
+void genTableSwitchStackFP(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab);
+
+void JitDumpFPState();
+#else // !FEATURE_STACK_FP_X87
+void SpillForCallRegisterFP(regMaskTP noSpillMask);
+#endif // !FEATURE_STACK_FP_X87
+
+// When bOnlyNoMemAccess = true, the load will be generated only for constant loads that don't
+// involve memory accesses (i.e., fldz for positive zero, or fld1 for 1). Returns true if the
+// function did the load.
+bool genConstantLoadStackFP(GenTreePtr tree, bool bOnlyNoMemAccess = false);
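+// For example (illustrative only): with bOnlyNoMemAccess == true, the preceding
+// genConstantLoadStackFP can load a GT_CNS_DBL of +0.0 with 'fldz' and return true,
+// whereas a constant such as 2.5 would require a memory load, so nothing is emitted
+// and false is returned.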
+void genEndOfStatement();
+
+#if FEATURE_STACK_FP_X87
+struct genRegVarDiesInSubTreeData
+{
+ regNumber reg;
+ bool result;
+};
+static Compiler::fgWalkPreFn genRegVarDiesInSubTreeWorker;
+bool genRegVarDiesInSubTree(GenTreePtr tree, regNumber reg);
+#endif // FEATURE_STACK_FP_X87
+
+// Float spill
+void UnspillFloat(RegSet::SpillDsc* spillDsc);
+void UnspillFloat(GenTreePtr tree);
+void UnspillFloat(LclVarDsc* varDsc);
+void UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc);
+void UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg);
+void RemoveSpillDsc(RegSet::SpillDsc* spillDsc);
+
+protected:
+struct genLivenessSet
+{
+ VARSET_TP liveSet;
+ VARSET_TP varPtrSet;
+ regMaskSmall maskVars;
+ regMaskSmall gcRefRegs;
+ regMaskSmall byRefRegs;
+
+ genLivenessSet()
+ : VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal()), VARSET_INIT_NOCOPY(varPtrSet, VarSetOps::UninitVal())
+ {
+ }
+};
+
+void saveLiveness(genLivenessSet* ls);
+void restoreLiveness(genLivenessSet* ls);
+void checkLiveness(genLivenessSet* ls);
+void unspillLiveness(genLivenessSet* ls);
+
+//-------------------------------------------------------------------------
+//
+// If we know that the flags register is set to a value that corresponds
+// to the current value of a register or variable, the following values
+// record that information.
+//
+
+emitLocation genFlagsEqLoc;
+regNumber genFlagsEqReg;
+unsigned genFlagsEqVar;
+
+void genFlagsEqualToNone();
+void genFlagsEqualToReg(GenTreePtr tree, regNumber reg);
+void genFlagsEqualToVar(GenTreePtr tree, unsigned var);
+bool genFlagsAreReg(regNumber reg);
+bool genFlagsAreVar(unsigned var);
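+// Illustrative example (x86, an assumption rather than source text): after emitting
+// 'add eax, 4' for a local V02 that lives in EAX, genFlagsEqualToVar records that the
+// flags reflect V02 at genFlagsEqLoc; if the next compare tests V02 against zero and
+// nothing has clobbered the flags, genFlagsAreVar lets codegen skip a redundant
+// 'test eax, eax'.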
+
+#endif // LEGACY_BACKEND
+
+#endif // _CODEGENCLASSIC_H_
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
new file mode 100755
index 0000000000..2710447ade
--- /dev/null
+++ b/src/jit/codegencommon.cpp
@@ -0,0 +1,11779 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Code Generator Common: XX
+XX Methods common to all architectures and register allocation strategies XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// TODO-Cleanup: There are additional methods in CodeGen*.cpp that are almost
+// identical, and which should probably be moved here.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "codegen.h"
+
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************/
+
+const BYTE genTypeSizes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sz,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genTypeAlignments[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) al,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genTypeStSzs[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) st,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const BYTE genActualTypes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) jitType,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+void CodeGenInterface::setFramePointerRequiredEH(bool value)
+{
+ m_cgFramePointerRequired = value;
+
+#ifndef JIT32_GCENCODER
+ if (value)
+ {
+ // EnumGcRefs will only enumerate slots in aborted frames
+ // if they are fully-interruptible. So if we have a catch
+ // or finally that will keep frame-vars alive, we need to
+ // force fully-interruptible.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Method has EH, marking method as fully interruptible\n");
+ }
+#endif
+
+ m_cgInterruptible = true;
+ }
+#endif // JIT32_GCENCODER
+}
+
+/*****************************************************************************/
+CodeGenInterface* getCodeGenerator(Compiler* comp)
+{
+ return new (comp, CMK_Codegen) CodeGen(comp);
+}
+
+// CodeGenInterface constructor
+CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
+ : gcInfo(theCompiler), regSet(theCompiler, gcInfo), compiler(theCompiler)
+{
+}
+
+/*****************************************************************************/
+
+CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
+{
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ negBitmaskFlt = nullptr;
+ negBitmaskDbl = nullptr;
+ absBitmaskFlt = nullptr;
+ absBitmaskDbl = nullptr;
+ u8ToDblBitmask = nullptr;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ regTracker.rsTrackInit(compiler, &regSet);
+ gcInfo.regSet = &regSet;
+ m_cgEmitter = new (compiler->getAllocator()) emitter();
+ m_cgEmitter->codeGen = this;
+ m_cgEmitter->gcInfo = &gcInfo;
+
+#ifdef DEBUG
+ setVerbose(compiler->verbose);
+#endif // DEBUG
+
+ compiler->tmpInit();
+
+#ifdef DEBUG
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+ // This appears to be x86-specific. It's attempting to make sure all offsets to temps
+ // are large. For ARM, this doesn't interact well with our decision about whether to use
+ // R10 or not as a reserved register.
+ if (regSet.rsStressRegs())
+ compiler->tmpIntSpillMax = (SCHAR_MAX / sizeof(int));
+#endif // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+#endif // DEBUG
+
+ instInit();
+
+#ifdef LEGACY_BACKEND
+ // TODO-Cleanup: These used to be set in rsInit() - should they be moved to RegSet??
+ // They are also accessed by the register allocators and fgMorphLclVar().
+ intRegState.rsCurRegArgNum = 0;
+ floatRegState.rsCurRegArgNum = 0;
+#endif // LEGACY_BACKEND
+
+#ifdef LATE_DISASM
+ getDisAssembler().disInit(compiler);
+#endif
+
+#ifdef DEBUG
+ genTempLiveChg = true;
+ genTrnslLocalVarCount = 0;
+
+ // Shouldn't be used before it is set in genFnProlog()
+ compiler->compCalleeRegsPushed = UninitializedWord<unsigned>();
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Shouldn't be used before it is set in genFnProlog()
+ compiler->compCalleeFPRegsSavedMask = (regMaskTP)-1;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+#endif // DEBUG
+
+#ifdef _TARGET_AMD64_
+ // This will be set before final frame layout.
+ compiler->compVSQuirkStackPaddingNeeded = 0;
+
+ // Set to true if we perform the Quirk that fixes the PPP issue
+ compiler->compQuirkForPPPflag = false;
+#endif // _TARGET_AMD64_
+
+#ifdef LEGACY_BACKEND
+ genFlagsEqualToNone();
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUGGING_SUPPORT
+ // Initialize the IP-mapping logic.
+ compiler->genIPmappingList = nullptr;
+ compiler->genIPmappingLast = nullptr;
+ compiler->genCallSite2ILOffsetMap = nullptr;
+#endif
+
+ /* Assume that we are not fully interruptible */
+
+ genInterruptible = false;
+#ifdef DEBUG
+ genInterruptibleUsed = false;
+ genCurDispOffset = (unsigned)-1;
+#endif
+}
+
+void CodeGenInterface::genMarkTreeInReg(GenTreePtr tree, regNumber reg)
+{
+ tree->gtRegNum = reg;
+ tree->gtFlags |= GTF_REG_VAL;
+}
+
+#if CPU_LONG_USES_REGPAIR
+void CodeGenInterface::genMarkTreeInRegPair(GenTreePtr tree, regPairNo regPair)
+{
+ tree->gtRegPair = regPair;
+ tree->gtFlags |= GTF_REG_VAL;
+}
+#endif
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the "total" size of the stack frame, including local size
+// and callee-saved register size. There are a few things "missing" depending on the
+// platform. The function genCallerSPtoInitialSPdelta() includes those things.
+//
+// For ARM, this doesn't include the prespilled registers.
+//
+// For x86, this doesn't include the frame pointer if codeGen->isFramePointerUsed() is true.
+// It also doesn't include the pushed return address.
+//
+// Return value:
+// Frame size
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
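+
+// Worked example (illustrative, x86 assumptions): with 3 callee-saved registers pushed
+// (REGSIZE_BYTES == 4) and compLclFrameSize == 0x40, genTotalFrameSize() returns
+// 3 * 4 + 0x40 = 0x4C; per the notes above, the pushed return address and the saved
+// frame pointer (when one is used) are not part of this total.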
+
+//---------------------------------------------------------------------
+// genSPtoFPdelta - return the offset from SP to the frame pointer.
+// This number is going to be positive, since SP must be at the lowest
+// address.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+
+ int delta;
+
+ delta = -genCallerSPtoInitialSPdelta() + genCallerSPtoFPdelta();
+
+ assert(delta >= 0);
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta = 0;
+
+#if defined(_TARGET_ARM_)
+ // On ARM, we first push the prespill registers, then store LR, then R11 (FP), and point R11 at the saved R11.
+ callerSPtoFPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
+#elif defined(_TARGET_X86_)
+ // Thanks to ebp chaining, the difference between ebp-based addresses
+ // and caller-SP-relative addresses is just the 2 pointers:
+ // return address
+ // pushed ebp
+ callerSPtoFPdelta -= 2 * REGSIZE_BYTES;
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+#if defined(_TARGET_ARM_)
+ callerSPtoSPdelta -= genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ callerSPtoSPdelta -= genTotalFrameSize();
+#elif defined(_TARGET_X86_)
+ callerSPtoSPdelta -= genTotalFrameSize();
+ callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
+
+ // compCalleeRegsPushed does not account for the frame pointer
+ // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
+ if (isFramePointerUsed())
+ {
+ callerSPtoSPdelta -= REGSIZE_BYTES;
+ }
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
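+
+// Worked example tying the deltas together (illustrative, x86 assumptions): with an EBP
+// frame, 2 callee-saved registers pushed besides EBP, and 0x20 bytes of locals:
+// genTotalFrameSize() = 2 * 4 + 0x20 = 0x28,
+// genCallerSPtoFPdelta() = -8 (return address and pushed EBP), and
+// genCallerSPtoInitialSPdelta() = -(0x28 + 4 + 4) = -0x30, so
+// genSPtoFPdelta() = 0x30 + (-8) = 0x28.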
+
+#endif // defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * Should we round simple operations (assignments, arithmetic operations, etc.)?
+ */
+
+// inline
+// static
+bool CodeGen::genShouldRoundFP()
+{
+ RoundLevel roundLevel = getRoundFloatLevel();
+
+ switch (roundLevel)
+ {
+ case ROUND_NEVER:
+ case ROUND_CMP_CONST:
+ case ROUND_CMP:
+ return false;
+
+ default:
+ assert(roundLevel == ROUND_ALWAYS);
+ return true;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Initialize some global variables.
+ */
+
+void CodeGen::genPrepForCompiler()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ /* Figure out which non-register variables hold pointers */
+
+ VarSetOps::AssignNoCopy(compiler, gcInfo.gcTrkStkPtrLcls, VarSetOps::MakeEmpty(compiler));
+
+ // Figure out which variables live in registers.
+ // Also, initialize gcTrkStkPtrLcls to include all tracked variables that do not fully live
+ // in a register (i.e. they live on the stack for all or part of their lifetime).
+ // Note that lvRegister indicates that a lclVar is in a register for its entire lifetime.
+
+ VarSetOps::AssignNoCopy(compiler, compiler->raRegVarsMask, VarSetOps::MakeEmpty(compiler));
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvTracked
+#ifndef LEGACY_BACKEND
+ || varDsc->lvIsRegCandidate()
+#endif // !LEGACY_BACKEND
+ )
+ {
+ if (varDsc->lvRegister
+#if FEATURE_STACK_FP_X87
+ && !varDsc->IsFloatRegType()
+#endif
+ )
+ {
+ VarSetOps::AddElemD(compiler, compiler->raRegVarsMask, varDsc->lvVarIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc) && (!varDsc->lvIsParam || varDsc->lvIsRegArg))
+ {
+ VarSetOps::AddElemD(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex);
+ }
+ }
+ }
+ VarSetOps::AssignNoCopy(compiler, genLastLiveSet, VarSetOps::MakeEmpty(compiler));
+ genLastLiveMask = RBM_NONE;
+#ifdef DEBUG
+ compiler->fgBBcountAtCodegen = compiler->fgBBcount;
+#endif
+}
+
+/*****************************************************************************
+ * To report exception handling information to the VM, we need the size of the exception
+ * handling regions. To compute that, we need to emit labels for the beginning block of
+ * an EH region, and the block that immediately follows a region. Go through the EH
+ * table and mark all these blocks with BBF_HAS_LABEL to make this happen.
+ *
+ * The beginning blocks of the EH regions should already have this flag set.
+ *
+ * No blocks should be added or removed after this.
+ *
+ * This code is closely coupled with genReportEH() in the sense that any block
+ * that this procedure determines needs a label has to be selected
+ * using the same logic both here and in genReportEH(); so basically, any time there is
+ * a change in the way we handle EH reporting, we have to keep the logic of these two
+ * methods 'in sync'.
+ */
+
+void CodeGen::genPrepForEHCodegen()
+{
+ assert(!compiler->fgSafeBasicBlockCreation);
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ bool anyFinallys = false;
+
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
+
+ if (HBtab->ebdTryLast->bbNext != nullptr)
+ {
+ HBtab->ebdTryLast->bbNext->bbFlags |= BBF_HAS_LABEL;
+ }
+
+ if (HBtab->ebdHndLast->bbNext != nullptr)
+ {
+ HBtab->ebdHndLast->bbNext->bbFlags |= BBF_HAS_LABEL;
+ }
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbFlags & BBF_HAS_LABEL);
+ // The block after the last block of the filter is
+ // the handler begin block, which we already asserted
+ // has BBF_HAS_LABEL set.
+ }
+
+#ifdef _TARGET_AMD64_
+ if (HBtab->HasFinallyHandler())
+ {
+ anyFinallys = true;
+ }
+#endif // _TARGET_AMD64_
+ }
+
+#ifdef _TARGET_AMD64_
+ if (anyFinallys)
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ BasicBlock* bbToLabel = block->bbNext;
+ if (block->isBBCallAlwaysPair())
+ {
+ bbToLabel = bbToLabel->bbNext; // skip the BBJ_ALWAYS
+ }
+ if (bbToLabel != nullptr)
+ {
+ bbToLabel->bbFlags |= BBF_HAS_LABEL;
+ }
+ } // block is BBJ_CALLFINALLY
+ } // for each block
+ } // if (anyFinallys)
+#endif // _TARGET_AMD64_
+}
+
+void CodeGenInterface::genUpdateLife(GenTreePtr tree)
+{
+ compiler->compUpdateLife</*ForCodeGen*/ true>(tree);
+}
+
+void CodeGenInterface::genUpdateLife(VARSET_VALARG_TP newLife)
+{
+ compiler->compUpdateLife</*ForCodeGen*/ true>(newLife);
+}
+
+#ifdef LEGACY_BACKEND
+// Returns the liveSet after tree has executed.
+// "tree" MUST occur in the current statement, AFTER the most recent
+// update of compiler->compCurLifeTree and compiler->compCurLife.
+//
+VARSET_VALRET_TP CodeGen::genUpdateLiveSetForward(GenTreePtr tree)
+{
+ VARSET_TP VARSET_INIT(compiler, startLiveSet, compiler->compCurLife);
+ GenTreePtr startNode;
+ assert(tree != compiler->compCurLifeTree);
+ if (compiler->compCurLifeTree == nullptr)
+ {
+ assert(compiler->compCurStmt != nullptr);
+ startNode = compiler->compCurStmt->gtStmt.gtStmtList;
+ }
+ else
+ {
+ startNode = compiler->compCurLifeTree->gtNext;
+ }
+ return compiler->fgUpdateLiveSet(startLiveSet, startNode, tree);
+}
+
+// Determine the registers that are live after "second" has been evaluated,
+// but which are not live after "first".
+// PRECONDITIONS:
+// 1. "first" must occur after compiler->compCurLifeTree in execution order for the current statement
+// 2. "second" must occur after "first" in the current statement
+//
+regMaskTP CodeGen::genNewLiveRegMask(GenTreePtr first, GenTreePtr second)
+{
+ // First, compute the liveset after "first"
+ VARSET_TP firstLiveSet = genUpdateLiveSetForward(first);
+ // Now, update the set forward from "first" to "second"
+ VARSET_TP secondLiveSet = compiler->fgUpdateLiveSet(firstLiveSet, first->gtNext, second);
+ regMaskTP newLiveMask = genLiveMask(VarSetOps::Diff(compiler, secondLiveSet, firstLiveSet));
+ return newLiveMask;
+}
+#endif
+
+// Return the register mask for the given register variable
+// inline
+regMaskTP CodeGenInterface::genGetRegMask(const LclVarDsc* varDsc)
+{
+ regMaskTP regMask = RBM_NONE;
+
+ assert(varDsc->lvIsInReg());
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regMask = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType))
+ {
+ regMask |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+ return regMask;
+}
+
+// Return the register mask for the given lclVar or regVar tree node
+// inline
+regMaskTP CodeGenInterface::genGetRegMask(GenTreePtr tree)
+{
+ assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_REG_VAR);
+
+ regMaskTP regMask = RBM_NONE;
+ const LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ if (varDsc->lvPromoted)
+ {
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(compiler->lvaTable[i].lvIsStructField);
+ if (compiler->lvaTable[i].lvIsInReg())
+ {
+ regMask |= genGetRegMask(&compiler->lvaTable[i]);
+ }
+ }
+ }
+ else if (varDsc->lvIsInReg())
+ {
+ regMask = genGetRegMask(varDsc);
+ }
+ return regMask;
+}
+
+//------------------------------------------------------------------------
+// getRegistersFromMask: Given a register mask return the two registers
+// specified by the mask.
+//
+// Arguments:
+// regPairMask: a register mask that has exactly two bits set
+// Return values:
+// pLoReg: the address of where to write the first register
+// pHiReg: the address of where to write the second register
+//
+void CodeGenInterface::genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg)
+{
+ assert(genCountBits(regPairMask) == 2);
+
+ regMaskTP loMask = genFindLowestBit(regPairMask); // set loMask to a one-bit mask
+ regMaskTP hiMask = regPairMask - loMask; // set hiMask to the other bit that was in regPairMask
+
+ regNumber loReg = genRegNumFromMask(loMask); // set loReg from loMask
+ regNumber hiReg = genRegNumFromMask(hiMask); // set hiReg from hiMask
+
+ *pLoReg = loReg;
+ *pHiReg = hiReg;
+}
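+
+// For example, given a regPairMask with exactly two bits set, *pLoReg receives the register
+// corresponding to the lower-numbered bit and *pHiReg receives the other one.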
+
+// The given lclVar is either going live (being born) or dying.
+// It might be both going live and dying (that is, it is a dead store) under MinOpts.
+// Update regSet.rsMaskVars accordingly.
+// inline
+void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTreePtr tree))
+{
+#if FEATURE_STACK_FP_X87
+ // The stack fp reg vars are handled elsewhere
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ return;
+#endif
+
+ regMaskTP regMask = genGetRegMask(varDsc);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", (varDsc - compiler->lvaTable));
+ varDsc->PrintVarReg();
+ printf(" is becoming %s ", (isDying) ? "dead" : "live");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ if (isDying)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert((regSet.rsMaskVars & regMask) == regMask);
+ regSet.RemoveMaskVars(regMask);
+ }
+ else
+ {
+ assert((regSet.rsMaskVars & regMask) == 0);
+ regSet.AddMaskVars(regMask);
+ }
+}
+
+// Gets a register mask that represents the kill set for a helper call since
+// not all JIT Helper calls follow the standard ABI on the target architecture.
+//
+// TODO-CQ: Currently this list is incomplete (not all helper calls are
+// enumerated) and not 100% accurate (some killsets are bigger than
+// the set of registers the helpers actually trash).
+// There's some work to be done in several places in the JIT to
+// accurately track the registers that are getting killed by
+// helper calls:
+// a) LSRA needs several changes to accommodate more precise killsets
+// for every helper call it sees (both explicitly [easy] and
+// implicitly [hard])
+// b) Currently for AMD64, when we generate code for a helper call
+// we're independently over-pessimizing the killsets of the call
+// (independently from LSRA) and this needs changes
+// both in CodeGenAmd64.cpp and emitx86.cpp.
+//
+// The best solution for this problem would be to try to centralize
+// the killset information in a single place but then make the
+// corresponding changes so every code generation phase is in sync
+// about this.
+//
+// The interim solution is to only add known helper calls that don't
+// follow the AMD64 ABI and actually trash registers that are supposed to be non-volatile.
+regMaskTP Compiler::compHelperCallKillSet(CorInfoHelpFunc helper)
+{
+ switch (helper)
+ {
+ case CORINFO_HELP_ASSIGN_BYREF:
+#if defined(_TARGET_AMD64_)
+ return RBM_RSI | RBM_RDI | RBM_CALLEE_TRASH;
+#elif defined(_TARGET_ARM64_)
+ return RBM_CALLEE_TRASH_NOGC;
+#else
+ NYI("Model kill set for CORINFO_HELP_ASSIGN_BYREF on target arch");
+ return RBM_CALLEE_TRASH;
+#endif
+
+ case CORINFO_HELP_PROF_FCN_ENTER:
+#ifdef _TARGET_AMD64_
+ return RBM_PROFILER_ENTER_TRASH;
+#else
+ unreached();
+#endif
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+#ifdef _TARGET_AMD64_
+ return RBM_PROFILER_LEAVE_TRASH;
+#else
+ unreached();
+#endif
+
+ case CORINFO_HELP_STOP_FOR_GC:
+ return RBM_STOP_FOR_GC_TRASH;
+
+ case CORINFO_HELP_INIT_PINVOKE_FRAME:
+ return RBM_INIT_PINVOKE_FRAME_TRASH;
+
+ default:
+ return RBM_CALLEE_TRASH;
+ }
+}
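+
+// For example, on AMD64 a call to CORINFO_HELP_ASSIGN_BYREF is modeled above as killing RSI, RDI
+// and the usual caller-trashed registers, so code generation must not keep a value live in RSI or
+// RDI across that helper call.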
+
+//
+// Gets a register mask that represents the kill set for "NO GC" helper calls since
+// not all JIT Helper calls follow the standard ABI on the target architecture.
+//
+// Note: This list may not be complete; helpers not listed below fall back to the default NOGC kill set.
+//
+regMaskTP Compiler::compNoGCHelperCallKillSet(CorInfoHelpFunc helper)
+{
+ assert(emitter::emitNoGChelper(helper));
+#ifdef _TARGET_AMD64_
+ switch (helper)
+ {
+ case CORINFO_HELP_PROF_FCN_ENTER:
+ return RBM_PROFILER_ENTER_TRASH;
+
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+ return RBM_PROFILER_LEAVE_TRASH;
+
+ case CORINFO_HELP_ASSIGN_BYREF:
+ // this helper doesn't trash RSI and RDI
+ return RBM_CALLEE_TRASH_NOGC & ~(RBM_RSI | RBM_RDI);
+
+ default:
+ return RBM_CALLEE_TRASH_NOGC;
+ }
+#else
+ return RBM_CALLEE_TRASH_NOGC;
+#endif
+}
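+
+// For example, in the no-GC kill set above CORINFO_HELP_ASSIGN_BYREF is modeled as preserving
+// RSI and RDI, in contrast to the full kill set returned by compHelperCallKillSet.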
+
+// Update liveness (always var liveness, i.e., compCurLife, and also, if "ForCodeGen" is true, reg liveness, i.e.,
+// regSet.rsMaskVars as well)
+// if the given lclVar (or indir(addr(local)))/regVar node is going live (being born) or dying.
+template <bool ForCodeGen>
+void Compiler::compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars)
+{
+ GenTreePtr indirAddrLocal = fgIsIndirOfAddrOfLocal(tree);
+ assert(tree->OperIsNonPhiLocal() || indirAddrLocal != nullptr);
+
+ // Get the local var tree -- if "tree" is "Ldobj(addr(x))", or "ind(addr(x))" this is "x", else it's "tree".
+ GenTreePtr lclVarTree = indirAddrLocal;
+ if (lclVarTree == nullptr)
+ {
+ lclVarTree = tree;
+ }
+ unsigned int lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+#ifdef DEBUG
+#if !defined(_TARGET_AMD64_)
+ // There are no addr nodes on ARM and we are experimenting with encountering vars in 'random' order.
+ // Struct fields are not traversed in a consistent order, so ignore them when
+ // verifying that we see the var nodes in execution order
+ if (ForCodeGen)
+ {
+ if (tree->OperIsIndir())
+ {
+ assert(indirAddrLocal != NULL);
+ }
+ else if (tree->gtNext != NULL && tree->gtNext->gtOper == GT_ADDR &&
+ ((tree->gtNext->gtNext == NULL || !tree->gtNext->gtNext->OperIsIndir())))
+ {
+ assert(tree->IsLocal()); // Can only take the address of a local.
+ // The ADDR might occur in a context where the address it contributes is eventually
+ // dereferenced, so we can't say that this is not a use or def.
+ }
+#if 0
+ // TODO-ARM64-Bug?: These asserts don't seem right for ARM64: I don't understand why we have to assert
+ // two consecutive lclvars (in execution order) can only be observed if the first one is a struct field.
+ // It seems to me this is code only applicable to the legacy JIT and not RyuJIT (and therefore why it was
+ // ifdef'ed out for AMD64).
+ else if (!varDsc->lvIsStructField)
+ {
+ GenTreePtr prevTree;
+ for (prevTree = tree->gtPrev;
+ prevTree != NULL && prevTree != compCurLifeTree;
+ prevTree = prevTree->gtPrev)
+ {
+ if ((prevTree->gtOper == GT_LCL_VAR) || (prevTree->gtOper == GT_REG_VAR))
+ {
+ LclVarDsc * prevVarDsc = lvaTable + prevTree->gtLclVarCommon.gtLclNum;
+
+ // These are the only things for which this method MUST be called
+ assert(prevVarDsc->lvIsStructField);
+ }
+ }
+ assert(prevTree == compCurLifeTree);
+ }
+#endif // 0
+ }
+#endif // !_TARGET_AMD64_
+#endif // DEBUG
+
+ compCurLifeTree = tree;
+ VARSET_TP VARSET_INIT(this, newLife, compCurLife);
+
+ // By codegen, a struct may not be TYP_STRUCT, so we have to
+ // check lvPromoted, for the case where the fields are being
+ // tracked.
+ if (!varDsc->lvTracked && !varDsc->lvPromoted)
+ {
+ return;
+ }
+
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0); // if it's "x <op>=
+ // ..." then variable
+ // "x" must have had a
+ // previous, original,
+ // site to be born.
+ bool isDying = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+#ifndef LEGACY_BACKEND
+ bool spill = ((tree->gtFlags & GTF_SPILL) != 0);
+#endif // !LEGACY_BACKEND
+
+#ifndef LEGACY_BACKEND
+ // For RyuJIT backend, since all tracked vars are register candidates, but not all are in registers at all times,
+ // we maintain two separate sets of variables - the total set of variables that are either
+ // born or dying here, and the subset of those that are on the stack
+ VARSET_TP VARSET_INIT_NOCOPY(stackVarDeltaSet, VarSetOps::MakeEmpty(this));
+#endif // !LEGACY_BACKEND
+
+ if (isBorn || isDying)
+ {
+ bool hasDeadTrackedFieldVars = false; // If this is true, then, for a LDOBJ(ADDR(<promoted struct local>)),
+ VARSET_TP* deadTrackedFieldVars =
+ nullptr; // *deadTrackedFieldVars indicates which tracked field vars are dying.
+ VARSET_TP VARSET_INIT_NOCOPY(varDeltaSet, VarSetOps::MakeEmpty(this));
+
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, varDeltaSet, varDsc->lvVarIndex);
+ if (ForCodeGen)
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn && varDsc->lvIsRegCandidate() && tree->gtHasReg())
+ {
+ codeGen->genUpdateVarReg(varDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ if (varDsc->lvIsInReg()
+#ifndef LEGACY_BACKEND
+ && tree->gtRegNum != REG_NA
+#endif // !LEGACY_BACKEND
+ )
+ {
+ codeGen->genUpdateRegLife(varDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, varDsc->lvVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else if (varDsc->lvPromoted)
+ {
+ if (indirAddrLocal != nullptr && isDying)
+ {
+ assert(!isBorn); // GTF_VAR_DEATH only set for LDOBJ last use.
+ hasDeadTrackedFieldVars = GetPromotedStructDeathVars()->Lookup(indirAddrLocal, &deadTrackedFieldVars);
+ if (hasDeadTrackedFieldVars)
+ {
+ VarSetOps::Assign(this, varDeltaSet, *deadTrackedFieldVars);
+ }
+ }
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ LclVarDsc* fldVarDsc = &(lvaTable[i]);
+ noway_assert(fldVarDsc->lvIsStructField);
+ if (fldVarDsc->lvTracked)
+ {
+ unsigned fldVarIndex = fldVarDsc->lvVarIndex;
+ noway_assert(fldVarIndex < lvaTrackedCount);
+ if (!hasDeadTrackedFieldVars)
+ {
+ VarSetOps::AddElemD(this, varDeltaSet, fldVarIndex);
+ if (ForCodeGen)
+ {
+ // We repeat this call here and below to avoid the VarSetOps::IsMember
+ // test in this, the common case, where we have no deadTrackedFieldVars.
+ if (fldVarDsc->lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn)
+ {
+ codeGen->genUpdateVarReg(fldVarDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else if (ForCodeGen && VarSetOps::IsMember(this, varDeltaSet, fldVarIndex))
+ {
+ if (lvaTable[i].lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+ if (isBorn)
+ {
+ codeGen->genUpdateVarReg(fldVarDsc, tree);
+ }
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ VarSetOps::AddElemD(this, stackVarDeltaSet, fldVarIndex);
+ }
+#endif // !LEGACY_BACKEND
+ }
+ }
+ }
+ }
+
+ // First, update the live set
+ if (isDying)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert (VarSetOps::IsSubset(compiler, regVarDeltaSet, newLife));
+ VarSetOps::DiffD(this, newLife, varDeltaSet);
+ if (pLastUseVars != nullptr)
+ {
+ VarSetOps::Assign(this, *pLastUseVars, varDeltaSet);
+ }
+ }
+ else
+ {
+ // This shouldn't be in newLife, unless this is debug code, in which
+ // case we keep vars live everywhere, OR the variable is address-exposed,
+ // OR this block is part of a try block, in which case it may be live at the handler.
+ // Could add a check that, if it's in newLife, it's also in
+ // fgGetHandlerLiveVars(compCurBB), but that seems excessive.
+ //
+ // For a dead store, it can be the case that we set both isBorn and isDying to true.
+ // (We don't eliminate dead stores under MinOpts, so we can't assume they're always
+ // eliminated.) If it's both, we handled it above.
+ VarSetOps::UnionD(this, newLife, varDeltaSet);
+ }
+ }
+
+ if (!VarSetOps::Equal(this, compCurLife, newLife))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tLive vars: ");
+ dumpConvertedVarSet(this, compCurLife);
+ printf(" => ");
+ dumpConvertedVarSet(this, newLife);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ VarSetOps::Assign(this, compCurLife, newLife);
+
+ if (ForCodeGen)
+ {
+#ifndef LEGACY_BACKEND
+
+ // Only add vars to the gcInfo.gcVarPtrSetCur if they are currently on stack, since the
+ // gcInfo.gcTrkStkPtrLcls
+ // includes all TRACKED vars that EVER live on the stack (i.e. are not always in a register).
+ VARSET_TP VARSET_INIT_NOCOPY(gcTrkStkDeltaSet,
+ VarSetOps::Intersection(this, codeGen->gcInfo.gcTrkStkPtrLcls,
+ stackVarDeltaSet));
+ if (!VarSetOps::IsEmpty(this, gcTrkStkDeltaSet))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tGCvars: ");
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf(" => ");
+ }
+#endif // DEBUG
+
+ if (isBorn)
+ {
+ VarSetOps::UnionD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
+ }
+ else
+ {
+ VarSetOps::DiffD(this, codeGen->gcInfo.gcVarPtrSetCur, gcTrkStkDeltaSet);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+#else // LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(gcVarPtrSetNew,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+ if (!VarSetOps::Equal(this, codeGen->gcInfo.gcVarPtrSetCur, gcVarPtrSetNew))
+ {
+ printf("\t\t\t\t\t\t\tGCvars: ");
+ dumpConvertedVarSet(this, codeGen->gcInfo.gcVarPtrSetCur);
+ printf(" => ");
+ dumpConvertedVarSet(this, gcVarPtrSetNew);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUGGING_SUPPORT
+ codeGen->siUpdate();
+#endif
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+ if (ForCodeGen && spill)
+ {
+ assert(!varDsc->lvPromoted);
+ codeGen->genSpillVar(tree);
+ if (VarSetOps::IsMember(this, codeGen->gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+ if (!VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\t\t\t\t\t\t\tVar V%02u becoming live\n", varDsc - lvaTable);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+#endif // !LEGACY_BACKEND
+}
+
+// Need an explicit instantiation.
+template void Compiler::compUpdateLifeVar<false>(GenTreePtr tree, VARSET_TP* pLastUseVars);
+
+template <bool ForCodeGen>
+void Compiler::compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree))
+{
+ LclVarDsc* varDsc;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (tree != nullptr)
+ {
+ Compiler::printTreeID(tree);
+ }
+ printf("Change life %s ", VarSetOps::ToString(this, compCurLife));
+ dumpConvertedVarSet(this, compCurLife);
+ printf(" -> %s ", VarSetOps::ToString(this, newLife));
+ dumpConvertedVarSet(this, newLife);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* We should only be called when the live set has actually changed */
+
+ noway_assert(!VarSetOps::Equal(this, compCurLife, newLife));
+
+ if (!ForCodeGen)
+ {
+ VarSetOps::Assign(this, compCurLife, newLife);
+ return;
+ }
+
+ /* Figure out which variables are becoming live/dead at this point */
+
+ // deadSet = compCurLife - newLife
+ VARSET_TP VARSET_INIT(this, deadSet, compCurLife);
+ VarSetOps::DiffD(this, deadSet, newLife);
+
+ // bornSet = newLife - compCurLife
+ VARSET_TP VARSET_INIT(this, bornSet, newLife);
+ VarSetOps::DiffD(this, bornSet, compCurLife);
+
+ /* Can't simultaneously become live and dead at the same time */
+
+ // (deadSet UNION bornSet) != EMPTY
+ noway_assert(!VarSetOps::IsEmpty(this, VarSetOps::Union(this, deadSet, bornSet)));
+ // (deadSet INTERSECTION bornSet) == EMPTY
+ noway_assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, deadSet, bornSet)));
+
+#ifdef LEGACY_BACKEND
+ // In the LEGACY_BACKEND case, we only consider variables that are fully enregistered,
+ // and there may be none.
+ VarSetOps::IntersectionD(this, deadSet, raRegVarsMask);
+ VarSetOps::IntersectionD(this, bornSet, raRegVarsMask);
+ // And all gcTrkStkPtrLcls that are now live will be on the stack
+ VarSetOps::AssignNoCopy(this, codeGen->gcInfo.gcVarPtrSetCur,
+ VarSetOps::Intersection(this, newLife, codeGen->gcInfo.gcTrkStkPtrLcls));
+#endif // LEGACY_BACKEND
+
+ VarSetOps::Assign(this, compCurLife, newLife);
+
+ // Handle the dying vars first, then the newly live vars.
+ // This is because, in the RyuJIT backend case, they may occupy registers that
+ // will be occupied by another var that is newly live.
+ VARSET_ITER_INIT(this, deadIter, deadSet, deadVarIndex);
+ while (deadIter.NextElem(this, &deadVarIndex))
+ {
+ unsigned varNum = lvaTrackedToVarNum[deadVarIndex];
+ varDsc = lvaTable + varNum;
+ bool isGCRef = (varDsc->TypeGet() == TYP_REF);
+ bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
+
+ if (varDsc->lvIsInReg())
+ {
+ // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
+ // gc sets
+ regMaskTP regMask = varDsc->lvRegMask();
+ if (isGCRef)
+ {
+ codeGen->gcInfo.gcRegGCrefSetCur &= ~regMask;
+ }
+ else if (isByRef)
+ {
+ codeGen->gcInfo.gcRegByrefSetCur &= ~regMask;
+ }
+ codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(tree));
+ }
+#ifndef LEGACY_BACKEND
+ // This isn't in a register, so update the gcVarPtrSetCur.
+ // (Note that in the LEGACY_BACKEND case gcVarPtrSetCur is updated above unconditionally
+ // for all gcTrkStkPtrLcls in newLife, because none of them ever live in a register.)
+ else if (isGCRef || isByRef)
+ {
+ VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex);
+ JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ VARSET_ITER_INIT(this, bornIter, bornSet, bornVarIndex);
+ while (bornIter.NextElem(this, &bornVarIndex))
+ {
+ unsigned varNum = lvaTrackedToVarNum[bornVarIndex];
+ varDsc = lvaTable + varNum;
+ bool isGCRef = (varDsc->TypeGet() == TYP_REF);
+ bool isByRef = (varDsc->TypeGet() == TYP_BYREF);
+
+ if (varDsc->lvIsInReg())
+ {
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
+#endif // !LEGACY_BACKEND
+ codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(tree));
+ regMaskTP regMask = varDsc->lvRegMask();
+ if (isGCRef)
+ {
+ codeGen->gcInfo.gcRegGCrefSetCur |= regMask;
+ }
+ else if (isByRef)
+ {
+ codeGen->gcInfo.gcRegByrefSetCur |= regMask;
+ }
+ }
+#ifndef LEGACY_BACKEND
+ // This isn't in a register, so update the gcVarPtrSetCur
+ else if (lvaIsGCTracked(varDsc))
+ {
+ VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex);
+ JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ codeGen->siUpdate();
+#endif
+}
+
+// Need an explicit instantiation.
+template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree));
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Get the mask of integer registers that contain 'live' enregistered
+ * local variables after "tree".
+ *
+ * The output is the mask of integer registers that are currently
+ * alive and holding the enregistered local variables.
+ */
+regMaskTP CodeGenInterface::genLiveMask(GenTreePtr tree)
+{
+ regMaskTP liveMask = regSet.rsMaskVars;
+
+ GenTreePtr nextNode;
+ if (compiler->compCurLifeTree == nullptr)
+ {
+ assert(compiler->compCurStmt != nullptr);
+ nextNode = compiler->compCurStmt->gtStmt.gtStmtList;
+ }
+ else
+ {
+ nextNode = compiler->compCurLifeTree->gtNext;
+ }
+
+ // Theoretically, we should always be able to find "tree" by walking
+ // forward in execution order. But unfortunately, there is at least
+ // one case (addressing) where a node may be evaluated out of order.
+ // So, we have to handle that case.
+ bool outOfOrder = false;
+ for (; nextNode != tree->gtNext; nextNode = nextNode->gtNext)
+ {
+ if (nextNode == nullptr)
+ {
+ outOfOrder = true;
+ break;
+ }
+ if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
+ {
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
+ bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
+ if (isBorn || isDying)
+ {
+ regMaskTP regMask = genGetRegMask(nextNode);
+ if (regMask != RBM_NONE)
+ {
+ if (isBorn)
+ {
+ liveMask |= regMask;
+ }
+ else
+ {
+ liveMask &= ~(regMask);
+ }
+ }
+ }
+ }
+ }
+ if (outOfOrder)
+ {
+ assert(compiler->compCurLifeTree != nullptr);
+ liveMask = regSet.rsMaskVars;
+ // We were unable to find "tree" by traversing forward. We must now go
+ // backward from compiler->compCurLifeTree instead. We have to start with compiler->compCurLifeTree,
+ // since regSet.rsMaskVars reflects its completed execution
+ for (nextNode = compiler->compCurLifeTree; nextNode != tree; nextNode = nextNode->gtPrev)
+ {
+ assert(nextNode != nullptr);
+
+ if (nextNode->gtOper == GT_LCL_VAR || nextNode->gtOper == GT_REG_VAR)
+ {
+ bool isBorn = ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0);
+ bool isDying = ((nextNode->gtFlags & GTF_VAR_DEATH) != 0);
+ if (isBorn || isDying)
+ {
+ regMaskTP regMask = genGetRegMask(nextNode);
+ if (regMask != RBM_NONE)
+ {
+ // We're going backward - so things born are removed
+ // and vice versa
+ if (isBorn)
+ {
+ liveMask &= ~(regMask);
+ }
+ else
+ {
+ liveMask |= regMask;
+ }
+ }
+ }
+ }
+ }
+ }
+ return liveMask;
+}
+
+/*****************************************************************************
+ *
+ * Get the mask of integer registers that contain 'live' enregistered
+ * local variables.
+ *
+ * The input is a liveSet which contains a set of local
+ * variables that are currently alive.
+ *
+ * The output is the mask of x86 integer registers that are currently
+ * alive and holding the enregistered local variables.
+ */
+
+regMaskTP CodeGenInterface::genLiveMask(VARSET_VALARG_TP liveSet)
+{
+ // Check for the zero LiveSet mask
+ if (VarSetOps::IsEmpty(compiler, liveSet))
+ {
+ return RBM_NONE;
+ }
+
+ // Check if our liveSet matches the one we have cached: genLastLiveSet -> genLastLiveMask
+ if (VarSetOps::Equal(compiler, liveSet, genLastLiveSet))
+ {
+ return genLastLiveMask;
+ }
+
+ regMaskTP liveMask = 0;
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+
+ // If the variable is not enregistered, then it can't contribute to the liveMask
+ if (!VarSetOps::IsMember(compiler, compiler->raRegVarsMask, varIndex))
+ {
+ continue;
+ }
+
+ // Find the variable in compiler->lvaTable
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+#if !FEATURE_FP_REGALLOC
+ // If the variable is a floating point type, then it can't contribute to the liveMask
+ if (varDsc->IsFloatRegType())
+ {
+ continue;
+ }
+#endif
+
+ noway_assert(compiler->lvaTable[varNum].lvRegister);
+ regMaskTP regBit;
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+
+ // For longs we may have two regs
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ {
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+
+ noway_assert(regBit != 0);
+
+ // We should not already have any of these bits set
+ noway_assert((liveMask & regBit) == 0);
+
+ // Update the liveMask with the register bits that are live
+ liveMask |= regBit;
+ }
+
+ // cache the last mapping between gtLiveSet -> liveMask
+ VarSetOps::Assign(compiler, genLastLiveSet, liveSet);
+ genLastLiveMask = liveMask;
+
+ return liveMask;
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * Generate a spill.
+ */
+void CodeGenInterface::spillReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ getEmitter()->emitIns_S_R(ins_Store(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
+}
+
+/*****************************************************************************
+ *
+ * Generate a reload.
+ */
+void CodeGenInterface::reloadReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ getEmitter()->emitIns_R_S(ins_Load(type), emitActualTypeSize(type), reg, tmp->tdTempNum(), 0);
+}
+
+#ifdef LEGACY_BACKEND
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+void CodeGenInterface::reloadFloatReg(var_types type, TempDsc* tmp, regNumber reg)
+{
+ var_types tmpType = tmp->tdTempType();
+ getEmitter()->emitIns_R_S(ins_FloatLoad(type), emitActualTypeSize(tmpType), reg, tmp->tdTempNum(), 0);
+}
+#endif
+#endif // LEGACY_BACKEND
+
+// inline
+regNumber CodeGenInterface::genGetThisArgReg(GenTreePtr call)
+{
+ noway_assert(call->IsCall());
+ return REG_ARG_0;
+}
+
+//----------------------------------------------------------------------
+// getSpillTempDsc: get the TempDsc corresponding to a spilled tree.
+//
+// Arguments:
+// tree - spilled GenTree node
+//
+// Return Value:
+// TempDsc corresponding to tree
+TempDsc* CodeGenInterface::getSpillTempDsc(GenTree* tree)
+{
+ // tree must be in spilled state.
+ assert((tree->gtFlags & GTF_SPILLED) != 0);
+
+ // Get the tree's SpillDsc.
+ RegSet::SpillDsc* prevDsc;
+ RegSet::SpillDsc* spillDsc = regSet.rsGetSpillInfo(tree, tree->gtRegNum, &prevDsc);
+ assert(spillDsc != nullptr);
+
+ // Get the temp desc.
+ TempDsc* temp = regSet.rsGetSpillTempWord(tree->gtRegNum, spillDsc, prevDsc);
+ return temp;
+}
+
+#ifdef _TARGET_XARCH_
+
+#ifdef _TARGET_AMD64_
+// Returns relocation type hint for an addr.
+// Note that there are no reloc hints on x86.
+//
+// Arguments
+// addr - data address
+//
+// Returns
+// relocation type hint
+//
+unsigned short CodeGenInterface::genAddrRelocTypeHint(size_t addr)
+{
+ return compiler->eeGetRelocTypeHint((void*)addr);
+}
+#endif //_TARGET_AMD64_
+
+// Return true if an absolute indirect data address can be encoded as IP-relative
+// offset. Note that this method should be used only when the caller knows that
+// the address is an icon value that VM has given and there is no GenTree node
+// representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - an absolute indirect data address
+//
+// Returns
+// true if indir data addr could be encoded as IP-relative offset.
+//
+bool CodeGenInterface::genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
+{
+#ifdef _TARGET_AMD64_
+ return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
+#else
+ // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
+ return false;
+#endif
+}
+
+// Return true if an indirect code address can be encoded as IP-relative offset.
+// Note that this method should be used only when the caller knows that the
+// address is an icon value that VM has given and there is no GenTree node
+// representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - an absolute indirect code address
+//
+// Returns
+// true if indir code addr could be encoded as IP-relative offset.
+//
+bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr)
+{
+#ifdef _TARGET_AMD64_
+ return genAddrRelocTypeHint(addr) == IMAGE_REL_BASED_REL32;
+#else
+ // x86: PC-relative addressing is available only for control flow instructions (jmp and call)
+ return true;
+#endif
+}
+
+// Return true if an indirect code address can be encoded as 32-bit displacement
+// relative to zero. Note that this method should be used only when the caller
+// knows that the address is an icon value that VM has given and there is no
+// GenTree node representing it. Otherwise, one should always use FitsInAddrBase().
+//
+// Arguments
+// addr - absolute indirect code address
+//
+// Returns
+// true if absolute indir code addr could be encoded as 32-bit displacement relative to zero.
+//
+bool CodeGenInterface::genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr)
+{
+ return GenTreeIntConCommon::FitsInI32((ssize_t)addr);
+}
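+
+// For example, on AMD64 an address such as 0x7FFF0000 fits in a signed 32-bit displacement and
+// can be encoded relative to zero, whereas an address such as 0x100000000 cannot.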
+
+// Return true if an absolute indirect code address needs a relocation recorded with VM.
+//
+// Arguments
+// addr - an absolute indirect code address
+//
+// Returns
+// true if indir code addr needs a relocation recorded with VM
+//
+bool CodeGenInterface::genCodeIndirAddrNeedsReloc(size_t addr)
+{
+ // If generating relocatable ngen code, then all code addr should go through relocation
+ if (compiler->opts.compReloc)
+ {
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ // If code addr could be encoded as 32-bit offset relative to IP, we need to record a relocation.
+ if (genCodeIndirAddrCanBeEncodedAsPCRelOffset(addr))
+ {
+ return true;
+ }
+
+ // It could be possible that the code indir addr could be encoded as 32-bit displacement relative
+ // to zero. But we don't need to emit a relocation in that case.
+ return false;
+#else //_TARGET_X86_
+ // On x86 there is no need for recording relocations during jitting,
+ // because all addrs fit within 32-bits.
+ return false;
+#endif //_TARGET_X86_
+}
+
+// Return true if a direct code address needs to be marked as relocatable.
+//
+// Arguments
+// addr - absolute direct code address
+//
+// Returns
+// true if direct code addr needs a relocation recorded with VM
+//
+bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
+{
+ // If generating relocatable ngen code, then all code addr should go through relocation
+ if (compiler->opts.compReloc)
+ {
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ // By default all direct code addresses go through relocation so that VM will setup
+ // a jump stub if addr cannot be encoded as pc-relative offset.
+ return true;
+#else //_TARGET_X86_
+ // On x86 there is no need for recording relocations during jitting,
+ // because all addrs fit within 32-bits.
+ return false;
+#endif //_TARGET_X86_
+}
+#endif //_TARGET_XARCH_
+
+/*****************************************************************************
+ *
+ * The following can be used to create basic blocks that serve as labels for
+ * the emitter. Use with caution - these are not real basic blocks!
+ *
+ */
+
+// inline
+BasicBlock* CodeGen::genCreateTempLabel()
+{
+#ifdef DEBUG
+ // These blocks don't affect FP
+ compiler->fgSafeBasicBlockCreation = true;
+#endif
+
+ BasicBlock* block = compiler->bbNewBasicBlock(BBJ_NONE);
+
+#ifdef DEBUG
+ compiler->fgSafeBasicBlockCreation = false;
+#endif
+
+ block->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ // Use coldness of current block, as this label will
+ // be contained in it.
+ block->bbFlags |= (compiler->compCurBB->bbFlags & BBF_COLD);
+
+#ifdef DEBUG
+ block->bbTgtStkDepth = genStackLevel / sizeof(int);
+#endif
+ return block;
+}
+
+// inline
+void CodeGen::genDefineTempLabel(BasicBlock* label)
+{
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, label->bbNum);
+ }
+#endif
+
+ label->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ /* gcInfo.gcRegGCrefSetCur does not account for redundant load-suppression
+ of GC vars, and the emitter will not know about them */
+
+ regTracker.rsTrackRegClrPtr();
+}
+
+/*****************************************************************************
+ *
+ * Adjust the stack pointer by the given value; assumes that this follows
+ * a call so only callee-saved registers (and registers that may hold a
+ * return value) are used at this point.
+ */
+
+void CodeGen::genAdjustSP(ssize_t delta)
+{
+#ifdef _TARGET_X86_
+ if (delta == sizeof(int))
+ inst_RV(INS_pop, REG_ECX, TYP_INT);
+ else
+#endif
+ inst_RV_IV(INS_add, REG_SPBASE, delta, EA_PTRSIZE);
+}
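+
+// For example, on x86 genAdjustSP(4) after a call emits "pop ecx" rather than "add esp, 4",
+// presumably because the pop form has a smaller encoding; any other delta uses the add form.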
+
+#ifdef _TARGET_ARM_
+// return size
+// alignmentWB is out param
+unsigned CodeGenInterface::InferOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
+{
+ unsigned alignment = 0;
+ unsigned opSize = 0;
+
+ if (op->gtType == TYP_STRUCT || op->OperIsCopyBlkOp())
+ {
+ opSize = InferStructOpSizeAlign(op, &alignment);
+ }
+ else
+ {
+ alignment = genTypeAlignments[op->TypeGet()];
+ opSize = genTypeSizes[op->TypeGet()];
+ }
+
+ assert(opSize != 0);
+ assert(alignment != 0);
+
+ (*alignmentWB) = alignment;
+ return opSize;
+}
+// return size
+// alignmentWB is out param
+unsigned CodeGenInterface::InferStructOpSizeAlign(GenTreePtr op, unsigned* alignmentWB)
+{
+ unsigned alignment = 0;
+ unsigned opSize = 0;
+
+ while (op->gtOper == GT_COMMA)
+ {
+ op = op->gtOp.gtOp2;
+ }
+
+ if (op->gtOper == GT_OBJ)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = op->AsObj()->gtClass;
+ opSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else if (op->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(varDsc->lvType == TYP_STRUCT);
+ opSize = varDsc->lvSize();
+ if (varDsc->lvStructDoubleAlign)
+ {
+ alignment = TARGET_POINTER_SIZE * 2;
+ }
+ else
+ {
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ else if (op->OperIsCopyBlkOp())
+ {
+ GenTreePtr op2 = op->gtOp.gtOp2;
+
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ if (op2->IsIconHandle(GTF_ICON_CLASS_HDL))
+ {
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)op2->gtIntCon.gtIconVal;
+ opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+ alignment =
+ roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ opSize = op2->gtIntCon.gtIconVal;
+ GenTreePtr op1 = op->gtOp.gtOp1;
+ assert(op1->OperGet() == GT_LIST);
+ GenTreePtr dstAddr = op1->gtOp.gtOp1;
+ if (dstAddr->OperGet() == GT_ADDR)
+ {
+ InferStructOpSizeAlign(dstAddr->gtOp.gtOp1, &alignment);
+ }
+ else
+ {
+ assert(!"Unhandle dstAddr node");
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(!"Variable sized COPYBLK register arg!");
+ opSize = 0;
+ alignment = TARGET_POINTER_SIZE;
+ }
+ }
+ else if (op->gtOper == GT_MKREFANY)
+ {
+ opSize = TARGET_POINTER_SIZE * 2;
+ alignment = TARGET_POINTER_SIZE;
+ }
+ else if (op->IsArgPlaceHolderNode())
+ {
+ CORINFO_CLASS_HANDLE clsHnd = op->gtArgPlace.gtArgPlaceClsHnd;
+ assert(clsHnd != 0);
+ opSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+ alignment = roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ assert(!"Unhandled gtOper");
+ opSize = TARGET_POINTER_SIZE;
+ alignment = TARGET_POINTER_SIZE;
+ }
+
+ assert(opSize != 0);
+ assert(alignment != 0);
+
+ (*alignmentWB) = alignment;
+ return opSize;
+}
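+
+// For example, a GT_MKREFANY operand is reported above as two pointers in size with pointer-size
+// alignment, while a GT_OBJ operand gets its size and alignment from the VM's class handle queries.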
+
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Take an address expression and try to find the best set of components to
+ * form an address mode; returns non-zero if this is successful.
+ *
+ * TODO-Cleanup: The RyuJIT backend never uses this to actually generate code.
+ * Refactor this code so that the underlying analysis can be used in
+ * the RyuJIT Backend to do lowering, instead of having to call this method with the
+ * option to not generate the code.
+ *
+ * 'fold' specifies if it is OK to fold the array index which hangs off
+ * a GT_NOP node.
+ *
+ * If successful, the parameters will be set to the following values:
+ *
+ * *rv1Ptr ... base operand
+ * *rv2Ptr ... optional operand
+ * *revPtr ... true if rv2 is before rv1 in the evaluation order
+ * #if SCALED_ADDR_MODES
+ * *mulPtr ... optional multiplier (2/4/8) for rv2
+ * Note that for [reg1 + reg2] and [reg1 + reg2 + icon], *mulPtr == 0.
+ * #endif
+ * *cnsPtr ... integer constant [optional]
+ *
+ * The 'mode' parameter may have one of the following values:
+ *
+ * #if LEA_AVAILABLE
+ * +1 ... we're trying to compute a value via 'LEA'
+ * #endif
+ *
+ * 0 ... we're trying to form an address mode
+ *
+ * -1 ... we're generating code for an address mode,
+ * and thus the address must already form an
+ * address mode (without any further work)
+ *
+ * IMPORTANT NOTE: This routine doesn't generate any code, it merely
+ * identifies the components that might be used to
+ * form an address mode later on.
+ */
+
+bool CodeGen::genCreateAddrMode(GenTreePtr addr,
+ int mode,
+ bool fold,
+ regMaskTP regMask,
+ bool* revPtr,
+ GenTreePtr* rv1Ptr,
+ GenTreePtr* rv2Ptr,
+#if SCALED_ADDR_MODES
+ unsigned* mulPtr,
+#endif
+ unsigned* cnsPtr,
+ bool nogen)
+{
+#ifndef LEGACY_BACKEND
+ assert(nogen == true);
+#endif // !LEGACY_BACKEND
+
+ /*
+ The following indirections are valid address modes on x86/x64:
+
+ [ icon] * not handled here
+ [reg ] * not handled here
+ [reg + icon]
+ [reg2 + reg1 ]
+ [reg2 + reg1 + icon]
+ [reg2 + 2 * reg1 ]
+ [reg2 + 4 * reg1 ]
+ [reg2 + 8 * reg1 ]
+ [ 2 * reg1 + icon]
+ [ 4 * reg1 + icon]
+ [ 8 * reg1 + icon]
+ [reg2 + 2 * reg1 + icon]
+ [reg2 + 4 * reg1 + icon]
+ [reg2 + 8 * reg1 + icon]
+
+ The following indirections are valid address modes on arm64:
+
+ [reg]
+ [reg + icon]
+ [reg2 + reg1]
+ [reg2 + reg1 * natural-scale]
+
+ */
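+
+ // For example, the x86/x64 indirection [reg2 + 4*reg1 + icon] would typically be returned as
+ // *rv1Ptr == the reg2 tree, *rv2Ptr == the reg1 tree, *mulPtr == 4 and *cnsPtr == icon.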
+
+ /* All indirect address modes require the address to be an addition */
+
+ if (addr->gtOper != GT_ADD)
+ {
+ return false;
+ }
+
+ // Can't use indirect addressing mode as we need to check for overflow.
+ // Also, can't use 'lea' as it doesn't set the flags.
+
+ if (addr->gtOverflow())
+ {
+ return false;
+ }
+
+ GenTreePtr rv1 = nullptr;
+ GenTreePtr rv2 = nullptr;
+
+ GenTreePtr op1;
+ GenTreePtr op2;
+
+ ssize_t cns;
+#if SCALED_ADDR_MODES
+ unsigned mul;
+#endif
+
+ GenTreePtr tmp;
+
+ /* What order are the sub-operands to be evaluated */
+
+ if (addr->gtFlags & GTF_REVERSE_OPS)
+ {
+ op1 = addr->gtOp.gtOp2;
+ op2 = addr->gtOp.gtOp1;
+ }
+ else
+ {
+ op1 = addr->gtOp.gtOp1;
+ op2 = addr->gtOp.gtOp2;
+ }
+
+ bool rev = false; // Is op2 first in the evaluation order?
+
+ /*
+ A complex address mode can combine the following operands:
+
+ op1 ... base address
+ op2 ... optional scaled index
+#if SCALED_ADDR_MODES
+ mul ... optional multiplier (2/4/8) for op2
+#endif
+ cns ... optional displacement
+
+ Here we try to find such a set of operands and arrange for these
+ to sit in registers.
+ */
+
+ cns = 0;
+#if SCALED_ADDR_MODES
+ mul = 0;
+#endif
+
+AGAIN:
+ /* We come back to 'AGAIN' if we have an add of a constant, and we are folding that
+ constant, or we have gone through a GT_NOP or GT_COMMA node. We never come back
+ here if we find a scaled index.
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if SCALED_ADDR_MODES
+ assert(mul == 0);
+#endif
+
+#ifdef LEGACY_BACKEND
+ /* Check both operands as far as being register variables */
+
+ if (mode != -1)
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op1);
+ if (op2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op2);
+ }
+#endif // LEGACY_BACKEND
+
+ /* Special case: keep constants as 'op2' */
+
+ if (op1->IsCnsIntOrI())
+ {
+ // Presumably op2 is assumed to not be a constant (shouldn't happen if we've done constant folding)?
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ /* Check for an addition of a constant */
+
+ if (op2->IsIntCnsFitsInI32() && (op2->gtType != TYP_REF) && FitsIn<INT32>(cns + op2->gtIntConCommon.IconValue()))
+ {
+ /* We're adding a constant */
+
+ cns += op2->gtIntConCommon.IconValue();
+
+#ifdef LEGACY_BACKEND
+ /* Can (and should) we use "add reg, icon" ? */
+
+ if ((op1->gtFlags & GTF_REG_VAL) && mode == 1 && !nogen)
+ {
+ regNumber reg1 = op1->gtRegNum;
+
+ if ((regMask == 0 || (regMask & genRegMask(reg1))) && genRegTrashable(reg1, addr))
+ {
+ // In case genMarkLclVar(op1) bashed it above and it is
+ // the last use of the variable.
+
+ genUpdateLife(op1);
+
+ /* 'reg1' is trashable, so add "icon" into it */
+
+ genIncRegBy(reg1, cns, addr, addr->TypeGet());
+
+ genUpdateLife(addr);
+ return true;
+ }
+ }
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_ARM64_
+ if (cns == 0)
+#endif
+ {
+ /* Inspect the operand the constant is being added to */
+
+ switch (op1->gtOper)
+ {
+ case GT_ADD:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ op2 = op1->gtOp.gtOp2;
+ op1 = op1->gtOp.gtOp1;
+
+ goto AGAIN;
+
+#if SCALED_ADDR_MODES && !defined(_TARGET_ARM64_)
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_MUL:
+ if (op1->gtOverflow())
+ {
+ return false; // Need overflow check
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op1->GetScaledIndex();
+ if (mul)
+ {
+ /* We can use "[mul*rv2 + icon]" */
+
+ rv1 = nullptr;
+ rv2 = op1->gtOp.gtOp1;
+
+ goto FOUND_AM;
+ }
+ break;
+#endif
+
+ default:
+ break;
+ }
+ }
+
+ /* The best we can do is "[rv1 + icon]" */
+
+ rv1 = op1;
+ rv2 = nullptr;
+
+ goto FOUND_AM;
+ }
+
+ /* op2 is not a constant. So keep on trying.
+ Does op1 or op2 already sit in a register? */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* op1 is sitting in a register */
+ }
+ else if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* op2 is sitting in a register. Keep the enregistered value as op1 */
+
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+
+ noway_assert(rev == false);
+ rev = true;
+ }
+ else
+ {
+ /* Neither op1 nor op2 are sitting in a register right now */
+
+ switch (op1->gtOper)
+ {
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_ADD:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ if (op1->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op1->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op1 = op1->gtOp.gtOp1;
+
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op1->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op1->GetScaledIndex();
+ if (mul)
+ {
+ /* 'op1' is a scaled value */
+
+ rv1 = op2;
+ rv2 = op1->gtOp.gtOp1;
+
+ int argScale;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ noway_assert(rev == false);
+ rev = true;
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+#endif // !_TARGET_ARM64_
+
+ case GT_NOP:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op1 = op1->gtOp.gtOp1;
+ goto AGAIN;
+
+ case GT_COMMA:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op1 = op1->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ break;
+ }
+
+ noway_assert(op2);
+ switch (op2->gtOper)
+ {
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ case GT_ADD:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op2->GetScaledIndex();
+ if (mul)
+ {
+ // 'op2' is a scaled value... is its argument also scaled?
+ int argScale;
+ rv2 = op2->gtOp.gtOp1;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ rv1 = op1;
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+#endif // !_TARGET_ARM64_
+
+ case GT_NOP:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+
+ case GT_COMMA:
+
+ if (!nogen)
+ {
+ break;
+ }
+
+ op2 = op2->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ break;
+ }
+
+ goto ADD_OP12;
+ }
+
+ /* op1 is in a register.
+ Is op2 an addition or a scaled value? */
+
+ noway_assert(op2);
+
+#ifndef _TARGET_ARM64_
+ // TODO-ARM64-CQ: For now we don't try to create a scaled index on ARM64.
+ switch (op2->gtOper)
+ {
+ case GT_ADD:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ if (op2->gtOp.gtOp2->IsIntCnsFitsInI32() && FitsIn<INT32>(cns + op2->gtOp.gtOp2->gtIntCon.gtIconVal))
+ {
+ cns += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+ }
+
+ break;
+
+#if SCALED_ADDR_MODES
+
+ case GT_MUL:
+
+ if (op2->gtOverflow())
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_LSH:
+
+ mul = op2->GetScaledIndex();
+ if (mul)
+ {
+ rv1 = op1;
+ rv2 = op2->gtOp.gtOp1;
+ int argScale;
+ while ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (argScale = rv2->GetScaledIndex()) != 0)
+ {
+ if (jitIsScaleIndexMul(argScale * mul))
+ {
+ mul = mul * argScale;
+ rv2 = rv2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ goto FOUND_AM;
+ }
+ break;
+
+#endif // SCALED_ADDR_MODES
+
+ default:
+ break;
+ }
+#endif // !_TARGET_ARM64_
+
+ADD_OP12:
+
+ /* The best we can do is "[rv1 + rv2]" or "[rv1 + rv2 + cns]" */
+
+ rv1 = op1;
+ rv2 = op2;
+#ifdef _TARGET_ARM64_
+ assert(cns == 0);
+#endif
+
+FOUND_AM:
+
+#ifdef LEGACY_BACKEND
+ /* Check for register variables */
+
+ if (mode != -1)
+ {
+ if (rv1 && rv1->gtOper == GT_LCL_VAR)
+ genMarkLclVar(rv1);
+ if (rv2 && rv2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(rv2);
+ }
+#endif // LEGACY_BACKEND
+
+ if (rv2)
+ {
+ /* Make sure a GC address doesn't end up in 'rv2' */
+
+ if (varTypeIsGC(rv2->TypeGet()))
+ {
+ noway_assert(rv1 && !varTypeIsGC(rv1->TypeGet()));
+
+ tmp = rv1;
+ rv1 = rv2;
+ rv2 = tmp;
+
+ rev = !rev;
+ }
+
+ /* Special case: constant array index (that is range-checked) */
+
+ if (fold)
+ {
+ ssize_t tmpMul;
+ GenTreePtr index;
+
+ if ((rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) && (rv2->gtOp.gtOp2->IsCnsIntOrI()))
+ {
+ /* For valuetype arrays where we can't use the scaled address
+ mode, rv2 will point to the scaled index. So we have to do
+ more work */
+
+ tmpMul = compiler->optGetArrayRefScaleAndIndex(rv2, &index DEBUGARG(false));
+ if (mul)
+ {
+ tmpMul *= mul;
+ }
+ }
+ else
+ {
+ /* May be a simple array. rv2 points to the actual index */
+
+ index = rv2;
+ tmpMul = mul;
+ }
+
+ /* Get hold of the array index and see if it's a constant */
+ if (index->IsIntCnsFitsInI32())
+ {
+ /* Get hold of the index value */
+ ssize_t ixv = index->AsIntConCommon()->IconValue();
+
+#if SCALED_ADDR_MODES
+ /* Scale the index if necessary */
+ if (tmpMul)
+ {
+ ixv *= tmpMul;
+ }
+#endif
+
+ if (FitsIn<INT32>(cns + ixv))
+ {
+ /* Add the scaled index to the offset value */
+
+ cns += ixv;
+
+#if SCALED_ADDR_MODES
+ /* There is no scaled operand any more */
+ mul = 0;
+#endif
+ rv2 = nullptr;
+ }
+ }
+ }
+ }
+
+ // We shouldn't have [rv2*1 + cns] - this is equivalent to [rv1 + cns]
+ noway_assert(rv1 || mul != 1);
+
+ noway_assert(FitsIn<INT32>(cns));
+
+ /* Success - return the various components to the caller */
+
+ *revPtr = rev;
+ *rv1Ptr = rv1;
+ *rv2Ptr = rv2;
+#if SCALED_ADDR_MODES
+ *mulPtr = mul;
+#endif
+ *cnsPtr = (unsigned)cns;
+
+ return true;
+}
+
+/*****************************************************************************
+* The condition to use for (the jmp/set for) the given type of operation
+*
+* On amd64, this routine should be used when there is no gentree available
+* and one needs to generate jumps based on integer comparisons. When a gentree is
+* available, always use its overloaded version.
+*
+*/
+
+// static
+emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind)
+{
+ const static BYTE genJCCinsSigned[] = {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jl, // GT_LT
+ EJ_jle, // GT_LE
+ EJ_jge, // GT_GE
+ EJ_jg, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ
+ EJ_ne, // GT_NE
+ EJ_lt, // GT_LT
+ EJ_le, // GT_LE
+ EJ_ge, // GT_GE
+ EJ_gt, // GT_GT
+#endif
+ };
+
+ const static BYTE genJCCinsUnsigned[] = /* unsigned comparison */
+ {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jb, // GT_LT
+ EJ_jbe, // GT_LE
+ EJ_jae, // GT_GE
+ EJ_ja, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ
+ EJ_ne, // GT_NE
+ EJ_lo, // GT_LT
+ EJ_ls, // GT_LE
+ EJ_hs, // GT_GE
+ EJ_hi, // GT_GT
+#endif
+ };
+
+ const static BYTE genJCCinsLogical[] = /* logical operation */
+ {
+#if defined(_TARGET_XARCH_)
+ EJ_je, // GT_EQ (Z == 1)
+ EJ_jne, // GT_NE (Z == 0)
+ EJ_js, // GT_LT (S == 1)
+ EJ_NONE, // GT_LE
+ EJ_jns, // GT_GE (S == 0)
+ EJ_NONE, // GT_GT
+#elif defined(_TARGET_ARMARCH_)
+ EJ_eq, // GT_EQ (Z == 1)
+ EJ_ne, // GT_NE (Z == 0)
+ EJ_mi, // GT_LT (N == 1)
+ EJ_NONE, // GT_LE
+ EJ_pl, // GT_GE (N == 0)
+ EJ_NONE, // GT_GT
+#endif
+ };
+
+#if defined(_TARGET_XARCH_)
+ assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_jl);
+ assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
+ assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
+ assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
+
+ assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_jb);
+ assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
+ assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
+ assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
+
+ assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
+ assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_js);
+ assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_jns);
+#elif defined(_TARGET_ARMARCH_)
+ assert(genJCCinsSigned[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsSigned[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsSigned[GT_LT - GT_EQ] == EJ_lt);
+ assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_le);
+ assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_ge);
+ assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_gt);
+
+ assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsUnsigned[GT_LT - GT_EQ] == EJ_lo);
+ assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_ls);
+ assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_hs);
+ assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_hi);
+
+ assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_eq);
+ assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_ne);
+ assert(genJCCinsLogical[GT_LT - GT_EQ] == EJ_mi);
+ assert(genJCCinsLogical[GT_GE - GT_EQ] == EJ_pl);
+#else
+ assert(!"unknown arch");
+#endif
+ assert(GenTree::OperIsCompare(cmp));
+
+ emitJumpKind result = EJ_COUNT;
+
+ if (compareKind == CK_UNSIGNED)
+ {
+ result = (emitJumpKind)genJCCinsUnsigned[cmp - GT_EQ];
+ }
+ else if (compareKind == CK_SIGNED)
+ {
+ result = (emitJumpKind)genJCCinsSigned[cmp - GT_EQ];
+ }
+ else if (compareKind == CK_LOGICAL)
+ {
+ result = (emitJumpKind)genJCCinsLogical[cmp - GT_EQ];
+ }
+ assert(result != EJ_COUNT);
+ return result;
+}
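+
+// Editor's note: hedged usage sketch, not part of the original change. A caller that has just
+// emitted an integer compare and has no GenTree at hand might select and emit the jump like:
+//     emitJumpKind jmpKind = genJumpKindForOper(GT_LT, CK_UNSIGNED); // EJ_jb on xarch, EJ_lo on arm
+//     inst_JMP(jmpKind, skipBlk);
+// where 'skipBlk' is a hypothetical label created via genCreateTempLabel().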
+
+/*****************************************************************************
+ *
+ * Generate an exit sequence for a return from a method (note: when compiling
+ * for speed there might be multiple exit points).
+ */
+
+void CodeGen::genExitCode(BasicBlock* block)
+{
+#ifdef DEBUGGING_SUPPORT
+ /* We just wrote the first instruction of the epilog - inform the debugger.
+ Note that this may result in a duplicate IPmapping entry, and
+ that this is ok. */
+
+ // For non-optimized debuggable code, there is only one epilog.
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
+#endif // DEBUGGING_SUPPORT
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ genEmitGSCookieCheck(jmpEpilog);
+
+ if (jmpEpilog)
+ {
+ // Dev10 642944 -
+ // The GS cookie check created a temp label that has no live
+ // incoming GC registers, we need to fix that
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ /* Figure out which register parameters hold pointers */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && varDsc->lvIsRegArg;
+ varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, varDsc->TypeGet());
+ }
+
+ getEmitter()->emitThisGCrefRegs = getEmitter()->emitInitGCrefRegs = gcInfo.gcRegGCrefSetCur;
+ getEmitter()->emitThisByrefRegs = getEmitter()->emitInitByrefRegs = gcInfo.gcRegByrefSetCur;
+ }
+ }
+
+ genReserveEpilog(block);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for an out-of-line exception.
+ * For debuggable code, we generate the 'throw' inline.
+ * For non-dbg code, we share the helper blocks created by fgAddCodeRef().
+ */
+
+void CodeGen::genJumpToThrowHlpBlk(emitJumpKind jumpKind, SpecialCodeKind codeKind, GenTreePtr failBlk)
+{
+ if (!compiler->opts.compDbgCode)
+ {
+ /* For non-debuggable code, find and use the helper block for
+ raising the exception. The block may be shared by other trees too. */
+
+ BasicBlock* tgtBlk;
+
+ if (failBlk)
+ {
+ /* We already know which block to jump to. Use that. */
+
+ noway_assert(failBlk->gtOper == GT_LABEL);
+ tgtBlk = failBlk->gtLabel.gtLabBB;
+ noway_assert(
+ tgtBlk ==
+ compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB))->acdDstBlk);
+ }
+ else
+ {
+ /* Find the helper-block which raises the exception. */
+
+ Compiler::AddCodeDsc* add =
+ compiler->fgFindExcptnTarget(codeKind, compiler->bbThrowIndex(compiler->compCurBB));
+ PREFIX_ASSUME_MSG((add != nullptr), ("ERROR: failed to find exception throw block"));
+ tgtBlk = add->acdDstBlk;
+ }
+
+ noway_assert(tgtBlk);
+
+ // Jump to the exception-throwing block on error.
+
+ inst_JMP(jumpKind, tgtBlk);
+ }
+ else
+ {
+ /* The code to throw the exception will be generated inline, and
+ we will jump around it in the normal non-exception case */
+
+ BasicBlock* tgtBlk = nullptr;
+ emitJumpKind reverseJumpKind = emitter::emitReverseJumpKind(jumpKind);
+ if (reverseJumpKind != jumpKind)
+ {
+ tgtBlk = genCreateTempLabel();
+ inst_JMP(reverseJumpKind, tgtBlk);
+ }
+
+ genEmitHelperCall(compiler->acdHelper(codeKind), 0, EA_UNKNOWN);
+
+ /* Define the spot for the normal non-exception case to jump to */
+ if (tgtBlk != nullptr)
+ {
+ assert(reverseJumpKind != jumpKind);
+ genDefineTempLabel(tgtBlk);
+ }
+ }
+}
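+
+// Editor's note: illustrative sketch only; the instruction sequence below is an example. For a
+// range check whose failure condition is "unsigned index >= length" (jumpKind == EJ_jae),
+// optimized code emits a single "jae <shared SCK_RNGCHK_FAIL block>", while debuggable code
+// reverses the jump around an inline throw:
+//     jb   L_ok                      ; normal path skips the inline throw
+//     call CORINFO_HELP_RNGCHKFAIL   ; emitted via genEmitHelperCall(compiler->acdHelper(codeKind), ...)
+//   L_ok: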
+
+/*****************************************************************************
+ *
+ * The last operation done was generating code for "tree" and that would
+ * have set the flags. Check if the operation caused an overflow.
+ */
+
+// inline
+void CodeGen::genCheckOverflow(GenTreePtr tree)
+{
+ // Overflow-check should be asked for this tree
+ noway_assert(tree->gtOverflow());
+
+ const var_types type = tree->TypeGet();
+
+ // Overflow checks can only occur for the non-small types: (i.e. TYP_INT,TYP_LONG)
+ noway_assert(!varTypeIsSmall(type));
+
+ emitJumpKind jumpKind;
+
+#ifdef _TARGET_ARM64_
+ if (tree->OperGet() == GT_MUL)
+ {
+ jumpKind = EJ_ne;
+ }
+ else
+#endif
+ {
+ bool isUnsignedOverflow = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+
+#if defined(_TARGET_XARCH_)
+
+ jumpKind = isUnsignedOverflow ? EJ_jb : EJ_jo;
+
+#elif defined(_TARGET_ARMARCH_)
+
+ jumpKind = isUnsignedOverflow ? EJ_lo : EJ_vs;
+
+ if (jumpKind == EJ_lo)
+ {
+ if ((tree->OperGet() != GT_SUB) && (tree->gtOper != GT_ASG_SUB))
+ {
+ jumpKind = EJ_hs;
+ }
+ }
+
+#endif // defined(_TARGET_ARMARCH_)
+ }
+
+ // Jump to the block which will throw the exception.
+
+ genJumpToThrowHlpBlk(jumpKind, SCK_OVERFLOW);
+}
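+
+// Editor's note: illustrative sketch, not from the original change. On xarch a signed 32-bit
+// overflow-checked add reduces to something like
+//     add eax, edx              ; sets OF on signed overflow
+//     jo  <SCK_OVERFLOW block>  ; EJ_jo chosen above, routed through genJumpToThrowHlpBlk
+// whereas the unsigned variant tests the carry flag via EJ_jb instead.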
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Update the current funclet when 'block' begins a new funclet (BBF_FUNCLET_BEG).
+ * For non-BBF_FUNCLET_BEG blocks, assert that the current funclet
+ * is up-to-date.
+ *
+ */
+
+void CodeGen::genUpdateCurrentFunclet(BasicBlock* block)
+{
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ compiler->funSetCurrentFunc(compiler->funGetFuncIdx(block));
+ if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
+ {
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdFilter == block);
+ }
+ else
+ {
+ // We shouldn't see FUNC_ROOT
+ assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->ebdHndBeg == block);
+ }
+ }
+ else
+ {
+ assert(compiler->compCurrFuncIdx <= compiler->compFuncInfoCount);
+ if (compiler->funCurrentFunc()->funKind == FUNC_FILTER)
+ {
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InFilterRegionBBRange(block));
+ }
+ else if (compiler->funCurrentFunc()->funKind == FUNC_ROOT)
+ {
+ assert(!block->hasHndIndex());
+ }
+ else
+ {
+ assert(compiler->funCurrentFunc()->funKind == FUNC_HANDLER);
+ assert(compiler->ehGetDsc(compiler->funCurrentFunc()->funEHIndex)->InHndRegionBBRange(block));
+ }
+ }
+}
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Generate code for the function.
+ */
+
+void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genGenerateCode()\n");
+ compiler->fgDispBasicBlocks(compiler->verboseTrees);
+ }
+#endif
+
+ unsigned codeSize;
+ unsigned prologSize;
+ unsigned epilogSize;
+
+ void* consPtr;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+#if STACK_PROBES
+ genNeedPrologStackProbe = false;
+#endif
+
+ compiler->fgDebugCheckBBlist();
+#endif // DEBUG
+
+ /* This is the real thing */
+
+ genPrepForCompiler();
+
+ /* Prepare the emitter */
+ getEmitter()->Init();
+#ifdef DEBUG
+ VarSetOps::AssignNoCopy(compiler, genTempOldLife, VarSetOps::MakeEmpty(compiler));
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.disAsmSpilled && regSet.rsNeededSpillReg)
+ {
+ compiler->opts.disAsm = true;
+ }
+
+ if (compiler->opts.disAsm)
+ {
+ printf("; Assembly listing for method %s\n", compiler->info.compFullName);
+
+ printf("; Emitting ");
+
+ if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ printf("SMALL_CODE");
+ }
+ else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
+ {
+ printf("FAST_CODE");
+ }
+ else
+ {
+ printf("BLENDED_CODE");
+ }
+
+ printf(" for ");
+
+ if (compiler->info.genCPU == CPU_X86)
+ {
+ printf("generic X86 CPU");
+ }
+ else if (compiler->info.genCPU == CPU_X86_PENTIUM_4)
+ {
+ printf("Pentium 4");
+ }
+ else if (compiler->info.genCPU == CPU_X64)
+ {
+ if (compiler->canUseAVX())
+ {
+ printf("X64 CPU with AVX");
+ }
+ else
+ {
+ printf("X64 CPU with SSE2");
+ }
+ }
+
+ else if (compiler->info.genCPU == CPU_ARM)
+ {
+ printf("generic ARM CPU");
+ }
+
+ printf("\n");
+
+ if ((compiler->opts.compFlags & CLFLG_MAXOPT) == CLFLG_MAXOPT)
+ {
+ printf("; optimized code\n");
+ }
+ else if (compiler->opts.compDbgCode)
+ {
+ printf("; debuggable code\n");
+ }
+ else if (compiler->opts.MinOpts())
+ {
+ printf("; compiler->opts.MinOpts() is true\n");
+ }
+ else
+ {
+ printf("; unknown optimization flags\n");
+ }
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ printf("; double-aligned frame\n");
+ else
+#endif
+ printf("; %s based frame\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
+
+ if (genInterruptible)
+ {
+ printf("; fully interruptible\n");
+ }
+ else
+ {
+ printf("; partially interruptible\n");
+ }
+
+ if (compiler->fgHaveProfileData())
+ {
+ printf("; with IBC profile data\n");
+ }
+
+ if (compiler->fgProfileData_ILSizeMismatch)
+ {
+ printf("; discarded IBC profile data due to mismatch in ILSize\n");
+ }
+ }
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+
+ // For RyuJIT backend, we compute the final frame layout before code generation. This is because LSRA
+ // has already computed exactly the maximum concurrent number of spill temps of each type that are
+ // required during code generation. So, there is nothing left to estimate: we can be precise in the frame
+ // layout. This helps us generate smaller code, and allocate, after code generation, a smaller amount of
+ // memory from the VM.
+
+ genFinalizeFrame();
+
+ unsigned maxTmpSize = compiler->tmpSize; // This is precise after LSRA has pre-allocated the temps.
+
+#else // LEGACY_BACKEND
+
+ // Estimate the frame size: first, estimate the number of spill temps needed by taking the register
+ // predictor spill temp estimates and stress levels into consideration. Then, compute the tentative
+ // frame layout using conservative callee-save register estimation (namely, guess they'll all be used
+ // and thus saved on the frame).
+
+ // Compute the maximum estimated spill temp size.
+ unsigned maxTmpSize = sizeof(double) + sizeof(float) + sizeof(__int64) + sizeof(void*);
+
+ maxTmpSize += (compiler->tmpDoubleSpillMax * sizeof(double)) + (compiler->tmpIntSpillMax * sizeof(int));
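+ // Editor's worked example (illustrative only): assuming a 32-bit target (4-byte void*) with
+ // tmpDoubleSpillMax == 2 and tmpIntSpillMax == 3, this estimate is
+ // (8 + 4 + 8 + 4) + (2 * 8) + (3 * 4) = 24 + 16 + 12 = 52 bytes.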
+
+#ifdef DEBUG
+
+ /* When StressRegs is >=1, there will be a bunch of spills not predicted by
+ the predictor (see logic in rsPickReg). It will be very hard to teach
+ the predictor about the behavior of rsPickReg for StressRegs >= 1, so
+ instead let's make maxTmpSize large enough so that we won't be wrong.
+ This means that at StressRegs >= 1, we will not be testing the logic
+ that sets the maxTmpSize size.
+ */
+
+ if (regSet.rsStressRegs() >= 1)
+ {
+ maxTmpSize += (REG_TMP_ORDER_COUNT * REGSIZE_BYTES);
+ }
+
+ // The JIT uses 2 passes when assigning stack variable (i.e. args, temps, and locals) locations in
+ // varDsc->lvStkOffs.
+ // During the 1st pass (in genGenerateCode), it estimates the maximum possible size for stack temps
+ // and puts it in maxTmpSize. Then it calculates varDsc->lvStkOffs for each variable based on this estimate.
+ // However, during stress mode we might spill more temps on the stack, which might grow the
+ // size of the temp area.
+ // This might cause varDsc->lvStkOffs to change during the 2nd pass (in emitEndCodeGen).
+ // If the change of varDsc->lvStkOffs crosses the threshold for the instruction size,
+ // we will then have a mismatch between the estimated code size (from the 1st pass) and the actual
+ // emitted code size (from the 2nd pass).
+ // Also, if STRESS_UNSAFE_BUFFER_CHECKS is turned on, we might reorder the stack variable locations,
+ // which could cause the mismatch too.
+ //
+ // The following code simply bumps maxTmpSize up to at least BYTE_MAX+1 during stress mode, so that
+ // we don't run into code size problems during stress.
+
+ if (getJitStressLevel() != 0)
+ {
+ if (maxTmpSize < BYTE_MAX + 1)
+ {
+ maxTmpSize = BYTE_MAX + 1;
+ }
+ }
+#endif // DEBUG
+
+ /* Estimate the offsets of locals/arguments and size of frame */
+
+ unsigned lclSize = compiler->lvaFrameSize(Compiler::TENTATIVE_FRAME_LAYOUT);
+
+#ifdef DEBUG
+ //
+ // Display the local frame offsets that we have tentatively decided upon
+ //
+ if (verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+
+#endif // LEGACY_BACKEND
+
+ getEmitter()->emitBegFN(isFramePointerUsed()
+#if defined(DEBUG)
+ ,
+ (compiler->compCodeOpt() != Compiler::SMALL_CODE) &&
+ !(compiler->opts.eeFlags & CORJIT_FLG_PREJIT)
+#endif
+#ifdef LEGACY_BACKEND
+ ,
+ lclSize
+#endif // LEGACY_BACKEND
+ ,
+ maxTmpSize);
+
+ /* Now generate code for the function */
+ genCodeForBBlist();
+
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ // After code generation, dump the frame layout again. It should be the same as before code generation, if code
+ // generation hasn't touched it (it shouldn't!).
+ if (verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+#endif // !LEGACY_BACKEND
+
+ /* We can now generate the function prolog and epilog */
+
+ genGeneratePrologsAndEpilogs();
+
+ /* Bind jump distances */
+
+ getEmitter()->emitJumpDistBind();
+
+ /* The code is now complete and final; it should not change after this. */
+
+ /* Compute the size of the code sections that we are going to ask the VM
+ to allocate. Note that this might not be precisely the size of the
+ code we emit, though it's fatal if we emit more code than the size we
+ compute here.
+ (Note: an example of a case where we emit less code would be useful.)
+ */
+
+ getEmitter()->emitComputeCodeSizes();
+
+#ifdef DEBUG
+
+ // Code to test or stress our ability to run a fallback compile.
+ // We trigger the fallback here, before asking the VM for any memory,
+ // because if not, we will leak mem, as the current codebase can't free
+ // the mem after the emitter asks the VM for it. As this is only a stress
+ // mode, we only want the functionality, and don't care about the relative
+ // ugliness of having the failure here.
+ if (!compiler->jitFallbackCompile)
+ {
+ // Use COMPlus_JitNoForceFallback=1 to prevent NOWAY assert testing from happening,
+ // especially the testing caused by enabling JIT stress.
+ if (!JitConfig.JitNoForceFallback())
+ {
+ if (JitConfig.JitForceFallback() || compiler->compStressCompile(Compiler::STRESS_GENERIC_VARN, 5))
+ {
+ NO_WAY_NOASSERT("Stress failure");
+ }
+ }
+ }
+
+#endif // DEBUG
+
+ /* We've finished collecting all the unwind information for the function. Now reserve
+ space for it from the VM.
+ */
+
+ compiler->unwindReserve();
+
+#if DISPLAY_SIZES
+
+ size_t dataSize = getEmitter()->emitDataSize();
+
+#endif // DISPLAY_SIZES
+
+ void* coldCodePtr;
+
+ bool trackedStackPtrsContig; // are tracked stk-ptrs contiguous ?
+
+#ifdef _TARGET_AMD64_
+ trackedStackPtrsContig = false;
+#elif defined(_TARGET_ARM_)
+ // On arm due to prespilling of arguments, tracked stk-ptrs may not be contiguous
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->compIsProfilerHookNeeded();
+#elif defined(_TARGET_ARM64_)
+ // Incoming vararg registers are homed on the top of the stack. Tracked vars may not be contiguous.
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC && !compiler->info.compIsVarArgs;
+#else
+ trackedStackPtrsContig = !compiler->opts.compDbgEnC;
+#endif
+
+#ifdef DEBUG
+ /* We're done generating code for this function */
+ compiler->compCodeGenDone = true;
+#endif
+
+ compiler->EndPhase(PHASE_GENERATE_CODE);
+
+ codeSize = getEmitter()->emitEndCodeGen(compiler, trackedStackPtrsContig, genInterruptible, genFullPtrRegMap,
+ (compiler->info.compRetType == TYP_REF), compiler->compHndBBtabCount,
+ &prologSize, &epilogSize, codePtr, &coldCodePtr, &consPtr);
+
+ compiler->EndPhase(PHASE_EMIT_CODE);
+
+#ifdef DEBUG
+ if (compiler->opts.disAsm)
+ {
+ printf("; Total bytes of code %d, prolog size %d for method %s\n", codeSize, prologSize,
+ compiler->info.compFullName);
+ printf("; ============================================================\n");
+ printf(""); // in our logic this causes a flush
+ }
+
+ if (verbose)
+ {
+ printf("*************** After end code gen, before unwindEmit()\n");
+ getEmitter()->emitDispIGlist(true);
+ }
+#endif
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Check our max stack level. Needed for fgAddCodeRef().
+ We need to relax the assert as our estimation won't include code-gen
+ stack changes (which we know don't affect fgAddCodeRef()) */
+ noway_assert(getEmitter()->emitMaxStackDepth <=
+ (compiler->fgPtrArgCntMax + compiler->compHndBBtabCount + // Return address for locally-called finallys
+ genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
+ (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
+#endif
+
+ *nativeSizeOfCode = codeSize;
+ compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
+
+ // printf("%6u bytes of code generated for %s.%s\n", codeSize, compiler->info.compFullName);
+
+ // Make sure that the x86 alignment and cache prefetch optimization rules
+ // were obeyed.
+
+ // Don't start a method in the last 7 bytes of a 16-byte alignment area
+ // unless we are generating SMALL_CODE
+ // noway_assert( (((unsigned)(*codePtr) % 16) <= 8) || (compiler->compCodeOpt() == SMALL_CODE));
+
+ /* Now that the code is issued, we can finalize and emit the unwind data */
+
+ compiler->unwindEmit(*codePtr, coldCodePtr);
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* Finalize the line # tracking logic after we know the exact block sizes/offsets */
+
+ genIPmappingGen();
+
+ /* Finalize the Local Var info in terms of generated code */
+
+ genSetScopeInfo();
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef LATE_DISASM
+ unsigned finalHotCodeSize;
+ unsigned finalColdCodeSize;
+ if (compiler->fgFirstColdBlock != nullptr)
+ {
+ // We did some hot/cold splitting. The hot section is always padded out to the
+ // size we thought it would be, but the cold section is not.
+ assert(codeSize <= compiler->info.compTotalHotCodeSize + compiler->info.compTotalColdCodeSize);
+ assert(compiler->info.compTotalHotCodeSize > 0);
+ assert(compiler->info.compTotalColdCodeSize > 0);
+ finalHotCodeSize = compiler->info.compTotalHotCodeSize;
+ finalColdCodeSize = codeSize - finalHotCodeSize;
+ }
+ else
+ {
+ // No hot/cold splitting
+ assert(codeSize <= compiler->info.compTotalHotCodeSize);
+ assert(compiler->info.compTotalHotCodeSize > 0);
+ assert(compiler->info.compTotalColdCodeSize == 0);
+ finalHotCodeSize = codeSize;
+ finalColdCodeSize = 0;
+ }
+ getDisAssembler().disAsmCode((BYTE*)*codePtr, finalHotCodeSize, (BYTE*)coldCodePtr, finalColdCodeSize);
+#endif // LATE_DISASM
+
+ /* Report any exception handlers to the VM */
+
+ genReportEH();
+
+#ifdef JIT32_GCENCODER
+#ifdef DEBUG
+ void* infoPtr =
+#endif // DEBUG
+#endif
+ // Create and store the GC info for this method.
+ genCreateAndStoreGCInfo(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+
+#ifdef DEBUG
+ FILE* dmpf = jitstdout;
+
+ compiler->opts.dmpHex = false;
+ if (!strcmp(compiler->info.compMethodName, "<name of method you want the hex dump for>"))
+ {
+ FILE* codf;
+ errno_t ec = fopen_s(&codf, "C:\\JIT.COD", "at"); // NOTE: file append mode
+ if (ec == 0) // fopen_s returns 0 on success
+ {
+ assert(codf);
+ dmpf = codf;
+ compiler->opts.dmpHex = true;
+ }
+ }
+ if (compiler->opts.dmpHex)
+ {
+ size_t consSize = getEmitter()->emitDataSize();
+ size_t infoSize = compiler->compInfoBlkSize;
+
+ fprintf(dmpf, "Generated code for %s:\n", compiler->info.compFullName);
+ fprintf(dmpf, "\n");
+
+ if (codeSize)
+ {
+ fprintf(dmpf, " Code at %p [%04X bytes]\n", dspPtr(*codePtr), codeSize);
+ }
+ if (consSize)
+ {
+ fprintf(dmpf, " Const at %p [%04X bytes]\n", dspPtr(consPtr), consSize);
+ }
+#ifdef JIT32_GCENCODER
+ if (infoSize)
+ fprintf(dmpf, " Info at %p [%04X bytes]\n", dspPtr(infoPtr), infoSize);
+#endif // JIT32_GCENCODER
+
+ fprintf(dmpf, "\n");
+
+ if (codeSize)
+ {
+ hexDump(dmpf, "Code", (BYTE*)*codePtr, codeSize);
+ }
+ if (consSize)
+ {
+ hexDump(dmpf, "Const", (BYTE*)consPtr, consSize);
+ }
+#ifdef JIT32_GCENCODER
+ if (infoSize)
+ hexDump(dmpf, "Info", (BYTE*)infoPtr, infoSize);
+#endif // JIT32_GCENCODER
+
+ fflush(dmpf);
+ }
+
+ if (dmpf != jitstdout)
+ {
+ fclose(dmpf);
+ }
+
+#endif // DEBUG
+
+ /* Tell the emitter that we're done with this function */
+
+ getEmitter()->emitEndFN();
+
+ /* Shut down the spill logic */
+
+ regSet.rsSpillDone();
+
+ /* Shut down the temp logic */
+
+ compiler->tmpDone();
+
+#if DISPLAY_SIZES
+
+ grossVMsize += compiler->info.compILCodeSize;
+ totalNCsize += codeSize + dataSize + compiler->compInfoBlkSize;
+ grossNCsize += codeSize + dataSize;
+
+#endif // DISPLAY_SIZES
+
+ compiler->EndPhase(PHASE_EMIT_GCEH);
+}
+
+/*****************************************************************************
+ *
+ * Report EH clauses to the VM
+ */
+
+void CodeGen::genReportEH()
+{
+ if (compiler->compHndBBtabCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspEHTable)
+ {
+ printf("*************** EH table for %s\n", compiler->info.compFullName);
+ }
+#endif // DEBUG
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ unsigned EHCount = compiler->compHndBBtabCount;
+
+#if FEATURE_EH_FUNCLETS
+ // Count duplicated clauses. This uses the same logic as below, where we actually generate them for reporting to the
+ // VM.
+ unsigned duplicateClauseCount = 0;
+ unsigned enclosingTryIndex;
+ for (XTnum = 0; XTnum < compiler->compHndBBtabCount; XTnum++)
+ {
+ for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
+ enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ {
+ ++duplicateClauseCount;
+ }
+ }
+ EHCount += duplicateClauseCount;
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ unsigned clonedFinallyCount = 0;
+
+ // We don't keep track of how many cloned finallys there are. So, go through and count.
+ // We do a quick pass first through the EH table to see if there are any try/finally
+ // clauses. If there aren't, we don't need to look for BBJ_CALLFINALLY.
+
+ bool anyFinallys = false;
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFinallyHandler())
+ {
+ anyFinallys = true;
+ break;
+ }
+ }
+ if (anyFinallys)
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ ++clonedFinallyCount;
+ }
+ }
+
+ EHCount += clonedFinallyCount;
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef DEBUG
+ if (compiler->opts.dspEHTable)
+ {
+#if FEATURE_EH_FUNCLETS
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ printf("%d EH table entries, %d duplicate clauses, %d cloned finallys, %d total EH entries reported to VM\n",
+ compiler->compHndBBtabCount, duplicateClauseCount, clonedFinallyCount, EHCount);
+ assert(compiler->compHndBBtabCount + duplicateClauseCount + clonedFinallyCount == EHCount);
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+ printf("%d EH table entries, %d duplicate clauses, %d total EH entries reported to VM\n",
+ compiler->compHndBBtabCount, duplicateClauseCount, EHCount);
+ assert(compiler->compHndBBtabCount + duplicateClauseCount == EHCount);
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+#else // !FEATURE_EH_FUNCLETS
+ printf("%d EH table entries, %d total EH entries reported to VM\n", compiler->compHndBBtabCount, EHCount);
+ assert(compiler->compHndBBtabCount == EHCount);
+#endif // !FEATURE_EH_FUNCLETS
+ }
+#endif // DEBUG
+
+ // Tell the VM how many EH clauses to expect.
+ compiler->eeSetEHcount(EHCount);
+
+ XTnum = 0; // This is the index we pass to the VM
+
+ for (HBtab = compiler->compHndBBtab, HBtabEnd = compiler->compHndBBtab + compiler->compHndBBtabCount;
+ HBtab < HBtabEnd; HBtab++)
+ {
+ UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
+
+ tryBeg = compiler->ehCodeOffset(HBtab->ebdTryBeg);
+ hndBeg = compiler->ehCodeOffset(HBtab->ebdHndBeg);
+
+ tryEnd = (HBtab->ebdTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(HBtab->ebdTryLast->bbNext);
+ hndEnd = (HBtab->ebdHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(HBtab->ebdHndLast->bbNext);
+
+ if (HBtab->HasFilter())
+ {
+ hndTyp = compiler->ehCodeOffset(HBtab->ebdFilter);
+ }
+ else
+ {
+ hndTyp = HBtab->ebdTyp;
+ }
+
+ CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(HBtab->ebdHandlerType);
+
+ // Note that we reuse the CORINFO_EH_CLAUSE type, even though the names of
+ // the fields aren't accurate.
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
+ clause.Flags = flags;
+ clause.TryOffset = tryBeg;
+ clause.TryLength = tryEnd;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause.
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // Now output duplicated clauses.
+ //
+ // If a funclet has been created by moving a handler out of a try region that it was originally nested
+ // within, then we need to report a "duplicate" clause representing the fact that an exception in that
+ // handler can be caught by the 'try' it has been moved out of. This is because the original 'try' region
+ // descriptor can only specify a single, contiguous protected range, but the funclet we've moved out is
+ // no longer contiguous with the original 'try' region. The new EH descriptor will have the same handler
+ // region as the enclosing try region's handler region. This is the sense in which it is duplicated:
+ // there is now a "duplicate" clause with the same handler region as another, but a different 'try'
+ // region.
+ //
+ // For example, consider this (capital letters represent an unknown code sequence, numbers identify a
+ // try or handler region):
+ //
+ // A
+ // try (1) {
+ // B
+ // try (2) {
+ // C
+ // } catch (3) {
+ // D
+ // } catch (4) {
+ // E
+ // }
+ // F
+ // } catch (5) {
+ // G
+ // }
+ // H
+ //
+ // Here, we have try region (1) BCDEF protected by catch (5) G, and region (2) C protected
+ // by catch (3) D and catch (4) E. Note that catch (4) E does *NOT* protect the code "D".
+ // This is an example of 'mutually protect' regions. First, we move handlers (3) and (4)
+ // to the end of the code. However, (3) and (4) are nested inside, and protected by, try (1). Again
+ // note that (3) is not nested inside (4), despite ebdEnclosingTryIndex indicating that.
+ // The code "D" and "E" won't be contiguous with the protected region for try (1) (which
+ // will, after moving catch (3) AND (4), be BCF). Thus, we need to add a new EH descriptor
+ // representing try (1) protecting the new funclets catch (3) and (4).
+ // The code will be generated as follows:
+ //
+ // ABCFH // "main" code
+ // D // funclet
+ // E // funclet
+ // G // funclet
+ //
+ // The EH regions are:
+ //
+ // C -> D
+ // C -> E
+ // BCF -> G
+ // D -> G // "duplicate" clause
+ // E -> G // "duplicate" clause
+ //
+ // Note that we actually need to generate one of these additional "duplicate" clauses for every
+ // region the funclet is nested in. Take this example:
+ //
+ // A
+ // try (1) {
+ // B
+ // try (2,3) {
+ // C
+ // try (4) {
+ // D
+ // try (5,6) {
+ // E
+ // } catch {
+ // F
+ // } catch {
+ // G
+ // }
+ // H
+ // } catch {
+ // I
+ // }
+ // J
+ // } catch {
+ // K
+ // } catch {
+ // L
+ // }
+ // M
+ // } catch {
+ // N
+ // }
+ // O
+ //
+ // When we pull out funclets, we get the following generated code:
+ //
+ // ABCDEHJMO // "main" function
+ // F // funclet
+ // G // funclet
+ // I // funclet
+ // K // funclet
+ // L // funclet
+ // N // funclet
+ //
+ // And the EH regions we report to the VM are (in order; main clauses
+ // first in most-to-least nested order, funclets ("duplicated clauses")
+ // last, in most-to-least nested) are:
+ //
+ // E -> F
+ // E -> G
+ // DEH -> I
+ // CDEHJ -> K
+ // CDEHJ -> L
+ // BCDEHJM -> N
+ // F -> I // funclet clause #1 for F
+ // F -> K // funclet clause #2 for F
+ // F -> L // funclet clause #3 for F
+ // F -> N // funclet clause #4 for F
+ // G -> I // funclet clause #1 for G
+ // G -> K // funclet clause #2 for G
+ // G -> L // funclet clause #3 for G
+ // G -> N // funclet clause #4 for G
+ // I -> K // funclet clause #1 for I
+ // I -> L // funclet clause #2 for I
+ // I -> N // funclet clause #3 for I
+ // K -> N // funclet clause #1 for K
+ // L -> N // funclet clause #1 for L
+ //
+ // So whereas the IL had 6 EH clauses, we need to report 19 EH clauses to the VM.
+ // Note that due to the nature of 'mutually protect' clauses, it would be incorrect
+ // to add a clause "F -> G" because F is NOT protected by G, but we still have
+ // both "F -> K" and "F -> L" because F IS protected by both of those handlers.
+ //
+ // The overall ordering of the clauses is still the same most-to-least nesting
+ // after front-to-back start offset. Because we place the funclets at the end
+ // these new clauses should also go at the end by this ordering.
+ //
+
+ if (duplicateClauseCount > 0)
+ {
+ unsigned reportedDuplicateClauseCount = 0; // How many duplicated clauses have we reported?
+ unsigned XTnum2;
+ for (XTnum2 = 0, HBtab = compiler->compHndBBtab; XTnum2 < compiler->compHndBBtabCount; XTnum2++, HBtab++)
+ {
+ unsigned enclosingTryIndex;
+
+ EHblkDsc* fletTab = compiler->ehGetDsc(XTnum2);
+
+ for (enclosingTryIndex = compiler->ehTrueEnclosingTryIndexIL(XTnum2); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX;
+ enclosingTryIndex = compiler->ehGetEnclosingTryIndex(enclosingTryIndex))
+ {
+ // The funclet we moved out is nested in a try region, so create a new EH descriptor for the funclet
+ // that will have the enclosing try protecting the funclet.
+
+ noway_assert(XTnum2 < enclosingTryIndex); // the enclosing region must be less nested, and hence have a
+ // greater EH table index
+
+ EHblkDsc* encTab = compiler->ehGetDsc(enclosingTryIndex);
+
+ // The try region is the handler of the funclet. Note that for filters, we don't protect the
+ // filter region, only the filter handler region. This is because exceptions in filters never
+ // escape; the VM swallows them.
+
+ BasicBlock* bbTryBeg = fletTab->ebdHndBeg;
+ BasicBlock* bbTryLast = fletTab->ebdHndLast;
+
+ BasicBlock* bbHndBeg = encTab->ebdHndBeg; // The handler region is the same as the enclosing try
+ BasicBlock* bbHndLast = encTab->ebdHndLast;
+
+ UNATIVE_OFFSET tryBeg, tryEnd, hndBeg, hndEnd, hndTyp;
+
+ tryBeg = compiler->ehCodeOffset(bbTryBeg);
+ hndBeg = compiler->ehCodeOffset(bbHndBeg);
+
+ tryEnd = (bbTryLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(bbTryLast->bbNext);
+ hndEnd = (bbHndLast == compiler->fgLastBB) ? compiler->info.compNativeCodeSize
+ : compiler->ehCodeOffset(bbHndLast->bbNext);
+
+ if (encTab->HasFilter())
+ {
+ hndTyp = compiler->ehCodeOffset(encTab->ebdFilter);
+ }
+ else
+ {
+ hndTyp = encTab->ebdTyp;
+ }
+
+ CORINFO_EH_CLAUSE_FLAGS flags = ToCORINFO_EH_CLAUSE_FLAGS(encTab->ebdHandlerType);
+
+ // Tell the VM this is an extra clause caused by moving funclets out of line.
+ // It seems odd that this flag comes from the CorExceptionFlag enum in corhdr.h,
+ // not the CORINFO_EH_CLAUSE_FLAGS enum in corinfo.h.
+ flags = (CORINFO_EH_CLAUSE_FLAGS)(flags | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
+
+ // Note that the JIT-EE interface reuses the CORINFO_EH_CLAUSE type, even though the names of
+ // the fields aren't really accurate. For example, we set "TryLength" to the offset of the
+ // instruction immediately after the 'try' body. So, it really could be more accurately named
+ // "TryEndOffset".
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = hndTyp; /* filter offset is passed back here for filter-based exception handlers */
+ clause.Flags = flags;
+ clause.TryOffset = tryBeg;
+ clause.TryLength = tryEnd;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause (a duplicated clause).
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ ++reportedDuplicateClauseCount;
+
+#ifndef DEBUG
+ if (duplicateClauseCount == reportedDuplicateClauseCount)
+ {
+ break; // we've reported all of them; no need to continue looking
+ }
+#endif // !DEBUG
+
+ } // for each 'true' enclosing 'try'
+ } // for each EH table entry
+
+ assert(duplicateClauseCount == reportedDuplicateClauseCount);
+ } // if (duplicateClauseCount > 0)
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (anyFinallys)
+ {
+ unsigned reportedClonedFinallyCount = 0;
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ UNATIVE_OFFSET hndBeg, hndEnd;
+
+ hndBeg = compiler->ehCodeOffset(block);
+
+ // How big is it? The BBJ_ALWAYS has a null bbEmitCookie! Look for the block after, which must be
+ // a label or jump target, since the BBJ_CALLFINALLY doesn't fall through.
+ BasicBlock* bbLabel = block->bbNext;
+ if (block->isBBCallAlwaysPair())
+ {
+ bbLabel = bbLabel->bbNext; // skip the BBJ_ALWAYS
+ }
+ if (bbLabel == nullptr)
+ {
+ hndEnd = compiler->info.compNativeCodeSize;
+ }
+ else
+ {
+ assert(bbLabel->bbEmitCookie != nullptr);
+ hndEnd = compiler->ehCodeOffset(bbLabel);
+ }
+
+ CORINFO_EH_CLAUSE clause;
+ clause.ClassToken = 0; // unused
+ clause.Flags = (CORINFO_EH_CLAUSE_FLAGS)(CORINFO_EH_CLAUSE_FINALLY | COR_ILEXCEPTION_CLAUSE_DUPLICATED);
+ clause.TryOffset = hndBeg;
+ clause.TryLength = hndBeg;
+ clause.HandlerOffset = hndBeg;
+ clause.HandlerLength = hndEnd;
+
+ assert(XTnum < EHCount);
+
+ // Tell the VM about this EH clause (a cloned finally clause).
+ compiler->eeSetEHinfo(XTnum, &clause);
+
+ ++XTnum;
+ ++reportedClonedFinallyCount;
+
+#ifndef DEBUG
+ if (clonedFinallyCount == reportedClonedFinallyCount)
+ {
+ break; // we're done; no need to keep looking
+ }
+#endif // !DEBUG
+ } // block is BBJ_CALLFINALLY
+ } // for each block
+
+ assert(clonedFinallyCount == reportedClonedFinallyCount);
+ } // if (anyFinallys)
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#endif // FEATURE_EH_FUNCLETS
+
+ assert(XTnum == EHCount);
+}
+
+void CodeGen::genGCWriteBarrier(GenTreePtr tgt, GCInfo::WriteBarrierForm wbf)
+{
+#ifndef LEGACY_BACKEND
+ noway_assert(tgt->gtOper == GT_STOREIND);
+#else // LEGACY_BACKEND
+ noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR); // enforced by gcIsWriteBarrierCandidate
+#endif // LEGACY_BACKEND
+
+ /* Call the proper vm helper */
+ int helper = CORINFO_HELP_ASSIGN_REF;
+#ifdef DEBUG
+ if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+ helper = CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP;
+ }
+ else
+#endif
+ if (tgt->gtOper != GT_CLS_VAR)
+ {
+ if (wbf != GCInfo::WBF_BarrierUnchecked) // This overrides the tests below.
+ {
+ if (tgt->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
+ }
+ else if (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)
+ {
+ helper = CORINFO_HELP_CHECKED_ASSIGN_REF;
+ }
+ }
+ }
+ assert(((helper == CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP) && (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)) ||
+ ((helper == CORINFO_HELP_CHECKED_ASSIGN_REF) &&
+ (wbf == GCInfo::WBF_BarrierChecked || wbf == GCInfo::WBF_BarrierUnknown)) ||
+ ((helper == CORINFO_HELP_ASSIGN_REF) &&
+ (wbf == GCInfo::WBF_BarrierUnchecked || wbf == GCInfo::WBF_BarrierUnknown)));
+
+#ifdef FEATURE_COUNT_GC_WRITE_BARRIERS
+ // We classify the "tgt" trees as follows:
+ // If "tgt" is of the form (where [ x ] indicates an optional x, and { x1, ..., xn } means "one of the x_i forms"):
+ // IND [-> ADDR -> IND] -> { GT_LCL_VAR, GT_REG_VAR, ADD({GT_LCL_VAR, GT_REG_VAR}, X), ADD(X, (GT_LCL_VAR,
+ // GT_REG_VAR)) }
+ // then let "v" be the GT_LCL_VAR or GT_REG_VAR.
+ // * If "v" is the return buffer argument, classify as CWBKind_RetBuf.
+ // * If "v" is another by-ref argument, classify as CWBKind_ByRefArg.
+ // * Otherwise, classify as CWBKind_OtherByRefLocal.
+ // If "tgt" is of the form IND -> ADDR -> GT_LCL_VAR, clasify as CWBKind_AddrOfLocal.
+ // Otherwise, classify as CWBKind_Unclassified.
+
+ CheckedWriteBarrierKinds wbKind = CWBKind_Unclassified;
+ if (tgt->gtOper == GT_IND)
+ {
+ GenTreePtr lcl = NULL;
+
+ GenTreePtr indArg = tgt->gtOp.gtOp1;
+ if (indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_IND)
+ {
+ indArg = indArg->gtOp.gtOp1->gtOp.gtOp1;
+ }
+ if (indArg->gtOper == GT_LCL_VAR || indArg->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg;
+ }
+ else if (indArg->gtOper == GT_ADD)
+ {
+ if (indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp1->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg->gtOp.gtOp1;
+ }
+ else if (indArg->gtOp.gtOp2->gtOper == GT_LCL_VAR || indArg->gtOp.gtOp2->gtOper == GT_REG_VAR)
+ {
+ lcl = indArg->gtOp.gtOp2;
+ }
+ }
+ if (lcl != NULL)
+ {
+ wbKind = CWBKind_OtherByRefLocal; // Unclassified local variable.
+ unsigned lclNum = 0;
+ if (lcl->gtOper == GT_LCL_VAR)
+ lclNum = lcl->gtLclVarCommon.gtLclNum;
+ else
+ {
+ assert(lcl->gtOper == GT_REG_VAR);
+ lclNum = lcl->gtRegVar.gtLclNum;
+ }
+ if (lclNum == compiler->info.compRetBuffArg)
+ {
+ wbKind = CWBKind_RetBuf; // Ret buff. Can happen if the struct exceeds the size limit.
+ }
+ else
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+ if (varDsc->lvIsParam && varDsc->lvType == TYP_BYREF)
+ {
+ wbKind = CWBKind_ByRefArg; // Out (or in/out) arg
+ }
+ }
+ }
+ else
+ {
+ // We should have eliminated the barrier for this case.
+ assert(!(indArg->gtOper == GT_ADDR && indArg->gtOp.gtOp1->gtOper == GT_LCL_VAR));
+ }
+ }
+
+ if (helper == CORINFO_HELP_CHECKED_ASSIGN_REF)
+ {
+#if 0
+#ifdef DEBUG
+ // Enable this to sample the unclassified trees.
+ static int unclassifiedBarrierSite = 0;
+ if (wbKind == CWBKind_Unclassified)
+ {
+ unclassifiedBarrierSite++;
+ printf("unclassifiedBarrierSite = %d:\n", unclassifiedBarrierSite); compiler->gtDispTree(tgt); printf(""); printf("\n");
+ }
+#endif // DEBUG
+#endif // 0
+ genStackLevel += 4;
+ inst_IV(INS_push, wbKind);
+ genEmitHelperCall(helper,
+ 4, // argSize
+ EA_PTRSIZE); // retSize
+ genStackLevel -= 4;
+ }
+ else
+ {
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+ }
+
+#else // !FEATURE_COUNT_GC_WRITE_BARRIERS
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+#endif // !FEATURE_COUNT_GC_WRITE_BARRIERS
+}
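+
+// Editor's note: illustrative summary of the helper selection above, not part of the original
+// change. A GT_STOREIND whose address is TYP_I_IMPL (a raw native int), or which is flagged
+// GTF_IND_TGTANYWHERE, calls CORINFO_HELP_CHECKED_ASSIGN_REF; a store whose address is a
+// TYP_BYREF without that flag keeps the default CORINFO_HELP_ASSIGN_REF.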
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * Generates code for moving incoming register arguments to their
+ * assigned location, in the function prolog.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnPrologCalleeRegArgs() for %s regs\n", regState->rsIsFloat ? "float" : "int");
+ }
+#endif
+
+#ifdef _TARGET_ARM64_
+ if (compiler->info.compIsVarArgs)
+ {
+ // We've already saved all int registers at the top of the stack in the prolog.
+ // No further action is needed.
+ return;
+ }
+#endif
+
+ unsigned argMax; // maximum argNum value plus 1, (including the RetBuffArg)
+ unsigned argNum; // current argNum, always in [0..argMax-1]
+ unsigned fixedRetBufIndex; // argNum value used by the fixed return buffer argument (ARM64)
+ unsigned regArgNum; // index into the regArgTab[] table
+ regMaskTP regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn;
+ bool doingFloat = regState->rsIsFloat;
+
+ // We should be generating the prolog block when we are called
+ assert(compiler->compGeneratingProlog);
+
+ // We expect to have some registers of the type we are processing that are LiveIn; otherwise we don't need to be called.
+ noway_assert(regArgMaskLive != 0);
+
+ // If a method has 3 args (and no fixed return buffer) then argMax is 3 and valid indexes are 0,1,2
+ // If a method has a fixed return buffer (on ARM64) then argMax gets set to 9 and valid indexes are 0-8
+ //
+ // The regArgTab can always have unused entries:
+ // for example, if an architecture always increments the arg register number but uses either
+ // an integer register or a floating point register to hold the next argument,
+ // then with a mix of float and integer args you could have:
+ //
+ // sampleMethod(int i, float x, int j, float y, int k, float z);
+ // r0, r2 and r4 as valid integer arguments with argMax as 5
+ // and f1, f3 and f5 as valid floating point arguments with argMax as 6
+ // The first one is doingFloat==false and the second one is doingFloat==true
+ //
+ // If a fixed return buffer (in r8) was also present then the first one would become:
+ // r0, r2, r4 and r8 as valid integer arguments with argMax as 9
+ //
+
+ argMax = regState->rsCalleeRegArgCount;
+ fixedRetBufIndex = (unsigned)-1; // Invalid value
+
+ // If necessary we will select a correct xtraReg for circular floating point args later.
+ if (doingFloat)
+ {
+ xtraReg = REG_NA;
+ noway_assert(argMax <= MAX_FLOAT_REG_ARG);
+ }
+ else // we are doing the integer registers
+ {
+ noway_assert(argMax <= MAX_REG_ARG);
+ if (hasFixedRetBuffReg())
+ {
+ fixedRetBufIndex = theFixedRetBuffArgNum();
+ // We have an additional integer register argument when hasFixedRetBuffReg() is true
+ argMax = fixedRetBufIndex + 1;
+ assert(argMax == (MAX_REG_ARG + 1));
+ }
+ }
+
+ //
+ // Construct a table with the register arguments, for detecting circular and
+ // non-circular dependencies between the register arguments. A dependency is when
+ // an argument register Rn needs to be moved to register Rm that is also an argument
+ // register. The table is constructed in the order the arguments are passed in
+ // registers: the first register argument is in regArgTab[0], the second in
+ // regArgTab[1], etc. Note that on ARM, a TYP_DOUBLE takes two entries, starting
+ // at an even index. The regArgTab is indexed from 0 to argMax - 1.
+ // Note that due to an extra argument register for ARM64 (i.e. theFixedRetBuffReg())
+ // we have increased the allocated size of the regArgTab[] by one.
+ //
+ struct regArgElem
+ {
+ unsigned varNum; // index into compiler->lvaTable[] for this register argument
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ var_types type; // the Jit type of this regArgTab entry
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned trashBy; // index into this regArgTab[] table of the register that will be copied to this register.
+ // That is, for regArgTab[x].trashBy = y, argument register number 'y' will be copied to
+ // argument register number 'x'. Only used when circular = true.
+ char slot; // 0 means the register is not used for a register argument
+ // 1 means the first part of a register argument
+ // 2, 3 or 4 means the second,third or fourth part of a multireg argument
+ bool stackArg; // true if the argument gets homed to the stack
+ bool processed; // true after we've processed the argument (and it is in its final location)
+ bool circular; // true if this register participates in a circular dependency loop.
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // For UNIX AMD64 struct passing, the type of the register argument slot can differ from
+ // the type of the lclVar in ways that are not ascertainable from lvType.
+ // So, for that case we retain the type of the register in the regArgTab.
+
+ var_types getRegType(Compiler* compiler)
+ {
+ return type; // UNIX_AMD64 implementation
+ }
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // In other cases, we simply use the type of the lclVar to determine the type of the register.
+ var_types getRegType(Compiler* compiler)
+ {
+ LclVarDsc varDsc = compiler->lvaTable[varNum];
+ // Check if this is an HFA register arg and return the HFA type
+ if (varDsc.lvIsHfaRegArg())
+ {
+ return varDsc.GetHfaType();
+ }
+ return varDsc.lvType;
+ }
+
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ } regArgTab[max(MAX_REG_ARG + 1, MAX_FLOAT_REG_ARG)] = {};
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ // Is this variable a register arg?
+ if (!varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ // When we have a promoted struct we have two possible LclVars that can represent the incoming argument
+ // in the regArgTab[]: either the original TYP_STRUCT argument or the introduced lvStructField.
+ // We will use the lvStructField if we have a TYPE_INDEPENDENT promoted struct field; otherwise we
+ // use the original TYP_STRUCT argument.
+ //
+ if (varDsc->lvPromoted || varDsc->lvIsStructField)
+ {
+ LclVarDsc* parentVarDsc = varDsc;
+ if (varDsc->lvIsStructField)
+ {
+ assert(!varDsc->lvPromoted);
+ parentVarDsc = &compiler->lvaTable[varDsc->lvParentLcl];
+ }
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(parentVarDsc);
+
+ if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT)
+ {
+ noway_assert(parentVarDsc->lvFieldCnt == 1); // We only handle one field here
+
+ // For register arguments that are independent promoted structs we put the promoted field varNum in the
+ // regArgTab[]
+ if (varDsc->lvPromoted)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ // For register arguments that are not independent promoted structs we put the parent struct varNum in
+ // the regArgTab[]
+ if (varDsc->lvIsStructField)
+ {
+ continue;
+ }
+ }
+ }
+
+ var_types regType = varDsc->TypeGet();
+ // Change regType to the HFA type when we have a HFA argument
+ if (varDsc->lvIsHfaRegArg())
+ {
+ regType = varDsc->GetHfaType();
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!varTypeIsStruct(regType))
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // A struct might be passed partially in XMM register for System V calls.
+ // So a single arg might use both register files.
+ if (isFloatRegType(regType) != doingFloat)
+ {
+ continue;
+ }
+ }
+
+ int slots = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(varDsc))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (!structDesc.passedInRegisters)
+ {
+ // The var is not passed in registers.
+ continue;
+ }
+
+ unsigned firstRegSlot = 0;
+ for (unsigned slotCounter = 0; slotCounter < structDesc.eightByteCount; slotCounter++)
+ {
+ regNumber regNum = varDsc->lvRegNumForSlot(slotCounter);
+ var_types regType;
+
+#ifdef FEATURE_SIMD
+ // Assumption 1:
+ // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
+ // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
+ // reading and writing purposes. Hence while homing a Vector3 type arg on stack we should
+ // home entire 16-bytes so that the upper-most 4-bytes will be zeroed when written to stack.
+ //
+ // Assumption 2:
+ // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
+ // registers or on stack, the upper most 4-bytes will be zero.
+ //
+ // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
+ // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
+ // bytes. In case of Vector3 returns, the caller allocates a zero initialized Vector3 local and
+ // passes it as the retBuf arg, and the callee method writes only 12 bytes to retBuf. For this
+ // reason, there is no need to clear the upper 4 bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and the caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
+ // type args in the prolog and of the Vector3 type return value of a call.
+
+ if (varDsc->lvType == TYP_SIMD12)
+ {
+ regType = TYP_DOUBLE;
+ }
+ else
+#endif
+ {
+ regType = compiler->GetEightByteType(structDesc, slotCounter);
+ }
+
+ regArgNum = genMapRegNumToRegArgNum(regNum, regType);
+
+ if ((!doingFloat && (structDesc.IsIntegralSlot(slotCounter))) ||
+ (doingFloat && (structDesc.IsSseSlot(slotCounter))))
+ {
+ // Store the reg for the first slot.
+ if (slots == 0)
+ {
+ firstRegSlot = regArgNum;
+ }
+
+ // Bingo - add it to our table
+ noway_assert(regArgNum < argMax);
+ noway_assert(regArgTab[regArgNum].slot == 0); // we better not have added it already (there better
+ // not be multiple vars representing this argument
+ // register)
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = (char)(slotCounter + 1);
+ regArgTab[regArgNum].type = regType;
+ slots++;
+ }
+ }
+
+ if (slots == 0)
+ {
+ continue; // Nothing to do for this regState set.
+ }
+
+ regArgNum = firstRegSlot;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Bingo - add it to our table
+ regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, regType);
+
+ noway_assert(regArgNum < argMax);
+ // We better not have added it already (there better not be multiple vars representing this argument
+ // register)
+ noway_assert(regArgTab[regArgNum].slot == 0);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Set the register type.
+ regArgTab[regArgNum].type = regType;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ regArgTab[regArgNum].varNum = varNum;
+ regArgTab[regArgNum].slot = 1;
+
+ slots = 1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (compiler->lvaIsMultiregStruct(varDsc))
+ {
+ if (varDsc->lvIsHfaRegArg())
+ {
+ // We have an HFA argument, set slots to the number of registers used
+ slots = varDsc->lvHfaSlots();
+ }
+ else
+ {
+ // Currently all non-HFA multireg structs are two registers in size (i.e. two slots)
+ assert(varDsc->lvSize() == (2 * TARGET_POINTER_SIZE));
+ // We have a non-HFA multireg argument, set slots to two
+ slots = 2;
+ }
+
+ // Note that regArgNum+1 represents an argument index not an actual argument register.
+ // see genMapRegArgNumToRegNum(unsigned argNum, var_types type)
+
+ // This is the setup for the rest of a multireg struct arg
+
+ for (int i = 1; i < slots; i++)
+ {
+ noway_assert((regArgNum + i) < argMax);
+
+ // We better not have added it already (there better not be multiple vars representing this argument
+ // register)
+ noway_assert(regArgTab[regArgNum + i].slot == 0);
+
+ regArgTab[regArgNum + i].varNum = varNum;
+ regArgTab[regArgNum + i].slot = (char)(i + 1);
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+#ifdef _TARGET_ARM_
+ int lclSize = compiler->lvaLclSize(varNum);
+
+ if (lclSize > REGSIZE_BYTES)
+ {
+ unsigned maxRegArgNum = doingFloat ? MAX_FLOAT_REG_ARG : MAX_REG_ARG;
+ slots = lclSize / REGSIZE_BYTES;
+ if (regArgNum + slots > maxRegArgNum)
+ {
+ slots = maxRegArgNum - regArgNum;
+ }
+ }
+ C_ASSERT((char)MAX_REG_ARG == MAX_REG_ARG);
+ assert(slots < INT8_MAX);
+ for (char i = 1; i < slots; i++)
+ {
+ regArgTab[regArgNum + i].varNum = varNum;
+ regArgTab[regArgNum + i].slot = i + 1;
+ }
+#endif // _TARGET_ARM_
+
+ for (int i = 0; i < slots; i++)
+ {
+ regType = regArgTab[regArgNum + i].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(regArgNum + i, regType);
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Under FEATURE_UNIX_AMD64_STRUCT_PASSING, lvArgReg could be an INT or a FLOAT reg, so the
+ // following assertion doesn't hold there. The type of the register depends on the classification
+ // of the first eightbyte of the struct. For information on classification refer to the
+ // System V x86_64 ABI at: http://www.x86-64.org/documentation/abi.pdf
+
+ assert((i > 0) || (regNum == varDsc->lvArgReg));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Is the arg dead on entry to the method ?
+
+ if ((regArgMaskLive & genRegMask(regNum)) == 0)
+ {
+ if (varDsc->lvTrackedNonStruct())
+ {
+ noway_assert(!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex));
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ noway_assert(varDsc->lvType == TYP_STRUCT);
+#else // !_TARGET_X86_
+#ifndef LEGACY_BACKEND
+ // For LSRA, it may not be in regArgMaskLive if it has a zero
+ // refcnt. This is in contrast with the non-LSRA case in which all
+ // non-tracked args are assumed live on entry.
+ noway_assert((varDsc->lvRefCnt == 0) || (varDsc->lvType == TYP_STRUCT) ||
+ (varDsc->lvAddrExposed && compiler->info.compIsVarArgs));
+#else // LEGACY_BACKEND
+ noway_assert(
+ varDsc->lvType == TYP_STRUCT ||
+ (varDsc->lvAddrExposed && (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)));
+#endif // LEGACY_BACKEND
+#endif // !_TARGET_X86_
+ }
+ // Mark it as processed and be done with it
+ regArgTab[regArgNum + i].processed = true;
+ goto NON_DEP;
+ }
+
+#ifdef _TARGET_ARM_
+ // On ARM, when the varDsc is a struct arg (or pre-spilled due to varargs) the initReg/xtraReg
+ // could be equal to lvArgReg. The pre-spilled registers are not considered live either, since
+ // they've already been spilled.
+ //
+ if ((regSet.rsMaskPreSpillRegs(false) & genRegMask(regNum)) == 0)
+#endif // _TARGET_ARM_
+ {
+ noway_assert(xtraReg != varDsc->lvArgReg + i);
+ noway_assert(regArgMaskLive & genRegMask(regNum));
+ }
+
+ regArgTab[regArgNum + i].processed = false;
+
+ /* mark stack arguments since we will take care of those first */
+ regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true;
+
+ /* If it goes on the stack or in a register that doesn't hold
+ * an argument anymore -> CANNOT form a circular dependency */
+
+ if (varDsc->lvIsInReg() && (genRegMask(regNum) & regArgMaskLive))
+ {
+ /* will trash another argument -> possible dependency
+ * We may need several passes after the table is constructed
+ * to decide on that */
+
+ /* Maybe the argument stays in the register (IDEAL) */
+
+ if ((i == 0) && (varDsc->lvRegNum == regNum))
+ {
+ goto NON_DEP;
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if ((i == 1) && varTypeIsStruct(varDsc) && (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+ if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_LONG) && (varDsc->lvOtherReg == regNum))
+ {
+ goto NON_DEP;
+ }
+
+ if ((i == 1) && (genActualType(varDsc->TypeGet()) == TYP_DOUBLE) &&
+ (REG_NEXT(varDsc->lvRegNum) == regNum))
+ {
+ goto NON_DEP;
+ }
+#endif // !defined(_TARGET_64BIT_)
+ regArgTab[regArgNum + i].circular = true;
+ }
+ else
+ {
+ NON_DEP:
+ regArgTab[regArgNum + i].circular = false;
+
+ /* mark the argument register as free */
+ regArgMaskLive &= ~genRegMask(regNum);
+ }
+ }
+ }
+
+ /* Find the circular dependencies for the argument registers, if any.
+ * A circular dependency is a set of registers R1, R2, ..., Rn
+ * such that R1->R2 (that is, R1 needs to be moved to R2), R2->R3, ..., Rn->R1 */
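+
+ /* Editor's illustrative example (hypothetical registers): if the argument arriving in r0
+ must be homed in r1 while the argument arriving in r1 must be homed in r0, the sieve below
+ leaves regArgTab[1].trashBy == 0 and regArgTab[0].trashBy == 1 with both entries still
+ marked circular; the cycle is then broken up later (e.g. using the xtraReg temporary). */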
+
+ bool change = true;
+ if (regArgMaskLive)
+ {
+ /* Possible circular dependencies still exist; the previous pass was not enough
+ * to filter them out. Use a "sieve" strategy to find all circular dependencies. */
+
+ while (change)
+ {
+ change = false;
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ // If we already marked the argument as non-circular then continue
+
+ if (!regArgTab[argNum].circular)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+
+ /* cannot possibly have stack arguments */
+ noway_assert(varDsc->lvIsInReg());
+ noway_assert(!regArgTab[argNum].stackArg);
+
+ var_types regType = regArgTab[argNum].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
+
+ regNumber destRegNum = REG_NA;
+ if (regArgTab[argNum].slot == 1)
+ {
+ destRegNum = varDsc->lvRegNum;
+ }
+#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+ assert(regArgTab[argNum - 1].varNum == varNum);
+ assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
+ regArgMaskLive &= ~genRegMask(regNum);
+ regArgTab[argNum].circular = false;
+ change = true;
+ continue;
+ }
+#elif !defined(_TARGET_64BIT_)
+ else if (regArgTab[argNum].slot == 2 && genActualType(varDsc->TypeGet()) == TYP_LONG)
+ {
+ destRegNum = varDsc->lvOtherReg;
+ }
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(varDsc->TypeGet() == TYP_DOUBLE);
+ destRegNum = REG_NEXT(varDsc->lvRegNum);
+ }
+#endif // !defined(_TARGET_64BIT_)
+ noway_assert(destRegNum != REG_NA);
+ if (genRegMask(destRegNum) & regArgMaskLive)
+ {
+ /* we are trashing a live argument register - record it */
+ unsigned destRegArgNum = genMapRegNumToRegArgNum(destRegNum, regType);
+ noway_assert(destRegArgNum < argMax);
+ regArgTab[destRegArgNum].trashBy = argNum;
+ }
+ else
+ {
+ /* argument goes to a free register */
+ regArgTab[argNum].circular = false;
+ change = true;
+
+ /* mark the argument register as free */
+ regArgMaskLive &= ~genRegMask(regNum);
+ }
+ }
+ }
+ }
+
+ /* At this point, everything that has the "circular" flag
+ * set to "true" forms a circular dependency */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (regArgMaskLive)
+ {
+ if (verbose)
+ {
+            printf("Circular dependencies found while homing the incoming arguments.\n");
+ }
+ }
+#endif
+
+ // LSRA allocates registers to incoming parameters in order and will not overwrite
+ // a register still holding a live parameter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) &&
+ "Homing of float argument registers with circular dependencies not implemented.");
+#endif // !LEGACY_BACKEND
+
+ /* Now move the arguments to their locations.
+ * First consider ones that go on the stack since they may
+ * free some registers. */
+
+ regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ emitAttr size;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If this is the wrong register file, just continue.
+ if (regArgTab[argNum].type == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If the arg is dead on entry to the method, skip it
+
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifndef _TARGET_64BIT_
+ // If not a stack arg go to the next one
+ if (varDsc->lvType == TYP_LONG)
+ {
+ if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg)
+ {
+ continue;
+ }
+ else if (varDsc->lvOtherReg != REG_STK)
+ {
+ continue;
+ }
+ }
+ else
+#endif // !_TARGET_64BIT_
+ {
+ // If not a stack arg go to the next one
+ if (!regArgTab[argNum].stackArg)
+ {
+ continue;
+ }
+ }
+
+#if defined(_TARGET_ARM_)
+ if (varDsc->lvType == TYP_DOUBLE)
+ {
+ if (regArgTab[argNum].slot == 2)
+ {
+ // We handled the entire double when processing the first half (slot == 1)
+ continue;
+ }
+ }
+#endif
+
+ noway_assert(regArgTab[argNum].circular == false);
+
+ noway_assert(varDsc->lvIsParam);
+ noway_assert(varDsc->lvIsRegArg);
+ noway_assert(varDsc->lvIsInReg() == false ||
+ (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK && regArgTab[argNum].slot == 2));
+
+ var_types storeType = TYP_UNDEF;
+ unsigned slotSize = TARGET_POINTER_SIZE;
+
+ if (varTypeIsStruct(varDsc))
+ {
+ storeType = TYP_I_IMPL; // Default store type for a struct type is a pointer sized integer
+#if FEATURE_MULTIREG_ARGS
+ // Must be <= MAX_PASS_MULTIREG_BYTES or else it wouldn't be passed in registers
+ noway_assert(varDsc->lvSize() <= MAX_PASS_MULTIREG_BYTES);
+#endif // FEATURE_MULTIREG_ARGS
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ storeType = regArgTab[argNum].type;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varDsc->lvIsHfaRegArg())
+ {
+#ifdef _TARGET_ARM_
+ // On ARM32 the storeType for HFA args is always TYP_FLOAT
+ storeType = TYP_FLOAT;
+ slotSize = (unsigned)emitActualTypeSize(storeType);
+#else // _TARGET_ARM64_
+ storeType = genActualType(varDsc->GetHfaType());
+ slotSize = (unsigned)emitActualTypeSize(storeType);
+#endif // _TARGET_ARM64_
+ }
+ }
+ else // Not a struct type
+ {
+ storeType = genActualType(varDsc->TypeGet());
+ }
+ size = emitActualTypeSize(storeType);
+#ifdef _TARGET_X86_
+ noway_assert(genTypeSize(storeType) == TARGET_POINTER_SIZE);
+#endif //_TARGET_X86_
+
+ regNumber srcRegNum = genMapRegArgNumToRegNum(argNum, storeType);
+
+        // Stack argument - if the ref count is 0, we don't care about it
+
+ if (!varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ }
+ else
+ {
+ // Since slot is typically 1, baseOffset is typically 0
+ int baseOffset = (regArgTab[argNum].slot - 1) * slotSize;
+
+ getEmitter()->emitIns_S_R(ins_Store(storeType), size, srcRegNum, varNum, baseOffset);
+
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Check if we are writing past the end of the struct
+ if (varTypeIsStruct(varDsc))
+ {
+ assert(varDsc->lvSize() >= baseOffset + (unsigned)size);
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (regArgTab[argNum].slot == 1)
+ {
+ psiMoveToStack(varNum);
+ }
+ }
+
+ /* mark the argument as processed */
+
+ regArgTab[argNum].processed = true;
+ regArgMaskLive &= ~genRegMask(srcRegNum);
+
+#if defined(_TARGET_ARM_)
+ if (storeType == TYP_DOUBLE)
+ {
+ regArgTab[argNum + 1].processed = true;
+ regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum));
+ }
+#endif
+ }
+
+ /* Process any circular dependencies */
+ if (regArgMaskLive)
+ {
+ unsigned begReg, destReg, srcReg;
+ unsigned varNumDest, varNumSrc;
+ LclVarDsc* varDscDest;
+ LclVarDsc* varDscSrc;
+ instruction insCopy = INS_mov;
+
+ if (doingFloat)
+ {
+#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ insCopy = ins_Copy(TYP_DOUBLE);
+ // Compute xtraReg here when we have a float argument
+ assert(xtraReg == REG_NA);
+
+ regMaskTP fpAvailMask;
+
+ fpAvailMask = RBM_FLT_CALLEE_TRASH & ~regArgMaskLive;
+#if defined(FEATURE_HFA)
+ fpAvailMask &= RBM_ALLDOUBLE;
+#else
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#error Error. Wrong architecture.
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_HFA)
+
+ if (fpAvailMask == RBM_NONE)
+ {
+ fpAvailMask = RBM_ALLFLOAT & ~regArgMaskLive;
+#if defined(FEATURE_HFA)
+ fpAvailMask &= RBM_ALLDOUBLE;
+#else
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#error Error. Wrong architecture.
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#endif // defined(FEATURE_HFA)
+ }
+
+ assert(fpAvailMask != RBM_NONE);
+
+ // We pick the lowest avail register number
+ regMaskTP tempMask = genFindLowestBit(fpAvailMask);
+ xtraReg = genRegNumFromMask(tempMask);
+#elif defined(_TARGET_X86_)
+ // This case shouldn't occur on x86 since NYI gets converted to an assert
+ NYI("Homing circular FP registers via xtraReg");
+#endif
+ }
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ // If not a circular dependency then continue
+ if (!regArgTab[argNum].circular)
+ {
+ continue;
+ }
+
+ // If already processed the dependency then continue
+
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0) // Not a register argument
+ {
+ continue;
+ }
+
+ destReg = begReg = argNum;
+ srcReg = regArgTab[argNum].trashBy;
+
+ varNumDest = regArgTab[destReg].varNum;
+ noway_assert(varNumDest < compiler->lvaCount);
+ varDscDest = compiler->lvaTable + varNumDest;
+ noway_assert(varDscDest->lvIsParam && varDscDest->lvIsRegArg);
+
+ noway_assert(srcReg < argMax);
+ varNumSrc = regArgTab[srcReg].varNum;
+ noway_assert(varNumSrc < compiler->lvaCount);
+ varDscSrc = compiler->lvaTable + varNumSrc;
+ noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
+
+ emitAttr size = EA_PTRSIZE;
+
+#ifdef _TARGET_XARCH_
+ //
+ // The following code relies upon the target architecture having an
+ // 'xchg' instruction which directly swaps the values held in two registers.
+ // On the ARM architecture we do not have such an instruction.
+ //
+ if (destReg == regArgTab[srcReg].trashBy)
+ {
+ /* only 2 registers form the circular dependency - use "xchg" */
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+
+ noway_assert(genTypeSize(genActualType(varDscSrc->TypeGet())) <= REGSIZE_BYTES);
+
+ /* Set "size" to indicate GC if one and only one of
+ * the operands is a pointer
+ * RATIONALE: If both are pointers, nothing changes in
+ * the GC pointer tracking. If only one is a pointer we
+ * have to "swap" the registers in the GC reg pointer mask
+ */
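+                /* For example (illustrative): swapping an object reference with a plain int must
+                 * be reported with EA_GCREF so the emitter updates the GC register mask; swapping
+                 * two object references leaves the mask unchanged, so the default EA_PTRSIZE
+                 * suffices. */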
+
+ if (varTypeGCtype(varDscSrc->TypeGet()) != varTypeGCtype(varDscDest->TypeGet()))
+ {
+ size = EA_GCREF;
+ }
+
+ noway_assert(varDscDest->lvArgReg == varDscSrc->lvRegNum);
+
+ getEmitter()->emitIns_R_R(INS_xchg, size, varDscSrc->lvRegNum, varDscSrc->lvArgReg);
+ regTracker.rsTrackRegTrash(varDscSrc->lvRegNum);
+ regTracker.rsTrackRegTrash(varDscSrc->lvArgReg);
+
+ /* mark both arguments as processed */
+ regArgTab[destReg].processed = true;
+ regArgTab[srcReg].processed = true;
+
+ regArgMaskLive &= ~genRegMask(varDscSrc->lvArgReg);
+ regArgMaskLive &= ~genRegMask(varDscDest->lvArgReg);
+
+ psiMoveToReg(varNumSrc);
+ psiMoveToReg(varNumDest);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ var_types destMemType = varDscDest->TypeGet();
+
+#ifdef _TARGET_ARM_
+ bool cycleAllDouble = true; // assume the best
+
+ unsigned iter = begReg;
+ do
+ {
+ if (compiler->lvaTable[regArgTab[iter].varNum].TypeGet() != TYP_DOUBLE)
+ {
+ cycleAllDouble = false;
+ break;
+ }
+ iter = regArgTab[iter].trashBy;
+ } while (iter != begReg);
+
+ // We may treat doubles as floats for ARM because we could have partial circular
+ // dependencies of a float with a lo/hi part of the double. We mark the
+ // trashBy values for each slot of the double, so let the circular dependency
+ // logic work its way out for floats rather than doubles. If a cycle has all
+ // doubles, then optimize so that instead of two vmov.f32's to move a double,
+ // we can use one vmov.f64.
+ //
+ if (!cycleAllDouble && destMemType == TYP_DOUBLE)
+ {
+ destMemType = TYP_FLOAT;
+ }
+#endif // _TARGET_ARM_
+
+ if (destMemType == TYP_REF)
+ {
+ size = EA_GCREF;
+ }
+ else if (destMemType == TYP_BYREF)
+ {
+ size = EA_BYREF;
+ }
+ else if (destMemType == TYP_DOUBLE)
+ {
+ size = EA_8BYTE;
+ }
+ else if (destMemType == TYP_FLOAT)
+ {
+ size = EA_4BYTE;
+ }
+
+ /* move the dest reg (begReg) in the extra reg */
+
+ assert(xtraReg != REG_NA);
+
+ regNumber begRegNum = genMapRegArgNumToRegNum(begReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, xtraReg, begRegNum);
+
+ regTracker.rsTrackRegCopy(xtraReg, begRegNum);
+
+ *pXtraRegClobbered = true;
+
+ psiMoveToReg(varNumDest, xtraReg);
+
+ /* start moving everything to its right place */
+
+ while (srcReg != begReg)
+ {
+ /* mov dest, src */
+
+ regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
+ regNumber srcRegNum = genMapRegArgNumToRegNum(srcReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, destRegNum, srcRegNum);
+
+ regTracker.rsTrackRegCopy(destRegNum, srcRegNum);
+
+ /* mark 'src' as processed */
+ noway_assert(srcReg < argMax);
+ regArgTab[srcReg].processed = true;
+#ifdef _TARGET_ARM_
+ if (size == EA_8BYTE)
+ regArgTab[srcReg + 1].processed = true;
+#endif
+ regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
+
+ /* move to the next pair */
+ destReg = srcReg;
+ srcReg = regArgTab[srcReg].trashBy;
+
+ varDscDest = varDscSrc;
+ destMemType = varDscDest->TypeGet();
+#ifdef _TARGET_ARM_
+ if (!cycleAllDouble && destMemType == TYP_DOUBLE)
+ {
+ destMemType = TYP_FLOAT;
+ }
+#endif
+ varNumSrc = regArgTab[srcReg].varNum;
+ noway_assert(varNumSrc < compiler->lvaCount);
+ varDscSrc = compiler->lvaTable + varNumSrc;
+ noway_assert(varDscSrc->lvIsParam && varDscSrc->lvIsRegArg);
+
+ if (destMemType == TYP_REF)
+ {
+ size = EA_GCREF;
+ }
+ else if (destMemType == TYP_DOUBLE)
+ {
+ size = EA_8BYTE;
+ }
+ else
+ {
+ size = EA_4BYTE;
+ }
+ }
+
+ /* take care of the beginning register */
+
+ noway_assert(srcReg == begReg);
+
+ /* move the dest reg (begReg) in the extra reg */
+
+ regNumber destRegNum = genMapRegArgNumToRegNum(destReg, destMemType);
+
+ getEmitter()->emitIns_R_R(insCopy, size, destRegNum, xtraReg);
+
+ regTracker.rsTrackRegCopy(destRegNum, xtraReg);
+
+ psiMoveToReg(varNumSrc);
+
+ /* mark the beginning register as processed */
+
+ regArgTab[srcReg].processed = true;
+#ifdef _TARGET_ARM_
+ if (size == EA_8BYTE)
+ regArgTab[srcReg + 1].processed = true;
+#endif
+ regArgMaskLive &= ~genMapArgNumToRegMask(srcReg, destMemType);
+ }
+ }
+ }
+
+ /* Finally take care of the remaining arguments that must be enregistered */
+ while (regArgMaskLive)
+ {
+ regMaskTP regArgMaskLiveSave = regArgMaskLive;
+
+ for (argNum = 0; argNum < argMax; argNum++)
+ {
+ /* If already processed go to the next one */
+ if (regArgTab[argNum].processed)
+ {
+ continue;
+ }
+
+ if (regArgTab[argNum].slot == 0)
+ { // Not a register argument
+ continue;
+ }
+
+ varNum = regArgTab[argNum].varNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+ var_types regType = regArgTab[argNum].getRegType(compiler);
+ regNumber regNum = genMapRegArgNumToRegNum(argNum, regType);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (regType == TYP_UNDEF)
+ {
+ // This could happen if the reg in regArgTab[argNum] is of the other register file -
+ // for System V register passed structs where the first reg is GPR and the second an XMM reg.
+ // The next register file processing will process it.
+ regArgMaskLive &= ~genRegMask(regNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ noway_assert(varDsc->lvIsParam && varDsc->lvIsRegArg);
+#ifndef _TARGET_64BIT_
+#ifndef _TARGET_ARM_
+ // Right now we think that incoming arguments are not pointer sized. When we eventually
+ // understand the calling convention, this still won't be true. But maybe we'll have a better
+ // idea of how to ignore it.
+
+ // On Arm, a long can be passed in register
+ noway_assert(genTypeSize(genActualType(varDsc->TypeGet())) == sizeof(void*));
+#endif
+#endif //_TARGET_64BIT_
+
+ noway_assert(varDsc->lvIsInReg() && !regArgTab[argNum].circular);
+
+ /* Register argument - hopefully it stays in the same register */
+ regNumber destRegNum = REG_NA;
+ var_types destMemType = varDsc->TypeGet();
+
+ if (regArgTab[argNum].slot == 1)
+ {
+ destRegNum = varDsc->lvRegNum;
+
+#ifdef _TARGET_ARM_
+ if (genActualType(destMemType) == TYP_DOUBLE && regArgTab[argNum + 1].processed)
+ {
+ // The second half of the double has already been processed! Treat this as a single.
+ destMemType = TYP_FLOAT;
+ }
+#endif // _TARGET_ARM_
+ }
+#ifndef _TARGET_64BIT_
+ else if (regArgTab[argNum].slot == 2 && genActualType(destMemType) == TYP_LONG)
+ {
+#ifndef LEGACY_BACKEND
+ assert(genActualType(varDsc->TypeGet()) == TYP_LONG || genActualType(varDsc->TypeGet()) == TYP_DOUBLE);
+ if (genActualType(varDsc->TypeGet()) == TYP_DOUBLE)
+ {
+ destRegNum = regNum;
+ }
+ else
+#endif // !LEGACY_BACKEND
+ destRegNum = varDsc->lvOtherReg;
+
+ assert(destRegNum != REG_STK);
+ }
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(destMemType == TYP_DOUBLE);
+
+ // For doubles, we move the entire double using the argNum representing
+ // the first half of the double. There are two things we won't do:
+ // (1) move the double when the 1st half of the destination is free but the
+ // 2nd half is occupied, and (2) move the double when the 2nd half of the
+ // destination is free but the 1st half is occupied. Here we consider the
+ // case where the first half can't be moved initially because its target is
+ // still busy, but the second half can be moved. We wait until the entire
+ // double can be moved, if possible. For example, we have F0/F1 double moving to F2/F3,
+ // and F2 single moving to F16. When we process F0, its target F2 is busy,
+ // so we skip it on the first pass. When we process F1, its target F3 is
+ // available. However, we want to move F0/F1 all at once, so we skip it here.
+ // We process F2, which frees up F2. The next pass through, we process F0 and
+ // F2/F3 are empty, so we move it. Note that if half of a double is involved
+ // in a circularity with a single, then we will have already moved that half
+ // above, so we go ahead and move the remaining half as a single.
+ // Because there are no circularities left, we are guaranteed to terminate.
+
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+
+ if (!regArgTab[argNum - 1].processed)
+ {
+                    // The first half of the double hasn't been processed yet; wait so that both halves can be
+                    // processed at the same time
+ continue;
+ }
+
+ // The first half of the double has been processed but the second half hasn't!
+ // This could happen for double F2/F3 moving to F0/F1, and single F0 moving to F2.
+ // In that case, there is a F0/F2 loop that is not a double-only loop. The circular
+ // dependency logic above will move them as singles, leaving just F3 to move. Treat
+ // it as a single to finish the shuffling.
+
+ destMemType = TYP_FLOAT;
+ destRegNum = REG_NEXT(varDsc->lvRegNum);
+ }
+#endif // !_TARGET_64BIT_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ else
+ {
+ assert(regArgTab[argNum].slot == 2);
+ assert(argNum > 0);
+ assert(regArgTab[argNum - 1].slot == 1);
+ assert((varDsc->lvType == TYP_SIMD12) || (varDsc->lvType == TYP_SIMD16));
+ destRegNum = varDsc->lvRegNum;
+ noway_assert(regNum != destRegNum);
+ continue;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ noway_assert(destRegNum != REG_NA);
+ if (destRegNum != regNum)
+ {
+ /* Cannot trash a currently live register argument.
+                 * Skip this one until its target is free,
+ * which is guaranteed to happen since we have no circular dependencies. */
+
+ regMaskTP destMask = genRegMask(destRegNum);
+#ifdef _TARGET_ARM_
+ // Don't process the double until both halves of the destination are clear.
+ if (genActualType(destMemType) == TYP_DOUBLE)
+ {
+ assert((destMask & RBM_DBL_REGS) != 0);
+ destMask |= genRegMask(REG_NEXT(destRegNum));
+ }
+#endif
+
+ if (destMask & regArgMaskLive)
+ {
+ continue;
+ }
+
+ /* Move it to the new register */
+
+ emitAttr size = emitActualTypeSize(destMemType);
+
+ getEmitter()->emitIns_R_R(ins_Copy(destMemType), size, destRegNum, regNum);
+
+ psiMoveToReg(varNum);
+ }
+
+ /* mark the argument as processed */
+
+ assert(!regArgTab[argNum].processed);
+ regArgTab[argNum].processed = true;
+ regArgMaskLive &= ~genRegMask(regNum);
+#if FEATURE_MULTIREG_ARGS
+ int argRegCount = 1;
+#ifdef _TARGET_ARM_
+ if (genActualType(destMemType) == TYP_DOUBLE)
+ {
+ argRegCount = 2;
+ }
+#endif
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ if (varTypeIsStruct(varDsc) && argNum < (argMax - 1) && regArgTab[argNum + 1].slot == 2)
+ {
+ argRegCount = 2;
+ int nextArgNum = argNum + 1;
+ regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
+ noway_assert(regArgTab[nextArgNum].varNum == varNum);
+ // Emit a shufpd with a 0 immediate, which preserves the 0th element of the dest reg
+ // and moves the 0th element of the src reg into the 1st element of the dest reg.
+ getEmitter()->emitIns_R_R_I(INS_shufpd, emitActualTypeSize(varDsc->lvType), destRegNum, nextRegNum, 0);
+ // Set destRegNum to regNum so that we skip the setting of the register below,
+ // but mark argNum as processed and clear regNum from the live mask.
+ destRegNum = regNum;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // Mark the rest of the argument registers corresponding to this multi-reg type as
+ // being processed and no longer live.
+ for (int regSlot = 1; regSlot < argRegCount; regSlot++)
+ {
+ int nextArgNum = argNum + regSlot;
+ assert(!regArgTab[nextArgNum].processed);
+ regArgTab[nextArgNum].processed = true;
+ regNumber nextRegNum = genMapRegArgNumToRegNum(nextArgNum, regArgTab[nextArgNum].getRegType(compiler));
+ regArgMaskLive &= ~genRegMask(nextRegNum);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+ noway_assert(regArgMaskLiveSave != regArgMaskLive); // if it doesn't change, we have an infinite loop
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ * If any incoming stack arguments live in registers, load them.
+ */
+void CodeGen::genEnregisterIncomingStackArgs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genEnregisterIncomingStackArgs()\n");
+ }
+#endif
+
+ assert(compiler->compGeneratingProlog);
+
+ unsigned varNum = 0;
+
+ for (LclVarDsc *varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter? */
+
+ if (!varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ /* If it's a register argument then it's already been taken care of.
+ But, on Arm when under a profiler, we would have prespilled a register argument
+ and hence here we need to load it from its prespilled location.
+ */
+ bool isPrespilledForProfiling = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledForProfiling =
+ compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(varNum, regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ if (varDsc->lvIsRegArg && !isPrespilledForProfiling)
+ {
+ continue;
+ }
+
+ /* Has the parameter been assigned to a register? */
+
+ if (!varDsc->lvIsInReg())
+ {
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+#if FEATURE_STACK_FP_X87
+ // Floating point locals are loaded onto the x86-FPU in the next section
+ if (varTypeIsFloating(type))
+ continue;
+#endif
+
+ /* Is the variable dead on entry */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ /* Load the incoming parameter into the register */
+
+ /* Figure out the home offset of the incoming argument */
+
+ regNumber regNum;
+ regNumber otherReg;
+
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_ARM_
+ if (type == TYP_LONG)
+ {
+ regPairNo regPair = varDsc->lvArgInitRegPair;
+ regNum = genRegPairLo(regPair);
+ otherReg = genRegPairHi(regPair);
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ regNum = varDsc->lvArgInitReg;
+ otherReg = REG_NA;
+ }
+#else // LEGACY_BACKEND
+ regNum = varDsc->lvRegNum;
+ if (type == TYP_LONG)
+ {
+ otherReg = varDsc->lvOtherReg;
+ }
+ else
+ {
+ otherReg = REG_NA;
+ }
+#endif // LEGACY_BACKEND
+
+ assert(regNum != REG_STK);
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, regNum, varNum, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* Is the upper half also enregistered? */
+
+ if (otherReg != REG_STK)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, otherReg, varNum, sizeof(int));
+ regTracker.rsTrackRegTrash(otherReg);
+ }
+ }
+ else
+#endif // !_TARGET_64BIT_
+ {
+ /* Loading a single register - this is the easy/common case */
+
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), regNum, varNum, 0);
+ regTracker.rsTrackRegTrash(regNum);
+ }
+
+ psiMoveToReg(varNum);
+ }
+}
+
+/*-------------------------------------------------------------------------
+ *
+ * We have to decide whether we're going to use block initialization
+ * in the prolog before we assign final stack offsets. This is because
+ * when using block initialization we may need additional callee-saved
+ * registers which need to be saved on the frame, thus increasing the
+ * frame size.
+ *
+ * We'll count the number of locals we have to initialize,
+ * and if there are lots of them we'll use block initialization.
+ * Thus, the local variable table must have accurate register location
+ * information for enregistered locals for their register state on entry
+ * to the function.
+ *
+ * At the same time we set lvMustInit for locals (enregistered or on stack)
+ * that must be initialized (e.g. when compInitMem is set, for untracked
+ * GC pointers, or when dataflow analysis (DFA) is disabled).
+ */
+void CodeGen::genCheckUseBlockInit()
+{
+#ifndef LEGACY_BACKEND // this is called before codegen in RyuJIT backend
+ assert(!compiler->compGeneratingProlog);
+#else // LEGACY_BACKEND
+ assert(compiler->compGeneratingProlog);
+#endif // LEGACY_BACKEND
+
+ unsigned initStkLclCnt = 0; // The number of int-sized stack local variables that need to be initialized (variables
+ // larger than int count for more than 1).
+ unsigned largeGcStructs = 0; // The number of "large" structs with GC pointers. Used as part of the heuristic to
+ // determine whether to use block init.
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+
+ if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar)
+ {
+ continue;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (varNum == compiler->lvaPInvokeFrameRegSaveVar)
+ {
+ continue;
+ }
+ if (varNum == compiler->lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif
+
+#if FEATURE_EH_FUNCLETS
+ // There's no need to force 0-initialization of the PSPSym, it will be
+ // initialized with a real value in the prolog
+ if (varNum == compiler->lvaPSPSym)
+ {
+ continue;
+ }
+#endif
+
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // For Compiler::PROMOTION_TYPE_DEPENDENT type of promotion, the whole struct should have been
+ // initialized by the parent struct. No need to set the lvMustInit bit in the
+ // field locals.
+ continue;
+ }
+
+ if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0) ||
+ varDsc->lvMustInit)
+ {
+ if (varDsc->lvTracked)
+ {
+ /* For uninitialized use of tracked variables, the liveness
+ * will bubble to the top (compiler->fgFirstBB) in fgInterBlockLocalVarLiveness()
+ */
+ if (varDsc->lvMustInit ||
+ VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ /* This var must be initialized */
+
+ varDsc->lvMustInit = 1;
+
+                    /* See if the variable that is on the stack will be initialized
+                     * using rep stos - compute the total size to be zeroed */
+
+ if (varDsc->lvOnFrame)
+ {
+ if (!varDsc->lvRegister)
+ {
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvIsInReg())
+#endif // !LEGACY_BACKEND
+ {
+ // Var is completely on the stack, in the legacy JIT case, or
+ // on the stack at entry, in the RyuJIT case.
+ initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
+ }
+ }
+ else
+ {
+ // Var is partially enregistered
+ noway_assert(genTypeSize(varDsc->TypeGet()) > sizeof(int) && varDsc->lvOtherReg == REG_STK);
+ initStkLclCnt += genTypeStSz(TYP_INT);
+ }
+ }
+ }
+ }
+
+ /* With compInitMem, all untracked vars will have to be init'ed */
+ /* VSW 102460 - Do not force initialization of compiler generated temps,
+ unless they are untracked GC type or structs that contain GC pointers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_SIMD
+ // TODO-1stClassStructs
+ // This is here to duplicate previous behavior, where TYP_SIMD8 locals
+ // were not being re-typed correctly.
+ if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_SIMD8)) &&
+#else // !FEATURE_SIMD
+ if ((!varDsc->lvTracked || (varDsc->lvType == TYP_STRUCT)) &&
+#endif // !FEATURE_SIMD
+ varDsc->lvOnFrame &&
+ (!varDsc->lvIsTemp || varTypeIsGC(varDsc->TypeGet()) || (varDsc->lvStructGcCount > 0)))
+ {
+ varDsc->lvMustInit = true;
+
+ initStkLclCnt += (unsigned)roundUp(compiler->lvaLclSize(varNum)) / sizeof(int);
+ }
+
+ continue;
+ }
+
+ /* Ignore if not a pointer variable or value class with a GC field */
+
+ if (!compiler->lvaTypeIsGC(varNum))
+ {
+ continue;
+ }
+
+#if CAN_DISABLE_DFA
+ /* If we don't know lifetimes of variables, must be conservative */
+
+ if (compiler->opts.MinOpts())
+ {
+ varDsc->lvMustInit = true;
+ noway_assert(!varDsc->lvRegister);
+ }
+ else
+#endif // CAN_DISABLE_DFA
+ {
+ if (!varDsc->lvTracked)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+
+ /* Is this a 'must-init' stack pointer local? */
+
+ if (varDsc->lvMustInit && varDsc->lvOnFrame)
+ {
+ initStkLclCnt += varDsc->lvStructGcCount;
+ }
+
+ if ((compiler->lvaLclSize(varNum) > (3 * sizeof(void*))) && (largeGcStructs <= 4))
+ {
+ largeGcStructs++;
+ }
+ }
+
+ /* Don't forget about spill temps that hold pointers */
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (varTypeIsGC(tempThis->tdTempType()))
+ {
+ initStkLclCnt++;
+ }
+ }
+ }
+
+ // After debugging this further it was found that this logic is incorrect:
+    // it incorrectly assumes the stack slots are always 4 bytes (not necessarily the case),
+    // and it also double counts variables (we saw this in the debugger) around line 4829.
+    // Even though this doesn't pose a correctness problem, it will improperly decide to
+    // zero-init the stack using a block operation instead of on a 'case by case' basis.
+ genInitStkLclCnt = initStkLclCnt;
+
+ /* If we have more than 4 untracked locals, use block initialization */
+ /* TODO-Review: If we have large structs, bias toward not using block initialization since
+       we waste all the other slots. Really need to compute the correct cost
+ and compare that against zeroing the slots individually */
+
+ genUseBlockInit = (genInitStkLclCnt > (largeGcStructs + 4));
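+    // Worked example (illustrative): with 6 int-sized slots to zero-init and no large GC structs,
+    // 6 > (0 + 4) holds, so we use block init; with two large GC structs counted, 6 > (2 + 4)
+    // fails and the slots are zeroed individually.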
+
+ if (genUseBlockInit)
+ {
+ regMaskTP maskCalleeRegArgMask = intRegState.rsCalleeRegArgMaskLiveIn;
+
+ // If there is a secret stub param, don't count it, as it will no longer
+ // be live when we do block init.
+ if (compiler->info.compPublishStubParam)
+ {
+ maskCalleeRegArgMask &= ~RBM_SECRET_STUB_PARAM;
+ }
+
+#ifdef _TARGET_XARCH_
+ // If we're going to use "REP STOS", remember that we will trash EDI
+ // For fastcall we will have to save ECX, EAX
+        // so reserve two extra callee-saved registers.
+        // This is better than pushing eax and ecx, because later on
+        // we would mess up the already computed offsets on the stack (for ESP frames)
+ regSet.rsSetRegsModified(RBM_EDI);
+
+#ifdef UNIX_AMD64_ABI
+        // For register arguments we may have to save RCX (and RDI on Amd64 System V OSes).
+        // In that case use the R12 and R13 registers.
+ if (maskCalleeRegArgMask & RBM_RCX)
+ {
+ regSet.rsSetRegsModified(RBM_R12);
+ }
+
+ if (maskCalleeRegArgMask & RBM_RDI)
+ {
+ regSet.rsSetRegsModified(RBM_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ if (maskCalleeRegArgMask & RBM_ECX)
+ {
+ regSet.rsSetRegsModified(RBM_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+ if (maskCalleeRegArgMask & RBM_EAX)
+ {
+ regSet.rsSetRegsModified(RBM_EBX);
+ }
+
+#endif // _TARGET_XARCH_
+#ifdef _TARGET_ARM_
+ //
+        // On ARM, if we are using block init, then we must force-spill
+        // R4/R5/R6 so that we can use them during the zero-initialization
+        // process.
+ //
+ int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
+ if (forceSpillRegCount > 0)
+ regSet.rsSetRegsModified(RBM_R4);
+ if (forceSpillRegCount > 1)
+ regSet.rsSetRegsModified(RBM_R5);
+ if (forceSpillRegCount > 2)
+ regSet.rsSetRegsModified(RBM_R6);
+#endif // _TARGET_ARM_
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Push any callee-saved registers we have used
+ */
+
+#if defined(_TARGET_ARM64_)
+void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
+#else
+void CodeGen::genPushCalleeSavedRegisters()
+#endif
+{
+ assert(compiler->compGeneratingProlog);
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+    // x86/x64 doesn't support push of xmm/ymm regs, therefore consider only integer registers for pushing onto the
+    // stack here. Space for preserving float registers is allocated on the stack as part of the prolog sequence, not
+    // here.
+ regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_INT_CALLEE_SAVED;
+#else // !defined(_TARGET_XARCH_) || FEATURE_STACK_FP_X87
+ regMaskTP rsPushRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+#endif
+
+#if ETW_EBP_FRAMED
+ if (!isFramePointerUsed() && regSet.rsRegsModified(RBM_FPBASE))
+ {
+ noway_assert(!"Used register RBM_FPBASE as a scratch register!");
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+ // On X86/X64 we have already pushed the FP (frame-pointer) prior to calling this method
+ if (isFramePointerUsed())
+ {
+ rsPushRegs &= ~RBM_FPBASE;
+ }
+#endif
+
+#ifdef _TARGET_ARMARCH_
+ // On ARM we push the FP (frame-pointer) here along with all other callee saved registers
+ if (isFramePointerUsed())
+ rsPushRegs |= RBM_FPBASE;
+
+ //
+ // It may be possible to skip pushing/popping lr for leaf methods. However, such optimization would require
+ // changes in GC suspension architecture.
+ //
+ // We would need to guarantee that a tight loop calling a virtual leaf method can be suspended for GC. Today, we
+ // generate partially interruptible code for both the method that contains the tight loop with the call and the leaf
+ // method. GC suspension depends on return address hijacking in this case. Return address hijacking depends
+ // on the return address to be saved on the stack. If we skipped pushing/popping lr, the return address would never
+ // be saved on the stack and the GC suspension would time out.
+ //
+    // So if we wanted to skip pushing/popping lr for leaf frames, we would also need to do one of
+ // the following to make GC suspension work in the above scenario:
+ // - Make return address hijacking work even when lr is not saved on the stack.
+ // - Generate fully interruptible code for loops that contains calls
+ // - Generate fully interruptible code for leaf methods
+ //
+ // Given the limited benefit from this optimization (<10k for mscorlib NGen image), the extra complexity
+ // is not worth it.
+ //
+ rsPushRegs |= RBM_LR; // We must save the return address (in the LR register)
+
+ regSet.rsMaskCalleeSaved = rsPushRegs;
+#endif // _TARGET_ARMARCH_
+
+#ifdef DEBUG
+ if (compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
+ {
+ printf("Error: unexpected number of callee-saved registers to push. Expected: %d. Got: %d ",
+ compiler->compCalleeRegsPushed, genCountBits(rsPushRegs));
+ dspRegMask(rsPushRegs);
+ printf("\n");
+ assert(compiler->compCalleeRegsPushed == genCountBits(rsPushRegs));
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_ARM_)
+ regMaskTP maskPushRegsFloat = rsPushRegs & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = rsPushRegs & ~maskPushRegsFloat;
+
+ maskPushRegsInt |= genStackAllocRegisterMask(compiler->compLclFrameSize, maskPushRegsFloat);
+
+ assert(FitsIn<int>(maskPushRegsInt));
+ inst_IV(INS_push, (int)maskPushRegsInt);
+ compiler->unwindPushMaskInt(maskPushRegsInt);
+
+ if (maskPushRegsFloat != 0)
+ {
+ genPushFltRegs(maskPushRegsFloat);
+ compiler->unwindPushMaskFloat(maskPushRegsFloat);
+ }
+#elif defined(_TARGET_ARM64_)
+ // See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
+ // options. Case numbers in comments here refer to this document.
+ //
+ // For most frames, generate, e.g.:
+ // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
+ // // ensures stack stays aligned.
+ // stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
+ // // at top of frame (highest addresses).
+ // stp r21, r22, [sp, 0x70]
+ //
+ // Notes:
+ // 1. We don't always need to save FP. If FP isn't saved, then LR is saved with the other callee-saved registers
+ // at the top of the frame.
+ // 2. If we save FP, then the first store is FP, LR.
+ // 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
+ // preserve their lower 8 bytes, by calling convention.
+ // 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
+ // consecutive.
+ // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
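+    //
+    // Rough guide to the frame types chosen below (sizes are illustrative): with no outgoing arg
+    // space and totalFrameSize < 512 we use frameType 1 (a single pre-indexed "stp fp,lr");
+    // with totalFrameSize <= 512 and some outgoing arg space we use frameType 2 ("sub sp"
+    // followed by "stp fp,lr,[sp,#outsz]"); larger frames fall into frameType 3.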
+
+ int totalFrameSize = genTotalFrameSize();
+
+ int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
+
+ regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
+
+ if (compiler->info.compIsVarArgs)
+ {
+ assert(maskSaveRegsFloat == RBM_NONE);
+ }
+
+ int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
+ // generate based on various sizes.
+ int calleeSaveSPDelta = 0;
+ int calleeSaveSPDeltaUnaligned = 0;
+
+ if (isFramePointerUsed())
+ {
+ // We need to save both FP and LR.
+
+ assert((maskSaveRegsInt & RBM_FP) != 0);
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+
+ if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ {
+ // Case #1.
+ //
+ // Generate:
+ // stp fp,lr,[sp,#-framesz]!
+ //
+ // The (totalFrameSize < 512) condition ensures that both the predecrement
+ // and the postincrement of SP can occur with STP.
+ //
+ // After saving callee-saved registers, we establish the frame pointer with:
+ // mov fp,sp
+ // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+ frameType = 1;
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
+ INS_OPTS_PRE_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
+
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
+ offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ }
+ else if (totalFrameSize <= 512)
+ {
+ // Case #2.
+ //
+ // Generate:
+ // sub sp,sp,#framesz
+ // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
+ //
+ // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
+ // signed offset encoding.
+ //
+ // After saving callee-saved registers, we establish the frame pointer with:
+ // add fp,sp,#outsz
+ // We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+
+ frameType = 2;
+
+ assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
+
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
+ compiler->unwindAllocStack(totalFrameSize);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
+ offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ }
+ else
+ {
+ // Case 5 or 6.
+ //
+ // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
+ // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
+ // stored. For example:
+ // stp r19,r20,[sp,#-96]!
+ // stp d8,d9,[sp,#16]
+ // ... save varargs incoming integer registers ...
+ // Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
+ // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
+ // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
+ // register):
+ // sub sp,sp,#16
+ // str r19,[sp,#8]
+ // This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
+ // possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
+ // above them. If that is preferable, we could implement it.
+ // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
+ //
+ // Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
+ // padding from above).
+            // Note that #remainingFrameSz must not be zero, since we still need to save FP,LR.
+ //
+ // Generate:
+ // sub sp,sp,#remainingFrameSz
+ // or, for large frames:
+ // mov rX, #remainingFrameSz // maybe multiple instructions
+ // sub sp,sp,rX
+ //
+ // followed by:
+ // stp fp,lr,[sp,#outsz]
+ // add fp,sp,#outsz
+ //
+ // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
+ // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
+ // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
+ // following sequences:
+ //
+ // Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
+ //
+ // sub sp,sp,#remainingFrameSz2 // if #remainingFrameSz2 is 16-byte aligned
+ // stp fp,lr,[sp]
+ // mov fp,sp
+ // sub sp,sp,#outsz // in this case, #outsz must also be 16-byte aligned
+ //
+ // Or:
+ //
+ // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
+ // // always guaranteed to be 8 byte aligned).
+ // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
+ // add fp,sp,#8
+ // sub sp,sp,#outsz - #8
+ //
+ // (As usual, for a large constant "#outsz - #8", we might need multiple instructions:
+ // mov rX, #outsz - #8 // maybe multiple instructions
+ // sub sp,sp,rX
+ // )
+
+ frameType = 3;
+
+ calleeSaveSPDeltaUnaligned =
+ totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
+ assert(calleeSaveSPDeltaUnaligned >= 0);
+ assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
+ calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+
+ offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
+ assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
+ // store the callee-saved registers.
+
+ // We'll take care of these later, but callee-saved regs code shouldn't see them.
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
+ }
+ }
+ else
+ {
+ // No frame pointer (no chaining).
+ assert((maskSaveRegsInt & RBM_FP) == 0);
+ assert((maskSaveRegsInt & RBM_LR) != 0);
+
+ // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
+ // if we only have one callee-saved register plus LR to save.
+
+ NYI("Frame without frame pointer");
+ offset = 0;
+ }
+
+ assert(frameType != 0);
+
+ genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
+
+ offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
+
+ // For varargs, home the incoming arg registers last. Note that there is nothing to unwind here,
+ // so we just report "NOP" unwind codes. If there's no more frame setup after this, we don't
+ // need to add codes at all.
+
+ if (compiler->info.compIsVarArgs)
+ {
+ // There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
+ assert((offset % 16) == 0);
+ for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
+ {
+ regNumber reg2 = REG_NEXT(reg1);
+ // stp REG, REG + 1, [SP, #offset]
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, reg1, reg2, REG_SPBASE, offset);
+ compiler->unwindNop();
+ offset += 2 * REGSIZE_BYTES;
+ }
+ }
+
+ if (frameType == 1)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
+ compiler->unwindSetFrameReg(REG_FPBASE, 0);
+ }
+ else if (frameType == 2)
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+ else if (frameType == 3)
+ {
+ int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
+ assert(remainingFrameSz > 0);
+ assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
+ // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
+
+ if (compiler->lvaOutgoingArgSpaceSize >= 504)
+ {
+ // We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
+ // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
+ assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
+ int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
+ int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
+ int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
+ assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
+
+ genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
+ offset += spAdjustment2;
+
+ // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
+ // some of it)
+
+ int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
+ assert(spAdjustment3 > 0);
+ assert((spAdjustment3 % 16) == 0);
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
+ compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
+
+ genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
+ offset += spAdjustment3;
+ }
+ else
+ {
+ genPrologSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
+ pInitRegZeroed);
+ offset += remainingFrameSz;
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+ }
+
+ assert(offset == totalFrameSize);
+
+#elif defined(_TARGET_XARCH_)
+ // Push backwards so we match the order we will pop them in the epilog
+ // and all the other code that expects it to be in this order.
+ for (regNumber reg = REG_INT_LAST; rsPushRegs != RBM_NONE; reg = REG_PREV(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+
+ if ((regBit & rsPushRegs) != 0)
+ {
+ inst_RV(INS_push, reg, TYP_REF);
+ compiler->unwindPush(reg);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(REGSIZE_BYTES);
+ }
+
+ rsPushRegs &= ~regBit;
+ }
+ }
+
+#else
+ assert(!"Unknown TARGET");
+#endif // _TARGET_*
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Probe the stack and allocate the local stack frame: subtract from SP.
+ * On ARM64, this only does the probing; allocating the frame is done when callee-saved registers are saved.
+ */
+
+void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (frameSize == 0)
+ {
+ return;
+ }
+
+ const size_t pageSize = compiler->eeGetPageSize();
+
+#ifdef _TARGET_ARM_
+ assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_XARCH_
+ if (frameSize == REGSIZE_BYTES)
+ {
+ // Frame size is the same as register size.
+ inst_RV(INS_push, REG_EAX, TYP_I_IMPL);
+ }
+ else
+#endif // _TARGET_XARCH_
+ if (frameSize < pageSize)
+ {
+#ifndef _TARGET_ARM64_
+ // Frame size is (0x0008..0x1000)
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif // !_TARGET_ARM64_
+ }
+ else if (frameSize < compiler->getVeryLargeFrameSize())
+ {
+ // Frame size is (0x1000..0x3000)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regTracker.rsTrackRegTrash(initReg);
+ *pInitRegZeroed = false; // The initReg does not contain zero
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -(int)pageSize);
+#endif
+
+ if (frameSize >= 0x2000)
+ {
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -2 * (ssize_t)pageSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, initReg, REG_SPBASE, initReg);
+ regTracker.rsTrackRegTrash(initReg);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE, -2 * (int)pageSize);
+#endif
+ }
+
+#ifdef _TARGET_ARM64_
+ compiler->unwindPadding();
+#else // !_TARGET_ARM64_
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
+ compiler->unwindPadding();
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, REG_SPBASE, REG_SPBASE, initReg);
+#else
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif
+#endif // !_TARGET_ARM64_
+ }
+ else
+ {
+ // Frame size >= 0x3000
+ assert(frameSize >= compiler->getVeryLargeFrameSize());
+
+ // Emit the following sequence to 'tickle' the pages.
+ // Note it is important that stack pointer not change until this is
+ // complete since the tickles could cause a stack overflow, and we
+ // need to be able to crawl the stack afterward (which means the
+ // stack pointer needs to be known).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_XARCH_
+ bool pushedStubParam = false;
+ if (compiler->info.compPublishStubParam && (REG_SECRET_STUB_PARAM == initReg))
+ {
+ // push register containing the StubParam
+ inst_RV(INS_push, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ pushedStubParam = true;
+ }
+#endif // _TARGET_XARCH_
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+
+ //
+ // Can't have a label inside the ReJIT padding area
+ //
+ genPrologPadForReJit();
+
+#if CPU_LOAD_STORE_ARCH
+
+ // TODO-ARM64-Bug?: set the availMask properly!
+ regMaskTP availMask =
+ (regSet.rsGetModifiedRegsMask() & RBM_ALLINT) | RBM_R12 | RBM_LR; // Set of available registers
+ availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
+ availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg
+
+ regNumber rOffset = initReg;
+ regNumber rLimit;
+ regNumber rTemp;
+ regMaskTP tempMask;
+
+ // We pick the next lowest register number for rTemp
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rTemp = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // We pick the next lowest register number for rLimit
+ noway_assert(availMask != RBM_NONE);
+ tempMask = genFindLowestBit(availMask);
+ rLimit = genRegNumFromMask(tempMask);
+ availMask &= ~tempMask;
+
+ // TODO-LdStArch-Bug?: review this. The first time we load from [sp+0] which will always succeed. That doesn't
+ // make sense.
+ // TODO-ARM64-CQ: we could probably use ZR on ARM64 instead of rTemp.
+ //
+ // mov rLimit, -frameSize
+ // loop:
+ // ldr rTemp, [sp+rOffset]
+ // sub rOffset, 0x1000 // Note that 0x1000 on ARM32 uses the funky Thumb immediate encoding
+ // cmp rOffset, rLimit
+ // jge loop
+ noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(int)frameSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, rOffset);
+ regTracker.rsTrackRegTrash(rTemp);
+#if defined(_TARGET_ARM_)
+ getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rOffset, pageSize);
+#elif defined(_TARGET_ARM64_)
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
+#endif // _TARGET_ARM64_
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rOffset, rLimit);
+ getEmitter()->emitIns_J(INS_bhi, NULL, -4);
+
+#else // !CPU_LOAD_STORE_ARCH
+
+ // Code size for each instruction. We need this because the
+ // backward branch is hard-coded with the number of bytes to branch.
+ // The encoding differs based on the architecture and what register is
+ // used (namely, using RAX has a smaller encoding).
+ //
+ // loop:
+ // For x86
+ // test [esp + eax], eax 3
+ // sub eax, 0x1000 5
+ // cmp EAX, -frameSize 5
+ // jge loop 2
+ //
+ // For AMD64 using RAX
+ // test [rsp + rax], rax 4
+ // sub rax, 0x1000 6
+ // cmp rax, -frameSize 6
+ // jge loop 2
+ //
+ // For AMD64 using RBP
+ // test [rsp + rbp], rbp 4
+ // sub rbp, 0x1000 7
+ // cmp rbp, -frameSize 7
+ // jge loop 2
+
+ getEmitter()->emitIns_R_ARR(INS_TEST, EA_PTRSIZE, initReg, REG_SPBASE, initReg, 0);
+ inst_RV_IV(INS_sub, initReg, pageSize, EA_PTRSIZE);
+ inst_RV_IV(INS_cmp, initReg, -((ssize_t)frameSize), EA_PTRSIZE);
+
+ int bytesForBackwardJump;
+#ifdef _TARGET_AMD64_
+ assert((initReg == REG_EAX) || (initReg == REG_EBP)); // We use RBP as initReg for EH funclets.
+ bytesForBackwardJump = ((initReg == REG_EAX) ? -18 : -20);
+#else // !_TARGET_AMD64_
+ assert(initReg == REG_EAX);
+ bytesForBackwardJump = -15;
+#endif // !_TARGET_AMD64_
+
+ inst_IV(INS_jge, bytesForBackwardJump); // Branch backwards to start of loop
+
+#endif // !CPU_LOAD_STORE_ARCH
+
+ *pInitRegZeroed = false; // The initReg does not contain zero
+
+#ifdef _TARGET_XARCH_
+ if (pushedStubParam)
+ {
+ // pop eax
+ inst_RV(INS_pop, REG_SECRET_STUB_PARAM, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_SECRET_STUB_PARAM);
+ }
+#endif // _TARGET_XARCH_
+
+#if CPU_LOAD_STORE_ARCH
+ compiler->unwindPadding();
+#endif
+
+#if CPU_LOAD_STORE_ARCH
+#ifndef _TARGET_ARM64_
+ inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
+#endif // !_TARGET_ARM64_
+#else
+ // sub esp, frameSize 6
+ inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
+#endif
+ }
+
+#ifndef _TARGET_ARM64_
+ compiler->unwindAllocStack(frameSize);
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ psiAdjustStackLevel(frameSize);
+ }
+#endif // !_TARGET_ARM64_
+}
+
+#if defined(_TARGET_ARM_)
+
+void CodeGen::genPushFltRegs(regMaskTP regMask)
+{
+    assert(regMask != 0);                        // Don't call unless we have some registers to push
+    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
+
+ regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
+ int slots = genCountBits(regMask);
+ // regMask should be contiguously set
+ regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ assert((tmpMask & (tmpMask - 1)) == 0);
+ assert(lowReg == REG_F16); // Currently we expect to start at F16 in the unwind codes
+
+ // Our calling convention requires that we only use vpush for TYP_DOUBLE registers
+ noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
+ noway_assert((slots % 2) == 0);
+
+ getEmitter()->emitIns_R_I(INS_vpush, EA_8BYTE, lowReg, slots / 2);
+}
+
+void CodeGen::genPopFltRegs(regMaskTP regMask)
+{
+    assert(regMask != 0);                        // Don't call unless we have some registers to pop
+    assert((regMask & RBM_ALLFLOAT) == regMask); // Only floating point registers should be in regMask
+
+ regNumber lowReg = genRegNumFromMask(genFindLowestBit(regMask));
+ int slots = genCountBits(regMask);
+ // regMask should be contiguously set
+ regMaskTP tmpMask = ((regMask >> lowReg) + 1); // tmpMask should have a single bit set
+ assert((tmpMask & (tmpMask - 1)) == 0);
+
+ // Our calling convention requires that we only use vpop for TYP_DOUBLE registers
+ noway_assert(floatRegCanHoldType(lowReg, TYP_DOUBLE));
+ noway_assert((slots % 2) == 0);
+
+ getEmitter()->emitIns_R_I(INS_vpop, EA_8BYTE, lowReg, slots / 2);
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * If we have a jmp call, then the argument registers cannot be used in the
+ * epilog. So return the current call's argument registers as the argument
+ * registers for the jmp call.
+ */
+regMaskTP CodeGen::genJmpCallArgMask()
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP argMask = RBM_NONE;
+ for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; ++varNum)
+ {
+ const LclVarDsc& desc = compiler->lvaTable[varNum];
+ if (desc.lvIsRegArg)
+ {
+ argMask |= genRegMask(desc.lvArgReg);
+ }
+ }
+ return argMask;
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Free the local stack frame: add to SP.
+ * If epilog unwind hasn't been started, and we generate code, we start unwind
+ * and set *pUnwindStarted = true.
+ */
+
+void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ if (frameSize == 0)
+ return;
+
+ // Add 'frameSize' to SP.
+ //
+ // Unfortunately, we can't just use:
+ //
+ // inst_RV_IV(INS_add, REG_SPBASE, frameSize, EA_PTRSIZE);
+ //
+ // because we need to generate proper unwind codes for each instruction generated,
+ // and large frame sizes might generate a temp register load which might
+ // need an unwind code. We don't want to generate a "NOP" code for this
+ // temp register load; we want the unwind codes to start after that.
+
+ if (arm_Valid_Imm_For_Instr(INS_add, frameSize, INS_FLAGS_DONT_CARE))
+ {
+ if (!*pUnwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ *pUnwindStarted = true;
+ }
+
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, frameSize, INS_FLAGS_DONT_CARE);
+ }
+ else
+ {
+ regMaskTP grabMask = RBM_INT_CALLEE_TRASH;
+ if (jmpEpilog)
+ {
+ // Do not use argument registers as scratch registers in the jmp epilog.
+ grabMask &= ~genJmpCallArgMask();
+ }
+#ifndef LEGACY_BACKEND
+ regNumber tmpReg;
+ tmpReg = REG_TMP_0;
+#else // LEGACY_BACKEND
+ regNumber tmpReg = regSet.rsGrabReg(grabMask);
+#endif // LEGACY_BACKEND
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, frameSize);
+ if (*pUnwindStarted)
+ {
+ compiler->unwindPadding();
+ }
+
+ // We're going to generate an unwindable instruction, so check again if
+ // we need to start the unwind codes.
+
+ if (!*pUnwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ *pUnwindStarted = true;
+ }
+
+ getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, REG_SPBASE, tmpReg, INS_FLAGS_DONT_CARE);
+ }
+
+ compiler->unwindAllocStack(frameSize);
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Returns register mask to push/pop to allocate a small stack frame,
+ * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size
+ * is zero, or if we should use "sub sp" / "add sp" instead of push/pop.
+ */
+regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat)
+{
+ assert(compiler->compGeneratingProlog || compiler->compGeneratingEpilog);
+
+ // We can't do this optimization with callee saved floating point registers because
+ // the stack would be allocated in a wrong spot.
+ if (maskCalleeSavedFloat != RBM_NONE)
+ return RBM_NONE;
+
+    // Allocate space for small frames by pushing extra registers. This generates smaller and faster code
+    // than an extra sub sp,XXX/add sp,XXX pair.
+    // R0 and R1 may be used for the return value, so keep things simple and just skip the optimization
+    // for the 3*REGSIZE_BYTES and 4*REGSIZE_BYTES cases. They are less common and have more
+    // significant negative side-effects (more memory bus traffic).
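+    // For example, a one-register-sized frame is allocated with "push {r3}" in the prolog, and the
+    // matching register is folded into the epilog's pop mask, instead of emitting
+    // "sub sp, #4" / "add sp, #4".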
+ switch (frameSize)
+ {
+ case REGSIZE_BYTES:
+ return RBM_R3;
+ case 2 * REGSIZE_BYTES:
+ return RBM_R2 | RBM_R3;
+ default:
+ return RBM_NONE;
+ }
+}
+
+#endif // _TARGET_ARM_
+
+#if !FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * initFltRegs -- The mask of float regs to be zeroed.
+ * initDblRegs -- The mask of double regs to be zeroed.
+ * initReg -- A zero initialized integer reg to copy from.
+ *
+ * Does best effort to move between VFP/xmm regs if one is already
+ * initialized to 0. (Arm Only) Else copies from the integer register which
+ * is slower.
+ */
+void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& initDblRegs, const regNumber& initReg)
+{
+ assert(compiler->compGeneratingProlog);
+
+    // The first float/double reg that is initialized to 0, so it can be used to
+    // initialize the remaining registers.
+ regNumber fltInitReg = REG_NA;
+ regNumber dblInitReg = REG_NA;
+
+ // Iterate through float/double registers and initialize them to 0 or
+ // copy from already initialized register of the same type.
+ regMaskTP regMask = genRegMask(REG_FP_FIRST);
+ for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
+ {
+ if (regMask & initFltRegs)
+ {
+ // Do we have a float register already set to 0?
+ if (fltInitReg != REG_NA)
+ {
+ // Copy from float.
+ inst_RV_RV(ins_Copy(TYP_FLOAT), reg, fltInitReg, TYP_FLOAT);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ // Do we have a double register initialized to 0?
+ if (dblInitReg != REG_NA)
+ {
+ // Copy from double.
+ inst_RV_RV(INS_vcvt_d2f, reg, dblInitReg, TYP_FLOAT);
+ }
+ else
+ {
+ // Copy from int.
+ inst_RV_RV(INS_vmov_i2f, reg, initReg, TYP_FLOAT, EA_4BYTE);
+ }
+#elif defined(_TARGET_XARCH_)
+                // xorpd xmmreg, xmmreg is the fastest way to zero a float register, faster than
+                // moving the constant 0.0f. Though we only need to initialize the low 32 bits,
+                // we use xorpd to zero all 64 bits of the xmm register so that it can also be
+                // used to zero-initialize xmm registers that hold double values.
+ inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
+ dblInitReg = reg;
+#elif defined(_TARGET_ARM64_)
+ NYI("Initialize floating-point register to zero");
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+ fltInitReg = reg;
+ }
+ }
+ else if (regMask & initDblRegs)
+ {
+ // Do we have a double register already set to 0?
+ if (dblInitReg != REG_NA)
+ {
+ // Copy from double.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg, dblInitReg, TYP_DOUBLE);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ // Do we have a float register initialized to 0?
+ if (fltInitReg != REG_NA)
+ {
+ // Copy from float.
+ inst_RV_RV(INS_vcvt_f2d, reg, fltInitReg, TYP_DOUBLE);
+ }
+ else
+ {
+ // Copy from int.
+ inst_RV_RV_RV(INS_vmov_i2d, reg, initReg, initReg, EA_8BYTE);
+ }
+#elif defined(_TARGET_XARCH_)
+                // xorpd xmmreg, xmmreg is the fastest way to zero a double register, faster than
+                // moving the constant 0.0. We can also use the lower 32 bits of 'reg' to
+                // zero-initialize xmm registers that hold float values later on.
+ inst_RV_RV(INS_xorpd, reg, reg, TYP_DOUBLE);
+ fltInitReg = reg;
+#elif defined(_TARGET_ARM64_)
+ // We will just zero out the entire vector register. This sets it to a double zero value
+ getEmitter()->emitIns_R_I(INS_movi, EA_16BYTE, reg, 0x00, INS_OPTS_16B);
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+ dblInitReg = reg;
+ }
+ }
+ }
+}
+#endif // !FEATURE_STACK_FP_X87
+
+/*-----------------------------------------------------------------------------
+ *
+ * Restore any callee-saved registers we have used
+ */
+
+#if defined(_TARGET_ARM_)
+
+bool CodeGen::genCanUsePopToReturn(regMaskTP maskPopRegsInt, bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ if (!jmpEpilog && regSet.rsMaskPreSpillRegs(true) == RBM_NONE)
+ return true;
+ else
+ return false;
+}
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP maskPopRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+ regMaskTP maskPopRegsFloat = maskPopRegs & RBM_ALLFLOAT;
+ regMaskTP maskPopRegsInt = maskPopRegs & ~maskPopRegsFloat;
+
+ // First, pop float registers
+
+ if (maskPopRegsFloat != RBM_NONE)
+ {
+ genPopFltRegs(maskPopRegsFloat);
+ compiler->unwindPopMaskFloat(maskPopRegsFloat);
+ }
+
+ // Next, pop integer registers
+
+ if (!jmpEpilog)
+ {
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(compiler->compLclFrameSize, maskPopRegsFloat);
+ maskPopRegsInt |= maskStackAlloc;
+ }
+
+ if (isFramePointerUsed())
+ {
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ maskPopRegsInt |= RBM_FPBASE;
+ }
+
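+    // For example, a frame that saved r4-r7 and r11 with no pre-spilled registers returns
+    // with a single "pop {r4-r7,r11,pc}", while a jmp epilog pops into lr instead and
+    // leaves the transfer of control to the jmp sequence.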
+ if (genCanUsePopToReturn(maskPopRegsInt, jmpEpilog))
+ {
+ maskPopRegsInt |= RBM_PC;
+ // Record the fact that we use a pop to the PC to perform the return
+ genUsedPopToReturn = true;
+ }
+ else
+ {
+ maskPopRegsInt |= RBM_LR;
+ // Record the fact that we did not use a pop to the PC to perform the return
+ genUsedPopToReturn = false;
+ }
+
+ assert(FitsIn<int>(maskPopRegsInt));
+ inst_IV(INS_pop, (int)maskPopRegsInt);
+ compiler->unwindPopMaskInt(maskPopRegsInt);
+}
+
+#elif defined(_TARGET_ARM64_)
+
+void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ regMaskTP rsRestoreRegs = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+
+ if (isFramePointerUsed())
+ {
+ rsRestoreRegs |= RBM_FPBASE;
+ }
+
+ rsRestoreRegs |= RBM_LR; // We must save/restore the return address (in the LR register)
+
+ regMaskTP regsToRestoreMask = rsRestoreRegs;
+
+ int totalFrameSize = genTotalFrameSize();
+
+ int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
+ // order.
+ int frameType = 0; // An indicator of what type of frame we are popping.
+ int calleeSaveSPDelta = 0;
+ int calleeSaveSPDeltaUnaligned = 0;
+
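+    // For example (mirroring the prolog shapes):
+    //   frameType 1: no outgoing arg space and totalFrameSize < 512
+    //                -> the epilog ends with "ldp fp,lr,[sp],#totalFrameSize"
+    //   frameType 2: totalFrameSize <= 512, e.g. 480 bytes total with 32 bytes of outgoing args
+    //                -> "ldp fp,lr,[sp,#32]" followed by "add sp,sp,#480"
+    //   frameType 3: anything larger; SP is restored in several adjustments below.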
+ if (isFramePointerUsed())
+ {
+ if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ {
+ frameType = 1;
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // mov sp, fp
+ inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
+ compiler->unwindSetFrameReg(REG_FPBASE, 0);
+ }
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+
+ // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
+ // of stack.
+ calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ }
+ else if (totalFrameSize <= 512)
+ {
+ frameType = 2;
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // sub sp, fp, #outsz
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+
+ // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
+ // of stack.
+ calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ }
+ else
+ {
+ frameType = 3;
+
+ calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
+ 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
+ assert(calleeSaveSPDeltaUnaligned >= 0);
+            assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It had better be at least 8 byte aligned.
+ calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
+
+ int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
+ assert(remainingFrameSz > 0);
+
+ if (compiler->lvaOutgoingArgSpaceSize >= 504)
+ {
+ // We can't do "ldp fp,lr,[sp,#outsz]" because #outsz is too big.
+ // If compiler->lvaOutgoingArgSpaceSize is not aligned, we need to align the SP adjustment.
+ assert(remainingFrameSz > (int)compiler->lvaOutgoingArgSpaceSize);
+ int spAdjustment2Unaligned = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize;
+ int spAdjustment2 = (int)roundUp((size_t)spAdjustment2Unaligned, STACK_ALIGN);
+ int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
+ assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == REGSIZE_BYTES));
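+
+                // Worked example: with remainingFrameSz = 544 and lvaOutgoingArgSpaceSize = 520,
+                // spAdjustment2Unaligned = 24, spAdjustment2 = 32 and alignmentAdjustment2 = 8.
+                // The non-localloc path below then adds 520 - 8 = 512 to SP (still 16-byte aligned),
+                // and genEpilogRestoreRegPair loads fp/lr from [sp,#8] and adds the remaining 32.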
+
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
+ // prolog.
+ // sub sp, fp, #alignmentAdjustment2
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
+ compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
+ }
+ else
+ {
+ // Generate:
+ // add sp,sp,#outsz ; if #outsz is not 16-byte aligned, we need to be more
+ // ; careful
+ int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
+ assert(spAdjustment3 > 0);
+ assert((spAdjustment3 % 16) == 0);
+ genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
+ }
+
+ // Generate:
+ // ldp fp,lr,[sp]
+ // add sp,sp,#remainingFrameSz
+ genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, REG_IP0, nullptr);
+ }
+ else
+ {
+ if (compiler->compLocallocUsed)
+ {
+ // Restore sp from fp
+ // sub sp, fp, #outsz
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ }
+
+ // Generate:
+ // ldp fp,lr,[sp,#outsz]
+ // add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
+ // ; it's large
+
+ genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, REG_IP0,
+ nullptr);
+ }
+
+            // Unlike frameType=1 or frameType=2, which restore SP at the end,
+            // frameType=3 has already adjusted SP above to remove the local frame.
+ // There is at most one alignment slot between SP and where we store the callee-saved registers.
+ calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
+ assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
+ }
+ }
+ else
+ {
+ // No frame pointer (no chaining).
+ NYI("Frame without frame pointer");
+ calleeSaveSPOffset = 0;
+ }
+
+ genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
+
+ if (frameType == 1)
+ {
+ // Generate:
+ // ldp fp,lr,[sp],#framesz
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, totalFrameSize,
+ INS_OPTS_POST_INDEX);
+ compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
+ }
+ else if (frameType == 2)
+ {
+ // Generate:
+        // ldp fp,lr,[sp,#outsz]
+ // add sp,sp,#framesz
+
+ getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
+ compiler->unwindAllocStack(totalFrameSize);
+ }
+ else if (frameType == 3)
+ {
+ // Nothing to do after restoring callee-saved registers.
+ }
+ else
+ {
+ unreached();
+ }
+}
+
+#elif defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ unsigned popCount = 0;
+ if (regSet.rsRegsModified(RBM_EBX))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_FPBASE))
+ {
+ // EBP cannot be directly modified for EBP frame and double-aligned frames
+ assert(!doubleAlignOrFramePointerUsed());
+
+ popCount++;
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ }
+
+#ifndef UNIX_AMD64_ABI
+    // Under the System V AMD64 calling convention ESI and EDI are volatile registers,
+    // so they are only saved/popped here for non-System-V targets.
+ if (regSet.rsRegsModified(RBM_ESI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_EDI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
+ }
+#endif // !defined(UNIX_AMD64_ABI)
+
+#ifdef _TARGET_AMD64_
+ if (regSet.rsRegsModified(RBM_R12))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R12, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R13))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R13, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R14))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R14, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_R15))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_R15, TYP_I_IMPL);
+ }
+#endif // _TARGET_AMD64_
+
+ // Amd64/x86 doesn't support push/pop of xmm registers.
+ // These will get saved to stack separately after allocating
+ // space on stack in prolog sequence. PopCount is essentially
+ // tracking the count of integer registers pushed.
+
+ noway_assert(compiler->compCalleeRegsPushed == popCount);
+}
+
+#elif defined(_TARGET_X86_)
+
+void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog)
+{
+ assert(compiler->compGeneratingEpilog);
+
+ unsigned popCount = 0;
+
+ /* NOTE: The EBP-less frame code below depends on the fact that
+ all of the pops are generated right at the start and
+ each takes one byte of machine code.
+ */
+
+ if (regSet.rsRegsModified(RBM_FPBASE))
+ {
+ // EBP cannot be directly modified for EBP frame and double-aligned frames
+ noway_assert(!doubleAlignOrFramePointerUsed());
+
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ popCount++;
+ }
+ if (regSet.rsRegsModified(RBM_EBX))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EBX, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_ESI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_ESI, TYP_I_IMPL);
+ }
+ if (regSet.rsRegsModified(RBM_EDI))
+ {
+ popCount++;
+ inst_RV(INS_pop, REG_EDI, TYP_I_IMPL);
+ }
+ noway_assert(compiler->compCalleeRegsPushed == popCount);
+}
+
+#endif // _TARGET_*
+
+// We need a register with value zero. Zero the initReg, if necessary, and set *pInitRegZeroed if so.
+// Return the register to use. On ARM64, we never touch the initReg, and always just return REG_ZR.
+regNumber CodeGen::genGetZeroReg(regNumber initReg, bool* pInitRegZeroed)
+{
+#ifdef _TARGET_ARM64_
+ return REG_ZR;
+#else // !_TARGET_ARM64_
+ if (*pInitRegZeroed == false)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ *pInitRegZeroed = true;
+ }
+ return initReg;
+#endif // !_TARGET_ARM64_
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Do we have any untracked pointer locals at all,
+ * or do we need to initialize memory for locspace?
+ *
+ * untrLclHi - (Untracked locals High-Offset) The upper bound offset at which the zero init code will end
+ * initializing memory (not inclusive).
+ * untrLclLo - (Untracked locals Low-Offset) The lower bound at which the zero init code will start zero
+ * initializing memory.
+ * initReg - A scratch register (that gets set to zero on some platforms).
+ * pInitRegZeroed - Sets a flag that tells the callee whether or not the initReg register got zeroed.
+ */
+void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (genUseBlockInit)
+ {
+ assert(untrLclHi > untrLclLo);
+#ifdef _TARGET_ARMARCH_
+ /*
+ Generate the following code:
+
+ For cnt less than 10
+
+ mov rZero1, 0
+ mov rZero2, 0
+ mov rCnt, <cnt>
+ stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> stm <rZero1,rZero2>,[rAddr!]
+ <optional> str rZero1,[rAddr]
+
+ For rCnt greater than or equal to 10
+
+ mov rZero1, 0
+ mov rZero2, 0
+ mov rCnt, <cnt/2>
+ sub rAddr, sp, OFFS
+
+ loop:
+ stm <rZero1,rZero2>,[rAddr!]
+ sub rCnt,rCnt,1
+ jnz loop
+
+ <optional> str rZero1,[rAddr] // When cnt is odd
+
+ NOTE: for ARM64, the instruction is stp, not stm. And we can use ZR instead of allocating registers.
+ */
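+        // For example, zeroing 44 bytes on ARM gives uCntSlots = 11, so the loop form is used
+        // with rCnt = 5: five stm iterations clear 40 bytes and the trailing str clears the
+        // final 4-byte slot. On ARM64 the same shape uses stp/str with ZR.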
+
+ regNumber rAddr;
+ regNumber rCnt = REG_NA; // Invalid
+ regMaskTP regMask;
+
+ regMaskTP availMask = regSet.rsGetModifiedRegsMask() | RBM_INT_CALLEE_TRASH; // Set of available registers
+ availMask &= ~intRegState.rsCalleeRegArgMaskLiveIn; // Remove all of the incoming argument registers as they are
+ // currently live
+ availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg as we will zero it and maybe use it for
+ // a large constant.
+
+#if defined(_TARGET_ARM_)
+
+ if (compiler->compLocallocUsed)
+ {
+ availMask &= ~RBM_SAVED_LOCALLOC_SP; // Remove the register reserved when we have a localloc frame
+ }
+
+ regNumber rZero1; // We're going to use initReg for rZero1
+ regNumber rZero2;
+
+ // We pick the next lowest register number for rZero2
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rZero2 = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+ assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rZero2 is not a live incoming argument reg
+
+ // We pick the next lowest register number for rAddr
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rAddr = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+
+#else // !defined(_TARGET_ARM_)
+
+ regNumber rZero1 = REG_ZR;
+ rAddr = initReg;
+ *pInitRegZeroed = false;
+
+#endif // !defined(_TARGET_ARM_)
+
+ bool useLoop = false;
+ unsigned uCntBytes = untrLclHi - untrLclLo;
+ assert((uCntBytes % sizeof(int)) == 0); // The smallest stack slot is always 4 bytes.
+ unsigned uCntSlots = uCntBytes / REGSIZE_BYTES; // How many register sized stack slots we're going to use.
+
+ // When uCntSlots is 9 or less, we will emit a sequence of stm/stp instructions inline.
+ // When it is 10 or greater, we will emit a loop containing a stm/stp instruction.
+ // In both of these cases the stm/stp instruction will write two zeros to memory
+ // and we will use a single str instruction at the end whenever we have an odd count.
+ if (uCntSlots >= 10)
+ useLoop = true;
+
+ if (useLoop)
+ {
+ // We pick the next lowest register number for rCnt
+ noway_assert(availMask != RBM_NONE);
+ regMask = genFindLowestBit(availMask);
+ rCnt = genRegNumFromMask(regMask);
+ availMask &= ~regMask;
+ }
+
+ assert((genRegMask(rAddr) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rAddr is not a live incoming argument reg
+#if defined(_TARGET_ARM_)
+ if (arm_Valid_Imm_For_Add(untrLclLo, INS_FLAGS_DONT_CARE))
+#else // !_TARGET_ARM_
+ if (emitter::emitIns_valid_imm_for_add(untrLclLo, EA_PTRSIZE))
+#endif // !_TARGET_ARM_
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), untrLclLo);
+ }
+ else
+ {
+ // Load immediate into the InitReg register
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, (ssize_t)untrLclLo);
+ getEmitter()->emitIns_R_R_R(INS_add, EA_PTRSIZE, rAddr, genFramePointerReg(), initReg);
+ *pInitRegZeroed = false;
+ }
+
+ if (useLoop)
+ {
+ noway_assert(uCntSlots >= 2);
+ assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // rCnt is not a live incoming argument reg
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
+ }
+
+#if defined(_TARGET_ARM_)
+ rZero1 = genGetZeroReg(initReg, pInitRegZeroed);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, rZero2);
+ ssize_t stmImm = (ssize_t)(genRegMask(rZero1) | genRegMask(rZero2));
+#endif // _TARGET_ARM_
+
+ if (!useLoop)
+ {
+ while (uCntBytes >= REGSIZE_BYTES * 2)
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm);
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
+ INS_OPTS_POST_INDEX);
+#endif // !_TARGET_ARM_
+ uCntBytes -= REGSIZE_BYTES * 2;
+ }
+ }
+ else // useLoop is true
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_I(INS_stm, EA_PTRSIZE, rAddr, stmImm); // zero stack slots
+ getEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, rCnt, 1, INS_FLAGS_SET);
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, rAddr, 2 * REGSIZE_BYTES,
+ INS_OPTS_POST_INDEX); // zero stack slots
+ getEmitter()->emitIns_R_R_I(INS_subs, EA_PTRSIZE, rCnt, rCnt, 1);
+#endif // !_TARGET_ARM_
+ getEmitter()->emitIns_J(INS_bhi, NULL, -3);
+ uCntBytes %= REGSIZE_BYTES * 2;
+ }
+
+ if (uCntBytes >= REGSIZE_BYTES) // check and zero the last register-sized stack slot (odd number)
+ {
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, rZero1, rAddr, 0);
+#else // _TARGET_ARM_
+ if ((uCntBytes - REGSIZE_BYTES) == 0)
+ {
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_ZR, rAddr, REGSIZE_BYTES, INS_OPTS_POST_INDEX);
+ }
+#endif // !_TARGET_ARM_
+ uCntBytes -= REGSIZE_BYTES;
+ }
+#ifdef _TARGET_ARM64_
+ if (uCntBytes > 0)
+ {
+ assert(uCntBytes == sizeof(int));
+ getEmitter()->emitIns_R_R_I(INS_str, EA_4BYTE, REG_ZR, rAddr, 0);
+ uCntBytes -= sizeof(int);
+ }
+#endif // _TARGET_ARM64_
+ noway_assert(uCntBytes == 0);
+
+#elif defined(_TARGET_XARCH_)
+ /*
+ Generate the following code:
+
+ lea edi, [ebp/esp-OFFS]
+ mov ecx, <size>
+ xor eax, eax
+ rep stosd
+ */
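+        // For example, zeroing 64 bytes emits "mov ecx, 16" (64 bytes / sizeof(int)),
+        // and "rep stosd" then stores 16 zeroed dwords starting at edi.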
+
+ noway_assert(regSet.rsRegsModified(RBM_EDI));
+
+#ifdef UNIX_AMD64_ABI
+    // For register arguments we may have to save RCX and RDI on Amd64 System V OSes
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_R12));
+ inst_RV_RV(INS_mov, REG_R12, REG_RCX);
+ regTracker.rsTrackRegTrash(REG_R12);
+ }
+
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_R13));
+ inst_RV_RV(INS_mov, REG_R13, REG_RDI);
+ regTracker.rsTrackRegTrash(REG_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ // For register arguments we may have to save ECX
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
+ {
+ noway_assert(regSet.rsRegsModified(RBM_ESI));
+ inst_RV_RV(INS_mov, REG_ESI, REG_ECX);
+ regTracker.rsTrackRegTrash(REG_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+ noway_assert((intRegState.rsCalleeRegArgMaskLiveIn & RBM_EAX) == 0);
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_EDI, genFramePointerReg(), untrLclLo);
+ regTracker.rsTrackRegTrash(REG_EDI);
+
+ inst_RV_IV(INS_mov, REG_ECX, (untrLclHi - untrLclLo) / sizeof(int), EA_4BYTE);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EAX);
+ instGen(INS_r_stosd);
+
+#ifdef UNIX_AMD64_ABI
+ // Move back the argument registers
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RCX)
+ {
+ inst_RV_RV(INS_mov, REG_RCX, REG_R12);
+ }
+
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_RDI)
+ {
+ inst_RV_RV(INS_mov, REG_RDI, REG_R13);
+ }
+#else // !UNIX_AMD64_ABI
+ // Move back the argument registers
+ if (intRegState.rsCalleeRegArgMaskLiveIn & RBM_ECX)
+ {
+ inst_RV_RV(INS_mov, REG_ECX, REG_ESI);
+ }
+#endif // !UNIX_AMD64_ABI
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+ }
+ else if (genInitStkLclCnt > 0)
+ {
+ assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
+ 0); // initReg is not a live incoming argument reg
+
+ /* Initialize any lvMustInit vars on the stack */
+
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (!varDsc->lvMustInit)
+ {
+ continue;
+ }
+
+ // TODO-Review: I'm not sure that we're correctly handling the mustInit case for
+ // partially-enregistered vars in the case where we don't use a block init.
+ noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame);
+
+ // lvMustInit can only be set for GC types or TYP_STRUCT types
+ // or when compInitMem is true
+ // or when in debug code
+
+ noway_assert(varTypeIsGC(varDsc->TypeGet()) || (varDsc->TypeGet() == TYP_STRUCT) ||
+ compiler->info.compInitMem || compiler->opts.compDbgCode);
+
+#ifdef _TARGET_64BIT_
+ if (!varDsc->lvOnFrame)
+ {
+ continue;
+ }
+#else // !_TARGET_64BIT_
+ if (varDsc->lvRegister)
+ {
+ if (varDsc->lvOnFrame)
+ {
+ /* This is a partially enregistered TYP_LONG var */
+ noway_assert(varDsc->lvOtherReg == REG_STK);
+ noway_assert(varDsc->lvType == TYP_LONG);
+
+ noway_assert(compiler->info.compInitMem);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, genGetZeroReg(initReg, pInitRegZeroed),
+ varNum, sizeof(int));
+ }
+ continue;
+ }
+#endif // !_TARGET_64BIT_
+
+ if ((varDsc->TypeGet() == TYP_STRUCT) && !compiler->info.compInitMem &&
+ (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ // We only initialize the GC variables in the TYP_STRUCT
+ const unsigned slots = (unsigned)compiler->lvaLclSize(varNum) / REGSIZE_BYTES;
+ const BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] != TYPE_GC_NONE)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
+ genGetZeroReg(initReg, pInitRegZeroed), varNum, i * REGSIZE_BYTES);
+ }
+ }
+ }
+ else
+ {
+ regNumber zeroReg = genGetZeroReg(initReg, pInitRegZeroed);
+
+ // zero out the whole thing rounded up to a single stack slot size
+ unsigned lclSize = (unsigned)roundUp(compiler->lvaLclSize(varNum), sizeof(int));
+ unsigned i;
+ for (i = 0; i + REGSIZE_BYTES <= lclSize; i += REGSIZE_BYTES)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, zeroReg, varNum, i);
+ }
+
+#ifdef _TARGET_64BIT_
+ assert(i == lclSize || (i + sizeof(int) == lclSize));
+ if (i != lclSize)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, zeroReg, varNum, i);
+ i += sizeof(int);
+ }
+#endif // _TARGET_64BIT_
+ assert(i == lclSize);
+ }
+ }
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr;
+ tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (!varTypeIsGC(tempThis->tdTempType()))
+ {
+ continue;
+ }
+
+ // printf("initialize untracked spillTmp [EBP-%04X]\n", stkOffs);
+
+ inst_ST_RV(ins_Store(TYP_I_IMPL), tempThis, 0, genGetZeroReg(initReg, pInitRegZeroed), TYP_I_IMPL);
+ }
+ }
+ }
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Save the generic context argument.
+ *
+ * We need to do this within the "prolog" in case anyone tries to inspect
+ * the param-type-arg/this (which can be done after the prolog) using
+ * ICodeManager::GetParamTypeArg().
+ */
+
+void CodeGen::genReportGenericContextArg(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ bool reportArg = compiler->lvaReportParamTypeArg();
+
+    // We should report either the generic context arg or "this", whichever is used.
+ if (!reportArg)
+ {
+#ifndef JIT32_GCENCODER
+ if (!compiler->lvaKeepAliveAndReportThis())
+#endif
+ {
+ return;
+ }
+ }
+
+ // For JIT32_GCENCODER, we won't be here if reportArg is false.
+ unsigned contextArg = reportArg ? compiler->info.compTypeCtxtArg : compiler->info.compThisArg;
+
+ noway_assert(contextArg != BAD_VAR_NUM);
+ LclVarDsc* varDsc = &compiler->lvaTable[contextArg];
+
+ // We are still in the prolog and compiler->info.compTypeCtxtArg has not been
+ // moved to its final home location. So we need to use it from the
+ // incoming location.
+
+ regNumber reg;
+
+ bool isPrespilledForProfiling = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledForProfiling =
+ compiler->compIsProfilerHookNeeded() && compiler->lvaIsPreSpilled(contextArg, regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ // Load from the argument register only if it is not prespilled.
+ if (compiler->lvaIsRegArgument(contextArg) && !isPrespilledForProfiling)
+ {
+ reg = varDsc->lvArgReg;
+ }
+ else
+ {
+ if (isFramePointerUsed())
+ {
+#if defined(_TARGET_ARM_)
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
+ // On Arm compiler->compArgSize doesn't include r11 and lr sizes and hence we need to add 2*REGSIZE_BYTES
+ noway_assert((2 * REGSIZE_BYTES <= varDsc->lvStkOffs) &&
+ (size_t(varDsc->lvStkOffs) < compiler->compArgSize + 2 * REGSIZE_BYTES));
+#else
+ // lvStkOffs is always valid for incoming stack-arguments, even if the argument
+ // will become enregistered.
+ noway_assert((0 < varDsc->lvStkOffs) && (size_t(varDsc->lvStkOffs) < compiler->compArgSize));
+#endif
+ }
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ reg = initReg;
+ *pInitRegZeroed = false;
+
+ // mov reg, [compiler->info.compTypeCtxtArg]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(), varDsc->lvStkOffs);
+ regTracker.rsTrackRegTrash(reg);
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
+ compiler->lvaCachedGenericContextArgOffset());
+#else // CPU_LOAD_STORE_ARCH
+ // mov [ebp-lvaCachedGenericContextArgOffset()], reg
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, genFramePointerReg(),
+ compiler->lvaCachedGenericContextArgOffset());
+#endif // !CPU_LOAD_STORE_ARCH
+}
+
+/*-----------------------------------------------------------------------------
+ *
+ * Set the "GS" security cookie in the prolog.
+ */
+
+void CodeGen::genSetGSSecurityCookie(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->getNeedsGSSecurityCookie())
+ {
+ return;
+ }
+
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+#ifdef _TARGET_AMD64_
+ // eax = #GlobalSecurityCookieVal64; [frame.GSSecurityCookie] = eax
+ getEmitter()->emitIns_R_I(INS_mov, EA_PTRSIZE, REG_RAX, compiler->gsGlobalSecurityCookieVal);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_RAX, compiler->lvaGSSecurityCookie, 0);
+#else
+ // mov dword ptr [frame.GSSecurityCookie], #GlobalSecurityCookieVal
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, compiler->gsGlobalSecurityCookieVal,
+ compiler->lvaGSSecurityCookie, 0, initReg);
+#endif
+ }
+ else
+ {
+ regNumber reg;
+#ifdef _TARGET_XARCH_
+ // Always use EAX on x86 and x64
+ // On x64, if we're not moving into RAX, and the address isn't RIP relative, we can't encode it.
+ reg = REG_EAX;
+#else
+ // We will just use the initReg since it is an available register
+ reg = initReg;
+#endif
+
+ *pInitRegZeroed = false;
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
+ regTracker.rsTrackRegTrash(reg);
+#else
+ // mov reg, dword ptr [compiler->gsGlobalSecurityCookieAddr]
+ // mov dword ptr [frame.GSSecurityCookie], reg
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, reg, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ regTracker.rsTrackRegTrash(reg);
+#endif
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaGSSecurityCookie, 0);
+ }
+}
+
+#ifdef PROFILING_SUPPORTED
+
+/*-----------------------------------------------------------------------------
+ *
+ * Generate the profiling function enter callback.
+ */
+
+void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ // Give profiler a chance to back out of hooking this method
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+#ifndef LEGACY_BACKEND
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // Home all arguments passed in arg registers (RCX, RDX, R8 and R9).
+ // In case of vararg methods, arg regs are already homed.
+ //
+    // Note: Here we don't need to worry about updating GC info, since the enter
+    // callback is generated as part of the prolog, which is non-GC interruptible.
+    // Moreover, the GC cannot kick in while executing inside the profiler callback, which is
+    // a profiler requirement so that it can examine arguments which could be obj refs.
+ if (!compiler->info.compIsVarArgs)
+ {
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ var_types storeType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+ getEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), argReg, varNum, 0);
+ }
+ }
+
+ // Emit profiler EnterCallback(ProfilerMethHnd, caller's SP)
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of a pointer.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // No need to record relocations, if we are generating ELT hooks under the influence
+ // of complus_JitELtHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+ // Notes
+ // 1) Here we can query caller's SP offset since prolog will be generated after final frame layout.
+ // 2) caller's SP relative offset to FramePointer will be negative. We need to add absolute value
+ // of that offset to FramePointer to obtain caller's SP value.
+ assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+
+ // Can't have a call until we have enough padding for rejit
+ genPrologPadForReJit();
+
+ // This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov rax, helper addr; call rax"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER, 0, EA_UNKNOWN);
+
+ // TODO-AMD64-CQ: Rather than reloading, see if this could be optimized by combining with prolog
+ // generation logic that moves args around as required by first BB entry point conditions
+ // computed by LSRA. Code pointers for investigating this further: genFnPrologCalleeRegArgs()
+ // and genEnregisterIncomingStackArgs().
+ //
+ // Now reload arg registers from home locations.
+ // Vararg methods:
+ // - we need to reload only known (i.e. fixed) reg args.
+ // - if floating point type, also reload it into corresponding integer reg
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg;
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+#if FEATURE_VARARG
+ if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
+ {
+ regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
+ instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
+ inst_RV_RV(ins, argReg, intArgReg, loadType);
+ }
+#endif // FEATURE_VARARG
+ }
+
+ // If initReg is one of RBM_CALLEE_TRASH, then it needs to be zero'ed before using.
+ if ((RBM_CALLEE_TRASH & genRegMask(initReg)) != 0)
+ {
+ *pInitRegZeroed = false;
+ }
+
+#else //!_TARGET_AMD64_
+ NYI("RyuJIT: Emit Profiler Enter callback");
+#endif
+
+#else // LEGACY_BACKEND
+
+ unsigned saveStackLvl2 = genStackLevel;
+
+#if defined(_TARGET_X86_)
+ // Important note: when you change enter probe layout, you must also update SKIP_ENTER_PROF_CALLBACK()
+ // for x86 stack unwinding
+
+ // Push the profilerHandle
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+#elif defined(_TARGET_ARM_)
+ // On Arm arguments are prespilled on stack, which frees r0-r3.
+ // For generating Enter callout we would need two registers and one of them has to be r0 to pass profiler handle.
+ // The call target register could be any free register.
+ regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_ENTER_ARG);
+ noway_assert(argReg == REG_PROFILER_ENTER_ARG);
+ regSet.rsLockReg(RBM_PROFILER_ENTER_ARG);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(argReg);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ }
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
+#endif // _TARGET_*
+
+ //
+ // Can't have a call until we have enough padding for rejit
+ //
+ genPrologPadForReJit();
+
+ // This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov rax, helper addr; call rax"
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_ENTER,
+ 0, // argSize. Again, we have to lie about it
+ EA_UNKNOWN); // retSize
+
+#if defined(_TARGET_X86_)
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+#elif defined(_TARGET_ARM_)
+ // Unlock registers
+ regSet.rsUnlockReg(RBM_PROFILER_ENTER_ARG);
+
+ if (initReg == argReg)
+ {
+ *pInitRegZeroed = false;
+ }
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking registers");
+#endif // _TARGET_*
+
+ /* Restore the stack level */
+
+ genStackLevel = saveStackLvl2;
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Generates Leave profiler hook.
+ * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node.
+ */
+
+void CodeGen::genProfilingLeaveCallback(unsigned helper /*= CORINFO_HELP_PROF_FCN_LEAVE*/)
+{
+ // Only hook if profiler says it's okay.
+ if (!compiler->compIsProfilerHookNeeded())
+ {
+ return;
+ }
+
+ compiler->info.compProfilerCallback = true;
+
+    // Need to save the current stack level, since the callee will pop the argument
+ unsigned saveStackLvl2 = genStackLevel;
+
+#ifndef LEGACY_BACKEND
+
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No profiling for System V systems yet.
+ // Since the method needs to make a profiler callback, it should have out-going arg space allocated.
+ noway_assert(compiler->lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+ noway_assert(compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES));
+
+ // If thisPtr needs to be kept alive and reported, it cannot be one of the callee trash
+ // registers that profiler callback kills.
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvIsInReg())
+ {
+ regMaskTP thisPtrMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
+ noway_assert((RBM_PROFILER_LEAVE_TRASH & thisPtrMask) == 0);
+ }
+
+ // At this point return value is computed and stored in RAX or XMM0.
+ // On Amd64, Leave callback preserves the return register. We keep
+ // RAX alive by not reporting as trashed by helper call. Also note
+ // that GC cannot kick-in while executing inside profiler callback,
+ // which is a requirement of profiler as well since it needs to examine
+ // return value which could be an obj ref.
+
+ // RCX = ProfilerMethHnd
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ // Profiler hooks enabled during Ngen time.
+ // Profiler handle needs to be accessed through an indirection of an address.
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ // Don't record relocations, if we are generating ELT hooks under the influence
+ // of complus_JitELtHookEnabled=1
+ if (compiler->opts.compJitELTHookEnabled)
+ {
+ genSetRegToIcon(REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd, TYP_I_IMPL);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_8BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ }
+
+ // RDX = caller's SP
+    // TODO-AMD64-Cleanup: Once we start doing codegen after final frame layout, retain the "if" portion
+    // of the statements to execute unconditionally and clean up the rest.
+ if (compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ // Caller's SP relative offset to FramePointer will be negative. We need to add absolute
+ // value of that offset to FramePointer to obtain caller's SP value.
+ int callerSPOffset = compiler->lvaToCallerSPRelativeOffset(0, isFramePointerUsed());
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_ARG_1, genFramePointerReg(), -callerSPOffset);
+ }
+ else
+ {
+ // If we are here means that it is a tentative frame layout during which we
+ // cannot use caller's SP offset since it is an estimate. For now we require the
+ // method to have at least a single arg so that we can use it to obtain caller's
+ // SP.
+ LclVarDsc* varDsc = compiler->lvaTable;
+ NYI_IF((varDsc == nullptr) || !varDsc->lvIsParam, "Profiler ELT callback for a method without any params");
+
+ // lea rdx, [FramePointer + Arg0's offset]
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_ARG_1, 0, 0);
+ }
+
+ // We can use any callee trash register (other than RAX, RCX, RDX) for call target.
+ // We use R8 here. This will emit either
+ // "call ip-relative 32-bit offset" or
+ // "mov r8, helper addr; call r8"
+ genEmitHelperCall(helper, 0, EA_UNKNOWN, REG_ARG_2);
+
+#else //!_TARGET_AMD64_
+ NYI("RyuJIT: Emit Profiler Leave callback");
+#endif // _TARGET_*
+
+#else // LEGACY_BACKEND
+
+#if defined(_TARGET_X86_)
+ //
+ // Push the profilerHandle
+ //
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA, (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+#elif defined(_TARGET_ARM_)
+ //
+ // Push the profilerHandle
+ //
+
+    // We could optimize register usage based on whether the return value is int/long/void, but to keep it
+    // simple we always lock RBM_PROFILER_RET_USED.
+ regNumber scratchReg = regSet.rsGrabReg(RBM_PROFILER_RET_SCRATCH);
+ noway_assert(scratchReg == REG_PROFILER_RET_SCRATCH);
+ regSet.rsLockReg(RBM_PROFILER_RET_USED);
+
+ // Contract between JIT and Profiler Leave callout on arm:
+ // Return size <= 4 bytes: REG_PROFILER_RET_SCRATCH will contain return value
+ // Return size > 4 and <= 8: <REG_PROFILER_RET_SCRATCH,r1> will contain return value.
+ // Floating point or double or HFA return values will be in s0-s15 in case of non-vararg methods.
+ // It is assumed that profiler Leave callback doesn't trash registers r1,REG_PROFILER_RET_SCRATCH and s0-s15.
+ //
+ // In the following cases r0 doesn't contain a return value and hence need not be preserved before emitting Leave
+ // callback.
+ bool r0Trashed;
+ emitAttr attr = EA_UNKNOWN;
+
+    if (compiler->info.compRetType == TYP_VOID ||
+        (!compiler->info.compIsVarArgs && !compiler->opts.compUseSoftFP &&
+         (varTypeIsFloating(compiler->info.compRetType) ||
+          compiler->IsHfa(compiler->info.compMethodInfo->args.retTypeClass))))
+ {
+ r0Trashed = false;
+ }
+ else
+ {
+ // Has a return value and r0 is in use. For emitting Leave profiler callout we would need r0 for passing
+        // profiler handle. Therefore, r0 is moved to REG_PROFILER_RET_SCRATCH as per contract.
+ if (RBM_ARG_0 & gcInfo.gcRegGCrefSetCur)
+ {
+ attr = EA_GCREF;
+ gcInfo.gcMarkRegSetGCref(RBM_PROFILER_RET_SCRATCH);
+ }
+ else if (RBM_ARG_0 & gcInfo.gcRegByrefSetCur)
+ {
+ attr = EA_BYREF;
+ gcInfo.gcMarkRegSetByref(RBM_PROFILER_RET_SCRATCH);
+ }
+ else
+ {
+ attr = EA_4BYTE;
+ }
+
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_PROFILER_RET_SCRATCH, REG_ARG_0);
+ regTracker.rsTrackRegTrash(REG_PROFILER_RET_SCRATCH);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_0);
+ r0Trashed = true;
+ }
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, REG_ARG_0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_LEAVE,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+ // Restore state that existed before profiler callback
+ if (r0Trashed)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_ARG_0, REG_PROFILER_RET_SCRATCH);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ gcInfo.gcMarkRegSetNpt(RBM_PROFILER_RET_SCRATCH);
+ }
+
+ regSet.rsUnlockReg(RBM_PROFILER_RET_USED);
+#else // _TARGET_*
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking them");
+#endif // _TARGET_*
+
+#endif // LEGACY_BACKEND
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+}
+
+#endif // PROFILING_SUPPORTED
+
+/*****************************************************************************
+
+Esp frames :
+----------
+
+These instructions are just a reordering of the instructions used today.
+
+push ebp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE / pop dummyReg
+pop ebx
+pop edi
+pop esi
+pop ebp
+ret
+
+Ebp frames :
+----------
+
+The epilog does "add esp, LOCALS_SIZE" instead of "mov esp, ebp".
+Everything else is similar, though in a different order.
+
+The security object will no longer be at a fixed offset. However, the
+offset can still be determined by looking up the GC-info and determining
+how many callee-saved registers are pushed.
+
+push ebp
+mov ebp, esp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE / pop dummyReg
+pop ebx
+pop edi
+pop esi
+(mov esp, ebp if there are no callee-saved registers)
+pop ebp
+ret
+
+Double-aligned frame :
+--------------------
+
+LOCALS_SIZE_ADJUSTED needs to include an unused DWORD if an odd number
+of callee-saved registers are pushed on the stack so that the locals
+themselves are qword-aligned. The instructions are the same as today,
+just in a different order.
+
+push ebp
+mov ebp, esp
+and esp, 0xFFFFFFFC
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE_ADJUSTED / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+add esp, LOCALS_SIZE_ADJUSTED / pop dummyReg
+pop ebx
+pop edi
+pop esi
+pop ebp
+mov esp, ebp
+pop ebp
+ret
+
+localloc (with ebp) frames :
+--------------------------
+
+The instructions are the same as today, just in a different order.
+Also, today the epilog does "lea esp, [ebp-LOCALS_SIZE-calleeSavedRegsPushedSize]"
+which will change to "lea esp, [ebp-calleeSavedRegsPushedSize]".
+
+push ebp
+mov ebp, esp
+push esi
+push edi
+push ebx
+sub esp, LOCALS_SIZE / push dummyReg if LOCALS_SIZE=sizeof(void*)
+...
+lea esp, [ebp-calleeSavedRegsPushedSize]
+pop ebx
+pop edi
+pop esi
+(mov esp, ebp if there are no callee-saved registers)
+pop ebp
+ret
+
+*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generates appropriate NOP padding for a function prolog to support ReJIT.
+ */
+
+void CodeGen::genPrologPadForReJit()
+{
+ assert(compiler->compGeneratingProlog);
+
+#ifdef _TARGET_XARCH_
+ if (!(compiler->opts.eeFlags & CORJIT_FLG_PROF_REJIT_NOPS))
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ // No need to generate pad (nops) for funclets.
+ // When compiling the main function (and not a funclet)
+ // the value of funCurrentFunc->funKind is equal to FUNC_ROOT.
+ if (compiler->funCurrentFunc()->funKind != FUNC_ROOT)
+ {
+ return;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ unsigned size = getEmitter()->emitGetPrologOffsetEstimate();
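+    // For example, if the prolog size estimate is 2 bytes, emit 3 bytes of nops so that
+    // the method entry spans at least 5 bytes (the size of a rel32 jmp).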
+ if (size < 5)
+ {
+ instNop(5 - size);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a function prolog.
+ */
+
+void CodeGen::genReserveProlog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ JITDUMP("Reserving prolog IG for block BB%02u\n", block->bbNum);
+
+ /* Nothing is live on entry to the prolog */
+
+ getEmitter()->emitCreatePlaceholderIG(IGPT_PROLOG, block, VarSetOps::MakeEmpty(compiler), 0, 0, false);
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a function epilog.
+ */
+
+void CodeGen::genReserveEpilog(BasicBlock* block)
+{
+ VARSET_TP VARSET_INIT(compiler, gcrefVarsArg, getEmitter()->emitThisGCrefVars);
+ regMaskTP gcrefRegsArg = gcInfo.gcRegGCrefSetCur;
+ regMaskTP byrefRegsArg = gcInfo.gcRegByrefSetCur;
+
+ /* The return value is special-cased: make sure it goes live for the epilog */
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ if (genFullPtrRegMap && !jmpEpilog)
+ {
+ if (varTypeIsGC(compiler->info.compRetNativeType))
+ {
+ noway_assert(genTypeStSz(compiler->info.compRetNativeType) == genTypeStSz(TYP_I_IMPL));
+
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
+
+ switch (compiler->info.compRetNativeType)
+ {
+ case TYP_REF:
+ gcrefRegsArg |= RBM_INTRET;
+ break;
+ case TYP_BYREF:
+ byrefRegsArg |= RBM_INTRET;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ JITDUMP("Reserving epilog IG for block BB%02u\n", block->bbNum);
+
+ assert(block != nullptr);
+ bool last = (block->bbNext == nullptr);
+ getEmitter()->emitCreatePlaceholderIG(IGPT_EPILOG, block, gcrefVarsArg, gcrefRegsArg, byrefRegsArg, last);
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Reserve space for a funclet prolog.
+ */
+
+void CodeGen::genReserveFuncletProlog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ /* Currently, no registers are live on entry to the prolog, except maybe
+ the exception object. There might be some live stack vars, but they
+ cannot be accessed until after the frame pointer is re-established.
+ In order to potentially prevent emitting a death before the prolog
+ and a birth right after it, we just report it as live during the
+ prolog, and rely on the prolog being non-interruptible. Trust
+ genCodeForBBlist to correctly initialize all the sets.
+
+ We might need to relax these asserts if the VM ever starts
+ restoring any registers, then we could have live-in reg vars...
+ */
+
+ noway_assert((gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT) == gcInfo.gcRegGCrefSetCur);
+ noway_assert(gcInfo.gcRegByrefSetCur == 0);
+
+ JITDUMP("Reserving funclet prolog IG for block BB%02u\n", block->bbNum);
+
+ getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_PROLOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, false);
+}
+
+/*****************************************************************************
+ *
+ * Reserve space for a funclet epilog.
+ */
+
+void CodeGen::genReserveFuncletEpilog(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ JITDUMP("Reserving funclet epilog IG for block BB%02u\n", block->bbNum);
+
+ bool last = (block->bbNext == nullptr);
+ getEmitter()->emitCreatePlaceholderIG(IGPT_FUNCLET_EPILOG, block, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, last);
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Finalize the frame size and offset assignments.
+ *
+ * No changes can be made to the modified register set after this, since that can affect how many
+ * callee-saved registers get saved.
+ */
+void CodeGen::genFinalizeFrame()
+{
+ JITDUMP("Finalizing stack frame\n");
+
+#ifndef LEGACY_BACKEND
+ // Initializations need to happen based on the var locations at the start
+ // of the first basic block, so load those up. In particular, the determination
+ // of whether or not to use block init in the prolog is dependent on the variable
+ // locations on entry to the function.
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
+#endif // !LEGACY_BACKEND
+
+ genCheckUseBlockInit();
+
+ // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_)
+
+ if (compiler->compTailCallUsed)
+ {
+ // If we are generating a helper-based tailcall, we've set the tailcall helper "flags"
+ // argument to "1", indicating to the tailcall helper that we've saved the callee-saved
+ // registers (ebx, esi, edi). So, we need to make sure all the callee-saved registers
+ // actually get saved.
+
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED);
+ }
+#endif // _TARGET_X86_
+
+#if defined(_TARGET_ARMARCH_)
+ // We need to determine whether we will adjust SP by more than a specific amount; if so, we use a loop
+ // to touch stack pages, which requires multiple registers. See genAllocLclFrame() for details.
+ if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
+ {
+ regSet.rsSetRegsModified(VERY_LARGE_FRAME_SIZE_REG_MASK);
+ }
+#endif // defined(_TARGET_ARMARCH_)
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Modified regs: ");
+ dspRegMask(regSet.rsGetModifiedRegsMask());
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Set various registers as "modified" for special code generation scenarios: Edit & Continue, P/Invoke calls, etc.
+ if (compiler->opts.compDbgEnC)
+ {
+ // We always save FP.
+ noway_assert(isFramePointerUsed());
+#ifdef _TARGET_AMD64_
+ // On x64 we always save exactly RBP, RSI and RDI for EnC.
+ regMaskTP okRegs = (RBM_CALLEE_TRASH | RBM_FPBASE | RBM_RSI | RBM_RDI);
+ regSet.rsSetRegsModified(RBM_RSI | RBM_RDI);
+ noway_assert((regSet.rsGetModifiedRegsMask() & ~okRegs) == 0);
+#else // !_TARGET_AMD64_
+ // On x86 we save all callee saved regs so the saved reg area size is consistent
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+#endif // !_TARGET_AMD64_
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* Count how many callee-saved registers will actually be saved (pushed) */
+
+ // EBP cannot be (directly) modified for EBP frame and double-aligned frames
+ noway_assert(!doubleAlignOrFramePointerUsed() || !regSet.rsRegsModified(RBM_FPBASE));
+
+#if ETW_EBP_FRAMED
+ // EBP cannot be (directly) modified
+ noway_assert(!regSet.rsRegsModified(RBM_FPBASE));
+#endif
+
+ regMaskTP maskCalleeRegsPushed = regSet.rsGetModifiedRegsMask() & RBM_CALLEE_SAVED;
+
+#ifdef _TARGET_ARMARCH_
+ if (isFramePointerUsed())
+ {
+ // For a FP based frame we have to push/pop the FP register
+ //
+ maskCalleeRegsPushed |= RBM_FPBASE;
+
+ // This assert checks that we are not using REG_FP
+ // as both the frame pointer and as a codegen register
+ //
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ }
+
+ // We always push LR. See genPushCalleeSavedRegisters().
+ //
+ maskCalleeRegsPushed |= RBM_LR;
+
+#if defined(_TARGET_ARM_)
+ // TODO-ARM64-Bug?: enable some variant of this for FP on ARM64?
+ regMaskTP maskPushRegsFloat = maskCalleeRegsPushed & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = maskCalleeRegsPushed & ~maskPushRegsFloat;
+
+ if ((maskPushRegsFloat != RBM_NONE) ||
+ (compiler->opts.MinOpts() && (regSet.rsMaskResvd & maskCalleeRegsPushed & RBM_OPT_RSVD)))
+ {
+ // Here we try to keep stack double-aligned before the vpush
+ if ((genCountBits(regSet.rsMaskPreSpillRegs(true) | maskPushRegsInt) % 2) != 0)
+ {
+ regNumber extraPushedReg = REG_R4;
+ while (maskPushRegsInt & genRegMask(extraPushedReg))
+ {
+ extraPushedReg = REG_NEXT(extraPushedReg);
+ }
+ if (extraPushedReg < REG_R11)
+ {
+ maskPushRegsInt |= genRegMask(extraPushedReg);
+ regSet.rsSetRegsModified(genRegMask(extraPushedReg));
+ }
+ }
+ maskCalleeRegsPushed = maskPushRegsInt | maskPushRegsFloat;
+ }
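+ // Illustrative example (hypothetical register sets, not taken from a particular method): if the
+ // pre-spill mask plus maskPushRegsInt covers an even number of registers, e.g. {r0-r3, r11, lr},
+ // no extra register is needed; if it covers an odd number, e.g. {r7, r11, lr}, we add the lowest
+ // free register at or above r4 so the "push" leaves SP 8-byte aligned before the following vpush.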
+
+ // We currently only expect to push/pop consecutive FP registers
+ // and these have to be double-sized registers as well.
+ // Here we will ensure that maskPushRegsFloat obeys these requirements.
+ //
+ if (maskPushRegsFloat != RBM_NONE)
+ {
+ regMaskTP contiguousMask = genRegMaskFloat(REG_F16, TYP_DOUBLE);
+ while (maskPushRegsFloat > contiguousMask)
+ {
+ contiguousMask <<= 2;
+ contiguousMask |= genRegMaskFloat(REG_F16, TYP_DOUBLE);
+ }
+ if (maskPushRegsFloat != contiguousMask)
+ {
+ regMaskTP maskExtraRegs = contiguousMask - maskPushRegsFloat;
+ maskPushRegsFloat |= maskExtraRegs;
+ regSet.rsSetRegsModified(maskExtraRegs);
+ maskCalleeRegsPushed |= maskExtraRegs;
+ }
+ }
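+ // Illustrative example (hypothetical): if only d8 and d10 had been marked as modified, the loop
+ // above grows contiguousMask to cover d8-d10, and the hole (d9) is added to maskPushRegsFloat,
+ // to the modified set, and to maskCalleeRegsPushed, so a single contiguous vpush/vpop of d8-d10
+ // can be emitted.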
+#endif // _TARGET_ARM_
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Compute the count of callee saved float regs saved on stack.
+ // On Amd64 we push only integer regs. Callee saved float (xmm6-xmm15)
+ // regs are stack allocated and preserved in their stack locations.
+ compiler->compCalleeFPRegsSavedMask = maskCalleeRegsPushed & RBM_FLT_CALLEE_SAVED;
+ maskCalleeRegsPushed &= ~RBM_FLT_CALLEE_SAVED;
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+ compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Callee-saved registers pushed: %d ", compiler->compCalleeRegsPushed);
+ dspRegMask(maskCalleeRegsPushed);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Assign the final offsets to things living on the stack frame */
+
+ compiler->lvaAssignFrameOffsets(Compiler::FINAL_FRAME_LAYOUT);
+
+ /* We want to make sure that the prolog size calculated here is accurate
+ (that is, instructions will not shrink because of conservative stack
+ frame approximations). We do this by filling in the correct size
+ here, where we have committed to the final numbers for the frame offsets.
+ This ensures that the prolog size is always correct.
+ */
+ getEmitter()->emitMaxTmpSize = compiler->tmpSize;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode || compiler->opts.disAsm || compiler->opts.disAsm2 || verbose)
+ {
+ compiler->lvaTableDump();
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// genEstablishFramePointer: Set up the frame pointer by adding an offset to the stack pointer.
+//
+// Arguments:
+// delta - the offset to add to the current stack pointer to establish the frame pointer
+// reportUnwindData - true if establishing the frame pointer should be reported in the OS unwind data.
+
+void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
+{
+ assert(compiler->compGeneratingProlog);
+
+#if defined(_TARGET_XARCH_)
+
+ if (delta == 0)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
+ psiMoveESPtoEBP();
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
+ // We don't update prolog scope info (there is no function to handle lea), but that is currently dead code
+ // anyway.
+ }
+
+ if (reportUnwindData)
+ {
+ compiler->unwindSetFrameReg(REG_FPBASE, delta);
+ }
+
+#elif defined(_TARGET_ARM_)
+
+ assert(arm_Valid_Imm_For_Add_SP(delta));
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
+
+ if (reportUnwindData)
+ {
+ compiler->unwindPadding();
+ }
+
+#else
+ NYI("establish frame pointer");
+#endif
+}
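+// For reference, the code emitted above is roughly the following (a sketch, not an exhaustive list):
+//   x86/x64: "mov ebp, esp" when delta == 0, otherwise "lea ebp, [esp+delta]"
+//   ARM:     "add r11, sp, #delta"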
+
+/*****************************************************************************
+ *
+ * Generates code for a function prolog.
+ *
+ * NOTE REGARDING CHANGES THAT IMPACT THE DEBUGGER:
+ *
+ * The debugger relies on decoding ARM instructions to be able to successfully step through code. It does not
+ * implement decoding all ARM instructions. It only implements decoding the instructions which the JIT emits, and
+ * only instructions which result in control not going to the next instruction. Basically, any time execution would
+ * not continue at the next instruction (such as B, BL, BX, BLX, POP{pc}, etc.), the debugger has to be able to
+ * decode that instruction. If any of this is changed on ARM, the debugger team needs to be notified so that it
+ * can ensure stepping isn't broken. This is also a requirement for x86 and amd64.
+ *
+ * If any changes are made in the prolog, epilog, calls, returns, and branches, it is a good idea to notify the
+ * debugger team to ensure that stepping still works.
+ *
+ * ARM stepping code is here: debug\ee\arm\armwalker.cpp, vm\arm\armsinglestepper.cpp.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genFnProlog()
+{
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ compiler->funSetCurrentFunc(0);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnProlog()\n");
+ }
+#endif
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+
+#ifdef LEGACY_BACKEND
+ genFinalizeFrame();
+#endif // LEGACY_BACKEND
+
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
+
+ /* Ready to start on the prolog proper */
+
+ getEmitter()->emitBegProlog();
+ compiler->unwindBegProlog();
+
+#ifdef DEBUGGING_SUPPORT
+ // Do this so we can put the prolog instruction group ahead of
+ // other instruction groups
+ genIPmappingAddToFront((IL_OFFSETX)ICorDebugInfo::PROLOG);
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n__prolog:\n");
+ }
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ // Create new scopes for the method-parameters for the prolog-block.
+ psiBegProlog();
+ }
+#endif
+
+#ifdef DEBUG
+
+ if (compiler->compJitHaltMethod())
+ {
+ /* Put a nop first because the debugger and other tools are likely to
+ put an int3 at the beginning, and we don't want to confuse them */
+
+ instGen(INS_nop);
+ instGen(INS_BREAKPOINT);
+
+#ifdef _TARGET_ARMARCH_
+ // Avoid asserts in the unwind info because these instructions aren't accounted for.
+ compiler->unwindPadding();
+#endif // _TARGET_ARMARCH_
+ }
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS && defined(DEBUG)
+
+ // We cannot force 0-initialization of the PSPSym
+ // as it will overwrite the real value
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->lvaPSPSym];
+ assert(!varDsc->lvMustInit);
+ }
+
+#endif // FEATURE_EH_FUNCLETS && DEBUG
+
+ /*-------------------------------------------------------------------------
+ *
+ * Record the stack frame ranges that will cover all of the tracked
+ * and untracked pointer variables.
+ * Also find which registers will need to be zero-initialized.
+ *
+ * 'initRegs': - Generally, enregistered variables should not need to be
+ * zero-inited. They only need to be zero-inited when they
+ * have a possibly uninitialized read on some control
+ * flow path. Apparently some of the IL_STUBs that we
+ * generate have this property.
+ */
+
+ int untrLclLo = +INT_MAX;
+ int untrLclHi = -INT_MAX;
+ // 'hasUntrLcl' is true if there are any stack locals which must be init'ed.
+ // Note that they may be tracked, but simply not allocated to a register.
+ bool hasUntrLcl = false;
+
+ int GCrefLo = +INT_MAX;
+ int GCrefHi = -INT_MAX;
+ bool hasGCRef = false;
+
+ regMaskTP initRegs = RBM_NONE; // Registers which must be init'ed.
+ regMaskTP initFltRegs = RBM_NONE; // FP registers which must be init'ed.
+ regMaskTP initDblRegs = RBM_NONE;
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame)
+ {
+ noway_assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+
+ signed int loOffs = varDsc->lvStkOffs;
+ signed int hiOffs = varDsc->lvStkOffs + compiler->lvaLclSize(varNum);
+
+ /* We need to know the offset range of tracked stack GC refs */
+ /* We assume that the GC reference can be anywhere in the TYP_STRUCT */
+
+ if (compiler->lvaTypeIsGC(varNum) && varDsc->lvTrackedNonStruct() && varDsc->lvOnFrame)
+ {
+ // Fields of dependently promoted structs (PROMOTION_TYPE_DEPENDENT) have already been
+ // taken care of by the parent struct.
+ if (!compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ hasGCRef = true;
+
+ if (loOffs < GCrefLo)
+ {
+ GCrefLo = loOffs;
+ }
+ if (hiOffs > GCrefHi)
+ {
+ GCrefHi = hiOffs;
+ }
+ }
+ }
+
+ /* For lvMustInit vars, gather pertinent info */
+
+ if (!varDsc->lvMustInit)
+ {
+ continue;
+ }
+
+ if (varDsc->lvIsInReg())
+ {
+ regMaskTP regMask = genRegMask(varDsc->lvRegNum);
+ if (!varDsc->IsFloatRegType())
+ {
+ initRegs |= regMask;
+
+ if (varTypeIsMultiReg(varDsc))
+ {
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ initRegs |= genRegMask(varDsc->lvOtherReg);
+ }
+ else
+ {
+ /* Upper DWORD is on the stack, and needs to be inited */
+
+ loOffs += sizeof(int);
+ goto INIT_STK;
+ }
+ }
+ }
+#if !FEATURE_STACK_FP_X87
+ else if (varDsc->TypeGet() == TYP_DOUBLE)
+ {
+ initDblRegs |= regMask;
+ }
+ else
+ {
+ initFltRegs |= regMask;
+ }
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ INIT_STK:
+
+ hasUntrLcl = true;
+
+ if (loOffs < untrLclLo)
+ {
+ untrLclLo = loOffs;
+ }
+ if (hiOffs > untrLclHi)
+ {
+ untrLclHi = hiOffs;
+ }
+ }
+ }
+
+ /* Don't forget about spill temps that hold pointers */
+
+ if (!TRACK_GC_TEMP_LIFETIMES)
+ {
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (!varTypeIsGC(tempThis->tdTempType()))
+ {
+ continue;
+ }
+
+ signed int loOffs = tempThis->tdTempOffs();
+ signed int hiOffs = loOffs + TARGET_POINTER_SIZE;
+
+ // If there is a frame pointer used, due to frame pointer chaining it will point to the stored value of the
+ // previous frame pointer. Thus, stkOffs can't be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_AMD64_)
+ // However, on amd64 there is no requirement to chain frame pointers.
+
+ noway_assert(!isFramePointerUsed() || loOffs != 0);
+#endif // !defined(_TARGET_AMD64_)
+ // printf(" Untracked tmp at [EBP-%04X]\n", -stkOffs);
+
+ hasUntrLcl = true;
+
+ if (loOffs < untrLclLo)
+ {
+ untrLclLo = loOffs;
+ }
+ if (hiOffs > untrLclHi)
+ {
+ untrLclHi = hiOffs;
+ }
+ }
+ }
+
+ assert((genInitStkLclCnt > 0) == hasUntrLcl);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (genInitStkLclCnt > 0)
+ {
+ printf("Found %u lvMustInit stk vars, frame offsets %d through %d\n", genInitStkLclCnt, -untrLclLo,
+ -untrLclHi);
+ }
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // On the ARM we will spill any incoming struct args in the first instruction in the prolog
+ // Ditto for all enregistered user arguments in a varargs method.
+ // These registers will be available to use for the initReg. We just remove
+ // all of these registers from the rsCalleeRegArgMaskLiveIn.
+ //
+ intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
+#endif
+
+ /* Choose the register to use for zero initialization */
+
+ regNumber initReg = REG_SCRATCH; // Unless we find a better register below
+ bool initRegZeroed = false;
+ regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
+ regMaskTP tempMask;
+
+ // We should not use the special PINVOKE registers as the initReg
+ // since they are trashed by the jithelper call to setup the PINVOKE frame
+ if (compiler->info.compCallUnmanaged)
+ {
+ excludeMask |= RBM_PINVOKE_FRAME;
+
+ assert((!compiler->opts.ShouldUsePInvokeHelpers()) || (compiler->info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!compiler->opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(compiler->info.compLvFrameListRoot < compiler->lvaCount);
+
+ excludeMask |= (RBM_PINVOKE_TCB | RBM_PINVOKE_SCRATCH);
+
+ // We also must exclude the register used by compLvFrameListRoot when it is enregistered
+ //
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ if (varDsc->lvRegister)
+ {
+ excludeMask |= genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ // If we have a variable sized frame (compLocallocUsed is true)
+ // then using REG_SAVED_LOCALLOC_SP in the prolog is not allowed
+ if (compiler->compLocallocUsed)
+ {
+ excludeMask |= RBM_SAVED_LOCALLOC_SP;
+ }
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_XARCH_)
+ if (compiler->compLclFrameSize >= compiler->getVeryLargeFrameSize())
+ {
+ // We currently must use REG_EAX on x86 here
+ // because the loop's backwards branch depends upon the size of EAX encodings
+ assert(initReg == REG_EAX);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ tempMask = initRegs & ~excludeMask & ~regSet.rsMaskResvd;
+
+ if (tempMask != RBM_NONE)
+ {
+ // We will use one of the registers that we were planning to zero init anyway.
+ // We pick the lowest register number.
+ tempMask = genFindLowestBit(tempMask);
+ initReg = genRegNumFromMask(tempMask);
+ }
+ // Next we prefer to use one of the unused argument registers.
+ // If they aren't available we use one of the caller-saved integer registers.
+ else
+ {
+ tempMask = regSet.rsGetModifiedRegsMask() & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
+ if (tempMask != RBM_NONE)
+ {
+ // We pick the lowest register number
+ tempMask = genFindLowestBit(tempMask);
+ initReg = genRegNumFromMask(tempMask);
+ }
+ }
+ }
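+ // Example of the selection above (hypothetical masks): if initRegs = {rsi, rdi} and neither is an
+ // incoming argument register or otherwise excluded, we pick rsi (the lowest set bit); if no
+ // must-init register qualifies, we fall back to the lowest qualifying modified integer register,
+ // and if none qualifies initReg stays REG_SCRATCH.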
+
+ noway_assert(!compiler->info.compCallUnmanaged || (initReg != REG_PINVOKE_FRAME));
+
+#if defined(_TARGET_AMD64_)
+ // If we are a varargs call, in order to set up the arguments correctly this
+ // must be done in a 2 step process. As per the x64 ABI:
+ // a) The caller sets up the argument shadow space (just before the return
+ // address, 4 pointer sized slots).
+ // b) The callee is responsible to home the arguments on the shadow space
+ // provided by the caller.
+ // This way, the varargs iterator will be able to retrieve the
+ // call arguments properly since both the arg regs and the stack allocated
+ // args will be contiguous.
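+ // A rough sketch of the stack at this point (assuming nothing has been pushed in the prolog yet,
+ // so [rsp] holds the return address): spillIntArgRegsToShadowSlots() homes rcx/rdx/r8/r9 into the
+ // caller-allocated shadow slots at [rsp+8] .. [rsp+32], making them contiguous with any
+ // stack-passed arguments at [rsp+40] and above, as the varargs iterator expects.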
+ if (compiler->info.compIsVarArgs)
+ {
+ getEmitter()->spillIntArgRegsToShadowSlots();
+ }
+
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_ARM_
+ /*-------------------------------------------------------------------------
+ *
+ * Now start emitting the part of the prolog which sets up the frame
+ */
+
+ if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
+ {
+ inst_IV(INS_push, (int)regSet.rsMaskPreSpillRegs(true));
+ compiler->unwindPushMaskInt(regSet.rsMaskPreSpillRegs(true));
+ }
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_XARCH_
+ if (doubleAlignOrFramePointerUsed())
+ {
+ inst_RV(INS_push, REG_FPBASE, TYP_REF);
+ compiler->unwindPush(REG_FPBASE);
+ psiAdjustStackLevel(REGSIZE_BYTES);
+
+#ifndef _TARGET_AMD64_ // On AMD64, establish the frame pointer after the "sub rsp"
+ genEstablishFramePointer(0, /*reportUnwindData*/ true);
+#endif // !_TARGET_AMD64_
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ noway_assert(isFramePointerUsed() == false);
+ noway_assert(!regSet.rsRegsModified(RBM_FPBASE)); /* Trashing EBP is out. */
+
+ inst_RV_IV(INS_AND, REG_SPBASE, -8, EA_PTRSIZE);
+ }
+#endif // DOUBLE_ALIGN
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM64_
+ // Probe large frames now, if necessary, since genPushCalleeSavedRegisters() will allocate the frame.
+ genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
+ genPushCalleeSavedRegisters(initReg, &initRegZeroed);
+#else // !_TARGET_ARM64_
+ genPushCalleeSavedRegisters();
+#endif // !_TARGET_ARM64_
+
+#ifdef _TARGET_ARM_
+ bool needToEstablishFP = false;
+ int afterLclFrameSPtoFPdelta = 0;
+ if (doubleAlignOrFramePointerUsed())
+ {
+ needToEstablishFP = true;
+
+ // If the local frame is small enough, we establish the frame pointer after the OS-reported prolog.
+ // This makes the prolog and epilog match, giving us smaller unwind data. If the frame size is
+ // too big, we go ahead and do it here.
+
+ int SPtoFPdelta = (compiler->compCalleeRegsPushed - 2) * REGSIZE_BYTES;
+ afterLclFrameSPtoFPdelta = SPtoFPdelta + compiler->compLclFrameSize;
+ if (!arm_Valid_Imm_For_Add_SP(afterLclFrameSPtoFPdelta))
+ {
+ // Oh well, it looks too big. Go ahead and establish the frame pointer here.
+ genEstablishFramePointer(SPtoFPdelta, /*reportUnwindData*/ true);
+ needToEstablishFP = false;
+ }
+ }
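+ // Worked example (hypothetical sizes): with compCalleeRegsPushed == 5 and compLclFrameSize == 0x200,
+ // SPtoFPdelta == (5 - 2) * 4 == 12 and afterLclFrameSPtoFPdelta == 0x20C, which fits in an
+ // "add r11, sp, #imm", so we defer establishing the frame pointer until after the local frame is
+ // allocated; a frame too large for the immediate is handled in the branch above instead.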
+#endif // _TARGET_ARM_
+
+ //-------------------------------------------------------------------------
+ //
+ // Subtract the local frame size from SP.
+ //
+ //-------------------------------------------------------------------------
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_ARM64_
+ regMaskTP maskStackAlloc = RBM_NONE;
+
+#ifdef _TARGET_ARM_
+ maskStackAlloc =
+ genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED);
+#endif // _TARGET_ARM_
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genAllocLclFrame(compiler->compLclFrameSize, initReg, &initRegZeroed, intRegState.rsCalleeRegArgMaskLiveIn);
+ }
+#endif // !_TARGET_ARM64_
+
+//-------------------------------------------------------------------------
+
+#ifdef _TARGET_ARM_
+ if (compiler->compLocallocUsed)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_SAVED_LOCALLOC_SP, REG_SPBASE);
+ regTracker.rsTrackRegTrash(REG_SAVED_LOCALLOC_SP);
+ compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
+ }
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Preserve callee saved float regs to stack.
+ genPreserveCalleeSavedFltRegs(compiler->compLclFrameSize);
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+#ifdef _TARGET_AMD64_
+ // Establish the AMD64 frame pointer after the OS-reported prolog.
+ if (doubleAlignOrFramePointerUsed())
+ {
+ bool reportUnwindData = compiler->compLocallocUsed || compiler->opts.compDbgEnC;
+ genEstablishFramePointer(compiler->codeGen->genSPtoFPdelta(), reportUnwindData);
+ }
+#endif //_TARGET_AMD64_
+
+//-------------------------------------------------------------------------
+//
+// This is the end of the OS-reported prolog for purposes of unwinding
+//
+//-------------------------------------------------------------------------
+
+#ifdef _TARGET_ARM_
+ if (needToEstablishFP)
+ {
+ genEstablishFramePointer(afterLclFrameSPtoFPdelta, /*reportUnwindData*/ false);
+ needToEstablishFP = false; // nobody uses this later, but set it anyway, just to be explicit
+ }
+#endif // _TARGET_ARM_
+
+ if (compiler->info.compPublishStubParam)
+ {
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
+ compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
+#else
+ // mov [lvaStubArgumentVar], EAX
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, genFramePointerReg(),
+ compiler->lvaTable[compiler->lvaStubArgumentVar].lvStkOffs);
+#endif
+ assert(intRegState.rsCalleeRegArgMaskLiveIn & RBM_SECRET_STUB_PARAM);
+
+ // It's no longer live; clear it out so it can be used after this in the prolog
+ intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SECRET_STUB_PARAM;
+ }
+
+#if STACK_PROBES
+ // We could probably fold this into the loop for the FrameSize >= 0x3000 probing
+ // when creating the stack frame. Don't think it's worth it, though.
+ if (genNeedPrologStackProbe)
+ {
+ //
+ // Can't have a call until we have enough padding for rejit
+ //
+ genPrologPadForReJit();
+ noway_assert(compiler->opts.compNeedStackProbes);
+ genGenerateStackProbe();
+ compiler->compStackProbePrologDone = true;
+ }
+#endif // STACK_PROBES
+
+ //
+ // Zero out the frame as needed
+ //
+
+ genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed);
+
+#if FEATURE_EH_FUNCLETS
+
+ genSetPSPSym(initReg, &initRegZeroed);
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots
+ if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem)
+ {
+ /*
+ // size/speed option?
+ getEmitter()->emitIns_I_ARR(INS_mov, EA_PTRSIZE, 0,
+ REG_EBP, REG_NA, -compiler->lvaShadowSPfirstOffs);
+ */
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs = compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*));
+
+ // Zero out the slot for nesting level 0
+ unsigned firstSlotOffs = filterEndOffsetSlotOffs - (sizeof(void*));
+
+ if (!initRegZeroed)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ initRegZeroed = true;
+ }
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar,
+ firstSlotOffs);
+ }
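+ // Worked example (hypothetical, x86 with a 12-byte shadow-SP-slots area): filterEndOffsetSlotOffs
+ // == 12 - 4 == 8 and firstSlotOffs == 4, so the store above writes a zero to the nesting-level-0
+ // slot at offset 4 within lvaShadowSPslotsVar.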
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ genReportGenericContextArg(initReg, &initRegZeroed);
+
+#if defined(LEGACY_BACKEND) // in RyuJIT backend this has already been expanded into trees
+ if (compiler->info.compCallUnmanaged)
+ {
+ getEmitter()->emitDisableRandomNops();
+ initRegs = genPInvokeMethodProlog(initRegs);
+ getEmitter()->emitEnableRandomNops();
+ }
+#endif // defined(LEGACY_BACKEND)
+
+ // The local variable representing the security object must be on the stack frame
+ // and must be 0 initialized.
+ noway_assert((compiler->lvaSecurityObject == BAD_VAR_NUM) ||
+ (compiler->lvaTable[compiler->lvaSecurityObject].lvOnFrame &&
+ compiler->lvaTable[compiler->lvaSecurityObject].lvMustInit));
+
+ // Initialize any "hidden" slots/locals
+
+ if (compiler->compLocallocUsed)
+ {
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+#ifdef _TARGET_ARM64_
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_FPBASE, compiler->lvaLocAllocSPvar, 0);
+#else
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+#endif
+ }
+
+ // Set up the GS security cookie
+
+ genSetGSSecurityCookie(initReg, &initRegZeroed);
+
+#ifdef PROFILING_SUPPORTED
+
+ // Insert a function entry callback for profiling, if requested.
+ genProfilingEnterCallback(initReg, &initRegZeroed);
+
+#endif // PROFILING_SUPPORTED
+
+ if (!genInterruptible)
+ {
+ /*-------------------------------------------------------------------------
+ *
+ * The 'real' prolog ends here for non-interruptible methods.
+ * For fully-interruptible methods, we extend the prolog so that
+ * we do not need to track GC information while shuffling the
+ * arguments.
+ *
+ * Make sure there's enough padding for ReJIT.
+ *
+ */
+ genPrologPadForReJit();
+ getEmitter()->emitMarkPrologEnd();
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) && defined(FEATURE_SIMD)
+ // The unused bits of Vector3 arguments must be cleared
+ // since the native compiler doesn't initialize the upper bits to zero.
+ //
+ // TODO-Cleanup: This logic can be implemented in
+ // genFnPrologCalleeRegArgs() for argument registers and
+ // genEnregisterIncomingStackArgs() for stack arguments.
+ genClearStackVec3ArgUpperBits();
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING && FEATURE_SIMD
+
+ /*-----------------------------------------------------------------------------
+ * Take care of register arguments first
+ */
+
+ RegState* regState;
+
+#ifndef LEGACY_BACKEND
+ // Update the arg initial register locations.
+ compiler->lvaUpdateArgsWithInitialReg();
+#endif // !LEGACY_BACKEND
+
+ FOREACH_REGISTER_FILE(regState)
+ {
+ if (regState->rsCalleeRegArgMaskLiveIn)
+ {
+ // If we need an extra register to shuffle around the incoming registers
+ // we will use xtraReg (initReg) and set the xtraRegClobbered flag,
+ // if we don't need to use the xtraReg then this flag will stay false
+ //
+ regNumber xtraReg;
+ bool xtraRegClobbered = false;
+
+ if (genRegMask(initReg) & RBM_ARG_REGS)
+ {
+ xtraReg = initReg;
+ }
+ else
+ {
+ xtraReg = REG_SCRATCH;
+ initRegZeroed = false;
+ }
+
+ genFnPrologCalleeRegArgs(xtraReg, &xtraRegClobbered, regState);
+
+ if (xtraRegClobbered)
+ {
+ initRegZeroed = false;
+ }
+ }
+ }
+
+ // Home the incoming arguments
+ genEnregisterIncomingStackArgs();
+
+ /* Initialize any must-init registers variables now */
+
+ if (initRegs)
+ {
+ regMaskTP regMask = 0x1;
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
+ {
+ if (regMask & initRegs)
+ {
+ // Check if we have already zeroed this register
+ if ((reg == initReg) && initRegZeroed)
+ {
+ continue;
+ }
+ else
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, reg);
+ if (reg == initReg)
+ {
+ initRegZeroed = true;
+ }
+ }
+ }
+ }
+ }
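+ // For example (illustrative): if initRegs == {rbx, rsi} on xarch, the loop above emits
+ // "xor ebx, ebx" and "xor esi, esi" (via instGen_Set_Reg_To_Zero), skipping a register only if it
+ // is initReg and has already been zeroed earlier in the prolog.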
+
+#if !FEATURE_STACK_FP_X87
+ if (initFltRegs | initDblRegs)
+ {
+ // If initReg is not in initRegs then we will use REG_SCRATCH
+ if ((genRegMask(initReg) & initRegs) == 0)
+ {
+ initReg = REG_SCRATCH;
+ initRegZeroed = false;
+ }
+
+#ifdef _TARGET_ARM_
+ // This is needed only for ARM, since it can use a zero-initialized int register
+ // to initialize the VFP registers.
+ if (!initRegZeroed)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, initReg);
+ initRegZeroed = true;
+ }
+#endif // _TARGET_ARM_
+
+ genZeroInitFltRegs(initFltRegs, initDblRegs, initReg);
+ }
+#endif // !FEATURE_STACK_FP_X87
+
+#if FEATURE_STACK_FP_X87
+ //
+ // Here is where we load the enregistered floating point arguments
+ // and locals onto the x86-FPU.
+ //
+ genCodeForPrologStackFP();
+#endif
+
+ //-----------------------------------------------------------------------------
+
+ //
+ // Increase the prolog size here only if fully interruptible.
+ // And again make sure it's big enough for ReJIT
+ //
+
+ if (genInterruptible)
+ {
+ genPrologPadForReJit();
+ getEmitter()->emitMarkPrologEnd();
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ psiEndProlog();
+ }
+#endif
+
+ if (hasGCRef)
+ {
+ getEmitter()->emitSetFrameRangeGCRs(GCrefLo, GCrefHi);
+ }
+ else
+ {
+ noway_assert(GCrefLo == +INT_MAX);
+ noway_assert(GCrefHi == -INT_MAX);
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n");
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ // On non-x86 the VARARG cookie does not need any special treatment.
+
+ // Load up the VARARG argument pointer register so it doesn't get clobbered.
+ // only do this if we actually access any statically declared args
+ // (our argument pointer register has a refcount > 0).
+ unsigned argsStartVar = compiler->lvaVarargsBaseOfStkArgs;
+
+ if (compiler->info.compIsVarArgs && compiler->lvaTable[argsStartVar].lvRefCnt > 0)
+ {
+ varDsc = &compiler->lvaTable[argsStartVar];
+
+ noway_assert(compiler->info.compArgsCount > 0);
+
+ // MOV EAX, <VARARGS HANDLE>
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, compiler->info.compArgsCount - 1, 0);
+ regTracker.rsTrackRegTrash(REG_EAX);
+
+ // MOV EAX, [EAX]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, REG_EAX, 0);
+
+ // EDX might actually be holding something here. So make sure to only use EAX for this code
+ // sequence.
+
+ LclVarDsc* lastArg = &compiler->lvaTable[compiler->info.compArgsCount - 1];
+ noway_assert(!lastArg->lvRegister);
+ signed offset = lastArg->lvStkOffs;
+ assert(offset != BAD_STK_OFFS);
+ noway_assert(lastArg->lvFramePointerBased);
+
+ // LEA EAX, &<VARARGS HANDLE> + EAX
+ getEmitter()->emitIns_R_ARR(INS_lea, EA_PTRSIZE, REG_EAX, genFramePointerReg(), REG_EAX, offset);
+
+ if (varDsc->lvRegister)
+ {
+ if (varDsc->lvRegNum != REG_EAX)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, varDsc->lvRegNum, REG_EAX);
+ regTracker.rsTrackRegTrash(varDsc->lvRegNum);
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_EAX, argsStartVar, 0);
+ }
+ }
+
+#endif // _TARGET_X86_
+
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ getEmitter()->emitEndProlog();
+ compiler->unwindEndProlog();
+
+ noway_assert(getEmitter()->emitMaxTmpSize == compiler->tmpSize);
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generates code for a function epilog.
+ *
+ * Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+#if defined(_TARGET_ARM_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFnEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n__epilog:\n");
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ // We delay starting the unwind codes until we have an instruction which we know
+ // needs an unwind code. In particular, for large stack frames in methods without
+ // localloc, the sequence might look something like this:
+ // movw r3, 0x38e0
+ // add sp, r3
+ // pop {r4,r5,r6,r10,r11,pc}
+ // In this case, the "movw" should not be part of the unwind codes, since it will
+ // be a NOP, and it is a waste to start with a NOP. Note that calling unwindBegEpilog()
+ // also sets the current location as the beginning offset of the epilog, so every
+ // instruction afterwards needs an unwind code. In the case above, if you call
+ // unwindBegEpilog() before the "movw", then you must generate a NOP for the "movw".
+
+ bool unwindStarted = false;
+
+ // Tear down the stack frame
+
+ if (compiler->compLocallocUsed)
+ {
+ if (!unwindStarted)
+ {
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ // mov R9 into SP
+ inst_RV_RV(INS_mov, REG_SP, REG_SAVED_LOCALLOC_SP);
+ compiler->unwindSetFrameReg(REG_SAVED_LOCALLOC_SP, 0);
+ }
+
+ if (jmpEpilog ||
+ genStackAllocRegisterMask(compiler->compLclFrameSize, regSet.rsGetModifiedRegsMask() & RBM_FLT_CALLEE_SAVED) ==
+ RBM_NONE)
+ {
+ genFreeLclFrame(compiler->compLclFrameSize, &unwindStarted, jmpEpilog);
+ }
+
+ if (!unwindStarted)
+ {
+ // If we haven't generated anything yet, we're certainly going to generate a "pop" next.
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ genPopCalleeSavedRegisters(jmpEpilog);
+
+ if (regSet.rsMaskPreSpillRegs(true) != RBM_NONE)
+ {
+ // We'd better not have used a "pop {..., pc}" to return; otherwise this code would be unreachable
+ noway_assert(!genUsedPopToReturn);
+
+ int preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ inst_RV_IV(INS_add, REG_SPBASE, preSpillRegArgSize, EA_PTRSIZE);
+ compiler->unwindAllocStack(preSpillRegArgSize);
+ }
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList);
+
+ // We'd better not have used a "pop {..., pc}" to return; otherwise this code would be unreachable
+ noway_assert(!genUsedPopToReturn);
+
+ /* figure out what jump we have */
+
+ GenTree* jmpNode = block->lastNode();
+ noway_assert(jmpNode->gtOper == GT_JMP);
+
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ void* addr;
+ regNumber indCallReg;
+ emitter::EmitCallType callType;
+
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ switch (addrInfo.accessType)
+ {
+ case IAT_VALUE:
+ if (arm_Valid_Imm_For_BL((ssize_t)addrInfo.addr))
+ {
+ // Simple direct call
+ callType = emitter::EC_FUNC_TOKEN;
+ addr = addrInfo.addr;
+ indCallReg = REG_NA;
+ break;
+ }
+
+ // otherwise the target address doesn't fit in an immediate
+ // so we have to burn a register...
+ __fallthrough;
+
+ case IAT_PVALUE:
+ // Load the address into a register, load indirect and call through a register
+ // We have to use R12 since we assume the argument registers are in use
+ callType = emitter::EC_INDIR_R;
+ indCallReg = REG_R12;
+ addr = nullptr;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addrInfo.addr);
+ if (addrInfo.accessType == IAT_PVALUE)
+ {
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+ break;
+
+ case IAT_PPVALUE:
+ default:
+ NO_WAY("Unsupported JMP indirection");
+ }
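+ // For IAT_PVALUE, the sequence built above is roughly (a sketch):
+ //   movw/movt r12, <address of the entry-point cell>  ; relocatable immediate
+ //   ldr       r12, [r12]                               ; load the actual target
+ // and the indirect jump through r12 is emitted below.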
+
+ /* Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ * the same descriptor with some minor adjustments.
+ */
+
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addr,
+ 0, // argSize
+ EA_UNKNOWN, // retSize
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // IL offset
+ indCallReg, // ireg
+ REG_NA, // xreg
+ 0, // xmul
+ 0, // disp
+ true); // isJump
+ }
+ else
+ {
+ if (!genUsedPopToReturn)
+ {
+ // If we did not use a pop to return, then we did a "pop {..., lr}" instead of "pop {..., pc}",
+ // so we need a "bx lr" instruction to return from the function.
+ inst_RV(INS_bx, REG_LR, TYP_I_IMPL);
+ compiler->unwindBranch16();
+ }
+ }
+
+ compiler->unwindEndEpilog();
+}
+
+#elif defined(_TARGET_ARM64_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFnEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n__epilog:\n");
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+ compiler->unwindBegEpilog();
+
+ genPopCalleeSavedRegistersAndFreeLclFrame(jmpEpilog);
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList != nullptr);
+
+ // figure out what jump we have
+ GenTree* jmpNode = block->lastNode();
+#if !FEATURE_FASTTAILCALL
+ noway_assert(jmpNode->gtOper == GT_JMP);
+#else
+ // arm64
+ // If jmpNode is GT_JMP then gtNext must be null.
+ // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
+
+ // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
+ noway_assert((jmpNode->gtOper == GT_JMP) ||
+ ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
+
+ // The next block is associated with this "if" stmt
+ if (jmpNode->gtOper == GT_JMP)
+#endif
+ {
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ if (addrInfo.accessType != IAT_VALUE)
+ {
+ NYI_ARM64("Unsupported JMP indirection");
+ }
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+ 0, // argSize
+ EA_UNKNOWN, // retSize
+ EA_UNKNOWN, // secondRetSize
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, REG_NA, REG_NA, 0, 0, /* iloffset, ireg, xreg, xmul, disp */
+ true); /* isJump */
+ }
+#if FEATURE_FASTTAILCALL
+ else
+ {
+ // Fast tail call.
+ // Call target = REG_IP0.
+ // https://github.com/dotnet/coreclr/issues/4827
+ // Do we need a special encoding for stack walker like rex.w prefix for x64?
+ getEmitter()->emitIns_R(INS_br, emitTypeSize(TYP_I_IMPL), REG_IP0);
+ }
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
+ compiler->unwindReturn(REG_LR);
+ }
+
+ compiler->unwindEndEpilog();
+}
+
+#elif defined(_TARGET_XARCH_)
+
+void CodeGen::genFnEpilog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFnEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, getEmitter()->emitInitGCrefVars);
+ gcInfo.gcRegGCrefSetCur = getEmitter()->emitInitGCrefRegs;
+ gcInfo.gcRegByrefSetCur = getEmitter()->emitInitByrefRegs;
+
+ noway_assert(!compiler->opts.MinOpts() || isFramePointerUsed()); // FPO not allowed with minOpts
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+#endif
+
+ bool jmpEpilog = ((block->bbFlags & BBF_HAS_JMP) != 0);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n__epilog:\n");
+ }
+
+ if (verbose)
+ {
+ printf("gcVarPtrSetCur=%s ", VarSetOps::ToString(compiler, gcInfo.gcVarPtrSetCur));
+ dumpConvertedVarSet(compiler, gcInfo.gcVarPtrSetCur);
+ printf(", gcRegGCrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur);
+ printf(", gcRegByrefSetCur=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur);
+ getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur);
+ printf("\n");
+ }
+#endif
+
+#if !FEATURE_STACK_FP_X87
+ // Restore float registers that were saved to stack before SP is modified.
+ genRestoreCalleeSavedFltRegs(compiler->compLclFrameSize);
+#endif // !FEATURE_STACK_FP_X87
+
+ /* Compute the size in bytes we've pushed/popped */
+
+ if (!doubleAlignOrFramePointerUsed())
+ {
+ // We have an ESP frame
+
+ noway_assert(compiler->compLocallocUsed == false); // Only used with frame-pointer
+
+ /* Get rid of our local variables */
+
+ if (compiler->compLclFrameSize)
+ {
+#ifdef _TARGET_X86_
+ /* Add 'compiler->compLclFrameSize' to ESP */
+ /* Use pop ECX to increment ESP by 4, unless compiler->compJmpOpUsed is true */
+
+ if ((compiler->compLclFrameSize == sizeof(void*)) && !compiler->compJmpOpUsed)
+ {
+ inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_ECX);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ /* Add 'compiler->compLclFrameSize' to ESP */
+ /* Generate "add esp, <stack-size>" */
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
+ }
+ }
+
+ genPopCalleeSavedRegisters();
+ }
+ else
+ {
+ noway_assert(doubleAlignOrFramePointerUsed());
+
+ /* Tear down the stack frame */
+
+ bool needMovEspEbp = false;
+
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ //
+ // add esp, compLclFrameSize
+ //
+ // We need not do anything (except the "mov esp, ebp") if
+ // compiler->compCalleeRegsPushed==0. However, this is unlikely, and it
+ // also complicates the code manager. Hence, we ignore that case.
+
+ noway_assert(compiler->compLclFrameSize != 0);
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->compLclFrameSize, EA_PTRSIZE);
+
+ needMovEspEbp = true;
+ }
+ else
+#endif // DOUBLE_ALIGN
+ {
+ bool needLea = false;
+
+ if (compiler->compLocallocUsed)
+ {
+ // ESP may be variable if a localloc was actually executed. Reset it.
+ // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
+
+ needLea = true;
+ }
+ else if (!regSet.rsRegsModified(RBM_CALLEE_SAVED))
+ {
+ if (compiler->compLclFrameSize != 0)
+ {
+#ifdef _TARGET_AMD64_
+ // AMD64 can't use "mov esp, ebp", according to the ABI specification describing epilogs. So,
+ // do an LEA to "pop off" the frame allocation.
+ needLea = true;
+#else // !_TARGET_AMD64_
+ // We will just generate "mov esp, ebp" and be done with it.
+ needMovEspEbp = true;
+#endif // !_TARGET_AMD64_
+ }
+ }
+ else if (compiler->compLclFrameSize == 0)
+ {
+ // do nothing before popping the callee-saved registers
+ }
+#ifdef _TARGET_X86_
+ else if (compiler->compLclFrameSize == REGSIZE_BYTES)
+ {
+ // "pop ecx" will make ESP point to the callee-saved registers
+ inst_RV(INS_pop, REG_ECX, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(REG_ECX);
+ }
+#endif // _TARGET_X86_
+ else
+ {
+ // We need to make ESP point to the callee-saved registers
+ needLea = true;
+ }
+
+ if (needLea)
+ {
+ int offset;
+
+#ifdef _TARGET_AMD64_
+ // lea esp, [ebp + compiler->compLclFrameSize - genSPtoFPdelta]
+ //
+ // Case 1: localloc not used.
+ // genSPToFPDelta = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize
+ // offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
+ // The amount to be subtracted from RBP to point at callee saved int regs.
+ //
+ // Case 2: localloc used
+ // genSPToFPDelta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize)
+ // offset = Amount to be added to RBP to point at callee saved int regs.
+ offset = genSPtoFPdelta() - compiler->compLclFrameSize;
+
+ // Offset should fit within a byte if localloc is not used.
+ if (!compiler->compLocallocUsed)
+ {
+ noway_assert(offset < UCHAR_MAX);
+ }
+#else
+ // lea esp, [ebp - compiler->compCalleeRegsPushed * REGSIZE_BYTES]
+ offset = compiler->compCalleeRegsPushed * REGSIZE_BYTES;
+ noway_assert(offset < UCHAR_MAX); // the offset fits in a byte
+#endif
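+ // Plugging hypothetical numbers into the formulas above (AMD64, no localloc): with
+ // compCalleeRegsPushed == 3 and compLclFrameSize == 0x40, genSPtoFPdelta() == 3 * 8 + 0x40 == 0x58,
+ // so offset == 0x18 and the instruction below is "lea rsp, [rbp - 0x18]", leaving RSP at the
+ // callee-saved register area.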
+
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, -offset);
+ }
+ }
+
+ //
+ // Pop the callee-saved registers (if any)
+ //
+
+ genPopCalleeSavedRegisters();
+
+#ifdef _TARGET_AMD64_
+ assert(!needMovEspEbp); // "mov esp, ebp" is not allowed in AMD64 epilogs
+#else // !_TARGET_AMD64_
+ if (needMovEspEbp)
+ {
+ // mov esp, ebp
+ inst_RV_RV(INS_mov, REG_SPBASE, REG_FPBASE);
+ }
+#endif // !_TARGET_AMD64_
+
+ // pop ebp
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ }
+
+ getEmitter()->emitStartExitSeq(); // Mark the start of the "return" sequence
+
+ /* Check if this a special return block i.e.
+ * CEE_JMP instruction */
+
+ if (jmpEpilog)
+ {
+ noway_assert(block->bbJumpKind == BBJ_RETURN);
+ noway_assert(block->bbTreeList);
+
+ // figure out what jump we have
+ GenTree* jmpNode = block->lastNode();
+#if !FEATURE_FASTTAILCALL
+ // x86
+ noway_assert(jmpNode->gtOper == GT_JMP);
+#else
+ // amd64
+ // If jmpNode is GT_JMP then gtNext must be null.
+ // If jmpNode is a fast tail call, gtNext need not be null since it could have embedded stmts.
+ noway_assert((jmpNode->gtOper != GT_JMP) || (jmpNode->gtNext == nullptr));
+
+ // Could either be a "jmp method" or "fast tail call" implemented as epilog+jmp
+ noway_assert((jmpNode->gtOper == GT_JMP) ||
+ ((jmpNode->gtOper == GT_CALL) && jmpNode->AsCall()->IsFastTailCall()));
+
+ // The next block is associated with this "if" stmt
+ if (jmpNode->gtOper == GT_JMP)
+#endif
+ {
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ CORINFO_METHOD_HANDLE methHnd = (CORINFO_METHOD_HANDLE)jmpNode->gtVal.gtVal1;
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo);
+ if (addrInfo.accessType != IAT_VALUE && addrInfo.accessType != IAT_PVALUE)
+ {
+ NO_WAY("Unsupported JMP indirection");
+ }
+
+ const emitter::EmitCallType callType =
+ (addrInfo.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN : emitter::EC_FUNC_TOKEN_INDIR;
+
+ // Simply emit a jump to the methodHnd. This is similar to a call so we can use
+ // the same descriptor with some minor adjustments.
+ getEmitter()->emitIns_Call(callType, methHnd, INDEBUG_LDISASM_COMMA(nullptr) addrInfo.addr,
+ 0, // argSize
+ EA_UNKNOWN // retSize
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), // secondRetSize
+ gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA,
+ 0, 0, /* iloffset, ireg, xreg, xmul, disp */
+ true); /* isJump */
+ }
+#if FEATURE_FASTTAILCALL
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // Fast tail call.
+ // Call target = RAX.
+ // Stack walker requires that a register indirect tail call be rex.w prefixed.
+ getEmitter()->emitIns_R(INS_rex_jmp, emitTypeSize(TYP_I_IMPL), REG_RAX);
+#else
+ assert(!"Fast tail call as epilog+jmp");
+ unreached();
+#endif //_TARGET_AMD64_
+ }
+#endif // FEATURE_FASTTAILCALL
+ }
+ else
+ {
+ unsigned stkArgSize = 0; // Zero on all platforms except x86
+
+#if defined(_TARGET_X86_)
+
+ noway_assert(compiler->compArgSize >= intRegState.rsCalleeRegArgCount * sizeof(void*));
+ stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+
+ noway_assert(compiler->compArgSize < 0x10000); // "ret" only has a 2-byte operand
+
+ // varargs has caller pop
+ if (compiler->info.compIsVarArgs)
+ stkArgSize = 0;
+
+#endif // defined(_TARGET_X86_)
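+ // Worked example (hypothetical, x86): with compArgSize == 24 and 2 register-passed args,
+ // stkArgSize == 24 - 2 * 4 == 16, so instGen_Return emits "ret 16" to pop the stack arguments;
+ // for varargs methods the caller pops, so stkArgSize is forced to 0 and we emit a plain "ret".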
+
+ /* Return, popping our arguments (if any) */
+ instGen_Return(stkArgSize);
+ }
+}
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+#if FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch: r0 = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function
+ * finally/fault: none
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch: r0 = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: r0 = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The ARM funclet prolog sequence is:
+ *
+ * push {regs,lr} ; We push the callee-saved regs and 'lr'.
+ * ; TODO-ARM-CQ: We probably only need to save lr, plus any callee-save registers that we
+ * ; actually use in the funclet. Currently, we save the same set of callee-saved regs
+ * ; calculated for the entire function.
+ * sub sp, XXX ; Establish the rest of the frame.
+ * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
+ * ; up to preserve stack alignment. If we push an odd number of registers, we also
+ * ; generate this, to keep the stack aligned.
+ *
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
+ *
+ * if (this is a filter funclet)
+ * {
+ * // r1 on entry to a filter funclet is CallerSP of the containing function:
+ * // either the main function, or the funclet for a handler that this filter is dynamically nested within.
+ * // Note that a filter can be dynamically nested within a funclet even if it is not statically within
+ * // a funclet. Consider:
+ * //
+ * // try {
+ * // try {
+ * // throw new Exception();
+ * // } catch(Exception) {
+ * // throw new Exception(); // The exception thrown here ...
+ * // }
+ * // } filter { // ... will be processed here, while the "catch" funclet frame is
+ * // // still on the stack
+ * // } filter-handler {
+ * // }
+ * //
+ * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the
+ * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting
+ * // nested EH. To simplify, we just always create a main function PSP for any function with a filter.
+ *
+ * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of
+ * ; the dynamically containing funclet or function)
+ * str r1, [sp + PSP_slot_SP_offset] ; store the PSP
+ * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer
+ * }
+ * else
+ * {
+ * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry.
+ * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction.
+ *
+ * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch.
+ * str r3, [sp + PSP_slot_SP_offset] ; store the PSP
+ * }
+ *
+ * The epilog sequence is then:
+ *
+ * add sp, XXX ; if necessary
+ * pop {regs,pc}
+ *
+ * If it is worth it, we could push r0, r1, r2, r3 instead of using an additional add/sub instruction.
+ * Code size would be smaller, but we would be writing to / reading from the stack, which might be slow.
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * |Callee saved registers |
+ * |-----------------------|
+ * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset
+ * | | // in function and funclet
+ * |-----------------------|
+ * | PSP slot |
+ * |-----------------------|
+ * ~ possible 4 byte pad ~
+ * ~ for alignment ~
+ * |-----------------------|
+ * | Outgoing arg space |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletProlog()\n");
+#endif
+
+ assert(block != nullptr);
+ assert((block->bbFlags & BBF_FUNCLET_BEG) != 0);
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ regMaskTP maskPushRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskPushRegsInt = genFuncletInfo.fiSaveRegs & ~maskPushRegsFloat;
+
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPushRegsFloat);
+ maskPushRegsInt |= maskStackAlloc;
+
+ assert(FitsIn<int>(maskPushRegsInt));
+ inst_IV(INS_push, (int)maskPushRegsInt);
+ compiler->unwindPushMaskInt(maskPushRegsInt);
+
+ if (maskPushRegsFloat != RBM_NONE)
+ {
+ genPushFltRegs(maskPushRegsFloat);
+ compiler->unwindPushMaskFloat(maskPushRegsFloat);
+ }
+
+ bool isFilter = (block->bbCatchTyp == BBCT_FILTER);
+
+ regMaskTP maskArgRegsLiveIn;
+ if (isFilter)
+ {
+ maskArgRegsLiveIn = RBM_R0 | RBM_R1;
+ }
+ else if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_NONE;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_R0;
+ }
+
+ regNumber initReg = REG_R3; // R3 is never live on entry to a funclet, so it can be trashed
+ bool initRegZeroed = false;
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
+ }
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ if (isFilter)
+ {
+ // This is the first block of a filter
+
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
+ genFuncletInfo.fiPSP_slot_CallerSP_offset);
+ regTracker.rsTrackRegTrash(REG_R1);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
+ genFuncletInfo.fiPSP_slot_SP_offset);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1,
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ }
+ else
+ {
+ // This is a non-filter funclet
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ regTracker.rsTrackRegTrash(REG_R3);
+ getEmitter()->emitIns_R_R_I(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
+ genFuncletInfo.fiPSP_slot_SP_offset);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genFuncletEpilog()\n");
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // Just as for the main function, we delay starting the unwind codes until we have
+ // an instruction which we know needs an unwind code. This is to support code like
+ // this:
+ // movw r3, 0x38e0
+ // add sp, r3
+ // pop {r4,r5,r6,r10,r11,pc}
+ // where the "movw" shouldn't be part of the unwind codes. See genFnEpilog() for more details.
+
+ bool unwindStarted = false;
+
+ /* The saved regs info saves the LR register. We need to pop the PC register to return */
+ assert(genFuncletInfo.fiSaveRegs & RBM_LR);
+
+ regMaskTP maskPopRegsFloat = genFuncletInfo.fiSaveRegs & RBM_ALLFLOAT;
+ regMaskTP maskPopRegsInt = genFuncletInfo.fiSaveRegs & ~maskPopRegsFloat;
+
+ regMaskTP maskStackAlloc = genStackAllocRegisterMask(genFuncletInfo.fiSpDelta, maskPopRegsFloat);
+ maskPopRegsInt |= maskStackAlloc;
+
+ if (maskStackAlloc == RBM_NONE)
+ {
+ genFreeLclFrame(genFuncletInfo.fiSpDelta, &unwindStarted, false);
+ }
+
+ if (!unwindStarted)
+ {
+ // We'll definitely generate an unwindable instruction next
+ compiler->unwindBegEpilog();
+ unwindStarted = true;
+ }
+
+ maskPopRegsInt &= ~RBM_LR;
+ maskPopRegsInt |= RBM_PC;
+
+ if (maskPopRegsFloat != RBM_NONE)
+ {
+ genPopFltRegs(maskPopRegsFloat);
+ compiler->unwindPopMaskFloat(maskPopRegsFloat);
+ }
+
+ assert(FitsIn<int>(maskPopRegsInt));
+ inst_IV(INS_pop, (int)maskPopRegsInt);
+ compiler->unwindPopMaskInt(maskPopRegsInt);
+
+ compiler->unwindEndEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ * Note that all funclet prologs are identical, and all funclet epilogs are
+ * identical (per type: filters are identical, and non-filters are identical).
+ * Thus, we compute the data used for these just once.
+ *
+ * See genFuncletProlog() for more information about the prolog/epilog sequences.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (compiler->ehAnyFunclets())
+ {
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout ==
+ Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+
+ // Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
+ // of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
+ // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved
+ // (also assumed in genFnProlog()).
+ assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0);
+ unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+ genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES;
+
+ regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
+ unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
+ unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; // bytes of regs we're saving
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+ unsigned funcletFrameSize =
+ preSpillRegArgSize + saveRegsSize + REGSIZE_BYTES /* PSP slot */ + compiler->lvaOutgoingArgSpaceSize;
+
+ unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
+ unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
+ unsigned spDelta = funcletFrameSizeAligned - saveRegsSize;
+
+ unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
+ int PSP_slot_CallerSP_offset =
+ -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative!
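+ // Worked example (illustrative only; hypothetical values, assuming REGSIZE_BYTES = 4 and STACK_ALIGN = 8
+ // for ARM): with no pre-spill regs, 8 saved int regs (saveRegsSize = 32), and lvaOutgoingArgSpaceSize = 8:
+ //   fiFunctionCallerSPtoFPdelta = 0 + 2*4 = 8
+ //   funcletFrameSize            = 0 + 32 + 4 + 8 = 44
+ //   funcletFrameSizeAligned     = 48, so funcletFrameAlignmentPad = 4
+ //   spDelta                     = 48 - 32 = 16
+ //   PSP_slot_SP_offset          = 8 + 4 = 12
+ //   PSP_slot_CallerSP_offset    = -(44 - 8) = -36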
+
+ /* Now save it for future use */
+
+ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs;
+ genFuncletInfo.fiSpDelta = spDelta;
+ genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset;
+ genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta);
+ printf(" Save regs: ");
+ dspRegMask(rsMaskSaveRegs);
+ printf("\n");
+ printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
+ printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
+ printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
+
+ if (PSP_slot_CallerSP_offset !=
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
+#endif // DEBUG
+
+ assert(PSP_slot_CallerSP_offset < 0);
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(PSP_slot_CallerSP_offset == compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset
+ // used in main
+ // function and
+ // funclet!
+ }
+}
+
+#elif defined(_TARGET_AMD64_)
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ *
+ * Funclets have the following incoming arguments:
+ *
+ * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG)
+ * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG)
+ * finally/fault: rcx = InitialSP
+ *
+ * Funclets set the following registers on exit:
+ *
+ * catch/filter-handler: rax = the address at which execution should resume (see BBJ_EHCATCHRET)
+ * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT)
+ * finally/fault: none
+ *
+ * The AMD64 funclet prolog sequence is:
+ *
+ * push ebp
+ * push callee-saved regs
+ * ; TODO-AMD64-CQ: We probably only need to save any callee-save registers that we actually use
+ * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for
+ * ; the entire function.
+ * sub sp, XXX ; Establish the rest of the frame.
+ * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned
+ * ; up to preserve stack alignment. If we push an odd number of registers, we also
+ * ; generate this, to keep the stack aligned.
+ *
+ * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested
+ * ; filters.
+ * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet
+ * ; epilog.
+ * ; Also, re-establish the frame pointer from the PSP.
+ *
+ * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the
+ * ; PSP of the dynamically containing funclet or function)
+ * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame
+ * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If
+ * ; Function_InitialSP_to_FP_delta==0, we don't need this
+ * ; instruction.
+ *
+ * The epilog sequence is then:
+ *
+ * add rsp, XXX
+ * pop callee-saved regs ; if necessary
+ * pop rbp
+ * ret
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | Return address |
+ * |-----------------------|
+ * | Saved EBP |
+ * |-----------------------|
+ * |Callee saved registers |
+ * |-----------------------|
+ * ~ possible 8 byte pad ~
+ * ~ for alignment ~
+ * |-----------------------|
+ * | PSP slot |
+ * |-----------------------|
+ * | Outgoing arg space | // this only exists if the function makes a call
+ * |-----------------------| <---- Initial SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * TODO-AMD64-Bug?: the frame pointer should really point to the PSP slot (the debugger seems to assume this
+ * in DacDbiInterfaceImpl::InitParentFrameInfo()), or someplace above Initial-SP. There is an AMD64
+ * UNWIND_INFO restriction that it must be within 240 bytes of Initial-SP. See jit64\amd64\inc\md.h
+ * "FRAMEPTR OFFSETS" for details.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletProlog()\n");
+ }
+#endif
+
+ assert(!regSet.rsRegsModified(RBM_FPBASE));
+ assert(block != nullptr);
+ assert(block->bbFlags & BBF_FUNCLET_BEG);
+ assert(isFramePointerUsed());
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ gcInfo.gcResetForBB();
+
+ compiler->unwindBegProlog();
+
+ // We need to push ebp, since it's callee-saved.
+ // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't
+ // keep track of that on a per-funclet basis, so we push the same set as in the main function.
+ // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else
+ // is stored here (all temps are allocated in the parent frame).
+ // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same
+ // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
+
+ inst_RV(INS_push, REG_FPBASE, TYP_REF);
+ compiler->unwindPush(REG_FPBASE);
+
+ // Callee saved int registers are pushed to stack.
+ genPushCalleeSavedRegisters();
+
+ regMaskTP maskArgRegsLiveIn;
+ if ((block->bbCatchTyp == BBCT_FINALLY) || (block->bbCatchTyp == BBCT_FAULT))
+ {
+ maskArgRegsLiveIn = RBM_ARG_0;
+ }
+ else
+ {
+ maskArgRegsLiveIn = RBM_ARG_0 | RBM_ARG_2;
+ }
+
+ regNumber initReg = REG_EBP; // We already saved EBP, so it can be trashed
+ bool initRegZeroed = false;
+
+ genAllocLclFrame(genFuncletInfo.fiSpDelta, initReg, &initRegZeroed, maskArgRegsLiveIn);
+
+ // Callee saved float registers are copied to stack in their assigned stack slots
+ // after allocating space for them as part of funclet frame.
+ genPreserveCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+
+ getEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset);
+
+ regTracker.rsTrackRegTrash(REG_FPBASE);
+
+ getEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset);
+
+ if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0)
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE,
+ genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
+ }
+
+ // We've modified EBP, but not really. Say that we haven't...
+ regSet.rsRemoveRegsModified(RBM_FPBASE);
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ *
+ * Note that we don't do anything with unwind codes, because AMD64 only cares about unwind codes for the prolog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // Restore callee saved XMM regs from their stack slots before modifying SP
+ // to position SP at the callee-saved int regs.
+ genRestoreCalleeSavedFltRegs(genFuncletInfo.fiSpDelta);
+ inst_RV_IV(INS_add, REG_SPBASE, genFuncletInfo.fiSpDelta, EA_PTRSIZE);
+ genPopCalleeSavedRegisters();
+ inst_RV(INS_pop, REG_EBP, TYP_I_IMPL);
+ instGen_Return(0);
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ {
+ return;
+ }
+
+ // Note that compLclFrameSize can't be used (nor can we call functions that depend on it),
+ // because we're not going to allocate the same size frame as the parent.
+
+ assert(isFramePointerUsed());
+ assert(compiler->lvaDoneFrameLayout ==
+ Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+ assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
+
+ // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
+ // that's ok, because we're figuring out an offset in the parent frame.
+ genFuncletInfo.fiFunction_InitialSP_to_FP_delta =
+ compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame
+ // pointer.
+
+ assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
+#ifndef UNIX_AMD64_ABI
+ // System V targets don't reserve 4 stack slots for outgoing params, so this check applies only to
+ // Windows AMD64, where we always have 4 outgoing argument slots if there are any calls in the function.
+ assert((compiler->lvaOutgoingArgSpaceSize == 0) || (compiler->lvaOutgoingArgSpaceSize >= (4 * REGSIZE_BYTES)));
+#endif // !UNIX_AMD64_ABI
+ unsigned offset = compiler->lvaOutgoingArgSpaceSize;
+
+ genFuncletInfo.fiPSP_slot_InitialSP_offset = offset;
+
+ // How much stack do we allocate in the funclet?
+ // We need to 16-byte align the stack.
+
+ unsigned totalFrameSize =
+ REGSIZE_BYTES // return address
+ + REGSIZE_BYTES // pushed EBP
+ + (compiler->compCalleeRegsPushed * REGSIZE_BYTES); // pushed callee-saved int regs, not including EBP
+
+ // The entire 128 bits of each XMM register are saved to the stack due to ABI encoding requirements.
+ // Copying an entire XMM register to/from memory is most efficient when SP is aligned on an XMM_REGSIZE_BYTES boundary.
+ unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
+ unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0;
+
+ totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs
+ + calleeFPRegsSavedSize // pushed callee-saved float regs
+ // the 'pad' computed below goes here
+ + REGSIZE_BYTES // PSPSym
+ + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
+ ;
+
+ unsigned pad = AlignmentPad(totalFrameSize, 16);
+
+ genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary
+ + calleeFPRegsSavedSize // Callee saved xmm regs
+ + pad + REGSIZE_BYTES // PSPSym
+ + compiler->lvaOutgoingArgSpaceSize // outgoing arg space
+ ;
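+ // Worked example (illustrative only; hypothetical values): with 2 pushed callee-saved int regs,
+ // 1 callee-saved xmm reg, and lvaOutgoingArgSpaceSize = 0x20:
+ //   totalFrameSize = 8 (return address) + 8 (pushed EBP) + 2*8 = 32, so FPRegsPad = 0
+ //   totalFrameSize += 0 + 16 + 8 (PSPSym) + 0x20 = 88, so pad = AlignmentPad(88, 16) = 8
+ //   fiSpDelta      = 0 + 16 + 8 + 8 + 0x20 = 64
+ // which leaves the funclet SP 16-byte aligned (32 + 64 = 96).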
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n");
+ printf("Funclet prolog / epilog info\n");
+ printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta);
+ printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta);
+ printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset);
+ }
+#endif // DEBUG
+
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+ assert(genFuncletInfo.fiPSP_slot_InitialSP_offset ==
+ compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and
+ // funclet!
+}
+
+#elif defined(_TARGET_ARM64_)
+
+// Look in CodeGenArm64.cpp
+
+#else // _TARGET_*
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+ NYI("Funclet prolog");
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+ NYI("Funclet epilog");
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (compiler->ehAnyFunclets())
+ {
+ NYI("genCaptureFuncletPrologEpilogInfo()");
+ }
+}
+
+#endif // _TARGET_*
+
+/*-----------------------------------------------------------------------------
+ *
+ * Set the main function PSPSym value in the frame.
+ * Funclets use different code to load the PSP sym and save it in their frame.
+ * See the document "X64 and ARM ABIs.docx" for a full description of the PSPSym.
+ * The PSPSym section of that document is copied here.
+ *
+ ***********************************
+ * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet
+ * accesses locals from the main function body.
+ *
+ * First, two definitions.
+ *
+ * Caller-SP is the value of the stack pointer in a function's caller before the call
+ * instruction is executed. That is, when function A calls function B, Caller-SP for B
+ * is the value of the stack pointer immediately before the call instruction in A
+ * (calling B) was executed. Note that this definition holds for both AMD64, which
+ * pushes the return value when a call instruction is executed, and for ARM, which
+ * doesn't. For AMD64, Caller-SP is the address above the call return address.
+ *
+ * Initial-SP is the initial value of the stack pointer after the fixed-size portion of
+ * the frame has been allocated. That is, before any "alloca"-type allocations.
+ *
+ * The PSPSym is a pointer-sized local variable in the frame of the main function and
+ * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP
+ * for the main function. The stack offset of the PSPSym is reported to the VM in the
+ * GC information header. The value reported in the GC information is the offset of the
+ * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the
+ * value is reported to the VM, differs between architectures. In particular, note that
+ * most things in the GC information header are reported as offsets relative to Caller-SP,
+ * but PSPSym on AMD64 is one (maybe the only) exception.)
+ *
+ * The VM uses the PSPSym to find other locals it cares about (such as the generics context
+ * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that
+ * the frame pointer is the same value in a funclet as it is in the main function body.
+ *
+ * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is
+ * true for all funclets and it is passed as the first argument in RCX, but for ARM this is
+ * only true for first pass funclets (currently just filters) and it is passed as the second
+ * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent"
+ * frame in the exception processing system. For the CLR, it points either to the main function
+ * frame or a dynamically enclosing funclet frame from the same function, for the funclet being
+ * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM.
+ *
+ * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we
+ * don't know if the Establisher Frame is from the main function or a funclet, we design the
+ * main function and funclet frame layouts to place the PSPSym at an identical, small, constant
+ * offset from the Establisher Frame in each case. (This is also required because we only report
+ * a single offset to the PSPSym in the GC information, and that offset must be valid for the main
+ * function and all of its funclets). Then, the funclet uses this known offset to compute the
+ * PSPSym address and read its value. From this, it can compute the value of the frame pointer
+ * (which is a constant offset from the PSPSym value) and set the frame register to be the same
+ * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's
+ * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular,
+ * for every nested funclet invocation.
+ *
+ * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM
+ * restores all non-volatile registers to their values within the parent frame. This includes
+ * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register
+ * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets.
+ *
+ * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument
+ * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On
+ * ARM this is the first argument and passed in R0.
+ *
+ * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always
+ * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of
+ * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym
+ * is required in all funclets as well as the main function, whereas if the establisher frame was
+ * correctly reported, the PSPSym could be omitted in some cases.)
+ ***********************************
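+ *
+ * Illustrative recap (a sketch of the code above, not an additional code path): on AMD64, given the
+ * Establisher Frame in RCX (the parent's Initial-SP), the funclet prolog effectively does:
+ *     mov rbp, [rcx + PSP_slot_InitialSP_offset]       ; load the parent's PSPSym value
+ *     mov [rsp + PSP_slot_InitialSP_offset], rbp       ; copy it into this funclet's own PSPSym slot
+ *     lea rbp, [rbp + Function_InitialSP_to_FP_delta]  ; recover the parent's frame pointer
+ * On ARM, the filter funclet does the analogous computation starting from Caller-SP in R1 (see the
+ * ARM genFuncletProlog above).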
+ */
+void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed)
+{
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->ehNeedsPSPSym())
+ {
+ return;
+ }
+
+ noway_assert(isFramePointerUsed()); // We need an explicit frame pointer
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+
+#if defined(_TARGET_ARM_)
+
+ // We either generate:
+ // add r1, r11, 8
+ // str r1, [reg + PSPSymOffset]
+ // or:
+ // add r1, sp, 76
+ // str r1, [reg + PSPSymOffset]
+ // depending on the smallest encoding
+
+ int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
+
+ int callerSPOffs;
+ regNumber regBase;
+
+ if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta))
+ {
+ // use the "add <reg>, sp, imm" form
+
+ callerSPOffs = SPtoCallerSPdelta;
+ regBase = REG_SPBASE;
+ }
+ else
+ {
+ // use the "add <reg>, r11, imm" form
+
+ int FPtoCallerSPdelta = -genCallerSPtoFPdelta();
+ noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE));
+
+ callerSPOffs = FPtoCallerSPdelta;
+ regBase = REG_FPBASE;
+ }
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ regNumber regTmp = initReg;
+ *pInitRegZeroed = false;
+
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
+
+#elif defined(_TARGET_ARM64_)
+
+ int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta();
+
+ // We will just use the initReg since it is an available register
+ // and we are probably done using it anyway...
+ regNumber regTmp = initReg;
+ *pInitRegZeroed = false;
+
+ getEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0);
+
+#elif defined(_TARGET_AMD64_)
+
+ // The PSP sym value is Initial-SP, not Caller-SP!
+ // We assume that RSP is Initial-SP when this function is called. That is, the stack frame
+ // has been established.
+ //
+ // We generate:
+ // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0);
+
+#else // _TARGET_*
+
+ NYI("Set function PSP sym");
+
+#endif // _TARGET_*
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Generates code for all the function and funclet prologs and epilogs.
+ */
+
+void CodeGen::genGeneratePrologsAndEpilogs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** Before prolog / epilog generation\n");
+ getEmitter()->emitDispIGlist(false);
+ }
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Before generating the prolog, we need to reset the variable locations to what they will be on entry.
+ // This affects our code that determines which untracked locals need to be zero initialized.
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(compiler->fgFirstBB);
+#endif // !LEGACY_BACKEND
+
+ // Tell the emitter we're done with main code generation, and are going to start prolog and epilog generation.
+
+ getEmitter()->emitStartPrologEpilogGeneration();
+
+ gcInfo.gcResetForBB();
+ genFnProlog();
+
+ // Generate all the prologs and epilogs.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+
+ // Capture the data we're going to use in the funclet prolog and epilog generation. This is
+ // information computed during codegen, or during function prolog generation, like
+ // frame offsets. It must run after main function prolog generation.
+
+ genCaptureFuncletPrologEpilogInfo();
+
+#endif // FEATURE_EH_FUNCLETS
+
+ // Walk the list of prologs and epilogs and generate them.
+ // We maintain a list of prolog and epilog basic blocks in
+ // the insGroup structure in the emitter. This list was created
+ // during code generation by the genReserve*() functions.
+ //
+ // TODO: it seems like better design would be to create a list of prologs/epilogs
+ // in the code generator (not the emitter), and then walk that list. But we already
+ // have the insGroup list, which serves well, so we don't need the extra allocations
+ // for a prolog/epilog list in the code generator.
+
+ getEmitter()->emitGeneratePrologEpilog();
+
+ // Tell the emitter we're done with all prolog and epilog generation.
+
+ getEmitter()->emitFinishPrologEpilogGeneration();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** After prolog / epilog generation\n");
+ getEmitter()->emitDispIGlist(false);
+ }
+#endif
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX End Prolog / Epilog XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if STACK_PROBES
+void CodeGen::genGenerateStackProbe()
+{
+ noway_assert(compiler->opts.compNeedStackProbes);
+
+ // If this assert fires, it means somebody has changed the value
+ // CORINFO_STACKPROBE_DEPTH.
+ // Why does the EE need such a deep probe? It should just need a couple
+ // of bytes, to set up a frame in the unmanaged code.
+
+ noway_assert(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK < compiler->eeGetPageSize());
+
+ JITDUMP("Emitting stack probe:\n");
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_PTRSIZE, REG_EAX, REG_SPBASE,
+ -(CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK));
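+ // Descriptive note (the exact disassembly form depends on the emitter): the above emits roughly
+ // "test [ESP - (CORINFO_STACKPROBE_DEPTH + JIT_RESERVED_STACK)], EAX", a read that touches the stack
+ // page at that depth, so any stack overflow fault is raised here rather than in unmanaged EE code
+ // that assumes the stack is available.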
+}
+#endif // STACK_PROBES
+
+/*****************************************************************************
+ *
+ * Record the constant and return a tree node that yields its address.
+ */
+
+GenTreePtr CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTreePtr cnsTree, bool dblAlign)
+{
+ // Assign the constant an offset in the data section
+ UNATIVE_OFFSET cnsSize = genTypeSize(cnsType);
+ UNATIVE_OFFSET cnum = getEmitter()->emitDataConst(cnsAddr, cnsSize, dblAlign);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf(" @%s%02u ", "CNS", cnum);
+
+ switch (cnsType)
+ {
+ case TYP_INT:
+ printf("DD %d \n", *(int*)cnsAddr);
+ break;
+ case TYP_LONG:
+ printf("DQ %lld\n", *(__int64*)cnsAddr);
+ break;
+ case TYP_FLOAT:
+ printf("DF %f \n", *(float*)cnsAddr);
+ break;
+ case TYP_DOUBLE:
+ printf("DQ %lf\n", *(double*)cnsAddr);
+ break;
+
+ default:
+ noway_assert(!"unexpected constant type");
+ }
+ }
+#endif
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ return new (compiler, GT_CLS_VAR) GenTreeClsVar(cnsType, compiler->eeFindJitDataOffs(cnum), nullptr);
+}
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+// Save the callee-saved floating point registers (compCalleeFPRegsSavedMask), with the smallest register
+// number saved at [RSP+offset], working down the stack to the largest register number stored at
+// [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
+// Here offset = 16-byte aligned offset after pushing integer registers.
+//
+// Params
+// lclFrameSize - Fixed frame size excluding callee pushed int regs.
+// non-funclet: this will be compLclFrameSize.
+// funclet frames: this will be FuncletInfo.fiSpDelta.
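+//
+// Illustrative example (hypothetical values, Windows AMD64): if xmm6 and xmm7 are the saved registers,
+// the pushed int reg count is even (so firstFPRegPadding = 8), and lclFrameSize = 0x38, then
+// offset = 0x38 - 8 - 0x10 = 0x20, so xmm6 is stored at [RSP+0x20] and xmm7 at [RSP+0x10].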
+void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
+{
+ regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+
+ // Only callee saved floating point registers should be in regMask
+ assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+
+ // fast path return
+ if (regMask == RBM_NONE)
+ {
+ return;
+ }
+
+#ifdef _TARGET_AMD64_
+ unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
+ unsigned offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
+
+ // Offset is 16-byte aligned since we use movaps for preserving xmm regs.
+ assert((offset % 16) == 0);
+ instruction copyIns = ins_Copy(TYP_FLOAT);
+#else // !_TARGET_AMD64_
+ unsigned offset = lclFrameSize - XMM_REGSIZE_BYTES;
+ instruction copyIns = INS_movupd;
+#endif // !_TARGET_AMD64_
+
+ for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+ if ((regBit & regMask) != 0)
+ {
+ // ABI requires us to preserve lower 128-bits of YMM register.
+ getEmitter()->emitIns_AR_R(copyIns,
+ EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
+ // EA_16BYTE
+ reg, REG_SPBASE, offset);
+ compiler->unwindSaveReg(reg, offset);
+ regMask &= ~regBit;
+ offset -= XMM_REGSIZE_BYTES;
+ }
+ }
+
+#ifdef FEATURE_AVX_SUPPORT
+ // Issue a vzeroupper to zero out the upper 128 bits of all YMM regs. This avoids AVX/legacy-SSE
+ // transition penalties around the prolog; see the matching vzeroupper in
+ // genRestoreCalleeSavedFltRegs() for the epilog side.
+ if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ {
+ instGen(INS_vzeroupper);
+ }
+#endif
+}
+
+// Restore the callee-saved floating point registers (compCalleeFPRegsSavedMask), with the smallest register
+// number saved at [RSP+offset], working down the stack to the largest register number stored at
+// [RSP+offset-(genCountBits(regMask)-1)*XMM_REGSIZE_BYTES].
+// Here offset = 16-byte aligned offset after pushing integer registers.
+//
+// Params
+// lclFrameSize - Fixed frame size excluding callee pushed int regs.
+// non-funclet: this will be compLclFrameSize.
+// funclet frames: this will be FuncletInfo.fiSpDelta.
+void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
+{
+ regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
+
+ // Only callee saved floating point registers should be in regMask
+ assert((regMask & RBM_FLT_CALLEE_SAVED) == regMask);
+
+ // fast path return
+ if (regMask == RBM_NONE)
+ {
+ return;
+ }
+
+#ifdef _TARGET_AMD64_
+ unsigned firstFPRegPadding = compiler->lvaIsCalleeSavedIntRegCountEven() ? REGSIZE_BYTES : 0;
+ instruction copyIns = ins_Copy(TYP_FLOAT);
+#else // !_TARGET_AMD64_
+ unsigned firstFPRegPadding = 0;
+ instruction copyIns = INS_movupd;
+#endif // !_TARGET_AMD64_
+
+ unsigned offset;
+ regNumber regBase;
+ if (compiler->compLocallocUsed)
+ {
+ // localloc frame: use frame pointer relative offset
+ assert(isFramePointerUsed());
+ regBase = REG_FPBASE;
+ offset = lclFrameSize - genSPtoFPdelta() - firstFPRegPadding - XMM_REGSIZE_BYTES;
+ }
+ else
+ {
+ regBase = REG_SPBASE;
+ offset = lclFrameSize - firstFPRegPadding - XMM_REGSIZE_BYTES;
+ }
+
+#ifdef _TARGET_AMD64_
+ // Offset is 16-byte aligned since we use movaps for restoring xmm regs
+ assert((offset % 16) == 0);
+#endif // _TARGET_AMD64_
+
+#ifdef FEATURE_AVX_SUPPORT
+ // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs.
+ // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is
+ // using SSE2.
+ if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ {
+ instGen(INS_vzeroupper);
+ }
+#endif
+
+ for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = genRegMask(reg);
+ if ((regBit & regMask) != 0)
+ {
+ // ABI requires us to restore lower 128-bits of YMM register.
+ getEmitter()->emitIns_R_AR(copyIns,
+ EA_8BYTE, // TODO-XArch-Cleanup: size specified here doesn't matter but should be
+ // EA_16BYTE
+ reg, regBase, offset);
+ regMask &= ~regBit;
+ offset -= XMM_REGSIZE_BYTES;
+ }
+ }
+}
+#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegPassedType: Returns true if the type is passed in multiple registers
+//
+// Arguments:
+// hClass - type handle
+//
+// Return Value:
+// true if type is passed in multiple registers, false otherwise.
+//
+bool Compiler::IsMultiRegPassedType(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ structPassingKind howToPassStruct;
+ var_types returnType = getArgTypeForStruct(hClass, &howToPassStruct);
+
+ return (returnType == TYP_STRUCT);
+}
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegReturnedType: Returns true if the type is returned in multiple registers
+//
+// Arguments:
+// hClass - type handle
+//
+// Return Value:
+// true if type is returned in multiple registers, false otherwise.
+//
+bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
+{
+ if (hClass == NO_CLASS_HANDLE)
+ {
+ return false;
+ }
+
+ structPassingKind howToReturnStruct;
+ var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
+
+ return (returnType == TYP_STRUCT);
+}
+
+//----------------------------------------------
+// Methods that support HFA's for ARM32/ARM64
+//----------------------------------------------
+
+bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
+{
+#ifdef FEATURE_HFA
+ return varTypeIsFloating(GetHfaType(hClass));
+#else
+ return false;
+#endif
+}
+
+bool Compiler::IsHfa(GenTreePtr tree)
+{
+#ifdef FEATURE_HFA
+ return IsHfa(gtGetStructHandleIfPresent(tree));
+#else
+ return false;
+#endif
+}
+
+var_types Compiler::GetHfaType(GenTreePtr tree)
+{
+#ifdef FEATURE_HFA
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ return GetHfaType(gtGetStructHandleIfPresent(tree));
+ }
+#endif
+ return TYP_UNDEF;
+}
+
+unsigned Compiler::GetHfaCount(GenTreePtr tree)
+{
+ return GetHfaCount(gtGetStructHandleIfPresent(tree));
+}
+
+var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
+{
+ var_types result = TYP_UNDEF;
+ if (hClass != NO_CLASS_HANDLE)
+ {
+#ifdef FEATURE_HFA
+ CorInfoType corType = info.compCompHnd->getHFAType(hClass);
+ if (corType != CORINFO_TYPE_UNDEF)
+ {
+ result = JITtype2varType(corType);
+ }
+#endif // FEATURE_HFA
+ }
+ return result;
+}
+
+//------------------------------------------------------------------------
+// GetHfaCount: Given a class handle for an HFA struct
+// return the number of registers needed to hold the HFA
+//
+// Note that on ARM32 the single precision registers overlap with
+// the double precision registers; for that reason each
+// double register is considered to be two single registers.
+// Thus, for an ARM32 HFA of 4 doubles this function will return 8.
+// On ARM64, given an HFA of 4 singles or 4 doubles, this function
+// will return 4 in both cases.
+// Arguments:
+// hClass: the class handle of a HFA struct
+//
+unsigned Compiler::GetHfaCount(CORINFO_CLASS_HANDLE hClass)
+{
+ assert(IsHfa(hClass));
+#ifdef _TARGET_ARM_
+ // An HFA of doubles is twice as large as an HFA of singles on ARM32
+ // (i.e. it uses twice the number of single precision registers)
+ return info.compCompHnd->getClassSize(hClass) / REGSIZE_BYTES;
+#else // _TARGET_ARM64_
+ var_types hfaType = GetHfaType(hClass);
+ unsigned classSize = info.compCompHnd->getClassSize(hClass);
+ // Note that the retail build issues a warning about a potential division by zero without the Max function
+ unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
+ return classSize / elemSize;
+#endif // _TARGET_ARM64_
+}
+
+#ifdef _TARGET_XARCH_
+
+//------------------------------------------------------------------------
+// genMapShiftInsToShiftByConstantIns: Given a general shift/rotate instruction,
+// map it to the specific x86/x64 shift opcode for a shift/rotate by a constant.
+// X86/x64 has a special encoding for shift/rotate-by-constant-1.
+//
+// Arguments:
+// ins: the base shift/rotate instruction
+// shiftByValue: the constant value by which we are shifting/rotating
+//
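+// For example (illustrative): genMapShiftInsToShiftByConstantIns(INS_shl, 1) yields INS_shl_1,
+// while genMapShiftInsToShiftByConstantIns(INS_shl, 5) yields INS_shl_N.
+//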
+instruction CodeGen::genMapShiftInsToShiftByConstantIns(instruction ins, int shiftByValue)
+{
+ assert(ins == INS_rcl || ins == INS_rcr || ins == INS_rol || ins == INS_ror || ins == INS_shl || ins == INS_shr ||
+ ins == INS_sar);
+
+ // Which format should we use?
+
+ instruction shiftByConstantIns;
+
+ if (shiftByValue == 1)
+ {
+ // Use the shift-by-one format.
+
+ assert(INS_rcl + 1 == INS_rcl_1);
+ assert(INS_rcr + 1 == INS_rcr_1);
+ assert(INS_rol + 1 == INS_rol_1);
+ assert(INS_ror + 1 == INS_ror_1);
+ assert(INS_shl + 1 == INS_shl_1);
+ assert(INS_shr + 1 == INS_shr_1);
+ assert(INS_sar + 1 == INS_sar_1);
+
+ shiftByConstantIns = (instruction)(ins + 1);
+ }
+ else
+ {
+ // Use the shift-by-NNN format.
+
+ assert(INS_rcl + 2 == INS_rcl_N);
+ assert(INS_rcr + 2 == INS_rcr_N);
+ assert(INS_rol + 2 == INS_rol_N);
+ assert(INS_ror + 2 == INS_ror_N);
+ assert(INS_shl + 2 == INS_shl_N);
+ assert(INS_shr + 2 == INS_shr_N);
+ assert(INS_sar + 2 == INS_sar_N);
+
+ shiftByConstantIns = (instruction)(ins + 2);
+ }
+
+ return shiftByConstantIns;
+}
+
+#endif // _TARGET_XARCH_
+
+#if !defined(LEGACY_BACKEND) && (defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_))
+
+//------------------------------------------------------------------------------------------------
+// getFirstArgWithStackSlot - returns the first argument with stack slot on the caller's frame.
+//
+// Return value:
+// The number of the first argument with stack slot on the caller's frame.
+//
+// Note:
+// On x64 Windows the caller always creates slots (homing space) in its frame for the
+// first 4 arguments of a callee (register passed args). So, the variable number
+// (lclNum) for the first argument with a stack slot is always 0.
+// For System V systems or arm64, there is no such calling convention requirement, and the code needs to find
+// the first stack passed argument from the caller. This is done by iterating over
+// all the lvParam variables and finding the first whose lvArgReg equals REG_STK.
+//
+unsigned CodeGen::getFirstArgWithStackSlot()
+{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+ unsigned baseVarNum = 0;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ baseVarNum = compiler->lvaFirstStackIncomingArgNum;
+
+ if (compiler->lvaFirstStackIncomingArgNum != BAD_VAR_NUM)
+ {
+ baseVarNum = compiler->lvaFirstStackIncomingArgNum;
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Iterate over all the local variables in the Lcl var table.
+ // They contain all the implicit arguments - thisPtr, retBuf,
+ // generic context, PInvoke cookie, vararg cookie, non-standard args, etc.
+ LclVarDsc* varDsc = nullptr;
+ for (unsigned i = 0; i < compiler->info.compArgsCount; i++)
+ {
+ varDsc = &(compiler->lvaTable[i]);
+
+ // We are iterating over the arguments only.
+ assert(varDsc->lvIsParam);
+
+ if (varDsc->lvArgReg == REG_STK)
+ {
+ baseVarNum = i;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ compiler->lvaFirstStackIncomingArgNum = baseVarNum;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ break;
+ }
+ }
+ assert(varDsc != nullptr);
+ }
+
+ return baseVarNum;
+#elif defined(_TARGET_AMD64_)
+ return 0;
+#else
+ // Not implemented for x86.
+ NYI_X86("getFirstArgWithStackSlot not yet implemented for x86.");
+ return BAD_VAR_NUM;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
+}
+
+#endif // !LEGACY_BACKEND && (_TARGET_XARCH_ || _TARGET_ARM64_)
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * This function should be called only after the sizes of the emitter blocks
+ * have been finalized.
+ */
+
+void CodeGen::genSetScopeInfo()
+{
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genSetScopeInfo()\n");
+ }
+#endif
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ compiler->eeSetLVcount(0);
+ compiler->eeSetLVdone();
+ return;
+ }
+
+ noway_assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
+ noway_assert(psiOpenScopeList.scNext == nullptr);
+
+ unsigned i;
+ unsigned scopeCnt = siScopeCnt + psiScopeCnt;
+
+ compiler->eeSetLVcount(scopeCnt);
+
+#ifdef DEBUG
+ genTrnslLocalVarCount = scopeCnt;
+ if (scopeCnt)
+ {
+ genTrnslLocalVarInfo = new (compiler, CMK_DebugOnly) TrnslLocalVarInfo[scopeCnt];
+ }
+#endif
+
+ // Record the scopes found for the parameters over the prolog.
+ // The prolog needs to be treated differently as a variable may not
+ // have the same info in the prolog block as is given by compiler->lvaTable.
+ // e.g., a register parameter is actually on the stack, before it is loaded into its register.
+
+ CodeGen::psiScope* scopeP;
+
+ for (i = 0, scopeP = psiScopeList.scNext; i < psiScopeCnt; i++, scopeP = scopeP->scNext)
+ {
+ noway_assert(scopeP != nullptr);
+ noway_assert(scopeP->scStartLoc.Valid());
+ noway_assert(scopeP->scEndLoc.Valid());
+
+ UNATIVE_OFFSET startOffs = scopeP->scStartLoc.CodeOffset(getEmitter());
+ UNATIVE_OFFSET endOffs = scopeP->scEndLoc.CodeOffset(getEmitter());
+
+ unsigned varNum = scopeP->scSlotNum;
+ noway_assert(startOffs <= endOffs);
+
+ // The range may be 0 if the prolog is empty. For such a case,
+ // report the liveness of arguments to span at least the first
+ // instruction in the method. This will be incorrect (except on
+ // entry to the method) if the very first instruction of the method
+ // is part of a loop. However, this should happen
+ // very rarely, and the incorrectness is worth being able to look
+ // at the argument on entry to the method.
+ if (startOffs == endOffs)
+ {
+ noway_assert(startOffs == 0);
+ endOffs++;
+ }
+
+ Compiler::siVarLoc varLoc;
+
+ if (scopeP->scRegister)
+ {
+ varLoc.vlType = Compiler::VLT_REG;
+ varLoc.vlReg.vlrReg = (regNumber)scopeP->u1.scRegNum;
+ }
+ else
+ {
+ varLoc.vlType = Compiler::VLT_STK;
+ varLoc.vlStk.vlsBaseReg = (regNumber)scopeP->u2.scBaseReg;
+ varLoc.vlStk.vlsOffset = scopeP->u2.scOffset;
+ }
+
+ genSetScopeInfo(i, startOffs, endOffs - startOffs, varNum, scopeP->scLVnum, true, varLoc);
+ }
+
+ // Record the scopes for the rest of the method.
+ // Check that the LocalVarInfo scopes look OK
+ noway_assert(siOpenScopeList.scNext == nullptr);
+
+ CodeGen::siScope* scopeL;
+
+ for (i = 0, scopeL = siScopeList.scNext; i < siScopeCnt; i++, scopeL = scopeL->scNext)
+ {
+ noway_assert(scopeL != nullptr);
+ noway_assert(scopeL->scStartLoc.Valid());
+ noway_assert(scopeL->scEndLoc.Valid());
+
+ // Find the start and end IP
+
+ UNATIVE_OFFSET startOffs = scopeL->scStartLoc.CodeOffset(getEmitter());
+ UNATIVE_OFFSET endOffs = scopeL->scEndLoc.CodeOffset(getEmitter());
+
+ noway_assert(scopeL->scStartLoc != scopeL->scEndLoc);
+
+ // For stack vars, find the base register, and offset
+
+ regNumber baseReg;
+ signed offset = compiler->lvaTable[scopeL->scVarNum].lvStkOffs;
+
+ if (!compiler->lvaTable[scopeL->scVarNum].lvFramePointerBased)
+ {
+ baseReg = REG_SPBASE;
+ offset += scopeL->scStackLevel;
+ }
+ else
+ {
+ baseReg = REG_FPBASE;
+ }
+
+ // Now fill in the varLoc
+
+ Compiler::siVarLoc varLoc;
+
+ // TODO-Review: This only works for always-enregistered variables. With LSRA, a variable might be in a register
+ // for part of its lifetime, or in different registers for different parts of its lifetime.
+ // This should only matter for non-debug code, where we do variable enregistration.
+ // We should store the ranges of variable enregistration in the scope table.
+ if (compiler->lvaTable[scopeL->scVarNum].lvIsInReg())
+ {
+ var_types type = genActualType(compiler->lvaTable[scopeL->scVarNum].TypeGet());
+ switch (type)
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+#endif // _TARGET_64BIT_
+
+ varLoc.vlType = Compiler::VLT_REG;
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+
+#ifndef _TARGET_64BIT_
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+
+ if (compiler->lvaTable[scopeL->scVarNum].lvOtherReg != REG_STK)
+ {
+ varLoc.vlType = Compiler::VLT_REG_REG;
+ varLoc.vlRegReg.vlrrReg1 = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ varLoc.vlRegReg.vlrrReg2 = compiler->lvaTable[scopeL->scVarNum].lvOtherReg;
+ }
+ else
+ {
+ varLoc.vlType = Compiler::VLT_REG_STK;
+ varLoc.vlRegStk.vlrsReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ varLoc.vlRegStk.vlrsStk.vlrssBaseReg = baseReg;
+ if (!isFramePointerUsed() && varLoc.vlRegStk.vlrsStk.vlrssBaseReg == REG_SPBASE)
+ {
+ varLoc.vlRegStk.vlrsStk.vlrssBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ varLoc.vlRegStk.vlrsStk.vlrssOffset = offset + sizeof(int);
+ }
+ break;
+#endif // !_TARGET_64BIT_
+
+#ifdef _TARGET_64BIT_
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
+ // so no XMM registers can get debug information.
+ varLoc.vlType = Compiler::VLT_REG_FP;
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+
+#else // !_TARGET_64BIT_
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (isFloatRegType(type))
+ {
+ varLoc.vlType = Compiler::VLT_FPSTK;
+ varLoc.vlFPstk.vlfReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ }
+ break;
+#endif // CPU_HAS_FP_SUPPORT
+
+#endif // !_TARGET_64BIT_
+
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+ varLoc.vlType = Compiler::VLT_REG_FP;
+
+ // TODO-AMD64-Bug: ndp\clr\src\inc\corinfo.h has a definition of RegNum that only goes up to R15,
+ // so no XMM registers can get debug information.
+ //
+ // Note: we need to initialize the vlrReg field, otherwise during a jit dump we hit an assert
+ // in eeDispVar() --> getRegName() checking that the regNumber is valid.
+ varLoc.vlReg.vlrReg = compiler->lvaTable[scopeL->scVarNum].lvRegNum;
+ break;
+#endif // FEATURE_SIMD
+
+ default:
+ noway_assert(!"Invalid type");
+ }
+ }
+ else
+ {
+ assert(offset != BAD_STK_OFFS);
+ LclVarDsc* varDsc = compiler->lvaTable + scopeL->scVarNum;
+ switch (genActualType(varDsc->TypeGet()))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ case TYP_FLOAT:
+ case TYP_STRUCT:
+ case TYP_BLK: // Needed because of the TYP_BLK stress mode
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+ case TYP_DOUBLE:
+#endif // _TARGET_64BIT_
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // In the AMD64 ABI we are supposed to pass a struct by reference when its
+ // size is not 1, 2, 4 or 8 bytes. During fgMorph, the compiler modifies
+ // the IR to comply with the ABI and therefore changes the type of the lclVar
+ // that holds the struct from TYP_STRUCT to TYP_BYREF but it gives us a hint that
+ // this is still a struct by setting the lvIsTemp flag.
+ // The same is true for ARM64 and structs > 16 bytes.
+ // (See Compiler::fgMarkImplicitByRefArgs in Morph.cpp for further detail)
+ // Now, the VM expects a special enum for this type of local var: VLT_STK_BYREF,
+ // to accommodate this situation.
+ if (varDsc->lvType == TYP_BYREF && varDsc->lvIsTemp)
+ {
+ assert(varDsc->lvIsParam);
+ varLoc.vlType = Compiler::VLT_STK_BYREF;
+ }
+ else
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ {
+ varLoc.vlType = Compiler::VLT_STK;
+ }
+ varLoc.vlStk.vlsBaseReg = baseReg;
+ varLoc.vlStk.vlsOffset = offset;
+ if (!isFramePointerUsed() && varLoc.vlStk.vlsBaseReg == REG_SPBASE)
+ {
+ varLoc.vlStk.vlsBaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ break;
+
+#ifndef _TARGET_64BIT_
+ case TYP_LONG:
+ case TYP_DOUBLE:
+ varLoc.vlType = Compiler::VLT_STK2;
+ varLoc.vlStk2.vls2BaseReg = baseReg;
+ varLoc.vlStk2.vls2Offset = offset;
+ if (!isFramePointerUsed() && varLoc.vlStk2.vls2BaseReg == REG_SPBASE)
+ {
+ varLoc.vlStk2.vls2BaseReg = (regNumber)ICorDebugInfo::REGNUM_AMBIENT_SP;
+ }
+ break;
+#endif // !_TARGET_64BIT_
+
+ default:
+ noway_assert(!"Invalid type");
+ }
+ }
+
+ genSetScopeInfo(psiScopeCnt + i, startOffs, endOffs - startOffs, scopeL->scVarNum, scopeL->scLVnum,
+ scopeL->scAvailable, varLoc);
+ }
+
+ compiler->eeSetLVdone();
+}
+
+/*****************************************************************************/
+#ifdef LATE_DISASM
+#if defined(DEBUG)
+/*****************************************************************************
+ * CompilerRegName
+ *
+ * Can be called only after lviSetLocalVarInfo() has been called
+ */
+
+/* virtual */
+const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
+{
+ if (!compiler->opts.compScopeInfo)
+ return nullptr;
+
+ if (compiler->info.compVarScopesCount == 0)
+ return nullptr;
+
+ noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
+
+ for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
+ {
+ if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsInReg((regNumber)reg)) &&
+ (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
+ (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
+ {
+ return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+/*****************************************************************************
+ * CompilerStkName
+ *
+ * Can be called only after lviSetLocalVarInfo() has been called
+ */
+
+/* virtual */
+const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
+{
+ if (!compiler->opts.compScopeInfo)
+ return nullptr;
+
+ if (compiler->info.compVarScopesCount == 0)
+ return nullptr;
+
+ noway_assert(genTrnslLocalVarCount == 0 || genTrnslLocalVarInfo);
+
+ for (unsigned i = 0; i < genTrnslLocalVarCount; i++)
+ {
+ if ((genTrnslLocalVarInfo[i].tlviVarLoc.vlIsOnStk((regNumber)reg, stkOffs)) &&
+ (genTrnslLocalVarInfo[i].tlviAvailable == true) && (genTrnslLocalVarInfo[i].tlviStartPC <= offs + size) &&
+ (genTrnslLocalVarInfo[i].tlviStartPC + genTrnslLocalVarInfo[i].tlviLength > offs))
+ {
+ return genTrnslLocalVarInfo[i].tlviName ? compiler->VarNameToStr(genTrnslLocalVarInfo[i].tlviName) : NULL;
+ }
+ }
+
+ return NULL;
+}
+
+/*****************************************************************************/
+#endif // defined(DEBUG)
+#endif // LATE_DISASM
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Display an IPmappingDsc. Pass -1 as mappingNum to not display a mapping number.
+ */
+
+void CodeGen::genIPmappingDisp(unsigned mappingNum, Compiler::IPmappingDsc* ipMapping)
+{
+ if (mappingNum != unsigned(-1))
+ {
+ printf("%d: ", mappingNum);
+ }
+
+ IL_OFFSETX offsx = ipMapping->ipmdILoffsx;
+
+ if (offsx == BAD_IL_OFFSET)
+ {
+ printf("???");
+ }
+ else
+ {
+ Compiler::eeDispILOffs(jitGetILoffsAny(offsx));
+
+ if (jitIsStackEmpty(offsx))
+ {
+ printf(" STACK_EMPTY");
+ }
+
+ if (jitIsCallInstruction(offsx))
+ {
+ printf(" CALL_INSTRUCTION");
+ }
+ }
+
+ printf(" ");
+ ipMapping->ipmdNativeLoc.Print();
+ // We can only call this after code generation. Is there any way to tell when it's legal to call?
+ // printf(" [%x]", ipMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
+
+ if (ipMapping->ipmdIsLabel)
+ {
+ printf(" label");
+ }
+
+ printf("\n");
+}
+
+void CodeGen::genIPmappingListDisp()
+{
+ unsigned mappingNum = 0;
+ Compiler::IPmappingDsc* ipMapping;
+
+ for (ipMapping = compiler->genIPmappingList; ipMapping != nullptr; ipMapping = ipMapping->ipmdNext)
+ {
+ genIPmappingDisp(mappingNum, ipMapping);
+ ++mappingNum;
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Append an IPmappingDsc struct to the list that we're maintaining
+ * for the debugger.
+ * Record the instr offset as being at the current code gen position.
+ */
+
+void CodeGen::genIPmappingAdd(IL_OFFSETX offsx, bool isLabel)
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ break;
+
+ default:
+
+ if (offsx != ICorDebugInfo::NO_MAPPING)
+ {
+ noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
+ }
+
+ // Ignore this one if it's the same IL offset as the last one we saw.
+ // Note that we'll let through two identical IL offsets if the flag bits
+ // differ, or two identical "special" mappings (e.g., PROLOG).
+ if ((compiler->genIPmappingLast != nullptr) && (offsx == compiler->genIPmappingLast->ipmdILoffsx))
+ {
+ JITDUMP("genIPmappingAdd: ignoring duplicate IL offset 0x%x\n", offsx);
+ return;
+ }
+ break;
+ }
+
+ /* Create a mapping entry and append it to the list */
+
+ Compiler::IPmappingDsc* addMapping =
+ (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
+
+ addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
+ addMapping->ipmdILoffsx = offsx;
+ addMapping->ipmdIsLabel = isLabel;
+ addMapping->ipmdNext = nullptr;
+
+ if (compiler->genIPmappingList != nullptr)
+ {
+ assert(compiler->genIPmappingLast != nullptr);
+ assert(compiler->genIPmappingLast->ipmdNext == nullptr);
+ compiler->genIPmappingLast->ipmdNext = addMapping;
+ }
+ else
+ {
+ assert(compiler->genIPmappingLast == nullptr);
+ compiler->genIPmappingList = addMapping;
+ }
+
+ compiler->genIPmappingLast = addMapping;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added IP mapping: ");
+ genIPmappingDisp(unsigned(-1), addMapping);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Prepend an IPmappingDsc struct to the list that we're maintaining
+ * for the debugger.
+ * Record the instr offset as being at the current code gen position.
+ */
+void CodeGen::genIPmappingAddToFront(IL_OFFSETX offsx)
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+ assert(offsx != BAD_IL_OFFSET);
+ assert(compiler->compGeneratingProlog); // We only ever do this during prolog generation.
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ break;
+
+ default:
+ noway_assert(jitGetILoffs(offsx) <= compiler->info.compILCodeSize);
+ break;
+ }
+
+ /* Create a mapping entry and prepend it to the list */
+
+ Compiler::IPmappingDsc* addMapping =
+ (Compiler::IPmappingDsc*)compiler->compGetMem(sizeof(*addMapping), CMK_DebugInfo);
+
+ addMapping->ipmdNativeLoc.CaptureLocation(getEmitter());
+ addMapping->ipmdILoffsx = offsx;
+ addMapping->ipmdIsLabel = true;
+ addMapping->ipmdNext = nullptr;
+
+ addMapping->ipmdNext = compiler->genIPmappingList;
+ compiler->genIPmappingList = addMapping;
+
+ if (compiler->genIPmappingLast == nullptr)
+ {
+ compiler->genIPmappingLast = addMapping;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added IP mapping to front: ");
+ genIPmappingDisp(unsigned(-1), addMapping);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) != IL_OFFSETX(BAD_IL_OFFSET));
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) != IL_OFFSETX(BAD_IL_OFFSET));
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) != IL_OFFSETX(BAD_IL_OFFSET));
+
+C_ASSERT(IL_OFFSETX(BAD_IL_OFFSET) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::NO_MAPPING) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::PROLOG) > MAX_IL_OFFSET);
+C_ASSERT(IL_OFFSETX(ICorDebugInfo::EPILOG) > MAX_IL_OFFSET);
+
+//------------------------------------------------------------------------
+// jitGetILoffs: Returns the IL offset portion of the IL_OFFSETX type.
+// Asserts if any ICorDebugInfo distinguished value (like ICorDebugInfo::NO_MAPPING)
+// is seen; these are unexpected here. Also asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value with the IL offset to extract.
+//
+// Return Value:
+// The IL offset.
+
+IL_OFFSET jitGetILoffs(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ unreached();
+
+ default:
+ return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+ }
+}
+
+//------------------------------------------------------------------------
+// jitGetILoffsAny: Similar to jitGetILoffs(), but passes through ICorDebugInfo
+// distinguished values. Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value with the IL offset to extract.
+//
+// Return Value:
+// The IL offset.
+
+IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offsx is unsigned and the case labels are signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return IL_OFFSET(offsx);
+
+ default:
+ return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+ }
+}
+
+//------------------------------------------------------------------------
+// jitIsStackEmpty: Does the IL offset have the stack empty bit set?
+// Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value to check
+//
+// Return Value:
+// 'true' if the stack empty bit is set; 'false' otherwise.
+
+bool jitIsStackEmpty(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offsx is unsigned and the case labels are signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return true;
+
+ default:
+ return (offsx & IL_OFFSETX_STKBIT) == 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// jitIsCallInstruction: Does the IL offset have the call instruction bit set?
+// Asserts if passed BAD_IL_OFFSET.
+//
+// Arguments:
+// offsx - the IL_OFFSETX value to check
+//
+// Return Value:
+// 'true' if the call instruction bit is set; 'false' otherwise.
+
+bool jitIsCallInstruction(IL_OFFSETX offsx)
+{
+ assert(offsx != BAD_IL_OFFSET);
+
+ switch ((int)offsx) // Need the cast since offsx is unsigned and the case labels are signed.
+ {
+ case ICorDebugInfo::NO_MAPPING:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::EPILOG:
+ return false;
+
+ default:
+ return (offsx & IL_OFFSETX_CALLINSTRUCTIONBIT) != 0;
+ }
+}
+
+/*****************************************************************************/
+
+void CodeGen::genEnsureCodeEmitted(IL_OFFSETX offsx)
+{
+ if (!compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ if (offsx == BAD_IL_OFFSET)
+ {
+ return;
+ }
+
+ /* If other IL offsets were reported, skip */
+
+ if (compiler->genIPmappingLast == nullptr)
+ {
+ return;
+ }
+
+ if (compiler->genIPmappingLast->ipmdILoffsx != offsx)
+ {
+ return;
+ }
+
+ /* offsx was the last reported offset. Make sure that we generated native code */
+
+ if (compiler->genIPmappingLast->ipmdNativeLoc.IsCurrentLocation(getEmitter()))
+ {
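+ // Emit a nop so that the last reported IL offset maps to at least one native
+ // instruction of its own; otherwise a breakpoint placed on that IL offset
+ // would have no code to land on.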
+ instGen(INS_nop);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Shut down the IP-mapping logic, report the info to the EE.
+ */
+
+void CodeGen::genIPmappingGen()
+{
+ if (!compiler->opts.compDbgInfo)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genIPmappingGen()\n");
+ }
+#endif
+
+ if (compiler->genIPmappingList == nullptr)
+ {
+ compiler->eeSetLIcount(0);
+ compiler->eeSetLIdone();
+ return;
+ }
+
+ Compiler::IPmappingDsc* tmpMapping;
+ Compiler::IPmappingDsc* prevMapping;
+ unsigned mappingCnt;
+ UNATIVE_OFFSET lastNativeOfs;
+
+ /* First count the number of distinct mapping records */
+
+ mappingCnt = 0;
+ lastNativeOfs = UNATIVE_OFFSET(~0);
+
+ for (prevMapping = nullptr, tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr;
+ tmpMapping = tmpMapping->ipmdNext)
+ {
+ IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
+
+ // Managed RetVal - since new sequence points are emitted to identify IL calls,
+ // make sure that those are not filtered and do not interfere with filtering of
+ // other sequence points.
+ if (jitIsCallInstruction(srcIP))
+ {
+ mappingCnt++;
+ continue;
+ }
+
+ UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
+
+ if (nextNativeOfs != lastNativeOfs)
+ {
+ mappingCnt++;
+ lastNativeOfs = nextNativeOfs;
+ prevMapping = tmpMapping;
+ continue;
+ }
+
+ /* If there are mappings with the same native offset, then:
+ o If one of them is NO_MAPPING, ignore it
+ o If one of them is a label, report that and ignore the other one
+ o Else report the higher IL offset
+ */
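+
+ /* For example (illustrative only): if a label mapping and a following
+ non-label mapping resolve to the same native offset, the label entry is
+ reported and the other is discarded; if neither is a label, the later
+ entry is the one that survives. */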
+
+ PREFIX_ASSUME(prevMapping != nullptr); // We would have continued above if prevMapping were null
+ if (prevMapping->ipmdILoffsx == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
+ {
+ // If the previous entry was NO_MAPPING, ignore it
+ prevMapping->ipmdNativeLoc.Init();
+ prevMapping = tmpMapping;
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::NO_MAPPING)
+ {
+ // If the current entry is NO_MAPPING, ignore it
+ // Leave prevMapping unchanged as tmpMapping is no longer valid
+ tmpMapping->ipmdNativeLoc.Init();
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
+ {
+ // Count this entry; the second pass below reports these special cases even when the native offset repeats
+ mappingCnt++;
+ prevMapping = tmpMapping;
+ }
+ else
+ {
+ noway_assert(prevMapping != nullptr);
+ noway_assert(!prevMapping->ipmdNativeLoc.Valid() ||
+ lastNativeOfs == prevMapping->ipmdNativeLoc.CodeOffset(getEmitter()));
+
+ /* The previous block had the same native offset. We have to
+ discard one of the mappings. Simply reinitialize ipmdNativeLoc
+ and prevMapping will be ignored later. */
+
+ if (prevMapping->ipmdIsLabel)
+ {
+ // Leave prevMapping unchanged as tmpMapping is no longer valid
+ tmpMapping->ipmdNativeLoc.Init();
+ }
+ else
+ {
+ prevMapping->ipmdNativeLoc.Init();
+ prevMapping = tmpMapping;
+ }
+ }
+ }
+
+ /* Tell them how many mapping records we've got */
+
+ compiler->eeSetLIcount(mappingCnt);
+
+ /* Now tell them about the mappings */
+
+ mappingCnt = 0;
+ lastNativeOfs = UNATIVE_OFFSET(~0);
+
+ for (tmpMapping = compiler->genIPmappingList; tmpMapping != nullptr; tmpMapping = tmpMapping->ipmdNext)
+ {
+ // Do we have to skip this record ?
+ if (!tmpMapping->ipmdNativeLoc.Valid())
+ {
+ continue;
+ }
+
+ UNATIVE_OFFSET nextNativeOfs = tmpMapping->ipmdNativeLoc.CodeOffset(getEmitter());
+ IL_OFFSETX srcIP = tmpMapping->ipmdILoffsx;
+
+ if (jitIsCallInstruction(srcIP))
+ {
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffs(srcIP), jitIsStackEmpty(srcIP), true);
+ }
+ else if (nextNativeOfs != lastNativeOfs)
+ {
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
+ lastNativeOfs = nextNativeOfs;
+ }
+ else if (srcIP == (IL_OFFSETX)ICorDebugInfo::EPILOG || srcIP == 0)
+ {
+ // For the special case of an IL instruction with no body
+ // followed by the epilog (say ret void immediately preceding
+ // the method end), we put two entries in, so that we'll stop
+ // at the (empty) ret statement if the user tries to put a
+ // breakpoint there, and then have the option of seeing the
+ // epilog or not based on SetUnmappedStopMask for the stepper.
+ compiler->eeSetLIinfo(mappingCnt++, nextNativeOfs, jitGetILoffsAny(srcIP), jitIsStackEmpty(srcIP), false);
+ }
+ }
+
+#if 0
+ // TODO-Review:
+ //This check is disabled. Any time this check would assert, the debugger would have a
+ //problem with IL source-level debugging. However, for a C# file, it only matters if things are on
+ //different source lines. As a result, we have all sorts of latent problems with how we emit debug
+ //info, but very few actual ones. Whenever someone wants to tackle that problem in general, turn this
+ //assert back on.
+ if (compiler->opts.compDbgCode)
+ {
+ //Assert that the first instruction of every basic block with more than one incoming edge has a
+ //different sequence point from each incoming block.
+ //
+ //It turns out that the only thing we really have to assert is that the first statement in each basic
+ //block has an IL offset and appears in eeBoundaries.
+ for (BasicBlock * block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if ((block->bbRefs > 1) && (block->bbTreeList != nullptr))
+ {
+ noway_assert(block->bbTreeList->gtOper == GT_STMT);
+ bool found = false;
+ if (block->bbTreeList->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ IL_OFFSET ilOffs = jitGetILoffs(block->bbTreeList->gtStmt.gtStmtILoffsx);
+ for (unsigned i = 0; i < eeBoundariesCount; ++i)
+ {
+ if (eeBoundaries[i].ilOffset == ilOffs)
+ {
+ found = true;
+ break;
+ }
+ }
+ }
+ noway_assert(found && "A basic block that is a jump target did not start a new sequence point.");
+ }
+ }
+ }
+#endif // 0
+
+ compiler->eeSetLIdone();
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*============================================================================
+ *
+ * These are empty stubs that allow the late disassembler to compile
+ * when DEBUGGING_SUPPORT is not enabled, or when the late disassembler is
+ * built into a non-DEBUG build.
+ *
+ *============================================================================
+ */
+
+#if defined(LATE_DISASM)
+#if !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+
+/* virtual */
+const char* CodeGen::siRegVarName(size_t offs, size_t size, unsigned reg)
+{
+ return NULL;
+}
+
+/* virtual */
+const char* CodeGen::siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs)
+{
+ return NULL;
+}
+
+/*****************************************************************************/
+#endif // !defined(DEBUGGING_SUPPORT) || !defined(DEBUG)
+#endif // defined(LATE_DISASM)
+/*****************************************************************************/
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
new file mode 100644
index 0000000000..e9abbe6b3c
--- /dev/null
+++ b/src/jit/codegeninterface.h
@@ -0,0 +1,440 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file declares the types that constitute the interface between the
+// code generator (CodeGen class) and the rest of the JIT.
+//
+// RegState - describes the incoming register arguments (one instance per
+// argument-register kind; see the struct below).
+//
+// CodeGenInterface includes only the public methods that are called by
+// the Compiler.
+//
+// CodeGenContext contains the shared context between the code generator
+// and other phases of the JIT, especially the register allocator and
+// GC encoder. It is distinct from CodeGenInterface so that it can be
+// included in the Compiler object, and avoid an extra indirection when
+// accessed from members of Compiler.
+//
+
+#ifndef _CODEGEN_INTERFACE_H_
+#define _CODEGEN_INTERFACE_H_
+
+#include "regset.h"
+#include "jitgcinfo.h"
+
+// Forward reference types
+
+class CodeGenInterface;
+class emitter;
+
+// Small helper types
+
+//-------------------- Register selection ---------------------------------
+
+struct RegState
+{
+ regMaskTP rsCalleeRegArgMaskLiveIn; // mask of register arguments (live on entry to method)
+#ifdef LEGACY_BACKEND
+ unsigned rsCurRegArgNum; // current argument number (for caller)
+#endif
+ unsigned rsCalleeRegArgCount; // total number of incoming register arguments of this kind (int or float)
+ bool rsIsFloat; // true for float argument registers, false for integer argument registers
+};
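+
+// CodeGenInterface keeps two RegState instances -- intRegState for the integer
+// argument registers and floatRegState for the floating-point argument
+// registers (declared below); rsIsFloat records which kind a given instance
+// describes.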
+
+//-------------------- CodeGenInterface ---------------------------------
+// interface to hide the full CodeGen implementation from rest of Compiler
+
+CodeGenInterface* getCodeGenerator(Compiler* comp);
+
+class CodeGenInterface
+{
+ friend class emitter;
+
+public:
+ CodeGenInterface(Compiler* theCompiler);
+ virtual void genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode) = 0;
+
+#ifndef LEGACY_BACKEND
+ // genSpillVar is called by compUpdateLifeVar in the RyuJIT backend case.
+ // TODO-Cleanup: We should handle the spill directly in CodeGen, rather than
+ // calling it from compUpdateLifeVar. Then this can be non-virtual.
+
+ virtual void genSpillVar(GenTreePtr tree) = 0;
+#endif // !LEGACY_BACKEND
+
+ //-------------------------------------------------------------------------
+ // The following property indicates whether to align loops.
+ // (Used to avoid effects of loop alignment when diagnosing perf issues.)
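+ // (genAlignLoops uses a compiler property extension: reads and writes of
+ // genAlignLoops are forwarded to the doAlignLoops/setAlignLoops accessors
+ // below.)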
+ __declspec(property(get = doAlignLoops, put = setAlignLoops)) bool genAlignLoops;
+ bool doAlignLoops()
+ {
+ return m_genAlignLoops;
+ }
+ void setAlignLoops(bool value)
+ {
+ m_genAlignLoops = value;
+ }
+
+ // TODO-Cleanup: Abstract out the part of this that finds the addressing mode, and
+ // move it to Lower
+ virtual bool genCreateAddrMode(GenTreePtr addr,
+ int mode,
+ bool fold,
+ regMaskTP regMask,
+ bool* revPtr,
+ GenTreePtr* rv1Ptr,
+ GenTreePtr* rv2Ptr,
+#if SCALED_ADDR_MODES
+ unsigned* mulPtr,
+#endif
+ unsigned* cnsPtr,
+ bool nogen = false) = 0;
+
+ void genCalcFrameSize();
+
+ GCInfo gcInfo;
+
+ RegSet regSet;
+ RegState intRegState;
+ RegState floatRegState;
+
+ // TODO-Cleanup: The only reason that regTracker needs to live in CodeGenInterface is that
+ // in RegSet::rsUnspillOneReg, it needs to mark the new register as "trash"
+ RegTracker regTracker;
+
+public:
+ void trashReg(regNumber reg)
+ {
+ regTracker.rsTrackRegTrash(reg);
+ }
+
+protected:
+ Compiler* compiler;
+ bool m_genAlignLoops;
+
+private:
+ static const BYTE instInfo[INS_count];
+
+#define INST_FP 0x01 // is it a FP instruction?
+public:
+ static bool instIsFP(instruction ins);
+
+ //-------------------------------------------------------------------------
+ // Liveness-related fields & methods
+public:
+ void genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bool isDying DEBUGARG(GenTreePtr tree));
+#ifndef LEGACY_BACKEND
+ void genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree);
+#endif // !LEGACY_BACKEND
+
+protected:
+#ifdef DEBUG
+ VARSET_TP genTempOldLife;
+ bool genTempLiveChg;
+#endif
+
+ VARSET_TP genLastLiveSet; // A one element map (genLastLiveSet-> genLastLiveMask)
+ regMaskTP genLastLiveMask; // these two are used in genLiveMask
+
+ regMaskTP genGetRegMask(const LclVarDsc* varDsc);
+ regMaskTP genGetRegMask(GenTreePtr tree);
+
+ void genUpdateLife(GenTreePtr tree);
+ void genUpdateLife(VARSET_VALARG_TP newLife);
+
+#ifdef LEGACY_BACKEND
+ regMaskTP genLiveMask(GenTreePtr tree);
+ regMaskTP genLiveMask(VARSET_VALARG_TP liveSet);
+#endif
+
+ void genGetRegPairFromMask(regMaskTP regPairMask, regNumber* pLoReg, regNumber* pHiReg);
+
+ // The following property indicates whether the current method sets up
+ // an explicit stack frame or not.
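+ // (PhasedVar is a JIT utility type that restricts when the variable may be
+ // written relative to when it is read; resetFramePointerUsedWritePhase()
+ // below re-opens the write phase.)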
+private:
+ PhasedVar<bool> m_cgFramePointerUsed;
+
+public:
+ bool isFramePointerUsed() const
+ {
+ return m_cgFramePointerUsed;
+ }
+ void setFramePointerUsed(bool value)
+ {
+ m_cgFramePointerUsed = value;
+ }
+ void resetFramePointerUsedWritePhase()
+ {
+ m_cgFramePointerUsed.ResetWritePhase();
+ }
+
+ // The following property indicates whether the current method requires
+ // an explicit frame. Does not prohibit double alignment of the stack.
+private:
+ PhasedVar<bool> m_cgFrameRequired;
+
+public:
+ bool isFrameRequired() const
+ {
+ return m_cgFrameRequired;
+ }
+ void setFrameRequired(bool value)
+ {
+ m_cgFrameRequired = value;
+ }
+
+public:
+ int genCallerSPtoFPdelta();
+ int genCallerSPtoInitialSPdelta();
+ int genSPtoFPdelta();
+ int genTotalFrameSize();
+
+ regNumber genGetThisArgReg(GenTreePtr call);
+
+#ifdef _TARGET_XARCH_
+#ifdef _TARGET_AMD64_
+ // There are no reloc hints on x86
+ unsigned short genAddrRelocTypeHint(size_t addr);
+#endif
+ bool genDataIndirAddrCanBeEncodedAsPCRelOffset(size_t addr);
+ bool genCodeIndirAddrCanBeEncodedAsPCRelOffset(size_t addr);
+ bool genCodeIndirAddrCanBeEncodedAsZeroRelOffset(size_t addr);
+ bool genCodeIndirAddrNeedsReloc(size_t addr);
+ bool genCodeAddrNeedsReloc(size_t addr);
+#endif
+
+ // If both isFramePointerRequired() and isFrameRequired() are false, the method is eligible
+ // for Frame-Pointer-Omission (FPO).
+
+ // The following property indicates whether the current method requires
+ // an explicit stack frame, and all arguments and locals to be
+ // accessible relative to the Frame Pointer. Prohibits double alignment
+ // of the stack.
+private:
+ PhasedVar<bool> m_cgFramePointerRequired;
+
+public:
+ bool isFramePointerRequired() const
+ {
+ return m_cgFramePointerRequired;
+ }
+ void setFramePointerRequired(bool value)
+ {
+ m_cgFramePointerRequired = value;
+ }
+ void setFramePointerRequiredEH(bool value);
+
+ void setFramePointerRequiredGCInfo(bool value)
+ {
+#ifdef JIT32_GCENCODER
+ m_cgFramePointerRequired = value;
+#endif
+ }
+
+#if DOUBLE_ALIGN
+ // The following property indicates whether we are going to double-align the frame.
+ // Arguments are accessed relative to the Frame Pointer (EBP), and
+ // locals are accessed relative to the Stack Pointer (ESP).
+public:
+ bool doDoubleAlign() const
+ {
+ return m_cgDoubleAlign;
+ }
+ void setDoubleAlign(bool value)
+ {
+ m_cgDoubleAlign = value;
+ }
+ bool doubleAlignOrFramePointerUsed() const
+ {
+ return isFramePointerUsed() || doDoubleAlign();
+ }
+
+private:
+ bool m_cgDoubleAlign;
+#else // !DOUBLE_ALIGN
+public:
+ bool doubleAlignOrFramePointerUsed() const
+ {
+ return isFramePointerUsed();
+ }
+#endif // !DOUBLE_ALIGN
+
+#ifdef DEBUG
+ // The following is used to make sure the value of 'genInterruptible' isn't
+ // changed after it's been used by any logic that depends on its value.
+public:
+ bool isGCTypeFixed()
+ {
+ return genInterruptibleUsed;
+ }
+
+protected:
+ bool genInterruptibleUsed;
+#endif
+
+public:
+#if FEATURE_STACK_FP_X87
+ FlatFPStateX87 compCurFPState;
+ unsigned genFPregCnt; // count of current FP reg. vars (including dead but unpopped ones)
+
+ void SetRegVarFloat(regNumber reg, var_types type, LclVarDsc* varDsc);
+
+ void inst_FN(instruction ins, unsigned stk);
+
+ // Keeps track of the current level of the FP coprocessor stack
+ // (excluding FP reg. vars).
+ // Do not use directly, instead use the processor agnostic accessor
+ // methods below
+ //
+ unsigned genFPstkLevel;
+
+ void genResetFPstkLevel(unsigned newValue = 0);
+ unsigned genGetFPstkLevel();
+ FlatFPStateX87* FlatFPAllocFPState(FlatFPStateX87* pInitFrom = 0);
+
+ void genIncrementFPstkLevel(unsigned inc = 1);
+ void genDecrementFPstkLevel(unsigned dec = 1);
+
+ static const char* regVarNameStackFP(regNumber reg);
+
+ // The FlatFPX87_ functions are the actual verbs that do the work --
+ // performing a transition, loading a register, etc. They are also
+ // responsible for emitting the x87 code to do so. We keep
+ // them in Compiler because we don't want to store a pointer to the
+ // emitter.
+ void FlatFPX87_MoveToTOS(FlatFPStateX87* pState, unsigned iVirtual, bool bEmitCode = true);
+ void FlatFPX87_SwapStack(FlatFPStateX87* pState, unsigned i, unsigned j, bool bEmitCode = true);
+
+#endif // FEATURE_STACK_FP_X87
+
+#ifndef LEGACY_BACKEND
+ regNumber genGetAssignedReg(GenTreePtr tree);
+#endif // !LEGACY_BACKEND
+
+#ifdef LEGACY_BACKEND
+ // Changes GT_LCL_VAR nodes to GT_REG_VAR nodes if possible.
+ bool genMarkLclVar(GenTreePtr tree);
+
+ void genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc);
+#endif // LEGACY_BACKEND
+
+public:
+ unsigned InferStructOpSizeAlign(GenTreePtr op, unsigned* alignmentWB);
+ unsigned InferOpSizeAlign(GenTreePtr op, unsigned* alignmentWB);
+
+ void genMarkTreeInReg(GenTreePtr tree, regNumber reg);
+#if CPU_LONG_USES_REGPAIR
+ void genMarkTreeInRegPair(GenTreePtr tree, regPairNo regPair);
+#endif
+ // Methods to abstract target information
+
+ bool validImmForInstr(instruction ins, ssize_t val, insFlags flags = INS_FLAGS_DONT_CARE);
+ bool validDispForLdSt(ssize_t disp, var_types type);
+ bool validImmForAdd(ssize_t imm, insFlags flags);
+ bool validImmForAlu(ssize_t imm);
+ bool validImmForMov(ssize_t imm);
+ bool validImmForBL(ssize_t addr);
+
+ instruction ins_Load(var_types srcType, bool aligned = false);
+ instruction ins_Store(var_types dstType, bool aligned = false);
+ static instruction ins_FloatLoad(var_types type = TYP_DOUBLE);
+
+ // Methods for spilling - used by RegSet
+ void spillReg(var_types type, TempDsc* tmp, regNumber reg);
+ void reloadReg(var_types type, TempDsc* tmp, regNumber reg);
+ void reloadFloatReg(var_types type, TempDsc* tmp, regNumber reg);
+
+#ifdef LEGACY_BACKEND
+ void SpillFloat(regNumber reg, bool bIsCall = false);
+#endif // LEGACY_BACKEND
+
+ // The following method is used by xarch emitter for handling contained tree temps.
+ TempDsc* getSpillTempDsc(GenTree* tree);
+
+public:
+ emitter* getEmitter()
+ {
+ return m_cgEmitter;
+ }
+
+protected:
+ emitter* m_cgEmitter;
+
+#ifdef LATE_DISASM
+public:
+ DisAssembler& getDisAssembler()
+ {
+ return m_cgDisAsm;
+ }
+
+protected:
+ DisAssembler m_cgDisAsm;
+#endif // LATE_DISASM
+
+public:
+#ifdef DEBUG
+ void setVerbose(bool value)
+ {
+ verbose = value;
+ }
+ bool verbose;
+#ifdef LEGACY_BACKEND
+ // Stress mode
+ int genStressFloat();
+ regMaskTP genStressLockedMaskFloat();
+#endif // LEGACY_BACKEND
+#endif // DEBUG
+
+ // The following is set to true if we've determined that the current method
+ // is to be fully interruptible.
+ //
+public:
+ __declspec(property(get = getInterruptible, put = setInterruptible)) bool genInterruptible;
+ bool getInterruptible()
+ {
+ return m_cgInterruptible;
+ }
+ void setInterruptible(bool value)
+ {
+ m_cgInterruptible = value;
+ }
+
+private:
+ bool m_cgInterruptible;
+
+ // The following will be set to true if we've determined that we need to
+ // generate a full-blown pointer register map for the current method.
+ // Currently it is equal to (genInterruptible || !isFramePointerUsed())
+ // (i.e. We generate the full-blown map for EBP-less methods and
+ // for fully interruptible methods)
+ //
+public:
+ __declspec(property(get = doFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap;
+ bool doFullPtrRegMap()
+ {
+ return m_cgFullPtrRegMap;
+ }
+ void setFullPtrRegMap(bool value)
+ {
+ m_cgFullPtrRegMap = value;
+ }
+
+private:
+ bool m_cgFullPtrRegMap;
+
+#ifdef DEBUGGING_SUPPORT
+public:
+ virtual void siUpdate() = 0;
+#endif // DEBUGGING_SUPPORT
+
+#ifdef LATE_DISASM
+public:
+ virtual const char* siRegVarName(size_t offs, size_t size, unsigned reg) = 0;
+
+ virtual const char* siStackVarName(size_t offs, size_t size, unsigned reg, unsigned stkOffs) = 0;
+#endif // LATE_DISASM
+};
+
+#endif // _CODEGEN_INTERFACE_H_
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
new file mode 100644
index 0000000000..ea40eb2aff
--- /dev/null
+++ b/src/jit/codegenlegacy.cpp
@@ -0,0 +1,22057 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX CodeGenerator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "codegen.h"
+
+#ifdef LEGACY_BACKEND // This file is NOT used for the '!LEGACY_BACKEND' that uses the linear scan register allocator
+
+#ifdef _TARGET_AMD64_
+#error AMD64 must be !LEGACY_BACKEND
+#endif
+
+#ifdef _TARGET_ARM64_
+#error ARM64 must be !LEGACY_BACKEND
+#endif
+
+#include "gcinfo.h"
+#include "emit.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************
+ *
+ * Determine what variables die between beforeSet and afterSet, and
+ * update the liveness globals accordingly:
+ * compiler->compCurLife, gcInfo.gcVarPtrSetCur, regSet.rsMaskVars, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur
+ */
+
+void CodeGen::genDyingVars(VARSET_VALARG_TP beforeSet, VARSET_VALARG_TP afterSet)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ regMaskTP regBit;
+ VARSET_TP VARSET_INIT_NOCOPY(deadSet, VarSetOps::Diff(compiler, beforeSet, afterSet));
+
+ if (VarSetOps::IsEmpty(compiler, deadSet))
+ return;
+
+ /* iterate through the dead variables */
+
+ VARSET_ITER_INIT(compiler, iter, deadSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ varNum = compiler->lvaTrackedToVarNum[varIndex];
+ varDsc = compiler->lvaTable + varNum;
+
+ /* Remove this variable from the 'deadSet' bit set */
+
+ noway_assert(VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex));
+
+ VarSetOps::RemoveElemD(compiler, compiler->compCurLife, varIndex);
+
+ noway_assert(!VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varIndex) ||
+ VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex));
+
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+
+ /* We are done if the variable is not enregistered */
+
+ if (!varDsc->lvRegister)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u,T%02u is a dyingVar\n", varNum, varDsc->lvVarIndex);
+ }
+#endif
+ continue;
+ }
+
+#if !FEATURE_FP_REGALLOC
+ // We don't do FP-enreg of vars whose liveness changes in GTF_COLON_COND
+ if (!varDsc->IsFloatRegType())
+#endif
+ {
+ /* Get hold of the appropriate register bit(s) */
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u,T%02u in reg %s is a dyingVar\n", varNum, varDsc->lvVarIndex,
+ compiler->compRegVarName(varDsc->lvRegNum));
+ }
+#endif
+ noway_assert((regSet.rsMaskVars & regBit) != 0);
+
+ regSet.RemoveMaskVars(regBit);
+
+ // Remove GC tracking if any for this register
+
+ if ((regBit & regSet.rsMaskUsed) == 0) // The register may be multi-used
+ gcInfo.gcMarkRegSetNpt(regBit);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Change the given enregistered local variable node to a register variable node
+ */
+
+void CodeGenInterface::genBashLclVar(GenTreePtr tree, unsigned varNum, LclVarDsc* varDsc)
+{
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+ noway_assert(varDsc->lvRegister);
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ /* Check for the case of a variable that was narrowed to an int */
+
+ if (isRegPairType(tree->gtType))
+ {
+ genMarkTreeInRegPair(tree, gen2regs2pair(varDsc->lvRegNum, varDsc->lvOtherReg));
+ return;
+ }
+
+ noway_assert(tree->gtFlags & GTF_VAR_CAST);
+ noway_assert(tree->gtType == TYP_INT);
+ }
+ else
+ {
+ noway_assert(!isRegPairType(tree->gtType));
+ }
+
+ /* It's a register variable -- modify the node */
+
+ unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
+
+ ValueNumPair vnp = tree->gtVNPair; // Save the ValueNumPair
+ tree->SetOper(GT_REG_VAR);
+ tree->gtVNPair = vnp; // Preserve the ValueNumPair, as SetOper will clear it.
+
+ tree->gtFlags |= livenessFlags;
+ tree->gtFlags |= GTF_REG_VAL;
+ tree->gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.SetLclNum(varNum);
+}
+
+// inline
+void CodeGen::saveLiveness(genLivenessSet* ls)
+{
+ VarSetOps::Assign(compiler, ls->liveSet, compiler->compCurLife);
+ VarSetOps::Assign(compiler, ls->varPtrSet, gcInfo.gcVarPtrSetCur);
+ ls->maskVars = (regMaskSmall)regSet.rsMaskVars;
+ ls->gcRefRegs = (regMaskSmall)gcInfo.gcRegGCrefSetCur;
+ ls->byRefRegs = (regMaskSmall)gcInfo.gcRegByrefSetCur;
+}
+
+// inline
+void CodeGen::restoreLiveness(genLivenessSet* ls)
+{
+ VarSetOps::Assign(compiler, compiler->compCurLife, ls->liveSet);
+ VarSetOps::Assign(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet);
+ regSet.rsMaskVars = ls->maskVars;
+ gcInfo.gcRegGCrefSetCur = ls->gcRefRegs;
+ gcInfo.gcRegByrefSetCur = ls->byRefRegs;
+}
+
+// inline
+void CodeGen::checkLiveness(genLivenessSet* ls)
+{
+ assert(VarSetOps::Equal(compiler, compiler->compCurLife, ls->liveSet));
+ assert(VarSetOps::Equal(compiler, gcInfo.gcVarPtrSetCur, ls->varPtrSet));
+ assert(regSet.rsMaskVars == ls->maskVars);
+ assert(gcInfo.gcRegGCrefSetCur == ls->gcRefRegs);
+ assert(gcInfo.gcRegByrefSetCur == ls->byRefRegs);
+}
+
+// inline
+bool CodeGenInterface::genMarkLclVar(GenTreePtr tree)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ assert(tree->gtOper == GT_LCL_VAR);
+
+ /* Does the variable live in a register? */
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvRegister)
+ {
+ genBashLclVar(tree, varNum, varDsc);
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+// inline
+GenTreePtr CodeGen::genGetAddrModeBase(GenTreePtr tree)
+{
+ bool rev;
+ unsigned mul;
+ unsigned cns;
+ GenTreePtr adr;
+ GenTreePtr idx;
+
+ if (genCreateAddrMode(tree, // address
+ 0, // mode
+ false, // fold
+ RBM_NONE, // reg mask
+ &rev, // reverse ops
+ &adr, // base addr
+ &idx, // index val
+#if SCALED_ADDR_MODES
+ &mul, // scaling
+#endif
+ &cns, // displacement
+ true)) // don't generate code
+ return adr;
+ else
+ return NULL;
+}
+
+// inline
+void CodeGen::genSinglePush()
+{
+ genStackLevel += sizeof(void*);
+}
+
+// inline
+void CodeGen::genSinglePop()
+{
+ genStackLevel -= sizeof(void*);
+}
+
+#if FEATURE_STACK_FP_X87
+// inline
+void CodeGenInterface::genResetFPstkLevel(unsigned newValue /* = 0 */)
+{
+ genFPstkLevel = newValue;
+}
+
+// inline
+unsigned CodeGenInterface::genGetFPstkLevel()
+{
+ return genFPstkLevel;
+}
+
+// inline
+void CodeGenInterface::genIncrementFPstkLevel(unsigned inc /* = 1 */)
+{
+ noway_assert((inc == 0) || genFPstkLevel + inc > genFPstkLevel);
+ genFPstkLevel += inc;
+}
+
+// inline
+void CodeGenInterface::genDecrementFPstkLevel(unsigned dec /* = 1 */)
+{
+ noway_assert((dec == 0) || genFPstkLevel - dec < genFPstkLevel);
+ genFPstkLevel -= dec;
+}
+
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ noway_assert(type != TYP_REF || val == NULL);
+
+ /* Does the reg already hold this constant? */
+
+ if (!regTracker.rsIconIsInReg(val, reg))
+ {
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+#ifdef _TARGET_ARM_
+ // If we can set a register to a constant with a small encoding, then do that.
+ else if (arm_Valid_Imm_For_Small_Mov(reg, val, flags))
+ {
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+ }
+#endif
+ else
+ {
+ /* See if a register holds the value or a close value? */
+ bool constantLoaded = false;
+ ssize_t delta;
+ regNumber srcReg = regTracker.rsIconIsInReg(val, &delta);
+
+ if (srcReg != REG_NA)
+ {
+ if (delta == 0)
+ {
+ inst_RV_RV(INS_mov, reg, srcReg, type, emitActualTypeSize(type), flags);
+ constantLoaded = true;
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+ /* delta should fit inside a byte */
+ if (delta == (signed char)delta)
+ {
+ /* use an lea instruction to set reg */
+ getEmitter()->emitIns_R_AR(INS_lea, emitTypeSize(type), reg, srcReg, (int)delta);
+ constantLoaded = true;
+ }
+#elif defined(_TARGET_ARM_)
+ /* We found a register 'regS' that has the value we need, modulo a small delta.
+ That is, the value we need is 'regS + delta'.
+ We want to generate one of the following instructions, listed in order of preference:
+
+ adds regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
+ 0<=delta<=255
+ subs regD, delta ; 2 bytes. if regD == regS, regD is a low register, and
+ -255<=delta<=0
+ adds regD, regS, delta ; 2 bytes. if regD and regS are low registers and 0<=delta<=7
+ subs regD, regS, delta ; 2 bytes. if regD and regS are low registers and -7<=delta<=0
+ mov regD, icon ; 4 bytes. icon is a wacky Thumb 12-bit immediate.
+ movw regD, icon ; 4 bytes. 0<=icon<=65535
+ add.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
+ sub.w regD, regS, delta ; 4 bytes. delta is a wacky Thumb 12-bit immediate.
+ addw regD, regS, delta ; 4 bytes. 0<=delta<=4095
+ subw regD, regS, delta ; 4 bytes. -4095<=delta<=0
+
+ If it wasn't for the desire to generate the "mov reg,icon" forms if possible (and no bigger
+ than necessary), this would be a lot simpler. Note that we might set the overflow flag: we
+ can have regS containing the largest signed int 0x7fffffff and need the smallest signed int
+ 0x80000000. In this case, delta will be 1.
+ */
+
+ bool useAdd = false;
+ regMaskTP regMask = genRegMask(reg);
+ regMaskTP srcRegMask = genRegMask(srcReg);
+
+ if ((flags != INS_FLAGS_NOT_SET) && (reg == srcReg) && (regMask & RBM_LOW_REGS) &&
+ (unsigned_abs(delta) <= 255))
+ {
+ useAdd = true;
+ }
+ else if ((flags != INS_FLAGS_NOT_SET) && (regMask & RBM_LOW_REGS) && (srcRegMask & RBM_LOW_REGS) &&
+ (unsigned_abs(delta) <= 7))
+ {
+ useAdd = true;
+ }
+ else if (arm_Valid_Imm_For_Mov(val))
+ {
+ // fall through to general "!constantLoaded" case below
+ }
+ else if (arm_Valid_Imm_For_Add(delta, flags))
+ {
+ useAdd = true;
+ }
+
+ if (useAdd)
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, reg, srcReg, delta, flags);
+ constantLoaded = true;
+ }
+#else
+ assert(!"Codegen missing");
+#endif
+ }
+ }
+
+ if (!constantLoaded) // Have we loaded it yet?
+ {
+#ifdef _TARGET_X86_
+ if (val == -1)
+ {
+ /* or reg,-1 takes 3 bytes */
+ inst_RV_IV(INS_OR, reg, val, emitActualTypeSize(type));
+ }
+ else
+ /* For SMALL_CODE it is smaller to push a small immediate and
+ then pop it into the dest register */
+ if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) && val == (signed char)val)
+ {
+ /* "mov" has no s(sign)-bit and so always takes 6 bytes,
+ whereas push+pop takes 2+1 bytes */
+
+ inst_IV(INS_push, val);
+ genSinglePush();
+
+ inst_RV(INS_pop, reg, type);
+ genSinglePop();
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
+ }
+ }
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, val);
+ gcInfo.gcMarkRegPtrVal(reg, type);
+}
+
+/*****************************************************************************
+ *
+ * Find an existing register set to the given integer constant, or
+ * pick a register and generate code that will set it to the integer constant.
+ *
+ * If no existing register is set to the constant, it will use regSet.rsPickReg(regBest)
+ * to pick some register to set. NOTE that this means the returned regNumber
+ * might *not* be in regBest. It also implies that you should lock any registers
+ * you don't want spilled (not just mark as used).
+ *
+ */
+
+regNumber CodeGen::genGetRegSetToIcon(ssize_t val, regMaskTP regBest /* = 0 */, var_types type /* = TYP_INT */)
+{
+ regNumber regCns;
+#if REDUNDANT_LOAD
+
+ // Is there already a register holding this constant that we can use?
+ regCns = regTracker.rsIconIsInReg(val);
+
+ if (regCns == REG_NA)
+#endif
+ {
+ // If not, grab a register to hold the constant, preferring
+ // any register besides RBM_TMP_0 so it can hopefully be re-used
+ regCns = regSet.rsPickReg(regBest, regBest & ~RBM_TMP_0);
+
+ // Now set the constant
+ genSetRegToIcon(regCns, val, type);
+ }
+
+ // NOTE: there is no guarantee that regCns is in regBest's mask
+ return regCns;
+}
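+
+// Illustrative use (not from the original sources): to materialize zero while
+// preferring a callee-trashed register, a caller might write
+// regNumber zreg = genGetRegSetToIcon(0, RBM_CALLEE_TRASH, TYP_INT);
+// and, per the note above, must not assume that zreg actually lies within
+// RBM_CALLEE_TRASH.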
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Add the given constant to the specified register.
+ * 'tree' is the resulting tree
+ */
+
+void CodeGen::genIncRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
+{
+ bool setFlags = (tree != NULL) && tree->gtSetFlags();
+
+#ifdef _TARGET_XARCH_
+ /* First check to see if we can generate inc or dec instruction(s) */
+ /* But avoid inc/dec on P4 in general for fast code or inside loops for blended code */
+ if (!ovfl && !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
+ {
+ emitAttr size = emitTypeSize(dstType);
+
+ switch (ival)
+ {
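+ // Note: case 2 falls through to case 1 (and -2 to -1), so "inc"/"dec" is
+ // emitted twice for a constant of +/-2.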
+ case 2:
+ inst_RV(INS_inc, reg, dstType, size);
+ __fallthrough;
+ case 1:
+ inst_RV(INS_inc, reg, dstType, size);
+
+ goto UPDATE_LIVENESS;
+
+ case -2:
+ inst_RV(INS_dec, reg, dstType, size);
+ __fallthrough;
+ case -1:
+ inst_RV(INS_dec, reg, dstType, size);
+
+ goto UPDATE_LIVENESS;
+ }
+ }
+#endif
+
+ insFlags flags = setFlags ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_add, reg, ival, emitActualTypeSize(dstType), flags);
+
+#ifdef _TARGET_XARCH_
+UPDATE_LIVENESS:
+#endif
+
+ if (setFlags)
+ genFlagsEqualToReg(tree, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ if (tree != NULL)
+ {
+ if (!tree->OperIsAssignment())
+ {
+ genMarkTreeInReg(tree, reg);
+ if (varTypeIsGC(tree->TypeGet()))
+ gcInfo.gcMarkRegSetByref(genRegMask(reg));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Subtract the given constant from the specified register.
+ * Should only be used for unsigned sub with overflow. Else
+ * genIncRegBy() can be used with -ival. We shouldn't use genIncRegBy()
+ * for these cases as the flags are set differently, and the following
+ * check for overflow won't work correctly.
+ * 'tree' is the resulting tree.
+ */
+
+void CodeGen::genDecRegBy(regNumber reg, ssize_t ival, GenTreePtr tree)
+{
+ noway_assert((tree->gtFlags & GTF_OVERFLOW) &&
+ ((tree->gtFlags & GTF_UNSIGNED) || ival == ((tree->gtType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)));
+ noway_assert(tree->gtType == TYP_INT || tree->gtType == TYP_I_IMPL);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ noway_assert(!varTypeIsGC(tree->TypeGet()));
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_sub, reg, ival, emitActualTypeSize(tree->TypeGet()), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ if (tree)
+ {
+ genMarkTreeInReg(tree, reg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Multiply the specified register by the given value.
+ * 'tree' is the resulting tree
+ */
+
+void CodeGen::genMulRegBy(regNumber reg, ssize_t ival, GenTreePtr tree, var_types dstType, bool ovfl)
+{
+ noway_assert(genActualType(dstType) == TYP_INT || genActualType(dstType) == TYP_I_IMPL);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ if (tree)
+ {
+ genMarkTreeInReg(tree, reg);
+ }
+
+ bool use_shift = false;
+ unsigned shift_by = 0;
+
+ if ((dstType >= TYP_INT) && !ovfl && (ival > 0) && ((ival & (ival - 1)) == 0))
+ {
+ use_shift = true;
+ BitScanForwardPtr((ULONG*)&shift_by, (ULONG)ival);
+ }
+
+ if (use_shift)
+ {
+ if (shift_by != 0)
+ {
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, emitTypeSize(dstType), reg, shift_by, flags);
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ }
+ else
+ {
+ instruction ins;
+#ifdef _TARGET_XARCH_
+ ins = getEmitter()->inst3opImulForReg(reg);
+#else
+ ins = INS_mul;
+#endif
+
+ inst_RV_IV(ins, reg, ival, emitActualTypeSize(dstType));
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Compute the value 'tree' into a register that's in 'needReg'
+ * (or any free register if 'needReg' is RBM_NONE).
+ *
+ * Note that 'needReg' is just a recommendation unless mustReg==RegSet::EXACT_REG.
+ * If keepReg==RegSet::KEEP_REG, we mark the register as being used.
+ *
+ * If you require that the register returned is trashable, pass true for 'freeOnly'.
+ */
+
+void CodeGen::genComputeReg(
+ GenTreePtr tree, regMaskTP needReg, RegSet::ExactReg mustReg, RegSet::KeepReg keepReg, bool freeOnly)
+{
+ noway_assert(tree->gtType != TYP_VOID);
+
+ regNumber reg;
+ regNumber rg2;
+
+#if FEATURE_STACK_FP_X87
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF);
+#elif defined(_TARGET_ARM_)
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
+ genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE ||
+ genActualType(tree->gtType) == TYP_STRUCT);
+#else
+ noway_assert(genActualType(tree->gtType) == TYP_INT || genActualType(tree->gtType) == TYP_I_IMPL ||
+ genActualType(tree->gtType) == TYP_REF || tree->gtType == TYP_BYREF ||
+ genActualType(tree->gtType) == TYP_FLOAT || genActualType(tree->gtType) == TYP_DOUBLE);
+#endif
+
+ /* Generate the value, hopefully into the right register */
+
+ genCodeForTree(tree, needReg);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ // There is a workaround in genCodeForTreeLng() that changes the type of the
+ // tree of a GT_MUL with 64 bit result to TYP_INT from TYP_LONG, then calls
+ // genComputeReg(). genCodeForTree(), above, will put the result in gtRegPair for ARM,
+ // or leave it in EAX/EDX for x86, but only set EAX as gtRegNum. There's no point
+ // running the rest of this code, because anything looking at gtRegNum on ARM or
+ // attempting to move from EAX/EDX will be wrong.
+ if ((tree->OperGet() == GT_MUL) && (tree->gtFlags & GTF_MUL_64RSLT))
+ goto REG_OK;
+
+ reg = tree->gtRegNum;
+
+ /* Did the value end up in an acceptable register? */
+
+ if ((mustReg == RegSet::EXACT_REG) && needReg && !(genRegMask(reg) & needReg))
+ {
+ /* Not good enough to satisfy the caller's orders */
+
+ if (varTypeIsFloating(tree))
+ {
+ RegSet::RegisterPreference pref(needReg, RBM_NONE);
+ rg2 = regSet.PickRegFloat(tree->TypeGet(), &pref);
+ }
+ else
+ {
+ rg2 = regSet.rsGrabReg(needReg);
+ }
+ }
+ else
+ {
+ /* Do we have to end up with a free register? */
+
+ if (!freeOnly)
+ goto REG_OK;
+
+ /* Did we luck out and the value got computed into an unused reg? */
+
+ if (genRegMask(reg) & regSet.rsRegMaskFree())
+ goto REG_OK;
+
+ /* Register already in use, so spill previous value */
+
+ if ((mustReg == RegSet::EXACT_REG) && needReg && (genRegMask(reg) & needReg))
+ {
+ rg2 = regSet.rsGrabReg(needReg);
+ if (rg2 == reg)
+ {
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+ tree->gtRegNum = reg;
+ goto REG_OK;
+ }
+ }
+ else
+ {
+ /* OK, let's find a trashable home for the value */
+
+ regMaskTP rv1RegUsed;
+
+ regSet.rsLockReg(genRegMask(reg), &rv1RegUsed);
+ rg2 = regSet.rsPickReg(needReg);
+ regSet.rsUnlockReg(genRegMask(reg), rv1RegUsed);
+ }
+ }
+
+ noway_assert(reg != rg2);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(rg2, reg);
+
+ inst_RV_RV(ins_Copy(tree->TypeGet()), rg2, reg, tree->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+
+ gcInfo.gcMarkRegPtrVal(rg2, tree->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ tree->gtRegNum = rg2;
+
+REG_OK:
+
+ /* Does the caller want us to mark the register as used? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* In case we're computing a value into a register variable */
+
+ genUpdateLife(tree);
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegUsed(tree);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Same as genComputeReg(), the only difference being that the result is
+ * guaranteed to end up in a trashable register.
+ */
+
+// inline
+void CodeGen::genCompIntoFreeReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ genComputeReg(tree, needReg, RegSet::ANY_REG, keepReg, true);
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register; free up that
+ * register (but also make sure the value is presently in a register).
+ */
+
+void CodeGen::genReleaseReg(GenTreePtr tree)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
+ return;
+ }
+
+ regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register. Check whether that
+ * register has been spilled (and reload it if so), and if 'keepReg' is RegSet::FREE_REG,
+ * free the register. The caller shouldn't need to be setting GCness of the register
+ * where tree will be recovered to, so we disallow keepReg==RegSet::FREE_REG for GC type trees.
+ */
+
+void CodeGen::genRecoverReg(GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillReg(tree, needReg, keepReg);
+ return;
+ }
+ else if (needReg && (needReg & genRegMask(tree->gtRegNum)) == 0)
+ {
+ /* We need the tree in another register. So move it there */
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ regNumber oldReg = tree->gtRegNum;
+
+ /* Pick an acceptable register */
+
+ regNumber reg = regSet.rsGrabReg(needReg);
+
+ /* Copy the value */
+
+ inst_RV_RV(INS_mov, reg, oldReg, tree->TypeGet());
+ tree->gtRegNum = reg;
+
+ gcInfo.gcMarkRegPtrVal(tree);
+ regSet.rsMarkRegUsed(tree);
+ regSet.rsMarkRegFree(oldReg, tree);
+
+ regTracker.rsTrackRegCopy(reg, oldReg);
+ }
+
+ /* Free the register if the caller desired so */
+
+ if (keepReg == RegSet::FREE_REG)
+ {
+ regSet.rsMarkRegFree(genRegMask(tree->gtRegNum));
+ // Can't use RegSet::FREE_REG on a GC type
+ noway_assert(!varTypeIsGC(tree->gtType));
+ }
+ else
+ {
+ noway_assert(regSet.rsMaskUsed & genRegMask(tree->gtRegNum));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Move one half of a register pair to its new regPair(half).
+ */
+
+// inline
+void CodeGen::genMoveRegPairHalf(GenTreePtr tree, regNumber dst, regNumber src, int off)
+{
+ if (src == REG_STK)
+ {
+ // handle long to unsigned long overflow casts
+ while (tree->gtOper == GT_CAST)
+ {
+ noway_assert(tree->gtType == TYP_LONG);
+ tree = tree->gtCast.CastOp();
+ }
+ noway_assert(tree->gtEffectiveVal()->gtOper == GT_LCL_VAR);
+ noway_assert(tree->gtType == TYP_LONG);
+ inst_RV_TT(ins_Load(TYP_INT), dst, tree, off);
+ regTracker.rsTrackRegTrash(dst);
+ }
+ else
+ {
+ regTracker.rsTrackRegCopy(dst, src);
+ inst_RV_RV(INS_mov, dst, src, TYP_INT);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The given long value is in a register pair, but it's not an acceptable
+ * one. We have to move the value into a register pair in 'needReg' (if
+ * non-zero) or the pair 'newPair' (when 'newPair != REG_PAIR_NONE').
+ *
+ * Important note: if 'needReg' is non-zero, we assume the current pair
+ * has not been marked as free. If, OTOH, 'newPair' is specified, we
+ * assume that the current register pair is marked as used and free it.
+ */
+
+void CodeGen::genMoveRegPair(GenTreePtr tree, regMaskTP needReg, regPairNo newPair)
+{
+ regPairNo oldPair;
+
+ regNumber oldLo;
+ regNumber oldHi;
+ regNumber newLo;
+ regNumber newHi;
+
+ /* Either a target set or a specific pair may be requested */
+
+ noway_assert((needReg != 0) != (newPair != REG_PAIR_NONE));
+
+ /* Get hold of the current pair */
+
+ oldPair = tree->gtRegPair;
+ noway_assert(oldPair != newPair);
+
+ /* Are we supposed to move to a specific pair? */
+
+ if (newPair != REG_PAIR_NONE)
+ {
+ regMaskTP oldMask = genRegPairMask(oldPair);
+ regMaskTP loMask = genRegMask(genRegPairLo(newPair));
+ regMaskTP hiMask = genRegMask(genRegPairHi(newPair));
+ regMaskTP overlap = oldMask & (loMask | hiMask);
+
+ /* First lock any registers that are in both pairs */
+
+ noway_assert((regSet.rsMaskUsed & overlap) == overlap);
+ noway_assert((regSet.rsMaskLock & overlap) == 0);
+ regSet.rsMaskLock |= overlap;
+
+ /* Make sure any additional registers we need are free */
+
+ if ((loMask & regSet.rsMaskUsed) != 0 && (loMask & oldMask) == 0)
+ {
+ regSet.rsGrabReg(loMask);
+ }
+
+ if ((hiMask & regSet.rsMaskUsed) != 0 && (hiMask & oldMask) == 0)
+ {
+ regSet.rsGrabReg(hiMask);
+ }
+
+ /* Unlock those registers we have temporarily locked */
+
+ noway_assert((regSet.rsMaskUsed & overlap) == overlap);
+ noway_assert((regSet.rsMaskLock & overlap) == overlap);
+ regSet.rsMaskLock -= overlap;
+
+ /* We can now free the old pair */
+
+ regSet.rsMarkRegFree(oldMask);
+ }
+ else
+ {
+ /* Pick the new pair based on the caller's stated preference */
+
+ newPair = regSet.rsGrabRegPair(needReg);
+ }
+
+ // If grabbed pair is the same as old one we're done
+ if (newPair == oldPair)
+ {
+ noway_assert((oldLo = genRegPairLo(oldPair), oldHi = genRegPairHi(oldPair), newLo = genRegPairLo(newPair),
+ newHi = genRegPairHi(newPair), newLo != REG_STK && newHi != REG_STK));
+ return;
+ }
+
+ /* Move the values from the old pair into the new one */
+
+ oldLo = genRegPairLo(oldPair);
+ oldHi = genRegPairHi(oldPair);
+ newLo = genRegPairLo(newPair);
+ newHi = genRegPairHi(newPair);
+
+ noway_assert(newLo != REG_STK && newHi != REG_STK);
+
+ /* Careful - the register pairs might overlap */
+
+ if (newLo == oldLo)
+ {
+ /* The low registers are identical, just move the upper half */
+
+ noway_assert(newHi != oldHi);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ }
+ else
+ {
+ /* The low registers are different, are the upper ones the same? */
+
+ if (newHi == oldHi)
+ {
+ /* Just move the lower half, then */
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ }
+ else
+ {
+ /* Both sets are different - is there an overlap? */
+
+ if (newLo == oldHi)
+ {
+ /* Are high and low simply swapped ? */
+
+ if (newHi == oldLo)
+ {
+#ifdef _TARGET_ARM_
+ /* Let's use XOR swap to reduce register pressure. */
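+ // (eor a,b / eor b,a / eor a,b swaps the two registers without a scratch.)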
+ inst_RV_RV(INS_eor, oldLo, oldHi);
+ inst_RV_RV(INS_eor, oldHi, oldLo);
+ inst_RV_RV(INS_eor, oldLo, oldHi);
+#else
+ inst_RV_RV(INS_xchg, oldHi, oldLo);
+#endif
+ regTracker.rsTrackRegSwap(oldHi, oldLo);
+ }
+ else
+ {
+ /* New lower == old higher, so move higher half first */
+
+ noway_assert(newHi != oldLo);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ }
+ }
+ else
+ {
+ /* Move lower half first */
+ genMoveRegPairHalf(tree, newLo, oldLo, 0);
+ genMoveRegPairHalf(tree, newHi, oldHi, sizeof(int));
+ }
+ }
+ }
+
+ /* Record the fact that we're switching to another pair */
+
+ tree->gtRegPair = newPair;
+}
+
+/*****************************************************************************
+ *
+ * Compute the value 'tree' into the register pair specified by 'needRegPair'
+ * if 'needRegPair' is REG_PAIR_NONE then use any free register pair, avoid
+ * those in avoidReg.
+ * If 'keepReg' is set to RegSet::KEEP_REG then we mark both registers that the
+ * value ends up in as being used.
+ */
+
+void CodeGen::genComputeRegPair(
+ GenTreePtr tree, regPairNo needRegPair, regMaskTP avoidReg, RegSet::KeepReg keepReg, bool freeOnly)
+{
+ regMaskTP regMask;
+ regPairNo regPair;
+ regMaskTP tmpMask;
+ regMaskTP tmpUsedMask;
+ regNumber rLo;
+ regNumber rHi;
+
+ noway_assert(isRegPairType(tree->gtType));
+
+ if (needRegPair == REG_PAIR_NONE)
+ {
+ if (freeOnly)
+ {
+ regMask = regSet.rsRegMaskFree() & ~avoidReg;
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskFree();
+ }
+ else
+ {
+ regMask = RBM_ALLINT & ~avoidReg;
+ }
+
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskCanGrab();
+ }
+ else
+ {
+ regMask = genRegPairMask(needRegPair);
+ }
+
+ /* Generate the value, hopefully into the right register pair */
+
+ genCodeForTreeLng(tree, regMask, avoidReg);
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ regPair = tree->gtRegPair;
+ tmpMask = genRegPairMask(regPair);
+
+ rLo = genRegPairLo(regPair);
+ rHi = genRegPairHi(regPair);
+
+ /* At least one half is in a real register */
+
+ noway_assert(rLo != REG_STK || rHi != REG_STK);
+
+ /* Did the value end up in an acceptable register pair? */
+
+ if (needRegPair != REG_PAIR_NONE)
+ {
+ if (needRegPair != regPair)
+ {
+ /* This is a workaround. If we specify a regPair for genMoveRegPair, */
+ /* it expects the source pair to be marked as used */
+ regSet.rsMarkRegPairUsed(tree);
+ genMoveRegPair(tree, 0, needRegPair);
+ }
+ }
+ else if (freeOnly)
+ {
+ /* Do we have to end up with a free register pair?
+ Something might have gotten freed up above */
+ bool mustMoveReg = false;
+
+ regMask = regSet.rsRegMaskFree() & ~avoidReg;
+
+ if (genMaxOneBit(regMask))
+ regMask = regSet.rsRegMaskFree();
+
+ if ((tmpMask & regMask) != tmpMask || rLo == REG_STK || rHi == REG_STK)
+ {
+ /* Note that we must call genMoveRegPair if one of our registers
+ comes from the used mask, so that it will be properly spilled. */
+
+ mustMoveReg = true;
+ }
+
+ if (genMaxOneBit(regMask))
+ regMask |= regSet.rsRegMaskCanGrab() & ~avoidReg;
+
+ if (genMaxOneBit(regMask))
+ regMask |= regSet.rsRegMaskCanGrab();
+
+ /* Did the value end up in a free register pair? */
+
+ if (mustMoveReg)
+ {
+ /* We'll have to move the value to a free (trashable) pair */
+ genMoveRegPair(tree, regMask, REG_PAIR_NONE);
+ }
+ }
+ else
+ {
+ noway_assert(needRegPair == REG_PAIR_NONE);
+ noway_assert(!freeOnly);
+
+ /* it is possible for tmpMask to overlap regSet.rsMaskUsed */
+ tmpUsedMask = tmpMask & regSet.rsMaskUsed;
+ tmpMask &= ~regSet.rsMaskUsed;
+
+ /* Make sure that the value is in "real" registers */
+ if (rLo == REG_STK)
+ {
+ /* Get one of the desired registers, but exclude rHi */
+
+ regSet.rsLockReg(tmpMask);
+ regSet.rsLockUsedReg(tmpUsedMask);
+
+ regNumber reg = regSet.rsPickReg(regMask);
+
+ regSet.rsUnlockUsedReg(tmpUsedMask);
+ regSet.rsUnlockReg(tmpMask);
+
+ inst_RV_TT(ins_Load(TYP_INT), reg, tree, 0);
+
+ tree->gtRegPair = gen2regs2pair(reg, rHi);
+
+ regTracker.rsTrackRegTrash(reg);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ else if (rHi == REG_STK)
+ {
+ /* Get one of the desired registers, but exclude rLo */
+
+ regSet.rsLockReg(tmpMask);
+ regSet.rsLockUsedReg(tmpUsedMask);
+
+ regNumber reg = regSet.rsPickReg(regMask);
+
+ regSet.rsUnlockUsedReg(tmpUsedMask);
+ regSet.rsUnlockReg(tmpMask);
+
+ inst_RV_TT(ins_Load(TYP_INT), reg, tree, 4);
+
+ tree->gtRegPair = gen2regs2pair(rLo, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ }
+
+ /* Does the caller want us to mark the register as used? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* In case we're computing a value into a register variable */
+
+ genUpdateLife(tree);
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegPairUsed(tree);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Same as genComputeRegPair(), the only difference being that the result
+ * is guaranteed to end up in a trashable register pair.
+ */
+
+// inline
+void CodeGen::genCompIntoFreeRegPair(GenTreePtr tree, regMaskTP avoidReg, RegSet::KeepReg keepReg)
+{
+ genComputeRegPair(tree, REG_PAIR_NONE, avoidReg, keepReg, true);
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register pair; free up that
+ * register pair (but also make sure the value is presently in a register
+ * pair).
+ */
+
+void CodeGen::genReleaseRegPair(GenTreePtr tree)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The register has been spilled -- reload it */
+
+ regSet.rsUnspillRegPair(tree, 0, RegSet::FREE_REG);
+ return;
+ }
+
+ regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
+}
+
+/*****************************************************************************
+ *
+ * The value 'tree' was earlier computed into a register pair. Check whether
+ * either register of that pair has been spilled (and reload it if so), and
+ * if 'keepReg' is 0, free the register pair.
+ */
+
+void CodeGen::genRecoverRegPair(GenTreePtr tree, regPairNo regPair, RegSet::KeepReg keepReg)
+{
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ regMaskTP regMask;
+
+ if (regPair == REG_PAIR_NONE)
+ regMask = RBM_NONE;
+ else
+ regMask = genRegPairMask(regPair);
+
+ /* The register pair has been spilled -- reload it */
+
+ regSet.rsUnspillRegPair(tree, regMask, RegSet::KEEP_REG);
+ }
+
+ /* Does the caller insist on the value being in a specific place? */
+
+ if (regPair != REG_PAIR_NONE && regPair != tree->gtRegPair)
+ {
+ /* No good -- we'll have to move the value to a new place */
+
+ genMoveRegPair(tree, 0, regPair);
+
+ /* Mark the pair as used if appropriate */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ return;
+ }
+
+ /* Free the register pair if the caller desired so */
+
+ if (keepReg == RegSet::FREE_REG)
+ regSet.rsMarkRegFree(genRegPairMask(tree->gtRegPair));
+}
+
+/*****************************************************************************
+ *
+ * Compute the given long value into the specified register pair; don't mark
+ * the register pair as used.
+ */
+
+// inline
+void CodeGen::genEvalIntoFreeRegPair(GenTreePtr tree, regPairNo regPair, regMaskTP avoidReg)
+{
+ genComputeRegPair(tree, regPair, avoidReg, RegSet::KEEP_REG);
+ genRecoverRegPair(tree, regPair, RegSet::FREE_REG);
+}
+
+/*****************************************************************************
+ * This helper makes sure that the regpair target of an assignment is
+ * available for use. This needs to be called in genCodeForTreeLng just before
+ * a long assignment, but must not be called until everything has been
+ * evaluated, or else we might try to spill enregistered variables.
+ *
+ */
+
+// inline
+void CodeGen::genMakeRegPairAvailable(regPairNo regPair)
+{
+ /* Make sure the target of the store is available */
+
+ regNumber regLo = genRegPairLo(regPair);
+ regNumber regHi = genRegPairHi(regPair);
+
+ if ((regHi != REG_STK) && (regSet.rsMaskUsed & genRegMask(regHi)))
+ regSet.rsSpillReg(regHi);
+
+ if ((regLo != REG_STK) && (regSet.rsMaskUsed & genRegMask(regLo)))
+ regSet.rsSpillReg(regLo);
+}
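+
+// Illustrative sketch (editor's note, not part of the original source): the calling order that
+// the header comment above prescribes for a long assignment in genCodeForTreeLng. 'src',
+// 'dstPair' and 'avoidMask' are hypothetical names; the point is that genMakeRegPairAvailable()
+// runs only after the RHS has been fully evaluated, so enregistered variables aren't spilled early.
+//
+//     genComputeRegPair(src, REG_PAIR_NONE, avoidMask, RegSet::KEEP_REG); // evaluate the RHS first
+//     genMakeRegPairAvailable(dstPair);                                   // then free up the target pair
+//     genRecoverRegPair(src, dstPair, RegSet::FREE_REG);                  // move/reload the RHS into it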
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return true if the given tree 'addr' can be computed via an addressing mode,
+ * such as "[ebx+esi*4+20]". If the expression isn't an address mode already,
+ * try to make it so (but we don't try 'too hard' to accomplish this).
+ *
+ * If we end up needing a register (or two registers) to hold some part(s) of the
+ * address, we return the use register mask via '*useMaskPtr'.
+ *
+ * If keepReg==RegSet::KEEP_REG, the registers (viz. *useMaskPtr) will be marked as
+ * in use. The caller would then be responsible for calling
+ * regSet.rsMarkRegFree(*useMaskPtr).
+ *
+ * If keepReg==RegSet::FREE_REG, then the caller needs to update the GC-tracking by
+ * calling genDoneAddressable(addr, *useMaskPtr, RegSet::FREE_REG);
+ */
+
+bool CodeGen::genMakeIndAddrMode(GenTreePtr addr,
+ GenTreePtr oper,
+ bool forLea,
+ regMaskTP regMask,
+ RegSet::KeepReg keepReg,
+ regMaskTP* useMaskPtr,
+ bool deferOK)
+{
+ if (addr->gtOper == GT_ARR_ELEM)
+ {
+ regMaskTP regs = genMakeAddrArrElem(addr, oper, RBM_ALLINT, keepReg);
+ *useMaskPtr = regs;
+ return true;
+ }
+
+ bool rev;
+ GenTreePtr rv1;
+ GenTreePtr rv2;
+ bool operIsArrIndex; // is oper an array index
+ GenTreePtr scaledIndex; // If scaled addressing mode can't be used
+
+ regMaskTP anyMask = RBM_ALLINT;
+
+ unsigned cns;
+ unsigned mul;
+
+ GenTreePtr tmp;
+ int ixv = INT_MAX; // unset value
+
+ GenTreePtr scaledIndexVal;
+
+ regMaskTP newLiveMask;
+ regMaskTP rv1Mask;
+ regMaskTP rv2Mask;
+
+ /* Deferred address mode forming NYI for x86 */
+
+ noway_assert(deferOK == false);
+
+ noway_assert(oper == NULL ||
+ ((oper->OperIsIndir() || oper->OperIsAtomicOp()) &&
+ ((oper->gtOper == GT_CMPXCHG && oper->gtCmpXchg.gtOpLocation == addr) || oper->gtOp.gtOp1 == addr)));
+ operIsArrIndex = (oper != nullptr && oper->OperGet() == GT_IND && (oper->gtFlags & GTF_IND_ARR_INDEX) != 0);
+
+ if (addr->gtOper == GT_LEA)
+ {
+ rev = (addr->gtFlags & GTF_REVERSE_OPS) != 0;
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ rv1 = lea->Base();
+ rv2 = lea->Index();
+ mul = lea->gtScale;
+ cns = lea->gtOffset;
+
+ if (rv1 != NULL && rv2 == NULL && cns == 0 && (rv1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ scaledIndex = NULL;
+ goto YES;
+ }
+ }
+ else
+ {
+ // NOTE: FOR NOW THIS ISN'T APPROPRIATELY INDENTED - THIS IS TO MAKE IT
+ // EASIER TO MERGE
+
+ /* Is the complete address already sitting in a register? */
+
+ if ((addr->gtFlags & GTF_REG_VAL) || (addr->gtOper == GT_LCL_VAR && genMarkLclVar(addr)))
+ {
+ genUpdateLife(addr);
+
+ rv1 = addr;
+ rv2 = scaledIndex = 0;
+ cns = 0;
+
+ goto YES;
+ }
+
+ /* Is it an absolute address */
+
+ if (addr->IsCnsIntOrI())
+ {
+ rv1 = rv2 = scaledIndex = 0;
+ // along this code path cns is never used, so place a BOGUS value in it as proof
+ // cns = addr->gtIntCon.gtIconVal;
+ cns = UINT_MAX;
+
+ goto YES;
+ }
+
+ /* Is there a chance of forming an address mode? */
+
+ if (!genCreateAddrMode(addr, forLea ? 1 : 0, false, regMask, &rev, &rv1, &rv2, &mul, &cns))
+ {
+ /* This better not be an array index */
+ noway_assert(!operIsArrIndex);
+
+ return false;
+ }
+ // THIS IS THE END OF THE INAPPROPRIATELY INDENTED SECTION
+ }
+
+ /* For scaled array access, RV2 may not be pointing to the index of the
+ array if the CPU does not support the needed scaling factor. We will
+ make it point to the actual index, and scaledIndex will point to
+ the scaled value */
+
+ scaledIndex = NULL;
+ scaledIndexVal = NULL;
+
+ if (operIsArrIndex && rv2 != NULL && (rv2->gtOper == GT_MUL || rv2->gtOper == GT_LSH) &&
+ rv2->gtOp.gtOp2->IsIntCnsFitsInI32())
+ {
+ scaledIndex = rv2;
+ compiler->optGetArrayRefScaleAndIndex(scaledIndex, &scaledIndexVal DEBUGARG(true));
+
+ noway_assert(scaledIndex->gtOp.gtOp2->IsIntCnsFitsInI32());
+ }
+
+ /* Has the address already been computed? */
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ if (forLea)
+ return true;
+
+ rv1 = addr;
+ rv2 = NULL;
+ scaledIndex = NULL;
+ genUpdateLife(addr);
+ goto YES;
+ }
+
+ /*
+ Here we have the following operands:
+
+ rv1 ..... base address
+ rv2 ..... offset value (or NULL)
+ mul ..... multiplier for rv2 (or 0)
+ cns ..... additional constant (or 0)
+
+ The first operand must be present (and be an address) unless we're
+ computing an expression via 'LEA'. The scaled operand is optional,
+ but must not be a pointer if present.
+ */
+
+ noway_assert(rv2 == NULL || !varTypeIsGC(rv2->TypeGet()));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Make sure both rv1 and rv2 (if present) are in registers
+ *
+ */
+
+ // Trivial case : Is either rv1 or rv2 a NULL ?
+
+ if (!rv2)
+ {
+ /* A single operand, make sure it's in a register */
+
+ if (cns != 0)
+ {
+ // In the case where "rv1" is already in a register, there's no reason to get it into a
+ // register in "regMask" yet, if there's a non-zero constant that we're going to add;
+ // if there is, we can do an LEA.
+ genCodeForTree(rv1, RBM_NONE);
+ }
+ else
+ {
+ genCodeForTree(rv1, regMask);
+ }
+ goto DONE_REGS;
+ }
+ else if (!rv1)
+ {
+ /* A single (scaled) operand, make sure it's in a register */
+
+ genCodeForTree(rv2, 0);
+ goto DONE_REGS;
+ }
+
+ /* At this point, both rv1 and rv2 are non-NULL and we have to make sure
+ they are in registers */
+
+ noway_assert(rv1 && rv2);
+
+ /* If we have to check a constant array index, compare it against
+ the array dimension (see below) but then fold the index with a
+ scaling factor (if any) and additional offset (if any).
+ */
+
+ if (rv2->gtOper == GT_CNS_INT || (scaledIndex != NULL && scaledIndexVal->gtOper == GT_CNS_INT))
+ {
+ if (scaledIndex != NULL)
+ {
+ assert(rv2 == scaledIndex && scaledIndexVal != NULL);
+ rv2 = scaledIndexVal;
+ }
+ /* We must have a range-checked index operation */
+
+ noway_assert(operIsArrIndex);
+
+ /* Get hold of the index value and see if it's a constant */
+
+ if (rv2->IsIntCnsFitsInI32())
+ {
+ ixv = (int)rv2->gtIntCon.gtIconVal;
+ // Maybe I should just set "fold" true in the call to genMakeAddressable above.
+ if (scaledIndex != NULL)
+ {
+ int scale = 1 << ((int)scaledIndex->gtOp.gtOp2->gtIntCon.gtIconVal); // If this truncates, that's OK --
+ // multiple of 2^6.
+ if (mul == 0)
+ {
+ mul = scale;
+ }
+ else
+ {
+ mul *= scale;
+ }
+ }
+ rv2 = scaledIndex = NULL;
+
+ /* Add the scaled index into the added value */
+
+ if (mul)
+ cns += ixv * mul;
+ else
+ cns += ixv;
+
+ /* Make sure 'rv1' is in a register */
+
+ genCodeForTree(rv1, regMask);
+
+ goto DONE_REGS;
+ }
+ }
+
+ if (rv1->gtFlags & GTF_REG_VAL)
+ {
+ /* op1 already in register - how about op2? */
+
+ if (rv2->gtFlags & GTF_REG_VAL)
+ {
+ /* Great - both operands are in registers already. Just update
+ the liveness and we are done. */
+
+ if (rev)
+ {
+ genUpdateLife(rv2);
+ genUpdateLife(rv1);
+ }
+ else
+ {
+ genUpdateLife(rv1);
+ genUpdateLife(rv2);
+ }
+
+ goto DONE_REGS;
+ }
+
+ /* rv1 is in a register, but rv2 isn't */
+
+ if (!rev)
+ {
+ /* rv1 is already materialized in a register. Just update liveness
+ to rv1 and generate code for rv2 */
+
+ genUpdateLife(rv1);
+ regSet.rsMarkRegUsed(rv1, oper);
+ }
+
+ goto GEN_RV2;
+ }
+ else if (rv2->gtFlags & GTF_REG_VAL)
+ {
+ /* rv2 is in a register, but rv1 isn't */
+
+ noway_assert(rv2->gtOper == GT_REG_VAR);
+
+ if (rev)
+ {
+ /* rv2 is already materialized in a register. Update liveness
+ to after rv2 and then hang on to rv2 */
+
+ genUpdateLife(rv2);
+ regSet.rsMarkRegUsed(rv2, oper);
+ }
+
+ /* Generate code for the first operand */
+
+ genCodeForTree(rv1, regMask);
+
+ if (rev)
+ {
+ // Free up rv2 in the right fashion (it might be re-marked if keepReg)
+ regSet.rsMarkRegUsed(rv1, oper);
+ regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv2);
+ regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv1);
+ }
+ else
+ {
+ /* We have evaluated rv1, and now we just need to update liveness
+ to rv2 which was already in a register */
+
+ genUpdateLife(rv2);
+ }
+
+ goto DONE_REGS;
+ }
+
+ if (forLea && !cns)
+ return false;
+
+ /* Make sure we preserve the correct operand order */
+
+ if (rev)
+ {
+ /* Generate the second operand first */
+
+ // Determine what registers go live between rv2 and rv1
+ newLiveMask = genNewLiveRegMask(rv2, rv1);
+
+ rv2Mask = regMask & ~newLiveMask;
+ rv2Mask &= ~rv1->gtRsvdRegs;
+
+ if (rv2Mask == RBM_NONE)
+ {
+ // The regMask hint cannot be honored
+ // We probably have a call that trashes the register(s) in regMask
+ // so ignore the regMask hint, but try to avoid using
+ // the registers in newLiveMask and the rv1->gtRsvdRegs
+ //
+ rv2Mask = RBM_ALLINT & ~newLiveMask;
+ rv2Mask = regSet.rsMustExclude(rv2Mask, rv1->gtRsvdRegs);
+ }
+
+ genCodeForTree(rv2, rv2Mask);
+ regMask &= ~genRegMask(rv2->gtRegNum);
+
+ regSet.rsMarkRegUsed(rv2, oper);
+
+ /* Generate the first operand second */
+
+ genCodeForTree(rv1, regMask);
+ regSet.rsMarkRegUsed(rv1, oper);
+
+ /* Free up both operands in the right order (they might be
+ re-marked as used below)
+ */
+ regSet.rsLockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv2);
+ regSet.rsUnlockUsedReg(genRegMask(rv1->gtRegNum));
+ genReleaseReg(rv1);
+ }
+ else
+ {
+ /* Get the first operand into a register */
+
+ // Determine what registers go live between rv1 and rv2
+ newLiveMask = genNewLiveRegMask(rv1, rv2);
+
+ rv1Mask = regMask & ~newLiveMask;
+ rv1Mask &= ~rv2->gtRsvdRegs;
+
+ if (rv1Mask == RBM_NONE)
+ {
+ // The regMask hint cannot be honored
+ // We probably have a call that trashes the register(s) in regMask
+ // so ignore the regMask hint, but try to avoid using
+ // the registers in liveMask and the rv2->gtRsvdRegs
+ //
+ rv1Mask = RBM_ALLINT & ~newLiveMask;
+ rv1Mask = regSet.rsMustExclude(rv1Mask, rv2->gtRsvdRegs);
+ }
+
+ genCodeForTree(rv1, rv1Mask);
+ regSet.rsMarkRegUsed(rv1, oper);
+
+ GEN_RV2:
+
+ /* Here, we need to get rv2 in a register. We have either already
+ materialized rv1 into a register, or it was already in one */
+
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+ noway_assert(rev || regSet.rsIsTreeInReg(rv1->gtRegNum, rv1));
+
+ /* Generate the second operand as well */
+
+ regMask &= ~genRegMask(rv1->gtRegNum);
+ genCodeForTree(rv2, regMask);
+
+ if (rev)
+ {
+ /* rev==true means the evaluation order is rv2,rv1. We just
+ evaluated rv2, and rv1 was already in a register. Just
+ update liveness to rv1 and we are done. */
+
+ genUpdateLife(rv1);
+ }
+ else
+ {
+ /* We have evaluated rv1 and rv2. Free up both operands in
+ the right order (they might be re-marked as used below) */
+
+ /* Even though we have not explicitly marked rv2 as used,
+ rv2->gtRegNum may be used if rv2 is a multi-use or
+ an enregistered variable. */
+ regMaskTP rv2Used;
+ regSet.rsLockReg(genRegMask(rv2->gtRegNum), &rv2Used);
+
+ /* Check for special case both rv1 and rv2 are the same register */
+ if (rv2Used != genRegMask(rv1->gtRegNum))
+ {
+ genReleaseReg(rv1);
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
+ }
+ else
+ {
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum), rv2Used);
+ genReleaseReg(rv1);
+ }
+ }
+ }
+
+/*-------------------------------------------------------------------------
+ *
+ * At this point, both rv1 and rv2 (if present) are in registers
+ *
+ */
+
+DONE_REGS:
+
+ /* We must verify that 'rv1' and 'rv2' are both sitting in registers */
+
+ if (rv1 && !(rv1->gtFlags & GTF_REG_VAL))
+ return false;
+ if (rv2 && !(rv2->gtFlags & GTF_REG_VAL))
+ return false;
+
+YES:
+
+ // *(intVar1+intVar1) causes problems as we
+ // call regSet.rsMarkRegUsed(op1) and regSet.rsMarkRegUsed(op2). So the calling function
+ // needs to know that it has to call rsFreeReg(reg1) twice. We can't do
+ // that currently as we return a single mask in useMaskPtr.
+
+ if ((keepReg == RegSet::KEEP_REG) && oper && rv1 && rv2 && (rv1->gtFlags & rv2->gtFlags & GTF_REG_VAL))
+ {
+ if (rv1->gtRegNum == rv2->gtRegNum)
+ {
+ noway_assert(!operIsArrIndex);
+ return false;
+ }
+ }
+
+ /* Check either register operand to see if it needs to be saved */
+
+ if (rv1)
+ {
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ regSet.rsMarkRegUsed(rv1, oper);
+ }
+ else
+ {
+ /* If the register holds an address, mark it */
+
+ gcInfo.gcMarkRegPtrVal(rv1->gtRegNum, rv1->TypeGet());
+ }
+ }
+
+ if (rv2)
+ {
+ noway_assert(rv2->gtFlags & GTF_REG_VAL);
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(rv2, oper);
+ }
+
+ if (deferOK)
+ {
+ noway_assert(!scaledIndex);
+ return true;
+ }
+
+ /* Compute the set of registers the address depends on */
+
+ regMaskTP useMask = RBM_NONE;
+
+ if (rv1)
+ {
+ if (rv1->gtFlags & GTF_SPILLED)
+ regSet.rsUnspillReg(rv1, 0, RegSet::KEEP_REG);
+
+ noway_assert(rv1->gtFlags & GTF_REG_VAL);
+ useMask |= genRegMask(rv1->gtRegNum);
+ }
+
+ if (rv2)
+ {
+ if (rv2->gtFlags & GTF_SPILLED)
+ {
+ if (rv1)
+ {
+ regMaskTP lregMask = genRegMask(rv1->gtRegNum);
+ regMaskTP used;
+
+ regSet.rsLockReg(lregMask, &used);
+ regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
+ regSet.rsUnlockReg(lregMask, used);
+ }
+ else
+ regSet.rsUnspillReg(rv2, 0, RegSet::KEEP_REG);
+ }
+ noway_assert(rv2->gtFlags & GTF_REG_VAL);
+ useMask |= genRegMask(rv2->gtRegNum);
+ }
+
+ /* Tell the caller which registers we need to hang on to */
+
+ *useMaskPtr = useMask;
+
+ return true;
+}
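+
+// Illustrative sketch (editor's note, not part of the original source): the caller-side contract
+// described in the header comment of genMakeIndAddrMode(). 'addr', 'ind', 'someReg' and 'useMask'
+// are hypothetical names, and the compare is an arbitrary example instruction.
+//
+//     regMaskTP useMask;
+//     if (genMakeIndAddrMode(addr, ind, false, RBM_ALLINT, RegSet::KEEP_REG, &useMask, false))
+//     {
+//         inst_TT_RV(INS_cmp, ind, someReg);   // operate through the formed address mode
+//         regSet.rsMarkRegFree(useMask);       // KEEP_REG: the caller frees the registers
+//     }
+//
+// With RegSet::FREE_REG instead, the caller would finish with
+// genDoneAddressable(addr, useMask, RegSet::FREE_REG) to keep the GC tracking up to date.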
+
+/*****************************************************************************
+ *
+ * 'oper' is an array bounds check (a GT_ARR_BOUNDS_CHECK node).
+ */
+
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrRef = NULL;
+ int lenOffset = 0;
+
+ // If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
+ // Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
+ // a register.
+
+ if (arrLen->OperGet() == GT_ARR_LENGTH)
+ {
+ GenTreeArrLen* arrLenExact = arrLen->AsArrLen();
+ lenOffset = arrLenExact->ArrLenOffset();
+
+#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // We always load the length into a register on ARM and x64.
+
+ // 64-bit has to act like LOAD_STORE_ARCH because the array only holds 32-bit
+ // lengths, but the index expression *can* be native int (64-bits)
+ arrRef = arrLenExact->ArrRef();
+ genCodeForTree(arrRef, RBM_ALLINT);
+ noway_assert(arrRef->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(arrRef);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
+#endif
+ }
+#if !CPU_LOAD_STORE_ARCH && !defined(_TARGET_64BIT_)
+ // This is another form in which we have an array reference and a constant length. Don't use
+ // on LOAD_STORE or 64BIT.
+ else if (arrLen->OperGet() == GT_IND && arrLen->gtOp.gtOp1->IsAddWithI32Const(&arrRef, &lenOffset))
+ {
+ genCodeForTree(arrRef, RBM_ALLINT);
+ noway_assert(arrRef->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(arrRef);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrRef->gtRegNum));
+ }
+#endif
+
+ // If we didn't find one of the special forms above, generate code to evaluate the array length to a register.
+ if (arrRef == NULL)
+ {
+ // (Unless it's a constant.)
+ if (!arrLen->IsCnsIntOrI())
+ {
+ genCodeForTree(arrLen, RBM_ALLINT);
+ regSet.rsMarkRegUsed(arrLen);
+
+ noway_assert(arrLen->gtFlags & GTF_REG_VAL);
+ noway_assert(regSet.rsMaskUsed & genRegMask(arrLen->gtRegNum));
+ }
+ }
+
+ /* Is the array index a constant value? */
+ GenTreePtr index = bndsChk->gtIndex;
+ if (!index->IsCnsIntOrI())
+ {
+ // No, it's not a constant.
+ genCodeForTree(index, RBM_ALLINT);
+ regSet.rsMarkRegUsed(index);
+
+ // If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
+ // from its register, get it back in a register.
+ if (arrRef != NULL)
+ genRecoverReg(arrRef, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+ else if (!arrLen->IsCnsIntOrI())
+ genRecoverReg(arrLen, ~genRegMask(index->gtRegNum), RegSet::KEEP_REG);
+
+ /* Make sure we have the values we expect */
+ noway_assert(index->gtFlags & GTF_REG_VAL);
+ noway_assert(regSet.rsMaskUsed & genRegMask(index->gtRegNum));
+
+ noway_assert(index->TypeGet() == TYP_I_IMPL ||
+ (varTypeIsIntegral(index->TypeGet()) && !varTypeIsLong(index->TypeGet())));
+ var_types indxType = index->TypeGet();
+ if (indxType != TYP_I_IMPL)
+ indxType = TYP_INT;
+
+ if (arrRef != NULL)
+ { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
+
+ /* Generate "cmp index, [arrRef+LenOffs]" */
+ inst_RV_AT(INS_cmp, emitTypeSize(indxType), indxType, index->gtRegNum, arrRef, lenOffset);
+ }
+ else if (arrLen->IsCnsIntOrI())
+ {
+ ssize_t len = arrLen->AsIntConCommon()->IconValue();
+ inst_RV_IV(INS_cmp, index->gtRegNum, len, EA_4BYTE);
+ }
+ else
+ {
+ inst_RV_RV(INS_cmp, index->gtRegNum, arrLen->gtRegNum, indxType, emitTypeSize(indxType));
+ }
+
+ /* Generate "jae <fail_label>" */
+
+ noway_assert(oper->gtOper == GT_ARR_BOUNDS_CHECK);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else
+ {
+ /* Generate "cmp [rv1+LenOffs], cns" */
+
+ bool indIsInt = true;
+#ifdef _TARGET_64BIT_
+ int ixv = 0;
+ ssize_t ixvFull = index->AsIntConCommon()->IconValue();
+ if (ixvFull > INT32_MAX)
+ {
+ indIsInt = false;
+ }
+ else
+ {
+ ixv = (int)ixvFull;
+ }
+#else
+ ssize_t ixvFull = index->AsIntConCommon()->IconValue();
+ int ixv = (int)ixvFull;
+#endif
+ if (arrRef != NULL && indIsInt)
+ { // _TARGET_X86_ or X64 when we have a TYP_INT (32-bit) index expression in the index register
+ /* Generate "cmp [arrRef+LenOffs], ixv" */
+ inst_AT_IV(INS_cmp, EA_4BYTE, arrRef, ixv, lenOffset);
+ // Generate "jbe <fail_label>"
+ emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else if (arrLen->IsCnsIntOrI())
+ {
+ ssize_t lenv = arrLen->AsIntConCommon()->IconValue();
+ // Both are constants; decide at compile time.
+ if (!(0 <= ixvFull && ixvFull < lenv))
+ {
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ }
+ else if (!indIsInt)
+ {
+ genJumpToThrowHlpBlk(EJ_jmp, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ else
+ {
+ /* Generate "cmp arrLen, ixv" */
+ inst_RV_IV(INS_cmp, arrLen->gtRegNum, ixv, EA_4BYTE);
+ // Generate "jbe <fail_label>"
+ emitJumpKind jmpLEU = genJumpKindForOper(GT_LE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpLEU, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
+ }
+ }
+
+ // Free the registers that were used.
+ if (arrRef != NULL)
+ {
+ regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
+ }
+ else if (!arrLen->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
+ }
+
+ if (!index->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ }
+}
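+
+// Illustrative sketch (editor's note, not part of the original source): the two main shapes of the
+// check emitted above, in x86-style pseudo-assembly ('idxReg'/'idxConst' are placeholders):
+//
+//     cmp idxReg, [arrRef + lenOffset]     ; variable index: fail if index >= length (unsigned)
+//     jae RNGCHK_FAIL
+//
+//     cmp [arrRef + lenOffset], idxConst   ; constant index: fail if length <= index (unsigned)
+//     jbe RNGCHK_FAIL
+//
+// When the length is already in a register (or is a constant), the same unsigned compare is done
+// register-to-register (or against the immediate) instead of through memory.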
+
+/*****************************************************************************
+ *
+ * If compiling without REDUNDANT_LOAD, same as genMakeAddressable().
+ * Otherwise, check if rvalue is in register. If so, mark it. Then
+ * call genMakeAddressable(). Needed because genMakeAddressable is used
+ * for both lvalue and rvalue, and we only can do this for rvalue.
+ */
+
+// inline
+regMaskTP CodeGen::genMakeRvalueAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool forLoadStore, bool smallOK)
+{
+ regNumber reg;
+
+#if REDUNDANT_LOAD
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
+
+ if (reg != REG_NA && (needReg == 0 || (genRegMask(reg) & needReg) != 0))
+ {
+ noway_assert(!isRegPairType(tree->gtType));
+
+ genMarkTreeInReg(tree, reg);
+ }
+ }
+
+#endif
+
+ return genMakeAddressable2(tree, needReg, keepReg, forLoadStore, smallOK);
+}
+
+/*****************************************************************************/
+
+bool CodeGen::genIsLocalLastUse(GenTreePtr tree)
+{
+ const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+
+ noway_assert(tree->OperGet() == GT_LCL_VAR);
+ noway_assert(varDsc->lvTracked);
+
+ return ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+}
+
+/*****************************************************************************
+ *
+ * This is genMakeAddressable(GT_ARR_ELEM).
+ * Makes the array element addressable and returns the addressability registers.
+ * It also marks them as used if keepReg==RegSet::KEEP_REG.
+ * tree is the dependent tree.
+ *
+ * Note that an array element needs 2 registers to be addressable: the
+ * array-object and the offset. This function marks gtArrObj and gtArrInds[0]
+ * with the 2 registers so that other functions (like instGetAddrMode()) know
+ * where to look for the offset to use.
+ */
+
+regMaskTP CodeGen::genMakeAddrArrElem(GenTreePtr arrElem, GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg)
+{
+ noway_assert(arrElem->gtOper == GT_ARR_ELEM);
+ noway_assert(!tree || tree->gtOper == GT_IND || tree == arrElem);
+
+ /* Evaluate all the operands. We don't evaluate them into registers yet
+ as GT_ARR_ELEM does not reorder the evaluation of the operands, and
+ hence may use a sub-optimal ordering. We try to improve this
+ situation somewhat by accessing the operands in stages
+ (genMakeAddressable2 + genComputeAddressable and
+ genCompIntoFreeReg + genRecoverReg).
+
+ Note: we compute operands into free regs to avoid multiple uses of
+ the same register. Multi-use would cause problems when we free
+ registers in FIFO order instead of the assumed LIFO order that
+ applies to all types of tree nodes except for GT_ARR_ELEM.
+ */
+
+ GenTreePtr arrObj = arrElem->gtArrElem.gtArrObj;
+ unsigned rank = arrElem->gtArrElem.gtArrRank;
+ var_types elemType = arrElem->gtArrElem.gtArrElemType;
+ regMaskTP addrReg = RBM_NONE;
+ regMaskTP regNeed = RBM_ALLINT;
+
+#if FEATURE_WRITE_BARRIER && !NOGC_WRITE_BARRIERS
+ // In CodeGen::WriteBarrier we set up ARG_1 followed by ARG_0
+ // since the arrObj participates in the lea/add instruction
+ // that computes ARG_0 we should avoid putting it in ARG_1
+ //
+ if (varTypeIsGC(elemType))
+ {
+ regNeed &= ~RBM_ARG_1;
+ }
+#endif
+
+ // Strip off any comma expression.
+ arrObj = genCodeForCommaTree(arrObj);
+
+ // Having generated the code for the comma, we don't care about it anymore.
+ arrElem->gtArrElem.gtArrObj = arrObj;
+
+ // If the array ref is a stack var that's dying here we have to move it
+ // into a register (regalloc already counts on this), since if it's a GC pointer
+ // it can be collected from here on. This is not an issue for locals that are
+ // in a register, as they get marked as used and will be tracked.
+ // The bug that caused this is #100776. (untracked vars?)
+ if (arrObj->OperGet() == GT_LCL_VAR && compiler->optIsTrackedLocal(arrObj) && genIsLocalLastUse(arrObj) &&
+ !genMarkLclVar(arrObj))
+ {
+ genCodeForTree(arrObj, regNeed);
+ regSet.rsMarkRegUsed(arrObj, 0);
+ addrReg = genRegMask(arrObj->gtRegNum);
+ }
+ else
+ {
+ addrReg = genMakeAddressable2(arrObj, regNeed, RegSet::KEEP_REG,
+ true, // forLoadStore
+ false, // smallOK
+ false, // deferOK
+ true); // evalSideEffs
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < rank; dim++)
+ genCompIntoFreeReg(arrElem->gtArrElem.gtArrInds[dim], RBM_NONE, RegSet::KEEP_REG);
+
+ /* Ensure that the array-object is in a register */
+
+ addrReg = genKeepAddressable(arrObj, addrReg);
+ genComputeAddressable(arrObj, addrReg, RegSet::KEEP_REG, regNeed, RegSet::KEEP_REG);
+
+ regNumber arrReg = arrObj->gtRegNum;
+ regMaskTP arrRegMask = genRegMask(arrReg);
+ regMaskTP indRegMask = RBM_ALLINT & ~arrRegMask;
+ regSet.rsLockUsedReg(arrRegMask);
+
+ /* Now process all the indices, do the range check, and compute
+ the offset of the element */
+
+ regNumber accReg = DUMMY_INIT(REG_CORRUPT); // accumulates the offset calculation
+
+ for (dim = 0; dim < rank; dim++)
+ {
+ GenTreePtr index = arrElem->gtArrElem.gtArrInds[dim];
+
+ /* Get the index into a free register (other than the register holding the array) */
+
+ genRecoverReg(index, indRegMask, RegSet::KEEP_REG);
+
+#if CPU_LOAD_STORE_ARCH
+ /* Subtract the lower bound, and do the range check */
+
+ regNumber valueReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(arrReg) & ~genRegMask(index->gtRegNum));
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_sub, EA_4BYTE, index->gtRegNum, valueReg);
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, index->gtRegNum, valueReg);
+#else
+ /* Subtract the lower bound, and do the range check */
+ getEmitter()->emitIns_R_AR(INS_sub, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * (dim + rank));
+ regTracker.rsTrackRegTrash(index->gtRegNum);
+
+ getEmitter()->emitIns_R_AR(INS_cmp, EA_4BYTE, index->gtRegNum, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ genJumpToThrowHlpBlk(jmpGEU, SCK_RNGCHK_FAIL);
+
+ if (dim == 0)
+ {
+ /* Hang on to the register of the first index */
+
+ noway_assert(accReg == DUMMY_INIT(REG_CORRUPT));
+ accReg = index->gtRegNum;
+ noway_assert(accReg != arrReg);
+ regSet.rsLockUsedReg(genRegMask(accReg));
+ }
+ else
+ {
+ /* Evaluate accReg = accReg*dim_size + index */
+
+ noway_assert(accReg != DUMMY_INIT(REG_CORRUPT));
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AR(INS_ldr, EA_4BYTE, valueReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+ regTracker.rsTrackRegTrash(valueReg);
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, valueReg);
+#else
+ getEmitter()->emitIns_R_AR(INS_MUL, EA_4BYTE, accReg, arrReg,
+ compiler->eeGetArrayDataOffset(elemType) + sizeof(int) * dim);
+#endif
+
+ inst_RV_RV(INS_add, accReg, index->gtRegNum);
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+ }
+
+ if (!jitIsScaleIndexMul(arrElem->gtArrElem.gtArrElemSize))
+ {
+ regNumber sizeReg = genGetRegSetToIcon(arrElem->gtArrElem.gtArrElemSize);
+
+ getEmitter()->emitIns_R_R(INS_MUL, EA_4BYTE, accReg, sizeReg);
+ regTracker.rsTrackRegTrash(accReg);
+ }
+
+ regSet.rsUnlockUsedReg(genRegMask(arrReg));
+ regSet.rsUnlockUsedReg(genRegMask(accReg));
+
+ regSet.rsMarkRegFree(genRegMask(arrReg));
+ regSet.rsMarkRegFree(genRegMask(accReg));
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ /* We mark the addressability registers on arrObj and gtArrInds[0].
+ instGetAddrMode() knows to work with this. */
+
+ regSet.rsMarkRegUsed(arrObj, tree);
+ regSet.rsMarkRegUsed(arrElem->gtArrElem.gtArrInds[0], tree);
+ }
+
+ return genRegMask(arrReg) | genRegMask(accReg);
+}
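+
+// Illustrative sketch (editor's note, not part of the original source): the loop above accumulates
+// the element offset of an MD array roughly as
+//
+//     acc = (i0 - lb0);                      // range-checked against dim0
+//     acc = acc * dimSize1 + (i1 - lb1);     // range-checked against dim1, and so on per rank
+//     if elemSize is not a valid addressing-mode scale: acc *= elemSize
+//
+// and the returned mask covers arrReg (the array object) and accReg (the accumulated offset),
+// which instGetAddrMode() later combines into the final element address.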
+
+/*****************************************************************************
+ *
+ * Make sure the given tree is addressable. 'needReg' is a mask that indicates
+ * the set of registers we would prefer the destination tree to be computed
+ * into (RBM_NONE means no preference).
+ *
+ * 'tree' can subsequently be used with the inst_XX_TT() family of functions.
+ *
+ * If 'keepReg' is RegSet::KEEP_REG, we mark any registers the addressability depends
+ * on as used, and return the mask for that register set (if no registers
+ * are marked as used, RBM_NONE is returned).
+ *
+ * If 'smallOK' is not true and the datatype being addressed is a byte or short,
+ * then the tree is forced into a register. This is useful when the machine
+ * instruction being emitted does not have a byte or short version.
+ *
+ * The "deferOK" parameter indicates the mode of operation - when it's false,
+ * upon returning an actual address mode must have been formed (i.e. it must
+ * be possible to immediately call one of the inst_TT methods to operate on
+ * the value). When "deferOK" is true, we do whatever it takes to be ready
+ * to form the address mode later - for example, if an index address mode on
+ * a particular CPU requires the use of a specific register, we usually don't
+ * want to immediately grab that register for an address mode that will only
+ * be needed later. The convention is to call genMakeAddressable() with
+ * "deferOK" equal to true, do whatever work is needed to prepare the other
+ * operand, call genMakeAddressable() with "deferOK" equal to false, and
+ * finally call one of the inst_TT methods right after that.
+ *
+ * If we do any other codegen after genMakeAddressable(tree) which can
+ * potentially spill the addressability registers, genKeepAddressable()
+ * needs to be called before accessing the tree again.
+ *
+ * genDoneAddressable() needs to be called when we are done with the tree
+ * to free the addressability registers.
+ */
+
+regMaskTP CodeGen::genMakeAddressable(
+ GenTreePtr tree, regMaskTP needReg, RegSet::KeepReg keepReg, bool smallOK, bool deferOK)
+{
+ GenTreePtr addr = NULL;
+ regMaskTP regMask;
+
+ /* Is the value simply sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genUpdateLife(tree);
+
+ goto GOT_VAL;
+ }
+
+ // TODO: If the value is for example a cast of float -> int, compute
+ // TODO: the converted value into a stack temp, and leave it there,
+ // TODO: since stack temps are always addressable. This would require
+ // TODO: recording the fact that a particular tree is in a stack temp.
+
+ /* byte/char/short operand -- is this acceptable to the caller? */
+
+ if (varTypeIsSmall(tree->TypeGet()) && !smallOK)
+ goto EVAL_TREE;
+
+ // Evaluate non-last elements of comma expressions, to get to the last.
+ tree = genCodeForCommaTree(tree);
+
+ switch (tree->gtOper)
+ {
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+
+ genUpdateLife(tree);
+ return 0;
+
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(tree))
+ {
+ genUpdateLife(tree);
+ return 0;
+ }
+
+ __fallthrough; // it turns out the variable lives in a register
+
+ case GT_REG_VAR:
+
+ genUpdateLife(tree);
+
+ goto GOT_VAL;
+
+ case GT_CLS_VAR:
+
+ return 0;
+
+ case GT_CNS_INT:
+#ifdef _TARGET_64BIT_
+ // Non-relocs will be sign extended, so we don't have to enregister
+ // constants that are equivalent to a sign-extended int.
+ // Relocs can be left alone if they are RIP-relative.
+ if ((genTypeSize(tree->TypeGet()) > 4) &&
+ (!tree->IsIntCnsFitsInI32() ||
+ (tree->IsIconHandle() &&
+ (IMAGE_REL_BASED_REL32 != compiler->eeGetRelocTypeHint((void*)tree->gtIntCon.gtIconVal)))))
+ {
+ break;
+ }
+#endif // _TARGET_64BIT_
+ __fallthrough;
+
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ // For MinOpts, we don't do constant folding, so we have
+ // constants showing up in places we don't like.
+ // Force them into a register now to prevent that.
+ if (compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ return 0;
+ break;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ /* Try to make the address directly addressable */
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
+ needReg, keepReg, &regMask, deferOK))
+ {
+ genUpdateLife(tree);
+ return regMask;
+ }
+
+ /* No good, we'll have to load the address into a register */
+
+ addr = tree;
+ tree = tree->gtOp.gtOp1;
+ break;
+
+ default:
+ break;
+ }
+
+EVAL_TREE:
+
+ /* Here we need to compute the value 'tree' into a register */
+
+ genCodeForTree(tree, needReg);
+
+GOT_VAL:
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ if (isRegPairType(tree->gtType))
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ regMask = genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree, addr);
+
+ regMask = genRegMask(tree->gtRegNum);
+ }
+
+ return regMask;
+}
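+
+// Illustrative sketch (editor's note, not part of the original source): the keep/done protocol
+// described in the header comment above, for a hypothetical store of 'src' through 'dst':
+//
+//     regMaskTP addrReg = genMakeAddressable(dst, RBM_NONE, RegSet::KEEP_REG,
+//                                            false /*smallOK*/, false /*deferOK*/);
+//     genCodeForTree(src, RBM_NONE);                 // may spill the addressability registers
+//     addrReg = genKeepAddressable(dst, addrReg);    // reload them if that happened
+//     inst_TT_RV(INS_mov, dst, src->gtRegNum);       // access 'dst' via the inst_XX_TT family
+//     genDoneAddressable(dst, addrReg, RegSet::KEEP_REG);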
+
+/*****************************************************************************
+ * Compute a tree (which was previously made addressable using
+ * genMakeAddressable()) into a register.
+ * needReg - mask of preferred registers.
+ * keepReg - should the computed register be marked as used by the tree
+ * freeOnly - target register needs to be a scratch register
+ */
+
+void CodeGen::genComputeAddressable(GenTreePtr tree,
+ regMaskTP addrReg,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly)
+{
+ noway_assert(genStillAddressable(tree));
+ noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
+
+ genDoneAddressable(tree, addrReg, keptReg);
+
+ regNumber reg;
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ reg = tree->gtRegNum;
+
+ if (freeOnly && !(genRegMask(reg) & regSet.rsRegMaskFree()))
+ goto MOVE_REG;
+ }
+ else
+ {
+ if (tree->OperIsConst())
+ {
+ /* Need to handle consts separately as we don't want to emit
+ "mov reg, 0" (emitter doesn't like that). Also, genSetRegToIcon()
+ handles consts better for SMALL_CODE */
+
+ noway_assert(tree->IsCnsIntOrI());
+ reg = genGetRegSetToIcon(tree->gtIntCon.gtIconVal, needReg, tree->gtType);
+ }
+ else
+ {
+ MOVE_REG:
+ reg = regSet.rsPickReg(needReg);
+
+ inst_RV_TT(INS_mov, reg, tree);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ }
+
+ genMarkTreeInReg(tree, reg);
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree);
+ else
+ gcInfo.gcMarkRegPtrVal(tree);
+}
+
+/*****************************************************************************
+ * Like genMakeAddressable(), but gives the caller more control via the additional
+ * forLoadStore and evalSideEffs parameters.
+ */
+
+regMaskTP CodeGen::genMakeAddressable2(GenTreePtr tree,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool forLoadStore,
+ bool smallOK,
+ bool deferOK,
+ bool evalSideEffs)
+
+{
+ bool evalToReg = false;
+
+ if (evalSideEffs && (tree->gtOper == GT_IND) && (tree->gtFlags & GTF_EXCEPT))
+ evalToReg = true;
+
+#if CPU_LOAD_STORE_ARCH
+ if (!forLoadStore)
+ evalToReg = true;
+#endif
+
+ if (evalToReg)
+ {
+ genCodeForTree(tree, needReg);
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+
+ if (isRegPairType(tree->gtType))
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegPairUsed(tree);
+
+ return genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ /* Are we supposed to hang on to the register? */
+
+ if (keepReg == RegSet::KEEP_REG)
+ regSet.rsMarkRegUsed(tree);
+
+ return genRegMask(tree->gtRegNum);
+ }
+ }
+ else
+ {
+ return genMakeAddressable(tree, needReg, keepReg, smallOK, deferOK);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable(); return
+ * 'true' if the operand is still addressable.
+ */
+
+// inline
+bool CodeGen::genStillAddressable(GenTreePtr tree)
+{
+ /* Has the value (or one or more of its sub-operands) been spilled? */
+
+ if (tree->gtFlags & (GTF_SPILLED | GTF_SPILLED_OPER))
+ return false;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Recursive helper to restore complex address modes. The 'lockPhase'
+ * argument indicates whether we're in the 'lock' or 'reload' phase.
+ */
+
+regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool lockPhase)
+{
+ regMaskTP regMask = RBM_NONE;
+
+ /* Have we found a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* Do nothing if we're locking, otherwise reload and lock */
+
+ if (!lockPhase)
+ {
+ /* Unspill the register */
+
+ regSet.rsUnspillReg(tree, 0, RegSet::FREE_REG);
+
+ /* The value should now be sitting in a register */
+
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ regMask = genRegMask(tree->gtRegNum);
+
+ /* Mark the register as used for the address */
+
+ regSet.rsMarkRegUsed(tree, addr);
+
+ /* Lock the register until we're done with the entire address */
+
+ regSet.rsMaskLock |= regMask;
+ }
+
+ return regMask;
+ }
+
+ /* Is this sub-tree sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ regMask = genRegMask(tree->gtRegNum);
+
+ /* Lock the register if we're in the locking phase */
+
+ if (lockPhase)
+ regSet.rsMaskLock |= regMask;
+ }
+ else
+ {
+ /* Process any sub-operands of this node */
+
+ unsigned kind = tree->OperKind();
+
+ if (kind & GTK_SMPOP)
+ {
+ /* Unary/binary operator */
+
+ if (tree->gtOp.gtOp1)
+ regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
+ if (tree->gtGetOp2())
+ regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ /* gtArrObj is the array-object and gtArrInds[0] is marked with the register
+ which holds the offset-calculation */
+
+ regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrObj, lockPhase);
+ regMask |= genRestoreAddrMode(addr, tree->gtArrElem.gtArrInds[0], lockPhase);
+ }
+ else if (tree->gtOper == GT_CMPXCHG)
+ {
+ regMask |= genRestoreAddrMode(addr, tree->gtCmpXchg.gtOpLocation, lockPhase);
+ }
+ else
+ {
+ /* Must be a leaf/constant node */
+
+ noway_assert(kind & (GTK_LEAF | GTK_CONST));
+ }
+ }
+
+ return regMask;
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable, but since then
+ * some of its registers are known to have been spilled; do whatever it takes
+ * to make the operand addressable again (typically by reloading any spilled
+ * registers).
+ */
+
+regMaskTP CodeGen::genRestAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP lockMask)
+{
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+
+ /* Is this a 'simple' register spill? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ /* The mask must match the original register/regpair */
+
+ if (isRegPairType(tree->gtType))
+ {
+ noway_assert(addrReg == genRegPairMask(tree->gtRegPair));
+
+ regSet.rsUnspillRegPair(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
+
+ addrReg = genRegPairMask(tree->gtRegPair);
+ }
+ else
+ {
+ noway_assert(addrReg == genRegMask(tree->gtRegNum));
+
+ regSet.rsUnspillReg(tree, /* restore it anywhere */ RBM_NONE, RegSet::KEEP_REG);
+
+ addrReg = genRegMask(tree->gtRegNum);
+ }
+
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+ regSet.rsMaskLock -= lockMask;
+
+ return addrReg;
+ }
+
+ /* We have a complex address mode with some of its sub-operands spilled */
+
+ noway_assert((tree->gtFlags & GTF_REG_VAL) == 0);
+ noway_assert((tree->gtFlags & GTF_SPILLED_OPER) != 0);
+
+ /*
+ We'll proceed in several phases:
+
+ 1. Lock any registers that are part of the address mode and
+ have not been spilled. This prevents these registers from
+ getting spilled in step 2.
+
+ 2. Reload any registers that have been spilled; lock each
+ one right after it is reloaded.
+
+ 3. Unlock all the registers.
+ */
+
+ addrReg = genRestoreAddrMode(tree, tree, true);
+ addrReg |= genRestoreAddrMode(tree, tree, false);
+
+ /* Unlock all registers that the address mode uses */
+
+ lockMask |= addrReg;
+
+ noway_assert((regSet.rsMaskLock & lockMask) == lockMask);
+ regSet.rsMaskLock -= lockMask;
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * The given tree was previously passed to genMakeAddressable, but since then
+ * some of its registers might have been spilled ('addrReg' is the set of
+ * registers used by the address). This function makes sure the operand is
+ * still addressable (while avoiding any of the registers in 'avoidMask'),
+ * and returns the (possibly modified) set of registers that are used by
+ * the address (these will be marked as used on exit).
+ */
+
+regMaskTP CodeGen::genKeepAddressable(GenTreePtr tree, regMaskTP addrReg, regMaskTP avoidMask)
+{
+ /* Is the operand still addressable? */
+
+ tree = tree->gtEffectiveVal(/*commaOnly*/ true); // Strip off commas for this purpose.
+
+ if (!genStillAddressable(tree))
+ {
+ if (avoidMask)
+ {
+ // Temporarily lock 'avoidMask' while we restore addressability
+ // genRestAddressable will unlock the 'avoidMask' for us
+ // avoidMask must already be marked as a used reg in regSet.rsMaskUsed
+ // In regSet.rsRegMaskFree() we require that all locked registers be marked as used
+ //
+ regSet.rsLockUsedReg(avoidMask);
+ }
+
+ addrReg = genRestAddressable(tree, addrReg, avoidMask);
+
+ noway_assert((regSet.rsMaskLock & avoidMask) == 0);
+ }
+
+ return addrReg;
+}
+
+/*****************************************************************************
+ *
+ * After we're finished with the given operand (which was previously marked
+ * by calling genMakeAddressable), this function must be called to free any
+ * registers that may have been used by the address.
+ * keptReg indicates if the addressability registers were marked as used
+ * by genMakeAddressable().
+ */
+
+void CodeGen::genDoneAddressable(GenTreePtr tree, regMaskTP addrReg, RegSet::KeepReg keptReg)
+{
+ if (keptReg == RegSet::FREE_REG)
+ {
+ // We exclude regSet.rsMaskUsed since the registers may be multi-used.
+ // i.e., there may be a pending use in a higher-up tree.
+
+ addrReg &= ~regSet.rsMaskUsed;
+
+ /* addrReg was not marked as used. So just reset its GC info */
+ if (addrReg)
+ {
+ gcInfo.gcMarkRegSetNpt(addrReg);
+ }
+ }
+ else
+ {
+ /* addrReg was marked as used. So we need to free it up (which
+ will also reset its GC info) */
+
+ regSet.rsMarkRegFree(addrReg);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Make sure the given floating point value is addressable, and return a tree
+ * that will yield the value as an addressing mode (this tree may differ from
+ * the one passed in, BTW). If the only way to make the value addressable is
+ * to evaluate into the FP stack, we do this and return zero.
+ */
+
+GenTreePtr CodeGen::genMakeAddrOrFPstk(GenTreePtr tree, regMaskTP* regMaskPtr, bool roundResult)
+{
+ *regMaskPtr = 0;
+
+ switch (tree->gtOper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_CNS_DBL:
+ if (tree->gtType == TYP_FLOAT)
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ return genMakeConst(&f, TYP_FLOAT, tree, false);
+ }
+ return genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ /* Try to make the address directly addressable */
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, /* not for LEA */
+ 0, RegSet::FREE_REG, regMaskPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+#if FEATURE_STACK_FP_X87
+ /* We have no choice but to compute the value 'tree' onto the FP stack */
+
+ genCodeForTreeFlt(tree);
+#endif
+ return 0;
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Display a string literal value (debug only).
+ */
+
+#ifdef DEBUG
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around code sequence.
+ * Otherwise, ECX may be modified.
+ *
+ * TODO-ARM-Bug?: pushReg is not implemented (is it needed for ARM?)
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ // Make sure that EAX didn't die in the return expression
+ if (!pushReg && (compiler->info.compRetType == TYP_REF))
+ gcInfo.gcRegGCrefSetCur |= RBM_INTRET;
+
+ // Add cookie check code for unsafe buffers
+ BasicBlock* gsCheckBlk;
+ regMaskTP byrefPushedRegs = RBM_NONE;
+ regMaskTP norefPushedRegs = RBM_NONE;
+ regMaskTP pushedRegs = RBM_NONE;
+
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ if (compiler->gsGlobalSecurityCookieAddr == NULL)
+ {
+ // JIT case
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+
+ regNumber reg = regSet.rsGrabReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (arm_Valid_Imm_For_Alu(compiler->gsGlobalSecurityCookieVal) ||
+ arm_Valid_Imm_For_Alu(~compiler->gsGlobalSecurityCookieVal))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg, compiler->gsGlobalSecurityCookieVal);
+ }
+ else
+ {
+ // Load CookieVal into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(EA_4BYTE, immReg, compiler->gsGlobalSecurityCookieVal);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_4BYTE, reg, immReg);
+ }
+#else
+ getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
+ (int)compiler->gsGlobalSecurityCookieVal);
+#endif
+ }
+ else
+ {
+ regNumber regGSCheck;
+ regMaskTP regMaskGSCheck;
+#if CPU_LOAD_STORE_ARCH
+ regGSCheck = regSet.rsGrabReg(RBM_ALLINT);
+ regMaskGSCheck = genRegMask(regGSCheck);
+#else
+ // Don't pick the 'this' register
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
+ (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ECX))
+ {
+ regGSCheck = REG_EDX;
+ regMaskGSCheck = RBM_EDX;
+ }
+ else
+ {
+ regGSCheck = REG_ECX;
+ regMaskGSCheck = RBM_ECX;
+ }
+
+ // NGen case
+ if (pushReg && (regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)))
+ {
+ pushedRegs = genPushRegs(regMaskGSCheck, &byrefPushedRegs, &norefPushedRegs);
+ }
+ else
+ {
+ noway_assert((regMaskGSCheck & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock)) == 0);
+ }
+#endif
+#if defined(_TARGET_ARM_)
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regGSCheck, regGSCheck, 0);
+#else
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, regGSCheck, FLD_GLOBAL_DS,
+ (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+#endif // !_TARGET_ARM_
+ regTracker.rsTrashRegSet(regMaskGSCheck);
+#ifdef _TARGET_ARM_
+ regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regGSCheck));
+ getEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, regTmp, compiler->lvaGSSecurityCookie, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, regTmp, regGSCheck);
+#else
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+#endif
+ }
+
+ gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+
+ genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
+}
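+
+// Illustrative sketch (editor's note, not part of the original source): for the x86 JIT case above,
+// the emitted sequence is essentially
+//
+//     cmp dword ptr [gsCookieLocal], gsCookieVal
+//     je  noTamper
+//     call CORINFO_HELP_FAIL_FAST
+//   noTamper:
+//
+// The AOT (NGen) case first loads the cookie through gsGlobalSecurityCookieAddr into a scratch
+// register (EDX if 'this' is live in ECX) and then does the same compare-and-fail-fast pattern.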
+
+/*****************************************************************************
+ *
+ * Generate any side effects within the given expression tree.
+ */
+
+void CodeGen::genEvalSideEffects(GenTreePtr tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+AGAIN:
+
+ /* Does this sub-tree contain any side-effects? */
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+#if FEATURE_STACK_FP_X87
+ /* Remember the current FP stack level */
+ int iTemps = genNumberTemps();
+#endif
+ if (tree->OperIsIndir())
+ {
+ regMaskTP addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true, false);
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ // GTF_IND_RNGCHK trees have already de-referenced the pointer, and so
+ // do not need an additional null-check
+ /* Do this only if the GTF_EXCEPT or GTF_IND_VOLATILE flag is set on the indir */
+ else if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0 && ((tree->gtFlags & (GTF_EXCEPT | GTF_IND_VOLATILE)) != 0))
+ {
+ /* Compare against any register to do null-check */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_XARCH_)
+ inst_TT_RV(INS_cmp, tree, REG_TMP_0, 0, EA_1BYTE);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+#elif CPU_LOAD_STORE_ARCH
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ genComputeAddressableFloat(tree, addrReg, RBM_NONE, RegSet::KEEP_REG, RBM_ALLFLOAT,
+ RegSet::FREE_REG);
+ }
+ else
+ {
+ genComputeAddressable(tree, addrReg, RegSet::KEEP_REG, RBM_NONE, RegSet::FREE_REG);
+ }
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+#else
+ NYI("TARGET");
+#endif
+ }
+ else
+ {
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ /* Generate the expression and throw it away */
+ genCodeForTree(tree, RBM_ALL(tree->TypeGet()));
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcInfo.gcMarkRegPtrVal(tree);
+ }
+ }
+#if FEATURE_STACK_FP_X87
+ /* If the tree computed a value on the FP stack, pop the stack */
+ if (genNumberTemps() > iTemps)
+ {
+ noway_assert(genNumberTemps() == iTemps + 1);
+ genDiscardStackFP(tree);
+ }
+#endif
+ return;
+ }
+
+ noway_assert(tree->gtOper != GT_ASG);
+
+ /* Walk the tree, just to mark any dead values appropriately */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+#if FEATURE_STACK_FP_X87
+ if (tree->IsRegVar() && isFloatRegType(tree->gtType) && tree->IsRegVarDeath())
+ {
+ genRegVarDeathStackFP(tree);
+ FlatFPX87_Unload(&compCurFPState, tree->gtRegNum);
+ }
+#endif
+ genUpdateLife(tree);
+ gcInfo.gcMarkRegPtrVal(tree);
+ return;
+ }
+
+ /* Must be a 'simple' unary/binary operator */
+
+ noway_assert(kind & GTK_SMPOP);
+
+ if (tree->gtGetOp2())
+ {
+ genEvalSideEffects(tree->gtOp.gtOp1);
+
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+ }
+ else
+ {
+ tree = tree->gtOp.gtOp1;
+ if (tree)
+ goto AGAIN;
+ }
+}
+
+/*****************************************************************************
+ *
+ * A persistent pointer value is being overwritten, record it for the GC.
+ *
+ * tgt : the destination being written to
+ * assignVal : the value being assigned (the source). It must currently be in a register.
+ * tgtAddrReg : the set of registers being used by "tgt"
+ *
+ * Returns : the mask of the scratch register that was used.
+ * RBM_NONE if a write-barrier is not needed.
+ */
+
+regMaskTP CodeGen::WriteBarrier(GenTreePtr tgt, GenTreePtr assignVal, regMaskTP tgtAddrReg)
+{
+ noway_assert(assignVal->gtFlags & GTF_REG_VAL);
+
+ GCInfo::WriteBarrierForm wbf = gcInfo.gcIsWriteBarrierCandidate(tgt, assignVal);
+ if (wbf == GCInfo::WBF_NoBarrier)
+ return RBM_NONE;
+
+ regMaskTP resultRegMask = RBM_NONE;
+
+#if FEATURE_WRITE_BARRIER
+
+ regNumber reg = assignVal->gtRegNum;
+
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
+ {
+#endif
+ const static int regToHelper[2][8] = {
+ // If the target is known to be in managed memory
+ {
+ CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
+ CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
+ },
+
+ // Don't know if the target is in managed memory
+ {
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
+ },
+ };
+
+ noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[0][REG_ESP] == -1);
+ noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
+
+ noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[1][REG_ESP] == -1);
+ noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
+
+ noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
+
+ /*
+ Generate the following code:
+
+ lea edx, tgt
+ call write_barrier_helper_reg
+
+ First grab the RBM_WRITE_BARRIER register for the target address.
+ */
+
+ regNumber rg1;
+ bool trashOp1;
+
+ if ((tgtAddrReg & RBM_WRITE_BARRIER) == 0)
+ {
+ rg1 = regSet.rsGrabReg(RBM_WRITE_BARRIER);
+
+ regSet.rsMaskUsed |= RBM_WRITE_BARRIER;
+ regSet.rsMaskLock |= RBM_WRITE_BARRIER;
+
+ trashOp1 = false;
+ }
+ else
+ {
+ rg1 = REG_WRITE_BARRIER;
+
+ trashOp1 = true;
+ }
+
+ noway_assert(rg1 == REG_WRITE_BARRIER);
+
+ /* Generate "lea EDX, [addr-mode]" */
+
+ noway_assert(tgt->gtType == TYP_REF);
+ tgt->gtType = TYP_BYREF;
+ inst_RV_TT(INS_lea, rg1, tgt, 0, EA_BYREF);
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
+
+ // In case "tgt" was a comma:
+ tgt = tgt->gtEffectiveVal();
+
+ regTracker.rsTrackRegTrash(rg1);
+ gcInfo.gcMarkRegSetNpt(genRegMask(rg1));
+ gcInfo.gcMarkRegPtrVal(rg1, TYP_BYREF);
+
+ /* Call the proper vm helper */
+
+ // enforced by gcIsWriteBarrierCandidate
+ noway_assert(tgt->gtOper == GT_IND || tgt->gtOper == GT_CLS_VAR);
+
+ unsigned tgtAnywhere = 0;
+ if ((tgt->gtOper == GT_IND) &&
+ ((tgt->gtFlags & GTF_IND_TGTANYWHERE) || (tgt->gtOp.gtOp1->TypeGet() == TYP_I_IMPL)))
+ {
+ tgtAnywhere = 1;
+ }
+
+ int helper = regToHelper[tgtAnywhere][reg];
+ resultRegMask = genRegMask(reg);
+
+ gcInfo.gcMarkRegSetNpt(RBM_WRITE_BARRIER); // byref EDX is killed in the call
+
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ if (!trashOp1)
+ {
+ regSet.rsMaskUsed &= ~RBM_WRITE_BARRIER;
+ regSet.rsMaskLock &= ~RBM_WRITE_BARRIER;
+ }
+
+ return resultRegMask;
+
+#ifdef DEBUG
+ }
+ else
+#endif
+#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+ {
+ /*
+ Generate the following code (or its equivalent on the given target):
+
+ mov arg1, srcReg
+ lea arg0, tgt
+ call write_barrier_helper
+
+ First, setup REG_ARG_1 with the GC ref that we are storing via the Write Barrier
+ */
+
+ if (reg != REG_ARG_1)
+ {
+ // We may need to spill whatever is in the ARG_1 register
+ //
+ if ((regSet.rsMaskUsed & RBM_ARG_1) != 0)
+ {
+ regSet.rsSpillReg(REG_ARG_1);
+ }
+
+ inst_RV_RV(INS_mov, REG_ARG_1, reg, TYP_REF);
+ }
+ resultRegMask = RBM_ARG_1;
+
+ regTracker.rsTrackRegTrash(REG_ARG_1);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_1);
+ gcInfo.gcMarkRegSetGCref(RBM_ARG_1); // gcref in ARG_1
+
+ bool free_arg1 = false;
+ if ((regSet.rsMaskUsed & RBM_ARG_1) == 0)
+ {
+ regSet.rsMaskUsed |= RBM_ARG_1;
+ free_arg1 = true;
+ }
+
+ // Then we setup REG_ARG_0 with the target address to store into via the Write Barrier
+
+ /* Generate "lea R0, [addr-mode]" */
+
+ noway_assert(tgt->gtType == TYP_REF);
+ tgt->gtType = TYP_BYREF;
+
+ tgtAddrReg = genKeepAddressable(tgt, tgtAddrReg);
+
+ // We may need to spill whatever is in the ARG_0 register
+ //
+ if (((tgtAddrReg & RBM_ARG_0) == 0) && // tgtAddrReg does not contain REG_ARG_0
+ ((regSet.rsMaskUsed & RBM_ARG_0) != 0) && // and regSet.rsMaskUsed contains REG_ARG_0
+ (reg != REG_ARG_0)) // unless REG_ARG_0 contains the REF value being written, which we're finished with.
+ {
+ regSet.rsSpillReg(REG_ARG_0);
+ }
+
+ inst_RV_TT(INS_lea, REG_ARG_0, tgt, 0, EA_BYREF);
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(tgt, tgtAddrReg, RegSet::KEEP_REG);
+
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ gcInfo.gcMarkRegSetNpt(REG_ARG_0);
+ gcInfo.gcMarkRegSetByref(RBM_ARG_0); // byref in ARG_0
+
+#ifdef _TARGET_ARM_
+#if NOGC_WRITE_BARRIERS
+ // Finally, we may need to spill whatever is in the other argument registers that are
+ // trashed by the call. The write barrier trashes some additional registers --
+ // either the standard volatile register set or, if we're using assembly barriers, a more specialized set.
+
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH_NOGC;
+#else
+ regMaskTP volatileRegsTrashed = RBM_CALLEE_TRASH;
+#endif
+ // Spill any other registers trashed by the write barrier call and currently in use.
+ regMaskTP mustSpill = (volatileRegsTrashed & regSet.rsMaskUsed & ~(RBM_ARG_0 | RBM_ARG_1));
+ if (mustSpill)
+ regSet.rsSpillRegs(mustSpill);
+#endif // _TARGET_ARM_
+
+ bool free_arg0 = false;
+ if ((regSet.rsMaskUsed & RBM_ARG_0) == 0)
+ {
+ regSet.rsMaskUsed |= RBM_ARG_0;
+ free_arg0 = true;
+ }
+
+ // genEmitHelperCall might need to grab a register
+ // so don't let it spill one of the arguments
+ //
+ regMaskTP reallyUsedRegs = RBM_NONE;
+ regSet.rsLockReg(RBM_ARG_0 | RBM_ARG_1, &reallyUsedRegs);
+
+ genGCWriteBarrier(tgt, wbf);
+
+ regSet.rsUnlockReg(RBM_ARG_0 | RBM_ARG_1, reallyUsedRegs);
+ gcInfo.gcMarkRegSetNpt(RBM_ARG_0 | RBM_ARG_1); // byref ARG_0 and reg ARG_1 are killed by the call
+
+ if (free_arg0)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_0;
+ }
+ if (free_arg1)
+ {
+ regSet.rsMaskUsed &= ~RBM_ARG_1;
+ }
+
+ return resultRegMask;
+ }
+#endif // defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+
+#else // !FEATURE_WRITE_BARRIER
+
+ NYI("FEATURE_WRITE_BARRIER unimplemented");
+ return resultRegMask;
+
+#endif // !FEATURE_WRITE_BARRIER
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the high 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
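+ // A strict inequality in the high words decides the result here; when the high
+ // words compare equal we fall through so that genJccLongLo can decide using the
+ // low words.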
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_jne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_ja, jumpFalse);
+ inst_JMP(EJ_jb, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jg, jumpFalse);
+ inst_JMP(EJ_jl, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_jb, jumpFalse);
+ inst_JMP(EJ_ja, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_jl, jumpFalse);
+ inst_JMP(EJ_jg, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate the appropriate conditional jump(s) right after the low 32 bits
+ * of two long values have been compared.
+ */
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_je, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_jne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_jb, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_jbe, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_jae, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_ja, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#elif defined(_TARGET_ARM_)
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the high 32 bits
+* of two long values have been compared.
+*/
+
+void CodeGen::genJccLongHi(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool isUnsigned)
+{
+ if (cmp != GT_NE)
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_ne, jumpFalse);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ case GT_LE:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_hi, jumpFalse);
+ inst_JMP(EJ_lo, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_gt, jumpFalse);
+ inst_JMP(EJ_lt, jumpTrue);
+ }
+ break;
+
+ case GT_GE:
+ case GT_GT:
+ if (isUnsigned)
+ {
+ inst_JMP(EJ_lo, jumpFalse);
+ inst_JMP(EJ_hi, jumpTrue);
+ }
+ else
+ {
+ inst_JMP(EJ_lt, jumpFalse);
+ inst_JMP(EJ_gt, jumpTrue);
+ }
+ break;
+
+ default:
+ noway_assert(!"expected a comparison operator");
+ }
+}
+
+/*****************************************************************************
+*
+* Generate the appropriate conditional jump(s) right after the low 32 bits
+* of two long values have been compared.
+*/
+
+void CodeGen::genJccLongLo(genTreeOps cmp, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ inst_JMP(EJ_eq, jumpTrue);
+ break;
+
+ case GT_NE:
+ inst_JMP(EJ_ne, jumpTrue);
+ break;
+
+ case GT_LT:
+ inst_JMP(EJ_lo, jumpTrue);
+ break;
+
+ case GT_LE:
+ inst_JMP(EJ_ls, jumpTrue);
+ break;
+
+ case GT_GE:
+ inst_JMP(EJ_hs, jumpTrue);
+ break;
+
+ case GT_GT:
+ inst_JMP(EJ_hi, jumpTrue);
+ break;
+
+ default:
+ noway_assert(!"expected comparison");
+ }
+}
+#endif
+/*****************************************************************************
+ *
+ * Called by genCondJump() for TYP_LONG.
+ */
+
+void CodeGen::genCondJumpLng(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bFPTransition)
+{
+ noway_assert(jumpTrue && jumpFalse);
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == false); // Done in genCondJump()
+ noway_assert(cond->gtOp.gtOp1->gtType == TYP_LONG);
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ regMaskTP addrReg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 lval = op2->gtLngCon.gtLconVal;
+ regNumber rTmp;
+
+ // We're "done" evaluating op2; let's strip any commas off op1 before we
+ // evaluate it.
+ op1 = genCodeForCommaTree(op1);
+
+ /* We can generate better code for some special cases */
+ instruction ins = INS_invalid;
+ bool useIncToSetFlags = false;
+ bool specialCaseCmp = false;
+
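+ // A long compare against 0 can be done by OR-ing the two halves together (ZF is
+ // set only if both halves are zero); a compare against -1 can be done by AND-ing
+ // the halves and then incrementing the result (ZF is set only if both halves
+ // were all ones).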
+ if (cmp == GT_EQ)
+ {
+ if (lval == 0)
+ {
+ /* op1 == 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 == -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+ else if (cmp == GT_NE)
+ {
+ if (lval == 0)
+ {
+ /* op1 != 0 */
+ ins = INS_OR;
+ useIncToSetFlags = false;
+ specialCaseCmp = true;
+ }
+ else if (lval == -1)
+ {
+ /* op1 != -1 */
+ ins = INS_AND;
+ useIncToSetFlags = true;
+ specialCaseCmp = true;
+ }
+ }
+
+ if (specialCaseCmp)
+ {
+ /* Make the comparand addressable */
+
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::KEEP_REG, false, true);
+
+ regMaskTP tmpMask = regSet.rsRegMaskCanGrab();
+ insFlags flags = useIncToSetFlags ? INS_FLAGS_DONT_CARE : INS_FLAGS_SET;
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regPairNo regPair = op1->gtRegPair;
+ regNumber rLo = genRegPairLo(regPair);
+ regNumber rHi = genRegPairHi(regPair);
+ if (tmpMask & genRegMask(rLo))
+ {
+ rTmp = rLo;
+ }
+ else if (tmpMask & genRegMask(rHi))
+ {
+ rTmp = rHi;
+ rHi = rLo;
+ }
+ else
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+ inst_RV_RV(INS_mov, rTmp, rLo, TYP_INT);
+ }
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ if (rHi != REG_STK)
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_RV(ins, rTmp, rHi, TYP_INT, EA_4BYTE, flags);
+ }
+ else
+ {
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+ }
+ else // op1 is not GTF_REG_VAL
+ {
+ rTmp = regSet.rsGrabReg(tmpMask);
+
+ /* Load the low 32-bits of op1 */
+ inst_RV_TT(ins_Load(TYP_INT), rTmp, op1, 0);
+
+ /* The register is now trashed */
+ regTracker.rsTrackRegTrash(rTmp);
+
+ /* Set the flags using INS_AND | INS_OR */
+ inst_RV_TT(ins, rTmp, op1, 4, EA_4BYTE, flags);
+ }
+
+ /* Free up the addrReg(s) if any */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* Comparisons against -1 also require an inc instruction */
+ if (useIncToSetFlags)
+ {
+ /* Make sure the inc will set the flags */
+ assert(cond->gtSetFlags());
+ genIncRegBy(rTmp, 1, cond, TYP_INT);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ emitJumpKind jmpKind = genJumpKindForOper(cmp, CK_SIGNED);
+ inst_JMP(jmpKind, jumpTrue);
+ }
+ else // specialCaseCmp == false
+ {
+ /* Make the comparand addressable */
+ addrReg = genMakeRvalueAddressable(op1, 0, RegSet::FREE_REG, false, true);
+
+ /* Compare the high part first */
+
+ int ival = (int)(lval >> 32);
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairHi(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as gtFoldExpr() doesn't fold longs
+
+ noway_assert(addrReg == 0);
+ int op1_hiword = (int)(op1->gtLngCon.gtLconVal >> 32);
+
+ /* Get the constant operand into a register */
+ rTmp = genGetRegSetToIcon(op1_hiword);
+
+ /* Generate 'cmp rTmp, ival' */
+
+ inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
+ }
+ else
+ {
+ /* Generate 'cmp op1, ival' */
+
+ inst_TT_IV(INS_cmp, op1, ival, 4);
+ }
+ }
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+ /* Generate the appropriate jumps */
+
+ if (cond->gtFlags & GTF_UNSIGNED)
+ genJccLongHi(cmp, jumpTrue, jumpFalse, true);
+ else
+ genJccLongHi(cmp, jumpTrue, jumpFalse);
+
+ /* Compare the low part second */
+
+ ival = (int)lval;
+
+ /* Comparing a register against 0 is easier */
+
+ if (!ival && (op1->gtFlags & GTF_REG_VAL) && (rTmp = genRegPairLo(op1->gtRegPair)) != REG_STK)
+ {
+ /* Generate 'test rTmp, rTmp' */
+ instGen_Compare_Reg_To_Zero(emitTypeSize(op1->TypeGet()), rTmp); // set flags
+ }
+ else
+ {
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->gtOper == GT_CNS_LNG))
+ {
+ /* Special case: comparison of two constants */
+ // Needed as gtFoldExpr() doesn't fold longs
+
+ noway_assert(addrReg == 0);
+ int op1_loword = (int)op1->gtLngCon.gtLconVal;
+
+ /* get the constant operand into a register */
+ rTmp = genGetRegSetToIcon(op1_loword);
+
+ /* Generate 'cmp rTmp, ival' */
+
+ inst_RV_IV(INS_cmp, rTmp, ival, EA_4BYTE);
+ }
+ else
+ {
+ /* Generate 'cmp op1, ival' */
+
+ inst_TT_IV(INS_cmp, op1, ival, 0);
+ }
+ }
+
+ /* Generate the appropriate jumps */
+ genJccLongLo(cmp, jumpTrue, jumpFalse);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ }
+ }
+ else // (op2->gtOper != GT_CNS_LNG)
+ {
+
+ /* The operands would be reversed by physically swapping them */
+
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate the first operand into a register pair */
+
+ genComputeRegPair(op1, REG_PAIR_NONE, op2->gtRsvdRegs, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+#if CPU_LOAD_STORE_ARCH
+ /* Generate the second operand into a register pair */
+ // Fix 388442 ARM JitStress WP7
+ genComputeRegPair(op2, REG_PAIR_NONE, genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsLockUsedReg(genRegPairMask(op2->gtRegPair));
+#else
+ /* Make the second operand addressable */
+
+ addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~genRegPairMask(op1->gtRegPair), RegSet::KEEP_REG, false);
+#endif
+ /* Make sure the first operand hasn't been spilled */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPairNo regPair = op1->gtRegPair;
+
+#if !CPU_LOAD_STORE_ARCH
+ /* Make sure 'op2' is still addressable while avoiding 'op1' (regPair) */
+
+ addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
+#endif
+
+#if FEATURE_STACK_FP_X87
+ // We may need a transition block
+ if (bFPTransition)
+ {
+ jumpTrue = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTrue);
+ }
+#endif
+
+ /* Perform the comparison - high parts */
+
+ inst_RV_TT(INS_cmp, genRegPairHi(regPair), op2, 4);
+
+ if (cond->gtFlags & GTF_UNSIGNED)
+ genJccLongHi(cmp, jumpTrue, jumpFalse, true);
+ else
+ genJccLongHi(cmp, jumpTrue, jumpFalse);
+
+ /* Compare the low parts */
+
+ inst_RV_TT(INS_cmp, genRegPairLo(regPair), op2, 0);
+ genJccLongLo(cmp, jumpTrue, jumpFalse);
+
+ /* Free up anything that was tied up by either operand */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+
+ // Fix 388442 ARM JitStress WP7
+ regSet.rsUnlockUsedReg(genRegPairMask(op2->gtRegPair));
+ genReleaseRegPair(op2);
+#else
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+#endif
+ genReleaseRegPair(op1);
+ }
+}
+
+/*****************************************************************************
+ * gen_fcomp_FN, gen_fcomp_FS_TT, gen_fcompp_FS
+ * Called by genCondJumpFlt() to generate the fcomp instruction appropriate
+ * to the architecture we're running on.
+ *
+ * P5:
+ * gen_fcomp_FN: fcomp ST(0), stk
+ * gen_fcomp_FS_TT: fcomp ST(0), addr
+ * gen_fcompp_FS: fcompp
+ * These are followed by fnstsw, sahf to get the flags in EFLAGS.
+ *
+ * P6:
+ * gen_fcomp_FN: fcomip ST(0), stk
+ * gen_fcomp_FS_TT: fld addr, fcomip ST(0), ST(1), fstp ST(0)
+ * (and reverse the branch condition since addr comes first)
+ * gen_fcompp_FS: fcomip, fstp
+ * These instructions will correctly set the EFLAGS register.
+ *
+ * Return value: These functions return true if the instruction has
+ * already placed its result in the EFLAGS register.
+ */
+
+bool CodeGen::genUse_fcomip()
+{
+ return compiler->opts.compUseFCOMI;
+}
+
+/*****************************************************************************
+ *
+ * Sets the flags for the TYP_INT/TYP_REF comparison.
+ * We try to use the flags if they have already been set by a prior
+ * instruction.
+ * e.g. i++; if (i < 0) {} Here, the "i++;" will have set the sign flag, so we
+ * don't need to compare against zero again; we can just use an "INS_js".
+ *
+ * Returns the jump kind that the following jump/set instruction should use.
+ */
+
+emitJumpKind CodeGen::genCondSetFlags(GenTreePtr cond)
+{
+ noway_assert(cond->OperIsCompare());
+ noway_assert(varTypeIsI(genActualType(cond->gtOp.gtOp1->gtType)));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ if (cond->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Don't forget to modify the condition as well */
+
+ cond->gtOp.gtOp1 = op2;
+ cond->gtOp.gtOp2 = op1;
+ cond->SetOper(GenTree::SwapRelop(cmp));
+ cond->gtFlags &= ~GTF_REVERSE_OPS;
+
+ /* Get hold of the new values */
+
+ cmp = cond->OperGet();
+ op1 = cond->gtOp.gtOp1;
+ op2 = cond->gtOp.gtOp2;
+ }
+
+ // Note that op1's type may get bashed. So save it early
+
+ var_types op1Type = op1->TypeGet();
+ bool unsignedCmp = (cond->gtFlags & GTF_UNSIGNED) != 0;
+ emitAttr size = EA_UNKNOWN;
+
+ regMaskTP regNeed;
+ regMaskTP addrReg1 = RBM_NONE;
+ regMaskTP addrReg2 = RBM_NONE;
+ emitJumpKind jumpKind = EJ_COUNT; // Initialize with an invalid value
+
+ bool byteCmp;
+ bool shortCmp;
+
+ regMaskTP newLiveMask;
+ regNumber op1Reg;
+
+ /* Are we comparing against a constant? */
+
+ if (op2->IsCnsIntOrI())
+ {
+ ssize_t ival = op2->gtIntConCommon.IconValue();
+
+ /* Unsigned less-than comparisons with 1 ('< 1')
+ should be transformed into '== 0' to potentially
+ suppress a tst instruction.
+ */
+ if ((ival == 1) && (cmp == GT_LT) && unsignedCmp)
+ {
+ op2->gtIntCon.gtIconVal = ival = 0;
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Comparisons against 0 can be easier */
+
+ if (ival == 0)
+ {
+ // if we can safely change the comparison to unsigned we do so
+ if (!unsignedCmp && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
+ {
+ unsignedCmp = true;
+ }
+
+ /* Unsigned comparisons with 0 should be transformed into
+ '== 0' or '!= 0' to potentially suppress a tst instruction. */
+
+ if (unsignedCmp)
+ {
+ if (cmp == GT_GT)
+ cond->gtOper = cmp = GT_NE;
+ else if (cmp == GT_LE)
+ cond->gtOper = cmp = GT_EQ;
+ }
+
+ /* Is this a simple zero/non-zero test? */
+
+ if (cmp == GT_EQ || cmp == GT_NE)
+ {
+ /* Is the operand an "AND" operation? */
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr an1 = op1->gtOp.gtOp1;
+ GenTreePtr an2 = op1->gtOp.gtOp2;
+
+ /* Check for the case "expr & icon" */
+
+ if (an2->IsIntCnsFitsInI32())
+ {
+ int iVal = (int)an2->gtIntCon.gtIconVal;
+
+ /* make sure that constant is not out of an1's range */
+
+ switch (an1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ if (iVal & 0xffffff00)
+ goto NO_TEST_FOR_AND;
+ break;
+ case TYP_CHAR:
+ case TYP_SHORT:
+ if (iVal & 0xffff0000)
+ goto NO_TEST_FOR_AND;
+ break;
+ default:
+ break;
+ }
+
+ if (an1->IsCnsIntOrI())
+ {
+ // Special case - Both operands of AND are consts
+ genComputeReg(an1, 0, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ addrReg1 = genRegMask(an1->gtRegNum);
+ }
+ else
+ {
+ addrReg1 = genMakeAddressable(an1, RBM_NONE, RegSet::KEEP_REG, true);
+ }
+#if CPU_LOAD_STORE_ARCH
+ if ((an1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ genComputeAddressable(an1, addrReg1, RegSet::KEEP_REG, RBM_NONE, RegSet::KEEP_REG);
+ if (arm_Valid_Imm_For_Alu(iVal))
+ {
+ inst_RV_IV(INS_TEST, an1->gtRegNum, iVal, emitActualTypeSize(an1->gtType));
+ }
+ else
+ {
+ regNumber regTmp = regSet.rsPickFreeReg();
+ instGen_Set_Reg_To_Imm(EmitSize(an2), regTmp, iVal);
+ inst_RV_RV(INS_TEST, an1->gtRegNum, regTmp);
+ }
+ genReleaseReg(an1);
+ addrReg1 = RBM_NONE;
+ }
+ else
+#endif
+ {
+#ifdef _TARGET_XARCH_
+ // Check to see if we can use a smaller immediate.
+ if ((an1->gtFlags & GTF_REG_VAL) && ((iVal & 0x0000FFFF) == iVal))
+ {
+ var_types testType =
+ (var_types)(((iVal & 0x000000FF) == iVal) ? TYP_UBYTE : TYP_USHORT);
+#if CPU_HAS_BYTE_REGS
+ // if we don't have byte-able register, switch to the 2-byte form
+ if ((testType == TYP_UBYTE) && !(genRegMask(an1->gtRegNum) & RBM_BYTE_REGS))
+ {
+ testType = TYP_USHORT;
+ }
+#endif // CPU_HAS_BYTE_REGS
+
+ inst_TT_IV(INS_TEST, an1, iVal, testType);
+ }
+ else
+#endif // _TARGET_XARCH_
+ {
+ inst_TT_IV(INS_TEST, an1, iVal);
+ }
+ }
+
+ goto DONE;
+
+ NO_TEST_FOR_AND:;
+ }
+
+ // TODO: Check for other cases that can generate 'test',
+ // TODO: also check for a 64-bit integer zero test which
+ // TODO: could generate 'or lo, hi' followed by jz/jnz.
+ }
+ }
+
+ // See what Jcc instruction we would use if we can take advantage of
+ // the knowledge of EFLAGs.
+
+ if (unsignedCmp)
+ {
+ /*
+ Unsigned comparison to 0. Using this table:
+
+ ----------------------------------------------------
+ | Comparison | Flags Checked | Instruction Used |
+ ----------------------------------------------------
+ | == 0 | ZF = 1 | je |
+ ----------------------------------------------------
+ | != 0 | ZF = 0 | jne |
+ ----------------------------------------------------
+ | < 0 | always FALSE | N/A |
+ ----------------------------------------------------
+ | <= 0 | ZF = 1 | je |
+ ----------------------------------------------------
+ | >= 0 | always TRUE | N/A |
+ ----------------------------------------------------
+ | > 0 | ZF = 0 | jne |
+ ----------------------------------------------------
+ */
+ switch (cmp)
+ {
+#ifdef _TARGET_ARM_
+ case GT_EQ:
+ jumpKind = EJ_eq;
+ break;
+ case GT_NE:
+ jumpKind = EJ_ne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_LE:
+ jumpKind = EJ_eq;
+ break;
+ case GT_GE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GT:
+ jumpKind = EJ_ne;
+ break;
+#elif defined(_TARGET_X86_)
+ case GT_EQ:
+ jumpKind = EJ_je;
+ break;
+ case GT_NE:
+ jumpKind = EJ_jne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_LE:
+ jumpKind = EJ_je;
+ break;
+ case GT_GE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GT:
+ jumpKind = EJ_jne;
+ break;
+#endif // TARGET
+ default:
+ noway_assert(!"Unexpected comparison OpCode");
+ break;
+ }
+ }
+ else
+ {
+ /*
+ Signed comparison to 0. Using this table:
+
+ -----------------------------------------------------
+ | Comparison | Flags Checked | Instruction Used |
+ -----------------------------------------------------
+ | == 0 | ZF = 1 | je |
+ -----------------------------------------------------
+ | != 0 | ZF = 0 | jne |
+ -----------------------------------------------------
+ | < 0 | SF = 1 | js |
+ -----------------------------------------------------
+ | <= 0 | N/A | N/A |
+ -----------------------------------------------------
+ | >= 0 | SF = 0 | jns |
+ -----------------------------------------------------
+ | > 0 | N/A | N/A |
+ -----------------------------------------------------
+ */
+
+ switch (cmp)
+ {
+#ifdef _TARGET_ARM_
+ case GT_EQ:
+ jumpKind = EJ_eq;
+ break;
+ case GT_NE:
+ jumpKind = EJ_ne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_mi;
+ break;
+ case GT_LE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GE:
+ jumpKind = EJ_pl;
+ break;
+ case GT_GT:
+ jumpKind = EJ_NONE;
+ break;
+#elif defined(_TARGET_X86_)
+ case GT_EQ:
+ jumpKind = EJ_je;
+ break;
+ case GT_NE:
+ jumpKind = EJ_jne;
+ break;
+ case GT_LT:
+ jumpKind = EJ_js;
+ break;
+ case GT_LE:
+ jumpKind = EJ_NONE;
+ break;
+ case GT_GE:
+ jumpKind = EJ_jns;
+ break;
+ case GT_GT:
+ jumpKind = EJ_NONE;
+ break;
+#endif // TARGET
+ default:
+ noway_assert(!"Unexpected comparison OpCode");
+ break;
+ }
+ assert(jumpKind == genJumpKindForOper(cmp, CK_LOGICAL));
+ }
+ assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value above
+
+ /* Is the value a simple local variable? */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* Is the flags register set to the value? */
+
+ if (genFlagsAreVar(op1->gtLclVarCommon.gtLclNum))
+ {
+ if (jumpKind != EJ_NONE)
+ {
+ addrReg1 = RBM_NONE;
+ genUpdateLife(op1);
+ goto DONE_FLAGS;
+ }
+ }
+ }
+
+ /* Make the comparand addressable */
+ addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
+
+ /* Are the condition flags set based on the value? */
+
+ unsigned flags = (op1->gtFlags & GTF_ZSF_SET);
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ if (genFlagsAreReg(op1->gtRegNum))
+ {
+ flags |= GTF_ZSF_SET;
+ }
+ }
+
+ if (flags)
+ {
+ if (jumpKind != EJ_NONE)
+ {
+ goto DONE_FLAGS;
+ }
+ }
+
+ /* Is the value in a register? */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regNumber reg = op1->gtRegNum;
+
+ /* With a 'test' we can do any signed test or any test for equality */
+
+ if (!(cond->gtFlags & GTF_UNSIGNED) || cmp == GT_EQ || cmp == GT_NE)
+ {
+ emitAttr compareSize = emitTypeSize(op1->TypeGet());
+
+ // If we have an GT_REG_VAR then the register will be properly sign/zero extended
+ // But only up to 4 bytes
+ if ((op1->gtOper == GT_REG_VAR) && (compareSize < EA_4BYTE))
+ {
+ compareSize = EA_4BYTE;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ // Make sure if we require a byte compare that we have a byte-able register
+ if ((compareSize != EA_1BYTE) || ((genRegMask(op1->gtRegNum) & RBM_BYTE_REGS) != 0))
+#endif // CPU_HAS_BYTE_REGS
+ {
+ /* Generate 'test reg, reg' */
+ instGen_Compare_Reg_To_Zero(compareSize, reg);
+ goto DONE;
+ }
+ }
+ }
+ }
+
+ else // if (ival != 0)
+ {
+ bool smallOk = true;
+
+ /* Make sure that the constant is not out of op1's range.
+ If it is, we need to perform an int-with-int comparison,
+ so we set smallOk to false and op1 gets loaded into a
+ register.
+ */
+
+ /* If op1 is TYP_SHORT, and is followed by an unsigned
+ * comparison, we can use smallOk. But we don't know which
+ * flags will be needed. This probably doesn't happen often.
+ */
+ var_types gtType = op1->TypeGet();
+
+ switch (gtType)
+ {
+ case TYP_BYTE:
+ if (ival != (signed char)ival)
+ smallOk = false;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ if (ival != (unsigned char)ival)
+ smallOk = false;
+ break;
+
+ case TYP_SHORT:
+ if (ival != (signed short)ival)
+ smallOk = false;
+ break;
+ case TYP_CHAR:
+ if (ival != (unsigned short)ival)
+ smallOk = false;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case TYP_INT:
+ if (!FitsIn<INT32>(ival))
+ smallOk = false;
+ break;
+ case TYP_UINT:
+ if (!FitsIn<UINT32>(ival))
+ smallOk = false;
+ break;
+#endif // _TARGET_64BIT_
+
+ default:
+ break;
+ }
+
+ if (smallOk && // constant is in op1's range
+ !unsignedCmp && // signed comparison
+ varTypeIsSmall(gtType) && // smalltype var
+ varTypeIsUnsigned(gtType)) // unsigned type
+ {
+ unsignedCmp = true;
+ }
+
+ /* Make the comparand addressable */
+ addrReg1 = genMakeRvalueAddressable(op1, RBM_NONE, RegSet::KEEP_REG, false, smallOk);
+ }
+
+ // #if defined(DEBUGGING_SUPPORT)
+
+ /* Special case: comparison of two constants */
+
+ // Needed if Importer doesn't call gtFoldExpr()
+
+ if (!(op1->gtFlags & GTF_REG_VAL) && (op1->IsCnsIntOrI()))
+ {
+ // noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+
+ /* Workaround: get the constant operand into a register */
+ genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(addrReg1 == RBM_NONE);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ addrReg1 = genRegMask(op1->gtRegNum);
+ }
+
+ // #endif
+
+ /* Compare the operand against the constant */
+
+ if (op2->IsIconHandle())
+ {
+ inst_TT_IV(INS_cmp, op1, ival, 0, EA_HANDLE_CNS_RELOC);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, ival);
+ }
+ goto DONE;
+ }
+
+ //---------------------------------------------------------------------
+ //
+ // We reach here if op2 was not a GT_CNS_INT
+ //
+
+ byteCmp = false;
+ shortCmp = false;
+
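+ // Only when both operands have the same small type can we safely emit a
+ // byte- or short-sized compare.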
+ if (op1Type == op2->gtType)
+ {
+ shortCmp = varTypeIsShort(op1Type);
+ byteCmp = varTypeIsByte(op1Type);
+ }
+
+ noway_assert(op1->gtOper != GT_CNS_INT);
+
+ if (op2->gtOper == GT_LCL_VAR)
+ genMarkLclVar(op2);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ /* Are we comparing against a register? */
+
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* Make the comparands addressable and mark as used */
+
+ assert(addrReg1 == RBM_NONE);
+ addrReg1 = genMakeAddressable2(op1, RBM_NONE, RegSet::KEEP_REG, false, true);
+
+ /* Is the size of the comparison byte/char/short ? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ {
+ /* Is op2 sitting in an appropriate register? */
+
+ if (varTypeIsByte(op1->TypeGet()) && !isByteReg(op2->gtRegNum))
+ goto NO_SMALL_CMP;
+
+ /* Is op2 of the right type for a small comparison */
+
+ if (op2->gtOper == GT_REG_VAR)
+ {
+ if (op1->gtType != compiler->lvaGetRealType(op2->gtRegVar.gtLclNum))
+ goto NO_SMALL_CMP;
+ }
+ else
+ {
+ if (op1->gtType != op2->gtType)
+ goto NO_SMALL_CMP;
+ }
+
+ if (varTypeIsUnsigned(op1->TypeGet()))
+ unsignedCmp = true;
+ }
+
+ assert(addrReg2 == RBM_NONE);
+
+ genComputeReg(op2, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+ addrReg2 = genRegMask(op2->gtRegNum);
+ addrReg1 = genKeepAddressable(op1, addrReg1, addrReg2);
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ /* Compare against the register */
+
+ inst_TT_RV(INS_cmp, op1, op2->gtRegNum);
+
+ goto DONE;
+
+ NO_SMALL_CMP:
+
+ // op1 has been made addressable and is marked as in use
+ // op2 is un-generated
+ assert(addrReg2 == 0);
+
+ if ((op1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ regNumber reg1 = regSet.rsPickReg();
+
+ noway_assert(varTypeIsSmall(op1->TypeGet()));
+ instruction ins = ins_Move_Extend(op1->TypeGet(), (op1->gtFlags & GTF_REG_VAL) != 0);
+
+ // regSet.rsPickReg can cause one of the trees within this address mode to get spilled
+ // so we need to make sure it is still valid. Note that at this point, reg1 is
+ // *not* marked as in use, and it is possible for it to be used in the address
+ // mode expression, but that is OK, because we are done with expression after
+ // this. We only need reg1.
+ addrReg1 = genKeepAddressable(op1, addrReg1);
+ inst_RV_TT(ins, reg1, op1);
+ regTracker.rsTrackRegTrash(reg1);
+
+ genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
+ addrReg1 = 0;
+
+ genMarkTreeInReg(op1, reg1);
+
+ regSet.rsMarkRegUsed(op1);
+ addrReg1 = genRegMask(op1->gtRegNum);
+ }
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ goto DONE_OP1;
+ }
+
+ // We come here if op2 is not enregistered or not in a "good" register.
+
+ assert(addrReg1 == 0);
+
+ // Determine what registers go live between op1 and op2
+ newLiveMask = genNewLiveRegMask(op1, op2);
+
+ // Set up regNeed with the set of registers that we suggest for op1 to be in
+ //
+ regNeed = RBM_ALLINT;
+
+ // avoid selecting registers that get newly born in op2
+ regNeed = regSet.rsNarrowHint(regNeed, ~newLiveMask);
+
+ // avoid selecting op2 reserved regs
+ regNeed = regSet.rsNarrowHint(regNeed, ~op2->gtRsvdRegs);
+
+#if CPU_HAS_BYTE_REGS
+ // If necessary, set up regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed = regSet.rsNarrowHint(RBM_BYTE_REGS, regNeed);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Compute the first comparand into some register; regNeed here is simply a hint because RegSet::ANY_REG is used.
+ //
+ genComputeReg(op1, regNeed, RegSet::ANY_REG, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ op1Reg = op1->gtRegNum;
+
+ // Set up regNeed with the set of registers that we require op1 to be in
+ //
+ regNeed = RBM_ALLINT;
+
+#if CPU_HAS_BYTE_REGS
+ // If necessary, set up regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed &= RBM_BYTE_REGS;
+#endif // CPU_HAS_BYTE_REGS
+
+ // avoid selecting registers that get newly born in op2, as using them will force a spill temp to be used.
+ regNeed = regSet.rsMustExclude(regNeed, newLiveMask);
+
+ // avoid selecting op2 reserved regs, as using them will force a spill temp to be used.
+ regNeed = regSet.rsMustExclude(regNeed, op2->gtRsvdRegs);
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & regNeed) == 0) && ((regSet.rsRegMaskFree() & regNeed) != 0))
+ {
+ // Grab an acceptable register
+ regNumber newReg = regSet.rsGrabReg(regNeed);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'reg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ op1Reg = op1->gtRegNum;
+
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+
+ addrReg1 = genRegMask(op1Reg);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+DONE_OP1:
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ // Set up regNeed with either RBM_ALLINT or the RBM_BYTE_REGS subset;
+ // when byteCmp is true we will perform a byte-sized cmp instruction,
+ // and that instruction requires that any registers used are byte-able ones.
+ //
+ regNeed = RBM_ALLINT;
+
+#if CPU_HAS_BYTE_REGS
+ // If necessary, set up regNeed to select just the byte-able registers
+ if (byteCmp)
+ regNeed &= RBM_BYTE_REGS;
+#endif // CPU_HAS_BYTE_REGS
+
+ /* Make the comparand addressable */
+ assert(addrReg2 == 0);
+ addrReg2 = genMakeRvalueAddressable(op2, regNeed, RegSet::KEEP_REG, false, (byteCmp | shortCmp));
+
+ /* Make sure the first operand is still in a register; if
+ it's been spilled, we have to make sure it's reloaded
+ into a byte-addressable register if needed.
+ Pass keepReg=RegSet::KEEP_REG; otherwise we get the pointer lifetimes wrong.
+ */
+
+ assert(addrReg1 != 0);
+ genRecoverReg(op1, regNeed, RegSet::KEEP_REG);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(!byteCmp || isByteReg(op1->gtRegNum));
+
+ addrReg1 = genRegMask(op1->gtRegNum);
+ regSet.rsLockUsedReg(addrReg1);
+
+ /* Make sure that op2 is addressable. If we are going to do a
+ byte-comparison, we need it to be in a byte register. */
+
+ if (byteCmp && (op2->gtFlags & GTF_REG_VAL))
+ {
+ genRecoverReg(op2, regNeed, RegSet::KEEP_REG);
+ addrReg2 = genRegMask(op2->gtRegNum);
+ }
+ else
+ {
+ addrReg2 = genKeepAddressable(op2, addrReg2);
+ }
+
+ regSet.rsUnlockUsedReg(addrReg1);
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+
+ if (byteCmp || shortCmp)
+ {
+ size = emitTypeSize(op2->TypeGet());
+ if (varTypeIsUnsigned(op1Type))
+ unsignedCmp = true;
+ }
+ else
+ {
+ size = emitActualTypeSize(op2->TypeGet());
+ }
+
+ /* Perform the comparison */
+ inst_RV_TT(INS_cmp, op1->gtRegNum, op2, 0, size);
+
+DONE:
+
+ jumpKind = genJumpKindForOper(cmp, unsignedCmp ? CK_UNSIGNED : CK_SIGNED);
+
+DONE_FLAGS: // We have determined what jumpKind to use
+
+ genUpdateLife(cond);
+
+ /* The condition value is dead at the jump that follows */
+
+ assert(((addrReg1 | addrReg2) & regSet.rsMaskUsed) == (addrReg1 | addrReg2));
+ assert(((addrReg1 & addrReg2) & regSet.rsMaskMult) == (addrReg1 & addrReg2));
+ genDoneAddressable(op1, addrReg1, RegSet::KEEP_REG);
+ genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
+
+ noway_assert(jumpKind != EJ_COUNT); // Ensure that it was assigned a valid value
+
+ return jumpKind;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Generate code to jump to the jump target of the current basic block if
+ * the given relational operator yields 'true'.
+ */
+
+void CodeGen::genCondJump(GenTreePtr cond, BasicBlock* destTrue, BasicBlock* destFalse, bool bStackFPFixup)
+{
+ BasicBlock* jumpTrue;
+ BasicBlock* jumpFalse;
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ if (destTrue)
+ {
+ jumpTrue = destTrue;
+ jumpFalse = destFalse;
+ }
+ else
+ {
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+ jumpTrue = compiler->compCurBB->bbJumpDest;
+ jumpFalse = compiler->compCurBB->bbNext;
+ }
+
+ noway_assert(cond->OperIsCompare());
+
+ /* Make sure the more expensive operand is 'op1' */
+ noway_assert((cond->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ if (cond->gtFlags & GTF_REVERSE_OPS) // TODO: note that this is now dead code, since the above is a noway_assert()
+ {
+ /* Don't forget to modify the condition as well */
+
+ cond->gtOp.gtOp1 = op2;
+ cond->gtOp.gtOp2 = op1;
+ cond->SetOper(GenTree::SwapRelop(cmp));
+ cond->gtFlags &= ~GTF_REVERSE_OPS;
+
+ /* Get hold of the new values */
+
+ cmp = cond->OperGet();
+ op1 = cond->gtOp.gtOp1;
+ op2 = cond->gtOp.gtOp2;
+ }
+
+ /* What is the type of the operand? */
+
+ switch (genActualType(op1->gtType))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ emitJumpKind jumpKind;
+
+ // Check if we can use the currently set flags. Else set them
+
+ jumpKind = genCondSetFlags(cond);
+
+#if FEATURE_STACK_FP_X87
+ if (bStackFPFixup)
+ {
+ genCondJmpInsStackFP(jumpKind, jumpTrue, jumpFalse);
+ }
+ else
+#endif
+ {
+ /* Generate the conditional jump */
+ inst_JMP(jumpKind, jumpTrue);
+ }
+
+ return;
+
+ case TYP_LONG:
+#if FEATURE_STACK_FP_X87
+ if (bStackFPFixup)
+ {
+ genCondJumpLngStackFP(cond, jumpTrue, jumpFalse);
+ }
+ else
+#endif
+ {
+ genCondJumpLng(cond, jumpTrue, jumpFalse);
+ }
+ return;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#if FEATURE_STACK_FP_X87
+ genCondJumpFltStackFP(cond, jumpTrue, jumpFalse, bStackFPFixup);
+#else
+ genCondJumpFloat(cond, jumpTrue, jumpFalse);
+#endif
+ return;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(cond);
+#endif
+ unreached(); // unexpected/unsupported 'jtrue' operands type
+ }
+}
+
+/*****************************************************************************
+ * Spill registers to check that callers can handle it.
+ */
+
+#ifdef DEBUG
+
+void CodeGen::genStressRegs(GenTreePtr tree)
+{
+ if (regSet.rsStressRegs() < 2)
+ return;
+
+ /* Spill as many registers as possible. Callers should be prepared
+ to handle this case.
+ But don't spill trees with no size (TYP_STRUCT comes to mind) */
+
+ {
+ regMaskTP spillRegs = regSet.rsRegMaskCanGrab() & regSet.rsMaskUsed;
+ regNumber regNum;
+ regMaskTP regBit;
+
+ for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if ((spillRegs & regBit) && (regSet.rsUsedTree[regNum] != NULL) &&
+ (genTypeSize(regSet.rsUsedTree[regNum]->TypeGet()) > 0))
+ {
+ regSet.rsSpillReg(regNum);
+
+ spillRegs &= regSet.rsMaskUsed;
+
+ if (!spillRegs)
+ break;
+ }
+ }
+ }
+
+ regMaskTP trashRegs = regSet.rsRegMaskFree();
+
+ if (trashRegs == RBM_NONE)
+ return;
+
+ /* It is sometimes reasonable to expect that calling genCodeForTree()
+ on certain trees won't spill anything */
+
+ if ((compiler->compCurStmt == compiler->compCurBB->bbTreeList) && (compiler->compCurBB->bbCatchTyp) &&
+ handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp))
+ {
+ trashRegs &= ~(RBM_EXCEPTION_OBJECT);
+ }
+
+ // If genCodeForTree() effectively gets called a second time on the same tree
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ noway_assert(varTypeIsIntegralOrI(tree->TypeGet()));
+ trashRegs &= ~genRegMask(tree->gtRegNum);
+ }
+
+ if (tree->gtType == TYP_INT && tree->OperIsSimple())
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1 && (op1->gtFlags & GTF_REG_VAL))
+ trashRegs &= ~genRegMask(op1->gtRegNum);
+ if (op2 && (op2->gtFlags & GTF_REG_VAL))
+ trashRegs &= ~genRegMask(op2->gtRegNum);
+ }
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ if (compiler->info.compCallUnmanaged)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ if (varDsc->lvRegister)
+ trashRegs &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+
+ /* Now trash the registers. We restrict ourselves to regSet.rsModifiedRegsMask, since
+ otherwise we would have to save/restore the registers. We try to be as unintrusive
+ as possible */
+
+ noway_assert((REG_INT_LAST - REG_INT_FIRST) == 7);
+ // This is obviously false for ARM, but this function is never called.
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ regMaskTP regMask = genRegMask(reg);
+
+ if (regSet.rsRegsModified(regMask & trashRegs))
+ genSetRegToIcon(reg, 0);
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Generate code for a GTK_CONST tree
+ */
+
+void CodeGen::genCodeForTreeConst(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ noway_assert(tree->IsCnsIntOrI());
+
+ ssize_t ival = tree->gtIntConCommon.IconValue();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ bool needReloc = compiler->opts.compReloc && tree->IsIconHandle();
+
+#if REDUNDANT_LOAD
+
+ /* If we are targeting destReg and ival is zero */
+ /* we would rather xor needReg than copy another register */
+
+ if (!needReloc)
+ {
+ bool reuseConstantInReg = false;
+
+ if (destReg == RBM_NONE)
+ reuseConstantInReg = true;
+
+#ifdef _TARGET_ARM_
+ // If we can set a register to a constant with a small encoding, then do that.
+ // Assume we'll get a low register if needReg has low registers as options.
+ if (!reuseConstantInReg &&
+ !arm_Valid_Imm_For_Small_Mov((needReg & RBM_LOW_REGS) ? REG_R0 : REG_R8, ival, INS_FLAGS_DONT_CARE))
+ {
+ reuseConstantInReg = true;
+ }
+#else
+ if (!reuseConstantInReg && ival != 0)
+ reuseConstantInReg = true;
+#endif
+
+ if (reuseConstantInReg)
+ {
+ /* Is the constant already in register? If so, use this register */
+
+ reg = regTracker.rsIconIsInReg(ival);
+ if (reg != REG_NA)
+ goto REG_LOADED;
+ }
+ }
+
+#endif // REDUNDANT_LOAD
+
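+ // We could not (or chose not to) reuse an existing copy of the constant,
+ // so pick a register and materialize it below.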
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ /* If the constant is a handle, we need a reloc to be applied to it */
+
+ if (needReloc)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, ival);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ genSetRegToIcon(reg, ival, tree->TypeGet());
+ }
+
+REG_LOADED:
+
+#ifdef DEBUG
+ /* Special case: GT_CNS_INT - Restore the current live set if it was changed */
+
+ if (!genTempLiveChg)
+ {
+ VarSetOps::Assign(compiler, compiler->compCurLife, genTempOldLife);
+ genTempLiveChg = true;
+ }
+#endif
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet()); // In case the handle is a GC object (for eg, frozen strings)
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a GTK_LEAF tree
+ */
+
+void CodeGen::genCodeForTreeLeaf(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ size_t size;
+
+ noway_assert(tree->OperKind() & GTK_LEAF);
+
+ switch (oper)
+ {
+ case GT_REG_VAR:
+ NO_WAY("GT_REG_VAR should have been caught above");
+ break;
+
+ case GT_LCL_VAR:
+
+ /* Does the variable live in a register? */
+
+ if (genMarkLclVar(tree))
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+#if REDUNDANT_LOAD
+
+ /* Is the local variable already in register? */
+
+ reg = findStkLclInReg(tree->gtLclVarCommon.gtLclNum);
+
+ if (reg != REG_NA)
+ {
+ /* Use the register the variable happens to be in */
+ regMaskTP regMask = genRegMask(reg);
+
+ // If the register that it was in isn't one of the needRegs
+ // then try to move it into a needReg register
+
+ if (((regMask & needReg) == 0) && (regSet.rsRegMaskCanGrab() & needReg))
+ {
+ regNumber rg2 = reg;
+ reg = regSet.rsPickReg(needReg, bestReg);
+ if (reg != rg2)
+ {
+ regMask = genRegMask(reg);
+ inst_RV_RV(INS_mov, reg, rg2, tree->TypeGet());
+ }
+ }
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+ regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
+ break;
+ }
+
+#endif
+ goto MEM_LEAF;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ goto MEM_LEAF;
+
+ case GT_CLS_VAR:
+
+ MEM_LEAF:
+
+ /* Pick a register for the value */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ /* Load the variable into the register */
+
+ size = genTypeSize(tree->gtType);
+
+ if (size < EA_4BYTE)
+ {
+ instruction ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+ inst_RV_TT(ins, reg, tree, 0);
+
+ /* We've now "promoted" the tree-node to TYP_INT */
+
+ tree->gtType = TYP_INT;
+ }
+ else
+ {
+ inst_RV_TT(INS_mov, reg, tree, 0);
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+
+ gcInfo.gcMarkRegPtrVal(reg, tree->TypeGet());
+
+ switch (oper)
+ {
+ case GT_CLS_VAR:
+ regTracker.rsTrackRegClsVar(reg, tree);
+ break;
+ case GT_LCL_VAR:
+ regTracker.rsTrackRegLclVar(reg, tree->gtLclVarCommon.gtLclNum);
+ break;
+ case GT_LCL_FLD:
+ break;
+ default:
+ noway_assert(!"Unexpected oper");
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ break;
+
+ case GT_NO_OP:
+ // The VM does certain things with actual NOP instructions
+ // so generate something small that has no effect, but isn't
+ // a typical NOP
+ if (tree->gtFlags & GTF_NO_OP_NO)
+ {
+#ifdef _TARGET_XARCH_
+ // The VM expects 0x66 0x90 for a 2-byte NOP, not 0x90 0x90
+ instGen(INS_nop);
+ instGen(INS_nop);
+#elif defined(_TARGET_ARM_)
+ // The VM isn't checking for this yet; when it does, hopefully it will
+ // be fooled by the wider variant.
+ instGen(INS_nopw);
+#else
+ NYI("Non-nop NO_OP");
+#endif
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
+ reg = REG_STK;
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+
+ /* Have to clear the shadowSP of the nesting level which
+ encloses the finally */
+
+ unsigned finallyNesting;
+ finallyNesting = (unsigned)tree->gtVal.gtVal1;
+ noway_assert(tree->gtVal.gtVal1 <
+ compiler->compHndBBtabCount); // assert we didn't truncate with the cast above.
+ noway_assert(finallyNesting < compiler->compHndBBtabCount);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
+ sizeof(void*)); // below doesn't underflow.
+ filterEndOffsetSlotOffs = (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*));
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
+ reg = REG_STK;
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ case GT_CATCH_ARG:
+
+ noway_assert(compiler->compCurBB->bbCatchTyp && handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked the register as holding a GC object, but not as used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ reg = REG_EXCEPTION_OBJECT;
+ break;
+
+ case GT_JMP:
+ genCodeForTreeLeaf_GT_JMP(tree);
+ return;
+
+ case GT_MEMORYBARRIER:
+ // Emit the memory barrier instruction
+ instGen_MemoryBarrier();
+ reg = REG_STK;
+ break;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected leaf");
+ }
+
+ noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
+ genCodeForTree_DONE(tree, reg);
+}
+
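+/*****************************************************************************
+ *
+ * Walk a GT_COMMA chain, generating code for each left-hand operand purely
+ * for its side effects, and return the final non-comma operand.
+ */
+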
+GenTreePtr CodeGen::genCodeForCommaTree(GenTreePtr tree)
+{
+ while (tree->OperGet() == GT_COMMA)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ genCodeForTree(op1, RBM_NONE);
+ gcInfo.gcMarkRegPtrVal(op1);
+
+ tree = tree->gtOp.gtOp2;
+ }
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a leaf node of type GT_JMP
+ */
+
+void CodeGen::genCodeForTreeLeaf_GT_JMP(GenTreePtr tree)
+{
+ noway_assert(compiler->compCurBB->bbFlags & BBF_HAS_JMP);
+
+#ifdef PROFILING_SUPPORTED
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ /* fire the event at the call site */
+ unsigned saveStackLvl2 = genStackLevel;
+
+ compiler->info.compProfilerCallback = true;
+
+#ifdef _TARGET_X86_
+ //
+ // Push the profilerHandle
+ //
+ regMaskTP byrefPushedRegs;
+ regMaskTP norefPushedRegs;
+ regMaskTP pushedArgRegs =
+ genPushRegs(RBM_ARG_REGS & (regSet.rsMaskUsed | regSet.rsMaskVars | regSet.rsMaskLock), &byrefPushedRegs,
+ &norefPushedRegs);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
+ (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+
+ genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
+#elif _TARGET_ARM_
+ // For GT_JMP nodes under the ARM profiler we have added r0 as a used register in order to evaluate the GT_JMP node.
+ // To emit the tailcall callback we need r0 to pass the profiler handle. Any free register could be used as the call target.
+ regNumber argReg = regSet.rsGrabReg(RBM_PROFILER_JMP_USED);
+ noway_assert(argReg == REG_PROFILER_JMP_ARG);
+ regSet.rsLockReg(RBM_PROFILER_JMP_USED);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(argReg);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, argReg, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+ regSet.rsUnlockReg(RBM_PROFILER_JMP_USED);
+#else
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking 'arguments'");
+#endif //_TARGET_X86_
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+ }
+#endif // PROFILING_SUPPORTED
+
+ /* This code is cloned from the regular processing of GT_RETURN values. We have to remember to
+ * call genPInvokeMethodEpilog anywhere that we have a method return. We should really
+ * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
+ */
+
+ if (compiler->info.compCallUnmanaged)
+ {
+ genPInvokeMethodEpilog();
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ //
+ // This does not deal with circular dependencies of register
+ // arguments, which is safe because RegAlloc prevents that by
+ // not enregistering any RegArgs when a JMP opcode is used.
+
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ // First move any enregistered stack arguments back to the stack
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ noway_assert(varDsc->lvIsParam);
+ if (varDsc->lvIsRegArg || !varDsc->lvRegister)
+ continue;
+
+ /* Argument was passed on the stack, but ended up in a register.
+ * Store it back to the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvRegNum, varNum, 0);
+
+ /* Is the upper half also enregistered? */
+
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_INT), EA_4BYTE, varDsc->lvOtherReg, varNum, sizeof(int));
+ }
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ getEmitter()->emitIns_S_R(ins_Store(varDsc->TypeGet()), emitTypeSize(varDsc->TypeGet()), varDsc->lvRegNum,
+ varNum, 0);
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ regMaskTP fixedArgsMask = RBM_NONE;
+#endif
+
+ // Next move any un-enregistered register arguments back to their register
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->info.compArgsCount; varNum++, varDsc++)
+ {
+ /* Is this variable a register arg? */
+
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+ /* Register argument */
+
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+ noway_assert(!varDsc->lvRegister);
+
+ /* Reload it from the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ /* long - at least the low half must be enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, varDsc->lvArgReg, varNum, 0);
+ regTracker.rsTrackRegTrash(varDsc->lvArgReg);
+
+ /* Assume the upper half is also enregistered */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, genRegArgNext(varDsc->lvArgReg), varNum,
+ sizeof(int));
+ regTracker.rsTrackRegTrash(genRegArgNext(varDsc->lvArgReg));
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(varDsc->lvArgReg);
+ fixedArgsMask |= genRegMask(genRegArgNext(varDsc->lvArgReg));
+#endif
+ }
+ else
+#endif // _TARGET_64BIT_
+#ifdef _TARGET_ARM_
+ if (varDsc->lvIsHfaRegArg())
+ {
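+ // Reload the HFA one element at a time into its consecutive floating-point
+ // argument registers.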
+ const var_types elemType = varDsc->GetHfaType();
+ const instruction loadOp = ins_Load(elemType);
+ const emitAttr size = emitTypeSize(elemType);
+ regNumber argReg = varDsc->lvArgReg;
+ const unsigned maxSize = min(varDsc->lvSize(), (LAST_FP_ARGREG + 1 - argReg) * REGSIZE_BYTES);
+
+ for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
+ {
+ getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
+ assert(genIsValidFloatReg(argReg)); // we don't use register tracking for FP
+ argReg = regNextOfType(argReg, elemType);
+ }
+ }
+ else if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ const var_types elemType = TYP_INT; // we pad everything out to at least 4 bytes
+ const instruction loadOp = ins_Load(elemType);
+ const emitAttr size = emitTypeSize(elemType);
+ regNumber argReg = varDsc->lvArgReg;
+ const unsigned maxSize = min(varDsc->lvSize(), (REG_ARG_LAST + 1 - argReg) * REGSIZE_BYTES);
+
+ for (unsigned ofs = 0; ofs < maxSize; ofs += (unsigned)size)
+ {
+ getEmitter()->emitIns_R_S(loadOp, size, argReg, varNum, ofs);
+ regTracker.rsTrackRegTrash(argReg);
+
+ fixedArgsMask |= genRegMask(argReg);
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+#endif //_TARGET_ARM_
+ {
+ var_types loadType = varDsc->TypeGet();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+ bool twoParts = false;
+
+ if (compiler->info.compIsVarArgs && isFloatRegType(loadType))
+ {
+#ifndef _TARGET_64BIT_
+ if (loadType == TYP_DOUBLE)
+ twoParts = true;
+#endif
+ loadType = TYP_I_IMPL;
+ assert(isValidIntArgReg(argReg));
+ }
+
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+ regTracker.rsTrackRegTrash(argReg);
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(argReg);
+#endif
+ if (twoParts)
+ {
+ argReg = genRegArgNext(argReg);
+ assert(isValidIntArgReg(argReg));
+
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, REGSIZE_BYTES);
+ regTracker.rsTrackRegTrash(argReg);
+
+#ifdef _TARGET_ARM_
+ fixedArgsMask |= genRegMask(argReg);
+#endif
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ // Check if we have any non-fixed args possibly in the arg registers.
+ if (compiler->info.compIsVarArgs && (fixedArgsMask & RBM_ARG_REGS) != RBM_ARG_REGS)
+ {
+ noway_assert(compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame);
+
+ regNumber regDeclArgs = REG_ARG_FIRST;
+
+ // Skip the 'this' pointer.
+ if (!compiler->info.compIsStatic)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip the 'generic context.'
+ if (compiler->info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip any 'return buffer arg.'
+ if (compiler->info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ regDeclArgs = REG_NEXT(regDeclArgs);
+ }
+
+ // Skip the 'vararg cookie.'
+ regDeclArgs = REG_NEXT(regDeclArgs);
+
+ // Also add offset for the vararg cookie.
+ int offset = REGSIZE_BYTES;
+
+ // Reload all the variable arguments that were passed in registers back into their registers.
+ for (regNumber reg = regDeclArgs; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
+ {
+ if (!(fixedArgsMask & genRegMask(reg)))
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_INT), EA_4BYTE, reg, compiler->lvaVarargsHandleArg, offset);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ offset += REGSIZE_BYTES;
+ }
+ }
+#endif // _TARGET_ARM_
+}
+
+/*****************************************************************************
+ *
+ * Check if a variable is assigned to in a tree. The variable number is
+ * passed in pCallBackData. If the variable is assigned to, return
+ * Compiler::WALK_ABORT. Otherwise return Compiler::WALK_CONTINUE.
+ */
+Compiler::fgWalkResult CodeGen::fgIsVarAssignedTo(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if ((tree->OperIsAssignment()) && (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR) &&
+ (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == (unsigned)(size_t)data->pCallbackData))
+ {
+ return Compiler::WALK_ABORT;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
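+// Return the register that holds 'tree' if it is a GT_LCL_VAR living in a
+// non-floating-point register; otherwise return REG_NA.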
+regNumber CodeGen::genIsEnregisteredIntVariable(GenTreePtr tree)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ /* Does the variable live in a register? */
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ if (!varDsc->IsFloatRegType() && varDsc->lvRegister)
+ {
+ return varDsc->lvRegNum;
+ }
+ }
+
+ return REG_NA;
+}
+
+// inline
+void CodeGen::unspillLiveness(genLivenessSet* ls)
+{
+ // Only try to unspill the registers that are missing from the currentLiveRegs
+ //
+ regMaskTP cannotSpillMask = ls->maskVars | ls->gcRefRegs | ls->byRefRegs;
+ regMaskTP currentLiveRegs = regSet.rsMaskVars | gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ cannotSpillMask &= ~currentLiveRegs;
+
+ // Typically this mask will be empty and we will return early
+ //
+ if (cannotSpillMask == 0)
+ return;
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ // Is this a register that we cannot leave in the spilled state?
+ //
+ if ((cannotSpillMask & genRegMask(reg)) == 0)
+ continue;
+
+ RegSet::SpillDsc* spill = regSet.rsSpillDesc[reg];
+
+ // Was it spilled? If not, skip it.
+ //
+ if (!spill)
+ continue;
+
+ noway_assert(spill->spillTree->gtFlags & GTF_SPILLED);
+
+ regSet.rsUnspillReg(spill->spillTree, genRegMask(reg), RegSet::KEEP_REG);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a qmark colon
+ */
+
+void CodeGen::genCodeForQmark(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ regNumber reg;
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+
+ noway_assert(compiler->compQmarkUsed);
+ noway_assert(tree->gtOper == GT_QMARK);
+ noway_assert(op1->OperIsCompare());
+ noway_assert(op2->gtOper == GT_COLON);
+
+ GenTreePtr thenNode = op2->AsColon()->ThenNode();
+ GenTreePtr elseNode = op2->AsColon()->ElseNode();
+
+ /* If elseNode is a Nop node you must reverse the
+ thenNode and elseNode prior to reaching here!
+ (If both 'else' and 'then' are Nops, the whole qmark will have been optimized away.) */
+
+ noway_assert(!elseNode->IsNothingNode());
+
+ /* Try to implement the qmark colon using a CMOV. If we can't for
+ whatever reason, this will return false and we will implement
+ it using regular branching constructs. */
+
+ if (genCodeForQmarkWithCMOV(tree, destReg, bestReg))
+ return;
+
+ /*
+ This is a ?: operator; generate code like this:
+
+ condition_compare
+ jmp_if_true lab_true
+
+ lab_false:
+ op1 (false = 'else' part)
+ jmp lab_done
+
+ lab_true:
+ op2 (true = 'then' part)
+
+ lab_done:
+
+
+ NOTE: If there is no 'then' part, we do not generate the 'jmp lab_done'
+ or the 'lab_done' label
+ */
+
+ BasicBlock* lab_true;
+ BasicBlock* lab_false;
+ BasicBlock* lab_done;
+
+ genLivenessSet entryLiveness;
+ genLivenessSet exitLiveness;
+
+ lab_true = genCreateTempLabel();
+ lab_false = genCreateTempLabel();
+
+#if FEATURE_STACK_FP_X87
+ /* Spill any registers that hold partial values so that the exit liveness
+ from both sides is the same */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ regMaskTP spillMask = regSet.rsMaskUsedFloat | regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat;
+
+ // spillMask should be the whole FP stack
+ noway_assert(compCurFPState.m_uStackSize == genCountBits(spillMask));
+#endif
+
+ SpillTempsStackFP(regSet.rsMaskUsedFloat);
+ noway_assert(regSet.rsMaskUsedFloat == 0);
+#endif
+
+ /* Before we generate code for qmark, we spill all the currently used registers
+ that conflict with the registers used in the qmark tree. This is to avoid
+ introducing spills that only occur on either the 'then' or 'else' side of
+ the tree, but not both identically. We need to be careful with enregistered
+ variables that are used; see below.
+ */
+
+ if (regSet.rsMaskUsed)
+ {
+ /* If regSet.rsMaskUsed overlaps with regSet.rsMaskVars (multi-use of the enregistered
+ variable), then it may not get spilled. However, the variable may
+ then go dead within thenNode/elseNode, at which point regSet.rsMaskUsed
+ may get spilled from one side and not the other. So unmark regSet.rsMaskVars
+ before spilling regSet.rsMaskUsed */
+
+ regMaskTP rsAdditionalCandidates = regSet.rsMaskUsed & regSet.rsMaskVars;
+ regMaskTP rsAdditional = RBM_NONE;
+
+ // For each multi-use of an enregistered variable, we need to determine if
+ // it can get spilled inside the qmark colon. This can only happen if
+ // its life ends somewhere in the qmark colon. We have the following
+ // cases:
+ // 1) Variable is dead at the end of the colon -- needs to be spilled
+ // 2) Variable is alive at the end of the colon -- needs to be spilled
+ // iff it is assigned to in the colon. In order to determine that, we
+ // examine the GTF_ASG flag to see if any assignments were made in the
+ // colon. If there are any, we need to do a tree walk to see if this
+ // variable is the target of an assignment. This treewalk should not
+ // happen frequently.
+ if (rsAdditionalCandidates)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon additional spilling candidates are ");
+ dspRegMask(rsAdditionalCandidates);
+ printf("\n");
+ }
+#endif
+
+ // If any candidates are not alive at the GT_QMARK node, then they
+ // need to be spilled
+
+ VARSET_TP VARSET_INIT(compiler, rsLiveNow, compiler->compCurLife);
+ VARSET_TP VARSET_INIT_NOCOPY(rsLiveAfter, compiler->fgUpdateLiveSet(compiler->compCurLife,
+ compiler->compCurLifeTree, tree));
+
+ VARSET_TP VARSET_INIT_NOCOPY(regVarLiveNow,
+ VarSetOps::Intersection(compiler, compiler->raRegVarsMask, rsLiveNow));
+
+ VARSET_ITER_INIT(compiler, iter, regVarLiveNow, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ // Find the variable in compiler->lvaTable
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+#if !FEATURE_FP_REGALLOC
+ if (varDsc->IsFloatRegType())
+ continue;
+#endif
+
+ noway_assert(varDsc->lvRegister);
+
+ regMaskTP regBit;
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regBit = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ regBit = genRegMask(varDsc->lvRegNum);
+
+ // For longs we may need to spill both regs
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ regBit |= genRegMask(varDsc->lvOtherReg);
+ }
+
+ // Is it one of our reg-use vars? If not, we don't need to spill it.
+ regBit &= rsAdditionalCandidates;
+ if (!regBit)
+ continue;
+
+ // Is the variable live at the end of the colon?
+ if (VarSetOps::IsMember(compiler, rsLiveAfter, varIndex))
+ {
+ // Variable is alive at the end of the colon. Was it assigned
+ // to inside the colon?
+
+ if (!(op2->gtFlags & GTF_ASG))
+ continue;
+
+ if (compiler->fgWalkTreePre(&op2, CodeGen::fgIsVarAssignedTo, (void*)(size_t)varNum) ==
+ Compiler::WALK_ABORT)
+ {
+ // Variable was assigned to, so we need to spill it.
+
+ rsAdditional |= regBit;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon candidate ");
+ dspRegMask(regBit);
+ printf("\n");
+ printf(" is assigned to inside colon and will be spilled\n");
+ }
+#endif
+ }
+ }
+ else
+ {
+ // Variable is not alive at the end of the colon. We need to spill it.
+
+ rsAdditional |= regBit;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon candidate ");
+ dspRegMask(regBit);
+ printf("\n");
+ printf(" is alive at end of colon and will be spilled\n");
+ }
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ Compiler::printTreeID(tree);
+ printf(": Qmark-Colon approved additional spilling candidates are ");
+ dspRegMask(rsAdditional);
+ printf("\n");
+ }
+#endif
+ }
+
+ noway_assert((rsAdditionalCandidates | rsAdditional) == rsAdditionalCandidates);
+
+ // We only need to spill registers that are modified by the qmark tree, as specified in tree->gtUsedRegs.
+ // If we ever need to use and spill a register while generating code that is not in tree->gtUsedRegs,
+ // we will have unbalanced spills and generate bad code.
+ regMaskTP rsSpill =
+ ((regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskResvd)) | rsAdditional) & tree->gtUsedRegs;
+
+#ifdef DEBUG
+ // Under register stress, regSet.rsPickReg() ignores the recommended registers and always picks
+ // 'bad' registers, causing spills. So, just force all used registers to get spilled
+ // in the stress case, to avoid the problem we're trying to resolve here. Thus, any spills
+ // that occur within the qmark condition, 'then' case, or 'else' case, will have to be
+ // unspilled while generating that same tree.
+
+ if (regSet.rsStressRegs() >= 1)
+ {
+ rsSpill |= regSet.rsMaskUsed & ~(regSet.rsMaskVars | regSet.rsMaskLock | regSet.rsMaskResvd);
+ }
+#endif // DEBUG
+
+ if (rsSpill)
+ {
+ // Remember which registers hold pointers. We will spill
+ // them, but the code that follows will fetch reg vars from
+ // the registers, so we need that GC info.
+ regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsAdditional;
+ regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsAdditional;
+
+ // regSet.rsSpillRegs() will assert if we try to spill any enregistered variables.
+ // So, pretend there aren't any, and spill them anyway. This will only occur
+ // if rsAdditional is non-empty.
+ regMaskTP rsTemp = regSet.rsMaskVars;
+ regSet.ClearMaskVars();
+
+ regSet.rsSpillRegs(rsSpill);
+
+ // Restore gc tracking masks.
+ gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+ gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+ // Set regSet.rsMaskVars back to normal
+ regSet.rsMaskVars = rsTemp;
+ }
+ }
+
+ // Generate the conditional jump but without doing any StackFP fixups.
+ genCondJump(op1, lab_true, lab_false, false);
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon entry */
+ /* to both the then and else parts of the qmark */
+
+ saveLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the else part. */
+
+ /* Subtract the liveSet upon entry of the then part (op1->gtNext) */
+ /* from the "colon or op2" liveSet */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtElseLiveSet);
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(lab_false);
+
+#if FEATURE_STACK_FP_X87
+ // Store fpstate
+
+ QmarkStateStackFP tempFPState;
+ bool bHasFPUState = !compCurFPState.IsEmpty();
+ genQMarkBeforeElseStackFP(&tempFPState, tree->gtQmark.gtElseLiveSet, op1->gtNext);
+#endif
+
+ /* Does the operator yield a value? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* Generate the code for the else part of the qmark */
+
+ genCodeForTree(elseNode, needReg, bestReg);
+
+ /* The type is VOID, so we shouldn't have computed a value */
+
+ noway_assert(!(elseNode->gtFlags & GTF_REG_VAL));
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon exit of the else part of the qmark */
+
+ saveLiveness(&exitLiveness);
+
+ /* Is there a 'then' part? */
+
+ if (thenNode->IsNothingNode())
+ {
+#if FEATURE_STACK_FP_X87
+ if (bHasFPUState)
+ {
+ // We had FP state on entry just after the condition, so potentially, the else
+ // node may have to do transition work.
+ lab_done = genCreateTempLabel();
+
+ /* Generate jmp lab_done */
+
+ inst_JMP(EJ_jmp, lab_done);
+
+ /* No 'then' - just generate the 'lab_true' label */
+
+ genDefineTempLabel(lab_true);
+
+ // We need to do this after defining the lab_false label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+ genDefineTempLabel(lab_done);
+ }
+ else
+#endif // FEATURE_STACK_FP_X87
+ {
+ /* No 'then' - just generate the 'lab_true' label */
+ genDefineTempLabel(lab_true);
+ }
+ }
+ else
+ {
+ lab_done = genCreateTempLabel();
+
+ /* Generate jmp lab_done */
+
+ inst_JMP(EJ_jmp, lab_done);
+
+ /* Restore the liveness that we had upon entry of the then part of the qmark */
+
+ restoreLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the then part. */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ /* Generate lab_true: */
+
+ genDefineTempLabel(lab_true);
+#if FEATURE_STACK_FP_X87
+ // We need to do this after defining the lab_false label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+#endif
+ /* Enter the then part - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Generate the code for the then part of the qmark */
+
+ genCodeForTree(thenNode, needReg, bestReg);
+
+ /* The type is VOID, so we shouldn't have computed a value */
+
+ noway_assert(!(thenNode->gtFlags & GTF_REG_VAL));
+
+ unspillLiveness(&exitLiveness);
+
+ /* Verify that the exit liveness information is the same for the two parts of the qmark */
+
+ checkLiveness(&exitLiveness);
+#if FEATURE_STACK_FP_X87
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+#endif
+ /* Define the "result" label */
+
+ genDefineTempLabel(lab_done);
+ }
+
+ /* Join of the two branches - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* We're just about done */
+
+ genUpdateLife(tree);
+ }
+ else
+ {
+ /* Generate code for a qmark that generates a value */
+
+ /* Generate the code for the else part of the qmark */
+
+ noway_assert(elseNode->IsNothingNode() == false);
+
+ /* Compute the elseNode into any free register */
+ genComputeReg(elseNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+ noway_assert(elseNode->gtFlags & GTF_REG_VAL);
+ noway_assert(elseNode->gtRegNum != REG_NA);
+
+ /* Record the chosen register */
+ reg = elseNode->gtRegNum;
+ regs = genRegMask(reg);
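+
+ // The 'then' part will later be computed into this exact register (see the EXACT_REG
+ // request below), so both arms of the qmark deliver their result in 'reg'.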
+
+ /* Save the current liveness, register status, and GC pointers */
+ /* This is the liveness information upon exit of the else part of the qmark */
+
+ saveLiveness(&exitLiveness);
+
+ /* Generate jmp lab_done */
+ lab_done = genCreateTempLabel();
+
+#ifdef DEBUG
+ // We will use this to assert we don't emit instructions if we decide not to
+ // do the jmp
+ unsigned emittedInstructions = getEmitter()->emitInsCount;
+ bool bSkippedJump = false;
+#endif
+ // We would like to know here whether the 'then' part is really going to generate
+ // any code, because if it isn't, the jump we emit here would just jump to the next
+ // instruction. What we would really like is to be able to go back and remove the
+ // jump, but we have no way of doing that right now.
+
+ if (
+#if FEATURE_STACK_FP_X87
+ !bHasFPUState && // If there is no FPU state, we won't need an x87 transition
+#endif
+ genIsEnregisteredIntVariable(thenNode) == reg)
+ {
+#ifdef DEBUG
+ // For the moment, handle just the easy case (the 'then' node is a variable
+ // enregistered in 'reg'), which is the one that happens all the time.
+
+ bSkippedJump = true;
+#endif
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, lab_done);
+ }
+
+ /* Restore the liveness that we had upon entry of the else part of the qmark */
+
+ restoreLiveness(&entryLiveness);
+
+ /* Clear the liveness of any local variables that are dead upon */
+ /* entry to the then part. */
+ genDyingVars(compiler->compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ /* Generate lab_true: */
+ genDefineTempLabel(lab_true);
+#if FEATURE_STACK_FP_X87
+ // Store FP state
+
+ // We need to do this after defining the lab_true label
+ genQMarkAfterElseBlockStackFP(&tempFPState, compiler->compCurLife, op2->gtNext);
+#endif
+ /* Enter the then part - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Generate the code for the then part of the qmark */
+
+ noway_assert(thenNode->IsNothingNode() == false);
+
+ /* This must place a value into the chosen register */
+ genComputeReg(thenNode, regs, RegSet::EXACT_REG, RegSet::FREE_REG, true);
+
+ noway_assert(thenNode->gtFlags & GTF_REG_VAL);
+ noway_assert(thenNode->gtRegNum == reg);
+
+ unspillLiveness(&exitLiveness);
+
+ /* Verify that the exit liveness information is the same for the two parts of the qmark */
+ checkLiveness(&exitLiveness);
+#if FEATURE_STACK_FP_X87
+ genQMarkAfterThenBlockStackFP(&tempFPState);
+#endif
+
+#ifdef DEBUG
+ noway_assert(bSkippedJump == false || getEmitter()->emitInsCount == emittedInstructions);
+#endif
+
+ /* Define the "result" label */
+ genDefineTempLabel(lab_done);
+
+ /* Join of the two branches - trash all registers */
+
+ regTracker.rsTrackRegClr();
+
+ /* Check whether this subtree has freed up any variables */
+
+ genUpdateLife(tree);
+
+ genMarkTreeInReg(tree, reg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a qmark colon using the CMOV instruction. It's OK
+ * to return false when we can't easily implement it using a cmov (leading
+ * genCodeForQmark to implement it using branches).
+ */
+
+bool CodeGen::genCodeForQmarkWithCMOV(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+#ifdef _TARGET_XARCH_
+ GenTreePtr cond = tree->gtOp.gtOp1;
+ GenTreePtr colon = tree->gtOp.gtOp2;
+ // Warning: this naming of the local vars is backwards!
+ GenTreePtr thenNode = colon->gtOp.gtOp1;
+ GenTreePtr elseNode = colon->gtOp.gtOp2;
+ GenTreePtr alwaysNode, predicateNode;
+ regNumber reg;
+ regMaskTP needReg = destReg;
+
+ noway_assert(tree->gtOper == GT_QMARK);
+ noway_assert(cond->OperIsCompare());
+ noway_assert(colon->gtOper == GT_COLON);
+
+#ifdef DEBUG
+ if (JitConfig.JitNoCMOV())
+ {
+ return false;
+ }
+#endif
+
+ /* Can only implement CMOV on processors that support it */
+
+ if (!compiler->opts.compUseCMOV)
+ {
+ return false;
+ }
+
+ /* thenNode better be a local or a constant */
+
+ if ((thenNode->OperGet() != GT_CNS_INT) && (thenNode->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ /* elseNode better be a local or a constant or nothing */
+
+ if ((elseNode->OperGet() != GT_CNS_INT) && (elseNode->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ /* can't handle two constants here */
+
+ if ((thenNode->OperGet() == GT_CNS_INT) && (elseNode->OperGet() == GT_CNS_INT))
+ {
+ return false;
+ }
+
+ /* let's not handle comparisons of non-integer types */
+
+ if (!varTypeIsI(cond->gtOp.gtOp1->gtType))
+ {
+ return false;
+ }
+
+ /* Choose nodes for predicateNode and alwaysNode. Swap cond if necessary.
+ The biggest constraint is that cmov cannot take an immediate (constant) operand.
+ */
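+
+ // Because cmov only accepts a register or memory source, a constant operand must
+ // become the alwaysNode: it is materialized into the destination with a plain 'mov'
+ // (which leaves the flags untouched), and the remaining local variable is then used
+ // as the cmov source.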
+
+ bool reverseCond = false;
+ if (elseNode->OperGet() == GT_CNS_INT)
+ {
+ // else node is a constant
+
+ alwaysNode = elseNode;
+ predicateNode = thenNode;
+ reverseCond = true;
+ }
+ else
+ {
+ alwaysNode = thenNode;
+ predicateNode = elseNode;
+ }
+
+ // If the live set in alwaysNode is not the same as in tree, then
+ // the variable in predicate node dies here. This is a dangerous
+ // case that we don't handle (genComputeReg could overwrite
+ // the value of the variable in the predicate node).
+
+ // This assert is just paranoid (we've already asserted it above)
+ assert(predicateNode->OperGet() == GT_LCL_VAR);
+ if ((predicateNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ return false;
+ }
+
+ // Past this point we are committing to use CMOV.
+
+ if (reverseCond)
+ {
+ compiler->gtReverseCond(cond);
+ }
+
+ emitJumpKind jumpKind = genCondSetFlags(cond);
+
+ // Compute the always node into any free register. If it's a constant,
+ // we need to generate the mov instruction here (otherwise genComputeReg might
+ // modify the flags, as in xor reg,reg).
+
+ if (alwaysNode->OperGet() == GT_CNS_INT)
+ {
+ reg = regSet.rsPickReg(needReg, bestReg);
+ inst_RV_IV(INS_mov, reg, alwaysNode->gtIntCon.gtIconVal, emitActualTypeSize(alwaysNode->TypeGet()));
+ gcInfo.gcMarkRegPtrVal(reg, alwaysNode->TypeGet());
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ genComputeReg(alwaysNode, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+ noway_assert(alwaysNode->gtFlags & GTF_REG_VAL);
+ noway_assert(alwaysNode->gtRegNum != REG_NA);
+
+ // Record the chosen register
+
+ reg = alwaysNode->gtRegNum;
+ }
+
+ regNumber regPredicate = REG_NA;
+
+ // Is predicateNode an enregistered variable?
+
+ if (genMarkLclVar(predicateNode))
+ {
+ // Variable lives in a register
+
+ regPredicate = predicateNode->gtRegNum;
+ }
+#if REDUNDANT_LOAD
+ else
+ {
+ // Checks if the variable happens to be in any of the registers
+
+ regPredicate = findStkLclInReg(predicateNode->gtLclVarCommon.gtLclNum);
+ }
+#endif
+
+ const static instruction EJtoCMOV[] = {INS_nop, INS_nop, INS_cmovo, INS_cmovno, INS_cmovb, INS_cmovae,
+ INS_cmove, INS_cmovne, INS_cmovbe, INS_cmova, INS_cmovs, INS_cmovns,
+ INS_cmovpe, INS_cmovpo, INS_cmovl, INS_cmovge, INS_cmovle, INS_cmovg};
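+
+ // The table above is indexed by emitJumpKind: the two leading INS_nop entries cover the
+ // non-conditional kinds (presumably EJ_NONE and EJ_jmp), and each remaining entry is the
+ // cmov that fires under the same condition code as the corresponding conditional jump.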
+
+ noway_assert((unsigned)jumpKind < (sizeof(EJtoCMOV) / sizeof(EJtoCMOV[0])));
+ instruction cmov_ins = EJtoCMOV[jumpKind];
+
+ noway_assert(insIsCMOV(cmov_ins));
+
+ if (regPredicate != REG_NA)
+ {
+ // regPredicate is in a register
+
+ inst_RV_RV(cmov_ins, reg, regPredicate, predicateNode->TypeGet());
+ }
+ else
+ {
+ // regPredicate is in memory
+
+ inst_RV_TT(cmov_ins, reg, predicateNode, NULL);
+ }
+ gcInfo.gcMarkRegPtrVal(reg, predicateNode->TypeGet());
+ regTracker.rsTrackRegTrash(reg);
+
+ genUpdateLife(alwaysNode);
+ genUpdateLife(predicateNode);
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return true;
+#else
+ return false;
+#endif
+}
+
+#ifdef _TARGET_XARCH_
+void CodeGen::genCodeForMultEAX(GenTreePtr tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ bool ovfl = tree->gtOverflow();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP addrReg;
+
+ noway_assert(tree->OperGet() == GT_MUL);
+
+ /* We'll evaluate 'op1' first */
+
+ regMaskTP op1Mask = regSet.rsMustExclude(RBM_EAX, op2->gtRsvdRegs);
+
+ /* Generate the op1 into op1Mask and hold on to it. freeOnly=true */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ // If op2 is a constant we need to load the constant into a register
+ if (op2->OperKind() & GTK_CONST)
+ {
+ genCodeForTree(op2, RBM_EDX); // since EDX is going to be spilled anyway
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(op2);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+ else
+ {
+ /* Make the second operand addressable */
+ // Try to avoid EAX.
+ addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT & ~RBM_EAX, RegSet::KEEP_REG, false);
+ }
+
+ /* Make sure the first operand is still in a register */
+ // op1 *must* go into EAX.
+ genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ // For 8 bit operations, we need to pick byte addressable registers
+
+ if (ovfl && varTypeIsByte(tree->TypeGet()) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+
+ regTracker.rsTrackRegTrash(byteReg);
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ reg = byteReg;
+ op1->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op1);
+ }
+
+ /* Make sure the operand is still addressable */
+ addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
+
+ /* Free up the operand, if it's a regvar */
+
+ genUpdateLife(op2);
+
+ /* The register is about to be trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ // For overflow instructions, tree->TypeGet() is the accurate type,
+ // and gives us the size for the operands.
+
+ emitAttr opSize = emitTypeSize(tree->TypeGet());
+
+ /* Compute the new value */
+
+ noway_assert(op1->gtRegNum == REG_EAX);
+
+ // Make sure Edx is free (unless used by op2 itself)
+ bool op2Released = false;
+
+ if ((addrReg & RBM_EDX) == 0)
+ {
+ // op2 does not use Edx, so make sure no one else does either
+ regSet.rsGrabReg(RBM_EDX);
+ }
+ else if (regSet.rsMaskMult & RBM_EDX)
+ {
+ /* Edx is used by op2 and some other trees.
+ Spill the other trees besides op2. */
+
+ regSet.rsGrabReg(RBM_EDX);
+ op2Released = true;
+
+ /* keepReg==RegSet::FREE_REG so that the other multi-used trees
+ don't get marked as unspilled as well. */
+ regSet.rsUnspillReg(op2, RBM_EDX, RegSet::FREE_REG);
+ }
+
+ instruction ins;
+
+ if (tree->gtFlags & GTF_UNSIGNED)
+ ins = INS_mulEAX;
+ else
+ ins = INS_imulEAX;
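+
+ // The single-operand mul/imul forms multiply EAX by the operand and leave the 64-bit
+ // product in EDX:EAX; CF/OF are set when the upper half is significant, which is what
+ // the overflow check below relies on.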
+
+ inst_TT(ins, op2, 0, 0, opSize);
+
+ /* Both EAX and EDX are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Free up anything that was tied up by the operand */
+
+ if (!op2Released)
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result will be where the first operand is sitting */
+
+ /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+
+ reg = op1->gtRegNum;
+ noway_assert(reg == REG_EAX);
+
+ genReleaseReg(op1);
+
+ /* Do we need an overflow check */
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ genCodeForTree_DONE(tree, reg);
+}
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+void CodeGen::genCodeForMult64(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ noway_assert(tree->OperGet() == GT_MUL);
+
+ /* Generate the first operand into some register */
+
+ genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Generate the second operand into some register */
+
+ genComputeReg(op2, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the first operand is still in a register */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Free up the operands */
+ genUpdateLife(tree);
+
+ genReleaseReg(op1);
+ genReleaseReg(op2);
+
+ regNumber regLo = regSet.rsPickReg(destReg, bestReg);
+ regNumber regHi;
+
+ regSet.rsLockReg(genRegMask(regLo));
+ regHi = regSet.rsPickReg(destReg & ~genRegMask(regLo));
+ regSet.rsUnlockReg(genRegMask(regLo));
+
+ instruction ins;
+ if (tree->gtFlags & GTF_UNSIGNED)
+ ins = INS_umull;
+ else
+ ins = INS_smull;
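+
+ // umull/smull compute the full 32x32 => 64 bit product, leaving the low word in regLo
+ // and the high word in regHi.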
+
+ getEmitter()->emitIns_R_R_R_R(ins, EA_4BYTE, regLo, regHi, op1->gtRegNum, op2->gtRegNum);
+ regTracker.rsTrackRegTrash(regHi);
+ regTracker.rsTrackRegTrash(regLo);
+
+ /* Do we need an overflow check */
+
+ if (tree->gtOverflow())
+ {
+ // Keep regLo [and regHi] locked while generating code for the gtOverflow() case
+ //
+ regSet.rsLockReg(genRegMask(regLo));
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ regSet.rsLockReg(genRegMask(regHi));
+
+ regNumber regTmpHi = regHi;
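+
+ // For the signed case the product fits in 32 bits iff regHi equals the sign extension
+ // of regLo. The 'cmp regLo, 0x80000000' below sets the carry exactly when regLo is
+ // negative, so 'adc regTmpHi, regHi, 0' is zero iff regHi is the correct sign
+ // extension (0 or -1) of regLo. For the unsigned case regHi itself must be zero.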
+ if ((tree->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regLo, 0x80000000);
+ regTmpHi = regSet.rsPickReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_R_I(INS_adc, EA_4BYTE, regTmpHi, regHi, 0);
+ regTracker.rsTrackRegTrash(regTmpHi);
+ }
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, regTmpHi, 0);
+
+ // Jump to the block which will throw the exception
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+
+ // Unlock regLo [and regHi] after generating code for the gtOverflow() case
+ //
+ regSet.rsUnlockReg(genRegMask(regLo));
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ regSet.rsUnlockReg(genRegMask(regHi));
+ }
+
+ genUpdateLife(tree);
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ genMarkTreeInRegPair(tree, gen2regs2pair(regLo, regHi));
+ else
+ genMarkTreeInReg(tree, regLo);
+}
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generate code for a simple binary arithmetic or logical operator.
+ * Handles GT_AND, GT_OR, GT_XOR, GT_ADD, GT_SUB, GT_MUL.
+ */
+
+void CodeGen::genCodeForTreeSmpBinArithLogOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ instruction ins;
+ genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+
+ /* Figure out what instruction to generate */
+
+ bool isArith;
+ switch (oper)
+ {
+ case GT_AND:
+ ins = INS_AND;
+ isArith = false;
+ break;
+ case GT_OR:
+ ins = INS_OR;
+ isArith = false;
+ break;
+ case GT_XOR:
+ ins = INS_XOR;
+ isArith = false;
+ break;
+ case GT_ADD:
+ ins = INS_add;
+ isArith = true;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ isArith = true;
+ break;
+ case GT_MUL:
+ ins = INS_MUL;
+ isArith = true;
+ break;
+ default:
+ unreached();
+ }
+
+#ifdef _TARGET_XARCH_
+ /* Special case: try to use the 3 operand form "imul reg, op1, icon" */
+
+ if ((oper == GT_MUL) &&
+ op2->IsIntCnsFitsInI32() && // op2 is a constant that fits in a sign-extended 32-bit immediate
+ !op1->IsCnsIntOrI() && // op1 is not a constant
+ (tree->gtFlags & GTF_MUL_64RSLT) == 0 && // tree not marked with MUL_64RSLT
+ !varTypeIsByte(treeType) && // No encoding for say "imul al,al,imm"
+ !tree->gtOverflow()) // 3 operand imul doesn't set flags
+ {
+ /* Make the first operand addressable */
+
+ regMaskTP addrReg = genMakeRvalueAddressable(op1, needReg & ~op2->gtRsvdRegs, RegSet::FREE_REG, false);
+
+ /* Grab a register for the target */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+#if LEA_AVAILABLE
+ /* Compute the value into the target: reg=op1*op2_icon */
+ if (op2->gtIntCon.gtIconVal == 3 || op2->gtIntCon.gtIconVal == 5 || op2->gtIntCon.gtIconVal == 9)
+ {
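+ // For a multiplier of 3, 5 or 9, (icon & -2) yields the scale 2, 4 or 8, so the
+ // 'lea reg, [regSrc + regSrc*scale]' below computes regSrc * icon in a single instruction.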
+ regNumber regSrc;
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regSrc = op1->gtRegNum;
+ }
+ else
+ {
+ inst_RV_TT(INS_mov, reg, op1, 0, emitActualTypeSize(op1->TypeGet()));
+ regSrc = reg;
+ }
+ getEmitter()->emitIns_R_ARX(INS_lea, emitActualTypeSize(treeType), reg, regSrc, regSrc,
+ (op2->gtIntCon.gtIconVal & -2), 0);
+ }
+ else
+#endif // LEA_AVAILABLE
+ {
+ /* Compute the value into the target: reg=op1*op2_icon */
+ inst_RV_TT_IV(INS_MUL, reg, op1, (int)op2->gtIntCon.gtIconVal);
+ }
+
+ /* The register has been trashed now */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The address is no longer live */
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+#endif // _TARGET_XARCH_
+
+ bool ovfl = false;
+
+ if (isArith)
+ {
+ // We only reach here for GT_ADD, GT_SUB and GT_MUL.
+ assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_MUL));
+
+ ovfl = tree->gtOverflow();
+
+ /* We record the accurate (small) types in trees only when we need to
+ * check for overflow. Otherwise we record genActualType()
+ */
+
+ noway_assert(ovfl || (treeType == genActualType(treeType)));
+
+#if LEA_AVAILABLE
+
+ /* Can we use an 'lea' to compute the result?
+ Can't use 'lea' for overflow as it doesn't set flags
+ Can't use 'lea' unless we have at least two free registers */
+ {
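+ // For example, an expression such as 'x + y*4 + 12' can fold into a single address
+ // mode and be computed with one 'lea reg, [x + y*4 + 12]'.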
+ bool bEnoughRegs = genRegCountForLiveIntEnregVars(tree) + // Live intreg variables
+ genCountBits(regSet.rsMaskLock) + // Locked registers
+ 2 // We will need two registers
+ <= genCountBits(RBM_ALLINT & ~(doubleAlignOrFramePointerUsed() ? RBM_FPBASE : 0));
+
+ regMaskTP regs = RBM_NONE; // OUT argument
+ if (!ovfl && bEnoughRegs && genMakeIndAddrMode(tree, NULL, true, needReg, RegSet::FREE_REG, &regs, false))
+ {
+ emitAttr size;
+
+ /* Is the value now computed in some register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+ /* If we can reuse op1/op2's register directly, and 'tree' is
+ a simple expression (i.e., not in scaled-index form),
+ we might as well just use "add" instead of "lea" */
+
+ // However, if we're in a context where we want to evaluate "tree" into a specific
+ // register different from the reg we'd use in this optimization, then it doesn't
+ // make sense to do the "add", since we'd also have to do a "mov."
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ reg = op1->gtRegNum;
+
+ if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
+ {
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ /* Simply add op2 to the register */
+
+ inst_RV_TT(INS_add, reg, op2, 0, emitTypeSize(treeType), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ goto DONE_LEA_ADD;
+ }
+ else if (op2->OperGet() == GT_CNS_INT)
+ {
+ /* Simply add op2 to the register */
+
+ genIncRegBy(reg, op2->gtIntCon.gtIconVal, tree, treeType);
+
+ goto DONE_LEA_ADD;
+ }
+ }
+ }
+
+ if (op2->gtFlags & GTF_REG_VAL)
+ {
+ reg = op2->gtRegNum;
+
+ if ((genRegMask(reg) & regSet.rsRegMaskFree()) && (genRegMask(reg) & needReg))
+ {
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* Simply add op1 to the register */
+
+ inst_RV_TT(INS_add, reg, op1, 0, emitTypeSize(treeType), flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ goto DONE_LEA_ADD;
+ }
+ }
+ }
+
+ // The expression either requires a scaled-index form, or op1's or op2's
+ // register can't be targeted; the latter can happen when op1 or op2 are
+ // enregistered variables.
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+ size = emitActualTypeSize(treeType);
+
+ /* Generate "lea reg, [addr-mode]" */
+
+ inst_RV_AT(INS_lea, size, treeType, reg, tree, 0, flags);
+
+#ifndef _TARGET_XARCH_
+ // Don't call genFlagsEqualToReg on x86/x64,
+ // as 'lea' does not set the flags there
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+#endif
+
+ DONE_LEA_ADD:
+ /* The register has been trashed now */
+ regTracker.rsTrackRegTrash(reg);
+
+ genDoneAddressable(tree, regs, RegSet::FREE_REG);
+
+ /* The following could be an 'inner' pointer!!! */
+
+ noway_assert(treeType == TYP_BYREF || !varTypeIsGC(treeType));
+
+ if (treeType == TYP_BYREF)
+ {
+ genUpdateLife(tree);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // in case "reg" was a TYP_GCREF before
+ gcInfo.gcMarkRegPtrVal(reg, TYP_BYREF);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+ }
+
+#endif // LEA_AVAILABLE
+
+ noway_assert((varTypeIsGC(treeType) == false) || (treeType == TYP_BYREF && (ins == INS_add || ins == INS_sub)));
+ }
+
+ /* The following makes an assumption about gtSetEvalOrder(this) */
+
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Compute a useful register mask */
+ needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
+
+ // Determine what registers go live between op1 and op2
+ // Don't bother checking if op1 is already in a register.
+ // This is not just for efficiency; if it's already in a
+ // register then it may already be considered "evaluated"
+ // for the purposes of liveness, in which case genNewLiveRegMask
+ // will assert
+ if (!op1->InReg())
+ {
+ regMaskTP newLiveMask = genNewLiveRegMask(op1, op2);
+ if (newLiveMask)
+ {
+ needReg = regSet.rsNarrowHint(needReg, ~newLiveMask);
+ }
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* 8-bit operations can only be done in the byte-regs */
+ if (varTypeIsByte(treeType))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Try selecting one of the 'bestRegs'
+ needReg = regSet.rsNarrowHint(needReg, bestReg);
+
+ /* Special case: small_val & small_mask */
+
+ if (varTypeIsSmall(op1->TypeGet()) && op2->IsCnsIntOrI() && oper == GT_AND)
+ {
+ size_t and_val = op2->gtIntCon.gtIconVal;
+ size_t andMask;
+ var_types typ = op1->TypeGet();
+
+ switch (typ)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ andMask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ case TYP_CHAR:
+ andMask = 0x0000FFFF;
+ break;
+ default:
+ noway_assert(!"unexpected type");
+ return;
+ }
+
+ // Is the 'and_val' completely contained within the bits found in 'andMask'?
+ // (e.g. 0x0F is contained within the byte mask 0x00FF, while 0x1FF is not)
+ if ((and_val & ~andMask) == 0)
+ {
+ // We must use unsigned instructions when loading op1
+ if (varTypeIsByte(typ))
+ {
+ op1->gtType = TYP_UBYTE;
+ }
+ else // varTypeIsShort(typ)
+ {
+ assert(varTypeIsShort(typ));
+ op1->gtType = TYP_CHAR;
+ }
+
+ /* Generate the first operand into a scratch register */
+
+ op1 = genCodeForCommaTree(op1);
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
+ {
+ // See if we can pick a register from bestReg
+ bestReg &= needReg;
+
+ // Grab an acceptable register
+ regNumber newReg;
+ if ((bestReg & regSet.rsRegMaskFree()) != 0)
+ newReg = regSet.rsGrabReg(bestReg);
+ else
+ newReg = regSet.rsGrabReg(needReg);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'newReg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+ reg = op1->gtRegNum;
+
+ if (and_val != andMask) // Does the "and" mask only cover some of the bits?
+ {
+ /* "and" the value */
+
+ inst_RV_IV(INS_AND, reg, and_val, EA_4BYTE, flags);
+ }
+
+#ifdef DEBUG
+ /* Update the live set of register variables */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ /* Now we can update the register pointer information */
+
+ genReleaseReg(op1);
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return;
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+
+ // Do we have to use the special "imul" instruction
+ // which has eax as the implicit operand ?
+ //
+ bool multEAX = false;
+
+ if (oper == GT_MUL)
+ {
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ /* Only multiplying with EAX will leave the 64-bit
+ * result in EDX:EAX */
+
+ multEAX = true;
+ }
+ else if (ovfl)
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ /* "mul reg/mem" always has EAX as default operand */
+
+ multEAX = true;
+ }
+ else if (varTypeIsSmall(treeType))
+ {
+ /* Only the "imul with EAX" encoding has the 'w' bit
+ * to specify the size of the operands */
+
+ multEAX = true;
+ }
+ }
+ }
+
+ if (multEAX)
+ {
+ noway_assert(oper == GT_MUL);
+
+ return genCodeForMultEAX(tree);
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+
+ // Do we have to use the special 32x32 => 64 bit multiply
+ //
+ bool mult64 = false;
+
+ if (oper == GT_MUL)
+ {
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ mult64 = true;
+ }
+ else if (ovfl)
+ {
+ // We always must use the 32x32 => 64 bit multiply
+ // to detect overflow
+ mult64 = true;
+ }
+ }
+
+ if (mult64)
+ {
+ noway_assert(oper == GT_MUL);
+
+ return genCodeForMult64(tree, destReg, bestReg);
+ }
+#endif // _TARGET_ARM_
+
+ /* Generate the first operand into a scratch register */
+
+ op1 = genCodeForCommaTree(op1);
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::FREE_REG, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regNumber op1Reg = op1->gtRegNum;
+
+ // Setup needReg with the set of register that we require for op1 to be in
+ //
+ needReg = RBM_ALLINT;
+
+ /* Compute a useful register mask */
+ needReg = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ needReg = regSet.rsNarrowHint(needReg, regSet.rsRegMaskFree());
+
+#if CPU_HAS_BYTE_REGS
+ /* 8-bit operations can only be done in the byte-regs */
+ if (varTypeIsByte(treeType))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+#endif // CPU_HAS_BYTE_REGS
+
+ // Did we end up in an acceptable register?
+ // and do we have an acceptable free register available to grab?
+ //
+ if (((genRegMask(op1Reg) & needReg) == 0) && ((regSet.rsRegMaskFree() & needReg) != 0))
+ {
+ // See if we can pick a register from bestReg
+ bestReg &= needReg;
+
+ // Grab an acceptable register
+ regNumber newReg;
+ if ((bestReg & regSet.rsRegMaskFree()) != 0)
+ newReg = regSet.rsGrabReg(bestReg);
+ else
+ newReg = regSet.rsGrabReg(needReg);
+
+ noway_assert(op1Reg != newReg);
+
+ /* Update the value in the target register */
+
+ regTracker.rsTrackRegCopy(newReg, op1Reg);
+
+ inst_RV_RV(ins_Copy(op1->TypeGet()), newReg, op1Reg, op1->TypeGet());
+
+ /* The value has been transferred to 'newReg' */
+
+ if ((genRegMask(op1Reg) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+
+ gcInfo.gcMarkRegPtrVal(newReg, op1->TypeGet());
+
+ /* The value is now in an appropriate register */
+
+ op1->gtRegNum = newReg;
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ op1Reg = op1->gtRegNum;
+
+ genUpdateLife(op1);
+
+ /* Mark the register as 'used' */
+ regSet.rsMarkRegUsed(op1);
+
+ bool isSmallConst = false;
+
+#ifdef _TARGET_ARM_
+ if ((op2->gtOper == GT_CNS_INT) && arm_Valid_Imm_For_Instr(ins, op2->gtIntCon.gtIconVal, INS_FLAGS_DONT_CARE))
+ {
+ isSmallConst = true;
+ }
+#endif
+ /* Make the second operand addressable */
+
+ regMaskTP addrReg = genMakeRvalueAddressable(op2, RBM_ALLINT, RegSet::KEEP_REG, isSmallConst);
+
+#if CPU_LOAD_STORE_ARCH
+ genRecoverReg(op1, RBM_ALLINT, RegSet::KEEP_REG);
+#else // !CPU_LOAD_STORE_ARCH
+ /* Is op1 spilled and op2 in a register? */
+
+ if ((op1->gtFlags & GTF_SPILLED) && (op2->gtFlags & GTF_REG_VAL) && (ins != INS_sub))
+ {
+ noway_assert(ins == INS_add || ins == INS_MUL || ins == INS_AND || ins == INS_OR || ins == INS_XOR);
+
+ // genMakeRvalueAddressable(GT_LCL_VAR) shouldn't spill anything
+ noway_assert(op2->gtOper != GT_LCL_VAR ||
+ varTypeIsSmall(compiler->lvaTable[op2->gtLclVarCommon.gtLclNum].TypeGet()));
+
+ reg = op2->gtRegNum;
+ regMaskTP regMask = genRegMask(reg);
+
+ /* Is the register holding op2 available? */
+
+ if (regMask & regSet.rsMaskVars)
+ {
+ }
+ else
+ {
+ /* Get the temp we spilled into. */
+
+ TempDsc* temp = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
+
+ /* For 8bit operations, we need to make sure that op2 is
+ in a byte-addressable register */
+
+ if (varTypeIsByte(treeType) && !(regMask & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ /* op2 couldn't have spilled as it was not sitting in
+ RBM_BYTE_REGS, and regSet.rsGrabReg() will only spill its args */
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsUnlockReg(regMask);
+ regSet.rsMarkRegFree(regMask);
+
+ reg = byteReg;
+ regMask = genRegMask(reg);
+ op2->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op2);
+ }
+
+ inst_RV_ST(ins, reg, temp, 0, treeType);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Free the temp */
+
+ compiler->tmpRlsTemp(temp);
+
+ /* 'add'/'sub' set all CC flags, others only ZF+SF */
+
+ /* If we need to check overflow, for small types, the
+ * flags can't be used as we perform the arithmetic
+ * operation (on small registers) and then sign extend it
+ *
+ * NOTE : If we ever don't need to sign-extend the result,
+ * we can use the flags
+ */
+
+ if (tree->gtSetFlags())
+ {
+ genFlagsEqualToReg(tree, reg);
+ }
+
+ /* The result is where the second operand is sitting. Mark result reg as free */
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ goto CHK_OVF;
+ }
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Make sure the first operand is still in a register */
+ regSet.rsLockUsedReg(addrReg);
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regSet.rsUnlockUsedReg(addrReg);
+
+ reg = op1->gtRegNum;
+
+ // For 8 bit operations, we need to pick byte addressable registers
+
+ if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+
+ regTracker.rsTrackRegTrash(byteReg);
+ regSet.rsMarkRegFree(genRegMask(reg));
+
+ reg = byteReg;
+ op1->gtRegNum = reg;
+ regSet.rsMarkRegUsed(op1);
+ }
+
+ /* Make sure the operand is still addressable */
+ addrReg = genKeepAddressable(op2, addrReg, genRegMask(reg));
+
+ /* Free up the operand, if it's a regvar */
+
+ genUpdateLife(op2);
+
+ /* The register is about to be trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ bool op2Released = false;
+
+ // For overflow instructions, tree->gtType is the accurate type,
+ // and gives us the size for the operands.
+
+ emitAttr opSize = emitTypeSize(treeType);
+
+ /* Compute the new value */
+
+ if (isArith && !op2->InReg() && (op2->OperKind() & GTK_CONST)
+#if !CPU_HAS_FP_SUPPORT
+ && (treeType == TYP_INT || treeType == TYP_I_IMPL)
+#endif
+ )
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+
+ if (oper == GT_ADD)
+ {
+ genIncRegBy(reg, ival, tree, treeType, ovfl);
+ }
+ else if (oper == GT_SUB)
+ {
+ if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
+ (ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN))) // -0x80000000 == 0x80000000.
+ // Therefore we can't use -ival.
+ )
+ {
+ /* For unsigned overflow, we have to use INS_sub to set
+ the flags correctly */
+
+ genDecRegBy(reg, ival, tree);
+ }
+ else
+ {
+ /* Else, we simply add the negative of the value */
+
+ genIncRegBy(reg, -ival, tree, treeType, ovfl);
+ }
+ }
+ else if (oper == GT_MUL)
+ {
+ genMulRegBy(reg, ival, tree, treeType, ovfl);
+ }
+ }
+ else
+ {
+ // op2 could be a GT_COMMA (i.e. an assignment for a CSE def)
+ op2 = op2->gtEffectiveVal();
+ if (varTypeIsByte(treeType) && op2->InReg())
+ {
+ noway_assert(genRegMask(reg) & RBM_BYTE_REGS);
+
+ regNumber op2reg = op2->gtRegNum;
+ regMaskTP op2regMask = genRegMask(op2reg);
+
+ if (!(op2regMask & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, op2reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+ op2Released = true;
+
+ op2->gtRegNum = byteReg;
+ }
+ }
+
+ inst_RV_TT(ins, reg, op2, 0, opSize, flags);
+ }
+
+ /* Free up anything that was tied up by the operand */
+
+ if (!op2Released)
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result will be where the first operand is sitting */
+
+ /* We must use RegSet::KEEP_REG since op1 can have a GC pointer here */
+ genRecoverReg(op1, 0, RegSet::KEEP_REG);
+
+ reg = op1->gtRegNum;
+
+ /* 'add'/'sub' set all CC flags, others only ZF+SF */
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ genReleaseReg(op1);
+
+#if !CPU_LOAD_STORE_ARCH
+CHK_OVF:
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Do we need an overflow check */
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a simple binary arithmetic or logical assignment operator: x <op>= y.
+ * Handles GT_ASG_AND, GT_ASG_OR, GT_ASG_XOR, GT_ASG_ADD, GT_ASG_SUB.
+ */
+
+void CodeGen::genCodeForTreeSmpBinArithLogAsgOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ instruction ins;
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+
+ /* Figure out what instruction to generate */
+
+ bool isArith;
+ switch (oper)
+ {
+ case GT_ASG_AND:
+ ins = INS_AND;
+ isArith = false;
+ break;
+ case GT_ASG_OR:
+ ins = INS_OR;
+ isArith = false;
+ break;
+ case GT_ASG_XOR:
+ ins = INS_XOR;
+ isArith = false;
+ break;
+ case GT_ASG_ADD:
+ ins = INS_add;
+ isArith = true;
+ break;
+ case GT_ASG_SUB:
+ ins = INS_sub;
+ isArith = true;
+ break;
+ default:
+ unreached();
+ }
+
+ bool ovfl = false;
+
+ if (isArith)
+ {
+ // We only reach here for GT_ASG_SUB, GT_ASG_ADD.
+
+ ovfl = tree->gtOverflow();
+
+ // We can't use += with an overflow check if the target's value must remain
+ // unchanged when the '+' raises an overflow exception
+ noway_assert(!ovfl ||
+ ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD) && !compiler->compCurBB->hasTryIndex()));
+
+ /* Do not allow overflow instructions with refs/byrefs */
+
+ noway_assert(!ovfl || !varTypeIsGC(treeType));
+
+ // We disallow overflow and byte-ops here as it is too much trouble
+ noway_assert(!ovfl || !varTypeIsByte(treeType));
+
+ /* Is the second operand a constant? */
+
+ if (op2->IsIntCnsFitsInI32())
+ {
+ int ival = (int)op2->gtIntCon.gtIconVal;
+
+ /* What is the target of the assignment? */
+
+ switch (op1->gtOper)
+ {
+ case GT_REG_VAR:
+
+ REG_VAR4:
+
+ reg = op1->gtRegVar.gtRegNum;
+
+ /* No registers are needed for addressing */
+
+ addrReg = RBM_NONE;
+#if !CPU_LOAD_STORE_ARCH
+ INCDEC_REG:
+#endif
+ /* We're adding a constant to a register */
+
+ if (oper == GT_ASG_ADD)
+ genIncRegBy(reg, ival, tree, treeType, ovfl);
+ else if (ovfl && ((tree->gtFlags & GTF_UNSIGNED) ||
+ ival == ((treeType == TYP_INT) ? INT32_MIN : SSIZE_T_MIN)) // -0x80000000 ==
+ // 0x80000000.
+ // Therefore we can't
+ // use -ival.
+ )
+ /* For unsigned overflow, we have to use INS_sub to set
+ the flags correctly */
+ genDecRegBy(reg, ival, tree);
+ else
+ genIncRegBy(reg, -ival, tree, treeType, ovfl);
+
+ break;
+
+ case GT_LCL_VAR:
+
+ /* Does the variable live in a register? */
+
+ if (genMarkLclVar(op1))
+ goto REG_VAR4;
+
+ __fallthrough;
+
+ default:
+
+ /* Make the target addressable for load/store */
+ addrReg = genMakeAddressable2(op1, needReg, RegSet::KEEP_REG, true, true);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ // Load op1 into a reg
+
+ reg = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ // Issue the add/sub and the overflow check
+
+ inst_RV_IV(ins, reg, ival, emitActualTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (ovfl)
+ {
+ genCheckOverflow(tree);
+ }
+
+ /* Store the (sign/zero extended) result back to
+ the stack location of the variable */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ break;
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Add/subtract the new value into/from the target */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ reg = op1->gtRegNum;
+ goto INCDEC_REG;
+ }
+
+ /* Special case: inc/dec (up to P3, or for small code, or blended code outside loops) */
+ if (!ovfl && (ival == 1 || ival == -1) &&
+ !compiler->optAvoidIncDec(compiler->compCurBB->getBBWeight(compiler)))
+ {
+ noway_assert(oper == GT_ASG_SUB || oper == GT_ASG_ADD);
+ if (oper == GT_ASG_SUB)
+ ival = -ival;
+
+ ins = (ival > 0) ? INS_inc : INS_dec;
+ inst_TT(ins, op1);
+ }
+ else
+ {
+ inst_TT_IV(ins, op1, ival);
+ }
+
+ if ((op1->gtOper == GT_LCL_VAR) && (!ovfl || treeType == TYP_INT))
+ {
+ if (tree->gtSetFlags())
+ genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
+ }
+
+ break;
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+ } // end switch (op1->gtOper)
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+ return;
+ } // end if (op2->IsIntCnsFitsInI32())
+ } // end if (isArith)
+
+ noway_assert(!varTypeIsGC(treeType) || ins == INS_sub || ins == INS_add);
+
+ /* Is the target a register or local variable? */
+
+ switch (op1->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ /* Does the target variable live in a register? */
+
+ if (!genMarkLclVar(op1))
+ break;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ /* Get hold of the target register */
+
+ reg = op1->gtRegVar.gtRegNum;
+
+ /* Make sure the target of the store is available */
+
+ if (regSet.rsMaskUsed & genRegMask(reg))
+ {
+ regSet.rsSpillReg(reg);
+ }
+
+ /* Make the RHS addressable */
+
+ addrReg = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
+
+ /* Compute the new value into the target register */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_HAS_BYTE_REGS
+
+ // Fix 383833 X86 ILGEN
+ regNumber reg2;
+ if ((op2->gtFlags & GTF_REG_VAL) != 0)
+ {
+ reg2 = op2->gtRegNum;
+ }
+ else
+ {
+ reg2 = REG_STK;
+ }
+
+ // We can only generate a byte ADD, SUB, OR, AND operation when reg and reg2 are both BYTE registers.
+ // When op2 is in memory, reg2 == REG_STK and we will need to force op2 into a register.
+ //
+ if (varTypeIsByte(treeType) &&
+ (((genRegMask(reg) & RBM_BYTE_REGS) == 0) || ((genRegMask(reg2) & RBM_BYTE_REGS) == 0)))
+ {
+ // We will force op2 into a register (via sign/zero extending load)
+ // for the cases where op2 is in memory and thus could have
+ // an unmapped page just beyond its location
+ //
+ if ((op2->OperIsIndir() || (op2->gtOper == GT_CLS_VAR)) && varTypeIsSmall(op2->TypeGet()))
+ {
+ genCodeForTree(op2, 0);
+ assert((op2->gtFlags & GTF_REG_VAL) != 0);
+ }
+
+ inst_RV_TT(ins, reg, op2, 0, EA_4BYTE, flags);
+
+ bool canOmit = false;
+
+ if (varTypeIsUnsigned(treeType))
+ {
+ // When op2 is a byte sized constant we can omit the zero extend instruction
+ if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0xFF) == op2->gtIntCon.gtIconVal))
+ {
+ canOmit = true;
+ }
+ }
+ else // treeType is signed
+ {
+ // When op2 is a positive 7-bit or smaller constant
+ // we can omit the sign extension sequence.
+ if ((op2->gtOper == GT_CNS_INT) && ((op2->gtIntCon.gtIconVal & 0x7F) == op2->gtIntCon.gtIconVal))
+ {
+ canOmit = true;
+ }
+ }
+
+ if (!canOmit)
+ {
+ // If reg is a byte reg then we can use a movzx/movsx instruction
+ //
+ if ((genRegMask(reg) & RBM_BYTE_REGS) != 0)
+ {
+ instruction extendIns = ins_Move_Extend(treeType, true);
+ inst_RV_RV(extendIns, reg, reg, treeType, emitTypeSize(treeType));
+ }
+ else // we can't encode a movzx/movsx instruction
+ {
+ if (varTypeIsUnsigned(treeType))
+ {
+ // otherwise, we must zero the upper 24 bits of 'reg'
+ inst_RV_IV(INS_AND, reg, 0xFF, EA_4BYTE);
+ }
+ else // treeType is signed
+ {
+ // otherwise, we must sign extend the result in the non-byteable register 'reg':
+ // we shift the register left by 24 bits, putting the sign bit into the high bit,
+ // and then do an arithmetic shift right by 24 bits, which propagates the sign bit correctly.
+ //
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, reg, 24);
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, reg, 24);
+ }
+ }
+ }
+ }
+ else
+#endif // CPU_HAS_BYTE_REGS
+ {
+ inst_RV_TT(ins, reg, op2, 0, emitTypeSize(treeType), flags);
+ }
+
+ /* The zero flag is now equal to the register value */
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+
+ /* Remember that we trashed the target */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Free up anything that was tied up by the RHS */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+ return;
+
+ default:
+ break;
+ } // end switch (op1->gtOper)
+
+#if !CPU_LOAD_STORE_ARCH
+ /* Special case: "x ^= -1" is actually "not(x)" */
+
+ if (oper == GT_ASG_XOR)
+ {
+ if (op2->gtOper == GT_CNS_INT && op2->gtIntCon.gtIconVal == -1)
+ {
+ addrReg = genMakeAddressable(op1, RBM_ALLINT, RegSet::KEEP_REG, true);
+ inst_TT(INS_NOT, op1);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
+ return;
+ }
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+
+ /* Setup target mask for op2 (byte-regs for small operands) */
+
+ unsigned needMask;
+ needMask = (varTypeIsByte(treeType)) ? RBM_BYTE_REGS : RBM_ALLINT;
+
+ /* Is the second operand a constant? */
+
+ if (op2->IsIntCnsFitsInI32())
+ {
+ int ival = (int)op2->gtIntCon.gtIconVal;
+
+ /* Make the target addressable */
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
+
+ inst_TT_IV(ins, op1, ival, 0, emitTypeSize(treeType), flags);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, ovfl);
+ return;
+ }
+
+ /* Is the value or the address to be computed first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Compute the new value into a register */
+
+ genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ /* Make the target addressable for load/store */
+ addrReg = genMakeAddressable2(op1, 0, RegSet::KEEP_REG, true, true);
+ regSet.rsLockUsedReg(addrReg);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ reg = regSet.rsPickReg();
+ regSet.rsLockReg(genRegMask(reg));
+
+ noway_assert(genIsValidReg(reg));
+
+ /* Generate "ldr reg, [var]" */
+
+ inst_RV_TT(ins_Load(op1->TypeGet()), reg, op1);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
+ else
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Make sure the new value is in a register */
+
+ genRecoverReg(op2, 0, RegSet::KEEP_REG);
+
+ /* Compute the new value */
+
+ inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ /* Move the new value back to the variable */
+ /* Generate "str reg, [var]" */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+ regSet.rsUnlockReg(genRegMask(reg));
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVarCommon.gtLclNum);
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Make sure the new value is in a register */
+
+ genRecoverReg(op2, 0, RegSet::KEEP_REG);
+
+ /* Add the new value into the target */
+
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ }
+#endif // !CPU_LOAD_STORE_ARCH
+ /* Free up anything that was tied up either side */
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseReg(op2);
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable2(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true, true);
+
+ /* Compute the new value into a register */
+
+ genComputeReg(op2, needMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Make sure the target is still addressable */
+
+ addrReg = genKeepAddressable(op1, addrReg);
+ regSet.rsLockUsedReg(addrReg);
+
+#if !CPU_LOAD_STORE_ARCH
+ // For CPU_LOAD_STORE_ARCH, we always load from memory then store to memory
+
+ /* For small types with overflow check, we need to
+ sign/zero extend the result, so we need it in a reg */
+
+ if (ovfl && genTypeSize(treeType) < sizeof(int))
+#endif // !CPU_LOAD_STORE_ARCH
+ {
+ reg = regSet.rsPickReg();
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ inst_RV_RV(ins, reg, op2->gtRegNum, treeType, emitTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrackRegLclVar(reg, op1->gtLclVar.gtLclNum);
+ }
+#if !CPU_LOAD_STORE_ARCH
+ else
+ {
+ /* Add the new value into the target */
+
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ }
+#endif
+
+ /* Free up anything that was tied up either side */
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
+ genReleaseReg(op2);
+ }
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, reg, ovfl);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_UMOD.
+ */
+
+void CodeGen::genCodeForUnsignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UMOD);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsUnsignedModOptimizable(op2))
+ {
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate the appropriate sequence */
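+ // For an unsigned dividend, x % 2^N == x & (2^N - 1), so modulo by a
+ // power-of-two constant reduces to a single AND with (divisor - 1).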
+ size_t ival = op2->gtIntCon.gtIconVal - 1;
+ inst_RV_IV(INS_AND, reg, ival, emitActualTypeSize(treeType));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_MOD.
+ */
+
+void CodeGen::genCodeForSignedMod(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_MOD);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsSignedModOptimizable(op2))
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ BasicBlock* skip = genCreateTempLabel();
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate the appropriate sequence */
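+ //
+ // The sequence below computes a signed remainder by a power-of-two constant
+ // without using a divide:
+ //
+ //     and  reg, (ival - 1) | 0x80000000   ; keep the low bits and the sign bit
+ //     jump-if-not-negative skip           ; non-negative dividend: low bits are the result
+ //     dec  reg
+ //     or   reg, -ival                     ; set the high bits ...
+ //     inc  reg                            ; ... yielding (low bits) - ival, or 0 when exact
+ //   skip:
+ //
+ // For example, -5 % 4: the AND leaves 0x80000003 and the dec/or/inc sequence
+ // produces -1, matching the round-toward-zero semantics of the MSIL rem.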
+
+ inst_RV_IV(INS_AND, reg, (int)(ival - 1) | 0x80000000, EA_4BYTE, INS_FLAGS_SET);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Check and branch for a positive value */
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, skip);
+
+ /* Generate the rest of the sequence and we're done */
+
+ genIncRegBy(reg, -1, NULL, treeType);
+ ival = -ival;
+ if ((treeType == TYP_LONG) && ((int)ival != ival))
+ {
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(EA_8BYTE, immReg, ival);
+ inst_RV_RV(INS_OR, reg, immReg, TYP_LONG);
+ }
+ else
+ {
+ inst_RV_IV(INS_OR, reg, (int)ival, emitActualTypeSize(treeType));
+ }
+ genIncRegBy(reg, 1, NULL, treeType);
+
+ /* Define the 'skip' label and we're done */
+
+ genDefineTempLabel(skip);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_UDIV.
+ */
+
+void CodeGen::genCodeForUnsignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UDIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsUnsignedDivOptimizable(op2))
+ {
+ size_t ival = op2->gtIntCon.gtIconVal;
+
+ /* Division by 1 must be handled elsewhere */
+
+ noway_assert(ival != 1 || compiler->opts.MinOpts());
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Generate "shr reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, emitTypeSize(treeType), reg, genLog2(ival));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for GT_DIV.
+ */
+
+void CodeGen::genCodeForSignedDiv(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_DIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ /* Is this a division by an integer constant? */
+
+ noway_assert(op2);
+ if (compiler->fgIsSignedDivOptimizable(op2))
+ {
+ ssize_t ival_s = op2->gtIntConCommon.IconValue();
+ assert(ival_s > 0); // Postcondition of compiler->fgIsSignedDivOptimizable...
+ size_t ival = static_cast<size_t>(ival_s);
+
+ /* Division by 1 must be handled elsewhere */
+
+ noway_assert(ival != 1);
+
+ BasicBlock* onNegDivisee = genCreateTempLabel();
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ if (ival == 2)
+ {
+ /* Generate "sar reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival), INS_FLAGS_SET);
+
+ // Check and branch for a positive value, skipping the INS_ADDC instruction
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, onNegDivisee);
+
+ // Add the carry flag to 'reg'
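+ // For a negative odd dividend, 'sar' rounds toward negative infinity and shifts
+ // the discarded bit into the carry flag; adding the carry back in gives the
+ // round-toward-zero result that MSIL division requires. For even dividends the
+ // carry is zero and the add is a no-op.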
+ inst_RV_IV(INS_ADDC, reg, 0, emitActualTypeSize(treeType));
+
+ /* Define the 'onNegDivisee' label and we're done */
+
+ genDefineTempLabel(onNegDivisee);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+ }
+ else
+ {
+ /* Generate the following sequence */
+ /*
+ test reg, reg
+ jns onNegDivisee
+ add reg, ival-1
+ onNegDivisee:
+ sar reg, log2(ival)
+ */
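+ // Adding (ival - 1) to a negative dividend before the arithmetic shift converts
+ // the shift's round-toward-negative-infinity behavior into the round-toward-zero
+ // behavior required for signed division.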
+
+ instGen_Compare_Reg_To_Zero(emitTypeSize(treeType), reg);
+
+ // Check and branch for a positive value, skipping the INS_add instruction
+ emitJumpKind jmpGEL = genJumpKindForOper(GT_GE, CK_LOGICAL);
+ inst_JMP(jmpGEL, onNegDivisee);
+
+ inst_RV_IV(INS_add, reg, (int)ival - 1, emitActualTypeSize(treeType));
+
+ /* Define the 'onNegDivisee' label and we're done */
+
+ genDefineTempLabel(onNegDivisee);
+
+ /* Generate "sar reg, log2(value)" */
+
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, emitTypeSize(treeType), reg, genLog2(ival));
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ genCodeForGeneralDivide(tree, destReg, bestReg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a general divide. Handles the general case for GT_UMOD, GT_MOD, GT_UDIV, GT_DIV
+ * (if op2 is not a power of 2 constant).
+ */
+
+void CodeGen::genCodeForGeneralDivide(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_UMOD || tree->OperGet() == GT_MOD || tree->OperGet() == GT_UDIV ||
+ tree->OperGet() == GT_DIV);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+ bool gotOp1;
+ regMaskTP addrReg;
+
+#if USE_HELPERS_FOR_INT_DIV
+ noway_assert(!"Unreachable: fgMorph should have transformed this into a JitHelper");
+#endif
+
+#if defined(_TARGET_XARCH_)
+
+ /* Which operand are we supposed to evaluate first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* We'll evaluate 'op2' first */
+
+ gotOp1 = false;
+ destReg &= ~op1->gtRsvdRegs;
+
+ /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvRegister)
+ {
+ destReg &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+ else
+ {
+ /* We'll evaluate 'op1' first */
+
+ gotOp1 = true;
+
+ regMaskTP op1Mask;
+ if (RBM_EAX & op2->gtRsvdRegs)
+ op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
+ else
+ op1Mask = RBM_EAX; // EAX would be ideal
+
+ /* Generate the dividend into EAX and hold on to it. freeOnly=true */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+
+ /* We want to avoid using EAX or EDX for the second operand */
+
+ destReg = regSet.rsMustExclude(destReg, RBM_EAX | RBM_EDX);
+
+ /* Make the second operand addressable */
+ op2 = genCodeForCommaTree(op2);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD)
+ {
+ if ((op2->gtFlags & GTF_REG_VAL) == 0)
+ addrReg = genMakeRvalueAddressable(op2, destReg, RegSet::KEEP_REG, false);
+ else
+ addrReg = 0;
+ }
+ else
+ {
+ genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Make sure we have the dividend in EAX */
+
+ if (gotOp1)
+ {
+ /* We've previously computed op1 into EAX */
+
+ genRecoverReg(op1, RBM_EAX, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into EAX and hold on to it */
+
+ genComputeReg(op1, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_EAX);
+
+ /* We can now safely (we think) grab EDX */
+
+ regSet.rsGrabReg(RBM_EDX);
+ regSet.rsLockReg(RBM_EDX);
+
+ /* Convert the integer in EAX into an un/signed long in EDX:EAX */
+
+ const genTreeOps oper = tree->OperGet();
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ else
+ instGen(INS_cdq);
+
+ /* Make sure the divisor is still addressable */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_EAX);
+
+ /* Perform the division */
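+ // The divide instruction uses EDX:EAX as the dividend, leaving the quotient
+ // in EAX and the remainder in EDX.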
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+ else
+ inst_TT(INS_SIGNED_DIVIDE, op2);
+
+ /* Free up anything tied up by the divisor's address */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* Unlock and free EDX */
+
+ regSet.rsUnlockReg(RBM_EDX);
+
+ /* Free up op1 (which is in EAX) as well */
+
+ genReleaseReg(op1);
+
+ /* Both EAX and EDX are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Figure out which register the result is in */
+
+ reg = (oper == GT_DIV || oper == GT_UDIV) ? REG_EAX : REG_EDX;
+
+ /* Don't forget to mark the first operand as using EAX and EDX */
+
+ op1->gtRegNum = reg;
+
+ genCodeForTree_DONE(tree, reg);
+
+#elif defined(_TARGET_ARM_)
+
+ /* Which operand are we supposed to evaluate first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* We'll evaluate 'op2' first */
+
+ gotOp1 = false;
+ destReg &= ~op1->gtRsvdRegs;
+
+ /* Also if op1 is an enregistered LCL_VAR then exclude its register as well */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvRegister)
+ {
+ destReg &= ~genRegMask(varDsc->lvRegNum);
+ }
+ }
+ }
+ else
+ {
+ /* We'll evaluate 'op1' first */
+
+ gotOp1 = true;
+ regMaskTP op1Mask = RBM_ALLINT & ~op2->gtRsvdRegs;
+
+ /* Generate the dividend into a register and hold on to it. */
+
+ genComputeReg(op1, op1Mask, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+
+ /* Evaluate the second operand into a register and hold onto it. */
+
+ genComputeReg(op2, destReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+
+ if (gotOp1)
+ {
+ // Recover op1 if spilled
+ genRecoverReg(op1, RBM_NONE, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into any register and hold on to it */
+ genComputeReg(op1, RBM_ALLINT, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // Perform the division
+
+ const genTreeOps oper = tree->OperGet();
+
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ ins = INS_udiv;
+ else
+ ins = INS_sdiv;
+
+ getEmitter()->emitIns_R_R_R(ins, EA_4BYTE, reg, op1->gtRegNum, op2->gtRegNum);
+
+ if (oper == GT_UMOD || oper == GT_MOD)
+ {
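+ // 'reg' currently holds op1 / op2; derive the remainder as
+ // op1 - (op1 / op2) * op2 using a multiply and a subtract.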
+ getEmitter()->emitIns_R_R_R(INS_mul, EA_4BYTE, reg, op2->gtRegNum, reg);
+ getEmitter()->emitIns_R_R_R(INS_sub, EA_4BYTE, reg, op1->gtRegNum, reg);
+ }
+ /* Free up op1 and op2 */
+ genReleaseReg(op1);
+ genReleaseReg(op2);
+
+ genCodeForTree_DONE(tree, reg);
+
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for an assignment shift (x <op>= y). Handles GT_ASG_LSH, GT_ASG_RSH, GT_ASG_RSZ.
+ */
+
+void CodeGen::genCodeForAsgShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_ASG_LSH || tree->OperGet() == GT_ASG_RSH || tree->OperGet() == GT_ASG_RSZ);
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+ regMaskTP addrReg;
+
+ switch (oper)
+ {
+ case GT_ASG_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_ASG_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_ASG_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ default:
+ unreached();
+ }
+
+ noway_assert(!varTypeIsGC(treeType));
+ noway_assert(op2);
+
+ /* Shifts by a constant amount are easier */
+
+ if (op2->IsCnsIntOrI())
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+ /* Are we shifting a register left by 1 bit? */
+
+ if ((oper == GT_ASG_LSH) && (op2->gtIntCon.gtIconVal == 1) && (op1->gtFlags & GTF_REG_VAL))
+ {
+ /* The target lives in a register */
+
+ reg = op1->gtRegNum;
+
+ /* "add reg, reg" is cheaper than "shl reg, 1" */
+
+ inst_RV_RV(INS_add, reg, reg, treeType, emitActualTypeSize(treeType), flags);
+ }
+ else
+ {
+#if CPU_LOAD_STORE_ARCH
+ if ((op1->gtFlags & GTF_REG_VAL) == 0)
+ {
+ regSet.rsLockUsedReg(addrReg);
+
+ // Load op1 into a reg
+
+ reg = regSet.rsPickReg(RBM_ALLINT);
+
+ inst_RV_TT(INS_mov, reg, op1);
+
+ // Issue the shift
+
+ inst_RV_IV(ins, reg, (int)op2->gtIntCon.gtIconVal, emitActualTypeSize(treeType), flags);
+ regTracker.rsTrackRegTrash(reg);
+
+ /* Store the (sign/zero extended) result back to the stack location of the variable */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, reg);
+
+ regSet.rsUnlockUsedReg(addrReg);
+ }
+ else
+#endif // CPU_LOAD_STORE_ARCH
+ {
+ /* Shift by the constant value */
+
+ inst_TT_SH(ins, op1, (int)op2->gtIntCon.gtIconVal);
+ }
+ }
+
+ /* If the target is a register, it has a new value */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* The zero flag is now equal to the target value */
+ /* X86: But only if the shift count is != 0 */
+
+ if (op2->gtIntCon.gtIconVal != 0)
+ {
+ if (tree->gtSetFlags())
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ genFlagsEqualToVar(tree, op1->gtLclVarCommon.gtLclNum);
+ }
+ else if (op1->gtOper == GT_REG_VAR)
+ {
+ genFlagsEqualToReg(tree, op1->gtRegNum);
+ }
+ }
+ }
+ else
+ {
+ // It is possible for the shift count to equal 0 with valid
+ // IL, and not be optimized away, in the case where the node
+ // is of a small type. The sequence of instructions looks like
+ // ldsfld, shr, stsfld, and is executed on a char field. This will
+ // never happen with code produced by our compilers, because the
+ // compilers will insert a conv.u2 before the stsfld (which will
+ // lead us down a different codepath in the JIT and optimize away
+ // the shift by zero). This case is not worth optimizing and we
+ // will just make sure to generate correct code for it.
+
+ genFlagsEqualToNone();
+ }
+ }
+ else
+ {
+ regMaskTP op2Regs = RBM_NONE;
+ if (REG_SHIFT != REG_NA)
+ op2Regs = RBM_SHIFT;
+
+ regMaskTP tempRegs;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tempRegs = regSet.rsMustExclude(op2Regs, op1->gtRsvdRegs);
+ genCodeForTree(op2, tempRegs);
+ regSet.rsMarkRegUsed(op2);
+
+ tempRegs = regSet.rsMustExclude(RBM_ALLINT, genRegMask(op2->gtRegNum));
+ addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
+
+ genRecoverReg(op2, op2Regs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Make the target addressable avoiding op2->RsvdRegs [and RBM_SHIFT] */
+ regMaskTP excludeMask = op2->gtRsvdRegs;
+ if (REG_SHIFT != REG_NA)
+ excludeMask |= RBM_SHIFT;
+
+ tempRegs = regSet.rsMustExclude(RBM_ALLINT, excludeMask);
+ addrReg = genMakeAddressable(op1, tempRegs, RegSet::KEEP_REG, true);
+
+ /* Load the shift count into the necessary register */
+ genComputeReg(op2, op2Regs, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ }
+
+ /* Make sure the address registers are still here */
+ addrReg = genKeepAddressable(op1, addrReg, op2Regs);
+
+#ifdef _TARGET_XARCH_
+ /* Perform the shift */
+ inst_TT_CL(ins, op1);
+#else
+ /* Perform the shift */
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ op2Regs = genRegMask(op2->gtRegNum);
+
+ regSet.rsLockUsedReg(addrReg | op2Regs);
+ inst_TT_RV(ins, op1, op2->gtRegNum, 0, emitTypeSize(treeType), flags);
+ regSet.rsUnlockUsedReg(addrReg | op2Regs);
+#endif
+ /* Free the address registers */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* If the value is in a register, it's now trash */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+
+ /* Release the op2 [RBM_SHIFT] operand */
+
+ genReleaseReg(op2);
+ }
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, /* unused for ovfl=false */ REG_NA, /* ovfl */ false);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a shift. Handles GT_LSH, GT_RSH, GT_RSZ.
+ */
+
+void CodeGen::genCodeForShift(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperIsShift());
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ const var_types treeType = tree->TypeGet();
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ regMaskTP needReg = destReg;
+ regNumber reg;
+ instruction ins;
+
+ switch (oper)
+ {
+ case GT_LSH:
+ ins = INS_SHIFT_LEFT_LOGICAL;
+ break;
+ case GT_RSH:
+ ins = INS_SHIFT_RIGHT_ARITHM;
+ break;
+ case GT_RSZ:
+ ins = INS_SHIFT_RIGHT_LOGICAL;
+ break;
+ default:
+ unreached();
+ }
+
+ /* Is the shift count constant? */
+ noway_assert(op2);
+ if (op2->IsIntCnsFitsInI32())
+ {
+ // TODO: Check to see if we could generate a LEA instead!
+
+ /* Compute the left operand into any free register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::KEEP_REG);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ reg = op1->gtRegNum;
+
+ /* Are we shifting left by 1 bit? (or 2 bits for fast code) */
+
+ // On ARM, until proven otherwise by performance numbers, just do the shift.
+ // It's no bigger than add (16 bits for low registers, 32 bits for high registers).
+ // It's smaller than two "add reg, reg".
+
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_ARM_
+ if (oper == GT_LSH)
+ {
+ emitAttr size = emitActualTypeSize(treeType);
+ if (op2->gtIntConCommon.IconValue() == 1)
+ {
+ /* "add reg, reg" is smaller and faster than "shl reg, 1" */
+ inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
+ }
+ else if ((op2->gtIntConCommon.IconValue() == 2) && (compiler->compCodeOpt() == Compiler::FAST_CODE))
+ {
+ /* two "add reg, reg" instructions are faster than "shl reg, 2" */
+ inst_RV_RV(INS_add, reg, reg, treeType);
+ inst_RV_RV(INS_add, reg, reg, treeType, size, flags);
+ }
+ else
+ goto DO_SHIFT_BY_CNS;
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+#ifndef _TARGET_ARM_
+ DO_SHIFT_BY_CNS:
+#endif // _TARGET_ARM_
+ // If we are shifting 'reg' by zero bits and do not need the flags to be set
+ // then we can just skip emitting the instruction as 'reg' is already correct.
+ //
+ if ((op2->gtIntConCommon.IconValue() != 0) || tree->gtSetFlags())
+ {
+ /* Generate the appropriate shift instruction */
+ inst_RV_SH(ins, emitTypeSize(treeType), reg, (int)op2->gtIntConCommon.IconValue(), flags);
+ }
+ }
+ }
+ else
+ {
+ /* Calculate a useful register mask for computing op1 */
+ needReg = regSet.rsNarrowHint(regSet.rsRegMaskFree(), needReg);
+ regMaskTP op2RegMask;
+#ifdef _TARGET_XARCH_
+ op2RegMask = RBM_ECX;
+#else
+ op2RegMask = RBM_NONE;
+#endif
+ needReg = regSet.rsMustExclude(needReg, op2RegMask);
+
+ regMaskTP tempRegs;
+
+ /* Which operand are we supposed to evaluate first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Load the shift count [into ECX on XARCH] */
+ tempRegs = regSet.rsMustExclude(op2RegMask, op1->gtRsvdRegs);
+ genComputeReg(op2, tempRegs, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
+
+ /* We must not target the register that is holding op2 */
+ needReg = regSet.rsMustExclude(needReg, genRegMask(op2->gtRegNum));
+
+ /* Now evaluate 'op1' into a free register */
+ genComputeReg(op1, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+
+ /* Recover op2 into ECX */
+ genRecoverReg(op2, op2RegMask, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Compute op1 into a register, trying to avoid op2->rsvdRegs and ECX */
+ tempRegs = regSet.rsMustExclude(needReg, op2->gtRsvdRegs);
+ genComputeReg(op1, tempRegs, RegSet::ANY_REG, RegSet::KEEP_REG, true);
+
+ /* Load the shift count [into ECX on XARCH] */
+ genComputeReg(op2, op2RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG, false);
+ }
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+#ifdef _TARGET_XARCH_
+ noway_assert(genRegMask(op2->gtRegNum) == op2RegMask);
+#endif
+ // Check for the case of op1 being spilled during the evaluation of op2
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ // The register has been spilled -- reload it to any register except ECX
+ regSet.rsLockUsedReg(op2RegMask);
+ regSet.rsUnspillReg(op1, 0, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(op2RegMask);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ reg = op1->gtRegNum;
+
+#ifdef _TARGET_ARM_
+ /* Perform the shift */
+ getEmitter()->emitIns_R_R(ins, EA_4BYTE, reg, op2->gtRegNum, flags);
+#else
+ /* Perform the shift */
+ inst_RV_CL(ins, reg);
+#endif
+ genReleaseReg(op2);
+ }
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(reg == op1->gtRegNum);
+
+ /* The register is now trashed */
+ genReleaseReg(op1);
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a top-level relational operator (not one that is part of a GT_JTRUE tree).
+ * Handles GT_EQ, GT_NE, GT_LT, GT_LE, GT_GE, GT_GT.
+ */
+
+void CodeGen::genCodeForRelop(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE || tree->OperGet() == GT_LT ||
+ tree->OperGet() == GT_LE || tree->OperGet() == GT_GE || tree->OperGet() == GT_GT);
+
+ const genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ const var_types treeType = tree->TypeGet();
+ regMaskTP needReg = destReg;
+ regNumber reg;
+
+ // Long and floating-point comparisons are converted to "?:"
+ noway_assert(!compiler->fgMorphRelopToQmark(op1));
+
+ // Check if we can use the currently set flags. Else set them
+
+ emitJumpKind jumpKind = genCondSetFlags(tree);
+
+ // Grab a register to materialize the bool value into
+
+ bestReg = regSet.rsRegMaskCanGrab() & RBM_BYTE_REGS;
+
+ // Check that the predictor did the right job
+ noway_assert(bestReg);
+
+ // If needReg is in bestReg then use it
+ if (needReg & bestReg)
+ reg = regSet.rsGrabReg(needReg & bestReg);
+ else
+ reg = regSet.rsGrabReg(bestReg);
+
+#if defined(_TARGET_ARM_)
+
+ // Generate:
+ // jump-if-true L_true
+ // mov reg, 0
+ // jmp L_end
+ // L_true:
+ // mov reg, 1
+ // L_end:
+
+ BasicBlock* L_true;
+ BasicBlock* L_end;
+
+ L_true = genCreateTempLabel();
+ L_end = genCreateTempLabel();
+
+ inst_JMP(jumpKind, L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 0); // Executes when the cond is false
+ inst_JMP(EJ_jmp, L_end);
+ genDefineTempLabel(L_true);
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, reg, 1); // Executes when the cond is true
+ genDefineTempLabel(L_end);
+
+ regTracker.rsTrackRegTrash(reg);
+
+#elif defined(_TARGET_XARCH_)
+ regMaskTP regs = genRegMask(reg);
+ noway_assert(regs & RBM_BYTE_REGS);
+
+ // Set (lower byte of) reg according to the flags
+
+ /* Look for the special case where we just want to transfer the carry bit */
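+ // (Subtract-with-borrow of reg from itself leaves -CF in reg; negating that
+ // gives CF for the EJ_jb case, while adding 1 gives 1 - CF for the EJ_jae case.)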
+
+ if (jumpKind == EJ_jb)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ inst_RV(INS_NEG, reg, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else if (jumpKind == EJ_jae)
+ {
+ inst_RV_RV(INS_SUBC, reg, reg);
+ genIncRegBy(reg, 1, tree, TYP_INT);
+ regTracker.rsTrackRegTrash(reg);
+ }
+ else
+ {
+ inst_SET(jumpKind, reg);
+
+ regTracker.rsTrackRegTrash(reg);
+
+ if (treeType == TYP_INT)
+ {
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), reg, reg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ else
+ {
+ noway_assert(treeType == TYP_BYTE);
+ }
+ }
+#else
+ NYI("TARGET");
+#endif // _TARGET_XXX
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+//------------------------------------------------------------------------
+// genCodeForCopyObj: Generate code for a CopyObj node
+//
+// Arguments:
+// tree - The CopyObj node we are going to generate code for.
+// destReg - The register mask for register(s), if any, that will be defined.
+//
+// Return Value:
+// None
+
+void CodeGen::genCodeForCopyObj(GenTreePtr tree, regMaskTP destReg)
+{
+ // If the value class doesn't have any fields that are GC refs or
+ // the target isn't on the GC-heap, we can merge it with CPBLK.
+ // GC fields cannot be copied directly; instead we will
+ // need to use a jit-helper for that.
+ assert(tree->gtOper == GT_ASG);
+ assert(tree->gtOp.gtOp1->gtOper == GT_OBJ);
+
+ GenTreeObj* cpObjOp = tree->gtOp.gtOp1->AsObj();
+ assert(cpObjOp->HasGCPtr());
+
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ assert(tree->gtOp.gtOp2->OperIsIndir());
+ GenTreePtr srcObj = tree->gtOp.gtOp2->AsIndir()->Addr();
+ GenTreePtr dstObj = cpObjOp->Addr();
+
+ noway_assert(dstObj->gtType == TYP_BYREF || dstObj->gtType == TYP_I_IMPL);
+
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)cpObjOp->gtClass;
+ size_t debugBlkSize = roundUp(compiler->info.compCompHnd->getClassSize(clsHnd), TARGET_POINTER_SIZE);
+
+ // Since we round up, we are not handling the case where we have a non-pointer sized struct with GC pointers.
+ // The EE currently does not allow this. Let's assert it just to be safe.
+ noway_assert(compiler->info.compCompHnd->getClassSize(clsHnd) == debugBlkSize);
+#endif
+
+ size_t blkSize = cpObjOp->gtSlots * TARGET_POINTER_SIZE;
+ unsigned slots = cpObjOp->gtSlots;
+ BYTE* gcPtrs = cpObjOp->gtGcPtrs;
+ unsigned gcPtrCount = cpObjOp->gtGcPtrCount;
+ assert(blkSize == cpObjOp->gtBlkSize);
+
+ GenTreePtr treeFirst, treeSecond;
+ regNumber regFirst, regSecond;
+
+ // In which order do the object-ptrs have to be evaluated?
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ treeFirst = srcObj;
+ treeSecond = dstObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_ESI;
+ regSecond = REG_EDI;
+#else
+ regFirst = REG_ARG_1;
+ regSecond = REG_ARG_0;
+#endif
+ }
+ else
+ {
+ treeFirst = dstObj;
+ treeSecond = srcObj;
+#if CPU_USES_BLOCK_MOVE
+ regFirst = REG_EDI;
+ regSecond = REG_ESI;
+#else
+ regFirst = REG_ARG_0;
+ regSecond = REG_ARG_1;
+#endif
+ }
+
+ bool dstIsOnStack = (dstObj->gtOper == GT_ADDR && (dstObj->gtFlags & GTF_ADDR_ONSTACK));
+ bool srcIsOnStack = (srcObj->gtOper == GT_ADDR && (srcObj->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr srcType = (varTypeIsGC(srcObj) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr dstType = (varTypeIsGC(dstObj) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+
+#if CPU_USES_BLOCK_MOVE
+ // Materialize the trees in the order desired
+
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+
+ // Grab ECX because it will be trashed by the helper
+ //
+ regSet.rsGrabReg(RBM_ECX);
+
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ if (*gcPtrs++ == TYPE_GC_NONE || dstIsOnStack)
+ {
+ // Note that we can use movsd even if it is a GC pointer being transferred
+ // because the value is not cached anywhere. If we did this in two moves,
+ // we would have to make certain we passed the appropriate GC info on to
+ // the emitter.
+ instGen(INS_movsp);
+ }
+ else
+ {
+ // This helper will act like a MOVSD
+ // -- inputs EDI and ESI are byrefs
+ // -- including incrementing of ESI and EDI by 4
+ // -- helper will trash ECX
+ //
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+ regSet.rsUnlockUsedReg(argRegs);
+ }
+
+ blkSize -= TARGET_POINTER_SIZE;
+ }
+
+ // "movsd/movsq" as well as CPX_BYREF_ASG modify all three registers
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ESI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ gcInfo.gcMarkRegSetNpt(RBM_ESI | RBM_EDI);
+
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is an emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
+
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+
+ genDefineTempLabel(&dummyBB);
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+// Currently only the ARM implementation is provided
+#error "COPYBLK for non-ARM && non-CPU_USES_BLOCK_MOVE"
+#endif
+
+ // Materialize the trees in the order desired
+ bool helperUsed;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
+
+ if ((gcPtrCount > 0) && !dstIsOnStack)
+ {
+ genComputeReg(treeFirst, genRegMask(regFirst), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genComputeReg(treeSecond, genRegMask(regSecond), RegSet::EXACT_REG, RegSet::KEEP_REG, true);
+ genRecoverReg(treeFirst, genRegMask(regFirst), RegSet::KEEP_REG);
+
+ /* The helper is an asm routine that will trash R2, R3 and LR */
+ {
+ /* Spill any callee-saved registers which are being used */
+ regMaskTP spillRegs = RBM_CALLEE_TRASH_NOGC & regSet.rsMaskUsed;
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+ }
+
+ // Grab R2 (aka REG_TMP_1) because it will be trashed by the helper
+ // We will also use it as the temp register for our load/store sequences
+ //
+ assert(REG_R2 == REG_TMP_1);
+ regTemp = regSet.rsGrabReg(RBM_R2);
+ helperUsed = true;
+ }
+ else
+ {
+ genCompIntoFreeReg(treeFirst, (RBM_ALLINT & ~treeSecond->gtRsvdRegs), RegSet::KEEP_REG);
+ genCompIntoFreeReg(treeSecond, RBM_ALLINT, RegSet::KEEP_REG);
+ genRecoverReg(treeFirst, RBM_ALLINT, RegSet::KEEP_REG);
+
+ // Grab any temp register to use for our load/store sequences
+ //
+ regTemp = regSet.rsGrabReg(RBM_ALLINT);
+ helperUsed = false;
+ }
+ assert(dstObj->gtFlags & GTF_REG_VAL);
+ assert(srcObj->gtFlags & GTF_REG_VAL);
+
+ regDst = dstObj->gtRegNum;
+ regSrc = srcObj->gtRegNum;
+
+ assert(regDst != regTemp);
+ assert(regSrc != regTemp);
+
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
+
+ size_t offset = 0;
+ while (blkSize >= TARGET_POINTER_SIZE)
+ {
+ CorInfoGCType gcType;
+ CorInfoGCType gcTypeNext = TYPE_GC_NONE;
+ var_types type = TYP_I_IMPL;
+
+#if FEATURE_WRITE_BARRIER
+ gcType = (CorInfoGCType)(*gcPtrs++);
+ if (blkSize > TARGET_POINTER_SIZE)
+ gcTypeNext = (CorInfoGCType)(*gcPtrs);
+
+ if (gcType == TYPE_GC_REF)
+ type = TYP_REF;
+ else if (gcType == TYPE_GC_BYREF)
+ type = TYP_BYREF;
+
+ if (helperUsed)
+ {
+ assert(regDst == REG_ARG_0);
+ assert(regSrc == REG_ARG_1);
+ assert(regTemp == REG_R2);
+ }
+#else
+ gcType = TYPE_GC_NONE;
+#endif // FEATURE_WRITE_BARRIER
+
+ blkSize -= TARGET_POINTER_SIZE;
+
+ emitAttr opSize = emitTypeSize(type);
+
+ if (!helperUsed || (gcType == TYPE_GC_NONE))
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, opSize, regTemp, regSrc, offset);
+ getEmitter()->emitIns_R_R_I(storeIns, opSize, regTemp, regDst, offset);
+ offset += TARGET_POINTER_SIZE;
+
+ if ((helperUsed && (gcTypeNext != TYPE_GC_NONE)) || ((offset >= 128) && (blkSize > 0)))
+ {
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, offset);
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, offset);
+ offset = 0;
+ }
+ }
+ else
+ {
+ assert(offset == 0);
+
+ // The helper will act like this:
+ // -- inputs R0 and R1 are byrefs
+ // -- helper will perform copy from *R1 into *R0
+ // -- helper will perform post increment of R0 and R1 by 4
+ // -- helper will trash R2
+ // -- helper will trash R3
+ // -- calling the helper implicitly trashes LR
+ //
+ assert(helperUsed);
+ regMaskTP argRegs = genRegMask(regFirst) | genRegMask(regSecond);
+ regSet.rsLockUsedReg(argRegs);
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF,
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ regSet.rsUnlockUsedReg(argRegs);
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH_NOGC);
+ }
+ }
+
+ regTracker.rsTrackRegTrash(regDst);
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(regDst) | genRegMask(regSrc));
+
+ /* The emitter won't record CORINFO_HELP_ASSIGN_BYREF in the GC tables as
+ it is an emitNoGChelper. However, we have to let the emitter know that
+ the GC liveness has changed. We do this by creating a new label.
+ */
+
+ noway_assert(emitter::emitNoGChelper(CORINFO_HELP_ASSIGN_BYREF));
+
+ genDefineTempLabel(&dummyBB);
+
+#endif // !CPU_USES_BLOCK_MOVE
+
+ assert(blkSize == 0);
+
+ genReleaseReg(dstObj);
+ genReleaseReg(srcObj);
+
+ genCodeForTree_DONE(tree, REG_NA);
+
+#ifdef _TARGET_ARM_
+ if (cpObjOp->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// genCodeForBlkOp: Generate code for a block copy or init operation
+//
+// Arguments:
+// tree - The block assignment
+// destReg - The expected destination register
+//
+void CodeGen::genCodeForBlkOp(GenTreePtr tree, regMaskTP destReg)
+{
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr dest = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtGetOp2();
+ regMaskTP needReg = destReg;
+ regMaskTP regs = regSet.rsMaskUsed;
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+ GenTreePtr destPtr;
+ GenTreePtr srcPtrOrVal;
+
+ noway_assert(tree->OperIsBlkOp());
+
+ bool isCopyBlk = false;
+ bool isInitBlk = false;
+ bool hasGCpointer = false;
+ unsigned blockSize = dest->AsBlk()->gtBlkSize;
+ GenTreePtr sizeNode = nullptr;
+ bool sizeIsConst = true;
+ if (dest->gtOper == GT_DYN_BLK)
+ {
+ sizeNode = dest->AsDynBlk()->gtDynamicSize;
+ sizeIsConst = false;
+ }
+
+ if (tree->OperIsCopyBlkOp())
+ {
+ isCopyBlk = true;
+ if (dest->gtOper == GT_OBJ)
+ {
+ if (dest->AsObj()->gtGcPtrCount != 0)
+ {
+ genCodeForCopyObj(tree, destReg);
+ return;
+ }
+ }
+ }
+ else
+ {
+ isInitBlk = true;
+ }
+
+ // Ensure that we have an address in the CopyBlk case.
+ if (isCopyBlk)
+ {
+ // TODO-1stClassStructs: Allow a lclVar here.
+ assert(src->OperIsIndir());
+ srcPtrOrVal = src->AsIndir()->Addr();
+ }
+ else
+ {
+ srcPtrOrVal = src;
+ }
+
+#ifdef _TARGET_ARM_
+ if (dest->AsBlk()->IsVolatile())
+ {
+ // Emit a memory barrier instruction before the InitBlk/CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif
+ {
+ destPtr = dest->AsBlk()->Addr();
+ noway_assert(destPtr->TypeGet() == TYP_BYREF || varTypeIsIntegral(destPtr->TypeGet()));
+ noway_assert(
+ (isCopyBlk && (srcPtrOrVal->TypeGet() == TYP_BYREF || varTypeIsIntegral(srcPtrOrVal->TypeGet()))) ||
+ (isInitBlk && varTypeIsIntegral(srcPtrOrVal->TypeGet())));
+
+ noway_assert(destPtr && srcPtrOrVal);
+
+#if CPU_USES_BLOCK_MOVE
+ regs = isInitBlk ? RBM_EAX : RBM_ESI; // What is the needReg for Val/Src
+
+ /* Some special code for block moves/inits for constant sizes */
+
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if ((sizeIsConst) && (isCopyBlk || (srcPtrOrVal->IsCnsIntOrI())))
+ {
+ size_t length = blockSize;
+ size_t initVal = 0;
+ instruction ins_P, ins_PR, ins_B;
+
+ if (isInitBlk)
+ {
+ ins_P = INS_stosp;
+ ins_PR = INS_r_stosp;
+ ins_B = INS_stosb;
+
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)srcPtrOrVal->gtIntCon.gtIconVal);
+
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we store this new value into the tree-node */
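+ // e.g. an init value of 0xAB becomes 0xABABABAB (and, for lengths greater than 4
+ // on 64-bit targets, 0xABABABABABABABAB).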
+
+ if (initVal)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ initVal = initVal | (initVal << 32);
+ srcPtrOrVal->gtType = TYP_LONG;
+ }
+ else
+ {
+ srcPtrOrVal->gtType = TYP_INT;
+ }
+#endif // _TARGET_64BIT_
+ }
+ srcPtrOrVal->gtIntCon.gtIconVal = initVal;
+ }
+ else
+ {
+ ins_P = INS_movsp;
+ ins_PR = INS_r_movsp;
+ ins_B = INS_movsb;
+ }
+
+ // Determine if we will be using SSE2
+ unsigned movqLenMin = 8;
+ unsigned movqLenMax = 24;
+
+ bool bWillUseSSE2 = false;
+ bool bWillUseOnlySSE2 = false;
+ bool bNeedEvaluateCnst = true; // If we only use SSE2, we will just load the constant there.
+
+#ifdef _TARGET_64BIT_
+
+// Until we get SSE2 instructions that move 16 bytes at a time instead of just 8
+// there is no point in wasting space on the bigger instructions
+
+#else // !_TARGET_64BIT_
+
+ if (compiler->opts.compCanUseSSE2)
+ {
+ unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
+
+ /* Adjust for BB weight */
+ if (curBBweight == BB_ZERO_WEIGHT)
+ {
+ // Don't bother with this optimization in
+ // rarely run blocks
+ movqLenMax = movqLenMin = 0;
+ }
+ else if (curBBweight < BB_UNITY_WEIGHT)
+ {
+ // Be less aggressive when we are inside a conditional
+ movqLenMax = 16;
+ }
+ else if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ // Be more aggressive when we are inside a loop
+ movqLenMax = 48;
+ }
+
+ if ((compiler->compCodeOpt() == Compiler::FAST_CODE) || isInitBlk)
+ {
+ // Be more aggressive when optimizing for speed
+ // InitBlk uses fewer instructions
+ movqLenMax += 16;
+ }
+
+ if (compiler->compCodeOpt() != Compiler::SMALL_CODE && length >= movqLenMin && length <= movqLenMax)
+ {
+ bWillUseSSE2 = true;
+
+ if ((length % 8) == 0)
+ {
+ bWillUseOnlySSE2 = true;
+ if (isInitBlk && (initVal == 0))
+ {
+ bNeedEvaluateCnst = false;
+ noway_assert((srcPtrOrVal->OperGet() == GT_CNS_INT));
+ }
+ }
+ }
+ }
+
+#endif // !_TARGET_64BIT_
+
+ const bool bWillTrashRegSrc = (isCopyBlk && !bWillUseOnlySSE2);
+ /* Evaluate dest and src/val */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genRecoverReg(srcPtrOrVal, regs, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ genComputeReg(destPtr, RBM_EDI, RegSet::EXACT_REG, RegSet::KEEP_REG, !bWillUseOnlySSE2);
+ if (bNeedEvaluateCnst)
+ {
+ genComputeReg(srcPtrOrVal, regs, RegSet::EXACT_REG, RegSet::KEEP_REG, bWillTrashRegSrc);
+ }
+ genRecoverReg(destPtr, RBM_EDI, RegSet::KEEP_REG);
+ }
+
+ bool bTrashedESI = false;
+ bool bTrashedEDI = false;
+
+ if (bWillUseSSE2)
+ {
+ int blkDisp = 0;
+ regNumber xmmReg = REG_XMM0;
+
+ if (isInitBlk)
+ {
+ if (initVal)
+ {
+ getEmitter()->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, xmmReg, REG_EAX);
+ getEmitter()->emitIns_R_R(INS_punpckldq, EA_4BYTE, xmmReg, xmmReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_xorps, EA_8BYTE, xmmReg, xmmReg);
+ }
+ }
+
+ JITLOG_THIS(compiler, (LL_INFO100, "Using XMM instructions for %3d byte %s while compiling %s\n",
+ length, isInitBlk ? "initblk" : "copyblk", compiler->info.compFullName));
+
+ while (length > 7)
+ {
+ if (isInitBlk)
+ {
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, REG_ESI, blkDisp);
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_EDI, blkDisp);
+ }
+ blkDisp += 8;
+ length -= 8;
+ }
+
+ if (length > 0)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(!bWillUseOnlySSE2);
+
+ if (isCopyBlk)
+ {
+ inst_RV_IV(INS_add, REG_ESI, blkDisp, emitActualTypeSize(srcPtrOrVal->TypeGet()));
+ bTrashedESI = true;
+ }
+
+ inst_RV_IV(INS_add, REG_EDI, blkDisp, emitActualTypeSize(destPtr->TypeGet()));
+ bTrashedEDI = true;
+
+ if (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+ }
+ }
+ else if (compiler->compCodeOpt() == Compiler::SMALL_CODE)
+ {
+ /* For small code, we can only use ins_DR to generate fast
+ and small code. We also can't use "rep movsb" because
+ we may not be atomically reading and writing the DWORD */
+
+ noway_assert(bNeedEvaluateCnst);
+
+ goto USE_DR;
+ }
+ else if (length <= 4 * REGSIZE_BYTES)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length >= REGSIZE_BYTES)
+ {
+ instGen(ins_P);
+ length -= REGSIZE_BYTES;
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+ else
+ {
+ USE_DR:
+ noway_assert(bNeedEvaluateCnst);
+
+ /* set ECX to length/REGSIZE_BYTES (in pointer-sized words) */
+ genSetRegToIcon(REG_ECX, length / REGSIZE_BYTES, TYP_I_IMPL);
+
+ length &= (REGSIZE_BYTES - 1);
+
+ instGen(ins_PR);
+
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ /* Now take care of the remainder */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (length > 4)
+ {
+ noway_assert(bNeedEvaluateCnst);
+ noway_assert(length < 8);
+
+ instGen((isInitBlk) ? INS_stosd : INS_movsd);
+ length -= 4;
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+#endif // _TARGET_64BIT_
+
+ if (length)
+ {
+ noway_assert(bNeedEvaluateCnst);
+
+ while (length--)
+ {
+ instGen(ins_B);
+ }
+
+ bTrashedEDI = true;
+ if (isCopyBlk)
+ bTrashedESI = true;
+ }
+
+ noway_assert(bTrashedEDI == !bWillUseOnlySSE2);
+ if (bTrashedEDI)
+ regTracker.rsTrackRegTrash(REG_EDI);
+ if (bTrashedESI)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(destPtr);
+ if (bNeedEvaluateCnst)
+ genReleaseReg(srcPtrOrVal);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed-size INITBLK with a variable init value.
+ //
+
+ // In what order should the Dest, Val/Src, and Size be calculated?
+
+ compiler->fgOrderBlockOps(tree, RBM_EDI, regs, RBM_ECX, opsPtr, regsPtr); // OUT arguments
+
+ noway_assert((isInitBlk && (regs == RBM_EAX)) || (isCopyBlk && (regs == RBM_ESI)));
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[0] != RBM_EAX));
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[1] != RBM_EAX));
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG, (regsPtr[2] != RBM_EAX));
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_EDI));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (genRegMask(srcPtrOrVal->gtRegNum) == regs));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ECX, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ECX));
+ }
+
+ if (isInitBlk)
+ instGen(INS_r_stosb);
+ else
+ instGen(INS_r_movsb);
+
+ regTracker.rsTrackRegTrash(REG_EDI);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ if (isCopyBlk)
+ regTracker.rsTrackRegTrash(REG_ESI);
+ // else No need to trash EAX as it wasn't destroyed by the "rep stos"
+
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+#else // !CPU_USES_BLOCK_MOVE
+
+#ifndef _TARGET_ARM_
+// Currently only the ARM implementation is provided
+#error "COPYBLK/INITBLK non-ARM && non-CPU_USES_BLOCK_MOVE"
+#endif
+ //
+ // Is this a fixed size COPYBLK?
+ // or a fixed size INITBLK with a constant init value?
+ //
+ if (sizeIsConst && (isCopyBlk || (srcPtrOrVal->OperGet() == GT_CNS_INT)))
+ {
+ GenTreePtr dstOp = destPtr;
+ GenTreePtr srcOp = srcPtrOrVal;
+ unsigned length = blockSize;
+ unsigned fullStoreCount = length / TARGET_POINTER_SIZE;
+ unsigned initVal = 0;
+ bool useLoop = false;
+
+ if (isInitBlk)
+ {
+ /* Properly extend the init constant from a U1 to a U4 */
+ initVal = 0xFF & ((unsigned)srcOp->gtIntCon.gtIconVal);
+
+ /* If it is a non-zero value we have to replicate */
+ /* the byte value four times to form the DWORD */
+ /* Then we store this new value into the tree-node */
+
+ if (initVal != 0)
+ {
+ initVal = initVal | (initVal << 8) | (initVal << 16) | (initVal << 24);
+ srcPtrOrVal->gtIntCon.gtIconVal = initVal;
+ }
+ }
+
+ // Will we be using a loop to implement this INITBLK/COPYBLK?
+ if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
+ {
+ useLoop = true;
+ }
+
+ regMaskTP usedRegs;
+ regNumber regDst;
+ regNumber regSrc;
+ regNumber regTemp;
+
+ /* Evaluate dest and src/val */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ genComputeReg(srcOp, (needReg & ~dstOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+
+ genComputeReg(dstOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ regDst = dstOp->gtRegNum;
+
+ genRecoverReg(srcOp, needReg, RegSet::KEEP_REG);
+ regSrc = srcOp->gtRegNum;
+ }
+ else
+ {
+ genComputeReg(dstOp, (needReg & ~srcOp->gtRsvdRegs), RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+
+ genComputeReg(srcOp, needReg, RegSet::ANY_REG, RegSet::KEEP_REG, useLoop);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+ regSrc = srcOp->gtRegNum;
+
+ genRecoverReg(dstOp, needReg, RegSet::KEEP_REG);
+ regDst = dstOp->gtRegNum;
+ }
+ assert(dstOp->gtFlags & GTF_REG_VAL);
+ assert(srcOp->gtFlags & GTF_REG_VAL);
+
+ regDst = dstOp->gtRegNum;
+ regSrc = srcOp->gtRegNum;
+ usedRegs = (genRegMask(regSrc) | genRegMask(regDst));
+ bool dstIsOnStack = (dstOp->gtOper == GT_ADDR && (dstOp->gtFlags & GTF_ADDR_ONSTACK));
+ emitAttr dstType = (varTypeIsGC(dstOp) && !dstIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ emitAttr srcType;
+
+ if (isCopyBlk)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp = regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp);
+ bool srcIsOnStack = (srcOp->gtOper == GT_ADDR && (srcOp->gtFlags & GTF_ADDR_ONSTACK));
+ srcType = (varTypeIsGC(srcOp) && !srcIsOnStack) ? EA_BYREF : EA_PTRSIZE;
+ }
+ else
+ {
+ regTemp = REG_STK;
+ srcType = EA_PTRSIZE;
+ }
+
+ instruction loadIns = ins_Load(TYP_I_IMPL); // INS_ldr
+ instruction storeIns = ins_Store(TYP_I_IMPL); // INS_str
+
+ int finalOffset;
+
+ // Can we emit a small number of ldr/str instructions to implement this INITBLK/COPYBLK?
+ if (!useLoop)
+ {
+ for (unsigned i = 0; i < fullStoreCount; i++)
+ {
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, i * TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, i * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, i * TARGET_POINTER_SIZE);
+ }
+ }
+
+ finalOffset = fullStoreCount * TARGET_POINTER_SIZE;
+ length -= finalOffset;
+ }
+ else // We will use a loop to implement this INITBLK/COPYBLK
+ {
+ unsigned pairStoreLoopCount = fullStoreCount / 2;
+
+ // We need a second temp register for CopyBlk
+ regNumber regTemp2 = REG_STK;
+ if (isCopyBlk)
+ {
+ // Prefer a low register, but avoid one of the ones we've already grabbed
+ regTemp2 =
+ regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ usedRegs |= genRegMask(regTemp2);
+ }
+
+ // Pick and initialize the loop counter register
+ regNumber regLoopIndex;
+ regLoopIndex =
+ regSet.rsGrabReg(regSet.rsNarrowHint(regSet.rsRegMaskCanGrab() & ~usedRegs, RBM_LOW_REGS));
+ genSetRegToIcon(regLoopIndex, pairStoreLoopCount, TYP_INT);
+
+ // Create and define the Basic Block for the loop top
+ BasicBlock* loopTopBlock = genCreateTempLabel();
+ genDefineTempLabel(loopTopBlock);
+
+ // The loop body
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp2, regSrc, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp2, regDst, TARGET_POINTER_SIZE);
+ getEmitter()->emitIns_R_I(INS_add, srcType, regSrc, 2 * TARGET_POINTER_SIZE);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp2));
+ regTracker.rsTrackRegTrash(regSrc);
+ regTracker.rsTrackRegTrash(regTemp);
+ regTracker.rsTrackRegTrash(regTemp2);
+ }
+ else // isInitBlk
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, TARGET_POINTER_SIZE);
+ }
+
+ getEmitter()->emitIns_R_I(INS_add, dstType, regDst, 2 * TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regDst);
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, regLoopIndex, 1, INS_FLAGS_SET);
+ emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
+ inst_JMP(jmpGTS, loopTopBlock);
+
+ regTracker.rsTrackRegIntCns(regLoopIndex, 0);
+
+ length -= (pairStoreLoopCount * (2 * TARGET_POINTER_SIZE));
+
+ if (length & TARGET_POINTER_SIZE)
+ {
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regTemp, regSrc, 0);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regTemp, regDst, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_4BYTE, regSrc, regDst, 0);
+ }
+ finalOffset = TARGET_POINTER_SIZE;
+ length -= TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ finalOffset = 0;
+ }
+ }
+
+ if (length & sizeof(short))
+ {
+ loadIns = ins_Load(TYP_USHORT); // INS_ldrh
+ storeIns = ins_Store(TYP_USHORT); // INS_strh
+
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_2BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_2BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(short);
+ finalOffset += sizeof(short);
+ }
+
+ if (length & sizeof(char))
+ {
+ loadIns = ins_Load(TYP_UBYTE); // INS_ldrb
+ storeIns = ins_Store(TYP_UBYTE); // INS_strb
+
+ if (isCopyBlk)
+ {
+ getEmitter()->emitIns_R_R_I(loadIns, EA_1BYTE, regTemp, regSrc, finalOffset);
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regTemp, regDst, finalOffset);
+ gcInfo.gcMarkRegSetNpt(genRegMask(regTemp));
+ regTracker.rsTrackRegTrash(regTemp);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(storeIns, EA_1BYTE, regSrc, regDst, finalOffset);
+ }
+ length -= sizeof(char);
+ }
+ assert(length == 0);
+
+ genReleaseReg(dstOp);
+ genReleaseReg(srcOp);
+ }
+ else
+ {
+ //
+ // This is a variable-sized COPYBLK/INITBLK,
+ // or a fixed-size INITBLK with a variable init value.
+ //
+
+ // In what order should the Dest, Val/Src, and Size be calculated?
+
+ compiler->fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr); // OUT arguments
+
+ genComputeReg(opsPtr[0], regsPtr[0], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ genComputeReg(opsPtr[1], regsPtr[1], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ if (opsPtr[2] != nullptr)
+ {
+ genComputeReg(opsPtr[2], regsPtr[2], RegSet::EXACT_REG, RegSet::KEEP_REG);
+ }
+ genRecoverReg(opsPtr[0], regsPtr[0], RegSet::KEEP_REG);
+ genRecoverReg(opsPtr[1], regsPtr[1], RegSet::KEEP_REG);
+
+ noway_assert((destPtr->gtFlags & GTF_REG_VAL) && // Dest
+ (destPtr->gtRegNum == REG_ARG_0));
+
+ noway_assert((srcPtrOrVal->gtFlags & GTF_REG_VAL) && // Val/Src
+ (srcPtrOrVal->gtRegNum == REG_ARG_1));
+
+ if (sizeIsConst)
+ {
+ inst_RV_IV(INS_mov, REG_ARG_2, blockSize, EA_PTRSIZE);
+ }
+ else
+ {
+ noway_assert((sizeNode->gtFlags & GTF_REG_VAL) && // Size
+ (sizeNode->gtRegNum == REG_ARG_2));
+ }
+
+ regSet.rsLockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+
+ genEmitHelperCall(isCopyBlk ? CORINFO_HELP_MEMCPY
+ /* GT_INITBLK */
+ : CORINFO_HELP_MEMSET,
+ 0, EA_UNKNOWN);
+
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+
+ regSet.rsUnlockUsedReg(RBM_ARG_0 | RBM_ARG_1 | RBM_ARG_2);
+ genReleaseReg(opsPtr[0]);
+ genReleaseReg(opsPtr[1]);
+ if (opsPtr[2] != nullptr)
+ {
+ genReleaseReg(opsPtr[2]);
+ }
+ }
+
+ if (isCopyBlk && dest->AsBlk()->IsVolatile())
+ {
+ // Emit a memory barrier instruction after the CopyBlk
+ instGen_MemoryBarrier();
+ }
+#endif // !CPU_USES_BLOCK_MOVE
+ }
+}
+BasicBlock dummyBB;
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ const genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+ regMaskTP needReg = destReg;
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ emitAttr size;
+ instruction ins;
+ regMaskTP addrReg;
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+#ifdef DEBUG
+ addrReg = 0xDEADCAFE;
+#endif
+
+ noway_assert(tree->OperKind() & GTK_SMPOP);
+
+ switch (oper)
+ {
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ genCodeForBlkOp(tree, destReg);
+ }
+ else
+ {
+ genCodeForTreeSmpOpAsg(tree);
+ }
+ return;
+
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ genCodeForAsgShift(tree, destReg, bestReg);
+ return;
+
+ case GT_ASG_AND:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ genCodeForTreeSmpBinArithLogAsgOp(tree, destReg, bestReg);
+ return;
+
+ case GT_CHS:
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
+#ifdef _TARGET_XARCH_
+ // Note that the specialCase here occurs when the treeType specifies a byte sized operation
+ // and we decided to enregister the op1 LclVar in a non-byteable register (ESI or EDI)
+ //
+ bool specialCase;
+ specialCase = false;
+ if (op1->gtOper == GT_REG_VAR)
+ {
+ /* Get hold of the target register */
+
+ reg = op1->gtRegVar.gtRegNum;
+ if (varTypeIsByte(treeType) && !(genRegMask(reg) & RBM_BYTE_REGS))
+ {
+ regNumber byteReg = regSet.rsGrabReg(RBM_BYTE_REGS);
+
+ inst_RV_RV(INS_mov, byteReg, reg);
+ regTracker.rsTrackRegTrash(byteReg);
+
+ inst_RV(INS_NEG, byteReg, treeType, emitTypeSize(treeType));
+ var_types op1Type = op1->TypeGet();
+ instruction wideningIns = ins_Move_Extend(op1Type, true);
+ inst_RV_RV(wideningIns, reg, byteReg, op1Type, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ specialCase = true;
+ }
+ }
+
+ if (!specialCase)
+ {
+ inst_TT(INS_NEG, op1, 0, 0, emitTypeSize(treeType));
+ }
+#else // not _TARGET_XARCH_
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ inst_TT_IV(INS_NEG, op1, 0, 0, emitTypeSize(treeType), flags);
+ }
+ else
+ {
+ // Fix 388382 ARM JitStress WP7
+ var_types op1Type = op1->TypeGet();
+ regNumber reg = regSet.rsPickFreeReg();
+ inst_RV_TT(ins_Load(op1Type), reg, op1, 0, emitTypeSize(op1Type));
+ regTracker.rsTrackRegTrash(reg);
+ inst_RV_IV(INS_NEG, reg, 0, emitTypeSize(treeType), flags);
+ inst_TT_RV(ins_Store(op1Type), op1, reg, 0, emitTypeSize(op1Type));
+ }
+#endif
+ if (op1->gtFlags & GTF_REG_VAL)
+ regTracker.rsTrackRegTrash(op1->gtRegNum);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, tree->gtRegNum, /* ovfl */ false);
+ return;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ genCodeForTreeSmpBinArithLogOp(tree, destReg, bestReg);
+ return;
+
+ case GT_UMOD:
+ genCodeForUnsignedMod(tree, destReg, bestReg);
+ return;
+
+ case GT_MOD:
+ genCodeForSignedMod(tree, destReg, bestReg);
+ return;
+
+ case GT_UDIV:
+ genCodeForUnsignedDiv(tree, destReg, bestReg);
+ return;
+
+ case GT_DIV:
+ genCodeForSignedDiv(tree, destReg, bestReg);
+ return;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ genCodeForShift(tree, destReg, bestReg);
+ return;
+
+ case GT_NEG:
+ case GT_NOT:
+
+ /* Generate the operand into some register */
+
+ genCompIntoFreeReg(op1, needReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ reg = op1->gtRegNum;
+
+ /* Negate/reverse the value in the register */
+
+ inst_RV((oper == GT_NEG) ? INS_NEG : INS_NOT, reg, treeType);
+
+ /* The register is now trashed */
+
+ regTracker.rsTrackRegTrash(reg);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK: // At this point, explicit null checks are just like inds...
+
+ /* Make sure the operand is addressable */
+
+ addrReg = genMakeAddressable(tree, RBM_ALLINT, RegSet::KEEP_REG, true);
+
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+
+ /* Figure out the size of the value being loaded */
+
+ size = EA_ATTR(genTypeSize(tree->gtType));
+
+ /* Pick a register for the value */
+
+ if (needReg == RBM_ALLINT && bestReg == 0)
+ {
+ /* Absent a better suggestion, pick a useless register */
+
+ bestReg = regSet.rsExcludeHint(regSet.rsRegMaskFree(), ~regTracker.rsUselessRegs());
+ }
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (op1->IsCnsIntOrI() && op1->IsIconHandle(GTF_ICON_TLS_HDL))
+ {
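+ // A TLS handle constant: on x86 this loads the value from an FS-segment-relative
+ // address (FLD_GLOBAL_FS) rather than through a normal address mode.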
+ noway_assert(size == EA_PTRSIZE);
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, FLD_GLOBAL_FS,
+ (int)op1->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ /* Generate "mov reg, [addr]" or "movsx/movzx reg, [addr]" */
+
+ inst_mov_RV_ST(reg, tree);
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Note the new contents of the register we used */
+
+ regTracker.rsTrackRegTrash(reg);
+
+#ifdef DEBUG
+ /* Update the live set of register variables */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ /* Now we can update the register pointer information */
+
+ // genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE_LIFE(tree, reg);
+ return;
+
+ case GT_CAST:
+
+ genCodeForNumericCast(tree, destReg, bestReg);
+ return;
+
+ case GT_JTRUE:
+
+ /* Is this a test of a relational operator? */
+
+ if (op1->OperIsCompare())
+ {
+ /* Generate the conditional jump */
+
+ genCondJump(op1);
+
+ genUpdateLife(tree);
+ return;
+ }
+
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("ISSUE: can we ever have a jumpCC without a compare node?");
+ break;
+
+ case GT_SWITCH:
+ genCodeForSwitch(tree);
+ return;
+
+ case GT_RETFILT:
+ noway_assert(tree->gtType == TYP_VOID || op1 != 0);
+ if (op1 == 0) // endfinally
+ {
+ reg = REG_NA;
+
+#ifdef _TARGET_XARCH_
+ /* Return using a pop-jmp sequence. As the "try" block calls
+ the finally with a jmp, this leaves the x86 call-ret stack
+ balanced in the normal flow path. */
+
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
+#elif defined(_TARGET_ARM_)
+// Nothing needed for ARM
+#else
+ NYI("TARGET");
+#endif
+ }
+ else // endfilter
+ {
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ /* The return value has now been computed */
+ reg = op1->gtRegNum;
+
+ /* Return */
+ instGen_Return(0);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_RETURN:
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+ /* Is there a return value and/or an exit statement? */
+
+ if (op1)
+ {
+ if (op1->gtType == TYP_VOID)
+ {
+ // We're returning nothing, just generate the block (shared epilog calls).
+ genCodeForTree(op1, 0);
+ }
+#ifdef _TARGET_ARM_
+ else if (op1->gtType == TYP_STRUCT)
+ {
+ if (op1->gtOper == GT_CALL)
+ {
+ // We have a return call() because we failed to tail call.
+ // In any case, just generate the call and be done.
+ assert(compiler->IsHfa(op1));
+ genCodeForCall(op1, true);
+ genMarkTreeInReg(op1, REG_FLOATRET);
+ }
+ else
+ {
+ assert(op1->gtOper == GT_LCL_VAR);
+ assert(compiler->IsHfa(compiler->lvaGetStruct(op1->gtLclVarCommon.gtLclNum)));
+ genLoadIntoFltRetRegs(op1);
+ }
+ }
+ else if (op1->TypeGet() == TYP_FLOAT)
+ {
+ // This can only occur when we are returning a non-HFA struct
+ // that is composed of a single float field and we performed
+ // struct promotion and enregistered the float field.
+ //
+ genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, REG_INTRET, op1->gtRegNum);
+ }
+#endif // _TARGET_ARM_
+ else
+ {
+ // we can now go through this code for compiler->genReturnBB. I've regularized all the code.
+
+ // noway_assert(compiler->compCurBB != compiler->genReturnBB);
+
+ noway_assert(op1->gtType != TYP_VOID);
+
+ /* Generate the return value into the return register */
+
+ genComputeReg(op1, RBM_INTRET, RegSet::EXACT_REG, RegSet::FREE_REG);
+
+ /* The result must now be in the return register */
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegNum == REG_INTRET);
+ }
+
+ /* The return value has now been computed */
+
+ reg = op1->gtRegNum;
+
+ genCodeForTree_DONE(tree, reg);
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+ return;
+
+ case GT_COMMA:
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (tree->gtType == TYP_VOID)
+ {
+ genEvalSideEffects(op2);
+ genUpdateLife(op2);
+ genEvalSideEffects(op1);
+ genUpdateLife(tree);
+ return;
+ }
+
+ // Generate op2
+ genCodeForTree(op2, needReg);
+ genUpdateLife(op2);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsMarkRegUsed(op2);
+
+ // Do side effects of op1
+ genEvalSideEffects(op1);
+
+ // Recover op2 if spilled
+ genRecoverReg(op2, RBM_NONE, RegSet::KEEP_REG);
+
+ regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
+
+ // set gc info if we need so
+ gcInfo.gcMarkRegPtrVal(op2->gtRegNum, treeType);
+
+ genUpdateLife(tree);
+ genCodeForTree_DONE(tree, op2->gtRegNum);
+
+ return;
+ }
+ else
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate side effects of the first operand */
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+
+ /* Is the value of the second operand used? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* The right operand produces no result. The morpher is
+ responsible for resetting the type of GT_COMMA nodes
+ to TYP_VOID if op2 isn't meant to yield a result. */
+
+ genEvalSideEffects(op2);
+ genUpdateLife(tree);
+ return;
+ }
+
+ /* Generate the second operand, i.e. the 'real' value */
+
+ genCodeForTree(op2, needReg);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* The result of 'op2' is also the final result */
+
+ reg = op2->gtRegNum;
+
+ /* Remember whether we set the flags */
+
+ tree->gtFlags |= (op2->gtFlags & GTF_ZSF_SET);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ case GT_BOX:
+ genCodeForTree(op1, needReg);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* The result of 'op1' is also the final result */
+
+ reg = op1->gtRegNum;
+
+ /* Remember whether we set the flags */
+
+ tree->gtFlags |= (op1->gtFlags & GTF_ZSF_SET);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_QMARK:
+
+ genCodeForQmark(tree, destReg, bestReg);
+ return;
+
+ case GT_NOP:
+
+#if OPT_BOOL_OPS
+ if (op1 == NULL)
+ return;
+#endif
+
+ /* Generate the operand into some register */
+
+ genCodeForTree(op1, needReg);
+
+ /* The result is the same as the operand */
+
+ reg = op1->gtRegNum;
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_INTRINSIC:
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Round:
+ {
+ noway_assert(tree->gtType == TYP_INT);
+
+#if FEATURE_STACK_FP_X87
+ genCodeForTreeFlt(op1);
+
+ /* Store the FP value into the temp */
+ TempDsc* temp = compiler->tmpGetTemp(TYP_INT);
+
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+ FlatFPX87_Kill(&compCurFPState, op1->gtRegNum);
+ inst_FS_ST(INS_fistp, EA_4BYTE, temp, 0);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+ regTracker.rsTrackRegTrash(reg);
+
+ inst_RV_ST(INS_mov, reg, temp, 0, TYP_INT);
+
+ compiler->tmpRlsTemp(temp);
+#else
+ genCodeForTreeFloat(tree, needReg, bestReg);
+ return;
+#endif
+ }
+ break;
+
+ default:
+ noway_assert(!"unexpected math intrinsic");
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_LCLHEAP:
+
+ reg = genLclHeap(op1);
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ genCodeForRelop(tree, destReg, bestReg);
+ return;
+
+ case GT_ADDR:
+
+ genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
+ return;
+
+#ifdef _TARGET_XARCH_
+ case GT_LOCKADD:
+
+ // This is for a locked add operation. We know that the resulting value doesn't "go" anywhere.
+ // For reference, op1 is the location. op2 is the addend or the value.
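+ // A constant addend of +1/-1 can be encoded as 'lock inc/dec [mem]'; any other
+ // constant falls back to 'lock add [mem], imm' (see the switch below).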
+ if (op2->OperIsConst())
+ {
+ noway_assert(op2->TypeGet() == TYP_INT);
+ ssize_t cns = op2->gtIntCon.gtIconVal;
+
+ genComputeReg(op1, RBM_NONE, RegSet::ANY_REG, RegSet::KEEP_REG);
+ switch (cns)
+ {
+ case 1:
+ instGen(INS_lock);
+ instEmit_RM(INS_inc, op1, op1, 0);
+ break;
+ case -1:
+ instGen(INS_lock);
+ instEmit_RM(INS_dec, op1, op1, 0);
+ break;
+ default:
+ assert((int)cns == cns); // By test above for AMD64.
+ instGen(INS_lock);
+ inst_AT_IV(INS_add, EA_4BYTE, op1, (int)cns, 0);
+ break;
+ }
+ genReleaseReg(op1);
+ }
+ else
+ {
+ // non constant addend means it needs to go into a register.
+ ins = INS_add;
+ goto LockBinOpCommon;
+ }
+
+ genFlagsEqualToNone(); // We didn't compute a result into a register.
+ genUpdateLife(tree); // We didn't compute an operand into anything.
+ return;
+
+ case GT_XADD:
+ ins = INS_xadd;
+ goto LockBinOpCommon;
+ case GT_XCHG:
+ ins = INS_xchg;
+ goto LockBinOpCommon;
+ LockBinOpCommon:
+ {
+ // Compute the second operand into a register. xadd and xchg are r/m32, r32. So even if op2
+ // is a constant, it needs to be in a register. This should be the output register if
+ // possible.
+ //
+ // For reference, gtOp1 is the location. gtOp2 is the addend or the value.
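+ //
+ // Roughly, the emitted sequence for GT_XADD ends up as (sketch):
+ //     lock
+ //     xadd dword ptr [location], reg
+ // which leaves the original memory value in 'reg'; GT_XCHG uses 'xchg', which
+ // implies the lock prefix by itself.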
+
+ GenTreePtr location = op1;
+ GenTreePtr value = op2;
+
+ // Again, a friendly reminder. IL calling convention is left to right.
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ // The atomic operations destroy this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ // Must evaluate location into a register
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+ regSet.rsLockUsedReg(genRegMask(location->gtRegNum));
+ genRecoverReg(value, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(location->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ else
+ {
+ regMaskTP addrReg;
+ if (genMakeIndAddrMode(location, tree, false, /* not for LEA */
+ needReg, RegSet::KEEP_REG, &addrReg))
+ {
+ genUpdateLife(location);
+
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ addrReg = genKeepAddressable(location, addrReg, genRegMask(reg));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ // instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+ // inst_TT_RV(ins, location, reg);
+ sched_AM(ins, EA_4BYTE, reg, false, location, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ // Must evaluate location into a register.
+ genCodeForTree(location, needReg, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ regSet.rsMarkRegUsed(location);
+
+ // xadd destroys this argument, so force it into a scratch register
+ reg = regSet.rsPickFreeReg();
+ genComputeReg(value, genRegMask(reg), RegSet::EXACT_REG, RegSet::KEEP_REG);
+ regSet.rsLockUsedReg(genRegMask(value->gtRegNum));
+ genRecoverReg(location, RBM_NONE, RegSet::KEEP_REG);
+ regSet.rsUnlockUsedReg(genRegMask(value->gtRegNum));
+
+ if (ins != INS_xchg)
+ {
+ // xchg implies the lock prefix, but xadd and add require it.
+ instGen(INS_lock);
+ }
+
+ instEmit_RM_RV(ins, EA_4BYTE, location, reg, 0);
+
+ genReleaseReg(value);
+ regTracker.rsTrackRegTrash(reg);
+ genReleaseReg(location);
+ }
+ }
+
+ // The flags are equal to the target of the tree (i.e. the result of the add), not to the
+ // result in the register. If tree is actually GT_IND->GT_ADDR->GT_LCL_VAR, we could use
+ // that information to set the flags. Doesn't seem like there is a good reason for that.
+ // Therefore, trash the flags.
+ genFlagsEqualToNone();
+
+ if (ins == INS_add)
+ {
+ // If the operator was add, then we were called from the GT_LOCKADD
+ // case. In that case we don't use the result, so we don't need to
+ // update anything.
+ genUpdateLife(tree);
+ }
+ else
+ {
+ genCodeForTree_DONE(tree, reg);
+ }
+ }
+ return;
+
+#else // !_TARGET_XARCH_
+
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+
+ NYI_ARM("LOCK instructions");
+#endif
+
+ case GT_ARR_LENGTH:
+ {
+ // Make the corresponding ind(a + c) node, and do codegen for that.
+ GenTreePtr addr = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, tree->gtArrLen.ArrRef(),
+ compiler->gtNewIconNode(tree->AsArrLen()->ArrLenOffset()));
+ tree->SetOper(GT_IND);
+ tree->gtFlags |= GTF_IND_ARR_LEN; // Record that this node represents an array length expression.
+ assert(tree->TypeGet() == TYP_INT);
+ tree->gtOp.gtOp1 = addr;
+ genCodeForTree(tree, destReg, bestReg);
+ return;
+ }
+
+ case GT_OBJ:
+ // All GT_OBJ nodes must have been morphed prior to this.
+ noway_assert(!"Should not see a GT_OBJ node during CodeGen.");
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected unary/binary operator");
+ } // end switch (oper)
+
+ unreached();
+}
+#ifdef _PREFAST_
+#pragma warning(pop) // End suppress PREFast warning about overly large function
+#endif
+
+regNumber CodeGen::genIntegerCast(GenTree* tree, regMaskTP needReg, regMaskTP bestReg)
+{
+ instruction ins;
+ emitAttr size;
+ bool unsv;
+ bool andv = false;
+ regNumber reg;
+ GenTreePtr op1 = tree->gtOp.gtOp1->gtEffectiveVal();
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ // Widening cast
+
+ /* we need the source size */
+
+ size = EA_ATTR(genTypeSize(srcType));
+
+ noway_assert(size < EA_PTRSIZE);
+
+ unsv = varTypeIsUnsigned(srcType);
+ ins = ins_Move_Extend(srcType, op1->InReg());
+
+ /*
+ Special case: for a cast of byte to char we first
+ have to expand the byte (w/ sign extension), then
+ mask off the high bits.
+ Use 'movsx' followed by 'and'
+ */
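+ // e.g. (x86 sketch):  movsx reg, byte ptr [src]
+ //                     and   reg, 0xFFFF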
+ if (!unsv && varTypeIsUnsigned(dstType) && genTypeSize(dstType) < EA_4BYTE)
+ {
+ noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
+ andv = true;
+ }
+ }
+ else
+ {
+ // Narrowing cast, or sign-changing cast
+
+ noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
+
+ size = EA_ATTR(genTypeSize(dstType));
+
+ unsv = varTypeIsUnsigned(dstType);
+ ins = ins_Move_Extend(dstType, op1->InReg());
+ }
+
+ noway_assert(size < EA_PTRSIZE);
+
+ // Set bestReg to the same register as op1 if op1 is a regVar and that register is available
+ if (op1->InReg())
+ {
+ regMaskTP op1RegMask = genRegMask(op1->gtRegNum);
+ if ((((op1RegMask & bestReg) != 0) || (bestReg == 0)) && ((op1RegMask & regSet.rsRegMaskFree()) != 0))
+ {
+ bestReg = op1RegMask;
+ }
+ }
+
+ /* Is the value sitting in a non-byte-addressable register? */
+
+ if (op1->InReg() && (size == EA_1BYTE) && !isByteReg(op1->gtRegNum))
+ {
+ if (unsv)
+ {
+ // for unsigned values we can use AND, so the value need not be in a byte register
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ ins = INS_AND;
+ }
+ else
+ {
+ /* Move the value into a byte register */
+
+ reg = regSet.rsGrabReg(RBM_BYTE_REGS);
+ }
+
+ if (reg != op1->gtRegNum)
+ {
+ /* Move the value into that register */
+
+ regTracker.rsTrackRegCopy(reg, op1->gtRegNum);
+ inst_RV_RV(INS_mov, reg, op1->gtRegNum, srcType);
+
+ /* The value has a new home now */
+
+ op1->gtRegNum = reg;
+ }
+ }
+ else
+ {
+ /* Pick a register for the value (general case) */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // if we (might) need to set the flags and the value is in the same register
+ // and we have an unsigned value then use AND instead of MOVZX
+ if (tree->gtSetFlags() && unsv && op1->InReg() && (op1->gtRegNum == reg))
+ {
+#ifdef _TARGET_X86_
+ noway_assert(ins == INS_movzx);
+#endif
+ ins = INS_AND;
+ }
+ }
+
+ if (ins == INS_AND)
+ {
+ noway_assert(andv == false && unsv);
+
+ /* Generate "and reg, MASK */
+
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, (size == EA_1BYTE) ? 0xFF : 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ else
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(ins == INS_movsx || ins == INS_movzx);
+#endif
+
+ /* Generate "movsx/movzx reg, [addr]" */
+
+ inst_RV_ST(ins, size, reg, op1);
+
+ /* Mask off high bits for cast from byte to char */
+
+ if (andv)
+ {
+#ifdef _TARGET_XARCH_
+ noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
+#endif
+ insFlags flags = tree->gtSetFlags() ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_IV(INS_AND, reg, 0xFFFF, EA_4BYTE, flags);
+
+ if (tree->gtSetFlags())
+ genFlagsEqualToReg(tree, reg);
+ }
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+ return reg;
+}
+
+void CodeGen::genCodeForNumericCast(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types baseType = TYP_INT;
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+ emitAttr size;
+ BOOL unsv;
+
+ /*
+ * Constant casts should have been folded earlier
+ * If not finite don't bother
+ * We don't do this optimization for debug code/no optimization
+ */
+
+ noway_assert((op1->gtOper != GT_CNS_INT && op1->gtOper != GT_CNS_LNG && op1->gtOper != GT_CNS_DBL) ||
+ tree->gtOverflow() || (op1->gtOper == GT_CNS_DBL && !_finite(op1->gtDblCon.gtDconVal)) ||
+ !compiler->opts.OptEnabled(CLFLG_CONSTANTFOLD));
+
+ noway_assert(dstType != TYP_VOID);
+
+ /* What type are we casting from? */
+
+ switch (op1->TypeGet())
+ {
+ case TYP_LONG:
+
+ /* Special case: the long is generated via the mod of long
+ with an int. This is really an int and need not be
+ converted to a reg pair. NOTE: the flag only indicates
+ that this is a cast to TYP_INT; it hasn't actually
+ verified the second operand of the MOD! */
+
+ if (((op1->gtOper == GT_MOD) || (op1->gtOper == GT_UMOD)) && (op1->gtFlags & GTF_MOD_INT_RESULT))
+ {
+
+ /* Verify that the op2 of the mod node is
+ 1) An integer tree, or
+ 2) A long constant that is small enough to fit in an integer
+ */
+
+ GenTreePtr modop2 = op1->gtOp.gtOp2;
+ if ((genActualType(modop2->gtType) == TYP_INT) ||
+ ((modop2->gtOper == GT_CNS_LNG) && (modop2->gtLngCon.gtLconVal == (int)modop2->gtLngCon.gtLconVal)))
+ {
+ genCodeForTree(op1, destReg, bestReg);
+
+#ifdef _TARGET_64BIT_
+ reg = op1->gtRegNum;
+#else // _TARGET_64BIT_
+ reg = genRegPairLo(op1->gtRegPair);
+#endif //_TARGET_64BIT_
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+ }
+
+ /* Make the operand addressable. When gtOverflow() is true,
+ hold on to the addrReg as we will need it to access the higher dword */
+
+ op1 = genCodeForCommaTree(op1); // Strip off any commas (necessary, since we seem to generate code for op1
+ // twice!)
+ // See, e.g., the TYP_INT case below...
+
+ addrReg = genMakeAddressable2(op1, 0, tree->gtOverflow() ? RegSet::KEEP_REG : RegSet::FREE_REG, false);
+
+ /* Load the lower half of the value into some register */
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ /* Can we simply use the low part of the value? */
+ reg = genRegPairLo(op1->gtRegPair);
+
+ if (tree->gtOverflow())
+ goto REG_OK;
+
+ regMaskTP loMask;
+ loMask = genRegMask(reg);
+ if (loMask & regSet.rsRegMaskFree())
+ bestReg = loMask;
+ }
+
+ // for cast overflow we need to preserve addrReg for testing the hiDword
+ // so we lock it to prevent regSet.rsPickReg from picking it.
+ if (tree->gtOverflow())
+ regSet.rsLockUsedReg(addrReg);
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ if (tree->gtOverflow())
+ regSet.rsUnlockUsedReg(addrReg);
+
+ noway_assert(genStillAddressable(op1));
+
+ REG_OK:
+ if (((op1->gtFlags & GTF_REG_VAL) == 0) || (reg != genRegPairLo(op1->gtRegPair)))
+ {
+ /* Generate "mov reg, [addr-mode]" */
+ inst_RV_TT(ins_Load(TYP_INT), reg, op1);
+ }
+
+ /* conv.ovf.i8i4, or conv.ovf.u8u4 */
+
+ if (tree->gtOverflow())
+ {
+ regNumber hiReg = (op1->gtFlags & GTF_REG_VAL) ? genRegPairHi(op1->gtRegPair) : REG_NA;
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+
+ switch (dstType)
+ {
+ case TYP_INT:
+ // conv.ovf.i8.i4
+ /* Generate the following sequence
+
+ test loDWord, loDWord // set flags
+ jl neg
+ pos: test hiDWord, hiDWord // set flags
+ jne ovf
+ jmp done
+ neg: cmp hiDWord, 0xFFFFFFFF
+ jne ovf
+ done:
+
+ */
+
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, reg);
+ if (tree->gtFlags & GTF_UNSIGNED) // conv.ovf.u8.i4 (i4 > 0 and upper bits 0)
+ {
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ goto UPPER_BITS_ZERO;
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ // This is tricky.
+ // We will generate code like
+ // if (...)
+ // {
+ // ...
+ // }
+ // else
+ // {
+ // ...
+ // }
+ // We load the tree op1 into registers when we generate code for the if clause.
+ // When we generate the else clause, we see the tree is already loaded into a register and start
+ // using it directly.
+ // However, at run time we may execute the else clause without going through the if clause.
+ //
+ genCodeForTree(op1, 0);
+#endif
+
+ BasicBlock* neg;
+ BasicBlock* done;
+
+ neg = genCreateTempLabel();
+ done = genCreateTempLabel();
+
+ // Is the loDWord positive or negative
+ inst_JMP(jmpLTS, neg);
+
+ // If loDWord is positive, hiDWord should be 0 (sign extended loDWord)
+
+ if (hiReg < REG_STK)
+ {
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0x00000000, 4);
+ }
+
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ inst_JMP(EJ_jmp, done);
+
+ // If loDWord is negative, hiDWord should be -1 (sign extended loDWord)
+
+ genDefineTempLabel(neg);
+
+ if (hiReg < REG_STK)
+ {
+ inst_RV_IV(INS_cmp, hiReg, 0xFFFFFFFFL, EA_4BYTE);
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0xFFFFFFFFL, 4);
+ }
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+
+ // Done
+
+ genDefineTempLabel(done);
+
+ break;
+
+ case TYP_UINT: // conv.ovf.u8u4
+ UPPER_BITS_ZERO:
+ // Just check that the upper DWord is 0
+
+ if (hiReg < REG_STK)
+ {
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0, 4);
+ }
+
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ break;
+
+ default:
+ noway_assert(!"Unexpected dstType");
+ break;
+ }
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_UBYTE:
+ break;
+
+ case TYP_UINT:
+ case TYP_INT:
+ break;
+
+#if FEATURE_STACK_FP_X87
+ case TYP_FLOAT:
+ NO_WAY("OPCAST from TYP_FLOAT should have been converted into a helper call");
+ break;
+
+ case TYP_DOUBLE:
+ if (compiler->opts.compCanUseSSE2)
+ {
+ // do the SSE2 based cast inline
+ // getting the fp operand
+
+ regMaskTP addrRegInt = 0;
+ regMaskTP addrRegFlt = 0;
+
+ // make the operand addressable
+ // We don't want to collapse constant doubles into floats, as the SSE2 instruction
+ // operates on doubles. Note that these (casts from constant doubles) usually get
+ // folded, but we don't do it for some cases (infinities, etc.). So essentially this
+ // shouldn't affect performance or size at all. We're fixing this for #336067
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt, false);
+ if (!addrRegFlt && !op1->IsRegVar())
+ {
+ // we have the address
+
+ inst_RV_TT(INS_movsdsse2, REG_XMM0, op1, 0, EA_8BYTE);
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+
+ reg = regSet.rsPickReg(needReg);
+ getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
+
+ regTracker.rsTrackRegTrash(reg);
+ genCodeForTree_DONE(tree, reg);
+ }
+ else
+ {
+ // we will need to use a temp to get it into the xmm reg
+ var_types typeTemp = op1->TypeGet();
+ TempDsc* temp = compiler->tmpGetTemp(typeTemp);
+
+ size = EA_ATTR(genTypeSize(typeTemp));
+
+ if (addrRegFlt)
+ {
+ // On the fp stack; Take reg to top of stack
+
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+ }
+ else
+ {
+ // op1->IsRegVar()
+ // pick a register
+ reg = regSet.PickRegFloat();
+ if (!op1->IsRegVarDeath())
+ {
+ // Load it on the fp stack
+ genLoadStackFP(op1, reg);
+ }
+ else
+ {
+ // if it's dying, genLoadStackFP just renames it and then we move reg to TOS
+ genLoadStackFP(op1, reg);
+ FlatFPX87_MoveToTOS(&compCurFPState, reg);
+ }
+ }
+
+ // pop it off the fp stack
+ compCurFPState.Pop();
+
+ getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
+ // pick a reg
+ reg = regSet.rsPickReg(needReg);
+
+ inst_RV_ST(INS_movsdsse2, REG_XMM0, temp, 0, TYP_DOUBLE, EA_8BYTE);
+ getEmitter()->emitIns_R_R(INS_cvttsd2si, EA_8BYTE, reg, REG_XMM0);
+
+ // done..release the temp
+ compiler->tmpRlsTemp(temp);
+
+ // the reg is now trashed
+ regTracker.rsTrackRegTrash(reg);
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+ genCodeForTree_DONE(tree, reg);
+ }
+ }
+#else
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ genCodeForTreeFloat(tree, needReg, bestReg);
+#endif // FEATURE_STACK_FP_X87
+ return;
+
+ default:
+ noway_assert(!"unexpected cast type");
+ }
+
+ if (tree->gtOverflow())
+ {
+ /* Compute op1 into a register, and free the register */
+
+ genComputeReg(op1, destReg, RegSet::ANY_REG, RegSet::FREE_REG);
+ reg = op1->gtRegNum;
+
+ /* Do we need to compare the value, or just check masks */
+
+ ssize_t typeMin = DUMMY_INIT(~0), typeMax = DUMMY_INIT(0);
+ ssize_t typeMask;
+
+ switch (dstType)
+ {
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ break;
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ break;
+ case TYP_INT:
+ typeMask = ssize_t((int)0x80000000L);
+#ifdef _TARGET_64BIT_
+ unsv = (tree->gtFlags & GTF_UNSIGNED);
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+#else // _TARGET_64BIT_
+ noway_assert((tree->gtFlags & GTF_UNSIGNED) != 0);
+ unsv = true;
+#endif // _TARGET_64BIT_
+ break;
+ case TYP_UBYTE:
+ unsv = true;
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+ case TYP_CHAR:
+ unsv = true;
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+ case TYP_UINT:
+ unsv = true;
+#ifdef _TARGET_64BIT_
+ typeMask = 0xFFFFFFFF00000000LL;
+#else // _TARGET_64BIT_
+ typeMask = 0x80000000L;
+ noway_assert((tree->gtFlags & GTF_UNSIGNED) == 0);
+#endif // _TARGET_64BIT_
+ break;
+ default:
+ NO_WAY("Unknown type");
+ return;
+ }
+
+ // If we just have to check a mask.
+ // This must be conv.ovf.u4u1, conv.ovf.u4u2, conv.ovf.u4i4,
+ // or conv.i4u4
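+ //
+ // e.g. (sketch) for a cast to TYP_UBYTE this becomes:
+ //     test reg, 0xFFFFFF00
+ //     jne  <overflow throw block>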
+
+ if (unsv)
+ {
+ inst_RV_IV(INS_TEST, reg, typeMask, emitActualTypeSize(baseType));
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpNotEqual, SCK_OVERFLOW);
+ }
+ else
+ {
+ // Check the value is in range.
+ // This must be conv.ovf.i4i1, etc.
+
+ // Compare with the MAX
+
+ noway_assert(typeMin != DUMMY_INIT(~0) && typeMax != DUMMY_INIT(0));
+
+ inst_RV_IV(INS_cmp, reg, typeMax, emitActualTypeSize(baseType));
+ emitJumpKind jmpGTS = genJumpKindForOper(GT_GT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpGTS, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ inst_RV_IV(INS_cmp, reg, typeMin, emitActualTypeSize(baseType));
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+
+ genCodeForTree_DONE(tree, reg);
+ return;
+ }
+
+ /* Make the operand addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG, true);
+
+ reg = genIntegerCast(tree, needReg, bestReg);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a leaf node of type GT_ADDR
+ */
+
+void CodeGen::genCodeForTreeSmpOp_GT_ADDR(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1;
+ regNumber reg;
+ regMaskTP needReg = destReg;
+ regMaskTP addrReg;
+
+#ifdef DEBUG
+ reg = (regNumber)0xFEEFFAAF; // to detect uninitialized use
+ addrReg = 0xDEADCAFE;
+#endif
+
+ // We should get here for ldloca, ldarga, ldsflda, ldelema,
+ // or ldflda.
+ if (oper == GT_ARR_ELEM)
+ {
+ op1 = tree;
+ }
+ else
+ {
+ op1 = tree->gtOp.gtOp1;
+ }
+
+ // (tree=op1, needReg=0, keepReg=RegSet::FREE_REG, smallOK=true)
+ if (oper == GT_ARR_ELEM)
+ {
+ // To get the address of the array element,
+ // we first call genMakeAddrArrElem to make the element addressable.
+ // (That is, for example, we first emit code to calculate EBX, and EAX.)
+ // And then use lea to obtain the address.
+ // (That is, for example, we then emit
+ // lea EBX, bword ptr [EBX+4*EAX+36]
+ // to obtain the address of the array element.)
+ addrReg = genMakeAddrArrElem(op1, tree, RBM_NONE, RegSet::FREE_REG);
+ }
+ else
+ {
+ addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG, true);
+ }
+
+ noway_assert(treeType == TYP_BYREF || treeType == TYP_I_IMPL);
+
+ // We want to reuse one of the scratch registers that were used
+ // in forming the address mode as the target register for the lea.
+ // If bestReg is unset or if it is set to one of the registers used to
+ // form the address (i.e. addrReg), we calculate the scratch register
+ // to use as the target register for the LEA
+
+ bestReg = regSet.rsUseIfZero(bestReg, addrReg);
+ bestReg = regSet.rsNarrowHint(bestReg, addrReg);
+
+ /* Even if addrReg is regSet.rsRegMaskCanGrab(), regSet.rsPickReg() won't spill
+ it since keepReg==false.
+ If addrReg can't be grabbed, regSet.rsPickReg() won't touch it anyway.
+ So this is guaranteed not to spill addrReg */
+
+ reg = regSet.rsPickReg(needReg, bestReg);
+
+ // Slight workaround: force the inst routine to think that the
+ // value being loaded is an int (since that is what
+ // LEA will return); otherwise it would try to allocate
+ // two registers for a long, etc.
+ noway_assert(treeType == TYP_I_IMPL || treeType == TYP_BYREF);
+ op1->gtType = treeType;
+
+ inst_RV_TT(INS_lea, reg, op1, 0, (treeType == TYP_BYREF) ? EA_BYREF : EA_PTRSIZE);
+
+ // The Lea instruction above better not have tried to put the
+ // 'value' pointed to by 'op1' in a register, LEA will not work.
+ noway_assert(!(op1->gtFlags & GTF_REG_VAL));
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ // gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ noway_assert((gcInfo.gcRegGCrefSetCur & genRegMask(reg)) == 0);
+
+ regTracker.rsTrackRegTrash(reg); // reg does have foldable value in it
+ gcInfo.gcMarkRegPtrVal(reg, treeType);
+
+ genCodeForTree_DONE(tree, reg);
+}
+
+#ifdef _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Move (load/store) between float ret regs and struct promoted variable.
+ *
+ * varDsc - The struct variable to be loaded from or stored into.
+ * isLoadIntoFlt - Perform a load operation if "true" or store if "false."
+ *
+ */
+void CodeGen::genLdStFltRetRegsPromotedVar(LclVarDsc* varDsc, bool isLoadIntoFlt)
+{
+ regNumber curReg = REG_FLOATRET;
+
+ unsigned lclLast = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
+ for (unsigned lclNum = varDsc->lvFieldLclStart; lclNum <= lclLast; ++lclNum)
+ {
+ LclVarDsc* varDscFld = &compiler->lvaTable[lclNum];
+
+ // Is the struct field promoted and sitting in a register?
+ if (varDscFld->lvRegister)
+ {
+ // Move from the struct field into curReg if load
+ // else move into struct field from curReg if store
+ regNumber srcReg = (isLoadIntoFlt) ? varDscFld->lvRegNum : curReg;
+ regNumber dstReg = (isLoadIntoFlt) ? curReg : varDscFld->lvRegNum;
+ if (srcReg != dstReg)
+ {
+ inst_RV_RV(ins_Copy(varDscFld->TypeGet()), dstReg, srcReg, varDscFld->TypeGet());
+ regTracker.rsTrackRegCopy(dstReg, srcReg);
+ }
+ }
+ else
+ {
+ // This field is in memory, do a move between the field and float registers.
+ emitAttr size = (varDscFld->TypeGet() == TYP_DOUBLE) ? EA_8BYTE : EA_4BYTE;
+ if (isLoadIntoFlt)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(varDscFld->TypeGet()), size, curReg, lclNum, 0);
+ regTracker.rsTrackRegTrash(curReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(varDscFld->TypeGet()), size, curReg, lclNum, 0);
+ }
+ }
+
+ // Advance the current reg.
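+ // A TYP_DOUBLE field occupies two consecutive single-precision registers, so skip two.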
+ curReg = (varDscFld->TypeGet() == TYP_DOUBLE) ? REG_NEXT(REG_NEXT(curReg)) : REG_NEXT(curReg);
+ }
+}
+
+void CodeGen::genLoadIntoFltRetRegs(GenTreePtr tree)
+{
+ assert(tree->TypeGet() == TYP_STRUCT);
+ assert(tree->gtOper == GT_LCL_VAR);
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ int slots = varDsc->lvSize() / REGSIZE_BYTES;
+ if (varDsc->lvPromoted)
+ {
+ genLdStFltRetRegsPromotedVar(varDsc, true);
+ }
+ else
+ {
+ if (slots <= 2)
+ {
+ // Use the load float/double instruction.
+ inst_RV_TT(ins_Load((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), REG_FLOATRET, tree, 0,
+ (slots == 1) ? EA_4BYTE : EA_8BYTE);
+ }
+ else
+ {
+ // Use the load/store multiple instruction (vldm).
+ regNumber reg = regSet.rsPickReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, reg, tree, 0, EA_PTRSIZE);
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_R_R_I(INS_vldm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
+ }
+ }
+ genMarkTreeInReg(tree, REG_FLOATRET);
+}
+
+void CodeGen::genStoreFromFltRetRegs(GenTreePtr tree)
+{
+ assert(tree->TypeGet() == TYP_STRUCT);
+ assert(tree->OperGet() == GT_ASG);
+
+ // LHS should be lcl var or fld.
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // TODO: We had a bug where op1 was a GT_IND, the result of morphing a GT_BOX, and not properly
+ // handling multiple levels of inlined functions that return HFA on the right-hand-side.
+ // So, make the op1 check a noway_assert (that exists in non-debug builds) so we'll fall
+ // back to MinOpts with no inlining, if we don't have what we expect. We don't want to
+ // do the full IsHfa() check in non-debug, since that involves VM calls, so leave that
+ // as a regular assert().
+ noway_assert((op1->gtOper == GT_LCL_VAR) || (op1->gtOper == GT_LCL_FLD));
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ assert(compiler->IsHfa(compiler->lvaGetStruct(varNum)));
+
+ // The RHS should be a call.
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ assert(op2->gtOper == GT_CALL);
+
+ // Generate code for call and copy the return registers into the local.
+ regMaskTP retMask = genCodeForCall(op2, true);
+
+ // Ret mask should be contiguously set from s0 up to s3, or from d0 up to d3.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ regMaskTP mask = ((retMask >> REG_FLOATRET) + 1);
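+ // (retMask >> REG_FLOATRET) + 1 is a power of two when the returned float register
+ // bits form a contiguous run starting at s0; the asserts below check exactly that.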
+ assert((mask & (mask - 1)) == 0);
+ assert(mask <= (1 << MAX_HFA_RET_SLOTS));
+ assert((retMask & (((regMaskTP)RBM_FLOATRET) - 1)) == 0);
+#endif
+
+ int slots = genCountBits(retMask & RBM_ALLFLOAT);
+
+ LclVarDsc* varDsc = &compiler->lvaTable[varNum];
+
+ if (varDsc->lvPromoted)
+ {
+ genLdStFltRetRegsPromotedVar(varDsc, false);
+ }
+ else
+ {
+ if (slots <= 2)
+ {
+ inst_TT_RV(ins_Store((slots == 1) ? TYP_FLOAT : TYP_DOUBLE), op1, REG_FLOATRET, 0,
+ (slots == 1) ? EA_4BYTE : EA_8BYTE);
+ }
+ else
+ {
+ regNumber reg = regSet.rsPickReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, reg, op1, 0, EA_PTRSIZE);
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_R_R_I(INS_vstm, EA_4BYTE, REG_FLOATRET, reg, slots * REGSIZE_BYTES);
+ }
+ }
+}
+
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Generate code for a GT_ASG tree
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeSmpOpAsg(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ASG);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ regMaskTP needReg = RBM_ALLINT;
+ regMaskTP bestReg = RBM_CORRUPT;
+ regMaskTP addrReg = DUMMY_INIT(RBM_CORRUPT);
+ bool ovfl = false; // Do we need an overflow check
+ bool volat = false; // Is this a volatile store
+ regMaskTP regGC;
+ instruction ins;
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclILoffs = DUMMY_INIT(0);
+#endif
+
+#ifdef _TARGET_ARM_
+ if (tree->gtType == TYP_STRUCT)
+ {
+ // We use copy block to assign structs, however to receive HFAs in registers
+ // from a CALL, we use assignment, var = (hfa) call();
+ assert(compiler->IsHfa(tree));
+ genStoreFromFltRetRegs(tree);
+ return;
+ }
+#endif
+
+#ifdef DEBUG
+ if (varTypeIsFloating(op1) != varTypeIsFloating(op2))
+ {
+ if (varTypeIsFloating(op1))
+ assert(!"Bad IL: Illegal assignment of integer into float!");
+ else
+ assert(!"Bad IL: Illegal assignment of float into integer!");
+ }
+#endif
+
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ op1 = genCodeForCommaTree(op1); // Strip away any comma expressions.
+ }
+
+ /* Is the target a register or local variable? */
+ switch (op1->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_VAR:
+ varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ * Remember the local var info to call siCheckVarScope
+ * AFTER code generation of the assignment.
+ */
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ /* Check against dead store ? (with min opts we may have dead stores) */
+
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+ /* Does this variable live in a register? */
+
+ if (genMarkLclVar(op1))
+ goto REG_VAR2;
+
+ break;
+
+ REG_VAR2:
+
+ /* Get hold of the target register */
+
+ regNumber op1Reg;
+
+ op1Reg = op1->gtRegVar.gtRegNum;
+
+#ifdef DEBUG
+ /* Compute the RHS (hopefully) into the variable's register.
+ For debuggable code, op1Reg may already be part of regSet.rsMaskVars,
+ as variables are kept alive everywhere. So we have to be
+ careful if we want to compute the value directly into
+ the variable's register. */
+
+ bool needToUpdateRegSetCheckLevel;
+ needToUpdateRegSetCheckLevel = false;
+#endif
+
+ // We should only be accessing lvVarIndex if varDsc is tracked.
+ assert(varDsc->lvTracked);
+
+ if (VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex))
+ {
+ noway_assert(compiler->opts.compDbgCode);
+
+ /* The predictor might expect us to generate op2 directly
+ into the var's register. However, since the variable is
+ already alive, first kill it and its register. */
+
+ if (rpCanAsgOperWithoutReg(op2, true))
+ {
+ genUpdateLife(VarSetOps::RemoveElem(compiler, compiler->compCurLife, varDsc->lvVarIndex));
+ needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
+#ifdef DEBUG
+ needToUpdateRegSetCheckLevel = true;
+#endif
+ }
+ }
+ else
+ {
+ needReg = regSet.rsNarrowHint(needReg, genRegMask(op1Reg));
+ }
+
+#ifdef DEBUG
+
+ /* Special cases: op2 is a GT_CNS_INT */
+
+ if (op2->gtOper == GT_CNS_INT && !(op1->gtFlags & GTF_VAR_DEATH))
+ {
+ /* Save the old life status */
+
+ VarSetOps::Assign(compiler, genTempOldLife, compiler->compCurLife);
+ VarSetOps::AddElemD(compiler, compiler->compCurLife, varDsc->lvVarIndex);
+
+ /* Set a flag to avoid printing the message
+ and remember that life was changed. */
+
+ genTempLiveChg = false;
+ }
+#endif
+
+#ifdef DEBUG
+ if (needToUpdateRegSetCheckLevel)
+ compiler->compRegSetCheckLevel++;
+#endif
+ genCodeForTree(op2, needReg, genRegMask(op1Reg));
+#ifdef DEBUG
+ if (needToUpdateRegSetCheckLevel)
+ compiler->compRegSetCheckLevel--;
+ noway_assert(compiler->compRegSetCheckLevel >= 0);
+#endif
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the value ends up in the right place ... */
+
+ if (op2->gtRegNum != op1Reg)
+ {
+ /* Make sure the target of the store is available */
+
+ if (regSet.rsMaskUsed & genRegMask(op1Reg))
+ regSet.rsSpillReg(op1Reg);
+
+#ifdef _TARGET_ARM_
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ // This can only occur when we are returning a non-HFA struct
+ // that is composed of a single float field.
+ //
+ inst_RV_RV(INS_vmov_i2f, op1Reg, op2->gtRegNum, op1->TypeGet());
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ inst_RV_RV(INS_mov, op1Reg, op2->gtRegNum, op1->TypeGet());
+ }
+
+ /* The value has been transferred to 'op1Reg' */
+
+ regTracker.rsTrackRegCopy(op1Reg, op2->gtRegNum);
+
+ if ((genRegMask(op2->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(op2->gtRegNum));
+
+ gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
+ }
+ else
+ {
+ // First we need to remove it from the original reg set mask (or else trigger an
+ // assert when we add it to the other reg set mask).
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1Reg));
+ gcInfo.gcMarkRegPtrVal(op1Reg, tree->TypeGet());
+
+ // The emitter has logic that tracks the GCness of registers and asserts if you
+ // try to do bad things to a GC pointer (like lose its GCness).
+
+ // An explicit cast of a GC pointer to an int (which is legal if the
+ // pointer is pinned) is encoded as an assignment of a GC source
+ // to an integer variable. Unfortunately if the source was the last
+ // use, and the source register gets reused by the destination, no
+ // code gets emitted (That is where we are at right now). The emitter
+ // thinks the register is a GC pointer (it did not see the cast).
+ // This causes asserts, as well as bad GC info since we will continue
+ // to report the register as a GC pointer even if we do arithmetic
+ // with it. So force the emitter to see the change in the type
+ // of variable by placing a label.
+ // We only have to do this check at this point because in the
+ // CAST morphing, we create a temp and assignment whenever we
+ // have a cast that loses its GCness.
+
+ if (varTypeGCtype(op2->TypeGet()) != varTypeGCtype(op1->TypeGet()))
+ {
+ void* label = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur);
+ }
+ }
+
+ addrReg = 0;
+
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, op1Reg, ovfl);
+ goto LExit;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[op1->gtLclFld.gtLclNum].lvRegister == 0);
+ break;
+
+ case GT_CLS_VAR:
+
+ __fallthrough;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ assert((op1->OperGet() == GT_CLS_VAR) || (op1->OperGet() == GT_IND));
+
+ if (op1->gtFlags & GTF_IND_VOLATILE)
+ {
+ volat = true;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* Is the value being assigned a simple one? */
+
+ noway_assert(op2);
+ switch (op2->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(op2))
+ goto SMALL_ASG;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ /* Is the target a byte/short/char value? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ goto SMALL_ASG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ goto SMALL_ASG;
+
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1); // Strip away comma expressions.
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ // Was assignment done by the WriteBarrier
+ if (regGC == RBM_NONE)
+ {
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegVar.gtRegNum);
+
+ // This is done in WriteBarrier when (regGC != RBM_NONE)
+
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+
+ /* Free up the RHS */
+ genUpdateLife(op2);
+
+ /* Remember that we've also touched the op2 register */
+
+ addrReg |= genRegMask(op2->gtRegVar.gtRegNum);
+ break;
+
+ case GT_CNS_INT:
+
+ ssize_t ival;
+ ival = op2->gtIntCon.gtIconVal;
+ emitAttr size;
+ size = emitTypeSize(tree->TypeGet());
+
+ ins = ins_Store(op1->TypeGet());
+
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here;
+ // this normally takes place in CodeGen::inst_TT_IV on x86.
+ //
+ if ((op1->gtOper == GT_LCL_VAR) && (size < EA_4BYTE))
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ // Fix the immediate by sign extending if needed
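+ // e.g. (sketch) for a signed byte local, ival == 0xFF widens to 0xFFFFFFFF (-1),
+ // so that the widened store below writes the correctly sign-extended value.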
+ if (!varTypeIsUnsigned(varDsc->TypeGet()))
+ {
+ if (size == EA_1BYTE)
+ {
+ if ((ival & 0x7f) != ival)
+ ival = ival | 0xffffff00;
+ }
+ else
+ {
+ assert(size == EA_2BYTE);
+ if ((ival & 0x7fff) != ival)
+ ival = ival | 0xffff0000;
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ if (!varDsc->lvIsStructField)
+ {
+ size = EA_SET_SIZE(size, EA_4BYTE);
+ ins = ins_Store(TYP_INT);
+ }
+ }
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG, true);
+
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ noway_assert(op1->gtOper != GT_REG_VAR);
+ if (compiler->opts.compReloc && op2->IsIconHandle())
+ {
+ /* The constant is actually a handle that may need relocation
+ applied to it. genComputeReg will do the right thing (see
+ code in genCodeForTreeConst), so we'll just call it to load
+ the constant into a register. */
+
+ genComputeReg(op2, needReg & ~addrReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ inst_TT_RV(ins, op1, op2->gtRegNum);
+ genReleaseReg(op2);
+ }
+ else
+ {
+ regSet.rsLockUsedReg(addrReg);
+
+#if REDUNDANT_LOAD
+ bool copyIconFromReg = true;
+ regNumber iconReg = REG_NA;
+
+#ifdef _TARGET_ARM_
+ // Only if the constant can't be encoded in a small instruction,
+ // look for another register to copy the value from. (Assumes
+ // target is a small register.)
+ if ((op1->gtFlags & GTF_REG_VAL) && !isRegPairType(tree->gtType) &&
+ arm_Valid_Imm_For_Small_Mov(op1->gtRegNum, ival, INS_FLAGS_DONT_CARE))
+ {
+ copyIconFromReg = false;
+ }
+#endif // _TARGET_ARM_
+
+ if (copyIconFromReg)
+ {
+ iconReg = regTracker.rsIconIsInReg(ival);
+ if (iconReg == REG_NA)
+ copyIconFromReg = false;
+ }
+
+ if (copyIconFromReg && (isByteReg(iconReg) || (genTypeSize(tree->TypeGet()) == EA_PTRSIZE) ||
+ (genTypeSize(tree->TypeGet()) == EA_4BYTE)))
+ {
+ /* Move the value into the target */
+
+ inst_TT_RV(ins, op1, iconReg, 0, size);
+ }
+ else
+#endif // REDUNDANT_LOAD
+ {
+ inst_TT_IV(ins, op1, ival, 0, size);
+ }
+
+ regSet.rsUnlockUsedReg(addrReg);
+ }
+
+ /* Free up anything that was tied up by the LHS */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ break;
+
+ default:
+
+ SMALL_ASG:
+
+ bool isWriteBarrier = false;
+ regMaskTP needRegOp1 = RBM_ALLINT;
+ RegSet::ExactReg mustReg = RegSet::ANY_REG; // set to RegSet::EXACT_REG for op1 and NOGC helpers
+
+ /* Is the LHS more complex than the RHS? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Is the target a byte/short/char value? */
+
+ if (varTypeIsSmall(op1->TypeGet()))
+ {
+ noway_assert(op1->gtOper != GT_LCL_VAR || (op1->gtFlags & GTF_VAR_CAST) ||
+ // TODO: Why does this have to be true?
+ compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvIsStructField ||
+ compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad());
+
+ if (op2->gtOper == GT_CAST && !op2->gtOverflow())
+ {
+ /* Special case: cast to small type */
+
+ if (op2->CastToType() >= op1->gtType)
+ {
+ /* Make sure the cast operand is not > int */
+
+ if (op2->CastFromType() <= TYP_INT)
+ {
+ /* Cast via a non-smaller type */
+
+ op2 = op2->gtCast.CastOp();
+ }
+ }
+ }
+
+ if (op2->gtOper == GT_AND && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ unsigned mask;
+ switch (op1->gtType)
+ {
+ case TYP_BYTE:
+ mask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ mask = 0x0000FFFF;
+ break;
+ case TYP_CHAR:
+ mask = 0x0000FFFF;
+ break;
+ default:
+ goto SIMPLE_SMALL;
+ }
+
+ if (unsigned(op2->gtOp.gtOp2->gtIntCon.gtIconVal) == mask)
+ {
+ /* Redundant AND */
+
+ op2 = op2->gtOp.gtOp1;
+ }
+ }
+
+ /* Must get the new value into a byte register */
+
+ SIMPLE_SMALL:
+ if (varTypeIsByte(op1->TypeGet()))
+ genComputeReg(op2, RBM_BYTE_REGS, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ else
+ goto NOT_SMALL;
+ }
+ else
+ {
+ NOT_SMALL:
+ /* Generate the RHS into a register */
+
+ isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
+ if (isWriteBarrier)
+ {
+#if NOGC_WRITE_BARRIERS
+ // Exclude the REG_WRITE_BARRIER from op2's needReg mask
+ needReg = Target::exclude_WriteBarrierReg(needReg);
+ mustReg = RegSet::EXACT_REG;
+#else // !NOGC_WRITE_BARRIERS
+ // This code should be generic across architectures.
+
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ needRegOp1 = RBM_ARG_0;
+ needReg = RBM_ARG_1;
+#endif // !NOGC_WRITE_BARRIERS
+ }
+ genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
+ }
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1); // Strip off any comma expressions.
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ /* Make sure the RHS register hasn't been spilled;
+ keep the register marked as "used", otherwise
+ we might get the pointer lifetimes wrong.
+ */
+
+ if (varTypeIsByte(op1->TypeGet()))
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+
+ genRecoverReg(op2, needReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Lock the RHS temporarily (only registers already marked as used can be locked) */
+
+ regSet.rsLockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Make sure the LHS is still addressable */
+
+ addrReg = genKeepAddressable(op1, addrReg);
+
+ /* We can now unlock the (already-used) RHS register */
+
+ regSet.rsUnlockUsedReg(genRegMask(op2->gtRegNum));
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ if (regGC != 0)
+ {
+ // Yes, assignment done by the WriteBarrier
+ noway_assert(isWriteBarrier);
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
+ }
+
+#ifdef DEBUG
+ /* Update the current liveness info */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ // If op2 register is still in use, free it. (Might not be in use, if
+ // a full-call write barrier was done, and the register was a caller-saved
+ // register.)
+ regMaskTP op2RM = genRegMask(op2->gtRegNum);
+ if (op2RM & regSet.rsMaskUsed)
+ regSet.rsMarkRegFree(genRegMask(op2->gtRegNum));
+
+ // This is done in WriteBarrier when (regGC != 0)
+ if (regGC == 0)
+ {
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ isWriteBarrier = gcInfo.gcIsWriteBarrierAsgNode(tree);
+
+ if (isWriteBarrier)
+ {
+#if NOGC_WRITE_BARRIERS
+ /* Try to avoid RBM_TMP_0 */
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~RBM_TMP_0);
+ mustReg = RegSet::EXACT_REG; // For op2
+#else // !NOGC_WRITE_BARRIERS
+ // This code should be generic across architectures.
+
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ needRegOp1 = RBM_ARG_0;
+ needReg = RBM_ARG_1;
+ mustReg = RegSet::EXACT_REG; // For op2
+#endif // !NOGC_WRITE_BARRIERS
+ }
+
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
+
+ op1 = genCodeForCommaTree(op1); // Strip away any comma expression.
+
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+#if CPU_HAS_BYTE_REGS
+ /* Is the target a byte value? */
+ if (varTypeIsByte(op1->TypeGet()))
+ {
+ /* Must get the new value into a byte register */
+ needReg = regSet.rsNarrowHint(RBM_BYTE_REGS, needReg);
+ mustReg = RegSet::EXACT_REG;
+
+ if (op2->gtType >= op1->gtType)
+ op2->gtFlags |= GTF_SMALL_OK;
+ }
+#endif
+
+#if NOGC_WRITE_BARRIERS
+ /* For WriteBarrier we can't use REG_WRITE_BARRIER */
+ if (isWriteBarrier)
+ needReg = Target::exclude_WriteBarrierReg(needReg);
+
+ /* Also avoid using the previously computed addrReg(s) */
+ bestReg = regSet.rsNarrowHint(needReg, ~addrReg);
+
+ /* If we have a reg available to grab then use bestReg */
+ if (bestReg & regSet.rsRegMaskCanGrab())
+ needReg = bestReg;
+
+ mustReg = RegSet::EXACT_REG;
+#endif
+
+ /* Generate the RHS into a register */
+ genComputeReg(op2, needReg, mustReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make sure the target is still addressable */
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(op2->gtRegNum));
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Does the write barrier helper do the assignment? */
+
+ regGC = WriteBarrier(op1, op2, addrReg);
+
+ if (regGC != 0)
+ {
+ // Yes, assignment done by the WriteBarrier
+ noway_assert(isWriteBarrier);
+ }
+ else
+ {
+ assert(!isWriteBarrier);
+
+#ifdef _TARGET_ARM_
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2->gtRegNum);
+ }
+
+ /* The new value is no longer needed */
+
+ genReleaseReg(op2);
+
+#ifdef DEBUG
+ /* Update the current liveness info */
+ if (compiler->opts.varNames)
+ genUpdateLife(tree);
+#endif
+
+ // This is done in WriteBarrier when (regGC != 0)
+ if (regGC == 0)
+ {
+ /* Free up anything that was tied up by the LHS */
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ }
+ }
+
+ addrReg = RBM_NONE;
+ break;
+ }
+
+ noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
+ genCodeForTreeSmpOpAsg_DONE_ASSG(tree, addrReg, REG_NA, ovfl);
+
+LExit:
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclILoffs);
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code to complete the assignment operation
+ */
+
+void CodeGen::genCodeForTreeSmpOpAsg_DONE_ASSG(GenTreePtr tree, regMaskTP addrReg, regNumber reg, bool ovfl)
+{
+ const var_types treeType = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ noway_assert(op2);
+
+ if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_REG_VAR)
+ genUpdateLife(op1);
+ genUpdateLife(tree);
+
+#if REDUNDANT_LOAD
+
+ if (op1->gtOper == GT_LCL_VAR)
+ regTracker.rsTrashLcl(op1->gtLclVarCommon.gtLclNum);
+
+ /* Have we just assigned a value that is in a register? */
+
+ if ((op2->gtFlags & GTF_REG_VAL) && tree->gtOper == GT_ASG)
+ {
+ regTracker.rsTrackRegAssign(op1, op2);
+ }
+
+#endif
+
+ noway_assert(addrReg != 0xDEADCAFE);
+
+ gcInfo.gcMarkRegSetNpt(addrReg);
+
+ if (ovfl)
+ {
+ noway_assert(tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB);
+
+ /* If GTF_REG_VAL is not set, and it is a small type, then
+ we must have loaded it up from memory, done the increment,
+ checked for overflow, and then stored it back to memory */
+
+ bool ovfCheckDone = (genTypeSize(op1->TypeGet()) < sizeof(int)) && !(op1->gtFlags & GTF_REG_VAL);
+
+ if (!ovfCheckDone)
+ {
+ // For small sizes, reg should be set as we sign/zero extend it.
+
+ noway_assert(genIsValidReg(reg) || genTypeSize(treeType) == sizeof(int));
+
+ /* Currently we don't morph x=x+y into x+=y in try blocks
+ * if we need overflow check, as x+y may throw an exception.
+ * We can do it if x is not live on entry to the catch block.
+ */
+ noway_assert(!compiler->compCurBB->hasTryIndex());
+
+ genCheckOverflow(tree);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a special op tree
+ */
+
+void CodeGen::genCodeForTreeSpecialOp(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+ genTreeOps oper = tree->OperGet();
+ regNumber reg = DUMMY_INIT(REG_CORRUPT);
+ regMaskTP regs = regSet.rsMaskUsed;
+
+ noway_assert((tree->OperKind() & (GTK_CONST | GTK_LEAF | GTK_SMPOP)) == 0);
+
+ switch (oper)
+ {
+ case GT_CALL:
+ regs = genCodeForCall(tree, true);
+
+ /* If the result is in a register, make sure it ends up in the right place */
+
+ if (regs != RBM_NONE)
+ {
+ genMarkTreeInReg(tree, genRegNumFromMask(regs));
+ }
+
+ genUpdateLife(tree);
+ return;
+
+ case GT_FIELD:
+ NO_WAY("should not see this operator in this phase");
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ {
+#ifdef FEATURE_ENABLE_NO_RANGE_CHECKS
+ // MUST NEVER CHECK-IN WITH THIS ENABLED.
+ // This is just for convenience in doing performance investigations and requires x86ret builds
+ if (!JitConfig.JitNoRngChk())
+#endif
+ genRangeCheck(tree);
+ }
+ return;
+
+ case GT_ARR_ELEM:
+ genCodeForTreeSmpOp_GT_ADDR(tree, destReg, bestReg);
+ return;
+
+ case GT_CMPXCHG:
+ {
+#if defined(_TARGET_XARCH_)
+ // cmpxchg does not have an [r/m32], imm32 encoding, so we need a register for the value operand
+
+ // Since this is a "call", evaluate the operands from right to left. Don't worry about spilling
+ // right now, just get the trees evaluated.
+
+ // As a friendly reminder, IL args are evaluated left to right.
+
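+ // Illustrative sketch of the shape this path ends up emitting (<location>,
+ // <value> and <comparand> are placeholders for the operands picked below,
+ // not identifiers from this code):
+ //
+ //     mov   <reg>, <value>                       ; any integer register except EAX
+ //     mov   eax, <comparand>                     ; cmpxchg pins the comparand to EAX
+ //     lock  cmpxchg dword ptr [<location>], <reg>
+ //                                                ; the old memory value is left in EAX
+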
+ GenTreePtr location = tree->gtCmpXchg.gtOpLocation; // arg1
+ GenTreePtr value = tree->gtCmpXchg.gtOpValue; // arg2
+ GenTreePtr comparand = tree->gtCmpXchg.gtOpComparand; // arg3
+ regMaskTP addrReg;
+
+ bool isAddr = genMakeIndAddrMode(location, tree, false, /* not for LEA */
+ RBM_ALLINT, RegSet::KEEP_REG, &addrReg);
+
+ if (!isAddr)
+ {
+ genCodeForTree(location, RBM_NONE, RBM_NONE);
+ assert(location->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(location->gtRegNum);
+ regSet.rsMarkRegUsed(location);
+ }
+
+ // We must have a reg for the Value, but it doesn't really matter which register.
+
+ // Try to avoid EAX and the address register if possible.
+ genComputeReg(value, regSet.rsNarrowHint(RBM_ALLINT, RBM_EAX | addrReg), RegSet::ANY_REG, RegSet::KEEP_REG);
+
+#ifdef DEBUG
+ // cmpxchg uses EAX as an implicit operand to hold the comparand
+ // We're going to destroy EAX in this operation, so we better not be keeping
+ // anything important in it.
+ if (RBM_EAX & regSet.rsMaskVars)
+ {
+ // We have a variable enregistered in EAX. Make sure it goes dead in this tree.
+ for (unsigned varNum = 0; varNum < compiler->lvaCount; ++varNum)
+ {
+ const LclVarDsc& varDesc = compiler->lvaTable[varNum];
+ if (!varDesc.lvIsRegCandidate())
+ continue;
+ if (!varDesc.lvRegister)
+ continue;
+ if (isFloatRegType(varDesc.lvType))
+ continue;
+ if (varDesc.lvRegNum != REG_EAX)
+ continue;
+ // We may need to check lvOtherReg.
+
+ // If the variable isn't going dead during this tree, we've just trashed a local with
+ // cmpxchg.
+ noway_assert(genContainsVarDeath(value->gtNext, comparand->gtNext, varNum));
+
+ break;
+ }
+ }
+#endif
+ genComputeReg(comparand, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ // By this point we've evaluated everything. However the odds are that we've spilled something by
+ // now. Let's recover all the registers and force them to stay.
+
+ // Well, we just computed comparand, so it's still in EAX.
+ noway_assert(comparand->gtRegNum == REG_EAX);
+ regSet.rsLockUsedReg(RBM_EAX);
+
+ // Stick it anywhere other than EAX.
+ genRecoverReg(value, ~RBM_EAX, RegSet::KEEP_REG);
+ reg = value->gtRegNum;
+ noway_assert(reg != REG_EAX);
+ regSet.rsLockUsedReg(genRegMask(reg));
+
+ if (isAddr)
+ {
+ addrReg = genKeepAddressable(/*location*/ tree, addrReg, 0 /*avoidMask*/);
+ }
+ else
+ {
+ genRecoverReg(location, ~(RBM_EAX | genRegMask(reg)), RegSet::KEEP_REG);
+ }
+
+ regSet.rsUnlockUsedReg(genRegMask(reg));
+ regSet.rsUnlockUsedReg(RBM_EAX);
+
+ instGen(INS_lock);
+ if (isAddr)
+ {
+ sched_AM(INS_cmpxchg, EA_4BYTE, reg, false, location, 0);
+ genDoneAddressable(location, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ instEmit_RM_RV(INS_cmpxchg, EA_4BYTE, location, reg, 0);
+ genReleaseReg(location);
+ }
+
+ genReleaseReg(value);
+ genReleaseReg(comparand);
+
+ // EAX and the value register are both trashed at this point.
+ regTracker.rsTrackRegTrash(REG_EAX);
+ regTracker.rsTrackRegTrash(reg);
+
+ reg = REG_EAX;
+
+ genFlagsEqualToNone();
+ break;
+#else // not defined(_TARGET_XARCH_)
+ NYI("GT_CMPXCHG codegen");
+ break;
+#endif
+ }
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected operator");
+ NO_WAY("unexpected operator");
+ }
+
+ noway_assert(reg != DUMMY_INIT(REG_CORRUPT));
+ genCodeForTree_DONE(tree, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for the given tree. tree->gtRegNum will be set to the
+ * register where the tree lives.
+ *
+ * If 'destReg' is non-zero, we'll do our best to compute the value into a
+ * register that is in that register set.
+ * Use genComputeReg() if you need the tree in a specific register.
+ * Use genCompIntoFreeReg() if the register needs to be written to. Otherwise,
+ * the register can only be used for read, but not for write.
+ * Use genMakeAddressable() if you only need the tree to be accessible
+ * using a complex addressing mode, and do not necessarily need the tree
+ * materialized in a register.
+ *
+ * The GCness of the register will be properly set in gcInfo.gcRegGCrefSetCur/gcInfo.gcRegByrefSetCur.
+ *
+ * The register will not be marked as used. Use regSet.rsMarkRegUsed() if the
+ * register will not be consumed right away and could possibly be spilled.
+ */
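+//
+// Illustrative sketch only (hypothetical call sites, not part of this change) of how
+// the register-request helpers mentioned above are typically invoked:
+//
+//     genCodeForTree(op, RBM_NONE, RBM_NONE);                          // any register is fine
+//     genComputeReg(op, RBM_EAX, RegSet::EXACT_REG, RegSet::KEEP_REG); // must end up in EAX
+//     genCompIntoFreeReg(op, RBM_NONE, RegSet::KEEP_REG);              // result register will be written
+//     addrReg = genMakeAddressable(op, RBM_NONE, RegSet::KEEP_REG);    // only needs to be addressable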
+
+void CodeGen::genCodeForTree(GenTreePtr tree, regMaskTP destReg, regMaskTP bestReg)
+{
+#if 0
+ if (compiler->verbose)
+ {
+ printf("Generating code for tree ");
+ Compiler::printTreeID(tree);
+ printf(" destReg = 0x%x bestReg = 0x%x\n", destReg, bestReg);
+ }
+ genStressRegs(tree);
+#endif
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+ assert(tree->IsNodeProperlySized());
+
+ // When assigning to an enregistered local variable we receive
+ // a hint that we should target the register that is used to
+ // hold the enregistered local variable.
+ // When receiving this hint both the destReg and bestReg masks are set
+ // to the register that is used by the enregistered local variable.
+ //
+ // However it is possible for a different local variable
+ // targeting the same register to become alive (and later die)
+ // as we descend the expression tree.
+ //
+ // To handle such cases we will remove any registers that are alive
+ // from both the destReg and bestReg masks.
+ //
+ regMaskTP liveMask = genLiveMask(tree);
+
+ // This removes any registers used to hold enregistered locals
+ // from the destReg and bestReg masks.
+ // After this either mask could become 0
+ //
+ destReg &= ~liveMask;
+ bestReg &= ~liveMask;
+
+ /* 'destReg' of 0 really means 'any' */
+
+ destReg = regSet.rsUseIfZero(destReg, RBM_ALL(tree->TypeGet()));
+
+ if (destReg != RBM_ALL(tree->TypeGet()))
+ bestReg = regSet.rsUseIfZero(bestReg, destReg);
+
+ // Long, float, and double have their own codegen functions
+ switch (tree->TypeGet())
+ {
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ genCodeForTreeLng(tree, destReg, /*avoidReg*/ RBM_NONE);
+ return;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ // For comma nodes, we'll get back here for the last node in the comma list.
+ if (tree->gtOper != GT_COMMA)
+ {
+ genCodeForTreeFlt(tree, RBM_ALLFLOAT, RBM_ALLFLOAT & (destReg | bestReg));
+ return;
+ }
+ break;
+#endif
+
+#ifdef DEBUG
+ case TYP_UINT:
+ case TYP_ULONG:
+ noway_assert(!"These types are only used as markers in GT_CAST nodes");
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ /* Is the value already in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ genCodeForTree_REG_VAR1(tree);
+ return;
+ }
+
+ /* We better not have a spilled value here */
+
+ noway_assert((tree->gtFlags & GTF_SPILLED) == 0);
+
+ /* Figure out what kind of a node we have */
+
+ unsigned kind = tree->OperKind();
+
+ if (kind & GTK_CONST)
+ {
+ /* Handle constant nodes */
+
+ genCodeForTreeConst(tree, destReg, bestReg);
+ }
+ else if (kind & GTK_LEAF)
+ {
+ /* Handle leaf nodes */
+
+ genCodeForTreeLeaf(tree, destReg, bestReg);
+ }
+ else if (kind & GTK_SMPOP)
+ {
+ /* Handle 'simple' unary/binary operators */
+
+ genCodeForTreeSmpOp(tree, destReg, bestReg);
+ }
+ else
+ {
+ /* Handle special operators */
+
+ genCodeForTreeSpecialOp(tree, destReg, bestReg);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+ unsigned stmtNum = 0;
+ unsigned totalCostEx = 0;
+ unsigned totalCostSz = 0;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ if (compiler->compTailCallUsed)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+#endif
+
+ if (compiler->opts.compDbgEnC)
+ {
+ noway_assert(isFramePointerUsed());
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* If we have any pinvoke calls, we might potentially trash everything */
+
+ if (compiler->info.compCallUnmanaged)
+ {
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+ regSet.rsSetRegsModified(RBM_INT_CALLEE_SAVED & ~RBM_FPBASE);
+ }
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ continue;
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ continue;
+
+#if CPU_HAS_FP_SUPPORT
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+#endif
+
+ /* Mark the register as holding the variable */
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ regTracker.rsTrackRegLclVarLng(varDsc->lvRegNum, varNum, true);
+
+ if (varDsc->lvOtherReg != REG_STK)
+ regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
+ }
+ else
+ {
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = NULL, block = compiler->fgFirstBB; block != NULL; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveSet, VarSetOps::UninitVal());
+
+ regMaskTP gcrefRegs = 0;
+ regMaskTP byrefRegs = 0;
+
+ /* Does any other block jump to this point ? */
+
+ if (block->bbFlags & BBF_JMP_TARGET)
+ {
+ /* Someone may jump here, so trash all regs */
+
+ regTracker.rsTrackRegClr();
+
+ genFlagsEqualToNone();
+ }
+ else
+ {
+ /* No jump, but pointers always need to get trashed for proper GC tracking */
+
+ regTracker.rsTrackRegClrPtr();
+ }
+
+ /* No registers are used or locked on entry to a basic block */
+
+ regSet.rsMaskUsed = RBM_NONE;
+ regSet.rsMaskMult = RBM_NONE;
+ regSet.rsMaskLock = RBM_NONE;
+
+ // If we need to reserve registers such that they are not used
+ // by CodeGen in this BasicBlock we do so here.
+ // On ARM, when we have large frame offsets for locals, we
+ // will have RBM_R10 in the regSet.rsMaskResvd set;
+ // additionally, if a LocAlloc or alloca is used, RBM_R9 is in
+ // the regSet.rsMaskResvd set, and we lock these registers here.
+ //
+ if (regSet.rsMaskResvd != RBM_NONE)
+ {
+ regSet.rsLockReg(regSet.rsMaskResvd);
+ regSet.rsSetRegsModified(regSet.rsMaskResvd);
+ }
+
+ /* Figure out which registers hold variables on entry to this block */
+
+ regMaskTP specialUseMask = regSet.rsMaskResvd;
+
+ specialUseMask |= doubleAlignOrFramePointerUsed() ? RBM_SPBASE | RBM_FPBASE : RBM_SPBASE;
+ regSet.ClearMaskVars();
+ VarSetOps::ClearD(compiler, compiler->compCurLife);
+ VarSetOps::Assign(compiler, liveSet, block->bbLiveIn);
+
+#if FEATURE_STACK_FP_X87
+ VarSetOps::AssignNoCopy(compiler, genFPregVars,
+ VarSetOps::Intersection(compiler, liveSet, compiler->optAllFPregVars));
+ genFPregCnt = VarSetOps::Count(compiler, genFPregVars);
+ genFPdeadRegCnt = 0;
+#endif
+ gcInfo.gcResetForBB();
+
+ genUpdateLife(liveSet); // This updates regSet.rsMaskVars with bits from any enregistered LclVars
+#if FEATURE_STACK_FP_X87
+ VarSetOps::IntersectionD(compiler, liveSet, compiler->optAllNonFPvars);
+#endif
+
+ // We should never enregister variables in any of the specialUseMask registers
+ noway_assert((specialUseMask & regSet.rsMaskVars) == 0);
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ varNum = compiler->lvaTrackedToVarNum[varIndex];
+ varDsc = compiler->lvaTable + varNum;
+ assert(varDsc->lvTracked);
+ /* Ignore the variable if it's not in a reg */
+
+ if (!varDsc->lvRegister)
+ continue;
+ if (isFloatRegType(varDsc->lvType))
+ continue;
+
+ /* Get hold of the index and the bitmask for the variable */
+ regNumber regNum = varDsc->lvRegNum;
+ regMaskTP regMask = genRegMask(regNum);
+
+ regSet.AddMaskVars(regMask);
+
+ if (varDsc->lvType == TYP_REF)
+ gcrefRegs |= regMask;
+ else if (varDsc->lvType == TYP_BYREF)
+ byrefRegs |= regMask;
+
+ /* Mark the register holding the variable as such */
+
+ if (varTypeIsMultiReg(varDsc))
+ {
+ regTracker.rsTrackRegLclVarLng(regNum, varNum, true);
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ regTracker.rsTrackRegLclVarLng(varDsc->lvOtherReg, varNum, false);
+ regMask |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+ else
+ {
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+ }
+ }
+
+ gcInfo.gcPtrArgCnt = 0;
+
+#if FEATURE_STACK_FP_X87
+
+ regSet.rsMaskUsedFloat = regSet.rsMaskRegVarFloat = regSet.rsMaskLockedFloat = RBM_NONE;
+
+ memset(regSet.genUsedRegsFloat, 0, sizeof(regSet.genUsedRegsFloat));
+ memset(regSet.genRegVarsFloat, 0, sizeof(regSet.genRegVarsFloat));
+
+ // Setup fp state on block entry
+ genSetupStateStackFP(block);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ JitDumpFPState();
+ }
+#endif // DEBUG
+#endif // FEATURE_STACK_FP_X87
+
+ /* Make sure we keep track of what pointers are live */
+
+ noway_assert((gcrefRegs & byrefRegs) == 0); // Something can't be both a gcref and a byref
+ gcInfo.gcRegGCrefSetCur = gcrefRegs;
+ gcInfo.gcRegByrefSetCur = byrefRegs;
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block; it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ GenTreePtr firstStmt = block->FirstNonPhiDef();
+ if (firstStmt != NULL)
+ {
+ GenTreePtr firstTree = firstStmt->gtStmt.gtStmtExpr;
+ if (compiler->gtHasCatchArg(firstTree))
+ {
+ gcInfo.gcRegGCrefSetCur |= RBM_EXCEPTION_OBJECT;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+#if defined(_TARGET_ARM_)
+ // If this block is the target of a finally return, we need to add a preceding NOP, in the same EH region,
+ // so the unwinder doesn't get confused by our "movw lr, xxx; movt lr, xxx; b Lyyy" calling convention that
+ // calls the funclet during non-exceptional control flow.
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ assert(block->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nEmitting finally target NOP predecessor for BB%02u\n", block->bbNum);
+ }
+#endif
+ // Create a label that we'll use for computing the start of an EH region, if this block is
+ // at the beginning of such a region. If we used the existing bbEmitCookie as is for
+ // determining the EH regions, then this NOP would end up outside of the region, if this
+ // block starts an EH region. If we pointed the existing bbEmitCookie here, then the NOP
+ // would be executed, which we would prefer not to do.
+
+ block->bbUnwindNopEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+
+ instGen(INS_nop);
+ }
+#endif // defined(_TARGET_ARM_)
+
+ genUpdateCurrentFunclet(block);
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef _TARGET_XARCH_
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+#endif
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+#endif
+
+ block->bbEmitCookie = NULL;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ /*isFinally*/ block->bbFlags & BBF_FINALLY_TARGET
+#else
+ FALSE
+#endif
+ );
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+#if FEATURE_STACK_FP_X87
+ genResetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+
+#if !FEATURE_FIXED_OUT_ARGS
+ /* Check for inserted throw blocks and adjust genStackLevel */
+
+ if (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block))
+ {
+ noway_assert(block->bbFlags & BBF_JMP_TARGET);
+
+ genStackLevel = compiler->fgThrowHlpBlkStkLevel(block) * sizeof(int);
+
+ if (genStackLevel)
+ {
+#ifdef _TARGET_X86_
+ getEmitter()->emitMarkStackLvl(genStackLevel);
+ inst_RV_IV(INS_add, REG_SPBASE, genStackLevel, EA_PTRSIZE);
+ genStackLevel = 0;
+#else // _TARGET_X86_
+ NYI("Need emitMarkStackLvl()");
+#endif // _TARGET_X86_
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) && block != compiler->fgFirstBB)
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#if defined(DEBUGGING_SUPPORT)
+
+ /* Do we have a new IL-offset ? */
+
+ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ /* Create and append a new IP-mapping entry */
+ genIPmappingAdd(stmt->gtStmt.gtStmtILoffsx, firstMapping);
+ firstMapping = false;
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (stmt->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ noway_assert(stmt->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize);
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs)
+ {
+ while (genCurDispOffset <= stmt->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ /* Get hold of the statement tree */
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ stmtNum++;
+ if (compiler->verbose)
+ {
+ printf("\nGenerating BB%02u, stmt %u\t\t", block->bbNum, stmtNum);
+ printf("Holding variables: ");
+ dspRegMask(regSet.rsMaskVars);
+ printf("\n\n");
+ compiler->gtDispTree(compiler->opts.compDbgInfo ? stmt : tree);
+ printf("\n");
+#if FEATURE_STACK_FP_X87
+ JitDumpFPState();
+#endif
+
+ printf("Execution Order:\n");
+ for (GenTreePtr treeNode = stmt->gtStmt.gtStmtList; treeNode != NULL; treeNode = treeNode->gtNext)
+ {
+ compiler->gtDispTree(treeNode, 0, NULL, true);
+ }
+ printf("\n");
+ }
+ totalCostEx += (stmt->gtCostEx * block->getBBWeight(compiler));
+ totalCostSz += stmt->gtCostSz;
+#endif // DEBUG
+
+ compiler->compCurStmt = stmt;
+
+ compiler->compCurLifeTree = NULL;
+ switch (tree->gtOper)
+ {
+ case GT_CALL:
+ // Managed Retval under managed debugger - we need to make sure that the returned ref-type is
+ // reported as alive, even though it is not used within the caller, for the managed debugger's
+ // sake. So consider the return value of the method as used if generating debuggable code.
+ genCodeForCall(tree, compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ genUpdateLife(tree);
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ break;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+
+ // Just do the side effects
+ genEvalSideEffects(tree);
+ break;
+
+ default:
+ /* Generate code for the tree */
+
+ genCodeForTree(tree, 0);
+ break;
+ }
+
+ regSet.rsSpillChk();
+
+ /* The value of the tree isn't used, unless it's a return stmt */
+
+ if (tree->gtOper != GT_RETURN)
+ gcInfo.gcMarkRegPtrVal(tree);
+
+#if FEATURE_STACK_FP_X87
+ genEndOfStatement();
+#endif
+
+#ifdef DEBUG
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur);
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (compiler->genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ if (tree->gtOper == GT_RETURN && (varTypeIsGC(compiler->info.compRetType) ||
+ (tree->gtOp.gtOp1 != 0 && varTypeIsGC(tree->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ // When profiling, the first statement in a catch block will be the
+ // harmless "inc" instruction (does not interfere with the exception
+ // object).
+
+ if ((compiler->opts.eeFlags & CORJIT_FLG_BBINSTR) && (stmt == block->bbTreeList) &&
+ (block->bbCatchTyp && handlerGetsXcptnObj(block->bbCatchTyp)))
+ {
+ nonVarPtrRegs &= ~RBM_EXCEPTION_OBJECT;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after tree=");
+ Compiler::printTreeID(tree);
+ printf(" BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == 0);
+#endif // DEBUG
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+#ifdef DEBUGGING_SUPPORT
+ genEnsureCodeEmitted(stmt->gtStmt.gtStmtILoffsx);
+#endif
+
+ } //-------- END-FOR each statement-tree of the current block ---------
+
+#ifdef DEBUGGING_SUPPORT
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == NULL);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == NULL);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+ gcInfo.gcMarkRegSetNpt(gcrefRegs | byrefRegs);
+
+ if (!VarSetOps::Equal(compiler, compiler->compCurLife, block->bbLiveOut))
+ compiler->genChangeLife(block->bbLiveOut DEBUGARG(NULL));
+
+ /* Both stacks should always be empty on exit from a basic block */
+
+ noway_assert(genStackLevel == 0);
+#if FEATURE_STACK_FP_X87
+ noway_assert(genGetFPstkLevel() == 0);
+
+ // Do the FPState matching that may have to be done
+ genCodeForEndBlockTransitionStackFP(block);
+#endif
+
+ noway_assert(genFullPtrRegMap == false || gcInfo.gcPtrArgCnt == 0);
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ if ((block->bbNext == NULL)
+#if FEATURE_EH_FUNCLETS
+ || (block->bbNext->bbFlags & BBF_FUNCLET_BEG)
+#endif // FEATURE_EH_FUNCLETS
+ || (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+#if defined(_TARGET_X86_)
+
+ /* If we are about to invoke a finally locally from a try block,
+ we have to set the hidden slot corresponding to the finally's
+ nesting level. When invoked in response to an exception, the
+ EE usually does it.
+
+ We must have : BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+
+ This code depends on this order not being messed up.
+ We will emit :
+ mov [ebp-(n+1)],0
+ mov [ebp- n ],0xFC
+ push &step
+ jmp finallyBlock
+
+ step: mov [ebp- n ],0
+ jmp leaveTarget
+ leaveTarget:
+ */
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ compiler->fgGetNestingLevel(block, &finallyNesting);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - (sizeof(void*)));
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * sizeof(void*)));
+
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - sizeof(void*));
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
+
+ // Now push the address of where the finally funclet should
+ // return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+ // EE expects a DWORD, so we give it 0
+ inst_IV(INS_push_hide, 0);
+ }
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+#elif defined(_TARGET_ARM_)
+
+ // Now set REG_LR to the address of where the finally funclet should
+ // return to directly.
+
+ BasicBlock* bbFinallyRet;
+ bbFinallyRet = NULL;
+
+ // We don't have retless calls, since we use the BBJ_ALWAYS to point at a NOP pad where
+ // we would have otherwise created retless calls.
+ assert(block->isBBCallAlwaysPair());
+
+ assert(block->bbNext != NULL);
+ assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbNext->bbJumpDest != NULL);
+ assert(block->bbNext->bbJumpDest->bbFlags & BBF_FINALLY_TARGET);
+
+ bbFinallyRet = block->bbNext->bbJumpDest;
+ bbFinallyRet->bbFlags |= BBF_JMP_TARGET;
+
+#if 0
+ // We don't know the address of the finally funclet yet. But adr requires the offset
+ // to the finally funclet from the current IP to be within 4095 bytes. So this code is disabled
+ // for now.
+ getEmitter()->emitIns_J_R (INS_adr,
+ EA_4BYTE,
+ bbFinallyRet,
+ REG_LR);
+#else // 0
+ // Load the address where the finally funclet should return into LR.
+ // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do
+ // the return.
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR);
+ regTracker.rsTrackRegTrash(REG_LR);
+#endif // 0
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+#else
+ NYI("TARGET");
+#endif
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case BBJ_EHCATCHRET:
+ // set r0 to the address the VM should return to after the catch
+ getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0);
+ regTracker.rsTrackRegTrash(REG_R0);
+
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#else // _TARGET_ARM_
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHCATCHRET:
+ break;
+
+#endif // _TARGET_ARM_
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = 0;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("totalCostEx = %6d, totalCostSz = %5d ", totalCostEx, totalCostSz);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code for a long operation.
+ * needReg is a recommendation of which registers to use for the tree.
+ * For partially enregistered longs, the tree will be marked as GTF_REG_VAL
+ * without loading the stack part into a register. Note that only leaf
+ * nodes (or if gtEffectiveVal() == leaf node) may be marked as partially
+ * enregistered so that we can know the memory location of the other half.
+ */
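+//
+// Hedged illustration (made-up registers, not taken from this change): for a
+// partially enregistered TYP_LONG leaf marked GTF_REG_VAL we might observe
+//
+//     genRegPairLo(tree->gtRegPair) == REG_EAX   // low 32 bits live in a register
+//     genRegPairHi(tree->gtRegPair) == REG_STK   // high 32 bits stay in the stack home
+//
+// which is why only (effective) leaf nodes may be marked this way: the stack half
+// remains addressable through the original tree.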
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP avoidReg)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ regPairNo regPair = DUMMY_INIT(REG_PAIR_CORRUPT);
+ regMaskTP addrReg;
+ regNumber regLo;
+ regNumber regHi;
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+ noway_assert(genActualType(tree->gtType) == TYP_LONG);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ REG_VAR_LONG:
+ regPair = tree->gtRegPair;
+
+ gcInfo.gcMarkRegSetNpt(genRegPairMask(regPair));
+
+ goto DONE;
+ }
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ __int64 lval;
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Load the value into the registers */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !CPU_HAS_FP_SUPPORT
+ if (oper == GT_CNS_DBL)
+ {
+ noway_assert(sizeof(__int64) == sizeof(double));
+
+ noway_assert(sizeof(tree->gtLngCon.gtLconVal) == sizeof(tree->gtDblCon.gtDconVal));
+
+ lval = *(__int64*)(&tree->gtDblCon.gtDconVal);
+ }
+ else
+#endif
+ {
+ noway_assert(oper == GT_CNS_LNG);
+
+ lval = tree->gtLngCon.gtLconVal;
+ }
+
+ genSetRegToIcon(genRegPairLo(regPair), int(lval));
+ genSetRegToIcon(genRegPairHi(regPair), int(lval >> 32));
+ goto DONE;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+
+#if REDUNDANT_LOAD
+
+ /* This case has to consider the possibility that an int64 LCL_VAR
+ * may both be enregistered and also have a cached copy of itself
+ * in a different set of registers.
+ * We want to return the registers that have the most in common
+ * with the needReg mask.
+ */
+
+ /* Does the var have a copy of itself in the cached registers?
+ * And are these cached registers both free?
+ * If so use these registers if they match any needReg.
+ */
+
+ regPair = regTracker.rsLclIsInRegPair(tree->gtLclVarCommon.gtLclNum);
+
+ if ((regPair != REG_PAIR_NONE) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
+ ((genRegPairMask(regPair) & needReg) != RBM_NONE))
+ {
+ goto DONE;
+ }
+
+ /* Does the variable live in a register?
+ * If so use these registers.
+ */
+ if (genMarkLclVar(tree))
+ goto REG_VAR_LONG;
+
+ /* If tree is not an enregistered variable then
+ * be sure to use any cached register that contains
+ * a copy of this local variable.
+ */
+ if (regPair != REG_PAIR_NONE)
+ {
+ goto DONE;
+ }
+#endif
+ goto MEM_LEAF;
+
+ case GT_LCL_FLD:
+
+ // We only use GT_LCL_FLD for lvDoNotEnregister vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ goto MEM_LEAF;
+
+ case GT_CLS_VAR:
+ MEM_LEAF:
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Load the value into the registers */
+
+ instruction loadIns;
+
+ loadIns = ins_Load(TYP_INT); // INS_ldr
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber regAddr = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_TT(INS_lea, regAddr, tree, 0);
+ regTracker.rsTrackRegTrash(regAddr);
+
+ if (regLo != regAddr)
+ {
+ // assert(regLo != regAddr); // forced by if statement
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
+ }
+ else
+ {
+ // assert(regHi != regAddr); // implied by regpair property and the if statement
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regHi, regAddr, 4);
+ getEmitter()->emitIns_R_R_I(loadIns, EA_4BYTE, regLo, regAddr, 0);
+ }
+ }
+#else
+ inst_RV_TT(loadIns, regLo, tree, 0);
+ inst_RV_TT(loadIns, regHi, tree, 4);
+#endif
+
+#ifdef _TARGET_ARM_
+ if ((oper == GT_CLS_VAR) && (tree->gtFlags & GTF_IND_VOLATILE))
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ goto DONE;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected leaf");
+ }
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ instruction insLo;
+ instruction insHi;
+ bool doLo;
+ bool doHi;
+ bool setCarry = false;
+ int helper;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ switch (oper)
+ {
+ case GT_ASG:
+ {
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclVarILoffs = DUMMY_INIT(0);
+#endif
+
+ /* Is the target a local ? */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+ // No dead stores (with min opts we may have dead stores)
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ * Remember the local var info to call siCheckVarScope
+ * AFTER codegen of the assignment.
+ */
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode &&
+ (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclVarILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ /* Has the variable been assigned to a register (pair) ? */
+
+ if (genMarkLclVar(op1))
+ {
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regPair = op1->gtRegPair;
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+ noway_assert(regLo != regHi);
+
+ /* Is the value being assigned a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ /* Move the value into the target */
+
+ genMakeRegPairAvailable(regPair);
+
+ instruction ins;
+ if (regLo == REG_STK)
+ {
+ ins = ins_Store(TYP_INT);
+ }
+ else
+ {
+ // Always do the stack half first (in case it grabs a register,
+ // it can't clobber regLo this way)
+ if (regHi == REG_STK)
+ {
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+ }
+ ins = INS_mov;
+ }
+ inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal), 0);
+
+ // The REG_STK case has already been handled
+ if (regHi != REG_STK)
+ {
+ ins = INS_mov;
+ inst_TT_IV(ins, op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+ }
+
+ goto DONE_ASSG_REGS;
+ }
+
+ /* Compute the RHS into desired register pair */
+
+ if (regHi != REG_STK)
+ {
+ genComputeRegPair(op2, regPair, avoidReg, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ noway_assert(op2->gtRegPair == regPair);
+ }
+ else
+ {
+ regPairNo curPair;
+ regNumber curLo;
+ regNumber curHi;
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ curPair = op2->gtRegPair;
+ curLo = genRegPairLo(curPair);
+ curHi = genRegPairHi(curPair);
+
+ /* move high first, target is on stack */
+ inst_TT_RV(ins_Store(TYP_INT), op1, curHi, 4);
+
+ if (regLo != curLo)
+ {
+ if ((regSet.rsMaskUsed & genRegMask(regLo)) && (regLo != curHi))
+ regSet.rsSpillReg(regLo);
+ inst_RV_RV(INS_mov, regLo, curLo, TYP_LONG);
+ regTracker.rsTrackRegCopy(regLo, curLo);
+ }
+ }
+
+ genReleaseRegPair(op2);
+ goto DONE_ASSG_REGS;
+ }
+ }
+
+ /* Is the value being assigned a constant? */
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::KEEP_REG);
+
+ /* Move the value into the target */
+
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal), 0);
+ inst_TT_IV(ins_Store(TYP_INT), op1, (int)(op2->gtLngCon.gtLconVal >> 32), 4);
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ goto LAsgExit;
+ }
+
+#if 0
+ /* Catch a case where we can avoid generating op reg, mem. Better pairing
+ * from
+ * mov regHi, mem
+ * op regHi, reg
+ *
+ * To avoid problems with order of evaluation, only do this if op2 is
+ * a non-enregistered local variable
+ */
+
+ if (GenTree::OperIsCommutative(oper) &&
+ op1->gtOper == GT_LCL_VAR &&
+ op2->gtOper == GT_LCL_VAR)
+ {
+ regPair = regTracker.rsLclIsInRegPair(op2->gtLclVarCommon.gtLclNum);
+
+ /* Is op2 a non-enregistered local variable? */
+ if (regPair == REG_PAIR_NONE)
+ {
+ regPair = regTracker.rsLclIsInRegPair(op1->gtLclVarCommon.gtLclNum);
+
+ /* Is op1 an enregistered local variable? */
+ if (regPair != REG_PAIR_NONE)
+ {
+ /* Swap the operands */
+ GenTreePtr op = op1;
+ op1 = op2;
+ op2 = op;
+ }
+ }
+ }
+#endif
+
+ /* Eliminate worthless assignment "lcl = lcl" */
+
+ if (op2->gtOper == GT_LCL_VAR && op1->gtOper == GT_LCL_VAR &&
+ op2->gtLclVarCommon.gtLclNum == op1->gtLclVarCommon.gtLclNum)
+ {
+ genUpdateLife(op2);
+ goto LAsgExit;
+ }
+
+ if (op2->gtOper == GT_CAST && TYP_ULONG == op2->CastToType() && op2->CastFromType() <= TYP_INT &&
+ // op1,op2 need to be materialized in the correct order.
+ (tree->gtFlags & GTF_REVERSE_OPS))
+ {
+ /* Generate the small RHS into a register pair */
+
+ GenTreePtr smallOpr = op2->gtOp.gtOp1;
+
+ genComputeReg(smallOpr, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG, true);
+
+ /* Make sure everything is still addressable */
+
+ genRecoverReg(smallOpr, 0, RegSet::KEEP_REG);
+ noway_assert(smallOpr->gtFlags & GTF_REG_VAL);
+ regHi = smallOpr->gtRegNum;
+ addrReg = genKeepAddressable(op1, addrReg, genRegMask(regHi));
+
+ // conv.ovf.u8 could overflow if the original number was negative
+ if (op2->gtOverflow())
+ {
+ noway_assert((op2->gtFlags & GTF_UNSIGNED) ==
+ 0); // conv.ovf.u8.un should be bashed to conv.u8.un
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, regHi); // set flags
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, regHi, 0);
+ inst_TT_IV(ins_Store(TYP_INT), op1, 0, 4); // Store 0 in hi-word
+
+ /* Free up anything that was tied up by either side */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseReg(smallOpr);
+
+#if REDUNDANT_LOAD
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* clear this local from reg table */
+ regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
+
+ /* mark RHS registers as containing the local var */
+ regTracker.rsTrackRegLclVarLng(regHi, op1->gtLclVarCommon.gtLclNum, true);
+ }
+#endif
+ goto LAsgExit;
+ }
+
+ /* Is the LHS more complex than the RHS? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Generate the RHS into a register pair */
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg | op1->gtUsedRegs, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ /* Make the target addressable */
+ op1 = genCodeForCommaTree(op1);
+ addrReg = genMakeAddressable(op1, 0, RegSet::KEEP_REG);
+
+ /* Make sure the RHS register hasn't been spilled */
+
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
+ }
+ else
+ {
+ /* Make the target addressable */
+
+ op1 = genCodeForCommaTree(op1);
+ addrReg = genMakeAddressable(op1, RBM_ALLINT & ~op2->gtRsvdRegs, RegSet::KEEP_REG, true);
+
+ /* Generate the RHS into a register pair */
+
+ genComputeRegPair(op2, REG_PAIR_NONE, avoidReg, RegSet::KEEP_REG, false);
+ }
+
+ /* Lock 'op2' and make sure 'op1' is still addressable */
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regPair = op2->gtRegPair;
+
+ addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
+
+ /* Move the value into the target */
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairLo(regPair), 0);
+ inst_TT_RV(ins_Store(TYP_INT), op1, genRegPairHi(regPair), 4);
+
+ /* Free up anything that was tied up by either side */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+ genReleaseRegPair(op2);
+
+ DONE_ASSG_REGS:
+
+#if REDUNDANT_LOAD
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ /* Clear this local from reg table */
+
+ regTracker.rsTrashLclLong(op1->gtLclVarCommon.gtLclNum);
+
+ if ((op2->gtFlags & GTF_REG_VAL) &&
+ /* constant has precedence over local */
+ // rsRegValues[op2->gtRegNum].rvdKind != RV_INT_CNS &&
+ tree->gtOper == GT_ASG)
+ {
+ regNumber regNo;
+
+ /* mark RHS registers as containing the local var */
+
+ regNo = genRegPairLo(op2->gtRegPair);
+ if (regNo != REG_STK)
+ regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, true);
+
+ regNo = genRegPairHi(op2->gtRegPair);
+ if (regNo != REG_STK)
+ {
+ /* For partially enregistered longs, we might have
+ stomped on op2's hiReg */
+ if (!(op1->gtFlags & GTF_REG_VAL) || regNo != genRegPairLo(op1->gtRegPair))
+ {
+ regTracker.rsTrackRegLclVarLng(regNo, op1->gtLclVarCommon.gtLclNum, false);
+ }
+ }
+ }
+ }
+#endif
+
+ LAsgExit:
+
+ genUpdateLife(op1);
+ genUpdateLife(tree);
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclVarILoffs);
+#endif
+ }
+ return;
+
+ case GT_SUB:
+ insLo = INS_sub;
+ insHi = INS_SUBC;
+ setCarry = true;
+ goto BINOP_OVF;
+ case GT_ADD:
+ insLo = INS_add;
+ insHi = INS_ADDC;
+ setCarry = true;
+ goto BINOP_OVF;
+
+ bool ovfl;
+
+ BINOP_OVF:
+ ovfl = tree->gtOverflow();
+ goto _BINOP;
+
+ case GT_AND:
+ insLo = insHi = INS_AND;
+ goto BINOP;
+ case GT_OR:
+ insLo = insHi = INS_OR;
+ goto BINOP;
+ case GT_XOR:
+ insLo = insHi = INS_XOR;
+ goto BINOP;
+
+ BINOP:
+ ovfl = false;
+ goto _BINOP;
+
+ _BINOP:
+
+ /* The following makes an assumption about gtSetEvalOrder(this) */
+
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Special case: check for "(long(intval) << 32) | longval" */
+
+ if (oper == GT_OR && op1->gtOper == GT_LSH)
+ {
+ GenTreePtr lshLHS = op1->gtOp.gtOp1;
+ GenTreePtr lshRHS = op1->gtOp.gtOp2;
+
+ if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
+ genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
+ {
+
+ /* Throw away the cast of the shift operand. */
+
+ op1 = lshLHS->gtCast.CastOp();
+
+ /* Special case: check op2 for "ulong(intval)" */
+ if ((op2->gtOper == GT_CAST) && (op2->CastToType() == TYP_ULONG) &&
+ genTypeSize(TYP_INT) == genTypeSize(op2->CastFromType()))
+ {
+ /* Throw away the cast of the second operand. */
+
+ op2 = op2->gtCast.CastOp();
+ goto SIMPLE_OR_LONG;
+ }
+ /* Special case: check op2 for "long(intval) & 0xFFFFFFFF" */
+ else if (op2->gtOper == GT_AND)
+ {
+ GenTreePtr andLHS;
+ andLHS = op2->gtOp.gtOp1;
+ GenTreePtr andRHS;
+ andRHS = op2->gtOp.gtOp2;
+
+ if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
+ andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
+ genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
+ {
+ /* Throw away the cast of the second operand. */
+
+ op2 = andLHS->gtCast.CastOp();
+
+ SIMPLE_OR_LONG:
+ // Load the high DWORD, ie. op1
+
+ genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regHi = op1->gtRegNum;
+ regSet.rsMarkRegUsed(op1);
+
+ // Load the low DWORD, ie. op2
+
+ genCodeForTree(op2, needReg & ~genRegMask(regHi));
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regLo = op2->gtRegNum;
+
+ /* Make sure regHi is still around. Also, force
+ regLo to be excluded in case regLo==regHi */
+
+ genRecoverReg(op1, ~genRegMask(regLo), RegSet::FREE_REG);
+ regHi = op1->gtRegNum;
+
+ regPair = gen2regs2pair(regLo, regHi);
+ goto DONE;
+ }
+ }
+
+ /* Generate the following sequence:
+ Prepare op1 (discarding shift)
+ Compute op2 into some regpair
+ OR regpairhi, op1
+ */
+
+ /* First, make op1 addressable */
+
+ /* tempReg must avoid needReg, op2->gtRsvdRegs and regSet.rsMaskResvd.
+
+ It appears incorrect to exclude needReg as we are not ensuring that the reg pair into
+ which the long value is computed is from needReg. But at this point the safest fix is
+ to exclude regSet.rsMaskResvd.
+
+ Note that needReg could be the set of free registers (excluding reserved ones). If we don't
+ exclude regSet.rsMaskResvd, the expression below will have the effect of trying to choose a
+ reg from the reserved set, which is bound to fail. To prevent that we avoid regSet.rsMaskResvd.
+ */
+ regMaskTP tempReg = RBM_ALLINT & ~needReg & ~op2->gtRsvdRegs & ~avoidReg & ~regSet.rsMaskResvd;
+
+ addrReg = genMakeAddressable(op1, tempReg, RegSet::KEEP_REG);
+
+ genCompIntoFreeRegPair(op2, avoidReg, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regPair = op2->gtRegPair;
+ regHi = genRegPairHi(regPair);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op1, addrReg, genRegPairMask(regPair));
+
+ /* Now compute the result */
+
+ inst_RV_TT(insHi, regHi, op1, 0);
+
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Free up anything that was tied up by the LHS */
+
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ /* The result is where the second operand is sitting */
+
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::FREE_REG);
+
+ regPair = op2->gtRegPair;
+ goto DONE;
+ }
+ }
+
+ /* Special case: check for "longval | (long(intval) << 32)" */
+
+ if (oper == GT_OR && op2->gtOper == GT_LSH)
+ {
+ GenTreePtr lshLHS = op2->gtOp.gtOp1;
+ GenTreePtr lshRHS = op2->gtOp.gtOp2;
+
+ if (lshLHS->gtOper == GT_CAST && lshRHS->gtOper == GT_CNS_INT && lshRHS->gtIntCon.gtIconVal == 32 &&
+ genTypeSize(TYP_INT) == genTypeSize(lshLHS->CastFromType()))
+
+ {
+ /* We throw away the cast of the shift operand. */
+
+ op2 = lshLHS->gtCast.CastOp();
+
+ /* Special case: check op1 for "long(intval) & 0xFFFFFFFF" */
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr andLHS = op1->gtOp.gtOp1;
+ GenTreePtr andRHS = op1->gtOp.gtOp2;
+
+ if (andLHS->gtOper == GT_CAST && andRHS->gtOper == GT_CNS_LNG &&
+ andRHS->gtLngCon.gtLconVal == 0x00000000FFFFFFFF &&
+ genTypeSize(TYP_INT) == genTypeSize(andLHS->CastFromType()))
+ {
+ /* Throw away the cast of the first operand. */
+
+ op1 = andLHS->gtCast.CastOp();
+
+ // Load the low DWORD, ie. op1
+
+ genCodeForTree(op1, needReg & ~op2->gtRsvdRegs);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regLo = op1->gtRegNum;
+ regSet.rsMarkRegUsed(op1);
+
+ // Load the high DWORD, ie. op2
+
+ genCodeForTree(op2, needReg & ~genRegMask(regLo));
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regHi = op2->gtRegNum;
+
+ /* Make sure regLo is still around. Also, force
+ regHi to be excluded in case regLo==regHi */
+
+ genRecoverReg(op1, ~genRegMask(regHi), RegSet::FREE_REG);
+ regLo = op1->gtRegNum;
+
+ regPair = gen2regs2pair(regLo, regHi);
+ goto DONE;
+ }
+ }
+
+ /* Generate the following sequence:
+ Compute op1 into some regpair
+ Make op2 (ignoring shift) addressable
+ OR regPairHi, op2
+ */
+
+ // First, generate the first operand into some register
+
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Make the second operand addressable */
+
+ addrReg = genMakeAddressable(op2, needReg, RegSet::KEEP_REG);
+
+ /* Make sure the result is in a free register pair */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ regPair = op1->gtRegPair;
+ regHi = genRegPairHi(regPair);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op2, addrReg, genRegPairMask(regPair));
+
+ /* Compute the new value */
+
+ inst_RV_TT(insHi, regHi, op2, 0);
+
+ /* The value in the high register has been trashed */
+
+ regTracker.rsTrackRegTrash(regHi);
+
+ goto DONE_OR;
+ }
+ }
+
+ /* Generate the first operand into registers */
+
+ if ((genCountBits(needReg) == 2) && ((regSet.rsRegMaskFree() & needReg) == needReg) &&
+ ((op2->gtRsvdRegs & needReg) == RBM_NONE) && (!(tree->gtFlags & GTF_ASG)))
+ {
+ regPair = regSet.rsPickRegPair(needReg);
+ genComputeRegPair(op1, regPair, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::KEEP_REG);
+ }
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regMaskTP op1Mask;
+ regPair = op1->gtRegPair;
+ op1Mask = genRegPairMask(regPair);
+
+ /* Make the second operand addressable */
+ regMaskTP needReg2;
+ needReg2 = regSet.rsNarrowHint(needReg, ~op1Mask);
+ addrReg = genMakeAddressable(op2, needReg2, RegSet::KEEP_REG);
+
+ // TODO: If 'op1' got spilled and 'op2' happens to be
+ // TODO: in a register, and we have add/mul/and/or/xor,
+ // TODO: reverse the operands since we can perform the
+ // TODO: operation directly with the spill temp, e.g.
+ // TODO: 'add regHi, [temp]'.
+
+ /* Make sure the result is in a free register pair */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::KEEP_REG);
+ regPair = op1->gtRegPair;
+ op1Mask = genRegPairMask(regPair);
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* Make sure that we don't spill regLo/regHi below */
+ regSet.rsLockUsedReg(op1Mask);
+
+ /* The operand might have interfered with the address */
+
+ addrReg = genKeepAddressable(op2, addrReg);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Compute the new value */
+
+ doLo = true;
+ doHi = true;
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 icon = op2->gtLngCon.gtLconVal;
+
+ /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
+
+ switch (oper)
+ {
+ case GT_AND:
+ if ((int)(icon) == -1)
+ doLo = false;
+ if ((int)(icon >> 32) == -1)
+ doHi = false;
+
+ if (!(icon & I64(0x00000000FFFFFFFF)))
+ {
+ genSetRegToIcon(regLo, 0);
+ doLo = false;
+ }
+
+ if (!(icon & I64(0xFFFFFFFF00000000)))
+ {
+ /* Handle the low half first, so the low register is always set before the high one */
+
+ if (doLo)
+ {
+ inst_RV_TT(insLo, regLo, op2, 0);
+ doLo = false;
+ }
+ genSetRegToIcon(regHi, 0);
+ doHi = false;
+ }
+
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ if (!(icon & I64(0x00000000FFFFFFFF)))
+ doLo = false;
+ if (!(icon & I64(0xFFFFFFFF00000000)))
+ doHi = false;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Fix 383813 X86/ARM ILGEN
+ // Fix 383793 ARM ILGEN
+ // Fix 383911 ARM ILGEN
+ regMaskTP newMask;
+ newMask = addrReg & ~op1Mask;
+ regSet.rsLockUsedReg(newMask);
+
+ if (doLo)
+ {
+ insFlags flagsLo = setCarry ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_TT(insLo, regLo, op2, 0, EA_4BYTE, flagsLo);
+ }
+ if (doHi)
+ {
+ insFlags flagsHi = ovfl ? INS_FLAGS_SET : INS_FLAGS_DONT_CARE;
+ inst_RV_TT(insHi, regHi, op2, 4, EA_4BYTE, flagsHi);
+ }
+
+ regSet.rsUnlockUsedReg(newMask);
+ regSet.rsUnlockUsedReg(op1Mask);
+
+ DONE_OR:
+
+ /* Free up anything that was tied up by the address of op2 */
+
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ /* The result is where the first operand is sitting */
+
+ genRecoverRegPair(op1, REG_PAIR_NONE, RegSet::FREE_REG);
+
+ regPair = op1->gtRegPair;
+
+ if (ovfl)
+ genCheckOverflow(tree);
+
+ goto DONE;
+
+ case GT_UMOD:
+
+ regPair = genCodeForLongModInt(tree, needReg);
+ goto DONE;
+
+ case GT_MUL:
+
+ /* Special case: both operands promoted from int */
+
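+ /* Added summary (not from the original source): this handles the pattern
+ (long)i1 * (long)i2 where both operands are casts from 32-bit ints (GTF_MUL_64RSLT),
+ so a single 32x32->64 multiply (e.g. x86 imul leaving the result in EDX:EAX) yields
+ the full 64-bit product without a helper call. */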
+ assert(tree->gtIsValid64RsltMul());
+
+ /* Change to an integer multiply temporarily */
+
+ tree->gtType = TYP_INT;
+
+ noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
+ tree->gtOp.gtOp1 = op1->gtCast.CastOp();
+ tree->gtOp.gtOp2 = op2->gtCast.CastOp();
+
+ assert(tree->gtFlags & GTF_MUL_64RSLT);
+
+#if defined(_TARGET_X86_)
+ // imul on x86 requires EDX:EAX
+ genComputeReg(tree, (RBM_EAX | RBM_EDX), RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ noway_assert(tree->gtRegNum == REG_EAX); // Also REG_EDX is setup with hi 32-bits
+#elif defined(_TARGET_ARM_)
+ genComputeReg(tree, needReg, RegSet::ANY_REG, RegSet::FREE_REG);
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+#else
+ assert(!"Unsupported target for 64-bit multiply codegen");
+#endif
+
+ /* Restore gtType, op1 and op2 from the change above */
+
+ tree->gtType = TYP_LONG;
+ tree->gtOp.gtOp1 = op1;
+ tree->gtOp.gtOp2 = op2;
+
+#if defined(_TARGET_X86_)
+ /* The result is now in EDX:EAX */
+ regPair = REG_PAIR_EAXEDX;
+#elif defined(_TARGET_ARM_)
+ regPair = tree->gtRegPair;
+#endif
+ goto DONE;
+
+ case GT_LSH:
+ helper = CORINFO_HELP_LLSH;
+ goto SHIFT;
+ case GT_RSH:
+ helper = CORINFO_HELP_LRSH;
+ goto SHIFT;
+ case GT_RSZ:
+ helper = CORINFO_HELP_LRSZ;
+ goto SHIFT;
+
+ SHIFT:
+
+ noway_assert(op1->gtType == TYP_LONG);
+ noway_assert(genActualType(op2->gtType) == TYP_INT);
+
+ /* Is the second operand a constant? */
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ unsigned int count = op2->gtIntCon.gtIconVal;
+
+ /* Compute the left operand into a free register pair */
+
+ genCompIntoFreeRegPair(op1, avoidReg | op2->gtRsvdRegs, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPair = op1->gtRegPair;
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* Assume the value in the register pair is trashed. In some cases, though,
+ a register might be set to zero, and we can use that information to improve
+ some code generation.
+ */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Generate the appropriate shift instructions */
+
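+ /* Sketch (illustration only) of the decomposition for a constant left shift by
+ 0 < count < 32 on x86:
+ shld regHi, regLo, count ; high half receives bits shifted out of the low half
+ shl regLo, count ; low half is shifted in place
+ For count >= 32 the low half becomes zero and the high half is the old low half
+ shifted by (count - 32); for count >= 64 both halves become zero. The right shifts
+ below follow the mirrored pattern using shrd and sar (or shr for GT_RSZ). */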
+ switch (oper)
+ {
+ case GT_LSH:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shld, EA_4BYTE, regHi, regLo, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regHi, regHi, regLo, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSR);
+#else // _TARGET_*
+ NYI("INS_shld");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regLo, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov low dword into high dword (i.e. shift left by 32-bits)
+ inst_RV_RV(INS_mov, regHi, regLo);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, regLo,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov low dword into high dword (i.e. shift left by 32-bits)
+ inst_RV_RV(INS_mov, regHi, regLo);
+ if (count > 32)
+ {
+ // Shift high dword left by count - 32
+ inst_RV_SH(INS_SHIFT_LEFT_LOGICAL, EA_4BYTE, regHi, count - 32);
+ }
+#endif // _TARGET_*
+ }
+ else // count >= 64
+ {
+ assert(count >= 64);
+ genSetRegToIcon(regHi, 0);
+ }
+ genSetRegToIcon(regLo, 0);
+ }
+ break;
+
+ case GT_RSH:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+#else // _TARGET_*
+ NYI("INS_shrd");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, regHi,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ if (count > 32)
+ {
+ // Shift low dword right by count - 32
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regLo, count - 32);
+ }
+#endif // _TARGET_*
+ }
+
+ // Propagate sign bit in high dword
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
+
+ if (count >= 64)
+ {
+ // Propagate the sign from the high dword
+ inst_RV_RV(INS_mov, regLo, regHi, TYP_INT);
+ }
+ }
+ break;
+
+ case GT_RSZ:
+ if (count == 0)
+ {
+ // regHi, regLo are correct
+ }
+ else if (count < 32)
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_RV_IV(INS_shrd, EA_4BYTE, regLo, regHi, count);
+#elif defined(_TARGET_ARM_)
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count);
+ getEmitter()->emitIns_R_R_R_I(INS_OR, EA_4BYTE, regLo, regLo, regHi, 32 - count,
+ INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+#else // _TARGET_*
+ NYI("INS_shrd");
+#endif // _TARGET_*
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regHi, count);
+ }
+ else // count >= 32
+ {
+ assert(count >= 32);
+ if (count < 64)
+ {
+#if defined(_TARGET_ARM_)
+ if (count == 32)
+ {
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ }
+ else
+ {
+ assert(count > 32 && count < 64);
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, regHi,
+ count - 32);
+ }
+#else // _TARGET_*
+ // mov high dword into low dword (i.e. shift right by 32-bits)
+ inst_RV_RV(INS_mov, regLo, regHi);
+ if (count > 32)
+ {
+ // Shift low dword right by count - 32
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, regLo, count - 32);
+ }
+#endif // _TARGET_*
+ }
+ else // count >= 64
+ {
+ assert(count >= 64);
+ genSetRegToIcon(regLo, 0);
+ }
+ genSetRegToIcon(regHi, 0);
+ }
+ break;
+
+ default:
+ noway_assert(!"Illegal oper for long shift");
+ break;
+ }
+
+ goto DONE_SHF;
+ }
+
+ /* Which operand are we supposed to compute first? */
+
+ assert((RBM_SHIFT_LNG & RBM_LNGARG_0) == 0);
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* The second operand can't be a constant */
+
+ noway_assert(op2->gtOper != GT_CNS_INT);
+
+ /* Load the shift count, hopefully into RBM_SHIFT_LNG */
+ RegSet::ExactReg exactReg;
+ if ((RBM_SHIFT_LNG & op1->gtRsvdRegs) == 0)
+ exactReg = RegSet::EXACT_REG;
+ else
+ exactReg = RegSet::ANY_REG;
+ genComputeReg(op2, RBM_SHIFT_LNG, exactReg, RegSet::KEEP_REG);
+
+ /* Compute the left operand into REG_LNGARG_0 */
+
+ genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Lock op1 so that it doesn't get trashed */
+
+ regSet.rsLockUsedReg(RBM_LNGARG_0);
+
+ /* Make sure the shift count wasn't displaced */
+
+ genRecoverReg(op2, RBM_SHIFT_LNG, RegSet::KEEP_REG);
+
+ /* Lock op2 */
+
+ regSet.rsLockUsedReg(RBM_SHIFT_LNG);
+ }
+ else
+ {
+ /* Compute the left operand into REG_LNGARG_0 */
+
+ genComputeRegPair(op1, REG_LNGARG_0, avoidReg, RegSet::KEEP_REG, false);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ /* Compute the shift count into RBM_SHIFT_LNG */
+
+ genComputeReg(op2, RBM_SHIFT_LNG, RegSet::EXACT_REG, RegSet::KEEP_REG);
+
+ /* Lock op2 */
+
+ regSet.rsLockUsedReg(RBM_SHIFT_LNG);
+
+ /* Make sure the value hasn't been displaced */
+
+ genRecoverRegPair(op1, REG_LNGARG_0, RegSet::KEEP_REG);
+
+ /* Lock op1 so that it doesn't get trashed */
+
+ regSet.rsLockUsedReg(RBM_LNGARG_0);
+ }
+
+#ifndef _TARGET_X86_
+ /* The generic helper is a C-routine and so it follows the full ABI */
+ {
+ /* Spill any caller-saved (callee-trash) registers which are being used */
+ regMaskTP spillRegs = RBM_CALLEE_TRASH & regSet.rsMaskUsed;
+
+ /* But do not spill our argument registers. */
+ spillRegs &= ~(RBM_LNGARG_0 | RBM_SHIFT_LNG);
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+ }
+#endif // !_TARGET_X86_
+
+ /* Perform the shift by calling a helper function */
+
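+ // Register contract assumed here (summarized from the asserts that follow): the 64-bit
+ // operand is in the REG_LNGARG_0 pair, the shift count is in REG_SHIFT_LNG, and the
+ // helper returns the result in the same register pair (see DONE_SHF below).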
+ noway_assert(op1->gtRegPair == REG_LNGARG_0);
+ noway_assert(op2->gtRegNum == REG_SHIFT_LNG);
+ noway_assert((regSet.rsMaskLock & (RBM_LNGARG_0 | RBM_SHIFT_LNG)) == (RBM_LNGARG_0 | RBM_SHIFT_LNG));
+
+ genEmitHelperCall(helper,
+ 0, // argSize
+ EA_8BYTE); // retSize
+
+#ifdef _TARGET_X86_
+ /* The value in the register pair is trashed */
+
+ regTracker.rsTrackRegTrash(genRegPairLo(REG_LNGARG_0));
+ regTracker.rsTrackRegTrash(genRegPairHi(REG_LNGARG_0));
+#else // _TARGET_X86_
+ /* The generic helper is a C-routine and so it follows the full ABI */
+ regTracker.rsTrackRegMaskTrash(RBM_CALLEE_TRASH);
+#endif // _TARGET_X86_
+
+ /* Release both operands */
+
+ regSet.rsUnlockUsedReg(RBM_LNGARG_0 | RBM_SHIFT_LNG);
+ genReleaseRegPair(op1);
+ genReleaseReg(op2);
+
+ DONE_SHF:
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regPair = op1->gtRegPair;
+ goto DONE;
+
+ case GT_NEG:
+ case GT_NOT:
+
+ /* Generate the operand into some register pair */
+
+ genCompIntoFreeRegPair(op1, avoidReg, RegSet::FREE_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regPair = op1->gtRegPair;
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Unary "neg": negate the value in the register pair */
+ if (oper == GT_NEG)
+ {
+#ifdef _TARGET_ARM_
+
+ // ARM doesn't have an opcode that sets the carry bit like
+ // x86, so we can't use neg/addc/neg. Instead we use subtract
+ // with carry. Too bad this uses an extra register.
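+ //
+ // Illustrative sequence (sketch of the code emitted below), with regZero holding 0:
+ // rsbs regLo, regLo, #0 ; regLo = 0 - regLo, sets the carry/borrow flag
+ // sbc regHi, regZero, regHi ; regHi = 0 - regHi - borrow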
+
+ // Lock regLo and regHi so we don't pick them, and then pick
+ // a third register to be our 0.
+ regMaskTP regPairMask = genRegMask(regLo) | genRegMask(regHi);
+ regSet.rsLockReg(regPairMask);
+ regMaskTP regBest = RBM_ALLINT & ~avoidReg;
+ regNumber regZero = genGetRegSetToIcon(0, regBest);
+ regSet.rsUnlockReg(regPairMask);
+
+ inst_RV_IV(INS_rsb, regLo, 0, EA_4BYTE, INS_FLAGS_SET);
+ getEmitter()->emitIns_R_R_R_I(INS_sbc, EA_4BYTE, regHi, regZero, regHi, 0);
+
+#elif defined(_TARGET_XARCH_)
+
+ inst_RV(INS_NEG, regLo, TYP_LONG);
+ inst_RV_IV(INS_ADDC, regHi, 0, emitActualTypeSize(TYP_LONG));
+ inst_RV(INS_NEG, regHi, TYP_LONG);
+#else
+ NYI("GT_NEG on TYP_LONG");
+#endif
+ }
+ else
+ {
+ /* Unary "not": flip all the bits in the register pair */
+
+ inst_RV(INS_NOT, regLo, TYP_LONG);
+ inst_RV(INS_NOT, regHi, TYP_LONG);
+ }
+
+ goto DONE;
+
+#if LONG_ASG_OPS
+
+ case GT_ASG_OR:
+ insLo = insHi = INS_OR;
+ goto ASG_OPR;
+ case GT_ASG_XOR:
+ insLo = insHi = INS_XOR;
+ goto ASG_OPR;
+ case GT_ASG_AND:
+ insLo = insHi = INS_AND;
+ goto ASG_OPR;
+ case GT_ASG_SUB:
+ insLo = INS_sub;
+ insHi = INS_SUBC;
+ goto ASG_OPR;
+ case GT_ASG_ADD:
+ insLo = INS_add;
+ insHi = INS_ADDC;
+ goto ASG_OPR;
+
+ ASG_OPR:
+
+ if (op2->gtOper == GT_CNS_LNG)
+ {
+ __int64 lval = op2->gtLngCon.gtLconVal;
+
+ /* Make the target addressable */
+
+ addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
+
+ /* Optimize some special cases */
+
+ doLo = doHi = true;
+
+ /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
+
+ switch (oper)
+ {
+ case GT_ASG_AND:
+ if ((int)(lval) == -1)
+ doLo = false;
+ if ((int)(lval >> 32) == -1)
+ doHi = false;
+ break;
+
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ if (!(lval & 0x00000000FFFFFFFF))
+ doLo = false;
+ if (!(lval & 0xFFFFFFFF00000000))
+ doHi = false;
+ break;
+ }
+
+ if (doLo)
+ inst_TT_IV(insLo, op1, (int)(lval), 0);
+ if (doHi)
+ inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
+
+ bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
+ if (doLo || doHi)
+ tree->gtFlags |= GTF_ZSF_SET;
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ goto DONE_ASSG_REGS;
+ }
+
+ /* TODO: allow non-const long assignment operators */
+
+ noway_assert(!"non-const long asgop NYI");
+
+#endif // LONG_ASG_OPS
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ {
+ regMaskTP tmpMask;
+ int hiFirst;
+
+ regMaskTP availMask = RBM_ALLINT & ~needReg;
+
+ /* Make sure the operand is addressable */
+
+ addrReg = genMakeAddressable(tree, availMask, RegSet::FREE_REG);
+
+ GenTreePtr addr = oper == GT_IND ? op1 : tree;
+
+ /* Pick a register for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+ tmpMask = genRegPairMask(regPair);
+
+ /* Is there any overlap between the register pair and the address? */
+
+ hiFirst = FALSE;
+
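+ // Added summary of the cases handled below: if exactly one register of the target pair
+ // also forms part of the address, load the half whose register does not overlap first and
+ // the overlapping one last; if both registers overlap, try to pick a different free pair,
+ // or else grab one fresh register and reuse one of the address registers for the other half.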
+ if (tmpMask & addrReg)
+ {
+ /* Does one or both of the target registers overlap? */
+
+ if ((tmpMask & addrReg) != tmpMask)
+ {
+ /* Only one register overlaps */
+
+ noway_assert(genMaxOneBit(tmpMask & addrReg) == TRUE);
+
+ /* If the low register overlaps, load the upper half first */
+
+ if (addrReg & genRegMask(genRegPairLo(regPair)))
+ hiFirst = TRUE;
+ }
+ else
+ {
+ regMaskTP regFree;
+
+ /* The register completely overlaps with the address */
+
+ noway_assert(genMaxOneBit(tmpMask & addrReg) == FALSE);
+
+ /* Can we pick another pair easily? */
+
+ regFree = regSet.rsRegMaskFree() & ~addrReg;
+ if (needReg)
+ regFree &= needReg;
+
+ /* More than one free register available? */
+
+ if (regFree && !genMaxOneBit(regFree))
+ {
+ regPair = regSet.rsPickRegPair(regFree);
+ tmpMask = genRegPairMask(regPair);
+ }
+ else
+ {
+ // printf("Overlap: needReg = %08X\n", needReg);
+
+ // Reg-prediction won't allow this
+ noway_assert((regSet.rsMaskVars & addrReg) == 0);
+
+ // Grab one fresh reg, and use any one of addrReg
+
+ if (regFree) // Try to follow 'needReg'
+ regLo = regSet.rsGrabReg(regFree);
+ else // Pick any reg besides addrReg
+ regLo = regSet.rsGrabReg(RBM_ALLINT & ~addrReg);
+
+ unsigned regBit = 0x1;
+ regNumber regNo;
+
+ for (regNo = REG_INT_FIRST; regNo <= REG_INT_LAST; regNo = REG_NEXT(regNo), regBit <<= 1)
+ {
+ // Found one of addrReg. Use it.
+ if (regBit & addrReg)
+ break;
+ }
+ noway_assert(genIsValidReg(regNo)); // Should have found regNo
+
+ regPair = gen2regs2pair(regLo, regNo);
+ tmpMask = genRegPairMask(regPair);
+ }
+ }
+ }
+
+ /* Make sure the value is still addressable */
+
+ noway_assert(genStillAddressable(tree));
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Load the target registers from where the value is */
+
+ if (hiFirst)
+ {
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
+ regSet.rsLockReg(genRegMask(regHi));
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
+ regSet.rsUnlockReg(genRegMask(regHi));
+ }
+ else
+ {
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regLo, addr, 0);
+ regSet.rsLockReg(genRegMask(regLo));
+ inst_RV_AT(ins_Load(TYP_INT), EA_4BYTE, TYP_INT, regHi, addr, 4);
+ regSet.rsUnlockReg(genRegMask(regLo));
+ }
+
+#ifdef _TARGET_ARM_
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+#endif
+
+ genUpdateLife(tree);
+ genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
+ }
+ goto DONE;
+
+ case GT_CAST:
+
+ /* What are we casting from? */
+
+ switch (op1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_INT:
+ case TYP_UBYTE:
+ case TYP_BYREF:
+ {
+ regMaskTP hiRegMask;
+ regMaskTP loRegMask;
+
+ // For an unsigned cast we don't need to sign-extend the 32 bit value
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ // Does needReg have exactly two bits set, thus specifying
+ // the exact register pair that we want to use?
+ if (!genMaxOneBit(needReg))
+ {
+ regPair = regSet.rsFindRegPairNo(needReg);
+ if (needReg != genRegPairMask(regPair))
+ goto ANY_FREE_REG_UNSIGNED;
+ loRegMask = genRegMask(genRegPairLo(regPair));
+ if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
+ goto ANY_FREE_REG_UNSIGNED;
+ hiRegMask = genRegMask(genRegPairHi(regPair));
+ }
+ else
+ {
+ ANY_FREE_REG_UNSIGNED:
+ loRegMask = needReg;
+ hiRegMask = needReg;
+ }
+
+ genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regLo = op1->gtRegNum;
+ loRegMask = genRegMask(regLo);
+ regSet.rsLockUsedReg(loRegMask);
+ regHi = regSet.rsPickReg(hiRegMask);
+ regSet.rsUnlockUsedReg(loRegMask);
+
+ regPair = gen2regs2pair(regLo, regHi);
+
+ // Move 0 to the higher word of the ULong
+ genSetRegToIcon(regHi, 0, TYP_INT);
+
+ /* We can now free up the operand */
+ genReleaseReg(op1);
+
+ goto DONE;
+ }
+#ifdef _TARGET_XARCH_
+ /* Cast of 'int' to 'long' --> Use cdq if EAX,EDX are available
+ and we need the result to be in those registers.
+ cdq is smaller so we use it for SMALL_CODE
+ */
+
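+ // Added note (sketch): with the 32-bit value in EAX, 'cdq' sign-extends it into EDX, so
+ // EDX:EAX holds the 64-bit result in one short instruction; the generic path below uses
+ // "mov regHi, regLo" followed by "sar regHi, 31" (folded into a single shift on ARM).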
+ if ((needReg & (RBM_EAX | RBM_EDX)) == (RBM_EAX | RBM_EDX) &&
+ (regSet.rsRegMaskFree() & RBM_EDX))
+ {
+ genCodeForTree(op1, RBM_EAX);
+ regSet.rsMarkRegUsed(op1);
+
+ /* If we have to spill EDX, might as well use the faster
+ sar as the spill will increase code size anyway */
+
+ if (op1->gtRegNum != REG_EAX || !(regSet.rsRegMaskFree() & RBM_EDX))
+ {
+ hiRegMask = regSet.rsRegMaskFree();
+ goto USE_SAR_FOR_CAST;
+ }
+
+ regSet.rsGrabReg(RBM_EDX);
+ regTracker.rsTrackRegTrash(REG_EDX);
+
+ /* Convert the int in EAX into a long in EDX:EAX */
+
+ instGen(INS_cdq);
+
+ /* The result is in EDX:EAX */
+
+ regPair = REG_PAIR_EAXEDX;
+ }
+ else
+#endif
+ {
+ /* use the sar instruction to sign-extend a 32-bit integer */
+
+ // Does needReg have exactly two bits set, thus specifying
+ // the exact register pair that we want to use?
+ if (!genMaxOneBit(needReg))
+ {
+ regPair = regSet.rsFindRegPairNo(needReg);
+ if ((regPair == REG_PAIR_NONE) || (needReg != genRegPairMask(regPair)))
+ goto ANY_FREE_REG_SIGNED;
+ loRegMask = genRegMask(genRegPairLo(regPair));
+ if ((loRegMask & regSet.rsRegMaskCanGrab()) == 0)
+ goto ANY_FREE_REG_SIGNED;
+ hiRegMask = genRegMask(genRegPairHi(regPair));
+ }
+ else
+ {
+ ANY_FREE_REG_SIGNED:
+ loRegMask = needReg;
+ hiRegMask = RBM_NONE;
+ }
+
+ genComputeReg(op1, loRegMask, RegSet::ANY_REG, RegSet::KEEP_REG);
+#ifdef _TARGET_XARCH_
+ USE_SAR_FOR_CAST:
+#endif
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ regLo = op1->gtRegNum;
+ loRegMask = genRegMask(regLo);
+ regSet.rsLockUsedReg(loRegMask);
+ regHi = regSet.rsPickReg(hiRegMask);
+ regSet.rsUnlockUsedReg(loRegMask);
+
+ regPair = gen2regs2pair(regLo, regHi);
+
+#ifdef _TARGET_ARM_
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
+ // Use one instruction instead of two
+ getEmitter()->emitIns_R_R_I(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, regLo, 31);
+#else
+ /* Copy the lo32 bits from regLo to regHi and sign-extend it */
+ inst_RV_RV(INS_mov, regHi, regLo, TYP_INT);
+ inst_RV_SH(INS_SHIFT_RIGHT_ARITHM, EA_4BYTE, regHi, 31);
+#endif
+
+ /* The value in the upper register is trashed */
+
+ regTracker.rsTrackRegTrash(regHi);
+ }
+
+ /* We can now free up the operand */
+ genReleaseReg(op1);
+
+ // conv.ovf.u8 could overflow if the original number was negative
+ if (tree->gtOverflow() && TYP_ULONG == tree->CastToType())
+ {
+ regNumber hiReg = genRegPairHi(regPair);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ }
+ goto DONE;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+#if 0
+ /* Load the FP value onto the coprocessor stack */
+
+ genCodeForTreeFlt(op1);
+
+ /* Allocate a temp for the long value */
+
+ temp = compiler->tmpGetTemp(TYP_LONG);
+
+ /* Store the FP value into the temp */
+
+ inst_FS_ST(INS_fistpl, sizeof(int), temp, 0);
+ genFPstkLevel--;
+
+ /* Pick a register pair for the value */
+
+ regPair = regSet.rsPickRegPair(needReg);
+
+ /* Figure out which registers the value is in */
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+ /* The value in the register pair is about to be trashed */
+
+ regTracker.rsTrackRegTrash(regLo);
+ regTracker.rsTrackRegTrash(regHi);
+
+ /* Load the converted value into the registers */
+
+ inst_RV_ST(INS_mov, EA_4BYTE, regLo, temp, 0);
+ inst_RV_ST(INS_mov, EA_4BYTE, regHi, temp, 4);
+
+ /* We no longer need the temp */
+
+ compiler->tmpRlsTemp(temp);
+ goto DONE;
+#else
+ NO_WAY("Cast from TYP_FLOAT or TYP_DOUBLE supposed to be done via a helper call");
+ break;
+#endif
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ noway_assert(tree->gtOverflow()); // conv.ovf.u8 or conv.ovf.i8
+
+ genComputeRegPair(op1, REG_PAIR_NONE, RBM_ALLINT & ~needReg, RegSet::FREE_REG);
+ regPair = op1->gtRegPair;
+
+ // Do we need to set the sign flag, or can we check whether it is already set
+ // and skip this test if so?
+
+ if (op1->gtFlags & GTF_REG_VAL)
+ {
+ regNumber hiReg = genRegPairHi(op1->gtRegPair);
+ noway_assert(hiReg != REG_STK);
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, hiReg); // set flags
+ }
+ else
+ {
+ inst_TT_IV(INS_cmp, op1, 0, sizeof(int));
+ }
+
+ emitJumpKind jmpLTS = genJumpKindForOper(GT_LT, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpLTS, SCK_OVERFLOW);
+ }
+ goto DONE;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("unexpected cast to long");
+ }
+ break;
+
+ case GT_RETURN:
+
+ /* TODO:
+ * This code is cloned from the regular processing of GT_RETURN values. We have to remember to
+ * call genPInvokeMethodEpilog anywhere that we have a GT_RETURN statement. We should really
+ * generate trees for the PInvoke prolog and epilog so we can remove these special cases.
+ */
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+#if CPU_LONG_USES_REGPAIR
+ /* There must be a long return value */
+
+ noway_assert(op1);
+
+ /* Evaluate the return value into EDX:EAX */
+
+ genEvalIntoFreeRegPair(op1, REG_LNGRET, avoidReg);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_LNGRET);
+
+#else
+ NYI("64-bit return");
+#endif
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+ return;
+
+ case GT_QMARK:
+ noway_assert(!"inliner-generated ?: for longs NYI");
+ NO_WAY("inliner-generated ?: for longs NYI");
+ break;
+
+ case GT_COMMA:
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ // Generate op2
+ genCodeForTreeLng(op2, needReg, avoidReg);
+ genUpdateLife(op2);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ regSet.rsMarkRegPairUsed(op2);
+
+ // Do side effects of op1
+ genEvalSideEffects(op1);
+
+ // Recover op2 if spilled
+ genRecoverRegPair(op2, REG_PAIR_NONE, RegSet::KEEP_REG);
+
+ genReleaseRegPair(op2);
+
+ genUpdateLife(tree);
+
+ regPair = op2->gtRegPair;
+ }
+ else
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ /* Generate side effects of the first operand */
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+
+ /* Is the value of the second operand used? */
+
+ if (tree->gtType == TYP_VOID)
+ {
+ /* The right operand produces no result */
+
+ genEvalSideEffects(op2);
+ genUpdateLife(tree);
+ return;
+ }
+
+ /* Generate the second operand, i.e. the 'real' value */
+
+ genCodeForTreeLng(op2, needReg, avoidReg);
+
+ /* The result of 'op2' is also the final result */
+
+ regPair = op2->gtRegPair;
+ }
+
+ goto DONE;
+
+ case GT_BOX:
+ {
+ /* Generate the operand, i.e. the 'real' value */
+
+ genCodeForTreeLng(op1, needReg, avoidReg);
+
+ /* The result of 'op1' is also the final result */
+
+ regPair = op1->gtRegPair;
+ }
+
+ goto DONE;
+
+ case GT_NOP:
+ if (op1 == NULL)
+ return;
+
+ genCodeForTreeLng(op1, needReg, avoidReg);
+ regPair = op1->gtRegPair;
+ goto DONE;
+
+ default:
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected 64-bit operator");
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ regMaskTP retMask;
+ case GT_CALL:
+ retMask = genCodeForCall(tree, true);
+ if (retMask == RBM_NONE)
+ regPair = REG_PAIR_NONE;
+ else
+ regPair = regSet.rsFindRegPairNo(retMask);
+ break;
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ NO_WAY("unexpected long operator");
+ }
+
+DONE:
+
+ genUpdateLife(tree);
+
+ /* Here we've computed the value of 'tree' into 'regPair' */
+
+ noway_assert(regPair != DUMMY_INIT(REG_PAIR_CORRUPT));
+
+ genMarkTreeInRegPair(tree, regPair);
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Generate code for a mod of a long by an int.
+ */
+
+regPairNo CodeGen::genCodeForLongModInt(GenTreePtr tree, regMaskTP needReg)
+{
+#ifdef _TARGET_X86_
+
+ regPairNo regPair;
+ regMaskTP addrReg;
+
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Codegen only for Unsigned MOD */
+ noway_assert(oper == GT_UMOD);
+
+ /* op2 must be a long constant in the range 2 to 0x3fffffff */
+
+ noway_assert((op2->gtOper == GT_CNS_LNG) && (op2->gtLngCon.gtLconVal >= 2) &&
+ (op2->gtLngCon.gtLconVal <= 0x3fffffff));
+ int val = (int)op2->gtLngCon.gtLconVal;
+
+ op2->ChangeOperConst(GT_CNS_INT); // it's effectively an integer constant
+
+ op2->gtType = TYP_INT;
+ op2->gtIntCon.gtIconVal = val;
+
+ /* Which operand are we supposed to compute first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_TMP, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+ else
+ {
+ /* Compute the first operand into EAX:EDX */
+
+ genComputeRegPair(op1, REG_PAIR_EAXEDX, RBM_NONE, RegSet::KEEP_REG, true);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op1->gtRegPair == REG_PAIR_TMP);
+
+ /* Compute the second operand into a scratch register, other
+ than EAX or EDX */
+
+ needReg = regSet.rsMustExclude(needReg, RBM_PAIR_TMP);
+
+ /* Special case: if op2 is a local var we are done */
+
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_LCL_FLD || op2->gtOper == GT_CLS_VAR)
+ {
+ addrReg = genMakeRvalueAddressable(op2, needReg, RegSet::KEEP_REG, false);
+ }
+ else
+ {
+ genComputeReg(op2, needReg, RegSet::ANY_REG, RegSet::KEEP_REG);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ addrReg = genRegMask(op2->gtRegNum);
+ }
+
+ /* Recover the first argument */
+
+ genRecoverRegPair(op1, REG_PAIR_EAXEDX, RegSet::KEEP_REG);
+
+ /* And recover the second argument while locking the first one */
+
+ addrReg = genKeepAddressable(op2, addrReg, RBM_PAIR_TMP);
+ }
+
+ /* At this point, EAX:EDX contains the 64-bit dividend and op2->gtRegNum
+ contains the 32-bit divisor. We want to generate the following code:
+
+ ==========================
+ Unsigned (GT_UMOD)
+
+ cmp edx, op2->gtRegNum
+ jb lab_no_overflow
+
+ mov temp, eax
+ mov eax, edx
+ xor edx, edx
+ div op2->gtRegNum
+ mov eax, temp
+
+ lab_no_overflow:
+ div op2->gtRegNum
+ ==========================
+ This works because (a * 2^32 + b) % c = ((a % c) * 2^32 + b) % c
+ */
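+
+ /* Numeric sanity check of the identity (illustration only), with a = 25, b = 7, c = 10:
+ (25 * 2^32 + 7) % 10 = 107374182407 % 10 = 7
+ ((25 % 10) * 2^32 + 7) % 10 = (5 * 2^32 + 7) % 10 = 21474836487 % 10 = 7
+ Reducing the high dword first keeps the quotient of the final 'div' within 32 bits. */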
+
+ BasicBlock* lab_no_overflow = genCreateTempLabel();
+
+ // grab a temporary register other than eax, edx, and op2->gtRegNum
+
+ regNumber tempReg = regSet.rsGrabReg(RBM_ALLINT & ~(RBM_PAIR_TMP | genRegMask(op2->gtRegNum)));
+
+ // EAX and tempReg will be trashed by the mov instructions. Doing
+ // this early won't hurt, and might prevent confusion in genSetRegToIcon.
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(tempReg);
+
+ inst_RV_RV(INS_cmp, REG_PAIR_TMP_HI, op2->gtRegNum);
+ inst_JMP(EJ_jb, lab_no_overflow);
+
+ inst_RV_RV(INS_mov, tempReg, REG_PAIR_TMP_LO, TYP_INT);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, tempReg, TYP_INT);
+
+ // Jump point for no overflow divide
+
+ genDefineTempLabel(lab_no_overflow);
+
+ // Issue the divide instruction
+
+ inst_TT(INS_UNSIGNED_DIVIDE, op2);
+
+ /* EAX, EDX, tempReg and op2->gtRegNum are now trashed */
+
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_LO);
+ regTracker.rsTrackRegTrash(REG_PAIR_TMP_HI);
+ regTracker.rsTrackRegTrash(tempReg);
+ regTracker.rsTrackRegTrash(op2->gtRegNum);
+
+ if (tree->gtFlags & GTF_MOD_INT_RESULT)
+ {
+ /* We don't need to normalize the result, because the caller wants
+ an int (in edx) */
+
+ regPair = REG_PAIR_TMP_REVERSE;
+ }
+ else
+ {
+ /* The result is now in EDX; we have to normalize it, i.e. we have
+ to issue:
+ mov eax, edx; xor edx, edx (for UMOD)
+ */
+
+ inst_RV_RV(INS_mov, REG_PAIR_TMP_LO, REG_PAIR_TMP_HI, TYP_INT);
+
+ genSetRegToIcon(REG_PAIR_TMP_HI, 0, TYP_INT);
+
+ regPair = REG_PAIR_TMP;
+ }
+
+ genReleaseRegPair(op1);
+ genDoneAddressable(op2, addrReg, RegSet::KEEP_REG);
+
+ return regPair;
+
+#else // !_TARGET_X86_
+
+ NYI("codegen for LongModInt");
+
+ return REG_PAIR_NONE;
+
+#endif // !_TARGET_X86_
+}
+
+// Given a tree, return the number of registers that are currently
+// used to hold enregistered integer local variables.
+// Note that an enregistered TYP_LONG can take 1 or 2 registers.
+unsigned CodeGen::genRegCountForLiveIntEnregVars(GenTreePtr tree)
+{
+ unsigned regCount = 0;
+
+ VARSET_ITER_INIT(compiler, iter, compiler->compCurLife, varNum);
+ while (iter.NextElem(compiler, &varNum))
+ {
+ unsigned lclNum = compiler->lvaTrackedToVarNum[varNum];
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+
+ if (varDsc->lvRegister && !varTypeIsFloating(varDsc->TypeGet()))
+ {
+ ++regCount;
+
+ if (varTypeIsLong(varDsc->TypeGet()))
+ {
+ // For enregistered LONG/ULONG, the lower half should always be in a register.
+ noway_assert(varDsc->lvRegNum != REG_STK);
+
+ // If the LONG/ULONG is NOT partially enregistered, then the upper half should be in a register as
+ // well.
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ ++regCount;
+ }
+ }
+ }
+ }
+
+ return regCount;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+#if CPU_HAS_FP_SUPPORT
+/*****************************************************************************
+ *
+ * Generate code for a floating-point operation.
+ */
+
+void CodeGen::genCodeForTreeFlt(GenTreePtr tree,
+ regMaskTP needReg, /* = RBM_ALLFLOAT */
+ regMaskTP bestReg) /* = RBM_NONE */
+{
+ genCodeForTreeFloat(tree, needReg, bestReg);
+
+ if (tree->OperGet() == GT_RETURN)
+ {
+ // Make sure to get ALL THE EPILOG CODE
+
+ // TODO: this should be done AFTER we called exit mon so that
+ // we are sure that we don't have to keep 'this' alive
+
+ if (compiler->info.compCallUnmanaged && (compiler->compCurBB == compiler->genReturnBB))
+ {
+ /* either it's an "empty" statement or the return statement
+ of a synchronized method
+ */
+
+ genPInvokeMethodEpilog();
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // The profiling hook does not trash registers, so it's safe to call after we emit the code for
+ // the GT_RETURN tree.
+
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ genProfilingLeaveCallback();
+ }
+#endif
+ }
+}
+
+/*****************************************************************************/
+#endif // CPU_HAS_FP_SUPPORT
+
+/*****************************************************************************
+ *
+ * Generate a table switch - the switch value (0-based) is in register 'reg'.
+ */
+
+void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
+{
+ unsigned jmpTabBase;
+
+ if (jumpCnt == 1)
+ {
+ // In debug code, we don't optimize away the trivial switch statements. So we can get here with a
+ // BBJ_SWITCH with only a default case. Therefore, don't generate the switch table.
+ noway_assert(compiler->opts.MinOpts() || compiler->opts.compDbgCode);
+ inst_JMP(EJ_jmp, jumpTab[0]);
+ return;
+ }
+
+ noway_assert(jumpCnt >= 2);
+
+ /* Is the number of cases right for a test and jump switch? */
+
+ const bool fFirstCaseFollows = (compiler->compCurBB->bbNext == jumpTab[0]);
+ const bool fDefaultFollows = (compiler->compCurBB->bbNext == jumpTab[jumpCnt - 1]);
+ const bool fHaveScratchReg = ((regSet.rsRegMaskFree() & genRegMask(reg)) != 0);
+
+ unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
+
+ // This means really just a single cmp/jcc (aka a simple if/else)
+ if (fFirstCaseFollows || fDefaultFollows)
+ minSwitchTabJumpCnt++;
+
+#ifdef _TARGET_ARM_
+ // On the ARM for small switch tables we will
+ // generate a sequence of compare and branch instructions
+ // because the code to load the base of the switch
+ // table is huge and hideous due to the relocation... :(
+ //
+ minSwitchTabJumpCnt++;
+ if (fHaveScratchReg)
+ minSwitchTabJumpCnt++;
+
+#endif // _TARGET_ARM_
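+
+ // Net effect of the adjustments above (summary added for clarity): on x86 a jump table is
+ // used for as few as two cases (three when the first case or the default follows), while on
+ // ARM the compare-and-branch sequence is kept for one or two more cases, since materializing
+ // the table base requires an expensive relocation sequence.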
+
+ if (jumpCnt < minSwitchTabJumpCnt)
+ {
+ /* Does the first case label follow? */
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+
+ if (fFirstCaseFollows)
+ {
+ /* Check for the default case */
+ inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
+
+ /* No need to jump to the first case */
+
+ jumpCnt -= 2;
+ jumpTab += 1;
+
+ /* Generate a series of "sub reg, 1; je label" */
+
+ // Make sure that we can trash the register so
+ // that we can generate a series of compares and jumps
+ //
+ if ((jumpCnt > 0) && !fHaveScratchReg)
+ {
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, tmpReg, reg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ reg = tmpReg;
+ }
+
+ while (jumpCnt > 0)
+ {
+ inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(jmpEqual, *jumpTab++);
+ jumpCnt--;
+ }
+ }
+ else
+ {
+ /* Check for case0 first */
+ instGen_Compare_Reg_To_Zero(EA_4BYTE, reg); // set flags
+ inst_JMP(jmpEqual, *jumpTab);
+
+ /* No need to jump to the first case or the default */
+
+ jumpCnt -= 2;
+ jumpTab += 1;
+
+ /* Generate a series of "sub reg, 1; je label" */
+
+ // Make sure that we can trash the register so
+ // that we can generate a series of compares and jumps
+ //
+ if ((jumpCnt > 0) && !fHaveScratchReg)
+ {
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, tmpReg, reg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ reg = tmpReg;
+ }
+
+ while (jumpCnt > 0)
+ {
+ inst_RV_IV(INS_sub, reg, 1, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(jmpEqual, *jumpTab++);
+ jumpCnt--;
+ }
+
+ if (!fDefaultFollows)
+ {
+ inst_JMP(EJ_jmp, *jumpTab);
+ }
+ }
+
+ if ((fFirstCaseFollows || fDefaultFollows) &&
+ compiler->fgInDifferentRegions(compiler->compCurBB, compiler->compCurBB->bbNext))
+ {
+ inst_JMP(EJ_jmp, compiler->compCurBB->bbNext);
+ }
+
+ return;
+ }
+
+ /* First take care of the default case */
+
+ inst_RV_IV(INS_cmp, reg, jumpCnt - 1, EA_4BYTE);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, jumpTab[jumpCnt - 1]);
+
+ /* Generate the jump table contents */
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCnt - 1, false);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+#endif
+
+ for (unsigned index = 0; index < jumpCnt - 1; index++)
+ {
+ BasicBlock* target = jumpTab[index];
+
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ printf(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+#endif
+
+ getEmitter()->emitDataGenData(index, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+#ifdef _TARGET_ARM_
+ // We need to load the address of the table into a register.
+ // The data section might get placed a long distance away, so we
+ // can't safely do a PC-relative ADR. :(
+ // Pick any register except the index register.
+ //
+ regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase);
+ regTracker.rsTrackRegTrash(regTabBase);
+
+ // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2])
+ getEmitter()->emitIns_R_ARX(INS_ldr, EA_PTRSIZE, REG_PC, regTabBase, reg, TARGET_POINTER_SIZE, 0);
+
+#else // !_TARGET_ARM_
+
+ getEmitter()->emitIns_IJ(EA_4BYTE_DSP_RELOC, reg, jmpTabBase);
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a switch statement.
+ */
+
+void CodeGen::genCodeForSwitch(GenTreePtr tree)
+{
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ GenTreePtr oper;
+ regNumber reg;
+
+ noway_assert(tree->gtOper == GT_SWITCH);
+ oper = tree->gtOp.gtOp1;
+ noway_assert(genActualTypeIsIntOrI(oper->gtType));
+
+ /* Get hold of the jump table */
+
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+
+ jumpCnt = compiler->compCurBB->bbJumpSwt->bbsCount;
+ jumpTab = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+
+ /* Compute the switch value into some register */
+
+ genCodeForTree(oper, 0);
+
+ /* Get hold of the register the value is in */
+
+ noway_assert(oper->gtFlags & GTF_REG_VAL);
+ reg = oper->gtRegNum;
+
+#if FEATURE_STACK_FP_X87
+ if (!compCurFPState.IsEmpty())
+ {
+ return genTableSwitchStackFP(reg, jumpCnt, jumpTab);
+ }
+ else
+#endif // FEATURE_STACK_FP_X87
+ {
+ return genTableSwitch(reg, jumpCnt, jumpTab);
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ * Emit a call to a helper function.
+ */
+
+// inline
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize)
+{
+ // Can we call the helper function directly?
+
+ void *addr = NULL, **pAddr = NULL;
+
+#if defined(_TARGET_ARM_) && defined(DEBUG) && defined(PROFILING_SUPPORTED)
+ // Don't ask VM if it hasn't requested ELT hooks
+ if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
+ (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
+ helper == CORINFO_HELP_PROF_FCN_TAILCALL))
+ {
+ addr = compiler->compProfilerMethHnd;
+ }
+ else
+#endif
+ {
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
+ }
+
+#ifdef _TARGET_ARM_
+ if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ // Load the address into a register and call through a register
+ regNumber indCallReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ if (addr)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // ilOffset
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ else
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+#else
+
+ {
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+
+ if (!addr)
+ {
+ callType = emitter::EC_FUNC_TOKEN_INDIR;
+ addr = pAddr;
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr,
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper));
+ }
+#endif
+
+ regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+/*****************************************************************************
+ *
+ * Push the given registers.
+ * This function does not check if the register is marked as used, etc.
+ */
+
+regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* noRefRegs)
+{
+ *byrefRegs = RBM_NONE;
+ *noRefRegs = RBM_NONE;
+
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+
+ if (regs == RBM_NONE)
+ return RBM_NONE;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPushRegs with real regs!");
+ return RBM_NONE;
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_I_IMPL));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));
+
+ regMaskTP pushedRegs = regs;
+
+ for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & gcInfo.gcRegGCrefSetCur)
+ {
+ type = TYP_REF;
+ }
+ else if (regBit & gcInfo.gcRegByrefSetCur)
+ {
+ *byrefRegs |= regBit;
+ type = TYP_BYREF;
+ }
+ else if (noRefRegs != NULL)
+ {
+ *noRefRegs |= regBit;
+ type = TYP_I_IMPL;
+ }
+ else
+ {
+ continue;
+ }
+
+ inst_RV(INS_push, reg, type);
+
+ genSinglePush();
+ gcInfo.gcMarkRegSetNpt(regBit);
+
+ regs &= ~regBit;
+ }
+
+ return pushedRegs;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Pop the registers pushed by genPushRegs()
+ */
+
+void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefRegs)
+{
+ if (regs == RBM_NONE)
+ return;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ NYI("Don't call genPopRegs with real regs!");
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ noway_assert((regs & byrefRegs) == byrefRegs);
+ noway_assert((regs & noRefRegs) == noRefRegs);
+ // noway_assert((regs & regSet.rsRegMaskFree()) == regs); // Don't care. Caller is responsible for all this
+ noway_assert((regs & (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur)) == RBM_NONE);
+
+ noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT));
+ noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT));
+
+ // Walk the registers in the reverse order of genPushRegs()
+ for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg))
+ {
+ regMaskTP regBit = regMaskTP(1) << reg;
+
+ if ((regBit & regs) == RBM_NONE)
+ continue;
+
+ var_types type;
+ if (regBit & byrefRegs)
+ {
+ type = TYP_BYREF;
+ }
+ else if (regBit & noRefRegs)
+ {
+ type = TYP_INT;
+ }
+ else
+ {
+ type = TYP_REF;
+ }
+
+ inst_RV(INS_pop, reg, type);
+ genSinglePop();
+
+ if (type != TYP_INT)
+ gcInfo.gcMarkRegPtrVal(reg, type);
+
+ regs &= ~regBit;
+ }
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
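+
+// Typical pairing of genPushRegs/genPopRegs (sketch; 'regsToSave' is a placeholder name):
+//
+// regMaskTP byrefRegs, noRefRegs;
+// regMaskTP pushedRegs = genPushRegs(regsToSave, &byrefRegs, &noRefRegs);
+// ... emit code that may trash these registers ...
+// genPopRegs(pushedRegs, byrefRegs, noRefRegs);
+//
+// The byref/noRef masks returned by genPushRegs must be handed back to genPopRegs so the
+// GC information for the popped registers can be restored.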
+
+/*****************************************************************************
+ *
+ * Push the given argument list, right to left; returns the total amount of
+ * stuff pushed.
+ */
+
+#if !FEATURE_FIXED_OUT_ARGS
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+size_t CodeGen::genPushArgList(GenTreePtr call)
+{
+ GenTreeArgList* regArgs = call->gtCall.gtCallLateArgs;
+ size_t size = 0;
+ regMaskTP addrReg;
+
+ GenTreeArgList* args;
+ // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
+ // so we can iterate over this argument list more uniformly.
+ // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
+ GenTreeArgList firstForObjp(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
+ if (call->gtCall.gtCallObjp == NULL)
+ {
+ args = call->gtCall.gtCallArgs;
+ }
+ else
+ {
+ firstForObjp.Current() = call->gtCall.gtCallObjp;
+ args = &firstForObjp;
+ }
+
+ GenTreePtr curr;
+ var_types type;
+ size_t opsz;
+
+ for (; args; args = args->Rest())
+ {
+ addrReg = DUMMY_INIT(RBM_CORRUPT); // to detect uninitialized use
+
+ /* Get hold of the next argument value */
+ curr = args->Current();
+
+ if (curr->IsArgPlaceHolderNode())
+ {
+ assert(curr->gtFlags & GTF_LATE_ARG);
+
+ addrReg = 0;
+ continue;
+ }
+
+ // If we have a comma expression, eval the non-last, then deal with the last.
+ if (!(curr->gtFlags & GTF_LATE_ARG))
+ curr = genCodeForCommaTree(curr);
+
+ /* See what type of a value we're passing */
+ type = curr->TypeGet();
+
+ opsz = genTypeSize(genActualType(type));
+
+ switch (type)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_UBYTE:
+
+ /* Don't want to push a small value, make it a full word */
+
+ genCodeForTree(curr, 0);
+
+ __fallthrough; // now the value should be in a register ...
+
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ assert(curr->gtOper == GT_ASG);
+ /* one more argument will be passed in a register */
+ noway_assert(intRegState.rsCurRegArgNum < MAX_REG_ARG);
+
+ /* arg is passed in the register, nothing on the stack */
+
+ opsz = 0;
+ }
+
+ /* Is this value a handle? */
+
+ if (curr->gtOper == GT_CNS_INT && curr->IsIconHandle())
+ {
+ /* Emit a fixup for the push instruction */
+
+ inst_IV_handle(INS_push, curr->gtIntCon.gtIconVal);
+ genSinglePush();
+
+ addrReg = 0;
+ break;
+ }
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_INT)
+ {
+
+#if REDUNDANT_LOAD
+ regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
+
+ if (reg != REG_NA)
+ {
+ inst_RV(INS_push, reg, TYP_INT);
+ }
+ else
+#endif
+ {
+ inst_IV(INS_push, curr->gtIntCon.gtIconVal);
+ }
+
+ /* If the type is TYP_REF, then this must be a "null". So we can
+ treat it as a TYP_INT as we don't need to report it as a GC ptr */
+
+ noway_assert(curr->TypeGet() == TYP_INT ||
+ (varTypeIsGC(curr->TypeGet()) && curr->gtIntCon.gtIconVal == 0));
+
+ genSinglePush();
+
+ addrReg = 0;
+ break;
+ }
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ /* This must be a register arg temp assignment */
+
+ noway_assert(curr->gtOper == GT_ASG);
+
+ /* Evaluate it to the temp */
+
+ genCodeForTree(curr, 0);
+
+ /* Increment the current argument register counter */
+
+ intRegState.rsCurRegArgNum++;
+
+ addrReg = 0;
+ }
+ else
+ {
+ /* This is a 32-bit integer non-register argument */
+
+ addrReg = genMakeRvalueAddressable(curr, 0, RegSet::KEEP_REG, false);
+ inst_TT(INS_push, curr);
+ genSinglePush();
+ genDoneAddressable(curr, addrReg, RegSet::KEEP_REG);
+ }
+ break;
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_LNG)
+ {
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal >> 32));
+ genSinglePush();
+ inst_IV(INS_push, (int)(curr->gtLngCon.gtLconVal));
+ genSinglePush();
+
+ addrReg = 0;
+ }
+ else
+ {
+ addrReg = genMakeAddressable(curr, 0, RegSet::FREE_REG);
+
+ inst_TT(INS_push, curr, sizeof(int));
+ genSinglePush();
+ inst_TT(INS_push, curr);
+ genSinglePush();
+ }
+ break;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+#if FEATURE_STACK_FP_X87
+ addrReg = genPushArgumentStackFP(curr);
+#else
+ NYI("FP codegen");
+ addrReg = 0;
+#endif
+ break;
+
+ case TYP_VOID:
+
+ /* Is this a nothing node, deferred register argument? */
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ GenTree* arg = curr;
+ if (arg->gtOper == GT_COMMA)
+ {
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ if (!arg->IsNothingNode())
+ {
+ genEvalSideEffects(arg);
+ genUpdateLife(arg);
+ }
+ }
+
+ /* increment the register count and continue with the next argument */
+
+ intRegState.rsCurRegArgNum++;
+
+ noway_assert(opsz == 0);
+
+ addrReg = 0;
+ break;
+ }
+
+ __fallthrough;
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+
+ noway_assert(arg->gtOper == GT_OBJ || arg->gtOper == GT_MKREFANY || arg->gtOper == GT_IND);
+ noway_assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
+ noway_assert(addrReg == DUMMY_INIT(RBM_CORRUPT));
+
+ if (arg->gtOper == GT_MKREFANY)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ GenTreePtr op2 = arg->gtOp.gtOp2;
+
+ addrReg = genMakeAddressable(op1, RBM_NONE, RegSet::KEEP_REG);
+
+ /* Is this value a handle? */
+ if (op2->gtOper == GT_CNS_INT && op2->IsIconHandle())
+ {
+ /* Emit a fixup for the push instruction */
+
+ inst_IV_handle(INS_push, op2->gtIntCon.gtIconVal);
+ genSinglePush();
+ }
+ else
+ {
+ regMaskTP addrReg2 = genMakeRvalueAddressable(op2, 0, RegSet::KEEP_REG, false);
+ inst_TT(INS_push, op2);
+ genSinglePush();
+ genDoneAddressable(op2, addrReg2, RegSet::KEEP_REG);
+ }
+ addrReg = genKeepAddressable(op1, addrReg);
+ inst_TT(INS_push, op1);
+ genSinglePush();
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+ opsz = 2 * TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ noway_assert(arg->gtOper == GT_OBJ);
+
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ GenTreePtr structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ // As much as we would like this to be a noway_assert, we can't because
+ // there are some weird casts out there, and backwards compatibility
+ // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+ // lvPromoted in general currently do not require the local to be
+ // TYP_STRUCT, so this assert is really more about how we wish the world
+ // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType ==
+ Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+
+ addrReg = 0;
+
+ // Get the number of BYTES to copy to the stack
+ opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
+ size_t bytesToBeCopied = opsz;
+
+ // postponedFields is true if we have any postponed fields
+ // Any field that does not start on a 4-byte boundary is a postponed field
+ // Such a field is required to be a short or a byte
+ //
+ // postponedRegKind records the kind of scratch register we will
+ // need to process the postponed fields
+ // RBM_NONE means that we don't need a register
+ //
+ // expectedAlignedOffset records the aligned offset that
+ // has to exist for a push to cover the postponed fields.
+ // Since all promoted structs have the tightly packed property
+ // we are guaranteed that we will have such a push
+ //
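+ // For illustration (assumed layout, not from the original source): in a promoted struct
+ // laid out as { int a @ 0, short b @ 4, short c @ 6 }, field 'c' does not start on a
+ // 4-byte boundary and is postponed; the 4-byte push that covers offset 4 (field 'b') is
+ // the push expected at expectedAlignedOffset, and the postponed short is stored into
+ // that stack slot by the later loop.
+ //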
+ bool postponedFields = false;
+ regMaskTP postponedRegKind = RBM_NONE;
+ size_t expectedAlignedOffset = UINT_MAX;
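+                        // Worked example (hypothetical 8-byte promoted struct, for illustration only):
+                        // given { int i; short s; byte b; } with field offsets 0, 4 and 6, the reverse
+                        // loop below postpones 'b' (offset 6 is not 4-byte aligned) and sets
+                        // expectedAlignedOffset to 4; the aligned push of 's' at offset 4 then covers
+                        // that slot, and the forward loop afterwards stores 'b' into [ESP+6] using a
+                        // byte-able scratch register.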
+
+ VARSET_TP* deadVarBits = NULL;
+ compiler->GetPromotedStructDeathVars()->Lookup(structLocalTree, &deadVarBits);
+
+ // Reverse loop, starts pushing from the end of the struct (i.e. the highest field offset)
+ //
+ for (int varNum = varDsc->lvFieldLclStart + varDsc->lvFieldCnt - 1;
+ varNum >= (int)varDsc->lvFieldLclStart; varNum--)
+ {
+ LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
+#ifdef DEBUG
+ if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
+ {
+ noway_assert(fieldVarDsc->lvFldOffset % (2 * sizeof(unsigned)) == 0);
+ noway_assert(fieldVarDsc->lvFldOffset + (2 * sizeof(unsigned)) == bytesToBeCopied);
+ }
+#endif
+ // Whenever we see a stack-aligned fieldVarDsc then we use 4-byte push instruction(s)
+ // For packed structs we will go back and store the unaligned bytes and shorts
+ // in the next loop
+ //
+ if (fieldVarDsc->lvStackAligned())
+ {
+ if (fieldVarDsc->lvExactSize != 2 * sizeof(unsigned) &&
+ fieldVarDsc->lvFldOffset + sizeof(void*) != bytesToBeCopied)
+ {
+                                    // Might need 4 bytes of padding for fields other than LONG and DOUBLE.
+                                    // Just push some junk (i.e. EAX) on the stack.
+ inst_RV(INS_push, REG_EAX, TYP_INT);
+ genSinglePush();
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // If we have an expectedAlignedOffset make sure that this push instruction
+ // is what we expect to cover the postponedFields
+ //
+ if (expectedAlignedOffset != UINT_MAX)
+ {
+ // This push must be for a small field
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+ // The fldOffset for this push should be equal to the expectedAlignedOffset
+ noway_assert(fieldVarDsc->lvFldOffset == expectedAlignedOffset);
+ expectedAlignedOffset = UINT_MAX;
+ }
+
+ // Push the "upper half" of LONG var first
+
+ if (isRegPairType(fieldVarDsc->lvType))
+ {
+ if (fieldVarDsc->lvOtherReg != REG_STK)
+ {
+ inst_RV(INS_push, fieldVarDsc->lvOtherReg, TYP_INT);
+ genSinglePush();
+
+ // Prepare the set of vars to be cleared from gcref/gcbyref set
+ // in case they become dead after genUpdateLife.
+ // genDoneAddressable() will remove dead gc vars by calling
+ // gcInfo.gcMarkRegSetNpt.
+ // Although it is not addrReg, we just borrow the name here.
+ addrReg |= genRegMask(fieldVarDsc->lvOtherReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ // Push the "upper half" of DOUBLE var if it is not enregistered.
+
+ if (fieldVarDsc->lvType == TYP_DOUBLE)
+ {
+ if (!fieldVarDsc->lvRegister)
+ {
+ getEmitter()->emitIns_S(INS_push, EA_4BYTE, varNum, sizeof(void*));
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+
+ //
+ // Push the field local.
+ //
+
+ if (fieldVarDsc->lvRegister)
+ {
+ if (!varTypeIsFloating(genActualType(fieldVarDsc->TypeGet())))
+ {
+ inst_RV(INS_push, fieldVarDsc->lvRegNum,
+ genActualType(fieldVarDsc->TypeGet()));
+ genSinglePush();
+
+ // Prepare the set of vars to be cleared from gcref/gcbyref set
+ // in case they become dead after genUpdateLife.
+ // genDoneAddressable() will remove dead gc vars by calling
+ // gcInfo.gcMarkRegSetNpt.
+ // Although it is not addrReg, we just borrow the name here.
+ addrReg |= genRegMask(fieldVarDsc->lvRegNum);
+ }
+ else
+ {
+ // Must be TYP_FLOAT or TYP_DOUBLE
+ noway_assert(fieldVarDsc->lvRegNum != REG_FPNONE);
+
+ noway_assert(fieldVarDsc->lvExactSize == sizeof(unsigned) ||
+ fieldVarDsc->lvExactSize == 2 * sizeof(unsigned));
+
+ inst_RV_IV(INS_sub, REG_SPBASE, fieldVarDsc->lvExactSize, EA_PTRSIZE);
+
+ genSinglePush();
+ if (fieldVarDsc->lvExactSize == 2 * sizeof(unsigned))
+ {
+ genSinglePush();
+ }
+
+#if FEATURE_STACK_FP_X87
+ GenTree* fieldTree = new (compiler, GT_REG_VAR)
+ GenTreeLclVar(fieldVarDsc->lvType, varNum, BAD_IL_OFFSET);
+ fieldTree->gtOper = GT_REG_VAR;
+ fieldTree->gtRegNum = fieldVarDsc->lvRegNum;
+ fieldTree->gtRegVar.gtRegNum = fieldVarDsc->lvRegNum;
+ if ((arg->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ if (fieldVarDsc->lvTracked &&
+ (deadVarBits == NULL ||
+ VarSetOps::IsMember(compiler, *deadVarBits,
+ fieldVarDsc->lvVarIndex)))
+ {
+ fieldTree->gtFlags |= GTF_VAR_DEATH;
+ }
+ }
+ genCodeForTreeStackFP_Leaf(fieldTree);
+
+ // Take reg to top of stack
+
+ FlatFPX87_MoveToTOS(&compCurFPState, fieldTree->gtRegNum);
+
+ // Pop it off to stack
+ compCurFPState.Pop();
+
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(fieldVarDsc->lvExactSize),
+ REG_NA, REG_SPBASE, 0);
+#else
+ NYI_FLAT_FP_X87("FP codegen");
+#endif
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_S(INS_push,
+ (fieldVarDsc->TypeGet() == TYP_REF) ? EA_GCREF
+ : EA_4BYTE,
+ varNum, 0);
+ genSinglePush();
+ }
+
+ bytesToBeCopied -= sizeof(void*);
+ }
+ else // not stack aligned
+ {
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+
+ // We will need to use a store byte or store word
+ // to set this unaligned location
+ postponedFields = true;
+
+ if (expectedAlignedOffset != UINT_MAX)
+ {
+ // This should never change until it is set back to UINT_MAX by an aligned
+ // offset
+ noway_assert(expectedAlignedOffset ==
+ roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*));
+ }
+
+ expectedAlignedOffset =
+ roundUp(fieldVarDsc->lvFldOffset, sizeof(void*)) - sizeof(void*);
+
+ noway_assert(expectedAlignedOffset < bytesToBeCopied);
+
+ if (fieldVarDsc->lvRegister)
+ {
+ // Do we need to use a byte-able register?
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+                                        // Did we enregister fieldVarDsc in a non byte-able register?
+ if ((genRegMask(fieldVarDsc->lvRegNum) & RBM_BYTE_REGS) == 0)
+ {
+ // then we will need to grab a byte-able register
+ postponedRegKind = RBM_BYTE_REGS;
+ }
+ }
+ }
+ else // not enregistered
+ {
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+ // We will need to grab a byte-able register
+ postponedRegKind = RBM_BYTE_REGS;
+ }
+ else
+ {
+ // We will need to grab any scratch register
+ if (postponedRegKind != RBM_BYTE_REGS)
+ postponedRegKind = RBM_ALLINT;
+ }
+ }
+ }
+ }
+
+ // Now we've pushed all of the aligned fields.
+ //
+ // We should have pushed bytes equal to the entire struct
+ noway_assert(bytesToBeCopied == 0);
+
+ // We should have seen a push that covers every postponed field
+ noway_assert(expectedAlignedOffset == UINT_MAX);
+
+ // Did we have any postponed fields?
+ if (postponedFields)
+ {
+ regNumber regNum = REG_STK; // means no register
+
+ // If we needed a scratch register then grab it here
+
+ if (postponedRegKind != RBM_NONE)
+ regNum = regSet.rsGrabReg(postponedRegKind);
+
+ // Forward loop, starts from the lowest field offset
+ //
+ for (unsigned varNum = varDsc->lvFieldLclStart;
+ varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
+ {
+ LclVarDsc* fieldVarDsc = compiler->lvaTable + varNum;
+
+ // All stack aligned fields have already been pushed
+ if (fieldVarDsc->lvStackAligned())
+ continue;
+
+ // We have a postponed field
+
+ // It must be a byte or a short
+ noway_assert(fieldVarDsc->lvExactSize < 4);
+
+ // Is the field enregistered?
+ if (fieldVarDsc->lvRegister)
+ {
+ // Frequently we can just use that register
+ regNumber tmpRegNum = fieldVarDsc->lvRegNum;
+
+ // Do we need to use a byte-able register?
+ if (fieldVarDsc->lvExactSize == 1)
+ {
+ // Did we enregister the field in a non byte-able register?
+ if ((genRegMask(tmpRegNum) & RBM_BYTE_REGS) == 0)
+ {
+ // then we will need to use the byte-able register 'regNum'
+ noway_assert((genRegMask(regNum) & RBM_BYTE_REGS) != 0);
+
+ // Copy the register that contains fieldVarDsc into 'regNum'
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, regNum,
+ fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+
+ // tmpRegNum is the register that we will extract the byte value from
+ tmpRegNum = regNum;
+ }
+ noway_assert((genRegMask(tmpRegNum) & RBM_BYTE_REGS) != 0);
+ }
+
+ getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, tmpRegNum,
+ REG_SPBASE, fieldVarDsc->lvFldOffset);
+ }
+ else // not enregistered
+ {
+ // We will copy the non-enregister fieldVar into our scratch register 'regNum'
+
+ noway_assert(regNum != REG_STK);
+ getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, regNum, varNum,
+ 0);
+
+ regTracker.rsTrackRegLclVar(regNum, varNum);
+
+ // Store the value (byte or short) into the stack
+
+ getEmitter()->emitIns_AR_R(ins_Store(fieldVarDsc->TypeGet()),
+ (emitAttr)fieldVarDsc->lvExactSize, regNum,
+ REG_SPBASE, fieldVarDsc->lvFldOffset);
+ }
+ }
+ }
+ genUpdateLife(structLocalTree);
+
+ break;
+ }
+ }
+
+ genCodeForTree(arg->gtObj.gtOp1, 0);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regNumber reg = arg->gtObj.gtOp1->gtRegNum;
+                // Get the number of BYTES to copy to the stack
+ opsz = roundUp(compiler->info.compCompHnd->getClassSize(arg->gtObj.gtClass), sizeof(void*));
+ unsigned slots = (unsigned)(opsz / sizeof(void*));
+
+ BYTE* gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+
+ compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+
+ BOOL bNoneGC = TRUE;
+ for (int i = slots - 1; i >= 0; --i)
+ {
+ if (gcLayout[i] != TYPE_GC_NONE)
+ {
+ bNoneGC = FALSE;
+ break;
+ }
+ }
+
+ /* passing large structures using movq instead of pushes does not increase codesize very much */
+ unsigned movqLenMin = 8;
+ unsigned movqLenMax = 64;
+ unsigned curBBweight = compiler->compCurBB->getBBWeight(compiler);
+
+ if ((compiler->compCodeOpt() == Compiler::SMALL_CODE) || (curBBweight == BB_ZERO_WEIGHT))
+ {
+ // Don't bother with this optimization in
+ // rarely run blocks or when optimizing for size
+ movqLenMax = movqLenMin = 0;
+ }
+ else if (compiler->compCodeOpt() == Compiler::FAST_CODE)
+ {
+ // Be more aggressive when optimizing for speed
+ movqLenMax *= 2;
+ }
+
+ /* Adjust for BB weight */
+ if (curBBweight >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ // Be more aggressive when we are inside a loop
+ movqLenMax *= 2;
+ }
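+                    // For illustration (given the defaults above): when SSE2 is available, a GC-free
+                    // struct of 8 to 64 bytes is copied with movq loads/stores; under FAST_CODE the
+                    // upper bound grows to 128 bytes and it doubles again inside a hot loop, while
+                    // SMALL_CODE and zero-weight blocks always fall back to the push loop below.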
+
+ if (compiler->opts.compCanUseSSE2 && bNoneGC && (opsz >= movqLenMin) && (opsz <= movqLenMax))
+ {
+ JITLOG_THIS(compiler, (LL_INFO10000,
+ "Using XMM instructions to pass %3d byte valuetype while compiling %s\n",
+ opsz, compiler->info.compFullName));
+
+ int stkDisp = (int)(unsigned)opsz;
+ int curDisp = 0;
+ regNumber xmmReg = REG_XMM0;
+
+ if (opsz & 0x4)
+ {
+ stkDisp -= sizeof(void*);
+ getEmitter()->emitIns_AR_R(INS_push, EA_4BYTE, REG_NA, reg, stkDisp);
+ genSinglePush();
+ }
+
+ inst_RV_IV(INS_sub, REG_SPBASE, stkDisp, EA_PTRSIZE);
+ genStackLevel += stkDisp;
+
+ while (curDisp < stkDisp)
+ {
+ getEmitter()->emitIns_R_AR(INS_movq, EA_8BYTE, xmmReg, reg, curDisp);
+ getEmitter()->emitIns_AR_R(INS_movq, EA_8BYTE, xmmReg, REG_SPBASE, curDisp);
+ curDisp += 2 * sizeof(void*);
+ }
+ noway_assert(curDisp == stkDisp);
+ }
+ else
+ {
+ for (int i = slots - 1; i >= 0; --i)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_4BYTE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+ getEmitter()->emitIns_AR_R(INS_push, fieldSize, REG_NA, reg, i * sizeof(void*));
+ genSinglePush();
+ }
+ }
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg)); // Kill the pointer in op1
+ }
+
+ addrReg = 0;
+ break;
+ }
+
+ default:
+ noway_assert(!"unhandled/unexpected arg type");
+ NO_WAY("unhandled/unexpected arg type");
+ }
+
+ /* Update the current set of live variables */
+
+ genUpdateLife(curr);
+
+ /* Update the current set of register pointers */
+
+ noway_assert(addrReg != DUMMY_INIT(RBM_CORRUPT));
+ genDoneAddressable(curr, addrReg, RegSet::FREE_REG);
+
+ /* Remember how much stuff we've pushed on the stack */
+
+ size += opsz;
+
+ /* Update the current argument stack offset */
+
+ /* Continue with the next argument, if any more are present */
+
+ } // while args
+
+ /* Move the deferred arguments to registers */
+
+ for (args = regArgs; args; args = args->Rest())
+ {
+ curr = args->Current();
+
+ assert(!curr->IsArgPlaceHolderNode()); // No place holders nodes are in the late args
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+
+ noway_assert(isRegParamType(curr->TypeGet()));
+ noway_assert(curr->gtType != TYP_VOID);
+
+ /* Evaluate the argument to a register [pair] */
+
+ if (genTypeSize(genActualType(curr->TypeGet())) == sizeof(int))
+ {
+        /* Check if this is the guess area for the resolve interface call
+         * Pass a size of EA_OFFSET */
+ if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
+ {
+ getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* The value is now in the appropriate register */
+
+ genMarkTreeInReg(curr, regNum);
+ }
+ else
+ {
+ genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
+ }
+
+ noway_assert(curr->gtRegNum == regNum);
+
+ /* If the register is already marked as used, it will become
+ multi-used. However, since it is a callee-trashed register,
+ we will have to spill it before the call anyway. So do it now */
+
+ if (regSet.rsMaskUsed & genRegMask(regNum))
+ {
+ noway_assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
+ regSet.rsSpillReg(regNum);
+ }
+
+ /* Mark the register as 'used' */
+
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ noway_assert(!"UNDONE: Passing a TYP_STRUCT in register arguments");
+ }
+ }
+
+ /* If any of the previously loaded arguments were spilled - reload them */
+
+ for (args = regArgs; args; args = args->Rest())
+ {
+ curr = args->Current();
+ assert(curr);
+
+ if (curr->gtFlags & GTF_SPILLED)
+ {
+ if (isRegPairType(curr->gtType))
+ {
+ regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+
+ /* Return the total size pushed */
+
+ return size;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+//
+// ARM and AMD64 use this method to pass the stack-based args
+//
+// Returns the size pushed (always zero)
+size_t CodeGen::genPushArgList(GenTreePtr call)
+{
+
+ GenTreeArgList* lateArgs = call->gtCall.gtCallLateArgs;
+ GenTreePtr curr;
+ var_types type;
+ int argSize;
+
+ GenTreeArgList* args;
+ // Create a local, artificial GenTreeArgList that includes the gtCallObjp, if that exists, as first argument,
+ // so we can iterate over this argument list more uniformly.
+ // Need to provide a temporary non-null first argument here: if we use this, we'll replace it.
+ GenTreeArgList objpArgList(/*temp dummy arg*/ call, call->gtCall.gtCallArgs);
+ if (call->gtCall.gtCallObjp == NULL)
+ {
+ args = call->gtCall.gtCallArgs;
+ }
+ else
+ {
+ objpArgList.Current() = call->gtCall.gtCallObjp;
+ args = &objpArgList;
+ }
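+    // Sketch of the iteration order this sets up (hypothetical call, for illustration): for an
+    // instance call obj.M(a, b) the loop below visits [obj, a, b]; for a static call it simply
+    // walks gtCallArgs, i.e. [a, b].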
+
+ for (; args; args = args->Rest())
+ {
+ /* Get hold of the next argument value */
+ curr = args->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+ int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ /* See what type of a value we're passing */
+ type = curr->TypeGet();
+
+ if ((type == TYP_STRUCT) && (curr->gtOper == GT_ASG))
+ {
+ type = TYP_VOID;
+ }
+
+ // This holds the set of registers corresponding to enregistered promoted struct field variables
+ // that go dead after this use of the variable in the argument list.
+ regMaskTP deadFieldVarRegs = RBM_NONE;
+
+ argSize = TARGET_POINTER_SIZE; // The default size for an arg is one pointer-sized item
+
+ if (curr->IsArgPlaceHolderNode())
+ {
+ assert(curr->gtFlags & GTF_LATE_ARG);
+ goto DEFERRED;
+ }
+
+ if (varTypeIsSmall(type))
+ {
+ // Normalize 'type', it represents the item that we will be storing in the Outgoing Args
+ type = TYP_I_IMPL;
+ }
+
+ switch (type)
+ {
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+
+#if defined(_TARGET_ARM_)
+
+ argSize = (TARGET_POINTER_SIZE * 2);
+
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_LNG)
+ {
+ assert((curr->gtFlags & GTF_LATE_ARG) == 0);
+
+ int hiVal = (int)(curr->gtLngCon.gtLconVal >> 32);
+ int loVal = (int)(curr->gtLngCon.gtLconVal & 0xffffffff);
+
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, loVal, compiler->lvaOutgoingArgSpaceVar, argOffset);
+
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, hiVal, compiler->lvaOutgoingArgSpaceVar,
+ argOffset + 4);
+
+ break;
+ }
+ else
+ {
+ genCodeForTree(curr, 0);
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ // The arg was assigned into a temp and
+ // will be moved to the correct register or slot later
+
+ argSize = 0; // nothing is passed on the stack
+ }
+ else
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ //
+ assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+
+ if (type == TYP_LONG)
+ {
+ regNumber regLo = genRegPairLo(curr->gtRegPair);
+ regNumber regHi = genRegPairHi(curr->gtRegPair);
+
+ assert(regLo != REG_STK);
+ inst_SA_RV(ins_Store(TYP_INT), argOffset, regLo, TYP_INT);
+ if (regHi == REG_STK)
+ {
+ regHi = regSet.rsPickFreeReg();
+ inst_RV_TT(ins_Load(TYP_INT), regHi, curr, 4);
+ regTracker.rsTrackRegTrash(regHi);
+ }
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, regHi, TYP_INT);
+ }
+ else // (type == TYP_DOUBLE)
+ {
+ inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
+ }
+ }
+ }
+ break;
+
+#elif defined(_TARGET_64BIT_)
+ __fallthrough;
+#else
+#error "Unknown target for passing TYP_LONG argument using FIXED_ARGS"
+#endif
+
+ case TYP_REF:
+ case TYP_BYREF:
+
+ case TYP_FLOAT:
+ case TYP_INT:
+ /* Is the value a constant? */
+
+ if (curr->gtOper == GT_CNS_INT)
+ {
+ assert(!(curr->gtFlags & GTF_LATE_ARG));
+
+#if REDUNDANT_LOAD
+ regNumber reg = regTracker.rsIconIsInReg(curr->gtIntCon.gtIconVal);
+
+ if (reg != REG_NA)
+ {
+ inst_SA_RV(ins_Store(type), argOffset, reg, type);
+ }
+ else
+#endif
+ {
+ bool needReloc = compiler->opts.compReloc && curr->IsIconHandle();
+ emitAttr attr = needReloc ? EA_HANDLE_CNS_RELOC : emitTypeSize(type);
+ instGen_Store_Imm_Into_Lcl(type, attr, curr->gtIntCon.gtIconVal,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ }
+ break;
+ }
+
+ /* This is passed as a pointer-sized integer argument */
+
+ genCodeForTree(curr, 0);
+
+ // The arg has been evaluated now, but will be put in a register or pushed on the stack later.
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+#ifdef _TARGET_ARM_
+ argSize = 0; // nothing is passed on the stack
+#endif
+ }
+ else
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+
+ assert(curr->gtOper != GT_ASG); // GTF_LATE_ARG should be set if this is the case
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+ inst_SA_RV(ins_Store(type), argOffset, curr->gtRegNum, type);
+
+ if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
+ }
+ break;
+
+ case TYP_VOID:
+ /* Is this a nothing node, deferred register argument? */
+
+ if (curr->gtFlags & GTF_LATE_ARG)
+ {
+ /* Handle side-effects */
+ DEFERRED:
+ if (curr->OperIsCopyBlkOp() || curr->OperGet() == GT_COMMA)
+ {
+#ifdef _TARGET_ARM_
+ {
+ GenTreePtr curArgNode = curArgTabEntry->node;
+ var_types curRegArgType = curArgNode->gtType;
+ assert(curRegArgType != TYP_UNDEF);
+
+ if (curRegArgType == TYP_STRUCT)
+ {
+ // If the RHS of the COPYBLK is a promoted struct local, then the use of that
+ // is an implicit use of all its field vars. If these are last uses, remember that,
+                        // so we can later update the GC info.
+ if (curr->OperIsCopyBlkOp())
+ deadFieldVarRegs |= genFindDeadFieldRegs(curr);
+ }
+ }
+#endif // _TARGET_ARM_
+
+ genCodeForTree(curr, 0);
+ }
+ else
+ {
+ assert(curr->IsArgPlaceHolderNode() || curr->IsNothingNode());
+ }
+
+#if defined(_TARGET_ARM_)
+ argSize = curArgTabEntry->numSlots * TARGET_POINTER_SIZE;
+#endif
+ }
+ else
+ {
+ for (GenTree* arg = curr; arg->gtOper == GT_COMMA; arg = arg->gtOp.gtOp2)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ }
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+
+ case TYP_STRUCT:
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_MKREFANY));
+
+ CORINFO_CLASS_HANDLE clsHnd;
+ unsigned argAlign;
+ unsigned slots;
+ BYTE* gcLayout = NULL;
+
+ // If the struct being passed is a OBJ of a local struct variable that is promoted (in the
+ // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
+ // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
+ // table entry for the promoted struct local. As we fill slots with the contents of a
+ // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
+ // that indicate another filled slot, and "nextPromotedStructFieldVar" will be the local
+ // variable number of the next field variable to be copied.
+ LclVarDsc* promotedStructLocalVarDesc = NULL;
+ GenTreePtr structLocalTree = NULL;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE; // Size of slot.
+ unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
+ unsigned promotedStructOffsetOfFirstStackSlot = 0;
+ unsigned argOffsetOfFirstStackSlot = UINT32_MAX; // Indicates uninitialized.
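+                // Example (hypothetical 12-byte promoted struct, for illustration): it occupies 3
+                // four-byte slots; while filling the second slot, "bytesOfNextSlotOfCurPromotedStruct"
+                // is 8, and it advances by TARGET_POINTER_SIZE each time a slot is completed.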
+
+ if (arg->OperGet() == GT_OBJ)
+ {
+ clsHnd = arg->gtObj.gtClass;
+ unsigned originalSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ argAlign =
+ roundUp(compiler->info.compCompHnd->getClassAlignmentRequirement(clsHnd), TARGET_POINTER_SIZE);
+ argSize = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE));
+
+ slots = (unsigned)(argSize / TARGET_POINTER_SIZE);
+
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+
+ compiler->info.compCompHnd->getClassGClayout(clsHnd, gcLayout);
+
+ // Are we loading a promoted struct local var?
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+                    // As much as we would like this to be a noway_assert, we can't because
+                    // there are some weird casts out there, and backwards compatibility
+                    // dictates we do *NOT* start rejecting them now. lvaGetPromotion and
+                    // lvPromoted in general currently do not require the local to be
+                    // TYP_STRUCT, so this assert is really more about how we wish the world
+                    // was than some JIT invariant.
+ assert((structLocalTree->TypeGet() == TYP_STRUCT) || compiler->compUnsafeCastUsed);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted &&
+ promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is guaranteed to live
+ // on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(arg->OperGet() == GT_MKREFANY);
+
+ clsHnd = NULL;
+ argAlign = TARGET_POINTER_SIZE;
+ argSize = 2 * TARGET_POINTER_SIZE;
+ slots = 2;
+ }
+
+ // Any TYP_STRUCT argument that is passed in registers must be moved over to the LateArg list
+ noway_assert(regNum == REG_STK);
+
+ // This code passes a TYP_STRUCT by value using the outgoing arg space var
+ //
+ if (arg->OperGet() == GT_OBJ)
+ {
+ regNumber regSrc = REG_STK;
+ regNumber regTmp = REG_STK; // This will get set below if the obj is not of a promoted struct local.
+ int cStackSlots = 0;
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ genComputeReg(arg->gtObj.gtOp1, 0, RegSet::ANY_REG, RegSet::KEEP_REG);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regSrc = arg->gtObj.gtOp1->gtRegNum;
+ }
+
+                    // The number of bytes to add to "argOffset" to get the arg offset of the current slot.
+ int extraArgOffset = 0;
+
+ for (unsigned i = 0; i < slots; i++)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ // Pass the argument using the lvaOutgoingArgSpaceVar
+
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ if (argOffsetOfFirstStackSlot == UINT32_MAX)
+ argOffsetOfFirstStackSlot = argOffset;
+
+ regNumber maxRegArg = regNumber(MAX_REG_ARG);
+ bool filledExtraSlot = genFillSlotFromPromotedStruct(
+ arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize, &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &maxRegArg,
+ /*argOffset*/ argOffset + extraArgOffset,
+ /*fieldOffsetOfFirstStackSlot*/ promotedStructOffsetOfFirstStackSlot,
+ argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
+ extraArgOffset += TARGET_POINTER_SIZE;
+ // If we filled an extra slot with an 8-byte value, skip a slot.
+ if (filledExtraSlot)
+ {
+ i++;
+ cStackSlots++;
+ extraArgOffset += TARGET_POINTER_SIZE;
+ }
+ }
+ else
+ {
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
+ i * TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ argOffset + cStackSlots * TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ cStackSlots++;
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ regSet.rsMarkRegFree(genRegMask(regSrc));
+ }
+ if (structLocalTree != NULL)
+ genUpdateLife(structLocalTree);
+ }
+ else
+ {
+ assert(arg->OperGet() == GT_MKREFANY);
+ PushMkRefAnyArg(arg, curArgTabEntry, RBM_ALLINT);
+ argSize = (curArgTabEntry->numSlots * TARGET_POINTER_SIZE);
+ }
+ }
+ break;
+#endif // _TARGET_ARM_
+
+ default:
+ assert(!"unhandled/unexpected arg type");
+ NO_WAY("unhandled/unexpected arg type");
+ }
+
+ /* Update the current set of live variables */
+
+ genUpdateLife(curr);
+
+ // Now, if some copied field locals were enregistered, and they're now dead, update the set of
+ // register holding gc pointers.
+ if (deadFieldVarRegs != 0)
+ gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
+
+ /* Update the current argument stack offset */
+
+ argOffset += argSize;
+
+ /* Continue with the next argument, if any more are present */
+ } // while (args)
+
+ if (lateArgs)
+ {
+ SetupLateArgs(call);
+ }
+
+ /* Return the total size pushed */
+
+ return 0;
+}
+
+#ifdef _TARGET_ARM_
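+// genFillSlotFromPromotedStruct:
+//   Roughly: fills one pointer-sized slot of an outgoing struct argument from the field variables of
+//   an independently promoted struct local, writing either into the argument register *pCurRegNum or,
+//   when *pCurRegNum is MAX_REG_ARG (no argument register available for this slot), into the outgoing
+//   arg space at 'argOffset'. Returns true when an 8-byte field (or a double in the HFA case) consumed
+//   an extra slot/register, so the caller should skip the next slot.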
+bool CodeGen::genFillSlotFromPromotedStruct(GenTreePtr arg,
+ fgArgTabEntryPtr curArgTabEntry,
+ LclVarDsc* promotedStructLocalVarDesc,
+ emitAttr fieldSize,
+ unsigned* pNextPromotedStructFieldVar,
+ unsigned* pBytesOfNextSlotOfCurPromotedStruct,
+ regNumber* pCurRegNum,
+ int argOffset,
+ int fieldOffsetOfFirstStackSlot,
+ int argOffsetOfFirstStackSlot,
+ regMaskTP* deadFieldVarRegs,
+ regNumber* pRegTmp)
+{
+ unsigned nextPromotedStructFieldVar = *pNextPromotedStructFieldVar;
+ unsigned limitPromotedStructFieldVar =
+ promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = *pBytesOfNextSlotOfCurPromotedStruct;
+
+ regNumber curRegNum = *pCurRegNum;
+ regNumber regTmp = *pRegTmp;
+ bool filledExtraSlot = false;
+
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ // We've already finished; just return.
+ // We can reach this because the calling loop computes a # of slots based on the size of the struct.
+ // If the struct has padding at the end because of alignment (say, long/int), then we'll get a call for
+ // the fourth slot, even though we've copied all the fields.
+ return false;
+ }
+
+ LclVarDsc* fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+
+ // Does this field fill an entire slot, and does it go at the start of the slot?
+ // If so, things are easier...
+
+ bool oneFieldFillsSlotFromStart =
+ (fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct) // The field should start in the current slot...
+ && ((fieldVarDsc->lvFldOffset % 4) == 0) // at the start of the slot, and...
+ && (nextPromotedStructFieldVar + 1 ==
+ limitPromotedStructFieldVar // next field, if there is one, goes in the next slot.
+ || compiler->lvaTable[nextPromotedStructFieldVar + 1].lvFldOffset >= bytesOfNextSlotOfCurPromotedStruct);
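+    // For illustration (hypothetical layout): in a promoted struct { int a; short b; short c; } the
+    // field 'a' (offset 0, size 4) fills its slot from the start, while 'b' (offset 4) and 'c'
+    // (offset 6) share the second slot; when 'b' is reached this flag is false, and the slot is
+    // assembled piecewise in the else branch below (which also consumes 'c').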
+
+ // Compute the proper size.
+ if (fieldSize == EA_4BYTE) // Not a GC ref or byref.
+ {
+ switch (fieldVarDsc->lvExactSize)
+ {
+ case 1:
+ fieldSize = EA_1BYTE;
+ break;
+ case 2:
+ fieldSize = EA_2BYTE;
+ break;
+ case 8:
+ // An 8-byte field will be at an 8-byte-aligned offset unless explicit layout has been used,
+ // in which case we should not have promoted the struct variable.
+ noway_assert((fieldVarDsc->lvFldOffset % 8) == 0);
+
+ // If the current reg number is not aligned, align it, and return to the calling loop, which will
+ // consider that a filled slot and move on to the next argument register.
+ if (curRegNum != MAX_REG_ARG && ((curRegNum % 2) != 0))
+ {
+ // We must update the slot target, however!
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
+ return false;
+ }
+ // Dest is an aligned pair of arg regs, if the struct type demands it.
+ noway_assert((curRegNum % 2) == 0);
+ // We leave the fieldSize as EA_4BYTE; but we must do 2 reg moves.
+ break;
+ default:
+ assert(fieldVarDsc->lvExactSize == 4);
+ break;
+ }
+ }
+ else
+ {
+ // If the gc layout said it's a GC ref or byref, then the field size must be 4.
+ noway_assert(fieldVarDsc->lvExactSize == 4);
+ }
+
+ // We may need the type of the field to influence instruction selection.
+ // If we have a TYP_LONG we can use TYP_I_IMPL and we do two loads/stores
+ // If the fieldVarDsc is enregistered float we must use the field's exact type
+ // however if it is in memory we can use an integer type TYP_I_IMPL
+ //
+ var_types fieldTypeForInstr = var_types(fieldVarDsc->lvType);
+ if ((fieldVarDsc->lvType == TYP_LONG) || (!fieldVarDsc->lvRegister && varTypeIsFloating(fieldTypeForInstr)))
+ {
+ fieldTypeForInstr = TYP_I_IMPL;
+ }
+
+ // If we have a HFA, then it is a much simpler deal -- HFAs are completely enregistered.
+ if (curArgTabEntry->isHfaRegArg)
+ {
+ assert(oneFieldFillsSlotFromStart);
+
+ // Is the field variable promoted?
+ if (fieldVarDsc->lvRegister)
+ {
+ // Move the field var living in register to dst, if they are different registers.
+ regNumber srcReg = fieldVarDsc->lvRegNum;
+ regNumber dstReg = curRegNum;
+ if (srcReg != dstReg)
+ {
+ inst_RV_RV(ins_Copy(fieldVarDsc->TypeGet()), dstReg, srcReg, fieldVarDsc->TypeGet());
+ assert(genIsValidFloatReg(dstReg)); // we don't use register tracking for FP
+ }
+ }
+ else
+ {
+ // Move the field var living in stack to dst.
+ getEmitter()->emitIns_R_S(ins_Load(fieldVarDsc->TypeGet()),
+ fieldVarDsc->TypeGet() == TYP_DOUBLE ? EA_8BYTE : EA_4BYTE, curRegNum,
+ nextPromotedStructFieldVar, 0);
+ assert(genIsValidFloatReg(curRegNum)); // we don't use register tracking for FP
+ }
+
+ // Mark the arg as used and using reg val.
+ genMarkTreeInReg(arg, curRegNum);
+ regSet.SetUsedRegFloat(arg, true);
+
+ // Advance for double.
+ if (fieldVarDsc->TypeGet() == TYP_DOUBLE)
+ {
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ curRegNum = REG_NEXT(curRegNum);
+ arg->gtRegNum = curRegNum;
+ regSet.SetUsedRegFloat(arg, true);
+ filledExtraSlot = true;
+ }
+ arg->gtRegNum = curArgTabEntry->regNum;
+
+ // Advance.
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ nextPromotedStructFieldVar++;
+ }
+ else
+ {
+ if (oneFieldFillsSlotFromStart)
+ {
+ // If we write to the stack, offset in outgoing args at which we'll write.
+ int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
+ assert(fieldArgOffset >= 0);
+
+ // Is the source a register or memory?
+ if (fieldVarDsc->lvRegister)
+ {
+ if (fieldTypeForInstr == TYP_DOUBLE)
+ {
+ fieldSize = EA_8BYTE;
+ }
+
+ // Are we writing to a register or to the stack?
+ if (curRegNum != MAX_REG_ARG)
+ {
+ // Source is register and Dest is register.
+
+ instruction insCopy = INS_mov;
+
+ if (varTypeIsFloating(fieldTypeForInstr))
+ {
+ if (fieldTypeForInstr == TYP_FLOAT)
+ {
+ insCopy = INS_vmov_f2i;
+ }
+ else
+ {
+ assert(fieldTypeForInstr == TYP_DOUBLE);
+ insCopy = INS_vmov_d2i;
+ }
+ }
+
+                    // If the value being copied is a TYP_LONG (8 bytes), it may be in two registers. Record the second
+                    // register (which may become a tmp register, if it's held in the argument register that the first
+                    // register to be copied will overwrite).
+ regNumber otherRegNum = REG_STK;
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ otherRegNum = fieldVarDsc->lvOtherReg;
+ // Are we about to overwrite?
+ if (otherRegNum == curRegNum)
+ {
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ // Copy the second register to the temp reg.
+ getEmitter()->emitIns_R_R(INS_mov, fieldSize, regTmp, otherRegNum);
+ regTracker.rsTrackRegCopy(regTmp, otherRegNum);
+ otherRegNum = regTmp;
+ }
+ }
+
+ if (fieldVarDsc->lvType == TYP_DOUBLE)
+ {
+ assert(curRegNum <= REG_R2);
+ getEmitter()->emitIns_R_R_R(insCopy, fieldSize, curRegNum, genRegArgNext(curRegNum),
+ fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regTracker.rsTrackRegTrash(genRegArgNext(curRegNum));
+ }
+ else
+ {
+ // Now do the first register.
+ // It might be the case that it's already in the desired register; if so do nothing.
+ if (curRegNum != fieldVarDsc->lvRegNum)
+ {
+ getEmitter()->emitIns_R_R(insCopy, fieldSize, curRegNum, fieldVarDsc->lvRegNum);
+ regTracker.rsTrackRegCopy(curRegNum, fieldVarDsc->lvRegNum);
+ }
+ }
+
+ // In either case, mark the arg register as used.
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+
+ // Is there a second half of the value?
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ curRegNum = genRegArgNext(curRegNum);
+ // The second dest reg must also be an argument register.
+ noway_assert(curRegNum < MAX_REG_ARG);
+
+ // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ // Copy the second register into the next argument register
+
+ // If it's a register variable for a TYP_LONG value, then otherReg now should
+ // hold the second register or it might say that it's in the stack.
+ if (otherRegNum == REG_STK)
+ {
+ // Apparently when we partially enregister, we allocate stack space for the full
+ // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
+ // parameter, to get the high half.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(curRegNum);
+ }
+ else
+ {
+ // The other half is in a register.
+ // Again, it might be the case that it's already in the desired register; if so do
+ // nothing.
+ if (curRegNum != otherRegNum)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, fieldSize, curRegNum, otherRegNum);
+ regTracker.rsTrackRegCopy(curRegNum, otherRegNum);
+ }
+ }
+ }
+
+ // Also mark the 2nd arg register as used.
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, false);
+ // Record the fact that we filled in an extra register slot
+ filledExtraSlot = true;
+ }
+ }
+ else
+ {
+ // Source is register and Dest is memory (OutgoingArgSpace).
+
+ // Now write the srcReg into the right location in the outgoing argument list.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ // Now, if it's an 8-byte TYP_LONG, we have to do the second 4 bytes.
+ if (fieldVarDsc->lvType == TYP_LONG)
+ {
+ if (fieldVarDsc->lvOtherReg == REG_STK)
+ {
+ // Source is stack.
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ // Apparently if we partially enregister, we allocate stack space for the full
+ // 8 bytes, and enregister the low half. Thus the final TARGET_POINTER_SIZE offset
+ // parameter, to get the high half.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, fieldVarDsc->lvOtherReg,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ }
+ }
+ // Record the fact that we filled in an extra register slot
+ filledExtraSlot = true;
+ }
+ }
+ assert(fieldVarDsc->lvTracked); // Must be tracked, since it's enregistered...
+ // If the fieldVar becomes dead, then declare the register not to contain a pointer value.
+ if (arg->gtFlags & GTF_VAR_DEATH)
+ {
+ *deadFieldVarRegs |= genRegMask(fieldVarDsc->lvRegNum);
+ // We don't bother with the second reg of a register pair, since if it has one,
+ // it obviously doesn't hold a pointer.
+ }
+ }
+ else
+ {
+ // Source is in memory.
+
+ if (curRegNum != MAX_REG_ARG)
+ {
+ // Dest is reg.
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, 0);
+ regTracker.rsTrackRegTrash(curRegNum);
+
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ noway_assert(fieldSize == EA_4BYTE);
+ curRegNum = genRegArgNext(curRegNum);
+ noway_assert(curRegNum < MAX_REG_ARG); // Because of 8-byte alignment.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), fieldSize, curRegNum,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+ // Record the fact that we filled in an extra stack slot
+ filledExtraSlot = true;
+ }
+ }
+ else
+ {
+ // Dest is stack.
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, 0);
+
+ // Now write regTmp into the right location in the outgoing argument list.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ // We overwrote "regTmp", so erase any previous value we recorded that it contained.
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (fieldVarDsc->lvExactSize == 8)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar,
+ fieldArgOffset + TARGET_POINTER_SIZE);
+ // Record the fact that we filled in an extra stack slot
+ filledExtraSlot = true;
+ }
+ }
+ }
+
+ // Bump up the following if we filled in an extra slot
+ if (filledExtraSlot)
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+
+ // Go to the next field.
+ nextPromotedStructFieldVar++;
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ fieldVarDsc = NULL;
+ }
+ else
+ {
+ // The next field should have the same parent variable, and we should have put the field vars in order
+ // sorted by offset.
+ assert(fieldVarDsc->lvIsStructField && compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
+ fieldVarDsc->lvParentLcl == compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
+ fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
+ fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+ }
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ }
+ else // oneFieldFillsSlotFromStart == false
+ {
+ // The current slot should contain more than one field.
+ // We'll construct a word in memory for the slot, then load it into a register.
+ // (Note that it *may* be possible for the fldOffset to be greater than the largest offset in the current
+ // slot, in which case we'll just skip this loop altogether.)
+ while (fieldVarDsc != NULL && fieldVarDsc->lvFldOffset < bytesOfNextSlotOfCurPromotedStruct)
+ {
+ // If it doesn't fill a slot, it can't overflow the slot (again, because we only promote structs
+ // whose fields have their natural alignment, and alignment == size on ARM).
+ noway_assert(fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize <= bytesOfNextSlotOfCurPromotedStruct);
+
+ // If the argument goes to the stack, the offset in the outgoing arg area for the argument.
+ int fieldArgOffset = argOffsetOfFirstStackSlot + fieldVarDsc->lvFldOffset - fieldOffsetOfFirstStackSlot;
+ noway_assert(argOffset == INT32_MAX ||
+ (argOffset <= fieldArgOffset && fieldArgOffset < argOffset + TARGET_POINTER_SIZE));
+
+ if (fieldVarDsc->lvRegister)
+ {
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaPromotedStructAssemblyScratchVar,
+ fieldVarDsc->lvFldOffset % 4);
+ }
+ else
+ {
+ // Dest is stack; write directly.
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, fieldVarDsc->lvRegNum,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ }
+ }
+ else
+ {
+ // Source is in memory.
+
+ // Make sure we have a temporary register to use...
+ if (regTmp == REG_STK)
+ {
+ regTmp = regSet.rsPickFreeReg();
+ }
+ getEmitter()->emitIns_R_S(ins_Load(fieldTypeForInstr), fieldSize, regTmp,
+ nextPromotedStructFieldVar, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaPromotedStructAssemblyScratchVar,
+ fieldVarDsc->lvFldOffset % 4);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_R(ins_Store(fieldTypeForInstr), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, fieldArgOffset);
+ }
+ }
+ // Go to the next field.
+ nextPromotedStructFieldVar++;
+ if (nextPromotedStructFieldVar == limitPromotedStructFieldVar)
+ {
+ fieldVarDsc = NULL;
+ }
+ else
+ {
+ // The next field should have the same parent variable, and we should have put the field vars in
+ // order sorted by offset.
+ noway_assert(fieldVarDsc->lvIsStructField &&
+ compiler->lvaTable[nextPromotedStructFieldVar].lvIsStructField &&
+ fieldVarDsc->lvParentLcl ==
+ compiler->lvaTable[nextPromotedStructFieldVar].lvParentLcl &&
+ fieldVarDsc->lvFldOffset < compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset);
+ fieldVarDsc = &compiler->lvaTable[nextPromotedStructFieldVar];
+ }
+ }
+            // Now, if we were accumulating the slot in the promoted-struct assembly scratch var in order to
+            // write it to an argument register, load it into that register.
+ if (curRegNum != MAX_REG_ARG)
+ {
+ noway_assert(compiler->lvaPromotedStructAssemblyScratchVar != BAD_VAR_NUM);
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_4BYTE, curRegNum,
+ compiler->lvaPromotedStructAssemblyScratchVar, 0);
+ regTracker.rsTrackRegTrash(curRegNum);
+ regSet.rsMarkArgRegUsedByPromotedFieldArg(arg, curRegNum, EA_IS_GCREF(fieldSize));
+ }
+ // We've finished a slot; set the goal of the next slot.
+ bytesOfNextSlotOfCurPromotedStruct += 4;
+ }
+ }
+
+ // Write back the updates.
+ *pNextPromotedStructFieldVar = nextPromotedStructFieldVar;
+ *pBytesOfNextSlotOfCurPromotedStruct = bytesOfNextSlotOfCurPromotedStruct;
+ *pCurRegNum = curRegNum;
+ *pRegTmp = regTmp;
+
+ return filledExtraSlot;
+}
+#endif // _TARGET_ARM_
+
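+// genFindDeadFieldRegs:
+//   Given a copy-block node whose source is (the address of) a promoted struct local, returns the mask
+//   of registers holding enregistered field variables of that local when this use is the local's last
+//   use; the caller can then remove those registers from the GC pointer sets.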
+regMaskTP CodeGen::genFindDeadFieldRegs(GenTreePtr cpBlk)
+{
+ noway_assert(cpBlk->OperIsCopyBlkOp()); // Precondition.
+ GenTreePtr rhs = cpBlk->gtOp.gtOp1;
+ regMaskTP res = 0;
+ if (rhs->OperIsIndir())
+ {
+ GenTree* addr = rhs->AsIndir()->Addr();
+ if (addr->gtOper == GT_ADDR)
+ {
+ rhs = addr->gtOp.gtOp1;
+ }
+ }
+ if (rhs->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* rhsDsc = &compiler->lvaTable[rhs->gtLclVarCommon.gtLclNum];
+ if (rhsDsc->lvPromoted)
+ {
+ // It is promoted; iterate over its field vars.
+ unsigned fieldVarNum = rhsDsc->lvFieldLclStart;
+ for (unsigned i = 0; i < rhsDsc->lvFieldCnt; i++, fieldVarNum++)
+ {
+ LclVarDsc* fieldVarDsc = &compiler->lvaTable[fieldVarNum];
+ // Did the variable go dead, and is it enregistered?
+ if (fieldVarDsc->lvRegister && (rhs->gtFlags & GTF_VAR_DEATH))
+ {
+ // Add the register number to the set of registers holding field vars that are going dead.
+ res |= genRegMask(fieldVarDsc->lvRegNum);
+ }
+ }
+ }
+ }
+ return res;
+}
+
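+// SetupLateArgs:
+//   Moves the late arguments of 'call' into their assigned argument registers, spilling any
+//   callee-trashed registers they collide with first; on ARM this includes splitting TYP_STRUCT
+//   arguments between argument registers and the outgoing arg space.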
+void CodeGen::SetupLateArgs(GenTreePtr call)
+{
+ GenTreeArgList* lateArgs;
+ GenTreePtr curr;
+
+ /* Generate the code to move the late arguments into registers */
+
+ for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ {
+ curr = lateArgs->Current();
+ assert(curr);
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, curr);
+ assert(curArgTabEntry);
+ regNumber regNum = curArgTabEntry->regNum;
+ unsigned argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ assert(isRegParamType(curr->TypeGet()));
+ assert(curr->gtType != TYP_VOID);
+
+ /* If the register is already marked as used, it will become
+ multi-used. However, since it is a callee-trashed register,
+ we will have to spill it before the call anyway. So do it now */
+
+ {
+ // Remember which registers hold pointers. We will spill
+ // them, but the code that follows will fetch reg vars from
+            // the registers, so we need that GC info.
+ // Also regSet.rsSpillReg doesn't like to spill enregistered
+ // variables, but if this is their last use that is *exactly*
+ // what we need to do, so we have to temporarily pretend
+ // they are no longer live.
+ // You might ask why are they in regSet.rsMaskUsed and regSet.rsMaskVars
+ // when their last use is about to occur?
+ // It is because this is the second operand to be evaluated
+ // of some parent binary op, and the first operand is
+            // live across this tree, and it was thought it could reuse the
+            // variable's register (like a GT_REG_VAR). This probably
+ // is caused by RegAlloc assuming the first operand would
+ // evaluate into another register.
+ regMaskTP rsTemp = regSet.rsMaskVars & regSet.rsMaskUsed & RBM_CALLEE_TRASH;
+ regMaskTP gcRegSavedByref = gcInfo.gcRegByrefSetCur & rsTemp;
+ regMaskTP gcRegSavedGCRef = gcInfo.gcRegGCrefSetCur & rsTemp;
+ regSet.RemoveMaskVars(rsTemp);
+
+ regNumber regNum2 = regNum;
+ for (unsigned i = 0; i < curArgTabEntry->numRegs; i++)
+ {
+ if (regSet.rsMaskUsed & genRegMask(regNum2))
+ {
+ assert(genRegMask(regNum2) & RBM_CALLEE_TRASH);
+ regSet.rsSpillReg(regNum2);
+ }
+ regNum2 = genRegArgNext(regNum2);
+ assert(i + 1 == curArgTabEntry->numRegs || regNum2 != MAX_REG_ARG);
+ }
+
+ // Restore gc tracking masks.
+ gcInfo.gcRegByrefSetCur |= gcRegSavedByref;
+ gcInfo.gcRegGCrefSetCur |= gcRegSavedGCRef;
+
+ // Set maskvars back to normal
+ regSet.AddMaskVars(rsTemp);
+ }
+
+ /* Evaluate the argument to a register */
+
+        /* Check if this is the guess area for the resolve interface call
+         * Pass a size of EA_OFFSET */
+ if (curr->gtOper == GT_CLS_VAR && compiler->eeGetJitDataOffs(curr->gtClsVar.gtClsVarHnd) >= 0)
+ {
+ getEmitter()->emitIns_R_C(ins_Load(TYP_INT), EA_OFFSET, regNum, curr->gtClsVar.gtClsVarHnd, 0);
+ regTracker.rsTrackRegTrash(regNum);
+
+ /* The value is now in the appropriate register */
+
+ genMarkTreeInReg(curr, regNum);
+
+ regSet.rsMarkRegUsed(curr);
+ }
+#ifdef _TARGET_ARM_
+ else if (curr->gtType == TYP_STRUCT)
+ {
+ GenTree* arg = curr;
+ while (arg->gtOper == GT_COMMA)
+ {
+ GenTreePtr op1 = arg->gtOp.gtOp1;
+ genEvalSideEffects(op1);
+ genUpdateLife(op1);
+ arg = arg->gtOp.gtOp2;
+ }
+ noway_assert((arg->OperGet() == GT_OBJ) || (arg->OperGet() == GT_LCL_VAR) ||
+ (arg->OperGet() == GT_MKREFANY));
+
+ // This code passes a TYP_STRUCT by value using
+ // the argument registers first and
+ // then the lvaOutgoingArgSpaceVar area.
+ //
+
+ // We prefer to choose low registers here to reduce code bloat
+ regMaskTP regNeedMask = RBM_LOW_REGS;
+ unsigned firstStackSlot = 0;
+ unsigned argAlign = TARGET_POINTER_SIZE;
+ size_t originalSize = InferStructOpSizeAlign(arg, &argAlign);
+
+ unsigned slots = (unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
+ assert(slots > 0);
+
+ if (regNum == REG_STK)
+ {
+ firstStackSlot = 0;
+ }
+ else
+ {
+ if (argAlign == (TARGET_POINTER_SIZE * 2))
+ {
+ assert((regNum & 1) == 0);
+ }
+
+ // firstStackSlot is an index of the first slot of the struct
+ // that is on the stack, in the range [0,slots]. If it is 'slots',
+ // then the entire struct is in registers. It is also equal to
+ // the number of slots of the struct that are passed in registers.
+
+ if (curArgTabEntry->isHfaRegArg)
+ {
+                    // HFA arguments that have been chosen to go into registers must fit in the FP argument registers.
+ assert(regNum >= FIRST_FP_ARGREG && "HFA must go in FP register");
+ assert(regNum + slots - 1 <= LAST_FP_ARGREG &&
+ "HFA argument doesn't fit entirely in FP argument registers");
+ firstStackSlot = slots;
+ }
+ else if (regNum + slots > MAX_REG_ARG)
+ {
+ firstStackSlot = MAX_REG_ARG - regNum;
+ assert(firstStackSlot > 0);
+ }
+ else
+ {
+ firstStackSlot = slots;
+ }
+
+ if (curArgTabEntry->isHfaRegArg)
+ {
+ // Mask out the registers used by an HFA arg from the ones used to compute tree into.
+ for (unsigned i = regNum; i < regNum + slots; i++)
+ {
+ regNeedMask &= ~genRegMask(regNumber(i));
+ }
+ }
+ }
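+            // Example (hypothetical split, for illustration): a 16-byte struct (slots == 4) whose first
+            // register is r2 gets firstStackSlot == 2, so slots 0-1 go in r2/r3 below and slots 2-3 go
+            // to the outgoing arg space.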
+
+ // This holds the set of registers corresponding to enregistered promoted struct field variables
+ // that go dead after this use of the variable in the argument list.
+ regMaskTP deadFieldVarRegs = RBM_NONE;
+
+ // If the struct being passed is an OBJ of a local struct variable that is promoted (in the
+                // INDEPENDENT fashion, which doesn't require writes to be written through to the variable's
+ // home stack loc) "promotedStructLocalVarDesc" will be set to point to the local variable
+ // table entry for the promoted struct local. As we fill slots with the contents of a
+ // promoted struct, "bytesOfNextSlotOfCurPromotedStruct" will be the number of filled bytes
+ // that indicate another filled slot (if we have a 12-byte struct, it has 3 four byte slots; when we're
+ // working on the second slot, "bytesOfNextSlotOfCurPromotedStruct" will be 8, the point at which we're
+ // done), and "nextPromotedStructFieldVar" will be the local variable number of the next field variable
+ // to be copied.
+ LclVarDsc* promotedStructLocalVarDesc = NULL;
+ unsigned bytesOfNextSlotOfCurPromotedStruct = 0; // Size of slot.
+ unsigned nextPromotedStructFieldVar = BAD_VAR_NUM;
+ GenTreePtr structLocalTree = NULL;
+
+ BYTE* gcLayout = NULL;
+ regNumber regSrc = REG_NA;
+ if (arg->gtOper == GT_OBJ)
+ {
+ // Are we loading a promoted struct local var?
+ if (arg->gtObj.gtOp1->gtOper == GT_ADDR && arg->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ structLocalTree = arg->gtObj.gtOp1->gtOp.gtOp1;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
+ // guaranteed to
+ // live on stack.
+ {
+ // Fix 388395 ARM JitStress WP7
+ noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ // If it's not a promoted struct variable, set "regSrc" to the address
+ // of the struct local.
+ genComputeReg(arg->gtObj.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
+ noway_assert(arg->gtObj.gtOp1->gtFlags & GTF_REG_VAL);
+ regSrc = arg->gtObj.gtOp1->gtRegNum;
+ // Remove this register from the set of registers that we pick from, unless slots equals 1
+ if (slots > 1)
+ regNeedMask &= ~genRegMask(regSrc);
+ }
+
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+ compiler->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+ }
+ else if (arg->gtOper == GT_LCL_VAR)
+ {
+ // Move the address of the LCL_VAR in arg into reg
+
+ unsigned varNum = arg->gtLclVarCommon.gtLclNum;
+
+ // Are we loading a promoted struct local var?
+ structLocalTree = arg;
+ unsigned structLclNum = structLocalTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &compiler->lvaTable[structLclNum];
+
+ noway_assert(structLocalTree->TypeGet() == TYP_STRUCT);
+
+ Compiler::lvaPromotionType promotionType = compiler->lvaGetPromotionType(varDsc);
+
+ if (varDsc->lvPromoted && promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT) // Otherwise it is
+ // guaranteed to live
+ // on stack.
+ {
+ assert(!varDsc->lvAddrExposed); // Compiler::PROMOTION_TYPE_INDEPENDENT ==> not exposed.
+ promotedStructLocalVarDesc = varDsc;
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+ }
+
+ if (promotedStructLocalVarDesc == NULL)
+ {
+ regSrc = regSet.rsPickFreeReg(regNeedMask);
+ // Remove this register from the set of registers that we pick from, unless slots equals 1
+ if (slots > 1)
+ regNeedMask &= ~genRegMask(regSrc);
+
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, regSrc, varNum, 0);
+ regTracker.rsTrackRegTrash(regSrc);
+ gcLayout = compiler->lvaGetGcLayout(varNum);
+ }
+ }
+ else if (arg->gtOper == GT_MKREFANY)
+ {
+ assert(slots == 2);
+ assert((firstStackSlot == 1) || (firstStackSlot == 2));
+ assert(argOffset == 0); // ???
+ PushMkRefAnyArg(arg, curArgTabEntry, regNeedMask);
+
+ // Adjust argOffset if part of this guy was pushed onto the stack
+ if (firstStackSlot < slots)
+ {
+ argOffset += TARGET_POINTER_SIZE;
+ }
+
+ // Skip the copy loop below because we have already placed the argument in the right place
+ slots = 0;
+ gcLayout = NULL;
+ }
+ else
+ {
+ assert(!"Unsupported TYP_STRUCT arg kind");
+ gcLayout = new (compiler, CMK_Codegen) BYTE[slots];
+ }
+
+ if (promotedStructLocalVarDesc != NULL)
+ {
+                    // We must do the stack parts first, since those might need values
+ // from argument registers that will be overwritten in the portion of the
+ // loop that writes into the argument registers.
+ bytesOfNextSlotOfCurPromotedStruct = (firstStackSlot + 1) * TARGET_POINTER_SIZE;
+ // Now find the var number of the first that starts in the first stack slot.
+ unsigned fieldVarLim =
+ promotedStructLocalVarDesc->lvFieldLclStart + promotedStructLocalVarDesc->lvFieldCnt;
+ while (compiler->lvaTable[nextPromotedStructFieldVar].lvFldOffset <
+ (firstStackSlot * TARGET_POINTER_SIZE) &&
+ nextPromotedStructFieldVar < fieldVarLim)
+ {
+ nextPromotedStructFieldVar++;
+ }
+                    // We can only reach the limit, meaning there is no field that goes even partly on the
+                    // stack, if the first stack slot is after the last slot.
+ assert(nextPromotedStructFieldVar < fieldVarLim || firstStackSlot >= slots);
+ }
+
+ if (slots > 0) // the mkref case may have set "slots" to zero.
+ {
+ // First pass the stack portion of the struct (if any)
+ //
+ int argOffsetOfFirstStackSlot = argOffset;
+ for (unsigned i = firstStackSlot; i < slots; i++)
+ {
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ regNumber maxRegArg = regNumber(MAX_REG_ARG);
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ regNumber regTmp = REG_STK;
+
+ bool filledExtraSlot =
+ genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc, fieldSize,
+ &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &maxRegArg, argOffset,
+ /*fieldOffsetOfFirstStackSlot*/ firstStackSlot *
+ TARGET_POINTER_SIZE,
+ argOffsetOfFirstStackSlot, &deadFieldVarRegs, &regTmp);
+ if (filledExtraSlot)
+ {
+ i++;
+ argOffset += TARGET_POINTER_SIZE;
+ }
+ }
+ else // (promotedStructLocalVarDesc == NULL)
+ {
+                        // When slots > 1, we perform multiple loads/stores, so regTmp cannot be equal to regSrc;
+                        // and although regSrc has been excluded from regNeedMask, regNeedMask is only a *hint*
+                        // to regSet.rsPickFreeReg, so we need to be a little more forceful.
+                        // Otherwise (slots == 1), just reuse the same register.
+ //
+ regNumber regTmp = regSrc;
+ if (slots != 1)
+ {
+ regMaskTP regSrcUsed;
+ regSet.rsLockReg(genRegMask(regSrc), &regSrcUsed);
+
+ regTmp = regSet.rsPickFreeReg(regNeedMask);
+
+ noway_assert(regTmp != regSrc);
+
+ regSet.rsUnlockReg(genRegMask(regSrc), regSrcUsed);
+ }
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), fieldSize, regTmp, regSrc,
+ i * TARGET_POINTER_SIZE);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), fieldSize, regTmp,
+ compiler->lvaOutgoingArgSpaceVar, argOffset);
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ argOffset += TARGET_POINTER_SIZE;
+ }
+
+ // Now pass the register portion of the struct
+ //
+
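+                // Reset the promoted-field cursor so we can walk the fields again,
+                // this time for the portion passed in registers.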
+ bytesOfNextSlotOfCurPromotedStruct = TARGET_POINTER_SIZE;
+ if (promotedStructLocalVarDesc != NULL)
+ nextPromotedStructFieldVar = promotedStructLocalVarDesc->lvFieldLclStart;
+
+                // Create a nested loop here so that the first time through the loop
+                // we set up all of the regArg registers except for possibly
+                // the one that would overwrite regSrc. Then in the final pass
+                // (if necessary) we just set up the regArg that overwrites regSrc.
+ //
+ bool overwriteRegSrc = false;
+ bool needOverwriteRegSrc = false;
+ do
+ {
+ if (needOverwriteRegSrc)
+ overwriteRegSrc = true;
+
+ for (unsigned i = 0; i < firstStackSlot; i++)
+ {
+ regNumber regArg = (regNumber)(regNum + i);
+
+ if (overwriteRegSrc == false)
+ {
+ if (regArg == regSrc)
+ {
+ needOverwriteRegSrc = true;
+ continue;
+ }
+ }
+ else
+ {
+ if (regArg != regSrc)
+ continue;
+ }
+
+ emitAttr fieldSize;
+ if (gcLayout[i] == TYPE_GC_NONE)
+ fieldSize = EA_PTRSIZE;
+ else if (gcLayout[i] == TYPE_GC_REF)
+ fieldSize = EA_GCREF;
+ else
+ {
+ noway_assert(gcLayout[i] == TYPE_GC_BYREF);
+ fieldSize = EA_BYREF;
+ }
+
+ regNumber regTmp = REG_STK;
+ if (promotedStructLocalVarDesc != NULL)
+ {
+ bool filledExtraSlot =
+ genFillSlotFromPromotedStruct(arg, curArgTabEntry, promotedStructLocalVarDesc,
+ fieldSize, &nextPromotedStructFieldVar,
+ &bytesOfNextSlotOfCurPromotedStruct,
+ /*pCurRegNum*/ &regArg,
+ /*argOffset*/ INT32_MAX,
+ /*fieldOffsetOfFirstStackSlot*/ INT32_MAX,
+ /*argOffsetOfFirstStackSlot*/ INT32_MAX,
+ &deadFieldVarRegs, &regTmp);
+ if (filledExtraSlot)
+ i++;
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AR(ins_Load(curArgTabEntry->isHfaRegArg ? TYP_FLOAT : TYP_I_IMPL),
+ fieldSize, regArg, regSrc, i * TARGET_POINTER_SIZE);
+ }
+ regTracker.rsTrackRegTrash(regArg);
+ }
+ } while (needOverwriteRegSrc != overwriteRegSrc);
+ }
+
+ if ((arg->gtOper == GT_OBJ) && (promotedStructLocalVarDesc == NULL))
+ {
+ regSet.rsMarkRegFree(genRegMask(regSrc));
+ }
+
+ if (regNum != REG_STK && promotedStructLocalVarDesc == NULL) // If promoted, we already declared the regs
+ // used.
+ {
+ arg->gtFlags |= GTF_REG_VAL;
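+            // Mark every register that holds a piece of the struct as used;
+            // HFA pieces live in floating-point registers, the rest in integer argument registers.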
+ for (unsigned i = 1; i < firstStackSlot; i++)
+ {
+ arg->gtRegNum = (regNumber)(regNum + i);
+ curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
+ }
+ arg->gtRegNum = regNum;
+ curArgTabEntry->isHfaRegArg ? regSet.SetUsedRegFloat(arg, true) : regSet.rsMarkRegUsed(arg);
+ }
+
+ // If we're doing struct promotion, the liveness of the promoted field vars may change after this use,
+ // so update liveness.
+ genUpdateLife(arg);
+
+ // Now, if some copied field locals were enregistered, and they're now dead, update the set of
+ // register holding gc pointers.
+ if (deadFieldVarRegs != RBM_NONE)
+ gcInfo.gcMarkRegSetNpt(deadFieldVarRegs);
+ }
+ else if (curr->gtType == TYP_LONG || curr->gtType == TYP_ULONG)
+ {
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ genCompIntoFreeRegPair(curr, RBM_NONE, RegSet::FREE_REG);
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCompIntoFreeRegPair(curr, 0)
+
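+            // Store the low half at argOffset and the high half at argOffset + 4.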
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 0, genRegPairLo(curr->gtRegPair), TYP_INT);
+ inst_SA_RV(ins_Store(TYP_INT), argOffset + 4, genRegPairHi(curr->gtRegPair), TYP_INT);
+ }
+ else
+ {
+ assert(regNum < REG_ARG_LAST);
+ regPairNo regPair = gen2regs2pair(regNum, REG_NEXT(regNum));
+ genComputeRegPair(curr, regPair, RBM_NONE, RegSet::FREE_REG, false);
+ assert(curr->gtRegPair == regPair);
+ regSet.rsMarkRegPairUsed(curr);
+ }
+ }
+#endif // _TARGET_ARM_
+ else if (curArgTabEntry->regNum == REG_STK)
+ {
+ // The arg is passed in the outgoing argument area of the stack frame
+ //
+ genCodeForTree(curr, 0);
+ assert(curr->gtFlags & GTF_REG_VAL); // should be enregistered after genCodeForTree(curr, 0)
+
+ inst_SA_RV(ins_Store(curr->gtType), argOffset, curr->gtRegNum, curr->gtType);
+
+ if ((genRegMask(curr->gtRegNum) & regSet.rsMaskUsed) == 0)
+ gcInfo.gcMarkRegSetNpt(genRegMask(curr->gtRegNum));
+ }
+ else
+ {
+ if (!varTypeIsFloating(curr->gtType))
+ {
+ genComputeReg(curr, genRegMask(regNum), RegSet::EXACT_REG, RegSet::FREE_REG, false);
+ assert(curr->gtRegNum == regNum);
+ regSet.rsMarkRegUsed(curr);
+ }
+ else // varTypeIsFloating(curr->gtType)
+ {
+ if (genIsValidFloatReg(regNum))
+ {
+ genComputeReg(curr, genRegMaskFloat(regNum, curr->gtType), RegSet::EXACT_REG, RegSet::FREE_REG,
+ false);
+ assert(curr->gtRegNum == regNum);
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ genCodeForTree(curr, 0);
+ // If we are loading a floating point type into integer registers
+ // then it must be for varargs.
+                    // genCodeForTree will load it into a floating point register;
+                    // now copy it into the correct integer register(s).
+ if (curr->TypeGet() == TYP_FLOAT)
+ {
+ assert(genRegMask(regNum) & RBM_CALLEE_TRASH);
+ regSet.rsSpillRegIfUsed(regNum);
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, regNum, curr->gtRegNum);
+#else
+#error "Unsupported target"
+#endif
+ regTracker.rsTrackRegTrash(regNum);
+
+ curr->gtType = TYP_INT; // Change this to TYP_INT in case we need to spill this register
+ curr->gtRegNum = regNum;
+ regSet.rsMarkRegUsed(curr);
+ }
+ else
+ {
+ assert(curr->TypeGet() == TYP_DOUBLE);
+ regNumber intRegNumLo = regNum;
+ curr->gtType = TYP_LONG; // Change this to TYP_LONG in case we spill this
+#ifdef _TARGET_ARM_
+ regNumber intRegNumHi = regNumber(intRegNumLo + 1);
+ assert(genRegMask(intRegNumHi) & RBM_CALLEE_TRASH);
+ assert(genRegMask(intRegNumLo) & RBM_CALLEE_TRASH);
+ regSet.rsSpillRegIfUsed(intRegNumHi);
+ regSet.rsSpillRegIfUsed(intRegNumLo);
+
+ getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegNumLo, intRegNumHi, curr->gtRegNum);
+ regTracker.rsTrackRegTrash(intRegNumLo);
+ regTracker.rsTrackRegTrash(intRegNumHi);
+ curr->gtRegPair = gen2regs2pair(intRegNumLo, intRegNumHi);
+ regSet.rsMarkRegPairUsed(curr);
+#else
+#error "Unsupported target"
+#endif
+ }
+ }
+ }
+ }
+ }
+
+ /* If any of the previously loaded arguments were spilled - reload them */
+
+ for (lateArgs = call->gtCall.gtCallLateArgs; lateArgs; lateArgs = lateArgs->Rest())
+ {
+ curr = lateArgs->Current();
+ assert(curr);
+
+ if (curr->gtFlags & GTF_SPILLED)
+ {
+ if (isRegPairType(curr->gtType))
+ {
+ regSet.rsUnspillRegPair(curr, genRegPairMask(curr->gtRegPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(curr, genRegMask(curr->gtRegNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+}
+
+#ifdef _TARGET_ARM_
+
+// 'Push' a single GT_MKREFANY argument onto a call's argument list
+// The argument is passed as described by the fgArgTabEntry.
+// If any part of the struct is to be passed in a register, the
+// regNum value will be equal to the register used to pass the
+// first part of the struct.
+// If any part is to go onto the stack, we first generate the
+// value into a register specified by 'regNeedMask' and
+// then store it to the outgoing argument area.
+// When this method returns, both parts of the TypedReference have
+// been pushed onto the stack, but *no* registers have been marked
+// as 'in-use'; that is the responsibility of the caller.
+//
+void CodeGen::PushMkRefAnyArg(GenTreePtr mkRefAnyTree, fgArgTabEntryPtr curArgTabEntry, regMaskTP regNeedMask)
+{
+ regNumber regNum = curArgTabEntry->regNum;
+ regNumber regNum2;
+ assert(mkRefAnyTree->gtOper == GT_MKREFANY);
+ regMaskTP arg1RegMask = 0;
+ int argOffset = curArgTabEntry->slotNum * TARGET_POINTER_SIZE;
+
+ // Construct the TypedReference directly into the argument list of the call by
+ // 'pushing' the first field of the typed reference: the pointer.
+ // Do this by directly generating it into the argument register or outgoing arg area of the stack.
+ // Mark it as used so we don't trash it while generating the second field.
+ //
+ if (regNum == REG_STK)
+ {
+ genComputeReg(mkRefAnyTree->gtOp.gtOp1, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(mkRefAnyTree->gtOp.gtOp1->gtFlags & GTF_REG_VAL);
+ regNumber tmpReg1 = mkRefAnyTree->gtOp.gtOp1->gtRegNum;
+ inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg1, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(tmpReg1);
+ argOffset += TARGET_POINTER_SIZE;
+ regNum2 = REG_STK;
+ }
+ else
+ {
+ assert(regNum <= REG_ARG_LAST);
+ arg1RegMask = genRegMask(regNum);
+ genComputeReg(mkRefAnyTree->gtOp.gtOp1, arg1RegMask, RegSet::EXACT_REG, RegSet::KEEP_REG);
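+        // The second field goes in the next argument register, or on the stack
+        // if the first field used the last argument register.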
+ regNum2 = (regNum == REG_ARG_LAST) ? REG_STK : genRegArgNext(regNum);
+ }
+
+ // Now 'push' the second field of the typed reference: the method table.
+ if (regNum2 == REG_STK)
+ {
+ genComputeReg(mkRefAnyTree->gtOp.gtOp2, regNeedMask, RegSet::EXACT_REG, RegSet::FREE_REG);
+ noway_assert(mkRefAnyTree->gtOp.gtOp2->gtFlags & GTF_REG_VAL);
+ regNumber tmpReg2 = mkRefAnyTree->gtOp.gtOp2->gtRegNum;
+ inst_SA_RV(ins_Store(TYP_I_IMPL), argOffset, tmpReg2, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(tmpReg2);
+ }
+ else
+ {
+ assert(regNum2 <= REG_ARG_LAST);
+ // We don't have to mark this register as being in use here because it will
+ // be done by the caller, and we don't want to double-count it.
+ genComputeReg(mkRefAnyTree->gtOp.gtOp2, genRegMask(regNum2), RegSet::EXACT_REG, RegSet::FREE_REG);
+ }
+
+    // Now that we are done generating the second part of the TypedReference, we can mark
+    // the first register as free.
+    // The caller, in the shared path, will re-mark all registers used by this argument
+    // as being used, so we don't want to double-count this one.
+ if (arg1RegMask != 0)
+ {
+ GenTreePtr op1 = mkRefAnyTree->gtOp.gtOp1;
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ /* The register that we loaded arg1 into has been spilled -- reload it back into the correct arg register */
+
+ regSet.rsUnspillReg(op1, arg1RegMask, RegSet::FREE_REG);
+ }
+ else
+ {
+ regSet.rsMarkRegFree(arg1RegMask);
+ }
+ }
+}
+#endif // _TARGET_ARM_
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+regMaskTP CodeGen::genLoadIndirectCallTarget(GenTreePtr call)
+{
+ assert((gtCallTypes)call->gtCall.gtCallType == CT_INDIRECT);
+
+ regMaskTP fptrRegs;
+
+ /* Loading the indirect call target might cause one or more of the previously
+ loaded argument registers to be spilled. So, we save information about all
+ the argument registers, and unspill any of them that get spilled, after
+ the call target is loaded.
+ */
+ struct
+ {
+ GenTreePtr node;
+ union {
+ regNumber regNum;
+ regPairNo regPair;
+ };
+ } regArgTab[MAX_REG_ARG];
+
+ /* Record the previously loaded arguments, if any */
+
+ unsigned regIndex;
+ regMaskTP prefRegs = regSet.rsRegMaskFree();
+ regMaskTP argRegs = RBM_NONE;
+ for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
+ {
+ regMaskTP mask;
+ regNumber regNum = genMapRegArgNumToRegNum(regIndex, TYP_INT);
+ GenTreePtr argTree = regSet.rsUsedTree[regNum];
+ regArgTab[regIndex].node = argTree;
+ if ((argTree != NULL) && (argTree->gtType != TYP_STRUCT)) // We won't spill the struct
+ {
+ assert(argTree->gtFlags & GTF_REG_VAL);
+ if (isRegPairType(argTree->gtType))
+ {
+ regPairNo regPair = argTree->gtRegPair;
+ assert(regNum == genRegPairHi(regPair) || regNum == genRegPairLo(regPair));
+ regArgTab[regIndex].regPair = regPair;
+ mask = genRegPairMask(regPair);
+ }
+ else
+ {
+ assert(regNum == argTree->gtRegNum);
+ regArgTab[regIndex].regNum = regNum;
+ mask = genRegMask(regNum);
+ }
+ assert(!(prefRegs & mask));
+ argRegs |= mask;
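+            // Accumulate the argument-register mask; genKeepAddressable below uses it to keep
+            // the call target addressable while avoiding these registers.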
+ }
+ }
+
+ /* Record the register(s) used for the indirect call func ptr */
+ fptrRegs = genMakeRvalueAddressable(call->gtCall.gtCallAddr, prefRegs, RegSet::KEEP_REG, false);
+
+ /* If any of the previously loaded arguments were spilled, reload them */
+
+ for (regIndex = 0; regIndex < MAX_REG_ARG; regIndex++)
+ {
+ GenTreePtr argTree = regArgTab[regIndex].node;
+ if ((argTree != NULL) && (argTree->gtFlags & GTF_SPILLED))
+ {
+ assert(argTree->gtType != TYP_STRUCT); // We currently don't support spilling structs in argument registers
+ if (isRegPairType(argTree->gtType))
+ {
+ regSet.rsUnspillRegPair(argTree, genRegPairMask(regArgTab[regIndex].regPair), RegSet::KEEP_REG);
+ }
+ else
+ {
+ regSet.rsUnspillReg(argTree, genRegMask(regArgTab[regIndex].regNum), RegSet::KEEP_REG);
+ }
+ }
+ }
+
+ /* Make sure the target is still addressable while avoiding the argument registers */
+
+ fptrRegs = genKeepAddressable(call->gtCall.gtCallAddr, fptrRegs, argRegs);
+
+ return fptrRegs;
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a call. If the call returns a value in register(s), the
+ * register mask that describes where the result will be found is returned;
+ * otherwise, RBM_NONE is returned.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP CodeGen::genCodeForCall(GenTreePtr call, bool valUsed)
+{
+ emitAttr retSize;
+ size_t argSize;
+ size_t args;
+ regMaskTP retVal;
+ emitter::EmitCallType emitCallType;
+
+ unsigned saveStackLvl;
+
+ BasicBlock* returnLabel = DUMMY_INIT(NULL);
+ LclVarDsc* frameListRoot = NULL;
+
+ unsigned savCurIntArgReg;
+ unsigned savCurFloatArgReg;
+
+ unsigned areg;
+
+ regMaskTP fptrRegs = RBM_NONE;
+ regMaskTP vptrMask = RBM_NONE;
+
+#ifdef DEBUG
+ unsigned stackLvl = getEmitter()->emitCurStackLvl;
+
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tBeg call ");
+ Compiler::printTreeID(call);
+ printf(" stack %02u [E=%02u]\n", genStackLevel, stackLvl);
+ }
+#endif
+
+ gtCallTypes callType = (gtCallTypes)call->gtCall.gtCallType;
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+
+#ifdef DEBUGGING_SUPPORT
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != NULL)
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif
+
+ /* Make some sanity checks on the call node */
+
+ // This is a call
+ noway_assert(call->IsCall());
+ // "this" only makes sense for user functions
+ noway_assert(call->gtCall.gtCallObjp == 0 || callType == CT_USER_FUNC || callType == CT_INDIRECT);
+    // tailcalls won't be done for helpers or for caller-pop args; also check that
+    // the global flag is set
+ noway_assert(!call->gtCall.IsTailCall() ||
+ (callType != CT_HELPER && !(call->gtFlags & GTF_CALL_POP_ARGS) && compiler->compTailCallUsed));
+
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->gtCall.callSig;
+ }
+#endif // DEBUG
+
+ unsigned pseudoStackLvl = 0;
+
+ if (!isFramePointerUsed() && (genStackLevel != 0) && compiler->fgIsThrowHlpBlk(compiler->compCurBB))
+ {
+ noway_assert(compiler->compCurBB->bbTreeList->gtStmt.gtStmtExpr == call);
+
+ pseudoStackLvl = genStackLevel;
+
+ noway_assert(!"Blocks with non-empty stack on entry are NYI in the emitter "
+ "so fgAddCodeRef() should have set isFramePointerRequired()");
+ }
+
+ /* Mark the current stack level and list of pointer arguments */
+
+ saveStackLvl = genStackLevel;
+
+ /*-------------------------------------------------------------------------
+ * Set up the registers and arguments
+ */
+
+ /* We'll keep track of how much we've pushed on the stack */
+
+ argSize = 0;
+
+ /* We need to get a label for the return address with the proper stack depth. */
+ /* For the callee pops case (the default) that is before the args are pushed. */
+
+ if ((call->gtFlags & GTF_CALL_UNMANAGED) && !(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ returnLabel = genCreateTempLabel();
+ }
+
+ /*
+ Make sure to save the current argument register status
+ in case we have nested calls.
+ */
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+ savCurIntArgReg = intRegState.rsCurRegArgNum;
+ savCurFloatArgReg = floatRegState.rsCurRegArgNum;
+ intRegState.rsCurRegArgNum = 0;
+ floatRegState.rsCurRegArgNum = 0;
+
+ /* Pass the arguments */
+
+ if ((call->gtCall.gtCallObjp != NULL) || (call->gtCall.gtCallArgs != NULL))
+ {
+ argSize += genPushArgList(call);
+ }
+
+ /* We need to get a label for the return address with the proper stack depth. */
+ /* For the caller pops case (cdecl) that is after the args are pushed. */
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ returnLabel = genCreateTempLabel();
+
+ /* Make sure that we now have a label */
+ noway_assert(returnLabel != DUMMY_INIT(NULL));
+ }
+
+ if (callType == CT_INDIRECT)
+ {
+ fptrRegs = genLoadIndirectCallTarget(call);
+ }
+
+ /* Make sure any callee-trashed registers are saved */
+
+ regMaskTP calleeTrashedRegs = RBM_NONE;
+
+#if GTF_CALL_REG_SAVE
+ if (call->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ /* The return value reg(s) will definitely be trashed */
+
+ switch (call->gtType)
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+ calleeTrashedRegs = RBM_INTRET;
+ break;
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ calleeTrashedRegs = RBM_LNGRET;
+ break;
+
+ case TYP_VOID:
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+ calleeTrashedRegs = 0;
+ break;
+
+ default:
+ noway_assert(!"unhandled/unexpected type");
+ }
+ }
+ else
+#endif
+ {
+ calleeTrashedRegs = RBM_CALLEE_TRASH;
+ }
+
+ /* Spill any callee-saved registers which are being used */
+
+ regMaskTP spillRegs = calleeTrashedRegs & regSet.rsMaskUsed;
+
+    /* Rather than saving all GC registers to the InlinedCallFrame,
+       just spill them to temps. */
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ spillRegs |= (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & regSet.rsMaskUsed;
+
+ // Ignore fptrRegs as it is needed only to perform the indirect call
+
+ spillRegs &= ~fptrRegs;
+
+ /* Do not spill the argument registers.
+ Multi-use of RBM_ARG_REGS should be prevented by genPushArgList() */
+
+ noway_assert((regSet.rsMaskMult & call->gtCall.gtCallRegUsedMask) == 0);
+ spillRegs &= ~call->gtCall.gtCallRegUsedMask;
+
+ if (spillRegs)
+ {
+ regSet.rsSpillRegs(spillRegs);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // Spill fp stack
+ SpillForCallStackFP();
+
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ // Pick up a reg
+ regNumber regReturn = regSet.PickRegFloat();
+
+ // Assign reg to tree
+ genMarkTreeInReg(call, regReturn);
+
+ // Mark as used
+ regSet.SetUsedRegFloat(call, true);
+
+ // Update fp state
+ compCurFPState.Push(regReturn);
+ }
+#else
+ SpillForCallRegisterFP(call->gtCall.gtCallRegUsedMask);
+#endif
+
+ /* If the method returns a GC ref, set size to EA_GCREF or EA_BYREF */
+
+ retSize = EA_PTRSIZE;
+
+ if (valUsed)
+ {
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+     * For caller-pop calls, the GC info will report the arguments as pending
+       arguments, since the caller explicitly pops them. They should also be
+       reported as non-GC arguments, since they effectively go dead at the
+       call site (the callee owns them).
+ */
+
+ args = (call->gtFlags & GTF_CALL_POP_ARGS) ? -int(argSize) : argSize;
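+    // Note: a negative 'args' value marks the pushed arguments as caller-pop, per the comment above.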
+
+#ifdef PROFILING_SUPPORTED
+
+ /*-------------------------------------------------------------------------
+ * Generate the profiling hooks for the call
+ */
+
+ /* Treat special cases first */
+
+ /* fire the event at the call site */
+ /* alas, right now I can only handle calls via a method handle */
+ if (compiler->compIsProfilerHookNeeded() && (callType == CT_USER_FUNC) && call->gtCall.IsTailCall())
+ {
+ unsigned saveStackLvl2 = genStackLevel;
+
+ //
+ // Push the profilerHandle
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ regMaskTP byrefPushedRegs;
+ regMaskTP norefPushedRegs;
+ regMaskTP pushedArgRegs = genPushRegs(call->gtCall.gtCallRegUsedMask, &byrefPushedRegs, &norefPushedRegs);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_AR_R(INS_push, EA_PTR_DSP_RELOC, REG_NA, REG_NA,
+ (ssize_t)compiler->compProfilerMethHnd);
+ }
+ else
+ {
+ inst_IV(INS_push, (size_t)compiler->compProfilerMethHnd);
+ }
+ genSinglePush();
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ sizeof(int) * 1, // argSize
+ EA_UNKNOWN); // retSize
+
+ //
+ // Adjust the number of stack slots used by this managed method if necessary.
+ //
+ if (compiler->fgPtrArgCntMax < 1)
+ {
+ compiler->fgPtrArgCntMax = 1;
+ }
+
+ genPopRegs(pushedArgRegs, byrefPushedRegs, norefPushedRegs);
+#elif _TARGET_ARM_
+ // We need r0 (to pass profiler handle) and another register (call target) to emit a tailcall callback.
+    // To make r0 available, we add REG_PROFILER_TAIL_SCRATCH as an additional interference for tail-prefixed calls.
+    // Here we grab a register to temporarily store r0 and restore it after we have emitted the callback.
+ //
+ // By the time we reach this point argument registers are setup (by genPushArgList()), therefore we don't want
+ // to disturb them and hence argument registers are locked here.
+ regMaskTP usedMask = RBM_NONE;
+ regSet.rsLockReg(RBM_ARG_REGS, &usedMask);
+
+ regNumber scratchReg = regSet.rsGrabReg(RBM_CALLEE_SAVED);
+ regSet.rsLockReg(genRegMask(scratchReg));
+
+ emitAttr attr = EA_UNKNOWN;
+ if (RBM_R0 & gcInfo.gcRegGCrefSetCur)
+ {
+ attr = EA_GCREF;
+ gcInfo.gcMarkRegSetGCref(scratchReg);
+ }
+ else if (RBM_R0 & gcInfo.gcRegByrefSetCur)
+ {
+ attr = EA_BYREF;
+ gcInfo.gcMarkRegSetByref(scratchReg);
+ }
+ else
+ {
+ attr = EA_4BYTE;
+ }
+
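+        // Save r0 into the scratch register; 'attr' and the gcMarkRegSet* calls above keep its GC
+        // status correctly reported across the helper call. r0 is restored below.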
+ getEmitter()->emitIns_R_R(INS_mov, attr, scratchReg, REG_R0);
+ regTracker.rsTrackRegTrash(scratchReg);
+
+ if (compiler->compProfilerMethHndIndirected)
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+ regTracker.rsTrackRegTrash(REG_R0);
+ }
+ else
+ {
+ instGen_Set_Reg_To_Imm(EA_4BYTE, REG_R0, (ssize_t)compiler->compProfilerMethHnd);
+ }
+
+ genEmitHelperCall(CORINFO_HELP_PROF_FCN_TAILCALL,
+ 0, // argSize
+ EA_UNKNOWN); // retSize
+
+    // Restore the state that existed before the profiler callback
+ gcInfo.gcMarkRegSetNpt(scratchReg);
+ getEmitter()->emitIns_R_R(INS_mov, attr, REG_R0, scratchReg);
+ regTracker.rsTrackRegTrash(REG_R0);
+ regSet.rsUnlockReg(genRegMask(scratchReg));
+ regSet.rsUnlockReg(RBM_ARG_REGS, usedMask);
+#else
+ NYI("Pushing the profilerHandle & caller's sp for the profiler callout and locking any registers");
+#endif //_TARGET_X86_
+
+ /* Restore the stack level */
+ genStackLevel = saveStackLvl2;
+ }
+
+#endif // PROFILING_SUPPORTED
+
+#ifdef DEBUG
+ /*-------------------------------------------------------------------------
+ * Generate an ESP check for the call
+ */
+
+ if (compiler->opts.compStackCheckOnCall
+#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
+ // check the stacks as frequently as possible
+ && !call->IsHelperCall()
+#else
+ && call->gtCall.gtCallType == CT_USER_FUNC
+#endif
+ )
+ {
+ noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
+ }
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Generate the call
+ */
+
+ bool fPossibleSyncHelperCall = false;
+ CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF; /* only initialized to avoid compiler C4701 warning */
+
+ bool fTailCallTargetIsVSD = false;
+
+ bool fTailCall = (call->gtCall.gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
+
+ /* Check for Delegate.Invoke. If so, we inline it. We get the
+ target-object and target-function from the delegate-object, and do
+ an indirect call.
+ */
+
+ if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) && !fTailCall)
+ {
+ noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
+
+ assert((compiler->info.compCompHnd->getMethodAttribs(call->gtCall.gtCallMethHnd) &
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) ==
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
+
+ /* Find the offsets of the 'this' pointer and new target */
+
+ CORINFO_EE_INFO* pInfo;
+ unsigned instOffs; // offset of new 'this' pointer
+ unsigned firstTgtOffs; // offset of first target to invoke
+ const regNumber regThis = genGetThisArgReg(call);
+
+ pInfo = compiler->eeGetEEInfo();
+ instOffs = pInfo->offsetOfDelegateInstance;
+ firstTgtOffs = pInfo->offsetOfDelegateFirstTarget;
+
+#ifdef _TARGET_ARM_
+ if ((call->gtCall.gtCallMoreFlags & GTF_CALL_M_SECURE_DELEGATE_INV))
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_VIRTUAL_STUB_PARAM, regThis,
+ pInfo->offsetOfSecureDelegateIndirectCell);
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+ }
+#endif // _TARGET_ARM_
+
+ // Grab an available register to use for the CALL indirection
+ regNumber indCallReg = regSet.rsGrabReg(RBM_ALLINT);
+
+ // Save the invoke-target-function in indCallReg
+ // 'mov indCallReg, dword ptr [regThis + firstTgtOffs]'
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, indCallReg, regThis, firstTgtOffs);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ /* Set new 'this' in REG_CALL_THIS - 'mov REG_CALL_THIS, dword ptr [regThis + instOffs]' */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_GCREF, regThis, regThis, instOffs);
+ regTracker.rsTrackRegTrash(regThis);
+ noway_assert(instOffs < 127);
+
+ /* Call through indCallReg */
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
+ }
+ else
+
+ /*-------------------------------------------------------------------------
+ * Virtual and interface calls
+ */
+
+ switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+ {
+ regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
+
+ // An x86 JIT which uses full stub dispatch must generate only
+ // the following stub dispatch calls:
+ //
+ // (1) isCallRelativeIndirect:
+ // call dword ptr [rel32] ; FF 15 ---rel32----
+ // (2) isCallRelative:
+ // call abc ; E8 ---rel32----
+ // (3) isCallRegisterIndirect:
+ // 3-byte nop ;
+ // call dword ptr [eax] ; FF 10
+ //
+ // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+
+ //
+ // Please do not insert any Random NOPs while constructing this VSD call
+ //
+ getEmitter()->emitDisableRandomNops();
+
+ if (!fTailCall)
+ {
+ // This is code to set up an indirect call to a stub address computed
+ // via dictionary lookup. However the dispatch stub receivers aren't set up
+ // to accept such calls at the moment.
+ if (callType == CT_INDIRECT)
+ {
+ regNumber indReg;
+
+ // -------------------------------------------------------------------------
+ // The importer decided we needed a stub call via a computed
+ // stub dispatch address, i.e. an address which came from a dictionary lookup.
+ // - The dictionary lookup produces an indirected address, suitable for call
+ // via "call [REG_VIRTUAL_STUB_PARAM]"
+ //
+ // This combination will only be generated for shared generic code and when
+ // stub dispatch is active.
+
+ // No need to null check the this pointer - the dispatch code will deal with this.
+
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ // Now put the address in REG_VIRTUAL_STUB_PARAM.
+ // This is typically a nop when the register used for
+ // the gtCallAddr is REG_VIRTUAL_STUB_PARAM
+ //
+ inst_RV_TT(INS_mov, REG_VIRTUAL_STUB_PARAM, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+
+#if defined(_TARGET_X86_)
+ // Emit enough bytes of nops so that this sequence can be distinguished
+ // from other virtual stub dispatch calls.
+ //
+ // NOTE: THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+ //
+ getEmitter()->emitIns_Nop(3);
+
+ // Make the virtual stub call:
+ // call [REG_VIRTUAL_STUB_PARAM]
+ //
+ emitCallType = emitter::EC_INDIR_ARD;
+
+ indReg = REG_VIRTUAL_STUB_PARAM;
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+#elif CPU_LOAD_STORE_ARCH // ARM doesn't allow us to use an indirection for the call
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ // Make the virtual stub call:
+ // ldr indReg, [REG_VIRTUAL_STUB_PARAM]
+ // call indReg
+ //
+ emitCallType = emitter::EC_INDIR_R;
+
+ // Now dereference [REG_VIRTUAL_STUB_PARAM] and put it in a new temp register 'indReg'
+ //
+ indReg = regSet.rsGrabReg(RBM_ALLINT & ~RBM_VIRTUAL_STUB_PARAM);
+ assert(call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indReg, REG_VIRTUAL_STUB_PARAM, 0);
+ regTracker.rsTrackRegTrash(indReg);
+
+#else
+#error "Unknown target for VSD call"
+#endif
+
+ getEmitter()->emitIns_Call(emitCallType,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indReg);
+ }
+ else
+ {
+ // -------------------------------------------------------------------------
+ // Check for a direct stub call.
+ //
+
+ // Get stub addr. This will return NULL if virtual call stubs are not active
+ void* stubAddr = NULL;
+
+ stubAddr = (void*)call->gtCall.gtStubCallStubAddr;
+
+ noway_assert(stubAddr != NULL);
+
+ // -------------------------------------------------------------------------
+ // Direct stub calls, though the stubAddr itself may still need to be
+                        // accessed via an indirection.
+ //
+
+                        // No need to null check - the dispatch code will deal with a null 'this'.
+
+ emitter::EmitCallType callTypeStubAddr = emitter::EC_FUNC_ADDR;
+ void* addr = stubAddr;
+ int disp = 0;
+ regNumber callReg = REG_NA;
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ {
+#if CPU_LOAD_STORE_ARCH
+ callReg = regSet.rsGrabReg(RBM_VIRTUAL_STUB_PARAM);
+ noway_assert(callReg == REG_VIRTUAL_STUB_PARAM);
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_VIRTUAL_STUB_PARAM, (ssize_t)stubAddr);
+ // The stub will write-back to this register, so don't track it
+ regTracker.rsTrackRegTrash(REG_VIRTUAL_STUB_PARAM);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_JUMP_THUNK_PARAM,
+ REG_VIRTUAL_STUB_PARAM, 0);
+ regTracker.rsTrackRegTrash(REG_JUMP_THUNK_PARAM);
+ callTypeStubAddr = emitter::EC_INDIR_R;
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, REG_JUMP_THUNK_PARAM);
+
+#else
+ // emit an indirect call
+ callTypeStubAddr = emitter::EC_INDIR_C;
+ addr = 0;
+ disp = (ssize_t)stubAddr;
+#endif
+ }
+#if CPU_LOAD_STORE_ARCH
+ if (callTypeStubAddr != emitter::EC_INDIR_R)
+#endif
+ {
+ getEmitter()->emitIns_Call(callTypeStubAddr, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, callReg, REG_NA, 0, disp);
+ }
+ }
+ }
+ else // tailCall is true
+ {
+
+// Non-X86 tail calls materialize the null-check in fgMorphTailCall, when it
+// moves the this pointer out of its usual place and into the argument list.
+#ifdef _TARGET_X86_
+
+ // Generate "cmp ECX, [ECX]" to trap null pointers
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+
+#endif // _TARGET_X86_
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ // Now put the address in EAX.
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+ }
+ else
+ {
+ // importer/EE should guarantee the indirection
+ noway_assert(call->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR,
+ ssize_t(call->gtCall.gtStubCallStubAddr));
+ }
+
+ fTailCallTargetIsVSD = true;
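+                    // Recorded so the tailcall helper can be told that the target is a virtual stub
+                    // dispatch (VSD) target (see the stub dispatch flag pushed for x86 below).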
+ }
+
+ //
+ // OK to start inserting random NOPs again
+ //
+ getEmitter()->emitEnableRandomNops();
+ }
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ // stub dispatching is off or this is not a virtual call (could be a tailcall)
+ {
+ regNumber vptrReg;
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+
+ noway_assert(callType == CT_USER_FUNC);
+
+ vptrReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL indirection
+ vptrMask = genRegMask(vptrReg);
+
+ /* The register no longer holds a live pointer value */
+ gcInfo.gcMarkRegSetNpt(vptrMask);
+
+ // MOV vptrReg, [REG_CALL_THIS + offs]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, genGetThisArgReg(call),
+ VPTR_OFFS);
+ regTracker.rsTrackRegTrash(vptrReg);
+
+ noway_assert(vptrMask & ~call->gtCall.gtCallRegUsedMask);
+
+ /* Get hold of the vtable offset (note: this might be expensive) */
+
+ compiler->info.compCompHnd->getMethodVTableOffset(call->gtCall.gtCallMethHnd,
+ &vtabOffsOfIndirection,
+ &vtabOffsAfterIndirection);
+
+ /* Get the appropriate vtable chunk */
+
+ /* The register no longer holds a live pointer value */
+ gcInfo.gcMarkRegSetNpt(vptrMask);
+
+ // MOV vptrReg, [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
+ vtabOffsOfIndirection);
+
+ /* Call through the appropriate vtable slot */
+
+ if (fTailCall)
+ {
+ /* Load the function address: "[vptrReg+vtabOffs] -> reg_intret" */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR, vptrReg,
+ vtabOffsAfterIndirection);
+ }
+ else
+ {
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, vptrReg, vptrReg,
+ vtabOffsAfterIndirection);
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ vptrReg); // ireg
+#else
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_VIRTUAL, call->gtCall.gtCallMethHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ vptrReg, // ireg
+ REG_NA, // xreg
+ 0, // xmul
+ vtabOffsAfterIndirection); // disp
+#endif // CPU_LOAD_STORE_ARCH
+ }
+ }
+ break;
+
+ case GTF_CALL_NONVIRT:
+ {
+ //------------------------ Non-virtual/Indirect calls -------------------------
+ // Lots of cases follow
+ // - Direct P/Invoke calls
+ // - Indirect calls to P/Invoke functions via the P/Invoke stub
+ // - Direct Helper calls
+ // - Indirect Helper calls
+ // - Direct calls to known addresses
+ // - Direct calls where address is accessed by one or two indirections
+ // - Indirect calls to computed addresses
+ // - Tailcall versions of all of the above
+
+ CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+
+ //------------------------------------------------------
+ // Non-virtual/Indirect calls: Insert a null check on the "this" pointer if needed
+ //
+ // For (final and private) functions which were called with
+ // invokevirtual, but which we call directly, we need to
+ // dereference the object pointer to make sure it's not NULL.
+ //
+
+ if (call->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ /* Generate "cmp ECX, [ECX]" to trap null pointers */
+ const regNumber regThis = genGetThisArgReg(call);
+#if CPU_LOAD_STORE_ARCH
+ regNumber indReg =
+ regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the indirection
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, regThis, 0);
+ regTracker.rsTrackRegTrash(indReg);
+#else
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+#endif
+ }
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ //------------------------------------------------------
+ // Non-virtual/Indirect calls: PInvoke calls.
+
+ noway_assert(compiler->info.compCallUnmanaged != 0);
+
+ /* args shouldn't be greater than 64K */
+
+ noway_assert((argSize & 0xffff0000) == 0);
+
+ /* Remember the varDsc for the callsite-epilog */
+
+ frameListRoot = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+
+ // exact codegen is required
+ getEmitter()->emitDisableRandomNops();
+
+ int nArgSize = 0;
+
+ regNumber indCallReg = REG_NA;
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ if (call->gtCall.gtCallAddr->gtFlags & GTF_REG_VAL)
+ indCallReg = call->gtCall.gtCallAddr->gtRegNum;
+
+ nArgSize = (call->gtFlags & GTF_CALL_POP_ARGS) ? 0 : (int)argSize;
+ methHnd = 0;
+ }
+ else
+ {
+ noway_assert(callType == CT_USER_FUNC);
+ }
+
+ regNumber tcbReg;
+ tcbReg = genPInvokeCallProlog(frameListRoot, nArgSize, methHnd, returnLabel);
+
+ void* addr = NULL;
+
+ if (callType == CT_INDIRECT)
+ {
+ /* Double check that the callee didn't use/trash the
+ registers holding the call target.
+ */
+ noway_assert(tcbReg != indCallReg);
+
+ if (indCallReg == REG_NA)
+ {
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+ /* Please note that this even works with tcbReg == REG_EAX.
+ tcbReg contains an interesting value only if frameListRoot is
+ an enregistered local that stays alive across the call
+ (certainly not EAX). If frameListRoot has been moved into
+ EAX, we can trash it since it won't survive across the call
+ anyways.
+ */
+
+ inst_RV_TT(INS_mov, indCallReg, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ }
+
+ emitCallType = emitter::EC_INDIR_R;
+ }
+ else
+ {
+ noway_assert(callType == CT_USER_FUNC);
+
+ void* pAddr;
+ addr = compiler->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, (void**)&pAddr);
+ if (addr != NULL)
+ {
+#if CPU_LOAD_STORE_ARCH
+ // Load the address into a register, indirect it and call through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+ // Now make the call "call indCallReg"
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R,
+ methHnd, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) // sigInfo
+ NULL, // addr
+ args,
+ retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+ break;
+#else
+ emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
+ indCallReg = REG_NA;
+#endif
+ }
+ else
+ {
+ // Double-indirection. Load the address into a register
+ // and call indirectly through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)pAddr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(indCallReg);
+ emitCallType = emitter::EC_INDIR_ARD;
+
+#endif // CPU_LOAD_STORE_ARCH
+ }
+ }
+
+ getEmitter()->emitIns_Call(emitCallType, compiler->eeMarkNativeTarget(methHnd),
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ ilOffset, indCallReg);
+
+ if (callType == CT_INDIRECT)
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ getEmitter()->emitEnableRandomNops();
+
+ // Done with PInvoke calls
+ break;
+ }
+
+ if (callType == CT_INDIRECT)
+ {
+ noway_assert(genStillAddressable(call->gtCall.gtCallAddr));
+
+ if (call->gtCall.gtCallCookie)
+ {
+ //------------------------------------------------------
+ // Non-virtual indirect calls via the P/Invoke stub
+
+ GenTreePtr cookie = call->gtCall.gtCallCookie;
+ GenTreePtr target = call->gtCall.gtCallAddr;
+
+ noway_assert((call->gtFlags & GTF_CALL_POP_ARGS) == 0);
+
+ noway_assert(cookie->gtOper == GT_CNS_INT ||
+ cookie->gtOper == GT_IND && cookie->gtOp.gtOp1->gtOper == GT_CNS_INT);
+
+ noway_assert(args == argSize);
+
+#if defined(_TARGET_X86_)
+ /* load eax with the real target */
+
+ inst_RV_TT(INS_mov, REG_EAX, target);
+ regTracker.rsTrackRegTrash(REG_EAX);
+
+ if (cookie->gtOper == GT_CNS_INT)
+ inst_IV_handle(INS_push, cookie->gtIntCon.gtIconVal);
+ else
+ inst_TT(INS_push, cookie);
+
+ /* Keep track of ESP for EBP-less frames */
+ genSinglePush();
+
+ argSize += sizeof(void*);
+
+#elif defined(_TARGET_ARM_)
+
+ // Ensure that we spill these registers (if caller saved) in the prolog
+ regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ // ARM: load r12 with the real target
+ // X64: load r10 with the real target
+ inst_RV_TT(INS_mov, REG_PINVOKE_TARGET_PARAM, target);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TARGET_PARAM);
+
+ // ARM: load r4 with the pinvoke VASigCookie
+ // X64: load r11 with the pinvoke VASigCookie
+ if (cookie->gtOper == GT_CNS_INT)
+ inst_RV_IV(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie->gtIntCon.gtIconVal,
+ EA_HANDLE_CNS_RELOC);
+ else
+ inst_RV_TT(INS_mov, REG_PINVOKE_COOKIE_PARAM, cookie);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+
+ noway_assert(args == argSize);
+
+ // Ensure that we don't trash any of these registers if we have to load
+ // the helper call target into a register to invoke it.
+ regMaskTP regsUsed;
+ regSet.rsLockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ &regsUsed);
+#else
+ NYI("Non-virtual indirect calls via the P/Invoke stub");
+#endif
+
+ args = argSize;
+ noway_assert((size_t)(int)args == args);
+
+ genEmitHelperCall(CORINFO_HELP_PINVOKE_CALLI, (int)args, retSize);
+
+#if defined(_TARGET_ARM_)
+ regSet.rsUnlockReg(call->gtCall.gtCallRegUsedMask | RBM_PINVOKE_TARGET_PARAM |
+ RBM_PINVOKE_COOKIE_PARAM,
+ regsUsed);
+#endif
+
+#ifdef _TARGET_ARM_
+ // genEmitHelperCall doesn't record all registers a helper call would trash.
+ regTracker.rsTrackRegTrash(REG_PINVOKE_COOKIE_PARAM);
+#endif
+ }
+ else
+ {
+ //------------------------------------------------------
+ // Non-virtual indirect calls
+
+ if (fTailCall)
+ {
+ inst_RV_TT(INS_mov, REG_TAILCALL_ADDR, call->gtCall.gtCallAddr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+ }
+ else
+ instEmit_indCall(call, args, retSize);
+ }
+
+ genDoneAddressable(call->gtCall.gtCallAddr, fptrRegs, RegSet::KEEP_REG);
+
+ // Done with indirect calls
+ break;
+ }
+
+ //------------------------------------------------------
+ // Non-virtual direct/indirect calls: Work out if the address of the
+ // call is known at JIT time (if not it is either an indirect call
+                // or the address must be accessed via a single/double indirection)
+
+ noway_assert(callType == CT_USER_FUNC || callType == CT_HELPER);
+
+ void* addr;
+ InfoAccessType accessType;
+
+ helperNum = compiler->eeGetHelperNum(methHnd);
+
+ if (callType == CT_HELPER)
+ {
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ accessType = IAT_VALUE;
+
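+                    // If the helper's address is not known directly, call indirectly through the
+                    // cell returned in pAddr.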
+ if (!addr)
+ {
+ accessType = IAT_PVALUE;
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) == 0)
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ accessType = addrInfo.accessType;
+ addr = addrInfo.addr;
+ }
+
+ if (fTailCall)
+ {
+ noway_assert(callType == CT_USER_FUNC);
+
+ switch (accessType)
+ {
+ case IAT_VALUE:
+ //------------------------------------------------------
+                            // Non-virtual direct calls to known addresses
+ //
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ case IAT_PPVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // For tailcalls we place the target address in REG_TAILCALL_ADDR
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ {
+ regNumber indReg = REG_TAILCALL_ADDR;
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, indReg, indReg, 0);
+ regTracker.rsTrackRegTrash(indReg);
+ }
+#else
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, REG_TAILCALL_ADDR, (ssize_t)addr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_TAILCALL_ADDR,
+ REG_TAILCALL_ADDR, 0);
+ regTracker.rsTrackRegTrash(REG_TAILCALL_ADDR);
+#endif
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+ }
+ else
+ {
+ switch (accessType)
+ {
+ regNumber indCallReg;
+
+ case IAT_VALUE:
+ //------------------------------------------------------
+                            // Non-virtual direct calls to known addresses
+ //
+ // The vast majority of calls end up here.... Wouldn't
+ // it be nice if they all did!
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef _TARGET_ARM_
+ if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ // Load the address into a register and call through a register
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the
+ // CALL indirection
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ else
+#endif
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) addr, args, retSize,
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset, REG_NA, REG_NA, 0,
+ 0, /* ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helperNum));
+ }
+ break;
+
+ case IAT_PVALUE:
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ //
+
+ // Load the address into a register, load indirect and call through a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if CPU_LOAD_STORE_ARCH
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT); // Grab an available register to use for the CALL
+ // indirection
+
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+ addr = NULL;
+
+#else
+ emitCallType = emitter::EC_FUNC_TOKEN_INDIR;
+ indCallReg = REG_NA;
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, args,
+ retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, /* isJump */
+ emitter::emitNoGChelper(helperNum));
+ break;
+
+ case IAT_PPVALUE:
+ {
+ //------------------------------------------------------
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // Double-indirection. Load the address into a register
+ // and call indirectly through the register
+
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ // Grab an available register to use for the CALL indirection
+ indCallReg = regSet.rsGrabReg(RBM_ALLINT);
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, indCallReg, (ssize_t)addr);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, indCallReg, indCallReg, 0);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_R;
+
+#else
+
+ getEmitter()->emitIns_R_AI(INS_mov, EA_PTR_DSP_RELOC, indCallReg, (ssize_t)addr);
+ regTracker.rsTrackRegTrash(indCallReg);
+
+ emitCallType = emitter::EC_INDIR_ARD;
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) NULL, // addr
+ args, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, ilOffset,
+ indCallReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helperNum));
+ }
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+
+ // tracking of region protected by the monitor in synchronized methods
+ if ((helperNum != CORINFO_HELP_UNDEF) && (compiler->info.compFlags & CORINFO_FLG_SYNCH))
+ {
+ fPossibleSyncHelperCall = true;
+ }
+ }
+ }
+ break;
+
+ default:
+ noway_assert(!"strange call type");
+ break;
+ }
+
+ /*-------------------------------------------------------------------------
+ * For tailcalls, REG_INTRET contains the address of the target function,
+ * enregistered args are in the correct registers, and the stack arguments
+ * have been pushed on the stack. Now call the stub-sliding helper
+ */
+
+ if (fTailCall)
+ {
+
+ if (compiler->info.compCallUnmanaged)
+ genPInvokeMethodEpilog();
+
+#ifdef _TARGET_X86_
+ noway_assert(0 <= (ssize_t)args); // caller-pop args not supported for tailcall
+
+ // Push the count of the incoming stack arguments
+
+ unsigned nOldStkArgs =
+ (unsigned)((compiler->compArgSize - (intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*));
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, nOldStkArgs);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the count of the outgoing stack arguments
+
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, argSize / sizeof(void*));
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push info about the callee-saved registers to be restored
+ // For now, we always spill all registers if compiler->compTailCallUsed
+
+ DWORD calleeSavedRegInfo = 1 | // always restore EDI,ESI,EBX
+ (fTailCallTargetIsVSD ? 0x2 : 0x0); // Stub dispatch flag
+ getEmitter()->emitIns_I(INS_push, EA_4BYTE, calleeSavedRegInfo);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+ // Push the address of the target function
+
+ getEmitter()->emitIns_R(INS_push, EA_4BYTE, REG_TAILCALL_ADDR);
+ genSinglePush(); // Keep track of ESP for EBP-less frames
+ args += sizeof(void*);
+
+#else // _TARGET_X86_
+
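+    // On targets other than x86, no extra arguments are pushed for the tailcall helper here.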
+ args = 0;
+ retSize = EA_UNKNOWN;
+
+#endif // _TARGET_X86_
+
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ genEmitGSCookieCheck(true);
+ }
+
+        // The TailCall helper does not poll for GC. An explicit GC poll
+        // should have been inserted when we morphed this into a tail call.
+ noway_assert(compiler->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+
+ // Now call the helper
+
+ genEmitHelperCall(CORINFO_HELP_TAILCALL, (int)args, retSize);
+ }
+
+ /*-------------------------------------------------------------------------
+ * Done with call.
+ * Trash registers, pop arguments if needed, etc
+ */
+
+ /* Mark the argument registers as free */
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+
+ for (areg = 0; areg < MAX_REG_ARG; areg++)
+ {
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_INT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ continue;
+
+#ifdef _TARGET_ARM_
+ if (regSet.rsUsedTree[areg] == NULL)
+ {
+ noway_assert(areg % 2 == 1 &&
+ (((areg + 1) >= MAX_REG_ARG) || (regSet.rsUsedTree[areg + 1]->TypeGet() == TYP_STRUCT) ||
+ (genTypeStSz(regSet.rsUsedTree[areg + 1]->TypeGet()) == 2)));
+ continue;
+ }
+#endif
+
+ regSet.rsMarkRegFree(curArgMask);
+
+ // We keep regSet.rsMaskVars current during codegen, so we have to remove any
+ // that have been copied into arg regs.
+
+ regSet.RemoveMaskVars(curArgMask);
+ gcInfo.gcRegGCrefSetCur &= ~(curArgMask);
+ gcInfo.gcRegByrefSetCur &= ~(curArgMask);
+ }
+
+#if !FEATURE_STACK_FP_X87
+ //-------------------------------------------------------------------------
+ // free up the FP args
+
+ for (areg = 0; areg < MAX_FLOAT_REG_ARG; areg++)
+ {
+ regNumber argRegNum = genMapRegArgNumToRegNum(areg, TYP_FLOAT);
+ regMaskTP curArgMask = genMapArgNumToRegMask(areg, TYP_FLOAT);
+
+ // Is this one of the used argument registers?
+ if ((curArgMask & call->gtCall.gtCallRegUsedMask) == 0)
+ continue;
+
+ regSet.rsMaskUsed &= ~curArgMask;
+ regSet.rsUsedTree[argRegNum] = NULL;
+ }
+#endif // !FEATURE_STACK_FP_X87
+
+ /* restore the old argument register status */
+
+ intRegState.rsCurRegArgNum = savCurIntArgReg;
+ floatRegState.rsCurRegArgNum = savCurFloatArgReg;
+
+ noway_assert(intRegState.rsCurRegArgNum <= MAX_REG_ARG);
+
+ /* Mark all trashed registers as such */
+
+ if (calleeTrashedRegs)
+ regTracker.rsTrashRegSet(calleeTrashedRegs);
+
+ regTracker.rsTrashRegsForGCInterruptability();
+
+#ifdef DEBUG
+
+ if (!(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tEnd call ");
+ Compiler::printTreeID(call);
+ printf(" stack %02u [E=%02u] argSize=%u\n", saveStackLvl, getEmitter()->emitCurStackLvl, argSize);
+ }
+ noway_assert(stackLvl == getEmitter()->emitCurStackLvl);
+ }
+
+#endif
+
+#if FEATURE_STACK_FP_X87
+ /* All float temps must be spilled around function calls */
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ noway_assert(compCurFPState.m_uStackSize == 1);
+ }
+ else
+ {
+ noway_assert(compCurFPState.m_uStackSize == 0);
+ }
+#else
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+#ifdef _TARGET_ARM_
+ if (call->gtCall.IsVarargs() || compiler->opts.compUseSoftFP)
+ {
+            // The result for vararg methods is returned in r0/r1, but our callers
+            // expect the return value in s0/s1 because of the floating-point type. Do the move now.
+ if (call->gtType == TYP_FLOAT)
+ {
+ inst_RV_RV(INS_vmov_i2f, REG_FLOATRET, REG_INTRET, TYP_FLOAT, EA_4BYTE);
+ }
+ else
+ {
+ inst_RV_RV_RV(INS_vmov_i2d, REG_FLOATRET, REG_INTRET, REG_NEXT(REG_INTRET), EA_8BYTE);
+ }
+ }
+#endif
+ genMarkTreeInReg(call, REG_FLOATRET);
+ }
+#endif
+
+ /* The function will pop all arguments before returning */
+
+ genStackLevel = saveStackLvl;
+
+ /* No trashed registers may possibly hold a pointer at this point */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+
+ regMaskTP ptrRegs = (gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & (calleeTrashedRegs & RBM_ALLINT) &
+ ~regSet.rsMaskVars & ~vptrMask;
+ if (ptrRegs)
+ {
+ // A reg may be dead already. The assertion is too strong.
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ // use compiler->compCurLife
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount && ptrRegs != 0; varNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked, not in a register, or a floating-point type */
+
+ if (!varDsc->lvTracked)
+ continue;
+ if (!varDsc->lvRegister)
+ continue;
+ if (varDsc->IsFloatRegType())
+ continue;
+
+ /* Get hold of the index and the bitmask for the variable */
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ /* Is this variable live currently? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varIndex))
+ {
+ regNumber regNum = varDsc->lvRegNum;
+ regMaskTP regMask = genRegMask(regNum);
+
+ if (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF)
+ ptrRegs &= ~regMask;
+ }
+ }
+ if (ptrRegs)
+ {
+ printf("Bad call handling for ");
+ Compiler::printTreeID(call);
+ printf("\n");
+ noway_assert(!"A callee trashed reg is holding a GC pointer");
+ }
+ }
+#endif
+
+#if defined(_TARGET_X86_)
+ //-------------------------------------------------------------------------
+ // Create a label for tracking of region protected by the monitor in synchronized methods.
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
+ // so the GC state vars have been updated before creating the label.
+
+ if (fPossibleSyncHelperCall)
+ {
+ switch (helperNum)
+ {
+ case CORINFO_HELP_MON_ENTER:
+ case CORINFO_HELP_MON_ENTER_STATIC:
+ noway_assert(compiler->syncStartEmitCookie == NULL);
+ compiler->syncStartEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncStartEmitCookie != NULL);
+ break;
+ case CORINFO_HELP_MON_EXIT:
+ case CORINFO_HELP_MON_EXIT_STATIC:
+ noway_assert(compiler->syncEndEmitCookie == NULL);
+ compiler->syncEndEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncEndEmitCookie != NULL);
+ break;
+ default:
+ break;
+ }
+ }
+#endif // _TARGET_X86_
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ genDefineTempLabel(returnLabel);
+
+#ifdef _TARGET_X86_
+ if (getInlinePInvokeCheckEnabled())
+ {
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+ BasicBlock* esp_check;
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ /* mov ecx, dword ptr [frame.callSiteTracker] */
+
+ getEmitter()->emitIns_R_S(INS_mov, EA_4BYTE, REG_ARG_0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+
+ /* Generate the conditional jump */
+
+ if (!(call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ if (argSize)
+ {
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_ARG_0, argSize);
+ }
+ }
+ /* cmp ecx, esp */
+
+ getEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, REG_ARG_0, REG_SPBASE);
+
+ esp_check = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+
+ getEmitter()->emitIns(INS_BREAKPOINT);
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(esp_check);
+ }
+#endif
+ }
+
+ /* Are we supposed to pop the arguments? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_)
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ if ((compiler->opts.eeFlags & CORJIT_FLG_PINVOKE_RESTORE_ESP) ||
+ compiler->compStressCompile(Compiler::STRESS_PINVOKE_RESTORE_ESP, 50))
+ {
+ // P/Invoke signature mismatch resilience - restore ESP to pre-call value. We would ideally
+ // take care of the cdecl argument popping here as well but the stack depth tracking logic
+ // makes this very hard, i.e. it needs to "see" the actual pop.
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+
+ if (argSize == 0 || (call->gtFlags & GTF_CALL_POP_ARGS))
+ {
+ /* mov esp, dword ptr [frame.callSiteTracker] */
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ }
+ else
+ {
+ /* mov ecx, dword ptr [frame.callSiteTracker] */
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+
+ /* lea esp, [ecx + argSize] */
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_SPBASE, REG_ARG_0, (int)argSize);
+ }
+ }
+ }
+#endif // _TARGET_X86_
+
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ noway_assert(args == (size_t) - (int)argSize);
+
+ if (argSize)
+ {
+ genAdjustSP(argSize);
+ }
+ }
+
+ if (pseudoStackLvl)
+ {
+ noway_assert(call->gtType == TYP_VOID);
+
+ /* Generate NOP */
+
+ instGen(INS_nop);
+ }
+
+ /* What does the function return? */
+
+ retVal = RBM_NONE;
+
+ switch (call->gtType)
+ {
+ case TYP_REF:
+ case TYP_ARRAY:
+ case TYP_BYREF:
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, call->TypeGet());
+
+ __fallthrough;
+
+ case TYP_INT:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+#endif
+ retVal = RBM_INTRET;
+ break;
+
+#ifdef _TARGET_ARM_
+ case TYP_STRUCT:
+ {
+ assert(call->gtCall.gtRetClsHnd != NULL);
+ assert(compiler->IsHfa(call->gtCall.gtRetClsHnd));
+ int retSlots = compiler->GetHfaCount(call->gtCall.gtRetClsHnd);
+ assert(retSlots > 0 && retSlots <= MAX_HFA_RET_SLOTS);
+ assert(MAX_HFA_RET_SLOTS < sizeof(int) * 8);
+ retVal = ((1 << retSlots) - 1) << REG_FLOATRET;
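+ // As an illustration (assuming an HFA made up of two floats, so retSlots == 2):
+ // ((1 << 2) - 1) == 0x3, shifted left by REG_FLOATRET, yields a mask covering
+ // the first two floating-point return registers.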
+ }
+ break;
+#endif
+
+ case TYP_LONG:
+#if !CPU_HAS_FP_SUPPORT
+ case TYP_DOUBLE:
+#endif
+ retVal = RBM_LNGRET;
+ break;
+
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ break;
+#endif
+
+ case TYP_VOID:
+ break;
+
+ default:
+ noway_assert(!"unexpected/unhandled fn return type");
+ }
+
+ // We now have to generate the "call epilog" (if it was a call to unmanaged code).
+ /* if it is a call to unmanaged code, frameListRoot must be set */
+
+ noway_assert((call->gtFlags & GTF_CALL_UNMANAGED) == 0 || frameListRoot);
+
+ if (frameListRoot)
+ genPInvokeCallEpilog(frameListRoot, retVal);
+
+ if (frameListRoot && (call->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
+ {
+ if (frameListRoot->lvRegister)
+ {
+ bool isBorn = false;
+ bool isDying = true;
+ genUpdateRegLife(frameListRoot, isBorn, isDying DEBUGARG(call));
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.compStackCheckOnCall
+#if defined(USE_TRANSITION_THUNKS) || defined(USE_DYNAMIC_STACK_ALIGN)
+ // check the stack as frequently as possible
+ && !call->IsHelperCall()
+#else
+ && call->gtCall.gtCallType == CT_USER_FUNC
+#endif
+ )
+ {
+ noway_assert(compiler->lvaCallEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaCallEspCheck].lvOnFrame);
+ if (argSize > 0)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_4BYTE, REG_ARG_0, REG_SPBASE);
+ getEmitter()->emitIns_R_I(INS_sub, EA_4BYTE, REG_ARG_0, argSize);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_ARG_0, compiler->lvaCallEspCheck, 0);
+ regTracker.rsTrackRegTrash(REG_ARG_0);
+ }
+ else
+ getEmitter()->emitIns_S_R(INS_cmp, EA_4BYTE, REG_SPBASE, compiler->lvaCallEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif // DEBUG
+
+#if FEATURE_STACK_FP_X87
+ UnspillRegVarsStackFp();
+#endif // FEATURE_STACK_FP_X87
+
+ if (call->gtType == TYP_FLOAT || call->gtType == TYP_DOUBLE)
+ {
+ // Restore return node if necessary
+ if (call->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(call);
+ }
+
+#if FEATURE_STACK_FP_X87
+ // Mark as free
+ regSet.SetUsedRegFloat(call, false);
+#endif
+ }
+
+#if FEATURE_STACK_FP_X87
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ JitDumpFPState();
+ }
+#endif
+#endif
+
+ return retVal;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef JIT32_GCENCODER
+void*
+#else
+void
+#endif
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif
+}
+
+#ifdef JIT32_GCENCODER
+void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ BYTE headerBuf[64];
+ InfoHdr header;
+
+ int s_cached;
+#ifdef DEBUG
+ size_t headerSize =
+#endif
+ compiler->compInfoBlkSize =
+ gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ size_t argTabOffset = 0;
+ size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
+
+#if DISPLAY_SIZES
+
+ if (genInterruptible)
+ {
+ gcHeaderISize += compiler->compInfoBlkSize;
+ gcPtrMapISize += ptrMapSize;
+ }
+ else
+ {
+ gcHeaderNSize += compiler->compInfoBlkSize;
+ gcPtrMapNSize += ptrMapSize;
+ }
+
+#endif // DISPLAY_SIZES
+
+ compiler->compInfoBlkSize += ptrMapSize;
+
+ /* Allocate the info block for the method */
+
+ compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
+
+#if 0 // VERBOSE_SIZES
+ // TODO-Review: 'dataSize', below, is not defined
+
+// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
+ {
+ printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
+ compiler->info.compILCodeSize,
+ compiler->compInfoBlkSize,
+ codeSize + dataSize,
+ codeSize + dataSize - prologSize - epilogSize,
+ 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
+ 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
+ compiler->info.compClassName,
+ compiler->info.compMethodName);
+ }
+
+#endif
+
+ /* Fill in the info block and return it to the caller */
+
+ void* infoPtr = compiler->compInfoBlkAddr;
+
+ /* Create the method info block: header followed by GC tracking tables */
+
+ compiler->compInfoBlkAddr +=
+ gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
+ compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
+
+#ifdef DEBUG
+
+ if (0)
+ {
+ BYTE* temp = (BYTE*)infoPtr;
+ unsigned size = compiler->compInfoBlkAddr - temp;
+ BYTE* ptab = temp + headerSize;
+
+ noway_assert(size == headerSize + ptrMapSize);
+
+ printf("Method info block - header [%u bytes]:", headerSize);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if (temp == ptab)
+ {
+ printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
+ printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
+ }
+ else
+ {
+ if (!(i % 16))
+ printf("\n %04X: ", i);
+ }
+
+ printf("%02X ", *temp++);
+ }
+
+ printf("\n");
+ }
+
+#endif // DEBUG
+
+#if DUMP_GC_TABLES
+
+ if (compiler->opts.dspGCtbls)
+ {
+ const BYTE* base = (BYTE*)infoPtr;
+ unsigned size;
+ unsigned methodSize;
+ InfoHdr dumpHeader;
+
+ printf("GC Info for method %s\n", compiler->info.compFullName);
+ printf("GC info size = %3u\n", compiler->compInfoBlkSize);
+
+ size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
+ // printf("size of header encoding is %3u\n", size);
+ printf("\n");
+
+ if (compiler->opts.dspGCtbls)
+ {
+ base += size;
+ size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
+ // printf("size of pointer table is %3u\n", size);
+ printf("\n");
+ noway_assert(compiler->compInfoBlkAddr == (base + size));
+ }
+ }
+
+#ifdef DEBUG
+ if (jitOpts.testMask & 128)
+ {
+ for (unsigned offs = 0; offs < codeSize; offs++)
+ {
+ gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
+ }
+ }
+#endif // DEBUG
+#endif // DUMP_GC_TABLES
+
+ /* Make sure we ended up generating the expected number of bytes */
+
+ noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
+
+ return infoPtr;
+}
+
+#else // JIT32_GCENCODER
+
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+ gcInfoEncoder->Build();
+
+ // GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
+ // let's save the values anyway for debugging purposes
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+#endif
+
+/*****************************************************************************
+ * For CEE_LOCALLOC
+ */
+
+regNumber CodeGen::genLclHeap(GenTreePtr size)
+{
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ // regCnt is a register used to hold both
+ // the amount to stack alloc (either in bytes or pointer sized words)
+ // and the final stack alloc address to return as the result
+ //
+ regNumber regCnt = DUMMY_INIT(REG_CORRUPT);
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed());
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ BasicBlock* endLabel = NULL;
+#if FEATURE_FIXED_OUT_ARGS
+ bool stackAdjusted = false;
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP
+ // essentially popping off the outgoing arg area,
+ // We will restore it right before we return from this method
+ //
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjusted = true;
+ }
+#endif
+ size_t amount = size->gtIntCon.gtIconVal;
+
+ // Round amount up to a multiple of STACK_ALIGN and convert it to a count of pointer-sized words
+ amount += (STACK_ALIGN - 1);
+ amount &= ~(STACK_ALIGN - 1);
+ amount >>= STACK_ALIGN_SHIFT; // amount is number of pointer-sized words to locAlloc
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
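+ // For illustration (assuming x86-style values STACK_ALIGN == 4 and STACK_ALIGN_SHIFT == 2):
+ // a request of amount == 10 bytes rounds up to 12 and becomes 3 pointer-sized words,
+ // while amount == 0 stays 0 and takes the zero-allocation path just below.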
+
+ /* If amount is zero then return null in RegCnt */
+ if (amount == 0)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+ goto DONE;
+ }
+
+ /* For small allocations we will generate up to six 'push 0' instructions inline */
+ if (amount <= 6)
+ {
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+#if CPU_LOAD_STORE_ARCH
+ regNumber regZero = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ // Set 'regZero' to zero
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero);
+#endif
+
+ while (amount != 0)
+ {
+#if CPU_LOAD_STORE_ARCH
+ inst_IV(INS_push, (unsigned)genRegMask(regZero));
+#else
+ inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
+#endif
+ amount--;
+ }
+
+ regTracker.rsTrackRegTrash(regCnt);
+ // --- move regCnt, ESP
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+ goto DONE;
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ // Re-bias amount to be number of bytes to adjust the SP
+ amount <<= STACK_ALIGN_SHIFT;
+ size->gtIntCon.gtIconVal = amount; // update the GT_CNS value in the node
+ if (amount < compiler->eeGetPageSize()) // must be < not <=
+ {
+ // Since the size is a page or less, simply adjust ESP
+
+ // ESP might already be in the guard page, so we must touch it BEFORE
+ // the alloc, not after.
+ regCnt = regSet.rsGrabReg(RBM_ALLINT);
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+#if CPU_LOAD_STORE_ARCH
+ regNumber regTmp = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, regTmp, REG_SPBASE, 0);
+ regTracker.rsTrackRegTrash(regTmp);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+#endif
+ inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(regCnt);
+ goto DONE;
+ }
+ }
+ }
+ }
+
+ // Compute the size of the block to allocate
+ genCompIntoFreeReg(size, 0, RegSet::KEEP_REG);
+ noway_assert(size->gtFlags & GTF_REG_VAL);
+ regCnt = size->gtRegNum;
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP
+ // essentially popping off the outgoing arg area,
+ // We will restore it right before we return from this method
+ //
+ if ((compiler->lvaOutgoingArgSpaceSize > 0) && !stackAdjusted)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjusted = true;
+ }
+#endif
+
+ // Perform alignment if we don't have a GT_CNS size
+ //
+ if (!size->IsCnsIntOrI())
+ {
+ endLabel = genCreateTempLabel();
+
+ // If 0 we bail out
+ instGen_Compare_Reg_To_Zero(easz, regCnt); // set flags
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, endLabel);
+
+ // Align to STACK_ALIGN
+ inst_RV_IV(INS_add, regCnt, (STACK_ALIGN - 1), emitActualTypeSize(type));
+
+ if (compiler->info.compInitMem)
+ {
+#if ((STACK_ALIGN >> STACK_ALIGN_SHIFT) > 1)
+ // regCnt will be the number of pointer-sized words to locAlloc
+ // If the shift right won't do the 'and', do it here
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+#endif
+ // --- shr regCnt, 2 ---
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
+ }
+ else
+ {
+ // regCnt will be the total number of bytes to locAlloc
+
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+ }
+
+ BasicBlock* loop;
+ loop = genCreateTempLabel();
+
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the number of pointer-sized words to locAlloc
+
+ /* Since we have to zero out the allocated memory AND ensure that
+ ESP is always valid by tickling the pages, we will just push 0's
+ on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+ regNumber regZero1 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt));
+ regNumber regZero2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regCnt) & ~genRegMask(regZero1));
+ // Set 'regZero1' and 'regZero2' to zero
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero1);
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regZero2);
+#endif
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+#if defined(_TARGET_X86_)
+
+ inst_IV(INS_push_hide, 0); // --- push 0
+ // Are we done?
+ inst_RV(INS_dec, regCnt, type);
+
+#elif defined(_TARGET_ARM_)
+
+ inst_IV(INS_push, (unsigned)(genRegMask(regZero1) | genRegMask(regZero2)));
+ // Are we done?
+ inst_RV_IV(INS_sub, regCnt, 2, emitActualTypeSize(type), INS_FLAGS_SET);
+
+#else
+ assert(!"Codegen missing");
+#endif // TARGETS
+
+ emitJumpKind jmpNotEqual = genJumpKindForOper(GT_NE, CK_SIGNED);
+ inst_JMP(jmpNotEqual, loop);
+
+ // Move the final value of ESP into regCnt
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE);
+ regTracker.rsTrackRegTrash(regCnt);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to locAlloc
+
+ /* We don't need to zero out the allocated memory. However, we do have
+ to tickle the pages to ensure that ESP is always valid and is
+ in sync with the "stack guard page". Note that in the worst
+ case ESP is on the last byte of the guard page. Thus you must
+ touch ESP+0 first, not ESP+0x1000.
+
+ Another subtlety is that you don't want ESP to be exactly on the
+ boundary of the guard page, because PUSH is pre-decrement; thus
+ call setup would not touch the guard page but just beyond it. */
+
+ /* Note that we go through a few hoops so that ESP never points to
+ illegal pages at any time during the ticking process
+
+ neg REG
+ add REG, ESP // reg now holds ultimate ESP
+ jb loop // result is smaller than original ESP (no wrap around)
+ xor REG, REG // Overflow, pick lowest possible number
+ loop:
+ test ESP, [ESP+0] // X86 - tickle the page
+ ldr REGH,[ESP+0] // ARM - tickle the page
+ mov REGH, ESP
+ sub REGH, PAGE_SIZE
+ mov ESP, REGH
+ cmp ESP, REG
+ jae loop
+
+ mov ESP, REG
+ end:
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+
+ inst_RV_RV_RV(INS_sub, regCnt, REG_SPBASE, regCnt, EA_4BYTE, INS_FLAGS_SET);
+ inst_JMP(EJ_hs, loop);
+#else
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
+ inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+#endif
+ regTracker.rsTrackRegTrash(regCnt);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // This is a workaround to avoid the emitter trying to track the
+ // decrement of the ESP - we do the subtraction in another reg
+ // instead of adjusting ESP directly.
+
+ regNumber regTemp = regSet.rsPickReg();
+
+ // Tickle the decremented value and move it back into ESP. Note that
+ // this has to be done BEFORE the update of ESP, since ESP might
+ // already be on the guard page. It is OK to leave the final value
+ // of ESP on the guard page.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, regTemp, REG_SPBASE, 0);
+#else
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+#endif
+
+ inst_RV_RV(INS_mov, regTemp, REG_SPBASE, TYP_I_IMPL);
+ regTracker.rsTrackRegTrash(regTemp);
+
+ inst_RV_IV(INS_sub, regTemp, compiler->eeGetPageSize(), EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regTemp, TYP_I_IMPL);
+
+ genRecoverReg(size, RBM_ALLINT,
+ RegSet::KEEP_REG); // not purely the 'size' tree anymore; though it is derived from 'size'
+ noway_assert(size->gtFlags & GTF_REG_VAL);
+ regCnt = size->gtRegNum;
+ inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
+ emitJumpKind jmpGEU = genJumpKindForOper(GT_GE, CK_UNSIGNED);
+ inst_JMP(jmpGEU, loop);
+
+ // Move the final value to ESP
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
+ }
+ regSet.rsMarkRegFree(genRegMask(regCnt));
+
+DONE:
+
+ noway_assert(regCnt != DUMMY_INIT(REG_CORRUPT));
+
+ if (endLabel != NULL)
+ genDefineTempLabel(endLabel);
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must readjust the SP
+ //
+ if (stackAdjusted)
+ {
+ assert(compiler->lvaOutgoingArgSpaceSize > 0);
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) ==
+ 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_sub, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ }
+#endif
+
+ /* Write the lvaShadowSPfirst stack frame slot */
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ // We don't think it is worth the codegen complexity to embed this
+ // in each of the customized allocas, even though it would be possible.
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Update new ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ return regCnt;
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called by the main genSetScopeInfo() for every scope info piece to be recorded
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+#ifdef _TARGET_X86_
+ // Non-x86 platforms are allowed to access all arguments directly
+ // so we don't need this code.
+
+ // Is this a varargs function?
+
+ if (compiler->info.compIsVarArgs && varNum != compiler->lvaVarargsHandleArg &&
+ varNum < compiler->info.compArgsCount && !compiler->lvaTable[varNum].lvIsRegArg)
+ {
+ noway_assert(varLoc.vlType == Compiler::VLT_STK || varLoc.vlType == Compiler::VLT_STK2);
+
+ // All stack arguments (except the varargs handle) have to be
+ // accessed via the varargs cookie. Discard generated info,
+ // and just find its position relative to the varargs handle
+
+ PREFIX_ASSUME(compiler->lvaVarargsHandleArg < compiler->info.compArgsCount);
+ if (!compiler->lvaTable[compiler->lvaVarargsHandleArg].lvOnFrame)
+ {
+ noway_assert(!compiler->opts.compDbgCode);
+ return;
+ }
+
+ // Can't check compiler->lvaTable[varNum].lvOnFrame as we don't set it for
+ // arguments of vararg functions to avoid reporting them to GC.
+ noway_assert(!compiler->lvaTable[varNum].lvRegister);
+ unsigned cookieOffset = compiler->lvaTable[compiler->lvaVarargsHandleArg].lvStkOffs;
+ unsigned varOffset = compiler->lvaTable[varNum].lvStkOffs;
+
+ noway_assert(cookieOffset < varOffset);
+ unsigned offset = varOffset - cookieOffset;
+ unsigned stkArgSize = compiler->compArgSize - intRegState.rsCalleeRegArgCount * sizeof(void*);
+ noway_assert(offset < stkArgSize);
+ offset = stkArgSize - offset;
+
+ varLoc.vlType = Compiler::VLT_FIXED_VA;
+ varLoc.vlFixedVarArg.vlfvOffset = offset;
+ }
+
+#endif // _TARGET_X86_
+
+ VarName name = NULL;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+ // Hang on to this information.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*****************************************************************************
+ *
+ * Return non-zero if the given register is free after the given tree is
+ * evaluated (i.e. the register is either not used at all, or it holds a
+ * register variable which is not live after the given node).
+ * This is only called by genCreateAddrMode, when tree is a GT_ADD, with one
+ * constant operand, and one that's in a register. Thus, the only thing we
+ * need to determine is whether the register holding op1 is dead.
+ */
+bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
+{
+ regMaskTP vars;
+ regMaskTP mask = genRegMask(reg);
+
+ if (regSet.rsMaskUsed & mask)
+ return false;
+
+ assert(tree->gtOper == GT_ADD);
+ GenTreePtr regValTree = tree->gtOp.gtOp1;
+ if (!tree->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ regValTree = tree->gtOp.gtOp2;
+ assert(tree->gtOp.gtOp1->IsCnsIntOrI());
+ }
+ assert(regValTree->gtFlags & GTF_REG_VAL);
+
+ /* At this point, the only way that the register will remain live
+ * is if it is itself a register variable that isn't dying.
+ */
+ assert(regValTree->gtRegNum == reg);
+ if (regValTree->IsRegVar() && !regValTree->IsRegVarDeath())
+ return false;
+ else
+ return true;
+}
+
+/*****************************************************************************/
+//
+// This method calculates the USE and DEF values for a statement.
+// It also calls fgSetRngChkTarget for the statement.
+//
+// We refactor out this code from fgPerBlockLocalVarLiveness
+// and add QMARK logic to it.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+//
+// The usage of this method is very limited.
+// We should only call it for the first node in the statement or
+// for the node after the GTF_RELOP_QMARK node.
+//
+// NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE NOTE
+
+/*
+ Since a GT_QMARK tree can take two paths (i.e. the thenTree path or the elseTree path),
+ when we calculate its fgCurDefSet and fgCurUseSet, we need to combine the results
+ from both trees.
+
+ Note that the GT_QMARK trees are threaded as shown below with nodes 1 to 11
+ linked by gtNext.
+
+ The algorithm we use is:
+ (1) We walk these nodes according to the evaluation order (i.e. from node 1 to node 11).
+ (2) When we see the GTF_RELOP_QMARK node, we know we are about to split the path.
+ We cache copies of current fgCurDefSet and fgCurUseSet.
+ (The fact that it recursively calls itself handles the nested QMARK case,
+ where we need to remember multiple copies of fgCurDefSet and fgCurUseSet.)
+ (3) We walk the thenTree.
+ (4) When we see GT_COLON node, we know that we just finished the thenTree.
+ We then make a copy of the current fgCurDefSet and fgCurUseSet,
+ restore them to the ones before the thenTree, and then continue walking
+ the elseTree.
+ (5) When we see the GT_QMARK node, we know we just finished the elseTree.
+ So we combine the results from the thenTree and elseTree and then return.
+
+
+ +--------------------+
+ | GT_QMARK 11|
+ +----------+---------+
+ |
+ *
+ / \
+ / \
+ / \
+ +---------------------+ +--------------------+
+ | GT_<cond> 3 | | GT_COLON 7 |
+ | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
+ +----------+----------+ +---------+----------+
+ | |
+ * *
+ / \ / \
+ / \ / \
+ / \ / \
+ 2 1 thenTree 6 elseTree 10
+ x | |
+ / * *
+ +----------------+ / / \ / \
+ |prevExpr->gtNext+------/ / \ / \
+ +----------------+ / \ / \
+ 5 4 9 8
+
+
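+ A small worked example of the merge (using hypothetical tracked locals v1, v2 and v3):
+ if the thenTree assigns v1 and reads v2 while the elseTree only reads v3, then at step (5)
+ the intersection leaves v1 out of fgCurDefSet (it is not assigned on the else path), while
+ the union keeps both v2 and v3 in fgCurUseSet.
+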
+*/
+
+GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
+ GenTreePtr relopNode, // The node before the startNode.
+ // (It should either be NULL or
+ // a GTF_RELOP_QMARK node.)
+ GenTreePtr asgdLclVar)
+{
+ GenTreePtr tree;
+
+ VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
+ VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore them before entering the elseTree.
+
+ bool heapUse_BeforeSplit = fgCurHeapUse;
+ bool heapDef_BeforeSplit = fgCurHeapDef;
+ bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
+
+ VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
+ // the USE and DEF sets after
+ VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
+
+ bool heapUse_AfterThenTree = fgCurHeapUse;
+ bool heapDef_AfterThenTree = fgCurHeapDef;
+ bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
+
+ // relopNode is either NULL or a GTF_RELOP_QMARK node.
+ assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
+
+ // If relopNode is NULL, then the startNode must be the 1st node of the statement.
+ // If relopNode is non-NULL, then the startNode must be the node right after the GTF_RELOP_QMARK node.
+ assert((!relopNode && startNode == compCurStmt->gtStmt.gtStmtList) ||
+ (relopNode && startNode == relopNode->gtNext));
+
+ for (tree = startNode; tree; tree = tree->gtNext)
+ {
+ switch (tree->gtOper)
+ {
+
+ case GT_QMARK:
+
+ // This must be a GT_QMARK node whose GTF_RELOP_QMARK node is recursively calling us.
+ noway_assert(relopNode && tree->gtOp.gtOp1 == relopNode);
+
+ // By the time we see a GT_QMARK, we must have finished processing the elseTree.
+ // So it's time to combine the results
+ // from the thenTree and the elseTree, and then return.
+
+ VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
+ VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
+
+ fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
+ fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
+ fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
+
+ // Return the GT_QMARK node itself so the caller can continue from there.
+ // NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
+ // in the "for" statement.
+ goto _return;
+
+ case GT_COLON:
+ // By the time we see GT_COLON, we must have just walked the thenTree.
+ // So we need to do two things here.
+ // (1) Save the current fgCurDefSet and fgCurUseSet so that later we can combine them
+ // with the result from the elseTree.
+ // (2) Restore fgCurDefSet and fgCurUseSet to the points before the thenTree was walked,
+ // and then continue walking the elseTree.
+ VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
+ VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
+
+ heapDef_AfterThenTree = fgCurHeapDef;
+ heapHavoc_AfterThenTree = fgCurHeapHavoc;
+ heapUse_AfterThenTree = fgCurHeapUse;
+
+ VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
+ VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
+
+ fgCurHeapDef = heapDef_BeforeSplit;
+ fgCurHeapHavoc = heapHavoc_BeforeSplit;
+ fgCurHeapUse = heapUse_BeforeSplit;
+
+ break;
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ break;
+
+ case GT_CLS_VAR:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+ // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
+ // assignment.
+ // Otherwise, we treat it as a use here.
+ if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ {
+ fgCurHeapUse = true;
+ }
+ break;
+
+ case GT_IND:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+
+ // If the GT_IND is the lhs of an assignment, we'll handle it
+ // as a heap def, when we get to assignment.
+ // Otherwise, we treat it as a use here.
+ if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = NULL;
+ bool dummyIsEntire = false;
+ GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ }
+ else
+ {
+ // Defines a local addr
+ assert(dummyLclVarTree != nullptr);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ }
+ }
+ break;
+
+ // These should have been morphed away to become GT_INDs:
+ case GT_FIELD:
+ case GT_INDEX:
+ unreached();
+ break;
+
+ // We'll assume these are use-then-defs of the heap.
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ break;
+
+ case GT_MEMORYBARRIER:
+ // Similar to any Volatile indirection, we must handle this as a definition of the global heap
+ fgCurHeapDef = true;
+ break;
+
+ // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+ bool modHeap = true;
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+
+ if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ modHeap = false;
+ }
+ }
+ if (modHeap)
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ }
+ }
+
+ // If this is a p/invoke unmanaged call or if this is a tail-call
+ // and we have an unmanaged p/invoke call in the method,
+ // then we're going to run the p/invoke epilog.
+ // So we mark the FrameRoot as used by this instruction.
+ // This ensures that the block->bbVarUse will contain
+ // the FrameRoot local var if it is a tracked variable.
+
+ if (tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged))
+ {
+ /* Get the TCB local and mark it as used */
+
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (varDsc->lvTracked)
+ {
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ break;
+
+ default:
+
+ // Determine whether it defines a heap location.
+ if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = NULL;
+ if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ // If it doesn't define a local, then it might update the heap.
+ fgCurHeapDef = true;
+ }
+ }
+
+ // Are we seeing a GT_<cond> for a GT_QMARK node?
+ if ((tree->OperKind() & GTK_RELOP) && (tree->gtFlags & GTF_RELOP_QMARK))
+ {
+ // We are about to enter the parallel paths (i.e. the thenTree and the elseTree).
+ // Recursively call fgLegacyPerStatementLocalVarLiveness.
+ // At the very beginning of fgLegacyPerStatementLocalVarLiveness, we will cache the values of the
+ // current
+ // fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
+ // The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
+ // node.
+ tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
+
+ // We must have been returned here after seeing a GT_QMARK node.
+ noway_assert(tree->gtOper == GT_QMARK);
+ }
+
+ break;
+ }
+ }
+
+_return:
+ return tree;
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+ * Initialize the TCB local and the NDirect stub, and afterwards "push"
+ * the hoisted NDirect stub.
+ *
+ * 'initRegs' is the set of registers which will be zeroed out by the prolog;
+ * typically initRegs is zero.
+ *
+ * The layout of the NDirect Inlined Call Frame is as follows:
+ * (see VM/frames.h and VM/JITInterface.cpp for more information)
+ *
+ * offset field name when set
+ * --------------------------------------------------------------
+ * +00h vptr for class InlinedCallFrame method prolog
+ * +04h m_Next method prolog
+ * +08h m_Datum call site
+ * +0ch m_pCallSiteTracker (callsite ESP) call site and zeroed in method prolog
+ * +10h m_pCallerReturnAddress call site
+ * +14h m_pCalleeSavedRegisters not set by JIT
+ * +18h JIT retval spill area (int) before call_gc
+ * +1ch JIT retval spill area (long) before call_gc
+ * +20h Saved value of EBP method prolog
+ */
+
+regMaskTP CodeGen::genPInvokeMethodProlog(regMaskTP initRegs)
+{
+ assert(compiler->compGeneratingProlog);
+ noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+ noway_assert(compiler->info.compCallUnmanaged);
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ /* let's find out if compLvFrameListRoot is enregistered */
+
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+
+ noway_assert(!varDsc->lvIsParam);
+ noway_assert(varDsc->lvType == TYP_I_IMPL);
+
+ DWORD threadTlsIndex, *pThreadTlsIndex;
+
+ threadTlsIndex = compiler->info.compCompHnd->getThreadTLSIndex((void**)&pThreadTlsIndex);
+#if defined(_TARGET_X86_)
+ if (threadTlsIndex == (DWORD)-1 || pInfo->osType != CORINFO_WINNT)
+#else
+ if (true)
+#endif
+ {
+ // Instead of calling GetThread(), and getting GS cookie and
+ // InlinedCallFrame vptr through indirections, we'll call only one helper.
+ // The helper takes frame address in REG_PINVOKE_FRAME, returns TCB in REG_PINVOKE_TCB
+ // and uses REG_PINVOKE_SCRATCH as scratch register.
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_FRAME, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+
+ // We're about to trash REG_PINVOKE_TCB; it had better not be in use!
+ assert((regSet.rsMaskUsed & RBM_PINVOKE_TCB) == 0);
+
+ // Don't use the argument registers (including the special argument in
+ // REG_PINVOKE_FRAME) for computing the target address.
+ regSet.rsLockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+ genEmitHelperCall(CORINFO_HELP_INIT_PINVOKE_FRAME, 0, EA_UNKNOWN);
+
+ regSet.rsUnlockReg(RBM_ARG_REGS | RBM_PINVOKE_FRAME);
+
+ if (varDsc->lvRegister)
+ {
+ regNumber regTgt = varDsc->lvRegNum;
+
+ // We are about to initialize it, so turn the bit off in initRegs to prevent
+ // the prolog from reinitializing it.
+ initRegs &= ~genRegMask(regTgt);
+
+ if (regTgt != REG_PINVOKE_TCB)
+ {
+ // move TCB to its register if necessary
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, regTgt, REG_PINVOKE_TCB);
+ regTracker.rsTrackRegTrash(regTgt);
+ }
+ }
+ else
+ {
+ // move TCB to its stack location
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->info.compLvFrameListRoot, 0);
+ }
+
+ // We are done, the rest of this function deals with the inlined case.
+ return initRegs;
+ }
+
+ regNumber regTCB;
+
+ if (varDsc->lvRegister)
+ {
+ regTCB = varDsc->lvRegNum;
+
+ // We are about to initialize it, so turn the bit off in initRegs to prevent
+ // the prolog from reinitializing it.
+ initRegs &= ~genRegMask(regTCB);
+ }
+ else // varDsc is allocated on the Stack
+ {
+ regTCB = REG_PINVOKE_TCB;
+ }
+
+#if !defined(_TARGET_ARM_)
+#define WIN_NT_TLS_OFFSET (0xE10)
+#define WIN_NT5_TLS_HIGHOFFSET (0xf94)
+
+ /* get TCB, mov reg, FS:[compiler->info.compEEInfo.threadTlsIndex] */
+
+ // TODO-ARM-CQ: should we inline TlsGetValue here?
+
+ if (threadTlsIndex < 64)
+ {
+ // mov reg, FS:[0xE10+threadTlsIndex*4]
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS,
+ WIN_NT_TLS_OFFSET + threadTlsIndex * sizeof(int));
+ regTracker.rsTrackRegTrash(regTCB);
+ }
+ else
+ {
+ noway_assert(pInfo->osMajor >= 5);
+
+ DWORD basePtr = WIN_NT5_TLS_HIGHOFFSET;
+ threadTlsIndex -= 64;
+
+ // mov reg, FS:[0x2c] or mov reg, fs:[0xf94]
+ // mov reg, [reg+threadTlsIndex*4]
+
+ getEmitter()->emitIns_R_C(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, FLD_GLOBAL_FS, basePtr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regTCB, regTCB, threadTlsIndex * sizeof(int));
+ regTracker.rsTrackRegTrash(regTCB);
+ }
+#endif
+
+ /* save TCB in local var if not enregistered */
+
+ if (!varDsc->lvRegister)
+ {
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, regTCB, compiler->info.compLvFrameListRoot, 0);
+ }
+
+ /* set frame's vptr */
+
+ const void *inlinedCallFrameVptr, **pInlinedCallFrameVptr;
+ inlinedCallFrameVptr = compiler->info.compCompHnd->getInlinedCallFrameVptr((void**)&pInlinedCallFrameVptr);
+ noway_assert(inlinedCallFrameVptr != NULL); // if we have the TLS index, vptr must also be known
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)inlinedCallFrameVptr,
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameVptr,
+ REG_PINVOKE_SCRATCH);
+
+ // Set the GSCookie
+ GSCookie gsCookie, *pGSCookie;
+ compiler->info.compCompHnd->getGSCookie(&gsCookie, &pGSCookie);
+ noway_assert(gsCookie != 0); // if we have the TLS index, GS cookie must also be known
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, (ssize_t)gsCookie, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfGSCookie, REG_PINVOKE_SCRATCH);
+
+ /* Get current frame root (mov reg2, [reg+offsetOfThreadFrame]) and
+ set next field in frame */
+
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+ pInfo->offsetOfThreadFrame);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH,
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+
+ noway_assert(isFramePointerUsed()); // Setup of Pinvoke frame currently requires an EBP style frame
+
+ /* set EBP value in frame */
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, genFramePointerReg(),
+ compiler->lvaInlinedPInvokeFrameVar, pInfo->inlinedCallFrameInfo.offsetOfCalleeSavedFP);
+
+ /* reset track field in frame */
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress, REG_PINVOKE_SCRATCH);
+
+ /* get address of our frame */
+
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, REG_PINVOKE_SCRATCH, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameVptr);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_SCRATCH);
+
+ /* now "push" our N/direct frame */
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_SCRATCH, regTCB,
+ pInfo->offsetOfThreadFrame);
+
+ return initRegs;
+}
+
+/*****************************************************************************
+ * Unchain the InlinedCallFrame.
+ * Technically, this is not part of the epilog; it is called when we are generating code for a GT_RETURN node
+ * or tail call.
+ */
+void CodeGen::genPInvokeMethodEpilog()
+{
+ noway_assert(compiler->info.compCallUnmanaged);
+ noway_assert(!compiler->opts.ShouldUsePInvokeHelpers());
+ noway_assert(compiler->compCurBB == compiler->genReturnBB ||
+ (compiler->compTailCallUsed && (compiler->compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (compiler->compJmpOpUsed && (compiler->compCurBB->bbFlags & BBF_HAS_JMP)));
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ getEmitter()->emitDisableRandomNops();
+ // debug check to make sure that we're not using ESI and/or EDI across this call, except for
+ // compLvFrameListRoot.
+ unsigned regTrashCheck = 0;
+
+ /* XXX Tue 5/29/2007
+ * We explicitly add interference for these in CodeGen::rgPredictRegUse. If you change the code
+ * sequence or registers used, make sure to update the interference for compiler->genReturnLocal.
+ */
+ LclVarDsc* varDsc = &compiler->lvaTable[compiler->info.compLvFrameListRoot];
+ regNumber reg;
+ regNumber reg2 = REG_PINVOKE_FRAME;
+
+ //
+ // Two cases for epilog invocation:
+ //
+ // 1. Return
+ // We can trash the ESI/EDI registers.
+ //
+ // 2. Tail call
+ // When tail called, we'd like to preserve the enregistered args
+ // in ESI/EDI so we can pass them to the callee.
+ //
+ // For ARM, don't modify SP for storing and restoring the TCB/frame registers.
+ // Instead use the reserved local variable slot.
+ //
+ if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+ {
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Save the register in the reserved local var slot.
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+ inst_RV(INS_push, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+ }
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Save the register in the reserved local var slot.
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+ compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+ inst_RV(INS_push, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+ }
+ }
+
+ if (varDsc->lvRegister)
+ {
+ reg = varDsc->lvRegNum;
+ if (reg == reg2)
+ reg2 = REG_PINVOKE_TCB;
+
+ regTrashCheck |= genRegMask(reg2);
+ }
+ else
+ {
+ /* mov esi, [tcb address] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB, compiler->info.compLvFrameListRoot,
+ 0);
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+ reg = REG_PINVOKE_TCB;
+
+ regTrashCheck = RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME;
+ }
+
+ /* mov edi, [ebp-frame.next] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+ regTracker.rsTrackRegTrash(reg2);
+
+ /* mov [esi+offsetOfThreadFrame], edi */
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg2, reg, pInfo->offsetOfThreadFrame);
+
+ noway_assert(!(regSet.rsMaskUsed & regTrashCheck));
+
+ if (compiler->genReturnLocal != BAD_VAR_NUM && compiler->lvaTable[compiler->genReturnLocal].lvTracked &&
+ compiler->lvaTable[compiler->genReturnLocal].lvRegister)
+ {
+ // really make sure we're not clobbering compiler->genReturnLocal.
+ noway_assert(
+ !(genRegMask(compiler->lvaTable[compiler->genReturnLocal].lvRegNum) &
+ ((varDsc->lvRegister ? genRegMask(varDsc->lvRegNum) : 0) | RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME)));
+ }
+
+ (void)regTrashCheck;
+
+ // Restore the registers ESI and EDI.
+ if (compiler->compCurBB->bbFlags & BBF_HAS_JMP)
+ {
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_FRAME)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Restore the register from the reserved local var slot.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_FRAME,
+ compiler->lvaPInvokeFrameRegSaveVar, REGSIZE_BYTES);
+#else
+ inst_RV(INS_pop, REG_PINVOKE_FRAME, TYP_I_IMPL);
+#endif
+ regTracker.rsTrackRegTrash(REG_PINVOKE_FRAME);
+ }
+ if (compiler->rpMaskPInvokeEpilogIntf & RBM_PINVOKE_TCB)
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ // Restore the register from the reserved local var slot.
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_PINVOKE_TCB,
+ compiler->lvaPInvokeFrameRegSaveVar, 0);
+#else
+ inst_RV(INS_pop, REG_PINVOKE_TCB, TYP_I_IMPL);
+#endif
+ regTracker.rsTrackRegTrash(REG_PINVOKE_TCB);
+ }
+ }
+ getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************
+ This function emits the call-site prolog for direct calls to unmanaged code.
+ It does all the necessary setup of the InlinedCallFrame.
+ frameListRoot specifies the local containing the thread control block.
+ argSize or methodToken is the value to be copied into the m_datum
+ field of the frame (methodToken may be indirected & have a reloc)
+ The function returns the register now containing the thread control block,
+ (it could be either enregistered or loaded into one of the scratch registers)
+*/
+
+regNumber CodeGen::genPInvokeCallProlog(LclVarDsc* frameListRoot,
+ int argSize,
+ CORINFO_METHOD_HANDLE methodToken,
+ BasicBlock* returnLabel)
+{
+ // Some stack locals might be 'cached' in registers; we need to trash them
+ // from the regTracker *and* also ensure the gc tracker does not consider
+ // them live (see the next assert). However, they might be live reg vars
+ // that are non-pointers CSE'd from pointers.
+ // That means the register will be live in rsMaskVars, so we can't just
+ // call gcMarkSetNpt().
+ {
+ regMaskTP deadRegs = regTracker.rsTrashRegsForGCInterruptability() & ~RBM_ARG_REGS;
+ gcInfo.gcRegGCrefSetCur &= ~deadRegs;
+ gcInfo.gcRegByrefSetCur &= ~deadRegs;
+
+#ifdef DEBUG
+ deadRegs &= regSet.rsMaskVars;
+ if (deadRegs)
+ {
+ for (LclVarDsc* varDsc = compiler->lvaTable;
+ ((varDsc < (compiler->lvaTable + compiler->lvaCount)) && deadRegs); varDsc++)
+ {
+ if (!varDsc->lvTracked || !varDsc->lvRegister)
+ continue;
+
+ if (!VarSetOps::IsMember(compiler, compiler->compCurLife, varDsc->lvVarIndex))
+ continue;
+
+ regMaskTP varRegMask = genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType) && varDsc->lvOtherReg != REG_STK)
+ varRegMask |= genRegMask(varDsc->lvOtherReg);
+
+ if (varRegMask & deadRegs)
+ {
+ // We found the enregistered var that should not be live if it
+ // was a GC pointer.
+ noway_assert(!varTypeIsGC(varDsc));
+ deadRegs &= ~varRegMask;
+ }
+ }
+ }
+#endif // DEBUG
+ }
+
+ /* Since we are using the InlinedCallFrame, we should have spilled all
+ GC pointers to it - even from callee-saved registers */
+
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~RBM_ARG_REGS) == 0);
+
+ /* must specify only one of these parameters */
+ noway_assert((argSize == 0) || (methodToken == NULL));
+
+ /* We are about to call unmanaged code directly.
+ Before we can do that we have to emit the following sequence:
+
+ mov dword ptr [frame.callTarget], MethodToken
+ mov dword ptr [frame.callSiteTracker], esp
+ mov reg, dword ptr [tcb_address]
+ mov byte ptr [tcb+offsetOfGcState], 0
+
+ */
+
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+
+ noway_assert(compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ /* mov dword ptr [frame.callSiteTarget], value */
+
+ if (methodToken == NULL)
+ {
+ /* mov dword ptr [frame.callSiteTarget], argSize */
+ instGen_Store_Imm_Into_Lcl(TYP_INT, EA_4BYTE, argSize, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ else
+ {
+ void *embedMethHnd, *pEmbedMethHnd;
+
+ embedMethHnd = (void*)compiler->info.compCompHnd->embedMethodHandle(methodToken, &pEmbedMethHnd);
+
+ noway_assert((!embedMethHnd) != (!pEmbedMethHnd));
+
+ if (embedMethHnd != NULL)
+ {
+ /* mov dword ptr [frame.callSiteTarget], "MethodDesc" */
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_HANDLE_CNS_RELOC, (ssize_t)embedMethHnd,
+ compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ else
+ {
+ /* mov reg, dword ptr [MethodDescIndir]
+ mov dword ptr [frame.callSiteTarget], reg */
+
+ regNumber reg = regSet.rsPickFreeReg();
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, (ssize_t)pEmbedMethHnd);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg, reg, 0);
+#else // !CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, reg, (ssize_t)pEmbedMethHnd);
+#endif // !CPU_LOAD_STORE_ARCH
+ regTracker.rsTrackRegTrash(reg);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, reg, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallTarget);
+ }
+ }
+
+ regNumber tcbReg = REG_NA;
+
+ if (frameListRoot->lvRegister)
+ {
+ tcbReg = frameListRoot->lvRegNum;
+ }
+ else
+ {
+ tcbReg = regSet.rsGrabReg(RBM_ALLINT);
+
+ /* mov reg, dword ptr [tcb address] */
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, tcbReg,
+ (unsigned)(frameListRoot - compiler->lvaTable), 0);
+ regTracker.rsTrackRegTrash(tcbReg);
+ }
+
+#ifdef _TARGET_X86_
+ /* mov dword ptr [frame.callSiteTracker], esp */
+
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfCallSiteSP);
+#endif // _TARGET_X86_
+
+#if CPU_LOAD_STORE_ARCH
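+ // Load/store architectures: form the return address with an adr to the return label, then store it into the frame.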
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(tcbReg));
+ getEmitter()->emitIns_J_R(INS_adr, EA_PTRSIZE, returnLabel, tmpReg);
+ regTracker.rsTrackRegTrash(tmpReg);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, tmpReg, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+#else // !CPU_LOAD_STORE_ARCH
+ /* mov dword ptr [frame.callSiteReturnAddress], label */
+
+ getEmitter()->emitIns_J_S(ins_Store(TYP_I_IMPL), EA_PTRSIZE, returnLabel, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+#endif // !CPU_LOAD_STORE_ARCH
+
+#if CPU_LOAD_STORE_ARCH
+ instGen_Set_Reg_To_Zero(EA_1BYTE, tmpReg);
+
+ noway_assert(tmpReg != tcbReg);
+
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, tmpReg, tcbReg, pInfo->offsetOfGCState);
+#else // !CPU_LOAD_STORE_ARCH
+ /* mov byte ptr [tcbReg+offsetOfGcState], 0 */
+
+ getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 0, tcbReg, pInfo->offsetOfGCState);
+#endif // !CPU_LOAD_STORE_ARCH
+
+ return tcbReg;
+}
+
+/*****************************************************************************
+ *
+ First we have to mark in the hoisted NDirect stub that we are back
+ in managed code. Then we have to check (a global flag) whether GC is
+ pending or not. If so, we just call into a jit-helper.
+ Right now we have this call always inlined, i.e. we always skip around
+ the jit-helper call.
+ Note:
+ The tcb address is a regular local (initialized in the prolog), so it is either
+ enregistered or in the frame:
+
+ tcb_reg = [tcb_address is enregistered] OR [mov ecx, tcb_address]
+ mov byte ptr[tcb_reg+offsetOfGcState], 1
+ cmp 'global GC pending flag', 0
+ je @f
+ [mov ECX, tcb_reg] OR [ecx was setup above] ; we pass the tcb value to callGC
+ [mov [EBP+spill_area+0], eax] ; spill the int return value if any
+ [mov [EBP+spill_area+4], edx] ; spill the long return value if any
+ call @callGC
+ [mov eax, [EBP+spill_area+0] ] ; reload the int return value if any
+ [mov edx, [EBP+spill_area+4] ] ; reload the long return value if any
+ @f:
+ */
+
+void CodeGen::genPInvokeCallEpilog(LclVarDsc* frameListRoot, regMaskTP retVal)
+{
+ BasicBlock* clab_nostop;
+ CORINFO_EE_INFO* pInfo = compiler->eeGetEEInfo();
+ regNumber reg2;
+ regNumber reg3;
+
+#ifdef _TARGET_ARM_
+ reg3 = REG_R3;
+#else
+ reg3 = REG_EDX;
+#endif
+
+ getEmitter()->emitDisableRandomNops();
+
+ if (frameListRoot->lvRegister)
+ {
+ /* make sure that register is live across the call */
+
+ reg2 = frameListRoot->lvRegNum;
+ noway_assert(genRegMask(reg2) & RBM_INT_CALLEE_SAVED);
+ }
+ else
+ {
+ /* mov reg2, dword ptr [tcb address] */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ reg2 = REG_R2;
+#else
+ reg2 = REG_ECX;
+#endif
+
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, reg2,
+ (unsigned)(frameListRoot - compiler->lvaTable), 0);
+ regTracker.rsTrackRegTrash(reg2);
+ }
+
+#ifdef _TARGET_ARM_
+ /* mov r3, 1 */
+ /* strb [r2+offsetOfGcState], r3 */
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, reg3, 1);
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_BYTE), EA_1BYTE, reg3, reg2, pInfo->offsetOfGCState);
+#else
+ /* mov byte ptr [tcb+offsetOfGcState], 1 */
+ getEmitter()->emitIns_I_AR(ins_Store(TYP_BYTE), EA_1BYTE, 1, reg2, pInfo->offsetOfGCState);
+#endif
+
+ /* test global flag (we return to managed code) */
+
+ LONG *addrOfCaptureThreadGlobal, **pAddrOfCaptureThreadGlobal;
+
+ addrOfCaptureThreadGlobal =
+ compiler->info.compCompHnd->getAddrOfCaptureThreadGlobal((void**)&pAddrOfCaptureThreadGlobal);
+ noway_assert((!addrOfCaptureThreadGlobal) != (!pAddrOfCaptureThreadGlobal));
+
+ // Can we directly use addrOfCaptureThreadGlobal?
+
+ if (addrOfCaptureThreadGlobal)
+ {
+#ifdef _TARGET_ARM_
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)addrOfCaptureThreadGlobal);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ regTracker.rsTrackRegTrash(reg3);
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else
+ getEmitter()->emitIns_C_I(INS_cmp, EA_PTR_DSP_RELOC, FLD_GLOBAL_DS, (ssize_t)addrOfCaptureThreadGlobal, 0);
+#endif
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg3, (ssize_t)pAddrOfCaptureThreadGlobal);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ regTracker.rsTrackRegTrash(reg3);
+ getEmitter()->emitIns_R_R_I(ins_Load(TYP_INT), EA_4BYTE, reg3, reg3, 0);
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, reg3, 0);
+#else // !_TARGET_ARM_
+
+ getEmitter()->emitIns_R_AI(ins_Load(TYP_I_IMPL), EA_PTR_DSP_RELOC, REG_ECX,
+ (ssize_t)pAddrOfCaptureThreadGlobal);
+ regTracker.rsTrackRegTrash(REG_ECX);
+
+ getEmitter()->emitIns_I_AR(INS_cmp, EA_4BYTE, 0, REG_ECX, 0);
+
+#endif // !_TARGET_ARM_
+ }
+
+ /* Create the label we jump to when no GC stop is required */
+ clab_nostop = genCreateTempLabel();
+
+ /* Generate the conditional jump */
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, clab_nostop);
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+ /* save return value (if necessary) */
+ if (retVal != RBM_NONE)
+ {
+ if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+ {
+ /* push eax */
+
+ inst_RV(INS_push, REG_INTRET, TYP_INT);
+
+ if (retVal == RBM_LNGRET)
+ {
+ /* push edx */
+
+ inst_RV(INS_push, REG_EDX, TYP_INT);
+ }
+ }
+ }
+#endif
+
+ /* emit the call to the EE-helper that stops for GC (or other reasons) */
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, /* argSize */
+ EA_UNKNOWN); /* retSize */
+
+#ifdef _TARGET_ARM_
+// The helper preserves the return value on ARM
+#else
+ /* restore return value (if necessary) */
+
+ if (retVal != RBM_NONE)
+ {
+ if (retVal == RBM_INTRET || retVal == RBM_LNGRET)
+ {
+ if (retVal == RBM_LNGRET)
+ {
+ /* pop edx */
+
+ inst_RV(INS_pop, REG_EDX, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_EDX);
+ }
+
+ /* pop eax */
+
+ inst_RV(INS_pop, REG_INTRET, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_INTRET);
+ }
+ }
+#endif
+
+ /* genCondJump() closes the current emitter block */
+
+ genDefineTempLabel(clab_nostop);
+
+ // This marks the InlinedCallFrame as "inactive". In fully interruptible code, this is not atomic with
+ // the above code. So the process is:
+ // 1) Return to cooperative mode
+ // 2) Check to see if we need to stop for GC
+ // 3) Return from the p/invoke (as far as the stack walker is concerned).
+
+ /* mov dword ptr [frame.callSiteTracker], 0 */
+
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfReturnAddress);
+
+ getEmitter()->emitEnableRandomNops();
+}
+
+/*****************************************************************************/
+
+/*****************************************************************************
+* TRACKING OF FLAGS
+*****************************************************************************/
+
+void CodeGen::genFlagsEqualToNone()
+{
+ genFlagsEqReg = REG_NA;
+ genFlagsEqVar = (unsigned)-1;
+ genFlagsEqLoc.Init();
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the flags register has a value that reflects the
+ * contents of the given register.
+ */
+
+void CodeGen::genFlagsEqualToReg(GenTreePtr tree, regNumber reg)
+{
+ genFlagsEqLoc.CaptureLocation(getEmitter());
+ genFlagsEqReg = reg;
+
+ /* previous setting of flags by a var becomes invalid */
+
+ genFlagsEqVar = 0xFFFFFFFF;
+
+ /* Set appropriate flags on the tree */
+
+ if (tree)
+ {
+ tree->gtFlags |= GTF_ZSF_SET;
+ assert(tree->gtSetFlags());
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the flags register has a value that reflects the
+ * contents of the given local variable.
+ */
+
+void CodeGen::genFlagsEqualToVar(GenTreePtr tree, unsigned var)
+{
+ genFlagsEqLoc.CaptureLocation(getEmitter());
+ genFlagsEqVar = var;
+
+ /* previous setting of flags by a register becomes invalid */
+
+ genFlagsEqReg = REG_NA;
+
+ /* Set appropriate flags on the tree */
+
+ if (tree)
+ {
+ tree->gtFlags |= GTF_ZSF_SET;
+ assert(tree->gtSetFlags());
+ }
+}
+
+/*****************************************************************************
+ *
+ * Return an indication of whether the flags register is set to the current
+ * value of the given register/variable. The return value is as follows:
+ *
+ * false .. nothing
+ * true .. the zero flag (ZF) and sign flag (SF) are set
+ */
+
+bool CodeGen::genFlagsAreReg(regNumber reg)
+{
+ if ((genFlagsEqReg == reg) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+bool CodeGen::genFlagsAreVar(unsigned var)
+{
+ if ((genFlagsEqVar == var) && genFlagsEqLoc.IsCurrentLocation(getEmitter()))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * This utility function returns true iff the execution path from "from"
+ * (inclusive) to "to" (exclusive) contains a death of the given var
+ */
+bool CodeGen::genContainsVarDeath(GenTreePtr from, GenTreePtr to, unsigned varNum)
+{
+ GenTreePtr tree;
+ for (tree = from; tree != NULL && tree != to; tree = tree->gtNext)
+ {
+ if (tree->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH))
+ {
+ unsigned dyingVarNum = tree->gtLclVarCommon.gtLclNum;
+ if (dyingVarNum == varNum)
+ return true;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ if (varDsc->lvPromoted)
+ {
+ assert(varDsc->lvType == TYP_STRUCT);
+ unsigned firstFieldNum = varDsc->lvFieldLclStart;
+ if (varNum >= firstFieldNum && varNum < firstFieldNum + varDsc->lvFieldCnt)
+ {
+ return true;
+ }
+ }
+ }
+ }
+ assert(tree != NULL);
+ return false;
+}
+
+#endif // LEGACY_BACKEND
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
new file mode 100644
index 0000000000..fb0d6ea165
--- /dev/null
+++ b/src/jit/codegenlinear.h
@@ -0,0 +1,224 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file contains the members of CodeGen that are defined and used
+// only by the RyuJIT backend. It is included by CodeGen.h in the
+// definition of the CodeGen class.
+//
+
+#ifndef LEGACY_BACKEND // Not necessary (it's this way in the #include location), but helpful to IntelliSense
+
+void genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree);
+
+void genCodeForTreeNode(GenTreePtr treeNode);
+
+void genCodeForBinary(GenTreePtr treeNode);
+
+void genCodeForDivMod(GenTreeOp* treeNode);
+
+void genCodeForMulHi(GenTreeOp* treeNode);
+
+void genLeaInstruction(GenTreeAddrMode* lea);
+
+void genSetRegToCond(regNumber dstReg, GenTreePtr tree);
+
+void genIntToIntCast(GenTreePtr treeNode);
+
+void genFloatToFloatCast(GenTreePtr treeNode);
+
+void genFloatToIntCast(GenTreePtr treeNode);
+
+void genIntToFloatCast(GenTreePtr treeNode);
+
+void genCkfinite(GenTreePtr treeNode);
+
+void genIntrinsic(GenTreePtr treeNode);
+
+void genPutArgStk(GenTreePtr treeNode);
+unsigned getBaseVarForPutArgStk(GenTreePtr treeNode);
+
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM64_)
+unsigned getFirstArgWithStackSlot();
+#endif // _TARGET_XARCH_ || _TARGET_ARM64_
+
+void genCompareFloat(GenTreePtr treeNode);
+
+void genCompareInt(GenTreePtr treeNode);
+
+#if !defined(_TARGET_64BIT_)
+void genCompareLong(GenTreePtr treeNode);
+void genJTrueLong(GenTreePtr treeNode);
+#endif
+
+#ifdef FEATURE_SIMD
+enum SIMDScalarMoveType
+{
+ SMT_ZeroInitUpper, // zero initialize target upper bits
+ SMT_ZeroInitUpper_SrcHasUpperZeros, // zero initialize target upper bits; source upper bits are known to be zero
+ SMT_PreserveUpper // preserve target upper bits
+};
+
+instruction getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival = nullptr);
+void genSIMDScalarMove(var_types type, regNumber target, regNumber src, SIMDScalarMoveType moveType);
+void genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg);
+void genSIMDIntrinsicInit(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicInitArray(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode);
+void genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode);
+
+void genSIMDIntrinsic(GenTreeSIMD* simdNode);
+void genSIMDCheck(GenTree* treeNode);
+
+// TYP_SIMD12 (i.e Vector3 of size 12 bytes) is not a hardware supported size and requires
+// two reads/writes on 64-bit targets. These routines abstract reading/writing of Vector3
+// values through an indirection. Note that Vector3 locals allocated on stack would have
+// their size rounded to TARGET_POINTER_SIZE (which is 8 bytes on 64-bit targets) and hence
+// Vector3 locals could be treated as TYP_SIMD16 while reading/writing.
+void genStoreIndTypeSIMD12(GenTree* treeNode);
+void genStoreLclFldTypeSIMD12(GenTree* treeNode);
+void genLoadIndTypeSIMD12(GenTree* treeNode);
+void genLoadLclFldTypeSIMD12(GenTree* treeNode);
+#endif // FEATURE_SIMD
+
+#if !defined(_TARGET_64BIT_)
+
+// CodeGen for Long Ints
+
+void genStoreLongLclVar(GenTree* treeNode);
+
+#endif // !defined(_TARGET_64BIT_)
+
+void genProduceReg(GenTree* tree);
+
+void genUnspillRegIfNeeded(GenTree* tree);
+
+regNumber genConsumeReg(GenTree* tree);
+
+void genConsumeRegAndCopy(GenTree* tree, regNumber needReg);
+
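+// Consume the node's register only if the node is not contained (contained nodes do not produce a register).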
+void genConsumeIfReg(GenTreePtr tree)
+{
+ if (!tree->isContained())
+ {
+ (void)genConsumeReg(tree);
+ }
+}
+
+void genRegCopy(GenTreePtr tree);
+
+void genTransferRegGCState(regNumber dst, regNumber src);
+
+void genConsumeAddress(GenTree* addr);
+
+void genConsumeAddrMode(GenTreeAddrMode* mode);
+
+void genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg);
+void genConsumeBlockDst(GenTreeBlk* blkNode);
+GenTree* genConsumeBlockSrc(GenTreeBlk* blkNode);
+void genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+void genConsumePutStructArgStk(
+ GenTreePutArgStk* putArgStkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+void genConsumeRegs(GenTree* tree);
+
+void genConsumeOperands(GenTreeOp* tree);
+
+void genEmitGSCookieCheck(bool pushReg);
+
+void genSetRegToIcon(regNumber reg, ssize_t val, var_types type = TYP_INT, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void genCodeForShift(GenTreePtr tree);
+
+#ifdef _TARGET_XARCH_
+void genCodeForShiftRMW(GenTreeStoreInd* storeInd);
+#endif // _TARGET_XARCH_
+
+void genCodeForCpObj(GenTreeObj* cpObjNode);
+
+void genCodeForCpBlk(GenTreeBlk* cpBlkNode);
+
+void genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode);
+
+void genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+void genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum);
+
+void genStructPutArgRepMovs(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
+void genStructPutArgUnroll(GenTreePutArgStk* putArgStkNode, unsigned baseVarNum);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+void genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
+
+void genCodeForStoreOffset(instruction ins, emitAttr size, regNumber dst, GenTree* base, unsigned offset);
+
+void genCodeForStoreBlk(GenTreeBlk* storeBlkNode);
+
+void genCodeForInitBlk(GenTreeBlk* initBlkNode);
+
+void genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode);
+
+void genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode);
+
+void genJumpTable(GenTree* tree);
+
+void genTableBasedSwitch(GenTree* tree);
+
+void genCodeForArrIndex(GenTreeArrIndex* treeNode);
+
+void genCodeForArrOffset(GenTreeArrOffs* treeNode);
+
+instruction genGetInsForOper(genTreeOps oper, var_types type);
+
+void genStoreInd(GenTreePtr node);
+
+bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
+
+void genCallInstruction(GenTreePtr call);
+
+void genJmpMethod(GenTreePtr jmp);
+
+void genMultiRegCallStoreToLocal(GenTreePtr treeNode);
+
+// Deals with codegen for multi-register struct returns.
+bool isStructReturn(GenTreePtr treeNode);
+void genStructReturn(GenTreePtr treeNode);
+
+// Codegen for GT_RETURN.
+void genReturn(GenTreePtr treeNode);
+
+void genLclHeap(GenTreePtr tree);
+
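+// Returns true if the given node is a local variable that is a register candidate.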
+bool genIsRegCandidateLocal(GenTreePtr tree)
+{
+ if (!tree->IsLocal())
+ {
+ return false;
+ }
+ const LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+ return (varDsc->lvIsRegCandidate());
+}
+
+#ifdef DEBUG
+GenTree* lastConsumedNode;
+void genCheckConsumeNode(GenTree* treeNode);
+#else // !DEBUG
+inline void genCheckConsumeNode(GenTree* treeNode)
+{
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
new file mode 100644
index 0000000000..a41c28695b
--- /dev/null
+++ b/src/jit/codegenxarch.cpp
@@ -0,0 +1,9388 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Amd64/x86 Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef _TARGET_XARCH_
+#include "emit.h"
+#include "codegen.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+// Get the register assigned to the given node
+
+regNumber CodeGenInterface::genGetAssignedReg(GenTreePtr tree)
+{
+ return tree->gtRegNum;
+}
+
+//------------------------------------------------------------------------
+// genSpillVar: Spill a local variable
+//
+// Arguments:
+// tree - the lclVar node for the variable being spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The lclVar must be a register candidate (lvRegCandidate)
+
+void CodeGen::genSpillVar(GenTreePtr tree)
+{
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ assert(varDsc->lvIsRegCandidate());
+
+ // We don't actually need to spill if it is already living in memory
+ bool needsSpill = ((tree->gtFlags & GTF_VAR_DEF) == 0 && varDsc->lvIsInReg());
+ if (needsSpill)
+ {
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+ emitAttr size = emitTypeSize(lclTyp);
+
+ bool restoreRegVar = false;
+ if (tree->gtOper == GT_REG_VAR)
+ {
+ tree->SetOper(GT_LCL_VAR);
+ restoreRegVar = true;
+ }
+
+ // mask off the flag to generate the right spill code, then bring it back
+ tree->gtFlags &= ~GTF_REG_VAL;
+
+ instruction storeIns = ins_Store(tree->TypeGet(), compiler->isSIMDTypeLocalAligned(varNum));
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsMultiReg(tree))
+ {
+ assert(varDsc->lvRegNum == genRegPairLo(tree->gtRegPair));
+ assert(varDsc->lvOtherReg == genRegPairHi(tree->gtRegPair));
+ regNumber regLo = genRegPairLo(tree->gtRegPair);
+ regNumber regHi = genRegPairHi(tree->gtRegPair);
+ inst_TT_RV(storeIns, tree, regLo);
+ inst_TT_RV(storeIns, tree, regHi, 4);
+ }
+ else
+#endif
+ {
+ assert(varDsc->lvRegNum == tree->gtRegNum);
+ inst_TT_RV(storeIns, tree, tree->gtRegNum, 0, size);
+ }
+ tree->gtFlags |= GTF_REG_VAL;
+
+ if (restoreRegVar)
+ {
+ tree->SetOper(GT_REG_VAR);
+ }
+
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree));
+ gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask());
+
+ if (VarSetOps::IsMember(compiler, gcInfo.gcTrkStkPtrLcls, varDsc->lvVarIndex))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
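+ // Whether or not we actually stored the value, clear the spill flag and mark the variable as now living on the stack.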
+ tree->gtFlags &= ~GTF_SPILL;
+ varDsc->lvRegNum = REG_STK;
+ if (varTypeIsMultiReg(tree))
+ {
+ varDsc->lvOtherReg = REG_STK;
+ }
+}
+
+// inline
+void CodeGenInterface::genUpdateVarReg(LclVarDsc* varDsc, GenTreePtr tree)
+{
+ assert(tree->OperIsScalarLocal() || (tree->gtOper == GT_COPY));
+ varDsc->lvRegNum = tree->gtRegNum;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Generate code that will set the given register to the integer constant.
+ */
+
+void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
+{
+ // Reg cannot be a FP reg
+ assert(!genIsValidFloatReg(reg));
+
+ // The only TYP_REF constant that can come down this path is a managed 'null' since it is not
+ // relocatable. Other ref type constants (e.g. string objects) go through a different
+ // code path.
+ noway_assert(type != TYP_REF || val == 0);
+
+ if (val == 0)
+ {
+ instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
+ }
+ else
+ {
+ // TODO-XArch-CQ: needs all the optimized cases
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate code to check that the GS cookie wasn't thrashed by a buffer
+ * overrun. If pushReg is true, preserve all registers around code sequence.
+ * Otherwise ECX could be modified.
+ *
+ * Implementation Note: pushReg = true, in case of tail calls.
+ */
+void CodeGen::genEmitGSCookieCheck(bool pushReg)
+{
+ noway_assert(compiler->gsGlobalSecurityCookieAddr || compiler->gsGlobalSecurityCookieVal);
+
+ // Make sure that EAX is reported as live GC-ref so that any GC that kicks in while
+ // executing GS cookie check will not collect the object pointed to by EAX.
+ //
+ // For Amd64 System V, a two-register-returned struct could be returned in RAX and RDX
+ // In such case make sure that the correct GC-ness of RDX is reported as well, so
+ // a GC object pointed by RDX will not be collected.
+ if (!pushReg)
+ {
+ // Handle multi-reg return type values
+ if (compiler->compMethodReturnsMultiRegRetType())
+ {
+ ReturnTypeDesc retTypeDesc;
+ if (varTypeIsLong(compiler->info.compRetNativeType))
+ {
+ retTypeDesc.InitializeLongReturnType(compiler);
+ }
+ else // we must have a struct return type
+ {
+ retTypeDesc.InitializeStructReturnType(compiler, compiler->info.compMethodInfo->args.retTypeClass);
+ }
+
+ unsigned regCount = retTypeDesc.GetReturnRegCount();
+
+ // Only the x86 and x64 Unix ABIs allow multi-reg returns, and the
+ // number of result regs should be equal to MAX_RET_REG_COUNT.
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ gcInfo.gcMarkRegPtrVal(retTypeDesc.GetABIReturnReg(i), retTypeDesc.GetReturnRegType(i));
+ }
+ }
+ else if (compiler->compMethodReturnsRetBufAddr())
+ {
+ // This is for returning in an implicit RetBuf.
+ // If the address of the buffer is returned in REG_INTRET, mark the content of INTRET as ByRef.
+
+ // In case the return is in an implicit RetBuf, the native return type should be a struct
+ assert(varTypeIsStruct(compiler->info.compRetNativeType));
+
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, TYP_BYREF);
+ }
+ // ... all other cases.
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // For x64, structs that are not returned in registers are always
+ // returned in implicit RetBuf. If we reached here, we should not have
+ // a RetBuf and the return type should not be a struct.
+ assert(compiler->info.compRetBuffArg == BAD_VAR_NUM);
+ assert(!varTypeIsStruct(compiler->info.compRetNativeType));
+#endif // _TARGET_AMD64_
+
+ // For x86 Windows we can't make such assertions since we generate code for returning of
+ // the RetBuf in REG_INTRET only when the ProfilerHook is enabled. Otherwise
+ // compRetNativeType could be TYP_STRUCT.
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetNativeType);
+ }
+ }
+
+ regNumber regGSCheck;
+ if (!pushReg)
+ {
+ // Non-tail call: we can use any callee-trash register that is not
+ // a return register and does not contain the 'this' pointer (which must be kept alive), since
+ // we are generating the GS cookie check after a GT_RETURN block.
+ // Note: On Amd64 System V RDX is an arg register - REG_ARG_2 - as well
+ // as return register for two-register-returned structs.
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister &&
+ (compiler->lvaTable[compiler->info.compThisArg].lvRegNum == REG_ARG_0))
+ {
+ regGSCheck = REG_ARG_1;
+ }
+ else
+ {
+ regGSCheck = REG_ARG_0;
+ }
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ NYI_X86("Tail calls from methods that need GS check");
+ regGSCheck = REG_NA;
+#else // !_TARGET_X86_
+ // Tail calls from methods that need GS check: We need to preserve registers while
+ // emitting GS cookie check for a tail prefixed call or a jmp. To emit GS cookie
+ // check, we might need a register. This won't be an issue for jmp calls for the
+ // reason mentioned below (see comment starting with "Jmp Calls:").
+ //
+ // The following are the possible solutions in case of tail prefixed calls:
+ // 1) Use R11 - ignore tail prefix on calls that need to pass a param in R11 when
+ // present in methods that require GS cookie check. Rest of the tail calls that
+ // do not require R11 will be honored.
+ // 2) Internal register - GT_CALL node reserves an internal register and emits GS
+ // cookie check as part of tail call codegen. GenExitCode() needs to special case
+ // fast tail calls implemented as epilog+jmp or such tail calls should always get
+ // dispatched via helper.
+ // 3) Materialize GS cookie check as a separate node hanging off GT_CALL node in
+ // right execution order during rationalization.
+ //
+ // There are two calls that use R11: VSD and calli pinvokes with cookie param. Tail
+ // prefix on pinvokes is ignored. That is, options 2 and 3 will allow tail prefixed
+ // VSD calls from methods that need GS check.
+ //
+ // Tail prefixed calls: Right now, for Jit64 compat, a method requiring a GS cookie check
+ // ignores the tail prefix. In future, if we intend to support tail calls from such a method,
+ // consider one of the options mentioned above. For now adding an assert that we don't
+ // expect to see a tail call in a method that requires GS check.
+ noway_assert(!compiler->compTailCallUsed);
+
+ // Jmp calls: specify method handle using which JIT queries VM for its entry point
+ // address and hence it can neither be a VSD call nor PInvoke calli with cookie
+ // parameter. Therefore, in case of jmp calls it is safe to use R11.
+ regGSCheck = REG_R11;
+#endif // !_TARGET_X86_
+ }
+
+ if (compiler->gsGlobalSecurityCookieAddr == nullptr)
+ {
+ // If GS cookie value fits within 32-bits we can use 'cmp mem64, imm32'.
+ // Otherwise, load the value into a reg and use 'cmp mem64, reg64'.
+ if ((int)compiler->gsGlobalSecurityCookieVal != (ssize_t)compiler->gsGlobalSecurityCookieVal)
+ {
+ genSetRegToIcon(regGSCheck, compiler->gsGlobalSecurityCookieVal, TYP_I_IMPL);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+ }
+ else
+ {
+ getEmitter()->emitIns_S_I(INS_cmp, EA_PTRSIZE, compiler->lvaGSSecurityCookie, 0,
+ (int)compiler->gsGlobalSecurityCookieVal);
+ }
+ }
+ else
+ {
+ // Ngen case - GS cookie value needs to be accessed through an indirection.
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regGSCheck, (ssize_t)compiler->gsGlobalSecurityCookieAddr);
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_I_IMPL), EA_PTRSIZE, regGSCheck, regGSCheck, 0);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, regGSCheck, compiler->lvaGSSecurityCookie, 0);
+ }
+
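+ // If the cookie matches, jump around the fail-fast helper call; otherwise the process is torn down.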
+ BasicBlock* gsCheckBlk = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, gsCheckBlk);
+ genEmitHelperCall(CORINFO_HELP_FAIL_FAST, 0, EA_UNKNOWN);
+ genDefineTempLabel(gsCheckBlk);
+}
+
+/*****************************************************************************
+ *
+ * Generate code for all the basic blocks in the function.
+ */
+
+void CodeGen::genCodeForBBlist()
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned savedStkLvl;
+
+#ifdef DEBUG
+ genInterruptibleUsed = true;
+
+ // You have to be careful if you create basic blocks from now on
+ compiler->fgSafeBasicBlockCreation = false;
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnCall)
+ {
+ compiler->opts.compStackCheckOnCall = false;
+ }
+
+ // This stress mode is not compatible with fully interruptible GC
+ if (genInterruptible && compiler->opts.compStackCheckOnRet)
+ {
+ compiler->opts.compStackCheckOnRet = false;
+ }
+#endif // DEBUG
+
+ // Prepare the blocks for exception handling codegen: mark the blocks that need labels.
+ genPrepForEHCodegen();
+
+ assert(!compiler->fgFirstBBScratch ||
+ compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
+
+ /* Initialize the spill tracking logic */
+
+ regSet.rsSpillBeg();
+
+#ifdef DEBUGGING_SUPPORT
+ /* Initialize the line# tracking logic */
+
+ if (compiler->opts.compScopeInfo)
+ {
+ siInit();
+ }
+#endif
+
+ // The current implementation of switch tables requires the first block to have a label so it
+ // can generate offsets to the switch label targets.
+ // TODO-XArch-CQ: remove this when switches have been re-implemented to not use this.
+ if (compiler->fgHasSwitch)
+ {
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ }
+
+ genPendingCallLabel = nullptr;
+
+ /* Initialize the pointer tracking code */
+
+ gcInfo.gcRegPtrSetInit();
+ gcInfo.gcVarPtrSetInit();
+
+ /* If any arguments live in registers, mark those regs as such */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ /* Is this variable a parameter assigned to a register? */
+
+ if (!varDsc->lvIsParam || !varDsc->lvRegister)
+ {
+ continue;
+ }
+
+ /* Is the argument live on entry to the method? */
+
+ if (!VarSetOps::IsMember(compiler, compiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ /* Is this a floating-point argument? */
+
+ if (varDsc->IsFloatRegType())
+ {
+ continue;
+ }
+
+ noway_assert(!varTypeIsFloating(varDsc->TypeGet()));
+
+ /* Mark the register as holding the variable */
+
+ regTracker.rsTrackRegLclVar(varDsc->lvRegNum, varNum);
+ }
+
+ unsigned finallyNesting = 0;
+
+ // Make sure a set is allocated for compiler->compCurLife (in the long case), so we can set it to empty without
+ // allocation at the start of each basic block.
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, VarSetOps::MakeEmpty(compiler));
+
+ /*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks and generate code for each one
+ *
+ */
+
+ BasicBlock* block;
+ BasicBlock* lblk; /* previous block */
+
+ for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n=============== Generating ");
+ block->dspBlockHeader(compiler, true, true);
+ compiler->fgDispBBLiveness(block);
+ }
+#endif // DEBUG
+
+ // Figure out which registers hold variables on entry to this block
+
+ regSet.ClearMaskVars();
+ gcInfo.gcRegGCrefSetCur = RBM_NONE;
+ gcInfo.gcRegByrefSetCur = RBM_NONE;
+
+ compiler->m_pLinearScan->recordVarLocationsAtStartOfBB(block);
+
+ genUpdateLife(block->bbLiveIn);
+
+ // Even if liveness didn't change, we need to update the registers containing GC references.
+ // genUpdateLife will update the registers live due to liveness changes. But what about registers that didn't
+ // change? We cleared them out above. Maybe we should just not clear them out, but update the ones that change
+ // here. That would require handling the changes in recordVarLocationsAtStartOfBB().
+
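+ // Rebuild the live register set and the GC ref/byref register sets from the variables live on entry to this block.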
+ regMaskTP newLiveRegSet = RBM_NONE;
+ regMaskTP newRegGCrefSet = RBM_NONE;
+ regMaskTP newRegByrefSet = RBM_NONE;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(removedGCVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(addedGCVars, VarSetOps::MakeEmpty(compiler));
+#endif
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+
+ if (varDsc->lvIsInReg())
+ {
+ newLiveRegSet |= varDsc->lvRegMask();
+ if (varDsc->lvType == TYP_REF)
+ {
+ newRegGCrefSet |= varDsc->lvRegMask();
+ }
+ else if (varDsc->lvType == TYP_BYREF)
+ {
+ newRegByrefSet |= varDsc->lvRegMask();
+ }
+#ifdef DEBUG
+ if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, removedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ else if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex))
+ {
+ VarSetOps::AddElemD(compiler, addedGCVars, varIndex);
+ }
+#endif // DEBUG
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex);
+ }
+ }
+
+ regSet.rsMaskVars = newLiveRegSet;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (!VarSetOps::IsEmpty(compiler, addedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tAdded GCVars: ");
+ dumpConvertedVarSet(compiler, addedGCVars);
+ printf("\n");
+ }
+ if (!VarSetOps::IsEmpty(compiler, removedGCVars))
+ {
+ printf("\t\t\t\t\t\t\tRemoved GCVars: ");
+ dumpConvertedVarSet(compiler, removedGCVars);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ gcInfo.gcMarkRegSetGCref(newRegGCrefSet DEBUGARG(true));
+ gcInfo.gcMarkRegSetByref(newRegByrefSet DEBUGARG(true));
+
+ /* Blocks with handlerGetsXcptnObj()==true use GT_CATCH_ARG to
+ represent the exception object (TYP_REF).
+ We mark REG_EXCEPTION_OBJECT as holding a GC object on entry
+ to the block, it will be the first thing evaluated
+ (thanks to GTF_ORDER_SIDEEFF).
+ */
+
+ if (handlerGetsXcptnObj(block->bbCatchTyp))
+ {
+ for (GenTree* node : LIR::AsRange(block))
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ gcInfo.gcMarkRegSetGCref(RBM_EXCEPTION_OBJECT);
+ break;
+ }
+ }
+ }
+
+ /* Start a new code output block */
+
+ genUpdateCurrentFunclet(block);
+
+ if (genAlignLoops && block->bbFlags & BBF_LOOP_HEAD)
+ {
+ getEmitter()->emitLoopAlign();
+ }
+
+#ifdef DEBUG
+ if (compiler->opts.dspCode)
+ {
+ printf("\n L_M%03u_BB%02u:\n", Compiler::s_compMethodsCount, block->bbNum);
+ }
+#endif
+
+ block->bbEmitCookie = nullptr;
+
+ if (block->bbFlags & (BBF_JMP_TARGET | BBF_HAS_LABEL))
+ {
+ /* Mark a label and update the current set of live GC refs */
+
+ block->bbEmitCookie = getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur, FALSE);
+ }
+
+ if (block == compiler->fgFirstColdBlock)
+ {
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nThis is the start of the cold region of the method\n");
+ }
+#endif
+ // We should never have a block that falls through into the Cold section
+ noway_assert(!lblk->bbFallsThrough());
+
+ // We require the block that starts the Cold section to have a label
+ noway_assert(block->bbEmitCookie);
+ getEmitter()->emitSetFirstColdIGCookie(block->bbEmitCookie);
+ }
+
+ /* Both stacks are always empty on entry to a basic block */
+
+ genStackLevel = 0;
+
+ savedStkLvl = genStackLevel;
+
+ /* Tell everyone which basic block we're working on */
+
+ compiler->compCurBB = block;
+
+#ifdef DEBUGGING_SUPPORT
+ siBeginBlock(block);
+
+ // BBF_INTERNAL blocks don't correspond to any single IL instruction.
+ if (compiler->opts.compDbgInfo && (block->bbFlags & BBF_INTERNAL) &&
+ !compiler->fgBBisScratch(block)) // If the block is the distinguished first scratch block, then no need to
+ // emit a NO_MAPPING entry, immediately after the prolog.
+ {
+ genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::NO_MAPPING, true);
+ }
+
+ bool firstMapping = true;
+#endif // DEBUGGING_SUPPORT
+
+ /*---------------------------------------------------------------------
+ *
+ * Generate code for each statement-tree in the block
+ *
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ genReserveFuncletProlog(block);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Clear compCurStmt and compCurLifeTree.
+ compiler->compCurStmt = nullptr;
+ compiler->compCurLifeTree = nullptr;
+
+ // Traverse the block in linear order, generating code for each node as we
+ // encounter it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUGGING_SUPPORT
+ IL_OFFSETX currentILOffset = BAD_IL_OFFSET;
+#endif
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+#ifdef DEBUGGING_SUPPORT
+ // Do we have a new IL offset?
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ genEnsureCodeEmitted(currentILOffset);
+ currentILOffset = node->gtStmt.gtStmtILoffsx;
+ genIPmappingAdd(currentILOffset, firstMapping);
+ firstMapping = false;
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ if (node->OperGet() == GT_IL_OFFSET)
+ {
+ noway_assert(node->gtStmt.gtStmtLastILoffs <= compiler->info.compILCodeSize ||
+ node->gtStmt.gtStmtLastILoffs == BAD_IL_OFFSET);
+
+ if (compiler->opts.dspCode && compiler->opts.dspInstrs &&
+ node->gtStmt.gtStmtLastILoffs != BAD_IL_OFFSET)
+ {
+ while (genCurDispOffset <= node->gtStmt.gtStmtLastILoffs)
+ {
+ genCurDispOffset += dumpSingleInstr(compiler->info.compCode, genCurDispOffset, "> ");
+ }
+ }
+ }
+#endif // DEBUG
+
+ genCodeForTreeNode(node);
+ if (node->gtHasReg() && node->gtLsraInfo.isLocalDefUse)
+ {
+ genConsumeReg(node);
+ }
+ } // end for each node in block
+
+#ifdef DEBUG
+ // The following set of register spill checks and GC pointer tracking checks used to be
+ // performed at statement boundaries. Now, with LIR, there are no statements, so they are
+ // performed at the end of each block.
+ // TODO: could these checks be performed more frequently? E.g., at each location where
+ // the register allocator says there are no live non-variable registers. Perhaps this could
+ // be done by (a) keeping a running count of live non-variable registers by using
+ // gtLsraInfo.srcCount and gtLsraInfo.dstCount to decrement and increment the count, respectively,
+ // and running the checks when the count is zero. Or, (b) use the map maintained by LSRA
+ // (operandToLocationInfoMap) to mark a node somehow when, after the execution of that node,
+ // there will be no live non-variable registers.
+
+ regSet.rsSpillChk();
+
+ /* Make sure we didn't bungle pointer register tracking */
+
+ regMaskTP ptrRegs = gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur;
+ regMaskTP nonVarPtrRegs = ptrRegs & ~regSet.rsMaskVars;
+
+ // If return is a GC-type, clear it. Note that if a common
+ // epilog is generated (genReturnBB) it has a void return
+ // even though we might return a ref. We can't use the compRetType
+ // as the determiner because something we are tracking as a byref
+ // might be used as a return value of an int function (which is legal)
+ GenTree* blockLastNode = block->lastNode();
+ if ((blockLastNode != nullptr) && (blockLastNode->gtOper == GT_RETURN) &&
+ (varTypeIsGC(compiler->info.compRetType) ||
+ (blockLastNode->gtOp.gtOp1 != nullptr && varTypeIsGC(blockLastNode->gtOp.gtOp1->TypeGet()))))
+ {
+ nonVarPtrRegs &= ~RBM_INTRET;
+ }
+
+ if (nonVarPtrRegs)
+ {
+ printf("Regset after BB%02u gcr=", block->bbNum);
+ printRegMaskInt(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegGCrefSetCur & ~regSet.rsMaskVars);
+ printf(", byr=");
+ printRegMaskInt(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(gcInfo.gcRegByrefSetCur & ~regSet.rsMaskVars);
+ printf(", regVars=");
+ printRegMaskInt(regSet.rsMaskVars);
+ compiler->getEmitter()->emitDispRegSet(regSet.rsMaskVars);
+ printf("\n");
+ }
+
+ noway_assert(nonVarPtrRegs == RBM_NONE);
+#endif // DEBUG
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+ if (block->bbNext == nullptr)
+ {
+ // Unit testing of the AMD64 emitter: generate a bunch of instructions into the last block
+ // (it's as good as any, but better than the prolog, which can only be a single instruction
+ // group) then use COMPlus_JitLateDisasm=* to see if the late disassembler
+ // thinks the instructions are the same as we do.
+ genAmd64EmitterUnitTests();
+ }
+#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+
+#ifdef DEBUGGING_SUPPORT
+ // It is possible to reach the end of the block without generating code for the current IL offset.
+ // For example, if the following IR ends the current block, no code will have been generated for
+ // offset 21:
+ //
+ // ( 0, 0) [000040] ------------ il_offset void IL offset: 21
+ //
+ // N001 ( 0, 0) [000039] ------------ nop void
+ //
+ // This can lead to problems when debugging the generated code. To prevent these issues, make sure
+ // we've generated code for the last IL offset we saw in the block.
+ genEnsureCodeEmitted(currentILOffset);
+
+ if (compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0))
+ {
+ siEndBlock(block);
+
+ /* Is this the last block, and are there any open scopes left ? */
+
+ bool isLastBlockProcessed = (block->bbNext == nullptr);
+ if (block->isBBCallAlwaysPair())
+ {
+ isLastBlockProcessed = (block->bbNext->bbNext == nullptr);
+ }
+
+ if (isLastBlockProcessed && siOpenScopeList.scNext)
+ {
+ /* This assert no longer holds, because we may insert a throw
+ block to demarcate the end of a try or finally region when they
+ are at the end of the method. It would be nice if we could fix
+ our code so that this throw block will no longer be necessary. */
+
+ // noway_assert(block->bbCodeOffsEnd != compiler->info.compILCodeSize);
+
+ siCloseAllOpenScopes();
+ }
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ genStackLevel -= savedStkLvl;
+
+#ifdef DEBUG
+ // compCurLife should be equal to the liveOut set, except that we don't keep
+ // it up to date for vars that are not register candidates
+ // (it would be nice to have a xor set function)
+
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveVars, VarSetOps::Diff(compiler, block->bbLiveOut, compiler->compCurLife));
+ VarSetOps::UnionD(compiler, extraLiveVars, VarSetOps::Diff(compiler, compiler->compCurLife, block->bbLiveOut));
+ VARSET_ITER_INIT(compiler, extraLiveVarIter, extraLiveVars, extraLiveVarIndex);
+ while (extraLiveVarIter.NextElem(compiler, &extraLiveVarIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[extraLiveVarIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ assert(!varDsc->lvIsRegCandidate());
+ }
+#endif
+
+ /* Both stacks should always be empty on exit from a basic block */
+ noway_assert(genStackLevel == 0);
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, we need to generate a NOP after a call that is the last instruction of the block, in several
+ // situations, to support proper exception handling semantics. This is mostly to ensure that when the stack
+ // walker computes an instruction pointer for a frame, that instruction pointer is in the correct EH region.
+ // The document "X64 and ARM ABIs.docx" has more details. The situations:
+ // 1. If the call instruction is in a different EH region as the instruction that follows it.
+ // 2. If the call immediately precedes an OS epilog. (Note that what the JIT or VM consider an epilog might
+ // be slightly different from what the OS considers an epilog, and it is the OS-reported epilog that matters
+ // here.)
+ // We handle case #1 here, and case #2 in the emitter.
+ if (getEmitter()->emitIsLastInsCall())
+ {
+ // Ok, the last instruction generated is a call instruction. Do any of the other conditions hold?
+ // Note: we may be generating a few too many NOPs for the case of call preceding an epilog. Technically,
+ // if the next block is a BBJ_RETURN, an epilog will be generated, but there may be some instructions
+ // generated before the OS epilog starts, such as a GS cookie check.
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ // We only need the NOP if we're not going to generate any more code as part of the block end.
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ // We're going to generate more code below anyway, so no need for the NOP.
+
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ // These are the "epilog follows" case, handled in the emitter.
+
+ break;
+
+ case BBJ_NONE:
+ if (block->bbNext == nullptr)
+ {
+ // Call immediately before the end of the code; we should never get here.
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ else
+ {
+ // We need the NOP
+ instGen(INS_nop);
+ }
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ // These can't have a call as the last instruction!
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ /* Do we need to generate a jump or return? */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+ break;
+
+ case BBJ_RETURN:
+ genExitCode(block);
+ break;
+
+ case BBJ_THROW:
+ // If we have a throw at the end of a function or funclet, we need to emit another instruction
+ // afterwards to help the OS unwinder determine the correct context during unwind.
+ // We insert an unexecuted breakpoint instruction in several situations
+ // following a throw instruction:
+ // 1. If the throw is the last instruction of the function or funclet. This helps
+ // the OS unwinder determine the correct context during an unwind from the
+ // thrown exception.
+ // 2. If this is the last block of the hot section.
+ // 3. If the subsequent block is a special throw block.
+ // 4. On AMD64, if the next block is in a different EH region.
+ if ((block->bbNext == nullptr) || (block->bbNext->bbFlags & BBF_FUNCLET_BEG) ||
+ !BasicBlock::sameEHRegion(block, block->bbNext) ||
+ (!isFramePointerUsed() && compiler->fgIsThrowHlpBlk(block->bbNext)) ||
+ block->bbNext == compiler->fgFirstColdBlock)
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+
+ break;
+
+ case BBJ_CALLFINALLY:
+
+#if FEATURE_EH_FUNCLETS
+
+ // Generate a call to the finally, like this:
+ // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym
+ // call finally-funclet
+ // jmp finally-return // Only for non-retless finally calls
+ // The jmp can be a NOP if we're going to the next block.
+ // If we're generating code for the main function (not a funclet), and there is no localloc,
+ // then RSP at this point is the same value as that stored in the PSPsym. So just copy RSP
+ // instead of loading the PSPSym in this case.
+
+ if (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, REG_SPBASE, TYP_I_IMPL);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0);
+ }
+ getEmitter()->emitIns_J(INS_call, block->bbJumpDest);
+
+ if (block->bbFlags & BBF_RETLESS_CALL)
+ {
+ // We have a retless call, and the last instruction generated was a call.
+ // If the next block is in a different EH region (or is the end of the code
+ // block), then we need to generate a breakpoint here (since it will never
+ // get executed) to get proper unwind behavior.
+
+ if ((block->bbNext == nullptr) || !BasicBlock::sameEHRegion(block, block->bbNext))
+ {
+ instGen(INS_BREAKPOINT); // This should never get executed
+ }
+ }
+ else
+ {
+ // Because of the way the flowgraph is connected, the liveness info for this one instruction
+ // after the call is not (can not be) correct in cases where a variable has a last use in the
+ // handler. So turn off GC reporting for this single instruction.
+ getEmitter()->emitDisableGC();
+
+ // Now go to where the finally funclet needs to return to.
+ if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
+ {
+ // Fall-through.
+ // TODO-XArch-CQ: Can we get rid of this instruction, and just have the call return directly
+ // to the next instruction? This would depend on stack walking from within the finally
+ // handler working without this instruction being in this special EH region.
+ instGen(INS_nop);
+ }
+ else
+ {
+ inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
+ }
+
+ getEmitter()->emitEnableGC();
+ }
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // If we are about to invoke a finally locally from a try block, we have to set the ShadowSP slot
+ // corresponding to the finally's nesting level. When invoked in response to an exception, the
+ // EE does this.
+ //
+ // We have a BBJ_CALLFINALLY followed by a BBJ_ALWAYS.
+ //
+ // We will emit :
+ // mov [ebp - (n + 1)], 0
+ // mov [ebp - n ], 0xFC
+ // push &step
+ // jmp finallyBlock
+ // ...
+ // step:
+ // mov [ebp - n ], 0
+ // jmp leaveTarget
+ // ...
+ // leaveTarget:
+
+ noway_assert(isFramePointerUsed());
+
+ // Get the nesting level which contains the finally
+ compiler->fgGetNestingLevel(block, &finallyNesting);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = (unsigned)(filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE));
+
+ // Zero out the slot for the next nesting level
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs - TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, LCL_FINALLY_MARK, compiler->lvaShadowSPslotsVar,
+ curNestingSlotOffs);
+
+ // Now push the address where the finally funclet should return to directly.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ getEmitter()->emitIns_J(INS_push_hide, block->bbNext->bbJumpDest);
+ }
+ else
+ {
+ // The EE expects a DWORD, so we give it 0
+ inst_IV(INS_push_hide, 0);
+ }
+
+ // Jump to the finally BB
+ inst_JMP(EJ_jmp, block->bbJumpDest);
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ // The BBJ_ALWAYS is used because the BBJ_CALLFINALLY can't point to the
+ // jump target using bbJumpDest - that is already used to point
+ // to the finally block. So just skip past the BBJ_ALWAYS unless the
+ // block is RETLESS.
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ lblk = block;
+ block = block->bbNext;
+ }
+
+ break;
+
+#if FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ // Set RAX to the address the VM should return to after the catch.
+ // Generate a RIP-relative
+ // lea reg, [rip + disp32] ; the RIP is implicit
+ // which will be position-independent.
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, block->bbJumpDest, REG_INTRET);
+ __fallthrough;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ genReserveFuncletEpilog(block);
+ break;
+
+#else // !FEATURE_EH_FUNCLETS
+
+ case BBJ_EHCATCHRET:
+ noway_assert(!"Unexpected BBJ_EHCATCHRET"); // not used on x86
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ {
+ // The last statement of the block must be a GT_RETFILT, which has already been generated.
+ assert(block->lastNode() != nullptr);
+ assert(block->lastNode()->OperGet() == GT_RETFILT);
+
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ assert(block->lastNode()->gtOp.gtOp1 == nullptr); // op1 == nullptr means endfinally
+
+ // Return using a pop-jmp sequence. As the "try" block calls
+ // the finally with a jmp, this leaves the x86 call-ret stack
+ // balanced in the normal flow of path.
+
+ noway_assert(isFramePointerRequired());
+ inst_RV(INS_pop_hide, REG_EAX, TYP_I_IMPL);
+ inst_RV(INS_i_jmp, REG_EAX, TYP_I_IMPL);
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_EHFILTERRET);
+
+ // The return value has already been computed.
+ instGen_Return(0);
+ }
+ }
+ break;
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+#ifdef DEBUG
+ compiler->compCurBB = nullptr;
+#endif
+
+ } //------------------ END-FOR each block of the method -------------------
+
+ /* Nothing is live at this point */
+ genUpdateLife(VarSetOps::MakeEmpty(compiler));
+
+ /* Finalize the spill tracking logic */
+
+ regSet.rsSpillEnd();
+
+ /* Finalize the temp tracking logic */
+
+ compiler->tmpEnd();
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n# ");
+ printf("compCycleEstimate = %6d, compSizeEstimate = %5d ", compiler->compCycleEstimate,
+ compiler->compSizeEstimate);
+ printf("%s\n", compiler->info.compFullName);
+ }
+#endif
+}
+
+// return the child that has the same reg as the dst (if any)
+// other child returned (out param) in 'other'
+GenTree* sameRegAsDst(GenTree* tree, GenTree*& other /*out*/)
+{
+ if (tree->gtRegNum == REG_NA)
+ {
+ other = nullptr;
+ return nullptr;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ if (op1->gtRegNum == tree->gtRegNum)
+ {
+ other = op2;
+ return op1;
+ }
+ if (op2->gtRegNum == tree->gtRegNum)
+ {
+ other = op1;
+ return op2;
+ }
+ else
+ {
+ other = nullptr;
+ return nullptr;
+ }
+}
+
+// Move an immediate value into an integer register
+
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+ // reg cannot be a FP register
+ assert(!genIsValidFloatReg(reg));
+
+ if (!compiler->opts.compReloc)
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if ((imm == 0) && !EA_IS_RELOC(size))
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+ if (genDataIndirAddrCanBeEncodedAsPCRelOffset(imm))
+ {
+ getEmitter()->emitIns_R_AI(INS_lea, EA_PTR_DSP_RELOC, reg, imm);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ }
+ }
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
+
+/***********************************************************************************
+ *
+ * Generate code to set a register 'targetReg' of type 'targetType' to the constant
+ * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
+ * genProduceReg() on the target register.
+ */
+void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
+{
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ {
+ // relocatable values tend to come down as a CNS_INT of native int type
+ // so the line between these two opcodes is kind of blurry
+ GenTreeIntConCommon* con = tree->AsIntConCommon();
+ ssize_t cnsVal = con->IconValue();
+
+ if (con->ImmedValNeedsReloc(compiler))
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, targetReg, cnsVal);
+ regTracker.rsTrackRegTrash(targetReg);
+ }
+ else
+ {
+ genSetRegToIcon(targetReg, cnsVal, targetType);
+ }
+ }
+ break;
+
+ case GT_CNS_DBL:
+ {
+ double constValue = tree->gtDblCon.gtDconVal;
+
+ // Make sure we use "xorpd reg, reg" only for +ve zero constant (0.0) and not for -ve zero (-0.0)
+ if (*(__int64*)&constValue == 0)
+ {
+ // A faster/smaller way to generate 0
+ instruction ins = genGetInsForOper(GT_XOR, targetType);
+ inst_RV_RV(ins, targetReg, targetReg, targetType);
+ }
+ else
+ {
+ GenTreePtr cns;
+ if (targetType == TYP_FLOAT)
+ {
+ float f = forceCastToFloat(constValue);
+ cns = genMakeConst(&f, targetType, tree, false);
+ }
+ else
+ {
+ cns = genMakeConst(&constValue, targetType, tree, true);
+ }
+
+ inst_RV_TT(ins_Load(targetType), targetReg, cns);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// Generate code to get the high N bits of a N*N=2N bit multiplication result
+void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
+{
+ assert(!(treeNode->gtFlags & GTF_UNSIGNED));
+ assert(!treeNode->gtOverflowEx());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+ emitAttr size = emitTypeSize(treeNode);
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ GenTree* op2 = treeNode->gtOp.gtOp2;
+
+ // to get the high bits of the multiply, we are constrained to using the
+ // 1-op form: RDX:RAX = RAX * rm
+ // The 3-op form (Rx=Ry*Rz) does not support it.
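+    // Roughly, the shape of the code emitted below is (register names are illustrative):
+    //     mov  targetReg, regOp   ; only if regOp is not already in targetReg
+    //     imul rmOp               ; RDX:RAX = RAX * rmOp
+    //     mov  targetReg, rdx     ; only if targetReg != RDX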
+
+ genConsumeOperands(treeNode->AsOp());
+
+ GenTree* regOp = op1;
+ GenTree* rmOp = op2;
+
+ // Set rmOp to the contained memory operand (if any)
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == targetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+    // Set up targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(INS_imulEAX, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+}
+
+// generate code for a DIV or MOD operation
+//
+void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
+{
+ GenTree* dividend = treeNode->gtOp1;
+ GenTree* divisor = treeNode->gtOp2;
+ genTreeOps oper = treeNode->OperGet();
+ emitAttr size = emitTypeSize(treeNode);
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+ // dividend is not contained.
+ assert(!dividend->isContained());
+
+ genConsumeOperands(treeNode->AsOp());
+ if (varTypeIsFloating(targetType))
+ {
+        // The divisor is either not contained, or if it is contained, it is a memory op.
+        // Note that a reg-optional operand is treated as a memory op
+        // if no register is allocated to it.
+ assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
+ divisor->IsRegOptional());
+
+ // Floating point div/rem operation
+ assert(oper == GT_DIV || oper == GT_MOD);
+
+ if (dividend->gtRegNum == targetReg)
+ {
+ emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
+ }
+ else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
+ {
+ // It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
+            // because divss/divsd reg1, reg2 will overwrite reg1. Therefore, on AMD64,
+            // LSRA has to make sure that such a register assignment is not generated for floating
+ // point div/rem operations.
+ noway_assert(
+ !"GT_DIV/GT_MOD (float): case of reg2 = reg1 / reg2, LSRA should never generate such a reg assignment");
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, dividend->gtRegNum, targetType);
+ emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
+ }
+ }
+ else
+ {
+ // dividend must be in RAX
+ if (dividend->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, REG_RAX, dividend->gtRegNum, targetType);
+ }
+
+ // zero or sign extend rax to rdx
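+        // Illustratively: 'xor edx, edx' for the unsigned case, or cdq (or its 64-bit form cqo)
+        // for the signed case, so that RDX:RAX holds the full dividend.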
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, REG_EDX);
+ }
+ else
+ {
+ emit->emitIns(INS_cdq, size);
+            // The cdq instruction writes RDX, so clear the gcInfo for RDX
+ gcInfo.gcMarkRegSetNpt(RBM_RDX);
+ }
+
+ // Perform the 'targetType' (64-bit or 32-bit) divide instruction
+ instruction ins;
+ if (oper == GT_UMOD || oper == GT_UDIV)
+ {
+ ins = INS_div;
+ }
+ else
+ {
+ ins = INS_idiv;
+ }
+
+ emit->emitInsBinary(ins, size, treeNode, divisor);
+
+ // DIV/IDIV instructions always store the quotient in RAX and the remainder in RDX.
+ // Move the result to the desired register, if necessary
+ if (oper == GT_DIV || oper == GT_UDIV)
+ {
+ if (targetReg != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+ else
+ {
+ assert((oper == GT_MOD) || (oper == GT_UMOD));
+ if (targetReg != REG_RDX)
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RDX, targetType);
+ }
+ }
+ }
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCodeForBinary: Generate code for many binary arithmetic operators
+// This method is expected to have called genConsumeOperands() before calling it.
+//
+// Arguments:
+// treeNode - The binary operation for which we are generating code.
+//
+// Return Value:
+// None.
+//
+// Notes:
+//    Mul and div variants have special constraints on x64 and so are not handled here.
+//    See the assert below for the operators that are handled.
+
+void CodeGen::genCodeForBinary(GenTree* treeNode)
+{
+ const genTreeOps oper = treeNode->OperGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#if defined(_TARGET_64BIT_)
+ assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD || oper == GT_SUB);
+#else // !defined(_TARGET_64BIT_)
+ assert(oper == GT_OR || oper == GT_XOR || oper == GT_AND || oper == GT_ADD_LO || oper == GT_ADD_HI ||
+ oper == GT_SUB_LO || oper == GT_SUB_HI || oper == GT_MUL_HI || oper == GT_DIV_HI || oper == GT_MOD_HI ||
+ oper == GT_ADD || oper == GT_SUB);
+#endif // !defined(_TARGET_64BIT_)
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ GenTreePtr op2 = treeNode->gtGetOp2();
+
+ // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
+ if (op1->isContained())
+ {
+ assert(treeNode->OperIsCommutative());
+ assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
+
+ op1 = treeNode->gtGetOp2();
+ op2 = treeNode->gtGetOp1();
+ }
+
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ noway_assert(targetReg != REG_NA);
+
+ regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
+ regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;
+
+ GenTreePtr dst;
+ GenTreePtr src;
+
+ // This is the case of reg1 = reg1 op reg2
+ // We're ready to emit the instruction without any moves
+ if (op1reg == targetReg)
+ {
+ dst = op1;
+ src = op2;
+ }
+ // We have reg1 = reg2 op reg1
+    // In order for this operation to be correct,
+    // the operation must be commutative, so that
+    // we can convert it into reg1 = reg1 op reg2 and emit
+    // the same code as above.
+ else if (op2reg == targetReg)
+ {
+ noway_assert(GenTree::OperIsCommutative(oper));
+ dst = op2;
+ src = op1;
+ }
+ // now we know there are 3 different operands so attempt to use LEA
+ else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
+ && (op2->isContainedIntOrIImmed() || !op2->isContained()))
+ {
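+        // Illustrative forms of the LEA emitted below:
+        //     lea targetReg, [op1reg + imm]       ; op2 is a contained integer immediate
+        //     lea targetReg, [op1reg + op2reg]    ; op2 is in a register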
+ if (op2->isContainedIntOrIImmed())
+ {
+ emit->emitIns_R_AR(INS_lea, emitTypeSize(treeNode), targetReg, op1reg,
+ (int)op2->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(op2reg != REG_NA);
+ emit->emitIns_R_ARX(INS_lea, emitTypeSize(treeNode), targetReg, op1reg, op2reg, 1, 0);
+ }
+ genProduceReg(treeNode);
+ return;
+ }
+ // dest, op1 and op2 registers are different:
+ // reg3 = reg1 op reg2
+ // We can implement this by issuing a mov:
+ // reg3 = reg1
+ // reg3 = reg3 op reg2
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1reg, targetType);
+ regTracker.rsTrackRegCopy(targetReg, op1reg);
+ gcInfo.gcMarkRegPtrVal(targetReg, targetType);
+ dst = treeNode;
+ src = op2;
+ }
+
+ // try to use an inc or dec
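+    // (e.g., an add of the constant 1 becomes 'inc targetReg', and -1 becomes 'dec targetReg')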
+ if (oper == GT_ADD && !varTypeIsFloating(treeNode) && src->isContainedIntOrIImmed() && !treeNode->gtOverflowEx())
+ {
+ if (src->IsIntegralConst(1))
+ {
+ emit->emitIns_R(INS_inc, emitTypeSize(treeNode), targetReg);
+ genProduceReg(treeNode);
+ return;
+ }
+ else if (src->IsIntegralConst(-1))
+ {
+ emit->emitIns_R(INS_dec, emitTypeSize(treeNode), targetReg);
+ genProduceReg(treeNode);
+ return;
+ }
+ }
+ regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ noway_assert(r == targetReg);
+
+ if (treeNode->gtOverflowEx())
+ {
+#if !defined(_TARGET_64BIT_)
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_ADD_HI || oper == GT_SUB_HI);
+#else
+ assert(oper == GT_ADD || oper == GT_SUB);
+#endif
+ genCheckOverflow(treeNode);
+ }
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// isStructReturn: Returns whether the 'treeNode' is returning a struct.
+//
+// Arguments:
+// treeNode - The tree node to evaluate whether is a struct return.
+//
+// Return Value:
+//    For AMD64 *nix: returns true if the 'treeNode' is a GT_RETURN node of type struct.
+// Otherwise returns false.
+// For other platforms always returns false.
+//
+bool CodeGen::isStructReturn(GenTreePtr treeNode)
+{
+    // This method could be called for a 'treeNode' of GT_RETFILT or GT_RETURN.
+    // For GT_RETFILT, the return is always
+    // a bool or a void, for the end of a finally block.
+ noway_assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ if (treeNode->OperGet() != GT_RETURN)
+ {
+ return false;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ return varTypeIsStruct(treeNode);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!varTypeIsStruct(treeNode));
+ return false;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+}
+
+//------------------------------------------------------------------------
+// genStructReturn: Generates code for returning a struct.
+//
+// Arguments:
+// treeNode - The GT_RETURN tree node.
+//
+// Return Value:
+// None
+//
+// Assumption:
+// op1 of GT_RETURN node is either GT_LCL_VAR or multi-reg GT_CALL
+void CodeGen::genStructReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVar = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVar->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ ReturnTypeDesc retTypeDesc;
+ retTypeDesc.InitializeStructReturnType(compiler, varDsc->lvVerTypeInfo.GetClassHandle());
+ unsigned regCount = retTypeDesc.GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ if (varTypeIsEnregisterableStruct(op1))
+ {
+ // Right now the only enregistrable structs supported are SIMD vector types.
+ assert(varTypeIsSIMD(op1));
+ assert(!op1->isContained());
+
+            // This is the case where the operand is in a single reg and needs to be
+ // returned in multiple ABI return registers.
+ regNumber opReg = genConsumeReg(op1);
+ regNumber reg0 = retTypeDesc.GetABIReturnReg(0);
+ regNumber reg1 = retTypeDesc.GetABIReturnReg(1);
+
+ if (opReg != reg0 && opReg != reg1)
+ {
+ // Operand reg is different from return regs.
+                // Copy opReg to reg0 and let it be handled by one of the
+ // two cases below.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
+ opReg = reg0;
+ }
+
+ if (opReg == reg0)
+ {
+ assert(opReg != reg1);
+
+ // reg0 - already has required 8-byte in bit position [63:0].
+ // reg1 = opReg.
+ // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg1, opReg, TYP_DOUBLE);
+ }
+ else
+ {
+ assert(opReg == reg1);
+
+ // reg0 = opReg.
+ // swap upper and lower 8-bytes of reg1 so that desired 8-byte is in bit position [63:0].
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), reg0, opReg, TYP_DOUBLE);
+ }
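+            // 'shufpd reg1, reg1, 0x01' swaps the low and high 8 bytes of reg1, leaving the
+            // desired 8 bytes of the operand in bit position [63:0] of the second return register.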
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, reg1, reg1, 0x01);
+ }
+ else
+ {
+ assert(op1->isContained());
+
+ // Copy var on stack into ABI return registers
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc.GetReturnRegType(i);
+ regNumber reg = retTypeDesc.GetABIReturnReg(i);
+ getEmitter()->emitIns_R_S(ins_Load(type), emitTypeSize(type), reg, lclVar->gtLclNum, offset);
+ offset += genTypeSize(type);
+ }
+ }
+ }
+ else
+ {
+ assert(op1->IsMultiRegCall() || op1->IsCopyOrReloadOfMultiRegCall());
+
+ genConsumeRegs(op1);
+
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ // Handle circular dependency between call allocated regs and ABI return regs.
+ //
+ // It is possible under LSRA stress that originally allocated regs of call node,
+ // say rax and rdx, are spilled and reloaded to rdx and rax respectively. But
+        // GT_RETURN needs to move values as follows: rdx->rax, rax->rdx. A similar
+        // kind of circular dependency could arise between the xmm0 and xmm1 return regs.
+ // Codegen is expected to handle such circular dependency.
+ //
+ var_types regType0 = retTypeDesc->GetReturnRegType(0);
+ regNumber returnReg0 = retTypeDesc->GetABIReturnReg(0);
+ regNumber allocatedReg0 = call->GetRegNumByIdx(0);
+
+ var_types regType1 = retTypeDesc->GetReturnRegType(1);
+ regNumber returnReg1 = retTypeDesc->GetABIReturnReg(1);
+ regNumber allocatedReg1 = call->GetRegNumByIdx(1);
+
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg0 = reloadReg;
+ }
+
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
+ if (reloadReg != REG_NA)
+ {
+ allocatedReg1 = reloadReg;
+ }
+ }
+
+ if (allocatedReg0 == returnReg1 && allocatedReg1 == returnReg0)
+ {
+ // Circular dependency - swap allocatedReg0 and allocatedReg1
+ if (varTypeIsFloating(regType0))
+ {
+ assert(varTypeIsFloating(regType1));
+
+ // The fastest way to swap two XMM regs is using PXOR
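+                // (a classic xor-swap: a ^= b; b ^= a; a ^= b exchanges the two register values)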
+ inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+ inst_RV_RV(INS_pxor, allocatedReg1, allocatedReg0, TYP_DOUBLE);
+ inst_RV_RV(INS_pxor, allocatedReg0, allocatedReg1, TYP_DOUBLE);
+ }
+ else
+ {
+ assert(varTypeIsIntegral(regType0));
+ assert(varTypeIsIntegral(regType1));
+ inst_RV_RV(INS_xchg, allocatedReg1, allocatedReg0, TYP_I_IMPL);
+ }
+ }
+ else if (allocatedReg1 == returnReg0)
+ {
+ // Change the order of moves to correctly handle dependency.
+ if (allocatedReg1 != returnReg1)
+ {
+ inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+ }
+
+ if (allocatedReg0 != returnReg0)
+ {
+ inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+ }
+ }
+ else
+ {
+ // No circular dependency case.
+ if (allocatedReg0 != returnReg0)
+ {
+ inst_RV_RV(ins_Copy(regType0), returnReg0, allocatedReg0, regType0);
+ }
+
+ if (allocatedReg1 != returnReg1)
+ {
+ inst_RV_RV(ins_Copy(regType1), returnReg1, allocatedReg1, regType1);
+ }
+ }
+ }
+#else
+ unreached();
+#endif
+}
+
+//------------------------------------------------------------------------
+// genReturn: Generates code for return statement.
+// In case of struct return, delegates to the genStructReturn method.
+//
+// Arguments:
+// treeNode - The GT_RETURN or GT_RETFILT tree node.
+//
+// Return Value:
+// None
+//
+void CodeGen::genReturn(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_RETURN || treeNode->OperGet() == GT_RETFILT);
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types targetType = treeNode->TypeGet();
+
+#ifdef DEBUG
+ if (targetType == TYP_VOID)
+ {
+ assert(op1 == nullptr);
+ }
+#endif
+
+#ifdef _TARGET_X86_
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loRetVal = op1->gtGetOp1();
+ GenTree* hiRetVal = op1->gtGetOp2();
+ noway_assert((loRetVal->gtRegNum != REG_NA) && (hiRetVal->gtRegNum != REG_NA));
+
+ genConsumeReg(loRetVal);
+ genConsumeReg(hiRetVal);
+ if (loRetVal->gtRegNum != REG_LNGRET_LO)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_LO, loRetVal->gtRegNum, TYP_INT);
+ }
+ if (hiRetVal->gtRegNum != REG_LNGRET_HI)
+ {
+ inst_RV_RV(ins_Copy(targetType), REG_LNGRET_HI, hiRetVal->gtRegNum, TYP_INT);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (isStructReturn(treeNode))
+ {
+ genStructReturn(treeNode);
+ }
+ else if (targetType != TYP_VOID)
+ {
+ assert(op1 != nullptr);
+ noway_assert(op1->gtRegNum != REG_NA);
+
+ // !! NOTE !! genConsumeReg will clear op1 as GC ref after it has
+ // consumed a reg for the operand. This is because the variable
+ // is dead after return. But we are issuing more instructions
+ // like "profiler leave callback" after this consumption. So
+ // if you are issuing more instructions after this point,
+ // remember to keep the variable live up until the new method
+ // exit point where it is actually dead.
+ genConsumeReg(op1);
+
+ regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(treeNode))
+ {
+ // Spill the return value register from an XMM register to the stack, then load it on the x87 stack.
+ // If it already has a home location, use that. Otherwise, we need a temp.
+ if (genIsRegCandidateLocal(op1) && compiler->lvaTable[op1->gtLclVarCommon.gtLclNum].lvOnFrame)
+ {
+ // Store local variable to its home location, if necessary.
+ if ((op1->gtFlags & GTF_REG_VAL) != 0)
+ {
+ op1->gtFlags &= ~GTF_REG_VAL;
+ inst_TT_RV(ins_Store(op1->gtType,
+ compiler->isSIMDTypeLocalAligned(op1->gtLclVarCommon.gtLclNum)),
+ op1, op1->gtRegNum);
+ }
+ // Now, load it to the fp stack.
+ getEmitter()->emitIns_S(INS_fld, emitTypeSize(op1), op1->AsLclVarCommon()->gtLclNum, 0);
+ }
+ else
+ {
+ // Spill the value, which should be in a register, then load it to the fp stack.
+ // TODO-X86-CQ: Deal with things that are already in memory (don't call genConsumeReg yet).
+ op1->gtFlags |= GTF_SPILL;
+ regSet.rsSpillTree(op1->gtRegNum, op1);
+ op1->gtFlags |= GTF_SPILLED;
+ op1->gtFlags &= ~GTF_SPILL;
+
+ TempDsc* t = regSet.rsUnspillInPlace(op1, op1->gtRegNum);
+ inst_FS_ST(INS_fld, emitActualTypeSize(op1->gtType), t, 0);
+ op1->gtFlags &= ~GTF_SPILLED;
+ compiler->tmpRlsTemp(t);
+ }
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ if (op1->gtRegNum != retReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), retReg, op1->gtRegNum, targetType);
+ }
+ }
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // !! Note !!
+    // TODO-AMD64-Unix: If the profiler hook is implemented on *nix, make sure that for structs returned in
+    // two registers, RAX and RDX need to be kept alive. Make the necessary changes in lowerxarch.cpp
+ // in the handling of the GT_RETURN statement.
+ // Such structs containing GC pointers need to be handled by calling gcInfo.gcMarkRegSetNpt
+ // for the return registers containing GC refs.
+
+ // There will be a single return block while generating profiler ELT callbacks.
+ //
+ // Reason for not materializing Leave callback as a GT_PROF_HOOK node after GT_RETURN:
+    // The flowgraph and other places assert that the last node of a block marked as
+    // GT_RETURN is either a GT_RETURN, a GT_JMP, or a tail call. It would be nice to
+    // maintain such an invariant irrespective of whether a profiler hook is needed or not.
+ // Also, there is not much to be gained by materializing it as an explicit node.
+ if (compiler->compCurBB == compiler->genReturnBB)
+ {
+ // !! NOTE !!
+ // Since we are invalidating the assumption that we would slip into the epilog
+ // right after the "return", we need to preserve the return reg's GC state
+ // across the call until actual method return.
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+ gcInfo.gcMarkRegPtrVal(REG_INTRET, compiler->info.compRetType);
+ }
+
+ genProfilingLeaveCallback();
+
+ if (varTypeIsGC(compiler->info.compRetType))
+ {
+ gcInfo.gcMarkRegSetNpt(REG_INTRET);
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate code for a single node in the tree.
+ * Preconditions: All operands have been evaluated
+ *
+ */
+void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
+{
+ regNumber targetReg;
+#if !defined(_TARGET_64BIT_)
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ // All long enregistered nodes will have been decomposed into their
+ // constituent lo and hi nodes.
+ targetReg = REG_NA;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ targetReg = treeNode->gtRegNum;
+ }
+ var_types targetType = treeNode->TypeGet();
+ emitter* emit = getEmitter();
+
+#ifdef DEBUG
+ // Validate that all the operands for the current node are consumed in order.
+ // This is important because LSRA ensures that any necessary copies will be
+ // handled correctly.
+ lastConsumedNode = nullptr;
+ if (compiler->verbose)
+ {
+ unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
+ printf("Generating: ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+#endif // DEBUG
+
+ // Is this a node whose value is already in a register? LSRA denotes this by
+ // setting the GTF_REUSE_REG_VAL flag.
+ if (treeNode->IsReuseRegVal())
+ {
+ // For now, this is only used for constant nodes.
+ assert((treeNode->OperIsConst()));
+ JITDUMP(" TreeNode is marked ReuseReg\n");
+ return;
+ }
+
+ // contained nodes are part of their parents for codegen purposes
+ // ex : immediates, most LEAs
+ if (treeNode->isContained())
+ {
+ return;
+ }
+
+ switch (treeNode->gtOper)
+ {
+ case GT_START_NONGC:
+ getEmitter()->emitDisableGC();
+ break;
+
+ case GT_PROF_HOOK:
+#ifdef PROFILING_SUPPORTED
+ // We should be seeing this only if profiler hook is needed
+ noway_assert(compiler->compIsProfilerHookNeeded());
+
+ // Right now this node is used only for tail calls. In future if
+ // we intend to use it for Enter or Leave hooks, add a data member
+ // to this node indicating the kind of profiler hook. For example,
+ // helper number can be used.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif // PROFILING_SUPPORTED
+ break;
+
+ case GT_LCLHEAP:
+ genLclHeap(treeNode);
+ break;
+
+ case GT_CNS_INT:
+#ifdef _TARGET_X86_
+ NYI_IF(treeNode->IsIconHandle(GTF_ICON_TLS_HDL), "TLS constants");
+#endif // _TARGET_X86_
+ __fallthrough;
+
+ case GT_CNS_DBL:
+ genSetRegToConst(targetReg, targetType, treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_NEG:
+ case GT_NOT:
+ if (varTypeIsFloating(targetType))
+ {
+ assert(treeNode->gtOper == GT_NEG);
+ genSSE2BitwiseOp(treeNode);
+ }
+ else
+ {
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ regNumber operandReg = genConsumeReg(operand);
+
+ if (operandReg != targetReg)
+ {
+ inst_RV_RV(INS_mov, targetReg, operandReg, targetType);
+ }
+
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+ inst_RV(ins, targetReg, targetType);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ assert(varTypeIsIntegralOrI(treeNode));
+ __fallthrough;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif // !defined(_TARGET_64BIT_)
+ case GT_ADD:
+ case GT_SUB:
+ genConsumeOperands(treeNode->AsOp());
+ genCodeForBinary(treeNode);
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ genCodeForShift(treeNode);
+ // genCodeForShift() calls genProduceReg()
+ break;
+
+ case GT_CAST:
+#if !defined(_TARGET_64BIT_)
+            // We will NYI in DecomposeNode() if we are casting TO a long type, but we do not
+ // yet support casting FROM a long type either, and that's simpler to catch
+ // here.
+ NYI_IF(varTypeIsLong(treeNode->gtOp.gtOp1), "Casts from TYP_LONG");
+#endif // !defined(_TARGET_64BIT_)
+
+ if (varTypeIsFloating(targetType) && varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double <--> double/float
+ genFloatToFloatCast(treeNode);
+ }
+ else if (varTypeIsFloating(treeNode->gtOp.gtOp1))
+ {
+ // Casts float/double --> int32/int64
+ genFloatToIntCast(treeNode);
+ }
+ else if (varTypeIsFloating(targetType))
+ {
+ // Casts int32/uint32/int64/uint64 --> float/double
+ genIntToFloatCast(treeNode);
+ }
+ else
+ {
+ // Casts int <--> int
+ genIntToIntCast(treeNode);
+ }
+ // The per-case functions call genProduceReg()
+ break;
+
+ case GT_LCL_VAR:
+ {
+ // lcl_vars are not defs
+ assert((treeNode->gtFlags & GTF_VAR_DEF) == 0);
+
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ bool isRegCandidate = compiler->lvaTable[lcl->gtLclNum].lvIsRegCandidate();
+
+ if (isRegCandidate && !(treeNode->gtFlags & GTF_VAR_DEATH))
+ {
+ assert((treeNode->InReg()) || (treeNode->gtFlags & GTF_SPILLED));
+ }
+
+ // If this is a register candidate that has been spilled, genConsumeReg() will
+ // reload it at the point of use. Otherwise, if it's not in a register, we load it here.
+
+ if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ assert(!isRegCandidate);
+
+ emit->emitIns_R_S(ins_Load(treeNode->TypeGet(), compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)),
+ emitTypeSize(treeNode), treeNode->gtRegNum, lcl->gtLclNum, 0);
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_VAR_ADDR:
+ // Address of a local var. This by itself should never be allocated a register.
+ // If it is worth storing the address in a register then it should be cse'ed into
+ // a temp and that would be allocated a register.
+ noway_assert(targetType == TYP_BYREF);
+ noway_assert(!treeNode->InReg());
+
+ inst_RV_TT(INS_lea, targetReg, treeNode, 0, EA_BYREF);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(treeNode->gtRegNum != REG_NA);
+
+#ifdef FEATURE_SIMD
+ // Loading of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadLclFldTypeSIMD12(treeNode);
+ break;
+ }
+#endif
+
+ emitAttr size = emitTypeSize(targetType);
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ emit->emitIns_R_S(ins_Move_Extend(targetType, treeNode->InReg()), size, targetReg, varNum, offs);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_STORE_LCL_FLD:
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ noway_assert(!treeNode->InReg());
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+#ifdef FEATURE_SIMD
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclFldTypeSIMD12(treeNode);
+ break;
+ }
+#endif
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ genConsumeRegs(op1);
+ emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreePtr op1 = treeNode->gtGetOp1();
+
+ // var = call, where call returns a multi-reg return value
+ // case is handled separately.
+ if (op1->gtSkipReloadOrCopy()->IsMultiRegCall())
+ {
+ genMultiRegCallStoreToLocal(treeNode);
+ }
+ else
+ {
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+
+ // Ensure that lclVar nodes are typed correctly.
+ assert(!varDsc->lvNormalizeOnStore() || treeNode->TypeGet() == genActualType(varDsc->TypeGet()));
+
+#if !defined(_TARGET_64BIT_)
+ if (treeNode->TypeGet() == TYP_LONG)
+ {
+ genStoreLongLclVar(treeNode);
+ break;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
+ {
+ // This is only possible for a zero-init.
+ noway_assert(op1->IsIntegralConst(0));
+ genSIMDZero(targetType, varDsc->lvBaseType, targetReg);
+ genProduceReg(treeNode);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ genConsumeRegs(op1);
+
+ if (treeNode->gtRegNum == REG_NA)
+ {
+ // stack store
+ emit->emitInsMov(ins_Store(targetType, compiler->isSIMDTypeLocalAligned(lclNum)),
+ emitTypeSize(targetType), treeNode);
+ varDsc->lvRegNum = REG_STK;
+ }
+ else
+ {
+ bool containedOp1 = op1->isContained();
+ // Look for the case where we have a constant zero which we've marked for reuse,
+ // but which isn't actually in the register we want. In that case, it's better to create
+ // zero in the target register, because an xor is smaller than a copy. Note that we could
+ // potentially handle this in the register allocator, but we can't always catch it there
+ // because the target may not have a register allocated for it yet.
+ if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
+ (op1->IsIntegralConst(0) || op1->IsFPZero()))
+ {
+ op1->gtRegNum = REG_NA;
+ op1->ResetReuseRegVal();
+ containedOp1 = true;
+ }
+
+ if (containedOp1)
+ {
+ // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support a contained memory op.
+ // This is a bit tricky because we have to decide it's contained before register allocation,
+ // and this would be a case where, once that's done, we need to mark that node as always
+ // requiring a register - which we always assume now anyway, but once we "optimize" that
+ // we'll have to take cases like this into account.
+ assert((op1->gtRegNum == REG_NA) && op1->OperIsConst());
+ genSetRegToConst(treeNode->gtRegNum, targetType, op1);
+ }
+ else if (op1->gtRegNum != treeNode->gtRegNum)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ emit->emitInsBinary(ins_Move_Extend(targetType, true), emitTypeSize(treeNode), treeNode, op1);
+ }
+ }
+ }
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+ }
+ break;
+
+ case GT_RETFILT:
+ // A void GT_RETFILT is the end of a finally. For non-void filter returns we need to load the result in
+ // the return register, if it's not already there. The processing is the same as GT_RETURN.
+ if (targetType != TYP_VOID)
+ {
+ // For filters, the IL spec says the result is type int32. Further, the only specified legal values
+ // are 0 or 1, with the use of other values "undefined".
+ assert(targetType == TYP_INT);
+ }
+
+ __fallthrough;
+
+ case GT_RETURN:
+ genReturn(treeNode);
+ break;
+
+ case GT_LEA:
+ {
+ // if we are here, it is the case where there is an LEA that cannot
+ // be folded into a parent instruction
+ GenTreeAddrMode* lea = treeNode->AsAddrMode();
+ genLeaInstruction(lea);
+ }
+ // genLeaInstruction calls genProduceReg()
+ break;
+
+ case GT_IND:
+#ifdef FEATURE_SIMD
+ // Handling of Vector3 type values loaded through indirection.
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genLoadIndTypeSIMD12(treeNode);
+ break;
+ }
+#endif // FEATURE_SIMD
+
+ genConsumeAddress(treeNode->AsIndir()->Addr());
+ emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MULHI:
+ genCodeForMulHi(treeNode->AsOp());
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MUL:
+ {
+ instruction ins;
+ emitAttr size = emitTypeSize(treeNode);
+ bool isUnsignedMultiply = ((treeNode->gtFlags & GTF_UNSIGNED) != 0);
+ bool requiresOverflowCheck = treeNode->gtOverflowEx();
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* op2 = treeNode->gtGetOp2();
+
+ // there are 3 forms of x64 multiply:
+ // 1-op form with 128 result: RDX:RAX = RAX * rm
+ // 2-op form: reg *= rm
+ // 3-op form: reg = rm * imm
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // This matches the 'mul' lowering in Lowering::SetMulOpCounts()
+ //
+ // immOp :: Only one operand can be an immediate
+ // rmOp :: Only one operand can be a memory op.
+ // regOp :: A register op (especially the operand that matches 'targetReg')
+ // (can be nullptr when we have both a memory op and an immediate op)
+
+ GenTree* immOp = nullptr;
+ GenTree* rmOp = op1;
+ GenTree* regOp;
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ immOp = op2;
+ }
+ else if (op1->isContainedIntOrIImmed())
+ {
+ immOp = op1;
+ rmOp = op2;
+ }
+
+ if (immOp != nullptr)
+ {
+ // This must be a non-floating point operation.
+ assert(!varTypeIsFloating(treeNode));
+
+ // CQ: When possible use LEA for mul by imm 3, 5 or 9
+ ssize_t imm = immOp->AsIntConCommon()->IconValue();
+
+ if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
+ {
+ // We will use the LEA instruction to perform this multiply
+ // Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
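+                    // e.g., for imm == 5 this emits (roughly): lea targetReg, [rmOpReg + rmOpReg*4]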
+ unsigned int scale = (unsigned int)(imm - 1);
+ getEmitter()->emitIns_R_ARX(INS_lea, size, targetReg, rmOp->gtRegNum, rmOp->gtRegNum, scale, 0);
+ }
+ else
+ {
+ // use the 3-op form with immediate
+ ins = getEmitter()->inst3opImulForReg(targetReg);
+ emit->emitInsBinary(ins, size, rmOp, immOp);
+ }
+ }
+ else // we have no contained immediate operand
+ {
+ regOp = op1;
+ rmOp = op2;
+
+ regNumber mulTargetReg = targetReg;
+ if (isUnsignedMultiply && requiresOverflowCheck)
+ {
+ ins = INS_mulEAX;
+ mulTargetReg = REG_RAX;
+ }
+ else
+ {
+ ins = genGetInsForOper(GT_MUL, targetType);
+ }
+
+                // Set rmOp to the contained memory operand (if any),
+                // or set regOp to op2 when it has the matching target register for our multiply op.
+ //
+ if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
+ {
+ regOp = op2;
+ rmOp = op1;
+ }
+ assert(!regOp->isContained());
+
+                // Set up targetReg when neither of the source operands was a matching register
+ if (regOp->gtRegNum != mulTargetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), mulTargetReg, regOp->gtRegNum, targetType);
+ }
+
+ emit->emitInsBinary(ins, size, treeNode, rmOp);
+
+ // Move the result to the desired register, if necessary
+ if ((ins == INS_mulEAX) && (targetReg != REG_RAX))
+ {
+ inst_RV_RV(INS_mov, targetReg, REG_RAX, targetType);
+ }
+ }
+
+ if (requiresOverflowCheck)
+ {
+ // Overflow checking is only used for non-floating point types
+ noway_assert(!varTypeIsFloating(treeNode));
+
+ genCheckOverflow(treeNode);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ // We shouldn't be seeing GT_MOD on float/double args as it should get morphed into a
+ // helper call by front-end. Similarly we shouldn't be seeing GT_UDIV and GT_UMOD
+ // on float/double args.
+ noway_assert(!varTypeIsFloating(treeNode));
+ __fallthrough;
+
+ case GT_DIV:
+ genCodeForDivMod(treeNode->AsOp());
+ break;
+
+ case GT_INTRINSIC:
+ genIntrinsic(treeNode);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ genSIMDIntrinsic(treeNode->AsSIMD());
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CKFINITE:
+ genCkfinite(treeNode);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ {
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ // TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
+ // (signed < or >= where targetReg != REG_NA)
+
+ GenTreePtr op1 = treeNode->gtGetOp1();
+ var_types op1Type = op1->TypeGet();
+
+ if (varTypeIsFloating(op1Type))
+ {
+ genCompareFloat(treeNode);
+ }
+#if !defined(_TARGET_64BIT_)
+ // X86 Long comparison
+ else if (varTypeIsLong(op1Type))
+ {
+ // When not materializing the result in a register, the compare logic is generated
+ // when we generate the GT_JTRUE.
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genCompareLong(treeNode);
+ }
+ else
+ {
+ // We generate the compare when we generate the GT_JTRUE, but we need to consume
+ // the operands now.
+ genConsumeOperands(treeNode->AsOp());
+ }
+ }
+#endif // !defined(_TARGET_64BIT_)
+ else
+ {
+ genCompareInt(treeNode);
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ GenTree* cmp = treeNode->gtOp.gtOp1;
+
+ assert(cmp->OperIsCompare());
+ assert(compiler->compCurBB->bbJumpKind == BBJ_COND);
+
+#if !defined(_TARGET_64BIT_)
+ // For long compares, we emit special logic
+ if (varTypeIsLong(cmp->gtGetOp1()))
+ {
+ genJTrueLong(cmp);
+ }
+ else
+#endif
+ {
+ // Get the "kind" and type of the comparison. Note that whether it is an unsigned cmp
+ // is governed by a flag NOT by the inherent type of the node
+ // TODO-XArch-CQ: Check if we can use the currently set flags.
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(cmp, jumpKind, branchToTrueLabel);
+
+ BasicBlock* skipLabel = nullptr;
+ if (jumpKind[0] != EJ_NONE)
+ {
+ BasicBlock* jmpTarget;
+ if (branchToTrueLabel[0])
+ {
+ jmpTarget = compiler->compCurBB->bbJumpDest;
+ }
+ else
+ {
+ // This case arises only for ordered GT_EQ right now
+ assert((cmp->gtOper == GT_EQ) && ((cmp->gtFlags & GTF_RELOP_NAN_UN) == 0));
+ skipLabel = genCreateTempLabel();
+ jmpTarget = skipLabel;
+ }
+
+ inst_JMP(jumpKind[0], jmpTarget);
+ }
+
+ if (jumpKind[1] != EJ_NONE)
+ {
+ // the second conditional branch always has to be to the true label
+ assert(branchToTrueLabel[1]);
+ inst_JMP(jumpKind[1], compiler->compCurBB->bbJumpDest);
+ }
+
+ if (skipLabel != nullptr)
+ {
+ genDefineTempLabel(skipLabel);
+ }
+ }
+ }
+ break;
+
+ case GT_RETURNTRAP:
+ {
+ // this is nothing but a conditional call to CORINFO_HELP_STOP_FOR_GC
+ // based on the contents of 'data'
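+            // Illustrative shape of the code emitted below ('data' may be in a register or memory):
+            //     cmp  data, 0
+            //     je   skipLabel
+            //     call CORINFO_HELP_STOP_FOR_GC
+            //   skipLabel: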
+
+ GenTree* data = treeNode->gtOp.gtOp1;
+ genConsumeRegs(data);
+ GenTreeIntCon cns = intForm(TYP_INT, 0);
+ emit->emitInsBinary(INS_cmp, emitTypeSize(TYP_INT), data, &cns);
+
+ BasicBlock* skipLabel = genCreateTempLabel();
+
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, skipLabel);
+
+ // emit the call to the EE-helper that stops for GC (or other reasons)
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+
+ genEmitHelperCall(CORINFO_HELP_STOP_FOR_GC, 0, EA_UNKNOWN, tmpReg);
+ genDefineTempLabel(skipLabel);
+ }
+ break;
+
+ case GT_STOREIND:
+ genStoreInd(treeNode);
+ break;
+
+ case GT_COPY:
+ // This is handled at the time we call genConsumeReg() on the GT_COPY
+ break;
+
+ case GT_SWAP:
+ {
+ // Swap is only supported for lclVar operands that are enregistered
+ // We do not consume or produce any registers. Both operands remain enregistered.
+ // However, the gc-ness may change.
+ assert(genIsRegCandidateLocal(treeNode->gtOp.gtOp1) && genIsRegCandidateLocal(treeNode->gtOp.gtOp2));
+
+ GenTreeLclVarCommon* lcl1 = treeNode->gtOp.gtOp1->AsLclVarCommon();
+ LclVarDsc* varDsc1 = &(compiler->lvaTable[lcl1->gtLclNum]);
+ var_types type1 = varDsc1->TypeGet();
+ GenTreeLclVarCommon* lcl2 = treeNode->gtOp.gtOp2->AsLclVarCommon();
+ LclVarDsc* varDsc2 = &(compiler->lvaTable[lcl2->gtLclNum]);
+ var_types type2 = varDsc2->TypeGet();
+
+ // We must have both int or both fp regs
+ assert(!varTypeIsFloating(type1) || varTypeIsFloating(type2));
+
+ // FP swap is not yet implemented (and should have NYI'd in LSRA)
+ assert(!varTypeIsFloating(type1));
+
+ regNumber oldOp1Reg = lcl1->gtRegNum;
+ regMaskTP oldOp1RegMask = genRegMask(oldOp1Reg);
+ regNumber oldOp2Reg = lcl2->gtRegNum;
+ regMaskTP oldOp2RegMask = genRegMask(oldOp2Reg);
+
+ // We don't call genUpdateVarReg because we don't have a tree node with the new register.
+ varDsc1->lvRegNum = oldOp2Reg;
+ varDsc2->lvRegNum = oldOp1Reg;
+
+ // Do the xchg
+ emitAttr size = EA_PTRSIZE;
+ if (varTypeGCtype(type1) != varTypeGCtype(type2))
+ {
+ // If the type specified to the emitter is a GC type, it will swap the GC-ness of the registers.
+ // Otherwise it will leave them alone, which is correct if they have the same GC-ness.
+ size = EA_GCREF;
+ }
+ inst_RV_RV(INS_xchg, oldOp1Reg, oldOp2Reg, TYP_I_IMPL, size);
+
+ // Update the gcInfo.
+ // Manually remove these regs for the gc sets (mostly to avoid confusing duplicative dump output)
+ gcInfo.gcRegByrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+ gcInfo.gcRegGCrefSetCur &= ~(oldOp1RegMask | oldOp2RegMask);
+
+ // gcMarkRegPtrVal will do the appropriate thing for non-gc types.
+ // It will also dump the updates.
+ gcInfo.gcMarkRegPtrVal(oldOp2Reg, type1);
+ gcInfo.gcMarkRegPtrVal(oldOp1Reg, type2);
+ }
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // Nothing to do
+ break;
+
+ case GT_PUTARG_STK:
+ genPutArgStk(treeNode);
+ break;
+
+ case GT_PUTARG_REG:
+ {
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ noway_assert(targetType != TYP_STRUCT);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // commas show up here commonly, as part of a nullchk operation
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+ // If child node is not already in the register we need, move it
+ genConsumeReg(op1);
+ if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), treeNode->gtRegNum, op1->gtRegNum, targetType);
+ }
+ genProduceReg(treeNode);
+ }
+ break;
+
+ case GT_CALL:
+ genCallInstruction(treeNode);
+ break;
+
+ case GT_JMP:
+ genJmpMethod(treeNode);
+ break;
+
+ case GT_LOCKADD:
+ case GT_XCHG:
+ case GT_XADD:
+ genLockedInstructions(treeNode);
+ break;
+
+ case GT_MEMORYBARRIER:
+ instGen_MemoryBarrier();
+ break;
+
+ case GT_CMPXCHG:
+ {
+ GenTreePtr location = treeNode->gtCmpXchg.gtOpLocation; // arg1
+ GenTreePtr value = treeNode->gtCmpXchg.gtOpValue; // arg2
+ GenTreePtr comparand = treeNode->gtCmpXchg.gtOpComparand; // arg3
+
+ assert(location->gtRegNum != REG_NA && location->gtRegNum != REG_RAX);
+ assert(value->gtRegNum != REG_NA && value->gtRegNum != REG_RAX);
+
+ genConsumeReg(location);
+ genConsumeReg(value);
+ genConsumeReg(comparand);
+ // comparand goes to RAX;
+ // Note that we must issue this move after the genConsumeRegs(), in case any of the above
+ // have a GT_COPY from RAX.
+ if (comparand->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(ins_Copy(comparand->TypeGet()), REG_RAX, comparand->gtRegNum, comparand->TypeGet());
+ }
+
+ // location is Rm
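+            // Illustrative shape: 'lock cmpxchg [location], value' with the comparand in RAX;
+            // the value that was in memory ends up in RAX afterwards.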
+ instGen(INS_lock);
+
+ emit->emitIns_AR_R(INS_cmpxchg, emitTypeSize(targetType), value->gtRegNum, location->gtRegNum, 0);
+
+ // Result is in RAX
+ if (targetReg != REG_RAX)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, REG_RAX, targetType);
+ }
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_RELOAD:
+ // do nothing - reload is just a marker.
+ // The parent node will call genConsumeReg on this which will trigger the unspill of this node's child
+ // into the register specified in this node.
+ break;
+
+ case GT_NOP:
+ break;
+
+ case GT_NO_OP:
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ getEmitter()->emitIns_Nop(1);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ genRangeCheck(treeNode);
+ break;
+
+ case GT_PHYSREG:
+ if (treeNode->gtRegNum != treeNode->AsPhysReg()->gtSrcReg)
+ {
+ inst_RV_RV(INS_mov, treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg, targetType);
+
+ genTransferRegGCState(treeNode->gtRegNum, treeNode->AsPhysReg()->gtSrcReg);
+ }
+ genProduceReg(treeNode);
+ break;
+
+ case GT_PHYSREGDST:
+ break;
+
+ case GT_NULLCHECK:
+ {
+ assert(!treeNode->gtOp.gtOp1->isContained());
+ regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
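+            // The 4-byte compare below dereferences [reg]; faulting on a null address is
+            // exactly the point of GT_NULLCHECK, and the result of the compare is unused.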
+ emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
+ }
+ break;
+
+ case GT_CATCH_ARG:
+
+ noway_assert(handlerGetsXcptnObj(compiler->compCurBB->bbCatchTyp));
+
+ /* Catch arguments get passed in a register. genCodeForBBlist()
+ would have marked it as holding a GC object, but not used. */
+
+ noway_assert(gcInfo.gcRegGCrefSetCur & RBM_EXCEPTION_OBJECT);
+ genConsumeReg(treeNode);
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+
+ // Have to clear the ShadowSP of the nesting level which encloses the finally. Generates:
+ // mov dword ptr [ebp-0xC], 0 // for some slot of the ShadowSP local var
+
+ unsigned finallyNesting;
+ finallyNesting = treeNode->gtVal.gtVal1;
+ noway_assert(treeNode->gtVal.gtVal1 < compiler->compHndBBtabCount);
+ noway_assert(finallyNesting < compiler->compHndBBtabCount);
+
+ // The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ unsigned filterEndOffsetSlotOffs;
+ PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
+ TARGET_POINTER_SIZE); // below doesn't underflow.
+ filterEndOffsetSlotOffs =
+ (unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);
+
+ unsigned curNestingSlotOffs;
+ curNestingSlotOffs = filterEndOffsetSlotOffs - ((finallyNesting + 1) * TARGET_POINTER_SIZE);
+ instGen_Store_Imm_Into_Lcl(TYP_I_IMPL, EA_PTRSIZE, 0, compiler->lvaShadowSPslotsVar, curNestingSlotOffs);
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ case GT_PINVOKE_PROLOG:
+ noway_assert(((gcInfo.gcRegGCrefSetCur | gcInfo.gcRegByrefSetCur) & ~fullIntArgRegMask()) == 0);
+
+ // the runtime side requires the codegen here to be consistent
+ emit->emitDisableRandomNops();
+ break;
+
+ case GT_LABEL:
+ genPendingCallLabel = genCreateTempLabel();
+ treeNode->gtLabel.gtLabBB = genPendingCallLabel;
+ emit->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, genPendingCallLabel, treeNode->gtRegNum);
+ break;
+
+ case GT_STORE_OBJ:
+ if (treeNode->OperIsCopyBlkOp() && !treeNode->AsBlk()->gtBlkOpGcUnsafe)
+ {
+ assert(treeNode->AsObj()->gtGcPtrCount != 0);
+ genCodeForCpObj(treeNode->AsObj());
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_DYN_BLK:
+ case GT_STORE_BLK:
+ genCodeForStoreBlk(treeNode->AsBlk());
+ break;
+
+ case GT_JMPTABLE:
+ genJumpTable(treeNode);
+ break;
+
+ case GT_SWITCH_TABLE:
+ genTableBasedSwitch(treeNode);
+ break;
+
+ case GT_ARR_INDEX:
+ genCodeForArrIndex(treeNode->AsArrIndex());
+ break;
+
+ case GT_ARR_OFFSET:
+ genCodeForArrOffset(treeNode->AsArrOffs());
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
+ genProduceReg(treeNode);
+ break;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ assert(!treeNode->isContained());
+ genConsumeRegs(treeNode);
+ break;
+#endif
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
+ default:
+ {
+#ifdef DEBUG
+ char message[256];
+ sprintf(message, "Unimplemented node type %s\n", GenTree::NodeName(treeNode->OperGet()));
+#endif
+ assert(!"Unknown node in codegen");
+ }
+ break;
+ }
+}
+
+//----------------------------------------------------------------------------------
+// genMultiRegCallStoreToLocal: store multi-reg return value of a call node to a local
+//
+// Arguments:
+// treeNode - Gentree of GT_STORE_LCL_VAR
+//
+// Return Value:
+// None
+//
+// Assumption:
+// The child of store is a multi-reg call node.
+// genProduceReg() on treeNode is made by caller of this routine.
+//
+void CodeGen::genMultiRegCallStoreToLocal(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_VAR);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Structs of size >=9 and <=16 are returned in two return registers on x64 Unix.
+ assert(varTypeIsStruct(treeNode));
+
+ // Assumption: current x64 Unix implementation requires that a multi-reg struct
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being struct promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ assert(retTypeDesc->GetReturnRegCount() == MAX_RET_REG_COUNT);
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ // Right now the only enregistrable structs supported are SIMD types.
+ assert(varTypeIsSIMD(treeNode));
+ assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(0)));
+ assert(varTypeIsFloating(retTypeDesc->GetReturnRegType(1)));
+
+        // This is the case where the two 8-bytes that comprise the operand are in
+        // two different xmm registers and need to be assembled into a single
+        // xmm register.
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber reg0 = call->GetRegNumByIdx(0);
+ regNumber reg1 = call->GetRegNumByIdx(1);
+
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(0);
+ if (reloadReg != REG_NA)
+ {
+ reg0 = reloadReg;
+ }
+
+ reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(1);
+ if (reloadReg != REG_NA)
+ {
+ reg1 = reloadReg;
+ }
+ }
+
+ if (targetReg != reg0 && targetReg != reg1)
+ {
+            // Copy reg0 into targetReg and let it be handled by one
+ // of the cases below.
+ inst_RV_RV(ins_Copy(TYP_DOUBLE), targetReg, reg0, TYP_DOUBLE);
+ targetReg = reg0;
+ }
+
+ if (targetReg == reg0)
+ {
+            // targetReg[63:0] = targetReg[63:0]
+ // targetReg[127:64] = reg1[127:64]
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg1, 0x00);
+ }
+ else
+ {
+ assert(targetReg == reg1);
+
+ // We need two shuffles to achieve this
+ // First:
+            // targetReg[63:0] = targetReg[63:0]
+ // targetReg[127:64] = reg0[63:0]
+ //
+ // Second:
+            // targetReg[63:0] = targetReg[127:64]
+ // targetReg[127:64] = targetReg[63:0]
+ //
+ // Essentially copy low 8-bytes from reg0 to high 8-bytes of targetReg
+ // and next swap low and high 8-bytes of targetReg to have them
+ // rearranged in the right order.
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, reg0, 0x00);
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, targetReg, targetReg, 0x01);
+ }
+ }
+ else
+ {
+ // Stack store
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+ }
+#elif defined(_TARGET_X86_)
+ // Longs are returned in two return registers on x86.
+ assert(varTypeIsLong(treeNode));
+
+ // Assumption: current x86 implementation requires that a multi-reg long
+ // var in 'var = call' is flagged as lvIsMultiRegRet to prevent it from
+ // being promoted.
+ unsigned lclNum = treeNode->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ noway_assert(varDsc->lvIsMultiRegRet);
+
+ GenTree* op1 = treeNode->gtGetOp1();
+ GenTree* actualOp1 = op1->gtSkipReloadOrCopy();
+ GenTreeCall* call = actualOp1->AsCall();
+ assert(call->HasMultiRegRetVal());
+
+ genConsumeRegs(op1);
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ assert(regCount == MAX_RET_REG_COUNT);
+
+ // Stack store
+ int offset = 0;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber reg = call->GetRegNumByIdx(i);
+ if (op1->IsCopyOrReload())
+ {
+ // GT_COPY/GT_RELOAD will have valid reg for those positions
+ // that need to be copied or reloaded.
+ regNumber reloadReg = op1->AsCopyOrReload()->GetRegNumByIdx(i);
+ if (reloadReg != REG_NA)
+ {
+ reg = reloadReg;
+ }
+ }
+
+ assert(reg != REG_NA);
+ getEmitter()->emitIns_S_R(ins_Store(type), emitTypeSize(type), reg, lclNum, offset);
+ offset += genTypeSize(type);
+ }
+
+ varDsc->lvRegNum = REG_STK;
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
+ assert(!"Unreached");
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING && !_TARGET_X86_
+}
+
+//------------------------------------------------------------------------
+// genLclHeap: Generate code for localloc.
+//
+// Arguments:
+// tree - the localloc tree to generate.
+//
+// Notes:
+// Note that for x86, we don't track ESP movements while generating the localloc code.
+// The ESP tracking is used to report stack pointer-relative GC info, which is not
+// interesting while doing the localloc construction. Also, for functions with localloc,
+// we have EBP frames, and EBP-relative locals, and ESP-relative accesses only for function
+// call arguments. We store the ESP after the localloc is complete in the LocAllocSP
+// variable. This variable is implicitly reported to the VM in the GC info (its position
+// is defined by convention relative to other items), and is used by the GC to find the
+// "base" stack pointer in functions with localloc.
+//
+void CodeGen::genLclHeap(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_LCLHEAP);
+ assert(compiler->compLocallocUsed);
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
+
+ regNumber targetReg = tree->gtRegNum;
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regNumber regCnt = REG_NA;
+ var_types type = genActualType(size->gtType);
+ emitAttr easz = emitTypeSize(type);
+ BasicBlock* endLabel = nullptr;
+
+#ifdef DEBUG
+ // Verify ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(INS_cmp, EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+
+ BasicBlock* esp_check = genCreateTempLabel();
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ inst_JMP(jmpEqual, esp_check);
+ getEmitter()->emitIns(INS_BREAKPOINT);
+ genDefineTempLabel(esp_check);
+ }
+#endif
+
+ noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
+ noway_assert(genStackLevel == 0); // Can't have anything on the stack
+
+ unsigned stackAdjustment = 0;
+ BasicBlock* loop = nullptr;
+
+    // Compute the amount of memory to allocate, properly rounded up to STACK_ALIGN.
+ size_t amount = 0;
+ if (size->IsCnsIntOrI())
+ {
+ // If size is a constant, then it must be contained.
+ assert(size->isContained());
+
+ // If amount is zero then return null in targetReg
+ amount = size->gtIntCon.gtIconVal;
+ if (amount == 0)
+ {
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, targetReg);
+ goto BAILOUT;
+ }
+
+        // 'amount' is the total number of bytes to localloc, rounded up to STACK_ALIGN
+ amount = AlignUp(amount, STACK_ALIGN);
+ }
+ else
+ {
+ // The localloc requested memory size is non-constant.
+
+ // Put the size value in targetReg. If it is zero, bail out by returning null in targetReg.
+ genConsumeRegAndCopy(size, targetReg);
+ endLabel = genCreateTempLabel();
+ getEmitter()->emitIns_R_R(INS_test, easz, targetReg, targetReg);
+ inst_JMP(EJ_je, endLabel);
+
+ // Compute the size of the block to allocate and perform alignment.
+ // If compInitMem=true, we can reuse targetReg as regcnt,
+ // since we don't need any internal registers.
+ if (compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ if (regCnt != targetReg)
+ {
+ // Above, we put the size in targetReg. Now, copy it to our new temp register if necessary.
+ inst_RV_RV(INS_mov, regCnt, targetReg, size->TypeGet());
+ }
+ }
+
+ // Round up the number of bytes to allocate to a STACK_ALIGN boundary. This is done
+ // by code like:
+ // add reg, 15
+ // and reg, -16
+ // However, in the initialized memory case, we need the count of STACK_ALIGN-sized
+ // elements, not a byte count, after the alignment. So instead of the "and", which
+ // becomes unnecessary, generate a shift, e.g.:
+ // add reg, 15
+ // shr reg, 4
+
+ inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));
+
+ if (compiler->info.compInitMem)
+ {
+ // Convert the count from a count of bytes to a loop count. We will loop once per
+            // stack alignment size, so each iteration will zero 4 bytes on x86 and 16 bytes on x64.
+ // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
+ // words per iteration on x64. We will shift off all the stack alignment bits
+ // added above, so there is no need for an 'and' instruction.
+
+ // --- shr regCnt, 2 (or 4) ---
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
+ }
+ else
+ {
+ // Otherwise, mask off the low bits to align the byte count.
+ inst_RV_IV(INS_AND, regCnt, ~(STACK_ALIGN - 1), emitActualTypeSize(type));
+ }
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ // If we have an outgoing arg area then we must adjust the SP by popping off the
+ // outgoing arg area. We will restore it right before we return from this method.
+ //
+    // Localloc returns stack space that is aligned to STACK_ALIGN bytes. The following
+ // are the cases that need to be handled:
+ // i) Method has out-going arg area.
+ // It is guaranteed that size of out-going arg area is STACK_ALIGN'ed (see fgMorphArgs).
+ // Therefore, we will pop off the out-going arg area from RSP before allocating the localloc space.
+ // ii) Method has no out-going arg area.
+ // Nothing to pop off from the stack.
+ if (compiler->lvaOutgoingArgSpaceSize > 0)
+ {
+ assert((compiler->lvaOutgoingArgSpaceSize % STACK_ALIGN) == 0); // This must be true for the stack to remain
+ // aligned
+ inst_RV_IV(INS_add, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize, EA_PTRSIZE);
+ stackAdjustment += compiler->lvaOutgoingArgSpaceSize;
+ }
+#endif
+
+ if (size->IsCnsIntOrI())
+ {
+ // We should reach here only for non-zero, constant size allocations.
+ assert(amount > 0);
+ assert((amount % STACK_ALIGN) == 0);
+ assert((amount % REGSIZE_BYTES) == 0);
+
+ // For small allocations we will generate up to six inline 'push 0' instructions.
+ size_t cntRegSizedWords = amount / REGSIZE_BYTES;
+ if (cntRegSizedWords <= 6)
+ {
+ for (; cntRegSizedWords != 0; cntRegSizedWords--)
+ {
+ inst_IV(INS_push_hide, 0); // push_hide means don't track the stack
+ }
+ goto ALLOC_DONE;
+ }
+
+ bool doNoInitLessThanOnePageAlloc =
+ !compiler->info.compInitMem && (amount < compiler->eeGetPageSize()); // must be < not <=
+
+#ifdef _TARGET_X86_
+ bool needRegCntRegister = true;
+#else // !_TARGET_X86_
+ bool needRegCntRegister = !doNoInitLessThanOnePageAlloc;
+#endif // !_TARGET_X86_
+
+ if (needRegCntRegister)
+ {
+ // If compInitMem=true, we can reuse targetReg as regcnt.
+ // Since size is a constant, regCnt is not yet initialized.
+ assert(regCnt == REG_NA);
+ if (compiler->info.compInitMem)
+ {
+ assert(genCountBits(tmpRegsMask) == 0);
+ regCnt = targetReg;
+ }
+ else
+ {
+ assert(genCountBits(tmpRegsMask) >= 1);
+ regMaskTP regCntMask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~regCntMask;
+ regCnt = genRegNumFromMask(regCntMask);
+ }
+ }
+
+ if (doNoInitLessThanOnePageAlloc)
+ {
+ // Since the size is less than a page, simply adjust ESP.
+ // ESP might already be in the guard page, so we must touch it BEFORE
+ // the alloc, not after.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ // For x86, we don't want to use "sub ESP" because we don't want the emitter to track the adjustment
+ // to ESP. So do the work in the count register.
+ // TODO-CQ: manipulate ESP directly, to share code, reduce #ifdefs, and improve CQ. This would require
+ // creating a way to temporarily turn off the emitter's tracking of ESP, maybe marking instrDescs as "don't
+ // track".
+ inst_RV_RV(INS_mov, regCnt, REG_SPBASE, TYP_I_IMPL);
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+ inst_RV_IV(INS_sub, regCnt, amount, EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt, TYP_I_IMPL);
+#else // !_TARGET_X86_
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+ inst_RV_IV(INS_sub, REG_SPBASE, amount, EA_PTRSIZE);
+#endif // !_TARGET_X86_
+
+ goto ALLOC_DONE;
+ }
+
+ // else, "mov regCnt, amount"
+
+ if (compiler->info.compInitMem)
+ {
+ // When initializing memory, we want 'amount' to be the loop count.
+ assert((amount % STACK_ALIGN) == 0);
+ amount /= STACK_ALIGN;
+ }
+
+ genSetRegToIcon(regCnt, amount, ((int)amount == amount) ? TYP_INT : TYP_LONG);
+ }
+
+ loop = genCreateTempLabel();
+ if (compiler->info.compInitMem)
+ {
+ // At this point 'regCnt' is set to the number of loop iterations for this loop, if each
+ // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
+ // Since we have to zero out the allocated memory AND ensure that RSP is always valid
+ // by tickling the pages, we will just push 0's on the stack.
+
+ assert(genIsValidIntReg(regCnt));
+
+ // Loop:
+ genDefineTempLabel(loop);
+
+#if defined(_TARGET_AMD64_)
+ // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
+ static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
+ inst_IV(INS_push_hide, 0); // --- push 8-byte 0
+ inst_IV(INS_push_hide, 0); // --- push 8-byte 0
+#elif defined(_TARGET_X86_)
+ // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
+ static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
+ inst_IV(INS_push_hide, 0); // --- push 4-byte 0
+#endif // _TARGET_X86_
+
+ // Decrement the loop counter and loop if not done.
+ inst_RV(INS_dec, regCnt, TYP_I_IMPL);
+ inst_JMP(EJ_jne, loop);
+ }
+ else
+ {
+ // At this point 'regCnt' is set to the total number of bytes to localloc.
+ //
+ // We don't need to zero out the allocated memory. However, we do have
+ // to tickle the pages to ensure that ESP is always valid and is
+ // in sync with the "stack guard page". Note that in the worst
+ // case ESP is on the last byte of the guard page. Thus you must
+ // touch ESP+0 first, not ESP+0x1000.
+ //
+ // Another subtlety is that you don't want ESP to be exactly on the
+ // boundary of the guard page, because PUSH is pre-decrement; call
+ // setup would then not touch the guard page but land just beyond it.
+ //
+ // Note that we go through a few hoops so that ESP never points to
+ // illegal pages at any time during the tickling process
+ //
+ // neg REGCNT
+ // add REGCNT, ESP // reg now holds ultimate ESP
+ // jb loop // result is smaller than original ESP (no wrap around)
+ // xor REGCNT, REGCNT // Overflow, pick lowest possible number
+ // loop:
+ // test ESP, [ESP+0] // tickle the page
+ // mov REGTMP, ESP
+ // sub REGTMP, PAGE_SIZE
+ // mov ESP, REGTMP
+ // cmp ESP, REGCNT
+ // jae loop
+ //
+ // mov ESP, REG
+ // end:
+ inst_RV(INS_NEG, regCnt, TYP_I_IMPL);
+ inst_RV_RV(INS_add, regCnt, REG_SPBASE, TYP_I_IMPL);
+ inst_JMP(EJ_jb, loop);
+
+ instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
+
+ genDefineTempLabel(loop);
+
+ // Tickle the page at the current ESP, then move the decremented value into ESP.
+ // Note that the probe has to be done BEFORE the update of ESP, since
+ // ESP might already be on the guard page. It is OK to leave
+ // the final value of ESP on the guard page.
+ getEmitter()->emitIns_AR_R(INS_TEST, EA_4BYTE, REG_SPBASE, REG_SPBASE, 0);
+
+ // This is a harmless trick to avoid the emitter trying to track the
+ // decrement of the ESP - we do the subtraction in another reg instead
+ // of adjusting ESP directly.
+ assert(tmpRegsMask != RBM_NONE);
+ assert(genCountBits(tmpRegsMask) == 1);
+ regNumber regTmp = genRegNumFromMask(tmpRegsMask);
+
+ inst_RV_RV(INS_mov, regTmp, REG_SPBASE, TYP_I_IMPL);
+ inst_RV_IV(INS_sub, regTmp, compiler->eeGetPageSize(), EA_PTRSIZE);
+ inst_RV_RV(INS_mov, REG_SPBASE, regTmp, TYP_I_IMPL);
+
+ inst_RV_RV(INS_cmp, REG_SPBASE, regCnt, TYP_I_IMPL);
+ inst_JMP(EJ_jae, loop);
+
+ // Move the final value to ESP
+ inst_RV_RV(INS_mov, REG_SPBASE, regCnt);
+ }
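+
+ // [Illustrative sketch, not part of the JIT code] In C-like pseudo-code the probe
+ // sequence emitted above behaves roughly as follows ('finalSp' stands for the value
+ // left in regCnt, i.e. the ultimate stack pointer, or 0 on unsigned wrap-around):
+ //
+ // do
+ // {
+ // volatile char probe = *(char*)currentSp; // "test [ESP], ESP" touches the page
+ // currentSp -= pageSize; // done via a temp reg so the emitter
+ // // does not track an ESP adjustment
+ // } while (currentSp >= finalSp); // cmp ESP, regCnt / jae loop
+ // currentSp = finalSp; // final "mov ESP, regCnt"
+ //
+ // The probe always precedes the decrement, so ESP never skips over the guard page.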
+
+ALLOC_DONE:
+ // Re-adjust SP to allocate out-going arg area
+ if (stackAdjustment > 0)
+ {
+ assert((stackAdjustment % STACK_ALIGN) == 0); // This must be true for the stack to remain aligned
+ inst_RV_IV(INS_sub, REG_SPBASE, stackAdjustment, EA_PTRSIZE);
+ }
+
+ // Return the stackalloc'ed address in result register.
+ // TargetReg = RSP + stackAdjustment.
+ getEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, targetReg, REG_SPBASE, stackAdjustment);
+
+ if (endLabel != nullptr)
+ {
+ genDefineTempLabel(endLabel);
+ }
+
+BAILOUT:
+
+ // Write the lvaLocAllocSPvar stack frame slot
+ noway_assert(compiler->lvaLocAllocSPvar != BAD_VAR_NUM);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaLocAllocSPvar, 0);
+
+#if STACK_PROBES
+ if (compiler->opts.compNeedStackProbes)
+ {
+ genGenerateStackProbe();
+ }
+#endif
+
+#ifdef DEBUG
+ // Update new ESP
+ if (compiler->opts.compStackCheckOnRet)
+ {
+ noway_assert(compiler->lvaReturnEspCheck != 0xCCCCCCCC &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvDoNotEnregister &&
+ compiler->lvaTable[compiler->lvaReturnEspCheck].lvOnFrame);
+ getEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaReturnEspCheck, 0);
+ }
+#endif
+
+ genProduceReg(tree);
+}
+
+void CodeGen::genCodeForStoreBlk(GenTreeBlk* storeBlkNode)
+{
+ if (storeBlkNode->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitDisableGC();
+ }
+ bool isCopyBlk = storeBlkNode->OperIsCopyBlkOp();
+
+ switch (storeBlkNode->gtBlkOpKind)
+ {
+#ifdef _TARGET_AMD64_
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlk(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlk(storeBlkNode);
+ }
+ break;
+#endif // _TARGET_AMD64_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkRepMovs(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlkRepStos(storeBlkNode);
+ }
+ break;
+ case GenTreeBlk::BlkOpKindUnroll:
+ if (isCopyBlk)
+ {
+ genCodeForCpBlkUnroll(storeBlkNode);
+ }
+ else
+ {
+ genCodeForInitBlkUnroll(storeBlkNode);
+ }
+ break;
+ default:
+ unreached();
+ }
+ if (storeBlkNode->gtBlkOpGcUnsafe)
+ {
+ getEmitter()->emitEnableGC();
+ }
+}
+
+// Generate code for InitBlk using rep stos.
+// Preconditions:
+ // The size of the buffer must be a constant and less than INITBLK_STOS_LIMIT bytes.
+ // For any size larger than that, we use the helper even if both the
+ // fill byte and the size are integer constants.
+void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+#ifdef _TARGET_AMD64_
+ assert(size != 0);
+#endif
+ if (initVal->IsCnsIntOrI())
+ {
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT);
+#endif
+ }
+
+#endif // DEBUG
+
+ genConsumeBlockOp(initBlkNode, REG_RDI, REG_RAX, REG_RCX);
+ instGen(INS_r_stosb);
+}
+
+// Generate code for InitBlk by performing a loop unroll
+// Preconditions:
+// a) Both the size and fill byte value are integer constants.
+ // b) The size of the struct to initialize is no larger than INITBLK_UNROLL_LIMIT bytes.
+//
+void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
+{
+ // Make sure we got the arguments of the initblk/initobj operation in the right registers
+ unsigned size = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+ assert(size != 0);
+ assert(size <= INITBLK_UNROLL_LIMIT);
+ assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
+
+ emitter* emit = getEmitter();
+
+ genConsumeOperands(initBlkNode);
+
+ // If the initVal was moved, or spilled and reloaded to a different register,
+ // get the original initVal from below the GT_RELOAD, but only after capturing the valReg,
+ // which needs to be the new register.
+ regNumber valReg = initVal->gtRegNum;
+ initVal = initVal->gtSkipReloadOrCopy();
+
+ unsigned offset = 0;
+
+ // Perform an unroll using SSE2 loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ regNumber tmpReg = genRegNumFromMask(initBlkNode->gtRsvdRegs);
+
+#ifdef DEBUG
+ assert(initBlkNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(initBlkNode->gtRsvdRegs) == 1);
+ assert(genIsValidFloatReg(tmpReg));
+#endif // DEBUG
+
+ if (initVal->gtIntCon.gtIconVal != 0)
+ {
+ emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, tmpReg, valReg);
+ emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
+#ifdef _TARGET_X86_
+ // For x86, we need one more to convert it from 8 bytes to 16 bytes.
+ emit->emitIns_R_R(INS_punpckldq, EA_8BYTE, tmpReg, tmpReg);
+#endif // _TARGET_X86_
+ }
+ else
+ {
+ emit->emitIns_R_R(INS_xorpd, EA_8BYTE, tmpReg, tmpReg);
+ }
+
+ // Determine how many 16 byte slots we're going to fill using SSE movs.
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ while (slots-- > 0)
+ {
+ emit->emitIns_AR_R(INS_movdqu, EA_8BYTE, tmpReg, dstAddr->gtRegNum, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (or a struct smaller than 16 bytes).
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+#else // !_TARGET_X86_
+ emit->emitIns_AR_R(INS_mov, EA_8BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_4BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_2BYTE, valReg, dstAddr->gtRegNum, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ emit->emitIns_AR_R(INS_mov, EA_1BYTE, valReg, dstAddr->gtRegNum, offset);
+ }
+}
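+
+// [Illustrative sketch, not part of the JIT code] The tail handling above is the usual
+// power-of-two decomposition of a small remainder. For example, assuming size == 13 and
+// a fill value already widened into valReg, the stores emitted would be:
+//
+// (13 & 8) != 0 -> one 8-byte store (two 4-byte stores on x86) at offset 0
+// (13 & 4) != 0 -> one 4-byte store at offset 8
+// (13 & 2) == 0 -> skipped
+// (13 & 1) != 0 -> one 1-byte store at offset 12
+//
+// so at most one store is emitted per power of two and the offsets never overlap.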
+
+// Generates code for InitBlk by calling the VM memset helper function.
+// Preconditions:
+ // a) The size argument of the InitBlk is not an integer constant, or
+ // b) The size argument of the InitBlk is >= INITBLK_STOS_LIMIT bytes.
+void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
+{
+#ifdef _TARGET_AMD64_
+ // Make sure we got the arguments of the initblk operation in the right registers
+ unsigned blockSize = initBlkNode->Size();
+ GenTreePtr dstAddr = initBlkNode->Addr();
+ GenTreePtr initVal = initBlkNode->Data();
+
+ assert(!dstAddr->isContained());
+ assert(!initVal->isContained());
+
+ if (blockSize != 0)
+ {
+ assert(blockSize >= CPBLK_MOVS_LIMIT);
+ }
+
+ genConsumeBlockOp(initBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+ genEmitHelperCall(CORINFO_HELP_MEMSET, 0, EA_UNKNOWN);
+#else // !_TARGET_AMD64_
+ NYI_X86("Helper call for InitBlk");
+#endif // !_TARGET_AMD64_
+}
+
+// Generate code for a load from some address + offset.
+// baseNode: tree node which can be either a local address or an arbitrary node
+// offset: distance from the baseNode address at which to load
+void CodeGen::genCodeForLoadOffset(instruction ins, emitAttr size, regNumber dst, GenTree* baseNode, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ {
+ offset += baseNode->gtLclFld.gtLclOffs;
+ }
+ emit->emitIns_R_S(ins, size, dst, baseNode->gtLclVarCommon.gtLclNum, offset);
+ }
+ else
+ {
+ emit->emitIns_R_AR(ins, size, dst, baseNode->gtRegNum, offset);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCodeForStoreOffset: Generate code to store a reg to [base + offset].
+//
+// Arguments:
+// ins - the instruction to generate.
+// size - the size that needs to be stored.
+// src - the register which needs to be stored.
+// baseNode - the base, relative to which to store the src register.
+// offset - the offset that is added to the baseNode to calculate the address to store into.
+//
+void CodeGen::genCodeForStoreOffset(instruction ins, emitAttr size, regNumber src, GenTree* baseNode, unsigned offset)
+{
+ emitter* emit = getEmitter();
+
+ if (baseNode->OperIsLocalAddr())
+ {
+ if (baseNode->gtOper == GT_LCL_FLD_ADDR)
+ {
+ offset += baseNode->gtLclFld.gtLclOffs;
+ }
+
+ emit->emitIns_S_R(ins, size, src, baseNode->AsLclVarCommon()->GetLclNum(), offset);
+ }
+ else
+ {
+ emit->emitIns_AR_R(ins, size, src, baseNode->gtRegNum, offset);
+ }
+}
+
+// Generates CpBlk code by performing a loop unroll
+// Preconditions:
+// The size argument of the CpBlk node is a constant and <= 64 bytes.
+// This may seem small but covers >95% of the cases in several framework assemblies.
+//
+void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter* emit = getEmitter();
+
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ if (!srcAddr->isContained())
+ {
+ genConsumeReg(srcAddr);
+ }
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ // TODO-Cleanup: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
+ // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ source->SetOper(GT_LCL_VAR_ADDR);
+ }
+ else
+ {
+ assert(source->OperGet() == GT_LCL_FLD);
+ source->SetOper(GT_LCL_FLD_ADDR);
+ }
+ srcAddr = source;
+ }
+
+ if (!dstAddr->isContained())
+ {
+ genConsumeReg(dstAddr);
+ }
+
+ unsigned offset = 0;
+
+ // If the size of this struct is at least 16 bytes,
+ // use SSE2 so we can do 16-byte loads and stores.
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(cpBlkNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, srcAddr, offset);
+ // Store
+ genCodeForStoreOffset(INS_movdqu, EA_8BYTE, xmmReg, dstAddr, offset);
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(cpBlkNode->gtRsvdRegs & RBM_ALLINT);
+
+ if ((size & 8) != 0)
+ {
+#ifdef _TARGET_X86_
+ // TODO-X86-CQ: [1091735] Revisit block ops codegen. One example: use movq for 8 byte movs.
+ for (unsigned savedOffs = offset; offset < savedOffs + 8; offset += 4)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ }
+#else // !_TARGET_X86_
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_8BYTE, tmpReg, dstAddr, offset);
+ offset += 8;
+#endif // !_TARGET_X86_
+ }
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_4BYTE, tmpReg, dstAddr, offset);
+ offset += 4;
+ }
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_2BYTE, tmpReg, dstAddr, offset);
+ offset += 2;
+ }
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, srcAddr, offset);
+ genCodeForStoreOffset(INS_mov, EA_1BYTE, tmpReg, dstAddr, offset);
+ }
+ }
+}
+
+// Generate code for CpBlk by using rep movs
+// Preconditions:
+// The size argument of the CpBlk is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
+{
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned size = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+
+#ifdef DEBUG
+ assert(!dstAddr->isContained());
+ assert(source->isContained());
+
+#ifdef _TARGET_X86_
+ if (size == 0)
+ {
+ noway_assert(cpBlkNode->OperGet() == GT_STORE_DYN_BLK);
+ }
+ else
+#endif
+ {
+#ifdef _TARGET_AMD64_
+ assert(size > CPBLK_UNROLL_LIMIT && size < CPBLK_MOVS_LIMIT);
+#else
+ assert(size > CPBLK_UNROLL_LIMIT);
+#endif
+ }
+#endif // DEBUG
+
+ genConsumeBlockOp(cpBlkNode, REG_RDI, REG_RSI, REG_RCX);
+ instGen(INS_r_movsb);
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//---------------------------------------------------------------------------------------------------------------//
+// genStructPutArgUnroll: Generates code for passing a struct arg on stack by value using loop unrolling.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct will be copied on the stack.
+//
+// TODO-Amd64-Unix: Try to share code with copyblk.
+// Need refactoring of copyblk before it could be used for putarg_stk.
+// The difference for now is that a putarg_stk contains its children, while cpblk does not.
+// This creates differences in code. After some significant refactoring it could be reused.
+//
+void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+{
+ // We will never call this method for SIMD types, which are stored directly
+ // in genPutStructArgStk().
+ noway_assert(putArgNode->TypeGet() == TYP_STRUCT);
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr src = putArgNode->gtOp.gtOp1;
+
+ size_t size = putArgNode->getArgSize();
+ assert(size <= CPBLK_UNROLL_LIMIT);
+
+ emitter* emit = getEmitter();
+ unsigned putArgOffset = putArgNode->getArgOffset();
+
+ assert(src->isContained());
+
+ assert(src->gtOper == GT_OBJ);
+
+ if (!src->gtOp.gtOp1->isContained())
+ {
+ genConsumeReg(src->gtOp.gtOp1);
+ }
+
+ unsigned offset = 0;
+
+ // If the size of this struct is at least 16 bytes,
+ // use SSE2 so we can do 16-byte loads and stores.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ assert(putArgNode->gtRsvdRegs != RBM_NONE);
+ regNumber xmmReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(xmmReg));
+ size_t slots = size / XMM_REGSIZE_BYTES;
+
+ assert(putArgNode->gtGetOp1()->isContained());
+ assert(putArgNode->gtGetOp1()->gtOp.gtOper == GT_OBJ);
+
+ // TODO: In the below code the load and store instructions are for 16 bytes, but the
+ // type is EA_8BYTE. The movdqa/u are 16 byte instructions, so it works, but
+ // this probably needs to be changed.
+ while (slots-- > 0)
+ {
+ // Load
+ genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmReg, src->gtGetOp1(),
+ offset); // Load the address of the child of the Obj node.
+
+ // Store
+ emit->emitIns_S_R(INS_movdqu, EA_8BYTE, xmmReg, baseVarNum, putArgOffset + offset);
+
+ offset += XMM_REGSIZE_BYTES;
+ }
+ }
+
+ // Fill the remainder (15 bytes or less) if there's one.
+ if ((size & 0xf) != 0)
+ {
+ // Grab the integer temp register to emit the remaining loads and stores.
+ regNumber tmpReg = genRegNumFromMask(putArgNode->gtRsvdRegs & RBM_ALLINT);
+ assert(genIsValidIntReg(tmpReg));
+
+ if ((size & 8) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_8BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_8BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 8;
+ }
+
+ if ((size & 4) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_4BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_4BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 4;
+ }
+
+ if ((size & 2) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_2BYTE, tmpReg, src->gtOp.gtOp1, offset);
+
+ emit->emitIns_S_R(INS_mov, EA_2BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+
+ offset += 2;
+ }
+
+ if ((size & 1) != 0)
+ {
+ genCodeForLoadOffset(INS_mov, EA_1BYTE, tmpReg, src->gtOp.gtOp1, offset);
+ emit->emitIns_S_R(INS_mov, EA_1BYTE, tmpReg, baseVarNum, putArgOffset + offset);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genStructPutArgRepMovs: Generates code for passing a struct arg by value on stack using Rep Movs.
+//
+// Arguments:
+// putArgNode - the PutArgStk tree.
+// baseVarNum - the base var number, relative to which the by-val struct bits will go.
+//
+// Preconditions:
+// The size argument of the PutArgStk (for structs) is a constant and is between
+// CPBLK_UNROLL_LIMIT and CPBLK_MOVS_LIMIT bytes.
+//
+void CodeGen::genStructPutArgRepMovs(GenTreePutArgStk* putArgNode, unsigned baseVarNum)
+{
+ assert(putArgNode->TypeGet() == TYP_STRUCT);
+ assert(putArgNode->getArgSize() > CPBLK_UNROLL_LIMIT);
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ GenTreePtr dstAddr = putArgNode;
+ GenTreePtr srcAddr = putArgNode->gtGetOp1();
+
+ // Validate state.
+ assert(putArgNode->gtRsvdRegs == (RBM_RDI | RBM_RCX | RBM_RSI));
+ assert(srcAddr->isContained());
+
+ genConsumePutStructArgStk(putArgNode, REG_RDI, REG_RSI, REG_RCX, baseVarNum);
+ instGen(INS_r_movsb);
+}
+
+//------------------------------------------------------------------------
+// If any Vector3 args are on the stack and they are not pass-by-ref, the upper 32 bits
+// must be cleared to zero. The native compiler doesn't clear the upper bits,
+// and there is no way to know whether the caller is native or not. So, the upper
+// 32 bits of a Vector3 argument on the stack are always cleared to zero.
+#ifdef FEATURE_SIMD
+void CodeGen::genClearStackVec3ArgUpperBits()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In genClearStackVec3ArgUpperBits()\n");
+#endif
+
+ assert(compiler->compGeneratingProlog);
+
+ for (unsigned varNum = 0; varNum < compiler->info.compArgsCount; varNum++)
+ {
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ assert(varDsc->lvIsParam);
+
+ // Does the var have a TYP_SIMD12 type?
+ if (varDsc->lvType != TYP_SIMD12)
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsRegArg)
+ {
+ // Clear the upper 32 bits by mov dword ptr [V_ARG_BASE+0xC], 0
+ getEmitter()->emitIns_S_I(ins_Store(TYP_INT), EA_4BYTE, varNum, genTypeSize(TYP_FLOAT) * 3, 0);
+ }
+ else
+ {
+ // Assume that for x64 linux, an argument is fully in registers
+ // or fully on the stack.
+ regNumber argReg = varDsc->GetOtherArgReg();
+
+ // Clear the upper 32 bits by two shift instructions.
+ // argReg = argReg << 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ // argReg = argReg >> 96
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), argReg, 12);
+ }
+ }
+}
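+
+// [Illustrative sketch, not part of the JIT code] In the register case above, GetOtherArgReg()
+// appears to be the XMM register holding the second eightbyte of the split Vector3, i.e. the
+// 'z' float in its low 4 bytes with undefined data above it. Shifting the 128-bit register
+// left and then right by 12 bytes keeps only those low 4 bytes (low bytes shown on the left):
+//
+// xmm = [ z | 12 bytes of garbage ]
+// pslldq xmm, 12 -> [ 0 0 0 | z ]
+// psrldq xmm, 12 -> [ z | 0 0 0 ]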
+#endif // FEATURE_SIMD
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+// Generate code for CpObj nodes which copy structs that have interleaved
+// GC pointers.
+// This will generate a sequence of movsq instructions for the non-gc members,
+// and calls to the CORINFO_HELP_ASSIGN_BYREF helper otherwise.
+void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
+{
+ // Make sure we got the arguments of the cpobj operation in the right registers
+ GenTreePtr dstAddr = cpObjNode->Addr();
+ GenTreePtr source = cpObjNode->Data();
+ GenTreePtr srcAddr = nullptr;
+ bool sourceIsLocal = false;
+
+ assert(source->isContained());
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ sourceIsLocal = true;
+ // TODO: Consider making the addrForm() method in Rationalize public, e.g. in GenTree.
+ // OR: transform source to GT_IND(GT_LCL_VAR_ADDR)
+ if (source->OperGet() == GT_LCL_VAR)
+ {
+ source->SetOper(GT_LCL_VAR_ADDR);
+ }
+ else
+ {
+ assert(source->OperGet() == GT_LCL_FLD);
+ source->SetOper(GT_LCL_FLD_ADDR);
+ }
+ srcAddr = source;
+ }
+
+ bool dstOnStack = dstAddr->OperIsLocalAddr();
+
+#ifdef DEBUG
+ bool isRepMovsqUsed = false;
+
+ assert(!dstAddr->isContained());
+
+ // If the GenTree node has data about GC pointers, this means we're dealing
+ // with CpObj, so this requires special logic.
+ assert(cpObjNode->gtGcPtrCount > 0);
+
+ // The movsq instruction is used for copying non-gcref fields, and it needs
+ // src = RSI and dst = RDI.
+ // Either these registers must not contain lclVars, or they must be dying or marked for spill.
+ // This is because these registers are incremented as we go through the struct.
+ GenTree* actualSrcAddr = srcAddr->gtSkipReloadOrCopy();
+ GenTree* actualDstAddr = dstAddr->gtSkipReloadOrCopy();
+ unsigned srcLclVarNum = BAD_VAR_NUM;
+ unsigned dstLclVarNum = BAD_VAR_NUM;
+ bool isSrcAddrLiveOut = false;
+ bool isDstAddrLiveOut = false;
+ if (genIsRegCandidateLocal(actualSrcAddr))
+ {
+ srcLclVarNum = actualSrcAddr->AsLclVarCommon()->gtLclNum;
+ isSrcAddrLiveOut = ((actualSrcAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ if (genIsRegCandidateLocal(actualDstAddr))
+ {
+ dstLclVarNum = actualDstAddr->AsLclVarCommon()->gtLclNum;
+ isDstAddrLiveOut = ((actualDstAddr->gtFlags & (GTF_VAR_DEATH | GTF_SPILL)) == 0);
+ }
+ assert((actualSrcAddr->gtRegNum != REG_RSI) || !isSrcAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isDstAddrLiveOut));
+ assert((actualDstAddr->gtRegNum != REG_RDI) || !isDstAddrLiveOut ||
+ ((srcLclVarNum == dstLclVarNum) && !isSrcAddrLiveOut));
+#endif // DEBUG
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ if (sourceIsLocal)
+ {
+ inst_RV_TT(INS_lea, REG_RSI, source, 0, EA_BYREF);
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_NA, REG_NA);
+ }
+ else
+ {
+ genConsumeBlockOp(cpObjNode, REG_RDI, REG_RSI, REG_NA);
+ }
+ gcInfo.gcMarkRegPtrVal(REG_RSI, srcAddr->TypeGet());
+ gcInfo.gcMarkRegPtrVal(REG_RDI, dstAddr->TypeGet());
+
+ unsigned slots = cpObjNode->gtSlots;
+
+ // If we can prove it's on the stack we don't need to use the write barrier.
+ if (dstOnStack)
+ {
+ if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+#ifdef DEBUG
+ // If the destination of the CpObj is on the stack
+ // make sure we allocated RCX to emit rep movsq.
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
+ assert(tmpReg == REG_RCX);
+ isRepMovsqUsed = true;
+#endif // DEBUG
+
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, slots);
+ instGen(INS_r_movsq);
+ }
+ else
+ {
+ // For small structs, it's better to emit a sequence of movsq than to
+ // emit a rep movsq instruction.
+ while (slots > 0)
+ {
+ instGen(INS_movsq);
+ slots--;
+ }
+ }
+ }
+ else
+ {
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+ unsigned gcPtrCount = cpObjNode->gtGcPtrCount;
+
+ unsigned i = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ // Otherwise, we can save code-size and improve CQ by emitting
+ // rep movsq
+ regNumber tmpReg = genRegNumFromMask(cpObjNode->gtRsvdRegs & RBM_ALLINT);
+ assert(tmpReg == REG_RCX);
+ isRepMovsqUsed = true;
+#endif // DEBUG
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+ default:
+ // We have a GC pointer; call the byref write barrier helper.
+ genEmitHelperCall(CORINFO_HELP_ASSIGN_BYREF, 0, EA_PTRSIZE);
+ gcPtrCount--;
+ i++;
+ }
+ }
+
+ assert(gcPtrCount == 0);
+ }
+
+ // Clear the gcInfo for RSI and RDI.
+ // While we normally update GC info prior to the last instruction that uses them,
+ // these actually live into the helper call.
+ gcInfo.gcMarkRegSetNpt(RBM_RSI);
+ gcInfo.gcMarkRegSetNpt(RBM_RDI);
+}
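+
+// [Illustrative sketch, not part of the JIT code] The slot walk above amounts to the
+// following (lengthOfNonGcRun is a hypothetical helper that counts consecutive
+// TYPE_GC_NONE entries in gcPtrs):
+//
+// for (unsigned i = 0; i < slots; )
+// {
+// if (gcPtrs[i] == TYPE_GC_NONE)
+// {
+// unsigned run = lengthOfNonGcRun(gcPtrs, i, slots);
+// // short run: 'movsq' x run; long run: 'mov ecx, run; rep movsq'
+// i += run;
+// }
+// else
+// {
+// // one pointer-sized slot copied with the byref write barrier;
+// // CORINFO_HELP_ASSIGN_BYREF advances RSI/RDI itself
+// i++;
+// }
+// }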
+
+// Generate code for a CpBlk node by means of the VM memcpy helper call.
+// Preconditions:
+// a) The size argument of the CpBlk is not an integer constant, or
+// b) The size argument is a constant but is larger than CPBLK_MOVS_LIMIT bytes.
+void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
+{
+#ifdef _TARGET_AMD64_
+ // Make sure we got the arguments of the cpblk operation in the right registers
+ unsigned blockSize = cpBlkNode->Size();
+ GenTreePtr dstAddr = cpBlkNode->Addr();
+ GenTreePtr source = cpBlkNode->Data();
+ GenTreePtr srcAddr = nullptr;
+
+ // Size goes in arg2
+ if (blockSize != 0)
+ {
+ assert(blockSize >= CPBLK_MOVS_LIMIT);
+ assert((cpBlkNode->gtRsvdRegs & RBM_ARG_2) != 0);
+ }
+ else
+ {
+ noway_assert(cpBlkNode->gtOper == GT_STORE_DYN_BLK);
+ }
+
+ // Source address goes in arg1
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = source->gtGetOp1();
+ assert(!srcAddr->isContained());
+ }
+ else
+ {
+ noway_assert(source->IsLocal());
+ assert((cpBlkNode->gtRsvdRegs & RBM_ARG_1) != 0);
+ inst_RV_TT(INS_lea, REG_ARG_1, source, 0, EA_BYREF);
+ }
+
+ genConsumeBlockOp(cpBlkNode, REG_ARG_0, REG_ARG_1, REG_ARG_2);
+
+ genEmitHelperCall(CORINFO_HELP_MEMCPY, 0, EA_UNKNOWN);
+#else // !_TARGET_AMD64_
+ noway_assert(false && "Helper call for CpBlk is not needed.");
+#endif // !_TARGET_AMD64_
+}
+
+// Generate code to do a switch statement based on a table of ip-relative offsets.
+void CodeGen::genTableBasedSwitch(GenTree* treeNode)
+{
+ genConsumeOperands(treeNode->AsOp());
+ regNumber idxReg = treeNode->gtOp.gtOp1->gtRegNum;
+ regNumber baseReg = treeNode->gtOp.gtOp2->gtRegNum;
+
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // load the ip-relative offset (which is relative to start of fgFirstBB)
+ getEmitter()->emitIns_R_ARX(INS_mov, EA_4BYTE, baseReg, baseReg, idxReg, 4, 0);
+
+ // add it to the absolute address of fgFirstBB
+ compiler->fgFirstBB->bbFlags |= BBF_JMP_TARGET;
+ getEmitter()->emitIns_R_L(INS_lea, EA_PTR_DSP_RELOC, compiler->fgFirstBB, tmpReg);
+ getEmitter()->emitIns_R_R(INS_add, EA_PTRSIZE, baseReg, tmpReg);
+ // jmp baseReg
+ getEmitter()->emitIns_R(INS_i_jmp, emitTypeSize(TYP_I_IMPL), baseReg);
+}
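+
+// [Illustrative sketch, not part of the JIT code] The dispatch above is equivalent to:
+//
+// int32_t relOffset = jumpTable[idx]; // 4-byte entries, relative to fgFirstBB
+// void* target = (uint8_t*)addressOfFirstBB + relOffset;
+// // jump to target ("jmp baseReg")
+//
+// Storing 32-bit offsets relative to fgFirstBB keeps the table compact and relocatable;
+// the absolute address of fgFirstBB is materialized once by the 'lea' with EA_PTR_DSP_RELOC.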
+
+// emits the table and an instruction to get the address of the first element
+void CodeGen::genJumpTable(GenTree* treeNode)
+{
+ noway_assert(compiler->compCurBB->bbJumpKind == BBJ_SWITCH);
+ assert(treeNode->OperGet() == GT_JMPTABLE);
+
+ unsigned jumpCount = compiler->compCurBB->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTable = compiler->compCurBB->bbJumpSwt->bbsDstTab;
+ unsigned jmpTabOffs;
+ unsigned jmpTabBase;
+
+ jmpTabBase = getEmitter()->emitBBTableDataGenBeg(jumpCount, true);
+
+ jmpTabOffs = 0;
+
+ JITDUMP("\n J_M%03u_DS%02u LABEL DWORD\n", Compiler::s_compMethodsCount, jmpTabBase);
+
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* target = *jumpTable++;
+ noway_assert(target->bbFlags & BBF_JMP_TARGET);
+
+ JITDUMP(" DD L_M%03u_BB%02u\n", Compiler::s_compMethodsCount, target->bbNum);
+
+ getEmitter()->emitDataGenData(i, target);
+ }
+
+ getEmitter()->emitDataGenEnd();
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+ getEmitter()->emitIns_R_C(INS_lea, emitTypeSize(TYP_I_IMPL), treeNode->gtRegNum,
+ compiler->eeFindJitDataOffs(jmpTabBase), 0);
+ genProduceReg(treeNode);
+}
+
+// generate code for the locked operations:
+// GT_LOCKADD, GT_XCHG, GT_XADD
+void CodeGen::genLockedInstructions(GenTree* treeNode)
+{
+ GenTree* data = treeNode->gtOp.gtOp2;
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber dataReg = data->gtRegNum;
+ regNumber addrReg = addr->gtRegNum;
+ instruction ins;
+
+ // all of these nodes implicitly do an indirection on op1
+ // so create a temporary node to feed into the pattern matching
+ GenTreeIndir i = indirForm(data->TypeGet(), addr);
+ genConsumeReg(addr);
+
+ // The register allocator should have extended the lifetime of the address
+ // so that it is not used as the target.
+ noway_assert(addrReg != targetReg);
+
+ // If data is a lclVar that's not a last use, we'd better have allocated a register
+ // for the result (except in the case of GT_LOCKADD which does not produce a register result).
+ assert(targetReg != REG_NA || treeNode->OperGet() == GT_LOCKADD || !genIsRegCandidateLocal(data) ||
+ (data->gtFlags & GTF_VAR_DEATH) != 0);
+
+ genConsumeIfReg(data);
+ if (targetReg != REG_NA && dataReg != REG_NA && dataReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(data->TypeGet()), targetReg, dataReg);
+ data->gtRegNum = targetReg;
+
+ // TODO-XArch-Cleanup: Consider whether it is worth it, for debugging purposes, to restore the
+ // original gtRegNum on data, after calling emitInsBinary below.
+ }
+ switch (treeNode->OperGet())
+ {
+ case GT_LOCKADD:
+ instGen(INS_lock);
+ ins = INS_add;
+ break;
+ case GT_XCHG:
+ // lock is implied by xchg
+ ins = INS_xchg;
+ break;
+ case GT_XADD:
+ instGen(INS_lock);
+ ins = INS_xadd;
+ break;
+ default:
+ unreached();
+ }
+ getEmitter()->emitInsBinary(ins, emitTypeSize(data), &i, data);
+
+ if (treeNode->gtRegNum != REG_NA)
+ {
+ genProduceReg(treeNode);
+ }
+}
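+
+// [Illustrative sketch, not part of the JIT code] The three node kinds above map to x86
+// interlocked primitives with roughly these semantics:
+//
+// GT_LOCKADD: lock add [addr], data // atomic add; no value produced
+// GT_XADD: lock xadd [addr], reg // atomic add; reg receives the original value
+// GT_XCHG: xchg [addr], reg // atomic exchange; 'lock' is implicit for xchg
+//
+// This is why data is first copied into targetReg above: for xadd/xchg the same register
+// operand is both the source value and the destination of the returned value.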
+
+// generate code for BoundsCheck nodes
+void CodeGen::genRangeCheck(GenTreePtr oper)
+{
+#ifdef FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK || oper->OperGet() == GT_SIMD_CHK);
+#else // !FEATURE_SIMD
+ noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
+#endif // !FEATURE_SIMD
+
+ GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
+
+ GenTreePtr arrLen = bndsChk->gtArrLen;
+ GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrRef = nullptr;
+ int lenOffset = 0;
+
+ GenTree * src1, *src2;
+ emitJumpKind jmpKind;
+
+ genConsumeRegs(arrLen);
+ genConsumeRegs(arrIndex);
+
+ if (arrIndex->isContainedIntOrIImmed())
+ {
+ // arrIndex is a contained constant. In this case
+ // we will generate one of the following
+ // cmp [mem], immed (if arrLen is a memory op)
+ // cmp reg, immed (if arrLen is in a reg)
+ //
+ // That is, arrLen cannot be a contained immed.
+ assert(!arrLen->isContainedIntOrIImmed());
+
+ src1 = arrLen;
+ src2 = arrIndex;
+ jmpKind = EJ_jbe;
+ }
+ else
+ {
+ // arrIndex could either be a contained memory op or a reg
+ // In this case we will generate one of the following
+ // cmp [mem], immed (if arrLen is a constant)
+ // cmp [mem], reg (if arrLen is in a reg)
+ // cmp reg, immed (if arrIndex is in a reg)
+ // cmp reg1, reg2 (if arrIndex is in reg1)
+ // cmp reg, [mem] (if arrLen is a memory op)
+ //
+ // That is, only one of arrIndex or arrLen can be a memory op.
+ assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
+
+ src1 = arrIndex;
+ src2 = arrLen;
+ jmpKind = EJ_jae;
+ }
+
+ var_types bndsChkType = src2->TypeGet();
+#if DEBUG
+ // Bounds checks can only be 32 or 64 bit sized comparisons.
+ assert(bndsChkType == TYP_INT || bndsChkType == TYP_LONG);
+
+ // The type of the bounds check should always be wide enough to compare against the index.
+ assert(emitTypeSize(bndsChkType) >= emitTypeSize(src1->TypeGet()));
+#endif // DEBUG
+
+ getEmitter()->emitInsBinary(INS_cmp, emitTypeSize(bndsChkType), src1, src2);
+ genJumpToThrowHlpBlk(jmpKind, bndsChk->gtThrowKind, bndsChk->gtIndRngFailBB);
+}
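+
+// [Illustrative sketch, not part of the JIT code] Both operand orders above implement the
+// same unsigned check 'index >= length => throw':
+//
+// cmp arrLen, idxImmed ; jbe throw // taken when arrLen <= idxImmed, i.e. idxImmed >= arrLen
+// cmp arrIndex, arrLen ; jae throw // taken when arrIndex >= arrLen
+//
+// The unsigned comparison also catches negative indices, which wrap to large unsigned values.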
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayLowerBound: Returns the offset from the Array object to the
+// lower bound for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the lower bound offset will be returned.
+//
+// Return Value:
+// The offset.
+
+unsigned CodeGen::genOffsetOfMDArrayLowerBound(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * (dimension + rank);
+}
+
+//------------------------------------------------------------------------
+// genOffsetOfMDArrayDimensionSize: Returns the offset from the Array object to the
+// size for the given dimension.
+//
+// Arguments:
+// elemType - the element type of the array
+// rank - the rank of the array
+// dimension - the dimension for which the size offset will be returned.
+//
+// Return Value:
+// The offset.
+
+unsigned CodeGen::genOffsetOfMDArrayDimensionSize(var_types elemType, unsigned rank, unsigned dimension)
+{
+ // Note that the lower bound and length fields of the Array object are always TYP_INT, even on 64-bit targets.
+ return compiler->eeGetArrayDataOffset(elemType) + genTypeSize(TYP_INT) * dimension;
+}
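+
+// [Illustrative sketch, not part of the JIT code] The arithmetic in the two helpers above
+// implies the following layout for a rank-R multi-dimensional array (all bounds fields are
+// TYP_INT even on 64-bit targets):
+//
+// eeGetArrayDataOffset(elemType) -> start of the bounds data, laid out as:
+// INT[0] .. INT[R-1] : dimension sizes <- genOffsetOfMDArrayDimensionSize(.., dim)
+// INT[R] .. INT[2R-1] : lower bounds <- genOffsetOfMDArrayLowerBound(.., dim)
+// followed by the array elements,
+//
+// which is why the lower-bound helper adds 'dimension + rank' INT slots while the
+// dimension-size helper adds only 'dimension'.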
+
+//------------------------------------------------------------------------
+// genCodeForArrIndex: Generates code to bounds check the index for one dimension of an array reference,
+// producing the effective index by subtracting the lower bound.
+//
+// Arguments:
+// arrIndex - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+
+void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex)
+{
+ GenTreePtr arrObj = arrIndex->ArrObj();
+ GenTreePtr indexNode = arrIndex->IndexExpr();
+
+ regNumber arrReg = genConsumeReg(arrObj);
+ regNumber indexReg = genConsumeReg(indexNode);
+ regNumber tgtReg = arrIndex->gtRegNum;
+
+ unsigned dim = arrIndex->gtCurrDim;
+ unsigned rank = arrIndex->gtArrRank;
+ var_types elemType = arrIndex->gtArrElemType;
+
+ noway_assert(tgtReg != REG_NA);
+
+ // Subtract the lower bound for this dimension.
+ // TODO-XArch-CQ: make this contained if it's an immediate that fits.
+ if (tgtReg != indexReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, indexNode->TypeGet());
+ }
+ getEmitter()->emitIns_R_AR(INS_sub, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
+ genOffsetOfMDArrayLowerBound(elemType, rank, dim));
+ getEmitter()->emitIns_R_AR(INS_cmp, emitActualTypeSize(TYP_INT), tgtReg, arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ genJumpToThrowHlpBlk(EJ_jae, SCK_RNGCHK_FAIL);
+
+ genProduceReg(arrIndex);
+}
+
+//------------------------------------------------------------------------
+// genCodeForArrOffset: Generates code to compute the flattened array offset for
+// one dimension of an array reference:
+// result = (prevDimOffset * dimSize) + effectiveIndex
+// where dimSize is obtained from the arrObj operand
+//
+// Arguments:
+// arrOffset - the node for which we're generating code
+//
+// Return Value:
+// None.
+//
+// Notes:
+// dimSize and effectiveIndex are always non-negative, the former by design,
+// and the latter because it has been normalized to be zero-based.
+
+void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
+{
+ GenTreePtr offsetNode = arrOffset->gtOffset;
+ GenTreePtr indexNode = arrOffset->gtIndex;
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+
+ regNumber tgtReg = arrOffset->gtRegNum;
+
+ noway_assert(tgtReg != REG_NA);
+
+ unsigned dim = arrOffset->gtCurrDim;
+ unsigned rank = arrOffset->gtArrRank;
+ var_types elemType = arrOffset->gtArrElemType;
+
+ // We will use a temp register for the offset*scale+effectiveIndex computation.
+ regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+
+ // First, consume the operands in the correct order.
+ regNumber offsetReg = REG_NA;
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ offsetReg = genConsumeReg(offsetNode);
+ }
+ else
+ {
+ assert(offsetNode->isContained());
+ }
+ regNumber indexReg = genConsumeReg(indexNode);
+ // Although arrReg may not be used in the constant-index case, if we have generated
+ // the value into a register, we must consume it, otherwise we will fail to end the
+ // live range of the gc ptr.
+ // TODO-CQ: Currently arrObj will always have a register allocated to it.
+ // We could avoid allocating a register for it, which would be of value if the arrObj
+ // is an on-stack lclVar.
+ regNumber arrReg = REG_NA;
+ if (arrObj->gtHasReg())
+ {
+ arrReg = genConsumeReg(arrObj);
+ }
+
+ if (!offsetNode->IsIntegralConst(0))
+ {
+ // Evaluate tgtReg = offsetReg*dim_size + indexReg.
+ // tmpReg is used to load dim_size and the result of the multiplication.
+ // Note that dim_size will never be negative.
+
+ getEmitter()->emitIns_R_AR(INS_mov, emitActualTypeSize(TYP_INT), tmpReg, arrReg,
+ genOffsetOfMDArrayDimensionSize(elemType, rank, dim));
+ inst_RV_RV(INS_imul, tmpReg, offsetReg);
+
+ if (tmpReg == tgtReg)
+ {
+ inst_RV_RV(INS_add, tmpReg, indexReg);
+ }
+ else
+ {
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_I_IMPL);
+ }
+ inst_RV_RV(INS_add, tgtReg, tmpReg);
+ }
+ }
+ else
+ {
+ if (indexReg != tgtReg)
+ {
+ inst_RV_RV(INS_mov, tgtReg, indexReg, TYP_INT);
+ }
+ }
+ genProduceReg(arrOffset);
+}
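+
+// [Illustrative sketch, not part of the JIT code] For a two-dimensional access a[i, j] the
+// GT_ARR_INDEX / GT_ARR_OFFSET chain computes roughly:
+//
+// effIdx0 = i - lowerBound0 // genCodeForArrIndex, bounds-checked against dimSize0
+// offset0 = effIdx0 // this node, with a constant-zero prevOffset
+// effIdx1 = j - lowerBound1 // genCodeForArrIndex for dimension 1
+// offset1 = offset0 * dimSize1 + effIdx1 // this node for dimension 1
+//
+// The final flattened offset is later scaled by the element size and added to the array's
+// data offset by the nodes that consume it.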
+
+// Make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that would otherwise be needed.
+//
+GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
+{
+ GenTreeIndir i(GT_IND, type, base, nullptr);
+ i.gtRegNum = REG_NA;
+ // gtNext has to be non-null (because contained nodes can't be the last node in a block),
+ // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+// make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate an actual constant node.
+//
+GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
+{
+ GenTreeIntCon i(type, value);
+ i.gtRegNum = REG_NA;
+ // gtNext has to be non-null (because contained nodes can't be the last node in a block),
+ // but we don't want it to be a valid pointer.
+ i.gtNext = (GenTree*)(-1);
+ return i;
+}
+
+instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
+{
+ instruction ins;
+
+ // Operations on SIMD vectors shouldn't come down this path.
+ assert(!varTypeIsSIMD(type));
+ if (varTypeIsFloating(type))
+ {
+ return ins_MathOp(oper, type);
+ }
+
+ switch (oper)
+ {
+ case GT_ADD:
+ ins = INS_add;
+ break;
+ case GT_AND:
+ ins = INS_and;
+ break;
+ case GT_LSH:
+ ins = INS_shl;
+ break;
+ case GT_MUL:
+ ins = INS_imul;
+ break;
+ case GT_NEG:
+ ins = INS_neg;
+ break;
+ case GT_NOT:
+ ins = INS_not;
+ break;
+ case GT_OR:
+ ins = INS_or;
+ break;
+ case GT_ROL:
+ ins = INS_rol;
+ break;
+ case GT_ROR:
+ ins = INS_ror;
+ break;
+ case GT_RSH:
+ ins = INS_sar;
+ break;
+ case GT_RSZ:
+ ins = INS_shr;
+ break;
+ case GT_SUB:
+ ins = INS_sub;
+ break;
+ case GT_XOR:
+ ins = INS_xor;
+ break;
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ ins = INS_add;
+ break;
+ case GT_ADD_HI:
+ ins = INS_adc;
+ break;
+ case GT_SUB_LO:
+ ins = INS_sub;
+ break;
+ case GT_SUB_HI:
+ ins = INS_sbb;
+ break;
+#endif // !defined(_TARGET_64BIT_)
+ default:
+ unreached();
+ break;
+ }
+ return ins;
+}
+
+//------------------------------------------------------------------------
+// genCodeForShift: Generates the code sequence for a GenTree node that
+// represents a bit shift or rotate operation (<<, >>, >>>, rol, ror).
+//
+// Arguments:
+// tree - the bit shift node (that specifies the type of bit shift to perform).
+//
+// Assumptions:
+// a) All GenTrees are register allocated.
+// b) The shift-by-amount in tree->gtOp.gtOp2 is either a contained constant or
+// it's a register-allocated expression. If it is in a register that is
+// not RCX, it will be moved to RCX (so RCX better not be in use!).
+//
+void CodeGen::genCodeForShift(GenTreePtr tree)
+{
+ // Only the non-RMW case here.
+ assert(tree->OperIsShiftOrRotate());
+ assert(!tree->gtOp.gtOp1->isContained());
+ assert(tree->gtRegNum != REG_NA);
+
+ genConsumeOperands(tree->AsOp());
+
+ var_types targetType = tree->TypeGet();
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ regNumber operandReg = operand->gtRegNum;
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (shiftBy->isContainedIntOrIImmed())
+ {
+ // First, move the operand to the destination register and
+ // later on perform the shift in-place.
+ // (LSRA will try to avoid this situation through preferencing.)
+ if (tree->gtRegNum != operandReg)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
+ }
+
+ int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
+ inst_RV_SH(ins, emitTypeSize(tree), tree->gtRegNum, shiftByValue);
+ }
+ else
+ {
+ // We must have the number of bits to shift stored in ECX, since we constrained this node to
+ // sit in ECX. If it isn't there already, LSRA expects the code generator to move it,
+ // since ECX is a fixed single-register requirement.
+ regNumber shiftReg = shiftBy->gtRegNum;
+ if (shiftReg != REG_RCX)
+ {
+ // Issue the mov to RCX:
+ inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
+ }
+
+ // The operand to be shifted must not be in ECX
+ noway_assert(operandReg != REG_RCX);
+
+ if (tree->gtRegNum != operandReg)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, operandReg, targetType);
+ }
+ inst_RV_CL(ins, tree->gtRegNum, targetType);
+ }
+
+ genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genCodeForShiftRMW: Generates the code sequence for a GT_STOREIND GenTree node that
+// represents a RMW bit shift or rotate operation (<<, >>, >>>, rol, ror), for example:
+// GT_STOREIND( AddressTree, GT_SHL( Ind ( AddressTree ), Operand ) )
+//
+// Arguments:
+// storeIndNode: the GT_STOREIND node.
+//
+void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
+{
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+
+ assert(data->OperIsShiftOrRotate());
+
+ // This function only handles the RMW case.
+ assert(data->gtOp.gtOp1->isContained());
+ assert(data->gtOp.gtOp1->isIndir());
+ assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
+ assert(data->gtRegNum == REG_NA);
+
+ var_types targetType = data->TypeGet();
+ genTreeOps oper = data->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr attr = EA_ATTR(genTypeSize(targetType));
+
+ GenTree* shiftBy = data->gtOp.gtOp2;
+ if (shiftBy->isContainedIntOrIImmed())
+ {
+ int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
+ ins = genMapShiftInsToShiftByConstantIns(ins, shiftByValue);
+ if (shiftByValue == 1)
+ {
+ // There is no source in this case, as the shift by count is embedded in the instruction opcode itself.
+ getEmitter()->emitInsRMW(ins, attr, storeInd);
+ }
+ else
+ {
+ getEmitter()->emitInsRMW(ins, attr, storeInd, shiftBy);
+ }
+ }
+ else
+ {
+ // We must have the number of bits to shift stored in ECX, since we constrained this node to
+ // sit in ECX. If it isn't there already, LSRA expects the code generator to move it,
+ // since ECX is a fixed single-register requirement.
+ regNumber shiftReg = shiftBy->gtRegNum;
+ if (shiftReg != REG_RCX)
+ {
+ // Issue the mov to RCX:
+ inst_RV_RV(INS_mov, REG_RCX, shiftReg, shiftBy->TypeGet());
+ }
+
+ // The shiftBy operand is implicit, so call the unary version of emitInsRMW.
+ getEmitter()->emitInsRMW(ins, attr, storeInd);
+ }
+}
+
+void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
+{
+ regNumber dstReg = tree->gtRegNum;
+ GenTree* unspillTree = tree;
+
+ if (tree->gtOper == GT_RELOAD)
+ {
+ unspillTree = tree->gtOp.gtOp1;
+ }
+
+ if ((unspillTree->gtFlags & GTF_SPILLED) != 0)
+ {
+ if (genIsRegCandidateLocal(unspillTree))
+ {
+ // Reset spilled flag, since we are going to load a local variable from its home location.
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+
+ GenTreeLclVarCommon* lcl = unspillTree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // Load local variable from its home location.
+ // In most cases the tree type will indicate the correct type to use for the load.
+ // However, if it is NOT a normalizeOnLoad lclVar (i.e. NOT a small int that always gets
+ // widened when loaded into a register), and its size is not the same as genActualType of
+ // the type of the lclVar, then we need to change the type of the tree node when loading.
+ // This situation happens due to "optimizations" that avoid a cast and
+ // simply retype the node when using a long-typed lclVar as an int.
+ // While loading the int in that case would work for this use of the lclVar, if it is
+ // later used as a long, we will have incorrectly truncated the long.
+ // In the normalizeOnLoad case ins_Load will return an appropriate sign- or zero-
+ // extending load.
+
+ var_types treeType = unspillTree->TypeGet();
+ if (treeType != genActualType(varDsc->lvType) && !varTypeIsGC(treeType) && !varDsc->lvNormalizeOnLoad())
+ {
+ assert(!varTypeIsGC(varDsc));
+ var_types spillType = genActualType(varDsc->lvType);
+ unspillTree->gtType = spillType;
+ inst_RV_TT(ins_Load(spillType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ unspillTree->gtType = treeType;
+ }
+ else
+ {
+ inst_RV_TT(ins_Load(treeType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum)), dstReg, unspillTree);
+ }
+
+ unspillTree->SetInReg();
+
+ // TODO-Review: We would like to call:
+ // genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(tree));
+ // instead of the following code, but this ends up hitting this assert:
+ // assert((regSet.rsMaskVars & regMask) == 0);
+ // due to issues with LSRA resolution moves.
+ // So, just force it for now. This probably indicates a condition that creates a GC hole!
+ //
+ // Extra note: I think we really want to call something like gcInfo.gcUpdateForRegVarMove,
+ // because the variable is not really going live or dead, but that method is somewhat poorly
+ // factored because it, in turn, updates rsMaskVars which is part of RegSet not GCInfo.
+ // TODO-Cleanup: This code exists in other CodeGen*.cpp files, and should be moved to CodeGenCommon.cpp.
+
+ // Don't update the variable's location if we are just re-spilling it again.
+
+ if ((unspillTree->gtFlags & GTF_SPILL) == 0)
+ {
+ genUpdateVarReg(varDsc, tree);
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->gtLclNum);
+ }
+#endif // DEBUG
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tV%02u in reg ", lcl->gtLclNum);
+ varDsc->PrintVarReg();
+ printf(" is becoming live ");
+ compiler->printTreeID(unspillTree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ regSet.AddMaskVars(genGetRegMask(varDsc));
+ }
+
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ else if (unspillTree->IsMultiRegCall())
+ {
+ GenTreeCall* call = unspillTree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+ GenTreeCopyOrReload* reloadTree = nullptr;
+ if (tree->OperGet() == GT_RELOAD)
+ {
+ reloadTree = tree->AsCopyOrReload();
+ }
+
+ // In case of multi-reg call node, GTF_SPILLED flag on it indicates that
+ // one or more of its result regs are spilled. Call node needs to be
+ // queried to know which specific result regs to be unspilled.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILLED) != 0)
+ {
+ var_types dstType = retTypeDesc->GetReturnRegType(i);
+ regNumber unspillTreeReg = call->GetRegNumByIdx(i);
+
+ if (reloadTree != nullptr)
+ {
+ dstReg = reloadTree->GetRegNumByIdx(i);
+ if (dstReg == REG_NA)
+ {
+ dstReg = unspillTreeReg;
+ }
+ }
+ else
+ {
+ dstReg = unspillTreeReg;
+ }
+
+ TempDsc* t = regSet.rsUnspillInPlace(call, unspillTreeReg, i);
+ getEmitter()->emitIns_R_S(ins_Load(dstType), emitActualTypeSize(dstType), dstReg, t->tdTempNum(),
+ 0);
+ compiler->tmpRlsTemp(t);
+ gcInfo.gcMarkRegPtrVal(dstReg, dstType);
+ }
+ }
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ }
+ else
+ {
+ TempDsc* t = regSet.rsUnspillInPlace(unspillTree, unspillTree->gtRegNum);
+ getEmitter()->emitIns_R_S(ins_Load(unspillTree->gtType), emitActualTypeSize(unspillTree->TypeGet()), dstReg,
+ t->tdTempNum(), 0);
+ compiler->tmpRlsTemp(t);
+
+ unspillTree->gtFlags &= ~GTF_SPILLED;
+ unspillTree->SetInReg();
+ gcInfo.gcMarkRegPtrVal(dstReg, unspillTree->TypeGet());
+ }
+ }
+}
+
+// Do the liveness update for a subnode that is being consumed by codegen,
+// including the logic for reload if needed, and also take care
+// of placing the value in the desired register.
+void CodeGen::genConsumeRegAndCopy(GenTree* tree, regNumber needReg)
+{
+ if (needReg == REG_NA)
+ {
+ return;
+ }
+ regNumber treeReg = genConsumeReg(tree);
+ if (treeReg != needReg)
+ {
+ inst_RV_RV(INS_mov, needReg, treeReg, tree->TypeGet());
+ }
+}
+
+void CodeGen::genRegCopy(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_COPY);
+ GenTree* op1 = treeNode->gtOp.gtOp1;
+
+ if (op1->IsMultiRegCall())
+ {
+ genConsumeReg(op1);
+
+ GenTreeCopyOrReload* copyTree = treeNode->AsCopyOrReload();
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber fromReg = call->GetRegNumByIdx(i);
+ regNumber toReg = copyTree->GetRegNumByIdx(i);
+
+ // A multi-reg GT_COPY node will have a valid reg only for those
+ // positions whose corresponding result reg of the call node needs
+ // to be copied.
+ if (toReg != REG_NA)
+ {
+ assert(toReg != fromReg);
+ inst_RV_RV(ins_Copy(type), toReg, fromReg, type);
+ }
+ }
+ }
+ else
+ {
+ var_types targetType = treeNode->TypeGet();
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ // Check whether this node and the node from which we're copying the value have
+ // different register types. This can happen if (currently iff) we have a SIMD
+ // vector type that fits in an integer register, in which case it is passed as
+ // an argument, or returned from a call, in an integer register and must be
+ // copied if it's in an xmm register.
+
+ bool srcFltReg = (varTypeIsFloating(op1) || varTypeIsSIMD(op1));
+ bool tgtFltReg = (varTypeIsFloating(treeNode) || varTypeIsSIMD(treeNode));
+ if (srcFltReg != tgtFltReg)
+ {
+ instruction ins;
+ regNumber fpReg;
+ regNumber intReg;
+ if (tgtFltReg)
+ {
+ ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
+ fpReg = targetReg;
+ intReg = op1->gtRegNum;
+ }
+ else
+ {
+ ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
+ intReg = targetReg;
+ fpReg = op1->gtRegNum;
+ }
+ inst_RV_RV(ins, fpReg, intReg, targetType);
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
+ }
+
+ if (op1->IsLocal())
+ {
+ // The lclVar will never be a def.
+ // If it is a last use, the lclVar will be killed by genConsumeReg(), as usual, and genProduceReg will
+ // appropriately set the gcInfo for the copied value.
+ // If not, there are two cases we need to handle:
+ // - If this is a TEMPORARY copy (indicated by the GTF_VAR_DEATH flag) the variable
+ // will remain live in its original register.
+ // genProduceReg() will appropriately set the gcInfo for the copied value,
+ // and genConsumeReg will reset it.
+ // - Otherwise, we need to update register info for the lclVar.
+
+ GenTreeLclVarCommon* lcl = op1->AsLclVarCommon();
+ assert((lcl->gtFlags & GTF_VAR_DEF) == 0);
+
+ if ((lcl->gtFlags & GTF_VAR_DEATH) == 0 && (treeNode->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->gtLclNum];
+
+ // If we didn't just spill it (in genConsumeReg, above), then update the register info
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // The old location is dying
+ genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(op1));
+
+ gcInfo.gcMarkRegSetNpt(genRegMask(op1->gtRegNum));
+
+ genUpdateVarReg(varDsc, treeNode);
+
+ // The new location is going live
+ genUpdateRegLife(varDsc, /*isBorn*/ true, /*isDying*/ false DEBUGARG(treeNode));
+ }
+ }
+ }
+ }
+
+ genProduceReg(treeNode);
+}
+
+// Check that registers are consumed in the right order for the current node being generated.
+#ifdef DEBUG
+void CodeGen::genCheckConsumeNode(GenTree* treeNode)
+{
+ // GT_PUTARG_REG is consumed out of order.
+ if (treeNode->gtSeqNum != 0 && treeNode->OperGet() != GT_PUTARG_REG)
+ {
+ if (lastConsumedNode != nullptr)
+ {
+ if (treeNode == lastConsumedNode)
+ {
+ if (verbose)
+ {
+ printf("Node was consumed twice:\n ");
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+ }
+ else
+ {
+ if (verbose && (lastConsumedNode->gtSeqNum > treeNode->gtSeqNum))
+ {
+ printf("Nodes were consumed out-of-order:\n");
+ compiler->gtDispTree(lastConsumedNode, nullptr, nullptr, true);
+ compiler->gtDispTree(treeNode, nullptr, nullptr, true);
+ }
+ // assert(lastConsumedNode->gtSeqNum < treeNode->gtSeqNum);
+ }
+ }
+ lastConsumedNode = treeNode;
+ }
+}
+#endif // DEBUG
+
+//--------------------------------------------------------------------
+// genConsumeReg: Do liveness update for a subnode that is being
+// consumed by codegen.
+//
+// Arguments:
+// tree - GenTree node
+//
+// Return Value:
+// Returns the reg number of tree.
+// In case of multi-reg call node returns the first reg number
+// of the multi-reg return.
+regNumber CodeGen::genConsumeReg(GenTree* tree)
+{
+ if (tree->OperGet() == GT_COPY)
+ {
+ genRegCopy(tree);
+ }
+
+ // Handle the case where we have a lclVar that needs to be copied before use (i.e. because it
+ // interferes with one of the other sources (or the target, if it's a "delayed use" register)).
+ // TODO-Cleanup: This is a special copyReg case in LSRA - consider eliminating these and
+ // always using GT_COPY to make the lclVar location explicit.
+ // Note that we have to do this before calling genUpdateLife because otherwise if we spill it
+ // the lvRegNum will be set to REG_STK and we will lose track of what register currently holds
+ // the lclVar (normally when a lclVar is spilled it is then used from its former register
+ // location, which matches the gtRegNum on the node).
+ // (Note that it doesn't matter if we call this before or after genUnspillRegIfNeeded
+ // because if it's on the stack it will always get reloaded into tree->gtRegNum).
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ if (varDsc->lvRegNum != REG_STK && varDsc->lvRegNum != tree->gtRegNum)
+ {
+ inst_RV_RV(INS_mov, tree->gtRegNum, varDsc->lvRegNum);
+ }
+ }
+
+ genUnspillRegIfNeeded(tree);
+
+ // genUpdateLife() will also spill local var if marked as GTF_SPILL by calling CodeGen::genSpillVar
+ genUpdateLife(tree);
+
+ assert(tree->gtHasReg());
+
+ // there are three cases where consuming a reg means clearing the bit in the live mask
+ // 1. it was not produced by a local
+ // 2. it was produced by a local that is going dead
+ // 3. it was produced by a local that does not live in that reg (like one allocated on the stack)
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()];
+ assert(varDsc->lvLRACandidate);
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->lvRegNum));
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+ // We have loaded this into a register only temporarily
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(tree->gtGetRegMask());
+ }
+
+ genCheckConsumeNode(tree);
+ return tree->gtRegNum;
+}
+
+// Do liveness update for an address tree: one of GT_LEA, GT_LCL_VAR, or GT_CNS_INT (for call indirect).
+void CodeGen::genConsumeAddress(GenTree* addr)
+{
+ if (!addr->isContained())
+ {
+ genConsumeReg(addr);
+ }
+ else if (addr->OperGet() == GT_LEA)
+ {
+ genConsumeAddrMode(addr->AsAddrMode());
+ }
+}
+
+// do liveness update for the operands of an address mode that is being consumed by codegen
+void CodeGen::genConsumeAddrMode(GenTreeAddrMode* addr)
+{
+ genConsumeOperands(addr);
+}
+
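+// Do liveness update for the register(s) consumed by 'tree', handling contained nodes:
+// a contained indir consumes only its address, the special contained GT_AND consumes its
+// operands, a contained (reg-optional) lclVar only has its life updated, and contained
+// spill temps are untracked so nothing needs to be done for them.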
+void CodeGen::genConsumeRegs(GenTree* tree)
+{
+#if !defined(_TARGET_64BIT_)
+ if (tree->OperGet() == GT_LONG)
+ {
+ genConsumeRegs(tree->gtGetOp1());
+ genConsumeRegs(tree->gtGetOp2());
+ return;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ if (tree->isContained())
+ {
+ if (tree->isContainedSpillTemp())
+ {
+ // spill temps are un-tracked and hence no need to update life
+ }
+ else if (tree->isIndir())
+ {
+ genConsumeAddress(tree->AsIndir()->Addr());
+ }
+ else if (tree->OperGet() == GT_AND)
+ {
+ // This is the special contained GT_AND that we created in Lowering::LowerCmp()
+ // Now we need to consume the operands of the GT_AND node.
+ genConsumeOperands(tree->AsOp());
+ }
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+            // A contained lcl var must be living on the stack and marked as reg optional.
+ unsigned varNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ noway_assert(varDsc->lvRegNum == REG_STK);
+ noway_assert(tree->IsRegOptional());
+
+ // Update the life of reg optional lcl var.
+ genUpdateLife(tree);
+ }
+ else
+ {
+ assert(tree->OperIsLeaf());
+ }
+ }
+ else
+ {
+ genConsumeReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeOperands: Do liveness update for the operands of a unary or binary tree
+//
+// Arguments:
+// tree - the GenTreeOp whose operands will have their liveness updated.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Note that this logic is localized here because we must do the liveness update in
+// the correct execution order. This is important because we may have two operands
+// that involve the same lclVar, and if one is marked "lastUse" we must handle it
+// after the first.
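+//    For example, if both operands are uses of the same lclVar and the second use is the
+//    last use, consuming them in execution order ensures the "going dead" update happens
+//    on the second consume rather than the first.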
+
+void CodeGen::genConsumeOperands(GenTreeOp* tree)
+{
+ GenTree* firstOp = tree->gtOp1;
+ GenTree* secondOp = tree->gtOp2;
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ assert(secondOp != nullptr);
+ firstOp = secondOp;
+ secondOp = tree->gtOp1;
+ }
+ if (firstOp != nullptr)
+ {
+ genConsumeRegs(firstOp);
+ }
+ if (secondOp != nullptr)
+ {
+ genConsumeRegs(secondOp);
+ }
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+//------------------------------------------------------------------------
+// genConsumePutStructArgStk: Do liveness update for the operands of a PutArgStk node.
+//                            Also loads the addresses of the src/dst for the
+//                            rep mov operation into the right registers.
+//
+// Arguments:
+// putArgNode - the PUTARG_STK tree.
+// dstReg - the dstReg for the rep move operation.
+// srcReg - the srcReg for the rep move operation.
+// sizeReg - the sizeReg for the rep move operation.
+// baseVarNum - the varnum for the local used for placing the "by-value" args on the stack.
+//
+// Return Value:
+// None.
+//
+// Note: sizeReg can be REG_NA when this function is used to consume the dstReg and srcReg
+//       for copying a struct with references on the stack.
+// The source address/offset is determined from the address on the GT_OBJ node, while
+// the destination address is the address contained in 'baseVarNum' plus the offset
+// provided in the 'putArgNode'.
+
+void CodeGen::genConsumePutStructArgStk(
+ GenTreePutArgStk* putArgNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg, unsigned baseVarNum)
+{
+ assert(varTypeIsStruct(putArgNode));
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ // The putArgNode children are always contained. We should not consume any registers.
+ assert(putArgNode->gtGetOp1()->isContained());
+
+ GenTree* dstAddr = putArgNode;
+
+ // Get the source address.
+ GenTree* src = putArgNode->gtGetOp1();
+ assert((src->gtOper == GT_OBJ) || ((src->gtOper == GT_IND && varTypeIsSIMD(src))));
+ GenTree* srcAddr = src->gtGetOp1();
+
+ size_t size = putArgNode->getArgSize();
+
+ assert(dstReg != REG_NA);
+ assert(srcReg != REG_NA);
+
+ // Consume the registers only if they are not contained or set to REG_NA.
+ if (srcAddr->gtRegNum != REG_NA)
+ {
+ genConsumeReg(srcAddr);
+ }
+
+    // If op1 is already in dstReg, there is nothing to do.
+    // Otherwise, load op1 (the GT_ADDR) into dstReg to copy the struct to the stack by value.
+ if (dstAddr->gtRegNum != dstReg)
+ {
+        // Generate an LEA instruction to load the address of the outgoing arg area var + SlotNum offset
+        // (or the incoming arg area for tail calls) into dstReg (RDI for the rep mov).
+ // Destination is always local (on the stack) - use EA_PTRSIZE.
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, dstReg, baseVarNum, putArgNode->getArgOffset());
+ }
+
+ if (srcAddr->gtRegNum != srcReg)
+ {
+ if (srcAddr->OperIsLocalAddr())
+ {
+ // The OperLocalAddr is always contained.
+ assert(srcAddr->isContained());
+ GenTreeLclVarCommon* lclNode = srcAddr->AsLclVarCommon();
+
+ // Generate LEA instruction to load the LclVar address in RSI.
+ // Source is known to be on the stack. Use EA_PTRSIZE.
+ unsigned int offset = 0;
+ if (srcAddr->OperGet() == GT_LCL_FLD_ADDR)
+ {
+ offset = srcAddr->AsLclFld()->gtLclOffs;
+ }
+ getEmitter()->emitIns_R_S(INS_lea, EA_PTRSIZE, srcReg, lclNode->gtLclNum, offset);
+ }
+ else
+ {
+ assert(srcAddr->gtRegNum != REG_NA);
+ // Source is not known to be on the stack. Use EA_BYREF.
+ getEmitter()->emitIns_R_R(INS_mov, EA_BYREF, srcReg, srcAddr->gtRegNum);
+ }
+ }
+
+ if (sizeReg != REG_NA)
+ {
+ inst_RV_IV(INS_mov, sizeReg, size, EA_8BYTE);
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//------------------------------------------------------------------------
+// genConsumeBlockSize: Ensure that the block size is in the given register
+//
+// Arguments:
+// blkNode - The block node
+// sizeReg - The register into which the block's size should go
+//
+
+void CodeGen::genConsumeBlockSize(GenTreeBlk* blkNode, regNumber sizeReg)
+{
+ if (sizeReg != REG_NA)
+ {
+ unsigned blockSize = blkNode->Size();
+ if (blockSize != 0)
+ {
+ assert(blkNode->gtRsvdRegs == genRegMask(sizeReg));
+ genSetRegToIcon(sizeReg, blockSize);
+ }
+ else
+ {
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ genConsumeReg(blkNode->AsDynBlk()->gtDynamicSize);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockDst: Ensure that the block destination address is in its
+// allocated register.
+// Arguments:
+// blkNode - The block node
+//
+
+void CodeGen::genConsumeBlockDst(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ genConsumeReg(dstAddr);
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockSrc: Ensure that the block source address is in its
+// allocated register if it is non-local.
+// Arguments:
+// blkNode - The block node
+//
+// Return Value:
+// Returns the source address node, if it is non-local,
+// and nullptr otherwise.
+
+GenTree* CodeGen::genConsumeBlockSrc(GenTreeBlk* blkNode)
+{
+ GenTree* src = blkNode->Data();
+ if (blkNode->OperIsCopyBlkOp())
+ {
+ // For a CopyBlk we need the address of the source.
+ if (src->OperGet() == GT_IND)
+ {
+ src = src->gtOp.gtOp1;
+ }
+ else
+ {
+ // This must be a local.
+ // For this case, there is no source address register, as it is a
+ // stack-based address.
+ assert(src->OperIsLocal());
+ return nullptr;
+ }
+ }
+ genConsumeReg(src);
+ return src;
+}
+
+//------------------------------------------------------------------------
+// genConsumeBlockOp: Ensure that the block's operands are enregistered
+// as needed.
+// Arguments:
+// blkNode - The block node
+//
+// Notes:
+// This ensures that the operands are consumed in the proper order to
+// obey liveness modeling.
+
+void CodeGen::genConsumeBlockOp(GenTreeBlk* blkNode, regNumber dstReg, regNumber srcReg, regNumber sizeReg)
+{
+ // We have to consume the registers, and perform any copies, in the actual execution order.
+ // The nominal order is: dst, src, size. However this may have been changed
+ // with reverse flags on the blkNode and the setting of gtEvalSizeFirst in the case of a dynamic
+ // block size.
+ // Note that the register allocator ensures that the registers ON THE NODES will not interfere
+ // with one another if consumed (i.e. reloaded or moved to their ASSIGNED reg) in execution order.
+ // Further, it ensures that they will not interfere with one another if they are then copied
+ // to the REQUIRED register (if a fixed register requirement) in execution order. This requires,
+ // then, that we first consume all the operands, then do any necessary moves.
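+    // For example, for a GT_STORE_DYN_BLK with gtEvalSizeFirst == false and the reverse
+    // flag set, the consume order below is src, dst, size, and any copies into
+    // srcReg/dstReg/sizeReg are then performed in that same order.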
+
+ GenTree* dstAddr = blkNode->Addr();
+ GenTree* src = nullptr;
+ unsigned blockSize = blkNode->Size();
+ GenTree* size = nullptr;
+ bool evalSizeFirst = true;
+
+ if (blkNode->OperGet() == GT_STORE_DYN_BLK)
+ {
+ evalSizeFirst = blkNode->AsDynBlk()->gtEvalSizeFirst;
+ size = blkNode->AsDynBlk()->gtDynamicSize;
+ }
+
+    // First, consume all the sources in order.
+ if (evalSizeFirst)
+ {
+ genConsumeBlockSize(blkNode, sizeReg);
+ }
+ if (blkNode->IsReverseOp())
+ {
+ src = genConsumeBlockSrc(blkNode);
+ genConsumeBlockDst(blkNode);
+ }
+ else
+ {
+ genConsumeBlockDst(blkNode);
+ src = genConsumeBlockSrc(blkNode);
+ }
+ if (!evalSizeFirst)
+ {
+ genConsumeBlockSize(blkNode, sizeReg);
+ }
+ // Next, perform any necessary moves.
+ if (evalSizeFirst && (size != nullptr) && (size->gtRegNum != sizeReg))
+ {
+ inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
+ }
+ if (blkNode->IsReverseOp())
+ {
+ if ((src != nullptr) && (src->gtRegNum != srcReg))
+ {
+ inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
+ }
+ if (dstAddr->gtRegNum != dstReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
+ }
+ }
+ else
+ {
+ if (dstAddr->gtRegNum != dstReg)
+ {
+ inst_RV_RV(INS_mov, dstReg, dstAddr->gtRegNum, dstAddr->TypeGet());
+ }
+ if ((src != nullptr) && (src->gtRegNum != srcReg))
+ {
+ inst_RV_RV(INS_mov, srcReg, src->gtRegNum, src->TypeGet());
+ }
+ }
+ if (!evalSizeFirst && size != nullptr && (size->gtRegNum != sizeReg))
+ {
+ inst_RV_RV(INS_mov, sizeReg, size->gtRegNum, size->TypeGet());
+ }
+}
+
+//-------------------------------------------------------------------------
+// genProduceReg: do liveness update for register produced by the current
+// node in codegen.
+//
+// Arguments:
+// tree - Gentree node
+//
+// Return Value:
+// None.
+void CodeGen::genProduceReg(GenTree* tree)
+{
+ if (tree->gtFlags & GTF_SPILL)
+ {
+        // Code for a GT_COPY node gets generated as part of consuming regs by its parent.
+        // A GT_COPY node in turn produces a reg result and should never be marked to
+        // spill.
+        //
+        // Similarly, a GT_RELOAD node gets generated as part of consuming regs by its
+        // parent and should never be marked for spilling.
+ noway_assert(!tree->IsCopyOrReload());
+
+ if (genIsRegCandidateLocal(tree))
+ {
+ // Store local variable to its home location.
+ tree->gtFlags &= ~GTF_REG_VAL;
+ // Ensure that lclVar stores are typed correctly.
+ unsigned varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() ||
+ (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet())));
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), tree, tree->gtRegNum);
+ }
+ else
+ {
+            // In the case of a multi-reg call node, the spill flag on the call node
+            // indicates that one or more of its allocated regs need to
+            // be spilled. The call node needs to be further queried to
+            // know which of its result regs need to be spilled.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ unsigned flags = call->GetRegSpillFlagByIdx(i);
+ if ((flags & GTF_SPILL) != 0)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ call->SetInReg();
+ regSet.rsSpillTree(reg, call, i);
+ gcInfo.gcMarkRegSetNpt(genRegMask(reg));
+ }
+ }
+ }
+ else
+ {
+ tree->SetInReg();
+ regSet.rsSpillTree(tree->gtRegNum, tree);
+ gcInfo.gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+
+ tree->gtFlags |= GTF_SPILLED;
+ tree->gtFlags &= ~GTF_SPILL;
+
+ return;
+ }
+ }
+
+ genUpdateLife(tree);
+
+ // If we've produced a register, mark it as a pointer, as needed.
+ if (tree->gtHasReg())
+ {
+ // We only mark the register in the following cases:
+ // 1. It is not a register candidate local. In this case, we're producing a
+ // register from a local, but the local is not a register candidate. Thus,
+ // we must be loading it as a temp register, and any "last use" flag on
+ // the register wouldn't be relevant.
+ // 2. The register candidate local is going dead. There's no point to mark
+ // the register as live, with a GC pointer, if the variable is dead.
+ if (!genIsRegCandidateLocal(tree) || ((tree->gtFlags & GTF_VAR_DEATH) == 0))
+ {
+            // A multi-reg call node will produce more than one register result.
+            // Mark all the regs produced by the call node.
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = call->GetRegNumByIdx(i);
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ gcInfo.gcMarkRegPtrVal(reg, type);
+ }
+ }
+ else if (tree->IsCopyOrReloadOfMultiRegCall())
+ {
+                // We should never see a reload of a multi-reg call here,
+                // because GT_RELOAD gets generated in the reg-consuming path.
+ noway_assert(tree->OperGet() == GT_COPY);
+
+                // A multi-reg GT_COPY node produces those regs to which
+                // the copy has taken place.
+ GenTreeCopyOrReload* copy = tree->AsCopyOrReload();
+ GenTreeCall* call = copy->gtGetOp1()->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types type = retTypeDesc->GetReturnRegType(i);
+ regNumber fromReg = call->GetRegNumByIdx(i);
+ regNumber toReg = copy->GetRegNumByIdx(i);
+
+ if (toReg != REG_NA)
+ {
+ gcInfo.gcMarkRegPtrVal(toReg, type);
+ }
+ }
+ }
+ else
+ {
+ gcInfo.gcMarkRegPtrVal(tree->gtRegNum, tree->TypeGet());
+ }
+ }
+ }
+ tree->SetInReg();
+}
+
+// transfer gc/byref status of src reg to dst reg
+void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
+{
+ regMaskTP srcMask = genRegMask(src);
+ regMaskTP dstMask = genRegMask(dst);
+
+ if (gcInfo.gcRegGCrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetGCref(dstMask);
+ }
+ else if (gcInfo.gcRegByrefSetCur & srcMask)
+ {
+ gcInfo.gcMarkRegSetByref(dstMask);
+ }
+ else
+ {
+ gcInfo.gcMarkRegSetNpt(dstMask);
+ }
+}
+
+// generates an ip-relative call or indirect call via reg ('call reg')
+// pass in 'addr' for a relative call or 'base' for an indirect register call
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) void* addr X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset,
+ regNumber base,
+ bool isJump,
+ bool isNoGC)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset, base, REG_NA, 0, 0, isJump,
+ emitter::emitNoGChelper(compiler->eeGetHelperNum(methHnd)));
+}
+
+// generates an indirect call via addressing mode (call []) given an indir node
+// methHnd - optional, only used for pretty printing
+// retSize - emitter type of return for GC purposes, should be EA_BYREF, EA_GCREF, or EA_PTRSIZE (not GC)
+void CodeGen::genEmitCall(int callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) GenTreeIndir* indir X86_ARG(ssize_t argSize),
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ IL_OFFSETX ilOffset)
+{
+#if !defined(_TARGET_X86_)
+ ssize_t argSize = 0;
+#endif // !defined(_TARGET_X86_)
+ genConsumeAddress(indir->Addr());
+
+ getEmitter()->emitIns_Call(emitter::EmitCallType(callType), methHnd, INDEBUG_LDISASM_COMMA(sigInfo) nullptr,
+ argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, ilOffset,
+ indir->Base() ? indir->Base()->gtRegNum : REG_NA,
+ indir->Index() ? indir->Index()->gtRegNum : REG_NA, indir->Scale(), indir->Offset());
+}
+
+//------------------------------------------------------------------------
+// genStoreInd: Generate code for a GT_STOREIND node.
+//
+// Arguments:
+//    node - The GT_STOREIND node for which to generate code.
+//
+// Return Value:
+// none
+
+void CodeGen::genStoreInd(GenTreePtr node)
+{
+ assert(node->OperGet() == GT_STOREIND);
+
+#ifdef FEATURE_SIMD
+ // Storing Vector3 of size 12 bytes through indirection
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+ genStoreIndTypeSIMD12(node);
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ GenTreeStoreInd* storeInd = node->AsStoreInd();
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+ var_types targetType = storeInd->TypeGet();
+
+ assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
+
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(storeInd, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering registers are taken care of.
+ genConsumeOperands(storeInd->AsOp());
+
+ if (genEmitOptimizedGCWriteBarrier(writeBarrierForm, addr, data))
+ {
+ return;
+ }
+
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0, as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+
+ genGCWriteBarrier(storeInd, writeBarrierForm);
+ }
+ else
+ {
+ bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+ bool isRMWMemoryOp = storeInd->IsRMWMemoryOp();
+ GenTree* rmwSrc = nullptr;
+
+ // We must consume the operands in the proper execution order, so that liveness is
+ // updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+        // If storeInd represents an RMW memory op, then its data is a non-leaf node marked as contained,
+        // and the non-indir operand of data is the source of the RMW memory op.
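+        // For example, for [addr] = [addr] + icon, 'data' is a contained
+        // GT_ADD(GT_IND(addr), icon); rmwDst is the GT_IND and rmwSrc is the icon.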
+ if (isRMWMemoryOp)
+ {
+ assert(data->isContained() && !data->OperIsLeaf());
+
+ GenTreePtr rmwDst = nullptr;
+
+ dataIsUnary = (GenTree::OperIsUnary(data->OperGet()) != 0);
+ if (!dataIsUnary)
+ {
+ if (storeInd->IsRMWDstOp1())
+ {
+ rmwDst = data->gtGetOp1();
+ rmwSrc = data->gtGetOp2();
+ }
+ else
+ {
+ assert(storeInd->IsRMWDstOp2());
+ rmwDst = data->gtGetOp2();
+ rmwSrc = data->gtGetOp1();
+ }
+
+ genConsumeRegs(rmwSrc);
+ }
+ else
+ {
+                // *(p) = oper *(p): Here addr = p, and rmwSrc = rmwDst = *(p), i.e. GT_IND(p).
+                // For unary RMW ops, the src and dst of the RMW memory op are the same. Lowering
+                // clears operand counts on rmwSrc, so we don't need to perform a
+                // genConsumeReg() on it.
+ assert(storeInd->IsRMWDstOp1());
+ rmwSrc = data->gtGetOp1();
+ rmwDst = data->gtGetOp1();
+ assert(rmwSrc->isContained());
+ }
+
+ assert(rmwSrc != nullptr);
+ assert(rmwDst != nullptr);
+ assert(Lowering::IndirsAreEquivalent(rmwDst, storeInd));
+ }
+ else
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (isRMWMemoryOp)
+ {
+ if (dataIsUnary)
+ {
+ // generate code for unary RMW memory ops like neg/not
+ getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
+ storeInd);
+ }
+ else
+ {
+ if (data->OperIsShiftOrRotate())
+ {
+ // Generate code for shift RMW memory ops.
+ // The data address needs to be op1 (it must be [addr] = [addr] <shift> <amount>, not [addr] =
+ // <amount> <shift> [addr]).
+ assert(storeInd->IsRMWDstOp1());
+ assert(rmwSrc == data->gtGetOp2());
+ genCodeForShiftRMW(storeInd);
+ }
+ else
+ {
+ // generate code for remaining binary RMW memory ops like add/sub/and/or/xor
+ getEmitter()->emitInsRMW(genGetInsForOper(data->OperGet(), data->TypeGet()), emitTypeSize(storeInd),
+ storeInd, rmwSrc);
+ }
+ }
+ }
+ else
+ {
+ getEmitter()->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// genEmitOptimizedGCWriteBarrier: Generate write barrier store using the optimized
+// helper functions.
+//
+// Arguments:
+// writeBarrierForm - the write barrier form to use
+// addr - the address at which to do the store
+// data - the data to store
+//
+// Return Value:
+// true if an optimized write barrier form was used, false if not. If this
+// function returns false, the caller must emit a "standard" write barrier.
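+//
+// Notes:
+//    On x86 with NOGC_WRITE_BARRIERS, the optimized helpers are register-specific:
+//    the target address is passed in EDX (REG_WRITE_BARRIER) and the value to store
+//    stays in the register that names the helper (e.g. CORINFO_HELP_ASSIGN_REF_ESI).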
+
+bool CodeGen::genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data)
+{
+ assert(writeBarrierForm != GCInfo::WBF_NoBarrier);
+
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+ bool useOptimizedWriteBarriers = true;
+
+#ifdef DEBUG
+ useOptimizedWriteBarriers =
+ (writeBarrierForm != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug); // This one is always a call to a C++ method.
+#endif
+
+ if (!useOptimizedWriteBarriers)
+ {
+ return false;
+ }
+
+ const static int regToHelper[2][8] = {
+ // If the target is known to be in managed memory
+ {
+ CORINFO_HELP_ASSIGN_REF_EAX, CORINFO_HELP_ASSIGN_REF_ECX, -1, CORINFO_HELP_ASSIGN_REF_EBX, -1,
+ CORINFO_HELP_ASSIGN_REF_EBP, CORINFO_HELP_ASSIGN_REF_ESI, CORINFO_HELP_ASSIGN_REF_EDI,
+ },
+
+ // Don't know if the target is in managed memory
+ {
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EAX, CORINFO_HELP_CHECKED_ASSIGN_REF_ECX, -1,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_EBX, -1, CORINFO_HELP_CHECKED_ASSIGN_REF_EBP,
+ CORINFO_HELP_CHECKED_ASSIGN_REF_ESI, CORINFO_HELP_CHECKED_ASSIGN_REF_EDI,
+ },
+ };
+
+ noway_assert(regToHelper[0][REG_EAX] == CORINFO_HELP_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[0][REG_ECX] == CORINFO_HELP_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[0][REG_EBX] == CORINFO_HELP_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[0][REG_ESP] == -1);
+ noway_assert(regToHelper[0][REG_EBP] == CORINFO_HELP_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[0][REG_ESI] == CORINFO_HELP_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[0][REG_EDI] == CORINFO_HELP_ASSIGN_REF_EDI);
+
+ noway_assert(regToHelper[1][REG_EAX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EAX);
+ noway_assert(regToHelper[1][REG_ECX] == CORINFO_HELP_CHECKED_ASSIGN_REF_ECX);
+ noway_assert(regToHelper[1][REG_EBX] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBX);
+ noway_assert(regToHelper[1][REG_ESP] == -1);
+ noway_assert(regToHelper[1][REG_EBP] == CORINFO_HELP_CHECKED_ASSIGN_REF_EBP);
+ noway_assert(regToHelper[1][REG_ESI] == CORINFO_HELP_CHECKED_ASSIGN_REF_ESI);
+ noway_assert(regToHelper[1][REG_EDI] == CORINFO_HELP_CHECKED_ASSIGN_REF_EDI);
+
+ regNumber reg = data->gtRegNum;
+ noway_assert((reg != REG_ESP) && (reg != REG_WRITE_BARRIER));
+
+ // Generate the following code:
+ // lea edx, addr
+ // call write_barrier_helper_reg
+
+    // addr goes in REG_WRITE_BARRIER (EDX)
+    if (addr->gtRegNum != REG_WRITE_BARRIER) // REVIEW: can it ever not already be in this register?
+ {
+ inst_RV_RV(INS_mov, REG_WRITE_BARRIER, addr->gtRegNum, addr->TypeGet());
+ }
+
+ unsigned tgtAnywhere = 0;
+ if (writeBarrierForm != GCInfo::WBF_BarrierUnchecked)
+ {
+ tgtAnywhere = 1;
+ }
+
+ // We might want to call a modified version of genGCWriteBarrier() to get the benefit of
+ // the FEATURE_COUNT_GC_WRITE_BARRIERS code there, but that code doesn't look like it works
+ // with rationalized RyuJIT IR. So, for now, just emit the helper call directly here.
+
+ genEmitHelperCall(regToHelper[tgtAnywhere][reg],
+ 0, // argSize
+ EA_PTRSIZE); // retSize
+
+ return true;
+#else // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
+ return false;
+#endif // !defined(_TARGET_X86_) || !NOGC_WRITE_BARRIERS
+}
+
+// Produce code for a GT_CALL node
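+// The late (register) args are consumed and moved into their argument registers, the
+// outgoing stack arg size is computed where the target needs it, the call itself is
+// emitted (direct, indirect, or fast tail call), the GC state of the arg registers is
+// killed, and the return value is moved from the ABI return register(s) into the
+// register(s) allocated to the call node.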
+void CodeGen::genCallInstruction(GenTreePtr node)
+{
+ GenTreeCall* call = node->AsCall();
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ continue;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ regNumber argReg = REG_NA;
+
+ if (iterationNum == 0)
+ {
+ argReg = curArgTabEntry->regNum;
+ }
+ else
+ {
+ assert(iterationNum == 1);
+ argReg = curArgTabEntry->otherRegNum;
+ }
+
+ genConsumeReg(putArgRegNode);
+
+ // Validate the putArgRegNode has the right type.
+ assert(putArgRegNode->TypeGet() ==
+ compiler->GetTypeFromClassificationAndSizes(curArgTabEntry->structDesc
+ .eightByteClassifications[iterationNum],
+ curArgTabEntry->structDesc
+ .eightByteSizes[iterationNum]));
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+#if FEATURE_VARARG
+        // In the case of a varargs call,
+        // the ABI dictates that if we have floating point args,
+        // we must pass the enregistered arguments in both the
+        // integer and floating point registers, so let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ regNumber targetReg = compiler->getCallArgIntRegister(argNode->gtRegNum);
+ instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
+ inst_RV_RV(ins, argNode->gtRegNum, targetReg);
+ }
+#endif // FEATURE_VARARG
+ }
+
+#if defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The call will pop its arguments.
+ // for each putarg_stk:
+ ssize_t stackArgBytes = 0;
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+ if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
+ {
+#if defined(_TARGET_X86_)
+ assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
+ if (arg->OperGet() == GT_LONG)
+ {
+ assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
+ }
+#endif // defined(_TARGET_X86_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (genActualType(arg->TypeGet()) == TYP_STRUCT)
+ {
+ assert(arg->OperGet() == GT_PUTARG_STK);
+
+ GenTreeObj* obj = arg->gtGetOp1()->AsObj();
+ stackArgBytes = compiler->info.compCompHnd->getClassSize(obj->gtClass);
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
+ }
+ args = args->gtOp.gtOp2;
+ }
+#endif // defined(_TARGET_X86_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+ getEmitter()->emitIns_AR_R(INS_cmp, EA_4BYTE, regThis, regThis, 0);
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+    // If this is a fast tail call, then we are done. In this case we set up the args (both reg args
+    // and stack args in the incoming arg area) and the call target in rax. The epilog sequence will
+    // generate "jmp rax".
+ if (call->IsFastTailCall())
+ {
+ // Don't support fast tail calling JIT helpers
+ assert(callType != CT_HELPER);
+
+ // Fast tail calls materialize call target either in gtControlExpr or in gtCallAddr.
+ assert(target != nullptr);
+
+ genConsumeReg(target);
+ if (target->gtRegNum != REG_RAX)
+ {
+ inst_RV_RV(INS_mov, REG_RAX, target->gtRegNum);
+ }
+ return;
+ }
+
+    // For a pinvoke to unmanaged code we emit a label to clear
+    // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+ emitAttr secondRetSize = EA_UNKNOWN;
+
+ if (call->HasMultiRegRetVal())
+ {
+ retSize = emitTypeSize(retTypeDesc->GetReturnRegType(0));
+ secondRetSize = emitTypeSize(retTypeDesc->GetReturnRegType(1));
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ bool fPossibleSyncHelperCall = false;
+ CorInfoHelpFunc helperNum = CORINFO_HELP_UNDEF;
+
+#ifdef DEBUGGING_SUPPORT
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+#endif // DEBUGGING_SUPPORT
+
+#if defined(_TARGET_X86_)
+ // If the callee pops the arguments, we pass a positive value as the argSize, and the emitter will
+ // adjust its stack level accordingly.
+ // If the caller needs to explicitly pop its arguments, we must pass a negative value, and then do the
+ // pop when we're done.
+ ssize_t argSizeForEmitter = stackArgBytes;
+ if ((call->gtFlags & GTF_CALL_POP_ARGS) != 0)
+ {
+ argSizeForEmitter = -stackArgBytes;
+ }
+
+#endif // defined(_TARGET_X86_)
+
+ if (target != nullptr)
+ {
+ if (target->isContainedIndir())
+ {
+ if (target->AsIndir()->HasBase() && target->AsIndir()->Base()->isContainedIntOrIImmed())
+ {
+                // Note that if gtControlExpr is an indir of an absolute address, we mark it as
+                // contained only if it can be encoded as a PC-relative offset.
+ assert(target->AsIndir()->Base()->AsIntConCommon()->FitsInAddrBase(compiler));
+
+ genEmitCall(emitter::EC_FUNC_TOKEN_INDIR, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo)(void*) target->AsIndir()
+ ->Base()
+ ->AsIntConCommon()
+ ->IconValue() X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+ else
+ {
+ genEmitCall(emitter::EC_INDIR_ARD, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) target->AsIndir() X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+ }
+ else
+ {
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ assert(genIsValidIntReg(target->gtRegNum));
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr // addr
+ X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset, genConsumeReg(target));
+ }
+ }
+#ifdef FEATURE_READYTORUN_COMPILER
+ else if (call->gtEntryPoint.addr != nullptr)
+ {
+ genEmitCall((call->gtEntryPoint.accessType == IAT_VALUE) ? emitter::EC_FUNC_TOKEN
+ : emitter::EC_FUNC_TOKEN_INDIR,
+ methHnd, INDEBUG_LDISASM_COMMA(sigInfo)(void*) call->gtEntryPoint.addr X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+#endif
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+
+ // tracking of region protected by the monitor in synchronized methods
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ fPossibleSyncHelperCall = true;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ addr = call->gtDirectCallAddress;
+ }
+
+ // Non-virtual direct calls to known addresses
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr X86_ARG(argSizeForEmitter),
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+#if defined(_TARGET_X86_)
+ // The call will pop its arguments.
+ genStackLevel -= stackArgBytes;
+#endif // defined(_TARGET_X86_)
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+    // TODO-XArch-Bug?: As a matter of fact, shouldn't we be killing all of the callee-trashed regs here?
+    // For now we will assert that, other than the arg regs, the gc-ref/byref sets don't contain any other
+    // registers from RBM_CALLEE_TRASH.
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(returnType))
+ {
+ // Spill the value from the fp stack.
+ // Then, load it into the target register.
+ call->gtFlags |= GTF_SPILL;
+ regSet.rsSpillFPStack(call);
+ call->gtFlags |= GTF_SPILLED;
+ call->gtFlags &= ~GTF_SPILL;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(retTypeDesc != nullptr);
+ unsigned regCount = retTypeDesc->GetReturnRegCount();
+
+                // If the regs allocated to the call node are different from the ABI return
+                // regs in which the call has returned its result, move the result
+                // into the regs allocated to the call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = retTypeDesc->GetReturnRegType(i);
+ returnReg = retTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ // A Vector3 return value is stored in xmm0 and xmm1.
+ // RyuJIT assumes that the upper unused bits of xmm1 are cleared but
+ // the native compiler doesn't guarantee it.
+ if (returnType == TYP_SIMD12)
+ {
+ returnReg = retTypeDesc->GetABIReturnReg(1);
+ // Clear the upper 32 bits by two shift instructions.
+ // retReg = retReg << 96
+ // retReg = retReg >> 96
+ getEmitter()->emitIns_R_I(INS_pslldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(TYP_SIMD12), returnReg, 12);
+ }
+#endif // FEATURE_SIMD
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+ // correct argument registers.
+ returnReg = REG_PINVOKE_TCB;
+ }
+ else
+#endif // _TARGET_X86_
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
+
+#if defined(_TARGET_X86_)
+ //-------------------------------------------------------------------------
+ // Create a label for tracking of region protected by the monitor in synchronized methods.
+ // This needs to be here, rather than above where fPossibleSyncHelperCall is set,
+ // so the GC state vars have been updated before creating the label.
+
+ if (fPossibleSyncHelperCall)
+ {
+ switch (helperNum)
+ {
+ case CORINFO_HELP_MON_ENTER:
+ case CORINFO_HELP_MON_ENTER_STATIC:
+ noway_assert(compiler->syncStartEmitCookie == NULL);
+ compiler->syncStartEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncStartEmitCookie != NULL);
+ break;
+ case CORINFO_HELP_MON_EXIT:
+ case CORINFO_HELP_MON_EXIT_STATIC:
+ noway_assert(compiler->syncEndEmitCookie == NULL);
+ compiler->syncEndEmitCookie =
+ getEmitter()->emitAddLabel(gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ noway_assert(compiler->syncEndEmitCookie != NULL);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Is the caller supposed to pop the arguments?
+ if (((call->gtFlags & GTF_CALL_POP_ARGS) != 0) && (stackArgBytes != 0))
+ {
+ genAdjustSP(stackArgBytes);
+ }
+#endif // _TARGET_X86_
+}
+
+// Produce code for a GT_JMP node.
+// The arguments of the caller need to be transferred to the callee before exiting the caller.
+// The actual jump to the callee is generated as part of the caller's epilog sequence.
+// Therefore the codegen for GT_JMP only needs to ensure that the callee's arguments are set up correctly.
+void CodeGen::genJmpMethod(GenTreePtr jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+ assert(compiler->compJmpOpUsed);
+
+ // If no arguments, nothing to do
+ if (compiler->info.compArgsCount == 0)
+ {
+ return;
+ }
+
+ // Make sure register arguments are in their initial registers
+ // and stack arguments are put back as well.
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+    // First, move any enregistered stack arguments back to the stack.
+    // At the same time, any reg arg not in the correct reg is moved back to its stack location.
+    //
+    // We are not strictly required to spill reg args that are not in the desired reg for a jmp call,
+    // but that would require us to deal with circularity while moving values around. Spilling
+    // to the stack keeps the implementation simple, which is not a bad trade-off given that jmp calls
+    // are not frequent.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg && (varDsc->lvRegNum != REG_STK))
+ {
+            // Skip reg args that are already in the right register for the jmp call.
+            // If not, we will spill such args to their stack locations.
+ //
+ // If we need to generate a tail call profiler hook, then spill all
+ // arg regs to free them up for the callback.
+ if (!compiler->compIsProfilerHookNeeded() && (varDsc->lvRegNum == varDsc->lvArgReg))
+ {
+ continue;
+ }
+ }
+ else if (varDsc->lvRegNum == REG_STK)
+ {
+            // Skip args that are currently living on the stack.
+ continue;
+ }
+
+ // If we came here it means either a reg argument not in the right register or
+ // a stack argument currently living in a register. In either case the following
+ // assert should hold.
+ assert(varDsc->lvRegNum != REG_STK);
+
+ var_types loadType = varDsc->lvaArgType();
+ getEmitter()->emitIns_S_R(ins_Store(loadType), emitTypeSize(loadType), varDsc->lvRegNum, varNum, 0);
+
+ // Update lvRegNum life and GC info to indicate lvRegNum is dead and varDsc stack slot is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of varDsc->lvRegNum.
+ regMaskTP tempMask = varDsc->lvRegMask();
+ regSet.RemoveMaskVars(tempMask);
+ gcInfo.gcMarkRegSetNpt(tempMask);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (!VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming live\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing live\n", varNum);
+ }
+#endif // DEBUG
+
+ VarSetOps::AddElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+
+#ifdef PROFILING_SUPPORTED
+ // At this point all arg regs are free.
+ // Emit tail call profiler callback.
+ genProfilingLeaveCallback(CORINFO_HELP_PROF_FCN_TAILCALL);
+#endif
+
+    // Next, move any register arguments that are not currently in their argument registers back into them.
+ regMaskTP fixedIntArgMask = RBM_NONE; // tracks the int arg regs occupying fixed args in case of a vararg method.
+ unsigned firstArgVarNum = BAD_VAR_NUM; // varNum of the first argument in case of a vararg method.
+ for (varNum = 0; (varNum < compiler->info.compArgsCount); varNum++)
+ {
+ varDsc = compiler->lvaTable + varNum;
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = compiler->lvaTable + fieldVarNum;
+ }
+ noway_assert(varDsc->lvIsParam);
+
+ // Skip if arg not passed in a register.
+ if (!varDsc->lvIsRegArg)
+ {
+ continue;
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(varDsc))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ assert(structDesc.passedInRegisters);
+
+ unsigned __int8 offset0 = 0;
+ unsigned __int8 offset1 = 0;
+ var_types type0 = TYP_UNKNOWN;
+ var_types type1 = TYP_UNKNOWN;
+
+ // Get the eightbyte data
+ compiler->GetStructTypeOffset(structDesc, &type0, &type1, &offset0, &offset1);
+
+ // Move the values into the right registers.
+ //
+
+ // Update varDsc->lvArgReg and lvOtherArgReg life and GC Info to indicate varDsc stack slot is dead and
+ // argReg is going live. Note that we cannot modify varDsc->lvRegNum and lvOtherArgReg here because another
+ // basic block may not be expecting it. Therefore manually update life of argReg. Note that GT_JMP marks
+ // the end of the basic block and after which reg life and gc info will be recomputed for the new block in
+ // genCodeForBBList().
+ if (type0 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type0), emitTypeSize(type0), varDsc->lvArgReg, varNum, offset0);
+ regSet.rsMaskVars |= genRegMask(varDsc->lvArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvArgReg, type0);
+ }
+
+ if (type1 != TYP_UNKNOWN)
+ {
+ getEmitter()->emitIns_R_S(ins_Load(type1), emitTypeSize(type1), varDsc->lvOtherArgReg, varNum, offset1);
+ regSet.rsMaskVars |= genRegMask(varDsc->lvOtherArgReg);
+ gcInfo.gcMarkRegPtrVal(varDsc->lvOtherArgReg, type1);
+ }
+
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+ else
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // Register argument
+ noway_assert(isRegParamType(genActualType(varDsc->TypeGet())));
+
+ // Is register argument already in the right register?
+ // If not load it from its stack location.
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varDsc->lvRegNum != argReg)
+ {
+ assert(genIsValidReg(argReg));
+ getEmitter()->emitIns_R_S(ins_Load(loadType), emitTypeSize(loadType), argReg, varNum, 0);
+
+ // Update argReg life and GC Info to indicate varDsc stack slot is dead and argReg is going live.
+ // Note that we cannot modify varDsc->lvRegNum here because another basic block may not be expecting it.
+ // Therefore manually update life of argReg. Note that GT_JMP marks the end of the basic block
+ // and after which reg life and gc info will be recomputed for the new block in genCodeForBBList().
+ regSet.AddMaskVars(genRegMask(argReg));
+ gcInfo.gcMarkRegPtrVal(argReg, loadType);
+ if (compiler->lvaIsGCTracked(varDsc))
+ {
+#ifdef DEBUG
+ if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex))
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u becoming dead\n", varNum);
+ }
+ else
+ {
+ JITDUMP("\t\t\t\t\t\t\tVar V%02u continuing dead\n", varNum);
+ }
+#endif // DEBUG
+
+ VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+#if FEATURE_VARARG && defined(_TARGET_AMD64_)
+        // In the case of a jmp call to a vararg method, also pass the float/double arg in the corresponding int arg
+        // register. This is due to the AMD64 ABI, which requires floating point values passed to varargs functions to
+        // be passed in both integer and floating point registers. It doesn't apply to x86, which passes floating point
+        // values on the stack.
+ if (compiler->info.compIsVarArgs)
+ {
+ regNumber intArgReg;
+ var_types loadType = varDsc->lvaArgType();
+ regNumber argReg = varDsc->lvArgReg; // incoming arg register
+
+ if (varTypeIsFloating(loadType))
+ {
+ intArgReg = compiler->getCallArgIntRegister(argReg);
+ instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
+ inst_RV_RV(ins, argReg, intArgReg, loadType);
+ }
+ else
+ {
+ intArgReg = argReg;
+ }
+
+ fixedIntArgMask |= genRegMask(intArgReg);
+
+ if (intArgReg == REG_ARG_0)
+ {
+ assert(firstArgVarNum == BAD_VAR_NUM);
+ firstArgVarNum = varNum;
+ }
+ }
+#endif // FEATURE_VARARG
+ }
+
+#if FEATURE_VARARG && defined(_TARGET_AMD64_)
+ // Jmp call to a vararg method - if the method has fewer than 4 fixed arguments,
+ // load the remaining arg registers (both int and float) from the corresponding
+    // shadow stack slots. This is because we don't know the number and types
+    // of the non-fixed params passed by the caller; therefore we have to assume the worst case,
+    // with the caller passing float/double args in both int and float arg regs.
+ //
+ // This doesn't apply to x86, which doesn't pass floating point values in floating
+ // point registers.
+ //
+    // The caller could have passed gc-ref/byref type var args. Since these are var args,
+    // the callee has no way of knowing their gc-ness. Therefore, mark the region that loads
+    // the remaining arg registers from shadow stack slots as non-GC interruptible.
+ if (fixedIntArgMask != RBM_NONE)
+ {
+ assert(compiler->info.compIsVarArgs);
+ assert(firstArgVarNum != BAD_VAR_NUM);
+
+ regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
+ if (remainingIntArgMask != RBM_NONE)
+ {
+ instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
+ getEmitter()->emitDisableGC();
+ for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+ regMaskTP argRegMask = genRegMask(argReg);
+
+ if ((remainingIntArgMask & argRegMask) != 0)
+ {
+ remainingIntArgMask &= ~argRegMask;
+ getEmitter()->emitIns_R_S(INS_mov, EA_8BYTE, argReg, firstArgVarNum, argOffset);
+
+ // also load it in corresponding float arg reg
+ regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
+ inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
+ }
+
+ argOffset += REGSIZE_BYTES;
+ }
+ getEmitter()->emitEnableGC();
+ }
+ }
+#endif // FEATURE_VARARG
+}
+
+// produce code for a GT_LEA subnode
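+// The address is emitted in one of three forms, depending on which components are
+// present: [base + index*scale + offset], [base + offset], or [index*scale + offset].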
+void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
+{
+ emitAttr size = emitTypeSize(lea);
+ genConsumeOperands(lea);
+
+ if (lea->Base() && lea->Index())
+ {
+ regNumber baseReg = lea->Base()->gtRegNum;
+ regNumber indexReg = lea->Index()->gtRegNum;
+ getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, baseReg, indexReg, lea->gtScale, lea->gtOffset);
+ }
+ else if (lea->Base())
+ {
+ getEmitter()->emitIns_R_AR(INS_lea, size, lea->gtRegNum, lea->Base()->gtRegNum, lea->gtOffset);
+ }
+ else if (lea->Index())
+ {
+ getEmitter()->emitIns_R_ARX(INS_lea, size, lea->gtRegNum, REG_NA, lea->Index()->gtRegNum, lea->gtScale,
+ lea->gtOffset);
+ }
+
+ genProduceReg(lea);
+}
+
+//-------------------------------------------------------------------------------------------
+// genJumpKindsForTree: Determine the number and kinds of conditional branches
+// necessary to implement the given GT_CMP node
+//
+// Arguments:
+//    cmpTree           - (input)  The GenTree relop node that was used to set the condition codes
+//    jmpKind[2]        - (output) One or two conditional branch instructions
+//    jmpToTrueLabel[2] - (output) When true we branch to the true case
+//                        When false we create a second label and branch to the false case
+//                        Only GT_EQ for floating point compares can have a false value.
+//
+// Return Value:
+// Sets the proper values into the array elements of jmpKind[] and jmpToTrueLabel[]
+//
+// Assumptions:
+// At least one conditional branch instruction will be returned.
+// Typically only one conditional branch is needed
+// and the second jmpKind[] value is set to EJ_NONE
+//
+// Notes:
+// jmpToTrueLabel[i] = true implies branch when the compare operation is true.
+// jmpToTrueLabel[i] = false implies branch when the compare operation is false.
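+//
+// For example, an ordered floating point GT_EQ produces jmpKind[0] = EJ_jpe with
+// jmpToTrueLabel[0] = false (unordered operands make the compare false) and
+// jmpKind[1] = EJ_je to the true label; integer compares always need only jmpKind[0].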
+//-------------------------------------------------------------------------------------------
+
+// static
+void CodeGen::genJumpKindsForTree(GenTreePtr cmpTree, emitJumpKind jmpKind[2], bool jmpToTrueLabel[2])
+{
+ // Except for BEQ (= ordered GT_EQ) both jumps are to the true label.
+ jmpToTrueLabel[0] = true;
+ jmpToTrueLabel[1] = true;
+
+ // For integer comparisons just use genJumpKindForOper
+ if (!varTypeIsFloating(cmpTree->gtOp.gtOp1->gtEffectiveVal()))
+ {
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ jmpKind[0] = genJumpKindForOper(cmpTree->gtOper, compareKind);
+ jmpKind[1] = EJ_NONE;
+ }
+ else
+ {
+ assert(cmpTree->OperIsCompare());
+
+ // For details on how we arrived at this mapping, see the comment block in genCodeForTreeNode()
+ // while generating code for compare operators (e.g. GT_EQ etc.).
+ if ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Must branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_GT:
+ jmpKind[0] = EJ_jb;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ case GT_GE:
+ jmpKind[0] = EJ_jbe;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_jpe;
+ jmpKind[1] = EJ_jne;
+ break;
+
+ case GT_EQ:
+ jmpKind[0] = EJ_je;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else // ((cmpTree->gtFlags & GTF_RELOP_NAN_UN) == 0)
+ {
+ // Do not branch if we have a NaN, unordered
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_GT:
+ jmpKind[0] = EJ_ja;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_LE:
+ case GT_GE:
+ jmpKind[0] = EJ_jae;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_NE:
+ jmpKind[0] = EJ_jne;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ case GT_EQ:
+ jmpKind[0] = EJ_jpe;
+ jmpKind[1] = EJ_je;
+ jmpToTrueLabel[0] = false;
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ }
+}
+
+#if !defined(_TARGET_64BIT_)
+//------------------------------------------------------------------------
+// genJumpKindsForTreeLongHi: Generate the jump kinds for the compare of the high
+// parts of a long type compare on x86. The caller emits the low-part compare
+// separately; it is only needed when the high parts are equal.
+//
+// Arguments:
+// cmpTree - The GT_CMP node
+// jmpKind - Return array of jump kinds: jmpKind[0] is the jump to the true label,
+// jmpKind[1] is the jump to the false label (EJ_NONE means no jump is needed).
+//
+// Return Value:
+// None.
+//
+void CodeGen::genJumpKindsForTreeLongHi(GenTreePtr cmpTree, emitJumpKind jmpKind[2])
+{
+ assert(cmpTree->OperIsCompare());
+ CompareKind compareKind = ((cmpTree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ switch (cmpTree->gtOper)
+ {
+ case GT_LT:
+ case GT_LE:
+ if (compareKind == CK_SIGNED)
+ {
+ jmpKind[0] = EJ_jl;
+ jmpKind[1] = EJ_jg;
+ }
+ else
+ {
+ jmpKind[0] = EJ_jb;
+ jmpKind[1] = EJ_ja;
+ }
+ break;
+
+ case GT_GT:
+ case GT_GE:
+ if (compareKind == CK_SIGNED)
+ {
+ jmpKind[0] = EJ_jg;
+ jmpKind[1] = EJ_jl;
+ }
+ else
+ {
+ jmpKind[0] = EJ_ja;
+ jmpKind[1] = EJ_jb;
+ }
+ break;
+
+ case GT_EQ:
+ // GT_EQ will not jump to the true label if the hi parts are equal
+ jmpKind[0] = EJ_NONE;
+ jmpKind[1] = EJ_jne;
+ break;
+
+ case GT_NE:
+ // GT_NE will always jump to the true label if the high parts are not equal
+ jmpKind[0] = EJ_jne;
+ jmpKind[1] = EJ_NONE;
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// genCompareLong: Generate code for comparing two longs on x86 when the result of the compare
+// is manifested in a register.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// For long compares, we need to compare the high parts of operands first, then the low parts.
+// If the high compare is false, we do not need to compare the low parts. For less than and
+// greater than, if the high compare is true, we can assume the entire compare is true. For
+// compares that are realized in a register, we will generate:
+//
+// Opcode x86 equivalent Comment
+// ------ -------------- -------
+// GT_EQ cmp hiOp1,hiOp2 If any part is not equal, the entire compare
+// jne label is false.
+// cmp loOp1,loOp2
+// label: sete
+//
+// GT_NE cmp hiOp1,hiOp2 If any part is not equal, the entire compare
+// jne label is true.
+// cmp loOp1,loOp2
+// label: setne
+//
+// GT_LT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setb
+//
+// GT_LE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setbe
+//
+// GT_GT; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: seta
+//
+// GT_GE; unsigned cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne label correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// label: setae
+//
+// For signed long comparisons, we need additional labels, as we need to use signed conditions on the
+// "set" instruction:
+//
+// GT_LT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setb Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setl Signed set for high compare
+// labelFinal:
+//
+// GT_LE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setbe Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setle Signed set for hi compare
+// labelFinal:
+//
+// GT_GT; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// seta Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setg Signed set for high compare
+// labelFinal:
+//
+// GT_GE; signed cmp hiOp1,hiOp2 If hiOp1 is not equal to hiOp2, the flags are set
+// jne labelHi correctly and we do not need to check lo. Otherwise,
+// cmp loOp1,loOp2 we need to compare the lo halves
+// setae Unsigned set for lo compare
+// jmp labelFinal
+// labelHi: setge Signed set for hi compare
+// labelFinal:
+//
+// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
+void CodeGen::genCompareLong(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+
+ assert(varTypeIsLong(op1->TypeGet()));
+ assert(varTypeIsLong(op2->TypeGet()));
+
+ regNumber targetReg = treeNode->gtRegNum;
+
+ genConsumeOperands(tree);
+
+ assert(targetReg != REG_NA);
+
+ GenTreePtr loOp1 = op1->gtGetOp1();
+ GenTreePtr hiOp1 = op1->gtGetOp2();
+ GenTreePtr loOp2 = op2->gtGetOp1();
+ GenTreePtr hiOp2 = op2->gtGetOp2();
+
+ // Create compare for the high parts
+ instruction ins = INS_cmp;
+ var_types cmpType = TYP_INT;
+ emitAttr cmpAttr = emitTypeSize(cmpType);
+
+ // Emit the compare instruction
+ getEmitter()->emitInsBinary(ins, cmpAttr, hiOp1, hiOp2);
+
+ // Generate the first jump for the high compare
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ BasicBlock* labelHi = genCreateTempLabel();
+ BasicBlock* labelFinal = genCreateTempLabel();
+
+ if (compareKind == CK_SIGNED && (tree->gtOper != GT_NE && tree->gtOper != GT_EQ))
+ {
+ // If we are doing a signed comparison, we need to do a signed set if the high compare is true,
+ // but an unsigned set if we fall through to the low compare. If we have a GT_NE or GT_EQ, we do not
+ // need to worry about the sign of the comparison, so we can use the simplified case.
+
+ // We only have to check for equality for the hi comparison. If they are not equal, then the set will
+ // do the right thing. If they are equal, we have to check the lo halves.
+ inst_JMP(EJ_jne, labelHi);
+
+ // Emit the comparison. Perform the set for the lo. Jump to labelFinal
+ getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
+
+ // The low set must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ inst_SET(jumpKindLo, targetReg);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+
+ inst_JMP(EJ_jmp, labelFinal);
+
+ // Define the label for hi jump target here. If we have jumped here, we want to set
+ // the target register based on the jump kind of the actual compare type.
+
+ genDefineTempLabel(labelHi);
+ inst_SET(genJumpKindForOper(tree->gtOper, compareKind), targetReg);
+
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+
+ genDefineTempLabel(labelFinal);
+ }
+ else
+ {
+ // If the compare is unsigned, or if the sign doesn't change the set instruction, we can use
+ // the same set logic for both the hi and lo compare, so we don't need to jump to a high label,
+ // we can just jump to the set that the lo compare will use.
+
+ // We only have to check for equality for the hi comparison. If they are not equal, then the set will
+ // do the right thing. If they are equal, we have to check the lo halves.
+ inst_JMP(EJ_jne, labelFinal);
+
+ // Emit the comparison
+ getEmitter()->emitInsBinary(ins, cmpAttr, loOp1, loOp2);
+
+ // Define the label for hi jump target here. If we have jumped here, we want to set
+ // the target register based on the jump kind of the lower half (the actual compare
+ // type). If we have fallen through, then we are doing a normal int compare for the
+ // lower parts
+
+ genDefineTempLabel(labelFinal);
+
+ // The low set must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ inst_SET(jumpKindLo, targetReg);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genJTrueLong: Generate code for comparing two longs on x86 for the case where the result
+// is not manifested in a register.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// For long compares, we need to compare the high parts of operands first, then the low parts.
+// We only have to do the low compare if the high parts of the operands are equal.
+//
+// In the case where the result of a rel-op is not realized in a register, we generate:
+//
+// Opcode x86 equivalent Comment
+// ------ -------------- -------
+//
+// GT_LT; unsigned cmp hiOp1,hiOp2
+// jb trueLabel
+// ja falseLabel
+// cmp loOp1,loOp2
+// jb trueLabel
+// falseLabel:
+//
+// GT_LE; unsigned cmp hiOp1,hiOp2
+// jb trueLabel
+// ja falseLabel
+// cmp loOp1,loOp2
+// jbe trueLabel
+// falseLabel:
+//
+// GT_GT; unsigned cmp hiOp1,hiOp2
+// ja trueLabel
+// jb falseLabel
+// cmp loOp1,loOp2
+// ja trueLabel
+// falseLabel:
+//
+// GT_GE; unsigned cmp hiOp1,hiOp2
+// ja trueLabel
+// jb falseLabel
+// cmp loOp1,loOp2
+// jae trueLabel
+// falseLabel:
+//
+// GT_LT; signed cmp hiOp1,hiOp2
+// jl trueLabel
+// jg falseLabel
+// cmp loOp1,loOp2
+// jb trueLabel
+// falseLabel:
+//
+// GT_LE; signed cmp hiOp1,hiOp2
+// jl trueLabel
+// jg falseLabel
+// cmp loOp1,loOp2
+// jbe trueLabel
+// falseLabel:
+//
+// GT_GT; signed cmp hiOp1,hiOp2
+// jg trueLabel
+// jl falseLabel
+// cmp loOp1,loOp2
+// ja trueLabel
+// falseLabel:
+//
+// GT_GE; signed cmp hiOp1,hiOp2
+// jg trueLabel
+// jl falseLabel
+// cmp loOp1,loOp2
+// jae trueLabel
+// falseLabel:
+//
+// GT_EQ; cmp hiOp1,hiOp2
+// jne falseLabel
+// cmp loOp1,loOp2
+// je trueLabel
+// falseLabel:
+//
+// GT_NE; cmp hiOp1,hiOp2
+// jne trueLabel
+// cmp loOp1,loOp2
+// jne trueLabel
+// falseLabel:
+//
+// TODO-X86-CQ: Check if hi or lo parts of op2 are 0 and change the compare to a test.
+void CodeGen::genJTrueLong(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+
+ assert(varTypeIsLong(op1->TypeGet()));
+ assert(varTypeIsLong(op2->TypeGet()));
+
+ regNumber targetReg = treeNode->gtRegNum;
+
+ assert(targetReg == REG_NA);
+
+ GenTreePtr loOp1 = op1->gtGetOp1();
+ GenTreePtr hiOp1 = op1->gtGetOp2();
+ GenTreePtr loOp2 = op2->gtGetOp1();
+ GenTreePtr hiOp2 = op2->gtGetOp2();
+
+ // Emit the compare instruction
+ getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, hiOp1, hiOp2);
+
+ // Generate the first jump for the high compare
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+
+ // TODO-X86-CQ: If the next block is a BBJ_ALWAYS, we can set falseLabel = compiler->compCurBB->bbNext->bbJumpDest.
+ BasicBlock* falseLabel = genCreateTempLabel();
+
+ emitJumpKind jumpKindHi[2];
+
+ // Generate the jumps for the high compare
+ genJumpKindsForTreeLongHi(tree, jumpKindHi);
+
+ BasicBlock* trueLabel = compiler->compCurBB->bbJumpDest;
+
+ if (jumpKindHi[0] != EJ_NONE)
+ {
+ inst_JMP(jumpKindHi[0], trueLabel);
+ }
+
+ if (jumpKindHi[1] != EJ_NONE)
+ {
+ inst_JMP(jumpKindHi[1], falseLabel);
+ }
+
+ // The low jump must be unsigned
+ emitJumpKind jumpKindLo = genJumpKindForOper(tree->gtOper, CK_UNSIGNED);
+
+ // Emit the comparison and the jump to the trueLabel
+ getEmitter()->emitInsBinary(INS_cmp, EA_4BYTE, loOp1, loOp2);
+
+ inst_JMP(jumpKindLo, trueLabel);
+
+ // Generate falseLabel, which is the false path. We will jump here if the high compare is false
+ // or fall through if the low compare is false.
+ genDefineTempLabel(falseLabel);
+}
+#endif //! defined(_TARGET_64BIT_)
+
+//------------------------------------------------------------------------
+// genCompareFloat: Generate code for comparing two floating point values
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+// Comments:
+// SSE2 instruction ucomis[s|d] performs an unordered comparison and
+// updates rFLAGS register as follows.
+// Result of compare ZF PF CF
+// ----------------- ------------
+// Unordered 1 1 1 <-- this result implies one of the operands of the compare is a NaN.
+// Greater 0 0 0
+// Less Than 0 0 1
+// Equal 1 0 0
+//
+// From the above table the following equalities follow. As per ECMA spec *.UN opcodes perform
+// unordered comparison of floating point values. That is *.UN comparisons result in true when
+// one of the operands is a NaN, whereas ordered comparisons result in false.
+//
+// Opcode Amd64 equivalent Comment
+// ------ ----------------- --------
+// BLT.UN(a,b) ucomis[s|d] a, b Jb branches if CF=1, which means either a<b or unordered from the above
+// jb table
+//
+// BLT(a,b) ucomis[s|d] b, a Ja branches if CF=0 and ZF=0, which means b>a, which in turn implies a<b
+// ja
+//
+// BGT.UN(a,b) ucomis[s|d] b, a branch if b<a or unordered ==> branch if a>b or unordered
+// jb
+//
+// BGT(a, b) ucomis[s|d] a, b branch if a>b
+// ja
+//
+// BLE.UN(a,b) ucomis[s|d] a, b jbe branches if CF=1 or ZF=1, which implies a<=b or unordered
+// jbe
+//
+// BLE(a,b) ucomis[s|d] b, a jae branches if CF=0, which means b>=a, i.e. a<=b
+// jae
+//
+// BGE.UN(a,b) ucomis[s|d] b, a branch if b<=a or unordered ==> branch if a>=b or unordered
+// jbe
+//
+// BGE(a,b) ucomis[s|d] a, b branch if a>=b
+// jae
+//
+// BEQ.UN(a,b) ucomis[s|d] a, b branch if a==b or unordered. There is no BEQ.UN opcode in ECMA spec.
+// je This case is given for completeness, in case the JIT generates such
+// a gentree internally.
+//
+// BEQ(a,b) ucomis[s|d] a, b From the above table, PF=0 and ZF=1 corresponds to a==b.
+// jpe L1
+// je <true label>
+// L1:
+//
+// BNE(a,b) ucomis[s|d] a, b branch if a!=b. There is no BNE opcode in ECMA spec. This case is
+// jne given for completeness, in case the JIT generates such a gentree
+// internally.
+//
+// BNE.UN(a,b) ucomis[s|d] a, b From the above table, PF=1 or ZF=0 implies unordered or a!=b
+// jpe <true label>
+// jne <true label>
+//
+// As we can see from the above equalities, the operands of a compare operator need to be
+// reversed in the case of BLT/CLT, BGT.UN/CGT.UN, BLE/CLE, BGE.UN/CGE.UN.
+void CodeGen::genCompareFloat(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ genConsumeOperands(tree);
+
+ assert(varTypeIsFloating(op1Type));
+ assert(op1Type == op2Type);
+
+ regNumber targetReg = treeNode->gtRegNum;
+ instruction ins;
+ emitAttr cmpAttr;
+
+ bool reverseOps;
+ if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Unordered comparison case
+ reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
+ }
+ else
+ {
+ reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
+ }
+
+ if (reverseOps)
+ {
+ GenTreePtr tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ ins = ins_FloatCompare(op1Type);
+ cmpAttr = emitTypeSize(op1Type);
+
+ getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// genCompareInt: Generate code for comparing ints or, on amd64, longs.
+//
+// Arguments:
+// treeNode - the compare tree
+//
+// Return Value:
+// None.
+void CodeGen::genCompareInt(GenTreePtr treeNode)
+{
+ assert(treeNode->OperIsCompare());
+
+ GenTreeOp* tree = treeNode->AsOp();
+ GenTreePtr op1 = tree->gtOp1;
+ GenTreePtr op2 = tree->gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+ genConsumeOperands(tree);
+
+ instruction ins;
+ emitAttr cmpAttr;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
+ assert(!varTypeIsFloating(op2Type));
+
+#ifdef _TARGET_X86_
+ assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
+#endif // _TARGET_X86_
+
+ // By default we use an int32 sized cmp instruction
+ //
+ ins = INS_cmp;
+ var_types cmpType = TYP_INT;
+
+ // In the if/then/else statement below we may change the
+ // 'cmpType' and/or 'ins' to generate a smaller instruction
+
+ // Are we comparing two values that are the same size?
+ //
+ if (genTypeSize(op1Type) == genTypeSize(op2Type))
+ {
+ if (op1Type == op2Type)
+ {
+ // If both types are exactly the same we can use that type
+ cmpType = op1Type;
+ }
+ else if (genTypeSize(op1Type) == 8)
+ {
+ // If we have two different int64 types we need to use a long compare
+ cmpType = TYP_LONG;
+ }
+
+ cmpAttr = emitTypeSize(cmpType);
+ }
+ else // Here we know that (op1Type != op2Type)
+ {
+ // Do we have a short compare against a constant in op2?
+ //
+ // We checked for this case in LowerCmp(): if we can perform a small compare immediate,
+ // we labeled this compare with GTF_RELOP_SMALL and, for unsigned small non-equality
+ // compares, with the GTF_UNSIGNED flag as well.
+ //
+ if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
+ {
+ assert(varTypeIsSmall(op1Type));
+ cmpType = op1Type;
+ }
+#ifdef _TARGET_AMD64_
+ else // compare two different sized operands
+ {
+ // For this case we don't want any memory operands, only registers or immediates
+ //
+ assert(!op1->isContainedMemoryOp());
+ assert(!op2->isContainedMemoryOp());
+
+ // Check for the case where one operand is an int64 type.
+ // Lower should have placed the 32-bit operand in a register;
+ // for signed comparisons we will sign-extend the 32-bit value in place.
+ //
+ bool op1Is64Bit = (genTypeSize(op1Type) == 8);
+ bool op2Is64Bit = (genTypeSize(op2Type) == 8);
+ if (op1Is64Bit)
+ {
+ cmpType = TYP_LONG;
+ if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
+ {
+ assert(op2->gtRegNum != REG_NA);
+ inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
+ }
+ }
+ else if (op2Is64Bit)
+ {
+ cmpType = TYP_LONG;
+ if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
+ {
+ assert(op1->gtRegNum != REG_NA);
+ }
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ cmpAttr = emitTypeSize(cmpType);
+ }
+
+ // See if we can generate a "test" instruction instead of a "cmp".
+ // For this to generate the correct conditional branch we must have
+ // a compare against zero.
+ //
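+ // For example, when op2 is zero, "test eax, eax" sets the flags exactly as "cmp eax, 0"
+ // would, but with a shorter encoding since no immediate byte is needed.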
+ if (op2->IsIntegralConst(0))
+ {
+ if (op1->isContained())
+ {
+ // op1 can be a contained memory op
+ // or the special contained GT_AND that we created in Lowering::LowerCmp()
+ //
+ if ((op1->OperGet() == GT_AND))
+ {
+ noway_assert(op1->gtOp.gtOp2->isContainedIntOrIImmed());
+
+ ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
+ op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
+ op1 = op1->gtOp.gtOp1; // overwrite op1
+
+ if (op1->isContainedMemoryOp())
+ {
+ // use the size of andOp1 if it is a contained memory op.
+ cmpAttr = emitTypeSize(op1->TypeGet());
+ }
+ // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ }
+ }
+ else // op1 is not contained thus it must be in a register
+ {
+ ins = INS_test;
+ op2 = op1; // we will generate "test reg1,reg1"
+ // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ }
+ }
+
+ getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+
+ // Are we evaluating this into a register?
+ if (targetReg != REG_NA)
+ {
+ genSetRegToCond(targetReg, tree);
+ genProduceReg(tree);
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+// genSetRegToCond: Set a register 'dstReg' to the appropriate one or zero value
+// corresponding to a binary Relational operator result.
+//
+// Arguments:
+// dstReg - The target register to set to 1 or 0
+// tree - The GenTree Relop node that was used to set the Condition codes
+//
+// Return Value: none
+//
+// Notes:
+// A full 64-bit value of either 1 or 0 is setup in the 'dstReg'
+//-------------------------------------------------------------------------------------------
+
+void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
+{
+ noway_assert((genRegMask(dstReg) & RBM_BYTE_REGS) != 0);
+
+ emitJumpKind jumpKind[2];
+ bool branchToTrueLabel[2];
+ genJumpKindsForTree(tree, jumpKind, branchToTrueLabel);
+
+ if (jumpKind[1] == EJ_NONE)
+ {
+ // Set (lower byte of) reg according to the flags
+ inst_SET(jumpKind[0], dstReg);
+ }
+ else
+ {
+#ifdef DEBUG
+ // jmpKind[1] != EJ_NONE implies BEQ and BNE.UN of floating point values.
+ // These are represented by two conditions.
+ if (tree->gtOper == GT_EQ)
+ {
+ // This must be an ordered comparison.
+ assert((tree->gtFlags & GTF_RELOP_NAN_UN) == 0);
+ }
+ else
+ {
+ // This must be BNE.UN
+ assert((tree->gtOper == GT_NE) && ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0));
+ }
+#endif
+
+ // Here is the sample code generated in each case:
+ // BEQ == cmp, jpe <false label>, je <true label>
+ // That is, to materialize the comparison, reg needs to be set if PF=0 and ZF=1
+ // setnp reg // if (PF==0) reg = 1 else reg = 0
+ // jpe L1 // Jmp if PF==1
+ // sete reg
+ // L1:
+ //
+ // BNE.UN == cmp, jpe <true label>, jne <true label>
+ // That is, to materialize the comparison, reg needs to be set if either PF=1 or ZF=0;
+ // setp reg
+ // jpe L1
+ // setne reg
+ // L1:
+
+ // reverse the jmpkind condition before setting dstReg if it is to false label.
+ inst_SET(branchToTrueLabel[0] ? jumpKind[0] : emitter::emitReverseJumpKind(jumpKind[0]), dstReg);
+
+ BasicBlock* label = genCreateTempLabel();
+ inst_JMP(jumpKind[0], label);
+
+ // second branch is always to true label
+ assert(branchToTrueLabel[1]);
+ inst_SET(jumpKind[1], dstReg);
+ genDefineTempLabel(label);
+ }
+
+ var_types treeType = tree->TypeGet();
+ if (treeType == TYP_INT || treeType == TYP_LONG)
+ {
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), dstReg, dstReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ else
+ {
+ noway_assert(treeType == TYP_BYTE);
+ }
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer cast
+// This method handles integer overflow checking casts
+// as well as ordinary integer casts.
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The treeNode is not a contained node and must have an assigned register.
+// For a signed convert from byte, the source must be in a byte-addressable register.
+// Neither the source nor target type can be a floating point type.
+//
+// TODO-XArch-CQ: Allow castOp to be a contained node without an assigned register.
+// TODO: refactor to use getCastDescription
+//
+void CodeGen::genIntToIntCast(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+ var_types dstType = treeNode->CastToType();
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ var_types srcType = genActualType(castOp->TypeGet());
+ bool isUnsignedSrc = varTypeIsUnsigned(srcType);
+
+ // if necessary, force the srcType to unsigned when the GT_UNSIGNED flag is set
+ if (!isUnsignedSrc && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ isUnsignedSrc = true;
+ }
+
+ bool requiresOverflowCheck = false;
+ bool needAndAfter = false;
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+ emitAttr size = EA_UNKNOWN;
+
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ // Widening cast
+
+ // Is this an Overflow checking cast?
+ // We only need to handle one case, as the other casts can never overflow.
+ // cast from TYP_INT to TYP_ULONG
+ //
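+ // For example, casting the TYP_INT value -1 to TYP_ULONG must throw: no unsigned 64-bit
+ // value corresponds to it, and the sign check emitted below (cmp/jl) catches this.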
+ if (treeNode->gtOverflow() && (srcType == TYP_INT) && (dstType == TYP_ULONG))
+ {
+ requiresOverflowCheck = true;
+ size = EA_ATTR(genTypeSize(srcType));
+ ins = INS_mov;
+ }
+ else
+ {
+ // we need the source size
+ size = EA_ATTR(genTypeSize(srcType));
+ noway_assert(size < EA_PTRSIZE);
+
+ ins = ins_Move_Extend(srcType, castOp->InReg());
+
+ /*
+ Special case: ins_Move_Extend assumes the destination type is no bigger
+ than TYP_INT. movsx and movzx can already extend all the way to
+ 64-bit, and a regular 32-bit mov clears the high 32 bits (like the non-existent movzxd),
+ but for a sign extension from TYP_INT to TYP_LONG, we need to use the movsxd opcode.
+ */
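+ // For example, a signed widening cast of a TYP_INT value to TYP_LONG ends up emitting
+ // "movsxd rax, ecx" (registers illustrative), which sign-extends the low 32 bits into all 64.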
+ if (!isUnsignedSrc && !isUnsignedDst && (size == EA_4BYTE) && (genTypeSize(dstType) > EA_4BYTE))
+ {
+#ifdef _TARGET_X86_
+ NYI_X86("Cast to 64 bit for x86/RyuJIT");
+#else // !_TARGET_X86_
+ ins = INS_movsxd;
+#endif // !_TARGET_X86_
+ }
+
+ /*
+ Special case: for a cast of byte to char we first
+ have to expand the byte (w/ sign extension), then
+ mask off the high bits.
+ Use 'movsx' followed by 'and'
+ */
+ if (!isUnsignedSrc && isUnsignedDst && (genTypeSize(dstType) < EA_4BYTE))
+ {
+ noway_assert(genTypeSize(dstType) == EA_2BYTE && size == EA_1BYTE);
+ needAndAfter = true;
+ }
+ }
+ }
+ else
+ {
+ // Narrowing cast, or sign-changing cast
+ noway_assert(genTypeSize(srcType) >= genTypeSize(dstType));
+
+ // Is this an Overflow checking cast?
+ if (treeNode->gtOverflow())
+ {
+ requiresOverflowCheck = true;
+ size = EA_ATTR(genTypeSize(srcType));
+ ins = INS_mov;
+ }
+ else
+ {
+ size = EA_ATTR(genTypeSize(dstType));
+ ins = ins_Move_Extend(dstType, castOp->InReg());
+ }
+ }
+
+ noway_assert(ins != INS_invalid);
+
+ genConsumeReg(castOp);
+
+ if (requiresOverflowCheck)
+ {
+ ssize_t typeMin = 0;
+ ssize_t typeMax = 0;
+ ssize_t typeMask = 0;
+ bool needScratchReg = false;
+ bool signCheckOnly = false;
+
+ /* Do we need to compare the value, or just check masks */
+
+ switch (dstType)
+ {
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ break;
+
+ case TYP_UBYTE:
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ break;
+
+ case TYP_CHAR:
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+
+ case TYP_INT:
+ if (srcType == TYP_UINT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF80000000LL;
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+ }
+ break;
+
+ case TYP_UINT:
+ if (srcType == TYP_INT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ needScratchReg = true;
+ }
+ break;
+
+ case TYP_LONG:
+ noway_assert(srcType == TYP_ULONG);
+ signCheckOnly = true;
+ break;
+
+ case TYP_ULONG:
+ noway_assert((srcType == TYP_LONG) || (srcType == TYP_INT));
+ signCheckOnly = true;
+ break;
+
+ default:
+ NO_WAY("Unknown type");
+ return;
+ }
+
+ if (signCheckOnly)
+ {
+ // We only need to check for a negative value in sourceReg
+ inst_RV_IV(INS_cmp, sourceReg, 0, size);
+ genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+ }
+ else
+ {
+ regNumber tmpReg = REG_NA;
+
+ if (needScratchReg)
+ {
+ // We need an additional temp register
+ // Make sure we have exactly one allocated.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+ }
+
+ // When we are converting from unsigned or to unsigned, we
+ // will only have to check for any bits set using 'typeMask'
+ if (isUnsignedSrc || isUnsignedDst)
+ {
+ if (needScratchReg)
+ {
+ inst_RV_RV(INS_mov, tmpReg, sourceReg, TYP_LONG); // Move the 64-bit value to a writeable temp reg
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, size, tmpReg, 32); // Shift right by 32 bits
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW); // Throw if the shifted result is non-zero
+ }
+ else
+ {
+ noway_assert(typeMask != 0);
+ inst_RV_IV(INS_TEST, sourceReg, typeMask, size);
+ genJumpToThrowHlpBlk(EJ_jne, SCK_OVERFLOW);
+ }
+ }
+ else
+ {
+ // For a narrowing signed cast
+ //
+ // We must check the value is in a signed range.
+
+ // Compare with the MAX
+
+ noway_assert((typeMin != 0) && (typeMax != 0));
+
+ inst_RV_IV(INS_cmp, sourceReg, typeMax, size);
+ genJumpToThrowHlpBlk(EJ_jg, SCK_OVERFLOW);
+
+ // Compare with the MIN
+
+ inst_RV_IV(INS_cmp, sourceReg, typeMin, size);
+ genJumpToThrowHlpBlk(EJ_jl, SCK_OVERFLOW);
+ }
+ }
+
+ if (targetReg != sourceReg
+#ifdef _TARGET_AMD64_
+ // On amd64, we can hit this path for a same-register
+ // 4-byte to 8-byte widening conversion, and need to
+ // emit the instruction to set the high bits correctly.
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
+#endif // _TARGET_AMD64_
+ )
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+ else // non-overflow checking cast
+ {
+ noway_assert(size < EA_PTRSIZE || srcType == dstType);
+
+ // We may have code transformations that result in casts where srcType is the same as dstType.
+ // e.g. Bug 824281, in which a comma is split by the rationalizer, leaving an assignment of a
+ // long constant to a long lclVar.
+ if (srcType == dstType)
+ {
+ ins = INS_mov;
+ }
+ /* Is the value sitting in a non-byte-addressable register? */
+ else if (castOp->InReg() && (size == EA_1BYTE) && !isByteReg(sourceReg))
+ {
+ if (isUnsignedDst)
+ {
+ // for unsigned values we can AND, so it need not be a byte register
+ ins = INS_AND;
+ }
+ else
+ {
+ // Move the value into a byte register
+ noway_assert(!"Signed byte convert from non-byte-addressable register");
+ }
+
+ /* Generate "mov targetReg, castOp->gtReg */
+ if (targetReg != sourceReg)
+ {
+ inst_RV_RV(INS_mov, targetReg, sourceReg, srcType);
+ }
+ }
+
+ if (ins == INS_AND)
+ {
+ noway_assert((needAndAfter == false) && isUnsignedDst);
+
+ /* Generate "and reg, MASK */
+ unsigned fillPattern;
+ if (size == EA_1BYTE)
+ {
+ fillPattern = 0xff;
+ }
+ else if (size == EA_2BYTE)
+ {
+ fillPattern = 0xffff;
+ }
+ else
+ {
+ fillPattern = 0xffffffff;
+ }
+
+ inst_RV_IV(INS_AND, targetReg, fillPattern, EA_4BYTE);
+ }
+#ifdef _TARGET_AMD64_
+ else if (ins == INS_movsxd)
+ {
+ noway_assert(!needAndAfter);
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+#endif // _TARGET_AMD64_
+ else if (ins == INS_mov)
+ {
+ noway_assert(!needAndAfter);
+ if (targetReg != sourceReg
+#ifdef _TARGET_AMD64_
+ // On amd64, 'mov' is the opcode used to zero-extend from
+ // 4 bytes to 8 bytes.
+ || (EA_ATTR(genTypeSize(dstType)) == EA_8BYTE && EA_ATTR(genTypeSize(srcType)) == EA_4BYTE)
+#endif // _TARGET_AMD64_
+ )
+ {
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+ }
+ }
+ else
+ {
+ noway_assert(ins == INS_movsx || ins == INS_movzx);
+
+ /* Generate "mov targetReg, castOp->gtReg */
+ inst_RV_RV(ins, targetReg, sourceReg, srcType, size);
+
+ /* Mask off high bits for cast from byte to char */
+ if (needAndAfter)
+ {
+ noway_assert(genTypeSize(dstType) == 2 && ins == INS_movsx);
+ inst_RV_IV(INS_AND, targetReg, 0xFFFF, EA_4BYTE);
+ }
+ }
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a cast between float and double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// The cast is between float and double or vice versa.
+//
+void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
+{
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ // If not contained, must be a valid float reg.
+ if (!op1->isContained())
+ {
+ assert(genIsValidFloatReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+ if (srcType == dstType && targetReg == op1->gtRegNum)
+ {
+ // The source and destination types are the same and also reside in the same register;
+ // we just need to consume and produce the reg in this case.
+ ;
+ }
+ else
+ {
+ instruction ins = ins_FloatConv(dstType, srcType);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code to cast an int/long to float/double
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType= int32/uint32/int64/uint64 and DstType=float/double.
+//
+void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
+{
+ // int type --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ if (!op1->isContained())
+ {
+ assert(genIsValidIntReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+#if !defined(_TARGET_64BIT_)
+ NYI_IF(varTypeIsLong(srcType), "Conversion from long to float");
+#endif // !defined(_TARGET_64BIT_)
+
+ // Since xarch emitter doesn't handle reporting gc-info correctly while casting away gc-ness we
+ // ensure srcType of a cast is non gc-type. Codegen should never see BYREF as source type except
+ // for GT_LCL_VAR_ADDR and GT_LCL_FLD_ADDR that represent stack addresses and can be considered
+ // as TYP_I_IMPL. In all other cases where src operand is a gc-type and not known to be on stack,
+ // Front-end (see fgMorphCast()) ensures this by assigning gc-type local to a non gc-type
+ // temp and using temp as operand of cast operation.
+ if (srcType == TYP_BYREF)
+ {
+ noway_assert(op1->OperGet() == GT_LCL_VAR_ADDR || op1->OperGet() == GT_LCL_FLD_ADDR);
+ srcType = TYP_I_IMPL;
+ }
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ noway_assert(!varTypeIsGC(srcType));
+
+ // We should never be seeing srcType whose size is not sizeof(int) nor sizeof(long).
+ // For conversions from byte/sbyte/int16/uint16 to float/double, we would expect
+ // either the front-end or lowering phase to have generated two levels of cast.
+ // The first one is for widening smaller int type to int32 and the second one is
+ // to the float/double.
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+ noway_assert((srcSize == EA_ATTR(genTypeSize(TYP_INT))) || (srcSize == EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
+ // here since they should have been lowered appropriately.
+ noway_assert(srcType != TYP_UINT);
+ noway_assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT));
+
+ // To convert int to a float/double, cvtsi2ss/sd SSE2 instruction is used
+ // which does a partial write to lower 4/8 bytes of xmm register keeping the other
+ // upper bytes unmodified. If "cvtsi2ss/sd xmmReg, r32/r64" occurs inside a loop,
+ // the partial write could introduce a false dependency and could cause a stall
+ // if there are further uses of xmmReg. We have such a case occurring with a
+ // customer reported version of SpectralNorm benchmark, resulting in 2x perf
+ // regression. To avoid false dependency, we emit "xorps xmmReg, xmmReg" before
+ // cvtsi2ss/sd instruction.
+
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitIns_R_R(INS_xorps, EA_4BYTE, treeNode->gtRegNum, treeNode->gtRegNum);
+
+ // Note that here we need to specify srcType that will determine
+ // the size of source reg/mem operand and rex.w prefix.
+ instruction ins = ins_FloatConv(dstType, TYP_INT);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
+
+ // Handle the case of srcType = TYP_ULONG. SSE2 conversion instruction
+ // will interpret ULONG value as LONG. Hence we need to adjust the
+ // result if sign-bit of srcType is set.
+ if (srcType == TYP_ULONG)
+ {
+ // The instruction sequence below is less accurate than what clang
+ // and gcc generate. However, we keep the current sequence for backward compatibility.
+ // If we change the instructions below, FloatingPointUtils::convertUInt64ToDouble
+ // should also be updated to keep the conversion results consistent.
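+ // Note: 0x43f0000000000000 is the IEEE-754 double encoding of 2^64. cvtsi2sd interprets a
+ // ULONG value v >= 2^63 as the signed value v - 2^64, so adding 2^64 back recovers v.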
+ assert(dstType == TYP_DOUBLE);
+ assert(!op1->isContained());
+
+ // Set the flags without modifying op1.
+ // test op1Reg, op1Reg
+ inst_RV_RV(INS_test, op1->gtRegNum, op1->gtRegNum, srcType);
+
+ // No need to adjust result if op1 >= 0 i.e. positive
+ // Jge label
+ BasicBlock* label = genCreateTempLabel();
+ inst_JMP(EJ_jge, label);
+
+ // Adjust the result
+ // result = result + 0x43f00000 00000000
+ // addsd resultReg, 0x43f00000 00000000
+ GenTreePtr* cns = &u8ToDblBitmask;
+ if (*cns == nullptr)
+ {
+ double d;
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x43f0000000000000LL;
+
+ *cns = genMakeConst(&d, dstType, treeNode, true);
+ }
+ inst_RV_TT(INS_addsd, treeNode->gtRegNum, *cns);
+
+ genDefineTempLabel(label);
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code to cast float/double to int/long
+//
+// Arguments:
+// treeNode - The GT_CAST node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Cast is a non-overflow conversion.
+// The treeNode must have an assigned register.
+// SrcType=float/double and DstType= int32/uint32/int64/uint64
+//
+// TODO-XArch-CQ: (Low-pri) - generate in-line code when DstType = uint64
+//
+void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
+{
+ // we don't expect to see overflow detecting float/double --> int type conversions here
+ // as they should have been converted into helper calls by front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+#ifdef DEBUG
+ if (!op1->isContained())
+ {
+ assert(genIsValidFloatReg(op1->gtRegNum));
+ }
+#endif
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+ // We should never be seeing dstType whose size is neither sizeof(TYP_INT) nor sizeof(TYP_LONG).
+ // For conversions to byte/sbyte/int16/uint16 from float/double, we would expect the
+ // front-end or lowering phase to have generated two levels of cast. The first one is
+ // for float or double to int32/uint32 and the second one for narrowing int32/uint32 to
+ // the required smaller int type.
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+ noway_assert((dstSize == EA_ATTR(genTypeSize(TYP_INT))) || (dstSize == EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // We shouldn't be seeing uint64 here as it should have been converted
+ // into a helper call by either front-end or lowering phase.
+ noway_assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))));
+
+ // If the dstType is TYP_UINT, the value may not fit in a signed 32-bit conversion even
+ // though the result only needs 32 bits. To handle this we pretend we are converting to a
+ // long (a 64-bit signed conversion) and then use only the low 32 bits of the result.
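+ // For example, converting 3000000000.0 to TYP_UINT: a 64-bit cvttsd2si produces 3000000000
+ // (too large for a signed 32-bit result), and its low 32 bits are exactly the desired uint value.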
+ if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))))
+ {
+ dstType = TYP_LONG;
+ }
+
+ // Note that we need to specify dstType here so that it will determine
+ // the size of destination integer register and also the rex.w prefix.
+ genConsumeOperands(treeNode->AsOp());
+ instruction ins = ins_FloatConv(TYP_INT, srcType);
+ getEmitter()->emitInsBinary(ins, emitTypeSize(dstType), treeNode, op1);
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// genCkfinite: Generate code for ckfinite opcode.
+//
+// Arguments:
+// treeNode - The GT_CKFINITE node
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// GT_CKFINITE node has reserved an internal register.
+//
+// TODO-XArch-CQ - mark the operand as contained if known to be in
+// memory (e.g. field or an array element).
+//
+void CodeGen::genCkfinite(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_CKFINITE);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ var_types targetType = treeNode->TypeGet();
+ int expMask = (targetType == TYP_FLOAT) ? 0x7F800000 : 0x7FF00000; // Bit mask to extract exponent.
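+ // NaNs and infinities are the only IEEE-754 values whose exponent field is all ones, so
+ // "exponent == expMask" below is equivalent to "the value is not finite".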
+ regNumber targetReg = treeNode->gtRegNum;
+
+ // Extract exponent into a register.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ genConsumeReg(op1);
+
+#ifdef _TARGET_64BIT_
+
+ // Copy the floating-point value to an integer register. If we copied a double into a long, then
+ // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
+ // bits of the integer register.
+ instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
+ inst_RV_RV(ins, op1->gtRegNum, tmpReg, targetType);
+ if (targetType == TYP_DOUBLE)
+ {
+ // right shift by 32 bits to get to exponent.
+ inst_RV_SH(INS_shr, EA_8BYTE, tmpReg, 32);
+ }
+
+ // Mask exponent with all 1's and check if the exponent is all 1's
+ inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
+
+ // If exponent is all 1's, throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ // if it is a finite value copy it to targetReg
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+
+#else // !_TARGET_64BIT_
+
+ // If the target type is TYP_DOUBLE, we want to extract the high 32 bits into the register.
+ // There is no easy way to do this. To not require an extra register, we'll use shuffles
+ // to move the high 32 bits into the low 32 bits, then shuffle it back, since we
+ // need to produce the value into the target register.
+ //
+ // For TYP_DOUBLE, we'll generate (for targetReg != op1->gtRegNum):
+ // movaps targetReg, op1->gtRegNum
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // movaps targetReg, op1->gtRegNum // copy the value again, instead of un-shuffling it
+ //
+ // For TYP_DOUBLE with (targetReg == op1->gtRegNum):
+ // shufps targetReg, targetReg, 0xB1 // WZYX => ZWXY
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= Y
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // shufps targetReg, targetReg, 0xB1 // ZWXY => WZYX
+ //
+ // For TYP_FLOAT, it's the same as _TARGET_64BIT_:
+ // mov_xmm2i tmpReg, targetReg // tmpReg <= low 32 bits
+ // and tmpReg, <mask>
+ // cmp tmpReg, <mask>
+ // je <throw block>
+ // movaps targetReg, op1->gtRegNum // only if targetReg != op1->gtRegNum
+
+ regNumber copyToTmpSrcReg; // The register we'll copy to the integer temp.
+
+ if (targetType == TYP_DOUBLE)
+ {
+ if (targetReg != op1->gtRegNum)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
+ copyToTmpSrcReg = targetReg;
+ }
+ else
+ {
+ copyToTmpSrcReg = op1->gtRegNum;
+ }
+
+ // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
+ // value, no matter the floating-point type.
+ inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
+
+ // Mask exponent with all 1's and check if the exponent is all 1's
+ inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, tmpReg, expMask, EA_4BYTE);
+
+ // If exponent is all 1's, throw ArithmeticException
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ if (targetReg != op1->gtRegNum)
+ {
+ // In both the TYP_FLOAT and TYP_DOUBLE case, the op1 register is untouched,
+ // so copy it to the targetReg. This is faster and smaller for TYP_DOUBLE
+ // than re-shuffling the targetReg.
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1->gtRegNum, targetType);
+ }
+ else if (targetType == TYP_DOUBLE)
+ {
+ // We need to re-shuffle the targetReg to get the correct result.
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, targetReg, targetReg, 0xb1);
+ }
+
+#endif // !_TARGET_64BIT_
+
+ genProduceReg(treeNode);
+}
+
+#ifdef _TARGET_AMD64_
+int CodeGenInterface::genSPtoFPdelta()
+{
+ int delta;
+
+#ifdef PLATFORM_UNIX
+
+ // We require frame chaining on Unix to support native tool unwinding (such as
+ // unwinding by the native debugger). We have a CLR-only extension to the
+ // unwind codes (UWOP_SET_FPREG_LARGE) to support SP->FP offsets larger than 240.
+ // If Unix ever supports EnC, the RSP == RBP assumption will have to be reevaluated.
+ delta = genTotalFrameSize();
+
+#else // !PLATFORM_UNIX
+
+ // As per Amd64 ABI, RBP offset from initial RSP can be between 0 and 240 if
+ // RBP needs to be reported in unwind codes. This case would arise for methods
+ // with localloc.
+ if (compiler->compLocallocUsed)
+ {
+ // We cannot base delta computation on compLclFrameSize since it changes from
+ // tentative to final frame layout and hence there is a possibility of
+ // under-estimating offset of vars from FP, which in turn results in under-
+ // estimating instruction size.
+ //
+ // To be predictable, and so as never to under-estimate the offset of vars from FP,
+ // we will always position FP at min(240, outgoing arg area size).
+ delta = Min(240, (int)compiler->lvaOutgoingArgSpaceSize);
+ }
+ else if (compiler->opts.compDbgEnC)
+ {
+ // vm assumption on EnC methods is that rsp and rbp are equal
+ delta = 0;
+ }
+ else
+ {
+ delta = genTotalFrameSize();
+ }
+
+#endif // !PLATFORM_UNIX
+
+ return delta;
+}
+
+//---------------------------------------------------------------------
+// genTotalFrameSize - return the total size of the stack frame, including local size,
+// callee-saved register size, etc. For AMD64, this does not include the caller-pushed
+// return address.
+//
+// Return value:
+// Total frame size
+//
+
+int CodeGenInterface::genTotalFrameSize()
+{
+ assert(!IsUninitialized(compiler->compCalleeRegsPushed));
+
+ int totalFrameSize = compiler->compCalleeRegsPushed * REGSIZE_BYTES + compiler->compLclFrameSize;
+
+ assert(totalFrameSize >= 0);
+ return totalFrameSize;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoFPdelta - return the offset from Caller-SP to the frame pointer.
+// This number is going to be negative, since the Caller-SP is at a higher
+// address than the frame pointer.
+//
+// There must be a frame pointer to call this function!
+//
+// We can't compute this directly from the Caller-SP, since the frame pointer
+// is based on a maximum delta from Initial-SP, so first we find SP, then
+// compute the FP offset.
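+//
+// As a worked example (numbers purely illustrative): with 2 callee-saved registers pushed,
+// a 0x40-byte local frame, the pushed return address and the pushed RBP, the Caller-SP to
+// Initial-SP delta is -(0x50 + 8 + 8) = -0x60; adding an SP-to-FP delta of 0x50 yields a
+// Caller-SP-to-FP delta of -0x10.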
+
+int CodeGenInterface::genCallerSPtoFPdelta()
+{
+ assert(isFramePointerUsed());
+ int callerSPtoFPdelta;
+
+ callerSPtoFPdelta = genCallerSPtoInitialSPdelta() + genSPtoFPdelta();
+
+ assert(callerSPtoFPdelta <= 0);
+ return callerSPtoFPdelta;
+}
+
+//---------------------------------------------------------------------
+// genCallerSPtoInitialSPdelta - return the offset from Caller-SP to Initial SP.
+//
+// This number will be negative.
+
+int CodeGenInterface::genCallerSPtoInitialSPdelta()
+{
+ int callerSPtoSPdelta = 0;
+
+ callerSPtoSPdelta -= genTotalFrameSize();
+ callerSPtoSPdelta -= REGSIZE_BYTES; // caller-pushed return address
+
+ // compCalleeRegsPushed does not account for the frame pointer
+ // TODO-Cleanup: shouldn't this be part of genTotalFrameSize?
+ if (isFramePointerUsed())
+ {
+ callerSPtoSPdelta -= REGSIZE_BYTES;
+ }
+
+ assert(callerSPtoSPdelta <= 0);
+ return callerSPtoSPdelta;
+}
+#endif // _TARGET_AMD64_
+
+//-----------------------------------------------------------------------------------------
+// genSSE2BitwiseOp - generate SSE2 code for the given oper as "Operand BitWiseOp BitMask"
+//
+// Arguments:
+// treeNode - tree node
+//
+// Return value:
+// None
+//
+// Assumptions:
+// i) tree oper is one of GT_NEG or GT_INTRINSIC Abs()
+// ii) tree type is floating point type.
+// iii) caller of this routine needs to call genProduceReg()
+void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
+{
+ regNumber targetReg = treeNode->gtRegNum;
+ var_types targetType = treeNode->TypeGet();
+ assert(varTypeIsFloating(targetType));
+
+ float f;
+ double d;
+ GenTreePtr* bitMask = nullptr;
+ instruction ins = INS_invalid;
+ void* cnsAddr = nullptr;
+ bool dblAlign = false;
+
+ switch (treeNode->OperGet())
+ {
+ case GT_NEG:
+ // Neg(x) = flip the sign bit.
+ // Neg(f) = f ^ 0x80000000
+ // Neg(d) = d ^ 0x8000000000000000
+ ins = genGetInsForOper(GT_XOR, targetType);
+ if (targetType == TYP_FLOAT)
+ {
+ bitMask = &negBitmaskFlt;
+
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x80000000;
+ cnsAddr = &f;
+ }
+ else
+ {
+ bitMask = &negBitmaskDbl;
+
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x8000000000000000LL;
+ cnsAddr = &d;
+ dblAlign = true;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ assert(treeNode->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs);
+
+ // Abs(x) = set sign-bit to zero
+ // Abs(f) = f & 0x7fffffff
+ // Abs(d) = d & 0x7fffffffffffffff
+ ins = genGetInsForOper(GT_AND, targetType);
+ if (targetType == TYP_FLOAT)
+ {
+ bitMask = &absBitmaskFlt;
+
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x7fffffff;
+ cnsAddr = &f;
+ }
+ else
+ {
+ bitMask = &absBitmaskDbl;
+
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x7fffffffffffffffLL;
+ cnsAddr = &d;
+ dblAlign = true;
+ }
+ break;
+
+ default:
+ assert(!"genSSE2: unsupported oper");
+ unreached();
+ break;
+ }
+
+ if (*bitMask == nullptr)
+ {
+ assert(cnsAddr != nullptr);
+ *bitMask = genMakeConst(cnsAddr, targetType, treeNode, dblAlign);
+ }
+
+ // We need an additional register for bitmask.
+ // Make sure we have one allocated.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // Move operand into targetReg only if the reg reserved for
+ // internal purpose is not the same as targetReg.
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+ if (tmpReg != targetReg)
+ {
+ if (operandReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, operandReg, targetType);
+ }
+
+ operandReg = tmpReg;
+ }
+
+ inst_RV_TT(ins_Load(targetType, false), tmpReg, *bitMask);
+ assert(ins != INS_invalid);
+ inst_RV_RV(ins, targetReg, operandReg, targetType);
+}
+
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Right now only Sqrt/Abs are treated as math intrinsics.
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ noway_assert(treeNode->TypeGet() == TYP_DOUBLE);
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(ins_FloatSqrt(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode,
+ treeNode->gtOp.gtOp1);
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ genSSE2BitwiseOp(treeNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//-------------------------------------------------------------------------- //
+// getBaseVarForPutArgStk - returns the baseVarNum for passing a stack arg.
+//
+// Arguments
+// treeNode - the GT_PUTARG_STK node
+//
+// Return value:
+// The number of the base variable.
+//
+// Note:
+// If tail call the outgoing args are placed in the caller's incoming arg stack space.
+// Otherwise, they go in the outgoing arg area on the current frame.
+//
+// On Windows the caller always creates slots (homing space) in its frame for the
+// first 4 arguments of a callee (register passed args). So, the baseVarNum is always 0.
+// For System V systems there is no such calling convention requirement, and the code needs to find
+// the first stack passed argument from the caller. This is done by iterating over
+// all the lvParam variables and finding the first one whose lvArgReg equals REG_STK.
+//
+unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+
+ unsigned baseVarNum;
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea = treeNode->AsPutArgStk()->putInIncomingArgArea;
+#else
+ const bool putInIncomingArgArea = false;
+#endif
+
+ // Should we set up the stack arg in the incoming or the outgoing arg area?
+ // Fast tail calls (implemented as epilog+jmp) set up the stack arg in the incoming arg area.
+ // All other calls set it up in the outgoing arg area.
+ if (putInIncomingArgArea)
+ {
+ // See the note in the function header re: finding the first stack passed argument.
+ baseVarNum = getFirstArgWithStackSlot();
+ assert(baseVarNum != BAD_VAR_NUM);
+
+#ifdef DEBUG
+ // This must be a fast tail call.
+ assert(treeNode->AsPutArgStk()->gtCall->AsCall()->IsFastTailCall());
+
+        // Since this is a fast tail call, the existence of a first incoming arg is guaranteed,
+        // because a fast tail call requires that the caller's incoming arg area be >= the outgoing
+        // arg area required for the tail call.
+ LclVarDsc* varDsc = &(compiler->lvaTable[baseVarNum]);
+ assert(varDsc != nullptr);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(!varDsc->lvIsRegArg && varDsc->lvArgReg == REG_STK);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On Windows this assert is always true. The first argument will always be in REG_ARG_0 or REG_FLTARG_0.
+ assert(varDsc->lvIsRegArg && (varDsc->lvArgReg == REG_ARG_0 || varDsc->lvArgReg == REG_FLTARG_0));
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif // DEBUG
+ }
+ else
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ baseVarNum = compiler->lvaOutgoingArgSpaceVar;
+#else // !FEATURE_FIXED_OUT_ARGS
+ NYI_X86("Stack args for x86/RyuJIT");
+ baseVarNum = BAD_VAR_NUM;
+#endif // !FEATURE_FIXED_OUT_ARGS
+ }
+
+ return baseVarNum;
+}
+
+//---------------------------------------------------------------------
+// genPutArgStk - generate code for passing an arg on the stack.
+//
+// Arguments
+//    treeNode      - the GT_PUTARG_STK node
+//
+// Return value:
+// None
+//
+void CodeGen::genPutArgStk(GenTreePtr treeNode)
+{
+ var_types targetType = treeNode->TypeGet();
+#ifdef _TARGET_X86_
+ noway_assert(targetType != TYP_STRUCT);
+
+ // The following logic is applicable for x86 arch.
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ GenTreePtr data = treeNode->gtOp.gtOp1;
+
+ // On a 32-bit target, all of the long arguments have been decomposed into
+ // a separate putarg_stk for each of the upper and lower halves.
+ noway_assert(targetType != TYP_LONG);
+
+ int argSize = genTypeSize(genActualType(targetType));
+ genStackLevel += argSize;
+
+ // TODO-Cleanup: Handle this in emitInsMov() in emitXArch.cpp?
+ if (data->isContainedIntOrIImmed())
+ {
+ if (data->IsIconHandle())
+ {
+ inst_IV_handle(INS_push, data->gtIntCon.gtIconVal);
+ }
+ else
+ {
+ inst_IV(INS_push, data->gtIntCon.gtIconVal);
+ }
+ }
+ else if (data->isContained())
+ {
+ NYI_X86("Contained putarg_stk of non-constant");
+ }
+ else
+ {
+ genConsumeReg(data);
+ if (varTypeIsIntegralOrI(targetType))
+ {
+ inst_RV(INS_push, data->gtRegNum, targetType);
+ }
+ else
+ {
+ // Decrement SP.
+ inst_RV_IV(INS_sub, REG_SPBASE, argSize, emitActualTypeSize(TYP_I_IMPL));
+ getEmitter()->emitIns_AR_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, REG_SPBASE, 0);
+ }
+ }
+#else // !_TARGET_X86_
+ {
+ unsigned baseVarNum = getBaseVarForPutArgStk(treeNode);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (varTypeIsStruct(targetType))
+ {
+ genPutStructArgStk(treeNode, baseVarNum);
+ return;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ noway_assert(targetType != TYP_STRUCT);
+ assert(!varTypeIsFloating(targetType) || (targetType == treeNode->gtGetOp1()->TypeGet()));
+
+ // Get argument offset on stack.
+        // Here we cross-check that the argument offset hasn't changed from lowering to codegen, since
+        // we store the arg slot number in the GT_PUTARG_STK node during the lowering phase.
+ int argOffset = treeNode->AsPutArgStk()->getArgOffset();
+
+#ifdef DEBUG
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(treeNode->AsPutArgStk()->gtCall, treeNode);
+ assert(curArgTabEntry);
+ assert(argOffset == (int)curArgTabEntry->slotNum * TARGET_POINTER_SIZE);
+#endif
+
+ GenTreePtr data = treeNode->gtGetOp1();
+
+ if (data->isContained())
+ {
+ getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
+ (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ genConsumeReg(data);
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
+ argOffset);
+ }
+ }
+#endif // !_TARGET_X86_
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+//---------------------------------------------------------------------
+// genPutStructArgStk - generate code for copying a struct arg on the stack by value.
+//                If the struct contains references to heap objects, the GC info
+//                for them is generated as well.
+//
+// Arguments
+//    treeNode      - the GT_PUTARG_STK node
+//    baseVarNum    - the variable number relative to which to put the argument on the stack.
+//                    For fast tail calls this is the caller's first stack-passed incoming argument.
+//                    For all other calls this is the outgoing arg space variable.
+//
+// Return value:
+// None
+//
+void CodeGen::genPutStructArgStk(GenTreePtr treeNode, unsigned baseVarNum)
+{
+ assert(treeNode->OperGet() == GT_PUTARG_STK);
+ assert(baseVarNum != BAD_VAR_NUM);
+
+ var_types targetType = treeNode->TypeGet();
+
+ if (varTypeIsSIMD(targetType))
+ {
+ regNumber srcReg = genConsumeReg(treeNode->gtGetOp1());
+ assert((srcReg != REG_NA) && (genIsValidFloatReg(srcReg)));
+ getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), srcReg, baseVarNum,
+ treeNode->AsPutArgStk()->getArgOffset());
+ return;
+ }
+
+ assert(targetType == TYP_STRUCT);
+
+ GenTreePutArgStk* putArgStk = treeNode->AsPutArgStk();
+ if (putArgStk->gtNumberReferenceSlots == 0)
+ {
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::PutArgStkKindRepInstr:
+ genStructPutArgRepMovs(putArgStk, baseVarNum);
+ break;
+ case GenTreePutArgStk::PutArgStkKindUnroll:
+ genStructPutArgUnroll(putArgStk, baseVarNum);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+        // There is no need to disable GC the way COPYOBJ does, because here the refs
+        // are always copied with atomic operations.
+
+ // Consume these registers.
+ // They may now contain gc pointers (depending on their type; gcMarkRegPtrVal will "do the right thing").
+ genConsumePutStructArgStk(putArgStk, REG_RDI, REG_RSI, REG_NA, baseVarNum);
+ GenTreePtr dstAddr = putArgStk;
+ GenTreePtr src = putArgStk->gtOp.gtOp1;
+ assert(src->OperGet() == GT_OBJ);
+ GenTreePtr srcAddr = src->gtGetOp1();
+
+ unsigned slots = putArgStk->gtNumSlots;
+
+        // We are always copying to the stack, so we don't need to use the write barrier.
+ BYTE* gcPtrs = putArgStk->gtGcPtrs;
+ unsigned gcPtrCount = putArgStk->gtNumberReferenceSlots;
+
+ unsigned i = 0;
+ unsigned copiedSlots = 0;
+ while (i < slots)
+ {
+ switch (gcPtrs[i])
+ {
+ case TYPE_GC_NONE:
+ // Let's see if we can use rep movsq instead of a sequence of movsq instructions
+ // to save cycles and code size.
+ {
+ unsigned nonGcSlotCount = 0;
+
+ do
+ {
+ nonGcSlotCount++;
+ i++;
+ } while (i < slots && gcPtrs[i] == TYPE_GC_NONE);
+
+ // If we have a very small contiguous non-gc region, it's better just to
+ // emit a sequence of movsq instructions
+ if (nonGcSlotCount < CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ copiedSlots += nonGcSlotCount;
+ while (nonGcSlotCount > 0)
+ {
+ instGen(INS_movsq);
+ nonGcSlotCount--;
+ }
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, REG_RCX, nonGcSlotCount);
+ copiedSlots += nonGcSlotCount;
+ instGen(INS_r_movsq);
+ }
+ }
+ break;
+
+ case TYPE_GC_REF: // Is an object ref
+ case TYPE_GC_BYREF: // Is an interior pointer - promote it but don't scan it
+ {
+ // We have a GC (byref or ref) pointer
+ // TODO-Amd64-Unix: Here a better solution (for code size and CQ) would be to use movsq instruction,
+ // but the logic for emitting a GC info record is not available (it is internal for the emitter
+ // only.) See emitGCVarLiveUpd function. If we could call it separately, we could do
+ // instGen(INS_movsq); and emission of gc info.
+
+ var_types memType;
+ if (gcPtrs[i] == TYPE_GC_REF)
+ {
+ memType = TYP_REF;
+ }
+ else
+ {
+ assert(gcPtrs[i] == TYPE_GC_BYREF);
+ memType = TYP_BYREF;
+ }
+
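+                    // For illustration, the sequence emitted below for one GC slot is roughly:
+                    //   mov  rcx, [rsi]           ; load the pointer-sized slot
+                    //   mov  [stk + offset], rcx  ; store it into the outgoing arg area (GC-reported)
+                    //   add  rsi, 8               ; advance the source pointer
+                    //   add  rdi, 8               ; keep the destination pointer in sync for later movsq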
+ getEmitter()->emitIns_R_AR(ins_Load(memType), emitTypeSize(memType), REG_RCX, REG_RSI, 0);
+ getEmitter()->emitIns_S_R(ins_Store(memType), emitTypeSize(memType), REG_RCX, baseVarNum,
+ ((copiedSlots + putArgStk->gtSlotNum) * TARGET_POINTER_SIZE));
+
+ // Source for the copy operation.
+ // If a LocalAddr, use EA_PTRSIZE - copy from stack.
+ // If not a LocalAddr, use EA_BYREF - the source location is not on the stack.
+ getEmitter()->emitIns_R_I(INS_add, ((src->OperIsLocalAddr()) ? EA_PTRSIZE : EA_BYREF), REG_RSI,
+ TARGET_POINTER_SIZE);
+
+ // Always copying to the stack - outgoing arg area
+ // (or the outgoing arg area of the caller for a tail call) - use EA_PTRSIZE.
+ getEmitter()->emitIns_R_I(INS_add, EA_PTRSIZE, REG_RDI, TARGET_POINTER_SIZE);
+ copiedSlots++;
+ gcPtrCount--;
+ i++;
+ }
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+ }
+
+ assert(gcPtrCount == 0);
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+/*****************************************************************************
+ *
+ * Create and record GC Info for the function.
+ */
+#ifdef _TARGET_AMD64_
+void
+#else // !_TARGET_AMD64_
+void*
+#endif // !_TARGET_AMD64_
+CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
+{
+#ifdef JIT32_GCENCODER
+ return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
+#else // !JIT32_GCENCODER
+ genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
+#endif // !JIT32_GCENCODER
+}
+
+#ifdef JIT32_GCENCODER
+void* CodeGen::genCreateAndStoreGCInfoJIT32(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
+{
+ BYTE headerBuf[64];
+ InfoHdr header;
+
+ int s_cached;
+#ifdef DEBUG
+ size_t headerSize =
+#endif
+ compiler->compInfoBlkSize =
+ gcInfo.gcInfoBlockHdrSave(headerBuf, 0, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ size_t argTabOffset = 0;
+ size_t ptrMapSize = gcInfo.gcPtrTableSize(header, codeSize, &argTabOffset);
+
+#if DISPLAY_SIZES
+
+ if (genInterruptible)
+ {
+ gcHeaderISize += compiler->compInfoBlkSize;
+ gcPtrMapISize += ptrMapSize;
+ }
+ else
+ {
+ gcHeaderNSize += compiler->compInfoBlkSize;
+ gcPtrMapNSize += ptrMapSize;
+ }
+
+#endif // DISPLAY_SIZES
+
+ compiler->compInfoBlkSize += ptrMapSize;
+
+ /* Allocate the info block for the method */
+
+ compiler->compInfoBlkAddr = (BYTE*)compiler->info.compCompHnd->allocGCInfo(compiler->compInfoBlkSize);
+
+#if 0 // VERBOSE_SIZES
+ // TODO-X86-Cleanup: 'dataSize', below, is not defined
+
+// if (compiler->compInfoBlkSize > codeSize && compiler->compInfoBlkSize > 100)
+ {
+ printf("[%7u VM, %7u+%7u/%7u x86 %03u/%03u%%] %s.%s\n",
+ compiler->info.compILCodeSize,
+ compiler->compInfoBlkSize,
+ codeSize + dataSize,
+ codeSize + dataSize - prologSize - epilogSize,
+ 100 * (codeSize + dataSize) / compiler->info.compILCodeSize,
+ 100 * (codeSize + dataSize + compiler->compInfoBlkSize) / compiler->info.compILCodeSize,
+ compiler->info.compClassName,
+ compiler->info.compMethodName);
+}
+
+#endif
+
+ /* Fill in the info block and return it to the caller */
+
+ void* infoPtr = compiler->compInfoBlkAddr;
+
+ /* Create the method info block: header followed by GC tracking tables */
+
+ compiler->compInfoBlkAddr +=
+ gcInfo.gcInfoBlockHdrSave(compiler->compInfoBlkAddr, -1, codeSize, prologSize, epilogSize, &header, &s_cached);
+
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize);
+ compiler->compInfoBlkAddr = gcInfo.gcPtrTableSave(compiler->compInfoBlkAddr, header, codeSize, &argTabOffset);
+ assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + headerSize + ptrMapSize);
+
+#ifdef DEBUG
+
+ if (0)
+ {
+ BYTE* temp = (BYTE*)infoPtr;
+ unsigned size = compiler->compInfoBlkAddr - temp;
+ BYTE* ptab = temp + headerSize;
+
+ noway_assert(size == headerSize + ptrMapSize);
+
+ printf("Method info block - header [%u bytes]:", headerSize);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if (temp == ptab)
+ {
+ printf("\nMethod info block - ptrtab [%u bytes]:", ptrMapSize);
+ printf("\n %04X: %*c", i & ~0xF, 3 * (i & 0xF), ' ');
+ }
+ else
+ {
+ if (!(i % 16))
+ printf("\n %04X: ", i);
+ }
+
+ printf("%02X ", *temp++);
+ }
+
+ printf("\n");
+ }
+
+#endif // DEBUG
+
+#if DUMP_GC_TABLES
+
+ if (compiler->opts.dspGCtbls)
+ {
+ const BYTE* base = (BYTE*)infoPtr;
+ unsigned size;
+ unsigned methodSize;
+ InfoHdr dumpHeader;
+
+ printf("GC Info for method %s\n", compiler->info.compFullName);
+ printf("GC info size = %3u\n", compiler->compInfoBlkSize);
+
+ size = gcInfo.gcInfoBlockHdrDump(base, &dumpHeader, &methodSize);
+ // printf("size of header encoding is %3u\n", size);
+ printf("\n");
+
+ if (compiler->opts.dspGCtbls)
+ {
+ base += size;
+ size = gcInfo.gcDumpPtrTable(base, dumpHeader, methodSize);
+ // printf("size of pointer table is %3u\n", size);
+ printf("\n");
+ noway_assert(compiler->compInfoBlkAddr == (base + size));
+ }
+ }
+
+#ifdef DEBUG
+ if (jitOpts.testMask & 128)
+ {
+ for (unsigned offs = 0; offs < codeSize; offs++)
+ {
+ gcInfo.gcFindPtrsInFrame(infoPtr, codePtr, offs);
+ }
+ }
+#endif // DEBUG
+#endif // DUMP_GC_TABLES
+
+ /* Make sure we ended up generating the expected number of bytes */
+
+ noway_assert(compiler->compInfoBlkAddr == (BYTE*)infoPtr + compiler->compInfoBlkSize);
+
+ return infoPtr;
+}
+
+#else // !JIT32_GCENCODER
+void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+{
+ IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
+ GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
+ GcInfoEncoder(compiler->info.compCompHnd, compiler->info.compMethodInfo, allowZeroAlloc, NOMEM);
+ assert(gcInfoEncoder);
+
+ // Follow the code pattern of the x86 gc info encoder (genCreateAndStoreGCInfoJIT32).
+ gcInfo.gcInfoBlockHdrSave(gcInfoEncoder, codeSize, prologSize);
+
+ // First we figure out the encoder ID's for the stack slots and registers.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ // Now we've requested all the slots we'll need; "finalize" these (make more compact data structures for them).
+ gcInfoEncoder->FinalizeSlotIds();
+ // Now we can actually use those slot ID's to declare live ranges.
+ gcInfo.gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, GCInfo::MAKE_REG_PTR_MODE_DO_WORK);
+
+#if defined(DEBUGGING_SUPPORT)
+ if (compiler->opts.compDbgEnC)
+ {
+        // What we have to preserve is called the "frame header" (see comments in VM\eetwain.cpp),
+        // which is:
+        //  - return address
+        //  - saved RBP
+        //  - saved 'this' pointer and bool for synchronized methods
+
+ // 4 slots for RBP + return address + RSI + RDI
+ int preservedAreaSize = 4 * REGSIZE_BYTES;
+
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ if (!(compiler->info.compFlags & CORINFO_FLG_STATIC))
+ {
+ preservedAreaSize += REGSIZE_BYTES;
+ }
+
+ // bool in synchronized methods that tracks whether the lock has been taken (takes 4 bytes on stack)
+ preservedAreaSize += 4;
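+            // For illustration, a synchronized instance method on AMD64 ends up with
+            // 4 * 8 (frame header) + 8 ('this') + 4 (lock-taken bool) = 44 bytes preserved.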
+ }
+
+ // Used to signal both that the method is compiled for EnC, and also the size of the block at the top of the
+ // frame
+ gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
+ }
+#endif
+
+ gcInfoEncoder->Build();
+
+    // The GC encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t).
+    // Let's save the values anyway for debugging purposes.
+ compiler->compInfoBlkAddr = gcInfoEncoder->Emit();
+ compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
+}
+#endif // !JIT32_GCENCODER
+
+/*****************************************************************************
+ * Emit a call to a helper function.
+ *
+ */
+
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg)
+{
+ void* addr = nullptr;
+ void* pAddr = nullptr;
+
+ emitter::EmitCallType callType = emitter::EC_FUNC_TOKEN;
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, &pAddr);
+ regNumber callTarget = REG_NA;
+ regMaskTP killMask = compiler->compHelperCallKillSet((CorInfoHelpFunc)helper);
+
+ if (!addr)
+ {
+ assert(pAddr != nullptr);
+
+        // Absolute indirect call addr
+        // Note: the order of checks is important. Always check the pc-relative encoding first and the
+        // zero-relative encoding next, because the former encoding is 1 byte smaller than the latter.
+ if (genCodeIndirAddrCanBeEncodedAsPCRelOffset((size_t)pAddr) ||
+ genCodeIndirAddrCanBeEncodedAsZeroRelOffset((size_t)pAddr))
+ {
+ // generate call whose target is specified by 32-bit offset relative to PC or zero.
+ callType = emitter::EC_FUNC_TOKEN_INDIR;
+ addr = pAddr;
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // If this indirect address cannot be encoded as 32-bit offset relative to PC or Zero,
+ // load it into REG_HELPER_CALL_TARGET and use register indirect addressing mode to
+ // make the call.
+ // mov reg, addr
+ // call [reg]
+
+ if (callTargetReg == REG_NA)
+ {
+ // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
+ // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & killMask) == callTargetMask);
+ }
+ else
+ {
+                // The call target must not overwrite any live variable, even though it is not
+                // necessarily in the kill set for the call.
+ regMaskTP callTargetMask = genRegMask(callTargetReg);
+ noway_assert((callTargetMask & regSet.rsMaskVars) == RBM_NONE);
+ }
+#endif
+
+ callTarget = callTargetReg;
+ CodeGen::genSetRegToIcon(callTarget, (ssize_t)pAddr, TYP_I_IMPL);
+ callType = emitter::EC_INDIR_ARD;
+ }
+ }
+
+ getEmitter()->emitIns_Call(callType, compiler->eeFindHelper(helper), INDEBUG_LDISASM_COMMA(nullptr) addr, argSize,
+ retSize FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(EA_UNKNOWN), gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // IL offset
+ callTarget, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper));
+
+ regTracker.rsTrashRegSet(killMask);
+ regTracker.rsTrashRegsForGCInterruptability();
+}
+
+#if !defined(_TARGET_64BIT_)
+//-----------------------------------------------------------------------------
+//
+// Code Generation for Long integers
+//
+//-----------------------------------------------------------------------------
+
+//------------------------------------------------------------------------
+// genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
+//
+// Arguments:
+// treeNode - A TYP_LONG lclVar node.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'treeNode' must be a TYP_LONG lclVar node for a lclVar that has NOT been promoted.
+// Its operand must be a GT_LONG node.
+//
+void CodeGen::genStoreLongLclVar(GenTree* treeNode)
+{
+ emitter* emit = getEmitter();
+
+ GenTreeLclVarCommon* lclNode = treeNode->AsLclVarCommon();
+ unsigned lclNum = lclNode->gtLclNum;
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclNum]);
+ assert(varDsc->TypeGet() == TYP_LONG);
+ assert(!varDsc->lvPromoted);
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ noway_assert(op1->OperGet() == GT_LONG);
+ genConsumeRegs(op1);
+
+ // Definitions of register candidates will have been lowered to 2 int lclVars.
+ assert(!treeNode->InReg());
+
+ GenTreePtr loVal = op1->gtGetOp1();
+ GenTreePtr hiVal = op1->gtGetOp2();
+ // NYI: Contained immediates.
+ NYI_IF((loVal->gtRegNum == REG_NA) || (hiVal->gtRegNum == REG_NA), "Store of long lclVar with contained immediate");
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, loVal->gtRegNum, lclNum, 0);
+ emit->emitIns_R_S(ins_Store(TYP_INT), EA_4BYTE, hiVal->gtRegNum, lclNum, genTypeSize(TYP_INT));
+}
+#endif // !defined(_TARGET_64BIT_)
+
+/*****************************************************************************
+* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
+* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
+* disassembler decodes the instructions the same way we do.
+*/
+
+// Uncomment "#define ALL_XARCH_EMITTER_UNIT_TESTS" to run all the unit tests here.
+// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
+//#define ALL_XARCH_EMITTER_UNIT_TESTS
+
+#if defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+void CodeGen::genAmd64EmitterUnitTests()
+{
+ if (!verbose)
+ {
+ return;
+ }
+
+ if (!compiler->opts.altJit)
+ {
+ // No point doing this in a "real" JIT.
+ return;
+ }
+
+ // Mark the "fake" instructions in the output.
+ printf("*************** In genAmd64EmitterUnitTests()\n");
+
+ // We use this:
+ // genDefineTempLabel(genCreateTempLabel());
+ // to create artificial labels to help separate groups of tests.
+
+ //
+ // Loads
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef ALL_XARCH_EMITTER_UNIT_TESTS
+#ifdef FEATURE_AVX_SUPPORT
+ genDefineTempLabel(genCreateTempLabel());
+
+ // vhaddpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_haddpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_addps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_addpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vaddpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_addpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+    // vsubps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_subps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_subpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vsubpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_subpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_mulpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_mulps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vmulpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_mulpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_andps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_andpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_andps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vandpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_andpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorps xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_orps, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorpd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_orpd, EA_16BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorps ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_orps, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vorpd ymm0,ymm1,ymm2
+ getEmitter()->emitIns_R_R_R(INS_orpd, EA_32BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divss, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+ // vdivsd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_divsd, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+
+    // vcvtss2sd xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_cvtss2sd, EA_4BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+    // vcvtsd2ss xmm0,xmm1,xmm2
+ getEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
+#endif // FEATURE_AVX_SUPPORT
+#endif // ALL_XARCH_EMITTER_UNIT_TESTS
+ printf("*************** End of genAmd64EmitterUnitTests()\n");
+}
+
+#endif // defined(DEBUG) && defined(LATE_DISASM) && defined(_TARGET_AMD64_)
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************
+ * genSetScopeInfo
+ *
+ * Called for every scope info piece to record by the main genSetScopeInfo()
+ */
+
+void CodeGen::genSetScopeInfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ bool avail,
+ Compiler::siVarLoc& varLoc)
+{
+ /* We need to do some mapping while reporting back these variables */
+
+ unsigned ilVarNum = compiler->compMap2ILvarNum(varNum);
+ noway_assert((int)ilVarNum != ICorDebugInfo::UNKNOWN_ILNUM);
+
+ VarName name = nullptr;
+
+#ifdef DEBUG
+
+ for (unsigned scopeNum = 0; scopeNum < compiler->info.compVarScopesCount; scopeNum++)
+ {
+ if (LVnum == compiler->info.compVarScopes[scopeNum].vsdLVnum)
+ {
+ name = compiler->info.compVarScopes[scopeNum].vsdName;
+ }
+ }
+
+    // Hang on to this info.
+
+ TrnslLocalVarInfo& tlvi = genTrnslLocalVarInfo[which];
+
+ tlvi.tlviVarNum = ilVarNum;
+ tlvi.tlviLVnum = LVnum;
+ tlvi.tlviName = name;
+ tlvi.tlviStartPC = startOffs;
+ tlvi.tlviLength = length;
+ tlvi.tlviAvailable = avail;
+ tlvi.tlviVarLoc = varLoc;
+
+#endif // DEBUG
+
+ compiler->eeSetLVinfo(which, startOffs, length, ilVarNum, LVnum, name, avail, varLoc);
+}
+#endif // DEBUGGING_SUPPORT
+
+#endif // _TARGET_AMD64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
new file mode 100644
index 0000000000..afbecdfc60
--- /dev/null
+++ b/src/jit/compiler.cpp
@@ -0,0 +1,10380 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Compiler XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif // _MSC_VER
+#include "hostallocator.h"
+#include "emit.h"
+#include "ssabuilder.h"
+#include "valuenum.h"
+#include "rangecheck.h"
+
+#ifndef LEGACY_BACKEND
+#include "lower.h"
+#endif // !LEGACY_BACKEND
+
+#include "jittelemetry.h"
+
+#if defined(DEBUG)
+// Column settings for COMPlus_JitDumpIR. We could (and probably should) make these programmable.
+#define COLUMN_OPCODE 30
+#define COLUMN_OPERANDS (COLUMN_OPCODE + 25)
+#define COLUMN_KINDS 110
+#define COLUMN_FLAGS (COLUMN_KINDS + 32)
+#endif
+
+#if defined(DEBUG)
+unsigned Compiler::jitTotalMethodCompiled = 0;
+#endif // defined(DEBUG)
+
+#if defined(DEBUG)
+LONG Compiler::jitNestingLevel = 0;
+#endif // defined(DEBUG)
+
+#ifdef ALT_JIT
+// static
+bool Compiler::s_pAltJitExcludeAssembliesListInitialized = false;
+AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList = nullptr;
+#endif // ALT_JIT
+
+/*****************************************************************************/
+inline unsigned getCurTime()
+{
+ SYSTEMTIME tim;
+
+ GetSystemTime(&tim);
+
+ return (((tim.wHour * 60) + tim.wMinute) * 60 + tim.wSecond) * 1000 + tim.wMilliseconds;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+static FILE* jitSrcFilePtr;
+
+static unsigned jitCurSrcLine;
+
+void Compiler::JitLogEE(unsigned level, const char* fmt, ...)
+{
+ va_list args;
+
+ if (verbose)
+ {
+ va_start(args, fmt);
+ vflogf(jitstdout, fmt, args);
+ va_end(args);
+ }
+
+ va_start(args, fmt);
+ vlogf(level, fmt, args);
+ va_end(args);
+}
+
+void Compiler::compDspSrcLinesByLineNum(unsigned line, bool seek)
+{
+ if (!jitSrcFilePtr)
+ {
+ return;
+ }
+
+ if (jitCurSrcLine == line)
+ {
+ return;
+ }
+
+ if (jitCurSrcLine > line)
+ {
+ if (!seek)
+ {
+ return;
+ }
+
+ if (fseek(jitSrcFilePtr, 0, SEEK_SET) != 0)
+ {
+ printf("Compiler::compDspSrcLinesByLineNum: fseek returned an error.\n");
+ }
+ jitCurSrcLine = 0;
+ }
+
+ if (!seek)
+ {
+ printf(";\n");
+ }
+
+ do
+ {
+ char temp[128];
+ size_t llen;
+
+ if (!fgets(temp, sizeof(temp), jitSrcFilePtr))
+ {
+ return;
+ }
+
+ if (seek)
+ {
+ continue;
+ }
+
+ llen = strlen(temp);
+ if (llen && temp[llen - 1] == '\n')
+ {
+ temp[llen - 1] = 0;
+ }
+
+ printf("; %s\n", temp);
+ } while (++jitCurSrcLine < line);
+
+ if (!seek)
+ {
+ printf(";\n");
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP)
+{
+#ifdef DEBUGGING_SUPPORT
+
+ static IPmappingDsc* nextMappingDsc;
+ static unsigned lastLine;
+
+ if (!opts.dspLines)
+ {
+ return;
+ }
+
+ if (curIP == 0)
+ {
+ if (genIPmappingList)
+ {
+ nextMappingDsc = genIPmappingList;
+ lastLine = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
+
+ unsigned firstLine = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
+
+ unsigned earlierLine = (firstLine < 5) ? 0 : firstLine - 5;
+
+ compDspSrcLinesByLineNum(earlierLine, true); // display previous 5 lines
+ compDspSrcLinesByLineNum(firstLine, false);
+ }
+ else
+ {
+ nextMappingDsc = nullptr;
+ }
+
+ return;
+ }
+
+ if (nextMappingDsc)
+ {
+ UNATIVE_OFFSET offset = nextMappingDsc->ipmdNativeLoc.CodeOffset(genEmitter);
+
+ if (offset <= curIP)
+ {
+ IL_OFFSET nextOffs = jitGetILoffs(nextMappingDsc->ipmdILoffsx);
+
+ if (lastLine < nextOffs)
+ {
+ compDspSrcLinesByLineNum(nextOffs);
+ }
+ else
+ {
+ // This offset corresponds to a previous line. Rewind to that line
+
+ compDspSrcLinesByLineNum(nextOffs - 2, true);
+ compDspSrcLinesByLineNum(nextOffs);
+ }
+
+ lastLine = nextOffs;
+ nextMappingDsc = nextMappingDsc->ipmdNext;
+ }
+ }
+
+#endif
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+
+/*****************************************************************************/
+#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
+
+static unsigned genMethodCnt; // total number of methods JIT'ted
+unsigned genMethodICnt; // number of interruptible methods
+unsigned genMethodNCnt; // number of non-interruptible methods
+static unsigned genSmallMethodsNeedingExtraMemoryCnt = 0;
+
+#endif
+
+/*****************************************************************************/
+#if MEASURE_NODE_SIZE
+NodeSizeStats genNodeSizeStats;
+NodeSizeStats genNodeSizeStatsPerFunc;
+
+unsigned genTreeNcntHistBuckets[] = {10, 20, 30, 40, 50, 100, 200, 300, 400, 500, 1000, 5000, 10000, 0};
+Histogram genTreeNcntHist(HostAllocator::getHostAllocator(), genTreeNcntHistBuckets);
+
+unsigned genTreeNsizHistBuckets[] = {1000, 5000, 10000, 50000, 100000, 500000, 1000000, 0};
+Histogram genTreeNsizHist(HostAllocator::getHostAllocator(), genTreeNsizHistBuckets);
+#endif // MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ *
+ * Variables to keep track of total code amounts.
+ */
+
+#if DISPLAY_SIZES
+
+size_t grossVMsize; // Total IL code size
+size_t grossNCsize; // Native code + data size
+size_t totalNCsize; // Native code + data + GC info size (TODO-Cleanup: GC info size only accurate for JIT32_GCENCODER)
+size_t gcHeaderISize; // GC header size: interruptible methods
+size_t gcPtrMapISize; // GC pointer map size: interruptible methods
+size_t gcHeaderNSize; // GC header size: non-interruptible methods
+size_t gcPtrMapNSize; // GC pointer map size: non-interruptible methods
+
+#endif // DISPLAY_SIZES
+
+/*****************************************************************************
+ *
+ * Variables to keep track of argument counts.
+ */
+
+#if CALL_ARG_STATS
+
+unsigned argTotalCalls;
+unsigned argHelperCalls;
+unsigned argStaticCalls;
+unsigned argNonVirtualCalls;
+unsigned argVirtualCalls;
+
+unsigned argTotalArgs; // total number of args for all calls (including objectPtr)
+unsigned argTotalDWordArgs;
+unsigned argTotalLongArgs;
+unsigned argTotalFloatArgs;
+unsigned argTotalDoubleArgs;
+
+unsigned argTotalRegArgs;
+unsigned argTotalTemps;
+unsigned argTotalLclVar;
+unsigned argTotalDeferred;
+unsigned argTotalConst;
+
+unsigned argTotalObjPtr;
+unsigned argTotalGTF_ASGinArgs;
+
+unsigned argMaxTempsPerMethod;
+
+unsigned argCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argCntTable(HostAllocator::getHostAllocator(), argCntBuckets);
+
+unsigned argDWordCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argDWordCntTable(HostAllocator::getHostAllocator(), argDWordCntBuckets);
+
+unsigned argDWordLngCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argDWordLngCntTable(HostAllocator::getHostAllocator(), argDWordLngCntBuckets);
+
+unsigned argTempsCntBuckets[] = {0, 1, 2, 3, 4, 5, 6, 10, 0};
+Histogram argTempsCntTable(HostAllocator::getHostAllocator(), argTempsCntBuckets);
+
+#endif // CALL_ARG_STATS
+
+/*****************************************************************************
+ *
+ * Variables to keep track of basic block counts.
+ */
+
+#if COUNT_BASIC_BLOCKS
+
+// --------------------------------------------------
+// Basic block count frequency table:
+// --------------------------------------------------
+// <= 1 ===> 26872 count ( 56% of total)
+// 2 .. 2 ===> 669 count ( 58% of total)
+// 3 .. 3 ===> 4687 count ( 68% of total)
+// 4 .. 5 ===> 5101 count ( 78% of total)
+// 6 .. 10 ===> 5575 count ( 90% of total)
+// 11 .. 20 ===> 3028 count ( 97% of total)
+// 21 .. 50 ===> 1108 count ( 99% of total)
+// 51 .. 100 ===> 182 count ( 99% of total)
+// 101 .. 1000 ===> 34 count (100% of total)
+// 1001 .. 10000 ===> 0 count (100% of total)
+// --------------------------------------------------
+
+unsigned bbCntBuckets[] = {1, 2, 3, 5, 10, 20, 50, 100, 1000, 10000, 0};
+Histogram bbCntTable(HostAllocator::getHostAllocator(), bbCntBuckets);
+
+/* Histogram for the IL opcode size of methods with a single basic block */
+
+unsigned bbSizeBuckets[] = {1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 0};
+Histogram bbOneBBSizeTable(HostAllocator::getHostAllocator(), bbSizeBuckets);
+
+#endif // COUNT_BASIC_BLOCKS
+
+/*****************************************************************************
+ *
+ * Used by optFindNaturalLoops to gather statistical information such as
+ * - total number of natural loops
+ * - number of loops with 1, 2, ... exit conditions
+ * - number of loops that have an iterator (for like)
+ * - number of loops that have a constant iterator
+ */
+
+#if COUNT_LOOPS
+
+unsigned totalLoopMethods; // counts the total number of methods that have natural loops
+unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has
+unsigned totalLoopOverflows; // # of methods that identified more loops than we can represent
+unsigned totalLoopCount; // counts the total number of natural loops
+unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
+unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
+unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
+unsigned simpleTestLoopCount; // counts the # of loops with an iterator and a simple loop condition (iter < const)
+unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
+bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
+unsigned loopsThisMethod; // counts the number of loops in the current method
+bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method.
+
+/* Histogram for number of loops in a method */
+
+unsigned loopCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0};
+Histogram loopCountTable(HostAllocator::getHostAllocator(), loopCountBuckets);
+
+/* Histogram for number of loop exits */
+
+unsigned loopExitCountBuckets[] = {0, 1, 2, 3, 4, 5, 6, 0};
+Histogram loopExitCountTable(HostAllocator::getHostAllocator(), loopExitCountBuckets);
+
+#endif // COUNT_LOOPS
+
+//------------------------------------------------------------------------
+// getJitGCType: Given the VM's CorInfoGCType convert it to the JIT's var_types
+//
+// Arguments:
+// gcType - an enum value that originally came from an element
+// of the BYTE[] returned from getClassGClayout()
+//
+// Return Value:
+//    The corresponding enum value from the JIT's var_types
+//
+// Notes:
+//   The gcLayout of each field of a struct is returned from getClassGClayout()
+//   as a BYTE[], but each BYTE element is actually a CorInfoGCType value.
+//   Note that when we 'know' there is only one element in this array,
+//   the JIT will often pass the address of a single BYTE instead of a BYTE[].
+//
+
+var_types Compiler::getJitGCType(BYTE gcType)
+{
+ var_types result = TYP_UNKNOWN;
+ CorInfoGCType corInfoType = (CorInfoGCType)gcType;
+
+ if (corInfoType == TYPE_GC_NONE)
+ {
+ result = TYP_I_IMPL;
+ }
+ else if (corInfoType == TYPE_GC_REF)
+ {
+ result = TYP_REF;
+ }
+ else if (corInfoType == TYPE_GC_BYREF)
+ {
+ result = TYP_BYREF;
+ }
+ else
+ {
+ noway_assert(!"Bad value of 'gcType'");
+ }
+ return result;
+}
+
+#if FEATURE_MULTIREG_ARGS
+//---------------------------------------------------------------------------
+// getStructGcPtrsFromOp: Given a GenTree node of TYP_STRUCT that represents
+//                        a pass-by-value argument, return the gcPtr layout
+//                        for the pointer-sized fields
+// Arguments:
+//    op         - the operand of TYP_STRUCT that is passed by value
+//    gcPtrsOut  - an array of BYTEs that is written by this method;
+//                 it will contain the VM's CorInfoGCType values
+//                 for each pointer-sized field
+// Return Value:
+//     Two [or more] values are written into the gcPtrs array
+//
+// Note that for ARM64 there will always be exactly two pointer-sized fields
+
+void Compiler::getStructGcPtrsFromOp(GenTreePtr op, BYTE* gcPtrsOut)
+{
+ assert(op->TypeGet() == TYP_STRUCT);
+
+#ifdef _TARGET_ARM64_
+ if (op->OperGet() == GT_OBJ)
+ {
+ CORINFO_CLASS_HANDLE objClass = op->gtObj.gtClass;
+
+ int structSize = info.compCompHnd->getClassSize(objClass);
+ assert(structSize <= 2 * TARGET_POINTER_SIZE);
+
+ BYTE gcPtrsTmp[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+
+ info.compCompHnd->getClassGClayout(objClass, &gcPtrsTmp[0]);
+
+ gcPtrsOut[0] = gcPtrsTmp[0];
+ gcPtrsOut[1] = gcPtrsTmp[1];
+ }
+ else if (op->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = op->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ // At this point any TYP_STRUCT LclVar must be a 16-byte pass by value argument
+ assert(varDsc->lvSize() == 2 * TARGET_POINTER_SIZE);
+
+ gcPtrsOut[0] = varDsc->lvGcLayout[0];
+ gcPtrsOut[1] = varDsc->lvGcLayout[1];
+ }
+ else
+#endif
+ {
+ noway_assert(!"Unsupported Oper for getStructGcPtrsFromOp");
+ }
+}
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifdef ARM_SOFTFP
+//---------------------------------------------------------------------------
+// isSingleFloat32Struct:
+// Check if the given struct type contains only one float32 value type
+//
+// Arguments:
+// clsHnd - the handle for the struct type
+//
+// Return Value:
+// true if the given struct type contains only one float32 value type,
+// false otherwise.
+//
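+// Notes:
+//    For illustration: a struct whose only field is another struct whose only field is a 'float'
+//    (a chain of single-field value types ending in float32) is considered a single float32 struct.
+//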
+
+bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
+{
+ for (;;)
+ {
+ // all of class chain must be of value type and must have only one field
+ if (!info.compCompHnd->isValueClass(clsHnd) && info.compCompHnd->getClassNumInstanceFields(clsHnd) != 1)
+ {
+ return false;
+ }
+
+ CORINFO_CLASS_HANDLE* pClsHnd = &clsHnd;
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(clsHnd, 0);
+ CorInfoType fieldType = info.compCompHnd->getFieldType(fldHnd, pClsHnd);
+
+ switch (fieldType)
+ {
+ case CORINFO_TYPE_VALUECLASS:
+ clsHnd = *pClsHnd;
+ break;
+
+ case CORINFO_TYPE_FLOAT:
+ return true;
+
+ default:
+ return false;
+ }
+ }
+}
+#endif // ARM_SOFTFP
+
+//-----------------------------------------------------------------------------
+// getPrimitiveTypeForStruct:
+//    Get the "primitive" type that is used for a struct
+// of size 'structSize'.
+// We examine 'clsHnd' to check the GC layout of the struct and
+// return TYP_REF for structs that simply wrap an object.
+// If the struct is a one element HFA, we will return the
+// proper floating point type.
+//
+// Arguments:
+// structSize - the size of the struct type, cannot be zero
+// clsHnd - the handle for the struct type, used when may have
+// an HFA or if we need the GC layout for an object ref.
+//
+// Return Value:
+// The primitive type (i.e. byte, short, int, long, ref, float, double)
+// used to pass or return structs of this size.
+// If we shouldn't use a "primitive" type then TYP_UNKNOWN is returned.
+// Notes:
+// For 32-bit targets (X86/ARM32) the 64-bit TYP_LONG type is not
+// considered a primitive type by this method.
+// So a struct that wraps a 'long' is passed and returned in the
+//    same way as any other 8-byte struct.
+//    For ARM32, if we have an HFA struct that wraps a 64-bit double,
+//    we will return TYP_DOUBLE.
+//
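+//    For illustration (based on the cases below): a 1-byte struct yields TYP_BYTE, a 2-byte struct
+//    yields TYP_SHORT, and a pointer-sized struct wrapping a single object reference yields TYP_REF,
+//    while an 8-byte HFA of two floats yields TYP_UNKNOWN because it needs two FP registers.
+//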
+var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd)
+{
+ assert(structSize != 0);
+
+ var_types useType;
+
+ switch (structSize)
+ {
+ case 1:
+ useType = TYP_BYTE;
+ break;
+
+ case 2:
+ useType = TYP_SHORT;
+ break;
+
+#ifndef _TARGET_XARCH_
+ case 3:
+ useType = TYP_INT;
+ break;
+
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_64BIT_
+ case 4:
+ if (IsHfa(clsHnd))
+ {
+                // With a structSize of 4 and IsHfa, it must be an HFA of one float
+ useType = TYP_FLOAT;
+ }
+ else
+ {
+ useType = TYP_INT;
+ }
+ break;
+
+#ifndef _TARGET_XARCH_
+ case 5:
+ case 6:
+ case 7:
+ useType = TYP_I_IMPL;
+ break;
+
+#endif // _TARGET_XARCH_
+#endif // _TARGET_64BIT_
+
+ case TARGET_POINTER_SIZE:
+#ifdef ARM_SOFTFP
+        // For ARM_SOFTFP, HFA is unsupported, so we need to check in another way.
+        // This matters only for size-4 structs, because bigger structs would be processed with a RetBuf.
+ if (isSingleFloat32Struct(clsHnd))
+#else // !ARM_SOFTFP
+ if (IsHfa(clsHnd))
+#endif // ARM_SOFTFP
+ {
+#ifdef _TARGET_64BIT_
+ var_types hfaType = GetHfaType(clsHnd);
+
+            // With a structSize of 8 and IsHfa, we have two possibilities:
+ // An HFA of one double or an HFA of two floats
+ //
+ // Check and exclude the case of an HFA of two floats
+ if (hfaType == TYP_DOUBLE)
+ {
+ // We have an HFA of one double
+ useType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(hfaType == TYP_FLOAT);
+
+ // We have an HFA of two floats
+ // This should be passed or returned in two FP registers
+ useType = TYP_UNKNOWN;
+ }
+#else // a 32BIT target
+            // With a structSize of 4 and IsHfa, it must be an HFA of one float
+ useType = TYP_FLOAT;
+#endif // _TARGET_64BIT_
+ }
+ else
+ {
+ BYTE gcPtr = 0;
+ // Check if this pointer-sized struct is wrapping a GC object
+ info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+ useType = getJitGCType(gcPtr);
+ }
+ break;
+
+#ifdef _TARGET_ARM_
+ case 8:
+ if (IsHfa(clsHnd))
+ {
+ var_types hfaType = GetHfaType(clsHnd);
+
+                // With a structSize of 8 and IsHfa, we have two possibilities:
+ // An HFA of one double or an HFA of two floats
+ //
+ // Check and exclude the case of an HFA of two floats
+ if (hfaType == TYP_DOUBLE)
+ {
+ // We have an HFA of one double
+ useType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(hfaType == TYP_FLOAT);
+
+ // We have an HFA of two floats
+ // This should be passed or returned in two FP registers
+ useType = TYP_UNKNOWN;
+ }
+ }
+ else
+ {
+ // We don't have an HFA
+ useType = TYP_UNKNOWN;
+ }
+ break;
+#endif // _TARGET_ARM_
+
+ default:
+ useType = TYP_UNKNOWN;
+ break;
+ }
+
+ return useType;
+}
+
+//-----------------------------------------------------------------------------
+// getArgTypeForStruct:
+// Get the type that is used to pass values of the given struct type.
+// If you have already retrieved the struct size then it should be
+// passed as the optional third argument, as this allows us to avoid
+// an extra call to getClassSize(clsHnd)
+//
+// Arguments:
+// clsHnd - the handle for the struct type
+// wbPassStruct - An "out" argument with information about how
+// the struct is to be passed
+// structSize - the size of the struct type,
+// or zero if we should call getClassSize(clsHnd)
+//
+// Return Value:
+// For wbPassStruct you can pass a 'nullptr' and nothing will be written
+// or returned for that out parameter.
+// When *wbPassStruct is SPK_PrimitiveType this method's return value
+// is the primitive type used to pass the struct.
+// When *wbPassStruct is SPK_ByReference this method's return value
+// is always TYP_UNKNOWN and the struct type is passed by reference to a copy
+// When *wbPassStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
+// is always TYP_STRUCT and the struct type is passed by value either
+// using multiple registers or on the stack.
+//
+// Assumptions:
+// The size must be the size of the given type.
+// The given class handle must be for a value type (struct).
+//
+// Notes:
+// About HFA types:
+// When the clsHnd is a one element HFA type we return the appropriate
+// floating point primitive type and *wbPassStruct is SPK_PrimitiveType
+//    If there are two or more elements in the HFA type then this method's
+// return value is TYP_STRUCT and *wbPassStruct is SPK_ByValueAsHfa
+//
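+//    For illustration, on ARM64 a 16-byte non-HFA struct is passed by value in two registers
+//    (SPK_ByValue, TYP_STRUCT), while a 17-32 byte struct is passed by reference to a copy
+//    (SPK_ByReference); see the target-specific cases below.
+//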
+var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbPassStruct,
+ unsigned structSize /* = 0 */)
+{
+ var_types useType = TYP_UNKNOWN;
+ structPassingKind howToPassStruct = SPK_Unknown; // We must change this before we return
+
+ if (structSize == 0)
+ {
+ structSize = info.compCompHnd->getClassSize(clsHnd);
+ }
+ assert(structSize > 0);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // An 8-byte struct may need to be passed in a floating point register
+ // So we always consult the struct "Classifier" routine
+ //
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
+
+ // If we have one eightByteCount then we can set 'useType' based on that
+ if (structDesc.eightByteCount == 1)
+ {
+ // Set 'useType' to the type of the first eightbyte item
+ useType = GetEightByteType(structDesc, 0);
+ }
+
+#elif defined(_TARGET_X86_)
+
+ // On x86 we never pass structs as primitive types (unless the VM unwraps them for us)
+ useType = TYP_UNKNOWN;
+
+#else // all other targets
+
+ // The largest primitive type is 8 bytes (TYP_DOUBLE)
+ // so we can skip calling getPrimitiveTypeForStruct when we
+ // have a struct that is larger than that.
+ //
+ if (structSize <= sizeof(double))
+ {
+ // We set the "primitive" useType based upon the structSize
+ // and also examine the clsHnd to see if it is an HFA of count one
+ useType = getPrimitiveTypeForStruct(structSize, clsHnd);
+ }
+
+#endif // all other targets
+
+ // Did we change this struct type into a simple "primitive" type?
+ //
+ if (useType != TYP_UNKNOWN)
+ {
+ // Yes, we should use the "primitive" type in 'useType'
+ howToPassStruct = SPK_PrimitiveType;
+ }
+ else // We can't replace the struct with a "primitive" type
+ {
+ // See if we can pass this struct by value, possibly in multiple registers
+ // or if we should pass it by reference to a copy
+ //
+ if (structSize <= MAX_PASS_MULTIREG_BYTES)
+ {
+ // Structs that are HFA's are passed by value in multiple registers
+ if (IsHfa(clsHnd))
+ {
+ // HFA's of count one should have been handled by getPrimitiveTypeForStruct
+ assert(GetHfaCount(clsHnd) >= 2);
+
+                // Set up wbPassStruct and useType to indicate that this is passed by value as an HFA
+                // using multiple registers.
+                // (When all of the parameter registers are used, the stack will be used.)
+ howToPassStruct = SPK_ByValueAsHfa;
+ useType = TYP_STRUCT;
+ }
+ else // Not an HFA struct type
+ {
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The case of (structDesc.eightByteCount == 1) should have already been handled
+ if (structDesc.eightByteCount > 1)
+ {
+                    // Set up wbPassStruct and useType to indicate that this is passed by value in multiple
+                    // registers. (When all of the parameter registers are used, the stack will be used.)
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ }
+ else
+ {
+ assert(structDesc.eightByteCount == 0);
+ // Otherwise we pass this struct by reference to a copy
+                    // Set up wbPassStruct and useType to indicate that this is passed using one register
+                    // (by reference to a copy).
+ howToPassStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
+ assert(structSize > TARGET_POINTER_SIZE);
+
+ // On ARM64 structs that are 9-16 bytes are passed by value in multiple registers
+ //
+ if (structSize <= (TARGET_POINTER_SIZE * 2))
+ {
+                    // Set up wbPassStruct and useType to indicate that this is passed by value in multiple
+                    // registers. (When all of the parameter registers are used, the stack will be used.)
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ }
+ else // a structSize that is 17-32 bytes in size
+ {
+ // Otherwise we pass this struct by reference to a copy
+                    // Set up wbPassStruct and useType to indicate that this is passed using one register
+                    // (by reference to a copy).
+ howToPassStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+
+#elif defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+ // Otherwise we pass this struct by value on the stack
+                // Set up wbPassStruct and useType to indicate that this is passed by value according to the X86/ARM32 ABI.
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+
+#else // _TARGET_XXX_
+
+ noway_assert(!"Unhandled TARGET in getArgTypeForStruct (with FEATURE_MULTIREG_ARGS=1)");
+
+#endif // _TARGET_XXX_
+ }
+ }
+ else // (structSize > MAX_PASS_MULTIREG_BYTES)
+ {
+ // We have a (large) struct that can't be replaced with a "primitive" type
+ // and can't be passed in multiple registers
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+
+ // Otherwise we pass this struct by value on the stack
+            // Set up wbPassStruct and useType to indicate that this is passed by value according to the X86/ARM32 ABI.
+ howToPassStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+
+#elif defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+ // Otherwise we pass this struct by reference to a copy
+            // Set up wbPassStruct and useType to indicate that this is passed using one register (by reference to a copy).
+ howToPassStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+
+#else // _TARGET_XXX_
+
+ noway_assert(!"Unhandled TARGET in getArgTypeForStruct");
+
+#endif // _TARGET_XXX_
+ }
+ }
+
+ // 'howToPassStruct' must be set to one of the valid values before we return
+ assert(howToPassStruct != SPK_Unknown);
+ if (wbPassStruct != nullptr)
+ {
+ *wbPassStruct = howToPassStruct;
+ }
+ return useType;
+}
+
+//-----------------------------------------------------------------------------
+// getReturnTypeForStruct:
+// Get the type that is used to return values of the given struct type.
+// If you have already retrieved the struct size then it should be
+// passed as the optional third argument, as this allows us to avoid
+// an extra call to getClassSize(clsHnd)
+//
+// Arguments:
+// clsHnd - the handle for the struct type
+// wbReturnStruct - An "out" argument with information about how
+// the struct is to be returned
+// structSize - the size of the struct type,
+// or zero if we should call getClassSize(clsHnd)
+//
+// Return Value:
+// For wbReturnStruct you can pass a 'nullptr' and nothing will be written
+// or returned for that out parameter.
+// When *wbReturnStruct is SPK_PrimitiveType this method's return value
+// is the primitive type used to return the struct.
+// When *wbReturnStruct is SPK_ByReference this method's return value
+// is always TYP_UNKNOWN and the struct type is returned using a return buffer
+// When *wbReturnStruct is SPK_ByValue or SPK_ByValueAsHfa this method's return value
+// is always TYP_STRUCT and the struct type is returned using multiple registers.
+//
+// Assumptions:
+// The size must be the size of the given type.
+// The given class handle must be for a value type (struct).
+//
+// Notes:
+// About HFA types:
+// When the clsHnd is a one element HFA type then this method's return
+// value is the appropriate floating point primitive type and
+// *wbReturnStruct is SPK_PrimitiveType.
+// If there are two or more elements in the HFA type and the target supports
+// multireg return types then the return value is TYP_STRUCT and
+// *wbReturnStruct is SPK_ByValueAsHfa.
+// Additionally if there are two or more elements in the HFA type and
+//    the target doesn't support multireg return types then it is treated
+// as if it wasn't an HFA type.
+// About returning TYP_STRUCT:
+// Whenever this method's return value is TYP_STRUCT it always means
+// that multiple registers are used to return this struct.
+//
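+//    For illustration, on ARM64 a 16-byte non-HFA struct is returned by value in two registers
+//    (SPK_ByValue, TYP_STRUCT), while on x86 and ARM32 any struct that is not returned as a
+//    primitive is returned via a return buffer (SPK_ByReference); see the cases below.
+//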
+var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbReturnStruct /* = nullptr */,
+ unsigned structSize /* = 0 */)
+{
+ var_types useType = TYP_UNKNOWN;
+ structPassingKind howToReturnStruct = SPK_Unknown; // We must change this before we return
+
+ assert(clsHnd != NO_CLASS_HANDLE);
+
+ if (structSize == 0)
+ {
+ structSize = info.compCompHnd->getClassSize(clsHnd);
+ }
+ assert(structSize > 0);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // An 8-byte struct may need to be returned in a floating point register
+ // So we always consult the struct "Classifier" routine
+ //
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(clsHnd, &structDesc);
+
+ // If we have one eightByteCount then we can set 'useType' based on that
+ if (structDesc.eightByteCount == 1)
+ {
+ // Set 'useType' to the type of the first eightbyte item
+ useType = GetEightByteType(structDesc, 0);
+ assert(structDesc.passedInRegisters == true);
+ }
+
+#else // not UNIX_AMD64
+
+ // The largest primitive type is 8 bytes (TYP_DOUBLE)
+ // so we can skip calling getPrimitiveTypeForStruct when we
+ // have a struct that is larger than that.
+ //
+ if (structSize <= sizeof(double))
+ {
+ // We set the "primitive" useType based upon the structSize
+ // and also examine the clsHnd to see if it is an HFA of count one
+ useType = getPrimitiveTypeForStruct(structSize, clsHnd);
+ }
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef _TARGET_64BIT_
+ // Note this handles an odd case when FEATURE_MULTIREG_RET is disabled and HFAs are enabled
+ //
+    // getPrimitiveTypeForStruct will return TYP_UNKNOWN for a struct that is an HFA of two floats,
+    // because when HFAs are enabled we would normally use two FP registers to pass or return it.
+    //
+    // But if we don't have support for multiple register return types, we have to change this.
+    // Since what we have is an 8-byte struct (float + float), we change useType to TYP_I_IMPL
+    // so that the struct is instead returned in an 8-byte integer register.
+ //
+ if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd))
+ {
+ useType = TYP_I_IMPL;
+ }
+#endif
+
+ // Did we change this struct type into a simple "primitive" type?
+ //
+ if (useType != TYP_UNKNOWN)
+ {
+ // Yes, we should use the "primitive" type in 'useType'
+ howToReturnStruct = SPK_PrimitiveType;
+ }
+ else // We can't replace the struct with a "primitive" type
+ {
+ // See if we can return this struct by value, possibly in multiple registers
+ // or if we should return it using a return buffer register
+ //
+ if ((FEATURE_MULTIREG_RET == 1) && (structSize <= MAX_RET_MULTIREG_BYTES))
+ {
+ // Structs that are HFA's are returned in multiple registers
+ if (IsHfa(clsHnd))
+ {
+ // HFA's of count one should have been handled by getPrimitiveTypeForStruct
+ assert(GetHfaCount(clsHnd) >= 2);
+
+ // Set howToReturnStruct and useType to indicate that this is returned by value as an HFA
+ // using multiple registers
+ howToReturnStruct = SPK_ByValueAsHfa;
+ useType = TYP_STRUCT;
+ }
+ else // Not an HFA struct type
+ {
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The case of (structDesc.eightByteCount == 1) should have already been handled
+ if (structDesc.eightByteCount > 1)
+ {
+ // Set howToReturnStruct and useType to indicate that this is returned by value in multiple registers
+ howToReturnStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ assert(structDesc.passedInRegisters == true);
+ }
+ else
+ {
+ assert(structDesc.eightByteCount == 0);
+ // Otherwise we return this struct using a return buffer
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ assert(structDesc.passedInRegisters == false);
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // Structs that are pointer sized or smaller should have been handled by getPrimitiveTypeForStruct
+ assert(structSize > TARGET_POINTER_SIZE);
+
+ // On ARM64 structs that are 9-16 bytes are returned by value in multiple registers
+ //
+ if (structSize <= (TARGET_POINTER_SIZE * 2))
+ {
+ // Set howToReturnStruct and useType to indicate that this is returned by value in multiple registers
+ howToReturnStruct = SPK_ByValue;
+ useType = TYP_STRUCT;
+ }
+ else // a structSize that is 17-32 bytes in size
+ {
+ // Otherwise we return this struct using a return buffer
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+
+#elif defined(_TARGET_ARM_) || defined(_TARGET_X86_)
+
+ // Otherwise we return this struct using a return buffer
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+
+#else // _TARGET_XXX_
+
+ noway_assert(!"Unhandled TARGET in getReturnTypeForStruct (with FEATURE_MULTIREG_RET=1)");
+
+#endif // _TARGET_XXX_
+ }
+ }
+ else // (structSize > MAX_RET_MULTIREG_BYTES) || (FEATURE_MULTIREG_RET == 0)
+ {
+ // We have a (large) struct that can't be replaced with a "primitive" type
+ // and can't be returned in multiple registers
+
+ // We return this struct using a return buffer register
+ // Set howToReturnStruct and useType to indicate that this is returned using a return buffer register
+ // (reference to a return buffer)
+ howToReturnStruct = SPK_ByReference;
+ useType = TYP_UNKNOWN;
+ }
+ }
+
+ // 'howToReturnStruct' must be set to one of the valid values before we return
+ assert(howToReturnStruct != SPK_Unknown);
+ if (wbReturnStruct != nullptr)
+ {
+ *wbReturnStruct = howToReturnStruct;
+ }
+ return useType;
+}
+
+/*****************************************************************************
+ * variables to keep track of how many iterations we go in a dataflow pass
+ */
+
+#if DATAFLOW_ITER
+
+unsigned CSEiterCount; // counts the # of iteration for the CSE dataflow
+unsigned CFiterCount; // counts the # of iteration for the Const Folding dataflow
+
+#endif // DATAFLOW_ITER
+
+#if MEASURE_BLOCK_SIZE
+size_t genFlowNodeSize;
+size_t genFlowNodeCnt;
+#endif // MEASURE_BLOCK_SIZE
+
+/*****************************************************************************/
+// We keep track of methods we've already compiled.
+
+/*****************************************************************************
+ * Declare the statics
+ */
+
+#ifdef DEBUG
+/* static */
+unsigned Compiler::s_compMethodsCount = 0; // to produce unique label names
+
+/* static */
+bool Compiler::s_dspMemStats = false;
+#endif
+
+#ifndef DEBUGGING_SUPPORT
+/* static */
+const bool Compiler::Options::compDbgCode = false;
+#endif
+
+#ifndef PROFILING_SUPPORTED
+const bool Compiler::Options::compNoPInvokeInlineCB = false;
+#endif
+
+/*****************************************************************************
+ *
+ * One time initialization code
+ */
+
+/* static */
+void Compiler::compStartup()
+{
+#if DISPLAY_SIZES
+ grossVMsize = grossNCsize = totalNCsize = 0;
+#endif // DISPLAY_SIZES
+
+ // Initialize the JIT's allocator.
+ ArenaAllocator::startup();
+
+ /* Initialize the table of tree node sizes */
+
+ GenTree::InitNodeSize();
+
+#ifdef JIT32_GCENCODER
+ // Initialize the GC encoder lookup table
+
+ GCInfo::gcInitEncoderLookupTable();
+#endif
+
+ /* Initialize the emitter */
+
+ emitter::emitInit();
+
+ // Static vars of ValueNumStore
+ ValueNumStore::InitValueNumStoreStatics();
+
+ compDisplayStaticSizes(jitstdout);
+}
+
+/*****************************************************************************
+ *
+ * One time finalization code
+ */
+
+/* static */
+void Compiler::compShutdown()
+{
+#ifdef ALT_JIT
+ if (s_pAltJitExcludeAssembliesList != nullptr)
+ {
+ s_pAltJitExcludeAssembliesList->~AssemblyNamesList2(); // call the destructor
+ s_pAltJitExcludeAssembliesList = nullptr;
+ }
+#endif // ALT_JIT
+
+ ArenaAllocator::shutdown();
+
+ /* Shut down the emitter */
+
+ emitter::emitDone();
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ // Finish reading and/or writing inline xml
+ InlineStrategy::FinalizeXml();
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
+ if (genMethodCnt == 0)
+ {
+ return;
+ }
+#endif
+
+ // Where should we write our statistics output?
+ FILE* fout = jitstdout;
+
+#ifdef FEATURE_JIT_METHOD_PERF
+ if (compJitTimeLogFilename != NULL)
+ {
+ // _wfopen returns NULL if it fails for some reason; we assume that
+ // Print returns silently when given a NULL argument.
+ FILE* jitTimeLogFile = _wfopen(compJitTimeLogFilename, W("a"));
+ CompTimeSummaryInfo::s_compTimeSummary.Print(jitTimeLogFile);
+ if (jitTimeLogFile != nullptr)
+ {
+ fclose(jitTimeLogFile);
+ }
+ }
+#endif // FEATURE_JIT_METHOD_PERF
+
+#if FUNC_INFO_LOGGING
+ if (compJitFuncInfoFile != nullptr)
+ {
+ fclose(compJitFuncInfoFile);
+ compJitFuncInfoFile = nullptr;
+ }
+#endif // FUNC_INFO_LOGGING
+
+#if COUNT_RANGECHECKS
+ if (optRangeChkAll > 0)
+ {
+ fprintf(fout, "Removed %u of %u range checks\n", optRangeChkRmv, optRangeChkAll);
+ }
+#endif // COUNT_RANGECHECKS
+
+#if DISPLAY_SIZES
+
+ if (grossVMsize && grossNCsize)
+ {
+ fprintf(fout, "\n");
+ fprintf(fout, "--------------------------------------\n");
+ fprintf(fout, "Function and GC info size stats\n");
+ fprintf(fout, "--------------------------------------\n");
+
+ fprintf(fout, "[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, grossNCsize, Target::g_tgtCPUName,
+ 100 * grossNCsize / grossVMsize, "Total (excluding GC info)");
+
+ fprintf(fout, "[%7u VM, %8u %6s %4u%%] %s\n", grossVMsize, totalNCsize, Target::g_tgtCPUName,
+ 100 * totalNCsize / grossVMsize, "Total (including GC info)");
+
+ if (gcHeaderISize || gcHeaderNSize)
+ {
+ fprintf(fout, "\n");
+
+ fprintf(fout, "GC tables : [%7uI,%7uN] %7u byt (%u%% of IL, %u%% of %s).\n",
+ gcHeaderISize + gcPtrMapISize, gcHeaderNSize + gcPtrMapNSize, totalNCsize - grossNCsize,
+ 100 * (totalNCsize - grossNCsize) / grossVMsize, 100 * (totalNCsize - grossNCsize) / grossNCsize,
+ Target::g_tgtCPUName);
+
+ fprintf(fout, "GC headers : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcHeaderISize,
+ gcHeaderNSize, gcHeaderISize + gcHeaderNSize, (float)gcHeaderISize / (genMethodICnt + 0.001),
+ (float)gcHeaderNSize / (genMethodNCnt + 0.001),
+ (float)(gcHeaderISize + gcHeaderNSize) / genMethodCnt);
+
+ fprintf(fout, "GC ptr maps : [%7uI,%7uN] %7u byt, [%4.1fI,%4.1fN] %4.1f byt/meth\n", gcPtrMapISize,
+ gcPtrMapNSize, gcPtrMapISize + gcPtrMapNSize, (float)gcPtrMapISize / (genMethodICnt + 0.001),
+ (float)gcPtrMapNSize / (genMethodNCnt + 0.001),
+ (float)(gcPtrMapISize + gcPtrMapNSize) / genMethodCnt);
+ }
+ else
+ {
+ fprintf(fout, "\n");
+
+ fprintf(fout, "GC tables take up %u bytes (%u%% of instr, %u%% of %6s code).\n",
+ totalNCsize - grossNCsize, 100 * (totalNCsize - grossNCsize) / grossVMsize,
+ 100 * (totalNCsize - grossNCsize) / grossNCsize, Target::g_tgtCPUName);
+ }
+
+#ifdef DEBUG
+#if DOUBLE_ALIGN
+ fprintf(fout, "%u out of %u methods generated with double-aligned stack\n",
+ Compiler::s_lvaDoubleAlignedProcsCount, genMethodCnt);
+#endif
+#endif
+ }
+
+#endif // DISPLAY_SIZES
+
+#if CALL_ARG_STATS
+ compDispCallArgStats(fout);
+#endif
+
+#if COUNT_BASIC_BLOCKS
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Basic block count frequency table:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ bbCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ fprintf(fout, "\n");
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "IL method size frequency table for methods with a single basic block:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ bbOneBBSizeTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+#endif // COUNT_BASIC_BLOCKS
+
+#if COUNT_LOOPS
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Loop stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Total number of methods with loops is %5u\n", totalLoopMethods);
+ fprintf(fout, "Total number of loops is %5u\n", totalLoopCount);
+ fprintf(fout, "Maximum number of loops per method is %5u\n", maxLoopsPerMethod);
+ fprintf(fout, "# of methods overflowing nat loop table is %5u\n", totalLoopOverflows);
+ fprintf(fout, "Total number of 'unnatural' loops is %5u\n", totalUnnatLoopCount);
+ fprintf(fout, "# of methods overflowing unnat loop limit is %5u\n", totalUnnatLoopOverflows);
+ fprintf(fout, "Total number of loops with an iterator is %5u\n", iterLoopCount);
+ fprintf(fout, "Total number of loops with a simple iterator is %5u\n", simpleTestLoopCount);
+ fprintf(fout, "Total number of loops with a constant iterator is %5u\n", constIterLoopCount);
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Loop count frequency table:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ loopCountTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Loop exit count frequency table:\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ loopExitCountTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+#endif // COUNT_LOOPS
+
+#if DATAFLOW_ITER
+
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Total number of iterations in the CSE dataflow loop is %5u\n", CSEiterCount);
+ fprintf(fout, "Total number of iterations in the CF dataflow loop is %5u\n", CFiterCount);
+
+#endif // DATAFLOW_ITER
+
+#if MEASURE_NODE_SIZE
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "GenTree node allocation stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+
+ fprintf(fout, "Allocated %6u tree nodes (%7u bytes total, avg %4u bytes per method)\n",
+ genNodeSizeStats.genTreeNodeCnt, genNodeSizeStats.genTreeNodeSize,
+ genNodeSizeStats.genTreeNodeSize / genMethodCnt);
+
+ fprintf(fout, "Allocated %7u bytes of unused tree node space (%3.2f%%)\n",
+ genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize,
+ (float)(100 * (genNodeSizeStats.genTreeNodeSize - genNodeSizeStats.genTreeNodeActualSize)) /
+ genNodeSizeStats.genTreeNodeSize);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of per-method GenTree node counts:\n");
+ genTreeNcntHist.dump(fout);
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Distribution of per-method GenTree node allocations (in bytes):\n");
+ genTreeNsizHist.dump(fout);
+
+#endif // MEASURE_NODE_SIZE
+
+#if MEASURE_BLOCK_SIZE
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "BasicBlock and flowList/BasicBlockList allocation stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+
+ fprintf(fout, "Allocated %6u basic blocks (%7u bytes total, avg %4u bytes per method)\n", BasicBlock::s_Count,
+ BasicBlock::s_Size, BasicBlock::s_Size / genMethodCnt);
+ fprintf(fout, "Allocated %6u flow nodes (%7u bytes total, avg %4u bytes per method)\n", genFlowNodeCnt,
+ genFlowNodeSize, genFlowNodeSize / genMethodCnt);
+
+#endif // MEASURE_BLOCK_SIZE
+
+#if MEASURE_MEM_ALLOC
+
+#ifdef DEBUG
+ // Under debug, we only dump memory stats when the COMPlus_* variable is defined.
+ // Under non-debug, we don't have the COMPlus_* variable, so we always dump the stats.
+ if (s_dspMemStats)
+#endif
+ {
+ fprintf(fout, "\nAll allocations:\n");
+ s_aggMemStats.Print(jitstdout);
+
+ fprintf(fout, "\nLargest method:\n");
+ s_maxCompMemStats.Print(jitstdout);
+ }
+
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+#ifdef DEBUG // In retail builds we always display the loop hoist stats
+ if (JitConfig.DisplayLoopHoistStats() != 0)
+#endif // DEBUG
+ {
+ PrintAggregateLoopHoistStats(jitstdout);
+ }
+#endif // LOOP_HOIST_STATS
+
+#if MEASURE_PTRTAB_SIZE
+
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "GC pointer table stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+
+ fprintf(fout, "Reg pointer descriptor size (internal): %8u (avg %4u per method)\n", GCInfo::s_gcRegPtrDscSize,
+ GCInfo::s_gcRegPtrDscSize / genMethodCnt);
+
+ fprintf(fout, "Total pointer table size: %8u (avg %4u per method)\n", GCInfo::s_gcTotalPtrTabSize,
+ GCInfo::s_gcTotalPtrTabSize / genMethodCnt);
+
+#endif // MEASURE_PTRTAB_SIZE
+
+#if MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES
+
+ if (genMethodCnt != 0)
+ {
+ fprintf(fout, "\n");
+ fprintf(fout, "A total of %6u methods compiled", genMethodCnt);
+#if DISPLAY_SIZES
+ if (genMethodICnt || genMethodNCnt)
+ {
+ fprintf(fout, " (%u interruptible, %u non-interruptible)", genMethodICnt, genMethodNCnt);
+ }
+#endif // DISPLAY_SIZES
+ fprintf(fout, ".\n");
+ }
+
+#endif // MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || MEASURE_PTRTAB_SIZE || DISPLAY_SIZES
+
+#if EMITTER_STATS
+ emitterStats(fout);
+#endif
+
+#if MEASURE_FATAL
+ fprintf(fout, "\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, "Fatal errors stats\n");
+ fprintf(fout, "---------------------------------------------------\n");
+ fprintf(fout, " badCode: %u\n", fatal_badCode);
+ fprintf(fout, " noWay: %u\n", fatal_noWay);
+ fprintf(fout, " NOMEM: %u\n", fatal_NOMEM);
+ fprintf(fout, " noWayAssertBody: %u\n", fatal_noWayAssertBody);
+#ifdef DEBUG
+ fprintf(fout, " noWayAssertBodyArgs: %u\n", fatal_noWayAssertBodyArgs);
+#endif // DEBUG
+ fprintf(fout, " NYI: %u\n", fatal_NYI);
+#endif // MEASURE_FATAL
+}
+
+/*****************************************************************************
+ * Display static data structure sizes.
+ */
+
+/* static */
+void Compiler::compDisplayStaticSizes(FILE* fout)
+{
+
+#if MEASURE_NODE_SIZE
+ /*
+ IMPORTANT: Use the following code to check the alignment of
+ GenTree members (in a retail build, of course).
+ */
+
+ GenTree* gtDummy = nullptr;
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Offset / size of gtOper = %2u / %2u\n", offsetof(GenTree, gtOper), sizeof(gtDummy->gtOper));
+ fprintf(fout, "Offset / size of gtType = %2u / %2u\n", offsetof(GenTree, gtType), sizeof(gtDummy->gtType));
+#if FEATURE_ANYCSE
+ fprintf(fout, "Offset / size of gtCSEnum = %2u / %2u\n", offsetof(GenTree, gtCSEnum),
+ sizeof(gtDummy->gtCSEnum));
+#endif // FEATURE_ANYCSE
+#if ASSERTION_PROP
+ fprintf(fout, "Offset / size of gtAssertionNum = %2u / %2u\n", offsetof(GenTree, gtAssertionNum),
+ sizeof(gtDummy->gtAssertionNum));
+#endif // ASSERTION_PROP
+#if FEATURE_STACK_FP_X87
+ fprintf(fout, "Offset / size of gtFPlvl = %2u / %2u\n", offsetof(GenTree, gtFPlvl),
+ sizeof(gtDummy->gtFPlvl));
+#endif // FEATURE_STACK_FP_X87
+ // TODO: The section that reports GenTree sizes should be made into a public static member function of the GenTree
+ // class (see https://github.com/dotnet/coreclr/pull/493)
+ // fprintf(fout, "Offset / size of gtCostEx = %2u / %2u\n", offsetof(GenTree, _gtCostEx ),
+ // sizeof(gtDummy->_gtCostEx ));
+ // fprintf(fout, "Offset / size of gtCostSz = %2u / %2u\n", offsetof(GenTree, _gtCostSz ),
+ // sizeof(gtDummy->_gtCostSz ));
+ fprintf(fout, "Offset / size of gtFlags = %2u / %2u\n", offsetof(GenTree, gtFlags),
+ sizeof(gtDummy->gtFlags));
+ fprintf(fout, "Offset / size of gtVNPair = %2u / %2u\n", offsetof(GenTree, gtVNPair),
+ sizeof(gtDummy->gtVNPair));
+ fprintf(fout, "Offset / size of gtRsvdRegs = %2u / %2u\n", offsetof(GenTree, gtRsvdRegs),
+ sizeof(gtDummy->gtRsvdRegs));
+#ifdef LEGACY_BACKEND
+ fprintf(fout, "Offset / size of gtUsedRegs = %2u / %2u\n", offsetof(GenTree, gtUsedRegs),
+ sizeof(gtDummy->gtUsedRegs));
+#endif // LEGACY_BACKEND
+#ifndef LEGACY_BACKEND
+ fprintf(fout, "Offset / size of gtLsraInfo = %2u / %2u\n", offsetof(GenTree, gtLsraInfo),
+ sizeof(gtDummy->gtLsraInfo));
+#endif // !LEGACY_BACKEND
+ fprintf(fout, "Offset / size of gtNext = %2u / %2u\n", offsetof(GenTree, gtNext), sizeof(gtDummy->gtNext));
+ fprintf(fout, "Offset / size of gtPrev = %2u / %2u\n", offsetof(GenTree, gtPrev), sizeof(gtDummy->gtPrev));
+ fprintf(fout, "\n");
+
+#if SMALL_TREE_NODES
+ fprintf(fout, "Small tree node size = %3u\n", TREE_NODE_SZ_SMALL);
+#endif // SMALL_TREE_NODES
+ fprintf(fout, "Large tree node size = %3u\n", TREE_NODE_SZ_LARGE);
+ fprintf(fout, "Size of GenTree = %3u\n", sizeof(GenTree));
+ fprintf(fout, "Size of GenTreeUnOp = %3u\n", sizeof(GenTreeUnOp));
+ fprintf(fout, "Size of GenTreeOp = %3u\n", sizeof(GenTreeOp));
+ fprintf(fout, "Size of GenTreeVal = %3u\n", sizeof(GenTreeVal));
+ fprintf(fout, "Size of GenTreeIntConCommon = %3u\n", sizeof(GenTreeIntConCommon));
+ fprintf(fout, "Size of GenTreePhysReg = %3u\n", sizeof(GenTreePhysReg));
+#ifndef LEGACY_BACKEND
+ fprintf(fout, "Size of GenTreeJumpTable = %3u\n", sizeof(GenTreeJumpTable));
+#endif // !LEGACY_BACKEND
+ fprintf(fout, "Size of GenTreeIntCon = %3u\n", sizeof(GenTreeIntCon));
+ fprintf(fout, "Size of GenTreeLngCon = %3u\n", sizeof(GenTreeLngCon));
+ fprintf(fout, "Size of GenTreeDblCon = %3u\n", sizeof(GenTreeDblCon));
+ fprintf(fout, "Size of GenTreeStrCon = %3u\n", sizeof(GenTreeStrCon));
+ fprintf(fout, "Size of GenTreeLclVarCommon = %3u\n", sizeof(GenTreeLclVarCommon));
+ fprintf(fout, "Size of GenTreeLclVar = %3u\n", sizeof(GenTreeLclVar));
+ fprintf(fout, "Size of GenTreeLclFld = %3u\n", sizeof(GenTreeLclFld));
+ fprintf(fout, "Size of GenTreeRegVar = %3u\n", sizeof(GenTreeRegVar));
+ fprintf(fout, "Size of GenTreeCast = %3u\n", sizeof(GenTreeCast));
+ fprintf(fout, "Size of GenTreeBox = %3u\n", sizeof(GenTreeBox));
+ fprintf(fout, "Size of GenTreeField = %3u\n", sizeof(GenTreeField));
+ fprintf(fout, "Size of GenTreeArgList = %3u\n", sizeof(GenTreeArgList));
+ fprintf(fout, "Size of GenTreeColon = %3u\n", sizeof(GenTreeColon));
+ fprintf(fout, "Size of GenTreeCall = %3u\n", sizeof(GenTreeCall));
+ fprintf(fout, "Size of GenTreeCmpXchg = %3u\n", sizeof(GenTreeCmpXchg));
+ fprintf(fout, "Size of GenTreeFptrVal = %3u\n", sizeof(GenTreeFptrVal));
+ fprintf(fout, "Size of GenTreeQmark = %3u\n", sizeof(GenTreeQmark));
+ fprintf(fout, "Size of GenTreeIntrinsic = %3u\n", sizeof(GenTreeIntrinsic));
+ fprintf(fout, "Size of GenTreeIndex = %3u\n", sizeof(GenTreeIndex));
+ fprintf(fout, "Size of GenTreeArrLen = %3u\n", sizeof(GenTreeArrLen));
+ fprintf(fout, "Size of GenTreeBoundsChk = %3u\n", sizeof(GenTreeBoundsChk));
+ fprintf(fout, "Size of GenTreeArrElem = %3u\n", sizeof(GenTreeArrElem));
+ fprintf(fout, "Size of GenTreeAddrMode = %3u\n", sizeof(GenTreeAddrMode));
+ fprintf(fout, "Size of GenTreeIndir = %3u\n", sizeof(GenTreeIndir));
+ fprintf(fout, "Size of GenTreeStoreInd = %3u\n", sizeof(GenTreeStoreInd));
+ fprintf(fout, "Size of GenTreeRetExpr = %3u\n", sizeof(GenTreeRetExpr));
+ fprintf(fout, "Size of GenTreeStmt = %3u\n", sizeof(GenTreeStmt));
+ fprintf(fout, "Size of GenTreeObj = %3u\n", sizeof(GenTreeObj));
+ fprintf(fout, "Size of GenTreeClsVar = %3u\n", sizeof(GenTreeClsVar));
+ fprintf(fout, "Size of GenTreeArgPlace = %3u\n", sizeof(GenTreeArgPlace));
+ fprintf(fout, "Size of GenTreeLabel = %3u\n", sizeof(GenTreeLabel));
+ fprintf(fout, "Size of GenTreePhiArg = %3u\n", sizeof(GenTreePhiArg));
+ fprintf(fout, "Size of GenTreePutArgStk = %3u\n", sizeof(GenTreePutArgStk));
+ fprintf(fout, "\n");
+#endif // MEASURE_NODE_SIZE
+
+#if MEASURE_BLOCK_SIZE
+
+ BasicBlock* bbDummy = nullptr;
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Offset / size of bbNext = %3u / %3u\n", offsetof(BasicBlock, bbNext),
+ sizeof(bbDummy->bbNext));
+ fprintf(fout, "Offset / size of bbNum = %3u / %3u\n", offsetof(BasicBlock, bbNum),
+ sizeof(bbDummy->bbNum));
+ fprintf(fout, "Offset / size of bbPostOrderNum = %3u / %3u\n", offsetof(BasicBlock, bbPostOrderNum),
+ sizeof(bbDummy->bbPostOrderNum));
+ fprintf(fout, "Offset / size of bbRefs = %3u / %3u\n", offsetof(BasicBlock, bbRefs),
+ sizeof(bbDummy->bbRefs));
+ fprintf(fout, "Offset / size of bbFlags = %3u / %3u\n", offsetof(BasicBlock, bbFlags),
+ sizeof(bbDummy->bbFlags));
+ fprintf(fout, "Offset / size of bbWeight = %3u / %3u\n", offsetof(BasicBlock, bbWeight),
+ sizeof(bbDummy->bbWeight));
+ fprintf(fout, "Offset / size of bbJumpKind = %3u / %3u\n", offsetof(BasicBlock, bbJumpKind),
+ sizeof(bbDummy->bbJumpKind));
+ fprintf(fout, "Offset / size of bbJumpOffs = %3u / %3u\n", offsetof(BasicBlock, bbJumpOffs),
+ sizeof(bbDummy->bbJumpOffs));
+ fprintf(fout, "Offset / size of bbJumpDest = %3u / %3u\n", offsetof(BasicBlock, bbJumpDest),
+ sizeof(bbDummy->bbJumpDest));
+ fprintf(fout, "Offset / size of bbJumpSwt = %3u / %3u\n", offsetof(BasicBlock, bbJumpSwt),
+ sizeof(bbDummy->bbJumpSwt));
+ fprintf(fout, "Offset / size of bbTreeList = %3u / %3u\n", offsetof(BasicBlock, bbTreeList),
+ sizeof(bbDummy->bbTreeList));
+ fprintf(fout, "Offset / size of bbEntryState = %3u / %3u\n", offsetof(BasicBlock, bbEntryState),
+ sizeof(bbDummy->bbEntryState));
+ fprintf(fout, "Offset / size of bbStkTempsIn = %3u / %3u\n", offsetof(BasicBlock, bbStkTempsIn),
+ sizeof(bbDummy->bbStkTempsIn));
+ fprintf(fout, "Offset / size of bbStkTempsOut = %3u / %3u\n", offsetof(BasicBlock, bbStkTempsOut),
+ sizeof(bbDummy->bbStkTempsOut));
+ fprintf(fout, "Offset / size of bbTryIndex = %3u / %3u\n", offsetof(BasicBlock, bbTryIndex),
+ sizeof(bbDummy->bbTryIndex));
+ fprintf(fout, "Offset / size of bbHndIndex = %3u / %3u\n", offsetof(BasicBlock, bbHndIndex),
+ sizeof(bbDummy->bbHndIndex));
+ fprintf(fout, "Offset / size of bbCatchTyp = %3u / %3u\n", offsetof(BasicBlock, bbCatchTyp),
+ sizeof(bbDummy->bbCatchTyp));
+ fprintf(fout, "Offset / size of bbStkDepth = %3u / %3u\n", offsetof(BasicBlock, bbStkDepth),
+ sizeof(bbDummy->bbStkDepth));
+ fprintf(fout, "Offset / size of bbFPinVars = %3u / %3u\n", offsetof(BasicBlock, bbFPinVars),
+ sizeof(bbDummy->bbFPinVars));
+ fprintf(fout, "Offset / size of bbPreds = %3u / %3u\n", offsetof(BasicBlock, bbPreds),
+ sizeof(bbDummy->bbPreds));
+ fprintf(fout, "Offset / size of bbReach = %3u / %3u\n", offsetof(BasicBlock, bbReach),
+ sizeof(bbDummy->bbReach));
+ fprintf(fout, "Offset / size of bbIDom = %3u / %3u\n", offsetof(BasicBlock, bbIDom),
+ sizeof(bbDummy->bbIDom));
+ fprintf(fout, "Offset / size of bbDfsNum = %3u / %3u\n", offsetof(BasicBlock, bbDfsNum),
+ sizeof(bbDummy->bbDfsNum));
+ fprintf(fout, "Offset / size of bbCodeOffs = %3u / %3u\n", offsetof(BasicBlock, bbCodeOffs),
+ sizeof(bbDummy->bbCodeOffs));
+ fprintf(fout, "Offset / size of bbCodeOffsEnd = %3u / %3u\n", offsetof(BasicBlock, bbCodeOffsEnd),
+ sizeof(bbDummy->bbCodeOffsEnd));
+ fprintf(fout, "Offset / size of bbVarUse = %3u / %3u\n", offsetof(BasicBlock, bbVarUse),
+ sizeof(bbDummy->bbVarUse));
+ fprintf(fout, "Offset / size of bbVarDef = %3u / %3u\n", offsetof(BasicBlock, bbVarDef),
+ sizeof(bbDummy->bbVarDef));
+ fprintf(fout, "Offset / size of bbVarTmp = %3u / %3u\n", offsetof(BasicBlock, bbVarTmp),
+ sizeof(bbDummy->bbVarTmp));
+ fprintf(fout, "Offset / size of bbLiveIn = %3u / %3u\n", offsetof(BasicBlock, bbLiveIn),
+ sizeof(bbDummy->bbLiveIn));
+ fprintf(fout, "Offset / size of bbLiveOut = %3u / %3u\n", offsetof(BasicBlock, bbLiveOut),
+ sizeof(bbDummy->bbLiveOut));
+ fprintf(fout, "Offset / size of bbHeapSsaPhiFunc = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaPhiFunc),
+ sizeof(bbDummy->bbHeapSsaPhiFunc));
+ fprintf(fout, "Offset / size of bbHeapSsaNumIn = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumIn),
+ sizeof(bbDummy->bbHeapSsaNumIn));
+ fprintf(fout, "Offset / size of bbHeapSsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumOut),
+ sizeof(bbDummy->bbHeapSsaNumOut));
+
+#ifdef DEBUGGING_SUPPORT
+ fprintf(fout, "Offset / size of bbScope = %3u / %3u\n", offsetof(BasicBlock, bbScope),
+ sizeof(bbDummy->bbScope));
+#endif // DEBUGGING_SUPPORT
+
+ fprintf(fout, "Offset / size of bbCseGen = %3u / %3u\n", offsetof(BasicBlock, bbCseGen),
+ sizeof(bbDummy->bbCseGen));
+ fprintf(fout, "Offset / size of bbCseIn = %3u / %3u\n", offsetof(BasicBlock, bbCseIn),
+ sizeof(bbDummy->bbCseIn));
+ fprintf(fout, "Offset / size of bbCseOut = %3u / %3u\n", offsetof(BasicBlock, bbCseOut),
+ sizeof(bbDummy->bbCseOut));
+
+ fprintf(fout, "Offset / size of bbEmitCookie = %3u / %3u\n", offsetof(BasicBlock, bbEmitCookie),
+ sizeof(bbDummy->bbEmitCookie));
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ fprintf(fout, "Offset / size of bbUnwindNopEmitCookie = %3u / %3u\n", offsetof(BasicBlock, bbUnwindNopEmitCookie),
+ sizeof(bbDummy->bbUnwindNopEmitCookie));
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef VERIFIER
+ fprintf(fout, "Offset / size of bbStackIn = %3u / %3u\n", offsetof(BasicBlock, bbStackIn),
+ sizeof(bbDummy->bbStackIn));
+ fprintf(fout, "Offset / size of bbStackOut = %3u / %3u\n", offsetof(BasicBlock, bbStackOut),
+ sizeof(bbDummy->bbStackOut));
+ fprintf(fout, "Offset / size of bbTypesIn = %3u / %3u\n", offsetof(BasicBlock, bbTypesIn),
+ sizeof(bbDummy->bbTypesIn));
+ fprintf(fout, "Offset / size of bbTypesOut = %3u / %3u\n", offsetof(BasicBlock, bbTypesOut),
+ sizeof(bbDummy->bbTypesOut));
+#endif // VERIFIER
+
+#if FEATURE_STACK_FP_X87
+ fprintf(fout, "Offset / size of bbFPStateX87 = %3u / %3u\n", offsetof(BasicBlock, bbFPStateX87),
+ sizeof(bbDummy->bbFPStateX87));
+#endif // FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+ fprintf(fout, "Offset / size of bbLoopNum = %3u / %3u\n", offsetof(BasicBlock, bbLoopNum),
+ sizeof(bbDummy->bbLoopNum));
+#endif // DEBUG
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Size of BasicBlock = %3u\n", sizeof(BasicBlock));
+
+#endif // MEASURE_BLOCK_SIZE
+
+#if EMITTER_STATS
+ emitterStaticStats(fout);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Constructor
+ */
+
+void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
+{
+ assert(pAlloc);
+ compAllocator = pAlloc;
+
+ // The inlinee Compiler object will only be allocated the first time it is needed.
+ InlineeCompiler = nullptr;
+
+ // Set the inline info.
+ impInlineInfo = inlineInfo;
+
+ eeInfoInitialized = false;
+
+ compDoAggressiveInlining = false;
+
+ if (compIsForInlining())
+ {
+ m_inlineStrategy = nullptr;
+ compInlineResult = inlineInfo->inlineResult;
+ compAsIAllocator = nullptr; // We shouldn't be using compAsIAllocator for anything other than the root compiler.
+#if MEASURE_MEM_ALLOC
+ compAsIAllocatorBitset = nullptr;
+ compAsIAllocatorGC = nullptr;
+ compAsIAllocatorLoopHoist = nullptr;
+#ifdef DEBUG
+ compAsIAllocatorDebugOnly = nullptr;
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+ compQMarks = nullptr;
+ }
+ else
+ {
+ m_inlineStrategy = new (this, CMK_Inlining) InlineStrategy(this);
+ compInlineResult = nullptr;
+ compAsIAllocator = new (this, CMK_Unknown) CompAllocator(this, CMK_AsIAllocator);
+#if MEASURE_MEM_ALLOC
+ compAsIAllocatorBitset = new (this, CMK_Unknown) CompAllocator(this, CMK_bitset);
+ compAsIAllocatorGC = new (this, CMK_Unknown) CompAllocator(this, CMK_GC);
+ compAsIAllocatorLoopHoist = new (this, CMK_Unknown) CompAllocator(this, CMK_LoopHoist);
+#ifdef DEBUG
+ compAsIAllocatorDebugOnly = new (this, CMK_Unknown) CompAllocator(this, CMK_DebugOnly);
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+ compQMarks = new (this, CMK_Unknown) ExpandArrayStack<GenTreePtr>(getAllocator());
+ }
+
+#ifdef FEATURE_TRACELOGGING
+ // Make sure JIT telemetry is initialized as soon as allocations can be made
+ // but no later than a point where noway_asserts can be thrown.
+ // 1. JIT telemetry could allocate some objects internally.
+ // 2. NowayAsserts are tracked through telemetry.
+ // Note: JIT telemetry could gather data when the compiler is not fully initialized,
+ // so you have to initialize any compiler variables you use for telemetry.
+ assert((unsigned)PHASE_PRE_IMPORT == 0);
+ previousCompletedPhase = PHASE_PRE_IMPORT;
+ info.compILCodeSize = 0;
+ info.compMethodHnd = nullptr;
+ compJitTelemetry.Initialize(this);
+#endif
+
+#ifdef DEBUG
+ bRangeAllowStress = false;
+#endif
+
+ fgInit();
+ lvaInit();
+
+ if (!compIsForInlining())
+ {
+ codeGen = getCodeGenerator(this);
+#ifdef LEGACY_BACKEND
+ raInit();
+#endif // LEGACY_BACKEND
+ optInit();
+#ifndef LEGACY_BACKEND
+ hashBv::Init(this);
+#endif // !LEGACY_BACKEND
+
+ compVarScopeMap = nullptr;
+
+ // If this method were a real constructor for Compiler, these would
+ // become method initializations.
+ impPendingBlockMembers = ExpandArray<BYTE>(getAllocator());
+ impSpillCliquePredMembers = ExpandArray<BYTE>(getAllocator());
+ impSpillCliqueSuccMembers = ExpandArray<BYTE>(getAllocator());
+
+ memset(&lvHeapPerSsaData, 0, sizeof(PerSsaArray));
+ lvHeapPerSsaData.Init(getAllocator());
+ lvHeapNumSsaNames = 0;
+
+ //
+ // Initialize all the per-method statistics gathering data structures.
+ //
+
+ optLoopsCloned = 0;
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.Init();
+#endif // MEASURE_MEM_ALLOC
+#if LOOP_HOIST_STATS
+ m_loopsConsidered = 0;
+ m_curLoopHasHoistedExpression = false;
+ m_loopsWithHoistedExpressions = 0;
+ m_totalHoistedExpressions = 0;
+#endif // LOOP_HOIST_STATS
+#if MEASURE_NODE_SIZE
+ genNodeSizeStatsPerFunc.Init();
+#endif // MEASURE_NODE_SIZE
+ }
+ else
+ {
+ codeGen = nullptr;
+ }
+
+ compJmpOpUsed = false;
+ compLongUsed = false;
+ compTailCallUsed = false;
+ compLocallocUsed = false;
+ compQmarkRationalized = false;
+ compQmarkUsed = false;
+ compFloatingPointUsed = false;
+ compUnsafeCastUsed = false;
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = false;
+#endif
+#if FEATURE_STACK_FP_X87
+ compMayHaveTransitionBlocks = false;
+#endif
+ compNeedsGSSecurityCookie = false;
+ compGSReorderStackLayout = false;
+#if STACK_PROBES
+ compStackProbePrologDone = false;
+#endif
+
+ compGeneratingProlog = false;
+ compGeneratingEpilog = false;
+
+#ifndef LEGACY_BACKEND
+ compLSRADone = false;
+#endif // !LEGACY_BACKEND
+ compRationalIRForm = false;
+
+#ifdef DEBUG
+ compCodeGenDone = false;
+ compRegSetCheckLevel = 0;
+ opts.compMinOptsIsUsed = false;
+#endif
+ opts.compMinOptsIsSet = false;
+
+ // Used by fgFindJumpTargets for inlining heuristics.
+ opts.instrCount = 0;
+
+ // Used to track when we should consider running EarlyProp
+ optMethodFlags = 0;
+
+ for (unsigned i = 0; i < MAX_LOOP_NUM; i++)
+ {
+ AllVarSetOps::AssignNoCopy(this, optLoopTable[i].lpAsgVars, AllVarSetOps::UninitVal());
+ }
+
+#ifdef DEBUG
+ m_nodeTestData = nullptr;
+ m_loopHoistCSEClass = FIRST_LOOP_HOIST_CSE_CLASS;
+#endif
+ m_switchDescMap = nullptr;
+ m_blockToEHPreds = nullptr;
+ m_fieldSeqStore = nullptr;
+ m_zeroOffsetFieldMap = nullptr;
+ m_arrayInfoMap = nullptr;
+ m_heapSsaMap = nullptr;
+ m_refAnyClass = nullptr;
+
+#ifdef DEBUG
+ if (!compIsForInlining())
+ {
+ compDoComponentUnitTestsOnce();
+ }
+#endif // DEBUG
+
+ vnStore = nullptr;
+ m_opAsgnVarDefSsaNums = nullptr;
+ m_indirAssignMap = nullptr;
+ fgSsaPassesCompleted = 0;
+ fgVNPassesCompleted = 0;
+
+ // check that HelperCallProperties are initialized
+
+ assert(s_helperCallProperties.IsPure(CORINFO_HELP_GETSHARED_GCSTATIC_BASE));
+ assert(!s_helperCallProperties.IsPure(CORINFO_HELP_GETFIELDOBJ)); // quick sanity check
+
+ // We start with the flow graph in tree-order
+ fgOrder = FGOrderTree;
+
+#ifdef FEATURE_SIMD
+ // SIMD Types
+ SIMDFloatHandle = nullptr;
+ SIMDDoubleHandle = nullptr;
+ SIMDIntHandle = nullptr;
+ SIMDUShortHandle = nullptr;
+ SIMDUByteHandle = nullptr;
+ SIMDShortHandle = nullptr;
+ SIMDByteHandle = nullptr;
+ SIMDLongHandle = nullptr;
+ SIMDUIntHandle = nullptr;
+ SIMDULongHandle = nullptr;
+ SIMDVector2Handle = nullptr;
+ SIMDVector3Handle = nullptr;
+ SIMDVector4Handle = nullptr;
+ SIMDVectorHandle = nullptr;
+#endif
+
+#ifdef DEBUG
+ inlRNG = nullptr;
+#endif
+
+ compUsesThrowHelper = false;
+}
+
+/*****************************************************************************
+ *
+ * Destructor
+ */
+
+void Compiler::compDone()
+{
+}
+
+void* Compiler::compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN */
+ void** ppIndirection) /* OUT */
+{
+ void* addr;
+
+ if (info.compMatchedVM)
+ {
+ addr = info.compCompHnd->getHelperFtn(ftnNum, ppIndirection);
+ }
+ else
+ {
+ // If we don't have a matched VM, we won't get valid results when asking for a helper function.
+ addr = (void*)0xCA11CA11; // "callcall"
+ }
+
+ return addr;
+}
+
+unsigned Compiler::compGetTypeSize(CorInfoType cit, CORINFO_CLASS_HANDLE clsHnd)
+{
+ var_types sigType = genActualType(JITtype2varType(cit));
+ unsigned sigSize;
+ sigSize = genTypeSize(sigType);
+ if (cit == CORINFO_TYPE_VALUECLASS)
+ {
+ sigSize = info.compCompHnd->getClassSize(clsHnd);
+ }
+ else if (cit == CORINFO_TYPE_REFANY)
+ {
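+ // A TypedReference (refany) consists of a managed pointer plus a type handle,
+ // i.e. two pointer-sized slots.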
+ sigSize = 2 * sizeof(void*);
+ }
+ return sigSize;
+}
+
+#ifdef DEBUG
+static bool DidComponentUnitTests = false;
+
+void Compiler::compDoComponentUnitTestsOnce()
+{
+ if (!JitConfig.RunComponentUnitTests())
+ {
+ return;
+ }
+
+ if (!DidComponentUnitTests)
+ {
+ DidComponentUnitTests = true;
+ ValueNumStore::RunTests(this);
+ BitSetSupport::TestSuite(getAllocatorDebugOnly());
+ }
+}
+#endif // DEBUG
+
+/******************************************************************************
+ *
+ * The Emitter uses this callback function to allocate its memory
+ */
+
+/* static */
+void* Compiler::compGetMemCallback(void* p, size_t size, CompMemKind cmk)
+{
+ assert(p);
+
+ return ((Compiler*)p)->compGetMem(size, cmk);
+}
+
+/*****************************************************************************
+ *
+ * The central memory allocation routine used by the compiler. Normally this
+ * is a simple inline method defined in compiler.hpp, but for debugging it's
+ * often convenient to keep it non-inline.
+ */
+
+#ifdef DEBUG
+
+void* Compiler::compGetMem(size_t sz, CompMemKind cmk)
+{
+#if 0
+#if SMALL_TREE_NODES
+ if (sz != TREE_NODE_SZ_SMALL &&
+ sz != TREE_NODE_SZ_LARGE && sz > 32)
+ {
+ printf("Alloc %3u bytes\n", sz);
+ }
+#else
+ if (sz != sizeof(GenTree) && sz > 32)
+ {
+ printf("Alloc %3u bytes\n", sz);
+ }
+#endif
+#endif // 0
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.AddAlloc(sz, cmk);
+#endif
+
+ void* ptr = compAllocator->allocateMemory(sz);
+
+ // Verify that the current block is aligned. Only then will the next
+ // block allocated be on an aligned boundary.
+ assert((size_t(ptr) & (sizeof(size_t) - 1)) == 0);
+
+ return ptr;
+}
+
+#endif
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+VarName Compiler::compVarName(regNumber reg, bool isFloatReg)
+{
+ if (isFloatReg)
+ {
+#if FEATURE_STACK_FP_X87
+ assert(reg < FP_STK_SIZE); // would like to have same assert as below but sometimes you get -1?
+#else
+ assert(genIsValidFloatReg(reg));
+#endif
+ }
+ else
+ {
+ assert(genIsValidReg(reg));
+ }
+
+ if ((info.compVarScopesCount > 0) && compCurBB && opts.varNames)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* Look for the matching register */
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* If the variable is not in a register, or not in the register we're looking for, quit. */
+ /* Also, if it is a compiler-generated variable (i.e. slot# >= info.compVarScopesCount), don't bother. */
+ if ((varDsc->lvRegister != 0) && (varDsc->lvRegNum == reg) && (varDsc->IsFloatRegType() || !isFloatReg) &&
+ (varDsc->lvSlotNum < info.compVarScopesCount))
+ {
+ /* check if variable in that register is live */
+ if (VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
+ {
+ /* variable is live - find the corresponding slot */
+ VarScopeDsc* varScope =
+ compFindLocalVar(varDsc->lvSlotNum, compCurBB->bbCodeOffs, compCurBB->bbCodeOffsEnd);
+ if (varScope)
+ {
+ return varScope->vsdName;
+ }
+ }
+ }
+ }
+
+#ifdef LEGACY_BACKEND
+ // maybe var is marked dead, but still used (last use)
+ if (!isFloatReg && codeGen->regSet.rsUsedTree[reg] != NULL)
+ {
+ GenTreePtr nodePtr;
+
+ if (GenTree::OperIsUnary(codeGen->regSet.rsUsedTree[reg]->OperGet()))
+ {
+ assert(codeGen->regSet.rsUsedTree[reg]->gtOp.gtOp1 != NULL);
+ nodePtr = codeGen->regSet.rsUsedTree[reg]->gtOp.gtOp1;
+ }
+ else
+ {
+ nodePtr = codeGen->regSet.rsUsedTree[reg];
+ }
+
+ if ((nodePtr->gtOper == GT_REG_VAR) && (nodePtr->gtRegVar.gtRegNum == reg) &&
+ (nodePtr->gtRegVar.gtLclNum < info.compVarScopesCount))
+ {
+ VarScopeDsc* varScope =
+ compFindLocalVar(nodePtr->gtRegVar.gtLclNum, compCurBB->bbCodeOffs, compCurBB->bbCodeOffsEnd);
+ if (varScope)
+ return varScope->vsdName;
+ }
+ }
+#endif // LEGACY_BACKEND
+ }
+ return nullptr;
+}
+
+const char* Compiler::compRegVarName(regNumber reg, bool displayVar, bool isFloatReg)
+{
+
+#ifdef _TARGET_ARM_
+ isFloatReg = genIsValidFloatReg(reg);
+#endif
+
+ if (displayVar && (reg != REG_NA))
+ {
+ VarName varName = compVarName(reg, isFloatReg);
+
+ if (varName)
+ {
+ const int NAME_VAR_REG_BUFFER_LEN = 4 + 256 + 1;
+ static char nameVarReg[2][NAME_VAR_REG_BUFFER_LEN]; // to avoid overwriting the buffer when we have 2
+ // consecutive calls before printing
+ static int index = 0; // for circular index into the name array
+
+ index = (index + 1) % 2; // circular reuse of index
+ sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "%s'%s'", getRegName(reg, isFloatReg),
+ VarNameToStr(varName));
+
+ return nameVarReg[index];
+ }
+ }
+
+ /* no debug info required or no variable in that register
+ -> return standard name */
+
+ return getRegName(reg, isFloatReg);
+}
+
+#define MAX_REG_PAIR_NAME_LENGTH 10
+
+const char* Compiler::compRegPairName(regPairNo regPair)
+{
+ static char regNameLong[MAX_REG_PAIR_NAME_LENGTH];
+
+ if (regPair == REG_PAIR_NONE)
+ {
+ return "NA|NA";
+ }
+
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ strcpy_s(regNameLong, sizeof(regNameLong), compRegVarName(genRegPairLo(regPair)));
+ strcat_s(regNameLong, sizeof(regNameLong), "|");
+ strcat_s(regNameLong, sizeof(regNameLong), compRegVarName(genRegPairHi(regPair)));
+ return regNameLong;
+}
+
+const char* Compiler::compRegNameForSize(regNumber reg, size_t size)
+{
+ if (size == 0 || size >= 4)
+ {
+ return compRegVarName(reg, true);
+ }
+
+ // clang-format off
+ static
+ const char * sizeNames[][2] =
+ {
+ { "al", "ax" },
+ { "cl", "cx" },
+ { "dl", "dx" },
+ { "bl", "bx" },
+#ifdef _TARGET_AMD64_
+ { "spl", "sp" }, // ESP
+ { "bpl", "bp" }, // EBP
+ { "sil", "si" }, // ESI
+ { "dil", "di" }, // EDI
+ { "r8b", "r8w" },
+ { "r9b", "r9w" },
+ { "r10b", "r10w" },
+ { "r11b", "r11w" },
+ { "r12b", "r12w" },
+ { "r13b", "r13w" },
+ { "r14b", "r14w" },
+ { "r15b", "r15w" },
+#endif // _TARGET_AMD64_
+ };
+ // clang-format on
+
+ assert(isByteReg(reg));
+ assert(genRegMask(reg) & RBM_BYTE_REGS);
+ assert(size == 1 || size == 2);
+
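+ // Index the table by register number; column 0 holds the byte-sized name and
+ // column 1 the word-sized name (size is 1 or 2 here).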
+ return sizeNames[reg][size - 1];
+}
+
+const char* Compiler::compFPregVarName(unsigned fpReg, bool displayVar)
+{
+ const int NAME_VAR_REG_BUFFER_LEN = 4 + 256 + 1;
+ static char nameVarReg[2][NAME_VAR_REG_BUFFER_LEN]; // to avoid overwriting the buffer when we have 2 consecutive calls
+ // before printing
+ static int index = 0; // for circular index into the name array
+
+ index = (index + 1) % 2; // circular reuse of index
+
+#if FEATURE_STACK_FP_X87
+ /* 'fpReg' is the distance from the bottom of the stack, i.e.
+ * it is independent of the current FP stack level
+ */
+
+ if (displayVar && codeGen->genFPregCnt)
+ {
+ assert(fpReg < FP_STK_SIZE);
+ assert(compCodeGenDone || (fpReg <= codeGen->compCurFPState.m_uStackSize));
+
+ int pos = codeGen->genFPregCnt - (fpReg + 1 - codeGen->genGetFPstkLevel());
+ if (pos >= 0)
+ {
+ VarName varName = compVarName((regNumber)pos, true);
+
+ if (varName)
+ {
+ sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "ST(%d)'%s'", fpReg, VarNameToStr(varName));
+ return nameVarReg[index];
+ }
+ }
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ /* no debug info required or no variable in that register
+ -> return standard name */
+
+ sprintf_s(nameVarReg[index], NAME_VAR_REG_BUFFER_LEN, "ST(%d)", fpReg);
+ return nameVarReg[index];
+}
+
+const char* Compiler::compLocalVarName(unsigned varNum, unsigned offs)
+{
+ unsigned i;
+ VarScopeDsc* t;
+
+ for (i = 0, t = info.compVarScopes; i < info.compVarScopesCount; i++, t++)
+ {
+ if (t->vsdVarNum != varNum)
+ {
+ continue;
+ }
+
+ if (offs >= t->vsdLifeBeg && offs < t->vsdLifeEnd)
+ {
+ return VarNameToStr(t->vsdName);
+ }
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+void Compiler::compSetProcessor()
+{
+ unsigned compileFlags = opts.eeFlags;
+
+#if defined(_TARGET_ARM_)
+ info.genCPU = CPU_ARM;
+#elif defined(_TARGET_AMD64_)
+ info.genCPU = CPU_X64;
+#elif defined(_TARGET_X86_)
+ if (compileFlags & CORJIT_FLG_TARGET_P4)
+ info.genCPU = CPU_X86_PENTIUM_4;
+ else
+ info.genCPU = CPU_X86;
+#endif
+
+ //
+ // Processor specific optimizations
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ opts.compUseFCOMI = false;
+ opts.compUseCMOV = true;
+ opts.compCanUseSSE2 = true;
+
+#ifdef FEATURE_AVX_SUPPORT
+ // COMPlus_EnableAVX can be used to disable the use of AVX even when it is available on the target machine.
+ // Note that FEATURE_AVX_SUPPORT is not enabled for ctpjit
+ opts.compCanUseAVX = false;
+ if (((compileFlags & CORJIT_FLG_PREJIT) == 0) && ((compileFlags & CORJIT_FLG_USE_AVX2) != 0))
+ {
+ if (JitConfig.EnableAVX() != 0)
+ {
+ opts.compCanUseAVX = true;
+ if (!compIsForInlining())
+ {
+ codeGen->getEmitter()->SetUseAVX(true);
+ }
+ }
+ }
+#endif
+#endif //_TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ opts.compUseFCOMI = ((opts.eeFlags & CORJIT_FLG_USE_FCOMI) != 0);
+ opts.compUseCMOV = ((opts.eeFlags & CORJIT_FLG_USE_CMOV) != 0);
+ opts.compCanUseSSE2 = ((opts.eeFlags & CORJIT_FLG_USE_SSE2) != 0);
+
+#ifdef DEBUG
+ if (opts.compUseFCOMI)
+ opts.compUseFCOMI = !compStressCompile(STRESS_USE_FCOMI, 50);
+ if (opts.compUseCMOV)
+ opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50);
+
+ // Should we override the SSE2 setting
+ enum
+ {
+ SSE2_FORCE_DISABLE = 0,
+ SSE2_FORCE_USE = 1,
+ SSE2_FORCE_INVALID = -1
+ };
+
+ if (JitConfig.JitCanUseSSE2() == SSE2_FORCE_DISABLE)
+ opts.compCanUseSSE2 = false;
+ else if (JitConfig.JitCanUseSSE2() == SSE2_FORCE_USE)
+ opts.compCanUseSSE2 = true;
+ else if (opts.compCanUseSSE2)
+ opts.compCanUseSSE2 = !compStressCompile(STRESS_GENERIC_VARN, 50);
+#endif // DEBUG
+#endif // _TARGET_X86_
+}
+
+#ifdef PROFILING_SUPPORTED
+// A dummy routine to receive Enter/Leave/Tailcall profiler callbacks.
+// These are used when COMPlus_JitEltHookEnabled=1
+#ifdef _TARGET_AMD64_
+void DummyProfilerELTStub(UINT_PTR ProfilerHandle, UINT_PTR callerSP)
+{
+ return;
+}
+#else //! _TARGET_AMD64_
+void DummyProfilerELTStub(UINT_PTR ProfilerHandle)
+{
+ return;
+}
+#endif //!_TARGET_AMD64_
+
+#endif // PROFILING_SUPPORTED
+
+bool Compiler::compIsFullTrust()
+{
+ return (info.compCompHnd->canSkipMethodVerification(info.compMethodHnd) == CORINFO_VERIFICATION_CAN_SKIP);
+}
+
+bool Compiler::compShouldThrowOnNoway(
+#ifdef FEATURE_TRACELOGGING
+ const char* filename, unsigned line
+#endif
+ )
+{
+#ifdef FEATURE_TRACELOGGING
+ compJitTelemetry.NotifyNowayAssert(filename, line);
+#endif
+ // In min opts, we don't want the noway assert to go through the exception
+ // path. Instead we want it to just silently go through codegen for
+ // compat reasons.
+ // If we are not in full trust, we should always fire for security.
+ return !opts.MinOpts() || !compIsFullTrust();
+}
+
+// ConfigInteger does not offer an option for decimal values; any number it reads is interpreted as hex.
+// Rather than adding a decimal option to ConfigInteger, this helper reinterprets the hex-parsed
+// value as the decimal number the user intended.
+unsigned ReinterpretHexAsDecimal(unsigned in)
+{
+ // ex: in: 0x100 returns: 100
+ unsigned result = 0;
+ unsigned index = 1;
+
+ // default value
+ if (in == INT_MAX)
+ {
+ return in;
+ }
+
+ while (in)
+ {
+ unsigned digit = in % 16;
+ in >>= 4;
+ assert(digit < 10);
+ result += digit * index;
+ index *= 10;
+ }
+ return result;
+}
+
+void Compiler::compInitOptions(CORJIT_FLAGS* jitFlags)
+{
+#ifdef UNIX_AMD64_ABI
+ opts.compNeedToAlignFrame = false;
+#endif // UNIX_AMD64_ABI
+ memset(&opts, 0, sizeof(opts));
+
+ unsigned compileFlags = jitFlags->corJitFlags;
+
+ if (compIsForInlining())
+ {
+ assert((compileFlags & CORJIT_FLG_LOST_WHEN_INLINING) == 0);
+ assert(compileFlags & CORJIT_FLG_SKIP_VERIFICATION);
+ }
+
+ opts.jitFlags = jitFlags;
+ opts.eeFlags = compileFlags;
+ opts.compFlags = CLFLG_MAXOPT; // Default value is for full optimization
+
+ if (opts.eeFlags & (CORJIT_FLG_DEBUG_CODE | CORJIT_FLG_MIN_OPT))
+ {
+ opts.compFlags = CLFLG_MINOPT;
+ }
+ // Don't optimize .cctors (except prejit) or if we're an inlinee
+ else if (!(opts.eeFlags & CORJIT_FLG_PREJIT) && ((info.compFlags & FLG_CCTOR) == FLG_CCTOR) && !compIsForInlining())
+ {
+ opts.compFlags = CLFLG_MINOPT;
+ }
+
+ // Default value is to generate a blend of size and speed optimizations
+ //
+ opts.compCodeOpt = BLENDED_CODE;
+
+ // If the EE sets SIZE_OPT or if we are compiling a Class constructor
+ // we will optimize for code size at the expense of speed
+ //
+ if ((opts.eeFlags & CORJIT_FLG_SIZE_OPT) || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
+ {
+ opts.compCodeOpt = SMALL_CODE;
+ }
+ //
+ // If the EE sets SPEED_OPT we will optimize for speed at the expense of code size
+ //
+ else if (opts.eeFlags & CORJIT_FLG_SPEED_OPT)
+ {
+ opts.compCodeOpt = FAST_CODE;
+ assert((opts.eeFlags & CORJIT_FLG_SIZE_OPT) == 0);
+ }
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+ opts.compDbgCode = (opts.eeFlags & CORJIT_FLG_DEBUG_CODE) != 0;
+ opts.compDbgInfo = (opts.eeFlags & CORJIT_FLG_DEBUG_INFO) != 0;
+ opts.compDbgEnC = (opts.eeFlags & CORJIT_FLG_DEBUG_EnC) != 0;
+#if REGEN_SHORTCUTS || REGEN_CALLPAT
+ // We never want to have debugging enabled when regenerating GC encoding patterns
+ opts.compDbgCode = false;
+ opts.compDbgInfo = false;
+ opts.compDbgEnC = false;
+#endif
+#endif
+
+ compSetProcessor();
+
+#ifdef DEBUG
+ opts.dspOrder = false;
+ if (compIsForInlining())
+ {
+ verbose = impInlineInfo->InlinerCompiler->verbose;
+ }
+ else
+ {
+ verbose = false;
+ codeGen->setVerbose(false);
+ }
+ verboseTrees = verbose && shouldUseVerboseTrees();
+ verboseSsa = verbose && shouldUseVerboseSsa();
+ asciiTrees = shouldDumpASCIITrees();
+ opts.dspDiffable = compIsForInlining() ? impInlineInfo->InlinerCompiler->opts.dspDiffable : false;
+#endif
+
+ opts.compNeedSecurityCheck = false;
+ opts.altJit = false;
+
+#if defined(LATE_DISASM) && !defined(DEBUG)
+ // For non-debug builds with the late disassembler built in, we currently always do late disassembly
+ // (we have no way to determine when not to, since we don't have class/method names).
+ // In the DEBUG case, this is initialized to false, below.
+ opts.doLateDisasm = true;
+#endif
+
+#ifdef DEBUG
+
+ const JitConfigValues::MethodSet* pfAltJit;
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ pfAltJit = &JitConfig.AltJitNgen();
+ }
+ else
+ {
+ pfAltJit = &JitConfig.AltJit();
+ }
+
+#ifdef ALT_JIT
+ if (pfAltJit->contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.altJit = true;
+ }
+
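+ // AltJitLimit is read as hex by ConfigInteger; ReinterpretHexAsDecimal (above) recovers the
+ // decimal value the user intended.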
+ unsigned altJitLimit = ReinterpretHexAsDecimal(JitConfig.AltJitLimit());
+ if (altJitLimit > 0 && Compiler::jitTotalMethodCompiled >= altJitLimit)
+ {
+ opts.altJit = false;
+ }
+#endif // ALT_JIT
+
+#else // !DEBUG
+
+ const char* altJitVal;
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ altJitVal = JitConfig.AltJitNgen().list();
+ }
+ else
+ {
+ altJitVal = JitConfig.AltJit().list();
+ }
+
+#ifdef ALT_JIT
+ // In release mode, you either get all methods or no methods. You must use "*" as the parameter, or we ignore it.
+ // You don't get to give a regular expression of methods to match.
+ // (Partially, this is because we haven't computed and stored the method and class name except in debug, and it
+ // might be expensive to do so.)
+ if ((altJitVal != nullptr) && (strcmp(altJitVal, "*") == 0))
+ {
+ opts.altJit = true;
+ }
+#endif // ALT_JIT
+
+#endif // !DEBUG
+
+#ifdef ALT_JIT
+ // Take care of COMPlus_AltJitExcludeAssemblies.
+ if (opts.altJit)
+ {
+ // First, initialize the AltJitExcludeAssemblies list, but only do it once.
+ if (!s_pAltJitExcludeAssembliesListInitialized)
+ {
+ const wchar_t* wszAltJitExcludeAssemblyList = JitConfig.AltJitExcludeAssemblies();
+ if (wszAltJitExcludeAssemblyList != nullptr)
+ {
+ // NOTE: The Assembly name list is allocated in the process heap, not in the no-release heap, which is
+ // reclaimed for every compilation. This is ok because we only allocate once, due to the static.
+ s_pAltJitExcludeAssembliesList = new (HostAllocator::getHostAllocator())
+ AssemblyNamesList2(wszAltJitExcludeAssemblyList, HostAllocator::getHostAllocator());
+ }
+ s_pAltJitExcludeAssembliesListInitialized = true;
+ }
+
+ if (s_pAltJitExcludeAssembliesList != nullptr)
+ {
+ // We have an exclusion list. See if this method is in an assembly that is on the list.
+ // Note that we check this for every method, since we might inline across modules, and
+ // if the inlinee module is on the list, we don't want to use the altjit for it.
+ const char* methodAssemblyName = info.compCompHnd->getAssemblyName(
+ info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
+ if (s_pAltJitExcludeAssembliesList->IsInList(methodAssemblyName))
+ {
+ opts.altJit = false;
+ }
+ }
+ }
+#endif // ALT_JIT
+
+#ifdef DEBUG
+
+ bool altJitConfig = !pfAltJit->isEmpty();
+
+ // If we have a non-empty AltJit config then we change all of these other
+ // config values to refer only to the AltJit. Otherwise, a lot of COMPlus_* variables
+ // would apply to both the altjit and the normal JIT, but we only care about
+ // debugging the altjit if the COMPlus_AltJit configuration is set.
+ //
+ if (compIsForImportOnly() && (!altJitConfig || opts.altJit))
+ {
+ if (JitConfig.JitImportBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ assert(!"JitImportBreak reached");
+ }
+ }
+
+ bool verboseDump = false;
+ bool dumpIR = false;
+ bool dumpIRTypes = false;
+ bool dumpIRLocals = false;
+ bool dumpIRRegs = false;
+ bool dumpIRSsa = false;
+ bool dumpIRValnums = false;
+ bool dumpIRCosts = false;
+ bool dumpIRFlags = false;
+ bool dumpIRKinds = false;
+ bool dumpIRNodes = false;
+ bool dumpIRNoLists = false;
+ bool dumpIRNoLeafs = false;
+ bool dumpIRNoStmts = false;
+ bool dumpIRTrees = false;
+ bool dumpIRLinear = false;
+ bool dumpIRDataflow = false;
+ bool dumpIRBlockHeaders = false;
+ bool dumpIRExit = false;
+ LPCWSTR dumpIRPhase = nullptr;
+ LPCWSTR dumpIRFormat = nullptr;
+
+ if (!altJitConfig || opts.altJit)
+ {
+ LPCWSTR dumpIRFormat = nullptr;
+
+ // We should only enable 'verboseDump' when we are actually compiling a matching method
+ // and not enable it when we are just considering inlining a matching method.
+ //
+ if (!compIsForInlining())
+ {
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ if (JitConfig.NgenDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ verboseDump = true;
+ }
+ unsigned ngenHashDumpVal = (unsigned)JitConfig.NgenHashDump();
+ if ((ngenHashDumpVal != (DWORD)-1) && (ngenHashDumpVal == info.compMethodHash()))
+ {
+ verboseDump = true;
+ }
+ if (JitConfig.NgenDumpIR().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ dumpIR = true;
+ }
+ unsigned ngenHashDumpIRVal = (unsigned)JitConfig.NgenHashDumpIR();
+ if ((ngenHashDumpIRVal != (DWORD)-1) && (ngenHashDumpIRVal == info.compMethodHash()))
+ {
+ dumpIR = true;
+ }
+ dumpIRFormat = JitConfig.NgenDumpIRFormat();
+ dumpIRPhase = JitConfig.NgenDumpIRPhase();
+ }
+ else
+ {
+ if (JitConfig.JitDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ verboseDump = true;
+ }
+ unsigned jitHashDumpVal = (unsigned)JitConfig.JitHashDump();
+ if ((jitHashDumpVal != (DWORD)-1) && (jitHashDumpVal == info.compMethodHash()))
+ {
+ verboseDump = true;
+ }
+ if (JitConfig.JitDumpIR().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ dumpIR = true;
+ }
+ unsigned jitHashDumpIRVal = (unsigned)JitConfig.JitHashDumpIR();
+ if ((jitHashDumpIRVal != (DWORD)-1) && (jitHashDumpIRVal == info.compMethodHash()))
+ {
+ dumpIR = true;
+ }
+ dumpIRFormat = JitConfig.JitDumpIRFormat();
+ dumpIRPhase = JitConfig.JitDumpIRPhase();
+ }
+ }
+
+ if (dumpIRPhase == nullptr)
+ {
+ dumpIRPhase = W("*");
+ }
+
+ this->dumpIRPhase = dumpIRPhase;
+
+ if (dumpIRFormat != nullptr)
+ {
+ this->dumpIRFormat = dumpIRFormat;
+ }
+
+ dumpIRTrees = false;
+ dumpIRLinear = true;
+ if (dumpIRFormat != nullptr)
+ {
+ for (LPCWSTR p = dumpIRFormat; (*p != 0);)
+ {
+ for (; (*p != 0); p++)
+ {
+ if (*p != L' ')
+ {
+ break;
+ }
+ }
+
+ if (*p == 0)
+ {
+ break;
+ }
+
+ static bool dumpedHelp = false;
+
+ if ((*p == L'?') && (!dumpedHelp))
+ {
+ printf("*******************************************************************************\n");
+ printf("\n");
+ dFormatIR();
+ printf("\n");
+ printf("\n");
+ printf("Available specifiers (comma separated):\n");
+ printf("\n");
+ printf("? dump out value of COMPlus_JitDumpIRFormat and this list of values\n");
+ printf("\n");
+ printf("linear linear IR dump (default)\n");
+ printf("tree tree IR dump (traditional)\n");
+ printf("mixed intermingle tree dump with linear IR dump\n");
+ printf("\n");
+ printf("dataflow use data flow form of linear IR dump\n");
+ printf("structural use structural form of linear IR dump\n");
+ printf("all implies structural, include everything\n");
+ printf("\n");
+ printf("kinds include tree node kinds in dump, example: \"kinds=[LEAF][LOCAL]\"\n");
+ printf("flags include tree node flags in dump, example: \"flags=[CALL][GLOB_REF]\" \n");
+ printf("types include tree node types in dump, example: \".int\"\n");
+ printf("locals include local numbers and tracking numbers in dump, example: \"(V3,T1)\"\n");
+ printf("regs include register assignments in dump, example: \"(rdx)\"\n");
+ printf("ssa include SSA numbers in dump, example: \"<d:3>\" or \"<u:3>\"\n");
+ printf("valnums include Value numbers in dump, example: \"<v:$c4>\" or \"<v:$c4,$c5>\"\n");
+ printf("\n");
+ printf("nolists exclude GT_LIST nodes from dump\n");
+ printf("noleafs exclude LEAF nodes from dump (fold into operations)\n");
+ printf("nostmts exclude GT_STMTS from dump (unless required by dependencies)\n");
+ printf("\n");
+ printf("blkhdrs include block headers\n");
+ printf("exit exit program after last phase dump (used with single method)\n");
+ printf("\n");
+ printf("*******************************************************************************\n");
+ dumpedHelp = true;
+ }
+
+ if (wcsncmp(p, W("types"), 5) == 0)
+ {
+ dumpIRTypes = true;
+ }
+
+ if (wcsncmp(p, W("locals"), 6) == 0)
+ {
+ dumpIRLocals = true;
+ }
+
+ if (wcsncmp(p, W("regs"), 4) == 0)
+ {
+ dumpIRRegs = true;
+ }
+
+ if (wcsncmp(p, W("ssa"), 3) == 0)
+ {
+ dumpIRSsa = true;
+ }
+
+ if (wcsncmp(p, W("valnums"), 7) == 0)
+ {
+ dumpIRValnums = true;
+ }
+
+ if (wcsncmp(p, W("costs"), 5) == 0)
+ {
+ dumpIRCosts = true;
+ }
+
+ if (wcsncmp(p, W("flags"), 5) == 0)
+ {
+ dumpIRFlags = true;
+ }
+
+ if (wcsncmp(p, W("kinds"), 5) == 0)
+ {
+ dumpIRKinds = true;
+ }
+
+ if (wcsncmp(p, W("nodes"), 5) == 0)
+ {
+ dumpIRNodes = true;
+ }
+
+ if (wcsncmp(p, W("exit"), 4) == 0)
+ {
+ dumpIRExit = true;
+ }
+
+ if (wcsncmp(p, W("nolists"), 7) == 0)
+ {
+ dumpIRNoLists = true;
+ }
+
+ if (wcsncmp(p, W("noleafs"), 7) == 0)
+ {
+ dumpIRNoLeafs = true;
+ }
+
+ if (wcsncmp(p, W("nostmts"), 7) == 0)
+ {
+ dumpIRNoStmts = true;
+ }
+
+ if (wcsncmp(p, W("trees"), 5) == 0)
+ {
+ dumpIRTrees = true;
+ dumpIRLinear = false;
+ }
+
+ if (wcsncmp(p, W("structural"), 10) == 0)
+ {
+ dumpIRLinear = true;
+ dumpIRNoStmts = false;
+ dumpIRNoLeafs = false;
+ dumpIRNoLists = false;
+ }
+
+ if (wcsncmp(p, W("all"), 3) == 0)
+ {
+ dumpIRLinear = true;
+ dumpIRKinds = true;
+ dumpIRFlags = true;
+ dumpIRTypes = true;
+ dumpIRLocals = true;
+ dumpIRRegs = true;
+ dumpIRSsa = true;
+ dumpIRValnums = true;
+ dumpIRCosts = true;
+ dumpIRNoStmts = false;
+ dumpIRNoLeafs = false;
+ dumpIRNoLists = false;
+ }
+
+ if (wcsncmp(p, W("linear"), 6) == 0)
+ {
+ dumpIRTrees = false;
+ dumpIRLinear = true;
+ }
+
+ if (wcsncmp(p, W("mixed"), 5) == 0)
+ {
+ dumpIRTrees = true;
+ dumpIRLinear = true;
+ }
+
+ if (wcsncmp(p, W("dataflow"), 8) == 0)
+ {
+ dumpIRDataflow = true;
+ dumpIRNoLeafs = true;
+ dumpIRNoLists = true;
+ dumpIRNoStmts = true;
+ }
+
+ if (wcsncmp(p, W("blkhdrs"), 7) == 0)
+ {
+ dumpIRBlockHeaders = true;
+ }
+
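+ // Advance past the rest of this specifier to the character after the next ',',
+ // so the outer loop resumes at the next comma-separated token (or stops at the
+ // terminating NUL).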
+ for (; (*p != 0); p++)
+ {
+ if (*p == L',')
+ {
+ p++;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if (verboseDump)
+ {
+ verbose = true;
+ }
+
+ if (dumpIR)
+ {
+ this->dumpIR = true;
+ }
+
+ if (dumpIRTypes)
+ {
+ this->dumpIRTypes = true;
+ }
+
+ if (dumpIRLocals)
+ {
+ this->dumpIRLocals = true;
+ }
+
+ if (dumpIRRegs)
+ {
+ this->dumpIRRegs = true;
+ }
+
+ if (dumpIRSsa)
+ {
+ this->dumpIRSsa = true;
+ }
+
+ if (dumpIRValnums)
+ {
+ this->dumpIRValnums = true;
+ }
+
+ if (dumpIRCosts)
+ {
+ this->dumpIRCosts = true;
+ }
+
+ if (dumpIRFlags)
+ {
+ this->dumpIRFlags = true;
+ }
+
+ if (dumpIRKinds)
+ {
+ this->dumpIRKinds = true;
+ }
+
+ if (dumpIRNodes)
+ {
+ this->dumpIRNodes = true;
+ }
+
+ if (dumpIRNoLists)
+ {
+ this->dumpIRNoLists = true;
+ }
+
+ if (dumpIRNoLeafs)
+ {
+ this->dumpIRNoLeafs = true;
+ }
+
+ if (dumpIRNoLeafs && dumpIRDataflow)
+ {
+ this->dumpIRDataflow = true;
+ }
+
+ if (dumpIRNoStmts)
+ {
+ this->dumpIRNoStmts = true;
+ }
+
+ if (dumpIRTrees)
+ {
+ this->dumpIRTrees = true;
+ }
+
+ if (dumpIRLinear)
+ {
+ this->dumpIRLinear = true;
+ }
+
+ if (dumpIRBlockHeaders)
+ {
+ this->dumpIRBlockHeaders = true;
+ }
+
+ if (dumpIRExit)
+ {
+ this->dumpIRExit = true;
+ }
+
+#endif // DEBUG
+
+#ifdef FEATURE_SIMD
+#ifdef _TARGET_AMD64_
+ // The minimum hardware bar for enabling SIMD on AMD64 is SSE2.
+ featureSIMD = ((opts.eeFlags & CORJIT_FLG_FEATURE_SIMD) != 0);
+#endif // _TARGET_AMD64_
+#endif // FEATURE_SIMD
+
+ if (compIsForInlining() || compIsForImportOnly())
+ {
+ return;
+ }
+ // The rest of the opts fields that we initialize here
+ // should only be used when we generate code for the method.
+ // They should not be used when importing or inlining.
+
+ opts.genFPorder = true;
+ opts.genFPopt = true;
+
+ opts.instrCount = 0;
+ opts.lvRefCount = 0;
+
+#if FEATURE_TAILCALL_OPT
+ // By default opportunistic tail call optimization is enabled
+ opts.compTailCallOpt = true;
+ opts.compTailCallLoopOpt = true;
+#endif
+
+#ifdef DEBUG
+ opts.dspInstrs = false;
+ opts.dspEmit = false;
+ opts.dspLines = false;
+ opts.varNames = false;
+ opts.dmpHex = false;
+ opts.disAsm = false;
+ opts.disAsmSpilled = false;
+ opts.disDiffable = false;
+ opts.dspCode = false;
+ opts.dspEHTable = false;
+ opts.dspGCtbls = false;
+ opts.disAsm2 = false;
+ opts.dspUnwind = false;
+ s_dspMemStats = false;
+ opts.compLongAddress = false;
+ opts.compJitELTHookEnabled = false;
+
+#ifdef LATE_DISASM
+ opts.doLateDisasm = false;
+#endif // LATE_DISASM
+
+ compDebugBreak = false;
+
+ // If we have a non-empty AltJit config then we change all of these other
+ // config values to refer only to the AltJit.
+ //
+ if (!altJitConfig || opts.altJit)
+ {
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ if ((JitConfig.NgenOrder() & 1) == 1)
+ {
+ opts.dspOrder = true;
+ }
+
+ if (JitConfig.NgenGCDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspGCtbls = true;
+ }
+
+ if (JitConfig.NgenDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.disAsm = true;
+ }
+ if (JitConfig.NgenDisasm().contains("SPILLED", nullptr, nullptr))
+ {
+ opts.disAsmSpilled = true;
+ }
+
+ if (JitConfig.NgenUnwindDump().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.dspUnwind = true;
+ }
+
+ if (JitConfig.NgenEHDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspEHTable = true;
+ }
+ }
+ else
+ {
+ if ((JitConfig.JitOrder() & 1) == 1)
+ {
+ opts.dspOrder = true;
+ }
+
+ if (JitConfig.JitGCDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspGCtbls = true;
+ }
+
+ if (JitConfig.JitDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.disAsm = true;
+ }
+
+ if (JitConfig.JitDisasm().contains("SPILLED", nullptr, nullptr))
+ {
+ opts.disAsmSpilled = true;
+ }
+
+ if (JitConfig.JitUnwindDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspUnwind = true;
+ }
+
+ if (JitConfig.JitEHDump().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ opts.dspEHTable = true;
+ }
+ }
+
+#ifdef LATE_DISASM
+ if (JitConfig.JitLateDisasm().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ opts.doLateDisasm = true;
+#endif // LATE_DISASM
+
+ // This one applies to both Ngen/Jit Disasm output: COMPlus_JitDiffableDasm=1
+ if (JitConfig.DiffableDasm() != 0)
+ {
+ opts.disDiffable = true;
+ opts.dspDiffable = true;
+ }
+
+ if (JitConfig.DisplayMemStats() != 0)
+ {
+ s_dspMemStats = true;
+ }
+
+ if (JitConfig.JitLongAddress() != 0)
+ {
+ opts.compLongAddress = true;
+ }
+ }
+
+ if (verboseDump)
+ {
+ opts.dspCode = true;
+ opts.dspEHTable = true;
+ opts.dspGCtbls = true;
+ opts.disAsm2 = true;
+ opts.dspUnwind = true;
+ verbose = true;
+ verboseTrees = shouldUseVerboseTrees();
+ verboseSsa = shouldUseVerboseSsa();
+ codeGen->setVerbose(true);
+ }
+
+ treesBeforeAfterMorph = (JitConfig.TreesBeforeAfterMorph() == 1);
+ morphNum = 0; // Initialize the morphed-trees counting.
+
+ expensiveDebugCheckLevel = JitConfig.JitExpensiveDebugCheckLevel();
+ if (expensiveDebugCheckLevel == 0)
+ {
+ // If we're in a stress mode that modifies the flowgraph, make 1 the default.
+ if (fgStressBBProf() || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
+ {
+ expensiveDebugCheckLevel = 1;
+ }
+ }
+
+ if (verbose)
+ {
+ printf("****** START compiling %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
+ printf("Generating code for %s %s\n", Target::g_tgtPlatformName, Target::g_tgtCPUName);
+ printf(""); // in our logic this causes a flush
+ }
+
+ if (JitConfig.JitBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ assert(!"JitBreak reached");
+ }
+
+ unsigned jitHashBreakVal = (unsigned)JitConfig.JitHashBreak();
+ if ((jitHashBreakVal != (DWORD)-1) && (jitHashBreakVal == info.compMethodHash()))
+ {
+ assert(!"JitHashBreak reached");
+ }
+
+ if (verbose ||
+ JitConfig.JitDebugBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args) ||
+ JitConfig.JitBreak().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ compDebugBreak = true;
+ }
+
+ memset(compActiveStressModes, 0, sizeof(compActiveStressModes));
+
+#endif // DEBUG
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+#ifdef DEBUG
+ assert(!codeGen->isGCTypeFixed());
+ opts.compGcChecks = (JitConfig.JitGCChecks() != 0) || compStressCompile(STRESS_GENERIC_VARN, 5);
+
+ enum
+ {
+ STACK_CHECK_ON_RETURN = 0x1,
+ STACK_CHECK_ON_CALL = 0x2,
+ STACK_CHECK_ALL = 0x3,
+ };
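+ // Bit flags read from JitConfig.JitStackChecks() below, selecting stack checks
+ // on returns, on calls, or both (STACK_CHECK_ALL).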
+
+ DWORD dwJitStackChecks = JitConfig.JitStackChecks();
+ if (compStressCompile(STRESS_GENERIC_VARN, 5))
+ {
+ dwJitStackChecks = STACK_CHECK_ALL;
+ }
+ opts.compStackCheckOnRet = (dwJitStackChecks & DWORD(STACK_CHECK_ON_RETURN)) != 0;
+ opts.compStackCheckOnCall = (dwJitStackChecks & DWORD(STACK_CHECK_ON_CALL)) != 0;
+#endif
+
+#ifdef PROFILING_SUPPORTED
+ opts.compNoPInvokeInlineCB = (opts.eeFlags & CORJIT_FLG_PROF_NO_PINVOKE_INLINE) ? true : false;
+
+ // Cache the profiler handle
+ if (opts.eeFlags & CORJIT_FLG_PROF_ENTERLEAVE)
+ {
+ BOOL hookNeeded;
+ BOOL indirected;
+ info.compCompHnd->GetProfilingHandle(&hookNeeded, &compProfilerMethHnd, &indirected);
+ compProfilerHookNeeded = !!hookNeeded;
+ compProfilerMethHndIndirected = !!indirected;
+ }
+ else
+ {
+ compProfilerHookNeeded = false;
+ compProfilerMethHnd = nullptr;
+ compProfilerMethHndIndirected = false;
+ }
+
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ // Right now this ELT hook option is enabled only for arm and amd64
+
+ // Honour COMPlus_JitELTHookEnabled only if the VM has not asked us to generate profiler
+ // hooks in the first place. That is, override the VM only if it hasn't asked for a
+ // profiler callback for this method.
+ if (!compProfilerHookNeeded && (JitConfig.JitELTHookEnabled() != 0))
+ {
+ opts.compJitELTHookEnabled = true;
+ }
+
+ // TBD: Exclude PInvoke stubs
+ if (opts.compJitELTHookEnabled)
+ {
+ compProfilerMethHnd = (void*)DummyProfilerELTStub;
+ compProfilerMethHndIndirected = false;
+ }
+#endif // _TARGET_ARM_ || _TARGET_AMD64_
+
+#endif // PROFILING_SUPPORTED
+
+#if FEATURE_TAILCALL_OPT
+ const wchar_t* strTailCallOpt = JitConfig.TailCallOpt();
+ if (strTailCallOpt != nullptr)
+ {
+ opts.compTailCallOpt = (UINT)_wtoi(strTailCallOpt) != 0;
+ }
+
+ if (JitConfig.TailCallLoopOpt() == 0)
+ {
+ opts.compTailCallLoopOpt = false;
+ }
+#endif
+
+ opts.compMustInlinePInvokeCalli = (opts.eeFlags & CORJIT_FLG_IL_STUB) ? true : false;
+
+ opts.compScopeInfo = opts.compDbgInfo;
+#endif // DEBUGGING_SUPPORT
+
+#ifdef LATE_DISASM
+ codeGen->getDisAssembler().disOpenForLateDisAsm(info.compMethodName, info.compClassName,
+ info.compMethodInfo->args.pSig);
+#endif
+
+//-------------------------------------------------------------------------
+
+#if RELOC_SUPPORT
+ opts.compReloc = (opts.eeFlags & CORJIT_FLG_RELOC) ? true : false;
+#endif
+
+#ifdef DEBUG
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // Whether encoding of absolute addr as PC-rel offset is enabled in RyuJIT
+ opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0);
+#endif
+#endif // DEBUG
+
+ opts.compProcedureSplitting = (opts.eeFlags & CORJIT_FLG_PROCSPLIT) ? true : false;
+
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: enable hot/cold splitting
+ opts.compProcedureSplitting = false;
+#endif // _TARGET_ARM64_
+
+#ifdef DEBUG
+ opts.compProcedureSplittingEH = opts.compProcedureSplitting;
+#endif // DEBUG
+
+ if (opts.compProcedureSplitting)
+ {
+ // Note that opts.compDbgCode is true under ngen for checked assemblies!
+ opts.compProcedureSplitting = !opts.compDbgCode;
+
+#ifdef DEBUG
+ // JitForceProcedureSplitting is used to force procedure splitting on checked assemblies.
+ // This is useful for debugging on a checked build. Note that we still only do procedure
+ // splitting in the zapper.
+ if (JitConfig.JitForceProcedureSplitting().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.compProcedureSplitting = true;
+ }
+
+ // JitNoProcedureSplitting will always disable procedure splitting.
+ if (JitConfig.JitNoProcedureSplitting().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.compProcedureSplitting = false;
+ }
+ //
+ // JitNoProcedureSplittingEH will disable procedure splitting in functions with EH.
+ if (JitConfig.JitNoProcedureSplittingEH().contains(info.compMethodName, info.compClassName,
+ &info.compMethodInfo->args))
+ {
+ opts.compProcedureSplittingEH = false;
+ }
+#endif
+ }
+
+ fgProfileBuffer = nullptr;
+ fgProfileData_ILSizeMismatch = false;
+ fgNumProfileRuns = 0;
+ if (opts.eeFlags & CORJIT_FLG_BBOPT)
+ {
+ assert(!compIsForInlining());
+ HRESULT hr;
+ hr = info.compCompHnd->getBBProfileData(info.compMethodHnd, &fgProfileBufferCount, &fgProfileBuffer,
+ &fgNumProfileRuns);
+
+ // a failed result that also has a non-NULL fgProfileBuffer
+ // indicates that the ILSize for the method no longer matches
+ // the ILSize for the method when profile data was collected.
+ //
+ // We will discard the IBC data in this case
+ //
+ if (FAILED(hr) && (fgProfileBuffer != nullptr))
+ {
+ fgProfileData_ILSizeMismatch = true;
+ fgProfileBuffer = nullptr;
+ }
+#ifdef DEBUG
+ // A successful result implies a non-NULL fgProfileBuffer
+ //
+ if (SUCCEEDED(hr))
+ {
+ assert(fgProfileBuffer != nullptr);
+ }
+
+ // A failed result implies a NULL fgProfileBuffer
+ // see implementation of Compiler::fgHaveProfileData()
+ //
+ if (FAILED(hr))
+ {
+ assert(fgProfileBuffer == nullptr);
+ }
+#endif
+ }
+
+ opts.compNeedStackProbes = false;
+
+#ifdef DEBUG
+ if (JitConfig.StackProbesOverride() != 0 || compStressCompile(STRESS_GENERIC_VARN, 5))
+ {
+ opts.compNeedStackProbes = true;
+ }
+#endif
+
+#ifdef DEBUG
+ // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
+ if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
+ {
+ compMaxUncheckedOffsetForNullObject = (size_t)JitConfig.JitMaxUncheckedOffset();
+ if (verbose)
+ {
+ printf("STRESS_NULL_OBJECT_CHECK: compMaxUncheckedOffsetForNullObject=0x%X\n",
+ compMaxUncheckedOffsetForNullObject);
+ }
+ }
+
+ if (verbose)
+ {
+ printf("OPTIONS: compCodeOpt = %s\n",
+ (opts.compCodeOpt == BLENDED_CODE)
+ ? "BLENDED_CODE"
+ : (opts.compCodeOpt == SMALL_CODE) ? "SMALL_CODE"
+ : (opts.compCodeOpt == FAST_CODE) ? "FAST_CODE" : "UNKNOWN_CODE");
+
+ printf("OPTIONS: compDbgCode = %s\n", dspBool(opts.compDbgCode));
+ printf("OPTIONS: compDbgInfo = %s\n", dspBool(opts.compDbgInfo));
+ printf("OPTIONS: compDbgEnC = %s\n", dspBool(opts.compDbgEnC));
+ printf("OPTIONS: compProcedureSplitting = %s\n", dspBool(opts.compProcedureSplitting));
+ printf("OPTIONS: compProcedureSplittingEH = %s\n", dspBool(opts.compProcedureSplittingEH));
+
+ if ((opts.eeFlags & CORJIT_FLG_BBOPT) && fgHaveProfileData())
+ {
+ printf("OPTIONS: using real profile data\n");
+ }
+
+ if (fgProfileData_ILSizeMismatch)
+ {
+ printf("OPTIONS: discarded IBC profile data due to mismatch in ILSize\n");
+ }
+
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ printf("OPTIONS: Jit invoked for ngen\n");
+ }
+ printf("OPTIONS: Stack probing is %s\n", opts.compNeedStackProbes ? "ENABLED" : "DISABLED");
+ }
+#endif
+
+ opts.compGCPollType = GCPOLL_NONE;
+ if (opts.eeFlags & CORJIT_FLG_GCPOLL_CALLS)
+ {
+ opts.compGCPollType = GCPOLL_CALL;
+ }
+ else if (opts.eeFlags & CORJIT_FLG_GCPOLL_INLINE)
+ {
+ // make sure that the EE didn't set both flags.
+ assert(opts.compGCPollType == GCPOLL_NONE);
+ opts.compGCPollType = GCPOLL_INLINE;
+ }
+}
+
+#ifdef DEBUG
+
+void JitDump(const char* pcFormat, ...)
+{
+ va_list lst;
+ va_start(lst, pcFormat);
+ vflogf(jitstdout, pcFormat, lst);
+ va_end(lst);
+}
+
+bool Compiler::compJitHaltMethod()
+{
+ /* This method returns true when we use an INS_BREAKPOINT to allow us to step into the generated native code */
+ /* Note that these two "Jit" environment variables also work for ngen images */
+
+ if (JitConfig.JitHalt().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return true;
+ }
+
+ /* Use this Hash variant when there are a lot of methods with the same name and different signatures */
+
+ unsigned fJitHashHaltVal = (unsigned)JitConfig.JitHashHalt();
+ if ((fJitHashHaltVal != (unsigned)-1) && (fJitHashHaltVal == info.compMethodHash()))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * Should we use a "stress-mode" for the given stressArea. We have different
+ * areas to allow the areas to be mixed in different combinations in
+ * different methods.
+ * 'weight' indicates how often (as a percentage) the area should be stressed.
+ * It should reflect the usefulness:overhead ratio.
+ */
+
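+ // Build the stress mode name table from the STRESS_MODES x-macro list; each mode
+ // name is prefixed with "STRESS_".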
+const LPCWSTR Compiler::s_compStressModeNames[STRESS_COUNT + 1] = {
+#define STRESS_MODE(mode) W("STRESS_") W(#mode),
+
+ STRESS_MODES
+#undef STRESS_MODE
+};
+
+bool Compiler::compStressCompile(compStressArea stressArea, unsigned weight)
+{
+ unsigned hash;
+ DWORD stressLevel;
+
+ if (!bRangeAllowStress)
+ {
+ return false;
+ }
+
+ if (!JitConfig.JitStressOnly().isEmpty() &&
+ !JitConfig.JitStressOnly().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return false;
+ }
+
+ bool doStress = false;
+ const wchar_t* strStressModeNames;
+
+ // Does user explicitly prevent using this STRESS_MODE through the command line?
+ const wchar_t* strStressModeNamesNot = JitConfig.JitStressModeNamesNot();
+ if ((strStressModeNamesNot != nullptr) &&
+ (wcsstr(strStressModeNamesNot, s_compStressModeNames[stressArea]) != nullptr))
+ {
+ if (verbose)
+ {
+ printf("JitStressModeNamesNot contains %ws\n", s_compStressModeNames[stressArea]);
+ }
+ doStress = false;
+ goto _done;
+ }
+
+ // Does user explicitly set this STRESS_MODE through the command line?
+ strStressModeNames = JitConfig.JitStressModeNames();
+ if (strStressModeNames != nullptr)
+ {
+ if (wcsstr(strStressModeNames, s_compStressModeNames[stressArea]) != nullptr)
+ {
+ if (verbose)
+ {
+ printf("JitStressModeNames contains %ws\n", s_compStressModeNames[stressArea]);
+ }
+ doStress = true;
+ goto _done;
+ }
+
+ // This stress mode name did not match anything in the stress
+ // mode whitelist. If the user has requested that only the listed
+ // modes be enabled, don't allow this stress mode to turn on.
+ const bool onlyEnableMode = JitConfig.JitStressModeNamesOnly() != 0;
+
+ if (onlyEnableMode)
+ {
+ doStress = false;
+ goto _done;
+ }
+ }
+
+ // 0: No stress (Except when explicitly set in complus_JitStressModeNames)
+ // !=2: Vary stress. Performance will be slightly/moderately degraded
+ // 2: Check-all stress. Performance will be REALLY horrible
+ stressLevel = getJitStressLevel();
+
+ assert(weight <= MAX_STRESS_WEIGHT);
+
+ /* Check for boundary conditions */
+
+ if (stressLevel == 0 || weight == 0)
+ {
+ return false;
+ }
+
+ // Should we allow unlimited stress?
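+ // Areas past STRESS_COUNT_VARN are the non-varying ("check") modes; at stress
+ // level 2 they are always enabled.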
+ if (stressArea > STRESS_COUNT_VARN && stressLevel == 2)
+ {
+ return true;
+ }
+
+ if (weight == MAX_STRESS_WEIGHT)
+ {
+ doStress = true;
+ goto _done;
+ }
+
+ // Get a hash which can be compared with 'weight'
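+ // The hash mixes the method hash with the stress area and level to get a
+ // pseudo-random value in [0, MAX_STRESS_WEIGHT) that is stable for a given
+ // method, area, and stress level.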
+
+ assert(stressArea != 0);
+ hash = (info.compMethodHash() ^ stressArea ^ stressLevel) % MAX_STRESS_WEIGHT;
+
+ assert(hash < MAX_STRESS_WEIGHT && weight <= MAX_STRESS_WEIGHT);
+ doStress = (hash < weight);
+
+_done:
+
+ if (doStress && !compActiveStressModes[stressArea])
+ {
+ if (verbose)
+ {
+ printf("\n\n*** JitStress: %ws ***\n\n", s_compStressModeNames[stressArea]);
+ }
+ compActiveStressModes[stressArea] = 1;
+ }
+
+ return doStress;
+}
+
+#endif // DEBUG
+
+void Compiler::compInitDebuggingInfo()
+{
+ assert(!compIsForInlining());
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In compInitDebuggingInfo() for %s\n", info.compFullName);
+ }
+#endif
+
+ /*-------------------------------------------------------------------------
+ *
+ * Get hold of the local variable records, if there are any
+ */
+
+ info.compVarScopesCount = 0;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compScopeInfo)
+#endif
+ {
+ eeGetVars();
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ compInitVarScopeMap();
+
+ if (opts.compScopeInfo || opts.compDbgCode)
+ {
+ compInitScopeLists();
+ }
+
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ /* Create a new empty basic block. fgExtendDbgLifetimes() may add
+ initialization of variables which are in scope right from the
+ start of the (real) first BB (and therefore artificially marked
+ as alive) into this block.
+ */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, gtNewNothingNode());
+
+ JITDUMP("Debuggable code - Add new BB%02u to perform initialization of variables [%08X]\n", fgFirstBB->bbNum,
+ dspPtr(fgFirstBB));
+ }
+#endif // DEBUGGING_SUPPORT
+
+ /*-------------------------------------------------------------------------
+ *
+ * Read the stmt-offsets table and the line-number table
+ */
+
+ info.compStmtOffsetsImplicit = ICorDebugInfo::NO_BOUNDARIES;
+
+ // We can only report debug info for EnC at places where the stack is empty.
+ // Actually, at places where there are no live temps. Else, we won't be able
+ // to map between the old and the new versions correctly as we won't have
+ // any info for the live temps.
+
+ assert(!opts.compDbgEnC || !opts.compDbgInfo ||
+ 0 == (info.compStmtOffsetsImplicit & ~ICorDebugInfo::STACK_EMPTY_BOUNDARIES));
+
+ info.compStmtOffsetsCount = 0;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgInfo)
+#endif
+ {
+ /* Get hold of the line# records, if there are any */
+
+ eeGetStmtOffsets();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("info.compStmtOffsetsCount = %d\n", info.compStmtOffsetsCount);
+ printf("info.compStmtOffsetsImplicit = %04Xh", info.compStmtOffsetsImplicit);
+
+ if (info.compStmtOffsetsImplicit)
+ {
+ printf(" ( ");
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES)
+ {
+ printf("STACK_EMPTY ");
+ }
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::NOP_BOUNDARIES)
+ {
+ printf("NOP ");
+ }
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
+ {
+ printf("CALL_SITE ");
+ }
+ printf(")");
+ }
+ printf("\n");
+ IL_OFFSET* pOffs = info.compStmtOffsets;
+ for (unsigned i = 0; i < info.compStmtOffsetsCount; i++, pOffs++)
+ {
+ printf("%02d) IL_%04Xh\n", i, *pOffs);
+ }
+ }
+#endif
+ }
+}
+
+void Compiler::compSetOptimizationLevel()
+{
+ unsigned compileFlags;
+ bool theMinOptsValue;
+ unsigned jitMinOpts;
+
+ compileFlags = opts.eeFlags;
+
+ if (compIsForInlining())
+ {
+ theMinOptsValue = impInlineInfo->InlinerCompiler->opts.MinOpts();
+ goto _SetMinOpts;
+ }
+
+ theMinOptsValue = false;
+
+ if (opts.compFlags == CLFLG_MINOPT)
+ {
+ JITLOG((LL_INFO100, "CLFLG_MINOPT set for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+
+#ifdef DEBUG
+ jitMinOpts = JitConfig.JitMinOpts();
+
+ if (!theMinOptsValue && (jitMinOpts > 0))
+ {
+ unsigned methodCount = Compiler::jitTotalMethodCompiled;
+ unsigned methodCountMask = methodCount & 0xFFF;
+ unsigned kind = (jitMinOpts & 0xF000000) >> 24;
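+ // JitMinOpts packs a selector into bits 24-27 ('kind'); for kinds 0xD-0xF the two
+ // 12-bit fields in bits 12-23 and 0-11 are matched against the low 12 bits of the
+ // method count, as decoded in the cases below. Any other kind treats the whole
+ // value as a method-count threshold.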
+ switch (kind)
+ {
+ default:
+ if (jitMinOpts <= methodCount)
+ {
+ if (verbose)
+ {
+ printf(" Optimizations disabled by JitMinOpts and methodCount\n");
+ }
+ theMinOptsValue = true;
+ }
+ break;
+ case 0xD:
+ {
+ unsigned firstMinopts = (jitMinOpts >> 12) & 0xFFF;
+ unsigned secondMinopts = (jitMinOpts >> 0) & 0xFFF;
+
+ if ((firstMinopts == methodCountMask) || (secondMinopts == methodCountMask))
+ {
+ if (verbose)
+ {
+ printf("0xD: Optimizations disabled by JitMinOpts and methodCountMask\n");
+ }
+ theMinOptsValue = true;
+ }
+ }
+ break;
+ case 0xE:
+ {
+ unsigned startMinopts = (jitMinOpts >> 12) & 0xFFF;
+ unsigned endMinopts = (jitMinOpts >> 0) & 0xFFF;
+
+ if ((startMinopts <= methodCountMask) && (endMinopts >= methodCountMask))
+ {
+ if (verbose)
+ {
+ printf("0xE: Optimizations disabled by JitMinOpts and methodCountMask\n");
+ }
+ theMinOptsValue = true;
+ }
+ }
+ break;
+ case 0xF:
+ {
+ unsigned bitsZero = (jitMinOpts >> 12) & 0xFFF;
+ unsigned bitsOne = (jitMinOpts >> 0) & 0xFFF;
+
+ if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
+ {
+ if (verbose)
+ {
+ printf("0xF: Optimizations disabled by JitMinOpts and methodCountMask\n");
+ }
+ theMinOptsValue = true;
+ }
+ }
+ break;
+ }
+ }
+
+ if (!theMinOptsValue)
+ {
+ if (JitConfig.JitMinOptsName().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ theMinOptsValue = true;
+ }
+ }
+
+ if (compStressCompile(STRESS_MIN_OPTS, 5))
+ {
+ theMinOptsValue = true;
+ }
+ // For PREJIT we never drop down to MinOpts
+ // unless CLFLG_MINOPT is set
+ else if (!(compileFlags & CORJIT_FLG_PREJIT))
+ {
+ if ((unsigned)JitConfig.JitMinOptsCodeSize() < info.compILCodeSize)
+ {
+ JITLOG((LL_INFO10, "IL Code Size exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsInstrCount() < opts.instrCount)
+ {
+ JITLOG((LL_INFO10, "IL instruction count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsBbCount() < fgBBcount)
+ {
+ JITLOG((LL_INFO10, "Basic Block count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsLvNumCount() < lvaCount)
+ {
+ JITLOG((LL_INFO10, "Local Variable Num count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ else if ((unsigned)JitConfig.JitMinOptsLvRefCount() < opts.lvRefCount)
+ {
+ JITLOG((LL_INFO10, "Local Variable Ref count exceeded, using MinOpts for method %s\n", info.compFullName));
+ theMinOptsValue = true;
+ }
+ if (theMinOptsValue == true)
+ {
+ JITLOG((LL_INFO10000, "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count "
+ "%3d,%3d for method %s\n",
+ info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
+ if (JitConfig.JitBreakOnMinOpts() != 0)
+ {
+ assert(!"MinOpts enabled");
+ }
+ }
+ }
+#else // !DEBUG
+ // Retail check if we should force Minopts due to the complexity of the method
+ // For PREJIT we never drop down to MinOpts
+ // unless CLFLG_MINOPT is set
+ if (!theMinOptsValue && !(compileFlags & CORJIT_FLG_PREJIT) &&
+ ((DEFAULT_MIN_OPTS_CODE_SIZE < info.compILCodeSize) || (DEFAULT_MIN_OPTS_INSTR_COUNT < opts.instrCount) ||
+ (DEFAULT_MIN_OPTS_BB_COUNT < fgBBcount) || (DEFAULT_MIN_OPTS_LV_NUM_COUNT < lvaCount) ||
+ (DEFAULT_MIN_OPTS_LV_REF_COUNT < opts.lvRefCount)))
+ {
+ theMinOptsValue = true;
+ }
+#endif // DEBUG
+
+ JITLOG((LL_INFO10000,
+ "IL Code Size,Instr %4d,%4d, Basic Block count %3d, Local Variable Num,Ref count %3d,%3d for method %s\n",
+ info.compILCodeSize, opts.instrCount, fgBBcount, lvaCount, opts.lvRefCount, info.compFullName));
+
+#if 0
+ // The code in this #if has been useful in debugging loop cloning issues, by
+ // enabling selective enablement of the loop cloning optimization according to
+ // method hash.
+#ifdef DEBUG
+ if (!theMinOptsValue)
+ {
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("opthashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("opthashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ theMinOptsValue = true;
+ }
+ else
+ {
+ printf("Doing optimization in %s (0x%x).\n", info.compFullName, methHash);
+ }
+ }
+#endif
+#endif
+
+_SetMinOpts:
+
+ // Set the MinOpts value
+ opts.SetMinOpts(theMinOptsValue);
+
+#ifdef DEBUG
+ if (verbose && !compIsForInlining())
+ {
+ printf("OPTIONS: opts.MinOpts() == %s\n", opts.MinOpts() ? "true" : "false");
+ }
+#endif
+
+ /* Control the optimizations */
+
+ if (opts.MinOpts() || opts.compDbgCode)
+ {
+ opts.compFlags &= ~CLFLG_MAXOPT;
+ opts.compFlags |= CLFLG_MINOPT;
+ }
+
+ if (!compIsForInlining())
+ {
+ codeGen->setFramePointerRequired(false);
+ codeGen->setFrameRequired(false);
+
+ if (opts.MinOpts() || opts.compDbgCode)
+ {
+ codeGen->setFrameRequired(true);
+ }
+
+#if !defined(_TARGET_AMD64_)
+ // The VM sets CORJIT_FLG_FRAMED for two reasons: (1) the COMPlus_JitFramed variable is set, or
+ // (2) the function is marked "noinline". The reason for #2 is that people mark functions
+ // noinline to ensure they show up in a stack walk. But for AMD64, we don't need a frame
+ // pointer for the frame to show up in a stack walk.
+ if (compileFlags & CORJIT_FLG_FRAMED)
+ codeGen->setFrameRequired(true);
+#endif
+
+ if (compileFlags & CORJIT_FLG_RELOC)
+ {
+ codeGen->genAlignLoops = false; // loop alignment not supported for prejitted code
+
+ // The zapper doesn't set CORJIT_FLG_ALIGN_LOOPS, and there is
+ // no reason for it to set it as the JIT doesn't currently support loop alignment
+ // for prejitted images. (The JIT doesn't know the final address of the code, hence
+ // it can't align code based on unknown addresses.)
+ assert((compileFlags & CORJIT_FLG_ALIGN_LOOPS) == 0);
+ }
+ else
+ {
+ codeGen->genAlignLoops = (compileFlags & CORJIT_FLG_ALIGN_LOOPS) != 0;
+ }
+ }
+
+ info.compUnwrapContextful = !opts.MinOpts() && !opts.compDbgCode;
+
+ fgCanRelocateEHRegions = true;
+}
+
+#ifdef _TARGET_ARMARCH_
+ // Function compRsvdRegCheck:
+ // given a curState to use for calculating the total frame size,
+ // it will return true if REG_OPT_RSVD should be reserved so
+ // that it can be used to form large offsets when accessing stack-based
+ // LclVars, including both the incoming and outgoing argument areas.
+//
+// The method advances the frame layout state to curState by calling
+// lvaFrameSize(curState).
+//
+bool Compiler::compRsvdRegCheck(FrameLayoutState curState)
+{
+ // Always do the layout even if returning early. Callers might
+ // depend on us to do the layout.
+ unsigned frameSize = lvaFrameSize(curState);
+
+ if (opts.MinOpts())
+ {
+ // Have a recovery path in case we fail to reserve REG_OPT_RSVD and go
+ // over the limit of SP and FP offset ranges due to large
+ // temps.
+ return true;
+ }
+
+ unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ;
+ if (compFloatingPointUsed)
+ {
+ calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ;
+ }
+
+ noway_assert(frameSize > calleeSavedRegMaxSz);
+
+#if defined(_TARGET_ARM64_)
+
+ // TODO-ARM64-CQ: update this!
+ return true; // just always assume we'll need it, for now
+
+#else // _TARGET_ARM_
+
+ // frame layout:
+ //
+ // low addresses
+ // inArgs compArgSize
+ // origSP --->
+ // LR --->
+ // R11 --->
+ // + callee saved regs CALLEE_SAVED_REG_MAXSZ (32 bytes)
+ // optional saved fp regs 16 * sizeof(float) (64 bytes)
+ // - lclSize
+ // incl. TEMPS MAX_SPILL_TEMP_SIZE
+ // + incl. outArgs
+ // SP --->
+ // -
+ // high addresses
+
+ // With codeGen->isFramePointerRequired we use R11 to access incoming args with positive offsets
+ // and to access LclVars with negative offsets; in the non-funclet (main) region
+ // we use SP with positive offsets. The limiting factor in the
+ // codeGen->isFramePointerRequired case is that negative offsets must be less than or
+ // equal to 0x7C, while positive offsets can use imm12, limited to +/-imm8 (scaled)
+ // when vldr/vstr is used.
+ //
+ // Subtract 4 bytes for alignment of a local var because the number of temps could
+ // trigger a misaligned double or long.
+ //
+ unsigned maxR11ArgLimit = (compFloatingPointUsed ? 0x03FC : 0x0FFC);
+ unsigned maxR11LclLimit = 0x0078;
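+ // These limits reflect the addressing modes described above: roughly imm12 for
+ // integer loads/stores (0x0FFC) and +/-imm8 scaled by 4 for vldr/vstr (0x03FC),
+ // with maxR11LclLimit being the 0x7C negative-offset limit less the 4-byte
+ // alignment slack.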
+
+ if (codeGen->isFramePointerRequired())
+ {
+ unsigned maxR11LclOffs = frameSize;
+ unsigned maxR11ArgOffs = compArgSize + (2 * REGSIZE_BYTES);
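+ // The 2 * REGSIZE_BYTES accounts for the saved R11 and LR that sit between the
+ // frame pointer and the incoming arguments in the layout pictured above.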
+ if (maxR11LclOffs > maxR11LclLimit || maxR11ArgOffs > maxR11ArgLimit)
+ {
+ return true;
+ }
+ }
+
+ // So this case is the SP based frame case, but note that we also will use SP based
+ // offsets for R11 based frames in the non-funclet main code area. However, if we have
+ // passed the R11 limit checks above, these SP checks won't fire.
+
+ // Check local coverage first. If vldr/vstr will be used the limit can be +/-imm8.
+ unsigned maxSPLclLimit = (compFloatingPointUsed ? 0x03F8 : 0x0FF8);
+ if (frameSize > (codeGen->isFramePointerUsed() ? (maxR11LclLimit + maxSPLclLimit) : maxSPLclLimit))
+ {
+ return true;
+ }
+
+ // Check arguments coverage.
+ if ((!codeGen->isFramePointerUsed() || (compArgSize > maxR11ArgLimit)) && (compArgSize + frameSize) > maxSPLclLimit)
+ {
+ return true;
+ }
+
+ // We won't need to reserve REG_OPT_RSVD.
+ //
+ return false;
+#endif // _TARGET_ARM_
+}
+#endif // _TARGET_ARMARCH_
+
+void Compiler::compFunctionTraceStart()
+{
+#ifdef DEBUG
+ if (compIsForInlining())
+ {
+ return;
+ }
+
+ if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
+ {
+ LONG newJitNestingLevel = InterlockedIncrement(&Compiler::jitNestingLevel);
+ if (newJitNestingLevel <= 0)
+ {
+ printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
+ }
+
+ for (LONG i = 0; i < newJitNestingLevel - 1; i++)
+ {
+ printf(" ");
+ }
+ printf("{ Start Jitting %s (MethodHash=%08x)\n", info.compFullName,
+ info.compMethodHash()); /* } editor brace matching workaround for this printf */
+ }
+#endif // DEBUG
+}
+
+void Compiler::compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, bool isNYI)
+{
+#ifdef DEBUG
+ assert(!compIsForInlining());
+
+ if ((JitConfig.JitFunctionTrace() != 0) && !opts.disDiffable)
+ {
+ LONG newJitNestingLevel = InterlockedDecrement(&Compiler::jitNestingLevel);
+ if (newJitNestingLevel < 0)
+ {
+ printf("{ Illegal nesting level %d }\n", newJitNestingLevel);
+ }
+
+ for (LONG i = 0; i < newJitNestingLevel; i++)
+ {
+ printf(" ");
+ }
+ /* { editor brace-matching workaround for following printf */
+ printf("} Jitted Entry %03x at" FMT_ADDR "method %s size %08x%s\n", Compiler::jitTotalMethodCompiled,
+ DBG_ADDR(methodCodePtr), info.compFullName, methodCodeSize,
+ isNYI ? " NYI" : (compIsForImportOnly() ? " import only" : ""));
+ }
+#endif // DEBUG
+}
+
+//*********************************************************************************************
+// #Phases
+//
+// This is the most interesting 'toplevel' function in the JIT. It goes through the operations of
+// importing, morphing, optimizations and code generation. This is called from the EE through the
+// code:CILJit::compileMethod function.
+//
+// For an overview of the structure of the JIT, see:
+// https://github.com/dotnet/coreclr/blob/master/Documentation/botr/ryujit-overview.md
+//
+void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags)
+{
+ if (compIsForInlining())
+ {
+ // Notify root instance that an inline attempt is about to import IL
+ impInlineRoot()->m_inlineStrategy->NoteImport();
+ }
+
+ hashBv::Init(this);
+
+ VarSetOps::AssignAllowUninitRhs(this, compCurLife, VarSetOps::UninitVal());
+
+ /* The temp holding the secret stub argument is used by fgImport() when importing the intrinsic. */
+
+ if (info.compPublishStubParam)
+ {
+ assert(lvaStubArgumentVar == BAD_VAR_NUM);
+ lvaStubArgumentVar = lvaGrabTempWithImplicitUse(false DEBUGARG("stub argument"));
+ lvaTable[lvaStubArgumentVar].lvType = TYP_I_IMPL;
+ }
+
+ EndPhase(PHASE_PRE_IMPORT);
+
+ compFunctionTraceStart();
+
+ /* Convert the instrs in each basic block to a tree based intermediate representation */
+
+ fgImport();
+
+ assert(!fgComputePredsDone);
+ if (fgCheapPredsValid)
+ {
+ // Remove cheap predecessors before inlining; allowing the cheap predecessor lists to be inserted
+ // with inlined blocks causes problems.
+ fgRemovePreds();
+ }
+
+ if (compIsForInlining())
+ {
+ /* Quit inlining if fgImport() failed for any reason. */
+
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
+
+ return;
+ }
+
+ assert(!compDonotInline());
+
+ EndPhase(PHASE_IMPORTATION);
+
+ // Maybe the caller was not interested in generating code
+ if (compIsForImportOnly())
+ {
+ compFunctionTraceEnd(nullptr, 0, false);
+ return;
+ }
+
+#if !FEATURE_EH
+ // If we aren't yet supporting EH in a compiler bring-up, remove as many EH handlers as possible, so
+ // we can pass tests that contain try/catch EH, but don't actually throw any exceptions.
+ fgRemoveEH();
+#endif // !FEATURE_EH
+
+ if (compileFlags->corJitFlags & CORJIT_FLG_BBINSTR)
+ {
+ fgInstrumentMethod();
+ }
+
+ // We could allow ESP frames. Just need to reserve space for
+ // pushing EBP if the method becomes an EBP-frame after an edit.
+ // Note that requiring an EBP frame disallows double alignment. Thus if we change this
+ // we either have to disallow double alignment for E&C some other way or handle it in EETwain.
+
+ if (opts.compDbgEnC)
+ {
+ codeGen->setFramePointerRequired(true);
+
+ // Since we need slots for security near ebp, it's not possible
+ // to do this after an Edit without shifting all the locals.
+ // So we just always reserve space for these slots in case an Edit adds them.
+ opts.compNeedSecurityCheck = true;
+
+ // We don't care about localloc right now. If we do support it,
+ // EECodeManager::FixContextForEnC() needs to handle it smartly
+ // in case the localloc was actually executed.
+ //
+ // compLocallocUsed = true;
+ }
+
+ EndPhase(PHASE_POST_IMPORT);
+
+ /* Initialize the BlockSet epoch */
+
+ NewBasicBlockEpoch();
+
+ /* Massage the trees so that we can generate code out of them */
+
+ fgMorph();
+ EndPhase(PHASE_MORPH);
+
+ /* GS security checks for unsafe buffers */
+ if (getNeedsGSSecurityCookie())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** -GS checks for unsafe buffers \n");
+ }
+#endif
+
+ gsGSChecksInitCookie();
+
+ if (compGSReorderStackLayout)
+ {
+ gsCopyShadowParams();
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks(true);
+ printf("\n");
+ }
+#endif
+ }
+ EndPhase(PHASE_GS_COOKIE);
+
+ /* Compute bbNum, bbRefs and bbPreds */
+
+ JITDUMP("\nRenumbering the basic blocks for fgComputePred\n");
+ fgRenumberBlocks();
+
+ noway_assert(!fgComputePredsDone); // This is the first time full (not cheap) preds will be computed.
+ fgComputePreds();
+ EndPhase(PHASE_COMPUTE_PREDS);
+
+ /* If we need to emit GC Poll calls, mark the blocks that need them now. This is conservative and can
+ * be optimized later. */
+ fgMarkGCPollBlocks();
+ EndPhase(PHASE_MARK_GC_POLL_BLOCKS);
+
+ /* From this point on the flowgraph information such as bbNum,
+ * bbRefs or bbPreds has to be kept updated */
+
+ // Compute the edge weights (if we have profile data)
+ fgComputeEdgeWeights();
+ EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS);
+
+#if FEATURE_EH_FUNCLETS
+
+ /* Create funclets from the EH handlers. */
+
+ fgCreateFunclets();
+ EndPhase(PHASE_CREATE_FUNCLETS);
+
+#endif // FEATURE_EH_FUNCLETS
+
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ optOptimizeLayout();
+ EndPhase(PHASE_OPTIMIZE_LAYOUT);
+
+ // Compute reachability sets and dominators.
+ fgComputeReachability();
+ }
+
+ // Transform each GT_ALLOCOBJ node into either an allocation helper call or
+ // local variable allocation on the stack.
+ ObjectAllocator objectAllocator(this);
+ objectAllocator.Run();
+
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ /* Perform loop inversion (i.e. transform "while" loops into
+ "repeat" loops) and discover and classify natural loops
+ (e.g. mark iterative loops as such). Also marks loop blocks
+ and sets bbWeight to the loop nesting levels
+ */
+
+ optOptimizeLoops();
+ EndPhase(PHASE_OPTIMIZE_LOOPS);
+
+ // Clone loops with optimization opportunities, and
+ // choose the one based on dynamic condition evaluation.
+ optCloneLoops();
+ EndPhase(PHASE_CLONE_LOOPS);
+
+ /* Unroll loops */
+ optUnrollLoops();
+ EndPhase(PHASE_UNROLL_LOOPS);
+ }
+
+#ifdef DEBUG
+ fgDebugCheckLinks();
+#endif
+
+ /* Create the variable table (and compute variable ref counts) */
+
+ lvaMarkLocalVars();
+ EndPhase(PHASE_MARK_LOCAL_VARS);
+
+ // IMPORTANT, after this point, every place where trees are modified or cloned
+ // the local variable reference counts must be updated
+ // You can test the value of the following variable to see if
+ // the local variable ref counts must be updated
+ //
+ assert(lvaLocalVarRefCounted == true);
+
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ /* Optimize boolean conditions */
+
+ optOptimizeBools();
+ EndPhase(PHASE_OPTIMIZE_BOOLS);
+
+ // optOptimizeBools() might have changed the number of blocks; the dominators/reachability might be bad.
+ }
+
+ /* Figure out the order in which operators are to be evaluated */
+ fgFindOperOrder();
+ EndPhase(PHASE_FIND_OPER_ORDER);
+
+ // Weave the tree lists. Anyone who modifies the tree shapes after
+ // this point is responsible for calling fgSetStmtSeq() to keep the
+ // nodes properly linked.
+ // This can create GC poll calls, and create new BasicBlocks (without updating dominators/reachability).
+ fgSetBlockOrder();
+ EndPhase(PHASE_SET_BLOCK_ORDER);
+
+ // IMPORTANT, after this point, every place where tree topology changes must redo evaluation
+ // order (gtSetStmtInfo) and relink nodes (fgSetStmtSeq) if required.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ // Now we have determined the order of evaluation and the gtCosts for every node.
+ // If verbose, dump the full set of trees here before the optimization phases mutate them
+ //
+ if (verbose)
+ {
+ fgDispBasicBlocks(true); // 'true' will call fgDumpTrees() after dumping the BasicBlocks
+ printf("\n");
+ }
+#endif
+
+ // At this point we know if we are fully interruptible or not
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ bool doSsa = true;
+ bool doEarlyProp = true;
+ bool doValueNum = true;
+ bool doLoopHoisting = true;
+ bool doCopyProp = true;
+ bool doAssertionProp = true;
+ bool doRangeAnalysis = true;
+
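+ // Under DEBUG each of these optimizations can be switched off individually; the
+ // settings below also respect phase dependencies (e.g. value numbering requires
+ // SSA, and assertion prop requires value numbering).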
+#ifdef DEBUG
+ doSsa = (JitConfig.JitDoSsa() != 0);
+ doEarlyProp = doSsa && (JitConfig.JitDoEarlyProp() != 0);
+ doValueNum = doSsa && (JitConfig.JitDoValueNumber() != 0);
+ doLoopHoisting = doValueNum && (JitConfig.JitDoLoopHoisting() != 0);
+ doCopyProp = doValueNum && (JitConfig.JitDoCopyProp() != 0);
+ doAssertionProp = doValueNum && (JitConfig.JitDoAssertionProp() != 0);
+ doRangeAnalysis = doAssertionProp && (JitConfig.JitDoRangeAnalysis() != 0);
+#endif
+
+ if (doSsa)
+ {
+ fgSsaBuild();
+ EndPhase(PHASE_BUILD_SSA);
+ }
+
+ if (doEarlyProp)
+ {
+ /* Propagate array length and rewrite getType() method call */
+ optEarlyProp();
+ EndPhase(PHASE_EARLY_PROP);
+ }
+
+ if (doValueNum)
+ {
+ fgValueNumber();
+ EndPhase(PHASE_VALUE_NUMBER);
+ }
+
+ if (doLoopHoisting)
+ {
+ /* Hoist invariant code out of loops */
+ optHoistLoopCode();
+ EndPhase(PHASE_HOIST_LOOP_CODE);
+ }
+
+ if (doCopyProp)
+ {
+ /* Perform VN based copy propagation */
+ optVnCopyProp();
+ EndPhase(PHASE_VN_COPY_PROP);
+ }
+
+#if FEATURE_ANYCSE
+ /* Remove common sub-expressions */
+ optOptimizeCSEs();
+#endif // FEATURE_ANYCSE
+
+#if ASSERTION_PROP
+ if (doAssertionProp)
+ {
+ /* Assertion propagation */
+ optAssertionPropMain();
+ EndPhase(PHASE_ASSERTION_PROP_MAIN);
+ }
+
+ if (doRangeAnalysis)
+ {
+ /* Optimize array index range checks */
+ RangeCheck rc(this);
+ rc.OptimizeRangeChecks();
+ EndPhase(PHASE_OPTIMIZE_INDEX_CHECKS);
+ }
+#endif // ASSERTION_PROP
+
+ /* update the flowgraph if we modified it during the optimization phase*/
+ if (fgModified)
+ {
+ fgUpdateFlowGraph();
+ EndPhase(PHASE_UPDATE_FLOW_GRAPH);
+
+ // Recompute the edge weight if we have modified the flow graph
+ fgComputeEdgeWeights();
+ EndPhase(PHASE_COMPUTE_EDGE_WEIGHTS2);
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ // Check if we need to add the Quirk for the PPP backward compat issue
+ compQuirkForPPPflag = compQuirkForPPP();
+#endif
+
+ fgDetermineFirstColdBlock();
+ EndPhase(PHASE_DETERMINE_FIRST_COLD_BLOCK);
+
+#ifdef DEBUG
+ fgDebugCheckLinks(compStressCompile(STRESS_REMORPH_TREES, 50));
+
+ // Stash the current estimate of the function's size if necessary.
+ if (verbose)
+ {
+ compSizeEstimate = 0;
+ compCycleEstimate = 0;
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->getNextStmt())
+ {
+ compSizeEstimate += stmt->GetCostSz();
+ compCycleEstimate += stmt->GetCostEx();
+ }
+ }
+ }
+#endif
+
+#ifndef LEGACY_BACKEND
+ // rationalize trees
+ Rationalizer rat(this); // PHASE_RATIONALIZE
+ rat.Run();
+#endif // !LEGACY_BACKEND
+
+ // Here we do "simple lowering". When the RyuJIT backend works for all
+ // platforms, this will be part of the more general lowering phase. For now, though, we do a separate
+ // pass of "final lowering." We must do this before (final) liveness analysis, because this creates
+ // range check throw blocks, in which the liveness must be correct.
+ fgSimpleLowering();
+ EndPhase(PHASE_SIMPLE_LOWERING);
+
+#ifdef LEGACY_BACKEND
+ /* Local variable liveness */
+ fgLocalVarLiveness();
+ EndPhase(PHASE_LCLVARLIVENESS);
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+ fgDebugCheckLinks();
+#endif
+
+ /* Enable this to gather statistical data such as
+ * call and register argument info, flowgraph and loop info, etc. */
+
+ compJitStats();
+
+#ifdef _TARGET_ARM_
+ if (compLocallocUsed)
+ {
+ // We reserve REG_SAVED_LOCALLOC_SP to store SP on entry for stack unwinding
+ codeGen->regSet.rsMaskResvd |= RBM_SAVED_LOCALLOC_SP;
+ }
+#endif // _TARGET_ARM_
+#ifdef _TARGET_ARMARCH_
+ if (compRsvdRegCheck(PRE_REGALLOC_FRAME_LAYOUT))
+ {
+ // We reserve R10/IP1 in this case to hold the offsets in load/store instructions
+ codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
+ assert(REG_OPT_RSVD != REG_FP);
+ }
+
+#ifdef DEBUG
+ //
+ // Display the pre-regalloc frame offsets that we have tentatively decided upon
+ //
+ if (verbose)
+ lvaTableDump();
+#endif
+#endif // _TARGET_ARMARCH_
+
+ /* Assign registers to variables, etc. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ ///////////////////////////////////////////////////////////////////////////////
+ // Dominator and reachability sets are no longer valid. They haven't been
+ // maintained up to here, and shouldn't be used (unless recomputed).
+ ///////////////////////////////////////////////////////////////////////////////
+ fgDomsComputed = false;
+
+ /* Create LSRA before Lowering, this way Lowering can initialize the TreeNode Map */
+ m_pLinearScan = getLinearScanAllocator(this);
+
+ /* Lower */
+ Lowering lower(this, m_pLinearScan); // PHASE_LOWERING
+ lower.Run();
+
+ assert(lvaSortAgain == false); // We should have re-run fgLocalVarLiveness() in lower.Run()
+ lvaTrackedFixed = true; // We can not add any new tracked variables after this point.
+
+ /* Now that lowering is completed we can proceed to perform register allocation */
+ m_pLinearScan->doLinearScan();
+ EndPhase(PHASE_LINEAR_SCAN);
+
+ // Copied from rpPredictRegUse()
+ genFullPtrRegMap = (codeGen->genInterruptible || !codeGen->isFramePointerUsed());
+#else // LEGACY_BACKEND
+
+ lvaTrackedFixed = true; // We cannot add any new tracked variables after this point.
+ // For the classic JIT32 at this point lvaSortAgain can be set and raAssignVars() will call lvaSortOnly()
+
+ // Now do "classic" register allocation.
+ raAssignVars();
+ EndPhase(PHASE_RA_ASSIGN_VARS);
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ fgDebugCheckLinks();
+#endif
+
+ /* Generate code */
+
+ codeGen->genGenerateCode(methodCodePtr, methodCodeSize);
+
+#ifdef FEATURE_JIT_METHOD_PERF
+ if (pCompJitTimer)
+ pCompJitTimer->Terminate(this, CompTimeSummaryInfo::s_compTimeSummary);
+#endif
+
+ RecordStateAtEndOfCompilation();
+
+#ifdef FEATURE_TRACELOGGING
+ compJitTelemetry.NotifyEndOfCompilation();
+#endif
+
+#if defined(DEBUG)
+ ++Compiler::jitTotalMethodCompiled;
+#endif // defined(DEBUG)
+
+ compFunctionTraceEnd(*methodCodePtr, *methodCodeSize, false);
+
+#if FUNC_INFO_LOGGING
+ if (compJitFuncInfoFile != nullptr)
+ {
+ assert(!compIsForInlining());
+#ifdef DEBUG // We only have access to info.compFullName in DEBUG builds.
+ fprintf(compJitFuncInfoFile, "%s\n", info.compFullName);
+#elif FEATURE_SIMD
+ fprintf(compJitFuncInfoFile, " %s\n", eeGetMethodFullName(info.compMethodHnd));
+#endif
+ fprintf(compJitFuncInfoFile, ""); // in our logic this causes a flush
+ }
+#endif // FUNC_INFO_LOGGING
+}
+
+/*****************************************************************************/
+void Compiler::ProcessShutdownWork(ICorStaticInfo* statInfo)
+{
+}
+
+#ifdef _TARGET_AMD64_
+// Check if we need to add the Quirk for the PPP backward compat issue.
+// This Quirk addresses a compatibility issue between the new RyuJit and the previous JIT64.
+ // A backward compatibility issue called 'PPP' exists where a PInvoke call passes a 32-byte struct
+ // into a native API which basically writes 48 bytes of data into the struct.
+ // With the stack frame layout used by RyuJIT the extra 16 bytes written corrupt a
+ // caller-saved register, and this leads to an A/V in the calling method.
+ // The older JIT64 compiler just happened to have a different stack layout and/or
+ // caller-saved register set so that it didn't hit the A/V in the caller.
+ // By increasing the amount of stack allocated for the struct by 32 bytes we can fix this.
+//
+// Return true if we actually perform the Quirk, otherwise return false
+//
+bool Compiler::compQuirkForPPP()
+{
+ if (lvaCount != 2)
+ { // We require that there are exactly two locals
+ return false;
+ }
+
+ if (compTailCallUsed)
+ { // Don't try this quirk if a tail call was used
+ return false;
+ }
+
+ bool hasOutArgs = false;
+ LclVarDsc* varDscExposedStruct = nullptr;
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* Look for struct locals that are address taken */
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvIsParam) // It can't be a parameter
+ {
+ continue;
+ }
+
+ // We require that the OutgoingArg space lclVar exists
+ if (lclNum == lvaOutgoingArgSpaceVar)
+ {
+ hasOutArgs = true; // Record that we saw it
+ continue;
+ }
+
+ // Look for a 32-byte address exposed Struct and record its varDsc
+ if ((varDsc->TypeGet() == TYP_STRUCT) && varDsc->lvAddrExposed && (varDsc->lvExactSize == 32))
+ {
+ varDscExposedStruct = varDsc;
+ }
+ }
+
+ // We only perform the Quirk when there are two locals:
+ // one of them is an address-exposed struct of size 32
+ // and the other is the outgoing arg space local.
+ //
+ if (hasOutArgs && (varDscExposedStruct != nullptr))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAdding a backwards compatibility quirk for the 'PPP' issue\n");
+ }
+#endif // DEBUG
+
+ // Increase the exact size of this struct by 32 bytes
+ // This fixes the PPP backward compat issue
+ varDscExposedStruct->lvExactSize += 32;
+
+ return true;
+ }
+ return false;
+}
+#endif // _TARGET_AMD64_
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+ void* forceFrameJIT; // used to force a frame; useful for fastchecked debugging
+
+bool Compiler::skipMethod()
+{
+ static ConfigMethodRange fJitRange;
+ fJitRange.EnsureInit(JitConfig.JitRange());
+ assert(!fJitRange.Error());
+
+ // Normally JitConfig.JitRange() is null, and we don't want to skip
+ // jitting any methods.
+ //
+ // So, the logic below relies on the fact that a null range string
+ // passed to ConfigMethodRange represents the set of all methods.
+
+ if (!fJitRange.Contains(info.compCompHnd, info.compMethodHnd))
+ {
+ return true;
+ }
+
+ if (JitConfig.JitExclude().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return true;
+ }
+
+ if (!JitConfig.JitInclude().isEmpty() &&
+ !JitConfig.JitInclude().contains(info.compMethodName, info.compClassName, &info.compMethodInfo->args))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+#endif
+
+/*****************************************************************************/
+
+int Compiler::compCompile(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags)
+{
+#ifdef FEATURE_JIT_METHOD_PERF
+ static bool checkedForJitTimeLog = false;
+
+ if (!checkedForJitTimeLog)
+ {
+ // Call into VM to get the config strings. FEATURE_JIT_METHOD_PERF is enabled for
+ // retail builds. Do not call the regular Config helper here as it would pull
+ // in a copy of the config parser into the clrjit.dll.
+ InterlockedCompareExchangeT(&Compiler::compJitTimeLogFilename, compHnd->getJitTimeLogFilename(), NULL);
+
+ // At a process or module boundary clear the file and start afresh.
+ JitTimer::PrintCsvHeader();
+
+ checkedForJitTimeLog = true;
+ }
+ if ((Compiler::compJitTimeLogFilename != NULL) || (JitTimeLogCsv() != NULL))
+ {
+ pCompJitTimer = JitTimer::Create(this, methodInfo->ILCodeSize);
+ }
+ else
+ {
+ pCompJitTimer = NULL;
+ }
+#endif // FEATURE_JIT_METHOD_PERF
+
+#ifdef DEBUG
+ Compiler* me = this;
+ forceFrameJIT = (void*)&me; // let us see the this pointer in fastchecked build
+ // set this early so we can use it without relying on random memory values
+ verbose = compIsForInlining() ? impInlineInfo->InlinerCompiler->verbose : false;
+
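+ // For inlinees, inherit all of the IR dump settings from the root (inliner)
+ // compiler so that dump output for the inlinee matches its caller's configuration.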
+ this->dumpIR = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIR : false;
+ this->dumpIRPhase = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRPhase : nullptr;
+ this->dumpIRFormat = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRFormat : nullptr;
+ this->dumpIRTypes = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRTypes : false;
+ this->dumpIRLocals = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRLocals : false;
+ this->dumpIRRegs = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRRegs : false;
+ this->dumpIRSsa = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRSsa : false;
+ this->dumpIRValnums = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRValnums : false;
+ this->dumpIRCosts = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRCosts : false;
+ this->dumpIRFlags = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRFlags : false;
+ this->dumpIRKinds = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRKinds : false;
+ this->dumpIRNodes = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNodes : false;
+ this->dumpIRNoLists = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoLists : false;
+ this->dumpIRNoLeafs = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoLeafs : false;
+ this->dumpIRNoStmts = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRNoStmts : false;
+ this->dumpIRTrees = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRTrees : false;
+ this->dumpIRLinear = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRLinear : false;
+ this->dumpIRDataflow = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRDataflow : false;
+ this->dumpIRBlockHeaders = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRBlockHeaders : false;
+ this->dumpIRExit = compIsForInlining() ? impInlineInfo->InlinerCompiler->dumpIRExit : false;
+
+#endif
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ info.compMethodHashPrivate = 0;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if FUNC_INFO_LOGGING
+ LPCWSTR tmpJitFuncInfoFilename = JitConfig.JitFuncInfoFile();
+
+ if (tmpJitFuncInfoFilename != nullptr)
+ {
+ LPCWSTR oldFuncInfoFileName =
+ InterlockedCompareExchangeT(&compJitFuncInfoFilename, tmpJitFuncInfoFilename, NULL);
+ if (oldFuncInfoFileName == nullptr)
+ {
+ assert(compJitFuncInfoFile == nullptr);
+ compJitFuncInfoFile = _wfopen(compJitFuncInfoFilename, W("a"));
+ if (compJitFuncInfoFile == nullptr)
+ {
+#if defined(DEBUG) && !defined(FEATURE_PAL) // no 'perror' in the PAL
+ perror("Failed to open JitFuncInfoLogFile");
+#endif // defined(DEBUG) && !defined(FEATURE_PAL)
+ }
+ }
+ }
+#endif // FUNC_INFO_LOGGING
+
+ // if (s_compMethodsCount==0) setvbuf(jitstdout, NULL, _IONBF, 0);
+
+ info.compCompHnd = compHnd;
+ info.compMethodHnd = methodHnd;
+ info.compMethodInfo = methodInfo;
+
+ // Do we have a matched VM? Or are we "abusing" the VM to help us do JIT work (such as using an x86 native VM
+ // with an ARM-targeting "altjit").
+ info.compMatchedVM = IMAGE_FILE_MACHINE_TARGET == info.compCompHnd->getExpectedTargetArchitecture();
+
+#if defined(ALT_JIT) && defined(UNIX_AMD64_ABI)
+ // ToDo: This code is to allow us to run UNIX codegen on Windows for now. Remove when appropriate.
+ // Make sure that the generated UNIX altjit code is skipped on Windows. The static jit codegen is used to run.
+ info.compMatchedVM = false;
+#endif // UNIX_AMD64_ABI
+
+#if COR_JIT_EE_VERSION > 460
+ compMaxUncheckedOffsetForNullObject = eeGetEEInfo()->maxUncheckedOffsetForNullObject;
+#else // COR_JIT_EE_VERSION <= 460
+ compMaxUncheckedOffsetForNullObject = MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT;
+#endif // COR_JIT_EE_VERSION > 460
+
+ // Set the context for token lookup.
+ if (compIsForInlining())
+ {
+ impTokenLookupContextHandle = impInlineInfo->tokenLookupContextHandle;
+
+ assert(impInlineInfo->inlineCandidateInfo->clsHandle == compHnd->getMethodClass(methodHnd));
+ info.compClassHnd = impInlineInfo->inlineCandidateInfo->clsHandle;
+
+ assert(impInlineInfo->inlineCandidateInfo->clsAttr == info.compCompHnd->getClassAttribs(info.compClassHnd));
+ // printf("%x != %x\n", impInlineInfo->inlineCandidateInfo->clsAttr,
+ // info.compCompHnd->getClassAttribs(info.compClassHnd));
+ info.compClassAttr = impInlineInfo->inlineCandidateInfo->clsAttr;
+ }
+ else
+ {
+ impTokenLookupContextHandle = MAKE_METHODCONTEXT(info.compMethodHnd);
+
+ info.compClassHnd = compHnd->getMethodClass(methodHnd);
+ info.compClassAttr = info.compCompHnd->getClassAttribs(info.compClassHnd);
+ }
+
+ info.compProfilerCallback = false; // Assume false until we are told to hook this method.
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ const char* classNamePtr;
+
+ info.compMethodName = eeGetMethodName(methodHnd, &classNamePtr);
+ unsigned len = (unsigned)roundUp(strlen(classNamePtr) + 1);
+ info.compClassName = (char*)compGetMem(len, CMK_DebugOnly);
+ strcpy_s((char*)info.compClassName, len, classNamePtr);
+
+ info.compFullName = eeGetMethodFullName(methodHnd);
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#ifdef DEBUG
+ if (!compIsForInlining())
+ {
+ JitTls::GetLogEnv()->setCompiler(this);
+ }
+
+ // Have we been told to be more selective in our Jitting?
+ if (skipMethod())
+ {
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_MARKED_AS_SKIPPED);
+ }
+ return CORJIT_SKIPPED;
+ }
+
+ // Opt-in to jit stress based on method hash ranges.
+ //
+ // Note the default (with JitStressRange not set) is that all
+ // methods will be subject to stress.
+ static ConfigMethodRange fJitStressRange;
+ fJitStressRange.EnsureInit(JitConfig.JitStressRange());
+ assert(!fJitStressRange.Error());
+ bRangeAllowStress = fJitStressRange.Contains(info.compCompHnd, info.compMethodHnd);
+
+#endif // DEBUG
+
+ // Set this before the first 'BADCODE'
+ // Skip verification where possible
+ tiVerificationNeeded = (compileFlags->corJitFlags & CORJIT_FLG_SKIP_VERIFICATION) == 0;
+
+ assert(!compIsForInlining() || !tiVerificationNeeded); // Inlinees must have been verified.
+
+ // assume the code is verifiable unless proven otherwise
+ tiIsVerifiableCode = TRUE;
+
+ tiRuntimeCalloutNeeded = false;
+
+ CorInfoInstantiationVerification instVerInfo = INSTVER_GENERIC_PASSED_VERIFICATION;
+
+ if (!compIsForInlining() && tiVerificationNeeded)
+ {
+ instVerInfo = compHnd->isInstantiationOfVerifiedGeneric(methodHnd);
+
+ if (tiVerificationNeeded && (instVerInfo == INSTVER_GENERIC_FAILED_VERIFICATION))
+ {
+ CorInfoCanSkipVerificationResult canSkipVerificationResult =
+ info.compCompHnd->canSkipMethodVerification(info.compMethodHnd);
+
+ switch (canSkipVerificationResult)
+ {
+ case CORINFO_VERIFICATION_CANNOT_SKIP:
+ // We cannot verify concrete instantiation.
+ // We can only verify the typical/open instantiation
+ // The VM should throw a VerificationException instead of allowing this.
+ NO_WAY("Verification of closed instantiations is not supported");
+ break;
+
+ case CORINFO_VERIFICATION_CAN_SKIP:
+ // The VM should first verify the open instantiation. If unverifiable code
+ // is detected, it should pass in CORJIT_FLG_SKIP_VERIFICATION.
+ assert(!"The VM should have used CORJIT_FLG_SKIP_VERIFICATION");
+ tiVerificationNeeded = false;
+ break;
+
+ case CORINFO_VERIFICATION_RUNTIME_CHECK:
+ // This is a concrete generic instantiation with unverifiable code, that also
+ // needs a runtime callout.
+ tiVerificationNeeded = false;
+ tiRuntimeCalloutNeeded = true;
+ break;
+
+ case CORINFO_VERIFICATION_DONT_JIT:
+ // We cannot verify concrete instantiation.
+ // We can only verify the typical/open instantiation
+ // The VM should throw a VerificationException instead of allowing this.
+ BADCODE("NGEN of unverifiable transparent code is not supported");
+ break;
+ }
+ }
+
+ // load any constraints for verification, noting any cycles to be rejected by the verifying importer
+ if (tiVerificationNeeded)
+ {
+ compHnd->initConstraintsForVerification(methodHnd, &info.hasCircularClassConstraints,
+ &info.hasCircularMethodConstraints);
+ }
+ }
+
+ /* Setup an error trap */
+
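+    // Explanatory note (stated as an assumption about the trap macros): setErrorTrap and
+    // finallyErrorTrap expand into a separate protected scope, so every local the protected
+    // block needs is passed through this single Param object rather than being referenced
+    // directly across the trap boundary.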
+ struct Param
+ {
+ Compiler* pThis;
+
+ CORINFO_MODULE_HANDLE classPtr;
+ COMP_HANDLE compHnd;
+ CORINFO_METHOD_INFO* methodInfo;
+ void** methodCodePtr;
+ ULONG* methodCodeSize;
+ CORJIT_FLAGS* compileFlags;
+
+ CorInfoInstantiationVerification instVerInfo;
+ int result;
+ } param;
+ param.pThis = this;
+ param.classPtr = classPtr;
+ param.compHnd = compHnd;
+ param.methodInfo = methodInfo;
+ param.methodCodePtr = methodCodePtr;
+ param.methodCodeSize = methodCodeSize;
+ param.compileFlags = compileFlags;
+ param.instVerInfo = instVerInfo;
+ param.result = CORJIT_INTERNALERROR;
+
+ setErrorTrap(compHnd, Param*, pParam, &param) // ERROR TRAP: Start normal block
+ {
+ pParam->result = pParam->pThis->compCompileHelper(pParam->classPtr, pParam->compHnd, pParam->methodInfo,
+ pParam->methodCodePtr, pParam->methodCodeSize,
+ pParam->compileFlags, pParam->instVerInfo);
+ }
+ finallyErrorTrap() // ERROR TRAP: The following block handles errors
+ {
+ /* Cleanup */
+
+ if (compIsForInlining())
+ {
+ goto DoneCleanUp;
+ }
+
+ /* Tell the emitter that we're done with this function */
+
+ genEmitter->emitEndCG();
+
+ DoneCleanUp:
+ compDone();
+ }
+ endErrorTrap() // ERROR TRAP: End
+
+ return param.result;
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+unsigned Compiler::Info::compMethodHash() const
+{
+ if (compMethodHashPrivate == 0)
+ {
+ compMethodHashPrivate = compCompHnd->getMethodHash(compMethodHnd);
+ }
+ return compMethodHashPrivate;
+}
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+void Compiler::compCompileFinish()
+{
+#if defined(DEBUG) || MEASURE_NODE_SIZE || MEASURE_BLOCK_SIZE || DISPLAY_SIZES || CALL_ARG_STATS
+ genMethodCnt++;
+#endif
+
+#if MEASURE_MEM_ALLOC
+ {
+ // Grab the relevant lock.
+ CritSecHolder statsLock(s_memStatsLock);
+
+ // Make the updates.
+ genMemStats.nraTotalSizeAlloc = compGetAllocator()->getTotalBytesAllocated();
+ genMemStats.nraTotalSizeUsed = compGetAllocator()->getTotalBytesUsed();
+ s_aggMemStats.Add(genMemStats);
+ if (genMemStats.allocSz > s_maxCompMemStats.allocSz)
+ {
+ s_maxCompMemStats = genMemStats;
+ }
+ }
+
+#ifdef DEBUG
+ if (s_dspMemStats || verbose)
+ {
+ printf("\nAllocations for %s (MethodHash=%08x)\n", info.compFullName, info.compMethodHash());
+ genMemStats.Print(jitstdout);
+ }
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+ AddLoopHoistStats();
+#endif // LOOP_HOIST_STATS
+
+#if MEASURE_NODE_SIZE
+ genTreeNcntHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeCnt));
+ genTreeNsizHist.record(static_cast<unsigned>(genNodeSizeStatsPerFunc.genTreeNodeSize));
+#endif
+
+#if defined(DEBUG)
+ // Small methods should fit in ArenaAllocator::getDefaultPageSize(), or else
+ // we should bump up ArenaAllocator::getDefaultPageSize()
+
+ if ((info.compILCodeSize <= 32) && // Is it a reasonably small method?
+ (info.compNativeCodeSize < 512) && // Some trivial methods generate huge native code. eg. pushing a single huge
+ // struct
+        (impInlinedCodeSize <= 128) &&         // Is the inlining reasonably bounded?
+                                               // Small methods cannot meaningfully have a large number of locals
+                                               // or arguments. We always track arguments at the start of
+                                               // the prolog, which requires memory.
+ (info.compLocalsCount <= 32) && (!opts.MinOpts()) && // We may have too many local variables, etc
+ (getJitStressLevel() == 0) && // We need extra memory for stress
+ !compAllocator->bypassHostAllocator() && // ArenaAllocator::getDefaultPageSize() is artificially low for
+ // DirectAlloc
+ (compAllocator->getTotalBytesAllocated() > (2 * ArenaAllocator::getDefaultPageSize())) &&
+// Factor of 2x is because data-structures are bigger under DEBUG
+#ifndef LEGACY_BACKEND
+ // RyuJIT backend needs memory tuning! TODO-Cleanup: remove this case when memory tuning is complete.
+ (compAllocator->getTotalBytesAllocated() > (10 * ArenaAllocator::getDefaultPageSize())) &&
+#endif
+ !verbose) // We allocate lots of memory to convert sets to strings for JitDump
+ {
+ genSmallMethodsNeedingExtraMemoryCnt++;
+
+ // Less than 1% of all methods should run into this.
+ // We cannot be more strict as there are always degenerate cases where we
+ // would need extra memory (like huge structs as locals - see lvaSetStruct()).
+ assert((genMethodCnt < 500) || (genSmallMethodsNeedingExtraMemoryCnt < (genMethodCnt / 100)));
+ }
+#endif // DEBUG
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ m_inlineStrategy->DumpData();
+ m_inlineStrategy->DumpXml();
+
+#endif
+
+#ifdef DEBUG
+ if (opts.dspOrder)
+ {
+ // mdMethodDef __stdcall CEEInfo::getMethodDefFromMethod(CORINFO_METHOD_HANDLE hMethod)
+ mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+ unsigned profCallCount = 0;
+ if (((opts.eeFlags & CORJIT_FLG_BBOPT) != 0) && fgHaveProfileData())
+ {
+ assert(fgProfileBuffer[0].ILOffset == 0);
+ profCallCount = fgProfileBuffer[0].ExecutionCount;
+ }
+
+ static bool headerPrinted = false;
+ if (!headerPrinted)
+ {
+ // clang-format off
+ headerPrinted = true;
+ printf(" | Profiled | Exec- | Method has | calls | Num |LclV |AProp| CSE | Reg |bytes | %3s code size | \n", Target::g_tgtCPUName);
+ printf(" mdToken | | RGN | Count | EH | FRM | LOOP | NRM | IND | BBs | Cnt | Cnt | Cnt | Alloc | IL | HOT | COLD | method name \n");
+ printf("---------+-----+------+----------+----+-----+------+-----+-----+-----+-----+-----+-----+---------+------+-------+-------+-----------\n");
+ // 06001234 | PRF | HOT | 219 | EH | ebp | LOOP | 15 | 6 | 12 | 17 | 12 | 8 | 28 p2 | 145 | 211 | 123 | System.Example(int)
+ // clang-format on
+ }
+
+ printf("%08X | ", currentMethodToken);
+
+ CorInfoRegionKind regionKind = info.compMethodInfo->regionKind;
+
+ if (opts.altJit)
+ {
+ printf("ALT | ");
+ }
+ else if (fgHaveProfileData())
+ {
+ printf("PRF | ");
+ }
+ else
+ {
+ printf(" | ");
+ }
+
+ if (regionKind == CORINFO_REGION_NONE)
+ {
+ printf(" | ");
+ }
+ else if (regionKind == CORINFO_REGION_HOT)
+ {
+ printf(" HOT | ");
+ }
+ else if (regionKind == CORINFO_REGION_COLD)
+ {
+ printf("COLD | ");
+ }
+ else if (regionKind == CORINFO_REGION_JIT)
+ {
+ printf(" JIT | ");
+ }
+ else
+ {
+ printf("UNKN | ");
+ }
+
+ printf("%8d | ", profCallCount);
+
+ if (compHndBBtabCount > 0)
+ {
+ printf("EH | ");
+ }
+ else
+ {
+ printf(" | ");
+ }
+
+ if (rpFrameType == FT_EBP_FRAME)
+ {
+ printf("%3s | ", STR_FPBASE);
+ }
+ else if (rpFrameType == FT_ESP_FRAME)
+ {
+ printf("%3s | ", STR_SPBASE);
+ }
+#if DOUBLE_ALIGN
+ else if (rpFrameType == FT_DOUBLE_ALIGN_FRAME)
+ {
+ printf("dbl | ");
+ }
+#endif
+ else // (rpFrameType == FT_NOT_SET)
+ {
+ printf("??? | ");
+ }
+
+ if (fgHasLoops)
+ {
+ printf("LOOP |");
+ }
+ else
+ {
+ printf(" |");
+ }
+
+ printf(" %3d |", optCallCount);
+ printf(" %3d |", optIndirectCallCount);
+ printf(" %3d |", fgBBcountAtCodegen);
+ printf(" %3d |", lvaCount);
+
+ if (opts.MinOpts())
+ {
+ printf(" MinOpts |");
+ }
+ else
+ {
+ printf(" %3d |", optAssertionCount);
+#if FEATURE_ANYCSE
+ printf(" %3d |", optCSEcount);
+#else
+ printf(" %3d |", 0);
+#endif // FEATURE_ANYCSE
+ }
+
+#ifndef LEGACY_BACKEND
+ printf(" LSRA |"); // TODO-Cleanup: dump some interesting LSRA stat into the order file?
+#else // LEGACY_BACKEND
+ printf("%s%4d p%1d |", (tmpCount > 0) ? "T" : " ", rpStkPredict / BB_UNITY_WEIGHT, rpPasses);
+#endif // LEGACY_BACKEND
+ printf(" %4d |", info.compMethodInfo->ILCodeSize);
+ printf(" %5d |", info.compTotalHotCodeSize);
+ printf(" %5d |", info.compTotalColdCodeSize);
+
+ printf(" %s\n", eeGetMethodFullName(info.compMethodHnd));
+ printf(""); // in our logic this causes a flush
+ }
+
+ if (verbose)
+ {
+ printf("****** DONE compiling %s\n", info.compFullName);
+ printf(""); // in our logic this causes a flush
+ }
+
+ // Only call _DbgBreakCheck when we are jitting, not when we are ngen-ing
+ // For ngen the int3 or breakpoint instruction will be right at the
+ // start of the ngen method and we will stop when we execute it.
+ //
+ if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0)
+ {
+ if (compJitHaltMethod())
+ {
+#if !defined(_TARGET_ARM64_) && !defined(PLATFORM_UNIX)
+ // TODO-ARM64-NYI: re-enable this when we have an OS that supports a pop-up dialog
+
+ // Don't do an assert, but just put up the dialog box so we get just-in-time debugger
+ // launching. When you hit 'retry' it will continue and naturally stop at the INT 3
+ // that the JIT put in the code
+ _DbgBreakCheck(__FILE__, __LINE__, "JitHalt");
+#endif
+ }
+ }
+#endif // DEBUG
+}
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+// This is the zlib adler32 checksum. The source came from the Windows code base.
+
+#define BASE 65521L // largest prime smaller than 65536
+#define NMAX 5552
+// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
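+// In other words, reducing s1 and s2 modulo BASE at least once every NMAX bytes guarantees
+// that the 32-bit accumulators cannot overflow before the reduction.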
+
+#define DO1(buf, i) \
+ { \
+ s1 += buf[i]; \
+ s2 += s1; \
+ }
+#define DO2(buf, i) \
+ DO1(buf, i); \
+ DO1(buf, i + 1);
+#define DO4(buf, i) \
+ DO2(buf, i); \
+ DO2(buf, i + 2);
+#define DO8(buf, i) \
+ DO4(buf, i); \
+ DO4(buf, i + 4);
+#define DO16(buf) \
+ DO8(buf, 0); \
+ DO8(buf, 8);
+
+unsigned adler32(unsigned adler, char* buf, unsigned int len)
+{
+ unsigned int s1 = adler & 0xffff;
+ unsigned int s2 = (adler >> 16) & 0xffff;
+ int k;
+
+ if (buf == NULL)
+ return 1L;
+
+ while (len > 0)
+ {
+ k = len < NMAX ? len : NMAX;
+ len -= k;
+ while (k >= 16)
+ {
+ DO16(buf);
+ buf += 16;
+ k -= 16;
+ }
+ if (k != 0)
+ do
+ {
+ s1 += *buf++;
+ s2 += s1;
+ } while (--k);
+ s1 %= BASE;
+ s2 %= BASE;
+ }
+ return (s2 << 16) | s1;
+}
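+
+// For illustration (worked from the code above): with a zero seed, as getMethodBodyChecksum
+// uses below, adler32(0, "a", 1) accumulates s1 = 0x61 and s2 = 0x61 and returns 0x00610061.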
+#endif
+
+unsigned getMethodBodyChecksum(__in_z char* code, int size)
+{
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ return adler32(0, code, size);
+#else
+ return 0;
+#endif
+}
+
+int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ CorInfoInstantiationVerification instVerInfo)
+{
+ CORINFO_METHOD_HANDLE methodHnd = info.compMethodHnd;
+
+ info.compCode = methodInfo->ILCode;
+ info.compILCodeSize = methodInfo->ILCodeSize;
+
+ if (info.compILCodeSize == 0)
+ {
+ BADCODE("code size is zero");
+ }
+
+ if (compIsForInlining())
+ {
+#ifdef DEBUG
+ unsigned methAttr_Old = impInlineInfo->inlineCandidateInfo->methAttr;
+ unsigned methAttr_New = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
+ unsigned flagsToIgnore = CORINFO_FLG_DONT_INLINE | CORINFO_FLG_FORCEINLINE;
+ assert((methAttr_Old & (~flagsToIgnore)) == (methAttr_New & (~flagsToIgnore)));
+#endif
+
+ info.compFlags = impInlineInfo->inlineCandidateInfo->methAttr;
+ }
+ else
+ {
+ info.compFlags = info.compCompHnd->getMethodAttribs(info.compMethodHnd);
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ info.compChecksum = getMethodBodyChecksum((char*)methodInfo->ILCode, methodInfo->ILCodeSize);
+#endif
+ }
+
+ // compInitOptions will set the correct verbose flag.
+
+ compInitOptions(compileFlags);
+
+#ifdef ALT_JIT
+ if (!compIsForInlining() && !opts.altJit)
+ {
+ // We're an altjit, but the COMPlus_AltJit configuration did not say to compile this method,
+ // so skip it.
+ return CORJIT_SKIPPED;
+ }
+#endif // ALT_JIT
+
+#ifdef DEBUG
+
+ if (verbose)
+ {
+ printf("IL to import:\n");
+ dumpILRange(info.compCode, info.compILCodeSize);
+ }
+
+#endif
+
+    // Check for COMPlus_AggressiveInlining
+ if (JitConfig.JitAggressiveInlining())
+ {
+ compDoAggressiveInlining = true;
+ }
+
+ if (compDoAggressiveInlining)
+ {
+ info.compFlags |= CORINFO_FLG_FORCEINLINE;
+ }
+
+#ifdef DEBUG
+
+ // Check for ForceInline stress.
+ if (compStressCompile(STRESS_FORCE_INLINE, 0))
+ {
+ info.compFlags |= CORINFO_FLG_FORCEINLINE;
+ }
+
+ if (compIsForInlining())
+ {
+ JITLOG((LL_INFO100000, "\nINLINER impTokenLookupContextHandle for %s is 0x%p.\n",
+ eeGetMethodFullName(info.compMethodHnd), dspPtr(impTokenLookupContextHandle)));
+ }
+
+ // Force verification if asked to do so
+ if (JitConfig.JitForceVer())
+ {
+ tiVerificationNeeded = (instVerInfo == INSTVER_NOT_INSTANTIATION);
+ }
+
+ if (tiVerificationNeeded)
+ {
+ JITLOG((LL_INFO10000, "tiVerificationNeeded initially set to true for %s\n", info.compFullName));
+ }
+#endif // DEBUG
+
+ /* Since tiVerificationNeeded can be turned off in the middle of
+ compiling a method, and it might have caused blocks to be queued up
+ for reimporting, impCanReimport can be used to check for reimporting. */
+
+ impCanReimport = (tiVerificationNeeded || compStressCompile(STRESS_CHK_REIMPORT, 15));
+
+ // Need security prolog/epilog callouts when there is a declarative security in the method.
+ tiSecurityCalloutNeeded = ((info.compFlags & CORINFO_FLG_NOSECURITYWRAP) == 0);
+
+ if (tiSecurityCalloutNeeded || (info.compFlags & CORINFO_FLG_SECURITYCHECK))
+ {
+ // We need to allocate the security object on the stack
+ // when the method being compiled has a declarative security
+ // (i.e. when CORINFO_FLG_NOSECURITYWRAP is reset for the current method).
+ // This is also the case when we inject a prolog and epilog in the method.
+ opts.compNeedSecurityCheck = true;
+ }
+
+    /* Initialize a bunch of global values */
+
+ info.compScopeHnd = classPtr;
+ info.compXcptnsCount = methodInfo->EHcount;
+ info.compMaxStack = methodInfo->maxStack;
+ compHndBBtab = nullptr;
+ compHndBBtabCount = 0;
+ compHndBBtabAllocCount = 0;
+
+ info.compNativeCodeSize = 0;
+ info.compTotalHotCodeSize = 0;
+ info.compTotalColdCodeSize = 0;
+
+#ifdef DEBUG
+ compCurBB = nullptr;
+ lvaTable = nullptr;
+
+ // Reset node ID counter
+ compGenTreeID = 0;
+#endif
+
+ /* Initialize emitter */
+
+ if (!compIsForInlining())
+ {
+ codeGen->getEmitter()->emitBegCG(this, compHnd);
+ }
+
+ info.compIsStatic = (info.compFlags & CORINFO_FLG_STATIC) != 0;
+
+ info.compIsContextful = (info.compClassAttr & CORINFO_FLG_CONTEXTFUL) != 0;
+
+ info.compPublishStubParam = (opts.eeFlags & CORJIT_FLG_PUBLISH_SECRET_PARAM) != 0;
+
+ switch (methodInfo->args.getCallConv())
+ {
+ case CORINFO_CALLCONV_VARARG:
+ case CORINFO_CALLCONV_NATIVEVARARG:
+ info.compIsVarArgs = true;
+ break;
+ case CORINFO_CALLCONV_DEFAULT:
+ info.compIsVarArgs = false;
+ break;
+ default:
+ BADCODE("bad calling convention");
+ }
+ info.compRetNativeType = info.compRetType = JITtype2varType(methodInfo->args.retType);
+
+ info.compCallUnmanaged = 0;
+ info.compLvFrameListRoot = BAD_VAR_NUM;
+
+#if FEATURE_FIXED_OUT_ARGS
+ lvaOutgoingArgSpaceSize = 0;
+#endif
+
+ lvaGenericsContextUsed = false;
+
+ info.compInitMem = ((methodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0);
+
+ /* Allocate the local variable table */
+
+ lvaInitTypeRef();
+
+ if (!compIsForInlining())
+ {
+ compInitDebuggingInfo();
+ }
+
+ const bool forceInline = !!(info.compFlags & CORINFO_FLG_FORCEINLINE);
+
+ if (!compIsForInlining() && (opts.eeFlags & CORJIT_FLG_PREJIT))
+ {
+ // We're prejitting the root method. We also will analyze it as
+ // a potential inline candidate.
+ InlineResult prejitResult(this, methodHnd, "prejit");
+
+ // Do the initial inline screen.
+ impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult);
+
+ // Temporarily install the prejitResult as the
+ // compInlineResult so it's available to fgFindJumpTargets
+ // and can accumulate more observations as the IL is
+ // scanned.
+ //
+ // We don't pass prejitResult in as a parameter to avoid
+ // potential aliasing confusion -- the other call to
+ // fgFindBasicBlocks may have set up compInlineResult and
+ // the code in fgFindJumpTargets references that data
+ // member extensively.
+ assert(compInlineResult == nullptr);
+ assert(impInlineInfo == nullptr);
+ compInlineResult = &prejitResult;
+
+ // Find the basic blocks. We must do this regardless of
+ // inlineability, since we are prejitting this method.
+ //
+ // This will also update the status of this method as
+ // an inline candidate.
+ fgFindBasicBlocks();
+
+ // Undo the temporary setup.
+ assert(compInlineResult == &prejitResult);
+ compInlineResult = nullptr;
+
+ // If still a viable, discretionary inline, assess
+ // profitability.
+ if (prejitResult.IsDiscretionaryCandidate())
+ {
+ prejitResult.DetermineProfitability(methodInfo);
+ }
+
+ // Handle the results of the inline analysis.
+ if (prejitResult.IsFailure())
+ {
+ // This method is a bad inlinee according to our
+ // analysis. We will let the InlineResult destructor
+ // mark it as noinline in the prejit image to save the
+ // jit some work.
+ //
+ // This decision better not be context-dependent.
+ assert(prejitResult.IsNever());
+ }
+ else
+ {
+ // This looks like a viable inline candidate. Since
+ // we're not actually inlining, don't report anything.
+ prejitResult.SetReported();
+ }
+ }
+ else
+ {
+ // We are jitting the root method, or inlining.
+ fgFindBasicBlocks();
+ }
+
+ // If we're inlining and the candidate is bad, bail out.
+ if (compDonotInline())
+ {
+ goto _Next;
+ }
+
+ compSetOptimizationLevel();
+
+#if COUNT_BASIC_BLOCKS
+ bbCntTable.record(fgBBcount);
+
+ if (fgBBcount == 1)
+ {
+ bbOneBBSizeTable.record(methodInfo->ILCodeSize);
+ }
+#endif // COUNT_BASIC_BLOCKS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Basic block list for '%s'\n", info.compFullName);
+ fgDispBasicBlocks();
+ }
+#endif
+
+#ifdef DEBUG
+ /* Give the function a unique number */
+
+ if (opts.disAsm || opts.dspEmit || verbose)
+ {
+ s_compMethodsCount = ~info.compMethodHash() & 0xffff;
+ }
+ else
+ {
+ s_compMethodsCount++;
+ }
+#endif
+
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS, fgBBcount);
+
+ if (compInlineResult->IsFailure())
+ {
+ goto _Next;
+ }
+ }
+
+#ifdef DEBUG
+ if (JitConfig.DumpJittedMethods() == 1 && !compIsForInlining())
+ {
+ printf("Compiling %4d %s::%s, IL size = %u, hsh=0x%x\n", Compiler::jitTotalMethodCompiled, info.compClassName,
+ info.compMethodName, info.compILCodeSize, info.compMethodHash());
+ }
+ if (compIsForInlining())
+ {
+ compGenTreeID = impInlineInfo->InlinerCompiler->compGenTreeID;
+ }
+#endif
+
+ compCompile(methodCodePtr, methodCodeSize, compileFlags);
+
+#ifdef DEBUG
+ if (compIsForInlining())
+ {
+ impInlineInfo->InlinerCompiler->compGenTreeID = compGenTreeID;
+ }
+#endif
+
+_Next:
+
+ if (compDonotInline())
+ {
+ // Verify we have only one inline result in play.
+ assert(impInlineInfo->inlineResult == compInlineResult);
+ }
+
+ if (!compIsForInlining())
+ {
+ compCompileFinish();
+
+ // Did we just compile for a target architecture that the VM isn't expecting? If so, the VM
+        // can't use the generated code (and we had better be an AltJit!).
+
+ if (!info.compMatchedVM)
+ {
+ return CORJIT_SKIPPED;
+ }
+
+#ifdef ALT_JIT
+#ifdef DEBUG
+ if (JitConfig.RunAltJitCode() == 0)
+ {
+ return CORJIT_SKIPPED;
+ }
+#endif // DEBUG
+#endif // ALT_JIT
+ }
+
+ /* Success! */
+ return CORJIT_OK;
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+//------------------------------------------------------------------------
+// compFindLocalVarLinear: Linear search for variable's scope containing offset.
+//
+// Arguments:
+// varNum The variable number to search for in the array of scopes.
+// offs The offset value which should occur within the life of the variable.
+//
+// Return Value:
+// VarScopeDsc* of a matching variable that contains the offset within its life
+// begin and life end or nullptr when there is no match found.
+//
+// Description:
+//     Linear search for a matching variable whose life begin and life end contain
+//     the offset; returns nullptr if one couldn't be found.
+//
+// Note:
+//     Usually called for a scope count of 4. Could be called for values up to 8.
+//
+VarScopeDsc* Compiler::compFindLocalVarLinear(unsigned varNum, unsigned offs)
+{
+ for (unsigned i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* dsc = &info.compVarScopes[i];
+ if ((dsc->vsdVarNum == varNum) && (dsc->vsdLifeBeg <= offs) && (dsc->vsdLifeEnd > offs))
+ {
+ return dsc;
+ }
+ }
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// compFindLocalVar: Search for variable's scope containing offset.
+//
+// Arguments:
+// varNum The variable number to search for in the array of scopes.
+// offs The offset value which should occur within the life of the variable.
+//
+// Return Value:
+//     VarScopeDsc* of a matching variable that contains the offset within its life
+//     begin and life end, or nullptr if one couldn't be found.
+//
+// Description:
+// Linear search for matching variables with their life begin and end containing
+// the offset only when the scope count is < MAX_LINEAR_FIND_LCL_SCOPELIST,
+// else use the hashtable lookup.
+//
+VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned offs)
+{
+ if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
+ {
+ return compFindLocalVarLinear(varNum, offs);
+ }
+ else
+ {
+ VarScopeDsc* ret = compFindLocalVar(varNum, offs, offs);
+ assert(ret == compFindLocalVarLinear(varNum, offs));
+ return ret;
+ }
+}
+
+//------------------------------------------------------------------------
+// compFindLocalVar: Search for variable's scope containing offset.
+//
+// Arguments:
+// varNum The variable number to search for in the array of scopes.
+// lifeBeg The life begin of the variable's scope
+// lifeEnd The life end of the variable's scope
+//
+// Return Value:
+//     VarScopeDsc* of a matching variable whose life begin and life end contain
+//     the given range, or nullptr if one couldn't be found.
+//
+// Description:
+// Following are the steps used:
+// 1. Index into the hashtable using varNum.
+// 2. Iterate through the linked list at index varNum to find a matching
+// var scope.
+//
+VarScopeDsc* Compiler::compFindLocalVar(unsigned varNum, unsigned lifeBeg, unsigned lifeEnd)
+{
+ assert(compVarScopeMap != nullptr);
+
+ VarScopeMapInfo* info;
+ if (compVarScopeMap->Lookup(varNum, &info))
+ {
+ VarScopeListNode* list = info->head;
+ while (list != nullptr)
+ {
+ if ((list->data->vsdLifeBeg <= lifeBeg) && (list->data->vsdLifeEnd > lifeEnd))
+ {
+ return list->data;
+ }
+ list = list->next;
+ }
+ }
+ return nullptr;
+}
+
+//-------------------------------------------------------------------------
+// compInitVarScopeMap: Create a scope map so it can be looked up by varNum
+//
+// Description:
+// Map.K => Map.V :: varNum => List(ScopeDsc)
+//
+//     Create a scope map that can be indexed by varNum and whose values can be
+//     iterated to look for a matching scope, given either an offs or a
+//     lifeBeg and lifeEnd pair.
+//
+// Notes:
+//     1. Build the map only when linear search would be slow, i.e., when
+//        info.compVarScopesCount is at least MAX_LINEAR_FIND_LCL_SCOPELIST.
+// 2. Linked list preserves original array order.
+//
+void Compiler::compInitVarScopeMap()
+{
+ if (info.compVarScopesCount < MAX_LINEAR_FIND_LCL_SCOPELIST)
+ {
+ return;
+ }
+
+ assert(compVarScopeMap == nullptr);
+
+ compVarScopeMap = new (getAllocator()) VarNumToScopeDscMap(getAllocator());
+
+    // 599 is a prime used to limit huge allocations (e.g., from duplicated scopes on a single var).
+ compVarScopeMap->Reallocate(min(info.compVarScopesCount, 599));
+
+ for (unsigned i = 0; i < info.compVarScopesCount; ++i)
+ {
+ unsigned varNum = info.compVarScopes[i].vsdVarNum;
+
+ VarScopeListNode* node = VarScopeListNode::Create(&info.compVarScopes[i], getAllocator());
+
+ // Index by varNum and if the list exists append "node" to the "list".
+ VarScopeMapInfo* info;
+ if (compVarScopeMap->Lookup(varNum, &info))
+ {
+ info->tail->next = node;
+ info->tail = node;
+ }
+ // Create a new list.
+ else
+ {
+ info = VarScopeMapInfo::Create(node, getAllocator());
+ compVarScopeMap->Set(varNum, info);
+ }
+ }
+}
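+
+// (Usage note: the three-argument compFindLocalVar above consults this map, walking the
+// per-varNum list in the original array order that the construction loop preserves.)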
+
+static int __cdecl genCmpLocalVarLifeBeg(const void* elem1, const void* elem2)
+{
+ return (*((VarScopeDsc**)elem1))->vsdLifeBeg - (*((VarScopeDsc**)elem2))->vsdLifeBeg;
+}
+
+static int __cdecl genCmpLocalVarLifeEnd(const void* elem1, const void* elem2)
+{
+ return (*((VarScopeDsc**)elem1))->vsdLifeEnd - (*((VarScopeDsc**)elem2))->vsdLifeEnd;
+}
+
+inline void Compiler::compInitScopeLists()
+{
+ if (info.compVarScopesCount == 0)
+ {
+ compEnterScopeList = compExitScopeList = nullptr;
+ return;
+ }
+
+ // Populate the 'compEnterScopeList' and 'compExitScopeList' lists
+
+ compEnterScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
+ compExitScopeList = new (this, CMK_DebugInfo) VarScopeDsc*[info.compVarScopesCount];
+
+ for (unsigned i = 0; i < info.compVarScopesCount; i++)
+ {
+ compEnterScopeList[i] = compExitScopeList[i] = &info.compVarScopes[i];
+ }
+
+ qsort(compEnterScopeList, info.compVarScopesCount, sizeof(*compEnterScopeList), genCmpLocalVarLifeBeg);
+ qsort(compExitScopeList, info.compVarScopesCount, sizeof(*compExitScopeList), genCmpLocalVarLifeEnd);
+}
+
+void Compiler::compResetScopeLists()
+{
+ if (info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+ assert(compEnterScopeList && compExitScopeList);
+
+ compNextEnterScope = compNextExitScope = 0;
+}
+
+VarScopeDsc* Compiler::compGetNextEnterScope(unsigned offs, bool scan)
+{
+ assert(info.compVarScopesCount);
+ assert(compEnterScopeList && compExitScopeList);
+
+ if (compNextEnterScope < info.compVarScopesCount)
+ {
+ assert(compEnterScopeList[compNextEnterScope]);
+ unsigned nextEnterOff = compEnterScopeList[compNextEnterScope]->vsdLifeBeg;
+ assert(scan || (offs <= nextEnterOff));
+
+ if (!scan)
+ {
+ if (offs == nextEnterOff)
+ {
+ return compEnterScopeList[compNextEnterScope++];
+ }
+ }
+ else
+ {
+ if (nextEnterOff <= offs)
+ {
+ return compEnterScopeList[compNextEnterScope++];
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+VarScopeDsc* Compiler::compGetNextExitScope(unsigned offs, bool scan)
+{
+ assert(info.compVarScopesCount);
+ assert(compEnterScopeList && compExitScopeList);
+
+ if (compNextExitScope < info.compVarScopesCount)
+ {
+ assert(compExitScopeList[compNextExitScope]);
+ unsigned nextExitOffs = compExitScopeList[compNextExitScope]->vsdLifeEnd;
+ assert(scan || (offs <= nextExitOffs));
+
+ if (!scan)
+ {
+ if (offs == nextExitOffs)
+ {
+ return compExitScopeList[compNextExitScope++];
+ }
+ }
+ else
+ {
+ if (nextExitOffs <= offs)
+ {
+ return compExitScopeList[compNextExitScope++];
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+// This function calls the enter/exit callbacks for scopes whose boundaries lie
+// between the current position of the scope lists and 'offset', in instruction
+// order.
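+//
+// Informal sketch of the traversal (a reading of the code below): exit events and enter events
+// are pulled from their two sorted lists in offset order; when one list overshoots the other,
+// the overshooting scope is parked in nextExitScope / nextEnterScope and the other list is
+// drained up to that point, so the callbacks fire in instruction order.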
+
+void Compiler::compProcessScopesUntil(unsigned offset,
+ VARSET_TP* inScope,
+ void (Compiler::*enterScopeFn)(VARSET_TP* inScope, VarScopeDsc*),
+ void (Compiler::*exitScopeFn)(VARSET_TP* inScope, VarScopeDsc*))
+{
+ assert(offset != BAD_IL_OFFSET);
+ assert(inScope != nullptr);
+
+ bool foundExit = false, foundEnter = true;
+ VarScopeDsc* scope;
+ VarScopeDsc* nextExitScope = nullptr;
+ VarScopeDsc* nextEnterScope = nullptr;
+ unsigned offs = offset, curEnterOffs = 0;
+
+ goto START_FINDING_SCOPES;
+
+ // We need to determine the scopes which are open for the current block.
+ // This loop walks over the missing blocks between the current and the
+ // previous block, keeping the enter and exit offsets in lockstep.
+
+ do
+ {
+ foundExit = foundEnter = false;
+
+ if (nextExitScope)
+ {
+ (this->*exitScopeFn)(inScope, nextExitScope);
+ nextExitScope = nullptr;
+ foundExit = true;
+ }
+
+ offs = nextEnterScope ? nextEnterScope->vsdLifeBeg : offset;
+
+ while ((scope = compGetNextExitScope(offs, true)) != nullptr)
+ {
+ foundExit = true;
+
+ if (!nextEnterScope || scope->vsdLifeEnd > nextEnterScope->vsdLifeBeg)
+ {
+ // We overshot the last found Enter scope. Save the scope for later
+ // and find an entering scope
+
+ nextExitScope = scope;
+ break;
+ }
+
+ (this->*exitScopeFn)(inScope, scope);
+ }
+
+ if (nextEnterScope)
+ {
+ (this->*enterScopeFn)(inScope, nextEnterScope);
+ curEnterOffs = nextEnterScope->vsdLifeBeg;
+ nextEnterScope = nullptr;
+ foundEnter = true;
+ }
+
+ offs = nextExitScope ? nextExitScope->vsdLifeEnd : offset;
+
+ START_FINDING_SCOPES:
+
+ while ((scope = compGetNextEnterScope(offs, true)) != nullptr)
+ {
+ foundEnter = true;
+
+ if ((nextExitScope && scope->vsdLifeBeg >= nextExitScope->vsdLifeEnd) || (scope->vsdLifeBeg > curEnterOffs))
+ {
+ // We overshot the last found exit scope. Save the scope for later
+ // and find an exiting scope
+
+ nextEnterScope = scope;
+ break;
+ }
+
+ (this->*enterScopeFn)(inScope, scope);
+
+ if (!nextExitScope)
+ {
+ curEnterOffs = scope->vsdLifeBeg;
+ }
+ }
+ } while (foundExit || foundEnter);
+}
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+#if defined(DEBUGGING_SUPPORT) && defined(DEBUG)
+
+void Compiler::compDispScopeLists()
+{
+ unsigned i;
+
+ printf("Local variable scopes = %d\n", info.compVarScopesCount);
+
+ if (info.compVarScopesCount)
+ {
+ printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
+ }
+
+ printf("Sorted by enter scope:\n");
+ for (i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* varScope = compEnterScopeList[i];
+ assert(varScope);
+ printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
+ VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
+ varScope->vsdLifeBeg, varScope->vsdLifeEnd);
+
+ if (compNextEnterScope == i)
+ {
+ printf(" <-- next enter scope");
+ }
+
+ printf("\n");
+ }
+
+ printf("Sorted by exit scope:\n");
+ for (i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* varScope = compExitScopeList[i];
+ assert(varScope);
+ printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh", i, varScope->vsdVarNum, varScope->vsdLVnum,
+ VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
+ varScope->vsdLifeBeg, varScope->vsdLifeEnd);
+
+ if (compNextExitScope == i)
+ {
+ printf(" <-- next exit scope");
+ }
+
+ printf("\n");
+ }
+}
+
+#endif
+
+#if defined(DEBUG)
+
+void Compiler::compDispLocalVars()
+{
+ printf("info.compVarScopesCount = %d\n", info.compVarScopesCount);
+
+ if (info.compVarScopesCount > 0)
+ {
+ printf(" \tVarNum \tLVNum \t Name \tBeg \tEnd\n");
+ }
+
+ for (unsigned i = 0; i < info.compVarScopesCount; i++)
+ {
+ VarScopeDsc* varScope = &info.compVarScopes[i];
+ printf("%2d: \t%02Xh \t%02Xh \t%10s \t%03Xh \t%03Xh\n", i, varScope->vsdVarNum, varScope->vsdLVnum,
+ VarNameToStr(varScope->vsdName) == nullptr ? "UNKNOWN" : VarNameToStr(varScope->vsdName),
+ varScope->vsdLifeBeg, varScope->vsdLifeEnd);
+ }
+}
+
+#endif
+
+/*****************************************************************************/
+
+// Compile a single method
+
+int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ void* inlineInfoPtr)
+{
+ //
+ // A non-NULL inlineInfo means we are compiling the inlinee method.
+ //
+ InlineInfo* inlineInfo = (InlineInfo*)inlineInfoPtr;
+
+ bool jitFallbackCompile = false;
+START:
+ int result = CORJIT_INTERNALERROR;
+
+ ArenaAllocator* pAlloc = nullptr;
+ ArenaAllocator alloc;
+
+ if (inlineInfo)
+ {
+ // Use inliner's memory allocator when compiling the inlinee.
+ pAlloc = inlineInfo->InlinerCompiler->compGetAllocator();
+ }
+ else
+ {
+ IEEMemoryManager* pMemoryManager = compHnd->getMemoryManager();
+
+ // Try to reuse the pre-inited allocator
+ pAlloc = ArenaAllocator::getPooledAllocator(pMemoryManager);
+
+ if (pAlloc == nullptr)
+ {
+ alloc = ArenaAllocator(pMemoryManager);
+ pAlloc = &alloc;
+ }
+ }
+
+ Compiler* pComp;
+ pComp = nullptr;
+
+ struct Param
+ {
+ Compiler* pComp;
+ ArenaAllocator* pAlloc;
+ ArenaAllocator* alloc;
+ bool jitFallbackCompile;
+
+ CORINFO_METHOD_HANDLE methodHnd;
+ CORINFO_MODULE_HANDLE classPtr;
+ COMP_HANDLE compHnd;
+ CORINFO_METHOD_INFO* methodInfo;
+ void** methodCodePtr;
+ ULONG* methodCodeSize;
+ CORJIT_FLAGS* compileFlags;
+ InlineInfo* inlineInfo;
+
+ int result;
+ } param;
+ param.pComp = nullptr;
+ param.pAlloc = pAlloc;
+ param.alloc = &alloc;
+ param.jitFallbackCompile = jitFallbackCompile;
+ param.methodHnd = methodHnd;
+ param.classPtr = classPtr;
+ param.compHnd = compHnd;
+ param.methodInfo = methodInfo;
+ param.methodCodePtr = methodCodePtr;
+ param.methodCodeSize = methodCodeSize;
+ param.compileFlags = compileFlags;
+ param.inlineInfo = inlineInfo;
+ param.result = result;
+
+ setErrorTrap(compHnd, Param*, pParamOuter, &param)
+ {
+ setErrorTrap(nullptr, Param*, pParam, pParamOuter)
+ {
+ if (pParam->inlineInfo)
+ {
+ // Lazily create the inlinee compiler object
+ if (pParam->inlineInfo->InlinerCompiler->InlineeCompiler == nullptr)
+ {
+ pParam->inlineInfo->InlinerCompiler->InlineeCompiler =
+ (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
+ }
+
+ // Use the inlinee compiler object
+ pParam->pComp = pParam->inlineInfo->InlinerCompiler->InlineeCompiler;
+#ifdef DEBUG
+// memset(pParam->pComp, 0xEE, sizeof(Compiler));
+#endif
+ }
+ else
+ {
+                // Allocate the inliner compiler object
+ pParam->pComp = (Compiler*)pParam->pAlloc->allocateMemory(roundUp(sizeof(*pParam->pComp)));
+ }
+
+ // push this compiler on the stack (TLS)
+ pParam->pComp->prevCompiler = JitTls::GetCompiler();
+ JitTls::SetCompiler(pParam->pComp);
+
+// PREFIX_ASSUME gets turned into ASSERT_CHECK and we cannot have it here
+#if defined(_PREFAST_) || defined(_PREFIX_)
+ PREFIX_ASSUME(pParam->pComp != NULL);
+#else
+ assert(pParam->pComp != nullptr);
+#endif
+
+ pParam->pComp->compInit(pParam->pAlloc, pParam->inlineInfo);
+
+#ifdef DEBUG
+ pParam->pComp->jitFallbackCompile = pParam->jitFallbackCompile;
+#endif
+
+ // Now generate the code
+ pParam->result =
+ pParam->pComp->compCompile(pParam->methodHnd, pParam->classPtr, pParam->compHnd, pParam->methodInfo,
+ pParam->methodCodePtr, pParam->methodCodeSize, pParam->compileFlags);
+ }
+ finallyErrorTrap()
+ {
+ // Add a dummy touch to pComp so that it is kept alive, and is easy to get to
+ // during debugging since all other data can be obtained through it.
+ //
+ if (pParamOuter->pComp) // If OOM is thrown when allocating memory for pComp, we will end up here.
+ // In that case, pComp is still NULL.
+ {
+ pParamOuter->pComp->info.compCode = nullptr;
+
+ // pop the compiler off the TLS stack only if it was linked above
+ assert(JitTls::GetCompiler() == pParamOuter->pComp);
+ JitTls::SetCompiler(JitTls::GetCompiler()->prevCompiler);
+ }
+
+ if (pParamOuter->inlineInfo == nullptr)
+ {
+ // Free up the allocator we were using
+ pParamOuter->pAlloc->destroy();
+ }
+ }
+ endErrorTrap()
+ }
+ impJitErrorTrap()
+ {
+ // If we were looking at an inlinee....
+ if (inlineInfo != nullptr)
+ {
+ // Note that we failed to compile the inlinee, and that
+ // there's no point trying to inline it again anywhere else.
+ inlineInfo->inlineResult->NoteFatal(InlineObservation::CALLEE_COMPILATION_ERROR);
+ }
+ param.result = __errc;
+ }
+ endErrorTrap()
+
+ result = param.result;
+
+ if (!inlineInfo && (result == CORJIT_INTERNALERROR || result == CORJIT_RECOVERABLEERROR) && !jitFallbackCompile)
+ {
+ // If we failed the JIT, reattempt with debuggable code.
+ jitFallbackCompile = true;
+
+ // Update the flags for 'safer' code generation.
+ compileFlags->corJitFlags |= CORJIT_FLG_MIN_OPT;
+ compileFlags->corJitFlags &= ~(CORJIT_FLG_SIZE_OPT | CORJIT_FLG_SPEED_OPT);
+
+ goto START;
+ }
+
+ return result;
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+// GetTypeFromClassificationAndSizes:
+// Returns the type of the eightbyte accounting for the classification and size of the eightbyte.
+//
+// Arguments:
+//    classType - classification type
+//    size      - size of the eightbyte.
+//
+// static
+var_types Compiler::GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size)
+{
+ var_types type = TYP_UNKNOWN;
+ switch (classType)
+ {
+ case SystemVClassificationTypeInteger:
+ if (size == 1)
+ {
+ type = TYP_BYTE;
+ }
+ else if (size <= 2)
+ {
+ type = TYP_SHORT;
+ }
+ else if (size <= 4)
+ {
+ type = TYP_INT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ type = TYP_REF;
+ break;
+ case SystemVClassificationTypeIntegerByRef:
+ type = TYP_BYREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (size <= 4)
+ {
+ type = TYP_FLOAT;
+ }
+ else if (size <= 8)
+ {
+ type = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "GetTypeFromClassificationAndSizes Invalid SSE classification type.");
+ }
+ break;
+
+ default:
+ assert(false && "GetTypeFromClassificationAndSizes Invalid classification type.");
+ break;
+ }
+
+ return type;
+}
+
+//-------------------------------------------------------------------
+// GetEightByteType: Returns the type of eightbyte slot of a struct
+//
+// Arguments:
+// structDesc - struct classification description.
+// slotNum - eightbyte slot number for the struct.
+//
+// Return Value:
+// type of the eightbyte slot of the struct
+//
+// static
+var_types Compiler::GetEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ unsigned slotNum)
+{
+ var_types eightByteType = TYP_UNDEF;
+ unsigned len = structDesc.eightByteSizes[slotNum];
+
+ switch (structDesc.eightByteClassifications[slotNum])
+ {
+ case SystemVClassificationTypeInteger:
+            // See typelist.h for the jit type definitions.
+            // All the integer types of size <= 4 bytes map to jit type TYP_INT here.
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_INT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_LONG;
+ }
+ else
+ {
+ assert(false && "GetEightByteType Invalid Integer classification type.");
+ }
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ assert(len == REGSIZE_BYTES);
+ eightByteType = TYP_REF;
+ break;
+ case SystemVClassificationTypeIntegerByRef:
+ assert(len == REGSIZE_BYTES);
+ eightByteType = TYP_BYREF;
+ break;
+ case SystemVClassificationTypeSSE:
+ if (structDesc.eightByteSizes[slotNum] <= 4)
+ {
+ eightByteType = TYP_FLOAT;
+ }
+ else if (structDesc.eightByteSizes[slotNum] <= 8)
+ {
+ eightByteType = TYP_DOUBLE;
+ }
+ else
+ {
+ assert(false && "GetEightByteType Invalid SSE classification type.");
+ }
+ break;
+ default:
+ assert(false && "GetEightByteType Invalid classification type.");
+ break;
+ }
+
+ return eightByteType;
+}
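+
+// For example (hypothetical struct, for illustration only): for a struct { double d; int i; },
+// the SysV classifier reports eightbyte 0 as SSE with size 8 and eightbyte 1 as Integer with
+// size 4, so GetEightByteType returns TYP_DOUBLE for slot 0 and TYP_INT for slot 1.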
+
+//------------------------------------------------------------------------------------------------------
+// GetStructTypeOffset: Gets the type, size and offset of the eightbytes of a struct for System V systems.
+//
+// Arguments:
+// 'structDesc' - struct description
+// 'type0' - out param; returns the type of the first eightbyte.
+// 'type1' - out param; returns the type of the second eightbyte.
+// 'offset0' - out param; returns the offset of the first eightbyte.
+// 'offset1' - out param; returns the offset of the second eightbyte.
+//
+// static
+void Compiler::GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0,
+ var_types* type1,
+ unsigned __int8* offset0,
+ unsigned __int8* offset1)
+{
+ *offset0 = structDesc.eightByteOffsets[0];
+ *offset1 = structDesc.eightByteOffsets[1];
+
+ *type0 = TYP_UNKNOWN;
+ *type1 = TYP_UNKNOWN;
+
+ // Set the first eightbyte data
+ if (structDesc.eightByteCount >= 1)
+ {
+ *type0 = GetEightByteType(structDesc, 0);
+ }
+
+ // Set the second eight byte data
+ if (structDesc.eightByteCount == 2)
+ {
+ *type1 = GetEightByteType(structDesc, 1);
+ }
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+Compiler::NodeToIntMap* Compiler::FindReachableNodesInNodeTestData()
+{
+ NodeToIntMap* reachable = new (getAllocatorDebugOnly()) NodeToIntMap(getAllocatorDebugOnly());
+
+ if (m_nodeTestData == nullptr)
+ {
+ return reachable;
+ }
+
+ // Otherwise, iterate.
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt != nullptr; stmt = stmt->gtNext)
+ {
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ TestLabelAndNum tlAndN;
+
+ // For call nodes, translate late args to what they stand for.
+ if (tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+ GenTreeArgList* args = call->gtCallArgs;
+ unsigned i = 0;
+ while (args != nullptr)
+ {
+ GenTreePtr arg = args->Current();
+ if (arg->gtFlags & GTF_LATE_ARG)
+ {
+ // Find the corresponding late arg.
+ GenTreePtr lateArg = nullptr;
+ for (unsigned j = 0; j < call->fgArgInfo->ArgCount(); j++)
+ {
+ if (call->fgArgInfo->ArgTable()[j]->argNum == i)
+ {
+ lateArg = call->fgArgInfo->ArgTable()[j]->node;
+ break;
+ }
+ }
+ assert(lateArg != nullptr);
+ if (GetNodeTestData()->Lookup(lateArg, &tlAndN))
+ {
+ reachable->Set(lateArg, 0);
+ }
+ }
+ i++;
+ args = args->Rest();
+ }
+ }
+
+ if (GetNodeTestData()->Lookup(tree, &tlAndN))
+ {
+ reachable->Set(tree, 0);
+ }
+ }
+ }
+ }
+ return reachable;
+}
+
+void Compiler::TransferTestDataToNode(GenTreePtr from, GenTreePtr to)
+{
+ TestLabelAndNum tlAndN;
+ // We can't currently associate multiple annotations with a single node.
+ // If we need to, we can fix this...
+
+ // If the table is null, don't create it just to do the lookup, which would fail...
+ if (m_nodeTestData != nullptr && GetNodeTestData()->Lookup(from, &tlAndN))
+ {
+ assert(!GetNodeTestData()->Lookup(to, &tlAndN));
+ // We can't currently associate multiple annotations with a single node.
+ // If we need to, we can fix this...
+ TestLabelAndNum tlAndNTo;
+ assert(!GetNodeTestData()->Lookup(to, &tlAndNTo));
+
+ GetNodeTestData()->Remove(from);
+ GetNodeTestData()->Set(to, tlAndN);
+ }
+}
+
+void Compiler::CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to)
+{
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+ if (from == nullptr)
+ {
+ assert(to == nullptr);
+ return;
+ }
+ // Otherwise...
+ TestLabelAndNum tlAndN;
+ if (GetNodeTestData()->Lookup(from, &tlAndN))
+ {
+ // We can't currently associate multiple annotations with a single node.
+ // If we need to, we can fix this...
+ TestLabelAndNum tlAndNTo;
+ assert(!GetNodeTestData()->Lookup(to, &tlAndNTo));
+ GetNodeTestData()->Set(to, tlAndN);
+ }
+ // Now recurse, in parallel on both trees.
+
+ genTreeOps oper = from->OperGet();
+ unsigned kind = from->OperKind();
+ assert(oper == to->OperGet());
+
+    // Constant or leaf nodes have no children.
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ return;
+ }
+
+ // Otherwise, is it a 'simple' unary/binary operator?
+
+ if (kind & GTK_SMPOP)
+ {
+ if (from->gtOp.gtOp1 != nullptr)
+ {
+ assert(to->gtOp.gtOp1 != nullptr);
+ CopyTestDataToCloneTree(from->gtOp.gtOp1, to->gtOp.gtOp1);
+ }
+ else
+ {
+ assert(to->gtOp.gtOp1 == nullptr);
+ }
+
+ if (from->gtGetOp2() != nullptr)
+ {
+ assert(to->gtGetOp2() != nullptr);
+ CopyTestDataToCloneTree(from->gtGetOp2(), to->gtGetOp2());
+ }
+ else
+ {
+ assert(to->gtGetOp2() == nullptr);
+ }
+
+ return;
+ }
+
+ // Otherwise, see what kind of a special operator we have here.
+
+ switch (oper)
+ {
+ case GT_STMT:
+ CopyTestDataToCloneTree(from->gtStmt.gtStmtExpr, to->gtStmt.gtStmtExpr);
+ return;
+
+ case GT_CALL:
+ CopyTestDataToCloneTree(from->gtCall.gtCallObjp, to->gtCall.gtCallObjp);
+ CopyTestDataToCloneTree(from->gtCall.gtCallArgs, to->gtCall.gtCallArgs);
+ CopyTestDataToCloneTree(from->gtCall.gtCallLateArgs, to->gtCall.gtCallLateArgs);
+
+ if (from->gtCall.gtCallType == CT_INDIRECT)
+ {
+ CopyTestDataToCloneTree(from->gtCall.gtCallCookie, to->gtCall.gtCallCookie);
+ CopyTestDataToCloneTree(from->gtCall.gtCallAddr, to->gtCall.gtCallAddr);
+ }
+ // The other call types do not have additional GenTree arguments.
+
+ return;
+
+ case GT_FIELD:
+ CopyTestDataToCloneTree(from->gtField.gtFldObj, to->gtField.gtFldObj);
+ return;
+
+ case GT_ARR_ELEM:
+ assert(from->gtArrElem.gtArrRank == to->gtArrElem.gtArrRank);
+ for (unsigned dim = 0; dim < from->gtArrElem.gtArrRank; dim++)
+ {
+ CopyTestDataToCloneTree(from->gtArrElem.gtArrInds[dim], to->gtArrElem.gtArrInds[dim]);
+ }
+ CopyTestDataToCloneTree(from->gtArrElem.gtArrObj, to->gtArrElem.gtArrObj);
+ return;
+
+ case GT_CMPXCHG:
+ CopyTestDataToCloneTree(from->gtCmpXchg.gtOpLocation, to->gtCmpXchg.gtOpLocation);
+ CopyTestDataToCloneTree(from->gtCmpXchg.gtOpValue, to->gtCmpXchg.gtOpValue);
+ CopyTestDataToCloneTree(from->gtCmpXchg.gtOpComparand, to->gtCmpXchg.gtOpComparand);
+ return;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ CopyTestDataToCloneTree(from->gtBoundsChk.gtArrLen, to->gtBoundsChk.gtArrLen);
+ CopyTestDataToCloneTree(from->gtBoundsChk.gtIndex, to->gtBoundsChk.gtIndex);
+ return;
+
+ default:
+ unreached();
+ }
+}
+
+#endif // DEBUG
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX jvc XX
+XX XX
+XX Functions for the stand-alone version of the JIT . XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+void codeGeneratorCodeSizeBeg()
+{
+}
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * If any temporary tables are smaller than 'genMinSize2free' we won't bother
+ * freeing them.
+ */
+
+const size_t genMinSize2free = 64;
+
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Used for counting pointer assignments.
+ */
+
+/*****************************************************************************/
+void codeGeneratorCodeSizeEnd()
+{
+}
+/*****************************************************************************
+ *
+ *  Gather statistics - mainly used for the standalone JIT.
+ *  Enable various #ifdef's to get the information you need.
+ */
+
+void Compiler::compJitStats()
+{
+#if CALL_ARG_STATS
+
+ /* Method types and argument statistics */
+ compCallArgStats();
+#endif // CALL_ARG_STATS
+}
+
+#if CALL_ARG_STATS
+
+/*****************************************************************************
+ *
+ * Gather statistics about method calls and arguments
+ */
+
+void Compiler::compCallArgStats()
+{
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ BasicBlock* block;
+ GenTreePtr stmt;
+ GenTreePtr call;
+
+ unsigned argNum;
+
+ unsigned argDWordNum;
+ unsigned argLngNum;
+ unsigned argFltNum;
+ unsigned argDblNum;
+
+ unsigned regArgNum;
+ unsigned regArgDeferred;
+ unsigned regArgTemp;
+
+ unsigned regArgLclVar;
+ unsigned regArgConst;
+
+ unsigned argTempsThisMethod = 0;
+
+ assert(fgStmtListThreaded);
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ for (call = stmt->gtStmt.gtStmtList; call; call = call->gtNext)
+ {
+ if (call->gtOper != GT_CALL)
+ continue;
+
+ argNum =
+
+ regArgNum = regArgDeferred = regArgTemp =
+
+ regArgConst = regArgLclVar =
+
+ argDWordNum = argLngNum = argFltNum = argDblNum = 0;
+
+ argTotalCalls++;
+
+ if (!call->gtCall.gtCallObjp)
+ {
+ if (call->gtCall.gtCallType == CT_HELPER)
+ {
+ argHelperCalls++;
+ }
+ else
+ {
+ argStaticCalls++;
+ }
+ }
+ else
+ {
+ /* We have a 'this' pointer */
+
+ argDWordNum++;
+ argNum++;
+ regArgNum++;
+ regArgDeferred++;
+ argTotalObjPtr++;
+
+ if (call->gtFlags & (GTF_CALL_VIRT_VTABLE | GTF_CALL_VIRT_STUB))
+ {
+ /* virtual function */
+ argVirtualCalls++;
+ }
+ else
+ {
+ argNonVirtualCalls++;
+ }
+ }
+
+#ifdef LEGACY_BACKEND
+                // TODO-Cleanup: We need to add support below for additional node types
+                // that the RyuJIT backend has in the IR.
+                // Gather arguments information.
+
+ for (args = call->gtCall.gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+
+ argNum++;
+
+ switch (genActualType(argx->TypeGet()))
+ {
+ case TYP_INT:
+ case TYP_REF:
+ case TYP_BYREF:
+ argDWordNum++;
+ break;
+
+ case TYP_LONG:
+ argLngNum++;
+ break;
+
+ case TYP_FLOAT:
+ argFltNum++;
+ break;
+
+ case TYP_DOUBLE:
+ argDblNum++;
+ break;
+
+ case TYP_VOID:
+ /* This is a deferred register argument */
+ assert(argx->gtOper == GT_NOP);
+ assert(argx->gtFlags & GTF_LATE_ARG);
+ argDWordNum++;
+ break;
+ }
+
+ /* Is this argument a register argument? */
+
+ if (argx->gtFlags & GTF_LATE_ARG)
+ {
+ regArgNum++;
+
+ /* We either have a deferred argument or a temp */
+
+ if (argx->gtOper == GT_NOP)
+ {
+ regArgDeferred++;
+ }
+ else
+ {
+ assert(argx->gtOper == GT_ASG);
+ regArgTemp++;
+ }
+ }
+ }
+
+ /* Look at the register arguments and count how many constants, local vars */
+
+ for (args = call->gtCall.gtCallLateArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+
+ switch (argx->gtOper)
+ {
+ case GT_CNS_INT:
+ regArgConst++;
+ break;
+
+ case GT_LCL_VAR:
+ regArgLclVar++;
+ break;
+ }
+ }
+
+ assert(argNum == argDWordNum + argLngNum + argFltNum + argDblNum);
+ assert(regArgNum == regArgDeferred + regArgTemp);
+
+ argTotalArgs += argNum;
+ argTotalRegArgs += regArgNum;
+
+ argTotalDWordArgs += argDWordNum;
+ argTotalLongArgs += argLngNum;
+ argTotalFloatArgs += argFltNum;
+ argTotalDoubleArgs += argDblNum;
+
+ argTotalDeferred += regArgDeferred;
+ argTotalTemps += regArgTemp;
+ argTotalConst += regArgConst;
+ argTotalLclVar += regArgLclVar;
+
+ argTempsThisMethod += regArgTemp;
+
+ argCntTable.record(argNum);
+ argDWordCntTable.record(argDWordNum);
+ argDWordLngCntTable.record(argDWordNum + (2 * argLngNum));
+#endif // LEGACY_BACKEND
+ }
+ }
+ }
+
+ argTempsCntTable.record(argTempsThisMethod);
+
+ if (argMaxTempsPerMethod < argTempsThisMethod)
+ {
+ argMaxTempsPerMethod = argTempsThisMethod;
+ }
+}
+
+/* static */
+void Compiler::compDispCallArgStats(FILE* fout)
+{
+ if (argTotalCalls == 0)
+ return;
+
+ fprintf(fout, "\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Call stats\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Total # of calls = %d, calls / method = %.3f\n\n", argTotalCalls,
+ (float)argTotalCalls / genMethodCnt);
+
+ fprintf(fout, "Percentage of helper calls = %4.2f %%\n", (float)(100 * argHelperCalls) / argTotalCalls);
+ fprintf(fout, "Percentage of static calls = %4.2f %%\n", (float)(100 * argStaticCalls) / argTotalCalls);
+ fprintf(fout, "Percentage of virtual calls = %4.2f %%\n", (float)(100 * argVirtualCalls) / argTotalCalls);
+ fprintf(fout, "Percentage of non-virtual calls = %4.2f %%\n\n", (float)(100 * argNonVirtualCalls) / argTotalCalls);
+
+    fprintf(fout, "Average # of arguments per call = %.2f\n\n", (float)argTotalArgs / argTotalCalls);
+
+ fprintf(fout, "Percentage of DWORD arguments = %.2f %%\n", (float)(100 * argTotalDWordArgs) / argTotalArgs);
+ fprintf(fout, "Percentage of LONG arguments = %.2f %%\n", (float)(100 * argTotalLongArgs) / argTotalArgs);
+ fprintf(fout, "Percentage of FLOAT arguments = %.2f %%\n", (float)(100 * argTotalFloatArgs) / argTotalArgs);
+ fprintf(fout, "Percentage of DOUBLE arguments = %.2f %%\n\n", (float)(100 * argTotalDoubleArgs) / argTotalArgs);
+
+ if (argTotalRegArgs == 0)
+ return;
+
+ /*
+ fprintf(fout, "Total deferred arguments = %d \n", argTotalDeferred);
+
+ fprintf(fout, "Total temp arguments = %d \n\n", argTotalTemps);
+
+ fprintf(fout, "Total 'this' arguments = %d \n", argTotalObjPtr);
+ fprintf(fout, "Total local var arguments = %d \n", argTotalLclVar);
+ fprintf(fout, "Total constant arguments = %d \n\n", argTotalConst);
+ */
+
+ fprintf(fout, "\nRegister Arguments:\n\n");
+
+ fprintf(fout, "Percentage of deferred arguments = %.2f %%\n", (float)(100 * argTotalDeferred) / argTotalRegArgs);
+ fprintf(fout, "Percentage of temp arguments = %.2f %%\n\n", (float)(100 * argTotalTemps) / argTotalRegArgs);
+
+ fprintf(fout, "Maximum # of temps per method = %d\n\n", argMaxTempsPerMethod);
+
+ fprintf(fout, "Percentage of ObjPtr arguments = %.2f %%\n", (float)(100 * argTotalObjPtr) / argTotalRegArgs);
+ // fprintf(fout, "Percentage of global arguments = %.2f %%\n", (float)(100 * argTotalDWordGlobEf) /
+ // argTotalRegArgs);
+ fprintf(fout, "Percentage of constant arguments = %.2f %%\n", (float)(100 * argTotalConst) / argTotalRegArgs);
+ fprintf(fout, "Percentage of lcl var arguments = %.2f %%\n\n", (float)(100 * argTotalLclVar) / argTotalRegArgs);
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Argument count frequency table (includes ObjPtr):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "DWORD argument count frequency table (w/o LONG):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argDWordCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "Temps count frequency table (per method):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argTempsCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+
+ /*
+ fprintf(fout, "--------------------------------------------------\n");
+ fprintf(fout, "DWORD argument count frequency table (w/ LONG):\n");
+ fprintf(fout, "--------------------------------------------------\n");
+ argDWordLngCntTable.dump(fout);
+ fprintf(fout, "--------------------------------------------------\n");
+ */
+}
+
+#endif // CALL_ARG_STATS
+
+// JIT time end to end, and by phases.
+
+#ifdef FEATURE_JIT_METHOD_PERF
+// Static variables
+CritSecObject CompTimeSummaryInfo::s_compTimeSummaryLock;
+CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary;
+#endif // FEATURE_JIT_METHOD_PERF
+
+#if defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
+const char* PhaseNames[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) string_nm,
+#include "compphases.h"
+};
+
+const char* PhaseEnums[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) #enum_nm,
+#include "compphases.h"
+};
+
+const LPCWSTR PhaseShortNames[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) W(short_nm),
+#include "compphases.h"
+};
+#endif // defined(FEATURE_JIT_METHOD_PERF) || DUMP_FLOWGRAPHS
+
+#ifdef FEATURE_JIT_METHOD_PERF
+bool PhaseHasChildren[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) hasChildren,
+#include "compphases.h"
+};
+
+int PhaseParent[] = {
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) parent,
+#include "compphases.h"
+};
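+
+// The tables above are all generated with the same X-macro pattern: compphases.h invokes
+// CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) once per phase, and each
+// table definition supplies a macro body that keeps only the field it needs (string name, enum
+// name, short name, hasChildren flag, or parent index). This keeps every table in sync with the
+// Phases enum by construction.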
+
+CompTimeInfo::CompTimeInfo(unsigned byteCodeBytes)
+ : m_byteCodeBytes(byteCodeBytes), m_totalCycles(0), m_parentPhaseEndSlop(0), m_timerFailure(false)
+{
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ m_invokesByPhase[i] = 0;
+ m_cyclesByPhase[i] = 0;
+ }
+}
+
+bool CompTimeSummaryInfo::IncludedInFilteredData(CompTimeInfo& info)
+{
+ return false; // info.m_byteCodeBytes < 10;
+}
+
+void CompTimeSummaryInfo::AddInfo(CompTimeInfo& info)
+{
+ if (info.m_timerFailure)
+ return; // Don't update if there was a failure.
+
+ CritSecHolder timeLock(s_compTimeSummaryLock);
+ m_numMethods++;
+
+ bool includeInFiltered = IncludedInFilteredData(info);
+
+ // Update the totals and maxima.
+ m_total.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_maximum.m_byteCodeBytes = max(m_maximum.m_byteCodeBytes, info.m_byteCodeBytes);
+ m_total.m_totalCycles += info.m_totalCycles;
+ m_maximum.m_totalCycles = max(m_maximum.m_totalCycles, info.m_totalCycles);
+
+ if (includeInFiltered)
+ {
+ m_numFilteredMethods++;
+ m_filtered.m_byteCodeBytes += info.m_byteCodeBytes;
+ m_filtered.m_totalCycles += info.m_totalCycles;
+ m_filtered.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ }
+
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ m_total.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_total.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+ if (includeInFiltered)
+ {
+ m_filtered.m_invokesByPhase[i] += info.m_invokesByPhase[i];
+ m_filtered.m_cyclesByPhase[i] += info.m_cyclesByPhase[i];
+ }
+ m_maximum.m_cyclesByPhase[i] = max(m_maximum.m_cyclesByPhase[i], info.m_cyclesByPhase[i]);
+ }
+ m_total.m_parentPhaseEndSlop += info.m_parentPhaseEndSlop;
+ m_maximum.m_parentPhaseEndSlop = max(m_maximum.m_parentPhaseEndSlop, info.m_parentPhaseEndSlop);
+}
+
+// Static
+LPCWSTR Compiler::compJitTimeLogFilename = NULL;
+
+void CompTimeSummaryInfo::Print(FILE* f)
+{
+ if (f == NULL)
+ return;
+ // Otherwise...
+ double countsPerSec = CycleTimer::CyclesPerSecond();
+ if (countsPerSec == 0.0)
+ {
+ fprintf(f, "Processor does not have a high-frequency timer.\n");
+ return;
+ }
+
+ fprintf(f, "JIT Compilation time report:\n");
+ fprintf(f, " Compiled %d methods.\n", m_numMethods);
+ if (m_numMethods != 0)
+ {
+ fprintf(f, " Compiled %d bytecodes total (%d max, %8.2f avg).\n", m_total.m_byteCodeBytes,
+ m_maximum.m_byteCodeBytes, (double)m_total.m_byteCodeBytes / (double)m_numMethods);
+ double totTime_ms = ((double)m_total.m_totalCycles / countsPerSec) * 1000.0;
+ fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_total.m_totalCycles / 1000000.0),
+ totTime_ms);
+ fprintf(f, " max: %10.3f Mcycles/%10.3f ms\n", ((double)m_maximum.m_totalCycles) / 1000000.0,
+ ((double)m_maximum.m_totalCycles / countsPerSec) * 1000.0);
+ fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
+ ((double)m_total.m_totalCycles) / 1000000.0 / (double)m_numMethods, totTime_ms / (double)m_numMethods);
+
+ fprintf(f, " Total time by phases:\n");
+ fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total max (ms)\n");
+ fprintf(f, " --------------------------------------------------------------------------------------\n");
+ // Ensure that at least the names array and the Phases enum have the same number of entries:
+ assert(sizeof(PhaseNames) / sizeof(const char*) == PHASE_NUMBER_OF);
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ double phase_tot_ms = (((double)m_total.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ double phase_max_ms = (((double)m_maximum.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ // Indent nested phases, according to depth.
+ int ancPhase = PhaseParent[i];
+ while (ancPhase != -1)
+ {
+ fprintf(f, " ");
+ ancPhase = PhaseParent[ancPhase];
+ }
+ fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%% %8.3f\n", PhaseNames[i],
+ ((double)m_total.m_invokesByPhase[i]) / ((double)m_numMethods),
+ ((double)m_total.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms, (phase_tot_ms * 100.0 / totTime_ms),
+ phase_max_ms);
+ }
+ fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
+ m_total.m_parentPhaseEndSlop);
+ }
+ if (m_numFilteredMethods > 0)
+ {
+ fprintf(f, " Compiled %d methods that meet the filter requirement.\n", m_numFilteredMethods);
+ fprintf(f, " Compiled %d bytecodes total (%8.2f avg).\n", m_filtered.m_byteCodeBytes,
+ (double)m_filtered.m_byteCodeBytes / (double)m_numFilteredMethods);
+ double totTime_ms = ((double)m_filtered.m_totalCycles / countsPerSec) * 1000.0;
+ fprintf(f, " Time: total: %10.3f Mcycles/%10.3f ms\n", ((double)m_filtered.m_totalCycles / 1000000.0),
+ totTime_ms);
+ fprintf(f, " avg: %10.3f Mcycles/%10.3f ms\n",
+ ((double)m_filtered.m_totalCycles) / 1000000.0 / (double)m_numFilteredMethods,
+ totTime_ms / (double)m_numFilteredMethods);
+
+ fprintf(f, " Total time by phases:\n");
+ fprintf(f, " PHASE inv/meth Mcycles time (ms) %% of total\n");
+ fprintf(f, " --------------------------------------------------------------------------------------\n");
+ // Ensure that at least the names array and the Phases enum have the same number of entries:
+ assert(sizeof(PhaseNames) / sizeof(const char*) == PHASE_NUMBER_OF);
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ double phase_tot_ms = (((double)m_filtered.m_cyclesByPhase[i]) / countsPerSec) * 1000.0;
+ // Indent nested phases, according to depth.
+ int ancPhase = PhaseParent[i];
+ while (ancPhase != -1)
+ {
+ fprintf(f, " ");
+ ancPhase = PhaseParent[ancPhase];
+ }
+ fprintf(f, " %-30s %5.2f %10.2f %9.3f %8.2f%%\n", PhaseNames[i],
+ ((double)m_filtered.m_invokesByPhase[i]) / ((double)m_numFilteredMethods),
+ ((double)m_filtered.m_cyclesByPhase[i]) / 1000000.0, phase_tot_ms,
+ (phase_tot_ms * 100.0 / totTime_ms));
+ }
+ fprintf(f, "\n 'End phase slop' should be very small (if not, there's unattributed time): %9.3f Mcycles.\n",
+ m_filtered.m_parentPhaseEndSlop);
+ }
+}
+
+JitTimer::JitTimer(unsigned byteCodeSize) : m_info(byteCodeSize)
+{
+#ifdef DEBUG
+ m_lastPhase = (Phases)-1;
+#endif
+
+ unsigned __int64 threadCurCycles;
+ if (GetThreadCycles(&threadCurCycles))
+ {
+ m_start = threadCurCycles;
+ m_curPhaseStart = threadCurCycles;
+ }
+}
+
+void JitTimer::EndPhase(Phases phase)
+{
+    // We currently re-run some phases, so the following assert doesn't hold:
+    // assert((int)phase > (int)m_lastPhase); // We should end phases in increasing order.
+
+ unsigned __int64 threadCurCycles;
+ if (GetThreadCycles(&threadCurCycles))
+ {
+ unsigned __int64 phaseCycles = (threadCurCycles - m_curPhaseStart);
+ // If this is not a leaf phase, the assumption is that the last subphase must have just recently ended.
+ // Credit the duration to "slop", the total of which should be very small.
+ if (PhaseHasChildren[phase])
+ {
+ m_info.m_parentPhaseEndSlop += phaseCycles;
+ }
+ else
+ {
+ // It is a leaf phase. Credit duration to it.
+ m_info.m_invokesByPhase[phase]++;
+ m_info.m_cyclesByPhase[phase] += phaseCycles;
+ // Credit the phase's ancestors, if any.
+ int ancPhase = PhaseParent[phase];
+ while (ancPhase != -1)
+ {
+ m_info.m_cyclesByPhase[ancPhase] += phaseCycles;
+ ancPhase = PhaseParent[ancPhase];
+ }
+ // Did we just end the last phase?
+ if (phase + 1 == PHASE_NUMBER_OF)
+ {
+ m_info.m_totalCycles = (threadCurCycles - m_start);
+ }
+ else
+ {
+ m_curPhaseStart = threadCurCycles;
+ }
+ }
+ }
+#ifdef DEBUG
+ m_lastPhase = phase;
+#endif
+}
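+
+// Usage sketch (illustrative only -- the variable names and surrounding control flow below are
+// hypothetical, and PHASE_PRE_IMPORT is assumed to be one of the entries in compphases.h):
+// a JitTimer is created with the method's IL size, EndPhase() is called as each phase finishes,
+// and Terminate() validates the totals and folds them into the process-wide summary.
+//
+//     JitTimer timer(methodILCodeSize);   // start timing this method
+//     // ... run the pre-import work ...
+//     timer.EndPhase(PHASE_PRE_IMPORT);   // credit the elapsed cycles to that phase
+//     // ... run the remaining phases, ending each one the same way ...
+//     timer.Terminate(compiler, CompTimeSummaryInfo::s_compTimeSummary);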
+
+CritSecObject JitTimer::s_csvLock;
+
+LPCWSTR Compiler::JitTimeLogCsv()
+{
+ LPCWSTR jitTimeLogCsv = JitConfig.JitTimeLogCsv();
+ return jitTimeLogCsv;
+}
+
+void JitTimer::PrintCsvHeader()
+{
+ LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
+ if (jitTimeLogCsv == NULL)
+ {
+ return;
+ }
+
+ CritSecHolder csvLock(s_csvLock);
+
+ FILE* fp = _wfopen(jitTimeLogCsv, W("r"));
+ if (fp == nullptr)
+ {
+ // File doesn't exist, so create it and write the header
+
+ // Use write mode, so we rewrite the file, and retain only the last compiled process/dll.
+ // Ex: ngen install mscorlib won't print stats for "ngen" but for "mscorsvw"
+        fp = _wfopen(jitTimeLogCsv, W("w"));
+        if (fp == nullptr)
+        {
+            return; // Couldn't create the file; nothing is open, so just bail out.
+        }
+ fprintf(fp, "\"Method Name\",");
+ fprintf(fp, "\"Method Index\",");
+ fprintf(fp, "\"IL Bytes\",");
+ fprintf(fp, "\"Basic Blocks\",");
+ fprintf(fp, "\"Opt Level\",");
+ fprintf(fp, "\"Loops Cloned\",");
+
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ fprintf(fp, "\"%s\",", PhaseNames[i]);
+ }
+
+ InlineStrategy::DumpCsvHeader(fp);
+
+ fprintf(fp, "\"Total Cycles\",");
+ fprintf(fp, "\"CPS\"\n");
+ }
+ fclose(fp);
+}
+
+extern ICorJitHost* g_jitHost;
+
+void JitTimer::PrintCsvMethodStats(Compiler* comp)
+{
+ LPCWSTR jitTimeLogCsv = Compiler::JitTimeLogCsv();
+ if (jitTimeLogCsv == NULL)
+ {
+ return;
+ }
+
+ // eeGetMethodFullName uses locks, so don't enter crit sec before this call.
+ const char* methName = comp->eeGetMethodFullName(comp->info.compMethodHnd);
+
+ // Try and access the SPMI index to report in the data set.
+ //
+ // If the jit is not hosted under SPMI this will return the
+ // default value of zero.
+ //
+ // Query the jit host directly here instead of going via the
+ // config cache, since value will change for each method.
+ int index = g_jitHost->getIntConfigValue(W("SuperPMIMethodContextNumber"), 0);
+
+ CritSecHolder csvLock(s_csvLock);
+
+    FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
+    if (fp == nullptr)
+    {
+        return; // Couldn't open the log file; skip this method's stats.
+    }
+ fprintf(fp, "\"%s\",", methName);
+ fprintf(fp, "%d,", index);
+ fprintf(fp, "%u,", comp->info.compILCodeSize);
+ fprintf(fp, "%u,", comp->fgBBcount);
+ fprintf(fp, "%u,", comp->opts.MinOpts());
+ fprintf(fp, "%u,", comp->optLoopsCloned);
+ unsigned __int64 totCycles = 0;
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ if (!PhaseHasChildren[i])
+ totCycles += m_info.m_cyclesByPhase[i];
+ fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]);
+ }
+
+ comp->m_inlineStrategy->DumpCsvData(fp);
+
+ fprintf(fp, "%I64u,", m_info.m_totalCycles);
+ fprintf(fp, "%f\n", CycleTimer::CyclesPerSecond());
+ fclose(fp);
+}
+
+// Completes the timing of the current method, and adds it to "sum".
+void JitTimer::Terminate(Compiler* comp, CompTimeSummaryInfo& sum)
+{
+#ifdef DEBUG
+ unsigned __int64 totCycles2 = 0;
+ for (int i = 0; i < PHASE_NUMBER_OF; i++)
+ {
+ if (!PhaseHasChildren[i])
+ totCycles2 += m_info.m_cyclesByPhase[i];
+ }
+ // We include m_parentPhaseEndSlop in the next phase's time also (we probably shouldn't)
+ // totCycles2 += m_info.m_parentPhaseEndSlop;
+ assert(totCycles2 == m_info.m_totalCycles);
+#endif
+
+ PrintCsvMethodStats(comp);
+
+ sum.AddInfo(m_info);
+}
+#endif // FEATURE_JIT_METHOD_PERF
+
+#if MEASURE_MEM_ALLOC
+// static vars.
+CritSecObject Compiler::s_memStatsLock; // Default constructor.
+Compiler::AggregateMemStats Compiler::s_aggMemStats; // Default constructor.
+Compiler::MemStats Compiler::s_maxCompMemStats; // Default constructor.
+
+const char* Compiler::MemStats::s_CompMemKindNames[] = {
+#define CompMemKindMacro(kind) #kind,
+#include "compmemkind.h"
+};
+
+void Compiler::MemStats::Print(FILE* f)
+{
+ fprintf(f, "count: %10u, size: %10llu, max = %10llu\n", allocCnt, allocSz, allocSzMax);
+ fprintf(f, "allocateMemory: %10llu, nraUsed: %10llu\n", nraTotalSizeAlloc, nraTotalSizeUsed);
+ PrintByKind(f);
+}
+
+void Compiler::MemStats::PrintByKind(FILE* f)
+{
+ fprintf(f, "\nAlloc'd bytes by kind:\n %20s | %10s | %7s\n", "kind", "size", "pct");
+ fprintf(f, " %20s-+-%10s-+-%7s\n", "--------------------", "----------", "-------");
+ float allocSzF = static_cast<float>(allocSz);
+ for (int cmk = 0; cmk < CMK_Count; cmk++)
+ {
+ float pct = 100.0f * static_cast<float>(allocSzByKind[cmk]) / allocSzF;
+ fprintf(f, " %20s | %10llu | %6.2f%%\n", s_CompMemKindNames[cmk], allocSzByKind[cmk], pct);
+ }
+ fprintf(f, "\n");
+}
+
+void Compiler::AggregateMemStats::Print(FILE* f)
+{
+ fprintf(f, "For %9u methods:\n", nMethods);
+ fprintf(f, " count: %12u (avg %7u per method)\n", allocCnt, allocCnt / nMethods);
+ fprintf(f, " alloc size : %12llu (avg %7llu per method)\n", allocSz, allocSz / nMethods);
+ fprintf(f, " max alloc : %12llu\n", allocSzMax);
+ fprintf(f, "\n");
+ fprintf(f, " allocateMemory : %12llu (avg %7llu per method)\n", nraTotalSizeAlloc, nraTotalSizeAlloc / nMethods);
+ fprintf(f, " nraUsed : %12llu (avg %7llu per method)\n", nraTotalSizeUsed, nraTotalSizeUsed / nMethods);
+ PrintByKind(f);
+}
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+// Static fields.
+CritSecObject Compiler::s_loopHoistStatsLock; // Default constructor.
+unsigned Compiler::s_loopsConsidered = 0;
+unsigned Compiler::s_loopsWithHoistedExpressions = 0;
+unsigned Compiler::s_totalHoistedExpressions = 0;
+
+// static
+void Compiler::PrintAggregateLoopHoistStats(FILE* f)
+{
+ fprintf(f, "\n");
+ fprintf(f, "---------------------------------------------------\n");
+ fprintf(f, "Loop hoisting stats\n");
+ fprintf(f, "---------------------------------------------------\n");
+
+ double pctWithHoisted = 0.0;
+ if (s_loopsConsidered > 0)
+ {
+ pctWithHoisted = 100.0 * (double(s_loopsWithHoistedExpressions) / double(s_loopsConsidered));
+ }
+ double exprsPerLoopWithExpr = 0.0;
+ if (s_loopsWithHoistedExpressions > 0)
+ {
+ exprsPerLoopWithExpr = double(s_totalHoistedExpressions) / double(s_loopsWithHoistedExpressions);
+ }
+ fprintf(f, "Considered %d loops. Of these, we hoisted expressions out of %d (%6.2f%%).\n", s_loopsConsidered,
+ s_loopsWithHoistedExpressions, pctWithHoisted);
+ fprintf(f, " A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
+ s_totalHoistedExpressions, exprsPerLoopWithExpr);
+}
+
+void Compiler::AddLoopHoistStats()
+{
+ CritSecHolder statsLock(s_loopHoistStatsLock);
+
+ s_loopsConsidered += m_loopsConsidered;
+ s_loopsWithHoistedExpressions += m_loopsWithHoistedExpressions;
+ s_totalHoistedExpressions += m_totalHoistedExpressions;
+}
+
+void Compiler::PrintPerMethodLoopHoistStats()
+{
+ double pctWithHoisted = 0.0;
+ if (m_loopsConsidered > 0)
+ {
+ pctWithHoisted = 100.0 * (double(m_loopsWithHoistedExpressions) / double(m_loopsConsidered));
+ }
+ double exprsPerLoopWithExpr = 0.0;
+ if (m_loopsWithHoistedExpressions > 0)
+ {
+ exprsPerLoopWithExpr = double(m_totalHoistedExpressions) / double(m_loopsWithHoistedExpressions);
+ }
+ printf("Considered %d loops. Of these, we hoisted expressions out of %d (%5.2f%%).\n", m_loopsConsidered,
+ m_loopsWithHoistedExpressions, pctWithHoisted);
+ printf(" A total of %d expressions were hoisted, an average of %5.2f per loop-with-hoisted-expr.\n",
+ m_totalHoistedExpressions, exprsPerLoopWithExpr);
+}
+#endif // LOOP_HOIST_STATS
+
+//------------------------------------------------------------------------
+// RecordStateAtEndOfInlining: capture timing data (if enabled) after
+// inlining has completed.
+//
+// Note:
+// Records data needed for SQM and inlining data dumps. Should be
+// called after inlining is complete. (We do this after inlining
+// because this marks the last point at which the JIT is likely to
+// cause type-loading and class initialization).
+
+void Compiler::RecordStateAtEndOfInlining()
+{
+#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+ m_compCyclesAtEndOfInlining = 0;
+ m_compTickCountAtEndOfInlining = 0;
+ bool b = CycleTimer::GetThreadCyclesS(&m_compCyclesAtEndOfInlining);
+ if (!b)
+ {
+ return; // We don't have a thread cycle counter.
+ }
+ m_compTickCountAtEndOfInlining = GetTickCount();
+
+#endif // defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+}
+
+//------------------------------------------------------------------------
+// RecordStateAtEndOfCompilation: capture timing data (if enabled) after
+// compilation has completed.
+
+void Compiler::RecordStateAtEndOfCompilation()
+{
+#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+ // Common portion
+ m_compCycles = 0;
+ unsigned __int64 compCyclesAtEnd;
+ bool b = CycleTimer::GetThreadCyclesS(&compCyclesAtEnd);
+ if (!b)
+ {
+ return; // We don't have a thread cycle counter.
+ }
+ assert(compCyclesAtEnd >= m_compCyclesAtEndOfInlining);
+
+ m_compCycles = compCyclesAtEnd - m_compCyclesAtEndOfInlining;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+#ifdef FEATURE_CLRSQM
+
+ // SQM only portion
+ unsigned __int64 mcycles64 = m_compCycles / ((unsigned __int64)1000000);
+ unsigned mcycles;
+ if (mcycles64 > UINT32_MAX)
+ {
+ mcycles = UINT32_MAX;
+ }
+ else
+ {
+ mcycles = (unsigned)mcycles64;
+ }
+
+ DWORD ticksAtEnd = GetTickCount();
+ assert(ticksAtEnd >= m_compTickCountAtEndOfInlining);
+ DWORD compTicks = ticksAtEnd - m_compTickCountAtEndOfInlining;
+
+ if (mcycles >= 1000)
+ {
+ info.compCompHnd->logSQMLongJitEvent(mcycles, compTicks, info.compILCodeSize, fgBBcount, opts.MinOpts(),
+ info.compMethodHnd);
+ }
+
+#endif // FEATURE_CLRSQM
+}
+
+#if FUNC_INFO_LOGGING
+// static
+LPCWSTR Compiler::compJitFuncInfoFilename = nullptr;
+
+// static
+FILE* Compiler::compJitFuncInfoFile = nullptr;
+#endif // FUNC_INFO_LOGGING
+
+#ifdef DEBUG
+
+// dumpConvertedVarSet() is just like dumpVarSet(), except we assume the varset bits are tracked
+// variable indices, and we convert them to variable numbers, sort the variable numbers, and
+// print them as variable numbers. To do this, we use a temporary set indexed by
+// variable number. We can't use the "all varset" type because it is still size-limited, and might
+// not be big enough to handle all possible variable numbers.
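+// For example, if the tracked indices in 'vars' map to locals V01, V03 and V07 (hypothetical
+// variable numbers), the output is: {V01 V03 V07}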
+void dumpConvertedVarSet(Compiler* comp, VARSET_VALARG_TP vars)
+{
+ BYTE* pVarNumSet; // trivial set: one byte per varNum, 0 means not in set, 1 means in set.
+
+ size_t varNumSetBytes = comp->lvaCount * sizeof(BYTE);
+ pVarNumSet = (BYTE*)_alloca(varNumSetBytes);
+ memset(pVarNumSet, 0, varNumSetBytes); // empty the set
+
+ VARSET_ITER_INIT(comp, iter, vars, varIndex);
+ while (iter.NextElem(comp, &varIndex))
+ {
+ unsigned varNum = comp->lvaTrackedToVarNum[varIndex];
+ assert(varNum < comp->lvaCount);
+ pVarNumSet[varNum] = 1; // This varNum is in the set
+ }
+
+ bool first = true;
+ printf("{");
+ for (size_t varNum = 0; varNum < comp->lvaCount; varNum++)
+ {
+ if (pVarNumSet[varNum] == 1)
+ {
+ if (!first)
+ {
+ printf(" ");
+ }
+ printf("V%02u", varNum);
+ first = false;
+ }
+ }
+ printf("}");
+}
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debugging helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+/* The following functions are intended to be called from the debugger, to dump
+ * various data structures.
+ *
+ * The versions that start with 'c' take a Compiler* as the first argument.
+ * The versions that start with 'd' use the tlsCompiler, so don't require a Compiler*.
+ *
+ * Summary:
+ * cBlock, dBlock : Display a basic block (call fgDispBasicBlock()).
+ * cBlocks, dBlocks : Display all the basic blocks of a function (call fgDispBasicBlocks()).
+ * cBlocksV, dBlocksV : Display all the basic blocks of a function (call fgDispBasicBlocks(true)).
+ * "V" means "verbose", and will dump all the trees.
+ * cTree, dTree : Display a tree (call gtDispTree()).
+ * cTrees, dTrees : Display all the trees in a function (call fgDumpTrees()).
+ * cEH, dEH : Display the EH handler table (call fgDispHandlerTab()).
+ * cVar, dVar : Display a local variable given its number (call lvaDumpEntry()).
+ * cVarDsc, dVarDsc : Display a local variable given a LclVarDsc* (call lvaDumpEntry()).
+ * cVars, dVars : Display the local variable table (call lvaTableDump()).
+ * cVarsFinal, dVarsFinal : Display the local variable table (call lvaTableDump(FINAL_FRAME_LAYOUT)).
+ * cBlockCheapPreds, dBlockCheapPreds : Display a block's cheap predecessors (call block->dspCheapPreds()).
+ * cBlockPreds, dBlockPreds : Display a block's predecessors (call block->dspPreds()).
+ * cBlockSuccs, dBlockSuccs : Display a block's successors (call block->dspSuccs(compiler)).
+ * cReach, dReach : Display all block reachability (call fgDispReach()).
+ * cDoms, dDoms : Display all block dominators (call fgDispDoms()).
+ * cLiveness, dLiveness : Display per-block variable liveness (call fgDispBBLiveness()).
+ * cCVarSet, dCVarSet : Display a "converted" VARSET_TP: the varset is assumed to be tracked variable
+ * indices. These are converted to variable numbers and sorted. (Calls
+ * dumpConvertedVarSet()).
+ *
+ * cFuncIR, dFuncIR : Display all the basic blocks of a function in linear IR form.
+ * cLoopIR, dLoopIR : Display a loop in linear IR form.
+ * dLoopNumIR : Display a loop (given number) in linear IR form.
+ * cBlockIR, dBlockIR : Display a basic block in linear IR form.
+ * cTreeIR, dTreeIR : Display a tree in linear IR form.
+ * dTabStopIR : Display spaces to the next tab stop column
+ * cTreeTypeIR dTreeTypeIR : Display tree type
+ * cTreeKindsIR dTreeKindsIR : Display tree kinds
+ * cTreeFlagsIR dTreeFlagsIR : Display tree flags
+ * cOperandIR dOperandIR : Display tree operand
+ * cLeafIR dLeafIR : Display tree leaf
+ * cIndirIR dIndirIR : Display indir tree as [t#] or [leaf]
+ * cListIR dListIR : Display tree list
+ * cSsaNumIR dSsaNumIR : Display SSA number as <u|d:#>
+ * cValNumIR dValNumIR : Display Value number as <v{l|c}:#{,R}>
+ * cDependsIR : Display dependencies of a tree DEP(t# ...) node
+ * based on child comma tree nodes
+ * dFormatIR : Display dump format specified on command line
+ *
+ *
+ * The following don't require a Compiler* to work:
+ * dVarSet : Display a VARSET_TP (call dumpVarSet()).
+ * dRegMask : Display a regMaskTP (call dspRegMask(mask)).
+ */
+
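+/*****************************************************************************
+ * Example debugger usage (illustrative; the block and tree ids below are hypothetical):
+ *
+ *   dBlock(dFindBlock(4)) : dump basic block BB04
+ *   dTree(dFindTree(102)) : find the tree whose gtTreeID is 102 and dump it
+ *   dBlocksV()            : dump all basic blocks, verbose (with trees)
+ *   dEH()                 : dump the EH handler table
+ *
+ * The 'd' versions pick up the current compiler from JitTls::GetCompiler(), so they can be
+ * evaluated directly from a debugger watch/immediate window while stopped inside the JIT.
+ */
+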
+void cBlock(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Block %u\n", sequenceNumber++);
+ comp->fgTableDispBasicBlock(block);
+}
+
+void cBlocks(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Blocks %u\n", sequenceNumber++);
+ comp->fgDispBasicBlocks();
+}
+
+void cBlocksV(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlocksV %u\n", sequenceNumber++);
+ comp->fgDispBasicBlocks(true);
+}
+
+void cTree(Compiler* comp, GenTree* tree)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Tree %u\n", sequenceNumber++);
+ comp->gtDispTree(tree, nullptr, ">>>");
+}
+
+void cTrees(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Trees %u\n", sequenceNumber++);
+ comp->fgDumpTrees(comp->fgFirstBB, nullptr);
+}
+
+void cEH(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *EH %u\n", sequenceNumber++);
+ comp->fgDispHandlerTab();
+}
+
+void cVar(Compiler* comp, unsigned lclNum)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Var %u\n", sequenceNumber++);
+ comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
+}
+
+void cVarDsc(Compiler* comp, LclVarDsc* varDsc)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *VarDsc %u\n", sequenceNumber++);
+ unsigned lclNum = (unsigned)(varDsc - comp->lvaTable);
+ comp->lvaDumpEntry(lclNum, Compiler::FINAL_FRAME_LAYOUT);
+}
+
+void cVars(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Vars %u\n", sequenceNumber++);
+ comp->lvaTableDump();
+}
+
+void cVarsFinal(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Vars %u\n", sequenceNumber++);
+ comp->lvaTableDump(Compiler::FINAL_FRAME_LAYOUT);
+}
+
+void cBlockCheapPreds(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlockCheapPreds %u\n",
+ sequenceNumber++);
+ block->dspCheapPreds();
+}
+
+void cBlockPreds(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlockPreds %u\n", sequenceNumber++);
+ block->dspPreds();
+}
+
+void cBlockSuccs(Compiler* comp, BasicBlock* block)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *BlockSuccs %u\n", sequenceNumber++);
+ block->dspSuccs(comp);
+}
+
+void cReach(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Reach %u\n", sequenceNumber++);
+ comp->fgDispReach();
+}
+
+void cDoms(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Doms %u\n", sequenceNumber++);
+ comp->fgDispDoms();
+}
+
+void cLiveness(Compiler* comp)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== *Liveness %u\n", sequenceNumber++);
+ comp->fgDispBBLiveness();
+}
+
+void cCVarSet(Compiler* comp, VARSET_VALARG_TP vars)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== dCVarSet %u\n", sequenceNumber++);
+ dumpConvertedVarSet(comp, vars);
+ printf("\n"); // dumpConvertedVarSet() doesn't emit a trailing newline
+}
+
+void dBlock(BasicBlock* block)
+{
+ cBlock(JitTls::GetCompiler(), block);
+}
+
+void dBlocks()
+{
+ cBlocks(JitTls::GetCompiler());
+}
+
+void dBlocksV()
+{
+ cBlocksV(JitTls::GetCompiler());
+}
+
+void dTree(GenTree* tree)
+{
+ cTree(JitTls::GetCompiler(), tree);
+}
+
+void dTrees()
+{
+ cTrees(JitTls::GetCompiler());
+}
+
+void dEH()
+{
+ cEH(JitTls::GetCompiler());
+}
+
+void dVar(unsigned lclNum)
+{
+ cVar(JitTls::GetCompiler(), lclNum);
+}
+
+void dVarDsc(LclVarDsc* varDsc)
+{
+ cVarDsc(JitTls::GetCompiler(), varDsc);
+}
+
+void dVars()
+{
+ cVars(JitTls::GetCompiler());
+}
+
+void dVarsFinal()
+{
+ cVarsFinal(JitTls::GetCompiler());
+}
+
+void dBlockPreds(BasicBlock* block)
+{
+ cBlockPreds(JitTls::GetCompiler(), block);
+}
+
+void dBlockCheapPreds(BasicBlock* block)
+{
+ cBlockCheapPreds(JitTls::GetCompiler(), block);
+}
+
+void dBlockSuccs(BasicBlock* block)
+{
+ cBlockSuccs(JitTls::GetCompiler(), block);
+}
+
+void dReach()
+{
+ cReach(JitTls::GetCompiler());
+}
+
+void dDoms()
+{
+ cDoms(JitTls::GetCompiler());
+}
+
+void dLiveness()
+{
+ cLiveness(JitTls::GetCompiler());
+}
+
+void dCVarSet(VARSET_VALARG_TP vars)
+{
+ cCVarSet(JitTls::GetCompiler(), vars);
+}
+
+void dRegMask(regMaskTP mask)
+{
+ static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
+ printf("===================================================================== dRegMask %u\n", sequenceNumber++);
+ dspRegMask(mask);
+ printf("\n"); // dspRegMask() doesn't emit a trailing newline
+}
+
+void dBlockList(BasicBlockList* list)
+{
+ printf("WorkList: ");
+ while (list != nullptr)
+ {
+ printf("BB%02u ", list->block->bbNum);
+ list = list->next;
+ }
+ printf("\n");
+}
+
+// Global variables available in debug mode, set by the debug APIs below for finding
+// Trees, Stmts, and/or Blocks by id or bbNum.
+// They can be used in the watch window, or as a way to get the address of fields for data breakpoints.
+
+GenTree* dbTree;
+GenTreeStmt* dbStmt;
+BasicBlock* dbTreeBlock;
+BasicBlock* dbBlock;
+
+// Debug APIs for finding Trees, Stmts, and/or Blocks.
+// As a side effect, they set the debug variables above.
+
+GenTree* dFindTree(GenTree* tree, unsigned id)
+{
+ GenTree* child;
+
+ if (tree == nullptr)
+ {
+ return nullptr;
+ }
+
+ if (tree->gtTreeID == id)
+ {
+ dbTree = tree;
+ return tree;
+ }
+
+ unsigned childCount = tree->NumChildren();
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ child = dFindTree(child, id);
+ if (child != nullptr)
+ {
+ return child;
+ }
+ }
+
+ return nullptr;
+}
+
+GenTree* dFindTree(unsigned id)
+{
+ Compiler* comp = JitTls::GetCompiler();
+ BasicBlock* block;
+ GenTree* tree;
+
+ dbTreeBlock = nullptr;
+ dbTree = nullptr;
+
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ tree = dFindTree(stmt, id);
+ if (tree != nullptr)
+ {
+ dbTreeBlock = block;
+ return tree;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+GenTreeStmt* dFindStmt(unsigned id)
+{
+ Compiler* comp = JitTls::GetCompiler();
+ BasicBlock* block;
+
+ dbStmt = nullptr;
+
+ unsigned stmtId = 0;
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ stmtId++;
+ if (stmtId == id)
+ {
+ dbStmt = stmt;
+ return stmt;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+BasicBlock* dFindBlock(unsigned bbNum)
+{
+ Compiler* comp = JitTls::GetCompiler();
+ BasicBlock* block = nullptr;
+
+ dbBlock = nullptr;
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbNum == bbNum)
+ {
+ dbBlock = block;
+ break;
+ }
+ }
+
+ return block;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out function in linear IR form
+ */
+
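+// Note: the c*/d* IR dump routines below can be called from a debugger just like the helpers
+// above. Outside the debugger, these dumps are driven by the COMPlus_JitDumpIR family of
+// settings (COMPlus_JitDumpIRFormat is echoed by dFormatIR() below); the exact values accepted
+// are defined by JitConfig, not here.
+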
+void cFuncIR(Compiler* comp)
+{
+ BasicBlock* block;
+
+ printf("Method %s::%s, hsh=0x%x\n", comp->info.compClassName, comp->info.compMethodName,
+ comp->info.compMethodHash());
+
+ printf("\n");
+
+ for (block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ cBlockIR(comp, block);
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out the format specifiers from COMPlus_JitDumpIRFormat
+ */
+
+void dFormatIR()
+{
+ Compiler* comp = JitTls::GetCompiler();
+
+ if (comp->dumpIRFormat != nullptr)
+ {
+        printf("COMPlus_JitDumpIRFormat=%ls\n", comp->dumpIRFormat);
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out function in linear IR form
+ */
+
+void dFuncIR()
+{
+ cFuncIR(JitTls::GetCompiler());
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out loop in linear IR form
+ */
+
+void cLoopIR(Compiler* comp, Compiler::LoopDsc* loop)
+{
+ BasicBlock* blockHead = loop->lpHead;
+ BasicBlock* blockFirst = loop->lpFirst;
+ BasicBlock* blockTop = loop->lpTop;
+ BasicBlock* blockEntry = loop->lpEntry;
+ BasicBlock* blockBottom = loop->lpBottom;
+ BasicBlock* blockExit = loop->lpExit;
+ BasicBlock* blockLast = blockBottom->bbNext;
+ BasicBlock* block;
+
+ printf("LOOP\n");
+ printf("\n");
+ printf("HEAD BB%02u\n", blockHead->bbNum);
+ printf("FIRST BB%02u\n", blockFirst->bbNum);
+ printf("TOP BB%02u\n", blockTop->bbNum);
+ printf("ENTRY BB%02u\n", blockEntry->bbNum);
+ if (loop->lpExitCnt == 1)
+ {
+ printf("EXIT BB%02u\n", blockExit->bbNum);
+ }
+ else
+ {
+        printf("EXITS %u\n", loop->lpExitCnt);
+ }
+ printf("BOTTOM BB%02u\n", blockBottom->bbNum);
+ printf("\n");
+
+ cBlockIR(comp, blockHead);
+ for (block = blockFirst; ((block != nullptr) && (block != blockLast)); block = block->bbNext)
+ {
+ cBlockIR(comp, block);
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out loop in linear IR form
+ */
+
+void dLoopIR(Compiler::LoopDsc* loop)
+{
+ cLoopIR(JitTls::GetCompiler(), loop);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out loop (given loop number) in linear IR form
+ */
+
+void dLoopNumIR(unsigned loopNum)
+{
+ Compiler* comp = JitTls::GetCompiler();
+
+ if (loopNum >= comp->optLoopCount)
+ {
+        printf("loopNum %u out of range\n", loopNum);
+ return;
+ }
+
+ Compiler::LoopDsc* loop = &comp->optLoopTable[loopNum];
+ cLoopIR(JitTls::GetCompiler(), loop);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump spaces to specified tab stop
+ */
+
+int dTabStopIR(int curr, int tabstop)
+{
+ int chars = 0;
+
+ if (tabstop <= curr)
+ {
+ chars += printf(" ");
+ }
+
+ for (int i = curr; i < tabstop; i++)
+ {
+ chars += printf(" ");
+ }
+
+ return chars;
+}
+
+void cNodeIR(Compiler* comp, GenTree* tree);
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out block in linear IR form
+ */
+
+void cBlockIR(Compiler* comp, BasicBlock* block)
+{
+ bool noStmts = comp->dumpIRNoStmts;
+ bool trees = comp->dumpIRTrees;
+
+ if (comp->dumpIRBlockHeaders)
+ {
+ block->dspBlockHeader(comp);
+ }
+ else
+ {
+ printf("BB%02u:\n", block->bbNum);
+ }
+
+ printf("\n");
+
+ if (!block->IsLIR())
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ // Print current stmt.
+
+ if (trees)
+ {
+ cTree(comp, stmt);
+ printf("\n");
+ printf("=====================================================================\n");
+ }
+
+ if (comp->compRationalIRForm)
+ {
+ GenTree* tree;
+
+ foreach_treenode_execution_order(tree, stmt)
+ {
+ cNodeIR(comp, tree);
+ }
+ }
+ else
+ {
+ cTreeIR(comp, stmt);
+ }
+
+ if (!noStmts && !trees)
+ {
+ printf("\n");
+ }
+ }
+ }
+ else
+ {
+ for (GenTree* node = block->bbTreeList; node != nullptr; node = node->gtNext)
+ {
+ cNodeIR(comp, node);
+ }
+ }
+
+ int chars = 0;
+
+ chars += dTabStopIR(chars, COLUMN_OPCODE);
+
+ chars += printf(" ");
+ switch (block->bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ chars += printf("BRANCH(EHFINALLYRET)");
+ break;
+
+ case BBJ_EHFILTERRET:
+ chars += printf("BRANCH(EHFILTERRET)");
+ break;
+
+ case BBJ_EHCATCHRET:
+ chars += printf("BRANCH(EHCATCHRETURN)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_THROW:
+ chars += printf("BRANCH(THROW)");
+ break;
+
+ case BBJ_RETURN:
+ chars += printf("BRANCH(RETURN)");
+ break;
+
+ case BBJ_NONE:
+ // For fall-through blocks
+ chars += printf("BRANCH(NONE)");
+ break;
+
+ case BBJ_ALWAYS:
+ chars += printf("BRANCH(ALWAYS)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ chars += dTabStopIR(chars, COLUMN_KINDS);
+ chars += printf("; [KEEP_BBJ_ALWAYS]");
+ }
+ break;
+
+ case BBJ_LEAVE:
+ chars += printf("BRANCH(LEAVE)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_CALLFINALLY:
+ chars += printf("BRANCH(CALLFINALLY)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_COND:
+ chars += printf("BRANCH(COND)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+ chars += printf(" BB%02u", block->bbJumpDest->bbNum);
+ break;
+
+ case BBJ_SWITCH:
+ chars += printf("BRANCH(SWITCH)");
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ do
+ {
+ chars += printf("%c BB%02u", (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',', (*jumpTab)->bbNum);
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+
+ printf("\n");
+ if (block->bbNext != nullptr)
+ {
+ printf("\n");
+ }
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out block in linear IR form
+ */
+
+void dBlockIR(BasicBlock* block)
+{
+ cBlockIR(JitTls::GetCompiler(), block);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node type for linear IR form
+ */
+
+int cTreeTypeIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ var_types type = tree->TypeGet();
+
+ const char* typeName = varTypeName(type);
+ chars += printf(".%s", typeName);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node type for linear IR form
+ */
+
+int dTreeTypeIR(GenTree* tree)
+{
+ int chars = cTreeTypeIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node kind for linear IR form
+ */
+
+int cTreeKindsIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ unsigned kind = tree->OperKind();
+
+ chars += printf("kinds=");
+ if (kind == GTK_SPECIAL)
+ {
+ chars += printf("[SPECIAL]");
+ }
+ if (kind & GTK_CONST)
+ {
+ chars += printf("[CONST]");
+ }
+ if (kind & GTK_LEAF)
+ {
+ chars += printf("[LEAF]");
+ }
+ if (kind & GTK_UNOP)
+ {
+ chars += printf("[UNOP]");
+ }
+ if (kind & GTK_BINOP)
+ {
+ chars += printf("[BINOP]");
+ }
+ if (kind & GTK_LOGOP)
+ {
+ chars += printf("[LOGOP]");
+ }
+ if (kind & GTK_ASGOP)
+ {
+ chars += printf("[ASGOP]");
+ }
+ if (kind & GTK_COMMUTE)
+ {
+ chars += printf("[COMMUTE]");
+ }
+ if (kind & GTK_EXOP)
+ {
+ chars += printf("[EXOP]");
+ }
+ if (kind & GTK_LOCAL)
+ {
+ chars += printf("[LOCAL]");
+ }
+ if (kind & GTK_SMPOP)
+ {
+ chars += printf("[SMPOP]");
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node kind for linear IR form
+ */
+
+int dTreeKindsIR(GenTree* tree)
+{
+ int chars = cTreeKindsIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node flags for linear IR form
+ */
+
+int cTreeFlagsIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ if (tree->gtFlags != 0)
+ {
+ chars += printf("flags=");
+
+ // Node flags
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(DEBUG)
+#if SMALL_TREE_NODES
+ if (comp->dumpIRNodes)
+ {
+ if (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE)
+ {
+ chars += printf("[NODE_LARGE]");
+ }
+ if (tree->gtDebugFlags & GTF_DEBUG_NODE_SMALL)
+ {
+ chars += printf("[NODE_SMALL]");
+ }
+ }
+#endif // SMALL_TREE_NODES
+ if (tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED)
+ {
+ chars += printf("[MORPHED]");
+ }
+#endif // defined(DEBUG)
+
+ if (tree->gtFlags & GTF_COLON_COND)
+ {
+ chars += printf("[COLON_COND]");
+ }
+
+ // Operator flags
+
+ genTreeOps op = tree->OperGet();
+ switch (op)
+ {
+
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_REG_VAR:
+
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ chars += printf("[VAR_DEF]");
+ }
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ chars += printf("[VAR_USEASG]");
+ }
+ if (tree->gtFlags & GTF_VAR_USEDEF)
+ {
+ chars += printf("[VAR_USEDEF]");
+ }
+ if (tree->gtFlags & GTF_VAR_CAST)
+ {
+ chars += printf("[VAR_CAST]");
+ }
+ if (tree->gtFlags & GTF_VAR_ITERATOR)
+ {
+ chars += printf("[VAR_ITERATOR]");
+ }
+ if (tree->gtFlags & GTF_VAR_CLONED)
+ {
+ chars += printf("[VAR_CLONED]");
+ }
+ if (tree->gtFlags & GTF_VAR_DEATH)
+ {
+ chars += printf("[VAR_DEATH]");
+ }
+ if (tree->gtFlags & GTF_VAR_ARR_INDEX)
+ {
+ chars += printf("[VAR_ARR_INDEX]");
+ }
+#if defined(DEBUG)
+ if (tree->gtDebugFlags & GTF_DEBUG_VAR_CSE_REF)
+ {
+ chars += printf("[VAR_CSE_REF]");
+ }
+#endif
+ if (op == GT_REG_VAR)
+ {
+ if (tree->gtFlags & GTF_REG_BIRTH)
+ {
+ chars += printf("[REG_BIRTH]");
+ }
+ }
+ break;
+
+ case GT_NOP:
+
+ if (tree->gtFlags & GTF_NOP_DEATH)
+ {
+ chars += printf("[NOP_DEATH]");
+ }
+ break;
+
+ case GT_NO_OP:
+
+ if (tree->gtFlags & GTF_NO_OP_NO)
+ {
+ chars += printf("[NO_OP_NO]");
+ }
+ break;
+
+ case GT_FIELD:
+
+ if (tree->gtFlags & GTF_FLD_NULLCHECK)
+ {
+ chars += printf("[FLD_NULLCHECK]");
+ }
+ if (tree->gtFlags & GTF_FLD_VOLATILE)
+ {
+ chars += printf("[FLD_VOLATILE]");
+ }
+ break;
+
+ case GT_INDEX:
+
+ if (tree->gtFlags & GTF_INX_RNGCHK)
+ {
+ chars += printf("[INX_RNGCHK]");
+ }
+ if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
+ {
+ chars += printf("[INX_REFARR_LAYOUT]");
+ }
+ if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
+ {
+ chars += printf("[INX_STRING_LAYOUT]");
+ }
+ break;
+
+ case GT_IND:
+ case GT_STOREIND:
+
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ chars += printf("[IND_VOLATILE]");
+ }
+ if (tree->gtFlags & GTF_IND_REFARR_LAYOUT)
+ {
+ chars += printf("[IND_REFARR_LAYOUT]");
+ }
+ if (tree->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ chars += printf("[IND_TGTANYWHERE]");
+ }
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ chars += printf("[IND_TLS_REF]");
+ }
+ if (tree->gtFlags & GTF_IND_ASG_LHS)
+ {
+ chars += printf("[IND_ASG_LHS]");
+ }
+ if (tree->gtFlags & GTF_IND_UNALIGNED)
+ {
+ chars += printf("[IND_UNALIGNED]");
+ }
+ if (tree->gtFlags & GTF_IND_INVARIANT)
+ {
+ chars += printf("[IND_INVARIANT]");
+ }
+ if (tree->gtFlags & GTF_IND_ARR_LEN)
+ {
+                    chars += printf("[IND_ARR_LEN]");
+ }
+ break;
+
+ case GT_CLS_VAR:
+
+ if (tree->gtFlags & GTF_CLS_VAR_ASG_LHS)
+ {
+ chars += printf("[CLS_VAR_ASG_LHS]");
+ }
+ break;
+
+ case GT_ADDR:
+
+ if (tree->gtFlags & GTF_ADDR_ONSTACK)
+ {
+ chars += printf("[ADDR_ONSTACK]");
+ }
+ break;
+
+ case GT_MUL:
+
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ chars += printf("[64RSLT]");
+ }
+ if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
+ {
+ chars += printf("[ADDRMODE_NO_CSE]");
+ }
+ break;
+
+ case GT_ADD:
+
+ if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
+ {
+ chars += printf("[ADDRMODE_NO_CSE]");
+ }
+ break;
+
+ case GT_LSH:
+
+ if (tree->gtFlags & GTF_ADDRMODE_NO_CSE)
+ {
+ chars += printf("[ADDRMODE_NO_CSE]");
+ }
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+
+ if (tree->gtFlags & GTF_MOD_INT_RESULT)
+ {
+ chars += printf("[MOD_INT_RESULT]");
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ if (tree->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ chars += printf("[RELOP_NAN_UN]");
+ }
+ if (tree->gtFlags & GTF_RELOP_JMP_USED)
+ {
+ chars += printf("[RELOP_JMP_USED]");
+ }
+ if (tree->gtFlags & GTF_RELOP_QMARK)
+ {
+ chars += printf("[RELOP_QMARK]");
+ }
+ if (tree->gtFlags & GTF_RELOP_SMALL)
+ {
+ chars += printf("[RELOP_SMALL]");
+ }
+ break;
+
+ case GT_QMARK:
+
+ if (tree->gtFlags & GTF_QMARK_CAST_INSTOF)
+ {
+ chars += printf("[QMARK_CAST_INSTOF]");
+ }
+ break;
+
+ case GT_BOX:
+
+ if (tree->gtFlags & GTF_BOX_VALUE)
+ {
+ chars += printf("[BOX_VALUE]");
+ }
+ break;
+
+ case GT_CNS_INT:
+
+ {
+ unsigned handleKind = (tree->gtFlags & GTF_ICON_HDL_MASK);
+
+ switch (handleKind)
+ {
+
+ case GTF_ICON_SCOPE_HDL:
+
+ chars += printf("[ICON_SCOPE_HDL]");
+ break;
+
+ case GTF_ICON_CLASS_HDL:
+
+ chars += printf("[ICON_CLASS_HDL]");
+ break;
+
+ case GTF_ICON_METHOD_HDL:
+
+ chars += printf("[ICON_METHOD_HDL]");
+ break;
+
+ case GTF_ICON_FIELD_HDL:
+
+ chars += printf("[ICON_FIELD_HDL]");
+ break;
+
+ case GTF_ICON_STATIC_HDL:
+
+ chars += printf("[ICON_STATIC_HDL]");
+ break;
+
+ case GTF_ICON_STR_HDL:
+
+ chars += printf("[ICON_STR_HDL]");
+ break;
+
+ case GTF_ICON_PSTR_HDL:
+
+ chars += printf("[ICON_PSTR_HDL]");
+ break;
+
+ case GTF_ICON_PTR_HDL:
+
+ chars += printf("[ICON_PTR_HDL]");
+ break;
+
+ case GTF_ICON_VARG_HDL:
+
+ chars += printf("[ICON_VARG_HDL]");
+ break;
+
+ case GTF_ICON_PINVKI_HDL:
+
+ chars += printf("[ICON_PINVKI_HDL]");
+ break;
+
+ case GTF_ICON_TOKEN_HDL:
+
+ chars += printf("[ICON_TOKEN_HDL]");
+ break;
+
+ case GTF_ICON_TLS_HDL:
+
+                        chars += printf("[ICON_TLS_HDL]");
+ break;
+
+ case GTF_ICON_FTN_ADDR:
+
+ chars += printf("[ICON_FTN_ADDR]");
+ break;
+
+ case GTF_ICON_CIDMID_HDL:
+
+ chars += printf("[ICON_CIDMID_HDL]");
+ break;
+
+ case GTF_ICON_BBC_PTR:
+
+ chars += printf("[ICON_BBC_PTR]");
+ break;
+
+ case GTF_ICON_FIELD_OFF:
+
+ chars += printf("[ICON_FIELD_OFF]");
+ break;
+ }
+ }
+ break;
+
+ case GT_OBJ:
+ case GT_STORE_OBJ:
+ if (tree->AsObj()->HasGCPtr())
+ {
+ chars += printf("[BLK_HASGCPTR]");
+ }
+ __fallthrough;
+
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ case GT_STORE_DYN_BLK:
+
+ if (tree->gtFlags & GTF_BLK_VOLATILE)
+ {
+ chars += printf("[BLK_VOLATILE]");
+ }
+ if (tree->AsBlk()->IsUnaligned())
+ {
+ chars += printf("[BLK_UNALIGNED]");
+ }
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ chars += printf("[CALL_UNMANAGED]");
+ }
+ if (tree->gtFlags & GTF_CALL_INLINE_CANDIDATE)
+ {
+ chars += printf("[CALL_INLINE_CANDIDATE]");
+ }
+ if (tree->gtFlags & GTF_CALL_NONVIRT)
+ {
+ chars += printf("[CALL_NONVIRT]");
+ }
+ if (tree->gtFlags & GTF_CALL_VIRT_VTABLE)
+ {
+ chars += printf("[CALL_VIRT_VTABLE]");
+ }
+ if (tree->gtFlags & GTF_CALL_VIRT_STUB)
+ {
+ chars += printf("[CALL_VIRT_STUB]");
+ }
+ if (tree->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ chars += printf("[CALL_NULLCHECK]");
+ }
+ if (tree->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ chars += printf("[CALL_POP_ARGS]");
+ }
+ if (tree->gtFlags & GTF_CALL_HOISTABLE)
+ {
+ chars += printf("[CALL_HOISTABLE]");
+ }
+ if (tree->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ chars += printf("[CALL_REG_SAVE]");
+ }
+
+ // More flags associated with calls.
+
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL)
+ {
+ chars += printf("[CALL_M_EXPLICIT_TAILCALL]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL)
+ {
+ chars += printf("[CALL_M_TAILCALL]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_VARARGS)
+ {
+ chars += printf("[CALL_M_VARARGS]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
+ {
+ chars += printf("[CALL_M_RETBUFFARG]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV)
+ {
+ chars += printf("[CALL_M_DELEGATE_INV]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK)
+ {
+ chars += printf("[CALL_M_NOGCCHECK]");
+ }
+ if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+ {
+ chars += printf("[CALL_M_SPECIAL_INTRINSIC]");
+ }
+
+ if (call->IsUnmanaged())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ chars += printf("[CALL_M_UNMGD_THISCALL]");
+ }
+ }
+ else if (call->IsVirtualStub())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ {
+ chars += printf("[CALL_M_VIRTSTUB_REL_INDIRECT]");
+ }
+ }
+ else if (!call->IsVirtual())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS)
+ {
+ chars += printf("[CALL_M_NONVIRT_SAME_THIS]");
+ }
+ }
+
+ if (call->gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH)
+ {
+ chars += printf("[CALL_M_FRAME_VAR_DEATH]");
+ }
+#ifndef LEGACY_BACKEND
+ if (call->gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_HELPER)
+ {
+ chars += printf("[CALL_M_TAILCALL_VIA_HELPER]");
+ }
+#endif
+#if FEATURE_TAILCALL_OPT
+ if (call->gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL)
+ {
+ chars += printf("[CALL_M_IMPLICIT_TAILCALL]");
+ }
+#endif
+ if (call->gtCallMoreFlags & GTF_CALL_M_PINVOKE)
+ {
+ chars += printf("[CALL_M_PINVOKE]");
+ }
+ }
+ break;
+
+ case GT_STMT:
+
+ if (tree->gtFlags & GTF_STMT_CMPADD)
+ {
+ chars += printf("[STMT_CMPADD]");
+ }
+ if (tree->gtFlags & GTF_STMT_HAS_CSE)
+ {
+ chars += printf("[STMT_HAS_CSE]");
+ }
+ break;
+
+ default:
+
+ {
+ unsigned flags = (tree->gtFlags & (~(unsigned)(GTF_COMMON_MASK | GTF_OVERFLOW)));
+ if (flags != 0)
+ {
+ chars += printf("[%08X]", flags);
+ }
+ }
+ break;
+ }
+
+ // Common flags.
+
+ if (tree->gtFlags & GTF_ASG)
+ {
+ chars += printf("[ASG]");
+ }
+ if (tree->gtFlags & GTF_CALL)
+ {
+ chars += printf("[CALL]");
+ }
+ switch (op)
+ {
+ case GT_MUL:
+ case GT_CAST:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ if (tree->gtFlags & GTF_OVERFLOW)
+ {
+ chars += printf("[OVERFLOW]");
+ }
+ break;
+ default:
+ break;
+ }
+ if (tree->gtFlags & GTF_EXCEPT)
+ {
+ chars += printf("[EXCEPT]");
+ }
+ if (tree->gtFlags & GTF_GLOB_REF)
+ {
+ chars += printf("[GLOB_REF]");
+ }
+ if (tree->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ chars += printf("[ORDER_SIDEEFF]");
+ }
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (op != GT_LCL_VAR)
+ {
+ chars += printf("[REVERSE_OPS]");
+ }
+ }
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ chars += printf("[REG_VAL]");
+ }
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ chars += printf("[SPILLED_OPER]");
+ }
+#if defined(LEGACY_BACKEND)
+ if (tree->gtFlags & GTF_SPILLED_OP2)
+ {
+ chars += printf("[SPILLED_OP2]");
+ }
+#endif
+ if (tree->gtFlags & GTF_ZSF_SET)
+ {
+ chars += printf("[ZSF_SET]");
+ }
+#if FEATURE_SET_FLAGS
+ if (tree->gtFlags & GTF_SET_FLAGS)
+ {
+ if ((op != GT_IND) && (op != GT_STOREIND))
+ {
+ chars += printf("[ZSF_SET_FLAGS]");
+ }
+ }
+#endif
+ if (tree->gtFlags & GTF_IND_NONFAULTING)
+ {
+ if ((op == GT_IND) || (op == GT_STOREIND))
+ {
+ chars += printf("[IND_NONFAULTING]");
+ }
+ }
+ if (tree->gtFlags & GTF_MAKE_CSE)
+ {
+ chars += printf("[MAKE_CSE]");
+ }
+ if (tree->gtFlags & GTF_DONT_CSE)
+ {
+ chars += printf("[DONT_CSE]");
+ }
+ if (tree->gtFlags & GTF_BOOLEAN)
+ {
+ chars += printf("[BOOLEAN]");
+ }
+ if (tree->gtFlags & GTF_SMALL_OK)
+ {
+ chars += printf("[SMALL_OK]");
+ }
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ chars += printf("[SMALL_UNSIGNED]");
+ }
+ if (tree->gtFlags & GTF_LATE_ARG)
+ {
+ chars += printf("[SMALL_LATE_ARG]");
+ }
+ if (tree->gtFlags & GTF_SPILL)
+ {
+ chars += printf("[SPILL]");
+ }
+ if (tree->gtFlags & GTF_SPILL_HIGH)
+ {
+ chars += printf("[SPILL_HIGH]");
+ }
+ if (tree->gtFlags & GTF_REUSE_REG_VAL)
+ {
+ if (op == GT_CNS_INT)
+ {
+ chars += printf("[REUSE_REG_VAL]");
+ }
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node flags for linear IR form
+ */
+
+int dTreeFlagsIR(GenTree* tree)
+{
+ int chars = cTreeFlagsIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out SSA number on tree node for linear IR form
+ */
+
+int cSsaNumIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ if (tree->gtLclVarCommon.HasSsaName())
+ {
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ assert(tree->gtFlags & GTF_VAR_DEF);
+ chars += printf("<u:%d><d:%d>", tree->gtLclVarCommon.gtSsaNum, comp->GetSsaNumForLocalVarDef(tree));
+ }
+ else
+ {
+ chars += printf("<%s:%d>", (tree->gtFlags & GTF_VAR_DEF) ? "d" : "u", tree->gtLclVarCommon.gtSsaNum);
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out SSA number on tree node for linear IR form
+ */
+
+int dSsaNumIR(GenTree* tree)
+{
+ int chars = cSsaNumIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out Value Number on tree node for linear IR form
+ */
+
+int cValNumIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+
+ if (tree->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ assert(tree->gtVNPair.GetConservative() != ValueNumStore::NoVN);
+ ValueNumPair vnp = tree->gtVNPair;
+ ValueNum vn;
+ if (vnp.BothEqual())
+ {
+ chars += printf("<v:");
+ vn = vnp.GetLiberal();
+ chars += printf(STR_VN "%x", vn);
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ chars += printf("R");
+ }
+ chars += printf(">");
+ }
+ else
+ {
+ vn = vnp.GetLiberal();
+ chars += printf("<v:");
+ chars += printf(STR_VN "%x", vn);
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ chars += printf("R");
+ }
+ chars += printf(",");
+ vn = vnp.GetConservative();
+ chars += printf(STR_VN "%x", vn);
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ chars += printf("R");
+ }
+ chars += printf(">");
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out Value Number on tree node for linear IR form
+ */
+
+int dValNumIR(GenTree* tree)
+{
+ int chars = cValNumIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree leaf node for linear IR form
+ */
+
+int cLeafIR(Compiler* comp, GenTree* tree)
+{
+ int chars = 0;
+ genTreeOps op = tree->OperGet();
+ const char* ilKind = nullptr;
+ const char* ilName = nullptr;
+ unsigned ilNum = 0;
+ unsigned lclNum = 0;
+ bool hasSsa = false;
+
+ switch (op)
+ {
+
+ case GT_PHI_ARG:
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_STORE_LCL_VAR:
+ case GT_REG_VAR:
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum);
+ if (ilName != nullptr)
+ {
+ chars += printf("%s", ilName);
+ }
+ else
+ {
+ LclVarDsc* varDsc = comp->lvaTable + lclNum;
+ chars += printf("%s%d", ilKind, ilNum);
+ if (comp->dumpIRLocals)
+ {
+ chars += printf("(V%02u", lclNum);
+ if (varDsc->lvTracked)
+ {
+ chars += printf(":T%02u", varDsc->lvVarIndex);
+ }
+ if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf(":%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf(":%s", getRegName(varDsc->lvRegNum));
+ }
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf(":%s", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf(":%s", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ chars += printf(")");
+ }
+ else if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ chars += printf("(");
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf("%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf("%s", getRegName(varDsc->lvRegNum));
+ }
+ chars += printf(")");
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf("(%s)", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf("(%s)", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ if (op == GT_REG_VAR)
+ {
+ if (isFloatRegType(tree->gtType))
+ {
+ assert(tree->gtRegVar.gtRegNum == tree->gtRegNum);
+ chars += printf("(FPV%u)", tree->gtRegNum);
+ }
+ else
+ {
+ chars += printf("(%s)", comp->compRegVarName(tree->gtRegVar.gtRegNum));
+ }
+ }
+
+ hasSsa = true;
+ break;
+
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum);
+ if (ilName != nullptr)
+ {
+ chars += printf("%s+%u", ilName, tree->gtLclFld.gtLclOffs);
+ }
+ else
+ {
+ chars += printf("%s%d+%u", ilKind, ilNum, tree->gtLclFld.gtLclOffs);
+ LclVarDsc* varDsc = comp->lvaTable + lclNum;
+ if (comp->dumpIRLocals)
+ {
+ chars += printf("(V%02u", lclNum);
+ if (varDsc->lvTracked)
+ {
+ chars += printf(":T%02u", varDsc->lvVarIndex);
+ }
+ if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf(":%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf(":%s", getRegName(varDsc->lvRegNum));
+ }
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf(":%s", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf(":%s", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ chars += printf(")");
+ }
+ else if (comp->dumpIRRegs)
+ {
+ if (varDsc->lvRegister)
+ {
+ chars += printf("(");
+ if (isRegPairType(varDsc->TypeGet()))
+ {
+ chars += printf("%s:%s",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ chars += printf("%s", getRegName(varDsc->lvRegNum));
+ }
+ chars += printf(")");
+ }
+ else
+ {
+ switch (tree->GetRegTag())
+ {
+ case GenTree::GT_REGTAG_REG:
+ chars += printf("(%s)", comp->compRegVarName(tree->gtRegNum));
+ break;
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ chars += printf("(%s)", comp->compRegPairName(tree->gtRegPair));
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ // TODO: We probably want to expand field sequence.
+ // gtDispFieldSeq(tree->gtLclFld.gtFieldSeq);
+
+ hasSsa = true;
+ break;
+
+ case GT_CNS_INT:
+
+ if (tree->IsIconHandle())
+ {
+#if 0
+ // TODO: Commented out because sometimes the CLR throws
+ // an exception when asked for the names of some handles.
+ // Need to investigate.
+
+ const char* className;
+ const char* fieldName;
+ const char* methodName;
+ const wchar_t* str;
+
+ switch (tree->GetIconHandleFlag())
+ {
+
+ case GTF_ICON_SCOPE_HDL:
+
+ chars += printf("SCOPE(?)");
+ break;
+
+ case GTF_ICON_CLASS_HDL:
+
+ className = comp->eeGetClassName((CORINFO_CLASS_HANDLE)tree->gtIntCon.gtIconVal);
+ chars += printf("CLASS(%s)", className);
+ break;
+
+ case GTF_ICON_METHOD_HDL:
+
+ methodName = comp->eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtIntCon.gtIconVal,
+ &className);
+ chars += printf("METHOD(%s.%s)", className, methodName);
+ break;
+
+ case GTF_ICON_FIELD_HDL:
+
+ fieldName = comp->eeGetFieldName((CORINFO_FIELD_HANDLE)tree->gtIntCon.gtIconVal,
+ &className);
+ chars += printf("FIELD(%s.%s) ", className, fieldName);
+ break;
+
+ case GTF_ICON_STATIC_HDL:
+
+ fieldName = comp->eeGetFieldName((CORINFO_FIELD_HANDLE)tree->gtIntCon.gtIconVal,
+ &className);
+ chars += printf("STATIC_FIELD(%s.%s)", className, fieldName);
+ break;
+
+ case GTF_ICON_STR_HDL:
+
+ str = comp->eeGetCPString(tree->gtIntCon.gtIconVal);
+ chars += printf("\"%S\"", str);
+ break;
+
+ case GTF_ICON_PSTR_HDL:
+
+ chars += printf("PSTR(?)");
+ break;
+
+ case GTF_ICON_PTR_HDL:
+
+ chars += printf("PTR(?)");
+ break;
+
+ case GTF_ICON_VARG_HDL:
+
+ chars += printf("VARARG(?)");
+ break;
+
+ case GTF_ICON_PINVKI_HDL:
+
+ chars += printf("PINVOKE(?)");
+ break;
+
+ case GTF_ICON_TOKEN_HDL:
+
+ chars += printf("TOKEN(%08X)", tree->gtIntCon.gtIconVal);
+ break;
+
+ case GTF_ICON_TLS_HDL:
+
+ chars += printf("TLS(?)");
+ break;
+
+ case GTF_ICON_FTN_ADDR:
+
+ chars += printf("FTN(?)");
+ break;
+
+ case GTF_ICON_CIDMID_HDL:
+
+ chars += printf("CIDMID(?)");
+ break;
+
+ case GTF_ICON_BBC_PTR:
+
+ chars += printf("BBC(?)");
+ break;
+
+ default:
+
+ chars += printf("HANDLE(?)");
+ break;
+ }
+#else
+#ifdef _TARGET_64BIT_
+ if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0)
+ {
+ chars += printf("HANDLE(0x%llx)", dspPtr(tree->gtIntCon.gtIconVal));
+ }
+ else
+#endif
+ {
+ chars += printf("HANDLE(0x%0x)", dspPtr(tree->gtIntCon.gtIconVal));
+ }
+#endif
+ }
+ else
+ {
+ if (tree->TypeGet() == TYP_REF)
+ {
+ assert(tree->gtIntCon.gtIconVal == 0);
+ chars += printf("null");
+ }
+#ifdef _TARGET_64BIT_
+ else if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0)
+ {
+ chars += printf("0x%llx", tree->gtIntCon.gtIconVal);
+ }
+ else
+#endif
+ {
+ chars += printf("%ld(0x%x)", tree->gtIntCon.gtIconVal, tree->gtIntCon.gtIconVal);
+ }
+ }
+ break;
+
+ case GT_CNS_LNG:
+
+ chars += printf("CONST(LONG)");
+ break;
+
+ case GT_CNS_DBL:
+
+ chars += printf("CONST(DOUBLE)");
+ break;
+
+ case GT_CNS_STR:
+
+ chars += printf("CONST(STR)");
+ break;
+
+ case GT_JMP:
+
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = comp->eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtVal.gtVal1, &className);
+ chars += printf(" %s.%s", className, methodName);
+ }
+ break;
+
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ case GT_CATCH_ARG:
+ case GT_MEMORYBARRIER:
+ case GT_ARGPLACE:
+ case GT_PINVOKE_PROLOG:
+#ifndef LEGACY_BACKEND
+ case GT_JMPTABLE:
+#endif
+ // Do nothing.
+ break;
+
+ case GT_RET_EXPR:
+
+ chars += printf("t%d", tree->gtRetExpr.gtInlineCandidate->gtTreeID);
+ break;
+
+ case GT_PHYSREG:
+
+ chars += printf("%s", getRegName(tree->gtPhysReg.gtSrcReg, varTypeIsFloating(tree)));
+ break;
+
+ case GT_LABEL:
+
+ if (tree->gtLabel.gtLabBB)
+ {
+ chars += printf("BB%02u", tree->gtLabel.gtLabBB->bbNum);
+ }
+ else
+ {
+ chars += printf("BB?");
+ }
+ break;
+
+ case GT_IL_OFFSET:
+
+ if (tree->gtStmt.gtStmtILoffsx == BAD_IL_OFFSET)
+ {
+ chars += printf("?");
+ }
+ else
+ {
+ chars += printf("0x%x", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
+ }
+ break;
+
+ case GT_CLS_VAR:
+ case GT_CLS_VAR_ADDR:
+ default:
+
+ if (tree->OperIsLeaf())
+ {
+ chars += printf("<leaf nyi: %s>", tree->OpName(tree->OperGet()));
+ }
+
+ chars += printf("t%d", tree->gtTreeID);
+ break;
+ }
+
+ if (comp->dumpIRTypes)
+ {
+ chars += cTreeTypeIR(comp, tree);
+ }
+ if (comp->dumpIRValnums)
+ {
+ chars += cValNumIR(comp, tree);
+ }
+ if (hasSsa && comp->dumpIRSsa)
+ {
+ chars += cSsaNumIR(comp, tree);
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree leaf node for linear IR form
+ */
+
+int dLeafIR(GenTree* tree)
+{
+ int chars = cLeafIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree indir node for linear IR form
+ */
+
+int cIndirIR(Compiler* comp, GenTree* tree)
+{
+ assert(tree->gtOper == GT_IND);
+
+ int chars = 0;
+ GenTree* child;
+
+ chars += printf("[");
+ child = tree->GetChild(0);
+ chars += cLeafIR(comp, child);
+ chars += printf("]");
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree indir node for linear IR form
+ */
+
+int dIndirIR(GenTree* tree)
+{
+ int chars = cIndirIR(JitTls::GetCompiler(), tree);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree operand node for linear IR form
+ */
+
+int cOperandIR(Compiler* comp, GenTree* operand)
+{
+ int chars = 0;
+
+ if (operand == nullptr)
+ {
+ chars += printf("t?");
+ return chars;
+ }
+
+ bool dumpTypes = comp->dumpIRTypes;
+ bool dumpValnums = comp->dumpIRValnums;
+ bool foldIndirs = comp->dumpIRDataflow;
+ bool foldLeafs = comp->dumpIRNoLeafs;
+ bool foldCommas = comp->dumpIRDataflow;
+ bool dumpDataflow = comp->dumpIRDataflow;
+ bool foldLists = comp->dumpIRNoLists;
+ bool dumpRegs = comp->dumpIRRegs;
+
+ genTreeOps op = operand->OperGet();
+
+ if (foldLeafs && operand->OperIsLeaf())
+ {
+ if ((op == GT_ARGPLACE) && foldLists)
+ {
+ return chars;
+ }
+ chars += cLeafIR(comp, operand);
+ }
+ else if (dumpDataflow && (operand->OperIsAssignment() || (op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD)))
+ {
+ operand = operand->GetChild(0);
+ chars += cOperandIR(comp, operand);
+ }
+ else if ((op == GT_INDEX) && foldIndirs)
+ {
+ chars += printf("[t%d]", operand->gtTreeID);
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, operand);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, operand);
+ }
+ }
+ else if ((op == GT_IND) && foldIndirs)
+ {
+ chars += cIndirIR(comp, operand);
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, operand);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, operand);
+ }
+ }
+ else if ((op == GT_COMMA) && foldCommas)
+ {
+ operand = operand->GetChild(1);
+ chars += cOperandIR(comp, operand);
+ }
+ else if ((op == GT_LIST) && foldLists)
+ {
+ GenTree* list = operand;
+ unsigned childCount = list->NumChildren();
+
+ operand = list->GetChild(0);
+ int operandChars = cOperandIR(comp, operand);
+ chars += operandChars;
+ if (childCount > 1)
+ {
+ if (operandChars > 0)
+ {
+ chars += printf(", ");
+ }
+ operand = list->GetChild(1);
+ if (operand->gtOper == GT_LIST)
+ {
+ chars += cListIR(comp, operand);
+ }
+ else
+ {
+ chars += cOperandIR(comp, operand);
+ }
+ }
+ }
+ else
+ {
+ chars += printf("t%d", operand->gtTreeID);
+ if (dumpRegs)
+ {
+ regNumber regNum = operand->GetReg();
+ if (regNum != REG_NA)
+ {
+ chars += printf("(%s)", getRegName(regNum));
+ }
+ }
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, operand);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, operand);
+ }
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree operand node for linear IR form
+ */
+
+int dOperandIR(GenTree* operand)
+{
+ int chars = cOperandIR(JitTls::GetCompiler(), operand);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree list of nodes for linear IR form
+ */
+
+int cListIR(Compiler* comp, GenTree* list)
+{
+ int chars = 0;
+ int operandChars;
+
+ assert(list->gtOper == GT_LIST);
+
+ GenTree* child;
+ unsigned childCount;
+
+ childCount = list->NumChildren();
+ assert(childCount == 1 || childCount == 2);
+
+ operandChars = 0;
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ if ((childIndex > 0) && (operandChars > 0))
+ {
+ chars += printf(", ");
+ }
+
+ child = list->GetChild(childIndex);
+ operandChars = cOperandIR(comp, child);
+ chars += operandChars;
+ }
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree list of nodes for linear IR form
+ */
+
+int dListIR(GenTree* list)
+{
+ int chars = cListIR(JitTls::GetCompiler(), list);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree dependencies based on comma nodes for linear IR form
+ */
+
+int cDependsIR(Compiler* comp, GenTree* comma, bool* first)
+{
+ int chars = 0;
+
+ assert(comma->gtOper == GT_COMMA);
+
+ GenTree* child;
+
+ child = comma->GetChild(0);
+ if (child->gtOper == GT_COMMA)
+ {
+ chars += cDependsIR(comp, child, first);
+ }
+ else
+ {
+ if (!(*first))
+ {
+ chars += printf(", ");
+ }
+ chars += printf("t%d", child->gtTreeID);
+ *first = false;
+ }
+
+ child = comma->GetChild(1);
+ if (child->gtOper == GT_COMMA)
+ {
+ chars += cDependsIR(comp, child, first);
+ }
+
+ return chars;
+}
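+
+// Illustrative output (tree IDs are hypothetical): for a node whose operand is a
+// comma chain producing t12 and t15, the ids printed above are consumed by cNodeIR,
+// which wraps them as ", DEPS(t12, t15)"; the 'first' flag suppresses the leading
+// comma for the first dependency only.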
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree dependencies based on comma nodes for linear IR form
+ */
+
+int dDependsIR(GenTree* comma)
+{
+ int chars = 0;
+ bool first = true;
+
+ chars = cDependsIR(JitTls::GetCompiler(), comma, &first);
+
+ return chars;
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree node in linear IR form
+ */
+
+void cNodeIR(Compiler* comp, GenTree* tree)
+{
+ bool foldLeafs = comp->dumpIRNoLeafs;
+ bool foldIndirs = comp->dumpIRDataflow;
+ bool foldLists = comp->dumpIRNoLists;
+ bool dataflowView = comp->dumpIRDataflow;
+ bool dumpTypes = comp->dumpIRTypes;
+ bool dumpValnums = comp->dumpIRValnums;
+ bool noStmts = comp->dumpIRNoStmts;
+ genTreeOps op = tree->OperGet();
+ unsigned childCount = tree->NumChildren();
+ GenTree* child;
+
+ // What are we skipping?
+
+ if (tree->OperIsLeaf())
+ {
+ if (foldLeafs)
+ {
+ return;
+ }
+ }
+ else if (op == GT_IND)
+ {
+ if (foldIndirs)
+ {
+ return;
+ }
+ }
+ else if (op == GT_LIST)
+ {
+ if (foldLists)
+ {
+ return;
+ }
+ }
+ else if (op == GT_STMT)
+ {
+ if (noStmts)
+ {
+ if (dataflowView)
+ {
+ child = tree->GetChild(0);
+ if (child->gtOper != GT_COMMA)
+ {
+ return;
+ }
+ }
+ else
+ {
+ return;
+ }
+ }
+ }
+ else if (op == GT_COMMA)
+ {
+ if (dataflowView)
+ {
+ return;
+ }
+ }
+
+ bool nodeIsValue = tree->IsValue();
+
+ // Dump tree id or dataflow destination.
+
+ int chars = 0;
+
+ // if (comp->compRationalIRForm)
+ // {
+ // chars += printf("R");
+ // }
+
+ chars += printf(" ");
+ if (dataflowView && tree->OperIsAssignment())
+ {
+ child = tree->GetChild(0);
+ chars += cOperandIR(comp, child);
+ }
+ else if (dataflowView && ((op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD)))
+ {
+ chars += cLeafIR(comp, tree);
+ }
+ else if (dataflowView && (op == GT_STOREIND))
+ {
+ child = tree->GetChild(0);
+ chars += printf("[");
+ chars += cOperandIR(comp, child);
+ chars += printf("]");
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, tree);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, tree);
+ }
+ }
+ else if (nodeIsValue)
+ {
+ chars += printf("t%d", tree->gtTreeID);
+ if (comp->dumpIRRegs)
+ {
+ regNumber regNum = tree->GetReg();
+ if (regNum != REG_NA)
+ {
+ chars += printf("(%s)", getRegName(regNum));
+ }
+ }
+ if (dumpTypes)
+ {
+ chars += cTreeTypeIR(comp, tree);
+ }
+ if (dumpValnums)
+ {
+ chars += cValNumIR(comp, tree);
+ }
+ }
+
+ // Dump opcode and tree ID if needed in dataflow view.
+
+ chars += dTabStopIR(chars, COLUMN_OPCODE);
+ const char* opName = tree->OpName(op);
+ chars += printf(" %c %s", nodeIsValue ? '=' : ' ', opName);
+
+ if (dataflowView)
+ {
+ if (tree->OperIsAssignment() || (op == GT_STORE_LCL_VAR) || (op == GT_STORE_LCL_FLD) || (op == GT_STOREIND))
+ {
+ chars += printf("(t%d)", tree->gtTreeID);
+ }
+ }
+
+ // Dump modifiers for opcodes to help with readability
+
+ if (op == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallType == CT_USER_FUNC)
+ {
+ if (call->IsVirtualStub())
+ {
+ chars += printf(":VS");
+ }
+ else if (call->IsVirtualVtable())
+ {
+ chars += printf(":VT");
+ }
+ else if (call->IsVirtual())
+ {
+ chars += printf(":V");
+ }
+ }
+ else if (call->gtCallType == CT_HELPER)
+ {
+ chars += printf(":H");
+ }
+ else if (call->gtCallType == CT_INDIRECT)
+ {
+ chars += printf(":I");
+ }
+ else if (call->IsUnmanaged())
+ {
+ chars += printf(":U");
+ }
+ else
+ {
+ if (call->IsVirtualStub())
+ {
+ chars += printf(":XVS");
+ }
+ else if (call->IsVirtualVtable())
+ {
+ chars += printf(":XVT");
+ }
+ else
+ {
+ chars += printf(":?");
+ }
+ }
+
+ if (call->IsUnmanaged())
+ {
+ if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ chars += printf(":T");
+ }
+ }
+
+ if (tree->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ chars += printf(":N");
+ }
+ }
+ else if (op == GT_INTRINSIC)
+ {
+ CorInfoIntrinsics intrin = tree->gtIntrinsic.gtIntrinsicId;
+
+ chars += printf(":");
+ switch (intrin)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ chars += printf("Sin");
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ chars += printf("Cos");
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ chars += printf("Sqrt");
+ break;
+ case CORINFO_INTRINSIC_Cosh:
+ chars += printf("Cosh");
+ break;
+ case CORINFO_INTRINSIC_Sinh:
+ chars += printf("Sinh");
+ break;
+ case CORINFO_INTRINSIC_Tan:
+ chars += printf("Tan");
+ break;
+ case CORINFO_INTRINSIC_Tanh:
+ chars += printf("Tanh");
+ break;
+ case CORINFO_INTRINSIC_Asin:
+ chars += printf("Asin");
+ break;
+ case CORINFO_INTRINSIC_Acos:
+ chars += printf("Acos");
+ break;
+ case CORINFO_INTRINSIC_Atan:
+ chars += printf("Atan");
+ break;
+ case CORINFO_INTRINSIC_Atan2:
+ chars += printf("Atan2");
+ break;
+ case CORINFO_INTRINSIC_Log10:
+ chars += printf("Log10");
+ break;
+ case CORINFO_INTRINSIC_Pow:
+ chars += printf("Pow");
+ break;
+ case CORINFO_INTRINSIC_Exp:
+ chars += printf("Exp");
+ break;
+ case CORINFO_INTRINSIC_Ceiling:
+ chars += printf("Ceiling");
+ break;
+ case CORINFO_INTRINSIC_Floor:
+ chars += printf("Floor");
+ break;
+ default:
+ chars += printf("unknown(%d)", intrin);
+ break;
+ }
+ }
+
+ // Dump operands.
+
+ chars += dTabStopIR(chars, COLUMN_OPERANDS);
+
+ // Dump operator specific fields as operands
+
+ switch (op)
+ {
+ default:
+ break;
+ case GT_FIELD:
+
+ {
+ const char* className = nullptr;
+ const char* fieldName = comp->eeGetFieldName(tree->gtField.gtFldHnd, &className);
+
+ chars += printf(" %s.%s", className, fieldName);
+ }
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallType != CT_INDIRECT)
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = comp->eeGetMethodName(tree->gtCall.gtCallMethHnd, &className);
+
+ chars += printf(" %s.%s", className, methodName);
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+
+ if (!dataflowView)
+ {
+ chars += printf(" ");
+ chars += cLeafIR(comp, tree);
+ }
+ break;
+
+ case GT_STORE_CLS_VAR:
+
+ chars += printf(" ???");
+ break;
+
+ case GT_LEA:
+
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+ GenTree* base = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned scale = lea->gtScale;
+ unsigned offset = lea->gtOffset;
+
+ chars += printf(" [");
+ if (base != nullptr)
+ {
+ chars += cOperandIR(comp, base);
+ }
+ if (index != nullptr)
+ {
+ if (base != nullptr)
+ {
+ chars += printf("+");
+ }
+ chars += cOperandIR(comp, index);
+ if (scale > 1)
+ {
+ chars += printf("*%u", scale);
+ }
+ }
+ if ((offset != 0) || ((base == nullptr) && (index == nullptr)))
+ {
+ if ((base != nullptr) || (index != nullptr))
+ {
+ chars += printf("+");
+ }
+ chars += printf("%u", offset);
+ }
+ chars += printf("]");
+ break;
+ }
+
+ // Dump operands.
+
+ if (tree->OperIsLeaf())
+ {
+ chars += printf(" ");
+ chars += cLeafIR(comp, tree);
+ }
+ else if (op == GT_LEA)
+ {
+ // Already dumped it above.
+ }
+ else if (op == GT_PHI)
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ bool first = true;
+ for (GenTreeArgList* args = tree->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ child = args->Current();
+ if (!first)
+ {
+ chars += printf(",");
+ }
+ first = false;
+ chars += printf(" ");
+ chars += cOperandIR(comp, child);
+ }
+ }
+ }
+ else
+ {
+ bool hasComma = false;
+ bool first = true;
+ int operandChars = 0;
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ if (child == nullptr)
+ {
+ continue;
+ }
+
+ if (child->gtOper == GT_COMMA)
+ {
+ hasComma = true;
+ }
+
+ if (dataflowView && (childIndex == 0))
+ {
+ if ((op == GT_ASG) || (op == GT_STOREIND))
+ {
+ continue;
+ }
+ }
+
+ if (!first)
+ {
+ chars += printf(",");
+ }
+
+ bool isList = (child->gtOper == GT_LIST);
+ if (!isList || !foldLists)
+ {
+ if (foldLeafs && (child->gtOper == GT_ARGPLACE))
+ {
+ continue;
+ }
+ chars += printf(" ");
+ operandChars = cOperandIR(comp, child);
+ chars += operandChars;
+ if (operandChars > 0)
+ {
+ first = false;
+ }
+ }
+ else
+ {
+ assert(isList);
+ chars += printf(" ");
+ operandChars = cOperandIR(comp, child);
+ chars += operandChars;
+ if (operandChars > 0)
+ {
+ first = false;
+ }
+ }
+ }
+
+ if (dataflowView && hasComma)
+ {
+ chars += printf(", DEPS(");
+ first = true;
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ if (child->gtOper == GT_COMMA)
+ {
+ chars += cDependsIR(comp, child, &first);
+ }
+ }
+ chars += printf(")");
+ }
+ }
+
+ // Dump kinds, flags, costs
+
+ if (comp->dumpIRKinds || comp->dumpIRFlags || comp->dumpIRCosts)
+ {
+ chars += dTabStopIR(chars, COLUMN_KINDS);
+ chars += printf(";");
+ if (comp->dumpIRKinds)
+ {
+ chars += printf(" ");
+ chars += cTreeKindsIR(comp, tree);
+ }
+ if (comp->dumpIRFlags && (tree->gtFlags != 0))
+ {
+ if (comp->dumpIRKinds)
+ {
+ chars += dTabStopIR(chars, COLUMN_FLAGS);
+ }
+ else
+ {
+ chars += printf(" ");
+ }
+ chars += cTreeFlagsIR(comp, tree);
+ }
+ if (comp->dumpIRCosts && (tree->gtCostsInitialized))
+ {
+ chars += printf(" CostEx=%d, CostSz=%d", tree->GetCostEx(), tree->GetCostSz());
+ }
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree in linear IR form
+ */
+
+void cTreeIR(Compiler* comp, GenTree* tree)
+{
+ bool foldLeafs = comp->dumpIRNoLeafs;
+ bool foldIndirs = comp->dumpIRDataflow;
+ bool foldLists = comp->dumpIRNoLists;
+ bool dataflowView = comp->dumpIRDataflow;
+ bool dumpTypes = comp->dumpIRTypes;
+ bool dumpValnums = comp->dumpIRValnums;
+ bool noStmts = comp->dumpIRNoStmts;
+ genTreeOps op = tree->OperGet();
+ unsigned childCount = tree->NumChildren();
+ GenTree* child;
+
+ // Recurse and dump trees that this node depends on.
+
+ if (tree->OperIsLeaf())
+ {
+ }
+ else if (tree->OperIsBinary() && tree->IsReverseOp())
+ {
+ child = tree->GetChild(1);
+ cTreeIR(comp, child);
+ child = tree->GetChild(0);
+ cTreeIR(comp, child);
+ }
+ else if (op == GT_PHI)
+ {
+ // Don't recurse.
+ }
+ else
+ {
+ assert(!tree->IsReverseOp());
+ for (unsigned childIndex = 0; childIndex < childCount; childIndex++)
+ {
+ child = tree->GetChild(childIndex);
+ if (child != nullptr)
+ {
+ cTreeIR(comp, child);
+ }
+ }
+ }
+
+ cNodeIR(comp, tree);
+}
+
+/*****************************************************************************
+ *
+ * COMPlus_JitDumpIR support - dump out tree in linear IR form
+ */
+
+void dTreeIR(GenTree* tree)
+{
+ cTreeIR(JitTls::GetCompiler(), tree);
+}
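+
+// Usage sketch (assumption: like the other d-prefixed helpers above, this is intended
+// to be called by hand from a native debugger while stopped inside the JIT, where
+// JitTls::GetCompiler() yields the compiler for the method being jitted):
+//
+//   cTreeIR(compiler, tree);  // explicit Compiler* form
+//   dTreeIR(tree);            // debugger convenience form, no Compiler* needed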
+
+#endif // DEBUG
+
+#if VARSET_COUNTOPS
+// static
+BitSetSupport::BitSetOpCounter Compiler::m_varsetOpCounter("VarSetOpCounts.log");
+#endif
+#if ALLVARSET_COUNTOPS
+// static
+BitSetSupport::BitSetOpCounter Compiler::m_allvarsetOpCounter("AllVarSetOpCounts.log");
+#endif
+
+// static
+HelperCallProperties Compiler::s_helperCallProperties;
+
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
new file mode 100644
index 0000000000..05047c5ecb
--- /dev/null
+++ b/src/jit/compiler.h
@@ -0,0 +1,9301 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Compiler XX
+XX XX
+XX Represents the method data we are currently JIT-compiling. XX
+XX An instance of this class is created for every method we JIT. XX
+XX This contains all the info needed for the method. So allocating a new XX
+XX instance per method makes it thread-safe. XX
+XX It should be used to do all the memory management for the compiler run. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _COMPILER_H_
+#define _COMPILER_H_
+/*****************************************************************************/
+
+#include "jit.h"
+#include "opcode.h"
+#include "varset.h"
+#include "gentree.h"
+#include "lir.h"
+#include "block.h"
+#include "inline.h"
+#include "jiteh.h"
+#include "instr.h"
+#include "regalloc.h"
+#include "sm.h"
+#include "simplerhash.h"
+#include "cycletimer.h"
+#include "blockset.h"
+#include "jitstd.h"
+#include "arraystack.h"
+#include "hashbv.h"
+#include "fp.h"
+#include "expandarray.h"
+#include "tinyarray.h"
+#include "valuenum.h"
+#include "reglist.h"
+#include "jittelemetry.h"
+#ifdef LATE_DISASM
+#include "disasm.h"
+#endif
+
+#include "codegeninterface.h"
+#include "regset.h"
+#include "jitgcinfo.h"
+
+#if DUMP_GC_TABLES && defined(JIT32_GCENCODER)
+#include "gcdump.h"
+#endif
+
+#include "emit.h"
+
+#include "simd.h"
+
+// This is only used locally in the JIT to indicate that
+// a verification block should be inserted
+#define SEH_VERIFICATION_EXCEPTION 0xe0564552 // VER
+
+/*****************************************************************************
+ * Forward declarations
+ */
+
+struct InfoHdr; // defined in GCInfo.h
+struct escapeMapping_t; // defined in flowgraph.cpp
+class emitter; // defined in emit.h
+struct ShadowParamVarInfo; // defined in GSChecks.cpp
+struct InitVarDscInfo; // defined in register_arg_convention.h
+class FgStack; // defined in flowgraph.cpp
+#if FEATURE_STACK_FP_X87
+struct FlatFPStateX87; // defined in fp.h
+#endif
+#if FEATURE_ANYCSE
+class CSE_DataFlow; // defined in OptCSE.cpp
+#endif
+#ifdef DEBUG
+struct IndentStack;
+#endif
+
+// The following are defined in this file, Compiler.h
+
+class Compiler;
+
+/*****************************************************************************
+ * Unwind info
+ */
+
+#include "unwind.h"
+
+/*****************************************************************************/
+
+//
+// Declare global operator new overloads that use the Compiler::compGetMem() function for allocation.
+//
+
+// Or the more-general IAllocator interface.
+void* __cdecl operator new(size_t n, IAllocator* alloc);
+void* __cdecl operator new[](size_t n, IAllocator* alloc);
+
+// I wanted to make the second argument optional, with default = CMK_Unknown, but that
+// caused these to be ambiguous with the global placement new operators.
+void* __cdecl operator new(size_t n, Compiler* context, CompMemKind cmk);
+void* __cdecl operator new[](size_t n, Compiler* context, CompMemKind cmk);
+void* __cdecl operator new(size_t n, void* p, const jitstd::placement_t& syntax_difference);
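+
+// Usage sketch for the overloads above (allocation sizes and the JitTimer form are
+// illustrative; JitTimer::Create later in this header uses the same pattern):
+//
+//   int*      scratch = new (alloc) int[16];                      // IAllocator overload
+//   JitTimer* timer   = ::new (comp, CMK_Unknown) JitTimer(size); // Compiler/CompMemKind overload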
+
+// Requires the definitions of "operator new" so including "LoopCloning.h" after the definitions.
+#include "loopcloning.h"
+
+/*****************************************************************************/
+
+/* This is included here and not earlier as it needs the definition of "CSE"
+ * which is defined in the section above */
+
+/*****************************************************************************/
+
+unsigned genLog2(unsigned value);
+unsigned genLog2(unsigned __int64 value);
+
+var_types genActualType(var_types type);
+var_types genUnsignedType(var_types type);
+var_types genSignedType(var_types type);
+
+unsigned ReinterpretHexAsDecimal(unsigned);
+
+/*****************************************************************************/
+
+#ifdef FEATURE_SIMD
+#ifdef FEATURE_AVX_SUPPORT
+const unsigned TEMP_MAX_SIZE = YMM_REGSIZE_BYTES;
+#else // !FEATURE_AVX_SUPPORT
+const unsigned TEMP_MAX_SIZE = XMM_REGSIZE_BYTES;
+#endif // !FEATURE_AVX_SUPPORT
+#else // !FEATURE_SIMD
+const unsigned TEMP_MAX_SIZE = sizeof(double);
+#endif // !FEATURE_SIMD
+const unsigned TEMP_SLOT_COUNT = (TEMP_MAX_SIZE / sizeof(int));
+
+const unsigned FLG_CCTOR = (CORINFO_FLG_CONSTRUCTOR | CORINFO_FLG_STATIC);
+
+#ifdef DEBUG
+const int BAD_STK_OFFS = 0xBAADF00D; // for LclVarDsc::lvStkOffs
+#endif
+
+// The following holds the Local var info (scope information)
+typedef const char* VarName; // Actual ASCII string
+struct VarScopeDsc
+{
+ IL_OFFSET vsdLifeBeg; // instr offset of beg of life
+ IL_OFFSET vsdLifeEnd; // instr offset of end of life
+ unsigned vsdVarNum; // (remapped) LclVarDsc number
+
+#ifdef DEBUG
+ VarName vsdName; // name of the var
+#endif
+
+ unsigned vsdLVnum; // 'which' in eeGetLVinfo().
+ // Also, it is the index of this entry in the info.compVarScopes array,
+ // which is useful since the array is also accessed via the
+ // compEnterScopeList and compExitScopeList sorted arrays.
+};
+
+/*****************************************************************************
+ *
+ * The following holds the local variable counts and the descriptor table.
+ */
+
+// This is the location of a definition.
+struct DefLoc
+{
+ BasicBlock* m_blk;
+ GenTreePtr m_tree;
+
+ DefLoc() : m_blk(nullptr), m_tree(nullptr)
+ {
+ }
+};
+
+// This class encapsulates all info about a local variable that may vary for different SSA names
+// in the family.
+class LclSsaVarDsc
+{
+public:
+ ValueNumPair m_vnPair;
+ DefLoc m_defLoc;
+
+ LclSsaVarDsc()
+ {
+ }
+};
+
+typedef ExpandArray<LclSsaVarDsc> PerSsaArray;
+
+class LclVarDsc
+{
+public:
+ // The constructor. Most things can just be zero'ed.
+ LclVarDsc(Compiler* comp);
+
+ // note this only packs because var_types is a typedef of unsigned char
+ var_types lvType : 5; // TYP_INT/LONG/FLOAT/DOUBLE/REF
+
+ unsigned char lvIsParam : 1; // is this a parameter?
+ unsigned char lvIsRegArg : 1; // is this a register argument?
+ unsigned char lvFramePointerBased : 1; // 0 = off of REG_SPBASE (e.g., ESP), 1 = off of REG_FPBASE (e.g., EBP)
+
+ unsigned char lvStructGcCount : 3; // if struct, how many GC pointers (stop counting at 7). The only use of values >1
+ // is to help determine whether to use block init in the prolog.
+ unsigned char lvOnFrame : 1; // (part of) the variable lives on the frame
+ unsigned char lvDependReg : 1; // did the predictor depend upon this being enregistered
+ unsigned char lvRegister : 1; // assigned to live in a register? For RyuJIT backend, this is only set if the
+ // variable is in the same register for the entire function.
+ unsigned char lvTracked : 1; // is this a tracked variable?
+ bool lvTrackedNonStruct()
+ {
+ return lvTracked && lvType != TYP_STRUCT;
+ }
+ unsigned char lvPinned : 1; // is this a pinned variable?
+
+ unsigned char lvMustInit : 1; // must be initialized
+ unsigned char lvAddrExposed : 1; // The address of this variable is "exposed" -- passed as an argument, stored in a
+ // global location, etc.
+ // We cannot reason reliably about the value of the variable.
+ unsigned char lvDoNotEnregister : 1; // Do not enregister this variable.
+ unsigned char lvFieldAccessed : 1; // The var is a struct local, and a field of the variable is accessed. Affects
+ // struct promotion.
+
+#ifdef DEBUG
+ // These further document the reasons for setting "lvDoNotEnregister". (Note that "lvAddrExposed" is one of the
+ // reasons; also, lvType == TYP_STRUCT prevents enregistration.) At least one of the reasons should be true.
+ unsigned char lvVMNeedsStackAddr : 1; // The VM may have access to a stack-relative address of the variable, and
+ // read/write its value.
+ unsigned char lvLiveInOutOfHndlr : 1; // The variable was live in or out of an exception handler, and this required
+ // the variable to be
+ // in the stack (at least at those boundaries.)
+ unsigned char lvLclFieldExpr : 1; // The variable is not a struct, but was accessed like one (e.g., reading a
+ // particular byte from an int).
+ unsigned char lvLclBlockOpAddr : 1; // The variable was written to via a block operation that took its address.
+ unsigned char lvLiveAcrossUCall : 1; // The variable is live across an unmanaged call.
+#endif
+ unsigned char lvIsCSE : 1; // Indicates if this LclVar is a CSE variable.
+ unsigned char lvRefAssign : 1; // involved in pointer assignment
+ unsigned char lvHasLdAddrOp : 1; // has ldloca or ldarga opcode on this local.
+ unsigned char lvStackByref : 1; // This is a compiler temporary of TYP_BYREF that is known to point into our local
+ // stack frame.
+
+ unsigned char lvArgWrite : 1; // variable is a parameter and STARG was used on it
+ unsigned char lvIsTemp : 1; // Short-lifetime compiler temp
+#if OPT_BOOL_OPS
+ unsigned char lvIsBoolean : 1; // set if variable is boolean
+#endif
+ unsigned char lvRngOptDone : 1; // considered for range check opt?
+ unsigned char lvLoopInc : 1; // incremented in the loop?
+ unsigned char lvLoopAsg : 1; // reassigned in the loop (other than a monotonic inc/dec for the index var)?
+ unsigned char lvArrIndx : 1; // used as an array index?
+ unsigned char lvArrIndxOff : 1; // used as an array index with an offset?
+ unsigned char lvArrIndxDom : 1; // index dominates loop exit
+#if ASSERTION_PROP
+ unsigned char lvSingleDef : 1; // variable has a single def
+ unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization
+ unsigned char lvVolatileHint : 1; // hint for AssertionProp
+#endif
+#if FANCY_ARRAY_OPT
+ unsigned char lvAssignOne : 1; // assigned at least once?
+ unsigned char lvAssignTwo : 1; // assigned at least twice?
+#endif
+
+ unsigned char lvSpilled : 1; // enregistered variable was spilled
+#ifndef _TARGET_64BIT_
+ unsigned char lvStructDoubleAlign : 1; // Must we double align this struct?
+#endif // !_TARGET_64BIT_
+#ifdef _TARGET_64BIT_
+ unsigned char lvQuirkToLong : 1; // Quirk to allocate this LclVar as a 64-bit long
+#endif
+#ifdef DEBUG
+ unsigned char lvKeepType : 1; // Don't change the type of this variable
+ unsigned char lvNoLclFldStress : 1; // Can't apply local field stress on this one
+#endif
+ unsigned char lvIsPtr : 1; // Might this be used in an address computation? (used by buffer overflow security
+ // checks)
+ unsigned char lvIsUnsafeBuffer : 1; // Does this contain an unsafe buffer requiring buffer overflow security checks?
+ unsigned char lvPromoted : 1; // True when this local is a promoted struct, a normed struct, or a "split" long on a
+ // 32-bit target.
+ unsigned char lvIsStructField : 1; // Is this local var a field of a promoted struct local?
+ unsigned char lvContainsFloatingFields : 1; // Does this struct contain floating point fields?
+ unsigned char lvOverlappingFields : 1; // True when we have a struct with possibly overlapping fields
+ unsigned char lvContainsHoles : 1; // True when we have a promoted struct that contains holes
+ unsigned char lvCustomLayout : 1; // True when this struct has "CustomLayout"
+
+ unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context
+ unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call
+
+#ifdef FEATURE_HFA
+ unsigned char _lvIsHfa : 1; // Is this a struct variable whose class handle is an HFA type
+ unsigned char _lvIsHfaRegArg : 1; // Is this an HFA argument variable? // TODO-CLEANUP: Remove this and replace
+ // with (lvIsRegArg && lvIsHfa())
+ unsigned char _lvHfaTypeIsFloat : 1; // Is the HFA type float or double?
+#endif // FEATURE_HFA
+
+#ifdef DEBUG
+ // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct
+ // types, and is needed because of cases where TYP_STRUCT is bashed to an integral type.
+ // Consider cleaning this up so this workaround is not required.
+ unsigned char lvUnusedStruct : 1; // All references to this promoted struct are through its field locals.
+ // I.e. there is no longer any reference to the struct directly.
+ // In this case we can simply remove this struct local.
+#endif
+#ifndef LEGACY_BACKEND
+ unsigned char lvLRACandidate : 1; // Tracked for linear scan register allocation purposes
+#endif // !LEGACY_BACKEND
+
+#ifdef FEATURE_SIMD
+ // Note that both SIMD vector args and locals are marked as lvSIMDType = true, but the
+ // type of an arg node is TYP_BYREF and a local node is TYP_SIMD*.
+ unsigned char lvSIMDType : 1; // This is a SIMD struct
+ unsigned char lvUsedInSIMDIntrinsic : 1; // Indicates that this lclVar is used by a SIMD intrinsic
+#endif // FEATURE_SIMD
+ unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.
+
+ union {
+ unsigned lvFieldLclStart; // The index of the local var representing the first field in the promoted struct
+ // local.
+ unsigned lvParentLcl; // The index of the local var representing the parent (i.e. the promoted struct local).
+ // Valid on promoted struct local fields.
+#ifdef FEATURE_SIMD
+ var_types lvBaseType; // The base type of a SIMD local var. Valid on TYP_SIMD locals.
+#endif // FEATURE_SIMD
+ };
+
+ unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc.
+ unsigned char lvFldOffset;
+ unsigned char lvFldOrdinal;
+
+#if FEATURE_MULTIREG_ARGS
+ regNumber lvRegNumForSlot(unsigned slotNum)
+ {
+ if (slotNum == 0)
+ {
+ return lvArgReg;
+ }
+ else if (slotNum == 1)
+ {
+ return lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid slotNum!");
+ }
+
+ unreached();
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+ bool lvIsHfa() const
+ {
+#ifdef FEATURE_HFA
+ return _lvIsHfa;
+#else
+ return false;
+#endif
+ }
+
+ void lvSetIsHfa()
+ {
+#ifdef FEATURE_HFA
+ _lvIsHfa = true;
+#endif
+ }
+
+ bool lvIsHfaRegArg() const
+ {
+#ifdef FEATURE_HFA
+ return _lvIsHfaRegArg;
+#else
+ return false;
+#endif
+ }
+
+ void lvSetIsHfaRegArg()
+ {
+#ifdef FEATURE_HFA
+ _lvIsHfaRegArg = true;
+#endif
+ }
+
+ bool lvHfaTypeIsFloat() const
+ {
+#ifdef FEATURE_HFA
+ return _lvHfaTypeIsFloat;
+#else
+ return false;
+#endif
+ }
+
+ void lvSetHfaTypeIsFloat(bool value)
+ {
+#ifdef FEATURE_HFA
+ _lvHfaTypeIsFloat = value;
+#endif
+ }
+
+ // on Arm64 - Returns 1-4 indicating the number of register slots used by the HFA
+ // on Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8
+ //
+ unsigned lvHfaSlots() const
+ {
+ assert(lvIsHfa());
+ assert(lvType == TYP_STRUCT);
+#ifdef _TARGET_ARM_
+ return lvExactSize / sizeof(float);
+#else // _TARGET_ARM64_
+ if (lvHfaTypeIsFloat())
+ {
+ return lvExactSize / sizeof(float);
+ }
+ else
+ {
+ return lvExactSize / sizeof(double);
+ }
+#endif // _TARGET_ARM64_
+ }
+
+ // lvIsMultiRegArgOrRet()
+ // returns true if this is a multireg LclVar struct used in an argument context
+ // or if this is a multireg LclVar struct assigned from a multireg call
+ bool lvIsMultiRegArgOrRet()
+ {
+ return lvIsMultiRegArg || lvIsMultiRegRet;
+ }
+
+private:
+ regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a
+ // register pair). For LEGACY_BACKEND, this is only set if lvRegister is
+ // non-zero. For non-LEGACY_BACKEND, it is set during codegen any time the
+ // variable is enregistered (in non-LEGACY_BACKEND, lvRegister is only set
+ // to non-zero if the variable gets the same register assignment for its entire
+ // lifetime).
+#if !defined(_TARGET_64BIT_)
+ regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
+#endif // !defined(_TARGET_64BIT_)
+
+ regNumberSmall _lvArgReg; // The register in which this argument is passed.
+
+#if FEATURE_MULTIREG_ARGS
+ regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register.
+ // Note this is defined but not used by ARM32
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifndef LEGACY_BACKEND
+ union {
+ regNumberSmall _lvArgInitReg; // the register into which the argument is moved at entry
+ regPairNoSmall _lvArgInitRegPair; // the register pair into which the argument is moved at entry
+ };
+#endif // !LEGACY_BACKEND
+
+public:
+ // The register number is stored in a small format (8 bits), but the getters return and the setters take
+ // a full-size (unsigned) format, to localize the casts here.
+
+ /////////////////////
+
+ __declspec(property(get = GetRegNum, put = SetRegNum)) regNumber lvRegNum;
+
+ regNumber GetRegNum() const
+ {
+ return (regNumber)_lvRegNum;
+ }
+
+ void SetRegNum(regNumber reg)
+ {
+ _lvRegNum = (regNumberSmall)reg;
+ assert(_lvRegNum == reg);
+ }
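+
+ // Usage sketch: thanks to the __declspec(property) declaration above, callers can use
+ // the field-style name while the small-format casts stay localized here ('varDsc' is a
+ // hypothetical LclVarDsc*):
+ //
+ //   varDsc->SetRegNum(reg);              // same as: varDsc->lvRegNum = reg;
+ //   regNumber r = varDsc->GetRegNum();   // same as: regNumber r = varDsc->lvRegNum;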
+
+/////////////////////
+
+#if defined(_TARGET_64BIT_)
+ __declspec(property(get = GetOtherReg, put = SetOtherReg)) regNumber lvOtherReg;
+
+ regNumber GetOtherReg() const
+ {
+ assert(!"shouldn't get here"); // can't use "unreached();" because it's NORETURN, which causes C4072
+ // "unreachable code" warnings
+ return REG_NA;
+ }
+
+ void SetOtherReg(regNumber reg)
+ {
+ assert(!"shouldn't get here"); // can't use "unreached();" because it's NORETURN, which causes C4072
+ // "unreachable code" warnings
+ }
+#else // !_TARGET_64BIT_
+ __declspec(property(get = GetOtherReg, put = SetOtherReg)) regNumber lvOtherReg;
+
+ regNumber GetOtherReg() const
+ {
+ return (regNumber)_lvOtherReg;
+ }
+
+ void SetOtherReg(regNumber reg)
+ {
+ _lvOtherReg = (regNumberSmall)reg;
+ assert(_lvOtherReg == reg);
+ }
+#endif // !_TARGET_64BIT_
+
+ /////////////////////
+
+ __declspec(property(get = GetArgReg, put = SetArgReg)) regNumber lvArgReg;
+
+ regNumber GetArgReg() const
+ {
+ return (regNumber)_lvArgReg;
+ }
+
+ void SetArgReg(regNumber reg)
+ {
+ _lvArgReg = (regNumberSmall)reg;
+ assert(_lvArgReg == reg);
+ }
+
+#if FEATURE_MULTIREG_ARGS
+ __declspec(property(get = GetOtherArgReg, put = SetOtherArgReg)) regNumber lvOtherArgReg;
+
+ regNumber GetOtherArgReg() const
+ {
+ return (regNumber)_lvOtherArgReg;
+ }
+
+ void SetOtherArgReg(regNumber reg)
+ {
+ _lvOtherArgReg = (regNumberSmall)reg;
+ assert(_lvOtherArgReg == reg);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifdef FEATURE_SIMD
+ // Is this a SIMD struct?
+ bool lvIsSIMDType() const
+ {
+ return lvSIMDType;
+ }
+
+ // Is this a SIMD struct that is used in a SIMD intrinsic?
+ bool lvIsUsedInSIMDIntrinsic() const
+ {
+ return lvUsedInSIMDIntrinsic;
+ }
+#else
+ // If FEATURE_SIMD is not enabled, return false
+ bool lvIsSIMDType() const
+ {
+ return false;
+ }
+ bool lvIsUsedInSIMDIntrinsic() const
+ {
+ return false;
+ }
+#endif
+
+/////////////////////
+
+#ifndef LEGACY_BACKEND
+ __declspec(property(get = GetArgInitReg, put = SetArgInitReg)) regNumber lvArgInitReg;
+
+ regNumber GetArgInitReg() const
+ {
+ return (regNumber)_lvArgInitReg;
+ }
+
+ void SetArgInitReg(regNumber reg)
+ {
+ _lvArgInitReg = (regNumberSmall)reg;
+ assert(_lvArgInitReg == reg);
+ }
+
+ /////////////////////
+
+ __declspec(property(get = GetArgInitRegPair, put = SetArgInitRegPair)) regPairNo lvArgInitRegPair;
+
+ regPairNo GetArgInitRegPair() const
+ {
+ regPairNo regPair = (regPairNo)_lvArgInitRegPair;
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+ return regPair;
+ }
+
+ void SetArgInitRegPair(regPairNo regPair)
+ {
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+ _lvArgInitRegPair = (regPairNoSmall)regPair;
+ assert(_lvArgInitRegPair == regPair);
+ }
+
+ /////////////////////
+
+ bool lvIsRegCandidate() const
+ {
+ return lvLRACandidate != 0;
+ }
+
+ bool lvIsInReg() const
+ {
+ return lvIsRegCandidate() && (lvRegNum != REG_STK);
+ }
+
+#else // LEGACY_BACKEND
+
+ bool lvIsRegCandidate() const
+ {
+ return lvTracked != 0;
+ }
+
+ bool lvIsInReg() const
+ {
+ return lvRegister != 0;
+ }
+
+#endif // LEGACY_BACKEND
+
+ regMaskTP lvRegMask() const
+ {
+ regMaskTP regMask = RBM_NONE;
+ if (varTypeIsFloating(TypeGet()))
+ {
+ if (lvRegNum != REG_STK)
+ {
+ regMask = genRegMaskFloat(lvRegNum, TypeGet());
+ }
+ }
+ else
+ {
+ if (lvRegNum != REG_STK)
+ {
+ regMask = genRegMask(lvRegNum);
+ }
+
+ // For longs we may have two regs
+ if (isRegPairType(lvType) && lvOtherReg != REG_STK)
+ {
+ regMask |= genRegMask(lvOtherReg);
+ }
+ }
+ return regMask;
+ }
+
+ regMaskSmall lvPrefReg; // set of regs it prefers to live in
+
+ unsigned short lvVarIndex; // variable tracking index
+ unsigned short lvRefCnt; // unweighted (real) reference count
+ unsigned lvRefCntWtd; // weighted reference count
+ int lvStkOffs; // stack offset of home
+ unsigned lvExactSize; // (exact) size of the type in bytes
+
+ // Is this a promoted struct?
+ // This method returns true only for structs (including SIMD structs), not for
+ // locals that are split on a 32-bit target.
+ // It is only necessary to use this:
+ // 1) if only structs are wanted, and
+ // 2) if Lowering has already been done.
+ // Otherwise lvPromoted is valid.
+ bool lvPromotedStruct()
+ {
+#if !defined(_TARGET_64BIT_)
+ return (lvPromoted && !varTypeIsLong(lvType));
+#else // defined(_TARGET_64BIT_)
+ return lvPromoted;
+#endif // defined(_TARGET_64BIT_)
+ }
+
+ unsigned lvSize() // Size needed for storage representation. Only used for structs or TYP_BLK.
+ {
+ // TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted,
+ // where the struct itself is no longer used because all access is via its member fields.
+ // When that happens, the struct is marked as unused and its type has been changed to
+ // TYP_INT (to keep the GC tracking code from looking at it).
+ // See Compiler::raAssignVars() for details. For example:
+ // N002 ( 4, 3) [00EA067C] ------------- return struct $346
+ // N001 ( 3, 2) [00EA0628] ------------- lclVar struct(U) V03 loc2
+ // float V03.f1 (offs=0x00) -> V12 tmp7
+ // f8 (last use) (last use) $345
+ // Here, the "struct(U)" shows that the "V03 loc2" variable is unused. Not shown is that V03
+ // is now TYP_INT in the local variable table. It's not really unused, because it's in the tree.
+
+ assert(varTypeIsStruct(lvType) || (lvType == TYP_BLK) || (lvPromoted && lvUnusedStruct));
+ return (unsigned)(roundUp(lvExactSize, TARGET_POINTER_SIZE));
+ }
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ unsigned lvSlotNum; // original slot # (if remapped)
+#endif
+
+ typeInfo lvVerTypeInfo; // type info needed for verification
+
+ BYTE* lvGcLayout; // GC layout info for structs
+
+#if FANCY_ARRAY_OPT
+ GenTreePtr lvKnownDim; // array size if known
+#endif
+
+#if ASSERTION_PROP
+ BlockSet lvRefBlks; // Set of blocks that contain refs
+ GenTreePtr lvDefStmt; // Pointer to the statement with the single definition
+ void lvaDisqualifyVar(); // Call to disqualify a local variable from use in optAddCopies
+#endif
+ var_types TypeGet() const
+ {
+ return (var_types)lvType;
+ }
+ bool lvStackAligned() const
+ {
+ assert(lvIsStructField);
+ return ((lvFldOffset % sizeof(void*)) == 0);
+ }
+ bool lvNormalizeOnLoad() const
+ {
+ return varTypeIsSmall(TypeGet()) &&
+ // lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
+ (lvIsParam || lvAddrExposed || lvIsStructField);
+ }
+
+ bool lvNormalizeOnStore()
+ {
+ return varTypeIsSmall(TypeGet()) &&
+ // lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
+ !(lvIsParam || lvAddrExposed || lvIsStructField);
+ }
+
+ void lvaResetSortAgainFlag(Compiler* pComp);
+ void decRefCnts(BasicBlock::weight_t weight, Compiler* pComp, bool propagate = true);
+ void incRefCnts(BasicBlock::weight_t weight, Compiler* pComp, bool propagate = true);
+ void setPrefReg(regNumber regNum, Compiler* pComp);
+ void addPrefReg(regMaskTP regMask, Compiler* pComp);
+ bool IsFloatRegType() const
+ {
+ return isFloatRegType(lvType) || lvIsHfaRegArg();
+ }
+ var_types GetHfaType() const
+ {
+ return lvIsHfa() ? (lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF;
+ }
+ void SetHfaType(var_types type)
+ {
+ assert(varTypeIsFloating(type));
+ lvSetHfaTypeIsFloat(type == TYP_FLOAT);
+ }
+
+#ifndef LEGACY_BACKEND
+ var_types lvaArgType();
+#endif
+
+ PerSsaArray lvPerSsaData;
+
+#ifdef DEBUG
+ // Keep track of the # of SsaNames, for a bounds check.
+ unsigned lvNumSsaNames;
+#endif
+
+ // Returns the address of the per-Ssa data for the given ssaNum (which is required
+ // not to be the SsaConfig::RESERVED_SSA_NUM, which indicates that the variable is
+ // not an SSA variable).
+ LclSsaVarDsc* GetPerSsaData(unsigned ssaNum)
+ {
+ assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
+ assert(SsaConfig::RESERVED_SSA_NUM == 0);
+ unsigned zeroBased = ssaNum - SsaConfig::UNINIT_SSA_NUM;
+ assert(zeroBased < lvNumSsaNames);
+ return &lvPerSsaData.GetRef(zeroBased);
+ }
+
+#ifdef DEBUG
+public:
+ void PrintVarReg() const
+ {
+ if (isRegPairType(TypeGet()))
+ {
+ printf("%s:%s", getRegName(lvOtherReg), // hi32
+ getRegName(lvRegNum)); // lo32
+ }
+ else
+ {
+ printf("%s", getRegName(lvRegNum));
+ }
+ }
+#endif // DEBUG
+
+}; // class LclVarDsc
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX TempsInfo XX
+XX XX
+XX The temporary lclVars allocated by the compiler for code generation XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * The following keeps track of temporaries allocated in the stack frame
+ * during code-generation (after register allocation). These spill-temps are
+ * only used if we run out of registers while evaluating a tree.
+ *
+ * These are different from the more common temps allocated by lvaGrabTemp().
+ */
+
+class TempDsc
+{
+public:
+ TempDsc* tdNext;
+
+private:
+ int tdOffs;
+#ifdef DEBUG
+ static const int BAD_TEMP_OFFSET = 0xDDDDDDDD; // used as a sentinel "bad value" for tdOffs in DEBUG
+#endif // DEBUG
+
+ int tdNum;
+ BYTE tdSize;
+ var_types tdType;
+
+public:
+ TempDsc(int _tdNum, unsigned _tdSize, var_types _tdType) : tdNum(_tdNum), tdSize((BYTE)_tdSize), tdType(_tdType)
+ {
+#ifdef DEBUG
+ assert(tdNum < 0); // temps must have a negative number (so they have a different number from all local variables)
+ tdOffs = BAD_TEMP_OFFSET;
+#endif // DEBUG
+ if (tdNum != _tdNum)
+ {
+ IMPL_LIMITATION("too many spill temps");
+ }
+ }
+
+#ifdef DEBUG
+ bool tdLegalOffset() const
+ {
+ return tdOffs != BAD_TEMP_OFFSET;
+ }
+#endif // DEBUG
+
+ int tdTempOffs() const
+ {
+ assert(tdLegalOffset());
+ return tdOffs;
+ }
+ void tdSetTempOffs(int offs)
+ {
+ tdOffs = offs;
+ assert(tdLegalOffset());
+ }
+ void tdAdjustTempOffs(int offs)
+ {
+ tdOffs += offs;
+ assert(tdLegalOffset());
+ }
+
+ int tdTempNum() const
+ {
+ assert(tdNum < 0);
+ return tdNum;
+ }
+ unsigned tdTempSize() const
+ {
+ return tdSize;
+ }
+ var_types tdTempType() const
+ {
+ return tdType;
+ }
+};
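+
+// Usage sketch (values are hypothetical): spill temps get negative numbers so they can
+// never collide with local variable numbers, and the frame offset is filled in later:
+//
+//   TempDsc* temp = new (comp, CMK_Unknown) TempDsc(-1, sizeof(int), TYP_INT);
+//   temp->tdSetTempOffs(-8);                 // hypothetical frame offset
+//   assert(temp->tdTempType() == TYP_INT);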
+
+// interface to hide linearscan implementation from rest of compiler
+class LinearScanInterface
+{
+public:
+ virtual void doLinearScan() = 0;
+ virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb) = 0;
+};
+
+LinearScanInterface* getLinearScanAllocator(Compiler* comp);
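+
+// Usage sketch: a caller obtains the register allocator through the factory above and
+// drives it only through this interface:
+//
+//   LinearScanInterface* lsra = getLinearScanAllocator(comp);
+//   lsra->doLinearScan();
+//   lsra->recordVarLocationsAtStartOfBB(block);  // later, per basic block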
+
+// Information about arrays: their element type and size, and the offset of the first element.
+// We label GT_IND's that are array indices with GTF_IND_ARR_INDEX, and, for such nodes,
+// associate an array info via the map retrieved by GetArrayInfoMap(). This information is used,
+// for example, in value numbering of array index expressions.
+struct ArrayInfo
+{
+ var_types m_elemType;
+ CORINFO_CLASS_HANDLE m_elemStructType;
+ unsigned m_elemSize;
+ unsigned m_elemOffset;
+
+ ArrayInfo() : m_elemType(TYP_UNDEF), m_elemStructType(nullptr), m_elemSize(0), m_elemOffset(0)
+ {
+ }
+
+ ArrayInfo(var_types elemType, unsigned elemSize, unsigned elemOffset, CORINFO_CLASS_HANDLE elemStructType)
+ : m_elemType(elemType), m_elemStructType(elemStructType), m_elemSize(elemSize), m_elemOffset(elemOffset)
+ {
+ }
+};
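+
+// Usage sketch (numbers are hypothetical): an array of 4-byte ints whose first element
+// lives 8 bytes past the start of the array object could be described as:
+//
+//   ArrayInfo info(TYP_INT, /* elemSize */ 4, /* elemOffset */ 8, /* elemStructType */ nullptr);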
+
+// This enumeration names the phases into which we divide compilation. The phases should completely
+// partition a compilation.
+enum Phases
+{
+#define CompPhaseNameMacro(enum_nm, string_nm, short_nm, hasChildren, parent) enum_nm,
+#include "compphases.h"
+ PHASE_NUMBER_OF
+};
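+
+// Sketch of the X-macro expansion above (the entry shown is illustrative; the real
+// phase list lives in compphases.h): an entry such as
+//
+//   CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
+//
+// contributes only its enumerator name here, i.e. "PHASE_PRE_IMPORT," in Phases, while
+// other expansions of compphases.h populate the parallel PhaseNames/PhaseShortNames tables below.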
+
+extern const char* PhaseNames[];
+extern const char* PhaseEnums[];
+extern const LPCWSTR PhaseShortNames[];
+
+//---------------------------------------------------------------
+// Compilation time.
+//
+
+// A "CompTimeInfo" is a structure for tracking the compilation time of one or more methods.
+// We divide a compilation into a sequence of contiguous phases, and track the total (per-thread) cycles
+// of the compilation, as well as the cycles for each phase. We also track the number of bytecodes.
+// If there is a failure in reading a timer at any point, the "CompTimeInfo" becomes invalid, as indicated
+// by "m_timerFailure" being true.
+// If FEATURE_JIT_METHOD_PERF is not set, we define a minimal form of this, enough to let other code compile.
+struct CompTimeInfo
+{
+#ifdef FEATURE_JIT_METHOD_PERF
+ // The string names of the phases.
+ static const char* PhaseNames[];
+
+ static bool PhaseHasChildren[];
+ static int PhaseParent[];
+
+ unsigned m_byteCodeBytes;
+ unsigned __int64 m_totalCycles;
+ unsigned __int64 m_invokesByPhase[PHASE_NUMBER_OF];
+ unsigned __int64 m_cyclesByPhase[PHASE_NUMBER_OF];
+ // For better documentation, we call EndPhase on
+ // non-leaf phases. We should also call EndPhase on the
+ // last leaf subphase; obviously, the elapsed cycles between the EndPhase
+ // for the last leaf subphase and the EndPhase for an ancestor should be very small.
+ // We add all such "redundant end phase" intervals to this variable below; we print
+ // it out in a report, so we can verify that it is, indeed, very small. If it ever
+ // isn't, this means that we're doing something significant between the end of the last
+ // declared subphase and the end of its parent.
+ unsigned __int64 m_parentPhaseEndSlop;
+ bool m_timerFailure;
+
+ CompTimeInfo(unsigned byteCodeBytes);
+#endif
+};
+
+#ifdef FEATURE_JIT_METHOD_PERF
+
+// This class summarizes the JIT time information over the course of a run: the number of methods compiled,
+// and the total and maximum timings. (These are instances of the "CompTimeInfo" type described above).
+// The operation of adding a single method's timing to the summary may be performed concurrently by several
+// threads, so it is protected by a lock.
+// This class is intended to be used as a singleton type, with only a single instance.
+class CompTimeSummaryInfo
+{
+ // This lock protects the fields of all CompTimeSummaryInfo(s) (of which we expect there to be one).
+ static CritSecObject s_compTimeSummaryLock;
+
+ int m_numMethods;
+ CompTimeInfo m_total;
+ CompTimeInfo m_maximum;
+
+ int m_numFilteredMethods;
+ CompTimeInfo m_filtered;
+
+ // This method computes the number of cycles/sec for the current machine. The cycles are those counted
+ // by GetThreadCycleTime; we assume that these are of equal duration, though that is not necessarily true.
+ // If any OS interaction fails, returns 0.0.
+ double CyclesPerSecond();
+
+ // This can use whatever data you want to determine whether the value to be added
+ // belongs in the filtered section (it is always included in the unfiltered section).
+ bool IncludedInFilteredData(CompTimeInfo& info);
+
+public:
+ // This is the unique CompTimeSummaryInfo object for this instance of the runtime.
+ static CompTimeSummaryInfo s_compTimeSummary;
+
+ CompTimeSummaryInfo() : m_numMethods(0), m_total(0), m_maximum(0), m_numFilteredMethods(0), m_filtered(0)
+ {
+ }
+
+ // Assumes that "info" is a completed CompTimeInfo for a compilation; adds it to the summary.
+ // This is thread safe.
+ void AddInfo(CompTimeInfo& info);
+
+ // Print the summary information to "f".
+ // This is not thread-safe; assumed to be called by only one thread.
+ void Print(FILE* f);
+};
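+
+// Illustrative sketch (not part of the original header): a finished method's CompTimeInfo is folded into
+// the process-wide singleton, and the aggregate is printed once, e.g. at shutdown:
+//     CompTimeSummaryInfo::s_compTimeSummary.AddInfo(methodTimeInfo); // thread-safe
+//     ...
+//     CompTimeSummaryInfo::s_compTimeSummary.Print(outFile);          // not thread-safe
+// where 'methodTimeInfo' and 'outFile' are placeholders for a completed CompTimeInfo and a FILE*.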
+
+// A JitTimer encapsulates a CompTimeInfo for a single compilation. It also tracks the start of compilation,
+// and when the current phase started. This is intended to be part of a Compilation object. This is
+// disabled (FEATURE_JIT_METHOD_PERF not defined) when FEATURE_CORECLR is set, or on non-Windows platforms.
+//
+class JitTimer
+{
+ unsigned __int64 m_start; // Start of the compilation.
+ unsigned __int64 m_curPhaseStart; // Start of the current phase.
+#ifdef DEBUG
+ Phases m_lastPhase; // The last phase that was completed (or (Phases)-1 to start).
+#endif
+ CompTimeInfo m_info; // The CompTimeInfo for this compilation.
+
+ static CritSecObject s_csvLock; // Lock to protect the time log file.
+ void PrintCsvMethodStats(Compiler* comp);
+
+private:
+ void* operator new(size_t);
+ void* operator new[](size_t);
+ void operator delete(void*);
+ void operator delete[](void*);
+
+public:
+ // Initializes the timer instance.
+ JitTimer(unsigned byteCodeSize);
+
+ static JitTimer* Create(Compiler* comp, unsigned byteCodeSize)
+ {
+ return ::new (comp, CMK_Unknown) JitTimer(byteCodeSize);
+ }
+
+ static void PrintCsvHeader();
+
+ // Ends the current phase (argument is for a redundant check).
+ void EndPhase(Phases phase);
+
+ // Completes the timing of the current method, which is assumed to have "byteCodeBytes" bytes of bytecode,
+ // and adds it to "sum".
+ void Terminate(Compiler* comp, CompTimeSummaryInfo& sum);
+
+ // Attempts to query the cycle counter of the current thread. If successful, returns "true" and sets
+ // *cycles to the cycle counter value. Otherwise, returns false and sets the "m_timerFailure" flag of
+ // "m_info" to true.
+ bool GetThreadCycles(unsigned __int64* cycles)
+ {
+ bool res = CycleTimer::GetThreadCyclesS(cycles);
+ if (!res)
+ {
+ m_info.m_timerFailure = true;
+ }
+ return res;
+ }
+};
+#endif // FEATURE_JIT_METHOD_PERF
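+
+// Illustrative sketch (not part of the original header; only applies when FEATURE_JIT_METHOD_PERF is
+// defined): the expected lifecycle of a JitTimer is roughly
+//     JitTimer* timer = JitTimer::Create(comp, ilSizeInBytes); // allocate from the compiler and start timing
+//     ... run a phase ...
+//     timer->EndPhase(PHASE_EXAMPLE);                          // PHASE_EXAMPLE is a placeholder phase name
+//     ... remaining phases, each followed by EndPhase ...
+//     timer->Terminate(comp, CompTimeSummaryInfo::s_compTimeSummary);
+// where 'comp' is the owning Compiler and 'ilSizeInBytes' is the method's IL size.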
+
+//------------------- Function/Funclet info -------------------------------
+DECLARE_TYPED_ENUM(FuncKind, BYTE)
+{
+ FUNC_ROOT, // The main/root function (always id==0)
+ FUNC_HANDLER, // a funclet associated with an EH handler (finally, fault, catch, filter handler)
+ FUNC_FILTER, // a funclet associated with an EH filter
+ FUNC_COUNT
+}
+END_DECLARE_TYPED_ENUM(FuncKind, BYTE)
+
+class emitLocation;
+
+struct FuncInfoDsc
+{
+ FuncKind funKind;
+ BYTE funFlags; // Currently unused, just here for padding
+ unsigned short funEHIndex; // index, into the ebd table, of innermost EH clause corresponding to this
+ // funclet. It is only valid if the funKind field indicates this is an
+ // EH-related funclet: FUNC_HANDLER or FUNC_FILTER
+
+#if defined(_TARGET_AMD64_)
+
+ // TODO-AMD64-Throughput: make the AMD64 info more like the ARM info to avoid having this large static array.
+ emitLocation* startLoc;
+ emitLocation* endLoc;
+ emitLocation* coldStartLoc; // locations for the cold section, if there is one.
+ emitLocation* coldEndLoc;
+ UNWIND_INFO unwindHeader;
+ // Maximum of 255 UNWIND_CODE 'nodes' and then the unwind header. If there is an odd
+ // number of codes, the VM or Zapper will 4-byte align the whole thing.
+ BYTE unwindCodes[offsetof(UNWIND_INFO, UnwindCode) + (0xFF * sizeof(UNWIND_CODE))];
+ unsigned unwindCodeSlot;
+
+#ifdef UNIX_AMD64_ABI
+ jitstd::vector<CFI_CODE>* cfiCodes;
+#endif // UNIX_AMD64_ABI
+
+#elif defined(_TARGET_ARMARCH_)
+
+ UnwindInfo uwi; // Unwind information for this function/funclet's hot section
+ UnwindInfo* uwiCold; // Unwind information for this function/funclet's cold section
+ // Note: we only have a pointer here instead of the actual object,
+ // to save memory in the JIT case (compared to the NGEN case),
+ // since in the JIT case we don't have any cold section.
+ // Note 2: we currently don't support hot/cold splitting in functions
+ // with EH, so uwiCold will be NULL for all funclets.
+
+#endif // _TARGET_ARMARCH_
+
+ // Eventually we may want to move rsModifiedRegsMask, lvaOutgoingArgSize, and anything else
+ // that isn't shared between the main function body and funclets into this struct.
+};
+
+struct fgArgTabEntry
+{
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ fgArgTabEntry()
+ {
+ otherRegNum = REG_NA;
+ isStruct = false; // is this a struct arg
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ GenTreePtr node; // Initially points at the Op1 field of 'parent', but if the argument is replaced with a GT_ASG or
+ // placeholder node, it will point at the actual argument in the gtCallLateArgs list.
+ GenTreePtr parent; // Points at the GT_LIST node in the gtCallArgs for this argument
+
+ unsigned argNum; // The original argument number, also specifies the required argument evaluation order from the IL
+
+ regNumber regNum; // The (first) register to use when passing this argument, set to REG_STK for arguments passed on
+ // the stack
+ unsigned numRegs; // Count of number of registers that this argument uses
+
+ // A slot is a pointer sized region in the OutArg area.
+ unsigned slotNum; // When an argument is passed in the OutArg area this is the slot number in the OutArg area
+ unsigned numSlots; // Count of number of slots that this argument uses
+
+ unsigned alignment; // 1 or 2 (slots/registers)
+ unsigned lateArgInx; // index into gtCallLateArgs list
+ unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
+
+ bool isSplit : 1; // True when this argument is split between the registers and OutArg area
+ bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
+ bool needPlace : 1; // True when we must replace this argument with a placeholder node
+ bool isTmp : 1; // True when we set up a temp LclVar for this argument due to size issues with the struct
+ bool processed : 1; // True when we have decided the evaluation order for this argument in the gtCallLateArgs
+ bool isHfaRegArg : 1; // True when the argument is passed as a HFA in FP registers.
+ bool isBackFilled : 1; // True when the argument fills a register slot skipped due to alignment requirements of
+ // previous arguments.
+ bool isNonStandard : 1; // True if it is an arg that is passed in a reg other than a standard arg reg, or is forced
+ // to be on the stack despite its arg list position.
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ bool isStruct : 1; // True if this is a struct arg
+
+ regNumber otherRegNum; // The (second) register to use when passing this argument.
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef _TARGET_ARM_
+ void SetIsHfaRegArg(bool hfaRegArg)
+ {
+ isHfaRegArg = hfaRegArg;
+ }
+
+ void SetIsBackFilled(bool backFilled)
+ {
+ isBackFilled = backFilled;
+ }
+
+ bool IsBackFilled() const
+ {
+ return isBackFilled;
+ }
+#else // !_TARGET_ARM_
+ // To keep the callers simpler, we allow these calls (and the isHfaRegArg and isBackFilled data members) on all
+ // platforms.
+ void SetIsHfaRegArg(bool hfaRegArg)
+ {
+ }
+
+ void SetIsBackFilled(bool backFilled)
+ {
+ }
+
+ bool IsBackFilled() const
+ {
+ return false;
+ }
+#endif // !_TARGET_ARM_
+
+#ifdef DEBUG
+ void Dump();
+#endif
+};
+typedef struct fgArgTabEntry* fgArgTabEntryPtr;
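+
+// Illustrative sketch (not part of the original header): for a call whose third IL argument is an 8-byte
+// struct passed on the stack, the corresponding fgArgTabEntry would roughly have argNum == 2,
+// regNum == REG_STK, numRegs == 0, slotNum set to its slot in the OutArg area, and numSlots == 1
+// (assuming pointer-sized slots on a 64-bit target); an int passed in a register would instead have
+// numRegs == 1 and numSlots == 0. The exact values are target- and ABI-dependent.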
+
+//-------------------------------------------------------------------------
+//
+// The class fgArgInfo is used to handle the arguments
+// when morphing a GT_CALL node.
+//
+
+class fgArgInfo
+{
+ Compiler* compiler; // Back pointer to the compiler instance so that we can allocate memory
+ GenTreePtr callTree; // Back pointer to the GT_CALL node for this fgArgInfo
+ unsigned argCount; // Updatable arg count value
+ unsigned nextSlotNum; // Updatable slot count value
+ unsigned stkLevel; // Stack depth when we make this call (for x86)
+
+ unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
+ bool hasRegArgs; // true if we have one or more register arguments
+ bool hasStackArgs; // true if we have one or more stack arguments
+ bool argsComplete; // marker for state
+ bool argsSorted; // marker for state
+ fgArgTabEntryPtr* argTable; // variable-sized array of per-argument descriptions (i.e., argTable[argTableSize])
+
+private:
+ void AddArg(fgArgTabEntryPtr curArgTabEntry);
+
+public:
+ fgArgInfo(Compiler* comp, GenTreePtr call, unsigned argCount);
+ fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall);
+
+ fgArgTabEntryPtr AddRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ fgArgTabEntryPtr AddRegArg(
+ unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum = REG_NA,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr = nullptr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ fgArgTabEntryPtr AddStkArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ unsigned numSlots,
+ unsigned alignment FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct));
+
+ void RemorphReset();
+ fgArgTabEntryPtr RemorphRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment);
+
+ void RemorphStkArg(unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment);
+
+ void SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots);
+
+ void EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode);
+
+ void ArgsComplete();
+
+ void SortArgs();
+
+ void EvalArgsToTemps();
+
+ void RecordStkLevel(unsigned stkLvl);
+ unsigned RetrieveStkLevel();
+
+ unsigned ArgCount()
+ {
+ return argCount;
+ }
+ fgArgTabEntryPtr* ArgTable()
+ {
+ return argTable;
+ }
+ unsigned GetNextSlotNum()
+ {
+ return nextSlotNum;
+ }
+ bool HasRegArgs()
+ {
+ return hasRegArgs;
+ }
+ bool HasStackArgs()
+ {
+ return hasStackArgs;
+ }
+};
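+
+// Illustrative sketch (not part of the original header, and much simplified relative to the real morph
+// logic): during fgMorphArgs the table is typically populated and then finalized along these lines:
+//     fgArgInfo* argInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
+//     argInfo->AddRegArg(0, thisArg, parentList, firstArgReg, 1, 1); // e.g. 'this' in the first arg register
+//     argInfo->AddStkArg(1, structArg, parentList, 2, 1);            // a two-slot stack argument
+//     argInfo->ArgsComplete();
+//     argInfo->SortArgs();
+//     argInfo->EvalArgsToTemps();
+// Names such as numArgs, thisArg, parentList, firstArgReg and structArg are placeholders.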
+
+#ifdef DEBUG
+// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+// We have the ability to mark source expressions with "Test Labels."
+// These drive assertions within the JIT, or internal JIT testing. For example, we could label expressions
+// that should be CSE defs, and other expressions that should be uses of those defs, with a shared label.
+
+enum TestLabel // This must be kept identical to System.Runtime.CompilerServices.JitTestLabel.TestLabel.
+{
+ TL_SsaName,
+ TL_VN, // Defines a "VN equivalence class". (For full VN, including exceptions thrown).
+ TL_VNNorm, // Like above, but uses the non-exceptional value of the expression.
+ TL_CSE_Def, // This must be identified in the JIT as a CSE def
+ TL_CSE_Use, // This must be identified in the JIT as a CSE use
+ TL_LoopHoist, // Expression must (or must not) be hoisted out of the loop.
+};
+
+struct TestLabelAndNum
+{
+ TestLabel m_tl;
+ ssize_t m_num;
+
+ TestLabelAndNum() : m_tl(TestLabel(0)), m_num(0)
+ {
+ }
+};
+
+typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, TestLabelAndNum, JitSimplerHashBehavior> NodeToTestDataMap;
+
+// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+#endif // DEBUG
+
+// This class implements the "IAllocator" interface, so that we can use
+// utilcode collection classes in the JIT, and have them use the JIT's allocator.
+
+class CompAllocator : public IAllocator
+{
+ Compiler* m_comp;
+#if MEASURE_MEM_ALLOC
+ CompMemKind m_cmk;
+#endif
+public:
+ CompAllocator(Compiler* comp, CompMemKind cmk)
+ : m_comp(comp)
+#if MEASURE_MEM_ALLOC
+ , m_cmk(cmk)
+#endif
+ {
+ }
+
+ inline void* Alloc(size_t sz);
+
+ inline void* ArrayAlloc(size_t elems, size_t elemSize);
+
+ // For the compiler's no-release allocator, free operations are no-ops.
+ void Free(void* p)
+ {
+ }
+};
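+
+// Illustrative sketch (not part of the original header): the allocator hands out memory from the
+// compiler's no-release pool, so callers never free individual allocations:
+//     CompAllocator alloc(this, CMK_Unknown);
+//     void* block = alloc.Alloc(count * sizeof(int));
+//     void* array = alloc.ArrayAlloc(count, sizeof(int));
+//     alloc.Free(block); // intentionally a no-op
+// Here 'count' is a placeholder, and CMK_Unknown is one of the CompMemKind accounting categories.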
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX The big guy. The sections are currently organized as : XX
+XX XX
+XX o GenTree and BasicBlock XX
+XX o LclVarsInfo XX
+XX o Importer XX
+XX o FlowGraph XX
+XX o Optimizer XX
+XX o RegAlloc XX
+XX o EEInterface XX
+XX o TempsInfo XX
+XX o RegSet XX
+XX o GCInfo XX
+XX o Instruction XX
+XX o ScopeInfo XX
+XX o PrologScopeInfo XX
+XX o CodeGenerator XX
+XX o UnwindInfo XX
+XX o Compiler XX
+XX o typeInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+class Compiler
+{
+ friend class emitter;
+ friend class UnwindInfo;
+ friend class UnwindFragmentInfo;
+ friend class UnwindEpilogInfo;
+ friend class JitTimer;
+ friend class LinearScan;
+ friend class fgArgInfo;
+ friend class Rationalizer;
+ friend class Phase;
+ friend class Lowering;
+ friend class CSE_DataFlow;
+ friend class CSE_Heuristic;
+ friend class CodeGenInterface;
+ friend class CodeGen;
+ friend class LclVarDsc;
+ friend class TempDsc;
+ friend class LIR;
+ friend class ObjectAllocator;
+
+#ifndef _TARGET_64BIT_
+ friend class DecomposeLongs;
+#endif // !_TARGET_64BIT_
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Misc structs definitions XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ hashBvGlobalData hbvGlobalData; // Used by the hashBv bitvector package.
+
+#ifdef DEBUG
+ bool verbose;
+ bool dumpIR;
+ bool dumpIRNodes;
+ bool dumpIRTypes;
+ bool dumpIRKinds;
+ bool dumpIRLocals;
+ bool dumpIRRegs;
+ bool dumpIRSsa;
+ bool dumpIRValnums;
+ bool dumpIRCosts;
+ bool dumpIRFlags;
+ bool dumpIRNoLists;
+ bool dumpIRNoLeafs;
+ bool dumpIRNoStmts;
+ bool dumpIRTrees;
+ bool dumpIRLinear;
+ bool dumpIRDataflow;
+ bool dumpIRBlockHeaders;
+ bool dumpIRExit;
+ LPCWSTR dumpIRPhase;
+ LPCWSTR dumpIRFormat;
+ bool verboseTrees;
+ bool shouldUseVerboseTrees();
+ bool asciiTrees; // If true, dump trees using only ASCII characters
+ bool shouldDumpASCIITrees();
+ bool verboseSsa; // If true, produce especially verbose dump output in SSA construction.
+ bool shouldUseVerboseSsa();
+ bool treesBeforeAfterMorph; // If true, print trees before/after morphing (paired by an intra-compilation id: morphNum).
+ int morphNum; // This counts the trees that have been morphed, allowing us to label each uniquely.
+
+ const char* VarNameToStr(VarName name)
+ {
+ return name;
+ }
+
+ DWORD expensiveDebugCheckLevel;
+#endif
+
+#if FEATURE_MULTIREG_RET
+ GenTreePtr impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
+#endif // FEATURE_MULTIREG_RET
+
+#ifdef ARM_SOFTFP
+ bool isSingleFloat32Struct(CORINFO_CLASS_HANDLE hClass);
+#endif // ARM_SOFTFP
+
+ //-------------------------------------------------------------------------
+ // Functions to handle homogeneous floating-point aggregates (HFAs) in ARM.
+ // HFAs are structs of one to four elements, where each element is the same
+ // type, either all float or all double. They are treated specially
+ // in the ARM Procedure Call Standard; specifically, they are passed in
+ // floating-point registers instead of the general purpose registers.
+ //
+
+ bool IsHfa(CORINFO_CLASS_HANDLE hClass);
+ bool IsHfa(GenTreePtr tree);
+
+ var_types GetHfaType(GenTreePtr tree);
+ unsigned GetHfaCount(GenTreePtr tree);
+
+ var_types GetHfaType(CORINFO_CLASS_HANDLE hClass);
+ unsigned GetHfaCount(CORINFO_CLASS_HANDLE hClass);
+
+ bool IsMultiRegPassedType(CORINFO_CLASS_HANDLE hClass);
+ bool IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass);
+
+ //-------------------------------------------------------------------------
+ // The following is used for validating format of EH table
+ //
+
+ struct EHNodeDsc;
+ typedef struct EHNodeDsc* pEHNodeDsc;
+
+ EHNodeDsc* ehnTree; // root of the tree comprising the EHnodes.
+ EHNodeDsc* ehnNext; // next free EHNodeDsc in the block of nodes allocated for building the tree.
+
+ struct EHNodeDsc
+ {
+ enum EHBlockType
+ {
+ TryNode,
+ FilterNode,
+ HandlerNode,
+ FinallyNode,
+ FaultNode
+ };
+
+ EHBlockType ehnBlockType; // kind of EH block
+ IL_OFFSET ehnStartOffset; // IL offset of start of the EH block
+ IL_OFFSET ehnEndOffset; // IL offset past end of the EH block. (TODO: looks like verInsertEhNode() sets this to
+ // the last IL offset, not "one past the last one", i.e., the range Start to End is
+ // inclusive).
+ pEHNodeDsc ehnNext; // next (non-nested) block in sequential order
+ pEHNodeDsc ehnChild; // leftmost nested block
+ union {
+ pEHNodeDsc ehnTryNode; // for filters and handlers, the corresponding try node
+ pEHNodeDsc ehnHandlerNode; // for a try node, the corresponding handler node
+ };
+ pEHNodeDsc ehnFilterNode; // the filter node, if this is a try node that has a filter; otherwise 0
+ pEHNodeDsc ehnEquivalent; // if blockType == TryNode: another try node with the same start and end offsets (a 'mutual protect' try)
+
+ inline void ehnSetTryNodeType()
+ {
+ ehnBlockType = TryNode;
+ }
+ inline void ehnSetFilterNodeType()
+ {
+ ehnBlockType = FilterNode;
+ }
+ inline void ehnSetHandlerNodeType()
+ {
+ ehnBlockType = HandlerNode;
+ }
+ inline void ehnSetFinallyNodeType()
+ {
+ ehnBlockType = FinallyNode;
+ }
+ inline void ehnSetFaultNodeType()
+ {
+ ehnBlockType = FaultNode;
+ }
+
+ inline BOOL ehnIsTryBlock()
+ {
+ return ehnBlockType == TryNode;
+ }
+ inline BOOL ehnIsFilterBlock()
+ {
+ return ehnBlockType == FilterNode;
+ }
+ inline BOOL ehnIsHandlerBlock()
+ {
+ return ehnBlockType == HandlerNode;
+ }
+ inline BOOL ehnIsFinallyBlock()
+ {
+ return ehnBlockType == FinallyNode;
+ }
+ inline BOOL ehnIsFaultBlock()
+ {
+ return ehnBlockType == FaultNode;
+ }
+
+ // returns true if there is any overlap between the two nodes
+ static inline BOOL ehnIsOverlap(pEHNodeDsc node1, pEHNodeDsc node2)
+ {
+ if (node1->ehnStartOffset < node2->ehnStartOffset)
+ {
+ return (node1->ehnEndOffset >= node2->ehnStartOffset);
+ }
+ else
+ {
+ return (node1->ehnStartOffset <= node2->ehnEndOffset);
+ }
+ }
+
+ // returns true iff 'inner' is completely nested inside 'outer' (callers fail with BADCODE when it is not)
+ static inline BOOL ehnIsNested(pEHNodeDsc inner, pEHNodeDsc outer)
+ {
+ return ((inner->ehnStartOffset >= outer->ehnStartOffset) && (inner->ehnEndOffset <= outer->ehnEndOffset));
+ }
+ };
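+
+ // Worked example (illustrative, not part of the original header), remembering that the end offsets are
+ // treated as inclusive: nodes covering IL [0, 10] and [5, 20] overlap per ehnIsOverlap (10 >= 5), while
+ // [0, 10] and [20, 30] do not (10 < 20); ehnIsNested holds for an inner [5, 8] and an outer [0, 10]
+ // because 5 >= 0 and 8 <= 10.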
+
+//-------------------------------------------------------------------------
+// Exception handling functions
+//
+
+#if !FEATURE_EH_FUNCLETS
+
+ bool ehNeedsShadowSPslots()
+ {
+ return (info.compXcptnsCount || opts.compDbgEnC);
+ }
+
+ // 0 for methods with no EH
+ // 1 for methods with non-nested EH, or where only the try blocks are nested
+ // 2 for a method with a catch within a catch
+ // etc.
+ unsigned ehMaxHndNestingCount;
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ static bool jitIsBetween(unsigned value, unsigned start, unsigned end);
+ static bool jitIsBetweenInclusive(unsigned value, unsigned start, unsigned end);
+
+ bool bbInCatchHandlerILRange(BasicBlock* blk);
+ bool bbInFilterILRange(BasicBlock* blk);
+ bool bbInTryRegions(unsigned regionIndex, BasicBlock* blk);
+ bool bbInExnFlowRegions(unsigned regionIndex, BasicBlock* blk);
+ bool bbInHandlerRegions(unsigned regionIndex, BasicBlock* blk);
+ bool bbInCatchHandlerRegions(BasicBlock* tryBlk, BasicBlock* hndBlk);
+ unsigned short bbFindInnermostCommonTryRegion(BasicBlock* bbOne, BasicBlock* bbTwo);
+
+ unsigned short bbFindInnermostTryRegionContainingHandlerRegion(unsigned handlerIndex);
+ unsigned short bbFindInnermostHandlerRegionContainingTryRegion(unsigned tryIndex);
+
+ // Returns true if "block" is the start of a try region.
+ bool bbIsTryBeg(BasicBlock* block);
+
+ // Returns true if "block" is the start of a handler or filter region.
+ bool bbIsHandlerBeg(BasicBlock* block);
+
+ // Returns true iff "block" is where control flows if an exception is raised in the
+ // try region, and sets "*regionIndex" to the index of the try for the handler.
+ // Differs from "IsHandlerBeg" in the case of filters, where this is true for the first
+ // block of the filter, but not for the filter's handler.
+ bool bbIsExFlowBlock(BasicBlock* block, unsigned* regionIndex);
+
+ bool ehHasCallableHandlers();
+
+ // Return the EH descriptor for the given region index.
+ EHblkDsc* ehGetDsc(unsigned regionIndex);
+
+ // Return the EH index given a region descriptor.
+ unsigned ehGetIndex(EHblkDsc* ehDsc);
+
+ // Return the EH descriptor index of the enclosing try, for the given region index.
+ unsigned ehGetEnclosingTryIndex(unsigned regionIndex);
+
+ // Return the EH descriptor index of the enclosing handler, for the given region index.
+ unsigned ehGetEnclosingHndIndex(unsigned regionIndex);
+
+ // Return the EH descriptor for the most nested 'try' region this BasicBlock is a member of (or nullptr if this
+ // block is not in a 'try' region).
+ EHblkDsc* ehGetBlockTryDsc(BasicBlock* block);
+
+ // Return the EH descriptor for the most nested filter or handler region this BasicBlock is a member of (or nullptr
+ // if this block is not in a filter or handler region).
+ EHblkDsc* ehGetBlockHndDsc(BasicBlock* block);
+
+ // Return the EH descriptor for the most nested region that may handle exceptions raised in this BasicBlock (or
+ // nullptr if this block's exceptions propagate to caller).
+ EHblkDsc* ehGetBlockExnFlowDsc(BasicBlock* block);
+
+ EHblkDsc* ehIsBlockTryLast(BasicBlock* block);
+ EHblkDsc* ehIsBlockHndLast(BasicBlock* block);
+ bool ehIsBlockEHLast(BasicBlock* block);
+
+ bool ehBlockHasExnFlowDsc(BasicBlock* block);
+
+ // Return the region index of the most nested EH region this block is in.
+ unsigned ehGetMostNestedRegionIndex(BasicBlock* block, bool* inTryRegion);
+
+ // Find the true enclosing try index, ignoring 'mutual protect' try. Uses IL ranges to check.
+ unsigned ehTrueEnclosingTryIndexIL(unsigned regionIndex);
+
+ // Return the index of the most nested enclosing region for a particular EH region. Returns NO_ENCLOSING_INDEX
+ // if there is no enclosing region. If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion'
+ // is set to 'true' if the enclosing region is a 'try', or 'false' if the enclosing region is a handler.
+ // (It can never be a filter.)
+ unsigned ehGetEnclosingRegionIndex(unsigned regionIndex, bool* inTryRegion);
+
+ // A block has been deleted. Update the EH table appropriately.
+ void ehUpdateForDeletedBlock(BasicBlock* block);
+
+ // Determine whether a block can be deleted while preserving the EH normalization rules.
+ bool ehCanDeleteEmptyBlock(BasicBlock* block);
+
+ // Update the 'last' pointers in the EH table to reflect new or deleted blocks in an EH region.
+ void ehUpdateLastBlocks(BasicBlock* oldLast, BasicBlock* newLast);
+
+ // For a finally handler, find the region index that the BBJ_CALLFINALLY lives in that calls the handler,
+ // or NO_ENCLOSING_INDEX if the BBJ_CALLFINALLY lives in the main function body. Normally, the index
+ // is the same index as the handler (and the BBJ_CALLFINALLY lives in the 'try' region), but for AMD64 the
+ // BBJ_CALLFINALLY lives in the enclosing try or handler region, whichever is more nested, or the main function
+ // body. If the returned index is not NO_ENCLOSING_INDEX, then '*inTryRegion' is set to 'true' if the
+ // BBJ_CALLFINALLY lives in the returned index's 'try' region, or 'false' if it lives in the handler region. (It never
+ // lives in a filter.)
+ unsigned ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTryRegion);
+
+ // Find the range of basic blocks containing all the BBJ_CALLFINALLY blocks that target the 'finallyIndex' region's
+ // handler. Set begBlk to the first block, and endBlk to the block after the last block of the range
+ // (nullptr if the last block is the last block in the program).
+ // Precondition: 'finallyIndex' is the EH region of a try/finally clause.
+ void ehGetCallFinallyBlockRange(unsigned finallyIndex, BasicBlock** begBlk, BasicBlock** endBlk);
+
+#ifdef DEBUG
+ // Given a BBJ_CALLFINALLY block and the EH region index of the finally it is calling, return
+ // 'true' if the BBJ_CALLFINALLY is in the correct EH region.
+ bool ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsigned finallyIndex);
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+ // Do we need a PSPSym in the main function? For codegen purposes, we only need one
+ // if there is a filter that protects a region with a nested EH clause (such as a
+ // try/catch nested in the 'try' body of a try/filter/filter-handler). See
+ // genFuncletProlog() for more details. However, the VM seems to use it for more
+ // purposes, maybe including debugging. Until we are sure otherwise, always create
+ // a PSPSym for functions with any EH.
+ bool ehNeedsPSPSym() const
+ {
+ return compHndBBtabCount > 0;
+ }
+
+ bool ehAnyFunclets(); // Are there any funclets in this function?
+ unsigned ehFuncletCount(); // Return the count of funclets in the function
+
+ unsigned bbThrowIndex(BasicBlock* blk); // Get the index to use as the cache key for sharing throw blocks
+#else // !FEATURE_EH_FUNCLETS
+ bool ehAnyFunclets()
+ {
+ return false;
+ }
+ unsigned ehFuncletCount()
+ {
+ return 0;
+ }
+
+ unsigned bbThrowIndex(BasicBlock* blk)
+ {
+ return blk->bbTryIndex;
+ } // Get the index to use as the cache key for sharing throw blocks
+#endif // !FEATURE_EH_FUNCLETS
+
+ // Returns a flowList representing the "EH predecessors" of "blk". These are the normal predecessors of
+ // "blk", plus one special case: if "blk" is the first block of a handler, considers the predecessor(s) of the first
+ // first block of the corresponding try region to be "EH predecessors". (If there is a single such predecessor,
+ // for example, we want to consider that the immediate dominator of the catch clause start block, so it's
+ // convenient to also consider it a predecessor.)
+ flowList* BlockPredsWithEH(BasicBlock* blk);
+
+ // This table is useful for memoization of the method above.
+ typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, flowList*, JitSimplerHashBehavior>
+ BlockToFlowListMap;
+ BlockToFlowListMap* m_blockToEHPreds;
+ BlockToFlowListMap* GetBlockToEHPreds()
+ {
+ if (m_blockToEHPreds == nullptr)
+ {
+ m_blockToEHPreds = new (getAllocator()) BlockToFlowListMap(getAllocator());
+ }
+ return m_blockToEHPreds;
+ }
+
+ void* ehEmitCookie(BasicBlock* block);
+ UNATIVE_OFFSET ehCodeOffset(BasicBlock* block);
+
+ EHblkDsc* ehInitHndRange(BasicBlock* src, IL_OFFSET* hndBeg, IL_OFFSET* hndEnd, bool* inFilter);
+
+ EHblkDsc* ehInitTryRange(BasicBlock* src, IL_OFFSET* tryBeg, IL_OFFSET* tryEnd);
+
+ EHblkDsc* ehInitHndBlockRange(BasicBlock* blk, BasicBlock** hndBeg, BasicBlock** hndLast, bool* inFilter);
+
+ EHblkDsc* ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, BasicBlock** tryLast);
+
+ void fgSetTryEnd(EHblkDsc* handlerTab, BasicBlock* newTryLast);
+
+ void fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast);
+
+ void fgSkipRmvdBlocks(EHblkDsc* handlerTab);
+
+ void fgAllocEHTable();
+
+ void fgRemoveEHTableEntry(unsigned XTnum);
+
+#if FEATURE_EH_FUNCLETS
+
+ EHblkDsc* fgAddEHTableEntry(unsigned XTnum);
+
+#endif // FEATURE_EH_FUNCLETS
+
+#if !FEATURE_EH
+ void fgRemoveEH();
+#endif // !FEATURE_EH
+
+ void fgSortEHTable();
+
+ // Causes the EH table to obey some well-formedness conditions, by inserting
+ // empty BB's when necessary:
+ // * No block is both the first block of a handler and the first block of a try.
+ // * No block is the first block of multiple 'try' regions.
+ // * No block is the last block of multiple EH regions.
+ void fgNormalizeEH();
+ bool fgNormalizeEHCase1();
+ bool fgNormalizeEHCase2();
+ bool fgNormalizeEHCase3();
+
+#ifdef DEBUG
+ void dispIncomingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
+ void dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
+ void fgVerifyHandlerTab();
+ void fgDispHandlerTab();
+#endif // DEBUG
+
+ bool fgNeedToSortEHTable;
+
+ void verInitEHTree(unsigned numEHClauses);
+ void verInsertEhNode(CORINFO_EH_CLAUSE* clause, EHblkDsc* handlerTab);
+ void verInsertEhNodeInTree(EHNodeDsc** ppRoot, EHNodeDsc* node);
+ void verInsertEhNodeParent(EHNodeDsc** ppRoot, EHNodeDsc* node);
+ void verCheckNestingLevel(EHNodeDsc* initRoot);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX GenTree and BasicBlock XX
+ XX XX
+ XX Functions to allocate and display the GenTrees and BasicBlocks XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ // Functions to create nodes
+ GenTreeStmt* gtNewStmt(GenTreePtr expr = nullptr, IL_OFFSETX offset = BAD_IL_OFFSET);
+
+ // For unary opers.
+ GenTreePtr gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, bool doSimplifications = TRUE);
+
+ // For binary opers.
+ GenTreePtr gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2);
+
+ GenTreePtr gtNewQmarkNode(var_types type, GenTreePtr cond, GenTreePtr colon);
+
+ GenTreePtr gtNewLargeOperNode(genTreeOps oper,
+ var_types type = TYP_I_IMPL,
+ GenTreePtr op1 = nullptr,
+ GenTreePtr op2 = nullptr);
+
+ GenTreeIntCon* gtNewIconNode(ssize_t value, var_types type = TYP_INT);
+
+ GenTree* gtNewPhysRegNode(regNumber reg, var_types type);
+
+ GenTree* gtNewPhysRegNode(regNumber reg, GenTree* src);
+
+ GenTreePtr gtNewJmpTableNode();
+ GenTreePtr gtNewIconHandleNode(
+ size_t value, unsigned flags, FieldSeqNode* fields = nullptr, unsigned handle1 = 0, void* handle2 = nullptr);
+
+ unsigned gtTokenToIconFlags(unsigned token);
+
+ GenTreePtr gtNewIconEmbHndNode(void* value,
+ void* pValue,
+ unsigned flags,
+ unsigned handle1 = 0,
+ void* handle2 = nullptr,
+ void* compileTimeHandle = nullptr);
+
+ GenTreePtr gtNewIconEmbScpHndNode(CORINFO_MODULE_HANDLE scpHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+ GenTreePtr gtNewIconEmbClsHndNode(CORINFO_CLASS_HANDLE clsHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+ GenTreePtr gtNewIconEmbMethHndNode(CORINFO_METHOD_HANDLE methHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+ GenTreePtr gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd, unsigned hnd1 = 0, void* hnd2 = nullptr);
+
+ GenTreePtr gtNewStringLiteralNode(InfoAccessType iat, void* pValue);
+
+ GenTreePtr gtNewLconNode(__int64 value);
+
+ GenTreePtr gtNewDconNode(double value);
+
+ GenTreePtr gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle);
+
+ GenTreePtr gtNewZeroConNode(var_types type);
+
+ GenTreePtr gtNewOneConNode(var_types type);
+
+ GenTreeBlk* gtNewBlkOpNode(
+ genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool isVolatile);
+
+ GenTree* gtNewBlkOpNode(GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock);
+
+protected:
+ void gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOrFillVal, bool isVolatile);
+
+public:
+ GenTree* gtNewObjNode(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr);
+ void gtSetObjGcInfo(GenTreeObj* objNode);
+ GenTree* gtNewStructVal(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr);
+ GenTree* gtNewBlockVal(GenTreePtr addr, unsigned size);
+
+ GenTree* gtNewCpObjNode(GenTreePtr dst, GenTreePtr src, CORINFO_CLASS_HANDLE structHnd, bool isVolatile);
+
+ GenTreeArgList* gtNewListNode(GenTreePtr op1, GenTreeArgList* op2);
+
+ GenTreeCall* gtNewCallNode(gtCallTypes callType,
+ CORINFO_METHOD_HANDLE handle,
+ var_types type,
+ GenTreeArgList* args,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET);
+
+ GenTreeCall* gtNewIndCallNode(GenTreePtr addr,
+ var_types type,
+ GenTreeArgList* args,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET);
+
+ GenTreeCall* gtNewHelperCallNode(unsigned helper,
+ var_types type,
+ unsigned flags = 0,
+ GenTreeArgList* args = nullptr);
+
+ GenTreePtr gtNewLclvNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
+
+#ifdef FEATURE_SIMD
+ GenTreeSIMD* gtNewSIMDNode(
+ var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size);
+ GenTreeSIMD* gtNewSIMDNode(var_types type,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ SIMDIntrinsicID simdIntrinsicID,
+ var_types baseType,
+ unsigned size);
+#endif
+
+ GenTreePtr gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
+ GenTreeLclFld* gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset);
+ GenTreePtr gtNewInlineCandidateReturnExpr(GenTreePtr inlineCandidate, var_types type);
+
+ GenTreePtr gtNewCodeRef(BasicBlock* block);
+
+ GenTreePtr gtNewFieldRef(
+ var_types typ, CORINFO_FIELD_HANDLE fldHnd, GenTreePtr obj = nullptr, DWORD offset = 0, bool nullcheck = false);
+
+ GenTreePtr gtNewIndexRef(var_types typ, GenTreePtr arrayOp, GenTreePtr indexOp);
+
+ GenTreeArgList* gtNewArgList(GenTreePtr op);
+ GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2);
+ GenTreeArgList* gtNewArgList(GenTreePtr op1, GenTreePtr op2, GenTreePtr op3);
+
+ GenTreeArgList* gtNewAggregate(GenTree* element);
+
+ static fgArgTabEntryPtr gtArgEntryByArgNum(GenTreePtr call, unsigned argNum);
+ static fgArgTabEntryPtr gtArgEntryByNode(GenTreePtr call, GenTreePtr node);
+ fgArgTabEntryPtr gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx);
+ bool gtArgIsThisPtr(fgArgTabEntryPtr argEntry);
+
+ GenTreePtr gtNewAssignNode(GenTreePtr dst, GenTreePtr src);
+
+ GenTreePtr gtNewTempAssign(unsigned tmp, GenTreePtr val);
+
+ GenTreePtr gtNewRefCOMfield(GenTreePtr objPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp,
+ CORINFO_CLASS_HANDLE structType,
+ GenTreePtr assg);
+
+ GenTreePtr gtNewNothingNode();
+
+ GenTreePtr gtNewArgPlaceHolderNode(var_types type, CORINFO_CLASS_HANDLE clsHnd);
+
+ GenTreePtr gtUnusedValNode(GenTreePtr expr);
+
+ GenTreePtr gtNewCastNode(var_types typ, GenTreePtr op1, var_types castType);
+
+ GenTreePtr gtNewCastNodeL(var_types typ, GenTreePtr op1, var_types castType);
+
+ GenTreePtr gtNewAllocObjNode(unsigned int helper, CORINFO_CLASS_HANDLE clsHnd, var_types type, GenTreePtr op1);
+
+ //------------------------------------------------------------------------
+ // Other GenTree functions
+
+ GenTreePtr gtClone(GenTree* tree, bool complexOK = false);
+
+ GenTreePtr gtCloneExpr(GenTree* tree, unsigned addFlags = 0, unsigned varNum = (unsigned)-1, int varVal = 0);
+
+ GenTreePtr gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree);
+
+ void gtUpdateSideEffects(GenTreePtr tree, unsigned oldGtFlags, unsigned newGtFlags);
+
+ // Returns "true" iff the complexity (not formally defined, but first interpretation
+ // is #of nodes in subtree) of "tree" is greater than "limit".
+ // (This is somewhat redundant with the "gtCostEx/gtCostSz" fields, but can be used
+ // before they have been set.)
+ bool gtComplexityExceeds(GenTreePtr* tree, unsigned limit);
+
+ bool gtCompareTree(GenTree* op1, GenTree* op2);
+
+ GenTreePtr gtReverseCond(GenTree* tree);
+
+ bool gtHasRef(GenTree* tree, ssize_t lclNum, bool defOnly);
+
+ bool gtHasLocalsWithAddrOp(GenTreePtr tree);
+
+ unsigned gtHashValue(GenTree* tree);
+
+ unsigned gtSetListOrder(GenTree* list, bool regs);
+
+ void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly);
+
+#ifdef DEBUG
+ GenTreePtr gtWalkOpEffectiveVal(GenTreePtr op);
+#endif
+
+ void gtPrepareCost(GenTree* tree);
+ bool gtIsLikelyRegVar(GenTree* tree);
+
+ unsigned gtSetEvalOrderAndRestoreFPstkLevel(GenTree* tree);
+
+ // Returns true iff the secondNode can be swapped with firstNode.
+ bool gtCanSwapOrder(GenTree* firstNode, GenTree* secondNode);
+
+ unsigned gtSetEvalOrder(GenTree* tree);
+
+#if FEATURE_STACK_FP_X87
+ bool gtFPstLvlRedo;
+ void gtComputeFPlvls(GenTreePtr tree);
+#endif // FEATURE_STACK_FP_X87
+
+ void gtSetStmtInfo(GenTree* stmt);
+
+ // Returns "true" iff "node" has any of the side effects in "flags".
+ bool gtNodeHasSideEffects(GenTreePtr node, unsigned flags);
+
+ // Returns "true" iff "tree" or its (transitive) children have any of the side effects in "flags".
+ bool gtTreeHasSideEffects(GenTreePtr tree, unsigned flags);
+
+ // Appends 'expr' in front of 'list'.
+ // 'list' will typically start off as 'nullptr';
+ // when 'list' is non-null, a GT_COMMA node is used to insert 'expr'.
+ GenTreePtr gtBuildCommaList(GenTreePtr list, GenTreePtr expr);
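+
+ // Illustrative sketch (not part of the original header): starting from 'list' == nullptr, calling
+ // gtBuildCommaList(list, e1) yields just e1; a further gtBuildCommaList(e1, e2) then yields a GT_COMMA
+ // node that places e2 in front of e1, so the most recently appended expression comes first.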
+
+ void gtExtractSideEffList(GenTreePtr expr,
+ GenTreePtr* pList,
+ unsigned flags = GTF_SIDE_EFFECT,
+ bool ignoreRoot = false);
+
+ GenTreePtr gtGetThisArg(GenTreePtr call);
+
+ // Static fields of struct types (and sometimes the types that those are reduced to) are represented by having the
+ // static field contain an object pointer to the boxed struct. This simplifies the GC implementation...but
+ // complicates the JIT somewhat. This predicate returns "true" iff a node with type "fieldNodeType", representing
+ // the given "fldHnd", is such an object pointer.
+ bool gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd);
+
+ // Return true if call is a recursive call; return false otherwise.
+ bool gtIsRecursiveCall(GenTreeCall* call)
+ {
+ return (call->gtCallMethHnd == info.compMethodHnd);
+ }
+
+ //-------------------------------------------------------------------------
+
+ GenTreePtr gtFoldExpr(GenTreePtr tree);
+ GenTreePtr
+#ifdef __clang__
+ // TODO-Amd64-Unix: Remove this when the clang optimizer is fixed and/or the method implementation is
+ // refactored into simpler code. This is a workaround for a bug in the clang-3.5 optimizer. The issue is that in a
+ // release build the optimizer mistypes the args of (ltemp / lval2) as int (or just wrongly decides to use a 32-bit
+ // operation for the MIN_LONG corner case), i.e., it does a 32-bit div operation instead of a 64-bit one - see
+ // the implementation of the method in gentree.cpp. For the case of lval1 and lval2 equal to MIN_LONG
+ // (0x8000000000000000) this results in raising a SIGFPE. The method implementation is rather complex. Disable
+ // optimizations for now.
+ __attribute__((optnone))
+#endif // __clang__
+ gtFoldExprConst(GenTreePtr tree);
+ GenTreePtr gtFoldExprSpecial(GenTreePtr tree);
+ GenTreePtr gtFoldExprCompare(GenTreePtr tree);
+
+ //-------------------------------------------------------------------------
+ // Get the handle, if any.
+ CORINFO_CLASS_HANDLE gtGetStructHandleIfPresent(GenTreePtr tree);
+ // Get the handle, and assert if not found.
+ CORINFO_CLASS_HANDLE gtGetStructHandle(GenTreePtr tree);
+
+//-------------------------------------------------------------------------
+// Functions to display the trees
+
+#ifdef DEBUG
+ void gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in_z const char* msg, bool isLIR);
+
+ void gtDispVN(GenTreePtr tree);
+ void gtDispConst(GenTreePtr tree);
+ void gtDispLeaf(GenTreePtr tree, IndentStack* indentStack);
+ void gtDispNodeName(GenTreePtr tree);
+ void gtDispRegVal(GenTreePtr tree);
+
+ enum IndentInfo
+ {
+ IINone,
+ IIArc,
+ IIArcTop,
+ IIArcBottom,
+ IIEmbedded,
+ IIError,
+ IndentInfoCount
+ };
+ void gtDispChild(GenTreePtr child,
+ IndentStack* indentStack,
+ IndentInfo arcType,
+ __in_opt const char* msg = nullptr,
+ bool topOnly = false);
+ void gtDispTree(GenTreePtr tree,
+ IndentStack* indentStack = nullptr,
+ __in_opt const char* msg = nullptr,
+ bool topOnly = false,
+ bool isLIR = false);
+ void gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, const char** ilNameOut, unsigned* ilNumOut);
+ int gtGetLclVarName(unsigned lclNum, char* buf, unsigned buf_remaining);
+ char* gtGetLclVarName(unsigned lclNum);
+ void gtDispLclVar(unsigned varNum, bool padForBiggestDisp = true);
+ void gtDispTreeList(GenTreePtr tree, IndentStack* indentStack = nullptr);
+ void gtGetArgMsg(GenTreePtr call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength);
+ void gtGetLateArgMsg(GenTreePtr call, GenTreePtr arg, int argNum, int listCount, char* bufp, unsigned bufLength);
+ void gtDispArgList(GenTreePtr tree, IndentStack* indentStack);
+ void gtDispFieldSeq(FieldSeqNode* pfsn);
+
+ void gtDispRange(LIR::ReadOnlyRange const& range);
+
+ void gtDispTreeRange(LIR::Range& containingRange, GenTree* tree);
+
+ void gtDispLIRNode(GenTree* node);
+#endif
+
+ // For tree walks
+
+ enum fgWalkResult
+ {
+ WALK_CONTINUE,
+ WALK_SKIP_SUBTREES,
+ WALK_ABORT
+ };
+ struct fgWalkData;
+ typedef fgWalkResult(fgWalkPreFn)(GenTreePtr* pTree, fgWalkData* data);
+ typedef fgWalkResult(fgWalkPostFn)(GenTreePtr* pTree, fgWalkData* data);
+
+#ifdef DEBUG
+ static fgWalkPreFn gtAssertColonCond;
+#endif
+ static fgWalkPreFn gtMarkColonCond;
+ static fgWalkPreFn gtClearColonCond;
+
+ GenTreePtr* gtFindLink(GenTreePtr stmt, GenTreePtr node);
+ bool gtHasCatchArg(GenTreePtr tree);
+ bool gtHasUnmanagedCall(GenTreePtr tree);
+
+ typedef ArrayStack<GenTree*> GenTreeStack;
+
+ static bool gtHasCallOnStack(GenTreeStack* parentStack);
+ void gtCheckQuirkAddrExposedLclVar(GenTreePtr argTree, GenTreeStack* parentStack);
+
+//=========================================================================
+// BasicBlock functions
+#ifdef DEBUG
+ // This is a debug flag we will use to assert when creating a block during codegen,
+ // as this interferes with procedure splitting. If you know what you're doing, set
+ // it to true before creating the block. (DEBUG only)
+ bool fgSafeBasicBlockCreation;
+#endif
+
+ BasicBlock* bbNewBasicBlock(BBjumpKinds jumpKind);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX LclVarsInfo XX
+ XX XX
+ XX The variables to be used by the code generator. XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ //
+ // For both PROMOTION_TYPE_NONE and PROMOTION_TYPE_DEPENDENT the struct will
+ // be placed in the stack frame and its fields must be laid out sequentially.
+ //
+ // For PROMOTION_TYPE_INDEPENDENT each of the struct's fields is replaced by
+ // a local variable that can be enregistered or placed in the stack frame.
+ // The fields do not need to be laid out sequentially.
+ //
+ enum lvaPromotionType
+ {
+ PROMOTION_TYPE_NONE, // The struct local is not promoted
+ PROMOTION_TYPE_INDEPENDENT, // The struct local is promoted,
+ // and its field locals are independent of its parent struct local.
+ PROMOTION_TYPE_DEPENDENT // The struct local is promoted,
+ // but its field locals depend on its parent struct local.
+ };
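+
+ // Illustrative sketch (not part of the original header): for a local of a hypothetical type
+ //     struct Point { int x; int y; };
+ // PROMOTION_TYPE_INDEPENDENT replaces the struct with two int locals that can each be enregistered,
+ // while PROMOTION_TYPE_DEPENDENT keeps the struct on the stack frame with x and y laid out
+ // sequentially, the field locals being views into that frame location.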
+
+ static int __cdecl RefCntCmp(const void* op1, const void* op2);
+ static int __cdecl WtdRefCntCmp(const void* op1, const void* op2);
+
+ /*****************************************************************************/
+
+ enum FrameLayoutState
+ {
+ NO_FRAME_LAYOUT,
+ INITIAL_FRAME_LAYOUT,
+ PRE_REGALLOC_FRAME_LAYOUT,
+ REGALLOC_FRAME_LAYOUT,
+ TENTATIVE_FRAME_LAYOUT,
+ FINAL_FRAME_LAYOUT
+ };
+
+public:
+ bool lvaRefCountingStarted; // Set to true when we have started counting the local vars
+ bool lvaLocalVarRefCounted; // Set to true after we have called lvaMarkLocalVars()
+ bool lvaSortAgain; // true: We need to sort the lvaTable
+ bool lvaTrackedFixed; // true: We cannot add new 'tracked' variables
+ unsigned lvaCount; // total number of locals
+
+ unsigned lvaRefCount; // total number of references to locals
+ LclVarDsc* lvaTable; // variable descriptor table
+ unsigned lvaTableCnt; // lvaTable size (>= lvaCount)
+
+ LclVarDsc** lvaRefSorted; // table sorted by refcount
+
+ unsigned short lvaTrackedCount; // actual # of locals being tracked
+ unsigned lvaTrackedCountInSizeTUnits; // min # of size_t's sufficient to hold a bit for all the locals being tracked
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Only for AMD64 System V: cache the first stack-homed caller argument.
+ unsigned lvaFirstStackIncomingArgNum; // First argument with a stack slot in the caller.
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ VARSET_TP lvaTrackedVars; // set of tracked variables
+#endif
+#ifndef _TARGET_64BIT_
+ VARSET_TP lvaLongVars; // set of long (64-bit) variables
+#endif
+ VARSET_TP lvaFloatVars; // set of floating-point (32-bit and 64-bit) variables
+
+ unsigned lvaCurEpoch; // VarSets are relative to a specific set of tracked var indices.
+ // If that changes, this changes. VarSets from different epochs
+ // cannot be meaningfully combined.
+
+ unsigned GetCurLVEpoch()
+ {
+ return lvaCurEpoch;
+ }
+
+ // reverse map of tracked number to var number
+ unsigned lvaTrackedToVarNum[lclMAX_TRACKED];
+
+#ifdef LEGACY_BACKEND
+ // variable interference graph
+ VARSET_TP lvaVarIntf[lclMAX_TRACKED];
+#endif
+
+ // variable preference graph
+ VARSET_TP lvaVarPref[lclMAX_TRACKED];
+
+#if DOUBLE_ALIGN
+#ifdef DEBUG
+ // # of procs compiled with a double-aligned stack
+ static unsigned s_lvaDoubleAlignedProcsCount;
+#endif
+#endif
+
+ // Getters and setters for address-exposed and do-not-enregister local var properties.
+ bool lvaVarAddrExposed(unsigned varNum);
+ void lvaSetVarAddrExposed(unsigned varNum);
+ bool lvaVarDoNotEnregister(unsigned varNum);
+#ifdef DEBUG
+ // Reasons why we can't enregister. Some of these correspond to debug properties of local vars.
+ enum DoNotEnregisterReason
+ {
+ DNER_AddrExposed,
+ DNER_IsStruct,
+ DNER_LocalField,
+ DNER_VMNeedsStackAddr,
+ DNER_LiveInOutOfHandler,
+ DNER_LiveAcrossUnmanagedCall,
+ DNER_BlockOp, // Is read or written via a block operation that explicitly takes the address.
+#ifdef JIT32_GCENCODER
+ DNER_PinningRef,
+#endif
+ };
+#endif
+ void lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregisterReason reason));
+
+ unsigned lvaVarargsHandleArg;
+#ifdef _TARGET_X86_
+ unsigned lvaVarargsBaseOfStkArgs; // Pointer (computed based on incoming varargs handle) to the start of the stack
+ // arguments
+#endif // _TARGET_X86_
+
+ unsigned lvaInlinedPInvokeFrameVar; // variable representing the InlinedCallFrame
+ unsigned lvaReversePInvokeFrameVar; // variable representing the reverse PInvoke frame
+#if FEATURE_FIXED_OUT_ARGS
+ unsigned lvaPInvokeFrameRegSaveVar; // variable representing the RegSave for PInvoke inlining.
+#endif
+ unsigned lvaMonAcquired; // boolean variable introduced in synchronized methods
+ // that tracks whether the lock has been taken
+
+ unsigned lvaArg0Var; // The lclNum of arg0. Normally this will be info.compThisArg.
+ // However, if there is a "ldarga 0" or "starg 0" in the IL,
+ // we will redirect all "ldarg(a) 0" and "starg 0" to this temp.
+
+ unsigned lvaInlineeReturnSpillTemp; // The temp to spill the non-VOID return expression
+ // in case there are multiple BBJ_RETURN blocks in the inlinee.
+
+#if FEATURE_FIXED_OUT_ARGS
+ unsigned lvaOutgoingArgSpaceVar; // dummy TYP_LCLBLK var for fixed outgoing argument space
+ unsigned lvaOutgoingArgSpaceSize; // size of fixed outgoing argument space
+#endif // FEATURE_FIXED_OUT_ARGS
+
+#ifdef _TARGET_ARM_
+ // On architectures whose ABIs allow structs to be passed in registers, struct promotion will sometimes
+ // require us to "rematerialize" a struct from it's separate constituent field variables. Packing several sub-word
+ // field variables into an argument register is a hard problem. It's easier to reserve a word of memory into which
+ // such field can be copied, after which the assembled memory word can be read into the register. We will allocate
+ // this variable to be this scratch word whenever struct promotion occurs.
+ unsigned lvaPromotedStructAssemblyScratchVar;
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ unsigned lvaReturnEspCheck; // confirms ESP not corrupted on return
+ unsigned lvaCallEspCheck; // confirms ESP not corrupted after a call
+#endif
+
+ bool lvaGenericsContextUsed;
+
+ bool lvaKeepAliveAndReportThis(); // Synchronized instance method of a reference type, or
+ // CORINFO_GENERICS_CTXT_FROM_THIS?
+ bool lvaReportParamTypeArg(); // Exceptions and CORINFO_GENERICS_CTXT_FROM_PARAMTYPEARG?
+
+//-------------------------------------------------------------------------
+// All these frame offsets are inter-related and must be kept in sync
+
+#if !FEATURE_EH_FUNCLETS
+ // This is used for the callable handlers
+ unsigned lvaShadowSPslotsVar; // TYP_BLK variable for all the shadow SP slots
+#endif // !FEATURE_EH_FUNCLETS
+
+ unsigned lvaCachedGenericContextArgOffs;
+ unsigned lvaCachedGenericContextArgOffset(); // For CORINFO_CALLCONV_PARAMTYPE and if generic context is passed as
+ // THIS pointer
+
+ unsigned lvaLocAllocSPvar; // variable which has the result of the last alloca/localloc
+
+ unsigned lvaNewObjArrayArgs; // variable with arguments for new MD array helper
+
+ // TODO-Review: Prior to reg predict we reserve 24 bytes for spill temps;
+ // after reg predict we will use a computed maxTmpSize,
+ // which is based upon the number of spill temps predicted by reg predict.
+ // All this is necessary because if we under-estimate the size of the spill
+ // temps, we could fail when encoding instructions that reference stack offsets for ARM.
+ //
+ // Pre codegen max spill temp size.
+ static const unsigned MAX_SPILL_TEMP_SIZE = 24;
+
+ //-------------------------------------------------------------------------
+
+ unsigned lvaGetMaxSpillTempSize();
+#ifdef _TARGET_ARM_
+ bool lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask);
+#endif // _TARGET_ARM_
+ void lvaAssignFrameOffsets(FrameLayoutState curState);
+ void lvaFixVirtualFrameOffsets();
+
+#ifndef LEGACY_BACKEND
+ void lvaUpdateArgsWithInitialReg();
+#endif // !LEGACY_BACKEND
+
+ void lvaAssignVirtualFrameOffsetsToArgs();
+#ifdef UNIX_AMD64_ABI
+ int lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs, int* callerArgOffset);
+#else // !UNIX_AMD64_ABI
+ int lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs);
+#endif // !UNIX_AMD64_ABI
+ void lvaAssignVirtualFrameOffsetsToLocals();
+ int lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs);
+#ifdef _TARGET_AMD64_
+ // Returns true if compCalleeRegsPushed (including RBP if used as frame pointer) is even.
+ bool lvaIsCalleeSavedIntRegCountEven();
+#endif
+ void lvaAlignFrame();
+ void lvaAssignFrameOffsetsToPromotedStructs();
+ int lvaAllocateTemps(int stkOffs, bool mustDoubleAlign);
+
+#ifdef DEBUG
+ void lvaDumpRegLocation(unsigned lclNum);
+ void lvaDumpFrameLocation(unsigned lclNum);
+ void lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth = 6);
+ void lvaTableDump(FrameLayoutState curState = NO_FRAME_LAYOUT); // NO_FRAME_LAYOUT means use the current frame
+ // layout state defined by lvaDoneFrameLayout
+#endif
+
+// Limit frame size to 1GB. The maximum is 2GB in theory - make it intentionally smaller
+// to avoid bugs from borderline cases.
+#define MAX_FrameSize 0x3FFFFFFF
+ void lvaIncrementFrameSize(unsigned size);
+
+ unsigned lvaFrameSize(FrameLayoutState curState);
+
+ // Returns the caller-SP-relative offset for the given SP- or FP-relative offset, as indicated by 'isFpBased'.
+ int lvaToCallerSPRelativeOffset(int offs, bool isFpBased);
+
+ // Returns the caller-SP-relative offset for the local variable "varNum."
+ int lvaGetCallerSPRelativeOffset(unsigned varNum);
+
+ // Returns the SP-relative offset for the local variable "varNum". Illegal to ask this for functions with localloc.
+ int lvaGetSPRelativeOffset(unsigned varNum);
+
+ int lvaToInitialSPRelativeOffset(unsigned offset, bool isFpBased);
+ int lvaGetInitialSPRelativeOffset(unsigned varNum);
+
+ //------------------------ For splitting types ----------------------------
+
+ void lvaInitTypeRef();
+
+ void lvaInitArgs(InitVarDscInfo* varDscInfo);
+ void lvaInitThisPtr(InitVarDscInfo* varDscInfo);
+ void lvaInitRetBuffArg(InitVarDscInfo* varDscInfo);
+ void lvaInitUserArgs(InitVarDscInfo* varDscInfo);
+ void lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo);
+ void lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo);
+
+ void lvaInitVarDsc(LclVarDsc* varDsc,
+ unsigned varNum,
+ CorInfoType corInfoType,
+ CORINFO_CLASS_HANDLE typeHnd,
+ CORINFO_ARG_LIST_HANDLE varList,
+ CORINFO_SIG_INFO* varSig);
+
+ static unsigned lvaTypeRefMask(var_types type);
+
+ var_types lvaGetActualType(unsigned lclNum);
+ var_types lvaGetRealType(unsigned lclNum);
+
+ //-------------------------------------------------------------------------
+
+ void lvaInit();
+
+ unsigned lvaArgSize(const void* argTok);
+ unsigned lvaLclSize(unsigned varNum);
+ unsigned lvaLclExactSize(unsigned varNum);
+
+ bool lvaLclVarRefs(GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, void* result);
+
+ // Call lvaLclVarRefs on "true"; accumulate "*result" into whichever of
+ // "allVars" and "trkdVars" is indiated by the nullness of "findPtr"; return
+ // the return result.
+ bool lvaLclVarRefsAccum(
+ GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, ALLVARSET_TP* allVars, VARSET_TP* trkdVars);
+
+ // If "findPtr" is non-NULL, assumes "result" is an "ALLVARSET_TP*", and
+ // (destructively) unions "allVars" into "*result". Otherwise, assumes "result" is a "VARSET_TP*",
+ // and (destructively) unions "trkedVars" into "*result".
+ void lvaLclVarRefsAccumIntoRes(GenTreePtr* findPtr,
+ void* result,
+ ALLVARSET_VALARG_TP allVars,
+ VARSET_VALARG_TP trkdVars);
+
+ bool lvaHaveManyLocals() const;
+
+ unsigned lvaGrabTemp(bool shortLifetime DEBUGARG(const char* reason));
+ unsigned lvaGrabTemps(unsigned cnt DEBUGARG(const char* reason));
+ unsigned lvaGrabTempWithImplicitUse(bool shortLifetime DEBUGARG(const char* reason));
+
+ void lvaSortOnly();
+ void lvaSortByRefCount();
+ void lvaDumpRefCounts();
+
+ void lvaMarkLocalVars(BasicBlock* block);
+
+ void lvaMarkLocalVars(); // Local variable ref-counting
+
+ void lvaAllocOutgoingArgSpace(); // 'Commit' lvaOutgoingArgSpaceSize and lvaOutgoingArgSpaceVar
+
+ VARSET_VALRET_TP lvaStmtLclMask(GenTreePtr stmt);
+
+ static fgWalkPreFn lvaIncRefCntsCB;
+ void lvaIncRefCnts(GenTreePtr tree);
+
+ static fgWalkPreFn lvaDecRefCntsCB;
+ void lvaDecRefCnts(GenTreePtr tree);
+ void lvaDecRefCnts(BasicBlock* basicBlock, GenTreePtr tree);
+ void lvaRecursiveDecRefCounts(GenTreePtr tree);
+ void lvaRecursiveIncRefCounts(GenTreePtr tree);
+
+#ifdef DEBUG
+ struct lvaStressLclFldArgs
+ {
+ Compiler* m_pCompiler;
+ bool m_bFirstPass;
+ };
+
+ static fgWalkPreFn lvaStressLclFldCB;
+ void lvaStressLclFld();
+
+ void lvaDispVarSet(VARSET_VALARG_TP set, VARSET_VALARG_TP allVars);
+ void lvaDispVarSet(VARSET_VALARG_TP set);
+
+#endif
+
+#ifdef _TARGET_ARM_
+ int lvaFrameAddress(int varNum, bool mustBeFPBased, regNumber* pBaseReg, int addrModeOffset);
+#else
+ int lvaFrameAddress(int varNum, bool* pFPbased);
+#endif
+
+ bool lvaIsParameter(unsigned varNum);
+ bool lvaIsRegArgument(unsigned varNum);
+ BOOL lvaIsOriginalThisArg(unsigned varNum); // Is this varNum the original this argument?
+ BOOL lvaIsOriginalThisReadOnly(); // return TRUE if there is no place in the code
+ // that writes to arg0
+
+ // Struct parameters that are passed by reference are marked as both lvIsParam and lvIsTemp
+ // (this is an overload of lvIsTemp because there are no temp parameters).
+ // For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference.
+ // For ARM64, this is structs larger than 16 bytes that are passed by reference.
+ bool lvaIsImplicitByRefLocal(unsigned varNum)
+ {
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ LclVarDsc* varDsc = &(lvaTable[varNum]);
+ if (varDsc->lvIsParam && varDsc->lvIsTemp)
+ {
+ assert((varDsc->lvType == TYP_STRUCT) || (varDsc->lvType == TYP_BYREF));
+ return true;
+ }
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ return false;
+ }
+
+ // Returns true if this local var is a multireg struct
+ bool lvaIsMultiregStruct(LclVarDsc* varDsc);
+
+ // If the class is a TYP_STRUCT, get/set a class handle describing it
+
+ CORINFO_CLASS_HANDLE lvaGetStruct(unsigned varNum);
+ void lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool unsafeValueClsCheck, bool setTypeInfo = true);
+
+#define MAX_NumOfFieldsInPromotableStruct 4 // Maximum number of fields in promotable struct
+
+ // Info about struct fields
+ struct lvaStructFieldInfo
+ {
+ CORINFO_FIELD_HANDLE fldHnd;
+ unsigned char fldOffset;
+ unsigned char fldOrdinal;
+ var_types fldType;
+ unsigned fldSize;
+ CORINFO_CLASS_HANDLE fldTypeHnd;
+ };
+
+ // Info about struct to be promoted.
+ struct lvaStructPromotionInfo
+ {
+ CORINFO_CLASS_HANDLE typeHnd;
+ bool canPromote;
+ bool requiresScratchVar;
+ bool containsHoles;
+ bool customLayout;
+ unsigned char fieldCnt;
+ lvaStructFieldInfo fields[MAX_NumOfFieldsInPromotableStruct];
+
+ lvaStructPromotionInfo()
+ : typeHnd(nullptr), canPromote(false), requiresScratchVar(false), containsHoles(false), customLayout(false)
+ {
+ }
+ };
+
+ static int __cdecl lvaFieldOffsetCmp(const void* field1, const void* field2);
+ void lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
+ lvaStructPromotionInfo* StructPromotionInfo,
+ bool sortFields);
+ void lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo);
+ void lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo);
+#if !defined(_TARGET_64BIT_)
+ void lvaPromoteLongVars();
+#endif // !defined(_TARGET_64BIT_)
+ unsigned lvaGetFieldLocal(LclVarDsc* varDsc, unsigned int fldOffset);
+ lvaPromotionType lvaGetPromotionType(const LclVarDsc* varDsc);
+ lvaPromotionType lvaGetPromotionType(unsigned varNum);
+ lvaPromotionType lvaGetParentPromotionType(const LclVarDsc* varDsc);
+ lvaPromotionType lvaGetParentPromotionType(unsigned varNum);
+ bool lvaIsFieldOfDependentlyPromotedStruct(const LclVarDsc* varDsc);
+ bool lvaIsGCTracked(const LclVarDsc* varDsc);
+
+ BYTE* lvaGetGcLayout(unsigned varNum);
+ bool lvaTypeIsGC(unsigned varNum);
+ unsigned lvaGSSecurityCookie; // LclVar number
+ bool lvaTempsHaveLargerOffsetThanVars();
+
+ unsigned lvaSecurityObject; // variable representing the security object on the stack
+ unsigned lvaStubArgumentVar; // variable representing the secret stub argument coming in EAX
+
+#if FEATURE_EH_FUNCLETS
+ unsigned lvaPSPSym; // variable representing the PSPSym
+#endif
+
+ InlineInfo* impInlineInfo;
+ InlineStrategy* m_inlineStrategy;
+
+ // The Compiler* that is the root of the inlining tree of which "this" is a member.
+ Compiler* impInlineRoot();
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ unsigned __int64 getInlineCycleCount()
+ {
+ return m_compCycles;
+ }
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ bool fgNoStructPromotion; // Set to TRUE to turn off struct promotion for this method.
+    bool fgNoStructParamPromotion; // Set to TRUE to turn off struct promotion for parameters of this method.
+
+ //=========================================================================
+ // PROTECTED
+ //=========================================================================
+
+protected:
+//---------------- Local variable ref-counting ----------------------------
+
+#if ASSERTION_PROP
+ BasicBlock* lvaMarkRefsCurBlock;
+ GenTreePtr lvaMarkRefsCurStmt;
+#endif
+ BasicBlock::weight_t lvaMarkRefsWeight;
+
+ static fgWalkPreFn lvaMarkLclRefsCallback;
+ void lvaMarkLclRefs(GenTreePtr tree);
+
+ // Keeps the mapping from SSA #'s to VN's for the implicit "Heap" variable.
+ PerSsaArray lvHeapPerSsaData;
+ unsigned lvHeapNumSsaNames;
+
+public:
+ // Returns the address of the per-Ssa data for "Heap" at the given ssaNum (which is required
+ // not to be the SsaConfig::RESERVED_SSA_NUM, which indicates that the variable is
+ // not an SSA variable).
+ LclSsaVarDsc* GetHeapPerSsaData(unsigned ssaNum)
+ {
+ assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
+ assert(SsaConfig::RESERVED_SSA_NUM == 0);
+ ssaNum--;
+ assert(ssaNum < lvHeapNumSsaNames);
+ return &lvHeapPerSsaData.GetRef(ssaNum);
+ }
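+
+    // Worked example of the numbering above (illustrative; assumes at least one heap SSA name
+    // has been allocated): since SsaConfig::RESERVED_SSA_NUM is 0, the first real heap SSA name
+    // is 1, and it maps to element 0 of lvHeapPerSsaData:
+    //
+    //     LclSsaVarDsc* firstHeapDef = GetHeapPerSsaData(1); // same as &lvHeapPerSsaData.GetRef(0)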
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Importer XX
+ XX XX
+ XX Imports the given method and converts it to semantic trees XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void impInit();
+
+ void impImport(BasicBlock* method);
+
+ CORINFO_CLASS_HANDLE impGetRefAnyClass();
+ CORINFO_CLASS_HANDLE impGetRuntimeArgumentHandle();
+ CORINFO_CLASS_HANDLE impGetTypeHandleClass();
+ CORINFO_CLASS_HANDLE impGetStringClass();
+ CORINFO_CLASS_HANDLE impGetObjectClass();
+
+ //=========================================================================
+ // PROTECTED
+ //=========================================================================
+
+protected:
+ //-------------------- Stack manipulation ---------------------------------
+
+ unsigned impStkSize; // Size of the full stack
+
+#define SMALL_STACK_SIZE 16 // number of elements in impSmallStack
+
+ StackEntry impSmallStack[SMALL_STACK_SIZE]; // Use this array if possible
+
+ struct SavedStack // used to save/restore stack contents.
+ {
+ unsigned ssDepth; // number of values on stack
+ StackEntry* ssTrees; // saved tree values
+ };
+
+ bool impIsPrimitive(CorInfoType type);
+ bool impILConsumesAddr(const BYTE* codeAddr, CORINFO_METHOD_HANDLE fncHandle, CORINFO_MODULE_HANDLE scpHandle);
+
+ void impResolveToken(const BYTE* addr, CORINFO_RESOLVED_TOKEN* pResolvedToken, CorInfoTokenKind kind);
+ void impPushOnStackNoType(GenTreePtr tree);
+
+ void impPushOnStack(GenTreePtr tree, typeInfo ti);
+ void impPushNullObjRefOnStack();
+ StackEntry impPopStack();
+ StackEntry impPopStack(CORINFO_CLASS_HANDLE& structTypeRet);
+ GenTreePtr impPopStack(typeInfo& ti);
+ StackEntry& impStackTop(unsigned n = 0);
+
+ void impSaveStackState(SavedStack* savePtr, bool copy);
+ void impRestoreStackState(SavedStack* savePtr);
+
+ GenTreePtr impImportLdvirtftn(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_CALL_INFO* pCallInfo);
+
+ void impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken);
+
+ void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
+
+ bool impCanPInvokeInline(var_types callRetTyp);
+ bool impCanPInvokeInlineCallSite(var_types callRetTyp);
+ void impCheckForPInvokeCall(GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags);
+ GenTreePtr impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset = BAD_IL_OFFSET);
+ void impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig);
+
+ void impInsertHelperCall(CORINFO_HELPER_DESC* helperCall);
+ void impHandleAccessAllowed(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall);
+ void impHandleAccessAllowedInternal(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall);
+
+ void impInsertCalloutForDelegate(CORINFO_METHOD_HANDLE callerMethodHnd,
+ CORINFO_METHOD_HANDLE calleeMethodHnd,
+ CORINFO_CLASS_HANDLE delegateTypeHnd);
+
+ var_types impImportCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, // Is this a "constrained." call on a
+ // type parameter?
+ GenTreePtr newobjThis,
+ int prefixFlags,
+ CORINFO_CALL_INFO* callInfo,
+ IL_OFFSET rawILOffset);
+
+ bool impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO* methInfo);
+
+ GenTreePtr impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd);
+
+ GenTreePtr impInitCallLongReturn(GenTreePtr call);
+
+ GenTreePtr impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd);
+
+#ifdef DEBUG
+ var_types impImportJitTestLabelMark(int numArgs);
+#endif // DEBUG
+
+ GenTreePtr impInitClass(CORINFO_RESOLVED_TOKEN* pResolvedToken);
+
+ GenTreePtr impImportStaticReadOnlyField(void* fldAddr, var_types lclTyp);
+
+ GenTreePtr impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp);
+
+ static void impBashVarAddrsToI(GenTreePtr tree1, GenTreePtr tree2 = nullptr);
+
+ GenTreePtr impImplicitIorI4Cast(GenTreePtr tree, var_types dstTyp);
+
+ GenTreePtr impImplicitR4orR8Cast(GenTreePtr tree, var_types dstTyp);
+
+ void impImportLeave(BasicBlock* block);
+ void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
+ BOOL impLocAllocOnStack();
+ GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ int memberRef,
+ bool readonlyCall,
+ bool tailCall,
+ CorInfoIntrinsics* pIntrinsicID);
+ GenTreePtr impArrayAccessIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_SIG_INFO* sig,
+ int memberRef,
+ bool readonlyCall,
+ CorInfoIntrinsics intrinsicID);
+ GenTreePtr impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig);
+
+ GenTreePtr impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
+
+ GenTreePtr impTransformThis(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ CORINFO_THIS_TRANSFORM transform);
+
+ //----------------- Manipulating the trees and stmts ----------------------
+
+ GenTreePtr impTreeList; // Trees for the BB being imported
+ GenTreePtr impTreeLast; // The last tree for the current BB
+
+ enum
+ {
+ CHECK_SPILL_ALL = -1,
+ CHECK_SPILL_NONE = -2
+ };
+
+public:
+ void impBeginTreeList();
+ void impEndTreeList(BasicBlock* block, GenTreePtr firstStmt, GenTreePtr lastStmt);
+ void impEndTreeList(BasicBlock* block);
+ void impAppendStmtCheck(GenTreePtr stmt, unsigned chkLevel);
+ void impAppendStmt(GenTreePtr stmt, unsigned chkLevel);
+ void impInsertStmtBefore(GenTreePtr stmt, GenTreePtr stmtBefore);
+ GenTreePtr impAppendTree(GenTreePtr tree, unsigned chkLevel, IL_OFFSETX offset);
+ void impInsertTreeBefore(GenTreePtr tree, IL_OFFSETX offset, GenTreePtr stmtBefore);
+ void impAssignTempGen(unsigned tmp,
+ GenTreePtr val,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ BasicBlock* block = nullptr);
+ void impAssignTempGen(unsigned tmpNum,
+ GenTreePtr val,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ BasicBlock* block = nullptr);
+ GenTreePtr impCloneExpr(GenTreePtr tree,
+ GenTreePtr* clone,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt DEBUGARG(const char* reason));
+ GenTreePtr impAssignStruct(GenTreePtr dest,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ BasicBlock* block = nullptr);
+ GenTreePtr impAssignStructPtr(GenTreePtr dest,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt = nullptr,
+ BasicBlock* block = nullptr);
+
+ GenTreePtr impGetStructAddr(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool willDeref);
+
+ var_types impNormStructType(CORINFO_CLASS_HANDLE structHnd,
+ BYTE* gcLayout = nullptr,
+ unsigned* numGCVars = nullptr,
+ var_types* simdBaseType = nullptr);
+
+ GenTreePtr impNormStructVal(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool forceNormalization = false);
+
+ GenTreePtr impTokenToHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ BOOL* pRuntimeLookup = nullptr,
+ BOOL mustRestoreHandle = FALSE,
+ BOOL importParent = FALSE);
+
+ GenTreePtr impParentClassTokenToHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ BOOL* pRuntimeLookup = nullptr,
+ BOOL mustRestoreHandle = FALSE)
+ {
+ return impTokenToHandle(pResolvedToken, pRuntimeLookup, mustRestoreHandle, TRUE);
+ }
+
+ GenTreePtr impLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ unsigned flags,
+ void* compileTimeHandle);
+
+ GenTreePtr impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ void* compileTimeHandle);
+
+ GenTreePtr impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup, unsigned flags, void* compileTimeHandle);
+
+ GenTreePtr impReadyToRunHelperToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CorInfoHelpFunc helper,
+ var_types type,
+ GenTreeArgList* arg = nullptr,
+ CORINFO_LOOKUP_KIND* pGenericLookupKind = nullptr);
+
+ GenTreePtr impCastClassOrIsInstToTree(GenTreePtr op1,
+ GenTreePtr op2,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ bool isCastClass);
+
+ bool VarTypeIsMultiByteAndCanEnreg(var_types type,
+ CORINFO_CLASS_HANDLE typeClass,
+ unsigned* typeSize,
+ bool forReturn);
+
+ static bool IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId);
+ static bool IsTargetIntrinsic(CorInfoIntrinsics intrinsicId);
+ static bool IsMathIntrinsic(CorInfoIntrinsics intrinsicId);
+ static bool IsMathIntrinsic(GenTreePtr tree);
+
+private:
+ //----------------- Importing the method ----------------------------------
+
+ CORINFO_CONTEXT_HANDLE impTokenLookupContextHandle; // The context used for looking up tokens.
+
+#ifdef DEBUG
+ unsigned impCurOpcOffs;
+ const char* impCurOpcName;
+ bool impNestedStackSpill;
+
+ // For displaying instrs with generated native code (-n:B)
+    GenTreePtr impLastILoffsStmt; // oldest stmt added for which we have not yet set gtStmtLastILoffs
+ void impNoteLastILoffs();
+#endif
+
+ /* IL offset of the stmt currently being imported. It gets set to
+ BAD_IL_OFFSET after it has been set in the appended trees. Then it gets
+ updated at IL offsets for which we have to report mapping info.
+ It also includes flag bits, so use jitGetILoffs()
+ to get the actual IL offset value.
+ */
+
+ IL_OFFSETX impCurStmtOffs;
+ void impCurStmtOffsSet(IL_OFFSET offs);
+
+ void impNoteBranchOffs();
+
+ unsigned impInitBlockLineInfo();
+
+ GenTreePtr impCheckForNullPointer(GenTreePtr obj);
+ bool impIsThis(GenTreePtr obj);
+ bool impIsLDFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr);
+ bool impIsDUP_LDVIRTFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr);
+ bool impIsAnySTLOC(OPCODE opcode)
+ {
+ return ((opcode == CEE_STLOC) || (opcode == CEE_STLOC_S) ||
+ ((opcode >= CEE_STLOC_0) && (opcode <= CEE_STLOC_3)));
+ }
+
+ GenTreeArgList* impPopList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ GenTreeArgList* prefixTree = nullptr);
+
+ GenTreeArgList* impPopRevList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ unsigned skipReverseCount = 0);
+
+ /*
+     * Get the current IL offset with stack-empty info incorporated
+ */
+ IL_OFFSETX impCurILOffset(IL_OFFSET offs, bool callInstruction = false);
+
+ //---------------- Spilling the importer stack ----------------------------
+
+ struct PendingDsc
+ {
+ PendingDsc* pdNext;
+ BasicBlock* pdBB;
+ SavedStack pdSavedStack;
+ ThisInitState pdThisPtrInit;
+ };
+
+ PendingDsc* impPendingList; // list of BBs currently waiting to be imported.
+ PendingDsc* impPendingFree; // Freed up dscs that can be reused
+
+ // We keep a byte-per-block map (dynamically extended) in the top-level Compiler object of a compilation.
+ ExpandArray<BYTE> impPendingBlockMembers;
+
+    // Return the byte for "blk" (allocating/extending impPendingBlockMembers if necessary).
+ // Operates on the map in the top-level ancestor.
+ BYTE impGetPendingBlockMember(BasicBlock* blk)
+ {
+ return impInlineRoot()->impPendingBlockMembers.Get(blk->bbInd());
+ }
+
+    // Set the byte for "blk" to "val" (allocating/extending impPendingBlockMembers if necessary).
+ // Operates on the map in the top-level ancestor.
+ void impSetPendingBlockMember(BasicBlock* blk, BYTE val)
+ {
+ impInlineRoot()->impPendingBlockMembers.Set(blk->bbInd(), val);
+ }
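+
+    // Usage sketch (illustrative only; the surrounding worklist logic is an assumption, not shown
+    // here): the pending-import list records membership through the root compiler so that all
+    // inlinees of a compilation share one map:
+    //
+    //     if (impGetPendingBlockMember(block) == 0)
+    //     {
+    //         impSetPendingBlockMember(block, 1); // mark "block" as being on the pending list
+    //         // ... add "block" to impPendingList ...
+    //     }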
+
+ bool impCanReimport;
+
+ bool impSpillStackEntry(unsigned level,
+ unsigned varNum
+#ifdef DEBUG
+ ,
+ bool bAssertOnRecursion,
+ const char* reason
+#endif
+ );
+
+ void impSpillStackEnsure(bool spillLeaves = false);
+ void impEvalSideEffects();
+ void impSpillSpecialSideEff();
+ void impSpillSideEffects(bool spillGlobEffects, unsigned chkLevel DEBUGARG(const char* reason));
+ void impSpillValueClasses();
+ void impSpillEvalStack();
+ static fgWalkPreFn impFindValueClasses;
+ void impSpillLclRefs(ssize_t lclNum);
+
+ BasicBlock* impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd);
+
+ void impImportBlockCode(BasicBlock* block);
+
+ void impReimportMarkBlock(BasicBlock* block);
+ void impReimportMarkSuccessors(BasicBlock* block);
+
+ void impVerifyEHBlock(BasicBlock* block, bool isTryStart);
+
+ void impImportBlockPending(BasicBlock* block);
+
+ // Similar to impImportBlockPending, but assumes that block has already been imported once and is being
+ // reimported for some reason. It specifically does *not* look at verCurrentState to set the EntryState
+ // for the block, but instead, just re-uses the block's existing EntryState.
+ void impReimportBlockPending(BasicBlock* block);
+
+ var_types impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTreePtr* pOp1, GenTreePtr* pOp2);
+
+ void impImportBlock(BasicBlock* block);
+
+ // Assumes that "block" is a basic block that completes with a non-empty stack. We will assign the values
+ // on the stack to local variables (the "spill temp" variables). The successor blocks will assume that
+    // their incoming stack contents are in those locals. This requires "block" and its successors to agree on
+ // the variables that will be used -- and for all the predecessors of those successors, and the
+ // successors of those predecessors, etc. Call such a set of blocks closed under alternating
+ // successor/predecessor edges a "spill clique." A block is a "predecessor" or "successor" member of the
+ // clique (or, conceivably, both). Each block has a specified sequence of incoming and outgoing spill
+ // temps. If "block" already has its outgoing spill temps assigned (they are always a contiguous series
+ // of local variable numbers, so we represent them with the base local variable number), returns that.
+ // Otherwise, picks a set of spill temps, and propagates this choice to all blocks in the spill clique of
+ // which "block" is a member (asserting, in debug mode, that no block in this clique had its spill temps
+ // chosen already. More precisely, that the incoming or outgoing spill temps are not chosen, depending
+ // on which kind of member of the clique the block is).
+ unsigned impGetSpillTmpBase(BasicBlock* block);
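+
+    // Minimal caller sketch (illustrative; 'verCurrentState' and the spill loop shape are
+    // assumptions about the surrounding importer code, not implied by this declaration):
+    //
+    //     unsigned baseTmp = impGetSpillTmpBase(block);
+    //     for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+    //     {
+    //         // stack entry 'level' is spilled to local (baseTmp + level), so every block in
+    //         // the spill clique agrees on which locals carry the stack across the edge
+    //         impSpillStackEntry(level, baseTmp + level DEBUGARG(false) DEBUGARG("spill clique"));
+    //     }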
+
+ // Assumes that "block" is a basic block that completes with a non-empty stack. We have previously
+ // assigned the values on the stack to local variables (the "spill temp" variables). The successor blocks
+    // will assume that their incoming stack contents are in those locals. This requires "block" and its
+ // successors to agree on the variables and their types that will be used. The CLI spec allows implicit
+ // conversions between 'int' and 'native int' or 'float' and 'double' stack types. So one predecessor can
+ // push an int and another can push a native int. For 64-bit we have chosen to implement this by typing
+ // the "spill temp" as native int, and then importing (or re-importing as needed) so that all the
+ // predecessors in the "spill clique" push a native int (sign-extending if needed), and all the
+ // successors receive a native int. Similarly float and double are unified to double.
+ // This routine is called after a type-mismatch is detected, and it will walk the spill clique to mark
+ // blocks for re-importation as appropriate (both successors, so they get the right incoming type, and
+ // predecessors, so they insert an upcast if needed).
+ void impReimportSpillClique(BasicBlock* block);
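+
+    // Worked example of the unification described above (hypothetical predecessor shapes):
+    //     pred1 ends by pushing 'ldc.i4 0'            -> stack type is 'int'
+    //     pred2 ends by pushing a 'native int' local  -> stack type is 'native int'
+    // On 64-bit targets the shared spill temp is typed as TYP_I_IMPL; pred1 is re-imported so
+    // its push is sign-extended to native int, and the successor is re-imported so that it
+    // consumes a native int.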
+
+ // When we compute a "spill clique" (see above) these byte-maps are allocated to have a byte per basic
+ // block, and represent the predecessor and successor members of the clique currently being computed.
+ // *** Access to these will need to be locked in a parallel compiler.
+ ExpandArray<BYTE> impSpillCliquePredMembers;
+ ExpandArray<BYTE> impSpillCliqueSuccMembers;
+
+ enum SpillCliqueDir
+ {
+ SpillCliquePred,
+ SpillCliqueSucc
+ };
+
+ // Abstract class for receiving a callback while walking a spill clique
+ class SpillCliqueWalker
+ {
+ public:
+ virtual void Visit(SpillCliqueDir predOrSucc, BasicBlock* blk) = 0;
+ };
+
+ // This class is used for setting the bbStkTempsIn and bbStkTempsOut on the blocks within a spill clique
+ class SetSpillTempsBase : public SpillCliqueWalker
+ {
+ unsigned m_baseTmp;
+
+ public:
+ SetSpillTempsBase(unsigned baseTmp) : m_baseTmp(baseTmp)
+ {
+ }
+ virtual void Visit(SpillCliqueDir predOrSucc, BasicBlock* blk);
+ };
+
+ // This class is used for implementing impReimportSpillClique part on each block within the spill clique
+ class ReimportSpillClique : public SpillCliqueWalker
+ {
+ Compiler* m_pComp;
+
+ public:
+ ReimportSpillClique(Compiler* pComp) : m_pComp(pComp)
+ {
+ }
+ virtual void Visit(SpillCliqueDir predOrSucc, BasicBlock* blk);
+ };
+
+ // This is the heart of the algorithm for walking spill cliques. It invokes callback->Visit for each
+ // predecessor or successor within the spill clique
+ void impWalkSpillCliqueFromPred(BasicBlock* pred, SpillCliqueWalker* callback);
+
+ // For a BasicBlock that has already been imported, the EntryState has an array of GenTrees for the
+    // incoming locals. This walks that list and resets the types of the GenTrees to match the types of
+ // the VarDscs. They get out of sync when we have int/native int issues (see impReimportSpillClique).
+ void impRetypeEntryStateTemps(BasicBlock* blk);
+
+ BYTE impSpillCliqueGetMember(SpillCliqueDir predOrSucc, BasicBlock* blk);
+ void impSpillCliqueSetMember(SpillCliqueDir predOrSucc, BasicBlock* blk, BYTE val);
+
+ void impPushVar(GenTree* op, typeInfo tiRetVal);
+ void impLoadVar(unsigned lclNum, IL_OFFSET offset, typeInfo tiRetVal);
+ void impLoadVar(unsigned lclNum, IL_OFFSET offset)
+ {
+ impLoadVar(lclNum, offset, lvaTable[lclNum].lvVerTypeInfo);
+ }
+ void impLoadArg(unsigned ilArgNum, IL_OFFSET offset);
+ void impLoadLoc(unsigned ilLclNum, IL_OFFSET offset);
+ bool impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE& opcode);
+
+#ifdef _TARGET_ARM_
+ void impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr op, CORINFO_CLASS_HANDLE hClass);
+#endif
+
+ // A free list of linked list nodes used to represent to-do stacks of basic blocks.
+ struct BlockListNode
+ {
+ BasicBlock* m_blk;
+ BlockListNode* m_next;
+ BlockListNode(BasicBlock* blk, BlockListNode* next = nullptr) : m_blk(blk), m_next(next)
+ {
+ }
+ void* operator new(size_t sz, Compiler* comp);
+ };
+ BlockListNode* impBlockListNodeFreeList;
+
+ BlockListNode* AllocBlockListNode();
+ void FreeBlockListNode(BlockListNode* node);
+
+ bool impIsValueType(typeInfo* pTypeInfo);
+ var_types mangleVarArgsType(var_types type);
+
+#if FEATURE_VARARG
+ regNumber getCallArgIntRegister(regNumber floatReg);
+ regNumber getCallArgFloatRegister(regNumber intReg);
+#endif // FEATURE_VARARG
+
+#if defined(DEBUG)
+ static unsigned jitTotalMethodCompiled;
+#endif
+
+#ifdef DEBUG
+ static LONG jitNestingLevel;
+#endif // DEBUG
+
+ bool seenConditionalJump;
+
+ static BOOL impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut);
+
+ void impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult);
+
+ // STATIC inlining decision based on the IL code.
+ void impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle,
+ CORINFO_METHOD_INFO* methInfo,
+ bool forceInline,
+ InlineResult* inlineResult);
+
+ void impCheckCanInline(GenTreePtr call,
+ CORINFO_METHOD_HANDLE fncHandle,
+ unsigned methAttr,
+ CORINFO_CONTEXT_HANDLE exactContextHnd,
+ InlineCandidateInfo** ppInlineCandidateInfo,
+ InlineResult* inlineResult);
+
+ void impInlineRecordArgInfo(InlineInfo* pInlineInfo,
+ GenTreePtr curArgVal,
+ unsigned argNum,
+ InlineResult* inlineResult);
+
+ void impInlineInitVars(InlineInfo* pInlineInfo);
+
+ unsigned impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reason));
+
+ GenTreePtr impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, InlLclVarInfo* lclTypeInfo);
+
+ BOOL impInlineIsThis(GenTreePtr tree, InlArgInfo* inlArgInfo);
+
+ BOOL impInlineIsGuaranteedThisDerefBeforeAnySideEffects(GenTreePtr additionalTreesToBeEvaluatedBefore,
+ GenTreePtr variableBeingDereferenced,
+ InlArgInfo* inlArgInfo);
+
+ void impMarkInlineCandidate(GenTreePtr call, CORINFO_CONTEXT_HANDLE exactContextHnd, CORINFO_CALL_INFO* callInfo);
+
+ bool impTailCallRetTypeCompatible(var_types callerRetType,
+ CORINFO_CLASS_HANDLE callerRetTypeClass,
+ var_types calleeRetType,
+ CORINFO_CLASS_HANDLE calleeRetTypeClass);
+
+ bool impIsTailCallILPattern(bool tailPrefixed,
+ OPCODE curOpcode,
+ const BYTE* codeAddrOfNextOpcode,
+ const BYTE* codeEnd,
+ bool isRecursive,
+ bool* IsCallPopRet = nullptr);
+
+ bool impIsImplicitTailCallCandidate(
+ OPCODE curOpcode, const BYTE* codeAddrOfNextOpcode, const BYTE* codeEnd, int prefixFlags, bool isRecursive);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX FlowGraph XX
+ XX XX
+ XX Info about the basic-blocks, their contents and the flow analysis XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ BasicBlock* fgFirstBB; // Beginning of the basic block list
+ BasicBlock* fgLastBB; // End of the basic block list
+ BasicBlock* fgFirstColdBlock; // First block to be placed in the cold section
+#if FEATURE_EH_FUNCLETS
+ BasicBlock* fgFirstFuncletBB; // First block of outlined funclets (to allow block insertion before the funclets)
+#endif
+ BasicBlock* fgFirstBBScratch; // Block inserted for initialization stuff. Is nullptr if no such block has been
+ // created.
+ BasicBlockList* fgReturnBlocks; // list of BBJ_RETURN blocks
+ unsigned fgEdgeCount; // # of control flow edges between the BBs
+ unsigned fgBBcount; // # of BBs in the method
+#ifdef DEBUG
+ unsigned fgBBcountAtCodegen; // # of BBs in the method at the start of codegen
+#endif
+ unsigned fgBBNumMax; // The max bbNum that has been assigned to basic blocks
+ unsigned fgDomBBcount; // # of BBs for which we have dominator and reachability information
+ BasicBlock** fgBBInvPostOrder; // The flow graph stored in an array sorted in topological order, needed to compute
+ // dominance. Indexed by block number. Size: fgBBNumMax + 1.
+
+ // After the dominance tree is computed, we cache a DFS preorder number and DFS postorder number to compute
+ // dominance queries in O(1). fgDomTreePreOrder and fgDomTreePostOrder are arrays giving the block's preorder and
+ // postorder number, respectively. The arrays are indexed by basic block number. (Note that blocks are numbered
+ // starting from one. Thus, we always waste element zero. This makes debugging easier and makes the code less likely
+ // to suffer from bugs stemming from forgetting to add or subtract one from the block number to form an array
+ // index). The arrays are of size fgBBNumMax + 1.
+ unsigned* fgDomTreePreOrder;
+ unsigned* fgDomTreePostOrder;
+
+ bool fgBBVarSetsInited;
+
+ // Allocate array like T* a = new T[fgBBNumMax + 1];
+ // Using helper so we don't keep forgetting +1.
+ template <typename T>
+ T* fgAllocateTypeForEachBlk(CompMemKind cmk = CMK_Unknown)
+ {
+ return (T*)compGetMem((fgBBNumMax + 1) * sizeof(T), cmk);
+ }
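+
+    // Usage sketch (illustrative; 'visitCount' is a placeholder name, and the memory kind defaults
+    // to CMK_Unknown when not supplied):
+    //
+    //     unsigned* visitCount = fgAllocateTypeForEachBlk<unsigned>();
+    //     for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
+    //     {
+    //         visitCount[blk->bbNum] = 0; // element 0 stays unused, since bbNum starts at 1
+    //     }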
+
+ // BlockSets are relative to a specific set of BasicBlock numbers. If that changes
+ // (if the blocks are renumbered), this changes. BlockSets from different epochs
+ // cannot be meaningfully combined. Note that new blocks can be created with higher
+ // block numbers without changing the basic block epoch. These blocks *cannot*
+ // participate in a block set until the blocks are all renumbered, causing the epoch
+ // to change. This is useful if continuing to use previous block sets is valuable.
+ // If the epoch is zero, then it is uninitialized, and block sets can't be used.
+ unsigned fgCurBBEpoch;
+
+ unsigned GetCurBasicBlockEpoch()
+ {
+ return fgCurBBEpoch;
+ }
+
+ // The number of basic blocks in the current epoch. When the blocks are renumbered,
+ // this is fgBBcount. As blocks are added, fgBBcount increases, fgCurBBEpochSize remains
+ // the same, until a new BasicBlock epoch is created, such as when the blocks are all renumbered.
+ unsigned fgCurBBEpochSize;
+
+ // The number of "size_t" elements required to hold a bitset large enough for fgCurBBEpochSize
+ // bits. This is precomputed to avoid doing math every time BasicBlockBitSetTraits::GetArrSize() is called.
+ unsigned fgBBSetCountInSizeTUnits;
+
+ void NewBasicBlockEpoch()
+ {
+ INDEBUG(unsigned oldEpochArrSize = fgBBSetCountInSizeTUnits);
+
+ // We have a new epoch. Compute and cache the size needed for new BlockSets.
+ fgCurBBEpoch++;
+ fgCurBBEpochSize = fgBBNumMax + 1;
+ fgBBSetCountInSizeTUnits =
+ unsigned(roundUp(fgCurBBEpochSize, sizeof(size_t) * 8)) / unsigned(sizeof(size_t) * 8);
+
+#ifdef DEBUG
+ // All BlockSet objects are now invalid!
+ fgReachabilitySetsValid = false; // the bbReach sets are now invalid!
+ fgEnterBlksSetValid = false; // the fgEnterBlks set is now invalid!
+
+ if (verbose)
+ {
+ unsigned epochArrSize = BasicBlockBitSetTraits::GetArrSize(this, sizeof(size_t));
+ printf("\nNew BlockSet epoch %d, # of blocks (including unused BB00): %u, bitset array size: %u (%s)",
+ fgCurBBEpoch, fgCurBBEpochSize, epochArrSize, (epochArrSize <= 1) ? "short" : "long");
+ if ((fgCurBBEpoch != 1) && ((oldEpochArrSize <= 1) != (epochArrSize <= 1)))
+ {
+ // If we're not just establishing the first epoch, and the epoch array size has changed such that we're
+ // going to change our bitset representation from short (just a size_t bitset) to long (a pointer to an
+ // array of size_t bitsets), then print that out.
+ printf("; NOTE: BlockSet size was previously %s!", (oldEpochArrSize <= 1) ? "short" : "long");
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+ void EnsureBasicBlockEpoch()
+ {
+ if (fgCurBBEpochSize != fgBBNumMax + 1)
+ {
+ NewBasicBlockEpoch();
+ }
+ }
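+
+    // Typical call-sequence sketch (illustrative; fgRenumberBlocks is assumed here and is not part
+    // of this declaration):
+    //
+    //     fgRenumberBlocks();      // bbNum values (and fgBBNumMax) may change
+    //     EnsureBasicBlockEpoch(); // bump the epoch if fgBBNumMax + 1 != fgCurBBEpochSize
+    //     // ... any BlockSet created from here on belongs to the new epoch ...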
+
+ BasicBlock* fgNewBasicBlock(BBjumpKinds jumpKind);
+ void fgEnsureFirstBBisScratch();
+ bool fgFirstBBisScratch();
+ bool fgBBisScratch(BasicBlock* block);
+
+ void fgExtendEHRegionBefore(BasicBlock* block);
+ void fgExtendEHRegionAfter(BasicBlock* block);
+
+ BasicBlock* fgNewBBbefore(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion);
+
+ BasicBlock* fgNewBBafter(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion);
+
+ BasicBlock* fgNewBBinRegion(BBjumpKinds jumpKind,
+ unsigned tryIndex,
+ unsigned hndIndex,
+ BasicBlock* nearBlk,
+ bool putInFilter = false,
+ bool runRarely = false,
+ bool insertAtEnd = false);
+
+ BasicBlock* fgNewBBinRegion(BBjumpKinds jumpKind,
+ BasicBlock* srcBlk,
+ bool runRarely = false,
+ bool insertAtEnd = false);
+
+ BasicBlock* fgNewBBinRegion(BBjumpKinds jumpKind);
+
+ BasicBlock* fgNewBBinRegionWorker(BBjumpKinds jumpKind,
+ BasicBlock* afterBlk,
+ unsigned xcptnIndex,
+ bool putInTryRegion);
+
+ void fgInsertBBbefore(BasicBlock* insertBeforeBlk, BasicBlock* newBlk);
+ void fgInsertBBafter(BasicBlock* insertAfterBlk, BasicBlock* newBlk);
+ void fgUnlinkBlock(BasicBlock* block);
+
+#if OPT_BOOL_OPS // Used to detect multiple logical "not" assignments.
+ bool fgMultipleNots;
+#endif
+
+ bool fgModified; // True if the flow graph has been modified recently
+ bool fgComputePredsDone; // Have we computed the bbPreds list
+ bool fgCheapPredsValid; // Is the bbCheapPreds list valid?
+ bool fgDomsComputed; // Have we computed the dominator sets?
+
+ bool fgHasSwitch; // any BBJ_SWITCH jumps?
+ bool fgHasPostfix; // any postfix ++/-- found?
+ unsigned fgIncrCount; // number of increment nodes found
+
+ BlockSet fgEnterBlks; // Set of blocks which have a special transfer of control; the "entry" blocks plus EH handler
+ // begin blocks.
+
+#ifdef DEBUG
+ bool fgReachabilitySetsValid; // Are the bbReach sets valid?
+ bool fgEnterBlksSetValid; // Is the fgEnterBlks set valid?
+#endif // DEBUG
+
+ bool fgRemoveRestOfBlock; // true if we know that we will throw
+ bool fgStmtRemoved; // true if we remove statements -> need new DFA
+
+ // There are two modes for ordering of the trees.
+ // - In FGOrderTree, the dominant ordering is the tree order, and the nodes contained in
+ // each tree and sub-tree are contiguous, and can be traversed (in gtNext/gtPrev order)
+ // by traversing the tree according to the order of the operands.
+ // - In FGOrderLinear, the dominant ordering is the linear order.
+
+ enum FlowGraphOrder
+ {
+ FGOrderTree,
+ FGOrderLinear
+ };
+ FlowGraphOrder fgOrder;
+
+ // The following are boolean flags that keep track of the state of internal data structures
+
+ bool fgStmtListThreaded;
+ bool fgCanRelocateEHRegions; // true if we are allowed to relocate the EH regions
+ bool fgEdgeWeightsComputed; // true after we have called fgComputeEdgeWeights
+ bool fgHaveValidEdgeWeights; // true if we were successful in computing all of the edge weights
+    bool fgSlopUsedInEdgeWeights;  // true if there was some slop used when computing the edge weights
+ bool fgRangeUsedInEdgeWeights; // true if some of the edgeWeight are expressed in Min..Max form
+ bool fgNeedsUpdateFlowGraph; // true if we need to run fgUpdateFlowGraph
+ BasicBlock::weight_t fgCalledWeight; // count of the number of times this method was called
+ // This is derived from the profile data
+ // or is BB_UNITY_WEIGHT when we don't have profile data
+
+#if FEATURE_EH_FUNCLETS
+ bool fgFuncletsCreated; // true if the funclet creation phase has been run
+#endif // FEATURE_EH_FUNCLETS
+
+    bool fgGlobalMorph;  // indicates whether we are in the global morphing phase,
+                         // since fgMorphTree can be called from several places
+    bool fgExpandInline; // indicates that we are creating trees for the inliner
+
+ bool impBoxTempInUse; // the temp below is valid and available
+ unsigned impBoxTemp; // a temporary that is used for boxing
+
+#ifdef DEBUG
+ bool jitFallbackCompile; // Are we doing a fallback compile? That is, have we executed a NO_WAY assert,
+ // and we are trying to compile again in a "safer", minopts mode?
+#endif
+
+#if defined(DEBUG)
+ unsigned impInlinedCodeSize;
+#endif
+
+ //-------------------------------------------------------------------------
+
+ void fgInit();
+
+ void fgImport();
+
+ void fgInline();
+
+ GenTreePtr fgGetCritSectOfStaticMethod();
+
+#if !defined(_TARGET_X86_)
+
+ void fgAddSyncMethodEnterExit();
+
+ GenTree* fgCreateMonitorTree(unsigned lvaMonitorBool, unsigned lvaThisVar, BasicBlock* block, bool enter);
+
+ void fgConvertSyncReturnToLeave(BasicBlock* block);
+
+#endif // !_TARGET_X86_
+
+ void fgAddReversePInvokeEnterExit();
+
+ bool fgMoreThanOneReturnBlock();
+
+ // The number of separate return points in the method.
+ unsigned fgReturnCount;
+
+ void fgAddInternal();
+
+ bool fgFoldConditional(BasicBlock* block);
+
+ void fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw);
+ void fgMorphBlocks();
+
+ bool fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg));
+
+ void fgSetOptions();
+
+#ifdef DEBUG
+ static fgWalkPreFn fgAssertNoQmark;
+ void fgPreExpandQmarkChecks(GenTreePtr expr);
+ void fgPostExpandQmarkChecks();
+ static void fgCheckQmarkAllowedForm(GenTreePtr tree);
+#endif
+
+ IL_OFFSET fgFindBlockILOffset(BasicBlock* block);
+
+ BasicBlock* fgSplitBlockAtBeginning(BasicBlock* curr);
+ BasicBlock* fgSplitBlockAtEnd(BasicBlock* curr);
+ BasicBlock* fgSplitBlockAfterStatement(BasicBlock* curr, GenTree* stmt);
+ BasicBlock* fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node); // for LIR
+ BasicBlock* fgSplitEdge(BasicBlock* curr, BasicBlock* succ);
+
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block, IL_OFFSETX offs);
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree);
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block);
+ GenTreeStmt* fgNewStmtFromTree(GenTreePtr tree, IL_OFFSETX offs);
+
+ GenTreePtr fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst = nullptr);
+ void fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt);
+ void fgExpandQmarkStmt(BasicBlock* block, GenTreePtr expr);
+ void fgExpandQmarkNodes();
+
+ void fgMorph();
+
+ // Do "simple lowering." This functionality is (conceptually) part of "general"
+ // lowering that is distributed between fgMorph and the lowering phase of LSRA.
+ void fgSimpleLowering();
+
+ bool fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse);
+
+ GenTreePtr fgInitThisClass();
+
+ GenTreePtr fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper);
+
+ GenTreePtr fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls);
+
+ void fgLocalVarLiveness();
+
+ void fgLocalVarLivenessInit();
+
+#ifdef LEGACY_BACKEND
+ GenTreePtr fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, GenTreePtr relopNode, GenTreePtr asgdLclVar);
+#else
+ void fgPerNodeLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
+ void fgPerStatementLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
+#endif
+ void fgPerBlockLocalVarLiveness();
+
+ VARSET_VALRET_TP fgGetHandlerLiveVars(BasicBlock* block);
+
+ void fgLiveVarAnalysis(bool updateInternalOnly = false);
+
+ // This is used in the liveness computation, as a temporary. When we use the
+ // arbitrary-length VarSet representation, it is better not to allocate a new one
+ // at each call.
+ VARSET_TP fgMarkIntfUnionVS;
+
+ bool fgMarkIntf(VARSET_VALARG_TP varSet);
+
+ bool fgMarkIntf(VARSET_VALARG_TP varSet1, VARSET_VALARG_TP varSet2);
+
+ void fgUpdateRefCntForClone(BasicBlock* addedToBlock, GenTreePtr clonedTree);
+
+ void fgUpdateRefCntForExtract(GenTreePtr wholeTree, GenTreePtr keptTree);
+
+ void fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call);
+
+ bool fgComputeLifeLocal(VARSET_TP& life, VARSET_TP& keepAliveVars, GenTree* lclVarNode, GenTree* node);
+
+ VARSET_VALRET_TP fgComputeLife(VARSET_VALARG_TP life,
+ GenTreePtr startNode,
+ GenTreePtr endNode,
+ VARSET_VALARG_TP volatileVars,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf));
+
+ VARSET_VALRET_TP fgComputeLifeLIR(VARSET_VALARG_TP life, BasicBlock* block, VARSET_VALARG_TP volatileVars);
+
+ bool fgRemoveDeadStore(GenTree** pTree,
+ LclVarDsc* varDsc,
+ VARSET_TP life,
+ bool* doAgain,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf));
+
+ bool fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, GenTree** next);
+
+ // For updating liveset during traversal AFTER fgComputeLife has completed
+ VARSET_VALRET_TP fgGetVarBits(GenTreePtr tree);
+ VARSET_VALRET_TP fgUpdateLiveSet(VARSET_VALARG_TP liveSet, GenTreePtr tree);
+
+ // Returns the set of live variables after endTree,
+ // assuming that liveSet is the set of live variables BEFORE tree.
+ // Requires that fgComputeLife has completed, and that tree is in the same
+ // statement as endTree, and that it comes before endTree in execution order
+
+ VARSET_VALRET_TP fgUpdateLiveSet(VARSET_VALARG_TP liveSet, GenTreePtr tree, GenTreePtr endTree)
+ {
+ VARSET_TP VARSET_INIT(this, newLiveSet, liveSet);
+ while (tree != nullptr && tree != endTree->gtNext)
+ {
+ VarSetOps::AssignNoCopy(this, newLiveSet, fgUpdateLiveSet(newLiveSet, tree));
+ tree = tree->gtNext;
+ }
+ assert(tree == endTree->gtNext);
+ return newLiveSet;
+ }
+
+ void fgInterBlockLocalVarLiveness();
+
+ // The presence of "x op= y" operations presents some difficulties for SSA: this is both a use of some SSA name of
+ // "x", and a def of a new SSA name for "x". The tree only has one local variable for "x", so it has to choose
+ // whether to treat that as the use or def. It chooses the "use", and thus the old SSA name. This map allows us
+ // to record/recover the "def" SSA number, given the lcl var node for "x" in such a tree.
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, unsigned, JitSimplerHashBehavior> NodeToUnsignedMap;
+ NodeToUnsignedMap* m_opAsgnVarDefSsaNums;
+ NodeToUnsignedMap* GetOpAsgnVarDefSsaNums()
+ {
+ if (m_opAsgnVarDefSsaNums == nullptr)
+ {
+ m_opAsgnVarDefSsaNums = new (getAllocator()) NodeToUnsignedMap(getAllocator());
+ }
+ return m_opAsgnVarDefSsaNums;
+ }
+
+ // Requires value numbering phase to have completed. Returns the value number ("gtVN") of the
+ // "tree," EXCEPT in the case of GTF_VAR_USEASG, because the tree node's gtVN member is the
+    // "use" VN. Performs a lookup into the map of (use asg tree -> def VN) to return the "def's"
+ // VN.
+ inline ValueNum GetUseAsgDefVNOrTreeVN(GenTreePtr tree);
+
+ // Requires that "lcl" has the GTF_VAR_DEF flag set. Returns the SSA number of "lcl".
+ // Except: assumes that lcl is a def, and if it is
+ // a def appearing in "lcl op= rhs" (GTF_VAR_USEASG), looks up and returns the SSA number for the "def",
+ // rather than the "use" SSA number recorded in the tree "lcl".
+ inline unsigned GetSsaNumForLocalVarDef(GenTreePtr lcl);
+
+ // Some assignments assign to a local "indirectly": they are part of a comma expression that takes the address
+ // of the local (or a field thereof), assigns this address to a temp, and uses an indirection of this temp as
+ // the LHS of the assignment. This actually arises in exactly one situation. At the source level we assign one
+ // struct local to another: "s1 = s2". This becomes a copyblk. If "s2" is promoted into field variables "s2f0",
+ // ..."s2fn", then the copyblk will morph to a comma expression that takes the address of "s1" and does field-wise
+ // assignments:
+ // (byref addrS1 = &s1,
+    //     *(addrS1 + offsetof(f0)) = s2f0,
+    //     ...
+    //     *(addrS1 + offsetof(fn)) = s2fn)
+ //
+ // It would be a shame, given the simple form at the source level, to be unable to track the values in the
+ // fields of "s1" after this. But "s1" does not appear in the assignments that modify it. How, then, to
+ // give it SSA names and value numbers?
+ //
+ // The solution is to use the side table described below to annotate each of the field-wise assignments at the
+ // end with an instance of the structure below, whose fields are described in the declaration.
+ struct IndirectAssignmentAnnotation
+ {
+ unsigned m_lclNum; // The local num that is being indirectly assigned.
+ FieldSeqNode* m_fieldSeq; // If the LHS of the struct assignment is itself a struct field dereference,
+ // as in "s0.g = s2", then "m_lclNum" would be "s0", and "m_fieldSeq" would
+ // be the singleton field sequence "g". The individual assignments would
+ // further append the fields of "s.g" to that.
+ bool m_isEntire; // True iff this assignment writes all of m_lclNum. (This can occur if the
+ // structure has a single field).
+ unsigned m_defSsaNum; // The new SSA number of "m_lclNum" after the assignment.
+ unsigned m_useSsaNum; // Only valid if "m_isEntire" is false; if so, the SSA number of "m_lclNum" before the
+ // assignment.
+
+ IndirectAssignmentAnnotation(unsigned lclNum,
+ FieldSeqNode* fldSeq,
+ bool isEntire,
+ unsigned defSsaNum = SsaConfig::RESERVED_SSA_NUM,
+ unsigned useSsaNum = SsaConfig::RESERVED_SSA_NUM)
+ : m_lclNum(lclNum), m_fieldSeq(fldSeq), m_isEntire(isEntire), m_defSsaNum(defSsaNum), m_useSsaNum(useSsaNum)
+ {
+ }
+ };
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IndirectAssignmentAnnotation*, JitSimplerHashBehavior>
+ NodeToIndirAssignMap;
+ NodeToIndirAssignMap* m_indirAssignMap;
+ NodeToIndirAssignMap* GetIndirAssignMap()
+ {
+ if (m_indirAssignMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_IndirAssignMap, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_IndirAssignMap) CompAllocator(this, CMK_IndirAssignMap);
+ m_indirAssignMap = new (ialloc) NodeToIndirAssignMap(ialloc);
+ }
+ return m_indirAssignMap;
+ }
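+
+    // Sketch of how one of the field-wise assignments above could be annotated (illustrative;
+    // 'asgTree', 's1LclNum', 'f0FieldSeq' and 'newSsaNum' are placeholder names):
+    //
+    //     IndirectAssignmentAnnotation* pAnn = new (getAllocator())
+    //         IndirectAssignmentAnnotation(s1LclNum, f0FieldSeq, /* isEntire */ false, newSsaNum);
+    //     GetIndirAssignMap()->Set(asgTree, pAnn);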
+
+ // Performs SSA conversion.
+ void fgSsaBuild();
+
+ // Reset any data structures to the state expected by "fgSsaBuild", so it can be run again.
+ void fgResetForSsa();
+
+ unsigned fgSsaPassesCompleted; // Number of times fgSsaBuild has been run.
+
+ // Returns "true" iff lcl "lclNum" should be excluded from SSA.
+ inline bool fgExcludeFromSsa(unsigned lclNum);
+
+ // The value numbers for this compilation.
+ ValueNumStore* vnStore;
+
+public:
+ ValueNumStore* GetValueNumStore()
+ {
+ return vnStore;
+ }
+
+ // Do value numbering (assign a value number to each
+ // tree node).
+ void fgValueNumber();
+
+    // Updates "fgCurHeapVN" via the assignment H[elemTypeEq][arrVN][inx][fldSeq] = rhsVN.
+ // Assumes that "elemTypeEq" is the (equivalence class rep) of the array element type.
+ // The 'indType' is the indirection type of the lhs of the assignment and will typically
+ // match the element type of the array or fldSeq. When this type doesn't match
+ // or if the fldSeq is 'NotAField' we invalidate the array contents H[elemTypeEq][arrVN]
+ //
+ void fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ FieldSeqNode* fldSeq,
+ ValueNum rhsVN,
+ var_types indType);
+
+ // Requires that "tree" is a GT_IND marked as an array index, and that its address argument
+ // has been parsed to yield the other input arguments. If evaluation of the address
+ // can raise exceptions, those should be captured in the exception set "excVN."
+ // Assumes that "elemTypeEq" is the (equivalence class rep) of the array element type.
+ // Marks "tree" with the VN for H[elemTypeEq][arrVN][inx][fldSeq] (for the liberal VN; a new unique
+ // VN for the conservative VN.) Also marks the tree's argument as the address of an array element.
+ // The type tree->TypeGet() will typically match the element type of the array or fldSeq.
+ // When this type doesn't match or if the fldSeq is 'NotAField' we return a new unique VN
+ //
+ ValueNum fgValueNumberArrIndexVal(GenTreePtr tree,
+ CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ ValueNum excVN,
+ FieldSeqNode* fldSeq);
+
+ // Requires "funcApp" to be a VNF_PtrToArrElem, and "addrXvn" to represent the exception set thrown
+ // by evaluating the array index expression "tree". Returns the value number resulting from
+ // dereferencing the array in the current heap state. If "tree" is non-null, it must be the
+ // "GT_IND" that does the dereference, and it is given the returned value number.
+ ValueNum fgValueNumberArrIndexVal(GenTreePtr tree, struct VNFuncApp* funcApp, ValueNum addrXvn);
+
+ unsigned fgVNPassesCompleted; // Number of times fgValueNumber has been run.
+
+ // Utility functions for fgValueNumber.
+
+ // Perform value-numbering for the trees in "blk". When giving VN's to the SSA
+ // names defined by phi definitions at the start of "blk", "newVNsForPhis" indicates
+    // that these should be given new VN's, irrespective of the values of the RHS.
+ // If "false", then we may assume that all inputs to phi RHS's of such definitions
+ // have already been assigned value numbers; if they are all assigned the *same* value
+ // number, then the LHS SSA name gets the same VN.
+ void fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis);
+
+ // Requires that "entryBlock" is the entry block of loop "loopNum", and that "loopNum" is the
+ // innermost loop of which "entryBlock" is the entry. Returns the value number that should be
+    // assumed for the heap at the start of "entryBlock".
+ ValueNum fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned loopNum);
+
+ // Called when an operation (performed by "tree", described by "msg") may cause the global Heap to be mutated.
+ void fgMutateHeap(GenTreePtr tree DEBUGARG(const char* msg));
+
+ // Tree caused an update in the current heap VN. If "tree" has an associated heap SSA #, record that
+ // value in that SSA #.
+ void fgValueNumberRecordHeapSsa(GenTreePtr tree);
+
+ // The input 'tree' is a leaf node that is a constant
+ // Assign the proper value number to the tree
+ void fgValueNumberTreeConst(GenTreePtr tree);
+
+ // Assumes that all inputs to "tree" have had value numbers assigned; assigns a VN to tree.
+ // (With some exceptions: the VN of the lhs of an assignment is assigned as part of the
+ // assignment.)
+ // If "evalAsgLhsInd" is true, evaluate a GT_IND node, even if it's labeled as the LHS of
+ // an assignment.
+ void fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd = false);
+
+ // Does value-numbering for a block assignment.
+ void fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd);
+
+ // Does value-numbering for a cast tree.
+ void fgValueNumberCastTree(GenTreePtr tree);
+
+ // Does value-numbering for an intrinsic tree.
+ void fgValueNumberIntrinsic(GenTreePtr tree);
+
+ // Does value-numbering for a call. We interpret some helper calls.
+ void fgValueNumberCall(GenTreeCall* call);
+
+ // The VN of some nodes in "args" may have changed -- reassign VNs to the arg list nodes.
+ void fgUpdateArgListVNs(GenTreeArgList* args);
+
+ // Does value-numbering for a helper "call" that has a VN function symbol "vnf".
+ void fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueNumPair vnpExc);
+
+ // Requires "helpCall" to be a helper call. Assigns it a value number;
+ // we understand the semantics of some of the calls. Returns "true" if
+ // the call may modify the heap (we assume arbitrary memory side effects if so).
+ bool fgValueNumberHelperCall(GenTreeCall* helpCall);
+
+ // Requires "helpFunc" to be pure. Returns the corresponding VNFunc.
+ VNFunc fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc);
+
+ // This is the current value number for the "Heap" implicit variable while
+ // doing value numbering. This is the value number under the "liberal" interpretation
+ // of heap values; the "conservative" interpretation needs no VN, since every access of
+ // the heap yields an unknown value.
+ ValueNum fgCurHeapVN;
+
+ // Return a "pseudo"-class handle for an array element type. If "elemType" is TYP_STRUCT,
+    // requires "elemStructType" to be non-null (and to have a zero low-order bit). Otherwise, the low-order bit
+ // is 1, and the rest is an encoding of "elemTyp".
+ static CORINFO_CLASS_HANDLE EncodeElemType(var_types elemTyp, CORINFO_CLASS_HANDLE elemStructType)
+ {
+ if (elemStructType != nullptr)
+ {
+ assert(varTypeIsStruct(elemTyp) || elemTyp == TYP_REF || elemTyp == TYP_BYREF ||
+ varTypeIsIntegral(elemTyp));
+ assert((size_t(elemStructType) & 0x1) == 0x0); // Make sure the encoding below is valid.
+ return elemStructType;
+ }
+ else
+ {
+ elemTyp = varTypeUnsignedToSigned(elemTyp);
+ return CORINFO_CLASS_HANDLE(size_t(elemTyp) << 1 | 0x1);
+ }
+ }
+    // If "clsHnd" encodes a primitive element type (i.e., it came from EncodeElemType with a null
+    // struct handle), returns the var_types it represents. Otherwise, returns TYP_STRUCT (on the
+    // assumption that "clsHnd" is the struct type of the element).
+ static var_types DecodeElemType(CORINFO_CLASS_HANDLE clsHnd)
+ {
+ size_t clsHndVal = size_t(clsHnd);
+ if (clsHndVal & 0x1)
+ {
+ return var_types(clsHndVal >> 1);
+ }
+ else
+ {
+ return TYP_STRUCT;
+ }
+ }
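+
+    // Round-trip sketch of the encoding above (illustrative; 'someStructHnd' is a placeholder
+    // class handle with a zero low-order bit):
+    //
+    //     CORINFO_CLASS_HANDLE intElem = EncodeElemType(TYP_INT, nullptr); // low bit set: primitive
+    //     assert(DecodeElemType(intElem) == TYP_INT);
+    //
+    //     CORINFO_CLASS_HANDLE structElem = EncodeElemType(TYP_STRUCT, someStructHnd); // real handle
+    //     assert(DecodeElemType(structElem) == TYP_STRUCT);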
+
+ // Convert a BYTE which represents the VM's CorInfoGCtype to the JIT's var_types
+ var_types getJitGCType(BYTE gcType);
+
+ enum structPassingKind
+ {
+ SPK_Unknown, // Invalid value, never returned
+ SPK_PrimitiveType, // The struct is passed/returned using a primitive type.
+        SPK_ByValue,       // The struct is passed/returned by value (using the ABI rules):
+                           // for ARM64 and UNIX_X64, in multiple registers (when all of the
+                           // parameter registers are used, the stack is used as well);
+                           // for X86, passed on the stack; for ARM32, passed in registers,
+                           // on the stack, or split between registers and the stack.
+        SPK_ByValueAsHfa,  // The struct is passed/returned as an HFA in multiple registers.
+        SPK_ByReference    // The struct is passed/returned by reference to a copy/buffer.
+    };
+
+    // Get the "primitive" type that is used when we are given a struct of size 'structSize'.
+    // For pointer-sized structs the 'clsHnd' is used to determine if the struct contains a GC ref.
+    // A "primitive" type is one of the scalar types: byte, short, int, long, ref, float, double.
+    // If we can't or shouldn't use a "primitive" type then TYP_UNKNOWN is returned.
+ //
+ var_types getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd);
+
+ // Get the type that is used to pass values of the given struct type.
+ // If you have already retrieved the struct size then pass it as the optional third argument
+ //
+ var_types getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbPassStruct,
+ unsigned structSize = 0);
+
+ // Get the type that is used to return values of the given struct type.
+ // If you have already retrieved the struct size then pass it as the optional third argument
+ //
+ var_types getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
+ structPassingKind* wbPassStruct = nullptr,
+ unsigned structSize = 0);
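+
+    // Example query (illustrative; 'clsHnd' is a placeholder struct handle; the struct size is
+    // looked up internally when the optional argument is not supplied):
+    //
+    //     structPassingKind passKind;
+    //     var_types argType = getArgTypeForStruct(clsHnd, &passKind);
+    //     // 'passKind' reports how the struct is passed; 'argType' is a scalar type only when
+    //     // passKind == SPK_PrimitiveType.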
+
+#ifdef DEBUG
+ // Print a representation of "vnp" or "vn" on standard output.
+ // If "level" is non-zero, we also print out a partial expansion of the value.
+ void vnpPrint(ValueNumPair vnp, unsigned level);
+ void vnPrint(ValueNum vn, unsigned level);
+#endif
+
+ // Dominator computation member functions
+ // Not exposed outside Compiler
+protected:
+ bool fgDominate(BasicBlock* b1, BasicBlock* b2); // Return true if b1 dominates b2
+
+ bool fgReachable(BasicBlock* b1, BasicBlock* b2); // Returns true if block b1 can reach block b2
+
+ void fgComputeDoms(); // Computes the immediate dominators for each basic block in the
+ // flow graph. We first assume the fields bbIDom on each
+ // basic block are invalid. This computation is needed later
+ // by fgBuildDomTree to build the dominance tree structure.
+ // Based on: A Simple, Fast Dominance Algorithm
+ // by Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy
+
+ BlockSet_ValRet_T fgGetDominatorSet(BasicBlock* block); // Returns a set of blocks that dominate the given block.
+                                                            // Note: this is relatively slow compared to calling fgDominate(),
+                                                            // especially when all you need is a single block-versus-block check.
+
+ void fgComputeReachabilitySets(); // Compute bbReach sets. (Also sets BBF_GC_SAFE_POINT flag on blocks.)
+
+ void fgComputeEnterBlocksSet(); // Compute the set of entry blocks, 'fgEnterBlks'.
+
+ bool fgRemoveUnreachableBlocks(); // Remove blocks determined to be unreachable by the bbReach sets.
+
+ void fgComputeReachability(); // Perform flow graph node reachability analysis.
+
+ BasicBlock* fgIntersectDom(BasicBlock* a, BasicBlock* b); // Intersect two immediate dominator sets.
+
+ void fgDfsInvPostOrder(); // In order to compute dominance using fgIntersectDom, the flow graph nodes must be
+                              // processed in topological sort order; this function takes care of that.
+
+ void fgDfsInvPostOrderHelper(BasicBlock* block, BlockSet& visited, unsigned* count);
+
+ BlockSet_ValRet_T fgDomFindStartNodes(); // Computes which basic blocks don't have incoming edges in the flow graph.
+ // Returns this as a set.
+
+ BlockSet_ValRet_T fgDomTreeEntryNodes(BasicBlockList** domTree); // Computes which nodes in the dominance forest are
+ // root nodes. Returns this as a set.
+
+#ifdef DEBUG
+ void fgDispDomTree(BasicBlockList** domTree); // Helper that prints out the Dominator Tree in debug builds.
+#endif // DEBUG
+
+ void fgBuildDomTree(); // Once we compute all the immediate dominator sets for each node in the flow graph
+ // (performed by fgComputeDoms), this procedure builds the dominance tree represented
+                           // as adjacency lists.
+
+    // In order to speed up queries of the form 'Does A dominate B', we perform a DFS preorder and postorder
+    // traversal of the dominance tree; the dominance query then becomes: A dominates B iff
+    // preOrder(A) <= preOrder(B) && postOrder(A) >= postOrder(B), making the computation O(1).
+ void fgTraverseDomTree(unsigned bbNum, BasicBlockList** domTree, unsigned* preNum, unsigned* postNum);
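+
+    // The resulting O(1) query, as a sketch (uses the fgDomTreePreOrder/fgDomTreePostOrder arrays
+    // declared above; 'a' and 'b' are placeholder BasicBlock pointers):
+    //
+    //     bool aDominatesB = (fgDomTreePreOrder[a->bbNum] <= fgDomTreePreOrder[b->bbNum]) &&
+    //                        (fgDomTreePostOrder[a->bbNum] >= fgDomTreePostOrder[b->bbNum]);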
+
+ // When the flow graph changes, we need to update the block numbers, predecessor lists, reachability sets, and
+ // dominators.
+ void fgUpdateChangedFlowGraph();
+
+public:
+ // Compute the predecessors of the blocks in the control flow graph.
+ void fgComputePreds();
+
+ // Remove all predecessor information.
+ void fgRemovePreds();
+
+ // Compute the cheap flow graph predecessors lists. This is used in some early phases
+ // before the full predecessors lists are computed.
+ void fgComputeCheapPreds();
+
+private:
+ void fgAddCheapPred(BasicBlock* block, BasicBlock* blockPred);
+
+ void fgRemoveCheapPred(BasicBlock* block, BasicBlock* blockPred);
+
+public:
+ enum GCPollType
+ {
+ GCPOLL_NONE,
+ GCPOLL_CALL,
+ GCPOLL_INLINE
+ };
+
+ // Initialize the per-block variable sets (used for liveness analysis).
+ void fgInitBlockVarSets();
+
+ // true if we've gone through and created GC Poll calls.
+ bool fgGCPollsCreated;
+ void fgMarkGCPollBlocks();
+ void fgCreateGCPolls();
+ bool fgCreateGCPoll(GCPollType pollType, BasicBlock* block);
+
+ // Requires that "block" is a block that returns from
+ // a finally. Returns the number of successors (jump targets of
+ // of blocks in the covered "try" that did a "LEAVE".)
+ unsigned fgNSuccsOfFinallyRet(BasicBlock* block);
+
+ // Requires that "block" is a block that returns (in the sense of BBJ_EHFINALLYRET) from
+ // a finally. Returns its "i"th successor (jump targets of
+ // of blocks in the covered "try" that did a "LEAVE".)
+ // Requires that "i" < fgNSuccsOfFinallyRet(block).
+ BasicBlock* fgSuccOfFinallyRet(BasicBlock* block, unsigned i);
+
+private:
+ // Factors out common portions of the implementations of the two methods above.
+ void fgSuccOfFinallyRetWork(BasicBlock* block, unsigned i, BasicBlock** bres, unsigned* nres);
+
+public:
+ // For many purposes, it is desirable to be able to enumerate the *distinct* targets of a switch statement,
+ // skipping duplicate targets. (E.g., in flow analyses that are only interested in the set of possible targets.)
+ // SwitchUniqueSuccSet contains the non-duplicated switch targets.
+ // (Code that modifies the jump table of a switch has an obligation to call Compiler::UpdateSwitchTableTarget,
+ // which in turn will call the "UpdateTarget" method of this type if a SwitchUniqueSuccSet has already
+ // been computed for the switch block. If a switch block is deleted or is transformed into a non-switch,
+ // we leave the entry associated with the block, but it will no longer be accessed.)
+ struct SwitchUniqueSuccSet
+ {
+ unsigned numDistinctSuccs; // Number of distinct targets of the switch.
+ BasicBlock** nonDuplicates; // Array of "numDistinctSuccs", containing all the distinct switch target
+ // successors.
+
+ // The switch block "switchBlk" just had an entry with value "from" modified to the value "to".
+ // Update "this" as necessary: if "from" is no longer an element of the jump table of "switchBlk",
+ // remove it from "this", and ensure that "to" is a member. Use "alloc" to do any required allocation.
+ void UpdateTarget(IAllocator* alloc, BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to);
+ };
+
+ typedef SimplerHashTable<BasicBlock*, PtrKeyFuncs<BasicBlock>, SwitchUniqueSuccSet, JitSimplerHashBehavior>
+ BlockToSwitchDescMap;
+
+private:
+ // Maps BasicBlock*'s that end in switch statements to SwitchUniqueSuccSets that allow
+ // iteration over only the distinct successors.
+ BlockToSwitchDescMap* m_switchDescMap;
+
+public:
+ BlockToSwitchDescMap* GetSwitchDescMap()
+ {
+ if (m_switchDescMap == nullptr)
+ {
+ m_switchDescMap = new (getAllocator()) BlockToSwitchDescMap(getAllocator());
+ }
+ return m_switchDescMap;
+ }
+
+ // Invalidate the map of unique switch block successors. For example, since the hash key of the map
+ // depends on block numbers, we must invalidate the map when the blocks are renumbered, to ensure that
+ // we don't accidentally look up and return the wrong switch data.
+ void InvalidateUniqueSwitchSuccMap()
+ {
+ m_switchDescMap = nullptr;
+ }
+
+ // Requires "switchBlock" to be a block that ends in a switch. Returns
+ // the corresponding SwitchUniqueSuccSet.
+ SwitchUniqueSuccSet GetDescriptorForSwitch(BasicBlock* switchBlk);
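+ // Illustrative use of the descriptor above (a sketch only, using the fields declared in
+ // SwitchUniqueSuccSet; "VisitBlock" is a hypothetical placeholder for caller logic):
+ //
+ //   SwitchUniqueSuccSet succs = GetDescriptorForSwitch(switchBlk);
+ //   for (unsigned i = 0; i < succs.numDistinctSuccs; i++)
+ //   {
+ //       BasicBlock* uniqueSucc = succs.nonDuplicates[i];
+ //       VisitBlock(uniqueSucc); // each distinct switch target is visited exactly once
+ //   }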
+
+ // The switch block "switchBlk" just had an entry with value "from" modified to the value "to".
+ // Update "this" as necessary: if "from" is no longer an element of the jump table of "switchBlk",
+ // remove it from "this", and ensure that "to" is a member.
+ void UpdateSwitchTableTarget(BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to);
+
+ // Remove the "SwitchUniqueSuccSet" of "switchBlk" in the BlockToSwitchDescMap.
+ void fgInvalidateSwitchDescMapEntry(BasicBlock* switchBlk);
+
+ BasicBlock* fgFirstBlockOfHandler(BasicBlock* block);
+
+ flowList* fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred, flowList*** ptrToPred);
+
+ flowList* fgSpliceOutPred(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgRemoveRefPred(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgRemoveAllRefPreds(BasicBlock* block, BasicBlock* blockPred);
+
+ flowList* fgRemoveAllRefPreds(BasicBlock* block, flowList** ptrToPred);
+
+ void fgRemoveBlockAsPred(BasicBlock* block);
+
+ void fgChangeSwitchBlock(BasicBlock* oldSwitchBlock, BasicBlock* newSwitchBlock);
+
+ void fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* newTarget, BasicBlock* oldTarget);
+
+ void fgReplaceJumpTarget(BasicBlock* block, BasicBlock* newTarget, BasicBlock* oldTarget);
+
+ void fgReplacePred(BasicBlock* block, BasicBlock* oldPred, BasicBlock* newPred);
+
+ flowList* fgAddRefPred(BasicBlock* block,
+ BasicBlock* blockPred,
+ flowList* oldEdge = nullptr,
+ bool initializingPreds = false); // Only set to 'true' when we are computing preds in
+ // fgComputePreds()
+
+ void fgFindBasicBlocks();
+
+ bool fgIsBetterFallThrough(BasicBlock* bCur, BasicBlock* bAlt);
+
+ bool fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionIndex, bool putInTryRegion);
+
+ BasicBlock* fgFindInsertPoint(unsigned regionIndex,
+ bool putInTryRegion,
+ BasicBlock* startBlk,
+ BasicBlock* endBlk,
+ BasicBlock* nearBlk,
+ BasicBlock* jumpBlk,
+ bool runRarely);
+
+ unsigned fgGetNestingLevel(BasicBlock* block, unsigned* pFinallyNesting = nullptr);
+
+ void fgRemoveEmptyBlocks();
+
+ void fgRemoveStmt(BasicBlock* block, GenTreePtr stmt, bool updateRefCnt = true);
+
+ bool fgCheckRemoveStmt(BasicBlock* block, GenTreePtr stmt);
+
+ void fgCreateLoopPreHeader(unsigned lnum);
+
+ void fgUnreachableBlock(BasicBlock* block);
+
+ void fgRemoveJTrue(BasicBlock* block);
+
+ BasicBlock* fgLastBBInMainFunction();
+
+ BasicBlock* fgEndBBAfterMainFunction();
+
+ void fgUnlinkRange(BasicBlock* bBeg, BasicBlock* bEnd);
+
+ void fgRemoveBlock(BasicBlock* block, bool unreachable);
+
+ bool fgCanCompactBlocks(BasicBlock* block, BasicBlock* bNext);
+
+ void fgCompactBlocks(BasicBlock* block, BasicBlock* bNext);
+
+ void fgUpdateLoopsAfterCompacting(BasicBlock* block, BasicBlock* bNext);
+
+ BasicBlock* fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst);
+
+ bool fgRenumberBlocks();
+
+ bool fgExpandRarelyRunBlocks();
+
+ bool fgEhAllowsMoveBlock(BasicBlock* bBefore, BasicBlock* bAfter);
+
+ void fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk);
+
+ enum FG_RELOCATE_TYPE
+ {
+ FG_RELOCATE_TRY, // relocate the 'try' region
+ FG_RELOCATE_HANDLER // relocate the handler region (including the filter if necessary)
+ };
+ BasicBlock* fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE relocateType);
+
+#if FEATURE_EH_FUNCLETS
+#if defined(_TARGET_ARM_)
+ void fgClearFinallyTargetBit(BasicBlock* block);
+#endif // defined(_TARGET_ARM_)
+ bool fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block);
+ bool fgAnyIntraHandlerPreds(BasicBlock* block);
+ void fgInsertFuncletPrologBlock(BasicBlock* block);
+ void fgCreateFuncletPrologBlocks();
+ void fgCreateFunclets();
+#else // !FEATURE_EH_FUNCLETS
+ bool fgRelocateEHRegions();
+#endif // !FEATURE_EH_FUNCLETS
+
+ bool fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* target);
+
+ bool fgBlockEndFavorsTailDuplication(BasicBlock* block);
+
+ bool fgBlockIsGoodTailDuplicationCandidate(BasicBlock* block);
+
+ bool fgOptimizeFallthroughTailDup(BasicBlock* block, BasicBlock* target);
+
+ bool fgOptimizeEmptyBlock(BasicBlock* block);
+
+ bool fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBlock* bDest);
+
+ bool fgOptimizeBranch(BasicBlock* bJump);
+
+ bool fgOptimizeSwitchBranches(BasicBlock* block);
+
+ bool fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, BasicBlock* bPrev);
+
+ bool fgOptimizeSwitchJumps();
+#ifdef DEBUG
+ void fgPrintEdgeWeights();
+#endif
+ void fgComputeEdgeWeights();
+
+ void fgReorderBlocks();
+
+ void fgDetermineFirstColdBlock();
+
+ bool fgIsForwardBranch(BasicBlock* bJump, BasicBlock* bSrc = nullptr);
+
+ bool fgUpdateFlowGraph(bool doTailDup = false);
+
+ void fgFindOperOrder();
+
+ // Predicate type used to decide whether the tree should be split at a given point.
+ typedef bool(fgSplitPredicate)(GenTree* tree, GenTree* parent, fgWalkData* data);
+
+ void fgSetBlockOrder();
+
+ void fgRemoveReturnBlock(BasicBlock* block);
+
+ /* Helper code that has been factored out */
+ inline void fgConvertBBToThrowBB(BasicBlock* block);
+
+ bool fgCastNeeded(GenTreePtr tree, var_types toType);
+ GenTreePtr fgDoNormalizeOnStore(GenTreePtr tree);
+ GenTreePtr fgMakeTmpArgNode(
+ unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters));
+
+ // The following check for loops that don't execute calls
+ bool fgLoopCallMarked;
+
+ void fgLoopCallTest(BasicBlock* srcBB, BasicBlock* dstBB);
+ void fgLoopCallMark();
+
+ void fgMarkLoopHead(BasicBlock* block);
+
+ unsigned fgGetCodeEstimate(BasicBlock* block);
+
+#if DUMP_FLOWGRAPHS
+ const char* fgProcessEscapes(const char* nameIn, escapeMapping_t* map);
+ FILE* fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type);
+ bool fgDumpFlowGraph(Phases phase);
+
+#endif // DUMP_FLOWGRAPHS
+
+#ifdef DEBUG
+ void fgDispDoms();
+ void fgDispReach();
+ void fgDispBBLiveness(BasicBlock* block);
+ void fgDispBBLiveness();
+ void fgTableDispBasicBlock(BasicBlock* block, int ibcColWidth = 0);
+ void fgDispBasicBlocks(BasicBlock* firstBlock, BasicBlock* lastBlock, bool dumpTrees);
+ void fgDispBasicBlocks(bool dumpTrees = false);
+ void fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum);
+ void fgDumpBlock(BasicBlock* block);
+ void fgDumpTrees(BasicBlock* firstBlock, BasicBlock* lastBlock);
+
+ static fgWalkPreFn fgStress64RsltMulCB;
+ void fgStress64RsltMul();
+ void fgDebugCheckUpdate();
+ void fgDebugCheckBBlist(bool checkBBNum = false, bool checkBBRefs = true);
+ void fgDebugCheckBlockLinks();
+ void fgDebugCheckLinks(bool morphTrees = false);
+ void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt);
+ void fgDebugCheckFlags(GenTreePtr tree);
+#endif
+
+#ifdef LEGACY_BACKEND
+ static void fgOrderBlockOps(GenTreePtr tree,
+ regMaskTP reg0,
+ regMaskTP reg1,
+ regMaskTP reg2,
+ GenTreePtr* opsPtr, // OUT
+ regMaskTP* regsPtr); // OUT
+#endif // LEGACY_BACKEND
+
+ static GenTreePtr fgGetFirstNode(GenTreePtr tree);
+ static bool fgTreeIsInStmt(GenTree* tree, GenTreeStmt* stmt);
+
+ inline bool fgIsInlining()
+ {
+ return fgExpandInline;
+ }
+
+ void fgTraverseRPO();
+
+ //--------------------- Walking the trees in the IR -----------------------
+
+ struct fgWalkData
+ {
+ Compiler* compiler;
+ fgWalkPreFn* wtprVisitorFn;
+ fgWalkPostFn* wtpoVisitorFn;
+ void* pCallbackData; // user-provided data
+ bool wtprLclsOnly; // whether to only visit lclvar nodes
+ GenTreePtr parent; // parent of current node, provided to callback
+ GenTreeStack* parentStack; // stack of parent nodes, if asked for
+#ifdef DEBUG
+ bool printModified; // callback can use this
+#endif
+ };
+
+ template <bool computeStack>
+ static fgWalkResult fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData* fgWalkPre);
+
+ // general purpose tree-walker that is capable of doing pre- and post- order
+ // callbacks at the same time
+ template <bool doPreOrder, bool doPostOrder>
+ static fgWalkResult fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fgWalkPre);
+
+ fgWalkResult fgWalkTreePre(GenTreePtr* pTree,
+ fgWalkPreFn* visitor,
+ void* pCallBackData = nullptr,
+ bool lclVarsOnly = false,
+ bool computeStack = false);
+
+ fgWalkResult fgWalkTree(GenTreePtr* pTree,
+ fgWalkPreFn* preVisitor,
+ fgWalkPostFn* postVisitor,
+ void* pCallBackData = nullptr);
+
+ void fgWalkAllTreesPre(fgWalkPreFn* visitor, void* pCallBackData);
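+ // Illustrative pre-order walk (a sketch only; the callback shape follows fgWalkPreFn as used by the
+ // fgChk*CB helpers below, and "pCallbackData" is the user pointer carried in fgWalkData):
+ //
+ //   static Compiler::fgWalkResult CountLclVarsCB(GenTreePtr* pTree, Compiler::fgWalkData* data)
+ //   {
+ //       if ((*pTree)->OperGet() == GT_LCL_VAR)
+ //       {
+ //           (*((unsigned*)data->pCallbackData))++;
+ //       }
+ //       return Compiler::WALK_CONTINUE; // keep walking; WALK_ABORT/WALK_SKIP_SUBTREES stop early
+ //   }
+ //
+ //   unsigned lclCount = 0;
+ //   comp->fgWalkTreePre(&tree, CountLclVarsCB, &lclCount);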
+
+ //----- Postorder
+
+ template <bool computeStack>
+ static fgWalkResult fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData* fgWalkPre);
+
+ fgWalkResult fgWalkTreePost(GenTreePtr* pTree,
+ fgWalkPostFn* visitor,
+ void* pCallBackData = nullptr,
+ bool computeStack = false);
+
+ // An fgWalkPreFn that looks for expressions that have inline throws in
+ // minopts mode. Basically it looks for trees with gtOverflowEx() or
+ // GTF_IND_RNGCHK. It returns WALK_ABORT if one is found. It
+ // returns WALK_SKIP_SUBTREES if GTF_EXCEPT is not set (assumes flags
+ // properly propagated to parent trees). It returns WALK_CONTINUE
+ // otherwise.
+ static fgWalkResult fgChkThrowCB(GenTreePtr* pTree, Compiler::fgWalkData* data);
+ static fgWalkResult fgChkLocAllocCB(GenTreePtr* pTree, Compiler::fgWalkData* data);
+ static fgWalkResult fgChkQmarkCB(GenTreePtr* pTree, Compiler::fgWalkData* data);
+
+ /**************************************************************************
+ * PROTECTED
+ *************************************************************************/
+
+protected:
+ friend class SsaBuilder;
+ friend struct ValueNumberState;
+
+ //--------------------- Detect the basic blocks ---------------------------
+
+ BasicBlock** fgBBs; // Table of pointers to the BBs
+
+ void fgInitBBLookup();
+ BasicBlock* fgLookupBB(unsigned addr);
+
+ void fgMarkJumpTarget(BYTE* jumpTarget, IL_OFFSET offs);
+
+ void fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
+
+ void fgMarkBackwardJump(BasicBlock* startBlock, BasicBlock* endBlock);
+
+ void fgLinkBasicBlocks();
+
+ void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget);
+
+ void fgCheckBasicBlockControlFlow();
+
+ void fgControlFlowPermitted(BasicBlock* blkSrc,
+ BasicBlock* blkDest,
+ BOOL IsLeave = false /* is the src a leave block */);
+
+ bool fgFlowToFirstBlockOfInnerTry(BasicBlock* blkSrc, BasicBlock* blkDest, bool sibling);
+
+ void fgObserveInlineConstants(OPCODE opcode, const FgStack& stack, bool isInlining);
+
+ void fgAdjustForAddressExposedOrWrittenThis();
+
+ bool fgProfileData_ILSizeMismatch;
+ ICorJitInfo::ProfileBuffer* fgProfileBuffer;
+ ULONG fgProfileBufferCount;
+ ULONG fgNumProfileRuns;
+
+ unsigned fgStressBBProf()
+ {
+#ifdef DEBUG
+ unsigned result = JitConfig.JitStressBBProf();
+ if (result == 0)
+ {
+ if (compStressCompile(STRESS_BB_PROFILE, 15))
+ {
+ result = 1;
+ }
+ }
+ return result;
+#else
+ return 0;
+#endif
+ }
+
+ bool fgHaveProfileData();
+ bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weight);
+
+ bool fgIsUsingProfileWeights()
+ {
+ return (fgHaveProfileData() || fgStressBBProf());
+ }
+ void fgInstrumentMethod();
+
+//-------- Insert a statement at the start or end of a basic block --------
+
+#ifdef DEBUG
+public:
+ static bool fgBlockContainsStatementBounded(BasicBlock* block, GenTree* stmt, bool answerOnBoundExceeded = true);
+#endif
+
+public:
+ GenTreeStmt* fgInsertStmtAtEnd(BasicBlock* block, GenTreePtr node);
+
+public: // Used by linear scan register allocation
+ GenTreeStmt* fgInsertStmtNearEnd(BasicBlock* block, GenTreePtr node);
+
+private:
+ GenTreePtr fgInsertStmtAtBeg(BasicBlock* block, GenTreePtr stmt);
+ GenTreePtr fgInsertStmtAfter(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt);
+
+public: // Used by linear scan register allocation
+ GenTreePtr fgInsertStmtBefore(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt);
+
+private:
+ GenTreePtr fgInsertStmtListAfter(BasicBlock* block, GenTreePtr stmtAfter, GenTreePtr stmtList);
+
+ GenTreePtr fgMorphSplitTree(GenTree** splitPoint, GenTree* stmt, BasicBlock* blk);
+
+ // Create a new temporary variable to hold the result of *ppTree,
+ // and transform the graph accordingly.
+ GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr);
+ GenTree* fgMakeMultiUse(GenTree** ppTree);
+
+ // After replacing oldChild with newChild, fixup the fgArgTabEntryPtr
+ // if it happens to be an argument to a call.
+ void fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild);
+
+public:
+ void fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg);
+
+private:
+ // Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node.
+ GenTreePtr fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree);
+ bool fgOperIsBitwiseRotationRoot(genTreeOps oper);
+
+ //-------- Determine the order in which the trees will be evaluated -------
+
+ unsigned fgTreeSeqNum;
+ GenTree* fgTreeSeqLst;
+ GenTree* fgTreeSeqBeg;
+
+ GenTree* fgSetTreeSeq(GenTree* tree, GenTree* prev = nullptr, bool isLIR = false);
+ void fgSetTreeSeqHelper(GenTree* tree, bool isLIR);
+ void fgSetTreeSeqFinish(GenTreePtr tree, bool isLIR);
+ void fgSetStmtSeq(GenTree* tree);
+ void fgSetBlockOrder(BasicBlock* block);
+
+ //------------------------- Morphing --------------------------------------
+
+ unsigned fgPtrArgCntCur;
+ unsigned fgPtrArgCntMax;
+ hashBv* fgOutgoingArgTemps;
+ hashBv* fgCurrentlyInUseArgTemps;
+
+ bool compCanEncodePtrArgCntMax();
+
+ void fgSetRngChkTarget(GenTreePtr tree, bool delay = true);
+
+#if REARRANGE_ADDS
+ void fgMoveOpsLeft(GenTreePtr tree);
+#endif
+
+ bool fgIsCommaThrow(GenTreePtr tree, bool forFolding = false);
+
+ bool fgIsThrow(GenTreePtr tree);
+
+ bool fgInDifferentRegions(BasicBlock* blk1, BasicBlock* blk2);
+ bool fgIsBlockCold(BasicBlock* block);
+
+ GenTreePtr fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper);
+
+ GenTreePtr fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args);
+
+ GenTreePtr fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs);
+
+ bool fgMorphRelopToQmark(GenTreePtr tree);
+
+ // A "MorphAddrContext" carries information from the surrounding context. If we are evaluating a byref address,
+ // it is useful to know whether the address will be immediately dereferenced, or whether the address value will
+ // be used, perhaps by passing it as an argument to a called method. This affects how null checking is done:
+ // for sufficiently small offsets, we can rely on OS page protection to implicitly null-check addresses that we
+ // know will be dereferenced. To know that reliance on implicit null checking is sound, we must further know that
+ // all offsets between the top-level indirection and the bottom are constant, and that their sum is sufficiently
+ // small; hence the other fields of MorphAddrContext. Finally, the odd structure of GT_COPYBLK, in which the second
+ // argument is a GT_LIST, requires us to "tell" that List node that its parent is a GT_COPYBLK, so it "knows" that
+ // each of its arguments should be evaluated in MACK_Ind contexts. (This would not be true for GT_LIST nodes
+ // representing method call argument lists.)
+ enum MorphAddrContextKind
+ {
+ MACK_Ind,
+ MACK_Addr,
+ MACK_CopyBlock, // This is necessary so we know we have to start a new "Ind" context for each of the
+ // addresses in the arg list.
+ };
+ struct MorphAddrContext
+ {
+ MorphAddrContextKind m_kind;
+ bool m_allConstantOffsets; // Valid only for "m_kind == MACK_Ind". True iff all offsets between
+ // top-level indirection and here have been constants.
+ size_t m_totalOffset; // Valid only for "m_kind == MACK_Ind", and if "m_allConstantOffsets" is true.
+ // In that case, is the sum of those constant offsets.
+
+ MorphAddrContext(MorphAddrContextKind kind) : m_kind(kind), m_allConstantOffsets(true), m_totalOffset(0)
+ {
+ }
+ };
+
+ // A MACK_CopyBlock context is immutable, so we can just make one of these and share it.
+ static MorphAddrContext s_CopyBlockMAC;
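+ // Illustrative construction of a context for an immediately-dereferenced address (a sketch only;
+ // "constOffset" is a hypothetical constant offset gathered while walking the address expression):
+ //
+ //   MorphAddrContext indMac(MACK_Ind);
+ //   indMac.m_totalOffset += constOffset;        // accumulate the constant offsets seen so far
+ //   // if a non-constant offset is encountered: indMac.m_allConstantOffsets = false;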
+
+#ifdef FEATURE_SIMD
+ GenTreePtr fgCopySIMDNode(GenTreeSIMD* simdNode);
+ GenTreePtr getSIMDStructFromField(GenTreePtr tree,
+ var_types* baseTypeOut,
+ unsigned* indexOut,
+ unsigned* simdSizeOut,
+ bool ignoreUsedInSIMDIntrinsic = false);
+ GenTreePtr fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree);
+ GenTreePtr fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree);
+ bool fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt);
+ void impMarkContiguousSIMDFieldAssignments(GenTreePtr stmt);
+
+ // fgPreviousCandidateSIMDFieldAsgStmt is only used for tracking the previous SIMD field assignment
+ // in Compiler::impMarkContiguousSIMDFieldAssignments.
+ GenTreePtr fgPreviousCandidateSIMDFieldAsgStmt;
+
+#endif // FEATURE_SIMD
+ GenTreePtr fgMorphArrayIndex(GenTreePtr tree);
+ GenTreePtr fgMorphCast(GenTreePtr tree);
+ GenTreePtr fgUnwrapProxy(GenTreePtr objRef);
+ GenTreeCall* fgMorphArgs(GenTreeCall* call);
+
+ void fgMakeOutgoingStructArgCopy(GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structDescPtr));
+
+ void fgFixupStructReturn(GenTreePtr call);
+ GenTreePtr fgMorphLocalVar(GenTreePtr tree);
+ bool fgAddrCouldBeNull(GenTreePtr addr);
+ GenTreePtr fgMorphField(GenTreePtr tree, MorphAddrContext* mac);
+ bool fgCanFastTailCall(GenTreeCall* call);
+ void fgMorphTailCall(GenTreeCall* call);
+ void fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall);
+ GenTreePtr fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
+ fgArgTabEntryPtr argTabEntry,
+ BasicBlock* block,
+ IL_OFFSETX callILOffset,
+ GenTreePtr tmpAssignmentInsertionPoint,
+ GenTreePtr paramAssignmentInsertionPoint);
+ static int fgEstimateCallStackSize(GenTreeCall* call);
+ GenTreePtr fgMorphCall(GenTreeCall* call);
+ void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
+ void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result);
+#if DEBUG
+ void fgNoteNonInlineCandidate(GenTreePtr tree, GenTreeCall* call);
+ static fgWalkPreFn fgFindNonInlineCandidate;
+#endif
+ GenTreePtr fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd);
+ GenTreePtr fgMorphLeaf(GenTreePtr tree);
+ void fgAssignSetVarDef(GenTreePtr tree);
+ GenTreePtr fgMorphOneAsgBlockOp(GenTreePtr tree);
+ GenTreePtr fgMorphInitBlock(GenTreePtr tree);
+ GenTreePtr fgMorphBlkToInd(GenTreeBlk* tree, var_types type);
+ GenTreePtr fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue = false);
+ GenTreePtr fgMorphBlkNode(GenTreePtr tree, bool isDest);
+ GenTreePtr fgMorphBlockOperand(GenTreePtr tree, var_types asgType, unsigned blockWidth, bool isDest);
+ GenTreePtr fgMorphCopyBlock(GenTreePtr tree);
+ GenTreePtr fgMorphForRegisterFP(GenTreePtr tree);
+ GenTreePtr fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac = nullptr);
+ GenTreePtr fgMorphSmpOpPre(GenTreePtr tree);
+ GenTreePtr fgMorphDivByConst(GenTreeOp* tree);
+ GenTreePtr fgMorphModByConst(GenTreeOp* tree);
+ GenTreePtr fgMorphModToSubMulDiv(GenTreeOp* tree);
+ GenTreePtr fgMorphSmpOpOptional(GenTreeOp* tree);
+ GenTreePtr fgMorphRecognizeBoxNullable(GenTree* compare);
+ bool fgShouldUseMagicNumberDivide(GenTreeOp* tree);
+
+ GenTreePtr fgMorphToEmulatedFP(GenTreePtr tree);
+ GenTreePtr fgMorphConst(GenTreePtr tree);
+
+public:
+ GenTreePtr fgMorphTree(GenTreePtr tree, MorphAddrContext* mac = nullptr);
+
+private:
+#if LOCAL_ASSERTION_PROP
+ void fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree));
+#endif
+ void fgMorphTreeDone(GenTreePtr tree, GenTreePtr oldTree = nullptr DEBUGARG(int morphNum = 0));
+
+ GenTreePtr fgMorphStmt;
+
+ unsigned fgGetBigOffsetMorphingTemp(var_types type); // We cache one temp per type to be
+ // used when morphing big offsets.
+
+ //----------------------- Liveness analysis -------------------------------
+
+ VARSET_TP fgCurUseSet; // vars used by block (before an assignment)
+ VARSET_TP fgCurDefSet; // vars assigned by block (before a use)
+
+ bool fgCurHeapUse; // True iff the current basic block uses the heap before defining it.
+ bool fgCurHeapDef; // True iff the current basic block defines the heap.
+ bool fgCurHeapHavoc; // True if the current basic block is known to set the heap to a "havoc" value.
+
+ void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr);
+
+#ifdef DEBUGGING_SUPPORT
+ void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
+ void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
+
+ void fgMarkInScope(BasicBlock* block, VARSET_VALARG_TP inScope);
+ void fgUnmarkInScope(BasicBlock* block, VARSET_VALARG_TP unmarkScope);
+
+ void fgExtendDbgScopes();
+ void fgExtendDbgLifetimes();
+
+#ifdef DEBUG
+ void fgDispDebugScopes();
+#endif // DEBUG
+
+#endif // DEBUGGING_SUPPORT
+
+ //-------------------------------------------------------------------------
+ //
+ // The following keeps track of any code we've added for things like array
+ // range checking or explicit calls to enable GC, and so on.
+ //
+public:
+ struct AddCodeDsc
+ {
+ AddCodeDsc* acdNext;
+ BasicBlock* acdDstBlk; // block to which we jump
+ unsigned acdData;
+ SpecialCodeKind acdKind; // what kind of a special block is this?
+ unsigned short acdStkLvl;
+ };
+
+private:
+ static unsigned acdHelper(SpecialCodeKind codeKind);
+
+ AddCodeDsc* fgAddCodeList;
+ bool fgAddCodeModf;
+ bool fgRngChkThrowAdded;
+ AddCodeDsc* fgExcptnTargetCache[SCK_COUNT];
+
+ BasicBlock* fgRngChkTarget(BasicBlock* block, unsigned stkDepth, SpecialCodeKind kind);
+
+ BasicBlock* fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, SpecialCodeKind kind, unsigned stkDepth = 0);
+
+public:
+ AddCodeDsc* fgFindExcptnTarget(SpecialCodeKind kind, unsigned refData);
+
+private:
+ bool fgIsCodeAdded();
+
+ bool fgIsThrowHlpBlk(BasicBlock* block);
+ unsigned fgThrowHlpBlkStkLevel(BasicBlock* block);
+
+ unsigned fgBigOffsetMorphingTemps[TYP_COUNT];
+
+ unsigned fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo);
+ void fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* result);
+ void fgInsertInlineeBlocks(InlineInfo* pInlineInfo);
+ GenTreePtr fgInlinePrependStatements(InlineInfo* inlineInfo);
+
+#if FEATURE_MULTIREG_RET
+ GenTreePtr fgGetStructAsStructPtr(GenTreePtr tree);
+ GenTreePtr fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+ void fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd);
+#endif // FEATURE_MULTIREG_RET
+
+ static fgWalkPreFn fgUpdateInlineReturnExpressionPlaceHolder;
+
+#ifdef DEBUG
+ static fgWalkPreFn fgDebugCheckInlineCandidates;
+#endif
+
+ void fgPromoteStructs();
+ fgWalkResult fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre);
+ fgWalkResult fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre);
+ void fgMarkImplicitByRefArgs();
+ bool fgMorphImplicitByRefArgs(GenTree** pTree, fgWalkData* fgWalkPre);
+ static fgWalkPreFn fgMarkAddrTakenLocalsPreCB;
+ static fgWalkPostFn fgMarkAddrTakenLocalsPostCB;
+ void fgMarkAddressExposedLocals();
+ bool fgNodesMayInterfere(GenTree* store, GenTree* load);
+
+ // Returns true if the type of tree is of size at least "width", or if "tree" is not a
+ // local variable.
+ bool fgFitsInOrNotLoc(GenTreePtr tree, unsigned width);
+
+ // The given local variable, required to be a struct variable, is being assigned via
+ // a "lclField", to make it masquerade as an integral type in the ABI. Make sure that
+ // the variable is not enregistered, and is therefore not promoted independently.
+ void fgLclFldAssign(unsigned lclNum);
+
+ static fgWalkPreFn gtHasLocalsWithAddrOpCB;
+ bool gtCanOptimizeTypeEquality(GenTreePtr tree);
+ bool gtIsTypeHandleToRuntimeTypeHelper(GenTreePtr tree);
+ bool gtIsActiveCSE_Candidate(GenTreePtr tree);
+
+#ifdef DEBUG
+ bool fgPrintInlinedMethods;
+#endif
+
+ bool fgIsBigOffset(size_t offset);
+
+ // The following are used when morphing special cases of integer div/mod operations and also by codegen
+ bool fgIsSignedDivOptimizable(GenTreePtr divisor);
+ bool fgIsUnsignedDivOptimizable(GenTreePtr divisor);
+ bool fgIsSignedModOptimizable(GenTreePtr divisor);
+ bool fgIsUnsignedModOptimizable(GenTreePtr divisor);
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Optimizer XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ void optInit();
+
+protected:
+ LclVarDsc* optIsTrackedLocal(GenTreePtr tree);
+
+public:
+ void optRemoveRangeCheck(
+ GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags = 0, bool forceRemove = false);
+ bool optIsRangeCheckRemovable(GenTreePtr tree);
+
+protected:
+ static fgWalkPreFn optValidRangeCheckIndex;
+ static fgWalkPreFn optRemoveTreeVisitor; // Helper passed to Compiler::fgWalkAllTreesPre() to decrement the LclVar
+ // usage counts
+
+ void optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList);
+
+ /**************************************************************************
+ *
+ *************************************************************************/
+
+protected:
+ // Do hoisting for all loops.
+ void optHoistLoopCode();
+
+ // To represent sets of VN's that have already been hoisted in outer loops.
+ typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, bool, JitSimplerHashBehavior> VNToBoolMap;
+ typedef VNToBoolMap VNSet;
+
+ struct LoopHoistContext
+ {
+ private:
+ // The set of variables hoisted in the current loop (or nullptr if there are none).
+ VNSet* m_pHoistedInCurLoop;
+
+ public:
+ // Value numbers of expressions that have been hoisted in parent loops in the loop nest.
+ VNSet m_hoistedInParentLoops;
+ // Value numbers of expressions that have been hoisted in the current (or most recent) loop in the nest.
+ // Previous decisions on loop-invariance of value numbers in the current loop.
+ VNToBoolMap m_curLoopVnInvariantCache;
+
+ VNSet* GetHoistedInCurLoop(Compiler* comp)
+ {
+ if (m_pHoistedInCurLoop == nullptr)
+ {
+ m_pHoistedInCurLoop = new (comp->getAllocatorLoopHoist()) VNSet(comp->getAllocatorLoopHoist());
+ }
+ return m_pHoistedInCurLoop;
+ }
+
+ VNSet* ExtractHoistedInCurLoop()
+ {
+ VNSet* res = m_pHoistedInCurLoop;
+ m_pHoistedInCurLoop = nullptr;
+ return res;
+ }
+
+ LoopHoistContext(Compiler* comp)
+ : m_pHoistedInCurLoop(nullptr)
+ , m_hoistedInParentLoops(comp->getAllocatorLoopHoist())
+ , m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist())
+ {
+ }
+ };
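+ // Illustrative use of the context above when deciding whether to hoist a candidate whose value
+ // number is "vn" (a sketch only; it assumes the SimplerHashTable Lookup/Set interface and that
+ // "this" is the Compiler, neither of which is restated here):
+ //
+ //   bool alreadyHoisted = false;
+ //   if (hoistCtxt->m_hoistedInParentLoops.Lookup(vn, &alreadyHoisted) && alreadyHoisted)
+ //   {
+ //       return; // an enclosing loop already hoisted this expression
+ //   }
+ //   hoistCtxt->GetHoistedInCurLoop(this)->Set(vn, true); // record the hoist for the current loop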
+
+ // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it.
+ // Tracks the expressions that have been hoisted by containing loops by temporarily recording their
+ // value numbers in "m_hoistedInParentLoops". This set is not modified by the call.
+ void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.)
+ // Assumes that expressions have been hoisted in containing loops if their value numbers are in
+ // "m_hoistedInParentLoops".
+ //
+ void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Hoist all expressions in "blk" that are invariant in loop "lnum" (an index into the optLoopTable)
+ // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted
+ // expressions to "hoistInLoop".
+ void optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Return true if the tree looks profitable to hoist out of loop 'lnum'.
+ bool optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum);
+
+ // Hoist all proper sub-expressions of "tree" (which occurs in "stmt", which occurs in "blk")
+ // that are invariant in loop "lnum" (an index into the optLoopTable)
+ // outside of that loop. Exempt expressions whose value number is in "hoistedInParents"; add VN's of hoisted
+ // expressions to "hoistInLoop".
+ // Returns "true" iff "tree" is loop-invariant (wrt "lnum").
+ // Assumes that the value of "*firstBlockAndBeforeSideEffect" indicates that we're in the first block, and before
+ // any possible globally visible side effects. Assumes it is called in evaluation order, and updates this value.
+ bool optHoistLoopExprsForTree(GenTreePtr tree,
+ unsigned lnum,
+ LoopHoistContext* hoistCtxt,
+ bool* firstBlockAndBeforeSideEffect,
+ bool* pHoistable);
+
+ // Performs the hoisting of 'tree' into the PreHeader for loop 'lnum'.
+ void optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt);
+
+ // Returns true iff the ValueNum "vn" represents a value that is loop-invariant in "lnum".
+ // Constants and init values are always loop invariant.
+ // VNPhi's connect VN's to the SSA definition, so we can know if the SSA def occurs in the loop.
+ bool optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* recordedVNs);
+
+ // Returns "true" iff "tree" is valid at the head of loop "lnum", in the context of the hoist substitution
+ // "subst". If "tree" is a local SSA var, it is valid if its SSA definition occurs outside of the loop, or
+ // if it is in the domain of "subst" (meaning that its definition has been previously hoisted, with a "stand-in"
+ // local.) If tree is a constant, it is valid. Otherwise, if it is an operator, it is valid iff its children are.
+ bool optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum);
+
+ // If "blk" is the entry block of a natural loop, returns true and sets "*pLnum" to the index of the loop
+ // in the loop table.
+ bool optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum);
+
+ // Records the set of "side effects" of all loops: fields (object instance and static)
+ // written to, and SZ-array element type equivalence classes updated.
+ void optComputeLoopSideEffects();
+
+private:
+ // Requires "lnum" to be the index of an outermost loop in the loop table. Traverses the body of that loop,
+ // including all nested loops, and records the set of "side effects" of the loop: fields (object instance and
+ // static) written to, and SZ-array element type equivalence classes updated.
+ void optComputeLoopNestSideEffects(unsigned lnum);
+
+ // Add the side effects of "blk" (which is required to be within a loop) to all loops of which it is a part.
+ void optComputeLoopSideEffectsOfBlock(BasicBlock* blk);
+
+ // Hoist the expression "expr" out of loop "lnum".
+ void optPerformHoistExpr(GenTreePtr expr, unsigned lnum);
+
+public:
+ void optOptimizeBools();
+
+private:
+ GenTree* optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr);
+#ifdef DEBUG
+ void optOptimizeBoolsGcStress(BasicBlock* condBlock);
+#endif
+public:
+ void optOptimizeLayout(); // Optimize the BasicBlock layout of the method
+
+ void optOptimizeLoops(); // for "while-do" loops duplicates simple loop conditions and transforms
+ // the loop into a "do-while" loop
+ // Also finds all natural loops and records them in the loop table
+
+ // Optionally clone loops in the loop table.
+ void optCloneLoops();
+
+ // Clone loop "loopInd" in the loop table.
+ void optCloneLoop(unsigned loopInd, LoopCloneContext* context);
+
+ // Ensure that loop "loopInd" has a unique head block. (If the existing entry has
+ // non-loop predecessors other than the head entry, create a new, empty block that goes (only) to the entry,
+ // and redirect the preds of the entry to this new block.) Sets the weight of the newly created block to
+ // "ambientWeight".
+ void optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight);
+
+ void optUnrollLoops(); // Unrolls loops (needs to have cost info)
+
+protected:
+ // This enumeration describes what is killed by a call.
+
+ enum callInterf
+ {
+ CALLINT_NONE, // no interference (most helpers)
+ CALLINT_REF_INDIRS, // kills GC ref indirections (SETFIELD OBJ)
+ CALLINT_SCL_INDIRS, // kills non GC ref indirections (SETFIELD non-OBJ)
+ CALLINT_ALL_INDIRS, // kills both GC ref and non GC ref indirections (SETFIELD STRUCT)
+ CALLINT_ALL, // kills everything (normal method call)
+ };
+
+public:
+ // A "LoopDsc" describes a ("natural") loop. We (currently) require the body of a loop to be a contiguous (in
+ // bbNext order) sequence of basic blocks. (At times, we may require the blocks in a loop to be "properly numbered"
+ // in bbNext order; we use comparisons on the bbNum to decide order.)
+ // The blocks that define the body are
+ // first <= top <= entry <= bottom .
+ // The "head" of the loop is a block outside the loop that has "entry" as a successor. We only support loops with a
+ // single 'head' block. The meanings of these blocks are given in the definitions below. Also see the picture at
+ // Compiler::optFindNaturalLoops().
+ struct LoopDsc
+ {
+ BasicBlock* lpHead; // HEAD of the loop (not part of the loop itself) -- has ENTRY as a successor.
+ BasicBlock* lpFirst; // FIRST block (in bbNext order) reachable within this loop. (May be part of a nested
+ // loop, but not the outer loop.)
+ BasicBlock* lpTop; // loop TOP (the back edge from lpBottom reaches here) (in most cases FIRST and TOP are the
+ // same)
+ BasicBlock* lpEntry; // the ENTRY in the loop (in most cases TOP or BOTTOM)
+ BasicBlock* lpBottom; // loop BOTTOM (from here we have a back edge to the TOP)
+ BasicBlock* lpExit; // if a single exit loop this is the EXIT (in most cases BOTTOM)
+
+ callInterf lpAsgCall; // "callInterf" for calls in the loop
+ ALLVARSET_TP lpAsgVars; // set of vars assigned within the loop (all vars, not just tracked)
+ varRefKinds lpAsgInds : 8; // set of inds modified within the loop
+
+ unsigned short lpFlags; // Mask of the LPFLG_* constants
+
+ unsigned char lpExitCnt; // number of exits from the loop
+
+ unsigned char lpParent; // The index of the most-nested loop that completely contains this one,
+ // or else BasicBlock::NOT_IN_LOOP if no such loop exists.
+ unsigned char lpChild; // The index of a nested loop, or else BasicBlock::NOT_IN_LOOP if no child exists.
+ // (Actually, an "immediately" nested loop --
+ // no other child of this loop is a parent of lpChild.)
+ unsigned char lpSibling; // The index of another loop that is an immediate child of lpParent,
+ // or else BasicBlock::NOT_IN_LOOP. One can enumerate all the children of a loop
+ // by following "lpChild" then "lpSibling" links.
+
+#define LPFLG_DO_WHILE 0x0001 // it's a do-while loop (i.e., ENTRY is at the TOP)
+#define LPFLG_ONE_EXIT 0x0002 // the loop has only one exit
+
+#define LPFLG_ITER 0x0004 // for (i = icon or lclVar; test_condition(); i++)
+#define LPFLG_HOISTABLE 0x0008 // the loop is in a form that is suitable for hoisting expressions
+#define LPFLG_CONST 0x0010 // for (i=icon;i<icon;i++){ ... } - constant loop
+
+#define LPFLG_VAR_INIT 0x0020 // iterator is initialized with a local var (var # found in lpVarInit)
+#define LPFLG_CONST_INIT 0x0040 // iterator is initialized with a constant (found in lpConstInit)
+
+#define LPFLG_VAR_LIMIT 0x0100 // iterator is compared with a local var (var # found in lpVarLimit)
+#define LPFLG_CONST_LIMIT 0x0200 // iterator is compared with a constant (found in lpConstLimit)
+#define LPFLG_ARRLEN_LIMIT 0x0400 // iterator is compared with a.len or a[i].len (found in lpArrLenLimit)
+
+#define LPFLG_HAS_PREHEAD 0x0800 // lpHead is known to be a preHead for this loop
+#define LPFLG_REMOVED 0x1000 // has been removed from the loop table (unrolled or optimized away)
+#define LPFLG_DONT_UNROLL 0x2000 // do not unroll this loop
+
+#define LPFLG_ASGVARS_YES 0x4000 // "lpAsgVars" has been computed
+#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet
+ // type are assigned to.
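+ // Illustrative flag test (a sketch only): a simple counted loop with constant bounds has the
+ // LPFLG_ITER, LPFLG_CONST_INIT, and LPFLG_CONST_LIMIT bits set in lpFlags:
+ //
+ //   bool IsConstBoundIterLoop(const LoopDsc& loop)
+ //   {
+ //       const unsigned short mask = LPFLG_ITER | LPFLG_CONST_INIT | LPFLG_CONST_LIMIT;
+ //       return (loop.lpFlags & mask) == mask;
+ //   }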
+
+ bool lpLoopHasHeapHavoc; // The loop contains an operation that we assume has arbitrary heap side effects.
+ // If this is set, the fields below may not be accurate (since they become irrelevant.)
+ bool lpContainsCall; // True if executing the loop body *may* execute a call
+
+ VARSET_TP lpVarInOut; // The set of variables that are IN or OUT during the execution of this loop
+ VARSET_TP lpVarUseDef; // The set of variables that are USE or DEF during the execution of this loop
+
+ int lpHoistedExprCount; // The register count for the non-FP expressions from inside this loop that have been
+ // hoisted
+ int lpLoopVarCount; // The register count for the non-FP LclVars that are read/written inside this loop
+ int lpVarInOutCount; // The register count for the non-FP LclVars that are alive inside or across this loop
+
+ int lpHoistedFPExprCount; // The register count for the FP expressions from inside this loop that have been
+ // hoisted
+ int lpLoopVarFPCount; // The register count for the FP LclVars that are read/written inside this loop
+ int lpVarInOutFPCount; // The register count for the FP LclVars that are alive inside or across this loop
+
+ typedef SimplerHashTable<CORINFO_FIELD_HANDLE,
+ PtrKeyFuncs<struct CORINFO_FIELD_STRUCT_>,
+ bool,
+ JitSimplerHashBehavior>
+ FieldHandleSet;
+ FieldHandleSet* lpFieldsModified; // This has entries (mappings to "true") for all static field and object
+ // instance fields modified
+ // in the loop.
+
+ typedef SimplerHashTable<CORINFO_CLASS_HANDLE,
+ PtrKeyFuncs<struct CORINFO_CLASS_STRUCT_>,
+ bool,
+ JitSimplerHashBehavior>
+ ClassHandleSet;
+ ClassHandleSet* lpArrayElemTypesModified; // Bits set indicate the set of sz array element types such that
+ // arrays of that type are modified
+ // in the loop.
+
+ // Adds the variable liveness information for 'blk' to 'this' LoopDsc
+ void AddVariableLiveness(Compiler* comp, BasicBlock* blk);
+
+ inline void AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd);
+ // This doesn't *always* take a class handle -- it can also take primitive types, encoded as class handles
+ // (shifted left, with a low-order bit set to distinguish.)
+ // Use the {Encode/Decode}ElemType methods to construct/destruct these.
+ inline void AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd);
+
+ /* The following values are set only for iterator loops, i.e. has the flag LPFLG_ITER set */
+
+ GenTreePtr lpIterTree; // The "i <op>= const" tree
+ unsigned lpIterVar(); // iterator variable #
+ int lpIterConst(); // the constant with which the iterator is incremented
+ genTreeOps lpIterOper(); // the type of the operation on the iterator (ASG_ADD, ASG_SUB, etc.)
+ void VERIFY_lpIterTree();
+
+ var_types lpIterOperType(); // For overflow instructions
+
+ union {
+ int lpConstInit; // initial constant value of iterator : Valid if LPFLG_CONST_INIT
+ unsigned lpVarInit; // initial local var number to which we initialize the iterator : Valid if
+ // LPFLG_VAR_INIT
+ };
+
+ /* The following is for LPFLG_ITER loops only (i.e. the loop condition is "i RELOP const or var") */
+
+ GenTreePtr lpTestTree; // pointer to the node containing the loop test
+ genTreeOps lpTestOper(); // the type of the comparison between the iterator and the limit (GT_LE, GT_GE, etc.)
+ void VERIFY_lpTestTree();
+
+ bool lpIsReversed(); // true if the iterator node is the second operand in the loop condition
+ GenTreePtr lpIterator(); // the iterator node in the loop test
+ GenTreePtr lpLimit(); // the limit node in the loop test
+
+ int lpConstLimit(); // limit constant value of iterator - loop condition is "i RELOP const" : Valid if
+ // LPFLG_CONST_LIMIT
+ unsigned lpVarLimit(); // the lclVar # in the loop condition ( "i RELOP lclVar" ) : Valid if
+ // LPFLG_VAR_LIMIT
+ bool lpArrLenLimit(Compiler* comp, ArrIndex* index); // The array length in the loop condition ( "i RELOP
+ // arr.len" or "i RELOP arr[i][j].len" ) : Valid if
+ // LPFLG_ARRLEN_LIMIT
+
+ // Returns "true" iff "*this" contains the blk.
+ bool lpContains(BasicBlock* blk)
+ {
+ return lpFirst->bbNum <= blk->bbNum && blk->bbNum <= lpBottom->bbNum;
+ }
+ // Returns "true" iff "*this" (properly) contains the range [first, bottom] (allowing firsts
+ // to be equal, but requiring bottoms to be different.)
+ bool lpContains(BasicBlock* first, BasicBlock* bottom)
+ {
+ return lpFirst->bbNum <= first->bbNum && bottom->bbNum < lpBottom->bbNum;
+ }
+
+ // Returns "true" iff "*this" (properly) contains "lp2" (allowing firsts to be equal, but requiring
+ // bottoms to be different.)
+ bool lpContains(const LoopDsc& lp2)
+ {
+ return lpContains(lp2.lpFirst, lp2.lpBottom);
+ }
+
+ // Returns "true" iff "*this" is (properly) contained by the range [first, bottom]
+ // (allowing firsts to be equal, but requiring bottoms to be different.)
+ bool lpContainedBy(BasicBlock* first, BasicBlock* bottom)
+ {
+ return first->bbNum <= lpFirst->bbNum && lpBottom->bbNum < bottom->bbNum;
+ }
+
+ // Returns "true" iff "*this" is (properly) contained by "lp2"
+ // (allowing firsts to be equal, but requiring bottoms to be different.)
+ bool lpContainedBy(const LoopDsc& lp2)
+ {
+ return lpContainedBy(lp2.lpFirst, lp2.lpBottom);
+ }
+
+ // Returns "true" iff "*this" is disjoint from the range [top, bottom].
+ bool lpDisjoint(BasicBlock* first, BasicBlock* bottom)
+ {
+ return bottom->bbNum < lpFirst->bbNum || lpBottom->bbNum < first->bbNum;
+ }
+ // Returns "true" iff "*this" is disjoint from "lp2".
+ bool lpDisjoint(const LoopDsc& lp2)
+ {
+ return lpDisjoint(lp2.lpFirst, lp2.lpBottom);
+ }
+ // Returns "true" iff the loop is well-formed (see code for defn).
+ bool lpWellFormed()
+ {
+ return lpFirst->bbNum <= lpTop->bbNum && lpTop->bbNum <= lpEntry->bbNum &&
+ lpEntry->bbNum <= lpBottom->bbNum &&
+ (lpHead->bbNum < lpTop->bbNum || lpHead->bbNum > lpBottom->bbNum);
+ }
+ };
+
+protected:
+ bool fgMightHaveLoop(); // returns true if there are any backedges
+ bool fgHasLoops; // True if this method has any loops, set in fgComputeReachability
+
+public:
+ LoopDsc optLoopTable[MAX_LOOP_NUM]; // loop descriptor table
+ unsigned char optLoopCount; // number of tracked loops
+
+protected:
+ unsigned optCallCount; // number of calls made in the method
+ unsigned optIndirectCallCount; // number of virtual, interface and indirect calls made in the method
+ unsigned optNativeCallCount; // number of Pinvoke/Native calls made in the method
+ unsigned optLoopsCloned; // number of loops cloned in the current method.
+
+#ifdef DEBUG
+ unsigned optFindLoopNumberFromBeginBlock(BasicBlock* begBlk);
+ void optPrintLoopInfo(unsigned loopNum,
+ BasicBlock* lpHead,
+ BasicBlock* lpFirst,
+ BasicBlock* lpTop,
+ BasicBlock* lpEntry,
+ BasicBlock* lpBottom,
+ unsigned char lpExitCnt,
+ BasicBlock* lpExit,
+ unsigned parentLoop = BasicBlock::NOT_IN_LOOP);
+ void optPrintLoopInfo(unsigned lnum);
+ void optPrintLoopRecording(unsigned lnum);
+
+ void optCheckPreds();
+#endif
+
+ void optSetBlockWeights();
+
+ void optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk);
+
+ void optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk);
+
+ void optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop = false);
+
+ bool optIsLoopTestEvalIntoTemp(GenTreePtr test, GenTreePtr* newTest);
+ unsigned optIsLoopIncrTree(GenTreePtr incr);
+ bool optCheckIterInLoopTest(unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar);
+ bool optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar);
+ bool optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar);
+ bool optExtractInitTestIncr(BasicBlock* head,
+ BasicBlock* bottom,
+ BasicBlock* exit,
+ GenTreePtr* ppInit,
+ GenTreePtr* ppTest,
+ GenTreePtr* ppIncr);
+
+ void optRecordLoop(BasicBlock* head,
+ BasicBlock* first,
+ BasicBlock* top,
+ BasicBlock* entry,
+ BasicBlock* bottom,
+ BasicBlock* exit,
+ unsigned char exitCnt);
+
+ void optFindNaturalLoops();
+
+ // Ensures that all the loops in the loop nest rooted at "loopInd" (an index into the loop table) are 'canonical' --
+ // each loop has a unique "top." Returns "true" iff the flowgraph has been modified.
+ bool optCanonicalizeLoopNest(unsigned char loopInd);
+
+ // Ensures that the loop "loopInd" (an index into the loop table) is 'canonical' -- it has a unique "top,"
+ // unshared with any other loop. Returns "true" iff the flowgraph has been modified
+ bool optCanonicalizeLoop(unsigned char loopInd);
+
+ // Requires "l1" to be a valid loop table index, and not "BasicBlock::NOT_IN_LOOP". Requires "l2" to be
+ // a valid loop table index, or else "BasicBlock::NOT_IN_LOOP". Returns true
+ // iff "l2" is not NOT_IN_LOOP, and "l1" contains "l2".
+ bool optLoopContains(unsigned l1, unsigned l2);
+
+ // Requires "loopInd" to be a valid index into the loop table.
+ // Updates the loop table by changing the head of loop "loopInd" (which is required
+ // to be "from") to "to". Also performs this transformation for any
+ // loop nested in "loopInd" that shares the same head as "loopInd".
+ void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);
+
+ // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in
+ // "redirectMap", change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
+ void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap);
+
+ // Marks the containsCall information to "lnum" and any parent loops.
+ void AddContainsCallAllContainingLoops(unsigned lnum);
+ // Adds the variable liveness information from 'blk' to "lnum" and any parent loops.
+ void AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk);
+ // Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
+ void AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd);
+ // Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
+ void AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemType);
+
+ // Requires that "from" and "to" have the same "bbJumpKind" (perhaps because "to" is a clone
+ // of "from".) Copies the jump destination from "from" to "to".
+ void optCopyBlkDest(BasicBlock* from, BasicBlock* to);
+
+ // The depth of the loop described by "lnum" (an index into the loop table.) (0 == top level)
+ unsigned optLoopDepth(unsigned lnum)
+ {
+ unsigned par = optLoopTable[lnum].lpParent;
+ if (par == BasicBlock::NOT_IN_LOOP)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1 + optLoopDepth(par);
+ }
+ }
+
+ void fgOptWhileLoop(BasicBlock* block);
+
+ bool optComputeLoopRep(int constInit,
+ int constLimit,
+ int iterInc,
+ genTreeOps iterOper,
+ var_types iterType,
+ genTreeOps testOper,
+ bool unsignedTest,
+ bool dupCond,
+ unsigned* iterCount);
+#if FEATURE_STACK_FP_X87
+
+public:
+ VARSET_TP optAllFloatVars; // mask of all tracked FP variables
+ VARSET_TP optAllFPregVars; // mask of all enregistered FP variables
+ VARSET_TP optAllNonFPvars; // mask of all tracked non-FP variables
+#endif // FEATURE_STACK_FP_X87
+
+private:
+ static fgWalkPreFn optIsVarAssgCB;
+
+protected:
+ bool optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var);
+
+ bool optIsVarAssgLoop(unsigned lnum, unsigned var);
+
+ int optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds = VR_NONE);
+
+ bool optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit);
+
+ /**************************************************************************
+ * Optimization conditions
+ *************************************************************************/
+
+ bool optFastCodeOrBlendedLoop(BasicBlock::weight_t bbWeight);
+ bool optPentium4(void);
+ bool optAvoidIncDec(BasicBlock::weight_t bbWeight);
+ bool optAvoidIntMult(void);
+
+#if FEATURE_ANYCSE
+
+protected:
+ // The following is the upper limit on how many expressions we'll keep track
+ // of for the CSE analysis.
+ //
+ static const unsigned MAX_CSE_CNT = EXPSET_SZ;
+
+ static const int MIN_CSE_COST = 2;
+
+ /* Generic list of nodes - used by the CSE logic */
+
+ struct treeLst
+ {
+ treeLst* tlNext;
+ GenTreePtr tlTree;
+ };
+
+ typedef struct treeLst* treeLstPtr;
+
+ struct treeStmtLst
+ {
+ treeStmtLst* tslNext;
+ GenTreePtr tslTree; // tree node
+ GenTreePtr tslStmt; // statement containing the tree
+ BasicBlock* tslBlock; // block containing the statement
+ };
+
+ typedef struct treeStmtLst* treeStmtLstPtr;
+
+ // The following logic keeps track of expressions via a simple hash table.
+
+ struct CSEdsc
+ {
+ CSEdsc* csdNextInBucket; // used by the hash table
+
+ unsigned csdHashValue; // the original hash key
+
+ unsigned csdIndex; // 1..optCSECandidateCount
+ char csdLiveAcrossCall; // 0 or 1
+
+ unsigned short csdDefCount; // definition count
+ unsigned short csdUseCount; // use count (excluding the implicit uses at defs)
+
+ unsigned csdDefWtCnt; // weighted def count
+ unsigned csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+
+ GenTreePtr csdTree; // treenode containing the 1st occurrence
+ GenTreePtr csdStmt; // stmt containing the 1st occurrence
+ BasicBlock* csdBlock; // block containing the 1st occurrence
+
+ treeStmtLstPtr csdTreeList; // list of matching tree nodes: head
+ treeStmtLstPtr csdTreeLast; // list of matching tree nodes: tail
+ };
+
+ static const size_t s_optCSEhashSize;
+ CSEdsc** optCSEhash;
+ CSEdsc** optCSEtab;
+
+ void optCSEstop();
+
+ CSEdsc* optCSEfindDsc(unsigned index);
+ void optUnmarkCSE(GenTreePtr tree);
+
+ // user defined callback data for the tree walk function optCSE_MaskHelper()
+ struct optCSE_MaskData
+ {
+ EXPSET_TP CSE_defMask;
+ EXPSET_TP CSE_useMask;
+ };
+
+ // Treewalk helper for optCSE_DefMask and optCSE_UseMask
+ static fgWalkPreFn optCSE_MaskHelper;
+
+ // This function walks all the nodes of a given tree
+ // and returns the mask of CSE definitions and uses for the tree
+ //
+ void optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData);
+
+ // Given a binary tree node, returns true if it is safe to swap the order of evaluation of op1 and op2.
+ bool optCSE_canSwap(GenTree* firstNode, GenTree* secondNode);
+ bool optCSE_canSwap(GenTree* tree);
+
+ static fgWalkPostFn optPropagateNonCSE;
+ static fgWalkPreFn optHasNonCSEChild;
+
+ static fgWalkPreFn optUnmarkCSEs;
+
+ static int __cdecl optCSEcostCmpEx(const void* op1, const void* op2);
+ static int __cdecl optCSEcostCmpSz(const void* op1, const void* op2);
+
+ void optCleanupCSEs();
+
+#ifdef DEBUG
+ void optEnsureClearCSEInfo();
+#endif // DEBUG
+
+#endif // FEATURE_ANYCSE
+
+#if FEATURE_VALNUM_CSE
+ /**************************************************************************
+ * Value Number based CSEs
+ *************************************************************************/
+
+public:
+ void optOptimizeValnumCSEs();
+
+protected:
+ void optValnumCSE_Init();
+ unsigned optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt);
+ unsigned optValnumCSE_Locate();
+ void optValnumCSE_InitDataFlow();
+ void optValnumCSE_DataFlow();
+ void optValnumCSE_Availablity();
+ void optValnumCSE_Heuristic();
+ void optValnumCSE_UnmarkCSEs(GenTreePtr deadTree, GenTreePtr keepList);
+
+#endif // FEATURE_VALNUM_CSE
+
+#if FEATURE_ANYCSE
+ bool optDoCSE; // True when we have found a duplicate CSE tree
+ bool optValnumCSE_phase; // True when we are executing the optValnumCSE_phase
+ unsigned optCSECandidateTotal; // Grand total of CSE candidates for both Lexical and ValNum
+ unsigned optCSECandidateCount; // Count of CSE's candidates, reset for Lexical and ValNum CSE's
+ unsigned optCSEstart; // The first local variable number that is a CSE
+ unsigned optCSEcount; // The total count of CSE's introduced.
+ unsigned optCSEweight; // The weight of the current block when we are
+ // scanning for CSE expressions
+
+ bool optIsCSEcandidate(GenTreePtr tree);
+
+ // lclNumIsTrueCSE returns true if the LclVar was introduced by the CSE phase of the compiler
+ //
+ bool lclNumIsTrueCSE(unsigned lclNum) const
+ {
+ return ((optCSEcount > 0) && (lclNum >= optCSEstart) && (lclNum < optCSEstart + optCSEcount));
+ }
+
+ // lclNumIsCSE returns true if the LclVar should be treated like a CSE with regard to constant prop.
+ //
+ bool lclNumIsCSE(unsigned lclNum) const
+ {
+ return lvaTable[lclNum].lvIsCSE;
+ }
+
+#ifdef DEBUG
+ bool optConfigDisableCSE();
+ bool optConfigDisableCSE2();
+#endif
+ void optOptimizeCSEs();
+
+#endif // FEATURE_ANYCSE
+
+ struct isVarAssgDsc
+ {
+ GenTreePtr ivaSkip;
+#ifdef DEBUG
+ void* ivaSelf;
+#endif
+ unsigned ivaVar; // Variable we are interested in, or -1
+ ALLVARSET_TP ivaMaskVal; // Set of variables assigned to. This is a set of all vars, not tracked vars.
+ bool ivaMaskIncomplete; // Variables not representable in ivaMaskVal were assigned to.
+ varRefKinds ivaMaskInd; // What kind of indirect assignments are there?
+ callInterf ivaMaskCall; // What kind of calls are there?
+ };
+
+ static callInterf optCallInterf(GenTreePtr call);
+
+public:
+ // VN based copy propagation.
+ typedef ArrayStack<GenTreePtr> GenTreePtrStack;
+ typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, GenTreePtrStack*, JitSimplerHashBehavior>
+ LclNumToGenTreePtrStack;
+
+ // Kill set to track variables with intervening definitions.
+ VARSET_TP optCopyPropKillSet;
+
+ // Copy propagation functions.
+ void optCopyProp(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, LclNumToGenTreePtrStack* curSsaName);
+ void optBlockCopyPropPopStacks(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName);
+ void optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName);
+ bool optIsSsaLocal(GenTreePtr tree);
+ int optCopyProp_LclVarScore(LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc, bool preferOp2);
+ void optVnCopyProp();
+
+ /**************************************************************************
+ * Early value propagation
+ *************************************************************************/
+ struct SSAName
+ {
+ unsigned m_lvNum;
+ unsigned m_ssaNum;
+
+ SSAName(unsigned lvNum, unsigned ssaNum) : m_lvNum(lvNum), m_ssaNum(ssaNum)
+ {
+ }
+
+ static unsigned GetHashCode(SSAName ssaNm)
+ {
+ return (ssaNm.m_lvNum << 16) | (ssaNm.m_ssaNum);
+ }
+
+ static bool Equals(SSAName ssaNm1, SSAName ssaNm2)
+ {
+ return (ssaNm1.m_lvNum == ssaNm2.m_lvNum) && (ssaNm1.m_ssaNum == ssaNm2.m_ssaNum);
+ }
+ };
+
+#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array
+#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type.
+#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores.
+#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference.
+#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check.
+
+ unsigned optMethodFlags;
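+
+ // Hedged usage sketch (not a prescription of where these flags are set): the importer is expected to
+ // OR in the relevant OMF_* bit when it sees the corresponding construct, and early prop consults the
+ // flags to decide whether a pass is worth running, roughly:
+ //
+ //     optMethodFlags |= OMF_HAS_ARRAYREF;                   // while importing an array access
+ //     ...
+ //     if ((optMethodFlags & OMF_HAS_ARRAYREF) == 0) { ... } // nothing to do for array-length prop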
+
+ // Recursion bound that controls how far backwards we walk when tracking an SSA value.
+ // No throughput difference was found with a backward walk bound between 3 and 8.
+ static const int optEarlyPropRecurBound = 5;
+
+ enum class optPropKind
+ {
+ OPK_INVALID,
+ OPK_ARRAYLEN,
+ OPK_OBJ_GETTYPE,
+ OPK_NULLCHECK
+ };
+
+ bool gtIsVtableRef(GenTreePtr tree);
+ GenTreePtr getArrayLengthFromAllocation(GenTreePtr tree);
+ GenTreePtr getObjectHandleNodeFromAllocation(GenTreePtr tree);
+ GenTreePtr optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth);
+ GenTreePtr optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind);
+ bool optEarlyPropRewriteTree(GenTreePtr tree);
+ bool optDoEarlyPropForBlock(BasicBlock* block);
+ bool optDoEarlyPropForFunc();
+ void optEarlyProp();
+ void optFoldNullCheck(GenTreePtr tree);
+ bool optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry);
+
+#if ASSERTION_PROP
+ /**************************************************************************
+ * Value/Assertion propagation
+ *************************************************************************/
+public:
+ // Data structures for assertion prop
+ BitVecTraits* apTraits;
+ ASSERT_TP apFull;
+ ASSERT_TP apEmpty;
+
+ enum optAssertionKind
+ {
+ OAK_INVALID,
+ OAK_EQUAL,
+ OAK_NOT_EQUAL,
+ OAK_SUBRANGE,
+ OAK_NO_THROW,
+ OAK_COUNT
+ };
+
+ enum optOp1Kind
+ {
+ O1K_INVALID,
+ O1K_LCLVAR,
+ O1K_ARR_BND,
+ O1K_ARRLEN_OPER_BND,
+ O1K_ARRLEN_LOOP_BND,
+ O1K_CONSTANT_LOOP_BND,
+ O1K_EXACT_TYPE,
+ O1K_SUBTYPE,
+ O1K_VALUE_NUMBER,
+ O1K_COUNT
+ };
+
+ enum optOp2Kind
+ {
+ O2K_INVALID,
+ O2K_LCLVAR_COPY,
+ O2K_IND_CNS_INT,
+ O2K_CONST_INT,
+ O2K_CONST_LONG,
+ O2K_CONST_DOUBLE,
+ O2K_ARR_LEN,
+ O2K_SUBRANGE,
+ O2K_COUNT
+ };
+ struct AssertionDsc
+ {
+ optAssertionKind assertionKind;
+ struct SsaVar
+ {
+ unsigned lclNum; // assigned to or property of this local var number
+ unsigned ssaNum;
+ };
+ struct ArrBnd
+ {
+ ValueNum vnIdx;
+ ValueNum vnLen;
+ };
+ struct AssertionDscOp1
+ {
+ optOp1Kind kind; // a normal LclVar, or Exact-type or Subtype
+ ValueNum vn;
+ union {
+ SsaVar lcl;
+ ArrBnd bnd;
+ };
+ } op1;
+ struct AssertionDscOp2
+ {
+ optOp2Kind kind; // a const or copy assignment
+ ValueNum vn;
+ struct IntVal
+ {
+ ssize_t iconVal; // integer
+ unsigned iconFlags; // gtFlags
+ };
+ struct Range // integer subrange
+ {
+ ssize_t loBound;
+ ssize_t hiBound;
+ };
+ union {
+ SsaVar lcl;
+ IntVal u1;
+ __int64 lconVal;
+ double dconVal;
+ Range u2;
+ };
+ } op2;
+
+ bool IsArrLenArithBound()
+ {
+ return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) && op1.kind == O1K_ARRLEN_OPER_BND);
+ }
+ bool IsArrLenBound()
+ {
+ return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) && op1.kind == O1K_ARRLEN_LOOP_BND);
+ }
+ bool IsConstantBound()
+ {
+ return ((assertionKind == OAK_EQUAL || assertionKind == OAK_NOT_EQUAL) &&
+ op1.kind == O1K_CONSTANT_LOOP_BND);
+ }
+ bool IsBoundsCheckNoThrow()
+ {
+ return ((assertionKind == OAK_NO_THROW) && (op1.kind == O1K_ARR_BND));
+ }
+
+ bool IsCopyAssertion()
+ {
+ return ((assertionKind == OAK_EQUAL) && (op1.kind == O1K_LCLVAR) && (op2.kind == O2K_LCLVAR_COPY));
+ }
+
+ static bool SameKind(AssertionDsc* a1, AssertionDsc* a2)
+ {
+ return a1->assertionKind == a2->assertionKind && a1->op1.kind == a2->op1.kind &&
+ a1->op2.kind == a2->op2.kind;
+ }
+
+ static bool ComplementaryKind(optAssertionKind kind, optAssertionKind kind2)
+ {
+ if (kind == OAK_EQUAL)
+ {
+ return kind2 == OAK_NOT_EQUAL;
+ }
+ else if (kind == OAK_NOT_EQUAL)
+ {
+ return kind2 == OAK_EQUAL;
+ }
+ return false;
+ }
+
+ static ssize_t GetLowerBoundForIntegralType(var_types type)
+ {
+ switch (type)
+ {
+ case TYP_BYTE:
+ return SCHAR_MIN;
+ case TYP_SHORT:
+ return SHRT_MIN;
+ case TYP_INT:
+ return INT_MIN;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_USHORT:
+ case TYP_UINT:
+ return 0;
+ default:
+ unreached();
+ }
+ }
+ static ssize_t GetUpperBoundForIntegralType(var_types type)
+ {
+ switch (type)
+ {
+ case TYP_BOOL:
+ return 1;
+ case TYP_BYTE:
+ return SCHAR_MAX;
+ case TYP_SHORT:
+ return SHRT_MAX;
+ case TYP_INT:
+ return INT_MAX;
+ case TYP_UBYTE:
+ return UCHAR_MAX;
+ case TYP_CHAR:
+ case TYP_USHORT:
+ return USHRT_MAX;
+ case TYP_UINT:
+ return UINT_MAX;
+ default:
+ unreached();
+ }
+ }
+
+ bool HasSameOp1(AssertionDsc* that, bool vnBased)
+ {
+ return (op1.kind == that->op1.kind) &&
+ ((vnBased && (op1.vn == that->op1.vn)) || (!vnBased && (op1.lcl.lclNum == that->op1.lcl.lclNum)));
+ }
+
+ bool HasSameOp2(AssertionDsc* that, bool vnBased)
+ {
+ if (op2.kind != that->op2.kind)
+ {
+ return false;
+ }
+ switch (op2.kind)
+ {
+ case O2K_IND_CNS_INT:
+ case O2K_CONST_INT:
+ return ((op2.u1.iconVal == that->op2.u1.iconVal) && (op2.u1.iconFlags == that->op2.u1.iconFlags));
+
+ case O2K_CONST_LONG:
+ return (op2.lconVal == that->op2.lconVal);
+
+ case O2K_CONST_DOUBLE:
+ // exact match because of positive and negative zero.
+ return (memcmp(&op2.dconVal, &that->op2.dconVal, sizeof(double)) == 0);
+
+ case O2K_LCLVAR_COPY:
+ case O2K_ARR_LEN:
+ return (op2.lcl.lclNum == that->op2.lcl.lclNum) &&
+ (!vnBased || op2.lcl.ssaNum == that->op2.lcl.ssaNum);
+
+ case O2K_SUBRANGE:
+ return ((op2.u2.loBound == that->op2.u2.loBound) && (op2.u2.hiBound == that->op2.u2.hiBound));
+
+ case O2K_INVALID:
+ // we will return false
+ break;
+
+ default:
+ assert(!"Unexpected value for op2.kind in AssertionDsc.");
+ break;
+ }
+ return false;
+ }
+
+ bool Complementary(AssertionDsc* that, bool vnBased)
+ {
+ return ComplementaryKind(assertionKind, that->assertionKind) && HasSameOp1(that, vnBased) &&
+ HasSameOp2(that, vnBased);
+ }
+
+ bool Equals(AssertionDsc* that, bool vnBased)
+ {
+ return (assertionKind == that->assertionKind) && HasSameOp1(that, vnBased) && HasSameOp2(that, vnBased);
+ }
+ };
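+
+ // Illustrative encodings (the field values are hypothetical): a constant assertion such as "V05 == 3"
+ // would be represented roughly as { assertionKind = OAK_EQUAL, op1.kind = O1K_LCLVAR,
+ // op1.lcl.lclNum = 5, op2.kind = O2K_CONST_INT, op2.u1.iconVal = 3 }, while a no-throw bounds check
+ // assertion uses { assertionKind = OAK_NO_THROW, op1.kind = O1K_ARR_BND } with op1.bnd holding the
+ // index and length value numbers.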
+
+ typedef unsigned short AssertionIndex;
+
+protected:
+ static fgWalkPreFn optAddCopiesCallback;
+ static fgWalkPreFn optVNAssertionPropCurStmtVisitor;
+ unsigned optAddCopyLclNum;
+ GenTreePtr optAddCopyAsgnNode;
+
+ bool optLocalAssertionProp; // indicates that we are performing local assertion prop
+ bool optAssertionPropagated; // set to true if we modified the trees
+ bool optAssertionPropagatedCurrentStmt;
+#ifdef DEBUG
+ GenTreePtr optAssertionPropCurrentTree;
+#endif
+ AssertionIndex* optComplementaryAssertionMap;
+ ExpandArray<ASSERT_TP>* optAssertionDep; // table that holds dependent assertions (assertions
+ // using the value of a local var) for each local var
+ AssertionDsc* optAssertionTabPrivate; // table that holds info about value assignments
+ AssertionIndex optAssertionCount; // total number of assertions in the assertion table
+ AssertionIndex optMaxAssertionCount;
+
+public:
+ void optVnNonNullPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+ fgWalkResult optVNConstantPropCurStmt(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+ GenTreePtr optVNConstantPropOnRelOp(GenTreePtr tree);
+ GenTreePtr optVNConstantPropOnJTrue(BasicBlock* block, GenTreePtr stmt, GenTreePtr test);
+ GenTreePtr optVNConstantPropOnTree(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+ GenTreePtr optPrepareTreeForReplacement(GenTreePtr extractTree, GenTreePtr replaceTree);
+
+ AssertionIndex GetAssertionCount()
+ {
+ return optAssertionCount;
+ }
+ ASSERT_TP* bbJtrueAssertionOut;
+ typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, ASSERT_TP, JitSimplerHashBehavior>
+ ValueNumToAssertsMap;
+ ValueNumToAssertsMap* optValueNumToAsserts;
+
+ static const AssertionIndex NO_ASSERTION_INDEX = 0;
+
+ // Assertion prop helpers.
+ ASSERT_TP& GetAssertionDep(unsigned lclNum);
+ AssertionDsc* optGetAssertion(AssertionIndex assertIndex);
+ void optAssertionInit(bool isLocalProp);
+ void optAssertionTraitsInit(AssertionIndex assertionCount);
+#if LOCAL_ASSERTION_PROP
+ void optAssertionReset(AssertionIndex limit);
+ void optAssertionRemove(AssertionIndex index);
+#endif
+
+ // Assertion prop data flow functions.
+ void optAssertionPropMain();
+ GenTreePtr optVNAssertionPropCurStmt(BasicBlock* block, GenTreePtr stmt);
+ bool optIsTreeKnownIntValue(bool vnBased, GenTreePtr tree, ssize_t* pConstant, unsigned* pIconFlags);
+ ASSERT_TP* optInitAssertionDataflowFlags();
+ ASSERT_TP* optComputeAssertionGen();
+
+ // Assertion Gen functions.
+ void optAssertionGen(GenTreePtr tree);
+ AssertionIndex optAssertionGenPhiDefn(GenTreePtr tree);
+ AssertionIndex optCreateJTrueBoundsAssertion(GenTreePtr tree);
+ AssertionIndex optAssertionGenJtrue(GenTreePtr tree);
+ AssertionIndex optCreateJtrueAssertions(GenTreePtr op1, GenTreePtr op2, Compiler::optAssertionKind assertionKind);
+ AssertionIndex optFindComplementary(AssertionIndex assertionIndex);
+ void optMapComplementary(AssertionIndex assertionIndex, AssertionIndex index);
+
+ // Assertion creation functions.
+ AssertionIndex optCreateAssertion(GenTreePtr op1, GenTreePtr op2, optAssertionKind assertionKind);
+ AssertionIndex optCreateAssertion(GenTreePtr op1,
+ GenTreePtr op2,
+ optAssertionKind assertionKind,
+ AssertionDsc* assertion);
+ void optCreateComplementaryAssertion(AssertionIndex assertionIndex, GenTreePtr op1, GenTreePtr op2);
+
+ bool optAssertionVnInvolvesNan(AssertionDsc* assertion);
+ AssertionIndex optAddAssertion(AssertionDsc* assertion);
+ void optAddVnAssertionMapping(ValueNum vn, AssertionIndex index);
+#ifdef DEBUG
+ void optPrintVnAssertionMapping();
+#endif
+ ASSERT_TP optGetVnMappedAssertions(ValueNum vn);
+
+ // Used for respective assertion propagations.
+ AssertionIndex optAssertionIsSubrange(GenTreePtr tree, var_types toType, ASSERT_VALARG_TP assertions);
+ AssertionIndex optAssertionIsSubtype(GenTreePtr tree, GenTreePtr methodTableArg, ASSERT_VALARG_TP assertions);
+ AssertionIndex optAssertionIsNonNullInternal(GenTreePtr op, ASSERT_VALARG_TP assertions);
+ bool optAssertionIsNonNull(GenTreePtr op,
+ ASSERT_VALARG_TP assertions DEBUGARG(bool* pVnBased) DEBUGARG(AssertionIndex* pIndex));
+
+ // Used for Relop propagation.
+ AssertionIndex optGlobalAssertionIsEqualOrNotEqual(ASSERT_VALARG_TP assertions, GenTreePtr op1, GenTreePtr op2);
+ AssertionIndex optLocalAssertionIsEqualOrNotEqual(
+ optOp1Kind op1Kind, unsigned lclNum, optOp2Kind op2Kind, ssize_t cnsVal, ASSERT_VALARG_TP assertions);
+
+ // Assertion prop for lcl var functions.
+ bool optAssertionProp_LclVarTypeCheck(GenTreePtr tree, LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc);
+ GenTreePtr optCopyAssertionProp(AssertionDsc* curAssertion,
+ GenTreePtr tree,
+ GenTreePtr stmt DEBUGARG(AssertionIndex index));
+ GenTreePtr optConstantAssertionProp(AssertionDsc* curAssertion,
+ const GenTreePtr tree,
+ const GenTreePtr stmt DEBUGARG(AssertionIndex index));
+ GenTreePtr optVnConstantAssertionProp(const GenTreePtr tree, const GenTreePtr stmt);
+
+ // Assertion propagation functions.
+ GenTreePtr optAssertionProp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_LclVar(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Ind(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Cast(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Comma(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_BndsChk(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionPropGlobal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionPropLocal_RelOp(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optAssertionProp_Update(const GenTreePtr newTree, const GenTreePtr tree, const GenTreePtr stmt);
+ GenTreePtr optNonNullAssertionProp_Call(ASSERT_VALARG_TP assertions, const GenTreePtr tree, const GenTreePtr stmt);
+
+ // Implied assertion functions.
+ void optImpliedAssertions(AssertionIndex assertionIndex, ASSERT_TP& activeAssertions);
+ void optImpliedByTypeOfAssertions(ASSERT_TP& activeAssertions);
+ void optImpliedByCopyAssertion(AssertionDsc* copyAssertion, AssertionDsc* depAssertion, ASSERT_TP& result);
+ void optImpliedByConstAssertion(AssertionDsc* curAssertion, ASSERT_TP& result);
+
+ ASSERT_VALRET_TP optNewFullAssertSet();
+ ASSERT_VALRET_TP optNewEmptyAssertSet();
+
+#ifdef DEBUG
+ void optPrintAssertion(AssertionDsc* newAssertion, AssertionIndex assertionIndex = 0);
+ void optDebugCheckAssertion(AssertionDsc* assertion);
+ void optDebugCheckAssertions(AssertionIndex AssertionIndex);
+#endif
+ void optAddCopies();
+#endif // ASSERTION_PROP
+
+ /**************************************************************************
+ * Range checks
+ *************************************************************************/
+
+public:
+ struct LoopCloneVisitorInfo
+ {
+ LoopCloneContext* context;
+ unsigned loopNum;
+ GenTreePtr stmt;
+ LoopCloneVisitorInfo(LoopCloneContext* context, unsigned loopNum, GenTreePtr stmt)
+ : context(context), loopNum(loopNum), stmt(nullptr)
+ {
+ }
+ };
+
+ bool optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum);
+ bool optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum);
+ bool optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum);
+ bool optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context);
+ static fgWalkPreFn optCanOptimizeByLoopCloningVisitor;
+ fgWalkResult optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info);
+ void optObtainLoopCloningOpts(LoopCloneContext* context);
+ bool optIsLoopClonable(unsigned loopInd);
+
+ bool optCanCloneLoops();
+
+#ifdef DEBUG
+ void optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore);
+#endif
+ void optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool fastPath));
+ bool optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context);
+ bool optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context);
+ BasicBlock* optInsertLoopChoiceConditions(LoopCloneContext* context,
+ unsigned loopNum,
+ BasicBlock* head,
+ BasicBlock* slow);
+ void optInsertLoopCloningStress(BasicBlock* head);
+
+#if COUNT_RANGECHECKS
+ static unsigned optRangeChkRmv;
+ static unsigned optRangeChkAll;
+#endif
+
+protected:
+ struct arraySizes
+ {
+ unsigned arrayVar;
+ int arrayDim;
+
+#define MAX_ARRAYS 4 // a magic max number of arrays tracked for bounds check elimination
+ };
+
+ struct RngChkDsc
+ {
+ RngChkDsc* rcdNextInBucket; // used by the hash table
+
+ unsigned short rcdHashValue; // to make matching faster
+ unsigned short rcdIndex; // 0..optRngChkCount-1
+
+ GenTreePtr rcdTree; // the array index tree
+ };
+
+ unsigned optRngChkCount;
+ static const size_t optRngChkHashSize;
+
+ ssize_t optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk));
+ GenTreePtr optFindLocalInit(BasicBlock* block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* isKilledAfterInit);
+
+#if FANCY_ARRAY_OPT
+ bool optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1 = 0, int add2 = 0);
+#endif
+
+ bool optReachWithoutCall(BasicBlock* srcBB, BasicBlock* dstBB);
+
+protected:
+ bool optLoopsMarked;
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX RegAlloc XX
+ XX XX
+ XX Does the register allocation and puts the remaining lclVars on the stack XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+#ifndef LEGACY_BACKEND
+ bool doLSRA() const
+ {
+ return true;
+ }
+#else // LEGACY_BACKEND
+ bool doLSRA() const
+ {
+ return false;
+ }
+#endif // LEGACY_BACKEND
+
+#ifdef LEGACY_BACKEND
+ void raInit();
+ void raAssignVars(); // register allocation
+#endif // LEGACY_BACKEND
+
+ VARSET_TP raRegVarsMask; // Set of all enregistered variables (not including FEATURE_STACK_FP_X87 enregistered
+ // variables)
+ regNumber raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc);
+
+ void raMarkStkVars();
+
+protected:
+ // Some things are used by both LSRA and regpredict allocators.
+
+ FrameType rpFrameType;
+ bool rpMustCreateEBPCalled; // Set to true after we have called rpMustCreateEBPFrame once
+
+#ifdef LEGACY_BACKEND
+ regMaskTP rpMaskPInvokeEpilogIntf; // pinvoke epilog trashes esi/edi holding stack args needed to setup tail call's
+ // args
+#endif // LEGACY_BACKEND
+
+ bool rpMustCreateEBPFrame(INDEBUG(const char** wbReason));
+
+#if FEATURE_FP_REGALLOC
+ enum enumConfigRegisterFP
+ {
+ CONFIG_REGISTER_FP_NONE = 0x0,
+ CONFIG_REGISTER_FP_CALLEE_TRASH = 0x1,
+ CONFIG_REGISTER_FP_CALLEE_SAVED = 0x2,
+ CONFIG_REGISTER_FP_FULL = 0x3,
+ };
+ enumConfigRegisterFP raConfigRegisterFP();
+#endif // FEATURE_FP_REGALLOC
+
+public:
+ regMaskTP raConfigRestrictMaskFP();
+
+private:
+#ifndef LEGACY_BACKEND
+ LinearScanInterface* m_pLinearScan; // Linear Scan allocator
+#else // LEGACY_BACKEND
+ unsigned raAvoidArgRegMask; // Mask of incoming argument registers that we may need to avoid
+ VARSET_TP raLclRegIntf[REG_COUNT]; // variable to register interference graph
+ bool raNewBlocks; // True if we added killing blocks for FPU registers
+ unsigned rpPasses; // Number of passes made by the register predictor
+ unsigned rpPassesMax; // Maximum number of passes made by the register predictor
+ unsigned rpPassesPessimize; // Number of non-pessimizing passes made by the register predictor
+ unsigned rpStkPredict; // Weighted count of variables that were predicted STK (lower means register allocation is better)
+ unsigned rpPredictSpillCnt; // Predicted number of integer spill tmps for the current tree
+ regMaskTP rpPredictAssignMask; // Mask of registers to consider in rpPredictAssignRegVars()
+ VARSET_TP rpLastUseVars; // Set of last use variables in rpPredictTreeRegUse
+ VARSET_TP rpUseInPlace; // Set of variables that we used in place
+ int rpAsgVarNum; // VarNum for the target of GT_ASG node
+ bool rpPredictAssignAgain; // Must rerun the rpPredictAssignRegVars()
+ bool rpAddedVarIntf; // Set to true if we need to add a new var intf
+ bool rpLostEnreg; // Set to true if we lost an enregister var that had lvDependReg set
+ bool rpReverseEBPenreg; // Decided to reverse the enregistration of EBP
+public:
+ bool rpRegAllocDone; // Set to true after we have completed register allocation
+private:
+ regMaskTP rpPredictMap[PREDICT_COUNT]; // Holds the regMaskTP for each of the enum values
+
+ void raSetupArgMasks(RegState* r);
+
+ const regNumber* raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize);
+#ifdef DEBUG
+ void raDumpVarIntf(); // Dump the variable to variable interference graph
+ void raDumpRegIntf(); // Dump the variable to register interference graph
+#endif
+ void raAdjustVarIntf();
+
+ regMaskTP rpPredictRegMask(rpPredictReg predictReg, var_types type);
+
+ bool rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg));
+
+ bool rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg));
+ regMaskTP rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs);
+
+ regMaskTP rpPredictGrabReg(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs);
+
+ static fgWalkPreFn rpMarkRegIntf;
+
+ regMaskTP rpPredictAddressMode(
+ GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE);
+
+ void rpPredictRefAssign(unsigned lclNum);
+
+ regMaskTP rpPredictBlkAsgRegUse(GenTreePtr tree, rpPredictReg predictReg, regMaskTP lockedRegs, regMaskTP rsvdRegs);
+
+ regMaskTP rpPredictTreeRegUse(GenTreePtr tree, rpPredictReg predictReg, regMaskTP lockedRegs, regMaskTP rsvdRegs);
+
+ regMaskTP rpPredictAssignRegVars(regMaskTP regAvail);
+
+ void rpPredictRegUse(); // Entry point
+
+ unsigned raPredictTreeRegUse(GenTreePtr tree);
+ unsigned raPredictListRegUse(GenTreePtr list);
+
+ void raSetRegVarOrder(var_types regType,
+ regNumber* customVarOrder,
+ unsigned* customVarOrderSize,
+ regMaskTP prefReg,
+ regMaskTP avoidReg);
+
+ // We use (unsigned)-1 as an uninitialized sentinel for rpStkPredict and
+ // also as the maximum value of lvRefCntWtd. Don't allow overflow, and
+ // saturate at UINT_MAX - 1, to avoid using the sentinel.
+ void raAddToStkPredict(unsigned val)
+ {
+ unsigned newStkPredict = rpStkPredict + val;
+ if ((newStkPredict < rpStkPredict) || (newStkPredict == UINT_MAX))
+ rpStkPredict = UINT_MAX - 1;
+ else
+ rpStkPredict = newStkPredict;
+ }
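+
+ // Worked example of the saturation above: if rpStkPredict is UINT_MAX - 3 and val is 10, the sum
+ // wraps around (newStkPredict < rpStkPredict), so rpStkPredict is clamped to UINT_MAX - 1 rather
+ // than wrapping to a small value or landing on the UINT_MAX sentinel.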
+
+#ifdef DEBUG
+#if !FEATURE_FP_REGALLOC
+ void raDispFPlifeInfo();
+#endif
+#endif
+
+ regMaskTP genReturnRegForTree(GenTreePtr tree);
+#endif // LEGACY_BACKEND
+
+ /* raIsVarargsStackArg is called by raMarkStkVars and by
+ lvaSortByRefCount. It identifies the special case
+ where a varargs function has a parameter passed on the
+ stack, other than the special varargs handle. Such parameters
+ require special treatment, because they cannot be tracked
+ by the GC (their offsets in the stack are not known
+ at compile time).
+ */
+
+ bool raIsVarargsStackArg(unsigned lclNum)
+ {
+#ifdef _TARGET_X86_
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ assert(varDsc->lvIsParam);
+
+ return (info.compIsVarArgs && !varDsc->lvIsRegArg && (lclNum != lvaVarargsHandleArg));
+
+#else // _TARGET_X86_
+
+ return false;
+
+#endif // _TARGET_X86_
+ }
+
+#ifdef LEGACY_BACKEND
+ // Records the current prediction, if it's better than any previous recorded prediction.
+ void rpRecordPrediction();
+ // Applies the best recorded prediction, if one exists and is better than the current prediction.
+ void rpUseRecordedPredictionIfBetter();
+
+ // Data members used in the methods above.
+ unsigned rpBestRecordedStkPredict;
+ struct VarRegPrediction
+ {
+ bool m_isEnregistered;
+ regNumberSmall m_regNum;
+ regNumberSmall m_otherReg;
+ };
+ VarRegPrediction* rpBestRecordedPrediction;
+#endif // LEGACY_BACKEND
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX EEInterface XX
+ XX XX
+ XX Get to the class and method info from the Execution Engine given XX
+ XX tokens for the class and method XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ /* These are the different addressing modes used to access a local var.
+ * The JIT has to report the location of the locals back to the EE
+ * for debugging purposes.
+ */
+
+ enum siVarLocType
+ {
+ VLT_REG,
+ VLT_REG_BYREF, // this type is currently only used for value types on X64
+ VLT_REG_FP,
+ VLT_STK,
+ VLT_STK_BYREF, // this type is currently only used for value types on X64
+ VLT_REG_REG,
+ VLT_REG_STK,
+ VLT_STK_REG,
+ VLT_STK2,
+ VLT_FPSTK,
+ VLT_FIXED_VA,
+
+ VLT_COUNT,
+ VLT_INVALID
+ };
+
+ struct siVarLoc
+ {
+ siVarLocType vlType;
+
+ union {
+ // VLT_REG/VLT_REG_FP -- Any pointer-sized enregistered value (TYP_INT, TYP_REF, etc)
+ // eg. EAX
+ // VLT_REG_BYREF -- the specified register contains the address of the variable
+ // eg. [EAX]
+
+ struct
+ {
+ regNumber vlrReg;
+ } vlReg;
+
+ // VLT_STK -- Any 32 bit value which is on the stack
+ // eg. [ESP+0x20], or [EBP-0x28]
+ // VLT_STK_BYREF -- the specified stack location contains the address of the variable
+ // eg. mov EAX, [ESP+0x20]; [EAX]
+
+ struct
+ {
+ regNumber vlsBaseReg;
+ NATIVE_OFFSET vlsOffset;
+ } vlStk;
+
+ // VLT_REG_REG -- TYP_LONG/TYP_DOUBLE with both DWords enregistered
+ // eg. RBM_EAXEDX
+
+ struct
+ {
+ regNumber vlrrReg1;
+ regNumber vlrrReg2;
+ } vlRegReg;
+
+ // VLT_REG_STK -- Partly enregistered TYP_LONG/TYP_DOUBLE
+ // eg { LowerDWord=EAX UpperDWord=[ESP+0x8] }
+
+ struct
+ {
+ regNumber vlrsReg;
+
+ struct
+ {
+ regNumber vlrssBaseReg;
+ NATIVE_OFFSET vlrssOffset;
+ } vlrsStk;
+ } vlRegStk;
+
+ // VLT_STK_REG -- Partly enregistered TYP_LONG/TYP_DOUBLE
+ // eg { LowerDWord=[ESP+0x8] UpperDWord=EAX }
+
+ struct
+ {
+ struct
+ {
+ regNumber vlsrsBaseReg;
+ NATIVE_OFFSET vlsrsOffset;
+ } vlsrStk;
+
+ regNumber vlsrReg;
+ } vlStkReg;
+
+ // VLT_STK2 -- Any 64 bit value which is on the stack, in 2 successive DWords
+ // eg 2 DWords at [ESP+0x10]
+
+ struct
+ {
+ regNumber vls2BaseReg;
+ NATIVE_OFFSET vls2Offset;
+ } vlStk2;
+
+ // VLT_FPSTK -- enregistered TYP_DOUBLE (on the FP stack)
+ // eg. ST(3). Actually it is ST("FPstkHeight - vpFpStk")
+
+ struct
+ {
+ unsigned vlfReg;
+ } vlFPstk;
+
+ // VLT_FIXED_VA -- fixed argument of a varargs function.
+ // The argument location depends on the size of the variable
+ // arguments (...). Inspecting the VARARGS_HANDLE indicates the
+ // location of the first arg. This argument can then be accessed
+ // relative to the position of the first arg
+
+ struct
+ {
+ unsigned vlfvOffset;
+ } vlFixedVarArg;
+
+ // VLT_MEMORY
+
+ struct
+ {
+ void* rpValue; // pointer to the in-process
+ // location of the value.
+ } vlMemory;
+ };
+
+ // Helper functions
+
+ bool vlIsInReg(regNumber reg);
+ bool vlIsOnStk(regNumber reg, signed offset);
+ };
+
+ /*************************************************************************/
+
+public:
+ // Get handles
+
+ void eeGetCallInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedToken,
+ CORINFO_CALLINFO_FLAGS flags,
+ CORINFO_CALL_INFO* pResult);
+ inline CORINFO_CALLINFO_FLAGS addVerifyFlag(CORINFO_CALLINFO_FLAGS flags);
+
+ void eeGetFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS flags,
+ CORINFO_FIELD_INFO* pResult);
+
+ // Get the flags
+
+ BOOL eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd);
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+ bool IsSuperPMIException(unsigned code)
+ {
+ // Copied from NDP\clr\src\ToolBox\SuperPMI\SuperPMI-Shared\ErrorHandling.h
+
+ const unsigned EXCEPTIONCODE_DebugBreakorAV = 0xe0421000;
+ const unsigned EXCEPTIONCODE_MC = 0xe0422000;
+ const unsigned EXCEPTIONCODE_LWM = 0xe0423000;
+ const unsigned EXCEPTIONCODE_SASM = 0xe0424000;
+ const unsigned EXCEPTIONCODE_SSYM = 0xe0425000;
+ const unsigned EXCEPTIONCODE_CALLUTILS = 0xe0426000;
+ const unsigned EXCEPTIONCODE_TYPEUTILS = 0xe0427000;
+ const unsigned EXCEPTIONCODE_ASSERT = 0xe0440000;
+
+ switch (code)
+ {
+ case EXCEPTIONCODE_DebugBreakorAV:
+ case EXCEPTIONCODE_MC:
+ case EXCEPTIONCODE_LWM:
+ case EXCEPTIONCODE_SASM:
+ case EXCEPTIONCODE_SSYM:
+ case EXCEPTIONCODE_CALLUTILS:
+ case EXCEPTIONCODE_TYPEUTILS:
+ case EXCEPTIONCODE_ASSERT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ const char* eeGetMethodName(CORINFO_METHOD_HANDLE hnd, const char** className);
+ const char* eeGetMethodFullName(CORINFO_METHOD_HANDLE hnd);
+
+ bool eeIsNativeMethod(CORINFO_METHOD_HANDLE method);
+ CORINFO_METHOD_HANDLE eeGetMethodHandleForNative(CORINFO_METHOD_HANDLE method);
+#endif
+
+ var_types eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
+ var_types eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig, bool* isPinned);
+ unsigned eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig);
+
+ // VOM info, method sigs
+
+ void eeGetSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* retSig);
+
+ void eeGetCallSiteSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* retSig);
+
+ void eeGetMethodSig(CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* retSig, CORINFO_CLASS_HANDLE owner = nullptr);
+
+ // Method entry-points, instrs
+
+ void* eeGetFieldAddress(CORINFO_FIELD_HANDLE handle, void*** ppIndir);
+
+ CORINFO_METHOD_HANDLE eeMarkNativeTarget(CORINFO_METHOD_HANDLE method);
+
+ CORINFO_EE_INFO eeInfo;
+ bool eeInfoInitialized;
+
+ CORINFO_EE_INFO* eeGetEEInfo();
+
+ // Gets the offset of an SDArray's first element
+ unsigned eeGetArrayDataOffset(var_types type);
+ // Gets the offset of an MDArray's first element
+ unsigned eeGetMDArrayDataOffset(var_types type, unsigned rank);
+
+ GenTreePtr eeGetPInvokeCookie(CORINFO_SIG_INFO* szMetaSig);
+
+ // Returns the page size for the target machine as reported by the EE.
+ inline size_t eeGetPageSize()
+ {
+#if COR_JIT_EE_VERSION > 460
+ return eeGetEEInfo()->osPageSize;
+#else // COR_JIT_EE_VERSION <= 460
+ return CORINFO_PAGE_SIZE;
+#endif // COR_JIT_EE_VERSION > 460
+ }
+
+ // Returns the frame size at which we will generate a loop to probe the stack.
+ inline size_t getVeryLargeFrameSize()
+ {
+#ifdef _TARGET_ARM_
+ // The looping probe code is 40 bytes, whereas the straight-line probing for
+ // the (0x2000..0x3000) case is 44, so use looping for anything 0x2000 bytes
+ // or greater, to generate smaller code.
+ return 2 * eeGetPageSize();
+#else
+ return 3 * eeGetPageSize();
+#endif
+ }
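+
+ // For example, with a 0x1000 byte page as typically reported by the EE, the looping probe kicks in
+ // at 0x2000 bytes of frame on ARM and at 0x3000 bytes on other targets.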
+
+ inline bool generateCFIUnwindCodes()
+ {
+#if COR_JIT_EE_VERSION > 460 && defined(UNIX_AMD64_ABI)
+ return eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI;
+#else
+ return false;
+#endif
+ }
+
+ // Exceptions
+
+ unsigned eeGetEHcount(CORINFO_METHOD_HANDLE handle);
+
+ // Debugging support - Line number info
+
+ void eeGetStmtOffsets();
+
+ unsigned eeBoundariesCount;
+
+ struct boundariesDsc
+ {
+ UNATIVE_OFFSET nativeIP;
+ IL_OFFSET ilOffset;
+ unsigned sourceReason;
+ } * eeBoundaries; // Boundaries to report to EE
+ void eeSetLIcount(unsigned count);
+ void eeSetLIinfo(unsigned which, UNATIVE_OFFSET offs, unsigned srcIP, bool stkEmpty, bool callInstruction);
+ void eeSetLIdone();
+
+#ifdef DEBUG
+ static void eeDispILOffs(IL_OFFSET offs);
+ static void eeDispLineInfo(const boundariesDsc* line);
+ void eeDispLineInfos();
+#endif // DEBUG
+
+ // Debugging support - Local var info
+
+ void eeGetVars();
+
+ unsigned eeVarsCount;
+
+ struct VarResultInfo
+ {
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ DWORD varNumber;
+ siVarLoc loc;
+ } * eeVars;
+ void eeSetLVcount(unsigned count);
+ void eeSetLVinfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ VarName namex,
+ bool avail,
+ const siVarLoc& loc);
+ void eeSetLVdone();
+
+#ifdef DEBUG
+ void eeDispVar(ICorDebugInfo::NativeVarInfo* var);
+ void eeDispVars(CORINFO_METHOD_HANDLE ftn, ULONG32 cVars, ICorDebugInfo::NativeVarInfo* vars);
+#endif // DEBUG
+
+ // ICorJitInfo wrappers
+
+ void eeReserveUnwindInfo(BOOL isFunclet, BOOL isColdCode, ULONG unwindSize);
+
+ void eeAllocUnwindInfo(BYTE* pHotCode,
+ BYTE* pColdCode,
+ ULONG startOffset,
+ ULONG endOffset,
+ ULONG unwindSize,
+ BYTE* pUnwindBlock,
+ CorJitFuncKind funcKind);
+
+ void eeSetEHcount(unsigned cEH);
+
+ void eeSetEHinfo(unsigned EHnumber, const CORINFO_EH_CLAUSE* clause);
+
+ WORD eeGetRelocTypeHint(void* target);
+
+ // ICorStaticInfo wrapper functions
+
+ bool eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#ifdef DEBUG
+ static void dumpSystemVClassificationType(SystemVClassificationType ct);
+#endif // DEBUG
+
+ void eeGetSystemVAmd64PassStructInRegisterDescriptor(
+ /*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ template <typename ParamType>
+ bool eeRunWithErrorTrap(void (*function)(ParamType*), ParamType* param)
+ {
+ return eeRunWithErrorTrapImp(reinterpret_cast<void (*)(void*)>(function), reinterpret_cast<void*>(param));
+ }
+
+ bool eeRunWithErrorTrapImp(void (*function)(void*), void* param);
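+
+ // Hedged usage sketch for the error-trap wrapper above (the Param struct and body helper are
+ // hypothetical, not part of this interface):
+ //
+ //     struct Param { Compiler* pThis; bool success; };
+ //     static void body(Param* p) { /* work that may raise an EE exception */ p->success = true; }
+ //     ...
+ //     Param param = { this, false };
+ //     if (!eeRunWithErrorTrap<Param>(body, &param)) { /* the EE reported an error */ }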
+
+ // Utility functions
+
+ const char* eeGetFieldName(CORINFO_FIELD_HANDLE fieldHnd, const char** classNamePtr = nullptr);
+
+#if defined(DEBUG)
+ const wchar_t* eeGetCPString(size_t stringHandle);
+#endif
+
+ const char* eeGetClassName(CORINFO_CLASS_HANDLE clsHnd);
+
+ static CORINFO_METHOD_HANDLE eeFindHelper(unsigned helper);
+ static CorInfoHelpFunc eeGetHelperNum(CORINFO_METHOD_HANDLE method);
+
+ static fgWalkPreFn CountSharedStaticHelper;
+ static bool IsSharedStaticHelper(GenTreePtr tree);
+ static bool IsTreeAlwaysHoistable(GenTreePtr tree);
+
+ static CORINFO_FIELD_HANDLE eeFindJitDataOffs(unsigned jitDataOffs);
+ // returns true if 'field' is a Jit Data offset
+ static bool eeIsJitDataOffs(CORINFO_FIELD_HANDLE field);
+ // returns a number < 0 if 'field' is not a Jit Data offset, otherwise the data offset (limited to 2GB)
+ static int eeGetJitDataOffs(CORINFO_FIELD_HANDLE field);
+
+ /*****************************************************************************/
+
+public:
+ void tmpInit();
+
+ enum TEMP_USAGE_TYPE
+ {
+ TEMP_USAGE_FREE,
+ TEMP_USAGE_USED
+ };
+
+ static var_types tmpNormalizeType(var_types type);
+ TempDsc* tmpGetTemp(var_types type); // get temp for the given type
+ void tmpRlsTemp(TempDsc* temp);
+ TempDsc* tmpFindNum(int temp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
+
+ void tmpEnd();
+ TempDsc* tmpListBeg(TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
+ TempDsc* tmpListNxt(TempDsc* curTemp, TEMP_USAGE_TYPE usageType = TEMP_USAGE_FREE) const;
+ void tmpDone();
+
+#ifdef DEBUG
+ bool tmpAllFree() const;
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+ void tmpPreAllocateTemps(var_types type, unsigned count);
+#endif // !LEGACY_BACKEND
+
+protected:
+#ifdef LEGACY_BACKEND
+ unsigned tmpIntSpillMax; // number of int-sized spill temps
+ unsigned tmpDoubleSpillMax; // number of double-sized spill temps
+#endif // LEGACY_BACKEND
+
+ unsigned tmpCount; // Number of temps
+ unsigned tmpSize; // Size of all the temps
+#ifdef DEBUG
+public:
+ // Used by RegSet::rsSpillChk()
+ unsigned tmpGetCount; // Temps which haven't been released yet
+#endif
+private:
+ static unsigned tmpSlot(unsigned size); // which slot in tmpFree[] or tmpUsed[] to use
+
+ TempDsc* tmpFree[TEMP_MAX_SIZE / sizeof(int)];
+ TempDsc* tmpUsed[TEMP_MAX_SIZE / sizeof(int)];
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX CodeGenerator XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ CodeGenInterface* codeGen;
+
+#ifdef DEBUGGING_SUPPORT
+
+ // The following holds information about instr offsets in terms of generated code.
+
+ struct IPmappingDsc
+ {
+ IPmappingDsc* ipmdNext; // next line# record
+ IL_OFFSETX ipmdILoffsx; // the instr offset
+ emitLocation ipmdNativeLoc; // the emitter location of the native code corresponding to the IL offset
+ bool ipmdIsLabel; // Can this code be a branch label?
+ };
+
+ // Record the instr offset mapping to the generated code
+
+ IPmappingDsc* genIPmappingList;
+ IPmappingDsc* genIPmappingLast;
+
+ // Managed RetVal - A side hash table meant to record the mapping from a
+ // GT_CALL node to its IL offset. This info is used to emit sequence points
+ // that can be used by the debugger to determine the native offset at which the
+ // managed RetVal will be available.
+ //
+ // We could store the IL offset in the GT_CALL node itself, but that was ruled out
+ // in favor of a side table for two reasons: 1) we need the IL offset only for those
+ // GT_CALL nodes (created during importation) that correspond to an IL call and
+ // whose return type is other than TYP_VOID, and 2) GT_CALL is a frequently used
+ // node and the IL offset is needed only when generating debuggable code, so it is
+ // desirable to avoid the memory size penalty in retail scenarios.
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, IL_OFFSETX, JitSimplerHashBehavior>
+ CallSiteILOffsetTable;
+ CallSiteILOffsetTable* genCallSite2ILOffsetMap;
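+
+ // Hedged sketch of the intended traffic on this table (the exact call sites are not shown in this
+ // header): when importing a non-void call under debuggable codegen the importer would record
+ //
+ //     genCallSite2ILOffsetMap->Set(call, ilOffsetX);
+ //
+ // and code generation would later Lookup() the call node to emit the managed-RetVal sequence point.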
+#endif // DEBUGGING_SUPPORT
+
+ unsigned genReturnLocal; // Local number for the return value when applicable.
+ BasicBlock* genReturnBB; // jumped to when not optimizing for speed.
+
+ // The following properties are part of CodeGenContext. Getters are provided here for
+ // convenience and backward compatibility, but the properties can only be set by invoking
+ // the setter on CodeGenContext directly.
+
+ __declspec(property(get = getEmitter)) emitter* genEmitter;
+ emitter* getEmitter()
+ {
+ return codeGen->getEmitter();
+ }
+
+ const bool isFramePointerUsed()
+ {
+ return codeGen->isFramePointerUsed();
+ }
+
+ __declspec(property(get = getInterruptible, put = setInterruptible)) bool genInterruptible;
+ bool getInterruptible()
+ {
+ return codeGen->genInterruptible;
+ }
+ void setInterruptible(bool value)
+ {
+ codeGen->setInterruptible(value);
+ }
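+
+ // Reads and writes of the property above go through the accessors, e.g. (illustrative):
+ //
+ //     genInterruptible = true;  // expands to codeGen->setInterruptible(true)
+ //     if (genInterruptible) ... // expands to getInterruptible(), i.e. codeGen->genInterruptible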
+
+#if DOUBLE_ALIGN
+ const bool genDoubleAlign()
+ {
+ return codeGen->doDoubleAlign();
+ }
+ DWORD getCanDoubleAlign(); // Defined & used only by RegAlloc
+#endif // DOUBLE_ALIGN
+ __declspec(property(get = getFullPtrRegMap, put = setFullPtrRegMap)) bool genFullPtrRegMap;
+ bool getFullPtrRegMap()
+ {
+ return codeGen->genFullPtrRegMap;
+ }
+ void setFullPtrRegMap(bool value)
+ {
+ codeGen->setFullPtrRegMap(value);
+ }
+
+// Things that MAY belong either in CodeGen or CodeGenContext
+
+#if FEATURE_EH_FUNCLETS
+ FuncInfoDsc* compFuncInfos;
+ unsigned short compCurrFuncIdx;
+ unsigned short compFuncInfoCount;
+
+ unsigned short compFuncCount()
+ {
+ assert(fgFuncletsCreated);
+ return compFuncInfoCount;
+ }
+
+#else // !FEATURE_EH_FUNCLETS
+
+ // This is a no-op when there are no funclets!
+ void genUpdateCurrentFunclet(BasicBlock* block)
+ {
+ return;
+ }
+
+ FuncInfoDsc compFuncInfoRoot;
+
+ static const unsigned compCurrFuncIdx = 0;
+
+ unsigned short compFuncCount()
+ {
+ return 1;
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
+ FuncInfoDsc* funCurrentFunc();
+ void funSetCurrentFunc(unsigned funcIdx);
+ FuncInfoDsc* funGetFunc(unsigned funcIdx);
+ unsigned int funGetFuncIdx(BasicBlock* block);
+
+ // LIVENESS
+
+ VARSET_TP compCurLife; // current live variables
+ GenTreePtr compCurLifeTree; // node after which compCurLife has been computed
+
+ template <bool ForCodeGen>
+ void compChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree));
+
+ void genChangeLife(VARSET_VALARG_TP newLife DEBUGARG(GenTreePtr tree))
+ {
+ compChangeLife</*ForCodeGen*/ true>(newLife DEBUGARG(tree));
+ }
+
+ template <bool ForCodeGen>
+ void compUpdateLife(GenTreePtr tree);
+
+ // Updates "compCurLife" to its state after evaluation of "tree". If "pLastUseVars" is
+ // non-null, sets "*pLastUseVars" to the set of tracked variables for which "tree" was a last
+ // use. (Can be more than one var in the case of dependently promoted struct vars.)
+ template <bool ForCodeGen>
+ void compUpdateLifeVar(GenTreePtr tree, VARSET_TP* pLastUseVars = nullptr);
+
+ template <bool ForCodeGen>
+ inline void compUpdateLife(VARSET_VALARG_TP newLife);
+
+ // Gets a register mask that represent the kill set for a helper call since
+ // not all JIT Helper calls follow the standard ABI on the target architecture.
+ regMaskTP compHelperCallKillSet(CorInfoHelpFunc helper);
+
+ // Gets a register mask that represent the kill set for a NoGC helper call.
+ regMaskTP compNoGCHelperCallKillSet(CorInfoHelpFunc helper);
+
+#ifdef _TARGET_ARM_
+ // Requires that "varDsc" be a promoted struct local variable being passed as an argument, beginning at
+ // "firstArgRegNum", which is assumed to have already been aligned to the register alignment restriction of the
+ // struct type. Adds bits to "*pArgSkippedRegMask" for any argument registers *not* used in passing "varDsc" --
+ // i.e., internal "holes" caused by internal alignment constraints. For example, if the struct contained an int and
+ // a double, and we started at R0 (on ARM), then R1 would be skipped, and the bit for R1 would be added to the mask.
+ void fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc, unsigned firstArgRegNum, regMaskTP* pArgSkippedRegMask);
+#endif // _TARGET_ARM_
+
+ // If "tree" is an indirection (GT_IND, or GT_OBJ) whose arg is an ADDR, whose arg is a LCL_VAR, return that LCL_VAR
+ // node, else NULL.
+ static GenTreePtr fgIsIndirOfAddrOfLocal(GenTreePtr tree);
+
+ // This is indexed by GT_OBJ nodes that are addresses of promoted struct variables, which
+ // have been annotated with the GTF_VAR_DEATH flag. If such a node is *not* mapped in this
+ // table, one may assume that all the (tracked) field vars die at this point. Otherwise,
+ // the node maps to a pointer to a VARSET_TP, containing set bits for each of the tracked field
+ // vars of the promoted struct local that go dead at the given node (the set bits are the bits
+ // for the tracked var indices of the field vars, as in a live var set).
+ NodeToVarsetPtrMap* m_promotedStructDeathVars;
+
+ NodeToVarsetPtrMap* GetPromotedStructDeathVars()
+ {
+ if (m_promotedStructDeathVars == nullptr)
+ {
+ m_promotedStructDeathVars = new (getAllocator()) NodeToVarsetPtrMap(getAllocator());
+ }
+ return m_promotedStructDeathVars;
+ }
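+
+ // Hedged lookup sketch for the table above (the names are illustrative): a consumer interested in the
+ // field deaths at a GT_OBJ node would do roughly
+ //
+ //     VARSET_TP* deadFields;
+ //     if (GetPromotedStructDeathVars()->Lookup(objNode, &deadFields)) { /* only these fields die */ }
+ //     else                                                            { /* all tracked fields die */ }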
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if !defined(__GNUC__)
+#pragma region Unwind information
+#endif
+
+public:
+ //
+ // Infrastructure functions: start/stop/reserve/emit.
+ //
+
+ void unwindBegProlog();
+ void unwindEndProlog();
+ void unwindBegEpilog();
+ void unwindEndEpilog();
+ void unwindReserve();
+ void unwindEmit(void* pHotCode, void* pColdCode);
+
+ //
+ // Specific unwind information functions: called by code generation to indicate a particular
+ // prolog or epilog unwindable instruction has been generated.
+ //
+
+ void unwindPush(regNumber reg);
+ void unwindAllocStack(unsigned size);
+ void unwindSetFrameReg(regNumber reg, unsigned offset);
+ void unwindSaveReg(regNumber reg, unsigned offset);
+
+#if defined(_TARGET_ARM_)
+ void unwindPushMaskInt(regMaskTP mask);
+ void unwindPushMaskFloat(regMaskTP mask);
+ void unwindPopMaskInt(regMaskTP mask);
+ void unwindPopMaskFloat(regMaskTP mask);
+ void unwindBranch16(); // The epilog terminates with a 16-bit branch (e.g., "bx lr")
+ void unwindNop(unsigned codeSizeInBytes); // Generate unwind NOP code. 'codeSizeInBytes' is 2 or 4 bytes. Only
+ // called via unwindPadding().
+ void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last
+ // instruction and the current location.
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_ARM64_)
+ void unwindNop();
+ void unwindPadding(); // Generate a sequence of unwind NOP codes representing instructions between the last
+ // instruction and the current location.
+ void unwindSaveReg(regNumber reg, int offset); // str reg, [sp, #offset]
+ void unwindSaveRegPreindexed(regNumber reg, int offset); // str reg, [sp, #offset]!
+ void unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset]
+ void unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int offset); // stp reg1, reg2, [sp, #offset]!
+ void unwindSaveNext(); // unwind code: save_next
+ void unwindReturn(regNumber reg); // ret lr
+#endif // defined(_TARGET_ARM64_)
+
+ //
+ // Private "helper" functions for the unwind implementation.
+ //
+
+private:
+#if FEATURE_EH_FUNCLETS
+ void unwindGetFuncLocations(FuncInfoDsc* func,
+ bool getHotSectionData,
+ /* OUT */ emitLocation** ppStartLoc,
+ /* OUT */ emitLocation** ppEndLoc);
+#endif // FEATURE_EH_FUNCLETS
+
+ void unwindReserveFunc(FuncInfoDsc* func);
+ void unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
+
+#if defined(_TARGET_AMD64_)
+
+ void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
+ void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);
+ UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
+
+ void unwindBegPrologWindows();
+ void unwindPushWindows(regNumber reg);
+ void unwindAllocStackWindows(unsigned size);
+ void unwindSetFrameRegWindows(regNumber reg, unsigned offset);
+ void unwindSaveRegWindows(regNumber reg, unsigned offset);
+
+#ifdef UNIX_AMD64_ABI
+ void unwindBegPrologCFI();
+ void unwindPushCFI(regNumber reg);
+ void unwindAllocStackCFI(unsigned size);
+ void unwindSetFrameRegCFI(regNumber reg, unsigned offset);
+ void unwindSaveRegCFI(regNumber reg, unsigned offset);
+ int mapRegNumToDwarfReg(regNumber reg);
+ void createCfiCode(FuncInfoDsc* func, UCHAR codeOffset, UCHAR opcode, USHORT dwarfReg, INT offset = 0);
+#endif // UNIX_AMD64_ABI
+#elif defined(_TARGET_ARM_)
+
+ void unwindPushPopMaskInt(regMaskTP mask, bool useOpsize16);
+ void unwindPushPopMaskFloat(regMaskTP mask);
+ void unwindSplit(FuncInfoDsc* func);
+
+#endif // _TARGET_ARM_
+
+#if !defined(__GNUC__)
+#pragma endregion // Note: the code inside the region is NOT guarded by !defined(__GNUC__); only the pragmas are
+#endif
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX SIMD XX
+ XX XX
+ XX Info about SIMD types, methods and the SIMD assembly (i.e. the assembly XX
+ XX that contains the distinguished, well-known SIMD type definitions). XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+ // Get highest available instruction set for floating point codegen
+ InstructionSet getFloatingPointInstructionSet()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (canUseAVX())
+ {
+ return InstructionSet_AVX;
+ }
+
+ // min bar is SSE2
+ assert(canUseSSE2());
+ return InstructionSet_SSE2;
+#else
+ assert(!"getFPInstructionSet() is not implemented for target arch");
+ unreached();
+ return InstructionSet_NONE;
+#endif
+ }
+
+ // Get highest available instruction set for SIMD codegen
+ InstructionSet getSIMDInstructionSet()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ return getFloatingPointInstructionSet();
+#else
+ assert(!"Available instruction set(s) for SIMD codegen is not defined for target arch");
+ unreached();
+ return InstructionSet_NONE;
+#endif
+ }
+
+#ifdef FEATURE_SIMD
+
+ // Should we support SIMD intrinsics?
+ bool featureSIMD;
+
+ // This is a temp lclVar allocated on the stack as TYP_SIMD. It is used to implement intrinsics
+ // that require indexed access to the individual fields of the vector, which is not well supported
+ // by the hardware. It is allocated when/if such situations are encountered during Lowering.
+ unsigned lvaSIMDInitTempVarNum;
+
+ // SIMD Types
+ CORINFO_CLASS_HANDLE SIMDFloatHandle;
+ CORINFO_CLASS_HANDLE SIMDDoubleHandle;
+ CORINFO_CLASS_HANDLE SIMDIntHandle;
+ CORINFO_CLASS_HANDLE SIMDUShortHandle;
+ CORINFO_CLASS_HANDLE SIMDUByteHandle;
+ CORINFO_CLASS_HANDLE SIMDShortHandle;
+ CORINFO_CLASS_HANDLE SIMDByteHandle;
+ CORINFO_CLASS_HANDLE SIMDLongHandle;
+ CORINFO_CLASS_HANDLE SIMDUIntHandle;
+ CORINFO_CLASS_HANDLE SIMDULongHandle;
+ CORINFO_CLASS_HANDLE SIMDVector2Handle;
+ CORINFO_CLASS_HANDLE SIMDVector3Handle;
+ CORINFO_CLASS_HANDLE SIMDVector4Handle;
+ CORINFO_CLASS_HANDLE SIMDVectorHandle;
+
+ // Get the handle for a SIMD type.
+ CORINFO_CLASS_HANDLE gtGetStructHandleForSIMD(var_types simdType, var_types simdBaseType)
+ {
+ if (simdBaseType == TYP_FLOAT)
+ {
+ switch (simdType)
+ {
+ case TYP_SIMD8:
+ return SIMDVector2Handle;
+ case TYP_SIMD12:
+ return SIMDVector3Handle;
+ case TYP_SIMD16:
+ if ((getSIMDVectorType() == TYP_SIMD32) || (SIMDVector4Handle != NO_CLASS_HANDLE))
+ {
+ return SIMDVector4Handle;
+ }
+ break;
+ case TYP_SIMD32:
+ break;
+ default:
+ unreached();
+ }
+ }
+ assert(simdType == getSIMDVectorType());
+ switch (simdBaseType)
+ {
+ case TYP_FLOAT:
+ return SIMDFloatHandle;
+ case TYP_DOUBLE:
+ return SIMDDoubleHandle;
+ case TYP_INT:
+ return SIMDIntHandle;
+ case TYP_CHAR:
+ return SIMDUShortHandle;
+ case TYP_USHORT:
+ return SIMDUShortHandle;
+ case TYP_UBYTE:
+ return SIMDUByteHandle;
+ case TYP_SHORT:
+ return SIMDShortHandle;
+ case TYP_BYTE:
+ return SIMDByteHandle;
+ case TYP_LONG:
+ return SIMDLongHandle;
+ case TYP_UINT:
+ return SIMDUIntHandle;
+ case TYP_ULONG:
+ return SIMDULongHandle;
+ default:
+ assert(!"Didn't find a class handle for simdType");
+ }
+ return NO_CLASS_HANDLE;
+ }
+
+ // SIMD Methods
+ CORINFO_METHOD_HANDLE SIMDVectorFloat_set_Item;
+ CORINFO_METHOD_HANDLE SIMDVectorFloat_get_Length;
+ CORINFO_METHOD_HANDLE SIMDVectorFloat_op_Addition;
+
+ // Returns true if the tree corresponds to a TYP_SIMD lcl var.
+ // Note that both SIMD vector args and locals are marked as lvSIMDType = true, but
+ // the type of an arg node is TYP_BYREF while that of a local node is TYP_SIMD or TYP_STRUCT.
+ bool isSIMDTypeLocal(GenTree* tree)
+ {
+ return tree->OperIsLocal() && lvaTable[tree->AsLclVarCommon()->gtLclNum].lvSIMDType;
+ }
+
+ // Returns true if the type of the tree is a byref of TYP_SIMD
+ bool isAddrOfSIMDType(GenTree* tree)
+ {
+ if (tree->TypeGet() == TYP_BYREF || tree->TypeGet() == TYP_I_IMPL)
+ {
+ switch (tree->OperGet())
+ {
+ case GT_ADDR:
+ return varTypeIsSIMD(tree->gtGetOp1());
+
+ case GT_LCL_VAR_ADDR:
+ return lvaTable[tree->AsLclVarCommon()->gtLclNum].lvSIMDType;
+
+ default:
+ return isSIMDTypeLocal(tree);
+ }
+ }
+
+ return false;
+ }
+
+ static bool isRelOpSIMDIntrinsic(SIMDIntrinsicID intrinsicId)
+ {
+ return (intrinsicId == SIMDIntrinsicEqual || intrinsicId == SIMDIntrinsicLessThan ||
+ intrinsicId == SIMDIntrinsicLessThanOrEqual || intrinsicId == SIMDIntrinsicGreaterThan ||
+ intrinsicId == SIMDIntrinsicGreaterThanOrEqual);
+ }
+
+ // Returns base type of a TYP_SIMD local.
+ // Returns TYP_UNKNOWN if the local is not TYP_SIMD.
+ var_types getBaseTypeOfSIMDLocal(GenTree* tree)
+ {
+ if (isSIMDTypeLocal(tree))
+ {
+ return lvaTable[tree->AsLclVarCommon()->gtLclNum].lvBaseType;
+ }
+
+ return TYP_UNKNOWN;
+ }
+
+ bool isSIMDClass(CORINFO_CLASS_HANDLE clsHnd)
+ {
+ return info.compCompHnd->isInSIMDModule(clsHnd);
+ }
+
+ bool isSIMDClass(typeInfo* pTypeInfo)
+ {
+ return pTypeInfo->IsStruct() && isSIMDClass(pTypeInfo->GetClassHandleForValueClass());
+ }
+
+ // Get the base (element) type and size in bytes for a SIMD type. Returns TYP_UNKNOWN
+ // if it is not a SIMD type or is an unsupported base type.
+ var_types getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes = nullptr);
+
+ var_types getBaseTypeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd)
+ {
+ return getBaseTypeAndSizeOfSIMDType(typeHnd, nullptr);
+ }
+
+ // Get SIMD Intrinsic info given the method handle.
+ // Also sets typeHnd, argCount, baseType and sizeBytes out params.
+ const SIMDIntrinsicInfo* getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* typeHnd,
+ CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_SIG_INFO* sig,
+ bool isNewObj,
+ unsigned* argCount,
+ var_types* baseType,
+ unsigned* sizeBytes);
+
+ // Pops and returns a GenTree node from the importer's type stack.
+ // Normalizes TYP_STRUCT value in the case of GT_CALL, GT_RET_EXPR and arg nodes.
+ GenTreePtr impSIMDPopStack(var_types type, bool expectAddr = false);
+
+ // Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index.
+ GenTreeSIMD* impSIMDGetFixed(var_types simdType, var_types baseType, unsigned simdSize, int index);
+
+ // Creates a GT_SIMD tree for Select operation
+ GenTreePtr impSIMDSelect(CORINFO_CLASS_HANDLE typeHnd,
+ var_types baseType,
+ unsigned simdVectorSize,
+ GenTree* op1,
+ GenTree* op2,
+ GenTree* op3);
+
+ // Creates a GT_SIMD tree for Min/Max operation
+ GenTreePtr impSIMDMinMax(SIMDIntrinsicID intrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ var_types baseType,
+ unsigned simdVectorSize,
+ GenTree* op1,
+ GenTree* op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain given relop result.
+ SIMDIntrinsicID impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ var_types* baseType,
+ GenTree** op1,
+ GenTree** op2);
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain == comparison result.
+ SIMDIntrinsicID impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ GenTree** op1,
+ GenTree** op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain > comparison result.
+ SIMDIntrinsicID impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ GenTree** op1,
+ GenTree** op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain >= comparison result.
+ SIMDIntrinsicID impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned simdVectorSize,
+ GenTree** op1,
+ GenTree** op2);
+
+ // Transforms operands and returns the SIMD intrinsic to be applied on
+ // transformed operands to obtain >= comparison result in case of int32
+ // and small int base type vectors.
+ SIMDIntrinsicID impSIMDIntegralRelOpGreaterThanOrEqual(
+ CORINFO_CLASS_HANDLE typeHnd, unsigned simdVectorSize, var_types baseType, GenTree** op1, GenTree** op2);
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ void setLclRelatedToSIMDIntrinsic(GenTreePtr tree);
+ bool areFieldsContiguous(GenTreePtr op1, GenTreePtr op2);
+ bool areArrayElementsContiguous(GenTreePtr op1, GenTreePtr op2);
+ bool areArgumentsContiguous(GenTreePtr op1, GenTreePtr op2);
+ GenTreePtr createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simdSize);
+
+ // check methodHnd to see if it is a SIMD method that is expanded as an intrinsic in the JIT.
+ GenTreePtr impSIMDIntrinsic(OPCODE opcode,
+ GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ int memberRef);
+
+ GenTreePtr getOp1ForConstructor(OPCODE opcode, GenTreePtr newobjThis, CORINFO_CLASS_HANDLE clsHnd);
+
+ // Whether a SIMD vector occupies only part of a SIMD register.
+ // SSE2: vector2f/3f are considered sub register SIMD types.
+ // AVX: vector2f, 3f and 4f are all considered sub register SIMD types.
+ bool isSubRegisterSIMDType(CORINFO_CLASS_HANDLE typeHnd)
+ {
+ unsigned sizeBytes = 0;
+ var_types baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, &sizeBytes);
+ return (baseType == TYP_FLOAT) && (sizeBytes < getSIMDVectorRegisterByteLength());
+ }
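+    // Illustrative sketch (hypothetical handle name): with SSE2's 16-byte vector registers,
+    // a Vector3-of-float handle yields baseType TYP_FLOAT and sizeBytes 12, so
+    //     isSubRegisterSIMDType(vector3FloatHnd);  // 12 < 16 => true
+    // while a full 16-byte Vector4 of float would return false under SSE2.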
+
+ bool isSubRegisterSIMDType(GenTreeSIMD* simdNode)
+ {
+ return (simdNode->gtSIMDSize < getSIMDVectorRegisterByteLength());
+ }
+
+ // Get the type for the hardware SIMD vector.
+ // This is the maximum SIMD type supported for this target.
+ var_types getSIMDVectorType()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (canUseAVX())
+ {
+ return TYP_SIMD32;
+ }
+ else
+ {
+ assert(canUseSSE2());
+ return TYP_SIMD16;
+ }
+#else
+ assert(!"getSIMDVectorType() unimplemented on target arch");
+ unreached();
+#endif
+ }
+
+ // Get the size of the SIMD type in bytes
+ int getSIMDTypeSizeInBytes(CORINFO_CLASS_HANDLE typeHnd)
+ {
+ unsigned sizeBytes = 0;
+ (void)getBaseTypeAndSizeOfSIMDType(typeHnd, &sizeBytes);
+ return sizeBytes;
+ }
+
+    // Get the number of elements of the base type of the SIMD vector given by its size and baseType
+ static int getSIMDVectorLength(unsigned simdSize, var_types baseType);
+
+    // Get the number of elements of the base type of the SIMD vector given by its type handle
+ int getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd);
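+    // For example, assuming the usual element sizes, getSIMDVectorLength(16, TYP_FLOAT) is
+    // 16 / 4 = 4 and getSIMDVectorLength(32, TYP_DOUBLE) is 32 / 8 = 4 (size divided by element size).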
+
+ // Get preferred alignment of SIMD type.
+ int getSIMDTypeAlignment(var_types simdType);
+
+ // Get the number of bytes in a SIMD Vector for the current compilation.
+ unsigned getSIMDVectorRegisterByteLength()
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (canUseAVX())
+ {
+ return YMM_REGSIZE_BYTES;
+ }
+ else
+ {
+ assert(canUseSSE2());
+ return XMM_REGSIZE_BYTES;
+ }
+#else
+ assert(!"getSIMDVectorRegisterByteLength() unimplemented on target arch");
+ unreached();
+#endif
+ }
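+    // Concretely (on xarch): this is XMM_REGSIZE_BYTES (16) when only SSE2 is usable and
+    // YMM_REGSIZE_BYTES (32) when AVX is enabled, mirroring the branches above.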
+
+ // The minimum and maximum possible number of bytes in a SIMD vector.
+ unsigned int maxSIMDStructBytes()
+ {
+ return getSIMDVectorRegisterByteLength();
+ }
+ unsigned int minSIMDStructBytes()
+ {
+ return emitTypeSize(TYP_SIMD8);
+ }
+
+#ifdef FEATURE_AVX_SUPPORT
+ // (maxPossibleSIMDStructBytes is for use in a context that requires a compile-time constant.)
+ static const unsigned maxPossibleSIMDStructBytes = 32;
+#else // !FEATURE_AVX_SUPPORT
+ static const unsigned maxPossibleSIMDStructBytes = 16;
+#endif // !FEATURE_AVX_SUPPORT
+
+ // Returns the codegen type for a given SIMD size.
+ var_types getSIMDTypeForSize(unsigned size)
+ {
+ var_types simdType = TYP_UNDEF;
+ if (size == 8)
+ {
+ simdType = TYP_SIMD8;
+ }
+ else if (size == 12)
+ {
+ simdType = TYP_SIMD12;
+ }
+ else if (size == 16)
+ {
+ simdType = TYP_SIMD16;
+ }
+#ifdef FEATURE_AVX_SUPPORT
+ else if (size == 32)
+ {
+ simdType = TYP_SIMD32;
+ }
+#endif // FEATURE_AVX_SUPPORT
+ else
+ {
+ noway_assert(!"Unexpected size for SIMD type");
+ }
+ return simdType;
+ }
+
+ unsigned getSIMDInitTempVarNum()
+ {
+ if (lvaSIMDInitTempVarNum == BAD_VAR_NUM)
+ {
+ lvaSIMDInitTempVarNum = lvaGrabTempWithImplicitUse(false DEBUGARG("SIMDInitTempVar"));
+ lvaTable[lvaSIMDInitTempVarNum].lvType = getSIMDVectorType();
+ }
+ return lvaSIMDInitTempVarNum;
+ }
+
+#endif // FEATURE_SIMD
+
+public:
+ //------------------------------------------------------------------------
+    // largestEnregisterableStructSize: The size in bytes of the largest struct that can be enregistered.
+    //
+    // Notes: It is not guaranteed that a struct of this size or smaller WILL be a
+    //        candidate for enregistration.
+
+ unsigned largestEnregisterableStructSize()
+ {
+#ifdef FEATURE_SIMD
+ unsigned vectorRegSize = getSIMDVectorRegisterByteLength();
+ if (vectorRegSize > TARGET_POINTER_SIZE)
+ {
+ return vectorRegSize;
+ }
+ else
+#endif // FEATURE_SIMD
+ {
+ return TARGET_POINTER_SIZE;
+ }
+ }
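+    // Illustrative values: on x64 with AVX this is 32 (YMM size), with only SSE2 it is 16,
+    // and without FEATURE_SIMD it falls back to TARGET_POINTER_SIZE (8 on 64-bit targets).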
+
+private:
+    // These routines need not be enclosed under FEATURE_SIMD since lvIsSIMDType()
+    // is defined for both FEATURE_SIMD and !FEATURE_SIMD appropriately. The use
+    // of these routines also avoids the need for #ifdef FEATURE_SIMD specific code.
+
+    // Is this var of SIMD struct type?
+ bool lclVarIsSIMDType(unsigned varNum)
+ {
+ LclVarDsc* varDsc = lvaTable + varNum;
+ return varDsc->lvIsSIMDType();
+ }
+
+ // Is this Local node a SIMD local?
+ bool lclVarIsSIMDType(GenTreeLclVarCommon* lclVarTree)
+ {
+ return lclVarIsSIMDType(lclVarTree->gtLclNum);
+ }
+
+    // Returns true if the TYP_SIMD local on the stack is aligned at its
+    // preferred byte boundary specified by getSIMDTypeAlignment().
+ bool isSIMDTypeLocalAligned(unsigned varNum)
+ {
+#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+ if (lclVarIsSIMDType(varNum) && lvaTable[varNum].lvType != TYP_BYREF)
+ {
+ bool ebpBased;
+ int off = lvaFrameAddress(varNum, &ebpBased);
+ // TODO-Cleanup: Can't this use the lvExactSize on the varDsc?
+ int alignment = getSIMDTypeAlignment(lvaTable[varNum].lvType);
+ bool isAligned = ((off % alignment) == 0);
+ noway_assert(isAligned || lvaTable[varNum].lvIsParam);
+ return isAligned;
+ }
+#endif // defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+
+ return false;
+ }
+
+ // Whether SSE2 is available
+ bool canUseSSE2() const
+ {
+#ifdef _TARGET_XARCH_
+ return opts.compCanUseSSE2;
+#else
+ return false;
+#endif
+ }
+
+ bool canUseAVX() const
+ {
+#ifdef FEATURE_AVX_SUPPORT
+ return opts.compCanUseAVX;
+#else
+ return false;
+#endif
+ }
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX Compiler XX
+ XX XX
+ XX Generic info about the compilation and the method being compiled. XX
+ XX It is responsible for driving the other phases. XX
+ XX It is also responsible for all the memory management. XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ Compiler* InlineeCompiler; // The Compiler instance for the inlinee
+
+ InlineResult* compInlineResult; // The result of importing the inlinee method.
+
+ bool compDoAggressiveInlining; // If true, mark every method as CORINFO_FLG_FORCEINLINE
+ bool compJmpOpUsed; // Does the method do a JMP
+ bool compLongUsed; // Does the method use TYP_LONG
+ bool compFloatingPointUsed; // Does the method use TYP_FLOAT or TYP_DOUBLE
+ bool compTailCallUsed; // Does the method do a tailcall
+ bool compLocallocUsed; // Does the method use localloc.
+ bool compQmarkUsed; // Does the method use GT_QMARK/GT_COLON
+ bool compQmarkRationalized; // Is it allowed to use a GT_QMARK/GT_COLON node.
+    bool compUnsafeCastUsed;   // Does the method use LDIND/STIND to cast between scalar/reference types
+
+ // NOTE: These values are only reliable after
+ // the importing is completely finished.
+
+ ExpandArrayStack<GenTreePtr>* compQMarks; // The set of QMark nodes created in the current compilation, so
+ // we can iterate over these efficiently.
+
+#if CPU_USES_BLOCK_MOVE
+ bool compBlkOpUsed; // Does the method do a COPYBLK or INITBLK
+#endif
+
+#ifdef DEBUG
+ // State information - which phases have completed?
+ // These are kept together for easy discoverability
+
+ bool bRangeAllowStress;
+ bool compCodeGenDone;
+ int64_t compNumStatementLinksTraversed; // # of links traversed while doing debug checks
+ bool fgNormalizeEHDone; // Has the flowgraph EH normalization phase been done?
+ size_t compSizeEstimate; // The estimated size of the method as per `gtSetEvalOrder`.
+ size_t compCycleEstimate; // The estimated cycle count of the method as per `gtSetEvalOrder`
+#endif // DEBUG
+
+ bool fgLocalVarLivenessDone; // Note that this one is used outside of debug.
+ bool fgLocalVarLivenessChanged;
+#if STACK_PROBES
+ bool compStackProbePrologDone;
+#endif
+#ifndef LEGACY_BACKEND
+ bool compLSRADone;
+#endif // !LEGACY_BACKEND
+ bool compRationalIRForm;
+
+    bool compUsesThrowHelper; // There is a call to a THROW_HELPER for the compiled method.
+
+ bool compGeneratingProlog;
+ bool compGeneratingEpilog;
+ bool compNeedsGSSecurityCookie; // There is an unsafe buffer (or localloc) on the stack.
+ // Insert cookie on frame and code to check the cookie, like VC++ -GS.
+ bool compGSReorderStackLayout; // There is an unsafe buffer on the stack, reorder locals and make local
+ // copies of susceptible parameters to avoid buffer overrun attacks through locals/params
+ bool getNeedsGSSecurityCookie() const
+ {
+ return compNeedsGSSecurityCookie;
+ }
+ void setNeedsGSSecurityCookie()
+ {
+ compNeedsGSSecurityCookie = true;
+ }
+
+ FrameLayoutState lvaDoneFrameLayout; // The highest frame layout state that we've completed. During
+ // frame layout calculations, this is the level we are currently
+ // computing.
+
+ //---------------------------- JITing options -----------------------------
+
+ enum codeOptimize
+ {
+ BLENDED_CODE,
+ SMALL_CODE,
+ FAST_CODE,
+
+ COUNT_OPT_CODE
+ };
+
+ struct Options
+ {
+ CORJIT_FLAGS* jitFlags; // all flags passed from the EE
+ unsigned eeFlags; // CorJitFlag flags passed from the EE
+ unsigned compFlags; // method attributes
+
+ codeOptimize compCodeOpt; // what type of code optimizations
+
+ bool compUseFCOMI;
+ bool compUseCMOV;
+#ifdef _TARGET_XARCH_
+ bool compCanUseSSE2; // Allow CodeGen to use "movq XMM" instructions
+
+#ifdef FEATURE_AVX_SUPPORT
+ bool compCanUseAVX; // Allow CodeGen to use AVX 256-bit vectors for SIMD operations
+#endif
+#endif
+
+// optimize maximally and/or favor speed over size?
+
+#define DEFAULT_MIN_OPTS_CODE_SIZE 60000
+#define DEFAULT_MIN_OPTS_INSTR_COUNT 20000
+#define DEFAULT_MIN_OPTS_BB_COUNT 2000
+#define DEFAULT_MIN_OPTS_LV_NUM_COUNT 2000
+#define DEFAULT_MIN_OPTS_LV_REF_COUNT 8000
+
+// Maximum number of locals before turning off inlining
+#define MAX_LV_NUM_COUNT_FOR_INLINING 512
+
+ bool compMinOpts;
+ unsigned instrCount;
+ unsigned lvRefCount;
+ bool compMinOptsIsSet;
+#ifdef DEBUG
+ bool compMinOptsIsUsed;
+
+ inline bool MinOpts()
+ {
+ assert(compMinOptsIsSet);
+ compMinOptsIsUsed = true;
+ return compMinOpts;
+ }
+ inline bool IsMinOptsSet()
+ {
+ return compMinOptsIsSet;
+ }
+#else // !DEBUG
+ inline bool MinOpts()
+ {
+ return compMinOpts;
+ }
+ inline bool IsMinOptsSet()
+ {
+ return compMinOptsIsSet;
+ }
+#endif // !DEBUG
+ inline void SetMinOpts(bool val)
+ {
+ assert(!compMinOptsIsUsed);
+ assert(!compMinOptsIsSet || (compMinOpts == val));
+ compMinOpts = val;
+ compMinOptsIsSet = true;
+ }
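+        // Intended usage sketch: the optimization level is decided exactly once, e.g.
+        //     opts.SetMinOpts(true);        // must precede any MinOpts() query
+        //     if (opts.MinOpts()) { ... }   // in DEBUG, asserts if queried before SetMinOpts
+        // Calling SetMinOpts again after a query (or with a different value) trips the asserts above.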
+
+ // true if the CLFLG_* for an optimization is set.
+ inline bool OptEnabled(unsigned optFlag)
+ {
+ return !!(compFlags & optFlag);
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ inline bool IsReadyToRun()
+ {
+ return (eeFlags & CORJIT_FLG_READYTORUN) != 0;
+ }
+#else
+ inline bool IsReadyToRun()
+ {
+ return false;
+ }
+#endif
+
+ // true if we should use the PINVOKE_{BEGIN,END} helpers instead of generating
+ // PInvoke transitions inline (e.g. when targeting CoreRT).
+ inline bool ShouldUsePInvokeHelpers()
+ {
+#if COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_USE_PINVOKE_HELPERS) != 0;
+#else
+ return false;
+#endif
+ }
+
+        // true if we should insert the REVERSE_PINVOKE_{ENTER,EXIT} helpers in the method
+        // prolog/epilog
+ inline bool IsReversePInvoke()
+ {
+#if COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_REVERSE_PINVOKE) != 0;
+#else
+ return false;
+#endif
+ }
+
+ // true if we must generate code compatible with JIT32 quirks
+ inline bool IsJit32Compat()
+ {
+#if defined(_TARGET_X86_) && COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+#else
+ return false;
+#endif
+ }
+
+ // true if we must generate code compatible with Jit64 quirks
+ inline bool IsJit64Compat()
+ {
+#if defined(_TARGET_AMD64_) && COR_JIT_EE_VERSION > 460
+ return (jitFlags->corJitFlags2 & CORJIT_FLG2_DESKTOP_QUIRKS) != 0;
+#elif defined(_TARGET_AMD64_) && !defined(FEATURE_CORECLR)
+ return true;
+#else
+ return false;
+#endif
+ }
+
+#ifdef DEBUGGING_SUPPORT
+ bool compScopeInfo; // Generate the LocalVar info ?
+ bool compDbgCode; // Generate debugger-friendly code?
+ bool compDbgInfo; // Gather debugging info?
+ bool compDbgEnC;
+#else
+ static const bool compDbgCode;
+#endif
+
+#ifdef PROFILING_SUPPORTED
+ bool compNoPInvokeInlineCB;
+#else
+ static const bool compNoPInvokeInlineCB;
+#endif
+
+ bool compMustInlinePInvokeCalli; // Unmanaged CALLI in IL stubs must be inlined
+
+#ifdef DEBUG
+ bool compGcChecks; // Check arguments and return values to ensure they are sane
+ bool compStackCheckOnRet; // Check ESP on return to ensure it is correct
+ bool compStackCheckOnCall; // Check ESP after every call to ensure it is correct
+
+#endif
+
+        bool compNeedSecurityCheck; // This flag really means whether or not a security object needs
+                                    // to be allocated on the stack.
+                                    // It will be set to true in the following cases:
+                                    //   1. When the method being compiled has declarative security
+ // (i.e. when CORINFO_FLG_NOSECURITYWRAP is reset for the current method).
+ // This is also the case when we inject a prolog and epilog in the method.
+ // (or)
+ // 2. When the method being compiled has imperative security (i.e. the method
+ // calls into another method that has CORINFO_FLG_SECURITYCHECK flag set).
+ // (or)
+ // 3. When opts.compDbgEnC is true. (See also Compiler::compCompile).
+ //
+// When this flag is set, jit will allocate a gc-reference local variable (lvaSecurityObject),
+// which gets reported as a GC root to stackwalker.
+// (See also ICodeManager::GetAddrOfSecurityObject.)
+
+#if RELOC_SUPPORT
+ bool compReloc;
+#endif
+
+#ifdef DEBUG
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+        bool compEnablePCRelAddr; // Whether an absolute addr can be encoded as a PC-rel offset by RyuJIT where possible
+#endif
+#endif // DEBUG
+
+#ifdef UNIX_AMD64_ABI
+        // This flag indicates whether there is a need to align the frame.
+        // On AMD64-Windows, if there are calls, 4 slots for the outgoing args are allocated, except for
+        // FastTailCall. These slots make the frame size non-zero, so the alignment logic will be called.
+        // On AMD64-Unix, there are no such slots. There is a possibility to have calls in the method with a frame
+        // size of 0, in which case the frame alignment logic won't kick in. This flag takes care of the AMD64-Unix
+        // case by remembering that there are calls and making sure the frame alignment logic is executed.
+ bool compNeedToAlignFrame;
+#endif // UNIX_AMD64_ABI
+
+ bool compProcedureSplitting; // Separate cold code from hot code
+
+ bool genFPorder; // Preserve FP order (operations are non-commutative)
+ bool genFPopt; // Can we do frame-pointer-omission optimization?
+ bool altJit; // True if we are an altjit and are compiling this method
+
+#ifdef DEBUG
+ bool compProcedureSplittingEH; // Separate cold code from hot code for functions with EH
+ bool dspCode; // Display native code generated
+ bool dspEHTable; // Display the EH table reported to the VM
+ bool dspInstrs; // Display the IL instructions intermixed with the native code output
+ bool dspEmit; // Display emitter output
+ bool dspLines; // Display source-code lines intermixed with native code output
+ bool dmpHex; // Display raw bytes in hex of native code output
+ bool varNames; // Display variables names in native code output
+ bool disAsm; // Display native code as it is generated
+ bool disAsmSpilled; // Display native code when any register spilling occurs
+ bool disDiffable; // Makes the Disassembly code 'diff-able'
+ bool disAsm2; // Display native code after it is generated using external disassembler
+ bool dspOrder; // Display names of each of the methods that we ngen/jit
+ bool dspUnwind; // Display the unwind info output
+ bool dspDiffable; // Makes the Jit Dump 'diff-able' (currently uses same COMPlus_* flag as disDiffable)
+ bool compLongAddress; // Force using large pseudo instructions for long address
+ // (IF_LARGEJMP/IF_LARGEADR/IF_LARGLDC)
+ bool dspGCtbls; // Display the GC tables
+#endif
+
+#ifdef LATE_DISASM
+ bool doLateDisasm; // Run the late disassembler
+#endif // LATE_DISASM
+
+#if DUMP_GC_TABLES && !defined(DEBUG) && defined(JIT32_GCENCODER)
+// Only the JIT32_GCENCODER implements GC dumping in non-DEBUG code.
+#pragma message("NOTE: this non-debug build has GC ptr table dumping always enabled!")
+ static const bool dspGCtbls = true;
+#endif
+
+        // We need stack probes to guarantee that we won't trigger a stack overflow
+        // when calling unmanaged code before it gets a chance to set up a frame, because
+        // the EE will have no idea where it is.
+        //
+        // We will only be doing this currently for hosted environments. Unfortunately
+        // we need to take care of stubs, so potentially, we will have to do the probes
+        // for any call. We have a plan for not needing them for stubs, though.
+ bool compNeedStackProbes;
+
+ // Whether to emit Enter/Leave/TailCall hooks using a dummy stub (DummyProfilerELTStub())
+        // This option helps make the JIT behave as if it is running under a profiler.
+ bool compJitELTHookEnabled;
+
+#if FEATURE_TAILCALL_OPT
+ // Whether opportunistic or implicit tail call optimization is enabled.
+ bool compTailCallOpt;
+ // Whether optimization of transforming a recursive tail call into a loop is enabled.
+ bool compTailCallLoopOpt;
+#endif
+
+#ifdef ARM_SOFTFP
+ static const bool compUseSoftFP = true;
+#else // !ARM_SOFTFP
+ static const bool compUseSoftFP = false;
+#endif
+
+ GCPollType compGCPollType;
+ } opts;
+
+#ifdef ALT_JIT
+ static bool s_pAltJitExcludeAssembliesListInitialized;
+ static AssemblyNamesList2* s_pAltJitExcludeAssembliesList;
+#endif // ALT_JIT
+
+#ifdef DEBUG
+
+ static bool s_dspMemStats; // Display per-phase memory statistics for every function
+
+ template <typename T>
+ T dspPtr(T p)
+ {
+ return (p == ZERO) ? ZERO : (opts.dspDiffable ? T(0xD1FFAB1E) : p);
+ }
+
+ template <typename T>
+ T dspOffset(T o)
+ {
+ return (o == ZERO) ? ZERO : (opts.dspDiffable ? T(0xD1FFAB1E) : o);
+ }
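+    // Example of the effect: when opts.dspDiffable is set, any non-null pointer or non-zero offset
+    // passed through dspPtr()/dspOffset() prints as the fixed placeholder 0xD1FFAB1E, so two JIT
+    // dumps of the same method can be diffed without spurious address/offset differences.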
+
+ static int dspTreeID(GenTree* tree)
+ {
+ return tree->gtTreeID;
+ }
+ static void printTreeID(GenTree* tree)
+ {
+ if (tree == nullptr)
+ {
+ printf("[------]");
+ }
+ else
+ {
+ printf("[%06d]", dspTreeID(tree));
+ }
+ }
+
+#endif // DEBUG
+
+// clang-format off
+#define STRESS_MODES \
+ \
+ STRESS_MODE(NONE) \
+ \
+ /* "Variations" stress areas which we try to mix up with each other. */ \
+ /* These should not be exhaustively used as they might */ \
+ /* hide/trivialize other areas */ \
+ \
+ STRESS_MODE(REGS) STRESS_MODE(DBL_ALN) STRESS_MODE(LCL_FLDS) STRESS_MODE(UNROLL_LOOPS) \
+ STRESS_MODE(MAKE_CSE) STRESS_MODE(LEGACY_INLINE) STRESS_MODE(CLONE_EXPR) \
+ STRESS_MODE(USE_FCOMI) STRESS_MODE(USE_CMOV) STRESS_MODE(FOLD) \
+ STRESS_MODE(BB_PROFILE) STRESS_MODE(OPT_BOOLS_GC) STRESS_MODE(REMORPH_TREES) \
+ STRESS_MODE(64RSLT_MUL) STRESS_MODE(DO_WHILE_LOOPS) STRESS_MODE(MIN_OPTS) \
+ STRESS_MODE(REVERSE_FLAG) /* Will set GTF_REVERSE_OPS whenever we can */ \
+ STRESS_MODE(REVERSE_COMMA) /* Will reverse commas created with gtNewCommaNode */ \
+ STRESS_MODE(TAILCALL) /* Will make the call as a tailcall whenever legal */ \
+ STRESS_MODE(CATCH_ARG) /* Will spill catch arg */ \
+ STRESS_MODE(UNSAFE_BUFFER_CHECKS) \
+ STRESS_MODE(NULL_OBJECT_CHECK) \
+ STRESS_MODE(PINVOKE_RESTORE_ESP) \
+ STRESS_MODE(RANDOM_INLINE) \
+ \
+ STRESS_MODE(GENERIC_VARN) STRESS_MODE(COUNT_VARN) \
+ \
+ /* "Check" stress areas that can be exhaustively used if we */ \
+    /* don't care about performance at all */                                          \
+ \
+ STRESS_MODE(FORCE_INLINE) /* Treat every method as AggressiveInlining */ \
+ STRESS_MODE(CHK_FLOW_UPDATE) \
+ STRESS_MODE(EMITTER) STRESS_MODE(CHK_REIMPORT) STRESS_MODE(FLATFP) \
+ \
+ STRESS_MODE(GENERIC_CHECK) STRESS_MODE(COUNT) \
+
+ enum compStressArea
+ {
+#define STRESS_MODE(mode) STRESS_##mode,
+ STRESS_MODES
+#undef STRESS_MODE
+ };
+// clang-format on
+
+#ifdef DEBUG
+ static const LPCWSTR s_compStressModeNames[STRESS_COUNT + 1];
+ BYTE compActiveStressModes[STRESS_COUNT];
+#endif // DEBUG
+
+#define MAX_STRESS_WEIGHT 100
+
+ bool compStressCompile(compStressArea stressArea, unsigned weightPercentage);
+
+#ifdef DEBUG
+
+ bool compInlineStress()
+ {
+ return compStressCompile(STRESS_LEGACY_INLINE, 50);
+ }
+
+ bool compRandomInlineStress()
+ {
+ return compStressCompile(STRESS_RANDOM_INLINE, 50);
+ }
+
+#endif // DEBUG
+
+ bool compTailCallStress()
+ {
+#ifdef DEBUG
+ return (JitConfig.TailcallStress() != 0 || compStressCompile(STRESS_TAILCALL, 5));
+#else
+ return false;
+#endif
+ }
+
+ codeOptimize compCodeOpt()
+ {
+#if 0
+ // Switching between size & speed has measurable throughput impact
+ // (3.5% on NGen mscorlib when measured). It used to be enabled for
+        // DEBUG, but we should generate identical code between CHK & RET builds,
+ // so that's not acceptable.
+ // TODO-Throughput: Figure out what to do about size vs. speed & throughput.
+ // Investigate the cause of the throughput regression.
+
+ return opts.compCodeOpt;
+#else
+ return BLENDED_CODE;
+#endif
+ }
+
+#ifdef DEBUG
+ CLRRandom* inlRNG;
+#endif
+
+ //--------------------- Info about the procedure --------------------------
+
+ struct Info
+ {
+ COMP_HANDLE compCompHnd;
+ CORINFO_MODULE_HANDLE compScopeHnd;
+ CORINFO_CLASS_HANDLE compClassHnd;
+ CORINFO_METHOD_HANDLE compMethodHnd;
+ CORINFO_METHOD_INFO* compMethodInfo;
+
+ BOOL hasCircularClassConstraints;
+ BOOL hasCircularMethodConstraints;
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ const char* compMethodName;
+ const char* compClassName;
+ const char* compFullName;
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+        // Method hash is logically const, but computed
+ // on first demand.
+ mutable unsigned compMethodHashPrivate;
+ unsigned compMethodHash() const;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ // things for pseudorandom nop insertion
+ unsigned compChecksum;
+ CLRRandom compRNG;
+#endif
+
+ // The following holds the FLG_xxxx flags for the method we're compiling.
+ unsigned compFlags;
+
+ // The following holds the class attributes for the method we're compiling.
+ unsigned compClassAttr;
+
+ const BYTE* compCode;
+ IL_OFFSET compILCodeSize; // The IL code size
+ UNATIVE_OFFSET compNativeCodeSize; // The native code size, after instructions are issued. This
+ // is less than (compTotalHotCodeSize + compTotalColdCodeSize) only if:
+ // (1) the code is not hot/cold split, and we issued less code than we expected, or
+ // (2) the code is hot/cold split, and we issued less code than we expected
+ // in the cold section (the hot section will always be padded out to compTotalHotCodeSize).
+
+ bool compIsStatic : 1; // Is the method static (no 'this' pointer)?
+ bool compIsVarArgs : 1; // Does the method have varargs parameters?
+ bool compIsContextful : 1; // contextful method
+ bool compInitMem : 1; // Is the CORINFO_OPT_INIT_LOCALS bit set in the method info options?
+ bool compUnwrapContextful : 1; // JIT should unwrap proxies when possible
+ bool compProfilerCallback : 1; // JIT inserted a profiler Enter callback
+        bool compPublishStubParam : 1; // EAX captured in prolog will be available through an intrinsic
+ bool compRetBuffDefStack : 1; // The ret buff argument definitely points into the stack.
+
+ var_types compRetType; // Return type of the method as declared in IL
+ var_types compRetNativeType; // Normalized return type as per target arch ABI
+ unsigned compILargsCount; // Number of arguments (incl. implicit but not hidden)
+ unsigned compArgsCount; // Number of arguments (incl. implicit and hidden)
+ unsigned compRetBuffArg; // position of hidden return param var (0, 1) (BAD_VAR_NUM means not present);
+ int compTypeCtxtArg; // position of hidden param for type context for generic code (CORINFO_CALLCONV_PARAMTYPE)
+ unsigned compThisArg; // position of implicit this pointer param (not to be confused with lvaArg0Var)
+ unsigned compILlocalsCount; // Number of vars : args + locals (incl. implicit but not hidden)
+ unsigned compLocalsCount; // Number of vars : args + locals (incl. implicit and hidden)
+ unsigned compMaxStack;
+ UNATIVE_OFFSET compTotalHotCodeSize; // Total number of bytes of Hot Code in the method
+ UNATIVE_OFFSET compTotalColdCodeSize; // Total number of bytes of Cold Code in the method
+
+ unsigned compCallUnmanaged; // count of unmanaged calls
+ unsigned compLvFrameListRoot; // lclNum for the Frame root
+ unsigned compXcptnsCount; // Number of exception-handling clauses read in the method's IL.
+ // You should generally use compHndBBtabCount instead: it is the
+ // current number of EH clauses (after additions like synchronized
+ // methods and funclets, and removals like unreachable code deletion).
+
+ bool compMatchedVM; // true if the VM is "matched": either the JIT is a cross-compiler
+ // and the VM expects that, or the JIT is a "self-host" compiler
+ // (e.g., x86 hosted targeting x86) and the VM expects that.
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ /* The following holds IL scope information about local variables.
+ */
+
+ unsigned compVarScopesCount;
+ VarScopeDsc* compVarScopes;
+
+ /* The following holds information about instr offsets for
+ * which we need to report IP-mappings
+ */
+
+ IL_OFFSET* compStmtOffsets; // sorted
+ unsigned compStmtOffsetsCount;
+ ICorDebugInfo::BoundaryTypes compStmtOffsetsImplicit;
+
+#endif // DEBUGGING_SUPPORT || DEBUG
+
+#define CPU_X86 0x0100 // The generic X86 CPU
+#define CPU_X86_PENTIUM_4 0x0110
+
+#define CPU_X64 0x0200 // The generic x64 CPU
+#define CPU_AMD_X64 0x0210 // AMD x64 CPU
+#define CPU_INTEL_X64 0x0240 // Intel x64 CPU
+
+#define CPU_ARM 0x0300 // The generic ARM CPU
+
+ unsigned genCPU; // What CPU are we running on
+ } info;
+
+ // Returns true if the method being compiled returns a non-void and non-struct value.
+ // Note that lvaInitTypeRef() normalizes compRetNativeType for struct returns in a
+    // single register as per target arch ABI (e.g. on AMD64-Windows structs of size 1, 2,
+    // 4 or 8 get normalized to TYP_BYTE/TYP_SHORT/TYP_INT/TYP_LONG; on ARM, HFA structs).
+    // Methods returning such structs are considered to return a non-struct value, and
+ // this method returns true in that case.
+ bool compMethodReturnsNativeScalarType()
+ {
+ return (info.compRetType != TYP_VOID) && !varTypeIsStruct(info.compRetNativeType);
+ }
+
+ // Returns true if the method being compiled returns RetBuf addr as its return value
+ bool compMethodReturnsRetBufAddr()
+ {
+ // There are cases where implicit RetBuf argument should be explicitly returned in a register.
+ // In such cases the return type is changed to TYP_BYREF and appropriate IR is generated.
+ // These cases are:
+        // 1. The profiler Leave callback expects the address of the retbuf as the return value for
+        //    methods with a hidden RetBuf argument. When profiler callbacks are needed,
+        //    impReturnInstruction() creates GT_RETURN(TYP_BYREF, op1 = Addr of RetBuf) for
+        //    methods with a hidden RetBufArg.
+ //
+ // 2. As per the System V ABI, the address of RetBuf needs to be returned by
+ // methods with hidden RetBufArg in RAX. In such case GT_RETURN is of TYP_BYREF,
+ // returning the address of RetBuf.
+ //
+        // 3. The Windows 64-bit native calling convention also requires the address of RetBuf
+ // to be returned in RAX.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ return (info.compRetBuffArg != BAD_VAR_NUM);
+#else // !_TARGET_AMD64_
+ return (compIsProfilerHookNeeded()) && (info.compRetBuffArg != BAD_VAR_NUM);
+#endif // !_TARGET_AMD64_
+ }
+
+ // Returns true if the method returns a value in more than one return register
+ // TODO-ARM-Bug: Deal with multi-register genReturnLocaled structs?
+ // TODO-ARM64: Does this apply for ARM64 too?
+ bool compMethodReturnsMultiRegRetType()
+ {
+#if FEATURE_MULTIREG_RET
+#if defined(_TARGET_X86_)
+ // On x86 only 64-bit longs are returned in multiple registers
+ return varTypeIsLong(info.compRetNativeType);
+#else // targets: X64-UNIX, ARM64 or ARM32
+ // On all other targets that support multireg return values:
+ // Methods returning a struct in multiple registers have a return value of TYP_STRUCT.
+ // Such method's compRetNativeType is TYP_STRUCT without a hidden RetBufArg
+ return varTypeIsStruct(info.compRetNativeType) && (info.compRetBuffArg == BAD_VAR_NUM);
+#endif // TARGET_XXX
+#else // not FEATURE_MULTIREG_RET
+ // For this architecture there are no multireg returns
+ return false;
+#endif // FEATURE_MULTIREG_RET
+ }
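+    // Concrete cases (illustrative): on x86 a method returning a 64-bit long reports true here
+    // (the value comes back in the EDX:EAX pair); on X64-UNIX, ARM64 or ARM32 a struct returned
+    // by value without a hidden RetBufArg also reports true.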
+
+#if FEATURE_MULTIREG_ARGS
+    // Given a GenTree node of TYP_STRUCT that represents a pass-by-value argument,
+    // return the gcPtr layout for the pointer-sized fields.
+ void getStructGcPtrsFromOp(GenTreePtr op, BYTE* gcPtrsOut);
+#endif // FEATURE_MULTIREG_ARGS
+
+ // Returns true if the method being compiled returns a value
+ bool compMethodHasRetVal()
+ {
+ return compMethodReturnsNativeScalarType() || compMethodReturnsRetBufAddr() ||
+ compMethodReturnsMultiRegRetType();
+ }
+
+#if defined(DEBUG)
+
+ void compDispLocalVars();
+
+#endif // DEBUG
+
+//-------------------------- Global Compiler Data ------------------------------------
+
+#ifdef DEBUG
+ static unsigned s_compMethodsCount; // to produce unique label names
+ unsigned compGenTreeID;
+#endif
+
+ BasicBlock* compCurBB; // the current basic block in process
+ GenTreePtr compCurStmt; // the current statement in process
+#ifdef DEBUG
+ unsigned compCurStmtNum; // to give all statements an increasing StmtNum when printing dumps
+#endif
+
+ // The following is used to create the 'method JIT info' block.
+ size_t compInfoBlkSize;
+ BYTE* compInfoBlkAddr;
+
+ EHblkDsc* compHndBBtab; // array of EH data
+ unsigned compHndBBtabCount; // element count of used elements in EH data array
+ unsigned compHndBBtabAllocCount; // element count of allocated elements in EH data array
+
+#if defined(_TARGET_X86_)
+
+ //-------------------------------------------------------------------------
+ // Tracking of region covered by the monitor in synchronized methods
+ void* syncStartEmitCookie; // the emitter cookie for first instruction after the call to MON_ENTER
+ void* syncEndEmitCookie; // the emitter cookie for first instruction after the call to MON_EXIT
+
+#endif // _TARGET_X86_
+
+ Phases previousCompletedPhase; // the most recently completed phase
+
+ //-------------------------------------------------------------------------
+ // The following keeps track of how many bytes of local frame space we've
+ // grabbed so far in the current function, and how many argument bytes we
+ // need to pop when we return.
+ //
+
+ unsigned compLclFrameSize; // secObject+lclBlk+locals+temps
+
+ // Count of callee-saved regs we pushed in the prolog.
+ // Does not include EBP for isFramePointerUsed() and double-aligned frames.
+    // In the case of AMD64 this doesn't include float regs saved on the stack.
+ unsigned compCalleeRegsPushed;
+
+#if defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
+ // Mask of callee saved float regs on stack.
+ regMaskTP compCalleeFPRegsSavedMask;
+#endif
+#ifdef _TARGET_AMD64_
+// Quirk for VS debug-launch scenario to work:
+// Bytes of padding between save-reg area and locals.
+#define VSQUIRK_STACK_PAD (2 * REGSIZE_BYTES)
+ unsigned compVSQuirkStackPaddingNeeded;
+ bool compQuirkForPPPflag;
+#endif
+
+ unsigned compArgSize; // total size of arguments in bytes (including register args (lvIsRegArg))
+
+ unsigned compMapILargNum(unsigned ILargNum); // map accounting for hidden args
+ unsigned compMapILvarNum(unsigned ILvarNum); // map accounting for hidden args
+ unsigned compMap2ILvarNum(unsigned varNum); // map accounting for hidden args
+
+ //-------------------------------------------------------------------------
+
+ static void compStartup(); // One-time initialization
+ static void compShutdown(); // One-time finalization
+
+ void compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo);
+ void compDone();
+
+ static void compDisplayStaticSizes(FILE* fout);
+
+ //------------ Some utility functions --------------
+
+ void* compGetHelperFtn(CorInfoHelpFunc ftnNum, /* IN */
+ void** ppIndirection); /* OUT */
+
+ // Several JIT/EE interface functions return a CorInfoType, and also return a
+ // class handle as an out parameter if the type is a value class. Returns the
+ // size of the type these describe.
+ unsigned compGetTypeSize(CorInfoType cit, CORINFO_CLASS_HANDLE clsHnd);
+
+#ifdef DEBUG
+ // Components used by the compiler may write unit test suites, and
+ // have them run within this method. They will be run only once per process, and only
+ // in debug. (Perhaps should be under the control of a COMPlus_ flag.)
+ // These should fail by asserting.
+ void compDoComponentUnitTestsOnce();
+#endif // DEBUG
+
+ int compCompile(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags);
+ void compCompileFinish();
+ int compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ CorInfoInstantiationVerification instVerInfo);
+
+ ArenaAllocator* compGetAllocator();
+
+#if MEASURE_MEM_ALLOC
+ struct MemStats
+ {
+ unsigned allocCnt; // # of allocs
+        UINT64 allocSz;                  // total size of those allocs.
+ UINT64 allocSzMax; // Maximum single allocation.
+ UINT64 allocSzByKind[CMK_Count]; // Classified by "kind".
+ UINT64 nraTotalSizeAlloc;
+ UINT64 nraTotalSizeUsed;
+
+ static const char* s_CompMemKindNames[]; // Names of the kinds.
+
+ MemStats() : allocCnt(0), allocSz(0), allocSzMax(0), nraTotalSizeAlloc(0), nraTotalSizeUsed(0)
+ {
+ for (int i = 0; i < CMK_Count; i++)
+ {
+ allocSzByKind[i] = 0;
+ }
+ }
+ MemStats(const MemStats& ms)
+ : allocCnt(ms.allocCnt)
+ , allocSz(ms.allocSz)
+ , allocSzMax(ms.allocSzMax)
+ , nraTotalSizeAlloc(ms.nraTotalSizeAlloc)
+ , nraTotalSizeUsed(ms.nraTotalSizeUsed)
+ {
+ for (int i = 0; i < CMK_Count; i++)
+ {
+ allocSzByKind[i] = ms.allocSzByKind[i];
+ }
+ }
+
+ // Until we have ubiquitous constructors.
+ void Init()
+ {
+ this->MemStats::MemStats();
+ }
+
+ void AddAlloc(size_t sz, CompMemKind cmk)
+ {
+ allocCnt += 1;
+ allocSz += sz;
+ if (sz > allocSzMax)
+ {
+ allocSzMax = sz;
+ }
+ allocSzByKind[cmk] += sz;
+ }
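+        // Illustrative effect: AddAlloc(128, CMK_Unknown) bumps allocCnt by one, adds 128 to both
+        // allocSz and allocSzByKind[CMK_Unknown], and raises allocSzMax if 128 exceeds the current max.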
+
+ void Print(FILE* f); // Print these stats to f.
+ void PrintByKind(FILE* f); // Do just the by-kind histogram part.
+ };
+ MemStats genMemStats;
+
+ struct AggregateMemStats : public MemStats
+ {
+ unsigned nMethods;
+
+ AggregateMemStats() : MemStats(), nMethods(0)
+ {
+ }
+
+ void Add(const MemStats& ms)
+ {
+ nMethods++;
+ allocCnt += ms.allocCnt;
+ allocSz += ms.allocSz;
+ allocSzMax = max(allocSzMax, ms.allocSzMax);
+ for (int i = 0; i < CMK_Count; i++)
+ {
+ allocSzByKind[i] += ms.allocSzByKind[i];
+ }
+ nraTotalSizeAlloc += ms.nraTotalSizeAlloc;
+ nraTotalSizeUsed += ms.nraTotalSizeUsed;
+ }
+
+ void Print(FILE* f); // Print these stats to jitstdout.
+ };
+
+ static CritSecObject s_memStatsLock; // This lock protects the data structures below.
+ static MemStats s_maxCompMemStats; // Stats for the compilation with the largest amount allocated.
+ static AggregateMemStats s_aggMemStats; // Aggregates statistics for all compilations.
+
+#endif // MEASURE_MEM_ALLOC
+
+#if LOOP_HOIST_STATS
+ unsigned m_loopsConsidered;
+ bool m_curLoopHasHoistedExpression;
+ unsigned m_loopsWithHoistedExpressions;
+ unsigned m_totalHoistedExpressions;
+
+ void AddLoopHoistStats();
+ void PrintPerMethodLoopHoistStats();
+
+ static CritSecObject s_loopHoistStatsLock; // This lock protects the data structures below.
+ static unsigned s_loopsConsidered;
+ static unsigned s_loopsWithHoistedExpressions;
+ static unsigned s_totalHoistedExpressions;
+
+ static void PrintAggregateLoopHoistStats(FILE* f);
+#endif // LOOP_HOIST_STATS
+
+ void* compGetMemArray(size_t numElem, size_t elemSize, CompMemKind cmk = CMK_Unknown);
+ void* compGetMemArrayA(size_t numElem, size_t elemSize, CompMemKind cmk = CMK_Unknown);
+ void* compGetMem(size_t sz, CompMemKind cmk = CMK_Unknown);
+ void* compGetMemA(size_t sz, CompMemKind cmk = CMK_Unknown);
+ static void* compGetMemCallback(void*, size_t, CompMemKind cmk = CMK_Unknown);
+ void compFreeMem(void*);
+
+ bool compIsForImportOnly();
+ bool compIsForInlining();
+ bool compDonotInline();
+
+#ifdef DEBUG
+ const char* compLocalVarName(unsigned varNum, unsigned offs);
+ VarName compVarName(regNumber reg, bool isFloatReg = false);
+ const char* compRegVarName(regNumber reg, bool displayVar = false, bool isFloatReg = false);
+ const char* compRegPairName(regPairNo regPair);
+ const char* compRegNameForSize(regNumber reg, size_t size);
+ const char* compFPregVarName(unsigned fpReg, bool displayVar = false);
+ void compDspSrcLinesByNativeIP(UNATIVE_OFFSET curIP);
+ void compDspSrcLinesByLineNum(unsigned line, bool seek = false);
+#endif // DEBUG
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+ typedef ListNode<VarScopeDsc*> VarScopeListNode;
+
+ struct VarScopeMapInfo
+ {
+ VarScopeListNode* head;
+ VarScopeListNode* tail;
+ static VarScopeMapInfo* Create(VarScopeListNode* node, IAllocator* alloc)
+ {
+ VarScopeMapInfo* info = new (alloc) VarScopeMapInfo;
+ info->head = node;
+ info->tail = node;
+ return info;
+ }
+ };
+
+ // Max value of scope count for which we would use linear search; for larger values we would use hashtable lookup.
+ static const unsigned MAX_LINEAR_FIND_LCL_SCOPELIST = 32;
+
+ typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, VarScopeMapInfo*, JitSimplerHashBehavior>
+ VarNumToScopeDscMap;
+
+    // Map to keep variables' scopes, indexed by varNum, containing their scope dscs at the index.
+ VarNumToScopeDscMap* compVarScopeMap;
+
+ VarScopeDsc* compFindLocalVar(unsigned varNum, unsigned lifeBeg, unsigned lifeEnd);
+
+ VarScopeDsc* compFindLocalVar(unsigned varNum, unsigned offs);
+
+ VarScopeDsc* compFindLocalVarLinear(unsigned varNum, unsigned offs);
+
+ void compInitVarScopeMap();
+
+ VarScopeDsc** compEnterScopeList; // List has the offsets where variables
+ // enter scope, sorted by instr offset
+ unsigned compNextEnterScope;
+
+ VarScopeDsc** compExitScopeList; // List has the offsets where variables
+ // go out of scope, sorted by instr offset
+ unsigned compNextExitScope;
+
+ void compInitScopeLists();
+
+ void compResetScopeLists();
+
+ VarScopeDsc* compGetNextEnterScope(unsigned offs, bool scan = false);
+
+ VarScopeDsc* compGetNextExitScope(unsigned offs, bool scan = false);
+
+ void compProcessScopesUntil(unsigned offset,
+ VARSET_TP* inScope,
+ void (Compiler::*enterScopeFn)(VARSET_TP* inScope, VarScopeDsc*),
+ void (Compiler::*exitScopeFn)(VARSET_TP* inScope, VarScopeDsc*));
+
+#ifdef DEBUG
+ void compDispScopeLists();
+#endif // DEBUG
+
+#endif // DEBUGGING_SUPPORT
+
+ bool compIsProfilerHookNeeded();
+
+ //-------------------------------------------------------------------------
+ /* Statistical Data Gathering */
+
+ void compJitStats(); // call this function and enable
+ // various ifdef's below for statistical data
+
+#if CALL_ARG_STATS
+ void compCallArgStats();
+ static void compDispCallArgStats(FILE* fout);
+#endif
+
+ //-------------------------------------------------------------------------
+
+protected:
+#ifdef DEBUG
+ bool skipMethod();
+#endif
+
+ ArenaAllocator* compAllocator;
+
+public:
+ // This one presents an implementation of the "IAllocator" abstract class that uses "compAllocator",
+ // suitable for use by utilcode collection types.
+ IAllocator* compAsIAllocator;
+
+#if MEASURE_MEM_ALLOC
+ IAllocator* compAsIAllocatorBitset; // An allocator that uses the CMK_bitset tracker.
+ IAllocator* compAsIAllocatorGC; // An allocator that uses the CMK_GC tracker.
+ IAllocator* compAsIAllocatorLoopHoist; // An allocator that uses the CMK_LoopHoist tracker.
+#ifdef DEBUG
+ IAllocator* compAsIAllocatorDebugOnly; // An allocator that uses the CMK_DebugOnly tracker.
+#endif // DEBUG
+#endif // MEASURE_MEM_ALLOC
+
+ void compFunctionTraceStart();
+ void compFunctionTraceEnd(void* methodCodePtr, ULONG methodCodeSize, bool isNYI);
+
+protected:
+ size_t compMaxUncheckedOffsetForNullObject;
+
+ void compInitOptions(CORJIT_FLAGS* compileFlags);
+
+ void compSetProcessor();
+ void compInitDebuggingInfo();
+ void compSetOptimizationLevel();
+#ifdef _TARGET_ARMARCH_
+ bool compRsvdRegCheck(FrameLayoutState curState);
+#endif
+ void compCompile(void** methodCodePtr, ULONG* methodCodeSize, CORJIT_FLAGS* compileFlags);
+
+ // Data required for generating profiler Enter/Leave/TailCall hooks
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef PROFILING_SUPPORTED
+ bool compProfilerHookNeeded; // Whether profiler Enter/Leave/TailCall hook needs to be generated for the method
+ void* compProfilerMethHnd; // Profiler handle of the method being compiled. Passed as param to ELT callbacks
+ bool compProfilerMethHndIndirected; // Whether compProfilerHandle is pointer to the handle or is an actual handle
+#endif
+#ifdef _TARGET_AMD64_
+ bool compQuirkForPPP(); // Check if this method should be Quirked for the PPP issue
+#endif
+public:
+ // Assumes called as part of process shutdown; does any compiler-specific work associated with that.
+ static void ProcessShutdownWork(ICorStaticInfo* statInfo);
+
+ IAllocator* getAllocator()
+ {
+ return compAsIAllocator;
+ }
+
+#if MEASURE_MEM_ALLOC
+ IAllocator* getAllocatorBitset()
+ {
+ return compAsIAllocatorBitset;
+ }
+ IAllocator* getAllocatorGC()
+ {
+ return compAsIAllocatorGC;
+ }
+ IAllocator* getAllocatorLoopHoist()
+ {
+ return compAsIAllocatorLoopHoist;
+ }
+#else // !MEASURE_MEM_ALLOC
+ IAllocator* getAllocatorBitset()
+ {
+ return compAsIAllocator;
+ }
+ IAllocator* getAllocatorGC()
+ {
+ return compAsIAllocator;
+ }
+ IAllocator* getAllocatorLoopHoist()
+ {
+ return compAsIAllocator;
+ }
+#endif // !MEASURE_MEM_ALLOC
+
+#ifdef DEBUG
+ IAllocator* getAllocatorDebugOnly()
+ {
+#if MEASURE_MEM_ALLOC
+ return compAsIAllocatorDebugOnly;
+#else // !MEASURE_MEM_ALLOC
+ return compAsIAllocator;
+#endif // !MEASURE_MEM_ALLOC
+ }
+#endif // DEBUG
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX typeInfo XX
+ XX XX
+ XX Checks for type compatibility and merges types XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+    // Set to TRUE if verification cannot be skipped for this method.
+ // If we detect unverifiable code, we will lazily check
+ // canSkipMethodVerification() to see if verification is REALLY needed.
+ BOOL tiVerificationNeeded;
+
+    // It is initially TRUE, and it gets set to FALSE if we run into unverifiable code.
+ // Note that this is valid only if tiVerificationNeeded was ever TRUE.
+ BOOL tiIsVerifiableCode;
+
+ // Set to TRUE if runtime callout is needed for this method
+ BOOL tiRuntimeCalloutNeeded;
+
+ // Set to TRUE if security prolog/epilog callout is needed for this method
+ // Note: This flag is different than compNeedSecurityCheck.
+ // compNeedSecurityCheck means whether or not a security object needs
+ // to be allocated on the stack, which is currently true for EnC as well.
+ // tiSecurityCalloutNeeded means whether or not security callouts need
+ // to be inserted in the jitted code.
+ BOOL tiSecurityCalloutNeeded;
+
+ // Returns TRUE if child is equal to or a subtype of parent for merge purposes
+    // This support is necessary to support attributes that are not described in,
+    // for example, signatures. For example, the permanent home byref (a byref that
+    // points to the gc heap) isn't a property of method signatures, therefore
+    // it is safe to have mismatches here (that tiCompatibleWith will not flag),
+    // but when deciding if we need to reimport a block, we need to take these
+    // into account.
+ BOOL tiMergeCompatibleWith(const typeInfo& pChild, const typeInfo& pParent, bool normalisedForStack) const;
+
+ // Returns TRUE if child is equal to or a subtype of parent.
+ // normalisedForStack indicates that both types are normalised for the stack
+ BOOL tiCompatibleWith(const typeInfo& pChild, const typeInfo& pParent, bool normalisedForStack) const;
+
+ // Merges pDest and pSrc. Returns FALSE if merge is undefined.
+ // *pDest is modified to represent the merged type. Sets "*changed" to true
+ // if this changes "*pDest".
+ BOOL tiMergeToCommonParent(typeInfo* pDest, const typeInfo* pSrc, bool* changed) const;
+
+ // Set pDest from the primitive value type.
+ // Eg. System.Int32 -> ELEMENT_TYPE_I4
+
+ BOOL tiFromPrimitiveValueClass(typeInfo* pDest, const typeInfo* pVC) const;
+
+#ifdef DEBUG
+ // <BUGNUM> VSW 471305
+ // IJW allows assigning REF to BYREF. The following allows us to temporarily
+ // bypass the assert check in gcMarkRegSetGCref and gcMarkRegSetByref
+ // We use a "short" as we need to push/pop this scope.
+ // </BUGNUM>
+ short compRegSetCheckLevel;
+#endif
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX IL verification stuff XX
+ XX XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+
+public:
+ // The following is used to track liveness of local variables, initialization
+ // of valueclass constructors, and type safe use of IL instructions.
+
+ // dynamic state info needed for verification
+ EntryState verCurrentState;
+
+    // The 'this' ptr of object type .ctors is considered inited only after
+ // the base class ctor is called, or an alternate ctor is called.
+ // An uninited this ptr can be used to access fields, but cannot
+ // be used to call a member function.
+ BOOL verTrackObjCtorInitState;
+
+ void verInitBBEntryState(BasicBlock* block, EntryState* currentState);
+
+ // Requires that "tis" is not TIS_Bottom -- it's a definite init/uninit state.
+ void verSetThisInit(BasicBlock* block, ThisInitState tis);
+ void verInitCurrentState();
+ void verResetCurrentState(BasicBlock* block, EntryState* currentState);
+
+ // Merges the current verification state into the entry state of "block", return FALSE if that merge fails,
+ // TRUE if it succeeds. Further sets "*changed" to true if this changes the entry state of "block".
+ BOOL verMergeEntryStates(BasicBlock* block, bool* changed);
+
+ void verConvertBBToThrowVerificationException(BasicBlock* block DEBUGARG(bool logMsg));
+ void verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logMsg));
+ typeInfo verMakeTypeInfo(CORINFO_CLASS_HANDLE clsHnd,
+ bool bashStructToRef = false); // converts from jit type representation to typeInfo
+ typeInfo verMakeTypeInfo(CorInfoType ciType,
+ CORINFO_CLASS_HANDLE clsHnd); // converts from jit type representation to typeInfo
+ BOOL verIsSDArray(typeInfo ti);
+ typeInfo verGetArrayElemType(typeInfo ti);
+
+ typeInfo verParseArgSigToTypeInfo(CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args);
+ BOOL verNeedsVerification();
+ BOOL verIsByRefLike(const typeInfo& ti);
+ BOOL verIsSafeToReturnByRef(const typeInfo& ti);
+
+ // generic type variables range over types that satisfy IsBoxable
+ BOOL verIsBoxable(const typeInfo& ti);
+
+ void DECLSPEC_NORETURN verRaiseVerifyException(INDEBUG(const char* reason) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line));
+ void verRaiseVerifyExceptionIfNeeded(INDEBUG(const char* reason) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line));
+ bool verCheckTailCallConstraint(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, // Is this a "constrained." call
+ // on a type parameter?
+                                   bool speculative // If true, won't throw if verification fails. Instead it will
+ // return false to the caller.
+ // If false, it will throw.
+ );
+ bool verIsBoxedValueType(typeInfo ti);
+
+ void verVerifyCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ bool tailCall,
+ bool readonlyCall, // is this a "readonly." call?
+ const BYTE* delegateCreateStart,
+ const BYTE* codeAddr,
+ CORINFO_CALL_INFO* callInfo DEBUGARG(const char* methodName));
+
+ BOOL verCheckDelegateCreation(const BYTE* delegateCreateStart, const BYTE* codeAddr, mdMemberRef& targetMemberRef);
+
+ typeInfo verVerifySTIND(const typeInfo& ptr, const typeInfo& value, const typeInfo& instrType);
+ typeInfo verVerifyLDIND(const typeInfo& ptr, const typeInfo& instrType);
+ void verVerifyField(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ const CORINFO_FIELD_INFO& fieldInfo,
+ const typeInfo* tiThis,
+ BOOL mutator,
+ BOOL allowPlainStructAsThis = FALSE);
+ void verVerifyCond(const typeInfo& tiOp1, const typeInfo& tiOp2, unsigned opcode);
+ void verVerifyThisPtrInitialised();
+ BOOL verIsCallToInitThisPtr(CORINFO_CLASS_HANDLE context, CORINFO_CLASS_HANDLE target);
+
+ // Register allocator
+ void raInitStackFP();
+ void raEnregisterVarsPrePassStackFP();
+ void raSetRegLclBirthDeath(GenTreePtr tree, VARSET_VALARG_TP lastlife, bool fromLDOBJ);
+ void raEnregisterVarsPostPassStackFP();
+ void raGenerateFPRefCounts();
+ void raEnregisterVarsStackFP();
+ void raUpdateHeightsForVarsStackFP(VARSET_VALARG_TP mask);
+
+ regNumber raRegForVarStackFP(unsigned varTrackedIndex);
+ void raAddPayloadStackFP(VARSET_VALARG_TP mask, unsigned weight);
+
+ // returns true if enregistering v1 would save more mem accesses than v2
+ bool raVarIsGreaterValueStackFP(LclVarDsc* lv1, LclVarDsc* lv2);
+
+#ifdef DEBUG
+ void raDumpHeightsStackFP();
+ void raDumpVariableRegIntfFloat();
+#endif
+
+#if FEATURE_STACK_FP_X87
+
+ // Currently, we use FP transition blocks in only 2 situations:
+ //
+    // -conditional jump on longs where the FP stack differs from the target: it's not strictly
+    //  necessary, but it's low frequency and the code would get complicated if we try to
+    //  inline the FP stack adjustment, as we have a lot of special casing going on to try to
+    //  minimize the way we generate the jump code.
+    // -case statements of switch where the FP stack differs from the one of evaluating the switch () statement
+ // We do this as we want to codegen switch as a jumptable. Again, this is low frequency.
+ //
+ // However, transition blocks have 2 problems
+ //
+ // - Procedure splitting: current implementation of procedure splitting requires all basic blocks to
+ // be known at codegen time, as it generates all hot blocks first and cold blocks later. This ties
+ // us up in codegen and is a solvable problem (we could make procedure splitting generate blocks
+    //   in the right place without preordering them); for now it causes us to have to generate the transition
+ // blocks in the cold area if we want procedure splitting.
+ //
+ //
+ // - Thread abort exceptions and transition blocks. Transition blocks were designed under the assumption
+ // that no exceptions can happen inside them. Unfortunately Thread.Abort can happen in any instruction,
+ // and if we have handlers we will have to try to call them. Fixing this the right way would imply
+    //   having multiple try native code regions for a single try IL region. This is doable and shouldn't be
+    //   a big change in the exception code.
+ //
+ // Given the low frequency of the cases where we have transition blocks, I've decided to dumb down
+ // optimizations. For these 2 cases:
+ //
+ // - When there is a chance that we will have FP transition blocks, we won't do procedure splitting.
+    // - When a method has a handler, it won't enregister any FP variables that go through a conditional long or
+ // a switch statement.
+ //
+ // If at any point we find we need to optimize this, we should throw work at unblocking the restrictions our
+ // current procedure splitting and exception code have.
+ bool compMayHaveTransitionBlocks;
+
+ VARSET_TP raMaskDontEnregFloat; // mask for additional restrictions
+
+ VARSET_TP raLclRegIntfFloat[REG_FPCOUNT];
+
+ unsigned raCntStkStackFP;
+ unsigned raCntWtdStkDblStackFP;
+ unsigned raCntStkParamDblStackFP;
+
+    // Payload in mem accesses for enregistering a variable (we don't want to mix with refcounts)
+ // TODO: Do we want to put this in LclVarDsc?
+ unsigned raPayloadStackFP[lclMAX_TRACKED];
+ unsigned raHeightsStackFP[lclMAX_TRACKED][FP_VIRTUALREGISTERS + 1];
+#ifdef DEBUG
+ // Useful for debugging
+ unsigned raHeightsNonWeightedStackFP[lclMAX_TRACKED][FP_VIRTUALREGISTERS + 1];
+#endif
+#endif // FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+ // One line log function. Default level is 0. Increasing it gives you
+ // more log information
+
+ // levels are currently unused: #define JITDUMP(level,...) ();
+ void JitLogEE(unsigned level, const char* fmt, ...);
+
+ bool compDebugBreak;
+
+ bool compJitHaltMethod();
+
+#endif
+
+ /*
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XX XX
+ XX GS Security checks for unsafe buffers XX
+ XX XX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ */
+public:
+ struct ShadowParamVarInfo
+ {
+ FixedBitVect* assignGroup; // the closure set of variables whose values depend on each other
+ unsigned shadowCopy; // Lcl var num, valid only if not set to NO_SHADOW_COPY
+
+ static bool mayNeedShadowCopy(LclVarDsc* varDsc)
+ {
+#if defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND)
+ // GS cookie logic to create shadow slots, create trees to copy reg args to shadow
+ // slots and update all trees to refer to shadow slots is done immediately after
+            // fgMorph(). LSRA could potentially mark a param as DoNotEnregister after the JIT determines
+            // not to shadow a parameter. Also, LSRA could potentially spill a param which is passed
+            // in a register. Therefore, conservatively, all params may need a shadow copy. Note that
+ // GS cookie logic further checks whether the param is a ptr or an unsafe buffer before
+ // creating a shadow slot even though this routine returns true.
+ //
+ // TODO-AMD64-CQ: Revisit this conservative approach as it could create more shadow slots than
+ // required. There are two cases under which a reg arg could potentially be used from its
+ // home location:
+ // a) LSRA marks it as DoNotEnregister (see LinearScan::identifyCandidates())
+ // b) LSRA spills it
+ //
+ // Possible solution to address case (a)
+ // - The conditions under which LSRA marks a varDsc as DoNotEnregister could be checked
+            //     in this routine. Note that determining liveness out of an exception handler is something
+            //     we may not be able to do here, since GS cookie logic is invoked ahead of liveness
+            //     computation. Therefore, for methods with exception handling that need the GS cookie check
+            //     we might have to take the conservative approach.
+ //
+ // Possible solution to address case (b)
+            //   - Whenever a parameter passed in an argument register needs to be spilled by LSRA, we
+ // create a new spill temp if the method needs GS cookie check.
+ return varDsc->lvIsParam;
+#else // !(defined(_TARGET_AMD64_) && !defined(LEGACY_BACKEND))
+ return varDsc->lvIsParam && !varDsc->lvIsRegArg;
+#endif
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ printf("assignGroup [%p]; shadowCopy: [%d];\n", assignGroup, shadowCopy);
+ }
+#endif
+ };
+
+ GSCookie* gsGlobalSecurityCookieAddr; // Address of global cookie for unsafe buffer checks
+ GSCookie gsGlobalSecurityCookieVal; // Value of global cookie if addr is NULL
+ ShadowParamVarInfo* gsShadowVarInfo; // Table used by shadow param analysis code
+
+ void gsGSChecksInitCookie(); // Grabs cookie variable
+    void gsGSChecksInitCookie();     // Grabs cookie variable
+    void gsCopyShadowParams();       // Identify vulnerable params and create shadow copies
+    bool gsFindVulnerableParams();   // Shadow param analysis code
+    void gsParamsToShadows();        // Insert copy code and replace param uses with shadows
+
+ static fgWalkPreFn gsMarkPtrsAndAssignGroups; // Shadow param analysis tree-walk
+ static fgWalkPreFn gsReplaceShadowParams; // Shadow param replacement tree-walk
+
+#define DEFAULT_MAX_INLINE_SIZE 100 // Methods with > DEFAULT_MAX_INLINE_SIZE IL bytes will never be inlined.
+    // This can be overridden by setting the COMPlus_JITInlineSize env variable.
+
+#define DEFAULT_MAX_INLINE_DEPTH 20 // Methods at more than this level deep will not be inlined
+
+private:
+#ifdef FEATURE_JIT_METHOD_PERF
+ JitTimer* pCompJitTimer; // Timer data structure (by phases) for current compilation.
+ static CompTimeSummaryInfo s_compJitTimerSummary; // Summary of the Timer information for the whole run.
+
+ static LPCWSTR JitTimeLogCsv(); // Retrieve the file name for CSV from ConfigDWORD.
+ static LPCWSTR compJitTimeLogFilename; // If a log file for JIT time is desired, filename to write it to.
+#endif
+ inline void EndPhase(Phases phase); // Indicate the end of the given phase.
+
+#if defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+ // These variables are associated with maintaining SQM data about compile time.
+ unsigned __int64 m_compCyclesAtEndOfInlining; // The thread-virtualized cycle count at the end of the inlining phase
+ // in the current compilation.
+ unsigned __int64 m_compCycles; // Net cycle count for current compilation
+ DWORD m_compTickCountAtEndOfInlining; // The result of GetTickCount() (# ms since some epoch marker) at the end of
+ // the inlining phase in the current compilation.
+#endif // defined(DEBUG) || defined(INLINE_DATA) || defined(FEATURE_CLRSQM)
+
+ // Records the SQM-relevant state (cycles and tick count). Should be called after inlining is complete.
+ // (We do this after inlining because this marks the last point at which the JIT is likely to cause
+ // type-loading and class initialization).
+ void RecordStateAtEndOfInlining();
+ // Assumes being called at the end of compilation. Update the SQM state.
+ void RecordStateAtEndOfCompilation();
+
+#ifdef FEATURE_CLRSQM
+ // Does anything SQM related necessary at process shutdown time.
+ static void ProcessShutdownSQMWork(ICorStaticInfo* statInfo);
+#endif // FEATURE_CLRSQM
+
+public:
+#if FUNC_INFO_LOGGING
+ static LPCWSTR compJitFuncInfoFilename; // If a log file for per-function information is required, this is the
+ // filename to write it to.
+ static FILE* compJitFuncInfoFile; // And this is the actual FILE* to write to.
+#endif // FUNC_INFO_LOGGING
+
+ Compiler* prevCompiler; // Previous compiler on stack for TLS Compiler* linked list for reentrant compilers.
+
+ // Is the compilation in a full trust context?
+ bool compIsFullTrust();
+
+#ifndef FEATURE_TRACELOGGING
+ // Should we actually fire the noway assert body and the exception handler?
+ bool compShouldThrowOnNoway();
+#else // FEATURE_TRACELOGGING
+ // Should we actually fire the noway assert body and the exception handler?
+ bool compShouldThrowOnNoway(const char* filename, unsigned line);
+
+ // Telemetry instance to use per method compilation.
+ JitTelemetry compJitTelemetry;
+
+ // Get common parameters that have to be logged with most telemetry data.
+ void compGetTelemetryDefaults(const char** assemblyName,
+ const char** scopeName,
+ const char** methodName,
+ unsigned* methodHash);
+#endif // !FEATURE_TRACELOGGING
+
+#ifdef DEBUG
+private:
+ NodeToTestDataMap* m_nodeTestData;
+
+ static const unsigned FIRST_LOOP_HOIST_CSE_CLASS = 1000;
+ unsigned m_loopHoistCSEClass; // LoopHoist test annotations turn into CSE requirements; we
+ // label them with CSE Class #'s starting at FIRST_LOOP_HOIST_CSE_CLASS.
+ // Currently kept in this field.
+public:
+ NodeToTestDataMap* GetNodeTestData()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_nodeTestData == nullptr)
+ {
+ compRoot->m_nodeTestData = new (getAllocatorDebugOnly()) NodeToTestDataMap(getAllocatorDebugOnly());
+ }
+ return compRoot->m_nodeTestData;
+ }
+
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, int, JitSimplerHashBehavior> NodeToIntMap;
+
+ // Returns the set (i.e., the domain of the result map) of nodes that are keys in m_nodeTestData, and
+ // currently occur in the AST graph.
+ NodeToIntMap* FindReachableNodesInNodeTestData();
+
+ // Node "from" is being eliminated, and being replaced by node "to". If "from" had any associated
+ // test data, associate that data with "to".
+ void TransferTestDataToNode(GenTreePtr from, GenTreePtr to);
+
+ // Requires that "to" is a clone of "from". If any nodes in the "from" tree
+ // have annotations, attach similar annotations to the corresponding nodes in "to".
+ void CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to);
+
+ // These are the methods that test that the various conditions implied by the
+ // test attributes are satisfied.
+ void JitTestCheckSSA(); // SSA builder tests.
+ void JitTestCheckVN(); // Value numbering tests.
+#endif // DEBUG
+
+ // The "FieldSeqStore", for canonicalizing field sequences. See the definition of FieldSeqStore for
+ // operations.
+ FieldSeqStore* m_fieldSeqStore;
+
+ FieldSeqStore* GetFieldSeqStore()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_fieldSeqStore == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_FieldSeqStore, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_FieldSeqStore) CompAllocator(this, CMK_FieldSeqStore);
+ compRoot->m_fieldSeqStore = new (ialloc) FieldSeqStore(ialloc);
+ }
+ return compRoot->m_fieldSeqStore;
+ }
+
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, FieldSeqNode*, JitSimplerHashBehavior> NodeToFieldSeqMap;
+
+ // Some nodes of "TYP_BYREF" or "TYP_I_IMPL" actually represent the address of a field within a struct, but since
+ // the offset of the field is zero, there's no "GT_ADD" node. We normally attach a field sequence to the constant
+ // that is added, but what do we do when that constant is zero, and is thus not present? We use this mechanism to
+ // attach the field sequence directly to the address node.
+ NodeToFieldSeqMap* m_zeroOffsetFieldMap;
+
+ NodeToFieldSeqMap* GetZeroOffsetFieldMap()
+ {
+ // Don't need to worry about inlining here
+ if (m_zeroOffsetFieldMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_ZeroOffsetFieldMap, and use that for
+ // allocation.
+ IAllocator* ialloc = new (this, CMK_ZeroOffsetFieldMap) CompAllocator(this, CMK_ZeroOffsetFieldMap);
+ m_zeroOffsetFieldMap = new (ialloc) NodeToFieldSeqMap(ialloc);
+ }
+ return m_zeroOffsetFieldMap;
+ }
+
+ // Requires that "op1" is a node of type "TYP_BYREF" or "TYP_I_IMPL". We are dereferencing this with the fields in
+ // "fieldSeq", whose offsets are required all to be zero. Ensures that any field sequence annotation currently on
+ // "op1" or its components is augmented by appending "fieldSeq". In practice, if "op1" is a GT_LCL_FLD, it has
+ // a field sequence as a member; otherwise, it may be the addition of an a byref and a constant, where the const
+ // has a field sequence -- in this case "fieldSeq" is appended to that of the constant; otherwise, we
+ // record the the field sequence using the ZeroOffsetFieldMap described above.
+ //
+ // One exception to the above is when "op1" is a node of type "TYP_REF" and is a GT_LCL_VAR.
+ // This happens when the System.Object vtable pointer is a regular field at offset 0 in System.Private.CoreLib in
+ // CoreRT. Such a case is handled the same as the default case.
+ void fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq);
+
+ typedef SimplerHashTable<const GenTree*, PtrKeyFuncs<GenTree>, ArrayInfo, JitSimplerHashBehavior>
+ NodeToArrayInfoMap;
+ NodeToArrayInfoMap* m_arrayInfoMap;
+
+ NodeToArrayInfoMap* GetArrayInfoMap()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_arrayInfoMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_ArrayInfoMap, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
+ compRoot->m_arrayInfoMap = new (ialloc) NodeToArrayInfoMap(ialloc);
+ }
+ return compRoot->m_arrayInfoMap;
+ }
+
+ NodeToUnsignedMap* m_heapSsaMap;
+
+ // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap
+ // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state,
+ // all the possible heap states are possible initial states of the corresponding catch block(s).)
+ NodeToUnsignedMap* GetHeapSsaMap()
+ {
+ Compiler* compRoot = impInlineRoot();
+ if (compRoot->m_heapSsaMap == nullptr)
+ {
+ // Create a CompAllocator that labels sub-structure with CMK_ArrayInfoMap, and use that for allocation.
+ IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
+ compRoot->m_heapSsaMap = new (ialloc) NodeToUnsignedMap(ialloc);
+ }
+ return compRoot->m_heapSsaMap;
+ }
+
+ // The Refany type is the only struct type whose structure is implicitly assumed by IL. We need its fields.
+ CORINFO_CLASS_HANDLE m_refAnyClass;
+ CORINFO_FIELD_HANDLE GetRefanyDataField()
+ {
+ if (m_refAnyClass == nullptr)
+ {
+ m_refAnyClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPED_BYREF);
+ }
+ return info.compCompHnd->getFieldInClass(m_refAnyClass, 0);
+ }
+ CORINFO_FIELD_HANDLE GetRefanyTypeField()
+ {
+ if (m_refAnyClass == nullptr)
+ {
+ m_refAnyClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPED_BYREF);
+ }
+ return info.compCompHnd->getFieldInClass(m_refAnyClass, 1);
+ }
+
+#if VARSET_COUNTOPS
+ static BitSetSupport::BitSetOpCounter m_varsetOpCounter;
+#endif
+#if ALLVARSET_COUNTOPS
+ static BitSetSupport::BitSetOpCounter m_allvarsetOpCounter;
+#endif
+
+ static HelperCallProperties s_helperCallProperties;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static var_types GetTypeFromClassificationAndSizes(SystemVClassificationType classType, int size);
+ static var_types GetEightByteType(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ unsigned slotNum);
+ static void GetStructTypeOffset(const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR& structDesc,
+ var_types* type0,
+ var_types* type1,
+ unsigned __int8* offset0,
+ unsigned __int8* offset1);
+ void fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ void fgMorphMultiregStructArgs(GenTreeCall* call);
+ GenTreePtr fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr);
+
+}; // end of class Compiler
+
+// Inline methods of CompAllocator.
+void* CompAllocator::Alloc(size_t sz)
+{
+#if MEASURE_MEM_ALLOC
+ return m_comp->compGetMem(sz, m_cmk);
+#else
+ return m_comp->compGetMem(sz);
+#endif
+}
+
+void* CompAllocator::ArrayAlloc(size_t elems, size_t elemSize)
+{
+#if MEASURE_MEM_ALLOC
+ return m_comp->compGetMemArray(elems, elemSize, m_cmk);
+#else
+ return m_comp->compGetMemArray(elems, elemSize);
+#endif
+}
+
+// LclVarDsc constructor. Uses Compiler, so must come after Compiler definition.
+inline LclVarDsc::LclVarDsc(Compiler* comp)
+ : // Initialize the ArgRegs to REG_STK.
+ // The morph phase will do the right thing and change it
+ // to the correct register if the arg is passed in a register.
+ _lvArgReg(REG_STK)
+ ,
+#if FEATURE_MULTIREG_ARGS
+ _lvOtherArgReg(REG_STK)
+ ,
+#endif // FEATURE_MULTIREG_ARGS
+#if ASSERTION_PROP
+ lvRefBlks(BlockSetOps::UninitVal())
+ ,
+#endif // ASSERTION_PROP
+ lvPerSsaData(comp->getAllocator())
+{
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Miscellaneous Compiler stuff XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// Values used to mark the types a stack slot is used for
+
+const unsigned TYPE_REF_INT = 0x01; // slot used as a 32-bit int
+const unsigned TYPE_REF_LNG = 0x02; // slot used as a 64-bit long
+const unsigned TYPE_REF_FLT = 0x04; // slot used as a 32-bit float
+const unsigned TYPE_REF_DBL = 0x08; // slot used as a 64-bit float
+const unsigned TYPE_REF_PTR = 0x10; // slot used as a 32-bit pointer
+const unsigned TYPE_REF_BYR = 0x20; // slot used as a byref pointer
+const unsigned TYPE_REF_STC = 0x40; // slot used as a struct
+const unsigned TYPE_REF_TYPEMASK = 0x7F; // bits that represent the type
+
+// const unsigned TYPE_REF_ADDR_TAKEN = 0x80; // slot's address was taken
+
+/*****************************************************************************
+ *
+ * Variables to keep track of total code amounts.
+ */
+
+#if DISPLAY_SIZES
+
+extern size_t grossVMsize;
+extern size_t grossNCsize;
+extern size_t totalNCsize;
+
+extern unsigned genMethodICnt;
+extern unsigned genMethodNCnt;
+extern size_t gcHeaderISize;
+extern size_t gcPtrMapISize;
+extern size_t gcHeaderNSize;
+extern size_t gcPtrMapNSize;
+
+#endif // DISPLAY_SIZES
+
+/*****************************************************************************
+ *
+ * Variables to keep track of basic block counts (more data on 1 BB methods)
+ */
+
+#if COUNT_BASIC_BLOCKS
+extern Histogram bbCntTable;
+extern Histogram bbOneBBSizeTable;
+#endif
+
+/*****************************************************************************
+ *
+ * Used by optFindNaturalLoops to gather statistical information such as
+ * - total number of natural loops
+ * - number of loops with 1, 2, ... exit conditions
+ * - number of loops that have an iterator (for like)
+ * - number of loops that have a constant iterator
+ */
+
+#if COUNT_LOOPS
+
+extern unsigned totalLoopMethods; // counts the total number of methods that have natural loops
+extern unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has
+extern unsigned totalLoopOverflows; // # of methods that identified more loops than we can represent
+extern unsigned totalLoopCount; // counts the total number of natural loops
+extern unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
+extern unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
+extern unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
+extern unsigned simpleTestLoopCount; // counts the # of loops with an iterator and a simple loop condition (iter <
+ // const)
+extern unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
+extern bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
+extern unsigned loopsThisMethod; // counts the number of loops in the current method
+extern bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method.
+extern Histogram loopCountTable; // Histogram of loop counts
+extern Histogram loopExitCountTable; // Histogram of loop exit counts
+
+#endif // COUNT_LOOPS
+
+/*****************************************************************************
+ * Variables to keep track of how many iterations we perform in a dataflow pass
+ */
+
+#if DATAFLOW_ITER
+
+extern unsigned CSEiterCount; // counts the # of iterations for the CSE dataflow
+extern unsigned CFiterCount; // counts the # of iterations for the Const Folding dataflow
+
+#endif // DATAFLOW_ITER
+
+#if MEASURE_BLOCK_SIZE
+extern size_t genFlowNodeSize;
+extern size_t genFlowNodeCnt;
+#endif // MEASURE_BLOCK_SIZE
+
+#if MEASURE_NODE_SIZE
+struct NodeSizeStats
+{
+ void Init()
+ {
+ genTreeNodeCnt = 0;
+ genTreeNodeSize = 0;
+ genTreeNodeActualSize = 0;
+ }
+
+ size_t genTreeNodeCnt;
+ size_t genTreeNodeSize; // The size we allocate
+ size_t genTreeNodeActualSize; // The actual size of the node. Note that the actual size will likely be smaller
+ // than the allocated size, but we sometimes use SetOper()/ChangeOper() to change
+ // a smaller node to a larger one. TODO-Cleanup: add stats on
+ // SetOper()/ChangeOper() usage to quantify this.
+};
+extern NodeSizeStats genNodeSizeStats; // Total node size stats
+extern NodeSizeStats genNodeSizeStatsPerFunc; // Per-function node size stats
+extern Histogram genTreeNcntHist;
+extern Histogram genTreeNsizHist;
+#endif // MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ * Count fatal errors (including noway_asserts).
+ */
+
+#if MEASURE_FATAL
+extern unsigned fatal_badCode;
+extern unsigned fatal_noWay;
+extern unsigned fatal_NOMEM;
+extern unsigned fatal_noWayAssertBody;
+#ifdef DEBUG
+extern unsigned fatal_noWayAssertBodyArgs;
+#endif // DEBUG
+extern unsigned fatal_NYI;
+#endif // MEASURE_FATAL
+
+/*****************************************************************************
+ * Codegen
+ */
+
+#ifdef _TARGET_XARCH_
+
+const instruction INS_SHIFT_LEFT_LOGICAL = INS_shl;
+const instruction INS_SHIFT_RIGHT_LOGICAL = INS_shr;
+const instruction INS_SHIFT_RIGHT_ARITHM = INS_sar;
+
+const instruction INS_AND = INS_and;
+const instruction INS_OR = INS_or;
+const instruction INS_XOR = INS_xor;
+const instruction INS_NEG = INS_neg;
+const instruction INS_TEST = INS_test;
+const instruction INS_MUL = INS_imul;
+const instruction INS_SIGNED_DIVIDE = INS_idiv;
+const instruction INS_UNSIGNED_DIVIDE = INS_div;
+const instruction INS_BREAKPOINT = INS_int3;
+const instruction INS_ADDC = INS_adc;
+const instruction INS_SUBC = INS_sbb;
+const instruction INS_NOT = INS_not;
+
+#endif
+
+#ifdef _TARGET_ARM_
+
+const instruction INS_SHIFT_LEFT_LOGICAL = INS_lsl;
+const instruction INS_SHIFT_RIGHT_LOGICAL = INS_lsr;
+const instruction INS_SHIFT_RIGHT_ARITHM = INS_asr;
+
+const instruction INS_AND = INS_and;
+const instruction INS_OR = INS_orr;
+const instruction INS_XOR = INS_eor;
+const instruction INS_NEG = INS_rsb;
+const instruction INS_TEST = INS_tst;
+const instruction INS_MUL = INS_mul;
+const instruction INS_SIGNED_DIVIDE = INS_sdiv;
+const instruction INS_UNSIGNED_DIVIDE = INS_udiv;
+const instruction INS_BREAKPOINT = INS_bkpt;
+const instruction INS_ADDC = INS_adc;
+const instruction INS_SUBC = INS_sbc;
+const instruction INS_NOT = INS_mvn;
+
+#endif
+
+#ifdef _TARGET_ARM64_
+
+const instruction INS_SHIFT_LEFT_LOGICAL = INS_lsl;
+const instruction INS_SHIFT_RIGHT_LOGICAL = INS_lsr;
+const instruction INS_SHIFT_RIGHT_ARITHM = INS_asr;
+
+const instruction INS_AND = INS_and;
+const instruction INS_OR = INS_orr;
+const instruction INS_XOR = INS_eor;
+const instruction INS_NEG = INS_neg;
+const instruction INS_TEST = INS_tst;
+const instruction INS_MUL = INS_mul;
+const instruction INS_SIGNED_DIVIDE = INS_sdiv;
+const instruction INS_UNSIGNED_DIVIDE = INS_udiv;
+const instruction INS_BREAKPOINT = INS_bkpt;
+const instruction INS_ADDC = INS_adc;
+const instruction INS_SUBC = INS_sbc;
+const instruction INS_NOT = INS_mvn;
+
+#endif
+
+/*****************************************************************************/
+
+extern const BYTE genTypeSizes[];
+extern const BYTE genTypeAlignments[];
+extern const BYTE genTypeStSzs[];
+extern const BYTE genActualTypes[];
+
+/*****************************************************************************/
+
+// VERY_LARGE_FRAME_SIZE_REG_MASK is the set of registers we need to use for
+// the probing loop generated for very large stack frames (see `getVeryLargeFrameSize`).
+
+#ifdef _TARGET_ARM_
+#define VERY_LARGE_FRAME_SIZE_REG_MASK (RBM_R4 | RBM_R5 | RBM_R6)
+#elif defined(_TARGET_ARM64_)
+#define VERY_LARGE_FRAME_SIZE_REG_MASK (RBM_R9 | RBM_R10 | RBM_R11)
+#endif
+
+/*****************************************************************************/
+
+#define REG_CORRUPT regNumber(REG_NA + 1)
+#define RBM_CORRUPT (RBM_ILLEGAL | regMaskTP(1))
+#define REG_PAIR_CORRUPT regPairNo(REG_PAIR_NONE + 1)
+
+/*****************************************************************************/
+
+extern BasicBlock dummyBB;
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+// foreach_treenode_execution_order: An iterator that iterates through all the tree
+// nodes of a statement in execution order.
+// __stmt: a GT_STMT type GenTree*
+// __node: a GenTree*, already declared, that gets updated with each node in the statement, in execution order
+
+#define foreach_treenode_execution_order(__node, __stmt) \
+ for ((__node) = (__stmt)->gtStmt.gtStmtList; (__node); (__node) = (__node)->gtNext)
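+
+// Illustrative usage:
+//
+//     GenTree* node;
+//     foreach_treenode_execution_order(node, stmt)
+//     {
+//         // ... visit "node" ...
+//     }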
+
+// foreach_block: An iterator over all blocks in the function.
+// __compiler: the Compiler* object
+// __block : a BasicBlock*, already declared, that gets updated each iteration.
+
+#define foreach_block(__compiler, __block) \
+ for ((__block) = (__compiler)->fgFirstBB; (__block); (__block) = (__block)->bbNext)
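+
+// Illustrative usage:
+//
+//     BasicBlock* block;
+//     foreach_block(compiler, block)
+//     {
+//         // ... visit "block" ...
+//     }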
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+void dumpConvertedVarSet(Compiler* comp, VARSET_VALARG_TP vars);
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debugging helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+/* The following functions are intended to be called from the debugger, to dump
+ * various data structures. They can be used in the debugger Watch or Quick Watch
+ * windows. They are designed to be short to type and take as few arguments as
+ * possible. The 'c' versions take a Compiler*, whereas the 'd' versions use the TlsCompiler.
+ * See the function definition comment for more details.
+ */
+
+void cBlock(Compiler* comp, BasicBlock* block);
+void cBlocks(Compiler* comp);
+void cBlocksV(Compiler* comp);
+void cTree(Compiler* comp, GenTree* tree);
+void cTrees(Compiler* comp);
+void cEH(Compiler* comp);
+void cVar(Compiler* comp, unsigned lclNum);
+void cVarDsc(Compiler* comp, LclVarDsc* varDsc);
+void cVars(Compiler* comp);
+void cVarsFinal(Compiler* comp);
+void cBlockPreds(Compiler* comp, BasicBlock* block);
+void cReach(Compiler* comp);
+void cDoms(Compiler* comp);
+void cLiveness(Compiler* comp);
+void cCVarSet(Compiler* comp, VARSET_VALARG_TP vars);
+
+void cFuncIR(Compiler* comp);
+void cBlockIR(Compiler* comp, BasicBlock* block);
+void cLoopIR(Compiler* comp, Compiler::LoopDsc* loop);
+void cTreeIR(Compiler* comp, GenTree* tree);
+int cTreeTypeIR(Compiler* comp, GenTree* tree);
+int cTreeKindsIR(Compiler* comp, GenTree* tree);
+int cTreeFlagsIR(Compiler* comp, GenTree* tree);
+int cOperandIR(Compiler* comp, GenTree* operand);
+int cLeafIR(Compiler* comp, GenTree* tree);
+int cIndirIR(Compiler* comp, GenTree* tree);
+int cListIR(Compiler* comp, GenTree* list);
+int cSsaNumIR(Compiler* comp, GenTree* tree);
+int cValNumIR(Compiler* comp, GenTree* tree);
+int cDependsIR(Compiler* comp, GenTree* comma, bool* first);
+
+void dBlock(BasicBlock* block);
+void dBlocks();
+void dBlocksV();
+void dTree(GenTree* tree);
+void dTrees();
+void dEH();
+void dVar(unsigned lclNum);
+void dVarDsc(LclVarDsc* varDsc);
+void dVars();
+void dVarsFinal();
+void dBlockPreds(BasicBlock* block);
+void dReach();
+void dDoms();
+void dLiveness();
+void dCVarSet(VARSET_VALARG_TP vars);
+
+void dVarSet(VARSET_VALARG_TP vars);
+void dRegMask(regMaskTP mask);
+
+void dFuncIR();
+void dBlockIR(BasicBlock* block);
+void dTreeIR(GenTree* tree);
+void dLoopIR(Compiler::LoopDsc* loop);
+void dLoopNumIR(unsigned loopNum);
+int dTabStopIR(int curr, int tabstop);
+int dTreeTypeIR(GenTree* tree);
+int dTreeKindsIR(GenTree* tree);
+int dTreeFlagsIR(GenTree* tree);
+int dOperandIR(GenTree* operand);
+int dLeafIR(GenTree* tree);
+int dIndirIR(GenTree* tree);
+int dListIR(GenTree* list);
+int dSsaNumIR(GenTree* tree);
+int dValNumIR(GenTree* tree);
+int dDependsIR(GenTree* comma);
+void dFormatIR();
+
+GenTree* dFindTree(GenTree* tree, unsigned id);
+GenTree* dFindTree(unsigned id);
+GenTreeStmt* dFindStmt(unsigned id);
+BasicBlock* dFindBlock(unsigned bbNum);
+
+#endif // DEBUG
+
+#include "compiler.hpp" // All the shared inline functions
+
+/*****************************************************************************/
+#endif //_COMPILER_H_
+/*****************************************************************************/
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
new file mode 100644
index 0000000000..eb8eb19c68
--- /dev/null
+++ b/src/jit/compiler.hpp
@@ -0,0 +1,4742 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Inline functions XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _COMPILER_HPP_
+#define _COMPILER_HPP_
+
+#include "emit.h" // for emitter::emitAddLabel
+
+#include "bitvec.h"
+
+#include "compilerbitsettraits.hpp"
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Miscellaneous utility functions. Some of these are defined in Utils.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+inline bool getInlinePInvokeEnabled()
+{
+#ifdef DEBUG
+ return JitConfig.JitPInvokeEnabled() && !JitConfig.StressCOMCall();
+#else
+ return true;
+#endif
+}
+
+inline bool getInlinePInvokeCheckEnabled()
+{
+#ifdef DEBUG
+ return JitConfig.JitPInvokeCheckEnabled() != 0;
+#else
+ return false;
+#endif
+}
+
+// Enforce float narrowing for buggy compilers (notably preWhidbey VC)
+inline float forceCastToFloat(double d)
+{
+ Volatile<float> f = (float)d;
+ return f;
+}
+
+// Enforce UInt32 narrowing for buggy compilers (notably Whidbey Beta 2 LKG)
+inline UINT32 forceCastToUInt32(double d)
+{
+ Volatile<UINT32> u = (UINT32)d;
+ return u;
+}
+
+enum RoundLevel
+{
+ ROUND_NEVER = 0, // Never round
+ ROUND_CMP_CONST = 1, // Round values compared against constants
+ ROUND_CMP = 2, // Round comparands and return values
+ ROUND_ALWAYS = 3, // Round always
+
+ COUNT_ROUND_LEVEL,
+ DEFAULT_ROUND_LEVEL = ROUND_NEVER
+};
+
+inline RoundLevel getRoundFloatLevel()
+{
+#ifdef DEBUG
+ return (RoundLevel)JitConfig.JitRoundFloat();
+#else
+ return DEFAULT_ROUND_LEVEL;
+#endif
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return the lowest bit that is set
+ */
+
+template <typename T>
+inline T genFindLowestBit(T value)
+{
+ return (value & (0 - value));
+}
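+
+// For example, genFindLowestBit(0x6) == 0x2: negating the value (two's complement)
+// flips every bit above the lowest set bit, so ANDing isolates just that bit.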
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return the highest bit that is set (that is, a mask that includes just the highest bit).
+ * TODO-ARM64-Throughput: we should convert these to use the _BitScanReverse() / _BitScanReverse64()
+ * compiler intrinsics, but our CRT header file intrin.h doesn't define these for ARM64 yet.
+ */
+
+inline unsigned int genFindHighestBit(unsigned int mask)
+{
+ assert(mask != 0);
+ unsigned int bit = 1U << ((sizeof(unsigned int) * 8) - 1); // start looking at the top
+ while ((bit & mask) == 0)
+ {
+ bit >>= 1;
+ }
+ return bit;
+}
+
+inline unsigned __int64 genFindHighestBit(unsigned __int64 mask)
+{
+ assert(mask != 0);
+ unsigned __int64 bit = 1ULL << ((sizeof(unsigned __int64) * 8) - 1); // start looking at the top
+ while ((bit & mask) == 0)
+ {
+ bit >>= 1;
+ }
+ return bit;
+}
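+
+// For example, genFindHighestBit(0x12) == 0x10: the probe bit starts at the most
+// significant position and shifts right until it first overlaps the mask.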
+
+#if 0
+// TODO-ARM64-Cleanup: These should probably be the implementation, when intrin.h is updated for ARM64
+inline
+unsigned int genFindHighestBit(unsigned int mask)
+{
+ assert(mask != 0);
+ unsigned int index;
+ _BitScanReverse(&index, mask);
+ return 1L << index;
+}
+
+inline
+unsigned __int64 genFindHighestBit(unsigned __int64 mask)
+{
+ assert(mask != 0);
+ unsigned int index;
+ _BitScanReverse64(&index, mask);
+ return 1LL << index;
+}
+#endif // 0
+
+/*****************************************************************************
+ *
+ * Return true if the given 64-bit value has exactly zero or one bits set.
+ */
+
+template <typename T>
+inline BOOL genMaxOneBit(T value)
+{
+ return (value & (value - 1)) == 0;
+}
+
+/*****************************************************************************
+ *
+ * Return true if the given 32-bit value has exactly zero or one bits set.
+ */
+
+inline BOOL genMaxOneBit(unsigned value)
+{
+ return (value & (value - 1)) == 0;
+}
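+
+// For example, genMaxOneBit(0x8) and genMaxOneBit(0) return true, while genMaxOneBit(0xA)
+// returns false: clearing the lowest set bit via (value & (value - 1)) leaves zero only
+// when at most one bit was set.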
+
+/*****************************************************************************
+ *
+ * Given a value that has exactly one bit set, return the position of that
+ * bit, in other words return the logarithm in base 2 of the given value.
+ */
+
+inline unsigned genLog2(unsigned value)
+{
+ return BitPosition(value);
+}
+
+/*****************************************************************************
+ *
+ * Given a value that has exactly one bit set, return the position of that
+ * bit, in other words return the logarithm in base 2 of the given value.
+ */
+
+inline unsigned genLog2(unsigned __int64 value)
+{
+ unsigned lo32 = (unsigned)value;
+ unsigned hi32 = (unsigned)(value >> 32);
+
+ if (lo32 != 0)
+ {
+ assert(hi32 == 0);
+ return genLog2(lo32);
+ }
+ else
+ {
+ return genLog2(hi32) + 32;
+ }
+}
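+
+// For example, genLog2(1ULL << 40) == 40: the low 32 bits are zero, so the result is
+// genLog2 of the high half (8) plus 32.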
+
+/*****************************************************************************
+ *
+ * Return the lowest bit that is set in the given register mask.
+ */
+
+inline regMaskTP genFindLowestReg(regMaskTP value)
+{
+ return (regMaskTP)genFindLowestBit(value);
+}
+
+/*****************************************************************************
+ *
+ * A rather simple routine that counts the number of bits in a given number.
+ */
+
+template <typename T>
+inline unsigned genCountBits(T bits)
+{
+ unsigned cnt = 0;
+
+ while (bits)
+ {
+ cnt++;
+ bits -= genFindLowestBit(bits);
+ }
+
+ return cnt;
+}
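+
+// For example, genCountBits(0xB) == 3: each iteration strips the lowest set bit
+// (0x1, then 0x2, then 0x8), so the loop runs once per set bit.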
+
+/*****************************************************************************
+ *
+ * Given three masks (value, end, start), returns the bits of 'value' between 'start'
+ * and 'end', exclusive of both endpoint bits:
+ *
+ * value[bitNum(end) - 1, bitNum(start) + 1]
+ */
+
+inline unsigned __int64 BitsBetween(unsigned __int64 value, unsigned __int64 end, unsigned __int64 start)
+{
+ assert(start != 0);
+ assert(start < end);
+ assert((start & (start - 1)) == 0);
+ assert((end & (end - 1)) == 0);
+
+ return value & ~((start - 1) | start) & // Ones to the left of set bit in the start mask.
+ (end - 1); // Ones to the right of set bit in the end mask.
+}
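+
+// For example, BitsBetween(0x2D, 0x10, 0x2) == 0xC: only bits 2 and 3 of the value
+// survive, i.e. the bits strictly between the start bit (bit 1) and the end bit (bit 4).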
+
+/*****************************************************************************/
+
+inline bool jitIsScaleIndexMul(size_t val)
+{
+ switch (val)
+ {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+// Returns "tree" iff "val" is a valid addressing mode scale shift amount on
+// the target architecture.
+inline bool jitIsScaleIndexShift(ssize_t val)
+{
+ // It happens that this is the right test for all our current targets: x86, x64 and ARM.
+ // This test would become target-dependent if we added a new target with a different constraint.
+ return 0 < val && val < 4;
+}
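+
+// For example, jitIsScaleIndexShift(3) is true (a shift of 3 corresponds to a scale of 8),
+// while jitIsScaleIndexShift(0) and jitIsScaleIndexShift(4) are false.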
+
+/*****************************************************************************
+ * Returns true if value is between [start..end).
+ * The comparison is inclusive of start, exclusive of end.
+ */
+
+/* static */
+inline bool Compiler::jitIsBetween(unsigned value, unsigned start, unsigned end)
+{
+ return start <= value && value < end;
+}
+
+/*****************************************************************************
+ * Returns true if value is between [start..end].
+ * The comparison is inclusive of both start and end.
+ */
+
+/* static */
+inline bool Compiler::jitIsBetweenInclusive(unsigned value, unsigned start, unsigned end)
+{
+ return start <= value && value <= end;
+}
+
+/******************************************************************************************
+ * Return the EH descriptor for the given region index.
+ */
+inline EHblkDsc* Compiler::ehGetDsc(unsigned regionIndex)
+{
+ assert(regionIndex < compHndBBtabCount);
+ return &compHndBBtab[regionIndex];
+}
+
+/******************************************************************************************
+ * Return the EH descriptor index of the enclosing try, for the given region index.
+ */
+inline unsigned Compiler::ehGetEnclosingTryIndex(unsigned regionIndex)
+{
+ return ehGetDsc(regionIndex)->ebdEnclosingTryIndex;
+}
+
+/******************************************************************************************
+ * Return the EH descriptor index of the enclosing handler, for the given region index.
+ */
+inline unsigned Compiler::ehGetEnclosingHndIndex(unsigned regionIndex)
+{
+ return ehGetDsc(regionIndex)->ebdEnclosingHndIndex;
+}
+
+/******************************************************************************************
+ * Return the EH index given a region descriptor.
+ */
+inline unsigned Compiler::ehGetIndex(EHblkDsc* ehDsc)
+{
+ assert(compHndBBtab <= ehDsc && ehDsc < compHndBBtab + compHndBBtabCount);
+ return (unsigned)(ehDsc - compHndBBtab);
+}
+
+/******************************************************************************************
+ * Return the EH descriptor for the most nested 'try' region this BasicBlock is a member of
+ * (or nullptr if this block is not in a 'try' region).
+ */
+inline EHblkDsc* Compiler::ehGetBlockTryDsc(BasicBlock* block)
+{
+ if (!block->hasTryIndex())
+ {
+ return nullptr;
+ }
+
+ return ehGetDsc(block->getTryIndex());
+}
+
+/******************************************************************************************
+ * Return the EH descriptor for the most nested filter or handler region this BasicBlock is a member of
+ * (or nullptr if this block is not in a filter or handler region).
+ */
+inline EHblkDsc* Compiler::ehGetBlockHndDsc(BasicBlock* block)
+{
+ if (!block->hasHndIndex())
+ {
+ return nullptr;
+ }
+
+ return ehGetDsc(block->getHndIndex());
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the funclet we are currently generating code for.
+ * This is only valid during codegen.
+ *
+ */
+inline FuncInfoDsc* Compiler::funCurrentFunc()
+{
+ return funGetFunc(compCurrFuncIdx);
+}
+
+/*****************************************************************************
+ * Change which funclet we are currently generating code for.
+ * This is only valid after funclets are created.
+ *
+ */
+inline void Compiler::funSetCurrentFunc(unsigned funcIdx)
+{
+ assert(fgFuncletsCreated);
+ assert(FitsIn<unsigned short>(funcIdx));
+ noway_assert(funcIdx < compFuncInfoCount);
+ compCurrFuncIdx = (unsigned short)funcIdx;
+}
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the given funclet.
+ * This is only valid after funclets are created.
+ *
+ */
+inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx)
+{
+ assert(fgFuncletsCreated);
+ assert(funcIdx < compFuncInfoCount);
+ return &compFuncInfos[funcIdx];
+}
+
+/*****************************************************************************
+ * Get the funcIdx for the EH funclet that begins with block.
+ * This is only valid after funclets are created.
+ * It is only valid for blocks marked with BBF_FUNCLET_BEG because
+ * otherwise we would have to do a more expensive check to determine
+ * if this should return the filter funclet or the filter handler funclet.
+ *
+ */
+inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
+{
+ assert(fgFuncletsCreated);
+ assert(block->bbFlags & BBF_FUNCLET_BEG);
+
+ EHblkDsc* eh = ehGetDsc(block->getHndIndex());
+ unsigned int funcIdx = eh->ebdFuncIndex;
+ if (eh->ebdHndBeg != block)
+ {
+ // If this is a filter EH clause, but we want the funclet
+ // for the filter (not the filter handler), it is the previous one
+ noway_assert(eh->HasFilter());
+ noway_assert(eh->ebdFilter == block);
+ assert(funGetFunc(funcIdx)->funKind == FUNC_HANDLER);
+ assert(funGetFunc(funcIdx)->funEHIndex == funGetFunc(funcIdx - 1)->funEHIndex);
+ assert(funGetFunc(funcIdx - 1)->funKind == FUNC_FILTER);
+ funcIdx--;
+ }
+
+ return funcIdx;
+}
+
+#else // !FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the funclet we are currently generating code for.
+ * This is only valid during codegen. For non-funclet platforms, this is
+ * always the root function.
+ *
+ */
+inline FuncInfoDsc* Compiler::funCurrentFunc()
+{
+ return &compFuncInfoRoot;
+}
+
+/*****************************************************************************
+ * Change which funclet we are currently generating code for.
+ * This is only valid after funclets are created.
+ *
+ */
+inline void Compiler::funSetCurrentFunc(unsigned funcIdx)
+{
+ assert(funcIdx == 0);
+}
+
+/*****************************************************************************
+ * Get the FuncInfoDsc for the given funclet.
+ * This is only valid after funclets are created.
+ *
+ */
+inline FuncInfoDsc* Compiler::funGetFunc(unsigned funcIdx)
+{
+ assert(funcIdx == 0);
+ return &compFuncInfoRoot;
+}
+
+/*****************************************************************************
+ * No funclets, so always 0.
+ *
+ */
+inline unsigned Compiler::funGetFuncIdx(BasicBlock* block)
+{
+ return 0;
+}
+
+#endif // !FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Map a register mask to a register number
+ */
+
+inline regNumber genRegNumFromMask(regMaskTP mask)
+{
+ assert(mask != 0); // Must have one bit set, so can't have a mask of zero
+
+ /* Convert the mask to a register number */
+
+ regNumber regNum = (regNumber)genLog2(mask);
+
+ /* Make sure we got it right */
+
+ assert(genRegMask(regNum) == mask);
+
+ return regNum;
+}
+
+/*****************************************************************************
+ *
+ * Return the size in bytes of the given type.
+ */
+
+extern const BYTE genTypeSizes[TYP_COUNT];
+
+template <class T>
+inline unsigned genTypeSize(T type)
+{
+ assert((unsigned)TypeGet(type) < sizeof(genTypeSizes) / sizeof(genTypeSizes[0]));
+
+ return genTypeSizes[TypeGet(type)];
+}
+
+/*****************************************************************************
+ *
+ * Return the "stack slot count" of the given type.
+ * returns 1 for 32-bit types and 2 for 64-bit types.
+ */
+
+extern const BYTE genTypeStSzs[TYP_COUNT];
+
+inline unsigned genTypeStSz(var_types type)
+{
+ assert((unsigned)type < sizeof(genTypeStSzs) / sizeof(genTypeStSzs[0]));
+
+ return genTypeStSzs[type];
+}
+
+/*****************************************************************************
+ *
+ * Return the number of registers required to hold a value of the given type.
+ */
+
+/*****************************************************************************
+ *
+ * The following function maps a 'precise' type to an actual type as seen
+ * by the VM (for example, 'byte' maps to 'int').
+ */
+
+extern const BYTE genActualTypes[TYP_COUNT];
+
+inline var_types genActualType(var_types type)
+{
+ /* Spot check to make certain the table is in sync with the enum */
+
+ assert(genActualTypes[TYP_DOUBLE] == TYP_DOUBLE);
+ assert(genActualTypes[TYP_FNC] == TYP_FNC);
+ assert(genActualTypes[TYP_REF] == TYP_REF);
+
+ assert((unsigned)type < sizeof(genActualTypes));
+ return (var_types)genActualTypes[type];
+}
+
+/*****************************************************************************/
+
+inline var_types genUnsignedType(var_types type)
+{
+ /* Force signed types into corresponding unsigned type */
+
+ switch (type)
+ {
+ case TYP_BYTE:
+ type = TYP_UBYTE;
+ break;
+ case TYP_SHORT:
+ type = TYP_CHAR;
+ break;
+ case TYP_INT:
+ type = TYP_UINT;
+ break;
+ case TYP_LONG:
+ type = TYP_ULONG;
+ break;
+ default:
+ break;
+ }
+
+ return type;
+}
+
+/*****************************************************************************/
+
+inline var_types genSignedType(var_types type)
+{
+ /* Force non-small unsigned type into corresponding signed type */
+ /* Note that we leave the small types alone */
+
+ switch (type)
+ {
+ case TYP_UINT:
+ type = TYP_INT;
+ break;
+ case TYP_ULONG:
+ type = TYP_LONG;
+ break;
+ default:
+ break;
+ }
+
+ return type;
+}
+
+/*****************************************************************************
+ * Can this type be passed as a parameter in a register?
+ */
+
+inline bool isRegParamType(var_types type)
+{
+#if defined(_TARGET_X86_)
+ return (type <= TYP_INT || type == TYP_REF || type == TYP_BYREF);
+#else // !_TARGET_X86_
+ return true;
+#endif // !_TARGET_X86_
+}
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+/*****************************************************************************/
+// Returns true if 'type' is a struct that can be enregistered for call args
+// or can be returned by value in multiple registers.
+// if 'type' is not a struct the return value will be false.
+//
+// Arguments:
+// type - the basic jit var_type for the item being queried
+// typeClass - the handle for the struct when 'type' is TYP_STRUCT
+// typeSize - Out param (if non-null) is updated with the size of 'type'.
+// forReturn - this is true when we are asking about a GT_RETURN context;
+// this is false when we are asking about an argument context
+//
+inline bool Compiler::VarTypeIsMultiByteAndCanEnreg(var_types type,
+ CORINFO_CLASS_HANDLE typeClass,
+ unsigned* typeSize,
+ bool forReturn)
+{
+ bool result = false;
+ unsigned size = 0;
+
+ if (varTypeIsStruct(type))
+ {
+ size = info.compCompHnd->getClassSize(typeClass);
+ if (forReturn)
+ {
+ structPassingKind howToReturnStruct;
+ type = getReturnTypeForStruct(typeClass, &howToReturnStruct, size);
+ }
+ else
+ {
+ structPassingKind howToPassStruct;
+ type = getArgTypeForStruct(typeClass, &howToPassStruct, size);
+ }
+ if (type != TYP_UNKNOWN)
+ {
+ result = true;
+ }
+ }
+ else
+ {
+ size = genTypeSize(type);
+ }
+
+ if (typeSize != nullptr)
+ {
+ *typeSize = size;
+ }
+
+ return result;
+}
+#endif //_TARGET_AMD64_ || _TARGET_ARM64_
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+inline const char* varTypeGCstring(var_types type)
+{
+ switch (type)
+ {
+ case TYP_REF:
+ return "gcr";
+ case TYP_BYREF:
+ return "byr";
+ default:
+ return "non";
+ }
+}
+
+#endif
+
+/*****************************************************************************/
+
+const char* varTypeName(var_types);
+
+/*****************************************************************************
+ *
+ * Helpers to pull big-endian values out of a byte stream.
+ */
+
+inline unsigned genGetU1(const BYTE* addr)
+{
+ return addr[0];
+}
+
+inline signed genGetI1(const BYTE* addr)
+{
+ return (signed char)addr[0];
+}
+
+inline unsigned genGetU2(const BYTE* addr)
+{
+ return (addr[0] << 8) | addr[1];
+}
+
+inline signed genGetI2(const BYTE* addr)
+{
+ return (signed short)((addr[0] << 8) | addr[1]);
+}
+
+inline unsigned genGetU4(const BYTE* addr)
+{
+ return (addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3];
+}
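+
+// For example, for the byte stream { 0x12, 0x34, 0x56, 0x78 }, genGetU2 returns 0x1234 and
+// genGetU4 returns 0x12345678 (big-endian interpretation).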
+
+/*****************************************************************************/
+// Helpers to pull little-endian values out of a byte stream.
+
+inline unsigned __int8 getU1LittleEndian(const BYTE* ptr)
+{
+ return *(UNALIGNED unsigned __int8*)ptr;
+}
+
+inline unsigned __int16 getU2LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL16(ptr);
+}
+
+inline unsigned __int32 getU4LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL32(ptr);
+}
+
+inline signed __int8 getI1LittleEndian(const BYTE* ptr)
+{
+ return *(UNALIGNED signed __int8*)ptr;
+}
+
+inline signed __int16 getI2LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL16(ptr);
+}
+
+inline signed __int32 getI4LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL32(ptr);
+}
+
+inline signed __int64 getI8LittleEndian(const BYTE* ptr)
+{
+ return GET_UNALIGNED_VAL64(ptr);
+}
+
+inline float getR4LittleEndian(const BYTE* ptr)
+{
+ __int32 val = getI4LittleEndian(ptr);
+ return *(float*)&val;
+}
+
+inline double getR8LittleEndian(const BYTE* ptr)
+{
+ __int64 val = getI8LittleEndian(ptr);
+ return *(double*)&val;
+}
+
+/*****************************************************************************
+ *
+ * Return the bitmask to use in the EXPSET_TP for the CSE with the given CSE index.
+ * Each GenTree has the following field:
+ * signed char gtCSEnum; // 0 or the CSE index (negated if def)
+ * So zero is reserved to mean this node is not a CSE
+ * and positive values indicate CSE uses and negative values indicate CSE defs.
+ * The caller of this method must pass a non-zero positive value.
+ * This precondition is checked by the assert on the first line of this method.
+ */
+
+inline EXPSET_TP genCSEnum2bit(unsigned index)
+{
+ assert((index > 0) && (index <= EXPSET_SZ));
+
+ return ((EXPSET_TP)1 << (index - 1));
+}
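+
+// For example, genCSEnum2bit(1) == 0x1 and genCSEnum2bit(3) == 0x4: CSE index N maps to
+// bit position N - 1 in the EXPSET_TP bitmask.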
+
+#ifdef DEBUG
+const char* genES2str(EXPSET_TP set);
+const char* refCntWtd2str(unsigned refCntWtd);
+#endif
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX GenTree XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void* GenTree::operator new(size_t sz, Compiler* comp, genTreeOps oper)
+{
+#if SMALL_TREE_NODES
+ size_t size = GenTree::s_gtNodeSizes[oper];
+#else
+ size_t size = TREE_NODE_SZ_LARGE;
+#endif
+
+#if MEASURE_NODE_SIZE
+ genNodeSizeStats.genTreeNodeCnt += 1;
+ genNodeSizeStats.genTreeNodeSize += size;
+ genNodeSizeStats.genTreeNodeActualSize += sz;
+
+ genNodeSizeStatsPerFunc.genTreeNodeCnt += 1;
+ genNodeSizeStatsPerFunc.genTreeNodeSize += size;
+ genNodeSizeStatsPerFunc.genTreeNodeActualSize += sz;
+#endif // MEASURE_NODE_SIZE
+
+ assert(size >= sz);
+ return comp->compGetMem(size, CMK_ASTNode);
+}
+
+// GenTree constructor
+inline GenTree::GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode))
+{
+ gtOper = oper;
+ gtType = type;
+ gtFlags = 0;
+ gtLIRFlags = 0;
+#ifdef DEBUG
+ gtDebugFlags = 0;
+#endif // DEBUG
+#ifdef LEGACY_BACKEND
+ gtUsedRegs = 0;
+#endif // LEGACY_BACKEND
+#if FEATURE_ANYCSE
+ gtCSEnum = NO_CSE;
+#endif // FEATURE_ANYCSE
+#if ASSERTION_PROP
+ ClearAssertion();
+#endif
+
+#if FEATURE_STACK_FP_X87
+ gtFPlvl = 0;
+#endif
+
+ gtNext = nullptr;
+ gtPrev = nullptr;
+ gtRegNum = REG_NA;
+ INDEBUG(gtRegTag = GT_REGTAG_NONE;)
+
+ INDEBUG(gtCostsInitialized = false;)
+
+#ifdef DEBUG
+#if SMALL_TREE_NODES
+ size_t size = GenTree::s_gtNodeSizes[oper];
+ if (size == TREE_NODE_SZ_SMALL && !largeNode)
+ {
+ gtDebugFlags |= GTF_DEBUG_NODE_SMALL;
+ }
+ else if (size == TREE_NODE_SZ_LARGE || largeNode)
+ {
+ gtDebugFlags |= GTF_DEBUG_NODE_LARGE;
+ }
+ else
+ {
+ assert(!"bogus node size");
+ }
+#endif
+#endif
+
+#ifdef DEBUG
+ gtSeqNum = 0;
+ gtTreeID = JitTls::GetCompiler()->compGenTreeID++;
+ gtVNPair.SetBoth(ValueNumStore::NoVN);
+ gtRegTag = GT_REGTAG_NONE;
+ gtOperSave = GT_NONE;
+#endif
+}
+
+/*****************************************************************************/
+
+inline GenTreeStmt* Compiler::gtNewStmt(GenTreePtr expr, IL_OFFSETX offset)
+{
+ /* NOTE - GT_STMT is now a small node in retail */
+
+ GenTreeStmt* stmt = new (this, GT_STMT) GenTreeStmt(expr, offset);
+
+ return stmt;
+}
+
+/*****************************************************************************/
+
+inline GenTreePtr Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, bool doSimplifications)
+{
+ assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0);
+ assert((GenTree::OperKind(oper) & GTK_EXOP) ==
+ 0); // Can't use this to construct any types that extend unary/binary operator.
+ assert(op1 != nullptr || oper == GT_PHI || oper == GT_RETFILT || oper == GT_NOP ||
+ (oper == GT_RETURN && type == TYP_VOID));
+
+ if (doSimplifications)
+ {
+ // We do some simplifications here.
+ // If this gets to be too many, try a switch...
+ // TODO-Cleanup: With the factoring out of array bounds checks, it should not be the
+ // case that we need to check for the array index case here, but without this check
+ // we get failures (see for example jit\Directed\Languages\Python\test_methods_d.exe)
+ if (oper == GT_IND)
+ {
+ // IND(ADDR(IND(x))) == IND(x)
+ if (op1->gtOper == GT_ADDR)
+ {
+ if (op1->gtOp.gtOp1->gtOper == GT_IND && (op1->gtOp.gtOp1->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ op1 = op1->gtOp.gtOp1->gtOp.gtOp1;
+ }
+ }
+ }
+ else if (oper == GT_ADDR)
+ {
+ // if "x" is not an array index, ADDR(IND(x)) == x
+ if (op1->gtOper == GT_IND && (op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ return op1->gtOp.gtOp1;
+ }
+ }
+ }
+
+ GenTreePtr node = new (this, oper) GenTreeOp(oper, type, op1, nullptr);
+
+ //
+ // the GT_ADDR of a Local Variable implies GTF_ADDR_ONSTACK
+ //
+ if ((oper == GT_ADDR) && (op1->OperGet() == GT_LCL_VAR))
+ {
+ node->gtFlags |= GTF_ADDR_ONSTACK;
+ }
+
+ return node;
+}
+
+// Returns an opcode that is of the largest node size in use.
+inline genTreeOps LargeOpOpcode()
+{
+#if SMALL_TREE_NODES
+ // Allocate a large node
+ assert(GenTree::s_gtNodeSizes[GT_CALL] == TREE_NODE_SZ_LARGE);
+#endif
+ return GT_CALL;
+}
+
+/******************************************************************************
+ *
+ * Use to create nodes which may later be morphed to another (big) operator
+ */
+
+inline GenTreePtr Compiler::gtNewLargeOperNode(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2)
+{
+ assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0);
+ assert((GenTree::OperKind(oper) & GTK_EXOP) ==
+ 0); // Can't use this to construct any types that extend unary/binary operator.
+#if SMALL_TREE_NODES
+ // Allocate a large node
+
+ assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL);
+
+ GenTreePtr node = new (this, LargeOpOpcode()) GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ true));
+#else
+ GenTreePtr node = new (this, oper) GenTreeOp(oper, type, op1, op2);
+#endif
+
+ return node;
+}
+
+/*****************************************************************************
+ *
+ * Allocates an integer constant entry that represents a handle (something
+ * that may need to be fixed up).
+ */
+
+inline GenTreePtr Compiler::gtNewIconHandleNode(
+ size_t value, unsigned flags, FieldSeqNode* fields, unsigned handle1, void* handle2)
+{
+ GenTreePtr node;
+ assert((flags & (GTF_ICON_HDL_MASK | GTF_ICON_FIELD_OFF)) != 0);
+
+ // Interpret "fields == NULL" as "not a field."
+ if (fields == nullptr)
+ {
+ fields = FieldSeqStore::NotAField();
+ }
+
+#if defined(LATE_DISASM)
+ node = new (this, LargeOpOpcode()) GenTreeIntCon(TYP_I_IMPL, value, fields DEBUGARG(/*largeNode*/ true));
+
+ node->gtIntCon.gtIconHdl.gtIconHdl1 = handle1;
+ node->gtIntCon.gtIconHdl.gtIconHdl2 = handle2;
+#else
+ node = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, value, fields);
+#endif
+ node->gtFlags |= flags;
+ return node;
+}
+
+/*****************************************************************************
+ *
+ * It may not be allowed to embed HANDLEs directly into the JITed code (e.g.,
+ * as arguments to JIT helpers). Get a corresponding value that can be embedded.
+ * These are versions for each specific type of HANDLE
+ */
+
+inline GenTreePtr Compiler::gtNewIconEmbScpHndNode(CORINFO_MODULE_HANDLE scpHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedScpHnd, *pEmbedScpHnd;
+
+ embedScpHnd = (void*)info.compCompHnd->embedModuleHandle(scpHnd, &pEmbedScpHnd);
+
+ assert((!embedScpHnd) != (!pEmbedScpHnd));
+
+ return gtNewIconEmbHndNode(embedScpHnd, pEmbedScpHnd, GTF_ICON_SCOPE_HDL, hnd1, hnd2, scpHnd);
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::gtNewIconEmbClsHndNode(CORINFO_CLASS_HANDLE clsHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedClsHnd, *pEmbedClsHnd;
+
+ embedClsHnd = (void*)info.compCompHnd->embedClassHandle(clsHnd, &pEmbedClsHnd);
+
+ assert((!embedClsHnd) != (!pEmbedClsHnd));
+
+ return gtNewIconEmbHndNode(embedClsHnd, pEmbedClsHnd, GTF_ICON_CLASS_HDL, hnd1, hnd2, clsHnd);
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::gtNewIconEmbMethHndNode(CORINFO_METHOD_HANDLE methHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedMethHnd, *pEmbedMethHnd;
+
+ embedMethHnd = (void*)info.compCompHnd->embedMethodHandle(methHnd, &pEmbedMethHnd);
+
+ assert((!embedMethHnd) != (!pEmbedMethHnd));
+
+ return gtNewIconEmbHndNode(embedMethHnd, pEmbedMethHnd, GTF_ICON_METHOD_HDL, hnd1, hnd2, methHnd);
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::gtNewIconEmbFldHndNode(CORINFO_FIELD_HANDLE fldHnd, unsigned hnd1, void* hnd2)
+{
+ void *embedFldHnd, *pEmbedFldHnd;
+
+ embedFldHnd = (void*)info.compCompHnd->embedFieldHandle(fldHnd, &pEmbedFldHnd);
+
+ assert((!embedFldHnd) != (!pEmbedFldHnd));
+
+ return gtNewIconEmbHndNode(embedFldHnd, pEmbedFldHnd, GTF_ICON_FIELD_HDL, hnd1, hnd2, fldHnd);
+}
+
+/*****************************************************************************/
+
+inline GenTreeCall* Compiler::gtNewHelperCallNode(unsigned helper, var_types type, unsigned flags, GenTreeArgList* args)
+{
+ GenTreeCall* result = gtNewCallNode(CT_HELPER, eeFindHelper(helper), type, args);
+ result->gtFlags |= flags;
+
+#if DEBUG
+ // Helper calls are never candidates.
+
+ result->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
+#endif
+
+ return result;
+}
+
+//------------------------------------------------------------------------
+// gtNewAllocObjNode: A little helper to create an object allocation node.
+//
+// Arguments:
+// helper - Value returned by ICorJitInfo::getNewHelper
+// clsHnd - Corresponding class handle
+// type - Tree return type (e.g. TYP_REF)
+// op1 - Node containing an address of VtablePtr
+//
+// Return Value:
+// Returns GT_ALLOCOBJ node that will be later morphed into an
+// allocation helper call or local variable allocation on the stack.
+inline GenTreePtr Compiler::gtNewAllocObjNode(unsigned int helper,
+ CORINFO_CLASS_HANDLE clsHnd,
+ var_types type,
+ GenTreePtr op1)
+{
+ GenTreePtr node = new (this, GT_ALLOCOBJ) GenTreeAllocObj(type, helper, clsHnd, op1);
+ return node;
+}
+
+/*****************************************************************************/
+
+inline GenTreePtr Compiler::gtNewCodeRef(BasicBlock* block)
+{
+ GenTreePtr node = new (this, GT_LABEL) GenTreeLabel(block);
+ return node;
+}
+
+/*****************************************************************************
+ *
+ * A little helper to create a data member reference node.
+ */
+
+inline GenTreePtr Compiler::gtNewFieldRef(
+ var_types typ, CORINFO_FIELD_HANDLE fldHnd, GenTreePtr obj, DWORD offset, bool nullcheck)
+{
+#if SMALL_TREE_NODES
+ /* 'GT_FIELD' nodes may later get transformed into 'GT_IND' */
+
+ assert(GenTree::s_gtNodeSizes[GT_IND] <= GenTree::s_gtNodeSizes[GT_FIELD]);
+ GenTreePtr tree = new (this, GT_FIELD) GenTreeField(typ);
+#else
+ GenTreePtr tree = new (this, GT_FIELD) GenTreeField(typ);
+#endif
+ tree->gtField.gtFldObj = obj;
+ tree->gtField.gtFldHnd = fldHnd;
+ tree->gtField.gtFldOffset = offset;
+ tree->gtFlags |= GTF_GLOB_REF;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ tree->gtField.gtFieldLookup.addr = nullptr;
+#endif
+
+ if (nullcheck)
+ {
+ tree->gtFlags |= GTF_FLD_NULLCHECK;
+ }
+
+ // If "obj" is the address of a local, note that a field of that struct local has been accessed.
+ if (obj != nullptr && obj->OperGet() == GT_ADDR && varTypeIsStruct(obj->gtOp.gtOp1) &&
+ obj->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = obj->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ lvaTable[lclNum].lvFieldAccessed = 1;
+ }
+
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * A little helper to create an array index node.
+ */
+
+inline GenTreePtr Compiler::gtNewIndexRef(var_types typ, GenTreePtr arrayOp, GenTreePtr indexOp)
+{
+ GenTreeIndex* gtIndx = new (this, GT_INDEX) GenTreeIndex(typ, arrayOp, indexOp, genTypeSize(typ));
+
+ return gtIndx;
+}
+
+/*****************************************************************************
+ *
+ * Create (and check for) a "nothing" node, i.e. a node that doesn't produce
+ * any code. We currently use a "nop" node of type void for this purpose.
+ */
+
+inline GenTreePtr Compiler::gtNewNothingNode()
+{
+ return new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_VOID);
+}
+/*****************************************************************************/
+
+inline bool GenTree::IsNothingNode() const
+{
+ return (gtOper == GT_NOP && gtType == TYP_VOID);
+}
+
+/*****************************************************************************
+ *
+ * Change the given node to a NOP - it may later be changed to a GT_COMMA
+ *
+ *****************************************************************************/
+
+inline void GenTree::gtBashToNOP()
+{
+ ChangeOper(GT_NOP);
+
+ gtType = TYP_VOID;
+ gtOp.gtOp1 = gtOp.gtOp2 = nullptr;
+
+ gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
+}
+
+// Return a new arg placeholder node. It does nothing by itself, but it has a type associated
+// with it so we can keep track of register arguments in the lists associated with call nodes.
+
+inline GenTreePtr Compiler::gtNewArgPlaceHolderNode(var_types type, CORINFO_CLASS_HANDLE clsHnd)
+{
+ GenTreePtr node = new (this, GT_ARGPLACE) GenTreeArgPlace(type, clsHnd);
+ return node;
+}
+
+/*****************************************************************************/
+
+inline GenTreePtr Compiler::gtUnusedValNode(GenTreePtr expr)
+{
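+ // The resulting tree has the shape COMMA(expr, NOP): 'expr' is evaluated only for its
+ // side effects and the TYP_VOID value of the comma is not used.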
+ return gtNewOperNode(GT_COMMA, TYP_VOID, expr, gtNewNothingNode());
+}
+
+/*****************************************************************************
+ *
+ * A wrapper for gtSetEvalOrder and gtComputeFPlvls.
+ * Necessary because the FP levels may need to be recomputed if we reverse
+ * the operands.
+ */
+
+inline void Compiler::gtSetStmtInfo(GenTree* stmt)
+{
+ assert(stmt->gtOper == GT_STMT);
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+#if FEATURE_STACK_FP_X87
+ /* We will try to compute the FP stack level at each node */
+ codeGen->genResetFPstkLevel();
+
+ /* Sometimes we need to redo the FP level computation */
+ gtFPstLvlRedo = false;
+#endif // FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ gtDispTree(stmt);
+ }
+#endif
+
+ /* Recursively process the expression */
+
+ gtSetEvalOrder(expr);
+
+ // Set the statement to have the same costs as the top node of the tree.
+ stmt->CopyCosts(expr);
+
+#if FEATURE_STACK_FP_X87
+ /* Unused float values leave one operand on the stack */
+ assert(codeGen->genGetFPstkLevel() == 0 || codeGen->genGetFPstkLevel() == 1);
+
+ /* Do we need to recompute FP stack levels? */
+
+ if (gtFPstLvlRedo)
+ {
+ codeGen->genResetFPstkLevel();
+ gtComputeFPlvls(expr);
+ assert(codeGen->genGetFPstkLevel() == 0 || codeGen->genGetFPstkLevel() == 1);
+ }
+#endif // FEATURE_STACK_FP_X87
+}
+
+#if FEATURE_STACK_FP_X87
+inline unsigned Compiler::gtSetEvalOrderAndRestoreFPstkLevel(GenTree* tree)
+{
+ unsigned FPlvlSave = codeGen->genFPstkLevel;
+ unsigned result = gtSetEvalOrder(tree);
+ codeGen->genFPstkLevel = FPlvlSave;
+
+ return result;
+}
+#else // !FEATURE_STACK_FP_X87
+inline unsigned Compiler::gtSetEvalOrderAndRestoreFPstkLevel(GenTree* tree)
+{
+ return gtSetEvalOrder(tree);
+}
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************/
+#if SMALL_TREE_NODES
+/*****************************************************************************/
+
+inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
+{
+ assert(((gtDebugFlags & GTF_DEBUG_NODE_SMALL) != 0) != ((gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0));
+
+ /* Make sure the node isn't too small for the new operator */
+
+ assert(GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_SMALL ||
+ GenTree::s_gtNodeSizes[gtOper] == TREE_NODE_SZ_LARGE);
+ assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_LARGE);
+
+ assert(GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL || (gtDebugFlags & GTF_DEBUG_NODE_LARGE));
+
+ gtOper = oper;
+
+#ifdef DEBUG
+ // Maintain the invariant that unary operators always have NULL gtOp2.
+ // If we ever start explicitly allocating GenTreeUnOp nodes, we wouldn't be
+ // able to do that (but if we did, we'd have to have a check in gtOp -- perhaps
+ // a gtUnOp...)
+ if (OperKind(oper) == GTK_UNOP)
+ {
+ gtOp.gtOp2 = nullptr;
+ }
+#endif // DEBUG
+
+#if DEBUGGABLE_GENTREE
+ // Until we eliminate SetOper/ChangeOper, we also change the vtable of the node, so that
+ // it shows up correctly in the debugger.
+ SetVtableForOper(oper);
+#endif // DEBUGGABLE_GENTREE
+
+ if (oper == GT_CNS_INT)
+ {
+ gtIntCon.gtFieldSeq = nullptr;
+ }
+
+ if (vnUpdate == CLEAR_VN)
+ {
+ // Clear the ValueNum field as well.
+ gtVNPair.SetBoth(ValueNumStore::NoVN);
+ }
+}
+
+inline void GenTree::CopyFrom(const GenTree* src, Compiler* comp)
+{
+ /* The source may be big only if the target is also a big node */
+
+ assert((gtDebugFlags & GTF_DEBUG_NODE_LARGE) || GenTree::s_gtNodeSizes[src->gtOper] == TREE_NODE_SZ_SMALL);
+ GenTreePtr prev = gtPrev;
+ GenTreePtr next = gtNext;
+ // The VTable pointer is copied intentionally here
+ memcpy((void*)this, (void*)src, src->GetNodeSize());
+ this->gtPrev = prev;
+ this->gtNext = next;
+#ifdef DEBUG
+ gtSeqNum = 0;
+#endif
+ // Transfer any annotations.
+ if (src->OperGet() == GT_IND && src->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = comp->GetArrayInfoMap()->Lookup(src, &arrInfo);
+ assert(b);
+ comp->GetArrayInfoMap()->Set(this, arrInfo);
+ }
+}
+
+inline GenTreePtr Compiler::gtNewCastNode(var_types typ, GenTreePtr op1, var_types castType)
+{
+ GenTreePtr res = new (this, GT_CAST) GenTreeCast(typ, op1, castType);
+ return res;
+}
+
+inline GenTreePtr Compiler::gtNewCastNodeL(var_types typ, GenTreePtr op1, var_types castType)
+{
+ /* Some casts get transformed into 'GT_CALL' or 'GT_IND' nodes */
+
+ assert(GenTree::s_gtNodeSizes[GT_CALL] >= GenTree::s_gtNodeSizes[GT_CAST]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] >= GenTree::s_gtNodeSizes[GT_IND]);
+
+ /* Make a big node first and then change it to be GT_CAST */
+
+ GenTreePtr res = new (this, LargeOpOpcode()) GenTreeCast(typ, op1, castType DEBUGARG(/*largeNode*/ true));
+ return res;
+}
+
+/*****************************************************************************/
+#else // SMALL_TREE_NODES
+/*****************************************************************************/
+
+inline void GenTree::InitNodeSize()
+{
+}
+
+inline void GenTree::SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
+{
+ gtOper = oper;
+
+ if (vnUpdate == CLEAR_VN)
+ {
+ // Clear the ValueNum field.
+ gtVNPair.SetBoth(ValueNumStore::NoVN);
+ }
+}
+
+inline void GenTree::CopyFrom(GenTreePtr src)
+{
+ *this = *src;
+#ifdef DEBUG
+ gtSeqNum = 0;
+#endif
+}
+
+inline GenTreePtr Compiler::gtNewCastNode(var_types typ, GenTreePtr op1, var_types castType)
+{
+ GenTreePtr tree = gtNewOperNode(GT_CAST, typ, op1);
+ tree->gtCast.gtCastType = castType;
+ return tree;
+}
+
+inline GenTreePtr Compiler::gtNewCastNodeL(var_types typ, GenTreePtr op1, var_types castType)
+{
+ return gtNewCastNode(typ, op1, castType);
+}
+
+/*****************************************************************************/
+#endif // SMALL_TREE_NODES
+/*****************************************************************************/
+
+inline void GenTree::SetOperResetFlags(genTreeOps oper)
+{
+ SetOper(oper);
+ gtFlags &= GTF_NODE_MASK;
+}
+
+inline void GenTree::ChangeOperConst(genTreeOps oper)
+{
+#ifdef _TARGET_64BIT_
+ assert(oper != GT_CNS_LNG); // We should never see a GT_CNS_LNG for a 64-bit target!
+#endif
+ assert(OperIsConst(oper)); // use ChangeOper() instead
+ SetOperResetFlags(oper);
+ // Some constant subtypes have additional fields that must be initialized.
+ if (oper == GT_CNS_INT)
+ {
+ gtIntCon.gtFieldSeq = FieldSeqStore::NotAField();
+ }
+}
+
+inline void GenTree::ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate)
+{
+ assert(!OperIsConst(oper)); // use ChangeOperLeaf() instead
+
+ SetOper(oper, vnUpdate);
+ gtFlags &= GTF_COMMON_MASK;
+
+ // Do "oper"-specific initializations...
+ switch (oper)
+ {
+ case GT_LCL_FLD:
+ gtLclFld.gtLclOffs = 0;
+ gtLclFld.gtFieldSeq = FieldSeqStore::NotAField();
+ break;
+ default:
+ break;
+ }
+}
+
+inline void GenTree::ChangeOperUnchecked(genTreeOps oper)
+{
+ gtOper = oper; // Trust the caller and don't use SetOper()
+ gtFlags &= GTF_COMMON_MASK;
+}
+
+/*****************************************************************************
+ * Returns true if the node is &var (created by ldarga and ldloca)
+ */
+
+inline bool GenTree::IsVarAddr() const
+{
+ if (gtOper == GT_ADDR)
+ {
+ if (gtFlags & GTF_ADDR_ONSTACK)
+ {
+ assert((gtType == TYP_BYREF) || (gtType == TYP_I_IMPL));
+ return true;
+ }
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the node is of the "ovf" variety, for example, add.ovf.i1.
+ * + gtOverflow() can only be called for valid operators (that is, we know it is one
+ * of the operators which may have GTF_OVERFLOW set).
+ * + gtOverflowEx() is more expensive, and should be called only if gtOper may be
+ * an operator for which GTF_OVERFLOW is invalid.
+ */
+
+inline bool GenTree::gtOverflow() const
+{
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ assert(gtOper == GT_MUL || gtOper == GT_CAST || gtOper == GT_ADD || gtOper == GT_SUB || gtOper == GT_ASG_ADD ||
+ gtOper == GT_ASG_SUB || gtOper == GT_ADD_LO || gtOper == GT_SUB_LO || gtOper == GT_ADD_HI ||
+ gtOper == GT_SUB_HI);
+#else
+ assert(gtOper == GT_MUL || gtOper == GT_CAST || gtOper == GT_ADD || gtOper == GT_SUB || gtOper == GT_ASG_ADD ||
+ gtOper == GT_ASG_SUB);
+#endif
+
+ if (gtFlags & GTF_OVERFLOW)
+ {
+ assert(varTypeIsIntegral(TypeGet()));
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+inline bool GenTree::gtOverflowEx() const
+{
+ if (gtOper == GT_MUL || gtOper == GT_CAST || gtOper == GT_ADD || gtOper == GT_SUB ||
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ gtOper == GT_ADD_HI || gtOper == GT_SUB_HI ||
+#endif
+ gtOper == GT_ASG_ADD || gtOper == GT_ASG_SUB)
+ {
+ return gtOverflow();
+ }
+ return false;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX LclVarsInfo XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+inline bool Compiler::lvaHaveManyLocals() const
+{
+ return (lvaCount >= lclMAX_TRACKED);
+}
+
+/*****************************************************************************
+ *
+ * Allocate a temporary variable or a set of temp variables.
+ */
+
+inline unsigned Compiler::lvaGrabTemp(bool shortLifetime DEBUGARG(const char* reason))
+{
+ if (compIsForInlining())
+ {
+ // Grab the temp using Inliner's Compiler instance.
+ Compiler* pComp = impInlineInfo->InlinerCompiler; // The Compiler instance for the caller (i.e. the inliner)
+
+ if (pComp->lvaHaveManyLocals())
+ {
+ // Don't create more LclVars when inlining
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
+ }
+
+ unsigned tmpNum = pComp->lvaGrabTemp(shortLifetime DEBUGARG(reason));
+ lvaTable = pComp->lvaTable;
+ lvaCount = pComp->lvaCount;
+ lvaTableCnt = pComp->lvaTableCnt;
+ return tmpNum;
+ }
+
+ // You cannot allocate more space after frame layout!
+ noway_assert(lvaDoneFrameLayout < Compiler::TENTATIVE_FRAME_LAYOUT);
+
+ /* Check if the lvaTable has to be grown */
+ if (lvaCount + 1 > lvaTableCnt)
+ {
+ unsigned newLvaTableCnt = lvaCount + (lvaCount / 2) + 1;
+
+ // Check for overflow
+ if (newLvaTableCnt <= lvaCount)
+ {
+ IMPL_LIMITATION("too many locals");
+ }
+
+ // Note: compGetMemArray might throw.
+ LclVarDsc* newLvaTable = (LclVarDsc*)compGetMemArray(newLvaTableCnt, sizeof(*lvaTable), CMK_LvaTable);
+
+ memcpy(newLvaTable, lvaTable, lvaCount * sizeof(*lvaTable));
+ memset(newLvaTable + lvaCount, 0, (newLvaTableCnt - lvaCount) * sizeof(*lvaTable));
+
+ for (unsigned i = lvaCount; i < newLvaTableCnt; i++)
+ {
+ new (&newLvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+#if 0
+ // TODO-Cleanup: Enable this and test.
+#ifdef DEBUG
+ // Fill the old table with junk so that we can detect unintended use.
+ memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable));
+#endif
+#endif
+
+ lvaTableCnt = newLvaTableCnt;
+ lvaTable = newLvaTable;
+ }
+
+ lvaTable[lvaCount].lvType = TYP_UNDEF; // Initialize lvType, lvIsTemp and lvOnFrame
+ lvaTable[lvaCount].lvIsTemp = shortLifetime;
+ lvaTable[lvaCount].lvOnFrame = true;
+
+ unsigned tempNum = lvaCount;
+
+ lvaCount++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaGrabTemp returning %d (", tempNum);
+ gtDispLclVar(tempNum, false);
+ printf(")%s called for %s.\n", shortLifetime ? "" : " (a long lifetime temp)", reason);
+ }
+#endif // DEBUG
+
+ return tempNum;
+}
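+
+// A typical use (illustrative sketch only): grab a short-lived temp and give it a type
+// before referencing it, e.g.
+//
+//   unsigned tmpNum = lvaGrabTemp(true DEBUGARG("example temp"));
+//   lvaTable[tmpNum].lvType = TYP_INT;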
+
+inline unsigned Compiler::lvaGrabTemps(unsigned cnt DEBUGARG(const char* reason))
+{
+ if (compIsForInlining())
+ {
+ // Grab the temps using Inliner's Compiler instance.
+ unsigned tmpNum = impInlineInfo->InlinerCompiler->lvaGrabTemps(cnt DEBUGARG(reason));
+
+ lvaTable = impInlineInfo->InlinerCompiler->lvaTable;
+ lvaCount = impInlineInfo->InlinerCompiler->lvaCount;
+ lvaTableCnt = impInlineInfo->InlinerCompiler->lvaTableCnt;
+ return tmpNum;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaGrabTemps(%d) returning %d..%d (long lifetime temps) called for %s", cnt, lvaCount,
+ lvaCount + cnt - 1, reason);
+ }
+#endif
+
+ // You cannot allocate more space after frame layout!
+ noway_assert(lvaDoneFrameLayout < Compiler::TENTATIVE_FRAME_LAYOUT);
+
+ /* Check if the lvaTable has to be grown */
+ if (lvaCount + cnt > lvaTableCnt)
+ {
+ unsigned newLvaTableCnt = lvaCount + max(lvaCount / 2 + 1, cnt);
+
+ // Check for overflow
+ if (newLvaTableCnt <= lvaCount)
+ {
+ IMPL_LIMITATION("too many locals");
+ }
+
+ // Note: compGetMemArray might throw.
+ LclVarDsc* newLvaTable = (LclVarDsc*)compGetMemArray(newLvaTableCnt, sizeof(*lvaTable), CMK_LvaTable);
+
+ memcpy(newLvaTable, lvaTable, lvaCount * sizeof(*lvaTable));
+ memset(newLvaTable + lvaCount, 0, (newLvaTableCnt - lvaCount) * sizeof(*lvaTable));
+ for (unsigned i = lvaCount; i < newLvaTableCnt; i++)
+ {
+ new (&newLvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+#if 0
+#ifdef DEBUG
+ // TODO-Cleanup: Enable this and test.
+ // Fill the old table with junk so that we can detect unintended use.
+ memset(lvaTable, fDefaultFill2.val_DontUse_(CLRConfig::INTERNAL_JitDefaultFill, 0xFF), lvaCount * sizeof(*lvaTable));
+#endif
+#endif
+
+ lvaTableCnt = newLvaTableCnt;
+ lvaTable = newLvaTable;
+ }
+
+ unsigned tempNum = lvaCount;
+
+ while (cnt--)
+ {
+ lvaTable[lvaCount].lvType = TYP_UNDEF; // Initialize lvType, lvIsTemp and lvOnFrame
+ lvaTable[lvaCount].lvIsTemp = false;
+ lvaTable[lvaCount].lvOnFrame = true;
+ lvaCount++;
+ }
+
+ return tempNum;
+}
+
+/*****************************************************************************
+ *
+ * Allocate a temporary variable which is implicitly used by code-gen
+ * There will be no explicit references to the temp, and so it needs to
+ * be forced to be kept alive, and not be optimized away.
+ */
+
+inline unsigned Compiler::lvaGrabTempWithImplicitUse(bool shortLifetime DEBUGARG(const char* reason))
+{
+ if (compIsForInlining())
+ {
+ // Grab the temp using Inliner's Compiler instance.
+ unsigned tmpNum = impInlineInfo->InlinerCompiler->lvaGrabTempWithImplicitUse(shortLifetime DEBUGARG(reason));
+
+ lvaTable = impInlineInfo->InlinerCompiler->lvaTable;
+ lvaCount = impInlineInfo->InlinerCompiler->lvaCount;
+ lvaTableCnt = impInlineInfo->InlinerCompiler->lvaTableCnt;
+ return tmpNum;
+ }
+
+ unsigned lclNum = lvaGrabTemp(shortLifetime DEBUGARG(reason));
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // This will prevent it from being optimized away
+ // TODO-CQ: We shouldn't have to go as far as to declare these
+ // address-exposed -- DoNotEnregister should suffice?
+ lvaSetVarAddrExposed(lclNum);
+
+ // We need lvRefCnt to be non-zero to prevent various asserts from firing.
+ varDsc->lvRefCnt = 1;
+ varDsc->lvRefCntWtd = BB_UNITY_WEIGHT;
+
+ return lclNum;
+}
+
+/*****************************************************************************
+ *
+ * If lvaTrackedFixed is false then set the lvaSortAgain flag
+ * (this allows us to grow the number of tracked variables)
+ * and zero lvRefCntWtd when lvRefCnt is zero
+ */
+
+inline void LclVarDsc::lvaResetSortAgainFlag(Compiler* comp)
+{
+ if (!comp->lvaTrackedFixed)
+ {
+ /* Flag this change, set lvaSortAgain to true */
+ comp->lvaSortAgain = true;
+ }
+ /* Set weighted ref count to zero if ref count is zero */
+ if (lvRefCnt == 0)
+ {
+ lvRefCntWtd = 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Decrement the ref counts for a local variable
+ */
+
+inline void LclVarDsc::decRefCnts(BasicBlock::weight_t weight, Compiler* comp, bool propagate)
+{
+ /* Decrement lvRefCnt and lvRefCntWtd */
+ Compiler::lvaPromotionType promotionType = DUMMY_INIT(Compiler::PROMOTION_TYPE_NONE);
+ if (varTypeIsStruct(lvType))
+ {
+ promotionType = comp->lvaGetPromotionType(this);
+ }
+
+ //
+ // Decrement counts on the local itself.
+ //
+ if (lvType != TYP_STRUCT || promotionType != Compiler::PROMOTION_TYPE_INDEPENDENT)
+ {
+ assert(lvRefCnt); // Can't decrement below zero
+
+ // TODO: Well, the assert above could be bogus.
+ // If lvRefCnt has overflowed before, then it might already have dropped to 0.
+ // Therefore we do need the following check to keep lvRefCnt from underflowing:
+ if (lvRefCnt > 0)
+ {
+ //
+ // Decrement lvRefCnt
+ //
+ lvRefCnt--;
+
+ //
+ // Decrement lvRefCntWtd
+ //
+ if (weight != 0)
+ {
+ if (lvIsTemp && (weight * 2 > weight))
+ {
+ weight *= 2;
+ }
+
+ if (lvRefCntWtd <= weight)
+ { // Can't go below zero
+ lvRefCntWtd = 0;
+ }
+ else
+ {
+ lvRefCntWtd -= weight;
+ }
+ }
+ }
+ }
+
+ if (varTypeIsStruct(lvType) && propagate)
+ {
+ // For promoted struct locals, decrement lvRefCnt on its field locals as well.
+ if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT ||
+ promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ for (unsigned i = lvFieldLclStart; i < lvFieldLclStart + lvFieldCnt; ++i)
+ {
+ comp->lvaTable[i].decRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+ }
+
+ if (lvIsStructField && propagate)
+ {
+ // Depending on the promotion type, decrement the ref count for the parent struct as well.
+ promotionType = comp->lvaGetParentPromotionType(this);
+ LclVarDsc* parentvarDsc = &comp->lvaTable[lvParentLcl];
+ assert(!parentvarDsc->lvRegStruct);
+ if (promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ parentvarDsc->decRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+
+ lvaResetSortAgainFlag(comp);
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ unsigned varNum = (unsigned)(this - comp->lvaTable);
+ assert(&comp->lvaTable[varNum] == this);
+ printf("New refCnts for V%02u: refCnt = %2u, refCntWtd = %s\n", varNum, lvRefCnt, refCntWtd2str(lvRefCntWtd));
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Increment the ref counts for a local variable
+ */
+
+inline void LclVarDsc::incRefCnts(BasicBlock::weight_t weight, Compiler* comp, bool propagate)
+{
+ Compiler::lvaPromotionType promotionType = DUMMY_INIT(Compiler::PROMOTION_TYPE_NONE);
+ if (varTypeIsStruct(lvType))
+ {
+ promotionType = comp->lvaGetPromotionType(this);
+ }
+
+ //
+ // Increment counts on the local itself.
+ //
+ if (lvType != TYP_STRUCT || promotionType != Compiler::PROMOTION_TYPE_INDEPENDENT)
+ {
+ //
+ // Increment lvRefCnt
+ //
+ int newRefCnt = lvRefCnt + 1;
+ if (newRefCnt == (unsigned short)newRefCnt) // lvRefCnt is an "unsigned short". Don't overflow it.
+ {
+ lvRefCnt = (unsigned short)newRefCnt;
+ }
+
+ // This fires when an uninitialized value for 'weight' is used (see lvaMarkRefsWeight)
+ assert(weight != 0xdddddddd);
+ //
+ // Increment lvRefCntWtd
+ //
+ if (weight != 0)
+ {
+ // We double the weight of internal temps
+ //
+ if (lvIsTemp && (weight * 2 > weight))
+ {
+ weight *= 2;
+ }
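+ // For example, an internal temp referenced in a block whose weight is 2 contributes
+ // 4 to lvRefCntWtd here, since internal temps have their weight doubled.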
+
+ unsigned newWeight = lvRefCntWtd + weight;
+ if (newWeight >= lvRefCntWtd)
+ { // lvRefCntWtd is an "unsigned". Don't overflow it
+ lvRefCntWtd = newWeight;
+ }
+ else
+ { // On overflow we assign ULONG_MAX
+ lvRefCntWtd = ULONG_MAX;
+ }
+ }
+ }
+
+ if (varTypeIsStruct(lvType) && propagate)
+ {
+ // For promoted struct locals, increment lvRefCnt on its field locals as well.
+ if (promotionType == Compiler::PROMOTION_TYPE_INDEPENDENT ||
+ promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ for (unsigned i = lvFieldLclStart; i < lvFieldLclStart + lvFieldCnt; ++i)
+ {
+ comp->lvaTable[i].incRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+ }
+
+ if (lvIsStructField && propagate)
+ {
+ // Depending on the promotion type, increment the ref count for the parent struct as well.
+ promotionType = comp->lvaGetParentPromotionType(this);
+ LclVarDsc* parentvarDsc = &comp->lvaTable[lvParentLcl];
+ assert(!parentvarDsc->lvRegStruct);
+ if (promotionType == Compiler::PROMOTION_TYPE_DEPENDENT)
+ {
+ parentvarDsc->incRefCnts(comp->lvaMarkRefsWeight, comp, false); // Don't propagate
+ }
+ }
+
+ lvaResetSortAgainFlag(comp);
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ unsigned varNum = (unsigned)(this - comp->lvaTable);
+ assert(&comp->lvaTable[varNum] == this);
+ printf("New refCnts for V%02u: refCnt = %2u, refCntWtd = %s\n", varNum, lvRefCnt, refCntWtd2str(lvRefCntWtd));
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Set the lvPrefReg field to reg
+ */
+
+inline void LclVarDsc::setPrefReg(regNumber regNum, Compiler* comp)
+{
+ regMaskTP regMask;
+ if (isFloatRegType(TypeGet()))
+ {
+ // Check for FP struct-promoted field being passed in integer register
+ //
+ if (!genIsValidFloatReg(regNum))
+ {
+ return;
+ }
+ regMask = genRegMaskFloat(regNum, TypeGet());
+ }
+ else
+ {
+ regMask = genRegMask(regNum);
+ }
+
+#ifdef _TARGET_ARM_
+ // Don't set a preferred register for a TYP_STRUCT that takes more than one register slot
+ if ((TypeGet() == TYP_STRUCT) && (lvSize() > REGSIZE_BYTES))
+ return;
+#endif
+
+ /* Only interested if we have a new register bit set */
+ if (lvPrefReg & regMask)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ if (lvPrefReg)
+ {
+ printf("Change preferred register for V%02u from ", this - comp->lvaTable);
+ dspRegMask(lvPrefReg);
+ }
+ else
+ {
+ printf("Set preferred register for V%02u", this - comp->lvaTable);
+ }
+ printf(" to ");
+ dspRegMask(regMask);
+ printf("\n");
+ }
+#endif
+
+ /* Overwrite the lvPrefReg field */
+
+ lvPrefReg = (regMaskSmall)regMask;
+
+#ifdef LEGACY_BACKEND
+ // This is specific to the classic register allocator.
+ // While walking the trees during reg predict we set the lvPrefReg mask
+ // and then re-sort the 'tracked' variable when the lvPrefReg mask changes.
+ if (lvTracked)
+ {
+ /* Flag this change, set lvaSortAgain to true */
+ comp->lvaSortAgain = true;
+ }
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Add regMask to the lvPrefReg field
+ */
+
+inline void LclVarDsc::addPrefReg(regMaskTP regMask, Compiler* comp)
+{
+ assert(regMask != RBM_NONE);
+
+#ifdef _TARGET_ARM_
+ // Don't set a preferred register for a TYP_STRUCT that takes more than one register slot
+ if ((lvType == TYP_STRUCT) && (lvSize() > sizeof(void*)))
+ return;
+#endif
+
+ /* Only interested if we have a new register bit set */
+ if (lvPrefReg & regMask)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ if (lvPrefReg)
+ {
+ printf("Additional preferred register for V%02u from ", this - comp->lvaTable);
+ dspRegMask(lvPrefReg);
+ }
+ else
+ {
+ printf("Set preferred register for V%02u", this - comp->lvaTable);
+ }
+ printf(" to ");
+ dspRegMask(lvPrefReg | regMask);
+ printf("\n");
+ }
+#endif
+
+ /* Update the lvPrefReg field */
+
+ lvPrefReg |= regMask;
+
+#ifdef LEGACY_BACKEND
+ // This is specific to the classic register allocator
+ // While walking the trees during reg predict we set the lvPrefReg mask
+ // and then resort the 'tracked' variable when the lvPrefReg mask changes
+ if (lvTracked)
+ {
+ /* Flag this change, set lvaSortAgain to true */
+ comp->lvaSortAgain = true;
+ }
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * The following returns the mask of all tracked locals
+ * referenced in a statement.
+ */
+
+inline VARSET_VALRET_TP Compiler::lvaStmtLclMask(GenTreePtr stmt)
+{
+ GenTreePtr tree;
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ VARSET_TP VARSET_INIT_NOCOPY(lclMask, VarSetOps::MakeEmpty(this));
+
+ assert(stmt->gtOper == GT_STMT);
+ assert(fgStmtListThreaded);
+
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (tree->gtOper != GT_LCL_VAR)
+ {
+ continue;
+ }
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+
+ if (!varDsc->lvTracked)
+ {
+ continue;
+ }
+
+ VarSetOps::UnionD(this, lclMask, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
+ }
+
+ return lclMask;
+}
+
+/*****************************************************************************
+ * Returns true if the lvType is a TYP_REF or a TYP_BYREF.
+ * When the lvType is a TYP_STRUCT it searches the GC layout
+ * of the struct and returns true iff it contains a GC ref.
+ */
+
+inline bool Compiler::lvaTypeIsGC(unsigned varNum)
+{
+ if (lvaTable[varNum].TypeGet() == TYP_STRUCT)
+ {
+ assert(lvaTable[varNum].lvGcLayout != nullptr); // bits are initialized
+ return (lvaTable[varNum].lvStructGcCount != 0);
+ }
+ return (varTypeIsGC(lvaTable[varNum].TypeGet()));
+}
+
+/*****************************************************************************
+ Is this a synchronized instance method? If so, we will need to report "this"
+ in the GC information, so that the EE can release the object lock
+ in case of an exception
+
+ We also need to report "this" and keep it alive for all shared generic
+ code that gets the actual generic context from the "this" pointer and
+ has exception handlers.
+
+ For example, if List<T>::m() is shared between T = object and T = string,
+ then inside m() an exception handler "catch E<T>" needs to be able to fetch
+ the 'this' pointer to find out what 'T' is in order to tell if we
+ should catch the exception or not.
+ */
+
+inline bool Compiler::lvaKeepAliveAndReportThis()
+{
+ if (info.compIsStatic || lvaTable[0].TypeGet() != TYP_REF)
+ {
+ return false;
+ }
+
+#ifdef JIT32_GCENCODER
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ return true;
+
+ if (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS)
+ {
+ // TODO: Check if any of the exception clauses are
+ // typed using a generic type. Else, we do not need to report this.
+ if (info.compXcptnsCount > 0)
+ return true;
+
+ if (opts.compDbgCode)
+ return true;
+
+ if (lvaGenericsContextUsed)
+ return true;
+ }
+#else // !JIT32_GCENCODER
+ // If the generics context is the this pointer we need to report it if either
+ // the VM requires us to keep the generics context alive or it is used in a look-up.
+ // We keep it alive in the lookup scenario, even when the VM didn't ask us to,
+ // because collectible types need the generics context when gc-ing.
+ if ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) &&
+ (lvaGenericsContextUsed || (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_KEEP_ALIVE)))
+ {
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+/*****************************************************************************
+ Similar to lvaKeepAliveAndReportThis
+ */
+
+inline bool Compiler::lvaReportParamTypeArg()
+{
+ if (info.compMethodInfo->options & (CORINFO_GENERICS_CTXT_FROM_METHODDESC | CORINFO_GENERICS_CTXT_FROM_METHODTABLE))
+ {
+ assert(info.compTypeCtxtArg != -1);
+
+ // If the VM requires us to keep the generics context alive and report it (for example, if any catch
+ // clause catches a type that uses a generic parameter of this method) this flag will be set.
+ if (info.compMethodInfo->options & CORINFO_GENERICS_CTXT_KEEP_ALIVE)
+ {
+ return true;
+ }
+
+ // Otherwise, if an exact type parameter is needed in the body, report the generics context.
+ // We do this because collectible types need the generics context when gc-ing.
+ if (lvaGenericsContextUsed)
+ {
+ return true;
+ }
+ }
+
+ // Otherwise, we don't need to report it -- the generics context parameter is unused.
+ return false;
+}
+
+//*****************************************************************************
+
+inline unsigned Compiler::lvaCachedGenericContextArgOffset()
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+
+ return lvaCachedGenericContextArgOffs;
+}
+
+/*****************************************************************************
+ *
+ * Return the stack framed offset of the given variable; set *FPbased to
+ * true if the variable is addressed off of FP, false if it's addressed
+ * off of SP. Note that 'varNum' can be a negated spill-temporary var index.
+ *
+ * mustBeFPBased - strong about whether the base reg is FP. But it is also
+ * strong about not being FPBased after FINAL_FRAME_LAYOUT. i.e.,
+ * it enforces SP based.
+ *
+ * addrModeOffset - is the addressing mode offset, for example: v02 + 0x10
+ * So, V02 itself is at offset sp + 0x10 and then addrModeOffset is what gets
+ * added beyond that.
+ */
+
+inline
+#ifdef _TARGET_ARM_
+ int
+ Compiler::lvaFrameAddress(int varNum, bool mustBeFPBased, regNumber* pBaseReg, int addrModeOffset)
+#else
+ int
+ Compiler::lvaFrameAddress(int varNum, bool* pFPbased)
+#endif
+{
+ assert(lvaDoneFrameLayout != NO_FRAME_LAYOUT);
+
+ int offset;
+ bool FPbased;
+ bool fConservative = false;
+ var_types type = TYP_UNDEF;
+ if (varNum >= 0)
+ {
+ LclVarDsc* varDsc;
+
+ assert((unsigned)varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+ type = varDsc->TypeGet();
+ bool isPrespilledArg = false;
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ isPrespilledArg = varDsc->lvIsParam && compIsProfilerHookNeeded() &&
+ lvaIsPreSpilled(varNum, codeGen->regSet.rsMaskPreSpillRegs(false));
+#endif
+
+ // If we have finished with register allocation, and this isn't a stack-based local,
+ // check that this has a valid stack location.
+ if (lvaDoneFrameLayout > REGALLOC_FRAME_LAYOUT && !varDsc->lvOnFrame)
+ {
+#ifdef _TARGET_AMD64_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On amd64, every param has a stack location, except on Unix-like systems.
+ assert(varDsc->lvIsParam);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // For !LEGACY_BACKEND on x86, a stack parameter that is enregistered will have a stack location.
+ assert(varDsc->lvIsParam && !varDsc->lvIsRegArg);
+#else // !(_TARGET_AMD64 || !(defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+ // Otherwise, we only have a valid stack location for:
+ // A parameter that was passed on the stack, being homed into its register home,
+ // or a prespilled argument on arm under profiler.
+ assert((varDsc->lvIsParam && !varDsc->lvIsRegArg && varDsc->lvRegister) || isPrespilledArg);
+#endif // !(_TARGET_AMD64 || !(defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)))
+ }
+
+ FPbased = varDsc->lvFramePointerBased;
+
+#ifdef DEBUG
+#if FEATURE_FIXED_OUT_ARGS
+ if ((unsigned)varNum == lvaOutgoingArgSpaceVar)
+ {
+ assert(FPbased == false);
+ }
+ else
+#endif
+ {
+#if DOUBLE_ALIGN
+ assert(FPbased == (isFramePointerUsed() || (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)));
+#else
+#ifdef _TARGET_X86_
+ assert(FPbased == isFramePointerUsed());
+#endif
+#endif
+ }
+#endif // DEBUG
+
+ offset = varDsc->lvStkOffs;
+ }
+ else // It's a spill-temp
+ {
+ FPbased = isFramePointerUsed();
+ if (lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ TempDsc* tmpDsc = tmpFindNum(varNum);
+#ifndef LEGACY_BACKEND
+ // The temp might be in use, since this might be during code generation.
+ if (tmpDsc == nullptr)
+ {
+ tmpDsc = tmpFindNum(varNum, Compiler::TEMP_USAGE_USED);
+ }
+#endif // !LEGACY_BACKEND
+ assert(tmpDsc != nullptr);
+ offset = tmpDsc->tdTempOffs();
+ type = tmpDsc->tdTempType();
+ }
+ else
+ {
+ // This value is an estimate until we calculate the
+ // offset after the final frame layout
+ // ---------------------------------------------------
+ // : :
+ // +-------------------------+ base --+
+ // | LR, ++N for ARM | | frameBaseOffset (= N)
+ // +-------------------------+ |
+ // | R11, ++N for ARM | <---FP |
+ // +-------------------------+ --+
+ // | compCalleeRegsPushed - N| | lclFrameOffset
+ // +-------------------------+ --+
+ // | lclVars | |
+ // +-------------------------+ |
+ // | tmp[MAX_SPILL_TEMP] | |
+ // | tmp[1] | |
+ // | tmp[0] | | compLclFrameSize
+ // +-------------------------+ |
+ // | outgoingArgSpaceSize | |
+ // +-------------------------+ --+
+ // | | <---SP
+ // : :
+ // ---------------------------------------------------
+
+ type = compFloatingPointUsed ? TYP_FLOAT : TYP_INT;
+ fConservative = true;
+ if (!FPbased)
+ {
+ // Worst case stack based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if FEATURE_FIXED_OUT_ARGS
+ int outGoingArgSpaceSize = lvaOutgoingArgSpaceSize;
+#else
+ int outGoingArgSpaceSize = 0;
+#endif
+ offset = outGoingArgSpaceSize + max(-varNum * TARGET_POINTER_SIZE, (int)lvaGetMaxSpillTempSize());
+ }
+ else
+ {
+ // Worst case FP based offset.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ offset = codeGen->genCallerSPtoInitialSPdelta() - codeGen->genCallerSPtoFPdelta();
+#else
+ offset = -(codeGen->genTotalFrameSize());
+#endif
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ if (FPbased)
+ {
+ if (mustBeFPBased)
+ {
+ *pBaseReg = REG_FPBASE;
+ }
+ // Change the FP-based addressing to the SP-based addressing when possible because
+ // it generates smaller code on ARM. See frame picture above for the math.
+ else
+ {
+ // If this is the final frame layout phase, we don't have a choice: we must stick
+ // with whichever of FP-based or SP-based addressing we decided on in the earlier phase,
+ // because we have already selected the instruction. Min-opts will have R10 enabled, so just
+ // use that.
+
+ int spOffset = fConservative ? compLclFrameSize : offset + codeGen->genSPtoFPdelta();
+ int actualOffset = (spOffset + addrModeOffset);
+ int ldrEncodeLimit = (varTypeIsFloating(type) ? 0x3FC : 0xFFC);
+ // Use ldr sp imm encoding.
+ if (lvaDoneFrameLayout == FINAL_FRAME_LAYOUT || opts.MinOpts() || (actualOffset <= ldrEncodeLimit))
+ {
+ offset = spOffset;
+ *pBaseReg = compLocallocUsed ? REG_SAVED_LOCALLOC_SP : REG_SPBASE;
+ }
+ // Use ldr +/-imm8 encoding.
+ else if (offset >= -0x7C && offset <= ldrEncodeLimit)
+ {
+ *pBaseReg = REG_FPBASE;
+ }
+ // Use a single movw. Prefer locals.
+ else if (actualOffset <= 0xFFFC) // Fix 383910 ARM ILGEN
+ {
+ offset = spOffset;
+ *pBaseReg = compLocallocUsed ? REG_SAVED_LOCALLOC_SP : REG_SPBASE;
+ }
+ // Use movw, movt.
+ else
+ {
+ *pBaseReg = REG_FPBASE;
+ }
+ }
+ }
+ else
+ {
+ *pBaseReg = REG_SPBASE;
+ }
+#else
+ *pFPbased = FPbased;
+#endif
+
+ return offset;
+}
+
+inline bool Compiler::lvaIsParameter(unsigned varNum)
+{
+ LclVarDsc* varDsc;
+
+ assert(varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+
+ return varDsc->lvIsParam;
+}
+
+inline bool Compiler::lvaIsRegArgument(unsigned varNum)
+{
+ LclVarDsc* varDsc;
+
+ assert(varNum < lvaCount);
+ varDsc = lvaTable + varNum;
+
+ return varDsc->lvIsRegArg;
+}
+
+inline BOOL Compiler::lvaIsOriginalThisArg(unsigned varNum)
+{
+ assert(varNum < lvaCount);
+
+ BOOL isOriginalThisArg = (varNum == info.compThisArg) && (info.compIsStatic == false);
+
+#ifdef DEBUG
+ if (isOriginalThisArg)
+ {
+ LclVarDsc* varDsc = lvaTable + varNum;
+ // Should never write to or take the address of the original 'this' arg
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef JIT32_GCENCODER
+ // With the general encoder/decoder, when the original 'this' arg is needed as a generics context param, we
+ // copy to a new local, and mark the original as DoNotEnregister, to
+ // ensure that it is stack-allocated. It should not be the case that the original one can be modified -- it
+ // should not be written to, or address-exposed.
+ assert(!varDsc->lvArgWrite &&
+ (!varDsc->lvAddrExposed || ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0)));
+#else
+ assert(!varDsc->lvArgWrite && !varDsc->lvAddrExposed);
+#endif
+ }
+#endif
+
+ return isOriginalThisArg;
+}
+
+inline BOOL Compiler::lvaIsOriginalThisReadOnly()
+{
+ return lvaArg0Var == info.compThisArg;
+}
+
+/*****************************************************************************
+ *
+ * The following is used to detect the cases where the same local variable#
+ * is used both as a long/double value and a 32-bit value and/or both as an
+ * integer/address and a float value.
+ */
+
+/* static */ inline unsigned Compiler::lvaTypeRefMask(var_types type)
+{
+ const static BYTE lvaTypeRefMasks[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) howUsed,
+#include "typelist.h"
+#undef DEF_TP
+ };
+
+ assert((unsigned)type < sizeof(lvaTypeRefMasks));
+ assert(lvaTypeRefMasks[type] != 0);
+
+ return lvaTypeRefMasks[type];
+}
+
+/*****************************************************************************
+ *
+ * The following return the 'actual' (stack-normalized) type and the declared
+ * type, respectively, of the given local variable.
+ */
+
+inline var_types Compiler::lvaGetActualType(unsigned lclNum)
+{
+ return genActualType(lvaGetRealType(lclNum));
+}
+
+inline var_types Compiler::lvaGetRealType(unsigned lclNum)
+{
+ return lvaTable[lclNum].TypeGet();
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Importer XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+inline unsigned Compiler::compMapILargNum(unsigned ILargNum)
+{
+ assert(ILargNum < info.compILargsCount || tiVerificationNeeded);
+
+ // Note that this works because if compRetBuffArg/compTypeCtxtArg/lvVarargsHandleArg are not present
+ // they will be BAD_VAR_NUM (MAX_UINT), which is larger than any variable number.
+ if (ILargNum >= info.compRetBuffArg)
+ {
+ ILargNum++;
+ assert(ILargNum < info.compLocalsCount || tiVerificationNeeded); // compLocals count already adjusted.
+ }
+
+ if (ILargNum >= (unsigned)info.compTypeCtxtArg)
+ {
+ ILargNum++;
+ assert(ILargNum < info.compLocalsCount || tiVerificationNeeded); // compLocals count already adjusted.
+ }
+
+ if (ILargNum >= (unsigned)lvaVarargsHandleArg)
+ {
+ ILargNum++;
+ assert(ILargNum < info.compLocalsCount || tiVerificationNeeded); // compLocals count already adjusted.
+ }
+
+ assert(ILargNum < info.compArgsCount || tiVerificationNeeded);
+ return (ILargNum);
+}
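+
+// For example, if the method has a hidden return buffer argument at position 1
+// (info.compRetBuffArg == 1) and no type context or varargs cookie, then IL arg 0
+// maps to JIT arg number 0 while IL args 1, 2, ... map to JIT arg numbers 2, 3, ...
+// (skipping over the return buffer).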
+
+// For ARM varargs, all arguments go in integer registers, so swizzle the type
+inline var_types Compiler::mangleVarArgsType(var_types type)
+{
+#ifdef _TARGET_ARMARCH_
+ if (info.compIsVarArgs || opts.compUseSoftFP)
+ {
+ switch (type)
+ {
+ case TYP_FLOAT:
+ return TYP_INT;
+ case TYP_DOUBLE:
+ return TYP_LONG;
+ default:
+ break;
+ }
+ }
+#endif // _TARGET_ARMARCH_
+ return type;
+}
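+
+// For example, under ARM varargs a TYP_DOUBLE argument is reported as TYP_LONG so that
+// it is passed in a pair of integer registers rather than in a floating-point register.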
+
+// For CORECLR there is no vararg on System V systems.
+#if FEATURE_VARARG
+inline regNumber Compiler::getCallArgIntRegister(regNumber floatReg)
+{
+#ifdef _TARGET_AMD64_
+ switch (floatReg)
+ {
+ case REG_XMM0:
+ return REG_RCX;
+ case REG_XMM1:
+ return REG_RDX;
+ case REG_XMM2:
+ return REG_R8;
+ case REG_XMM3:
+ return REG_R9;
+ default:
+ unreached();
+ }
+#else // !_TARGET_AMD64_
+ // How will float args be passed for RyuJIT/x86?
+ NYI("getCallArgIntRegister for RyuJIT/x86");
+ return REG_NA;
+#endif // !_TARGET_AMD64_
+}
+
+inline regNumber Compiler::getCallArgFloatRegister(regNumber intReg)
+{
+#ifdef _TARGET_AMD64_
+ switch (intReg)
+ {
+ case REG_RCX:
+ return REG_XMM0;
+ case REG_RDX:
+ return REG_XMM1;
+ case REG_R8:
+ return REG_XMM2;
+ case REG_R9:
+ return REG_XMM3;
+ default:
+ unreached();
+ }
+#else // !_TARGET_AMD64_
+ // How will float args be passed for RyuJIT/x86?
+ NYI("getCallArgFloatRegister for RyuJIT/x86");
+ return REG_NA;
+#endif // !_TARGET_AMD64_
+}
+#endif // FEATURE_VARARG
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Register Allocator XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+
+inline bool rpCanAsgOperWithoutReg(GenTreePtr op, bool lclvar)
+{
+ var_types type;
+
+ switch (op->OperGet())
+ {
+ case GT_CNS_LNG:
+ case GT_CNS_INT:
+ return true;
+ case GT_LCL_VAR:
+ type = genActualType(op->TypeGet());
+ if (lclvar && ((type == TYP_INT) || (type == TYP_REF) || (type == TYP_BYREF)))
+ {
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX FlowGraph XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+inline bool Compiler::compCanEncodePtrArgCntMax()
+{
+#ifdef JIT32_GCENCODER
+ // DDB 204533:
+ // The GC encoding for fully interruptible methods does not
+ // support more than 1023 pushed arguments, so we have to
+ // use a partially interruptible GC info/encoding.
+ //
+ return (fgPtrArgCntMax < MAX_PTRARG_OFS);
+#else // JIT32_GCENCODER
+ return true;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Call the given function pointer for all nodes in the tree. The 'visitor'
+ * fn should return one of the following values:
+ *
+ * WALK_ABORT stop walking and return immediately
+ * WALK_CONTINUE continue walking
+ * WALK_SKIP_SUBTREES don't walk any subtrees of the node just visited
+ *
+ * computeStack - true if we want to make stack visible to callback function
+ */
+
+inline Compiler::fgWalkResult Compiler::fgWalkTreePre(
+ GenTreePtr* pTree, fgWalkPreFn* visitor, void* callBackData, bool lclVarsOnly, bool computeStack)
+
+{
+ fgWalkData walkData;
+
+ walkData.compiler = this;
+ walkData.wtprVisitorFn = visitor;
+ walkData.pCallbackData = callBackData;
+ walkData.parent = nullptr;
+ walkData.wtprLclsOnly = lclVarsOnly;
+#ifdef DEBUG
+ walkData.printModified = false;
+#endif
+
+ fgWalkResult result;
+ if (computeStack)
+ {
+ GenTreeStack parentStack(this);
+ walkData.parentStack = &parentStack;
+ result = fgWalkTreePreRec<true>(pTree, &walkData);
+ }
+ else
+ {
+ walkData.parentStack = nullptr;
+ result = fgWalkTreePreRec<false>(pTree, &walkData);
+ }
+
+#ifdef DEBUG
+ if (verbose && walkData.printModified)
+ {
+ gtDispTree(*pTree);
+ }
+#endif
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Same as above, except the tree walk is performed in a depth-first fashion.
+ * The 'visitor' fn should return one of the following values:
+ *
+ * WALK_ABORT stop walking and return immediately
+ * WALK_CONTINUE continue walking
+ *
+ * computeStack - true if we want to make stack visible to callback function
+ */
+
+inline Compiler::fgWalkResult Compiler::fgWalkTreePost(GenTreePtr* pTree,
+ fgWalkPostFn* visitor,
+ void* callBackData,
+ bool computeStack)
+{
+ fgWalkData walkData;
+
+ walkData.compiler = this;
+ walkData.wtpoVisitorFn = visitor;
+ walkData.pCallbackData = callBackData;
+ walkData.parent = nullptr;
+
+ fgWalkResult result;
+ if (computeStack)
+ {
+ GenTreeStack parentStack(this);
+ walkData.parentStack = &parentStack;
+ result = fgWalkTreePostRec<true>(pTree, &walkData);
+ }
+ else
+ {
+ walkData.parentStack = nullptr;
+ result = fgWalkTreePostRec<false>(pTree, &walkData);
+ }
+
+ assert(result == WALK_CONTINUE || result == WALK_ABORT);
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Has this block been added to throw an inlined exception
+ * Returns true if the block was added to throw one of:
+ * range-check exception
+ * argument exception (used by feature SIMD)
+ * argument range-check exception (used by feature SIMD)
+ * divide by zero exception (Not used on X86/X64)
+ * null reference exception (Not currently used)
+ * overflow exception
+ */
+
+inline bool Compiler::fgIsThrowHlpBlk(BasicBlock* block)
+{
+ if (!fgIsCodeAdded())
+ {
+ return false;
+ }
+
+ if (!(block->bbFlags & BBF_INTERNAL) || block->bbJumpKind != BBJ_THROW)
+ {
+ return false;
+ }
+
+ GenTree* call = block->lastNode();
+
+#ifdef DEBUG
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (LIR::Range::ReverseIterator node = blockRange.rbegin(), end = blockRange.rend(); node != end; ++node)
+ {
+ if (node->OperGet() == GT_CALL)
+ {
+ assert(*node == call);
+ assert(node == blockRange.rbegin());
+ break;
+ }
+ }
+ }
+#endif
+
+ if (!call || (call->gtOper != GT_CALL))
+ {
+ return false;
+ }
+
+ if (!((call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) ||
+ (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) ||
+#if COR_JIT_EE_VERSION > 460
+ (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) ||
+#endif // COR_JIT_EE_VERSION
+ (call->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_OVERFLOW))))
+ {
+ return false;
+ }
+
+ // We can get to this point for blocks that we didn't create as throw helper blocks
+ // under stress, with crazy flow graph optimizations. So, walk the fgAddCodeList
+ // for the final determination.
+
+ for (AddCodeDsc* add = fgAddCodeList; add; add = add->acdNext)
+ {
+ if (block == add->acdDstBlk)
+ {
+ return add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW
+#if COR_JIT_EE_VERSION > 460
+ || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN
+#endif // COR_JIT_EE_VERSION
+ ;
+ }
+ }
+
+ // We couldn't find it in the fgAddCodeList
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Return the stackLevel of the inserted block that throws an exception
+ * (by calling the EE helper).
+ */
+
+inline unsigned Compiler::fgThrowHlpBlkStkLevel(BasicBlock* block)
+{
+ for (AddCodeDsc* add = fgAddCodeList; add; add = add->acdNext)
+ {
+ if (block == add->acdDstBlk)
+ {
+ // Compute assert cond separately as assert macro cannot have conditional compilation directives.
+ bool cond =
+ (add->acdKind == SCK_RNGCHK_FAIL || add->acdKind == SCK_DIV_BY_ZERO || add->acdKind == SCK_OVERFLOW
+#if COR_JIT_EE_VERSION > 460
+ || add->acdKind == SCK_ARG_EXCPN || add->acdKind == SCK_ARG_RNG_EXCPN
+#endif // COR_JIT_EE_VERSION
+ );
+ assert(cond);
+
+ // TODO: bbTgtStkDepth is DEBUG-only.
+ // Should we use it regularly and avoid this search?
+ assert(block->bbTgtStkDepth == add->acdStkLvl);
+ return add->acdStkLvl;
+ }
+ }
+
+ noway_assert(!"fgThrowHlpBlkStkLevel should only be called if fgIsThrowHlpBlk() is true, but we can't find the "
+ "block in the fgAddCodeList list");
+
+ /* We couldn't find the basic block: it must not have been a throw helper block */
+
+ return 0;
+}
+
+/*
+ Small inline function to change a given block to a throw block.
+
+*/
+inline void Compiler::fgConvertBBToThrowBB(BasicBlock* block)
+{
+ block->bbJumpKind = BBJ_THROW;
+ block->bbSetRunRarely(); // any block with a throw is rare
+}
+
+/*****************************************************************************
+ *
+ * Return true if we've added any new basic blocks.
+ */
+
+inline bool Compiler::fgIsCodeAdded()
+{
+ return fgAddCodeModf;
+}
+
+/*****************************************************************************
+ Is the offset too big?
+*/
+inline bool Compiler::fgIsBigOffset(size_t offset)
+{
+ return (offset > compMaxUncheckedOffsetForNullObject);
+}
+
+/***********************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer division,
+* which currently occurs only if "divisor" is a positive integer constant and a power of 2
+* other than 1 and INT_MIN.
+*/
+
+inline bool Compiler::fgIsSignedDivOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ ssize_t ival = divisor->gtIntConCommon.IconValue();
+
+ /* Is the divisor a power of 2 (excluding INT_MIN) ?.
+ The intent of the third condition below is to exclude INT_MIN on a 64-bit platform
+ and during codegen we need to encode ival-1 within 32 bits. If ival were INT_MIN
+ then ival-1 would cause underflow.
+
+ Note that we could put #ifdef around the third check so that it is applied only on
+ 64-bit platforms but the below is a more generic way to express it as it is a no-op
+ on 32-bit platforms.
+ */
+ return (ival > 0 && genMaxOneBit(ival) && ((ssize_t)(int)ival == ival));
+ }
+
+ return false;
+}
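+
+// For example, for 'x / 8' this returns true (8 is a positive power of 2 that fits in
+// 32 bits), while for 'x / 6' or 'x / -8' it returns false and the back-end emits an
+// ordinary integer division.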
+
+/************************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer division,
+* which currently occurs if "divisor" is an unsigned integer constant and a power of 2
+* other than 1 and zero.
+*/
+
+inline bool Compiler::fgIsUnsignedDivOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ size_t ival = divisor->gtIntCon.gtIconVal;
+
+ /* Is the divisor a power of 2 ? */
+ return ival && genMaxOneBit(ival);
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer modulo,
+* which currently occurs if "divisor" is a positive integer constant and a power of 2
+* other than zero.
+*/
+
+inline bool Compiler::fgIsSignedModOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ size_t ival = divisor->gtIntCon.gtIconVal;
+
+ /* Is the divisor a power of 2 ? */
+ return ssize_t(ival) > 0 && genMaxOneBit(ival);
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+*
+* Returns true if the back-end will do something other than an ordinary integer modulo,
+* which currently occurs if "divisor" is a positive integer constant and a power of 2
+* other than zero.
+*/
+
+inline bool Compiler::fgIsUnsignedModOptimizable(GenTreePtr divisor)
+{
+ if (!opts.MinOpts() && divisor->IsCnsIntOrI())
+ {
+ size_t ival = divisor->gtIntCon.gtIconVal;
+
+ /* Is the divisor a power of 2 ? */
+ return ival != 0 && ival == (unsigned)genFindLowestBit(ival);
+ }
+
+ return false;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX TempsInfo XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+
+/* static */ inline unsigned Compiler::tmpSlot(unsigned size)
+{
+ noway_assert(size >= sizeof(int));
+ noway_assert(size <= TEMP_MAX_SIZE);
+ assert((size % sizeof(int)) == 0);
+
+ assert(size < UINT32_MAX);
+ return size / sizeof(int) - 1;
+}
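+
+// For example, a 4-byte temp maps to slot 0, an 8-byte temp to slot 1, and so on.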
+
+/*****************************************************************************
+ *
+ * Finish allocating temps - should be called each time after a pass is made
+ * over a function body.
+ */
+
+inline void Compiler::tmpEnd()
+{
+#ifdef DEBUG
+ if (verbose && (tmpCount > 0))
+ {
+ printf("%d tmps used\n", tmpCount);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Shuts down the temp-tracking code. Should be called once per function
+ * compiled.
+ */
+
+inline void Compiler::tmpDone()
+{
+#ifdef DEBUG
+ unsigned count;
+ TempDsc* temp;
+
+ assert(tmpAllFree());
+ for (temp = tmpListBeg(), count = temp ? 1 : 0; temp; temp = tmpListNxt(temp), count += temp ? 1 : 0)
+ {
+ assert(temp->tdLegalOffset());
+ }
+
+ // Make sure that all the temps were released
+ assert(count == tmpCount);
+ assert(tmpGetCount == 0);
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+inline bool Compiler::shouldUseVerboseTrees()
+{
+ return (JitConfig.JitDumpVerboseTrees() == 1);
+}
+
+inline bool Compiler::shouldUseVerboseSsa()
+{
+ return (JitConfig.JitDumpVerboseSsa() == 1);
+}
+
+//------------------------------------------------------------------------
+// shouldDumpASCIITrees: Should we use only ASCII characters for tree dumps?
+//
+// Notes:
+// This is set to default to 1 in clrConfigValues.h
+
+inline bool Compiler::shouldDumpASCIITrees()
+{
+ return (JitConfig.JitDumpASCII() == 1);
+}
+
+/*****************************************************************************
+ * Should we enable JitStress mode?
+ * 0: No stress
+ * !=2: Vary stress. Performance will be slightly/moderately degraded
+ * 2: Check-all stress. Performance will be REALLY horrible
+ */
+
+inline DWORD getJitStressLevel()
+{
+ return JitConfig.JitStress();
+}
+
+/*****************************************************************************
+ * Should we do the strict check for a non-virtual call to a virtual method?
+ */
+
+inline DWORD StrictCheckForNonVirtualCallToVirtualMethod()
+{
+ return JitConfig.JitStrictCheckForNonVirtualCallToVirtualMethod() == 1;
+}
+
+#endif // DEBUG
+
+/*****************************************************************************/
+/* Map a register argument number ("RegArgNum") to a register number ("RegNum").
+ * A RegArgNum is in this range:
+ * [0, MAX_REG_ARG) -- for integer registers
+ * [0, MAX_FLOAT_REG_ARG) -- for floating point registers
+ * Note that RegArgNum's are overlapping for integer and floating-point registers,
+ * while RegNum's are not (for ARM anyway, though for x86, it might be different).
+ * If we have a fixed return buffer register and are given its index,
+ * we return the fixed return buffer register
+ */
+
+inline regNumber genMapIntRegArgNumToRegNum(unsigned argNum)
+{
+ if (hasFixedRetBuffReg() && (argNum == theFixedRetBuffArgNum()))
+ {
+ return theFixedRetBuffReg();
+ }
+
+ assert(argNum < ArrLen(intArgRegs));
+
+ return intArgRegs[argNum];
+}
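+
+// For example, on Windows x64 argNums 0..3 map to RCX, RDX, R8 and R9 respectively
+// (the exact mapping comes from the target's intArgRegs table).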
+
+inline regNumber genMapFloatRegArgNumToRegNum(unsigned argNum)
+{
+#ifndef _TARGET_X86_
+ assert(argNum < ArrLen(fltArgRegs));
+
+ return fltArgRegs[argNum];
+#else
+ assert(!"no x86 float arg regs\n");
+ return REG_NA;
+#endif
+}
+
+__forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type)
+{
+ if (varTypeIsFloating(type))
+ {
+ return genMapFloatRegArgNumToRegNum(argNum);
+ }
+ else
+ {
+ return genMapIntRegArgNumToRegNum(argNum);
+ }
+}
+
+/*****************************************************************************/
+/* Map a register argument number ("RegArgNum") to a register mask of the associated register.
+ * Note that for floating-pointer registers, only the low register for a register pair
+ * (for a double on ARM) is returned.
+ */
+
+inline regMaskTP genMapIntRegArgNumToRegMask(unsigned argNum)
+{
+ assert(argNum < ArrLen(intArgMasks));
+
+ return intArgMasks[argNum];
+}
+
+inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum)
+{
+#ifndef _TARGET_X86_
+ assert(argNum < ArrLen(fltArgMasks));
+
+ return fltArgMasks[argNum];
+#else
+ assert(!"no x86 float arg regs\n");
+ return RBM_NONE;
+#endif
+}
+
+__forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type)
+{
+ regMaskTP result;
+ if (varTypeIsFloating(type))
+ {
+ result = genMapFloatRegArgNumToRegMask(argNum);
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ assert((result & RBM_DBL_REGS) != 0);
+ result |= (result << 1);
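+ // On ARM a TYP_DOUBLE occupies an even/odd float register pair, so include the
+ // mask bit of the odd (upper) half as well.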
+ }
+#endif
+ }
+ else
+ {
+ result = genMapIntRegArgNumToRegMask(argNum);
+ }
+ return result;
+}
+
+/*****************************************************************************/
+/* Map a register number ("RegNum") to a register argument number ("RegArgNum")
+ * If we have a fixed return buffer register we return theFixedRetBuffArgNum
+ */
+
+inline unsigned genMapIntRegNumToRegArgNum(regNumber regNum)
+{
+ assert(genRegMask(regNum) & fullIntArgRegMask());
+
+ switch (regNum)
+ {
+ case REG_ARG_0:
+ return 0;
+#if MAX_REG_ARG >= 2
+ case REG_ARG_1:
+ return 1;
+#if MAX_REG_ARG >= 3
+ case REG_ARG_2:
+ return 2;
+#if MAX_REG_ARG >= 4
+ case REG_ARG_3:
+ return 3;
+#if MAX_REG_ARG >= 5
+ case REG_ARG_4:
+ return 4;
+#if MAX_REG_ARG >= 6
+ case REG_ARG_5:
+ return 5;
+#if MAX_REG_ARG >= 7
+ case REG_ARG_6:
+ return 6;
+#if MAX_REG_ARG >= 8
+ case REG_ARG_7:
+ return 7;
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+#endif
+ default:
+ // Check for the Arm64 fixed return buffer argument register
+ if (hasFixedRetBuffReg() && (regNum == theFixedRetBuffReg()))
+ {
+ return theFixedRetBuffArgNum();
+ }
+ else
+ {
+ assert(!"invalid register arg register");
+ return BAD_VAR_NUM;
+ }
+ }
+}
+
+inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum)
+{
+ assert(genRegMask(regNum) & RBM_FLTARG_REGS);
+
+#ifdef _TARGET_ARM_
+ return regNum - REG_F0;
+#elif defined(_TARGET_ARM64_)
+ return regNum - REG_V0;
+#elif defined(UNIX_AMD64_ABI)
+ return regNum - REG_FLTARG_0;
+#else
+
+#if MAX_FLOAT_REG_ARG >= 1
+ switch (regNum)
+ {
+ case REG_FLTARG_0:
+ return 0;
+#if MAX_REG_ARG >= 2
+ case REG_FLTARG_1:
+ return 1;
+#if MAX_REG_ARG >= 3
+ case REG_FLTARG_2:
+ return 2;
+#if MAX_REG_ARG >= 4
+ case REG_FLTARG_3:
+ return 3;
+#if MAX_REG_ARG >= 5
+ case REG_FLTARG_4:
+ return 4;
+#endif
+#endif
+#endif
+#endif
+ default:
+ assert(!"invalid register arg register");
+ return BAD_VAR_NUM;
+ }
+#else
+ assert(!"flt reg args not allowed");
+ return BAD_VAR_NUM;
+#endif
+#endif // !arm
+}
+
+inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type)
+{
+ if (varTypeIsFloating(type))
+ {
+ return genMapFloatRegNumToRegArgNum(regNum);
+ }
+ else
+ {
+ return genMapIntRegNumToRegArgNum(regNum);
+ }
+}
+
+/*****************************************************************************/
+/* Return a register mask with the first 'numRegs' argument registers set.
+ */
+
+inline regMaskTP genIntAllRegArgMask(unsigned numRegs)
+{
+ assert(numRegs <= MAX_REG_ARG);
+
+ regMaskTP result = RBM_NONE;
+ for (unsigned i = 0; i < numRegs; i++)
+ {
+ result |= intArgMasks[i];
+ }
+ return result;
+}
+
+#if !FEATURE_STACK_FP_X87
+
+inline regMaskTP genFltAllRegArgMask(unsigned numRegs)
+{
+ assert(numRegs <= MAX_FLOAT_REG_ARG);
+
+ regMaskTP result = RBM_NONE;
+ for (unsigned i = 0; i < numRegs; i++)
+ {
+ result |= fltArgMasks[i];
+ }
+ return result;
+}
+
+#endif // !FEATURE_STACK_FP_X87
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Liveness XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * Update the current set of live variables based on the life set recorded
+ * in the given expression tree node.
+ */
+
+template <bool ForCodeGen>
+inline void Compiler::compUpdateLife(GenTreePtr tree)
+{
+ // TODO-Cleanup: We shouldn't really be calling this more than once
+ if (tree == compCurLifeTree)
+ {
+ return;
+ }
+
+ if (!tree->OperIsNonPhiLocal() && fgIsIndirOfAddrOfLocal(tree) == nullptr)
+ {
+ return;
+ }
+
+ compUpdateLifeVar<ForCodeGen>(tree);
+}
+
+template <bool ForCodeGen>
+inline void Compiler::compUpdateLife(VARSET_VALARG_TP newLife)
+{
+ if (!VarSetOps::Equal(this, compCurLife, newLife))
+ {
+ compChangeLife<ForCodeGen>(newLife DEBUGARG(nullptr));
+ }
+#ifdef DEBUG
+ else
+ {
+ if (verbose)
+ {
+ printf("Liveness not changing: %s ", VarSetOps::ToString(this, compCurLife));
+ dumpConvertedVarSet(this, compCurLife);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * We stash cookies in basic blocks for the code emitter; this call retrieves
+ * the cookie associated with the given basic block.
+ */
+
+inline void* emitCodeGetCookie(BasicBlock* block)
+{
+ assert(block);
+ return block->bbEmitCookie;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Optimizer XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if LOCAL_ASSERTION_PROP
+
+/*****************************************************************************
+ *
+ * The following resets the value assignment table,
+ * which is used only during local assertion prop.
+ */
+
+inline void Compiler::optAssertionReset(AssertionIndex limit)
+{
+ PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount);
+
+ while (optAssertionCount > limit)
+ {
+ AssertionIndex index = optAssertionCount;
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ optAssertionCount--;
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ assert(lclNum < lvaTableCnt);
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+
+ //
+ // Find the Copy assertions
+ //
+ if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ //
+ // op2.lcl.lclNum no longer depends upon this assertion
+ //
+ lclNum = curAssertion->op2.lcl.lclNum;
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+ }
+ }
+ while (optAssertionCount < limit)
+ {
+ AssertionIndex index = ++optAssertionCount;
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+
+ //
+ // Check for Copy assertions
+ //
+ if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ //
+ // op2.lcl.lclNum now depends upon this assertion
+ //
+ lclNum = curAssertion->op2.lcl.lclNum;
+ BitVecOps::AddElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * The following removes the i-th entry in the value assignment table,
+ * which is used only during local assertion prop.
+ */
+
+inline void Compiler::optAssertionRemove(AssertionIndex index)
+{
+ assert(index > 0);
+ assert(index <= optAssertionCount);
+ PREFAST_ASSUME(optAssertionCount <= optMaxAssertionCount);
+
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+ // There are two cases to consider. If (index == optAssertionCount), then the last
+ // entry in the table is being removed; that happens automatically when
+ // optAssertionCount is decremented, and we can simply clear the optAssertionDep bits.
+ // Otherwise (index < optAssertionCount), we overwrite the index-th entry in the
+ // table with the data found at the end of the table. Since we are reordering the
+ // table, the optAssertionDep bits need to be recreated: optAssertionReset(0)
+ // followed by optAssertionReset(newAssertionCount) rebuilds them correctly.
+ //
+ if (index == optAssertionCount)
+ {
+ unsigned lclNum = curAssertion->op1.lcl.lclNum;
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+
+ //
+ // Check for Copy assertions
+ //
+ if ((curAssertion->assertionKind == OAK_EQUAL) && (curAssertion->op1.kind == O1K_LCLVAR) &&
+ (curAssertion->op2.kind == O2K_LCLVAR_COPY))
+ {
+ //
+ // op2.lcl.lclNum no longer depends upon this assertion
+ //
+ lclNum = curAssertion->op2.lcl.lclNum;
+ BitVecOps::RemoveElemD(apTraits, GetAssertionDep(lclNum), index - 1);
+ }
+
+ optAssertionCount--;
+ }
+ else
+ {
+ AssertionDsc* lastAssertion = optGetAssertion(optAssertionCount);
+ AssertionIndex newAssertionCount = optAssertionCount - 1;
+
+ optAssertionReset(0); // This makes optAssertionCount equal to 0
+
+ memcpy(curAssertion, // the entry to be removed
+ lastAssertion, // last entry in the table
+ sizeof(AssertionDsc));
+
+ optAssertionReset(newAssertionCount);
+ }
+}
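+
+// Illustrative example (a sketch of the behavior above, not additional functionality):
+// with 5 assertions in the table, optAssertionRemove(2) copies assertion #5 over slot #2,
+// leaves 4 assertions, and rebuilds the optAssertionDep bits via the two optAssertionReset
+// calls; optAssertionRemove(5) simply drops the last entry and clears its dep bits.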
+#endif // LOCAL_ASSERTION_PROP
+
+inline void Compiler::LoopDsc::AddModifiedField(Compiler* comp, CORINFO_FIELD_HANDLE fldHnd)
+{
+ if (lpFieldsModified == nullptr)
+ {
+ lpFieldsModified =
+ new (comp->getAllocatorLoopHoist()) Compiler::LoopDsc::FieldHandleSet(comp->getAllocatorLoopHoist());
+ }
+ lpFieldsModified->Set(fldHnd, true);
+}
+
+inline void Compiler::LoopDsc::AddModifiedElemType(Compiler* comp, CORINFO_CLASS_HANDLE structHnd)
+{
+ if (lpArrayElemTypesModified == nullptr)
+ {
+ lpArrayElemTypesModified =
+ new (comp->getAllocatorLoopHoist()) Compiler::LoopDsc::ClassHandleSet(comp->getAllocatorLoopHoist());
+ }
+ lpArrayElemTypesModified->Set(structHnd, true);
+}
+
+inline void Compiler::LoopDsc::VERIFY_lpIterTree()
+{
+#ifdef DEBUG
+ assert(lpFlags & LPFLG_ITER);
+
+ // iterTree should be "lcl <op>= const"
+
+ assert(lpIterTree);
+
+ assert(lpIterTree->OperKind() & GTK_ASGOP); // +=, -=, etc or = +, = -, etc
+
+ if (lpIterTree->OperGet() == GT_ASG)
+ {
+ GenTreePtr lhs = lpIterTree->gtOp.gtOp1;
+ GenTreePtr rhs = lpIterTree->gtOp.gtOp2;
+ assert(lhs->OperGet() == GT_LCL_VAR);
+
+ switch (rhs->gtOper)
+ {
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_RSH:
+ case GT_LSH:
+ break;
+ default:
+ assert(!"Unknown operator for loop increment");
+ }
+ assert(rhs->gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+ assert(rhs->gtOp.gtOp1->AsLclVarCommon()->GetLclNum() == lhs->AsLclVarCommon()->GetLclNum());
+ assert(rhs->gtOp.gtOp2->OperGet() == GT_CNS_INT);
+ }
+ else
+ {
+ assert(lpIterTree->gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+ assert(lpIterTree->gtOp.gtOp2->OperGet() == GT_CNS_INT);
+ }
+#endif
+}
+
+//-----------------------------------------------------------------------------
+
+inline unsigned Compiler::LoopDsc::lpIterVar()
+{
+ VERIFY_lpIterTree();
+ return lpIterTree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+}
+
+//-----------------------------------------------------------------------------
+
+inline int Compiler::LoopDsc::lpIterConst()
+{
+ VERIFY_lpIterTree();
+ if (lpIterTree->OperGet() == GT_ASG)
+ {
+ GenTreePtr rhs = lpIterTree->gtOp.gtOp2;
+ return (int)rhs->gtOp.gtOp2->gtIntCon.gtIconVal;
+ }
+ else
+ {
+ return (int)lpIterTree->gtOp.gtOp2->gtIntCon.gtIconVal;
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+inline genTreeOps Compiler::LoopDsc::lpIterOper()
+{
+ VERIFY_lpIterTree();
+ if (lpIterTree->OperGet() == GT_ASG)
+ {
+ GenTreePtr rhs = lpIterTree->gtOp.gtOp2;
+ return rhs->OperGet();
+ }
+ else
+ {
+ return lpIterTree->OperGet();
+ }
+}
+
+inline var_types Compiler::LoopDsc::lpIterOperType()
+{
+ VERIFY_lpIterTree();
+
+ var_types type = lpIterTree->TypeGet();
+ assert(genActualType(type) == TYP_INT);
+
+ if ((lpIterTree->gtFlags & GTF_UNSIGNED) && type == TYP_INT)
+ {
+ type = TYP_UINT;
+ }
+
+ return type;
+}
+
+inline void Compiler::LoopDsc::VERIFY_lpTestTree()
+{
+#ifdef DEBUG
+ assert(lpFlags & LPFLG_ITER);
+ assert(lpTestTree);
+
+ genTreeOps oper = lpTestTree->OperGet();
+ assert(GenTree::OperIsCompare(oper));
+
+ GenTreePtr iterator = nullptr;
+ GenTreePtr limit = nullptr;
+ if ((lpTestTree->gtOp.gtOp2->gtOper == GT_LCL_VAR) && (lpTestTree->gtOp.gtOp2->gtFlags & GTF_VAR_ITERATOR) != 0)
+ {
+ iterator = lpTestTree->gtOp.gtOp2;
+ limit = lpTestTree->gtOp.gtOp1;
+ }
+ else if ((lpTestTree->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ (lpTestTree->gtOp.gtOp1->gtFlags & GTF_VAR_ITERATOR) != 0)
+ {
+ iterator = lpTestTree->gtOp.gtOp1;
+ limit = lpTestTree->gtOp.gtOp2;
+ }
+ else
+ {
+ // one of the nodes has to be the iterator
+ assert(false);
+ }
+
+ if (lpFlags & LPFLG_CONST_LIMIT)
+ {
+ assert(limit->OperIsConst());
+ }
+ if (lpFlags & LPFLG_VAR_LIMIT)
+ {
+ assert(limit->OperGet() == GT_LCL_VAR);
+ }
+ if (lpFlags & LPFLG_ARRLEN_LIMIT)
+ {
+ assert(limit->OperGet() == GT_ARR_LENGTH);
+ }
+#endif
+}
+
+//-----------------------------------------------------------------------------
+
+inline bool Compiler::LoopDsc::lpIsReversed()
+{
+ VERIFY_lpTestTree();
+ return ((lpTestTree->gtOp.gtOp2->gtOper == GT_LCL_VAR) &&
+ (lpTestTree->gtOp.gtOp2->gtFlags & GTF_VAR_ITERATOR) != 0);
+}
+
+//-----------------------------------------------------------------------------
+
+inline genTreeOps Compiler::LoopDsc::lpTestOper()
+{
+ VERIFY_lpTestTree();
+ genTreeOps op = lpTestTree->OperGet();
+ return lpIsReversed() ? GenTree::SwapRelop(op) : op;
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::LoopDsc::lpIterator()
+{
+ VERIFY_lpTestTree();
+
+ return lpIsReversed() ? lpTestTree->gtOp.gtOp2 : lpTestTree->gtOp.gtOp1;
+}
+
+//-----------------------------------------------------------------------------
+
+inline GenTreePtr Compiler::LoopDsc::lpLimit()
+{
+ VERIFY_lpTestTree();
+
+ return lpIsReversed() ? lpTestTree->gtOp.gtOp1 : lpTestTree->gtOp.gtOp2;
+}
+
+//-----------------------------------------------------------------------------
+
+inline int Compiler::LoopDsc::lpConstLimit()
+{
+ VERIFY_lpTestTree();
+ assert(lpFlags & LPFLG_CONST_LIMIT);
+
+ GenTreePtr limit = lpLimit();
+ assert(limit->OperIsConst());
+ return (int)limit->gtIntCon.gtIconVal;
+}
+
+//-----------------------------------------------------------------------------
+
+inline unsigned Compiler::LoopDsc::lpVarLimit()
+{
+ VERIFY_lpTestTree();
+ assert(lpFlags & LPFLG_VAR_LIMIT);
+
+ GenTreePtr limit = lpLimit();
+ assert(limit->OperGet() == GT_LCL_VAR);
+ return limit->gtLclVarCommon.gtLclNum;
+}
+
+//-----------------------------------------------------------------------------
+
+inline bool Compiler::LoopDsc::lpArrLenLimit(Compiler* comp, ArrIndex* index)
+{
+ VERIFY_lpTestTree();
+ assert(lpFlags & LPFLG_ARRLEN_LIMIT);
+
+ GenTreePtr limit = lpLimit();
+ assert(limit->OperGet() == GT_ARR_LENGTH);
+
+ // Check if we have a.length or a[i][j].length
+ if (limit->gtArrLen.ArrRef()->gtOper == GT_LCL_VAR)
+ {
+ index->arrLcl = limit->gtArrLen.ArrRef()->gtLclVarCommon.gtLclNum;
+ index->rank = 0;
+ return true;
+ }
+ // We have a[i].length, extract a[i] pattern.
+ else if (limit->gtArrLen.ArrRef()->gtOper == GT_COMMA)
+ {
+ return comp->optReconstructArrIndex(limit->gtArrLen.ArrRef(), index, BAD_VAR_NUM);
+ }
+ return false;
+}
+
+/*****************************************************************************
+ * Is "var" assigned in the loop "lnum" ?
+ */
+
+inline bool Compiler::optIsVarAssgLoop(unsigned lnum, unsigned var)
+{
+ assert(lnum < optLoopCount);
+ if (var < lclMAX_ALLSET_TRACKED)
+ {
+ ALLVARSET_TP ALLVARSET_INIT_NOCOPY(vs, AllVarSetOps::MakeSingleton(this, var));
+ return optIsSetAssgLoop(lnum, vs) != 0;
+ }
+ else
+ {
+ return optIsVarAssigned(optLoopTable[lnum].lpHead->bbNext, optLoopTable[lnum].lpBottom, nullptr, var);
+ }
+}
+
+/*****************************************************************************
+ * If the tree is a tracked local variable, return its LclVarDsc ptr.
+ */
+
+inline LclVarDsc* Compiler::optIsTrackedLocal(GenTreePtr tree)
+{
+ LclVarDsc* varDsc;
+ unsigned lclNum;
+
+ if (tree->gtOper != GT_LCL_VAR)
+ {
+ return nullptr;
+ }
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* if variable not tracked, return NULL */
+ if (!varDsc->lvTracked)
+ {
+ return nullptr;
+ }
+
+ return varDsc;
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Optimization activation rules XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// are we compiling for fast code, or are we compiling for blended code and
+// inside a loop?
+// For BLENDED_CODE, we return true if the block's weight exceeds (BB_LOOP_WEIGHT / 2) * BB_UNITY_WEIGHT.
+inline bool Compiler::optFastCodeOrBlendedLoop(BasicBlock::weight_t bbWeight)
+{
+ return (compCodeOpt() == FAST_CODE) ||
+ ((compCodeOpt() == BLENDED_CODE) && (bbWeight > (BB_LOOP_WEIGHT / 2 * BB_UNITY_WEIGHT)));
+}
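+
+// For illustration only (the weight constants are assumptions and may differ in this
+// snapshot): with BB_LOOP_WEIGHT == 8 and BB_UNITY_WEIGHT == 100, a blended-code block
+// qualifies once its weight exceeds 400, i.e. it runs noticeably more often than a
+// straight-line block.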
+
+// are we running on an Intel Pentium 4?
+inline bool Compiler::optPentium4(void)
+{
+ return (info.genCPU == CPU_X86_PENTIUM_4);
+}
+
+// should we use add/sub instead of inc/dec? (faster on P4, but increases size)
+inline bool Compiler::optAvoidIncDec(BasicBlock::weight_t bbWeight)
+{
+ return optPentium4() && optFastCodeOrBlendedLoop(bbWeight);
+}
+
+// should we try to replace integer multiplication with lea/add/shift sequences?
+inline bool Compiler::optAvoidIntMult(void)
+{
+ return (compCodeOpt() != SMALL_CODE);
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX EEInterface XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+extern var_types JITtype2varType(CorInfoType type);
+
+#include "ee_il_dll.hpp"
+
+inline CORINFO_METHOD_HANDLE Compiler::eeFindHelper(unsigned helper)
+{
+ assert(helper < CORINFO_HELP_COUNT);
+
+ /* Helpers are marked by the fact that they are odd numbers;
+ * force this to be an odd number (we will shift it back to extract the helper number) */
+
+ return ((CORINFO_METHOD_HANDLE)(size_t)((helper << 2) + 1));
+}
+
+inline CorInfoHelpFunc Compiler::eeGetHelperNum(CORINFO_METHOD_HANDLE method)
+{
+ // Helpers are marked by the fact that they are odd numbers
+ if (!(((size_t)method) & 1))
+ {
+ return (CORINFO_HELP_UNDEF);
+ }
+ return ((CorInfoHelpFunc)(((size_t)method) >> 2));
+}
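+
+// Illustrative example (derived directly from the two functions above): eeFindHelper(h)
+// produces the tagged handle value (h << 2) + 1, which is always odd; eeGetHelperNum
+// recovers h by shifting right by two, and returns CORINFO_HELP_UNDEF for any even value,
+// i.e. for a real (non-helper) method handle.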
+
+inline Compiler::fgWalkResult Compiler::CountSharedStaticHelper(GenTreePtr* pTree, fgWalkData* data)
+{
+ if (Compiler::IsSharedStaticHelper(*pTree))
+ {
+ int* pCount = (int*)data->pCallbackData;
+ (*pCount)++;
+ }
+
+ return WALK_CONTINUE;
+}
+
+// TODO-Cleanup: Replace calls to IsSharedStaticHelper with new HelperCallProperties
+//
+
+inline bool Compiler::IsSharedStaticHelper(GenTreePtr tree)
+{
+ if (tree->gtOper != GT_CALL || tree->gtCall.gtCallType != CT_HELPER)
+ {
+ return false;
+ }
+
+ CorInfoHelpFunc helper = eeGetHelperNum(tree->gtCall.gtCallMethHnd);
+
+ bool result1 =
+ // More helpers are being added to IsSharedStaticHelper (ones that have similar behaviors but are not
+ // true SharedStaticHelpers)
+ helper == CORINFO_HELP_STRCNS || helper == CORINFO_HELP_BOX ||
+
+ // helpers being added to IsSharedStaticHelper
+ helper == CORINFO_HELP_GETSTATICFIELDADDR_CONTEXT || helper == CORINFO_HELP_GETSTATICFIELDADDR_TLS ||
+ helper == CORINFO_HELP_GETGENERICS_GCSTATIC_BASE || helper == CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE ||
+ helper == CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE ||
+ helper == CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE ||
+
+ helper == CORINFO_HELP_GETSHARED_GCSTATIC_BASE || helper == CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE ||
+ helper == CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS ||
+ helper == CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS ||
+ helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE ||
+ helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE ||
+ helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR ||
+ helper == CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS ||
+ helper == CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS ||
+#ifdef FEATURE_READYTORUN_COMPILER
+ helper == CORINFO_HELP_READYTORUN_STATIC_BASE ||
+#endif
+ helper == CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS;
+#if 0
+ // See above TODO-Cleanup
+ bool result2 = s_helperCallProperties.IsPure(helper) && s_helperCallProperties.NonNullReturn(helper);
+ assert (result1 == result2);
+#endif
+ return result1;
+}
+
+inline bool Compiler::IsTreeAlwaysHoistable(GenTreePtr tree)
+{
+ if (IsSharedStaticHelper(tree))
+ {
+ return (GTF_CALL_HOISTABLE & tree->gtFlags) ? true : false;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+//
+// Note that we want to have two special FIELD_HANDLES that will both
+// be considered non-Data Offset handles
+//
+// The special values that we use are FLD_GLOBAL_DS and FLD_GLOBAL_FS
+//
+
+inline bool jitStaticFldIsGlobAddr(CORINFO_FIELD_HANDLE fldHnd)
+{
+ return (fldHnd == FLD_GLOBAL_DS || fldHnd == FLD_GLOBAL_FS);
+}
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+inline bool Compiler::eeIsNativeMethod(CORINFO_METHOD_HANDLE method)
+{
+ return ((((size_t)method) & 0x2) == 0x2);
+}
+
+inline CORINFO_METHOD_HANDLE Compiler::eeGetMethodHandleForNative(CORINFO_METHOD_HANDLE method)
+{
+ assert((((size_t)method) & 0x3) == 0x2);
+ return (CORINFO_METHOD_HANDLE)(((size_t)method) & ~0x3);
+}
+#endif
+
+inline CORINFO_METHOD_HANDLE Compiler::eeMarkNativeTarget(CORINFO_METHOD_HANDLE method)
+{
+ assert((((size_t)method) & 0x3) == 0);
+ if (method == nullptr)
+ {
+ return method;
+ }
+ else
+ {
+ return (CORINFO_METHOD_HANDLE)(((size_t)method) | 0x2);
+ }
+}
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX Compiler XX
+XX Inline functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef DEBUG
+inline bool Compiler::compStressCompile(compStressArea stressArea, unsigned weightPercentage)
+{
+ return false;
+}
+#endif
+
+inline ArenaAllocator* Compiler::compGetAllocator()
+{
+ return compAllocator;
+}
+
+/*****************************************************************************
+ *
+ * Allocate memory from the no-release allocator. All such memory will be
+ * freed up simultaneously at the end of the procedure.
+ */
+
+#ifndef DEBUG
+
+inline void* Compiler::compGetMem(size_t sz, CompMemKind cmk)
+{
+ assert(sz);
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.AddAlloc(sz, cmk);
+#endif
+
+ return compAllocator->allocateMemory(sz);
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * A common memory allocation for arrays of structures involves the
+ * multiplication of the number of elements with the size of each element.
+ * If this computation overflows, then the memory allocation might succeed,
+ * but not allocate sufficient memory for all the elements. This can cause
+ * us to overwrite the allocation, and AV or worse, corrupt memory.
+ *
+ * This method checks for overflow, and succeeds only when it detects
+ * that there's no overflow. It should be cheap, because when inlined with
+ * a constant elemSize, the division should be done in compile time, and so
+ * at run time we simply have a check of numElem against some number (this
+ * is why we __forceinline).
+ */
+
+#define MAX_MEMORY_PER_ALLOCATION (512 * 1024 * 1024)
+
+__forceinline void* Compiler::compGetMemArray(size_t numElem, size_t elemSize, CompMemKind cmk)
+{
+ if (numElem > (MAX_MEMORY_PER_ALLOCATION / elemSize))
+ {
+ NOMEM();
+ }
+
+ return compGetMem(numElem * elemSize, cmk);
+}
+
+__forceinline void* Compiler::compGetMemArrayA(size_t numElem, size_t elemSize, CompMemKind cmk)
+{
+ if (numElem > (MAX_MEMORY_PER_ALLOCATION / elemSize))
+ {
+ NOMEM();
+ }
+
+ return compGetMemA(numElem * elemSize, cmk);
+}
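+
+// Illustrative example (numbers only, no new behavior): with elemSize == 8, any numElem
+// larger than MAX_MEMORY_PER_ALLOCATION / 8 (64M elements) trips NOMEM() before the
+// multiplication numElem * elemSize ever has a chance to overflow.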
+
+/******************************************************************************
+ *
+ * Round up the allocated size so that if this memory block is aligned,
+ * then the next block allocated too will be aligned.
+ * The JIT will always try to keep all the blocks aligned.
+ */
+
+inline void* Compiler::compGetMemA(size_t sz, CompMemKind cmk)
+{
+ assert(sz);
+
+ size_t allocSz = roundUp(sz, sizeof(size_t));
+
+#if MEASURE_MEM_ALLOC
+ genMemStats.AddAlloc(allocSz, cmk);
+#endif
+
+ void* ptr = compAllocator->allocateMemory(allocSz);
+
+ // Verify that the current block is aligned. Only then will the next
+ // block allocated be on an aligned boundary.
+ assert((size_t(ptr) & (sizeof(size_t) - 1)) == 0);
+
+ return ptr;
+}
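+
+// Illustrative example (a sketch of the rounding above): on a 64-bit host, a request for
+// 10 bytes is rounded up to 16 (a multiple of sizeof(size_t)), so the block that follows
+// this one in the arena also starts on a size_t-aligned boundary.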
+
+inline void Compiler::compFreeMem(void* ptr)
+{
+}
+
+#define compFreeMem(ptr) compFreeMem((void*)ptr)
+
+inline bool Compiler::compIsProfilerHookNeeded()
+{
+#ifdef PROFILING_SUPPORTED
+ return compProfilerHookNeeded
+
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ // IL stubs are excluded by the VM, and we need to do the same even when running
+ // under a COMPlus environment hook that enables profiler hooks
+ || (opts.compJitELTHookEnabled && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
+#endif
+ ;
+#else // PROFILING_SUPPORTED
+ return false;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Check for the special case where the object is the constant 0.
+ * As we can't even fold the tree (null+fldOffs), we are left with
+ * op1 and op2 both being a constant. This causes lots of problems.
+ * We simply grab a temp and assign 0 to it and use it in place of the NULL.
+ */
+
+inline GenTreePtr Compiler::impCheckForNullPointer(GenTreePtr obj)
+{
+ /* If it is not a GC type, we will be able to fold it.
+ So don't need to do anything */
+
+ if (!varTypeIsGC(obj->TypeGet()))
+ {
+ return obj;
+ }
+
+ if (obj->gtOper == GT_CNS_INT)
+ {
+ assert(obj->gtType == TYP_REF || obj->gtType == TYP_BYREF);
+ assert(obj->gtIntCon.gtIconVal == 0);
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("CheckForNullPointer"));
+
+ // We don't need to spill while appending as we are only assigning
+ // NULL to a freshly-grabbed temp.
+
+ impAssignTempGen(tmp, obj, (unsigned)CHECK_SPILL_NONE);
+
+ obj = gtNewLclvNode(tmp, obj->gtType);
+ }
+
+ return obj;
+}
+
+/*****************************************************************************
+ *
+ * Check for the special case where the object is the method's original 'this' pointer.
+ * Note that the original 'this' pointer is always local var 0 for a non-static method,
+ * even if we may have created a copy of the 'this' pointer in lvaArg0Var.
+ */
+
+inline bool Compiler::impIsThis(GenTreePtr obj)
+{
+ if (compIsForInlining())
+ {
+ return impInlineInfo->InlinerCompiler->impIsThis(obj);
+ }
+ else
+ {
+ return ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR) && lvaIsOriginalThisArg(obj->gtLclVarCommon.gtLclNum));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Check to see if the delegate is created using "LDFTN <TOK>" or not.
+ */
+
+inline bool Compiler::impIsLDFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr)
+{
+ assert(newobjCodeAddr[0] == CEE_NEWOBJ);
+ return (newobjCodeAddr - delegateCreateStart == 6 && // LDFTN <TOK> takes 6 bytes
+ delegateCreateStart[0] == CEE_PREFIX1 && delegateCreateStart[1] == (CEE_LDFTN & 0xFF));
+}
+
+/*****************************************************************************
+ *
+ * Check to see if the delegate is created using "DUP LDVIRTFTN <TOK>" or not.
+ */
+
+inline bool Compiler::impIsDUP_LDVIRTFTN_TOKEN(const BYTE* delegateCreateStart, const BYTE* newobjCodeAddr)
+{
+ assert(newobjCodeAddr[0] == CEE_NEWOBJ);
+ return (newobjCodeAddr - delegateCreateStart == 7 && // DUP LDVIRTFTN <TOK> takes 7 bytes
+ delegateCreateStart[0] == CEE_DUP && delegateCreateStart[1] == CEE_PREFIX1 &&
+ delegateCreateStart[2] == (CEE_LDVIRTFTN & 0xFF));
+}
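+
+// Byte-count note (the reasoning behind the 6- and 7-byte checks above): LDFTN is encoded
+// as the 2-byte opcode CEE_PREFIX1 followed by the low byte of CEE_LDFTN, plus a 4-byte
+// token (6 bytes total); the DUP LDVIRTFTN form adds the 1-byte DUP in front (7 bytes total).
+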
+/*****************************************************************************
+ *
+ * Returns true if the compiler instance is created for import only (verification).
+ */
+
+inline bool Compiler::compIsForImportOnly()
+{
+ return ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0);
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the compiler instance is created for inlining.
+ */
+
+inline bool Compiler::compIsForInlining()
+{
+ return (impInlineInfo != nullptr);
+}
+
+/*****************************************************************************
+ *
+ * Check the inline result field in the compiler to see if inlining failed or not.
+ */
+
+inline bool Compiler::compDonotInline()
+{
+ if (compIsForInlining())
+ {
+ assert(compInlineResult != nullptr);
+ return compInlineResult->IsFailure();
+ }
+ else
+ {
+ return false;
+ }
+}
+
+inline bool Compiler::impIsPrimitive(CorInfoType jitType)
+{
+ return ((CORINFO_TYPE_BOOL <= jitType && jitType <= CORINFO_TYPE_DOUBLE) || jitType == CORINFO_TYPE_PTR);
+}
+
+/*****************************************************************************
+ *
+ * Get the promotion type of a struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetPromotionType(const LclVarDsc* varDsc)
+{
+ assert(!varDsc->lvPromoted || varTypeIsPromotable(varDsc) || varDsc->lvUnusedStruct);
+
+ if (!varDsc->lvPromoted)
+ {
+ // no struct promotion for this LclVar
+ return PROMOTION_TYPE_NONE;
+ }
+ if (varDsc->lvDoNotEnregister)
+ {
+ // The struct is not enregistered
+ return PROMOTION_TYPE_DEPENDENT;
+ }
+ if (!varDsc->lvIsParam)
+ {
+ // The struct is a register candidate
+ return PROMOTION_TYPE_INDEPENDENT;
+ }
+
+ // Has struct promotion for arguments been disabled using COMPlus_JitNoStructPromotion=2?
+ if (fgNoStructParamPromotion)
+ {
+ // The struct parameter is not enregistered
+ return PROMOTION_TYPE_DEPENDENT;
+ }
+
+ // We have a parameter that could be enregistered
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+ // The struct parameter is a register candidate
+ return PROMOTION_TYPE_INDEPENDENT;
+#else
+ // The struct parameter is not enregistered
+ return PROMOTION_TYPE_DEPENDENT;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Get the promotion type of a struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetPromotionType(unsigned varNum)
+{
+ assert(varNum < lvaCount);
+ return lvaGetPromotionType(&lvaTable[varNum]);
+}
+
+/*****************************************************************************
+ *
+ * Given a field local, get the promotion type of its parent struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetParentPromotionType(const LclVarDsc* varDsc)
+{
+ assert(varDsc->lvIsStructField);
+ assert(varDsc->lvParentLcl < lvaCount);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc->lvParentLcl);
+ assert(promotionType != PROMOTION_TYPE_NONE);
+ return promotionType;
+}
+
+/*****************************************************************************
+ *
+ * Given a field local, get the promotion type of its parent struct local.
+ */
+
+inline Compiler::lvaPromotionType Compiler::lvaGetParentPromotionType(unsigned varNum)
+{
+ assert(varNum < lvaCount);
+ return lvaGetParentPromotionType(&lvaTable[varNum]);
+}
+
+/*****************************************************************************
+ *
+ * Return true if the local is a field local of a promoted struct of type PROMOTION_TYPE_DEPENDENT.
+ * Return false otherwise.
+ */
+
+inline bool Compiler::lvaIsFieldOfDependentlyPromotedStruct(const LclVarDsc* varDsc)
+{
+ if (!varDsc->lvIsStructField)
+ {
+ return false;
+ }
+
+ lvaPromotionType promotionType = lvaGetParentPromotionType(varDsc);
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ return true;
+ }
+
+ assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
+ return false;
+}
+
+//------------------------------------------------------------------------
+// lvaIsGCTracked: Determine whether this var should be reported
+// as tracked for GC purposes.
+//
+// Arguments:
+// varDsc - the LclVarDsc for the var in question.
+//
+// Return Value:
+// Returns true if the variable should be reported as tracked in the GC info.
+//
+// Notes:
+// This never returns true for struct variables, even if they are tracked.
+// This is because struct variables are never tracked as a whole for GC purposes.
+// It is up to the caller to ensure that the fields of struct variables are
+// correctly tracked.
+// On Amd64, we never GC-track fields of dependently promoted structs, even
+// though they may be tracked for optimization purposes.
+// It seems that on x86 and arm, we simply don't track these
+// fields, though I have not verified that. I attempted to make these GC-tracked,
+// but there was too much logic that depends on these being untracked, so changing
+// this would require non-trivial effort.
+
+inline bool Compiler::lvaIsGCTracked(const LclVarDsc* varDsc)
+{
+ if (varDsc->lvTracked && (varDsc->lvType == TYP_REF || varDsc->lvType == TYP_BYREF))
+ {
+#ifdef _TARGET_AMD64_
+ return !lvaIsFieldOfDependentlyPromotedStruct(varDsc);
+#else // !_TARGET_AMD64_
+ return true;
+#endif // !_TARGET_AMD64_
+ }
+ else
+ {
+ return false;
+ }
+}
+
+inline void Compiler::EndPhase(Phases phase)
+{
+#if defined(FEATURE_JIT_METHOD_PERF)
+ if (pCompJitTimer != NULL)
+ pCompJitTimer->EndPhase(phase);
+#endif
+#if DUMP_FLOWGRAPHS
+ fgDumpFlowGraph(phase);
+#endif // DUMP_FLOWGRAPHS
+ previousCompletedPhase = phase;
+#ifdef DEBUG
+ if (dumpIR)
+ {
+ if ((*dumpIRPhase == L'*') || (wcscmp(dumpIRPhase, PhaseShortNames[phase]) == 0))
+ {
+ printf("\n");
+ printf("IR after %s (switch: %ls)\n", PhaseEnums[phase], PhaseShortNames[phase]);
+ printf("\n");
+
+ if (dumpIRLinear)
+ {
+ dFuncIR();
+ }
+ else if (dumpIRTrees)
+ {
+ dTrees();
+ }
+
+ // If we are just dumping a single method and we have a request to exit
+ // after dumping, do so now.
+
+ if (dumpIRExit && ((*dumpIRPhase != L'*') || (phase == PHASE_EMIT_GCEH)))
+ {
+ exit(0);
+ }
+ }
+ }
+#endif
+}
+
+/*****************************************************************************/
+bool Compiler::fgExcludeFromSsa(unsigned lclNum)
+{
+ if (opts.MinOpts())
+ {
+ return true; // If we're doing MinOpts, no SSA vars.
+ }
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvAddrExposed)
+ {
+ return true; // We exclude address-exposed variables.
+ }
+ if (!varDsc->lvTracked)
+ {
+ return true; // SSA is only done for tracked variables
+ }
+ // lvPromoted structs are never tracked...
+ assert(!varDsc->lvPromoted);
+
+ if (varDsc->lvOverlappingFields)
+ {
+ return true; // Don't use SSA on structs that have overlapping fields
+ }
+
+ if (varDsc->lvIsStructField && (lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT))
+ {
+ // SSA must exclude struct fields that are not independently promoted:
+ // - because we don't model the struct assignment properly when multiple fields can be assigned by one
+ // struct assignment,
+ // - because SSA doesn't allow a single node to contain multiple SSA definitions, and
+ // - because PROMOTION_TYPE_DEPENDENT fields are never candidates for a register.
+ //
+ // Example mscorlib method: CompatibilitySwitches:IsCompatibilitySwitchSet
+ //
+ return true;
+ }
+ // otherwise this variable is *not* excluded for SSA
+ return false;
+}
+
+/*****************************************************************************/
+ValueNum Compiler::GetUseAsgDefVNOrTreeVN(GenTreePtr op)
+{
+ if (op->gtFlags & GTF_VAR_USEASG)
+ {
+ unsigned lclNum = op->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = GetSsaNumForLocalVarDef(op);
+ return lvaTable[lclNum].GetPerSsaData(ssaNum)->m_vnPair.GetConservative();
+ }
+ else
+ {
+ return op->gtVNPair.GetConservative();
+ }
+}
+
+/*****************************************************************************/
+unsigned Compiler::GetSsaNumForLocalVarDef(GenTreePtr lcl)
+{
+ // Address-taken variables don't have SSA numbers.
+ if (fgExcludeFromSsa(lcl->AsLclVarCommon()->gtLclNum))
+ {
+ return SsaConfig::RESERVED_SSA_NUM;
+ }
+
+ assert(lcl->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEDEF));
+ if (lcl->gtFlags & GTF_VAR_USEASG)
+ {
+ assert((lcl->gtFlags & GTF_VAR_USEDEF) == 0);
+ // It's an "lcl op= rhs" assignment. "lcl" is both used and defined here;
+ // we've chosen in this case to annotate "lcl" with the SSA number (and VN) of the use,
+ // and to store the SSA number of the def in a side table.
+ unsigned ssaNum;
+ // In case of a remorph (fgMorph) in CSE/AssertionProp after SSA phase, there
+ // wouldn't be an entry for the USEASG portion of the indir addr, return
+ // reserved.
+ if (!GetOpAsgnVarDefSsaNums()->Lookup(lcl, &ssaNum))
+ {
+ return SsaConfig::RESERVED_SSA_NUM;
+ }
+ return ssaNum;
+ }
+ else
+ {
+ return lcl->AsLclVarCommon()->gtSsaNum;
+ }
+}
+
+/*****************************************************************************
+ * operator new
+ *
+ * Note that compGetMem is an arena allocator that returns memory that is
+ * not zero-initialized and can contain data from a prior allocation lifetime.
+ * It also requires that 'sz' be aligned to a multiple of sizeof(int).
+ */
+
+inline void* __cdecl operator new(size_t sz, Compiler* context, CompMemKind cmk)
+{
+ sz = AlignUp(sz, sizeof(int));
+ assert(sz != 0 && (sz & (sizeof(int) - 1)) == 0);
+ return context->compGetMem(sz, cmk);
+}
+
+inline void* __cdecl operator new[](size_t sz, Compiler* context, CompMemKind cmk)
+{
+ sz = AlignUp(sz, sizeof(int));
+ assert(sz != 0 && (sz & (sizeof(int) - 1)) == 0);
+ return context->compGetMem(sz, cmk);
+}
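+
+// Illustrative usage (a sketch; the memory kind chosen here is arbitrary):
+// unsigned* table = new (this, CMK_Unknown) unsigned[count];
+// allocates from the compiler's arena, and the memory is released with the rest of the
+// compilation's allocations rather than via delete.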
+
+inline void* __cdecl operator new(size_t sz, void* p, const jitstd::placement_t& /* syntax_difference */)
+{
+ return p;
+}
+
+inline void* __cdecl operator new(size_t sz, IAllocator* alloc)
+{
+ return alloc->Alloc(sz);
+}
+
+inline void* __cdecl operator new[](size_t sz, IAllocator* alloc)
+{
+ return alloc->Alloc(sz);
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+inline void printRegMask(regMaskTP mask)
+{
+ printf(REG_MASK_ALL_FMT, mask);
+}
+
+inline char* regMaskToString(regMaskTP mask, Compiler* context)
+{
+ const size_t cchRegMask = 24;
+ char* regmask = new (context, CMK_Unknown) char[cchRegMask];
+
+ sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask);
+
+ return regmask;
+}
+
+inline void printRegMaskInt(regMaskTP mask)
+{
+ printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT));
+}
+
+inline char* regMaskIntToString(regMaskTP mask, Compiler* context)
+{
+ const size_t cchRegMask = 24;
+ char* regmask = new (context, CMK_Unknown) char[cchRegMask];
+
+ sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT));
+
+ return regmask;
+}
+
+#endif // DEBUG
+
+inline void BasicBlock::InitVarSets(Compiler* comp)
+{
+ VarSetOps::AssignNoCopy(comp, bbVarUse, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbVarDef, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbVarTmp, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbLiveIn, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbLiveOut, VarSetOps::MakeEmpty(comp));
+ VarSetOps::AssignNoCopy(comp, bbScope, VarSetOps::MakeEmpty(comp));
+
+ bbHeapUse = false;
+ bbHeapDef = false;
+ bbHeapLiveIn = false;
+ bbHeapLiveOut = false;
+}
+
+// Returns true if the basic block ends with GT_JMP
+inline bool BasicBlock::endsWithJmpMethod(Compiler* comp)
+{
+ if (comp->compJmpOpUsed && (bbJumpKind == BBJ_RETURN) && (bbFlags & BBF_HAS_JMP))
+ {
+ GenTree* lastNode = this->lastNode();
+ assert(lastNode != nullptr);
+ return lastNode->OperGet() == GT_JMP;
+ }
+
+ return false;
+}
+
+// Returns true if the basic block ends with either
+// i) GT_JMP or
+// ii) tail call (implicit or explicit)
+//
+// Params:
+// comp - Compiler instance
+// fastTailCallsOnly - Only consider fast tail calls excluding tail calls via helper.
+inline bool BasicBlock::endsWithTailCallOrJmp(Compiler* comp, bool fastTailCallsOnly /*=false*/)
+{
+ GenTreePtr tailCall = nullptr;
+ bool tailCallsConvertibleToLoopOnly = false;
+ return endsWithJmpMethod(comp) ||
+ endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, &tailCall);
+}
+
+//------------------------------------------------------------------------------
+// endsWithTailCall : Check if the block ends with a tail call.
+//
+// Arguments:
+// comp - compiler instance
+// fastTailCallsOnly - check for fast tail calls only
+// tailCallsConvertibleToLoopOnly - check for tail calls convertible to loop only
+// tailCall - a pointer to a tree that will be set to the call tree if the block
+// ends with a tail call and will be set to nullptr otherwise.
+//
+// Return Value:
+// true if the block ends with a tail call; false otherwise.
+//
+// Notes:
+// At most one of fastTailCallsOnly and tailCallsConvertibleToLoopOnly flags can be true.
+
+inline bool BasicBlock::endsWithTailCall(Compiler* comp,
+ bool fastTailCallsOnly,
+ bool tailCallsConvertibleToLoopOnly,
+ GenTree** tailCall)
+{
+ assert(!fastTailCallsOnly || !tailCallsConvertibleToLoopOnly);
+ *tailCall = nullptr;
+ bool result = false;
+
+ // Is this a tail call?
+ // The reason for keeping this under RyuJIT is so as not to impact existing Jit32 x86 and arm
+ // targets.
+ if (comp->compTailCallUsed)
+ {
+ if (fastTailCallsOnly || tailCallsConvertibleToLoopOnly)
+ {
+ // Only fast tail calls or only tail calls convertible to loops
+ result = (bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN);
+ }
+ else
+ {
+ // Fast tail calls, tail calls convertible to loops, and tail calls dispatched via helper
+ result = (bbJumpKind == BBJ_THROW) || ((bbFlags & BBF_HAS_JMP) && (bbJumpKind == BBJ_RETURN));
+ }
+
+ if (result)
+ {
+ GenTree* lastNode = this->lastNode();
+ if (lastNode->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = lastNode->AsCall();
+ if (tailCallsConvertibleToLoopOnly)
+ {
+ result = call->IsTailCallConvertibleToLoop();
+ }
+ else if (fastTailCallsOnly)
+ {
+ result = call->IsFastTailCall();
+ }
+ else
+ {
+ result = call->IsTailCall();
+ }
+
+ if (result)
+ {
+ *tailCall = call;
+ }
+ }
+ else
+ {
+ result = false;
+ }
+ }
+ }
+
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// endsWithTailCallConvertibleToLoop : Check if the block ends with a tail call convertible to loop.
+//
+// Arguments:
+// comp - compiler instance
+// tailCall - a pointer to a tree that will be set to the call tree if the block
+// ends with a tail call convertible to loop and will be set to nullptr otherwise.
+//
+// Return Value:
+// true if the block ends with a tail call convertible to loop.
+
+inline bool BasicBlock::endsWithTailCallConvertibleToLoop(Compiler* comp, GenTree** tailCall)
+{
+ bool fastTailCallsOnly = false;
+ bool tailCallsConvertibleToLoopOnly = true;
+ return endsWithTailCall(comp, fastTailCallsOnly, tailCallsConvertibleToLoopOnly, tailCall);
+}
+
+inline static bool StructHasOverlappingFields(DWORD attribs)
+{
+ return ((attribs & CORINFO_FLG_OVERLAPPING_FIELDS) != 0);
+}
+
+inline static bool StructHasCustomLayout(DWORD attribs)
+{
+ return ((attribs & CORINFO_FLG_CUSTOMLAYOUT) != 0);
+}
+
+/*****************************************************************************
+ * This node should not be referenced by anyone now. Set its values to garbage
+ * to catch extra references
+ */
+
+inline void DEBUG_DESTROY_NODE(GenTreePtr tree)
+{
+#ifdef DEBUG
+ // printf("DEBUG_DESTROY_NODE for [0x%08x]\n", tree);
+
+ // Save gtOper in case we want to find out what this node was
+ tree->gtOperSave = tree->gtOper;
+
+ tree->gtType = TYP_UNDEF;
+ tree->gtFlags |= 0xFFFFFFFF & ~GTF_NODE_MASK;
+ if (tree->OperIsSimple())
+ {
+ tree->gtOp.gtOp1 = tree->gtOp.gtOp2 = nullptr;
+ }
+ // Must do this last, because the "gtOp" check above will fail otherwise.
+ // Don't call SetOper, because GT_COUNT is not a valid value
+ tree->gtOper = GT_COUNT;
+#endif
+}
+
+/*****************************************************************************/
+#endif //_COMPILER_HPP_
+/*****************************************************************************/
diff --git a/src/jit/compilerbitsettraits.h b/src/jit/compilerbitsettraits.h
new file mode 100644
index 0000000000..4365c518d7
--- /dev/null
+++ b/src/jit/compilerbitsettraits.h
@@ -0,0 +1,130 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef CompilerBitSetTraits_DEFINED
+#define CompilerBitSetTraits_DEFINED 1
+
+#include "bitset.h"
+#include "compiler.h"
+#include "iallocator.h"
+#include "bitsetasshortlong.h"
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// CompAllocBitSetTraits: a base class for other BitSet traits classes.
+//
+// The classes in this file define "BitSetTraits" arguments to the "BitSetOps" type, ones that assume that
+// Compiler* is the "Env" type.
+//
+// This class just captures the compiler's allocator as an IAllocator.
+//
+class CompAllocBitSetTraits
+{
+public:
+ static inline IAllocator* GetAllocator(class Compiler* comp);
+
+#ifdef DEBUG
+ static inline IAllocator* GetDebugOnlyAllocator(class Compiler* comp);
+#endif // DEBUG
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// TrackedVarBitSetTraits
+//
+// This class customizes the bit set to represent sets of tracked local vars.
+// The size of the bitset is determined by the # of tracked locals (up to some internal
+// maximum), and the Compiler* tracks the tracked local epochs.
+//
+class TrackedVarBitSetTraits : public CompAllocBitSetTraits
+{
+public:
+ static inline unsigned GetSize(Compiler* comp);
+
+ static inline unsigned GetArrSize(Compiler* comp, unsigned elemSize);
+
+ static inline unsigned GetEpoch(class Compiler* comp);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(Compiler* comp);
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// AllVarBitSetTraits
+//
+// This class customizes the bit set to represent sets of all local vars (tracked or not) --
+// at least up to some maximum index. (This index is private to the Compiler, and it is
+// the responsibility of the compiler not to use indices >= this maximum.)
+// We rely on the fact that variables are never deleted, and therefore use the
+// total # of locals as the epoch number (up to the maximum).
+//
+class AllVarBitSetTraits : public CompAllocBitSetTraits
+{
+public:
+ static inline unsigned GetSize(Compiler* comp);
+
+ static inline unsigned GetArrSize(Compiler* comp, unsigned elemSize);
+
+ static inline unsigned GetEpoch(class Compiler* comp);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(Compiler* comp);
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BasicBlockBitSetTraits
+//
+// This class customizes the bit set to represent sets of BasicBlocks.
+// The size of the bitset is determined by the maximum assigned BasicBlock number
+// (Compiler::fgBBNumMax). (Note that fgBBcount is not equal to this during inlining,
+// when fgBBcount is the number of blocks in the inlined function, but the assigned
+// block numbers are higher than those of the inliner function; fgBBNumMax counts both.
+// Thus, if you only care about the inlinee, during inlining this bit set will waste
+// the lower-numbered block bits.) The Compiler* tracks the BasicBlock epochs.
+//
+class BasicBlockBitSetTraits : public CompAllocBitSetTraits
+{
+public:
+ static inline unsigned GetSize(Compiler* comp);
+
+ static inline unsigned GetArrSize(Compiler* comp, unsigned elemSize);
+
+ static inline unsigned GetEpoch(class Compiler* comp);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(Compiler* comp);
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BitVecTraits
+//
+// This class simplifies creation and usage of "ShortLong" bitsets.
+//
+struct BitVecTraits
+{
+private:
+ unsigned size;
+ Compiler* comp;
+
+public:
+ BitVecTraits(unsigned size, Compiler* comp) : size(size), comp(comp)
+ {
+ }
+
+ static inline IAllocator* GetAllocator(BitVecTraits* b);
+
+#ifdef DEBUG
+ static inline IAllocator* GetDebugOnlyAllocator(BitVecTraits* b);
+#endif // DEBUG
+
+ static inline unsigned GetSize(BitVecTraits* b);
+
+ static inline unsigned GetArrSize(BitVecTraits* b, unsigned elemSize);
+
+ static inline unsigned GetEpoch(BitVecTraits* b);
+
+ static inline BitSetSupport::BitSetOpCounter* GetOpCounter(BitVecTraits* b);
+};
+
+#endif // CompilerBitSetTraits_DEFINED
diff --git a/src/jit/compilerbitsettraits.hpp b/src/jit/compilerbitsettraits.hpp
new file mode 100644
index 0000000000..e2ba2f8a7a
--- /dev/null
+++ b/src/jit/compilerbitsettraits.hpp
@@ -0,0 +1,181 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef CompilerBitSetTraits_HPP_DEFINED
+#define CompilerBitSetTraits_HPP_DEFINED 1
+
+#include "compilerbitsettraits.h"
+#include "compiler.h"
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// CompAllocBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+IAllocator* CompAllocBitSetTraits::GetAllocator(Compiler* comp)
+{
+ return comp->getAllocatorBitset();
+}
+
+#ifdef DEBUG
+// static
+IAllocator* CompAllocBitSetTraits::GetDebugOnlyAllocator(Compiler* comp)
+{
+ return comp->getAllocatorDebugOnly();
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// TrackedVarBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+unsigned TrackedVarBitSetTraits::GetSize(Compiler* comp)
+{
+ return comp->lvaTrackedCount;
+}
+
+// static
+unsigned TrackedVarBitSetTraits::GetArrSize(Compiler* comp, unsigned elemSize)
+{
+ assert(elemSize == sizeof(size_t));
+ return comp->lvaTrackedCountInSizeTUnits;
+}
+
+// static
+unsigned TrackedVarBitSetTraits::GetEpoch(Compiler* comp)
+{
+ return comp->GetCurLVEpoch();
+}
+
+// static
+BitSetSupport::BitSetOpCounter* TrackedVarBitSetTraits::GetOpCounter(Compiler* comp)
+{
+#if VARSET_COUNTOPS
+ return &Compiler::m_varsetOpCounter;
+#else
+ return nullptr;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// AllVarBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+unsigned AllVarBitSetTraits::GetSize(Compiler* comp)
+{
+ return min(comp->lvaCount, lclMAX_ALLSET_TRACKED);
+}
+
+// static
+unsigned AllVarBitSetTraits::GetArrSize(Compiler* comp, unsigned elemSize)
+{
+ return unsigned(roundUp(GetSize(comp), elemSize));
+}
+
+// static
+unsigned AllVarBitSetTraits::GetEpoch(Compiler* comp)
+{
+ return GetSize(comp);
+}
+
+// static
+BitSetSupport::BitSetOpCounter* AllVarBitSetTraits::GetOpCounter(Compiler* comp)
+{
+#if ALLVARSET_COUNTOPS
+ return &Compiler::m_allvarsetOpCounter;
+#else
+ return nullptr;
+#endif
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BasicBlockBitSetTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+unsigned BasicBlockBitSetTraits::GetSize(Compiler* comp)
+{
+ return comp->fgCurBBEpochSize;
+}
+
+// static
+unsigned BasicBlockBitSetTraits::GetArrSize(Compiler* comp, unsigned elemSize)
+{
+ // Assert that the epoch has been initialized. This is a convenient place to assert this because
+ // GetArrSize() is called for every function, via IsShort().
+ assert(GetEpoch(comp) != 0);
+
+ assert(elemSize == sizeof(size_t));
+ return comp->fgBBSetCountInSizeTUnits; // This is precomputed to avoid doing math every time this function is called
+}
+
+// static
+unsigned BasicBlockBitSetTraits::GetEpoch(Compiler* comp)
+{
+ return comp->GetCurBasicBlockEpoch();
+}
+
+// static
+BitSetSupport::BitSetOpCounter* BasicBlockBitSetTraits::GetOpCounter(Compiler* comp)
+{
+ return nullptr;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// BitVecTraits
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// static
+IAllocator* BitVecTraits::GetAllocator(BitVecTraits* b)
+{
+ return b->comp->getAllocatorBitset();
+}
+
+#ifdef DEBUG
+// static
+IAllocator* BitVecTraits::GetDebugOnlyAllocator(BitVecTraits* b)
+{
+ return b->comp->getAllocatorDebugOnly();
+}
+#endif // DEBUG
+
+// static
+unsigned BitVecTraits::GetSize(BitVecTraits* b)
+{
+ return b->size;
+}
+
+// static
+unsigned BitVecTraits::GetArrSize(BitVecTraits* b, unsigned elemSize)
+{
+ assert(elemSize == sizeof(size_t));
+ unsigned elemBits = 8 * elemSize;
+ return (unsigned)roundUp(b->size, elemBits) / elemBits;
+}
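+
+// Illustrative example (numbers only): with b->size == 70 and 64-bit size_t words,
+// GetArrSize returns roundUp(70, 64) / 64 == 2, i.e. two size_t words are needed to
+// hold the 70-bit vector.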
+
+// static
+unsigned BitVecTraits::GetEpoch(BitVecTraits* b)
+{
+ return b->size;
+}
+
+// static
+BitSetSupport::BitSetOpCounter* BitVecTraits::GetOpCounter(BitVecTraits* b)
+{
+ return nullptr;
+}
+
+#endif // CompilerBitSetTraits_HPP_DEFINED
diff --git a/src/jit/compmemkind.h b/src/jit/compmemkind.h
new file mode 100644
index 0000000000..e27d2071f7
--- /dev/null
+++ b/src/jit/compmemkind.h
@@ -0,0 +1,56 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef CompMemKindMacro
+#error Define CompMemKindMacro before including this file.
+#endif
+
+// This list of macro invocations should be used to define the CompMemKind enumeration,
+// and the corresponding array of string names for these enum members.
+
+// clang-format off
+CompMemKindMacro(AssertionProp)
+CompMemKindMacro(ASTNode)
+CompMemKindMacro(InstDesc)
+CompMemKindMacro(ImpStack)
+CompMemKindMacro(BasicBlock)
+CompMemKindMacro(fgArgInfo)
+CompMemKindMacro(fgArgInfoPtrArr)
+CompMemKindMacro(FlowList)
+CompMemKindMacro(TreeStatementList)
+CompMemKindMacro(SiScope)
+CompMemKindMacro(FlatFPStateX87)
+CompMemKindMacro(DominatorMemory)
+CompMemKindMacro(LSRA)
+CompMemKindMacro(LSRA_Interval)
+CompMemKindMacro(LSRA_RefPosition)
+CompMemKindMacro(Reachability)
+CompMemKindMacro(SSA)
+CompMemKindMacro(ValueNumber)
+CompMemKindMacro(LvaTable)
+CompMemKindMacro(UnwindInfo)
+CompMemKindMacro(hashBv)
+CompMemKindMacro(bitset)
+CompMemKindMacro(FixedBitVect)
+CompMemKindMacro(AsIAllocator)
+CompMemKindMacro(IndirAssignMap)
+CompMemKindMacro(FieldSeqStore)
+CompMemKindMacro(ZeroOffsetFieldMap)
+CompMemKindMacro(ArrayInfoMap)
+CompMemKindMacro(HeapPhiArg)
+CompMemKindMacro(CSE)
+CompMemKindMacro(GC)
+CompMemKindMacro(CorSig)
+CompMemKindMacro(Inlining)
+CompMemKindMacro(ArrayStack)
+CompMemKindMacro(DebugInfo)
+CompMemKindMacro(DebugOnly)
+CompMemKindMacro(Codegen)
+CompMemKindMacro(LoopOpt)
+CompMemKindMacro(LoopHoist)
+CompMemKindMacro(Unknown)
+// clang-format on
+
+#undef CompMemKindMacro
diff --git a/src/jit/compphases.h b/src/jit/compphases.h
new file mode 100644
index 0000000000..f193d04647
--- /dev/null
+++ b/src/jit/compphases.h
@@ -0,0 +1,91 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: CompPhases.h
+//
+
+//
+// Names of x86 JIT phases, in order. Assumes that the caller defines CompPhaseNameMacro
+// in a useful way before including this file, e.g., to define the phase enumeration and the
+// corresponding array of string names of those phases. This include file undefines CompPhaseNameMacro
+// after the last use.
+// The arguments are:
+// CompPhaseNameMacro(enumName, stringName, hasChildren, parent)
+// "enumName" is an Enumeration-style all-caps name.
+// "stringName" is a self-explanatory.
+// "hasChildren" is true if this phase is broken out into subphases.
+// (We should never do EndPhase on a phase that has children, only on 'leaf phases.')
+// "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase.
+
+// clang-format off
+CompPhaseNameMacro(PHASE_PRE_IMPORT, "Pre-import", "PRE-IMP", false, -1)
+CompPhaseNameMacro(PHASE_IMPORTATION, "Importation", "IMPORT", false, -1)
+CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1)
+CompPhaseNameMacro(PHASE_MORPH, "Morph", "MORPH", false, -1)
+CompPhaseNameMacro(PHASE_GS_COOKIE, "GS Cookie", "GS-COOK", false, -1)
+CompPhaseNameMacro(PHASE_COMPUTE_PREDS, "Compute preds", "PREDS", false, -1)
+CompPhaseNameMacro(PHASE_MARK_GC_POLL_BLOCKS, "Mark GC poll blocks", "GC-POLL", false, -1)
+CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS, "Compute edge weights (1)", "EDG-WGT", false, -1)
+#if FEATURE_EH_FUNCLETS
+CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", "EH-FUNC", false, -1)
+#endif // FEATURE_EH_FUNCLETS
+CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1)
+CompPhaseNameMacro(PHASE_ALLOCATE_OBJECTS, "Allocate Objects", "ALLOC-OBJ", false, -1)
+CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1)
+CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1)
+CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1)
+CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1)
+CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1)
+CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1)
+CompPhaseNameMacro(PHASE_FIND_OPER_ORDER, "Find oper order", "OPER-ORD", false, -1)
+CompPhaseNameMacro(PHASE_SET_BLOCK_ORDER, "Set block order", "BLK-ORD", false, -1)
+CompPhaseNameMacro(PHASE_BUILD_SSA, "Build SSA representation", "SSA", true, -1)
+CompPhaseNameMacro(PHASE_BUILD_SSA_TOPOSORT, "SSA: topological sort", "SSA-SORT", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_DOMS, "SSA: Doms1", "SSA-DOMS", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_LIVENESS, "SSA: liveness", "SSA-LIVE", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_IDF, "SSA: IDF", "SSA-IDF", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_INSERT_PHIS, "SSA: insert phis", "SSA-PHI", false, PHASE_BUILD_SSA)
+CompPhaseNameMacro(PHASE_BUILD_SSA_RENAME, "SSA: rename", "SSA-REN", false, PHASE_BUILD_SSA)
+
+CompPhaseNameMacro(PHASE_EARLY_PROP, "Early Value Propagation", "ERL-PROP", false, -1)
+CompPhaseNameMacro(PHASE_VALUE_NUMBER, "Do value numbering", "VAL-NUM", false, -1)
+
+CompPhaseNameMacro(PHASE_OPTIMIZE_INDEX_CHECKS, "Optimize index checks", "OPT-CHK", false, -1)
+
+#if FEATURE_VALNUM_CSE
+CompPhaseNameMacro(PHASE_OPTIMIZE_VALNUM_CSES, "Optimize Valnum CSEs", "OPT-CSE", false, -1)
+#endif
+
+CompPhaseNameMacro(PHASE_VN_COPY_PROP, "VN based copy prop", "CP-PROP", false, -1)
+#if ASSERTION_PROP
+CompPhaseNameMacro(PHASE_ASSERTION_PROP_MAIN, "Assertion prop", "AST-PROP", false, -1)
+#endif
+CompPhaseNameMacro(PHASE_UPDATE_FLOW_GRAPH, "Update flow graph", "UPD-FG", false, -1)
+CompPhaseNameMacro(PHASE_COMPUTE_EDGE_WEIGHTS2, "Compute edge weights (2)", "EDG-WGT2", false, -1)
+CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", "COLD-BLK", false, -1)
+CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", "RAT", false, -1)
+CompPhaseNameMacro(PHASE_SIMPLE_LOWERING, "Do 'simple' lowering", "SMP-LWR", false, -1)
+
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", "LIVENESS", true, -1)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", "LIV-INIT", false, PHASE_LCLVARLIVENESS)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_PERBLOCK, "Per block local var liveness", "LIV-BLK", false, PHASE_LCLVARLIVENESS)
+CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INTERBLOCK, "Global local var liveness", "LIV-GLBL", false, PHASE_LCLVARLIVENESS)
+
+#ifdef LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_RA_ASSIGN_VARS, "RA assign vars", "REGALLOC", false, -1)
+#endif // LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_LOWERING_DECOMP, "Lowering decomposition", "LWR-DEC", false, -1)
+CompPhaseNameMacro(PHASE_LOWERING, "Lowering nodeinfo", "LWR-INFO", false, -1)
+#ifndef LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_LINEAR_SCAN, "Linear scan register alloc", "LSRA", true, -1)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_BUILD, "LSRA build intervals", "LSRA-BLD", false, PHASE_LINEAR_SCAN)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_ALLOC, "LSRA allocate", "LSRA-ALL", false, PHASE_LINEAR_SCAN)
+CompPhaseNameMacro(PHASE_LINEAR_SCAN_RESOLVE, "LSRA resolve", "LSRA-RES", false, PHASE_LINEAR_SCAN)
+#endif // !LEGACY_BACKEND
+CompPhaseNameMacro(PHASE_GENERATE_CODE, "Generate code", "CODEGEN", false, -1)
+CompPhaseNameMacro(PHASE_EMIT_CODE, "Emit code", "EMIT", false, -1)
+CompPhaseNameMacro(PHASE_EMIT_GCEH, "Emit GC+EH tables", "EMT-GCEH", false, -1)
+// clang-format on
+
+#undef CompPhaseNameMacro
diff --git a/src/jit/conventions.txt b/src/jit/conventions.txt
new file mode 100644
index 0000000000..2984ed6043
--- /dev/null
+++ b/src/jit/conventions.txt
@@ -0,0 +1,81 @@
+This file contains an extracted, plain-text version of some of the "CLR JIT
+Coding Conventions" document, which can be used as a template when writing new
+comments in the JIT source code. The definitive coding conventions document is
+located here:
+
+https://github.com/dotnet/coreclr/blob/master/Documentation/coding-guidelines/clr-jit-coding-conventions.md
+
+
+********** Section 7.1.5 TODO comments
+
+This is the format to be used:
+
+// TODO[-Arch][-Platform][-CQ|-Throughput|-Cleanup|-Bug|-Bug?]: description of the issue
+
+-- One type modifier (CQ, Throughput, Cleanup, Bug or Bug?) must be specified.
+-- The -Arch and -Platform modifiers are optional. Use all-caps for actual
+architectures (e.g. AMD64, X86, ARM, ARM64) and Pascal casing for platforms and
+architecture classes (e.g. ARMArch, LdStArch, XArch, Unix, Windows).
+-- This list is not intended to be exhaustive.
+
+Examples:
+
+ // TODO-LdStArch-Bug: Should regTmp be a dst on the node or an internal reg?
+ // Either way, it is not currently being handled by Lowering.
+
+ // TODO-CQ: based on whether src type is aligned use movaps instead.
+
+ // TODO-Cleanup: Add a comment about why this is unreached() for RyuJIT backend.
+
+ // TODO-Arm64-Bug: handle large constants! Probably need something like the ARM
+ // case above: if (arm_Valid_Imm_For_Instr(ins, val)) ...
+
+
+********** Section 9.4 Function header comment
+
+All functions, except trivial accessors and wrappers, should have a function
+header comment which describes the behavior and the implementation details of
+the function. The format of the function header in an implementation file is
+as shown below.
+
+Within the comment, argument names (and other program-related names) should be
+surrounded by double quotes, to emphasize that they are program objects and not
+simple English words. This helps clarify those cases where a function argument
+might be parsed (by a human) either way.
+
+Any of the sections that do not apply to a method may be skipped. For example,
+if a method has no arguments, the "Arguments" section can be omitted. If a
+function is a void return function, the "Return Value" section can be omitted.
+
+If you can formulate any assumptions as asserts in the code itself, you should
+do so. The "Assumptions" section is intended to encapsulate things that are
+harder (or impossible) to formulate as asserts, or to provide a place to write
+a more easily read English description of any assumptions that exist, even if
+they can be written with asserts.
+
+
+//------------------------------------------------------------------------
+// <Function name>: <Short description of the function>
+//
+// <Full description of the function>
+//
+// Arguments:
+// <argument1-name> - Description of argument 1
+// <argument2-name> - Description of argument 2
+// ... one line for each function argument
+//
+// Return Value:
+// Description of the values this function could return
+//    and under what conditions. When the return value is
+//    described as a function of the arguments, those arguments
+// should be mentioned specifically by name.
+//
+// Assumptions:
+// Any entry and exit conditions, such as required preconditions of
+// data structures, memory to be freed by caller, etc.
+//
+// Notes:
+// More detailed notes about the function.
+// What errors can the function return?
+// What other methods are related or alternatives to be considered?
+
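+For example, a header for a hypothetical function (the function, argument, and
+type names below are purely illustrative) might look like this:
+
+//------------------------------------------------------------------------
+// optFindLoopExit: Find the unique exit block of a natural loop, if one exists.
+//
+// Arguments:
+//    loopHead   - The loop's entry block.
+//    loopBlocks - The set of blocks making up the loop body.
+//
+// Return Value:
+//    The single exit block of the loop described by "loopHead" and "loopBlocks",
+//    or nullptr if the loop has zero or multiple exits.
+//
+// Assumptions:
+//    "loopBlocks" describes a natural loop whose entry block is "loopHead".
+//
+// Notes:
+//    Callers that require a unique exit must handle the nullptr return.
+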
diff --git a/src/jit/copyprop.cpp b/src/jit/copyprop.cpp
new file mode 100644
index 0000000000..bf714f0963
--- /dev/null
+++ b/src/jit/copyprop.cpp
@@ -0,0 +1,463 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+//
+// CopyProp
+//
+// This stage performs value-numbering-based copy propagation. Since copy propagation
+// is about data flow, these copies cannot be discovered in the assertion prop phase;
+// assertion prop only identifies copies implied by control flow, e.g., the equality
+// that holds on the taken edge of "if (a == b)".
+//
+// To identify data flow copies, we follow an approach similar to SSA renaming:
+// we walk each path in the graph, keeping track of every live definition. Thus,
+// when we see a variable that shares its VN with a live definition, we replace it
+// with the variable from that definition, if suitable.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#include "jitpch.h"
+#include "ssabuilder.h"
+
+template <typename T>
+inline static T* allocate_any(jitstd::allocator<void>& alloc, size_t count = 1)
+{
+ return jitstd::allocator<T>(alloc).allocate(count);
+}
+
+/**************************************************************************************
+ *
+ * Corresponding to the live definition pushes, pop the stacks as we finish the sub-paths
+ * of the graph originating from the block. Refer to SSA renaming for any additional info.
+ * "curSsaName" tracks the currently live definitions.
+ */
+void Compiler::optBlockCopyPropPopStacks(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName)
+{
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (!tree->IsLocal())
+ {
+ continue;
+ }
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ if (fgExcludeFromSsa(lclNum))
+ {
+ continue;
+ }
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ GenTreePtrStack* stack = nullptr;
+ curSsaName->Lookup(lclNum, &stack);
+ stack->Pop();
+ if (stack->Height() == 0)
+ {
+ curSsaName->Remove(lclNum);
+ }
+ }
+ }
+ }
+}
+
+/*******************************************************************************************************
+ *
+ * Given the "lclVar" and "copyVar" compute if the copy prop will be beneficial.
+ *
+ */
+int Compiler::optCopyProp_LclVarScore(LclVarDsc* lclVarDsc, LclVarDsc* copyVarDsc, bool preferOp2)
+{
+ int score = 0;
+
+ if (lclVarDsc->lvVolatileHint)
+ {
+ score += 4;
+ }
+
+ if (copyVarDsc->lvVolatileHint)
+ {
+ score -= 4;
+ }
+
+ if (lclVarDsc->lvDoNotEnregister)
+ {
+ score += 4;
+ }
+
+ if (copyVarDsc->lvDoNotEnregister)
+ {
+ score -= 4;
+ }
+
+#ifdef _TARGET_X86_
+ // For doubles we also prefer to change parameters into non-parameter local variables
+ if (lclVarDsc->lvType == TYP_DOUBLE)
+ {
+ if (lclVarDsc->lvIsParam)
+ {
+ score += 2;
+ }
+
+ if (copyVarDsc->lvIsParam)
+ {
+ score -= 2;
+ }
+ }
+#endif
+
+ // Otherwise we prefer to use the op2LclNum
+ return score + ((preferOp2) ? 1 : -1);
+}
+
+/**************************************************************************************
+ *
+ * Perform copy propagation on a given tree as we walk the graph. If it is a local
+ * variable, look up all currently live definitions and check whether any of those
+ * definitions shares its value number. If so, we can make the replacement.
+ *
+ */
+void Compiler::optCopyProp(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, LclNumToGenTreePtrStack* curSsaName)
+{
+ // TODO-Review: EH successor/predecessor iteration seems broken.
+ if (block->bbCatchTyp == BBCT_FINALLY || block->bbCatchTyp == BBCT_FAULT)
+ {
+ return;
+ }
+
+ // If not local nothing to do.
+ if (!tree->IsLocal())
+ {
+ return;
+ }
+ if (tree->OperGet() == GT_PHI_ARG || tree->OperGet() == GT_LCL_FLD)
+ {
+ return;
+ }
+
+ // Propagate only on uses.
+ if (tree->gtFlags & GTF_VAR_DEF || tree->gtFlags & GTF_VAR_USEDEF)
+ {
+ return;
+ }
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+
+ // Skip address exposed variables.
+ if (fgExcludeFromSsa(lclNum))
+ {
+ return;
+ }
+
+ assert(tree->gtVNPair.GetConservative() != ValueNumStore::NoVN);
+
+ for (LclNumToGenTreePtrStack::KeyIterator iter = curSsaName->Begin(); !iter.Equal(curSsaName->End()); ++iter)
+ {
+ unsigned newLclNum = iter.Get();
+
+ GenTreePtr op = iter.GetValue()->Index(0);
+
+ // Nothing to do if same.
+ if (lclNum == newLclNum)
+ {
+ continue;
+ }
+
+        // Skip variables with assignments embedded in the statement (i.e., with a comma),
+        // because we are not currently updating their SSA names as live in the copy-prop pass of the stmt.
+ if (VarSetOps::IsMember(this, optCopyPropKillSet, lvaTable[newLclNum].lvVarIndex))
+ {
+ continue;
+ }
+
+ if (op->gtFlags & GTF_VAR_CAST)
+ {
+ continue;
+ }
+ if (gsShadowVarInfo != nullptr && lvaTable[newLclNum].lvIsParam &&
+ gsShadowVarInfo[newLclNum].shadowCopy == lclNum)
+ {
+ continue;
+ }
+ ValueNum opVN = GetUseAsgDefVNOrTreeVN(op);
+ if (opVN == ValueNumStore::NoVN)
+ {
+ continue;
+ }
+ if (op->TypeGet() != tree->TypeGet())
+ {
+ continue;
+ }
+ if (opVN != tree->gtVNPair.GetConservative())
+ {
+ continue;
+ }
+ if (optCopyProp_LclVarScore(&lvaTable[lclNum], &lvaTable[newLclNum], true) <= 0)
+ {
+ continue;
+ }
+        // Check whether the newLclNum is live before being substituted. Otherwise, we could end
+        // up in a situation where there must have been a phi node that got pruned because the
+        // variable is not live anymore. For example,
+        //  if
+        //     x0 = 1
+        //  else
+        //     x1 = 2
+        //  print(c) <-- x is not live here. Let's say 'c' shares the value number with "x0."
+        //
+        // If we simply substituted 'c' with "x0", we would be wrong. Ideally, there would be a phi
+        // node x2 = phi(x0, x1) which could then be used to substitute 'c'. But because of pruning
+        // there is no such phi node. To solve this, we check that the candidate ("x0" here) is
+        // live before replacing 'c' with it.
+ if (!lvaTable[newLclNum].lvVerTypeInfo.IsThisPtr())
+ {
+ if (lvaTable[newLclNum].lvAddrExposed)
+ {
+ continue;
+ }
+
+ // We compute liveness only on tracked variables. So skip untracked locals.
+ if (!lvaTable[newLclNum].lvTracked)
+ {
+ continue;
+ }
+
+            // Because of this dependence on live variable analysis, the CopyProp phase runs
+            // immediately after Liveness, SSA, and VN.
+ if (!VarSetOps::IsMember(this, compCurLife, lvaTable[newLclNum].lvVarIndex))
+ {
+ continue;
+ }
+ }
+ unsigned newSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ if (op->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEDEF))
+ {
+ newSsaNum = GetSsaNumForLocalVarDef(op);
+ }
+ else // parameters, this pointer etc.
+ {
+ newSsaNum = op->AsLclVarCommon()->GetSsaNum();
+ }
+
+ if (newSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("VN based copy assertion for ");
+ printTreeID(tree);
+ printf(" V%02d @%08X by ", lclNum, tree->GetVN(VNK_Conservative));
+ printTreeID(op);
+ printf(" V%02d @%08X.\n", newLclNum, op->GetVN(VNK_Conservative));
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+
+ lvaTable[lclNum].decRefCnts(block->getBBWeight(this), this);
+ lvaTable[newLclNum].incRefCnts(block->getBBWeight(this), this);
+ tree->gtLclVarCommon.SetLclNum(newLclNum);
+ tree->AsLclVarCommon()->SetSsaNum(newSsaNum);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("copy propagated to:\n");
+ gtDispTree(tree, nullptr, nullptr, true);
+ }
+#endif
+ break;
+ }
+ return;
+}
+
+/**************************************************************************************
+ *
+ * Helper to check if tree is a local that participates in SSA numbering.
+ */
+bool Compiler::optIsSsaLocal(GenTreePtr tree)
+{
+ return tree->IsLocal() && !fgExcludeFromSsa(tree->AsLclVarCommon()->GetLclNum());
+}
+
+/**************************************************************************************
+ *
+ * Perform copy propagation using currently live definitions on the current block's
+ * variables. Also, as new definitions are encountered, update "curSsaName", which
+ * tracks the currently live definitions.
+ *
+ */
+void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curSsaName)
+{
+ JITDUMP("Copy Assertion for BB%02u\n", block->bbNum);
+
+ // There are no definitions at the start of the block. So clear it.
+ compCurLifeTree = nullptr;
+ VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ VarSetOps::ClearD(this, optCopyPropKillSet);
+
+ // Walk the tree to find if any local variable can be replaced with current live definitions.
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ compUpdateLife</*ForCodeGen*/ false>(tree);
+ optCopyProp(block, stmt, tree, curSsaName);
+
+ // TODO-Review: Merge this loop with the following loop to correctly update the
+ // live SSA num while also propagating copies.
+ //
+ // 1. This loop performs copy prop with currently live (on-top-of-stack) SSA num.
+ // 2. The subsequent loop maintains a stack for each lclNum with
+ // currently active SSA numbers when definitions are encountered.
+ //
+ // If there is an embedded definition using a "comma" in a stmt, then the currently
+ // live SSA number will get updated only in the next loop (2). However, this new
+ // definition is now supposed to be live (on tos). If we did not update the stacks
+            // using (2), copy prop (1) would use an SSA num defined outside the stmt, ignoring the
+            // embedded update. Killing the variable is a simplification to produce 0 ASM diffs
+ // for an update release.
+ //
+ if (optIsSsaLocal(tree) && (tree->gtFlags & GTF_VAR_DEF))
+ {
+ VarSetOps::AddElemD(this, optCopyPropKillSet, lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex);
+ }
+ }
+
+ // This logic must be in sync with SSA renaming process.
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (!optIsSsaLocal(tree))
+ {
+ continue;
+ }
+
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ // As we encounter a definition add it to the stack as a live definition.
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ GenTreePtrStack* stack;
+ if (!curSsaName->Lookup(lclNum, &stack))
+ {
+ stack = new (getAllocator()) GenTreePtrStack(this);
+ }
+ stack->Push(tree);
+ curSsaName->Set(lclNum, stack);
+ }
+ // If we encounter first use of a param or this pointer add it as a live definition.
+ // Since they are always live, do it only once.
+ else if ((tree->gtOper == GT_LCL_VAR) && !(tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) &&
+ (lvaTable[lclNum].lvIsParam || lvaTable[lclNum].lvVerTypeInfo.IsThisPtr()))
+ {
+ GenTreePtrStack* stack;
+ if (!curSsaName->Lookup(lclNum, &stack))
+ {
+ stack = new (getAllocator()) GenTreePtrStack(this);
+ stack->Push(tree);
+ curSsaName->Set(lclNum, stack);
+ }
+ }
+ }
+ }
+}
+
+/**************************************************************************************
+ *
+ * This stage performs value-numbering-based copy propagation. Since copy propagation
+ * is about data flow, these copies cannot be found in the assertion prop phase; assertion
+ * prop only identifies copies implied by control flow, e.g., the equality that holds on
+ * the taken edge of "if (a == b)".
+ *
+ * To identify data flow copies, we follow an approach similar to SSA renaming. We walk
+ * each path in the graph keeping track of every live definition. Thus, when we see a
+ * variable that shares its VN with a live definition, we replace this variable with
+ * the variable in the live definition.
+ *
+ * We do this to be in conventional SSA form. This can very well be changed later.
+ *
+ * For example, on some path in the graph:
+ * a0 = x0
+ * : <- other blocks
+ * :
+ * a1 = y0
+ * :
+ * : <- other blocks
+ * b0 = x0, we cannot substitute x0 with a0, because currently our backend doesn't
+ * treat lclNum and ssaNum together as a variable, but just looks at lclNum. If we
+ * substituted x0 with a0, then we'd be in general SSA form.
+ *
+ */
+void Compiler::optVnCopyProp()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optVnCopyProp()\n");
+ }
+#endif
+
+ if (fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+ jitstd::allocator<void> allocator(getAllocator());
+
+ // Compute the domTree to use.
+ BlkToBlkSetMap* domTree = new (getAllocator()) BlkToBlkSetMap(getAllocator());
+ domTree->Reallocate(fgBBcount * 3 / 2); // Prime the allocation
+ SsaBuilder::ComputeDominators(this, domTree);
+
+ struct BlockWork
+ {
+ BasicBlock* m_blk;
+ bool m_processed;
+
+ BlockWork(BasicBlock* blk, bool processed = false) : m_blk(blk), m_processed(processed)
+ {
+ }
+ };
+ typedef jitstd::vector<BlockWork> BlockWorkStack;
+
+ VarSetOps::AssignNoCopy(this, compCurLife, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optCopyPropKillSet, VarSetOps::MakeEmpty(this));
+
+ // The map from lclNum to its recently live definitions as a stack.
+ LclNumToGenTreePtrStack curSsaName(getAllocator());
+
+ BlockWorkStack* worklist =
+ new (allocate_any<BlockWorkStack>(allocator), jitstd::placement_t()) BlockWorkStack(allocator);
+
+ worklist->push_back(BlockWork(fgFirstBB));
+ while (!worklist->empty())
+ {
+ BlockWork work = worklist->back();
+ worklist->pop_back();
+
+ BasicBlock* block = work.m_blk;
+ if (work.m_processed)
+ {
+ // Pop all the live definitions for this block.
+ optBlockCopyPropPopStacks(block, &curSsaName);
+ continue;
+ }
+
+        // Generate copy assertions in this block, keeping the "curSsaName" map up to date.
+ worklist->push_back(BlockWork(block, true));
+
+ optBlockCopyProp(block, &curSsaName);
+
+ // Add dom children to work on.
+ BlkSet* pBlkSet;
+ if (domTree->Lookup(block, &pBlkSet))
+ {
+ for (BlkSet::KeyIterator child = pBlkSet->Begin(); !child.Equal(pBlkSet->End()); ++child)
+ {
+ worklist->push_back(BlockWork(child.Get()));
+ }
+ }
+ }
+
+ // Tracked variable count increases after CopyProp, so don't keep a shorter array around.
+ // Destroy (release) the varset.
+ VarSetOps::AssignNoCopy(this, compCurLife, VarSetOps::UninitVal());
+}
diff --git a/src/jit/cpp.hint b/src/jit/cpp.hint
new file mode 100644
index 0000000000..b2fe33cc98
--- /dev/null
+++ b/src/jit/cpp.hint
@@ -0,0 +1,27 @@
+// cpp.hint for the JIT
+//
+// These hints are designed to improve the IntelliSense experience when browsing the JIT codebase.
+// Note that they don't need to be correct code; they just need to fix the IntelliSense problems that
+// exist without the hint.
+//
+// See the article on hints in MSDN for more information on their necessity and use:
+// http://msdn.microsoft.com/en-us/library/dd997977.aspx
+
+#define foreach_treenode_execution_order(__node, __stmt) for (;;)
+
+#define foreach_block(__compiler, __block) for (;;)
+
+#define FOREACH_REGISTER_FILE(file) for (;;)
+
+// From jit.h
+
+#define DECLARE_TYPED_ENUM(tag,baseType) enum tag : baseType
+
+#define END_DECLARE_TYPED_ENUM(tag,baseType) ;
+
+#define INDEBUG(x) x
+#define INDEBUG_COMMA(x) x,
+#define DEBUGARG(x) , x
+
+#define PROTO_ARG(x) x ,
+#define PROTO_ARGL(x) , x
\ No newline at end of file
diff --git a/src/jit/crossgen/.gitmirror b/src/jit/crossgen/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/crossgen/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror.
\ No newline at end of file
diff --git a/src/jit/crossgen/CMakeLists.txt b/src/jit/crossgen/CMakeLists.txt
new file mode 100644
index 0000000000..f79d9e72ce
--- /dev/null
+++ b/src/jit/crossgen/CMakeLists.txt
@@ -0,0 +1,7 @@
+include(${CLR_DIR}/crossgen.cmake)
+
+if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM)
+ add_definitions(-DLEGACY_BACKEND)
+endif()
+
+add_library_clr(${JIT_BASE_NAME}_crossgen ${SOURCES})
diff --git a/src/jit/crossgen/jit_crossgen.nativeproj b/src/jit/crossgen/jit_crossgen.nativeproj
new file mode 100644
index 0000000000..f8552dc2f5
--- /dev/null
+++ b/src/jit/crossgen/jit_crossgen.nativeproj
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <!--Import the settings-->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\xplat\SetCrossGen.props" />
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup>
+ <BuildSysBinaries>true</BuildSysBinaries>
+ <OutputName>jit_crossgen</OutputName>
+ <FeatureMergeJitAndEngine>true</FeatureMergeJitAndEngine>
+ <TargetType>LIBRARY</TargetType>
+
+ <ClDefines Condition="'$(BuildArchitecture)' == 'i386'">$(ClDefines);LEGACY_BACKEND</ClDefines>
+ <ClDefines Condition="'$(BuildArchitecture)' == 'arm'">$(ClDefines);LEGACY_BACKEND</ClDefines>
+ </PropertyGroup>
+
+ <Import Project="..\jit.settings.targets" />
+
+</Project>
diff --git a/src/jit/dataflow.h b/src/jit/dataflow.h
new file mode 100644
index 0000000000..c9803a0cc1
--- /dev/null
+++ b/src/jit/dataflow.h
@@ -0,0 +1,81 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This class is used to perform data flow optimizations.
+// An example usage would be:
+//
+// DataFlow flow(m_pCompiler);
+// flow.ForwardAnalysis(callback);
+//
+// The "callback" object needs to implement the necessary callback
+// functions that the "flow" object will call as the data flow
+// analysis progresses.
+//
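+//
+// For illustration only, a minimal callback might be sketched as below (hypothetical;
+// it is not an actual JIT class, and real callbacks typically track per-block in/out
+// sets, e.g. as bit vectors):
+//
+//   class ForwardFlowCallback
+//   {
+//   public:
+//       ForwardFlowCallback(Compiler* pCompiler) : m_pCompiler(pCompiler)
+//       {
+//       }
+//
+//       void StartMerge(BasicBlock* block)
+//       {
+//           // Reset the "in" state that is about to be recomputed from the preds.
+//       }
+//
+//       void Merge(BasicBlock* block, BasicBlock* pred, flowList* preds)
+//       {
+//           // Combine the pred's "out" state into block's "in" state.
+//       }
+//
+//       bool EndMerge(BasicBlock* block)
+//       {
+//           // Recompute block's "out" state; return true iff it changed, so that
+//           // ForwardAnalysis re-queues the block's successors.
+//           return false;
+//       }
+//
+//   private:
+//       Compiler* m_pCompiler;
+//   };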
+#pragma once
+
+#include "compiler.h"
+#include "jitstd.h"
+
+class DataFlow
+{
+private:
+ DataFlow();
+
+public:
+    // The callback interface that needs to be implemented by anyone
+    // who wants to receive updates from the dataflow object.
+ class Callback
+ {
+ public:
+ Callback(Compiler* pCompiler);
+
+ void StartMerge(BasicBlock* block);
+ void Merge(BasicBlock* block, BasicBlock* pred, flowList* preds);
+ bool EndMerge(BasicBlock* block);
+
+ private:
+ Compiler* m_pCompiler;
+ };
+
+ DataFlow(Compiler* pCompiler);
+
+ template <typename TCallback>
+ void ForwardAnalysis(TCallback& callback);
+
+private:
+ Compiler* m_pCompiler;
+};
+
+template <typename TCallback>
+void DataFlow::ForwardAnalysis(TCallback& callback)
+{
+ jitstd::list<BasicBlock*> worklist(jitstd::allocator<void>(m_pCompiler->getAllocator()));
+
+ worklist.insert(worklist.begin(), m_pCompiler->fgFirstBB);
+ while (!worklist.empty())
+ {
+ BasicBlock* block = *(worklist.begin());
+ worklist.erase(worklist.begin());
+
+ callback.StartMerge(block);
+ {
+ flowList* preds = m_pCompiler->BlockPredsWithEH(block);
+ for (flowList* pred = preds; pred; pred = pred->flNext)
+ {
+ callback.Merge(block, pred->flBlock, preds);
+ }
+ }
+
+ if (callback.EndMerge(block))
+ {
+ AllSuccessorIter succsBegin = block->GetAllSuccs(m_pCompiler).begin();
+ AllSuccessorIter succsEnd = block->GetAllSuccs(m_pCompiler).end();
+ for (AllSuccessorIter succ = succsBegin; succ != succsEnd; ++succ)
+ {
+ worklist.insert(worklist.end(), *succ);
+ }
+ }
+ }
+}
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
new file mode 100644
index 0000000000..cf66487367
--- /dev/null
+++ b/src/jit/decomposelongs.cpp
@@ -0,0 +1,1028 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX DecomposeLongs XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX*/
+
+//
+// This file contains code to decompose 64-bit LONG operations on 32-bit platforms
+// into multiple single-register operations so that individual register usage and
+// requirements are explicit for LSRA. The rationale is to avoid adding downstream
+// code complexity caused by having to handle longs as special cases, especially
+// in LSRA.
+//
+// Long decomposition happens on a statement immediately prior to more general
+// purpose lowering.
+//
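+//
+// For example (an illustrative sketch, not an actual JIT dump), a 64-bit addition
+//
+//     t3 = GT_ADD long (t1, t2)
+//
+// is decomposed so that each 32-bit half is an explicit node, tied back together
+// by a GT_LONG node:
+//
+//     lo = GT_ADD_LO int (t1.lo, t2.lo)   // low half; produces the carry
+//     hi = GT_ADD_HI int (t1.hi, t2.hi)   // high half; consumes the carry
+//     t3 = GT_LONG       (lo, hi)
+//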
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+#ifndef _TARGET_64BIT_ // DecomposeLongs is only used on 32-bit platforms
+
+#include "decomposelongs.h"
+
+//------------------------------------------------------------------------
+// DecomposeLongs::PrepareForDecomposition:
+// Do one-time preparation required for LONG decomposition. Namely,
+// promote long variables to multi-register structs.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// None.
+//
+void DecomposeLongs::PrepareForDecomposition()
+{
+ m_compiler->lvaPromoteLongVars();
+}
+
+//------------------------------------------------------------------------
+// DecomposeLongs::DecomposeBlock:
+// Do LONG decomposition on all the nodes in the given block. This must
+// be done before lowering the block, as decomposition can insert
+// additional nodes.
+//
+// Arguments:
+// block - the block to process
+//
+// Return Value:
+// None.
+//
+void DecomposeLongs::DecomposeBlock(BasicBlock* block)
+{
+ assert(block == m_compiler->compCurBB); // compCurBB must already be set.
+ assert(block->isEmpty() || block->IsLIR());
+
+ m_blockWeight = block->getBBWeight(m_compiler);
+ m_range = &LIR::AsRange(block);
+ DecomposeRangeHelper();
+}
+
+//------------------------------------------------------------------------
+// DecomposeLongs::DecomposeRange:
+// Do LONG decomposition on all the nodes in the given range. This must
+// be done before inserting a range of un-decomposed IR into a block
+// that has already been decomposed.
+//
+// Arguments:
+// compiler - The compiler context.
+// blockWeight - The weight of the block into which the range will be
+// inserted.
+// range - The range to decompose.
+//
+// Return Value:
+// None.
+//
+void DecomposeLongs::DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range)
+{
+ assert(compiler != nullptr);
+
+ DecomposeLongs decomposer(compiler);
+ decomposer.m_blockWeight = blockWeight;
+ decomposer.m_range = &range;
+
+ decomposer.DecomposeRangeHelper();
+}
+
+//------------------------------------------------------------------------
+// DecomposeLongs::DecomposeRangeHelper:
+//    Decompose each node in the current range.
+//
+// Decomposition is done as an execution-order walk. Decomposition of
+// a particular node can create new nodes that need to be further
+// decomposed at higher levels. That is, decomposition "bubbles up"
+// through dataflow.
+//
+void DecomposeLongs::DecomposeRangeHelper()
+{
+ assert(m_range != nullptr);
+
+ GenTree* node = Range().FirstNonPhiNode();
+ while (node != nullptr)
+ {
+ LIR::Use use;
+ if (!Range().TryGetUse(node, &use))
+ {
+ use = LIR::Use::GetDummyUse(Range(), node);
+ }
+
+ node = DecomposeNode(use);
+ }
+
+ assert(Range().CheckLIR(m_compiler));
+}
+
+//------------------------------------------------------------------------
+// DecomposeNode: Decompose long-type trees into lower and upper halves.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeNode(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+
+ // Handle the case where we are implicitly using the lower half of a long lclVar.
+ if ((tree->TypeGet() == TYP_INT) && tree->OperIsLocal())
+ {
+ LclVarDsc* varDsc = m_compiler->lvaTable + tree->AsLclVarCommon()->gtLclNum;
+ if (varTypeIsLong(varDsc) && varDsc->lvPromoted)
+ {
+#ifdef DEBUG
+ if (m_compiler->verbose)
+ {
+ printf("Changing implicit reference to lo half of long lclVar to an explicit reference of its promoted "
+ "half:\n");
+ m_compiler->gtDispTreeRange(Range(), tree);
+ }
+#endif // DEBUG
+ m_compiler->lvaDecRefCnts(tree);
+ unsigned loVarNum = varDsc->lvFieldLclStart;
+ tree->AsLclVarCommon()->SetLclNum(loVarNum);
+ m_compiler->lvaIncRefCnts(tree);
+ return tree->gtNext;
+ }
+ }
+
+ if (tree->TypeGet() != TYP_LONG)
+ {
+ return tree->gtNext;
+ }
+
+#ifdef DEBUG
+ if (m_compiler->verbose)
+ {
+ printf("Decomposing TYP_LONG tree. BEFORE:\n");
+ m_compiler->gtDispTreeRange(Range(), tree);
+ }
+#endif // DEBUG
+
+ GenTree* nextNode = nullptr;
+ switch (tree->OperGet())
+ {
+ case GT_PHI:
+ case GT_PHI_ARG:
+ nextNode = tree->gtNext;
+ break;
+
+ case GT_LCL_VAR:
+ nextNode = DecomposeLclVar(use);
+ break;
+
+ case GT_LCL_FLD:
+ nextNode = DecomposeLclFld(use);
+ break;
+
+ case GT_STORE_LCL_VAR:
+ nextNode = DecomposeStoreLclVar(use);
+ break;
+
+ case GT_CAST:
+ nextNode = DecomposeCast(use);
+ break;
+
+ case GT_CNS_LNG:
+ nextNode = DecomposeCnsLng(use);
+ break;
+
+ case GT_CALL:
+ nextNode = DecomposeCall(use);
+ break;
+
+ case GT_RETURN:
+ assert(tree->gtOp.gtOp1->OperGet() == GT_LONG);
+ break;
+
+ case GT_STOREIND:
+ nextNode = DecomposeStoreInd(use);
+ break;
+
+ case GT_STORE_LCL_FLD:
+ assert(tree->gtOp.gtOp1->OperGet() == GT_LONG);
+ NYI("st.lclFld of of TYP_LONG");
+ break;
+
+ case GT_IND:
+ nextNode = DecomposeInd(use);
+ break;
+
+ case GT_NOT:
+ nextNode = DecomposeNot(use);
+ break;
+
+ case GT_NEG:
+ nextNode = DecomposeNeg(use);
+ break;
+
+ // Binary operators. Those that require different computation for upper and lower half are
+ // handled by the use of GetHiOper().
+ case GT_ADD:
+ case GT_SUB:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ nextNode = DecomposeArith(use);
+ break;
+
+ case GT_MUL:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_MUL");
+ break;
+
+ case GT_DIV:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_DIV");
+ break;
+
+ case GT_MOD:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_MOD");
+ break;
+
+ case GT_UDIV:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_UDIV");
+ break;
+
+ case GT_UMOD:
+ NYI("Arithmetic binary operators on TYP_LONG - GT_UMOD");
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ nextNode = DecomposeShift(use);
+ break;
+
+ case GT_ROL:
+ case GT_ROR:
+ NYI("Arithmetic binary operators on TYP_LONG - ROTATE");
+ break;
+
+ case GT_MULHI:
+ NYI("Arithmetic binary operators on TYP_LONG - MULHI");
+ break;
+
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ NYI("Interlocked operations on TYP_LONG");
+ break;
+
+ default:
+ {
+ JITDUMP("Illegal TYP_LONG node %s in Decomposition.", GenTree::NodeName(tree->OperGet()));
+ noway_assert(!"Illegal TYP_LONG node in Decomposition.");
+ break;
+ }
+ }
+
+#ifdef DEBUG
+ if (m_compiler->verbose)
+ {
+ // NOTE: st_lcl_var doesn't dump properly afterwards.
+ printf("Decomposing TYP_LONG tree. AFTER:\n");
+ m_compiler->gtDispTreeRange(Range(), use.Def());
+ }
+#endif
+
+ return nextNode;
+}
+
+//------------------------------------------------------------------------
+// FinalizeDecomposition: A helper function to finalize LONG decomposition by
+// taking the resulting two halves of the decomposition, and tie them together
+// with a new GT_LONG node that will replace the original node.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+// loResult - the decomposed low part
+// hiResult - the decomposed high part. This must follow loResult in the linear order,
+// as the new GT_LONG node will be inserted immediately after it.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult)
+{
+ assert(use.IsInitialized());
+ assert(loResult != nullptr);
+ assert(hiResult != nullptr);
+ assert(Range().Contains(loResult));
+ assert(Range().Contains(hiResult));
+ assert(loResult->Precedes(hiResult));
+
+ GenTree* gtLong = new (m_compiler, GT_LONG) GenTreeOp(GT_LONG, TYP_LONG, loResult, hiResult);
+ Range().InsertAfter(hiResult, gtLong);
+
+ use.ReplaceWith(m_compiler, gtLong);
+
+ return gtLong->gtNext;
+}
+
+//------------------------------------------------------------------------
+// DecomposeLclVar: Decompose GT_LCL_VAR.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeLclVar(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_LCL_VAR);
+
+ GenTree* tree = use.Def();
+ unsigned varNum = tree->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = m_compiler->lvaTable + varNum;
+ m_compiler->lvaDecRefCnts(tree);
+
+ GenTree* loResult = tree;
+ loResult->gtType = TYP_INT;
+
+ GenTree* hiResult = m_compiler->gtNewLclLNode(varNum, TYP_INT);
+ Range().InsertAfter(loResult, hiResult);
+
+ if (varDsc->lvPromoted)
+ {
+ assert(varDsc->lvFieldCnt == 2);
+ unsigned loVarNum = varDsc->lvFieldLclStart;
+ unsigned hiVarNum = loVarNum + 1;
+ loResult->AsLclVarCommon()->SetLclNum(loVarNum);
+ hiResult->AsLclVarCommon()->SetLclNum(hiVarNum);
+ }
+ else
+ {
+ noway_assert(varDsc->lvLRACandidate == false);
+
+ loResult->SetOper(GT_LCL_FLD);
+ loResult->AsLclFld()->gtLclOffs = 0;
+ loResult->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+
+ hiResult->SetOper(GT_LCL_FLD);
+ hiResult->AsLclFld()->gtLclOffs = 4;
+ hiResult->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ }
+
+ m_compiler->lvaIncRefCnts(loResult);
+ m_compiler->lvaIncRefCnts(hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeLclFld: Decompose GT_LCL_FLD.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_LCL_FLD);
+
+ GenTree* tree = use.Def();
+ GenTreeLclFld* loResult = tree->AsLclFld();
+ loResult->gtType = TYP_INT;
+
+ GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
+ Range().InsertAfter(loResult, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeStoreLclVar: Decompose GT_STORE_LCL_VAR.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeStoreLclVar(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_STORE_LCL_VAR);
+
+ GenTree* tree = use.Def();
+ GenTree* rhs = tree->gtGetOp1();
+ if ((rhs->OperGet() == GT_PHI) || (rhs->OperGet() == GT_CALL))
+ {
+ // GT_CALLs are not decomposed, so will not be converted to GT_LONG
+ // GT_STORE_LCL_VAR = GT_CALL are handled in genMultiRegCallStoreToLocal
+ return tree->gtNext;
+ }
+
+ noway_assert(rhs->OperGet() == GT_LONG);
+ unsigned varNum = tree->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = m_compiler->lvaTable + varNum;
+ m_compiler->lvaDecRefCnts(tree);
+
+ GenTree* loRhs = rhs->gtGetOp1();
+ GenTree* hiRhs = rhs->gtGetOp2();
+ GenTree* hiStore = m_compiler->gtNewLclLNode(varNum, TYP_INT);
+
+ if (varDsc->lvPromoted)
+ {
+ assert(varDsc->lvFieldCnt == 2);
+
+ unsigned loVarNum = varDsc->lvFieldLclStart;
+ unsigned hiVarNum = loVarNum + 1;
+ tree->AsLclVarCommon()->SetLclNum(loVarNum);
+ hiStore->SetOper(GT_STORE_LCL_VAR);
+ hiStore->AsLclVarCommon()->SetLclNum(hiVarNum);
+ }
+ else
+ {
+ noway_assert(varDsc->lvLRACandidate == false);
+
+ tree->SetOper(GT_STORE_LCL_FLD);
+ tree->AsLclFld()->gtLclOffs = 0;
+ tree->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+
+ hiStore->SetOper(GT_STORE_LCL_FLD);
+ hiStore->AsLclFld()->gtLclOffs = 4;
+ hiStore->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ }
+
+ // 'tree' is going to steal the loRhs node for itself, so we need to remove the
+ // GT_LONG node from the threading.
+ Range().Remove(rhs);
+
+ tree->gtOp.gtOp1 = loRhs;
+ tree->gtType = TYP_INT;
+
+ hiStore->gtOp.gtOp1 = hiRhs;
+ hiStore->gtFlags |= GTF_VAR_DEF;
+
+ m_compiler->lvaIncRefCnts(tree);
+ m_compiler->lvaIncRefCnts(hiStore);
+
+ Range().InsertAfter(tree, hiStore);
+
+ return hiStore->gtNext;
+}
+
+//------------------------------------------------------------------------
+// DecomposeCast: Decompose GT_CAST.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeCast(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_CAST);
+
+ GenTree* tree = use.Def();
+ GenTree* loResult = nullptr;
+ GenTree* hiResult = nullptr;
+
+ assert(tree->gtPrev == tree->gtGetOp1());
+ NYI_IF(tree->gtOverflow(), "TYP_LONG cast with overflow");
+ switch (tree->AsCast()->CastFromType())
+ {
+ case TYP_INT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ loResult = tree->gtGetOp1();
+ Range().Remove(tree);
+
+ hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0);
+ Range().InsertAfter(loResult, hiResult);
+ }
+ else
+ {
+ NYI("Lowering of signed cast TYP_INT->TYP_LONG");
+ }
+ break;
+
+ default:
+ NYI("Unimplemented type for Lowering of cast to TYP_LONG");
+ break;
+ }
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeCnsLng: Decompose GT_CNS_LNG.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeCnsLng(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_CNS_LNG);
+
+ GenTree* tree = use.Def();
+ INT32 hiVal = tree->AsLngCon()->HiVal();
+
+ GenTree* loResult = tree;
+ loResult->ChangeOperConst(GT_CNS_INT);
+ loResult->gtType = TYP_INT;
+
+ GenTree* hiResult = new (m_compiler, GT_CNS_INT) GenTreeIntCon(TYP_INT, hiVal);
+ Range().InsertAfter(loResult, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeCall: Decompose GT_CALL.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeCall(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_CALL);
+
+ // We only need to force var = call() if the call's result is used.
+ if (use.IsDummyUse())
+ return use.Def()->gtNext;
+
+ GenTree* user = use.User();
+ if (user->OperGet() == GT_STORE_LCL_VAR)
+ {
+ // If parent is already a STORE_LCL_VAR, we can skip it if
+ // it is already marked as lvIsMultiRegRet.
+ unsigned varNum = user->AsLclVarCommon()->gtLclNum;
+ if (m_compiler->lvaTable[varNum].lvIsMultiRegRet)
+ {
+ return use.Def()->gtNext;
+ }
+ else if (!m_compiler->lvaTable[varNum].lvPromoted)
+ {
+ // If var wasn't promoted, we can just set lvIsMultiRegRet.
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+ return use.Def()->gtNext;
+ }
+ }
+
+ GenTree* originalNode = use.Def();
+
+ // Otherwise, we need to force var = call()
+ unsigned varNum = use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ m_compiler->lvaTable[varNum].lvIsMultiRegRet = true;
+
+ // Decompose the new LclVar use
+ return DecomposeLclVar(use);
+}
+
+//------------------------------------------------------------------------
+// DecomposeStoreInd: Decompose GT_STOREIND.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeStoreInd(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_STOREIND);
+
+ GenTree* tree = use.Def();
+
+ assert(tree->gtOp.gtOp2->OperGet() == GT_LONG);
+
+ // Example input (address expression omitted):
+ //
+ // t51 = const int 0x37C05E7D
+ // t154 = const int 0x2A0A3C80
+ // / --* t51 int
+ // + --* t154 int
+ // t155 = *gt_long long
+ // / --* t52 byref
+ // + --* t155 long
+ // * storeIndir long
+
+ GenTree* gtLong = tree->gtOp.gtOp2;
+
+ // Save address to a temp. It is used in storeIndLow and storeIndHigh trees.
+ LIR::Use address(Range(), &tree->gtOp.gtOp1, tree);
+ address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeStoreInd]: Saving address tree to a temp var:\n");
+ DISPTREERANGE(Range(), address.Def());
+
+ if (!gtLong->gtOp.gtOp1->OperIsLeaf())
+ {
+ LIR::Use op1(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeStoreInd]: Saving low data tree to a temp var:\n");
+ DISPTREERANGE(Range(), op1.Def());
+ }
+
+ if (!gtLong->gtOp.gtOp2->OperIsLeaf())
+ {
+ LIR::Use op2(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeStoreInd]: Saving high data tree to a temp var:\n");
+ DISPTREERANGE(Range(), op2.Def());
+ }
+
+ GenTree* addrBase = tree->gtOp.gtOp1;
+ GenTree* dataHigh = gtLong->gtOp.gtOp2;
+ GenTree* dataLow = gtLong->gtOp.gtOp1;
+ GenTree* storeIndLow = tree;
+
+ Range().Remove(gtLong);
+ Range().Remove(dataHigh);
+ storeIndLow->gtOp.gtOp2 = dataLow;
+ storeIndLow->gtType = TYP_INT;
+
+ GenTree* addrBaseHigh = new (m_compiler, GT_LCL_VAR)
+ GenTreeLclVar(GT_LCL_VAR, addrBase->TypeGet(), addrBase->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
+ GenTree* addrHigh =
+ new (m_compiler, GT_LEA) GenTreeAddrMode(TYP_REF, addrBaseHigh, nullptr, 0, genTypeSize(TYP_INT));
+ GenTree* storeIndHigh = new (m_compiler, GT_STOREIND) GenTreeStoreInd(TYP_INT, addrHigh, dataHigh);
+ storeIndHigh->gtFlags = (storeIndLow->gtFlags & (GTF_ALL_EFFECT | GTF_LIVENESS_MASK));
+ storeIndHigh->gtFlags |= GTF_REVERSE_OPS;
+
+ m_compiler->lvaIncRefCnts(addrBaseHigh);
+
+ Range().InsertAfter(storeIndLow, dataHigh, addrBaseHigh, addrHigh, storeIndHigh);
+
+ return storeIndHigh;
+
+ // Example final output:
+ //
+ // /--* t52 byref
+ // * st.lclVar byref V07 rat0
+ // t158 = lclVar byref V07 rat0
+ // t51 = const int 0x37C05E7D
+ // /--* t158 byref
+ // +--* t51 int
+ // * storeIndir int
+ // t154 = const int 0x2A0A3C80
+ // t159 = lclVar byref V07 rat0
+ // /--* t159 byref
+ // t160 = * lea(b + 4) ref
+ // /--* t154 int
+ // +--* t160 ref
+ // * storeIndir int
+}
+
+//------------------------------------------------------------------------
+// DecomposeInd: Decompose GT_IND.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeInd(LIR::Use& use)
+{
+ GenTree* indLow = use.Def();
+
+ LIR::Use address(Range(), &indLow->gtOp.gtOp1, indLow);
+ address.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeInd]: Saving addr tree to a temp var:\n");
+ DISPTREERANGE(Range(), address.Def());
+
+ // Change the type of lower ind.
+ indLow->gtType = TYP_INT;
+
+ // Create tree of ind(addr+4)
+ GenTreePtr addrBase = indLow->gtGetOp1();
+ GenTreePtr addrBaseHigh = new (m_compiler, GT_LCL_VAR)
+ GenTreeLclVar(GT_LCL_VAR, addrBase->TypeGet(), addrBase->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
+ GenTreePtr addrHigh =
+ new (m_compiler, GT_LEA) GenTreeAddrMode(TYP_REF, addrBaseHigh, nullptr, 0, genTypeSize(TYP_INT));
+ GenTreePtr indHigh = new (m_compiler, GT_IND) GenTreeIndir(GT_IND, TYP_INT, addrHigh, nullptr);
+
+ m_compiler->lvaIncRefCnts(addrBaseHigh);
+
+ Range().InsertAfter(indLow, addrBaseHigh, addrHigh, indHigh);
+
+ return FinalizeDecomposition(use, indLow, indHigh);
+}
+
+//------------------------------------------------------------------------
+// DecomposeNot: Decompose GT_NOT.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeNot(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_NOT);
+
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ noway_assert(gtLong->OperGet() == GT_LONG);
+ GenTree* loOp1 = gtLong->gtGetOp1();
+ GenTree* hiOp1 = gtLong->gtGetOp2();
+
+ Range().Remove(gtLong);
+
+ GenTree* loResult = tree;
+ loResult->gtType = TYP_INT;
+ loResult->gtOp.gtOp1 = loOp1;
+
+ GenTree* hiResult = new (m_compiler, GT_NOT) GenTreeOp(GT_NOT, TYP_INT, hiOp1, nullptr);
+ Range().InsertAfter(loResult, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeNeg: Decompose GT_NEG.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeNeg(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+ assert(use.Def()->OperGet() == GT_NEG);
+
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ noway_assert(gtLong->OperGet() == GT_LONG);
+
+ LIR::Use op1(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ LIR::Use op2(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ // Neither GT_NEG nor the introduced temporaries have side effects.
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ GenTree* loOp1 = gtLong->gtGetOp1();
+ GenTree* hiOp1 = gtLong->gtGetOp2();
+
+ Range().Remove(gtLong);
+
+ GenTree* loResult = tree;
+ loResult->gtType = TYP_INT;
+ loResult->gtOp.gtOp1 = loOp1;
+
+ GenTree* zero = m_compiler->gtNewZeroConNode(TYP_INT);
+ GenTree* hiAdjust = m_compiler->gtNewOperNode(GT_ADD_HI, TYP_INT, hiOp1, zero);
+ GenTree* hiResult = m_compiler->gtNewOperNode(GT_NEG, TYP_INT, hiAdjust);
+ hiResult->gtFlags = tree->gtFlags;
+
+ Range().InsertAfter(loResult, zero, hiAdjust, hiResult);
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeArith: Decompose GT_ADD, GT_SUB, GT_OR, GT_XOR, GT_AND.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeArith(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert((oper == GT_ADD) || (oper == GT_SUB) || (oper == GT_OR) || (oper == GT_XOR) || (oper == GT_AND));
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ // Both operands must have already been decomposed into GT_LONG operators.
+ noway_assert((op1->OperGet() == GT_LONG) && (op2->OperGet() == GT_LONG));
+
+ // Capture the lo and hi halves of op1 and op2.
+ GenTree* loOp1 = op1->gtGetOp1();
+ GenTree* hiOp1 = op1->gtGetOp2();
+ GenTree* loOp2 = op2->gtGetOp1();
+ GenTree* hiOp2 = op2->gtGetOp2();
+
+ // Now, remove op1 and op2 from the node list.
+ Range().Remove(op1);
+ Range().Remove(op2);
+
+ // We will reuse "tree" for the loResult, which will now be of TYP_INT, and its operands
+ // will be the lo halves of op1 from above.
+ GenTree* loResult = tree;
+ loResult->SetOper(GetLoOper(oper));
+ loResult->gtType = TYP_INT;
+ loResult->gtOp.gtOp1 = loOp1;
+ loResult->gtOp.gtOp2 = loOp2;
+
+ GenTree* hiResult = new (m_compiler, oper) GenTreeOp(GetHiOper(oper), TYP_INT, hiOp1, hiOp2);
+ Range().InsertAfter(loResult, hiResult);
+
+ if ((oper == GT_ADD) || (oper == GT_SUB))
+ {
+ if (loResult->gtOverflow())
+ {
+ hiResult->gtFlags |= GTF_OVERFLOW;
+ loResult->gtFlags &= ~GTF_OVERFLOW;
+ }
+ if (loResult->gtFlags & GTF_UNSIGNED)
+ {
+ hiResult->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+
+ return FinalizeDecomposition(use, loResult, hiResult);
+}
+
+//------------------------------------------------------------------------
+// DecomposeShift: Decompose GT_LSH, GT_RSH, GT_RSZ. For shift nodes, we need to use
+// the shift helper functions, so we here convert the shift into a helper call by
+// pulling its arguments out of linear order and making them the args to a call, then
+// replacing the original node with the new call.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeShift(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTree* tree = use.Def();
+ GenTree* gtLong = tree->gtGetOp1();
+ genTreeOps oper = tree->OperGet();
+
+ assert((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ));
+
+ LIR::Use loOp1Use(Range(), &gtLong->gtOp.gtOp1, gtLong);
+ loOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ LIR::Use hiOp1Use(Range(), &gtLong->gtOp.gtOp2, gtLong);
+ hiOp1Use.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ LIR::Use shiftWidthUse(Range(), &tree->gtOp.gtOp2, tree);
+ shiftWidthUse.ReplaceWithLclVar(m_compiler, m_blockWeight);
+
+ GenTree* loOp1 = gtLong->gtGetOp1();
+ GenTree* hiOp1 = gtLong->gtGetOp2();
+
+ GenTree* shiftWidthOp = tree->gtGetOp2();
+
+ Range().Remove(gtLong);
+ Range().Remove(loOp1);
+ Range().Remove(hiOp1);
+
+ Range().Remove(shiftWidthOp);
+
+ // TODO-X86-CQ: If the shift operand is a GT_CNS_INT, we should pipe the instructions through to codegen
+ // and generate the shift instructions ourselves there, rather than replacing it with a helper call.
+
+ unsigned helper;
+
+ switch (oper)
+ {
+ case GT_LSH:
+ helper = CORINFO_HELP_LLSH;
+ break;
+ case GT_RSH:
+ helper = CORINFO_HELP_LRSH;
+ break;
+ case GT_RSZ:
+ helper = CORINFO_HELP_LRSZ;
+ break;
+ default:
+ unreached();
+ }
+
+ GenTreeArgList* argList = m_compiler->gtNewArgList(loOp1, hiOp1, shiftWidthOp);
+
+ GenTree* call = m_compiler->gtNewHelperCallNode(helper, TYP_LONG, 0, argList);
+
+ GenTreeCall* callNode = call->AsCall();
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeLongReturnType(m_compiler);
+
+ call = m_compiler->fgMorphArgs(callNode);
+ Range().InsertAfter(tree, LIR::SeqTree(m_compiler, call));
+
+ Range().Remove(tree);
+ use.ReplaceWith(m_compiler, call);
+ return call;
+}
+
+//------------------------------------------------------------------------
+// GetHiOper: Convert arithmetic operator to "high half" operator of decomposed node.
+//
+// Arguments:
+// oper - operator to map
+//
+// Return Value:
+// mapped operator
+//
+// static
+genTreeOps DecomposeLongs::GetHiOper(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ return GT_ADD_HI;
+ break;
+ case GT_SUB:
+ return GT_SUB_HI;
+ break;
+ case GT_MUL:
+ return GT_MUL_HI;
+ break;
+ case GT_DIV:
+ return GT_DIV_HI;
+ break;
+ case GT_MOD:
+ return GT_MOD_HI;
+ break;
+ case GT_OR:
+ return GT_OR;
+ break;
+ case GT_AND:
+ return GT_AND;
+ break;
+ case GT_XOR:
+ return GT_XOR;
+ break;
+ default:
+ assert(!"GetHiOper called for invalid oper");
+ return GT_NONE;
+ }
+}
+
+//------------------------------------------------------------------------
+// GetLoOper: Convert arithmetic operator to "low half" operator of decomposed node.
+//
+// Arguments:
+// oper - operator to map
+//
+// Return Value:
+// mapped operator
+//
+// static
+genTreeOps DecomposeLongs::GetLoOper(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ return GT_ADD_LO;
+ break;
+ case GT_SUB:
+ return GT_SUB_LO;
+ break;
+ case GT_OR:
+ return GT_OR;
+ break;
+ case GT_AND:
+ return GT_AND;
+ break;
+ case GT_XOR:
+ return GT_XOR;
+ break;
+ default:
+ assert(!"GetLoOper called for invalid oper");
+ return GT_NONE;
+ }
+}
+
+#endif // !_TARGET_64BIT_
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
new file mode 100644
index 0000000000..af9b342fb2
--- /dev/null
+++ b/src/jit/decomposelongs.h
@@ -0,0 +1,67 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX DecomposeLongs XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _DECOMPOSELONGS_H_
+#define _DECOMPOSELONGS_H_
+
+#include "compiler.h"
+
+class DecomposeLongs
+{
+public:
+ DecomposeLongs(Compiler* compiler) : m_compiler(compiler)
+ {
+ }
+
+ void PrepareForDecomposition();
+ void DecomposeBlock(BasicBlock* block);
+
+ static void DecomposeRange(Compiler* compiler, unsigned blockWeight, LIR::Range& range);
+
+private:
+ inline LIR::Range& Range() const
+ {
+ return *m_range;
+ }
+
+ // Driver functions
+ void DecomposeRangeHelper();
+ GenTree* DecomposeNode(LIR::Use& use);
+
+ // Per-node type decompose cases
+ GenTree* DecomposeLclVar(LIR::Use& use);
+ GenTree* DecomposeLclFld(LIR::Use& use);
+ GenTree* DecomposeStoreLclVar(LIR::Use& use);
+ GenTree* DecomposeCast(LIR::Use& use);
+ GenTree* DecomposeCnsLng(LIR::Use& use);
+ GenTree* DecomposeCall(LIR::Use& use);
+ GenTree* DecomposeInd(LIR::Use& use);
+ GenTree* DecomposeStoreInd(LIR::Use& use);
+ GenTree* DecomposeNot(LIR::Use& use);
+ GenTree* DecomposeNeg(LIR::Use& use);
+ GenTree* DecomposeArith(LIR::Use& use);
+ GenTree* DecomposeShift(LIR::Use& use);
+
+ // Helper functions
+ GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult);
+
+ static genTreeOps GetHiOper(genTreeOps oper);
+ static genTreeOps GetLoOper(genTreeOps oper);
+
+ // Data
+ Compiler* m_compiler;
+ unsigned m_blockWeight;
+ LIR::Range* m_range;
+};
+
+#endif // _DECOMPOSELONGS_H_
diff --git a/src/jit/delayload.cpp b/src/jit/delayload.cpp
new file mode 100644
index 0000000000..895a13a6bf
--- /dev/null
+++ b/src/jit/delayload.cpp
@@ -0,0 +1,10 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#pragma hdrstop
+
+#include "shimload.h"
+
+ExternC PfnDliHook __pfnDliNotifyHook = ShimDelayLoadHook;
diff --git a/src/jit/disasm.cpp b/src/jit/disasm.cpp
new file mode 100644
index 0000000000..925f2c3343
--- /dev/null
+++ b/src/jit/disasm.cpp
@@ -0,0 +1,1568 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/***********************************************************************
+*
+* File: dis.cpp
+*
+
+*
+* File Comments:
+*
+* This file handles disassembly. It is adapted from the MS linker.
+*
+***********************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************/
+#ifdef LATE_DISASM
+/*****************************************************************************/
+
+// Define DISASM_DEBUG to get verbose output of late disassembler inner workings.
+//#define DISASM_DEBUG
+#ifdef DISASM_DEBUG
+#ifdef DEBUG
+#define DISASM_DUMP(...) \
+ if (VERBOSE) \
+ printf(__VA_ARGS__)
+#else // !DEBUG
+#define DISASM_DUMP(...) printf(__VA_ARGS__)
+#endif // !DEBUG
+#else // !DISASM_DEBUG
+#define DISASM_DUMP(...)
+#endif // !DISASM_DEBUG
+
+/*****************************************************************************/
+
+#define MAX_CLASSNAME_LENGTH 1024
+
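+// The /ALTERNATENAME directives below redirect imports of the DIS string-formatting callbacks
+// that are decorated with 'unsigned short *' parameters to the native wchar_t* variants that
+// msvcdis exports, so the two wchar_t conventions link together.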
+#if defined(_AMD64_)
+
+#pragma comment(linker, \
+ "/ALTERNATENAME:__imp_?CchFormatAddr@DIS@@QEBA_K_KPEAG0@Z=__imp_?CchFormatAddr@DIS@@QEBA_K_KPEA_W0@Z")
+#pragma comment(linker, \
+ "/ALTERNATENAME:__imp_?CchFormatInstr@DIS@@QEBA_KPEAG_K@Z=__imp_?CchFormatInstr@DIS@@QEBA_KPEA_W_K@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchaddrSet@DIS@@QEAAP6A_KPEBV1@_KPEAG1PEA_K@ZP6A_K01213@Z@Z=__imp_?PfncchaddrSet@DIS@@QEAAP6A_KPEBV1@_KPEA_W1PEA_K@ZP6A_K01213@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@PEAG_K@ZP6A_K0123@Z@Z=__imp_?PfncchregSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@PEA_W_K@ZP6A_K0123@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregrelSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@KPEAG_KPEAK@ZP6A_K01K234@Z@Z=__imp_?PfncchregrelSet@DIS@@QEAAP6A_KPEBV1@W4REGA@1@KPEA_W_KPEAK@ZP6A_K01K234@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchfixupSet@DIS@@QEAAP6A_KPEBV1@_K1PEAG1PEA_K@ZP6A_K011213@Z@Z=__imp_?PfncchfixupSet@DIS@@QEAAP6A_KPEBV1@_K1PEA_W1PEA_K@ZP6A_K011213@Z@Z")
+
+#elif defined(_X86_)
+
+#pragma comment(linker, "/ALTERNATENAME:__imp_?CchFormatAddr@DIS@@QBEI_KPAGI@Z=__imp_?CchFormatAddr@DIS@@QBEI_KPA_WI@Z")
+#pragma comment(linker, "/ALTERNATENAME:__imp_?CchFormatInstr@DIS@@QBEIPAGI@Z=__imp_?CchFormatInstr@DIS@@QBEIPA_WI@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchaddrSet@DIS@@QAEP6GIPBV1@_KPAGIPA_K@ZP6GI012I3@Z@Z=__imp_?PfncchaddrSet@DIS@@QAEP6GIPBV1@_KPA_WIPA_K@ZP6GI012I3@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregSet@DIS@@QAEP6GIPBV1@W4REGA@1@PAGI@ZP6GI012I@Z@Z=__imp_?PfncchregSet@DIS@@QAEP6GIPBV1@W4REGA@1@PA_WI@ZP6GI012I@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchregrelSet@DIS@@QAEP6GIPBV1@W4REGA@1@KPAGIPAK@ZP6GI01K2I3@Z@Z=__imp_?PfncchregrelSet@DIS@@QAEP6GIPBV1@W4REGA@1@KPA_WIPAK@ZP6GI01K2I3@Z@Z")
+#pragma comment( \
+ linker, \
+ "/ALTERNATENAME:__imp_?PfncchfixupSet@DIS@@QAEP6GIPBV1@_KIPAGIPA_K@ZP6GI01I2I3@Z@Z=__imp_?PfncchfixupSet@DIS@@QAEP6GIPBV1@_KIPA_WIPA_K@ZP6GI01I2I3@Z@Z")
+
+#endif
+
+/*****************************************************************************
+ * Given an absolute address from the beginning of the code,
+ * find the corresponding emitter block and the relative offset
+ * of the current address in that block.
+ * This was used to get to the fixup list of each block. The new emitter has
+ * no such fixups. Something needs to be added for this.
+ */
+
+// These structs were defined in emit.h. Fake them here so DisAsm.cpp can compile
+
+typedef struct codeFix
+{
+ codeFix* cfNext;
+ unsigned cfFixup;
+} * codeFixPtr;
+
+typedef struct codeBlk
+{
+ codeFix* cbFixupLst;
+} * codeBlkPtr;
+
+/*****************************************************************************
+ * The following is the callback for jump label and direct function call fixups.
+ * "addr" represents the address of the jump that has to be
+ * replaced with a label or function name.
+ *
+ * Return 1 if a name was written representing the address, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchAddr(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+ return pDisAsm->disCchAddrMember(pdis, addr, wz, cchMax, pdwDisp);
+}
+
+size_t DisAssembler::disCchAddrMember(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+ /* First check the termination type of the instruction,
+ * because this might be a helper or static function call;
+ * check to see if we have a fixup for the current address */
+
+ size_t retval = 0; // assume we don't know
+
+#if defined(_TARGET_XARCH_)
+
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+
+ DISASM_DUMP("AddrMember %p (%p), termType %u\n", addr, disGetLinearAddr((size_t)addr), terminationType);
+
+ switch (terminationType)
+ {
+ // int disCallSize;
+
+ case DISX86::trmtaJmpShort:
+ case DISX86::trmtaJmpCcShort:
+
+ /* We have a short jump in the current code block - generate the label to which we jump */
+
+ assert(0 <= disTarget && disTarget < disTotalCodeSize);
+ swprintf_s(wz, cchMax, W("short L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ break;
+
+ case DISX86::trmtaJmpNear:
+ case DISX86::trmtaJmpCcNear:
+
+ /* We have a near jump. Check if it is in the current code block.
+ * Otherwise we have no target for it. */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ }
+ break;
+
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+
+ /* check for local calls (i.e. CALL label) */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* not a "call ds:[0000]" - go ahead */
+ /* disTarget within block boundary -> local call */
+
+ swprintf_s(wz, cchMax, W("short L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ break;
+ }
+
+ /* this is a near call - in our case usually VM helper functions */
+
+ /* find the emitter block and the offset of the call fixup */
+ /* for the fixup offset we have to add the opcode size for the call - for a near call it is 1 */
+
+ // disCallSize = 1;
+
+ {
+ size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
+ const char* name = disGetMethodFullName(absoluteTarget);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(absoluteTarget), name);
+ retval = 1;
+ break;
+ }
+ }
+
+ break;
+
+#ifdef _TARGET_AMD64_
+
+ case DISX86::trmtaFallThrough:
+
+ /* memory indirect case. Could be for an LEA for the base address of a switch table, which is an arbitrary
+ * address (currently that of the first block after the prolog). */
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ break;
+
+#endif // _TARGET_AMD64_
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+
+ DISASM_DUMP("AddrMember %p (%p), termType %u\n", addr, disGetLinearAddr((size_t)addr), terminationType);
+
+ switch (terminationType)
+ {
+ // int disCallSize;
+
+ case DISARM64::TRMTA::trmtaBra:
+ case DISARM64::TRMTA::trmtaBraCase:
+ case DISARM64::TRMTA::trmtaBraCc:
+ case DISARM64::TRMTA::trmtaBraCcCase:
+ case DISARM64::TRMTA::trmtaBraCcInd:
+ case DISARM64::TRMTA::trmtaBraInd:
+
+ /* We have a jump. Check if it is in the current code block.
+ * Otherwise we have no target for it. */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ }
+ break;
+
+ case DISARM64::trmtaCall:
+ case DISARM64::trmtaCallCc:
+ case DISARM64::trmtaCallCcInd:
+ case DISARM64::trmtaCallInd:
+
+ /* check for local calls (i.e. CALL label) */
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* not a "call [0000]" - go ahead */
+ /* disTarget within block boundary -> local call */
+
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[disTarget]);
+ retval = 1;
+ break;
+ }
+
+ /* this is a near call - in our case usually VM helper functions */
+
+ /* find the emitter block and the offset of the call fixup */
+ /* for the fixup offset we have to add the opcode size for the call - for a near call it is 1 */
+
+ // disCallSize = 1;
+
+ {
+ size_t absoluteTarget = (size_t)disGetLinearAddr(disTarget);
+ const char* name = disGetMethodFullName(absoluteTarget);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(absoluteTarget), name);
+ retval = 1;
+ break;
+ }
+ }
+
+ break;
+
+ case DISARM64::trmtaFallThrough:
+
+ /* memory indirect case. Could be for an LEA for the base address of a switch table, which is an arbitrary
+ * address (currently that of the first block after the prolog). */
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ {
+ DIS::INSTRUCTION instr;
+ DIS::OPERAND ops[DISARM64::coperandMax];
+ bool ok = pdis->FDecode(&instr, ops, ArrLen(ops));
+ if (ok)
+ {
+ bool isAddress = false;
+ switch ((DISARM64::OPA)instr.opa)
+ {
+ case DISARM64::opaAdr:
+ case DISARM64::opaAdrp:
+ isAddress = true;
+ break;
+ default:
+ break;
+ }
+
+ if (isAddress && 0 <= addr && addr < disTotalCodeSize)
+ {
+ swprintf_s(wz, cchMax, W("L_%02u"), disLabels[addr]);
+ retval = 1;
+ }
+ }
+ }
+ break;
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ if (retval == 0)
+ {
+ if (disDiffable)
+ {
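+ // In diffable mode, print a fixed placeholder address so the output is stable across runs.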
+ swprintf_s(wz, cchMax, W("%p"), dspAddr((void*)1));
+ }
+ }
+ else
+ {
+ /* no displacement */
+
+ *pdwDisp = 0x0;
+ }
+
+ return retval;
+}
+
+/*****************************************************************************
+ * We annotate some instructions to get the info needed to display the symbols
+ * for those instructions.
+ *
+ * Return 1 if a name was written representing the address, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchFixup(
+ const DIS* pdis, DIS::ADDR addr, size_t size, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ return pDisAsm->disCchFixupMember(pdis, addr, size, wz, cchMax, pdwDisp);
+}
+
+size_t DisAssembler::disCchFixupMember(
+ const DIS* pdis, DIS::ADDR addr, size_t size, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp)
+{
+#if defined(_TARGET_XARCH_)
+
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+ // DIS::ADDR disIndAddr;
+
+ DISASM_DUMP("FixupMember %016I64X (%08IX), size %d, termType %u\n", addr, disGetLinearAddr((size_t)addr), size,
+ terminationType);
+
+ // Is there a relocation registered for the address?
+
+ size_t absoluteAddr = (size_t)disGetLinearAddr((size_t)addr);
+ size_t targetAddr;
+ bool anyReloc = GetRelocationMap()->Lookup(absoluteAddr, &targetAddr);
+
+ switch (terminationType)
+ {
+ DIS::ADDR disCallSize;
+
+ case DISX86::trmtaFallThrough:
+
+ /* memory indirect case */
+
+ assert(addr > pdis->Addr());
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ if (anyReloc)
+ {
+ // Make instructions like "mov rcx, 7FE8247A638h" diffable.
+ swprintf_s(wz, cchMax, W("%IXh"), dspAddr(targetAddr));
+ break;
+ }
+
+ return 0;
+
+ case DISX86::trmtaJmpInd:
+
+ /* pretty rare case - something like "jmp [eax*4]"
+ * not a function call or anything worth annotating */
+
+ return 0;
+
+ case DISX86::trmtaTrap:
+ case DISX86::trmtaTrapCc:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ return 0;
+
+ case DISX86::trmtaJmpShort:
+ case DISX86::trmtaJmpCcShort:
+
+ case DISX86::trmtaJmpNear:
+ case DISX86::trmtaJmpCcNear:
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+
+ if (anyReloc)
+ {
+ const char* name = disGetMethodFullName(targetAddr);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(targetAddr), name);
+ break;
+ }
+ }
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISX86::trmtaCallInd:
+
+ /* here we have an indirect call - find the indirect address */
+
+ // BYTE * code = disGetLinearAddr((size_t)addr);
+ // disIndAddr = (DIS::ADDR) (code+0);
+
+ /* find the size of the call opcode - less the immediate */
+ /* for the fixup offset we have to add the opcode size for the call */
+ /* addr is the address of the immediate, pdis->Addr() returns the address of the disassembled instruction */
+
+ assert(addr > pdis->Addr());
+ disCallSize = addr - pdis->Addr();
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+ // DIS::ADDR disIndAddr;
+
+ DISASM_DUMP("FixupMember %016I64X (%08IX), size %d, termType %u\n", addr, disGetLinearAddr((size_t)addr), size,
+ terminationType);
+
+ // Is there a relocation registered for the address?
+
+ size_t absoluteAddr = (size_t)disGetLinearAddr((size_t)addr);
+ size_t targetAddr;
+ bool anyReloc = GetRelocationMap()->Lookup(absoluteAddr, &targetAddr);
+
+ switch (terminationType)
+ {
+ DIS::ADDR disCallSize;
+
+ case DISARM64::TRMTA::trmtaUnknown:
+ return 0;
+
+ case DISARM64::TRMTA::trmtaFallThrough:
+
+ if (anyReloc)
+ {
+ /* memory indirect case */
+
+ assert(addr > pdis->Addr());
+
+ /* find the emitter block and the offset for the fixup
+ * "addr" is the address of the immediate */
+
+ // Make instructions like "mov rcx, 7FE8247A638h" diffable.
+ swprintf_s(wz, cchMax, W("%IXh"), dspAddr(targetAddr));
+ break;
+ }
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaBraInd:
+ case DISARM64::TRMTA::trmtaBraCcInd:
+
+ /* pretty rare case - something like "jmp [eax*4]"
+ * not a function call or anything worth annotating */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaTrap:
+ case DISARM64::TRMTA::trmtaTrapCc:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaBra:
+ case DISARM64::TRMTA::trmtaBraCase:
+ case DISARM64::TRMTA::trmtaBraCc:
+ case DISARM64::TRMTA::trmtaBraCcCase:
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaCall:
+ case DISARM64::TRMTA::trmtaCallCc:
+
+ if (anyReloc)
+ {
+ const char* name = disGetMethodFullName(targetAddr);
+ if (name != nullptr)
+ {
+ swprintf_s(wz, cchMax, W("%p %S"), dspAddr(targetAddr), name);
+ break;
+ }
+ }
+
+ /* these are treated by the CchAddr callback - skip them */
+
+ return 0;
+
+ case DISARM64::TRMTA::trmtaCallInd:
+ case DISARM64::TRMTA::trmtaCallCcInd:
+
+ /* here we have an indirect call - find the indirect address */
+
+ // BYTE * code = disGetLinearAddr((size_t)addr);
+ // disIndAddr = (DIS::ADDR) (code+0);
+
+ /* find the size of the call opcode - less the immediate */
+ /* for the fixup offset we have to add the opcode size for the call */
+ /* addr is the address of the immediate, pdis->Addr() returns the address of the disassembled instruction */
+
+ assert(addr > pdis->Addr());
+ disCallSize = addr - pdis->Addr();
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+ break;
+ }
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ /* no displacement */
+
+ *pdwDisp = 0x0;
+
+ return 1;
+}
+
+/*****************************************************************************
+ * This is the callback for register-relative operands in an instruction.
+ * If the register is ESP or EBP, the operand may be a local variable
+ * or a parameter; otherwise, the operand may be an instance variable.
+ *
+ * Return 1 if a name was written representing the register-relative operand, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchRegRel(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ return pDisAsm->disCchRegRelMember(pdis, reg, disp, wz, cchMax, pdwDisp);
+}
+
+size_t DisAssembler::disCchRegRelMember(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp)
+{
+#if defined(_TARGET_XARCH_)
+
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+ // DIS::ADDR disIndAddr;
+
+ DISASM_DUMP("RegRelMember reg %u, disp %u, termType %u\n", reg, disp, terminationType);
+
+ switch (terminationType)
+ {
+ int disOpcodeSize;
+ const char* var;
+
+ case DISX86::trmtaFallThrough:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ case DISX86::trmtaTrap:
+ case DISX86::trmtaTrapCc:
+
+ var = disComp->codeGen->siStackVarName((size_t)(pdis->Addr() - disStartAddr), pdis->Cb(), reg, disp);
+ if (var)
+ {
+ swprintf_s(wz, cchMax, W("%hs+%Xh '%hs'"), getRegName(reg), disp, var);
+ *pdwDisp = 0;
+
+ return 1;
+ }
+
+ /* This case consists of non-static members */
+
+ /* find the emitter block and the offset for the fixup
+ * the fixup is emitted after the encoding of the instruction - size = word (2 bytes)
+ * GRRRR!!! - for the 16-bit case we have to check for the address size prefix = 0x66
+ */
+
+ if (*disGetLinearAddr(disCurOffset) == 0x66)
+ {
+ disOpcodeSize = 3;
+ }
+ else
+ {
+ disOpcodeSize = 2;
+ }
+
+ return 0;
+
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+ case DISX86::trmtaJmpInd:
+
+ break;
+
+ case DISX86::trmtaCallInd:
+
+ /* check if this is a one byte displacement */
+
+ if ((signed char)disp == (int)disp)
+ {
+ /* we have a one byte displacement -> there were no previous callbacks */
+
+ /* find the size of the call opcode - less the immediate */
+ /* this is a call R/M indirect -> opcode size is 2 */
+
+ disOpcodeSize = 2;
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+ }
+ else
+ {
+ /* check if we already have a symbol name as replacement */
+
+ if (disHasName)
+ {
+ /* CchFixup has been called before - we have a symbol name saved in global var disFuncTempBuf */
+
+ swprintf_s(wz, cchMax, W("%hs+%u '%hs'"), getRegName(reg), disp, disFuncTempBuf);
+ *pdwDisp = 0;
+ disHasName = false;
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+
+ break;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+
+ DISASM_DUMP("RegRelMember reg %u, disp %u, termType %u\n", reg, disp, terminationType);
+
+ switch (terminationType)
+ {
+ int disOpcodeSize;
+ const char* var;
+
+ case DISARM64::TRMTA::trmtaFallThrough:
+
+ /* some instructions like division have a TRAP termination type - ignore it */
+
+ case DISARM64::TRMTA::trmtaTrap:
+ case DISARM64::TRMTA::trmtaTrapCc:
+
+ var = disComp->codeGen->siStackVarName((size_t)(pdis->Addr() - disStartAddr), pdis->Cb(), reg, disp);
+ if (var)
+ {
+ swprintf_s(wz, cchMax, W("%hs+%Xh '%hs'"), getRegName(reg), disp, var);
+ *pdwDisp = 0;
+
+ return 1;
+ }
+
+ /* This case consists of non-static members */
+
+ // TODO-ARM64-Bug?: Is this correct?
+ disOpcodeSize = 2;
+ return 0;
+
+ case DISARM64::TRMTA::trmtaCall:
+ case DISARM64::TRMTA::trmtaCallCc:
+ case DISARM64::TRMTA::trmtaBraInd:
+ case DISARM64::TRMTA::trmtaBraCcInd:
+ break;
+
+ case DISARM64::TRMTA::trmtaCallInd:
+ case DISARM64::TRMTA::trmtaCallCcInd:
+
+ /* check if this is a one byte displacement */
+
+ if ((signed char)disp == (int)disp)
+ {
+ /* we have a one byte displacement -> there were no previous callbacks */
+
+ /* find the size of the call opcode - less the immediate */
+ /* this is a call R/M indirect -> opcode size is 2 */
+
+ // TODO-ARM64-Bug?: Is this correct?
+ disOpcodeSize = 2;
+
+ /* find the emitter block and the offset of the call fixup */
+
+ return 0;
+ }
+ else
+ {
+ /* check if we already have a symbol name as replacement */
+
+ if (disHasName)
+ {
+ /* CchFixup has been called before - we have a symbol name saved in global var disFuncTempBuf */
+
+ swprintf_s(wz, cchMax, W("%hs+%u '%hs'"), getRegName(reg), disp, disFuncTempBuf);
+ *pdwDisp = 0;
+ disHasName = false;
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ default:
+
+ printf("Termination type is %d\n", (int)terminationType);
+ assert(!"treat this case\n");
+
+ break;
+ }
+
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ /* save displacement */
+
+ *pdwDisp = disp;
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * Callback for register operands. Most probably, this is a local variable or
+ * a parameter
+ *
+ * Return 1 if a name was written representing the register, 0 otherwise.
+ */
+
+/* static */
+size_t __stdcall DisAssembler::disCchReg(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax)
+{
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ return pDisAsm->disCchRegMember(pdis, reg, wz, cchMax);
+}
+
+size_t DisAssembler::disCchRegMember(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax)
+{
+ // TODO-Review: DIS::REGA does not directly map to our regNumber! E.g., look at DISARM64::REGA --
+ // the Wt registers come first (and do map to our regNumber), but the Xt registers follow.
+ // Until this is fixed, don't use this function!
+ disHasName = false;
+ return 0;
+
+#if 0
+ const char * var = disComp->codeGen->siRegVarName(
+ (size_t)(pdis->Addr() - disStartAddr),
+ pdis->Cb(),
+ reg);
+
+ if (var)
+ {
+ if (disHasName)
+ {
+ /* CchRegRel has been called before - we have a symbol name saved in global var disFuncTempBuf */
+
+ swprintf_s(wz, cchMax, W("%hs'%hs.%hs'"), getRegName(reg), var, disFuncTempBuf);
+ disHasName = false;
+ return 1;
+ }
+ else
+ {
+ swprintf_s(wz, cchMax, W("%hs'%hs'"), getRegName(reg), var);
+ return 1;
+ }
+ }
+ else
+ {
+ if (disHasName)
+ {
+ /* this is the ugly case when a variable is incorrectly presumed dead */
+
+ swprintf_s(wz, cchMax, W("%hs'%hs.%hs'"), getRegName(reg), "<InstVar>", disFuncTempBuf);
+ disHasName = false;
+ return 1;
+ }
+
+ /* just to make sure we didn't bungle if var returns NULL */
+ disHasName = false;
+ return 0;
+ }
+#endif // 0
+}
+
+/*****************************************************************************
+ * Helper function to lazily create a map from code address to CORINFO_METHOD_HANDLE.
+ */
+AddrToMethodHandleMap* DisAssembler::GetAddrToMethodHandleMap()
+{
+ if (disAddrToMethodHandleMap == nullptr)
+ {
+ assert(disComp->getAllocator() != nullptr);
+ disAddrToMethodHandleMap = new (disComp->getAllocator()) AddrToMethodHandleMap(disComp->getAllocator());
+ }
+ return disAddrToMethodHandleMap;
+}
+
+/*****************************************************************************
+ * Helper function to lazily create a map from code address to CORINFO_METHOD_HANDLE.
+ */
+AddrToMethodHandleMap* DisAssembler::GetHelperAddrToMethodHandleMap()
+{
+ if (disHelperAddrToMethodHandleMap == nullptr)
+ {
+ assert(disComp->getAllocator() != nullptr);
+ disHelperAddrToMethodHandleMap = new (disComp->getAllocator()) AddrToMethodHandleMap(disComp->getAllocator());
+ }
+ return disHelperAddrToMethodHandleMap;
+}
+
+/*****************************************************************************
+ * Helper function to lazily create a map from relocation address to relocation target address.
+ */
+AddrToAddrMap* DisAssembler::GetRelocationMap()
+{
+ if (disRelocationMap == nullptr)
+ {
+ assert(disComp->getAllocator() != nullptr);
+ disRelocationMap = new (disComp->getAllocator()) AddrToAddrMap(disComp->getAllocator());
+ }
+ return disRelocationMap;
+}
+
+/*****************************************************************************
+ * Return the count of bytes disassembled.
+ */
+
+size_t DisAssembler::CbDisassemble(DIS* pdis,
+ size_t offs,
+ DIS::ADDR addr,
+ const BYTE* pb,
+ size_t cbMax,
+ FILE* pfile,
+ bool findLabels,
+ bool printit /* = false */,
+ bool dispOffs /* = false */,
+ bool dispCodeBytes /* = false */)
+{
+ assert(pdis);
+
+ size_t cb = pdis->CbDisassemble(addr, pb, cbMax);
+
+ if (cb == 0)
+ {
+ DISASM_DUMP("CbDisassemble offs %Iu addr %I64u\n", offs, addr);
+ // assert(!"can't disassemble instruction!!!");
+ fprintf(pfile, "MSVCDIS can't disassemble instruction @ offset %Iu (0x%02x)!!!\n", offs, offs);
+#if defined(_TARGET_ARM64_)
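+ // ARM64 instructions are a fixed 4 bytes; dump the raw instruction word and skip over it.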
+ fprintf(pfile, "%08Xh\n", *(unsigned int*)pb);
+ return 4;
+#else
+ fprintf(pfile, "%02Xh\n", *pb);
+ return 1;
+#endif
+ }
+
+#if defined(_TARGET_ARM64_)
+ assert(cb == 4); // all instructions are 4 bytes!
+#endif // _TARGET_ARM64_
+
+ /* remember current offset and instruction size */
+
+ disCurOffset = (size_t)addr;
+ disInstSize = cb;
+
+ /* Set the disTarget address */
+
+ disTarget = (size_t)pdis->AddrTarget();
+
+ if (findLabels)
+ {
+#if defined(_TARGET_XARCH_)
+ DISX86::TRMTA terminationType = DISX86::TRMTA(pdis->Trmta());
+
+ /* check the termination type of the instruction */
+
+ switch (terminationType)
+ {
+ case DISX86::trmtaCallNear16:
+ case DISX86::trmtaCallNear32:
+ case DISX86::trmtaCallFar:
+
+ {
+ // Don't count addresses in the relocation table
+ size_t targetAddr;
+ size_t absoluteAddr =
+ (size_t)disGetLinearAddr((size_t)pdis->AddrAddress(1)); // Get the address in the instruction of the
+ // call target address (the address the
+ // reloc is applied to).
+ if (GetRelocationMap()->Lookup(absoluteAddr, &targetAddr))
+ {
+ break;
+ }
+ }
+
+ __fallthrough;
+
+ case DISX86::trmtaJmpShort:
+ case DISX86::trmtaJmpNear:
+ case DISX86::trmtaJmpFar:
+ case DISX86::trmtaJmpCcShort:
+ case DISX86::trmtaJmpCcNear:
+
+ /* a CALL is local iff the disTarget is within the block boundary */
+
+ /* mark the jump label in the disTarget vector and return */
+
+ if (disTarget != DIS::addrNil) // There seems to be an assumption that you can't branch to the first
+ // address of the function (prolog).
+ {
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* we're OK, disTarget within block boundary */
+
+ disLabels[disTarget] = 1;
+ }
+ }
+ break;
+
+ case DISX86::trmtaFallThrough:
+ // We'd like to be able to get a label for code like "lea rcx, [4]" that we use for jump tables, but I
+ // can't figure out how.
+ break;
+
+ default:
+
+ /* jump is not in the current code block */
+ break;
+
+ } // end switch
+#elif defined(_TARGET_ARM64_)
+ DISARM64::TRMTA terminationType = DISARM64::TRMTA(pdis->Trmta());
+
+ /* check the termination type of the instruction */
+
+ switch (terminationType)
+ {
+ case DISARM64::TRMTA::trmtaCall:
+ case DISARM64::TRMTA::trmtaCallCc:
+
+ {
+ // Don't count addresses in the relocation table
+ size_t targetAddr;
+ size_t absoluteAddr =
+ (size_t)disGetLinearAddr((size_t)pdis->AddrAddress(1)); // Get the address in the instruction of the
+ // call target address (the address the
+ // reloc is applied to).
+ if (GetRelocationMap()->Lookup(absoluteAddr, &targetAddr))
+ {
+ break;
+ }
+ }
+
+ __fallthrough;
+
+ case DISARM64::TRMTA::trmtaBra:
+ case DISARM64::TRMTA::trmtaBraCase:
+ case DISARM64::TRMTA::trmtaBraCc:
+ case DISARM64::TRMTA::trmtaBraCcCase:
+
+ /* a CALL is local iff the disTarget is within the block boundary */
+
+ /* mark the jump label in the disTarget vector and return */
+
+ if (disTarget != DIS::addrNil) // There seems to be an assumption that you can't branch to the first
+ // address of the function (prolog).
+ {
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* we're OK, disTarget within block boundary */
+
+ disLabels[disTarget] = 1;
+ }
+ }
+ break;
+
+ case DISARM64::TRMTA::trmtaFallThrough:
+ {
+ DIS::INSTRUCTION instr;
+ DIS::OPERAND ops[DISARM64::coperandMax];
+ bool ok = pdis->FDecode(&instr, ops, ArrLen(ops));
+ if (ok)
+ {
+ switch ((DISARM64::OPA)instr.opa)
+ {
+ case DISARM64::opaAdr:
+ case DISARM64::opaAdrp:
+ // operand 1 is an address
+ assert(instr.coperand >= 2);
+ assert(ops[1].opcls == DIS::opclsImmediate);
+ assert(ops[1].imcls == DIS::imclsAddress);
+ disTarget = ops[1].dwl;
+ break;
+ default:
+ break;
+ }
+
+ if (0 <= disTarget && disTarget < disTotalCodeSize)
+ {
+ /* we're OK, disTarget within block boundary */
+
+ disLabels[disTarget] = 1;
+ }
+ }
+ }
+ break;
+
+ default:
+
+ /* jump is not in the current code block */
+ break;
+
+ } // end switch
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ return cb;
+ } // end if
+
+ /* check if we have a label here */
+
+ if (printit)
+ {
+ if (disLabels[addr])
+ {
+ /* print the label and the offset */
+
+ fprintf(pfile, "L_%02u:\n", disLabels[addr]);
+ }
+ }
+
+ wchar_t wz[MAX_CLASSNAME_LENGTH];
+ pdis->CchFormatInstr(wz, sizeof(wz) / sizeof(wz[0]));
+
+ if (printit)
+ {
+ if (dispOffs)
+ {
+ fprintf(pfile, "%03X", offs);
+ }
+
+#ifdef _TARGET_ARM64_
+#define CCH_INDENT 8 // fixed sized instructions, always 8 characters
+#elif defined(_TARGET_AMD64_)
+#define CCH_INDENT 30 // large constants sometimes
+#else
+#define CCH_INDENT 24
+#endif
+
+ size_t cchIndent = CCH_INDENT;
+
+ if (dispCodeBytes)
+ {
+ static size_t cchBytesMax = -1;
+
+ if (cchBytesMax == -1)
+ {
+ cchBytesMax = pdis->CchFormatBytesMax();
+ }
+
+ wchar_t wzBytes[MAX_CLASSNAME_LENGTH];
+ assert(cchBytesMax < MAX_CLASSNAME_LENGTH);
+
+ size_t cchBytes = pdis->CchFormatBytes(wzBytes, sizeof(wzBytes) / sizeof(wzBytes[0]));
+
+ if (cchBytes > CCH_INDENT)
+ {
+ // Truncate the bytes if they are too long
+
+ static const wchar_t* elipses = W("...\0");
+ const size_t cchElipses = 4;
+
+ memcpy(&wzBytes[CCH_INDENT - cchElipses], elipses, cchElipses * sizeof(wchar_t));
+
+ cchBytes = CCH_INDENT;
+ }
+
+ fprintf(pfile, " %ls", wzBytes);
+ cchIndent = CCH_INDENT - cchBytes;
+ }
+
+ // print the dis-assembled instruction
+
+ fprintf(pfile, "%*c %ls\n", cchIndent, ' ', wz);
+ }
+
+ return cb;
+}
+
+// TODO-Cleanup: this is currently unused, unreferenced.
+size_t CbDisassembleWithBytes(DIS* pdis, DIS::ADDR addr, const BYTE* pb, size_t cbMax, FILE* pfile)
+{
+ assert(pdis);
+ DisAssembler* pDisAsm = (DisAssembler*)pdis->PvClient();
+ assert(pDisAsm);
+
+ wchar_t wz[MAX_CLASSNAME_LENGTH];
+
+ pdis->CchFormatAddr(addr, wz, sizeof(wz) / sizeof(wz[0]));
+
+ size_t cchIndent = (size_t)fprintf(pfile, " %ls: ", wz);
+
+ size_t cb = pdis->CbDisassemble(addr, pb, cbMax);
+
+ if (cb == 0)
+ {
+ fprintf(pfile, "%02Xh\n", *pb);
+ return (1);
+ }
+
+ size_t cchBytesMax = pdis->CchFormatBytesMax();
+
+ if (cchBytesMax > 18)
+ {
+ // Limit bytes coded to 18 characters
+
+ cchBytesMax = 18;
+ }
+
+ wchar_t wzBytes[64];
+ size_t cchBytes = pdis->CchFormatBytes(wzBytes, sizeof(wzBytes) / sizeof(wzBytes[0]));
+
+ wchar_t* pwzBytes;
+ wchar_t* pwzNext;
+
+ for (pwzBytes = wzBytes; pwzBytes != NULL; pwzBytes = pwzNext)
+ {
+ BOOL fFirst = (pwzBytes == wzBytes);
+
+ cchBytes = wcslen(pwzBytes);
+
+ if (cchBytes <= cchBytesMax)
+ {
+ pwzNext = NULL;
+ }
+
+ else
+ {
+ wchar_t ch = pwzBytes[cchBytesMax];
+ pwzBytes[cchBytesMax] = '\0';
+
+ if (ch == W(' '))
+ {
+ pwzNext = pwzBytes + cchBytesMax + 1;
+ }
+
+ else
+ {
+ pwzNext = wcsrchr(pwzBytes, W(' '));
+ assert(pwzNext);
+
+ pwzBytes[cchBytesMax] = ch;
+ *pwzNext++ = '\0';
+ }
+ }
+
+ if (fFirst)
+ {
+ pdis->CchFormatInstr(wz, sizeof(wz) / sizeof(wz[0]));
+ fprintf(pfile, "%-*ls %ls\n", cchBytesMax, pwzBytes, wz);
+ }
+
+ else
+ {
+ fprintf(pfile, "%*c%ls\n", cchIndent, ' ', pwzBytes);
+ }
+ }
+
+ return (cb);
+}
+
+void DisAssembler::DisasmBuffer(FILE* pfile, bool printit)
+{
+ DIS* pdis = NULL;
+
+#ifdef _TARGET_X86_
+ pdis = DIS::PdisNew(DIS::distX86);
+#elif defined(_TARGET_AMD64_)
+ pdis = DIS::PdisNew(DIS::distX8664);
+#elif defined(_TARGET_ARM64_)
+ pdis = DIS::PdisNew(DIS::distArm64);
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+
+ if (pdis == NULL)
+ {
+ assert(!"out of memory in disassembler?");
+ return;
+ }
+
+#ifdef _TARGET_64BIT_
+ pdis->SetAddr64(true);
+#endif
+
+ // Store a pointer to the DisAssembler so that the callback functions
+ // can get to it.
+
+ pdis->PvClientSet((void*)this);
+
+ /* Calculate addresses */
+
+ size_t ibCur = 0;
+ DIS::ADDR addr = 0; // Always emit code with respect to a "0" base address.
+
+ /* First walk the code to find all jump targets */
+
+ while (ibCur < disTotalCodeSize)
+ {
+ size_t cb;
+
+ cb = CbDisassemble(pdis, ibCur, addr + ibCur, disGetLinearAddr(ibCur), disGetBufferSize(ibCur), pfile,
+ true); // find labels
+
+ // CbDisassemble returning > MAX_INT... give me a break.
+ ibCur += cb;
+ }
+
+ /* reset the label counter and start assigning consecutive number labels to the label locations */
+
+ BYTE label = 0;
+ for (unsigned i = 0; i < disTotalCodeSize; i++)
+ {
+ if (disLabels[i] != 0)
+ {
+ disLabels[i] = ++label;
+ }
+ }
+
+ /* Re-initialize addresses for disassemble phase */
+
+ ibCur = 0;
+ addr = 0;
+
+ // Set the callbacks only if we are displaying the disassembly. Otherwise, the scheduler has already called them.
+
+ if (printit)
+ {
+ /* Set the callback functions for symbol lookup */
+
+ pdis->PfncchaddrSet(disCchAddr);
+ pdis->PfncchfixupSet(disCchFixup);
+ pdis->PfncchregrelSet(disCchRegRel);
+ pdis->PfncchregSet(disCchReg);
+ }
+
+ while (ibCur < disTotalCodeSize)
+ {
+ size_t cb;
+
+ cb = CbDisassemble(pdis, ibCur, addr + ibCur, disGetLinearAddr(ibCur), disGetBufferSize(ibCur), pfile,
+ false, // find labels
+ printit,
+ !disDiffable, // display relative offset
+#ifdef DEBUG
+ !disDiffable // Display code bytes?
+#else
+ false // Display code bytes?
+#endif
+ );
+
+ ibCur += (unsigned)cb;
+ }
+
+ delete pdis;
+}
+
+/*****************************************************************************
+ * Given a linear offset into the code, find a pointer to the actual code (either in the hot or cold section)
+ *
+ * Arguments:
+ * offset - The linear offset into the code. It must point within the code.
+ */
+
+const BYTE* DisAssembler::disGetLinearAddr(size_t offset)
+{
+ if (offset < disHotCodeSize)
+ {
+ return (const BYTE*)disHotCodeBlock + offset;
+ }
+ else
+ {
+ return (const BYTE*)disColdCodeBlock + offset - disHotCodeSize;
+ }
+}
+
+/*****************************************************************************
+ * Given a linear offset into the code, determine how many bytes are remaining in the buffer.
+ * This will only return the number of bytes left in either the hot or cold buffer. This is used
+ * to avoid walking off the end of the buffer.
+ *
+ * Arguments:
+ * offset - The linear offset into the code. It must point within the code.
+ */
+
+size_t DisAssembler::disGetBufferSize(size_t offset)
+{
+ if (offset < disHotCodeSize)
+ {
+ return disHotCodeSize - offset;
+ }
+ else
+ {
+ return disHotCodeSize + disColdCodeSize - offset;
+ }
+}
+
+/*****************************************************************************
+ * Get the function name for a given absolute address.
+ */
+
+const char* DisAssembler::disGetMethodFullName(size_t addr)
+{
+ CORINFO_METHOD_HANDLE res;
+
+ // First check the JIT helper table: they're very common.
+ if (GetHelperAddrToMethodHandleMap()->Lookup(addr, &res))
+ {
+ return disComp->eeGetMethodFullName(res);
+ }
+
+ // Next check the "normal" registered call targets
+ if (GetAddrToMethodHandleMap()->Lookup(addr, &res))
+ {
+ return disComp->eeGetMethodFullName(res);
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************
+ * Register a called function address as associated with a CORINFO_METHOD_HANDLE.
+ *
+ * Arguments:
+ * addr - The absolute address of the target function.
+ * methHnd - The method handle associated with 'addr'.
+ */
+
+void DisAssembler::disSetMethod(size_t addr, CORINFO_METHOD_HANDLE methHnd)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+ if (disComp->eeGetHelperNum(methHnd))
+ {
+ DISASM_DUMP("Helper function: %p => %p\n", addr, methHnd);
+ GetHelperAddrToMethodHandleMap()->Set(addr, methHnd);
+ }
+ else
+ {
+ DISASM_DUMP("Function: %p => %p\n", addr, methHnd);
+ GetAddrToMethodHandleMap()->Set(addr, methHnd);
+ }
+}
+
+/*****************************************************************************
+ * Register a relocation.
+ *
+ * Arguments:
+ * relocAddr - The absolute address the relocation applies to.
+ * targetAddr - The absolute address the relocation points to.
+ */
+
+void DisAssembler::disRecordRelocation(size_t relocAddr, size_t targetAddr)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+ DISASM_DUMP("Relocation %p => %p\n", relocAddr, targetAddr);
+ GetRelocationMap()->Set(relocAddr, targetAddr);
+}
+
+/*****************************************************************************
+ *
+ * Disassemble the code which has been generated
+ */
+
+void DisAssembler::disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* coldCodePtr, size_t coldCodeSize)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ // Should we make it diffable?
+ disDiffable = disComp->opts.dspDiffable;
+#else // !DEBUG
+ // NOTE: non-debug builds are always diffable!
+ disDiffable = true;
+#endif // !DEBUG
+
+#ifdef DEBUG
+ const wchar_t* fileName = JitConfig.JitLateDisasmTo();
+ if (fileName != nullptr)
+ {
+ errno_t ec = _wfopen_s(&disAsmFile, fileName, W("a+"));
+ if (ec != 0)
+ {
+ disAsmFile = nullptr;
+ }
+ }
+#else // !DEBUG
+ // NOTE: non-DEBUG builds always use jitstdout currently!
+ disAsmFile = jitstdout;
+#endif // !DEBUG
+
+ if (disAsmFile == nullptr)
+ {
+ disAsmFile = jitstdout;
+ }
+
+ // As this writes to a common file, this is not reentrant.
+
+ assert(hotCodeSize > 0);
+ if (coldCodeSize == 0)
+ {
+ fprintf(disAsmFile, "************************** %hs:%hs size 0x%04IX **************************\n\n",
+ disCurClassName, disCurMethodName, hotCodeSize);
+
+ fprintf(disAsmFile, "Base address : %ph\n", dspAddr(hotCodePtr));
+ }
+ else
+ {
+ fprintf(disAsmFile,
+ "************************** %hs:%hs hot size 0x%04IX cold size 0x%04IX **************************\n\n",
+ disCurClassName, disCurMethodName, hotCodeSize, coldCodeSize);
+
+ fprintf(disAsmFile, "Hot address : %ph\n", dspAddr(hotCodePtr));
+ fprintf(disAsmFile, "Cold address : %ph\n", dspAddr(coldCodePtr));
+ }
+
+ disStartAddr = 0;
+ disHotCodeBlock = (size_t)hotCodePtr;
+ disHotCodeSize = hotCodeSize;
+ disColdCodeBlock = (size_t)coldCodePtr;
+ disColdCodeSize = coldCodeSize;
+
+ disTotalCodeSize = disHotCodeSize + disColdCodeSize;
+
+ disLabels = new (disComp, CMK_DebugOnly) BYTE[disTotalCodeSize]();
+
+ DisasmBuffer(disAsmFile, /* printIt */ true);
+ fprintf(disAsmFile, "\n");
+
+ if (disAsmFile != jitstdout)
+ {
+ fclose(disAsmFile);
+ }
+ else
+ {
+ fflush(disAsmFile);
+ }
+}
+
+/*****************************************************************************/
+// This function is called for every method. It checks whether we are supposed to disassemble
+// the method, and where to send the disassembly output.
+
+void DisAssembler::disOpenForLateDisAsm(const char* curMethodName, const char* curClassName, PCCOR_SIGNATURE sig)
+{
+ if (!disComp->opts.doLateDisasm)
+ {
+ return;
+ }
+
+ disCurMethodName = curMethodName;
+ disCurClassName = curClassName;
+}
+
+/*****************************************************************************/
+
+void DisAssembler::disInit(Compiler* pComp)
+{
+ assert(pComp);
+ disComp = pComp;
+ disHasName = false;
+ disLabels = nullptr;
+ disAddrToMethodHandleMap = nullptr;
+ disHelperAddrToMethodHandleMap = nullptr;
+ disRelocationMap = nullptr;
+ disDiffable = false;
+ disAsmFile = nullptr;
+}
+
+/*****************************************************************************/
+#endif // LATE_DISASM
+/*****************************************************************************/
diff --git a/src/jit/disasm.h b/src/jit/disasm.h
new file mode 100644
index 0000000000..972243e4dc
--- /dev/null
+++ b/src/jit/disasm.h
@@ -0,0 +1,226 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX DisAsm XX
+XX XX
+XX The dis-assembler to display the native code generated XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _DIS_H_
+#define _DIS_H_
+/*****************************************************************************/
+#ifdef LATE_DISASM
+
+// free() is deprecated (we should only allocate and free memory through CLR hosting interfaces)
+// and is redefined in clrhost.h to cause a compiler error.
+// We don't call free(), but this function is mentioned in STL headers included by msvcdis.h
+// (and free() is only called by STL functions that we don't use).
+// To avoid the compiler error, but at the same time ensure that we don't accidentally use free(),
+// free() is redefined to cause a runtime error instead of a compile time error.
+#undef free
+#ifdef DEBUG
+#define free(x) assert(false && "Must not call free(). Use a ClrXXX function instead.")
+#endif
+
+#if CHECK_STRUCT_PADDING
+#pragma warning(pop)
+#endif // CHECK_STRUCT_PADDING
+
+#define _OLD_IOSTREAMS
+// This pragma is needed because public\vc\inc\xiosbase contains
+// a static local variable
+#pragma warning(disable : 4640)
+#include "msvcdis.h"
+#pragma warning(default : 4640)
+
+#ifdef _TARGET_XARCH_
+#include "disx86.h"
+#elif defined(_TARGET_ARM64_)
+#include "disarm64.h"
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif
+
+#if CHECK_STRUCT_PADDING
+#pragma warning(push)
+#pragma warning(default : 4820) // 'bytes' bytes padding added after construct 'member_name'
+#endif // CHECK_STRUCT_PADDING
+
+/*****************************************************************************/
+
+#ifdef _HOST_64BIT_
+template <typename T>
+struct SizeTKeyFuncs : LargePrimitiveKeyFuncs<T>
+{
+};
+#else // !_HOST_64BIT_
+template <typename T>
+struct SizeTKeyFuncs : SmallPrimitiveKeyFuncs<T>
+{
+};
+#endif // _HOST_64BIT_
+
+typedef SimplerHashTable<size_t, SizeTKeyFuncs<size_t>, CORINFO_METHOD_HANDLE, JitSimplerHashBehavior>
+ AddrToMethodHandleMap;
+typedef SimplerHashTable<size_t, SizeTKeyFuncs<size_t>, size_t, JitSimplerHashBehavior> AddrToAddrMap;
+
+class Compiler;
+
+class DisAssembler
+{
+public:
+ // Constructor
+ void disInit(Compiler* pComp);
+
+ // Initialize the class for the current method being generated.
+ void disOpenForLateDisAsm(const char* curMethodName, const char* curClassName, PCCOR_SIGNATURE sig);
+
+ // Disassemble a buffer: called after code for a method is generated.
+ void disAsmCode(BYTE* hotCodePtr, size_t hotCodeSize, BYTE* coldCodePtr, size_t coldCodeSize);
+
+ // Register an address to be associated with a method handle.
+ void disSetMethod(size_t addr, CORINFO_METHOD_HANDLE methHnd);
+
+ // Register a relocation address.
+ void disRecordRelocation(size_t relocAddr, size_t targetAddr);
+
+private:
+ /* Address of the hot and cold code blocks to disassemble */
+ size_t disHotCodeBlock;
+ size_t disColdCodeBlock;
+
+ /* Size of the hot and cold code blocks to disassemble */
+ size_t disHotCodeSize;
+ size_t disColdCodeSize;
+
+ /* Total code size (simply cached version of disHotCodeSize + disColdCodeSize) */
+ size_t disTotalCodeSize;
+
+ /* Address where the code block is to be loaded */
+ size_t disStartAddr;
+
+ /* Current offset in the code block */
+ size_t disCurOffset;
+
+ /* Size (in bytes) of the current disassembled instruction */
+ size_t disInstSize;
+
+ /* Target address of a jump */
+ size_t disTarget;
+
+ /* temporary buffer for function names */
+ // TODO-Review: there is some issue here where this is never set!
+ char disFuncTempBuf[1024];
+
+ /* Method and class name to output */
+ const char* disCurMethodName;
+ const char* disCurClassName;
+
+ /* flag that signals when replacing a symbol name has been deferred for following callbacks */
+ // TODO-Review: there is some issue here where this is never set to 'true'!
+ bool disHasName;
+
+ /* An array of labels, for jumps, LEAs, etc. There is one element in the array for each byte in the generated code.
+ * The element is zero if the corresponding byte of generated code is not a label target. Otherwise, the value
+ * is a label number.
+ */
+ BYTE* disLabels;
+
+ void DisasmBuffer(FILE* pfile, bool printit);
+
+ /* For the purposes of disassembly, we pretend that the hot and cold sections are linear, and not split.
+ * These functions create this model for the rest of the disassembly code.
+ */
+
+ /* Given a linear offset into the code, find a pointer to the actual code (either in the hot or cold section) */
+ const BYTE* disGetLinearAddr(size_t offset);
+
+ /* Given a linear offset into the code, determine how many bytes are left in the hot or cold buffer the offset
+ * points to */
+ size_t disGetBufferSize(size_t offset);
+
+ // Map of instruction addresses to call target method handles for normal calls.
+ AddrToMethodHandleMap* disAddrToMethodHandleMap;
+ AddrToMethodHandleMap* GetAddrToMethodHandleMap();
+
+ // Map of instruction addresses to call target method handles for JIT helper calls.
+ AddrToMethodHandleMap* disHelperAddrToMethodHandleMap;
+ AddrToMethodHandleMap* GetHelperAddrToMethodHandleMap();
+
+ // Map of relocation addresses to relocation target.
+ AddrToAddrMap* disRelocationMap;
+ AddrToAddrMap* GetRelocationMap();
+
+ const char* disGetMethodFullName(size_t addr);
+
+ FILE* disAsmFile;
+
+ Compiler* disComp;
+
+ bool disDiffable; // 'true' if the output should be diffable (hide or obscure absolute addresses)
+
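+ // Display helper: in diffable mode, map any non-null address to the constant 0xD1FFAB1E
+ // so absolute addresses do not show up in diffs.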
+ template <typename T>
+ T dspAddr(T addr)
+ {
+ return (addr == 0) ? 0 : (disDiffable ? T(0xD1FFAB1E) : addr);
+ }
+
+ /* Callbacks from msdis */
+
+ static size_t __stdcall disCchAddr(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp);
+
+ size_t disCchAddrMember(
+ const DIS* pdis, DIS::ADDR addr, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORDLONG* pdwDisp);
+
+ static size_t __stdcall disCchFixup(const DIS* pdis,
+ DIS::ADDR addr,
+ size_t size,
+ __in_ecount(cchMax) wchar_t* wz,
+ size_t cchMax,
+ DWORDLONG* pdwDisp);
+
+ size_t disCchFixupMember(const DIS* pdis,
+ DIS::ADDR addr,
+ size_t size,
+ __in_ecount(cchMax) wchar_t* wz,
+ size_t cchMax,
+ DWORDLONG* pdwDisp);
+
+ static size_t __stdcall disCchRegRel(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp);
+
+ size_t disCchRegRelMember(
+ const DIS* pdis, DIS::REGA reg, DWORD disp, __in_ecount(cchMax) wchar_t* wz, size_t cchMax, DWORD* pdwDisp);
+
+ static size_t __stdcall disCchReg(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax);
+
+ size_t disCchRegMember(const DIS* pdis, DIS::REGA reg, __in_ecount(cchMax) wchar_t* wz, size_t cchMax);
+
+ /* Disassemble helper */
+
+ size_t CbDisassemble(DIS* pdis,
+ size_t offs,
+ DIS::ADDR addr,
+ const BYTE* pb,
+ size_t cbMax,
+ FILE* pfile,
+ bool findLabels,
+ bool printit = false,
+ bool dispOffs = false,
+ bool dispCodeBytes = false);
+};
+
+/*****************************************************************************/
+#endif // LATE_DISASM
+/*****************************************************************************/
+#endif // _DIS_H_
+/*****************************************************************************/
diff --git a/src/jit/dll/.gitmirror b/src/jit/dll/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/dll/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/dll/CMakeLists.txt b/src/jit/dll/CMakeLists.txt
new file mode 100644
index 0000000000..01e58dbbb8
--- /dev/null
+++ b/src/jit/dll/CMakeLists.txt
@@ -0,0 +1,35 @@
+project(ClrJit)
+
+if(CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+ add_definitions(-DLEGACY_BACKEND)
+endif(CLR_CMAKE_PLATFORM_ARCH_I386 OR CLR_CMAKE_PLATFORM_ARCH_ARM)
+
+# Disable the following for UNIX altjit on Windows
+if(CLR_CMAKE_PLATFORM_UNIX)
+ add_compile_options(-fPIC)
+
+ add_library_clr(${JIT_BASE_NAME}_static
+ STATIC
+ ${SHARED_LIB_SOURCES}
+ )
+ add_dependencies(${JIT_BASE_NAME}_static coreclrpal gcinfo)
+else()
+ add_library_clr(${JIT_BASE_NAME}_static
+ ${SOURCES}
+ )
+# Disable up to here (see above) the following for UNIX altjit on Windows
+# Enable the following for UNIX altjit on Windows
+# add_library_clr(ClrJit
+# SHARED
+# ${SHARED_LIB_SOURCES}
+# )
+
+# Enable the following for UNIX altjit on Windows
+#target_link_libraries(ClrJit
+# utilcode
+# gcinfo
+# runtime_library
+# )
+
+# Disable the following for UNIX altjit on Windows
+endif(CLR_CMAKE_PLATFORM_UNIX)
diff --git a/src/jit/dll/clrjit.def b/src/jit/dll/clrjit.def
new file mode 100644
index 0000000000..1603af74ca
--- /dev/null
+++ b/src/jit/dll/clrjit.def
@@ -0,0 +1,7 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+EXPORTS
+ getJit
+ jitStartup
+ sxsJitStartup
diff --git a/src/jit/dll/jit.nativeproj b/src/jit/dll/jit.nativeproj
new file mode 100644
index 0000000000..97981e7eff
--- /dev/null
+++ b/src/jit/dll/jit.nativeproj
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <!-- Import the CLR's settings -->
+
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup Label="Globals">
+ <SccProjectName>SAK</SccProjectName>
+ <SccAuxPath>SAK</SccAuxPath>
+ <SccLocalPath>SAK</SccLocalPath>
+ <SccProvider>SAK</SccProvider>
+ </PropertyGroup>
+
+ <PropertyGroup>
+
+ <!-- Set the output -->
+
+ <OutputName>clrjit</OutputName>
+ <TargetType Condition="'$(FeatureMergeJitAndEngine)'=='true'">LIBRARY</TargetType>
+ <TargetType Condition="'$(FeatureMergeJitAndEngine)'!='true'">DYNLINK</TargetType>
+ <FileToMarkForSigning>$(BinariesDirectory)\clrjit.dll</FileToMarkForSigning>
+ <StaticLinkJit>false</StaticLinkJit>
+ <BuildCoreBinaries>true</BuildCoreBinaries>
+ <BuildSysBinaries>true</BuildSysBinaries>
+
+ <DllEntryPoint>_DllMainCRTStartup</DllEntryPoint>
+ <LinkSubsystem>windows</LinkSubsystem>
+ <LibCLib Condition="'$(StaticLinkJit)'!='true'">$(ClrCrtLib)</LibCLib>
+
+ <LinkModuleDefinitionFile>$(OutputName).def</LinkModuleDefinitionFile>
+
+ <ClDefines Condition="'$(BuildArchitecture)' == 'amd64'">$(ClDefines);FEATURE_SIMD;FEATURE_AVX_SUPPORT</ClDefines>
+
+ <Win32DllLibs>$(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib</Win32DllLibs>
+ <Win32DllLibs>$(Win32DllLibs);$(ClrLibPath)\utilcode.lib</Win32DllLibs>
+
+ <!-- Profile-guided optimization -->
+
+ <PogoOptimize Condition="('$(BuildArchitecture)' == 'arm')">false</PogoOptimize>
+ <PogoInstrument Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoInstrument>
+ <PogoUpdate Condition="('$(BuildArchitecture)' == 'arm') and ('$(_BuildType)' == 'ret') and ('$(BuildProjectName)' == '')">true</PogoUpdate>
+ <Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'amd64'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib</Win32DllLibs>
+ <Win32DllLibs Condition="'$(PogoInstrument)' == 'true' and '$(BuildArchitecture)' == 'arm'">$(Win32DllLibs);$(CrtLibPath)\pgort.lib;$(SdkLibPath)\ntdll.lib</Win32DllLibs>
+ <OptimizationDataRelativeDir>$(_BuildArch)\CLR\Base</OptimizationDataRelativeDir>
+
+ <!-- Do we want to build with msvcdis disassembly capability? This should be enabled for DEBUG and disabled otherwise.
+ However, for debugging purposes, such as generating assembly diffs between CHK and RET JITs, it can be useful
+ to enable it temporarily in non-DEBUG builds by forcing the EnableLateDisasm property to 'true'.
+ -->
+ <EnableLateDisasm Condition="'$(DebugBuild)' == 'true' and '$(BuildArchitecture)' != 'arm' and '$(BuildProjectName)' != 'CoreSys'">true</EnableLateDisasm>
+ <!--
+ <EnableLateDisasm Condition="'$(BuildArchitecture)' != 'arm' and '$(BuildProjectName)' != 'CoreSys'">true</EnableLateDisasm>
+ -->
+ <ClDefines Condition="'$(EnableLateDisasm)' == 'true'">$(ClDefines);LATE_DISASM=1</ClDefines>
+ <LinkDelayLoad Condition="'$(EnableLateDisasm)' == 'true'">$(LinkDelayLoad);msvcdis$(VC_NONCRT_ProdVerX).dll</LinkDelayLoad>
+ <UseDelayimpLib Condition="'$(EnableLateDisasm)' == 'true' and '$(FeatureMergeJitAndEngine)'!='true'">true</UseDelayimpLib>
+
+ <!-- Disable merge of text and rdata for DevDiv:696146-->
+ <LinkMergeRData Condition="'$(BuildArchitecture)'=='i386'">false</LinkMergeRData>
+ </PropertyGroup>
+
+ <!-- Leaf Project Items -->
+
+ <ItemGroup>
+ <ProjectReference Include="$(ClrSrcDirectory)utilcode\dyncrt\dyncrt.nativeproj" />
+ <TargetLib Include="$(SdkLibPath)\mscoree.lib" />
+ <TargetLib Condition="'$(BuildArchitecture)'!='i386'" Include="$(ClrLibPath)\gcinfo.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcinfo\lib\gcinfo.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(UseDelayimpLib)' == 'true'" Include="$(ClrLibPath)\delayimp.lib">
+ <ProjectReference>$(ClrSrcDirectory)delayimp\delayimp.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(ClrLibPath)\gcdump.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcdump\lib\gcdump.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(SdkLibPath)\ole32.lib" />
+ <TargetLib Condition="'$(EnableLateDisasm)' == 'true'" Include="$(VCToolsLibPath)\msvcdis.lib" />
+ <RCResourceFile Include="..\native.rc" />
+ </ItemGroup>
+
+ <Import Project="..\jit.settings.targets" />
+
+</Project>
diff --git a/src/jit/earlyprop.cpp b/src/jit/earlyprop.cpp
new file mode 100644
index 0000000000..70d1012aa0
--- /dev/null
+++ b/src/jit/earlyprop.cpp
@@ -0,0 +1,671 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// Early Value Propagation
+//
+// This phase performs an SSA-based value propagation optimization that currently only applies to array
+// lengths, runtime type handles, and explicit null checks. An SSA-based backwards tracking of local variables
+// is performed at each point of interest, e.g., an array length reference site, a method table reference site, or
+// an indirection.
+// The tracking continues until an interesting value is encountered. The value is then used to rewrite
+// the source site or the value.
+//
+///////////////////////////////////////////////////////////////////////////////////////
+
+#include "jitpch.h"
+#include "ssabuilder.h"
+
+bool Compiler::optDoEarlyPropForFunc()
+{
+ bool propArrayLen = (optMethodFlags & OMF_HAS_NEWARRAY) && (optMethodFlags & OMF_HAS_ARRAYREF);
+ bool propGetType = (optMethodFlags & OMF_HAS_NEWOBJ) && (optMethodFlags & OMF_HAS_VTABLEREF);
+ bool propNullCheck = (optMethodFlags & OMF_HAS_NULLCHECK) != 0;
+ return propArrayLen || propGetType || propNullCheck;
+}
+
+bool Compiler::optDoEarlyPropForBlock(BasicBlock* block)
+{
+ bool bbHasArrayRef = (block->bbFlags & BBF_HAS_IDX_LEN) != 0;
+ bool bbHasVtableRef = (block->bbFlags & BBF_HAS_VTABREF) != 0;
+ bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0;
+ return bbHasArrayRef || bbHasVtableRef || bbHasNullCheck;
+}
+
+//--------------------------------------------------------------------
+// gtIsVtableRef: Return true if the tree is a method table reference.
+//
+// Arguments:
+// tree - The input tree.
+//
+// Return Value:
+// Return true if the tree is a method table reference.
+
+bool Compiler::gtIsVtableRef(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_IND)
+ {
+ GenTree* addr = tree->AsIndir()->Addr();
+
+ if (addr->OperIsAddrMode())
+ {
+ GenTreeAddrMode* addrMode = addr->AsAddrMode();
+
+ return (!addrMode->HasIndex() && (addrMode->Base()->TypeGet() == TYP_REF));
+ }
+ }
+
+ return false;
+}
+
+//------------------------------------------------------------------------------
+// getArrayLengthFromAllocation: Return the array length for an array allocation
+// helper call.
+//
+// Arguments:
+// tree - The array allocation helper call.
+//
+// Return Value:
+// Return the array length node.
+
+GenTreePtr Compiler::getArrayLengthFromAllocation(GenTreePtr tree)
+{
+ assert(tree != nullptr);
+
+ if (tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_VC) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8))
+ {
+ // This is an array allocation site. Grab the array length node.
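+ // For example, for a C# 'new int[10]', the element-count argument of this helper call is
+ // typically a constant node with value 10; that argument node is what gets returned below.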
+ return gtArgEntryByArgNum(call, 1)->node;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+//-----------------------------------------------------------------------------
+// getObjectHandleNodeFromAllocation: Return the type handle for an object allocation
+// helper call.
+//
+// Arguments:
+// tree - The object allocation helper call.
+//
+// Return Value:
+// Return the object type handle node.
+
+GenTreePtr Compiler::getObjectHandleNodeFromAllocation(GenTreePtr tree)
+{
+ assert(tree != nullptr);
+
+ if (tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ if (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWFAST) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWSFAST) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWSFAST_ALIGN8) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_VC) ||
+ call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8))
+ {
+ // This is an object allocation site. Return the runtime type handle node.
+ fgArgTabEntryPtr argTabEntry = gtArgEntryByArgNum(call, 0);
+ return argTabEntry->node;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------------------------
+// optEarlyProp: The entry point of the early value propagation.
+//
+// Notes:
+// This phase performs an SSA-based value propagation, including
+// 1. Array length propagation.
+// 2. Runtime type handle propagation.
+// 3. Null check folding.
+//
+// For array length propagation, a demand-driven SSA-based backwards tracking of constant
+// array lengths is performed at each array length reference site, which is in the form of a
+// GT_ARR_LENGTH node. When a GT_ARR_LENGTH node is seen, the array ref pointer, which is
+// the only child node of the GT_ARR_LENGTH, is tracked. This is only done for array ref
+// pointers that have valid SSA forms. The tracking is along the SSA use-def chain and stops
+// at the original array allocation site, where we can grab the array length. The
+// GT_ARR_LENGTH node will then be rewritten to a GT_CNS_INT node if the array length is
+// constant.
+//
+// Similarly, the same algorithm also applies to rewriting a method table (also known as
+// vtable) reference site, which is in the form of a GT_INDIR node. The base pointer, which is
+// an object reference pointer, is treated in the same way as an array reference pointer.
+//
+// Null check folding tries to find a GT_INDIR(obj + const) that a GT_NULLCHECK(obj) can be folded into
+// and removed. Currently, the algorithm only matches GT_INDIR and GT_NULLCHECK in the same basic block.
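+//
+// As an illustration (node shapes and names simplified), array length propagation turns a pattern like
+//
+//     V01 = CORINFO_HELP_NEWARR_1_VC(elemTypeHnd, 5)   ; array allocation site
+//     ...
+//     GT_ARR_LENGTH(V01)                               ; array length use
+//
+// into a GT_CNS_INT node with value 5 at the use site, leaving the allocation itself unchanged.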
+
+void Compiler::optEarlyProp()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optEarlyProp()\n");
+ }
+#endif
+
+ assert(fgSsaPassesCompleted == 1);
+
+ if (!optDoEarlyPropForFunc())
+ {
+ return;
+ }
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (!optDoEarlyPropForBlock(block))
+ {
+ continue;
+ }
+
+ compCurBB = block;
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr;)
+ {
+ // Preserve the next link before the propagation and morph.
+ GenTreeStmt* next = stmt->gtNextStmt;
+
+ compCurStmt = stmt;
+
+ // Walk the stmt tree in linear order to rewrite any array length reference with a
+ // constant array length.
+ bool isRewritten = false;
+ bool bbHasNullCheck = (block->bbFlags & BBF_HAS_NULLCHECK) != 0;
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree != nullptr; tree = tree->gtNext)
+ {
+ if (optEarlyPropRewriteTree(tree))
+ {
+ isRewritten = true;
+ }
+ }
+
+ // Morph the stmt and update the evaluation order if the stmt has been rewritten.
+ if (isRewritten)
+ {
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+ }
+
+ stmt = next;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter optEarlyProp:\n");
+ fgDispBasicBlocks(/*dumpTrees*/ true);
+ }
+#endif
+}
+
+//----------------------------------------------------------------
+// optEarlyPropRewriteTree: Rewrite a tree to the actual value.
+//
+// Arguments:
+// tree - The input tree node to be rewritten.
+//
+// Return Value:
+// Return true iff "tree" is successfully rewritten.
+
+bool Compiler::optEarlyPropRewriteTree(GenTreePtr tree)
+{
+ GenTreePtr objectRefPtr = nullptr;
+ optPropKind propKind = optPropKind::OPK_INVALID;
+
+ if (tree->OperGet() == GT_ARR_LENGTH)
+ {
+ objectRefPtr = tree->gtOp.gtOp1;
+ propKind = optPropKind::OPK_ARRAYLEN;
+ }
+ else if ((tree->OperGet() == GT_IND) && !varTypeIsStruct(tree))
+ {
+ // TODO-1stClassStructs: The above condition should apply equally to all indirections,
+ // but previously the implicit indirections due to a struct assignment were not
+ // considered, so we are currently limiting it to non-structs to preserve existing
+ // behavior.
+ // optFoldNullCheck takes care of updating statement info if a null check is removed.
+ optFoldNullCheck(tree);
+
+ if (gtIsVtableRef(tree))
+ {
+ // Don't propagate type handles that are used as null checks, which are usually in
+ // form of
+ // * stmtExpr void (top level)
+ // \--* indir int
+ // \--* lclVar ref V02 loc0
+ if (compCurStmt->gtStmt.gtStmtExpr == tree)
+ {
+ return false;
+ }
+
+ objectRefPtr = tree->gtOp.gtOp1;
+ propKind = optPropKind::OPK_OBJ_GETTYPE;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+
+ if (!objectRefPtr->OperIsScalarLocal() || fgExcludeFromSsa(objectRefPtr->AsLclVarCommon()->GetLclNum()))
+
+ {
+ return false;
+ }
+
+ bool isRewritten = false;
+ GenTreePtr root = compCurStmt;
+ unsigned lclNum = objectRefPtr->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = objectRefPtr->AsLclVarCommon()->GetSsaNum();
+
+ GenTreePtr actualVal = optPropGetValue(lclNum, ssaNum, propKind);
+
+ if (actualVal != nullptr)
+ {
+ if (propKind == optPropKind::OPK_ARRAYLEN)
+ {
+ assert(actualVal->IsCnsIntOrI());
+
+ if (actualVal->gtIntCon.gtIconVal > INT32_MAX)
+ {
+ // Don't propagate array lengths that are beyond the maximum value of a GT_ARR_LENGTH
+ // node. The CORINFO_HELP_NEWARR_1_OBJ helper call allows a long integer to be passed as the
+ // array length argument, but the type of GT_ARR_LENGTH is always INT32.
+ return false;
+ }
+ }
+ else if (propKind == optPropKind::OPK_OBJ_GETTYPE)
+ {
+ assert(actualVal->IsCnsIntOrI());
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("optEarlyProp Rewriting BB%02u\n", compCurBB->bbNum);
+ gtDispTree(root);
+ printf("\n");
+ }
+#endif
+ // Rewrite the tree using a copy of "actualVal"
+ GenTreePtr actualValCopy;
+ var_types origType = tree->gtType;
+ // Propagating a constant into an array index expression requires calling
+ // LabelIndex to update the FieldSeq annotations. EarlyProp may replace
+ // array length expressions with constants, so check if this is an array
+ // length operator that is part of an array index expression.
+ bool isIndexExpr = (tree->OperGet() == GT_ARR_LENGTH && ((tree->gtFlags & GTF_ARRLEN_ARR_IDX) != 0));
+
+ if (actualVal->GetNodeSize() <= tree->GetNodeSize())
+ {
+ actualValCopy = tree;
+ }
+ else
+ {
+ actualValCopy = gtNewLargeOperNode(GT_ADD, TYP_INT);
+ }
+
+ fgWalkTreePre(&tree, Compiler::lvaDecRefCntsCB, (void*)this, true);
+
+ actualValCopy->CopyFrom(actualVal, this);
+ actualValCopy->gtType = origType;
+ if (isIndexExpr)
+ {
+ actualValCopy->LabelIndex(this);
+ }
+
+ fgWalkTreePre(&actualValCopy, Compiler::lvaIncRefCntsCB, (void*)this, true);
+
+ if (actualValCopy != tree)
+ {
+ gtReplaceTree(root, tree, actualValCopy);
+ }
+
+ isRewritten = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("to\n");
+ gtDispTree(compCurStmt);
+ printf("\n");
+ }
+#endif
+ }
+
+ return isRewritten;
+}
+
+//-------------------------------------------------------------------------------------------
+// optPropGetValue: Given an SSA object ref pointer, get the value needed based on valueKind.
+//
+// Arguments:
+// lclNum - The local var number of the ref pointer.
+// ssaNum - The SSA var number of the ref pointer.
+// valueKind - The kind of value of interest.
+//
+// Return Value:
+// Return the corresponding value based on valueKind.
+
+GenTreePtr Compiler::optPropGetValue(unsigned lclNum, unsigned ssaNum, optPropKind valueKind)
+{
+ return optPropGetValueRec(lclNum, ssaNum, valueKind, 0);
+}
+
+//-----------------------------------------------------------------------------------
+// optPropGetValueRec: Given an SSA object ref pointer, get the value needed based on valueKind
+// within a recursion bound.
+//
+// Arguments:
+// lclNum - The local var number of the array pointer.
+// ssaNum - The SSA var number of the array pointer.
+// valueKind - The kind of value of interest.
+// walkDepth - Current recursive walking depth.
+//
+// Return Value:
+// Return the corresponding value based on valueKind.
+
+GenTreePtr Compiler::optPropGetValueRec(unsigned lclNum, unsigned ssaNum, optPropKind valueKind, int walkDepth)
+{
+ if (ssaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return nullptr;
+ }
+
+ SSAName ssaName(lclNum, ssaNum);
+ GenTreePtr value = nullptr;
+
+ // Bound the recursion with a hard limit.
+ if (walkDepth > optEarlyPropRecurBound)
+ {
+ return nullptr;
+ }
+
+ // Track along the use-def chain to get the array length
+ GenTreePtr treelhs = lvaTable[lclNum].GetPerSsaData(ssaNum)->m_defLoc.m_tree;
+
+ if (treelhs == nullptr)
+ {
+ // Incoming parameters or live-in variables don't have an actual definition tree node
+ // for their FIRST_SSA_NUM. See SsaBuilder::RenameVariables.
+ assert(ssaNum == SsaConfig::FIRST_SSA_NUM);
+ }
+ else
+ {
+ GenTreePtr* lhsPtr;
+ GenTreePtr treeDefParent = treelhs->gtGetParent(&lhsPtr);
+
+ if (treeDefParent->OperGet() == GT_ASG)
+ {
+ assert(treelhs == treeDefParent->gtGetOp1());
+ GenTreePtr treeRhs = treeDefParent->gtGetOp2();
+
+ if (treeRhs->OperIsScalarLocal() && !fgExcludeFromSsa(treeRhs->AsLclVarCommon()->GetLclNum()))
+ {
+ // Recursively track the Rhs
+ unsigned rhsLclNum = treeRhs->AsLclVarCommon()->GetLclNum();
+ unsigned rhsSsaNum = treeRhs->AsLclVarCommon()->GetSsaNum();
+
+ value = optPropGetValueRec(rhsLclNum, rhsSsaNum, valueKind, walkDepth + 1);
+ }
+ else
+ {
+ if (valueKind == optPropKind::OPK_ARRAYLEN)
+ {
+ value = getArrayLengthFromAllocation(treeRhs);
+ if (value != nullptr)
+ {
+ if (!value->IsCnsIntOrI())
+ {
+ // Leave out non-constant-sized arrays.
+ value = nullptr;
+ }
+ }
+ }
+ else if (valueKind == optPropKind::OPK_OBJ_GETTYPE)
+ {
+ value = getObjectHandleNodeFromAllocation(treeRhs);
+ if (value != nullptr)
+ {
+ if (!value->IsCnsIntOrI())
+ {
+ // Leave out non-constant type handles.
+ value = nullptr;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return value;
+}
+
+//----------------------------------------------------------------
+// optFoldNullCheck: Try to find a GT_NULLCHECK node that can be folded into the GT_INDIR node.
+//
+// Arguments:
+// tree - The input GT_INDIR tree.
+//
+
+void Compiler::optFoldNullCheck(GenTreePtr tree)
+{
+ //
+ // Check for a pattern like this:
+ //
+ // =
+ // / \
+ // x comma
+ // / \
+ // nullcheck +
+ // | / \
+ // y y const
+ //
+ //
+ // some trees in the same
+ // basic block with
+ // no unsafe side effects
+ //
+ // indir
+ // |
+ // x
+ //
+ // where the const is suitably small
+ // and transform it into
+ //
+ // =
+ // / \
+ // x +
+ // / \
+ // y const
+ //
+ //
+ // some trees with no unsafe side effects here
+ //
+ // indir
+ // |
+ // x
+
+ assert(tree->OperGet() == GT_IND);
+ if (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+ {
+ // Check if we have the pattern above and find the nullcheck node if we do.
+
+ // Find the definition of the indirected local (x in the picture)
+ GenTreePtr indLocalTree = tree->gtGetOp1();
+ unsigned lclNum = indLocalTree->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = indLocalTree->AsLclVarCommon()->GetSsaNum();
+
+ if (ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ DefLoc defLoc = lvaTable[lclNum].GetPerSsaData(ssaNum)->m_defLoc;
+ BasicBlock* defBlock = defLoc.m_blk;
+
+ if (compCurBB == defBlock)
+ {
+ GenTreePtr defTree = defLoc.m_tree;
+ GenTreePtr defParent = defTree->gtGetParent(nullptr);
+
+ if ((defParent->OperGet() == GT_ASG) && (defParent->gtNext == nullptr))
+ {
+ GenTreePtr defRHS = defParent->gtGetOp2();
+ if (defRHS->OperGet() == GT_COMMA)
+ {
+ if (defRHS->gtGetOp1()->OperGet() == GT_NULLCHECK)
+ {
+ GenTreePtr nullCheckTree = defRHS->gtGetOp1();
+ if (nullCheckTree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+ {
+ // We found a candidate for 'y' in the picture
+ unsigned nullCheckLclNum = nullCheckTree->gtGetOp1()->AsLclVarCommon()->GetLclNum();
+
+ if (defRHS->gtGetOp2()->OperGet() == GT_ADD)
+ {
+ GenTreePtr additionNode = defRHS->gtGetOp2();
+ if ((additionNode->gtGetOp1()->OperGet() == GT_LCL_VAR) &&
+ (additionNode->gtGetOp1()->gtLclVarCommon.gtLclNum == nullCheckLclNum))
+ {
+ GenTreePtr offset = additionNode->gtGetOp2();
+ if (offset->IsCnsIntOrI())
+ {
+ if (!fgIsBigOffset(offset->gtIntConCommon.IconValue()))
+ {
+ // Walk from the use to the def in reverse execution order to see
+ // if any nodes have unsafe side effects.
+ GenTreePtr currentTree = indLocalTree->gtPrev;
+ bool isInsideTry = compCurBB->hasTryIndex();
+ bool canRemoveNullCheck = true;
+ const unsigned maxNodesWalked = 25;
+ unsigned nodesWalked = 0;
+
+ // First walk the nodes in the statement containing the indirection
+ // in reverse execution order starting with the indirection's
+ // predecessor.
+ while (canRemoveNullCheck && (currentTree != nullptr))
+ {
+ if ((nodesWalked++ > maxNodesWalked) ||
+ !optCanMoveNullCheckPastTree(currentTree, isInsideTry))
+ {
+ canRemoveNullCheck = false;
+ }
+ else
+ {
+ currentTree = currentTree->gtPrev;
+ }
+ }
+
+ // Then walk the statement list in reverse execution order
+ // until we get to the statement containing the null check.
+ // We only need to check the side effects at the root of each statement.
+ GenTreePtr curStmt = compCurStmt->gtPrev;
+ currentTree = curStmt->gtStmt.gtStmtExpr;
+ while (canRemoveNullCheck && (currentTree != defParent))
+ {
+ if ((nodesWalked++ > maxNodesWalked) ||
+ !optCanMoveNullCheckPastTree(currentTree, isInsideTry))
+ {
+ canRemoveNullCheck = false;
+ }
+ else
+ {
+ curStmt = curStmt->gtStmt.gtPrevStmt;
+ assert(curStmt != nullptr);
+ currentTree = curStmt->gtStmt.gtStmtExpr;
+ }
+ }
+
+ if (canRemoveNullCheck)
+ {
+ // Remove the null check
+ nullCheckTree->gtFlags &= ~(GTF_EXCEPT | GTF_DONT_CSE);
+
+ // Set this flag to prevent reordering
+ nullCheckTree->gtFlags |= GTF_ORDER_SIDEEFF;
+
+ defRHS->gtFlags &= ~(GTF_EXCEPT | GTF_DONT_CSE);
+ defRHS->gtFlags |=
+ additionNode->gtFlags & (GTF_EXCEPT | GTF_DONT_CSE);
+
+ // Re-morph the statement.
+ fgMorphBlockStmt(compCurBB, curStmt DEBUGARG("optFoldNullCheck"));
+
+ // Recalculate the gtCostSz, etc...
+ gtSetStmtInfo(curStmt);
+
+ // Re-thread the nodes
+ fgSetStmtSeq(curStmt);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+//----------------------------------------------------------------
+// optCanMoveNullCheckPastTree: Check if a GT_NULLCHECK node can be folded into a node that
+// is after tree in execution order.
+//
+// Arguments:
+// tree - The tree to check.
+// isInsideTry - True if tree is inside a try region, false otherwise.
+//
+// Return Value:
+// True if GT_NULLCHECK can be folded into a node that is after tree in execution order,
+// false otherwise.
+
+bool Compiler::optCanMoveNullCheckPastTree(GenTreePtr tree, bool isInsideTry)
+{
+ bool result = true;
+ if (isInsideTry)
+ {
+ // We disallow calls, exception sources, and all assignments.
+ // Assignments to locals are disallowed inside try because
+ // they may be live in the handler.
+ if ((tree->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ result = false;
+ }
+ }
+ else
+ {
+ // We disallow calls, exception sources, and assignments to
+ // global memory.
+ if (GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(tree->gtFlags))
+ {
+ result = false;
+ }
+ }
+ return result;
+} \ No newline at end of file
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
new file mode 100755
index 0000000000..527244221e
--- /dev/null
+++ b/src/jit/ee_il_dll.cpp
@@ -0,0 +1,1552 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ee_jit.cpp XX
+XX XX
+XX The functionality needed for the JIT DLL. Includes the DLL entry point XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "emit.h"
+#include "corexcep.h"
+
+#if !defined(PLATFORM_UNIX)
+#include <io.h> // For _dup, _setmode
+#include <fcntl.h> // For _O_TEXT
+#include <errno.h> // For EINVAL
+#endif
+
+/*****************************************************************************/
+
+FILE* jitstdout = nullptr;
+
+ICorJitHost* g_jitHost = nullptr;
+static CILJit* ILJitter = nullptr; // The one and only JITTER I return
+bool g_jitInitialized = false;
+#ifndef FEATURE_MERGE_JIT_AND_ENGINE
+HINSTANCE g_hInst = nullptr;
+#endif // FEATURE_MERGE_JIT_AND_ENGINE
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+JitOptions jitOpts = {
+ nullptr, // methodName
+ nullptr, // className
+ 0.1, // CGknob
+ 0, // testMask
+
+ (JitOptions*)nullptr // lastDummyField.
+};
+
+#endif // DEBUG
+
+/*****************************************************************************/
+
+extern "C" void __stdcall jitStartup(ICorJitHost* jitHost)
+{
+ if (g_jitInitialized)
+ {
+ return;
+ }
+
+ g_jitHost = jitHost;
+
+ assert(!JitConfig.isInitialized());
+ JitConfig.initialize(jitHost);
+
+#if defined(PLATFORM_UNIX)
+ jitstdout = procstdout();
+#else
+ if (jitstdout == nullptr)
+ {
+ int stdoutFd = _fileno(procstdout());
+ // Check the error output(s) of fileno; -1 may overlap with the errno result,
+ // but it is included for completeness.
+ // We want to detect the case where the initial handle is null
+ // or bogus and avoid making further calls.
+ if ((stdoutFd != -1) && (stdoutFd != -2) && (errno != EINVAL))
+ {
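+ // Duplicate the descriptor so the JIT gets its own FILE* for stdout; closing jitstdout
+ // in jitShutdown() then does not close the process-wide stdout stream.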
+ int jitstdoutFd = _dup(_fileno(procstdout()));
+ // Check the error status returned by dup.
+ if (jitstdoutFd != -1)
+ {
+ _setmode(jitstdoutFd, _O_TEXT);
+ jitstdout = _fdopen(jitstdoutFd, "w");
+ assert(jitstdout != nullptr);
+
+ // Prevent the FILE* from buffering its output in order to avoid calls to
+ // `fflush()` throughout the code.
+ setvbuf(jitstdout, nullptr, _IONBF, 0);
+ }
+ }
+ }
+
+ // If jitstdout is still null, fallback to whatever procstdout() was
+ // initially set to.
+ if (jitstdout == nullptr)
+ {
+ jitstdout = procstdout();
+ }
+#endif // PLATFORM_UNIX
+
+#ifdef FEATURE_TRACELOGGING
+ JitTelemetry::NotifyDllProcessAttach();
+#endif
+ Compiler::compStartup();
+
+ g_jitInitialized = true;
+}
+
+void jitShutdown()
+{
+ if (!g_jitInitialized)
+ {
+ return;
+ }
+
+ Compiler::compShutdown();
+
+ if (jitstdout != procstdout())
+ {
+ fclose(jitstdout);
+ }
+
+#ifdef FEATURE_TRACELOGGING
+ JitTelemetry::NotifyDllProcessDetach();
+#endif
+}
+
+#ifndef FEATURE_MERGE_JIT_AND_ENGINE
+
+extern "C" BOOL WINAPI DllMain(HANDLE hInstance, DWORD dwReason, LPVOID pvReserved)
+{
+ if (dwReason == DLL_PROCESS_ATTACH)
+ {
+ g_hInst = (HINSTANCE)hInstance;
+ DisableThreadLibraryCalls((HINSTANCE)hInstance);
+#if defined(SELF_NO_HOST) && COR_JIT_EE_VERSION <= 460
+ jitStartup(JitHost::getJitHost());
+#endif
+ }
+ else if (dwReason == DLL_PROCESS_DETACH)
+ {
+ jitShutdown();
+ }
+
+ return TRUE;
+}
+
+HINSTANCE GetModuleInst()
+{
+ return (g_hInst);
+}
+
+extern "C" void __stdcall sxsJitStartup(CoreClrCallbacks const& cccallbacks)
+{
+#ifndef SELF_NO_HOST
+ InitUtilcode(cccallbacks);
+#endif
+
+#if COR_JIT_EE_VERSION <= 460
+ jitStartup(JitHost::getJitHost());
+#endif
+}
+
+#endif // !FEATURE_MERGE_JIT_AND_ENGINE
+
+/*****************************************************************************/
+
+struct CILJitSingletonAllocator
+{
+ int x;
+};
+const CILJitSingletonAllocator CILJitSingleton = {0};
+
+void* __cdecl operator new(size_t, const CILJitSingletonAllocator&)
+{
+ static char CILJitBuff[sizeof(CILJit)];
+ return CILJitBuff;
+}
+
+ICorJitCompiler* g_realJitCompiler = nullptr;
+
+ICorJitCompiler* __stdcall getJit()
+{
+ if (ILJitter == nullptr)
+ {
+ ILJitter = new (CILJitSingleton) CILJit();
+ }
+ return (ILJitter);
+}
+
+/*****************************************************************************/
+
+// Information kept in thread-local storage. This is used in the noway_assert exceptional path.
+// If you are using it more broadly in retail code, you would need to understand the
+// performance implications of accessing TLS.
+//
+// If the JIT is being statically linked, these methods must be implemented by the consumer.
+#if !defined(FEATURE_MERGE_JIT_AND_ENGINE) || !defined(FEATURE_IMPLICIT_TLS)
+
+__declspec(thread) void* gJitTls = nullptr;
+
+static void* GetJitTls()
+{
+ return gJitTls;
+}
+
+void SetJitTls(void* value)
+{
+ gJitTls = value;
+}
+
+#else // !defined(FEATURE_MERGE_JIT_AND_ENGINE) || !defined(FEATURE_IMPLICIT_TLS)
+
+extern "C" {
+void* GetJitTls();
+void SetJitTls(void* value);
+}
+
+#endif // defined(FEATURE_MERGE_JIT_AND_ENGINE) && defined(FEATURE_IMPLICIT_TLS)
+
+#if defined(DEBUG)
+
+JitTls::JitTls(ICorJitInfo* jitInfo) : m_compiler(nullptr), m_logEnv(jitInfo)
+{
+ m_next = reinterpret_cast<JitTls*>(GetJitTls());
+ SetJitTls(this);
+}
+
+JitTls::~JitTls()
+{
+ SetJitTls(m_next);
+}
+
+LogEnv* JitTls::GetLogEnv()
+{
+ return &reinterpret_cast<JitTls*>(GetJitTls())->m_logEnv;
+}
+
+Compiler* JitTls::GetCompiler()
+{
+ return reinterpret_cast<JitTls*>(GetJitTls())->m_compiler;
+}
+
+void JitTls::SetCompiler(Compiler* compiler)
+{
+ reinterpret_cast<JitTls*>(GetJitTls())->m_compiler = compiler;
+}
+
+#else // defined(DEBUG)
+
+JitTls::JitTls(ICorJitInfo* jitInfo)
+{
+}
+
+JitTls::~JitTls()
+{
+}
+
+Compiler* JitTls::GetCompiler()
+{
+ return reinterpret_cast<Compiler*>(GetJitTls());
+}
+
+void JitTls::SetCompiler(Compiler* compiler)
+{
+ SetJitTls(compiler);
+}
+
+#endif // !defined(DEBUG)
+
+//****************************************************************************
+// The main JIT function for the 32 bit JIT. See code:ICorJitCompiler#EEToJitInterface for more on the EE-JIT
+// interface. Things really don't get going inside the JIT until the code:Compiler::compCompile#Phases
+// method. Usually that is where you want to go.
+
+CorJitResult CILJit::compileMethod(
+ ICorJitInfo* compHnd, CORINFO_METHOD_INFO* methodInfo, unsigned flags, BYTE** entryAddress, ULONG* nativeSizeOfCode)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ return g_realJitCompiler->compileMethod(compHnd, methodInfo, flags, entryAddress, nativeSizeOfCode);
+ }
+
+ CORJIT_FLAGS jitFlags = {0};
+
+ DWORD jitFlagsSize = 0;
+#if COR_JIT_EE_VERSION > 460
+ if (flags == CORJIT_FLG_CALL_GETJITFLAGS)
+ {
+ jitFlagsSize = compHnd->getJitFlags(&jitFlags, sizeof(jitFlags));
+ }
+#endif
+
+ assert(jitFlagsSize <= sizeof(jitFlags));
+ if (jitFlagsSize == 0)
+ {
+ jitFlags.corJitFlags = flags;
+ }
+
+ int result;
+ void* methodCodePtr = nullptr;
+ CORINFO_METHOD_HANDLE methodHandle = methodInfo->ftn;
+
+ JitTls jitTls(compHnd); // Initialize any necessary thread-local state
+
+ assert(methodInfo->ILCode);
+
+ result = jitNativeCode(methodHandle, methodInfo->scope, compHnd, methodInfo, &methodCodePtr, nativeSizeOfCode,
+ &jitFlags, nullptr);
+
+ if (result == CORJIT_OK)
+ {
+ *entryAddress = (BYTE*)methodCodePtr;
+ }
+
+ return CorJitResult(result);
+}
+
+/*****************************************************************************
+ * Notification from VM to clear any caches
+ */
+void CILJit::clearCache(void)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ g_realJitCompiler->clearCache();
+ // Continue...
+ }
+
+ return;
+}
+
+/*****************************************************************************
+ * Notify vm that we have something to clean up
+ */
+BOOL CILJit::isCacheCleanupRequired(void)
+{
+ BOOL doCleanup;
+
+ if (g_realJitCompiler != nullptr)
+ {
+ if (g_realJitCompiler->isCacheCleanupRequired())
+ {
+ return TRUE;
+ }
+ // Continue...
+ }
+
+ return FALSE;
+}
+
+void CILJit::ProcessShutdownWork(ICorStaticInfo* statInfo)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ g_realJitCompiler->ProcessShutdownWork(statInfo);
+ // Continue, by shutting down this JIT as well.
+ }
+
+#ifdef FEATURE_MERGE_JIT_AND_ENGINE
+ jitShutdown();
+#endif
+
+ Compiler::ProcessShutdownWork(statInfo);
+}
+
+/*****************************************************************************
+ * Verify the JIT/EE interface identifier.
+ */
+void CILJit::getVersionIdentifier(GUID* versionIdentifier)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ g_realJitCompiler->getVersionIdentifier(versionIdentifier);
+ return;
+ }
+
+ assert(versionIdentifier != nullptr);
+ memcpy(versionIdentifier, &JITEEVersionIdentifier, sizeof(GUID));
+}
+
+/*****************************************************************************
+ * Determine the maximum length of SIMD vector supported by this JIT.
+ */
+unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
+{
+ if (g_realJitCompiler != nullptr)
+ {
+ return g_realJitCompiler->getMaxIntrinsicSIMDVectorLength(cpuCompileFlags);
+ }
+
+#ifdef _TARGET_AMD64_
+#ifdef FEATURE_AVX_SUPPORT
+ if (((cpuCompileFlags & CORJIT_FLG_PREJIT) == 0) && ((cpuCompileFlags & CORJIT_FLG_FEATURE_SIMD) != 0) &&
+ ((cpuCompileFlags & CORJIT_FLG_USE_AVX2) != 0))
+ {
+ if (JitConfig.EnableAVX() != 0)
+ {
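+ // 32 bytes is the AVX/AVX2 (YMM) vector width, i.e., 8 floats or 4 doubles per vector;
+ // the 16-byte fallback below is the SSE (XMM) width.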
+ return 32;
+ }
+ }
+#endif // FEATURE_AVX_SUPPORT
+ return 16;
+#else // !_TARGET_AMD64_
+ return 0;
+#endif // !_TARGET_AMD64_
+}
+
+void CILJit::setRealJit(ICorJitCompiler* realJitCompiler)
+{
+ g_realJitCompiler = realJitCompiler;
+}
+
+/*****************************************************************************
+ * Returns the number of bytes required for the given type argument
+ */
+
+unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig)
+{
+#if defined(_TARGET_AMD64_)
+
+ // Everything fits into a single 'slot' size;
+ // to accommodate irregular-sized structs, they are passed byref.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
+ var_types argType = JITtype2varType(argTypeJit);
+ if (varTypeIsStruct(argType))
+ {
+ unsigned structSize = info.compCompHnd->getClassSize(argClass);
+ return structSize; // TODO: roundUp() needed here?
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ return sizeof(size_t);
+
+#else // !_TARGET_AMD64_
+
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(sig, list, &argClass));
+ var_types argType = JITtype2varType(argTypeJit);
+
+ if (varTypeIsStruct(argType))
+ {
+ unsigned structSize = info.compCompHnd->getClassSize(argClass);
+
+ // make certain the EE passes us back the right thing for refanys
+ assert(argTypeJit != CORINFO_TYPE_REFANY || structSize == 2 * sizeof(void*));
+
+ // For each target that supports passing struct args in multiple registers
+ // apply the target specific rules for them here:
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_MULTIREG_ARGS
+#if defined(_TARGET_ARM64_)
+ // Any structs that are larger than MAX_PASS_MULTIREG_BYTES are always passed by reference
+ if (structSize > MAX_PASS_MULTIREG_BYTES)
+ {
+ // This struct is passed by reference using a single 'slot'
+ return TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // Is the struct larger than 16 bytes
+ if (structSize > (2 * TARGET_POINTER_SIZE))
+ {
+ var_types hfaType = GetHfaType(argClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ bool isHfa = (hfaType != TYP_UNDEF);
+ if (!isHfa)
+ {
+ // This struct is passed by reference using a single 'slot'
+ return TARGET_POINTER_SIZE;
+ }
+ }
+ // otherwise we will pass this struct by value in multiple registers
+ }
+#elif defined(_TARGET_ARM_)
+// otherwise we will pass this struct by value in multiple registers
+#else
+ NYI("unknown target");
+#endif // defined(_TARGET_XXX_)
+#endif // FEATURE_MULTIREG_ARGS
+
+ // we pass this struct by value in multiple registers
+ return (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ unsigned argSize = sizeof(int) * genTypeStSz(argType);
+ assert(0 < argSize && argSize <= sizeof(__int64));
+ return (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
+ }
+#endif
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::eeGetPInvokeCookie(CORINFO_SIG_INFO* szMetaSig)
+{
+ void *cookie, *pCookie;
+ cookie = info.compCompHnd->GetCookieForPInvokeCalliSig(szMetaSig, &pCookie);
+ assert((cookie == nullptr) != (pCookie == nullptr));
+
+ return gtNewIconEmbHndNode(cookie, pCookie, GTF_ICON_PINVKI_HDL);
+}
+
+//------------------------------------------------------------------------
+// eeGetArrayDataOffset: Gets the offset of a SDArray's first element
+//
+// Arguments:
+// type - The array element type
+//
+// Return Value:
+// The offset to the first array element.
+
+unsigned Compiler::eeGetArrayDataOffset(var_types type)
+{
+ return varTypeIsGC(type) ? eeGetEEInfo()->offsetOfObjArrayData : offsetof(CORINFO_Array, u1Elems);
+}
+
+//------------------------------------------------------------------------
+// eeGetMDArrayDataOffset: Gets the offset of a MDArray's first element
+//
+// Arguments:
+// type - The array element type
+// rank - The array rank
+//
+// Return Value:
+// The offset to the first array element.
+//
+// Assumptions:
+// The rank should be greater than 0.
+
+unsigned Compiler::eeGetMDArrayDataOffset(var_types type, unsigned rank)
+{
+ assert(rank > 0);
+ // Note that below we're specifically using genTypeSize(TYP_INT) because array
+ // indices are not native int.
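+ // The '2 *' accounts for the per-dimension length and lower-bound entries that a
+ // multi-dimensional array stores before its data; e.g., for rank == 2 this skips four
+ // INT32-sized slots.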
+ return eeGetArrayDataOffset(type) + 2 * genTypeSize(TYP_INT) * rank;
+}
+
+/*****************************************************************************/
+
+void Compiler::eeGetStmtOffsets()
+{
+ ULONG32 offsetsCount;
+ DWORD* offsets;
+ ICorDebugInfo::BoundaryTypes offsetsImplicit;
+
+ info.compCompHnd->getBoundaries(info.compMethodHnd, &offsetsCount, &offsets, &offsetsImplicit);
+
+ /* Set the implicit boundaries */
+
+ info.compStmtOffsetsImplicit = (ICorDebugInfo::BoundaryTypes)offsetsImplicit;
+
+ /* Process the explicit boundaries */
+
+ info.compStmtOffsetsCount = 0;
+
+ if (offsetsCount == 0)
+ {
+ return;
+ }
+
+ info.compStmtOffsets = new (this, CMK_DebugInfo) IL_OFFSET[offsetsCount];
+
+ for (unsigned i = 0; i < offsetsCount; i++)
+ {
+ if (offsets[i] > info.compILCodeSize)
+ {
+ continue;
+ }
+
+ info.compStmtOffsets[info.compStmtOffsetsCount] = offsets[i];
+ info.compStmtOffsetsCount++;
+ }
+
+ info.compCompHnd->freeArray(offsets);
+}
+
+/*****************************************************************************
+ *
+ * Debugging support - Local var info
+ */
+
+void Compiler::eeSetLVcount(unsigned count)
+{
+ assert(opts.compScopeInfo);
+
+ JITDUMP("VarLocInfo count is %d\n", count);
+
+ eeVarsCount = count;
+ if (eeVarsCount)
+ {
+ eeVars = (VarResultInfo*)info.compCompHnd->allocateArray(eeVarsCount * sizeof(eeVars[0]));
+ }
+ else
+ {
+ eeVars = nullptr;
+ }
+}
+
+void Compiler::eeSetLVinfo(unsigned which,
+ UNATIVE_OFFSET startOffs,
+ UNATIVE_OFFSET length,
+ unsigned varNum,
+ unsigned LVnum,
+ VarName name,
+ bool avail,
+ const Compiler::siVarLoc& varLoc)
+{
+ // ICorDebugInfo::VarLoc and Compiler::siVarLoc have to overlap
+ // This is checked in siInit()
+
+ assert(opts.compScopeInfo);
+ assert(eeVarsCount > 0);
+ assert(which < eeVarsCount);
+
+ if (eeVars != nullptr)
+ {
+ eeVars[which].startOffset = startOffs;
+ eeVars[which].endOffset = startOffs + length;
+ eeVars[which].varNumber = varNum;
+ eeVars[which].loc = varLoc;
+ }
+}
+
+void Compiler::eeSetLVdone()
+{
+ // A necessary (but not sufficient) condition for the two struct definitions to overlap.
+ assert(sizeof(eeVars[0]) == sizeof(ICorDebugInfo::NativeVarInfo));
+ assert(opts.compScopeInfo);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ eeDispVars(info.compMethodHnd, eeVarsCount, (ICorDebugInfo::NativeVarInfo*)eeVars);
+ }
+#endif // DEBUG
+
+ info.compCompHnd->setVars(info.compMethodHnd, eeVarsCount, (ICorDebugInfo::NativeVarInfo*)eeVars);
+
+ eeVars = nullptr; // We give up ownership after setVars()
+}
+
+void Compiler::eeGetVars()
+{
+ ICorDebugInfo::ILVarInfo* varInfoTable;
+ ULONG32 varInfoCount;
+ bool extendOthers;
+
+ info.compCompHnd->getVars(info.compMethodHnd, &varInfoCount, &varInfoTable, &extendOthers);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("getVars() returned cVars = %d, extendOthers = %s\n", varInfoCount, extendOthers ? "true" : "false");
+ }
+#endif
+
+ // Over allocate in case extendOthers is set.
+
+ SIZE_T varInfoCountExtra = varInfoCount;
+ if (extendOthers)
+ {
+ varInfoCountExtra += info.compLocalsCount;
+ }
+
+ if (varInfoCountExtra == 0)
+ {
+ return;
+ }
+
+ info.compVarScopes = new (this, CMK_DebugInfo) VarScopeDsc[varInfoCountExtra];
+
+ VarScopeDsc* localVarPtr = info.compVarScopes;
+ ICorDebugInfo::ILVarInfo* v = varInfoTable;
+
+ for (unsigned i = 0; i < varInfoCount; i++, v++)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("var:%d start:%d end:%d\n", v->varNumber, v->startOffset, v->endOffset);
+ }
+#endif
+
+ if (v->startOffset >= v->endOffset)
+ {
+ continue;
+ }
+
+ assert(v->startOffset <= info.compILCodeSize);
+ assert(v->endOffset <= info.compILCodeSize);
+
+ localVarPtr->vsdLifeBeg = v->startOffset;
+ localVarPtr->vsdLifeEnd = v->endOffset;
+ localVarPtr->vsdLVnum = i;
+ localVarPtr->vsdVarNum = compMapILvarNum(v->varNumber);
+
+#ifdef DEBUG
+ localVarPtr->vsdName = gtGetLclVarName(localVarPtr->vsdVarNum);
+#endif
+
+ localVarPtr++;
+ info.compVarScopesCount++;
+ }
+
+ /* If extendOthers is set, then assume the scope of unreported vars
+ is the entire method. Note that this will cause fgExtendDbgLifetimes()
+ to zero-initialize all of them. This will be expensive if it's used
+ for too many variables.
+ */
+ if (extendOthers)
+ {
+ // Allocate a bit-array for all the variables and initialize to false
+
+ bool* varInfoProvided = (bool*)compGetMemA(info.compLocalsCount * sizeof(varInfoProvided[0]));
+ unsigned i;
+ for (i = 0; i < info.compLocalsCount; i++)
+ {
+ varInfoProvided[i] = false;
+ }
+
+ // Find which vars have absolutely no varInfo provided
+
+ for (i = 0; i < info.compVarScopesCount; i++)
+ {
+ varInfoProvided[info.compVarScopes[i].vsdVarNum] = true;
+ }
+
+ // Create entries for the variables with no varInfo
+
+ for (unsigned varNum = 0; varNum < info.compLocalsCount; varNum++)
+ {
+ if (varInfoProvided[varNum])
+ {
+ continue;
+ }
+
+ // Create a varInfo with scope over the entire method
+
+ localVarPtr->vsdLifeBeg = 0;
+ localVarPtr->vsdLifeEnd = info.compILCodeSize;
+ localVarPtr->vsdVarNum = varNum;
+ localVarPtr->vsdLVnum = info.compVarScopesCount;
+
+#ifdef DEBUG
+ localVarPtr->vsdName = gtGetLclVarName(localVarPtr->vsdVarNum);
+#endif
+
+ localVarPtr++;
+ info.compVarScopesCount++;
+ }
+ }
+
+ assert(localVarPtr <= info.compVarScopes + varInfoCountExtra);
+
+ if (varInfoCount != 0)
+ {
+ info.compCompHnd->freeArray(varInfoTable);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ compDispLocalVars();
+ }
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+void Compiler::eeDispVar(ICorDebugInfo::NativeVarInfo* var)
+{
+ const char* name = nullptr;
+
+ if (var->varNumber == (DWORD)ICorDebugInfo::VARARGS_HND_ILNUM)
+ {
+ name = "varargsHandle";
+ }
+ else if (var->varNumber == (DWORD)ICorDebugInfo::RETBUF_ILNUM)
+ {
+ name = "retBuff";
+ }
+ else if (var->varNumber == (DWORD)ICorDebugInfo::TYPECTXT_ILNUM)
+ {
+ name = "typeCtx";
+ }
+ printf("%3d(%10s) : From %08Xh to %08Xh, in ", var->varNumber,
+ (VarNameToStr(name) == nullptr) ? "UNKNOWN" : VarNameToStr(name), var->startOffset, var->endOffset);
+
+ switch (var->loc.vlType)
+ {
+ case VLT_REG:
+ case VLT_REG_BYREF:
+ case VLT_REG_FP:
+ printf("%s", getRegName(var->loc.vlReg.vlrReg));
+ if (var->loc.vlType == (ICorDebugInfo::VarLocType)VLT_REG_BYREF)
+ {
+ printf(" byref");
+ }
+ break;
+
+ case VLT_STK:
+ case VLT_STK_BYREF:
+ if ((int)var->loc.vlStk.vlsBaseReg != (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ printf("%s[%d] (1 slot)", getRegName(var->loc.vlStk.vlsBaseReg), var->loc.vlStk.vlsOffset);
+ }
+ else
+ {
+ printf(STR_SPBASE "'[%d] (1 slot)", var->loc.vlStk.vlsOffset);
+ }
+ if (var->loc.vlType == (ICorDebugInfo::VarLocType)VLT_REG_BYREF)
+ {
+ printf(" byref");
+ }
+ break;
+
+#ifndef _TARGET_AMD64_
+ case VLT_REG_REG:
+ printf("%s-%s", getRegName(var->loc.vlRegReg.vlrrReg1), getRegName(var->loc.vlRegReg.vlrrReg2));
+ break;
+
+ case VLT_REG_STK:
+ if ((int)var->loc.vlRegStk.vlrsStk.vlrssBaseReg != (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ printf("%s-%s[%d]", getRegName(var->loc.vlRegStk.vlrsReg),
+ getRegName(var->loc.vlRegStk.vlrsStk.vlrssBaseReg), var->loc.vlRegStk.vlrsStk.vlrssOffset);
+ }
+ else
+ {
+ printf("%s-" STR_SPBASE "'[%d]", getRegName(var->loc.vlRegStk.vlrsReg),
+ var->loc.vlRegStk.vlrsStk.vlrssOffset);
+ }
+ break;
+
+ case VLT_STK_REG:
+ unreached(); // unexpected
+
+ case VLT_STK2:
+ if ((int)var->loc.vlStk2.vls2BaseReg != (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ printf("%s[%d] (2 slots)", getRegName(var->loc.vlStk2.vls2BaseReg), var->loc.vlStk2.vls2Offset);
+ }
+ else
+ {
+ printf(STR_SPBASE "'[%d] (2 slots)", var->loc.vlStk2.vls2Offset);
+ }
+ break;
+
+ case VLT_FPSTK:
+ printf("ST(L-%d)", var->loc.vlFPstk.vlfReg);
+ break;
+
+ case VLT_FIXED_VA:
+ printf("fxd_va[%d]", var->loc.vlFixedVarArg.vlfvOffset);
+ break;
+#endif // !_TARGET_AMD64_
+
+ default:
+ unreached(); // unexpected
+ }
+
+ printf("\n");
+}
+
+// Same parameters as ICorStaticInfo::setVars().
+void Compiler::eeDispVars(CORINFO_METHOD_HANDLE ftn, ULONG32 cVars, ICorDebugInfo::NativeVarInfo* vars)
+{
+ printf("*************** Variable debug info\n");
+ printf("%d vars\n", cVars);
+ for (unsigned i = 0; i < cVars; i++)
+ {
+ eeDispVar(&vars[i]);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Debugging support - Line number info
+ */
+
+void Compiler::eeSetLIcount(unsigned count)
+{
+ assert(opts.compDbgInfo);
+
+ eeBoundariesCount = count;
+ if (eeBoundariesCount)
+ {
+ eeBoundaries = (boundariesDsc*)info.compCompHnd->allocateArray(eeBoundariesCount * sizeof(eeBoundaries[0]));
+ }
+ else
+ {
+ eeBoundaries = nullptr;
+ }
+}
+
+void Compiler::eeSetLIinfo(
+ unsigned which, UNATIVE_OFFSET nativeOffset, IL_OFFSET ilOffset, bool stkEmpty, bool callInstruction)
+{
+ assert(opts.compDbgInfo);
+ assert(eeBoundariesCount > 0);
+ assert(which < eeBoundariesCount);
+
+ if (eeBoundaries != nullptr)
+ {
+ eeBoundaries[which].nativeIP = nativeOffset;
+ eeBoundaries[which].ilOffset = ilOffset;
+ eeBoundaries[which].sourceReason = stkEmpty ? ICorDebugInfo::STACK_EMPTY : 0;
+ eeBoundaries[which].sourceReason |= callInstruction ? ICorDebugInfo::CALL_INSTRUCTION : 0;
+ }
+}
+
+void Compiler::eeSetLIdone()
+{
+ assert(opts.compDbgInfo);
+
+#if defined(DEBUG)
+ if (verbose)
+ {
+ eeDispLineInfos();
+ }
+#endif // DEBUG
+
+ // A necessary (but not sufficient) condition for the two struct definitions to overlap.
+ assert(sizeof(eeBoundaries[0]) == sizeof(ICorDebugInfo::OffsetMapping));
+
+ info.compCompHnd->setBoundaries(info.compMethodHnd, eeBoundariesCount, (ICorDebugInfo::OffsetMapping*)eeBoundaries);
+
+ eeBoundaries = nullptr; // we give up ownership after setBoundaries();
+}
+
+#if defined(DEBUG)
+
+/* static */
+void Compiler::eeDispILOffs(IL_OFFSET offs)
+{
+ const char* specialOffs[] = {"EPILOG", "PROLOG", "NO_MAP"};
+
+ switch ((int)offs) // Need the cast since offs is unsigned and the case statements are comparing to signed.
+ {
+ case ICorDebugInfo::EPILOG:
+ case ICorDebugInfo::PROLOG:
+ case ICorDebugInfo::NO_MAPPING:
+ assert(DWORD(ICorDebugInfo::EPILOG) + 1 == (unsigned)ICorDebugInfo::PROLOG);
+ assert(DWORD(ICorDebugInfo::EPILOG) + 2 == (unsigned)ICorDebugInfo::NO_MAPPING);
+ int specialOffsNum;
+ specialOffsNum = offs - DWORD(ICorDebugInfo::EPILOG);
+ printf("%s", specialOffs[specialOffsNum]);
+ break;
+ default:
+ printf("0x%04X", offs);
+ }
+}
+
+/* static */
+void Compiler::eeDispLineInfo(const boundariesDsc* line)
+{
+ printf("IL offs ");
+
+ eeDispILOffs(line->ilOffset);
+
+ printf(" : 0x%08X", line->nativeIP);
+ if (line->sourceReason != 0)
+ {
+ // It seems like it should probably never be zero since ICorDebugInfo::SOURCE_TYPE_INVALID is zero.
+ // However, the JIT has always generated this and printed "stack non-empty".
+
+ printf(" ( ");
+ if ((line->sourceReason & ICorDebugInfo::STACK_EMPTY) != 0)
+ {
+ printf("STACK_EMPTY ");
+ }
+ if ((line->sourceReason & ICorDebugInfo::CALL_INSTRUCTION) != 0)
+ {
+ printf("CALL_INSTRUCTION ");
+ }
+ if ((line->sourceReason & ICorDebugInfo::CALL_SITE) != 0)
+ {
+ printf("CALL_SITE ");
+ }
+ printf(")");
+ }
+ printf("\n");
+
+ // We don't expect to see any other bits.
+ assert((line->sourceReason & ~(ICorDebugInfo::STACK_EMPTY | ICorDebugInfo::CALL_INSTRUCTION)) == 0);
+}
+
+void Compiler::eeDispLineInfos()
+{
+ printf("IP mapping count : %d\n", eeBoundariesCount); // this might be zero
+ for (unsigned i = 0; i < eeBoundariesCount; i++)
+ {
+ eeDispLineInfo(&eeBoundaries[i]);
+ }
+ printf("\n");
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * ICorJitInfo wrapper functions
+ *
+ * In many cases here, we don't tell the VM about various unwind or EH information if
+ * we're an altjit for an unexpected architecture. If it's not a same-architecture JIT
+ * (e.g., host AMD64, target ARM64), then the VM will get confused anyway.
+ */
+
+void Compiler::eeReserveUnwindInfo(BOOL isFunclet, BOOL isColdCode, ULONG unwindSize)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("reserveUnwindInfo(isFunclet=%s, isColdCode=%s, unwindSize=0x%x)\n", isFunclet ? "TRUE" : "FALSE",
+ isColdCode ? "TRUE" : "FALSE", unwindSize);
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->reserveUnwindInfo(isFunclet, isColdCode, unwindSize);
+ }
+}
+
+void Compiler::eeAllocUnwindInfo(BYTE* pHotCode,
+ BYTE* pColdCode,
+ ULONG startOffset,
+ ULONG endOffset,
+ ULONG unwindSize,
+ BYTE* pUnwindBlock,
+ CorJitFuncKind funcKind)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("allocUnwindInfo(pHotCode=0x%p, pColdCode=0x%p, startOffset=0x%x, endOffset=0x%x, unwindSize=0x%x, "
+ "pUnwindBlock=0x%p, funKind=%d",
+ dspPtr(pHotCode), dspPtr(pColdCode), startOffset, endOffset, unwindSize, dspPtr(pUnwindBlock), funcKind);
+ switch (funcKind)
+ {
+ case CORJIT_FUNC_ROOT:
+ printf(" (main function)");
+ break;
+ case CORJIT_FUNC_HANDLER:
+ printf(" (handler)");
+ break;
+ case CORJIT_FUNC_FILTER:
+ printf(" (filter)");
+ break;
+ default:
+ printf(" (ILLEGAL)");
+ break;
+ }
+ printf(")\n");
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->allocUnwindInfo(pHotCode, pColdCode, startOffset, endOffset, unwindSize, pUnwindBlock,
+ funcKind);
+ }
+}
+
+void Compiler::eeSetEHcount(unsigned cEH)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("setEHcount(cEH=%u)\n", cEH);
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->setEHcount(cEH);
+ }
+}
+
+void Compiler::eeSetEHinfo(unsigned EHnumber, const CORINFO_EH_CLAUSE* clause)
+{
+#ifdef DEBUG
+ if (opts.dspEHTable)
+ {
+ dispOutgoingEHClause(EHnumber, *clause);
+ }
+#endif // DEBUG
+
+ if (info.compMatchedVM)
+ {
+ info.compCompHnd->setEHinfo(EHnumber, clause);
+ }
+}
+
+WORD Compiler::eeGetRelocTypeHint(void* target)
+{
+ if (info.compMatchedVM)
+ {
+ return info.compCompHnd->getRelocTypeHint(target);
+ }
+ else
+ {
+ // No hints
+ return (WORD)-1;
+ }
+}
+
+CORINFO_FIELD_HANDLE Compiler::eeFindJitDataOffs(unsigned dataOffs)
+{
+ // Data offsets are marked by the fact that the low two bits are 0b01 (0x1).
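+ // For example, assuming iaut_SHIFT == 2 and iaut_DATA_OFFSET == 0x1 (matching the 0b01 tag
+ // described above), a data offset of 0x40 is encoded as (0x40 << 2) | 0x1 == 0x101, and
+ // eeGetJitDataOffs below recovers 0x40 by shifting the tag back out.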
+ assert(dataOffs < 0x40000000);
+ return (CORINFO_FIELD_HANDLE)(size_t)((dataOffs << iaut_SHIFT) | iaut_DATA_OFFSET);
+}
+
+bool Compiler::eeIsJitDataOffs(CORINFO_FIELD_HANDLE field)
+{
+ // if 'field' is a jit data offset it has to fit into a 32-bit unsigned int
+ unsigned value = static_cast<unsigned>(reinterpret_cast<uintptr_t>(field));
+ if (((CORINFO_FIELD_HANDLE)(size_t)value) != field)
+ {
+ return false; // upper bits were set, not a jit data offset
+ }
+
+ // Data offsets are marked by the fact that the low two bits are 0b01 (0x1).
+ return (value & iaut_MASK) == iaut_DATA_OFFSET;
+}
+
+int Compiler::eeGetJitDataOffs(CORINFO_FIELD_HANDLE field)
+{
+ // Data offsets are marked by the fact that the low two bits are 0b01 (0x1).
+ if (eeIsJitDataOffs(field))
+ {
+ unsigned dataOffs = static_cast<unsigned>(reinterpret_cast<uintptr_t>(field));
+ assert(((CORINFO_FIELD_HANDLE)(size_t)dataOffs) == field);
+ assert(dataOffs < 0x40000000);
+ return (static_cast<int>(reinterpret_cast<intptr_t>(field))) >> iaut_SHIFT;
+ }
+ else
+ {
+ return -1;
+ }
+}
+
+/*****************************************************************************
+ *
+ * ICorStaticInfo wrapper functions
+ */
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#ifdef DEBUG
+void Compiler::dumpSystemVClassificationType(SystemVClassificationType ct)
+{
+ switch (ct)
+ {
+ case SystemVClassificationTypeUnknown:
+ printf("UNKNOWN");
+ break;
+ case SystemVClassificationTypeStruct:
+ printf("Struct");
+ break;
+ case SystemVClassificationTypeNoClass:
+ printf("NoClass");
+ break;
+ case SystemVClassificationTypeMemory:
+ printf("Memory");
+ break;
+ case SystemVClassificationTypeInteger:
+ printf("Integer");
+ break;
+ case SystemVClassificationTypeIntegerReference:
+ printf("IntegerReference");
+ break;
+ case SystemVClassificationTypeIntegerByRef:
+ printf("IntegerByReference");
+ break;
+ case SystemVClassificationTypeSSE:
+ printf("SSE");
+ break;
+ default:
+ printf("ILLEGAL");
+ break;
+ }
+}
+#endif // DEBUG
+
+void Compiler::eeGetSystemVAmd64PassStructInRegisterDescriptor(
+ /*IN*/ CORINFO_CLASS_HANDLE structHnd,
+ /*OUT*/ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* structPassInRegDescPtr)
+{
+ bool ok = info.compCompHnd->getSystemVAmd64PassStructInRegisterDescriptor(structHnd, structPassInRegDescPtr);
+ noway_assert(ok);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("**** getSystemVAmd64PassStructInRegisterDescriptor(0x%x (%s), ...) =>\n", dspPtr(structHnd),
+ eeGetClassName(structHnd));
+ printf(" passedInRegisters = %s\n", dspBool(structPassInRegDescPtr->passedInRegisters));
+ if (structPassInRegDescPtr->passedInRegisters)
+ {
+ printf(" eightByteCount = %d\n", structPassInRegDescPtr->eightByteCount);
+ for (unsigned int i = 0; i < structPassInRegDescPtr->eightByteCount; i++)
+ {
+ printf(" eightByte #%d -- classification: ", i);
+ dumpSystemVClassificationType(structPassInRegDescPtr->eightByteClassifications[i]);
+ printf(", byteSize: %d, byteOffset: %d\n", structPassInRegDescPtr->eightByteSizes[i],
+ structPassInRegDescPtr->eightByteOffsets[i]);
+ }
+ }
+ }
+#endif // DEBUG
+}
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#if COR_JIT_EE_VERSION <= 460
+
+// Validate the token to determine whether to turn the bad image format exception into
+// verification failure (for backward compatibility)
+static bool isValidTokenForTryResolveToken(ICorJitInfo* corInfo, CORINFO_RESOLVED_TOKEN* resolvedToken)
+{
+ if (!corInfo->isValidToken(resolvedToken->tokenScope, resolvedToken->token))
+ return false;
+
+ CorInfoTokenKind tokenType = resolvedToken->tokenType;
+ switch (TypeFromToken(resolvedToken->token))
+ {
+ case mdtModuleRef:
+ case mdtTypeDef:
+ case mdtTypeRef:
+ case mdtTypeSpec:
+ if ((tokenType & CORINFO_TOKENKIND_Class) == 0)
+ return false;
+ break;
+
+ case mdtMethodDef:
+ case mdtMethodSpec:
+ if ((tokenType & CORINFO_TOKENKIND_Method) == 0)
+ return false;
+ break;
+
+ case mdtFieldDef:
+ if ((tokenType & CORINFO_TOKENKIND_Field) == 0)
+ return false;
+ break;
+
+ case mdtMemberRef:
+ if ((tokenType & (CORINFO_TOKENKIND_Method | CORINFO_TOKENKIND_Field)) == 0)
+ return false;
+ break;
+
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+// This type encapsulates the information necessary for `TryResolveTokenFilter` and
+// `eeTryResolveToken` below.
+struct TryResolveTokenFilterParam
+{
+ ICorJitInfo* m_corInfo;
+ CORINFO_RESOLVED_TOKEN* m_resolvedToken;
+ EXCEPTION_POINTERS m_exceptionPointers;
+ bool m_success;
+};
+
+LONG TryResolveTokenFilter(struct _EXCEPTION_POINTERS* exceptionPointers, void* theParam)
+{
+ assert(exceptionPointers->ExceptionRecord->ExceptionCode != SEH_VERIFICATION_EXCEPTION);
+
+ // Backward compatibility: Convert bad image format exceptions thrown by the EE while resolving token to
+ // verification exceptions if we are verifying. Verification exceptions will cause the JIT of the basic block to
+ // fail, but the JITing of the whole method is still going to succeed. This is done for backward compatibility only.
+ // Ideally, we would always treat bad tokens in the IL stream as fatal errors.
+ if (exceptionPointers->ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
+ {
+ auto* param = reinterpret_cast<TryResolveTokenFilterParam*>(theParam);
+ if (!isValidTokenForTryResolveToken(param->m_corInfo, param->m_resolvedToken))
+ {
+ param->m_exceptionPointers = *exceptionPointers;
+ return param->m_corInfo->FilterException(exceptionPointers);
+ }
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+bool Compiler::eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken)
+{
+ TryResolveTokenFilterParam param;
+ param.m_corInfo = info.compCompHnd;
+ param.m_resolvedToken = resolvedToken;
+ param.m_success = true;
+
+ PAL_TRY(TryResolveTokenFilterParam*, pParam, &param)
+ {
+ pParam->m_corInfo->resolveToken(pParam->m_resolvedToken);
+ }
+ PAL_EXCEPT_FILTER(TryResolveTokenFilter)
+ {
+ if (param.m_exceptionPointers.ExceptionRecord->ExceptionCode == EXCEPTION_COMPLUS)
+ {
+ param.m_corInfo->HandleException(&param.m_exceptionPointers);
+ }
+
+ param.m_success = false;
+ }
+ PAL_ENDTRY
+
+ return param.m_success;
+}
+
+struct TrapParam
+{
+ ICorJitInfo* m_corInfo;
+ EXCEPTION_POINTERS m_exceptionPointers;
+
+ void (*m_function)(void*);
+ void* m_param;
+ bool m_success;
+};
+
+static LONG __EEFilter(PEXCEPTION_POINTERS exceptionPointers, void* param)
+{
+ auto* trapParam = reinterpret_cast<TrapParam*>(param);
+ trapParam->m_exceptionPointers = *exceptionPointers;
+ return trapParam->m_corInfo->FilterException(exceptionPointers);
+}
+
+bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param)
+{
+ TrapParam trapParam;
+ trapParam.m_corInfo = info.compCompHnd;
+ trapParam.m_function = function;
+ trapParam.m_param = param;
+ trapParam.m_success = true;
+
+ PAL_TRY(TrapParam*, __trapParam, &trapParam)
+ {
+ __trapParam->m_function(__trapParam->m_param);
+ }
+ PAL_EXCEPT_FILTER(__EEFilter)
+ {
+ trapParam.m_corInfo->HandleException(&trapParam.m_exceptionPointers);
+ trapParam.m_success = false;
+ }
+ PAL_ENDTRY
+
+ return trapParam.m_success;
+}
+
+#else // COR_JIT_EE_VERSION <= 460
+
+bool Compiler::eeTryResolveToken(CORINFO_RESOLVED_TOKEN* resolvedToken)
+{
+ return info.compCompHnd->tryResolveToken(resolvedToken);
+}
+
+bool Compiler::eeRunWithErrorTrapImp(void (*function)(void*), void* param)
+{
+ return info.compCompHnd->runWithErrorTrap(function, param);
+}
+
+#endif // COR_JIT_EE_VERSION > 460
+
+/*****************************************************************************
+ *
+ * Utility functions
+ */
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+/*****************************************************************************/
+
+// static helper names - constant array
+const char* jitHlpFuncTable[CORINFO_HELP_COUNT] = {
+#define JITHELPER(code, pfnHelper, sig) #code,
+#define DYNAMICJITHELPER(code, pfnHelper, sig) #code,
+#include "jithelpers.h"
+};
+
+/*****************************************************************************
+*
+* Filter wrapper to handle exception filtering.
+* On Unix, compilers don't support SEH.
+*/
+
+struct FilterSuperPMIExceptionsParam_ee_il
+{
+ Compiler* pThis;
+ Compiler::Info* pJitInfo;
+ CORINFO_FIELD_HANDLE field;
+ CORINFO_METHOD_HANDLE method;
+ CORINFO_CLASS_HANDLE clazz;
+ const char** classNamePtr;
+ const char* fieldOrMethodOrClassNamePtr;
+ EXCEPTION_POINTERS exceptionPointers;
+};
+
+static LONG FilterSuperPMIExceptions_ee_il(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ FilterSuperPMIExceptionsParam_ee_il* pSPMIEParam = (FilterSuperPMIExceptionsParam_ee_il*)lpvParam;
+ pSPMIEParam->exceptionPointers = *pExceptionPointers;
+
+ if (pSPMIEParam->pThis->IsSuperPMIException(pExceptionPointers->ExceptionRecord->ExceptionCode))
+ {
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+const char* Compiler::eeGetMethodName(CORINFO_METHOD_HANDLE method, const char** classNamePtr)
+{
+ if (eeGetHelperNum(method))
+ {
+ if (classNamePtr != nullptr)
+ {
+ *classNamePtr = "HELPER";
+ }
+ CorInfoHelpFunc ftnNum = eeGetHelperNum(method);
+ const char* name = info.compCompHnd->getHelperName(ftnNum);
+
+ // If it's something unknown from a RET VM, or from SuperPMI, then use our own helper name table.
+ if ((strcmp(name, "AnyJITHelper") == 0) || (strcmp(name, "Yickish helper name") == 0))
+ {
+ if (ftnNum < CORINFO_HELP_COUNT)
+ {
+ name = jitHlpFuncTable[ftnNum];
+ }
+ }
+ return name;
+ }
+
+ if (eeIsNativeMethod(method))
+ {
+ if (classNamePtr != nullptr)
+ {
+ *classNamePtr = "NATIVE";
+ }
+ method = eeGetMethodHandleForNative(method);
+ }
+
+ FilterSuperPMIExceptionsParam_ee_il param;
+
+ param.pThis = this;
+ param.pJitInfo = &info;
+ param.method = method;
+ param.classNamePtr = classNamePtr;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_ee_il*, pParam, &param)
+ {
+ pParam->fieldOrMethodOrClassNamePtr =
+ pParam->pJitInfo->compCompHnd->getMethodName(pParam->method, pParam->classNamePtr);
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_ee_il)
+ {
+ if (param.classNamePtr != nullptr)
+ {
+ *(param.classNamePtr) = "hackishClassName";
+ }
+
+ param.fieldOrMethodOrClassNamePtr = "hackishMethodName";
+ }
+ PAL_ENDTRY
+
+ return param.fieldOrMethodOrClassNamePtr;
+}
+
+const char* Compiler::eeGetFieldName(CORINFO_FIELD_HANDLE field, const char** classNamePtr)
+{
+ FilterSuperPMIExceptionsParam_ee_il param;
+
+ param.pThis = this;
+ param.pJitInfo = &info;
+ param.field = field;
+ param.classNamePtr = classNamePtr;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_ee_il*, pParam, &param)
+ {
+ pParam->fieldOrMethodOrClassNamePtr =
+ pParam->pJitInfo->compCompHnd->getFieldName(pParam->field, pParam->classNamePtr);
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_ee_il)
+ {
+ param.fieldOrMethodOrClassNamePtr = "hackishFieldName";
+ }
+ PAL_ENDTRY
+
+ return param.fieldOrMethodOrClassNamePtr;
+}
+
+const char* Compiler::eeGetClassName(CORINFO_CLASS_HANDLE clsHnd)
+{
+ FilterSuperPMIExceptionsParam_ee_il param;
+
+ param.pThis = this;
+ param.pJitInfo = &info;
+ param.clazz = clsHnd;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_ee_il*, pParam, &param)
+ {
+ pParam->fieldOrMethodOrClassNamePtr = pParam->pJitInfo->compCompHnd->getClassName(pParam->clazz);
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_ee_il)
+ {
+ param.fieldOrMethodOrClassNamePtr = "hackishClassName";
+ }
+ PAL_ENDTRY
+ return param.fieldOrMethodOrClassNamePtr;
+}
+
+#endif // DEBUG || FEATURE_JIT_METHOD_PERF || FEATURE_SIMD
+
+#ifdef DEBUG
+
+const wchar_t* Compiler::eeGetCPString(size_t strHandle)
+{
+ char buff[512 + sizeof(CORINFO_String)];
+
+ // make this bulletproof, so it works even if we are wrong.
+ if (ReadProcessMemory(GetCurrentProcess(), (void*)strHandle, buff, 4, nullptr) == 0)
+ {
+ return (nullptr);
+ }
+
+ CORINFO_String* asString = *((CORINFO_String**)strHandle);
+
+ if (ReadProcessMemory(GetCurrentProcess(), asString, buff, sizeof(buff), nullptr) == 0)
+ {
+ return (nullptr);
+ }
+
+ if (asString->stringLen >= 255 || asString->chars[asString->stringLen] != 0)
+ {
+ return nullptr;
+ }
+
+ return (asString->chars);
+}
+
+#endif // DEBUG
diff --git a/src/jit/ee_il_dll.hpp b/src/jit/ee_il_dll.hpp
new file mode 100644
index 0000000000..d9bf95fde8
--- /dev/null
+++ b/src/jit/ee_il_dll.hpp
@@ -0,0 +1,204 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+extern ICorJitHost* g_jitHost;
+
+class CILJit : public ICorJitCompiler
+{
+ CorJitResult __stdcall compileMethod(ICorJitInfo* comp, /* IN */
+ CORINFO_METHOD_INFO* methodInfo, /* IN */
+ unsigned flags, /* IN */
+ BYTE** nativeEntry, /* OUT */
+ ULONG* nativeSizeOfCode /* OUT */
+ );
+
+ void clearCache(void);
+ BOOL isCacheCleanupRequired(void);
+
+ void ProcessShutdownWork(ICorStaticInfo* statInfo);
+
+ void getVersionIdentifier(GUID* versionIdentifier /* OUT */
+ );
+
+ unsigned getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags);
+
+ void setRealJit(ICorJitCompiler* realJitCompiler);
+};
+
+/*****************************************************************************
+ *
+ * Functions to get various handles
+ */
+
+FORCEINLINE
+void Compiler::eeGetCallInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedToken,
+ CORINFO_CALLINFO_FLAGS flags,
+ CORINFO_CALL_INFO* pResult)
+{
+ info.compCompHnd->getCallInfo(pResolvedToken, pConstrainedToken, info.compMethodHnd, flags, pResult);
+}
+
+FORCEINLINE
+void Compiler::eeGetFieldInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS accessFlags,
+ CORINFO_FIELD_INFO* pResult)
+{
+ info.compCompHnd->getFieldInfo(pResolvedToken, info.compMethodHnd, accessFlags, pResult);
+}
+
+/*****************************************************************************
+ *
+ * VOS info, method sigs, etc
+ */
+
+FORCEINLINE
+BOOL Compiler::eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd)
+{
+ return info.compCompHnd->isValueClass(clsHnd);
+}
+
+FORCEINLINE
+void Compiler::eeGetSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* retSig)
+{
+ info.compCompHnd->findSig(scope, sigTok, context, retSig);
+
+ assert(!varTypeIsComposite(JITtype2varType(retSig->retType)) || retSig->retTypeClass != nullptr);
+}
+
+FORCEINLINE
+void Compiler::eeGetMethodSig(CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sigRet, CORINFO_CLASS_HANDLE owner)
+{
+ info.compCompHnd->getMethodSig(methHnd, sigRet, owner);
+
+ assert(!varTypeIsComposite(JITtype2varType(sigRet->retType)) || sigRet->retTypeClass != nullptr);
+}
+
+/**********************************************************************
+ * For varargs we need the number of arguments at the call site
+ */
+
+FORCEINLINE
+void Compiler::eeGetCallSiteSig(unsigned sigTok,
+ CORINFO_MODULE_HANDLE scope,
+ CORINFO_CONTEXT_HANDLE context,
+ CORINFO_SIG_INFO* sigRet)
+{
+ info.compCompHnd->findCallSiteSig(scope, sigTok, context, sigRet);
+
+ assert(!varTypeIsComposite(JITtype2varType(sigRet->retType)) || sigRet->retTypeClass != nullptr);
+}
+
+/*****************************************************************************/
+inline var_types Compiler::eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig)
+{
+ CORINFO_CLASS_HANDLE argClass;
+ return (JITtype2varType(strip(info.compCompHnd->getArgType(sig, list, &argClass))));
+}
+
+/*****************************************************************************/
+inline var_types Compiler::eeGetArgType(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO* sig, bool* isPinned)
+{
+ CORINFO_CLASS_HANDLE argClass;
+ CorInfoTypeWithMod type = info.compCompHnd->getArgType(sig, list, &argClass);
+ *isPinned = ((type & ~CORINFO_TYPE_MASK) != 0);
+ return JITtype2varType(strip(type));
+}
+
+/*****************************************************************************
+ *
+ * Native Direct Optimizations
+ */
+
+inline CORINFO_EE_INFO* Compiler::eeGetEEInfo()
+{
+ if (!eeInfoInitialized)
+ {
+ info.compCompHnd->getEEInfo(&eeInfo);
+ eeInfoInitialized = true;
+ }
+
+ return &eeInfo;
+}
+
+/*****************************************************************************
+ *
+ * Convert the type returned from the VM to a var_type.
+ */
+
+inline var_types JITtype2varType(CorInfoType type)
+{
+
+ static const unsigned char varTypeMap[CORINFO_TYPE_COUNT] = {
+ // see the definition of enum CorInfoType in file inc/corinfo.h
+ TYP_UNDEF, // CORINFO_TYPE_UNDEF = 0x0,
+ TYP_VOID, // CORINFO_TYPE_VOID = 0x1,
+ TYP_BOOL, // CORINFO_TYPE_BOOL = 0x2,
+ TYP_CHAR, // CORINFO_TYPE_CHAR = 0x3,
+ TYP_BYTE, // CORINFO_TYPE_BYTE = 0x4,
+ TYP_UBYTE, // CORINFO_TYPE_UBYTE = 0x5,
+ TYP_SHORT, // CORINFO_TYPE_SHORT = 0x6,
+ TYP_CHAR, // CORINFO_TYPE_USHORT = 0x7,
+ TYP_INT, // CORINFO_TYPE_INT = 0x8,
+ TYP_INT, // CORINFO_TYPE_UINT = 0x9,
+ TYP_LONG, // CORINFO_TYPE_LONG = 0xa,
+ TYP_LONG, // CORINFO_TYPE_ULONG = 0xb,
+ TYP_I_IMPL, // CORINFO_TYPE_NATIVEINT = 0xc,
+ TYP_I_IMPL, // CORINFO_TYPE_NATIVEUINT = 0xd,
+ TYP_FLOAT, // CORINFO_TYPE_FLOAT = 0xe,
+ TYP_DOUBLE, // CORINFO_TYPE_DOUBLE = 0xf,
+ TYP_REF, // CORINFO_TYPE_STRING = 0x10, // Not used, should remove
+ TYP_I_IMPL, // CORINFO_TYPE_PTR = 0x11,
+ TYP_BYREF, // CORINFO_TYPE_BYREF = 0x12,
+ TYP_STRUCT, // CORINFO_TYPE_VALUECLASS = 0x13,
+ TYP_REF, // CORINFO_TYPE_CLASS = 0x14,
+ TYP_STRUCT, // CORINFO_TYPE_REFANY = 0x15,
+
+ // Generic type variables only appear when we're doing
+ // verification of generic code, in which case we're running
+ // in "import only" mode. Annoyingly the "import only"
+ // mode of the JIT actually does a fair bit of compilation,
+ // so we have to trick the compiler into thinking it's compiling
+ // a real instantiation. We do that by just pretending we're
+ // compiling the "object" instantiation of the code, i.e. by
+ // turning all generic type variables into object refs, except for a few
+ // choice places to do with verification, where we use
+ // verification types and CLASS_HANDLEs to track the difference.
+
+ TYP_REF, // CORINFO_TYPE_VAR = 0x16,
+ };
+
+ // spot check to make certain enumerations have not changed
+
+ assert(varTypeMap[CORINFO_TYPE_CLASS] == TYP_REF);
+ assert(varTypeMap[CORINFO_TYPE_BYREF] == TYP_BYREF);
+ assert(varTypeMap[CORINFO_TYPE_PTR] == TYP_I_IMPL);
+ assert(varTypeMap[CORINFO_TYPE_INT] == TYP_INT);
+ assert(varTypeMap[CORINFO_TYPE_UINT] == TYP_INT);
+ assert(varTypeMap[CORINFO_TYPE_DOUBLE] == TYP_DOUBLE);
+ assert(varTypeMap[CORINFO_TYPE_VOID] == TYP_VOID);
+ assert(varTypeMap[CORINFO_TYPE_VALUECLASS] == TYP_STRUCT);
+ assert(varTypeMap[CORINFO_TYPE_REFANY] == TYP_STRUCT);
+
+ assert(type < CORINFO_TYPE_COUNT);
+ assert(varTypeMap[type] != TYP_UNDEF);
+
+ return ((var_types)varTypeMap[type]);
+}
+
+inline CORINFO_CALLINFO_FLAGS combine(CORINFO_CALLINFO_FLAGS flag1, CORINFO_CALLINFO_FLAGS flag2)
+{
+ return (CORINFO_CALLINFO_FLAGS)(flag1 | flag2);
+}
+inline CORINFO_CALLINFO_FLAGS Compiler::addVerifyFlag(CORINFO_CALLINFO_FLAGS flags)
+{
+ if (tiVerificationNeeded)
+ {
+ flags = combine(flags, CORINFO_CALLINFO_VERIFICATION);
+ }
+ return flags;
+}
diff --git a/src/jit/eeinterface.cpp b/src/jit/eeinterface.cpp
new file mode 100644
index 0000000000..d8db947f02
--- /dev/null
+++ b/src/jit/eeinterface.cpp
@@ -0,0 +1,212 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX EEInterface XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+// ONLY FUNCTIONS common to all variants of the JIT (EXE, DLL) should go here;
+// otherwise they belong in the corresponding directory.
+
+#include "jitpch.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+#pragma warning(push)
+#pragma warning(disable : 4701) // difficult to get rid of C4701 with 'sig' below
+
+/*****************************************************************************/
+
+/*****************************************************************************
+*
+* Filter wrapper to handle exception filtering.
+* On Unix, compilers don't support SEH, so a PAL exception filter is used instead.
+*/
+
+struct FilterSuperPMIExceptionsParam_eeinterface
+{
+ Compiler* pThis;
+ Compiler::Info* pJitInfo;
+ bool hasThis;
+ size_t siglength;
+ CORINFO_SIG_INFO sig;
+ CORINFO_ARG_LIST_HANDLE argLst;
+ CORINFO_METHOD_HANDLE hnd;
+ const char* returnType;
+ EXCEPTION_POINTERS exceptionPointers;
+};
+
+static LONG FilterSuperPMIExceptions_eeinterface(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ FilterSuperPMIExceptionsParam_eeinterface* pSPMIEParam = (FilterSuperPMIExceptionsParam_eeinterface*)lpvParam;
+ pSPMIEParam->exceptionPointers = *pExceptionPointers;
+
+ if (pSPMIEParam->pThis->IsSuperPMIException(pExceptionPointers->ExceptionRecord->ExceptionCode))
+ {
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+const char* Compiler::eeGetMethodFullName(CORINFO_METHOD_HANDLE hnd)
+{
+ const char* className;
+ const char* methodName = eeGetMethodName(hnd, &className);
+ if ((eeGetHelperNum(hnd) != CORINFO_HELP_UNDEF) || eeIsNativeMethod(hnd))
+ {
+ return methodName;
+ }
+
+ FilterSuperPMIExceptionsParam_eeinterface param;
+ param.returnType = nullptr;
+ param.pThis = this;
+ param.hasThis = false;
+ param.siglength = 0;
+ param.hnd = hnd;
+ param.pJitInfo = &info;
+
+ size_t length = 0;
+ unsigned i;
+
+ /* Generating the full signature is a two-pass process. First we have to walk
+ the components in order to assess the total size, then we allocate the buffer
+ and copy the elements into it.
+ */
+
+ /* Right now there is a race condition in the EE: className can be nullptr */
+
+ /* initialize length with length of className and '.' */
+
+ if (className)
+ {
+ length = strlen(className) + 1;
+ }
+ else
+ {
+ assert(strlen("<NULL>.") == 7);
+ length = 7;
+ }
+
+ /* add length of methodName and opening bracket */
+ length += strlen(methodName) + 1;
+
+ /* figure out the signature */
+
+ EXCEPTION_POINTERS exceptionPointers;
+
+ PAL_TRY(FilterSuperPMIExceptionsParam_eeinterface*, pParam, &param)
+ {
+ unsigned i;
+ pParam->pThis->eeGetMethodSig(pParam->hnd, &pParam->sig);
+ pParam->argLst = pParam->sig.args;
+
+ for (i = 0; i < pParam->sig.numArgs; i++)
+ {
+ var_types type = pParam->pThis->eeGetArgType(pParam->argLst, &pParam->sig);
+
+ pParam->siglength += strlen(varTypeName(type));
+ pParam->argLst = pParam->pJitInfo->compCompHnd->getArgNext(pParam->argLst);
+ }
+
+ /* add ',' if there is more than one argument */
+
+ if (pParam->sig.numArgs > 1)
+ {
+ pParam->siglength += (pParam->sig.numArgs - 1);
+ }
+
+ if (JITtype2varType(pParam->sig.retType) != TYP_VOID)
+ {
+ pParam->returnType = varTypeName(JITtype2varType(pParam->sig.retType));
+ pParam->siglength += strlen(pParam->returnType) + 1; // don't forget the delimiter ':'
+ }
+
+ // Does it have a 'this' pointer? Don't count explicit this, which has the this pointer type as the first
+ // element of the arg type list
+ if (pParam->sig.hasThis() && !pParam->sig.hasExplicitThis())
+ {
+ assert(strlen(":this") == 5);
+ pParam->siglength += 5;
+ pParam->hasThis = true;
+ }
+ }
+ PAL_EXCEPT_FILTER(FilterSuperPMIExceptions_eeinterface)
+ {
+ param.siglength = 0;
+ }
+ PAL_ENDTRY
+
+ /* add closing bracket and null terminator */
+
+ length += param.siglength + 2;
+
+ char* retName = (char*)compGetMemA(length, CMK_DebugOnly);
+
+ /* Now generate the full signature string in the allocated buffer */
+
+ if (className)
+ {
+ strcpy_s(retName, length, className);
+ strcat_s(retName, length, ":");
+ }
+ else
+ {
+ strcpy_s(retName, length, "<NULL>.");
+ }
+
+ strcat_s(retName, length, methodName);
+
+ // append the signature
+ strcat_s(retName, length, "(");
+
+ if (param.siglength > 0)
+ {
+ param.argLst = param.sig.args;
+
+ for (i = 0; i < param.sig.numArgs; i++)
+ {
+ var_types type = eeGetArgType(param.argLst, &param.sig);
+ strcat_s(retName, length, varTypeName(type));
+
+ param.argLst = info.compCompHnd->getArgNext(param.argLst);
+ if (i + 1 < param.sig.numArgs)
+ {
+ strcat_s(retName, length, ",");
+ }
+ }
+ }
+
+ strcat_s(retName, length, ")");
+
+ if (param.returnType != nullptr)
+ {
+ strcat_s(retName, length, ":");
+ strcat_s(retName, length, param.returnType);
+ }
+
+ if (param.hasThis)
+ {
+ strcat_s(retName, length, ":this");
+ }
+
+ assert(strlen(retName) == (length - 1));
+
+ return (retName);
+}
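+
+// A minimal, self-contained sketch of the two-pass (measure, then allocate-and-fill)
+// string building that eeGetMethodFullName uses above. The name and the use of
+// malloc/strcat_s below are illustrative only; the real code sizes the class name,
+// method name, and argument types, then allocates once via compGetMemA.
+static const char* exampleBuildName(const char** parts, unsigned count)
+{
+    // Pass 1: walk the components and measure the total length, including the null.
+    size_t length = 1;
+    for (unsigned i = 0; i < count; i++)
+    {
+        length += strlen(parts[i]);
+    }
+
+    // Pass 2: allocate the buffer once and append each component.
+    char* buffer = (char*)malloc(length);
+    if (buffer == nullptr)
+    {
+        return nullptr;
+    }
+    buffer[0] = '\0';
+    for (unsigned i = 0; i < count; i++)
+    {
+        strcat_s(buffer, length, parts[i]);
+    }
+    return buffer;
+}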
+
+#pragma warning(pop)
+
+#endif // defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+
+/*****************************************************************************/
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
new file mode 100644
index 0000000000..5c991ddf1b
--- /dev/null
+++ b/src/jit/emit.cpp
@@ -0,0 +1,7158 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emit.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "hostallocator.h"
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+/*****************************************************************************
+ *
+ * Represent an emitter location.
+ */
+
+void emitLocation::CaptureLocation(emitter* emit)
+{
+ ig = emit->emitCurIG;
+ codePos = emit->emitCurOffset();
+
+ assert(Valid());
+}
+
+bool emitLocation::IsCurrentLocation(emitter* emit) const
+{
+ assert(Valid());
+ return (ig == emit->emitCurIG) && (codePos == emit->emitCurOffset());
+}
+
+UNATIVE_OFFSET emitLocation::CodeOffset(emitter* emit) const
+{
+ assert(Valid());
+ return emit->emitCodeOffset(ig, codePos);
+}
+
+int emitLocation::GetInsNum() const
+{
+ return emitGetInsNumFromCodePos(codePos);
+}
+
+#ifdef _TARGET_AMD64_
+// Get the instruction offset in the current instruction group, which must be a funclet prolog group.
+// This is used to find an instruction offset used in unwind data.
+// TODO-AMD64-Bug?: We only support a single main function prolog group, but allow for multiple funclet prolog
+// groups (not that we actually use that flexibility, since the funclet prolog will be small). How to
+// handle that?
+UNATIVE_OFFSET emitLocation::GetFuncletPrologOffset(emitter* emit) const
+{
+ assert(ig->igFuncIdx != 0);
+ assert((ig->igFlags & IGF_FUNCLET_PROLOG) != 0);
+ assert(ig == emit->emitCurIG);
+
+ return emit->emitCurIGsize;
+}
+#endif // _TARGET_AMD64_
+
+#ifdef DEBUG
+void emitLocation::Print() const
+{
+ unsigned insNum = emitGetInsNumFromCodePos(codePos);
+ unsigned insOfs = emitGetInsOfsFromCodePos(codePos);
+ printf("(G_M%03u_IG%02u,ins#%d,ofs#%d)", Compiler::s_compMethodsCount, ig->igNum, insNum, insOfs);
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Return the name of an instruction format.
+ */
+
+#if defined(DEBUG) || EMITTER_STATS
+
+const char* emitter::emitIfName(unsigned f)
+{
+ static const char* const ifNames[] = {
+#define IF_DEF(en, op1, op2) "IF_" #en,
+#include "emitfmts.h"
+ };
+
+ static char errBuff[32];
+
+ if (f < sizeof(ifNames) / sizeof(*ifNames))
+ {
+ return ifNames[f];
+ }
+
+ sprintf_s(errBuff, sizeof(errBuff), "??%u??", f);
+ return errBuff;
+}
+
+#endif
+
+#ifdef TRANSLATE_PDB
+
+/* these are protected */
+
+AddrMap* emitter::emitPDBOffsetTable = 0;
+LocalMap* emitter::emitPDBLocalTable = 0;
+bool emitter::emitIsPDBEnabled = true;
+BYTE* emitter::emitILBaseOfCode = 0;
+BYTE* emitter::emitILMethodBase = 0;
+BYTE* emitter::emitILMethodStart = 0;
+BYTE* emitter::emitImgBaseOfCode = 0;
+
+void emitter::MapCode(int ilOffset, BYTE* imgDest)
+{
+ if (emitIsPDBEnabled)
+ {
+ emitPDBOffsetTable->MapSrcToDest(ilOffset, (int)(imgDest - emitImgBaseOfCode));
+ }
+}
+
+void emitter::MapFunc(int imgOff,
+ int procLen,
+ int dbgStart,
+ int dbgEnd,
+ short frameReg,
+ int stkAdjust,
+ int lvaCount,
+ OptJit::LclVarDsc* lvaTable,
+ bool framePtr)
+{
+ if (emitIsPDBEnabled)
+ {
+ // this code stores information about local symbols for the PDB translation
+
+ assert(lvaCount >= 0); // don't allow a negative count
+
+ LvaDesc* rgLvaDesc = 0;
+
+ if (lvaCount > 0)
+ {
+ rgLvaDesc = new LvaDesc[lvaCount];
+
+ if (!rgLvaDesc)
+ {
+ NOMEM();
+ }
+
+ LvaDesc* pDst = rgLvaDesc;
+ OptJit::LclVarDsc* pSrc = lvaTable;
+ for (int i = 0; i < lvaCount; ++i, ++pDst, ++pSrc)
+ {
+ pDst->slotNum = pSrc->lvSlotNum;
+ pDst->isReg = pSrc->lvRegister;
+ pDst->reg = (pSrc->lvRegister ? pSrc->lvRegNum : frameReg);
+ pDst->off = pSrc->lvStkOffs + stkAdjust;
+ }
+ }
+
+ emitPDBLocalTable->AddFunc((int)(emitILMethodBase - emitILBaseOfCode), imgOff - (int)emitImgBaseOfCode, procLen,
+ dbgStart - imgOff, dbgEnd - imgOff, lvaCount, rgLvaDesc, framePtr);
+ // do not delete rgLvaDesc here -- responsibility is now on emitPDBLocalTable destructor
+ }
+}
+
+/* these are public */
+
+void emitter::SetILBaseOfCode(BYTE* pTextBase)
+{
+ emitILBaseOfCode = pTextBase;
+}
+
+void emitter::SetILMethodBase(BYTE* pMethodEntry)
+{
+ emitILMethodBase = pMethodEntry;
+}
+
+void emitter::SetILMethodStart(BYTE* pMethodCode)
+{
+ emitILMethodStart = pMethodCode;
+}
+
+void emitter::SetImgBaseOfCode(BYTE* pTextBase)
+{
+ emitImgBaseOfCode = pTextBase;
+}
+
+void emitter::SetIDBaseToProlog()
+{
+ emitInstrDescILBase = (int)(emitILMethodBase - emitILBaseOfCode);
+}
+
+void emitter::SetIDBaseToOffset(int methodOffset)
+{
+ emitInstrDescILBase = methodOffset + (int)(emitILMethodStart - emitILBaseOfCode);
+}
+
+void emitter::DisablePDBTranslation()
+{
+ // this function should disable PDB translation code
+ emitIsPDBEnabled = false;
+}
+
+bool emitter::IsPDBEnabled()
+{
+ return emitIsPDBEnabled;
+}
+
+void emitter::InitTranslationMaps(int ilCodeSize)
+{
+ if (emitIsPDBEnabled)
+ {
+ emitPDBOffsetTable = AddrMap::Create(ilCodeSize);
+ emitPDBLocalTable = LocalMap::Create();
+ }
+}
+
+void emitter::DeleteTranslationMaps()
+{
+ if (emitPDBOffsetTable)
+ {
+ delete emitPDBOffsetTable;
+ emitPDBOffsetTable = 0;
+ }
+ if (emitPDBLocalTable)
+ {
+ delete emitPDBLocalTable;
+ emitPDBLocalTable = 0;
+ }
+}
+
+void emitter::InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADER** rgpHeader, int numSections)
+{
+ if (emitIsPDBEnabled)
+ {
+ pPDB->InitMaps(rgSecMap, // new PE section header order
+ rgpHeader, // array of section headers
+ numSections, // number of sections
+ emitPDBOffsetTable, // code offset translation table
+ emitPDBLocalTable); // slot variable translation table
+ }
+}
+
+#endif // TRANSLATE_PDB
+
+/*****************************************************************************/
+
+#if EMITTER_STATS
+
+static unsigned totAllocdSize;
+static unsigned totActualSize;
+
+unsigned emitter::emitIFcounts[emitter::IF_COUNT];
+
+static unsigned emitSizeBuckets[] = {100, 1024 * 1, 1024 * 2, 1024 * 3, 1024 * 4, 1024 * 5, 1024 * 10, 0};
+static Histogram emitSizeTable(HostAllocator::getHostAllocator(), emitSizeBuckets);
+
+static unsigned GCrefsBuckets[] = {0, 1, 2, 5, 10, 20, 50, 128, 256, 512, 1024, 0};
+static Histogram GCrefsTable(HostAllocator::getHostAllocator(), GCrefsBuckets);
+
+static unsigned stkDepthBuckets[] = {0, 1, 2, 5, 10, 16, 32, 128, 1024, 0};
+static Histogram stkDepthTable(HostAllocator::getHostAllocator(), stkDepthBuckets);
+
+size_t emitter::emitSizeMethod;
+
+size_t emitter::emitTotMemAlloc;
+unsigned emitter::emitTotalInsCnt;
+unsigned emitter::emitTotalIGcnt;
+unsigned emitter::emitTotalPhIGcnt;
+unsigned emitter::emitTotalIGjmps;
+unsigned emitter::emitTotalIGptrs;
+unsigned emitter::emitTotalIGicnt;
+size_t emitter::emitTotalIGsize;
+unsigned emitter::emitTotalIGmcnt;
+
+unsigned emitter::emitSmallDspCnt;
+unsigned emitter::emitLargeDspCnt;
+
+unsigned emitter::emitSmallCnsCnt;
+unsigned emitter::emitLargeCnsCnt;
+unsigned emitter::emitSmallCns[SMALL_CNS_TSZ];
+
+void emitterStaticStats(FILE* fout)
+{
+ // insGroup members
+
+ fprintf(fout, "\n");
+ fprintf(fout, "insGroup:\n");
+ fprintf(fout, "Offset of igNext = %2u\n", offsetof(insGroup, igNext));
+#ifdef DEBUG
+ fprintf(fout, "Offset of igSelf = %2u\n", offsetof(insGroup, igSelf));
+#endif
+ fprintf(fout, "Offset of igNum = %2u\n", offsetof(insGroup, igNum));
+ fprintf(fout, "Offset of igOffs = %2u\n", offsetof(insGroup, igOffs));
+ fprintf(fout, "Offset of igFuncIdx = %2u\n", offsetof(insGroup, igFuncIdx));
+ fprintf(fout, "Offset of igFlags = %2u\n", offsetof(insGroup, igFlags));
+ fprintf(fout, "Offset of igSize = %2u\n", offsetof(insGroup, igSize));
+ fprintf(fout, "Offset of igData = %2u\n", offsetof(insGroup, igData));
+#if EMIT_TRACK_STACK_DEPTH
+ fprintf(fout, "Offset of igStkLvl = %2u\n", offsetof(insGroup, igStkLvl));
+#endif
+ fprintf(fout, "Offset of igGCregs = %2u\n", offsetof(insGroup, igGCregs));
+ fprintf(fout, "Offset of igInsCnt = %2u\n", offsetof(insGroup, igInsCnt));
+ fprintf(fout, "Size of insGroup = %u\n", sizeof(insGroup));
+
+ // insPlaceholderGroupData members
+
+ fprintf(fout, "\n");
+ fprintf(fout, "insPlaceholderGroupData:\n");
+ fprintf(fout, "Offset of igPhNext = %2u\n", offsetof(insPlaceholderGroupData, igPhNext));
+ fprintf(fout, "Offset of igPhBB = %2u\n", offsetof(insPlaceholderGroupData, igPhBB));
+ fprintf(fout, "Offset of igPhInitGCrefVars = %2u\n", offsetof(insPlaceholderGroupData, igPhInitGCrefVars));
+ fprintf(fout, "Offset of igPhInitGCrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhInitGCrefRegs));
+ fprintf(fout, "Offset of igPhInitByrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhInitByrefRegs));
+ fprintf(fout, "Offset of igPhPrevGCrefVars = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevGCrefVars));
+ fprintf(fout, "Offset of igPhPrevGCrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevGCrefRegs));
+ fprintf(fout, "Offset of igPhPrevByrefRegs = %2u\n", offsetof(insPlaceholderGroupData, igPhPrevByrefRegs));
+ fprintf(fout, "Offset of igPhType = %2u\n", offsetof(insPlaceholderGroupData, igPhType));
+ fprintf(fout, "Size of insPlaceholderGroupData = %u\n", sizeof(insPlaceholderGroupData));
+
+ fprintf(fout, "\n");
+ fprintf(fout, "Size of tinyID = %2u\n", TINY_IDSC_SIZE);
+ fprintf(fout, "Size of instrDesc = %2u\n", sizeof(emitter::instrDesc));
+ // fprintf(fout, "Offset of _idIns = %2u\n", offsetof(emitter::instrDesc, _idIns ));
+ // fprintf(fout, "Offset of _idInsFmt = %2u\n", offsetof(emitter::instrDesc, _idInsFmt ));
+ // fprintf(fout, "Offset of _idOpSize = %2u\n", offsetof(emitter::instrDesc, _idOpSize ));
+ // fprintf(fout, "Offset of idSmallCns = %2u\n", offsetof(emitter::instrDesc, idSmallCns ));
+ // fprintf(fout, "Offset of _idAddrUnion= %2u\n", offsetof(emitter::instrDesc, _idAddrUnion));
+ // fprintf(fout, "\n");
+ // fprintf(fout, "Size of _idAddrUnion= %2u\n", sizeof(((emitter::instrDesc*)0)->_idAddrUnion));
+
+ fprintf(fout, "\n");
+ fprintf(fout, "GCInfo::regPtrDsc:\n");
+ fprintf(fout, "Offset of rpdNext = %2u\n", offsetof(GCInfo::regPtrDsc, rpdNext));
+ fprintf(fout, "Offset of rpdOffs = %2u\n", offsetof(GCInfo::regPtrDsc, rpdOffs));
+ fprintf(fout, "Offset of <union> = %2u\n", offsetof(GCInfo::regPtrDsc, rpdPtrArg));
+ fprintf(fout, "Size of GCInfo::regPtrDsc = %2u\n", sizeof(GCInfo::regPtrDsc));
+
+ fprintf(fout, "\n");
+}
+
+void emitterStats(FILE* fout)
+{
+ if (totAllocdSize > 0)
+ {
+ assert(totActualSize <= totAllocdSize);
+
+ fprintf(fout, "\nTotal allocated code size = %u\n", totAllocdSize);
+
+ if (totActualSize < totAllocdSize)
+ {
+ fprintf(fout, "Total generated code size = %u ", totActualSize);
+
+ fprintf(fout, "(%4.3f%% waste)", 100 * ((totAllocdSize - totActualSize) / (double)totActualSize));
+ fprintf(fout, "\n");
+ }
+
+ assert(emitter::emitTotalInsCnt);
+
+ fprintf(fout, "Average of %4.2f bytes of code generated per instruction\n",
+ (double)totActualSize / emitter::emitTotalInsCnt);
+ }
+
+ fprintf(fout, "\nInstruction format frequency table:\n\n");
+
+ unsigned f, ic = 0, dc = 0;
+
+ for (f = 0; f < emitter::IF_COUNT; f++)
+ {
+ ic += emitter::emitIFcounts[f];
+ }
+
+ for (f = 0; f < emitter::IF_COUNT; f++)
+ {
+ unsigned c = emitter::emitIFcounts[f];
+
+ if ((c > 0) && (1000 * c >= ic))
+ {
+ dc += c;
+ fprintf(fout, " %-13s %8u (%5.2f%%)\n", emitter::emitIfName(f), c, 100.0 * c / ic);
+ }
+ }
+
+ fprintf(fout, " --------------------------------\n");
+ fprintf(fout, " %-13s %8u (%5.2f%%)\n", "Total shown", dc, 100.0 * dc / ic);
+
+ if (emitter::emitTotalIGmcnt)
+ {
+ fprintf(fout, "Total of %8u methods\n", emitter::emitTotalIGmcnt);
+ fprintf(fout, "Total of %8u insGroup\n", emitter::emitTotalIGcnt);
+ fprintf(fout, "Total of %8u insPlaceholderGroupData\n", emitter::emitTotalPhIGcnt);
+ fprintf(fout, "Total of %8u instructions\n", emitter::emitTotalIGicnt);
+ fprintf(fout, "Total of %8u jumps\n", emitter::emitTotalIGjmps);
+ fprintf(fout, "Total of %8u GC livesets\n", emitter::emitTotalIGptrs);
+ fprintf(fout, "\n");
+ fprintf(fout, "Average of %8.1lf insGroup per method\n",
+ (double)emitter::emitTotalIGcnt / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf insPhGroup per method\n",
+ (double)emitter::emitTotalPhIGcnt / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf instructions per method\n",
+ (double)emitter::emitTotalIGicnt / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf desc. bytes per method\n",
+ (double)emitter::emitTotalIGsize / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf jumps per method\n",
+ (double)emitter::emitTotalIGjmps / emitter::emitTotalIGmcnt);
+ fprintf(fout, "Average of %8.1lf GC livesets per method\n",
+ (double)emitter::emitTotalIGptrs / emitter::emitTotalIGmcnt);
+ fprintf(fout, "\n");
+ fprintf(fout, "Average of %8.1lf instructions per group \n",
+ (double)emitter::emitTotalIGicnt / emitter::emitTotalIGcnt);
+ fprintf(fout, "Average of %8.1lf desc. bytes per group \n",
+ (double)emitter::emitTotalIGsize / emitter::emitTotalIGcnt);
+ fprintf(fout, "Average of %8.1lf jumps per group \n",
+ (double)emitter::emitTotalIGjmps / emitter::emitTotalIGcnt);
+ fprintf(fout, "\n");
+ fprintf(fout, "Average of %8.1lf bytes per instrDesc\n",
+ (double)emitter::emitTotalIGsize / emitter::emitTotalIGicnt);
+ fprintf(fout, "\n");
+ fprintf(fout, "A total of %8u desc. bytes\n", emitter::emitTotalIGsize);
+ fprintf(fout, "\n");
+ }
+
+ fprintf(fout, "Descriptor size distribution:\n");
+ emitSizeTable.dump(fout);
+ fprintf(fout, "\n");
+
+ fprintf(fout, "GC ref frame variable counts:\n");
+ GCrefsTable.dump(fout);
+ fprintf(fout, "\n");
+
+ fprintf(fout, "Max. stack depth distribution:\n");
+ stkDepthTable.dump(fout);
+ fprintf(fout, "\n");
+
+ int i;
+ unsigned c;
+ unsigned m;
+
+ if (emitter::emitSmallCnsCnt || emitter::emitLargeCnsCnt)
+ {
+ fprintf(fout, "SmallCnsCnt = %6u\n", emitter::emitSmallCnsCnt);
+ fprintf(fout, "LargeCnsCnt = %6u (%3u %% of total)\n", emitter::emitLargeCnsCnt,
+ 100 * emitter::emitLargeCnsCnt / (emitter::emitLargeCnsCnt + emitter::emitSmallCnsCnt));
+ }
+
+#if 0
+ // TODO-Cleanup: Why is this in #if 0? Is EMITTER_STATS ever used? Fix or delete this.
+ if (emitter::emitSmallCnsCnt)
+ {
+ fprintf(fout, "\n");
+
+ m = emitter::emitSmallCnsCnt/1000 + 1;
+
+ for (i = ID_MIN_SMALL_CNS; i < ID_MAX_SMALL_CNS; i++)
+ {
+ c = emitter::emitSmallCns[i-ID_MIN_SMALL_CNS];
+ if (c >= m)
+ fprintf(fout, "cns[%4d] = %u\n", i, c);
+ }
+ }
+#endif // 0
+
+ fprintf(fout, "%8u bytes allocated in the emitter\n", emitter::emitTotMemAlloc);
+}
+
+#endif // EMITTER_STATS
+
+/*****************************************************************************/
+
+const unsigned short emitTypeSizes[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) sze,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+const unsigned short emitTypeActSz[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) asze,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Initialize the emitter - called once, at DLL load time.
+ */
+
+void emitter::emitInit()
+{
+}
+
+/*****************************************************************************
+ *
+ * Shut down the emitter - called once, at DLL exit time.
+ */
+
+void emitter::emitDone()
+{
+}
+
+/*****************************************************************************
+ *
+ * Allocate memory.
+ */
+
+void* emitter::emitGetMem(size_t sz)
+{
+ assert(sz % sizeof(int) == 0);
+
+#if EMITTER_STATS
+ emitTotMemAlloc += sz;
+#endif
+
+ return emitComp->compGetMem(sz, CMK_InstDesc);
+}
+
+/*****************************************************************************
+ *
+ * emitLclVarAddr support methods
+ */
+void emitLclVarAddr::initLclVarAddr(int varNum, unsigned offset)
+{
+ if (varNum < 32768)
+ {
+ if (varNum >= 0)
+ {
+ if (offset < 32768)
+ {
+ _lvaTag = LVA_STANDARD_ENCODING;
+ _lvaExtra = offset; // offset known to be in [0..32767]
+ _lvaVarNum = (unsigned)varNum; // varNum known to be in [0..32767]
+ }
+ else // offset >= 32768
+ {
+ // We could support larger local offsets here at the cost of fewer varNums
+ if (offset >= 65536)
+ {
+ IMPL_LIMITATION("JIT doesn't support offsets larger than 65535 into valuetypes\n");
+ }
+
+ _lvaTag = LVA_LARGE_OFFSET;
+ _lvaExtra = (offset - 32768); // (offset-32768) is known to be in [0..32767]
+ _lvaVarNum = (unsigned)varNum; // varNum known to be in [0..32767]
+ }
+ }
+ else // varNum < 0, These are used for Compiler spill temps
+ {
+ if (varNum < -32767)
+ {
+ IMPL_LIMITATION("JIT doesn't support more than 32767 Compiler Spill temps\n");
+ }
+ if (offset > 32767)
+ {
+ IMPL_LIMITATION(
+ "JIT doesn't support offsets larger than 32767 into valuetypes for Compiler Spill temps\n");
+ }
+
+ _lvaTag = LVA_COMPILER_TEMP;
+ _lvaExtra = offset; // offset known to be in [0..32767]
+ _lvaVarNum = (unsigned)(-varNum); // -varNum known to be in [1..32767]
+ }
+ }
+ else // varNum >= 32768
+ {
+ if (offset >= 256)
+ {
+ IMPL_LIMITATION("JIT doesn't support offsets larger than 255 into valuetypes for local vars > 32767\n");
+ }
+ if (varNum >= 0x00400000)
+ { // 0x00400000 == 2^22
+ IMPL_LIMITATION("JIT doesn't support more than 2^22 variables\n");
+ }
+
+ _lvaTag = LVA_LARGE_VARNUM;
+ _lvaVarNum = varNum & 0x00007FFF; // varNum bits 14 to 0
+ _lvaExtra = (varNum & 0x003F8000) >> 15; // varNum bits 21 to 15 in _lvaExtra bits 6 to 0, 7 bits total
+ _lvaExtra |= (offset << 7); // offset bits 7 to 0 in _lvaExtra bits 14 to 7, 8 bits total
+ }
+}
+
+// Returns the variable to access. Note that it returns a negative number for compiler spill temps.
+int emitLclVarAddr::lvaVarNum()
+{
+ switch (_lvaTag)
+ {
+ case LVA_COMPILER_TEMP:
+ return -((int)_lvaVarNum);
+ case LVA_LARGE_VARNUM:
+ return (int)(((_lvaExtra & 0x007F) << 15) + _lvaVarNum);
+ default: // LVA_STANDARD_ENCODING or LVA_LARGE_OFFSET
+ assert((_lvaTag == LVA_STANDARD_ENCODING) || (_lvaTag == LVA_LARGE_OFFSET));
+ return (int)_lvaVarNum;
+ }
+}
+
+unsigned emitLclVarAddr::lvaOffset() // returns the offset into the variable to access
+{
+ switch (_lvaTag)
+ {
+ case LVA_LARGE_OFFSET:
+ return (32768 + _lvaExtra);
+ case LVA_LARGE_VARNUM:
+ return (_lvaExtra & 0x7F80) >> 7;
+ default: // LVA_STANDARD_ENCODING or LVA_COMPILER_TEMP
+ assert((_lvaTag == LVA_STANDARD_ENCODING) || (_lvaTag == LVA_COMPILER_TEMP));
+ return _lvaExtra;
+ }
+}
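+
+// A minimal, self-contained sketch of the LVA_LARGE_VARNUM round trip implemented
+// above: the variable number is split across _lvaVarNum (bits 14..0) and the low
+// 7 bits of _lvaExtra, while the byte offset occupies _lvaExtra bits 14..7. Plain
+// unsigned locals stand in for the bitfields; the function name is illustrative only.
+static void exampleLargeVarNumRoundTrip()
+{
+    unsigned varNum = 40000; // >= 32768, so the large-varnum encoding applies
+    unsigned offset = 12;    // must be < 256 for this encoding
+
+    unsigned lvaVarNum = varNum & 0x00007FFF;         // varNum bits 14..0
+    unsigned lvaExtra  = (varNum & 0x003F8000) >> 15; // varNum bits 21..15 -> extra bits 6..0
+    lvaExtra |= (offset << 7);                        // offset bits 7..0   -> extra bits 14..7
+
+    unsigned decodedVarNum = ((lvaExtra & 0x007F) << 15) + lvaVarNum;
+    unsigned decodedOffset = (lvaExtra & 0x7F80) >> 7;
+
+    assert(decodedVarNum == varNum);
+    assert(decodedOffset == offset);
+}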
+
+/*****************************************************************************
+ *
+ * Record some info about the method about to be emitted.
+ */
+
+void emitter::emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle)
+{
+ emitComp = comp;
+ emitCmpHandle = cmpHandle;
+}
+
+void emitter::emitEndCG()
+{
+}
+
+/*****************************************************************************
+ *
+ * Prepare the given IG for emission of code.
+ */
+
+void emitter::emitGenIG(insGroup* ig)
+{
+ /* Set the "current IG" value */
+
+ emitCurIG = ig;
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Record the stack level on entry to this group */
+
+ ig->igStkLvl = emitCurStackLvl;
+
+ // If we don't have enough bits in igStkLvl, refuse to compile
+
+ if (ig->igStkLvl != emitCurStackLvl)
+ {
+ IMPL_LIMITATION("Too many arguments pushed on stack");
+ }
+
+// printf("Start IG #%02u [stk=%02u]\n", ig->igNum, emitCurStackLvl);
+
+#endif
+
+ if (emitNoGCIG)
+ {
+ ig->igFlags |= IGF_NOGCINTERRUPT;
+ }
+
+ /* Prepare to issue instructions */
+
+ emitCurIGinsCnt = 0;
+ emitCurIGsize = 0;
+
+ assert(emitCurIGjmpList == nullptr);
+
+ /* Allocate the temp instruction buffer if we haven't done so */
+
+ if (emitCurIGfreeBase == nullptr)
+ {
+ emitIGbuffSize = SC_IG_BUFFER_SIZE;
+ emitCurIGfreeBase = (BYTE*)emitGetMem(emitIGbuffSize);
+ }
+
+ emitCurIGfreeNext = emitCurIGfreeBase;
+ emitCurIGfreeEndp = emitCurIGfreeBase + emitIGbuffSize;
+}
+
+/*****************************************************************************
+ *
+ * Finish and save the current IG.
+ */
+
+insGroup* emitter::emitSavIG(bool emitAdd)
+{
+ insGroup* ig;
+ BYTE* id;
+
+ size_t sz;
+ size_t gs;
+
+ assert(emitCurIGfreeNext <= emitCurIGfreeEndp);
+
+ /* Get hold of the IG descriptor */
+
+ ig = emitCurIG;
+ assert(ig);
+
+ /* Compute how much code we've generated */
+
+ sz = emitCurIGfreeNext - emitCurIGfreeBase;
+
+ /* Compute the total size we need to allocate */
+
+ gs = roundUp(sz);
+
+ /* Do we need space for GC? */
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ /* Is the initial set of live GC vars different from the previous one? */
+
+ if (emitForceStoreGCState || !VarSetOps::Equal(emitComp, emitPrevGCrefVars, emitInitGCrefVars))
+ {
+ /* Remember that we will have a new set of live GC variables */
+
+ ig->igFlags |= IGF_GC_VARS;
+
+#if EMITTER_STATS
+ emitTotalIGptrs++;
+#endif
+
+ /* We'll allocate extra space to record the liveset */
+
+ gs += sizeof(VARSET_TP);
+ }
+
+ /* Is the initial set of live Byref regs different from the previous one? */
+
+ /* Remember that we will have a new set of live byref registers */
+
+ ig->igFlags |= IGF_BYREF_REGS;
+
+ /* We'll allocate extra space (DWORD aligned) to record the GC regs */
+
+ gs += sizeof(int);
+ }
+
+ /* Allocate space for the instructions and optional liveset */
+
+ id = (BYTE*)emitGetMem(gs);
+
+ /* Do we need to store the byref regs */
+
+ if (ig->igFlags & IGF_BYREF_REGS)
+ {
+ /* Record the byref regs in front of the instructions */
+
+ *castto(id, unsigned*)++ = (unsigned)emitInitByrefRegs;
+ }
+
+ /* Do we need to store the liveset? */
+
+ if (ig->igFlags & IGF_GC_VARS)
+ {
+ /* Record the liveset in front of the instructions */
+ VarSetOps::AssignNoCopy(emitComp, (*castto(id, VARSET_TP*)), VarSetOps::MakeEmpty(emitComp));
+ VarSetOps::Assign(emitComp, (*castto(id, VARSET_TP*)++), emitInitGCrefVars);
+ }
+
+ /* Record the collected instructions */
+
+ assert((ig->igFlags & IGF_PLACEHOLDER) == 0);
+ ig->igData = id;
+
+ memcpy(id, emitCurIGfreeBase, sz);
+
+#ifdef DEBUG
+ if (false && emitComp->verbose) // this is not useful in normal dumps (hence it is normally under "if (false)")
+ {
+ // If there's an error during emission, we may want to connect the post-copy address
+ // of an instrDesc with the pre-copy address (the one that was originally created). This
+ // printing enables that.
+ printf("copying instruction group from [0x%x..0x%x) to [0x%x..0x%x).\n", dspPtr(emitCurIGfreeBase),
+ dspPtr(emitCurIGfreeBase + sz), dspPtr(id), dspPtr(id + sz));
+ }
+#endif
+
+ /* Record how many instructions and bytes of code this group contains */
+
+ noway_assert((BYTE)emitCurIGinsCnt == emitCurIGinsCnt);
+ noway_assert((unsigned short)emitCurIGsize == emitCurIGsize);
+
+ ig->igInsCnt = (BYTE)emitCurIGinsCnt;
+ ig->igSize = (unsigned short)emitCurIGsize;
+ emitCurCodeOffset += emitCurIGsize;
+ assert(IsCodeAligned(emitCurCodeOffset));
+
+#if EMITTER_STATS
+ emitTotalIGicnt += emitCurIGinsCnt;
+ emitTotalIGsize += sz;
+ emitSizeMethod += sz;
+#endif
+
+ // printf("Group [%08X]%3u has %2u instructions (%4u bytes at %08X)\n", ig, ig->igNum, emitCurIGinsCnt, sz, id);
+
+ /* Record the live GC register set - if and only if it is not an emitter added block */
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ ig->igGCregs = (regMaskSmall)emitInitGCrefRegs;
+ }
+
+ if (!emitAdd)
+ {
+ /* Update the previously recorded live GC ref sets, but not
+ if we are starting an "overflow" buffer. Note that this is
+ only used to determine whether we need to store or not store
+ the GC ref sets for the next IG, which is dependent on exactly
+ what the state of the emitter GC ref sets will be when the
+ next IG is processed in the emitter.
+ */
+
+ VarSetOps::Assign(emitComp, emitPrevGCrefVars, emitThisGCrefVars);
+ emitPrevGCrefRegs = emitThisGCrefRegs;
+ emitPrevByrefRegs = emitThisByrefRegs;
+
+ emitForceStoreGCState = false;
+ }
+
+#ifdef DEBUG
+ if (emitComp->opts.dspCode)
+ {
+ printf("\n G_M%03u_IG%02u:", Compiler::s_compMethodsCount, ig->igNum);
+ if (emitComp->verbose)
+ {
+ printf(" ; offs=%06XH, funclet=%02u", ig->igOffs, ig->igFuncIdx);
+ }
+ else
+ {
+ printf(" ; funclet=%02u", ig->igFuncIdx);
+ }
+ printf("\n");
+ }
+#endif
+
+ /* Did we have any jumps in this group? */
+
+ if (emitCurIGjmpList)
+ {
+ instrDescJmp* list = nullptr;
+ instrDescJmp* last = nullptr;
+
+ /* Move jumps to the global list, update their 'next' links */
+
+ do
+ {
+ /* Grab the jump and remove it from the list */
+
+ instrDescJmp* oj = emitCurIGjmpList;
+ emitCurIGjmpList = oj->idjNext;
+
+ /* Figure out the address of where the jump got copied */
+
+ size_t of = (BYTE*)oj - emitCurIGfreeBase;
+ instrDescJmp* nj = (instrDescJmp*)(ig->igData + of);
+
+ // printf("Jump moved from %08X to %08X\n", oj, nj);
+ // printf("jmp [%08X] at %08X + %03u\n", nj, ig, nj->idjOffs);
+
+ assert(nj->idjIG == ig);
+ assert(nj->idIns() == oj->idIns());
+ assert(nj->idjNext == oj->idjNext);
+
+ /* Make sure the jumps are correctly ordered */
+
+ assert(last == nullptr || last->idjOffs > nj->idjOffs);
+
+ if (ig->igFlags & IGF_FUNCLET_PROLOG)
+ {
+ // Our funclet prologs have short jumps; if the prolog ever had
+ // long jumps, then we'd have to insert the list in sorted order
+ // rather than just appending to the emitJumpList.
+ noway_assert(nj->idjShort);
+ if (nj->idjShort)
+ {
+ continue;
+ }
+ }
+
+ /* Append the new jump to the list */
+
+ nj->idjNext = list;
+ list = nj;
+
+ if (last == nullptr)
+ {
+ last = nj;
+ }
+ } while (emitCurIGjmpList);
+
+ if (last != nullptr)
+ {
+ /* Append the jump(s) from this IG to the global list */
+ bool prologJump = (ig == emitPrologIG);
+ if ((emitJumpList == nullptr) || prologJump)
+ {
+ last->idjNext = emitJumpList;
+ emitJumpList = list;
+ }
+ else
+ {
+ last->idjNext = nullptr;
+ emitJumpLast->idjNext = list;
+ }
+
+ if (!prologJump || (emitJumpLast == nullptr))
+ {
+ emitJumpLast = last;
+ }
+ }
+ }
+
+ /* Fix the last instruction field */
+
+ if (sz != 0)
+ {
+ assert(emitLastIns != nullptr);
+ assert(emitCurIGfreeBase <= (BYTE*)emitLastIns);
+ assert((BYTE*)emitLastIns < emitCurIGfreeBase + sz);
+ emitLastIns = (instrDesc*)((BYTE*)id + ((BYTE*)emitLastIns - (BYTE*)emitCurIGfreeBase));
+ }
+
+ /* Reset the buffer free pointers */
+
+ emitCurIGfreeNext = emitCurIGfreeBase;
+
+ return ig;
+}
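+
+// A minimal, self-contained sketch of the pointer-rebasing arithmetic emitSavIG uses
+// above for the copied jump descriptors and for emitLastIns: a pointer into the
+// temporary buffer is translated by carrying its byte offset over to the permanent
+// copy. The buffers and the function name below are illustrative only.
+static void exampleRebaseAfterCopy()
+{
+    alignas(int) char oldBuf[64] = {};
+    alignas(int) char newBuf[64] = {};
+
+    int* oldItem = (int*)(oldBuf + 8); // an item living in the temporary buffer
+    *oldItem     = 7;
+
+    memcpy(newBuf, oldBuf, sizeof(oldBuf));
+
+    size_t offset  = (char*)oldItem - oldBuf; // byte offset within the old buffer
+    int*   newItem = (int*)(newBuf + offset); // the same offset in the new buffer
+    assert(*newItem == 7);
+}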
+
+#ifdef LEGACY_BACKEND
+void emitter::emitTmpSizeChanged(unsigned tmpSize)
+{
+ assert(emitGrowableMaxByteOffs <= SCHAR_MAX);
+
+#ifdef DEBUG
+ // Workaround for FP code
+ bool bAssert = JitConfig.JitMaxTempAssert() ? true : false;
+
+ if (tmpSize > emitMaxTmpSize && bAssert)
+ {
+ // TODO-Review: We have a known issue involving floating point code and this assert.
+ // The generated code will be OK; this is only a warning.
+ // To suppress this assert in the future, set the registry key JITMaxTempAssert=0.
+ //
+ assert(!"Incorrect max tmp size set.");
+ }
+#endif
+
+ if (tmpSize <= emitMaxTmpSize)
+ return;
+
+ unsigned change = tmpSize - emitMaxTmpSize;
+
+ /* If we have used a small offset to access a variable, growing the
+ temp size is a problem if we should have used a large offset instead.
+ Detect if such a situation happens and bail */
+
+ if (emitGrowableMaxByteOffs <= SCHAR_MAX && (emitGrowableMaxByteOffs + change) > SCHAR_MAX)
+ {
+#ifdef DEBUG
+ if (emitComp->verbose)
+ printf("Under-estimated var offset encoding size for ins #%Xh\n", emitMaxByteOffsIdNum);
+#endif
+ IMPL_LIMITATION("Should have used large offset to access var");
+ }
+
+ emitMaxTmpSize = tmpSize;
+ emitGrowableMaxByteOffs += change;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Start generating code to be scheduled; called once per method.
+ */
+
+void emitter::emitBegFN(bool hasFramePtr
+#if defined(DEBUG)
+ ,
+ bool chkAlign
+#endif
+#ifdef LEGACY_BACKEND
+ ,
+ unsigned lclSize
+#endif // LEGACY_BACKEND
+ ,
+ unsigned maxTmpSize)
+{
+ insGroup* ig;
+
+ /* Assume we won't need the temp instruction buffer */
+
+ emitCurIGfreeBase = nullptr;
+ emitIGbuffSize = 0;
+
+ /* Record stack frame info (the temp size is just an estimate) */
+
+ emitHasFramePtr = hasFramePtr;
+
+ emitMaxTmpSize = maxTmpSize;
+
+#ifdef LEGACY_BACKEND
+ emitLclSize = lclSize;
+ emitGrowableMaxByteOffs = 0;
+#ifdef DEBUG
+ emitMaxByteOffsIdNum = (unsigned)-1;
+#endif // DEBUG
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ emitChkAlign = chkAlign;
+#endif
+
+ /* We have no epilogs yet */
+
+ emitEpilogSize = 0;
+ emitEpilogCnt = 0;
+
+#ifdef _TARGET_XARCH_
+ emitExitSeqBegLoc.Init();
+ emitExitSeqSize = INT_MAX;
+#endif // _TARGET_XARCH_
+
+ emitPlaceholderList = emitPlaceholderLast = nullptr;
+
+#ifdef JIT32_GCENCODER
+ emitEpilogList = emitEpilogLast = NULL;
+#endif // JIT32_GCENCODER
+
+ /* We don't have any jumps */
+
+ emitJumpList = emitJumpLast = nullptr;
+ emitCurIGjmpList = nullptr;
+
+ emitFwdJumps = false;
+ emitNoGCIG = false;
+ emitForceNewIG = false;
+
+ /* We have not recorded any live sets */
+
+ assert(VarSetOps::IsEmpty(emitComp, emitThisGCrefVars));
+ assert(VarSetOps::IsEmpty(emitComp, emitInitGCrefVars));
+ assert(VarSetOps::IsEmpty(emitComp, emitPrevGCrefVars));
+ emitThisGCrefRegs = RBM_NONE;
+ emitInitGCrefRegs = RBM_NONE;
+ emitPrevGCrefRegs = RBM_NONE;
+ emitThisByrefRegs = RBM_NONE;
+ emitInitByrefRegs = RBM_NONE;
+ emitPrevByrefRegs = RBM_NONE;
+
+ emitForceStoreGCState = false;
+
+#ifdef DEBUG
+
+ emitIssuing = false;
+
+#endif
+
+ /* Assume there will be no GC ref variables */
+
+ emitGCrFrameOffsMin = emitGCrFrameOffsMax = emitGCrFrameOffsCnt = 0;
+#ifdef DEBUG
+ emitGCrFrameLiveTab = nullptr;
+#endif
+
+ /* We have no groups / code at this point */
+
+ emitIGlist = emitIGlast = nullptr;
+
+ emitCurCodeOffset = 0;
+ emitFirstColdIG = nullptr;
+ emitTotalCodeSize = 0;
+
+#if EMITTER_STATS
+ emitTotalIGmcnt++;
+ emitSizeMethod = 0;
+#endif
+
+ emitInsCount = 0;
+
+ /* The stack is empty now */
+
+ emitCurStackLvl = 0;
+
+#if EMIT_TRACK_STACK_DEPTH
+ emitMaxStackDepth = 0;
+ emitCntStackDepth = sizeof(int);
+#endif
+
+ /* No data sections have been created */
+
+ emitDataSecCur = nullptr;
+
+ memset(&emitConsDsc, 0, sizeof(emitConsDsc));
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ // for random NOP insertion
+
+ emitEnableRandomNops();
+ emitComp->info.compRNG.Init(emitComp->info.compChecksum);
+ emitNextNop = emitNextRandomNop();
+ emitInInstrumentation = false;
+#endif // PSEUDORANDOM_NOP_INSERTION
+
+ /* Create the first IG, it will be used for the prolog */
+
+ emitNxtIGnum = 1;
+
+ emitPrologIG = emitIGlist = emitIGlast = emitCurIG = ig = emitAllocIG();
+
+ emitLastIns = nullptr;
+
+ ig->igNext = nullptr;
+
+#ifdef DEBUG
+ emitScratchSigInfo = nullptr;
+#endif // DEBUG
+
+ /* Append another group, to start generating the method body */
+
+ emitNewIG();
+}
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+int emitter::emitNextRandomNop()
+{
+ return emitComp->info.compRNG.Next(1, 9);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Done generating code to be scheduled; called once per method.
+ */
+
+void emitter::emitEndFN()
+{
+}
+
+// member function iiaIsJitDataOffset for idAddrUnion, defers to Compiler::eeIsJitDataOffs
+bool emitter::instrDesc::idAddrUnion::iiaIsJitDataOffset() const
+{
+ return Compiler::eeIsJitDataOffs(iiaFieldHnd);
+}
+
+// member function iiaGetJitDataOffset for idAddrUnion, defers to Compiler::eeGetJitDataOffs
+int emitter::instrDesc::idAddrUnion::iiaGetJitDataOffset() const
+{
+ assert(iiaIsJitDataOffset());
+ return Compiler::eeGetJitDataOffs(iiaFieldHnd);
+}
+
+void emitter::dispIns(instrDesc* id)
+{
+#ifdef DEBUG
+ emitInsSanityCheck(id);
+
+ if (emitComp->opts.dspCode)
+ {
+ emitDispIns(id, true, false, false);
+ }
+
+#if EMIT_TRACK_STACK_DEPTH
+ assert((int)emitCurStackLvl >= 0);
+#endif
+ size_t sz = emitSizeOfInsDsc(id);
+ assert(id->idDebugOnlyInfo()->idSize == sz);
+#endif // DEBUG
+
+#if EMITTER_STATS
+ emitIFcounts[id->idInsFmt()]++;
+#endif
+}
+
+void emitter::appendToCurIG(instrDesc* id)
+{
+ emitCurIGsize += id->idCodeSize();
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) an instruction offset.
+ */
+
+#ifdef DEBUG
+
+void emitter::emitDispInsOffs(unsigned offs, bool doffs)
+{
+ if (doffs)
+ {
+ printf("%06X", offs);
+ }
+ else
+ {
+ printf(" ");
+ }
+}
+
+#endif // DEBUG
+
+#ifdef JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ * Call the specified function pointer for each epilog block in the current
+ * method with the epilog's relative code offset. Returns the sum of the
+ * values returned by the callback.
+ */
+
+size_t emitter::emitGenEpilogLst(size_t (*fp)(void*, unsigned), void* cp)
+{
+ EpilogList* el;
+ size_t sz;
+
+ for (el = emitEpilogList, sz = 0; el; el = el->elNext)
+ {
+ assert(el->elIG->igFlags & IGF_EPILOG);
+
+ UNATIVE_OFFSET ofs =
+ el->elIG->igOffs; // The epilog starts at the beginning of the IG, so the IG offset is correct
+
+ sz += fp(cp, ofs);
+ }
+
+ return sz;
+}
+
+#endif // JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ * The following series of methods allocates instruction descriptors.
+ */
+
+void* emitter::emitAllocInstr(size_t sz, emitAttr opsz)
+{
+ instrDesc* id;
+
+#ifdef DEBUG
+ // Under STRESS_EMITTER, put every instruction in its own instruction group.
+ // We can't do this for a prolog, epilog, funclet prolog, or funclet epilog,
+ // because those are generated out of order. We currently have a limitation
+ // where the jump shortening pass uses the instruction group number to determine
+ // if something is earlier or later in the code stream. This implies that
+ // these groups cannot be more than a single instruction group. Note that
+ // the prolog/epilog placeholder groups ARE generated in order, and are
+ // re-used. But generating additional groups would not work.
+ if (emitComp->compStressCompile(Compiler::STRESS_EMITTER, 1) && emitCurIGinsCnt && !emitIGisInProlog(emitCurIG) &&
+ !emitIGisInEpilog(emitCurIG)
+#if FEATURE_EH_FUNCLETS
+ && !emitIGisInFuncletProlog(emitCurIG) && !emitIGisInFuncletEpilog(emitCurIG)
+#endif // FEATURE_EH_FUNCLETS
+ )
+ {
+ emitNxtIG(true);
+ }
+#endif
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ // TODO-ARM-Bug?: PSEUDORANDOM_NOP_INSERTION is not defined for _TARGET_ARM_
+ // ARM - This is currently broken on _TARGET_ARM_
+ // When nopSize is odd we misalign emitCurIGsize
+ //
+ if (!(emitComp->opts.eeFlags & CORJIT_FLG_PREJIT) && !emitInInstrumentation &&
+ !emitIGisInProlog(emitCurIG) // don't do this in prolog or epilog
+ && !emitIGisInEpilog(emitCurIG) &&
+ emitRandomNops // sometimes we turn off where exact codegen is needed (pinvoke inline)
+ )
+ {
+ if (emitNextNop == 0)
+ {
+ int nopSize = 4;
+ emitInInstrumentation = true;
+ instrDesc* idnop = emitNewInstr();
+ emitInInstrumentation = false;
+ idnop->idInsFmt(IF_NONE);
+ idnop->idIns(INS_nop);
+#if defined(_TARGET_XARCH_)
+ idnop->idCodeSize(nopSize);
+#else
+#error "Undefined target for pseudorandom NOP insertion"
+#endif
+
+ emitCurIGsize += nopSize;
+ emitNextNop = emitNextRandomNop();
+ }
+ else
+ emitNextNop--;
+ }
+#endif // PSEUDORANDOM_NOP_INSERTION
+
+ assert(IsCodeAligned(emitCurIGsize));
+
+ /* Make sure we have enough space for the new instruction */
+
+ if ((emitCurIGfreeNext + sz >= emitCurIGfreeEndp) || emitForceNewIG)
+ {
+ emitNxtIG(true);
+ }
+
+ /* Grab the space for the instruction */
+
+ emitLastIns = id = (instrDesc*)emitCurIGfreeNext;
+ emitCurIGfreeNext += sz;
+
+ assert(sz >= sizeof(void*));
+ memset(id, 0, sz);
+
+ // These fields should have been zero-ed by the above
+ assert(id->idReg1() == regNumber(0));
+ assert(id->idReg2() == regNumber(0));
+#ifdef _TARGET_XARCH_
+ assert(id->idCodeSize() == 0);
+#endif
+
+#if HAS_TINY_DESC
+ /* Is the second area to be cleared actually present? */
+ if (sz >= SMALL_IDSC_SIZE)
+ {
+ /* Clear the second 4 bytes, or the 'SMALL' part */
+ *(int*)((BYTE*)id + (SMALL_IDSC_SIZE - sizeof(int))) = 0;
+
+ // These fields should have been zero-ed by the above
+ assert(id->idIsLargeCns() == false);
+ assert(id->idIsLargeDsp() == false);
+ assert(id->idIsLargeCall() == false);
+ }
+#endif
+
+ // Make sure that idAddrUnion is just a union of various pointer sized things
+ C_ASSERT(sizeof(CORINFO_FIELD_HANDLE) <= sizeof(void*));
+ C_ASSERT(sizeof(CORINFO_METHOD_HANDLE) <= sizeof(void*));
+ C_ASSERT(sizeof(emitter::emitAddrMode) <= sizeof(void*));
+ C_ASSERT(sizeof(emitLclVarAddr) <= sizeof(void*));
+ C_ASSERT(sizeof(emitter::instrDesc) == (SMALL_IDSC_SIZE + sizeof(void*)));
+
+ emitInsCount++;
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ /* In debug mode we clear/set some additional fields */
+
+ instrDescDebugInfo* info = (instrDescDebugInfo*)emitGetMem(sizeof(*info));
+
+ info->idNum = emitInsCount;
+ info->idSize = sz;
+ info->idVarRefOffs = 0;
+ info->idMemCookie = 0;
+ info->idClsCookie = nullptr;
+#ifdef TRANSLATE_PDB
+ info->idilStart = emitInstrDescILBase;
+#endif
+ info->idFinallyCall = false;
+ info->idCatchRet = false;
+ info->idCallSig = nullptr;
+
+ id->idDebugOnlyInfo(info);
+
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+ /* Store the size and handle the two special values
+ that indicate GCref and ByRef */
+
+ if (EA_IS_GCREF(opsz))
+ {
+ /* A special value indicates a GCref pointer value */
+
+ id->idGCref(GCT_GCREF);
+ id->idOpSize(EA_PTRSIZE);
+ }
+ else if (EA_IS_BYREF(opsz))
+ {
+ /* A special value indicates a Byref pointer value */
+
+ id->idGCref(GCT_BYREF);
+ id->idOpSize(EA_PTRSIZE);
+ }
+ else
+ {
+ id->idGCref(GCT_NONE);
+ id->idOpSize(EA_SIZE(opsz));
+ }
+
+#if RELOC_SUPPORT
+ // Amd64: ip-relative addressing is supported even when not generating relocatable ngen code
+ if (EA_IS_DSP_RELOC(opsz)
+#ifndef _TARGET_AMD64_
+ && emitComp->opts.compReloc
+#endif //_TARGET_AMD64_
+ )
+ {
+ /* Mark idInfo()->idDspReloc to remember that the */
+ /* address mode has a displacement that is relocatable */
+ id->idSetIsDspReloc();
+ }
+
+ if (EA_IS_CNS_RELOC(opsz) && emitComp->opts.compReloc)
+ {
+ /* Mark idInfo()->idCnsReloc to remember that the */
+ /* instruction has an immediate constant that is relocatable */
+ id->idSetIsCnsReloc();
+ }
+#endif
+
+#if EMITTER_STATS
+ emitTotalInsCnt++;
+#endif
+
+ /* Update the instruction count */
+
+ emitCurIGinsCnt++;
+
+ return id;
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Make sure the code offsets of all instruction groups look reasonable.
+ */
+void emitter::emitCheckIGoffsets()
+{
+ insGroup* tempIG;
+ size_t offsIG;
+
+ for (tempIG = emitIGlist, offsIG = 0; tempIG; tempIG = tempIG->igNext)
+ {
+ if (tempIG->igOffs != offsIG)
+ {
+ printf("Block #%u has offset %08X, expected %08X\n", tempIG->igNum, tempIG->igOffs, offsIG);
+ assert(!"bad block offset");
+ }
+
+ offsIG += tempIG->igSize;
+ }
+
+ if (emitTotalCodeSize && emitTotalCodeSize != offsIG)
+ {
+ printf("Total code size is %08X, expected %08X\n", emitTotalCodeSize, offsIG);
+
+ assert(!"bad total code size");
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Begin generating a method prolog.
+ */
+
+void emitter::emitBegProlog()
+{
+ assert(emitComp->compGeneratingProlog);
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Don't measure stack depth inside the prolog, it's misleading */
+
+ emitCntStackDepth = 0;
+
+ assert(emitCurStackLvl == 0);
+
+#endif
+
+ emitNoGCIG = true;
+ emitForceNewIG = false;
+
+ /* Switch to the pre-allocated prolog IG */
+
+ emitGenIG(emitPrologIG);
+
+ /* Nothing is live on entry to the prolog */
+
+ // These were initialized to Empty at the start of compilation.
+ VarSetOps::ClearD(emitComp, emitInitGCrefVars);
+ VarSetOps::ClearD(emitComp, emitPrevGCrefVars);
+ emitInitGCrefRegs = RBM_NONE;
+ emitPrevGCrefRegs = RBM_NONE;
+ emitInitByrefRegs = RBM_NONE;
+ emitPrevByrefRegs = RBM_NONE;
+}
+
+/*****************************************************************************
+ *
+ * Return the code offset of the current location in the prolog.
+ */
+
+unsigned emitter::emitGetPrologOffsetEstimate()
+{
+ /* For now only allow a single prolog ins group */
+
+ assert(emitPrologIG);
+ assert(emitPrologIG == emitCurIG);
+
+ return emitCurIGsize;
+}
+
+/*****************************************************************************
+ *
+ * Mark the code offset of the current location as the end of the prolog,
+ * so it can be used later to compute the actual size of the prolog.
+ */
+
+void emitter::emitMarkPrologEnd()
+{
+ assert(emitComp->compGeneratingProlog);
+
+ /* For now only allow a single prolog ins group */
+
+ assert(emitPrologIG);
+ assert(emitPrologIG == emitCurIG);
+
+ emitPrologEndPos = emitCurOffset();
+}
+
+/*****************************************************************************
+ *
+ * Finish generating a method prolog.
+ */
+
+void emitter::emitEndProlog()
+{
+ assert(emitComp->compGeneratingProlog);
+
+ size_t prolSz;
+
+ insGroup* tempIG;
+
+ emitNoGCIG = false;
+
+ /* Save the prolog IG if non-empty or if only one block */
+
+ if (emitCurIGnonEmpty() || emitCurIG == emitPrologIG)
+ {
+ emitSavIG();
+ }
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Reset the stack depth values */
+
+ emitCurStackLvl = 0;
+ emitCntStackDepth = sizeof(int);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Create a placeholder instruction group to be used by a prolog or epilog,
+ * either for the main function, or a funclet.
+ */
+
+void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
+ BasicBlock* igBB,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ bool last)
+{
+ assert(igBB != nullptr);
+
+ bool emitAdd = false;
+
+ if (igType == IGPT_EPILOG
+#if FEATURE_EH_FUNCLETS
+ || igType == IGPT_FUNCLET_EPILOG
+#endif // FEATURE_EH_FUNCLETS
+ )
+ {
+#ifdef _TARGET_AMD64_
+ emitOutputPreEpilogNOP();
+#endif // _TARGET_AMD64_
+
+ emitAdd = true;
+ }
+
+ if (emitCurIGnonEmpty())
+ {
+ emitNxtIG(emitAdd);
+ }
+
+ /* Update GC tracking for the beginning of the placeholder IG */
+
+ if (!emitAdd)
+ {
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, GCvars);
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, GCvars);
+ emitThisGCrefRegs = emitInitGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = emitInitByrefRegs = byrefRegs;
+ }
+
+ /* Convert the group to a placeholder group */
+
+ insGroup* igPh = emitCurIG;
+
+ igPh->igFlags |= IGF_PLACEHOLDER;
+
+ /* Note that we might be re-using a previously created but empty IG. In this
+ * case, we need to make sure any re-used fields, such as igFuncIdx, are correct.
+ */
+
+ igPh->igFuncIdx = emitComp->compCurrFuncIdx;
+
+ /* Create a separate block of memory to store placeholder information.
+ * We could use unions to put some of this into the insGroup itself, but we don't
+ * want to grow the insGroup, and it's difficult to make sure the
+ * insGroup fields are getting set and used elsewhere.
+ */
+
+ igPh->igPhData = new (emitComp, CMK_InstDesc) insPlaceholderGroupData;
+
+ igPh->igPhData->igPhNext = nullptr;
+ igPh->igPhData->igPhType = igType;
+ igPh->igPhData->igPhBB = igBB;
+
+ VarSetOps::AssignNoCopy(emitComp, igPh->igPhData->igPhPrevGCrefVars, VarSetOps::UninitVal());
+ VarSetOps::Assign(emitComp, igPh->igPhData->igPhPrevGCrefVars, emitPrevGCrefVars);
+ igPh->igPhData->igPhPrevGCrefRegs = emitPrevGCrefRegs;
+ igPh->igPhData->igPhPrevByrefRegs = emitPrevByrefRegs;
+
+ VarSetOps::AssignNoCopy(emitComp, igPh->igPhData->igPhInitGCrefVars, VarSetOps::UninitVal());
+ VarSetOps::Assign(emitComp, igPh->igPhData->igPhInitGCrefVars, emitInitGCrefVars);
+ igPh->igPhData->igPhInitGCrefRegs = emitInitGCrefRegs;
+ igPh->igPhData->igPhInitByrefRegs = emitInitByrefRegs;
+
+#if EMITTER_STATS
+ emitTotalPhIGcnt += 1;
+#endif
+
+ // Mark function prologs and epilogs properly in the igFlags bits. These bits
+ // will get used and propagated when the placeholder is converted to a non-placeholder
+ // during prolog/epilog generation.
+
+ if (igType == IGPT_EPILOG)
+ {
+ igPh->igFlags |= IGF_EPILOG;
+ }
+#if FEATURE_EH_FUNCLETS
+ else if (igType == IGPT_FUNCLET_PROLOG)
+ {
+ igPh->igFlags |= IGF_FUNCLET_PROLOG;
+ }
+#ifdef DEBUG
+ else if (igType == IGPT_FUNCLET_EPILOG)
+ {
+ igPh->igFlags |= IGF_FUNCLET_EPILOG;
+ }
+#endif // DEBUG
+#endif // FEATURE_EH_FUNCLETS
+
+ /* Link it into the placeholder list */
+
+ if (emitPlaceholderList)
+ {
+ emitPlaceholderLast->igPhData->igPhNext = igPh;
+ }
+ else
+ {
+ emitPlaceholderList = igPh;
+ }
+
+ emitPlaceholderLast = igPh;
+
+ // Give an estimated size of this placeholder IG and
+ // increment emitCurCodeOffset since we are not calling emitNewIG()
+ //
+ emitCurIGsize += MAX_PLACEHOLDER_IG_SIZE;
+ emitCurCodeOffset += emitCurIGsize;
+
+#ifdef DEBUGGING_SUPPORT
+
+#if FEATURE_EH_FUNCLETS
+ // Add the appropriate IP mapping debugging record for this placeholder
+ // group.
+
+ // genExitCode() adds the mapping for main function epilogs
+ if (emitComp->opts.compDbgInfo)
+ {
+ if (igType == IGPT_FUNCLET_PROLOG)
+ {
+ codeGen->genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::PROLOG, true);
+ }
+ else if (igType == IGPT_FUNCLET_EPILOG)
+ {
+ codeGen->genIPmappingAdd((IL_OFFSETX)ICorDebugInfo::EPILOG, true);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+#endif // DEBUGGING_SUPPORT
+
+ /* Start a new IG if more code follows */
+
+ if (last)
+ {
+ emitCurIG = nullptr;
+ }
+ else
+ {
+ if (igType == IGPT_EPILOG
+#if FEATURE_EH_FUNCLETS
+ || igType == IGPT_FUNCLET_EPILOG
+#endif // FEATURE_EH_FUNCLETS
+ )
+ {
+ // If this was an epilog, then assume this is the end of any currently in progress
+ // no-GC region. If a block after the epilog needs to be no-GC, it needs to call
+ // emitter::emitDisableGC() directly. This behavior is depended upon by the fast
+ // tailcall implementation, which disables GC at the beginning of argument setup,
+ // but assumes that after the epilog it will be re-enabled.
+ emitNoGCIG = false;
+ }
+
+ emitNewIG();
+
+ // We don't know what the GC ref state will be at the end of the placeholder
+ // group. So, force the next IG to store all the GC ref state variables;
+        // don't omit them just because emitPrev* happens to equal emitInit*, since
+        // emitPrev* will be inaccurate. (Note that, currently, GCrefRegs and ByrefRegs
+        // are always saved anyway.)
+        //
+        // There is no need to re-initialize the emitPrev* variables, as they won't be used
+        // while emitForceStoreGCState==true, and will be re-initialized just before
+        // emitForceStoreGCState is set to false.
+
+ emitForceStoreGCState = true;
+
+ /* The group after the placeholder group doesn't get the "propagate" flags */
+
+ emitCurIG->igFlags &= ~IGF_PROPAGATE_MASK;
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("*************** After placeholder IG creation\n");
+ emitDispIGlist(false);
+ }
+#endif
+}
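+
+// Note on placeholder lifetime: the placeholder IGs created above are queued on
+// emitPlaceholderList and are later walked by emitGeneratePrologEpilog() (below),
+// which converts each one back into a normal IG via emitBegPrologEpilog() and
+// emits the actual prolog or epilog code into it.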
+
+/*****************************************************************************
+ *
+ * Generate all prologs and epilogs
+ */
+
+void emitter::emitGeneratePrologEpilog()
+{
+#ifdef DEBUG
+ unsigned prologCnt = 0;
+ unsigned epilogCnt = 0;
+#if FEATURE_EH_FUNCLETS
+ unsigned funcletPrologCnt = 0;
+ unsigned funcletEpilogCnt = 0;
+#endif // FEATURE_EH_FUNCLETS
+#endif // DEBUG
+
+ insGroup* igPh;
+ insGroup* igPhNext;
+
+ // Generating the prolog/epilog is going to destroy the placeholder group,
+ // so save the "next" pointer before that happens.
+
+ for (igPh = emitPlaceholderList; igPh != nullptr; igPh = igPhNext)
+ {
+ assert(igPh->igFlags & IGF_PLACEHOLDER);
+
+ igPhNext = igPh->igPhData->igPhNext;
+
+ BasicBlock* igPhBB = igPh->igPhData->igPhBB;
+
+ switch (igPh->igPhData->igPhType)
+ {
+ case IGPT_PROLOG: // currently unused
+ INDEBUG(++prologCnt);
+ break;
+
+ case IGPT_EPILOG:
+ INDEBUG(++epilogCnt);
+ emitBegFnEpilog(igPh);
+ codeGen->genFnEpilog(igPhBB);
+ emitEndFnEpilog();
+ break;
+
+#if FEATURE_EH_FUNCLETS
+
+ case IGPT_FUNCLET_PROLOG:
+ INDEBUG(++funcletPrologCnt);
+ emitBegFuncletProlog(igPh);
+ codeGen->genFuncletProlog(igPhBB);
+ emitEndFuncletProlog();
+ break;
+
+ case IGPT_FUNCLET_EPILOG:
+ INDEBUG(++funcletEpilogCnt);
+ emitBegFuncletEpilog(igPh);
+ codeGen->genFuncletEpilog();
+ emitEndFuncletEpilog();
+ break;
+
+#endif // FEATURE_EH_FUNCLETS
+
+ default:
+ unreached();
+ }
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("%d prologs, %d epilogs", prologCnt, epilogCnt);
+#if FEATURE_EH_FUNCLETS
+ printf(", %d funclet prologs, %d funclet epilogs", funcletPrologCnt, funcletEpilogCnt);
+#endif // FEATURE_EH_FUNCLETS
+ printf("\n");
+
+// prolog/epilog code doesn't use this yet
+// noway_assert(prologCnt == 1);
+// noway_assert(epilogCnt == emitEpilogCnt); // Is this correct?
+#if FEATURE_EH_FUNCLETS
+ assert(funcletPrologCnt == emitComp->ehFuncletCount());
+#endif // FEATURE_EH_FUNCLETS
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Begin all prolog and epilog generation
+ */
+
+void emitter::emitStartPrologEpilogGeneration()
+{
+ /* Save the current IG if it's non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitSavIG();
+ }
+ else
+ {
+ assert(emitCurIG == nullptr);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Finish all prolog and epilog generation
+ */
+
+void emitter::emitFinishPrologEpilogGeneration()
+{
+ /* Update the offsets of all the blocks */
+
+ emitRecomputeIGoffsets();
+
+ /* We should not generate any more code after this */
+
+ emitCurIG = nullptr;
+}
+
+/*****************************************************************************
+ *
+ *  Common code for prolog / epilog beginning. Convert the placeholder group to an actual code IG,
+ * and set it as the current group.
+ */
+
+void emitter::emitBegPrologEpilog(insGroup* igPh)
+{
+ assert(igPh->igFlags & IGF_PLACEHOLDER);
+
+ /* Save the current IG if it's non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitSavIG();
+ }
+
+ /* Convert the placeholder group to a normal group.
+ * We need to be very careful to re-initialize the IG properly.
+ * It turns out, this means we only need to clear the placeholder bit
+ * and clear the igPhData field, and emitGenIG() will do the rest,
+ * since in the placeholder IG we didn't touch anything that is set by emitAllocIG().
+ */
+
+ igPh->igFlags &= ~IGF_PLACEHOLDER;
+ emitNoGCIG = true;
+ emitForceNewIG = false;
+
+ /* Set up the GC info that we stored in the placeholder */
+
+ VarSetOps::Assign(emitComp, emitPrevGCrefVars, igPh->igPhData->igPhPrevGCrefVars);
+ emitPrevGCrefRegs = igPh->igPhData->igPhPrevGCrefRegs;
+ emitPrevByrefRegs = igPh->igPhData->igPhPrevByrefRegs;
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, igPh->igPhData->igPhInitGCrefVars);
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, igPh->igPhData->igPhInitGCrefVars);
+ emitThisGCrefRegs = emitInitGCrefRegs = igPh->igPhData->igPhInitGCrefRegs;
+ emitThisByrefRegs = emitInitByrefRegs = igPh->igPhData->igPhInitByrefRegs;
+
+ igPh->igPhData = nullptr;
+
+ /* Create a non-placeholder group pointer that we'll now use */
+
+ insGroup* ig = igPh;
+
+ /* Set the current function using the function index we stored */
+
+ emitComp->funSetCurrentFunc(ig->igFuncIdx);
+
+ /* Set the new IG as the place to generate code */
+
+ emitGenIG(ig);
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Don't measure stack depth inside the prolog / epilog, it's misleading */
+
+ emitCntStackDepth = 0;
+
+ assert(emitCurStackLvl == 0);
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Common code for end of prolog / epilog
+ */
+
+void emitter::emitEndPrologEpilog()
+{
+ emitNoGCIG = false;
+
+ /* Save the IG if non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitSavIG();
+ }
+
+ assert(emitCurIGsize <= MAX_PLACEHOLDER_IG_SIZE);
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Reset the stack depth values */
+
+ emitCurStackLvl = 0;
+ emitCntStackDepth = sizeof(int);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Begin generating a main function epilog.
+ */
+
+void emitter::emitBegFnEpilog(insGroup* igPh)
+{
+ emitEpilogCnt++;
+
+ emitBegPrologEpilog(igPh);
+
+#ifdef JIT32_GCENCODER
+
+ EpilogList* el = new (emitComp, CMK_GC) EpilogList;
+ el->elNext = NULL;
+ el->elIG = emitCurIG;
+
+ if (emitEpilogLast)
+ emitEpilogLast->elNext = el;
+ else
+ emitEpilogList = el;
+
+ emitEpilogLast = el;
+
+#endif // JIT32_GCENCODER
+
+ /* Remember current position so that we can compute total epilog size */
+
+ emitEpilogBegLoc.CaptureLocation(this);
+}
+
+/*****************************************************************************
+ *
+ *  Finish generating a main function epilog.
+ */
+
+void emitter::emitEndFnEpilog()
+{
+ emitEndPrologEpilog();
+
+ UNATIVE_OFFSET newSize;
+ UNATIVE_OFFSET epilogBegCodeOffset = emitEpilogBegLoc.CodeOffset(this);
+#ifdef _TARGET_XARCH_
+ UNATIVE_OFFSET epilogExitSeqStartCodeOffset = emitExitSeqBegLoc.CodeOffset(this);
+#else
+ UNATIVE_OFFSET epilogExitSeqStartCodeOffset = emitCodeOffset(emitCurIG, emitCurOffset());
+#endif
+
+ newSize = epilogExitSeqStartCodeOffset - epilogBegCodeOffset;
+
+#ifdef _TARGET_X86_
+
+ /* Compute total epilog size */
+
+ assert(emitEpilogSize == 0 || emitEpilogSize == newSize); // All epilogs must be identical
+ emitEpilogSize = newSize;
+ UNATIVE_OFFSET epilogEndCodeOffset = emitCodeOffset(emitCurIG, emitCurOffset());
+ assert(epilogExitSeqStartCodeOffset != epilogEndCodeOffset);
+
+ newSize = epilogEndCodeOffset - epilogExitSeqStartCodeOffset;
+ if (newSize < emitExitSeqSize)
+ {
+ // We expect either the epilog to be the same every time, or that
+ // one will be a ret or a ret <n> and others will be a jmp addr or jmp [addr];
+ // we make the epilogs the minimum of these. Note that this ONLY works
+ // because the only instruction is the last one and thus a slight
+ // underestimation of the epilog size is harmless (since the EIP
+ // can not be between instructions).
+ assert(emitEpilogCnt == 1 ||
+ (emitExitSeqSize - newSize) <= 5 // delta between size of various forms of jmp (size is either 6 or 5)
+ // and various forms of ret (size is either 1 or 3). The combination can
+                   // be anything between 1 and 5.
+ );
+ emitExitSeqSize = newSize;
+ }
+
+#endif // _TARGET_X86_
+}
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Begin generating a funclet prolog.
+ */
+
+void emitter::emitBegFuncletProlog(insGroup* igPh)
+{
+ emitBegPrologEpilog(igPh);
+}
+
+/*****************************************************************************
+ *
+ * Finish generating a funclet prolog.
+ */
+
+void emitter::emitEndFuncletProlog()
+{
+ emitEndPrologEpilog();
+}
+
+/*****************************************************************************
+ *
+ * Begin generating a funclet epilog.
+ */
+
+void emitter::emitBegFuncletEpilog(insGroup* igPh)
+{
+ emitBegPrologEpilog(igPh);
+}
+
+/*****************************************************************************
+ *
+ * Finish generating a funclet epilog.
+ */
+
+void emitter::emitEndFuncletEpilog()
+{
+ emitEndPrologEpilog();
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ *  Return 'true' if the current method has only one epilog, which is
+ * at the very end of the method body.
+ */
+
+bool emitter::emitHasEpilogEnd()
+{
+ if (emitEpilogCnt == 1 && (emitIGlast->igFlags & IGF_EPILOG)) // This wouldn't work for funclets
+ return true;
+ else
+ return false;
+}
+
+#endif // JIT32_GCENCODER
+
+#ifdef _TARGET_XARCH_
+
+/*****************************************************************************
+ *
+ * Mark the beginning of the epilog exit sequence by remembering our position.
+ */
+
+void emitter::emitStartExitSeq()
+{
+ assert(emitComp->compGeneratingEpilog);
+
+ emitExitSeqBegLoc.CaptureLocation(this);
+}
+
+#endif // _TARGET_XARCH_
+
+/*****************************************************************************
+ *
+ * The code generator tells us the range of GC ref locals through this
+ * method. Needless to say, locals and temps should be allocated so that
+ * the size of the range is as small as possible.
+ *
+ * offsLo - The FP offset from which the GC pointer range starts.
+ * offsHi - The FP offset at which the GC pointer region ends (exclusive).
+ */
+
+void emitter::emitSetFrameRangeGCRs(int offsLo, int offsHi)
+{
+ assert(emitComp->compGeneratingProlog);
+ assert(offsHi > offsLo);
+
+#ifdef DEBUG
+
+ // A total of 47254 methods compiled.
+ //
+ // GC ref frame variable counts:
+ //
+ // <= 0 ===> 43175 count ( 91% of total)
+ // 1 .. 1 ===> 2367 count ( 96% of total)
+ // 2 .. 2 ===> 887 count ( 98% of total)
+ // 3 .. 5 ===> 579 count ( 99% of total)
+ // 6 .. 10 ===> 141 count ( 99% of total)
+ // 11 .. 20 ===> 40 count ( 99% of total)
+ // 21 .. 50 ===> 42 count ( 99% of total)
+ // 51 .. 128 ===> 15 count ( 99% of total)
+ // 129 .. 256 ===> 4 count ( 99% of total)
+ // 257 .. 512 ===> 4 count (100% of total)
+ // 513 .. 1024 ===> 0 count (100% of total)
+
+ if (emitComp->verbose)
+ {
+ unsigned count = (offsHi - offsLo) / sizeof(void*);
+ printf("%u tracked GC refs are at stack offsets ", count);
+
+ if (offsLo >= 0)
+ {
+ printf(" %04X ... %04X\n", offsLo, offsHi);
+ assert(offsHi >= 0);
+ }
+ else
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ if (!emitComp->compIsProfilerHookNeeded())
+#endif
+ {
+#ifdef _TARGET_AMD64_
+            // The range doesn't have to be all negative on AMD64
+ printf("-%04X ... %04X\n", -offsLo, offsHi);
+#else
+ printf("-%04X ... -%04X\n", -offsLo, -offsHi);
+ assert(offsHi <= 0);
+#endif
+ }
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ else
+ {
+            // Under the profiler, due to prespilling of arguments, offsHi need not be < 0
+ if (offsHi < 0)
+ printf("-%04X ... -%04X\n", -offsLo, -offsHi);
+ else
+ printf("-%04X ... %04X\n", -offsLo, offsHi);
+ }
+#endif
+ }
+
+#endif // DEBUG
+
+ assert(((offsHi - offsLo) % sizeof(void*)) == 0);
+ assert((offsLo % sizeof(void*)) == 0);
+ assert((offsHi % sizeof(void*)) == 0);
+
+ emitGCrFrameOffsMin = offsLo;
+ emitGCrFrameOffsMax = offsHi;
+ emitGCrFrameOffsCnt = (offsHi - offsLo) / sizeof(void*);
+}
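+
+// Worked example (hypothetical values): on a 64-bit target, offsLo = -0x20 and
+// offsHi = 0x08 describe a 0x28-byte frame range, so emitGCrFrameOffsCnt is
+// 0x28 / sizeof(void*) = 5 tracked GC ref slots.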
+
+/*****************************************************************************
+ *
+ * The code generator tells us the range of local variables through this
+ * method.
+ */
+
+void emitter::emitSetFrameRangeLcls(int offsLo, int offsHi)
+{
+}
+
+/*****************************************************************************
+ *
+ * The code generator tells us the range of used arguments through this
+ * method.
+ */
+
+void emitter::emitSetFrameRangeArgs(int offsLo, int offsHi)
+{
+}
+
+/*****************************************************************************
+ *
+ * A conversion table used to map an operand size value (in bytes) into its
+ *  small 'opSize' encoding (OPSZ1 through OPSZ32), and vice versa.
+ */
+
+const emitter::opSize emitter::emitSizeEncode[] = {
+ emitter::OPSZ1, emitter::OPSZ2, OPSIZE_INVALID, emitter::OPSZ4, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ emitter::OPSZ8, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ OPSIZE_INVALID, emitter::OPSZ16, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID,
+ OPSIZE_INVALID, OPSIZE_INVALID, OPSIZE_INVALID, emitter::OPSZ32,
+};
+
+const emitAttr emitter::emitSizeDecode[emitter::OPSZ_COUNT] = {EA_1BYTE, EA_2BYTE, EA_4BYTE,
+ EA_8BYTE, EA_16BYTE, EA_32BYTE};
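+
+// For reference: emitSizeEncode is indexed by (operand size in bytes - 1), so slot 0
+// yields OPSZ1, slot 1 OPSZ2, slot 3 OPSZ4, slot 7 OPSZ8, slot 15 OPSZ16, and slot 31
+// OPSZ32; every non-power-of-two slot holds OPSIZE_INVALID. emitSizeDecode is the
+// inverse map, indexed by the opSize encoding, yielding the corresponding emitAttr.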
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction that uses both
+ * a displacement and a constant.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrCnsDsp(emitAttr size, ssize_t cns, int dsp)
+{
+ if (dsp == 0)
+ {
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ instrDesc* id = emitAllocInstr(size);
+
+ id->idSmallCns(cns);
+
+#if EMITTER_STATS
+ emitSmallCnsCnt++;
+ emitSmallCns[cns - ID_MIN_SMALL_CNS]++;
+ emitSmallDspCnt++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescCns* id = emitAllocInstrCns(size);
+
+ id->idSetIsLargeCns();
+ id->idcCnsVal = cns;
+
+#if EMITTER_STATS
+ emitLargeCnsCnt++;
+ emitSmallDspCnt++;
+#endif
+
+ return id;
+ }
+ }
+ else
+ {
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ instrDescDsp* id = emitAllocInstrDsp(size);
+
+ id->idSetIsLargeDsp();
+ id->iddDspVal = dsp;
+
+ id->idSmallCns(cns);
+
+#if EMITTER_STATS
+ emitLargeDspCnt++;
+ emitSmallCnsCnt++;
+ emitSmallCns[cns - ID_MIN_SMALL_CNS]++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescCnsDsp* id = emitAllocInstrCnsDsp(size);
+
+ id->idSetIsLargeCns();
+ id->iddcCnsVal = cns;
+
+ id->idSetIsLargeDsp();
+ id->iddcDspVal = dsp;
+
+#if EMITTER_STATS
+ emitLargeDspCnt++;
+ emitLargeCnsCnt++;
+#endif
+
+ return id;
+ }
+ }
+}
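+
+// In other words, the four paths above pick the smallest instrDesc variant that fits:
+// small cns / zero dsp, large cns / zero dsp, small cns / non-zero dsp, and
+// large cns / non-zero dsp.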
+
+/*****************************************************************************
+ *
+ *  Returns true if a garbage collection cannot happen within the given helper call.
+ *  We don't need to record live pointers at such call sites.
+ */
+
+bool emitter::emitNoGChelper(unsigned IHX)
+{
+ // TODO-Throughput: Make this faster (maybe via a simple table of bools?)
+
+ switch (IHX)
+ {
+ case CORINFO_HELP_UNDEF:
+ return false;
+
+ case CORINFO_HELP_PROF_FCN_LEAVE:
+ case CORINFO_HELP_PROF_FCN_ENTER:
+#ifdef _TARGET_AMD64_
+ case CORINFO_HELP_PROF_FCN_TAILCALL:
+#endif
+ case CORINFO_HELP_LLSH:
+ case CORINFO_HELP_LRSH:
+ case CORINFO_HELP_LRSZ:
+
+// case CORINFO_HELP_LMUL:
+// case CORINFO_HELP_LDIV:
+// case CORINFO_HELP_LMOD:
+// case CORINFO_HELP_ULDIV:
+// case CORINFO_HELP_ULMOD:
+
+#ifdef _TARGET_X86_
+ case CORINFO_HELP_ASSIGN_REF_EAX:
+ case CORINFO_HELP_ASSIGN_REF_ECX:
+ case CORINFO_HELP_ASSIGN_REF_EBX:
+ case CORINFO_HELP_ASSIGN_REF_EBP:
+ case CORINFO_HELP_ASSIGN_REF_ESI:
+ case CORINFO_HELP_ASSIGN_REF_EDI:
+
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EAX:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_ECX:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EBX:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EBP:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_ESI:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF_EDI:
+#endif
+
+ case CORINFO_HELP_ASSIGN_REF:
+
+ case CORINFO_HELP_CHECKED_ASSIGN_REF:
+
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+
+ case CORINFO_HELP_ASSIGN_BYREF:
+
+ case CORINFO_HELP_INIT_PINVOKE_FRAME:
+
+ return true;
+ }
+
+ return false;
+}
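+
+// A minimal sketch of the table-based alternative mentioned in the TODO above
+// (hypothetical, not wired in): build a bool table once, indexed by helper number,
+// assuming the CORINFO_HELP_COUNT sentinel of the CorInfoHelpFunc enum.
+//
+//     static bool s_isNoGChelper[CORINFO_HELP_COUNT]; // filled in from the cases above
+//     ...
+//     return (IHX < CORINFO_HELP_COUNT) && s_isNoGChelper[IHX];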
+
+/*****************************************************************************
+ *
+ * Mark the current spot as having a label.
+ */
+
+void* emitter::emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, BOOL isFinallyTarget)
+{
+ /* Create a new IG if the current one is non-empty */
+
+ if (emitCurIGnonEmpty())
+ {
+ emitNxtIG();
+ }
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, GCvars);
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, GCvars);
+ emitThisGCrefRegs = emitInitGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = emitInitByrefRegs = byrefRegs;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (isFinallyTarget)
+ {
+ emitCurIG->igFlags |= IGF_FINALLY_TARGET;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("Label: IG%02u, GCvars=%s ", emitCurIG->igNum, VarSetOps::ToString(emitComp, GCvars));
+ dumpConvertedVarSet(emitComp, GCvars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+ return emitCurIG;
+}
+
+#ifdef _TARGET_ARMARCH_
+
+// Does the argument location point to an IG at the end of a function or funclet?
+// We can ignore the codePos part of the location, since it doesn't affect the
+// determination. If 'emitLocNextFragment' is non-NULL, it indicates the first
+// IG of the next fragment, so it represents a function end.
+bool emitter::emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment /* = NULL */)
+{
+ assert(emitLoc);
+
+ insGroup* ig = emitLoc->GetIG();
+ assert(ig);
+
+ // Are we at the end of the IG list?
+ if ((emitLocNextFragment != NULL) && (ig->igNext == emitLocNextFragment->GetIG()))
+ return true;
+
+ // Safety check
+ if (ig->igNext == NULL)
+ return true;
+
+ // Is the next IG the start of a funclet prolog?
+ if (ig->igNext->igFlags & IGF_FUNCLET_PROLOG)
+ return true;
+
+#if FEATURE_EH_FUNCLETS
+
+ // Is the next IG a placeholder group for a funclet prolog?
+ if ((ig->igNext->igFlags & IGF_PLACEHOLDER) && (ig->igNext->igPhData->igPhType == IGPT_FUNCLET_PROLOG))
+ {
+ return true;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Split the region from 'startLoc' to 'endLoc' into fragments by calling
+ * a callback function to indicate the beginning of a fragment. The initial code,
+ * starting at 'startLoc', doesn't get a callback, but the first code fragment,
+ * about 'maxSplitSize' bytes out does, as does the beginning of each fragment
+ * after that. There is no callback for the end (only the beginning of the last
+ * fragment gets a callback). A fragment must contain at least one instruction
+ * group. It should be smaller than 'maxSplitSize', although it may be larger to
+ * satisfy the "at least one instruction group" rule. Do not split prologs or
+ * epilogs. (Currently, prologs exist in a single instruction group at the main
+ * function beginning, so they aren't split. Funclets, however, might span IGs,
+ * so we can't split in between them.)
+ *
+ * Note that the locations must be the start of instruction groups; the part of
+ * the location indicating offset within a group must be zero.
+ *
+ * If 'startLoc' is NULL, it means the start of the code.
+ * If 'endLoc' is NULL, it means the end of the code.
+ */
+
+void emitter::emitSplit(emitLocation* startLoc,
+ emitLocation* endLoc,
+ UNATIVE_OFFSET maxSplitSize,
+ void* context,
+ emitSplitCallbackType callbackFunc)
+{
+ insGroup* igStart = (startLoc == NULL) ? emitIGlist : startLoc->GetIG();
+ insGroup* igEnd = (endLoc == NULL) ? NULL : endLoc->GetIG();
+ insGroup* igPrev;
+ insGroup* ig;
+ insGroup* igLastReported;
+ insGroup* igLastCandidate;
+ UNATIVE_OFFSET curSize;
+ UNATIVE_OFFSET candidateSize;
+
+ for (igPrev = NULL, ig = igLastReported = igStart, igLastCandidate = NULL, candidateSize = 0, curSize = 0;
+ ig != igEnd && ig != NULL; igPrev = ig, ig = ig->igNext)
+ {
+ // Keep looking until we've gone past the maximum split size
+ if (curSize >= maxSplitSize)
+ {
+ bool reportCandidate = true;
+
+ // Is there a candidate?
+ if (igLastCandidate == NULL)
+ {
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ printf("emitSplit: can't split at IG%02u; we don't have a candidate to report\n", ig->igNum);
+#endif
+ reportCandidate = false;
+ }
+
+ // Don't report the same thing twice (this also happens for the first block, since igLastReported is
+ // initialized to igStart).
+ if (igLastCandidate == igLastReported)
+ {
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ printf("emitSplit: can't split at IG%02u; we already reported it\n", igLastCandidate->igNum);
+#endif
+ reportCandidate = false;
+ }
+
+ // Report it!
+ if (reportCandidate)
+ {
+#ifdef DEBUG
+ if (EMITVERBOSE && (candidateSize >= maxSplitSize))
+ printf("emitSplit: split at IG%02u is size %d, larger than requested maximum size of %d\n",
+ igLastCandidate->igNum, candidateSize, maxSplitSize);
+#endif
+
+ // hand memory ownership to the callback function
+ emitLocation* pEmitLoc = new (emitComp, CMK_Unknown) emitLocation(igLastCandidate);
+ callbackFunc(context, pEmitLoc);
+ igLastReported = igLastCandidate;
+ igLastCandidate = NULL;
+ curSize -= candidateSize;
+ }
+ }
+
+ // Update the current candidate to be this block, if it isn't in the middle of a
+ // prolog or epilog, which we can't split. All we know is that certain
+ // IGs are marked as prolog or epilog. We don't actually know if two adjacent
+ // IGs are part of the *same* prolog or epilog, so we have to assume they are.
+
+ if (igPrev && (((igPrev->igFlags & IGF_FUNCLET_PROLOG) && (ig->igFlags & IGF_FUNCLET_PROLOG)) ||
+ ((igPrev->igFlags & IGF_EPILOG) && (ig->igFlags & IGF_EPILOG))))
+ {
+ // We can't update the candidate
+ }
+ else
+ {
+ igLastCandidate = ig;
+ candidateSize = curSize;
+ }
+
+ curSize += ig->igSize;
+
+ } // end for loop
+}
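+
+// A minimal usage sketch (hypothetical caller and names), showing the shape of the
+// emitSplitCallbackType callback as it is invoked above:
+//
+//     static void fragmentStartCallback(void* context, emitLocation* emitLoc)
+//     {
+//         ((FragmentList*)context)->Append(emitLoc); // hypothetical container; takes ownership
+//     }
+//     ...
+//     emitSplit(nullptr, nullptr, maxFragmentSize, &fragments, fragmentStartCallback);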
+
+/*****************************************************************************
+ *
+ * Given an instruction group, find the array of instructions (instrDesc) and
+ * number of instructions in the array. If the IG is the current IG, we assume
+ * that igData does NOT hold the instructions; they are unsaved and pointed
+ * to by emitCurIGfreeBase.
+ *
+ * This function can't be called for placeholder groups, which have no instrDescs.
+ */
+
+void emitter::emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt)
+{
+ assert(!(ig->igFlags & IGF_PLACEHOLDER));
+ if (ig == emitCurIG)
+ {
+ *id = (instrDesc*)emitCurIGfreeBase;
+ *insCnt = emitCurIGinsCnt;
+ }
+ else
+ {
+ *id = (instrDesc*)ig->igData;
+ *insCnt = ig->igInsCnt;
+ }
+
+ assert(*id);
+}
+
+/*****************************************************************************
+ *
+ * Given a location (an 'emitLocation'), find the instruction group (IG) and
+ * instruction descriptor (instrDesc) corresponding to that location. Returns
+ * 'true' if there is an instruction, 'false' if there is no instruction
+ * (i.e., we're at the end of the instruction list). Also, optionally return
+ * the number of instructions that follow that instruction in the IG (in *pinsRemaining,
+ * if pinsRemaining is non-NULL), which can be used for iterating over the
+ * remaining instrDescs in the IG.
+ *
+ * We assume that emitCurIG points to the end of the instructions we care about.
+ * For the prologs or epilogs, it points to the last IG of the prolog or epilog
+ * that is being generated. For body code gen, it points to the place we are currently
+ * adding code, namely, the end of currently generated code.
+ */
+
+bool emitter::emitGetLocationInfo(emitLocation* emitLoc,
+ insGroup** pig,
+ instrDesc** pid,
+ int* pinsRemaining /* = NULL */)
+{
+ assert(emitLoc != nullptr);
+ assert(emitLoc->Valid());
+ assert(emitLoc->GetIG() != nullptr);
+ assert(pig != nullptr);
+ assert(pid != nullptr);
+
+ insGroup* ig = emitLoc->GetIG();
+ instrDesc* id;
+ int insNum = emitLoc->GetInsNum();
+ int insCnt;
+
+ emitGetInstrDescs(ig, &id, &insCnt);
+ assert(insNum <= insCnt);
+
+    // There is a special case: if insNum points to the end, then we "wrap" and
+ // consider that the instruction it is pointing at is actually the first instruction
+ // of the next non-empty IG (which has its own valid emitLocation). This handles the
+ // case where you capture a location, then the next instruction creates a new IG.
+
+ if (insNum == insCnt)
+ {
+ if (ig == emitCurIG)
+ {
+ // No instructions beyond the current location.
+ return false;
+ }
+
+ for (ig = ig->igNext; ig; ig = ig->igNext)
+ {
+ emitGetInstrDescs(ig, &id, &insCnt);
+
+ if (insCnt > 0)
+ {
+ insNum = 0; // Pretend the index is 0 -- the first instruction
+ break;
+ }
+
+ if (ig == emitCurIG)
+ {
+ // There aren't any instructions in the current IG, and this is
+ // the current location, so we're at the end.
+ return false;
+ }
+ }
+
+ if (ig == NULL)
+ {
+ // 'ig' can't be NULL, or we went past the current IG represented by 'emitCurIG'.
+ // Perhaps 'loc' was corrupt coming in?
+ noway_assert(!"corrupt emitter location");
+ return false;
+ }
+ }
+
+ // Now find the instrDesc within this group that corresponds to the location
+
+ assert(insNum < insCnt);
+
+ int i;
+ for (i = 0; i != insNum; ++i)
+ {
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+ }
+
+ // Return the info we found
+
+ *pig = ig;
+ *pid = id;
+
+ if (pinsRemaining)
+ {
+ *pinsRemaining = insCnt - insNum - 1;
+ }
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Compute the next instrDesc, either in this IG, or in a subsequent IG. 'id'
+ * will point to this instrDesc. 'ig' and 'insRemaining' will also be updated.
+ * Returns true if there is an instruction, or false if we've iterated over all
+ * the instructions up to the current instruction (based on 'emitCurIG').
+ */
+
+bool emitter::emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining)
+{
+ if (insRemaining > 0)
+ {
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+ --insRemaining;
+ return true;
+ }
+
+ // We're out of instrDesc in 'ig'. Is this the current IG? If so, we're done.
+
+ if (ig == emitCurIG)
+ {
+ return false;
+ }
+
+ for (ig = ig->igNext; ig; ig = ig->igNext)
+ {
+ int insCnt;
+ emitGetInstrDescs(ig, &id, &insCnt);
+
+ if (insCnt > 0)
+ {
+ insRemaining = insCnt - 1;
+ return true;
+ }
+
+ if (ig == emitCurIG)
+ {
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Walk instrDesc's from the location given by 'locFrom', up to the current location.
+ * For each instruction, call the callback function 'processFunc'. 'context' is simply
+ * passed through to the callback function.
+ */
+
+void emitter::emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context)
+{
+ insGroup* ig;
+ instrDesc* id;
+ int insRemaining;
+
+ if (!emitGetLocationInfo(locFrom, &ig, &id, &insRemaining))
+ return; // no instructions at the 'from' location
+
+ do
+ {
+ // process <<id>>
+ (*processFunc)(id, context);
+
+ } while (emitNextID(ig, id, insRemaining));
+}
+
+/*****************************************************************************
+ *
+ * A callback function for emitWalkIDs() that calls Compiler::unwindNop().
+ */
+
+void emitter::emitGenerateUnwindNop(instrDesc* id, void* context)
+{
+ Compiler* comp = (Compiler*)context;
+#if defined(_TARGET_ARM_)
+ comp->unwindNop(id->idCodeSize());
+#elif defined(_TARGET_ARM64_)
+ comp->unwindNop();
+#endif // defined(_TARGET_ARM64_)
+}
+
+/*****************************************************************************
+ *
+ * emitUnwindNopPadding: call unwindNop() for every instruction from a given
+ * location 'emitLoc' up to the current location.
+ */
+
+void emitter::emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp)
+{
+ emitWalkIDs(locFrom, emitGenerateUnwindNop, comp);
+}
+
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ *
+ * Return the instruction size in bytes for the instruction at the specified location.
+ * This is used to assert that the unwind code being generated on ARM has the
+ * same size as the instruction for which it is being generated (since on ARM
+ * the unwind codes have a one-to-one relationship with instructions, and the
+ * unwind codes have an implicit instruction size that must match the instruction size.)
+ * An instruction must exist at the specified location.
+ */
+
+unsigned emitter::emitGetInstructionSize(emitLocation* emitLoc)
+{
+ insGroup* ig;
+ instrDesc* id;
+
+ bool anyInstrs = emitGetLocationInfo(emitLoc, &ig, &id);
+ assert(anyInstrs); // There better be an instruction at this location (otherwise, we're at the end of the
+ // instruction list)
+ return id->idCodeSize();
+}
+
+#endif // defined(_TARGET_ARM_)
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Returns the name for the register to use to access frame based variables
+ */
+
+const char* emitter::emitGetFrameReg()
+{
+ if (emitHasFramePtr)
+ {
+ return STR_FPBASE;
+ }
+ else
+ {
+ return STR_SPBASE;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a register set in a readable form.
+ */
+
+void emitter::emitDispRegSet(regMaskTP regs)
+{
+ regNumber reg;
+ bool sp = false;
+
+ printf(" {");
+
+ for (reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if ((regs & genRegMask(reg)) == 0)
+ {
+ continue;
+ }
+
+ if (sp)
+ {
+ printf(" ");
+ }
+ else
+ {
+ sp = true;
+ }
+
+ printf("%s", emitRegName(reg));
+ }
+
+ printf("}");
+}
+
+/*****************************************************************************
+ *
+ * Display the current GC ref variable set in a readable form.
+ */
+
+void emitter::emitDispVarSet()
+{
+ unsigned vn;
+ int of;
+ bool sp = false;
+
+ for (vn = 0, of = emitGCrFrameOffsMin; vn < emitGCrFrameOffsCnt; vn += 1, of += sizeof(void*))
+ {
+ if (emitGCrFrameLiveTab[vn])
+ {
+ if (sp)
+ {
+ printf(" ");
+ }
+ else
+ {
+ sp = true;
+ }
+
+ printf("[%s", emitGetFrameReg());
+
+ if (of < 0)
+ {
+ printf("-%02XH", -of);
+ }
+ else if (of > 0)
+ {
+ printf("+%02XH", +of);
+ }
+
+ printf("]");
+ }
+ }
+
+ if (!sp)
+ {
+ printf("none");
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+
+#if MULTIREG_HAS_SECOND_GC_RET
+//------------------------------------------------------------------------
+// emitSetSecondRetRegGCType: Sets the GC type of the second return register for instrDescCGCA struct.
+//
+// Arguments:
+// id - The large call instr descriptor to set the second GC return register type on.
+// secondRetSize - The EA_SIZE for second return register type.
+//
+// Return Value:
+// None
+//
+
+void emitter::emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSize)
+{
+ if (EA_IS_GCREF(secondRetSize))
+ {
+ id->idSecondGCref(GCT_GCREF);
+ }
+ else if (EA_IS_BYREF(secondRetSize))
+ {
+ id->idSecondGCref(GCT_BYREF);
+ }
+ else
+ {
+ id->idSecondGCref(GCT_NONE);
+ }
+}
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an indirect call.
+ *
+ * We use two different descriptors to save space - the common case records
+ *  no GC variables and has both a very small argument count and a small address
+ *  mode displacement; the other case records the current GC var set,
+ *  the call scope, an arbitrarily large argument count, and an arbitrary
+ *  address mode displacement.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt,
+ ssize_t disp,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSizeIn
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+{
+ emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
+
+ bool gcRefRegsInScratch = ((gcrefRegs & RBM_CALLEE_TRASH) != 0);
+
+ // Allocate a larger descriptor if any GC values need to be saved
+ // or if we have an absurd number of arguments or a large address
+ // mode displacement, or we have some byref registers
+ //
+ // On Amd64 System V OSs a larger descriptor is also needed if the
+ // call returns a two-register-returned struct and the second
+ // register (RDX) is a GCRef or ByRef pointer.
+
+ if (!VarSetOps::IsEmpty(emitComp, GCvars) || // any frame GCvars live
+ (gcRefRegsInScratch) || // any register gc refs live in scratch regs
+ (byrefRegs != 0) || // any register byrefs live
+ (disp < AM_DISP_MIN) || // displacement too negative
+ (disp > AM_DISP_MAX) || // displacement too positive
+ (argCnt > ID_MAX_SMALL_CNS) || // too many args
+ (argCnt < 0) // caller pops arguments
+ // There is a second ref/byref return register.
+ MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)))
+ {
+ instrDescCGCA* id;
+
+ id = emitAllocInstrCGCA(retSize);
+
+ id->idSetIsLargeCall();
+
+ VarSetOps::Assign(emitComp, id->idcGCvars, GCvars);
+ id->idcGcrefRegs = gcrefRegs;
+ id->idcByrefRegs = byrefRegs;
+ id->idcArgCnt = argCnt;
+ id->idcDisp = disp;
+
+#if MULTIREG_HAS_SECOND_GC_RET
+ emitSetSecondRetRegGCType(id, secondRetSize);
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+ return id;
+ }
+ else
+ {
+ instrDesc* id;
+
+ id = emitNewInstrCns(retSize, argCnt);
+
+ /* Make sure we didn't waste space unexpectedly */
+ assert(!id->idIsLargeCns());
+
+ /* Store the displacement and make sure the value fit */
+ id->idAddr()->iiaAddrMode.amDisp = disp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == disp);
+
+        /* Save the live GC registers in the unused register fields */
+ emitEncodeCallGCregs(gcrefRegs, id);
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for a direct call.
+ *
+ * We use two different descriptors to save space - the common case records
+ *  no GC variables or byrefs, has a very small argument count, and no
+ * explicit scope;
+ * the other case records the current GC var set, the call scope,
+ * and an arbitrarily large argument count.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSizeIn
+ MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+{
+ emitAttr retSize = (retSizeIn != EA_UNKNOWN) ? retSizeIn : EA_PTRSIZE;
+
+ // Allocate a larger descriptor if new GC values need to be saved
+ // or if we have an absurd number of arguments or if we need to
+ // save the scope.
+ //
+ // On Amd64 System V OSs a larger descriptor is also needed if the
+ // call returns a two-register-returned struct and the second
+ // register (RDX) is a GCRef or ByRef pointer.
+
+ bool gcRefRegsInScratch = ((gcrefRegs & RBM_CALLEE_TRASH) != 0);
+
+ if (!VarSetOps::IsEmpty(emitComp, GCvars) || // any frame GCvars live
+ gcRefRegsInScratch || // any register gc refs live in scratch regs
+ (byrefRegs != 0) || // any register byrefs live
+ (argCnt > ID_MAX_SMALL_CNS) || // too many args
+ (argCnt < 0) // caller pops arguments
+ // There is a second ref/byref return register.
+ MULTIREG_HAS_SECOND_GC_RET_ONLY(|| EA_IS_GCREF_OR_BYREF(secondRetSize)))
+ {
+ instrDescCGCA* id = emitAllocInstrCGCA(retSize);
+
+ // printf("Direct call with GC vars / big arg cnt / explicit scope\n");
+
+ id->idSetIsLargeCall();
+
+ VarSetOps::Assign(emitComp, id->idcGCvars, GCvars);
+ id->idcGcrefRegs = gcrefRegs;
+ id->idcByrefRegs = byrefRegs;
+ id->idcDisp = 0;
+ id->idcArgCnt = argCnt;
+
+#if MULTIREG_HAS_SECOND_GC_RET
+ emitSetSecondRetRegGCType(id, secondRetSize);
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+ return id;
+ }
+ else
+ {
+ instrDesc* id = emitNewInstrCns(retSize, argCnt);
+
+ // printf("Direct call w/o GC vars / big arg cnt / explicit scope\n");
+
+ /* Make sure we didn't waste space unexpectedly */
+ assert(!id->idIsLargeCns());
+
+        /* Save the live GC registers in the unused register fields */
+ emitEncodeCallGCregs(gcrefRegs, id);
+
+ return id;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ *  Return a string with the name of the given class field (an empty string,
+ *  not NULL, is returned when the name isn't available).
+ */
+
+const char* emitter::emitFldName(CORINFO_FIELD_HANDLE fieldVal)
+{
+ if (emitComp->opts.varNames)
+ {
+ const char* memberName;
+ const char* className;
+
+ const int TEMP_BUFFER_LEN = 1024;
+ static char buff[TEMP_BUFFER_LEN];
+
+ memberName = emitComp->eeGetFieldName(fieldVal, &className);
+
+ sprintf_s(buff, TEMP_BUFFER_LEN, "'<%s>.%s'", className, memberName);
+ return buff;
+ }
+ else
+ {
+ return "";
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Return a string with the name of the given function (an empty string,
+ *  not NULL, is returned when the name isn't available).
+ */
+
+const char* emitter::emitFncName(CORINFO_METHOD_HANDLE methHnd)
+{
+ return emitComp->eeGetMethodFullName(methHnd);
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ *  Be very careful: some instruction descriptors are allocated as "tiny" and
+ * don't have some of the tail fields of instrDesc (in particular, "idInfo").
+ */
+
+const BYTE emitter::emitFmtToOps[] = {
+#define IF_DEF(en, op1, op2) ID_OP_##op2,
+#include "emitfmts.h"
+};
+
+#ifdef DEBUG
+const unsigned emitter::emitFmtCount = sizeof(emitFmtToOps) / sizeof(emitFmtToOps[0]);
+#endif
+
+/*****************************************************************************
+ *
+ * Display the current instruction group list.
+ */
+
+#ifdef DEBUG
+
+void emitter::emitDispIGflags(unsigned flags)
+{
+ if (flags & IGF_GC_VARS)
+ {
+ printf(", gcvars");
+ }
+ if (flags & IGF_BYREF_REGS)
+ {
+ printf(", byref");
+ }
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (flags & IGF_FINALLY_TARGET)
+ {
+ printf(", ftarget");
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (flags & IGF_FUNCLET_PROLOG)
+ {
+ printf(", funclet prolog");
+ }
+ if (flags & IGF_FUNCLET_EPILOG)
+ {
+ printf(", funclet epilog");
+ }
+ if (flags & IGF_EPILOG)
+ {
+ printf(", epilog");
+ }
+ if (flags & IGF_NOGCINTERRUPT)
+ {
+ printf(", nogc");
+ }
+ if (flags & IGF_UPD_ISZ)
+ {
+ printf(", isz");
+ }
+ if (flags & IGF_EMIT_ADD)
+ {
+ printf(", emitadd");
+ }
+}
+
+void emitter::emitDispIG(insGroup* ig, insGroup* igPrev, bool verbose)
+{
+ const int TEMP_BUFFER_LEN = 40;
+ char buff[TEMP_BUFFER_LEN];
+
+ sprintf_s(buff, TEMP_BUFFER_LEN, "G_M%03u_IG%02u: ", Compiler::s_compMethodsCount, ig->igNum);
+ printf("%s; ", buff);
+ if ((igPrev == nullptr) || (igPrev->igFuncIdx != ig->igFuncIdx))
+ {
+ printf("func=%02u, ", ig->igFuncIdx);
+ }
+
+ if (ig->igFlags & IGF_PLACEHOLDER)
+ {
+ insGroup* igPh = ig;
+
+ const char* pszType;
+ switch (igPh->igPhData->igPhType)
+ {
+ case IGPT_PROLOG:
+ pszType = "prolog";
+ break;
+ case IGPT_EPILOG:
+ pszType = "epilog";
+ break;
+#if FEATURE_EH_FUNCLETS
+ case IGPT_FUNCLET_PROLOG:
+ pszType = "funclet prolog";
+ break;
+ case IGPT_FUNCLET_EPILOG:
+ pszType = "funclet epilog";
+ break;
+#endif // FEATURE_EH_FUNCLETS
+ default:
+ pszType = "UNKNOWN";
+ break;
+ }
+ printf("%s placeholder, next placeholder=", pszType);
+ if (igPh->igPhData->igPhNext)
+ {
+ printf("IG%02u ", igPh->igPhData->igPhNext->igNum);
+ }
+ else
+ {
+ printf("<END>");
+ }
+ printf(", BB=%08XH (BB%02u)", dspPtr(igPh->igPhData->igPhBB),
+ (igPh->igPhData->igPhBB != nullptr) ? igPh->igPhData->igPhBB->bbNum : 0);
+
+ emitDispIGflags(igPh->igFlags);
+
+ if (ig == emitCurIG)
+ {
+ printf(" <-- Current IG");
+ }
+ if (igPh == emitPlaceholderList)
+ {
+ printf(" <-- First placeholder");
+ }
+ if (igPh == emitPlaceholderLast)
+ {
+ printf(" <-- Last placeholder");
+ }
+ printf("\n");
+
+ printf("%*s; PrevGCVars=%s ", strlen(buff), "",
+ VarSetOps::ToString(emitComp, igPh->igPhData->igPhPrevGCrefVars));
+ dumpConvertedVarSet(emitComp, igPh->igPhData->igPhPrevGCrefVars);
+ printf(", PrevGCrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhPrevGCrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhPrevGCrefRegs);
+ printf(", PrevByrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhPrevByrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhPrevByrefRegs);
+ printf("\n");
+
+ printf("%*s; InitGCVars=%s ", strlen(buff), "",
+ VarSetOps::ToString(emitComp, igPh->igPhData->igPhInitGCrefVars));
+ dumpConvertedVarSet(emitComp, igPh->igPhData->igPhInitGCrefVars);
+ printf(", InitGCrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhInitGCrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhInitGCrefRegs);
+ printf(", InitByrefRegs=");
+ printRegMaskInt(igPh->igPhData->igPhInitByrefRegs);
+ emitDispRegSet(igPh->igPhData->igPhInitByrefRegs);
+ printf("\n");
+
+ assert(!(ig->igFlags & IGF_GC_VARS));
+ assert(!(ig->igFlags & IGF_BYREF_REGS));
+ }
+ else
+ {
+ printf("offs=%06XH, size=%04XH", ig->igOffs, ig->igSize);
+
+ if (ig->igFlags & IGF_GC_VARS)
+ {
+ printf(", gcVars=%s ", VarSetOps::ToString(emitComp, ig->igGCvars()));
+ dumpConvertedVarSet(emitComp, ig->igGCvars());
+ }
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ printf(", gcrefRegs=");
+ printRegMaskInt(ig->igGCregs);
+ emitDispRegSet(ig->igGCregs);
+ }
+
+ if (ig->igFlags & IGF_BYREF_REGS)
+ {
+ printf(", byrefRegs=");
+ printRegMaskInt(ig->igByrefRegs());
+ emitDispRegSet(ig->igByrefRegs());
+ }
+
+ emitDispIGflags(ig->igFlags);
+
+ if (ig == emitCurIG)
+ {
+ printf(" <-- Current IG");
+ }
+ if (ig == emitPrologIG)
+ {
+ printf(" <-- Prolog IG");
+ }
+ printf("\n");
+
+ if (verbose)
+ {
+ BYTE* ins = ig->igData;
+ UNATIVE_OFFSET ofs = ig->igOffs;
+ unsigned cnt = ig->igInsCnt;
+
+ if (cnt)
+ {
+ printf("\n");
+
+ do
+ {
+ instrDesc* id = (instrDesc*)ins;
+
+ emitDispIns(id, false, true, false, ofs, nullptr, 0, ig);
+
+ ins += emitSizeOfInsDsc(id);
+ ofs += emitInstCodeSz(id);
+ } while (--cnt);
+
+ printf("\n");
+ }
+ }
+ }
+}
+
+void emitter::emitDispIGlist(bool verbose)
+{
+ insGroup* ig;
+ insGroup* igPrev;
+
+ for (igPrev = nullptr, ig = emitIGlist; ig; igPrev = ig, ig = ig->igNext)
+ {
+ emitDispIG(ig, igPrev, verbose);
+ }
+}
+
+void emitter::emitDispGCinfo()
+{
+ printf("Emitter GC tracking info:");
+ printf("\n emitPrevGCrefVars(0x%p)=%016llX ", dspPtr(&emitPrevGCrefVars), emitPrevGCrefVars);
+ dumpConvertedVarSet(emitComp, emitPrevGCrefVars);
+ printf("\n emitPrevGCrefRegs(0x%p)=", dspPtr(&emitPrevGCrefRegs));
+ printRegMaskInt(emitPrevGCrefRegs);
+ emitDispRegSet(emitPrevGCrefRegs);
+ printf("\n emitPrevByrefRegs(0x%p)=", dspPtr(&emitPrevByrefRegs));
+ printRegMaskInt(emitPrevByrefRegs);
+ emitDispRegSet(emitPrevByrefRegs);
+ printf("\n emitInitGCrefVars(0x%p)=%016llX ", dspPtr(&emitInitGCrefVars), emitInitGCrefVars);
+ dumpConvertedVarSet(emitComp, emitInitGCrefVars);
+ printf("\n emitInitGCrefRegs(0x%p)=", dspPtr(&emitInitGCrefRegs));
+ printRegMaskInt(emitInitGCrefRegs);
+ emitDispRegSet(emitInitGCrefRegs);
+ printf("\n emitInitByrefRegs(0x%p)=", dspPtr(&emitInitByrefRegs));
+ printRegMaskInt(emitInitByrefRegs);
+ emitDispRegSet(emitInitByrefRegs);
+ printf("\n emitThisGCrefVars(0x%p)=%016llX ", dspPtr(&emitThisGCrefVars), emitThisGCrefVars);
+ dumpConvertedVarSet(emitComp, emitThisGCrefVars);
+ printf("\n emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs));
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ printf("\n emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs));
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ printf("\n\n");
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Issue the given instruction. Basically, this is just a thin wrapper around
+ * emitOutputInstr() that does a few debug checks.
+ */
+
+size_t emitter::emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ size_t is;
+
+ /* Record the beginning offset of the instruction */
+
+ BYTE* curInsAdr = *dp;
+
+ /* Issue the next instruction */
+
+ // printf("[S=%02u] " , emitCurStackLvl);
+
+ is = emitOutputInstr(ig, id, dp);
+
+// printf("[S=%02u]\n", emitCurStackLvl);
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /*
+ If we're generating a full pointer map and the stack
+ is empty, there better not be any "pending" argument
+ push entries.
+ */
+
+ assert(emitFullGCinfo == false || emitCurStackLvl != 0 || u2.emitGcArgTrackCnt == 0);
+
+#endif
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ /* Did the size of the instruction match our expectations? */
+
+ UNATIVE_OFFSET csz = (UNATIVE_OFFSET)(*dp - curInsAdr);
+
+ if (csz != id->idCodeSize())
+ {
+ /* It is fatal to under-estimate the instruction size */
+ noway_assert(emitInstCodeSz(id) >= csz);
+
+#if DEBUG_EMIT
+ if (EMITVERBOSE)
+ {
+ printf("Instruction predicted size = %u, actual = %u\n", emitInstCodeSz(id), csz);
+ }
+#endif // DEBUG_EMIT
+
+ /* The instruction size estimate wasn't accurate; remember this */
+
+ ig->igFlags |= IGF_UPD_ISZ;
+#if defined(_TARGET_XARCH_)
+ id->idCodeSize(csz);
+#elif defined(_TARGET_ARM_)
+// This is done as part of emitSetShortJump();
+// insSize isz = emitInsSize(id->idInsFmt());
+// id->idInsSize(isz);
+#else
+ /* It is fatal to over-estimate the instruction size */
+ IMPL_LIMITATION("Over-estimated instruction size");
+#endif
+ }
+
+#endif
+
+#ifdef DEBUG
+ /* Make sure the instruction descriptor size also matches our expectations */
+ if (is != emitSizeOfInsDsc(id))
+ {
+ printf("%s at %u: Expected size = %u , actual size = %u\n", emitIfName(id->idInsFmt()),
+ id->idDebugOnlyInfo()->idNum, is, emitSizeOfInsDsc(id));
+ assert(is == emitSizeOfInsDsc(id));
+ }
+#endif
+
+ return is;
+}
+
+/*****************************************************************************
+ *
+ * Update the offsets of all the instruction groups (note: please don't be
+ *  lazy and call this routine frequently; it walks the list of instruction
+ * groups and thus it isn't cheap).
+ */
+
+void emitter::emitRecomputeIGoffsets()
+{
+ UNATIVE_OFFSET offs;
+ insGroup* ig;
+
+ for (ig = emitIGlist, offs = 0; ig; ig = ig->igNext)
+ {
+ ig->igOffs = offs;
+ assert(IsCodeAligned(ig->igOffs));
+ offs += ig->igSize;
+ }
+
+ /* Set the total code size */
+
+ emitTotalCodeSize = offs;
+
+#ifdef DEBUG
+ emitCheckIGoffsets();
+#endif
+}
+
+/*****************************************************************************
+ * Bind targets of relative jumps to choose the smallest possible encoding.
+ * X86 and AMD64 have a small and large encoding.
+ * ARM has a small, medium, and large encoding. The large encoding is a pseudo-op
+ * to handle greater range than the conditional branch instructions can handle.
+ * ARM64 has a small and large encoding for both conditional branch and loading label addresses.
+ * The large encodings are pseudo-ops that represent a multiple instruction sequence, similar to ARM. (Currently
+ * NYI).
+ */
+
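+// For reference on x86/AMD64: an unconditional jmp is 2 bytes with a rel8 displacement
+// versus 5 bytes with rel32, and a conditional Jcc is 2 bytes (rel8) versus 6 bytes
+// (rel32). Shrinking any one jump shifts all downstream code, which is why the loop
+// below may iterate (see 'jmp_iteration' and the AGAIN label).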
+void emitter::emitJumpDistBind()
+{
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("*************** In emitJumpDistBind()\n");
+ }
+ if (EMIT_INSTLIST_VERBOSE)
+ {
+ printf("\nInstruction list before jump distance binding:\n\n");
+ emitDispIGlist(true);
+ }
+#endif
+
+ instrDescJmp* jmp;
+
+    UNATIVE_OFFSET minShortExtra; // The smallest distance by which any jump exceeds the short-jump range.
+                                  // If it is small enough, we will iterate in hopes of
+                                  // converting those jumps we missed converting the first (or second...) time.
+
+#if defined(_TARGET_ARM_)
+ UNATIVE_OFFSET minMediumExtra; // Same as 'minShortExtra', but for medium-sized jumps.
+#endif // _TARGET_ARM_
+
+ UNATIVE_OFFSET adjIG;
+ UNATIVE_OFFSET adjLJ;
+ insGroup* lstIG;
+#ifdef DEBUG
+ insGroup* prologIG = emitPrologIG;
+#endif // DEBUG
+
+ int jmp_iteration = 1;
+
+/*****************************************************************************/
+/* If we iterate to look for more jumps to shorten, we start again here. */
+/*****************************************************************************/
+
+AGAIN:
+
+#ifdef DEBUG
+ emitCheckIGoffsets();
+#endif
+
+/*
+ In the following loop we convert all jump targets from "BasicBlock *"
+ to "insGroup *" values. We also estimate which jumps will be short.
+ */
+
+#ifdef DEBUG
+ insGroup* lastIG = nullptr;
+ instrDescJmp* lastLJ = nullptr;
+#endif
+
+ lstIG = nullptr;
+ adjLJ = 0;
+ adjIG = 0;
+ minShortExtra = (UNATIVE_OFFSET)-1;
+
+#if defined(_TARGET_ARM_)
+ minMediumExtra = (UNATIVE_OFFSET)-1;
+#endif // _TARGET_ARM_
+
+ for (jmp = emitJumpList; jmp; jmp = jmp->idjNext)
+ {
+ insGroup* jmpIG;
+ insGroup* tgtIG;
+
+ UNATIVE_OFFSET jsz; // size of the jump instruction in bytes
+
+ UNATIVE_OFFSET ssz = 0; // small jump size
+ NATIVE_OFFSET nsd = 0; // small jump max. neg distance
+ NATIVE_OFFSET psd = 0; // small jump max. pos distance
+
+#if defined(_TARGET_ARM_)
+ UNATIVE_OFFSET msz = 0; // medium jump size
+ NATIVE_OFFSET nmd = 0; // medium jump max. neg distance
+ NATIVE_OFFSET pmd = 0; // medium jump max. pos distance
+ NATIVE_OFFSET mextra; // How far beyond the medium jump range is this jump offset?
+#endif // _TARGET_ARM_
+
+ NATIVE_OFFSET extra; // How far beyond the short jump range is this jump offset?
+ UNATIVE_OFFSET srcInstrOffs; // offset of the source instruction of the jump
+ UNATIVE_OFFSET srcEncodingOffs; // offset of the source used by the instruction set to calculate the relative
+ // offset of the jump
+ UNATIVE_OFFSET dstOffs;
+ NATIVE_OFFSET jmpDist; // the relative jump distance, as it will be encoded
+ UNATIVE_OFFSET oldSize;
+ UNATIVE_OFFSET sizeDif;
+
+#ifdef _TARGET_XARCH_
+ assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL);
+
+ /* Figure out the smallest size we can end up with */
+
+ if (jmp->idInsFmt() == IF_LABEL)
+ {
+ if (emitIsCondJump(jmp))
+ {
+ ssz = JCC_SIZE_SMALL;
+ nsd = JCC_DIST_SMALL_MAX_NEG;
+ psd = JCC_DIST_SMALL_MAX_POS;
+ }
+ else
+ {
+ ssz = JMP_SIZE_SMALL;
+ nsd = JMP_DIST_SMALL_MAX_NEG;
+ psd = JMP_DIST_SMALL_MAX_POS;
+ }
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+ assert((jmp->idInsFmt() == IF_T2_J1) || (jmp->idInsFmt() == IF_T2_J2) || (jmp->idInsFmt() == IF_T1_I) ||
+ (jmp->idInsFmt() == IF_T1_K) || (jmp->idInsFmt() == IF_T1_M) || (jmp->idInsFmt() == IF_T2_M1) ||
+ (jmp->idInsFmt() == IF_T2_N1) || (jmp->idInsFmt() == IF_T1_J3) || (jmp->idInsFmt() == IF_LARGEJMP));
+
+ /* Figure out the smallest size we can end up with */
+
+ if (emitIsCondJump(jmp))
+ {
+ ssz = JCC_SIZE_SMALL;
+ nsd = JCC_DIST_SMALL_MAX_NEG;
+ psd = JCC_DIST_SMALL_MAX_POS;
+
+ msz = JCC_SIZE_MEDIUM;
+ nmd = JCC_DIST_MEDIUM_MAX_NEG;
+ pmd = JCC_DIST_MEDIUM_MAX_POS;
+ }
+ else if (emitIsCmpJump(jmp))
+ {
+ ssz = JMP_SIZE_SMALL;
+ nsd = 0;
+ psd = 126;
+ }
+ else if (emitIsUncondJump(jmp))
+ {
+ ssz = JMP_SIZE_SMALL;
+ nsd = JMP_DIST_SMALL_MAX_NEG;
+ psd = JMP_DIST_SMALL_MAX_POS;
+ }
+ else if (emitIsLoadLabel(jmp))
+ {
+ ssz = LBL_SIZE_SMALL;
+ nsd = LBL_DIST_SMALL_MAX_NEG;
+ psd = LBL_DIST_SMALL_MAX_POS;
+ }
+ else
+ {
+ assert(!"Unknown jump instruction");
+ }
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+ /* Figure out the smallest size we can end up with */
+
+ if (emitIsCondJump(jmp))
+ {
+ ssz = JCC_SIZE_SMALL;
+ nsd = JCC_DIST_SMALL_MAX_NEG;
+ psd = JCC_DIST_SMALL_MAX_POS;
+ }
+ else if (emitIsUncondJump(jmp))
+ {
+ // Nothing to do; we don't shrink these.
+ assert(jmp->idjShort);
+ ssz = JMP_SIZE_SMALL;
+ }
+ else if (emitIsCmpJump(jmp))
+ {
+ NYI("branch shortening compare-and-branch instructions");
+ }
+ else if (emitIsLoadLabel(jmp))
+ {
+ ssz = LBL_SIZE_SMALL;
+ nsd = LBL_DIST_SMALL_MAX_NEG;
+ psd = LBL_DIST_SMALL_MAX_POS;
+ }
+ else if (emitIsLoadConstant(jmp))
+ {
+ ssz = LDC_SIZE_SMALL;
+ nsd = LDC_DIST_SMALL_MAX_NEG;
+ psd = LDC_DIST_SMALL_MAX_POS;
+ }
+ else
+ {
+ assert(!"Unknown jump instruction");
+ }
+#endif // _TARGET_ARM64_
+
+/* Make sure the jumps are properly ordered */
+
+#ifdef DEBUG
+ assert(lastLJ == nullptr || lastIG != jmp->idjIG || lastLJ->idjOffs < jmp->idjOffs);
+ lastLJ = (lastIG == jmp->idjIG) ? jmp : nullptr;
+
+ assert(lastIG == nullptr || lastIG->igNum <= jmp->idjIG->igNum || jmp->idjIG == prologIG ||
+ emitNxtIGnum > unsigned(0xFFFF)); // igNum might overflow
+ lastIG = jmp->idjIG;
+#endif // DEBUG
+
+ /* Get hold of the current jump size */
+
+ jsz = emitSizeOfJump(jmp);
+
+ /* Get the group the jump is in */
+
+ jmpIG = jmp->idjIG;
+
+ /* Are we in a group different from the previous jump? */
+
+ if (lstIG != jmpIG)
+ {
+ /* Were there any jumps before this one? */
+
+ if (lstIG)
+ {
+ /* Adjust the offsets of the intervening blocks */
+
+ do
+ {
+ lstIG = lstIG->igNext;
+ assert(lstIG);
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
+ lstIG->igOffs -= adjIG;
+ assert(IsCodeAligned(lstIG->igOffs));
+ } while (lstIG != jmpIG);
+ }
+
+ /* We've got the first jump in a new group */
+
+ adjLJ = 0;
+ lstIG = jmpIG;
+ }
+
+ /* Apply any local size adjustment to the jump's relative offset */
+
+ jmp->idjOffs -= adjLJ;
+
+ // If this is a jump via register, the instruction size does not change, so we are done.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_)
+ // JIT code and data will be allocated together for arm64 so the relative offset to JIT data is known.
+        // If the offset is encodable by `ldr` (+-1MB), use the short form.
+ if (jmp->idAddr()->iiaIsJitDataOffset())
+ {
+ // Reference to JIT data
+ assert(jmp->idIsBound());
+ UNATIVE_OFFSET srcOffs = jmpIG->igOffs + jmp->idjOffs;
+
+ int doff = jmp->idAddr()->iiaGetJitDataOffset();
+ assert(doff >= 0);
+ ssize_t imm = emitGetInsSC(jmp);
+ assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0
+
+ unsigned dataOffs = (unsigned)(doff + imm);
+ assert(dataOffs < emitDataSize());
+
+            // Conservatively assume JIT data starts after the entire code size.
+            // TODO-ARM64: we might consider using only the hot code size, which will be computed later in
+            // emitComputeCodeSizes().
+ assert(emitTotalCodeSize > 0);
+ UNATIVE_OFFSET maxDstOffs = emitTotalCodeSize + dataOffs;
+
+ // Check if the distance is within the encoding length.
+ jmpDist = maxDstOffs - srcOffs;
+ extra = jmpDist - psd;
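+            // For example (illustrative numbers): with srcOffs = 0x100, emitTotalCodeSize = 0x4000 and
+            // dataOffs = 0x20, maxDstOffs = 0x4020 and jmpDist = 0x3F20, which is well within the +-1MB
+            // 'ldr' range, so 'extra' is <= 0 and we take the short form below.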
+ if (extra <= 0)
+ {
+ goto SHORT_JMP;
+ }
+
+ // Keep the large form.
+ continue;
+ }
+#endif
+
+ /* Have we bound this jump's target already? */
+
+ if (jmp->idIsBound())
+ {
+ /* Does the jump already have the smallest size? */
+
+ if (jmp->idjShort)
+ {
+ assert(emitSizeOfJump(jmp) == ssz);
+
+ // We should not be jumping/branching across funclets/functions
+ emitCheckFuncletBranch(jmp, jmpIG);
+
+ continue;
+ }
+
+ tgtIG = jmp->idAddr()->iiaIGlabel;
+ }
+ else
+ {
+ /* First time we've seen this label, convert its target */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("Binding: ");
+ emitDispIns(jmp, false, false, false);
+ printf("Binding L_M%03u_BB%02u ", Compiler::s_compMethodsCount, jmp->idAddr()->iiaBBlabel->bbNum);
+ }
+#endif // DEBUG
+
+ tgtIG = (insGroup*)emitCodeGetCookie(jmp->idAddr()->iiaBBlabel);
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ if (tgtIG)
+ {
+ printf("to G_M%03u_IG%02u\n", Compiler::s_compMethodsCount, tgtIG->igNum);
+ }
+ else
+ {
+ printf("-- ERROR, no emitter cookie for BB%02u; it is probably missing BBF_JMP_TARGET or "
+ "BBF_HAS_LABEL.\n",
+ jmp->idAddr()->iiaBBlabel->bbNum);
+ }
+ }
+ assert(tgtIG);
+#endif // DEBUG
+
+ /* Record the bound target */
+
+ jmp->idAddr()->iiaIGlabel = tgtIG;
+ jmp->idSetIsBound();
+ }
+
+ // We should not be jumping/branching across funclets/functions
+ emitCheckFuncletBranch(jmp, jmpIG);
+
+#ifdef _TARGET_XARCH_
+ /* Done if this is not a variable-sized jump */
+
+ if ((jmp->idIns() == INS_push) || (jmp->idIns() == INS_mov) || (jmp->idIns() == INS_call) ||
+ (jmp->idIns() == INS_push_hide))
+ {
+ continue;
+ }
+#endif
+#ifdef _TARGET_ARM_
+ if ((jmp->idIns() == INS_push) || (jmp->idIns() == INS_mov) || (jmp->idIns() == INS_movt) ||
+ (jmp->idIns() == INS_movw))
+ {
+ continue;
+ }
+#endif
+#ifdef _TARGET_ARM64_
+ // There is only one size of unconditional branch; we don't support functions larger than 2^28 bytes (our branch
+ // range).
+ if (emitIsUncondJump(jmp))
+ {
+ continue;
+ }
+#endif
+
+ /*
+ In the following distance calculations, if we're not actually
+ scheduling the code (i.e. reordering instructions), we can
+ use the actual offset of the jump (rather than the beg/end of
+ the instruction group) since the jump will not be moved around
+ and thus its offset is accurate.
+
+ First we need to figure out whether this jump is a forward or
+ backward one; to do this we simply look at the ordinals of the
+ group that contains the jump and the target.
+ */
+
+ srcInstrOffs = jmpIG->igOffs + jmp->idjOffs;
+
+ /* Note that the destination is always the beginning of an IG, so no need for an offset inside it */
+ dstOffs = tgtIG->igOffs;
+
+#if defined(_TARGET_ARM_)
+ srcEncodingOffs =
+ srcInstrOffs + 4; // For relative branches, ARM PC is always considered to be the instruction address + 4
+#elif defined(_TARGET_ARM64_)
+ srcEncodingOffs =
+ srcInstrOffs; // For relative branches, ARM64 PC is always considered to be the instruction address
+#else
+ srcEncodingOffs = srcInstrOffs + ssz; // Encoding offset of relative offset for small branch
+#endif
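+        // For example, on xarch a 2-byte short jump whose instruction starts at offset 0x10 and targets
+        // offset 0x40 has srcEncodingOffs = 0x12, so the encoded rel8 displacement would be 0x40 - 0x12 = 0x2E.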
+
+ if (jmpIG->igNum < tgtIG->igNum)
+ {
+ /* Forward jump */
+
+ /* Adjust the target offset by the current delta. This is a worst-case estimate, as jumps between
+ here and the target could be shortened, causing the actual distance to shrink.
+ */
+
+ dstOffs -= adjIG;
+
+ /* Compute the distance estimate */
+
+ jmpDist = dstOffs - srcEncodingOffs;
+
+ /* How much beyond the max. short distance does the jump go? */
+
+ extra = jmpDist - psd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != nullptr);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[1] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ }
+ printf("[1] Jump block is at %08X\n", jmpIG->igOffs);
+ printf("[1] Jump reloffset is %04X\n", jmp->idjOffs);
+ printf("[1] Jump source is at %08X\n", srcEncodingOffs);
+ printf("[1] Label block is at %08X\n", dstOffs);
+ printf("[1] Jump dist. is %04X\n", jmpDist);
+ if (extra > 0)
+ {
+ printf("[1] Dist excess [S] = %d \n", extra);
+ }
+ }
+ if (EMITVERBOSE)
+ {
+ printf("Estimate of fwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp),
+ jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist);
+ }
+#endif // DEBUG_EMIT
+
+ if (extra <= 0)
+ {
+ /* This jump will be a short one */
+ goto SHORT_JMP;
+ }
+ }
+ else
+ {
+ /* Backward jump */
+
+ /* Compute the distance estimate */
+
+ jmpDist = srcEncodingOffs - dstOffs;
+
+ /* How much beyond the max. short distance does the jump go? */
+
+ extra = jmpDist + nsd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != nullptr);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[2] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ }
+ printf("[2] Jump block is at %08X\n", jmpIG->igOffs);
+ printf("[2] Jump reloffset is %04X\n", jmp->idjOffs);
+ printf("[2] Jump source is at %08X\n", srcEncodingOffs);
+ printf("[2] Label block is at %08X\n", dstOffs);
+ printf("[2] Jump dist. is %04X\n", jmpDist);
+ if (extra > 0)
+ {
+ printf("[2] Dist excess [S] = %d \n", extra);
+ }
+ }
+ if (EMITVERBOSE)
+ {
+ printf("Estimate of bwd jump [%08X/%03u]: %04X -> %04X = %04X\n", dspPtr(jmp),
+ jmp->idDebugOnlyInfo()->idNum, srcInstrOffs, dstOffs, jmpDist);
+ }
+#endif // DEBUG_EMIT
+
+ if (extra <= 0)
+ {
+ /* This jump will be a short one */
+ goto SHORT_JMP;
+ }
+ }
+
+ /* We arrive here if the jump couldn't be made short, at least for now */
+
+ /* We had better not have eagerly marked the jump as short
+ * in emitIns_J(). If we did, then it has to be able to stay short
+ * as emitIns_J() uses the worst case scenario, and blocks can
+ * only move closer together after that.
+ */
+ assert(jmp->idjShort == 0);
+
+ /* Keep track of the closest distance we got */
+
+ if (minShortExtra > (unsigned)extra)
+ {
+ minShortExtra = (unsigned)extra;
+ }
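+        // 'minShortExtra' is compared against the total shrinkage ('adjIG') once the loop completes; if some
+        // jump missed the short range by less than the amount we shrank, another iteration may convert it.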
+
+#if defined(_TARGET_ARM_)
+
+ // If we're here, we couldn't convert to a small jump.
+ // Handle conversion to medium-sized conditional jumps.
+ // 'srcInstrOffs', 'srcEncodingOffs', 'dstOffs', 'jmpDist' have already been computed
+ // and don't need to be recomputed.
+
+ if (emitIsCondJump(jmp))
+ {
+ if (jmpIG->igNum < tgtIG->igNum)
+ {
+ /* Forward jump */
+
+ /* How much beyond the max. medium distance does the jump go? */
+
+ mextra = jmpDist - pmd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != NULL);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (mextra > 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[6] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ printf("[6] Dist excess [S] = %d \n", mextra);
+ }
+ }
+#endif // DEBUG_EMIT
+
+ if (mextra <= 0)
+ {
+ /* This jump will be a medium one */
+ goto MEDIUM_JMP;
+ }
+ }
+ else
+ {
+ /* Backward jump */
+
+ /* How much beyond the max. medium distance does the jump go? */
+
+ mextra = jmpDist + nmd;
+
+#if DEBUG_EMIT
+ assert(jmp->idDebugOnlyInfo() != NULL);
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (mextra > 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[7] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ printf("[7] Dist excess [S] = %d \n", mextra);
+ }
+ }
+#endif // DEBUG_EMIT
+
+ if (mextra <= 0)
+ {
+ /* This jump will be a medium one */
+ goto MEDIUM_JMP;
+ }
+ }
+
+ /* We arrive here if the jump couldn't be made medium, at least for now */
+
+ /* Keep track of the closest distance we got */
+
+ if (minMediumExtra > (unsigned)mextra)
+ minMediumExtra = (unsigned)mextra;
+ }
+
+#endif // _TARGET_ARM_
+
+ /*****************************************************************************
+ * We arrive here if the jump must stay long, at least for now.
+ * Go try the next one.
+ */
+
+ continue;
+
+ /*****************************************************************************/
+ /* Handle conversion to short jump */
+ /*****************************************************************************/
+
+ SHORT_JMP:
+
+ /* Try to make this jump a short one */
+
+ emitSetShortJump(jmp);
+
+ if (!jmp->idjShort)
+ {
+ continue; // This jump must be kept long
+ }
+
+ /* This jump is becoming either short or medium */
+
+ oldSize = jsz;
+ jsz = ssz;
+ assert(oldSize >= jsz);
+ sizeDif = oldSize - jsz;
+
+#if defined(_TARGET_XARCH_)
+ jmp->idCodeSize(jsz);
+#elif defined(_TARGET_ARM_)
+#if 0
+ // This is done as part of emitSetShortJump():
+ insSize isz = emitInsSize(jmp->idInsFmt());
+ jmp->idInsSize(isz);
+#endif
+#elif defined(_TARGET_ARM64_)
+        // The sizes of IF_LARGEJMP/IF_LARGEADR/IF_LARGELDC are 8 or 12 bytes; all other instructions are
+        // 4 bytes, so the size difference must be 4 or 8.
+ assert((sizeDif == 4) || (sizeDif == 8));
+#else
+#error Unsupported or unset target architecture
+#endif
+
+ goto NEXT_JMP;
+
+#if defined(_TARGET_ARM_)
+
+ /*****************************************************************************/
+ /* Handle conversion to medium jump */
+ /*****************************************************************************/
+
+ MEDIUM_JMP:
+
+ /* Try to make this jump a medium one */
+
+ emitSetMediumJump(jmp);
+
+ if (jmp->idCodeSize() > msz)
+ {
+ continue; // This jump wasn't shortened
+ }
+ assert(jmp->idCodeSize() == msz);
+
+ /* This jump is becoming medium */
+
+ oldSize = jsz;
+ jsz = msz;
+ assert(oldSize >= jsz);
+ sizeDif = oldSize - jsz;
+
+ goto NEXT_JMP;
+
+#endif // _TARGET_ARM_
+
+ /*****************************************************************************/
+
+ NEXT_JMP:
+
+ /* Make sure the size of the jump is marked correctly */
+
+ assert((0 == (jsz | jmpDist)) || (jsz == emitSizeOfJump(jmp)));
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("Shrinking jump [%08X/%03u]\n", dspPtr(jmp), jmp->idDebugOnlyInfo()->idNum);
+ }
+#endif
+ noway_assert((unsigned short)sizeDif == sizeDif);
+
+ adjIG += sizeDif;
+ adjLJ += sizeDif;
+ jmpIG->igSize -= (unsigned short)sizeDif;
+ emitTotalCodeSize -= sizeDif;
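+        // 'adjIG' accumulates the total shrinkage for the whole method, while 'adjLJ' accumulates it only
+        // within the current group (it is reset when we move to a new group) and is applied to the
+        // group-relative offsets of the jumps that follow.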
+
+ /* The jump size estimate wasn't accurate; flag its group */
+
+ jmpIG->igFlags |= IGF_UPD_ISZ;
+
+ } // end for each jump
+
+ /* Did we shorten any jumps? */
+
+ if (adjIG)
+ {
+ /* Adjust offsets of any remaining blocks */
+
+ assert(lstIG);
+
+ for (;;)
+ {
+ lstIG = lstIG->igNext;
+ if (!lstIG)
+ {
+ break;
+ }
+ // printf("Adjusted offset of block %02u from %04X to %04X\n", lstIG->igNum, lstIG->igOffs,
+ // lstIG->igOffs - adjIG);
+ lstIG->igOffs -= adjIG;
+ assert(IsCodeAligned(lstIG->igOffs));
+ }
+
+#ifdef DEBUG
+ emitCheckIGoffsets();
+#endif
+
+ /* Is there a chance of other jumps becoming short? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+#if defined(_TARGET_ARM_)
+ if (EMITVERBOSE)
+ printf("Total shrinkage = %3u, min extra short jump size = %3u, min extra medium jump size = %u\n", adjIG,
+ minShortExtra, minMediumExtra);
+#else
+ if (EMITVERBOSE)
+ {
+ printf("Total shrinkage = %3u, min extra jump size = %3u\n", adjIG, minShortExtra);
+ }
+#endif
+#endif
+
+ if ((minShortExtra <= adjIG)
+#if defined(_TARGET_ARM_)
+ || (minMediumExtra <= adjIG)
+#endif // _TARGET_ARM_
+ )
+ {
+ jmp_iteration++;
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("Iterating branch shortening. Iteration = %d\n", jmp_iteration);
+ }
+#endif
+
+ goto AGAIN;
+ }
+ }
+}
+
+void emitter::emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG)
+{
+#ifdef DEBUG
+ // We should not be jumping/branching across funclets/functions
+ // Except possibly a 'call' to a finally funclet for a local unwind
+ // or a 'return' from a catch handler (that can go just about anywhere)
+ // This routine attempts to validate that any branches across funclets
+    // meet one of those criteria...
+ assert(jmp->idIsBound());
+
+#ifdef _TARGET_AMD64_
+ // An lea of a code address (for constant data stored with the code)
+ // is treated like a jump for emission purposes but is not really a jump so
+ // we don't have to check anything here.
+ if (jmp->idIns() == INS_lea)
+ {
+ return;
+ }
+#endif
+
+#ifdef _TARGET_ARMARCH_
+ if (jmp->idAddr()->iiaHasInstrCount())
+ {
+ // Too hard to figure out funclets from just an instruction count
+ // You're on your own!
+ return;
+ }
+#endif // _TARGET_ARMARCH_
+
+#ifdef _TARGET_ARM64_
+ // No interest if it's not jmp.
+ if (emitIsLoadLabel(jmp) || emitIsLoadConstant(jmp))
+ {
+ return;
+ }
+#endif // _TARGET_ARM64_
+
+ insGroup* tgtIG = jmp->idAddr()->iiaIGlabel;
+ assert(tgtIG);
+ if (tgtIG->igFuncIdx != jmpIG->igFuncIdx)
+ {
+ if (jmp->idDebugOnlyInfo()->idFinallyCall)
+ {
+ // We don't record enough information to determine this accurately, so instead
+ // we assume that any branch to the very start of a finally is OK.
+
+ // No branches back to the root method
+ assert(tgtIG->igFuncIdx > 0);
+ FuncInfoDsc* tgtFunc = emitComp->funGetFunc(tgtIG->igFuncIdx);
+ assert(tgtFunc->funKind == FUNC_HANDLER);
+ EHblkDsc* tgtEH = emitComp->ehGetDsc(tgtFunc->funEHIndex);
+
+ // Only branches to finallys (not faults, catches, filters, etc.)
+ assert(tgtEH->HasFinallyHandler());
+
+ // Only to the first block of the finally (which is properly marked)
+ BasicBlock* tgtBlk = tgtEH->ebdHndBeg;
+ assert(tgtBlk->bbFlags & BBF_FUNCLET_BEG);
+
+ // And now we made it back to where we started
+ assert(tgtIG == emitCodeGetCookie(tgtBlk));
+ assert(tgtIG->igFuncIdx == emitComp->funGetFuncIdx(tgtBlk));
+ }
+ else if (jmp->idDebugOnlyInfo()->idCatchRet)
+ {
+ // Again there isn't enough information to prove this correct
+ // so just allow a 'branch' to any other 'parent' funclet
+
+ FuncInfoDsc* jmpFunc = emitComp->funGetFunc(jmpIG->igFuncIdx);
+ assert(jmpFunc->funKind == FUNC_HANDLER);
+ EHblkDsc* jmpEH = emitComp->ehGetDsc(jmpFunc->funEHIndex);
+
+ // Only branches out of catches
+ assert(jmpEH->HasCatchHandler());
+
+ FuncInfoDsc* tgtFunc = emitComp->funGetFunc(tgtIG->igFuncIdx);
+ assert(tgtFunc);
+ if (tgtFunc->funKind == FUNC_HANDLER)
+ {
+ // An outward chain to the containing funclet/EH handler
+ // Note that it might be anywhere within nested try bodies
+ assert(jmpEH->ebdEnclosingHndIndex == tgtFunc->funEHIndex);
+ }
+ else
+ {
+ // This funclet is 'top level' and so it is branching back to the
+ // root function, and should have no containing EH handlers
+ // but it could be nested within try bodies...
+ assert(tgtFunc->funKind == FUNC_ROOT);
+ assert(jmpEH->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+ }
+ else
+ {
+ printf("Hit an illegal branch between funclets!");
+ assert(tgtIG->igFuncIdx == jmpIG->igFuncIdx);
+ }
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Compute the code sizes that we're going to use to allocate the code buffers.
+ *
+ * This sets:
+ *
+ * emitTotalHotCodeSize
+ * emitTotalColdCodeSize
+ * Compiler::info.compTotalHotCodeSize
+ * Compiler::info.compTotalColdCodeSize
+ */
+
+void emitter::emitComputeCodeSizes()
+{
+ assert((emitComp->fgFirstColdBlock == nullptr) == (emitFirstColdIG == nullptr));
+
+ if (emitFirstColdIG)
+ {
+ emitTotalHotCodeSize = emitFirstColdIG->igOffs;
+ emitTotalColdCodeSize = emitTotalCodeSize - emitTotalHotCodeSize;
+ }
+ else
+ {
+ emitTotalHotCodeSize = emitTotalCodeSize;
+ emitTotalColdCodeSize = 0;
+ }
+
+ emitComp->info.compTotalHotCodeSize = emitTotalHotCodeSize;
+ emitComp->info.compTotalColdCodeSize = emitTotalColdCodeSize;
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("\nHot code size = 0x%X bytes\n", emitTotalHotCodeSize);
+ printf("Cold code size = 0x%X bytes\n", emitTotalColdCodeSize);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Called at the end of code generation, this method creates the code, data
+ * and GC info blocks for the method. Returns the size of the method (which must fit in an unsigned).
+ */
+
+unsigned emitter::emitEndCodeGen(Compiler* comp,
+ bool contTrkPtrLcls,
+ bool fullyInt,
+ bool fullPtrMap,
+ bool returnsGCr,
+ unsigned xcptnsCount,
+ unsigned* prologSize,
+ unsigned* epilogSize,
+ void** codeAddr,
+ void** coldCodeAddr,
+ void** consAddr)
+{
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("*************** In emitEndCodeGen()\n");
+ }
+#endif
+
+ insGroup* ig;
+
+ BYTE* consBlock;
+ BYTE* codeBlock;
+ BYTE* coldCodeBlock;
+ BYTE* cp;
+
+ assert(emitCurIG == nullptr);
+
+ emitCodeBlock = nullptr;
+ emitConsBlock = nullptr;
+
+ /* Tell everyone whether we have fully interruptible code or not */
+
+ emitFullyInt = fullyInt;
+ emitFullGCinfo = fullPtrMap;
+
+#if EMITTER_STATS
+ GCrefsTable.record(emitGCrFrameOffsCnt);
+ emitSizeTable.record(static_cast<unsigned>(emitSizeMethod));
+ stkDepthTable.record(emitMaxStackDepth);
+#endif // EMITTER_STATS
+
+ // Default values, correct even if EMIT_TRACK_STACK_DEPTH is 0.
+ emitSimpleStkUsed = true;
+ u1.emitSimpleStkMask = 0;
+ u1.emitSimpleByrefStkMask = 0;
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Convert max. stack depth from # of bytes to # of entries */
+
+ emitMaxStackDepth /= sizeof(int);
+
+ /* Should we use the simple stack */
+
+ if (emitMaxStackDepth > MAX_SIMPLE_STK_DEPTH || emitFullGCinfo)
+ {
+ /* We won't use the "simple" argument table */
+
+ emitSimpleStkUsed = false;
+
+ /* Allocate the argument tracking table */
+
+ if (emitMaxStackDepth <= sizeof(u2.emitArgTrackLcl))
+ {
+ u2.emitArgTrackTab = (BYTE*)u2.emitArgTrackLcl;
+ }
+ else
+ {
+ u2.emitArgTrackTab = (BYTE*)emitGetMem(roundUp(emitMaxStackDepth));
+ }
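+        // Each entry in the argument tracking table is one byte holding the GCtype of the corresponding
+        // stack slot.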
+
+ u2.emitArgTrackTop = u2.emitArgTrackTab;
+ u2.emitGcArgTrackCnt = 0;
+ }
+#endif
+
+ if (emitEpilogCnt == 0)
+ {
+ /* No epilogs, make sure the epilog size is set to 0 */
+
+ emitEpilogSize = 0;
+
+#ifdef _TARGET_XARCH_
+ emitExitSeqSize = 0;
+#endif // _TARGET_XARCH_
+ }
+
+ /* Return the size of the epilog to the caller */
+
+ *epilogSize = emitEpilogSize;
+
+#ifdef _TARGET_XARCH_
+ *epilogSize += emitExitSeqSize;
+#endif // _TARGET_XARCH_
+
+#ifdef DEBUG
+ if (EMIT_INSTLIST_VERBOSE)
+ {
+ printf("\nInstruction list before instruction issue:\n\n");
+ emitDispIGlist(true);
+ }
+
+ emitCheckIGoffsets();
+#endif
+
+ /* Allocate the code block (and optionally the data blocks) */
+
+ // If we're doing procedure splitting and we found cold blocks, then
+ // allocate hot and cold buffers. Otherwise only allocate a hot
+ // buffer.
+
+ coldCodeBlock = nullptr;
+
+ CorJitAllocMemFlag allocMemFlag = CORJIT_ALLOCMEM_DEFAULT_CODE_ALIGN;
+
+#ifdef _TARGET_X86_
+ //
+ // These are the heuristics we use to decide whether or not to force the
+ // code to be 16-byte aligned.
+ //
+ // 1. For ngen code with IBC data, use 16-byte alignment if the method
+ // has been called more than BB_VERY_HOT_WEIGHT times.
+ // 2. For JITed code and ngen code without IBC data, use 16-byte alignment
+ // when the code is 16 bytes or smaller. We align small getters/setters
+    //    because they are penalized heavily on certain hardware when not 16-byte
+    //    aligned (VSWhidbey #373938). To minimize the size impact of this optimization,
+    //    we do not align large methods, since the penalty is amortized for them.
+ //
+ if (emitComp->fgHaveProfileData())
+ {
+ if (emitComp->fgCalledWeight > (BB_VERY_HOT_WEIGHT * emitComp->fgNumProfileRuns))
+ {
+ allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
+ }
+ }
+ else
+ {
+ if (emitTotalHotCodeSize <= 16)
+ {
+ allocMemFlag = CORJIT_ALLOCMEM_FLG_16BYTE_ALIGN;
+ }
+ }
+#endif
+
+#ifdef _TARGET_ARM64_
+    // For arm64, we always allocate JIT data adjacent to the code, similar to what the native compiler does.
+    // This allows us to use a single `ldr` to access data such as float constants and jump tables.
+ if (emitTotalColdCodeSize > 0)
+ {
+ // JIT data might be far away from the cold code.
+ NYI_ARM64("Need to handle fix-up to data from cold code.");
+ }
+
+ UNATIVE_OFFSET roDataAlignmentDelta = 0;
+ if (emitConsDsc.dsdOffs)
+ {
+        UNATIVE_OFFSET roDataAlignment = sizeof(void*); // 8-byte alignment by default.
+ roDataAlignmentDelta = (UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, roDataAlignment) - emitTotalHotCodeSize;
+ assert((roDataAlignmentDelta == 0) || (roDataAlignmentDelta == 4));
+ }
+ emitCmpHandle->allocMem(emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs, emitTotalColdCodeSize, 0,
+ xcptnsCount, allocMemFlag, (void**)&codeBlock, (void**)&coldCodeBlock, (void**)&consBlock);
+
+ consBlock = codeBlock + emitTotalHotCodeSize + roDataAlignmentDelta;
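+    // Resulting layout: [hot code][optional 4-byte pad][read-only data], so the data block is addressable
+    // from the code with a single PC-relative `ldr`/`adr`.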
+
+#else
+ emitCmpHandle->allocMem(emitTotalHotCodeSize, emitTotalColdCodeSize, emitConsDsc.dsdOffs, xcptnsCount, allocMemFlag,
+ (void**)&codeBlock, (void**)&coldCodeBlock, (void**)&consBlock);
+#endif
+
+ // if (emitConsDsc.dsdOffs)
+ // printf("Cons=%08X\n", consBlock);
+
+ /* Give the block addresses to the caller and other functions here */
+
+ *codeAddr = emitCodeBlock = codeBlock;
+ *coldCodeAddr = emitColdCodeBlock = coldCodeBlock;
+ *consAddr = emitConsBlock = consBlock;
+
+ /* Nothing has been pushed on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if EMIT_TRACK_STACK_DEPTH
+ emitCurStackLvl = 0;
+#endif
+
+ /* Assume no live GC ref variables on entry */
+
+ VarSetOps::ClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
+ emitThisGCrefRegs = emitThisByrefRegs = RBM_NONE;
+ emitThisGCrefVset = true;
+
+#ifdef DEBUG
+
+ emitIssuing = true;
+
+ // We don't use these after this point
+
+ VarSetOps::AssignNoCopy(emitComp, emitPrevGCrefVars, VarSetOps::UninitVal());
+ emitPrevGCrefRegs = emitPrevByrefRegs = 0xBAADFEED;
+
+ VarSetOps::AssignNoCopy(emitComp, emitInitGCrefVars, VarSetOps::UninitVal());
+ emitInitGCrefRegs = emitInitByrefRegs = 0xBAADFEED;
+
+#endif
+
+ /* Initialize the GC ref variable lifetime tracking logic */
+
+ codeGen->gcInfo.gcVarPtrSetInit();
+
+ emitSyncThisObjOffs = -1; /* -1 means no offset set */
+ emitSyncThisObjReg = REG_NA; /* REG_NA means not set */
+
+#ifdef JIT32_GCENCODER
+ if (emitComp->lvaKeepAliveAndReportThis())
+ {
+ assert(emitComp->lvaIsOriginalThisArg(0));
+ LclVarDsc* thisDsc = &emitComp->lvaTable[0];
+
+ /* If "this" (which is passed in as a register argument in REG_ARG_0)
+ is enregistered, we normally spot the "mov REG_ARG_0 -> thisReg"
+ in the prolog and note the location of "this" at that point.
+ However, if 'this' is enregistered into REG_ARG_0 itself, no code
+ will be generated in the prolog, so we explicitly need to note
+ the location of "this" here.
+ NOTE that we can do this even if "this" is not enregistered in
+ REG_ARG_0, and it will result in more accurate "this" info over the
+ prolog. However, as methods are not interruptible over the prolog,
+ we try to save space by avoiding that.
+ */
+
+ if (thisDsc->lvRegister)
+ {
+ emitSyncThisObjReg = thisDsc->lvRegNum;
+
+ if (emitSyncThisObjReg == (int)REG_ARG_0 &&
+ (codeGen->intRegState.rsCalleeRegArgMaskLiveIn & genRegMask(REG_ARG_0)))
+ {
+ if (emitFullGCinfo)
+ {
+ emitGCregLiveSet(GCT_GCREF, genRegMask(REG_ARG_0),
+ emitCodeBlock, // from offset 0
+ true);
+ }
+ else
+ {
+                    /* If emitFullGCinfo==false, then we don't use any
+                       regPtrDsc's and so we explicitly note the location
+                       of "this" in GCEncode.cpp
+ */
+ }
+ }
+ }
+ }
+#endif // JIT32_GCENCODER
+
+ emitContTrkPtrLcls = contTrkPtrLcls;
+
+ /* Are there any GC ref variables on the stack? */
+
+ if (emitGCrFrameOffsCnt)
+ {
+ size_t siz;
+ unsigned cnt;
+ unsigned num;
+ LclVarDsc* dsc;
+ int* tab;
+
+ /* Allocate and clear emitGCrFrameLiveTab[]. This is the table
+ mapping "stkOffs -> varPtrDsc". It holds a pointer to
+ the liveness descriptor that was created when the
+ variable became alive. When the variable becomes dead, the
+ descriptor will be appended to the liveness descriptor list, and
+ the entry in emitGCrFrameLiveTab[] will be made NULL.
+
+ Note that if all GC refs are assigned consecutively,
+           emitGCrFrameLiveTab[] needs to be only as big as the number of GC refs
+           present, instead of lvaTrackedCount.
+ */
+
+ siz = emitGCrFrameOffsCnt * sizeof(*emitGCrFrameLiveTab);
+ emitGCrFrameLiveTab = (varPtrDsc**)emitGetMem(roundUp(siz));
+ memset(emitGCrFrameLiveTab, 0, siz);
+
+ /* Allocate and fill in emitGCrFrameOffsTab[]. This is the table
+ mapping "varIndex -> stkOffs".
+ Non-ptrs or reg vars have entries of -1.
+           Entries for tracked stack byrefs have the lower bit set to 1.
+ */
+
+ emitTrkVarCnt = cnt = emitComp->lvaTrackedCount;
+ assert(cnt);
+ emitGCrFrameOffsTab = tab = (int*)emitGetMem(cnt * sizeof(int));
+
+ memset(emitGCrFrameOffsTab, -1, cnt * sizeof(int));
+
+ /* Now fill in all the actual used entries */
+
+ for (num = 0, dsc = emitComp->lvaTable, cnt = emitComp->lvaCount; num < cnt; num++, dsc++)
+ {
+ if (!dsc->lvOnFrame || (dsc->lvIsParam && !dsc->lvIsRegArg))
+ {
+ continue;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (num == emitComp->lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ int offs = dsc->lvStkOffs;
+
+ /* Is it within the interesting range of offsets */
+
+ if (offs >= emitGCrFrameOffsMin && offs < emitGCrFrameOffsMax)
+ {
+ /* Are tracked stack ptr locals laid out contiguously?
+ If not, skip non-ptrs. The emitter is optimized to work
+ with contiguous ptrs, but for EditNContinue, the variables
+ are laid out in the order they occur in the local-sig.
+ */
+
+ if (!emitContTrkPtrLcls)
+ {
+ if (!emitComp->lvaIsGCTracked(dsc))
+ {
+ continue;
+ }
+ }
+
+ unsigned indx = dsc->lvVarIndex;
+
+ assert(!dsc->lvRegister);
+ assert(dsc->lvTracked);
+ assert(dsc->lvRefCnt != 0);
+
+ assert(dsc->TypeGet() == TYP_REF || dsc->TypeGet() == TYP_BYREF);
+
+ assert(indx < emitComp->lvaTrackedCount);
+
+// printf("Variable #%2u/%2u is at stack offset %d\n", num, indx, offs);
+
+#ifdef JIT32_GCENCODER
+ /* Remember the frame offset of the "this" argument for synchronized methods */
+ if (emitComp->lvaIsOriginalThisArg(num) && emitComp->lvaKeepAliveAndReportThis())
+ {
+ emitSyncThisObjOffs = offs;
+ offs |= this_OFFSET_FLAG;
+ }
+#endif // JIT32_GCENCODER
+
+ if (dsc->TypeGet() == TYP_BYREF)
+ {
+ offs |= byref_OFFSET_FLAG;
+ }
+ tab[indx] = offs;
+ }
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ emitTrkVarCnt = 0;
+ emitGCrFrameOffsTab = nullptr;
+#endif
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("\n***************************************************************************\n");
+ printf("Instructions as they come out of the scheduler\n\n");
+ }
+#endif
+
+ /* Issue all instruction groups in order */
+ cp = codeBlock;
+
+#define DEFAULT_CODE_BUFFER_INIT 0xcc
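+// On xarch, 0xcc is the 'int 3' breakpoint opcode, so execution that falls into unused buffer space traps
+// immediately; on other targets it simply serves as a recognizable filler byte.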
+
+ for (ig = emitIGlist; ig; ig = ig->igNext)
+ {
+ assert(!(ig->igFlags & IGF_PLACEHOLDER)); // There better not be any placeholder groups left
+
+ /* Is this the first cold block? */
+ if (ig == emitFirstColdIG)
+ {
+ unsigned actualHotCodeSize = emitCurCodeOffs(cp);
+
+            /* Fill in any unused space */
+ while (emitCurCodeOffs(cp) < emitTotalHotCodeSize)
+ {
+ *cp++ = DEFAULT_CODE_BUFFER_INIT;
+ }
+
+ assert(coldCodeBlock);
+ cp = coldCodeBlock;
+#ifdef DEBUG
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ printf("\n************** Beginning of cold code **************\n");
+ }
+#endif
+ }
+
+ /* Are we overflowing? */
+ if (ig->igNext && ig->igNum + 1 != ig->igNext->igNum)
+ {
+ NO_WAY("Too many instruction groups");
+ }
+
+        // If this instruction group is the target of a return from a funclet implementing a finally, then on
+        // architectures where it is necessary, generate GC info for the current instruction as if it were the
+        // instruction following a call.
+ emitGenGCInfoIfFuncletRetTarget(ig, cp);
+
+ instrDesc* id = (instrDesc*)ig->igData;
+
+#ifdef DEBUG
+
+ /* Print the IG label, but only if it is a branch label */
+
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ if (emitComp->verbose)
+ {
+ printf("\n");
+ emitDispIG(ig); // Display the flags, IG data, etc.
+ }
+ else
+ {
+ printf("\nG_M%03u_IG%02u:\n", Compiler::s_compMethodsCount, ig->igNum);
+ }
+ }
+
+#endif // DEBUG
+
+ BYTE* bp = cp;
+
+ /* Record the actual offset of the block, noting the difference */
+
+ emitOffsAdj = ig->igOffs - emitCurCodeOffs(cp);
+ assert(emitOffsAdj >= 0);
+
+#if DEBUG_EMIT
+ if ((emitOffsAdj != 0) && emitComp->verbose)
+ {
+ printf("Block predicted offs = %08X, actual = %08X -> size adj = %d\n", ig->igOffs, emitCurCodeOffs(cp),
+ emitOffsAdj);
+ }
+#endif // DEBUG_EMIT
+
+ ig->igOffs = emitCurCodeOffs(cp);
+ assert(IsCodeAligned(ig->igOffs));
+
+#if EMIT_TRACK_STACK_DEPTH
+
+ /* Set the proper stack level if appropriate */
+
+ if (ig->igStkLvl != emitCurStackLvl)
+ {
+ /* We are pushing stuff implicitly at this label */
+
+ assert((unsigned)ig->igStkLvl > (unsigned)emitCurStackLvl);
+ emitStackPushN(cp, (ig->igStkLvl - (unsigned)emitCurStackLvl) / sizeof(int));
+ }
+
+#endif
+
+ /* Update current GC information for non-overflow IG (not added implicitly by the emitter) */
+
+ if (!(ig->igFlags & IGF_EMIT_ADD))
+ {
+ /* Is there a new set of live GC ref variables? */
+
+ if (ig->igFlags & IGF_GC_VARS)
+ {
+ emitUpdateLiveGCvars(ig->igGCvars(), cp);
+ }
+ else if (!emitThisGCrefVset)
+ {
+ emitUpdateLiveGCvars(emitThisGCrefVars, cp);
+ }
+
+ /* Update the set of live GC ref registers */
+
+ {
+ regMaskTP GCregs = ig->igGCregs;
+
+ if (GCregs != emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, GCregs, cp);
+ }
+ }
+
+ /* Is there a new set of live byref registers? */
+
+ if (ig->igFlags & IGF_BYREF_REGS)
+ {
+ unsigned byrefRegs = ig->igByrefRegs();
+
+ if (byrefRegs != emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, cp);
+ }
+ }
+ }
+ else
+ {
+ // These are not set for "overflow" groups
+ assert(!(ig->igFlags & IGF_GC_VARS));
+ assert(!(ig->igFlags & IGF_BYREF_REGS));
+ }
+
+ /* Issue each instruction in order */
+
+ emitCurIG = ig;
+
+ for (unsigned cnt = ig->igInsCnt; cnt; cnt--)
+ {
+ castto(id, BYTE*) += emitIssue1Instr(ig, id, &cp);
+ }
+
+ emitCurIG = nullptr;
+
+ assert(ig->igSize >= cp - bp);
+ ig->igSize = (unsigned short)(cp - bp);
+ }
+
+#if EMIT_TRACK_STACK_DEPTH
+ assert(emitCurStackLvl == 0);
+#endif
+
+ /* Output any initialized data we may have */
+
+ if (emitConsDsc.dsdOffs)
+ {
+ emitOutputDataSec(&emitConsDsc, consBlock);
+ }
+
+ /* Make sure all GC ref variables are marked as dead */
+
+ if (emitGCrFrameOffsCnt)
+ {
+ unsigned vn;
+ int of;
+ varPtrDsc** dp;
+
+ for (vn = 0, of = emitGCrFrameOffsMin, dp = emitGCrFrameLiveTab; vn < emitGCrFrameOffsCnt;
+ vn++, of += sizeof(void*), dp++)
+ {
+ if (*dp)
+ {
+ emitGCvarDeadSet(of, cp, vn);
+ }
+ }
+ }
+
+ /* No GC registers are live any more */
+
+ if (emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, RBM_NONE, cp);
+ }
+ if (emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, RBM_NONE, cp);
+ }
+
+ /* Patch any forward jumps */
+
+ if (emitFwdJumps)
+ {
+ instrDescJmp* jmp;
+
+ for (jmp = emitJumpList; jmp; jmp = jmp->idjNext)
+ {
+ insGroup* tgt;
+#ifdef _TARGET_XARCH_
+ assert(jmp->idInsFmt() == IF_LABEL || jmp->idInsFmt() == IF_RWR_LABEL || jmp->idInsFmt() == IF_SWR_LABEL);
+#endif
+ tgt = jmp->idAddr()->iiaIGlabel;
+
+ if (jmp->idjTemp.idjAddr == nullptr)
+ {
+ continue;
+ }
+
+ if (jmp->idjOffs != tgt->igOffs)
+ {
+ BYTE* adr = jmp->idjTemp.idjAddr;
+ int adj = jmp->idjOffs - tgt->igOffs;
+#ifdef _TARGET_ARM_
+                // On ARM, the offset is encoded in units of 2 bytes.
+ adj >>= 1;
+#endif
+
+#if DEBUG_EMIT
+ if (jmp->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+#ifdef _TARGET_ARM_
+ printf("[5] This output is broken for ARM, since it doesn't properly decode the jump offsets of "
+ "the instruction at adr\n");
+#endif
+
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[5] Jump %u:\n", jmp->idDebugOnlyInfo()->idNum);
+ }
+
+ if (jmp->idjShort)
+ {
+ printf("[5] Jump is at %08X\n", (adr + 1 - emitCodeBlock));
+ printf("[5] Jump distance is %02X - %02X = %02X\n", *(BYTE*)adr, adj, *(BYTE*)adr - adj);
+ }
+ else
+ {
+ printf("[5] Jump is at %08X\n", (adr + 4 - emitCodeBlock));
+ printf("[5] Jump distance is %08X - %02X = %08X\n", *(int*)adr, adj, *(int*)adr - adj);
+ }
+ }
+#endif // DEBUG_EMIT
+
+ if (jmp->idjShort)
+ {
+ // Patch Forward Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(_TARGET_XARCH_)
+ *(BYTE*)adr -= (BYTE)adj;
+#elif defined(_TARGET_ARM_)
+ // The following works because the jump offset is in the low order bits of the instruction.
+ // Presumably we could also just call "emitOutputLJ(NULL, adr, jmp)", like for long jumps?
+ *(short int*)adr -= (short)adj;
+#elif defined(_TARGET_ARM64_)
+ assert(!jmp->idAddr()->iiaHasInstrCount());
+ emitOutputLJ(NULL, adr, jmp);
+#else
+#error Unsupported or unset target architecture
+#endif
+ }
+ else
+ {
+ // Patch Forward non-Short Jump
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(_TARGET_XARCH_)
+ *(int*)adr -= adj;
+#elif defined(_TARGET_ARMARCH_)
+ assert(!jmp->idAddr()->iiaHasInstrCount());
+ emitOutputLJ(NULL, adr, jmp);
+#else
+#error Unsupported or unset target architecture
+#endif
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (emitComp->opts.disAsm)
+ {
+ printf("\n");
+ }
+
+ if (emitComp->verbose)
+ {
+ printf("Allocated method code size = %4u , actual size = %4u\n", emitTotalCodeSize, cp - codeBlock);
+ }
+#endif
+
+ unsigned actualCodeSize = emitCurCodeOffs(cp);
+
+    /* Fill in any unused space */
+ while (emitCurCodeOffs(cp) < emitTotalCodeSize)
+ {
+ *cp++ = DEFAULT_CODE_BUFFER_INIT;
+ }
+
+#if EMITTER_STATS
+ totAllocdSize += emitTotalCodeSize;
+ totActualSize += actualCodeSize;
+#endif
+
+#ifdef DEBUG
+
+ // Make sure these didn't change during the "issuing" phase
+
+ assert(VarSetOps::MayBeUninit(emitPrevGCrefVars));
+ assert(emitPrevGCrefRegs == 0xBAADFEED);
+ assert(emitPrevByrefRegs == 0xBAADFEED);
+
+ assert(VarSetOps::MayBeUninit(emitInitGCrefVars));
+ assert(emitInitGCrefRegs == 0xBAADFEED);
+ assert(emitInitByrefRegs == 0xBAADFEED);
+
+#endif
+
+ // Assign the real prolog size
+ *prologSize = emitCodeOffset(emitPrologIG, emitPrologEndPos);
+
+ /* Return the amount of code we've generated */
+
+ return actualCodeSize;
+}
+
+// See specification comment at the declaration.
+void emitter::emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp)
+{
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // We only emit this GC information on targets where finally's are implemented via funclets,
+ // and the finally is invoked, during non-exceptional execution, via a branch with a predefined
+ // link register, rather than a "true call" for which we would already generate GC info. Currently,
+ // this means precisely ARM.
+ if (ig->igFlags & IGF_FINALLY_TARGET)
+ {
+ // We don't actually have a call instruction in this case, so we don't have
+ // a real size for that instruction. We'll use 1.
+ emitStackPop(cp, /*isCall*/ true, /*callInstrSize*/ 1, /*args*/ 0);
+
+ /* Do we need to record a call location for GC purposes? */
+ if (!emitFullGCinfo)
+ {
+ emitRecordGCcall(cp, /*callInstrSize*/ 1);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+}
+
+/*****************************************************************************
+ *
+ * We have an instruction in an insGroup and we need to know the
+ * instruction number for this instruction
+ */
+
+unsigned emitter::emitFindInsNum(insGroup* ig, instrDesc* idMatch)
+{
+ instrDesc* id = (instrDesc*)ig->igData;
+
+ // Check if we are the first instruction in the group
+ if (id == idMatch)
+ {
+ return 0;
+ }
+
+ /* Walk the list of instructions until we find a match */
+ unsigned insNum = 0;
+ unsigned insRemaining = ig->igInsCnt;
+
+ while (insRemaining > 0)
+ {
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+ insNum++;
+ insRemaining--;
+
+ if (id == idMatch)
+ {
+ return insNum;
+ }
+ }
+ assert(!"emitFindInsNum failed");
+ return -1;
+}
+
+/*****************************************************************************
+ *
+ * We've been asked for the code offset of an instruction but alas one or
+ * more instruction sizes in the block have been mis-predicted, so we have
+ * to find the true offset by looking for the instruction within the group.
+ */
+
+UNATIVE_OFFSET emitter::emitFindOffset(insGroup* ig, unsigned insNum)
+{
+ instrDesc* id = (instrDesc*)ig->igData;
+ UNATIVE_OFFSET of = 0;
+
+#ifdef DEBUG
+ /* Make sure we were passed reasonable arguments */
+ assert(ig && ig->igSelf == ig);
+ assert(ig->igInsCnt >= insNum);
+#endif
+
+ /* Walk the instruction list until all are counted */
+
+ while (insNum > 0)
+ {
+ of += emitInstCodeSz(id);
+
+ castto(id, BYTE*) += emitSizeOfInsDsc(id);
+
+ insNum--;
+ }
+
+ return of;
+}
+
+/*****************************************************************************
+ *
+ * Start generating a constant data section for the current
+ * function. Returns the offset of the section in the appropriate data
+ * block.
+ */
+
+UNATIVE_OFFSET emitter::emitDataGenBeg(UNATIVE_OFFSET size, bool dblAlign, bool codeLtab)
+{
+ unsigned secOffs;
+ dataSection* secDesc;
+
+ assert(emitDataSecCur == nullptr);
+
+ /* The size better not be some kind of an odd thing */
+
+ assert(size && size % sizeof(int) == 0);
+
+ /* Get hold of the current offset */
+
+ secOffs = emitConsDsc.dsdOffs;
+
+    /* Are we required to align this request on an eight-byte boundary? */
+ if (dblAlign && (secOffs % sizeof(double) != 0))
+ {
+        /* Need to skip 4 bytes to honor dblAlign */
+        /* Must allocate a dummy 4-byte integer */
+ int zero = 0;
+ emitDataGenBeg(4, false, false);
+ emitDataGenData(0, &zero, 4);
+ emitDataGenEnd();
+
+ /* Get the new secOffs */
+ secOffs = emitConsDsc.dsdOffs;
+ /* Now it should be a multiple of 8 */
+ assert(secOffs % sizeof(double) == 0);
+ }
+
+ /* Advance the current offset */
+
+ emitConsDsc.dsdOffs += size;
+
+ /* Allocate a data section descriptor and add it to the list */
+
+ secDesc = emitDataSecCur = (dataSection*)emitGetMem(roundUp(sizeof(*secDesc) + size));
+
+ secDesc->dsSize = size;
+
+ secDesc->dsType = dataSection::data;
+
+ secDesc->dsNext = nullptr;
+
+ if (emitConsDsc.dsdLast)
+ {
+ emitConsDsc.dsdLast->dsNext = secDesc;
+ }
+ else
+ {
+ emitConsDsc.dsdList = secDesc;
+ }
+ emitConsDsc.dsdLast = secDesc;
+
+ return secOffs;
+}
+
+// Start generating a constant data section for the current function
+// populated with BasicBlock references.
+// You can choose the references to be either absolute pointers, or
+// 4-byte relative addresses.
+// Currently the relative references are relative to the start of the
+// first block (this is somewhat arbitrary)
+
+UNATIVE_OFFSET emitter::emitBBTableDataGenBeg(unsigned numEntries, bool relativeAddr)
+{
+ unsigned secOffs;
+ dataSection* secDesc;
+
+ assert(emitDataSecCur == nullptr);
+
+ UNATIVE_OFFSET emittedSize;
+
+ if (relativeAddr)
+ {
+ emittedSize = numEntries * 4;
+ }
+ else
+ {
+ emittedSize = numEntries * TARGET_POINTER_SIZE;
+ }
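+    // Note: regardless of the emitted element size, the descriptor buffer below holds BasicBlock* entries;
+    // they are translated to code addresses or relative offsets later, in emitOutputDataSec().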
+
+ /* Get hold of the current offset */
+
+ secOffs = emitConsDsc.dsdOffs;
+
+ /* Advance the current offset */
+
+ emitConsDsc.dsdOffs += emittedSize;
+
+ /* Allocate a data section descriptor and add it to the list */
+
+ secDesc = emitDataSecCur = (dataSection*)emitGetMem(roundUp(sizeof(*secDesc) + numEntries * sizeof(BasicBlock*)));
+
+ secDesc->dsSize = emittedSize;
+
+ secDesc->dsType = relativeAddr ? dataSection::blockRelative32 : dataSection::blockAbsoluteAddr;
+
+ secDesc->dsNext = nullptr;
+
+ if (emitConsDsc.dsdLast)
+ {
+ emitConsDsc.dsdLast->dsNext = secDesc;
+ }
+ else
+ {
+ emitConsDsc.dsdList = secDesc;
+ }
+
+ emitConsDsc.dsdLast = secDesc;
+
+ return secOffs;
+}
+
+/*****************************************************************************
+ *
+ * Emit the given block of bits into the current data section.
+ */
+
+void emitter::emitDataGenData(unsigned offs, const void* data, size_t size)
+{
+ assert(emitDataSecCur && (emitDataSecCur->dsSize >= offs + size));
+
+ assert(emitDataSecCur->dsType == dataSection::data);
+
+ memcpy(emitDataSecCur->dsCont + offs, data, size);
+}
+
+/*****************************************************************************
+ *
+ * Emit the address of the given basic block into the current data section.
+ */
+
+void emitter::emitDataGenData(unsigned index, BasicBlock* label)
+{
+ assert(emitDataSecCur != nullptr);
+ assert(emitDataSecCur->dsType == dataSection::blockAbsoluteAddr ||
+ emitDataSecCur->dsType == dataSection::blockRelative32);
+
+ unsigned emittedElemSize = emitDataSecCur->dsType == dataSection::blockAbsoluteAddr ? TARGET_POINTER_SIZE : 4;
+
+ assert(emitDataSecCur->dsSize >= emittedElemSize * (index + 1));
+
+ ((BasicBlock**)(emitDataSecCur->dsCont))[index] = label;
+}
+
+/*****************************************************************************
+ *
+ * We're done generating a data section.
+ */
+
+void emitter::emitDataGenEnd()
+{
+
+#ifdef DEBUG
+ assert(emitDataSecCur);
+ emitDataSecCur = nullptr;
+#endif
+}
+
+/********************************************************************************
+ * Generates a data section constant
+ *
+ * Parameters:
+ * cnsAddr - memory location containing constant value
+ * cnsSize - size of constant in bytes
+ * dblAlign - whether to double align the data section constant
+ *
+ * Returns constant number as offset into data section.
+ */
+UNATIVE_OFFSET emitter::emitDataConst(const void* cnsAddr, unsigned cnsSize, bool dblAlign)
+{
+ // When generating SMALL_CODE, we don't bother with dblAlign
+ if (dblAlign && (emitComp->compCodeOpt() == Compiler::SMALL_CODE))
+ {
+ dblAlign = false;
+ }
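+    // For example (hypothetical caller), storing an 8-byte double constant would pass cnsSize = 8 and
+    // dblAlign = true, and the returned offset is then used to form a data-section-relative address in code.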
+
+ UNATIVE_OFFSET cnum = emitDataGenBeg(cnsSize, dblAlign, false);
+ emitDataGenData(0, cnsAddr, cnsSize);
+ emitDataGenEnd();
+
+ return cnum;
+}
+
+/*****************************************************************************
+ *
+ * Output the given data section at the specified address.
+ */
+
+void emitter::emitOutputDataSec(dataSecDsc* sec, BYTE* dst)
+{
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("\nEmitting data sections: %u total bytes\n", sec->dsdOffs);
+ }
+
+ unsigned secNum = 0;
+#endif
+
+ assert(dst);
+ assert(sec->dsdOffs);
+ assert(sec->dsdList);
+
+ /* Walk and emit the contents of all the data blocks */
+
+ dataSection* dsc;
+
+ for (dsc = sec->dsdList; dsc; dsc = dsc->dsNext)
+ {
+ size_t dscSize = dsc->dsSize;
+
+ // absolute label table
+ if (dsc->dsType == dataSection::blockAbsoluteAddr)
+ {
+ JITDUMP(" section %u, size %u, block absolute addr\n", secNum++, dscSize);
+
+ assert(dscSize && dscSize % sizeof(BasicBlock*) == 0);
+ size_t numElems = dscSize / TARGET_POINTER_SIZE;
+ BYTE** bDst = (BYTE**)dst;
+ for (unsigned i = 0; i < numElems; i++)
+ {
+ BasicBlock* block = ((BasicBlock**)dsc->dsCont)[i];
+
+ // Convert the BasicBlock* value to an IG address
+ insGroup* lab = (insGroup*)emitCodeGetCookie(block);
+
+ // Append the appropriate address to the destination
+ BYTE* target = emitOffsetToPtr(lab->igOffs);
+
+#ifdef _TARGET_ARM_
+ target = (BYTE*)((size_t)target | 1); // Or in thumb bit
+#endif
+ bDst[i] = target;
+ if (emitComp->opts.compReloc)
+ {
+ emitRecordRelocation(&(bDst[i]), target, IMAGE_REL_BASED_HIGHLOW);
+ }
+
+ JITDUMP(" BB%02u: 0x%p\n", block->bbNum, bDst[i]);
+ }
+ }
+ // relative label table
+ else if (dsc->dsType == dataSection::blockRelative32)
+ {
+ JITDUMP(" section %u, size %u, block relative addr\n", secNum++, dscSize);
+
+ unsigned elemSize = 4;
+ size_t numElems = dscSize / 4;
+ unsigned* uDst = (unsigned*)dst;
+ insGroup* labFirst = (insGroup*)emitCodeGetCookie(emitComp->fgFirstBB);
+
+ for (unsigned i = 0; i < numElems; i++)
+ {
+ BasicBlock* block = ((BasicBlock**)dsc->dsCont)[i];
+
+ // Convert the BasicBlock* value to an IG address
+ insGroup* lab = (insGroup*)emitCodeGetCookie(block);
+
+ assert(FitsIn<uint32_t>(lab->igOffs - labFirst->igOffs));
+ uDst[i] = lab->igOffs - labFirst->igOffs;
+
+ JITDUMP(" BB%02u: 0x%x\n", block->bbNum, uDst[i]);
+ }
+ }
+ else
+ {
+ JITDUMP(" section %u, size %u, raw data\n", secNum++, dscSize);
+
+ // Simple binary data: copy the bytes to the target
+ assert(dsc->dsType == dataSection::data);
+
+ memcpy(dst, dsc->dsCont, dscSize);
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf(" ");
+ for (size_t i = 0; i < dscSize; i++)
+ {
+ printf("%02x ", dsc->dsCont[i]);
+ if ((((i + 1) % 16) == 0) && (i + 1 != dscSize))
+ {
+ printf("\n ");
+ }
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ dst += dscSize;
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Record the fact that the given variable now contains a live GC ref.
+ */
+
+void emitter::emitGCvarLiveSet(int offs, GCtype gcType, BYTE* addr, ssize_t disp)
+{
+ assert(emitIssuing);
+
+ varPtrDsc* desc;
+
+ assert((abs(offs) % sizeof(ssize_t)) == 0);
+ assert(needsGC(gcType));
+
+ /* Compute the index into the GC frame table if the caller didn't do it */
+
+ if (disp == -1)
+ {
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ }
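+    // 'disp' indexes emitGCrFrameLiveTab[]: one entry per pointer-sized slot in the tracked frame range,
+    // starting at emitGCrFrameOffsMin.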
+
+ assert((size_t)disp < emitGCrFrameOffsCnt);
+
+ /* Allocate a lifetime record */
+
+ desc = new (emitComp, CMK_GC) varPtrDsc;
+
+ desc->vpdBegOfs = emitCurCodeOffs(addr);
+#ifdef DEBUG
+ desc->vpdEndOfs = 0xFACEDEAD;
+#endif
+
+ desc->vpdVarNum = offs;
+
+ desc->vpdNext = nullptr;
+
+ /* the lower 2 bits encode props about the stk ptr */
+
+ if (offs == emitSyncThisObjOffs)
+ {
+ desc->vpdVarNum |= this_OFFSET_FLAG;
+ }
+
+ if (gcType == GCT_BYREF)
+ {
+ desc->vpdVarNum |= byref_OFFSET_FLAG;
+ }
+
+ /* Append the new entry to the end of the list */
+ if (codeGen->gcInfo.gcVarPtrLast == nullptr)
+ {
+ assert(codeGen->gcInfo.gcVarPtrList == nullptr);
+ codeGen->gcInfo.gcVarPtrList = codeGen->gcInfo.gcVarPtrLast = desc;
+ }
+ else
+ {
+ assert(codeGen->gcInfo.gcVarPtrList != nullptr);
+ codeGen->gcInfo.gcVarPtrLast->vpdNext = desc;
+ codeGen->gcInfo.gcVarPtrLast = desc;
+ }
+
+ /* Record the variable descriptor in the table */
+
+ assert(emitGCrFrameLiveTab[disp] == nullptr);
+ emitGCrFrameLiveTab[disp] = desc;
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ printf("[%08X] %s var born at [%s", dspPtr(desc), GCtypeStr(gcType), emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ /* The "global" live GC variable mask is no longer up-to-date */
+
+ emitThisGCrefVset = false;
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given variable no longer contains a live GC ref.
+ */
+
+void emitter::emitGCvarDeadSet(int offs, BYTE* addr, ssize_t disp)
+{
+ assert(emitIssuing);
+
+ varPtrDsc* desc;
+
+ assert(abs(offs) % sizeof(int) == 0);
+
+ /* Compute the index into the GC frame table if the caller didn't do it */
+
+ if (disp == -1)
+ {
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ }
+
+ assert((unsigned)disp < emitGCrFrameOffsCnt);
+
+ /* Get hold of the lifetime descriptor and clear the entry */
+
+ desc = emitGCrFrameLiveTab[disp];
+ emitGCrFrameLiveTab[disp] = nullptr;
+
+ assert(desc);
+ assert((desc->vpdVarNum & ~OFFSET_MASK) == (unsigned)offs);
+
+ /* Record the death code offset */
+
+ assert(desc->vpdEndOfs == 0xFACEDEAD);
+ desc->vpdEndOfs = emitCurCodeOffs(addr);
+
+#ifdef DEBUG
+ if (EMITVERBOSE)
+ {
+ GCtype gcType = (desc->vpdVarNum & byref_OFFSET_FLAG) ? GCT_BYREF : GCT_GCREF;
+ bool isThis = (desc->vpdVarNum & this_OFFSET_FLAG) != 0;
+
+ printf("[%08X] %s%s var died at [%s", dspPtr(desc), GCtypeStr(gcType), isThis ? "this-ptr" : "",
+ emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ /* The "global" live GC variable mask is no longer up-to-date */
+
+ emitThisGCrefVset = false;
+}
+
+/*****************************************************************************
+ *
+ * Record a new set of live GC ref variables.
+ */
+
+void emitter::emitUpdateLiveGCvars(VARSET_VALARG_TP vars, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ /* Is the current set accurate and unchanged? */
+
+ if (emitThisGCrefVset && VarSetOps::Equal(emitComp, emitThisGCrefVars, vars))
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("New GC ref live vars=%s ", VarSetOps::ToString(emitComp, vars));
+ dumpConvertedVarSet(emitComp, vars);
+ printf("\n");
+ }
+#endif
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, vars);
+
+ /* Are there any GC ref variables on the stack? */
+
+ if (emitGCrFrameOffsCnt)
+ {
+ int* tab;
+ unsigned cnt = emitTrkVarCnt;
+ unsigned num;
+
+ /* Test all the tracked variable bits in the mask */
+
+ for (num = 0, tab = emitGCrFrameOffsTab; num < cnt; num++, tab++)
+ {
+ int val = *tab;
+
+ if (val != -1)
+ {
+ // byref_OFFSET_FLAG and this_OFFSET_FLAG are set
+ // in the table-offsets for byrefs and this-ptr
+
+ int offs = val & ~OFFSET_MASK;
+
+ // printf("var #%2u at %3d is now %s\n", num, offs, (vars & 1) ? "live" : "dead");
+
+ if (VarSetOps::IsMember(emitComp, vars, num))
+ {
+ GCtype gcType = (val & byref_OFFSET_FLAG) ? GCT_BYREF : GCT_GCREF;
+ emitGCvarLiveUpd(offs, INT_MAX, gcType, addr);
+ }
+ else
+ {
+ emitGCvarDeadUpd(offs, addr);
+ }
+ }
+ }
+ }
+
+ emitThisGCrefVset = true;
+}
+
+/*****************************************************************************
+ *
+ * Record a call location for GC purposes (we know that this is a method that
+ * will not be fully interruptible).
+ */
+
+void emitter::emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize)
+{
+ assert(emitIssuing);
+ assert(!emitFullGCinfo);
+
+ unsigned offs = emitCurCodeOffs(codePos);
+ unsigned regs = (emitThisGCrefRegs | emitThisByrefRegs) & ~RBM_INTRET;
+ callDsc* call;
+
+#ifdef JIT32_GCENCODER
+ // The JIT32 GCInfo encoder allows us to (as the comment previously here said):
+ // "Bail if this is a totally boring call", but the GCInfoEncoder/Decoder interface
+ // requires a definition for every call site, so we skip these "early outs" when we're
+ // using the general encoder.
+ if (regs == 0)
+ {
+#if EMIT_TRACK_STACK_DEPTH
+ if (emitCurStackLvl == 0)
+ return;
+#endif
+ /* Nope, only interesting calls get recorded */
+
+ if (emitSimpleStkUsed)
+ {
+ if (!u1.emitSimpleStkMask)
+ return;
+ }
+ else
+ {
+ if (u2.emitGcArgTrackCnt == 0)
+ return;
+ }
+ }
+#endif // JIT32_GCENCODER
+
+#ifdef DEBUG
+
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("; Call at %04X [stk=%u], GCvars=", offs - callInstrSize, emitCurStackLvl);
+ emitDispVarSet();
+ printf(", gcrefRegs=");
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ // printRegMaskInt(emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ // emitDispRegSet (emitThisGCrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ printf(", byrefRegs=");
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ // printRegMaskInt(emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ // emitDispRegSet (emitThisByrefRegs & ~RBM_INTRET & RBM_CALLEE_SAVED); // only display callee-saved
+ printf("\n");
+ }
+
+#endif
+
+ /* Allocate a 'call site' descriptor and start filling it in */
+
+ call = new (emitComp, CMK_GC) callDsc;
+
+ call->cdBlock = nullptr;
+ call->cdOffs = offs;
+#ifndef JIT32_GCENCODER
+ call->cdCallInstrSize = callInstrSize;
+#endif
+ call->cdNext = nullptr;
+
+ call->cdGCrefRegs = (regMaskSmall)emitThisGCrefRegs;
+ call->cdByrefRegs = (regMaskSmall)emitThisByrefRegs;
+
+#if EMIT_TRACK_STACK_DEPTH
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ noway_assert(FitsIn<USHORT>(emitCurStackLvl / ((unsigned)sizeof(unsigned))));
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#endif
+
+    /* Append the call descriptor to the list */
+ if (codeGen->gcInfo.gcCallDescLast == nullptr)
+ {
+ assert(codeGen->gcInfo.gcCallDescList == nullptr);
+ codeGen->gcInfo.gcCallDescList = codeGen->gcInfo.gcCallDescLast = call;
+ }
+ else
+ {
+ assert(codeGen->gcInfo.gcCallDescList != nullptr);
+ codeGen->gcInfo.gcCallDescLast->cdNext = call;
+ codeGen->gcInfo.gcCallDescLast = call;
+ }
+
+ /* Record the current "pending" argument list */
+
+ if (emitSimpleStkUsed)
+ {
+        /* The maximum stack depth is no more than MAX_SIMPLE_STK_DEPTH,
+           so use the small (bit mask) format */
+
+ call->u1.cdArgMask = u1.emitSimpleStkMask;
+ call->u1.cdByrefArgMask = u1.emitSimpleByrefStkMask;
+ call->cdArgCnt = 0;
+ }
+ else
+ {
+ /* The current call has too many arguments, so we need to report the
+ offsets of each individual GC arg. */
+
+ call->cdArgCnt = u2.emitGcArgTrackCnt;
+ if (call->cdArgCnt == 0)
+ {
+ call->u1.cdArgMask = call->u1.cdByrefArgMask = 0;
+ return;
+ }
+
+ call->cdArgTable = new (emitComp, CMK_GC) unsigned[u2.emitGcArgTrackCnt];
+
+ unsigned gcArgs = 0;
+ unsigned stkLvl = emitCurStackLvl / sizeof(int);
+
+ for (unsigned i = 0; i < stkLvl; i++)
+ {
+ GCtype gcType = (GCtype)u2.emitArgTrackTab[stkLvl - i - 1];
+
+ if (needsGC(gcType))
+ {
+ call->cdArgTable[gcArgs] = i * sizeof(void*);
+
+ if (gcType == GCT_BYREF)
+ {
+ call->cdArgTable[gcArgs] |= byref_OFFSET_FLAG;
+ }
+
+ gcArgs++;
+ }
+ }
+
+ assert(gcArgs == u2.emitGcArgTrackCnt);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record a new set of live GC ref registers.
+ */
+
+void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ regMaskTP life;
+ regMaskTP dead;
+ regMaskTP chg;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("New %sReg live regs=", GCtypeStr(gcType));
+ printRegMaskInt(regs);
+ emitDispRegSet(regs);
+ printf("\n");
+ }
+#endif
+
+ assert(needsGC(gcType));
+
+ regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
+ regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
+ assert(emitThisXXrefRegs != regs);
+
+ if (emitFullGCinfo)
+ {
+ /* Figure out which GC registers are becoming live/dead at this point */
+
+ dead = (emitThisXXrefRegs & ~regs);
+ life = (~emitThisXXrefRegs & regs);
+
+        /* Registers can't become live and dead at the same time */
+
+ assert((dead | life) != 0);
+ assert((dead & life) == 0);
+
+ /* Compute the 'changing state' mask */
+
+ chg = (dead | life);
+
+ do
+ {
+ regMaskTP bit = genFindLowestBit(chg);
+ regNumber reg = genRegNumFromMask(bit);
+
+ if (life & bit)
+ {
+ emitGCregLiveUpd(gcType, reg, addr);
+ }
+ else
+ {
+ emitGCregDeadUpd(reg, addr);
+ }
+
+ chg -= bit;
+ } while (chg);
+
+ assert(emitThisXXrefRegs == regs);
+ }
+ else
+ {
+ emitThisYYrefRegs &= ~regs; // Kill the regs from the other GC type (if live)
+ emitThisXXrefRegs = regs; // Mark them as live in the requested GC type
+ }
+
+ // The 2 GC reg masks can't be overlapping
+
+ assert((emitThisGCrefRegs & emitThisByrefRegs) == 0);
+}
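+
+// Illustrative note (not part of the original emitter code): the do/while loop above uses the
+// standard "peel off the lowest set bit" idiom so that each changed register is visited exactly
+// once. The same idiom over a plain integer mask:
+//
+//     while (chg != 0)
+//     {
+//         unsigned long long bit = chg & (~chg + 1); // isolate the lowest set bit
+//         // ... process the single register corresponding to 'bit' ...
+//         chg &= ~bit;                               // clear it and continue with the rest
+//     }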
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register now contains a live GC ref.
+ */
+
+void emitter::emitGCregLiveSet(GCtype gcType, regMaskTP regMask, BYTE* addr, bool isThis)
+{
+ assert(emitIssuing);
+ assert(needsGC(gcType));
+
+ regPtrDsc* regPtrNext;
+
+ assert(!isThis || emitComp->lvaKeepAliveAndReportThis());
+ // assert(emitFullyInt || isThis);
+ assert(emitFullGCinfo);
+
+ assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) == 0);
+
+ /* Allocate a new regptr entry and fill it in */
+
+ regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdArg = FALSE;
+ regPtrNext->rpdCall = FALSE;
+ regPtrNext->rpdIsThis = isThis;
+ regPtrNext->rpdCompiler.rpdAdd = (regMaskSmall)regMask;
+ regPtrNext->rpdCompiler.rpdDel = 0;
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register no longer contains a live GC ref.
+ */
+
+void emitter::emitGCregDeadSet(GCtype gcType, regMaskTP regMask, BYTE* addr)
+{
+ assert(emitIssuing);
+ assert(needsGC(gcType));
+
+ regPtrDsc* regPtrNext;
+
+ // assert(emitFullyInt);
+ assert(emitFullGCinfo);
+
+ assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) != 0);
+
+ /* Allocate a new regptr entry and fill it in */
+
+ regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdCall = FALSE;
+ regPtrNext->rpdIsThis = FALSE;
+ regPtrNext->rpdArg = FALSE;
+ regPtrNext->rpdCompiler.rpdAdd = 0;
+ regPtrNext->rpdCompiler.rpdDel = (regMaskSmall)regMask;
+}
+
+/*****************************************************************************
+ *
+ * Emit an 8-bit integer as code.
+ */
+
+unsigned char emitter::emitOutputByte(BYTE* dst, ssize_t val)
+{
+ *castto(dst, unsigned char*) = (unsigned char)val;
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+ printf("; emit_byte 0%02XH\n", val & 0xFF);
+ }
+#ifdef _TARGET_AMD64_
+ // if we're emitting code bytes, ensure that we've already emitted the rex prefix!
+ assert(((val & 0xFF00000000LL) == 0) || ((val & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL));
+#endif // _TARGET_AMD64_
+#endif
+
+ return sizeof(unsigned char);
+}
+
+/*****************************************************************************
+ *
+ * Emit a 16-bit integer as code.
+ */
+
+unsigned char emitter::emitOutputWord(BYTE* dst, ssize_t val)
+{
+ MISALIGNED_WR_I2(dst, (short)val);
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+ printf("; emit_word 0%02XH,0%02XH\n", (val & 0xFF), (val >> 8) & 0xFF);
+ }
+#ifdef _TARGET_AMD64_
+ // if we're emitting code bytes, ensure that we've already emitted the rex prefix!
+ assert(((val & 0xFF00000000LL) == 0) || ((val & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL));
+#endif // _TARGET_AMD64_
+#endif
+
+ return sizeof(short);
+}
+
+/*****************************************************************************
+ *
+ * Emit a 32-bit integer as code.
+ */
+
+unsigned char emitter::emitOutputLong(BYTE* dst, ssize_t val)
+{
+ MISALIGNED_WR_I4(dst, (int)val);
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+ printf("; emit_long 0%08XH\n", val);
+ }
+#ifdef _TARGET_AMD64_
+ // if we're emitting code bytes, ensure that we've already emitted the rex prefix!
+ assert(((val & 0xFF00000000LL) == 0) || ((val & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL));
+#endif // _TARGET_AMD64_
+#endif
+
+ return sizeof(int);
+}
+
+/*****************************************************************************
+ *
+ * Emit a pointer-sized integer as code.
+ */
+
+unsigned char emitter::emitOutputSizeT(BYTE* dst, ssize_t val)
+{
+ MISALIGNED_WR_ST(dst, val);
+
+#ifdef DEBUG
+ if (emitComp->opts.dspEmit)
+ {
+#ifdef _TARGET_AMD64_
+ printf("; emit_size_t 0%016llXH\n", (size_t)val);
+#else // _TARGET_AMD64_
+ printf("; emit_size_t 0%08XH\n", (size_t)val);
+#endif // _TARGET_AMD64_
+ }
+#endif // DEBUG
+
+ return sizeof(size_t);
+}
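+
+// Illustrative note (not part of the original emitter code): the four emitOutput* helpers above
+// all follow the same contract -- store a value of the given width at 'dst' (via the
+// MISALIGNED_WR_* macros, so unaligned destinations are fine, and in the native byte order,
+// which is little-endian on the JIT's targets) and return the number of bytes written.
+// A byte-by-byte sketch of that contract with no alignment assumptions:
+//
+//     unsigned EmitLittleEndian(unsigned char* dst, unsigned long long val, unsigned size)
+//     {
+//         for (unsigned i = 0; i < size; i++)
+//         {
+//             dst[i] = (unsigned char)(val >> (8 * i)); // least significant byte first
+//         }
+//         return size;
+//     }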
+
+/*****************************************************************************
+ *
+ * Given a block cookie and a code position, return the actual code offset;
+ * this can only be called at the end of code generation.
+ */
+
+UNATIVE_OFFSET emitter::emitCodeOffset(void* blockPtr, unsigned codePos)
+{
+ insGroup* ig;
+
+ UNATIVE_OFFSET of;
+ unsigned no = emitGetInsNumFromCodePos(codePos);
+
+ /* Make sure we weren't passed some kind of a garbage thing */
+
+ ig = (insGroup*)blockPtr;
+#ifdef DEBUG
+ assert(ig && ig->igSelf == ig);
+#endif
+
+ /* The first and last offsets are always easy */
+
+ if (no == 0)
+ {
+ of = 0;
+ }
+ else if (no == ig->igInsCnt)
+ {
+ of = ig->igSize;
+ }
+ else if (ig->igFlags & IGF_UPD_ISZ)
+ {
+ /*
+ Some instruction sizes have changed, so we'll have to figure
+ out the instruction offset "the hard way".
+ */
+
+ of = emitFindOffset(ig, no);
+ }
+ else
+ {
+ /* All instructions correctly predicted, the offset stays the same */
+
+ of = emitGetInsOfsFromCodePos(codePos);
+
+ // printf("[IG=%02u;ID=%03u;OF=%04X] <= %08X\n", ig->igNum, emitGetInsNumFromCodePos(codePos), of, codePos);
+
+ /* Make sure the offset estimate is accurate */
+
+ assert(of == emitFindOffset(ig, emitGetInsNumFromCodePos(codePos)));
+ }
+
+ return ig->igOffs + of;
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register now contains a live GC ref.
+ */
+
+void emitter::emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ assert(needsGC(gcType));
+
+ regMaskTP regMask = genRegMask(reg);
+
+ regMaskTP& emitThisXXrefRegs = (gcType == GCT_GCREF) ? emitThisGCrefRegs : emitThisByrefRegs;
+ regMaskTP& emitThisYYrefRegs = (gcType == GCT_GCREF) ? emitThisByrefRegs : emitThisGCrefRegs;
+
+ if ((emitThisXXrefRegs & regMask) == 0)
+ {
+ // If the register was holding the other GC type, that type should
+ // go dead now
+
+ if (emitThisYYrefRegs & regMask)
+ {
+ emitGCregDeadUpd(reg, addr);
+ }
+
+ // For synchronized methods, "this" is always alive and in the same register.
+ // However, if we generate any code after the epilog block (where "this"
+ // goes dead), "this" will come alive again. We need to notice that.
+ // Note that we only expect isThis to be true at an insGroup boundary.
+
+        bool isThis = (reg == emitSyncThisObjReg);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregLiveSet(gcType, regMask, addr, isThis);
+ }
+
+ emitThisXXrefRegs |= regMask;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("%sReg +[%s]\n", GCtypeStr(gcType), emitRegName(reg));
+ }
+#endif
+ }
+
+ // The 2 GC reg masks can't be overlapping
+
+ assert((emitThisGCrefRegs & emitThisByrefRegs) == 0);
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given set of registers no longer contain live GC refs.
+ */
+
+void emitter::emitGCregDeadUpdMask(regMaskTP regs, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ // First, handle the gcref regs going dead
+
+ regMaskTP gcrefRegs = emitThisGCrefRegs & regs;
+
+ // "this" can never go dead in synchronized methods, except in the epilog
+ // after the call to CORINFO_HELP_MON_EXIT.
+ assert(emitSyncThisObjReg == REG_NA || (genRegMask(emitSyncThisObjReg) & regs) == 0);
+
+ if (gcrefRegs)
+ {
+ assert((emitThisByrefRegs & gcrefRegs) == 0);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_GCREF, gcrefRegs, addr);
+ }
+
+ emitThisGCrefRegs &= ~gcrefRegs;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("gcrReg ");
+ printRegMaskInt(gcrefRegs);
+ printf(" -");
+ emitDispRegSet(gcrefRegs);
+ printf("\n");
+ }
+#endif
+ }
+
+ // Second, handle the byref regs going dead
+
+ regMaskTP byrefRegs = emitThisByrefRegs & regs;
+
+ if (byrefRegs)
+ {
+ assert((emitThisGCrefRegs & byrefRegs) == 0);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_BYREF, byrefRegs, addr);
+ }
+
+ emitThisByrefRegs &= ~byrefRegs;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("byrReg ");
+ printRegMaskInt(byrefRegs);
+ printf(" -");
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register no longer contains a live GC ref.
+ */
+
+void emitter::emitGCregDeadUpd(regNumber reg, BYTE* addr)
+{
+ assert(emitIssuing);
+
+ // Don't track GC changes in epilogs
+ if (emitIGisInEpilog(emitCurIG))
+ {
+ return;
+ }
+
+ regMaskTP regMask = genRegMask(reg);
+
+ if ((emitThisGCrefRegs & regMask) != 0)
+ {
+ assert((emitThisByrefRegs & regMask) == 0);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_GCREF, regMask, addr);
+ }
+
+ emitThisGCrefRegs &= ~regMask;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("%s -[%s]\n", "gcrReg", emitRegName(reg));
+ }
+#endif
+ }
+ else if ((emitThisByrefRegs & regMask) != 0)
+ {
+ if (emitFullGCinfo)
+ {
+ emitGCregDeadSet(GCT_BYREF, regMask, addr);
+ }
+
+ emitThisByrefRegs &= ~regMask;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("%s -[%s]\n", "byrReg", emitRegName(reg));
+ }
+#endif
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record the fact that the given variable now contains a live GC ref.
+ * varNum may be INT_MAX or negative (indicating a spill temp) only if
+ * offs is guaranteed to be the offset of a tracked GC ref. Otherwise we
+ * need a valid varNum to check whether the variable is tracked.
+ */
+
+void emitter::emitGCvarLiveUpd(int offs, int varNum, GCtype gcType, BYTE* addr)
+{
+ assert(abs(offs) % sizeof(int) == 0);
+ assert(needsGC(gcType));
+
+#if FEATURE_FIXED_OUT_ARGS
+ if ((unsigned)varNum == emitComp->lvaOutgoingArgSpaceVar)
+ {
+ if (emitFullGCinfo)
+ {
+            /* Append an "arg push" entry to track a GC ref written to the
+ outgoing argument space.
+ Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = gcInfo->gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdCall = FALSE;
+ noway_assert(FitsIn<unsigned short>(offs));
+ regPtrNext->rpdPtrArg = (unsigned short)offs;
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_PUSH;
+ regPtrNext->rpdIsThis = FALSE;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%04X] %s arg write\n", offs, GCtypeStr(gcType));
+ }
+#endif
+ }
+ }
+ else
+#endif // FEATURE_FIXED_OUT_ARGS
+ {
+ /* Is the frame offset within the "interesting" range? */
+
+ if (offs >= emitGCrFrameOffsMin && offs < emitGCrFrameOffsMax)
+ {
+            /* Normally all variables in this range must be tracked stack
+               pointers. However, for EnC we relax this condition, so we
+               must check whether this variable is actually tracked.
+ Note that varNum might be negative, indicating a spill temp.
+ */
+
+ if (varNum != INT_MAX)
+ {
+ bool isTracked = false;
+ if (varNum >= 0)
+ {
+ // This is NOT a spill temp
+ LclVarDsc* varDsc = &emitComp->lvaTable[varNum];
+ isTracked = emitComp->lvaIsGCTracked(varDsc);
+ }
+ else
+ {
+ // Is it an untracked spill temp?
+ isTracked = TRACK_GC_TEMP_LIFETIMES;
+ }
+ if (!isTracked)
+ {
+#if DOUBLE_ALIGN
+ assert(!emitContTrkPtrLcls ||
+                           // EBP-based variables in double-aligned frames are input arguments,
+                           // and we don't require them to fall into the "interesting" range.
+ ((emitComp->rpFrameType == FT_DOUBLE_ALIGN_FRAME) && (varNum >= 0) &&
+ (emitComp->lvaTable[varNum].lvFramePointerBased == 1)));
+#else
+ assert(!emitContTrkPtrLcls);
+#endif
+ return;
+ }
+ }
+
+ size_t disp;
+
+ /* Compute the index into the GC frame table */
+
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ assert(disp < emitGCrFrameOffsCnt);
+
+ /* If the variable is currently dead, mark it as live */
+
+ if (emitGCrFrameLiveTab[disp] == nullptr)
+ {
+ emitGCvarLiveSet(offs, gcType, addr, disp);
+ }
+ }
+ }
+}
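+
+// Illustrative note (not part of the original emitter code): the table index computed above is
+// simply the pointer-slot distance from the start of the "interesting" frame range. For example,
+// with hypothetical values emitGCrFrameOffsMin = -0x40 and 8-byte pointers, a variable at frame
+// offset -0x20 maps to slot (-0x20 - (-0x40)) / 8 = 0x20 / 8 = 4 of emitGCrFrameLiveTab.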
+
+/*****************************************************************************
+ *
+ * Record the fact that the given variable no longer contains a live GC ref.
+ */
+
+void emitter::emitGCvarDeadUpd(int offs, BYTE* addr)
+{
+ assert(emitIssuing);
+ assert(abs(offs) % sizeof(int) == 0);
+
+ /* Is the frame offset within the "interesting" range? */
+
+ if (offs >= emitGCrFrameOffsMin && offs < emitGCrFrameOffsMax)
+ {
+ size_t disp;
+
+ /* Compute the index into the GC frame table */
+
+ disp = (offs - emitGCrFrameOffsMin) / sizeof(void*);
+ assert(disp < emitGCrFrameOffsCnt);
+
+ /* If the variable is currently live, mark it as dead */
+
+ if (emitGCrFrameLiveTab[disp] != nullptr)
+ {
+ emitGCvarDeadSet(offs, addr, disp);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate a new IG and link it in to the global list after the current IG
+ */
+
+insGroup* emitter::emitAllocAndLinkIG()
+{
+ insGroup* ig = emitAllocIG();
+
+ assert(emitCurIG);
+
+ emitInsertIGAfter(emitCurIG, ig);
+
+ /* Propagate some IG flags from the current group to the new group */
+
+ ig->igFlags |= (emitCurIG->igFlags & IGF_PROPAGATE_MASK);
+
+ /* Set the new IG as the current IG */
+
+ emitCurIG = ig;
+
+ return ig;
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction group descriptor and assign it the next index.
+ */
+
+insGroup* emitter::emitAllocIG()
+{
+ insGroup* ig;
+
+ /* Allocate a group descriptor */
+
+ size_t sz = sizeof(insGroup);
+ ig = (insGroup*)emitGetMem(sz);
+
+#ifdef DEBUG
+ ig->igSelf = ig;
+#endif
+
+#if EMITTER_STATS
+ emitTotalIGcnt += 1;
+ emitTotalIGsize += sz;
+ emitSizeMethod += sz;
+#endif
+
+ /* Do basic initialization */
+
+ emitInitIG(ig);
+
+ return ig;
+}
+
+/*****************************************************************************
+ *
+ * Initialize an instruction group
+ */
+
+void emitter::emitInitIG(insGroup* ig)
+{
+ /* Assign the next available index to the instruction group */
+
+ ig->igNum = emitNxtIGnum;
+
+ emitNxtIGnum++;
+
+ /* Record the (estimated) code offset of the group */
+
+ ig->igOffs = emitCurCodeOffset;
+ assert(IsCodeAligned(ig->igOffs));
+
+ /* Set the current function index */
+
+ ig->igFuncIdx = emitComp->compCurrFuncIdx;
+
+ ig->igFlags = 0;
+
+ /* Zero out some fields to avoid printing garbage in JitDumps. These
+ really only need to be set in DEBUG, but do it in all cases to make
+ sure we act the same in non-DEBUG builds.
+ */
+
+ ig->igSize = 0;
+ ig->igGCregs = RBM_NONE;
+ ig->igInsCnt = 0;
+}
+
+/*****************************************************************************
+ *
+ * Insert instruction group 'ig' after 'igInsertAfterIG'
+ */
+
+void emitter::emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig)
+{
+ assert(emitIGlist);
+ assert(emitIGlast);
+
+ ig->igNext = insertAfterIG->igNext;
+ insertAfterIG->igNext = ig;
+
+ if (emitIGlast == insertAfterIG)
+ {
+ // If we are inserting at the end, then update the 'last' pointer
+ emitIGlast = ig;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Save the current IG and start a new one.
+ */
+
+void emitter::emitNxtIG(bool emitAdd)
+{
+ /* Right now we don't allow multi-IG prologs */
+
+ assert(emitCurIG != emitPrologIG);
+
+ /* First save the current group */
+
+ emitSavIG(emitAdd);
+
+    /* Update the GC live sets for the group's start.
+     * Do this only if it is not an emitter-added block */
+
+ if (!emitAdd)
+ {
+ VarSetOps::Assign(emitComp, emitInitGCrefVars, emitThisGCrefVars);
+ emitInitGCrefRegs = emitThisGCrefRegs;
+ emitInitByrefRegs = emitThisByrefRegs;
+ }
+
+ /* Start generating the new group */
+
+ emitNewIG();
+
+ /* If this is an emitter added block, flag it */
+
+ if (emitAdd)
+ {
+ emitCurIG->igFlags |= IGF_EMIT_ADD;
+ }
+
+ // We've created a new IG; no need to force another one.
+ emitForceNewIG = false;
+}
+
+/*****************************************************************************
+ *
+ * emitGetInsSC: Get the instruction's constant value.
+ */
+
+ssize_t emitter::emitGetInsSC(instrDesc* id)
+{
+#ifdef _TARGET_ARM_ // should it be _TARGET_ARMARCH_? Why do we need this? Note that on ARM64 we store scaled immediates
+ // for some formats
+ if (id->idIsLclVar())
+ {
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+
+ regNumber baseReg;
+ int offs = id->idAddr()->iiaLclVar.lvaOffset();
+#if defined(_TARGET_ARM_)
+ int adr = emitComp->lvaFrameAddress(varNum, id->idIsLclFPBase(), &baseReg, offs);
+ int dsp = adr + offs;
+ if ((id->idIns() == INS_sub) || (id->idIns() == INS_subw))
+ dsp = -dsp;
+#elif defined(_TARGET_ARM64_)
+ // TODO-ARM64-Cleanup: this is currently unreachable. Do we need it?
+ bool FPbased;
+ int adr = emitComp->lvaFrameAddress(varNum, &FPbased);
+ int dsp = adr + offs;
+ if (id->idIns() == INS_sub)
+ dsp = -dsp;
+#endif
+ return dsp;
+ }
+ else
+#endif // _TARGET_ARM_
+ if (id->idIsLargeCns())
+ {
+ return ((instrDescCns*)id)->idcCnsVal;
+ }
+ else
+ {
+ return id->idSmallCns();
+ }
+}
+
+/*****************************************************************************/
+#if EMIT_TRACK_STACK_DEPTH
+/*****************************************************************************
+ *
+ * Record a push of a single dword on the stack.
+ */
+
+void emitter::emitStackPush(BYTE* addr, GCtype gcType)
+{
+#ifdef DEBUG
+ assert(IsValidGCtype(gcType));
+#endif
+
+ if (emitSimpleStkUsed)
+ {
+ assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+ assert(emitCurStackLvl / sizeof(int) < MAX_SIMPLE_STK_DEPTH);
+
+ u1.emitSimpleStkMask <<= 1;
+ u1.emitSimpleStkMask |= (unsigned)needsGC(gcType);
+
+ u1.emitSimpleByrefStkMask <<= 1;
+ u1.emitSimpleByrefStkMask |= (gcType == GCT_BYREF);
+
+ assert((u1.emitSimpleStkMask & u1.emitSimpleByrefStkMask) == u1.emitSimpleByrefStkMask);
+ }
+ else
+ {
+ emitStackPushLargeStk(addr, gcType);
+ }
+
+ emitCurStackLvl += sizeof(int);
+}
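+
+// Illustrative note (not part of the original emitter code): in the "simple" tracking mode the two
+// masks behave as shift registers, with bit 0 always describing the most recent push. For example,
+// pushing a gcref, then a non-GC value, then a byref evolves the masks as follows (low bit on the
+// right):
+//
+//     u1.emitSimpleStkMask      : 001 -> 010 -> 101
+//     u1.emitSimpleByrefStkMask : 000 -> 000 -> 001
+//
+// Bits 0 and 2 mark GC slots, and of those only bit 0 (the byref) is also set in the byref mask,
+// which is exactly the containment invariant asserted above.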
+
+/*****************************************************************************
+ *
+ * Record a push of a bunch of non-GC dwords on the stack.
+ */
+
+void emitter::emitStackPushN(BYTE* addr, unsigned count)
+{
+ assert(count);
+
+ if (emitSimpleStkUsed)
+ {
+ assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+
+ u1.emitSimpleStkMask <<= count;
+ u1.emitSimpleByrefStkMask <<= count;
+ }
+ else
+ {
+ emitStackPushLargeStk(addr, GCT_NONE, count);
+ }
+
+ emitCurStackLvl += count * sizeof(int);
+}
+
+/*****************************************************************************
+ *
+ * Record a pop of the given number of dwords from the stack.
+ */
+
+void emitter::emitStackPop(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count)
+{
+ assert(emitCurStackLvl / sizeof(int) >= count);
+ assert(!isCall || callInstrSize > 0);
+
+ if (count)
+ {
+ if (emitSimpleStkUsed)
+ {
+ assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+
+ unsigned cnt = count;
+
+ do
+ {
+ u1.emitSimpleStkMask >>= 1;
+ u1.emitSimpleByrefStkMask >>= 1;
+ } while (--cnt);
+ }
+ else
+ {
+ emitStackPopLargeStk(addr, isCall, callInstrSize, count);
+ }
+
+ emitCurStackLvl -= count * sizeof(int);
+ }
+ else
+ {
+ assert(isCall);
+
+        // For the general encoder, when this is a call we always make the call below, to ensure that the
+        // call site is recorded (when we're building the ptr reg map for a non-fully-interruptible method).
+ if (emitFullGCinfo
+#ifndef JIT32_GCENCODER
+ || (emitComp->genFullPtrRegMap && (!emitComp->genInterruptible) && isCall)
+#endif // JIT32_GCENCODER
+ )
+ {
+ emitStackPopLargeStk(addr, isCall, callInstrSize, 0);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Record a push of a single word on the stack for a full pointer map.
+ */
+
+void emitter::emitStackPushLargeStk(BYTE* addr, GCtype gcType, unsigned count)
+{
+ S_UINT32 level(emitCurStackLvl / sizeof(int));
+
+ assert(IsValidGCtype(gcType));
+ assert(count);
+ assert(!emitSimpleStkUsed);
+
+ do
+ {
+ /* Push an entry for this argument on the tracking stack */
+
+ // printf("Pushed [%d] at lvl %2u [max=%u]\n", isGCref, emitArgTrackTop - emitArgTrackTab, emitMaxStackDepth);
+
+ assert(level.IsOverflow() || u2.emitArgTrackTop == u2.emitArgTrackTab + level.Value());
+ *u2.emitArgTrackTop++ = (BYTE)gcType;
+ assert(u2.emitArgTrackTop <= u2.emitArgTrackTab + emitMaxStackDepth);
+
+ if (!emitHasFramePtr || needsGC(gcType))
+ {
+ if (emitFullGCinfo)
+ {
+                /* Append an "arg push" entry if this is a GC ref or an
+                   FPO method. Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = gcType;
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdCall = FALSE;
+ if (level.IsOverflow() || !FitsIn<unsigned short>(level.Value()))
+ {
+ IMPL_LIMITATION("Too many/too big arguments to encode GC information");
+ }
+ regPtrNext->rpdPtrArg = (unsigned short)level.Value();
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_PUSH;
+ regPtrNext->rpdIsThis = FALSE;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%08X] %s arg push %u\n", dspPtr(regPtrNext), GCtypeStr(gcType), level.Value());
+ }
+#endif
+ }
+
+ /* This is an "interesting" argument push */
+
+ u2.emitGcArgTrackCnt++;
+ }
+ level += 1;
+ assert(!level.IsOverflow());
+ } while (--count);
+}
+
+/*****************************************************************************
+ *
+ * Record a pop of the given number of words from the stack for a full ptr
+ * map.
+ */
+
+void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count)
+{
+ assert(emitIssuing);
+
+ unsigned argStkCnt;
+ S_UINT16 argRecCnt(0); // arg count for ESP, ptr-arg count for EBP
+ unsigned gcrefRegs, byrefRegs;
+
+#ifdef JIT32_GCENCODER
+    // The general (non-JIT32) encoder always needs to record calls, so it may call this function
+    // even when emitSimpleStkUsed is true; with the JIT32 encoder that should never happen.
+ assert(!emitSimpleStkUsed);
+#endif
+
+ /* Count how many pointer records correspond to this "pop" */
+
+ for (argStkCnt = count; argStkCnt; argStkCnt--)
+ {
+ assert(u2.emitArgTrackTop > u2.emitArgTrackTab);
+
+ GCtype gcType = (GCtype)(*--u2.emitArgTrackTop);
+
+ assert(IsValidGCtype(gcType));
+
+ // printf("Popped [%d] at lvl %u\n", GCtypeStr(gcType), emitArgTrackTop - emitArgTrackTab);
+
+ // This is an "interesting" argument
+
+ if (!emitHasFramePtr || needsGC(gcType))
+ {
+ argRecCnt += 1;
+ }
+ }
+
+ assert(u2.emitArgTrackTop >= u2.emitArgTrackTab);
+ assert(u2.emitArgTrackTop == u2.emitArgTrackTab + emitCurStackLvl / sizeof(int) - count);
+ noway_assert(!argRecCnt.IsOverflow());
+
+ /* We're about to pop the corresponding arg records */
+
+ u2.emitGcArgTrackCnt -= argRecCnt.Value();
+
+#ifdef JIT32_GCENCODER
+ // For the general encoder, we always have to record calls, so we don't take this early return.
+ if (!emitFullGCinfo)
+ return;
+#endif
+
+ // Do we have any interesting (i.e., callee-saved) registers live here?
+
+ gcrefRegs = byrefRegs = 0;
+
+ // We make a bitmask whose bits correspond to callee-saved register indices (in the sequence
+ // of callee-saved registers only).
+ for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALLEE_SAVED; calleeSavedRegIdx++)
+ {
+ regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx];
+ if (emitThisGCrefRegs & calleeSavedRbm)
+ {
+ gcrefRegs |= (1 << calleeSavedRegIdx);
+ }
+ if (emitThisByrefRegs & calleeSavedRbm)
+ {
+ byrefRegs |= (1 << calleeSavedRegIdx);
+ }
+ }
+
+#ifdef JIT32_GCENCODER
+    // For the general encoder, we always have to record calls, so we don't take this early return.
+
+    /* Are there any args to pop at this call site? */
+
+ if (argRecCnt.Value() == 0)
+ {
+ /*
+ Or do we have a partially interruptible EBP-less frame, and any
+ of EDI,ESI,EBX,EBP are live, or is there an outer/pending call?
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !FPO_INTERRUPTIBLE
+ if (emitFullyInt || (gcrefRegs == 0 && byrefRegs == 0 && u2.emitGcArgTrackCnt == 0))
+#endif
+ return;
+ }
+#endif // JIT32_GCENCODER
+
+ /* Only calls may pop more than one value */
+ // More detail:
+ // _cdecl calls accomplish this popping via a post-call-instruction SP adjustment.
+ // The "rpdCall" field below should be interpreted as "the instruction accomplishes
+ // call-related popping, even if it's not itself a call". Therefore, we don't just
+ // use the "isCall" input argument, which means that the instruction actually is a call --
+ // we use the OR of "isCall" or the "pops more than one value."
+
+ bool isCallRelatedPop = (argRecCnt.Value() > 1);
+
+ /* Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = GCT_GCREF; // Pops need a non-0 value (??)
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+ regPtrNext->rpdCall = (isCall || isCallRelatedPop);
+#ifndef JIT32_GCENCODER
+ if (regPtrNext->rpdCall)
+ {
+ assert(isCall || callInstrSize == 0);
+ regPtrNext->rpdCallInstrSize = callInstrSize;
+ }
+#endif
+ regPtrNext->rpdCallGCrefRegs = gcrefRegs;
+ regPtrNext->rpdCallByrefRegs = byrefRegs;
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_POP;
+ regPtrNext->rpdPtrArg = argRecCnt.Value();
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%08X] ptr arg pop %u\n", dspPtr(regPtrNext), count);
+ }
+#endif
+}
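+
+// Illustrative note (not part of the original emitter code): the CNT_CALLEE_SAVED loop above
+// compresses the live gcref/byref register masks into small bitmasks indexed by position in the
+// callee-saved register order rather than by architectural register number. A standalone sketch
+// of that compression, with 'order' standing in for raRbmCalleeSaveOrder:
+//
+//     unsigned CompressToCalleeSavedIndices(unsigned long long liveMask,
+//                                           const unsigned long long* order, // one reg bit per entry
+//                                           unsigned count)
+//     {
+//         unsigned compressed = 0;
+//         for (unsigned i = 0; i < count; i++)
+//         {
+//             if ((liveMask & order[i]) != 0)
+//             {
+//                 compressed |= (1u << i); // bit i <=> the i-th callee-saved register is live
+//             }
+//         }
+//         return compressed;
+//     }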
+
+/*****************************************************************************
+ * For caller-pop arguments, we report the arguments as pending arguments.
+ * However, any GC arguments are now dead, so we need to report them
+ * as non-GC.
+ */
+
+void emitter::emitStackKillArgs(BYTE* addr, unsigned count, unsigned char callInstrSize)
+{
+ assert(count > 0);
+
+ if (emitSimpleStkUsed)
+ {
+        assert(!emitFullGCinfo); // Simple stk not used for emitFullGCinfo
+
+        /* We don't need to report this to the GC info, but we do need
+           to mark the ptrs on the stack as non-GC */
+
+ assert(emitCurStackLvl / sizeof(int) >= count);
+
+ for (unsigned lvl = 0; lvl < count; lvl++)
+ {
+ u1.emitSimpleStkMask &= ~(1 << lvl);
+ u1.emitSimpleByrefStkMask &= ~(1 << lvl);
+ }
+ }
+ else
+ {
+ BYTE* argTrackTop = u2.emitArgTrackTop;
+ S_UINT16 gcCnt(0);
+
+ for (unsigned i = 0; i < count; i++)
+ {
+ assert(argTrackTop > u2.emitArgTrackTab);
+
+ --argTrackTop;
+
+ GCtype gcType = (GCtype)(*argTrackTop);
+ assert(IsValidGCtype(gcType));
+
+ if (needsGC(gcType))
+ {
+ // printf("Killed %s at lvl %u\n", GCtypeStr(gcType), argTrackTop - emitArgTrackTab);
+
+ *argTrackTop = GCT_NONE;
+ gcCnt += 1;
+ }
+ }
+
+ noway_assert(!gcCnt.IsOverflow());
+
+ /* We're about to kill the corresponding (pointer) arg records */
+
+ if (emitHasFramePtr)
+ {
+ u2.emitGcArgTrackCnt -= gcCnt.Value();
+ }
+
+ if (!emitFullGCinfo)
+ {
+ return;
+ }
+
+ /* Right after the call, the arguments are still sitting on the
+ stack, but they are effectively dead. For fully-interruptible
+ methods, we need to report that */
+
+ if (emitFullGCinfo && gcCnt.Value())
+ {
+ /* Allocate a new ptr arg entry and fill it in */
+
+ regPtrDsc* regPtrNext = codeGen->gcInfo.gcRegPtrAllocDsc();
+ regPtrNext->rpdGCtype = GCT_GCREF; // Kills need a non-0 value (??)
+
+ regPtrNext->rpdOffs = emitCurCodeOffs(addr);
+
+ regPtrNext->rpdArg = TRUE;
+ regPtrNext->rpdArgType = (unsigned short)GCInfo::rpdARG_KILL;
+ regPtrNext->rpdPtrArg = gcCnt.Value();
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("[%08X] ptr arg kill %u\n", dspPtr(regPtrNext), count);
+ }
+#endif
+ }
+
+ /* Now that ptr args have been marked as non-ptrs, we need to record
+ the call itself as one that has no arguments. */
+
+ emitStackPopLargeStk(addr, true, callInstrSize, 0);
+ }
+}
+
+/*****************************************************************************
+ * A helper for recording a relocation with the EE.
+ */
+void emitter::emitRecordRelocation(void* location, /* IN */
+ void* target, /* IN */
+ WORD fRelocType, /* IN */
+ WORD slotNum /* = 0 */, /* IN */
+ INT32 addlDelta /* = 0 */) /* IN */
+{
+ // If we're an unmatched altjit, don't tell the VM anything. We still record the relocation for
+ // late disassembly; maybe we'll need it?
+ if (emitComp->info.compMatchedVM)
+ {
+ emitCmpHandle->recordRelocation(location, target, fRelocType, slotNum, addlDelta);
+ }
+#if defined(LATE_DISASM)
+ codeGen->getDisAssembler().disRecordRelocation((size_t)location, (size_t)target);
+#endif // defined(LATE_DISASM)
+}
+
+/*****************************************************************************
+ * A helper for recording a call site with the EE.
+ */
+void emitter::emitRecordCallSite(ULONG instrOffset, /* IN */
+ CORINFO_SIG_INFO* callSig, /* IN */
+ CORINFO_METHOD_HANDLE methodHandle) /* IN */
+{
+#if defined(DEBUG)
+ // Since CORINFO_SIG_INFO is a heavyweight structure, in most cases we can
+ // lazily obtain it here using the given method handle (we only save the sig
+ // info when we explicitly need it, i.e. for CALLI calls, vararg calls, and
+ // tail calls).
+ if (callSig == nullptr)
+ {
+ assert(methodHandle != nullptr);
+
+ if (Compiler::eeGetHelperNum(methodHandle) == CORINFO_HELP_UNDEF)
+ {
+ if (emitScratchSigInfo == nullptr)
+ {
+ emitScratchSigInfo = new (emitComp, CMK_CorSig) CORINFO_SIG_INFO;
+ }
+
+ emitComp->eeGetMethodSig(methodHandle, emitScratchSigInfo);
+ callSig = emitScratchSigInfo;
+ }
+ }
+
+ emitCmpHandle->recordCallSite(instrOffset, callSig, methodHandle);
+#endif // defined(DEBUG)
+}
+
+/*****************************************************************************/
+#endif // EMIT_TRACK_STACK_DEPTH
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Given a code offset, return a string representing a label for that offset.
+ * If the code offset is just after the end of the code of the function, the
+ * label will be "END". If the code offset doesn't correspond to any known
+ * offset, the label will be "UNKNOWN". The strings are returned from static
+ * buffers. This function rotates amongst four such static buffers (there are
+ * cases where this function is called four times to provide data for a single
+ * printf()).
+ */
+
+const char* emitter::emitOffsetToLabel(unsigned offs)
+{
+ const size_t TEMP_BUFFER_LEN = 40;
+ static unsigned curBuf = 0;
+ static char buf[4][TEMP_BUFFER_LEN];
+ char* retbuf;
+
+ insGroup* ig;
+ UNATIVE_OFFSET of;
+ UNATIVE_OFFSET nextof = 0;
+
+ for (ig = emitIGlist; ig != nullptr; ig = ig->igNext)
+ {
+ assert(nextof == ig->igOffs);
+
+ if (ig->igOffs == offs)
+ {
+ // Found it!
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "G_M%03u_IG%02u", Compiler::s_compMethodsCount, ig->igNum);
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+ else if (ig->igOffs > offs)
+ {
+ // We went past the requested offset but didn't find it.
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "UNKNOWN");
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+
+ nextof = ig->igOffs + ig->igSize;
+ }
+
+ if (nextof == offs)
+ {
+ // It's a pseudo-label to the end.
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "END");
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+ else
+ {
+ sprintf_s(buf[curBuf], TEMP_BUFFER_LEN, "UNKNOWN");
+ retbuf = buf[curBuf];
+ curBuf = (curBuf + 1) % 4;
+ return retbuf;
+ }
+}
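+
+// Illustrative note (not part of the original emitter code): because the returned strings live in
+// four rotating static buffers, up to four results can safely appear in a single statement, e.g.
+//
+//     printf("range [%s..%s)\n", emitOffsetToLabel(startOffs), emitOffsetToLabel(endOffs));
+//
+// (a hypothetical call; a fifth concurrent use would start overwriting the oldest buffer).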
+
+#endif // DEBUG
diff --git a/src/jit/emit.h b/src/jit/emit.h
new file mode 100644
index 0000000000..8fb24bcd60
--- /dev/null
+++ b/src/jit/emit.h
@@ -0,0 +1,2742 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _EMIT_H_
+#define _EMIT_H_
+
+#include "instr.h"
+
+#ifndef _GCINFO_H_
+#include "gcinfo.h"
+#endif
+
+#include "jitgcinfo.h"
+
+/*****************************************************************************/
+#ifdef TRANSLATE_PDB
+#ifndef _ADDRMAP_INCLUDED_
+#include "addrmap.h"
+#endif
+#ifndef _LOCALMAP_INCLUDED_
+#include "localmap.h"
+#endif
+#ifndef _PDBREWRITE_H_
+#include "pdbrewrite.h"
+#endif
+#endif // TRANSLATE_PDB
+
+/*****************************************************************************/
+#ifdef _MSC_VER
+#pragma warning(disable : 4200) // allow arrays of 0 size inside structs
+#endif
+#define TRACK_GC_TEMP_LIFETIMES 0
+
+/*****************************************************************************/
+
+#if 0
+#define EMITVERBOSE 1
+#else
+#define EMITVERBOSE (emitComp->verbose)
+#endif
+
+#if 0
+#define EMIT_GC_VERBOSE 0
+#else
+#define EMIT_GC_VERBOSE (emitComp->verbose)
+#endif
+
+#if 1
+#define EMIT_INSTLIST_VERBOSE 0
+#else
+#define EMIT_INSTLIST_VERBOSE (emitComp->verbose)
+#endif
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#define DEBUG_EMIT 1
+#else
+#define DEBUG_EMIT 0
+#endif
+
+#if EMITTER_STATS
+void emitterStats(FILE* fout);
+void emitterStaticStats(FILE* fout); // Static stats about the emitter (data structure offsets, sizes, etc.)
+#endif
+
+void printRegMaskInt(regMaskTP mask);
+
+/*****************************************************************************/
+/* Forward declarations */
+
+class emitLocation;
+class emitter;
+struct insGroup;
+
+typedef void (*emitSplitCallbackType)(void* context, emitLocation* emitLoc);
+
+/*****************************************************************************/
+
+//-----------------------------------------------------------------------------
+
+inline bool needsGC(GCtype gcType)
+{
+ if (gcType == GCT_NONE)
+ {
+ return false;
+ }
+ else
+ {
+ assert(gcType == GCT_GCREF || gcType == GCT_BYREF);
+ return true;
+ }
+}
+
+//-----------------------------------------------------------------------------
+
+#ifdef DEBUG
+
+inline bool IsValidGCtype(GCtype gcType)
+{
+ return (gcType == GCT_NONE || gcType == GCT_GCREF || gcType == GCT_BYREF);
+}
+
+// Get a string name to represent the GC type
+
+inline const char* GCtypeStr(GCtype gcType)
+{
+ switch (gcType)
+ {
+ case GCT_NONE:
+ return "npt";
+ case GCT_GCREF:
+ return "gcr";
+ case GCT_BYREF:
+ return "byr";
+ default:
+ assert(!"Invalid GCtype");
+ return "err";
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************/
+
+#if DEBUG_EMIT
+#define INTERESTING_JUMP_NUM -1 // set to 0 to see all jump info
+//#define INTERESTING_JUMP_NUM 0
+#endif
+
+/*****************************************************************************
+ *
+ * Represent an emitter location.
+ */
+
+class emitLocation
+{
+public:
+ emitLocation() : ig(nullptr), codePos(0)
+ {
+ }
+
+ emitLocation(insGroup* _ig) : ig(_ig), codePos(0)
+ {
+ }
+
+ emitLocation(void* emitCookie) : ig((insGroup*)emitCookie), codePos(0)
+ {
+ }
+
+    // A pseudo-constructor for code that needs to (re)initialize an existing instance explicitly.
+ void Init()
+ {
+ this->emitLocation::emitLocation();
+ }
+
+ void CaptureLocation(emitter* emit);
+
+ bool IsCurrentLocation(emitter* emit) const;
+
+ // This function is highly suspect, since it presumes knowledge of the codePos "cookie",
+ // and doesn't look at the 'ig' pointer.
+ bool IsOffsetZero() const
+ {
+ return (codePos == 0);
+ }
+
+ UNATIVE_OFFSET CodeOffset(emitter* emit) const;
+
+ insGroup* GetIG() const
+ {
+ return ig;
+ }
+
+ int GetInsNum() const;
+
+ bool operator!=(const emitLocation& other) const
+ {
+ return (ig != other.ig) || (codePos != other.codePos);
+ }
+
+ bool operator==(const emitLocation& other) const
+ {
+ return !(*this != other);
+ }
+
+ bool Valid() const
+ {
+ // Things we could validate:
+ // 1. the instruction group pointer is non-nullptr.
+ // 2. 'ig' is a legal pointer to an instruction group.
+ // 3. 'codePos' is a legal offset into 'ig'.
+ // Currently, we just do #1.
+ // #2 and #3 should only be done in DEBUG, if they are implemented.
+
+ if (ig == nullptr)
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+#ifdef _TARGET_AMD64_
+ UNATIVE_OFFSET GetFuncletPrologOffset(emitter* emit) const;
+#endif // _TARGET_AMD64_
+
+#ifdef DEBUG
+ void Print() const;
+#endif // DEBUG
+
+private:
+ insGroup* ig; // the instruction group
+ unsigned codePos; // the code position within the IG (see emitCurOffset())
+};
+
+/************************************************************************/
+/* The following describes an instruction group */
+/************************************************************************/
+
+DECLARE_TYPED_ENUM(insGroupPlaceholderType, unsigned char)
+{
+ IGPT_PROLOG, // currently unused
+ IGPT_EPILOG,
+#if FEATURE_EH_FUNCLETS
+ IGPT_FUNCLET_PROLOG, IGPT_FUNCLET_EPILOG,
+#endif // FEATURE_EH_FUNCLETS
+}
+END_DECLARE_TYPED_ENUM(insGroupPlaceholderType, unsigned char)
+
+#if defined(_MSC_VER) && defined(_TARGET_ARM_)
+// ARM aligns structures that contain 64-bit ints or doubles on 64-bit boundaries. This causes unwanted
+// padding to be added to the end, so sizeof() is unnecessarily big.
+#pragma pack(push)
+#pragma pack(4)
+#endif // defined(_MSC_VER) && defined(_TARGET_ARM_)
+
+struct insPlaceholderGroupData
+{
+ insGroup* igPhNext;
+ BasicBlock* igPhBB;
+ VARSET_TP igPhInitGCrefVars;
+ regMaskTP igPhInitGCrefRegs;
+ regMaskTP igPhInitByrefRegs;
+ VARSET_TP igPhPrevGCrefVars;
+ regMaskTP igPhPrevGCrefRegs;
+ regMaskTP igPhPrevByrefRegs;
+ insGroupPlaceholderType igPhType;
+}; // end of struct insPlaceholderGroupData
+
+struct insGroup
+{
+ insGroup* igNext;
+
+#ifdef DEBUG
+ insGroup* igSelf; // for consistency checking
+#endif
+
+ UNATIVE_OFFSET igNum; // for ordering (and display) purposes
+ UNATIVE_OFFSET igOffs; // offset of this group within method
+ unsigned int igFuncIdx; // Which function/funclet does this belong to? (Index into Compiler::compFuncInfos array.)
+ unsigned short igFlags; // see IGF_xxx below
+ unsigned short igSize; // # of bytes of code in this group
+
+#define IGF_GC_VARS 0x0001 // new set of live GC ref variables
+#define IGF_BYREF_REGS 0x0002 // new set of live by-ref registers
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define IGF_FINALLY_TARGET 0x0004 // this group is the start of a basic block that is returned to after a finally.
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+#define IGF_FUNCLET_PROLOG 0x0008 // this group belongs to a funclet prolog
+#ifdef DEBUG
+#define IGF_FUNCLET_EPILOG 0x0010 // this group belongs to a funclet epilog. Currently, this is only needed for DEBUG.
+#endif
+#define IGF_EPILOG 0x0020 // this group belongs to a main function epilog
+#define IGF_NOGCINTERRUPT 0x0040 // this IG is a no-interrupt region (prolog, epilog, etc.)
+#define IGF_UPD_ISZ 0x0080 // some instruction sizes updated
+#define IGF_PLACEHOLDER 0x0100 // this is a placeholder group, to be filled in later
+#define IGF_EMIT_ADD 0x0200 // this is a block added by the emitter
+ // because the codegen block was too big. Also used for
+ // placeholder IGs that aren't also labels.
+
+// Mask of IGF_* flags that should be propagated to new blocks when they are created.
+// This allows prologs and epilogs to be any number of IGs, but still be
+// automatically marked properly.
+#if FEATURE_EH_FUNCLETS
+#ifdef DEBUG
+#define IGF_PROPAGATE_MASK (IGF_EPILOG | IGF_FUNCLET_PROLOG | IGF_FUNCLET_EPILOG)
+#else // DEBUG
+#define IGF_PROPAGATE_MASK (IGF_EPILOG | IGF_FUNCLET_PROLOG)
+#endif // DEBUG
+#else // FEATURE_EH_FUNCLETS
+#define IGF_PROPAGATE_MASK (IGF_EPILOG)
+#endif // FEATURE_EH_FUNCLETS
+
+ // Try to do better packing based on how large regMaskSmall is (8, 16, or 64 bits).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if REGMASK_BITS <= 32
+
+ union {
+ BYTE* igData; // addr of instruction descriptors
+ insPlaceholderGroupData* igPhData; // when igFlags & IGF_PLACEHOLDER
+ };
+
+#if EMIT_TRACK_STACK_DEPTH
+ unsigned igStkLvl; // stack level on entry
+#endif
+ regMaskSmall igGCregs; // set of registers with live GC refs
+ unsigned char igInsCnt; // # of instructions in this group
+
+#else // REGMASK_BITS
+
+ regMaskSmall igGCregs; // set of registers with live GC refs
+
+ union {
+ BYTE* igData; // addr of instruction descriptors
+ insPlaceholderGroupData* igPhData; // when igFlags & IGF_PLACEHOLDER
+ };
+
+#if EMIT_TRACK_STACK_DEPTH
+ unsigned igStkLvl; // stack level on entry
+#endif
+
+ unsigned char igInsCnt; // # of instructions in this group
+
+#endif // REGMASK_BITS
+
+ VARSET_VALRET_TP igGCvars() const
+ {
+ assert(igFlags & IGF_GC_VARS);
+
+ BYTE* ptr = (BYTE*)igData;
+ ptr -= sizeof(VARSET_TP);
+
+ return *(VARSET_TP*)ptr;
+ }
+
+ unsigned igByrefRegs() const
+ {
+ assert(igFlags & IGF_BYREF_REGS);
+
+ BYTE* ptr = (BYTE*)igData;
+
+ if (igFlags & IGF_GC_VARS)
+ {
+ ptr -= sizeof(VARSET_TP);
+ }
+
+ ptr -= sizeof(unsigned);
+
+ return *(unsigned*)ptr;
+ }
+
+}; // end of struct insGroup
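+
+// Illustrative note (not part of the original emitter code): as the igGCvars()/igByrefRegs()
+// accessors above imply, the optional per-group GC data is stored immediately *before* the
+// instruction descriptors that igData points at, in this order (ascending addresses):
+//
+//     [ unsigned byrefRegs ] [ VARSET_TP gcrefVars ] [ instruction descriptors ... ]
+//      (if IGF_BYREF_REGS)     (if IGF_GC_VARS)       ^-- igData points here
+//
+// so each accessor just backs up over whichever optional blocks are present.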
+
+// For AMD64 the maximum prolog/epilog size supported by the OS is 256 bytes.
+// Since it is incorrect for us to jump across funclet prologs/epilogs,
+// we use the following estimate as the maximum placeholder size.
+//
+#define MAX_PLACEHOLDER_IG_SIZE 256
+
+#if defined(_MSC_VER) && defined(_TARGET_ARM_)
+#pragma pack(pop)
+#endif // defined(_MSC_VER) && defined(_TARGET_ARM_)
+
+/*****************************************************************************/
+
+#define DEFINE_ID_OPS
+#include "emitfmts.h"
+#undef DEFINE_ID_OPS
+
+enum LclVarAddrTag
+{
+ LVA_STANDARD_ENCODING = 0,
+ LVA_LARGE_OFFSET = 1,
+ LVA_COMPILER_TEMP = 2,
+ LVA_LARGE_VARNUM = 3
+};
+
+struct emitLclVarAddr
+{
+ // Constructor
+ void initLclVarAddr(int varNum, unsigned offset);
+
+ int lvaVarNum(); // Returns the variable to access. Note that it returns a negative number for compiler spill temps.
+ unsigned lvaOffset(); // returns the offset into the variable to access
+
+ // This struct should be 32 bits in size for the release build.
+ // We have this constraint because this type is used in a union
+ // with several other pointer sized types in the instrDesc struct.
+ //
+protected:
+ unsigned _lvaVarNum : 15; // Usually the lvaVarNum
+ unsigned _lvaExtra : 15; // Usually the lvaOffset
+ unsigned _lvaTag : 2; // tag field to support larger varnums
+};
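+
+// Illustrative note (not part of the original emitter code): the 2-bit _lvaTag selects how the
+// other 30 bits are interpreted (see LclVarAddrTag above); the real packing is done by
+// initLclVarAddr (declared above). A hedged sketch of just the common LVA_STANDARD_ENCODING
+// case, written as a hypothetical member for illustration:
+//
+//     void packStandard(int varNum, unsigned offset)
+//     {
+//         assert((unsigned)varNum < (1u << 15) && offset < (1u << 15)); // must fit the 15-bit fields
+//         _lvaVarNum = (unsigned)varNum;
+//         _lvaExtra  = offset;
+//         _lvaTag    = LVA_STANDARD_ENCODING;
+//     }
+//
+// The large-offset, large-varnum, and compiler-temp cases use the other tag values instead.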
+
+enum idAddrUnionTag
+{
+ iaut_ALIGNED_POINTER = 0x0,
+ iaut_DATA_OFFSET = 0x1,
+ iaut_INST_COUNT = 0x2,
+ iaut_UNUSED_TAG = 0x3,
+
+ iaut_MASK = 0x3,
+ iaut_SHIFT = 2
+};
+
+class emitter
+{
+ friend class emitLocation;
+ friend class Compiler;
+ friend class CodeGen;
+ friend class CodeGenInterface;
+
+public:
+ /*************************************************************************
+ *
+ * Define the public entry points.
+ */
+
+ // Constructor.
+ emitter()
+ {
+#ifdef DEBUG
+ // There seem to be some cases where this is used without being initialized via CodeGen::inst_set_SV_var().
+ emitVarRefOffs = 0;
+#endif // DEBUG
+#ifdef FEATURE_AVX_SUPPORT
+ SetUseAVX(false);
+#endif // FEATURE_AVX_SUPPORT
+ }
+
+#include "emitpub.h"
+
+protected:
+ /************************************************************************/
+ /* Miscellaneous stuff */
+ /************************************************************************/
+
+ Compiler* emitComp;
+ GCInfo* gcInfo;
+ CodeGen* codeGen;
+
+ typedef GCInfo::varPtrDsc varPtrDsc;
+ typedef GCInfo::regPtrDsc regPtrDsc;
+ typedef GCInfo::CallDsc callDsc;
+
+ void* emitGetMem(size_t sz);
+
+ DECLARE_TYPED_ENUM(opSize, unsigned)
+ {
+ OPSZ1 = 0, OPSZ2 = 1, OPSZ4 = 2, OPSZ8 = 3, OPSZ16 = 4, OPSZ32 = 5, OPSZ_COUNT = 6,
+#ifdef _TARGET_AMD64_
+ OPSZP = OPSZ8,
+#else
+ OPSZP = OPSZ4,
+#endif
+ }
+ END_DECLARE_TYPED_ENUM(opSize, unsigned)
+
+#define OPSIZE_INVALID ((opSize)0xffff)
+
+ static const emitter::opSize emitSizeEncode[];
+ static const emitAttr emitSizeDecode[];
+
+ static emitter::opSize emitEncodeSize(emitAttr size);
+ static emitAttr emitDecodeSize(emitter::opSize ensz);
+
+ // Currently, we only allow one IG for the prolog
+ bool emitIGisInProlog(const insGroup* ig)
+ {
+ return ig == emitPrologIG;
+ }
+
+ bool emitIGisInEpilog(const insGroup* ig)
+ {
+ return (ig != nullptr) && ((ig->igFlags & IGF_EPILOG) != 0);
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ bool emitIGisInFuncletProlog(const insGroup* ig)
+ {
+ return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_PROLOG) != 0);
+ }
+
+#ifdef DEBUG
+ bool emitIGisInFuncletEpilog(const insGroup* ig)
+ {
+ return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_EPILOG) != 0);
+ }
+#endif // DEBUG
+#endif // FEATURE_EH_FUNCLETS
+
+ // If "ig" corresponds to the start of a basic block that is the
+    // target of a funclet return, generate GC information for its start
+ // address "cp", as if it were the return address of a call.
+ void emitGenGCInfoIfFuncletRetTarget(insGroup* ig, BYTE* cp);
+
+ void emitRecomputeIGoffsets();
+
+ /************************************************************************/
+ /* The following describes a single instruction */
+ /************************************************************************/
+
+ DECLARE_TYPED_ENUM(insFormat, unsigned)
+ {
+#define IF_DEF(en, op1, op2) IF_##en,
+#include "emitfmts.h"
+
+ IF_COUNT
+ }
+ END_DECLARE_TYPED_ENUM(insFormat, unsigned)
+
+#define AM_DISP_BITS ((sizeof(unsigned) * 8) - 2 * (REGNUM_BITS + 1) - 2)
+#define AM_DISP_BIG_VAL (-(1 << (AM_DISP_BITS - 1)))
+#define AM_DISP_MIN (-((1 << (AM_DISP_BITS - 1)) - 1))
+#define AM_DISP_MAX (+((1 << (AM_DISP_BITS - 1)) - 1))
+
+ struct emitAddrMode
+ {
+ regNumber amBaseReg : REGNUM_BITS + 1;
+ regNumber amIndxReg : REGNUM_BITS + 1;
+ emitter::opSize amScale : 2;
+ int amDisp : AM_DISP_BITS;
+ };
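+
+    // Illustrative note (not part of the original emitter code): the displacement gets whatever
+    // bits of the 32-bit emitAddrMode remain after the two register fields and the 2-bit scale.
+    // For example, with a hypothetical REGNUM_BITS of 6:
+    //
+    //     AM_DISP_BITS    = 32 - 2*(6 + 1) - 2 = 16
+    //     AM_DISP_BIG_VAL = -(1 << 15)         = -32768  (the one reserved pattern; its name
+    //                                                     suggests "displacement too big to store in-line")
+    //     AM_DISP_MIN/MAX = -32767 .. +32767             (the range encodable directly in amDisp)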
+
+#if defined(DEBUG) || defined(LATE_DISASM) // LATE_DISASM needs the idMemCookie on calls to display the call target name
+
+ struct instrDesc;
+
+ struct instrDescDebugInfo
+ {
+ unsigned idNum;
+ size_t idSize; // size of the instruction descriptor
+ unsigned idVarRefOffs; // IL offset for LclVar reference
+ size_t idMemCookie; // for display of member names in addr modes
+ void* idClsCookie; // for display of member names in addr modes
+#ifdef TRANSLATE_PDB
+ unsigned int idilStart; // instruction descriptor source information for PDB translation
+#endif
+ bool idFinallyCall; // Branch instruction is a call to finally
+ bool idCatchRet; // Instruction is for a catch 'return'
+ CORINFO_SIG_INFO* idCallSig; // Used to report native call site signatures to the EE
+ };
+
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#ifdef _TARGET_ARM_
+ unsigned insEncodeSetFlags(insFlags sf);
+
+ DECLARE_TYPED_ENUM(insSize, unsigned)
+ {
+ ISZ_16BIT, ISZ_32BIT, ISZ_48BIT // pseudo-instruction for conditional branch with imm24 range,
+ // encoded as IT of condition followed by an unconditional branch
+ }
+ END_DECLARE_TYPED_ENUM(insSize, unsigned)
+
+ unsigned insEncodeShiftOpts(insOpts opt);
+ unsigned insEncodePUW_G0(insOpts opt, int imm);
+ unsigned insEncodePUW_H0(insOpts opt, int imm);
+
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+#define HAS_TINY_DESC 1
+#else
+#define HAS_TINY_DESC 0
+#endif
+
+ struct instrDescCns;
+
+ struct instrDesc
+ {
+ private:
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // The assembly instruction
+ instruction _idIns : 9;
+#else // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ // The assembly instruction
+ instruction _idIns : 8;
+#endif // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ // The format for the instruction
+ insFormat _idInsFmt : 8;
+
+ public:
+ instruction idIns() const
+ {
+ return _idIns;
+ }
+ void idIns(instruction ins)
+ {
+ _idIns = ins;
+ assert(_idIns == ins);
+ }
+
+ insFormat idInsFmt() const
+ {
+ return _idInsFmt;
+ }
+ void idInsFmt(insFormat insFmt)
+ {
+#if defined(_TARGET_ARM64_)
+ noway_assert(insFmt != IF_NONE); // Only the x86 emitter uses IF_NONE, it is invalid for ARM64 (and ARM32)
+#endif
+ _idInsFmt = insFmt;
+ assert(_idInsFmt == insFmt);
+ }
+
+ /*
+ The idReg1 and idReg2 fields hold the first and second register
+ operand(s), whenever these are present. Note that the size of
+ these fields ranges from 3 to 6 bits, and care needs to be taken
+ to make sure all of these fields stay reasonably packed.
+ */
+
+ void idSetRelocFlags(emitAttr attr)
+ {
+ _idCnsReloc = (EA_IS_CNS_RELOC(attr) ? 1 : 0);
+ _idDspReloc = (EA_IS_DSP_RELOC(attr) ? 1 : 0);
+ }
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here:
+ // x86: 16 bits
+ // amd64: 17 bits
+ // arm: 16 bits
+ // arm64: 16 bits
+
+ private:
+#ifdef _TARGET_XARCH_
+ unsigned _idCodeSize : 4; // size of instruction in bytes
+#endif
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16, 5=32
+ // At this point we have fully consumed first DWORD so that next field
+ // doesn't cross a byte boundary.
+#elif defined(_TARGET_ARM64_)
+// Moved the definition of '_idOpSize' later so that we don't cross a 32-bit boundary when laying out bitfields
+#else // ARM or x86-LEGACY_BACKEND
+ opSize _idOpSize : 2; // operand size: 0=1 , 1=2 , 2=4 , 3=8
+#endif // ARM or x86-LEGACY_BACKEND
+
+ // On Amd64, this is where the second DWORD begins
+ // On System V a call could return a struct in 2 registers. The instrDescCGCA struct below has member that
+ // stores the GC-ness of the second register.
+ // It is added to the instrDescCGCA and not here (the base struct) since it is not needed by all the
+ // instructions. This struct (instrDesc) is very carefully kept to be no more than 128 bytes. There is no more
+ // space to add members for keeping GC-ness of the second return registers. It will also bloat the base struct
+ // unnecessarily since the GC-ness of the second register is only needed for call instructions.
+ // The instrDescCGCA struct's member keeping the GC-ness of the first return register is _idcSecondRetRegGCType.
+ GCtype _idGCref : 2; // GCref operand? (value is a "GCtype")
+
+ // Note that we use the _idReg1 and _idReg2 fields to hold
+ // the live gcrefReg mask for the call instructions on x86/x64
+ //
+ regNumber _idReg1 : REGNUM_BITS; // register num
+
+ regNumber _idReg2 : REGNUM_BITS;
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here:
+ // x86: 30 bits
+ // amd64: 38 bits
+ // arm: 32 bits
+ // arm64: 30 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if HAS_TINY_DESC
+ //
+ // For x86 use last two bits to differentiate if we are tiny or small
+ //
+ unsigned _idTinyDsc : 1; // is this a "tiny" descriptor?
+ unsigned _idSmallDsc : 1; // is this a "small" descriptor?
+
+#else // !HAS_TINY_DESC
+
+ //
+ // On x86/arm platforms we have used 32 bits so far (4 bytes)
+ // On amd64 we have used 38 bits so far (4 bytes + 6 bits)
+ //
+
+ //
+ // For amd64 we just can't fit anything useful into a single DWORD
+ // So we eliminate the notion of 'tiny', and have small (2 DWORDS)
+ // or not small (which is bigger, just like x86)
+ //
+
+ unsigned _idSmallDsc : 1; // is this a "small" descriptor?
+ unsigned _idLargeCns : 1; // does a large constant follow?
+ unsigned _idLargeDsp : 1; // does a large displacement follow?
+ unsigned _idLargeCall : 1; // large call descriptor used
+
+ unsigned _idBound : 1; // jump target / frame offset bound
+ unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg
+ unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr
+ unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables
+
+#ifdef _TARGET_ARM64_
+ opSize _idOpSize : 3; // operand size: 0=1 , 1=2 , 2=4 , 3=8, 4=16
+ insOpts _idInsOpt : 6; // options for instructions
+ unsigned _idLclVar : 1; // access a local on stack
+#endif
+
+#ifdef _TARGET_ARM_
+ insSize _idInsSize : 2; // size of instruction: 16, 32 or 48 bits
+ insFlags _idInsFlags : 1; // will this instruction set the flags
+ unsigned _idLclVar : 1; // access a local on stack
+ unsigned _idLclFPBase : 1; // access a local on stack - SP based offset
+ insOpts _idInsOpt : 3; // options for Load/Store instructions
+
+// For arm we have used 16 bits
+#define ID_EXTRA_BITFIELD_BITS (16)
+
+#elif defined(_TARGET_ARM64_)
+// For Arm64, we have used 16 bits from the second DWORD.
+#define ID_EXTRA_BITFIELD_BITS (16)
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+// For xarch !LEGACY_BACKEND, we have used 14 bits from the second DWORD.
+#define ID_EXTRA_BITFIELD_BITS (14)
+#elif defined(_TARGET_X86_)
+// For x86, we have used 6 bits from the second DWORD.
+#define ID_EXTRA_BITFIELD_BITS (6)
+#else
+#error Unsupported or unset target architecture
+#endif
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here:
+ // x86: 38 bits // if HAS_TINY_DESC is not defined (which it is)
+ // amd64: 46 bits
+ // arm: 48 bits
+ // arm64: 48 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef RELOC_SUPPORT
+
+ unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag
+ unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag
+
+#define ID_EXTRA_RELOC_BITS (2)
+
+#else // RELOC_SUPPORT
+
+#define ID_EXTRA_RELOC_BITS (0)
+
+#endif // RELOC_SUPPORT
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here (assuming RELOC_SUPPORT):
+ // x86: 40 bits
+ // amd64: 48 bits
+ // arm: 50 bits
+ // arm64: 50 bits
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#define ID_EXTRA_BITS (ID_EXTRA_RELOC_BITS + ID_EXTRA_BITFIELD_BITS)
+
+/* Use whatever bits are left over for small constants */
+
+#define ID_BIT_SMALL_CNS (32 - ID_EXTRA_BITS)
+#define ID_MIN_SMALL_CNS 0
+#define ID_MAX_SMALL_CNS (int)((1 << ID_BIT_SMALL_CNS) - 1U)
+
+ ////////////////////////////////////////////////////////////////////////
+ // Small constant size (assuming RELOC_SUPPORT):
+ // x86: 24 bits
+ // amd64: 16 bits
+ // arm: 14 bits
+ // arm64: 14 bits
+
+ unsigned _idSmallCns : ID_BIT_SMALL_CNS;
+
+ ////////////////////////////////////////////////////////////////////////
+ // Space taken up to here (with RELOC_SUPPORT): 64 bits, all architectures, by design.
+ ////////////////////////////////////////////////////////////////////////
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#endif // !HAS_TINY_DESC
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+
+ instrDescDebugInfo* _idDebugOnlyInfo;
+
+ public:
+ instrDescDebugInfo* idDebugOnlyInfo() const
+ {
+ return _idDebugOnlyInfo;
+ }
+ void idDebugOnlyInfo(instrDescDebugInfo* info)
+ {
+ _idDebugOnlyInfo = info;
+ }
+
+ private:
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+ //
+ // This is the end of the smallest instrDesc we can allocate for all
+ // platforms.
+ // Non-DEBUG sizes:
+ // x86: 32 bits, and it is called the 'tiny' descriptor.
+ // amd64/arm/arm64: 64 bits, and it is called the 'small' descriptor.
+ // DEBUG sizes (includes one pointer):
+ // x86: 2 DWORDs, 64 bits
+ // amd64: 4 DWORDs, 128 bits
+ // arm: 3 DWORDs, 96 bits
+ // arm64: 4 DWORDs, 128 bits
+    // There should be no padding or alignment issues on any platform or
+ // configuration (including DEBUG which has 1 extra pointer).
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if HAS_TINY_DESC
+
+ unsigned _idLargeCns : 1; // does a large constant follow?
+ unsigned _idLargeDsp : 1; // does a large displacement follow?
+ unsigned _idLargeCall : 1; // large call descriptor used
+ unsigned _idBound : 1; // jump target / frame offset bound
+
+ unsigned _idCallRegPtr : 1; // IL indirect calls: addr in reg
+ unsigned _idCallAddr : 1; // IL indirect calls: can make a direct call to iiaAddr
+ unsigned _idNoGC : 1; // Some helpers don't get recorded in GC tables
+
+#define ID_EXTRA_BITFIELD_BITS (7)
+
+//
+// For x86, we are using 7 bits from the second DWORD for bitfields.
+//
+
+#ifdef RELOC_SUPPORT
+
+ unsigned _idCnsReloc : 1; // LargeCns is an RVA and needs reloc tag
+ unsigned _idDspReloc : 1; // LargeDsp is an RVA and needs reloc tag
+
+#define ID_EXTRA_RELOC_BITS (2)
+
+#else // RELOC_SUPPORT
+
+#define ID_EXTRA_RELOC_BITS (0)
+
+#endif // RELOC_SUPPORT
+
+#define ID_EXTRA_REG_BITS (0)
+
+#define ID_EXTRA_BITS (ID_EXTRA_BITFIELD_BITS + ID_EXTRA_RELOC_BITS + ID_EXTRA_REG_BITS)
+
+/* Use whatever bits are left over for small constants */
+
+#define ID_BIT_SMALL_CNS (32 - ID_EXTRA_BITS)
+#define ID_MIN_SMALL_CNS 0
+#define ID_MAX_SMALL_CNS (int)((1 << ID_BIT_SMALL_CNS) - 1U)
+
+ // For x86 (assuming RELOC_SUPPORT) we have 23 bits remaining for the
+ // small constant in this extra DWORD.
+
+ unsigned _idSmallCns : ID_BIT_SMALL_CNS;
+
+#endif // HAS_TINY_DESC
+
+//
+// This is the end of the 'small' instrDesc which is the same on all
+// platforms (except 64-bit DEBUG which is a little bigger).
+// Non-DEBUG sizes:
+// x86/amd64/arm/arm64: 64 bits
+// DEBUG sizes (includes one pointer):
+// x86: 2 DWORDs, 64 bits
+// amd64: 4 DWORDs, 128 bits
+// arm: 3 DWORDs, 96 bits
+// arm64: 4 DWORDs, 128 bits
+// There should no padding or alignment issues on any platform or
+// configuration (including DEBUG which has 1 extra pointer).
+//
+
+/*
+ If you add lots more fields that need to be cleared (such
+ as various flags), you might need to update the body of
+ emitter::emitAllocInstr() to clear them.
+ */
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+#define TINY_IDSC_DEBUG_EXTRA (sizeof(void*))
+#else
+#define TINY_IDSC_DEBUG_EXTRA (0)
+#endif
+
+#if HAS_TINY_DESC
+#define TINY_IDSC_SIZE (4 + TINY_IDSC_DEBUG_EXTRA)
+#define SMALL_IDSC_SIZE (8 + TINY_IDSC_DEBUG_EXTRA)
+#else
+#define TINY_IDSC_SIZE (8 + TINY_IDSC_DEBUG_EXTRA)
+#define SMALL_IDSC_SIZE TINY_IDSC_SIZE
+#endif
+
+ void checkSizes();
+
+ union idAddrUnion {
+ // TODO-Cleanup: We should really add a DEBUG-only tag to this union so we can add asserts
+ // about reading what we think is here, to avoid unexpected corruption issues.
+
+ emitLclVarAddr iiaLclVar;
+ BasicBlock* iiaBBlabel;
+ insGroup* iiaIGlabel;
+ BYTE* iiaAddr;
+ emitAddrMode iiaAddrMode;
+
+        CORINFO_FIELD_HANDLE iiaFieldHnd; // iiaFieldHnd is also used to encode
+                                          // an offset into the JIT data constant area
+ bool iiaIsJitDataOffset() const;
+ int iiaGetJitDataOffset() const;
+
+#ifdef _TARGET_ARMARCH_
+
+ // iiaEncodedInstrCount and its accessor functions are used to specify an instruction
+ // count for jumps, instead of using a label and multiple blocks. This is used in the
+ // prolog as well as for IF_LARGEJMP pseudo-branch instructions.
+ int iiaEncodedInstrCount;
+
+ bool iiaHasInstrCount() const
+ {
+ return (iiaEncodedInstrCount & iaut_MASK) == iaut_INST_COUNT;
+ }
+ int iiaGetInstrCount() const
+ {
+ assert(iiaHasInstrCount());
+ return (iiaEncodedInstrCount >> iaut_SHIFT);
+ }
+ void iiaSetInstrCount(int count)
+ {
+ assert(abs(count) < 10);
+ iiaEncodedInstrCount = (count << iaut_SHIFT) | iaut_INST_COUNT;
+ }
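+
+        // Illustrative sketch (not taken from the emitter): assuming the iaut_* tag bits occupy
+        // the low iaut_SHIFT bits, the instruction count round-trips through this encoding:
+        //
+        //   iiaSetInstrCount(3);                // stores (3 << iaut_SHIFT) | iaut_INST_COUNT
+        //   assert(iiaHasInstrCount());         // the iaut_MASK bits now identify an instruction count
+        //   assert(iiaGetInstrCount() == 3);    // shifting right by iaut_SHIFT recovers the count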
+
+ struct
+ {
+ regNumber _idReg3 : REGNUM_BITS;
+ regNumber _idReg4 : REGNUM_BITS;
+#ifdef _TARGET_ARM64_
+ unsigned _idReg3Scaled : 1; // Reg3 is scaled by idOpSize bits
+#endif
+ };
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ struct
+ {
+ regNumber _idReg3 : REGNUM_BITS;
+ };
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ } _idAddrUnion;
+
+ /* Trivial wrappers to return properly typed enums */
+ public:
+#if HAS_TINY_DESC
+
+ bool idIsTiny() const
+ {
+ return (_idTinyDsc != 0);
+ }
+ void idSetIsTiny()
+ {
+ _idTinyDsc = 1;
+ }
+
+#else
+
+ bool idIsTiny() const
+ {
+ return false;
+ }
+ void idSetIsTiny()
+ {
+ _idSmallDsc = 1;
+ }
+
+#endif // HAS_TINY_DESC
+
+ bool idIsSmallDsc() const
+ {
+ return (_idSmallDsc != 0);
+ }
+ void idSetIsSmallDsc()
+ {
+ _idSmallDsc = 1;
+ }
+
+#if defined(_TARGET_XARCH_)
+
+ unsigned idCodeSize() const
+ {
+ return _idCodeSize;
+ }
+ void idCodeSize(unsigned sz)
+ {
+ _idCodeSize = sz;
+ assert(sz == _idCodeSize);
+ }
+
+#elif defined(_TARGET_ARM64_)
+ unsigned idCodeSize() const
+ {
+ int size = 4;
+ switch (idInsFmt())
+ {
+ case IF_LARGEADR:
+ // adrp + add
+ case IF_LARGEJMP:
+ // b<cond> + b<uncond>
+ size = 8;
+ break;
+ case IF_LARGELDC:
+ if (isVectorRegister(idReg1()))
+ {
+ // adrp + ldr + fmov
+ size = 12;
+ }
+ else
+ {
+ // adrp + ldr
+ size = 8;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return size;
+ }
+
+#elif defined(_TARGET_ARM_)
+
+ bool idInstrIsT1() const
+ {
+ return (_idInsSize == ISZ_16BIT);
+ }
+ unsigned idCodeSize() const
+ {
+ unsigned result = (_idInsSize == ISZ_16BIT) ? 2 : (_idInsSize == ISZ_32BIT) ? 4 : 6;
+ return result;
+ }
+ insSize idInsSize() const
+ {
+ return _idInsSize;
+ }
+ void idInsSize(insSize isz)
+ {
+ _idInsSize = isz;
+ assert(isz == _idInsSize);
+ }
+ insFlags idInsFlags() const
+ {
+ return _idInsFlags;
+ }
+ void idInsFlags(insFlags sf)
+ {
+ _idInsFlags = sf;
+ assert(sf == _idInsFlags);
+ }
+#endif // _TARGET_ARM_
+
+ emitAttr idOpSize()
+ {
+ return emitDecodeSize(_idOpSize);
+ }
+ void idOpSize(emitAttr opsz)
+ {
+ _idOpSize = emitEncodeSize(opsz);
+ }
+
+ GCtype idGCref() const
+ {
+ return (GCtype)_idGCref;
+ }
+ void idGCref(GCtype gctype)
+ {
+ _idGCref = gctype;
+ }
+
+ regNumber idReg1() const
+ {
+ return _idReg1;
+ }
+ void idReg1(regNumber reg)
+ {
+ _idReg1 = reg;
+ assert(reg == _idReg1);
+ }
+
+ regNumber idReg2() const
+ {
+ return _idReg2;
+ }
+ void idReg2(regNumber reg)
+ {
+ _idReg2 = reg;
+ assert(reg == _idReg2);
+ }
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ regNumber idReg3() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return idAddr()->_idReg3;
+ }
+ void idReg3(regNumber reg)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg3 = reg;
+ assert(reg == idAddr()->_idReg3);
+ }
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+#ifdef _TARGET_ARMARCH_
+ insOpts idInsOpt() const
+ {
+ return (insOpts)_idInsOpt;
+ }
+ void idInsOpt(insOpts opt)
+ {
+ _idInsOpt = opt;
+ assert(opt == _idInsOpt);
+ }
+
+ regNumber idReg3() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return idAddr()->_idReg3;
+ }
+ void idReg3(regNumber reg)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg3 = reg;
+ assert(reg == idAddr()->_idReg3);
+ }
+ regNumber idReg4() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return idAddr()->_idReg4;
+ }
+ void idReg4(regNumber reg)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg4 = reg;
+ assert(reg == idAddr()->_idReg4);
+ }
+#ifdef _TARGET_ARM64_
+ bool idReg3Scaled() const
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ return (idAddr()->_idReg3Scaled == 1);
+ }
+ void idReg3Scaled(bool val)
+ {
+ assert(!idIsTiny());
+ assert(!idIsSmallDsc());
+ idAddr()->_idReg3Scaled = val ? 1 : 0;
+ }
+#endif // _TARGET_ARM64_
+
+#endif // _TARGET_ARMARCH_
+
+ inline static bool fitsInSmallCns(ssize_t val)
+ {
+ return ((val >= ID_MIN_SMALL_CNS) && (val <= ID_MAX_SMALL_CNS));
+ }
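+
+    // Worked example (illustrative): in an amd64 build with RELOC_SUPPORT, ID_EXTRA_BITS is
+    // 14 + 2 = 16, so ID_BIT_SMALL_CNS is 16 and ID_MAX_SMALL_CNS is 0xFFFF. Consequently:
+    //
+    //   fitsInSmallCns(0xFFFF);   // true  - the value is stored directly in _idSmallCns
+    //   fitsInSmallCns(0x10000);  // false - the emitter must allocate an instrDescCns instead
+    //   fitsInSmallCns(-1);       // false - small constants are unsigned (ID_MIN_SMALL_CNS is 0)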
+
+ bool idIsLargeCns() const
+ {
+ assert(!idIsTiny());
+ return _idLargeCns != 0;
+ }
+ void idSetIsLargeCns()
+ {
+ assert(!idIsTiny());
+ _idLargeCns = 1;
+ }
+
+ bool idIsLargeDsp() const
+ {
+ assert(!idIsTiny());
+ return _idLargeDsp != 0;
+ }
+ void idSetIsLargeDsp()
+ {
+ assert(!idIsTiny());
+ _idLargeDsp = 1;
+ }
+ void idSetIsSmallDsp()
+ {
+ assert(!idIsTiny());
+ _idLargeDsp = 0;
+ }
+
+ bool idIsLargeCall() const
+ {
+ assert(!idIsTiny());
+ return _idLargeCall != 0;
+ }
+ void idSetIsLargeCall()
+ {
+ assert(!idIsTiny());
+ _idLargeCall = 1;
+ }
+
+ bool idIsBound() const
+ {
+ assert(!idIsTiny());
+ return _idBound != 0;
+ }
+ void idSetIsBound()
+ {
+ assert(!idIsTiny());
+ _idBound = 1;
+ }
+
+ bool idIsCallRegPtr() const
+ {
+ assert(!idIsTiny());
+ return _idCallRegPtr != 0;
+ }
+ void idSetIsCallRegPtr()
+ {
+ assert(!idIsTiny());
+ _idCallRegPtr = 1;
+ }
+
+ bool idIsCallAddr() const
+ {
+ assert(!idIsTiny());
+ return _idCallAddr != 0;
+ }
+ void idSetIsCallAddr()
+ {
+ assert(!idIsTiny());
+ _idCallAddr = 1;
+ }
+
+ // Only call instructions that call helper functions may be marked as "IsNoGC", indicating
+ // that a thread executing such a call cannot be stopped for GC. Thus, in partially-interruptible
+ // code, it is not necessary to generate GC info for a call so labeled.
+ bool idIsNoGC() const
+ {
+ assert(!idIsTiny());
+ return _idNoGC != 0;
+ }
+ void idSetIsNoGC(bool val)
+ {
+ assert(!idIsTiny());
+ _idNoGC = val;
+ }
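+
+    // Illustrative usage (a sketch, not a real call site): when emitting a call to a runtime
+    // helper, the descriptor can be flagged based on whether the helper is a "no GC" one:
+    //
+    //   id->idSetIsNoGC(emitNoGChelper(helperNum));   // 'helperNum' is a hypothetical helper index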
+
+#ifdef _TARGET_ARMARCH_
+ bool idIsLclVar() const
+ {
+ return !idIsTiny() && _idLclVar != 0;
+ }
+ void idSetIsLclVar()
+ {
+ assert(!idIsTiny());
+ _idLclVar = 1;
+ }
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+ bool idIsLclFPBase() const
+ {
+ return !idIsTiny() && _idLclFPBase != 0;
+ }
+ void idSetIsLclFPBase()
+ {
+ assert(!idIsTiny());
+ _idLclFPBase = 1;
+ }
+#endif // defined(_TARGET_ARM_)
+
+#ifdef RELOC_SUPPORT
+
+ bool idIsCnsReloc() const
+ {
+ assert(!idIsTiny());
+ return _idCnsReloc != 0;
+ }
+ void idSetIsCnsReloc()
+ {
+ assert(!idIsTiny());
+ _idCnsReloc = 1;
+ }
+
+ bool idIsDspReloc() const
+ {
+ assert(!idIsTiny());
+ return _idDspReloc != 0;
+ }
+ void idSetIsDspReloc(bool val = true)
+ {
+ assert(!idIsTiny());
+ _idDspReloc = val;
+ }
+ bool idIsReloc()
+ {
+ return idIsDspReloc() || idIsCnsReloc();
+ }
+
+#endif
+
+ unsigned idSmallCns() const
+ {
+ assert(!idIsTiny());
+ return _idSmallCns;
+ }
+ void idSmallCns(size_t value)
+ {
+ assert(!idIsTiny());
+ assert(fitsInSmallCns(value));
+ _idSmallCns = value;
+ }
+
+ inline const idAddrUnion* idAddr() const
+ {
+ assert(!idIsSmallDsc() && !idIsTiny());
+ return &this->_idAddrUnion;
+ }
+
+ inline idAddrUnion* idAddr()
+ {
+ assert(!idIsSmallDsc() && !idIsTiny());
+ return &this->_idAddrUnion;
+ }
+ }; // End of struct instrDesc
+
+ void dispIns(instrDesc* id);
+
+ void appendToCurIG(instrDesc* id);
+
+ /********************************************************************************************/
+
+ struct instrDescJmp : instrDesc
+ {
+ instrDescJmp* idjNext; // next jump in the group/method
+ insGroup* idjIG; // containing group
+
+ union {
+ BYTE* idjAddr; // address of jump ins (for patching)
+ } idjTemp;
+
+ unsigned idjOffs : 30; // Before jump emission, this is the byte offset within IG of the jump instruction.
+ // After emission, for forward jumps, this is the target offset -- in bytes from the
+ // beginning of the function -- of the target instruction of the jump, used to
+ // determine if this jump needs to be patched.
+ unsigned idjShort : 1; // is the jump known to be a short one?
+ unsigned idjKeepLong : 1; // should the jump be kept long? (used for
+ // hot to cold and cold to hot jumps)
+ };
+
+#if !defined(_TARGET_ARM64_) // This shouldn't be needed for ARM32, either, but I don't want to touch the ARM32 JIT.
+ struct instrDescLbl : instrDescJmp
+ {
+ emitLclVarAddr dstLclVar;
+ };
+#endif // !_TARGET_ARM64_
+
+ struct instrDescCns : instrDesc // large const
+ {
+ ssize_t idcCnsVal;
+ };
+
+ struct instrDescDsp : instrDesc // large displacement
+ {
+ ssize_t iddDspVal;
+ };
+
+ struct instrDescCnsDsp : instrDesc // large cons + disp
+ {
+ ssize_t iddcCnsVal;
+ int iddcDspVal;
+ };
+
+ struct instrDescAmd : instrDesc // large addrmode disp
+ {
+ ssize_t idaAmdVal;
+ };
+
+ struct instrDescCnsAmd : instrDesc // large cons + addrmode disp
+ {
+ ssize_t idacCnsVal;
+ ssize_t idacAmdVal;
+ };
+
+ struct instrDescCGCA : instrDesc // call with ...
+ {
+ VARSET_TP idcGCvars; // ... updated GC vars or
+ ssize_t idcDisp; // ... big addrmode disp
+ regMaskTP idcGcrefRegs; // ... gcref registers
+ regMaskTP idcByrefRegs; // ... byref registers
+ unsigned idcArgCnt; // ... lots of args or (<0 ==> caller pops args)
+
+#if MULTIREG_HAS_SECOND_GC_RET
+        // This method handles the GC-ness of the second register in a two-register struct return on System V.
+ GCtype idSecondGCref() const
+ {
+ return (GCtype)_idcSecondRetRegGCType;
+ }
+ void idSecondGCref(GCtype gctype)
+ {
+ _idcSecondRetRegGCType = gctype;
+ }
+
+ private:
+        // This member stores the GC-ness of the second register in a two-register struct return on System V.
+        // It is added to the call struct since it is not needed by the base instrDesc struct, which keeps the
+        // GC-ness of the first register for the instCall nodes.
+        // The base instrDesc is very carefully kept to no more than 128 bits. There is no more space to add a
+        // member for keeping the GC-ness of the second return register, and doing so would bloat the base struct
+        // unnecessarily, since the GC-ness of the second register is only needed for call instructions.
+ // The base struct's member keeping the GC-ness of the first return register is _idGCref.
+ GCtype _idcSecondRetRegGCType : 2; // ... GC type for the second return register.
+#endif // MULTIREG_HAS_SECOND_GC_RET
+ };
+
+ struct instrDescArmFP : instrDesc
+ {
+ regNumber r1;
+ regNumber r2;
+ regNumber r3;
+ };
+
+ insUpdateModes emitInsUpdateMode(instruction ins);
+ insFormat emitInsModeFormat(instruction ins, insFormat base);
+
+ static const BYTE emitInsModeFmtTab[];
+#ifdef DEBUG
+ static const unsigned emitInsModeFmtCnt;
+#endif
+
+ size_t emitGetInstrDescSize(const instrDesc* id);
+ size_t emitGetInstrDescSizeSC(const instrDesc* id);
+
+ ssize_t emitGetInsCns(instrDesc* id);
+ ssize_t emitGetInsDsp(instrDesc* id);
+ ssize_t emitGetInsAmd(instrDesc* id);
+ ssize_t emitGetInsCnsDsp(instrDesc* id, ssize_t* dspPtr);
+ ssize_t emitGetInsSC(instrDesc* id);
+ ssize_t emitGetInsCIdisp(instrDesc* id);
+ unsigned emitGetInsCIargs(instrDesc* id);
+
+ // Return the argument count for a direct call "id".
+ int emitGetInsCDinfo(instrDesc* id);
+
+ unsigned emitInsCount;
+
+/************************************************************************/
+/* A few routines used for debug display purposes */
+/************************************************************************/
+
+#if defined(DEBUG) || EMITTER_STATS
+
+ static const char* emitIfName(unsigned f);
+
+#endif // defined(DEBUG) || EMITTER_STATS
+
+#ifdef DEBUG
+
+ unsigned emitVarRefOffs;
+
+ const char* emitRegName(regNumber reg, emitAttr size = EA_PTRSIZE, bool varName = true);
+ const char* emitFloatRegName(regNumber reg, emitAttr size = EA_PTRSIZE, bool varName = true);
+
+ const char* emitFldName(CORINFO_FIELD_HANDLE fieldVal);
+ const char* emitFncName(CORINFO_METHOD_HANDLE callVal);
+
+ void emitDispIGflags(unsigned flags);
+ void emitDispIG(insGroup* ig, insGroup* igPrev = nullptr, bool verbose = false);
+ void emitDispIGlist(bool verbose = false);
+ void emitDispGCinfo();
+ void emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc = false);
+ void emitDispFrameRef(int varx, int disp, int offs, bool asmfm);
+ void emitDispInsOffs(unsigned offs, bool doffs);
+ void emitDispInsHex(BYTE* code, size_t sz);
+
+#else // !DEBUG
+#define emitVarRefOffs 0
+#endif // !DEBUG
+
+ /************************************************************************/
+ /* Method prolog and epilog */
+ /************************************************************************/
+
+ unsigned emitPrologEndPos;
+
+ unsigned emitEpilogCnt;
+ UNATIVE_OFFSET emitEpilogSize;
+
+#ifdef _TARGET_XARCH_
+
+ void emitStartExitSeq(); // Mark the start of the "return" sequence
+ emitLocation emitExitSeqBegLoc;
+ UNATIVE_OFFSET emitExitSeqSize; // minimum size of any return sequence - the 'ret' after the epilog
+
+#endif // _TARGET_XARCH_
+
+ insGroup* emitPlaceholderList; // per method placeholder list - head
+ insGroup* emitPlaceholderLast; // per method placeholder list - tail
+
+#ifdef JIT32_GCENCODER
+
+ // The x86 GC encoder needs to iterate over a list of epilogs to generate a table of
+ // epilog offsets. Epilogs always start at the beginning of an IG, so save the first
+ // IG of the epilog, and use it to find the epilog offset at the end of code generation.
+ struct EpilogList
+ {
+ EpilogList* elNext;
+ insGroup* elIG;
+ };
+
+ EpilogList* emitEpilogList; // per method epilog list - head
+ EpilogList* emitEpilogLast; // per method epilog list - tail
+
+public:
+ bool emitHasEpilogEnd();
+
+ size_t emitGenEpilogLst(size_t (*fp)(void*, unsigned), void* cp);
+
+#endif // JIT32_GCENCODER
+
+ void emitBegPrologEpilog(insGroup* igPh);
+ void emitEndPrologEpilog();
+
+ emitLocation emitEpilogBegLoc;
+
+ void emitBegFnEpilog(insGroup* igPh);
+ void emitEndFnEpilog();
+
+#if FEATURE_EH_FUNCLETS
+
+ void emitBegFuncletProlog(insGroup* igPh);
+ void emitEndFuncletProlog();
+
+ void emitBegFuncletEpilog(insGroup* igPh);
+ void emitEndFuncletEpilog();
+
+#endif // FEATURE_EH_FUNCLETS
+
+/************************************************************************/
+/* Members and methods used in PDB translation */
+/************************************************************************/
+
+#ifdef TRANSLATE_PDB
+
+ inline void SetIDSource(instrDesc* pID);
+ void MapCode(int ilOffset, BYTE* imgDest);
+ void MapFunc(int imgOff,
+ int procLen,
+ int dbgStart,
+ int dbgEnd,
+ short frameReg,
+ int stkAdjust,
+ int lvaCount,
+ OptJit::LclVarDsc* lvaTable,
+ bool framePtr);
+
+private:
+    int emitInstrDescILBase; // code offset of IL that produced this instruction descriptor
+ static AddrMap* emitPDBOffsetTable; // translation table for mapping IL addresses to native addresses
+ static LocalMap* emitPDBLocalTable; // local symbol translation table
+ static bool emitIsPDBEnabled; // flag to disable PDB translation code when a PDB is not found
+ static BYTE* emitILBaseOfCode; // start of IL .text section
+ static BYTE* emitILMethodBase; // beginning of IL method (start of header)
+ static BYTE* emitILMethodStart; // beginning of IL method code (right after the header)
+ static BYTE* emitImgBaseOfCode; // start of the image .text section
+
+#endif
+
+ /************************************************************************/
+ /* Methods to record a code position and later convert to offset */
+ /************************************************************************/
+
+ unsigned emitFindInsNum(insGroup* ig, instrDesc* id);
+ UNATIVE_OFFSET emitFindOffset(insGroup* ig, unsigned insNum);
+
+/************************************************************************/
+/* Members and methods used to issue (encode) instructions. */
+/************************************************************************/
+
+#ifdef DEBUG
+ // If we have started issuing instructions from the list of instrDesc, this is set
+ bool emitIssuing;
+#endif
+
+ BYTE* emitCodeBlock; // Hot code block
+ BYTE* emitColdCodeBlock; // Cold code block
+ BYTE* emitConsBlock; // Read-only (constant) data block
+
+ UNATIVE_OFFSET emitTotalHotCodeSize;
+ UNATIVE_OFFSET emitTotalColdCodeSize;
+
+ UNATIVE_OFFSET emitCurCodeOffs(BYTE* dst)
+ {
+ size_t distance;
+ if ((dst >= emitCodeBlock) && (dst <= (emitCodeBlock + emitTotalHotCodeSize)))
+ {
+ distance = (dst - emitCodeBlock);
+ }
+ else
+ {
+ assert(emitFirstColdIG);
+ assert(emitColdCodeBlock);
+ assert((dst >= emitColdCodeBlock) && (dst <= (emitColdCodeBlock + emitTotalColdCodeSize)));
+
+ distance = (dst - emitColdCodeBlock + emitTotalHotCodeSize);
+ }
+ noway_assert((UNATIVE_OFFSET)distance == distance);
+ return (UNATIVE_OFFSET)distance;
+ }
+
+ BYTE* emitOffsetToPtr(UNATIVE_OFFSET offset)
+ {
+ if (offset < emitTotalHotCodeSize)
+ {
+ return emitCodeBlock + offset;
+ }
+ else
+ {
+ assert(offset < (emitTotalHotCodeSize + emitTotalColdCodeSize));
+
+ return emitColdCodeBlock + (offset - emitTotalHotCodeSize);
+ }
+ }
+
+ BYTE* emitDataOffsetToPtr(UNATIVE_OFFSET offset)
+ {
+ assert(offset < emitDataSize());
+ return emitConsBlock + offset;
+ }
+
+ bool emitJumpCrossHotColdBoundary(size_t srcOffset, size_t dstOffset)
+ {
+ if (emitTotalColdCodeSize == 0)
+ {
+ return false;
+ }
+
+ assert(srcOffset < (emitTotalHotCodeSize + emitTotalColdCodeSize));
+ assert(dstOffset < (emitTotalHotCodeSize + emitTotalColdCodeSize));
+
+ return ((srcOffset < emitTotalHotCodeSize) != (dstOffset < emitTotalHotCodeSize));
+ }
+
+ unsigned char emitOutputByte(BYTE* dst, ssize_t val);
+ unsigned char emitOutputWord(BYTE* dst, ssize_t val);
+ unsigned char emitOutputLong(BYTE* dst, ssize_t val);
+ unsigned char emitOutputSizeT(BYTE* dst, ssize_t val);
+
+ size_t emitIssue1Instr(insGroup* ig, instrDesc* id, BYTE** dp);
+ size_t emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp);
+
+ bool emitHasFramePtr;
+
+#ifdef PSEUDORANDOM_NOP_INSERTION
+ bool emitInInstrumentation;
+#endif // PSEUDORANDOM_NOP_INSERTION
+
+ unsigned emitMaxTmpSize;
+
+#ifdef LEGACY_BACKEND
+ unsigned emitLclSize;
+ unsigned emitGrowableMaxByteOffs;
+ void emitTmpSizeChanged(unsigned tmpSize);
+#ifdef DEBUG
+ unsigned emitMaxByteOffsIdNum;
+#endif // DEBUG
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ bool emitChkAlign; // perform some alignment checks
+#endif
+
+ insGroup* emitCurIG;
+
+ void emitSetShortJump(instrDescJmp* id);
+ void emitSetMediumJump(instrDescJmp* id);
+ UNATIVE_OFFSET emitSizeOfJump(instrDescJmp* jmp);
+ UNATIVE_OFFSET emitInstCodeSz(instrDesc* id);
+
+#ifndef LEGACY_BACKEND
+ CORINFO_FIELD_HANDLE emitLiteralConst(ssize_t cnsValIn, emitAttr attr = EA_8BYTE);
+ CORINFO_FIELD_HANDLE emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr = EA_UNKNOWN);
+ regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
+ regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
+ void emitInsMov(instruction ins, emitAttr attr, GenTree* node);
+ insFormat emitMapFmtForIns(insFormat fmt, instruction ins);
+ insFormat emitMapFmtAtoM(insFormat fmt);
+ void emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins);
+ void spillIntArgRegsToShadowSlots();
+#endif // !LEGACY_BACKEND
+
+/************************************************************************/
+/* The logic that creates and keeps track of instruction groups */
+/************************************************************************/
+
+#ifdef _TARGET_ARM_
+// The only place where this limited instruction group size is a problem is
+// in the prolog, where we only support a single instruction group. We should really fix that.
+// ARM can require a bigger prolog instruction group. One scenario is where a
+// function uses all the incoming integer and single-precision floating-point arguments,
+// and must store them all to the frame on entry. If the frame is very large, we generate
+// ugly code like "movw r10, 0x488; add r10, sp; vstr s0, [r10]" for each store, which
+// eats up our insGroup buffer.
+#define SC_IG_BUFFER_SIZE (100 * sizeof(instrDesc) + 14 * TINY_IDSC_SIZE)
+#else // !_TARGET_ARM_
+#define SC_IG_BUFFER_SIZE (50 * sizeof(instrDesc) + 14 * TINY_IDSC_SIZE)
+#endif // !_TARGET_ARM_
+
+ size_t emitIGbuffSize;
+
+ insGroup* emitIGlist; // first instruction group
+ insGroup* emitIGlast; // last instruction group
+ insGroup* emitIGthis; // issued instruction group
+
+ insGroup* emitPrologIG; // prolog instruction group
+
+ instrDescJmp* emitJumpList; // list of local jumps in method
+ instrDescJmp* emitJumpLast; // last of local jumps in method
+ void emitJumpDistBind(); // Bind all the local jumps in method
+
+ void emitCheckFuncletBranch(instrDesc* jmp, insGroup* jmpIG); // Check for illegal branches between funclets
+
+ bool emitFwdJumps; // forward jumps present?
+ bool emitNoGCIG; // Are we generating IGF_NOGCINTERRUPT insGroups (for prologs, epilogs, etc.)
+    bool emitForceNewIG; // If set, the next instruction we generate (rather than another instruction group) forces
+                         // the creation of a new emitAdd instruction group.
+
+ BYTE* emitCurIGfreeNext; // next available byte in buffer
+ BYTE* emitCurIGfreeEndp; // one byte past the last available byte in buffer
+ BYTE* emitCurIGfreeBase; // first byte address
+
+ unsigned emitCurIGinsCnt; // # of collected instr's in buffer
+ unsigned emitCurIGsize; // estimated code size of current group in bytes
+ UNATIVE_OFFSET emitCurCodeOffset; // current code offset within group
+ UNATIVE_OFFSET emitTotalCodeSize; // bytes of code in entire method
+
+ insGroup* emitFirstColdIG; // first cold instruction group
+
+ void emitSetFirstColdIGCookie(void* bbEmitCookie)
+ {
+ emitFirstColdIG = (insGroup*)bbEmitCookie;
+ }
+
+ int emitOffsAdj; // current code offset adjustment
+
+ instrDescJmp* emitCurIGjmpList; // list of jumps in current IG
+
+ // emitPrev* and emitInit* are only used during code generation, not during
+ // emission (issuing), to determine what GC values to store into an IG.
+ // Note that only the Vars ones are actually used, apparently due to bugs
+ // in that tracking. See emitSavIG(): the important use of ByrefRegs is commented
+ // out, and GCrefRegs is always saved.
+
+ VARSET_TP emitPrevGCrefVars;
+ regMaskTP emitPrevGCrefRegs;
+ regMaskTP emitPrevByrefRegs;
+
+ VARSET_TP emitInitGCrefVars;
+ regMaskTP emitInitGCrefRegs;
+ regMaskTP emitInitByrefRegs;
+
+ // If this is set, we ignore comparing emitPrev* and emitInit* to determine
+ // whether to save GC state (to save space in the IG), and always save it.
+
+ bool emitForceStoreGCState;
+
+ // emitThis* variables are used during emission, to track GC updates
+ // on a per-instruction basis. During code generation, per-instruction
+ // tracking is done with variables gcVarPtrSetCur, gcRegGCrefSetCur,
+ // and gcRegByrefSetCur. However, these are also used for a slightly
+ // different purpose during code generation: to try to minimize the
+ // amount of GC data stored to an IG, by only storing deltas from what
+ // we expect to see at an IG boundary. Also, only emitThisGCrefVars is
+    // we expect to see at an IG boundary. Also, emitThisGCrefVars is really
+    // the only one used; the others seem to be calculated, but not used due
+    // to bugs.
+ VARSET_TP emitThisGCrefVars;
+ regMaskTP emitThisGCrefRegs; // Current set of registers holding GC references
+ regMaskTP emitThisByrefRegs; // Current set of registers holding BYREF references
+
+ bool emitThisGCrefVset; // Is "emitThisGCrefVars" up to date?
+
+ regNumber emitSyncThisObjReg; // where is "this" enregistered for synchronized methods?
+
+#if MULTIREG_HAS_SECOND_GC_RET
+ void emitSetSecondRetRegGCType(instrDescCGCA* id, emitAttr secondRetSize);
+#endif // MULTIREG_HAS_SECOND_GC_RET
+
+ static void emitEncodeCallGCregs(regMaskTP regs, instrDesc* id);
+ static unsigned emitDecodeCallGCregs(instrDesc* id);
+
+ unsigned emitNxtIGnum;
+
+ // random nop insertion to break up nop sleds
+ unsigned emitNextNop;
+ bool emitRandomNops;
+ void emitEnableRandomNops()
+ {
+ emitRandomNops = true;
+ }
+ void emitDisableRandomNops()
+ {
+ emitRandomNops = false;
+ }
+
+ insGroup* emitAllocAndLinkIG();
+ insGroup* emitAllocIG();
+ void emitInitIG(insGroup* ig);
+ void emitInsertIGAfter(insGroup* insertAfterIG, insGroup* ig);
+
+ void emitNewIG();
+ void emitDisableGC();
+ void emitEnableGC();
+ void emitGenIG(insGroup* ig);
+ insGroup* emitSavIG(bool emitAdd = false);
+ void emitNxtIG(bool emitAdd = false);
+
+ bool emitCurIGnonEmpty()
+ {
+ return (emitCurIG && emitCurIGfreeNext > emitCurIGfreeBase);
+ }
+
+ instrDesc* emitLastIns;
+
+#ifdef DEBUG
+ void emitCheckIGoffsets();
+#endif
+
+ // Terminates any in-progress instruction group, making the current IG a new empty one.
+    // Marks this instruction group as having a label; returns the new instruction group.
+ // Sets the emitter's record of the currently live GC variables
+ // and registers. The "isFinallyTarget" parameter indicates that the current location is
+ // the start of a basic block that is returned to after a finally clause in non-exceptional execution.
+ void* emitAddLabel(VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, BOOL isFinallyTarget = FALSE);
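+
+    // Illustrative usage (a sketch; the real call sites are in codegen): when code generation
+    // reaches a block that needs a label, it passes its current notion of the live GC state:
+    //
+    //   void* igCookie = emitAddLabel(liveGCVars, liveGCrefRegs, liveByrefRegs);
+    //
+    // where the three arguments are hypothetical names for the caller's live GC variable set and
+    // its gcref/byref register masks; the returned cookie identifies the new insGroup.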
+
+#ifdef _TARGET_ARMARCH_
+
+ void emitGetInstrDescs(insGroup* ig, instrDesc** id, int* insCnt);
+
+ bool emitGetLocationInfo(emitLocation* emitLoc, insGroup** pig, instrDesc** pid, int* pinsRemaining = NULL);
+
+ bool emitNextID(insGroup*& ig, instrDesc*& id, int& insRemaining);
+
+ typedef void (*emitProcessInstrFunc_t)(instrDesc* id, void* context);
+
+ void emitWalkIDs(emitLocation* locFrom, emitProcessInstrFunc_t processFunc, void* context);
+
+ static void emitGenerateUnwindNop(instrDesc* id, void* context);
+
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+ emitter::insFormat emitInsFormat(instruction ins);
+ size_t emitInsCode(instruction ins, insFormat fmt);
+#endif
+
+#ifdef _TARGET_X86_
+ void emitMarkStackLvl(unsigned stackLevel);
+#endif
+
+ int emitNextRandomNop();
+
+ void* emitAllocInstr(size_t sz, emitAttr attr);
+
+ instrDesc* emitAllocInstr(emitAttr attr)
+ {
+ return (instrDesc*)emitAllocInstr(sizeof(instrDesc), attr);
+ }
+
+ instrDescJmp* emitAllocInstrJmp()
+ {
+ return (instrDescJmp*)emitAllocInstr(sizeof(instrDescJmp), EA_1BYTE);
+ }
+
+#if !defined(_TARGET_ARM64_)
+ instrDescLbl* emitAllocInstrLbl()
+ {
+ return (instrDescLbl*)emitAllocInstr(sizeof(instrDescLbl), EA_4BYTE);
+ }
+#endif // !_TARGET_ARM64_
+
+ instrDescCns* emitAllocInstrCns(emitAttr attr)
+ {
+ return (instrDescCns*)emitAllocInstr(sizeof(instrDescCns), attr);
+ }
+ instrDescCns* emitAllocInstrCns(emitAttr attr, int cns)
+ {
+ instrDescCns* result = (instrDescCns*)emitAllocInstr(sizeof(instrDescCns), attr);
+ result->idSetIsLargeCns();
+ result->idcCnsVal = cns;
+ return result;
+ }
+
+ instrDescDsp* emitAllocInstrDsp(emitAttr attr)
+ {
+ return (instrDescDsp*)emitAllocInstr(sizeof(instrDescDsp), attr);
+ }
+
+ instrDescCnsDsp* emitAllocInstrCnsDsp(emitAttr attr)
+ {
+ return (instrDescCnsDsp*)emitAllocInstr(sizeof(instrDescCnsDsp), attr);
+ }
+
+ instrDescAmd* emitAllocInstrAmd(emitAttr attr)
+ {
+ return (instrDescAmd*)emitAllocInstr(sizeof(instrDescAmd), attr);
+ }
+
+ instrDescCnsAmd* emitAllocInstrCnsAmd(emitAttr attr)
+ {
+ return (instrDescCnsAmd*)emitAllocInstr(sizeof(instrDescCnsAmd), attr);
+ }
+
+ instrDescCGCA* emitAllocInstrCGCA(emitAttr attr)
+ {
+ return (instrDescCGCA*)emitAllocInstr(sizeof(instrDescCGCA), attr);
+ }
+
+ instrDesc* emitNewInstrTiny(emitAttr attr);
+ instrDesc* emitNewInstrSmall(emitAttr attr);
+ instrDesc* emitNewInstr(emitAttr attr = EA_4BYTE);
+ instrDesc* emitNewInstrSC(emitAttr attr, ssize_t cns);
+ instrDesc* emitNewInstrCns(emitAttr attr, ssize_t cns);
+ instrDesc* emitNewInstrDsp(emitAttr attr, ssize_t dsp);
+ instrDesc* emitNewInstrCnsDsp(emitAttr attr, ssize_t cns, int dsp);
+ instrDescJmp* emitNewInstrJmp();
+
+#if !defined(_TARGET_ARM64_)
+ instrDescLbl* emitNewInstrLbl();
+#endif // !_TARGET_ARM64_
+
+ static const BYTE emitFmtToOps[];
+
+#ifdef DEBUG
+ static const unsigned emitFmtCount;
+#endif
+
+ bool emitIsTinyInsDsc(instrDesc* id);
+ bool emitIsScnsInsDsc(instrDesc* id);
+
+ size_t emitSizeOfInsDsc(instrDesc* id);
+
+ /************************************************************************/
+ /* The following keeps track of stack-based GC values */
+ /************************************************************************/
+
+ unsigned emitTrkVarCnt;
+ int* emitGCrFrameOffsTab; // Offsets of tracked stack ptr vars (varTrkIndex -> stkOffs)
+
+ unsigned emitGCrFrameOffsCnt; // Number of tracked stack ptr vars
+ int emitGCrFrameOffsMin; // Min offset of a tracked stack ptr var
+ int emitGCrFrameOffsMax; // Max offset of a tracked stack ptr var
+ bool emitContTrkPtrLcls; // All lcl between emitGCrFrameOffsMin/Max are only tracked stack ptr vars
+ varPtrDsc** emitGCrFrameLiveTab; // Cache of currently live varPtrs (stkOffs -> varPtrDsc)
+
+ int emitArgFrameOffsMin;
+ int emitArgFrameOffsMax;
+
+ int emitLclFrameOffsMin;
+ int emitLclFrameOffsMax;
+
+ int emitSyncThisObjOffs; // what is the offset of "this" for synchronized methods?
+
+public:
+ void emitSetFrameRangeGCRs(int offsLo, int offsHi);
+ void emitSetFrameRangeLcls(int offsLo, int offsHi);
+ void emitSetFrameRangeArgs(int offsLo, int offsHi);
+
+ static instruction emitJumpKindToIns(emitJumpKind jumpKind);
+ static emitJumpKind emitInsToJumpKind(instruction ins);
+ static emitJumpKind emitReverseJumpKind(emitJumpKind jumpKind);
+
+#ifdef _TARGET_ARM_
+ static unsigned emitJumpKindCondCode(emitJumpKind jumpKind);
+#endif
+
+#ifdef DEBUG
+ void emitInsSanityCheck(instrDesc* id);
+#endif
+
+#ifdef _TARGET_ARMARCH_
+ // Returns true if instruction "id->idIns()" writes to a register that might be used to contain a GC
+ // pointer. This exempts the SP and PC registers, and floating point registers. Memory access
+ // instructions that pre- or post-increment their memory address registers are *not* considered to write
+ // to GC registers, even if that memory address is a by-ref: such an instruction cannot change the GC
+ // status of that register, since it must be a byref before and remains one after.
+ //
+ // This may return false positives.
+ bool emitInsMayWriteToGCReg(instrDesc* id);
+
+ // Returns "true" if instruction "id->idIns()" writes to a LclVar stack location.
+ bool emitInsWritesToLclVarStackLoc(instrDesc* id);
+
+ // Returns true if the instruction may write to more than one register.
+ bool emitInsMayWriteMultipleRegs(instrDesc* id);
+#endif // _TARGET_ARMARCH_
+
+ /************************************************************************/
+ /* The following is used to distinguish helper vs non-helper calls */
+ /************************************************************************/
+
+ static bool emitNoGChelper(unsigned IHX);
+
+ /************************************************************************/
+ /* The following logic keeps track of live GC ref values */
+ /************************************************************************/
+
+ bool emitFullGCinfo; // full GC pointer maps?
+ bool emitFullyInt; // fully interruptible code?
+
+#if EMIT_TRACK_STACK_DEPTH
+ unsigned emitCntStackDepth; // 0 in prolog/epilog, One DWORD elsewhere
+ unsigned emitMaxStackDepth; // actual computed max. stack depth
+#endif
+
+ /* Stack modelling wrt GC */
+
+ bool emitSimpleStkUsed; // using the "simple" stack table?
+
+ union {
+ struct // if emitSimpleStkUsed==true
+ {
+#define BITS_IN_BYTE (8)
+#define MAX_SIMPLE_STK_DEPTH (BITS_IN_BYTE * sizeof(unsigned))
+
+ unsigned emitSimpleStkMask; // bit per pushed dword (if it fits. Lowest bit <==> last pushed arg)
+ unsigned emitSimpleByrefStkMask; // byref qualifier for emitSimpleStkMask
+ } u1;
+
+ struct // if emitSimpleStkUsed==false
+ {
+ BYTE emitArgTrackLcl[16]; // small local table to avoid malloc
+ BYTE* emitArgTrackTab; // base of the argument tracking stack
+ BYTE* emitArgTrackTop; // top of the argument tracking stack
+ USHORT emitGcArgTrackCnt; // count of pending arg records (stk-depth for frameless methods, gc ptrs on stk
+ // for framed methods)
+ } u2;
+ };
+
+ unsigned emitCurStackLvl; // amount of bytes pushed on stack
+
+#if EMIT_TRACK_STACK_DEPTH
+ /* Functions for stack tracking */
+
+ void emitStackPush(BYTE* addr, GCtype gcType);
+
+ void emitStackPushN(BYTE* addr, unsigned count);
+
+ void emitStackPop(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count = 1);
+
+ void emitStackKillArgs(BYTE* addr, unsigned count, unsigned char callInstrSize);
+
+ void emitRecordGCcall(BYTE* codePos, unsigned char callInstrSize);
+
+ // Helpers for the above
+
+ void emitStackPushLargeStk(BYTE* addr, GCtype gcType, unsigned count = 1);
+ void emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callInstrSize, unsigned count = 1);
+#endif // EMIT_TRACK_STACK_DEPTH
+
+ /* Liveness of stack variables, and registers */
+
+ void emitUpdateLiveGCvars(int offs, BYTE* addr, bool birth);
+ void emitUpdateLiveGCvars(VARSET_VALARG_TP vars, BYTE* addr);
+ void emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr);
+
+#ifdef DEBUG
+ const char* emitGetFrameReg();
+ void emitDispRegSet(regMaskTP regs);
+ void emitDispVarSet();
+#endif
+
+ void emitGCregLiveUpd(GCtype gcType, regNumber reg, BYTE* addr);
+ void emitGCregLiveSet(GCtype gcType, regMaskTP mask, BYTE* addr, bool isThis);
+ void emitGCregDeadUpdMask(regMaskTP, BYTE* addr);
+ void emitGCregDeadUpd(regNumber reg, BYTE* addr);
+ void emitGCregDeadSet(GCtype gcType, regMaskTP mask, BYTE* addr);
+
+ void emitGCvarLiveUpd(int offs, int varNum, GCtype gcType, BYTE* addr);
+ void emitGCvarLiveSet(int offs, GCtype gcType, BYTE* addr, ssize_t disp = -1);
+ void emitGCvarDeadUpd(int offs, BYTE* addr);
+ void emitGCvarDeadSet(int offs, BYTE* addr, ssize_t disp = -1);
+
+ GCtype emitRegGCtype(regNumber reg);
+
+ // We have a mixture of code emission methods, some of which return the size of the emitted instruction,
+ // requiring the caller to add this to the current code pointer (dst += <call to emit code>), others of which
+ // return the updated code pointer (dst = <call to emit code>). Sometimes we'd like to get the size of
+ // the generated instruction for the latter style. This method accomplishes that --
+ // "emitCodeWithInstructionSize(dst, <call to emitCode>, &instrSize)" will do the call, and set
+ // "*instrSize" to the after-before code pointer difference. Returns the result of the call. (And
+ // asserts that the instruction size fits in an unsigned char.)
+ static BYTE* emitCodeWithInstructionSize(BYTE* codePtrBefore, BYTE* newCodePointer, unsigned char* instrSize);
+
+ /************************************************************************/
+ /* The following logic keeps track of initialized data sections */
+ /************************************************************************/
+
+ /* One of these is allocated for every blob of initialized data */
+
+ struct dataSection
+ {
+ enum sectionType
+ {
+ data,
+ blockAbsoluteAddr,
+ blockRelative32
+ };
+
+ dataSection* dsNext;
+ UNATIVE_OFFSET dsSize;
+ sectionType dsType;
+ // variable-sized array used to store the constant data
+ // or BasicBlock* array in the block cases.
+ BYTE dsCont[0];
+ };
+
+ /* These describe the entire initialized/uninitialized data sections */
+
+ struct dataSecDsc
+ {
+ dataSection* dsdList;
+ dataSection* dsdLast;
+ UNATIVE_OFFSET dsdOffs;
+ };
+
+ dataSecDsc emitConsDsc;
+
+ dataSection* emitDataSecCur;
+
+ void emitOutputDataSec(dataSecDsc* sec, BYTE* dst);
+
+ /************************************************************************/
+ /* Handles to the current class and method. */
+ /************************************************************************/
+
+ COMP_HANDLE emitCmpHandle;
+
+ /************************************************************************/
+ /* Helpers for interface to EE */
+ /************************************************************************/
+
+ void emitRecordRelocation(void* location, /* IN */
+ void* target, /* IN */
+ WORD fRelocType, /* IN */
+ WORD slotNum = 0, /* IN */
+ INT32 addlDelta = 0); /* IN */
+
+ void emitRecordCallSite(ULONG instrOffset, /* IN */
+ CORINFO_SIG_INFO* callSig, /* IN */
+ CORINFO_METHOD_HANDLE methodHandle); /* IN */
+
+#ifdef DEBUG
+ // This is a scratch buffer used to minimize the number of sig info structs
+ // we have to allocate for recordCallSite.
+ CORINFO_SIG_INFO* emitScratchSigInfo;
+#endif // DEBUG
+
+/************************************************************************/
+/* Logic to collect and display statistics */
+/************************************************************************/
+
+#if EMITTER_STATS
+
+ friend void emitterStats(FILE* fout);
+ friend void emitterStaticStats(FILE* fout);
+
+ static size_t emitSizeMethod;
+
+ static unsigned emitTotalInsCnt;
+
+ static unsigned emitTotalIGcnt; // total number of insGroup allocated
+ static unsigned emitTotalPhIGcnt; // total number of insPlaceholderGroupData allocated
+ static unsigned emitTotalIGicnt;
+ static size_t emitTotalIGsize;
+ static unsigned emitTotalIGmcnt; // total method count
+ static unsigned emitTotalIGjmps;
+ static unsigned emitTotalIGptrs;
+
+ static size_t emitTotMemAlloc;
+
+ static unsigned emitSmallDspCnt;
+ static unsigned emitLargeDspCnt;
+
+ static unsigned emitSmallCnsCnt;
+#define SMALL_CNS_TSZ 256
+ static unsigned emitSmallCns[SMALL_CNS_TSZ];
+ static unsigned emitLargeCnsCnt;
+
+ static unsigned emitIFcounts[IF_COUNT];
+
+#endif // EMITTER_STATS
+
+/*************************************************************************
+ *
+ * Define any target-dependent emitter members.
+ */
+
+#include "emitdef.h"
+
+ // It would be better if this were a constructor, but that would entail revamping the allocation
+ // infrastructure of the entire JIT...
+ void Init()
+ {
+ VarSetOps::AssignNoCopy(emitComp, emitPrevGCrefVars, VarSetOps::MakeEmpty(emitComp));
+ VarSetOps::AssignNoCopy(emitComp, emitInitGCrefVars, VarSetOps::MakeEmpty(emitComp));
+ VarSetOps::AssignNoCopy(emitComp, emitThisGCrefVars, VarSetOps::MakeEmpty(emitComp));
+ }
+};
+
+/*****************************************************************************
+ *
+ * Define any target-dependent inlines.
+ */
+
+#include "emitinl.h"
+
+inline void emitter::instrDesc::checkSizes()
+{
+#ifdef DEBUG
+#if HAS_TINY_DESC
+ C_ASSERT(TINY_IDSC_SIZE == (offsetof(instrDesc, _idDebugOnlyInfo) + sizeof(instrDescDebugInfo*)));
+#else // !tiny
+ C_ASSERT(SMALL_IDSC_SIZE == (offsetof(instrDesc, _idDebugOnlyInfo) + sizeof(instrDescDebugInfo*)));
+#endif
+#endif
+ C_ASSERT(SMALL_IDSC_SIZE == offsetof(instrDesc, _idAddrUnion));
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the given instruction descriptor is a "tiny" or a "small
+ * constant" one (i.e. one of the descriptors that don't have all instrDesc
+ * fields allocated).
+ */
+
+inline bool emitter::emitIsTinyInsDsc(instrDesc* id)
+{
+ return id->idIsTiny();
+}
+
+inline bool emitter::emitIsScnsInsDsc(instrDesc* id)
+{
+ return id->idIsSmallDsc();
+}
+
+/*****************************************************************************
+ *
+ * Given an instruction, return its "update mode" (RD/WR/RW).
+ */
+
+inline insUpdateModes emitter::emitInsUpdateMode(instruction ins)
+{
+#ifdef DEBUG
+ assert((unsigned)ins < emitInsModeFmtCnt);
+#endif
+ return (insUpdateModes)emitInsModeFmtTab[ins];
+}
+
+/*****************************************************************************
+ *
+ * Return the number of epilog blocks generated so far.
+ */
+
+inline unsigned emitter::emitGetEpilogCnt()
+{
+ return emitEpilogCnt;
+}
+
+/*****************************************************************************
+ *
+ * Return the current size of the specified data section.
+ */
+
+inline UNATIVE_OFFSET emitter::emitDataSize()
+{
+ return emitConsDsc.dsdOffs;
+}
+
+/*****************************************************************************
+ *
+ * Return a handle to the current position in the output stream. This can
+ * be later converted to an actual code offset in bytes.
+ */
+
+inline void* emitter::emitCurBlock()
+{
+ return emitCurIG;
+}
+
+/*****************************************************************************
+ *
+ * The emitCurOffset() method returns a cookie that identifies the current
+ * position in the instruction stream. Due to things like scheduling (and
+ * the fact that the final size of some instructions cannot be known until
+ * the end of code generation), we return a value with the instruction number
+ * and its estimated offset to the caller.
+ */
+
+inline unsigned emitGetInsNumFromCodePos(unsigned codePos)
+{
+ return (codePos & 0xFFFF);
+}
+
+inline unsigned emitGetInsOfsFromCodePos(unsigned codePos)
+{
+ return (codePos >> 16);
+}
+
+inline unsigned emitter::emitCurOffset()
+{
+ unsigned codePos = emitCurIGinsCnt + (emitCurIGsize << 16);
+
+ assert(emitGetInsOfsFromCodePos(codePos) == emitCurIGsize);
+ assert(emitGetInsNumFromCodePos(codePos) == emitCurIGinsCnt);
+
+ // printf("[IG=%02u;ID=%03u;OF=%04X] => %08X\n", emitCurIG->igNum, emitCurIGinsCnt, emitCurIGsize, codePos);
+
+ return codePos;
+}
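+
+// Worked example (illustrative): if the current IG holds 5 instructions with an estimated size
+// of 18 (0x12) bytes so far, then:
+//
+//   unsigned codePos = emitCurOffset();    // 5 + (0x12 << 16) == 0x00120005
+//   emitGetInsNumFromCodePos(codePos);     // == 5    (instruction number, low 16 bits)
+//   emitGetInsOfsFromCodePos(codePos);     // == 0x12 (estimated offset, high 16 bits)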
+
+extern const unsigned short emitTypeSizes[TYP_COUNT];
+
+template <class T>
+inline emitAttr emitTypeSize(T type)
+{
+ assert(TypeGet(type) < TYP_COUNT);
+ assert(emitTypeSizes[TypeGet(type)] > 0);
+ return (emitAttr)emitTypeSizes[TypeGet(type)];
+}
+
+extern const unsigned short emitTypeActSz[TYP_COUNT];
+
+inline emitAttr emitActualTypeSize(var_types type)
+{
+ assert(type < TYP_COUNT);
+ assert(emitTypeActSz[type] > 0);
+ return (emitAttr)emitTypeActSz[type];
+}
+
+/*****************************************************************************
+ *
+ * Convert between an operand size in bytes and a smaller encoding used for
+ * storage in instruction descriptors.
+ */
+
+/* static */ inline emitter::opSize emitter::emitEncodeSize(emitAttr size)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE ||
+ size == EA_32BYTE);
+
+ return emitSizeEncode[((int)size) - 1];
+}
+
+/* static */ inline emitAttr emitter::emitDecodeSize(emitter::opSize ensz)
+{
+ assert(((unsigned)ensz) < OPSZ_COUNT);
+
+ return emitSizeDecode[ensz];
+}
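+
+// Illustrative round trip (assuming the emitSizeEncode/emitSizeDecode tables defined in emit.cpp
+// are inverses for the supported operand sizes):
+//
+//   emitter::opSize enc = emitter::emitEncodeSize(EA_4BYTE);   // compact encoding of a 4-byte operand
+//   assert(emitter::emitDecodeSize(enc) == EA_4BYTE);          // decoding recovers the original emitAttr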
+
+/*****************************************************************************
+ *
+ * Little helpers to allocate various flavors of instructions.
+ */
+
+inline emitter::instrDesc* emitter::emitNewInstrTiny(emitAttr attr)
+{
+ instrDesc* id;
+
+ id = (instrDesc*)emitAllocInstr(TINY_IDSC_SIZE, attr);
+ id->idSetIsTiny();
+
+ return id;
+}
+
+inline emitter::instrDesc* emitter::emitNewInstrSmall(emitAttr attr)
+{
+ instrDesc* id;
+
+ // This is larger than the Tiny Descr
+ id = (instrDesc*)emitAllocInstr(SMALL_IDSC_SIZE, attr);
+ id->idSetIsSmallDsc();
+
+ return id;
+}
+
+inline emitter::instrDesc* emitter::emitNewInstr(emitAttr attr)
+{
+ // This is larger than the Small Descr
+ return emitAllocInstr(attr);
+}
+
+inline emitter::instrDescJmp* emitter::emitNewInstrJmp()
+{
+ return emitAllocInstrJmp();
+}
+
+#if !defined(_TARGET_ARM64_)
+inline emitter::instrDescLbl* emitter::emitNewInstrLbl()
+{
+ return emitAllocInstrLbl();
+}
+#endif // !_TARGET_ARM64_
+
+inline emitter::instrDesc* emitter::emitNewInstrDsp(emitAttr attr, ssize_t dsp)
+{
+ if (dsp == 0)
+ {
+ instrDesc* id = emitAllocInstr(attr);
+
+#if EMITTER_STATS
+ emitSmallDspCnt++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescDsp* id = emitAllocInstrDsp(attr);
+
+ id->idSetIsLargeDsp();
+ id->iddDspVal = dsp;
+
+#if EMITTER_STATS
+ emitLargeDspCnt++;
+#endif
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction with a constant operand.
+ * The instruction descriptor uses the idAddrUnion to save additional info
+ * so the smallest size that this can be is sizeof(instrDesc).
+ * Note that this is very similar to emitter::emitNewInstrSC(), except it never
+ * allocates a small descriptor.
+ */
+inline emitter::instrDesc* emitter::emitNewInstrCns(emitAttr attr, ssize_t cns)
+{
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ instrDesc* id = emitAllocInstr(attr);
+
+ id->idSmallCns(cns);
+
+#if EMITTER_STATS
+ emitSmallCnsCnt++;
+ if (cns - ID_MIN_SMALL_CNS >= SMALL_CNS_TSZ)
+ emitSmallCns[SMALL_CNS_TSZ - 1]++;
+ else
+ emitSmallCns[cns - ID_MIN_SMALL_CNS]++;
+#endif
+
+ return id;
+ }
+ else
+ {
+ instrDescCns* id = emitAllocInstrCns(attr);
+
+ id->idSetIsLargeCns();
+ id->idcCnsVal = cns;
+
+#if EMITTER_STATS
+ emitLargeCnsCnt++;
+#endif
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Get the instrDesc size, general purpose version
+ *
+ */
+
+inline size_t emitter::emitGetInstrDescSize(const instrDesc* id)
+{
+ if (id->idIsTiny())
+ {
+ return TINY_IDSC_SIZE;
+ }
+
+ if (id->idIsSmallDsc())
+ {
+ return SMALL_IDSC_SIZE;
+ }
+
+ if (id->idIsLargeCns())
+ {
+ return sizeof(instrDescCns);
+ }
+
+ return sizeof(instrDesc);
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction with a small integer
+ * constant operand. This is the same as emitNewInstrCns() except that here
+ * any constant that is small enough for instrDesc::fitsInSmallCns() only gets
+ * allocated SMALL_IDSC_SIZE bytes (and is thus a small descriptor, whereas
+ * emitNewInstrCns() always allocates at least sizeof(instrDesc)).
+ */
+
+inline emitter::instrDesc* emitter::emitNewInstrSC(emitAttr attr, ssize_t cns)
+{
+ instrDesc* id;
+
+ if (instrDesc::fitsInSmallCns(cns))
+ {
+ id = (instrDesc*)emitAllocInstr(SMALL_IDSC_SIZE, attr);
+
+ id->idSmallCns(cns);
+ id->idSetIsSmallDsc();
+ }
+ else
+ {
+ id = (instrDesc*)emitAllocInstr(sizeof(instrDescCns), attr);
+
+ id->idSetIsLargeCns();
+ ((instrDescCns*)id)->idcCnsVal = cns;
+ }
+
+ return id;
+}
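+
+// Worked example (illustrative, for a target where ID_BIT_SMALL_CNS is 16):
+//
+//   instrDesc* a = emitNewInstrSC(EA_4BYTE, 5);        // fits  -> SMALL_IDSC_SIZE, a->idIsSmallDsc()
+//   instrDesc* b = emitNewInstrSC(EA_4BYTE, 0x12345);  // large -> sizeof(instrDescCns), b->idIsLargeCns()
+//
+// emitGetInsSC() later reads the constant back from whichever representation was chosen.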
+
+/*****************************************************************************
+ *
+ * Get the instrDesc size for something that contains a constant
+ */
+
+inline size_t emitter::emitGetInstrDescSizeSC(const instrDesc* id)
+{
+ if (id->idIsSmallDsc())
+ {
+ return SMALL_IDSC_SIZE;
+ }
+ else if (id->idIsLargeCns())
+ {
+ return sizeof(instrDescCns);
+ }
+ else
+ {
+ return sizeof(instrDesc);
+ }
+}
+
+/*****************************************************************************
+ *
+ * The following helpers should be used to access the various values that
+ * get stored in different places within the instruction descriptor.
+ */
+
+inline ssize_t emitter::emitGetInsCns(instrDesc* id)
+{
+ return id->idIsLargeCns() ? ((instrDescCns*)id)->idcCnsVal : id->idSmallCns();
+}
+
+inline ssize_t emitter::emitGetInsDsp(instrDesc* id)
+{
+ if (id->idIsLargeDsp())
+ {
+ if (id->idIsLargeCns())
+ {
+ return ((instrDescCnsDsp*)id)->iddcDspVal;
+ }
+ return ((instrDescDsp*)id)->iddDspVal;
+ }
+ return 0;
+}
+
+inline ssize_t emitter::emitGetInsCnsDsp(instrDesc* id, ssize_t* dspPtr)
+{
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ {
+ *dspPtr = ((instrDescCnsDsp*)id)->iddcDspVal;
+ return ((instrDescCnsDsp*)id)->iddcCnsVal;
+ }
+ else
+ {
+ *dspPtr = 0;
+ return ((instrDescCns*)id)->idcCnsVal;
+ }
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ {
+ *dspPtr = ((instrDescDsp*)id)->iddDspVal;
+ return id->idSmallCns();
+ }
+ else
+ {
+ *dspPtr = 0;
+ return id->idSmallCns();
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Get hold of the argument count for an indirect call.
+ */
+
+inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
+{
+ if (id->idIsLargeCall())
+ {
+ return ((instrDescCGCA*)id)->idcArgCnt;
+ }
+ else
+ {
+ assert(id->idIsLargeDsp() == false);
+ assert(id->idIsLargeCns() == false);
+
+ ssize_t cns = emitGetInsCns(id);
+ assert((unsigned)cns == (size_t)cns);
+ return (unsigned)cns;
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Return the GC type (GCT_GCREF, GCT_BYREF, or GCT_NONE) of the value currently held in the given register.
+ */
+
+inline GCtype emitter::emitRegGCtype(regNumber reg)
+{
+ assert(emitIssuing);
+
+ if ((emitThisGCrefRegs & genRegMask(reg)) != 0)
+ {
+ return GCT_GCREF;
+ }
+ else if ((emitThisByrefRegs & genRegMask(reg)) != 0)
+ {
+ return GCT_BYREF;
+ }
+ else
+ {
+ return GCT_NONE;
+ }
+}
+
+#ifdef DEBUG
+
+#if EMIT_TRACK_STACK_DEPTH
+#define CHECK_STACK_DEPTH() assert((int)emitCurStackLvl >= 0)
+#else
+#define CHECK_STACK_DEPTH()
+#endif
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Return true when a given code offset is properly aligned for the target
+ */
+
+inline bool IsCodeAligned(UNATIVE_OFFSET offset)
+{
+ return ((offset & (CODE_ALIGN - 1)) == 0);
+}
+
+// Static:
+inline BYTE* emitter::emitCodeWithInstructionSize(BYTE* codePtrBefore, BYTE* newCodePointer, unsigned char* instrSize)
+{
+ // DLD: Perhaps this method should return the instruction size, and we should do dst += <that size>
+ // as is done in other cases?
+ assert(newCodePointer >= codePtrBefore);
+ ClrSafeInt<unsigned char> callInstrSizeSafe = ClrSafeInt<unsigned char>(newCodePointer - codePtrBefore);
+ assert(!callInstrSizeSafe.IsOverflow());
+ *instrSize = callInstrSizeSafe.Value();
+ return newCodePointer;
+}
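+
+// Illustrative usage (a sketch of the pattern described above, not copied from a real call site):
+//
+//   unsigned char callInstrSize;
+//   dst = emitter::emitCodeWithInstructionSize(dst, <emit the call, returning the new code pointer>, &callInstrSize);
+//   // 'dst' advances as usual, and 'callInstrSize' now holds the size of the just-emitted call.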
+
+/*****************************************************************************
+ *
+ * Add a new IG to the current list, and get it ready to receive code.
+ */
+
+inline void emitter::emitNewIG()
+{
+ insGroup* ig = emitAllocAndLinkIG();
+
+ /* It's linked in. Now, set it up to accept code */
+
+ emitGenIG(ig);
+}
+
+// Start a new instruction group that is not interruptable
+inline void emitter::emitDisableGC()
+{
+ emitNoGCIG = true;
+
+ if (emitCurIGnonEmpty())
+ {
+ emitNxtIG(true);
+ }
+ else
+ {
+ emitCurIG->igFlags |= IGF_NOGCINTERRUPT;
+ }
+}
+
+// Start a new instruction group that is interruptable
+inline void emitter::emitEnableGC()
+{
+ emitNoGCIG = false;
+
+ // The next time an instruction needs to be generated, force a new instruction group.
+ // It will be an emitAdd group in that case. Note that the next thing we see might be
+ // a label, which will force a non-emitAdd group.
+ //
+ // Note that we can't just create a new instruction group here, because we don't know
+ // if there are going to be any instructions added to it, and we don't support empty
+ // instruction groups.
+ emitForceNewIG = true;
+}
+
+/*****************************************************************************/
+#endif // _EMIT_H_
+/*****************************************************************************/
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
new file mode 100644
index 0000000000..1f57048a80
--- /dev/null
+++ b/src/jit/emitarm.cpp
@@ -0,0 +1,7623 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emitArm.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM_)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+/*****************************************************************************/
+
+const instruction emitJumpKindInstructions[] = {
+ INS_nop,
+
+#define JMP_SMALL(en, rev, ins) INS_##ins,
+#include "emitjmps.h"
+};
+
+const emitJumpKind emitReverseJumpKinds[] = {
+ EJ_NONE,
+
+#define JMP_SMALL(en, rev, ins) EJ_##rev,
+#include "emitjmps.h"
+};
+
+/*****************************************************************************
+ * Look up the instruction for a jump kind
+ */
+
+/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
+{
+ assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
+ return emitJumpKindInstructions[jumpKind];
+}
+
+/*****************************************************************************
+ * Look up the jump kind for an instruction. It better be a conditional
+ * branch instruction with a jump kind!
+ */
+
+/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins)
+{
+ for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++)
+ {
+ if (ins == emitJumpKindInstructions[i])
+ {
+ emitJumpKind ret = (emitJumpKind)i;
+ assert(EJ_NONE < ret && ret < EJ_COUNT);
+ return ret;
+ }
+ }
+ unreached();
+}
+
+/*****************************************************************************
+ * Reverse the conditional jump
+ */
+
+/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
+{
+ assert(jumpKind < EJ_COUNT);
+ return emitReverseJumpKinds[jumpKind];
+}
+
+/*****************************************************************************
+ *
+ * Return the allocated size (in bytes) of the given instruction descriptor.
+ */
+
+size_t emitter::emitSizeOfInsDsc(instrDesc* id)
+{
+ assert(!emitIsTinyInsDsc(id));
+
+ if (emitIsScnsInsDsc(id))
+ return SMALL_IDSC_SIZE;
+
+ assert((unsigned)id->idInsFmt() < emitFmtCount);
+
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+ bool isCallIns = (id->idIns() == INS_bl) || (id->idIns() == INS_blx);
+ bool maybeCallIns = (id->idIns() == INS_b) || (id->idIns() == INS_bx);
+
+    // A call instruction (INS_bl or INS_blx) may use a "fat" direct/indirect call descriptor,
+    // except for a local call to a label (i.e. a call to a finally clause).
+    // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce below that a call
+    // instruction always uses one of these idOps (or ID_OP_JMP for the local call case).
+
+ assert(!isCallIns || // either not a call or
+ idOp == ID_OP_CALL || // is a direct call
+ idOp == ID_OP_SPEC || // is an indirect call
+ idOp == ID_OP_JMP); // is a local call to finally clause
+
+ switch (idOp)
+ {
+ case ID_OP_NONE:
+ break;
+
+ case ID_OP_JMP:
+ return sizeof(instrDescJmp);
+
+ case ID_OP_LBL:
+ return sizeof(instrDescLbl);
+
+ case ID_OP_CALL:
+ case ID_OP_SPEC:
+ assert(isCallIns || maybeCallIns);
+ if (id->idIsLargeCall())
+ {
+ /* Must be a "fat" indirect call descriptor */
+ return sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+ return sizeof(instrDesc);
+ }
+ break;
+
+ default:
+ NO_WAY("unexpected instruction descriptor format");
+ break;
+ }
+
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescCnsDsp);
+ else
+ return sizeof(instrDescCns);
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescDsp);
+ else
+ return sizeof(instrDesc);
+ }
+}
+
+bool offsetFitsInVectorMem(int disp)
+{
+ unsigned imm = unsigned_abs(disp);
+ return ((imm & 0x03fc) == imm);
+}
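+
+// Illustrative examples (added commentary, not from the original source): the mask check above
+// accepts displacements whose magnitude is a multiple of 4 that is no larger than 1020 (0x3FC), e.g.
+//   offsetFitsInVectorMem(1020)  -> true    (0x3FC & 0x3FC == 0x3FC)
+//   offsetFitsInVectorMem(-8)    -> true    (|-8| == 8 is 4-aligned and small enough)
+//   offsetFitsInVectorMem(1024)  -> false   (0x400 has a bit outside the 0x3FC mask)
+//   offsetFitsInVectorMem(6)     -> false   (not a multiple of 4)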
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * The following is called for each recorded instruction -- use for debugging.
+ */
+void emitter::emitInsSanityCheck(instrDesc* id)
+{
+ /* What instruction format have we got? */
+
+ switch (id->idInsFmt())
+ {
+ case IF_T1_A: // T1_A ................
+ case IF_T2_A: // T2_A ................ ................
+ break;
+
+ case IF_T1_B: // T1_B ........cccc.... cond
+ case IF_T2_B: // T2_B ................ ............iiii imm4
+ assert(emitGetInsSC(id) < 0x10);
+ break;
+
+ case IF_T1_C: // T1_C .....iiiiinnnddd R1 R2 imm5
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ if (emitInsIsLoadOrStore(id->idIns()))
+ {
+ emitAttr size = id->idOpSize();
+ int imm = emitGetInsSC(id);
+
+ imm = insUnscaleImm(imm, size);
+ assert(imm < 0x20);
+ }
+ else
+ {
+ assert(id->idSmallCns() < 0x20);
+ }
+ break;
+
+ case IF_T1_D0: // T1_D0 ........Dmmmmddd R1* R2*
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_T1_D1: // T1_D1 .........mmmm... R1*
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_T1_D2: // T1_D2 .........mmmm... R3*
+ assert(isGeneralRegister(id->idReg3()));
+ break;
+
+ case IF_T1_E: // T1_E ..........nnnddd R1 R2
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ assert(id->idSmallCns() < 0x20);
+ break;
+
+ case IF_T1_F: // T1_F .........iiiiiii SP imm7
+ assert(id->idReg1() == REG_SP);
+ assert(id->idOpSize() == EA_4BYTE);
+ assert((emitGetInsSC(id) & ~0x1FC) == 0);
+ break;
+
+ case IF_T1_G: // T1_G .......iiinnnddd R1 R2 imm3
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ assert(id->idSmallCns() < 0x8);
+ break;
+
+ case IF_T1_H: // T1_H .......mmmnnnddd R1 R2 R3
+ assert(isLowRegister(id->idReg1()));
+ assert(isLowRegister(id->idReg2()));
+ assert(isLowRegister(id->idReg3()));
+ break;
+
+ case IF_T1_I: // T1_I ......i.iiiiiddd R1 imm6
+ assert(isLowRegister(id->idReg1()));
+ break;
+
+ case IF_T1_J0: // T1_J0 .....dddiiiiiiii R1 imm8
+ assert(isLowRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_J1: // T1_J1 .....dddiiiiiiii R1 <regmask8>
+ assert(isLowRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_J2: // T1_J2 .....dddiiiiiiii R1 SP imm8
+ assert(isLowRegister(id->idReg1()));
+ assert(id->idReg2() == REG_SP);
+ assert(id->idOpSize() == EA_4BYTE);
+ assert((emitGetInsSC(id) & ~0x3FC) == 0);
+ break;
+
+ case IF_T1_L0: // T1_L0 ........iiiiiiii imm8
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_L1: // T1_L1 .......Rrrrrrrrr <regmask8+2>
+ assert(emitGetInsSC(id) < 0x400);
+ break;
+
+ case IF_T2_C0: // T2_C0 ...........Snnnn .iiiddddiishmmmm R1 R2 R3 S, imm5, sh
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(emitGetInsSC(id) < 0x20);
+ break;
+
+ case IF_T2_C4: // T2_C4 ...........Snnnn ....dddd....mmmm R1 R2 R3 S
+ case IF_T2_C5: // T2_C5 ............nnnn ....dddd....mmmm R1 R2 R3
+ case IF_T2_G1: // T2_G1 ............nnnn ttttTTTT........ R1 R2 R3
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ break;
+
+ case IF_T2_C1: // T2_C1 ...........S.... .iiiddddiishmmmm R1 R2 S, imm5, sh
+ case IF_T2_C2: // T2_C2 ...........S.... .iiiddddii..mmmm R1 R2 S, imm5
+ case IF_T2_C8: // T2_C8 ............nnnn .iii....iishmmmm R1 R2 imm5, sh
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x20);
+ break;
+
+ case IF_T2_C6: // T2_C6 ................ ....dddd..iimmmm R1 R2 imm2
+ case IF_T2_C7: // T2_C7 ............nnnn ..........shmmmm R1 R2 imm2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x4);
+ break;
+
+ case IF_T2_C3: // T2_C3 ...........S.... ....dddd....mmmm R1 R2 S
+ case IF_T2_C9: // T2_C9 ............nnnn ............mmmm R1 R2
+ case IF_T2_C10: // T2_C10 ............mmmm ....dddd....mmmm R1 R2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_T2_D0: // T2_D0 ............nnnn .iiiddddii.wwwww R1 R2 imm5, imm5
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x400);
+ break;
+
+ case IF_T2_D1: // T2_D1 ................ .iiiddddii.wwwww R1 imm5, imm5
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x400);
+ break;
+
+ case IF_T2_E0: // T2_E0 ............nnnn tttt......shmmmm R1 R2 R3 imm2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ if (id->idIsLclVar())
+ {
+ assert(isGeneralRegister(codeGen->rsGetRsvdReg()));
+ }
+ else
+ {
+ assert(isGeneralRegister(id->idReg3()));
+ assert(emitGetInsSC(id) < 0x4);
+ }
+ break;
+
+ case IF_T2_E1: // T2_E1 ............nnnn tttt............ R1 R2
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_T2_E2: // T2_E2 ................ tttt............ R1
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_T2_F1: // T2_F1 ............nnnn ttttdddd....mmmm R1 R2 R3 R4
+ case IF_T2_F2: // T2_F2 ............nnnn aaaadddd....mmmm R1 R2 R3 R4
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isGeneralRegister(id->idReg4()));
+ break;
+
+ case IF_T2_G0: // T2_G0 .......PU.W.nnnn ttttTTTTiiiiiiii R1 R2 R3 imm8, PUW
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(unsigned_abs(emitGetInsSC(id)) < 0x100);
+ break;
+
+ case IF_T2_H0: // T2_H0 ............nnnn tttt.PUWiiiiiiii R1 R2 imm8, PUW
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(unsigned_abs(emitGetInsSC(id)) < 0x100);
+ break;
+
+ case IF_T2_H1: // T2_H1 ............nnnn tttt....iiiiiiii R1 R2 imm8
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T2_H2: // T2_H2 ............nnnn ........iiiiiiii R1 imm8
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T2_I0: // T2_I0 ..........W.nnnn rrrrrrrrrrrrrrrr R1 W, imm16
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x10000);
+ break;
+
+ case IF_T2_N: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_T2_N2: // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16
+ assert(isGeneralRegister(id->idReg1()));
+ assert((size_t)emitGetInsSC(id) < emitDataSize());
+ break;
+
+ case IF_T2_I1: // T2_I1 ................ rrrrrrrrrrrrrrrr imm16
+ assert(emitGetInsSC(id) < 0x10000);
+ break;
+
+ case IF_T2_K1: // T2_K1 ............nnnn ttttiiiiiiiiiiii R1 R2 imm12
+ case IF_T2_M0: // T2_M0 .....i......nnnn .iiiddddiiiiiiii R1 R2 imm12
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_L0: // T2_L0 .....i.....Snnnn .iiiddddiiiiiiii R1 R2 S, imm8<<imm4
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isModImmConst(emitGetInsSC(id)));
+ break;
+
+ case IF_T2_K4: // T2_K4 ........U....... ttttiiiiiiiiiiii R1 PC U, imm12
+ case IF_T2_M1: // T2_M1 .....i.......... .iiiddddiiiiiiii R1 PC imm12
+ assert(isGeneralRegister(id->idReg1()));
+ assert(id->idReg2() == REG_PC);
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_K3: // T2_K3 ........U....... ....iiiiiiiiiiii PC U, imm12
+ assert(id->idReg1() == REG_PC);
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_K2: // T2_K2 ............nnnn ....iiiiiiiiiiii R1 imm12
+ assert(isGeneralRegister(id->idReg1()));
+ assert(emitGetInsSC(id) < 0x1000);
+ break;
+
+ case IF_T2_L1: // T2_L1 .....i.....S.... .iiiddddiiiiiiii R1 S, imm8<<imm4
+ case IF_T2_L2: // T2_L2 .....i......nnnn .iii....iiiiiiii R1 imm8<<imm4
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isModImmConst(emitGetInsSC(id)));
+ break;
+
+ case IF_T1_J3: // T1_J3 .....dddiiiiiiii R1 PC imm8
+ assert(isGeneralRegister(id->idReg1()));
+ assert(id->idReg2() == REG_PC);
+ assert(emitGetInsSC(id) < 0x100);
+ break;
+
+ case IF_T1_K: // T1_K ....cccciiiiiiii Branch imm8, cond4
+ case IF_T1_M: // T1_M .....iiiiiiiiiii Branch imm11
+ case IF_T2_J1: // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4
+ case IF_T2_J2: // T2_J2 .....Siiiiiiiiii ..j.jiiiiiiiiii. Branch imm24
+ case IF_T2_N1: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ case IF_T2_J3: // T2_J3 .....Siiiiiiiiii ..j.jiiiiiiiiii. Call imm24
+ case IF_LARGEJMP:
+ break;
+
+ case IF_T2_VFP3:
+ if (id->idOpSize() == EA_8BYTE)
+ {
+ assert(isDoubleReg(id->idReg1()));
+ assert(isDoubleReg(id->idReg2()));
+ assert(isDoubleReg(id->idReg3()));
+ }
+ else
+ {
+ assert(id->idOpSize() == EA_4BYTE);
+ assert(isFloatReg(id->idReg1()));
+ assert(isFloatReg(id->idReg2()));
+ assert(isFloatReg(id->idReg3()));
+ }
+ break;
+
+ case IF_T2_VFP2:
+ assert(isFloatReg(id->idReg1()));
+ assert(isFloatReg(id->idReg2()));
+ break;
+
+ case IF_T2_VLDST:
+ if (id->idOpSize() == EA_8BYTE)
+ assert(isDoubleReg(id->idReg1()));
+ else
+ assert(isFloatReg(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(offsetFitsInVectorMem(emitGetInsSC(id)));
+ break;
+
+ case IF_T2_VMOVD:
+ assert(id->idOpSize() == EA_8BYTE);
+ if (id->idIns() == INS_vmov_d2i)
+ {
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isDoubleReg(id->idReg3()));
+ }
+ else
+ {
+ assert(id->idIns() == INS_vmov_i2d);
+ assert(isDoubleReg(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ }
+ break;
+
+ case IF_T2_VMOVS:
+ assert(id->idOpSize() == EA_4BYTE);
+ if (id->idIns() == INS_vmov_i2f)
+ {
+ assert(isFloatReg(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ }
+ else
+ {
+ assert(id->idIns() == INS_vmov_f2i);
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isFloatReg(id->idReg2()));
+ }
+ break;
+
+ default:
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"Unexpected format");
+ break;
+ }
+}
+#endif // DEBUG
+
+bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ switch (fmt)
+ {
+
+ // These are the formats with "destination" or "target" registers:
+ case IF_T1_C:
+ case IF_T1_D0:
+ case IF_T1_E:
+ case IF_T1_G:
+ case IF_T1_H:
+ case IF_T1_J0:
+ case IF_T1_J1:
+ case IF_T1_J2:
+ case IF_T1_J3:
+ case IF_T2_C0:
+ case IF_T2_C1:
+ case IF_T2_C2:
+ case IF_T2_C3:
+ case IF_T2_C4:
+ case IF_T2_C5:
+ case IF_T2_C6:
+ case IF_T2_C10:
+ case IF_T2_D0:
+ case IF_T2_D1:
+ case IF_T2_F1:
+ case IF_T2_F2:
+ case IF_T2_L0:
+ case IF_T2_L1:
+ case IF_T2_M0:
+ case IF_T2_M1:
+ case IF_T2_N:
+ case IF_T2_N1:
+ case IF_T2_N2:
+ case IF_T2_VFP3:
+ case IF_T2_VFP2:
+ case IF_T2_VLDST:
+ case IF_T2_E0:
+ case IF_T2_E1:
+ case IF_T2_E2:
+ case IF_T2_G0:
+ case IF_T2_G1:
+ case IF_T2_H0:
+ case IF_T2_H1:
+ case IF_T2_K1:
+ case IF_T2_K4:
+ // Some formats with "destination" or "target" registers are actually used for store instructions, for the
+ // "source" value written to memory.
+ // Similarly, PUSH has a target register, indicating the start of the set of registers to push. POP
+ // *does* write to at least one register, so we do not make that a special case.
+        // Various compare/test instructions do not write (except to the flags). Technically "teq" does not need to
+        // be in this list because it has no forms matched above, but I'm putting it here for completeness.
+ switch (ins)
+ {
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+ case INS_strd:
+ case INS_strex:
+ case INS_strexb:
+ case INS_strexd:
+ case INS_strexh:
+ case INS_push:
+ case INS_cmp:
+ case INS_cmn:
+ case INS_tst:
+ case INS_teq:
+ return false;
+ default:
+ return true;
+ }
+ case IF_T2_VMOVS:
+        // VMOV.i2f reads from the integer register. Conversely, VMOV.f2i writes to a GC pointer-sized
+        // integer register that might have previously held GC pointers, so it needs to be included.
+ assert(id->idGCref() == GCT_NONE);
+ return (ins == INS_vmov_f2i);
+
+ case IF_T2_VMOVD:
+ // VMOV.i2d reads from the integer registers. Conversely VMOV.d2i writes to GC pointer-sized
+ // integer registers that might have previously held GC pointers, so they need to be included.
+ assert(id->idGCref() == GCT_NONE);
+ return (ins == INS_vmov_d2i);
+
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id)
+{
+ if (!id->idIsLclVar())
+ return false;
+
+ instruction ins = id->idIns();
+
+ // This list is related to the list of instructions used to store local vars in emitIns_S_R().
+ // We don't accept writing to float local vars.
+
+ switch (ins)
+ {
+ case INS_strb:
+ case INS_strh:
+ case INS_str:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+{
+ instruction ins = id->idIns();
+
+ switch (ins)
+ {
+ case INS_ldm:
+ case INS_ldmdb:
+ case INS_pop:
+ case INS_smlal:
+ case INS_smull:
+ case INS_umlal:
+ case INS_umull:
+ case INS_vmov_d2i:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
+
+const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ const char* rn = emitComp->compRegVarName(reg, varName, false);
+
+ assert(strlen(rn) >= 1);
+
+ return rn;
+}
+
+const char* emitter::emitFloatRegName(regNumber reg, emitAttr attr, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ const char* rn = emitComp->compRegVarName(reg, varName, true);
+
+ assert(strlen(rn) >= 1);
+
+ return rn;
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Returns the base encoding of the given CPU instruction.
+ */
+
+emitter::insFormat emitter::emitInsFormat(instruction ins)
+{
+ // clang-format off
+ const static insFormat insFormats[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) fmt,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) fmt,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) fmt,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) fmt,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) fmt,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) fmt,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
+ #include "instrs.h"
+ };
+ // clang-format on
+
+ assert(ins < ArrLen(insFormats));
+ assert((insFormats[ins] != IF_NONE));
+
+ return insFormats[ins];
+}
+
+// INST_FP is 1
+#define LD 2
+#define ST 4
+#define CMP 8
+
+// clang-format off
+/*static*/ const BYTE CodeGenInterface::instInfo[] =
+{
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) ldst | INST_FP*fp,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) ldst | INST_FP*fp,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) ldst | INST_FP*fp,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) ldst | INST_FP*fp,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) ldst | INST_FP*fp,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) ldst | INST_FP*fp,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
+ #include "instrs.h"
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load instruction
+ */
+
+bool emitter::emitInsIsLoad(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & LD) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of compare or test instruction
+ */
+
+bool emitter::emitInsIsCompare(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & CMP) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of store instruction
+ */
+
+bool emitter::emitInsIsStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & ST) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load/store instruction
+ */
+
+bool emitter::emitInsIsLoadOrStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & (LD | ST)) ? true : false;
+ else
+ return false;
+}
+
+#undef LD
+#undef ST
+#undef CMP
+
+/*****************************************************************************
+ *
+ * Returns the specific encoding of the given CPU instruction and format
+ */
+
+size_t emitter::emitInsCode(instruction ins, insFormat fmt)
+{
+ // clang-format off
+ const static size_t insCodes1[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e1,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e1,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e1,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e1,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e1,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e1,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
+ #include "instrs.h"
+ };
+ const static size_t insCodes2[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e2,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e2,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e2,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e2,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e2,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e2,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
+ #include "instrs.h"
+ };
+ const static size_t insCodes3[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e3,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e3,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e3,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e3,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e3,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
+ #include "instrs.h"
+ };
+ const static size_t insCodes4[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e4,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e4,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e4,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e4,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
+ #include "instrs.h"
+ };
+ const static size_t insCodes5[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e5,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e5,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e5,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
+ #include "instrs.h"
+ };
+ const static size_t insCodes6[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e6,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e6,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
+ #include "instrs.h"
+ };
+ const static size_t insCodes7[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e7,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
+ #include "instrs.h"
+ };
+ const static size_t insCodes8[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) e8,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
+ #include "instrs.h"
+ };
+ const static size_t insCodes9[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
+ #include "instrs.h"
+ };
+ const static insFormat formatEncode9[9] = { IF_T1_D0, IF_T1_H, IF_T1_J0, IF_T1_G, IF_T2_L0, IF_T2_C0, IF_T1_F, IF_T1_J2, IF_T1_J3 };
+ const static insFormat formatEncode8[8] = { IF_T1_H, IF_T1_C, IF_T2_E0, IF_T2_H0, IF_T2_K1, IF_T2_K4, IF_T1_J2, IF_T1_J3 };
+ const static insFormat formatEncode6A[6] = { IF_T1_H, IF_T1_C, IF_T2_E0, IF_T2_H0, IF_T2_K1, IF_T2_K4};
+ const static insFormat formatEncode6B[6] = { IF_T1_H, IF_T1_C, IF_T2_E0, IF_T2_H0, IF_T2_K1, IF_T1_J2 };
+ const static insFormat formatEncode5A[5] = { IF_T1_E, IF_T1_D0, IF_T1_J0, IF_T2_L1, IF_T2_C3 };
+ const static insFormat formatEncode5B[5] = { IF_T1_E, IF_T1_D0, IF_T1_J0, IF_T2_L2, IF_T2_C8 };
+ const static insFormat formatEncode4A[4] = { IF_T1_E, IF_T1_C, IF_T2_C4, IF_T2_C2 };
+ const static insFormat formatEncode4B[4] = { IF_T2_K2, IF_T2_H2, IF_T2_C7, IF_T2_K3 };
+ const static insFormat formatEncode3A[3] = { IF_T1_E, IF_T2_C0, IF_T2_L0 };
+ const static insFormat formatEncode3B[3] = { IF_T1_E, IF_T2_C8, IF_T2_L2 };
+ const static insFormat formatEncode3C[3] = { IF_T1_E, IF_T2_C1, IF_T2_L1 };
+ const static insFormat formatEncode3D[3] = { IF_T1_L1, IF_T2_E2, IF_T2_I1 };
+ const static insFormat formatEncode3E[3] = { IF_T2_N, IF_T2_N1, IF_T2_N2 };
+ const static insFormat formatEncode3F[3] = { IF_T1_M, IF_T2_J2, IF_T2_J3 };
+ const static insFormat formatEncode2A[2] = { IF_T1_K, IF_T2_J1 };
+ const static insFormat formatEncode2B[2] = { IF_T1_D1, IF_T1_D2 };
+ const static insFormat formatEncode2C[2] = { IF_T1_D2, IF_T2_J3 };
+ const static insFormat formatEncode2D[2] = { IF_T1_J1, IF_T2_I0 };
+ const static insFormat formatEncode2E[2] = { IF_T1_E, IF_T2_C6 };
+ const static insFormat formatEncode2F[2] = { IF_T1_E, IF_T2_C5 };
+ const static insFormat formatEncode2G[2] = { IF_T1_J3, IF_T2_M1 };
+ // clang-format on
+
+ size_t code = BAD_CODE;
+ insFormat insFmt = emitInsFormat(ins);
+ bool found = false;
+ int index = 0;
+
+ switch (insFmt)
+ {
+ case IF_EN9:
+ for (index = 0; index < 9; index++)
+ {
+ if (fmt == formatEncode9[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN8:
+ for (index = 0; index < 8; index++)
+ {
+ if (fmt == formatEncode8[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN6A:
+ for (index = 0; index < 6; index++)
+ {
+ if (fmt == formatEncode6A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN6B:
+ for (index = 0; index < 6; index++)
+ {
+ if (fmt == formatEncode6B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5A:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5B:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4A:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4B:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3A:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3B:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3C:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3C[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3D:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3D[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3E:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3E[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN3F:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3F[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2A:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2A[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2B:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2B[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2C:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2C[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2D:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2D[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2E:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2E[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+ case IF_EN2F:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2F[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2G:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2G[index])
+ {
+ found = true;
+ break;
+ }
+ }
+ break;
+
+ default:
+ index = 0;
+ found = true;
+ break;
+ }
+
+ assert(found);
+
+ switch (index)
+ {
+ case 0:
+ assert(ins < ArrLen(insCodes1));
+ code = insCodes1[ins];
+ break;
+ case 1:
+ assert(ins < ArrLen(insCodes2));
+ code = insCodes2[ins];
+ break;
+ case 2:
+ assert(ins < ArrLen(insCodes3));
+ code = insCodes3[ins];
+ break;
+ case 3:
+ assert(ins < ArrLen(insCodes4));
+ code = insCodes4[ins];
+ break;
+ case 4:
+ assert(ins < ArrLen(insCodes5));
+ code = insCodes5[ins];
+ break;
+ case 5:
+ assert(ins < ArrLen(insCodes6));
+ code = insCodes6[ins];
+ break;
+ case 6:
+ assert(ins < ArrLen(insCodes7));
+ code = insCodes7[ins];
+ break;
+ case 7:
+ assert(ins < ArrLen(insCodes8));
+ code = insCodes8[ins];
+ break;
+ case 8:
+ assert(ins < ArrLen(insCodes9));
+ code = insCodes9[ins];
+ break;
+ }
+
+ assert((code != BAD_CODE));
+
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Return the code size of the given instruction format. The 'insSize' return type enum
+ * indicates a 16 bit, 32 bit, or 48 bit instruction.
+ */
+
+emitter::insSize emitter::emitInsSize(insFormat insFmt)
+{
+ if ((insFmt >= IF_T1_A) && (insFmt < IF_T2_A))
+ return ISZ_16BIT;
+
+ if ((insFmt >= IF_T2_A) && (insFmt < IF_INVALID))
+ return ISZ_32BIT;
+
+ if (insFmt == IF_LARGEJMP)
+ return ISZ_48BIT;
+
+ assert(!"Invalid insFormat");
+ return ISZ_48BIT;
+}
+
+/*****************************************************************************
+ *
+ *  isModImmConst() returns true when the immediate 'val32' can be encoded
+ *  using the special modified immediate constant form available in Thumb-2.
+ */
+
+/*static*/ bool emitter::isModImmConst(int val32)
+{
+ unsigned uval32 = (unsigned)val32;
+ unsigned imm8 = uval32 & 0xff;
+
+ /* encode = 0000x */
+ if (imm8 == uval32)
+ return true;
+
+ unsigned imm32a = (imm8 << 16) | imm8;
+ /* encode = 0001x */
+ if (imm32a == uval32)
+ return true;
+
+ unsigned imm32b = (imm32a << 8);
+ /* encode = 0010x */
+ if (imm32b == uval32)
+ return true;
+
+ unsigned imm32c = (imm32a | imm32b);
+ /* encode = 0011x */
+ if (imm32c == uval32)
+ return true;
+
+ unsigned mask32 = 0x00000ff;
+
+ unsigned encode = 31; /* 11111 */
+ unsigned temp;
+
+ do
+ {
+ mask32 <<= 1;
+ temp = uval32 & ~mask32;
+ if (temp == 0)
+ return true;
+ encode--;
+ } while (encode >= 8);
+
+ return false;
+}
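+
+// Worked examples (added commentary, not from the original source), following the checks above:
+//   isModImmConst(0x000000FF) -> true   // imm8 by itself                 (encode 0000x)
+//   isModImmConst(0x00FF00FF) -> true   // imm8 repeated in both halves   (encode 0001x)
+//   isModImmConst(0xFF00FF00) -> true   // the 0001x pattern shifted left (encode 0010x)
+//   isModImmConst(0xFFFFFFFF) -> true   // imm8 repeated in every byte    (encode 0011x)
+//   isModImmConst(0x000003FC) -> true   // 0xFF rotated into bits [9:2]
+//   isModImmConst(0x00000101) -> false  // set bits span more than 8 contiguous bit positions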
+
+/*****************************************************************************
+ *
+ *  encodeModImmConst() returns the special ARM 12-bit immediate encoding
+ *  (4 bits + 8 bits) that is used to encode the immediate.
+ *  If the immediate cannot be encoded then 0x0BADC0DE is returned.
+ */
+
+/*static*/ int emitter::encodeModImmConst(int val32)
+{
+ unsigned uval32 = (unsigned)val32;
+ unsigned imm8 = uval32 & 0xff;
+ unsigned encode = imm8 >> 7;
+ unsigned imm32a;
+ unsigned imm32b;
+ unsigned imm32c;
+ unsigned mask32;
+ unsigned temp;
+
+ /* encode = 0000x */
+ if (imm8 == uval32)
+ {
+ goto DONE;
+ }
+
+ imm32a = (imm8 << 16) | imm8;
+ /* encode = 0001x */
+ if (imm32a == uval32)
+ {
+ encode += 2;
+ goto DONE;
+ }
+
+ imm32b = (imm32a << 8);
+ /* encode = 0010x */
+ if (imm32b == uval32)
+ {
+ encode += 4;
+ goto DONE;
+ }
+
+ imm32c = (imm32a | imm32b);
+ /* encode = 0011x */
+ if (imm32c == uval32)
+ {
+ encode += 6;
+ goto DONE;
+ }
+
+ mask32 = 0x00000ff;
+
+ encode = 31; /* 11111 */
+ do
+ {
+ mask32 <<= 1;
+ temp = uval32 & ~mask32;
+ if (temp == 0)
+ {
+ imm8 = (uval32 & mask32) >> (32 - encode);
+ assert((imm8 & 0x80) != 0);
+ goto DONE;
+ }
+ encode--;
+ } while (encode >= 8);
+
+ assert(!"encodeModImmConst failed!");
+ return BAD_CODE;
+
+DONE:
+ unsigned result = (encode << 7) | (imm8 & 0x7f);
+ assert(result <= 0x0fff);
+ assert(result >= 0);
+ return (int)result;
+}
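+
+// Worked example (added commentary, not from the original source): for val32 == 0x000003FC the
+// rotation loop above stops with encode == 30 and imm8 == 0xFF, so the function returns
+// (30 << 7) | (0xFF & 0x7F) == 0xF7F, i.e. the 12-bit field is the 5-bit selector followed by
+// the low 7 bits of the 8-bit value.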
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_alu() returns true when the immediate 'imm'
+ * can be encoded using the 12-bit funky Arm immediate encoding
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_alu(int imm)
+{
+ if (isModImmConst(imm))
+ return true;
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_mov() returns true when the immediate 'imm'
+ * can be encoded using a single mov or mvn instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_mov(int imm)
+{
+ if ((imm & 0x0000ffff) == imm) // 16-bit immediate
+ return true;
+ if (isModImmConst(imm)) // funky arm immediate
+ return true;
+ if (isModImmConst(~imm)) // funky arm immediate via mvn
+ return true;
+ return false;
+}
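+
+// Illustrative examples (added commentary, not from the original source):
+//   emitIns_valid_imm_for_mov(0x0000ABCD) -> true   // fits in 16 bits, so movw can be used
+//   emitIns_valid_imm_for_mov(0xFFFFFF00) -> true   // ~imm == 0xFF is a modified immediate, so mvn can be used
+//   emitIns_valid_imm_for_mov(0x00012345) -> false  // neither imm nor ~imm can be encoded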
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_small_mov() returns true when the immediate 'imm'
+ * can be encoded using a single 2-byte mov instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_small_mov(regNumber reg, int imm, insFlags flags)
+{
+ return isLowRegister(reg) && insSetsFlags(flags) && ((imm & 0x00ff) == imm);
+}
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_add() returns true when the immediate 'imm'
+ * can be encoded using a single add or sub instruction.
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_add(int imm, insFlags flags)
+{
+ if ((unsigned_abs(imm) <= 0x00000fff) && (flags != INS_FLAGS_SET)) // 12-bit immediate via add/sub
+ return true;
+ if (isModImmConst(imm)) // funky arm immediate
+ return true;
+ if (isModImmConst(-imm)) // funky arm immediate via sub
+ return true;
+ return false;
+}
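+
+// Illustrative examples (added commentary, not from the original source):
+//   emitIns_valid_imm_for_add(0x00000FFF, INS_FLAGS_DONT_CARE) -> true   // fits the 12-bit addw/subw immediate
+//   emitIns_valid_imm_for_add(0x00000FFF, INS_FLAGS_SET)       -> false  // addw/subw cannot set flags and
+//                                                                        // 0xFFF is not a modified immediate
+//   emitIns_valid_imm_for_add(-0x00FF00FF, INS_FLAGS_SET)      -> true   // -imm is a modified immediate (sub form)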
+
+/*****************************************************************************
+ *
+ * emitIns_valid_imm_for_add_sp() returns true when the immediate 'imm'
+ * can be encoded in "add Rd,SP,i10".
+ */
+/*static*/ bool emitter::emitIns_valid_imm_for_add_sp(int imm)
+{
+ if ((imm & 0x03fc) == imm)
+ return true;
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with no operands.
+ */
+
+void emitter::emitIns(instruction ins)
+{
+ instrDesc* id = emitNewInstrSmall(EA_4BYTE);
+ insFormat fmt = emitInsFormat(ins);
+ insSize isz = emitInsSize(fmt);
+
+ assert((fmt == IF_T1_A) || (fmt == IF_T2_A));
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a single immediate value.
+ */
+
+void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm)
+{
+ insFormat fmt = IF_NONE;
+ bool hasLR = false;
+ bool hasPC = false;
+ bool useT2 = false;
+ bool onlyT1 = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+#ifdef FEATURE_ITINSTRUCTION
+ case INS_it:
+ case INS_itt:
+ case INS_ite:
+ case INS_ittt:
+ case INS_itte:
+ case INS_itet:
+ case INS_itee:
+ case INS_itttt:
+ case INS_ittte:
+ case INS_ittet:
+ case INS_ittee:
+ case INS_itett:
+ case INS_itete:
+ case INS_iteet:
+ case INS_iteee:
+ assert((imm & 0x0F) == imm);
+ fmt = IF_T1_B;
+ attr = EA_4BYTE;
+ break;
+#endif // FEATURE_ITINSTRUCTION
+
+ case INS_push:
+ assert((imm & 0xA000) == 0); // Cannot push PC or SP
+
+ if (imm & 0x4000) // Is the LR being pushed?
+ hasLR = true;
+
+ goto COMMON_PUSH_POP;
+
+ case INS_pop:
+ assert((imm & 0x2000) == 0); // Cannot pop SP
+ assert((imm & 0xC000) != 0xC000); // Cannot pop both PC and LR
+
+ if (imm & 0x8000) // Is the PC being popped?
+ hasPC = true;
+ if (imm & 0x4000) // Is the LR being popped?
+ {
+ hasLR = true;
+ useT2 = true;
+ }
+
+ COMMON_PUSH_POP:
+
+ if (((imm - 1) & imm) == 0) // Is only one or zero bits set in imm?
+ {
+ if (((imm == 0) && !hasLR) || // imm has no bits set, but hasLR is set
+ (!hasPC && !hasLR)) // imm has one bit set, and neither of hasPC/hasLR are set
+ {
+ onlyT1 = true; // if only one bit is set we must use the T1 encoding
+ }
+ }
+
+ imm &= ~0xE000; // ensure that PC, LR and SP bits are removed from imm
+
+ if (((imm & 0x00ff) == imm) && !useT2)
+ {
+ fmt = IF_T1_L1;
+ }
+ else if (!onlyT1)
+ {
+ fmt = IF_T2_I1;
+ }
+ else
+ {
+ // We have to use the Thumb-2 push single register encoding
+ regNumber reg = genRegNumFromMask(imm);
+ emitIns_R(ins, attr, reg);
+ return;
+ }
+
+ //
+ // Encode the PC and LR bits as the lowest two bits
+ //
+ imm <<= 2;
+ if (hasPC)
+ imm |= 2;
+ if (hasLR)
+ imm |= 1;
+
+ assert(imm != 0);
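+
+            // For example (added commentary, not from the original source): "push {r0-r3, lr}"
+            // arrives here with imm == 0x400F and hasLR recorded above; the LR bit is stripped
+            // (imm becomes 0x000F) and then re-encoded, giving (0x000F << 2) | 1 == 0x3D as the
+            // stored immediate.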
+
+ break;
+
+#if 0
+ // TODO-ARM-Cleanup: Enable or delete.
+ case INS_bkpt: // Windows uses a different encoding
+ if ((imm & 0x0000ffff) == imm)
+ {
+ fmt = IF_T1_L0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+#endif
+
+ case INS_dmb:
+ case INS_ism:
+ if ((imm & 0x000f) == imm)
+ {
+ fmt = IF_T2_B;
+ attr = EA_4BYTE;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_B) || (fmt == IF_T1_L0) || (fmt == IF_T1_L1) || (fmt == IF_T2_I1) || (fmt == IF_T2_B));
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a single register.
+ */
+
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_pop:
+ case INS_push:
+ if (isLowRegister(reg))
+ {
+ int regmask = 1 << ((int)reg);
+ emitIns_I(ins, attr, regmask);
+ return;
+ }
+ assert(size == EA_PTRSIZE);
+ fmt = IF_T2_E2;
+ break;
+
+ case INS_vmrs:
+ assert(size == EA_PTRSIZE);
+ fmt = IF_T2_E2;
+ break;
+
+ case INS_bx:
+ assert(size == EA_PTRSIZE);
+ fmt = IF_T1_D1;
+ break;
+ case INS_rsb:
+ case INS_mvn:
+ emitIns_R_R_I(ins, attr, reg, reg, 0);
+ return;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_D1) || (fmt == IF_T2_E2));
+
+ instrDesc* id = emitNewInstrSmall(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a constant.
+ */
+
+void emitter::emitIns_R_I(
+ instruction ins, emitAttr attr, regNumber reg, int imm, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ case INS_sub:
+ if ((reg == REG_SP) && insDoesNotSetFlags(flags) && ((imm & 0x01fc) == imm))
+ {
+ fmt = IF_T1_F;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else if (isLowRegister(reg) && insSetsFlags(flags) && (unsigned_abs(imm) <= 0x00ff))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ if (ins == INS_add)
+ ins = INS_sub;
+ else // ins == INS_sub
+ ins = INS_add;
+ imm = -imm;
+ }
+ fmt = IF_T1_J0;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // otherwise we have to use a Thumb-2 encoding
+ emitIns_R_R_I(ins, attr, reg, reg, imm, flags);
+ return;
+ }
+ break;
+
+ case INS_adc:
+ emitIns_R_R_I(ins, attr, reg, reg, imm, flags);
+ return;
+
+ case INS_vpush:
+ case INS_vpop:
+ assert(imm > 0);
+ if (attr == EA_8BYTE)
+ {
+ assert(isDoubleReg(reg));
+ assert(imm <= 16);
+ imm *= 2;
+ }
+ else
+ {
+ assert(attr == EA_4BYTE);
+ assert(isFloatReg(reg));
+ assert(imm <= 16);
+ }
+ assert(((reg - REG_F0) + imm) <= 32);
+ imm *= 4;
+
+ if (ins == INS_vpush)
+ imm = -imm;
+
+ sf = INS_FLAGS_NOT_SET;
+ fmt = IF_T2_VLDST;
+ break;
+
+ case INS_stm:
+ {
+ sf = INS_FLAGS_NOT_SET;
+
+ bool hasLR = false;
+ bool hasPC = false;
+ bool useT2 = false;
+ bool onlyT1 = false;
+
+            assert((imm & 0x2000) == 0);      // Cannot store SP
+            assert((imm & 0xC000) != 0xC000); // Cannot store both PC and LR
+            assert((imm & 0xFFFF0000) == 0);  // Can only contain the lower 16 bits
+
+            if (imm & 0x8000) // Is the PC being stored?
+                hasPC = true;
+
+            if (imm & 0x4000) // Is the LR being stored?
+ {
+ hasLR = true;
+ useT2 = true;
+ }
+
+ if (!isLowRegister(reg))
+ useT2 = true;
+
+ if (((imm - 1) & imm) == 0) // Is only one or zero bits set in imm?
+ {
+ if (((imm == 0) && !hasLR) || // imm has no bits set, but hasLR is set
+ (!hasPC && !hasLR)) // imm has one bit set, and neither of hasPC/hasLR are set
+ {
+ onlyT1 = true; // if only one bit is set we must use the T1 encoding
+ }
+ }
+
+ imm &= ~0xE000; // ensure that PC, LR and SP bits are removed from imm
+
+ if (((imm & 0x00ff) == imm) && !useT2)
+ {
+ fmt = IF_T1_J1;
+ }
+ else if (!onlyT1)
+ {
+ fmt = IF_T2_I0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ // We have to use the Thumb-2 str single register encoding
+ // reg = genRegNumFromMask(imm);
+ // emitIns_R(ins, attr, reg);
+ return;
+ }
+
+ //
+ // Encode the PC and LR bits as the lowest two bits
+ //
+ if (fmt == IF_T2_I0)
+ {
+ imm <<= 2;
+ if (hasPC)
+ imm |= 2;
+ if (hasLR)
+ imm |= 1;
+ }
+ assert(imm != 0);
+ }
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_orn:
+ case INS_rsb:
+ case INS_sbc:
+
+ case INS_ror:
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ // use the Reg, Reg, Imm encoding
+ emitIns_R_R_I(ins, attr, reg, reg, imm, flags);
+ return;
+
+ case INS_mov:
+ assert(!EA_IS_CNS_RELOC(attr));
+
+ if (isLowRegister(reg) && insSetsFlags(flags) && ((imm & 0x00ff) == imm))
+ {
+ fmt = IF_T1_J0;
+ sf = INS_FLAGS_SET;
+ }
+ else if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L1;
+ sf = insMustSetFlags(flags);
+ }
+ else if (isModImmConst(~imm)) // See if we can use move negated instruction instead
+ {
+ ins = INS_mvn;
+ imm = ~imm;
+ fmt = IF_T2_L1;
+ sf = insMustSetFlags(flags);
+ }
+ else if (insDoesNotSetFlags(flags) && ((imm & 0x0000ffff) == imm))
+ {
+ // mov => movw instruction
+ ins = INS_movw;
+ fmt = IF_T2_N;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_movw:
+ case INS_movt:
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+ if ((imm & 0x0000ffff) == imm || EA_IS_RELOC(attr))
+ {
+ fmt = IF_T2_N;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_mvn:
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L1;
+ sf = insMustSetFlags(flags);
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_cmp:
+ assert(!EA_IS_CNS_RELOC(attr));
+ assert(insSetsFlags(flags));
+ sf = INS_FLAGS_SET;
+ if (isLowRegister(reg) && ((imm & 0x0ff) == imm))
+ {
+ fmt = IF_T1_J0;
+ }
+ else if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L2;
+ }
+ else if (isModImmConst(-imm))
+ {
+ ins = INS_cmn;
+ fmt = IF_T2_L2;
+ imm = -imm;
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_I: immediate doesn't fit into the instruction");
+#else // LEGACY_BACKEND
+ // Load val into a register
+ regNumber valReg = codeGen->regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, valReg, (ssize_t)imm);
+ emitIns_R_R(ins, attr, reg, valReg, flags);
+#endif // LEGACY_BACKEND
+ return;
+ }
+ break;
+
+ case INS_cmn:
+ case INS_tst:
+ case INS_teq:
+ assert(insSetsFlags(flags));
+ sf = INS_FLAGS_SET;
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L2;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+#ifdef FEATURE_PLI_INSTRUCTION
+ case INS_pli:
+ assert(insDoesNotSetFlags(flags));
+ if ((reg == REG_SP) && (unsigned_abs(imm) <= 0x0fff))
+ {
+ fmt = IF_T2_K3;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ __fallthrough;
+#endif // FEATURE_PLI_INSTRUCTION
+
+ case INS_pld:
+ case INS_pldw:
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+ if ((imm >= 0) && (imm <= 0x0fff))
+ {
+ fmt = IF_T2_K2;
+ }
+ else if ((imm < 0) && (-imm <= 0x00ff))
+ {
+ imm = -imm;
+ fmt = IF_T2_H2;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_F) || (fmt == IF_T1_J0) || (fmt == IF_T1_J1) || (fmt == IF_T2_H2) || (fmt == IF_T2_I0) ||
+ (fmt == IF_T2_K2) || (fmt == IF_T2_K3) || (fmt == IF_T2_L1) || (fmt == IF_T2_L2) || (fmt == IF_T2_M1) ||
+ (fmt == IF_T2_N) || (fmt == IF_T2_VLDST));
+
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers
+ */
+
+void emitter::emitIns_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ if (insDoesNotSetFlags(flags))
+ {
+ fmt = IF_T1_D0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ __fallthrough;
+
+ case INS_sub:
+ // Use the Thumb-1 reg,reg,reg encoding
+ emitIns_R_R_R(ins, attr, reg1, reg1, reg2, flags);
+ return;
+
+ case INS_mov:
+ if (insDoesNotSetFlags(flags))
+ {
+ assert(reg1 != reg2);
+ fmt = IF_T1_D0;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else // insSetsFlags(flags)
+ {
+ sf = INS_FLAGS_SET;
+ if (isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ }
+ else
+ {
+ fmt = IF_T2_C3;
+ }
+ }
+ break;
+
+ case INS_cmp:
+ assert(insSetsFlags(flags));
+ sf = INS_FLAGS_SET;
+ if (isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E; // both are low registers
+ }
+ else
+ {
+ fmt = IF_T1_D0; // one or both are high registers
+ }
+ break;
+
+ case INS_vmov_f2i:
+ assert(isGeneralRegister(reg1));
+ assert(isFloatReg(reg2));
+ fmt = IF_T2_VMOVS;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vmov_i2f:
+ assert(isFloatReg(reg1));
+ assert(isGeneralRegister(reg2));
+ fmt = IF_T2_VMOVS;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vcvt_d2i:
+ case INS_vcvt_d2u:
+ case INS_vcvt_d2f:
+ assert(isFloatReg(reg1));
+ assert(isDoubleReg(reg2));
+ goto VCVT_COMMON;
+
+ case INS_vcvt_f2d:
+ case INS_vcvt_u2d:
+ case INS_vcvt_i2d:
+ assert(isDoubleReg(reg1));
+ assert(isFloatReg(reg2));
+ goto VCVT_COMMON;
+
+ case INS_vcvt_u2f:
+ case INS_vcvt_i2f:
+ case INS_vcvt_f2i:
+ case INS_vcvt_f2u:
+ assert(size == EA_4BYTE);
+ assert(isFloatReg(reg1));
+ assert(isFloatReg(reg2));
+ goto VCVT_COMMON;
+
+ case INS_vmov:
+ assert(reg1 != reg2);
+ __fallthrough;
+
+ case INS_vabs:
+ case INS_vsqrt:
+ case INS_vcmp:
+ case INS_vneg:
+ if (size == EA_8BYTE)
+ {
+ assert(isDoubleReg(reg1));
+ assert(isDoubleReg(reg2));
+ }
+ else
+ {
+ assert(isFloatReg(reg1));
+ assert(isFloatReg(reg2));
+ }
+ __fallthrough;
+
+ VCVT_COMMON:
+ fmt = IF_T2_VFP2;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vadd:
+ case INS_vmul:
+ case INS_vsub:
+ case INS_vdiv:
+ emitIns_R_R_R(ins, attr, reg1, reg1, reg2);
+ return;
+
+ case INS_vldr:
+ case INS_vstr:
+ case INS_ldr:
+ case INS_ldrb:
+ case INS_ldrsb:
+ case INS_ldrh:
+ case INS_ldrsh:
+
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0);
+ return;
+
+ case INS_adc:
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_sbc:
+ if (insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ break;
+ }
+ __fallthrough;
+
+ case INS_orn:
+            // The assert below fired for bug 281892, where the two operands of an OR were
+            // the same static field load which got CSE'd.
+            // There's no reason why this assert would be true in general, so it is disabled.
+ // assert(reg1 != reg2);
+ // Use the Thumb-2 three register encoding
+ emitIns_R_R_R_I(ins, attr, reg1, reg1, reg2, 0, flags);
+ return;
+
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ case INS_ror:
+            // The assert below fired for bug 296394, where the two operands of an
+            // arithmetic right shift were the same local variable.
+            // There's no reason why this assert would be true in general, so it is disabled.
+ // assert(reg1 != reg2);
+ if (insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 three register encoding
+ emitIns_R_R_R(ins, attr, reg1, reg1, reg2, flags);
+ return;
+ }
+ break;
+
+ case INS_mul:
+ // We will prefer the T2 encoding, unless (flags == INS_FLAGS_SET)
+ // The thumb-1 instruction executes much slower as it must always set the flags
+ //
+ if (insMustSetFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 three register encoding
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg1, flags);
+ return;
+ }
+ break;
+
+ case INS_mvn:
+ case INS_cmn:
+ case INS_tst:
+ if (insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 register with shift encoding
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0, flags);
+ return;
+ }
+ break;
+
+ case INS_sxtb:
+ case INS_uxtb:
+ assert(size == EA_1BYTE);
+ goto EXTEND_COMMON;
+
+ case INS_sxth:
+ case INS_uxth:
+ assert(size == EA_2BYTE);
+ EXTEND_COMMON:
+ assert(insDoesNotSetFlags(flags));
+ if (isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 reg,reg with rotation encoding
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_FLAGS_NOT_SET);
+ return;
+ }
+ break;
+
+ case INS_tbb:
+ assert(size == EA_1BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C9;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_tbh:
+ assert(size == EA_2BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C9;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_clz:
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C10;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrexb:
+ case INS_strexb:
+ assert(size == EA_1BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_E1;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrexh:
+ case INS_strexh:
+ assert(size == EA_2BYTE);
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_E1;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ default:
+#ifdef DEBUG
+ printf("did not expect instruction %s\n", codeGen->genInsName(ins));
+#endif
+ unreached();
+ }
+
+ assert((fmt == IF_T1_D0) || (fmt == IF_T1_E) || (fmt == IF_T2_C3) || (fmt == IF_T2_C9) || (fmt == IF_T2_C10) ||
+ (fmt == IF_T2_VFP2) || (fmt == IF_T2_VMOVD) || (fmt == IF_T2_VMOVS) || (fmt == IF_T2_E1));
+
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSmall(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and two constants.
+ */
+
+void emitter::emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg, int imm1, int imm2, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+ int imm = 0; // combined immediates
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_bfc:
+ {
+ int lsb = imm1;
+ int msb = lsb + imm2 - 1;
+
+ assert((lsb >= 0) && (lsb <= 31)); // required for encoding of INS_bfc
+ assert((msb >= 0) && (msb <= 31)); // required for encoding of INS_bfc
+ assert(msb >= lsb); // required for encoding of INS_bfc
+
+ imm = (lsb << 5) | msb;
+
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_D1;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert(fmt == IF_T2_D1);
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a constant.
+ */
+
+void emitter::emitIns_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */,
+ insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ if (ins == INS_lea)
+ {
+ ins = INS_add;
+ }
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ assert(insOptsNone(opt));
+
+ // Can we possibly encode the immediate 'imm' using a Thumb-1 encoding?
+ if ((reg2 == REG_SP) && insDoesNotSetFlags(flags) && ((imm & 0x03fc) == imm))
+ {
+ if ((reg1 == REG_SP) && ((imm & 0x01fc) == imm))
+ {
+ // Use Thumb-1 encoding
+ emitIns_R_I(ins, attr, reg1, imm, flags);
+ return;
+ }
+ else if (isLowRegister(reg1))
+ {
+ fmt = IF_T1_J2;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ }
+ __fallthrough;
+
+ case INS_sub:
+ assert(insOptsNone(opt));
+
+ // Is it just a mov?
+ if (imm == 0)
+ {
+ // Is the mov even necessary?
+ // Fix 383915 ARM ILGEN
+ if (reg1 != reg2)
+ {
+ emitIns_R_R(INS_mov, attr, reg1, reg2, flags);
+ }
+ return;
+ }
+ // Can we encode the immediate 'imm' using a Thumb-1 encoding?
+ else if (isLowRegister(reg1) && isLowRegister(reg2) && insSetsFlags(flags) && (unsigned_abs(imm) <= 0x0007))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ if (ins == INS_add)
+ ins = INS_sub;
+ else
+ ins = INS_add;
+ imm = -imm;
+ }
+ fmt = IF_T1_G;
+ sf = INS_FLAGS_SET;
+ }
+ else if ((reg1 == reg2) && isLowRegister(reg1) && insSetsFlags(flags) && (unsigned_abs(imm) <= 0x00ff))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ if (ins == INS_add)
+ ins = INS_sub;
+ else
+ ins = INS_add;
+ imm = -imm;
+ }
+ // Use Thumb-1 encoding
+ emitIns_R_I(ins, attr, reg1, imm, flags);
+ return;
+ }
+ else if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else if (isModImmConst(-imm))
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ ins = (ins == INS_add) ? INS_sub : INS_add;
+ imm = -imm;
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else if (insDoesNotSetFlags(flags) && (unsigned_abs(imm) <= 0x0fff))
+ {
+ if (imm < 0)
+ {
+ assert((ins == INS_add) || (ins == INS_sub));
+ ins = (ins == INS_add) ? INS_sub : INS_add;
+ imm = -imm;
+ }
+ // add/sub => addw/subw instruction
+                // Note that even when using the w prefix (addw/subw) the immediate is still only 12 bits.
+ ins = (ins == INS_add) ? INS_addw : INS_subw;
+ fmt = IF_T2_M0;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_orr:
+ case INS_orn:
+ assert(insOptsNone(opt));
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else if (isModImmConst(~imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ imm = ~imm;
+
+ if (ins == INS_and)
+ ins = INS_bic;
+ else if (ins == INS_bic)
+ ins = INS_and;
+ else if (ins == INS_orr)
+ ins = INS_orn;
+ else if (ins == INS_orn)
+ ins = INS_orr;
+ else
+ assert(!"Instruction cannot be encoded");
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_rsb:
+ assert(insOptsNone(opt));
+ if (imm == 0 && isLowRegister(reg1) && isLowRegister(reg2) && insSetsFlags(flags))
+ {
+ fmt = IF_T1_E;
+ sf = INS_FLAGS_SET;
+ break;
+ }
+ __fallthrough;
+
+ case INS_adc:
+ case INS_eor:
+ case INS_sbc:
+ assert(insOptsNone(opt));
+ if (isModImmConst(imm))
+ {
+ fmt = IF_T2_L0;
+ sf = insMustSetFlags(flags);
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_adr:
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ assert(reg2 == REG_PC);
+ sf = INS_FLAGS_NOT_SET;
+
+ if (isLowRegister(reg1) && ((imm & 0x00ff) == imm))
+ {
+ fmt = IF_T1_J3;
+ }
+ else if ((imm & 0x0fff) == imm)
+ {
+ fmt = IF_T2_M1;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ case INS_mvn:
+ assert((imm >= 0) && (imm <= 31)); // required for encoding
+ assert(!insOptAnyInc(opt));
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt));
+ if (isLowRegister(reg1) && isLowRegister(reg2) && insSetsFlags(flags))
+ {
+ // Use the Thumb-1 reg,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+ else // imm > 0 && imm <= 31
+ {
+ assert(insOptAnyShift(opt));
+ }
+ fmt = IF_T2_C1;
+ sf = insMustSetFlags(flags);
+ break;
+
+ case INS_cmp:
+ case INS_cmn:
+ case INS_teq:
+ case INS_tst:
+ assert(insSetsFlags(flags));
+ assert((imm >= 0) && (imm <= 31)); // required for encoding
+ assert(!insOptAnyInc(opt));
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt));
+ if (ins == INS_cmp)
+ {
+ // Use the Thumb-1 reg,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ if (((ins == INS_cmn) || (ins == INS_tst)) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ // Use the Thumb-1 reg,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+            else // imm > 0 && imm <= 31
+ {
+ assert(insOptAnyShift(opt));
+ if (insOptsRRX(opt))
+ assert(imm == 1);
+ }
+
+ fmt = IF_T2_C8;
+ sf = INS_FLAGS_SET;
+ break;
+
+ case INS_ror:
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ assert(insOptsNone(opt));
+
+ // On ARM, the immediate shift count of LSL and ROR must be between 1 and 31. For LSR and ASR, it is between
+ // 1 and 32, though we don't ever use 32. Although x86 allows an immediate shift count of 8-bits in
+ // instruction encoding, the CPU looks at only the lower 5 bits. As per ECMA, specifying a shift count to
+ // the IL SHR, SHL, or SHL.UN instruction that is greater than or equal to the width of the type will yield
+ // an undefined value. We choose that undefined value in this case to match x86 behavior, by only using the
+ // lower 5 bits of the constant shift count.
+ imm &= 0x1f;
+
+ if (imm == 0)
+ {
+ // Additional Fix 383915 ARM ILGEN
+ if ((reg1 != reg2) || insMustSetFlags(flags))
+ {
+                // Use MOV/MOVS instruction
+ emitIns_R_R(INS_mov, attr, reg1, reg2, flags);
+ }
+ return;
+ }
+
+ if (insSetsFlags(flags) && (ins != INS_ror) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_SET;
+ }
+ else
+ {
+ fmt = IF_T2_C2;
+ sf = insMustSetFlags(flags);
+ }
+ break;
+
+ case INS_sxtb:
+ case INS_uxtb:
+ assert(size == EA_1BYTE);
+ goto EXTEND_COMMON;
+
+ case INS_sxth:
+ case INS_uxth:
+ assert(size == EA_2BYTE);
+ EXTEND_COMMON:
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x018) == imm); // required for encoding
+
+ if ((imm == 0) && isLowRegister(reg1) && isLowRegister(reg2))
+ {
+ // Use Thumb-1 encoding
+ emitIns_R_R(ins, attr, reg1, reg2, INS_FLAGS_NOT_SET);
+ return;
+ }
+
+ fmt = IF_T2_C6;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_pld:
+ case INS_pldw:
+#ifdef FEATURE_PLI_INSTRUCTION
+ case INS_pli:
+#endif // FEATURE_PLI_INSTRUCTION
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x003) == imm); // required for encoding
+
+ fmt = IF_T2_C7;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrb:
+ case INS_strb:
+ assert(size == EA_1BYTE);
+ assert(insDoesNotSetFlags(flags));
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && insOptsNone(opt) && ((imm & 0x001f) == imm))
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrsb:
+ assert(size == EA_1BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrh:
+ case INS_strh:
+ assert(size == EA_2BYTE);
+ assert(insDoesNotSetFlags(flags));
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && insOptsNone(opt) && ((imm & 0x003e) == imm))
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrsh:
+ assert(size == EA_2BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_vldr:
+ case INS_vstr:
+ case INS_vldm:
+ case INS_vstm:
+ assert(fmt == IF_NONE);
+ assert(insDoesNotSetFlags(flags));
+ assert(offsetFitsInVectorMem(imm)); // required for encoding
+ if (insOptAnyInc(opt))
+ {
+ if (insOptsPostInc(opt))
+ {
+ assert(imm > 0);
+ }
+ else // insOptsPreDec(opt)
+ {
+ assert(imm < 0);
+ }
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ }
+
+ sf = INS_FLAGS_NOT_SET;
+ fmt = IF_T2_VLDST;
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ assert(size == EA_4BYTE);
+ assert(insDoesNotSetFlags(flags));
+
+ // Can we possibly encode the immediate 'imm' using a Thumb-1 encoding?
+ if (isLowRegister(reg1) && insOptsNone(opt) && ((imm & 0x03fc) == imm))
+ {
+ if (reg2 == REG_SP)
+ {
+ fmt = IF_T1_J2;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ else if (reg2 == REG_PC)
+ {
+ if (ins == INS_ldr)
+ {
+ fmt = IF_T1_J3;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ }
+ else if (isLowRegister(reg2))
+ {
+ // Only the smaller range 'imm' can be encoded
+ if ((imm & 0x07c) == imm)
+ {
+ fmt = IF_T1_C;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+ }
+ }
+ }
+ //
+ // If we did not find a thumb-1 encoding above
+ //
+ __fallthrough;
+
+ COMMON_THUMB2_LDST:
+ assert(fmt == IF_NONE);
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+
+ if (insOptAnyInc(opt))
+ {
+ if (insOptsPostInc(opt))
+ assert(imm > 0);
+ else // insOptsPreDec(opt)
+ assert(imm < 0);
+
+ if (unsigned_abs(imm) <= 0x00ff)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ if ((reg2 == REG_PC) && (unsigned_abs(imm) <= 0x0fff))
+ {
+ fmt = IF_T2_K4;
+ }
+ else if ((imm & 0x0fff) == imm)
+ {
+ fmt = IF_T2_K1;
+ }
+ else if (unsigned_abs(imm) <= 0x0ff)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ // Load imm into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(EA_4BYTE, rsvdReg, (ssize_t)imm);
+ emitIns_R_R_R(ins, attr, reg1, reg2, rsvdReg);
+ return;
+ }
+ }
+ break;
+
+ case INS_ldrex:
+ case INS_strex:
+ assert(insOptsNone(opt));
+ assert(insDoesNotSetFlags(flags));
+ sf = INS_FLAGS_NOT_SET;
+
+ if ((imm & 0x03fc) == imm)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ assert(!"Unexpected instruction");
+ }
+ assert((fmt == IF_T1_C) || (fmt == IF_T1_E) || (fmt == IF_T1_G) || (fmt == IF_T1_J2) || (fmt == IF_T1_J3) ||
+ (fmt == IF_T2_C1) || (fmt == IF_T2_C2) || (fmt == IF_T2_C6) || (fmt == IF_T2_C7) || (fmt == IF_T2_C8) ||
+ (fmt == IF_T2_H0) || (fmt == IF_T2_H1) || (fmt == IF_T2_K1) || (fmt == IF_T2_K4) || (fmt == IF_T2_L0) ||
+ (fmt == IF_T2_M0) || (fmt == IF_T2_VLDST) || (fmt == IF_T2_M1));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idInsOpt(opt);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
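+
+// Illustrative sketch (not part of the emitter): given the selection logic above, a caller
+// wanting "adds r0, r1, #3" or "add r4, sp, #16" could issue the calls below. The
+// REG_R0/REG_R1/REG_R4 names are the usual register enum values and are assumed here;
+// the first call picks the Thumb-1 IF_T1_G form and the second typically picks the
+// Thumb-1 SP-relative IF_T1_J2 form.
+//
+//     emitIns_R_R_I(INS_add, EA_4BYTE, REG_R0, REG_R1, 3, INS_FLAGS_SET); // adds r0, r1, #3
+//     emitIns_R_R_I(INS_add, EA_4BYTE, REG_R4, REG_SP, 16);               // add  r4, sp, #16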
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers.
+ */
+
+void emitter::emitIns_R_R_R(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_add:
+ // Encodings do not support SP in the reg3 slot
+ if (reg3 == REG_SP)
+ {
+ // Swap reg2 and reg3
+ reg3 = reg2;
+ reg2 = REG_SP;
+ }
+ __fallthrough;
+
+ case INS_sub:
+ assert(reg3 != REG_SP);
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3) && insSetsFlags(flags))
+ {
+ fmt = IF_T1_H;
+ sf = INS_FLAGS_SET;
+ break;
+ }
+
+ if ((ins == INS_add) && insDoesNotSetFlags(flags))
+ {
+ if (reg1 == reg2)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if (reg1 == reg3)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+
+ // Use the Thumb-2 reg,reg,reg with shift encoding
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, flags);
+ return;
+
+ case INS_adc:
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_sbc:
+ if (reg1 == reg2)
+ {
+ // Try to encode as a Thumb-1 instruction
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ __fallthrough;
+
+ case INS_orn:
+ // Use the Thumb-2 three register encoding, with imm=0
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, flags);
+ return;
+
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ if (reg1 == reg2 && insSetsFlags(flags) && isLowRegister(reg1) && isLowRegister(reg3))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ __fallthrough;
+
+ case INS_ror:
+ fmt = IF_T2_C4;
+ sf = insMustSetFlags(flags);
+ break;
+
+ case INS_mul:
+ if (insMustSetFlags(flags))
+ {
+ if ((reg1 == reg2) && isLowRegister(reg1))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if ((reg1 == reg3) && isLowRegister(reg1))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ }
+ __fallthrough;
+
+ case INS_sdiv:
+ case INS_udiv:
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_C5;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrb:
+ case INS_strb:
+ case INS_ldrsb:
+ assert(size == EA_1BYTE);
+ goto COMMON_THUMB1_LDST;
+
+ case INS_ldrsh:
+ case INS_ldrh:
+ case INS_strh:
+ assert(size == EA_2BYTE);
+ goto COMMON_THUMB1_LDST;
+
+ case INS_ldr:
+ case INS_str:
+ assert(size == EA_4BYTE);
+
+ COMMON_THUMB1_LDST:
+ assert(insDoesNotSetFlags(flags));
+
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3))
+ {
+ fmt = IF_T1_H;
+ sf = INS_FLAGS_NOT_SET;
+ }
+ else
+ {
+ // Use the Thumb-2 reg,reg,reg with shift encoding
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, flags);
+ return;
+ }
+ break;
+
+ case INS_vadd:
+ case INS_vmul:
+ case INS_vsub:
+ case INS_vdiv:
+ if (size == EA_8BYTE)
+ {
+ assert(isDoubleReg(reg1));
+ assert(isDoubleReg(reg2));
+ assert(isDoubleReg(reg3));
+ }
+ else
+ {
+ assert(isFloatReg(reg1));
+ assert(isFloatReg(reg2));
+ assert(isFloatReg(reg3));
+ }
+ fmt = IF_T2_VFP3;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vmov_i2d:
+ assert(isDoubleReg(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ fmt = IF_T2_VMOVD;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_vmov_d2i:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isDoubleReg(reg3));
+ fmt = IF_T2_VMOVD;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrexd:
+ case INS_strexd:
+ assert(insDoesNotSetFlags(flags));
+ fmt = IF_T2_G1;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T1_H) || (fmt == IF_T2_C4) || (fmt == IF_T2_C5) || (fmt == IF_T2_VFP3) || (fmt == IF_T2_VMOVD) ||
+ (fmt == IF_T2_G1));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstr(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
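+
+// Illustrative sketch (not part of the emitter): for three low registers with flags requested,
+// the routine above picks the Thumb-1 IF_T1_H form; sdiv/udiv always use the Thumb-2 IF_T2_C5
+// form. The register names below are assumed enum values.
+//
+//     emitIns_R_R_R(INS_add,  EA_4BYTE, REG_R0, REG_R1, REG_R2, INS_FLAGS_SET); // adds r0, r1, r2
+//     emitIns_R_R_R(INS_sdiv, EA_4BYTE, REG_R0, REG_R1, REG_R2);                // sdiv r0, r1, r2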
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and two constants.
+ */
+
+void emitter::emitIns_R_R_I_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm1,
+ int imm2,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ int lsb = imm1;
+ int width = imm2;
+ int msb = lsb + width - 1;
+ int imm = 0; /* combined immediate */
+
+ assert((lsb >= 0) && (lsb <= 31)); // required for encodings
+ assert((width > 0) && (width <= 32)); // required for encodings
+ assert((msb >= 0) && (msb <= 31)); // required for encodings
+ assert(msb >= lsb); // required for encodings
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_bfi:
+ assert(insDoesNotSetFlags(flags));
+ imm = (lsb << 5) | msb;
+
+ fmt = IF_T2_D0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_sbfx:
+ case INS_ubfx:
+ assert(insDoesNotSetFlags(flags));
+ imm = (lsb << 5) | (width - 1);
+
+ fmt = IF_T2_D0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T2_D0));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
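+
+// Illustrative sketch (not part of the emitter): extracting a 4-bit field starting at bit 8
+// packs lsb=8 and width=4 into imm = (8 << 5) | (4 - 1) = 0x103, which is later unpacked by
+// insEncodeBitFieldImm. The register names below are assumed enum values.
+//
+//     emitIns_R_R_I_I(INS_ubfx, EA_4BYTE, REG_R0, REG_R1, 8, 4); // ubfx r0, r1, #8, #4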
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers and a constant.
+ */
+
+void emitter::emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ int imm,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */,
+ insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_DONT_CARE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+
+ case INS_add:
+ case INS_sub:
+ if (imm == 0)
+ {
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3) && insSetsFlags(flags))
+ {
+ // Use the Thumb-1 reg,reg,reg encoding
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg3, flags);
+ return;
+ }
+ if ((ins == INS_add) && insDoesNotSetFlags(flags))
+ {
+ if (reg1 == reg2)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if (reg1 == reg3)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+ }
+ __fallthrough;
+
+ case INS_adc:
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orn:
+ case INS_orr:
+ case INS_sbc:
+ assert((imm >= 0) && (imm <= 31)); // required for encoding
+ assert(!insOptAnyInc(opt));
+ if (imm == 0)
+ {
+ if (opt == INS_OPTS_LSL) // left shift of zero
+ opt = INS_OPTS_NONE; // is a nop
+
+ assert(insOptsNone(opt));
+ if (isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3) && insSetsFlags(flags))
+ {
+ if (reg1 == reg2)
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg3, flags);
+ return;
+ }
+ if ((reg1 == reg3) && (ins != INS_bic) && (ins != INS_orn) && (ins != INS_sbc))
+ {
+ // Use the Thumb-1 regdest,reg encoding
+ emitIns_R_R(ins, attr, reg1, reg2, flags);
+ return;
+ }
+ }
+ }
+            else // imm > 0 && imm <= 31
+ {
+ assert(insOptAnyShift(opt));
+ if (insOptsRRX(opt))
+ assert(imm == 1);
+ }
+ fmt = IF_T2_C0;
+ sf = insMustSetFlags(flags);
+ break;
+
+ case INS_ldrb:
+ case INS_ldrsb:
+ case INS_strb:
+ assert(size == EA_1BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldrh:
+ case INS_ldrsh:
+ case INS_strh:
+ assert(size == EA_2BYTE);
+ goto COMMON_THUMB2_LDST;
+
+ case INS_ldr:
+ case INS_str:
+ assert(size == EA_4BYTE);
+
+ COMMON_THUMB2_LDST:
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x0003) == imm); // required for encoding
+
+ if ((imm == 0) && insOptsNone(opt) && isLowRegister(reg1) && isLowRegister(reg2) && isLowRegister(reg3))
+ {
+ // Use the Thumb-1 reg,reg,reg encoding
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg3, flags);
+ return;
+ }
+ assert(insOptsNone(opt) || insOptsLSL(opt));
+ fmt = IF_T2_E0;
+ sf = INS_FLAGS_NOT_SET;
+ break;
+
+ case INS_ldrd:
+ case INS_strd:
+ assert(insDoesNotSetFlags(flags));
+ assert((imm & 0x03) == 0);
+ sf = INS_FLAGS_NOT_SET;
+
+ if (insOptAnyInc(opt))
+ {
+ if (insOptsPostInc(opt))
+ assert(imm > 0);
+ else // insOptsPreDec(opt)
+ assert(imm < 0);
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ }
+
+ if (unsigned_abs(imm) <= 0x03fc)
+ {
+ imm >>= 2;
+ fmt = IF_T2_G0;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded");
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T2_C0) || (fmt == IF_T2_E0) || (fmt == IF_T2_G0));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ // 3-reg ops can't use the small instrdesc
+ instrDescCns* id = emitAllocInstrCns(attr);
+ id->idSetIsLargeCns();
+ id->idcCnsVal = imm;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(emitInsSize(fmt));
+
+ id->idInsFlags(sf);
+ id->idInsOpt(opt);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
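+
+// Illustrative sketch (not part of the emitter): a scaled indexed load such as
+// "ldr r0, [r1, r2, lsl #2]" maps onto the Thumb-2 IF_T2_E0 form selected above.
+// The register names below are assumed enum values.
+//
+//     emitIns_R_R_R_I(INS_ldr, EA_4BYTE, REG_R0, REG_R1, REG_R2, 2, INS_FLAGS_NOT_SET, INS_OPTS_LSL);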
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing four registers.
+ */
+
+void emitter::emitIns_R_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4)
+{
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_NOT_SET;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+
+ case INS_smull:
+ case INS_umull:
+ case INS_smlal:
+ case INS_umlal:
+ assert(reg1 != reg2); // Illegal encoding
+ fmt = IF_T2_F1;
+ break;
+ case INS_mla:
+ case INS_mls:
+ fmt = IF_T2_F2;
+ break;
+ default:
+ unreached();
+ }
+ assert((fmt == IF_T2_F1) || (fmt == IF_T2_F2));
+
+ instrDesc* id = emitNewInstr(attr);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idReg4(reg4);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
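+
+// Illustrative sketch (not part of the emitter): a 64-bit multiply writing its low and high
+// halves to two different registers (as required by the assert above) could be requested as
+// below; the register names are assumed enum values.
+//
+//     emitIns_R_R_R_R(INS_smull, EA_4BYTE, REG_R0, REG_R1, REG_R2, REG_R3); // smull r0, r1, r2, r3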
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static data member operand. If 'size' is 0, the
+ * instruction operates on the address of the static member instead of its
+ * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
+ */
+
+void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ NYI("emitIns_C");
+}
+
+/*****************************************************************************
+ *
+ *  Add an instruction referencing a stack-based local variable.
+ */
+
+void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
+{
+ NYI("emitIns_S");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a stack-based local variable.
+ */
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ switch (ins)
+ {
+ case INS_add:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_vldr:
+ case INS_vmov:
+ case INS_movw:
+ case INS_movt:
+ break;
+
+ case INS_lea:
+ ins = INS_add;
+ break;
+
+ default:
+ NYI("emitIns_R_S");
+ return;
+ }
+
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_NOT_SET;
+ regNumber reg2;
+
+ /* Figure out the variable's frame position */
+ int base;
+ int disp;
+ unsigned undisp;
+
+ base = emitComp->lvaFrameAddress(varx, emitComp->funCurrentFunc()->funKind != FUNC_ROOT, &reg2, offs);
+
+ disp = base + offs;
+ undisp = unsigned_abs(disp);
+
+ if (CodeGen::instIsFP(ins))
+ {
+ // all fp mem ops take 8 bit immediate, multiplied by 4, plus sign
+ //
+ // Note if undisp is not a multiple of four we will fail later on
+ // when we try to encode this instruction
+        // It's better to fail later with a better error message than
+ // to fail here when the RBM_OPT_RSVD is not available
+ //
+ if (undisp <= 0x03fb)
+ {
+ fmt = IF_T2_VLDST;
+ }
+ else
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ emitIns_R_R(INS_add, EA_4BYTE, rsvdReg, reg2);
+ emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0);
+ return;
+ }
+ }
+ else if (emitInsIsLoadOrStore(ins))
+ {
+ if (isLowRegister(reg1) && (reg2 == REG_SP) && (ins == INS_ldr) && ((disp & 0x03fc) == disp && disp <= 0x03f8))
+ {
+ fmt = IF_T1_J2;
+ }
+ else if (disp >= 0 && disp <= 0x0ffb)
+ {
+ fmt = IF_T2_K1;
+ }
+ else if (undisp <= 0x0fb)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ // Load disp into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ fmt = IF_T2_E0;
+ }
+ }
+ else if (ins == INS_add)
+ {
+ if (isLowRegister(reg1) && (reg2 == REG_SP) && ((disp & 0x03fc) == disp && disp <= 0x03f8))
+ {
+ fmt = IF_T1_J2;
+ }
+ else if (undisp <= 0x0ffb)
+ {
+ if (disp < 0)
+ {
+ ins = INS_sub;
+ disp = -disp;
+ }
+ // add/sub => addw/subw instruction
+            // Note that even with the wide (addw/subw) encoding the immediate is still limited to 12 bits
+ ins = (ins == INS_add) ? INS_addw : INS_subw;
+ fmt = IF_T2_M0;
+ }
+ else
+ {
+ // Load disp into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ emitIns_R_R_R(ins, attr, reg1, reg2, rsvdReg);
+ return;
+ }
+ }
+ else if (ins == INS_movw || ins == INS_movt)
+ {
+ fmt = IF_T2_N;
+ }
+
+ assert((fmt == IF_T1_J2) || (fmt == IF_T2_E0) || (fmt == IF_T2_H0) || (fmt == IF_T2_K1) || (fmt == IF_T2_L0) ||
+ (fmt == IF_T2_N) || (fmt == IF_T2_VLDST) || (fmt == IF_T2_M0));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrCns(attr, disp);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+ if (reg2 == REG_FP)
+ id->idSetIsLclFPBase();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
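+
+// Illustrative sketch (not part of the emitter): loading a stack-based local into a register
+// goes through the frame-offset logic above; 'lclNum' is a hypothetical local variable number
+// and REG_R0 an assumed register enum value.
+//
+//     emitIns_R_S(INS_ldr, EA_4BYTE, REG_R0, lclNum, 0); // ldr r0, [sp/fp + <frame offset>]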
+
+// generate the offset of &varx + offs into a register
+void emitter::emitIns_genStackOffset(regNumber r, int varx, int offs)
+{
+ regNumber regBase;
+ int base;
+ int disp;
+
+ base = emitComp->lvaFrameAddress(varx, emitComp->funCurrentFunc()->funKind != FUNC_ROOT, &regBase, offs);
+ disp = base + offs;
+
+ emitIns_R_S(INS_movw, EA_4BYTE, r, varx, offs);
+
+ if ((disp & 0xffff) != disp)
+ {
+ emitIns_R_S(INS_movt, EA_4BYTE, r, varx, offs);
+ }
+}
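+
+// Illustrative sketch (not part of the emitter): for a frame displacement of, say, 0x12345,
+// the routine above materializes the offset in the scratch register roughly as:
+//
+//     movw rT, #0x2345    // low 16 bits of the displacement
+//     movt rT, #0x0001    // high 16 bits, emitted only when (disp & 0xffff) != disp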
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable and a register
+ */
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+
+ switch (ins)
+ {
+ case INS_str:
+ case INS_strh:
+ case INS_strb:
+ case INS_vstr:
+ break;
+
+ default:
+            NYI("emitIns_S_R");
+ return;
+ }
+
+ insFormat fmt = IF_NONE;
+ insFlags sf = INS_FLAGS_NOT_SET;
+ regNumber reg2;
+
+ /* Figure out the variable's frame position */
+ int base;
+ int disp;
+ unsigned undisp;
+
+ base = emitComp->lvaFrameAddress(varx, emitComp->funCurrentFunc()->funKind != FUNC_ROOT, &reg2, offs);
+
+ disp = base + offs;
+ undisp = unsigned_abs(disp);
+
+ if (CodeGen::instIsFP(ins))
+ {
+ // all fp mem ops take 8 bit immediate, multiplied by 4, plus sign
+ //
+ // Note if undisp is not a multiple of four we will fail later on
+ // when we try to encode this instruction
+        // It's better to fail later with a better error message than
+ // to fail here when the RBM_OPT_RSVD is not available
+ //
+ if (undisp <= 0x03fb)
+ {
+ fmt = IF_T2_VLDST;
+ }
+ else
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ emitIns_R_R(INS_add, EA_4BYTE, rsvdReg, reg2);
+ emitIns_R_R_I(ins, attr, reg1, rsvdReg, 0);
+ return;
+ }
+ }
+ else if (isLowRegister(reg1) && (reg2 == REG_SP) && (ins == INS_str) && ((disp & 0x03fc) == disp && disp <= 0x03f8))
+ {
+ fmt = IF_T1_J2;
+ }
+ else if (disp >= 0 && disp <= 0x0ffb)
+ {
+ fmt = IF_T2_K1;
+ }
+ else if (undisp <= 0x0fb)
+ {
+ fmt = IF_T2_H0;
+ }
+ else
+ {
+ // Load disp into a register
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ emitIns_genStackOffset(rsvdReg, varx, offs);
+ fmt = IF_T2_E0;
+ }
+ assert((fmt == IF_T1_J2) || (fmt == IF_T2_E0) || (fmt == IF_T2_H0) || (fmt == IF_T2_VLDST) || (fmt == IF_T2_K1));
+ assert(sf != INS_FLAGS_DONT_CARE);
+
+ instrDesc* id = emitNewInstrCns(attr, disp);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+ id->idInsFlags(sf);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+ if (reg2 == REG_FP)
+ id->idSetIsLclFPBase();
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ *  Add an instruction referencing a stack-based local variable and an immediate
+ */
+void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
+{
+ NYI("emitIns_S_I");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a register + static member operands.
+ */
+void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+ assert(emitInsIsLoad(ins) || (ins == INS_lea));
+ if (ins == INS_lea)
+ {
+ ins = INS_add;
+ }
+
+ int doff = Compiler::eeGetJitDataOffs(fldHnd);
+ ssize_t addr = NULL;
+
+ if (doff >= 0)
+ {
+ NYI_ARM("JitDataOffset static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_FS)
+ {
+ NYI_ARM("Thread-Local-Storage static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_DS)
+ {
+ addr = (ssize_t)offs;
+ offs = 0;
+ }
+ else
+ {
+ assert(!jitStaticFldIsGlobAddr(fldHnd));
+ addr = (ssize_t)emitComp->info.compCompHnd->getFieldAddress(fldHnd, NULL);
+ if (addr == NULL)
+ NO_WAY("could not obtain address of static field");
+ }
+
+ // We can use reg to load the constant address,
+ // as long as it is not a floating point register
+ regNumber regTmp = reg;
+
+ if (isFloatReg(regTmp))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_C() cannot be called with floating point target");
+#else // LEGACY_BACKEND
+ regTmp = codeGen->regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+#endif // LEGACY_BACKEND
+ }
+
+ // Load address of CLS_VAR into a register
+ codeGen->instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regTmp, addr);
+
+ if ((ins != INS_add) || (offs != 0) || (reg != regTmp))
+ {
+ emitIns_R_R_I(ins, attr, reg, regTmp, offs);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + register operands.
+ */
+
+void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
+{
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_C_R not supported for RyuJIT backend");
+#else // LEGACY_BACKEND
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+ assert(emitInsIsStore(ins));
+
+ int doff = Compiler::eeGetJitDataOffs(fldHnd);
+ ssize_t addr = NULL;
+
+ if (doff >= 0)
+ {
+ NYI_ARM("JitDataOffset static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_FS)
+ {
+ NYI_ARM("Thread-Local-Storage static fields");
+ }
+ else if (fldHnd == FLD_GLOBAL_DS)
+ {
+ addr = (ssize_t)offs;
+ offs = 0;
+ }
+ else
+ {
+ assert(!jitStaticFldIsGlobAddr(fldHnd));
+ addr = (ssize_t)emitComp->info.compCompHnd->getFieldAddress(fldHnd, NULL);
+ if (addr == NULL)
+ NO_WAY("could not obtain address of static field");
+ }
+
+ regNumber regTmp = codeGen->regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+
+ // Load address of CLS_VAR into a register
+ codeGen->instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, regTmp, addr);
+
+ emitIns_R_R_I(ins, attr, reg, regTmp, offs);
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + constant.
+ */
+
+void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, ssize_t val)
+{
+ NYI("emitIns_C_I");
+}
+
+/*****************************************************************************
+ *
+ *  The following functions add instructions referencing address modes.
+ */
+
+void emitter::emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie, void* clsCookie)
+{
+ NYI("emitIns_I_AR");
+}
+
+void emitter::emitIns_R_AR(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ if (ins == INS_lea)
+ {
+ if (emitter::emitIns_valid_imm_for_add(offs, INS_FLAGS_DONT_CARE))
+ {
+ emitIns_R_R_I(INS_add, attr, ireg, reg, offs);
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_AR: immediate doesn't fit in the instruction");
+#else // LEGACY_BACKEND
+ // Load val into a register
+ regNumber immReg = codeGen->regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(ireg) & ~genRegMask(reg));
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, immReg, (ssize_t)offs);
+ emitIns_R_R_R(INS_add, attr, ireg, reg, immReg);
+#endif // LEGACY_BACKEND
+ }
+ return;
+ }
+ else if (emitInsIsLoad(ins))
+ {
+ emitIns_R_R_I(ins, attr, ireg, reg, offs);
+ return;
+ }
+ else if ((ins == INS_mov) || (ins == INS_ldr))
+ {
+ if (EA_SIZE(attr) == EA_4BYTE)
+ {
+ emitIns_R_R_I(INS_ldr, attr, ireg, reg, offs);
+ return;
+ }
+ }
+    else if (ins == INS_vldr)
+    {
+        emitIns_R_R_I(ins, attr, ireg, reg, offs);
+        return;
+    }
+    NYI("emitIns_R_AR");
+}
+
+void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
+{
+ if (emitInsIsLoad(ins))
+ {
+ // We can use ireg to load the constant address,
+ // as long as it is not a floating point register
+ regNumber regTmp = ireg;
+
+ if (isFloatReg(regTmp))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"emitIns_R_AI with floating point reg");
+#else // LEGACY_BACKEND
+ regTmp = codeGen->regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(ireg));
+#endif // LEGACY_BACKEND
+ }
+
+ codeGen->instGen_Set_Reg_To_Imm(EA_IS_RELOC(attr) ? EA_HANDLE_CNS_RELOC : EA_PTRSIZE, regTmp, disp);
+ emitIns_R_R_I(ins, EA_TYPE(attr), ireg, regTmp, 0);
+ return;
+ }
+ NYI("emitIns_R_AI");
+}
+
+void emitter::emitIns_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+ emitIns_R_R_I(ins, attr, ireg, reg, offs);
+}
+
+void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ if (ins == INS_lea)
+ {
+ emitIns_R_R_R(INS_add, attr, ireg, reg, rg2);
+ if (disp != 0)
+ {
+ emitIns_R_R_I(INS_add, attr, ireg, ireg, disp);
+ }
+ return;
+ }
+ else if (emitInsIsLoad(ins))
+ {
+ if (disp == 0)
+ {
+ emitIns_R_R_R_I(ins, attr, ireg, reg, rg2, 0, INS_FLAGS_DONT_CARE, INS_OPTS_NONE);
+ return;
+ }
+ }
+ assert(!"emitIns_R_ARR: Unexpected instruction");
+}
+
+void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Store() to select the correct instruction");
+ }
+ if (emitInsIsStore(ins))
+ {
+ if (disp == 0)
+ {
+ emitIns_R_R_R(ins, attr, ireg, reg, rg2);
+ }
+ else
+ {
+ emitIns_R_R_R(INS_add, attr, ireg, reg, rg2);
+ emitIns_R_R_I(ins, attr, ireg, ireg, disp);
+ }
+ return;
+ }
+ assert(!"emitIns_ARR_R: Unexpected instruction");
+}
+
+void emitter::emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp)
+{
+ if (ins == INS_mov)
+ {
+ assert(!"Please use ins_Load() to select the correct instruction");
+ }
+
+ unsigned shift = genLog2((unsigned)mul);
+
+ if ((ins == INS_lea) || emitInsIsLoad(ins))
+ {
+ if (ins == INS_lea)
+ {
+ ins = INS_add;
+ }
+ if (disp == 0)
+ {
+ emitIns_R_R_R_I(ins, attr, ireg, reg, rg2, (int)shift, INS_FLAGS_DONT_CARE, INS_OPTS_LSL);
+ return;
+ }
+ else
+ {
+ bool useForm2 = false;
+ bool mustUseForm1 = ((disp % mul) != 0) || (reg == ireg);
+ if (!mustUseForm1)
+ {
+                // If all of the conditions below are true we can use Form2: a Thumb-1 add
+                //  instruction followed by a Thumb-2 load instruction.
+                // We also use Form1 when reg is a low register, since the second instruction
+                //  can then always be generated using a Thumb-1 encoding.
+                //
+ if ((reg >= REG_R8) && (ireg < REG_R8) && (rg2 < REG_R8) && ((disp >> shift) <= 7))
+ {
+ useForm2 = true;
+ }
+ }
+
+ if (useForm2)
+ {
+ // Form2:
+ // Thumb-1 instruction add Rd, Rx, disp>>shift
+ // Thumb-2 instructions ldr Rd, Rb, Rd LSL shift
+ //
+ emitIns_R_R_I(INS_add, EA_4BYTE, ireg, rg2, disp >> shift);
+ emitIns_R_R_R_I(ins, attr, ireg, reg, ireg, shift, INS_FLAGS_NOT_SET, INS_OPTS_LSL);
+ }
+ else
+ {
+ // Form1:
+ // Thumb-2 instruction add Rd, Rb, Rx LSL shift
+ // Thumb-1/2 instructions ldr Rd, Rd, disp
+ //
+ emitIns_R_R_R_I(INS_add, attr, ireg, reg, rg2, shift, INS_FLAGS_NOT_SET, INS_OPTS_LSL);
+ emitIns_R_R_I(ins, attr, ireg, ireg, disp);
+ }
+ return;
+ }
+ }
+
+ assert(!"emitIns_R_ARX: Unexpected instruction");
+}
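+
+// Illustrative sketch (not part of the emitter): with ins = INS_ldr, ireg = r0, reg = r5,
+// rg2 = r1, mul = 4 (shift = 2) and disp = 8, Form1 above is chosen and roughly expands to
+// the two instructions below; the register choices are assumed for illustration.
+//
+//     add r0, r5, r1, LSL #2   // emitIns_R_R_R_I(INS_add, ...)
+//     ldr r0, [r0, #8]         // emitIns_R_R_I(ins, ...)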
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the short encoding
+ *
+ */
+void emitter::emitSetShortJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ return;
+
+ if (emitIsCondJump(id))
+ {
+ id->idInsFmt(IF_T1_K);
+ }
+ else if (emitIsCmpJump(id))
+ {
+ // These are always only ever short!
+ assert(id->idjShort);
+ return;
+ }
+ else if (emitIsUncondJump(id))
+ {
+ id->idInsFmt(IF_T1_M);
+ }
+ else if (emitIsLoadLabel(id))
+ {
+ return; // Keep long - we don't know the alignment of the target
+ }
+ else
+ {
+ assert(!"Unknown instruction in emitSetShortJump()");
+ }
+
+ id->idjShort = true;
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[8] Converting jump %u to short\n", id->idDebugOnlyInfo()->idNum);
+ }
+#endif // DEBUG_EMIT
+
+ insSize isz = emitInsSize(id->idInsFmt());
+ id->idInsSize(isz);
+}
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the medium encoding
+ *
+ */
+void emitter::emitSetMediumJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ return;
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[9] Converting jump %u to medium\n", id->idDebugOnlyInfo()->idNum);
+ }
+#endif // DEBUG_EMIT
+
+ assert(emitIsCondJump(id));
+ id->idInsFmt(IF_T2_J1);
+ id->idjShort = false;
+
+ insSize isz = emitInsSize(id->idInsFmt());
+ id->idInsSize(isz);
+}
+
+/*****************************************************************************
+ *
+ * Add a jmp instruction.
+ *  When dst is NULL, instrCount specifies the number of instructions
+ * to jump: positive is forward, negative is backward.
+ * Unconditional branches have two sizes: short and long.
+ * Conditional branches have three sizes: short, medium, and long. A long
+ * branch is a pseudo-instruction that represents two instructions:
+ * a short conditional branch to branch around a large unconditional
+ * branch. Thus, we can handle branch offsets of imm24 instead of just imm20.
+ */
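+// For example, an out-of-range conditional branch such as "beq Ltarget" is conceptually
+// emitted as the pair below, where only the unconditional branch needs the full imm24
+// range (the label names are illustrative):
+//
+//         bne  Lskip      // inverted condition, short branch
+//         b    Ltarget    // unconditional branch, imm24 range
+//     Lskip:
+//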
+
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
+{
+ insFormat fmt = IF_NONE;
+
+ if (dst != NULL)
+ {
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+ }
+ else
+ {
+ assert(instrCount != 0);
+ }
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_b:
+ fmt = IF_T2_J2; /* Assume the jump will be long */
+ break;
+
+ case INS_beq:
+ case INS_bne:
+ case INS_bhs:
+ case INS_blo:
+ case INS_bmi:
+ case INS_bpl:
+ case INS_bvs:
+ case INS_bvc:
+ case INS_bhi:
+ case INS_bls:
+ case INS_bge:
+ case INS_blt:
+ case INS_bgt:
+ case INS_ble:
+ fmt = IF_LARGEJMP; /* Assume the jump will be long */
+ break;
+
+ default:
+ unreached();
+ }
+ assert((fmt == IF_LARGEJMP) || (fmt == IF_T2_J2));
+
+ instrDescJmp* id = emitNewInstrJmp();
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+#ifdef DEBUG
+ // Mark the finally call
+ if (ins == INS_b && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ id->idDebugOnlyInfo()->idFinallyCall = true;
+ }
+#endif // DEBUG
+
+ /* Assume the jump will be long */
+
+ id->idjShort = 0;
+ if (dst != NULL)
+ {
+ id->idAddr()->iiaBBlabel = dst;
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress) // Force long branches
+ id->idjKeepLong = 1;
+#endif // DEBUG
+ }
+ else
+ {
+ id->idAddr()->iiaSetInstrCount(instrCount);
+ id->idjKeepLong = false;
+ /* This jump must be short */
+ emitSetShortJump(id);
+ id->idSetIsBound();
+ }
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ /* Figure out the max. size of the jump/call instruction */
+
+ if (!id->idjKeepLong)
+ {
+ insGroup* tgt = NULL;
+
+ /* Can we guess at the jump distance? */
+
+ if (dst != NULL)
+ {
+ tgt = (insGroup*)emitCodeGetCookie(dst);
+ }
+
+ if (tgt)
+ {
+ UNATIVE_OFFSET srcOffs;
+ int jmpDist;
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+
+ /* This is a backward jump - figure out the distance */
+
+ srcOffs = emitCurCodeOffset + emitCurIGsize;
+
+ /* Compute the distance estimate */
+
+ jmpDist = srcOffs - tgt->igOffs;
+ assert(jmpDist >= 0);
+ jmpDist += 4; // Adjustment for ARM PC
+
+ switch (fmt)
+ {
+ case IF_T2_J2:
+ if (JMP_DIST_SMALL_MAX_NEG <= -jmpDist)
+ {
+ /* This jump surely will be short */
+ emitSetShortJump(id);
+ }
+ break;
+
+ case IF_LARGEJMP:
+ if (JCC_DIST_SMALL_MAX_NEG <= -jmpDist)
+ {
+ /* This jump surely will be short */
+ emitSetShortJump(id);
+ }
+ else if (JCC_DIST_MEDIUM_MAX_NEG <= -jmpDist)
+ {
+ /* This jump surely will be medium */
+ emitSetMediumJump(id);
+ }
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+ }
+ }
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a label instruction.
+ */
+
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ insFormat fmt = IF_NONE;
+
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_movt:
+ case INS_movw:
+ fmt = IF_T2_N1;
+ break;
+ default:
+ unreached();
+ }
+ assert(fmt == IF_T2_N1);
+
+ instrDescJmp* id = emitNewInstrJmp();
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idReg1(reg);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+#ifdef DEBUG
+ // Mark the catch return
+ if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ id->idDebugOnlyInfo()->idCatchRet = true;
+ }
+#endif // DEBUG
+
+ id->idAddr()->iiaBBlabel = dst;
+ id->idjShort = false;
+ id->idjKeepLong = true;
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+ // Set the relocation flags - these give hint to zap to perform
+ // relocation of the specified 32bit address.
+ id->idSetRelocFlags(attr);
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a data label instruction.
+ */
+
+void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg)
+{
+ noway_assert((ins == INS_movw) || (ins == INS_movt));
+
+ insFormat fmt = IF_T2_N2;
+ instrDesc* id = emitNewInstrSC(attr, offs);
+ insSize isz = emitInsSize(fmt);
+
+ id->idIns(ins);
+ id->idReg1(reg);
+ id->idInsFmt(fmt);
+ id->idInsSize(isz);
+
+#if RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ // Set the relocation flags - these give hint to zap to perform
+ // relocation of the specified 32bit address.
+ id->idSetRelocFlags(attr);
+ }
+#endif // RELOC_SUPPORT
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ instrDescJmp* id;
+ if (ins == INS_adr)
+ {
+ id = emitNewInstrLbl();
+
+ id->idIns(INS_adr);
+ id->idInsFmt(IF_T2_M1);
+ id->idInsSize(emitInsSize(IF_T2_M1));
+ id->idAddr()->iiaBBlabel = dst;
+ id->idReg1(reg);
+ id->idReg2(REG_PC);
+
+ /* Assume the label reference will be long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+ }
+ else
+ {
+        assert((ins == INS_cbz) || (ins == INS_cbnz));
+ assert(isLowRegister(reg));
+ id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idInsFmt(IF_T1_I);
+ id->idInsSize(emitInsSize(IF_T1_I));
+ id->idReg1(reg);
+
+ /* This jump better be short or-else! */
+ id->idjShort = true;
+ id->idAddr()->iiaBBlabel = dst;
+ id->idjKeepLong = false;
+ }
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
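+
+// Illustrative sketch (not part of the emitter): a compare-and-branch-if-zero on a low
+// register against a (hypothetical) target block could be requested as below; cbz/cbnz
+// are only ever emitted in their short Thumb-1 form.
+//
+//     emitIns_J_R(INS_cbz, EA_4BYTE, skipBlock, REG_R0); // cbz r0, <skipBlock>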
+
+/*****************************************************************************
+ *
+ * Add a call instruction (direct or indirect).
+ * argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN : addr is the method address
+ * EC_FUNC_ADDR : addr is the absolute address of the function
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R : "call ireg".
+ *
+ * For ARM xreg, xmul and disp are never used and should always be 0/REG_NA.
+ *
+ * Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+void emitter::emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd, // used for pretty printing
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset /* = BAD_IL_OFFSET */,
+ regNumber ireg /* = REG_NA */,
+ regNumber xreg /* = REG_NA */,
+ unsigned xmul /* = 0 */,
+ int disp /* = 0 */,
+ bool isJump /* = false */,
+ bool isNoGC /* = false */,
+ bool isProfLeaveCB /* = false */)
+{
+ /* Sanity check the arguments depending on callType */
+
+ assert(callType < EC_COUNT);
+ assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_ADDR) ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType < EC_INDIR_R || addr == NULL);
+ assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+
+ // ARM never uses these
+ assert(xreg == REG_NA && xmul == 0 && disp == 0);
+
+    // Our stack level should always be at least as large as the number of bytes of
+    // arguments we push. Just a sanity check.
+ assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
+
+ int argCnt;
+ instrDesc* id;
+
+ /* This is the saved set of registers after a normal call */
+ regMaskTP savedSet = RBM_CALLEE_SAVED;
+
+    /* some special helper calls have a different saved set of registers */
+
+ if (isNoGC)
+ {
+ assert(emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+
+ // This call will preserve the liveness of most registers
+ //
+ // - On the ARM the NOGC helpers will preserve all registers,
+ // except for those listed in the RBM_CALLEE_TRASH_NOGC mask
+
+ savedSet = RBM_ALLINT & ~RBM_CALLEE_TRASH_NOGC;
+
+ // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH
+ if (isProfLeaveCB)
+ {
+ savedSet |= RBM_PROFILER_RET_SCRATCH;
+ }
+ }
+ else
+ {
+ assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+ }
+
+ /* Trim out any callee-trashed registers from the live set */
+
+ gcrefRegs &= savedSet;
+ byrefRegs &= savedSet;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+ dumpConvertedVarSet(emitComp, ptrVars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+
+ assert(argSize % (int)sizeof(void*) == 0);
+ argCnt = argSize / (int)sizeof(void*);
+
+#ifdef DEBUGGING_SUPPORT
+ /* Managed RetVal: emit sequence point for the call */
+ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
+ {
+ codeGen->genIPmappingAdd(ilOffset, false);
+ }
+#endif
+
+ /*
+ We need to allocate the appropriate instruction descriptor based
+ on whether this is a direct/indirect call, and whether we need to
+ record an updated set of live GC variables.
+
+       The stats for a ton of classes are as follows:
+
+ Direct call w/o GC vars 220,216
+ Indir. call w/o GC vars 144,781
+
+ Direct call with GC vars 9,440
+ Indir. call with GC vars 5,768
+ */
+
+ if (callType >= EC_INDIR_R)
+ {
+ /* Indirect call, virtual calls */
+
+ assert(callType == EC_INDIR_R);
+
+ id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize);
+ }
+ else
+ {
+ /* Helper/static/nonvirtual/function calls (direct or through handle),
+ and calls to an absolute addr. */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize);
+ }
+
+ /* Update the emitter's live GC ref sets */
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+ emitThisGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = byrefRegs;
+
+ /* Set the instruction - special case jumping a function */
+ instruction ins;
+ insFormat fmt = IF_NONE;
+
+ id->idSetIsNoGC(isNoGC);
+
+ /* Record the address: method, indirection, or funcptr */
+
+ if (callType > EC_FUNC_ADDR)
+ {
+ /* This is an indirect call (either a virtual call or func ptr call) */
+
+ switch (callType)
+ {
+ case EC_INDIR_R: // the address is in a register
+
+ id->idSetIsCallRegPtr();
+
+ if (isJump)
+ {
+ ins = INS_bx; // INS_bx Reg
+ }
+ else
+ {
+ ins = INS_blx; // INS_blx Reg
+ }
+ fmt = IF_T1_D2;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(emitInsSize(fmt));
+ id->idReg3(ireg);
+ assert(xreg == REG_NA);
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ }
+ else
+ {
+ /* This is a simple direct call: "call helper/method/addr" */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ assert(addr != NULL);
+ assert(codeGen->validImmForBL((ssize_t)addr));
+
+ if (isJump)
+ {
+ ins = INS_b; // INS_b imm24
+ }
+ else
+ {
+ ins = INS_bl; // INS_bl imm24
+ }
+
+ fmt = IF_T2_J3;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsSize(emitInsSize(fmt));
+
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+
+ if (callType == EC_FUNC_ADDR)
+ {
+ id->idSetIsCallAddr();
+ }
+
+#if RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ // Since this is an indirect call through a pointer and we don't
+            // currently pass an emitAttr into this function, we have decided
+ // to always mark the displacement as being relocatable.
+
+ id->idSetIsDspReloc();
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ if (id->idIsLargeCall())
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ }
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+ id->idDebugOnlyInfo()->idClsCookie = 0;
+ id->idDebugOnlyInfo()->idCallSig = sigInfo;
+#endif
+
+#if defined(LATE_DISASM)
+ if (addr != nullptr)
+ {
+ codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+ }
+#endif // defined(LATE_DISASM)
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (any-reg) to be used in
+ * a Thumb-1 encoding in the M4 position
+ */
+
+inline unsigned insEncodeRegT1_M4(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 3;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (any-reg) to be used in
+ * a Thumb-1 encoding in the D4 position
+ */
+
+inline unsigned insEncodeRegT1_D4(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return (reg & 0x7) | ((reg & 0x8) << 4);
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the M3 position
+ */
+
+inline unsigned insEncodeRegT1_M3(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg << 6;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the N3 position
+ */
+
+inline unsigned insEncodeRegT1_N3(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg << 3;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the D3 position
+ */
+
+inline unsigned insEncodeRegT1_D3(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg;
+}
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register (low-only) to be used in
+ * a Thumb-1 encoding in the DI position
+ */
+
+inline unsigned insEncodeRegT1_DI(regNumber reg)
+{
+ assert(reg < REG_R8);
+
+ return reg << 8;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the N position
+ */
+
+inline unsigned insEncodeRegT2_N(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 16;
+}
+
+inline unsigned floatRegIndex(regNumber reg, int size)
+{
+ // theoretically this could support quad floats as well but for now...
+ assert(size == EA_8BYTE || size == EA_4BYTE);
+
+ if (size == EA_8BYTE)
+ assert(emitter::isDoubleReg(reg));
+ else
+ assert(emitter::isFloatReg(reg));
+
+ unsigned result = reg - REG_F0;
+
+ // the assumption here is that the register F8 also refers to D4
+ if (size == EA_8BYTE)
+ {
+ result >>= 1;
+ }
+
+ return result;
+}
+
+// variant: some ARM VFP instructions use the convention that,
+// for doubles, the split bit holds the MSB of the register index,
+// while for singles it holds the LSB.
+// Excerpt from the architecture pseudo-code:
+//   d = if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+
+inline unsigned floatRegEncoding(unsigned index, int size, bool variant = false)
+{
+ if (!variant || size == EA_8BYTE)
+ return index;
+ else
+ {
+ return ((index & 1) << 4) | (index >> 1);
+ }
+}
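+
+// Illustrative sketch (not part of the emitter): for the single-precision register s3
+// (index 3) with variant == true, the index is rearranged to
+// ((3 & 1) << 4) | (3 >> 1) = 0x11, i.e. the LSB of the index becomes the split bit;
+// for doubles the index is returned unchanged.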
+
+// thumb2 VFP M register encoding
+inline unsigned insEncodeRegT2_VectorM(regNumber reg, int size, bool variant)
+{
+ unsigned enc = floatRegIndex(reg, size);
+ enc = floatRegEncoding(enc, size, variant);
+ return ((enc & 0xf) << 0) | ((enc & 0x10) << 1);
+}
+
+// thumb2 VFP N register encoding
+inline unsigned insEncodeRegT2_VectorN(regNumber reg, int size, bool variant)
+{
+ unsigned enc = floatRegIndex(reg, size);
+ enc = floatRegEncoding(enc, size, variant);
+ return ((enc & 0xf) << 16) | ((enc & 0x10) << 3);
+}
+
+// thumb2 VFP D register encoding
+inline unsigned insEncodeRegT2_VectorD(regNumber reg, int size, bool variant)
+{
+ unsigned enc = floatRegIndex(reg, size);
+ enc = floatRegEncoding(enc, size, variant);
+ return ((enc & 0xf) << 12) | ((enc & 0x10) << 18);
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the T position
+ */
+
+inline unsigned insEncodeRegT2_T(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 12;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the D position
+ */
+
+inline unsigned insEncodeRegT2_D(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg << 8;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in
+ * a Thumb-2 encoding in the M position
+ */
+
+inline unsigned insEncodeRegT2_M(regNumber reg)
+{
+ assert(reg < REG_STK);
+
+ return reg;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Set Flags bit to be used in a Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodeSetFlags(insFlags sf)
+{
+ if (sf == INS_FLAGS_SET)
+ return (1 << 20);
+ else
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Shift Type bits to be used in a Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodeShiftOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_NONE)
+ return 0;
+ else if (opt == INS_OPTS_LSL)
+ return 0x00;
+ else if (opt == INS_OPTS_LSR)
+ return 0x10;
+ else if (opt == INS_OPTS_ASR)
+ return 0x20;
+ else if (opt == INS_OPTS_ROR)
+ return 0x30;
+ else if (opt == INS_OPTS_RRX)
+ return 0x30;
+
+ assert(!"Invalid insOpts");
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the PUW bits to be used in a T2_G0 Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodePUW_G0(insOpts opt, int imm)
+{
+ unsigned result = 0;
+
+ if (opt != INS_OPTS_LDST_POST_INC)
+ result |= (1 << 24); // The P bit
+
+ if (imm >= 0)
+ result |= (1 << 23); // The U bit
+
+ if (opt != INS_OPTS_NONE)
+ result |= (1 << 21); // The W bits
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the PUW bits to be used in a T2_H0 Thumb-2 encoding
+ */
+
+unsigned emitter::insEncodePUW_H0(insOpts opt, int imm)
+{
+ unsigned result = 0;
+
+ if (opt != INS_OPTS_LDST_POST_INC)
+ result |= (1 << 10); // The P bit
+
+ if (imm >= 0)
+ result |= (1 << 9); // The U bit
+
+ if (opt != INS_OPTS_NONE)
+ result |= (1 << 8); // The W bits
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Shift Count bits to be used in a Thumb-2 encoding
+ */
+
+inline unsigned insEncodeShiftCount(int imm)
+{
+ unsigned result;
+
+ assert((imm & 0x001F) == imm);
+ result = (imm & 0x03) << 6;
+ result |= (imm & 0x1C) << 10;
+
+ return result;
+}
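+
+// Illustrative sketch (not part of the emitter): a shift count of 5 (0b00101) is split
+// into imm2 = 0b01 at bits [7:6] and imm3 = 0b001 at bits [14:12], giving
+// (1 << 6) | (1 << 12) = 0x1040.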
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the immediate use by BFI/BFC Thumb-2 encodings
+ */
+
+inline unsigned insEncodeBitFieldImm(int imm)
+{
+ unsigned result;
+
+ assert((imm & 0x03FF) == imm);
+ result = (imm & 0x001f);
+ result |= (imm & 0x0060) << 1;
+ result |= (imm & 0x0380) << 5;
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Unscales the immediate based on the operand size in 'size'
+ */
+/*static*/ int emitter::insUnscaleImm(int imm, emitAttr size)
+{
+ switch (size)
+ {
+ case EA_8BYTE:
+ case EA_4BYTE:
+ assert((imm & 0x0003) == 0);
+ imm >>= 2;
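+ // e.g. a word-sized access with a byte offset of 8 is encoded with imm = 2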
+ break;
+
+ case EA_2BYTE:
+ assert((imm & 0x0001) == 0);
+ imm >>= 1;
+ break;
+
+ case EA_1BYTE:
+ // Do nothing
+ break;
+
+ default:
+ assert(!"Invalid value in size");
+ break;
+ }
+ return imm;
+}
+
+/*****************************************************************************
+ *
+ * Emit a Thumb-1 instruction (a 16-bit integer as code)
+ */
+
+/*static*/ unsigned emitter::emitOutput_Thumb1Instr(BYTE* dst, ssize_t code)
+{
+ unsigned short word1 = code & 0xffff;
+ assert(word1 == code);
+
+#ifdef DEBUG
+ unsigned short top5bits = (word1 & 0xf800) >> 11;
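+ // Opcodes whose top five bits are 29 (0b11101), 30 (0b11110) or 31 (0b11111) are the first
+ // halfword of a 32-bit Thumb-2 encoding, so a valid 16-bit Thumb-1 opcode must start below 29.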
+ assert(top5bits < 29);
+#endif
+
+ MISALIGNED_WR_I2(dst, word1);
+
+ return sizeof(short);
+}
+/*****************************************************************************
+ *
+ * Emit a Thumb-2 instruction (two 16-bit integers as code)
+ */
+
+/*static*/ unsigned emitter::emitOutput_Thumb2Instr(BYTE* dst, ssize_t code)
+{
+ unsigned short word1 = (code >> 16) & 0xffff;
+ unsigned short word2 = (code)&0xffff;
+ assert(((word1 << 16) | word2) == code);
+
+#ifdef DEBUG
+ unsigned short top5bits = (word1 & 0xf800) >> 11;
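+ // Every 32-bit Thumb-2 encoding starts with 0b11101, 0b11110 or 0b11111 (29-31) in the top
+ // five bits of its first halfword.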
+ assert(top5bits >= 29);
+#endif
+
+ MISALIGNED_WR_I2(dst, word1);
+ dst += 2;
+ MISALIGNED_WR_I2(dst, word2);
+
+ return sizeof(short) * 2;
+}
+
+/*****************************************************************************
+ *
+ * Output a local jump instruction.
+ * Note that this may be invoked to overwrite an existing jump instruction at 'dst'
+ * to handle forward branch patching.
+ */
+
+BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
+{
+ unsigned srcOffs;
+ unsigned dstOffs;
+ ssize_t distVal;
+
+ instrDescJmp* id = (instrDescJmp*)i;
+ instruction ins = id->idIns();
+ ssize_t code;
+
+ bool loadLabel = false;
+ bool isJump = false;
+ bool relAddr = true; // does the instruction use relative-addressing?
+
+ size_t sdistneg;
+
+ switch (ins)
+ {
+ default:
+ sdistneg = JCC_DIST_SMALL_MAX_NEG;
+ isJump = true;
+ break;
+
+ case INS_cbz:
+ case INS_cbnz:
+ // One size fits all!
+ sdistneg = 0;
+ isJump = true;
+ break;
+
+ case INS_adr:
+ sdistneg = LBL_DIST_SMALL_MAX_NEG;
+ loadLabel = true;
+ break;
+
+ case INS_movw:
+ case INS_movt:
+ sdistneg = LBL_DIST_SMALL_MAX_NEG;
+ relAddr = false;
+ loadLabel = true;
+ break;
+ }
+
+ /* Figure out the distance to the target */
+
+ srcOffs = emitCurCodeOffs(dst);
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ assert(ig != NULL);
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+ unsigned insNum = emitFindInsNum(ig, id);
+ if (instrCount < 0)
+ {
+ // Backward branches using instruction count must be within the same instruction group.
+ assert(insNum + 1 >= (unsigned)(-instrCount));
+ }
+ dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
+ }
+ else
+ {
+ dstOffs = id->idAddr()->iiaIGlabel->igOffs;
+ }
+
+ if (relAddr)
+ {
+ if (ins == INS_adr)
+ {
+ // for adr, the distance is calculated from 4-byte aligned srcOffs.
+ distVal = (ssize_t)((emitOffsetToPtr(dstOffs) - (BYTE*)(((size_t)emitOffsetToPtr(srcOffs)) & ~3)) + 1);
+ }
+ else
+ {
+ distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ }
+ }
+ else
+ {
+ assert(ins == INS_movw || ins == INS_movt);
+ distVal = (ssize_t)emitOffsetToPtr(dstOffs) + 1; // OR in the Thumb bit
+ }
+
+ if (dstOffs <= srcOffs)
+ {
+/* This is a backward jump - distance is known at this point */
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
+ printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
+ printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
+ }
+#endif
+
+ // The IF_T1_I format (cbz/cbnz) only supports forward branches, so it cannot appear on a backward jump
+ noway_assert(id->idInsFmt() != IF_T1_I);
+
+ /* Can we use a short jump? */
+
+ if (isJump && ((unsigned)(distVal - 4) >= (unsigned)sdistneg))
+ {
+ emitSetShortJump(id);
+ }
+ }
+ else
+ {
+ /* This is a forward jump - distance will be an upper limit */
+
+ emitFwdJumps = true;
+
+ /* The target offset will be closer by at least 'emitOffsAdj', but only if this
+ jump doesn't cross the hot-cold boundary. */
+
+ if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ dstOffs -= emitOffsAdj;
+ distVal -= emitOffsAdj;
+ }
+
+ /* Record the location of the jump for later patching */
+
+ id->idjOffs = dstOffs;
+
+ /* Are we overflowing the id->idjOffs bitfield? */
+ if (id->idjOffs != dstOffs)
+ IMPL_LIMITATION("Method is too large");
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[4] Jump block is at %08X\n", blkOffs);
+ printf("[4] Jump is at %08X\n", srcOffs);
+ printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+ }
+#endif
+ }
+
+ /* Adjust the offset to emit relative to the end of the instruction */
+
+ if (relAddr)
+ distVal -= 4;
+
+#ifdef DEBUG
+ if (0 && emitComp->verbose)
+ {
+ size_t sz = 4; // Thumb-2 pretends all instructions are 4 bytes long for computing jump offsets?
+ int distValSize = id->idjShort ? 4 : 8;
+ printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
+ dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal);
+ }
+#endif
+
+ insFormat fmt = id->idInsFmt();
+
+ if (isJump)
+ {
+ /* What size jump should we use? */
+
+ if (id->idjShort)
+ {
+ /* Short jump */
+
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+ assert(JMP_SIZE_SMALL == 2);
+
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
+
+ dst = emitOutputShortBranch(dst, ins, fmt, distVal, id);
+ }
+ else
+ {
+ /* Long jump */
+
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : NULL;
+
+ if (fmt == IF_LARGEJMP)
+ {
+ // This is a pseudo-instruction format representing a large conditional branch, to allow
+ // us to get a greater branch target range than we can get by using a straightforward conditional
+ // branch. It is encoded as a short conditional branch that branches around a long unconditional
+ // branch.
+ //
+ // Conceptually, we have:
+ //
+ // b<cond> L_target
+ //
+ // The code we emit is:
+ //
+ // b<!cond> L_not // 2 bytes. Note that we reverse the condition.
+ // b L_target // 4 bytes
+ // L_not:
+ //
+ // Note that we don't actually insert any blocks: we simply encode "b <!cond> L_not" as a branch with
+ // the correct offset. Note also that this works for both integer and floating-point conditions, because
+ // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example,
+ // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered).
+ //
+ // History: previously, we generated:
+ // it<cond>
+ // b L_target
+ // but the "it" instruction was deprecated, so we can't use it.
+
+ dst = emitOutputShortBranch(dst,
+ emitJumpKindToIns(emitReverseJumpKind(
+ emitInsToJumpKind(ins))), // reverse the conditional instruction
+ IF_T1_K,
+ 6 - 4, /* 6 bytes from start of this large conditional pseudo-instruction to
+ L_not. Jumps are encoded as offset from instr address + 4. */
+ NULL /* only used for cbz/cbnz */);
+
+ // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that.
+ ins = INS_b;
+ fmt = IF_T2_J2;
+
+ // The distVal was computed based on the beginning of the pseudo-instruction, which starts
+ // with the 2-byte reversed conditional branch emitted above. Subtract its size so that the
+ // distance is relative to the unconditional branch.
+ distVal -= 2;
+ }
+
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T2_J1)
+ {
+ // Can't use this form for jumps between the hot and cold regions
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+
+ assert((distVal & 1) == 0);
+ assert(distVal >= -1048576);
+ assert(distVal <= 1048574);
+
+ if (distVal < 0)
+ code |= 1 << 26;
+ code |= ((distVal >> 1) & 0x0007ff);
+ code |= (((distVal >> 1) & 0x01f800) << 5);
+ code |= (((distVal >> 1) & 0x020000) >> 4);
+ code |= (((distVal >> 1) & 0x040000) >> 7);
+ }
+ else if (fmt == IF_T2_J2)
+ {
+ assert((distVal & 1) == 0);
+#ifdef RELOC_SUPPORT
+ if (emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ // dst isn't an actual final target location, just some intermediate
+ // location. Thus we cannot make any guarantees about distVal (not
+ // even the direction/sign). Instead we don't encode any offset and
+ // rely on the relocation to do all the work
+ }
+ else
+#endif
+ {
+ assert(distVal >= -16777216);
+ assert(distVal <= 16777214);
+
+ if (distVal < 0)
+ code |= 1 << 26;
+ code |= ((distVal >> 1) & 0x0007ff);
+ code |= (((distVal >> 1) & 0x1ff800) << 5);
+
+ bool S = (distVal < 0);
+ bool I1 = ((distVal & 0x00800000) == 0);
+ bool I2 = ((distVal & 0x00400000) == 0);
+
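+ // Per the ARM B.W (T4) encoding, J1 = NOT(I1) EOR S and J2 = NOT(I2) EOR S; the locals
+ // I1/I2 above already hold the negated offset bits 23/22, so S ^ I1 and S ^ I2 yield
+ // the J1/J2 opcode bits directly.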
+ if (S ^ I1)
+ code |= (1 << 13); // J1 bit
+ if (S ^ I2)
+ code |= (1 << 11); // J2 bit
+ }
+ }
+ else
+ {
+ assert(!"Unknown fmt");
+ }
+
+ unsigned instrSize = emitOutput_Thumb2Instr(dst, code);
+
+#ifdef RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ assert(id->idjKeepLong);
+ if (emitComp->info.compMatchedVM)
+ {
+ void* target = emitOffsetToPtr(dstOffs);
+ emitRecordRelocation((void*)dst, target, IMAGE_REL_BASED_THUMB_BRANCH24);
+ }
+ }
+ }
+#endif // RELOC_SUPPORT
+
+ dst += instrSize;
+ }
+ }
+ else if (loadLabel)
+ {
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
+
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T1_J3)
+ {
+ assert((dstOffs & 3) == 0); // The target label must be 4-byte aligned
+ assert(distVal >= 0);
+ assert(distVal <= 1022);
+ code |= ((distVal >> 2) & 0xff);
+
+ dst += emitOutput_Thumb1Instr(dst, code);
+ }
+ else if (fmt == IF_T2_M1)
+ {
+ assert(distVal >= -4095);
+ assert(distVal <= +4095);
+ if (distVal < 0)
+ {
+ code |= 0x00A0 << 16;
+ distVal = -distVal;
+ }
+ assert((distVal & 0x0fff) == distVal);
+ code |= (distVal & 0x00ff);
+ code |= ((distVal & 0x0700) << 4);
+
+ code |= ((distVal & 0x0800) << 15);
+ code |= id->idReg1() << 8;
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+ }
+ else if (fmt == IF_T2_N1)
+ {
+ code |= insEncodeRegT2_D(id->idReg1());
+ unsigned imm = distVal;
+ if (ins == INS_movw)
+ {
+ imm &= 0xffff;
+ }
+ else
+ {
+ imm = (imm >> 16) & 0xffff;
+ }
+ ((instrDescJmp*)id)->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : NULL;
+
+ assert((imm & 0x0000ffff) == imm);
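+ // Scatter the 16-bit immediate into the movw/movt encoding: value[7:0] -> imm8 (bits 7:0),
+ // value[10:8] -> imm3 (bits 14:12), value[11] -> i (bit 26), value[15:12] -> imm4 (bits 19:16).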
+ code |= (imm & 0x00ff);
+ code |= ((imm & 0x0700) << 4);
+ code |= ((imm & 0x0800) << 15);
+ code |= ((imm & 0xf000) << 4);
+ dst += emitOutput_Thumb2Instr(dst, code);
+
+ if (id->idIsCnsReloc() || id->idIsDspReloc())
+ {
+ assert(ins == INS_movt || ins == INS_movw);
+ if ((ins == INS_movt) && emitComp->info.compMatchedVM)
+ emitRecordRelocation((void*)(dst - 8), (void*)distVal, IMAGE_REL_BASED_THUMB_MOV32);
+ }
+ }
+ else
+ {
+ assert(!"Unknown fmt");
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a short branch instruction.
+ */
+
+BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id)
+{
+ size_t code;
+
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T1_K)
+ {
+ assert((distVal & 1) == 0);
+ assert(distVal >= -256);
+ assert(distVal <= 254);
+
+ if (distVal < 0)
+ code |= 1 << 7;
+ code |= ((distVal >> 1) & 0x7f);
+ }
+ else if (fmt == IF_T1_M)
+ {
+ assert((distVal & 1) == 0);
+ assert(distVal >= -2048);
+ assert(distVal <= 2046);
+
+ if (distVal < 0)
+ code |= 1 << 10;
+ code |= ((distVal >> 1) & 0x3ff);
+ }
+ else if (fmt == IF_T1_I)
+ {
+ assert(id != NULL);
+ assert((ins == INS_cbz) || (ins == INS_cbnz));
+ assert((distVal & 1) == 0);
+ assert(distVal >= 0);
+ assert(distVal <= 126);
+
+ code |= ((distVal << 3) & 0x0200);
+ code |= ((distVal << 2) & 0x00F8);
+ code |= (id->idReg1() & 0x0007);
+ }
+ else
+ {
+ assert(!"Unknown fmt");
+ }
+
+ dst += emitOutput_Thumb1Instr(dst, code);
+
+ return dst;
+}
+
+#ifdef FEATURE_ITINSTRUCTION
+
+/*****************************************************************************
+ * The "IT" instruction is deprecated (with a very few exceptions). Don't generate it!
+ * Don't delete this code, though, in case we ever want to bring it back.
+ *****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Output an IT instruction.
+ */
+
+BYTE* emitter::emitOutputIT(BYTE* dst, instruction ins, insFormat fmt, ssize_t condcode)
+{
+ ssize_t imm0;
+ size_t code, mask, bit;
+
+ code = emitInsCode(ins, fmt);
+ code |= (condcode << 4); // encode firstcond
+ imm0 = condcode & 1; // this is firstcond[0]
+ mask = code & 0x0f; // initialize mask encoded in opcode
+ bit = 0x08; // where in mask we are encoding
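+ // When firstcond[0] is 1, the loop below inverts every mask bit above the terminating '1';
+ // when firstcond[0] is 0, the mask bits are left as they came from the opcode table.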
+ while ((mask & (bit - 1)) != 0) // are the remaining bits all zeros?
+ { // then we are done
+ // otherwise determine the setting of bit
+ if ((imm0 == 1) ^ ((bit & mask) != 0))
+ {
+ code |= bit; // set the current bit
+ }
+ else
+ {
+ code &= ~bit; // clear the current bit
+ }
+ bit >>= 1;
+ }
+ dst += emitOutput_Thumb1Instr(dst, code);
+
+ return dst;
+}
+
+#endif // FEATURE_ITINSTRUCTION
+
+/*****************************************************************************
+ *
+ * Output a 32-bit nop instruction.
+ */
+
+BYTE* emitter::emitOutputNOP(BYTE* dst, instruction ins, insFormat fmt)
+{
+ size_t code = emitInsCode(ins, fmt);
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Append the machine code corresponding to the given instruction descriptor
+ * to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
+ * is the instruction group that contains the instruction. Updates '*dp' to
+ * point past the generated code, and returns the size of the instruction
+ * descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ BYTE* dst = *dp;
+ BYTE* odst = dst;
+ size_t code = 0;
+ size_t sz = 0;
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+ emitAttr size = id->idOpSize();
+ unsigned char callInstrSize = 0;
+ ssize_t condcode;
+
+#ifdef DEBUG
+ bool dspOffs = emitComp->opts.dspGCtbls || !emitComp->opts.disDiffable;
+#endif // DEBUG
+
+ assert(REG_NA == (int)REG_NA);
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ /* What instruction format have we got? */
+
+ switch (fmt)
+ {
+ int imm;
+ int imm0;
+ int mask;
+ int bit;
+ BYTE* addr;
+ regMaskTP gcrefRegs;
+ regMaskTP byrefRegs;
+
+ case IF_T1_A: // T1_A ................
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+#ifdef FEATURE_ITINSTRUCTION
+ case IF_T1_B: // T1_B ........cccc.... cond
+ assert(id->idGCref() == GCT_NONE);
+ condcode = emitGetInsSC(id);
+ dst = emitOutputIT(dst, ins, fmt, condcode);
+ sz = SMALL_IDSC_SIZE;
+ break;
+#endif // FEATURE_ITINSTRUCTION
+
+ case IF_T1_C: // T1_C .....iiiiinnnddd R1 R2 imm5
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ if (emitInsIsLoadOrStore(ins))
+ {
+ imm = insUnscaleImm(imm, size);
+ }
+ assert((imm & 0x001f) == imm);
+ code |= (imm << 6);
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_D0: // T1_D0 ........Dmmmmddd R1* R2*
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D4(id->idReg1());
+ code |= insEncodeRegT1_M4(id->idReg2());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_E: // T1_E ..........nnnddd R1 R2
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_F: // T1_F .........iiiiiii SP imm7
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ imm = insUnscaleImm(imm, size);
+ assert((imm & 0x007F) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_G: // T1_G .......iiinnnddd R1 R2 imm3
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ assert((imm & 0x0007) == imm);
+ code |= (imm << 6);
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_H: // T1_H .......mmmnnnddd R1 R2 R3
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_D3(id->idReg1());
+ code |= insEncodeRegT1_N3(id->idReg2());
+ code |= insEncodeRegT1_M3(id->idReg3());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_I: // T1_I ......i.iiiiiddd R1 imm6
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_T1_J0: // T1_J0 .....dddiiiiiiii R1 imm8
+ case IF_T1_J1: // T1_J1 .....dddiiiiiiii R1 <regmask8>
+ case IF_T1_J2: // T1_J2 .....dddiiiiiiii R1 SP imm8
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_DI(id->idReg1());
+ if (fmt == IF_T1_J2)
+ {
+ imm = insUnscaleImm(imm, size);
+ }
+ assert((imm & 0x00ff) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T1_L0: // T1_L0 ........iiiiiiii imm8
+ case IF_T1_L1: // T1_L1 .......Rrrrrrrrr <regmask8>
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ if (fmt == IF_T1_L1)
+ {
+ assert((imm & 0x3) != 0x3);
+ if (imm & 0x3)
+ code |= 0x0100; // R bit
+ imm >>= 2;
+ }
+ assert((imm & 0x00ff) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb1Instr(dst, code);
+ break;
+
+ case IF_T2_A: // T2_A ................ ................
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_B: // T2_B ................ ............iiii imm4
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ assert((imm & 0x000F) == imm);
+ code |= imm;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C0: // T2_C0 ...........Snnnn .iiiddddiishmmmm R1 R2 R3 S, imm5, sh
+ case IF_T2_C4: // T2_C4 ...........Snnnn ....dddd....mmmm R1 R2 R3 S
+ case IF_T2_C5: // T2_C5 ............nnnn ....dddd....mmmm R1 R2 R3
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeRegT2_M(id->idReg3());
+ if (fmt != IF_T2_C5)
+ code |= insEncodeSetFlags(id->idInsFlags());
+ if (fmt == IF_T2_C0)
+ {
+ imm = emitGetInsSC(id);
+ code |= insEncodeShiftCount(imm);
+ code |= insEncodeShiftOpts(id->idInsOpt());
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C1: // T2_C1 ...........S.... .iiiddddiishmmmm R1 R2 S, imm5, sh
+ case IF_T2_C2: // T2_C2 ...........S.... .iiiddddii..mmmm R1 R2 S, imm5
+ case IF_T2_C6: // T2_C6 ................ ....dddd..iimmmm R1 R2 imm2
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ if (fmt == IF_T2_C6)
+ {
+ assert((imm & 0x0018) == imm);
+ code |= (imm << 1);
+ }
+ else
+ {
+ code |= insEncodeSetFlags(id->idInsFlags());
+ code |= insEncodeShiftCount(imm);
+ if (fmt == IF_T2_C1)
+ code |= insEncodeShiftOpts(id->idInsOpt());
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C3: // T2_C3 ...........S.... ....dddd....mmmm R1 R2 S
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ code |= insEncodeSetFlags(id->idInsFlags());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C7: // T2_C7 ............nnnn ..........shmmmm R1 R2 imm2
+ case IF_T2_C8: // T2_C8 ............nnnn .iii....iishmmmm R1 R2 imm5, sh
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ if (fmt == IF_T2_C7)
+ {
+ assert((imm & 0x0003) == imm);
+ code |= (imm << 4);
+ }
+ else if (fmt == IF_T2_C8)
+ {
+ code |= insEncodeShiftCount(imm);
+ code |= insEncodeShiftOpts(id->idInsOpt());
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C9: // T2_C9 ............nnnn ............mmmm R1 R2
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_C10: // T2_C10 ............mmmm ....dddd....mmmm R1 R2
+ sz = SMALL_IDSC_SIZE;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_M(id->idReg2());
+ code |= insEncodeRegT2_N(id->idReg2());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_D0: // T2_D0 ............nnnn .iiiddddii.wwwww R1 R2 imm5, imm5
+ case IF_T2_D1: // T2_D1 ................ .iiiddddii.wwwww R1 imm5, imm5
+ sz = SMALL_IDSC_SIZE;
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ if (fmt == IF_T2_D0)
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeBitFieldImm(imm);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_E0: // T2_E0 ............nnnn tttt......shmmmm R1 R2 R3 imm2
+ case IF_T2_E1: // T2_E1 ............nnnn tttt............ R1 R2
+ case IF_T2_E2: // T2_E2 ................ tttt............ R1
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+ if (fmt == IF_T2_E0)
+ {
+ sz = emitGetInstrDescSize(id);
+ code |= insEncodeRegT2_N(id->idReg2());
+ if (id->idIsLclVar())
+ {
+ code |= insEncodeRegT2_M(codeGen->rsGetRsvdReg());
+ imm = 0;
+ }
+ else
+ {
+ code |= insEncodeRegT2_M(id->idReg3());
+ imm = emitGetInsSC(id);
+ assert((imm & 0x0003) == imm);
+ code |= (imm << 4);
+ }
+ }
+ else
+ {
+ sz = SMALL_IDSC_SIZE;
+ if (fmt != IF_T2_E2)
+ {
+ code |= insEncodeRegT2_N(id->idReg2());
+ }
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_F1: // T2_F1 ............nnnn ttttdddd....mmmm R1 R2 R3 R4
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+ code |= insEncodeRegT2_D(id->idReg2());
+ code |= insEncodeRegT2_N(id->idReg3());
+ code |= insEncodeRegT2_M(id->idReg4());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_F2: // T2_F2 ............nnnn aaaadddd....mmmm R1 R2 R3 R4
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeRegT2_M(id->idReg3());
+ code |= insEncodeRegT2_T(id->idReg4());
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_G0: // T2_G0 .......PU.W.nnnn ttttTTTTiiiiiiii R1 R2 R3 imm8, PUW
+ case IF_T2_G1: // T2_G1 ............nnnn ttttTTTT........ R1 R2 R3
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+ code |= insEncodeRegT2_D(id->idReg2());
+ code |= insEncodeRegT2_N(id->idReg3());
+ if (fmt == IF_T2_G0)
+ {
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ assert(unsigned_abs(imm) <= 0x00ff);
+ code |= abs(imm);
+ code |= insEncodePUW_G0(id->idInsOpt(), imm);
+ }
+ else
+ {
+ sz = emitGetInstrDescSize(id);
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_H0: // T2_H0 ............nnnn tttt.PUWiiiiiiii R1 R2 imm8, PUW
+ case IF_T2_H1: // T2_H1 ............nnnn tttt....iiiiiiii R1 R2 imm8
+ case IF_T2_H2: // T2_H2 ............nnnn ........iiiiiiii R1 imm8
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_T(id->idReg1());
+
+ if (fmt != IF_T2_H2)
+ code |= insEncodeRegT2_N(id->idReg2());
+
+ if (fmt == IF_T2_H0)
+ {
+ assert(unsigned_abs(imm) <= 0x00ff);
+ code |= insEncodePUW_H0(id->idInsOpt(), imm);
+ code |= unsigned_abs(imm);
+ }
+ else
+ {
+ assert((imm & 0x00ff) == imm);
+ code |= imm;
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_I0: // T2_I0 ..........W.nnnn rrrrrrrrrrrrrrrr R1 W, imm16
+ case IF_T2_I1: // T2_I1 ................ rrrrrrrrrrrrrrrr imm16
+ sz = emitGetInstrDescSizeSC(id);
+ code = emitInsCode(ins, fmt);
+ if (fmt == IF_T2_I0)
+ {
+ code |= insEncodeRegT2_N(id->idReg1());
+ code |= (1 << 21); // W bit
+ }
+ imm = emitGetInsSC(id);
+ assert((imm & 0x3) != 0x3);
+ if (imm & 0x2)
+ code |= 0x8000; // PC bit
+ if (imm & 0x1)
+ code |= 0x4000; // LR bit
+ imm >>= 2;
+ assert(imm <= 0x1fff); // 13 bits
+ code |= imm;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_K1: // T2_K1 ............nnnn ttttiiiiiiiiiiii R1 R2 imm12
+ case IF_T2_K4: // T2_K4 ........U....... ttttiiiiiiiiiiii R1 PC U, imm12
+ case IF_T2_K3: // T2_K3 ........U....... ....iiiiiiiiiiii PC U, imm12
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ if (fmt != IF_T2_K3)
+ {
+ code |= insEncodeRegT2_T(id->idReg1());
+ }
+ if (fmt == IF_T2_K1)
+ {
+ code |= insEncodeRegT2_N(id->idReg2());
+ assert(imm <= 0xfff); // 12 bits
+ code |= imm;
+ }
+ else
+ {
+ assert(unsigned_abs(imm) <= 0xfff); // 12 bits (signed)
+ code |= abs(imm);
+ if (imm >= 0)
+ code |= (1 << 23); // U bit
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_K2: // T2_K2 ............nnnn ....iiiiiiiiiiii R1 imm12
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg1());
+ assert(imm <= 0xfff); // 12 bits
+ code |= imm;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_L0: // T2_L0 .....i.....Snnnn .iiiddddiiiiiiii R1 R2 S, imm8<<imm4
+ case IF_T2_L1: // T2_L1 .....i.....S.... .iiiddddiiiiiiii R1 S, imm8<<imm4
+ case IF_T2_L2: // T2_L2 .....i......nnnn .iii....iiiiiiii R1 imm8<<imm4
+ sz = emitGetInstrDescSize(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_T2_L2)
+ code |= insEncodeRegT2_N(id->idReg1());
+ else
+ {
+ code |= insEncodeSetFlags(id->idInsFlags());
+ code |= insEncodeRegT2_D(id->idReg1());
+ if (fmt == IF_T2_L0)
+ code |= insEncodeRegT2_N(id->idReg2());
+ }
+ assert(isModImmConst(imm)); // Funky ARM imm encoding
+ imm = encodeModImmConst(imm);
+ assert(imm <= 0xfff); // 12 bits
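+ // Scatter the 12-bit modified-immediate encoding i:imm3:imm8 into the opcode:
+ // imm8 -> bits 7:0, imm3 -> bits 14:12, i -> bit 26.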
+ code |= (imm & 0x00ff);
+ code |= (imm & 0x0700) << 4;
+ code |= (imm & 0x0800) << 15;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_M0: // T2_M0 .....i......nnnn .iiiddddiiiiiiii R1 R2 imm12
+ sz = emitGetInstrDescSizeSC(id);
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ if (fmt == IF_T2_M0)
+ code |= insEncodeRegT2_N(id->idReg2());
+ assert(imm <= 0xfff); // 12 bits
+ code |= (imm & 0x00ff);
+ code |= (imm & 0x0700) << 4;
+ code |= (imm & 0x0800) << 15;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_N: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ case IF_T2_N2: // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16
+ sz = emitGetInstrDescSizeSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_D(id->idReg1());
+ imm = emitGetInsSC(id);
+ if (fmt == IF_T2_N2)
+ {
+ assert(!id->idIsLclVar());
+ assert((ins == INS_movw) || (ins == INS_movt));
+ imm += (size_t)emitConsBlock;
+#ifdef RELOC_SUPPORT
+ if (!id->idIsCnsReloc() && !id->idIsDspReloc())
+#endif
+ {
+ goto SPLIT_IMM;
+ }
+ }
+ else if (id->idIsLclVar())
+ {
+ SPLIT_IMM:
+ if (ins == INS_movw)
+ {
+ imm &= 0xffff;
+ }
+ else
+ {
+ imm = (imm >> 16) & 0xffff;
+ }
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc() || id->idIsDspReloc())
+ {
+ assert((ins == INS_movt) || (ins == INS_movw));
+ dst += emitOutput_Thumb2Instr(dst, code);
+ if ((ins == INS_movt) && emitComp->info.compMatchedVM)
+ emitRecordRelocation((void*)(dst - 8), (void*)imm, IMAGE_REL_BASED_THUMB_MOV32);
+ }
+ else
+#endif // RELOC_SUPPORT
+ {
+ assert((imm & 0x0000ffff) == imm);
+ code |= (imm & 0x00ff);
+ code |= ((imm & 0x0700) << 4);
+ code |= ((imm & 0x0800) << 15);
+ code |= ((imm & 0xf000) << 4);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ }
+ break;
+
+ case IF_T2_VFP3:
+ // these are the binary operators
+ // d = n <op> m
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_VectorN(id->idReg2(), size, true);
+ code |= insEncodeRegT2_VectorM(id->idReg3(), size, true);
+ code |= insEncodeRegT2_VectorD(id->idReg1(), size, true);
+ if (size == EA_8BYTE)
+ code |= 1 << 8;
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_VFP2:
+ {
+ emitAttr srcSize;
+ emitAttr dstSize;
+ size_t szCode = 0;
+
+ switch (ins)
+ {
+ case INS_vcvt_i2d:
+ case INS_vcvt_u2d:
+ case INS_vcvt_f2d:
+ srcSize = EA_4BYTE;
+ dstSize = EA_8BYTE;
+ break;
+
+ case INS_vcvt_d2i:
+ case INS_vcvt_d2u:
+ case INS_vcvt_d2f:
+ srcSize = EA_8BYTE;
+ dstSize = EA_4BYTE;
+ break;
+
+ case INS_vmov:
+ case INS_vabs:
+ case INS_vsqrt:
+ case INS_vcmp:
+ case INS_vneg:
+ if (id->idOpSize() == EA_8BYTE)
+ szCode |= (1 << 8);
+ __fallthrough;
+
+ default:
+ srcSize = dstSize = id->idOpSize();
+ break;
+ }
+
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ code |= szCode;
+ code |= insEncodeRegT2_VectorD(id->idReg1(), dstSize, true);
+ code |= insEncodeRegT2_VectorM(id->idReg2(), srcSize, true);
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+ }
+
+ case IF_T2_VLDST:
+ sz = emitGetInstrDescSizeSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT2_N(id->idReg2());
+ code |= insEncodeRegT2_VectorD(id->idReg1(), size, true);
+
+ imm = emitGetInsSC(id);
+ if (imm < 0)
+ imm = -imm; // leaving bit 23 clear means the offset is subtracted
+ else
+ code |= 1 << 23; // set the positive bit
+
+ // offset is +/- 1020
+ assert(!(imm % 4));
+ assert(imm >> 10 == 0);
+ code |= imm >> 2;
+ // bit 8 is set for doubles
+ if (id->idOpSize() == EA_8BYTE)
+ code |= (1 << 8);
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_VMOVD:
+ // 3op assemble a double from two int regs (or back)
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ if (ins == INS_vmov_i2d)
+ {
+ code |= insEncodeRegT2_VectorM(id->idReg1(), size, true);
+ code |= id->idReg2() << 12;
+ code |= id->idReg3() << 16;
+ }
+ else
+ {
+ assert(ins == INS_vmov_d2i);
+ code |= id->idReg1() << 12;
+ code |= id->idReg2() << 16;
+ code |= insEncodeRegT2_VectorM(id->idReg3(), size, true);
+ }
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T2_VMOVS:
+ // 2op assemble a float from one int reg (or back)
+ sz = emitGetInstrDescSize(id);
+ code = emitInsCode(ins, fmt);
+ if (ins == INS_vmov_f2i)
+ {
+ code |= insEncodeRegT2_VectorN(id->idReg2(), EA_4BYTE, true);
+ code |= id->idReg1() << 12;
+ }
+ else
+ {
+ assert(ins == INS_vmov_i2f);
+ code |= insEncodeRegT2_VectorN(id->idReg1(), EA_4BYTE, true);
+ code |= id->idReg2() << 12;
+ }
+
+ dst += emitOutput_Thumb2Instr(dst, code);
+ break;
+
+ case IF_T1_J3: // T1_J3 .....dddiiiiiiii R1 PC imm8
+ case IF_T2_M1: // T2_M1 .....i.......... .iiiddddiiiiiiii R1 PC imm12
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescLbl);
+ break;
+
+ case IF_T1_K: // T1_K ....cccciiiiiiii Branch imm8, cond4
+ case IF_T1_M: // T1_M .....iiiiiiiiiii Branch imm11
+ case IF_T2_J1: // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4
+ case IF_T2_J2: // T2_J2 .....Siiiiiiiiii ..j.jiiiiiiiiii. Branch imm24
+ case IF_T2_N1: // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16
+ case IF_LARGEJMP:
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_T1_D1: // T1_D1 .........mmmm... R1*
+
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_M4(id->idReg1());
+ dst += emitOutput_Thumb1Instr(dst, code);
+ sz = SMALL_IDSC_SIZE;
+ break;
+
+ case IF_T1_D2: // T1_D2 .........mmmm... R3*
+
+ /* Is this a "fat" call descriptor? */
+
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeRegT1_M4(id->idReg3());
+ callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb1Instr(dst, code));
+ dst += callInstrSize;
+ goto DONE_CALL;
+
+ case IF_T2_J3: // T2_J3 .....Siiiiiiiiii ..j.jiiiiiiiiii. Call imm24
+
+ /* Is this a "fat" call descriptor? */
+
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ addr = id->idAddr()->iiaAddr;
+ code = emitInsCode(ins, fmt);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
+ dst += callInstrSize;
+ if (emitComp->info.compMatchedVM)
+ emitRecordRelocation((void*)(dst - 4), addr, IMAGE_REL_BASED_THUMB_BRANCH24);
+ }
+ else
+#endif // RELOC_SUPPORT
+ {
+ addr = (BYTE*)((size_t)addr & ~1); // Clear the lowest bit from target address
+
+ /* Calculate PC relative displacement */
+ int disp = addr - (dst + 4);
+ bool S = (disp < 0);
+ bool I1 = ((disp & 0x00800000) == 0);
+ bool I2 = ((disp & 0x00400000) == 0);
+
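+ // Same J1/J2 derivation as for B.W in emitOutputLJ: J1 = NOT(I1) EOR S and
+ // J2 = NOT(I2) EOR S, with I1/I2 already holding the negated offset bits.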
+ if (S)
+ code |= (1 << 26); // S bit
+ if (S ^ I1)
+ code |= (1 << 13); // J1 bit
+ if (S ^ I2)
+ code |= (1 << 11); // J2 bit
+
+ int immLo = (disp & 0x00000ffe) >> 1;
+ int immHi = (disp & 0x003ff000) >> 12;
+
+ code |= (immHi << 16);
+ code |= immLo;
+
+ disp = abs(disp);
+ assert((disp & 0x00fffffe) == disp);
+
+ callInstrSize = SafeCvtAssert<unsigned char>(emitOutput_Thumb2Instr(dst, code));
+ dst += callInstrSize;
+ }
+
+ DONE_CALL:
+
+ /* We update the GC info before the call as the variables cannot be
+ used by the call. Killing variables before the call helps with
+ boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+ If we ever track aliased variables (which could be used by the
+ call), we would have to keep them alive past the call. */
+
+ emitUpdateLiveGCvars(GCvars, *dp);
+
+ // If the method returns a GC ref, mark R0 appropriately.
+ if (id->idGCref() == GCT_GCREF)
+ gcrefRegs |= RBM_R0;
+ else if (id->idGCref() == GCT_BYREF)
+ byrefRegs |= RBM_R0;
+
+ // If the GC register set has changed, report the new set.
+ if (gcrefRegs != emitThisGCrefRegs)
+ emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+
+ if (byrefRegs != emitThisByrefRegs)
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+
+ // Some helper calls may be marked as not requiring GC info to be recorded.
+ if ((!id->idIsNoGC()))
+ {
+ // On ARM, as on AMD64, we don't change the stack pointer to push/pop args.
+ // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
+ // to record the call for GC info purposes. (It might be best to use an alternate call,
+ // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
+ emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
+
+ /* Do we need to record a call location for GC purposes? */
+
+ if (!emitFullGCinfo)
+ {
+ emitRecordGCcall(dst, callInstrSize);
+ }
+ }
+
+ break;
+
+ /********************************************************************/
+ /* oops */
+ /********************************************************************/
+
+ default:
+
+#ifdef DEBUG
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"don't know how to encode this instruction");
+#endif
+ break;
+ }
+
+ // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
+ // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a
+ // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as
+ // for stores, but we ignore those cases here.)
+ if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register that can hold a GC ref.
+ {
+ // If we ever generate instructions that write to multiple registers (LDM, or POP),
+ // then we'd need to do more work here to ensure that changes in the status of GC refs are
+ // tracked properly.
+ if (emitInsMayWriteMultipleRegs(id))
+ {
+ // We explicitly list the multiple-destination-target instruction that we expect to
+ // be emitted outside of the prolog and epilog here.
+ switch (ins)
+ {
+ case INS_smull:
+ case INS_umull:
+ case INS_smlal:
+ case INS_umlal:
+ case INS_vmov_d2i:
+ // For each of these, idReg1() and idReg2() are the destination registers.
+ emitGCregDeadUpd(id->idReg1(), dst);
+ emitGCregDeadUpd(id->idReg2(), dst);
+ break;
+ default:
+ assert(false); // We need to recognize this multi-target instruction...
+ }
+ }
+ else
+ {
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ }
+ else
+ {
+ // I also assume that "idReg1" is the destination register of all instructions that write to registers.
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ }
+ }
+
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
+ if (emitInsWritesToLclVarStackLoc(id))
+ {
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+ unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), sizeof(size_t));
+ regNumber regBase;
+ int adr = emitComp->lvaFrameAddress(varNum, true, &regBase, ofs);
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst);
+ }
+ else
+ {
+ // If the type of the local is a gc ref type, update the liveness.
+ var_types vt;
+ if (varNum >= 0)
+ {
+ // "Regular" (non-spill-temp) local.
+ vt = var_types(emitComp->lvaTable[varNum].lvType);
+ }
+ else
+ {
+ TempDsc* tmpDsc = emitComp->tmpFindNum(varNum);
+ vt = tmpDsc->tdTempType();
+ }
+ if (vt == TYP_REF || vt == TYP_BYREF)
+ emitGCvarDeadUpd(adr + ofs, dst);
+ }
+ }
+
+#ifdef DEBUG
+ /* Make sure we set the instruction descriptor size correctly */
+
+ size_t expected = emitSizeOfInsDsc(id);
+ assert(sz == expected);
+
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+ }
+
+ if (emitComp->compDebugBreak)
+ {
+ // set JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
+ // at the beginning of this method.
+ if (JitConfig.JitEmitPrintRefRegs() != 0)
+ {
+ printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
+ printf(" emitThisGCrefRegs(0x%p)=", dspPtr(&emitThisGCrefRegs));
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ printf("\n");
+ printf(" emitThisByrefRegs(0x%p)=", dspPtr(&emitThisByrefRegs));
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ printf("\n");
+ }
+
+ // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
+ // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
+ {
+ assert(!"JitBreakEmitOutputInstr reached");
+ }
+ }
+#endif
+
+ /* All instructions are expected to generate code */
+
+ assert(*dp != dst);
+
+ *dp = dst;
+
+ return sz;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+static bool insAlwaysSetFlags(instruction ins)
+{
+ bool result = false;
+ switch (ins)
+ {
+ case INS_cmp:
+ case INS_cmn:
+ case INS_teq:
+ case INS_tst:
+ result = true;
+ break;
+
+ default:
+ break;
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Display the instruction name, optionally the instruction
+ * can add the "s" suffix if it must set the flags.
+ */
+void emitter::emitDispInst(instruction ins, insFlags flags)
+{
+ const char* insstr = codeGen->genInsName(ins);
+ int len = strlen(insstr);
+
+ /* Display the instruction name */
+
+ printf("%s", insstr);
+ if (insSetsFlags(flags) && !insAlwaysSetFlags(ins))
+ {
+ printf("s");
+ len++;
+ }
+
+ //
+ // Add at least one space after the instruction name
+ // and pad with spaces until we reach the normal width of 8
+ do
+ {
+ printf(" ");
+ len++;
+ } while (len < 8);
+}
+
+/*****************************************************************************
+ *
+ * Display a reloc value
+ * If we are formatting for an assembly listing, don't print the hex value,
+ * since it would prevent us from doing assembly diffs
+ */
+void emitter::emitDispReloc(int value, bool addComma)
+{
+ if (emitComp->opts.disAsm)
+ {
+ printf("(reloc)");
+ }
+ else
+ {
+ printf("(reloc 0x%x)", dspPtr(value));
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+#define STRICT_ARM_ASM 0
+
+/*****************************************************************************
+ *
+ * Display an immediate value
+ */
+void emitter::emitDispImm(int imm, bool addComma, bool alwaysHex /* =false */)
+{
+ if (!alwaysHex && (imm > -1000) && (imm < 1000))
+ printf("%d", imm);
+ else if ((imm > 0) ||
+ (imm == -imm) || // -0x80000000 == 0x80000000. So we don't want to add an extra "-" at the beginning.
+ (emitComp->opts.disDiffable && (imm == 0xD1FFAB1E))) // Don't display this as negative
+ printf("0x%02x", imm);
+ else // val <= -1000
+ printf("-0x%02x", -imm);
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display an ARM condition code for the IT instructions
+ */
+void emitter::emitDispCond(int cond)
+{
+ const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid
+ assert(0 <= cond && (unsigned)cond < ArrLen(armCond));
+ printf(armCond[cond]);
+}
+
+/*****************************************************************************
+ *
+ * Display a register range in {Rfirst-Rlast} format
+ */
+void emitter::emitDispRegRange(regNumber reg, int len, emitAttr attr)
+{
+ printf("{");
+ emitDispReg(reg, attr, false);
+ if (len > 1)
+ {
+ printf("-");
+ emitDispReg((regNumber)(reg + len - 1), attr, false);
+ }
+ printf("}");
+}
+
+/*****************************************************************************
+ *
+ * Display a register mask in a list format
+ */
+void emitter::emitDispRegmask(int imm, bool encodedPC_LR)
+{
+ bool printedOne = false;
+ bool hasPC;
+ bool hasLR;
+
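+ // When encodedPC_LR is true, bit 1 of imm encodes PC, bit 0 encodes LR, and the remaining
+ // bits (imm >> 2) form the low-register mask starting at R0.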
+ if (encodedPC_LR)
+ {
+ hasPC = (imm & 2) != 0;
+ hasLR = (imm & 1) != 0;
+ imm >>= 2;
+ }
+ else
+ {
+ hasPC = (imm & RBM_PC) != 0;
+ hasLR = (imm & RBM_LR) != 0;
+ imm &= ~(RBM_PC | RBM_LR);
+ }
+
+ regNumber reg = REG_R0;
+ unsigned bit = 1;
+
+ printf("{");
+ while (imm != 0)
+ {
+ if (bit & imm)
+ {
+ if (printedOne)
+ printf(",");
+ printf("%s", emitRegName(reg));
+ printedOne = true;
+ imm -= bit;
+ }
+
+ reg = regNumber(reg + 1);
+ bit <<= 1;
+ }
+
+ if (hasLR)
+ {
+ if (printedOne)
+ printf(",");
+ printf("%s", emitRegName(REG_LR));
+ printedOne = true;
+ }
+
+ if (hasPC)
+ {
+ if (printedOne)
+ printf(",");
+ printf("%s", emitRegName(REG_PC));
+ printedOne = true;
+ }
+ printf("}");
+}
+
+/*****************************************************************************
+ *
+ * Display the shift option (LSL, LSR, ASR, ROR, RRX) used in a Thumb-2 encoding
+ */
+
+void emitter::emitDispShiftOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_LSL)
+ printf(" LSL ");
+ else if (opt == INS_OPTS_LSR)
+ printf(" LSR ");
+ else if (opt == INS_OPTS_ASR)
+ printf(" ASR ");
+ else if (opt == INS_OPTS_ROR)
+ printf(" ROR ");
+ else if (opt == INS_OPTS_RRX)
+ printf(" RRX ");
+}
+
+/*****************************************************************************
+ *
+ * Display a register
+ */
+void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma)
+{
+ if (isFloatReg(reg))
+ {
+ const char* size = attr == EA_8BYTE ? "d" : "s";
+ printf("%s%s", size, emitFloatRegName(reg, attr) + 1);
+ }
+ else
+ {
+ printf("%s", emitRegName(reg, attr));
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+void emitter::emitDispFloatReg(regNumber reg, emitAttr attr, bool addComma)
+{
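+ // Currently a no-op on ARM; emitDispReg above already handles floating-point registers.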
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg]
+ */
+void emitter::emitDispAddrR(regNumber reg, emitAttr attr)
+{
+ printf("[");
+ emitDispReg(reg, attr, false);
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + imm]
+ */
+void emitter::emitDispAddrRI(regNumber reg, int imm, emitAttr attr)
+{
+ bool regIsSPorFP = (reg == REG_SP) || (reg == REG_FP);
+
+ printf("[");
+ emitDispReg(reg, attr, false);
+ if (imm != 0)
+ {
+ if (imm >= 0)
+ {
+#if STRICT_ARM_ASM
+ printf(", ");
+#else
+ printf("+");
+#endif
+ }
+ emitDispImm(imm, false, regIsSPorFP);
+ }
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + reg]
+ */
+void emitter::emitDispAddrRR(regNumber reg1, regNumber reg2, emitAttr attr)
+{
+ printf("[");
+ emitDispReg(reg1, attr, false);
+#if STRICT_ARM_ASM
+ printf(", ");
+#else
+ printf("+");
+#endif
+ emitDispReg(reg2, attr, false);
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + reg << imm]
+ */
+void emitter::emitDispAddrRRI(regNumber reg1, regNumber reg2, int imm, emitAttr attr)
+{
+ printf("[");
+ emitDispReg(reg1, attr, false);
+#if STRICT_ARM_ASM
+ printf(", ");
+ emitDispReg(reg2, attr, false);
+ if (imm > 0)
+ {
+ printf(" LSL ");
+ emitDispImm(1 << imm, false);
+ }
+#else
+ printf("+");
+ if (imm > 0)
+ {
+ emitDispImm(1 << imm, false);
+ printf("*");
+ }
+ emitDispReg(reg2, attr, false);
+#endif
+ printf("]");
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + imm] with optional pre/post-increment and writeback (PUW)
+ */
+void emitter::emitDispAddrPUW(regNumber reg, int imm, insOpts opt, emitAttr attr)
+{
+ bool regIsSPorFP = (reg == REG_SP) || (reg == REG_FP);
+
+ printf("[");
+ emitDispReg(reg, attr, false);
+ if (insOptAnyInc(opt))
+ printf("!");
+
+ if (imm != 0)
+ {
+ if (imm >= 0)
+ {
+#if STRICT_ARM_ASM
+ printf(", ");
+#else
+ printf("+");
+#endif
+ }
+ emitDispImm(imm, false, regIsSPorFP);
+ }
+ printf("]");
+
+ emitDispGC(attr);
+}
+
+/*****************************************************************************
+ *
+ * Display the gc-ness of the operand
+ */
+void emitter::emitDispGC(emitAttr attr)
+{
+#if 0
+ // TODO-ARM-Cleanup: Fix or delete.
+ if (attr == EA_GCREF)
+ printf(" @gc");
+ else if (attr == EA_BYREF)
+ printf(" @byref");
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) the instruction encoding in hex
+ */
+
+void emitter::emitDispInsHex(BYTE* code, size_t sz)
+{
+ // We do not display the instruction hex if we want diff-able disassembly
+ if (!emitComp->opts.disDiffable)
+ {
+ if (sz == 2)
+ {
+ printf(" %04X ", (*((unsigned short*)code)));
+ }
+ else if (sz == 4)
+ {
+ printf(" %04X %04X", (*((unsigned short*)(code + 0))), (*((unsigned short*)(code + 2))));
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * Display the given instruction.
+ */
+
+void emitter::emitDispInsHelp(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
+{
+ if (EMITVERBOSE)
+ {
+ unsigned idNum = id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio
+ // conditional breakpoints
+
+ printf("IN%04x: ", idNum);
+ }
+
+ if (code == NULL)
+ sz = 0;
+
+ if (!emitComp->opts.dspEmit && !isNew && !asmfm && sz)
+ doffs = true;
+
+ /* Display the instruction offset */
+
+ emitDispInsOffs(offset, doffs);
+
+ /* Display the instruction hex code */
+
+ emitDispInsHex(code, sz);
+
+ printf(" ");
+
+ /* Get the instruction and format */
+
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ emitDispInst(ins, id->idInsFlags());
+
+ /* If this instruction has just been added, check its size */
+
+ assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id);
+
+ /* Figure out the operand size */
+ emitAttr attr;
+ if (id->idGCref() == GCT_GCREF)
+ attr = EA_GCREF;
+ else if (id->idGCref() == GCT_BYREF)
+ attr = EA_BYREF;
+ else
+ attr = id->idOpSize();
+
+ switch (fmt)
+ {
+ int imm;
+ int offs;
+ const char* methodName;
+
+ case IF_T1_A: // None
+ case IF_T2_A:
+ break;
+
+ case IF_T1_L0: // Imm
+ case IF_T2_B:
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_T1_B: // <cond>
+ emitDispCond(emitGetInsSC(id));
+ break;
+
+ case IF_T1_L1: // <regmask8>
+ case IF_T2_I1: // <regmask16>
+ emitDispRegmask(emitGetInsSC(id), true);
+ break;
+
+ case IF_T2_E2: // Reg
+ if (id->idIns() == INS_vmrs)
+ {
+ if (id->idReg1() != REG_R15)
+ {
+ emitDispReg(id->idReg1(), attr, true);
+ printf("FPSCR");
+ }
+ else
+ {
+ printf("APSR, FPSCR");
+ }
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), attr, false);
+ }
+ break;
+
+ case IF_T1_D1:
+ emitDispReg(id->idReg1(), attr, false);
+ break;
+
+ case IF_T1_D2:
+ emitDispReg(id->idReg3(), attr, false);
+ {
+ CORINFO_METHOD_HANDLE handle = (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie;
+ if (handle != 0)
+ {
+ methodName = emitComp->eeGetMethodFullName(handle);
+ printf("\t\t// %s", methodName);
+ }
+ }
+ break;
+
+ case IF_T1_F: // SP, Imm
+ emitDispReg(REG_SP, attr, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_T1_J0: // Reg, Imm
+ case IF_T2_L1:
+ case IF_T2_L2:
+ case IF_T2_N:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ if (fmt == IF_T2_N)
+ {
+ if (emitComp->opts.disDiffable)
+ imm = 0xD1FF;
+#if RELOC_SUPPORT
+ if (id->idIsCnsReloc() || id->idIsDspReloc())
+ {
+ if (emitComp->opts.disDiffable)
+ imm = 0xD1FFAB1E;
+ printf("%s RELOC ", (id->idIns() == INS_movw) ? "LOW" : "HIGH");
+ }
+#endif // RELOC_SUPPORT
+ }
+ emitDispImm(imm, false, (fmt == IF_T2_N));
+ break;
+
+ case IF_T2_N2:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ {
+ dataSection* jdsc = 0;
+ NATIVE_OFFSET offs = 0;
+
+ /* Find the appropriate entry in the data section list */
+
+ for (jdsc = emitConsDsc.dsdList; jdsc; jdsc = jdsc->dsNext)
+ {
+ UNATIVE_OFFSET size = jdsc->dsSize;
+
+ /* Is this a label table? */
+
+ if (jdsc->dsType == dataSection::blockAbsoluteAddr)
+ {
+ if (offs == imm)
+ break;
+ }
+
+ offs += size;
+ }
+
+ assert(jdsc != NULL);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ printf("reloc ");
+ }
+#endif
+ printf("%s ADDRESS J_M%03u_DS%02u", (id->idIns() == INS_movw) ? "LOW" : "HIGH",
+ Compiler::s_compMethodsCount, imm);
+
+ // After the MOVT, dump the table
+ if (id->idIns() == INS_movt)
+ {
+ unsigned cnt = jdsc->dsSize / TARGET_POINTER_SIZE;
+ BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
+
+ bool isBound = (emitCodeGetCookie(*bbp) != NULL);
+
+ if (isBound)
+ {
+ printf("\n\n J_M%03u_DS%02u LABEL DWORD", Compiler::s_compMethodsCount, imm);
+
+ /* Display the label table (it's stored as "BasicBlock*" values) */
+
+ do
+ {
+ insGroup* lab;
+
+ /* Convert the BasicBlock* value to an IG address */
+
+ lab = (insGroup*)emitCodeGetCookie(*bbp++);
+ assert(lab);
+
+ printf("\n DD G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
+ } while (--cnt);
+ }
+ }
+ }
+ break;
+
+ case IF_T2_H2: // [Reg+imm]
+ case IF_T2_K2:
+ emitDispAddrRI(id->idReg1(), emitGetInsSC(id), attr);
+ break;
+
+ case IF_T2_K3: // [PC+imm]
+ emitDispAddrRI(REG_PC, emitGetInsSC(id), attr);
+ break;
+
+ case IF_T1_J1: // reg, <regmask8>
+ case IF_T2_I0: // reg, <regmask16>
+ emitDispReg(id->idReg1(), attr, false);
+ printf("!, ");
+ emitDispRegmask(emitGetInsSC(id), false);
+ break;
+
+ case IF_T1_D0: // Reg, Reg
+ case IF_T1_E:
+ case IF_T2_C3:
+ case IF_T2_C9:
+ case IF_T2_C10:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ if (fmt == IF_T1_E && id->idIns() == INS_rsb)
+ {
+ printf(", 0");
+ }
+ break;
+
+ case IF_T2_E1: // Reg, [Reg]
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrR(id->idReg2(), attr);
+ break;
+
+ case IF_T2_D1: // Reg, Imm, Imm
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ {
+ int lsb = (imm >> 5) & 0x1f;
+ int msb = imm & 0x1f;
+ int imm1 = lsb;
+ int imm2 = msb + 1 - lsb;
+ emitDispImm(imm1, true);
+ emitDispImm(imm2, false);
+ }
+ break;
+
+ case IF_T1_C: // Reg, Reg, Imm
+ case IF_T1_G:
+ case IF_T2_C2:
+ case IF_T2_H1:
+ case IF_T2_K1:
+ case IF_T2_L0:
+ case IF_T2_M0:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ if (emitInsIsLoadOrStore(ins))
+ {
+ emitDispAddrRI(id->idReg2(), imm, attr);
+ }
+ else
+ {
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T1_J2:
+ emitDispReg(id->idReg1(), attr, true);
+ imm = emitGetInsSC(id);
+ if (emitInsIsLoadOrStore(ins))
+ {
+ emitDispAddrRI(REG_SP, imm, attr);
+ }
+ else
+ {
+ emitDispReg(REG_SP, attr, true);
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_K4:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrRI(REG_PC, emitGetInsSC(id), attr);
+ break;
+
+ case IF_T2_C1:
+ case IF_T2_C8:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ imm = emitGetInsSC(id);
+ if (id->idInsOpt() == INS_OPTS_RRX)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ assert(imm == 1);
+ }
+ else if (imm > 0)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_C6:
+ imm = emitGetInsSC(id);
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, (imm != 0));
+ if (imm != 0)
+ {
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_C7:
+ emitDispAddrRRI(id->idReg1(), id->idReg2(), emitGetInsSC(id), attr);
+ break;
+
+ case IF_T2_H0:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrPUW(id->idReg2(), emitGetInsSC(id), id->idInsOpt(), attr);
+ break;
+
+ case IF_T1_H: // Reg, Reg, Reg
+ emitDispReg(id->idReg1(), attr, true);
+ if (emitInsIsLoadOrStore(ins))
+ {
+ emitDispAddrRR(id->idReg2(), id->idReg3(), attr);
+ }
+ else
+ {
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ }
+ break;
+
+ case IF_T2_C4:
+ case IF_T2_C5:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ break;
+
+ case IF_T2_VFP3:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ break;
+
+ case IF_T2_VFP2:
+ switch (id->idIns())
+ {
+ case INS_vcvt_d2i:
+ case INS_vcvt_d2u:
+ case INS_vcvt_d2f:
+ emitDispReg(id->idReg1(), EA_4BYTE, true);
+ emitDispReg(id->idReg2(), EA_8BYTE, false);
+ break;
+
+ case INS_vcvt_i2d:
+ case INS_vcvt_u2d:
+ case INS_vcvt_f2d:
+ emitDispReg(id->idReg1(), EA_8BYTE, true);
+ emitDispReg(id->idReg2(), EA_4BYTE, false);
+ break;
+
+ // For the remaining VFP2 instructions we just use the type on the instruction,
+ // unless it is an asymmetric one like the converts handled above.
+ default:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ break;
+ }
+ break;
+
+ case IF_T2_VLDST:
+ imm = emitGetInsSC(id);
+ switch (id->idIns())
+ {
+ case INS_vldr:
+ case INS_vstr:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrPUW(id->idReg2(), imm, id->idInsOpt(), attr);
+ break;
+
+ case INS_vldm:
+ case INS_vstm:
+ emitDispReg(id->idReg2(), attr, false);
+ if (insOptAnyInc(id->idInsOpt()))
+ printf("!");
+ printf(", ");
+ emitDispRegRange(id->idReg1(), abs(imm) >> 2, attr);
+ break;
+
+ case INS_vpush:
+ case INS_vpop:
+ emitDispRegRange(id->idReg1(), abs(imm) >> 2, attr);
+ break;
+
+ default:
+ unreached();
+ }
+ break;
+
+ case IF_T2_VMOVD:
+ switch (id->idIns())
+ {
+ case INS_vmov_i2d:
+ emitDispReg(id->idReg1(), attr, true); // EA_8BYTE
+ emitDispReg(id->idReg2(), EA_4BYTE, true);
+ emitDispReg(id->idReg3(), EA_4BYTE, false);
+ break;
+ case INS_vmov_d2i:
+ emitDispReg(id->idReg1(), EA_4BYTE, true);
+ emitDispReg(id->idReg2(), EA_4BYTE, true);
+ emitDispReg(id->idReg3(), attr, false); // EA_8BYTE
+ break;
+ default:
+ unreached();
+ }
+ break;
+
+ case IF_T2_VMOVS:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, false);
+ break;
+
+ case IF_T2_G1:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispAddrRR(id->idReg2(), id->idReg3(), attr);
+ break;
+
+ case IF_T2_D0: // Reg, Reg, Imm, Imm
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ imm = emitGetInsSC(id);
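+ // The immediate packs two fields: bits [9:5] hold the lsb, and bits [4:0] hold the msb
+ // (for bfi) or width-1 (for the other instructions in this format). As a worked example,
+ // a packed value of 0x10B decodes to lsb=8, msb=11 and is displayed as "#8, #4" for bfi.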
+ if (ins == INS_bfi)
+ {
+ int lsb = (imm >> 5) & 0x1f;
+ int msb = imm & 0x1f;
+ int imm1 = lsb;
+ int imm2 = msb + 1 - lsb;
+ emitDispImm(imm1, true);
+ emitDispImm(imm2, false);
+ }
+ else
+ {
+ int lsb = (imm >> 5) & 0x1f;
+ int widthm1 = imm & 0x1f;
+ int imm1 = lsb;
+ int imm2 = widthm1 + 1;
+ emitDispImm(imm1, true);
+ emitDispImm(imm2, false);
+ }
+ break;
+
+ case IF_T2_C0: // Reg, Reg, Reg, Imm
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, false);
+ imm = emitGetInsSC(id);
+ if (id->idInsOpt() == INS_OPTS_RRX)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ assert(imm == 1);
+ }
+ else if (imm > 0)
+ {
+ emitDispShiftOpts(id->idInsOpt());
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_T2_E0:
+ emitDispReg(id->idReg1(), attr, true);
+ if (id->idIsLclVar())
+ {
+ emitDispAddrRRI(id->idReg2(), codeGen->rsGetRsvdReg(), 0, attr);
+ }
+ else
+ {
+ emitDispAddrRRI(id->idReg2(), id->idReg3(), emitGetInsSC(id), attr);
+ }
+ break;
+
+ case IF_T2_G0:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispAddrPUW(id->idReg3(), emitGetInsSC(id), id->idInsOpt(), attr);
+ break;
+
+ case IF_T2_F1: // Reg, Reg, Reg, Reg
+ case IF_T2_F2:
+ emitDispReg(id->idReg1(), attr, true);
+ emitDispReg(id->idReg2(), attr, true);
+ emitDispReg(id->idReg3(), attr, true);
+ emitDispReg(id->idReg4(), attr, false);
+ break;
+
+ case IF_T1_J3:
+ case IF_T2_M1: // Load Label
+ emitDispReg(id->idReg1(), attr, true);
+ if (id->idIsBound())
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ else
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ break;
+
+ case IF_T1_I: // Special Compare-and-branch
+ emitDispReg(id->idReg1(), attr, true);
+ __fallthrough;
+
+ case IF_T1_K: // Special Branch, conditional
+ case IF_T1_M:
+ assert(((instrDescJmp*)id)->idjShort);
+ printf("SHORT ");
+ __fallthrough;
+
+ case IF_T2_N1:
+ if (fmt == IF_T2_N1)
+ {
+ emitDispReg(id->idReg1(), attr, true);
+ printf("%s ADDRESS ", (id->idIns() == INS_movw) ? "LOW" : "HIGH");
+ }
+ __fallthrough;
+
+ case IF_T2_J1:
+ case IF_T2_J2:
+ case IF_LARGEJMP:
+ {
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+
+ if (ig == NULL)
+ {
+ printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
+ }
+ else
+ {
+ unsigned insNum = emitFindInsNum(ig, id);
+ UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
+ UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
+ ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", relOffs, instrCount);
+ }
+ }
+ else if (id->idIsBound())
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ else
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_T2_J3:
+ if (id->idIsCallAddr())
+ {
+ offs = (ssize_t)id->idAddr()->iiaAddr;
+ methodName = "";
+ }
+ else
+ {
+ offs = 0;
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ if (offs)
+ {
+ if (id->idIsDspReloc())
+ printf("reloc ");
+ printf("%08X", offs);
+ }
+ else
+ {
+ printf("%s", methodName);
+ }
+
+ break;
+
+ default:
+ printf("unexpected format %s", emitIfName(id->idInsFmt()));
+ assert(!"unexpectedFormat");
+ break;
+ }
+
+ if (id->idDebugOnlyInfo()->idVarRefOffs)
+ {
+ printf("\t// ");
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+ }
+
+ printf("\n");
+}
+
+void emitter::emitDispIns(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
+{
+ insFormat fmt = id->idInsFmt();
+
+ /* Special-case IF_LARGEJMP */
+
+ if ((fmt == IF_LARGEJMP) && id->idIsBound())
+ {
+ // This is a pseudo-instruction format representing a large conditional branch. See the comment
+ // in emitter::emitOutputLJ() for the full description.
+ //
+ // For this pseudo-instruction, we will actually generate:
+ //
+ // b<!cond> L_not // 2 bytes. Note that we reverse the condition.
+ // b L_target // 4 bytes
+ // L_not:
+ //
+ // These instructions don't exist in the actual instruction stream, so we need to fake them
+ // up to display them.
+ //
+ // Note: don't touch the actual instrDesc. If we accidentally messed it up, it would create a very
+ // difficult-to-find bug.
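+ //
+ // For example, if the original instruction is a bound 'beq' to a distant label, the code
+ // below displays a short 'bne' that skips the next instruction, followed by an
+ // unconditional 'b' to the real target.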
+
+ instrDescJmp idJmp;
+ instrDescJmp* pidJmp = &idJmp;
+
+ memset(&idJmp, 0, sizeof(idJmp));
+
+ pidJmp->idIns(emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(id->idIns())))); // reverse the
+ // conditional
+ // instruction
+ pidJmp->idInsFmt(IF_T1_K);
+ pidJmp->idInsSize(emitInsSize(IF_T1_K));
+ pidJmp->idjShort = 1;
+ pidJmp->idAddr()->iiaSetInstrCount(1);
+ pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // share the idDebugOnlyInfo() field
+
+ size_t bcondSizeOrZero = (code == NULL) ? 0 : 2; // branch is 2 bytes
+ emitDispInsHelp(pidJmp, false, doffs, asmfm, offset, code, bcondSizeOrZero,
+ NULL /* force display of pc-relative branch */);
+
+ code += bcondSizeOrZero;
+ offset += 2;
+
+ // Next, display the unconditional branch
+
+ // Reset the local instrDesc
+ memset(&idJmp, 0, sizeof(idJmp));
+
+ pidJmp->idIns(INS_b);
+ pidJmp->idInsFmt(IF_T2_J2);
+ pidJmp->idInsSize(emitInsSize(IF_T2_J2));
+ pidJmp->idjShort = 0;
+ if (id->idIsBound())
+ {
+ pidJmp->idSetIsBound();
+ pidJmp->idAddr()->iiaIGlabel = id->idAddr()->iiaIGlabel;
+ }
+ else
+ {
+ pidJmp->idAddr()->iiaBBlabel = id->idAddr()->iiaBBlabel;
+ }
+ pidJmp->idDebugOnlyInfo(id->idDebugOnlyInfo()); // share the idDebugOnlyInfo() field
+
+ size_t brSizeOrZero = (code == NULL) ? 0 : 4; // unconditional branch is 4 bytes
+ emitDispInsHelp(pidJmp, isNew, doffs, asmfm, offset, code, brSizeOrZero, ig);
+ }
+ else
+ {
+ emitDispInsHelp(id, isNew, doffs, asmfm, offset, code, sz, ig);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+{
+ printf("[");
+
+ if (varx < 0)
+ printf("TEMP_%02u", -varx);
+ else
+ emitComp->gtDispLclVar(+varx, false);
+
+ if (disp < 0)
+ printf("-0x%02x", -disp);
+ else if (disp > 0)
+ printf("+0x%02x", +disp);
+
+ printf("]");
+
+ if (varx >= 0 && emitComp->opts.varNames)
+ {
+ LclVarDsc* varDsc;
+ const char* varName;
+
+ assert((unsigned)varx < emitComp->lvaCount);
+ varDsc = emitComp->lvaTable + varx;
+ varName = emitComp->compLocalVarName(varx, offs);
+
+ if (varName)
+ {
+ printf("'%s", varName);
+
+ if (disp < 0)
+ printf("-%d", -disp);
+ else if (disp > 0)
+ printf("+%d", +disp);
+
+ printf("'");
+ }
+ }
+}
+
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+
+// This is very similar to emitInsBinary and probably could be folded into the same routine,
+// except that the requirements on the incoming parameters are different;
+// e.g., the memory operand in the storeind case must NOT be contained.
+void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
+{
+ switch (node->OperGet())
+ {
+ case GT_IND:
+ {
+ GenTree* addr = node->gtGetOp1();
+ assert(!addr->isContained());
+ codeGen->genConsumeReg(addr);
+ emitIns_R_R(ins, attr, node->gtRegNum, addr->gtRegNum);
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ GenTree* addr = node->gtGetOp1();
+ GenTree* data = node->gtOp.gtOp2;
+
+ assert(!addr->isContained());
+ assert(!data->isContained());
+ codeGen->genConsumeReg(addr);
+ codeGen->genConsumeReg(data);
+
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, addr->gtRegNum, data->gtRegNum);
+ }
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = node->AsLclVarCommon();
+
+ GenTree* data = node->gtOp.gtOp1->gtEffectiveVal();
+ codeGen->inst_set_SV_var(varNode);
+ assert(varNode->gtRegNum == REG_NA); // stack store
+
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
+ codeGen->genUpdateLife(varNode);
+ }
+ else
+ {
+ assert(!data->isContained());
+ codeGen->genConsumeReg(data);
+ emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
+ codeGen->genUpdateLife(varNode);
+ }
+ }
+ return;
+
+ default:
+ unreached();
+ }
+}
+
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
+
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+{
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // src can be immed or reg
+ assert(!src->isContained() || src->isContainedIntOrIImmed());
+
+ // find immed (if any) - it cannot be a dst
+ GenTreeIntConCommon* intConst = nullptr;
+ if (src->isContainedIntOrIImmed())
+ {
+ intConst = src->AsIntConCommon();
+ }
+
+ if (intConst)
+ {
+ emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue());
+ return dst->gtRegNum;
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
+ return dst->gtRegNum;
+ }
+}
+
+#endif // !LEGACY_BACKEND
+#endif // defined(_TARGET_ARM_)
diff --git a/src/jit/emitarm.h b/src/jit/emitarm.h
new file mode 100644
index 0000000000..1440148f42
--- /dev/null
+++ b/src/jit/emitarm.h
@@ -0,0 +1,414 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_ARM_)
+
+/************************************************************************/
+/* Routines that compute the size of / encode instructions */
+/************************************************************************/
+
+struct CnsVal
+{
+ int cnsVal;
+#ifdef RELOC_SUPPORT
+ bool cnsReloc;
+#endif
+};
+
+insSize emitInsSize(insFormat insFmt);
+
+BYTE* emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = NULL);
+BYTE* emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = NULL);
+BYTE* emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = NULL);
+
+BYTE* emitOutputR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRI(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputIV(BYTE* dst, instrDesc* id);
+#ifdef FEATURE_ITINSTRUCTION
+BYTE* emitOutputIT(BYTE* dst, instruction ins, insFormat fmt, ssize_t condcode);
+#endif // FEATURE_ITINSTRUCTION
+BYTE* emitOutputNOP(BYTE* dst, instruction ins, insFormat fmt);
+
+BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* id);
+BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id);
+
+static unsigned emitOutput_Thumb1Instr(BYTE* dst, ssize_t code);
+static unsigned emitOutput_Thumb2Instr(BYTE* dst, ssize_t code);
+
+/************************************************************************/
+/* Debug-only routines to display instructions */
+/************************************************************************/
+
+#ifdef DEBUG
+
+const char* emitFPregName(unsigned reg, bool varName = true);
+
+void emitDispInst(instruction ins, insFlags flags);
+void emitDispReloc(int value, bool addComma);
+void emitDispImm(int imm, bool addComma, bool alwaysHex = false);
+void emitDispCond(int cond);
+void emitDispShiftOpts(insOpts opt);
+void emitDispRegmask(int imm, bool encodedPC_LR);
+void emitDispRegRange(regNumber reg, int len, emitAttr attr);
+void emitDispReg(regNumber reg, emitAttr attr, bool addComma);
+void emitDispFloatReg(regNumber reg, emitAttr attr, bool addComma);
+void emitDispAddrR(regNumber reg, emitAttr attr);
+void emitDispAddrRI(regNumber reg, int imm, emitAttr attr);
+void emitDispAddrRR(regNumber reg1, regNumber reg2, emitAttr attr);
+void emitDispAddrRRI(regNumber reg1, regNumber reg2, int imm, emitAttr attr);
+void emitDispAddrPUW(regNumber reg, int imm, insOpts opt, emitAttr attr);
+void emitDispGC(emitAttr attr);
+
+void emitDispInsHelp(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* code = 0,
+ size_t sz = 0,
+ insGroup* ig = NULL);
+void emitDispIns(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* code = 0,
+ size_t sz = 0,
+ insGroup* ig = NULL);
+
+#endif // DEBUG
+
+/************************************************************************/
+/* Private members that deal with target-dependent instr. descriptors */
+/************************************************************************/
+
+private:
+instrDesc* emitNewInstrAmd(emitAttr attr, int dsp);
+instrDesc* emitNewInstrAmdCns(emitAttr attr, int dsp, int cns);
+
+instrDesc* emitNewInstrCallDir(
+ int argCnt, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize);
+
+instrDesc* emitNewInstrCallInd(
+ int argCnt, ssize_t disp, VARSET_VALARG_TP GCvars, regMaskTP gcrefRegs, regMaskTP byrefRegs, emitAttr retSize);
+
+void emitGetInsCns(instrDesc* id, CnsVal* cv);
+int emitGetInsAmdCns(instrDesc* id, CnsVal* cv);
+void emitGetInsDcmCns(instrDesc* id, CnsVal* cv);
+int emitGetInsAmdAny(instrDesc* id);
+
+/************************************************************************/
+/* Private helpers for instruction output */
+/************************************************************************/
+
+private:
+bool emitInsIsCompare(instruction ins);
+bool emitInsIsLoad(instruction ins);
+bool emitInsIsStore(instruction ins);
+bool emitInsIsLoadOrStore(instruction ins);
+
+/*****************************************************************************
+ *
+ *  Convert an index scale in bytes to/from the smaller encoding used for
+ *  storage in instruction descriptors.
+ */
+
+inline emitter::opSize emitEncodeScale(size_t scale)
+{
+ assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+
+ return emitSizeEncode[scale - 1];
+}
+
+inline emitAttr emitDecodeScale(unsigned ensz)
+{
+ assert(ensz < 4);
+
+ return emitter::emitSizeDecode[ensz];
+}
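+
+// Usage note: emitDecodeScale(emitEncodeScale(scale)) is expected to round-trip back to the
+// emitAttr corresponding to the original scale (e.g. 8 -> EA_8BYTE); the exact 2-bit codes
+// are defined by the emitSizeEncode/emitSizeDecode tables elsewhere in the emitter.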
+
+static bool isModImmConst(int imm);
+
+static int encodeModImmConst(int imm);
+
+static int insUnscaleImm(int imm, emitAttr size);
+
+/************************************************************************/
+/* Public inline informational methods */
+/************************************************************************/
+
+public:
+inline static bool isLowRegister(regNumber reg)
+{
+ return (reg <= REG_R7);
+}
+
+inline static bool isGeneralRegister(regNumber reg)
+{
+ return (reg <= REG_R15);
+}
+
+inline static bool isFloatReg(regNumber reg)
+{
+ return (reg >= REG_F0 && reg <= REG_F31);
+}
+
+inline static bool isDoubleReg(regNumber reg)
+{
+ return isFloatReg(reg) && ((reg % 2) == 0);
+}
+
+inline static bool insSetsFlags(insFlags flags)
+{
+ return (flags != INS_FLAGS_NOT_SET);
+}
+
+inline static bool insDoesNotSetFlags(insFlags flags)
+{
+ return (flags != INS_FLAGS_SET);
+}
+
+inline static insFlags insMustSetFlags(insFlags flags)
+{
+ return (flags == INS_FLAGS_SET) ? INS_FLAGS_SET : INS_FLAGS_NOT_SET;
+}
+
+inline static insFlags insMustNotSetFlags(insFlags flags)
+{
+ return (flags == INS_FLAGS_NOT_SET) ? INS_FLAGS_NOT_SET : INS_FLAGS_SET;
+}
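+
+// Note that for INS_FLAGS_DONT_CARE both insSetsFlags() and insDoesNotSetFlags() return true,
+// i.e. either encoding is acceptable; insMustSetFlags() and insMustNotSetFlags() resolve the
+// "don't care" case to a concrete choice.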
+
+inline static bool insOptsNone(insOpts opt)
+{
+ return (opt == INS_OPTS_NONE);
+}
+
+inline static bool insOptAnyInc(insOpts opt)
+{
+ return (opt == INS_OPTS_LDST_PRE_DEC) || (opt == INS_OPTS_LDST_POST_INC);
+}
+
+inline static bool insOptsPreDec(insOpts opt)
+{
+ return (opt == INS_OPTS_LDST_PRE_DEC);
+}
+
+inline static bool insOptsPostInc(insOpts opt)
+{
+ return (opt == INS_OPTS_LDST_POST_INC);
+}
+
+inline static bool insOptAnyShift(insOpts opt)
+{
+ return ((opt >= INS_OPTS_RRX) && (opt <= INS_OPTS_ROR));
+}
+
+inline static bool insOptsRRX(insOpts opt)
+{
+ return (opt == INS_OPTS_RRX);
+}
+
+inline static bool insOptsLSL(insOpts opt)
+{
+ return (opt == INS_OPTS_LSL);
+}
+
+inline static bool insOptsLSR(insOpts opt)
+{
+ return (opt == INS_OPTS_LSR);
+}
+
+inline static bool insOptsASR(insOpts opt)
+{
+ return (opt == INS_OPTS_ASR);
+}
+
+inline static bool insOptsROR(insOpts opt)
+{
+ return (opt == INS_OPTS_ROR);
+}
+
+/************************************************************************/
+/* The public entry points to output instructions */
+/************************************************************************/
+
+public:
+static bool emitIns_valid_imm_for_alu(int imm);
+static bool emitIns_valid_imm_for_mov(int imm);
+static bool emitIns_valid_imm_for_small_mov(regNumber reg, int imm, insFlags flags);
+static bool emitIns_valid_imm_for_add(int imm, insFlags flags);
+static bool emitIns_valid_imm_for_add_sp(int imm);
+
+void emitIns(instruction ins);
+
+void emitIns_I(instruction ins, emitAttr attr, ssize_t imm);
+
+void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
+
+void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg1, int imm1, int imm2, insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm,
+ insFlags flags = INS_FLAGS_DONT_CARE,
+ insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R_I_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ int imm1,
+ int imm2,
+ insFlags flags = INS_FLAGS_DONT_CARE);
+
+void emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ int imm,
+ insFlags flags = INS_FLAGS_DONT_CARE,
+ insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4);
+
+void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs);
+
+void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
+
+void emitIns_genStackOffset(regNumber r, int varx, int offs);
+
+void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
+
+void emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs);
+
+void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, ssize_t offs, ssize_t val);
+
+void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg);
+
+void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AR(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_AR_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+enum EmitCallType
+{
+
+ // I have included here, but commented out, all the values used by the x86 emitter.
+ // However, ARM has a much reduced instruction set, so the ARM emitter only
+ // supports a subset of the x86 variants. By leaving them commented out, it becomes
+ // a compile-time error if code tries to use them (and hopefully the author will see
+ // this comment and understand why they are unavailable on ARM), while making it easier
+ // to stay in sync with x86 and possibly add them back in if needed.
+
+ EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
+ // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
+ EC_FUNC_ADDR, // Direct call to an absolute address
+
+ // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
+ EC_INDIR_R, // Indirect call via register
+ // EC_INDIR_SR, // Indirect call via stack-reference (local var)
+ // EC_INDIR_C, // Indirect call via static class var
+ // EC_INDIR_ARD, // Indirect call via an addressing mode
+
+ EC_COUNT
+};
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd, // used for pretty printing
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ regNumber ireg = REG_NA,
+ regNumber xreg = REG_NA,
+ unsigned xmul = 0,
+ int disp = 0,
+ bool isJump = false,
+ bool isNoGC = false,
+ bool isProfLeaveCB = false);
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a conditional jump.
+ */
+
+inline bool emitIsCondJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T2_J1) || (jmp->idInsFmt() == IF_T1_K) || (jmp->idInsFmt() == IF_LARGEJMP);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a compare-and-jump.
+ */
+
+inline bool emitIsCmpJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T1_I);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's an unconditional jump.
+ */
+
+inline bool emitIsUncondJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T2_J2) || (jmp->idInsFmt() == IF_T1_M);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a load label instruction.
+ */
+
+inline bool emitIsLoadLabel(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_T2_M1) || (jmp->idInsFmt() == IF_T1_J3) || (jmp->idInsFmt() == IF_T2_N1);
+}
+
+#endif // _TARGET_ARM_
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
new file mode 100644
index 0000000000..a632ec12c8
--- /dev/null
+++ b/src/jit/emitarm64.cpp
@@ -0,0 +1,11167 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emitArm64.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+/* static */ bool emitter::strictArmAsm = true;
+
+/*****************************************************************************/
+
+const instruction emitJumpKindInstructions[] = {
+ INS_nop,
+
+#define JMP_SMALL(en, rev, ins) INS_##ins,
+#include "emitjmps.h"
+};
+
+const emitJumpKind emitReverseJumpKinds[] = {
+ EJ_NONE,
+
+#define JMP_SMALL(en, rev, ins) EJ_##rev,
+#include "emitjmps.h"
+};
+
+/*****************************************************************************
+ * Look up the instruction for a jump kind
+ */
+
+/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
+{
+ assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
+ return emitJumpKindInstructions[jumpKind];
+}
+
+/*****************************************************************************
+* Look up the jump kind for an instruction. It better be a conditional
+* branch instruction with a jump kind!
+*/
+
+/*static*/ emitJumpKind emitter::emitInsToJumpKind(instruction ins)
+{
+ for (unsigned i = 0; i < ArrLen(emitJumpKindInstructions); i++)
+ {
+ if (ins == emitJumpKindInstructions[i])
+ {
+ emitJumpKind ret = (emitJumpKind)i;
+ assert(EJ_NONE < ret && ret < EJ_COUNT);
+ return ret;
+ }
+ }
+ unreached();
+}
+
+/*****************************************************************************
+ * Reverse the conditional jump
+ */
+
+/*static*/ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
+{
+ assert(jumpKind < EJ_COUNT);
+ return emitReverseJumpKinds[jumpKind];
+}
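+
+// For example, reversing EJ_eq yields EJ_ne (and vice versa), as specified by the
+// JMP_SMALL entries included from "emitjmps.h".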
+
+/*****************************************************************************
+ *
+ * Return the allocated size (in bytes) of the given instruction descriptor.
+ */
+
+size_t emitter::emitSizeOfInsDsc(instrDesc* id)
+{
+ assert(!emitIsTinyInsDsc(id));
+
+ if (emitIsScnsInsDsc(id))
+ return SMALL_IDSC_SIZE;
+
+ assert((unsigned)id->idInsFmt() < emitFmtCount);
+
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+ bool isCallIns = (id->idIns() == INS_bl) || (id->idIns() == INS_blr) || (id->idIns() == INS_b_tail) ||
+ (id->idIns() == INS_br_tail);
+ bool maybeCallIns = (id->idIns() == INS_b) || (id->idIns() == INS_br);
+
+ switch (idOp)
+ {
+ case ID_OP_NONE:
+ break;
+
+ case ID_OP_JMP:
+ return sizeof(instrDescJmp);
+
+ case ID_OP_CALL:
+ assert(isCallIns || maybeCallIns);
+ if (id->idIsLargeCall())
+ {
+ /* Must be a "fat" call descriptor */
+ return sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+ return sizeof(instrDesc);
+ }
+ break;
+
+ default:
+ NO_WAY("unexpected instruction descriptor format");
+ break;
+ }
+
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescCnsDsp);
+ else
+ return sizeof(instrDescCns);
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ return sizeof(instrDescDsp);
+ else
+ return sizeof(instrDesc);
+ }
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * The following is called for each recorded instruction -- used for debugging.
+ */
+void emitter::emitInsSanityCheck(instrDesc* id)
+{
+ /* What instruction format have we got? */
+
+ switch (id->idInsFmt())
+ {
+ instruction ins;
+ emitAttr elemsize;
+ emitAttr datasize;
+ emitAttr dstsize;
+ emitAttr srcsize;
+ ssize_t imm;
+ unsigned immShift;
+ ssize_t index;
+ ssize_t index2;
+
+ case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ break;
+
+ case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiiii.... simm19:00
+ break;
+
+ case IF_LARGEJMP:
+ case IF_LARGEADR:
+ case IF_LARGELDC:
+ break;
+
+ case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ break;
+
+ case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn
+ assert(isGeneralRegister(id->idReg3()));
+ break;
+
+ case IF_LS_1A: // LS_1A .X......iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ assert(isGeneralRegister(id->idReg1()) || isVectorRegister(id->idReg1()));
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(emitGetInsSC(id) == 0);
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(isValidUimm12(emitGetInsSC(id)));
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(emitGetInsSC(id) >= -0x100);
+ assert(emitGetInsSC(id) < 0x100);
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ break;
+
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // SP
+ if (id->idIsLclVar())
+ {
+ assert(isGeneralRegister(codeGen->rsGetRsvdReg()));
+ }
+ else
+ {
+ assert(isGeneralRegister(id->idReg3()));
+ }
+ assert(insOptsLSExtend(id->idInsOpt()));
+ break;
+
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnttttt Rt Ra Rn
+ assert((isValidGeneralDatasize(id->idOpSize()) && isIntegerRegister(id->idReg1())) ||
+ (isValidVectorLSPDatasize(id->idOpSize()) && isVectorRegister(id->idReg1())));
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2()) || // ZR
+ isVectorRegister(id->idReg2()));
+ assert(isIntegerRegister(id->idReg3())); // SP
+ assert(emitGetInsSC(id) == 0);
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnttttt Rt Ra Rn imm(im7,sh)
+ assert((isValidGeneralDatasize(id->idOpSize()) && isIntegerRegister(id->idReg1())) ||
+ (isValidVectorLSPDatasize(id->idOpSize()) && isVectorRegister(id->idReg1())));
+ assert(isIntegerRegister(id->idReg1()) || // ZR
+ isVectorRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2()) || // ZR
+ isVectorRegister(id->idReg2()));
+ assert(isIntegerRegister(id->idReg3())); // SP
+ assert(emitGetInsSC(id) >= -0x40);
+ assert(emitGetInsSC(id) < 0x40);
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ break;
+
+ case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidUimm12(emitGetInsSC(id)));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ break;
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmHWVal(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ assert(isGeneralRegister(id->idReg1()));
+ break;
+
+ case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmCondFlagsImm5(emitGetInsSC(id)));
+ break;
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(isValidUimm12(emitGetInsSC(id)));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ break;
+
+ case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmNRS(emitGetInsSC(id), id->idOpSize()));
+ break;
+
+ case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isValidImmCond(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // ZR
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ if (!insOptsNone(id->idInsOpt()))
+ {
+ if (id->idIns() == INS_tst) // tst allows ROR, cmp/cmn don't
+ {
+ assert(insOptsAnyShift(id->idInsOpt()));
+ }
+ else
+ {
+ assert(insOptsAluShift(id->idInsOpt()));
+ }
+ }
+ assert(insOptsNone(id->idInsOpt()) || (emitGetInsSC(id) > 0));
+ break;
+
+ case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isGeneralRegister(id->idReg2()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL(id->idInsOpt()) || insOptsAnyExtend(id->idInsOpt()));
+ assert(emitGetInsSC(id) >= 0);
+ assert(emitGetInsSC(id) <= 4);
+ if (insOptsLSL(id->idInsOpt()))
+ {
+ assert(emitGetInsSC(id) > 0);
+ }
+ break;
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnmmmmm Rd Rn cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmCond(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isIntegerRegister(id->idReg2())); // ZR
+ break;
+
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsAluShift(id->idInsOpt()));
+ assert(insOptsNone(id->idInsOpt()) || (emitGetInsSC(id) > 0));
+ break;
+
+ case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ break;
+
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isValidImmCondFlags(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ if (id->idIsLclVar())
+ {
+ assert(isGeneralRegister(codeGen->rsGetRsvdReg()));
+ }
+ else
+ {
+ assert(isGeneralRegister(id->idReg3()));
+ }
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsAnyShift(id->idInsOpt()));
+ assert(insOptsNone(id->idInsOpt()) || (emitGetInsSC(id) > 0));
+ break;
+
+ case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isIntegerRegister(id->idReg1())); // SP
+ assert(isIntegerRegister(id->idReg2())); // SP
+ assert(isGeneralRegister(id->idReg3()));
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL(id->idInsOpt()) || insOptsAnyExtend(id->idInsOpt()));
+ assert(emitGetInsSC(id) >= 0);
+ assert(emitGetInsSC(id) <= 4);
+ if (insOptsLSL(id->idInsOpt()))
+ {
+ assert((emitGetInsSC(id) > 0) ||
+ (id->idReg2() == REG_ZR)); // REG_ZR encodes SP and we allow a shift of zero
+ }
+ break;
+
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnmmmmm Rd Rn Rm cond
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isValidImmCond(emitGetInsSC(id)));
+ break;
+
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isValidImmShift(emitGetInsSC(id), id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()));
+ break;
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ assert(isGeneralRegister(id->idReg3()));
+ assert(isGeneralRegister(id->idReg4()));
+ break;
+
+ case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+ assert(insOptsNone(id->idInsOpt()));
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isValidUimm8(emitGetInsSC(id)));
+ break;
+
+ case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector)
+ ins = id->idIns();
+ imm = emitGetInsSC(id) & 0x0ff;
+ immShift = (emitGetInsSC(id) & 0x700) >> 8;
+ assert(immShift >= 0);
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ if (ins == INS_fmov)
+ {
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(id->idInsOpt() != INS_OPTS_1D); // Reserved encoding
+ assert(immShift == 0);
+ }
+ else
+ {
+ assert(isValidVectorElemsize(elemsize));
+ assert((immShift != 4) && (immShift != 7)); // always invalid values
+ if (ins != INS_movi) // INS_mvni, INS_orr, INS_bic
+ {
+ assert((elemsize != EA_1BYTE) && (elemsize != EA_8BYTE)); // only H or S
+ if (elemsize == EA_2BYTE)
+ {
+ assert(immShift < 2);
+ }
+ else // (elemsize == EA_4BYTE)
+ {
+ if (ins != INS_mvni)
+ {
+ assert(immShift < 4);
+ }
+ }
+ }
+ }
+ assert(isVectorRegister(id->idReg1()));
+ assert(isValidUimm8(imm));
+ break;
+
+ case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+ assert(insOptsNone(id->idInsOpt()));
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(isVectorRegister(id->idReg1()));
+ break;
+
+ case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector)
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+ assert(id->idOpSize() == EA_8BYTE);
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isValidImmShift(emitGetInsSC(id), EA_8BYTE));
+ break;
+
+ case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidImmShift(emitGetInsSC(id), elemsize));
+ break;
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index));
+ assert(isValidVectorElemsize(elemsize));
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ if (id->idIns() == INS_dup)
+ {
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ }
+ else // INS_ins
+ {
+ datasize = EA_16BYTE;
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsize(elemsize));
+ }
+ assert(isVectorRegister(id->idReg1()));
+ assert(isGeneralRegisterOrZR(id->idReg2()));
+ break;
+
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ index = emitGetInsSC(id);
+ assert(isValidVectorIndex(datasize, elemsize, index));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index));
+ assert(isValidVectorElemsize(elemsize));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ imm = emitGetInsSC(id);
+ index = (imm >> 4) & 0xf;
+ index2 = imm & 0xf;
+ elemsize = id->idOpSize();
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, index2));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ assert(id->idOpSize() == EA_8BYTE); // only type D is supported
+ __fallthrough;
+
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isValidVectorElemsizeFloat(id->idOpSize()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov/fcvtXX - to general)
+ assert(insOptsConvertFloatToInt(id->idInsOpt()));
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+ assert(isValidGeneralDatasize(dstsize));
+ assert(isValidVectorElemsizeFloat(srcsize));
+ assert(dstsize == id->idOpSize());
+ assert(isGeneralRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov/Xcvtf - from general)
+ assert(insOptsConvertIntToFloat(id->idInsOpt()));
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+ assert(isValidGeneralDatasize(srcsize));
+ assert(isValidVectorElemsizeFloat(dstsize));
+ assert(dstsize == id->idOpSize());
+ assert(isVectorRegister(id->idReg1()));
+ assert(isGeneralRegister(id->idReg2()));
+ break;
+
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ assert(insOptsConvertFloatToFloat(id->idInsOpt()));
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+ assert(isValidVectorFcvtsize(srcsize));
+ assert(isValidVectorFcvtsize(dstsize));
+ assert(dstsize == id->idOpSize());
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ break;
+
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ ins = id->idIns();
+ if (ins == INS_mul)
+ {
+ assert(elemsize != EA_8BYTE); // can't use 2D or 1D
+ }
+ else if (ins == INS_pmul)
+ {
+ assert(elemsize == EA_1BYTE); // only supports 8B or 16B
+ }
+ break;
+
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, emitGetInsSC(id)));
+ // Only has encodings for H or S elemsize
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
+ break;
+
+ case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(id->idOpSize(), elemsize, emitGetInsSC(id)));
+ break;
+
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ assert(isValidVectorDatasize(id->idOpSize()));
+ assert(isValidArrangement(id->idOpSize(), id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ assert(isValidScalarDatasize(id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ assert(isValidScalarDatasize(id->idOpSize()));
+ assert(insOptsNone(id->idInsOpt()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ elemsize = id->idOpSize();
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, emitGetInsSC(id)));
+ break;
+
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idOpSize() == EA_8BYTE);
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ break;
+
+ case IF_DV_4A: // DR_4A .........X.mmmmm .aaaaannnnnddddd Rd Rn Rm Ra (scalar)
+ assert(isValidGeneralDatasize(id->idOpSize()));
+ assert(isVectorRegister(id->idReg1()));
+ assert(isVectorRegister(id->idReg2()));
+ assert(isVectorRegister(id->idReg3()));
+ assert(isVectorRegister(id->idReg4()));
+ break;
+
+ case IF_SN_0A: // SN_0A ................ ................
+ case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+ case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier
+ break;
+
+ default:
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"Unexpected format");
+ break;
+ }
+}
+#endif // DEBUG
+
+bool emitter::emitInsMayWriteToGCReg(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ switch (fmt)
+ {
+
+ // These are the formats with "destination" registers:
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+
+ case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnddddd Rd Rn Rm
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+ case IF_DR_3C: // DR_3C X..........mmmmm xxxsssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov - to general)
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general)
+
+ return true;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov - from general)
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector)
+ case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
+ // Tracked GC pointers cannot be placed into the SIMD registers.
+ return false;
+
+ // These are the load/store formats with "target" registers:
+
+ case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm xxxS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnttttt Rt Ra Rn
+ case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnttttt Rt Ra Rn imm(im7,sh)
+
+ // For the Store instructions the "target" register is actually a "source" value
+
+ if (emitInsIsStore(ins))
+ {
+ return false;
+ }
+ else
+ {
+ assert(emitInsIsLoad(ins));
+ return true;
+ }
+
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsWritesToLclVarStackLoc(instrDesc* id)
+{
+ if (!id->idIsLclVar())
+ return false;
+
+ instruction ins = id->idIns();
+
+ // This list is related to the list of instructions used to store local vars in emitIns_S_R().
+ // We don't accept writing to float local vars.
+
+ switch (ins)
+ {
+ case INS_strb:
+ case INS_strh:
+ case INS_str:
+ case INS_stur:
+ case INS_sturb:
+ case INS_sturh:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+{
+ instruction ins = id->idIns();
+
+ switch (ins)
+ {
+ case INS_ldp:
+ case INS_ldpsw:
+ case INS_ldnp:
+ return true;
+ default:
+ return false;
+ }
+}
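+
+// For example, 'ldp' loads a pair of registers (e.g. "ldp x0, x1, [sp]"), so a single
+// instruction may update two GC-tracked registers.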
+
+// For the small load/store instructions we adjust the size 'attr'
+// depending upon whether we have a load or a store.
+//
+emitAttr emitter::emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr)
+{
+ if (EA_SIZE(attr) <= EA_4BYTE)
+ {
+ if (emitInsIsLoad(ins))
+ {
+ // The value of 'ins' encodes the size to load
+ // we use EA_8BYTE here because it is the size we will write (into dataReg)
+ // it is also required when ins is INS_ldrsw
+ //
+ attr = EA_8BYTE;
+ }
+ else
+ {
+ assert(emitInsIsStore(ins));
+
+ // The value of 'ins' encodes the size to store
+ // we use EA_4BYTE here because it is the size of the register
+ // that we want to display when storing small values
+ //
+ attr = EA_4BYTE;
+ }
+ }
+ return attr;
+}
+
+// Takes an instrDesc 'id' and uses its instruction 'ins' to determine the
+// size of the target register that is written or read by the instruction.
+// Note that even when EA_4BYTE is returned, a load instruction will still
+// zero the upper 4 bytes of the target register.
+// This method is needed so that we can distinguish the sign-extending loads,
+// which can have two different sizes for their target register.
+// Additionally, instructions like 'ldr' and 'str' can load/store either
+// 4 or 8 bytes to/from the target register.
+// By convention the small unsigned load instructions are considered to write
+// a 4-byte target register, though since they also zero the upper 4 bytes they
+// could equally be considered to write the unsigned value to the full 8-byte register.
+//
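+// For example, 'ldrb w1, [x0]' and 'ldrsb w1, [x0]' both have a 4-byte target
+// register, while an 8-byte 'ldrsb x1, [x0]' has an 8-byte target register.
+//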
+emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ emitAttr result = EA_UNKNOWN;
+
+ // This is used to determine the size of the target registers for a load/store instruction
+
+ switch (ins)
+ {
+ case INS_ldrb:
+ case INS_strb:
+ case INS_ldurb:
+ case INS_sturb:
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldrh:
+ case INS_strh:
+ case INS_ldurh:
+ case INS_sturh:
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldrsb:
+ case INS_ldursb:
+ case INS_ldrsh:
+ case INS_ldursh:
+ if (id->idOpSize() == EA_8BYTE)
+ result = EA_8BYTE;
+ else
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldrsw:
+ case INS_ldursw:
+ case INS_ldpsw:
+ result = EA_8BYTE;
+ break;
+
+ case INS_ldp:
+ case INS_stp:
+ case INS_ldnp:
+ case INS_stnp:
+ result = id->idOpSize();
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ case INS_ldur:
+ case INS_stur:
+ result = id->idOpSize();
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ return result;
+}
+
+// Takes an instrDesc and uses the instruction to determine the 'size' of the
+// data that is loaded from memory.
+//
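+// For example, 'ldrsb x1, [x0]' loads EA_1BYTE of data from memory even though
+// its target register size (see emitInsTargetRegSize) is EA_8BYTE.
+//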
+emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
+{
+ instruction ins = id->idIns();
+ emitAttr result = EA_UNKNOWN;
+
+ // The 'result' returned is the 'size' of the data that is loaded from memory.
+
+ switch (ins)
+ {
+ case INS_ldrb:
+ case INS_strb:
+ case INS_ldurb:
+ case INS_sturb:
+ case INS_ldrsb:
+ case INS_ldursb:
+ result = EA_1BYTE;
+ break;
+
+ case INS_ldrh:
+ case INS_strh:
+ case INS_ldurh:
+ case INS_sturh:
+ case INS_ldrsh:
+ case INS_ldursh:
+ result = EA_2BYTE;
+ break;
+
+ case INS_ldrsw:
+ case INS_ldursw:
+ case INS_ldpsw:
+ result = EA_4BYTE;
+ break;
+
+ case INS_ldp:
+ case INS_stp:
+ case INS_ldnp:
+ case INS_stnp:
+ result = id->idOpSize();
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ case INS_ldur:
+ case INS_stur:
+ result = id->idOpSize();
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ return result;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+// clang-format off
+static const char * const xRegNames[] =
+{
+ #define REGDEF(name, rnum, mask, xname, wname) xname,
+ #include "register.h"
+};
+
+static const char * const wRegNames[] =
+{
+ #define REGDEF(name, rnum, mask, xname, wname) wname,
+ #include "register.h"
+};
+
+static const char * const vRegNames[] =
+{
+ "v0", "v1", "v2", "v3", "v4",
+ "v5", "v6", "v7", "v8", "v9",
+ "v10", "v11", "v12", "v13", "v14",
+ "v15", "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23", "v24",
+ "v25", "v26", "v27", "v28", "v29",
+ "v30", "v31"
+};
+
+static const char * const qRegNames[] =
+{
+ "q0", "q1", "q2", "q3", "q4",
+ "q5", "q6", "q7", "q8", "q9",
+ "q10", "q11", "q12", "q13", "q14",
+ "q15", "q16", "q17", "q18", "q19",
+ "q20", "q21", "q22", "q23", "q24",
+ "q25", "q26", "q27", "q28", "q29",
+ "q30", "q31"
+};
+
+static const char * const hRegNames[] =
+{
+ "h0", "h1", "h2", "h3", "h4",
+ "h5", "h6", "h7", "h8", "h9",
+ "h10", "h11", "h12", "h13", "h14",
+ "h15", "h16", "h17", "h18", "h19",
+ "h20", "h21", "h22", "h23", "h24",
+ "h25", "h26", "h27", "h28", "h29",
+ "h30", "h31"
+};
+static const char * const bRegNames[] =
+{
+ "b0", "b1", "b2", "b3", "b4",
+ "b5", "b6", "b7", "b8", "b9",
+ "b10", "b11", "b12", "b13", "b14",
+ "b15", "b16", "b17", "b18", "b19",
+ "b20", "b21", "b22", "b23", "b24",
+ "b25", "b26", "b27", "b28", "b29",
+ "b30", "b31"
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
+
+const char* emitter::emitRegName(regNumber reg, emitAttr size, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ const char* rn = nullptr;
+
+ if (size == EA_8BYTE)
+ {
+ rn = xRegNames[reg];
+ }
+ else if (size == EA_4BYTE)
+ {
+ rn = wRegNames[reg];
+ }
+ else if (isVectorRegister(reg))
+ {
+ if (size == EA_16BYTE)
+ {
+ rn = qRegNames[reg - REG_V0];
+ }
+ else if (size == EA_2BYTE)
+ {
+ rn = hRegNames[reg - REG_V0];
+ }
+ else if (size == EA_1BYTE)
+ {
+ rn = bRegNames[reg - REG_V0];
+ }
+ }
+
+ assert(rn != nullptr);
+
+ return rn;
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
+
+const char* emitter::emitVectorRegName(regNumber reg)
+{
+ assert((reg >= REG_V0) && (reg <= REG_V31));
+
+ int index = (int)reg - (int)REG_V0;
+
+ return vRegNames[index];
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Returns the base encoding of the given CPU instruction.
+ */
+
+emitter::insFormat emitter::emitInsFormat(instruction ins)
+{
+ // clang-format off
+ const static insFormat insFormats[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) fmt,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) fmt,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) fmt,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) fmt,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) fmt,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) fmt,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) fmt,
+ #include "instrs.h"
+ };
+ // clang-format on
+
+ assert(ins < ArrLen(insFormats));
+ assert((insFormats[ins] != IF_NONE));
+
+ return insFormats[ins];
+}
+
+// INST_FP is 1
+#define LD 2
+#define ST 4
+#define CMP 8
+
+// clang-format off
+/*static*/ const BYTE CodeGenInterface::instInfo[] =
+{
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) ldst | INST_FP*fp,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) ldst | INST_FP*fp,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) ldst | INST_FP*fp,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) ldst | INST_FP*fp,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) ldst | INST_FP*fp,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) ldst | INST_FP*fp,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) ldst | INST_FP*fp,
+ #include "instrs.h"
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of compare or test instruction
+ */
+
+bool emitter::emitInsIsCompare(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & CMP) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load instruction
+ */
+
+bool emitter::emitInsIsLoad(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & LD) ? true : false;
+ else
+ return false;
+}
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of store instruction
+ */
+
+bool emitter::emitInsIsStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & ST) ? true : false;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the instruction is some kind of load/store instruction
+ */
+
+bool emitter::emitInsIsLoadOrStore(instruction ins)
+{
+ // We have pseudo ins like lea which are not included in emitInsLdStTab.
+ if (ins < ArrLen(CodeGenInterface::instInfo))
+ return (CodeGenInterface::instInfo[ins] & (LD | ST)) ? true : false;
+ else
+ return false;
+}
+
+#undef LD
+#undef ST
+#undef CMP
+
+/*****************************************************************************
+ *
+ * Returns the specific encoding of the given CPU instruction and format
+ */
+
+emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt)
+{
+ // clang-format off
+ const static code_t insCodes1[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) e1,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e1,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e1,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e1,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e1,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e1,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e1,
+ #include "instrs.h"
+ };
+ const static code_t insCodes2[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) e2,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e2,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e2,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e2,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e2,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e2,
+ #include "instrs.h"
+ };
+ const static code_t insCodes3[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) e3,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e3,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e3,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e3,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e3,
+ #include "instrs.h"
+ };
+ const static code_t insCodes4[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) e4,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e4,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e4,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e4,
+ #include "instrs.h"
+ };
+ const static code_t insCodes5[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) e5,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e5,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e5,
+ #include "instrs.h"
+ };
+ const static code_t insCodes6[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) e6,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e6,
+ #include "instrs.h"
+ };
+ const static code_t insCodes7[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e7,
+ #include "instrs.h"
+ };
+ const static code_t insCodes8[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e8,
+ #include "instrs.h"
+ };
+ const static code_t insCodes9[] =
+ {
+ #define INST1(id, nm, fp, ldst, fmt, e1 )
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 )
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 )
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 )
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 )
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 )
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) e9,
+ #include "instrs.h"
+ };
+ // clang-format on
+
+ const static insFormat formatEncode9[9] = {IF_DR_2E, IF_DR_2G, IF_DI_1B, IF_DI_1D, IF_DV_3C,
+ IF_DV_2B, IF_DV_2C, IF_DV_2E, IF_DV_2F};
+ const static insFormat formatEncode6A[6] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A, IF_DV_3A, IF_DV_3E};
+ const static insFormat formatEncode5A[5] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A, IF_LS_1A};
+ const static insFormat formatEncode5B[5] = {IF_DV_2G, IF_DV_2H, IF_DV_2I, IF_DV_1A, IF_DV_1B};
+ const static insFormat formatEncode5C[5] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C, IF_DV_1B};
+ const static insFormat formatEncode4A[4] = {IF_LS_2A, IF_LS_2B, IF_LS_2C, IF_LS_3A};
+ const static insFormat formatEncode4B[4] = {IF_DR_3A, IF_DR_3B, IF_DR_3C, IF_DI_2A};
+ const static insFormat formatEncode4C[4] = {IF_DR_2A, IF_DR_2B, IF_DR_2C, IF_DI_1A};
+ const static insFormat formatEncode4D[4] = {IF_DV_3B, IF_DV_3D, IF_DV_3BI, IF_DV_3DI};
+ const static insFormat formatEncode4E[4] = {IF_DR_3A, IF_DR_3B, IF_DI_2C, IF_DV_3C};
+ const static insFormat formatEncode4F[4] = {IF_DR_3A, IF_DR_3B, IF_DV_3C, IF_DV_1B};
+ const static insFormat formatEncode4G[4] = {IF_DR_2E, IF_DR_2F, IF_DV_2M, IF_DV_2L};
+ const static insFormat formatEncode3A[3] = {IF_DR_3A, IF_DR_3B, IF_DI_2C};
+ const static insFormat formatEncode3B[3] = {IF_DR_2A, IF_DR_2B, IF_DI_1C};
+ const static insFormat formatEncode3C[3] = {IF_DR_3A, IF_DR_3B, IF_DV_3C};
+ const static insFormat formatEncode3D[3] = {IF_DV_2C, IF_DV_2D, IF_DV_2E};
+ const static insFormat formatEncode3E[3] = {IF_DV_3B, IF_DV_3BI, IF_DV_3DI};
+ const static insFormat formatEncode3F[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2H};
+ const static insFormat formatEncode3G[3] = {IF_DV_2A, IF_DV_2G, IF_DV_2I};
+ const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI};
+ const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M};
+ const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F};
+ const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B};
+ const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D};
+ const static insFormat formatEncode2D[2] = {IF_DR_3A, IF_DI_2B};
+ const static insFormat formatEncode2E[2] = {IF_LS_3B, IF_LS_3C};
+ const static insFormat formatEncode2F[2] = {IF_DR_2I, IF_DI_1F};
+ const static insFormat formatEncode2G[2] = {IF_DV_3B, IF_DV_3D};
+ const static insFormat formatEncode2H[2] = {IF_DV_2C, IF_DV_2F};
+ const static insFormat formatEncode2I[2] = {IF_DV_2K, IF_DV_1C};
+ const static insFormat formatEncode2J[2] = {IF_DV_2A, IF_DV_2G};
+ const static insFormat formatEncode2K[2] = {IF_DV_2M, IF_DV_2L};
+ const static insFormat formatEncode2L[2] = {IF_DV_2G, IF_DV_2M};
+ const static insFormat formatEncode2M[2] = {IF_DV_3A, IF_DV_3AI};
+ const static insFormat formatEncode2N[2] = {IF_DV_2N, IF_DV_2O};
+
+ code_t code = BAD_CODE;
+ insFormat insFmt = emitInsFormat(ins);
+ bool encoding_found = false;
+ int index = -1;
+
+ switch (insFmt)
+ {
+ case IF_EN9:
+ for (index = 0; index < 9; index++)
+ {
+ if (fmt == formatEncode9[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN6A:
+ for (index = 0; index < 6; index++)
+ {
+ if (fmt == formatEncode6A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5A:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5B:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN5C:
+ for (index = 0; index < 5; index++)
+ {
+ if (fmt == formatEncode5C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4A:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4B:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4C:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4D:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4D[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4E:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4E[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4F:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4F[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN4G:
+ for (index = 0; index < 4; index++)
+ {
+ if (fmt == formatEncode4G[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3A:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3B:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3C:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3D:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3D[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3E:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3E[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3F:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3F[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3G:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3G[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3H:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3H[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN3I:
+ for (index = 0; index < 3; index++)
+ {
+ if (fmt == formatEncode3I[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2A:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2A[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2B:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2B[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2C:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2C[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2D:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2D[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2E:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2E[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2F:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2F[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2G:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2G[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2H:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2H[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2I:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2I[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2J:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2J[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2K:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2K[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2L:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2L[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2M:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2M[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_EN2N:
+ for (index = 0; index < 2; index++)
+ {
+ if (fmt == formatEncode2N[index])
+ {
+ encoding_found = true;
+ break;
+ }
+ }
+ break;
+
+ case IF_BI_0A:
+ case IF_BI_0B:
+ case IF_BI_0C:
+ case IF_BI_1A:
+ case IF_BI_1B:
+ case IF_BR_1A:
+ case IF_BR_1B:
+ case IF_LS_1A:
+ case IF_LS_2A:
+ case IF_LS_2B:
+ case IF_LS_2C:
+ case IF_LS_3A:
+ case IF_LS_3B:
+ case IF_LS_3C:
+ case IF_DI_1A:
+ case IF_DI_1B:
+ case IF_DI_1C:
+ case IF_DI_1D:
+ case IF_DI_1E:
+ case IF_DI_1F:
+ case IF_DI_2A:
+ case IF_DI_2B:
+ case IF_DI_2C:
+ case IF_DI_2D:
+ case IF_DR_1D:
+ case IF_DR_2A:
+ case IF_DR_2B:
+ case IF_DR_2C:
+ case IF_DR_2D:
+ case IF_DR_2E:
+ case IF_DR_2F:
+ case IF_DR_2G:
+ case IF_DR_2H:
+ case IF_DR_2I:
+ case IF_DR_3A:
+ case IF_DR_3B:
+ case IF_DR_3C:
+ case IF_DR_3D:
+ case IF_DR_3E:
+ case IF_DR_4A:
+ case IF_DV_1A:
+ case IF_DV_1B:
+ case IF_DV_1C:
+ case IF_DV_2A:
+ case IF_DV_2B:
+ case IF_DV_2C:
+ case IF_DV_2D:
+ case IF_DV_2E:
+ case IF_DV_2F:
+ case IF_DV_2G:
+ case IF_DV_2H:
+ case IF_DV_2I:
+ case IF_DV_2J:
+ case IF_DV_2K:
+ case IF_DV_2L:
+ case IF_DV_2M:
+ case IF_DV_2N:
+ case IF_DV_2O:
+ case IF_DV_3A:
+ case IF_DV_3AI:
+ case IF_DV_3B:
+ case IF_DV_3BI:
+ case IF_DV_3C:
+ case IF_DV_3D:
+ case IF_DV_3DI:
+ case IF_DV_3E:
+ case IF_DV_4A:
+ case IF_SN_0A:
+ case IF_SI_0A:
+ case IF_SI_0B:
+
+ index = 0;
+ encoding_found = true;
+ break;
+
+ default:
+
+ encoding_found = false;
+ break;
+ }
+
+ assert(encoding_found);
+
+ switch (index)
+ {
+ case 0:
+ assert(ins < ArrLen(insCodes1));
+ code = insCodes1[ins];
+ break;
+ case 1:
+ assert(ins < ArrLen(insCodes2));
+ code = insCodes2[ins];
+ break;
+ case 2:
+ assert(ins < ArrLen(insCodes3));
+ code = insCodes3[ins];
+ break;
+ case 3:
+ assert(ins < ArrLen(insCodes4));
+ code = insCodes4[ins];
+ break;
+ case 4:
+ assert(ins < ArrLen(insCodes5));
+ code = insCodes5[ins];
+ break;
+ case 5:
+ assert(ins < ArrLen(insCodes6));
+ code = insCodes6[ins];
+ break;
+ case 6:
+ assert(ins < ArrLen(insCodes7));
+ code = insCodes7[ins];
+ break;
+ case 7:
+ assert(ins < ArrLen(insCodes8));
+ code = insCodes8[ins];
+ break;
+ case 8:
+ assert(ins < ArrLen(insCodes9));
+ code = insCodes9[ins];
+ break;
+ }
+
+ assert((code != BAD_CODE));
+
+ return code;
+}
+
+// true if this 'imm' can be encoded as an input operand to a mov instruction
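+// For example, for EA_8BYTE: 0x0000FFFF00000000 is encodable as a wide immediate (movz),
+// 0xFFFFFFFFFFFF1234 as an inverted wide immediate (movn), and 0x00FF00FF00FF00FF as a
+// bitmask immediate, while 0x12345678 is not encodable by any single mov form
+// (it would need a movz+movk sequence instead).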
+/*static*/ bool emitter::emitIns_valid_imm_for_mov(INT64 imm, emitAttr size)
+{
+ // Check for "MOV (wide immediate)".
+ if (canEncodeHalfwordImm(imm, size))
+ return true;
+
+ // Next try the ones-complement form of 'halfword immediate' imm(i16,hw),
+ // namely "MOV (inverted wide immediate)".
+ ssize_t notOfImm = NOT_helper(imm, getBitWidth(size));
+ if (canEncodeHalfwordImm(notOfImm, size))
+ return true;
+
+ // Finally try "MOV (bitmask immediate)" imm(N,r,s)
+ if (canEncodeBitMaskImm(imm, size))
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to a vector movi instruction
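+// For example, with an EA_8BYTE element every byte of 'imm' must be either 0x00 or 0xFF:
+// 0x00FF00FF00FF00FF is encodable, while 0x0102030405060708 is not.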
+/*static*/ bool emitter::emitIns_valid_imm_for_movi(INT64 imm, emitAttr elemsize)
+{
+ if (elemsize == EA_8BYTE)
+ {
+ UINT64 uimm = imm;
+ while (uimm != 0)
+ {
+ INT64 loByte = uimm & 0xFF;
+ if ((loByte == 0) || (loByte == 0xFF))
+ {
+ uimm >>= 8;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ assert(uimm == 0);
+ return true;
+ }
+ else
+ {
+ // First try the standard 'byteShifted immediate' imm(i8,bySh)
+ if (canEncodeByteShiftedImm(imm, elemsize, true))
+ return true;
+
+ // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
+ ssize_t notOfImm = NOT_helper(imm, getBitWidth(elemsize));
+ if (canEncodeByteShiftedImm(notOfImm, elemsize, true))
+ return true;
+ }
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to a fmov instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_fmov(double immDbl)
+{
+ if (canEncodeFloatImm8(immDbl))
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to an add instruction
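+// For example, 4095 (0xfff) is directly encodable, 4096 (0x1000) is encodable as
+// 1 shifted left by 12, while 4097 (0x1001) is not encodable.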
+/*static*/ bool emitter::emitIns_valid_imm_for_add(INT64 imm, emitAttr size)
+{
+ if (unsigned_abs(imm) <= 0x0fff)
+ return true;
+ else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as an input operand to a cmp instruction
+/*static*/ bool emitter::emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size)
+{
+ return emitIns_valid_imm_for_add(imm, size);
+}
+
+// true if this 'imm' can be encoded as an input operand to a non-add/sub ALU instruction
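+// For example, for EA_8BYTE: 0x00FF00FF00FF00FF and 0x7FFFFFFF are valid bitmask
+// immediates, while 0, all-ones and 0x12345678 are not.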
+/*static*/ bool emitter::emitIns_valid_imm_for_alu(INT64 imm, emitAttr size)
+{
+ if (canEncodeBitMaskImm(imm, size))
+ return true;
+
+ return false;
+}
+
+// true if this 'imm' can be encoded as the offset in a ldr/str instruction
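+// For example, with an EA_8BYTE access: 0 and -8 are encodable, 32760 (4095 * 8) is
+// encodable via the scaled unsigned form (IF_LS_2B), while 32761 (not 8-byte aligned)
+// and 32768 (too large once scaled) are not.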
+/*static*/ bool emitter::emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr attr)
+{
+ if (imm == 0)
+ return true; // Encodable using IF_LS_2A
+
+ if ((imm >= -256) && (imm <= 255))
+ return true; // Encodable using IF_LS_2C (or possibly IF_LS_2B)
+
+ if (imm < 0)
+ return false; // not encodable
+
+ emitAttr size = EA_SIZE(attr);
+ unsigned scale = NaturalScale_helper(size);
+ ssize_t mask = size - 1; // the mask of low bits that must be zero to encode the immediate
+
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ return true; // Encodable using IF_LS_2B
+
+ return false; // not encodable
+}
+
+/************************************************************************
+ *
+ * A helper method to return the natural scale for an EA 'size'
+ */
+
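+// For example: EA_1BYTE -> 0, EA_2BYTE -> 1, EA_4BYTE -> 2, EA_8BYTE -> 3, EA_16BYTE -> 4.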
+/*static*/ unsigned emitter::NaturalScale_helper(emitAttr size)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE || size == EA_4BYTE || size == EA_8BYTE || size == EA_16BYTE);
+
+ unsigned result = 0;
+ unsigned utemp = (unsigned)size;
+
+ // Compute log base 2 of utemp (aka 'size')
+ while (utemp > 1)
+ {
+ result++;
+ utemp >>= 1;
+ }
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * A helper method to perform a Rotate-Right shift operation.
+ * The source is 'value' and it is rotated right by 'sh' bits;
+ * 'value' is considered to be a fixed-size 'width' set of bits.
+ *
+ * Example
+ * value is '00001111', sh is 2 and width is 8
+ * result is '11000011'
+ */
+
+/*static*/ UINT64 emitter::ROR_helper(UINT64 value, unsigned sh, unsigned width)
+{
+ assert(width <= 64);
+ // Check that 'value' fits in 'width' bits
+ assert((width == 64) || (value < (1ULL << width)));
+ // We don't support shifts >= width
+ assert(sh < width);
+
+ UINT64 result;
+
+ unsigned rsh = sh;
+ unsigned lsh = width - rsh;
+
+ result = (value >> rsh);
+ result |= (value << lsh);
+
+ if (width < 64)
+ {
+ // mask off any extra bits that we got from the left shift
+ result &= ((1ULL << width) - 1);
+ }
+ return result;
+}
+/************************************************************************
+ *
+ * A helper method to perform a 'NOT' bitwise complement operation.
+ * 'value' is considered to be a fixed size 'width' set of bits.
+ *
+ * Example
+ * value is '01001011', and width is 8
+ * result is '10110100'
+ */
+
+/*static*/ UINT64 emitter::NOT_helper(UINT64 value, unsigned width)
+{
+ assert(width <= 64);
+
+ UINT64 result = ~value;
+
+ if (width < 64)
+ {
+ // Check that 'value' fits in 'width' bits. Don't consider "sign" bits above width.
+ UINT64 maxVal = 1ULL << width;
+ UINT64 lowBitsMask = maxVal - 1;
+ UINT64 signBitsMask = ~lowBitsMask | (1ULL << (width - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
+ assert((value < maxVal) || ((value & signBitsMask) == signBitsMask));
+
+ // mask off any extra bits that we got from the complement operation
+ result &= lowBitsMask;
+ }
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * A helper method to perform a bit Replicate operation.
+ * The source is 'value' with a fixed-size 'width' set of bits;
+ * 'value' is replicated to fill out 32 or 64 bits as determined by 'size'.
+ *
+ * Example
+ * value is '11000011' (0xC3), width is 8 and size is EA_8BYTE
+ * result is '11000011 11000011 11000011 11000011 11000011 11000011 11000011 11000011'
+ * 0xC3C3C3C3C3C3C3C3
+ */
+
+/*static*/ UINT64 emitter::Replicate_helper(UINT64 value, unsigned width, emitAttr size)
+{
+ assert(emitter::isValidGeneralDatasize(size));
+
+ unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
+ assert(width <= immWidth);
+
+ UINT64 result = value;
+ unsigned filledBits = width;
+
+ while (filledBits < immWidth)
+ {
+ value <<= width;
+ result |= value;
+ filledBits += width;
+ }
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert an imm(N,r,s) into a 64-bit immediate
+ * inputs 'bmImm' a bitMaskImm struct
+ * 'size' specifies the size of the result (64 or 32 bits)
+ */
+
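+// For example, imm(N=1, r=0, s=3) decodes to 0x000000000000000F (a run of four ones,
+// not rotated), and imm(N=0, r=0, s=0b111100) decodes to 0x5555555555555555 for EA_8BYTE
+// (a single one bit replicated in every 2-bit element).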
+/*static*/ INT64 emitter::emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned N = bmImm.immN; // read the N,R and S values from the 'bitMaskImm' encoding
+ unsigned R = bmImm.immR;
+ unsigned S = bmImm.immS;
+
+ unsigned elemWidth = 64; // used when immN == 1
+
+ if (bmImm.immN == 0) // find the smaller elemWidth when immN == 0
+ {
+ // Scan S for the highest bit not set
+ elemWidth = 32;
+ for (unsigned bitNum = 5; bitNum > 0; bitNum--)
+ {
+ unsigned oneBit = elemWidth;
+ if ((S & oneBit) == 0)
+ break;
+ elemWidth /= 2;
+ }
+ }
+ else
+ {
+ assert(size == EA_8BYTE);
+ }
+
+ unsigned maskSR = elemWidth - 1;
+
+ S &= maskSR;
+ R &= maskSR;
+
+ // encoding for S is one less than the number of consecutive one bits
+ S++; // Number of consecutive ones to generate in 'welem'
+
+ // At this point:
+ //
+ // 'elemWidth' is the number of bits that we will use for the ROR and Replicate operations
+ // 'S' is the number of consecutive 1 bits for the immediate
+ // 'R' is the number of bits that we will Rotate Right the immediate
+    // 'size'      selects the final size of the immediate that we return (64 or 32 bits)
+
+    assert(S < elemWidth); // 'elemWidth' consecutive ones is a reserved encoding
+
+ UINT64 welem;
+ UINT64 wmask;
+
+ welem = (1ULL << S) - 1;
+
+ wmask = ROR_helper(welem, R, elemWidth);
+ wmask = Replicate_helper(wmask, elemWidth, size);
+
+ return wmask;
+}
+
+/*****************************************************************************
+ *
+ * Check if an immediate can use the left shifted by 12 bits encoding
+ */
+
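+// For example, 0x00FFF000 (0xFFF << 12) can use this encoding, while 0x01000000
+// (too large once shifted right by 12) and 0x00001001 (low 12 bits not zero) cannot.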
+/*static*/ bool emitter::canEncodeWithShiftImmBy12(INT64 imm)
+{
+ if (imm < 0)
+ {
+        imm = -imm; // convert to a positive value
+ }
+
+ if (imm < 0)
+ {
+ return false; // Must be MIN_INT64
+ }
+
+ if ((imm & 0xfff) != 0) // Now the low 12 bits all have to be zero
+ {
+ return false;
+ }
+
+ imm >>= 12; // shift right by 12 bits
+
+ return (imm <= 0x0fff); // Does it fit in 12 bits
+}
+
+/*****************************************************************************
+ *
+ * Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+ */
+
+/*static*/ INT64 emitter::normalizeImm64(INT64 imm, emitAttr size)
+{
+ unsigned immWidth = getBitWidth(size);
+ INT64 result = imm;
+
+ if (immWidth < 64)
+ {
+ // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width.
+ INT64 maxVal = 1LL << immWidth;
+ INT64 lowBitsMask = maxVal - 1;
+ INT64 hiBitsMask = ~lowBitsMask;
+ INT64 signBitsMask =
+ hiBitsMask | (1LL << (immWidth - 1)); // The high bits must be set, and the top bit (sign bit) must be set.
+ assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask));
+
+ // mask off the hiBits
+ result &= lowBitsMask;
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+ */
+
+/*static*/ INT32 emitter::normalizeImm32(INT32 imm, emitAttr size)
+{
+ unsigned immWidth = getBitWidth(size);
+ INT32 result = imm;
+
+ if (immWidth < 32)
+ {
+ // Check that 'imm' fits in 'immWidth' bits. Don't consider "sign" bits above width.
+ INT32 maxVal = 1 << immWidth;
+ INT32 lowBitsMask = maxVal - 1;
+ INT32 hiBitsMask = ~lowBitsMask;
+ INT32 signBitsMask = hiBitsMask | (1 << (immWidth - 1)); // The high bits must be set, and the top bit
+ // (sign bit) must be set.
+ assert((imm < maxVal) || ((imm & signBitsMask) == signBitsMask));
+
+ // mask off the hiBits
+ result &= lowBitsMask;
+ }
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if 'imm' of 'size' bits (32/64) can be encoded
+ * using the ARM64 'bitmask immediate' form.
+ * When a non-null value is passed for 'wbBMI' then this method
+ * writes back the 'N', 'S' and 'R' values used to encode this immediate
+ *
+ */
+
+/*static*/ bool emitter::canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
+ unsigned maxLen = (size == EA_8BYTE) ? 6 : 5;
+
+ imm = normalizeImm64(imm, size);
+
+ // Starting with len=1, elemWidth is 2 bits
+ // len=2, elemWidth is 4 bits
+ // len=3, elemWidth is 8 bits
+ // len=4, elemWidth is 16 bits
+ // len=5, elemWidth is 32 bits
+ // (optionally) len=6, elemWidth is 64 bits
+ //
+ for (unsigned len = 1; (len <= maxLen); len++)
+ {
+ unsigned elemWidth = 1 << len;
+ UINT64 elemMask = ((UINT64)-1) >> (64 - elemWidth);
+ UINT64 tempImm = (UINT64)imm; // A working copy of 'imm' that we can mutate
+ UINT64 elemVal = tempImm & elemMask; // The low 'elemWidth' bits of 'imm'
+
+ // Check for all 1's or 0's as these can't be encoded
+ if ((elemVal == 0) || (elemVal == elemMask))
+ continue;
+
+ // 'checkedBits' is the count of bits that are known to match 'elemVal' when replicated
+ unsigned checkedBits = elemWidth; // by definition the first 'elemWidth' bits match
+
+ // Now check to see if each of the next bits match...
+ //
+ while (checkedBits < immWidth)
+ {
+ tempImm >>= elemWidth;
+
+ UINT64 nextElem = tempImm & elemMask;
+ if (nextElem != elemVal)
+ {
+ // Not matching, exit this loop and checkedBits will not be equal to immWidth
+ break;
+ }
+
+ // The 'nextElem' is matching, so increment 'checkedBits'
+ checkedBits += elemWidth;
+ }
+
+ // Did the full immediate contain bits that can be formed by repeating 'elemVal'?
+ if (checkedBits == immWidth)
+ {
+ // We are not quite done, since the only values that we can encode as a
+ // 'bitmask immediate' are those that can be formed by starting with a
+ // bit string of 0*1* that is rotated by some number of bits.
+ //
+ // We check to see if 'elemVal' can be formed using these restrictions.
+ //
+ // Observation:
+            //    Take any value that passes these restrictions, rotate it by one bit
+            //    and xor it with the original value: the result is a string of bits
+            //    that has exactly two 1 bits: 'elemRorXor'.
+            //    Further, the distance between the two one bits tells us the value
+            //    of S and the location of the 1 bits tells us the value of R.
+ //
+ // Some examples: (immWidth is 8)
+ //
+ // S=4,R=0 S=5,R=3 S=3,R=6
+ // elemVal: 00001111 11100011 00011100
+ // elemRor: 10000111 11110001 00001110
+ // elemRorXor: 10001000 00010010 00010010
+ // compute S 45678--- ---5678- ---3210-
+ // compute R 01234567 ---34567 ------67
+
+ UINT64 elemRor = ROR_helper(elemVal, 1, elemWidth); // Rotate 'elemVal' Right by one bit
+ UINT64 elemRorXor = elemVal ^ elemRor; // Xor elemVal and elemRor
+
+ // If we only have a two-bit change in elemROR then we can form a mask for this value
+ unsigned bitCount = 0;
+ UINT64 oneBit = 0x1;
+ unsigned R = elemWidth; // R is shift count for ROR (rotate right shift)
+ unsigned S = 0; // S is number of consecutive one bits
+ int incr = -1;
+
+ // Loop over the 'elemWidth' bits in 'elemRorXor'
+ //
+ for (unsigned bitNum = 0; bitNum < elemWidth; bitNum++)
+ {
+ if (incr == -1)
+ {
+ R--; // We decrement R by one whenever incr is -1
+ }
+ if (bitCount == 1)
+ {
+ S += incr; // We incr/decr S, after we find the first one bit in 'elemRorXor'
+ }
+
+ // Is this bit position a 1 bit in 'elemRorXor'?
+ //
+ if (oneBit & elemRorXor)
+ {
+ bitCount++;
+ // Is this the first 1 bit that we found in 'elemRorXor'?
+ if (bitCount == 1)
+ {
+ // Does this 1 bit represent a transition to zero bits?
+ bool toZeros = ((oneBit & elemVal) != 0);
+ if (toZeros)
+ {
+ // S :: Count down from elemWidth
+ S = elemWidth;
+ incr = -1;
+ }
+                    else // this 1 bit represents a transition to one bits.
+ {
+ // S :: Count up from zero
+ S = 0;
+ incr = +1;
+ }
+ }
+ else // bitCount > 1
+ {
+ // We found the second (or third...) 1 bit in 'elemRorXor'
+ incr = 0; // stop decrementing 'R'
+
+ if (bitCount > 2)
+ {
+ // More than 2 transitions from 0/1 in 'elemVal'
+ // This means that 'elemVal' can't be encoded
+ // using a 'bitmask immediate'.
+ //
+ // Furthermore, it will continue to fail
+ // with any larger 'len' that we try.
+ // so just return false.
+ //
+ return false;
+ }
+ }
+ }
+
+ // shift oneBit left by one bit to test the next position
+ oneBit <<= 1;
+ }
+
+ // We expect that bitCount will always be two at this point
+ // but just in case return false for any bad cases.
+ //
+ assert(bitCount == 2);
+ if (bitCount != 2)
+ return false;
+
+ // Perform some sanity checks on the values of 'S' and 'R'
+ assert(S > 0);
+ assert(S < elemWidth);
+ assert(R < elemWidth);
+
+ // Does the caller want us to return the N,R,S encoding values?
+ //
+ if (wbBMI != nullptr)
+ {
+
+ // The encoding used for S is one less than the
+ // number of consecutive one bits
+ S--;
+
+ if (len == 6)
+ {
+ wbBMI->immN = 1;
+ }
+ else
+ {
+ wbBMI->immN = 0;
+ // The encoding used for 'S' here is a bit peculiar.
+ //
+ // The upper bits need to be complemented, followed by a zero bit
+ // then the value of 'S-1'
+ //
+ unsigned upperBitsOfS = 64 - (1 << (len + 1));
+ S |= upperBitsOfS;
+ }
+ wbBMI->immR = R;
+ wbBMI->immS = S;
+
+ // Verify that what we are returning is correct.
+ assert(imm == emitDecodeBitMaskImm(*wbBMI, size));
+ }
+ // Tell the caller that we can successfully encode this immediate
+ // using a 'bitmask immediate'.
+ //
+ return true;
+ }
+ }
+ return false;
+}
+
+/************************************************************************
+ *
+ * Convert a 64-bit immediate into its 'bitmask immediate' representation imm(N,r,s)
+ */
+
+/*static*/ emitter::bitMaskImm emitter::emitEncodeBitMaskImm(INT64 imm, emitAttr size)
+{
+ emitter::bitMaskImm result;
+ result.immNRS = 0;
+
+ bool canEncode = canEncodeBitMaskImm(imm, size, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert an imm(i16,hw) into a 32/64-bit immediate
+ * inputs 'hwImm' a halfwordImm struct
+ * 'size' specifies the size of the result (64 or 32 bits)
+ */
+
+/*static*/ INT64 emitter::emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned hw = hwImm.immHW;
+ INT64 val = (INT64)hwImm.immVal;
+
+ assert((hw <= 1) || (size == EA_8BYTE));
+
+ INT64 result = val << (16 * hw);
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if 'imm' of 'size' bits (32/64) can be encoded
+ * using the ARM64 'halfword immediate' form.
+ * When a non-null value is passed for 'wbHWI' then this method
+ * writes back the 'immHW' and 'immVal' values used to encode this immediate
+ *
+ */
+
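+// For example, 0x0000123400000000 is encodable for EA_8BYTE (immHW=2, immVal=0x1234),
+// while 0x12345678 is not, because more than one halfword is non-zero.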
+/*static*/ bool emitter::canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI)
+{
+ assert(isValidGeneralDatasize(size)); // Only EA_4BYTE or EA_8BYTE forms
+
+ unsigned immWidth = (size == EA_8BYTE) ? 64 : 32;
+ unsigned maxHW = (size == EA_8BYTE) ? 4 : 2;
+
+ // setup immMask to a (EA_4BYTE) 0x00000000_FFFFFFFF or (EA_8BYTE) 0xFFFFFFFF_FFFFFFFF
+ const UINT64 immMask = ((UINT64)-1) >> (64 - immWidth);
+ const INT64 mask16 = (INT64)0xFFFF;
+
+ imm = normalizeImm64(imm, size);
+
+ // Try each of the valid hw shift sizes
+ for (unsigned hw = 0; (hw < maxHW); hw++)
+ {
+ INT64 curMask = mask16 << (hw * 16); // Represents the mask of the bits in the current halfword
+ INT64 checkBits = immMask & ~curMask;
+
+ // Excluding the current halfword (using ~curMask)
+ // does the immediate have zero bits in every other bit that we care about?
+ // note we care about all 64-bits for EA_8BYTE
+ // and we care about the lowest 32 bits for EA_4BYTE
+ //
+ if ((imm & checkBits) == 0)
+ {
+ // Does the caller want us to return the imm(i16,hw) encoding values?
+ //
+ if (wbHWI != nullptr)
+ {
+ INT64 val = ((imm & curMask) >> (hw * 16)) & mask16;
+ wbHWI->immHW = hw;
+ wbHWI->immVal = val;
+
+ // Verify that what we are returning is correct.
+ assert(imm == emitDecodeHalfwordImm(*wbHWI, size));
+ }
+ // Tell the caller that we can successfully encode this immediate
+ // using a 'halfword immediate'.
+ //
+ return true;
+ }
+ }
+ return false;
+}
+
+/************************************************************************
+ *
+ * Convert a 64-bit immediate into its 'halfword immediate' representation imm(i16,hw)
+ */
+
+/*static*/ emitter::halfwordImm emitter::emitEncodeHalfwordImm(INT64 imm, emitAttr size)
+{
+ emitter::halfwordImm result;
+ result.immHWVal = 0;
+
+ bool canEncode = canEncodeHalfwordImm(imm, size, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert an imm(i8,sh) into a 16/32-bit immediate
+ * inputs 'bsImm' a byteShiftedImm struct
+ * 'size' specifies the size of the result (16 or 32 bits)
+ */
+
+/*static*/ INT32 emitter::emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size)
+{
+ bool onesShift = (bsImm.immOnes == 1);
+ unsigned bySh = bsImm.immBY; // Num Bytes to shift 0,1,2,3
+ INT32 val = (INT32)bsImm.immVal; // 8-bit immediate
+ INT32 result = val;
+
+ if (bySh > 0)
+ {
+ assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+ if (size == EA_2BYTE)
+ {
+ assert(bySh < 2);
+ }
+ else
+ {
+ assert(bySh < 4);
+ }
+
+ result <<= (8 * bySh);
+
+ if (onesShift)
+ {
+ result |= ((1 << (8 * bySh)) - 1);
+ }
+ }
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if 'imm' of 'size' bits (16/32) can be encoded
+ * using the ARM64 'byteShifted immediate' form.
+ * When a non-null value is passed for 'wbBSI' then this method
+ * writes back the 'immBY' and 'immVal' values used to encode this immediate
+ *
+ */
+
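+// For example, with EA_4BYTE: 0x00BB0000 is encodable as imm8=0xBB shifted left by 2 bytes,
+// and (when allow_MSL is true) 0x0000BBFF is encodable using the shifting-ones (MSL) variant.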
+/*static*/ bool emitter::canEncodeByteShiftedImm(INT64 imm,
+ emitAttr size,
+ bool allow_MSL,
+ emitter::byteShiftedImm* wbBSI)
+{
+ bool canEncode = false;
+ bool onesShift = false; // true if we use the shifting ones variant
+ unsigned bySh = 0; // number of bytes to shift: 0, 1, 2, 3
+ unsigned imm8 = 0; // immediate to use in the encoding
+
+ imm = normalizeImm64(imm, size);
+
+ if (size == EA_1BYTE)
+ {
+ imm8 = (unsigned)imm;
+ assert(imm8 < 0x100);
+ canEncode = true;
+ }
+ else if (size == EA_8BYTE)
+ {
+ imm8 = (unsigned)imm;
+ assert(imm8 < 0x100);
+ canEncode = true;
+ }
+ else
+ {
+ assert((size == EA_2BYTE) || (size == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+
+ unsigned immWidth = (size == EA_4BYTE) ? 32 : 16;
+ unsigned maxBY = (size == EA_4BYTE) ? 4 : 2;
+
+ // setup immMask to a (EA_2BYTE) 0x0000FFFF or (EA_4BYTE) 0xFFFFFFFF
+ const UINT32 immMask = ((UINT32)-1) >> (32 - immWidth);
+ const INT32 mask8 = (INT32)0xFF;
+
+ // Try each of the valid by shift sizes
+ for (bySh = 0; (bySh < maxBY); bySh++)
+ {
+ INT32 curMask = mask8 << (bySh * 8); // Represents the mask of the bits in the current byteShifted
+ INT32 checkBits = immMask & ~curMask;
+ INT32 immCheck = (imm & checkBits);
+
+ // Excluding the current byte (using ~curMask)
+ // does the immediate have zero bits in every other bit that we care about?
+            // or can we use the shifting ones variant?
+ // note we care about all 32-bits for EA_4BYTE
+ // and we care about the lowest 16 bits for EA_2BYTE
+ //
+ if (immCheck == 0)
+ {
+ canEncode = true;
+ }
+ if (allow_MSL)
+ {
+ if ((bySh == 1) && (immCheck == 0xFF))
+ {
+ canEncode = true;
+ onesShift = true;
+ }
+ else if ((bySh == 2) && (immCheck == 0xFFFF))
+ {
+ canEncode = true;
+ onesShift = true;
+ }
+ }
+ if (canEncode)
+ {
+ imm8 = (unsigned)(((imm & curMask) >> (bySh * 8)) & mask8);
+ break;
+ }
+ }
+ }
+
+ if (canEncode)
+ {
+ // Does the caller want us to return the imm(i8,bySh) encoding values?
+ //
+ if (wbBSI != nullptr)
+ {
+ wbBSI->immOnes = onesShift;
+ wbBSI->immBY = bySh;
+ wbBSI->immVal = imm8;
+
+ // Verify that what we are returning is correct.
+ assert(imm == emitDecodeByteShiftedImm(*wbBSI, size));
+ }
+ // Tell the caller that we can successfully encode this immediate
+ // using a 'byteShifted immediate'.
+ //
+ return true;
+ }
+ return false;
+}
+
+/************************************************************************
+ *
+ * Convert a 32-bit immediate into its 'byteShifted immediate' representation imm(i8,by)
+ */
+
+/*static*/ emitter::byteShiftedImm emitter::emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL)
+{
+ emitter::byteShiftedImm result;
+ result.immBSVal = 0;
+
+ bool canEncode = canEncodeByteShiftedImm(imm, size, allow_MSL, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * Convert a 'float 8-bit immediate' into a double.
+ * inputs 'fpImm' a floatImm8 struct
+ */
+
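+// The representable values have the form +/- (16..31)/16 * 2^n with n in [-3, 4];
+// for example, immSign=0, immExp=0, immMant=0 decodes to 2.0.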
+/*static*/ double emitter::emitDecodeFloatImm8(const emitter::floatImm8 fpImm)
+{
+ unsigned sign = fpImm.immSign;
+ unsigned exp = fpImm.immExp ^ 0x4;
+ unsigned mant = fpImm.immMant + 16;
+ unsigned scale = 16 * 8;
+
+ while (exp > 0)
+ {
+ scale /= 2;
+ exp--;
+ }
+
+ double result = ((double)mant) / ((double)scale);
+ if (sign == 1)
+ {
+ result = -result;
+ }
+
+ return result;
+}
+
+/************************************************************************
+ *
+ * returns true if the 'immDbl' can be encoded using the 'float 8-bit immediate' form.
+ * Also returns the encoding if 'wbFPI' is non-null.
+ *
+ */
+
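+// For example, 2.0, 0.5 and -1.9375 (-31/16) are encodable, while 3.1 is not.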
+/*static*/ bool emitter::canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI)
+{
+ bool canEncode = false;
+ double val = immDbl;
+
+ int sign = 0;
+ if (val < 0.0)
+ {
+ val = -val;
+ sign = 1;
+ }
+
+ int exp = 0;
+ while ((val < 1.0) && (exp >= -4))
+ {
+ val *= 2.0;
+ exp--;
+ }
+ while ((val >= 2.0) && (exp <= 5))
+ {
+ val *= 0.5;
+ exp++;
+ }
+ exp += 3;
+ val *= 16.0;
+ int ival = (int)val;
+
+ if ((exp >= 0) && (exp <= 7))
+ {
+ if (val == (double)ival)
+ {
+ canEncode = true;
+
+ if (wbFPI != nullptr)
+ {
+ ival -= 16;
+ assert((ival >= 0) && (ival <= 15));
+
+ wbFPI->immSign = sign;
+ wbFPI->immExp = exp ^ 0x4;
+ wbFPI->immMant = ival;
+ unsigned imm8 = wbFPI->immFPIVal;
+ assert((imm8 >= 0) && (imm8 <= 0xff));
+ }
+ }
+ }
+
+ return canEncode;
+}
+
+/************************************************************************
+ *
+ * Convert a double into its 'float 8-bit immediate' representation
+ */
+
+/*static*/ emitter::floatImm8 emitter::emitEncodeFloatImm8(double immDbl)
+{
+ emitter::floatImm8 result;
+ result.immFPIVal = 0;
+
+ bool canEncode = canEncodeFloatImm8(immDbl, &result);
+ assert(canEncode);
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * For the given 'ins' returns the reverse instruction
+ * if one exists, otherwise returns INS_INVALID
+ */
+
+/*static*/ instruction emitter::insReverse(instruction ins)
+{
+ switch (ins)
+ {
+ case INS_add:
+ return INS_sub;
+ case INS_adds:
+ return INS_subs;
+
+ case INS_sub:
+ return INS_add;
+ case INS_subs:
+ return INS_adds;
+
+ case INS_cmp:
+ return INS_cmn;
+ case INS_cmn:
+ return INS_cmp;
+
+ case INS_ccmp:
+ return INS_ccmn;
+ case INS_ccmn:
+ return INS_ccmp;
+
+ default:
+ return INS_invalid;
+ }
+}
+
+/*****************************************************************************
+ *
+ * For the given 'datasize' and 'elemsize', makes the proper arrangement option:
+ * returns the insOpts that specifies the vector register arrangement;
+ * if one does not exist, returns INS_OPTS_NONE
+ */
+
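+// For example, (EA_16BYTE, EA_4BYTE) yields INS_OPTS_4S and (EA_8BYTE, EA_2BYTE) yields INS_OPTS_4H.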
+/*static*/ insOpts emitter::optMakeArrangement(emitAttr datasize, emitAttr elemsize)
+{
+ insOpts result = INS_OPTS_NONE;
+
+ if (datasize == EA_8BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = INS_OPTS_8B;
+ break;
+ case EA_2BYTE:
+ result = INS_OPTS_4H;
+ break;
+ case EA_4BYTE:
+ result = INS_OPTS_2S;
+ break;
+ case EA_8BYTE:
+ result = INS_OPTS_1D;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ else if (datasize == EA_16BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = INS_OPTS_16B;
+ break;
+ case EA_2BYTE:
+ result = INS_OPTS_8H;
+ break;
+ case EA_4BYTE:
+ result = INS_OPTS_4S;
+ break;
+ case EA_8BYTE:
+ result = INS_OPTS_2D;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * For the given 'datasize' and arrangement 'opt'
+ * returns true if the pair specifies a valid arrangement
+ */
+/*static*/ bool emitter::isValidArrangement(emitAttr datasize, insOpts opt)
+{
+ if (datasize == EA_8BYTE)
+ {
+ if ((opt == INS_OPTS_8B) || (opt == INS_OPTS_4H) || (opt == INS_OPTS_2S) || (opt == INS_OPTS_1D))
+ {
+ return true;
+ }
+ }
+ else if (datasize == EA_16BYTE)
+ {
+ if ((opt == INS_OPTS_16B) || (opt == INS_OPTS_8H) || (opt == INS_OPTS_4S) || (opt == INS_OPTS_2D))
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
+// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
+//
+/*static*/ emitAttr emitter::optGetDatasize(insOpts arrangement)
+{
+ if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_2S) ||
+ (arrangement == INS_OPTS_1D))
+ {
+ return EA_8BYTE;
+ }
+ else if ((arrangement == INS_OPTS_16B) || (arrangement == INS_OPTS_8H) || (arrangement == INS_OPTS_4S) ||
+ (arrangement == INS_OPTS_2D))
+ {
+ return EA_16BYTE;
+ }
+ else
+ {
+ assert(!" invalid 'arrangement' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
+// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
+//
+/*static*/ emitAttr emitter::optGetElemsize(insOpts arrangement)
+{
+ if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
+ {
+ return EA_1BYTE;
+ }
+ else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
+ {
+ return EA_2BYTE;
+ }
+ else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
+ {
+ return EA_4BYTE;
+ }
+ else if ((arrangement == INS_OPTS_1D) || (arrangement == INS_OPTS_2D))
+ {
+ return EA_8BYTE;
+ }
+ else
+ {
+ assert(!" invalid 'arrangement' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'arrangement' returns the 'widen-arrangement' specified by the vector register arrangement
+// asserts and returns INS_OPTS_NONE if an invalid 'arrangement' value is passed
+//
+/*static*/ insOpts emitter::optWidenElemsize(insOpts arrangement)
+{
+ if ((arrangement == INS_OPTS_8B) || (arrangement == INS_OPTS_16B))
+ {
+ return INS_OPTS_8H;
+ }
+ else if ((arrangement == INS_OPTS_4H) || (arrangement == INS_OPTS_8H))
+ {
+ return INS_OPTS_4S;
+ }
+ else if ((arrangement == INS_OPTS_2S) || (arrangement == INS_OPTS_4S))
+ {
+ return INS_OPTS_2D;
+ }
+ else
+ {
+ assert(!" invalid 'arrangement' value");
+ return INS_OPTS_NONE;
+ }
+}
+
+// For the given 'conversion' returns the 'dstsize' specified by the conversion option
+/*static*/ emitAttr emitter::optGetDstsize(insOpts conversion)
+{
+ switch (conversion)
+ {
+ case INS_OPTS_S_TO_8BYTE:
+ case INS_OPTS_D_TO_8BYTE:
+ case INS_OPTS_4BYTE_TO_D:
+ case INS_OPTS_8BYTE_TO_D:
+ case INS_OPTS_S_TO_D:
+ case INS_OPTS_H_TO_D:
+
+ return EA_8BYTE;
+
+ case INS_OPTS_S_TO_4BYTE:
+ case INS_OPTS_D_TO_4BYTE:
+ case INS_OPTS_4BYTE_TO_S:
+ case INS_OPTS_8BYTE_TO_S:
+ case INS_OPTS_D_TO_S:
+ case INS_OPTS_H_TO_S:
+
+ return EA_4BYTE;
+
+ case INS_OPTS_S_TO_H:
+ case INS_OPTS_D_TO_H:
+
+ return EA_2BYTE;
+
+ default:
+ assert(!" invalid 'conversion' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'conversion' returns the 'srcsize' specified by the conversion option
+/*static*/ emitAttr emitter::optGetSrcsize(insOpts conversion)
+{
+ switch (conversion)
+ {
+ case INS_OPTS_D_TO_8BYTE:
+ case INS_OPTS_D_TO_4BYTE:
+ case INS_OPTS_8BYTE_TO_D:
+ case INS_OPTS_8BYTE_TO_S:
+ case INS_OPTS_D_TO_S:
+ case INS_OPTS_D_TO_H:
+
+ return EA_8BYTE;
+
+ case INS_OPTS_S_TO_8BYTE:
+ case INS_OPTS_S_TO_4BYTE:
+ case INS_OPTS_4BYTE_TO_S:
+ case INS_OPTS_4BYTE_TO_D:
+ case INS_OPTS_S_TO_D:
+ case INS_OPTS_S_TO_H:
+
+ return EA_4BYTE;
+
+ case INS_OPTS_H_TO_S:
+ case INS_OPTS_H_TO_D:
+
+ return EA_2BYTE;
+
+ default:
+ assert(!" invalid 'conversion' value");
+ return EA_UNKNOWN;
+ }
+}
+
+// For the given 'datasize', 'elemsize' and 'index', returns true if 'index' is a valid
+// element index for a vector register of 'datasize' with elements of 'elemsize'
+/*static*/ bool emitter::isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index)
+{
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidVectorElemsize(elemsize));
+
+ bool result = false;
+ if (index >= 0)
+ {
+ if (datasize == EA_8BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = (index < 8);
+ break;
+ case EA_2BYTE:
+ result = (index < 4);
+ break;
+ case EA_4BYTE:
+ result = (index < 2);
+ break;
+ case EA_8BYTE:
+ result = (index < 1);
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ else if (datasize == EA_16BYTE)
+ {
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ result = (index < 16);
+ break;
+ case EA_2BYTE:
+ result = (index < 8);
+ break;
+ case EA_4BYTE:
+ result = (index < 4);
+ break;
+ case EA_8BYTE:
+ result = (index < 2);
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ }
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with no operands.
+ */
+
+void emitter::emitIns(instruction ins)
+{
+ instrDesc* id = emitNewInstrSmall(EA_8BYTE);
+ insFormat fmt = emitInsFormat(ins);
+
+ assert(fmt == IF_SN_0A);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a single immediate value.
+ */
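+// Usage sketch (illustrative): the only opcode handled here is INS_brk, whose immediate
+// is a comment field that must fit in 16 bits for the IF_SI_0A encoding.
+//
+//     emitIns_I(INS_brk, EA_8BYTE, 0);    // emit 'brk #0'
+//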
+
+void emitter::emitIns_I(instruction ins, emitAttr attr, ssize_t imm)
+{
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_brk:
+ if ((imm & 0x0000ffff) == imm)
+ {
+ fmt = IF_SI_0A;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_SI_0A");
+ }
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a single register.
+ */
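+// Usage sketch (illustrative; 'reg' is a placeholder general register):
+//
+//     emitIns_R(INS_ret, EA_8BYTE, reg);    // return via the address held in 'reg'
+//     emitIns_R(INS_br,  EA_8BYTE, reg);    // indirect branch through 'reg'
+//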
+
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ instrDesc* id = nullptr;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_br:
+ case INS_ret:
+ assert(isGeneralRegister(reg));
+ id = emitNewInstrSmall(attr);
+ id->idReg1(reg);
+ fmt = IF_BR_1A;
+ break;
+
+ default:
+ unreached();
+ }
+
+ assert(fmt != IF_NONE);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a constant.
+ */
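+// Usage sketch (illustrative; 'reg' is a placeholder general register). For INS_mov the
+// routine automatically picks a movz, movn or bitmask-immediate encoding:
+//
+//     emitIns_R_I(INS_mov, EA_8BYTE, reg, 0x1234);    // encoded as 'movz reg, #0x1234'
+//     emitIns_R_I(INS_cmp, EA_4BYTE, reg, 10);        // compare against a uimm12, IF_DI_1A
+//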
+
+void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ bool canEncode = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ bitMaskImm bmi;
+ halfwordImm hwi;
+ byteShiftedImm bsi;
+ ssize_t notOfImm;
+
+ case INS_tst:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg));
+ bmi.immNRS = 0;
+ canEncode = canEncodeBitMaskImm(imm, size, &bmi);
+ if (canEncode)
+ {
+ imm = bmi.immNRS;
+ assert(isValidImmNRS(imm, size));
+ fmt = IF_DI_1C;
+ }
+ break;
+
+ case INS_movk:
+ case INS_movn:
+ case INS_movz:
+ assert(isValidGeneralDatasize(size));
+ assert(insOptsNone(opt)); // No LSL here (you must use emitIns_R_I_I if a shift is needed)
+ assert(isGeneralRegister(reg));
+ assert(isValidUimm16(imm));
+
+ hwi.immHW = 0;
+ hwi.immVal = imm;
+ assert(imm == emitDecodeHalfwordImm(hwi, size));
+
+ imm = hwi.immHWVal;
+ canEncode = true;
+ fmt = IF_DI_1B;
+ break;
+
+ case INS_mov:
+ assert(isValidGeneralDatasize(size));
+ assert(insOptsNone(opt)); // No explicit LSL here
+ // We will automatically determine the shift based upon the imm
+
+ // First try the standard 'halfword immediate' imm(i16,hw)
+ hwi.immHWVal = 0;
+ canEncode = canEncodeHalfwordImm(imm, size, &hwi);
+ if (canEncode)
+ {
+ // uses a movz encoding
+ assert(isGeneralRegister(reg));
+ imm = hwi.immHWVal;
+ assert(isValidImmHWVal(imm, size));
+ fmt = IF_DI_1B;
+ break;
+ }
+
+ // Next try the ones-complement form of 'halfword immediate' imm(i16,hw)
+ notOfImm = NOT_helper(imm, getBitWidth(size));
+ canEncode = canEncodeHalfwordImm(notOfImm, size, &hwi);
+ if (canEncode)
+ {
+ assert(isGeneralRegister(reg));
+ imm = hwi.immHWVal;
+ ins = INS_movn; // uses a movn encoding
+ assert(isValidImmHWVal(imm, size));
+ fmt = IF_DI_1B;
+ break;
+ }
+
+ // Finally try the 'bitmask immediate' imm(N,r,s)
+ bmi.immNRS = 0;
+ canEncode = canEncodeBitMaskImm(imm, size, &bmi);
+ if (canEncode)
+ {
+ assert(isGeneralRegisterOrSP(reg));
+ reg = encodingSPtoZR(reg);
+ imm = bmi.immNRS;
+ assert(isValidImmNRS(imm, size));
+ fmt = IF_DI_1D;
+ break;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: mov imm");
+ }
+
+ break;
+
+ case INS_movi:
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg));
+ if (insOptsNone(opt) && (size == EA_8BYTE))
+ {
+ opt = INS_OPTS_1D;
+ }
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+
+ if (elemsize == EA_8BYTE)
+ {
+ size_t uimm = imm;
+ ssize_t imm8 = 0;
+ unsigned pos = 0;
+ canEncode = true;
+ while (uimm != 0)
+ {
+ INT64 loByte = uimm & 0xFF;
+ if (((loByte == 0) || (loByte == 0xFF)) && (pos < 8))
+ {
+ if (loByte == 0xFF)
+ {
+ imm8 |= (1 << pos);
+ }
+ uimm >>= 8;
+ pos++;
+ }
+ else
+ {
+ canEncode = false;
+ break;
+ }
+ }
+ imm = imm8;
+ assert(isValidUimm8(imm));
+ fmt = IF_DV_1B;
+ break;
+ }
+ else
+ {
+ // Vector operation
+
+ // No explicit LSL/MSL is used for the immediate
+ // We will automatically determine the shift based upon the value of imm
+
+ // First try the standard 'byteShifted immediate' imm(i8,bySh)
+ bsi.immBSVal = 0;
+ canEncode = canEncodeByteShiftedImm(imm, elemsize, true, &bsi);
+ if (canEncode)
+ {
+ imm = bsi.immBSVal;
+ assert(isValidImmBSVal(imm, size));
+ fmt = IF_DV_1B;
+ break;
+ }
+
+ // Next try the ones-complement form of the 'immediate' imm(i8,bySh)
+ if ((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)) // Only EA_2BYTE or EA_4BYTE forms
+ {
+ notOfImm = NOT_helper(imm, getBitWidth(elemsize));
+ canEncode = canEncodeByteShiftedImm(notOfImm, elemsize, true, &bsi);
+ if (canEncode)
+ {
+ imm = bsi.immBSVal;
+ ins = INS_mvni; // uses a mvni encoding
+ assert(isValidImmBSVal(imm, size));
+ fmt = IF_DV_1B;
+ break;
+ }
+ }
+ }
+ break;
+
+ case INS_orr:
+ case INS_bic:
+ case INS_mvni:
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE)); // Only EA_2BYTE or EA_4BYTE forms
+
+ // Vector operation
+
+ // No explicit LSL/MSL is used for the immediate
+ // We will automatically determine the shift based upon the value of imm
+
+ // First try the standard 'byteShifted immediate' imm(i8,bySh)
+ bsi.immBSVal = 0;
+ canEncode = canEncodeByteShiftedImm(imm, elemsize,
+ (ins == INS_mvni), // mvni supports the ones shifting variant (aka MSL)
+ &bsi);
+ if (canEncode)
+ {
+ imm = bsi.immBSVal;
+ assert(isValidImmBSVal(imm, size));
+ fmt = IF_DV_1B;
+ break;
+ }
+ break;
+
+ case INS_cmp:
+ case INS_cmn:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg));
+
+ if (unsigned_abs(imm) <= 0x0fff)
+ {
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert(isValidUimm12(imm));
+ canEncode = true;
+ fmt = IF_DI_1A;
+ }
+ else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+ {
+ // Encoding will use a 12-bit left shift of the immediate
+ opt = INS_OPTS_LSL12;
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert((imm & 0xfff) == 0);
+ imm >>= 12;
+ assert(isValidUimm12(imm));
+ canEncode = true;
+ fmt = IF_DI_1A;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_DI_1A");
+ }
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(canEncode);
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a floating point constant.
+ */
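+// Usage sketch (illustrative; 'vReg' is a placeholder vector register). Only immediates
+// representable as an 8-bit float immediate (or 0.0 for fcmp/fcmpe) can be encoded:
+//
+//     emitIns_R_F(INS_fmov, EA_8BYTE, vReg, 1.0);    // scalar double immediate, IF_DV_1A
+//     emitIns_R_F(INS_fcmp, EA_8BYTE, vReg, 0.0);    // compare against +0.0, IF_DV_1C
+//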
+
+void emitter::emitIns_R_F(
+ instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt /* = INS_OPTS_NONE */)
+
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ ssize_t imm = 0;
+ bool canEncode = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ floatImm8 fpi;
+
+ case INS_fcmp:
+ case INS_fcmpe:
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ assert(isVectorRegister(reg));
+ if (immDbl == 0.0)
+ {
+ canEncode = true;
+ fmt = IF_DV_1C;
+ }
+ break;
+
+ case INS_fmov:
+ assert(isVectorRegister(reg));
+ fpi.immFPIVal = 0;
+ canEncode = canEncodeFloatImm8(immDbl, &fpi);
+
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+
+ if (canEncode)
+ {
+ imm = fpi.immFPIVal;
+ assert((imm >= 0) && (imm <= 0xff));
+ fmt = IF_DV_1B;
+ }
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+
+ if (canEncode)
+ {
+ imm = fpi.immFPIVal;
+ assert((imm >= 0) && (imm <= 0xff));
+ fmt = IF_DV_1A;
+ }
+ }
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(canEncode);
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers
+ */
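+// Usage sketch (illustrative; 'dstReg'/'srcReg' are placeholder registers). Note that a
+// same-register 'mov' at full width is elided, since it has no effect:
+//
+//     emitIns_R_R(INS_mov,  EA_8BYTE, dstReg, srcReg);    // integer register copy
+//     emitIns_R_R(INS_sxtw, EA_8BYTE, dstReg, srcReg);    // sign-extend the low 32 bits
+//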
+
+void emitter::emitIns_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_mov:
+ assert(insOptsNone(opt));
+ // Is the mov even necessary?
+ if (reg1 == reg2)
+ {
+ // A mov with an EA_4BYTE size has the side-effect of clearing the upper 32 bits,
+ // so only eliminate mov instructions that are not clearing the upper bits.
+ //
+ if (isGeneralRegisterOrSP(reg1) && (size == EA_8BYTE))
+ {
+ return;
+ }
+ else if (isVectorRegister(reg1) && (size == EA_16BYTE))
+ {
+ return;
+ }
+ }
+
+ // Check for the 'mov' aliases for the vector registers
+ if (isVectorRegister(reg1))
+ {
+ if (isVectorRegister(reg2) && isValidVectorDatasize(size))
+ {
+ return emitIns_R_R_R(INS_mov, size, reg1, reg2, reg2);
+ }
+ else
+ {
+ return emitIns_R_R_I(INS_mov, size, reg1, reg2, 0);
+ }
+ }
+ else
+ {
+ if (isVectorRegister(reg2))
+ {
+ assert(isGeneralRegister(reg1));
+ return emitIns_R_R_I(INS_mov, size, reg1, reg2, 0);
+ }
+ }
+
+ // Is this a MOV to/from SP instruction?
+ if ((reg1 == REG_SP) || (reg2 == REG_SP))
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ assert(isGeneralRegisterOrSP(reg2));
+ reg1 = encodingSPtoZR(reg1);
+ reg2 = encodingSPtoZR(reg2);
+ fmt = IF_DR_2G;
+ }
+ else
+ {
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ fmt = IF_DR_2E;
+ }
+ break;
+
+ case INS_abs:
+ case INS_not:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ // for 'NOT' we can construct the arrangement: 8B or 16B
+ if ((ins == INS_not) && insOptsNone(opt))
+ {
+ assert(isValidVectorDatasize(size));
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ if (insOptsNone(opt))
+ {
+ // Scalar operation
+ assert(size == EA_8BYTE); // Only type D is supported
+ fmt = IF_DV_2L;
+ }
+ else
+ {
+ // Vector operation
+ assert(insOptsAnyArrangement(opt));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if (ins == INS_not)
+ {
+ assert(elemsize == EA_1BYTE);
+ }
+ fmt = IF_DV_2M;
+ }
+ break;
+
+ case INS_mvn:
+ case INS_neg:
+ if (isVectorRegister(reg1))
+ {
+ assert(isVectorRegister(reg2));
+ // for 'mvn' we can construct the arrangement: 8B or 16B
+ if ((ins == INS_mvn) && insOptsNone(opt))
+ {
+ assert(isValidVectorDatasize(size));
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ if (insOptsNone(opt))
+ {
+ // Scalar operation
+ assert(size == EA_8BYTE); // Only type D is supported
+ fmt = IF_DV_2L;
+ }
+ else
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if (ins == INS_mvn)
+ {
+ assert(elemsize == EA_1BYTE); // Only supports 8B or 16B
+ }
+ fmt = IF_DV_2M;
+ }
+ break;
+ }
+ __fallthrough;
+
+ case INS_negs:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ fmt = IF_DR_2E;
+ break;
+
+ case INS_sxtw:
+ assert(size == EA_8BYTE);
+ __fallthrough;
+
+ case INS_sxtb:
+ case INS_sxth:
+ case INS_uxtb:
+ case INS_uxth:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ fmt = IF_DR_2H;
+ break;
+
+ case INS_sxtl:
+ case INS_sxtl2:
+ case INS_uxtl:
+ case INS_uxtl2:
+ return emitIns_R_R_I(ins, size, reg1, reg2, 0, opt);
+
+ case INS_cls:
+ case INS_clz:
+ case INS_rbit:
+ case INS_rev16:
+ case INS_rev32:
+ case INS_cnt:
+ if (isVectorRegister(reg1))
+ {
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if ((ins == INS_cls) || (ins == INS_clz))
+ {
+ assert(elemsize != EA_8BYTE); // No encoding for type D
+ }
+ else if (ins == INS_rev32)
+ {
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_1BYTE));
+ }
+ else
+ {
+ assert(elemsize == EA_1BYTE); // Only supports 8B or 16B
+ }
+ fmt = IF_DV_2M;
+ break;
+ }
+ if (ins == INS_cnt)
+ {
+ // Doesn't have general register version(s)
+ break;
+ }
+
+ __fallthrough;
+
+ case INS_rev:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ if (ins == INS_rev32)
+ {
+ assert(size == EA_8BYTE);
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ }
+ fmt = IF_DR_2G;
+ break;
+
+ case INS_rev64:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // No encoding for type D
+ fmt = IF_DV_2M;
+ break;
+
+ case INS_ldr:
+ case INS_ldrb:
+ case INS_ldrh:
+ case INS_ldrsb:
+ case INS_ldrsh:
+ case INS_ldrsw:
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+
+ case INS_cmp:
+ case INS_cmn:
+ case INS_tst:
+ assert(insOptsNone(opt));
+ emitIns_R_R_I(ins, attr, reg1, reg2, 0, INS_OPTS_NONE);
+ return;
+
+ case INS_fmov:
+ assert(isValidVectorElemsizeFloat(size));
+
+ // Is the mov even necessary?
+ if (reg1 == reg2)
+ {
+ return;
+ }
+
+ if (isVectorRegister(reg1))
+ {
+ if (isVectorRegister(reg2))
+ {
+ assert(insOptsNone(opt));
+ fmt = IF_DV_2G;
+ }
+ else
+ {
+ assert(isGeneralRegister(reg2));
+
+ // If the optional conversion specifier is not present, we derive it from 'size'
+ if (opt == INS_OPTS_NONE)
+ {
+ opt = (size == EA_4BYTE) ? INS_OPTS_4BYTE_TO_S : INS_OPTS_8BYTE_TO_D;
+ }
+ assert(insOptsConvertIntToFloat(opt));
+
+ fmt = IF_DV_2I;
+ }
+ }
+ else
+ {
+ assert(isGeneralRegister(reg1));
+ assert(isVectorRegister(reg2));
+
+ // If the optional conversion specifier is not present, we derive it from 'size'
+ if (opt == INS_OPTS_NONE)
+ {
+ opt = (size == EA_4BYTE) ? INS_OPTS_S_TO_4BYTE : INS_OPTS_D_TO_8BYTE;
+ }
+ assert(insOptsConvertFloatToInt(opt));
+
+ fmt = IF_DV_2H;
+ }
+ break;
+
+ case INS_fcmp:
+ case INS_fcmpe:
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ fmt = IF_DV_2K;
+ break;
+
+ case INS_fcvtns:
+ case INS_fcvtnu:
+ case INS_fcvtas:
+ case INS_fcvtau:
+ case INS_fcvtps:
+ case INS_fcvtpu:
+ case INS_fcvtms:
+ case INS_fcvtmu:
+ case INS_fcvtzs:
+ case INS_fcvtzu:
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(isVectorRegister(reg2));
+ if (isVectorRegister(reg1))
+ {
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2G;
+ }
+ else
+ {
+ assert(isGeneralRegister(reg1));
+ assert(insOptsConvertFloatToInt(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2H;
+ }
+ }
+ break;
+
+ case INS_scvtf:
+ case INS_ucvtf:
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(isVectorRegister(reg1));
+ if (isVectorRegister(reg2))
+ {
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2G;
+ }
+ else
+ {
+ assert(isGeneralRegister(reg2));
+ assert(insOptsConvertIntToFloat(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ fmt = IF_DV_2I;
+ }
+ }
+ break;
+
+ case INS_fabs:
+ case INS_fneg:
+ case INS_fsqrt:
+ case INS_frinta:
+ case INS_frinti:
+ case INS_frintm:
+ case INS_frintn:
+ case INS_frintp:
+ case INS_frintx:
+ case INS_frintz:
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsizeFloat(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ fmt = IF_DV_2G;
+ }
+ break;
+
+ case INS_fcvt:
+ assert(insOptsConvertFloatToFloat(opt));
+ assert(isValidVectorFcvtsize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ fmt = IF_DV_2J;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSmall(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and two constants.
+ */
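+// Usage sketch (illustrative; 'reg' is a placeholder general register): insert the 16-bit
+// value 0xABCD into bits [31:16] of 'reg' without disturbing the other bits:
+//
+//     emitIns_R_I_I(INS_movk, EA_8BYTE, reg, 0xABCD, 16, INS_OPTS_LSL);
+//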
+
+void emitter::emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg, ssize_t imm1, ssize_t imm2, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ bool canEncode;
+ halfwordImm hwi;
+
+ case INS_mov:
+ ins = INS_movz; // INS_mov with LSL is an alias for INS_movz LSL
+ __fallthrough;
+
+ case INS_movk:
+ case INS_movn:
+ case INS_movz:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg));
+ assert(isValidUimm16(imm1));
+ assert(insOptsLSL(opt)); // Must be INS_OPTS_LSL
+
+ if (size == EA_8BYTE)
+ {
+ assert((imm2 == 0) || (imm2 == 16) || // shift amount: 0, 16, 32 or 48
+ (imm2 == 32) || (imm2 == 48));
+ }
+ else // EA_4BYTE
+ {
+ assert((imm2 == 0) || (imm2 == 16)); // shift amount: 0 or 16
+ }
+
+ hwi.immHWVal = 0;
+
+ switch (imm2)
+ {
+ case 0:
+ hwi.immHW = 0;
+ canEncode = true;
+ break;
+
+ case 16:
+ hwi.immHW = 1;
+ canEncode = true;
+ break;
+
+ case 32:
+ hwi.immHW = 2;
+ canEncode = true;
+ break;
+
+ case 48:
+ hwi.immHW = 3;
+ canEncode = true;
+ break;
+
+ default:
+ canEncode = false;
+ }
+
+ if (canEncode)
+ {
+ hwi.immVal = imm1;
+
+ immOut = hwi.immHWVal;
+ assert(isValidImmHWVal(immOut, size));
+ fmt = IF_DI_1B;
+ }
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, immOut);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a constant.
+ */
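+// Usage sketch (illustrative; register names are placeholders):
+//
+//     emitIns_R_R_I(INS_ldr, EA_8BYTE, dstReg, baseReg, 16);   // scaled offset load, IF_LS_2B
+//     emitIns_R_R_I(INS_add, EA_8BYTE, dstReg, srcReg, 1);     // uimm12 add, IF_DI_2A
+//     emitIns_R_R_I(INS_lsl, EA_8BYTE, dstReg, srcReg, 3);     // shift by immediate, IF_DI_2D
+//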
+
+void emitter::emitIns_R_R_I(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ bool isLdSt = false;
+ bool isSIMD = false;
+ bool isAddSub = false;
+ bool setFlags = false;
+ unsigned scale = 0;
+ bool unscaledOp = false;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ bool canEncode;
+ bitMaskImm bmi;
+
+ case INS_mov:
+ // Check for the 'mov' aliases for the vector registers
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+
+ if (isVectorRegister(reg1))
+ {
+ if (isGeneralRegisterOrZR(reg2))
+ {
+ fmt = IF_DV_2C; // Alias for 'ins'
+ break;
+ }
+ else if (isVectorRegister(reg2))
+ {
+ fmt = IF_DV_2E; // Alias for 'dup'
+ break;
+ }
+ }
+ else // isGeneralRegister(reg1)
+ {
+ assert(isGeneralRegister(reg1));
+ if (isVectorRegister(reg2))
+ {
+ fmt = IF_DV_2B; // Alias for 'umov'
+ break;
+ }
+ }
+ assert(!" invalid INS_mov operands");
+ break;
+
+ case INS_lsl:
+ case INS_lsr:
+ case INS_asr:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_ror:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DI_2B;
+ break;
+
+ case INS_sshr:
+ case INS_ssra:
+ case INS_srshr:
+ case INS_srsra:
+ case INS_shl:
+ case INS_ushr:
+ case INS_usra:
+ case INS_urshr:
+ case INS_ursra:
+ case INS_sri:
+ case INS_sli:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidImmShift(imm, elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2O;
+ break;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(size == EA_8BYTE); // only supported size
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DV_2N;
+ }
+ break;
+
+ case INS_sxtl:
+ case INS_uxtl:
+ assert(imm == 0);
+ __fallthrough;
+
+ case INS_shrn:
+ case INS_rshrn:
+ case INS_sshll:
+ case INS_ushll:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ // Vector operation
+ assert(size == EA_8BYTE);
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // Reserved encodings
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidImmShift(imm, elemsize));
+ fmt = IF_DV_2O;
+ break;
+
+ case INS_sxtl2:
+ case INS_uxtl2:
+ assert(imm == 0);
+ __fallthrough;
+
+ case INS_shrn2:
+ case INS_rshrn2:
+ case INS_sshll2:
+ case INS_ushll2:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ // Vector operation
+ assert(size == EA_16BYTE);
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // Reserved encodings
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidImmShift(imm, elemsize));
+ fmt = IF_DV_2O;
+ break;
+
+ case INS_mvn:
+ case INS_neg:
+ case INS_negs:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
+
+ fmt = IF_DR_2E;
+ }
+ else
+ {
+ if (ins == INS_mvn)
+ {
+ assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+ }
+ else // neg or negs
+ {
+ assert(insOptsAluShift(opt)); // a non-zero imm, must select shift kind, can't use ROR
+ }
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DR_2F;
+ }
+ break;
+
+ case INS_tst:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert(isGeneralRegister(reg2));
+
+ if (insOptsAnyShift(opt))
+ {
+ assert(isValidImmShift(imm, size) && (imm != 0));
+ fmt = IF_DR_2B;
+ }
+ else
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
+ assert(imm == 0);
+ fmt = IF_DR_2A;
+ }
+ break;
+
+ case INS_cmp:
+ case INS_cmn:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegisterOrSP(reg1));
+ assert(isGeneralRegister(reg2));
+
+ reg1 = encodingSPtoZR(reg1);
+ if (insOptsAnyExtend(opt))
+ {
+ assert((imm >= 0) && (imm <= 4));
+
+ fmt = IF_DR_2C;
+ }
+ else if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no alu shift kind
+
+ fmt = IF_DR_2A;
+ }
+ else
+ {
+ assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DR_2B;
+ }
+ break;
+
+ case INS_ands:
+ case INS_and:
+ case INS_eor:
+ case INS_orr:
+ assert(insOptsNone(opt));
+ assert(isGeneralRegister(reg2));
+ if (ins == INS_ands)
+ {
+ assert(isGeneralRegister(reg1));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ reg1 = encodingSPtoZR(reg1);
+ }
+
+ bmi.immNRS = 0;
+ canEncode = canEncodeBitMaskImm(imm, size, &bmi);
+ if (canEncode)
+ {
+ imm = bmi.immNRS;
+ assert(isValidImmNRS(imm, size));
+ fmt = IF_DI_2C;
+ }
+ break;
+
+ case INS_dup: // by element, imm selects the element of reg2
+ assert(isVectorRegister(reg1));
+ if (isVectorRegister(reg2))
+ {
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(size, elemsize, imm));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_2D;
+ break;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ elemsize = size;
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2E;
+ break;
+ }
+ }
+ __fallthrough;
+
+ case INS_ins: // (MOV from general)
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ assert(isVectorRegister(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2C;
+ break;
+
+ case INS_umov: // (MOV to general)
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isVectorRegister(reg2));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2B;
+ break;
+
+ case INS_smov:
+ assert(insOptsNone(opt));
+ assert(isValidVectorElemsize(size));
+ assert(size != EA_8BYTE); // no encoding, use INS_umov
+ assert(isGeneralRegister(reg1));
+ assert(isVectorRegister(reg2));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_2B;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ setFlags = false;
+ isAddSub = true;
+ break;
+
+ case INS_adds:
+ case INS_subs:
+ setFlags = true;
+ isAddSub = true;
+ break;
+
+ case INS_ldrsb:
+ case INS_ldursb:
+ // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+ assert(isValidGeneralDatasize(size));
+ unscaledOp = (ins == INS_ldursb);
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldrsh:
+ case INS_ldursh:
+ // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+ assert(isValidGeneralDatasize(size));
+ unscaledOp = (ins == INS_ldursh);
+ scale = 1;
+ isLdSt = true;
+ break;
+
+ case INS_ldrsw:
+ case INS_ldursw:
+ // 'size' specifies how we sign-extend into 4 or 8 bytes of the target register
+ assert(size == EA_8BYTE);
+ unscaledOp = (ins == INS_ldursw);
+ scale = 2;
+ isLdSt = true;
+ break;
+
+ case INS_ldrb:
+ case INS_strb:
+ // size is ignored
+ unscaledOp = false;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldurb:
+ case INS_sturb:
+ // size is ignored
+ unscaledOp = true;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldrh:
+ case INS_strh:
+ // size is ignored
+ unscaledOp = false;
+ scale = 1;
+ isLdSt = true;
+ break;
+
+ case INS_ldurh:
+ case INS_sturh:
+ // size is ignored
+ unscaledOp = true;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isGeneralRegisterOrSP(reg2));
+ isSIMD = true;
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ }
+ unscaledOp = false;
+ scale = NaturalScale_helper(size);
+ isLdSt = true;
+ break;
+
+ case INS_ldur:
+ case INS_stur:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isGeneralRegisterOrSP(reg2));
+ isSIMD = true;
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ }
+ unscaledOp = true;
+ scale = 0;
+ isLdSt = true;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ if (isLdSt)
+ {
+ assert(!isAddSub);
+
+ if (isSIMD)
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert((scale >= 0) && (scale <= 4));
+ }
+ else
+ {
+ assert(isValidGeneralLSDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert((scale >= 0) && (scale <= 3));
+ }
+
+ assert(isGeneralRegisterOrSP(reg2));
+
+ // Load/Store reserved encodings:
+ if (insOptsIndexed(opt))
+ {
+ assert(reg1 != reg2);
+ }
+
+ reg2 = encodingSPtoZR(reg2);
+
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
+
+ fmt = IF_LS_2A;
+ }
+ else if (insOptsIndexed(opt) || unscaledOp || (imm < 0) || ((imm & mask) != 0))
+ {
+ if ((imm >= -256) && (imm <= 255))
+ {
+ fmt = IF_LS_2C;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_LS_2C");
+ }
+ }
+ else if (imm > 0)
+ {
+ assert(insOptsNone(opt));
+ assert(!unscaledOp);
+
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ fmt = IF_LS_2B;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_LS_2B");
+ }
+ }
+ }
+ else if (isAddSub)
+ {
+ assert(!isLdSt);
+ assert(insOptsNone(opt));
+
+ if (setFlags) // Can't encode SP with setFlags
+ {
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ assert(isGeneralRegisterOrSP(reg2));
+
+ // Is it just a mov?
+ if (imm == 0)
+ {
+ // Is the mov even necessary?
+ if (reg1 != reg2)
+ {
+ emitIns_R_R(INS_mov, attr, reg1, reg2);
+ }
+ return;
+ }
+
+ reg1 = encodingSPtoZR(reg1);
+ reg2 = encodingSPtoZR(reg2);
+ }
+
+ if (unsigned_abs(imm) <= 0x0fff)
+ {
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert(isValidUimm12(imm));
+ fmt = IF_DI_2A;
+ }
+ else if (canEncodeWithShiftImmBy12(imm)) // Try the shifted by 12 encoding
+ {
+ // Encoding will use a 12-bit left shift of the immediate
+ opt = INS_OPTS_LSL12;
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ assert((imm & 0xfff) == 0);
+ imm >>= 12;
+ assert(isValidUimm12(imm));
+ fmt = IF_DI_2A;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: IF_DI_2A");
+ }
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a constant.
+ * Also checks for a large immediate that needs a second instruction,
+ * in which case the immediate is first loaded into reg1.
+ *
+ * - Supports instructions: add, adds, sub, subs, and, ands, eor and orr
+ * - Requires that reg1 is a general register and not SP or ZR
+ * - Requires that reg1 != reg2
+ */
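+// Usage sketch (illustrative; register names are placeholders). 0x12345 does not fit the
+// add immediate forms, so it is first materialized into dstReg and a three-register add
+// is emitted instead:
+//
+//     emitIns_R_R_Imm(INS_add, EA_8BYTE, dstReg, srcReg, 0x12345);
+//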
+void emitter::emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm)
+{
+ assert(isGeneralRegister(reg1));
+ assert(reg1 != reg2);
+
+ bool immFits = true;
+
+ switch (ins)
+ {
+ case INS_add:
+ case INS_adds:
+ case INS_sub:
+ case INS_subs:
+ immFits = emitter::emitIns_valid_imm_for_add(imm, attr);
+ break;
+
+ case INS_ands:
+ case INS_and:
+ case INS_eor:
+ case INS_orr:
+ immFits = emitter::emitIns_valid_imm_for_alu(imm, attr);
+ break;
+
+ default:
+ assert(!"Unsupported instruction in emitIns_R_R_Imm");
+ }
+
+ if (immFits)
+ {
+ emitIns_R_R_I(ins, attr, reg1, reg2, imm);
+ }
+ else
+ {
+ // Load 'imm' into the reg1 register
+ // then issue: 'ins' reg1, reg2, reg1
+ //
+ codeGen->instGen_Set_Reg_To_Imm(attr, reg1, imm);
+ emitIns_R_R_R(ins, attr, reg1, reg2, reg1);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers.
+ */
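+// Usage sketch (illustrative; register names are placeholders):
+//
+//     emitIns_R_R_R(INS_udiv, EA_8BYTE, dstReg, dividendReg, divisorReg);            // IF_DR_3A
+//     emitIns_R_R_R(INS_fadd, EA_16BYTE, dstVReg, srcVReg1, srcVReg2, INS_OPTS_4S);  // 4S vector add, IF_DV_3B
+//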
+
+void emitter::emitIns_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt) /* = INS_OPTS_NONE */
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_lsl:
+ case INS_lsr:
+ case INS_asr:
+ case INS_ror:
+ case INS_adc:
+ case INS_adcs:
+ case INS_sbc:
+ case INS_sbcs:
+ case INS_udiv:
+ case INS_sdiv:
+ case INS_mneg:
+ case INS_smull:
+ case INS_smnegl:
+ case INS_smulh:
+ case INS_umull:
+ case INS_umnegl:
+ case INS_umulh:
+ case INS_lslv:
+ case INS_lsrv:
+ case INS_asrv:
+ case INS_rorv:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ fmt = IF_DR_3A;
+ break;
+
+ case INS_mul:
+ if (insOptsNone(opt))
+ {
+ // general register
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ fmt = IF_DR_3A;
+ break;
+ }
+ __fallthrough;
+
+ case INS_mla:
+ case INS_mls:
+ case INS_pmul:
+ assert(insOptsAnyArrangement(opt));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ if (ins == INS_pmul)
+ {
+ assert(elemsize == EA_1BYTE); // only supports 8B or 16B
+ }
+ else // INS_mul, INS_mla, INS_mls
+ {
+ assert(elemsize != EA_8BYTE); // can't use 2D or 1D
+ }
+ fmt = IF_DV_3A;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ if (isVectorRegister(reg1))
+ {
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3A;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(size == EA_8BYTE);
+ fmt = IF_DV_3E;
+ }
+ break;
+ }
+ __fallthrough;
+
+ case INS_adds:
+ case INS_subs:
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE);
+ return;
+
+ case INS_saba:
+ case INS_sabd:
+ case INS_uaba:
+ case INS_uabd:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(insOptsAnyArrangement(opt));
+
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(elemsize != EA_8BYTE); // can't use 2D or 1D
+
+ fmt = IF_DV_3A;
+ break;
+
+ case INS_mov:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(reg2 == reg3);
+ assert(isValidVectorDatasize(size));
+ // INS_mov is an alias for INS_orr (vector register)
+ if (opt == INS_OPTS_NONE)
+ {
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3C;
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_eor:
+ case INS_orr:
+ case INS_orn:
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (opt == INS_OPTS_NONE)
+ {
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3C;
+ break;
+ }
+ __fallthrough;
+
+ case INS_ands:
+ case INS_bics:
+ case INS_eon:
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0, INS_OPTS_NONE);
+ return;
+
+ case INS_bsl:
+ case INS_bit:
+ case INS_bif:
+ assert(isValidVectorDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (opt == INS_OPTS_NONE)
+ {
+ elemsize = EA_1BYTE;
+ opt = optMakeArrangement(size, elemsize);
+ }
+ assert(isValidArrangement(size, opt));
+ fmt = IF_DV_3C;
+ break;
+
+ case INS_fadd:
+ case INS_fsub:
+ case INS_fdiv:
+ case INS_fmax:
+ case INS_fmin:
+ case INS_fabd:
+ case INS_fmul:
+ case INS_fmulx:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_3B;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidScalarDatasize(size));
+ fmt = IF_DV_3D;
+ }
+ break;
+
+ case INS_fnmul:
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isValidScalarDatasize(size));
+ fmt = IF_DV_3D;
+ break;
+
+ case INS_fmla:
+ case INS_fmls:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(insOptsAnyArrangement(opt)); // no scalar encoding, use 4-operand 'fmadd' or 'fmsub'
+
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_3B;
+ break;
+
+ case INS_ldr:
+ case INS_ldrb:
+ case INS_ldrh:
+ case INS_ldrsb:
+ case INS_ldrsh:
+ case INS_ldrsw:
+ case INS_str:
+ case INS_strb:
+ case INS_strh:
+ emitIns_R_R_R_Ext(ins, attr, reg1, reg2, reg3, opt);
+ return;
+
+ case INS_ldp:
+ case INS_ldpsw:
+ case INS_ldnp:
+ case INS_stp:
+ case INS_stnp:
+ emitIns_R_R_R_I(ins, attr, reg1, reg2, reg3, 0);
+ return;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers and a constant.
+ */
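+// Usage sketch (illustrative; register names other than REG_SP are placeholders):
+//
+//     emitIns_R_R_R_I(INS_stp, EA_8BYTE, dataReg1, dataReg2, REG_SP, 16);             // store pair at [sp, #16], IF_LS_3C
+//     emitIns_R_R_R_I(INS_add, EA_8BYTE, dstReg, srcReg1, srcReg2, 2, INS_OPTS_LSL);  // add with 'srcReg2, LSL #2', IF_DR_3B
+//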
+
+void emitter::emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ ssize_t imm,
+ insOpts opt /* = INS_OPTS_NONE */)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ bool isLdSt = false;
+ bool isSIMD = false;
+ bool isAddSub = false;
+ bool setFlags = false;
+ unsigned scale = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_extr:
+ assert(insOptsNone(opt));
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ assert(isValidImmShift(imm, size));
+ fmt = IF_DR_3E;
+ break;
+
+ case INS_and:
+ case INS_ands:
+ case INS_eor:
+ case INS_orr:
+ case INS_bic:
+ case INS_bics:
+ case INS_eon:
+ case INS_orn:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ assert(isValidImmShift(imm, size));
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // a zero imm, means no shift kind
+ fmt = IF_DR_3A;
+ }
+ else
+ {
+ assert(insOptsAnyShift(opt)); // a non-zero imm, must select shift kind
+ fmt = IF_DR_3B;
+ }
+ break;
+
+ case INS_fmul: // by element, imm[0..3] selects the element of reg3
+ case INS_fmla:
+ case INS_fmls:
+ case INS_fmulx:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ if (insOptsAnyArrangement(opt))
+ {
+ // Vector operation
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorElemsizeFloat(elemsize));
+ assert(isValidVectorIndex(size, elemsize, imm));
+ assert(opt != INS_OPTS_1D); // Reserved encoding
+ fmt = IF_DV_3BI;
+ }
+ else
+ {
+ // Scalar operation
+ assert(insOptsNone(opt));
+ assert(isValidScalarDatasize(size));
+ elemsize = size;
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ fmt = IF_DV_3DI;
+ }
+ break;
+
+ case INS_mul: // by element, imm[0..7] selects the element of reg3
+ case INS_mla:
+ case INS_mls:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ // Vector operation
+ assert(insOptsAnyArrangement(opt));
+ assert(isValidVectorDatasize(size));
+ assert(isValidArrangement(size, opt));
+ elemsize = optGetElemsize(opt);
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ // Only has encodings for H or S elemsize
+ assert((elemsize == EA_2BYTE) || (elemsize == EA_4BYTE));
+ // The 16-bit (H) element form only has encodings for V0..V15
+ if ((elemsize == EA_2BYTE) && (reg3 >= REG_V16))
+ {
+ noway_assert(!"Invalid reg3");
+ }
+ fmt = IF_DV_3AI;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ setFlags = false;
+ isAddSub = true;
+ break;
+
+ case INS_adds:
+ case INS_subs:
+ setFlags = true;
+ isAddSub = true;
+ break;
+
+ case INS_ldpsw:
+ scale = 2;
+ isLdSt = true;
+ break;
+
+ case INS_ldnp:
+ case INS_stnp:
+ assert(insOptsNone(opt)); // Can't use Pre/Post index on these two instructions
+ __fallthrough;
+
+ case INS_ldp:
+ case INS_stp:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ scale = NaturalScale_helper(size);
+ isSIMD = true;
+ }
+ else
+ {
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ }
+ isLdSt = true;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ if (isLdSt)
+ {
+ assert(!isAddSub);
+ assert(isGeneralRegisterOrSP(reg3));
+ assert(insOptsNone(opt) || insOptsIndexed(opt));
+
+ if (isSIMD)
+ {
+ assert(isValidVectorLSPDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert((scale >= 2) && (scale <= 4));
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ assert(isGeneralRegisterOrZR(reg2));
+ assert((scale == 2) || (scale == 3));
+ }
+
+ // Load/Store Pair reserved encodings:
+ if (emitInsIsLoad(ins))
+ {
+ assert(reg1 != reg2);
+ }
+ if (insOptsIndexed(opt))
+ {
+ assert(reg1 != reg3);
+ assert(reg2 != reg3);
+ }
+
+ reg3 = encodingSPtoZR(reg3);
+
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero
+
+ fmt = IF_LS_3B;
+ }
+ else
+ {
+ if ((imm & mask) == 0)
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ if ((imm >= -64) && (imm <= 63))
+ {
+ fmt = IF_LS_3C;
+ }
+ }
+#ifdef DEBUG
+ if (fmt != IF_LS_3C)
+ {
+ assert(!"Instruction cannot be encoded: IF_LS_3C");
+ }
+#endif
+ }
+ }
+ else if (isAddSub)
+ {
+ bool reg2IsSP = (reg2 == REG_SP);
+ assert(!isLdSt);
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg3));
+
+ if (setFlags || insOptsAluShift(opt)) // Can't encode SP in reg1 with setFlags or AluShift option
+ {
+ assert(isGeneralRegisterOrZR(reg1));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg1));
+ reg1 = encodingSPtoZR(reg1);
+ }
+
+ if (insOptsAluShift(opt)) // Can't encode SP in reg2 with AluShift option
+ {
+ assert(isGeneralRegister(reg2));
+ }
+ else
+ {
+ assert(isGeneralRegisterOrSP(reg2));
+ reg2 = encodingSPtoZR(reg2);
+ }
+
+ if (insOptsAnyExtend(opt))
+ {
+ assert((imm >= 0) && (imm <= 4));
+
+ fmt = IF_DR_3C;
+ }
+ else if (insOptsAluShift(opt))
+ {
+ // imm should be non-zero and in [1..63]
+ assert(isValidImmShift(imm, size) && (imm != 0));
+ fmt = IF_DR_3B;
+ }
+ else if (imm == 0)
+ {
+ assert(insOptsNone(opt));
+
+ if (reg2IsSP)
+ {
+ // To encode the SP register as reg2 we must use the IF_DR_3C encoding
+ // and also specify an LSL of zero (imm == 0)
+ opt = INS_OPTS_LSL;
+ fmt = IF_DR_3C;
+ }
+ else
+ {
+ fmt = IF_DR_3A;
+ }
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: Add/Sub IF_DR_3A");
+ }
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers, with an extend option
+ */
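+// Usage sketch (illustrative; register names are placeholders): load from the address
+// 'baseReg + (indexReg << 3)', i.e. a register offset scaled by the 8-byte access size:
+//
+//     emitIns_R_R_R_Ext(INS_ldr, EA_8BYTE, dstReg, baseReg, indexReg, INS_OPTS_LSL);
+//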
+
+void emitter::emitIns_R_R_R_Ext(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insOpts opt, /* = INS_OPTS_NONE */
+ int shiftAmount) /* = -1 -- unset */
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ bool isSIMD = false;
+ int scale = -1;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_ldrb:
+ case INS_ldrsb:
+ case INS_strb:
+ scale = 0;
+ break;
+
+ case INS_ldrh:
+ case INS_ldrsh:
+ case INS_strh:
+ scale = 1;
+ break;
+
+ case INS_ldrsw:
+ scale = 2;
+ break;
+
+ case INS_ldr:
+ case INS_str:
+ // Is the target a vector register?
+ if (isVectorRegister(reg1))
+ {
+ assert(isValidVectorLSDatasize(size));
+ scale = NaturalScale_helper(size);
+ isSIMD = true;
+ }
+ else
+ {
+ assert(isValidGeneralDatasize(size));
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ }
+
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(scale != -1);
+ assert(insOptsLSExtend(opt));
+
+ if (isSIMD)
+ {
+ assert(isValidVectorLSDatasize(size));
+ assert(isVectorRegister(reg1));
+ }
+ else
+ {
+ assert(isValidGeneralLSDatasize(size));
+ assert(isGeneralRegisterOrZR(reg1));
+ }
+
+ assert(isGeneralRegisterOrSP(reg2));
+ assert(isGeneralRegister(reg3));
+
+ // Load/Store reserved encodings:
+ if (insOptsIndexed(opt))
+ {
+ assert(reg1 != reg2);
+ }
+
+ if (shiftAmount == -1)
+ {
+ shiftAmount = insOptsLSL(opt) ? scale : 0;
+ }
+ assert((shiftAmount == scale) || (shiftAmount == 0));
+
+ reg2 = encodingSPtoZR(reg2);
+ fmt = IF_LS_3A;
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(opt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idReg3Scaled(shiftAmount == scale);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and two constants.
+ */
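+// Usage sketch (illustrative; register names are placeholders): extract the 16-bit field
+// starting at bit 8 of srcReg into the low bits of dstReg:
+//
+//     emitIns_R_R_I_I(INS_ubfx, EA_8BYTE, dstReg, srcReg, 8, 16);   // lsb = 8, width = 16
+//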
+
+void emitter::emitIns_R_R_I_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2)
+{
+ emitAttr size = EA_SIZE(attr);
+ emitAttr elemsize = EA_UNKNOWN;
+ insFormat fmt = IF_NONE;
+ size_t immOut = 0; // composed from imm1 and imm2 and stored in the instrDesc
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ int lsb;
+ int width;
+ bitMaskImm bmi;
+
+ case INS_bfm:
+ case INS_sbfm:
+ case INS_ubfm:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isValidImmShift(imm1, size));
+ assert(isValidImmShift(imm2, size));
+ bmi.immNRS = 0;
+ bmi.immN = (size == EA_8BYTE);
+ bmi.immR = imm1;
+ bmi.immS = imm2;
+ immOut = bmi.immNRS;
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_bfi:
+ case INS_sbfiz:
+ case INS_ubfiz:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ lsb = getBitWidth(size) - imm1;
+ width = imm2 - 1;
+ assert(isValidImmShift(lsb, size));
+ assert(isValidImmShift(width, size));
+ bmi.immNRS = 0;
+ bmi.immN = (size == EA_8BYTE);
+ bmi.immR = lsb;
+ bmi.immS = width;
+ immOut = bmi.immNRS;
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_bfxil:
+ case INS_sbfx:
+ case INS_ubfx:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ lsb = imm1;
+ width = imm2 + imm1 - 1;
+ assert(isValidImmShift(lsb, size));
+ assert(isValidImmShift(width, size));
+ bmi.immNRS = 0;
+ bmi.immN = (size == EA_8BYTE);
+ bmi.immR = lsb;
+ bmi.immS = width;
+ immOut = bmi.immNRS;
+ fmt = IF_DI_2D;
+ break;
+
+ case INS_mov:
+ case INS_ins:
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ elemsize = size;
+ assert(isValidVectorElemsize(elemsize));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm1));
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm2));
+ immOut = (imm1 << 4) + imm2;
+ fmt = IF_DV_2F;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(attr, immOut);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing four registers.
+ */
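+// Usage sketch (illustrative; register names are placeholders): multiply-add, computing
+// 'dstReg = mulReg1 * mulReg2 + addReg':
+//
+//     emitIns_R_R_R_R(INS_madd, EA_8BYTE, dstReg, mulReg1, mulReg2, addReg);   // IF_DR_4A
+//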
+
+void emitter::emitIns_R_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_madd:
+ case INS_msub:
+ case INS_smaddl:
+ case INS_smsubl:
+ case INS_umaddl:
+ case INS_umsubl:
+ assert(isValidGeneralDatasize(size));
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ assert(isGeneralRegister(reg4));
+ fmt = IF_DR_4A;
+ break;
+
+ case INS_fmadd:
+ case INS_fmsub:
+ case INS_fnmadd:
+ case INS_fnmsub:
+ // Scalar operation
+ assert(isValidScalarDatasize(size));
+ assert(isVectorRegister(reg1));
+ assert(isVectorRegister(reg2));
+ assert(isVectorRegister(reg3));
+ assert(isVectorRegister(reg4));
+ fmt = IF_DV_4A;
+ break;
+
+ case INS_invalid:
+ fmt = IF_NONE;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idReg4(reg4);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a condition code
+ */
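+// Usage sketch (illustrative; 'dstReg' is a placeholder and INS_COND_EQ is assumed to be
+// one of the insCond values): set dstReg to 1 when the EQ condition holds, else to 0:
+//
+//     emitIns_R_COND(INS_cset, EA_8BYTE, dstReg, INS_COND_EQ);
+//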
+
+void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_cset:
+ case INS_csetm:
+ assert(isGeneralRegister(reg));
+ cfi.cond = cond;
+ fmt = IF_DR_1D;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCond(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers and a condition code
+ */
+
+void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_cinc:
+ case INS_cinv:
+ case INS_cneg:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ cfi.cond = cond;
+ fmt = IF_DR_2D;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCond(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing three registers and a condition code
+ */
+
+void emitter::emitIns_R_R_R_COND(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_csel:
+ case INS_csinc:
+ case INS_csinv:
+ case INS_csneg:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ assert(isGeneralRegister(reg3));
+ cfi.cond = cond;
+ fmt = IF_DR_3D;
+ break;
+
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCond(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstr(attr);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idReg3(reg3);
+ id->idSmallCns(cfi.immCFVal);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing two registers, the flags and a condition code
+ */
+
+void emitter::emitIns_R_R_FLAGS_COND(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_ccmp:
+ case INS_ccmn:
+ assert(isGeneralRegister(reg1));
+ assert(isGeneralRegister(reg2));
+ cfi.flags = flags;
+ cfi.cond = cond;
+ fmt = IF_DR_2I;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCondFlags(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register, an immediate, the flags and a condition code
+ */
+
+void emitter::emitIns_R_I_FLAGS_COND(
+ instruction ins, emitAttr attr, regNumber reg, int imm, insCflags flags, insCond cond)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ condFlagsImm cfi;
+ cfi.immCFVal = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_ccmp:
+ case INS_ccmn:
+ assert(isGeneralRegister(reg));
+ if (imm < 0)
+ {
+ ins = insReverse(ins);
+ imm = -imm;
+ }
+ if ((imm >= 0) && (imm <= 31))
+ {
+ cfi.imm5 = imm;
+ cfi.flags = flags;
+ cfi.cond = cond;
+ fmt = IF_DI_1F;
+ }
+ else
+ {
+ assert(!"Instruction cannot be encoded: ccmp/ccmn imm5");
+ }
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+ assert(isValidImmCondFlagsImm5(cfi.immCFVal));
+
+ instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a memory barrier instruction with a 'barrier' immediate
+ */
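+// Usage sketch (illustrative; INS_BARRIER_ISH is assumed to be one of the insBarrier
+// values): emit a data memory barrier for the inner shareable domain:
+//
+//     emitIns_BARR(INS_dmb, INS_BARRIER_ISH);
+//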
+
+void emitter::emitIns_BARR(instruction ins, insBarrier barrier)
+{
+ insFormat fmt = IF_NONE;
+ ssize_t imm = 0;
+
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_dsb:
+ case INS_dmb:
+ case INS_isb:
+
+ fmt = IF_SI_0B;
+ imm = (ssize_t)barrier;
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ } // end switch (ins)
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrSC(EA_8BYTE, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ dispIns(id);
+ appendToCurIG(id);
+}
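+
+ // Illustrative example (barrier enum spelling assumed): emitIns_BARR(INS_dmb, INS_BARRIER_ISH)
+ // emits "dmb ish", an inner-shareable data memory barrier.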
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static data member operand. If 'size' is 0, the
+ * instruction operates on the address of the static member instead of its
+ * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
+ */
+
+void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ NYI("emitIns_C");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable.
+ */
+
+void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
+{
+ NYI("emitIns_S");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a stack-based local variable.
+ */
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ int disp = 0;
+ unsigned scale = 0;
+
+ assert(offs >= 0);
+
+ // TODO-ARM64-CQ: use unscaled loads?
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_strb:
+ case INS_ldrb:
+ case INS_ldrsb:
+ scale = 0;
+ break;
+
+ case INS_strh:
+ case INS_ldrh:
+ case INS_ldrsh:
+ scale = 1;
+ break;
+
+ case INS_ldrsw:
+ scale = 2;
+ break;
+
+ case INS_str:
+ case INS_ldr:
+ assert(isValidGeneralDatasize(size));
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ break;
+
+ case INS_lea:
+ assert(size == EA_8BYTE);
+ scale = 0;
+ break;
+
+ default:
+ NYI("emitIns_R_S"); // FP locals?
+ return;
+
+ } // end switch (ins)
+
+ /* Figure out the variable's frame position */
+ ssize_t imm;
+ int base;
+ bool FPbased;
+
+ base = emitComp->lvaFrameAddress(varx, &FPbased);
+ disp = base + offs;
+ assert((scale >= 0) && (scale <= 3));
+
+ regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE;
+ reg2 = encodingSPtoZR(reg2);
+
+ if (ins == INS_lea)
+ {
+ if (disp >= 0)
+ {
+ ins = INS_add;
+ imm = disp;
+ }
+ else
+ {
+ ins = INS_sub;
+ imm = -disp;
+ }
+
+ if (imm <= 0x0fff)
+ {
+ fmt = IF_DI_2A; // add reg1,reg2,#disp
+ }
+ else
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(size, rsvdReg, imm);
+ fmt = IF_DR_3A; // add reg1,reg2,rsvdReg
+ }
+ }
+ else
+ {
+ bool useRegForImm = false;
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+
+ imm = disp;
+ if (imm == 0)
+ {
+ fmt = IF_LS_2A;
+ }
+ else if ((imm < 0) || ((imm & mask) != 0))
+ {
+ if ((imm >= -256) && (imm <= 255))
+ {
+ fmt = IF_LS_2C;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+ else if (imm > 0)
+ {
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ fmt = IF_LS_2B;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+
+ if (useRegForImm)
+ {
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(size, rsvdReg, imm);
+ fmt = IF_LS_3A;
+ }
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
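+
+ // Illustrative offset selection for an 8-byte "ldr reg1, [fp|sp, #disp]" (values assumed):
+ //   disp == 0x10  : aligned and (0x10 >> 3) < 0x1000, so the scaled form IF_LS_2B is used.
+ //   disp == 0x11  : misaligned but within [-256, 255], so the unscaled form IF_LS_2C is used.
+ //   disp == 0x9000: too large for either immediate form, so the offset is materialized in the
+ //                   reserved register and the register-offset form IF_LS_3A is used.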
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable and a register
+ */
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+{
+ assert(offs >= 0);
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ int disp = 0;
+ unsigned scale = 0;
+ bool isVectorStore = false;
+
+ // TODO-ARM64-CQ: use unscaled loads?
+ /* Figure out the encoding format of the instruction */
+ switch (ins)
+ {
+ case INS_strb:
+ scale = 0;
+ assert(isGeneralRegisterOrZR(reg1));
+ break;
+
+ case INS_strh:
+ scale = 1;
+ assert(isGeneralRegisterOrZR(reg1));
+ break;
+
+ case INS_str:
+ if (isGeneralRegisterOrZR(reg1))
+ {
+ assert(isValidGeneralDatasize(size));
+ scale = (size == EA_8BYTE) ? 3 : 2;
+ }
+ else
+ {
+ assert(isVectorRegister(reg1));
+ assert(isValidVectorLSDatasize(size));
+ scale = NaturalScale_helper(size);
+ isVectorStore = true;
+ }
+ break;
+
+ default:
+ NYI("emitIns_S_R"); // FP locals?
+ return;
+
+ } // end switch (ins)
+
+ /* Figure out the variable's frame position */
+ int base;
+ bool FPbased;
+
+ base = emitComp->lvaFrameAddress(varx, &FPbased);
+ disp = base + offs;
+ assert(scale >= 0);
+ if (isVectorStore)
+ {
+ assert(scale <= 4);
+ }
+ else
+ {
+ assert(scale <= 3);
+ }
+
+ // TODO-ARM64-CQ: with compLocallocUsed, should we use REG_SAVED_LOCALLOC_SP instead?
+ regNumber reg2 = FPbased ? REG_FPBASE : REG_SPBASE;
+ reg2 = encodingSPtoZR(reg2);
+
+ bool useRegForImm = false;
+ ssize_t imm = disp;
+ ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+ if (imm == 0)
+ {
+ fmt = IF_LS_2A;
+ }
+ else if ((imm < 0) || ((imm & mask) != 0))
+ {
+ if ((imm >= -256) && (imm <= 255))
+ {
+ fmt = IF_LS_2C;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+ else if (imm > 0)
+ {
+ if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
+ {
+ imm >>= scale; // The immediate is scaled by the size of the ld/st
+
+ fmt = IF_LS_2B;
+ }
+ else
+ {
+ useRegForImm = true;
+ }
+ }
+
+ if (useRegForImm)
+ {
+ // The reserved register is not stored in idReg3() since that field overlaps with iiaLclVar.
+ // It is instead implicit when idSetIsLclVar() is set, with this encoding format.
+ regNumber rsvdReg = codeGen->rsGetRsvdReg();
+ codeGen->instGen_Set_Reg_To_Imm(size, rsvdReg, imm);
+ fmt = IF_LS_3A;
+ }
+
+ assert(fmt != IF_NONE);
+
+ instrDesc* id = emitNewInstrCns(attr, imm);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idSetIsLclVar();
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
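+
+ // Illustrative vector-store case (values assumed): for "str q0, [fp, #0x40]" the scale is
+ // NaturalScale_helper(EA_16BYTE) == 4, the offset is aligned, and (0x40 >> 4) < 0x1000, so
+ // the scaled form IF_LS_2B is used with an encoded immediate of 4.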
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a stack-based local variable and an immediate
+ */
+void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
+{
+ NYI("emitIns_S_I");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a register + static member operands.
+ * The constant is stored in JIT data adjacent to the code.
+ * No relocation is needed; the PC-relative offset is encoded directly into the instruction.
+ *
+ */
+void emitter::emitIns_R_C(
+ instruction ins, emitAttr attr, regNumber reg, regNumber addrReg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+ assert(offs >= 0);
+ assert(instrDesc::fitsInSmallCns(offs));
+
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_NONE;
+ int disp = 0;
+ instrDescJmp* id = emitNewInstrJmp();
+
+ switch (ins)
+ {
+ case INS_adr:
+ // This case computes the address of the constant data.
+ fmt = IF_LARGEADR;
+ assert(isGeneralRegister(reg));
+ assert(isValidGeneralDatasize(size));
+ break;
+
+ case INS_ldr:
+ fmt = IF_LARGELDC;
+ if (isVectorRegister(reg))
+ {
+ assert(isValidScalarDatasize(size));
+ // For vector (float/double) register, we should have an integer address reg to
+ // compute long address which consists of page address and page offset.
+ // For integer constant, this is not needed since the dest reg can be used to
+ // compute address as well as contain the final contents.
+ assert(isGeneralRegister(reg) || (addrReg != REG_NA));
+ }
+ else
+ {
+ assert(isGeneralRegister(reg));
+ assert(isValidGeneralDatasize(size));
+ }
+ break;
+ default:
+ unreached();
+ }
+
+ assert(fmt != IF_NONE);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idSmallCns(offs);
+ id->idOpSize(size);
+ id->idAddr()->iiaFieldHnd = fldHnd;
+ id->idSetIsBound(); // We won't patch address since we will know the exact distance once JIT code and data are
+ // allocated together.
+
+ id->idReg1(reg); // destination register that will get the constant value.
+ if (addrReg != REG_NA)
+ {
+ id->idReg2(addrReg); // integer register to compute long address (used for vector dest when we end up with long
+ // address)
+ }
+ id->idjShort = false; // Assume loading constant from long address
+
+ // Keep it long if it's in cold code.
+ id->idjKeepLong = emitComp->fgIsBlockCold(emitComp->compCurBB);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress)
+ id->idjKeepLong = 1;
+#endif // DEBUG
+
+ // If it's possible to be shortened, then put it in jump list
+ // to be revisited by emitJumpDistBind.
+ if (!id->idjKeepLong)
+ {
+ /* Record the jump's IG and offset within it */
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+ }
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + constant.
+ */
+
+void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val)
+{
+ NYI("emitIns_C_I");
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + register operands.
+ */
+
+void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
+{
+ assert(!"emitIns_C_R not supported for RyuJIT backend");
+}
+
+void emitter::emitIns_R_AR(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ NYI("emitIns_R_AR");
+}
+
+ // This computes an address from the immediate, which is relocatable.
+void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t addr)
+{
+ assert(EA_IS_RELOC(attr));
+ emitAttr size = EA_SIZE(attr);
+ insFormat fmt = IF_DI_1E;
+ bool needAdd = false;
+ instrDescJmp* id = emitNewInstrJmp();
+
+ switch (ins)
+ {
+ case INS_adrp:
+ // This computes the page address; the low 12-bit page offset
+ // must be added with a subsequent 'add'.
+ needAdd = true;
+ break;
+ case INS_adr:
+ break;
+ default:
+ unreached();
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idOpSize(size);
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+ id->idReg1(ireg);
+ id->idSetIsDspReloc();
+
+ dispIns(id);
+ appendToCurIG(id);
+
+ if (needAdd)
+ {
+ // add reg, reg, imm
+ ins = INS_add;
+ fmt = IF_DI_2A;
+ instrDesc* id = emitAllocInstr(attr);
+ assert(id->idIsReloc());
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idInsOpt(INS_OPTS_NONE);
+ id->idOpSize(size);
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+ id->idReg1(ireg);
+ id->idReg2(ireg);
+
+ dispIns(id);
+ appendToCurIG(id);
+ }
+}
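+
+ // Illustrative sequence (reloc attr and register assumed): emitIns_R_AI(INS_adrp,
+ // EA_PTR_DSP_RELOC, REG_R0, addr) emits "adrp x0, [rel page]" followed by
+ // "add x0, x0, [page offset]", both flagged as displacement relocations.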
+
+void emitter::emitIns_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie /* = 0 */,
+ void* clsCookie /* = NULL */)
+{
+ NYI("emitIns_AR_R");
+}
+
+void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ NYI("emitIns_R_ARR");
+}
+
+void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp)
+{
+ NYI("emitIns_R_ARR");
+}
+
+void emitter::emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp)
+{
+ NYI("emitIns_R_ARR");
+}
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the short encoding
+ *
+ */
+void emitter::emitSetShortJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ return;
+
+ insFormat fmt = IF_NONE;
+ if (emitIsCondJump(id))
+ {
+ fmt = IF_BI_0B;
+ }
+ else if (emitIsLoadLabel(id))
+ {
+ fmt = IF_DI_1E;
+ }
+ else if (emitIsLoadConstant(id))
+ {
+ fmt = IF_LS_1A;
+ }
+ else
+ {
+ unreached();
+ }
+
+ id->idInsFmt(fmt);
+ id->idjShort = true;
+}
+
+/*****************************************************************************
+ *
+ * Add a label instruction.
+ */
+
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ insFormat fmt = IF_NONE;
+
+ switch (ins)
+ {
+ case INS_adr:
+ fmt = IF_LARGEADR;
+ break;
+ default:
+ unreached();
+ }
+
+ instrDescJmp* id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idjShort = false;
+ id->idAddr()->iiaBBlabel = dst;
+ id->idReg1(reg);
+ id->idOpSize(EA_PTRSIZE);
+
+#ifdef DEBUG
+ // Mark the catch return
+ if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ id->idDebugOnlyInfo()->idCatchRet = true;
+ }
+#endif // DEBUG
+
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress)
+ id->idjKeepLong = 1;
+#endif // DEBUG
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
+
+/*****************************************************************************
+ *
+ * Add a data label instruction.
+ */
+
+void emitter::emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg)
+{
+ NYI("emitIns_R_D");
+}
+
+void emitter::emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ NYI("emitIns_J_R");
+}
+
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
+{
+ insFormat fmt = IF_NONE;
+
+ if (dst != nullptr)
+ {
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+ }
+ else
+ {
+ assert(instrCount != 0);
+ }
+
+ /* Figure out the encoding format of the instruction */
+
+ bool idjShort = false;
+ switch (ins)
+ {
+ case INS_bl_local:
+ case INS_b:
+ // Unconditional jump is a single form.
+ idjShort = true;
+ fmt = IF_BI_0A;
+ break;
+
+ case INS_beq:
+ case INS_bne:
+ case INS_bhs:
+ case INS_blo:
+ case INS_bmi:
+ case INS_bpl:
+ case INS_bvs:
+ case INS_bvc:
+ case INS_bhi:
+ case INS_bls:
+ case INS_bge:
+ case INS_blt:
+ case INS_bgt:
+ case INS_ble:
+ // Assume conditional jump is long.
+ fmt = IF_LARGEJMP;
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+
+ instrDescJmp* id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idjShort = idjShort;
+
+#ifdef DEBUG
+ // Mark the finally call
+ if (ins == INS_bl_local && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ id->idDebugOnlyInfo()->idFinallyCall = true;
+ }
+#endif // DEBUG
+
+ if (dst != nullptr)
+ {
+ id->idAddr()->iiaBBlabel = dst;
+
+ // Skip unconditional jump that has a single form.
+ // TODO-ARM64-NYI: enable hot/cold splitting.
+ // The target needs to be relocated.
+ if (!idjShort)
+ {
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+#ifdef DEBUG
+ if (emitComp->opts.compLongAddress) // Force long branches
+ id->idjKeepLong = 1;
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ id->idAddr()->iiaSetInstrCount(instrCount);
+ id->idjKeepLong = false;
+ /* This jump must be short */
+ emitSetShortJump(id);
+ id->idSetIsBound();
+ }
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ dispIns(id);
+ appendToCurIG(id);
+}
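+
+ // Illustrative lifetime of a conditional branch (names from the code above): emitIns_J(INS_beq,
+ // dst) starts out as IF_LARGEJMP (a reverse-condition branch around an unconditional branch,
+ // 8 bytes); emitJumpDistBind may later shrink it to a single short "b.eq" (IF_BI_0B) via
+ // emitSetShortJump when the target turns out to be in range.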
+
+/*****************************************************************************
+ *
+ * Add a call instruction (direct or indirect).
+ * argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN : addr is the method address
+ * EC_FUNC_ADDR : addr is the absolute address of the function
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R : "call ireg".
+ *
+ * For ARM64, xreg, xmul and disp are never used and should always be 0/REG_NA.
+ *
+ * Please consult the "debugger team notification" comment in genFnProlog().
+ */
+
+void emitter::emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset /* = BAD_IL_OFFSET */,
+ regNumber ireg /* = REG_NA */,
+ regNumber xreg /* = REG_NA */,
+ unsigned xmul /* = 0 */,
+ ssize_t disp /* = 0 */,
+ bool isJump /* = false */,
+ bool isNoGC /* = false */,
+ bool isProfLeaveCB /* = false */)
+{
+ /* Sanity check the arguments depending on callType */
+
+ assert(callType < EC_COUNT);
+ assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_ADDR) ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType < EC_INDIR_R || addr == NULL);
+ assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+
+ // ARM never uses these
+ assert(xreg == REG_NA && xmul == 0 && disp == 0);
+
+ // Our stack level should always be at least as large as the number of argument bytes we push.
+ // Just a sanity test.
+ assert((unsigned)abs(argSize) <= codeGen->genStackLevel);
+
+ int argCnt;
+ instrDesc* id;
+
+ /* This is the saved set of registers after a normal call */
+ regMaskTP savedSet = RBM_CALLEE_SAVED;
+
+ /* some special helper calls have a different saved set registers */
+
+ if (isNoGC)
+ {
+ assert(emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+
+ // This call will preserve the liveness of most registers
+ //
+ // - On the ARM64 the NOGC helpers will preserve all registers,
+ // except for those listed in the RBM_CALLEE_TRASH_NOGC mask
+
+ savedSet = RBM_ALLINT & ~RBM_CALLEE_TRASH_NOGC;
+
+ // In case of Leave profiler callback, we need to preserve liveness of REG_PROFILER_RET_SCRATCH
+ if (isProfLeaveCB)
+ {
+ savedSet |= RBM_PROFILER_RET_SCRATCH;
+ }
+ }
+ else
+ {
+ assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+ }
+
+ /* Trim out any callee-trashed registers from the live set */
+
+ gcrefRegs &= savedSet;
+ byrefRegs &= savedSet;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("Call: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+ dumpConvertedVarSet(emitComp, ptrVars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+
+ assert(argSize % REGSIZE_BYTES == 0);
+ argCnt = (int)(argSize / (int)sizeof(void*));
+
+#ifdef DEBUGGING_SUPPORT
+ /* Managed RetVal: emit sequence point for the call */
+ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
+ {
+ codeGen->genIPmappingAdd(ilOffset, false);
+ }
+#endif
+
+ /*
+ We need to allocate the appropriate instruction descriptor based
+ on whether this is a direct/indirect call, and whether we need to
+ record an updated set of live GC variables.
+ */
+
+ if (callType >= EC_INDIR_R)
+ {
+ /* Indirect call, virtual calls */
+
+ assert(callType == EC_INDIR_R);
+
+ id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+ }
+ else
+ {
+ /* Helper/static/nonvirtual/function calls (direct or through handle),
+ and calls to an absolute addr. */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs, retSize, secondRetSize);
+ }
+
+ /* Update the emitter's live GC ref sets */
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+ emitThisGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = byrefRegs;
+
+ /* Set the instruction - special case jumping a function */
+ instruction ins;
+ insFormat fmt = IF_NONE;
+
+ id->idSetIsNoGC(isNoGC);
+
+ /* Record the address: method, indirection, or funcptr */
+
+ if (callType > EC_FUNC_ADDR)
+ {
+ /* This is an indirect call (either a virtual call or func ptr call) */
+
+ switch (callType)
+ {
+ case EC_INDIR_R: // the address is in a register
+
+ id->idSetIsCallRegPtr();
+
+ if (isJump)
+ {
+ ins = INS_br_tail; // INS_br_tail Reg
+ }
+ else
+ {
+ ins = INS_blr; // INS_blr Reg
+ }
+ fmt = IF_BR_1B;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idReg3(ireg);
+ assert(xreg == REG_NA);
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ }
+ else
+ {
+ /* This is a simple direct call: "call helper/method/addr" */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ assert(addr != NULL);
+
+ if (isJump)
+ {
+ ins = INS_b_tail; // INS_b_tail imm28
+ }
+ else
+ {
+ ins = INS_bl; // INS_bl imm28
+ }
+ fmt = IF_BI_0C;
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+
+ if (callType == EC_FUNC_ADDR)
+ {
+ id->idSetIsCallAddr();
+ }
+
+#if RELOC_SUPPORT
+ if (emitComp->opts.compReloc)
+ {
+ id->idSetIsDspReloc();
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ if (id->idIsLargeCall())
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ }
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+ id->idDebugOnlyInfo()->idClsCookie = 0;
+ id->idDebugOnlyInfo()->idCallSig = sigInfo;
+#endif
+
+#if defined(LATE_DISASM)
+ if (addr != nullptr)
+ {
+ codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+ }
+#endif // defined(LATE_DISASM)
+
+ dispIns(id);
+ appendToCurIG(id);
+}
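+
+ // Usage summary drawn from the cases above: EC_INDIR_R with 'ireg' produces "blr ireg"
+ // (or "br ireg" for a tail call, IF_BR_1B); EC_FUNC_TOKEN/EC_FUNC_ADDR with 'addr' produces
+ // "bl imm28" (or "b imm28" for a tail call, IF_BI_0C), with a relocation when compReloc is set.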
+
+/*****************************************************************************
+ *
+ * Returns true if 'imm' is valid Cond encoding
+ */
+
+/*static*/ bool emitter::isValidImmCond(ssize_t imm)
+{
+ // range check the ssize_t value, to make sure it is a small unsigned value
+ // and that only the bits in the cfi.cond are set
+ if ((imm < 0) || (imm > 0xF))
+ return false;
+
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+
+ return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
+
+/*****************************************************************************
+ *
+ * Returns true if 'imm' is valid Cond/Flags encoding
+ */
+
+/*static*/ bool emitter::isValidImmCondFlags(ssize_t imm)
+{
+ // range check the ssize_t value, to make sure it is a small unsigned value
+ // and that only the bits in the cfi.cond or cfi.flags are set
+ if ((imm < 0) || (imm > 0xFF))
+ return false;
+
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+
+ return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
+
+/*****************************************************************************
+ *
+ * Returns true if 'imm' is valid Cond/Flags/Imm5 encoding
+ */
+
+/*static*/ bool emitter::isValidImmCondFlagsImm5(ssize_t imm)
+{
+ // range check the ssize_t value, to make sure it is a small unsigned value
+ // and that only the bits in the cfi.cond, cfi.flags or cfi.imm5 are set
+ if ((imm < 0) || (imm > 0x1FFF))
+ return false;
+
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+
+ return (cfi.cond <= INS_COND_LE); // Don't allow 14 & 15 (AL & NV).
+}
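+
+ // A sketch of the condFlagsImm layout implied by the three range checks above (exact bitfield
+ // widths assumed): bits [3:0] hold 'cond' (<= 0xF), bits [7:4] hold 'flags' (together <= 0xFF),
+ // and bits [12:8] hold 'imm5' (together <= 0x1FFF).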
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rd' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rd(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rt' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rt(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rn' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rn(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 5;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Rm' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Rm(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 16;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Ra' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Ra(regNumber reg)
+{
+ assert(isIntegerRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 10;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vd' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vd(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vt' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vt(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vn' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vn(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 5;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Vm' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Vm(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 16;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register used in the 'Va' position
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg_Va(regNumber reg)
+{
+ assert(emitter::isVectorRegister(reg));
+ emitter::code_t ureg = (emitter::code_t)reg - (emitter::code_t)REG_V0;
+ assert((ureg >= 0) && (ureg <= 31));
+ return ureg << 10;
+}
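+
+ // These field encoders are simply OR-ed into the base opcode. For example (illustrative), a
+ // three-register integer instruction in the DR_3A form composes its registers as:
+ //   code |= insEncodeReg_Rd(reg1); // bits [4:0]
+ //   code |= insEncodeReg_Rn(reg2); // bits [9:5]
+ //   code |= insEncodeReg_Rm(reg3); // bits [20:16]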
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified condition code.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeCond(insCond cond)
+{
+ emitter::code_t uimm = (emitter::code_t)cond;
+ return uimm << 12;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the condition code with the lowest bit inverted (marked by invert(<cond>) in the
+ * architecture manual).
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeInvertedCond(insCond cond)
+{
+ emitter::code_t uimm = (emitter::code_t)cond;
+ uimm ^= 1; // invert the lowest bit
+ return uimm << 12;
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified flags.
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeFlags(insCflags flags)
+{
+ emitter::code_t uimm = (emitter::code_t)flags;
+ return uimm;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for the Shift Count bits to be used for Arm64 encodings
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeShiftCount(ssize_t imm, emitAttr size)
+{
+ assert((imm & 0x003F) == imm);
+ assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
+
+ return (emitter::code_t)imm << 10;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select a 64-bit datasize for an Arm64 instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasize(emitAttr size)
+{
+ if (size == EA_8BYTE)
+ {
+ return 0x80000000; // set the bit at location 31
+ }
+ else
+ {
+ assert(size == EA_4BYTE);
+ return 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the datasize for the general load/store Arm64 instructions
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeLS(emitter::code_t code, emitAttr size)
+{
+ if (code & 0x00800000) // Is this a sign-extending opcode? (i.e. ldrsw, ldrsh, ldrsb)
+ {
+ assert((size == EA_4BYTE) || (size == EA_8BYTE));
+ if ((code & 0x80000000) == 0) // Is it a ldrsh or ldrsb and not ldrsw ?
+ {
+ if (size == EA_4BYTE) // Do we need to encode the 32-bit Rt size bit?
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ }
+ }
+ else if (code & 0x80000000) // Is this a ldr/str/ldur/stur opcode?
+ {
+ assert((size == EA_4BYTE) || (size == EA_8BYTE));
+ if (size == EA_8BYTE) // Do we need to encode the 64-bit size bit?
+ {
+ return 0x40000000; // set the bit at location 30
+ }
+ }
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeVLS(emitter::code_t code, emitAttr size)
+{
+ code_t result = 0;
+
+ // Check bit 29
+ if ((code & 0x20000000) == 0)
+ {
+ // LDR literal
+
+ if (size == EA_16BYTE)
+ {
+ // set the operation size in bit 31
+ result = 0x80000000;
+ }
+ else if (size == EA_8BYTE)
+ {
+ // set the operation size in bit 30
+ result = 0x40000000;
+ }
+ else
+ {
+ assert(size == EA_4BYTE);
+ // no bits are set
+ result = 0x00000000;
+ }
+ }
+ else
+ {
+ // LDR non-literal
+
+ if (size == EA_16BYTE)
+ {
+ // The operation size in bits 31 and 30 are zero
+ // Bit 23 specifies a 128-bit Load/Store
+ result = 0x00800000;
+ }
+ else if (size == EA_8BYTE)
+ {
+ // set the operation size in bits 31 and 30
+ result = 0xC0000000;
+ }
+ else if (size == EA_4BYTE)
+ {
+ // set the operation size in bit 31
+ result = 0x80000000;
+ }
+ else if (size == EA_2BYTE)
+ {
+ // set the operation size in bit 30
+ result = 0x40000000;
+ }
+ else
+ {
+ assert(size == EA_1BYTE);
+ // The operation size in bits 31 and 30 are zero
+ result = 0x00000000;
+ }
+ }
+
+ // Or in bit 26 to indicate a Vector register is used as 'target'
+ result |= 0x04000000;
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeVPLS(emitter::code_t code, emitAttr size)
+{
+ code_t result = 0;
+
+ if (size == EA_16BYTE)
+ {
+ // set the operation size in bit 31 (a 128-bit SIMD&FP load/store pair)
+ result = 0x80000000;
+ }
+ else if (size == EA_8BYTE)
+ {
+ // set the operation size in bit 30
+ result = 0x40000000;
+ }
+ else if (size == EA_4BYTE)
+ {
+ // the operation size bits 31 and 30 are zero
+ result = 0x00000000;
+ }
+
+ // Or in bit 26 to indicate a Vector register is used as 'target'
+ result |= 0x04000000;
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to set the size bit and the N bits for a 'bitfield' instruction
+ *
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeDatasizeBF(emitter::code_t code, emitAttr size)
+{
+ // is bit 30 equal to 0?
+ if ((code & 0x40000000) == 0) // is the opcode one of extr, sxtb, sxth or sxtw
+ {
+ if (size == EA_8BYTE) // Do we need to set the sf and N bits?
+ {
+ return 0x80400000; // set the sf-bit at location 31 and the N-bit at location 22
+ }
+ }
+ return 0; // don't set any bits
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 64/128-bit datasize for an Arm64 vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeVectorsize(emitAttr size)
+{
+ if (size == EA_16BYTE)
+ {
+ return 0x40000000; // set the bit at location 30
+ }
+ else
+ {
+ assert(size == EA_8BYTE);
+ return 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select 'index' for an Arm64 vector elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndex(emitAttr elemsize, ssize_t index)
+{
+ code_t bits = (code_t)index;
+ if (elemsize == EA_1BYTE)
+ {
+ bits <<= 1;
+ bits |= 1;
+ }
+ else if (elemsize == EA_2BYTE)
+ {
+ bits <<= 2;
+ bits |= 2;
+ }
+ else if (elemsize == EA_4BYTE)
+ {
+ bits <<= 3;
+ bits |= 4;
+ }
+ else
+ {
+ assert(elemsize == EA_8BYTE);
+ bits <<= 4;
+ bits |= 8;
+ }
+ assert((bits >= 1) && (bits <= 0x1f));
+
+ return (bits << 16); // bits at locations [20,19,18,17,16]
+}
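+
+ // Worked example: for a 4-byte element at index 2, bits = (2 << 3) | 4 = 0b10100; shifted to
+ // [20:16] this yields 0x00140000 (the lowest set bit encodes the element size, the bits above
+ // it encode the index).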
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2)
+{
+ code_t bits = (code_t)index2;
+ if (elemsize == EA_1BYTE)
+ {
+ // bits are correct
+ }
+ else if (elemsize == EA_2BYTE)
+ {
+ bits <<= 1;
+ }
+ else if (elemsize == EA_4BYTE)
+ {
+ bits <<= 2;
+ }
+ else
+ {
+ assert(elemsize == EA_8BYTE);
+ bits <<= 3;
+ }
+ assert((bits >= 0) && (bits <= 0xf));
+
+ return (bits << 11); // bits at locations [14,13,12,11]
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 'index' for an Arm64 'mul' by elem instruction
+ */
+/*static*/ emitter::code_t emitter::insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index)
+{
+ code_t bits = 0;
+
+ if (elemsize == EA_2BYTE)
+ {
+ assert((index >= 0) && (index <= 7));
+ if (index & 0x4)
+ {
+ bits |= (1 << 11); // set bit 11 'H'
+ }
+ if (index & 0x2)
+ {
+ bits |= (1 << 21); // set bit 21 'L'
+ }
+ if (index & 0x1)
+ {
+ bits |= (1 << 20); // set bit 20 'M'
+ }
+ }
+ else if (elemsize == EA_4BYTE)
+ {
+ assert((index >= 0) && (index <= 3));
+ if (index & 0x2)
+ {
+ bits |= (1 << 11); // set bit 11 'H'
+ }
+ if (index & 0x1)
+ {
+ bits |= (1 << 21); // set bit 21 'L'
+ }
+ }
+ else
+ {
+ assert(!"Invalid 'elemsize' value");
+ }
+
+ return bits;
+}
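+
+ // Worked example: for 2-byte elements and index 5 (0b101), bit 'H' (11) and bit 'M' (20) are
+ // set, giving 0x00100800.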
+
+/*****************************************************************************
+ *
+ * Returns the encoding to shift by 'shift' for an Arm64 vector or scalar instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeVectorShift(emitAttr size, ssize_t shift)
+{
+ assert(shift < getBitWidth(size));
+
+ code_t imm = (code_t)(getBitWidth(size) + shift);
+
+ return imm << 16;
+}
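+
+ // Worked example (assuming getBitWidth(EA_8BYTE) == 64, i.e. the shift-left immediate form):
+ // for 8-byte elements and shift == 3, imm = 64 + 3 = 67, and 67 << 16 = 0x00430000 is OR-ed
+ // into the immh:immb field.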
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeElemsize(emitAttr size)
+{
+ if (size == EA_8BYTE)
+ {
+ return 0x00C00000; // set the bit at location 23 and 22
+ }
+ else if (size == EA_4BYTE)
+ {
+ return 0x00800000; // set the bit at location 23
+ }
+ else if (size == EA_2BYTE)
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ assert(size == EA_1BYTE);
+ return 0x00000000;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the 4/8 byte elemsize for an Arm64 float vector instruction
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeFloatElemsize(emitAttr size)
+{
+ if (size == EA_8BYTE)
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ assert(size == EA_4BYTE);
+ return 0x00000000;
+}
+
+// Returns the encoding to select the index for an Arm64 float vector by elem instruction
+/*static*/ emitter::code_t emitter::insEncodeFloatIndex(emitAttr elemsize, ssize_t index)
+{
+ code_t result = 0x00000000;
+ if (elemsize == EA_8BYTE)
+ {
+ assert((index >= 0) && (index <= 1));
+ if (index == 1)
+ {
+ result |= 0x00000800; // 'H' - set the bit at location 11
+ }
+ }
+ else
+ {
+ assert(elemsize == EA_4BYTE);
+ assert((index >= 0) && (index <= 3));
+ if (index & 2)
+ {
+ result |= 0x00000800; // 'H' - set the bit at location 11
+ }
+ if (index & 1)
+ {
+ result |= 0x00200000; // 'L' - set the bit at location 21
+ }
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to select the fcvt operation for Arm64 instructions
+ */
+/*static*/ emitter::code_t emitter::insEncodeConvertOpt(insFormat fmt, insOpts conversion)
+{
+ code_t result = 0;
+ switch (conversion)
+ {
+ case INS_OPTS_S_TO_D: // Single to Double
+ assert(fmt == IF_DV_2J);
+ result = 0x00008000; // type=00, opc=01
+ break;
+
+ case INS_OPTS_D_TO_S: // Double to Single
+ assert(fmt == IF_DV_2J);
+ result = 0x00400000; // type=01, opc=00
+ break;
+
+ case INS_OPTS_H_TO_S: // Half to Single
+ assert(fmt == IF_DV_2J);
+ result = 0x00C00000; // type=11, opc=00
+ break;
+
+ case INS_OPTS_H_TO_D: // Half to Double
+ assert(fmt == IF_DV_2J);
+ result = 0x00C08000; // type=11, opc=01
+ break;
+
+ case INS_OPTS_S_TO_H: // Single to Half
+ assert(fmt == IF_DV_2J);
+ result = 0x00018000; // type=00, opc=11
+ break;
+
+ case INS_OPTS_D_TO_H: // Double to Half
+ assert(fmt == IF_DV_2J);
+ result = 0x00418000; // type=01, opc=11
+ break;
+
+ case INS_OPTS_S_TO_4BYTE: // Single to INT32
+ assert(fmt == IF_DV_2H);
+ result = 0x00000000; // sf=0, type=00
+ break;
+
+ case INS_OPTS_D_TO_4BYTE: // Double to INT32
+ assert(fmt == IF_DV_2H);
+ result = 0x00400000; // sf=0, type=01
+ break;
+
+ case INS_OPTS_S_TO_8BYTE: // Single to INT64
+ assert(fmt == IF_DV_2H);
+ result = 0x80000000; // sf=1, type=00
+ break;
+
+ case INS_OPTS_D_TO_8BYTE: // Double to INT64
+ assert(fmt == IF_DV_2H);
+ result = 0x80400000; // sf=1, type=01
+ break;
+
+ case INS_OPTS_4BYTE_TO_S: // INT32 to Single
+ assert(fmt == IF_DV_2I);
+ result = 0x00000000; // sf=0, type=00
+ break;
+
+ case INS_OPTS_4BYTE_TO_D: // INT32 to Double
+ assert(fmt == IF_DV_2I);
+ result = 0x00400000; // sf=0, type=01
+ break;
+
+ case INS_OPTS_8BYTE_TO_S: // INT64 to Single
+ assert(fmt == IF_DV_2I);
+ result = 0x80000000; // sf=1, type=00
+ break;
+
+ case INS_OPTS_8BYTE_TO_D: // INT64 to Double
+ assert(fmt == IF_DV_2I);
+ result = 0x80400000; // sf=1, type=01
+ break;
+
+ default:
+ assert(!"Invalid 'conversion' value");
+ break;
+ }
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to have the Rn register be updated Pre/Post indexed
+ * or not updated
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeIndexedOpt(insOpts opt)
+{
+ assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
+
+ if (emitter::insOptsIndexed(opt))
+ {
+ if (emitter::insOptsPostIndex(opt))
+ {
+ return 0x00000400; // set the bit at location 10
+ }
+ else
+ {
+ assert(emitter::insOptsPreIndex(opt));
+ return 0x00000C00; // set the bit at location 10 and 11
+ }
+ }
+ else
+ {
+ assert(emitter::insOptsNone(opt));
+ return 0; // bits 10 and 11 are zero
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding for a ldp/stp instruction to have the Rn register
+ * be updated Pre/Post indexed or not updated
+ */
+
+/*static*/ emitter::code_t emitter::insEncodePairIndexedOpt(instruction ins, insOpts opt)
+{
+ assert(emitter::insOptsNone(opt) || emitter::insOptsIndexed(opt));
+
+ if ((ins == INS_ldnp) || (ins == INS_stnp))
+ {
+ assert(emitter::insOptsNone(opt));
+ return 0; // bits 23 and 24 are zero
+ }
+ else
+ {
+ if (emitter::insOptsIndexed(opt))
+ {
+ if (emitter::insOptsPostIndex(opt))
+ {
+ return 0x00800000; // set the bit at location 23
+ }
+ else
+ {
+ assert(emitter::insOptsPreIndex(opt));
+ return 0x01800000; // set the bit at location 24 and 23
+ }
+ }
+ else
+ {
+ assert(emitter::insOptsNone(opt));
+ return 0x01000000; // set the bit at location 24
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to apply a Shift Type on the Rm register
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeShiftType(insOpts opt)
+{
+ if (emitter::insOptsNone(opt))
+ {
+ // None implies that we encode LSL (with a zero immediate)
+ opt = INS_OPTS_LSL;
+ }
+ assert(emitter::insOptsAnyShift(opt));
+
+ emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_LSL;
+ assert(option <= 3);
+
+ return option << 22; // bits 23, 22
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to apply a 12 bit left shift to the immediate
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeShiftImm12(insOpts opt)
+{
+ if (emitter::insOptsLSL12(opt))
+ {
+ return 0x00400000; // set the bit at location 22
+ }
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to have the Rm register use an extend operation
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeExtend(insOpts opt)
+{
+ if (emitter::insOptsNone(opt) || (opt == INS_OPTS_LSL))
+ {
+ // None or LSL implies that we encode UXTX
+ opt = INS_OPTS_UXTX;
+ }
+ assert(emitter::insOptsAnyExtend(opt));
+
+ emitter::code_t option = (emitter::code_t)opt - (emitter::code_t)INS_OPTS_UXTB;
+ assert(option <= 7);
+
+ return option << 13; // bits 15,14,13
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to scale the Rm register by {0,1,2,3,4}
+ * when using an extend operation
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeExtendScale(ssize_t imm)
+{
+ assert((imm >= 0) && (imm <= 4));
+
+ return (emitter::code_t)imm << 10; // bits 12,11,10
+}
+
+/*****************************************************************************
+ *
+ * Returns the encoding to have the Rm register be auto scaled by the ld/st size
+ */
+
+/*static*/ emitter::code_t emitter::insEncodeReg3Scale(bool isScaled)
+{
+ if (isScaled)
+ {
+ return 0x00001000; // set the bit at location 12
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+BYTE* emitter::emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id)
+{
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+ regNumber dstReg = id->idReg1();
+ if (id->idjShort)
+ {
+ // adr x, [rel addr] -- compute address: current addr(ip) + rel addr.
+ assert(ins == INS_adr);
+ assert(fmt == IF_DI_1E);
+ ssize_t distVal = (ssize_t)(dstAddr - srcAddr);
+ dst = emitOutputShortAddress(dst, ins, fmt, distVal, dstReg);
+ }
+ else
+ {
+ // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
+ assert(fmt == IF_LARGEADR);
+ ssize_t relPageAddr =
+ (((ssize_t)dstAddr & 0xFFFFFFFFFFFFF000LL) - ((ssize_t)srcAddr & 0xFFFFFFFFFFFFF000LL)) >> 12;
+ dst = emitOutputShortAddress(dst, INS_adrp, IF_DI_1E, relPageAddr, dstReg);
+
+ // add x, x, page offs -- compute address = page addr + page offs
+ ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
+ assert(isValidUimm12(imm12));
+ code_t code =
+ emitInsCode(INS_add, IF_DI_2A); // DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12, sh)
+ code |= insEncodeDatasize(EA_8BYTE); // X
+ code |= ((code_t)imm12 << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rd(dstReg); // ddddd
+ code |= insEncodeReg_Rn(dstReg); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ }
+ return dst;
+}
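+
+ // Worked example of the long (adrp + add) path, addresses assumed: with srcAddr == 0x20007F20
+ // and dstAddr == 0x20009A30, relPageAddr = (0x20009000 - 0x20007000) >> 12 = 2 and
+ // imm12 = 0xA30, so the sequence is "adrp Rd, #2" followed by "add Rd, Rd, #0xA30".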
+
+/*****************************************************************************
+ *
+ * Output a local jump or other instruction with a pc-relative immediate.
+ * Note that this may be invoked to overwrite an existing jump instruction at 'dst'
+ * to handle forward branch patching.
+ */
+
+BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
+{
+ instrDescJmp* id = (instrDescJmp*)i;
+
+ unsigned srcOffs;
+ unsigned dstOffs;
+ BYTE* srcAddr;
+ BYTE* dstAddr;
+ ssize_t distVal;
+ ssize_t loBits;
+
+ // Set default ins/fmt from id.
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ bool loadLabel = false;
+ bool isJump = false;
+ bool loadConstant = false;
+
+ switch (ins)
+ {
+ default:
+ isJump = true;
+ break;
+
+ case INS_tbz:
+ case INS_tbnz:
+ case INS_cbz:
+ case INS_cbnz:
+ isJump = true;
+ break;
+
+ case INS_ldr:
+ case INS_ldrsw:
+ loadConstant = true;
+ break;
+
+ case INS_adr:
+ case INS_adrp:
+ loadLabel = true;
+ break;
+ }
+
+ /* Figure out the distance to the target */
+
+ srcOffs = emitCurCodeOffs(dst);
+ srcAddr = emitOffsetToPtr(srcOffs);
+
+ if (id->idAddr()->iiaIsJitDataOffset())
+ {
+ assert(loadConstant || loadLabel);
+ int doff = id->idAddr()->iiaGetJitDataOffset();
+ assert(doff >= 0);
+ ssize_t imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm < 0x1000)); // 0x1000 is arbitrary, currently 'imm' is always 0
+
+ unsigned dataOffs = (unsigned)(doff + imm);
+ assert(dataOffs < emitDataSize());
+ dstAddr = emitDataOffsetToPtr(dataOffs);
+
+ regNumber dstReg = id->idReg1();
+ regNumber addrReg = dstReg; // an integer register to compute long address.
+ emitAttr opSize = id->idOpSize();
+
+ if (loadConstant)
+ {
+ if (id->idjShort)
+ {
+ // ldr x/v, [rel addr] -- load constant from current addr(ip) + rel addr.
+ assert(ins == INS_ldr);
+ assert(fmt == IF_LS_1A);
+ distVal = (ssize_t)(dstAddr - srcAddr);
+ dst = emitOutputShortConstant(dst, ins, fmt, distVal, dstReg, opSize);
+ }
+ else
+ {
+ // adrp x, [rel page addr] -- compute page address: current page addr + rel page addr
+ assert(fmt == IF_LARGELDC);
+ ssize_t relPageAddr =
+ (((ssize_t)dstAddr & 0xFFFFFFFFFFFFF000LL) - ((ssize_t)srcAddr & 0xFFFFFFFFFFFFF000LL)) >> 12;
+ if (isVectorRegister(dstReg))
+ {
+ // Update addrReg with the reserved integer register
+ // since we cannot use dstReg (vector) to load constant directly from memory.
+ addrReg = id->idReg2();
+ assert(isGeneralRegister(addrReg));
+ }
+ ins = INS_adrp;
+ fmt = IF_DI_1E;
+ dst = emitOutputShortAddress(dst, ins, fmt, relPageAddr, addrReg);
+
+ // ldr x, [x, page offs] -- load constant from page address + page offset into integer register.
+ ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits
+ assert(isValidUimm12(imm12));
+ ins = INS_ldr;
+ fmt = IF_LS_2B;
+ dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize);
+
+ // fmov v, d -- copy constant in integer register to vector register.
+ // This is needed only for vector constant.
+ if (addrReg != dstReg)
+ {
+ // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn
+ // (scalar, from general)
+ assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg));
+ ins = INS_fmov;
+ fmt = IF_DV_2I;
+ code_t code = emitInsCode(ins, fmt);
+
+ code |= insEncodeReg_Vd(dstReg); // ddddd
+ code |= insEncodeReg_Rn(addrReg); // nnnnn
+ if (id->idOpSize() == EA_8BYTE)
+ {
+ code |= 0x80400000; // X ... X
+ }
+ dst += emitOutput_Instr(dst, code);
+ }
+ }
+ }
+ else
+ {
+ assert(loadLabel);
+ dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
+ }
+
+ return dst;
+ }
+
+ assert(loadLabel || isJump);
+
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ assert(ig != NULL);
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+ unsigned insNum = emitFindInsNum(ig, id);
+ if (instrCount < 0)
+ {
+ // Backward branches using instruction count must be within the same instruction group.
+ assert(insNum + 1 >= (unsigned)(-instrCount));
+ }
+ dstOffs = ig->igOffs + emitFindOffset(ig, (insNum + 1 + instrCount));
+ dstAddr = emitOffsetToPtr(dstOffs);
+ }
+ else
+ {
+ dstOffs = id->idAddr()->iiaIGlabel->igOffs;
+ dstAddr = emitOffsetToPtr(dstOffs);
+ }
+
+ distVal = (ssize_t)(dstAddr - srcAddr);
+
+ if (dstOffs <= srcOffs)
+ {
+#if DEBUG_EMIT
+ /* This is a backward jump - distance is known at this point */
+
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
+ printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
+ printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
+ }
+#endif
+ }
+ else
+ {
+ /* This is a forward jump - distance will be an upper limit */
+
+ emitFwdJumps = true;
+
+ /* The target offset will be closer by at least 'emitOffsAdj', but only if this
+ jump doesn't cross the hot-cold boundary. */
+
+ if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ dstOffs -= emitOffsAdj;
+ distVal -= emitOffsAdj;
+ }
+
+ /* Record the location of the jump for later patching */
+
+ id->idjOffs = dstOffs;
+
+ /* Are we overflowing the id->idjOffs bitfield? */
+ if (id->idjOffs != dstOffs)
+ IMPL_LIMITATION("Method is too large");
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ printf("[4] Jump block is at %08X\n", blkOffs);
+ printf("[4] Jump is at %08X\n", srcOffs);
+ printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (0 && emitComp->verbose)
+ {
+ size_t sz = 4;
+ int distValSize = id->idjShort ? 4 : 8;
+ printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
+ dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs, distVal);
+ }
+#endif
+
+ /* For forward jumps, record the address of the distance value */
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL;
+
+ if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ assert(!id->idjShort);
+ NYI_ARM64("Relocation Support for long address");
+ }
+
+ assert(insOptsNone(id->idInsOpt()));
+
+ if (isJump)
+ {
+ if (id->idjShort)
+ {
+ // Short conditional/unconditional jump
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+ assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B));
+ }
+ else
+ {
+ // Long conditional jump
+ assert(fmt == IF_LARGEJMP);
+ // This is a pseudo-instruction format representing a large conditional branch, to allow
+ // us to get a greater branch target range than we can get by using a straightforward conditional
+ // branch. It is encoded as a short conditional branch that branches around a long unconditional
+ // branch.
+ //
+ // Conceptually, we have:
+ //
+ // b<cond> L_target
+ //
+ // The code we emit is:
+ //
+ // b<!cond> L_not // 4 bytes. Note that we reverse the condition.
+ // b L_target // 4 bytes
+ // L_not:
+ //
+ // Note that we don't actually insert any blocks: we simply encode "b <!cond> L_not" as a branch with
+ // the correct offset. Note also that this works for both integer and floating-point conditions, because
+ // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example,
+ // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered).
+ dst =
+ emitOutputShortBranch(dst,
+ emitJumpKindToIns(emitReverseJumpKind(
+ emitInsToJumpKind(ins))), // reverse the conditional instruction
+ IF_BI_0B,
+ 8, /* 8 bytes from start of this large conditional pseudo-instruction to L_not. */
+ nullptr /* only used for tbz/tbnz/cbz/cbnz */);
+
+ // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that.
+ ins = INS_b;
+ fmt = IF_BI_0A;
+
+ // The distVal was computed based on the beginning of the pseudo-instruction,
+ // So subtract the size of the conditional branch so that it is relative to the
+ // unconditional branch.
+ distVal -= 4;
+ }
+
+ dst = emitOutputShortBranch(dst, ins, fmt, distVal, id);
+ }
+ else if (loadLabel)
+ {
+ dst = emitOutputLoadLabel(dst, srcAddr, dstAddr, id);
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a short branch instruction.
+ */
+BYTE* emitter::emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id)
+{
+ code_t code = emitInsCode(ins, fmt);
+
+ ssize_t loBits = (distVal & 3);
+ noway_assert(loBits == 0);
+ distVal >>= 2; // branch offset encodings are scaled by 4.
+
+ if (fmt == IF_BI_0A)
+ {
+ // INS_b or INS_bl_local
+ noway_assert(isValidSimm26(distVal));
+ distVal &= 0x3FFFFFFLL;
+ code |= distVal;
+ }
+ else if (fmt == IF_BI_0B) // BI_0B 01010100iiiiiiii iiiiiiiiiiiXXXXX simm19:00
+ {
+ // INS_beq, INS_bne, etc...
+ noway_assert(isValidSimm19(distVal));
+ distVal &= 0x7FFFFLL;
+ code |= distVal << 5;
+ }
+ else if (fmt == IF_BI_1A) // BI_1A X.......iiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ {
+ // INS_cbz or INS_cbnz
+ assert(id != nullptr);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+
+ noway_assert(isValidSimm19(distVal));
+ distVal &= 0x7FFFFLL; // 19 bits
+ code |= distVal << 5;
+ }
+ else if (fmt == IF_BI_1B) // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ {
+ // INS_tbz or INS_tbnz
+ assert(id != nullptr);
+ ssize_t imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+
+ if (imm & 0x20) // test bit 32-63 ?
+ {
+ code |= 0x80000000; // B
+ }
+ code |= ((imm & 0x1F) << 19); // bbbbb
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+
+ noway_assert(isValidSimm14(distVal));
+ distVal &= 0x3FFFLL; // 14 bits
+ code |= distVal << 5;
+ }
+ else
+ {
+ assert(!"Unknown fmt for emitOutputShortBranch");
+ }
+
+ dst += emitOutput_Instr(dst, code);
+
+ return dst;
+}
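+
+ // The ">>= 2" scaling above gives the usual ARM64 branch ranges: simm26 (b/bl) reaches
+ // +/-128MB, simm19 (b.cond, cbz/cbnz) +/-1MB, and simm14 (tbz/tbnz) +/-32KB from the branch.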
+
+/*****************************************************************************
+ *
+ * Output a short address instruction.
+ */
+BYTE* emitter::emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg)
+{
+ ssize_t loBits = (distVal & 3);
+ distVal >>= 2;
+
+ code_t code = emitInsCode(ins, fmt);
+ if (fmt == IF_DI_1E) // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ {
+ // INS_adr or INS_adrp
+ code |= insEncodeReg_Rd(reg); // ddddd
+
+ noway_assert(isValidSimm19(distVal));
+ distVal &= 0x7FFFFLL; // 19 bits
+ code |= distVal << 5;
+ code |= loBits << 29; // 2 bits
+ }
+ else
+ {
+ assert(!"Unknown fmt for emitOutputShortAddress");
+ }
+
+ dst += emitOutput_Instr(dst, code);
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a short constant instruction.
+ */
+BYTE* emitter::emitOutputShortConstant(
+ BYTE* dst, instruction ins, insFormat fmt, ssize_t imm, regNumber reg, emitAttr opSize)
+{
+ code_t code = emitInsCode(ins, fmt);
+
+ if (fmt == IF_LS_1A)
+ {
+ // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt simm21
+ // INS_ldr or INS_ldrsw (PC-Relative)
+
+ ssize_t loBits = (imm & 3);
+ noway_assert(loBits == 0);
+ ssize_t distVal = imm >>= 2; // load offset encodings are scaled by 4.
+
+ noway_assert(isValidSimm19(distVal));
+
+ // Is the target a vector register?
+ if (isVectorRegister(reg))
+ {
+ code |= insEncodeDatasizeVLS(code, opSize); // XX V
+ code |= insEncodeReg_Vt(reg); // ttttt
+ }
+ else
+ {
+ assert(isGeneralRegister(reg));
+ // insEncodeDatasizeLS is not quite right for this case.
+ // So just specialize it.
+ if ((ins == INS_ldr) && (opSize == EA_8BYTE))
+ {
+ // set the operation size in bit 30
+ code |= 0x40000000;
+ }
+
+ code |= insEncodeReg_Rt(reg); // ttttt
+ }
+
+ distVal &= 0x7FFFFLL; // 19 bits
+ code |= distVal << 5;
+ }
+ else if (fmt == IF_LS_2B)
+ {
+ // ldr Rt,[Xn+pimm12] LS_2B 1X11100101iiiiii iiiiiinnnnnttttt B940 0000 imm(0-4095<<{2,3})
+ // INS_ldr or INS_ldrsw (PC-Relative)
+ noway_assert(isValidUimm12(imm));
+ assert(isGeneralRegister(reg));
+
+ if (opSize == EA_8BYTE)
+ {
+ // insEncodeDatasizeLS is not quite right for this case.
+ // So just specialize it.
+ if (ins == INS_ldr)
+ {
+ // set the operation size in bit 30
+ code |= 0x40000000;
+ }
+ // Low 3 bits should be 0 -- 8-byte JIT data should be 8-byte aligned.
+ assert((imm & 7) == 0);
+ imm >>= 3;
+ }
+ else
+ {
+ assert(opSize == EA_4BYTE);
+ // Low 2 bits should be 0 -- 4 byte aligned data.
+ assert((imm & 3) == 0);
+ imm >>= 2;
+ }
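+ // For example (illustrative): an 8-byte 'ldr' of JIT data at byte offset 16 arrives here with
+ // imm == 16 and is scaled down to 2; the hardware scales the 12-bit field back up by 8.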
+
+ code |= insEncodeReg_Rt(reg); // ttttt
+ code |= insEncodeReg_Rn(reg); // nnnnn
+ code |= imm << 10;
+ }
+ else
+ {
+ assert(!"Unknown fmt for emitOutputShortConstant");
+ }
+
+ dst += emitOutput_Instr(dst, code);
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a call instruction.
+ */
+
+unsigned emitter::emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* id, code_t code)
+{
+ const unsigned char callInstrSize = sizeof(code_t); // 4 bytes
+ regMaskTP gcrefRegs;
+ regMaskTP byrefRegs;
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ // Is this a "fat" call descriptor?
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ }
+
+ /* We update the GC info before the call as the variables cannot be
+ used by the call. Killing variables before the call helps with
+ boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+ If we ever track aliased variables (which could be used by the
+ call), we would have to keep them alive past the call. */
+
+ emitUpdateLiveGCvars(GCvars, dst);
+
+ // Now output the call instruction and update the 'dst' pointer
+ //
+ unsigned outputInstrSize = emitOutput_Instr(dst, code);
+ dst += outputInstrSize;
+
+ // All call instructions are 4-byte in size on ARM64
+ //
+ assert(outputInstrSize == callInstrSize);
+
+ // If the method returns a GC ref, mark INTRET (R0) appropriately.
+ if (id->idGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_INTRET;
+ }
+ else if (id->idGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_INTRET;
+ }
+
+ // If a multi-register-return method is called, mark INTRET_1 (X1) appropriately
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ if (idCall->idSecondGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_INTRET_1;
+ }
+ else if (idCall->idSecondGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_INTRET_1;
+ }
+ }
+
+ // If the GC register set has changed, report the new set.
+ if (gcrefRegs != emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+ }
+ // If the Byref register set has changed, report the new set.
+ if (byrefRegs != emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+ }
+
+ // Some helper calls may be marked as not requiring GC info to be recorded.
+ if ((!id->idIsNoGC()))
+ {
+ // On ARM64, as on AMD64, we don't change the stack pointer to push/pop args.
+ // So we're not really doing a "stack pop" here (note that "args" is 0), but we use this mechanism
+ // to record the call for GC info purposes. (It might be best to use an alternate call,
+ // and protect "emitStackPop" under the EMIT_TRACK_STACK_DEPTH preprocessor variable.)
+ emitStackPop(dst, /*isCall*/ true, callInstrSize, /*args*/ 0);
+
+ // Do we need to record a call location for GC purposes?
+ //
+ if (!emitFullGCinfo)
+ {
+ emitRecordGCcall(dst, callInstrSize);
+ }
+ }
+ return callInstrSize;
+}
+
+/*****************************************************************************
+ *
+ * Emit a 32-bit Arm64 instruction
+ */
+
+/*static*/ unsigned emitter::emitOutput_Instr(BYTE* dst, code_t code)
+{
+ assert(sizeof(code_t) == 4);
+ *((code_t*)dst) = code;
+
+ return sizeof(code_t);
+}
+
+/*****************************************************************************
+ *
+ * Append the machine code corresponding to the given instruction descriptor
+ * to the code block at '*dp'; 'ig' is the instruction group that contains
+ * the instruction. Updates '*dp' to point past the generated code, and
+ * returns the size of the instruction descriptor in bytes.
+ */
+
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ BYTE* dst = *dp;
+ BYTE* odst = dst;
+ code_t code = 0;
+ size_t sz = emitGetInstrDescSize(id); // TODO-ARM64-Cleanup: on ARM, this is set in each case. why?
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+ emitAttr size = id->idOpSize();
+ unsigned char callInstrSize = 0;
+ unsigned condcode;
+
+#ifdef DEBUG
+#if DUMP_GC_TABLES
+ bool dspOffs = emitComp->opts.dspGCtbls;
+#else
+ bool dspOffs = !emitComp->opts.disDiffable;
+#endif
+#endif // DEBUG
+
+ assert(REG_NA == (int)REG_NA);
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ /* What instruction format have we got? */
+
+ switch (fmt)
+ {
+ ssize_t imm;
+ ssize_t index;
+ ssize_t index2;
+ unsigned scale;
+ unsigned cmode;
+ unsigned immShift;
+ bool hasShift;
+ emitAttr extSize;
+ emitAttr elemsize;
+ emitAttr datasize;
+
+ case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00
+ case IF_LARGEJMP:
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ code = emitInsCode(ins, fmt);
+ sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
+ dst += emitOutputCall(ig, dst, id, code);
+ // Always call RecordRelocation so that we wire in a JumpStub when the target is not directly reachable
+ emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_BRANCH26);
+ break;
+
+ case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert((ins == INS_ret) || (ins == INS_br));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert((ins == INS_br_tail) || (ins == INS_blr));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+
+ sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
+ dst += emitOutputCall(ig, dst, id, code);
+ break;
+
+ case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ case IF_LARGELDC:
+ assert(insOptsNone(id->idInsOpt()));
+ assert(id->idIsBound());
+
+ dst = emitOutputLJ(ig, dst, id);
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ assert(insOptsNone(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ assert(insOptsNone(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert(isValidUimm12(imm));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= ((code_t)imm << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert((imm >= -256) && (imm <= 255)); // signed 9 bits
+ imm &= 0x1ff; // force into unsigned 9 bit representation
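+ // For example (illustrative): imm == -8 is encoded as 0x1F8 in the 9-bit field.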
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= insEncodeIndexedOpt(id->idInsOpt()); // PP
+ code |= ((code_t)imm << 12); // iiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ assert(insOptsLSExtend(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ }
+ else
+ {
+ code |= insEncodeDatasizeLS(code, id->idOpSize()); // .X.......X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ }
+ code |= insEncodeExtend(id->idInsOpt()); // ooo
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ if (id->idIsLclVar())
+ {
+ code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
+ }
+ else
+ {
+ code |= insEncodeReg3Scale(id->idReg3Scaled()); // S
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ }
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnddddd Rd Ra Rn
+ assert(insOptsNone(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Va(id->idReg2()); // aaaaa
+ }
+ else
+ {
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Ra(id->idReg2()); // aaaaa
+ }
+ code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_LS_3C: // LS_3C X......PP.iiiiii iaaaaannnnnddddd Rd Ra Rn imm(im7,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert((imm >= -64) && (imm <= 63)); // signed 7 bits
+ imm &= 0x7f; // force into unsigned 7 bit representation
+ code = emitInsCode(ins, fmt);
+ // Is the target a vector register?
+ if (isVectorRegister(id->idReg1()))
+ {
+ code &= 0x3FFFFFFF; // clear the size bits
+ code |= insEncodeDatasizeVPLS(code, id->idOpSize()); // XX
+ code |= insEncodeReg_Vt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Va(id->idReg2()); // aaaaa
+ }
+ else
+ {
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rt(id->idReg1()); // ttttt
+ code |= insEncodeReg_Ra(id->idReg2()); // aaaaa
+ }
+ code |= insEncodePairIndexedOpt(ins, id->idInsOpt()); // PP
+ code |= ((code_t)imm << 15); // iiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert(isValidUimm12(imm));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftImm12(id->idInsOpt()); // sh
+ code |= ((code_t)imm << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ imm = emitGetInsSC(id);
+ assert(isValidImmHWVal(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= ((code_t)imm << 5); // hwiiiii iiiiiiiiiii
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+ imm = emitGetInsSC(id);
+ assert(isValidImmNRS(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ imm = emitGetInsSC(id);
+ assert(isValidImmNRS(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ case IF_LARGEADR:
+ assert(insOptsNone(id->idInsOpt()));
+ if (id->idIsReloc())
+ {
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEBASE_REL21);
+ }
+ else
+ {
+ // Local jmp/load case which does not need a relocation.
+ assert(id->idIsBound());
+ dst = emitOutputLJ(ig, dst, id);
+ }
+ sz = sizeof(instrDescJmp);
+ break;
+
+ case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCondFlagsImm5(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= ((code_t)cfi.imm5 << 16); // iiiii
+ code |= insEncodeFlags(cfi.flags); // nzcv
+ code |= insEncodeCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsLSL12(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ assert(isValidUimm12(imm));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftImm12(id->idInsOpt()); // sh
+ code |= ((code_t)imm << 10); // iiiiiiiiiiii
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+
+ if (id->idIsReloc())
+ {
+ assert(sz == sizeof(instrDesc));
+ assert(id->idAddr()->iiaAddr != nullptr);
+ emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A);
+ }
+ break;
+
+ case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // Reg2 also in mmmmm
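+ // For example (illustrative): 'ror x0, x1, #4' is the EXTR alias with Rn == Rm == x1,
+ // which is why idReg2 is encoded into both the nnnnn and mmmmm fields here.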
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ imm = emitGetInsSC(id);
+ assert(isValidImmNRS(imm, id->idOpSize()));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+ if (ins == INS_asr || ins == INS_lsl || ins == INS_lsr)
+ {
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+
+ // Shift immediates are aliases of the SBFM/UBFM instructions,
+ // which actually take 2 registers and 2 constants.
+ // Since we stored the shift immediate value,
+ // we need to calculate the N, R and S values here.
+
+ bitMaskImm bmi;
+ bmi.immNRS = 0;
+
+ bmi.immN = (size == EA_8BYTE) ? 1 : 0;
+ bmi.immR = imm;
+ bmi.immS = (size == EA_8BYTE) ? 0x3f : 0x1f;
+
+ // immR and immS are now set correctly for INS_asr and INS_lsr
+ // but for INS_lsl we have to adjust the values for immR and immS
+ //
+ if (ins == INS_lsl)
+ {
+ bmi.immR = -imm & bmi.immS;
+ bmi.immS = bmi.immS - imm;
+ }
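+ // For example (illustrative): 'lsl x0, x1, #3' (EA_8BYTE) becomes the alias
+ // 'ubfm x0, x1, #61, #60', i.e. immR == (-3 & 63) == 61 and immS == 63 - 3 == 60.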
+
+ // setup imm with the proper 13 bit value N:R:S
+ //
+ imm = bmi.immNRS;
+ }
+ else
+ {
+ // The other instructions already have encoded N, R and S values
+ imm = emitGetInsSC(id);
+ }
+ assert(isValidImmNRS(imm, id->idOpSize()));
+
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 10); // Nrrrrrrssssss
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_1D: // DR_1D X............... cccc.......ddddd Rd cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCond(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeInvertedCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+ assert(insOptsNone(id->idInsOpt()));
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftType(id->idInsOpt()); // sh
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm <= 4)); // imm [0..4]
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeExtend(id->idInsOpt()); // ooo
+ code |= insEncodeExtendScale(imm); // sss
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCond(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ code |= insEncodeInvertedCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeShiftType(id->idInsOpt()); // sh
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2G: // DR_2G X............... .....xnnnnnddddd Rd Rn
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ if (ins == INS_rev)
+ {
+ if (size == EA_8BYTE)
+ {
+ code |= 0x00000400; // x - bit at location 10
+ }
+ }
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCondFlags(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg2()); // mmmmm
+ code |= insEncodeFlags(cfi.flags); // nzcv
+ code |= insEncodeCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ if (id->idIsLclVar())
+ {
+ code |= insEncodeReg_Rm(codeGen->rsGetRsvdReg()); // mmmmm
+ }
+ else
+ {
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ }
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeShiftType(id->idInsOpt()); // sh
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm <= 4)); // imm [0..4]
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeExtend(id->idInsOpt()); // ooo
+ code |= insEncodeExtendScale(imm); // sss
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond
+ imm = emitGetInsSC(id);
+ assert(isValidImmCond(imm));
+ {
+ condFlagsImm cfi;
+ cfi.immCFVal = (unsigned)imm;
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeCond(cfi.cond); // cccc
+ dst += emitOutput_Instr(dst, code);
+ }
+ break;
+
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ assert(isValidImmShift(imm, id->idOpSize()));
+ code |= insEncodeDatasizeBF(code, id->idOpSize()); // X........X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeShiftCount(imm, id->idOpSize()); // ssssss
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeDatasize(id->idOpSize()); // X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Rm(id->idReg3()); // mmmmm
+ code |= insEncodeReg_Ra(id->idReg4()); // aaaaa
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+ imm = emitGetInsSC(id);
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= ((code_t)imm << 13); // iiiii iii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector)
+ imm = emitGetInsSC(id) & 0x0ff;
+ immShift = (emitGetInsSC(id) & 0x700) >> 8;
+ elemsize = optGetElemsize(id->idInsOpt());
+ cmode = 0;
+ switch (elemsize)
+ { // cmode
+ case EA_1BYTE:
+ cmode = 0xE; // 1110
+ break;
+ case EA_2BYTE:
+ cmode = 0x8;
+ cmode |= (immShift << 1); // 10x0
+ break;
+ case EA_4BYTE:
+ if (immShift < 4)
+ {
+ cmode = 0x0;
+ cmode |= (immShift << 1); // 0xx0
+ }
+ else // MSL
+ {
+ cmode = 0xC;
+ if (immShift & 2)
+ cmode |= 1; // 110x
+ }
+ break;
+ case EA_8BYTE:
+ cmode = 0xE; // 1110
+ break;
+ default:
+ // TODO-Cleanup: add unreached() here
+ break;
+ }
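+ // For example (illustrative): a 4S 'movi' with an 8-bit immediate shifted left by 8
+ // (immShift == 1) selects cmode == 0b0010 above.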
+
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ if ((ins == INS_fmov) || (ins == INS_movi))
+ {
+ if (elemsize == EA_8BYTE)
+ {
+ code |= 0x20000000; // X
+ }
+ }
+ if (ins != INS_fmov)
+ {
+ assert((cmode >= 0) && (cmode <= 0xF));
+ code |= (cmode << 12); // cmod
+ }
+ code |= (((code_t)imm >> 5) << 16); // iii
+ code |= (((code_t)imm & 0x1f) << 5); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vn(id->idReg1()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector)
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ datasize = (elemsize == EA_8BYTE) ? EA_16BYTE : EA_8BYTE;
+ if (ins == INS_smov)
+ {
+ datasize = EA_16BYTE;
+ }
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(datasize); // Q
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ if (ins == INS_dup)
+ {
+ datasize = id->idOpSize();
+ elemsize = optGetElemsize(id->idInsOpt());
+ index = 0;
+ }
+ else // INS_ins
+ {
+ datasize = EA_16BYTE;
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ }
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(datasize); // Q
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ index = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ index = emitGetInsSC(id);
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ elemsize = id->idOpSize();
+ imm = emitGetInsSC(id);
+ index = (imm >> 4) & 0xf;
+ index2 = imm & 0xf;
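+ // For example (illustrative): 'ins v0.s[1], v1.s[3]' packs its two indices as
+ // imm == 0x13, so index == 1 (destination) and index2 == 3 (source).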
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorIndex(elemsize, index); // iiiii
+ code |= insEncodeVectorIndex2(elemsize, index2); // jjjj
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov,fcvtXX - register)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov - to general)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X
+ code |= insEncodeReg_Rd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov - from general)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // X X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeConvertOpt(fmt, id->idInsOpt()); // SS DD
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vn(id->idReg1()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg2()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ elemsize = id->idOpSize();
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+ imm = emitGetInsSC(id);
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorShift(EA_8BYTE, imm); // iiiiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+ imm = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeVectorShift(elemsize, imm); // iiiiiii
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ code = emitInsCode(ins, fmt);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeElemsize(elemsize); // XX
+ code |= insEncodeVectorIndexLMH(elemsize, imm); // LM H
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3B: // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ code = emitInsCode(ins, fmt);
+ elemsize = optGetElemsize(id->idInsOpt());
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3BI: // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ elemsize = optGetElemsize(id->idInsOpt());
+ assert(isValidVectorIndex(id->idOpSize(), elemsize, imm));
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeFloatIndex(elemsize, imm); // L H
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeVectorsize(id->idOpSize()); // Q
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeFloatElemsize(id->idOpSize()); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ code = emitInsCode(ins, fmt);
+ imm = emitGetInsSC(id);
+ elemsize = id->idOpSize();
+ assert(isValidVectorIndex(EA_16BYTE, elemsize, imm));
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeFloatIndex(elemsize, imm); // L H
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ code = emitInsCode(ins, fmt);
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
+ code = emitInsCode(ins, fmt);
+ elemsize = id->idOpSize();
+ code |= insEncodeFloatElemsize(elemsize); // X
+ code |= insEncodeReg_Vd(id->idReg1()); // ddddd
+ code |= insEncodeReg_Vn(id->idReg2()); // nnnnn
+ code |= insEncodeReg_Vm(id->idReg3()); // mmmmm
+ code |= insEncodeReg_Va(id->idReg4()); // aaaaa
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_SN_0A: // SN_0A ................ ................
+ code = emitInsCode(ins, fmt);
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+ imm = emitGetInsSC(id);
+ assert(isValidUimm16(imm));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 5); // iiiii iiiiiiiiiii
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier
+ imm = emitGetInsSC(id);
+ assert((imm >= 0) && (imm <= 15));
+ code = emitInsCode(ins, fmt);
+ code |= ((code_t)imm << 8); // bbbb
+ dst += emitOutput_Instr(dst, code);
+ break;
+
+ default:
+ assert(!"Unexpected format");
+ break;
+ }
+
+ // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref.
+ // We assume here that "id->idGCref()" is not GCT_NONE only if the instruction described by "id" writes a
+ // GC ref to register "id->idReg1()". (It may, apparently, also not be GCT_NONE in other cases, such as
+ // for stores, but we ignore those cases here.)
+ if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register that can hold a GC ref.
+ {
+ // If we ever generate instructions that write to multiple registers,
+ // then we'd need to do more work here to ensure that changes in the status of GC refs are
+ // tracked properly.
+ if (emitInsMayWriteMultipleRegs(id))
+ {
+ // INS_ldp etc...
+ // We assume that "idReg1" and "idReg2" are the destination register for all instructions
+ emitGCregDeadUpd(id->idReg1(), dst);
+ emitGCregDeadUpd(id->idReg2(), dst);
+ }
+ else
+ {
+ // We assume that "idReg1" is the destination register for all instructions
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ }
+ else
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ }
+ }
+
+ // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC
+ // ref or overwritten one.
+ if (emitInsWritesToLclVarStackLoc(id))
+ {
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+ unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), sizeof(size_t));
+ bool FPbased;
+ int adr = emitComp->lvaFrameAddress(varNum, &FPbased);
+ if (id->idGCref() != GCT_NONE)
+ {
+ emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst);
+ }
+ else
+ {
+ // If the type of the local is a gc ref type, update the liveness.
+ var_types vt;
+ if (varNum >= 0)
+ {
+ // "Regular" (non-spill-temp) local.
+ vt = var_types(emitComp->lvaTable[varNum].lvType);
+ }
+ else
+ {
+ TempDsc* tmpDsc = emitComp->tmpFindNum(varNum);
+ vt = tmpDsc->tdTempType();
+ }
+ if (vt == TYP_REF || vt == TYP_BYREF)
+ emitGCvarDeadUpd(adr + ofs, dst);
+ }
+ }
+
+#ifdef DEBUG
+ /* Make sure we set the instruction descriptor size correctly */
+
+ size_t expected = emitSizeOfInsDsc(id);
+ assert(sz == expected);
+
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig);
+ }
+
+ if (emitComp->compDebugBreak)
+ {
+ // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for
+ // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
+ {
+ assert(!"JitBreakEmitOutputInstr reached");
+ }
+ }
+#endif
+
+ /* All instructions are expected to generate code */
+
+ assert(*dp != dst);
+
+ *dp = dst;
+
+ return sz;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Display the instruction name
+ */
+void emitter::emitDispInst(instruction ins)
+{
+ const char* insstr = codeGen->genInsName(ins);
+ size_t len = strlen(insstr);
+
+ /* Display the instruction name */
+
+ printf("%s", insstr);
+
+ //
+ // Add at least one space after the instruction name
+ // and then pad with spaces until we reach the normal width of 8
+ do
+ {
+ printf(" ");
+ len++;
+ } while (len < 8);
+}
+
+/*****************************************************************************
+ *
+ * Display a reloc value
+ * If we are formatting for an assembly listing, don't print the hex value
+ * since it would prevent us from doing assembly diffs
+ */
+void emitter::emitDispReloc(int value, bool addComma)
+{
+ if (emitComp->opts.disAsm)
+ {
+ printf("(reloc)");
+ }
+ else
+ {
+ printf("(reloc 0x%x)", dspPtr(value));
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display an immediate value
+ */
+void emitter::emitDispImm(ssize_t imm, bool addComma, bool alwaysHex /* =false */)
+{
+ if (strictArmAsm)
+ {
+ printf("#");
+ }
+
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top44bits = (imm >> 20);
+ if ((top44bits != 0) && (top44bits != -1))
+ imm = 0xD1FFAB1E;
+ }
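+ // For example (illustrative): with diff-able disassembly an embedded pointer such as
+ // 0x00007FFE12345678 is replaced with the sentinel 0xD1FFAB1E, while small constants
+ // are displayed unchanged.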
+
+ if (!alwaysHex && (imm > -1000) && (imm < 1000))
+ {
+ printf("%d", imm);
+ }
+ else
+ {
+ if ((imm < 0) && ((imm & 0xFFFFFFFF00000000LL) == 0xFFFFFFFF00000000LL))
+ {
+ printf("-");
+ imm = -imm;
+ }
+
+ if ((imm & 0xFFFFFFFF00000000LL) != 0)
+ {
+ printf("0x%llx", imm);
+ }
+ else
+ {
+ printf("0x%02x", imm);
+ }
+ }
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display a float zero constant
+ */
+void emitter::emitDispFloatZero()
+{
+ if (strictArmAsm)
+ {
+ printf("#");
+ }
+ printf("0.0");
+}
+
+/*****************************************************************************
+ *
+ * Display an encoded float constant value
+ */
+void emitter::emitDispFloatImm(ssize_t imm8)
+{
+ assert((0 <= imm8) && (imm8 <= 0x0ff));
+ if (strictArmAsm)
+ {
+ printf("#");
+ }
+
+ floatImm8 fpImm;
+ fpImm.immFPIVal = (unsigned)imm8;
+ double result = emitDecodeFloatImm8(fpImm);
+
+ printf("%.4f", result);
+}
+
+/*****************************************************************************
+ *
+ * Display an immediate that is optionally LSL12.
+ */
+void emitter::emitDispImmOptsLSL12(ssize_t imm, insOpts opt)
+{
+ if (!strictArmAsm && insOptsLSL12(opt))
+ {
+ imm <<= 12;
+ }
+ emitDispImm(imm, false);
+ if (strictArmAsm && insOptsLSL12(opt))
+ {
+ printf(", LSL #12");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an ARM64 condition code for the conditional instructions
+ */
+void emitter::emitDispCond(insCond cond)
+{
+ const static char* armCond[16] = {"eq", "ne", "hs", "lo", "mi", "pl", "vs", "vc",
+ "hi", "ls", "ge", "lt", "gt", "le", "AL", "NV"}; // The last two are invalid
+ unsigned imm = (unsigned)cond;
+ assert((0 <= imm) && (imm < ArrLen(armCond)));
+ printf(armCond[imm]);
+}
+
+/*****************************************************************************
+ *
+ * Display the ARM64 flags for the conditional instructions
+ */
+void emitter::emitDispFlags(insCflags flags)
+{
+ const static char* armFlags[16] = {"0", "v", "c", "cv", "z", "zv", "zc", "zcv",
+ "n", "nv", "nc", "ncv", "nz", "nzv", "nzc", "nzcv"};
+ unsigned imm = (unsigned)flags;
+ assert((0 <= imm) && (imm < ArrLen(armFlags)));
+ printf(armFlags[imm]);
+}
+
+/*****************************************************************************
+ *
+ * Display an ARM64 'barrier' for the memory barrier instructions
+ */
+void emitter::emitDispBarrier(insBarrier barrier)
+{
+ const static char* armBarriers[16] = {"#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
+ "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy"};
+ unsigned imm = (unsigned)barrier;
+ assert((0 <= imm) && (imm < ArrLen(armBarriers)));
+ printf(armBarriers[imm]);
+}
+
+/*****************************************************************************
+ *
+ * Prints the Shift Type option
+ */
+
+void emitter::emitDispShiftOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_LSL)
+ printf(" LSL ");
+ else if (opt == INS_OPTS_LSR)
+ printf(" LSR ");
+ else if (opt == INS_OPTS_ASR)
+ printf(" ASR ");
+ else if (opt == INS_OPTS_ROR)
+ printf(" ROR ");
+ else if (opt == INS_OPTS_MSL)
+ printf(" MSL ");
+ else
+ assert(!"Bad value");
+}
+
+/*****************************************************************************
+ *
+ * Prints the Extend Type option
+ */
+
+void emitter::emitDispExtendOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_UXTB)
+ printf("UXTB");
+ else if (opt == INS_OPTS_UXTH)
+ printf("UXTH");
+ else if (opt == INS_OPTS_UXTW)
+ printf("UXTW");
+ else if (opt == INS_OPTS_UXTX)
+ printf("UXTX");
+ else if (opt == INS_OPTS_SXTB)
+ printf("SXTB");
+ else if (opt == INS_OPTS_SXTH)
+ printf("SXTH");
+ else if (opt == INS_OPTS_SXTW)
+ printf("SXTW");
+ else if (opt == INS_OPTS_SXTX)
+ printf("SXTX");
+ else
+ assert(!"Bad value");
+}
+
+/*****************************************************************************
+ *
+ * Prints the Extend Type option used in loads/stores
+ */
+
+void emitter::emitDispLSExtendOpts(insOpts opt)
+{
+ if (opt == INS_OPTS_LSL)
+ printf("LSL");
+ else if (opt == INS_OPTS_UXTW)
+ printf("UXTW");
+ else if (opt == INS_OPTS_UXTX)
+ printf("UXTX");
+ else if (opt == INS_OPTS_SXTW)
+ printf("SXTW");
+ else if (opt == INS_OPTS_SXTX)
+ printf("SXTX");
+ else
+ assert(!"Bad value");
+}
+
+/*****************************************************************************
+ *
+ * Display a register
+ */
+void emitter::emitDispReg(regNumber reg, emitAttr attr, bool addComma)
+{
+ emitAttr size = EA_SIZE(attr);
+ printf(emitRegName(reg, size));
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display a vector register with an arrangement suffix
+ */
+void emitter::emitDispVectorReg(regNumber reg, insOpts opt, bool addComma)
+{
+ assert(isVectorRegister(reg));
+ printf(emitVectorRegName(reg));
+ emitDispArrangement(opt);
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display a vector register index suffix
+ */
+void emitter::emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma)
+{
+ assert(isVectorRegister(reg));
+ printf(emitVectorRegName(reg));
+
+ switch (elemsize)
+ {
+ case EA_1BYTE:
+ printf(".b");
+ break;
+ case EA_2BYTE:
+ printf(".h");
+ break;
+ case EA_4BYTE:
+ printf(".s");
+ break;
+ case EA_8BYTE:
+ printf(".d");
+ break;
+ default:
+ assert(!"invalid elemsize");
+ break;
+ }
+
+ printf("[%d]", index);
+
+ if (addComma)
+ printf(", ");
+}
+
+/*****************************************************************************
+ *
+ * Display an arrangement suffix
+ */
+void emitter::emitDispArrangement(insOpts opt)
+{
+ const char* str = "???";
+
+ switch (opt)
+ {
+ case INS_OPTS_8B:
+ str = "8b";
+ break;
+ case INS_OPTS_16B:
+ str = "16b";
+ break;
+ case INS_OPTS_4H:
+ str = "4h";
+ break;
+ case INS_OPTS_8H:
+ str = "8h";
+ break;
+ case INS_OPTS_2S:
+ str = "2s";
+ break;
+ case INS_OPTS_4S:
+ str = "4s";
+ break;
+ case INS_OPTS_1D:
+ str = "1d";
+ break;
+ case INS_OPTS_2D:
+ str = "2d";
+ break;
+
+ default:
+ assert(!"Invalid insOpt for vector register");
+ }
+ printf(".");
+ printf(str);
+}
+
+/*****************************************************************************
+ *
+ * Display a register with an optional shift operation
+ */
+void emitter::emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr)
+{
+ emitAttr size = EA_SIZE(attr);
+ assert((imm & 0x003F) == imm);
+ assert(((imm & 0x0020) == 0) || (size == EA_8BYTE));
+
+ printf(emitRegName(reg, size));
+
+ if (imm > 0)
+ {
+ if (strictArmAsm)
+ {
+ printf(",");
+ }
+ emitDispShiftOpts(opt);
+ emitDispImm(imm, false);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a register with optional extend and scale operations
+ */
+void emitter::emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm)
+{
+ assert((imm >= 0) && (imm <= 4));
+ assert(insOptsNone(opt) || insOptsAnyExtend(opt) || (opt == INS_OPTS_LSL));
+
+ // size is based on the extend option, not the instr size.
+ emitAttr size = insOpts32BitExtend(opt) ? EA_4BYTE : EA_8BYTE;
+
+ if (strictArmAsm)
+ {
+ if (insOptsNone(opt))
+ {
+ emitDispReg(reg, size, false);
+ }
+ else
+ {
+ emitDispReg(reg, size, true);
+ if (opt == INS_OPTS_LSL)
+ printf("LSL");
+ else
+ emitDispExtendOpts(opt);
+ if ((imm > 0) || (opt == INS_OPTS_LSL))
+ {
+ printf(" ");
+ emitDispImm(imm, false);
+ }
+ }
+ }
+ else // !strictArmAsm
+ {
+ if (insOptsNone(opt))
+ {
+ emitDispReg(reg, size, false);
+ }
+ else
+ {
+ if (opt != INS_OPTS_LSL)
+ {
+ emitDispExtendOpts(opt);
+ printf("(");
+ emitDispReg(reg, size, false);
+ printf(")");
+ }
+ }
+ if (imm > 0)
+ {
+ printf("*");
+ emitDispImm(1 << imm, false);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + imm]
+ */
+void emitter::emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm)
+{
+ reg = encodingZRtoSP(reg); // ZR (R31) encodes the SP register
+
+ if (strictArmAsm)
+ {
+ printf("[");
+
+ emitDispReg(reg, EA_8BYTE, false);
+
+ if (!insOptsPostIndex(opt) && (imm != 0))
+ {
+ printf(",");
+ emitDispImm(imm, false);
+ }
+ printf("]");
+
+ if (insOptsPreIndex(opt))
+ {
+ printf("!");
+ }
+ else if (insOptsPostIndex(opt))
+ {
+ printf(",");
+ emitDispImm(imm, false);
+ }
+ }
+ else // !strictArmAsm
+ {
+ printf("[");
+
+ const char* operStr = "++";
+ if (imm < 0)
+ {
+ operStr = "--";
+ imm = -imm;
+ }
+
+ if (insOptsPreIndex(opt))
+ {
+ printf(operStr);
+ }
+
+ emitDispReg(reg, EA_8BYTE, false);
+
+ if (insOptsPostIndex(opt))
+ {
+ printf(operStr);
+ }
+
+ if (insOptsIndexed(opt))
+ {
+ printf(", ");
+ }
+ else
+ {
+ printf("%c", operStr[1]);
+ }
+ emitDispImm(imm, false);
+ printf("]");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an addressing operand [reg + extended reg]
+ */
+void emitter::emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size)
+{
+ reg1 = encodingZRtoSP(reg1); // ZR (R31) encodes the SP register
+
+ unsigned scale = 0;
+ if (isScaled)
+ {
+ scale = NaturalScale_helper(size);
+ }
+
+ printf("[");
+
+ if (strictArmAsm)
+ {
+ emitDispReg(reg1, EA_8BYTE, true);
+ emitDispExtendReg(reg2, opt, scale);
+ }
+ else // !strictArmAsm
+ {
+ emitDispReg(reg1, EA_8BYTE, false);
+ printf("+");
+ emitDispExtendReg(reg2, opt, scale);
+ }
+
+ printf("]");
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) the instruction encoding in hex
+ */
+
+void emitter::emitDispInsHex(BYTE* code, size_t sz)
+{
+ // We do not display the instruction hex if we want diff-able disassembly
+ if (!emitComp->opts.disDiffable)
+ {
+ if (sz == 4)
+ {
+ printf(" %08X ", (*((code_t*)code)));
+ }
+ else
+ {
+ printf(" ");
+ }
+ }
+}
+
+/****************************************************************************
+ *
+ * Display the given instruction.
+ */
+
+void emitter::emitDispIns(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* pCode, size_t sz, insGroup* ig)
+{
+ if (EMITVERBOSE)
+ {
+ unsigned idNum =
+ id->idDebugOnlyInfo()->idNum; // Do not remove this! It is needed for VisualStudio conditional breakpoints
+
+ printf("IN%04x: ", idNum);
+ }
+
+ if (pCode == NULL)
+ sz = 0;
+
+ if (!emitComp->opts.dspEmit && !isNew && !asmfm && sz)
+ doffs = true;
+
+ /* Display the instruction offset */
+
+ emitDispInsOffs(offset, doffs);
+
+ /* Display the instruction hex code */
+
+ emitDispInsHex(pCode, sz);
+
+ printf(" ");
+
+ /* Get the instruction and format */
+
+ instruction ins = id->idIns();
+ insFormat fmt = id->idInsFmt();
+
+ emitDispInst(ins);
+
+ /* If this instruction has just been added, check its size */
+
+ assert(isNew == false || (int)emitSizeOfInsDsc(id) == emitCurIGfreeNext - (BYTE*)id);
+
+ /* Figure out the operand size */
+ emitAttr size = id->idOpSize();
+ emitAttr attr = size;
+ if (id->idGCref() == GCT_GCREF)
+ attr = EA_GCREF;
+ else if (id->idGCref() == GCT_BYREF)
+ attr = EA_BYREF;
+
+ switch (fmt)
+ {
+ code_t code;
+ ssize_t imm;
+ int doffs;
+ bool isExtendAlias;
+ bool canEncode;
+ bitMaskImm bmi;
+ halfwordImm hwi;
+ condFlagsImm cfi;
+ unsigned scale;
+ unsigned immShift;
+ bool hasShift;
+ ssize_t offs;
+ const char* methodName;
+ emitAttr elemsize;
+ emitAttr datasize;
+ emitAttr srcsize;
+ emitAttr dstsize;
+ ssize_t index;
+ ssize_t index2;
+
+ case IF_BI_0A: // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ case IF_BI_0B: // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00
+ case IF_LARGEJMP:
+ {
+ if (fmt == IF_LARGEJMP)
+ {
+ printf("(LARGEJMP)");
+ }
+ if (id->idAddr()->iiaHasInstrCount())
+ {
+ int instrCount = id->idAddr()->iiaGetInstrCount();
+
+ if (ig == nullptr)
+ {
+ printf("pc%s%d instructions", (instrCount >= 0) ? "+" : "", instrCount);
+ }
+ else
+ {
+ unsigned insNum = emitFindInsNum(ig, id);
+ UNATIVE_OFFSET srcOffs = ig->igOffs + emitFindOffset(ig, insNum + 1);
+ UNATIVE_OFFSET dstOffs = ig->igOffs + emitFindOffset(ig, insNum + 1 + instrCount);
+ ssize_t relOffs = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ printf("pc%s%d (%d instructions)", (relOffs >= 0) ? "+" : "", relOffs, instrCount);
+ }
+ }
+ else if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ }
+ break;
+
+ case IF_BI_0C: // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00
+ if (id->idIsCallAddr())
+ {
+ offs = (ssize_t)id->idAddr()->iiaAddr;
+ methodName = "";
+ }
+ else
+ {
+ offs = 0;
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ if (offs)
+ {
+ if (id->idIsDspReloc())
+ printf("reloc ");
+ printf("%08X", offs);
+ }
+ else
+ {
+ printf("%s", methodName);
+ }
+ break;
+
+ case IF_BI_1A: // BI_1A ......iiiiiiiiii iiiiiiiiiiittttt Rt simm19:00
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, true);
+ if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_BI_1B: // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6, simm14:00
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, true);
+ emitDispImm(emitGetInsSC(id), true);
+ if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_BR_1A: // BR_1A ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, false);
+ break;
+
+ case IF_BR_1B: // BR_1B ................ ......nnnnn..... Rn
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg3(), size, false);
+ break;
+
+ case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+ case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+ case IF_LARGELDC:
+ case IF_LARGEADR:
+ assert(insOptsNone(id->idInsOpt()));
+ emitDispReg(id->idReg1(), size, true);
+ imm = emitGetInsSC(id);
+
+ /* Is this actually a reference to a data section? */
+ if (fmt == IF_LARGEADR)
+ {
+ printf("(LARGEADR)");
+ }
+ else if (fmt == IF_LARGELDC)
+ {
+ printf("(LARGELDC)");
+ }
+
+ printf("[");
+ if (id->idAddr()->iiaIsJitDataOffset())
+ {
+ doffs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
+ /* Display a data section reference */
+
+ if (doffs & 1)
+ printf("@CNS%02u", doffs - 1);
+ else
+ printf("@RWD%02u", doffs);
+
+ if (imm != 0)
+ printf("%+Id", imm);
+ }
+ else
+ {
+ assert(imm == 0);
+ if (id->idIsReloc())
+ {
+ printf("RELOC ");
+ emitDispImm((ssize_t)id->idAddr()->iiaAddr, false);
+ }
+ else if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ }
+ printf("]");
+ break;
+
+ case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert(emitGetInsSC(id) == 0);
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg2(), id->idInsOpt(), 0);
+ break;
+
+ case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+ assert(insOptsNone(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ scale = NaturalScale_helper(emitInsLoadStoreSize(id));
+ imm <<= scale; // The immediate is scaled by the size of the ld/st
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+ break;
+
+ case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiPPnnnnnttttt Rt Rn imm(-256..+255) no/pre/post inc
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg2(), id->idInsOpt(), imm);
+ break;
+
+ case IF_LS_3A: // LS_3A .X.......X.mmmmm oooS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+ assert(insOptsLSExtend(id->idInsOpt()));
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ if (id->idIsLclVar())
+ {
+ emitDispAddrRRExt(id->idReg2(), codeGen->rsGetRsvdReg(), id->idInsOpt(), false, size);
+ }
+ else
+ {
+ emitDispAddrRRExt(id->idReg2(), id->idReg3(), id->idInsOpt(), id->idReg3Scaled(), size);
+ }
+ break;
+
+ case IF_LS_3B: // LS_3B X............... .aaaaannnnnddddd Rt Ra Rn
+ assert(insOptsNone(id->idInsOpt()));
+ assert(emitGetInsSC(id) == 0);
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg3(), id->idInsOpt(), 0);
+ break;
+
+ case IF_LS_3C: // LS_3C X.........iiiiii iaaaaannnnnddddd Rt Ra Rn imm(im7,sh)
+ assert(insOptsNone(id->idInsOpt()) || insOptsIndexed(id->idInsOpt()));
+ imm = emitGetInsSC(id);
+ scale = NaturalScale_helper(emitInsLoadStoreSize(id));
+ imm <<= scale;
+ emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
+ emitDispReg(id->idReg2(), emitInsTargetRegSize(id), true);
+ emitDispAddrRI(id->idReg3(), id->idInsOpt(), imm);
+ break;
+
+ case IF_DI_1A: // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispImmOptsLSL12(emitGetInsSC(id), id->idInsOpt());
+ break;
+
+ case IF_DI_1B: // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+ emitDispReg(id->idReg1(), size, true);
+ hwi.immHWVal = (unsigned)emitGetInsSC(id);
+ if (ins == INS_mov)
+ {
+ emitDispImm(emitDecodeHalfwordImm(hwi, size), false);
+ }
+ else // movz, movn, movk
+ {
+ emitDispImm(hwi.immVal, false);
+ if (hwi.immHW != 0)
+ {
+ emitDispShiftOpts(INS_OPTS_LSL);
+ emitDispImm(hwi.immHW * 16, false);
+ }
+ }
+ break;
+
+ case IF_DI_1C: // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+ emitDispReg(id->idReg1(), size, true);
+ bmi.immNRS = (unsigned)emitGetInsSC(id);
+ emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+ break;
+
+ case IF_DI_1D: // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ bmi.immNRS = (unsigned)emitGetInsSC(id);
+ emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+ break;
+
+ case IF_DI_2A: // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+ if ((ins == INS_add) || (ins == INS_sub))
+ {
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ }
+ emitDispImmOptsLSL12(emitGetInsSC(id), id->idInsOpt());
+ break;
+
+ case IF_DI_2B: // DI_2B X........X.nnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_DI_2C: // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+ if (ins == INS_ands)
+ {
+ emitDispReg(id->idReg1(), size, true);
+ }
+ else
+ {
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ }
+ emitDispReg(id->idReg2(), size, true);
+ bmi.immNRS = (unsigned)emitGetInsSC(id);
+ emitDispImm(emitDecodeBitMaskImm(bmi, size), false);
+ break;
+
+ case IF_DI_2D: // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, ims (N,r,s)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+
+ imm = emitGetInsSC(id);
+ bmi.immNRS = (unsigned)imm;
+
+ switch (ins)
+ {
+ case INS_bfm:
+ case INS_sbfm:
+ case INS_ubfm:
+ emitDispImm(bmi.immR, true);
+ emitDispImm(bmi.immS, false);
+ break;
+
+ case INS_bfi:
+ case INS_sbfiz:
+ case INS_ubfiz:
+ emitDispImm(getBitWidth(size) - bmi.immR, true);
+ emitDispImm(bmi.immS + 1, false);
+ break;
+
+ case INS_bfxil:
+ case INS_sbfx:
+ case INS_ubfx:
+ emitDispImm(bmi.immR, true);
+ emitDispImm(bmi.immS - bmi.immR + 1, false);
+ break;
+
+ case INS_asr:
+ case INS_lsr:
+ case INS_lsl:
+ emitDispImm(imm, false);
+ break;
+
+ default:
+ assert(!"Unexpected instruction in IF_DI_2D");
+ }
+
+ break;
+
+ case IF_DI_1F: // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+ emitDispReg(id->idReg1(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispImm(cfi.imm5, true);
+ emitDispFlags(cfi.flags);
+ printf(",");
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_1D: // DR_1D X............... cccc.......mmmmm Rd cond
+ emitDispReg(id->idReg1(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_2A: // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, false);
+ break;
+
+ case IF_DR_2B: // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
+ break;
+
+ case IF_DR_2C: // DR_2C X..........mmmmm ooosssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ imm = emitGetInsSC(id);
+ emitDispExtendReg(id->idReg2(), id->idInsOpt(), imm);
+ break;
+
+ case IF_DR_2D: // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_2E: // DR_2E X..........mmmmm ...........ddddd Rd Rm
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, false);
+ break;
+
+ case IF_DR_2F: // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispShiftedReg(id->idReg2(), id->idInsOpt(), emitGetInsSC(id), size);
+ break;
+
+ case IF_DR_2G: // DR_2G X............... ......nnnnnddddd Rd Rn
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, false);
+ break;
+
+ case IF_DR_2H: // DR_2H X........X...... ......nnnnnddddd Rd Rn
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, false);
+ break;
+
+ case IF_DR_2I: // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispFlags(cfi.flags);
+ printf(",");
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_3A: // DR_3A X..........mmmmm ......nnnnnmmmmm Rd Rn Rm
+ if ((ins == INS_add) || (ins == INS_sub))
+ {
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+ }
+ else
+ {
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ }
+ if (id->idIsLclVar())
+ {
+ emitDispReg(codeGen->rsGetRsvdReg(), size, false);
+ }
+ else
+ {
+ emitDispReg(id->idReg3(), size, false);
+ }
+
+ break;
+
+ case IF_DR_3B: // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispShiftedReg(id->idReg3(), id->idInsOpt(), emitGetInsSC(id), size);
+ break;
+
+ case IF_DR_3C: // DR_3C X..........mmmmm ooosssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+ emitDispReg(encodingZRtoSP(id->idReg1()), size, true);
+ emitDispReg(encodingZRtoSP(id->idReg2()), size, true);
+ imm = emitGetInsSC(id);
+ emitDispExtendReg(id->idReg3(), id->idInsOpt(), imm);
+ break;
+
+ case IF_DR_3D: // DR_3D X..........mmmmm cccc..nnnnnmmmmm Rd Rn Rm cond
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ cfi.immCFVal = (unsigned)emitGetInsSC(id);
+ emitDispCond(cfi.cond);
+ break;
+
+ case IF_DR_3E: // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_DR_4A: // DR_4A X..........mmmmm .aaaaannnnnmmmmm Rd Rn Rm Ra
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ emitDispReg(id->idReg4(), size, false);
+ break;
+
+ case IF_DV_1A: // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispFloatImm(emitGetInsSC(id));
+ break;
+
+ case IF_DV_1B: // DV_1B .QX..........iii cmod..iiiiiddddd Vd imm8 (immediate vector)
+ imm = emitGetInsSC(id) & 0x0ff;
+ immShift = (emitGetInsSC(id) & 0x700) >> 8;
+ hasShift = (immShift != 0);
+ elemsize = optGetElemsize(id->idInsOpt());
+ if (id->idInsOpt() == INS_OPTS_1D)
+ {
+ assert(elemsize == size);
+ emitDispReg(id->idReg1(), size, true);
+ }
+ else
+ {
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ }
+ if (ins == INS_fmov)
+ {
+ emitDispFloatImm(imm);
+ assert(hasShift == false);
+ }
+ else
+ {
+ if (elemsize == EA_8BYTE)
+ {
+ assert(ins == INS_movi);
+ ssize_t imm64 = 0;
+ const ssize_t mask8 = 0xFF;
+ for (unsigned b = 0; b < 8; b++)
+ {
+ if (imm & (1 << b))
+ {
+ imm64 |= (mask8 << (b * 8));
+ }
+ }
+ emitDispImm(imm64, hasShift, true);
+ }
+ else
+ {
+ emitDispImm(imm, hasShift, true);
+ }
+ if (hasShift)
+ {
+ insOpts opt = (immShift & 0x4) ? INS_OPTS_MSL : INS_OPTS_LSL;
+ unsigned shift = (immShift & 0x3) * 8;
+ emitDispShiftOpts(opt);
+ emitDispImm(shift, false);
+ }
+ }
+ break;
+
+ case IF_DV_1C: // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispFloatZero();
+ break;
+
+ case IF_DV_2A: // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvt - vector)
+ case IF_DV_2M: // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
+ break;
+
+ case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispReg(id->idReg2(), elemsize, true);
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_DV_2O: // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+ imm = emitGetInsSC(id);
+ // Do we have a sxtl or uxtl instruction?
+ isExtendAlias = ((ins == INS_sxtl) || (ins == INS_sxtl2) || (ins == INS_uxtl) || (ins == INS_uxtl2));
+ code = emitInsCode(ins, fmt);
+ if (code & 0x00008000) // widen/narrow opcodes
+ {
+ if (code & 0x00002000) // SHL opcodes
+ {
+ emitDispVectorReg(id->idReg1(), optWidenElemsize(id->idInsOpt()), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), !isExtendAlias);
+ }
+ else // SHR opcodes
+ {
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), optWidenElemsize(id->idInsOpt()), !isExtendAlias);
+ }
+ }
+ else
+ {
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), !isExtendAlias);
+ }
+ // Print the immediate unless we have a sxtl or uxtl instruction
+ if (!isExtendAlias)
+ {
+ emitDispImm(imm, false);
+ }
+ break;
+
+ case IF_DV_2B: // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+ srcsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ if (ins == INS_smov)
+ {
+ dstsize = EA_8BYTE;
+ }
+ else // INS_umov or INS_mov
+ {
+ dstsize = (srcsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE;
+ }
+ emitDispReg(id->idReg1(), dstsize, true);
+ emitDispVectorRegIndex(id->idReg2(), srcsize, index, false);
+ break;
+
+ case IF_DV_2C: // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from general)
+ if (ins == INS_dup)
+ {
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ }
+ else // INS_ins
+ {
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ assert(isValidVectorElemsize(elemsize));
+ emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
+ }
+ emitDispReg(id->idReg2(), (elemsize == EA_8BYTE) ? EA_8BYTE : EA_4BYTE, false);
+ break;
+
+ case IF_DV_2D: // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+ datasize = id->idOpSize();
+ assert(isValidVectorDatasize(datasize));
+ assert(isValidArrangement(datasize, id->idInsOpt()));
+ elemsize = optGetElemsize(id->idInsOpt());
+ index = emitGetInsSC(id);
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+ break;
+
+ case IF_DV_2E: // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+ elemsize = id->idOpSize();
+ index = emitGetInsSC(id);
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispVectorRegIndex(id->idReg2(), elemsize, index, false);
+ break;
+
+ case IF_DV_2F: // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+ imm = emitGetInsSC(id);
+ index = (imm >> 4) & 0xf;
+ index2 = imm & 0xf;
+ elemsize = id->idOpSize();
+ emitDispVectorRegIndex(id->idReg1(), elemsize, index, true);
+ emitDispVectorRegIndex(id->idReg2(), elemsize, index2, false);
+ break;
+
+ case IF_DV_2G: // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+ case IF_DV_2K: // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+ case IF_DV_2L: // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+ elemsize = id->idOpSize();
+ emitDispReg(id->idReg1(), elemsize, true);
+ emitDispReg(id->idReg2(), elemsize, false);
+ break;
+
+ case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general)
+ case IF_DV_2I: // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov, Xcvtf - from general)
+ case IF_DV_2J: // DV_2J ........SS.....D D.....nnnnnddddd Vd Vn (fcvt)
+ dstsize = optGetDstsize(id->idInsOpt());
+ srcsize = optGetSrcsize(id->idInsOpt());
+
+ emitDispReg(id->idReg1(), dstsize, true);
+ emitDispReg(id->idReg2(), srcsize, false);
+ break;
+
+ case IF_DV_3A: // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ case IF_DV_3B: // DV_3B .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+ break;
+
+ case IF_DV_3C: // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ if (ins != INS_mov)
+ {
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ }
+ emitDispVectorReg(id->idReg3(), id->idInsOpt(), false);
+ break;
+
+ case IF_DV_3AI: // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ case IF_DV_3BI: // DV_3BI .Q........Lmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+ emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
+ emitDispVectorReg(id->idReg2(), id->idInsOpt(), true);
+ elemsize = optGetElemsize(id->idInsOpt());
+ emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
+ break;
+
+ case IF_DV_3D: // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ case IF_DV_3E: // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, false);
+ break;
+
+ case IF_DV_3DI: // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ elemsize = size;
+ emitDispVectorRegIndex(id->idReg3(), elemsize, emitGetInsSC(id), false);
+ break;
+
+ case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar)
+ emitDispReg(id->idReg1(), size, true);
+ emitDispReg(id->idReg2(), size, true);
+ emitDispReg(id->idReg3(), size, true);
+ emitDispReg(id->idReg4(), size, false);
+ break;
+
+ case IF_SN_0A: // SN_0A ................ ................
+ break;
+
+ case IF_SI_0A: // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+ emitDispImm(emitGetInsSC(id), false);
+ break;
+
+ case IF_SI_0B: // SI_0B ................ ....bbbb........ imm4 - barrier
+ emitDispBarrier((insBarrier)emitGetInsSC(id));
+ break;
+
+ default:
+ printf("unexpected format %s", emitIfName(id->idInsFmt()));
+ assert(!"unexpectedFormat");
+ break;
+ }
+
+ if (id->idDebugOnlyInfo()->idVarRefOffs)
+ {
+ printf("\t// ");
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+{
+ printf("[");
+
+ if (varx < 0)
+ printf("TEMP_%02u", -varx);
+ else
+ emitComp->gtDispLclVar(+varx, false);
+
+ if (disp < 0)
+ printf("-0x%02x", -disp);
+ else if (disp > 0)
+ printf("+0x%02x", +disp);
+
+ printf("]");
+
+ if (varx >= 0 && emitComp->opts.varNames)
+ {
+ LclVarDsc* varDsc;
+ const char* varName;
+
+ assert((unsigned)varx < emitComp->lvaCount);
+ varDsc = emitComp->lvaTable + varx;
+ varName = emitComp->compLocalVarName(varx, offs);
+
+ if (varName)
+ {
+ printf("'%s", varName);
+
+ if (disp < 0)
+ printf("-%d", -disp);
+ else if (disp > 0)
+ printf("+%d", +disp);
+
+ printf("'");
+ }
+ }
+}
+
+#endif // DEBUG
+
+// Generate code for a load or store operation with a potentially complex addressing mode
+// This method handles the case of a GT_IND with a contained GT_LEA op1 of the x86 form [base + index*scale + offset].
+// Since Arm64 does not directly support such a complex addressing mode,
+// we may generate up to three instructions for it on Arm64.
+//
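+// As an illustrative sketch only (the register numbers below are hypothetical, not taken from this code),
+// a load of the form [base + index*8 + largeOffset] may be lowered to:
+//
+//     mov   x16, #largeOffset         // instGen_Set_Reg_To_Imm into the reserved tmpReg
+//     add   x16, x16, x1              // tmpReg += base
+//     ldr   x0, [x16, x2, LSL #3]     // load from [tmpReg + index*scale]
+//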
+void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir)
+{
+ emitAttr ldstAttr = isVectorRegister(dataReg) ? attr : emitInsAdjustLoadStoreAttr(ins, attr);
+
+ GenTree* addr = indir->Addr();
+
+ if (addr->isContained())
+ {
+ assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
+
+ int offset = 0;
+ DWORD lsl = 0;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ offset = (int)addr->AsAddrMode()->gtOffset;
+ if (addr->AsAddrMode()->gtScale > 0)
+ {
+ assert(isPow2(addr->AsAddrMode()->gtScale));
+ BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+ }
+ }
+
+ GenTree* memBase = indir->Base();
+
+ if (indir->HasIndex())
+ {
+ GenTree* index = indir->Index();
+
+ if (offset != 0)
+ {
+ regMaskTP tmpRegMask = indir->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ if (emitIns_valid_imm_for_add(offset, EA_8BYTE))
+ {
+ if (lsl > 0)
+ {
+ // Generate code to set tmpReg = base + index*scale
+ emitIns_R_R_R_I(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum, lsl,
+ INS_OPTS_LSL);
+ }
+ else // no scale
+ {
+ // Generate code to set tmpReg = base + index
+ emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, memBase->gtRegNum, index->gtRegNum);
+ }
+
+ noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+
+ // Then load/store dataReg from/to [tmpReg + offset]
+ emitIns_R_R_I(ins, ldstAttr, dataReg, tmpReg, offset);
+ }
+ else // large offset
+ {
+ // First load/store tmpReg with the large offset constant
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+ // Then add the base register
+ // rd = rd + base
+ emitIns_R_R_R(INS_add, EA_PTRSIZE, tmpReg, tmpReg, memBase->gtRegNum);
+
+ noway_assert(emitInsIsLoad(ins) || (tmpReg != dataReg));
+ noway_assert(tmpReg != index->gtRegNum);
+
+ // Then load/store dataReg from/to [tmpReg + index*scale]
+ emitIns_R_R_R_I(ins, ldstAttr, dataReg, tmpReg, index->gtRegNum, lsl, INS_OPTS_LSL);
+ }
+ }
+ else // (offset == 0)
+ {
+ if (lsl > 0)
+ {
+ // Then load/store dataReg from/to [memBase + index*scale]
+ emitIns_R_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum, lsl, INS_OPTS_LSL);
+ }
+ else // no scale
+ {
+ // Then load/store dataReg from/to [memBase + index]
+ emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, index->gtRegNum);
+ }
+ }
+ }
+ else // no Index register
+ {
+ if (emitIns_valid_imm_for_ldst_offset(offset, EA_SIZE(attr)))
+ {
+ // Then load/store dataReg from/to [memBase + offset]
+ emitIns_R_R_I(ins, ldstAttr, dataReg, memBase->gtRegNum, offset);
+ }
+ else
+ {
+ // We require a tmpReg to hold the offset
+ regMaskTP tmpRegMask = indir->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ noway_assert(tmpReg != REG_NA);
+
+ // First load/store tmpReg with the large offset constant
+ codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, tmpReg, offset);
+
+ // Then load/store dataReg from/to [memBase + tmpReg]
+ emitIns_R_R_R(ins, ldstAttr, dataReg, memBase->gtRegNum, tmpReg);
+ }
+ }
+ }
+ else // addr is not contained, so we evaluate it into a register
+ {
+ codeGen->genConsumeReg(addr);
+ // Then load/store dataReg from/to [addrReg]
+ emitIns_R_R(ins, ldstAttr, dataReg, addr->gtRegNum);
+ }
+}
+
+// Generates an integer data section constant and returns a field handle representing
+// the data offset to access the constant via a load instruction.
+// This is called during ngen for any relocatable constants
+//
+CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*=EA_8BYTE*/)
+{
+ ssize_t constValue = cnsValIn;
+ void* cnsAddr = &constValue;
+ bool dblAlign;
+
+ if (attr == EA_4BYTE)
+ {
+ dblAlign = false;
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ dblAlign = true;
+ }
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8;
+ UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign);
+ return emitComp->eeFindJitDataOffs(cnum);
+}
+
+// Generates a float or double data section constant and returns a field handle representing
+// the data offset to access the constant. This is called by emitInsBinary() in the case
+// of contained float or double constants.
+CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr /*=EA_UNKNOWN*/)
+{
+ if (attr == EA_UNKNOWN)
+ {
+ attr = emitTypeSize(tree->TypeGet());
+ }
+ else
+ {
+ assert(emitTypeSize(tree->TypeGet()) == attr);
+ }
+
+ double constValue = tree->gtDblCon.gtDconVal;
+ void* cnsAddr;
+ float f;
+ bool dblAlign;
+
+ if (attr == EA_4BYTE)
+ {
+ f = forceCastToFloat(constValue);
+ cnsAddr = &f;
+ dblAlign = false;
+ }
+ else
+ {
+ cnsAddr = &constValue;
+ dblAlign = true;
+ }
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8;
+ UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign);
+ return emitComp->eeFindJitDataOffs(cnum);
+}
+
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
+
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+{
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // src can be immed or reg
+ assert(!src->isContained() || src->isContainedIntOrIImmed());
+
+ // find immed (if any) - it cannot be a dst
+ GenTreeIntConCommon* intConst = nullptr;
+ if (src->isContainedIntOrIImmed())
+ {
+ intConst = src->AsIntConCommon();
+ }
+
+ if (intConst)
+ {
+ emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue());
+ return dst->gtRegNum;
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
+ return dst->gtRegNum;
+ }
+}
+
+// The callee must call genConsumeReg() for any non-contained srcs
+// and genProduceReg() for any non-contained dsts.
+
+regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2)
+{
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // find immed (if any) - it cannot be a dst
+ // Only one src can be an int.
+ GenTreeIntConCommon* intConst = nullptr;
+ GenTree* nonIntReg = nullptr;
+
+ if (varTypeIsFloating(dst))
+ {
+ // src1 can only be a reg
+ assert(!src1->isContained());
+ // src2 can only be a reg
+ assert(!src2->isContained());
+ }
+ else // not floating point
+ {
+ // src2 can be immed or reg
+ assert(!src2->isContained() || src2->isContainedIntOrIImmed());
+
+ // Check src2 first as we can always allow it to be a contained immediate
+ if (src2->isContainedIntOrIImmed())
+ {
+ intConst = src2->AsIntConCommon();
+ nonIntReg = src1;
+ }
+ // Only for commutative operations do we check src1 and allow it to be a contained immediate
+ else if (dst->OperIsCommutative())
+ {
+ // src1 can be immed or reg
+ assert(!src1->isContained() || src1->isContainedIntOrIImmed());
+
+ // Check src1 and allow it to be a contained immediate
+ if (src1->isContainedIntOrIImmed())
+ {
+ assert(!src2->isContainedIntOrIImmed());
+ intConst = src1->AsIntConCommon();
+ nonIntReg = src2;
+ }
+ }
+ else
+ {
+ // src1 can only be a reg
+ assert(!src1->isContained());
+ }
+ }
+ bool isMulOverflow = false;
+ bool isUnsignedMul = false;
+ regNumber extraReg = REG_NA;
+ if (dst->gtOverflowEx())
+ {
+ if (ins == INS_add)
+ {
+ ins = INS_adds;
+ }
+ else if (ins == INS_sub)
+ {
+ ins = INS_subs;
+ }
+ else if (ins == INS_mul)
+ {
+ isMulOverflow = true;
+ isUnsignedMul = ((dst->gtFlags & GTF_UNSIGNED) != 0);
+ assert(intConst == nullptr); // overflow format doesn't support an int constant operand
+ }
+ else
+ {
+ assert(!"Invalid ins for overflow check");
+ }
+ }
+ if (intConst != nullptr)
+ {
+ emitIns_R_R_I(ins, attr, dst->gtRegNum, nonIntReg->gtRegNum, intConst->IconValue());
+ }
+ else
+ {
+ if (isMulOverflow)
+ {
+ // Make sure that we have an internal register
+ assert(genCountBits(dst->gtRsvdRegs) == 2);
+
+ // There will be two bits set in tmpRegsMask.
+ // Remove the bit for 'dst->gtRegNum' from 'tmpRegsMask'
+ regMaskTP tmpRegsMask = dst->gtRsvdRegs & ~genRegMask(dst->gtRegNum);
+ assert(tmpRegsMask != RBM_NONE);
+ regMaskTP tmpRegMask = genFindLowestBit(tmpRegsMask); // set tmpRegMask to a one-bit mask
+ extraReg = genRegNumFromMask(tmpRegMask); // set extraReg from that mask
+
+ if (isUnsignedMul)
+ {
+ if (attr == EA_4BYTE)
+ {
+ // Compute 8 byte results from 4 byte by 4 byte multiplication.
+ emitIns_R_R_R(INS_umull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ // Get the high result by shifting dst.
+ emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ // Compute the high result.
+ emitIns_R_R_R(INS_umulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+ // Now multiply without skewing the high result.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+
+ // Compare the high result against zero to detect overflow.
+ emitIns_R_I(INS_cmp, attr, extraReg, 0);
+ }
+ else
+ {
+ int bitShift = 0;
+ if (attr == EA_4BYTE)
+ {
+ // Compute 8 byte results from 4 byte by 4 byte multiplication.
+ emitIns_R_R_R(INS_smull, EA_8BYTE, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ // Get the high result by shifting dst.
+ emitIns_R_R_I(INS_lsr, EA_8BYTE, extraReg, dst->gtRegNum, 32);
+
+ bitShift = 31;
+ }
+ else
+ {
+ assert(attr == EA_8BYTE);
+ // Save the high result in a temporary register.
+ emitIns_R_R_R(INS_smulh, attr, extraReg, src1->gtRegNum, src2->gtRegNum);
+
+ // Now multiply without skewing the high result.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+
+ bitShift = 63;
+ }
+
+ // Sign bit comparison to detect overflow.
+ emitIns_R_R_I(INS_cmp, attr, extraReg, dst->gtRegNum, bitShift, INS_OPTS_ASR);
+ }
+ }
+ else
+ {
+ // We can just multiply.
+ emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+ }
+
+ if (dst->gtOverflowEx())
+ {
+ assert(!varTypeIsFloating(dst));
+ codeGen->genCheckOverflow(dst);
+ }
+
+ return dst->gtRegNum;
+}
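+
+// Illustrative sketch (hypothetical register names) of what the signed 32-bit overflow-checked
+// multiply above emits; genCheckOverflow then branches to the throw block when the comparison fails:
+//
+//     smull  x0, w1, w2          // 64-bit product of the 32-bit sources
+//     lsr    x3, x0, #32         // extraReg = high 32 bits of the product
+//     cmp    w3, w0, ASR #31     // compare the high half with the sign bits of the low half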
+
+#endif // defined(_TARGET_ARM64_)
diff --git a/src/jit/emitarm64.h b/src/jit/emitarm64.h
new file mode 100644
index 0000000000..5459a0d6c8
--- /dev/null
+++ b/src/jit/emitarm64.h
@@ -0,0 +1,909 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_ARM64_)
+
+// The ARM64 instructions are all 32 bits in size.
+// We use an unsigned int to hold the encoded instructions.
+// This typedef defines the type that we use to hold encoded instructions.
+//
+typedef unsigned int code_t;
+
+static bool strictArmAsm;
+
+/************************************************************************/
+/* Routines that compute the size of / encode instructions */
+/************************************************************************/
+
+struct CnsVal
+{
+ ssize_t cnsVal;
+#ifdef RELOC_SUPPORT
+ bool cnsReloc;
+#endif
+};
+
+#ifdef DEBUG
+
+/************************************************************************/
+/* Debug-only routines to display instructions */
+/************************************************************************/
+
+const char* emitFPregName(unsigned reg, bool varName = true);
+const char* emitVectorRegName(regNumber reg);
+
+void emitDispInst(instruction ins);
+void emitDispReloc(int value, bool addComma);
+void emitDispImm(ssize_t imm, bool addComma, bool alwaysHex = false);
+void emitDispFloatZero();
+void emitDispFloatImm(ssize_t imm8);
+void emitDispImmOptsLSL12(ssize_t imm, insOpts opt);
+void emitDispCond(insCond cond);
+void emitDispFlags(insCflags flags);
+void emitDispBarrier(insBarrier barrier);
+void emitDispShiftOpts(insOpts opt);
+void emitDispExtendOpts(insOpts opt);
+void emitDispLSExtendOpts(insOpts opt);
+void emitDispReg(regNumber reg, emitAttr attr, bool addComma);
+void emitDispVectorReg(regNumber reg, insOpts opt, bool addComma);
+void emitDispVectorRegIndex(regNumber reg, emitAttr elemsize, ssize_t index, bool addComma);
+void emitDispArrangement(insOpts opt);
+void emitDispShiftedReg(regNumber reg, insOpts opt, ssize_t imm, emitAttr attr);
+void emitDispExtendReg(regNumber reg, insOpts opt, ssize_t imm);
+void emitDispAddrRI(regNumber reg, insOpts opt, ssize_t imm);
+void emitDispAddrRRExt(regNumber reg1, regNumber reg2, insOpts opt, bool isScaled, emitAttr size);
+
+void emitDispIns(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* pCode = 0,
+ size_t sz = 0,
+ insGroup* ig = NULL);
+#endif // DEBUG
+
+/************************************************************************/
+/* Private members that deal with target-dependent instr. descriptors */
+/************************************************************************/
+
+private:
+instrDesc* emitNewInstrAmd(emitAttr attr, int dsp);
+instrDesc* emitNewInstrAmdCns(emitAttr attr, int dsp, int cns);
+
+instrDesc* emitNewInstrCallDir(int argCnt,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize,
+ emitAttr secondRetSize);
+
+instrDesc* emitNewInstrCallInd(int argCnt,
+ ssize_t disp,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize,
+ emitAttr secondRetSize);
+
+void emitGetInsCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdCns(instrDesc* id, CnsVal* cv);
+void emitGetInsDcmCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdAny(instrDesc* id);
+
+/************************************************************************/
+/* Private helpers for instruction output */
+/************************************************************************/
+
+private:
+bool emitInsIsCompare(instruction ins);
+bool emitInsIsLoad(instruction ins);
+bool emitInsIsStore(instruction ins);
+bool emitInsIsLoadOrStore(instruction ins);
+emitAttr emitInsAdjustLoadStoreAttr(instruction ins, emitAttr attr);
+emitAttr emitInsTargetRegSize(instrDesc* id);
+emitAttr emitInsLoadStoreSize(instrDesc* id);
+
+emitter::insFormat emitInsFormat(instruction ins);
+emitter::code_t emitInsCode(instruction ins, insFormat fmt);
+
+// Generate code for a load or store operation and handle the case of contained GT_LEA op1 with [base + index<<scale +
+// offset]
+void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTreeIndir* indir);
+
+// Emit the 32-bit Arm64 instruction 'code' into the 'dst' buffer
+static unsigned emitOutput_Instr(BYTE* dst, code_t code);
+
+// A helper method to return the natural scale for an EA 'size'
+static unsigned NaturalScale_helper(emitAttr size);
+
+// A helper method to perform a Rotate-Right shift operation
+static UINT64 ROR_helper(UINT64 value, unsigned sh, unsigned width);
+
+// A helper method to perform a 'NOT' bitwise complement operation
+static UINT64 NOT_helper(UINT64 value, unsigned width);
+
+// A helper method to perform a bit Replicate operation
+static UINT64 Replicate_helper(UINT64 value, unsigned width, emitAttr size);
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are listed as imm(N,r,s) and referred to as 'bitmask immediate'
+*/
+
+union bitMaskImm {
+ struct
+ {
+ unsigned immS : 6; // bits 0..5
+ unsigned immR : 6; // bits 6..11
+ unsigned immN : 1; // bits 12
+ };
+ unsigned immNRS; // concat N:R:S forming a 13-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a 64-bit immediate and its 'bitmask immediate'
+* representation imm(N,r,s)
+*/
+
+static emitter::bitMaskImm emitEncodeBitMaskImm(INT64 imm, emitAttr size);
+
+static INT64 emitDecodeBitMaskImm(const emitter::bitMaskImm bmImm, emitAttr size);
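+
+// Illustrative note (example values, not exhaustive): a 'bitmask immediate' can only describe a
+// rotated run of ones replicated across the register, so values such as 0x00FF00FF00FF00FF or
+// 0x5555555555555555 are encodable, while an arbitrary value like 0x1234 is not and must be
+// materialized another way (e.g. via movz/movk).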
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are listed as imm(i16,hw) and referred to as 'halfword immediate'
+*/
+
+union halfwordImm {
+ struct
+ {
+ unsigned immVal : 16; // bits 0..15
+ unsigned immHW : 2; // bits 16..17
+ };
+ unsigned immHWVal; // concat HW:Val forming an 18-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a 64-bit immediate and its 'halfword immediate'
+* representation imm(i16,hw)
+*/
+
+static emitter::halfwordImm emitEncodeHalfwordImm(INT64 imm, emitAttr size);
+
+static INT64 emitDecodeHalfwordImm(const emitter::halfwordImm hwImm, emitAttr size);
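+
+// Illustrative note: a 'halfword immediate' is a single 16-bit chunk placed at one of the
+// halfword positions (hw = 0..3), so a value like 0xABCD0000 fits (movz with hw=1) while
+// 0x12345678 does not and is typically built with a movz/movk sequence instead.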
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are listed as imm(i8,by) and referred to as 'byteShifted immediate'
+*/
+
+union byteShiftedImm {
+ struct
+ {
+ unsigned immVal : 8; // bits 0..7
+ unsigned immBY : 2; // bits 8..9
+ unsigned immOnes : 1; // bit 10
+ };
+ unsigned immBSVal; // concat Ones:BY:Val forming a 10-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a 16/32-bit immediate and its 'byteShifted immediate'
+* representation imm(i8,by)
+*/
+
+static emitter::byteShiftedImm emitEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL);
+
+static INT32 emitDecodeByteShiftedImm(const emitter::byteShiftedImm bsImm, emitAttr size);
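+
+// Illustrative note: a 'byteShifted immediate' is an 8-bit value shifted left by a whole number
+// of bytes (LSL) or shifted with the low bits filled with ones (MSL); for example, a per-lane
+// 32-bit constant of 0x00005500 can be encoded as #0x55, LSL #8.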
+
+/************************************************************************
+*
+* This union is used to encode/decode the special ARM64 immediate values
+* that are used for FMOV immediate and referred to as 'float 8-bit immediate'
+*/
+
+union floatImm8 {
+ struct
+ {
+ unsigned immMant : 4; // bits 0..3
+ unsigned immExp : 3; // bits 4..6
+ unsigned immSign : 1; // bits 7
+ };
+ unsigned immFPIVal; // concat Sign:Exp:Mant forming an 8-bit unsigned immediate
+};
+
+/************************************************************************
+*
+* Convert between a double and its 'float 8-bit immediate' representation
+*/
+
+static emitter::floatImm8 emitEncodeFloatImm8(double immDbl);
+
+static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm);
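+
+// Illustrative note: the 8-bit float immediate covers values of the form (+/-)(16..31)/16 * 2^n
+// for a small exponent range, so constants such as 1.0, 0.5, 2.0 or -3.0 can be encoded directly
+// by fmov, while a value like 0.1 cannot and must be loaded from the data section instead.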
+
+/************************************************************************
+*
+* This union is used to encode/decode the cond, nzcv and imm5 values for
+* instructions that use them in the small constant immediate field
+*/
+
+union condFlagsImm {
+ struct
+ {
+ insCond cond : 4; // bits 0..3
+ insCflags flags : 4; // bits 4..7
+ unsigned imm5 : 5; // bits 8..12
+ };
+ unsigned immCFVal; // concat imm5:flags:cond forming a 13-bit unsigned immediate
+};
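+
+// Illustrative note (the operands below are hypothetical examples): for a conditional compare
+// such as 'ccmp x1, #10, #0, eq' the emitter packs imm5 = 10, flags = 0 and cond = EQ into a
+// single immCFVal constant using the field layout above.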
+
+// Returns an encoding for the specified register used in the 'Rd' position
+static code_t insEncodeReg_Rd(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Rt' position
+static code_t insEncodeReg_Rt(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Rn' position
+static code_t insEncodeReg_Rn(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Rm' position
+static code_t insEncodeReg_Rm(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Ra' position
+static code_t insEncodeReg_Ra(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vd' position
+static code_t insEncodeReg_Vd(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vt' position
+static code_t insEncodeReg_Vt(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vn' position
+static code_t insEncodeReg_Vn(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Vm' position
+static code_t insEncodeReg_Vm(regNumber reg);
+
+// Returns an encoding for the specified register used in the 'Va' position
+static code_t insEncodeReg_Va(regNumber reg);
+
+// Returns an encoding for the imm which represents the condition code.
+static code_t insEncodeCond(insCond cond);
+
+// Returns an encoding for the imm which represents the 'condition code'
+// with the lowest bit inverted (marked by invert(<cond>) in the architecture manual).
+static code_t insEncodeInvertedCond(insCond cond);
+
+// Returns an encoding for the imm which represents the flags.
+static code_t insEncodeFlags(insCflags flags);
+
+// Returns the encoding for the Shift Count bits to be used for Arm64 encodings
+static code_t insEncodeShiftCount(ssize_t imm, emitAttr size);
+
+// Returns the encoding to select the datasize for most Arm64 instructions
+static code_t insEncodeDatasize(emitAttr size);
+
+// Returns the encoding to select the datasize for the general load/store Arm64 instructions
+static code_t insEncodeDatasizeLS(code_t code, emitAttr size);
+
+// Returns the encoding to select the datasize for the vector load/store Arm64 instructions
+static code_t insEncodeDatasizeVLS(code_t code, emitAttr size);
+
+// Returns the encoding to select the datasize for the vector load/store pair Arm64 instructions
+static code_t insEncodeDatasizeVPLS(code_t code, emitAttr size);
+
+// Returns the encoding to select the datasize for bitfield Arm64 instructions
+static code_t insEncodeDatasizeBF(code_t code, emitAttr size);
+
+// Returns the encoding to select the vectorsize for SIMD Arm64 instructions
+static code_t insEncodeVectorsize(emitAttr size);
+
+// Returns the encoding to select 'index' for an Arm64 vector elem instruction
+static code_t insEncodeVectorIndex(emitAttr elemsize, ssize_t index);
+
+// Returns the encoding to select 'index2' for an Arm64 'ins' elem instruction
+static code_t insEncodeVectorIndex2(emitAttr elemsize, ssize_t index2);
+
+// Returns the encoding to select 'index' for an Arm64 'mul' elem instruction
+static code_t insEncodeVectorIndexLMH(emitAttr elemsize, ssize_t index);
+
+// Returns the encoding to shift by 'shift' bits for an Arm64 vector or scalar instruction
+static code_t insEncodeVectorShift(emitAttr size, ssize_t shift);
+
+// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 vector instruction
+static code_t insEncodeElemsize(emitAttr size);
+
+// Returns the encoding to select the 4/8 byte elemsize for an Arm64 float vector instruction
+static code_t insEncodeFloatElemsize(emitAttr size);
+
+// Returns the encoding to select the index for an Arm64 float vector by elem instruction
+static code_t insEncodeFloatIndex(emitAttr elemsize, ssize_t index);
+
+// Returns the encoding to select the 'conversion' operation for a type 'fmt' Arm64 instruction
+static code_t insEncodeConvertOpt(insFormat fmt, insOpts conversion);
+
+// Returns the encoding to have the Rn register of a ld/st reg be Pre/Post/Not indexed updated
+static code_t insEncodeIndexedOpt(insOpts opt);
+
+// Returns the encoding to have the Rn register of a ld/st pair be Pre/Post/Not indexed updated
+static code_t insEncodePairIndexedOpt(instruction ins, insOpts opt);
+
+// Returns the encoding to apply a Shift Type on the Rm register
+static code_t insEncodeShiftType(insOpts opt);
+
+// Returns the encoding to apply a 12 bit left shift to the immediate
+static code_t insEncodeShiftImm12(insOpts opt);
+
+// Returns the encoding to have the Rm register use an extend operation
+static code_t insEncodeExtend(insOpts opt);
+
+// Returns the encoding to scale the Rm register by {0,1,2,3,4} in an extend operation
+static code_t insEncodeExtendScale(ssize_t imm);
+
+// Returns the encoding to have the Rm register be auto scaled by the ld/st size
+static code_t insEncodeReg3Scale(bool isScaled);
+
+// Returns true if 'reg' represents an integer register.
+static bool isIntegerRegister(regNumber reg)
+{
+ return (reg >= REG_INT_FIRST) && (reg <= REG_INT_LAST);
+}
+
+// Returns true if 'value' is a legal unsigned immediate 8 bit encoding (such as for FMOV).
+static bool isValidUimm8(ssize_t value)
+{
+ return (0 <= value) && (value <= 0xFFLL);
+};
+
+// Returns true if 'value' is a legal unsigned immediate 12 bit encoding (such as for CMP, CMN).
+static bool isValidUimm12(ssize_t value)
+{
+ return (0 <= value) && (value <= 0xFFFLL);
+};
+
+// Returns true if 'value' is a legal unsigned immediate 16 bit encoding (such as for MOVZ, MOVN, MOVK).
+static bool isValidUimm16(ssize_t value)
+{
+ return (0 <= value) && (value <= 0xFFFFLL);
+};
+
+// Returns true if 'value' is a legal signed immediate 26 bit encoding (such as for B or BL).
+static bool isValidSimm26(ssize_t value)
+{
+ return (-0x2000000LL <= value) && (value <= 0x1FFFFFFLL);
+};
+
+// Returns true if 'value' is a legal signed immediate 19 bit encoding (such as for B.cond, CBNZ, CBZ).
+static bool isValidSimm19(ssize_t value)
+{
+ return (-0x40000LL <= value) && (value <= 0x3FFFFLL);
+};
+
+// Returns true if 'value' is a legal signed immediate 14 bit encoding (such as for TBNZ, TBZ).
+static bool isValidSimm14(ssize_t value)
+{
+ return (-0x2000LL <= value) && (value <= 0x1FFFLL);
+};
+
+// Returns true if 'value' represents a valid 'bitmask immediate' encoding.
+static bool isValidImmNRS(size_t value, emitAttr size)
+{
+ return (value >= 0) && (value < 0x2000);
+} // any unsigned 13-bit immediate
+
+// Returns true if 'value' represents a valid 'halfword immediate' encoding.
+static bool isValidImmHWVal(size_t value, emitAttr size)
+{
+ return (value >= 0) && (value < 0x40000);
+} // any unsigned 18-bit immediate
+
+// Returns true if 'value' represents a valid 'byteShifted immediate' encoding.
+static bool isValidImmBSVal(size_t value, emitAttr size)
+{
+ return (value >= 0) && (value < 0x800);
+} // any unsigned 11-bit immediate
+
+// The return value replaces REG_ZR with REG_SP
+static regNumber encodingZRtoSP(regNumber reg)
+{
+ return (reg == REG_ZR) ? REG_SP : reg;
+} // ZR (R31) encodes the SP register
+
+// The return value replaces REG_SP with REG_ZR
+static regNumber encodingSPtoZR(regNumber reg)
+{
+ return (reg == REG_SP) ? REG_ZR : reg;
+} // SP is encoded using ZR (R31)
+
+// For the given 'ins' returns the reverse instruction, if one exists, otherwise returns INS_INVALID
+static instruction insReverse(instruction ins);
+
+// For the given 'datasize' and 'elemsize' returns the insOpts that specifies the vector register arrangement
+static insOpts optMakeArrangement(emitAttr datasize, emitAttr elemsize);
+
+// For the given 'datasize' and 'opt' returns true if it specifies a valid vector register arrangement
+static bool isValidArrangement(emitAttr datasize, insOpts opt);
+
+// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
+static emitAttr optGetDatasize(insOpts arrangement);
+
+// For the given 'arrangement' returns the 'elemsize' specified by the vector register arrangement
+static emitAttr optGetElemsize(insOpts arrangement);
+
+// For the given 'arrangement' returns the 'widen-arrangement' specified by the vector register arrangement
+static insOpts optWidenElemsize(insOpts arrangement);
+
+// For the given 'conversion' returns the 'dstsize' specified by the conversion option
+static emitAttr optGetDstsize(insOpts conversion);
+
+// For the given 'conversion' returns the 'srcsize' specified by the conversion option
+static emitAttr optGetSrcsize(insOpts conversion);
+
+// For the given 'datasize', 'elemsize' and 'index' returns true, if it specifies a valid 'index'
+// for an element of size 'elemsize' in a vector register of size 'datasize'
+static bool isValidVectorIndex(emitAttr datasize, emitAttr elemsize, ssize_t index);
+
+/************************************************************************/
+/* Public inline informational methods */
+/************************************************************************/
+
+public:
+// true if this 'imm' can be encoded as an input operand to a mov instruction
+static bool emitIns_valid_imm_for_mov(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as an input operand to a vector movi instruction
+static bool emitIns_valid_imm_for_movi(INT64 imm, emitAttr size);
+
+// true if this 'immDbl' can be encoded as an input operand to a fmov instruction
+static bool emitIns_valid_imm_for_fmov(double immDbl);
+
+// true if this 'imm' can be encoded as an input operand to an add instruction
+static bool emitIns_valid_imm_for_add(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as an input operand to a cmp instruction
+static bool emitIns_valid_imm_for_cmp(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as an input operand to an alu instruction
+static bool emitIns_valid_imm_for_alu(INT64 imm, emitAttr size);
+
+// true if this 'imm' can be encoded as the offset in a ldr/str instruction
+static bool emitIns_valid_imm_for_ldst_offset(INT64 imm, emitAttr size);
+
+// true if 'imm' can be encoded using the 'shifted left by 12 bits' form
+static bool canEncodeWithShiftImmBy12(INT64 imm);
+
+// Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+static INT64 normalizeImm64(INT64 imm, emitAttr size);
+
+// Normalize the 'imm' so that the upper bits, as defined by 'size', are zero
+static INT32 normalizeImm32(INT32 imm, emitAttr size);
+
+// true if 'imm' can be encoded using a 'bitmask immediate', also returns the encoding if wbBMI is non-null
+static bool canEncodeBitMaskImm(INT64 imm, emitAttr size, emitter::bitMaskImm* wbBMI = nullptr);
+
+// true if 'imm' can be encoded using a 'halfword immediate', also returns the encoding if wbHWI is non-null
+static bool canEncodeHalfwordImm(INT64 imm, emitAttr size, emitter::halfwordImm* wbHWI = nullptr);
+
+// true if 'imm' can be encoded using a 'byteShifted immediate', also returns the encoding if wbBSI is non-null
+static bool canEncodeByteShiftedImm(INT64 imm, emitAttr size, bool allow_MSL, emitter::byteShiftedImm* wbBSI = nullptr);
+
+// true if 'immDbl' can be encoded using a 'float immediate', also returns the encoding if wbFPI is non-null
+static bool canEncodeFloatImm8(double immDbl, emitter::floatImm8* wbFPI = nullptr);
+
+// Returns the number of bits used by the given 'size'.
+inline static unsigned getBitWidth(emitAttr size)
+{
+ assert(size <= EA_8BYTE);
+ return (unsigned)size * BITS_PER_BYTE;
+}
+
+// Returns true if the imm represents a valid bit shift or bit position for the given 'size' [0..31] or [0..63]
+inline static unsigned isValidImmShift(ssize_t imm, emitAttr size)
+{
+ return (imm >= 0) && (imm < getBitWidth(size));
+}
+
+inline static bool isValidGeneralDatasize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isValidScalarDatasize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isValidVectorDatasize(emitAttr size)
+{
+ return (size == EA_16BYTE) || (size == EA_8BYTE);
+}
+
+inline static bool isValidGeneralLSDatasize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE) || (size == EA_1BYTE);
+}
+
+inline static bool isValidVectorLSDatasize(emitAttr size)
+{
+ return (size == EA_16BYTE) || (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE) || (size == EA_1BYTE);
+}
+
+inline static bool isValidVectorLSPDatasize(emitAttr size)
+{
+ return (size == EA_16BYTE) || (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isValidVectorElemsize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE) || (size == EA_1BYTE);
+}
+
+inline static bool isValidVectorFcvtsize(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE) || (size == EA_2BYTE);
+}
+
+inline static bool isValidVectorElemsizeFloat(emitAttr size)
+{
+ return (size == EA_8BYTE) || (size == EA_4BYTE);
+}
+
+inline static bool isGeneralRegister(regNumber reg)
+{
+ return (reg >= REG_INT_FIRST) && (reg <= REG_LR);
+} // Excludes REG_ZR
+
+inline static bool isGeneralRegisterOrZR(regNumber reg)
+{
+ return (reg >= REG_INT_FIRST) && (reg <= REG_ZR);
+} // Includes REG_ZR
+
+inline static bool isGeneralRegisterOrSP(regNumber reg)
+{
+ return isGeneralRegister(reg) || (reg == REG_SP);
+} // Includes REG_SP, Excludes REG_ZR
+
+inline static bool isVectorRegister(regNumber reg)
+{
+ return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST);
+}
+
+inline static bool isFloatReg(regNumber reg)
+{
+ return isVectorRegister(reg);
+}
+
+inline static bool insOptsNone(insOpts opt)
+{
+ return (opt == INS_OPTS_NONE);
+}
+
+inline static bool insOptsIndexed(insOpts opt)
+{
+ return (opt == INS_OPTS_PRE_INDEX) || (opt == INS_OPTS_POST_INDEX);
+}
+
+inline static bool insOptsPreIndex(insOpts opt)
+{
+ return (opt == INS_OPTS_PRE_INDEX);
+}
+
+inline static bool insOptsPostIndex(insOpts opt)
+{
+ return (opt == INS_OPTS_POST_INDEX);
+}
+
+inline static bool insOptsLSL12(insOpts opt) // special 12-bit shift only used for imm12
+{
+ return (opt == INS_OPTS_LSL12);
+}
+
+inline static bool insOptsAnyShift(insOpts opt)
+{
+ return ((opt >= INS_OPTS_LSL) && (opt <= INS_OPTS_ROR));
+}
+
+inline static bool insOptsAluShift(insOpts opt) // excludes ROR
+{
+ return ((opt >= INS_OPTS_LSL) && (opt <= INS_OPTS_ASR));
+}
+
+inline static bool insOptsVectorImmShift(insOpts opt)
+{
+ return ((opt == INS_OPTS_LSL) || (opt == INS_OPTS_MSL));
+}
+
+inline static bool insOptsLSL(insOpts opt)
+{
+ return (opt == INS_OPTS_LSL);
+}
+
+inline static bool insOptsLSR(insOpts opt)
+{
+ return (opt == INS_OPTS_LSR);
+}
+
+inline static bool insOptsASR(insOpts opt)
+{
+ return (opt == INS_OPTS_ASR);
+}
+
+inline static bool insOptsROR(insOpts opt)
+{
+ return (opt == INS_OPTS_ROR);
+}
+
+inline static bool insOptsAnyExtend(insOpts opt)
+{
+ return ((opt >= INS_OPTS_UXTB) && (opt <= INS_OPTS_SXTX));
+}
+
+inline static bool insOptsLSExtend(insOpts opt)
+{
+ return ((opt == INS_OPTS_NONE) || (opt == INS_OPTS_LSL) || (opt == INS_OPTS_UXTW) || (opt == INS_OPTS_SXTW) ||
+ (opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX));
+}
+
+inline static bool insOpts32BitExtend(insOpts opt)
+{
+ return ((opt == INS_OPTS_UXTW) || (opt == INS_OPTS_SXTW));
+}
+
+inline static bool insOpts64BitExtend(insOpts opt)
+{
+ return ((opt == INS_OPTS_UXTX) || (opt == INS_OPTS_SXTX));
+}
+
+inline static bool insOptsAnyArrangement(insOpts opt)
+{
+ return ((opt >= INS_OPTS_8B) && (opt <= INS_OPTS_2D));
+}
+
+inline static bool insOptsConvertFloatToFloat(insOpts opt)
+{
+ return ((opt >= INS_OPTS_S_TO_D) && (opt <= INS_OPTS_D_TO_H));
+}
+
+inline static bool insOptsConvertFloatToInt(insOpts opt)
+{
+ return ((opt >= INS_OPTS_S_TO_4BYTE) && (opt <= INS_OPTS_D_TO_8BYTE));
+}
+
+inline static bool insOptsConvertIntToFloat(insOpts opt)
+{
+ return ((opt >= INS_OPTS_4BYTE_TO_S) && (opt <= INS_OPTS_8BYTE_TO_D));
+}
+
+static bool isValidImmCond(ssize_t imm);
+static bool isValidImmCondFlags(ssize_t imm);
+static bool isValidImmCondFlagsImm5(ssize_t imm);
+
+/************************************************************************/
+/* The public entry points to output instructions */
+/************************************************************************/
+
+public:
+void emitIns(instruction ins);
+
+void emitIns_I(instruction ins, emitAttr attr, ssize_t imm);
+
+void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
+
+void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_F(instruction ins, emitAttr attr, regNumber reg, double immDbl, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insFlags flags)
+{
+ emitIns_R_R(ins, attr, reg1, reg2);
+}
+
+void emitIns_R_I_I(
+ instruction ins, emitAttr attr, regNumber reg1, ssize_t imm1, ssize_t imm2, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_I(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm, insOpts opt = INS_OPTS_NONE);
+
+// Checks for a large immediate that needs a second instruction
+void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm);
+
+void emitIns_R_R_R(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R_I(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ ssize_t imm,
+ insOpts opt = INS_OPTS_NONE);
+
+void emitIns_R_R_R_Ext(instruction ins,
+ emitAttr attr,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ insOpts opt = INS_OPTS_NONE,
+ int shiftAmount = -1);
+
+void emitIns_R_R_I_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2);
+
+void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4);
+
+void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond);
+
+void emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond);
+
+void emitIns_R_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond);
+
+void emitIns_R_R_FLAGS_COND(
+ instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond);
+
+void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, int imm, insCflags flags, insCond cond);
+
+void emitIns_BARR(instruction ins, insBarrier barrier);
+
+void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
+
+void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
+
+void emitIns_R_C(
+ instruction ins, emitAttr attr, regNumber reg, regNumber tmpReg, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs);
+
+void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, ssize_t val);
+
+void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg);
+
+void emitIns_J_R(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AR(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_AR_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, int offs, int memCookie = 0, void* clsCookie = NULL);
+
+void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+enum EmitCallType
+{
+
+ // I have included here, but commented out, all the values used by the x86 emitter.
+ // However, ARM has a much reduced instruction set, so the ARM emitter only
+ // supports a subset of the x86 variants. Leaving the rest commented out makes it
+ // a compile-time error if code tries to use them (and hopefully whoever hits that
+ // error will see this comment and understand why they are unavailable on ARM),
+ // while making it easier to stay in sync with x86 and possibly add them back in
+ // if needed.
+
+ EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
+ // EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
+ EC_FUNC_ADDR, // Direct call to an absolute address
+
+ // EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
+ EC_INDIR_R, // Indirect call via register
+ // EC_INDIR_SR, // Indirect call via stack-reference (local var)
+ // EC_INDIR_C, // Indirect call via static class var
+ // EC_INDIR_ARD, // Indirect call via an addressing mode
+
+ EC_COUNT
+};
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize,
+ emitAttr secondRetSize,
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ regNumber ireg = REG_NA,
+ regNumber xreg = REG_NA,
+ unsigned xmul = 0,
+ ssize_t disp = 0,
+ bool isJump = false,
+ bool isNoGC = false,
+ bool isProfLeaveCB = false);
+
+BYTE* emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i);
+unsigned emitOutputCall(insGroup* ig, BYTE* dst, instrDesc* i, code_t code);
+BYTE* emitOutputLoadLabel(BYTE* dst, BYTE* srcAddr, BYTE* dstAddr, instrDescJmp* id);
+BYTE* emitOutputShortBranch(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, instrDescJmp* id);
+BYTE* emitOutputShortAddress(BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg);
+BYTE* emitOutputShortConstant(
+ BYTE* dst, instruction ins, insFormat fmt, ssize_t distVal, regNumber reg, emitAttr opSize);
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a conditional jump.
+ */
+
+inline bool emitIsCondJump(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_BI_0B) || (jmp->idInsFmt() == IF_LARGEJMP));
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a compare and jump.
+ */
+
+inline bool emitIsCmpJump(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_BI_1A) || (jmp->idInsFmt() == IF_BI_1B));
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's an unconditional jump.
+ */
+
+inline bool emitIsUncondJump(instrDesc* jmp)
+{
+ return (jmp->idInsFmt() == IF_BI_0A);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a direct call.
+ */
+
+inline bool emitIsDirectCall(instrDesc* call)
+{
+ return (call->idInsFmt() == IF_BI_0C);
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a load label instruction.
+ */
+
+inline bool emitIsLoadLabel(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_DI_1E) || // adr or adrp
+ (jmp->idInsFmt() == IF_LARGEADR));
+}
+
+/*****************************************************************************
+ *
+ * Given an instrDesc, return true if it's a load constant instruction.
+ */
+
+inline bool emitIsLoadConstant(instrDesc* jmp)
+{
+ return ((jmp->idInsFmt() == IF_LS_1A) || // ldr
+ (jmp->idInsFmt() == IF_LARGELDC));
+}
+
+#endif // _TARGET_ARM64_
diff --git a/src/jit/emitdef.h b/src/jit/emitdef.h
new file mode 100644
index 0000000000..f7f9325b79
--- /dev/null
+++ b/src/jit/emitdef.h
@@ -0,0 +1,22 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _EMITDEF_H_
+#define _EMITDEF_H_
+/*****************************************************************************/
+
+#if defined(_TARGET_XARCH_)
+#include "emitxarch.h"
+#elif defined(_TARGET_ARM_)
+#include "emitarm.h"
+#elif defined(_TARGET_ARM64_)
+#include "emitarm64.h"
+#else
+#error Unsupported or unset target architecture
+#endif
+
+/*****************************************************************************/
+#endif //_EMITDEF_H_
+/*****************************************************************************/
diff --git a/src/jit/emitfmts.h b/src/jit/emitfmts.h
new file mode 100644
index 0000000000..587033f2e9
--- /dev/null
+++ b/src/jit/emitfmts.h
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+#if defined(_TARGET_XARCH_)
+#include "emitfmtsxarch.h"
+#elif defined(_TARGET_ARM_)
+#include "emitfmtsarm.h"
+#elif defined(_TARGET_ARM64_)
+#include "emitfmtsarm64.h"
+#else
+#error Unsupported or unset target architecture
+#endif // target type
diff --git a/src/jit/emitfmtsarm.h b/src/jit/emitfmtsarm.h
new file mode 100644
index 0000000000..bc7492003a
--- /dev/null
+++ b/src/jit/emitfmtsarm.h
@@ -0,0 +1,153 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+// clang-format off
+#if !defined(_TARGET_ARM_)
+ #error Unexpected target type
+#endif
+
+#ifdef DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#undef DEFINE_ID_OPS
+
+enum ID_OPS
+{
+ ID_OP_NONE, // no additional arguments
+ ID_OP_SCNS, // small const operand (21-bits or less, no reloc)
+ ID_OP_JMP, // local jump
+ ID_OP_LBL, // label operand
+ ID_OP_CALL, // direct method call
+ ID_OP_SPEC, // special handling required
+};
+
+//////////////////////////////////////////////////////////////////////////////
+#else // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef IF_DEF
+#error Must define IF_DEF macro before including this file
+#endif
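+
+// A minimal illustration of how this X-macro table is typically consumed (a
+// sketch only; the enum shape and the IF_ prefix are assumptions made for the
+// example, the real consumer lives elsewhere in the emitter): each IF_DEF row
+// expands to one insFormat value, e.g.
+//
+//   enum insFormat : unsigned
+//   {
+//   #define IF_DEF(en, op1, op2) IF_##en,
+//   #include "emitfmts.h"
+//       IF_COUNT
+//   };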
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// enum insFormat instruction enum ID_OPS
+// scheduling
+// (unused)
+//////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(NONE, IS_NONE, NONE) //
+
+IF_DEF(LABEL, IS_NONE, JMP ) // label
+//IF_DEF(SWR_LABEL, IS_NONE, LBL ) // write label to stack
+//IF_DEF(METHOD, IS_NONE, CALL) // method
+//IF_DEF(CNS, IS_NONE, SCNS) // const
+
+IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(EN9, IS_NONE, NONE) // Instruction has 9 possible encoding types
+IF_DEF(EN8, IS_NONE, NONE) // Instruction has 8 possible encoding types
+IF_DEF(EN6A, IS_NONE, NONE) // Instruction has 6 possible encoding types, type A
+IF_DEF(EN6B, IS_NONE, NONE) // Instruction has 6 possible encoding types, type B
+IF_DEF(EN5A, IS_NONE, NONE) // Instruction has 5 possible encoding types, type A
+IF_DEF(EN5B, IS_NONE, NONE) // Instruction has 5 possible encoding types, type B
+IF_DEF(EN4A, IS_NONE, NONE) // Instruction has 4 possible encoding types, type A
+IF_DEF(EN4B, IS_NONE, NONE) // Instruction has 4 possible encoding types, type B
+IF_DEF(EN3A, IS_NONE, NONE) // Instruction has 3 possible encoding types, type A
+IF_DEF(EN3B, IS_NONE, NONE) // Instruction has 3 possible encoding types, type B
+IF_DEF(EN3C, IS_NONE, NONE) // Instruction has 3 possible encoding types, type C
+IF_DEF(EN3D, IS_NONE, NONE) // Instruction has 3 possible encoding types, type D
+IF_DEF(EN3E, IS_NONE, NONE) // Instruction has 3 possible encoding types, type E
+IF_DEF(EN3F, IS_NONE, NONE) // Instruction has 3 possible encoding types, type F
+IF_DEF(EN2A, IS_NONE, NONE) // Instruction has 2 possible encoding types, type A
+IF_DEF(EN2B, IS_NONE, NONE) // Instruction has 2 possible encoding types, type B
+IF_DEF(EN2C, IS_NONE, NONE) // Instruction has 2 possible encoding types, type C
+IF_DEF(EN2D, IS_NONE, NONE) // Instruction has 2 possible encoding types, type D
+IF_DEF(EN2E, IS_NONE, NONE) // Instruction has 2 possible encoding types, type E
+IF_DEF(EN2F, IS_NONE, NONE) // Instruction has 2 possible encoding types, type F
+IF_DEF(EN2G, IS_NONE, NONE) // Instruction has 2 possible encoding types, type G
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(T1_A, IS_NONE, NONE) // T1_A ................
+IF_DEF(T1_B, IS_NONE, NONE) // T1_B ........cccc.... cond
+IF_DEF(T1_C, IS_NONE, NONE) // T1_C .....iiiiimmmddd R1 R2 imm5
+IF_DEF(T1_D0, IS_NONE, NONE) // T1_D0 ........Dmmmmddd R1* R2*
+IF_DEF(T1_D1, IS_NONE, SPEC) // T1_D1 .........mmmm... R1*
+IF_DEF(T1_D2, IS_NONE, SPEC) // T1_D2 .........mmmm... R3*
+IF_DEF(T1_E, IS_NONE, NONE) // T1_E ..........mmmddd R1 R2
+IF_DEF(T1_F, IS_NONE, NONE) // T1_F .........iiiiiii SP imm7
+IF_DEF(T1_G, IS_NONE, NONE) // T1_G .......iiinnnddd R1 R2 imm3
+IF_DEF(T1_H, IS_NONE, NONE) // T1_H .......mmmnnnddd R1 R2 R3
+IF_DEF(T1_I, IS_NONE, JMP ) // T1_I ......i.iiiiinnn R1 imm6
+IF_DEF(T1_J0, IS_NONE, NONE) // T1_J .....dddiiiiiiii R1 imm8
+IF_DEF(T1_J1, IS_NONE, NONE) // T1_J .....dddiiiiiiii R1 <regmask8>
+IF_DEF(T1_J2, IS_NONE, NONE) // T1_J .....dddiiiiiiii R1 SP imm8
+IF_DEF(T1_J3, IS_NONE, LBL ) // T1_J .....dddiiiiiiii R1 PC imm8
+IF_DEF(T1_K, IS_NONE, JMP ) // T1_K ....cccciiiiiiii Branch imm8, cond4
+IF_DEF(T1_L0, IS_NONE, NONE) // T1_L0 ........iiiiiiii imm8
+IF_DEF(T1_L1, IS_NONE, NONE) // T1_L1 ........rrrrrrrr <regmask8>
+IF_DEF(T1_M, IS_NONE, JMP ) // T1_M .....iiiiiiiiiii Branch imm11
+
+
+IF_DEF(T2_A, IS_NONE, NONE) // T2_A ................ ................
+IF_DEF(T2_B, IS_NONE, NONE) // T2_B ................ ............iiii imm4
+IF_DEF(T2_C0, IS_NONE, NONE) // T2_C0 ...........Snnnn .iiiddddiishmmmm R1 R2 R3 S, imm5, sh
+IF_DEF(T2_C1, IS_NONE, NONE) // T2_C1 ...........S.... .iiiddddiishmmmm R1 R2 S, imm5, sh
+IF_DEF(T2_C2, IS_NONE, NONE) // T2_C2 ...........S.... .iiiddddii..mmmm R1 R2 S, imm5
+IF_DEF(T2_C3, IS_NONE, NONE) // T2_C3 ...........S.... ....dddd....mmmm R1 R2 S
+IF_DEF(T2_C4, IS_NONE, NONE) // T2_C4 ...........Snnnn ....dddd....mmmm R1 R2 R3 S
+IF_DEF(T2_C5, IS_NONE, NONE) // T2_C5 ............nnnn ....dddd....mmmm R1 R2 R3
+IF_DEF(T2_C6, IS_NONE, NONE) // T2_C6 ................ ....dddd..iimmmm R1 R2 imm2
+IF_DEF(T2_C7, IS_NONE, NONE) // T2_C7 ............nnnn ..........shmmmm R1 R2 imm2
+IF_DEF(T2_C8, IS_NONE, NONE) // T2_C8 ............nnnn .iii....iishmmmm R1 R2 imm5, sh
+IF_DEF(T2_C9, IS_NONE, NONE) // T2_C9 ............nnnn ............mmmm R1 R2
+IF_DEF(T2_C10, IS_NONE, NONE) // T2_C10 ............mmmm ....dddd....mmmm R1 R2
+IF_DEF(T2_D0, IS_NONE, NONE) // T2_D0 ............nnnn .iiiddddii.wwwww R1 R2 imm5, imm5
+IF_DEF(T2_D1, IS_NONE, NONE) // T2_D1 ................ .iiiddddii.wwwww R1 imm5, imm5
+IF_DEF(T2_E0, IS_NONE, NONE) // T2_E0 ............nnnn tttt......shmmmm R1 R2 R3 imm2
+IF_DEF(T2_E1, IS_NONE, NONE) // T2_E1 ............nnnn tttt............ R1 R2
+IF_DEF(T2_E2, IS_NONE, NONE) // T2_E2 ................ tttt............ R1
+IF_DEF(T2_F1, IS_NONE, NONE) // T2_F1 ............nnnn ttttdddd....mmmm R1 R2 R3 R4
+IF_DEF(T2_F2, IS_NONE, NONE) // T2_F2 ............nnnn aaaadddd....mmmm R1 R2 R3 R4
+IF_DEF(T2_G0, IS_NONE, NONE) // T2_G0 .......PU.W.nnnn ttttTTTTiiiiiiii R1 R2 R3 imm8, PUW
+IF_DEF(T2_G1, IS_NONE, NONE) // T2_G1 ............nnnn ttttTTTT........ R1 R2 R3
+IF_DEF(T2_H0, IS_NONE, NONE) // T2_H0 ............nnnn tttt.PUWiiiiiiii R1 R2 imm8, PUW
+IF_DEF(T2_H1, IS_NONE, NONE) // T2_H1 ............nnnn tttt....iiiiiiii R1 R2 imm8
+IF_DEF(T2_H2, IS_NONE, NONE) // T2_H2 ............nnnn ........iiiiiiii R1 imm8
+IF_DEF(T2_I0, IS_NONE, NONE) // T2_I0 ..........W.nnnn rrrrrrrrrrrrrrrr R1 W, imm16
+IF_DEF(T2_I1, IS_NONE, NONE) // T2_I1 ................ rrrrrrrrrrrrrrrr imm16
+IF_DEF(T2_J1, IS_NONE, JMP ) // T2_J1 .....Scccciiiiii ..j.jiiiiiiiiiii Branch imm20, cond4
+IF_DEF(T2_J2, IS_NONE, JMP ) // T2_J2 .....Siiiiiiiiii ..j.jiiiiiiiiii. Branch imm24
+IF_DEF(T2_J3, IS_NONE, CALL) // T2_J3 .....Siiiiiiiiii ..j.jiiiiiiiiii. Call imm24
+IF_DEF(T2_K1, IS_NONE, NONE) // T2_K1 ............nnnn ttttiiiiiiiiiiii R1 R2 imm12
+IF_DEF(T2_K2, IS_NONE, NONE) // T2_K2 ............nnnn ....iiiiiiiiiiii R1 imm12
+IF_DEF(T2_K3, IS_NONE, NONE) // T2_K3 ........U....... ....iiiiiiiiiiii PC U, imm12
+IF_DEF(T2_K4, IS_NONE, NONE) // T2_K4 ........U....... ttttiiiiiiiiiiii R1 PC U, imm12
+IF_DEF(T2_L0, IS_NONE, NONE) // T2_L0 .....i.....Snnnn .iiiddddiiiiiiii R1 R2 S, imm8<<imm4
+IF_DEF(T2_L1, IS_NONE, NONE) // T2_L1 .....i.....S.... .iiiddddiiiiiiii R1 S, imm8<<imm4
+IF_DEF(T2_L2, IS_NONE, NONE) // T2_L2 .....i......nnnn .iii....iiiiiiii R1 imm8<<imm4
+IF_DEF(T2_M0, IS_NONE, NONE) // T2_M0 .....i......nnnn .iiiddddiiiiiiii R1 R2 imm12
+IF_DEF(T2_M1, IS_NONE, LBL ) // T2_M1 .....i.......... .iiiddddiiiiiiii R1 PC imm12
+IF_DEF(T2_N, IS_NONE, NONE) // T2_N .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt
+IF_DEF(T2_N1, IS_NONE, JMP) // T2_N1 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a code address
+IF_DEF(T2_N2, IS_NONE, NONE) // T2_N2 .....i......iiii .iiiddddiiiiiiii R1 imm16 ; movw/movt of a data address
+IF_DEF(T2_VLDST, IS_NONE, NONE) // T2_VLDST 11101101UD0Lnnnn dddd101Ziiiiiiii D1 R2 imm(+-1020)
+IF_DEF(T2_VFP2, IS_NONE, NONE) // T2_VFP2 111011101D110--- dddd101Z--M0mmmm D1 D2
+IF_DEF(T2_VFP3, IS_NONE, NONE) // T2_VFP3 11101110-D--nnnn dddd101ZN-M0mmmm D1 D2 D3
+IF_DEF(T2_VMOVS, IS_NONE, NONE)
+IF_DEF(T2_VMOVD, IS_NONE, NONE)
+
+IF_DEF(INVALID, IS_NONE, NONE) //
+
+//////////////////////////////////////////////////////////////////////////////
+#undef IF_DEF
+//////////////////////////////////////////////////////////////////////////////
+
+#endif // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitfmtsarm64.h b/src/jit/emitfmtsarm64.h
new file mode 100644
index 0000000000..c4be8ae45a
--- /dev/null
+++ b/src/jit/emitfmtsarm64.h
@@ -0,0 +1,210 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+// clang-format off
+#if !defined(_TARGET_ARM64_)
+#error Unexpected target type
+#endif
+
+#ifdef DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#undef DEFINE_ID_OPS
+
+enum ID_OPS
+{
+ ID_OP_NONE, // no additional arguments
+ ID_OP_SCNS, // small const operand (21-bits or less, no reloc)
+ ID_OP_JMP, // local jump
+ ID_OP_CALL, // method call
+ ID_OP_SPEC, // special handling required
+};
+
+//////////////////////////////////////////////////////////////////////////////
+#else // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef IF_DEF
+#error Must define IF_DEF macro before including this file
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// enum insFormat instruction enum ID_OPS
+// scheduling
+// (unused)
+//////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(NONE, IS_NONE, NONE) //
+
+IF_DEF(LABEL, IS_NONE, JMP) // label
+IF_DEF(LARGEJMP, IS_NONE, JMP) // large conditional branch pseudo-op (cond branch + uncond branch)
+IF_DEF(LARGEADR, IS_NONE, JMP) // large address pseudo-op (adrp + add)
+IF_DEF(LARGELDC, IS_NONE, JMP) // large constant pseudo-op (adrp + ldr)
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(EN9, IS_NONE, NONE) // Instruction has 9 possible encoding types
+IF_DEF(EN6A, IS_NONE, NONE) // Instruction has 6 possible encoding types, type A
+IF_DEF(EN5A, IS_NONE, NONE) // Instruction has 5 possible encoding types, type A
+IF_DEF(EN5B, IS_NONE, NONE) // Instruction has 5 possible encoding types, type B
+IF_DEF(EN5C, IS_NONE, NONE) // Instruction has 5 possible encoding types, type C
+IF_DEF(EN4A, IS_NONE, NONE) // Instruction has 4 possible encoding types, type A
+IF_DEF(EN4B, IS_NONE, NONE) // Instruction has 4 possible encoding types, type B
+IF_DEF(EN4C, IS_NONE, NONE) // Instruction has 4 possible encoding types, type C
+IF_DEF(EN4D, IS_NONE, NONE) // Instruction has 4 possible encoding types, type D
+IF_DEF(EN4E, IS_NONE, NONE) // Instruction has 4 possible encoding types, type E
+IF_DEF(EN4F, IS_NONE, NONE) // Instruction has 4 possible encoding types, type F
+IF_DEF(EN4G, IS_NONE, NONE) // Instruction has 4 possible encoding types, type G
+IF_DEF(EN3A, IS_NONE, NONE) // Instruction has 3 possible encoding types, type A
+IF_DEF(EN3B, IS_NONE, NONE) // Instruction has 3 possible encoding types, type B
+IF_DEF(EN3C, IS_NONE, NONE) // Instruction has 3 possible encoding types, type C
+IF_DEF(EN3D, IS_NONE, NONE) // Instruction has 3 possible encoding types, type D
+IF_DEF(EN3E, IS_NONE, NONE) // Instruction has 3 possible encoding types, type E
+IF_DEF(EN3F, IS_NONE, NONE) // Instruction has 3 possible encoding types, type F
+IF_DEF(EN3G, IS_NONE, NONE) // Instruction has 3 possible encoding types, type G
+IF_DEF(EN3H, IS_NONE, NONE) // Instruction has 3 possible encoding types, type H
+IF_DEF(EN3I, IS_NONE, NONE) // Instruction has 3 possible encoding types, type I
+IF_DEF(EN2A, IS_NONE, NONE) // Instruction has 2 possible encoding types, type A
+IF_DEF(EN2B, IS_NONE, NONE) // Instruction has 2 possible encoding types, type B
+IF_DEF(EN2C, IS_NONE, NONE) // Instruction has 2 possible encoding types, type C
+IF_DEF(EN2D, IS_NONE, NONE) // Instruction has 2 possible encoding types, type D
+IF_DEF(EN2E, IS_NONE, NONE) // Instruction has 2 possible encoding types, type E
+IF_DEF(EN2F, IS_NONE, NONE) // Instruction has 2 possible encoding types, type F
+IF_DEF(EN2G, IS_NONE, NONE) // Instruction has 2 possible encoding types, type G
+IF_DEF(EN2H, IS_NONE, NONE) // Instruction has 2 possible encoding types, type H
+IF_DEF(EN2I, IS_NONE, NONE) // Instruction has 2 possible encoding types, type I
+IF_DEF(EN2J, IS_NONE, NONE) // Instruction has 2 possible encoding types, type J
+IF_DEF(EN2K, IS_NONE, NONE) // Instruction has 2 possible encoding types, type K
+IF_DEF(EN2L, IS_NONE, NONE) // Instruction has 2 possible encoding types, type L
+IF_DEF(EN2M, IS_NONE, NONE) // Instruction has 2 possible encoding types, type M
+IF_DEF(EN2N, IS_NONE, NONE) // Instruction has 2 possible encoding types, type N
+
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Key for insFormat names:
+//
+// Above (Specifies multiple encodings)
+//
+// EN#? :: (count of the number of encodings)
+// (? is a unique letter A,B,C...)
+//
+// Below (Specifies an exact instruction encoding)
+//
+// -- the first two characters are
+//
+// DI :: Data Processing - Immediate
+// DR :: Data Processing - Register
+// DV :: Data Processing - Vector Register
+// LS :: Loads and Stores
+// BI :: Branches - Immediate
+// BR :: Branches - Register
+// SN :: System - No Registers or Immediates
+// SI :: System - Immediate
+//
+// _ :: a separator char '_'
+//
+// -- the next two characters are
+//
+// # :: number of registers in the encoding
+// ? :: A unique letter A,B,C,...
+// -- optional third character
+// I :: by elem immediate
+//
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(BI_0A, IS_NONE, JMP) // BI_0A ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 b
+IF_DEF(BI_0B, IS_NONE, JMP) // BI_0B ......iiiiiiiiii iiiiiiiiiii..... simm19:00 b<cond>
+IF_DEF(BI_0C, IS_NONE, CALL) // BI_0C ......iiiiiiiiii iiiiiiiiiiiiiiii simm26:00 bl
+IF_DEF(BI_1A, IS_NONE, JMP) // BI_1A X.......iiiiiiii iiiiiiiiiiittttt Rt simm19:00 cbz cbnz
+IF_DEF(BI_1B, IS_NONE, JMP) // BI_1B B.......bbbbbiii iiiiiiiiiiittttt Rt imm6 simm14:00 tbz tbnz
+IF_DEF(BR_1A, IS_NONE, CALL) // BR_1A ................ ......nnnnn..... Rn ret
+IF_DEF(BR_1B, IS_NONE, CALL) // BR_1B ................ ......nnnnn..... Rn br blr
+
+IF_DEF(LS_1A, IS_NONE, JMP) // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
+IF_DEF(LS_2A, IS_NONE, NONE) // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
+IF_DEF(LS_2B, IS_NONE, NONE) // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
+IF_DEF(LS_2C, IS_NONE, NONE) // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc
+IF_DEF(LS_3A, IS_NONE, NONE) // LS_3A .X.......X.mmmmm xxxS..nnnnnttttt Rt Rn Rm ext(Rm) LSL {}
+IF_DEF(LS_3B, IS_NONE, NONE) // LS_3B X............... .aaaaannnnnddddd Rd Ra Rn
+IF_DEF(LS_3C, IS_NONE, NONE) // LS_3C X.........iiiiii iaaaaannnnnddddd Rd Ra Rn imm(im7,sh)
+
+IF_DEF(DI_1A, IS_NONE, NONE) // DI_1A X.......shiiiiii iiiiiinnnnn..... Rn imm(i12,sh)
+IF_DEF(DI_1B, IS_NONE, NONE) // DI_1B X........hwiiiii iiiiiiiiiiiddddd Rd imm(i16,hw)
+IF_DEF(DI_1C, IS_NONE, NONE) // DI_1C X........Nrrrrrr ssssssnnnnn..... Rn imm(N,r,s)
+IF_DEF(DI_1D, IS_NONE, NONE) // DI_1D X........Nrrrrrr ssssss.....ddddd Rd imm(N,r,s)
+IF_DEF(DI_1E, IS_NONE, JMP) // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21
+IF_DEF(DI_1F, IS_NONE, NONE) // DI_1F X..........iiiii cccc..nnnnn.nzcv Rn imm5 nzcv cond
+
+IF_DEF(DI_2A, IS_NONE, NONE) // DI_2A X.......shiiiiii iiiiiinnnnnddddd Rd Rn imm(i12,sh)
+IF_DEF(DI_2B, IS_NONE, NONE) // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63)
+IF_DEF(DI_2C, IS_NONE, NONE) // DI_2C X........Nrrrrrr ssssssnnnnnddddd Rd Rn imm(N,r,s)
+IF_DEF(DI_2D, IS_NONE, NONE) // DI_2D X........Nrrrrrr ssssssnnnnnddddd Rd Rn imr, imms (N,r,s)
+
+IF_DEF(DR_1D, IS_NONE, NONE) // DR_1D X............... cccc.......ddddd Rd cond
+
+IF_DEF(DR_2A, IS_NONE, NONE) // DR_2A X..........mmmmm ......nnnnn..... Rn Rm
+IF_DEF(DR_2B, IS_NONE, NONE) // DR_2B X.......sh.mmmmm ssssssnnnnn..... Rn Rm {LSL,LSR,ASR} imm(0-63)
+IF_DEF(DR_2C, IS_NONE, NONE) // DR_2C X..........mmmmm xxxsssnnnnn..... Rn Rm ext(Rm) LSL imm(0-4)
+IF_DEF(DR_2D, IS_NONE, NONE) // DR_2D X..........nnnnn cccc..nnnnnddddd Rd Rn cond
+IF_DEF(DR_2E, IS_NONE, NONE) // DR_2E X..........mmmmm ...........ddddd Rd Rm
+IF_DEF(DR_2F, IS_NONE, NONE) // DR_2F X.......sh.mmmmm ssssss.....ddddd Rd Rm {LSL,LSR,ASR} imm(0-63)
+IF_DEF(DR_2G, IS_NONE, NONE) // DR_2G X............... ......nnnnnddddd Rd Rn
+IF_DEF(DR_2H, IS_NONE, NONE) // DR_2H X........X...... ......nnnnnddddd Rd Rn
+IF_DEF(DR_2I, IS_NONE, NONE) // DR_2I X..........mmmmm cccc..nnnnn.nzcv Rn Rm nzcv cond
+
+IF_DEF(DR_3A, IS_NONE, NONE) // DR_3A X..........mmmmm ......nnnnnddddd Rd Rn Rm
+IF_DEF(DR_3B, IS_NONE, NONE) // DR_3B X.......sh.mmmmm ssssssnnnnnddddd Rd Rn Rm {LSL,LSR,ASR} imm(0-63)
+IF_DEF(DR_3C, IS_NONE, NONE) // DR_3C X..........mmmmm xxxsssnnnnnddddd Rd Rn Rm ext(Rm) LSL imm(0-4)
+IF_DEF(DR_3D, IS_NONE, NONE) // DR_3D X..........mmmmm cccc..nnnnnddddd Rd Rn Rm cond
+IF_DEF(DR_3E, IS_NONE, NONE) // DR_3E X........X.mmmmm ssssssnnnnnddddd Rd Rn Rm imm(0-63)
+
+IF_DEF(DR_4A, IS_NONE, NONE) // DR_4A X..........mmmmm .aaaaannnnnddddd Rd Rn Rm Ra
+
+IF_DEF(DV_1A, IS_NONE, NONE) // DV_1A .........X.iiiii iii........ddddd Vd imm8 (fmov - immediate scalar)
+IF_DEF(DV_1B, IS_NONE, NONE) // DV_1B .QX..........iii jjjj..iiiiiddddd Vd imm8 (fmov/movi - immediate vector)
+IF_DEF(DV_1C, IS_NONE, NONE) // DV_1C .........X...... ......nnnnn..... Vn #0.0 (fcmp - with zero)
+
+IF_DEF(DV_2A, IS_NONE, NONE) // DV_2A .Q.......X...... ......nnnnnddddd Vd Vn (fabs, fcvtXX - vector)
+IF_DEF(DV_2B, IS_NONE, NONE) // DV_2B .Q.........iiiii ......nnnnnddddd Rd Vn[] (umov/smov - to general)
+IF_DEF(DV_2C, IS_NONE, NONE) // DV_2C .Q.........iiiii ......nnnnnddddd Vd Rn (dup/ins - vector from
+ // general)
+IF_DEF(DV_2D, IS_NONE, NONE) // DV_2D .Q.........iiiii ......nnnnnddddd Vd Vn[] (dup - vector)
+IF_DEF(DV_2E, IS_NONE, NONE) // DV_2E ...........iiiii ......nnnnnddddd Vd Vn[] (dup - scalar)
+IF_DEF(DV_2F, IS_NONE, NONE) // DV_2F ...........iiiii .jjjj.nnnnnddddd Vd[] Vn[] (ins - element)
+IF_DEF(DV_2G, IS_NONE, NONE) // DV_2G .........X...... ......nnnnnddddd Vd Vn (fmov, fcvtXX - register)
+IF_DEF(DV_2H, IS_NONE, NONE) // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general)
+IF_DEF(DV_2I, IS_NONE, NONE) // DV_2I X........X...... ......nnnnnddddd Vd Rn (fmov, fcvtXX - from general)
+IF_DEF(DV_2J, IS_NONE, NONE) // DV_2J .........d...... D.....nnnnnddddd Vd Vn (fcvt)
+IF_DEF(DV_2K, IS_NONE, NONE) // DV_2K .........X.mmmmm ......nnnnn..... Vn Vm (fcmp)
+IF_DEF(DV_2L, IS_NONE, NONE) // DV_2L ........XX...... ......nnnnnddddd Vd Vn (abs, neg - scalar)
+IF_DEF(DV_2M, IS_NONE, NONE) // DV_2M .Q......XX...... ......nnnnnddddd Vd Vn (abs, neg - vector)
+IF_DEF(DV_2N, IS_NONE, NONE) // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
+IF_DEF(DV_2O, IS_NONE, NONE) // DV_2O .Q.......iiiiiii ......nnnnnddddd Vd Vn imm (shift - vector)
+
+IF_DEF(DV_3A, IS_NONE, NONE) // DV_3A .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3AI, IS_NONE, NONE) // DV_3AI .Q......XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+IF_DEF(DV_3B, IS_NONE, NONE) // DV_3B .Q.......X.mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3BI, IS_NONE, NONE) // DV_3BI .Q.......XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (vector by elem)
+IF_DEF(DV_3C, IS_NONE, NONE) // DV_3C .Q.........mmmmm ......nnnnnddddd Vd Vn Vm (vector)
+IF_DEF(DV_3D, IS_NONE, NONE) // DV_3D .........X.mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+IF_DEF(DV_3DI, IS_NONE, NONE) // DV_3DI .........XLmmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by elem)
+IF_DEF(DV_3E, IS_NONE, NONE) // DV_3E ...........mmmmm ......nnnnnddddd Vd Vn Vm (scalar)
+
+IF_DEF(DV_4A, IS_NONE, NONE) // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Vn Vm Va (scalar)
+
+IF_DEF(SN_0A, IS_NONE, NONE) // SN_0A ................ ................
+IF_DEF(SI_0A, IS_NONE, NONE) // SI_0A ...........iiiii iiiiiiiiiii..... imm16
+IF_DEF(SI_0B, IS_NONE, NONE) // SI_0B ................ ....bbbb........ imm4 - barrier
+
+IF_DEF(INVALID, IS_NONE, NONE) //
+
+//////////////////////////////////////////////////////////////////////////////
+#undef IF_DEF
+//////////////////////////////////////////////////////////////////////////////
+
+#endif // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitfmtsxarch.h b/src/jit/emitfmtsxarch.h
new file mode 100644
index 0000000000..49afcb5c8b
--- /dev/null
+++ b/src/jit/emitfmtsxarch.h
@@ -0,0 +1,240 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//////////////////////////////////////////////////////////////////////////////
+
+//
+// This file was previously known as emitfmts.h
+//
+
+// clang-format off
+#if !defined(_TARGET_XARCH_)
+ #error Unexpected target type
+#endif
+
+#ifdef DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#undef DEFINE_ID_OPS
+
+enum ID_OPS
+{
+ ID_OP_NONE, // no additional arguments
+ ID_OP_SCNS, // small const operand (21-bits or less, no reloc)
+ ID_OP_CNS, // constant operand
+ ID_OP_DSP, // displacement operand
+ ID_OP_DSP_CNS, // displacement + constant
+ ID_OP_AMD, // addrmode with dsp
+ ID_OP_AMD_CNS, // addrmode with dsp + constant
+ ID_OP_JMP, // local jump
+ ID_OP_LBL, // label operand
+ ID_OP_CALL, // direct method call
+ ID_OP_SPEC, // special handling required
+};
+
+//////////////////////////////////////////////////////////////////////////////
+#else // !DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+
+#ifdef DEFINE_IS_OPS
+#undef DEFINE_IS_OPS
+
+#else // DEFINE_IS_OPS
+
+//////////////////////////////////////////////////////////////////////////////
+
+#ifndef IF_DEF
+#error Must define IF_DEF macro before including this file
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// A note on the naming convention for instruction forms (IF_xxxxx).
+// For 3-character code XYY, generally we have:
+// X =
+// R - register
+// M - memory
+// S - stack
+// A - address mode
+// YY =
+// RD - read
+// WR - write
+// RW - read / write
+//
+// The following sequences don't follow this pattern:
+// XYY =
+// CNS - constant
+// SHF - shift-constant
+//
+// For IF_XXX_YYY, the first operand is XXX, the second operand is YYY.
+//
+//////////////////////////////////////////////////////////////////////////////
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// enum insFormat instruction enum ID_OPS
+// scheduling
+// (unused)
+//////////////////////////////////////////////////////////////////////////////
+
+IF_DEF(NONE, IS_NONE, NONE) // no operands
+
+IF_DEF(LABEL, IS_NONE, JMP ) // label
+IF_DEF(RWR_LABEL, IS_R1_WR, JMP ) // write label to register
+IF_DEF(SWR_LABEL, IS_SF_WR, LBL ) // write label to stack
+
+IF_DEF(METHOD, IS_NONE, CALL) // method
+IF_DEF(METHPTR, IS_NONE, CALL) // method ptr (glbl)
+
+IF_DEF(CNS, IS_NONE, SCNS) // const
+
+//----------------------------------------------------------------------------
+// NOTE: The order of the "RD/WR/RW" varieties must match that of
+// the "insUpdateModes" enum in "instr.h".
+//----------------------------------------------------------------------------
+
+IF_DEF(RRD, IS_R1_RD, NONE) // read reg
+IF_DEF(RWR, IS_R1_WR, NONE) // write reg
+IF_DEF(RRW, IS_R1_RW, NONE) // r/w reg
+
+IF_DEF(RRD_CNS, IS_R1_RD, SCNS) // read reg , const
+IF_DEF(RWR_CNS, IS_R1_WR, SCNS) // write reg , const
+IF_DEF(RRW_CNS, IS_R1_RW, SCNS) // r/w reg , const
+IF_DEF(RRW_SHF, IS_R1_RW, SCNS) // r/w reg , shift-const
+
+IF_DEF(RRD_RRD, IS_R1_RD|IS_R2_RD, NONE) // read reg , read reg2
+IF_DEF(RWR_RRD, IS_R1_WR|IS_R2_RD, NONE) // write reg , read reg2
+IF_DEF(RRW_RRD, IS_R1_RW|IS_R2_RD, NONE) // r/w reg , read reg2
+IF_DEF(RRW_RRW, IS_R1_RW|IS_R2_RW, NONE) // r/w reg , r/w reg2 - for XCHG reg, reg2
+IF_DEF(RRW_RRW_CNS, IS_R1_RW|IS_R2_RW, SCNS) // r/w reg , r/w reg2 , const
+
+IF_DEF(RWR_RRD_RRD, IS_R1_WR|IS_R2_RD|IS_R3_RD, NONE) // write reg , read reg2 , read reg3
+
+//----------------------------------------------------------------------------
+// The following formats are used for direct addresses (e.g. static data members)
+//----------------------------------------------------------------------------
+
+IF_DEF(MRD, IS_GM_RD, SPEC) // read [mem] (indirect call req. SPEC)
+IF_DEF(MWR, IS_GM_WR, DSP) // write [mem]
+IF_DEF(MRW, IS_GM_RW, DSP) // r/w [mem]
+IF_DEF(MRD_OFF, IS_GM_RD, DSP) // offset mem
+
+IF_DEF(RRD_MRD, IS_GM_RD|IS_R1_RD, DSP) // read reg , read [mem]
+IF_DEF(RWR_MRD, IS_GM_RD|IS_R1_WR, DSP) // write reg , read [mem]
+IF_DEF(RRW_MRD, IS_GM_RD|IS_R1_RW, DSP) // r/w reg , read [mem]
+
+IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR, DSP) // write reg , offset mem
+
+IF_DEF(MRD_RRD, IS_GM_RD|IS_R1_RD, DSP) // read [mem], read reg
+IF_DEF(MWR_RRD, IS_GM_WR|IS_R1_RD, DSP) // write [mem], read reg
+IF_DEF(MRW_RRD, IS_GM_RW|IS_R1_RD, DSP) // r/w [mem], read reg
+
+IF_DEF(MRD_CNS, IS_GM_RD, DSP_CNS) // read [mem], const
+IF_DEF(MWR_CNS, IS_GM_WR, DSP_CNS) // write [mem], const
+IF_DEF(MRW_CNS, IS_GM_RW, DSP_CNS) // r/w [mem], const
+
+IF_DEF(MRW_SHF, IS_GM_RW, DSP_CNS) // shift [mem], const
+
+//----------------------------------------------------------------------------
+// The following formats are used for stack frame refs
+//----------------------------------------------------------------------------
+
+IF_DEF(SRD, IS_SF_RD, SPEC) // read [stk] (indirect call req. SPEC)
+IF_DEF(SWR, IS_SF_WR, NONE) // write [stk]
+IF_DEF(SRW, IS_SF_RW, NONE) // r/w [stk]
+
+IF_DEF(RRD_SRD, IS_SF_RD|IS_R1_RD, NONE) // read reg , read [stk]
+IF_DEF(RWR_SRD, IS_SF_RD|IS_R1_WR, NONE) // write reg , read [stk]
+IF_DEF(RRW_SRD, IS_SF_RD|IS_R1_RW, NONE) // r/w reg , read [stk]
+
+IF_DEF(SRD_RRD, IS_SF_RD|IS_R1_RD, NONE) // read [stk], read reg
+IF_DEF(SWR_RRD, IS_SF_WR|IS_R1_RD, NONE) // write [stk], read reg
+IF_DEF(SRW_RRD, IS_SF_RW|IS_R1_RD, NONE) // r/w [stk], read reg
+
+IF_DEF(SRD_CNS, IS_SF_RD, CNS ) // read [stk], const
+IF_DEF(SWR_CNS, IS_SF_WR, CNS ) // write [stk], const
+IF_DEF(SRW_CNS, IS_SF_RW, CNS ) // r/w [stk], const
+
+IF_DEF(SRW_SHF, IS_SF_RW, CNS ) // shift [stk], const
+
+//----------------------------------------------------------------------------
+// The following formats are used for indirect address modes
+//----------------------------------------------------------------------------
+
+
+IF_DEF(ARD, IS_AM_RD, SPEC) // read [adr] (indirect call req. SPEC)
+IF_DEF(AWR, IS_AM_WR, AMD ) // write [adr]
+IF_DEF(ARW, IS_AM_RW, AMD ) // r/w [adr]
+
+IF_DEF(RRD_ARD, IS_AM_RD|IS_R1_RD, AMD ) // read reg , read [adr]
+IF_DEF(RWR_ARD, IS_AM_RD|IS_R1_WR, AMD ) // write reg , read [adr]
+IF_DEF(RRW_ARD, IS_AM_RD|IS_R1_RW, AMD ) // r/w reg , read [adr]
+
+IF_DEF(ARD_RRD, IS_AM_RD|IS_R1_RD, AMD ) // read [adr], read reg
+IF_DEF(AWR_RRD, IS_AM_WR|IS_R1_RD, AMD ) // write [adr], read reg
+IF_DEF(ARW_RRD, IS_AM_RW|IS_R1_RD, AMD ) // r/w [adr], read reg
+
+IF_DEF(ARD_CNS, IS_AM_RD, AMD_CNS) // read [adr], const
+IF_DEF(AWR_CNS, IS_AM_WR, AMD_CNS) // write [adr], const
+IF_DEF(ARW_CNS, IS_AM_RW, AMD_CNS) // r/w [adr], const
+
+IF_DEF(ARW_SHF, IS_AM_RW, AMD_CNS) // shift [adr], const
+
+
+
+//----------------------------------------------------------------------------
+// The following formats are used for FP coprocessor instructions
+//----------------------------------------------------------------------------
+#if FEATURE_STACK_FP_X87
+
+IF_DEF(FRD, IS_FP_STK, NONE) // read ST(n)
+IF_DEF(FWR, IS_FP_STK, NONE) // write ST(n)
+IF_DEF(FRW, IS_FP_STK, NONE) // r/w ST(n)
+
+IF_DEF(TRD, IS_FP_STK, NONE) // read ST(0)
+IF_DEF(TWR, IS_FP_STK, NONE) // write ST(0)
+IF_DEF(TRW, IS_FP_STK, NONE) // r/w ST(0)
+
+IF_DEF(FRD_TRD, IS_FP_STK, NONE) // read ST(n), read ST(0)
+IF_DEF(FWR_TRD, IS_FP_STK, NONE) // write ST(n), read ST(0)
+IF_DEF(FRW_TRD, IS_FP_STK, NONE) // r/w ST(n), read ST(0)
+
+IF_DEF(TRD_FRD, IS_FP_STK, NONE) // read ST(0), read ST(n)
+IF_DEF(TWR_FRD, IS_FP_STK, NONE) // write ST(0), read ST(n)
+IF_DEF(TRW_FRD, IS_FP_STK, NONE) // r/w ST(0), read ST(n)
+
+IF_DEF(TRD_SRD, IS_FP_STK|IS_SF_RD, NONE) // read ST(0), read [stk]
+IF_DEF(TWR_SRD, IS_FP_STK|IS_SF_RD, NONE) // write ST(0), read [stk]
+IF_DEF(TRW_SRD, IS_FP_STK|IS_SF_RD, NONE) // r/w ST(0), read [stk]
+
+//////(SRD_TRD, IS_FP_STK|IS_SF_RD, NONE) // read [stk], read ST(n)
+IF_DEF(SWR_TRD, IS_FP_STK|IS_SF_WR, NONE) // write [stk], read ST(n)
+//////(SRW_TRD, IS_FP_STK|IS_SF_RW, NONE) // r/w [stk], read ST(n)
+
+IF_DEF(TRD_MRD, IS_FP_STK|IS_GM_RD, NONE) // read ST(0), read [mem]
+IF_DEF(TWR_MRD, IS_FP_STK|IS_GM_RD, NONE) // write ST(0), read [mem]
+IF_DEF(TRW_MRD, IS_FP_STK|IS_GM_RD, NONE) // r/w ST(0), read [mem]
+
+//////(MRD_TRD, IS_FP_STK|IS_GM_RD, NONE) // read [mem], read ST(n)
+IF_DEF(MWR_TRD, IS_FP_STK|IS_GM_WR, NONE) // write [mem], read ST(n)
+//////(MRW_TRD, IS_FP_STK|IS_GM_RW, NONE) // r/w [mem], read ST(n)
+
+IF_DEF(TRD_ARD, IS_FP_STK|IS_AM_RD, AMD ) // read ST(0), read [adr]
+IF_DEF(TWR_ARD, IS_FP_STK|IS_AM_RD, AMD ) // write ST(0), read [adr]
+IF_DEF(TRW_ARD, IS_FP_STK|IS_AM_RD, AMD ) // r/w ST(0), read [adr]
+
+//////(ARD_TRD, IS_FP_STK|IS_AM_RD, AMD ) // read [adr], read ST(n)
+IF_DEF(AWR_TRD, IS_FP_STK|IS_AM_WR, AMD ) // write [adr], read ST(n)
+//////(ARW_TRD, IS_FP_STK|IS_AM_RW, AMD ) // r/w [adr], read ST(n)
+
+#endif // FEATURE_STACK_FP_X87
+
+//////////////////////////////////////////////////////////////////////////////
+
+#undef IF_DEF
+
+//////////////////////////////////////////////////////////////////////////////
+#endif // DEFINE_IS_OPS
+#endif // DEFINE_ID_OPS
+//////////////////////////////////////////////////////////////////////////////
+// clang-format on
diff --git a/src/jit/emitinl.h b/src/jit/emitinl.h
new file mode 100644
index 0000000000..302b8ea448
--- /dev/null
+++ b/src/jit/emitinl.h
@@ -0,0 +1,508 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _EMITINL_H_
+#define _EMITINL_H_
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Return the number of bytes of machine code the given instruction will
+ * produce.
+ */
+
+inline UNATIVE_OFFSET emitter::emitInstCodeSz(instrDesc* id)
+{
+ return id->idCodeSize();
+}
+
+inline UNATIVE_OFFSET emitter::emitSizeOfJump(instrDescJmp* jmp)
+{
+ return jmp->idCodeSize();
+}
+
+#ifdef _TARGET_XARCH_
+
+/* static */
+inline bool emitter::instrIs3opImul(instruction ins)
+{
+#ifdef _TARGET_X86_
+ return ((ins >= INS_imul_AX) && (ins <= INS_imul_DI));
+#else // _TARGET_AMD64
+ return ((ins >= INS_imul_AX) && (ins <= INS_imul_15));
+#endif
+}
+
+/* static */
+inline bool emitter::instrIsExtendedReg3opImul(instruction ins)
+{
+#ifdef _TARGET_X86_
+ return false;
+#else // _TARGET_AMD64
+ return ((ins >= INS_imul_08) && (ins <= INS_imul_15));
+#endif
+}
+
+/* static */
+inline bool emitter::instrHasImplicitRegPairDest(instruction ins)
+{
+ return (ins == INS_mulEAX) || (ins == INS_imulEAX) || (ins == INS_div) || (ins == INS_idiv);
+}
+
+// Because we don't actually have support for encoding these 3-op
+// multiplies, we fake them with special pseudo-opcodes. Make sure they are
+// contiguous.
+/* static */
+inline void emitter::check3opImulValues()
+{
+ assert(INS_imul_AX - INS_imul_AX == REG_EAX);
+ assert(INS_imul_BX - INS_imul_AX == REG_EBX);
+ assert(INS_imul_CX - INS_imul_AX == REG_ECX);
+ assert(INS_imul_DX - INS_imul_AX == REG_EDX);
+ assert(INS_imul_BP - INS_imul_AX == REG_EBP);
+ assert(INS_imul_SI - INS_imul_AX == REG_ESI);
+ assert(INS_imul_DI - INS_imul_AX == REG_EDI);
+#ifdef _TARGET_AMD64_
+ assert(INS_imul_08 - INS_imul_AX == REG_R8);
+ assert(INS_imul_09 - INS_imul_AX == REG_R9);
+ assert(INS_imul_10 - INS_imul_AX == REG_R10);
+ assert(INS_imul_11 - INS_imul_AX == REG_R11);
+ assert(INS_imul_12 - INS_imul_AX == REG_R12);
+ assert(INS_imul_13 - INS_imul_AX == REG_R13);
+ assert(INS_imul_14 - INS_imul_AX == REG_R14);
+ assert(INS_imul_15 - INS_imul_AX == REG_R15);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Return the instruction that uses the given register in the imul instruction
+ */
+
+/* static */
+inline instruction emitter::inst3opImulForReg(regNumber reg)
+{
+ assert(genIsValidIntReg(reg));
+
+ instruction ins = instruction(reg + INS_imul_AX);
+ check3opImulValues();
+ assert(instrIs3opImul(ins));
+
+ return ins;
+}
+
+/*****************************************************************************
+ *
+ * Return the register which is used implicitly by the IMUL_REG instruction
+ */
+
+/* static */
+inline regNumber emitter::inst3opImulReg(instruction ins)
+{
+ regNumber reg = ((regNumber)(ins - INS_imul_AX));
+
+ assert(genIsValidIntReg(reg));
+
+ /* Make sure we return the appropriate register */
+
+ check3opImulValues();
+
+ return reg;
+}
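+
+// Illustration of the mapping above (a sketch shown only to make the
+// pseudo-opcode scheme concrete): because the INS_imul_XX opcodes are laid
+// out contiguously starting at INS_imul_AX, the two helpers are exact
+// inverses of each other, e.g.
+//
+//   instruction ins = inst3opImulForReg(REG_ECX); // INS_imul_CX
+//   regNumber   reg = inst3opImulReg(ins);        // REG_ECX again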
+#endif
+
+/*****************************************************************************
+ *
+ * The following helpers should be used to access the various values that
+ * get stored in different places within the instruction descriptor.
+ */
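+
+// For example (an illustrative sketch only; "id" stands for any valid
+// instrDesc* describing an address-mode instruction with an immediate):
+//
+//   CnsVal  cnsVal;
+//   ssize_t disp = emitGetInsAmdCns(id, &cnsVal); // address-mode displacement
+//   ssize_t imm  = cnsVal.cnsVal;                 // immediate constant
+//
+// Each helper picks the right storage location depending on whether the
+// descriptor carries a "large" displacement and/or a "large" constant.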
+
+inline ssize_t emitter::emitGetInsAmd(instrDesc* id)
+{
+ return id->idIsLargeDsp() ? ((instrDescAmd*)id)->idaAmdVal : id->idAddr()->iiaAddrMode.amDisp;
+}
+
+inline int emitter::emitGetInsCDinfo(instrDesc* id)
+{
+ if (id->idIsLargeCall())
+ {
+ return ((instrDescCGCA*)id)->idcArgCnt;
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+ ssize_t cns = emitGetInsCns(id);
+
+ // We only encode 32-bit ints, so this is safe
+ noway_assert((int)cns == cns);
+
+ return (int)cns;
+ }
+}
+
+inline void emitter::emitGetInsCns(instrDesc* id, CnsVal* cv)
+{
+#ifdef RELOC_SUPPORT
+ cv->cnsReloc = id->idIsCnsReloc();
+#endif
+ if (id->idIsLargeCns())
+ {
+ cv->cnsVal = ((instrDescCns*)id)->idcCnsVal;
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ }
+}
+
+inline ssize_t emitter::emitGetInsAmdCns(instrDesc* id, CnsVal* cv)
+{
+#ifdef RELOC_SUPPORT
+ cv->cnsReloc = id->idIsCnsReloc();
+#endif
+ if (id->idIsLargeDsp())
+ {
+ if (id->idIsLargeCns())
+ {
+ cv->cnsVal = ((instrDescCnsAmd*)id)->idacCnsVal;
+ return ((instrDescCnsAmd*)id)->idacAmdVal;
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ return ((instrDescAmd*)id)->idaAmdVal;
+ }
+ }
+ else
+ {
+ if (id->idIsLargeCns())
+ {
+ cv->cnsVal = ((instrDescCns*)id)->idcCnsVal;
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ }
+
+ return id->idAddr()->iiaAddrMode.amDisp;
+ }
+}
+
+inline void emitter::emitGetInsDcmCns(instrDesc* id, CnsVal* cv)
+{
+#ifdef RELOC_SUPPORT
+ cv->cnsReloc = id->idIsCnsReloc();
+#endif
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ {
+ cv->cnsVal = ((instrDescCnsDsp*)id)->iddcCnsVal;
+ }
+ else
+ {
+ cv->cnsVal = ((instrDescCns*)id)->idcCnsVal;
+ }
+ }
+ else
+ {
+ cv->cnsVal = id->idSmallCns();
+ }
+}
+
+inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id)
+{
+ if (id->idIsLargeDsp())
+ {
+ if (id->idIsLargeCns())
+ {
+ return ((instrDescCnsAmd*)id)->idacAmdVal;
+ }
+ return ((instrDescAmd*)id)->idaAmdVal;
+ }
+
+ return id->idAddr()->iiaAddrMode.amDisp;
+}
+
+/*****************************************************************************
+ *
+ * Convert between a register mask and a smaller version for storage.
+ */
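+
+// Only callee-saved registers appear in the mask (see the assert below), so
+// the full register mask is packed into a few bits stashed in the idReg1 and
+// idReg2 fields of the instrDesc. For example (a sketch of the x86 case): a
+// mask of RBM_ESI | RBM_EBX is encoded as 0x01 | 0x04 = 0x05 in idReg1, and
+// emitDecodeCallGCregs() recovers the original mask from those bits.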
+
+/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskTP regmask, instrDesc* id)
+{
+ assert((regmask & RBM_CALLEE_TRASH) == 0);
+
+ unsigned encodeMask;
+
+#ifdef _TARGET_X86_
+ assert(REGNUM_BITS >= 3);
+ encodeMask = 0;
+
+ if ((regmask & RBM_ESI) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_EDI) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_EBX) != RBM_NONE)
+ encodeMask |= 0x04;
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+#elif defined(_TARGET_AMD64_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = 0;
+
+ if ((regmask & RBM_RSI) != RBM_NONE)
+ {
+ encodeMask |= 0x01;
+ }
+ if ((regmask & RBM_RDI) != RBM_NONE)
+ {
+ encodeMask |= 0x02;
+ }
+ if ((regmask & RBM_RBX) != RBM_NONE)
+ {
+ encodeMask |= 0x04;
+ }
+ if ((regmask & RBM_RBP) != RBM_NONE)
+ {
+ encodeMask |= 0x08;
+ }
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+ encodeMask = 0;
+
+ if ((regmask & RBM_R12) != RBM_NONE)
+ {
+ encodeMask |= 0x01;
+ }
+ if ((regmask & RBM_R13) != RBM_NONE)
+ {
+ encodeMask |= 0x02;
+ }
+ if ((regmask & RBM_R14) != RBM_NONE)
+ {
+ encodeMask |= 0x04;
+ }
+ if ((regmask & RBM_R15) != RBM_NONE)
+ {
+ encodeMask |= 0x08;
+ }
+
+ id->idReg2((regNumber)encodeMask); // Save in idReg2
+
+#elif defined(_TARGET_ARM_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = 0;
+
+ if ((regmask & RBM_R4) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R5) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R6) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R7) != RBM_NONE)
+ encodeMask |= 0x08;
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+ encodeMask = 0;
+
+ if ((regmask & RBM_R8) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R9) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R10) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R11) != RBM_NONE)
+ encodeMask |= 0x08;
+
+ id->idReg2((regNumber)encodeMask); // Save in idReg2
+
+#elif defined(_TARGET_ARM64_)
+ assert(REGNUM_BITS >= 5);
+ encodeMask = 0;
+
+ if ((regmask & RBM_R19) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R20) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R21) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R22) != RBM_NONE)
+ encodeMask |= 0x08;
+ if ((regmask & RBM_R23) != RBM_NONE)
+ encodeMask |= 0x10;
+
+ id->idReg1((regNumber)encodeMask); // Save in idReg1
+
+ encodeMask = 0;
+
+ if ((regmask & RBM_R24) != RBM_NONE)
+ encodeMask |= 0x01;
+ if ((regmask & RBM_R25) != RBM_NONE)
+ encodeMask |= 0x02;
+ if ((regmask & RBM_R26) != RBM_NONE)
+ encodeMask |= 0x04;
+ if ((regmask & RBM_R27) != RBM_NONE)
+ encodeMask |= 0x08;
+ if ((regmask & RBM_R28) != RBM_NONE)
+ encodeMask |= 0x10;
+
+ id->idReg2((regNumber)encodeMask); // Save in idReg2
+
+#else
+ NYI("unknown target");
+#endif
+}
+
+/*static*/ inline unsigned emitter::emitDecodeCallGCregs(instrDesc* id)
+{
+ unsigned regmask = 0;
+ unsigned encodeMask;
+
+#ifdef _TARGET_X86_
+ assert(REGNUM_BITS >= 3);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_ESI;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_EDI;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_EBX;
+#elif defined(_TARGET_AMD64_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ {
+ regmask |= RBM_RSI;
+ }
+ if ((encodeMask & 0x02) != 0)
+ {
+ regmask |= RBM_RDI;
+ }
+ if ((encodeMask & 0x04) != 0)
+ {
+ regmask |= RBM_RBX;
+ }
+ if ((encodeMask & 0x08) != 0)
+ {
+ regmask |= RBM_RBP;
+ }
+
+ encodeMask = id->idReg2();
+
+ if ((encodeMask & 0x01) != 0)
+ {
+ regmask |= RBM_R12;
+ }
+ if ((encodeMask & 0x02) != 0)
+ {
+ regmask |= RBM_R13;
+ }
+ if ((encodeMask & 0x04) != 0)
+ {
+ regmask |= RBM_R14;
+ }
+ if ((encodeMask & 0x08) != 0)
+ {
+ regmask |= RBM_R15;
+ }
+
+#elif defined(_TARGET_ARM_)
+ assert(REGNUM_BITS >= 4);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R4;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R5;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R6;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R7;
+
+ encodeMask = id->idReg2();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R8;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R9;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R10;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R11;
+
+#elif defined(_TARGET_ARM64_)
+ assert(REGNUM_BITS >= 5);
+ encodeMask = id->idReg1();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R19;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R20;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R21;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R22;
+ if ((encodeMask & 0x10) != 0)
+ regmask |= RBM_R23;
+
+ encodeMask = id->idReg2();
+
+ if ((encodeMask & 0x01) != 0)
+ regmask |= RBM_R24;
+ if ((encodeMask & 0x02) != 0)
+ regmask |= RBM_R25;
+ if ((encodeMask & 0x04) != 0)
+ regmask |= RBM_R26;
+ if ((encodeMask & 0x08) != 0)
+ regmask |= RBM_R27;
+ if ((encodeMask & 0x10) != 0)
+ regmask |= RBM_R28;
+
+#else
+ NYI("unknown target");
+#endif
+
+ return regmask;
+}
+
+#ifdef _TARGET_XARCH_
+inline bool insIsCMOV(instruction ins)
+{
+ return ((ins >= INS_cmovo) && (ins <= INS_cmovg));
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Call the specified callback for each insGroup in the current
+ * method that is marked IGF_NOGCINTERRUPT. Stops if the callback returns
+ * false. Returns the final result of the callback.
+ */
+template <typename Callback>
+bool emitter::emitGenNoGCLst(Callback& cb)
+{
+ for (insGroup* ig = emitIGlist; ig; ig = ig->igNext)
+ {
+ if (ig->igFlags & IGF_NOGCINTERRUPT)
+ {
+ if (!cb(ig->igFuncIdx, ig->igOffs, ig->igSize))
+ {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
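+
+// Illustrative use (a sketch; the callback parameter types follow the call
+// above, and the lambda name is made up for the example):
+//
+//   auto dumpNoGC = [](unsigned funcIdx, unsigned offs, unsigned size) {
+//       printf("no-GC region: func %u, offs %u, size %u\n", funcIdx, offs, size);
+//       return true; // keep walking the remaining groups
+//   };
+//   bool walkedAll = emitGenNoGCLst(dumpNoGC);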
+
+/*****************************************************************************/
+#endif //_EMITINL_H_
+/*****************************************************************************/
diff --git a/src/jit/emitjmps.h b/src/jit/emitjmps.h
new file mode 100644
index 0000000000..60815d13ea
--- /dev/null
+++ b/src/jit/emitjmps.h
@@ -0,0 +1,58 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+#ifndef JMP_SMALL
+#error Must define JMP_SMALL macro before including this file
+#endif
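+
+// How a consumer typically uses this X-macro table (an illustrative sketch;
+// the enum and the EJ_ prefix are assumptions made for the example, the real
+// consumers live in the emitter headers): each row names a jump, the jump
+// with the reversed condition, and the corresponding instruction, e.g.
+//
+//   enum emitJumpKind
+//   {
+//       EJ_NONE,
+//   #define JMP_SMALL(jump, reverse, ins) EJ_##jump,
+//   #include "emitjmps.h"
+//       EJ_COUNT
+//   };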
+
+#if defined(_TARGET_XARCH_)
+
+// jump reverse instruction
+JMP_SMALL(jmp , jmp , jmp )
+JMP_SMALL(jo , jno , jo )
+JMP_SMALL(jno , jo , jno )
+JMP_SMALL(jb , jae , jb )
+JMP_SMALL(jae , jb , jae )
+JMP_SMALL(je , jne , je )
+JMP_SMALL(jne , je , jne )
+JMP_SMALL(jbe , ja , jbe )
+JMP_SMALL(ja , jbe , ja )
+JMP_SMALL(js , jns , js )
+JMP_SMALL(jns , js , jns )
+JMP_SMALL(jpe , jpo , jpe )
+JMP_SMALL(jpo , jpe , jpo )
+JMP_SMALL(jl , jge , jl )
+JMP_SMALL(jge , jl , jge )
+JMP_SMALL(jle , jg , jle )
+JMP_SMALL(jg , jle , jg )
+
+#elif defined(_TARGET_ARMARCH_)
+
+// jump reverse instruction condcode
+JMP_SMALL(jmp , jmp , b ) // AL always
+JMP_SMALL(eq , ne , beq ) // EQ
+JMP_SMALL(ne , eq , bne ) // NE
+JMP_SMALL(hs , lo , bhs ) // HS also CS
+JMP_SMALL(lo , hs , blo ) // LO also CC
+JMP_SMALL(mi , pl , bmi ) // MI
+JMP_SMALL(pl , mi , bpl ) // PL
+JMP_SMALL(vs , vc , bvs ) // VS
+JMP_SMALL(vc , vs , bvc ) // VC
+JMP_SMALL(hi , ls , bhi ) // HI
+JMP_SMALL(ls , hi , bls ) // LS
+JMP_SMALL(ge , lt , bge ) // GE
+JMP_SMALL(lt , ge , blt ) // LT
+JMP_SMALL(gt , le , bgt ) // GT
+JMP_SMALL(le , gt , ble ) // LE
+
+#else
+ #error Unsupported or unset target architecture
+#endif // target type
+
+/*****************************************************************************/
+#undef JMP_SMALL
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/emitpub.h b/src/jit/emitpub.h
new file mode 100644
index 0000000000..a2f041a5f3
--- /dev/null
+++ b/src/jit/emitpub.h
@@ -0,0 +1,162 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/************************************************************************/
+/* Overall emitter control (including startup and shutdown) */
+/************************************************************************/
+
+static void emitInit();
+static void emitDone();
+
+void emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle);
+void emitEndCG();
+
+void emitBegFN(bool hasFramePtr
+#if defined(DEBUG)
+ ,
+ bool checkAlign
+#endif
+#ifdef LEGACY_BACKEND
+ ,
+ unsigned lclSize
+#endif // LEGACY_BACKEND
+ ,
+ unsigned maxTmpSize);
+
+void emitEndFN();
+
+void emitComputeCodeSizes();
+
+unsigned emitEndCodeGen(Compiler* comp,
+ bool contTrkPtrLcls,
+ bool fullyInt,
+ bool fullPtrMap,
+ bool returnsGCr,
+ unsigned xcptnsCount,
+ unsigned* prologSize,
+ unsigned* epilogSize,
+ void** codeAddr,
+ void** coldCodeAddr,
+ void** consAddr);
+
+/************************************************************************/
+/* Method prolog and epilog */
+/************************************************************************/
+
+unsigned emitGetEpilogCnt();
+
+template <typename Callback>
+bool emitGenNoGCLst(Callback& cb);
+
+void emitBegProlog();
+unsigned emitGetPrologOffsetEstimate();
+void emitMarkPrologEnd();
+void emitEndProlog();
+
+void emitCreatePlaceholderIG(insGroupPlaceholderType igType,
+ BasicBlock* igBB,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ bool last);
+
+void emitGeneratePrologEpilog();
+void emitStartPrologEpilogGeneration();
+void emitFinishPrologEpilogGeneration();
+
+/************************************************************************/
+/* Record a code position and later convert it to offset */
+/************************************************************************/
+
+void* emitCurBlock();
+unsigned emitCurOffset();
+
+UNATIVE_OFFSET emitCodeOffset(void* blockPtr, unsigned codeOffs);
+
+#ifdef DEBUG
+const char* emitOffsetToLabel(unsigned offs);
+#endif // DEBUG
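+
+// Typical usage pattern (an illustrative sketch; the local names are made up):
+// record a position while emitting, then translate it to a code offset once
+// the final code layout is known:
+//
+//   void*    block = emitCurBlock();
+//   unsigned offs  = emitCurOffset();
+//   ...                                   // emit more code
+//   UNATIVE_OFFSET codeOffs = emitCodeOffset(block, offs);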
+
+/************************************************************************/
+/* Output target-independent instructions */
+/************************************************************************/
+
+void emitIns_J(instruction ins, BasicBlock* dst, int instrCount = 0);
+
+/************************************************************************/
+/* Emit initialized data sections */
+/************************************************************************/
+
+UNATIVE_OFFSET emitDataGenBeg(UNATIVE_OFFSET size, bool dblAlign, bool codeLtab);
+
+UNATIVE_OFFSET emitBBTableDataGenBeg(unsigned numEntries, bool relativeAddr);
+
+void emitDataGenData(unsigned offs, const void* data, size_t size);
+
+void emitDataGenData(unsigned offs, BasicBlock* label);
+
+void emitDataGenEnd();
+
+UNATIVE_OFFSET emitDataConst(const void* cnsAddr, unsigned cnsSize, bool dblAlign);
+
+UNATIVE_OFFSET emitDataSize();
+
+/************************************************************************/
+/* Instruction information */
+/************************************************************************/
+
+#ifdef _TARGET_XARCH_
+static bool instrIs3opImul(instruction ins);
+static bool instrIsExtendedReg3opImul(instruction ins);
+static bool instrHasImplicitRegPairDest(instruction ins);
+static void check3opImulValues();
+static regNumber inst3opImulReg(instruction ins);
+static instruction inst3opImulForReg(regNumber reg);
+#endif
+
+/************************************************************************/
+/* Emit PDB offset translation information */
+/************************************************************************/
+
+#ifdef TRANSLATE_PDB
+
+static void SetILBaseOfCode(BYTE* pTextBase);
+static void SetILMethodBase(BYTE* pMethodEntry);
+static void SetILMethodStart(BYTE* pMethodCode);
+static void SetImgBaseOfCode(BYTE* pTextBase);
+
+void SetIDBaseToProlog();
+void SetIDBaseToOffset(int methodOffset);
+
+static void DisablePDBTranslation();
+static bool IsPDBEnabled();
+
+static void InitTranslationMaps(int ilCodeSize);
+static void DeleteTranslationMaps();
+static void InitTranslator(PDBRewriter* pPDB, int* rgSecMap, IMAGE_SECTION_HEADER** rgpHeader, int numSections);
+#endif
+
+/************************************************************************/
+/* Interface for generating unwind information */
+/************************************************************************/
+
+#ifdef _TARGET_ARMARCH_
+
+bool emitIsFuncEnd(emitLocation* emitLoc, emitLocation* emitLocNextFragment = NULL);
+
+void emitSplit(emitLocation* startLoc,
+ emitLocation* endLoc,
+ UNATIVE_OFFSET maxSplitSize,
+ void* context,
+ emitSplitCallbackType callbackFunc);
+
+void emitUnwindNopPadding(emitLocation* locFrom, Compiler* comp);
+
+#endif // _TARGET_ARMARCH_
+
+#if defined(_TARGET_ARM_)
+
+unsigned emitGetInstructionSize(emitLocation* emitLoc);
+
+#endif // defined(_TARGET_ARM_)
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
new file mode 100644
index 0000000000..d43f766ee8
--- /dev/null
+++ b/src/jit/emitxarch.cpp
@@ -0,0 +1,11398 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX emitX86.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_XARCH_)
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#include "instr.h"
+#include "emit.h"
+#include "codegen.h"
+
+bool IsSSE2Instruction(instruction ins)
+{
+ return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_SSE2_INSTRUCTION);
+}
+
+bool IsSSEOrAVXInstruction(instruction ins)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ return (ins >= INS_FIRST_SSE2_INSTRUCTION && ins <= INS_LAST_AVX_INSTRUCTION);
+#else // !FEATURE_AVX_SUPPORT
+ return IsSSE2Instruction(ins);
+#endif // !FEATURE_AVX_SUPPORT
+}
+
+bool emitter::IsAVXInstruction(instruction ins)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ return (UseAVX() && IsSSEOrAVXInstruction(ins));
+#else
+ return false;
+#endif
+}
+
+#define REX_PREFIX_MASK 0xFF00000000LL
+
+#ifdef FEATURE_AVX_SUPPORT
+// Returns true if the AVX instruction is a binary operator that requires 3 operands.
+// When we emit an instruction with only two operands, we will duplicate the destination
+// as a source.
+// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
+// be formalized by adding an additional field to the instruction table to
+// indicate whether it is a 3-operand instruction.
+bool emitter::IsThreeOperandBinaryAVXInstruction(instruction ins)
+{
+ return IsAVXInstruction(ins) &&
+ (ins == INS_cvtsi2ss || ins == INS_cvtsi2sd || ins == INS_cvtss2sd || ins == INS_cvtsd2ss ||
+ ins == INS_addss || ins == INS_addsd || ins == INS_subss || ins == INS_subsd || ins == INS_mulss ||
+ ins == INS_mulsd || ins == INS_divss || ins == INS_divsd || ins == INS_addps || ins == INS_addpd ||
+ ins == INS_subps || ins == INS_subpd || ins == INS_mulps || ins == INS_mulpd || ins == INS_cmpps ||
+ ins == INS_cmppd || ins == INS_andps || ins == INS_andpd || ins == INS_orps || ins == INS_orpd ||
+ ins == INS_xorps || ins == INS_xorpd || ins == INS_dpps || ins == INS_dppd || ins == INS_haddpd ||
+ ins == INS_por || ins == INS_pand || ins == INS_pandn || ins == INS_pcmpeqd || ins == INS_pcmpgtd ||
+ ins == INS_pcmpeqw || ins == INS_pcmpgtw || ins == INS_pcmpeqb || ins == INS_pcmpgtb ||
+ ins == INS_pcmpeqq || ins == INS_pcmpgtq || ins == INS_pmulld || ins == INS_pmullw ||
+
+ ins == INS_shufps || ins == INS_shufpd || ins == INS_minps || ins == INS_minss || ins == INS_minpd ||
+ ins == INS_minsd || ins == INS_divps || ins == INS_divpd || ins == INS_maxps || ins == INS_maxpd ||
+ ins == INS_maxss || ins == INS_maxsd || ins == INS_andnps || ins == INS_andnpd || ins == INS_paddb ||
+ ins == INS_paddw || ins == INS_paddd || ins == INS_paddq || ins == INS_psubb || ins == INS_psubw ||
+ ins == INS_psubd || ins == INS_psubq || ins == INS_pmuludq || ins == INS_pxor || ins == INS_pmaxub ||
+ ins == INS_pminub || ins == INS_pmaxsw || ins == INS_pminsw || ins == INS_insertps || ins == INS_vinsertf128 ||
+ ins == INS_punpckldq
+
+ );
+}
+
+// Returns true if the AVX instruction is a move operator that requires 3 operands.
+// When we emit an instruction with only two operands, we will duplicate the source
+// register in the vvvv field. This is because these merge sources into the dest.
+// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this needs to
+// be formalized by adding an additional field to the instruction table to
+// indicate whether it is a 3-operand instruction.
+bool emitter::IsThreeOperandMoveAVXInstruction(instruction ins)
+{
+ return IsAVXInstruction(ins) &&
+ (ins == INS_movlpd || ins == INS_movlps || ins == INS_movhpd || ins == INS_movhps || ins == INS_movss);
+}
+#endif // FEATURE_AVX_SUPPORT
+
+// Returns true if the AVX instruction is a 4-byte opcode.
+// Note that this should be true for any of the instructions in instrsXArch.h
+// that use the SSE38 or SSE3A macro.
+// TODO-XArch-Cleanup: This is a temporary solution for now. Eventually this
+// needs to be addressed by expanding instruction encodings.
+bool Is4ByteAVXInstruction(instruction ins)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ return (ins == INS_dpps || ins == INS_dppd || ins == INS_insertps || ins == INS_pcmpeqq || ins == INS_pcmpgtq ||
+ ins == INS_vbroadcastss || ins == INS_vbroadcastsd || ins == INS_vpbroadcastb || ins == INS_vpbroadcastw ||
+ ins == INS_vpbroadcastd || ins == INS_vpbroadcastq || ins == INS_vextractf128 || ins == INS_vinsertf128 ||
+ ins == INS_pmulld);
+#else
+ return false;
+#endif
+}
+
+#ifdef FEATURE_AVX_SUPPORT
+// Returns true if this instruction requires a VEX prefix
+// All AVX instructions require a VEX prefix
+bool emitter::TakesVexPrefix(instruction ins)
+{
+ // special case vzeroupper as it requires 2-byte VEX prefix
+ if (ins == INS_vzeroupper)
+ {
+ return false;
+ }
+
+ return IsAVXInstruction(ins);
+}
+
+// Add base VEX prefix without setting W, R, X, or B bits
+// L bit will be set based on emitter attr.
+//
+// 3-byte VEX prefix = C4 <R,X,B,m-mmmm> <W,vvvv,L,pp>
+// - R, X, B, W - bits to express corresponding REX prefixes
+//  - m-mmmm (5 bits)
+// 0-00001 - implied leading 0F opcode byte
+// 0-00010 - implied leading 0F 38 opcode bytes
+// 0-00011 - implied leading 0F 3A opcode bytes
+//    Rest    - reserved for future use; using them will result in an undefined instruction exception
+//
+// - vvvv (4-bits) - register specifier in 1's complement form; must be 1111 if unused
+// - L - scalar or AVX-128 bit operations (L=0), 256-bit operations (L=1)
+// - pp (2-bits) - opcode extension providing equivalent functionality of a SIMD size prefix
+// these prefixes are treated mandatory when used with escape opcode 0Fh for
+// some SIMD instructions
+// 00 - None (0F - packed float)
+// 01 - 66 (66 0F - packed double)
+//                 10  - F3     (F3 0F - scalar float)
+// 11 - F2 (F2 0F - scalar double)
+//
+// TODO-AMD64-CQ: for simplicity of implementation this routine always adds 3-byte VEX
+// prefix. Based on 'attr' param we could add 2-byte VEX prefix in case of scalar
+// and AVX-128 bit operations.
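+//
+// For reference, the DEFAULT_3BYTE_VEX_PREFIX constant below decomposes (taking the bytes above bit 32)
+// as C4 E0 78: C4 is the 3-byte VEX escape byte; E0 = 11100000 sets the inverted R, X and B bits to 1
+// (no extended registers yet) and leaves m-mmmm = 00000, to be filled in later from the implied leading
+// opcode bytes in emitOutputRexOrVexPrefixIfNeeded; 78 = 01111000 gives W = 0, vvvv = 1111 (unused),
+// L = 0 and pp = 00. LBIT_IN_3BYTE_VEX_PREFIX is simply bit 2 (the L bit) of that last byte.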
+#define DEFAULT_3BYTE_VEX_PREFIX 0xC4E07800000000ULL
+#define LBIT_IN_3BYTE_VEX_PREFIX 0X00000400000000ULL
+size_t emitter::AddVexPrefix(instruction ins, size_t code, emitAttr attr)
+{
+ // Only AVX instructions require VEX prefix
+ assert(IsAVXInstruction(ins));
+
+ // Shouldn't have already added Vex prefix
+ assert(!hasVexPrefix(code));
+
+ // Set L bit to 1 in case of instructions that operate on 256-bits.
+ code |= DEFAULT_3BYTE_VEX_PREFIX;
+ if (attr == EA_32BYTE)
+ {
+ code |= LBIT_IN_3BYTE_VEX_PREFIX;
+ }
+
+ return code;
+}
+#endif // FEATURE_AVX_SUPPORT
+
+// Returns true if this instruction, for the given EA_SIZE(attr), will require a REX.W prefix
+bool TakesRexWPrefix(instruction ins, emitAttr attr)
+{
+#ifdef _TARGET_AMD64_
+ // movsx should always sign extend out to 8 bytes just because we don't track
+ // whether the dest should be 4 bytes or 8 bytes (attr indicates the size
+ // of the source, not the dest).
+ // A 4-byte movzx is equivalent to an 8 byte movzx, so it is not special
+ // cased here.
+ //
+ // Rex_jmp = jmp with rex prefix always requires rex.w prefix.
+ if (ins == INS_movsx || ins == INS_rex_jmp)
+ {
+ return true;
+ }
+
+ if (EA_SIZE(attr) != EA_8BYTE)
+ {
+ return false;
+ }
+
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ if (ins == INS_cvttsd2si || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si ||
+ ins == INS_cvtsi2sd || ins == INS_cvtsi2ss || ins == INS_mov_xmm2i || ins == INS_mov_i2xmm)
+ {
+ return true;
+ }
+
+ return false;
+ }
+
+ // TODO-XArch-Cleanup: Better way to not emit REX.W when we don't need it, than just testing all these
+ // opcodes...
+ // These are all the instructions that default to 8-byte operand without the REX.W bit
+ // With 1 special case: movzx because the 4 byte version still zeros-out the hi 4 bytes
+ // so we never need it
+ if ((ins != INS_push) && (ins != INS_pop) && (ins != INS_movq) && (ins != INS_movzx) && (ins != INS_push_hide) &&
+ (ins != INS_pop_hide) && (ins != INS_ret) && (ins != INS_call) && !((ins >= INS_i_jmp) && (ins <= INS_l_jg)))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+#else //!_TARGET_AMD64 = _TARGET_X86_
+ return false;
+#endif //!_TARGET_AMD64_
+}
+
+// Returns true if using this register will require a REX.* prefix.
+// Since XMM registers overlap with YMM registers, this routine
+// can also be used to tell whether a YMM register will require one, when the
+// instruction in question is an AVX instruction.
+bool IsExtendedReg(regNumber reg)
+{
+#ifdef _TARGET_AMD64_
+ return ((reg >= REG_R8) && (reg <= REG_R15)) || ((reg >= REG_XMM8) && (reg <= REG_XMM15));
+#else
+ // X86 JIT operates in 32-bit mode and hence extended reg are not available.
+ return false;
+#endif
+}
+
+// Returns true if using this register, for the given EA_SIZE(attr), will require a REX.* prefix
+bool IsExtendedReg(regNumber reg, emitAttr attr)
+{
+#ifdef _TARGET_AMD64_
+ // Not a register, so doesn't need a prefix
+ if (reg > REG_XMM15)
+ {
+ return false;
+ }
+
+ // Opcode field only has 3 bits for the register, these high registers
+    // need a 4th bit, that comes from the REX prefix (either REX.X, REX.R, or REX.B)
+ if (IsExtendedReg(reg))
+ {
+ return true;
+ }
+
+ if (EA_SIZE(attr) != EA_1BYTE)
+ {
+ return false;
+ }
+
+    // There are 12 one-byte registers addressable 'below' r8b:
+ // al, cl, dl, bl, ah, ch, dh, bh, spl, bpl, sil, dil.
+    // The first 4 are always addressable; the last 8 are divided into 2 sets:
+ // ah, ch, dh, bh
+ // -- or --
+ // spl, bpl, sil, dil
+ // Both sets are encoded exactly the same, the difference is the presence
+ // of a REX prefix, even a REX prefix with no other bits set (0x40).
+ // So in order to get to the second set we need a REX prefix (but no bits).
+ //
+ // TODO-AMD64-CQ: if we ever want to start using the first set, we'll need a different way of
+ // encoding/tracking/encoding registers.
+ return (reg >= REG_RSP);
+#else
+ // X86 JIT operates in 32-bit mode and hence extended reg are not available.
+ return false;
+#endif
+}
+
+// Returns true if the given register is an XMM register. Since XMM registers
+// overlap with YMM registers, this routine can also be used to identify a YMM
+// register in the case of AVX instructions.
+//
+// Legacy X86: we have XMM0-XMM7 available but this routine cannot be used to
+// determine whether a reg is XMM because they share the same reg numbers
+// with integer registers. Hence always return false.
+bool IsXMMReg(regNumber reg)
+{
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_AMD64_
+ return (reg >= REG_XMM0) && (reg <= REG_XMM15);
+#else // !_TARGET_AMD64_
+ return (reg >= REG_XMM0) && (reg <= REG_XMM7);
+#endif // !_TARGET_AMD64_
+#else // LEGACY_BACKEND
+ return false;
+#endif // LEGACY_BACKEND
+}
+
+// Returns bits to be encoded in instruction for the given register.
+regNumber RegEncoding(regNumber reg)
+{
+#ifndef LEGACY_BACKEND
+ // XMM registers do not share the same reg numbers as integer registers.
+ // But register encoding of integer and XMM registers is the same.
+ // Therefore, subtract XMMBASE from regNumber to get the register encoding
+ // in case of XMM registers.
+ return (regNumber)((IsXMMReg(reg) ? reg - XMMBASE : reg) & 0x7);
+#else // LEGACY_BACKEND
+ // Legacy X86: XMM registers share the same reg numbers as integer registers and
+ // hence nothing to do to get reg encoding.
+ return (regNumber)(reg & 0x7);
+#endif // LEGACY_BACKEND
+}
+
+// Utility routines that abstract the logic of adding REX.W, REX.R, REX.X, REX.B and REX prefixes
+// SSE2: separate 1-byte prefix gets added before opcode.
+// AVX: specific bits within VEX prefix need to be set in bit-inverted form.
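+//
+// In the non-AVX case the REX byte is carried in bits 32-39 of the opcode value (for example, OR-ing in
+// 0x4800000000ULL below places a 0x48 REX.W byte there); emitOutputRexOrVexPrefixIfNeeded later extracts
+// that byte and makes sure it is emitted after any other prefixes and before the opcode.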
+size_t emitter::AddRexWPrefix(instruction ins, size_t code)
+{
+#ifdef _TARGET_AMD64_
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
+ assert(hasVexPrefix(code));
+
+ // W-bit is the only bit that is added in non bit-inverted form.
+ return code | 0x00008000000000ULL;
+ }
+
+ return code | 0x4800000000ULL;
+#else
+ assert(!"UNREACHED");
+ return code;
+#endif
+}
+
+#ifdef _TARGET_AMD64_
+
+size_t emitter::AddRexRPrefix(instruction ins, size_t code)
+{
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // Right now support 3-byte VEX prefix
+ assert(hasVexPrefix(code));
+
+ // R-bit is added in bit-inverted form.
+ return code & 0xFF7FFFFFFFFFFFULL;
+ }
+
+ return code | 0x4400000000ULL;
+}
+
+size_t emitter::AddRexXPrefix(instruction ins, size_t code)
+{
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // Right now support 3-byte VEX prefix
+ assert(hasVexPrefix(code));
+
+ // X-bit is added in bit-inverted form.
+ return code & 0xFFBFFFFFFFFFFFULL;
+ }
+
+ return code | 0x4200000000ULL;
+}
+
+size_t emitter::AddRexBPrefix(instruction ins, size_t code)
+{
+ if (UseAVX() && IsAVXInstruction(ins))
+ {
+ // Right now support 3-byte VEX prefix
+ assert(hasVexPrefix(code));
+
+ // B-bit is added in bit-inverted form.
+ return code & 0xFFDFFFFFFFFFFFULL;
+ }
+
+ return code | 0x4100000000ULL;
+}
+
+// Adds REX prefix (0x40) without W, R, X or B bits set
+size_t emitter::AddRexPrefix(instruction ins, size_t code)
+{
+ assert(!UseAVX() || !IsAVXInstruction(ins));
+ return code | 0x4000000000ULL;
+}
+
+bool isPrefix(BYTE b)
+{
+ assert(b != 0); // Caller should check this
+ assert(b != 0x67); // We don't use the address size prefix
+ assert(b != 0x65); // The GS segment override prefix is emitted separately
+ assert(b != 0x64); // The FS segment override prefix is emitted separately
+ assert(b != 0xF0); // The lock prefix is emitted separately
+ assert(b != 0x2E); // We don't use the CS segment override prefix
+ assert(b != 0x3E); // Or the DS segment override prefix
+ assert(b != 0x26); // Or the ES segment override prefix
+ assert(b != 0x36); // Or the SS segment override prefix
+
+ // That just leaves the size prefixes used in SSE opcodes:
+ // Scalar Double Scalar Single Packed Double
+ return ((b == 0xF2) || (b == 0xF3) || (b == 0x66));
+}
+
+#endif //_TARGET_AMD64_
+
+// Outputs VEX prefix (in case of AVX instructions) and REX.R/X/W/B otherwise.
+unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code)
+{
+#ifdef _TARGET_AMD64_ // TODO-x86: This needs to be enabled for AVX support on x86.
+ if (hasVexPrefix(code))
+ {
+ // Only AVX instructions should have a VEX prefix
+ assert(UseAVX() && IsAVXInstruction(ins));
+ size_t vexPrefix = (code >> 32) & 0x00FFFFFF;
+ code &= 0x00000000FFFFFFFFLL;
+
+ WORD leadingBytes = 0;
+ BYTE check = (code >> 24) & 0xFF;
+ if (check != 0)
+ {
+ // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
+ // 4-byte opcode: with the bytes ordered as 0x22114433
+ // check for a prefix in the 11 position
+ BYTE sizePrefix = (code >> 16) & 0xFF;
+ if (sizePrefix != 0 && isPrefix(sizePrefix))
+ {
+ // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
+ //
+ // 00 - None (0F - packed float)
+ // 01 - 66 (66 0F - packed double)
+                //   10  - F3     (F3 0F - scalar float)
+ // 11 - F2 (F2 0F - scalar double)
+ switch (sizePrefix)
+ {
+ case 0x66:
+ vexPrefix |= 0x01;
+ break;
+ case 0xF3:
+ vexPrefix |= 0x02;
+ break;
+ case 0xF2:
+ vexPrefix |= 0x03;
+ break;
+ default:
+ assert(!"unrecognized SIMD size prefix");
+ unreached();
+ }
+
+ // Now the byte in the 22 position must be an escape byte 0F
+ leadingBytes = check;
+ assert(leadingBytes == 0x0F);
+
+ // Get rid of both sizePrefix and escape byte
+ code &= 0x0000FFFFLL;
+
+ // Check the byte in the 33 position to see if it is 3A or 38.
+ // In such a case escape bytes must be 0x0F3A or 0x0F38
+ check = code & 0xFF;
+ if (check == 0x3A || check == 0x38)
+ {
+ leadingBytes = (leadingBytes << 8) | check;
+ code &= 0x0000FF00LL;
+ }
+ }
+ }
+ else
+ {
+ // 2-byte opcode with the bytes ordered as 0x0011RM22
+ // the byte in position 11 must be an escape byte.
+ leadingBytes = (code >> 16) & 0xFF;
+ assert(leadingBytes == 0x0F || leadingBytes == 0x00);
+ code &= 0xFFFF;
+ }
+
+ // If there is an escape byte it must be 0x0F or 0x0F3A or 0x0F38
+ // m-mmmmm bits in byte 1 of VEX prefix allows us to encode these
+ // implied leading bytes
+ switch (leadingBytes)
+ {
+ case 0x00:
+ // there is no leading byte
+ break;
+ case 0x0F:
+ vexPrefix |= 0x0100;
+ break;
+ case 0x0F38:
+ vexPrefix |= 0x0200;
+ break;
+ case 0x0F3A:
+ vexPrefix |= 0x0300;
+ break;
+ default:
+ assert(!"encountered unknown leading bytes");
+ unreached();
+ }
+
+ // At this point
+ // VEX.2211RM33 got transformed as VEX.0000RM33
+ // VEX.0011RM22 got transformed as VEX.0000RM22
+ //
+ // Now output VEX prefix leaving the 4-byte opcode
+ emitOutputByte(dst, ((vexPrefix >> 16) & 0xFF));
+ emitOutputByte(dst + 1, ((vexPrefix >> 8) & 0xFF));
+ emitOutputByte(dst + 2, vexPrefix & 0xFF);
+ return 3;
+ }
+ else if (code > 0x00FFFFFFFFLL)
+ {
+ BYTE prefix = (code >> 32) & 0xFF;
+ noway_assert(prefix >= 0x40 && prefix <= 0x4F);
+ code &= 0x00000000FFFFFFFFLL;
+
+ // TODO-AMD64-Cleanup: when we remove the prefixes (just the SSE opcodes right now)
+ // we can remove this code as well
+
+ // The REX prefix is required to come after all other prefixes.
+ // Some of our 'opcodes' actually include some prefixes, if that
+ // is the case, shift them over and place the REX prefix after
+ // the other prefixes, and emit any prefix that got moved out.
+ BYTE check = (code >> 24) & 0xFF;
+ if (check == 0)
+ {
+ // 3-byte opcode: with the bytes ordered as 0x00113322
+ // check for a prefix in the 11 position
+ check = (code >> 16) & 0xFF;
+ if (check != 0 && isPrefix(check))
+ {
+ // Swap the rex prefix and whatever this prefix is
+ code = (((DWORD)prefix << 16) | (code & 0x0000FFFFLL));
+ // and then emit the other prefix
+ return emitOutputByte(dst, check);
+ }
+ }
+ else
+ {
+ // 4-byte opcode with the bytes ordered as 0x22114433
+ // first check for a prefix in the 11 position
+ BYTE check2 = (code >> 16) & 0xFF;
+ if (isPrefix(check2))
+ {
+ assert(!isPrefix(check)); // We currently don't use this, so it is untested
+ if (isPrefix(check))
+ {
+ // 3 prefixes were rex = rr, check = c1, check2 = c2 encoded as 0xrrc1c2XXXX
+ // Change to c2rrc1XXXX, and emit check2 now
+ code = (((size_t)prefix << 24) | ((size_t)check << 16) | (code & 0x0000FFFFLL));
+ }
+ else
+ {
+ // 2 prefixes were rex = rr, check2 = c2 encoded as 0xrrXXc2XXXX, (check is part of the opcode)
+ // Change to c2XXrrXXXX, and emit check2 now
+ code = (((size_t)check << 24) | ((size_t)prefix << 16) | (code & 0x0000FFFFLL));
+ }
+ return emitOutputByte(dst, check2);
+ }
+ }
+
+ return emitOutputByte(dst, prefix);
+ }
+#endif // _TARGET_AMD64_
+
+ return 0;
+}
+
+#ifdef _TARGET_AMD64_
+/*****************************************************************************
+ * Is the last instruction emitted a call instruction?
+ */
+bool emitter::emitIsLastInsCall()
+{
+ if ((emitLastIns != nullptr) && (emitLastIns->idIns() == INS_call))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * We're about to create an epilog. If the last instruction we output was a 'call',
+ * then we need to insert a NOP, to allow for proper exception-handling behavior.
+ */
+void emitter::emitOutputPreEpilogNOP()
+{
+ if (emitIsLastInsCall())
+ {
+ emitIns(INS_nop);
+ }
+}
+
+#endif //_TARGET_AMD64_
+
+// Size of rex prefix in bytes
+unsigned emitter::emitGetRexPrefixSize(instruction ins)
+{
+
+ // In case of AVX instructions, REX prefixes are part of VEX prefix.
+ // And hence requires no additional byte to encode REX prefixes.
+ if (IsAVXInstruction(ins))
+ {
+ return 0;
+ }
+
+ // If not AVX, then we would need 1-byte to encode REX prefix.
+ return 1;
+}
+
+// Size of vex prefix in bytes
+unsigned emitter::emitGetVexPrefixSize(instruction ins, emitAttr attr)
+{
+ // TODO-XArch-CQ: right now we default to 3-byte VEX prefix. There is a
+ // scope for size win by using 2-byte vex prefix for some of the
+ // scalar, avx-128 and most common avx-256 instructions.
+ if (IsAVXInstruction(ins))
+ {
+ return 3;
+ }
+
+ // If not AVX, then we don't need to encode vex prefix.
+ return 0;
+}
+
+// The VEX prefix encodes some bytes of the opcode, so the overall size of the instruction shrinks.
+// Therefore, estimating the size by adding the VEX prefix size to the size of the instruction opcode bytes
+// will always overestimate it. Instead, this routine adjusts the size of the VEX prefix based on the number
+// of opcode bytes it encodes, so that the instruction size estimate is accurate.
+// Basically, this function decreases vexPrefixSize so that opcodeSize + vexPrefixAdjustedSize is the right size:
+//   rightOpcodeSize + vexPrefixSize
+// = (opcodeSize - extraBytesSize) + vexPrefixSize
+// = opcodeSize + (vexPrefixSize - extraBytesSize)
+// = opcodeSize + vexPrefixAdjustedSize
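+//
+// For example, when the opcode bytes include both an escape byte (0F) and a SIMD size prefix (66/F2/F3),
+// both get folded into the VEX prefix's m-mmmm and pp fields, so the adjusted size returned here is
+// 3 - 1 - 1 = 1 byte.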
+unsigned emitter::emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ if (IsAVXInstruction(ins))
+ {
+ unsigned vexPrefixAdjustedSize = emitGetVexPrefixSize(ins, attr);
+ // Currently vex prefix size is hard coded as 3 bytes,
+ // In future we should support 2 bytes vex prefix.
+ assert(vexPrefixAdjustedSize == 3);
+
+        // In this case, the opcode will contain at least one escape prefix byte,
+        // so vexPrefixAdjustedSize should be decreased by one.
+ vexPrefixAdjustedSize -= 1;
+
+ // Get the fourth byte in Opcode.
+ // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
+ BYTE check = (code >> 24) & 0xFF;
+ if (check != 0)
+ {
+ // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
+ // 4-byte opcode: with the bytes ordered as 0x22114433
+ // Simd prefix is at the first byte.
+ BYTE sizePrefix = (code >> 16) & 0xFF;
+ if (sizePrefix != 0 && isPrefix(sizePrefix))
+ {
+ vexPrefixAdjustedSize -= 1;
+ }
+
+            // If the opcode is 4 bytes, then the second escape prefix is in the fourth byte of the opcode.
+            // But in this case the opcode size does not count the ModR/M byte:
+            //   opcodeSize + vexPrefixAdjustedSize - extraEscapePrefixSize + modRMSize
+            // = opcodeSize + vexPrefixAdjustedSize - 1 + 1
+            // = opcodeSize + vexPrefixAdjustedSize
+            // So although we may have a second escape prefix byte, we won't decrease vexPrefixAdjustedSize.
+ }
+
+ return vexPrefixAdjustedSize;
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ return 0;
+}
+
+// Get size of rex or vex prefix emitted in code
+unsigned emitter::emitGetPrefixSize(size_t code)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ if (code & VEX_PREFIX_MASK_3BYTE)
+ {
+ return 3;
+ }
+ else
+#endif
+ if (code & REX_PREFIX_MASK)
+ {
+ return 1;
+ }
+
+ return 0;
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Record a non-empty stack
+ */
+
+void emitter::emitMarkStackLvl(unsigned stackLevel)
+{
+ assert(int(stackLevel) >= 0);
+ assert(emitCurStackLvl == 0);
+ assert(emitCurIG->igStkLvl == 0);
+ assert(emitCurIGfreeNext == emitCurIGfreeBase);
+
+ assert(stackLevel && stackLevel % sizeof(int) == 0);
+
+ emitCurStackLvl = emitCurIG->igStkLvl = stackLevel;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Get hold of the address mode displacement value for an indirect call.
+ */
+
+inline ssize_t emitter::emitGetInsCIdisp(instrDesc* id)
+{
+ if (id->idIsLargeCall())
+ {
+ return ((instrDescCGCA*)id)->idcDisp;
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ return id->idAddr()->iiaAddrMode.amDisp;
+ }
+}
+
+/** ***************************************************************************
+ *
+ * The following table is used by the instIsFP()/instUse/DefFlags() helpers.
+ */
+
+#define INST_DEF_FL 0x20 // does the instruction set flags?
+#define INST_USE_FL 0x40 // does the instruction use flags?
+
+// clang-format off
+const BYTE CodeGenInterface::instInfo[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST1(id, nm, fp, um, rf, wf, mr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) (INST_USE_FL*rf|INST_DEF_FL*wf|INST_FP*fp),
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+/*****************************************************************************
+ *
+ * Initialize the table used by emitInsModeFormat().
+ */
+
+// clang-format off
+const BYTE emitter::emitInsModeFmtTab[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr ) um,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) um,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) um,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) um,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) um,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) um,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+#ifdef DEBUG
+unsigned const emitter::emitInsModeFmtCnt = sizeof(emitInsModeFmtTab) / sizeof(emitInsModeFmtTab[0]);
+#endif
+
+/*****************************************************************************
+ *
+ *  Combine the given base format with the update mode of the instruction.
+ */
+
+inline emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base)
+{
+ assert(IF_RRD + IUM_RD == IF_RRD);
+ assert(IF_RRD + IUM_WR == IF_RWR);
+ assert(IF_RRD + IUM_RW == IF_RRW);
+
+ return (insFormat)(base + emitInsUpdateMode(ins));
+}
+
+/*****************************************************************************
+ *
+ *  A version of emitInsModeFormat() that handles X87 floating-point instructions.
+ */
+
+#if FEATURE_STACK_FP_X87
+emitter::insFormat emitter::emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst)
+{
+ if (CodeGen::instIsFP(ins))
+ {
+ assert(IF_TRD_SRD + 1 == IF_TWR_SRD);
+ assert(IF_TRD_SRD + 2 == IF_TRW_SRD);
+
+ assert(IF_TRD_MRD + 1 == IF_TWR_MRD);
+ assert(IF_TRD_MRD + 2 == IF_TRW_MRD);
+
+ assert(IF_TRD_ARD + 1 == IF_TWR_ARD);
+ assert(IF_TRD_ARD + 2 == IF_TRW_ARD);
+
+ switch (ins)
+ {
+ case INS_fst:
+ case INS_fstp:
+ case INS_fistp:
+ case INS_fistpl:
+ return (insFormat)(FPst);
+
+ case INS_fld:
+ case INS_fild:
+ return (insFormat)(FPld + 1);
+
+ case INS_fcomp:
+ case INS_fcompp:
+ case INS_fcomip:
+ return (insFormat)(FPld);
+
+ default:
+ return (insFormat)(FPld + 2);
+ }
+ }
+ else
+ {
+ return emitInsModeFormat(ins, base);
+ }
+}
+#endif // FEATURE_STACK_FP_X87
+
+// This is a helper we need due to VS Whidbey #254016 in order to determine
+// whether we cannot possibly be updating an integer register. This is not the best
+// solution, but the other ones (see the bug) are going to be much more complicated.
+// The issue here is that on legacy x86, the XMM registers use the same register numbers
+// as the general purpose registers, so we need to distinguish them.
+// We really only need this for x86 where this issue exists.
+bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
+{
+ instruction ins = id->idIns();
+
+ // The following SSE2 instructions write to a general purpose integer register.
+ if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si
+#ifndef LEGACY_BACKEND
+ || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si
+#endif // !LEGACY_BACKEND
+ )
+ {
+ return false;
+ }
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Returns the base encoding of the given CPU instruction.
+ */
+
+inline size_t insCode(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodes[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr ) mr,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) mr,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodes) / sizeof(insCodes[0]));
+ assert((insCodes[ins] != BAD_CODE));
+
+ return insCodes[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m], 32-bit icon" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeMI(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesMI[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mi,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mi,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mi,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mi,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesMI) / sizeof(insCodesMI[0]));
+ assert((insCodesMI[ins] != BAD_CODE));
+
+ return insCodesMI[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "reg, [r/m]" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeRM(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesRM[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi )
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) rm,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) rm,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rm,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesRM) / sizeof(insCodesRM[0]));
+ assert((insCodesRM[ins] != BAD_CODE));
+
+ return insCodesRM[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "AL/AX/EAX, imm" accumulator encoding of the given instruction.
+ */
+
+inline size_t insCodeACC(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesACC[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi )
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) a4,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) a4,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesACC) / sizeof(insCodesACC[0]));
+ assert((insCodesACC[ins] != BAD_CODE));
+
+ return insCodesACC[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns the "register" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeRR(instruction ins)
+{
+ // clang-format off
+ const static
+ size_t insCodesRR[] =
+ {
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr )
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi )
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) rr,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insCodesRR) / sizeof(insCodesRR[0]));
+ assert((insCodesRR[ins] != BAD_CODE));
+
+ return insCodesRR[ins];
+}
+
+// clang-format off
+const static
+size_t insCodesMR[] =
+{
+ #define INST0(id, nm, fp, um, rf, wf, mr )
+ #define INST1(id, nm, fp, um, rf, wf, mr ) mr,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) mr,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) mr,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) mr,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) mr,
+ #include "instrs.h"
+ #undef INST0
+ #undef INST1
+ #undef INST2
+ #undef INST3
+ #undef INST4
+ #undef INST5
+};
+// clang-format on
+
+// Returns true iff the given CPU instruction has an MR encoding.
+inline size_t hasCodeMR(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesMR) / sizeof(insCodesMR[0]));
+ return ((insCodesMR[ins] != BAD_CODE));
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m], reg" or "[r/m]" encoding of the given CPU instruction.
+ */
+
+inline size_t insCodeMR(instruction ins)
+{
+ assert((unsigned)ins < sizeof(insCodesMR) / sizeof(insCodesMR[0]));
+ assert((insCodesMR[ins] != BAD_CODE));
+
+ return insCodesMR[ins];
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in the bit0-2
+ * part of an opcode.
+ */
+
+inline unsigned emitter::insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code)
+{
+ assert(reg < REG_STK);
+
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_AMD64_
+ // Either code is not NULL or reg is not an extended reg.
+ // If reg is an extended reg, instruction needs to be prefixed with 'REX'
+ // which would require code != NULL.
+ assert(code != nullptr || !IsExtendedReg(reg));
+
+ if (IsExtendedReg(reg))
+ {
+ *code = AddRexBPrefix(ins, *code); // REX.B
+ }
+ else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
+ {
+ // We are assuming that we only use/encode SPL, BPL, SIL and DIL
+ // not the corresponding AH, CH, DH, or BH
+ *code = AddRexPrefix(ins, *code); // REX
+ }
+#endif // _TARGET_AMD64_
+
+ reg = RegEncoding(reg);
+ assert(reg < 8);
+ return reg;
+
+#else // LEGACY_BACKEND
+
+ assert(reg < 8);
+ return reg;
+
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in the bit3-5
+ * part of an opcode.
+ */
+
+inline unsigned emitter::insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code)
+{
+ assert(reg < REG_STK);
+
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_AMD64_
+ // Either code is not NULL or reg is not an extended reg.
+ // If reg is an extended reg, instruction needs to be prefixed with 'REX'
+ // which would require code != NULL.
+ assert(code != nullptr || !IsExtendedReg(reg));
+
+ if (IsExtendedReg(reg))
+ {
+ *code = AddRexRPrefix(ins, *code); // REX.R
+ }
+ else if ((EA_SIZE(size) == EA_1BYTE) && (reg > REG_RBX) && (code != nullptr))
+ {
+ // We are assuming that we only use/encode SPL, BPL, SIL and DIL
+ // not the corresponding AH, CH, DH, or BH
+ *code = AddRexPrefix(ins, *code); // REX
+ }
+#endif // _TARGET_AMD64_
+
+ reg = RegEncoding(reg);
+ assert(reg < 8);
+ return (reg << 3);
+
+#else // LEGACY_BACKEND
+ assert(reg < 8);
+ return (reg << 3);
+#endif // LEGACY_BACKEND
+}
+
+/***********************************************************************************
+ *
+ * Returns modified AVX opcode with the specified register encoded in bits 3-6 of
+ * byte 2 of VEX prefix.
+ */
+inline size_t emitter::insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ assert(reg < REG_STK);
+ assert(IsAVXInstruction(ins));
+ assert(hasVexPrefix(code));
+
+ // Get 4-bit register encoding
+ // RegEncoding() gives lower 3 bits
+ // IsExtendedReg() gives MSB.
+ size_t regBits = RegEncoding(reg);
+ if (IsExtendedReg(reg))
+ {
+ regBits |= 0x08;
+ }
+
+ // VEX prefix encodes register operand in 1's complement form
+    // Shift count = 32 bits of opcode + 3 bits to skip over the pp and L fields (bits 0-2 of VEX byte 2),
+    // which places the 4 register bits into the vvvv field (bits 3-6).
+ assert(regBits <= 0xF);
+ regBits <<= 35;
+ return code ^ regBits;
+
+#else
+ return code;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Returns an encoding for the specified register to be used in the bit3-5
+ * part of an SIB byte (unshifted).
+ * Used exclusively to generate the REX.X bit and truncate the register.
+ */
+
+inline unsigned emitter::insEncodeRegSIB(instruction ins, regNumber reg, size_t* code)
+{
+ assert(reg < REG_STK);
+
+#ifdef _TARGET_AMD64_
+ // Either code is not NULL or reg is not an extended reg.
+ // If reg is an extended reg, instruction needs to be prefixed with 'REX'
+ // which would require code != NULL.
+ assert(code != nullptr || reg < REG_R8 || (reg >= REG_XMM0 && reg < REG_XMM8));
+
+ if (IsExtendedReg(reg))
+ {
+ *code = AddRexXPrefix(ins, *code); // REX.X
+ }
+ reg = RegEncoding(reg);
+#endif
+
+ assert(reg < 8);
+ return reg;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m]" opcode with the mod/RM field set to register.
+ */
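+//
+// (When the byte at mask 0xFF00 is free, the ModR/M byte is placed there; OR-ing in 0xC000 sets its top
+// two bits, mod = 11, which selects register-direct addressing.)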
+
+inline size_t emitter::insEncodeMRreg(instruction ins, size_t code)
+{
+ // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
+ // Otherwise, it will be placed after the 4 byte encoding.
+ if ((code & 0xFF00) == 0)
+ {
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ }
+
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "[r/m], icon" opcode with the mod/RM field set to register.
+ */
+
+inline size_t insEncodeMIreg(instruction ins, size_t code)
+{
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the given "[r/m]" opcode with the mod/RM field set to register.
+ */
+
+inline size_t insEncodeRMreg(instruction ins, size_t code)
+{
+ // If Byte 4 (which is 0xFF00) is 0, that's where the RM encoding goes.
+ // Otherwise, it will be placed after the 4 byte encoding.
+ if ((code & 0xFF00) == 0)
+ {
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ }
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "byte ptr [r/m]" opcode with the mod/RM field set to
+ * the given register.
+ */
+
+inline size_t emitter::insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code)
+{
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
+ code |= regcode;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns the "byte ptr [r/m], icon" opcode with the mod/RM field set to
+ * the given register.
+ */
+
+inline size_t emitter::insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code)
+{
+ assert((code & 0xC000) == 0);
+ code |= 0xC000;
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code) << 8;
+ code |= regcode;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Returns true iff the given instruction does not have a "[r/m], icon" form, but *does* have a
+ * "reg,reg,imm8" form.
+ */
+inline bool insNeedsRRIb(instruction ins)
+{
+ // If this list gets longer, use a switch or a table.
+ return ins == INS_imul;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the "reg,reg,imm8" opcode with both regs set to the
+ *  given register.
+ */
+inline size_t emitter::insEncodeRRIb(instruction ins, regNumber reg, emitAttr size)
+{
+ assert(size == EA_4BYTE); // All we handle for now.
+ assert(insNeedsRRIb(ins));
+ // If this list gets longer, use a switch, or a table lookup.
+ size_t code = 0x69c0;
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+ // We use the same register as source and destination. (Could have another version that does both regs...)
+ code |= regcode;
+ code |= (regcode << 3);
+ return code;
+}
+
+/*****************************************************************************
+ *
+ *  Returns the "+reg" opcode with the given register encoded in the low
+ *  nibble of the opcode.
+ */
+
+inline size_t emitter::insEncodeOpreg(instruction ins, regNumber reg, emitAttr size)
+{
+ size_t code = insCodeRR(ins);
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+ code |= regcode;
+ return code;
+}
+
+/*****************************************************************************
+ *
+ * Return the 'SS' field value for the given index scale factor.
+ */
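+//
+// The SS field occupies the top two bits (bits 6-7) of the SIB byte, which is why the table below uses
+// 0x00, 0x40, 0x80 and 0xC0 for scale factors 1, 2, 4 and 8.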
+
+inline unsigned insSSval(unsigned scale)
+{
+ assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+
+ const static BYTE scales[] = {
+ 0x00, // 1
+ 0x40, // 2
+ 0xFF, // 3
+ 0x80, // 4
+ 0xFF, // 5
+ 0xFF, // 6
+ 0xFF, // 7
+ 0xC0, // 8
+ };
+
+ return scales[scale - 1];
+}
+
+const instruction emitJumpKindInstructions[] = {INS_nop,
+
+#define JMP_SMALL(en, rev, ins) INS_##ins,
+#include "emitjmps.h"
+
+ INS_call};
+
+const emitJumpKind emitReverseJumpKinds[] = {
+ EJ_NONE,
+
+#define JMP_SMALL(en, rev, ins) EJ_##rev,
+#include "emitjmps.h"
+};
+
+/*****************************************************************************
+ * Look up the instruction for a jump kind
+ */
+
+/*static*/ instruction emitter::emitJumpKindToIns(emitJumpKind jumpKind)
+{
+ assert((unsigned)jumpKind < ArrLen(emitJumpKindInstructions));
+ return emitJumpKindInstructions[jumpKind];
+}
+
+/*****************************************************************************
+ * Reverse the conditional jump
+ */
+
+/* static */ emitJumpKind emitter::emitReverseJumpKind(emitJumpKind jumpKind)
+{
+ assert(jumpKind < EJ_COUNT);
+ return emitReverseJumpKinds[jumpKind];
+}
+
+/*****************************************************************************
+ * The size for these instructions is less than EA_4BYTE,
+ *  Returns true for instructions that have no encoding of their own
+ *  (currently only the INS_align pseudo-instruction).
+
+inline bool emitInstHasNoCode(instruction ins)
+{
+ if (ins == INS_align)
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * When encoding instructions that operate on byte registers
+ * we have to ensure that we use a low register (EAX, EBX, ECX or EDX)
+ * otherwise we will incorrectly encode the instruction
+ */
+
+bool emitter::emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 /* = REG_NA */)
+{
+#if CPU_HAS_BYTE_REGS
+ if (size != EA_1BYTE) // Not operating on a byte register is fine
+ {
+ return true;
+ }
+
+ if ((ins != INS_movsx) && // These two instructions support high register
+ (ins != INS_movzx)) // encodings for reg1
+ {
+ // reg1 must be a byte-able register
+ if ((genRegMask(reg1) & RBM_BYTE_REGS) == 0)
+ {
+ return false;
+ }
+ }
+ // if reg2 is not REG_NA then reg2 must be a byte-able register
+ if ((reg2 != REG_NA) && ((genRegMask(reg2) & RBM_BYTE_REGS) == 0))
+ {
+ return false;
+ }
+#endif
+ // The instruction can be encoded
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Estimate the size (in bytes of generated code) of the given instruction.
+ */
+
+inline UNATIVE_OFFSET emitter::emitInsSize(size_t code)
+{
+ UNATIVE_OFFSET size = (code & 0xFF000000) ? 4 : (code & 0x00FF0000) ? 3 : 2;
+#ifdef _TARGET_AMD64_
+ size += emitGetPrefixSize(code);
+#endif
+ return size;
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeRM(instruction ins)
+{
+ return emitInsSize(insCodeRM(ins));
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ UNATIVE_OFFSET sz;
+#ifdef _TARGET_AMD64_
+ // If Byte 4 (which is 0xFF00) is non-zero, that's where the RM encoding goes.
+ // Otherwise, it will be placed after the 4 byte encoding, making the total 5 bytes.
+ // This would probably be better expressed as a different format or something?
+ if (insCodeRM(ins) & 0xFF00)
+ {
+ sz = 5;
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ size_t code = insCodeRM(ins);
+ sz = emitInsSize(insEncodeRMreg(ins, code));
+ }
+
+ // Most 16-bit operand instructions will need a prefix
+ if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
+ {
+ sz += 1;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, size, insCodeRM(ins));
+
+ // REX prefix
+ if ((TakesRexWPrefix(ins, size) && ((ins != INS_xor) || (reg1 != reg2))) || IsExtendedReg(reg1, attr) ||
+ IsExtendedReg(reg2, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ return sz;
+}
+
+/*****************************************************************************/
+
+inline UNATIVE_OFFSET emitter::emitInsSizeSV(size_t code, int var, int dsp)
+{
+ UNATIVE_OFFSET size = emitInsSize(code);
+ UNATIVE_OFFSET offs;
+ bool offsIsUpperBound = true;
+ bool EBPbased = true;
+
+ /* Is this a temporary? */
+
+ if (var < 0)
+ {
+ /* An address off of ESP takes an extra byte */
+
+ if (!emitHasFramePtr)
+ {
+ size++;
+ }
+
+#ifndef LEGACY_BACKEND
+ // The offset is already assigned. Find the temp.
+ TempDsc* tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_USED);
+ if (tmp == nullptr)
+ {
+ // It might be in the free lists, if we're working on zero initializing the temps.
+ tmp = emitComp->tmpFindNum(var, Compiler::TEMP_USAGE_FREE);
+ }
+ assert(tmp != nullptr);
+ offs = tmp->tdTempOffs();
+
+ // We only care about the magnitude of the offset here, to determine instruction size.
+ if (emitComp->isFramePointerUsed())
+ {
+ if ((int)offs < 0)
+ {
+ offs = -(int)offs;
+ }
+ }
+ else
+ {
+ // SP-based offsets must already be positive.
+ assert((int)offs >= 0);
+ }
+#else // LEGACY_BACKEND
+ /* We'll have to estimate the max. possible offset of this temp */
+
+ // TODO: Get an estimate of the temp offset instead of assuming
+ // TODO: that any temp may be at the max. temp offset!!!!!!!!!!
+
+ if (emitComp->lvaTempsHaveLargerOffsetThanVars())
+ {
+ offs = emitLclSize + emitMaxTmpSize;
+ }
+ else
+ {
+ offs = emitMaxTmpSize;
+ }
+
+ offsIsUpperBound = false;
+#endif // LEGACY_BACKEND
+ }
+ else
+ {
+
+ /* Get the frame offset of the (non-temp) variable */
+
+ offs = dsp + emitComp->lvaFrameAddress(var, &EBPbased);
+
+ /* An address off of ESP takes an extra byte */
+
+ if (!EBPbased)
+ {
+ ++size;
+ }
+
+ /* Is this a stack parameter reference? */
+
+ if (emitComp->lvaIsParameter(var)
+#if !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
+ && !emitComp->lvaIsRegArgument(var)
+#endif // !_TARGET_AMD64_ || UNIX_AMD64_ABI
+ )
+ {
+ /* If no EBP frame, arguments are off of ESP, above temps */
+
+ if (!EBPbased)
+ {
+ assert((int)offs >= 0);
+
+ offsIsUpperBound = false; // since #temps can increase
+ offs += emitMaxTmpSize;
+ }
+ }
+ else
+ {
+ /* Locals off of EBP are at negative offsets */
+
+ if (EBPbased)
+ {
+#if defined(_TARGET_AMD64_) && !defined(PLATFORM_UNIX)
+                // If localloc is not used, then ebp chaining is done and hence
+                // offsets of locals will be negative; otherwise, offsets
+                // will be positive. In the future, when RBP gets positioned in the
+                // middle of the frame so as to optimize instruction encoding size,
+                // the asserts below will need to be modified appropriately.
+ // However, for Unix platforms, we always do frame pointer chaining,
+ // so offsets from the frame pointer will always be negative.
+ if (emitComp->compLocallocUsed || emitComp->opts.compDbgEnC)
+ {
+ noway_assert((int)offs >= 0);
+ }
+ else
+#endif
+ {
+ // Dev10 804810 - failing this assert can lead to bad codegen and runtime crashes
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef UNIX_AMD64_ABI
+ LclVarDsc* varDsc = emitComp->lvaTable + var;
+ bool isRegPassedArg = varDsc->lvIsParam && varDsc->lvIsRegArg;
+ // Register passed args could have a stack offset of 0.
+ noway_assert((int)offs < 0 || isRegPassedArg);
+#else // !UNIX_AMD64_ABI
+ noway_assert((int)offs < 0);
+#endif // !UNIX_AMD64_ABI
+ }
+
+ assert(emitComp->lvaTempsHaveLargerOffsetThanVars());
+
+ // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar are placed below the temps
+ if (unsigned(var) == emitComp->lvaInlinedPInvokeFrameVar ||
+ unsigned(var) == emitComp->lvaStubArgumentVar)
+ {
+ offs -= emitMaxTmpSize;
+ }
+
+ if ((int)offs < 0)
+ {
+ // offset is negative
+ return size + ((int(offs) >= SCHAR_MIN) ? sizeof(char) : sizeof(int));
+ }
+#ifdef _TARGET_AMD64_
+ // This case arises for localloc frames
+ else
+ {
+ return size + ((offs <= SCHAR_MAX) ? sizeof(char) : sizeof(int));
+ }
+#endif
+ }
+
+ if (emitComp->lvaTempsHaveLargerOffsetThanVars() == false)
+ {
+ offs += emitMaxTmpSize;
+ }
+ }
+ }
+
+ assert((int)offs >= 0);
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ /* Are we addressing off of ESP? */
+
+ if (!emitHasFramePtr)
+ {
+ /* Adjust the effective offset if necessary */
+
+ if (emitCntStackDepth)
+ offs += emitCurStackLvl;
+
+ // we could (and used to) check for the special case [sp] here but the stack offset
+ // estimator was off, and there is very little harm in overestimating for such a
+ // rare case.
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+// printf("lcl = %04X, tmp = %04X, stk = %04X, offs = %04X\n",
+// emitLclSize, emitMaxTmpSize, emitCurStackLvl, offs);
+
+#ifdef _TARGET_AMD64_
+ bool useSmallEncoding = (SCHAR_MIN <= (int)offs) && ((int)offs <= SCHAR_MAX);
+#else
+ bool useSmallEncoding = (offs <= size_t(SCHAR_MAX));
+#endif
+
+#ifdef LEGACY_BACKEND
+ /* If we are using a small encoding, there is a danger that we might
+ end up having to use a larger encoding. Record 'offs' so that
+ we can detect if such a situation occurs */
+
+ if (useSmallEncoding && !offsIsUpperBound)
+ {
+ if (emitGrowableMaxByteOffs < offs)
+ {
+ emitGrowableMaxByteOffs = offs;
+#ifdef DEBUG
+ // Remember which instruction this is
+ emitMaxByteOffsIdNum = emitInsCount;
+#endif
+ }
+ }
+#endif // LEGACY_BACKEND
+
+ // If it is ESP based, and the offset is zero, we will not encode the disp part.
+ if (!EBPbased && offs == 0)
+ {
+ return size;
+ }
+ else
+ {
+ return size + (useSmallEncoding ? sizeof(char) : sizeof(int));
+ }
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, int val)
+{
+ instruction ins = id->idIns();
+ UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
+ UNATIVE_OFFSET prefix = 0;
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(valSize <= sizeof(int) || !id->idIsCnsReloc());
+#endif // _TARGET_AMD64_
+
+ if (valSize > sizeof(int))
+ {
+ valSize = sizeof(int);
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ assert(valSize == sizeof(int));
+ }
+#endif
+
+ if (valInByte)
+ {
+ valSize = sizeof(char);
+ }
+
+ // 16-bit operand instructions need a prefix.
+    // This refers to the 66h size prefix override.
+ if (id->idOpSize() == EA_2BYTE)
+ {
+ prefix = 1;
+ }
+
+ return prefix + valSize + emitInsSizeSV(insCodeMI(ins), var, dsp);
+}
+
+/*****************************************************************************/
+
+static bool baseRegisterRequiresSibByte(regNumber base)
+{
+#ifdef _TARGET_AMD64_
+ return base == REG_ESP || base == REG_R12;
+#else
+ return base == REG_ESP;
+#endif
+}
+
+static bool baseRegisterRequiresDisplacement(regNumber base)
+{
+#ifdef _TARGET_AMD64_
+ return base == REG_EBP || base == REG_R13;
+#else
+ return base == REG_EBP;
+#endif
+}
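+
+// Encoding background: in the ModR/M byte, r/m = 100b (ESP/R12) is repurposed to signal that a SIB byte
+// follows, and mod = 00 with r/m = 101b (EBP/R13) means "disp32 with no base register", which is why those
+// base registers force a SIB byte or an explicit displacement, respectively.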
+
+UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code)
+{
+ emitAttr attrSize = id->idOpSize();
+ instruction ins = id->idIns();
+ /* The displacement field is in an unusual place for calls */
+ ssize_t dsp = (ins == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
+ bool dspInByte = ((signed char)dsp == (ssize_t)dsp);
+ bool dspIsZero = (dsp == 0);
+ UNATIVE_OFFSET size;
+
+ // Note that the values in reg and rgx are used in this method to decide
+ // how many bytes will be needed by the address [reg+rgx+cns]
+ // this includes the prefix bytes when reg or rgx are registers R8-R15
+ regNumber reg;
+ regNumber rgx;
+
+ // The idAddr field is a union and only some of the instruction formats use the iiaAddrMode variant
+ // these are IF_AWR_*, IF_ARD_*, IF_ARW_* and IF_*_ARD
+ // ideally these should really be the only idInsFmts that we see here
+ // but we have some outliers to deal with:
+ // emitIns_R_L adds IF_RWR_LABEL and calls emitInsSizeAM
+ // emitInsRMW adds IF_MRW_CNS, IF_MRW_RRD, IF_MRW_SHF, and calls emitInsSizeAM
+
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_LABEL:
+ case IF_MRW_CNS:
+ case IF_MRW_RRD:
+ case IF_MRW_SHF:
+ reg = REG_NA;
+ rgx = REG_NA;
+ break;
+
+ default:
+ reg = id->idAddr()->iiaAddrMode.amBaseReg;
+ rgx = id->idAddr()->iiaAddrMode.amIndxReg;
+ break;
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ dspInByte = false; // relocs can't be placed in a byte
+ dspIsZero = false; // relocs won't always be zero
+ }
+#endif
+
+ if (code & 0xFF000000)
+ {
+ size = 4;
+ }
+ else if (code & 0x00FF0000)
+ {
+ assert((attrSize == EA_4BYTE) || (attrSize == EA_PTRSIZE) // Only for x64
+ || (attrSize == EA_16BYTE) // only for x64
+ || (ins == INS_movzx) || (ins == INS_movsx));
+
+ size = 3;
+ }
+ else
+ {
+ size = 2;
+
+ // Most 16-bit operands will require a size prefix.
+ // This refers to 66h size prefix override.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_STACK_FP_X87
+ if ((attrSize == EA_2BYTE) && (ins != INS_fldcw) && (ins != INS_fnstcw))
+#else // FEATURE_STACK_FP_X87
+ if (attrSize == EA_2BYTE)
+#endif // FEATURE_STACK_FP_X87
+ {
+ size++;
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
+
+ if (code & REX_PREFIX_MASK)
+ {
+ // REX prefix
+ size += emitGetRexPrefixSize(ins);
+ }
+ else if (TakesRexWPrefix(ins, attrSize))
+ {
+ // REX.W prefix
+ size += emitGetRexPrefixSize(ins);
+ }
+ else if (IsExtendedReg(reg, EA_PTRSIZE) || IsExtendedReg(rgx, EA_PTRSIZE) || IsExtendedReg(id->idReg1(), attrSize))
+ {
+ // Should have a REX byte
+ size += emitGetRexPrefixSize(ins);
+ }
+#endif // _TARGET_AMD64_
+
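+ // The remainder of this method adds the bytes needed by the address mode itself.
+ // Illustrative extra bytes beyond the opcode+ModRM accounted for above:
+ //     [rax]                 -> none
+ //     [rsp+0x40]            -> SIB + disp8
+ //     [rax+rcx*4+0x1000]    -> SIB + disp32
+ //     [0x12345678]          -> disp32 (plus a SIB byte on x64 when not RIP-relative)
+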
+ if (rgx == REG_NA)
+ {
+ /* The address is of the form "[reg+disp]" */
+
+ if (reg == REG_NA)
+ {
+ /* The address is of the form "[disp]" */
+
+ size += sizeof(INT32);
+
+#ifdef _TARGET_AMD64_
+ // If id is not marked for reloc, add 1 additional byte for SIB that follows disp32
+ if (!id->idIsDspReloc())
+ {
+ size++;
+ }
+#endif
+ return size;
+ }
+
+ // If the base register is ESP (or R12 on 64-bit systems), a SIB byte must be used.
+ if (baseRegisterRequiresSibByte(reg))
+ {
+ size++;
+ }
+
+ // If the base register is EBP (or R13 on 64-bit systems), a displacement is required.
+ // Otherwise, the displacement can be elided if it is zero.
+ if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
+ {
+ return size;
+ }
+
+ /* Does the offset fit in a byte? */
+
+ if (dspInByte)
+ {
+ size += sizeof(char);
+ }
+ else
+ {
+ size += sizeof(INT32);
+ }
+ }
+ else
+ {
+ /* An index register is present */
+
+ size++;
+
+ /* Is the index value scaled? */
+
+ if (emitDecodeScale(id->idAddr()->iiaAddrMode.amScale) > 1)
+ {
+ /* Is there a base register? */
+
+ if (reg != REG_NA)
+ {
+ /* The address is "[reg + {2/4/8} * rgx + icon]" */
+
+ if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
+ {
+ /* The address is "[reg + {2/4/8} * rgx]" */
+ }
+ else
+ {
+ /* The address is "[reg + {2/4/8} * rgx + disp]" */
+
+ if (dspInByte)
+ {
+ size += sizeof(char);
+ }
+ else
+ {
+ size += sizeof(int);
+ }
+ }
+ }
+ else
+ {
+ /* The address is "[{2/4/8} * rgx + icon]" */
+
+ size += sizeof(INT32);
+ }
+ }
+ else
+ {
+ if (dspIsZero && baseRegisterRequiresDisplacement(reg) && !baseRegisterRequiresDisplacement(rgx))
+ {
+ /* Swap reg and rgx, such that reg is not EBP/R13 */
+ regNumber tmp = reg;
+ id->idAddr()->iiaAddrMode.amBaseReg = reg = rgx;
+ id->idAddr()->iiaAddrMode.amIndxReg = rgx = tmp;
+ }
+
+ /* The address is "[reg+rgx+dsp]" */
+
+ if (dspIsZero && !baseRegisterRequiresDisplacement(reg))
+ {
+ /* This is "[reg+rgx]" */
+ }
+ else
+ {
+ /* This is "[reg+rgx+dsp]" */
+
+ if (dspInByte)
+ {
+ size += sizeof(char);
+ }
+ else
+ {
+ size += sizeof(int);
+ }
+ }
+ }
+ }
+
+ return size;
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, size_t code, int val)
+{
+ instruction ins = id->idIns();
+ UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(valSize <= sizeof(INT32) || !id->idIsCnsReloc());
+#endif // _TARGET_AMD64_
+
+ if (valSize > sizeof(INT32))
+ {
+ valSize = sizeof(INT32);
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ assert(valSize == sizeof(INT32));
+ }
+#endif
+
+ if (valInByte)
+ {
+ valSize = sizeof(char);
+ }
+
+ return valSize + emitInsSizeAM(id, code);
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code)
+{
+ instruction ins = id->idIns();
+
+ // fgMorph changes any statics that won't fit into 32-bit addresses
+ // into constants with an indir, rather than GT_CLS_VAR,
+ // so we should only hit this path for statics that are RIP-relative.
+ UNATIVE_OFFSET size = sizeof(INT32);
+
+ // Most 16-bit operand instructions will need a prefix.
+ // This refers to 66h size prefix override.
+
+ if (id->idOpSize() == EA_2BYTE && ins != INS_movzx && ins != INS_movsx)
+ {
+ size++;
+ }
+
+ return size + emitInsSize(code);
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, size_t code, int val)
+{
+ instruction ins = id->idIns();
+ UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize());
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifndef _TARGET_AMD64_
+ // occasionally longs get here on x86
+ if (valSize > sizeof(INT32))
+ valSize = sizeof(INT32);
+#endif // !_TARGET_AMD64_
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ assert(valSize == sizeof(INT32));
+ }
+#endif
+
+ if (valInByte)
+ {
+ valSize = sizeof(char);
+ }
+
+ return valSize + emitInsSizeCV(id, code);
+}
+
+/*****************************************************************************
+ *
+ * Allocate instruction descriptors for instructions with address modes.
+ */
+
+inline emitter::instrDesc* emitter::emitNewInstrAmd(emitAttr size, ssize_t dsp)
+{
+ if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
+ {
+ instrDescAmd* id = emitAllocInstrAmd(size);
+
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idaAmdVal = dsp;
+
+ return id;
+ }
+ else
+ {
+ instrDesc* id = emitAllocInstr(size);
+
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+
+ return id;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Set the displacement field in an instruction. Only handles instrDescAmd type.
+ */
+
+inline void emitter::emitSetAmdDisp(instrDescAmd* id, ssize_t dsp)
+{
+ if (dsp < AM_DISP_MIN || dsp > AM_DISP_MAX)
+ {
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idaAmdVal = dsp;
+ }
+ else
+ {
+ id->idSetIsSmallDsp();
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+ }
+}
+
+/*****************************************************************************
+ *
+ * Allocate an instruction descriptor for an instruction that uses both
+ * an address mode displacement and a constant.
+ */
+
+emitter::instrDesc* emitter::emitNewInstrAmdCns(emitAttr size, ssize_t dsp, int cns)
+{
+ if (dsp >= AM_DISP_MIN && dsp <= AM_DISP_MAX)
+ {
+ if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
+ {
+ instrDesc* id = emitAllocInstr(size);
+
+ id->idSmallCns(cns);
+
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+
+ return id;
+ }
+ else
+ {
+ instrDescCns* id = emitAllocInstrCns(size);
+
+ id->idSetIsLargeCns();
+ id->idcCnsVal = cns;
+
+ id->idAddr()->iiaAddrMode.amDisp = dsp;
+ assert(id->idAddr()->iiaAddrMode.amDisp == dsp); // make sure the value fits
+
+ return id;
+ }
+ }
+ else
+ {
+ if (cns >= ID_MIN_SMALL_CNS && cns <= ID_MAX_SMALL_CNS)
+ {
+ instrDescAmd* id = emitAllocInstrAmd(size);
+
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idaAmdVal = dsp;
+
+ id->idSmallCns(cns);
+
+ return id;
+ }
+ else
+ {
+ instrDescCnsAmd* id = emitAllocInstrCnsAmd(size);
+
+ id->idSetIsLargeCns();
+ id->idacCnsVal = cns;
+
+ id->idSetIsLargeDsp();
+#ifdef DEBUG
+ id->idAddr()->iiaAddrMode.amDisp = AM_DISP_BIG_VAL;
+#endif
+ id->idacAmdVal = dsp;
+
+ return id;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * The next instruction will be a loop head entry point
+ * So insert a dummy instruction here to ensure that
+ * the x86 I-cache alignment rule is followed.
+ */
+
+void emitter::emitLoopAlign()
+{
+ /* Insert a pseudo-instruction to ensure that we align
+ the next instruction properly */
+
+ instrDesc* id = emitNewInstrTiny(EA_1BYTE);
+ id->idIns(INS_align);
+ id->idCodeSize(15); // We may need to skip up to 15 bytes of code
+ emitCurIGsize += 15;
+}
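+
+// Note: reserving 15 bytes assumes alignment to a 16-byte boundary, which is the worst-case
+// padding; the nops actually emitted (if any) are presumably determined later, once the final
+// instruction offsets are known.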
+
+/*****************************************************************************
+ *
+ * Add a NOP instruction of the given size.
+ */
+
+void emitter::emitIns_Nop(unsigned size)
+{
+ assert(size <= 15);
+
+ instrDesc* id = emitNewInstr();
+ id->idIns(INS_nop);
+ id->idInsFmt(IF_NONE);
+ id->idCodeSize(size);
+
+ dispIns(id);
+ emitCurIGsize += size;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with no operands.
+ */
+#ifdef DEBUG
+static bool isX87InsWithNoOperands(instruction ins)
+{
+#if FEATURE_STACK_FP_X87
+ return (ins == INS_f2xm1 || ins == INS_fchs || ins == INS_fld1 || ins == INS_fldl2e ||
+ ins == INS_fldz || ins == INS_fprem || ins == INS_frndint || ins == INS_fscale);
+#else // !FEATURE_STACK_FP_X87
+ return false;
+#endif // !FEATURE_STACK_FP_X87
+}
+#endif // DEBUG
+
+void emitter::emitIns(instruction ins)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstr();
+ size_t code = insCodeMR(ins);
+
+#ifdef DEBUG
+#if FEATURE_STACK_FP_X87
+ if (ins != INS_fabs && ins != INS_fsqrt && ins != INS_fsin && ins != INS_fcos)
+#endif // FEATURE_STACK_FP_X87
+
+ {
+ // We cannot have #ifdef inside macro expansion.
+ bool assertCond = (ins == INS_cdq || isX87InsWithNoOperands(ins) || ins == INS_int3 || ins == INS_lock ||
+ ins == INS_leave || ins == INS_movsb || ins == INS_movsd || ins == INS_movsp ||
+ ins == INS_nop || ins == INS_r_movsb || ins == INS_r_movsd || ins == INS_r_movsp ||
+ ins == INS_r_stosb || ins == INS_r_stosd || ins == INS_r_stosp || ins == INS_ret ||
+ ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
+#ifndef LEGACY_BACKEND
+ || ins == INS_vzeroupper
+#endif
+ );
+
+ assert(assertCond);
+ }
+#endif // DEBUG
+
+#ifdef _TARGET_AMD64_
+ assert((code & REX_PREFIX_MASK) == 0); // Can't have a REX bit with no operands, right?
+#endif // _TARGET_AMD64_
+
+ if (code & 0xFF000000)
+ {
+ sz = 2; // TODO-XArch-Bug?: Shouldn't this be 4? Or maybe we should assert that we don't see this case.
+ }
+ else if (code & 0x00FF0000)
+ {
+ sz = 3;
+ }
+ else if (code & 0x0000FF00)
+ {
+ sz = 2;
+ }
+ else
+ {
+ sz = 1;
+ }
+
+#ifndef LEGACY_BACKEND
+ // Account for 2-byte VEX prefix in case of vzeroupper
+ if (ins == INS_vzeroupper)
+ {
+ sz += 2;
+ }
+#endif
+
+ insFormat fmt = IF_NONE;
+
+#if FEATURE_STACK_FP_X87
+ if (CodeGen::instIsFP(ins))
+ {
+ fmt = emitInsModeFormat(ins, IF_TRD);
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+#if !defined(LEGACY_BACKEND)
+// Add an instruction with no operands, but whose encoding depends on the size
+// (Only CDQ/CQO currently)
+void emitter::emitIns(instruction ins, emitAttr attr)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstr(attr);
+ size_t code = insCodeMR(ins);
+ assert(ins == INS_cdq);
+ assert((code & 0xFFFFFF00) == 0);
+ sz = 1;
+
+ insFormat fmt = IF_NONE;
+
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, code);
+ if (TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// emitMapFmtForIns: map the instruction format based on the instruction.
+// Shift-by-a-constant instructions have a special format.
+//
+// Arguments:
+// fmt - the instruction format to map
+// ins - the instruction
+//
+// Returns:
+// The mapped instruction format.
+//
+emitter::insFormat emitter::emitMapFmtForIns(insFormat fmt, instruction ins)
+{
+ switch (ins)
+ {
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ {
+ switch (fmt)
+ {
+ case IF_RRW_CNS:
+ return IF_RRW_SHF;
+ case IF_MRW_CNS:
+ return IF_MRW_SHF;
+ case IF_SRW_CNS:
+ return IF_SRW_SHF;
+ case IF_ARW_CNS:
+ return IF_ARW_SHF;
+ default:
+ unreached();
+ }
+ }
+
+ default:
+ return fmt;
+ }
+}
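+
+// For example, a shift by an immediate such as "shl dword ptr [rax], 5" arriving here with
+// IF_ARW_CNS is remapped to IF_ARW_SHF.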
+
+//------------------------------------------------------------------------
+// emitMapFmtAtoM: map the address mode formats ARD, ARW, and AWR to their direct address equivalents.
+//
+// Arguments:
+// fmt - the instruction format to map
+//
+// Returns:
+// The mapped instruction format.
+//
+emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
+{
+ switch (fmt)
+ {
+ case IF_ARD:
+ return IF_MRD;
+ case IF_AWR:
+ return IF_MWR;
+ case IF_ARW:
+ return IF_MRW;
+
+ case IF_RRD_ARD:
+ return IF_RRD_MRD;
+ case IF_RWR_ARD:
+ return IF_RWR_MRD;
+ case IF_RRW_ARD:
+ return IF_RRW_MRD;
+
+ case IF_ARD_RRD:
+ return IF_MRD_RRD;
+ case IF_AWR_RRD:
+ return IF_MWR_RRD;
+ case IF_ARW_RRD:
+ return IF_MRW_RRD;
+
+ case IF_ARD_CNS:
+ return IF_MRD_CNS;
+ case IF_AWR_CNS:
+ return IF_MWR_CNS;
+ case IF_ARW_CNS:
+ return IF_MRW_CNS;
+
+ case IF_ARW_SHF:
+ return IF_MRW_SHF;
+
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// emitHandleMemOp: For a memory operand, fill in the relevant fields of the instrDesc.
+//
+// Arguments:
+// indir - the memory operand.
+// id - the instrDesc to fill in.
+// fmt - the instruction format to use. This must be one of the ARD, AWR, or ARW formats. If necessary (such as for
+// GT_CLS_VAR_ADDR), this function will map it to the correct format.
+// ins - the instruction we are generating. This might affect the instruction format we choose.
+//
+// Assumptions:
+// The correctly sized instrDesc must already be created, e.g., via emitNewInstrAmd() or emitNewInstrAmdCns();
+//
+// Post-conditions:
+// For base address of int constant:
+// -- the caller must have added the int constant base to the instrDesc when creating it via
+// emitNewInstrAmdCns().
+// For simple address modes (base + scale * index + offset):
+// -- the base register, index register, and scale factor are set.
+// -- the caller must have added the addressing mode offset int constant to the instrDesc when creating it via
+// emitNewInstrAmdCns().
+//
+// The instruction format is set.
+//
+// idSetIsDspReloc() is called if necessary.
+//
+void emitter::emitHandleMemOp(GenTreeIndir* indir, instrDesc* id, insFormat fmt, instruction ins)
+{
+ assert(fmt != IF_NONE);
+
+ GenTree* memBase = indir->Base();
+
+ if ((memBase != nullptr) && memBase->isContained() && (memBase->OperGet() == GT_CLS_VAR_ADDR))
+ {
+ CORINFO_FIELD_HANDLE fldHnd = memBase->gtClsVar.gtClsVarHnd;
+
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ // Contract:
+ // fgMorphField() changes any statics that won't fit into 32-bit addresses into
+ // constants with an indir, rather than GT_CLS_VAR, based on the reloc type hint given
+ // by the VM. Hence the emitter should always mark GT_CLS_VAR_ADDR as relocatable.
+ //
+ // Data section constants: these get allocated close to the code block of the method and
+ // are always addressable IP-relative. These too should be marked as relocatable.
+
+ id->idSetIsDspReloc();
+ }
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+ id->idInsFmt(emitMapFmtForIns(emitMapFmtAtoM(fmt), ins));
+ }
+ else if ((memBase != nullptr) && memBase->IsCnsIntOrI() && memBase->isContained())
+ {
+ // Absolute addresses marked as contained should fit within the base of addr mode.
+ assert(memBase->AsIntConCommon()->FitsInAddrBase(emitComp));
+
+ // Either not generating relocatable code or addr must be an icon handle
+ assert(!emitComp->opts.compReloc || memBase->IsIconHandle());
+
+ if (memBase->AsIntConCommon()->AddrNeedsReloc(emitComp))
+ {
+ id->idSetIsDspReloc();
+ }
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1; // for completeness
+
+ id->idInsFmt(emitMapFmtForIns(fmt, ins));
+
+ // Absolute address must have already been set in the instrDesc constructor.
+ assert(emitGetInsAmdAny(id) == memBase->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ if (memBase != nullptr)
+ {
+ id->idAddr()->iiaAddrMode.amBaseReg = memBase->gtRegNum;
+ }
+ else
+ {
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ }
+
+ if (indir->HasIndex())
+ {
+ id->idAddr()->iiaAddrMode.amIndxReg = indir->Index()->gtRegNum;
+ }
+ else
+ {
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ }
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(indir->Scale());
+
+ id->idInsFmt(emitMapFmtForIns(fmt, ins));
+
+ // disp must have already been set in the instrDesc constructor.
+ assert(emitGetInsAmdAny(id) == ssize_t(indir->Offset())); // make sure "disp" is stored properly
+ }
+}
+
+// Takes care of storing all incoming register parameters
+// into their corresponding shadow slots (as defined by the x64 ABI).
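+// For illustration, with the Windows x64 argument registers (RCX, RDX, R8, R9) this emits
+// the equivalent of:
+//     mov qword ptr [rsp+08h], rcx
+//     mov qword ptr [rsp+10h], rdx
+//     mov qword ptr [rsp+18h], r8
+//     mov qword ptr [rsp+20h], r9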
+void emitter::spillIntArgRegsToShadowSlots()
+{
+ unsigned argNum;
+ instrDesc* id;
+ UNATIVE_OFFSET sz;
+
+ assert(emitComp->compGeneratingProlog);
+
+ for (argNum = 0; argNum < MAX_REG_ARG; ++argNum)
+ {
+ regNumber argReg = intArgRegs[argNum];
+
+ // The offsets for the shadow space start at RSP + 8,
+ // just above the return address pushed by the call.
+ int offset = (argNum + 1) * EA_PTRSIZE;
+
+ id = emitNewInstrAmd(EA_PTRSIZE, offset);
+ id->idIns(INS_mov);
+ id->idInsFmt(IF_AWR_RRD);
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_SPBASE;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
+
+ // The offset has already been set in the instrDesc ctor,
+ // make sure we got it right.
+ assert(emitGetInsAmdAny(id) == ssize_t(offset));
+
+ id->idReg1(argReg);
+ sz = emitInsSizeAM(id, insCodeMR(INS_mov));
+ id->idCodeSize(sz);
+ emitCurIGsize += sz;
+ }
+}
+
+// This is very similar to emitInsBinary and probably could be folded into it,
+// except that the requirements on the incoming parameter are different,
+// e.g. the memory op in the storeind case must NOT be contained.
+void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ switch (node->OperGet())
+ {
+ case GT_IND:
+ {
+ GenTreeIndir* mem = node->AsIndir();
+ GenTreePtr addr = mem->Addr();
+
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ emitIns_R_C(ins, attr, mem->gtRegNum, addr->gtClsVar.gtClsVarHnd, 0);
+ return;
+ }
+ else if (addr->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
+ emitIns_R_S(ins, attr, mem->gtRegNum, varNode->GetLclNum(), 0);
+ codeGen->genUpdateLife(varNode);
+ return;
+ }
+ else
+ {
+ assert(addr->OperIsAddrMode() || (addr->IsCnsIntOrI() && addr->isContained()) || !addr->isContained());
+ size_t offset = mem->Offset();
+ id = emitNewInstrAmd(attr, offset);
+ id->idIns(ins);
+ id->idReg1(mem->gtRegNum);
+ emitHandleMemOp(mem, id, IF_RWR_ARD, ins);
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ GenTreeStoreInd* mem = node->AsStoreInd();
+ GenTreePtr addr = mem->Addr();
+ size_t offset = mem->Offset();
+ GenTree* data = mem->Data();
+
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_C_I(ins, attr, addr->gtClsVar.gtClsVarHnd, 0, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(!data->isContained());
+ emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ }
+ return;
+ }
+ else if (addr->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ GenTreeLclVarCommon* varNode = addr->AsLclVarCommon();
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(!data->isContained());
+ emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
+ }
+ codeGen->genUpdateLife(varNode);
+ return;
+ }
+ else if (data->isContainedIntOrIImmed())
+ {
+ int icon = (int)data->AsIntConCommon()->IconValue();
+ id = emitNewInstrAmdCns(attr, offset, icon);
+ id->idIns(ins);
+ emitHandleMemOp(mem, id, IF_AWR_CNS, ins);
+ sz = emitInsSizeAM(id, insCodeMI(ins), icon);
+ id->idCodeSize(sz);
+ }
+ else
+ {
+ assert(!data->isContained());
+ id = emitNewInstrAmd(attr, offset);
+ id->idIns(ins);
+ emitHandleMemOp(mem, id, IF_AWR_RRD, ins);
+ id->idReg1(data->gtRegNum);
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+ }
+ }
+ break;
+
+ case GT_STORE_LCL_VAR:
+ {
+ GenTreeLclVarCommon* varNode = node->AsLclVarCommon();
+ GenTree* data = varNode->gtOp.gtOp1->gtEffectiveVal();
+ codeGen->inst_set_SV_var(varNode);
+ assert(varNode->gtRegNum == REG_NA); // stack store
+
+ if (data->isContainedIntOrIImmed())
+ {
+ emitIns_S_I(ins, attr, varNode->GetLclNum(), 0, (int)data->AsIntConCommon()->IconValue());
+ }
+ else
+ {
+ assert(!data->isContained());
+ emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
+ }
+ codeGen->genUpdateLife(varNode);
+ }
+ return;
+
+ default:
+ unreached();
+ }
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+CORINFO_FIELD_HANDLE emitter::emitLiteralConst(ssize_t cnsValIn, emitAttr attr /*= EA_8BYTE*/)
+{
+ NYI("emitLiteralConst");
+ return nullptr;
+}
+
+// Generates a float or double data section constant and returns a field handle representing
+// the data offset used to access the constant. This is called by emitInsBinary() in the case
+// of contained float or double constants.
+CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr attr /*=EA_UNKNOWN*/)
+{
+ if (attr == EA_UNKNOWN)
+ {
+ attr = emitTypeSize(tree->TypeGet());
+ }
+ else
+ {
+ assert(emitTypeSize(tree->TypeGet()) == attr);
+ }
+
+ double constValue = tree->gtDblCon.gtDconVal;
+ void* cnsAddr;
+ float f;
+ bool dblAlign;
+
+ if (attr == EA_4BYTE)
+ {
+ f = forceCastToFloat(constValue);
+ cnsAddr = &f;
+ dblAlign = false;
+ }
+ else
+ {
+ cnsAddr = &constValue;
+ dblAlign = true;
+ }
+
+ // Access to inline data is 'abstracted' by a special type of static member
+ // (produced by eeFindJitDataOffs) which the emitter recognizes as being a reference
+ // to constant data, not a real static field.
+
+ UNATIVE_OFFSET cnsSize = (attr == EA_4BYTE) ? 4 : 8;
+ UNATIVE_OFFSET cnum = emitDataConst(cnsAddr, cnsSize, dblAlign);
+ return emitComp->eeFindJitDataOffs(cnum);
+}
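+
+// The handle returned above is passed to emitIns_R_C (see emitInsBinary below), which addresses
+// the constant through the data section, e.g. as a load of the form "movsd xmm0, qword ptr [reloc]".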
+
+// The caller must call genConsumeReg() for all sources, including the address registers
+// of both source and destination, and genProduceReg() for the destination register, if any.
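+//
+// Illustrative shapes handled below (not an exhaustive list):
+//     add ecx, edx          ; reg, reg
+//     add ecx, 8            ; reg, immediate
+//     add ecx, [rbp-8]      ; reg, stack-based local (contained lclVar/lclFld/spill temp)
+//     add ecx, [rax+8]      ; reg, address mode
+//     add [rax+8], ecx      ; address mode, reg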
+
+regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
+{
+ // dst can only be a reg or modrm
+ assert(!dst->isContained() || dst->isContainedMemoryOp() ||
+ instrIs3opImul(ins)); // dst on these isn't really the dst
+
+#ifdef DEBUG
+ // src can be anything but both src and dst cannot be addr modes
+ // or at least cannot be contained addr modes
+ if (dst->isContainedMemoryOp())
+ {
+ assert(!src->isContainedMemoryOp());
+ }
+
+ if (src->isContainedMemoryOp())
+ {
+ assert(!dst->isContainedMemoryOp());
+ }
+#endif
+
+ // find which operand is a memory op (if any)
+ // and what its base is
+ GenTreeIndir* mem = nullptr;
+ GenTree* memBase = nullptr;
+
+ if (dst->isContainedIndir())
+ {
+ mem = dst->AsIndir();
+ }
+ else if (src->isContainedIndir())
+ {
+ mem = src->AsIndir();
+ }
+
+ if (mem)
+ {
+ memBase = mem->gtOp1;
+ }
+
+ // Find immed (if any) - it cannot be the dst
+ // SSE2 instructions allow only the second operand to be a memory operand.
+ GenTreeIntConCommon* intConst = nullptr;
+ GenTreeDblCon* dblConst = nullptr;
+ if (src->isContainedIntOrIImmed())
+ {
+ intConst = src->AsIntConCommon();
+ }
+ else if (src->isContainedFltOrDblImmed())
+ {
+ dblConst = src->AsDblCon();
+ }
+
+ // find local field if any
+ GenTreeLclFld* lclField = nullptr;
+ if (src->isContainedLclField())
+ {
+ lclField = src->AsLclFld();
+ }
+ else if (dst->isLclField() && dst->gtRegNum == REG_NA)
+ {
+ lclField = dst->AsLclFld();
+ }
+
+ // find contained lcl var if any
+ GenTreeLclVar* lclVar = nullptr;
+ if (src->isContainedLclVar())
+ {
+ assert(src->IsRegOptional());
+ lclVar = src->AsLclVar();
+ }
+ else if (dst->isContainedLclVar())
+ {
+ assert(dst->IsRegOptional());
+ lclVar = dst->AsLclVar();
+ }
+
+ // find contained spill tmp if any
+ TempDsc* tmpDsc = nullptr;
+ if (src->isContainedSpillTemp())
+ {
+ assert(src->IsRegOptional());
+ tmpDsc = codeGen->getSpillTempDsc(src);
+ }
+ else if (dst->isContainedSpillTemp())
+ {
+ assert(dst->IsRegOptional());
+ tmpDsc = codeGen->getSpillTempDsc(dst);
+ }
+
+ // First handle the simple non-memory cases
+ //
+ if ((mem == nullptr) && (lclField == nullptr) && (lclVar == nullptr) && (tmpDsc == nullptr))
+ {
+ if (intConst != nullptr)
+ {
+ // reg, immed
+ assert(!dst->isContained());
+
+ emitIns_R_I(ins, attr, dst->gtRegNum, intConst->IconValue());
+ // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or
+ // rsTrackRegIntCns(dst->gtRegNum, intConst->IconValue()) (as appropriate)?
+ }
+ else if (dblConst != nullptr)
+ {
+ // Emit a data section constant for float or double constant.
+ CORINFO_FIELD_HANDLE hnd = emitFltOrDblConst(dblConst);
+
+ emitIns_R_C(ins, attr, dst->gtRegNum, hnd, 0);
+ }
+ else
+ {
+ // reg, reg
+ assert(!src->isContained() && !dst->isContained());
+
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ emitIns_R(ins, attr, src->gtRegNum);
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, dst->gtRegNum, src->gtRegNum);
+ }
+ // TODO-XArch-Bug?: does the caller call regTracker.rsTrackRegTrash(dst->gtRegNum) or, for ins=MOV:
+ // regTracker.rsTrackRegCopy(dst->gtRegNum, src->gtRegNum); ?
+ }
+
+ return dst->gtRegNum;
+ }
+
+ // Next handle the cases where we have a stack based local memory operand.
+ //
+ unsigned varNum = BAD_VAR_NUM;
+ unsigned offset = (unsigned)-1;
+
+ if (lclField != nullptr)
+ {
+ varNum = lclField->AsLclVarCommon()->GetLclNum();
+ offset = lclField->gtLclFld.gtLclOffs;
+ }
+ else if (lclVar != nullptr)
+ {
+ varNum = lclVar->AsLclVarCommon()->GetLclNum();
+ offset = 0;
+ }
+ else if (tmpDsc != nullptr)
+ {
+ varNum = tmpDsc->tdTempNum();
+ offset = 0;
+ }
+
+ // Spill temp numbers are negative and start with -1
+ // which also happens to be BAD_VAR_NUM. For this reason
+ // we also need to check 'tmpDsc != nullptr' here.
+ if (varNum != BAD_VAR_NUM || tmpDsc != nullptr)
+ {
+ // Is the memory op in the source position?
+ if (src->isContainedLclField() || src->isContainedLclVar() || src->isContainedSpillTemp())
+ {
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ // src is a stack based local variable
+ // dst is implicit - RDX:RAX
+ emitIns_S(ins, attr, varNum, offset);
+ }
+ else
+ {
+ // src is a stack based local variable
+ // dst is a register
+ emitIns_R_S(ins, attr, dst->gtRegNum, varNum, offset);
+ }
+ }
+ else // The memory op is in the dest position.
+ {
+ assert(dst->gtRegNum == REG_NA || dst->IsRegOptional());
+
+ // src could be int or reg
+ if (src->isContainedIntOrIImmed())
+ {
+ // src is a contained immediate
+ // dst is a stack based local variable
+ emitIns_S_I(ins, attr, varNum, offset, (int)src->gtIntConCommon.IconValue());
+ }
+ else
+ {
+ // src is a register
+ // dst is a stack based local variable
+ assert(!src->isContained());
+ emitIns_S_R(ins, attr, src->gtRegNum, varNum, offset);
+ }
+ }
+
+ if (tmpDsc != nullptr)
+ {
+ emitComp->tmpRlsTemp(tmpDsc);
+ }
+
+ return dst->gtRegNum;
+ }
+
+ // Now we are left with only the cases where the instruction has some kind of a memory operand
+ //
+ assert(mem != nullptr);
+
+ // Next handle the class static variable cases
+ //
+ if (memBase->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ // Is the memory op in the source position?
+ if (mem == src)
+ {
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ // src is a class static variable
+ // dst is implicit - RDX:RAX
+ emitIns_C(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0);
+ }
+ else
+ {
+ // src is a class static variable
+ // dst is a register
+ emitIns_R_C(ins, attr, dst->gtRegNum, memBase->gtClsVar.gtClsVarHnd, 0);
+ }
+ }
+ else // The memory op is in the dest position.
+ {
+ if (src->isContained())
+ {
+ // src is a contained immediate
+ // dst is a class static variable
+ emitIns_C_I(ins, attr, memBase->gtClsVar.gtClsVarHnd, 0, (int)src->gtIntConCommon.IconValue());
+ }
+ else
+ {
+ // src is a register
+ // dst is a class static variable
+ emitIns_C_R(ins, attr, memBase->gtClsVar.gtClsVarHnd, src->gtRegNum, 0);
+ }
+ }
+
+ return dst->gtRegNum;
+ }
+
+ // Finally we handle addressing modes case [regBase + regIndex*scale + const]
+ //
+ // We will have to construct and fill in the instruction descriptor for this case
+ //
+ instrDesc* id = nullptr;
+
+ // Is the src an immediate constant?
+ if (intConst)
+ {
+ // [mem], imm
+ id = emitNewInstrAmdCns(attr, mem->Offset(), (int)intConst->IconValue());
+ }
+ else // [mem], reg OR reg, [mem]
+ {
+ size_t offset = mem->Offset();
+ id = emitNewInstrAmd(attr, offset);
+ id->idIns(ins);
+
+ GenTree* regTree = (src == mem) ? dst : src;
+
+ // there must be one non-contained src
+ assert(!regTree->isContained());
+ id->idReg1(regTree->gtRegNum);
+ }
+ assert(id != nullptr);
+
+ id->idIns(ins); // Set the instruction.
+
+ // Determine the instruction format
+ //
+ insFormat fmt = IF_NONE;
+ if (mem == dst)
+ {
+ if (!src->isContained())
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ }
+ }
+ else
+ {
+ assert(!dst->isContained());
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD);
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+ }
+ }
+ assert(fmt != IF_NONE);
+ emitHandleMemOp(mem, id, fmt, ins);
+
+ // Determine the instruction size
+ //
+ UNATIVE_OFFSET sz = 0;
+ if (intConst)
+ {
+ sz = emitInsSizeAM(id, insCodeMI(ins), (int)intConst->IconValue());
+ }
+ else
+ {
+ if (mem == dst)
+ {
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ }
+ else // mem == src
+ {
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ sz = emitInsSizeAM(id, insCode(ins));
+ }
+ else
+ {
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ }
+ }
+ }
+ assert(sz != 0);
+
+ regNumber result = REG_NA;
+ if (src == mem)
+ {
+ result = dst->gtRegNum;
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+ return result;
+}
+
+//------------------------------------------------------------------------
+// emitInsRMW: Emit logic for Read-Modify-Write binary instructions.
+//
+// Responsible for emitting a single instruction that will perform an operation of the form:
+// *addr = *addr <BinOp> src
+// For example:
+// ADD [RAX], RCX
+//
+// Arguments:
+// ins - instruction to generate
+// attr - emitter attribute for instruction
+// storeInd - indir for RMW addressing mode
+// src - source operand of instruction
+//
+// Assumptions:
+// Lowering has taken care of recognizing the StoreInd pattern of:
+// StoreInd( AddressTree, BinOp( Ind ( AddressTree ), Operand ) )
+// The address to store is already sitting in a register.
+//
+// Notes:
+// This is a no-produce operation, meaning that no register output will
+// be produced for future use in the code stream.
+//
+void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd, GenTree* src)
+{
+ GenTreePtr addr = storeInd->Addr();
+ addr = addr->gtSkipReloadOrCopy();
+ assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA ||
+ addr->OperGet() == GT_CLS_VAR_ADDR || addr->OperGet() == GT_CNS_INT);
+
+ instrDesc* id = nullptr;
+ UNATIVE_OFFSET sz;
+
+ size_t offset = 0;
+ if (addr->OperGet() != GT_CLS_VAR_ADDR)
+ {
+ offset = storeInd->Offset();
+ }
+
+ if (src->isContainedIntOrIImmed())
+ {
+ GenTreeIntConCommon* intConst = src->AsIntConCommon();
+ id = emitNewInstrAmdCns(attr, offset, (int)intConst->IconValue());
+ emitHandleMemOp(storeInd, id, IF_ARW_CNS, ins);
+ id->idIns(ins);
+ sz = emitInsSizeAM(id, insCodeMI(ins), (int)intConst->IconValue());
+ }
+ else
+ {
+ assert(!src->isContained()); // there must be one non-contained src
+
+ // ind, reg
+ id = emitNewInstrAmd(attr, offset);
+ emitHandleMemOp(storeInd, id, IF_ARW_RRD, ins);
+ id->idReg1(src->gtRegNum);
+ id->idIns(ins);
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// emitInsRMW: Emit logic for Read-Modify-Write unary instructions.
+//
+// Responsible for emitting a single instruction that will perform an operation of the form:
+// *addr = UnaryOp *addr
+// For example:
+// NOT [RAX]
+//
+// Arguments:
+// ins - instruction to generate
+// attr - emitter attribute for instruction
+// storeInd - indir for RMW addressing mode
+//
+// Assumptions:
+// Lowering has taken care of recognizing the StoreInd pattern of:
+// StoreInd( AddressTree, UnaryOp( Ind ( AddressTree ) ) )
+// The address to store is already sitting in a register.
+//
+// Notes:
+// This is a no-produce operation, meaning that no register output will
+// be produced for future use in the code stream.
+//
+void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeInd)
+{
+ GenTreePtr addr = storeInd->Addr();
+ addr = addr->gtSkipReloadOrCopy();
+ assert(addr->OperGet() == GT_LCL_VAR || addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_CLS_VAR_ADDR ||
+ addr->OperGet() == GT_LEA || addr->OperGet() == GT_CNS_INT);
+
+ size_t offset = 0;
+ if (addr->OperGet() != GT_CLS_VAR_ADDR)
+ {
+ offset = storeInd->Offset();
+ }
+
+ instrDesc* id = emitNewInstrAmd(attr, offset);
+ emitHandleMemOp(storeInd, id, IF_ARW, ins);
+ id->idIns(ins);
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+#endif // !LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+/*****************************************************************************
+ *
+ * Add an instruction of the form "op ST(0),ST(n)".
+ */
+
+void emitter::emitIns_F0_F(instruction ins, unsigned fpreg)
+{
+ UNATIVE_OFFSET sz = 2;
+ instrDesc* id = emitNewInstr();
+ insFormat fmt = emitInsModeFormat(ins, IF_TRD_FRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1((regNumber)fpreg);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction of the form "op ST(n),ST(0)".
+ */
+
+void emitter::emitIns_F_F0(instruction ins, unsigned fpreg)
+{
+ UNATIVE_OFFSET sz = 2;
+ instrDesc* id = emitNewInstr();
+ insFormat fmt = emitInsModeFormat(ins, IF_FRD_TRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1((regNumber)fpreg);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a single register.
+ */
+
+void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ assert(size <= EA_PTRSIZE);
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrTiny(attr);
+
+ switch (ins)
+ {
+ case INS_inc:
+ case INS_dec:
+#ifdef _TARGET_AMD64_
+
+ sz = 2; // x64 has no 1-byte opcode (it is the same encoding as the REX prefix)
+
+#else // !_TARGET_AMD64_
+
+ if (size == EA_1BYTE)
+ sz = 2; // Use the long form as the small one has no 'w' bit
+ else
+ sz = 1; // Use short form
+
+#endif // !_TARGET_AMD64_
+
+ break;
+
+ case INS_pop:
+ case INS_pop_hide:
+ case INS_push:
+ case INS_push_hide:
+
+ /* We don't currently push/pop small values */
+
+ assert(size == EA_PTRSIZE);
+
+ sz = 1;
+ break;
+
+ default:
+
+ /* All the sixteen INS_setCCs are contiguous. */
+
+ if (INS_seto <= ins && ins <= INS_setg)
+ {
+ // Rough check that we used the endpoints for the range check
+
+ assert(INS_seto + 0xF == INS_setg);
+
+ // The caller must specify EA_1BYTE for 'attr'
+
+ assert(attr == EA_1BYTE);
+
+ /* We expect this to always be a 'big' opcode */
+
+ assert(insEncodeMRreg(ins, reg, attr, insCodeMR(ins)) & 0x00FF0000);
+
+ size = attr;
+
+ sz = 3;
+ break;
+ }
+ else
+ {
+ sz = 2;
+ break;
+ }
+ }
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(reg);
+
+ // 16-bit operand instructions will need a prefix.
+ // This refers to 66h size prefix override.
+ if (size == EA_2BYTE)
+ {
+ sz += 1;
+ }
+
+ // Vex bytes
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insEncodeMRreg(ins, reg, attr, insCodeMR(ins)));
+
+ // REX byte
+ if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing a register and a constant.
+ */
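+//
+// Illustrative x86 sizes computed below (excluding any operand-size/REX/VEX prefix bytes added later):
+//     add ecx, 5       -> 83 C1 05     (3 bytes; the immediate fits in a byte)
+//     add ecx, 0x1000  -> 81 C1 imm32  (6 bytes)
+//     add eax, 0x1000  -> 05 imm32     (5 bytes; short EAX form)
+//     mov ecx, 0x1000  -> B9 imm32     (5 bytes)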
+
+void emitter::emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ // Allow emitting SSE2/AVX SIMD instructions of R_I form that can specify EA_16BYTE or EA_32BYTE
+ assert(size <= EA_PTRSIZE || IsSSEOrAVXInstruction(ins));
+
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(size < EA_8BYTE || ins == INS_mov || ((int)val == val && !EA_IS_CNS_RELOC(attr)));
+#endif
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS);
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+ // Figure out the size of the instruction
+ switch (ins)
+ {
+ case INS_mov:
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is equivalent to mov reg, imm32 if the high order bits are all 0
+ // and this isn't a reloc constant.
+ if (((size > EA_4BYTE) && (0 == (val & 0xFFFFFFFF00000000LL))) && !EA_IS_CNS_RELOC(attr))
+ {
+ attr = size = EA_4BYTE;
+ }
+
+ if (size > EA_4BYTE)
+ {
+ sz = 9; // Really it is 10, but we'll add one more later
+ break;
+ }
+#endif // _TARGET_AMD64_
+ sz = 5;
+ break;
+
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_RRW_SHF;
+ sz = 3;
+ val &= 0x7F;
+ valInByte = true; // shift amount always placed in a byte
+ break;
+
+ default:
+
+ if (EA_IS_CNS_RELOC(attr))
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ }
+
+ if (valInByte)
+ {
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ sz = 5;
+ }
+ else
+ {
+ sz = 3;
+ }
+ }
+ else
+ {
+ if (reg == REG_EAX && !instrIs3opImul(ins))
+ {
+ sz = 1;
+ }
+ else
+ {
+ sz = 2;
+ }
+
+#ifdef _TARGET_AMD64_
+ if (size > EA_4BYTE)
+ {
+ // We special-case anything that takes a full 8-byte constant.
+ sz += 4;
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ sz += EA_SIZE_IN_BYTES(attr);
+ }
+ }
+ break;
+ }
+
+ // Vex prefix size
+ sz += emitGetVexPrefixSize(ins, attr);
+
+ // Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
+ // 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
+ // register. So we also need to check if that built-in register is an extended register.
+ if (IsExtendedReg(reg, attr) || TakesRexWPrefix(ins, size) || instrIsExtendedReg3opImul(ins))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+#ifdef _TARGET_X86_
+ assert(reg < 8);
+#endif
+
+ id = emitNewInstrSC(attr, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(reg);
+
+ // 16-bit operand instructions will need a prefix
+ if (size == EA_2BYTE)
+ {
+ sz += 1;
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (reg == REG_ESP)
+ {
+ if (emitCntStackDepth)
+ {
+ if (ins == INS_sub)
+ {
+ S_UINT32 newStackLvl(emitCurStackLvl);
+ newStackLvl += S_UINT32(val);
+ noway_assert(!newStackLvl.IsOverflow());
+
+ emitCurStackLvl = newStackLvl.Value();
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_add)
+ {
+ S_UINT32 newStackLvl = S_UINT32(emitCurStackLvl) - S_UINT32(val);
+ noway_assert(!newStackLvl.IsOverflow());
+
+ emitCurStackLvl = newStackLvl.Value();
+ }
+ }
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction referencing an integer constant.
+ */
+
+void emitter::emitIns_I(instruction ins, emitAttr attr, int val)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+ bool valInByte = ((signed char)val == val);
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ if (EA_IS_CNS_RELOC(attr))
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ }
+
+ switch (ins)
+ {
+ case INS_loop:
+ case INS_jge:
+ sz = 2;
+ break;
+
+ case INS_ret:
+ sz = 3;
+ break;
+
+ case INS_push_hide:
+ case INS_push:
+ sz = valInByte ? 2 : 5;
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ }
+
+ id = emitNewInstrSC(attr, val);
+ id->idIns(ins);
+ id->idInsFmt(IF_CNS);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add a "jump through a table" instruction.
+ */
+
+void emitter::emitIns_IJ(emitAttr attr, regNumber reg, unsigned base)
+{
+ assert(EA_SIZE(attr) == EA_4BYTE);
+
+ UNATIVE_OFFSET sz = 3 + 4;
+ const instruction ins = INS_i_jmp;
+
+ if (IsExtendedReg(reg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ instrDesc* id = emitNewInstrAmd(attr, base);
+
+ id->idIns(ins);
+ id->idInsFmt(IF_ARD);
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZP;
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = base;
+#endif
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static data member operand. If 'size' is 0, the
+ * instruction operates on the address of the static member instead of its
+ * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]").
+ */
+
+void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ /* Are we pushing the offset of the class variable? */
+
+ if (EA_IS_OFFSET(attr))
+ {
+ assert(ins == INS_push);
+ sz = 1 + sizeof(void*);
+
+ id = emitNewInstrDsp(EA_1BYTE, offs);
+ id->idIns(ins);
+ id->idInsFmt(IF_MRD_OFF);
+ }
+ else
+ {
+#if FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_MRD, IF_TRD_MRD, IF_MWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_MRD);
+#endif // !FEATURE_STACK_FP_X87
+
+ id = emitNewInstrDsp(attr, offs);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ sz = emitInsSizeCV(id, insCodeMR(ins));
+ }
+
+ // Vex prefix size
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ if (TakesRexWPrefix(ins, attr))
+ {
+ // REX.W prefix
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with two register operands.
+ */
+
+void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2)
+{
+ emitAttr size = EA_SIZE(attr);
+
+ /* We don't want to generate any useless mov instructions! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ // Same-reg 4-byte mov can be useful because it performs a
+ // zero-extension to 8 bytes.
+ assert(ins != INS_mov || reg1 != reg2 || size == EA_4BYTE);
+#else
+ assert(ins != INS_mov || reg1 != reg2);
+#endif // _TARGET_AMD64_
+
+ assert(size <= EA_32BYTE);
+ noway_assert(emitVerifyEncodable(ins, size, reg1, reg2));
+
+ UNATIVE_OFFSET sz = emitInsSizeRR(ins, reg1, reg2, attr);
+
+ /* Special case: "XCHG" uses a different format */
+ insFormat fmt = (ins == INS_xchg) ? IF_RRW_RRW : emitInsModeFormat(ins, IF_RRD_RRD);
+
+ instrDesc* id = emitNewInstrTiny(attr);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with two register operands and an integer constant.
+ */
+
+void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival)
+{
+ // SSE2 version requires 5 bytes and AVX version 6 bytes
+ UNATIVE_OFFSET sz = 4;
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ sz = UseAVX() ? 6 : 5;
+ }
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ instrDesc* id = emitNewInstrSC(attr, ival);
+
+ // REX prefix
+ if (IsExtendedReg(reg1, attr) || IsExtendedReg(reg2, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(IF_RRW_RRW_CNS);
+ id->idReg1(reg1);
+ id->idReg2(reg2);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+#ifdef FEATURE_AVX_SUPPORT
+/*****************************************************************************
+*
+* Add an instruction with three register operands.
+*/
+
+void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
+{
+ assert(IsSSEOrAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+ // Currently the VEX prefix only uses the three-byte form.
+ // size = vex + opcode + ModR/M = 3 + 1 + 1 = 5
+ // TODO-XArch-CQ: We should create a function that can calculate the size of all kinds of AVX instructions in the future
+ UNATIVE_OFFSET sz = 5;
+
+ instrDesc* id = emitNewInstr(attr);
+ id->idIns(ins);
+ id->idInsFmt(IF_RWR_RRD_RRD);
+ id->idReg1(targetReg);
+ id->idReg2(reg1);
+ id->idReg3(reg2);
+
+ id->idCodeSize(sz);
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+#endif
+/*****************************************************************************
+ *
+ * Add an instruction with a register + static member operands.
+ */
+void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ emitAttr size = EA_SIZE(attr);
+
+ assert(size <= EA_32BYTE);
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ // Are we MOV'ing the offset of the class variable into EAX?
+ if (EA_IS_OFFSET(attr))
+ {
+ id = emitNewInstrDsp(EA_1BYTE, offs);
+ id->idIns(ins);
+ id->idInsFmt(IF_RWR_MRD_OFF);
+
+ assert(ins == INS_mov && reg == REG_EAX);
+
+ // Special case: "mov eax, [addr]" is smaller
+ sz = 1 + sizeof(void*);
+ }
+ else
+ {
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_MRD);
+
+ id = emitNewInstrDsp(attr, offs);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+#ifdef _TARGET_X86_
+ // Special case: "mov eax, [addr]" is smaller.
+ // This case is not enabled for amd64 as it always uses RIP relative addressing
+ // and it results in smaller instruction size than encoding 64-bit addr in the
+ // instruction.
+ if (ins == INS_mov && reg == REG_EAX)
+ {
+ sz = 1 + sizeof(void*);
+ if (size == EA_2BYTE)
+ sz += 1;
+ }
+ else
+#endif //_TARGET_X86_
+ {
+ sz = emitInsSizeCV(id, insCodeRM(ins));
+ }
+
+ // Special case: mov reg, fs:[ddd]
+ if (fldHnd == FLD_GLOBAL_FS)
+ {
+ sz += 1;
+ }
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+
+ // REX prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idReg1(reg);
+ id->idCodeSize(sz);
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + register operands.
+ */
+
+void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ emitAttr size = EA_SIZE(attr);
+
+#if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+ // For x86 RyuJIT it is valid to storeind a double sized operand in an xmm reg to memory
+ assert(size <= EA_8BYTE);
+#else
+ assert(size <= EA_PTRSIZE);
+#endif
+
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+ instrDesc* id = emitNewInstrDsp(attr, offs);
+ insFormat fmt = emitInsModeFormat(ins, IF_MRD_RRD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ UNATIVE_OFFSET sz;
+
+#ifdef _TARGET_X86_
+ // Special case: "mov [addr], EAX" is smaller.
+ // This case is not enabled for amd64 as it always uses RIP relative addressing,
+ // which results in a smaller instruction size than encoding the 64-bit addr in
+ // the instruction.
+ if (ins == INS_mov && reg == REG_EAX)
+ {
+ sz = 1 + sizeof(void*);
+ if (size == EA_2BYTE)
+ sz += 1;
+ }
+ else
+#endif //_TARGET_X86_
+ {
+ sz = emitInsSizeCV(id, insCodeMR(ins));
+ }
+
+ // Special case: mov fs:[ddd], reg
+ if (fldHnd == FLD_GLOBAL_FS)
+ {
+ sz += 1;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ // REX prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(reg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idReg1(reg);
+ id->idCodeSize(sz);
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add an instruction with a static member + constant.
+ */
+
+void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val)
+{
+#if RELOC_SUPPORT
+ // Statics always need relocs
+ if (!jitStaticFldIsGlobAddr(fldHnd))
+ {
+ attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+ }
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_MRW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_MRD_CNS);
+ break;
+ }
+
+ instrDesc* id = emitNewInstrCnsDsp(attr, val, offs);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ size_t code = insCodeMI(ins);
+ UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
+
+#ifdef _TARGET_AMD64_
+ // Vex prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
+
+ // REX prefix, if not already included in "code"
+ if (TakesRexWPrefix(ins, attr) && (code & REX_PREFIX_MASK) == 0)
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+#endif // _TARGET_AMD64_
+
+ id->idAddr()->iiaFieldHnd = fldHnd;
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs)
+{
+ assert(ins == INS_mov);
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ instrDescLbl* id = emitNewInstrLbl();
+
+ id->idIns(ins);
+ id->idInsFmt(IF_SWR_LABEL);
+ id->idAddr()->iiaBBlabel = dst;
+
+ /* The label reference is always long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = 1;
+
+ /* Record the current IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this instruction to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+ UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(insCodeMI(ins), varx, offs);
+ id->dstLclVar.initLclVarAddr(varx, offs);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+#if RELOC_SUPPORT
+#ifndef _TARGET_AMD64_
+ // Storing the address of a basicBlock will need a reloc
+ // as the instruction uses the absolute address,
+ // not a relative address.
+ //
+ // On Amd64, absolute code addresses should always go through a reloc
+ // to be encoded as a RIP rel32 offset.
+ if (emitComp->opts.compReloc)
+#endif
+ {
+ id->idSetIsDspReloc();
+ }
+#endif // RELOC_SUPPORT
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Add a label instruction.
+ */
+void emitter::emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg)
+{
+ assert(ins == INS_lea);
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ instrDescJmp* id = emitNewInstrJmp();
+
+ id->idIns(ins);
+ id->idReg1(reg);
+ id->idInsFmt(IF_RWR_LABEL);
+ id->idOpSize(EA_SIZE(attr)); // emitNewInstrJmp() sets the size (incorrectly) to EA_1BYTE
+ id->idAddr()->iiaBBlabel = dst;
+
+ /* The label reference is always long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = 1;
+
+ /* Record the current IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this instruction to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#ifdef DEBUG
+ // Mark the catch return
+ if (emitComp->compCurBB->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ id->idDebugOnlyInfo()->idCatchRet = true;
+ }
+#endif // DEBUG
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ // Set the relocation flags - these give a hint to the zapper to perform
+ // relocation of the specified 32-bit address.
+ id->idSetRelocFlags(attr);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * The following adds instructions referencing address modes.
+ */
+
+void emitter::emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int disp, int memCookie, void* clsCookie)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ /*
+ Useful if you want to trap moves with 0 constant
+ if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
+ {
+ printf("MOV 0\n");
+ }
+ */
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ assert((memCookie == 0) == (clsCookie == nullptr));
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = memCookie;
+ id->idDebugOnlyInfo()->idClsCookie = clsCookie;
+#endif
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ /*
+ Useful if you want to trap moves with 0 constant
+ if (ins == INS_mov && val == 0 && EA_SIZE(attr) >= EA_4BYTE)
+ {
+ printf("MOV 0\n");
+ }
+ */
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_AR(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ if (ins == INS_lea)
+ {
+ if (ireg == base && disp == 0)
+ {
+ // The emitter may not be the obvious place for this optimization, but it's a convenient
+ // choke point for all the emitIns(ins, tree) callers; otherwise we would have to perform
+ // this analysis at each call site.
+ //
+ return;
+ }
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ assert((memCookie == NULL) == (clsCookie == nullptr));
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = memCookie;
+ id->idDebugOnlyInfo()->idClsCookie = clsCookie;
+#endif
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_AR_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, int disp, int memCookie, void* clsCookie)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_32BYTE));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ assert((memCookie == NULL) == (clsCookie == nullptr));
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idMemCookie = memCookie;
+ id->idDebugOnlyInfo()->idClsCookie = clsCookie;
+#endif
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = rg2;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitter::OPSZ1;
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber index, int disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(1);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_I_ARX(
+ instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = reg;
+ id->idAddr()->iiaAddrMode.amIndxReg = rg2;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_ARX_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber base, regNumber index, unsigned mul, int disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = base;
+ id->idAddr()->iiaAddrMode.amIndxReg = index;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_ARW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_ARD_CNS);
+ break;
+ }
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmdCns(attr, disp, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMI(ins), val);
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
+{
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE) && (ireg != REG_NA));
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_ARD);
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeRM(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp)
+{
+ UNATIVE_OFFSET sz;
+ instrDesc* id = emitNewInstrAmd(attr, disp);
+ insFormat fmt;
+
+ if (ireg == REG_NA)
+ {
+#if FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD, IF_TRD_ARD, IF_AWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ fmt = emitInsModeFormat(ins, IF_ARD);
+#endif // !FEATURE_STACK_FP_X87
+ }
+ else
+ {
+ fmt = emitInsModeFormat(ins, IF_ARD_RRD);
+ noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), ireg));
+ assert((CodeGen::instIsFP(ins) == false) && (EA_SIZE(attr) <= EA_8BYTE));
+
+ id->idReg1(ireg);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ id->idAddr()->iiaAddrMode.amIndxReg = reg;
+ id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(mul);
+
+ assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
+
+ sz = emitInsSizeAM(id, insCodeMR(ins));
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * The following functions add instructions referencing stack-based local variables.
+ */
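+//
+// Illustrative sketch (not part of this change): "S" denotes a stack-based local,
+// identified by a variable number plus an offset within it. For example, storing
+// ecx into local #2 and reloading it later might look like
+//     emitIns_S_R(INS_mov, EA_4BYTE, REG_ECX, 2, 0);
+//     emitIns_R_S(INS_mov, EA_4BYTE, REG_ECX, 2, 0);
+// (the variable number and register are made up for the example).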
+
+void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
+{
+ instrDesc* id = emitNewInstr(attr);
+ UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
+#if FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_SRD, IF_TRD_SRD, IF_SWR_TRD);
+#else // !FEATURE_STACK_FP_X87
+ insFormat fmt = emitInsModeFormat(ins, IF_SRD);
+#endif // !FEATURE_STACK_FP_X87
+
+ // 16-bit operand instructions will need a prefix
+ if (EA_SIZE(attr) == EA_2BYTE)
+ {
+ sz += 1;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+ else if (ins == INS_pop)
+ {
+ emitCurStackLvl -= emitCntStackDepth;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
+{
+ instrDesc* id = emitNewInstr(attr);
+ UNATIVE_OFFSET sz = emitInsSizeSV(insCodeMR(ins), varx, offs);
+ insFormat fmt = emitInsModeFormat(ins, IF_SRD_RRD);
+
+ // 16-bit operand instructions will need a prefix
+ if (EA_SIZE(attr) == EA_2BYTE)
+ {
+ sz++;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
+{
+ emitAttr size = EA_SIZE(attr);
+ noway_assert(emitVerifyEncodable(ins, size, ireg));
+
+ instrDesc* id = emitNewInstr(attr);
+ UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs);
+ insFormat fmt = emitInsModeFormat(ins, IF_RRD_SRD);
+
+ // Most 16-bit operand instructions need a prefix
+ if (size == EA_2BYTE && ins != INS_movsx && ins != INS_movzx)
+ {
+ sz++;
+ }
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr) || IsExtendedReg(ireg, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ id->idReg1(ireg);
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val)
+{
+#ifdef _TARGET_AMD64_
+ // mov reg, imm64 is the only opcode which takes a full 8 byte immediate
+ // all other opcodes take a sign-extended 4-byte immediate
+ noway_assert(EA_SIZE(attr) < EA_8BYTE || !EA_IS_CNS_RELOC(attr));
+#endif
+
+ insFormat fmt;
+
+ switch (ins)
+ {
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ assert(val != 1);
+ fmt = IF_SRW_SHF;
+ val &= 0x7F;
+ break;
+
+ default:
+ fmt = emitInsModeFormat(ins, IF_SRD_CNS);
+ break;
+ }
+
+ instrDesc* id = emitNewInstrCns(attr, val);
+ id->idIns(ins);
+ id->idInsFmt(fmt);
+ UNATIVE_OFFSET sz = emitInsSizeSV(id, varx, offs, val);
+
+ // VEX prefix
+ sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
+
+ // 64-bit operand instructions will need a REX.W prefix
+ if (TakesRexWPrefix(ins, attr))
+ {
+ sz += emitGetRexPrefixSize(ins);
+ }
+
+ id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+ id->idCodeSize(sz);
+#ifdef DEBUG
+ id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
+#endif
+ dispIns(id);
+ emitCurIGsize += sz;
+}
+
+/*****************************************************************************
+ *
+ * Record that a jump instruction uses the short encoding
+ *
+ */
+void emitter::emitSetShortJump(instrDescJmp* id)
+{
+ if (id->idjKeepLong)
+ {
+ return;
+ }
+
+ id->idjShort = true;
+}
+
+/*****************************************************************************
+ *
+ * Add a jmp instruction.
+ */
+
+void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount /* = 0 */)
+{
+ UNATIVE_OFFSET sz;
+ instrDescJmp* id = emitNewInstrJmp();
+
+ assert(dst->bbFlags & BBF_JMP_TARGET);
+
+ id->idIns(ins);
+ id->idInsFmt(IF_LABEL);
+ id->idAddr()->iiaBBlabel = dst;
+
+#ifdef DEBUG
+ // Mark the finally call
+ if (ins == INS_call && emitComp->compCurBB->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ id->idDebugOnlyInfo()->idFinallyCall = true;
+ }
+#endif // DEBUG
+
+ /* Assume the jump will be long */
+
+ id->idjShort = 0;
+ id->idjKeepLong = emitComp->fgInDifferentRegions(emitComp->compCurBB, dst);
+
+ /* Record the jump's IG and offset within it */
+
+ id->idjIG = emitCurIG;
+ id->idjOffs = emitCurIGsize;
+
+ /* Append this jump to this IG's jump list */
+
+ id->idjNext = emitCurIGjmpList;
+ emitCurIGjmpList = id;
+
+#if EMITTER_STATS
+ emitTotalIGjmps++;
+#endif
+
+ /* Figure out the max. size of the jump/call instruction */
+
+ if (ins == INS_call)
+ {
+ sz = CALL_INST_SIZE;
+ }
+ else if (ins == INS_push || ins == INS_push_hide)
+ {
+#if RELOC_SUPPORT
+ // Pushing the address of a basicBlock will need a reloc
+ // as the instruction uses the absolute address,
+ // not a relative address
+ if (emitComp->opts.compReloc)
+ {
+ id->idSetIsDspReloc();
+ }
+#endif
+ sz = PUSH_INST_SIZE;
+ }
+ else
+ {
+ insGroup* tgt;
+
+ /* This is a jump - assume the worst */
+
+ sz = (ins == INS_jmp) ? JMP_SIZE_LARGE : JCC_SIZE_LARGE;
+
+ /* Can we guess at the jump distance? */
+
+ tgt = (insGroup*)emitCodeGetCookie(dst);
+
+ if (tgt)
+ {
+ int extra;
+ UNATIVE_OFFSET srcOffs;
+ int jmpDist;
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+
+ /* This is a backward jump - figure out the distance */
+
+ srcOffs = emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL;
+
+ /* Compute the distance estimate */
+
+ jmpDist = srcOffs - tgt->igOffs;
+ assert((int)jmpDist > 0);
+
+ /* How much beyond the max. short distance does the jump go? */
+
+ extra = jmpDist + JMP_DIST_SMALL_MAX_NEG;
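+ // Worked example (illustrative): assuming JMP_DIST_SMALL_MAX_NEG is -128,
+ // a backward jump whose estimated distance is 100 bytes gives extra == -28,
+ // so the short (rel8) encoding is guaranteed to reach, while a distance of
+ // 200 gives extra == 72 and the conservative large size is kept.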
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[0] Jump source is at %08X\n", srcOffs);
+ printf("[0] Label block is at %08X\n", tgt->igOffs);
+ printf("[0] Jump distance - %04X\n", jmpDist);
+ if (extra > 0)
+ {
+ printf("[0] Distance excess = %d \n", extra);
+ }
+ }
+#endif
+
+ if (extra <= 0 && !id->idjKeepLong)
+ {
+ /* Wonderful - this jump surely will be short */
+
+ emitSetShortJump(id);
+ sz = JMP_SIZE_SMALL;
+ }
+ }
+#if DEBUG_EMIT
+ else
+ {
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[0] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[0] Jump source is at %04X/%08X\n", emitCurIGsize,
+ emitCurCodeOffset + emitCurIGsize + JMP_SIZE_SMALL);
+ printf("[0] Label block is unknown\n");
+ }
+ }
+#endif
+ }
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (ins == INS_push)
+ {
+ emitCurStackLvl += emitCntStackDepth;
+
+ if (emitMaxStackDepth < emitCurStackLvl)
+ emitMaxStackDepth = emitCurStackLvl;
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+/*****************************************************************************
+ *
+ * Add a call instruction (direct or indirect).
+ * argSize<0 means that the caller will pop the arguments
+ *
+ * The other arguments are interpreted depending on callType as shown:
+ * Unless otherwise specified, ireg,xreg,xmul,disp should have default values.
+ *
+ * EC_FUNC_TOKEN : addr is the method address
+ * EC_FUNC_TOKEN_INDIR : addr is the indirect method address
+ * EC_FUNC_ADDR : addr is the absolute address of the function
+ * EC_FUNC_VIRTUAL : "call [ireg+disp]"
+ *
+ * If callType is one of these emitCallTypes, addr has to be NULL.
+ * EC_INDIR_R : "call ireg".
+ * EC_INDIR_SR : "call lcl<disp>" (eg. call [ebp-8]).
+ * EC_INDIR_C : "call clsVar<disp>" (eg. call [clsVarAddr])
+ * EC_INDIR_ARD : "call [ireg+xreg*xmul+disp]"
+ *
+ */
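+//
+// Illustrative usage (not part of this change): a direct call to a known method
+// would pass EC_FUNC_TOKEN with "addr" set to the method's entry point and the
+// default REG_NA/0 values for ireg/xreg/xmul/disp, while an indirect call through
+// a register would pass EC_INDIR_R with "ireg" holding the target address and
+// "addr" == nullptr; the GC arguments (ptrVars/gcrefRegs/byrefRegs) describe what
+// is live across the call.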
+
+void emitter::emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset, // = BAD_IL_OFFSET
+ regNumber ireg, // = REG_NA
+ regNumber xreg, // = REG_NA
+ unsigned xmul, // = 0
+ ssize_t disp, // = 0
+ bool isJump, // = false
+ bool isNoGC) // = false
+{
+ /* Sanity check the arguments depending on callType */
+
+ assert(callType < EC_COUNT);
+ assert((callType != EC_FUNC_TOKEN && callType != EC_FUNC_TOKEN_INDIR && callType != EC_FUNC_ADDR) ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType != EC_FUNC_VIRTUAL || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0));
+ assert(callType < EC_INDIR_R || callType == EC_INDIR_ARD || callType == EC_INDIR_C || addr == nullptr);
+ assert(callType != EC_INDIR_R || (ireg < REG_COUNT && xreg == REG_NA && xmul == 0 && disp == 0));
+ assert(callType != EC_INDIR_SR ||
+ (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp < (int)emitComp->lvaCount));
+ assert(callType != EC_INDIR_C || (ireg == REG_NA && xreg == REG_NA && xmul == 0 && disp != 0));
+
+ // Our stack level should always be at least as large as the number of argument bytes
+ // we push. Just a sanity check.
+ assert((unsigned)abs((signed)argSize) <= codeGen->genStackLevel);
+
+#if STACK_PROBES
+ if (emitComp->opts.compNeedStackProbes)
+ {
+ // If we've pushed more than JIT_RESERVED_STACK allows, do an additional stack probe.
+ // Else, just make sure the prolog does a probe for us. The invariant we're trying
+ // to maintain is that at any point we go out to unmanaged code, there is at least
+ // CORINFO_STACKPROBE_DEPTH bytes of stack available.
+ //
+ // The reason we are not doing one probe for the max size in the prolog
+ // is that we don't have the max depth precomputed (it can depend on codegen),
+ // and we need it at the time we generate locallocs.
+ //
+ // Compiler::lvaAssignFrameOffsets sets up compLclFrameSize, which takes into
+ // account everything except for the arguments of a callee.
+ //
+ //
+ //
+ if ((sizeof(void*) + // return address for call
+ emitComp->genStackLevel +
+ // Current stack level. This gets reset on every
+ // localloc and in the prolog (the invariant is that
+ // genStackLevel is 0 on basic block entry and exit and
+ // after any alloca). genStackLevel will include any arguments
+ // to the call, so we will insert an additional probe if
+ // we've consumed more than JIT_RESERVED_STACK bytes
+ // of stack, which is what the prolog probe covers (in
+ // addition to the EE requested size)
+ (emitComp->compHndBBtabCount * sizeof(void*))
+ // Hidden slots for calling finallys
+ ) >= JIT_RESERVED_STACK)
+ {
+ // This happens when you have a call with a lot of arguments or a call made when
+ // there's a lot of stuff pushed on the stack (for example a call whose return
+ // value is an argument of another call that has pushed stuff on the stack).
+ // This shouldn't be very frequent.
+ // For different values of JIT_RESERVED_STACK
+ //
+ // For mscorlib (109605 calls)
+ //
+ // 14190 probes in prologs (56760 bytes of code)
+ //
+ // JIT_RESERVED_STACK = 16 : 5452 extra probes
+ // JIT_RESERVED_STACK = 32 : 1084 extra probes
+ // JIT_RESERVED_STACK = 64 : 1 extra probe
+ // JIT_RESERVED_STACK = 96 : 0 extra probes
+ emitComp->genGenerateStackProbe();
+ }
+ else
+ {
+ if (emitComp->compGeneratingProlog || emitComp->compGeneratingEpilog)
+ {
+ if (emitComp->compStackProbePrologDone)
+ {
+ // We already generated a probe and this call is not happening
+ // at a depth >= JIT_RESERVED_STACK, so nothing to do here
+ }
+ else
+ {
+ // 3 possible ways to get here:
+ // - We are in an epilog and haven't generated a probe in the prolog.
+ // This shouldn't happen as we don't generate any calls in epilog.
+ // - We are in the prolog, but doing a call before generating the probe.
+ // This shouldn't happen at all.
+ // - We are in the prolog, did not generate a probe but now we need
+ // to generate a probe because we need a call (eg: profiler). We'll
+ // need a probe.
+ //
+ // In any case, we need a probe
+
+ // Ignore the profiler callback for now.
+ if (!emitComp->compIsProfilerHookNeeded())
+ {
+ assert(!"We do not expect to get here");
+ emitComp->genGenerateStackProbe();
+ }
+ }
+ }
+ else
+ {
+ // We will need a probe and will generate it in the prolog
+ emitComp->genNeedPrologStackProbe = true;
+ }
+ }
+ }
+#endif // STACK_PROBES
+
+ int argCnt;
+
+ UNATIVE_OFFSET sz;
+ instrDesc* id;
+
+ /* This is the saved set of registers after a normal call */
+ unsigned savedSet = RBM_CALLEE_SAVED;
+
+ /* Some special helper calls have a different set of saved registers */
+
+ if (isNoGC)
+ {
+ // Get the set of registers that this call kills and remove it from the saved set.
+ savedSet = RBM_ALLINT & ~emitComp->compNoGCHelperCallKillSet(Compiler::eeGetHelperNum(methHnd));
+ }
+ else
+ {
+ assert(!emitNoGChelper(Compiler::eeGetHelperNum(methHnd)));
+ }
+
+ /* Trim out any callee-trashed registers from the live set */
+
+ gcrefRegs &= savedSet;
+ byrefRegs &= savedSet;
+
+#ifdef DEBUG
+ if (EMIT_GC_VERBOSE)
+ {
+ printf("\t\t\t\t\t\t\tCall: GCvars=%s ", VarSetOps::ToString(emitComp, ptrVars));
+ dumpConvertedVarSet(emitComp, ptrVars);
+ printf(", gcrefRegs=");
+ printRegMaskInt(gcrefRegs);
+ emitDispRegSet(gcrefRegs);
+ printf(", byrefRegs=");
+ printRegMaskInt(byrefRegs);
+ emitDispRegSet(byrefRegs);
+ printf("\n");
+ }
+#endif
+
+ assert(argSize % sizeof(void*) == 0);
+ argCnt = (int)(argSize / (ssize_t)sizeof(void*)); // we need a signed-divide
+
+#ifdef DEBUGGING_SUPPORT
+ /* Managed RetVal: emit sequence point for the call */
+ if (emitComp->opts.compDbgInfo && ilOffset != BAD_IL_OFFSET)
+ {
+ codeGen->genIPmappingAdd(ilOffset, false);
+ }
+#endif
+
+ /*
+ We need to allocate the appropriate instruction descriptor based
+ on whether this is a direct/indirect call, and whether we need to
+ record an updated set of live GC variables.
+
+ The stats for a ton of classes are as follows:
+
+ Direct call w/o GC vars 220,216
+ Indir. call w/o GC vars 144,781
+
+ Direct call with GC vars 9,440
+ Indir. call with GC vars 5,768
+ */
+
+ if (callType >= EC_FUNC_VIRTUAL)
+ {
+ /* Indirect call, virtual calls */
+
+ assert(callType == EC_FUNC_VIRTUAL || callType == EC_INDIR_R || callType == EC_INDIR_SR ||
+ callType == EC_INDIR_C || callType == EC_INDIR_ARD);
+
+ id = emitNewInstrCallInd(argCnt, disp, ptrVars, gcrefRegs, byrefRegs,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
+ }
+ else
+ {
+ // Helper/static/nonvirtual/function calls (direct or through handle),
+ // and calls to an absolute addr.
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR || callType == EC_FUNC_ADDR);
+
+ id = emitNewInstrCallDir(argCnt, ptrVars, gcrefRegs, byrefRegs,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize));
+ }
+
+ /* Update the emitter's live GC ref sets */
+
+ VarSetOps::Assign(emitComp, emitThisGCrefVars, ptrVars);
+ emitThisGCrefRegs = gcrefRegs;
+ emitThisByrefRegs = byrefRegs;
+
+ /* Set the instruction - special case jumping a function */
+ instruction ins = INS_call;
+
+ if (isJump)
+ {
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_TOKEN_INDIR);
+ if (callType == EC_FUNC_TOKEN)
+ {
+ ins = INS_l_jmp;
+ }
+ else
+ {
+ ins = INS_i_jmp;
+ }
+ }
+ id->idIns(ins);
+
+ id->idSetIsNoGC(isNoGC);
+
+ // Record the address: method, indirection, or funcptr
+ if (callType >= EC_FUNC_VIRTUAL)
+ {
+ // This is an indirect call (either a virtual call or func ptr call)
+
+ switch (callType)
+ {
+ case EC_INDIR_C:
+ // Indirect call using an absolute code address.
+ // Must be marked as relocatable and is done at the
+ // branch target location.
+ goto CALL_ADDR_MODE;
+
+ case EC_INDIR_R: // the address is in a register
+
+ id->idSetIsCallRegPtr();
+
+ __fallthrough;
+
+ case EC_INDIR_ARD: // the address is an indirection
+
+ goto CALL_ADDR_MODE;
+
+ case EC_INDIR_SR: // the address is in a lcl var
+
+ id->idInsFmt(IF_SRD);
+ // disp is really a lclVarNum
+ noway_assert((unsigned)disp == (size_t)disp);
+ id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
+ sz = emitInsSizeSV(insCodeMR(INS_call), (unsigned)disp, 0);
+
+ break;
+
+ case EC_FUNC_VIRTUAL:
+
+ CALL_ADDR_MODE:
+
+ // fall-through
+
+ // The function is "ireg" if id->idIsCallRegPtr(),
+ // else [ireg+xmul*xreg+disp]
+
+ id->idInsFmt(IF_ARD);
+
+ id->idAddr()->iiaAddrMode.amBaseReg = ireg;
+ id->idAddr()->iiaAddrMode.amIndxReg = xreg;
+ id->idAddr()->iiaAddrMode.amScale = xmul ? emitEncodeScale(xmul) : emitter::OPSZ1;
+
+ sz = emitInsSizeAM(id, insCodeMR(INS_call));
+
+ if (ireg == REG_NA && xreg == REG_NA)
+ {
+ if (codeGen->genCodeIndirAddrNeedsReloc(disp))
+ {
+ id->idSetIsDspReloc();
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ // An absolute indir address that doesn't need reloc should fit within 32-bits
+ // to be encoded as offset relative to zero. This addr mode requires an extra
+ // SIB byte
+ noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
+ sz++;
+ }
+#endif //_TARGET_AMD64_
+ }
+
+ break;
+
+ default:
+ NO_WAY("unexpected instruction");
+ break;
+ }
+ }
+ else if (callType == EC_FUNC_TOKEN_INDIR)
+ {
+ /* "call [method_addr]" */
+
+ assert(addr != nullptr);
+
+ id->idInsFmt(IF_METHPTR);
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+ sz = 6;
+
+#if RELOC_SUPPORT
+ // Since this is an indirect call through a pointer and we don't
+ // currently pass in emitAttr into this function, we query codegen
+ // whether addr needs a reloc.
+ if (codeGen->genCodeIndirAddrNeedsReloc((size_t)addr))
+ {
+ id->idSetIsDspReloc();
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ // An absolute indir address that doesn't need reloc should fit within 32-bits
+ // to be encoded as offset relative to zero. This addr mode requires an extra
+ // SIB byte
+ noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (size_t)addr);
+ sz++;
+ }
+#endif //_TARGET_AMD64_
+#endif // RELOC_SUPPORT
+ }
+ else
+ {
+ /* This is a simple direct call: "call helper/method/addr" */
+
+ assert(callType == EC_FUNC_TOKEN || callType == EC_FUNC_ADDR);
+
+ assert(addr != nullptr);
+
+ id->idInsFmt(IF_METHOD);
+ sz = 5;
+
+ id->idAddr()->iiaAddr = (BYTE*)addr;
+
+ if (callType == EC_FUNC_ADDR)
+ {
+ id->idSetIsCallAddr();
+ }
+
+#if RELOC_SUPPORT
+ // Direct call to a method and no addr indirection is needed.
+ if (codeGen->genCodeAddrNeedsReloc((size_t)addr))
+ {
+ id->idSetIsDspReloc();
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (emitComp->verbose && 0)
+ {
+ if (id->idIsLargeCall())
+ {
+ if (callType >= EC_FUNC_VIRTUAL)
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ else
+ {
+ printf("[%02u] Rec call GC vars = %s\n", id->idDebugOnlyInfo()->idNum,
+ VarSetOps::ToString(emitComp, ((instrDescCGCA*)id)->idcGCvars));
+ }
+ }
+ }
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+ id->idDebugOnlyInfo()->idMemCookie = (size_t)methHnd; // method token
+ id->idDebugOnlyInfo()->idClsCookie = nullptr;
+ id->idDebugOnlyInfo()->idCallSig = sigInfo;
+#endif
+
+#if defined(LATE_DISASM)
+ if (addr != nullptr)
+ {
+ codeGen->getDisAssembler().disSetMethod((size_t)addr, methHnd);
+ }
+#endif // defined(LATE_DISASM)
+
+ id->idCodeSize(sz);
+
+ dispIns(id);
+ emitCurIGsize += sz;
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ /* The call will pop the arguments */
+
+ if (emitCntStackDepth && argSize > 0)
+ {
+ noway_assert((ssize_t)emitCurStackLvl >= argSize);
+ emitCurStackLvl -= (int)argSize;
+ assert((int)emitCurStackLvl >= 0);
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * The following called for each recorded instruction -- use for debugging.
+ */
+void emitter::emitInsSanityCheck(instrDesc* id)
+{
+ // make certain you only try to put relocs on things that can have them.
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+ if ((idOp == ID_OP_SCNS) && id->idIsLargeCns())
+ {
+ idOp = ID_OP_CNS;
+ }
+
+ if (!id->idIsTiny())
+ {
+ if (id->idIsDspReloc())
+ {
+ assert(idOp == ID_OP_NONE || idOp == ID_OP_AMD || idOp == ID_OP_DSP || idOp == ID_OP_DSP_CNS ||
+ idOp == ID_OP_AMD_CNS || idOp == ID_OP_SPEC || idOp == ID_OP_CALL || idOp == ID_OP_JMP ||
+ idOp == ID_OP_LBL);
+ }
+
+ if (id->idIsCnsReloc())
+ {
+ assert(idOp == ID_OP_CNS || idOp == ID_OP_AMD_CNS || idOp == ID_OP_DSP_CNS || idOp == ID_OP_SPEC ||
+ idOp == ID_OP_CALL || idOp == ID_OP_JMP);
+ }
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Return the allocated size (in bytes) of the given instruction descriptor.
+ */
+
+size_t emitter::emitSizeOfInsDsc(instrDesc* id)
+{
+ if (emitIsTinyInsDsc(id))
+ {
+ return TINY_IDSC_SIZE;
+ }
+
+ if (emitIsScnsInsDsc(id))
+ {
+ return SMALL_IDSC_SIZE;
+ }
+
+ assert((unsigned)id->idInsFmt() < emitFmtCount);
+
+ ID_OPS idOp = (ID_OPS)emitFmtToOps[id->idInsFmt()];
+
+ // An INS_call instruction may use a "fat" direct/indirect call descriptor
+ // except for a local call to a label (i.e. call to a finally)
+ // Only ID_OP_CALL and ID_OP_SPEC check for this, so we enforce that the
+ // INS_call instruction always uses one of these idOps
+
+ if (id->idIns() == INS_call)
+ {
+ assert(idOp == ID_OP_CALL || // is a direct call
+ idOp == ID_OP_SPEC || // is an indirect call
+ idOp == ID_OP_JMP); // is a local call to finally clause
+ }
+
+ switch (idOp)
+ {
+ case ID_OP_NONE:
+ break;
+
+ case ID_OP_LBL:
+ return sizeof(instrDescLbl);
+
+ case ID_OP_JMP:
+ return sizeof(instrDescJmp);
+
+ case ID_OP_CALL:
+ case ID_OP_SPEC:
+ if (id->idIsLargeCall())
+ {
+ /* Must be a "fat" indirect call descriptor */
+ return sizeof(instrDescCGCA);
+ }
+
+ __fallthrough;
+
+ case ID_OP_SCNS:
+ case ID_OP_CNS:
+ case ID_OP_DSP:
+ case ID_OP_DSP_CNS:
+ case ID_OP_AMD:
+ case ID_OP_AMD_CNS:
+ if (id->idIsLargeCns())
+ {
+ if (id->idIsLargeDsp())
+ {
+ return sizeof(instrDescCnsDsp);
+ }
+ else
+ {
+ return sizeof(instrDescCns);
+ }
+ }
+ else
+ {
+ if (id->idIsLargeDsp())
+ {
+ return sizeof(instrDescDsp);
+ }
+ else
+ {
+ return sizeof(instrDesc);
+ }
+ }
+
+ default:
+ NO_WAY("unexpected instruction descriptor format");
+ break;
+ }
+
+ return sizeof(instrDesc);
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Return a string that represents the given register.
+ */
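+//
+// For example (illustrative): on AMD64, REG_RAX prints as "rax" for EA_8BYTE,
+// "eax" for EA_4BYTE, "ax" for EA_2BYTE and "al" for EA_1BYTE, while the extended
+// registers keep their name and gain a size suffix (e.g. "r10d" for REG_R10 with
+// EA_4BYTE); EA_16BYTE/EA_32BYTE map to the XMM/YMM register names.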
+
+const char* emitter::emitRegName(regNumber reg, emitAttr attr, bool varName)
+{
+ static char rb[2][128];
+ static unsigned char rbc = 0;
+
+ const char* rn = emitComp->compRegVarName(reg, varName);
+
+#ifdef _TARGET_AMD64_
+ char suffix = '\0';
+
+ switch (EA_SIZE(attr))
+ {
+ case EA_32BYTE:
+ return emitYMMregName(reg);
+
+ case EA_16BYTE:
+ return emitXMMregName(reg);
+
+ case EA_8BYTE:
+ break;
+
+ case EA_4BYTE:
+ if (reg > REG_R15)
+ {
+ break;
+ }
+
+ if (reg > REG_RDI)
+ {
+ suffix = 'd';
+ goto APPEND_SUFFIX;
+ }
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = 'e';
+ rb[rbc][1] = rn[1];
+ rb[rbc][2] = rn[2];
+ rb[rbc][3] = 0;
+ rn = rb[rbc];
+ break;
+
+ case EA_2BYTE:
+ if (reg > REG_RDI)
+ {
+ suffix = 'w';
+ goto APPEND_SUFFIX;
+ }
+ rn++;
+ break;
+
+ case EA_1BYTE:
+ if (reg > REG_RDI)
+ {
+ suffix = 'b';
+ APPEND_SUFFIX:
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = rn[0];
+ rb[rbc][1] = rn[1];
+ if (rn[2])
+ {
+ assert(rn[3] == 0);
+ rb[rbc][2] = rn[2];
+ rb[rbc][3] = suffix;
+ rb[rbc][4] = 0;
+ }
+ else
+ {
+ rb[rbc][2] = suffix;
+ rb[rbc][3] = 0;
+ }
+ }
+ else
+ {
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = rn[1];
+ if (reg < 4)
+ {
+ rb[rbc][1] = 'l';
+ rb[rbc][2] = 0;
+ }
+ else
+ {
+ rb[rbc][1] = rn[2];
+ rb[rbc][2] = 'l';
+ rb[rbc][3] = 0;
+ }
+ }
+
+ rn = rb[rbc];
+ break;
+
+ default:
+ break;
+ }
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ assert(strlen(rn) >= 3);
+
+ switch (EA_SIZE(attr))
+ {
+#ifndef LEGACY_BACKEND
+ case EA_32BYTE:
+ return emitYMMregName(reg);
+
+ case EA_16BYTE:
+ return emitXMMregName(reg);
+#endif // LEGACY_BACKEND
+
+ case EA_4BYTE:
+ break;
+
+ case EA_2BYTE:
+ rn++;
+ break;
+
+ case EA_1BYTE:
+ rbc = (rbc + 1) % 2;
+ rb[rbc][0] = rn[1];
+ rb[rbc][1] = 'l';
+ strcpy_s(&rb[rbc][2], sizeof(rb[0]) - 2, rn + 3);
+
+ rn = rb[rbc];
+ break;
+
+ default:
+ break;
+ }
+#endif // _TARGET_X86_
+
+#if 0
+ // The following is useful if you want register names to be tagged with * or ^ representing gcref or byref, respectively,
+ // however it's possibly not interesting most of the time.
+ if (EA_IS_GCREF(attr) || EA_IS_BYREF(attr))
+ {
+ if (rn != rb[rbc])
+ {
+ rbc = (rbc+1)%2;
+ strcpy_s(rb[rbc], sizeof(rb[rbc]), rn);
+ rn = rb[rbc];
+ }
+
+ if (EA_IS_GCREF(attr))
+ {
+ strcat_s(rb[rbc], sizeof(rb[rbc]), "*");
+ }
+ else if (EA_IS_BYREF(attr))
+ {
+ strcat_s(rb[rbc], sizeof(rb[rbc]), "^");
+ }
+ }
+#endif // 0
+
+ return rn;
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given FP register.
+ */
+
+const char* emitter::emitFPregName(unsigned reg, bool varName)
+{
+ assert(reg < REG_COUNT);
+
+ return emitComp->compFPregVarName((regNumber)(reg), varName);
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given XMM register.
+ */
+
+const char* emitter::emitXMMregName(unsigned reg)
+{
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) "x" sname,
+#ifndef LEGACY_BACKEND
+#include "register.h"
+#else // LEGACY_BACKEND
+#include "registerxmm.h"
+#endif // LEGACY_BACKEND
+ };
+
+ assert(reg < REG_COUNT);
+ assert(reg < sizeof(regNames) / sizeof(regNames[0]));
+
+ return regNames[reg];
+}
+
+/*****************************************************************************
+ *
+ * Return a string that represents the given YMM register.
+ */
+
+const char* emitter::emitYMMregName(unsigned reg)
+{
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) "y" sname,
+#ifndef LEGACY_BACKEND
+#include "register.h"
+#else // LEGACY_BACKEND
+#include "registerxmm.h"
+#endif // LEGACY_BACKEND
+ };
+
+ assert(reg < REG_COUNT);
+ assert(reg < sizeof(regNames) / sizeof(regNames[0]));
+
+ return regNames[reg];
+}
+
+/*****************************************************************************
+ *
+ * Display a static data member reference.
+ */
+
+void emitter::emitDispClsVar(CORINFO_FIELD_HANDLE fldHnd, ssize_t offs, bool reloc /* = false */)
+{
+ int doffs;
+
+ /* Filter out the special case of fs:[offs] */
+
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top12bits = (offs >> 20);
+ if ((top12bits != 0) && (top12bits != -1))
+ {
+ offs = 0xD1FFAB1E;
+ }
+ }
+
+ if (fldHnd == FLD_GLOBAL_FS)
+ {
+ printf("FS:[0x%04X]", offs);
+ return;
+ }
+
+ if (fldHnd == FLD_GLOBAL_DS)
+ {
+ printf("[0x%04X]", offs);
+ return;
+ }
+
+ printf("[");
+
+ doffs = Compiler::eeGetJitDataOffs(fldHnd);
+
+#ifdef RELOC_SUPPORT
+ if (reloc)
+ {
+ printf("reloc ");
+ }
+#endif
+
+ if (doffs >= 0)
+ {
+ if (doffs & 1)
+ {
+ printf("@CNS%02u", doffs - 1);
+ }
+ else
+ {
+ printf("@RWD%02u", doffs);
+ }
+
+ if (offs)
+ {
+ printf("%+Id", offs);
+ }
+ }
+ else
+ {
+ printf("classVar[%#x]", emitComp->dspPtr(fldHnd));
+
+ if (offs)
+ {
+ printf("%+Id", offs);
+ }
+ }
+
+ printf("]");
+
+ if (emitComp->opts.varNames && offs < 0)
+ {
+ printf("'%s", emitComp->eeGetFieldName(fldHnd));
+ if (offs)
+ {
+ printf("%+Id", offs);
+ }
+ printf("'");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm)
+{
+ int addr;
+ bool bEBP;
+
+ printf("[");
+
+ if (!asmfm || emitComp->lvaDoneFrameLayout == Compiler::NO_FRAME_LAYOUT)
+ {
+ if (varx < 0)
+ {
+ printf("TEMP_%02u", -varx);
+ }
+ else
+ {
+ printf("V%02u", +varx);
+ }
+
+ if (disp < 0)
+ {
+ printf("-0x%X", -disp);
+ }
+ else if (disp > 0)
+ {
+ printf("+0x%X", +disp);
+ }
+ }
+
+ if (emitComp->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT)
+ {
+ if (!asmfm)
+ {
+ printf(" ");
+ }
+
+ addr = emitComp->lvaFrameAddress(varx, &bEBP) + disp;
+
+ if (bEBP)
+ {
+ printf(STR_FPBASE);
+
+ if (addr < 0)
+ {
+ printf("-%02XH", -addr);
+ }
+ else if (addr > 0)
+ {
+ printf("+%02XH", addr);
+ }
+ }
+ else
+ {
+ /* Adjust the offset by amount currently pushed on the stack */
+
+ printf(STR_SPBASE);
+
+ if (addr < 0)
+ {
+ printf("-%02XH", -addr);
+ }
+ else if (addr > 0)
+ {
+ printf("+%02XH", addr);
+ }
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ if (emitCurStackLvl)
+ printf("+%02XH", emitCurStackLvl);
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+ }
+ }
+
+ printf("]");
+
+ if (varx >= 0 && emitComp->opts.varNames)
+ {
+ LclVarDsc* varDsc;
+ const char* varName;
+
+ assert((unsigned)varx < emitComp->lvaCount);
+ varDsc = emitComp->lvaTable + varx;
+ varName = emitComp->compLocalVarName(varx, offs);
+
+ if (varName)
+ {
+ printf("'%s", varName);
+
+ if (disp < 0)
+ {
+ printf("-%d", -disp);
+ }
+ else if (disp > 0)
+ {
+ printf("+%d", +disp);
+ }
+
+ printf("'");
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display a reloc value.
+ * If we are formatting for an assembly listing, don't print the hex value,
+ * since it will prevent us from doing assembly diffs.
+ */
+void emitter::emitDispReloc(ssize_t value)
+{
+ if (emitComp->opts.disAsm)
+ {
+ printf("(reloc)");
+ }
+ else
+ {
+ printf("(reloc 0x%Ix)", emitComp->dspPtr(value));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display an address mode.
+ */
+
+void emitter::emitDispAddrMode(instrDesc* id, bool noDetail)
+{
+ bool nsep = false;
+ ssize_t disp;
+
+ unsigned jtno = 0;
+ dataSection* jdsc = nullptr;
+
+ /* The displacement field is in an unusual place for calls */
+
+ disp = (id->idIns() == INS_call) ? emitGetInsCIdisp(id) : emitGetInsAmdAny(id);
+
+ /* Display a jump table label if this is a switch table jump */
+
+ if (id->idIns() == INS_i_jmp)
+ {
+ UNATIVE_OFFSET offs = 0;
+
+ /* Find the appropriate entry in the data section list */
+
+ for (jdsc = emitConsDsc.dsdList, jtno = 0; jdsc; jdsc = jdsc->dsNext)
+ {
+ UNATIVE_OFFSET size = jdsc->dsSize;
+
+ /* Is this a label table? */
+
+ if (size & 1)
+ {
+ size--;
+ jtno++;
+
+ if (offs == id->idDebugOnlyInfo()->idMemCookie)
+ {
+ break;
+ }
+ }
+
+ offs += size;
+ }
+
+ /* If we've found a matching entry then this is a table jump */
+
+ if (jdsc)
+ {
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ printf("reloc ");
+ }
+#endif
+ printf("J_M%03u_DS%02u", Compiler::s_compMethodsCount, id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ disp -= id->idDebugOnlyInfo()->idMemCookie;
+ }
+
+ bool frameRef = false;
+
+ printf("[");
+
+ if (id->idAddr()->iiaAddrMode.amBaseReg != REG_NA)
+ {
+ printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
+ nsep = true;
+ if (id->idAddr()->iiaAddrMode.amBaseReg == REG_ESP)
+ {
+ frameRef = true;
+ }
+ else if (emitComp->isFramePointerUsed() && id->idAddr()->iiaAddrMode.amBaseReg == REG_EBP)
+ {
+ frameRef = true;
+ }
+ }
+
+ if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
+ {
+ size_t scale = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
+
+ if (nsep)
+ {
+ printf("+");
+ }
+ if (scale > 1)
+ {
+ printf("%u*", scale);
+ }
+ printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amIndxReg));
+ nsep = true;
+ }
+
+#ifdef RELOC_SUPPORT
+ if ((id->idIsDspReloc()) && (id->idIns() != INS_i_jmp))
+ {
+ if (nsep)
+ {
+ printf("+");
+ }
+ emitDispReloc(disp);
+ }
+ else
+#endif
+ {
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top12bits = (disp >> 20);
+ if ((top12bits != 0) && (top12bits != -1))
+ {
+ disp = 0xD1FFAB1E;
+ }
+ }
+
+ if (disp > 0)
+ {
+ if (nsep)
+ {
+ printf("+");
+ }
+ if (frameRef)
+ {
+ printf("%02XH", disp);
+ }
+ else if (disp < 1000)
+ {
+ printf("%d", disp);
+ }
+ else if (disp <= 0xFFFF)
+ {
+ printf("%04XH", disp);
+ }
+ else
+ {
+ printf("%08XH", disp);
+ }
+ }
+ else if (disp < 0)
+ {
+ if (frameRef)
+ {
+ printf("-%02XH", -disp);
+ }
+ else if (disp > -1000)
+ {
+ printf("-%d", -disp);
+ }
+ else if (disp >= -0xFFFF)
+ {
+ printf("-%04XH", -disp);
+ }
+ else if ((disp & 0x7F000000) != 0x7F000000)
+ {
+ printf("%08XH", disp);
+ }
+ else
+ {
+ printf("-%08XH", -disp);
+ }
+ }
+ else if (!nsep)
+ {
+ printf("%04XH", disp);
+ }
+ }
+
+ printf("]");
+
+ if (id->idDebugOnlyInfo()->idClsCookie)
+ {
+ if (id->idIns() == INS_call)
+ {
+ printf("%s", emitFncName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
+ }
+ else
+ {
+ printf("%s", emitFldName((CORINFO_FIELD_HANDLE)id->idDebugOnlyInfo()->idMemCookie));
+ }
+ }
+ // pretty print string if it looks like one
+ else if (id->idGCref() == GCT_GCREF && id->idIns() == INS_mov && id->idAddr()->iiaAddrMode.amBaseReg == REG_NA)
+ {
+ const wchar_t* str = emitComp->eeGetCPString(disp);
+ if (str != nullptr)
+ {
+ printf(" '%S'", str);
+ }
+ }
+
+ if (jdsc && !noDetail)
+ {
+ unsigned cnt = (jdsc->dsSize - 1) / sizeof(void*);
+ BasicBlock** bbp = (BasicBlock**)jdsc->dsCont;
+
+#ifdef _TARGET_AMD64_
+#define SIZE_LETTER "Q"
+#else
+#define SIZE_LETTER "D"
+#endif
+ printf("\n\n J_M%03u_DS%02u LABEL " SIZE_LETTER "WORD", Compiler::s_compMethodsCount, jtno);
+
+ /* Display the label table (it's stored as "BasicBlock*" values) */
+
+ do
+ {
+ insGroup* lab;
+
+ /* Convert the BasicBlock* value to an IG address */
+
+ lab = (insGroup*)emitCodeGetCookie(*bbp++);
+ assert(lab);
+
+ printf("\n D" SIZE_LETTER " G_M%03u_IG%02u", Compiler::s_compMethodsCount, lab->igNum);
+ } while (--cnt);
+ }
+}
+
+/*****************************************************************************
+ *
+ * If the given instruction is a shift, display the 2nd operand.
+ */
+
+void emitter::emitDispShift(instruction ins, int cnt)
+{
+ switch (ins)
+ {
+ case INS_rcl_1:
+ case INS_rcr_1:
+ case INS_rol_1:
+ case INS_ror_1:
+ case INS_shl_1:
+ case INS_shr_1:
+ case INS_sar_1:
+ printf(", 1");
+ break;
+
+ case INS_rcl:
+ case INS_rcr:
+ case INS_rol:
+ case INS_ror:
+ case INS_shl:
+ case INS_shr:
+ case INS_sar:
+ printf(", cl");
+ break;
+
+ case INS_rcl_N:
+ case INS_rcr_N:
+ case INS_rol_N:
+ case INS_ror_N:
+ case INS_shl_N:
+ case INS_shr_N:
+ case INS_sar_N:
+ printf(", %d", cnt);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display (optionally) the bytes for the instruction encoding in hex
+ */
+
+void emitter::emitDispInsHex(BYTE* code, size_t sz)
+{
+ // We do not display the instruction hex if we want diff-able disassembly
+ if (!emitComp->opts.disDiffable)
+ {
+#ifdef _TARGET_AMD64_
+ // how many bytes per instruction we format for
+ const size_t digits = 10;
+#else // _TARGET_X86_
+ const size_t digits = 6;
+#endif
+ printf(" ");
+ for (unsigned i = 0; i < sz; i++)
+ {
+ printf("%02X", (*((BYTE*)(code + i))));
+ }
+
+ if (sz < digits)
+ {
+ printf("%.*s", 2 * (digits - sz), " ");
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Display the given instruction.
+ */
+
+void emitter::emitDispIns(
+ instrDesc* id, bool isNew, bool doffs, bool asmfm, unsigned offset, BYTE* code, size_t sz, insGroup* ig)
+{
+ emitAttr attr;
+ const char* sstr;
+
+ instruction ins = id->idIns();
+
+ if (emitComp->verbose)
+ {
+ unsigned idNum = id->idDebugOnlyInfo()->idNum;
+ printf("IN%04x: ", idNum);
+ }
+
+#ifdef RELOC_SUPPORT
+#define ID_INFO_DSP_RELOC ((bool)(id->idIsDspReloc()))
+#else
+#define ID_INFO_DSP_RELOC false
+#endif
+ /* Display a constant value if the instruction references one */
+
+ if (!isNew)
+ {
+ switch (id->idInsFmt())
+ {
+ int offs;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+
+ case IF_RRD_MRD:
+ case IF_RWR_MRD:
+ case IF_RRW_MRD:
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ case IF_MRW_SHF:
+
+ case IF_MRD:
+ case IF_MWR:
+ case IF_MRW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_MRD:
+ case IF_TWR_MRD:
+ case IF_TRW_MRD:
+
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
+ case IF_MWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+ case IF_MRD_OFF:
+
+ /* Is this actually a reference to a data section? */
+
+ offs = Compiler::eeGetJitDataOffs(id->idAddr()->iiaFieldHnd);
+
+ if (offs >= 0)
+ {
+ void* addr;
+
+ /* Display a data section reference */
+
+ assert((unsigned)offs < emitConsDsc.dsdOffs);
+ addr = emitConsBlock ? emitConsBlock + offs : nullptr;
+
+#if 0
+ // TODO-XArch-Cleanup: Fix or remove this code.
+ /* Is the operand an integer or floating-point value? */
+
+ bool isFP = false;
+
+ if (CodeGen::instIsFP(id->idIns()))
+ {
+ switch (id->idIns())
+ {
+ case INS_fild:
+ case INS_fildl:
+ break;
+
+ default:
+ isFP = true;
+ break;
+ }
+ }
+
+ if (offs & 1)
+ printf("@CNS%02u", offs);
+ else
+ printf("@RWD%02u", offs);
+
+ printf(" ");
+
+ if (addr)
+ {
+ addr = 0;
+ // TODO-XArch-Bug?:
+ // This was busted by switching the order
+ // in which we output the code block vs.
+ // the data blocks -- when we get here,
+ // the data block has not been filled in
+ // yet, so we'll display garbage.
+
+ if (isFP)
+ {
+ if (id->idOpSize() == EA_4BYTE)
+ printf("DF %f \n", addr ? *(float *)addr : 0);
+ else
+ printf("DQ %lf\n", addr ? *(double *)addr : 0);
+ }
+ else
+ {
+ if (id->idOpSize() <= EA_4BYTE)
+ printf("DD %d \n", addr ? *(int *)addr : 0);
+ else
+ printf("DQ %D \n", addr ? *(__int64 *)addr : 0);
+ }
+ }
+#endif
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ // printf("[F=%s] " , emitIfName(id->idInsFmt()));
+ // printf("INS#%03u: ", id->idDebugOnlyInfo()->idNum);
+ // printf("[S=%02u] " , emitCurStackLvl); if (isNew) printf("[M=%02u] ", emitMaxStackDepth);
+ // printf("[S=%02u] " , emitCurStackLvl/sizeof(INT32));
+ // printf("[A=%08X] " , emitSimpleStkMask);
+ // printf("[A=%08X] " , emitSimpleByrefStkMask);
+ // printf("[L=%02u] " , id->idCodeSize());
+
+ if (!emitComp->opts.dspEmit && !isNew && !asmfm)
+ {
+ doffs = true;
+ }
+
+ /* Display the instruction offset */
+
+ emitDispInsOffs(offset, doffs);
+
+ if (code != nullptr)
+ {
+ /* Display the instruction hex code */
+
+ emitDispInsHex(code, sz);
+ }
+
+ /* Display the instruction name */
+
+ sstr = codeGen->genInsName(ins);
+#ifdef FEATURE_AVX_SUPPORT
+ if (IsAVXInstruction(ins))
+ {
+ printf(" v%-8s", sstr);
+ }
+ else
+#endif // FEATURE_AVX_SUPPORT
+ {
+ printf(" %-9s", sstr);
+ }
+#ifndef FEATURE_PAL
+ if (strnlen_s(sstr, 10) > 8)
+#else // FEATURE_PAL
+ if (strnlen(sstr, 10) > 8)
+#endif // FEATURE_PAL
+ {
+ printf(" ");
+ }
+
+ /* By now the size better be set to something */
+
+ assert(emitInstCodeSz(id) || emitInstHasNoCode(ins));
+
+ /* Figure out the operand size */
+
+ if (id->idGCref() == GCT_GCREF)
+ {
+ attr = EA_GCREF;
+ sstr = "gword ptr ";
+ }
+ else if (id->idGCref() == GCT_BYREF)
+ {
+ attr = EA_BYREF;
+ sstr = "bword ptr ";
+ }
+ else
+ {
+ attr = id->idOpSize();
+ sstr = codeGen->genSizeStr(attr);
+
+ if (ins == INS_lea)
+ {
+#ifdef _TARGET_AMD64_
+ assert((attr == EA_4BYTE) || (attr == EA_8BYTE));
+#else
+ assert(attr == EA_4BYTE);
+#endif
+ sstr = "";
+ }
+ }
+
+ /* Now see what instruction format we've got */
+
+ // First print the implicit register usage
+ if (instrHasImplicitRegPairDest(ins))
+ {
+ printf("%s:%s, ", emitRegName(REG_EDX, id->idOpSize()), emitRegName(REG_EAX, id->idOpSize()));
+ }
+ else if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ printf("%s, ", emitRegName(tgtReg, id->idOpSize()));
+ }
+
+ switch (id->idInsFmt())
+ {
+ ssize_t val;
+ ssize_t offs;
+ CnsVal cnsVal;
+ const char* methodName;
+
+ case IF_CNS:
+ val = emitGetInsSC(id);
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ PRINT_CONSTANT:
+ // Munge any pointers if we want diff-able disassembly
+ if (emitComp->opts.disDiffable)
+ {
+ ssize_t top12bits = (val >> 20);
+ if ((top12bits != 0) && (top12bits != -1))
+ {
+ val = 0xD1FFAB1E;
+ }
+ }
+ if ((val > -1000) && (val < 1000))
+ {
+ printf("%d", val);
+ }
+ else if ((val > 0) || ((val & 0x7F000000) != 0x7F000000))
+ {
+ printf("0x%IX", val);
+ }
+ else
+ { // (val < 0)
+ printf("-0x%IX", -val);
+ }
+ }
+ break;
+
+ case IF_ARD:
+ case IF_AWR:
+ case IF_ARW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_ARD:
+ case IF_TWR_ARD:
+ case IF_TRW_ARD:
+
+ // case IF_ARD_TRD:
+ case IF_AWR_TRD:
+// case IF_ARW_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+ if (ins == INS_call && id->idIsCallRegPtr())
+ {
+ printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg));
+ break;
+ }
+
+ printf("%s", sstr);
+ emitDispAddrMode(id, isNew);
+ emitDispShift(ins);
+
+ if (ins == INS_call)
+ {
+ assert(id->idInsFmt() == IF_ARD);
+
+ /* Ignore indirect calls */
+
+ if (id->idDebugOnlyInfo()->idMemCookie == 0)
+ {
+ break;
+ }
+
+ assert(id->idDebugOnlyInfo()->idMemCookie);
+
+ /* This is a virtual call */
+
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ printf("%s", methodName);
+ }
+ break;
+
+ case IF_RRD_ARD:
+ case IF_RWR_ARD:
+ case IF_RRW_ARD:
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else
+#ifdef _TARGET_AMD64_
+ if (ins == INS_movsxd)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
+ }
+ else
+#endif
+ if (ins == INS_movsx || ins == INS_movzx)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
+ }
+ emitDispAddrMode(id);
+ break;
+
+ case IF_ARD_RRD:
+ case IF_AWR_RRD:
+ case IF_ARW_RRD:
+
+ printf("%s", sstr);
+ emitDispAddrMode(id);
+ if (IsAVXInstruction(ins))
+ {
+ printf(", %s", emitYMMregName((unsigned)id->idReg1()));
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf(", %s", emitXMMregName((unsigned)id->idReg1()));
+ }
+ else
+ {
+ printf(", %s", emitRegName(id->idReg1(), attr));
+ }
+ break;
+
+ case IF_ARD_CNS:
+ case IF_AWR_CNS:
+ case IF_ARW_CNS:
+ case IF_ARW_SHF:
+
+ printf("%s", sstr);
+ emitDispAddrMode(id);
+ emitGetInsAmdCns(id, &cnsVal);
+ val = cnsVal.cnsVal;
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+ if (id->idInsFmt() == IF_ARW_SHF)
+ {
+ emitDispShift(ins, (BYTE)val);
+ }
+ else
+ {
+ printf(", ");
+#ifdef RELOC_SUPPORT
+ if (cnsVal.cnsReloc)
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ }
+ break;
+
+ case IF_SRD:
+ case IF_SWR:
+ case IF_SRW:
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD_SRD:
+ case IF_TWR_SRD:
+ case IF_TRW_SRD:
+
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
+ case IF_SWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ printf("%s", sstr);
+
+#if !FEATURE_FIXED_OUT_ARGS
+ if (ins == INS_pop)
+ emitCurStackLvl -= sizeof(int);
+#endif
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+#if !FEATURE_FIXED_OUT_ARGS
+ if (ins == INS_pop)
+ emitCurStackLvl += sizeof(int);
+#endif
+
+ emitDispShift(ins);
+ break;
+
+ case IF_SRD_RRD:
+ case IF_SWR_RRD:
+ case IF_SRW_RRD:
+
+ printf("%s", sstr);
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+ if (IsAVXInstruction(ins))
+ {
+ printf(", %s", emitYMMregName((unsigned)id->idReg1()));
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf(", %s", emitXMMregName((unsigned)id->idReg1()));
+ }
+ else
+ {
+ printf(", %s", emitRegName(id->idReg1(), attr));
+ }
+ break;
+
+ case IF_SRD_CNS:
+ case IF_SWR_CNS:
+ case IF_SRW_CNS:
+ case IF_SRW_SHF:
+
+ printf("%s", sstr);
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+ emitGetInsCns(id, &cnsVal);
+ val = cnsVal.cnsVal;
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+ if (id->idInsFmt() == IF_SRW_SHF)
+ {
+ emitDispShift(ins, (BYTE)val);
+ }
+ else
+ {
+ printf(", ");
+#ifdef RELOC_SUPPORT
+ if (cnsVal.cnsReloc)
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ }
+ break;
+
+ case IF_RRD_SRD:
+ case IF_RWR_SRD:
+ case IF_RRW_SRD:
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else
+#ifdef _TARGET_AMD64_
+ if (ins == INS_movsxd)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), sstr);
+ }
+ else
+#endif
+ if (ins == INS_movsx || ins == INS_movzx)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), sstr);
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
+ }
+
+ emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+ id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+ break;
+
+ case IF_RRD_RRD:
+ case IF_RWR_RRD:
+ case IF_RRW_RRD:
+
+ if (ins == INS_mov_i2xmm)
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
+ }
+ else if (ins == INS_mov_xmm2i)
+ {
+ printf("%s, %s", emitRegName(id->idReg2(), attr), emitXMMregName((unsigned)id->idReg1()));
+ }
+#ifndef LEGACY_BACKEND
+ else if ((ins == INS_cvtsi2ss) || (ins == INS_cvtsi2sd))
+ {
+ printf(" %s, %s", emitXMMregName((unsigned)id->idReg1()), emitRegName(id->idReg2(), attr));
+ }
+#endif
+ else if ((ins == INS_cvttsd2si)
+#ifndef LEGACY_BACKEND
+ || (ins == INS_cvtss2si) || (ins == INS_cvtsd2si) || (ins == INS_cvttss2si)
+#endif
+ )
+ {
+ printf(" %s, %s", emitRegName(id->idReg1(), attr), emitXMMregName((unsigned)id->idReg2()));
+ }
+ else if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), emitYMMregName((unsigned)id->idReg2()));
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), emitXMMregName((unsigned)id->idReg2()));
+ }
+#ifdef _TARGET_AMD64_
+ else if (ins == INS_movsxd)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_8BYTE), emitRegName(id->idReg2(), EA_4BYTE));
+ }
+#endif // _TARGET_AMD64_
+ else if (ins == INS_movsx || ins == INS_movzx)
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), EA_PTRSIZE), emitRegName(id->idReg2(), attr));
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr));
+ }
+ break;
+
+ case IF_RRW_RRW:
+ assert(ins == INS_xchg);
+ printf("%s,", emitRegName(id->idReg1(), attr));
+ printf(" %s", emitRegName(id->idReg2(), attr));
+ break;
+
+#ifdef FEATURE_AVX_SUPPORT
+ case IF_RWR_RRD_RRD:
+ assert(IsAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+ printf("%s, ", emitRegName(id->idReg1(), attr));
+ printf("%s, ", emitRegName(id->idReg2(), attr));
+ printf("%s", emitRegName(id->idReg3(), attr));
+ break;
+#endif
+ case IF_RRW_RRW_CNS:
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s,", emitYMMregName((unsigned)id->idReg1()));
+ printf(" %s", emitYMMregName((unsigned)id->idReg2()));
+ }
+ else
+ {
+ printf("%s,", emitRegName(id->idReg1(), attr));
+ printf(" %s", emitRegName(id->idReg2(), attr));
+ }
+ val = emitGetInsSC(id);
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+ printf(", ");
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ break;
+
+ case IF_RRD:
+ case IF_RWR:
+ case IF_RRW:
+ printf("%s", emitRegName(id->idReg1(), attr));
+ emitDispShift(ins);
+ break;
+
+ case IF_RRW_SHF:
+ printf("%s", emitRegName(id->idReg1(), attr));
+ emitDispShift(ins, (BYTE)emitGetInsSC(id));
+ break;
+
+ case IF_RRD_MRD:
+ case IF_RWR_MRD:
+ case IF_RRW_MRD:
+
+ if (ins == INS_movsx || ins == INS_movzx)
+ {
+ attr = EA_PTRSIZE;
+ }
+#ifdef _TARGET_AMD64_
+ else if (ins == INS_movsxd)
+ {
+ attr = EA_PTRSIZE;
+ }
+#endif
+ if (IsAVXInstruction(ins))
+ {
+ printf("%s, %s", emitYMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else if (IsSSE2Instruction(ins))
+ {
+ printf("%s, %s", emitXMMregName((unsigned)id->idReg1()), sstr);
+ }
+ else
+ {
+ printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
+ }
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ break;
+
+ case IF_RWR_MRD_OFF:
+
+ printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ break;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+
+ printf("%s", sstr);
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ printf(", %s", emitRegName(id->idReg1(), attr));
+ break;
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ case IF_MRW_SHF:
+
+ printf("%s", sstr);
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ emitGetInsDcmCns(id, &cnsVal);
+ val = cnsVal.cnsVal;
+#ifdef _TARGET_AMD64_
+ // no 8-byte immediates allowed here!
+ assert((val >= 0xFFFFFFFF80000000LL) && (val <= 0x000000007FFFFFFFLL));
+#endif
+#ifdef RELOC_SUPPORT
+ if (cnsVal.cnsReloc)
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ if (id->idInsFmt() == IF_MRW_SHF)
+ {
+ emitDispShift(ins, (BYTE)val);
+ }
+ else
+ {
+ printf(", ");
+ goto PRINT_CONSTANT;
+ }
+ break;
+
+ case IF_MRD:
+ case IF_MWR:
+ case IF_MRW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_MRD:
+ case IF_TWR_MRD:
+ case IF_TRW_MRD:
+
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
+ case IF_MWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ printf("%s", sstr);
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ emitDispShift(ins);
+ break;
+
+ case IF_MRD_OFF:
+
+ printf("offset ");
+ offs = emitGetInsDsp(id);
+ emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+ break;
+
+ case IF_RRD_CNS:
+ case IF_RWR_CNS:
+ case IF_RRW_CNS:
+ printf("%s, ", emitRegName(id->idReg1(), attr));
+ val = emitGetInsSC(id);
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitDispReloc(val);
+ }
+ else
+#endif
+ {
+ goto PRINT_CONSTANT;
+ }
+ break;
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD_FRD:
+ case IF_TWR_FRD:
+ case IF_TRW_FRD:
+ switch (ins)
+ {
+ case INS_fld:
+ case INS_fxch:
+ break;
+
+ default:
+ printf("%s, ", emitFPregName(0));
+ break;
+ }
+ printf("%s", emitFPregName((unsigned)id->idReg1()));
+ break;
+
+ case IF_FRD_TRD:
+ case IF_FWR_TRD:
+ case IF_FRW_TRD:
+ printf("%s", emitFPregName((unsigned)id->idReg1()));
+ if (ins != INS_fst && ins != INS_fstp)
+ printf(", %s", emitFPregName(0));
+ break;
+#endif // FEATURE_STACK_FP_X87
+
+ case IF_LABEL:
+ case IF_RWR_LABEL:
+ case IF_SWR_LABEL:
+
+ if (ins == INS_lea)
+ {
+ printf("%s, ", emitRegName(id->idReg1(), attr));
+ }
+ else if (ins == INS_mov)
+ {
+ /* mov dword ptr [frame.callSiteReturnAddress], label */
+ assert(id->idInsFmt() == IF_SWR_LABEL);
+ instrDescLbl* idlbl = (instrDescLbl*)id;
+
+ emitDispFrameRef(idlbl->dstLclVar.lvaVarNum(), idlbl->dstLclVar.lvaOffset(), 0, asmfm);
+
+ printf(", ");
+ }
+
+ if (((instrDescJmp*)id)->idjShort)
+ {
+ printf("SHORT ");
+ }
+
+ if (id->idIsBound())
+ {
+ printf("G_M%03u_IG%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaIGlabel->igNum);
+ }
+ else
+ {
+ printf("L_M%03u_BB%02u", Compiler::s_compMethodsCount, id->idAddr()->iiaBBlabel->bbNum);
+ }
+ break;
+
+ case IF_METHOD:
+ case IF_METHPTR:
+ if (id->idIsCallAddr())
+ {
+ offs = (ssize_t)id->idAddr()->iiaAddr;
+ methodName = "";
+ }
+ else
+ {
+ offs = 0;
+ methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+
+ if (id->idInsFmt() == IF_METHPTR)
+ {
+ printf("[");
+ }
+
+ if (offs)
+ {
+ if (id->idIsDspReloc())
+ {
+ printf("reloc ");
+ }
+ printf("%08X", offs);
+ }
+ else
+ {
+ printf("%s", methodName);
+ }
+
+ if (id->idInsFmt() == IF_METHPTR)
+ {
+ printf("]");
+ }
+
+ break;
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD:
+ case IF_TWR:
+ case IF_TRW:
+#endif // FEATURE_STACK_FP_X87
+ case IF_NONE:
+ break;
+
+ default:
+ printf("unexpected format %s", emitIfName(id->idInsFmt()));
+ assert(!"unexpectedFormat");
+ break;
+ }
+
+ if (sz != 0 && sz != id->idCodeSize() && (!asmfm || emitComp->verbose))
+ {
+ // Code size in the instrDesc is different from the actual code size we've been given!
+ printf(" (ECS:%d, ACS:%d)", id->idCodeSize(), sz);
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************/
+#endif
+
+/*****************************************************************************
+ *
+ * Output nBytes bytes of NOP instructions
+ */
+
+static BYTE* emitOutputNOP(BYTE* dst, size_t nBytes)
+{
+ assert(nBytes <= 15);
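+ // Note: 15 bytes is the maximum length of a single x86/x64 instruction.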
+
+#ifndef _TARGET_AMD64_
+ // TODO-X86-CQ: when VIA C3 CPUs are out of circulation, switch to the
+ // more efficient real NOP: 0x0F 0x1F +modR/M.
+ // We also can't use the AMD-recommended multiple size prefixes (e.g. 0x66 0x66 0x90 for a 3-byte NOP)
+ // because the debugger and msdis don't like them, so maybe VIA doesn't either.
+ // So instead just stick to repeating single-byte NOPs.
+
+ switch (nBytes)
+ {
+ case 15:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 14:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 13:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 12:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 11:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 10:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 9:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 8:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 7:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 6:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 5:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 4:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 3:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 2:
+ *dst++ = 0x90;
+ __fallthrough;
+ case 1:
+ *dst++ = 0x90;
+ break;
+ case 0:
+ break;
+ }
+#else // _TARGET_AMD64_
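+ // On AMD64, use the recommended multi-byte NOP encodings (0F 1F /0 with various ModRM/SIB/displacement
+ // sizes, optionally preceded by 0x66 prefixes); requests longer than 11 bytes are split into two NOPs.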
+ switch (nBytes)
+ {
+ case 2:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 1:
+ *dst++ = 0x90;
+ break;
+ case 0:
+ break;
+ case 3:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x00;
+ break;
+ case 4:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x40;
+ *dst++ = 0x00;
+ break;
+ case 6:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 5:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x44;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ break;
+ case 7:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x80;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ break;
+ case 15:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 7), 8);
+ break;
+ case 14:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 7), 7);
+ break;
+ case 13:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 5), 8);
+ break;
+ case 12:
+ // More than 3 prefixes is slower than just 2 NOPs
+ dst = emitOutputNOP(emitOutputNOP(dst, 4), 8);
+ break;
+ case 11:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 10:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 9:
+ *dst++ = 0x66;
+ __fallthrough;
+ case 8:
+ *dst++ = 0x0F;
+ *dst++ = 0x1F;
+ *dst++ = 0x84;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ *dst++ = 0x00;
+ break;
+ }
+#endif // _TARGET_AMD64_
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction involving an address mode.
+ */
+
+BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+{
+ regNumber reg;
+ regNumber rgx;
+ ssize_t dsp;
+ bool dspInByte;
+ bool dspIsZero;
+
+ instruction ins = id->idIns();
+ emitAttr size = id->idOpSize();
+ size_t opsz = EA_SIZE_IN_BYTES(size);
+
+ // Get the base/index registers
+ reg = id->idAddr()->iiaAddrMode.amBaseReg;
+ rgx = id->idAddr()->iiaAddrMode.amIndxReg;
+
+ // For INS_call the instruction size is actually the return value size
+ if (ins == INS_call)
+ {
+ // Special case: call via a register
+ if (id->idIsCallRegPtr())
+ {
+ size_t opcode = insEncodeMRreg(INS_call, reg, EA_PTRSIZE, insCodeMR(INS_call));
+
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, opcode);
+ dst += emitOutputWord(dst, opcode);
+ goto DONE;
+ }
+
+ // The displacement field is in an unusual place for calls
+ dsp = emitGetInsCIdisp(id);
+
+#ifdef _TARGET_AMD64_
+
+ // Compute the REX prefix if it exists
+ if (IsExtendedReg(reg, EA_PTRSIZE))
+ {
+ insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
+ reg = RegEncoding(reg);
+ }
+
+ if (IsExtendedReg(rgx, EA_PTRSIZE))
+ {
+ insEncodeRegSIB(ins, rgx, &code);
+ rgx = RegEncoding(rgx);
+ }
+
+ // And emit the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+#endif // _TARGET_AMD64_
+
+ goto GOT_DSP;
+ }
+
+ // Is there a large constant operand?
+ if (addc && (size > EA_1BYTE))
+ {
+ ssize_t cval = addc->cnsVal;
+
+ // Does the constant fit in a byte?
+ if ((signed char)cval == cval &&
+#ifdef RELOC_SUPPORT
+ addc->cnsReloc == false &&
+#endif
+ ins != INS_mov && ins != INS_test)
+ {
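+ // Bit 1 of the opcode is the sign-extend ('s') bit; setting it selects the one-byte, sign-extended immediate form.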
+ if (id->idInsFmt() != IF_ARW_SHF)
+ {
+ code |= 2;
+ }
+
+ opsz = 1;
+ }
+ }
+
+ // Emit VEX prefix if required
+ // Some callers add the VEX prefix themselves before calling this routine.
+ // Therefore, add the VEX prefix only if one is not already present.
+ code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
+
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // Encode the source operand reg in the 'vvvv' bits in 1's complement form.
+ // The order of operands is reversed, therefore use reg2 as the source.
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ // Emit the REX prefix if required
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ if (IsExtendedReg(reg, EA_PTRSIZE))
+ {
+ insEncodeReg012(ins, reg, EA_PTRSIZE, &code);
+ reg = RegEncoding(reg);
+ }
+
+ if (IsExtendedReg(rgx, EA_PTRSIZE))
+ {
+ insEncodeRegSIB(ins, rgx, &code);
+ rgx = RegEncoding(rgx);
+ }
+
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest word of the opcode
+ // We need to check again because, for AVX instructions, the leading opcode bytes are stripped off
+ // and encoded as part of the VEX prefix.
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ }
+ else if (code & 0x00FF0000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest byte of the opcode
+ if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // Use the large version if this is not a byte. This trick will not
+ // work in case of SSE2 and AVX instructions.
+ if ((size != EA_1BYTE) && (ins != INS_imul) && !IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
+ {
+ code++;
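+ // Instruction names longer than 8 characters use up all of the padding above, so add an explicit
+ // space to keep the name separated from the operands.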
+ }
+ }
+ else if (CodeGen::instIsFP(ins))
+ {
+#if FEATURE_STACK_FP_X87
+ assert(size == EA_4BYTE || size == EA_8BYTE || ins == INS_fldcw || ins == INS_fnstcw);
+#else // !FEATURE_STACK_FP_X87
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+#endif // ! FEATURE_STACK_FP_X87
+
+ if (size == EA_8BYTE)
+ {
+ code += 4;
+ }
+ }
+ else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
+ {
+ /* Is the operand size larger than a byte? */
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+
+ /* Output a size prefix for a 16-bit operand */
+
+ dst += emitOutputByte(dst, 0x66);
+
+ __fallthrough;
+
+ case EA_4BYTE:
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+#endif
+
+ /* Set the 'w' bit to get the large version */
+
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_X86_
+ case EA_8BYTE:
+
+ /* Double operand - set the appropriate bit */
+
+ code |= 0x04;
+ break;
+
+#endif // _TARGET_X86_
+
+ default:
+ NO_WAY("unexpected size");
+ break;
+ }
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Get the displacement value
+ dsp = emitGetInsAmdAny(id);
+
+GOT_DSP:
+
+ dspInByte = ((signed char)dsp == (ssize_t)dsp);
+ dspIsZero = (dsp == 0);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ dspInByte = false; // relocs can't be placed in a byte
+ }
+#endif
+
+ // Is there a [scaled] index component?
+ if (rgx == REG_NA)
+ {
+ // The address is of the form "[reg+disp]"
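+ // The 0xXX00 values OR'ed into 'code' below form the ModRM byte: the mod bits select no
+ // displacement/disp8/disp32, and the r/m bits select the base register or the presence of a SIB byte.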
+ switch (reg)
+ {
+ case REG_NA:
+ if (id->idIsDspReloc())
+ {
+ INT32 addlDelta = 0;
+
+ // The address is of the form "[disp]"
+ // On x86 - disp is relative to zero
+ // On Amd64 - disp is relative to RIP
+ dst += emitOutputWord(dst, code | 0x0500);
+
+ if (addc)
+ {
+ // It is of the form "ins [disp], immed"
+ // For emitting relocation, we also need to take into account of the
+ // additional bytes of code emitted for immed val.
+
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#else
+ noway_assert(opsz <= 4);
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ addlDelta = -4;
+ break;
+ case 2:
+ addlDelta = -2;
+ break;
+ case 1:
+ addlDelta = -1;
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ unreached();
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ // We emit zero on Amd64, to avoid the assert in emitOutputLong()
+ dst += emitOutputLong(dst, 0);
+#else
+ dst += emitOutputLong(dst, dsp);
+#endif
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_DISP32, 0,
+ addlDelta);
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ dst += emitOutputWord(dst, code | 0x0500);
+#else //_TARGET_AMD64_
+ // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
+ // This addr mode should never be used while generating relocatable ngen code nor if
+ // the addr can be encoded as pc-relative address.
+ noway_assert(!emitComp->opts.compReloc);
+ noway_assert(codeGen->genAddrRelocTypeHint((size_t)dsp) != IMAGE_REL_BASED_REL32);
+ noway_assert((int)dsp == dsp);
+
+ // This requires, specifying a SIB byte after ModRM byte.
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x25);
+#endif //_TARGET_AMD64_
+ dst += emitOutputLong(dst, dsp);
+ }
+ break;
+
+ case REG_EBP:
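+ // An EBP/RBP base cannot be encoded with mod=00 (that pattern means disp32/RIP-relative),
+ // so a disp8 or disp32 is always emitted.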
+ // Does the offset fit in a byte?
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4500);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8500);
+ dst += emitOutputLong(dst, dsp);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ break;
+
+ case REG_ESP:
+#ifdef LEGACY_BACKEND
+ // REG_ESP could be REG_R12, which applies to any instruction
+ //
+ // This assert isn't too helpful from the OptJit point of view
+ //
+ // a better question is why is it here at all
+ //
+ assert((ins == INS_lea) || (ins == INS_mov) || (ins == INS_test) || (ins == INS_cmp) ||
+ (ins == INS_fld && dspIsZero) || (ins == INS_fstp && dspIsZero) ||
+ (ins == INS_fistp && dspIsZero) || IsSSE2Instruction(ins) || IsAVXInstruction(ins) ||
+ (ins == INS_or));
+#endif // LEGACY_BACKEND
+
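+ // An ESP/RSP base always requires a SIB byte; r/m=100 selects it, and SIB 0x24 encodes base=ESP with no index.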
+ // Is the offset 0 or does it at least fit in a byte?
+ if (dspIsZero)
+ {
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x24);
+ }
+ else if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ break;
+
+ default:
+ // Put the register in the opcode
+ code |= insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) << 8;
+
+ // Is there a displacement?
+ if (dspIsZero)
+ {
+ // This is simply "[reg]"
+ dst += emitOutputWord(dst, code);
+ }
+ else
+ {
+ // This is "[reg + dsp]" -- does the offset fit in a byte?
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4000);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8000);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+
+ break;
+ }
+ }
+ else
+ {
+ unsigned regByte;
+
+ // We have a scaled index operand
+ unsigned mul = emitDecodeScale(id->idAddr()->iiaAddrMode.amScale);
+
+ // Is the index operand scaled?
+ if (mul > 1)
+ {
+ // Is there a base register?
+ if (reg != REG_NA)
+ {
+ // The address is "[reg + {2/4/8} * rgx + icon]"
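+ // regByte is the SIB byte: base register in bits 0-2, index register in bits 3-5, and the scale in the top two bits.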
+ regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) |
+ insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
+
+ // Emit [ebp + {2/4/8} * rgx] as [ebp + {2/4/8} * rgx + 0]
+ if (dspIsZero && reg != REG_EBP)
+ {
+ // The address is "[reg + {2/4/8} * rgx]"
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, regByte);
+ }
+ else
+ {
+ // The address is "[reg + {2/4/8} * rgx + disp]"
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+ }
+ else
+ {
+ // The address is "[{2/4/8} * rgx + icon]"
+ regByte = insEncodeReg012(ins, REG_EBP, EA_PTRSIZE, nullptr) |
+ insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr) | insSSval(mul);
+
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, regByte);
+
+ // Special case: jump through a jump table
+ if (ins == INS_i_jmp)
+ {
+ dsp += (size_t)emitConsBlock;
+ }
+
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+ else
+ {
+ // The address is "[reg+rgx+dsp]"
+ regByte = insEncodeReg012(ins, reg, EA_PTRSIZE, nullptr) | insEncodeReg345(ins, rgx, EA_PTRSIZE, nullptr);
+
+ if (dspIsZero && reg != REG_EBP)
+ {
+ // This is "[reg+rgx]"
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, regByte);
+ }
+ else
+ {
+ // This is "[reg+rgx+dsp]" -- does the offset fit in a byte?
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, regByte);
+ dst += emitOutputLong(dst, dsp);
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)dsp, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+ }
+ }
+ }
+
+ // Now generate the constant value, if present
+ if (addc)
+ {
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ dst += emitOutputLong(dst, cval);
+ break;
+ case 2:
+ dst += emitOutputWord(dst, cval);
+ break;
+ case 1:
+ dst += emitOutputByte(dst, cval);
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ }
+
+#ifdef RELOC_SUPPORT
+ if (addc->cnsReloc)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
+ assert(opsz == 4);
+ }
+#endif
+ }
+
+DONE:
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_ARD:
+ case IF_AWR:
+ case IF_ARW:
+ break;
+
+ case IF_RRD_ARD:
+ break;
+
+ case IF_RWR_ARD:
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_RRW_ARD:
+ assert(id->idGCref() == GCT_BYREF);
+
+#ifdef DEBUG
+ regMaskTP regMask;
+ regMask = genRegMask(id->idReg1());
+
+ // <BUGNUM> VSW 335101 </BUGNUM>
+ // Either id->idReg1(), id->idAddr()->iiaAddrMode.amBaseReg, or id->idAddr()->iiaAddrMode.amIndxReg
+ // could be a BYREF.
+ // For example in the following case:
+ // mov EDX, bword ptr [EBP-78H] ; EDX becomes BYREF after this instr.
+ // add EAX, bword ptr [EDX+8] ; It is the EDX that's causing id->idGCref to be GCT_BYREF.
+ // ; EAX becomes BYREF after this instr.
+ // <BUGNUM> DD 273707 </BUGNUM>
+ // add EDX, bword ptr [036464E0H] ; int + static field (technically a GCREF)=BYREF
+ regMaskTP baseRegMask;
+ if (reg == REG_NA)
+ {
+ baseRegMask = RBM_NONE;
+ }
+ else
+ {
+ baseRegMask = genRegMask(reg);
+ }
+ regMaskTP indexRegMask;
+ if (rgx == REG_NA)
+ {
+ indexRegMask = RBM_NONE;
+ }
+ else
+ {
+ indexRegMask = genRegMask(rgx);
+ }
+
+ // r1 could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(((reg == REG_NA) && (rgx == REG_NA) && (ins == INS_add || ins == INS_sub)) ||
+ (((regMask | baseRegMask | indexRegMask) & emitThisGCrefRegs) && (ins == INS_add)) ||
+ (((regMask | baseRegMask | indexRegMask) & emitThisByrefRegs) &&
+ (ins == INS_add || ins == INS_sub)));
+#endif
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ case IF_ARD_RRD:
+ case IF_AWR_RRD:
+ break;
+
+ case IF_ARD_CNS:
+ case IF_AWR_CNS:
+ break;
+
+ case IF_ARW_RRD:
+ case IF_ARW_CNS:
+ assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+
+ // mul can never produce a GC ref
+ assert(!instrIs3opImul(ins));
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_ARD:
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+ default:
+ break;
+ }
+
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ // For the three operand imul instruction the target register
+ // is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction involving a stack frame value.
+ */
+
+BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+{
+ int adr;
+ int dsp;
+ bool EBPbased;
+ bool dspInByte;
+ bool dspIsZero;
+
+ instruction ins = id->idIns();
+ emitAttr size = id->idOpSize();
+ size_t opsz = EA_SIZE_IN_BYTES(size);
+
+ assert(ins != INS_imul || id->idReg1() == REG_EAX || size == EA_4BYTE || size == EA_8BYTE);
+
+ // Is there a large constant operand?
+ if (addc && (size > EA_1BYTE))
+ {
+ ssize_t cval = addc->cnsVal;
+
+ // Does the constant fit in a byte?
+ if ((signed char)cval == cval &&
+#ifdef RELOC_SUPPORT
+ addc->cnsReloc == false &&
+#endif
+ ins != INS_mov && ins != INS_test)
+ {
+ if (id->idInsFmt() != IF_SRW_SHF)
+ {
+ code |= 2;
+ }
+
+ opsz = 1;
+ }
+ }
+
+ // Add VEX prefix if required.
+ // Some callers add the VEX prefix themselves before calling this routine.
+ // Therefore, add the VEX prefix only if one is not already present.
+ code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
+
+ // Compute the REX prefix
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Special case emitting AVX instructions
+ if (Is4ByteAVXInstruction(ins))
+ {
+ size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Emit last opcode byte
+ assert((code & 0xFF) == 0);
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ code = regcode;
+ }
+ // Is this a 'big' opcode?
+ else if (code & 0xFF000000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest word of the opcode
+ // We need to check again because in case of AVX instructions the leading
+ // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ }
+ else if (code & 0x00FF0000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest byte of the opcode.
+ // We need to check again because in case of AVX instructions the leading
+ // escape byte(s) (e.g. 0x0F) will be encoded as part of VEX prefix.
+ if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // Use the large version if this is not a byte
+ if ((size != EA_1BYTE) && (ins != INS_imul) && (!insIsCMOV(ins)) && !IsSSE2Instruction(ins) &&
+ !IsAVXInstruction(ins))
+ {
+ code |= 0x1;
+ }
+ }
+ else if (CodeGen::instIsFP(ins))
+ {
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+
+ if (size == EA_8BYTE)
+ {
+ code += 4;
+ }
+ }
+ else if (!IsSSE2Instruction(ins) && !IsAVXInstruction(ins))
+ {
+ // Is the operand size larger than a byte?
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+#endif // _TARGET_AMD64_
+
+ /* Set the 'w' size bit to indicate 32-bit operation
+ * Note that incrementing "code" for INS_call (0xFF) would
+ * overflow, whereas setting the lower bit to 1 just works out
+ */
+
+ code |= 0x01;
+ break;
+
+#ifdef _TARGET_X86_
+ case EA_8BYTE:
+
+ // Double operand - set the appropriate bit.
+ // It's unclear what could legitimately reach this case, since FP instructions are
+ // handled by instIsFP above; of the instructions in instrsxarch, only INS_fprem
+ // takes an 8-byte operand without being covered there.
+ code |= 0x04;
+ NO_WAY("bad 8 byte op");
+ break;
+#endif // _TARGET_X86_
+
+ default:
+ NO_WAY("unexpected size");
+ break;
+ }
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Figure out the variable's frame position
+ int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
+
+ adr = emitComp->lvaFrameAddress(varNum, &EBPbased);
+ dsp = adr + id->idAddr()->iiaLclVar.lvaOffset();
+
+ dspInByte = ((signed char)dsp == (int)dsp);
+ dspIsZero = (dsp == 0);
+
+#ifdef RELOC_SUPPORT
+ // For stack variables the dsp should never be a reloc.
+ assert(id->idIsDspReloc() == 0);
+#endif
+
+ if (EBPbased)
+ {
+ // EBP-based variable: does the offset fit in a byte?
+ if (Is4ByteAVXInstruction(ins))
+ {
+ if (dspInByte)
+ {
+ dst += emitOutputByte(dst, code | 0x45);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputByte(dst, code | 0x85);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ else
+ {
+ if (dspInByte)
+ {
+ dst += emitOutputWord(dst, code | 0x4500);
+ dst += emitOutputByte(dst, dsp);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8500);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ }
+ else
+ {
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Adjust the offset by the amount currently pushed on the CPU stack
+ dsp += emitCurStackLvl;
+#endif
+
+ dspInByte = ((signed char)dsp == (int)dsp);
+ dspIsZero = (dsp == 0);
+
+ // Does the offset fit in a byte?
+ if (Is4ByteAVXInstruction(ins))
+ {
+ if (dspInByte)
+ {
+ if (dspIsZero)
+ {
+ dst += emitOutputByte(dst, code | 0x04);
+ dst += emitOutputByte(dst, 0x24);
+ }
+ else
+ {
+ dst += emitOutputByte(dst, code | 0x44);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputByte(dst, dsp);
+ }
+ }
+ else
+ {
+ dst += emitOutputByte(dst, code | 0x84);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ else
+ {
+ if (dspInByte)
+ {
+ if (dspIsZero)
+ {
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x24);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x4400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputByte(dst, dsp);
+ }
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code | 0x8400);
+ dst += emitOutputByte(dst, 0x24);
+ dst += emitOutputLong(dst, dsp);
+ }
+ }
+ }
+
+ // Now generate the constant value, if present
+ if (addc)
+ {
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ dst += emitOutputLong(dst, cval);
+ break;
+ case 2:
+ dst += emitOutputWord(dst, cval);
+ break;
+ case 1:
+ dst += emitOutputByte(dst, cval);
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ }
+
+#ifdef RELOC_SUPPORT
+ if (addc->cnsReloc)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
+ assert(opsz == 4);
+ }
+#endif
+ }
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ // Factor in the sub-variable offset
+ adr += AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
+
+ switch (id->idInsFmt())
+ {
+ case IF_SRD:
+ // Read stack -- no change
+ break;
+
+ case IF_SWR: // Stack Write (So we need to update GC live for stack var)
+ // Write stack -- GC var may be born
+ emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
+ break;
+
+ case IF_SRD_CNS:
+ // Read stack -- no change
+ break;
+
+ case IF_SWR_CNS:
+ // Write stack -- no change
+ break;
+
+ case IF_SRD_RRD:
+ case IF_RRD_SRD:
+ // Read stack , read register -- no change
+ break;
+
+ case IF_RWR_SRD: // Register Write, Stack Read (So we need to update GC live for register)
+
+ // Read stack , write register -- GC reg may be born
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_SWR_RRD: // Stack Write, Register Read (So we need to update GC live for stack var)
+ // Read register, write stack -- GC var may be born
+ emitGCvarLiveUpd(adr, varNum, id->idGCref(), dst);
+ break;
+
+ case IF_RRW_SRD: // Register Read/Write, Stack Read (So we need to update GC live for register)
+
+ // reg could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(id->idGCref() == GCT_BYREF && (ins == INS_add || ins == INS_sub));
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_SRW_CNS:
+ case IF_SRW_RRD:
+ // += -= of a byref, no change
+
+ case IF_SRW:
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_SRD: // Register Write, Stack Read
+ case IF_RRW_SRD: // Register Read/Write, Stack Read
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+ default:
+ break;
+ }
+
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ // For the three operand imul instruction the target register
+ // is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with a static data member (class variable).
+ */
+
+BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc)
+{
+ BYTE* addr;
+ CORINFO_FIELD_HANDLE fldh;
+ ssize_t offs;
+ int doff;
+
+ emitAttr size = id->idOpSize();
+ size_t opsz = EA_SIZE_IN_BYTES(size);
+ instruction ins = id->idIns();
+ bool isMoffset = false;
+
+ // Get hold of the field handle and offset
+ fldh = id->idAddr()->iiaFieldHnd;
+ offs = emitGetInsDsp(id);
+
+ // Special case: mov reg, fs:[ddd]
+ if (fldh == FLD_GLOBAL_FS)
+ {
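+ // 0x64 is the FS segment-override prefix.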
+ dst += emitOutputByte(dst, 0x64);
+ }
+
+ // Compute VEX prefix
+ // Some of its callers already add the VEX prefix and then call this routine.
+ // Therefore, add the VEX prefix only if one is not already present.
+ code = AddVexPrefixIfNeededAndNotPresent(ins, code, size);
+
+ // Compute the REX prefix
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Is there a large constant operand?
+ if (addc && (size > EA_1BYTE))
+ {
+ ssize_t cval = addc->cnsVal;
+ // Does the constant fit in a byte?
+ if ((signed char)cval == cval &&
+#ifdef RELOC_SUPPORT
+ addc->cnsReloc == false &&
+#endif
+ ins != INS_mov && ins != INS_test)
+ {
+ if (id->idInsFmt() != IF_MRW_SHF)
+ {
+ code |= 2;
+ }
+
+ opsz = 1;
+ }
+ }
+#ifdef _TARGET_X86_
+ else
+ {
+ // Special case: "mov eax, [addr]" and "mov [addr], eax"
+ // Amd64: this is one case where addr can be 64-bit in size. This is
+ // currently unused or not enabled on amd64 as it always uses RIP
+ // relative addressing which results in smaller instruction size.
+ if (ins == INS_mov && id->idReg1() == REG_EAX)
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_MRD:
+
+ assert((unsigned)code ==
+ (insCodeRM(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
+
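+ // Switch to the one-byte moffset form of mov: 0xA0 loads AL/EAX from an absolute address
+ // (0xA2 in the case below is the store form); the 'w' bit is added later for the full-width variant.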
+ code &= ~((size_t)0xFFFFFFFF);
+ code |= 0xA0;
+ isMoffset = true;
+ break;
+
+ case IF_MWR_RRD:
+
+ assert((unsigned)code ==
+ (insCodeMR(ins) | (insEncodeReg345(ins, REG_EAX, EA_PTRSIZE, NULL) << 8) | 0x0500));
+
+ code &= ~((size_t)0xFFFFFFFF);
+ code |= 0xA2;
+ isMoffset = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+#endif //_TARGET_X86_
+
+ // Special case emitting AVX instructions
+ if (Is4ByteAVXInstruction(ins))
+ {
+ size_t regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Emit last opcode byte
+ // TODO-XArch-CQ: Right now support 4-byte opcode instructions only
+ assert((code & 0xFF) == 0);
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ code = 0;
+
+ // Emit Mod,R/M byte
+ dst += emitOutputByte(dst, regcode | 0x05);
+ }
+ // Is this a 'big' opcode?
+ else if (code & 0xFF000000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Output the highest word of the opcode.
+ // Check again since AVX instructions encode leading opcode bytes as part of VEX prefix.
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ }
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Check again as VEX prefix would have encoded leading opcode byte
+ if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ if ((ins == INS_movsx || ins == INS_movzx || ins == INS_cmpxchg || ins == INS_xchg || ins == INS_xadd ||
+ insIsCMOV(ins)) &&
+ size != EA_1BYTE)
+ {
+ // movsx and movzx are 'big' opcodes but also have the 'w' bit
+ code++;
+ }
+ }
+ else if (CodeGen::instIsFP(ins))
+ {
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+
+ if (size == EA_8BYTE)
+ {
+ code += 4;
+ }
+ }
+ else
+ {
+ // Is the operand size larger than a byte?
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+#endif
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_X86_
+ case EA_8BYTE:
+ // Double operand - set the appropriate bit
+ code |= 0x04;
+ break;
+#endif // _TARGET_X86_
+
+ default:
+ assert(!"unexpected size");
+ }
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ if (code)
+ {
+ if (id->idInsFmt() == IF_MRD_OFF || id->idInsFmt() == IF_RWR_MRD_OFF || isMoffset)
+ {
+ dst += emitOutputByte(dst, code);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ }
+ }
+
+ // Do we have a constant or a static data member?
+ doff = Compiler::eeGetJitDataOffs(fldh);
+ if (doff >= 0)
+ {
+ addr = emitConsBlock + doff;
+
+ int byteSize = EA_SIZE_IN_BYTES(size);
+
+#ifndef LEGACY_BACKEND
+ // this instruction has a fixed size (4) src.
+ if (ins == INS_cvttss2si || ins == INS_cvtss2sd || ins == INS_vbroadcastss)
+ {
+ byteSize = 4;
+ }
+ // This has a fixed size (8) source.
+ if (ins == INS_vbroadcastsd)
+ {
+ byteSize = 8;
+ }
+#endif // !LEGACY_BACKEND
+
+ // Check that the offset is properly aligned (i.e. the ddd in [ddd])
+ assert((emitChkAlign == false) || (ins == INS_lea) || (((size_t)addr & (byteSize - 1)) == 0));
+ }
+ else
+ {
+ // Special case: mov reg, fs:[ddd] or mov reg, [ddd]
+ if (jitStaticFldIsGlobAddr(fldh))
+ {
+ addr = nullptr;
+ }
+ else
+ {
+ addr = (BYTE*)emitComp->info.compCompHnd->getFieldAddress(fldh, nullptr);
+ if (addr == nullptr)
+ {
+ NO_WAY("could not obtain address of static field");
+ }
+ }
+ }
+
+ BYTE* target = (addr + offs);
+
+ if (!isMoffset)
+ {
+ INT32 addlDelta = 0;
+
+ if (addc)
+ {
+ // It is of the form "ins [disp], immed"
+ // For emitting relocation, we also need to take into account of the
+ // additional bytes of code emitted for immed val.
+
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#else
+ noway_assert(opsz <= 4);
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ addlDelta = -4;
+ break;
+ case 2:
+ addlDelta = -2;
+ break;
+ case 1:
+ addlDelta = -1;
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ unreached();
+ }
+ }
+
+#ifdef _TARGET_AMD64_
+ // All static field and data section constant accesses should be marked as relocatable
+ noway_assert(id->idIsDspReloc());
+ dst += emitOutputLong(dst, 0);
+#else //_TARGET_X86_
+ dst += emitOutputLong(dst, (int)target);
+#endif //_TARGET_X86_
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(int)), target, IMAGE_REL_BASED_DISP32, 0, addlDelta);
+ }
+#endif
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // This code path should never be hit on amd64 since it always uses RIP relative addressing.
+ // In future if ever there is a need to enable this special case, also enable the logic
+ // that sets isMoffset to true on amd64.
+ unreached();
+#else //_TARGET_X86_
+
+ dst += emitOutputSizeT(dst, (ssize_t)target);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(void*)), target, IMAGE_REL_BASED_MOFFSET);
+ }
+#endif
+
+#endif //_TARGET_X86_
+ }
+
+ // Now generate the constant value, if present
+ if (addc)
+ {
+ ssize_t cval = addc->cnsVal;
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes only take a sign-extended 4-byte immediate
+ noway_assert(opsz < 8 || ((int)cval == cval && !addc->cnsReloc));
+#endif
+
+ switch (opsz)
+ {
+ case 0:
+ case 4:
+ case 8:
+ dst += emitOutputLong(dst, cval);
+ break;
+ case 2:
+ dst += emitOutputWord(dst, cval);
+ break;
+ case 1:
+ dst += emitOutputByte(dst, cval);
+ break;
+
+ default:
+ assert(!"unexpected operand size");
+ }
+#ifdef RELOC_SUPPORT
+ if (addc->cnsReloc)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)cval, IMAGE_REL_BASED_HIGHLOW);
+ assert(opsz == 4);
+ }
+#endif
+ }
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_MRD:
+ case IF_MRW:
+ case IF_MWR:
+ break;
+
+ case IF_RRD_MRD:
+ break;
+
+ case IF_RWR_MRD:
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+ break;
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ break;
+
+ case IF_RRW_MRD:
+
+ assert(id->idGCref() == GCT_BYREF);
+ assert(ins == INS_add || ins == INS_sub);
+
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RWR_MRD:
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+ default:
+ break;
+ }
+
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ // For the three operand imul instruction the target register
+ // is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with one register operand.
+ */
+
+BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+
+ instruction ins = id->idIns();
+ regNumber reg = id->idReg1();
+ emitAttr size = id->idOpSize();
+
+ // We would like to update the GC info correctly, so SSE2/AVX instructions are not expected here.
+ assert(!IsSSE2Instruction(ins));
+ assert(!IsAVXInstruction(ins));
+
+ // Get the 'base' opcode
+ switch (ins)
+ {
+ case INS_inc:
+ case INS_dec:
+
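+ // On AMD64 the one-byte inc/dec encodings (0x40+reg / 0x48+reg) are repurposed as REX prefixes,
+ // so the long-form FF /0 and FF /1 encodings must always be used there.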
+#ifdef _TARGET_AMD64_
+ if (true)
+#else
+ if (size == EA_1BYTE)
+#endif
+ {
+ assert(INS_inc_l == INS_inc + 1);
+ assert(INS_dec_l == INS_dec + 1);
+
+ // Can't use the compact form, use the long form
+ ins = (instruction)(ins + 1);
+ if (size == EA_2BYTE)
+ {
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ }
+
+ code = insCodeRR(ins);
+ if (size != EA_1BYTE)
+ {
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ }
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Register...
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputWord(dst, code | (regcode << 8));
+ }
+ else
+ {
+ if (size == EA_2BYTE)
+ {
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ }
+ dst += emitOutputByte(dst, insCodeRR(ins) | insEncodeReg012(ins, reg, size, nullptr));
+ }
+ break;
+
+ case INS_pop:
+ case INS_pop_hide:
+ case INS_push:
+ case INS_push_hide:
+
+ assert(size == EA_PTRSIZE);
+ code = insEncodeOpreg(ins, reg, size);
+
+ assert(!TakesVexPrefix(ins));
+ assert(!TakesRexWPrefix(ins, size));
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputByte(dst, code);
+ break;
+
+ case INS_seto:
+ case INS_setno:
+ case INS_setb:
+ case INS_setae:
+ case INS_sete:
+ case INS_setne:
+ case INS_setbe:
+ case INS_seta:
+ case INS_sets:
+ case INS_setns:
+ case INS_setpe:
+ case INS_setpo:
+ case INS_setl:
+ case INS_setge:
+ case INS_setle:
+ case INS_setg:
+
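+ // setcc writes only the low byte of its operand, so the operand size is always EA_1BYTE.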
+ assert(id->idGCref() == GCT_NONE);
+ assert(size == EA_1BYTE);
+
+ code = insEncodeMRreg(ins, reg, EA_1BYTE, insCodeMR(ins));
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // We expect this to always be a 'big' opcode
+ assert(code & 0x00FF0000);
+
+ dst += emitOutputByte(dst, code >> 16);
+ dst += emitOutputWord(dst, code & 0x0000FFFF);
+
+ break;
+
+ case INS_mulEAX:
+ case INS_imulEAX:
+
+ // Kill off any GC refs in EAX or EDX
+ emitGCregDeadUpd(REG_EAX, dst);
+ emitGCregDeadUpd(REG_EDX, dst);
+
+ __fallthrough;
+
+ default:
+
+ assert(id->idGCref() == GCT_NONE);
+
+ code = insEncodeMRreg(ins, reg, size, insCodeMR(ins));
+
+ if (size != EA_1BYTE)
+ {
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+
+ if (size == EA_2BYTE)
+ {
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ }
+ }
+
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputWord(dst, code);
+ break;
+ }
+
+ // Are we writing the register? if so then update the GC information
+ switch (id->idInsFmt())
+ {
+ case IF_RRD:
+ break;
+ case IF_RWR:
+ if (id->idGCref())
+ {
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ }
+ else
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ break;
+ case IF_RRW:
+ {
+#ifdef DEBUG
+ regMaskTP regMask = genRegMask(reg);
+#endif
+ if (id->idGCref())
+ {
+ // The reg must currently be holding either a gcref or a byref
+ // and the instruction must be inc or dec
+ assert(((emitThisGCrefRegs | emitThisByrefRegs) & regMask) &&
+ (ins == INS_inc || ins == INS_dec || ins == INS_inc_l || ins == INS_dec_l));
+ assert(id->idGCref() == GCT_BYREF);
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ }
+ else
+ {
+ // Can't use RRW to trash a GC ref. It's OK for unverifiable code
+ // to trash Byrefs.
+ assert((emitThisGCrefRegs & regMask) == 0);
+ }
+ }
+ break;
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected instruction format");
+ break;
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with two register operands.
+ */
+
+BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+
+ instruction ins = id->idIns();
+ regNumber reg1 = id->idReg1();
+ regNumber reg2 = id->idReg2();
+ emitAttr size = id->idOpSize();
+
+ // Get the 'base' opcode
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ if (IsSSE2Instruction(ins) || IsAVXInstruction(ins))
+ {
+ code = insEncodeRMreg(ins, code);
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+ }
+ else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
+ {
+ code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
+#ifdef _TARGET_AMD64_
+
+ assert((size < EA_4BYTE) || (insIsCMOV(ins)));
+ if ((size == EA_8BYTE) || (ins == INS_movsx))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+ }
+ else if (ins == INS_movsxd)
+ {
+ code = insEncodeRMreg(ins, code);
+
+#endif // _TARGET_AMD64_
+ }
+ else
+ {
+ code = insEncodeMRreg(ins, insCodeMR(ins));
+
+ if (ins != INS_test)
+ {
+ code |= 2;
+ }
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ noway_assert(RBM_BYTE_REGS & genRegMask(reg1));
+ noway_assert(RBM_BYTE_REGS & genRegMask(reg2));
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+ // TODO-AMD64-CQ: Better way to not emit REX.W when we don't need it
+ // Don't need to zero out the high bits explicitly
+ if ((ins != INS_xor) || (reg1 != reg2))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#endif // _TARGET_AMD64_
+
+ default:
+ assert(!"unexpected size");
+ }
+ }
+
+ unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
+ regCode |= insEncodeReg012(ins, reg2, size, &code);
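+ // regCode holds the ModRM reg (bits 3-5) and r/m (bits 0-2) fields for the two register operands;
+ // the register-direct mod bits (0xC0) are combined in below.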
+
+ // In case of AVX instructions that take 3 operands, we generally want to encode reg1
+ // as first source. In this case, reg1 is both a source and a destination.
+ // The exception is the "merge" 3-operand case, where we have a move instruction, such
+ // as movss, and we want to merge the source with itself.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // Encode the source/dest operand reg in the 'vvvv' bits in 1's complement form.
+ code = insEncodeReg3456(ins, reg1, size, code);
+ }
+ else if (IsThreeOperandMoveAVXInstruction(ins))
+ {
+ // Encode the source operand reg in the 'vvvv' bits in 1's complement form.
+ code = insEncodeReg3456(ins, reg2, size, code);
+ }
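+ // For reference, the 4-bit VEX.vvvv field holds a register number in one's complement
+ // (inverted) form: xmm2 (register number 2) is encoded as ~2 & 0xF = 0b1101, and a
+ // value of 0b1111 means no extra register operand.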
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // Output the highest word of the opcode
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
+ // encoding. Otherwise, this is an instruction with a 4-byte encoding,
+ // and the MOd/RM encoding needs to go in the 5th byte.
+ // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
+ // We probably need a different mechanism to identify the 4-byte encodings.
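+ // (Reminder: in the register-direct form the ModRM byte is mod=11 (0xC0), with the
+ // 'reg' field in bits 5..3 and 'r/m' in bits 2..0; regCode below already holds those
+ // two register fields.)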
+ if ((code & 0xFF) == 0x00)
+ {
+ // This case happens for AVX instructions only
+ assert(IsAVXInstruction(ins));
+ if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ else
+ {
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ }
+ else if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputWord(dst, code | (regCode << 8));
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_RRD:
+ break;
+
+ case IF_RWR_RRD:
+
+ if (emitSyncThisObjReg != REG_NA && emitIGisInProlog(emitCurIG) && reg2 == (int)REG_ARG_0)
+ {
+ // We're relocating "this" in the prolog
+ assert(emitComp->lvaIsOriginalThisArg(0));
+ assert(emitComp->lvaTable[0].lvRegister);
+ assert(emitComp->lvaTable[0].lvRegNum == reg1);
+
+ if (emitFullGCinfo)
+ {
+ emitGCregLiveSet(id->idGCref(), genRegMask(reg1), dst, true);
+ break;
+ }
+ else
+ {
+ /* If emitFullGCinfo==false, then we don't use any
+ regPtrDsc's and so explicitly note the location
+ of "this" in GCEncode.cpp
+ */
+ }
+ }
+
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_RRW_RRD:
+
+ switch (id->idIns())
+ {
+ /*
+ This must be one of the following cases:
+
+ xor reg, reg to assign NULL
+
+ and r1 , r2 if (ptr1 && ptr2) ...
+ or r1 , r2 if (ptr1 || ptr2) ...
+
+ add r1 , r2 to compute a normal byref
+ sub r1 , r2 to compute a strange byref (VC only)
+
+ */
+ case INS_xor:
+ assert(id->idReg1() == id->idReg2());
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case INS_or:
+ case INS_and:
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+
+ case INS_add:
+ case INS_sub:
+ assert(id->idGCref() == GCT_BYREF);
+
+#ifdef DEBUG
+ regMaskTP regMask;
+ regMask = genRegMask(reg1) | genRegMask(reg2);
+
+ // r1/r2 could have been a GCREF as GCREF + int=BYREF
+ // or BYREF+/-int=BYREF
+ assert(((regMask & emitThisGCrefRegs) && (ins == INS_add)) ||
+ ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
+#endif
+ // Mark r1 as holding a byref
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC reg update instruction");
+ }
+
+ break;
+
+ case IF_RRW_RRW:
+ // This must be "xchg reg1, reg2"
+ assert(id->idIns() == INS_xchg);
+
+ // If we got here, the GC-ness of the registers doesn't match, so we have to "swap" them in the GC
+ // register pointer mask.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ GCtype gc1, gc2;
+
+ gc1 = emitRegGCtype(reg1);
+ gc2 = emitRegGCtype(reg2);
+
+ if (gc1 != gc2)
+ {
+ // Kill the GC-info about the GC registers
+
+ if (needsGC(gc1))
+ {
+ emitGCregDeadUpd(reg1, dst);
+ }
+
+ if (needsGC(gc2))
+ {
+ emitGCregDeadUpd(reg2, dst);
+ }
+
+ // Now, swap the info
+
+ if (needsGC(gc1))
+ {
+ emitGCregLiveUpd(gc1, reg2, dst);
+ }
+
+ if (needsGC(gc2))
+ {
+ emitGCregLiveUpd(gc2, reg1, dst);
+ }
+ }
+#endif // !LEGACY_BACKEND
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+ else
+ {
+ if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_CNS:
+ // INS_mulEAX can not be used with any of these formats
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+
+ // For the three operand imul instruction the target
+ // register is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ break;
+
+ case IF_RWR_RRD:
+ case IF_RRW_RRD:
+ // INS_movxmm2i writes to reg2.
+ if (ins == INS_mov_xmm2i)
+ {
+ emitGCregDeadUpd(id->idReg2(), dst);
+ }
+ else
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return dst;
+}
+
+#ifdef FEATURE_AVX_SUPPORT
+BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+
+ instruction ins = id->idIns();
+ assert(IsAVXInstruction(ins));
+ assert(IsThreeOperandAVXInstruction(ins));
+ regNumber targetReg = id->idReg1();
+ regNumber src1 = id->idReg2();
+ regNumber src2 = id->idReg3();
+ emitAttr size = id->idOpSize();
+
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeRMreg(ins, code);
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ unsigned regCode = insEncodeReg345(ins, targetReg, size, &code);
+ regCode |= insEncodeReg012(ins, src2, size, &code);
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, src1, size, code);
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // Output the highest word of the opcode
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // If byte 4 is 0xC0, then it contains the Mod/RM encoding for a 3-byte
+ // encoding. Otherwise, this is an instruction with a 4-byte encoding,
+ // and the Mod/RM encoding needs to go in the 5th byte.
+ // TODO-XArch-CQ: Currently, this will only support registers in the 5th byte.
+ // We probably need a different mechanism to identify the 4-byte encodings.
+ if ((code & 0xFF) == 0x00)
+ {
+ // This case happens for AVX instructions only
+ assert(IsAVXInstruction(ins));
+ if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ else
+ {
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+ }
+ else if ((code & 0xFF00) == 0xC000)
+ {
+ dst += emitOutputWord(dst, code | (regCode << 8));
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, (0xC0 | regCode));
+ }
+
+ noway_assert(!id->idGCref());
+
+ return dst;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Output an instruction with a register and constant operands.
+ */
+
+BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+ emitAttr size = id->idOpSize();
+ instruction ins = id->idIns();
+ regNumber reg = id->idReg1();
+ ssize_t val = emitGetInsSC(id);
+ bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+ }
+#endif
+
+ noway_assert(emitVerifyEncodable(ins, size, reg));
+
+#ifndef LEGACY_BACKEND
+ if (IsSSEOrAVXInstruction(ins))
+ {
+ // Handle SSE2 instructions of the form "opcode reg, immed8"
+
+ assert(id->idGCref() == GCT_NONE);
+ assert(valInByte);
+ assert(ins == INS_psrldq || ins == INS_pslldq);
+
+ // Get the 'base' opcode.
+ code = insCodeMI(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMIreg(ins, reg, size, code);
+ assert(code & 0x00FF0000);
+ if (TakesVexPrefix(ins))
+ {
+ // The 'vvvv' bits encode the destination register, which for this case (RI)
+ // is the same as the source.
+ code = insEncodeReg3456(ins, reg, size, code);
+ }
+
+ // In case of psrldq
+ // Reg/Opcode = 3
+ // R/M = reg1
+ //
+ // In case of pslldq
+ // Reg/Opcode = 7
+ // R/M = reg1
+ regNumber regOpcode = (regNumber)((ins == INS_psrldq) ? 3 : 7);
+ unsigned regcode = (insEncodeReg345(ins, regOpcode, size, &code) | insEncodeReg012(ins, reg, size, &code)) << 8;
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ }
+ else if (code & 0xFF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ }
+
+ dst += emitOutputWord(dst, code | regcode);
+
+ dst += emitOutputByte(dst, val);
+
+ return dst;
+ }
+#endif // !LEGACY_BACKEND
+
+ // The 'mov' opcode is special
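+ // It uses the B8+rd ("mov reg, imm") form: the destination register is encoded in
+ // the low 3 bits of the opcode byte itself, and the immediate is a full operand-width
+ // value. With a REX.W prefix this is the only x86 instruction that takes a 64-bit
+ // immediate.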
+ if (ins == INS_mov)
+ {
+ code = insCodeACC(ins);
+ assert(code < 0x100);
+
+ code |= 0x08; // Set the 'w' bit
+ unsigned regcode = insEncodeReg012(ins, reg, size, &code);
+ code |= regcode;
+
+ // This is INS_mov and will not take VEX prefix
+ assert(!TakesVexPrefix(ins));
+
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ dst += emitOutputByte(dst, code);
+ if (size == EA_4BYTE)
+ {
+ dst += emitOutputLong(dst, val);
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ assert(size == EA_PTRSIZE);
+ dst += emitOutputSizeT(dst, val);
+ }
+#endif
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitRecordRelocation((void*)(dst - (unsigned)EA_SIZE(size)), (void*)(size_t)val, IMAGE_REL_BASED_MOFFSET);
+ }
+#endif
+
+ goto DONE;
+ }
+
+ // Decide which encoding is the shortest
+ bool useSigned, useACC;
+
+ if (reg == REG_EAX && !instrIs3opImul(ins))
+ {
+ if (size == EA_1BYTE || (ins == INS_test))
+ {
+ // For al, ACC encoding is always the smallest
+ useSigned = false;
+ useACC = true;
+ }
+ else
+ {
+ /* For ax/eax, we avoid the ACC encoding for small constants, as we
+ * can emit the small constant and have it sign-extended.
+ * For big constants, the ACC encoding is better, as we can use
+ * the 1-byte opcode.
+ */
+
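+ // For example, "add eax, 8" is smaller as 83 C0 08 (sign-extended imm8 form, 3 bytes)
+ // than as the accumulator form 05 08 00 00 00 (5 bytes), while "add eax, 0x12345678"
+ // is smaller as 05 78 56 34 12 (5 bytes) than as 81 C0 78 56 34 12 (6 bytes).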
+ if (valInByte)
+ {
+ // avoid using ACC encoding
+ useSigned = true;
+ useACC = false;
+ }
+ else
+ {
+ useSigned = false;
+ useACC = true;
+ }
+ }
+ }
+ else
+ {
+ useACC = false;
+
+ if (valInByte)
+ {
+ useSigned = true;
+ }
+ else
+ {
+ useSigned = false;
+ }
+ }
+
+ // "test" has no 's' bit
+ if (ins == INS_test)
+ {
+ useSigned = false;
+ }
+
+ // Get the 'base' opcode
+ if (useACC)
+ {
+ assert(!useSigned);
+ code = insCodeACC(ins);
+ }
+ else
+ {
+ assert(!useSigned || valInByte);
+
+ // Some instructions (at least 'imul') do not have a
+ // r/m, immed form, but do have a dstReg,srcReg,imm8 form.
+ if (valInByte && useSigned && insNeedsRRIb(ins))
+ {
+ code = insEncodeRRIb(ins, reg, size);
+ }
+ else
+ {
+ code = insCodeMI(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMIreg(ins, reg, size, code);
+ }
+ }
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ break;
+
+ case EA_2BYTE:
+ // Output a size prefix for a 16-bit operand
+ dst += emitOutputByte(dst, 0x66);
+ __fallthrough;
+
+ case EA_4BYTE:
+ // Set the 'w' bit to get the large version
+ code |= 0x1;
+ break;
+
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+ /* Set the 'w' bit to get the large version */
+ /* and the REX.W bit to get the really large version */
+
+ code = AddRexWPrefix(ins, code);
+ code |= 0x1;
+ break;
+#endif
+
+ default:
+ assert(!"unexpected size");
+ }
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ // Does the value fit in a sign-extended byte?
+ // Important! Only set the 's' bit when we have a size larger than EA_1BYTE.
+ // Note: A sign-extending immediate when (size == EA_1BYTE) is invalid in 64-bit mode.
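+ // (In the 0x80/0x81/0x83 ALU-immediate group, bit 1 of the opcode is that 's' bit:
+ // 0x81 takes a full-size immediate, while 0x83 = 0x81 | 2 takes an imm8 that the CPU
+ // sign-extends to the operand size.)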
+
+ if (useSigned && (size > EA_1BYTE))
+ {
+ // We can just set the 's' bit, and issue an immediate byte
+
+ code |= 0x2; // Set the 's' bit to use a sign-extended immediate byte.
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, val);
+ }
+ else
+ {
+ // Can we use an accumulator (EAX) encoding?
+ if (useACC)
+ {
+ dst += emitOutputByte(dst, code);
+ }
+ else
+ {
+ dst += emitOutputWord(dst, code);
+ }
+
+ switch (size)
+ {
+ case EA_1BYTE:
+ dst += emitOutputByte(dst, val);
+ break;
+ case EA_2BYTE:
+ dst += emitOutputWord(dst, val);
+ break;
+ case EA_4BYTE:
+ dst += emitOutputLong(dst, val);
+ break;
+#ifdef _TARGET_AMD64_
+ case EA_8BYTE:
+ dst += emitOutputLong(dst, val);
+ break;
+#endif // _TARGET_AMD64_
+ default:
+ break;
+ }
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
+ assert(size == EA_4BYTE);
+ }
+#endif
+ }
+
+DONE:
+
+ // Does this instruction operate on a GC ref value?
+ if (id->idGCref())
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_CNS:
+ break;
+
+ case IF_RWR_CNS:
+ emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ break;
+
+ case IF_RRW_CNS:
+ assert(id->idGCref() == GCT_BYREF);
+
+#ifdef DEBUG
+ regMaskTP regMask;
+ regMask = genRegMask(reg);
+ // FIXNOW review the other places and relax the assert there too
+
+ // The reg must currently be holding either a gcref or a byref
+ // GCT_GCREF+int = GCT_BYREF, and GCT_BYREF+/-int = GCT_BYREF
+ if (emitThisGCrefRegs & regMask)
+ {
+ assert(ins == INS_add);
+ }
+ if (emitThisByrefRegs & regMask)
+ {
+ assert(ins == INS_add || ins == INS_sub);
+ }
+#endif
+ // Mark it as holding a GCT_BYREF
+ emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+
+ // mul can never produce a GC ref
+ assert(!instrIs3opImul(ins));
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+ }
+ else
+ {
+ switch (id->idInsFmt())
+ {
+ case IF_RRD_CNS:
+ // INS_mulEAX can not be used with any of these formats
+ assert(ins != INS_mulEAX && ins != INS_imulEAX);
+
+ // For the three operand imul instruction the target
+ // register is encoded in the opcode
+
+ if (instrIs3opImul(ins))
+ {
+ regNumber tgtReg = inst3opImulReg(ins);
+ emitGCregDeadUpd(tgtReg, dst);
+ }
+ break;
+
+ case IF_RRW_CNS:
+ case IF_RWR_CNS:
+ assert(!instrIs3opImul(ins));
+
+ emitGCregDeadUpd(id->idReg1(), dst);
+ break;
+
+ default:
+#ifdef DEBUG
+ emitDispIns(id, false, false, false);
+#endif
+ assert(!"unexpected GC ref instruction format");
+ }
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output an instruction with a constant operand.
+ */
+
+BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
+{
+ size_t code;
+ instruction ins = id->idIns();
+ emitAttr size = id->idOpSize();
+ ssize_t val = emitGetInsSC(id);
+ bool valInByte = ((signed char)val == val);
+
+ // We would need to update GC info correctly
+ assert(!IsSSE2Instruction(ins));
+ assert(!IsAVXInstruction(ins));
+
+#ifdef _TARGET_AMD64_
+ // all these opcodes take a sign-extended 4-byte immediate, max
+ noway_assert(size < EA_8BYTE || ((int)val == val && !id->idIsCnsReloc()));
+#endif
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ valInByte = false; // relocs can't be placed in a byte
+
+ // Of these instructions only the push instruction can have reloc
+ assert(ins == INS_push || ins == INS_push_hide);
+ }
+#endif
+
+ switch (ins)
+ {
+ case INS_jge:
+ assert((val >= -128) && (val <= 127));
+ dst += emitOutputByte(dst, insCode(ins));
+ dst += emitOutputByte(dst, val);
+ break;
+
+ case INS_loop:
+ assert((val >= -128) && (val <= 127));
+ dst += emitOutputByte(dst, insCodeMI(ins));
+ dst += emitOutputByte(dst, val);
+ break;
+
+ case INS_ret:
+ assert(val);
+ dst += emitOutputByte(dst, insCodeMI(ins));
+ dst += emitOutputWord(dst, val);
+ break;
+
+ case INS_push_hide:
+ case INS_push:
+ code = insCodeMI(ins);
+
+ // Does the operand fit in a byte?
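+ // If so, use the sign-extended imm8 form: "push imm32" is opcode 0x68, and setting
+ // bit 1 (0x68 | 2 = 0x6A) selects "push imm8".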
+ if (valInByte)
+ {
+ dst += emitOutputByte(dst, code | 2);
+ dst += emitOutputByte(dst, val);
+ }
+ else
+ {
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+ }
+
+ dst += emitOutputByte(dst, code);
+ dst += emitOutputLong(dst, val);
+#ifdef RELOC_SUPPORT
+ if (id->idIsCnsReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)(size_t)val, IMAGE_REL_BASED_HIGHLOW);
+ }
+#endif
+ }
+
+ // Did we push a GC ref value?
+ if (id->idGCref())
+ {
+#ifdef DEBUG
+ printf("UNDONE: record GCref push [cns]\n");
+#endif
+ }
+
+ break;
+
+ default:
+ assert(!"unexpected instruction");
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Output a local jump instruction.
+ * This function also handles non-jumps that have jump-like characteristics, like RIP-relative LEA of a label that
+ * needs to get bound to an actual address and processed by branch shortening.
+ */
+
+BYTE* emitter::emitOutputLJ(BYTE* dst, instrDesc* i)
+{
+ unsigned srcOffs;
+ unsigned dstOffs;
+ ssize_t distVal;
+
+ instrDescJmp* id = (instrDescJmp*)i;
+ instruction ins = id->idIns();
+ bool jmp;
+ bool relAddr = true; // does the instruction use relative-addressing?
+
+ // SSE2 doesn't make any sense here
+ assert(!IsSSE2Instruction(ins));
+ assert(!IsAVXInstruction(ins));
+
+ size_t ssz;
+ size_t lsz;
+
+ switch (ins)
+ {
+ default:
+ ssz = JCC_SIZE_SMALL;
+ lsz = JCC_SIZE_LARGE;
+ jmp = true;
+ break;
+
+ case INS_jmp:
+ ssz = JMP_SIZE_SMALL;
+ lsz = JMP_SIZE_LARGE;
+ jmp = true;
+ break;
+
+ case INS_call:
+ ssz = lsz = CALL_INST_SIZE;
+ jmp = false;
+ break;
+
+ case INS_push_hide:
+ case INS_push:
+ ssz = lsz = 5;
+ jmp = false;
+ relAddr = false;
+ break;
+
+ case INS_mov:
+ case INS_lea:
+ ssz = lsz = id->idCodeSize();
+ jmp = false;
+ relAddr = false;
+ break;
+ }
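+ // Encoding sizes for reference: a "short" jump is opcode + rel8 (2 bytes) and can only
+ // reach -128..+127 bytes from the end of the instruction; the large forms are
+ // E9 + rel32 for jmp (5 bytes) and 0F 8x + rel32 for conditional jumps (6 bytes).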
+
+ // Figure out the distance to the target
+ srcOffs = emitCurCodeOffs(dst);
+ dstOffs = id->idAddr()->iiaIGlabel->igOffs;
+
+ if (relAddr)
+ {
+ distVal = (ssize_t)(emitOffsetToPtr(dstOffs) - emitOffsetToPtr(srcOffs));
+ }
+ else
+ {
+ distVal = (ssize_t)emitOffsetToPtr(dstOffs);
+ }
+
+ if (dstOffs <= srcOffs)
+ {
+ // This is a backward jump - distance is known at this point
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[3] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[3] Jump block is at %08X - %02X = %08X\n", blkOffs, emitOffsAdj, blkOffs - emitOffsAdj);
+ printf("[3] Jump is at %08X - %02X = %08X\n", srcOffs, emitOffsAdj, srcOffs - emitOffsAdj);
+ printf("[3] Label block is at %08X - %02X = %08X\n", dstOffs, emitOffsAdj, dstOffs - emitOffsAdj);
+ }
+#endif
+
+ // Can we use a short jump?
+ if (jmp && distVal - ssz >= (size_t)JMP_DIST_SMALL_MAX_NEG)
+ {
+ emitSetShortJump(id);
+ }
+ }
+ else
+ {
+ // This is a forward jump - distance will be an upper limit
+ emitFwdJumps = true;
+
+ // The target offset will be closer by at least 'emitOffsAdj', but only if this
+ // jump doesn't cross the hot-cold boundary.
+ if (!emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ dstOffs -= emitOffsAdj;
+ distVal -= emitOffsAdj;
+ }
+
+ // Record the location of the jump for later patching
+ id->idjOffs = dstOffs;
+
+ // Are we overflowing the id->idjOffs bitfield?
+ if (id->idjOffs != dstOffs)
+ {
+ IMPL_LIMITATION("Method is too large");
+ }
+
+#if DEBUG_EMIT
+ if (id->idDebugOnlyInfo()->idNum == (unsigned)INTERESTING_JUMP_NUM || INTERESTING_JUMP_NUM == 0)
+ {
+ size_t blkOffs = id->idjIG->igOffs;
+
+ if (INTERESTING_JUMP_NUM == 0)
+ {
+ printf("[4] Jump %u:\n", id->idDebugOnlyInfo()->idNum);
+ }
+ printf("[4] Jump block is at %08X\n", blkOffs);
+ printf("[4] Jump is at %08X\n", srcOffs);
+ printf("[4] Label block is at %08X - %02X = %08X\n", dstOffs + emitOffsAdj, emitOffsAdj, dstOffs);
+ }
+#endif
+
+ // Can we use a short jump?
+ if (jmp && distVal - ssz <= (size_t)JMP_DIST_SMALL_MAX_POS)
+ {
+ emitSetShortJump(id);
+ }
+ }
+
+ // Adjust the offset to emit relative to the end of the instruction
+ if (relAddr)
+ {
+ distVal -= id->idjShort ? ssz : lsz;
+ }
+
+#ifdef DEBUG
+ if (0 && emitComp->verbose)
+ {
+ size_t sz = id->idjShort ? ssz : lsz;
+ int distValSize = id->idjShort ? 4 : 8;
+ printf("; %s jump [%08X/%03u] from %0*X to %0*X: dist = %08XH\n", (dstOffs <= srcOffs) ? "Fwd" : "Bwd",
+ emitComp->dspPtr(id), id->idDebugOnlyInfo()->idNum, distValSize, srcOffs + sz, distValSize, dstOffs,
+ distVal);
+ }
+#endif
+
+ // What size jump should we use?
+ if (id->idjShort)
+ {
+ // Short jump
+ assert(!id->idjKeepLong);
+ assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false);
+
+ assert(JMP_SIZE_SMALL == JCC_SIZE_SMALL);
+ assert(JMP_SIZE_SMALL == 2);
+
+ assert(jmp);
+
+ if (emitInstCodeSz(id) != JMP_SIZE_SMALL)
+ {
+ emitOffsAdj += emitInstCodeSz(id) - JMP_SIZE_SMALL;
+
+#ifdef DEBUG
+ if (emitComp->verbose)
+ {
+ printf("; NOTE: size of jump [%08X] mis-predicted\n", emitComp->dspPtr(id));
+ }
+#endif
+ }
+
+ dst += emitOutputByte(dst, insCode(ins));
+
+ // For forward jumps, record the address of the distance value
+ id->idjTemp.idjAddr = (distVal > 0) ? dst : nullptr;
+
+ dst += emitOutputByte(dst, distVal);
+ }
+ else
+ {
+ size_t code;
+
+ // Long jump
+ if (jmp)
+ {
+ assert(INS_jmp + (INS_l_jmp - INS_jmp) == INS_l_jmp);
+ assert(INS_jo + (INS_l_jmp - INS_jmp) == INS_l_jo);
+ assert(INS_jb + (INS_l_jmp - INS_jmp) == INS_l_jb);
+ assert(INS_jae + (INS_l_jmp - INS_jmp) == INS_l_jae);
+ assert(INS_je + (INS_l_jmp - INS_jmp) == INS_l_je);
+ assert(INS_jne + (INS_l_jmp - INS_jmp) == INS_l_jne);
+ assert(INS_jbe + (INS_l_jmp - INS_jmp) == INS_l_jbe);
+ assert(INS_ja + (INS_l_jmp - INS_jmp) == INS_l_ja);
+ assert(INS_js + (INS_l_jmp - INS_jmp) == INS_l_js);
+ assert(INS_jns + (INS_l_jmp - INS_jmp) == INS_l_jns);
+ assert(INS_jpe + (INS_l_jmp - INS_jmp) == INS_l_jpe);
+ assert(INS_jpo + (INS_l_jmp - INS_jmp) == INS_l_jpo);
+ assert(INS_jl + (INS_l_jmp - INS_jmp) == INS_l_jl);
+ assert(INS_jge + (INS_l_jmp - INS_jmp) == INS_l_jge);
+ assert(INS_jle + (INS_l_jmp - INS_jmp) == INS_l_jle);
+ assert(INS_jg + (INS_l_jmp - INS_jmp) == INS_l_jg);
+
+ code = insCode((instruction)(ins + (INS_l_jmp - INS_jmp)));
+ }
+ else if (ins == INS_push || ins == INS_push_hide)
+ {
+ assert(insCodeMI(INS_push) == 0x68);
+ code = 0x68;
+ }
+ else if (ins == INS_mov)
+ {
+ // Make it look like IF_SWR_CNS so that emitOutputSV emits the r/m32 for us
+ insFormat tmpInsFmt = id->idInsFmt();
+ insGroup* tmpIGlabel = id->idAddr()->iiaIGlabel;
+ bool tmpDspReloc = id->idIsDspReloc();
+
+ id->idInsFmt(IF_SWR_CNS);
+ id->idAddr()->iiaLclVar = ((instrDescLbl*)id)->dstLclVar;
+ id->idSetIsDspReloc(false);
+
+ dst = emitOutputSV(dst, id, insCodeMI(ins));
+
+ // Restore id fields with original values
+ id->idInsFmt(tmpInsFmt);
+ id->idAddr()->iiaIGlabel = tmpIGlabel;
+ id->idSetIsDspReloc(tmpDspReloc);
+ code = 0xCC;
+ }
+ else if (ins == INS_lea)
+ {
+ // Make an instrDesc that looks like IF_RWR_ARD so that emitOutputAM emits the r/m32 for us.
+ // We basically are doing what emitIns_R_AI does.
+ // TODO-XArch-Cleanup: revisit this.
+ instrDescAmd idAmdStackLocal;
+ instrDescAmd* idAmd = &idAmdStackLocal;
+ *(instrDesc*)idAmd = *(instrDesc*)id; // copy all the "core" fields
+ memset((BYTE*)idAmd + sizeof(instrDesc), 0,
+ sizeof(instrDescAmd) - sizeof(instrDesc)); // zero out the tail that wasn't copied
+
+ idAmd->idInsFmt(IF_RWR_ARD);
+ idAmd->idAddr()->iiaAddrMode.amBaseReg = REG_NA;
+ idAmd->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+ emitSetAmdDisp(idAmd, distVal); // set the displacement
+ idAmd->idSetIsDspReloc(id->idIsDspReloc());
+ assert(emitGetInsAmdAny(idAmd) == distVal); // make sure "disp" is stored properly
+
+ UNATIVE_OFFSET sz = emitInsSizeAM(idAmd, insCodeRM(ins));
+ idAmd->idCodeSize(sz);
+
+ code = insCodeRM(ins);
+ code |= (insEncodeReg345(ins, id->idReg1(), EA_PTRSIZE, &code) << 8);
+
+ dst = emitOutputAM(dst, idAmd, code, nullptr);
+
+ code = 0xCC;
+
+ // For forward jumps, record the address of the distance value
+ // Hard-coded 4 here because we already output the displacement, as the last thing.
+ id->idjTemp.idjAddr = (dstOffs > srcOffs) ? (dst - 4) : nullptr;
+
+ // We're done
+ return dst;
+ }
+ else
+ {
+ code = 0xE8;
+ }
+
+ if (ins != INS_mov)
+ {
+ dst += emitOutputByte(dst, code);
+
+ if (code & 0xFF00)
+ {
+ dst += emitOutputByte(dst, code >> 8);
+ }
+ }
+
+ // For forward jumps, record the address of the distance value
+ id->idjTemp.idjAddr = (dstOffs > srcOffs) ? dst : nullptr;
+
+ dst += emitOutputLong(dst, distVal);
+
+#ifndef _TARGET_AMD64_ // all REL32 on AMD have to go through recordRelocation
+ if (emitComp->opts.compReloc)
+#endif
+ {
+ if (!relAddr)
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), (void*)distVal, IMAGE_REL_BASED_HIGHLOW);
+ }
+ else if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs))
+ {
+ assert(id->idjKeepLong);
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), dst + distVal, IMAGE_REL_BASED_REL32);
+ }
+ }
+ }
+
+ // Local calls kill all registers
+ if (ins == INS_call && (emitThisGCrefRegs | emitThisByrefRegs))
+ {
+ emitGCregDeadUpdMask(emitThisGCrefRegs | emitThisByrefRegs, dst);
+ }
+
+ return dst;
+}
+
+/*****************************************************************************
+ *
+ * Append the machine code corresponding to the given instruction descriptor
+ * to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
+ * is the instruction group that contains the instruction. Updates '*dp' to
+ * point past the generated code, and returns the size of the instruction
+ * descriptor in bytes.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
+{
+ assert(emitIssuing);
+
+ BYTE* dst = *dp;
+ size_t sz = sizeof(instrDesc);
+ instruction ins = id->idIns();
+ unsigned char callInstrSize = 0;
+
+#ifdef DEBUG
+ bool dspOffs = emitComp->opts.dspGCtbls;
+#endif // DEBUG
+
+ emitAttr size = id->idOpSize();
+
+ assert(REG_NA == (int)REG_NA);
+
+ assert(ins != INS_imul || size >= EA_4BYTE); // Has no 'w' bit
+ assert(instrIs3opImul(id->idIns()) == 0 || size >= EA_4BYTE); // Has no 'w' bit
+
+ VARSET_TP VARSET_INIT_NOCOPY(GCvars, VarSetOps::UninitVal());
+
+ // What instruction format have we got?
+ switch (id->idInsFmt())
+ {
+ size_t code;
+ size_t regcode;
+ int args;
+ CnsVal cnsVal;
+
+ BYTE* addr;
+ bool recCall;
+
+ regMaskTP gcrefRegs;
+ regMaskTP byrefRegs;
+
+ /********************************************************************/
+ /* No operands */
+ /********************************************************************/
+ case IF_NONE:
+ // the loop alignment pseudo instruction
+ if (ins == INS_align)
+ {
+ sz = TINY_IDSC_SIZE;
+ dst = emitOutputNOP(dst, (-(int)(size_t)dst) & 0x0f);
+ assert(((size_t)dst & 0x0f) == 0);
+ break;
+ }
+
+ if (ins == INS_nop)
+ {
+ dst = emitOutputNOP(dst, id->idCodeSize());
+ break;
+ }
+
+ // the cdq instruction kills the EDX register implicitly
+ if (ins == INS_cdq)
+ {
+ emitGCregDeadUpd(REG_EDX, dst);
+ }
+
+ __fallthrough;
+
+#if FEATURE_STACK_FP_X87
+ case IF_TRD:
+ case IF_TWR:
+ case IF_TRW:
+#endif // FEATURE_STACK_FP_X87
+
+ assert(id->idGCref() == GCT_NONE);
+
+ code = insCodeMR(ins);
+
+#ifdef _TARGET_AMD64_
+ // Only scalar AVX instructions are supported here, hence the size is hard-coded to 4-byte.
+ code = AddVexPrefixIfNeeded(ins, code, EA_4BYTE);
+
+ if (ins == INS_cdq && TakesRexWPrefix(ins, id->idOpSize()))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+#endif
+ // Is this a 'big' opcode?
+ if (code & 0xFF000000)
+ {
+ // The high word and then the low word
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ dst += emitOutputWord(dst, code);
+ }
+ else if (code & 0x00FF0000)
+ {
+ // The high byte and then the low word
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ dst += emitOutputWord(dst, code);
+ }
+ else if (code & 0xFF00)
+ {
+ // The 2 byte opcode
+ dst += emitOutputWord(dst, code);
+ }
+ else
+ {
+ // The 1 byte opcode
+ dst += emitOutputByte(dst, code);
+ }
+
+ break;
+
+ /********************************************************************/
+ /* Simple constant, local label, method */
+ /********************************************************************/
+
+ case IF_CNS:
+ dst = emitOutputIV(dst, id);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_LABEL:
+ case IF_RWR_LABEL:
+ case IF_SWR_LABEL:
+ assert(id->idGCref() == GCT_NONE);
+ assert(id->idIsBound());
+
+ // TODO-XArch-Cleanup: handle IF_RWR_LABEL in emitOutputLJ() or change it to emitOutputAM()?
+ dst = emitOutputLJ(dst, id);
+ sz = (id->idInsFmt() == IF_SWR_LABEL ? sizeof(instrDescLbl) : sizeof(instrDescJmp));
+ break;
+
+ case IF_METHOD:
+ case IF_METHPTR:
+ // Assume we'll be recording this call
+ recCall = true;
+
+ // Get hold of the argument count and field Handle
+ args = emitGetInsCDinfo(id);
+
+ // Is this a "fat" call descriptor?
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ addr = (BYTE*)id->idAddr()->iiaAddr;
+ assert(addr != nullptr);
+
+ // Some helpers don't get recorded in GC tables
+ if (id->idIsNoGC())
+ {
+ recCall = false;
+ }
+
+ // What kind of a call do we have here?
+ if (id->idInsFmt() == IF_METHPTR)
+ {
+ // This is call indirect via a method pointer
+
+ code = insCodeMR(ins);
+ if (ins == INS_i_jmp)
+ {
+ code |= 1;
+ }
+
+ if (id->idIsDspReloc())
+ {
+ dst += emitOutputWord(dst, code | 0x0500);
+#ifdef _TARGET_AMD64_
+ dst += emitOutputLong(dst, 0);
+#else
+ dst += emitOutputLong(dst, (int)addr);
+#endif
+ emitRecordRelocation((void*)(dst - sizeof(int)), addr, IMAGE_REL_BASED_DISP32);
+ }
+ else
+ {
+#ifdef _TARGET_X86_
+ dst += emitOutputWord(dst, code | 0x0500);
+#else //_TARGET_AMD64_
+ // Amd64: addr fits within 32-bits and can be encoded as a displacement relative to zero.
+ // This addr mode should never be used while generating relocatable ngen code, nor if
+ // the addr can be encoded as a pc-relative address.
+ noway_assert(!emitComp->opts.compReloc);
+ noway_assert(codeGen->genAddrRelocTypeHint((size_t)addr) != IMAGE_REL_BASED_REL32);
+ noway_assert(static_cast<int>(reinterpret_cast<intptr_t>(addr)) == (ssize_t)addr);
+
+ // This requires specifying a SIB byte after the ModRM byte.
+ dst += emitOutputWord(dst, code | 0x0400);
+ dst += emitOutputByte(dst, 0x25);
+#endif //_TARGET_AMD64_
+ dst += emitOutputLong(dst, static_cast<int>(reinterpret_cast<intptr_t>(addr)));
+ }
+ goto DONE_CALL;
+ }
+
+ // Else
+ // This is call direct where we know the target, thus we can
+ // use a direct call; the target to jump to is in iiaAddr.
+ assert(id->idInsFmt() == IF_METHOD);
+
+ // Output the call opcode followed by the target distance
+ dst += (ins == INS_l_jmp) ? emitOutputByte(dst, insCode(ins)) : emitOutputByte(dst, insCodeMI(ins));
+
+ ssize_t offset;
+#ifdef _TARGET_AMD64_
+ // All REL32 on Amd64 go through recordRelocation. Here we will output zero to advance dst.
+ offset = 0;
+ assert(id->idIsDspReloc());
+#else
+ // Calculate PC relative displacement.
+ // Although you might think we should be using sizeof(void*), the x86 and x64 instruction sets
+ // only allow a 32-bit offset, so we correctly use sizeof(INT32).
+ offset = addr - (dst + sizeof(INT32));
+#endif
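+ // (The rel32 displacement is measured from the end of the instruction, so
+ // E8 00 00 00 00 calls the instruction that immediately follows the call.)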
+
+ dst += emitOutputLong(dst, offset);
+
+#ifdef RELOC_SUPPORT
+ if (id->idIsDspReloc())
+ {
+ emitRecordRelocation((void*)(dst - sizeof(INT32)), addr, IMAGE_REL_BASED_REL32);
+ }
+#endif
+
+ DONE_CALL:
+
+ /* We update the GC info before the call as the variables cannot be
+ used by the call. Killing variables before the call helps with
+ boundary conditions if the call is CORINFO_HELP_THROW - see bug 50029.
+ If we ever track aliased variables (which could be used by the
+ call), we would have to keep them alive past the call.
+ */
+ assert(FitsIn<unsigned char>(dst - *dp));
+ callInstrSize = static_cast<unsigned char>(dst - *dp);
+ emitUpdateLiveGCvars(GCvars, *dp);
+
+ // If the method returns a GC ref, mark EAX appropriately
+ if (id->idGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_EAX;
+ }
+ else if (id->idGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_EAX;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If a multi-register return method is called, mark RDX appropriately (for System V AMD64).
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+ if (idCall->idSecondGCref() == GCT_GCREF)
+ {
+ gcrefRegs |= RBM_RDX;
+ }
+ else if (idCall->idSecondGCref() == GCT_BYREF)
+ {
+ byrefRegs |= RBM_RDX;
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // If the GC register set has changed, report the new set
+ if (gcrefRegs != emitThisGCrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_GCREF, gcrefRegs, dst);
+ }
+
+ if (byrefRegs != emitThisByrefRegs)
+ {
+ emitUpdateLiveGCregs(GCT_BYREF, byrefRegs, dst);
+ }
+
+ if (recCall || args)
+ {
+ // For callee-pop, all arguments will be popped after the call.
+ // For caller-pop, any GC arguments will go dead after the call.
+
+ assert(callInstrSize != 0);
+
+ if (args >= 0)
+ {
+ emitStackPop(dst, /*isCall*/ true, callInstrSize, args);
+ }
+ else
+ {
+ emitStackKillArgs(dst, -args, callInstrSize);
+ }
+ }
+
+ // Do we need to record a call location for GC purposes?
+ if (!emitFullGCinfo && recCall)
+ {
+ assert(callInstrSize != 0);
+ emitRecordGCcall(dst, callInstrSize);
+ }
+
+#ifdef DEBUG
+ if (ins == INS_call)
+ {
+ emitRecordCallSite(emitCurCodeOffs(*dp), id->idDebugOnlyInfo()->idCallSig,
+ (CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie);
+ }
+#endif // DEBUG
+
+ break;
+
+ /********************************************************************/
+ /* One register operand */
+ /********************************************************************/
+
+ case IF_RRD:
+ case IF_RWR:
+ case IF_RRW:
+ dst = emitOutputR(dst, id);
+ sz = TINY_IDSC_SIZE;
+ break;
+
+ /********************************************************************/
+ /* Register and register/constant */
+ /********************************************************************/
+
+ case IF_RRW_SHF:
+ code = insCodeMR(ins);
+ // Emit the VEX prefix if it exists
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMRreg(ins, id->idReg1(), size, code);
+
+ // set the W bit
+ if (size != EA_1BYTE)
+ {
+ code |= 1;
+ }
+
+ // Emit the REX prefix if it exists
+ if (TakesRexWPrefix(ins, size))
+ {
+ code = AddRexWPrefix(ins, code);
+ }
+
+ // Output a size prefix for a 16-bit operand
+ if (size == EA_2BYTE)
+ {
+ dst += emitOutputByte(dst, 0x66);
+ }
+
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+ dst += emitOutputWord(dst, code);
+ dst += emitOutputByte(dst, emitGetInsSC(id));
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_RRD_RRD:
+ case IF_RWR_RRD:
+ case IF_RRW_RRD:
+ case IF_RRW_RRW:
+ dst = emitOutputRR(dst, id);
+ sz = TINY_IDSC_SIZE;
+ break;
+
+ case IF_RRD_CNS:
+ case IF_RWR_CNS:
+ case IF_RRW_CNS:
+ dst = emitOutputRI(dst, id);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+#ifdef FEATURE_AVX_SUPPORT
+ case IF_RWR_RRD_RRD:
+ dst = emitOutputRRR(dst, id);
+ sz = emitSizeOfInsDsc(id);
+ break;
+#endif
+
+ case IF_RRW_RRW_CNS:
+ assert(id->idGCref() == GCT_NONE);
+
+ // Get the 'base' opcode (it's a big one)
+ // Also, determine which operand goes where in the ModRM byte.
+ regNumber mReg;
+ regNumber rReg;
+ // if (ins == INS_shld || ins == INS_shrd || ins == INS_vextractf128 || ins == INS_vinsertf128)
+ if (hasCodeMR(ins))
+ {
+ code = insCodeMR(ins);
+ // Emit the VEX prefix if it exists
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeMRreg(ins, code);
+ mReg = id->idReg1();
+ rReg = id->idReg2();
+ }
+ else
+ {
+ code = insCodeRM(ins);
+ // Emit the VEX prefix if it exists
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ code = insEncodeRMreg(ins, code);
+ mReg = id->idReg2();
+ rReg = id->idReg1();
+ }
+ assert(code & 0x00FF0000);
+
+#ifdef FEATURE_AVX_SUPPORT
+ if (TakesVexPrefix(ins))
+ {
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // Encode source/dest operand reg in 'vvvv' bits in 1's complement form
+ // This code will have to change when we support 3 operands.
+ // For now, we always overload this source with the destination (always reg1).
+ // (Though we will need to handle the few ops that can have the 'vvvv' bits as destination,
+ // e.g. pslldq, when/if we support those instructions with 2 registers.)
+ // (see x64 manual Table 2-9. Instructions with a VEX.vvvv destination)
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+ else if (IsThreeOperandMoveAVXInstruction(ins))
+ {
+ // This is a "merge" move instruction.
+ // Encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg2(), size, code);
+ }
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ regcode = (insEncodeReg345(ins, rReg, size, &code) | insEncodeReg012(ins, mReg, size, &code)) << 8;
+
+ // Output the REX prefix
+ dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
+
+ if (UseAVX() && Is4ByteAVXInstruction(ins))
+ {
+ // We just need to output the last byte of the opcode.
+ assert((code & 0xFF) == 0);
+ assert((code & 0xFF00) != 0xC000);
+ dst += emitOutputByte(dst, (code >> 8) & 0xFF);
+ code = 0;
+ }
+ else if (code & 0xFF000000)
+ {
+ dst += emitOutputWord(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+ else if (code & 0x00FF0000)
+ {
+ dst += emitOutputByte(dst, code >> 16);
+ code &= 0x0000FFFF;
+ }
+
+ // Note that regcode is shifted by 8-bits above to align with RM byte.
+ if (code != 0)
+ {
+ assert((code & 0xFF00) == 0xC000);
+ dst += emitOutputWord(dst, code | regcode);
+ }
+ else
+ {
+ // This case occurs for AVX instructions.
+ // Note that regcode is left shifted by 8-bits.
+ assert(Is4ByteAVXInstruction(ins));
+ dst += emitOutputByte(dst, 0xC0 | (regcode >> 8));
+ }
+
+ dst += emitOutputByte(dst, emitGetInsSC(id));
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ /********************************************************************/
+ /* Address mode operand */
+ /********************************************************************/
+
+ case IF_ARD:
+ case IF_AWR:
+ case IF_ARW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_ARD:
+ case IF_TWR_ARD:
+ case IF_TRW_ARD:
+
+ // case IF_ARD_TRD:
+ // case IF_ARW_TRD:
+ case IF_AWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ dst = emitCodeWithInstructionSize(dst, emitOutputAM(dst, id, insCodeMR(ins)), &callInstrSize);
+
+ switch (ins)
+ {
+ case INS_call:
+
+ IND_CALL:
+ // Get hold of the argument count and method handle
+ args = emitGetInsCIargs(id);
+
+ // Is this a "fat" call descriptor?
+ if (id->idIsLargeCall())
+ {
+ instrDescCGCA* idCall = (instrDescCGCA*)id;
+
+ gcrefRegs = idCall->idcGcrefRegs;
+ byrefRegs = idCall->idcByrefRegs;
+ VarSetOps::Assign(emitComp, GCvars, idCall->idcGCvars);
+ sz = sizeof(instrDescCGCA);
+ }
+ else
+ {
+ assert(!id->idIsLargeDsp());
+ assert(!id->idIsLargeCns());
+
+ gcrefRegs = emitDecodeCallGCregs(id);
+ byrefRegs = 0;
+ VarSetOps::AssignNoCopy(emitComp, GCvars, VarSetOps::MakeEmpty(emitComp));
+ sz = sizeof(instrDesc);
+ }
+
+ recCall = true;
+
+ goto DONE_CALL;
+
+ default:
+ sz = emitSizeOfInsDsc(id);
+ break;
+ }
+ break;
+
+ case IF_RRD_ARD:
+ case IF_RWR_ARD:
+ case IF_RRW_ARD:
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputAM(dst, id, code | regcode);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_ARD_RRD:
+ case IF_AWR_RRD:
+ case IF_ARW_RRD:
+ code = insCodeMR(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputAM(dst, id, code | regcode);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_ARD_CNS:
+ case IF_AWR_CNS:
+ case IF_ARW_CNS:
+ emitGetInsAmdCns(id, &cnsVal);
+ dst = emitOutputAM(dst, id, insCodeMI(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_ARW_SHF:
+ emitGetInsAmdCns(id, &cnsVal);
+ dst = emitOutputAM(dst, id, insCodeMR(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ /********************************************************************/
+ /* Stack-based operand */
+ /********************************************************************/
+
+ case IF_SRD:
+ case IF_SWR:
+ case IF_SRW:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_SRD:
+ case IF_TWR_SRD:
+ case IF_TRW_SRD:
+
+ // case IF_SRD_TRD:
+ // case IF_SRW_TRD:
+ case IF_SWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ assert(ins != INS_pop_hide);
+ if (ins == INS_pop)
+ {
+ // The offset in "pop [ESP+xxx]" is relative to the new ESP value
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ emitCurStackLvl -= sizeof(int);
+#endif
+ dst = emitOutputSV(dst, id, insCodeMR(ins));
+
+#if !FEATURE_FIXED_OUT_ARGS
+ emitCurStackLvl += sizeof(int);
+#endif
+ break;
+ }
+
+ dst = emitCodeWithInstructionSize(dst, emitOutputSV(dst, id, insCodeMR(ins)), &callInstrSize);
+
+ if (ins == INS_call)
+ {
+ goto IND_CALL;
+ }
+
+ break;
+
+ case IF_SRD_CNS:
+ case IF_SWR_CNS:
+ case IF_SRW_CNS:
+ emitGetInsCns(id, &cnsVal);
+ dst = emitOutputSV(dst, id, insCodeMI(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_SRW_SHF:
+ emitGetInsCns(id, &cnsVal);
+ dst = emitOutputSV(dst, id, insCodeMR(ins), &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_RRD_SRD:
+ case IF_RWR_SRD:
+ case IF_RRW_SRD:
+ code = insCodeRM(ins);
+
+ // 4-byte AVX instructions are special-cased inside emitOutputSV
+ // since they do not have space to encode the ModRM byte.
+ if (Is4ByteAVXInstruction(ins))
+ {
+ dst = emitOutputSV(dst, id, code);
+ }
+ else
+ {
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputSV(dst, id, code | regcode);
+ }
+ break;
+
+ case IF_SRD_RRD:
+ case IF_SWR_RRD:
+ case IF_SRW_RRD:
+ code = insCodeMR(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputSV(dst, id, code | regcode);
+ break;
+
+ /********************************************************************/
+ /* Direct memory address */
+ /********************************************************************/
+
+ case IF_MRD:
+ case IF_MRW:
+ case IF_MWR:
+
+#if FEATURE_STACK_FP_X87
+
+ case IF_TRD_MRD:
+ case IF_TWR_MRD:
+ case IF_TRW_MRD:
+
+ // case IF_MRD_TRD:
+ // case IF_MRW_TRD:
+ case IF_MWR_TRD:
+
+#endif // FEATURE_STACK_FP_X87
+
+ noway_assert(ins != INS_call);
+ dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRD_OFF:
+ dst = emitOutputCV(dst, id, insCodeMI(ins));
+ break;
+
+ case IF_RRD_MRD:
+ case IF_RWR_MRD:
+ case IF_RRW_MRD:
+ code = insCodeRM(ins);
+ // Special case 4-byte AVX instructions
+ if (Is4ByteAVXInstruction(ins))
+ {
+ dst = emitOutputCV(dst, id, code);
+ }
+ else
+ {
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputCV(dst, id, code | regcode | 0x0500);
+ }
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_RWR_MRD_OFF:
+ code = insCode(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+
+ regcode = insEncodeReg012(id->idIns(), id->idReg1(), size, &code);
+ dst = emitOutputCV(dst, id, code | 0x30 | regcode);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRD_RRD:
+ case IF_MWR_RRD:
+ case IF_MRW_RRD:
+ code = insCodeMR(ins);
+#ifdef FEATURE_AVX_SUPPORT
+ code = AddVexPrefixIfNeeded(ins, code, size);
+
+ // In case of AVX instructions that take 3 operands, encode reg1 as first source.
+ // Note that reg1 is both a source and a destination.
+ //
+ // TODO-XArch-CQ: Eventually we need to support 3 operand instruction formats. For
+ // now we use the single source as source1 and source2.
+ // For this format, moves do not support a third operand, so we only need to handle the binary ops.
+ if (IsThreeOperandBinaryAVXInstruction(ins))
+ {
+ // encode source operand reg in 'vvvv' bits in 1's complement form
+ code = insEncodeReg3456(ins, id->idReg1(), size, code);
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+ dst = emitOutputCV(dst, id, code | regcode | 0x0500);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRD_CNS:
+ case IF_MWR_CNS:
+ case IF_MRW_CNS:
+ emitGetInsDcmCns(id, &cnsVal);
+ dst = emitOutputCV(dst, id, insCodeMI(ins) | 0x0500, &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+ case IF_MRW_SHF:
+ emitGetInsDcmCns(id, &cnsVal);
+ dst = emitOutputCV(dst, id, insCodeMR(ins) | 0x0500, &cnsVal);
+ sz = emitSizeOfInsDsc(id);
+ break;
+
+#if FEATURE_STACK_FP_X87
+
+ /********************************************************************/
+ /* FP coprocessor stack operands */
+ /********************************************************************/
+
+ case IF_TRD_FRD:
+ case IF_TWR_FRD:
+ case IF_TRW_FRD:
+ assert(id->idGCref() == GCT_NONE);
+ dst += emitOutputWord(dst, insCodeMR(ins) | 0xC000 | (id->idReg1() << 8));
+ break;
+
+ case IF_FRD_TRD:
+ case IF_FWR_TRD:
+ case IF_FRW_TRD:
+ assert(id->idGCref() == GCT_NONE);
+ dst += emitOutputWord(dst, insCodeMR(ins) | 0xC004 | (id->idReg1() << 8));
+ break;
+
+#endif // FEATURE_STACK_FP_X87
+
+ /********************************************************************/
+ /* oops */
+ /********************************************************************/
+
+ default:
+
+#ifdef DEBUG
+ printf("unexpected format %s\n", emitIfName(id->idInsFmt()));
+ assert(!"don't know how to encode this instruction");
+#endif
+ break;
+ }
+
+ // Make sure we set the instruction descriptor size correctly
+ assert(sz == emitSizeOfInsDsc(id));
+
+#if !FEATURE_FIXED_OUT_ARGS
+
+ // Make sure we keep the current stack level up to date
+ if (!emitIGisInProlog(ig) && !emitIGisInEpilog(ig))
+ {
+ switch (ins)
+ {
+ case INS_push:
+ // Please note: {INS_push_hide,IF_LABEL} is used to push the address of the
+ // finally block for calling it locally for an op_leave.
+ emitStackPush(dst, id->idGCref());
+ break;
+
+ case INS_pop:
+ emitStackPop(dst, false, /*callInstrSize*/ 0, 1);
+ break;
+
+ case INS_sub:
+ // Check for "sub ESP, icon"
+ if (ins == INS_sub && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
+ {
+ assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
+ emitStackPushN(dst, (unsigned)(emitGetInsSC(id) / sizeof(void*)));
+ }
+ break;
+
+ case INS_add:
+ // Check for "add ESP, icon"
+ if (ins == INS_add && id->idInsFmt() == IF_RRW_CNS && id->idReg1() == REG_ESP)
+ {
+ assert((size_t)emitGetInsSC(id) < 0x00000000FFFFFFFFLL);
+ emitStackPop(dst, /*isCall*/ false, /*callInstrSize*/ 0,
+ (unsigned)(emitGetInsSC(id) / sizeof(void*)));
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ assert((int)emitCurStackLvl >= 0);
+
+ // Only epilog "instructions" and some pseudo-instrs
+ // are allowed not to generate any code
+
+ assert(*dp != dst || emitInstHasNoCode(ins));
+
+#ifdef DEBUG
+ if (emitComp->opts.disAsm || emitComp->opts.dspEmit || emitComp->verbose)
+ {
+ emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(*dp), *dp, (dst - *dp));
+ }
+
+ if (emitComp->compDebugBreak)
+ {
+ // Setting JitEmitPrintRefRegs=1 will print out emitThisGCrefRegs and emitThisByrefRegs
+ // at the beginning of this method.
+ if (JitConfig.JitEmitPrintRefRegs() != 0)
+ {
+ printf("Before emitOutputInstr for id->idDebugOnlyInfo()->idNum=0x%02x\n", id->idDebugOnlyInfo()->idNum);
+ printf(" emitThisGCrefRegs(0x%p)=", emitComp->dspPtr(&emitThisGCrefRegs));
+ printRegMaskInt(emitThisGCrefRegs);
+ emitDispRegSet(emitThisGCrefRegs);
+ printf("\n");
+ printf(" emitThisByrefRegs(0x%p)=", emitComp->dspPtr(&emitThisByrefRegs));
+ printRegMaskInt(emitThisByrefRegs);
+ emitDispRegSet(emitThisByrefRegs);
+ printf("\n");
+ }
+
+ // For example, setting JitBreakEmitOutputInstr=a6 will break when this method is called for
+ // emitting instruction a6, (i.e. IN00a6 in jitdump).
+ if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum)
+ {
+ assert(!"JitBreakEmitOutputInstr reached");
+ }
+ }
+#endif
+
+#ifdef TRANSLATE_PDB
+ if (*dp != dst)
+ {
+ // only map instruction groups to instruction groups
+ MapCode(id->idDebugOnlyInfo()->idilStart, *dp);
+ }
+#endif
+
+ *dp = dst;
+
+#ifdef DEBUG
+ if (ins == INS_mulEAX || ins == INS_imulEAX)
+ {
+ // INS_mulEAX has implicit target of Edx:Eax. Make sure
+ // that we detected this and cleared its GC-status.
+
+ assert(((RBM_EAX | RBM_EDX) & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
+ }
+
+ if (instrIs3opImul(ins))
+ {
+ // The target of the 3-operand imul is implicitly encoded. Make sure
+ // that we detected the implicit register and cleared its GC-status.
+
+ regMaskTP regMask = genRegMask(inst3opImulReg(ins));
+ assert((regMask & (emitThisGCrefRegs | emitThisByrefRegs)) == 0);
+ }
+#endif
+
+ return sz;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+#endif // defined(_TARGET_XARCH_)
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
new file mode 100644
index 0000000000..dfd7e6ec50
--- /dev/null
+++ b/src/jit/emitxarch.h
@@ -0,0 +1,437 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_XARCH_)
+
+/************************************************************************/
+/* Public inline informational methods */
+/************************************************************************/
+
+public:
+inline static bool isGeneralRegister(regNumber reg)
+{
+ return (reg <= REG_INT_LAST);
+}
+
+inline static bool isFloatReg(regNumber reg)
+{
+ return (reg >= REG_FP_FIRST && reg <= REG_FP_LAST);
+}
+
+inline static bool isDoubleReg(regNumber reg)
+{
+ return isFloatReg(reg);
+}
+
+/************************************************************************/
+/* Routines that compute the size of / encode instructions */
+/************************************************************************/
+
+struct CnsVal
+{
+ ssize_t cnsVal;
+#ifdef RELOC_SUPPORT
+ bool cnsReloc;
+#endif
+};
+
+UNATIVE_OFFSET emitInsSize(size_t code);
+UNATIVE_OFFSET emitInsSizeRM(instruction ins);
+UNATIVE_OFFSET emitInsSizeSV(size_t code, int var, int dsp);
+UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, int var, int dsp, int val);
+UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr);
+UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code);
+UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, size_t code, int val);
+UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code);
+UNATIVE_OFFSET emitInsSizeCV(instrDesc* id, size_t code, int val);
+
+BYTE* emitOutputAM(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
+BYTE* emitOutputSV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
+BYTE* emitOutputCV(BYTE* dst, instrDesc* id, size_t code, CnsVal* addc = nullptr);
+
+BYTE* emitOutputR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRI(BYTE* dst, instrDesc* id);
+BYTE* emitOutputRR(BYTE* dst, instrDesc* id);
+BYTE* emitOutputIV(BYTE* dst, instrDesc* id);
+
+#ifdef FEATURE_AVX_SUPPORT
+BYTE* emitOutputRRR(BYTE* dst, instrDesc* id);
+#endif
+
+BYTE* emitOutputLJ(BYTE* dst, instrDesc* id);
+
+unsigned emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, size_t& code);
+unsigned emitGetRexPrefixSize(instruction ins);
+unsigned emitGetVexPrefixSize(instruction ins, emitAttr attr);
+unsigned emitGetPrefixSize(size_t code);
+unsigned emitGetVexPrefixAdjustedSize(instruction ins, emitAttr attr, size_t code);
+
+unsigned insEncodeReg345(instruction ins, regNumber reg, emitAttr size, size_t* code);
+unsigned insEncodeReg012(instruction ins, regNumber reg, emitAttr size, size_t* code);
+size_t insEncodeReg3456(instruction ins, regNumber reg, emitAttr size, size_t code);
+unsigned insEncodeRegSIB(instruction ins, regNumber reg, size_t* code);
+
+size_t insEncodeMRreg(instruction ins, size_t code);
+size_t insEncodeMRreg(instruction ins, regNumber reg, emitAttr size, size_t code);
+size_t insEncodeRRIb(instruction ins, regNumber reg, emitAttr size);
+size_t insEncodeOpreg(instruction ins, regNumber reg, emitAttr size);
+
+bool IsAVXInstruction(instruction ins);
+size_t insEncodeMIreg(instruction ins, regNumber reg, emitAttr size, size_t code);
+
+size_t AddRexWPrefix(instruction ins, size_t code);
+size_t AddRexRPrefix(instruction ins, size_t code);
+size_t AddRexXPrefix(instruction ins, size_t code);
+size_t AddRexBPrefix(instruction ins, size_t code);
+size_t AddRexPrefix(instruction ins, size_t code);
+
+#ifdef FEATURE_AVX_SUPPORT
+// 3-byte VEX prefix starts with byte 0xC4
+#define VEX_PREFIX_MASK_3BYTE 0xC4000000000000LL
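+// (x86 also defines a shorter 2-byte VEX form that starts with byte 0xC5.)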
+bool TakesVexPrefix(instruction ins);
+// Returns true if the instruction encoding already contains VEX prefix
+bool hasVexPrefix(size_t code)
+{
+ return (code & VEX_PREFIX_MASK_3BYTE) != 0;
+}
+size_t AddVexPrefix(instruction ins, size_t code, emitAttr attr);
+size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr size)
+{
+ if (TakesVexPrefix(ins))
+ {
+ code = AddVexPrefix(ins, code, size);
+ }
+ return code;
+}
+size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size)
+{
+ if (TakesVexPrefix(ins) && !hasVexPrefix(code))
+ {
+ code = AddVexPrefix(ins, code, size);
+ }
+ return code;
+}
+bool useAVXEncodings;
+bool UseAVX()
+{
+ return useAVXEncodings;
+}
+void SetUseAVX(bool value)
+{
+ useAVXEncodings = value;
+}
+bool IsThreeOperandBinaryAVXInstruction(instruction ins);
+bool IsThreeOperandMoveAVXInstruction(instruction ins);
+bool IsThreeOperandAVXInstruction(instruction ins)
+{
+ return (IsThreeOperandBinaryAVXInstruction(ins) || IsThreeOperandMoveAVXInstruction(ins));
+}
+#else // !FEATURE_AVX_SUPPORT
+bool UseAVX()
+{
+ return false;
+}
+bool hasVexPrefix(size_t code)
+{
+ return false;
+}
+bool IsThreeOperandBinaryAVXInstruction(instruction ins)
+{
+ return false;
+}
+bool IsThreeOperandMoveAVXInstruction(instruction ins)
+{
+ return false;
+}
+bool IsThreeOperandAVXInstruction(instruction ins)
+{
+ return false;
+}
+bool TakesVexPrefix(instruction ins)
+{
+ return false;
+}
+size_t AddVexPrefixIfNeeded(instruction ins, size_t code, emitAttr attr)
+{
+ return code;
+}
+size_t AddVexPrefixIfNeededAndNotPresent(instruction ins, size_t code, emitAttr size)
+{
+ return code;
+}
+#endif // !FEATURE_AVX_SUPPORT
+
+/************************************************************************/
+/* Debug-only routines to display instructions */
+/************************************************************************/
+
+#ifdef DEBUG
+
+const char* emitFPregName(unsigned reg, bool varName = true);
+
+void emitDispReloc(ssize_t value);
+void emitDispAddrMode(instrDesc* id, bool noDetail = false);
+void emitDispShift(instruction ins, int cnt = 0);
+
+void emitDispIns(instrDesc* id,
+ bool isNew,
+ bool doffs,
+ bool asmfm,
+ unsigned offs = 0,
+ BYTE* code = nullptr,
+ size_t sz = 0,
+ insGroup* ig = nullptr);
+
+const char* emitXMMregName(unsigned reg);
+const char* emitYMMregName(unsigned reg);
+
+#endif
+
+/************************************************************************/
+/* Private members that deal with target-dependent instr. descriptors */
+/************************************************************************/
+
+private:
+void emitSetAmdDisp(instrDescAmd* id, ssize_t dsp);
+instrDesc* emitNewInstrAmd(emitAttr attr, ssize_t dsp);
+instrDesc* emitNewInstrAmdCns(emitAttr attr, ssize_t dsp, int cns);
+
+instrDesc* emitNewInstrCallDir(int argCnt,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
+
+instrDesc* emitNewInstrCallInd(int argCnt,
+ ssize_t disp,
+ VARSET_VALARG_TP GCvars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize));
+
+void emitGetInsCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdCns(instrDesc* id, CnsVal* cv);
+void emitGetInsDcmCns(instrDesc* id, CnsVal* cv);
+ssize_t emitGetInsAmdAny(instrDesc* id);
+
+/************************************************************************/
+/* Private helpers for instruction output */
+/************************************************************************/
+
+private:
+insFormat emitInsModeFormat(instruction ins, insFormat base, insFormat FPld, insFormat FPst);
+
+bool emitVerifyEncodable(instruction ins, emitAttr size, regNumber reg1, regNumber reg2 = REG_NA);
+
+bool emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id);
+
+/*****************************************************************************
+*
+* Convert between an index scale in bytes and the smaller encoding used for
+* storage in instruction descriptors.
+*/
+
+inline emitter::opSize emitEncodeScale(size_t scale)
+{
+ assert(scale == 1 || scale == 2 || scale == 4 || scale == 8);
+
+ return emitSizeEncode[scale - 1];
+}
+
+inline emitAttr emitDecodeScale(unsigned ensz)
+{
+ assert(ensz < 4);
+
+ return emitter::emitSizeDecode[ensz];
+}
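+
+// Illustrative round-trip (editorial sketch; the concrete contents of the emitSizeEncode and
+// emitSizeDecode tables are assumed, not shown here): an index scale in bytes is stored in the
+// instruction descriptor in its compact form and expanded back to an emitAttr when needed.
+//
+//     emitter::opSize enc = emitEncodeScale(8);   // compact encoding, small enough for the instrDesc
+//     emitAttr        sz  = emitDecodeScale(enc); // assumed to map back to the 8-byte emitAttr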
+
+/************************************************************************/
+/* The public entry points to output instructions */
+/************************************************************************/
+
+public:
+void emitLoopAlign();
+
+void emitIns(instruction ins);
+
+void emitIns(instruction ins, emitAttr attr);
+
+void emitInsRMW(instruction inst, emitAttr attr, GenTreeStoreInd* storeInd, GenTreePtr src);
+
+void emitInsRMW(instruction inst, emitAttr attr, GenTreeStoreInd* storeInd);
+
+void emitIns_Nop(unsigned size);
+
+void emitIns_I(instruction ins, emitAttr attr, int val);
+
+void emitIns_R(instruction ins, emitAttr attr, regNumber reg);
+
+void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_R_I(instruction ins, emitAttr attr, regNumber reg, ssize_t val);
+
+void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2);
+
+void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival);
+
+#ifdef FEATURE_AVX_SUPPORT
+void emitIns_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3);
+#endif
+
+void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
+
+void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+
+void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val);
+
+void emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs);
+
+void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs);
+
+void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val);
+
+void emitIns_IJ(emitAttr attr, regNumber reg, unsigned base);
+
+void emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int varx, int offs);
+
+void emitIns_R_L(instruction ins, emitAttr attr, BasicBlock* dst, regNumber reg);
+
+void emitIns_R_D(instruction ins, emitAttr attr, unsigned offs, regNumber reg);
+
+void emitIns_I_AR(
+ instruction ins, emitAttr attr, int val, regNumber reg, int offs, int memCookie = 0, void* clsCookie = nullptr);
+
+void emitIns_I_AI(instruction ins, emitAttr attr, int val, ssize_t disp);
+
+void emitIns_R_AR(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie = 0,
+ void* clsCookie = nullptr);
+
+void emitIns_R_AI(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_AR_R(instruction ins,
+ emitAttr attr,
+ regNumber ireg,
+ regNumber reg,
+ int offs,
+ int memCookie = 0,
+ void* clsCookie = nullptr);
+
+void emitIns_AI_R(instruction ins, emitAttr attr, regNumber ireg, ssize_t disp);
+
+void emitIns_I_ARR(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_R_ARR(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_ARR_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, int disp);
+
+void emitIns_I_ARX(instruction ins, emitAttr attr, int val, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+void emitIns_R_ARX(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+void emitIns_ARX_R(
+ instruction ins, emitAttr attr, regNumber ireg, regNumber reg, regNumber rg2, unsigned mul, int disp);
+
+void emitIns_I_AX(instruction ins, emitAttr attr, int val, regNumber reg, unsigned mul, int disp);
+
+void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
+
+void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp);
+
+#if FEATURE_STACK_FP_X87
+void emitIns_F_F0(instruction ins, unsigned fpreg);
+
+void emitIns_F0_F(instruction ins, unsigned fpreg);
+#endif // FEATURE_STACK_FP_X87
+
+enum EmitCallType
+{
+ EC_FUNC_TOKEN, // Direct call to a helper/static/nonvirtual/global method
+ EC_FUNC_TOKEN_INDIR, // Indirect call to a helper/static/nonvirtual/global method
+ EC_FUNC_ADDR, // Direct call to an absolute address
+
+ EC_FUNC_VIRTUAL, // Call to a virtual method (using the vtable)
+ EC_INDIR_R, // Indirect call via register
+ EC_INDIR_SR, // Indirect call via stack-reference (local var)
+ EC_INDIR_C, // Indirect call via static class var
+ EC_INDIR_ARD, // Indirect call via an addressing mode
+
+ EC_COUNT
+};
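+
+// Illustrative mapping (editorial sketch; argument lists are abbreviated and 'targetReg' is an
+// assumed name): a direct call to a helper or non-virtual method is emitted as EC_FUNC_TOKEN with
+// 'addr' holding the target, while a call through a register uses EC_INDIR_R with 'ireg' naming
+// that register.
+//
+//     emitIns_Call(EC_FUNC_TOKEN, methHnd, ..., addr, argSize, retSize, ...);
+//     emitIns_Call(EC_INDIR_R, methHnd, ..., nullptr, argSize, retSize, ..., /* ireg */ targetReg);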
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ CORINFO_SIG_INFO* sigInfo, // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ GenTreeIndir* indir,
+ bool isJump = false,
+ bool isNoGC = false);
+
+void emitIns_Call(EmitCallType callType,
+ CORINFO_METHOD_HANDLE methHnd,
+ INDEBUG_LDISASM_COMMA(CORINFO_SIG_INFO* sigInfo) // used to report call sites to the EE
+ void* addr,
+ ssize_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize),
+ VARSET_VALARG_TP ptrVars,
+ regMaskTP gcrefRegs,
+ regMaskTP byrefRegs,
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET,
+ regNumber ireg = REG_NA,
+ regNumber xreg = REG_NA,
+ unsigned xmul = 0,
+ ssize_t disp = 0,
+ bool isJump = false,
+ bool isNoGC = false);
+
+#ifdef _TARGET_AMD64_
+// Is the last instruction emitted a call instruction?
+bool emitIsLastInsCall();
+
+// Insert a NOP at the end of the current instruction group if the last emitted instruction was a 'call',
+// because the next instruction group will be an epilog.
+void emitOutputPreEpilogNOP();
+#endif // _TARGET_AMD64_
+
+/*****************************************************************************
+ *
+ * Given a jump, return true if it's a conditional jump.
+ */
+
+inline bool emitIsCondJump(instrDesc* jmp)
+{
+ instruction ins = jmp->idIns();
+
+ assert(jmp->idInsFmt() == IF_LABEL);
+
+ return (ins != INS_call && ins != INS_jmp);
+}
+
+/*****************************************************************************
+ *
+ * Given a jump, return true if it's an unconditional jump.
+ */
+
+inline bool emitIsUncondJump(instrDesc* jmp)
+{
+ instruction ins = jmp->idIns();
+
+ assert(jmp->idInsFmt() == IF_LABEL);
+
+ return (ins == INS_jmp);
+}
+
+#endif // _TARGET_XARCH_
diff --git a/src/jit/error.cpp b/src/jit/error.cpp
new file mode 100644
index 0000000000..71c3301045
--- /dev/null
+++ b/src/jit/error.cpp
@@ -0,0 +1,536 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX error.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "compiler.h"
+
+#if MEASURE_FATAL
+unsigned fatal_badCode;
+unsigned fatal_noWay;
+unsigned fatal_NOMEM;
+unsigned fatal_noWayAssertBody;
+#ifdef DEBUG
+unsigned fatal_noWayAssertBodyArgs;
+#endif // DEBUG
+unsigned fatal_NYI;
+#endif // MEASURE_FATAL
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN fatal(int errCode)
+{
+#ifdef DEBUG
+ if (errCode != CORJIT_SKIPPED) // Don't stop on NYI: use COMPlus_AltJitAssertOnNYI for that.
+ {
+ if (JitConfig.DebugBreakOnVerificationFailure())
+ {
+ DebugBreak();
+ }
+ }
+#endif // DEBUG
+
+ ULONG_PTR exceptArg = errCode;
+ RaiseException(FATAL_JIT_EXCEPTION, EXCEPTION_NONCONTINUABLE, 1, &exceptArg);
+ UNREACHABLE();
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN badCode()
+{
+#if MEASURE_FATAL
+ fatal_badCode += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_BADCODE);
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN noWay()
+{
+#if MEASURE_FATAL
+ fatal_noWay += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_INTERNALERROR);
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN NOMEM()
+{
+#if MEASURE_FATAL
+ fatal_NOMEM += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_OUTOFMEM);
+}
+
+/*****************************************************************************/
+void DECLSPEC_NORETURN noWayAssertBody()
+{
+#if MEASURE_FATAL
+ fatal_noWayAssertBody += 1;
+#endif // MEASURE_FATAL
+
+#ifndef DEBUG
+ // Even in retail, if we hit a noway, and we have this variable set, we don't want to fall back
+ // to MinOpts, which might hide a regression. Instead, hit a breakpoint (and crash). We don't
+ // have the assert code to fall back on here.
+ // The debug path also goes through this function, to make the call to 'fatal'.
+ // This kind of noway is hit for unreached().
+ if (JitConfig.JitEnableNoWayAssert())
+ {
+ DebugBreak();
+ }
+#endif // !DEBUG
+
+ fatal(CORJIT_RECOVERABLEERROR);
+}
+
+inline static bool ShouldThrowOnNoway(
+#ifdef FEATURE_TRACELOGGING
+ const char* filename, unsigned line
+#endif
+ )
+{
+ return JitTls::GetCompiler() == nullptr ||
+ JitTls::GetCompiler()->compShouldThrowOnNoway(
+#ifdef FEATURE_TRACELOGGING
+ filename, line
+#endif
+ );
+}
+
+/*****************************************************************************/
+void noWayAssertBodyConditional(
+#ifdef FEATURE_TRACELOGGING
+ const char* filename, unsigned line
+#endif
+ )
+{
+#ifdef FEATURE_TRACELOGGING
+ if (ShouldThrowOnNoway(filename, line))
+#else
+ if (ShouldThrowOnNoway())
+#endif // FEATURE_TRACELOGGING
+ {
+ noWayAssertBody();
+ }
+}
+
+#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+
+/*****************************************************************************/
+void notYetImplemented(const char* msg, const char* filename, unsigned line)
+{
+#if FUNC_INFO_LOGGING
+#ifdef DEBUG
+ LogEnv* env = JitTls::GetLogEnv();
+ if (env != nullptr)
+ {
+ const Compiler* const pCompiler = env->compiler;
+ if (pCompiler->verbose)
+ {
+ printf("\n\n%s - NYI (%s:%d - %s)\n", pCompiler->info.compFullName, filename, line, msg);
+ }
+ }
+ if (Compiler::compJitFuncInfoFile != nullptr)
+ {
+ fprintf(Compiler::compJitFuncInfoFile, "%s - NYI (%s:%d - %s)\n",
+ (env == nullptr) ? "UNKNOWN" : env->compiler->info.compFullName, filename, line, msg);
+ fflush(Compiler::compJitFuncInfoFile);
+ }
+#else // !DEBUG
+ if (Compiler::compJitFuncInfoFile != nullptr)
+ {
+ fprintf(Compiler::compJitFuncInfoFile, "NYI (%s:%d - %s)\n", filename, line, msg);
+ fflush(Compiler::compJitFuncInfoFile);
+ }
+#endif // !DEBUG
+#endif // FUNC_INFO_LOGGING
+
+#ifdef DEBUG
+ Compiler* pCompiler = JitTls::GetCompiler();
+ if (pCompiler != nullptr)
+ {
+ // Assume we're within a compFunctionTrace boundary, which might not be true.
+ pCompiler->compFunctionTraceEnd(nullptr, 0, true);
+ }
+#endif // DEBUG
+
+ DWORD value = JitConfig.AltJitAssertOnNYI();
+
+ // 0 means just silently skip
+ // If we are in retail builds, assume ignore
+ // 1 means pop up the assert (abort=abort, retry=debugger, ignore=skip)
+ // 2 means silently don't skip (same as 3 for retail)
+ // 3 means pop up the assert (abort=abort, retry=debugger, ignore=don't skip)
+ if (value & 1)
+ {
+#ifdef DEBUG
+ assertAbort(msg, filename, line);
+#endif
+ }
+
+ if ((value & 2) == 0)
+ {
+#if MEASURE_FATAL
+ fatal_NYI += 1;
+#endif // MEASURE_FATAL
+
+ fatal(CORJIT_SKIPPED);
+ }
+}
+
+#endif // #if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+
+/*****************************************************************************/
+LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ DWORD exceptCode = pExceptionPointers->ExceptionRecord->ExceptionCode;
+
+ if (exceptCode == FATAL_JIT_EXCEPTION)
+ {
+ ErrorTrapParam* pParam = (ErrorTrapParam*)lpvParam;
+
+ assert(pExceptionPointers->ExceptionRecord->NumberParameters == 1);
+ pParam->errc = (int)pExceptionPointers->ExceptionRecord->ExceptionInformation[0];
+
+ ICorJitInfo* jitInfo = pParam->jitInfo;
+
+ if (jitInfo != nullptr)
+ {
+ jitInfo->reportFatalError((CorJitResult)pParam->errc);
+ }
+
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+DWORD getBreakOnBadCode()
+{
+ return JitConfig.JitBreakOnBadCode();
+}
+
+/*****************************************************************************/
+void debugError(const char* msg, const char* file, unsigned line)
+{
+ const char* tail = strrchr(file, '\\');
+ if (tail)
+ {
+ file = tail + 1;
+ }
+
+ LogEnv* env = JitTls::GetLogEnv();
+
+ logf(LL_ERROR, "COMPILATION FAILED: file: %s:%d compiling method %s reason %s\n", file, line,
+ env->compiler->info.compFullName, msg);
+
+ // We now only assert when the user explicitly sets COMPlus_JitRequired=1.
+ // If COMPlus_JitRequired is 0 or is not set, we will not assert.
+ if (JitConfig.JitRequired() == 1 || getBreakOnBadCode())
+ {
+ // Don't assert if verification is done.
+ if (!env->compiler->tiVerificationNeeded || getBreakOnBadCode())
+ {
+ assertAbort(msg, "NO-FILE", 0);
+ }
+ }
+
+ BreakIfDebuggerPresent();
+}
+
+/*****************************************************************************/
+LogEnv::LogEnv(ICorJitInfo* aCompHnd) : compHnd(aCompHnd), compiler(nullptr)
+{
+}
+
+/*****************************************************************************/
+extern "C" void __cdecl assertAbort(const char* why, const char* file, unsigned line)
+{
+ const char* msg = why;
+ LogEnv* env = JitTls::GetLogEnv();
+ const int BUFF_SIZE = 8192;
+ char* buff = (char*)alloca(BUFF_SIZE);
+ if (env->compiler)
+ {
+ _snprintf_s(buff, BUFF_SIZE, _TRUNCATE, "Assertion failed '%s' in '%s' (IL size %d)\n", why,
+ env->compiler->info.compFullName, env->compiler->info.compILCodeSize);
+ msg = buff;
+ }
+ printf(""); // null string means flush
+
+#if FUNC_INFO_LOGGING
+ if (Compiler::compJitFuncInfoFile != nullptr)
+ {
+ fprintf(Compiler::compJitFuncInfoFile, "%s - Assertion failed (%s:%d - %s)\n",
+ (env == nullptr) ? "UNKNOWN" : env->compiler->info.compFullName, file, line, why);
+ }
+#endif // FUNC_INFO_LOGGING
+
+ if (env->compHnd->doAssert(file, line, msg))
+ {
+ DebugBreak();
+ }
+
+#ifdef ALT_JIT
+ // If we hit an assert, and we got here, it's either because the user hit "ignore" on the
+ // dialog pop-up, or they set COMPlus_ContinueOnAssert=1 to not emit a pop-up, but just continue.
+ // If we're an altjit, we have two options: (1) silently continue, as a normal JIT would, probably
+ // leading to additional asserts, or (2) tell the VM that the AltJit wants to skip this function,
+ // thus falling back to the fallback JIT. Setting COMPlus_AltJitSkipOnAssert=1 chooses this "skip to
+ // the fallback JIT" behavior. This is useful when doing ASM diffs, where we only want to see
+ // the first assert for any function, but we don't want to kill the whole ngen process on the
+ // first assert (which would happen if you used COMPlus_NoGuiOnAssert=1 for example).
+ if (JitConfig.AltJitSkipOnAssert() != 0)
+ {
+ fatal(CORJIT_SKIPPED);
+ }
+#elif defined(_TARGET_ARM64_)
+ // TODO-ARM64-NYI: remove this after the JIT no longer asserts during startup
+ //
+ // When we are bringing up the new Arm64 JIT we set COMPlus_ContinueOnAssert=1
+ // We only want to hit one assert then we will fall back to the interpreter.
+ //
+ bool interpreterFallback = (JitConfig.InterpreterFallback() != 0);
+
+ if (interpreterFallback)
+ {
+ fatal(CORJIT_SKIPPED);
+ }
+#endif
+}
+
+/*********************************************************************/
+BOOL vlogf(unsigned level, const char* fmt, va_list args)
+{
+ return JitTls::GetLogEnv()->compHnd->logMsg(level, fmt, args);
+}
+
+int vflogf(FILE* file, const char* fmt, va_list args)
+{
+ // 0-length string means flush
+ if (fmt[0] == '\0')
+ {
+ fflush(file);
+ return 0;
+ }
+
+ const int BUFF_SIZE = 8192;
+ char buffer[BUFF_SIZE];
+ int written = _vsnprintf_s(&buffer[0], BUFF_SIZE, _TRUNCATE, fmt, args);
+
+ if (JitConfig.JitDumpToDebugger())
+ {
+ OutputDebugStringA(buffer);
+ }
+
+ // We use fputs here so that this executes as fast as possible
+ fputs(&buffer[0], file);
+ return written;
+}
+
+int flogf(FILE* file, const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ int written = vflogf(file, fmt, args);
+ va_end(args);
+ return written;
+}
+
+/*********************************************************************/
+int logf(const char* fmt, ...)
+{
+ va_list args;
+ static bool logToEEfailed = false;
+ int written = 0;
+ //
+ // We remember when the EE failed to log, because vlogf()
+ // is very slow in a checked build.
+ //
+ // If it fails to log an LL_INFO1000 message once
+ // it will always fail when logging an LL_INFO1000 message.
+ //
+ if (!logToEEfailed)
+ {
+ va_start(args, fmt);
+ if (!vlogf(LL_INFO1000, fmt, args))
+ {
+ logToEEfailed = true;
+ }
+ va_end(args);
+ }
+
+ if (logToEEfailed)
+ {
+ // if the EE refuses to log it, we try to send it to stdout
+ va_start(args, fmt);
+ written = vflogf(jitstdout, fmt, args);
+ va_end(args);
+ }
+#if 0 // Enable this only when you need it
+ else
+ {
+ //
+ // The EE just successfully logged our message
+ //
+ static ConfigDWORD fJitBreakOnDumpToken;
+ DWORD breakOnDumpToken = fJitBreakOnDumpToken.val(CLRConfig::INTERNAL_BreakOnDumpToken);
+ static DWORD forbidEntry = 0;
+
+ if ((breakOnDumpToken != 0xffffffff) && (forbidEntry == 0))
+ {
+ forbidEntry = 1;
+
+ // Use value of 0 to get the dump
+ static DWORD currentLine = 1;
+
+ if (currentLine == breakOnDumpToken)
+ {
+ assert(!"Dump token reached");
+ }
+
+ printf("(Token=0x%x) ", currentLine++);
+ forbidEntry = 0;
+ }
+ }
+#endif // 0
+ va_end(args);
+
+ return written;
+}
+
+/*********************************************************************/
+void gcDump_logf(const char* fmt, ...)
+{
+ va_list args;
+ static bool logToEEfailed = false;
+ //
+ // We remember when the EE failed to log, because vlogf()
+ // is very slow in a checked build.
+ //
+ // If it fails to log an LL_INFO1000 message once
+ // it will always fail when logging an LL_INFO1000 message.
+ //
+ if (!logToEEfailed)
+ {
+ va_start(args, fmt);
+ if (!vlogf(LL_INFO1000, fmt, args))
+ {
+ logToEEfailed = true;
+ }
+ va_end(args);
+ }
+
+ if (logToEEfailed)
+ {
+ // if the EE refuses to log it, we try to send it to stdout
+ va_start(args, fmt);
+ vflogf(jitstdout, fmt, args);
+ va_end(args);
+ }
+#if 0 // Enable this only when you need it
+ else
+ {
+ //
+ // The EE just successfully logged our message
+ //
+ static ConfigDWORD fJitBreakOnDumpToken;
+ DWORD breakOnDumpToken = fJitBreakOnDumpToken.val(CLRConfig::INTERNAL_BreakOnDumpToken);
+ static DWORD forbidEntry = 0;
+
+ if ((breakOnDumpToken != 0xffffffff) && (forbidEntry == 0))
+ {
+ forbidEntry = 1;
+
+ // Use value of 0 to get the dump
+ static DWORD currentLine = 1;
+
+ if (currentLine == breakOnDumpToken)
+ {
+ assert(!"Dump token reached");
+ }
+
+ printf("(Token=0x%x) ", currentLine++);
+ forbidEntry = 0;
+ }
+ }
+#endif // 0
+ va_end(args);
+}
+
+/*********************************************************************/
+void logf(unsigned level, const char* fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ vlogf(level, fmt, args);
+ va_end(args);
+}
+
+void DECLSPEC_NORETURN badCode3(const char* msg, const char* msg2, int arg, __in_z const char* file, unsigned line)
+{
+ const int BUFF_SIZE = 512;
+ char buf1[BUFF_SIZE];
+ char buf2[BUFF_SIZE];
+ sprintf_s(buf1, BUFF_SIZE, "%s%s", msg, msg2);
+ sprintf_s(buf2, BUFF_SIZE, buf1, arg);
+
+ debugError(buf2, file, line);
+ badCode();
+}
+
+void noWayAssertAbortHelper(const char* cond, const char* file, unsigned line)
+{
+ // Show the assert UI.
+ if (JitConfig.JitEnableNoWayAssert())
+ {
+ assertAbort(cond, file, line);
+ }
+}
+
+void noWayAssertBodyConditional(const char* cond, const char* file, unsigned line)
+{
+#ifdef FEATURE_TRACELOGGING
+ if (ShouldThrowOnNoway(file, line))
+#else
+ if (ShouldThrowOnNoway())
+#endif
+ {
+ noWayAssertBody(cond, file, line);
+ }
+ // In CHK we want the assert UI to show up in min-opts.
+ else
+ {
+ noWayAssertAbortHelper(cond, file, line);
+ }
+}
+
+void DECLSPEC_NORETURN noWayAssertBody(const char* cond, const char* file, unsigned line)
+{
+#if MEASURE_FATAL
+ fatal_noWayAssertBodyArgs += 1;
+#endif // MEASURE_FATAL
+
+ noWayAssertAbortHelper(cond, file, line);
+ noWayAssertBody();
+}
+
+#endif // DEBUG
diff --git a/src/jit/error.h b/src/jit/error.h
new file mode 100644
index 0000000000..c56971aaf7
--- /dev/null
+++ b/src/jit/error.h
@@ -0,0 +1,295 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _ERROR_H_
+#define _ERROR_H_
+/*****************************************************************************/
+
+#include <corjit.h> // for CORJIT_INTERNALERROR
+#include <safemath.h> // For FitsIn, used by SafeCvt methods.
+
+#define FATAL_JIT_EXCEPTION 0x02345678
+class Compiler;
+
+struct ErrorTrapParam
+{
+ int errc;
+ ICorJitInfo* jitInfo;
+ EXCEPTION_POINTERS exceptionPointers;
+ ErrorTrapParam()
+ {
+ jitInfo = nullptr;
+ }
+};
+
+// Only catch JIT internal errors (will not catch EE generated Errors)
+extern LONG __JITfilter(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam);
+
+#define setErrorTrap(compHnd, ParamType, paramDef, paramRef) \
+ struct __JITParam : ErrorTrapParam \
+ { \
+ ParamType param; \
+ } __JITparam; \
+ __JITparam.errc = CORJIT_INTERNALERROR; \
+ __JITparam.jitInfo = compHnd; \
+ __JITparam.param = paramRef; \
+ PAL_TRY(__JITParam*, __JITpParam, &__JITparam) \
+ { \
+ ParamType paramDef = __JITpParam->param;
+
+// Only catch JIT internal errors (will not catch EE generated Errors)
+#define impJitErrorTrap() \
+ } \
+ PAL_EXCEPT_FILTER(__JITfilter) \
+ { \
+ int __errc = __JITparam.errc; \
+ (void)__errc;
+
+#define endErrorTrap() \
+ } \
+ PAL_ENDTRY
+
+#define finallyErrorTrap() \
+ } \
+ PAL_FINALLY \
+ {
+
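+// Illustrative expansion of the macro trio above (editorial sketch; the 'Param' struct and the
+// work done inside the protected region are assumed, not taken from the JIT sources):
+//
+//     struct Param
+//     {
+//         Compiler* pThis;
+//     } param;
+//     param.pThis = this;
+//
+//     setErrorTrap(compHnd, Param*, pParamOuter, &param)  // opens PAL_TRY
+//     {
+//         pParamOuter->pThis->compCompileHelper(...);     // protected work
+//     }
+//     impJitErrorTrap()                                    // PAL_EXCEPT_FILTER(__JITfilter)
+//     {
+//         result = __errc;                                 // CORJIT_* code raised via fatal()
+//     }
+//     endErrorTrap()                                       // PAL_ENDTRY
+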
+/*****************************************************************************/
+
+extern void debugError(const char* msg, const char* file, unsigned line);
+extern void DECLSPEC_NORETURN badCode();
+extern void DECLSPEC_NORETURN
+badCode3(const char* msg, const char* msg2, int arg, __in_z const char* file, unsigned line);
+extern void DECLSPEC_NORETURN noWay();
+extern void DECLSPEC_NORETURN NOMEM();
+extern void DECLSPEC_NORETURN fatal(int errCode);
+
+extern void DECLSPEC_NORETURN noWayAssertBody();
+extern void DECLSPEC_NORETURN noWayAssertBody(const char* cond, const char* file, unsigned line);
+
+// Conditionally invoke the noway assert body. The conditional predicate is evaluated using a method on the tlsCompiler.
+// If a noway_assert is hit, we ask the Compiler whether to raise an exception (i.e., conditionally raise an exception).
+// For backward compatibility between v4.5 and v4.0, in min-opts we take a shot at codegen rather than rethrowing.
+extern void noWayAssertBodyConditional(
+#ifdef FEATURE_TRACELOGGING
+ const char* file, unsigned line
+#endif
+ );
+extern void noWayAssertBodyConditional(const char* cond, const char* file, unsigned line);
+
+#if !defined(_TARGET_X86_) || !defined(LEGACY_BACKEND)
+
+// This function can return, depending on the config flag or the debugger
+extern void notYetImplemented(const char* msg, const char* file, unsigned line);
+#define NYI(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+#define NYI_IF(cond, msg) \
+ if (cond) \
+ notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+
+#ifdef _TARGET_AMD64_
+
+#define NYI_AMD64(msg) notYetImplemented("NYI_AMD64: " #msg, __FILE__, __LINE__)
+#define NYI_X86(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#elif defined(_TARGET_X86_)
+
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_X86(msg) notYetImplemented("NYI_X86: " #msg, __FILE__, __LINE__)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#elif defined(_TARGET_ARM_)
+
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_X86(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) notYetImplemented("NYI_ARM: " #msg, __FILE__, __LINE__)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#elif defined(_TARGET_ARM64_)
+
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_X86(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) notYetImplemented("NYI_ARM64: " #msg, __FILE__, __LINE__)
+
+#else
+
+#error "Unknown platform, not x86, ARM, or AMD64?"
+
+#endif
+
+#else // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+
+#define NYI(msg) assert(!msg)
+#define NYI_AMD64(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_ARM64(msg) \
+ do \
+ { \
+ } while (0)
+
+#endif // _TARGET_X86_
+
+#if !defined(_TARGET_X86_) && !defined(FEATURE_STACK_FP_X87)
+#define NYI_FLAT_FP_X87(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+#define NYI_FLAT_FP_X87_NC(msg) notYetImplemented("NYI: " #msg, __FILE__, __LINE__)
+
+#else
+
+#define NYI_FLAT_FP_X87(msg) \
+ do \
+ { \
+ } while (0)
+#define NYI_FLAT_FP_X87_NC(msg) \
+ do \
+ { \
+ } while (0)
+
+#endif // !_TARGET_X86_ && !FEATURE_STACK_FP_X87
+
+#ifdef DEBUG
+#define NO_WAY(msg) (debugError(msg, __FILE__, __LINE__), noWay())
+// Used for fallback stress mode
+#define NO_WAY_NOASSERT(msg) noWay()
+#define BADCODE(msg) (debugError(msg, __FILE__, __LINE__), badCode())
+#define BADCODE3(msg, msg2, arg) badCode3(msg, msg2, arg, __FILE__, __LINE__)
+// Used for an assert that we want to convert into BADCODE to force minopts, or in minopts to force codegen.
+#define noway_assert(cond) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ noWayAssertBodyConditional(#cond, __FILE__, __LINE__); \
+ } \
+ } while (0)
+#define unreached() noWayAssertBody("unreached", __FILE__, __LINE__)
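+
+// Typical uses (editorial sketch; the conditions shown are examples only): noway_assert guards an
+// invariant whose violation should force the BADCODE / minopts path rather than silently producing
+// bad code, and unreached() marks paths that must never execute.
+//
+//     noway_assert(tree->gtOper == GT_CALL);
+//     default:
+//         unreached(); // impossible case in a switch over the oper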
+
+#else
+
+#define NO_WAY(msg) noWay()
+#define BADCODE(msg) badCode()
+#define BADCODE3(msg, msg2, arg) badCode()
+
+#ifdef FEATURE_TRACELOGGING
+#define NOWAY_ASSERT_BODY_ARGUMENTS __FILE__, __LINE__
+#else
+#define NOWAY_ASSERT_BODY_ARGUMENTS
+#endif
+
+#define noway_assert(cond) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ noWayAssertBodyConditional(NOWAY_ASSERT_BODY_ARGUMENTS); \
+ } \
+ } while (0)
+#define unreached() noWayAssertBody()
+
+#endif
+
+// IMPL_LIMITATION is called when we encounter valid IL that is not
+// supported by our current implementation because of various
+// limitations (that could be removed in the future)
+#define IMPL_LIMITATION(msg) NO_WAY(msg)
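+// Hypothetical example of use (message text is illustrative only):
+//     IMPL_LIMITATION("Unsupported IL construct");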
+
+#if defined(_HOST_X86_)
+
+// While debugging in a debugger, the "int 3" will cause the program to break.
+// Outside a debugger, the exception handler will just filter out the "int 3".
+
+#define BreakIfDebuggerPresent() \
+ do \
+ { \
+ __try \
+ { \
+ __asm {int 3} \
+ } \
+ __except (EXCEPTION_EXECUTE_HANDLER) \
+ { \
+ } \
+ } while (0)
+
+#else
+#define BreakIfDebuggerPresent() \
+ do \
+ { \
+ if (IsDebuggerPresent()) \
+ DebugBreak(); \
+ } while (0)
+#endif
+
+#ifdef DEBUG
+DWORD getBreakOnBadCode();
+#endif
+
+// For narrowing numeric conversions, the following two methods ensure that the
+// source value fits in the destination type, using either "assert" or
+// "noway_assert" to validate the conversion. Obviously, each returns the source value as
+// the destination type.
+
+// (There is an argument that these should be macros, to let the preprocessor capture
+// a more useful file/line for the error message. But then we have to use comma expressions
+// so that these can be used in expressions, etc., which is ugly. So I propose we rely on
+// getting stack traces in other ways.)
+template <typename Dst, typename Src>
+inline Dst SafeCvtAssert(Src val)
+{
+ assert(FitsIn<Dst>(val));
+ return static_cast<Dst>(val);
+}
+
+template <typename Dst, typename Src>
+inline Dst SafeCvtNowayAssert(Src val)
+{
+ noway_assert(FitsIn<Dst>(val));
+ return static_cast<Dst>(val);
+}
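+
+// Usage sketch (editorial; the variable names are assumed): narrowing a size_t byte count into an
+// unsigned field, with the fit check expressed as either an assert or a noway_assert.
+//
+//     size_t   rawSize = ...;
+//     unsigned size    = SafeCvtAssert<unsigned>(rawSize);       // debug-only fit check
+//     unsigned size2   = SafeCvtNowayAssert<unsigned>(rawSize);  // checked via noway_assert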
+
+#endif
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
new file mode 100644
index 0000000000..1c68bfd96a
--- /dev/null
+++ b/src/jit/flowgraph.cpp
@@ -0,0 +1,22276 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX FlowGraph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "allocacheck.h" // for alloca
+
+/*****************************************************************************/
+
+void Compiler::fgInit()
+{
+ impInit();
+
+ /* Initialization for fgWalkTreePre() and fgWalkTreePost() */
+
+ fgFirstBBScratch = nullptr;
+
+#ifdef DEBUG
+ fgPrintInlinedMethods = JitConfig.JitPrintInlinedMethods() == 1;
+#endif // DEBUG
+
+ /* We haven't yet computed the bbPreds lists */
+ fgComputePredsDone = false;
+
+ /* We haven't yet computed the bbCheapPreds lists */
+ fgCheapPredsValid = false;
+
+ /* We haven't yet computed the edge weight */
+ fgEdgeWeightsComputed = false;
+ fgHaveValidEdgeWeights = false;
+ fgSlopUsedInEdgeWeights = false;
+ fgRangeUsedInEdgeWeights = true;
+ fgNeedsUpdateFlowGraph = false;
+ fgCalledWeight = BB_ZERO_WEIGHT;
+
+ /* We haven't yet computed the dominator sets */
+ fgDomsComputed = false;
+
+#ifdef DEBUG
+ fgReachabilitySetsValid = false;
+#endif // DEBUG
+
+ /* We don't know yet which loops will always execute calls */
+ fgLoopCallMarked = false;
+
+ /* We haven't created GC Poll blocks yet. */
+ fgGCPollsCreated = false;
+
+ /* Initialize the basic block list */
+
+ fgFirstBB = nullptr;
+ fgLastBB = nullptr;
+ fgFirstColdBlock = nullptr;
+
+#if FEATURE_EH_FUNCLETS
+ fgFirstFuncletBB = nullptr;
+ fgFuncletsCreated = false;
+#endif // FEATURE_EH_FUNCLETS
+
+ fgBBcount = 0;
+
+#ifdef DEBUG
+ fgBBcountAtCodegen = 0;
+#endif // DEBUG
+
+ fgBBNumMax = 0;
+ fgEdgeCount = 0;
+ fgDomBBcount = 0;
+ fgBBVarSetsInited = false;
+ fgReturnCount = 0;
+
+ // Initialize BlockSet data.
+ fgCurBBEpoch = 0;
+ fgCurBBEpochSize = 0;
+ fgBBSetCountInSizeTUnits = 0;
+
+ genReturnBB = nullptr;
+
+ /* We haven't reached the global morphing phase */
+ fgGlobalMorph = false;
+ fgExpandInline = false;
+ fgModified = false;
+
+#ifdef DEBUG
+ fgSafeBasicBlockCreation = true;
+#endif // DEBUG
+
+ fgLocalVarLivenessDone = false;
+
+ /* Statement list is not threaded yet */
+
+ fgStmtListThreaded = false;
+
+ // Initialize the logic for adding code. This is used to insert code such
+ // as the code that raises an exception when an array range check fails.
+
+ fgAddCodeList = nullptr;
+ fgAddCodeModf = false;
+
+ for (int i = 0; i < SCK_COUNT; i++)
+ {
+ fgExcptnTargetCache[i] = nullptr;
+ }
+
+ /* Keep track of the max count of pointer arguments */
+
+ fgPtrArgCntCur = 0;
+ fgPtrArgCntMax = 0;
+
+ /* This global flag is set whenever we remove a statement */
+ fgStmtRemoved = false;
+
+ /* This global flag is set whenever we add a throw block for a RngChk */
+ fgRngChkThrowAdded = false; /* reset flag for fgIsCodeAdded() */
+
+ fgIncrCount = 0;
+
+ /* We will record a list of all BBJ_RETURN blocks here */
+ fgReturnBlocks = nullptr;
+
+ /* This is set by fgComputeReachability */
+ fgEnterBlks = BlockSetOps::UninitVal();
+
+#ifdef DEBUG
+ fgEnterBlksSetValid = false;
+#endif // DEBUG
+
+#if !FEATURE_EH_FUNCLETS
+ ehMaxHndNestingCount = 0;
+#endif // !FEATURE_EH_FUNCLETS
+
+ /* Init the fgBigOffsetMorphingTemps to be BAD_VAR_NUM. */
+ for (int i = 0; i < TYP_COUNT; i++)
+ {
+ fgBigOffsetMorphingTemps[i] = BAD_VAR_NUM;
+ }
+
+ fgNoStructPromotion = false;
+ fgNoStructParamPromotion = false;
+
+ optValnumCSE_phase = false; // referenced in fgMorphSmpOp()
+
+#ifdef DEBUG
+ fgNormalizeEHDone = false;
+#endif // DEBUG
+
+#ifdef DEBUG
+ if (!compIsForInlining())
+ {
+ if ((JitConfig.JitNoStructPromotion() & 1) == 1)
+ {
+ fgNoStructPromotion = true;
+ }
+ if ((JitConfig.JitNoStructPromotion() & 2) == 2)
+ {
+ fgNoStructParamPromotion = true;
+ }
+ }
+#endif // DEBUG
+
+ if (!compIsForInlining())
+ {
+ m_promotedStructDeathVars = nullptr;
+ }
+#ifdef FEATURE_SIMD
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+#endif
+}
+
+bool Compiler::fgHaveProfileData()
+{
+ if (compIsForInlining() || compIsForImportOnly())
+ {
+ return false;
+ }
+
+ return (fgProfileBuffer != nullptr);
+}
+
+bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weightWB)
+{
+ noway_assert(weightWB != nullptr);
+ unsigned weight = 0;
+
+#ifdef DEBUG
+ unsigned hashSeed = fgStressBBProf();
+ if (hashSeed != 0)
+ {
+ unsigned hash = (info.compMethodHash() * hashSeed) ^ (offset * 1027);
+
+ // We need to especially stress the procedure splitting codepath. Therefore
+ // one third of the time we should return a weight of zero.
+ // Otherwise we should return some random weight (usually between 0 and 288).
+ // The code below gives a weight of zero about 44% of the time.
+
+ if (hash % 3 == 0)
+ {
+ weight = 0;
+ }
+ else if (hash % 11 == 0)
+ {
+ weight = (hash % 23) * (hash % 29) * (hash % 31);
+ }
+ else
+ {
+ weight = (hash % 17) * (hash % 19);
+ }
+
+ // The first block is never given a weight of zero
+ if ((offset == 0) && (weight == 0))
+ {
+ weight = 1 + (hash % 5);
+ }
+
+ *weightWB = weight;
+ return true;
+ }
+#endif // DEBUG
+
+ if (fgHaveProfileData() == false)
+ {
+ return false;
+ }
+
+ noway_assert(!compIsForInlining());
+ for (unsigned i = 0; i < fgProfileBufferCount; i++)
+ {
+ if (fgProfileBuffer[i].ILOffset == offset)
+ {
+ weight = fgProfileBuffer[i].ExecutionCount;
+
+ *weightWB = weight;
+ return true;
+ }
+ }
+
+ *weightWB = 0;
+ return true;
+}
+
+void Compiler::fgInstrumentMethod()
+{
+ noway_assert(!compIsForInlining());
+
+ // Count the number of basic blocks in the method
+
+ int countOfBlocks = 0;
+ BasicBlock* block;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL))
+ {
+ continue;
+ }
+ countOfBlocks++;
+ }
+
+ // Allocate the profile buffer
+
+ ICorJitInfo::ProfileBuffer* bbProfileBuffer;
+
+ HRESULT res = info.compCompHnd->allocBBProfileBuffer(countOfBlocks, &bbProfileBuffer);
+
+ ICorJitInfo::ProfileBuffer* bbProfileBufferStart = bbProfileBuffer;
+
+ GenTreePtr stmt;
+
+ if (!SUCCEEDED(res))
+ {
+ // The E_NOTIMPL status is returned when we are profiling a generic method from a different assembly
+ if (res == E_NOTIMPL)
+ {
+ // In such cases we still want to add the method entry callback node
+
+ GenTreeArgList* args = gtNewArgList(gtNewIconEmbMethHndNode(info.compMethodHnd));
+ GenTreePtr call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, 0, args);
+
+ stmt = gtNewStmt(call);
+ }
+ else
+ {
+ noway_assert(!"Error: failed to allocate bbProfileBuffer");
+ return;
+ }
+ }
+ else
+ {
+ // Assign a buffer entry for each basic block
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL))
+ {
+ continue;
+ }
+
+ bbProfileBuffer->ILOffset = block->bbCodeOffs;
+
+ GenTreePtr addr;
+ GenTreePtr value;
+
+ value = gtNewOperNode(GT_IND, TYP_INT, gtNewIconEmbHndNode((void*)&bbProfileBuffer->ExecutionCount, nullptr,
+ GTF_ICON_BBC_PTR));
+ value = gtNewOperNode(GT_ADD, TYP_INT, value, gtNewIconNode(1));
+
+ addr = gtNewOperNode(GT_IND, TYP_INT, gtNewIconEmbHndNode((void*)&bbProfileBuffer->ExecutionCount, nullptr,
+ GTF_ICON_BBC_PTR));
+
+ addr = gtNewAssignNode(addr, value);
+
+ fgInsertStmtAtBeg(block, addr);
+
+ countOfBlocks--;
+ bbProfileBuffer++;
+ }
+ noway_assert(countOfBlocks == 0);
+
+ // Add the method entry callback node
+
+ GenTreeArgList* args = gtNewArgList(gtNewIconEmbMethHndNode(info.compMethodHnd));
+ GenTreePtr call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, 0, args);
+
+ GenTreePtr handle =
+ gtNewIconEmbHndNode((void*)&bbProfileBufferStart->ExecutionCount, nullptr, GTF_ICON_BBC_PTR);
+ GenTreePtr value = gtNewOperNode(GT_IND, TYP_INT, handle);
+ GenTreePtr relop = gtNewOperNode(GT_NE, TYP_INT, value, gtNewIconNode(0, TYP_INT));
+ relop->gtFlags |= GTF_RELOP_QMARK;
+ GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call);
+ GenTreePtr cond = gtNewQmarkNode(TYP_VOID, relop, colon);
+ stmt = gtNewStmt(cond);
+ }
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, stmt);
+}
+
+/*****************************************************************************
+ *
+ * Create a basic block and append it to the current BB list.
+ */
+
+BasicBlock* Compiler::fgNewBasicBlock(BBjumpKinds jumpKind)
+{
+ // This method must not be called after the exception table has been
+ // constructed, because it does not provide support for patching
+ // the exception table.
+
+ noway_assert(compHndBBtabCount == 0);
+
+ BasicBlock* block;
+
+ /* Allocate the block descriptor */
+
+ block = bbNewBasicBlock(jumpKind);
+ noway_assert(block->bbJumpKind == jumpKind);
+
+ /* Append the block to the end of the global basic block list */
+
+ if (fgFirstBB)
+ {
+ fgLastBB->setNext(block);
+ }
+ else
+ {
+ fgFirstBB = block;
+ block->bbPrev = nullptr;
+ }
+
+ fgLastBB = block;
+
+ return block;
+}
+
+/*****************************************************************************
+ *
+ * Ensures that fgFirstBB is a scratch BasicBlock that we have added.
+ * This can be used to add initialization code (without worrying
+ * about other blocks jumping to it).
+ *
+ * Callers have to be careful that they do not mess up the order of things
+ * added via fgEnsureFirstBBisScratch in a way that changes semantics.
+ */
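+
+// For example, fgInstrumentMethod above follows exactly this pattern when it appends the
+// method-entry callback statement (caller side only):
+//
+//     fgEnsureFirstBBisScratch();          // guarantee an internal first block that we created
+//     fgInsertStmtAtEnd(fgFirstBB, stmt);  // then place the initialization code into it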
+
+void Compiler::fgEnsureFirstBBisScratch()
+{
+ // Have we already allocated a scratch block?
+
+ if (fgFirstBBisScratch())
+ {
+ return;
+ }
+
+ assert(fgFirstBBScratch == nullptr);
+
+ BasicBlock* block = bbNewBasicBlock(BBJ_NONE);
+
+ if (fgFirstBB != nullptr)
+ {
+ // If we have profile data, the new block will inherit fgFirstBB's weight
+ if (fgFirstBB->bbFlags & BBF_PROF_WEIGHT)
+ {
+ block->inheritWeight(fgFirstBB);
+ }
+ fgInsertBBbefore(fgFirstBB, block);
+ }
+ else
+ {
+ noway_assert(fgLastBB == nullptr);
+ fgFirstBB = block;
+ fgLastBB = block;
+ }
+
+ noway_assert(fgLastBB != nullptr);
+
+ block->bbFlags |= (BBF_INTERNAL | BBF_IMPORTED);
+
+ fgFirstBBScratch = fgFirstBB;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("New scratch BB%02u\n", block->bbNum);
+ }
+#endif
+}
+
+bool Compiler::fgFirstBBisScratch()
+{
+ if (fgFirstBBScratch != nullptr)
+ {
+ assert(fgFirstBBScratch == fgFirstBB);
+ assert(fgFirstBBScratch->bbFlags & BBF_INTERNAL);
+ assert(fgFirstBBScratch->countOfInEdges() == 1);
+
+ // Normally, the first scratch block is a fall-through block. However, if the block after it was an empty
+ // BBJ_ALWAYS block, it might get removed, and the code that removes it will make the first scratch block
+ // a BBJ_ALWAYS block.
+ assert((fgFirstBBScratch->bbJumpKind == BBJ_NONE) || (fgFirstBBScratch->bbJumpKind == BBJ_ALWAYS));
+
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Compiler::fgBBisScratch(BasicBlock* block)
+{
+ return fgFirstBBisScratch() && (block == fgFirstBB);
+}
+
+#ifdef DEBUG
+// Check to see if block contains a statement but don't spend more than a certain
+// budget doing this per method compiled.
+// If the budget is exceeded, return 'answerOnBoundExceeded' as the answer.
+/* static */
+bool Compiler::fgBlockContainsStatementBounded(BasicBlock* block, GenTree* stmt, bool answerOnBoundExceeded /*= true*/)
+{
+ const __int64 maxLinks = 1000000000;
+
+ assert(stmt->gtOper == GT_STMT);
+
+ __int64* numTraversed = &JitTls::GetCompiler()->compNumStatementLinksTraversed;
+
+ if (*numTraversed > maxLinks)
+ {
+ return answerOnBoundExceeded;
+ }
+
+ GenTree* curr = block->firstStmt();
+ do
+ {
+ (*numTraversed)++;
+ if (curr == stmt)
+ {
+ break;
+ }
+ curr = curr->gtNext;
+ } while (curr);
+ return curr != nullptr;
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// fgInsertStmtAtBeg: Insert the given tree or statement at the start of the given basic block.
+//
+// Arguments:
+// block - The block into which 'stmt' will be inserted.
+// stmt - The statement to be inserted.
+//
+// Return Value:
+// Returns the (potentially) new GT_STMT node.
+//
+// Notes:
+// If 'stmt' is not already a statement, a new statement is created from it.
+// We always insert phi statements at the beginning.
+// In other cases, if there are any phi assignments and/or an assignment of
+// the GT_CATCH_ARG, we insert after those.
+
+GenTreePtr Compiler::fgInsertStmtAtBeg(BasicBlock* block, GenTreePtr stmt)
+{
+ if (stmt->gtOper != GT_STMT)
+ {
+ stmt = gtNewStmt(stmt);
+ }
+
+ GenTreePtr list = block->firstStmt();
+
+ if (!stmt->IsPhiDefnStmt())
+ {
+ GenTreePtr insertBeforeStmt = block->FirstNonPhiDefOrCatchArgAsg();
+ if (insertBeforeStmt != nullptr)
+ {
+ return fgInsertStmtBefore(block, insertBeforeStmt, stmt);
+ }
+ else if (list != nullptr)
+ {
+ return fgInsertStmtAtEnd(block, stmt);
+ }
+ // Otherwise, we will simply insert at the beginning, below.
+ }
+
+ /* The new tree will now be the first one of the block */
+
+ block->bbTreeList = stmt;
+ stmt->gtNext = list;
+
+ /* Are there any statements in the block? */
+
+ if (list)
+ {
+ GenTreePtr last;
+
+ /* There is at least one statement already */
+
+ last = list->gtPrev;
+ noway_assert(last && last->gtNext == nullptr);
+
+ /* Insert the statement in front of the first one */
+
+ list->gtPrev = stmt;
+ stmt->gtPrev = last;
+ }
+ else
+ {
+ /* The block was completely empty */
+
+ stmt->gtPrev = stmt;
+ }
+
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Insert the given tree or statement at the end of the given basic block.
+ * Returns the (potentially) new GT_STMT node.
+ * If the block can be a conditional block, use fgInsertStmtNearEnd.
+ */
+
+GenTreeStmt* Compiler::fgInsertStmtAtEnd(BasicBlock* block, GenTreePtr node)
+{
+ GenTreePtr list = block->firstStmt();
+ GenTreeStmt* stmt;
+
+ if (node->gtOper != GT_STMT)
+ {
+ stmt = gtNewStmt(node);
+ }
+ else
+ {
+ stmt = node->AsStmt();
+ }
+
+ assert(stmt->gtNext == nullptr); // We don't set it, and it needs to be this after the insert
+
+ if (list)
+ {
+ GenTreePtr last;
+
+ /* There is at least one statement already */
+
+ last = list->gtPrev;
+ noway_assert(last && last->gtNext == nullptr);
+
+ /* Append the statement after the last one */
+
+ last->gtNext = stmt;
+ stmt->gtPrev = last;
+ list->gtPrev = stmt;
+ }
+ else
+ {
+ /* The block is completely empty */
+
+ block->bbTreeList = stmt;
+ stmt->gtPrev = stmt;
+ }
+
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Insert the given tree or statement at the end of the given basic block, but before
+ * the GT_JTRUE, if present.
+ * Returns the (potentially) new GT_STMT node.
+ */
+
+GenTreeStmt* Compiler::fgInsertStmtNearEnd(BasicBlock* block, GenTreePtr node)
+{
+ GenTreeStmt* stmt;
+
+ // This routine can only be used when in tree order.
+ assert(fgOrder == FGOrderTree);
+
+ if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH) || (block->bbJumpKind == BBJ_RETURN))
+ {
+ if (node->gtOper != GT_STMT)
+ {
+ stmt = gtNewStmt(node);
+ }
+ else
+ {
+ stmt = node->AsStmt();
+ }
+
+ GenTreeStmt* first = block->firstStmt();
+ noway_assert(first);
+ GenTreeStmt* last = block->lastStmt();
+ noway_assert(last && last->gtNext == nullptr);
+ GenTreePtr after = last->gtPrev;
+
+#if DEBUG
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(last->gtStmtExpr->gtOper == GT_JTRUE);
+ }
+ else if (block->bbJumpKind == BBJ_RETURN)
+ {
+ noway_assert((last->gtStmtExpr->gtOper == GT_RETURN) || (last->gtStmtExpr->gtOper == GT_JMP) ||
+ // BBJ_RETURN blocks in functions returning void do not get a GT_RETURN node if they
+ // have a .tail prefix (even if canTailCall returns false for these calls)
+ // code:Compiler::impImportBlockCode (search for the RET: label)
+ // Ditto for real tail calls (all code after them has been removed)
+ ((last->gtStmtExpr->gtOper == GT_CALL) &&
+ ((info.compRetType == TYP_VOID) || last->gtStmtExpr->AsCall()->IsTailCall())));
+ }
+ else
+ {
+ noway_assert(block->bbJumpKind == BBJ_SWITCH);
+ noway_assert(last->gtStmtExpr->gtOper == GT_SWITCH);
+ }
+#endif // DEBUG
+
+ /* Append 'stmt' before 'last' */
+
+ stmt->gtNext = last;
+ last->gtPrev = stmt;
+
+ if (first == last)
+ {
+ /* There is only one stmt in the block */
+
+ block->bbTreeList = stmt;
+ stmt->gtPrev = last;
+ }
+ else
+ {
+ noway_assert(after && (after->gtNext == last));
+
+ /* Append 'stmt' after 'after' */
+
+ after->gtNext = stmt;
+ stmt->gtPrev = after;
+ }
+
+ return stmt;
+ }
+ else
+ {
+ return fgInsertStmtAtEnd(block, node);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Insert the given statement "stmt" after GT_STMT node "insertionPoint".
+ * Returns the newly inserted GT_STMT node.
+ * Note that the gtPrev list of statement nodes is circular, but the gtNext list is not.
+ */
+
+GenTreePtr Compiler::fgInsertStmtAfter(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt)
+{
+ assert(block->bbTreeList != nullptr);
+ noway_assert(insertionPoint->gtOper == GT_STMT);
+ noway_assert(stmt->gtOper == GT_STMT);
+ assert(fgBlockContainsStatementBounded(block, insertionPoint));
+ assert(!fgBlockContainsStatementBounded(block, stmt, false));
+
+ if (insertionPoint->gtNext == nullptr)
+ {
+ // Ok, we want to insert after the last statement of the block.
+ stmt->gtNext = nullptr;
+ stmt->gtPrev = insertionPoint;
+
+ insertionPoint->gtNext = stmt;
+
+ // Update the backward link of the first statement of the block
+ // to point to the new last statement.
+ assert(block->bbTreeList->gtPrev == insertionPoint);
+ block->bbTreeList->gtPrev = stmt;
+ }
+ else
+ {
+ stmt->gtNext = insertionPoint->gtNext;
+ stmt->gtPrev = insertionPoint;
+
+ insertionPoint->gtNext->gtPrev = stmt;
+ insertionPoint->gtNext = stmt;
+ }
+
+ return stmt;
+}
+
+// Insert the given tree or statement before GT_STMT node "insertionPoint".
+// Returns the newly inserted GT_STMT node.
+
+GenTreePtr Compiler::fgInsertStmtBefore(BasicBlock* block, GenTreePtr insertionPoint, GenTreePtr stmt)
+{
+ assert(block->bbTreeList != nullptr);
+ noway_assert(insertionPoint->gtOper == GT_STMT);
+ noway_assert(stmt->gtOper == GT_STMT);
+ assert(fgBlockContainsStatementBounded(block, insertionPoint));
+ assert(!fgBlockContainsStatementBounded(block, stmt, false));
+
+ if (insertionPoint == block->bbTreeList)
+ {
+ // We're inserting before the first statement in the block.
+ GenTreePtr list = block->bbTreeList;
+ GenTreePtr last = list->gtPrev;
+
+ stmt->gtNext = list;
+ stmt->gtPrev = last;
+
+ block->bbTreeList = stmt;
+ list->gtPrev = stmt;
+ }
+ else
+ {
+ stmt->gtNext = insertionPoint;
+ stmt->gtPrev = insertionPoint->gtPrev;
+
+ insertionPoint->gtPrev->gtNext = stmt;
+ insertionPoint->gtPrev = stmt;
+ }
+
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Insert the list of statements stmtList after the stmtAfter in block.
+ * Return the last statement stmtList.
+ */
+
+GenTreePtr Compiler::fgInsertStmtListAfter(BasicBlock* block, // the block where stmtAfter is in.
+ GenTreePtr stmtAfter, // the statement where stmtList should be inserted
+ // after.
+ GenTreePtr stmtList)
+{
+ // Currently we only handle the case where both stmtAfter and stmtList are non-NULL. This makes everything easy.
+ noway_assert(stmtAfter && stmtAfter->gtOper == GT_STMT);
+ noway_assert(stmtList && stmtList->gtOper == GT_STMT);
+
+ GenTreePtr stmtLast = stmtList->gtPrev; // Last statement in a non-empty list, circular in the gtPrev list.
+ noway_assert(stmtLast);
+ noway_assert(stmtLast->gtNext == nullptr);
+
+ GenTreePtr stmtNext = stmtAfter->gtNext;
+
+ if (!stmtNext)
+ {
+ stmtAfter->gtNext = stmtList;
+ stmtList->gtPrev = stmtAfter;
+ block->bbTreeList->gtPrev = stmtLast;
+ goto _Done;
+ }
+
+ stmtAfter->gtNext = stmtList;
+ stmtList->gtPrev = stmtAfter;
+
+ stmtLast->gtNext = stmtNext;
+ stmtNext->gtPrev = stmtLast;
+
+_Done:
+
+ noway_assert(block->bbTreeList == nullptr || block->bbTreeList->gtPrev->gtNext == nullptr);
+
+ return stmtLast;
+}
+
+/*
+ Removes a block from the return block list
+*/
+void Compiler::fgRemoveReturnBlock(BasicBlock* block)
+{
+ if (fgReturnBlocks == nullptr)
+ {
+ return;
+ }
+
+ if (fgReturnBlocks->block == block)
+ {
+ // It's the 1st entry, assign new head of list.
+ fgReturnBlocks = fgReturnBlocks->next;
+ return;
+ }
+
+ for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks->next != nullptr; retBlocks = retBlocks->next)
+ {
+ if (retBlocks->next->block == block)
+ {
+ // Found it; splice it out.
+ retBlocks->next = retBlocks->next->next;
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// fgGetPredForBlock: Find and return the predecessor edge corresponding to a given predecessor block.
+//
+// Arguments:
+// block -- The block with the predecessor list to operate on.
+// blockPred -- The predecessor block to find in the predecessor list.
+//
+// Return Value:
+// The flowList edge corresponding to "blockPred". If "blockPred" is not in the predecessor list of "block",
+// then returns nullptr.
+//
+// Assumptions:
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+
+flowList* Compiler::fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred)
+{
+ noway_assert(block);
+ noway_assert(blockPred);
+ assert(!fgCheapPredsValid);
+
+ flowList* pred;
+
+ for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (blockPred == pred->flBlock)
+ {
+ return pred;
+ }
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// fgGetPredForBlock: Find and return the predecessor edge corresponding to a given predecessor block.
+// Also returns the address of the pointer that points to this edge, to make it possible to remove this edge from the
+// predecessor list without doing another linear search over the edge list.
+//
+// Arguments:
+// block -- The block with the predecessor list to operate on.
+// blockPred -- The predecessor block to find in the predecessor list.
+// ptrToPred -- Out parameter: set to the address of the pointer that points to the returned predecessor edge.
+//
+// Return Value:
+// The flowList edge corresponding to "blockPred". If "blockPred" is not in the predecessor list of "block",
+// then returns nullptr.
+//
+// Assumptions:
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+
+flowList* Compiler::fgGetPredForBlock(BasicBlock* block, BasicBlock* blockPred, flowList*** ptrToPred)
+{
+ assert(block);
+ assert(blockPred);
+ assert(ptrToPred);
+ assert(!fgCheapPredsValid);
+
+ flowList** predPrevAddr;
+ flowList* pred;
+
+ for (predPrevAddr = &block->bbPreds, pred = *predPrevAddr; pred != nullptr;
+ predPrevAddr = &pred->flNext, pred = *predPrevAddr)
+ {
+ if (blockPred == pred->flBlock)
+ {
+ *ptrToPred = predPrevAddr;
+ return pred;
+ }
+ }
+
+ *ptrToPred = nullptr;
+ return nullptr;
+}
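+
+// Illustrative caller pattern (editorial sketch): the out-parameter lets the caller unlink the
+// edge without a second walk of the predecessor list.
+//
+//     flowList** ptrToPred;
+//     flowList*  pred = fgGetPredForBlock(block, blockPred, &ptrToPred);
+//     if (pred != nullptr)
+//     {
+//         *ptrToPred = pred->flNext; // splice the edge out without re-searching
+//     }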
+
+//------------------------------------------------------------------------
+// fgSpliceOutPred: Removes a predecessor edge for a block from the predecessor list.
+//
+// Arguments:
+// block -- The block with the predecessor list to operate on.
+// blockPred -- The predecessor block to remove from the predecessor list. It must be a predecessor of "block".
+//
+// Return Value:
+// The flowList edge that was removed.
+//
+// Assumptions:
+// -- "blockPred" must be a predecessor block of "block".
+// -- This simply splices out the flowList object. It doesn't update block ref counts, handle duplicate counts, etc.
+// For that, use fgRemoveRefPred() or fgRemoveAllRefPred().
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// -- This must walk the predecessor list to find the block in question. If the predecessor edge
+// is found using fgGetPredForBlock(), consider using the version that hands back the predecessor pointer
+// address instead, to avoid this search.
+// -- Marks fgModified = true, since the flow graph has changed.
+
+flowList* Compiler::fgSpliceOutPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(!fgCheapPredsValid);
+ noway_assert(block->bbPreds);
+
+ flowList* oldEdge = nullptr;
+
+ // Is this the first block in the pred list?
+ if (blockPred == block->bbPreds->flBlock)
+ {
+ oldEdge = block->bbPreds;
+ block->bbPreds = block->bbPreds->flNext;
+ }
+ else
+ {
+ flowList* pred;
+ for (pred = block->bbPreds; (pred->flNext != nullptr) && (blockPred != pred->flNext->flBlock);
+ pred = pred->flNext)
+ {
+ // empty
+ }
+ oldEdge = pred->flNext;
+ if (oldEdge == nullptr)
+ {
+ noway_assert(!"Should always find the blockPred");
+ }
+ pred->flNext = pred->flNext->flNext;
+ }
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return oldEdge;
+}
+
+//------------------------------------------------------------------------
+// fgAddRefPred: Increment block->bbRefs by one and add "blockPred" to the predecessor list of "block".
+//
+// Arguments:
+// block -- A block to operate on.
+// blockPred -- The predecessor block to add to the predecessor list.
+// oldEdge -- Optional (default: nullptr). If non-nullptr, and a new edge is created (and the dup count
+// of an existing edge is not just incremented), the edge weights are copied from this edge.
+// initializingPreds -- Optional (default: false). Only set to "true" when the initial preds computation is
+// happening.
+//
+// Return Value:
+// The flow edge representing the predecessor.
+//
+// Assumptions:
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// -- block->bbRefs is incremented by one to account for the increase in incoming edges.
+// -- block->bbRefs is adjusted even if preds haven't been computed. If preds haven't been computed,
+// the preds themselves aren't touched.
+// -- fgModified is set if a new flow edge is created (but not if an existing flow edge dup count is incremented),
+// indicating that the flow graph shape has changed.
+
+flowList* Compiler::fgAddRefPred(BasicBlock* block,
+ BasicBlock* blockPred,
+ flowList* oldEdge /* = nullptr */,
+ bool initializingPreds /* = false */)
+{
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+
+ block->bbRefs++;
+
+ if (!fgComputePredsDone && !initializingPreds)
+ {
+ // Why is someone trying to update the preds list when the preds haven't been created?
+ // Ignore them! This can happen when fgMorph is called before the preds list is created.
+ return nullptr;
+ }
+
+ assert(!fgCheapPredsValid);
+
+ flowList* flow = fgGetPredForBlock(block, blockPred);
+
+ if (flow)
+ {
+ noway_assert(flow->flDupCount > 0);
+ flow->flDupCount++;
+ }
+ else
+ {
+ flow = new (this, CMK_FlowList) flowList();
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ // Keep the predecessor list in lowest to highest bbNum order
+ // This allows us to discover the loops in optFindNaturalLoops
+ // from innermost to outermost.
+
+ // TODO-Throughput: This search is quadratic if you have many jumps
+ // to the same target. We need to either not bother sorting for
+ // debuggable code, or sort in optFindNaturalLoops, or better, make
+ // the code in optFindNaturalLoops not depend on order.
+
+ flowList** listp = &block->bbPreds;
+ while (*listp && ((*listp)->flBlock->bbNum < blockPred->bbNum))
+ {
+ listp = &(*listp)->flNext;
+ }
+
+ flow->flNext = *listp;
+ *listp = flow;
+
+ flow->flBlock = blockPred;
+ flow->flDupCount = 1;
+
+ if (fgHaveValidEdgeWeights)
+ {
+ // We are creating an edge from blockPred to block
+ // and we have already computed the edge weights, so
+ // we will try to setup this new edge with valid edge weights.
+ //
+ if (oldEdge != nullptr)
+ {
+ // If our caller has given us the old edge weights
+ // then we will use them.
+ //
+ flow->flEdgeWeightMin = oldEdge->flEdgeWeightMin;
+ flow->flEdgeWeightMax = oldEdge->flEdgeWeightMax;
+ }
+ else
+ {
+ // Set the max edge weight to be the minimum of block's or blockPred's weight
+ //
+ flow->flEdgeWeightMax = min(block->bbWeight, blockPred->bbWeight);
+
+ // If we are inserting a conditional block the minimum weight is zero,
+ // otherwise it is the same as the edge's max weight.
+ if (blockPred->NumSucc() > 1)
+ {
+ flow->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ }
+ else
+ {
+ flow->flEdgeWeightMin = flow->flEdgeWeightMax;
+ }
+ }
+ }
+ else
+ {
+ flow->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ flow->flEdgeWeightMax = BB_MAX_WEIGHT;
+ }
+ }
+ return flow;
+}
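+
+// Illustrative example (a sketch with hypothetical block names, not from the original sources): suppose
+// BB10 is a BBJ_COND block whose jump target and fall-through both lead to BB20. Then:
+//
+//     flowList* e1 = fgAddRefPred(BB20, BB10); // creates the edge; e1->flDupCount == 1, BB20->bbRefs += 1
+//     flowList* e2 = fgAddRefPred(BB20, BB10); // finds the same edge; e2 == e1, flDupCount == 2, bbRefs += 1
+//
+// So bbRefs counts incoming edges, while the pred list stores one flowList node per predecessor block.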
+
+//------------------------------------------------------------------------
+// fgRemoveRefPred: Decrements the reference count of a predecessor edge from "blockPred" to "block",
+// removing the edge if it is no longer necessary.
+//
+// Arguments:
+// block -- A block to operate on.
+// blockPred -- The predecessor block to remove from the predecessor list. It must be a predecessor of "block".
+//
+// Return Value:
+// If the flow edge was removed (the predecessor has a "dup count" of 1),
+// returns the flow graph edge that was removed. This means "blockPred" is no longer a predecessor of "block".
+// Otherwise, returns nullptr. This means that "blockPred" is still a predecessor of "block" (because "blockPred"
+// is a switch with multiple cases jumping to "block", or a BBJ_COND with both conditional and fall-through
+// paths leading to "block").
+//
+// Assumptions:
+// -- "blockPred" must be a predecessor block of "block".
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// -- block->bbRefs is decremented by one to account for the reduction in incoming edges.
+// -- block->bbRefs is adjusted even if preds haven't been computed. If preds haven't been computed,
+// the preds themselves aren't touched.
+// -- fgModified is set if a flow edge is removed (but not if an existing flow edge dup count is decremented),
+// indicating that the flow graph shape has changed.
+
+flowList* Compiler::fgRemoveRefPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ noway_assert(block != nullptr);
+ noway_assert(blockPred != nullptr);
+
+ noway_assert(block->countOfInEdges() > 0);
+ block->bbRefs--;
+
+ // Do nothing if we haven't calculated the predecessor list yet.
+ // Yes, this does happen.
+ // For example the predecessor lists haven't been created yet when we do fgMorph.
+ // But fgMorph calls fgFoldConditional, which in turn calls fgRemoveRefPred.
+ if (!fgComputePredsDone)
+ {
+ return nullptr;
+ }
+
+ assert(!fgCheapPredsValid);
+
+ flowList** ptrToPred;
+ flowList* pred = fgGetPredForBlock(block, blockPred, &ptrToPred);
+ noway_assert(pred);
+ noway_assert(pred->flDupCount > 0);
+
+ pred->flDupCount--;
+
+ if (pred->flDupCount == 0)
+ {
+ // Splice out the predecessor edge since it's no longer necessary.
+ *ptrToPred = pred->flNext;
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return pred;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
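+
+// Illustrative example (hypothetical block numbers): if a switch in BB10 has two cases jumping to BB20,
+// the edge BB10 -> BB20 has flDupCount == 2. The first fgRemoveRefPred(BB20, BB10) call only decrements
+// the dup count and returns nullptr (BB10 is still a predecessor); the second call drops the count to
+// zero, splices the edge out of BB20->bbPreds, and returns the removed edge.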
+
+//------------------------------------------------------------------------
+// fgRemoveAllRefPreds: Removes a predecessor edge from one block to another, no matter what the "dup count" is.
+//
+// Arguments:
+// block -- A block to operate on.
+// blockPred -- The predecessor block to remove from the predecessor list. It must be a predecessor of "block".
+//
+// Return Value:
+// Returns the flow graph edge that was removed. The dup count on the edge is no longer valid.
+//
+// Assumptions:
+// -- "blockPred" must be a predecessor block of "block".
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// block->bbRefs is decremented to account for the reduction in incoming edges.
+
+flowList* Compiler::fgRemoveAllRefPreds(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+ assert(fgComputePredsDone);
+ assert(!fgCheapPredsValid);
+ assert(block->countOfInEdges() > 0);
+
+ flowList** ptrToPred;
+ flowList* pred = fgGetPredForBlock(block, blockPred, &ptrToPred);
+ assert(pred != nullptr);
+ assert(pred->flDupCount > 0);
+
+ assert(block->bbRefs >= pred->flDupCount);
+ block->bbRefs -= pred->flDupCount;
+
+ // Now splice out the predecessor edge.
+ *ptrToPred = pred->flNext;
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return pred;
+}
+
+//------------------------------------------------------------------------
+// fgRemoveAllRefPreds: Remove a predecessor edge, given the address of a pointer to it in the
+// predecessor list, no matter what the "dup count" is.
+//
+// Arguments:
+// block -- A block with the predecessor list to operate on.
+// ptrToPred -- The address of a pointer to the predecessor to remove.
+//
+// Return Value:
+// The removed predecessor edge. The dup count on the edge is no longer valid.
+//
+// Assumptions:
+// -- The predecessor edge must be in the predecessor list for "block".
+// -- This only works on the full predecessor lists, not the cheap preds lists.
+//
+// Notes:
+// block->bbRefs is decremented by the dup count of the predecessor edge, to account for the reduction in incoming
+// edges.
+
+flowList* Compiler::fgRemoveAllRefPreds(BasicBlock* block, flowList** ptrToPred)
+{
+ assert(block != nullptr);
+ assert(ptrToPred != nullptr);
+ assert(fgComputePredsDone);
+ assert(!fgCheapPredsValid);
+ assert(block->countOfInEdges() > 0);
+
+ flowList* pred = *ptrToPred;
+ assert(pred != nullptr);
+ assert(pred->flDupCount > 0);
+
+ assert(block->bbRefs >= pred->flDupCount);
+ block->bbRefs -= pred->flDupCount;
+
+ // Now splice out the predecessor edge.
+ *ptrToPred = pred->flNext;
+
+ // Any changes to the flow graph invalidate the dominator sets.
+ fgModified = true;
+
+ return pred;
+}
+
+/*
+ Removes all the appearances of 'block' as a predecessor of other blocks.
+*/
+
+void Compiler::fgRemoveBlockAsPred(BasicBlock* block)
+{
+ assert(!fgCheapPredsValid);
+
+ PREFIX_ASSUME(block != nullptr);
+
+ BasicBlock* bNext;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ /* The block after the BBJ_CALLFINALLY block is not reachable */
+ bNext = block->bbNext;
+
+ /* bNext is an unreachable BBJ_ALWAYS block */
+ noway_assert(bNext->bbJumpKind == BBJ_ALWAYS);
+
+ while (bNext->countOfInEdges() > 0)
+ {
+ fgRemoveRefPred(bNext, bNext->bbPreds->flBlock);
+ }
+ }
+
+ __fallthrough;
+
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+
+ /* Update the predecessor list for 'block->bbJumpDest' and 'block->bbNext' */
+ fgRemoveRefPred(block->bbJumpDest, block);
+
+ if (block->bbJumpKind != BBJ_COND)
+ {
+ break;
+ }
+
+ /* If BBJ_COND fall through */
+ __fallthrough;
+
+ case BBJ_NONE:
+
+ /* Update the predecessor list for 'block->bbNext' */
+ fgRemoveRefPred(block->bbNext, block);
+ break;
+
+ case BBJ_EHFILTERRET:
+
+ block->bbJumpDest->bbRefs++; // To compensate for the bbRefs-- inside fgRemoveRefPred
+ fgRemoveRefPred(block->bbJumpDest, block);
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ /* Remove block as the predecessor of the bbNext of all
+ BBJ_CALLFINALLY blocks calling this finally. No need
+ to look for BBJ_CALLFINALLY for fault handlers. */
+
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ if (ehDsc->HasFinallyHandler())
+ {
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if ((bcall->bbFlags & BBF_REMOVED) || bcall->bbJumpKind != BBJ_CALLFINALLY ||
+ bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ assert(bcall->isBBCallAlwaysPair());
+ fgRemoveRefPred(bcall->bbNext, block);
+ }
+ }
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+ {
+ unsigned jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ fgRemoveRefPred(*jumpTab, block);
+ } while (++jumpTab, --jumpCnt);
+
+ break;
+ }
+
+ default:
+ noway_assert(!"Block doesn't have a valid bbJumpKind!!!!");
+ break;
+ }
+}
+
+/*****************************************************************************
+ * fgChangeSwitchBlock:
+ *
+ * We have a BBJ_SWITCH jump at 'oldSwitchBlock' and we want to move this
+ * switch jump over to 'newSwitchBlock'. All of the blocks that are jumped
+ * to from jumpTab[] need to have their predecessor lists updated by removing
+ * the 'oldSwitchBlock' and adding 'newSwitchBlock'.
+ */
+
+void Compiler::fgChangeSwitchBlock(BasicBlock* oldSwitchBlock, BasicBlock* newSwitchBlock)
+{
+ noway_assert(oldSwitchBlock != nullptr);
+ noway_assert(newSwitchBlock != nullptr);
+ noway_assert(oldSwitchBlock->bbJumpKind == BBJ_SWITCH);
+
+ unsigned jumpCnt = oldSwitchBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = oldSwitchBlock->bbJumpSwt->bbsDstTab;
+
+ unsigned i;
+
+ // Walk the switch's jump table, updating the predecessor for each branch.
+ for (i = 0; i < jumpCnt; i++)
+ {
+ BasicBlock* bJump = jumpTab[i];
+ noway_assert(bJump != nullptr);
+
+ // Note that if there are duplicate branch targets in the switch jump table,
+ // fgRemoveRefPred()/fgAddRefPred() will do the right thing: the second and
+ // subsequent duplicates will simply subtract from and add to the duplicate
+ // count (respectively).
+
+ //
+ // Remove the old edge [oldSwitchBlock => bJump]
+ //
+ fgRemoveRefPred(bJump, oldSwitchBlock);
+
+ //
+ // Create the new edge [newSwitchBlock => bJump]
+ //
+ fgAddRefPred(bJump, newSwitchBlock);
+ }
+
+ if (m_switchDescMap != nullptr)
+ {
+ SwitchUniqueSuccSet uniqueSuccSet;
+
+ // If we have already computed and cached the unique successor set for the old block,
+ // transfer that cache entry to the new block.
+ if (m_switchDescMap->Lookup(oldSwitchBlock, &uniqueSuccSet))
+ {
+ m_switchDescMap->Set(newSwitchBlock, uniqueSuccSet);
+ }
+ else
+ {
+ fgInvalidateSwitchDescMapEntry(newSwitchBlock);
+ }
+ fgInvalidateSwitchDescMapEntry(oldSwitchBlock);
+ }
+}
+
+/*****************************************************************************
+ * fgReplaceSwitchJumpTarget:
+ *
+ * We have a BBJ_SWITCH at 'blockSwitch' and we want to replace all entries
+ * in the jumpTab[] so that jumps that previously went to
+ * 'oldTarget' now go to 'newTarget'.
+ * We also must update the predecessor lists for 'oldTarget' and 'newTarget'.
+ */
+
+void Compiler::fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* newTarget, BasicBlock* oldTarget)
+{
+ noway_assert(blockSwitch != nullptr);
+ noway_assert(newTarget != nullptr);
+ noway_assert(oldTarget != nullptr);
+ noway_assert(blockSwitch->bbJumpKind == BBJ_SWITCH);
+
+ // For each jump table entry that matches 'oldTarget', retarget it to 'newTarget'
+ // and update the predecessor lists of 'oldTarget' and 'newTarget' accordingly.
+ //
+
+ unsigned jumpCnt = blockSwitch->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = blockSwitch->bbJumpSwt->bbsDstTab;
+
+ unsigned i = 0;
+
+ // Walk the switch's jump table looking for blocks to update the preds for
+ while (i < jumpCnt)
+ {
+ if (jumpTab[i] == oldTarget) // We will update when jumpTab[i] matches
+ {
+ // Remove the old edge [oldTarget from blockSwitch]
+ //
+ fgRemoveAllRefPreds(oldTarget, blockSwitch);
+
+ //
+ // Change the jumpTab entry to branch to the new location
+ //
+ jumpTab[i] = newTarget;
+
+ //
+ // Create the new edge [newTarget from blockSwitch]
+ //
+ flowList* newEdge = fgAddRefPred(newTarget, blockSwitch);
+
+ // Now set the correct value of newEdge->flDupCount
+ // and replace any other jumps in jumpTab[] that go to oldTarget.
+ //
+ i++;
+ while (i < jumpCnt)
+ {
+ if (jumpTab[i] == oldTarget)
+ {
+ //
+ // We also must update this entry in the jumpTab
+ //
+ jumpTab[i] = newTarget;
+ newTarget->bbRefs++;
+
+ //
+ // Increment the flDupCount
+ //
+ newEdge->flDupCount++;
+ }
+ i++; // Check the next entry in jumpTab[]
+ }
+
+ // Maintain, if necessary, the set of unique targets of 'blockSwitch'.
+ UpdateSwitchTableTarget(blockSwitch, oldTarget, newTarget);
+
+ // Make sure the new target has the proper bits set for being a branch target.
+ newTarget->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+
+ return; // We have replaced the jumps to oldTarget with newTarget
+ }
+ i++; // Check the next entry in jumpTab[] for a match
+ }
+ noway_assert(!"Did not find oldTarget in jumpTab[]");
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgReplaceJumpTarget: For a given block, replace the target 'oldTarget' with 'newTarget'.
+//
+// Arguments:
+// block - the block in which a jump target will be replaced.
+// newTarget - the new branch target of the block.
+// oldTarget - the old branch target of the block.
+//
+// Notes:
+// 1. Only branches are changed: BBJ_ALWAYS, the non-fallthrough path of BBJ_COND, BBJ_SWITCH, etc.
+// We ignore other block types.
+// 2. Only the first target found is updated. If there are multiple ways for a block
+// to reach 'oldTarget' (e.g., multiple arms of a switch), only the first one found is changed.
+// 3. The predecessor lists are not changed.
+// 4. The switch table "unique successor" cache is invalidated.
+//
+// This function is most useful early, before the full predecessor lists have been computed.
+//
+void Compiler::fgReplaceJumpTarget(BasicBlock* block, BasicBlock* newTarget, BasicBlock* oldTarget)
+{
+ assert(block != nullptr);
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_LEAVE: // This function will be called before import, so we still have BBJ_LEAVE
+
+ if (block->bbJumpDest == oldTarget)
+ {
+ block->bbJumpDest = newTarget;
+ }
+ break;
+
+ case BBJ_NONE:
+ case BBJ_EHFINALLYRET:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ for (unsigned i = 0; i < jumpCnt; i++)
+ {
+ if (jumpTab[i] == oldTarget)
+ {
+ jumpTab[i] = newTarget;
+ break;
+ }
+ }
+ break;
+
+ default:
+ assert(!"Block doesn't have a valid bbJumpKind!!!!");
+ unreached();
+ break;
+ }
+}
+
+/*****************************************************************************
+ * Updates the predecessor list for 'block' by replacing 'oldPred' with 'newPred'.
+ * Note that a block can only appear once in the preds list (for normal preds, not
+ * cheap preds): if a predecessor has multiple ways to get to this block, then
+ * flDupCount will be >1, but the block will still appear exactly once. Thus, this
+ * function assumes that all branches from the predecessor (practically, that all
+ * switch cases that target this block) are changed to branch from the new predecessor,
+ * with the same dup count.
+ *
+ * Note that the block bbRefs is not changed, since 'block' has the same number of
+ * references as before, just from a different predecessor block.
+ */
+
+void Compiler::fgReplacePred(BasicBlock* block, BasicBlock* oldPred, BasicBlock* newPred)
+{
+ noway_assert(block != nullptr);
+ noway_assert(oldPred != nullptr);
+ noway_assert(newPred != nullptr);
+ assert(!fgCheapPredsValid);
+
+ flowList* pred;
+
+ for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (oldPred == pred->flBlock)
+ {
+ pred->flBlock = newPred;
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns true if block b1 dominates block b2.
+ */
+
+bool Compiler::fgDominate(BasicBlock* b1, BasicBlock* b2)
+{
+ noway_assert(fgDomsComputed);
+ assert(!fgCheapPredsValid);
+
+ //
+ // If the fgModified flag is true then we have made some modifications to
+ // the flow graph, like adding a new block or changing a conditional branch
+ // into an unconditional branch.
+ //
+ // We can continue to use the dominator and reachable information to
+ // unmark loops as long as we haven't renumbered the blocks and we aren't
+ // asking for information about a new block.
+ //
+
+ if (b2->bbNum > fgDomBBcount)
+ {
+ if (b1 == b2)
+ {
+ return true;
+ }
+
+ for (flowList* pred = b2->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (!fgDominate(b1, pred->flBlock))
+ {
+ return false;
+ }
+ }
+
+ return b2->bbPreds != nullptr;
+ }
+
+ if (b1->bbNum > fgDomBBcount)
+ {
+ // if b1 is a loop preheader and Succ is its only successor, then all predecessors of
+ // Succ either are b1 itself or are dominated by Succ. Under these conditions, b1
+ // dominates b2 if and only if Succ dominates b2 (or if b2 == b1, but we already tested
+ // for this case)
+ if (b1->bbFlags & BBF_LOOP_PREHEADER)
+ {
+ noway_assert(b1->bbFlags & BBF_INTERNAL);
+ noway_assert(b1->bbJumpKind == BBJ_NONE);
+ return fgDominate(b1->bbNext, b2);
+ }
+
+ // unknown dominators; err on the safe side and return false
+ return false;
+ }
+
+ /* Check if b1 dominates b2 */
+ unsigned numA = b1->bbNum;
+ noway_assert(numA <= fgDomBBcount);
+ unsigned numB = b2->bbNum;
+ noway_assert(numB <= fgDomBBcount);
+
+ // What we want to ask here is basically if A is in the middle of the path from B to the root (the entry node)
+ // in the dominator tree. Turns out that can be translated as:
+ //
+ // A dom B <-> preorder(A) <= preorder(B) && postorder(A) >= postorder(B)
+ //
+ // where the equality holds when you ask if A dominates itself.
+ bool treeDom =
+ fgDomTreePreOrder[numA] <= fgDomTreePreOrder[numB] && fgDomTreePostOrder[numA] >= fgDomTreePostOrder[numB];
+
+ return treeDom;
+}
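+
+// Worked example for the preorder/postorder test above (hypothetical numbers, assuming a dominator tree
+// rooted at BB01 with children BB02 and BB03, where BB04 is a child of BB02). A DFS of that tree could
+// assign:
+//     preorder:  BB01=1, BB02=2, BB04=3, BB03=4
+//     postorder: BB04=1, BB02=2, BB03=3, BB01=4
+// Then BB02 dominates BB04 since pre(BB02)=2 <= pre(BB04)=3 and post(BB02)=2 >= post(BB04)=1, while BB03
+// does not dominate BB04 since pre(BB03)=4 > pre(BB04)=3. BB01, the root, dominates every block.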
+
+/*****************************************************************************
+ *
+ * Returns true if block b1 can reach block b2.
+ */
+
+bool Compiler::fgReachable(BasicBlock* b1, BasicBlock* b2)
+{
+ noway_assert(fgDomsComputed);
+ assert(!fgCheapPredsValid);
+
+ //
+ // If the fgModified flag is true then we have made some modifications to
+ // the flow graph, like adding a new block or changing a conditional branch
+ // into an unconditional branch.
+ //
+ // We can continue to use the dominator and reachable information to
+ // unmark loops as long as we haven't renumbered the blocks and we aren't
+ // asking for information about a new block.
+ //
+
+ if (b2->bbNum > fgDomBBcount)
+ {
+ if (b1 == b2)
+ {
+ return true;
+ }
+
+ for (flowList* pred = b2->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ if (fgReachable(b1, pred->flBlock))
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ if (b1->bbNum > fgDomBBcount)
+ {
+ noway_assert(b1->bbJumpKind == BBJ_NONE || b1->bbJumpKind == BBJ_ALWAYS || b1->bbJumpKind == BBJ_COND);
+
+ if (b1->bbFallsThrough() && fgReachable(b1->bbNext, b2))
+ {
+ return true;
+ }
+
+ if (b1->bbJumpKind == BBJ_ALWAYS || b1->bbJumpKind == BBJ_COND)
+ {
+ return fgReachable(b1->bbJumpDest, b2);
+ }
+
+ return false;
+ }
+
+ /* Check if b1 can reach b2 */
+ assert(fgReachabilitySetsValid);
+ assert(BasicBlockBitSetTraits::GetSize(this) == fgDomBBcount + 1);
+ return BlockSetOps::IsMember(this, b2->bbReach, b1->bbNum);
+}
+
+/*****************************************************************************
+ * Update changed flow graph information.
+ *
+ * If the flow graph has changed, we need to recompute various information if we want to use
+ * it again.
+ */
+
+void Compiler::fgUpdateChangedFlowGraph()
+{
+ // We need to clear this so we don't hit an assert calling fgRenumberBlocks().
+ fgDomsComputed = false;
+
+ JITDUMP("\nRenumbering the basic blocks for fgUpdateChangeFlowGraph\n");
+ fgRenumberBlocks();
+
+ fgComputePreds();
+ fgComputeEnterBlocksSet();
+ fgComputeReachabilitySets();
+ fgComputeDoms();
+}
+
+/*****************************************************************************
+ * Compute the bbReach sets.
+ *
+ * This can be called to recompute the bbReach sets after the flow graph changes, such as when the
+ * number of BasicBlocks change (and thus, the BlockSet epoch changes).
+ *
+ * Finally, this also sets the BBF_GC_SAFE_POINT flag on blocks.
+ *
+ * Assumes the predecessor lists are correct.
+ *
+ * TODO-Throughput: This algorithm consumes O(n^2) because we're using dense bitsets to
+ * represent reachability. While this yields O(1) time queries, it bloats the memory usage
+ * for large code. We can do better if we try to approach reachability by
+ * computing the strongly connected components of the flow graph. That way we only need
+ * linear memory to label every block with its SCC.
+ */
+
+void Compiler::fgComputeReachabilitySets()
+{
+ assert(fgComputePredsDone);
+ assert(!fgCheapPredsValid);
+
+#ifdef DEBUG
+ fgReachabilitySetsValid = false;
+#endif // DEBUG
+
+ BasicBlock* block;
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // Initialize the per-block bbReach sets. (Note that we can't just call BlockSetOps::ClearD()
+ // when re-running this computation, because if the epoch changes, the size and representation of the
+ // sets might change).
+ block->bbReach = BlockSetOps::MakeEmpty(this);
+
+ /* Mark block as reaching itself */
+ BlockSetOps::AddElemD(this, block->bbReach, block->bbNum);
+ }
+
+ /* Find the reachable blocks */
+ // Also, set BBF_GC_SAFE_POINT.
+
+ bool change;
+ BlockSet BLOCKSET_INIT_NOCOPY(newReach, BlockSetOps::MakeEmpty(this));
+ do
+ {
+ change = false;
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ BlockSetOps::Assign(this, newReach, block->bbReach);
+
+ bool predGcSafe = (block->bbPreds != nullptr); // Do all of our predecessor blocks have a GC safe bit?
+
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ /* Union the predecessor's reachability set into newReach */
+ BlockSetOps::UnionD(this, newReach, predBlock->bbReach);
+
+ if (!(predBlock->bbFlags & BBF_GC_SAFE_POINT))
+ {
+ predGcSafe = false;
+ }
+ }
+
+ if (predGcSafe)
+ {
+ block->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+
+ if (!BlockSetOps::Equal(this, newReach, block->bbReach))
+ {
+ BlockSetOps::Assign(this, block->bbReach, newReach);
+ change = true;
+ }
+ }
+ } while (change);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter computing reachability sets:\n");
+ fgDispReach();
+ }
+
+ fgReachabilitySetsValid = true;
+#endif // DEBUG
+}
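+
+// The loop above is a standard forward dataflow fixed point: for every block B it computes
+//     bbReach(B) = { B } union ( union of bbReach(P) over all predecessors P of B )
+// and iterates until no set changes. As a minimal illustration (hypothetical straight-line flow
+// BB01 -> BB02 -> BB03), the first pass grows bbReach(BB02) to {BB01, BB02} and bbReach(BB03) to
+// {BB01, BB02, BB03}; the second pass changes nothing, so the do/while exits.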
+
+/*****************************************************************************
+ * Compute the entry blocks set.
+ *
+ * Initialize fgEnterBlks to the set of blocks for which we don't have explicit control
+ * flow edges. These are the entry basic block and each of the EH handler blocks.
+ * For ARM, also include the BBJ_ALWAYS block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair,
+ * to avoid creating "retless" calls, since we need the BBJ_ALWAYS for the purpose
+ * of unwinding, even if the call doesn't return (due to an explicit throw, for example).
+ */
+
+void Compiler::fgComputeEnterBlocksSet()
+{
+#ifdef DEBUG
+ fgEnterBlksSetValid = false;
+#endif // DEBUG
+
+ fgEnterBlks = BlockSetOps::MakeEmpty(this);
+
+ /* Now set the entry basic block */
+ BlockSetOps::AddElemD(this, fgEnterBlks, fgFirstBB->bbNum);
+ assert(fgFirstBB->bbNum == 1);
+
+ if (compHndBBtabCount > 0)
+ {
+ /* Also 'or' in the handler basic blocks */
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ BlockSetOps::AddElemD(this, fgEnterBlks, HBtab->ebdFilter->bbNum);
+ }
+ BlockSetOps::AddElemD(this, fgEnterBlks, HBtab->ebdHndBeg->bbNum);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // TODO-ARM-Cleanup: The ARM code here to prevent creating retless calls by adding the BBJ_ALWAYS
+ // to the enter blocks is a bit of a compromise, because sometimes the blocks are already reachable,
+// and it messes up DFS ordering to have them marked as enter blocks. We should prevent the
+ // creation of retless calls some other way.
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ // Don't remove the BBJ_ALWAYS block that is only here for the unwinder. It might be dead
+ // if the finally is no-return, so mark it as an entry point.
+ BlockSetOps::AddElemD(this, fgEnterBlks, block->bbNext->bbNum);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Enter blocks: ");
+ BLOCKSET_ITER_INIT(this, iter, fgEnterBlks, bbNum);
+ while (iter.NextElem(this, &bbNum))
+ {
+ printf("BB%02u ", bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ fgEnterBlksSetValid = true;
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ * Remove unreachable blocks.
+ *
+ * Return true if any unreachable blocks were removed.
+ */
+
+bool Compiler::fgRemoveUnreachableBlocks()
+{
+ assert(!fgCheapPredsValid);
+ assert(fgReachabilitySetsValid);
+
+ bool hasLoops = false;
+ bool hasUnreachableBlocks = false;
+ BasicBlock* block;
+
+ /* Record unreachable blocks */
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ /* Internal throw blocks are also reachable */
+ if (fgIsThrowHlpBlk(block))
+ {
+ goto SKIP_BLOCK;
+ }
+ else if (block == genReturnBB)
+ {
+ // Don't remove statements for the genReturnBB block, as we might have special hookups there.
+ // For example, <BUGNUM> in VSW 364383, </BUGNUM>
+ // the profiler hookup needs to have the "void GT_RETURN" statement
+ // to properly set the info.compProfilerCallback flag.
+ goto SKIP_BLOCK;
+ }
+ else
+ {
+ // If any of the entry blocks can reach this block, then we skip it.
+ if (!BlockSetOps::IsEmptyIntersection(this, fgEnterBlks, block->bbReach))
+ {
+ goto SKIP_BLOCK;
+ }
+ }
+
+ // Remove all the code for the block
+ fgUnreachableBlock(block);
+
+ // Make sure that the block was marked as removed
+ noway_assert(block->bbFlags & BBF_REMOVED);
+
+ // Some blocks mark the end of trys and catches
+ // and can't be removed. We convert these into
+ // empty blocks of type BBJ_THROW
+
+ if (block->bbFlags & BBF_DONT_REMOVE)
+ {
+ bool bIsBBCallAlwaysPair = block->isBBCallAlwaysPair();
+
+ /* Unmark the block as removed, */
+ /* clear BBF_INTERNAL as well and set BBJ_IMPORTED */
+
+ block->bbFlags &= ~(BBF_REMOVED | BBF_INTERNAL | BBF_NEEDS_GCPOLL);
+ block->bbFlags |= BBF_IMPORTED;
+ block->bbJumpKind = BBJ_THROW;
+ block->bbSetRunRarely();
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // If this is a <BBJ_CALLFINALLY, BBJ_ALWAYS> pair, we have to clear BBF_FINALLY_TARGET flag on
+ // the target node (of BBJ_ALWAYS) since BBJ_CALLFINALLY node is getting converted to a BBJ_THROW.
+ if (bIsBBCallAlwaysPair)
+ {
+ noway_assert(block->bbNext->bbJumpKind == BBJ_ALWAYS);
+ fgClearFinallyTargetBit(block->bbNext->bbJumpDest);
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ }
+ else
+ {
+ /* We have to call fgRemoveBlock next */
+ hasUnreachableBlocks = true;
+ }
+ continue;
+
+ SKIP_BLOCK:;
+
+ // if (block->isRunRarely())
+ // continue;
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ continue;
+ }
+
+ /* Set BBF_LOOP_HEAD if we have backwards branches to this block */
+
+ unsigned blockNum = block->bbNum;
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (blockNum <= predBlock->bbNum)
+ {
+ if (predBlock->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ continue;
+ }
+
+ /* If block can reach predBlock then we have a loop head */
+ if (BlockSetOps::IsMember(this, predBlock->bbReach, blockNum))
+ {
+ hasLoops = true;
+
+ /* Set the BBF_LOOP_HEAD flag */
+ block->bbFlags |= BBF_LOOP_HEAD;
+ break;
+ }
+ }
+ }
+ }
+
+ fgHasLoops = hasLoops;
+
+ if (hasUnreachableBlocks)
+ {
+ // Now remove the unreachable blocks
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If the block was marked with BBF_REMOVED then
+ // we need to call fgRemoveBlock() on it
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ fgRemoveBlock(block, true);
+
+ // When we have a BBJ_CALLFINALLY, BBJ_ALWAYS pair; fgRemoveBlock will remove
+ // both blocks, so we must advance 1 extra place in the block list
+ //
+ if (block->isBBCallAlwaysPair())
+ {
+ block = block->bbNext;
+ }
+ }
+ }
+ }
+
+ return hasUnreachableBlocks;
+}
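+
+// Illustrative example of the loop-head marking above (hypothetical block numbers): if BB05 ends with a
+// backward branch to BB03, then when we process BB03 we find predecessor BB05 with bbNum 5 >= 3, and
+// since BB03 appears in BB05->bbReach (i.e., BB03 can reach BB05), BB03 is flagged BBF_LOOP_HEAD and
+// hasLoops (and so fgHasLoops) is set to true.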
+
+/*****************************************************************************
+ *
+ * Function called to compute the dominator and reachable sets.
+ *
+ * Assumes the predecessor lists are computed and correct.
+ */
+
+void Compiler::fgComputeReachability()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgComputeReachability\n");
+ }
+
+ fgVerifyHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ assert(fgComputePredsDone);
+ fgDebugCheckBBlist();
+#endif // DEBUG
+
+ /* Create a list of all BBJ_RETURN blocks. The head of the list is 'fgReturnBlocks'. */
+ fgReturnBlocks = nullptr;
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If this is a BBJ_RETURN block, add it to our list of all BBJ_RETURN blocks. This list is only
+ // used to find return blocks.
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ fgReturnBlocks = new (this, CMK_Reachability) BasicBlockList(block, fgReturnBlocks);
+ }
+ }
+
+ // Compute reachability and then delete blocks determined to be unreachable. If we delete blocks, we
+ // need to loop, as that might have caused more blocks to become unreachable. This can happen in the
+ // case where a call to a finally is unreachable and deleted (maybe the call to the finally is
+ // preceded by a throw or an infinite loop), making the blocks following the finally unreachable.
+ // However, all EH entry blocks are considered global entry blocks, causing the blocks following the
+ // call to the finally to stay rooted, until a second round of reachability is done.
+ // The dominator algorithm expects that all blocks can be reached from the fgEnterBlks set.
+ unsigned passNum = 1;
+ bool changed;
+ do
+ {
+ // Just to be paranoid, avoid infinite loops; fall back to minopts.
+ if (passNum > 10)
+ {
+ noway_assert(!"Too many unreachable block removal loops");
+ }
+
+ /* Walk the flow graph, reassign block numbers to keep them in ascending order */
+ JITDUMP("\nRenumbering the basic blocks for fgComputeReachability pass #%u\n", passNum);
+ passNum++;
+ fgRenumberBlocks();
+
+ //
+ // Compute fgEnterBlks
+ //
+
+ fgComputeEnterBlocksSet();
+
+ //
+ // Compute bbReach
+ //
+
+ fgComputeReachabilitySets();
+
+ //
+ // Use reachability information to delete unreachable blocks.
+ // Also, determine if the flow graph has loops and set 'fgHasLoops' accordingly.
+ // Set the BBF_LOOP_HEAD flag on the block target of backwards branches.
+ //
+
+ changed = fgRemoveUnreachableBlocks();
+
+ } while (changed);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter computing reachability:\n");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(true);
+#endif // DEBUG
+
+ //
+ // Now, compute the dominators
+ //
+
+ fgComputeDoms();
+}
+
+/** In order to compute dominance, we first need a DFS reverse post order sort of the basic flow graph
+ * for the dominance algorithm to operate correctly. We need the DFS sort because
+ * we build the dominance sets using the partial order induced by the DFS sorting. If this
+ * precondition does not hold, the algorithm doesn't work properly.
+ */
+void Compiler::fgDfsInvPostOrder()
+{
+ // NOTE: This algorithm only pays attention to the actual blocks. It ignores the imaginary entry block.
+
+ // visited : As we run the DFS post order sort, we mark the nodes we have visited to avoid
+ // revisiting them.
+ BlockSet BLOCKSET_INIT_NOCOPY(visited, BlockSetOps::MakeEmpty(this));
+
+ // We begin by figuring out which basic blocks don't have incoming edges and mark them as
+ // start nodes. Later on we run the DFS traversal helper for each node that we
+ // mark in this step.
+ BlockSet_ValRet_T startNodes = fgDomFindStartNodes();
+
+ // Make sure fgEnterBlks are still there in startNodes, even if they participate in a loop (i.e., there is
+ // an incoming edge into the block).
+ assert(fgEnterBlksSetValid);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ //
+ // BlockSetOps::UnionD(this, startNodes, fgEnterBlks);
+ //
+// This causes problems on ARM, because for BBJ_CALLFINALLY/BBJ_ALWAYS pairs we add the BBJ_ALWAYS
+ // to the enter blocks set to prevent flow graph optimizations from removing it and creating retless call finallies
+ // (BBF_RETLESS_CALL). This leads to an incorrect DFS ordering in some cases, because we start the recursive walk
+ // from the BBJ_ALWAYS, which is reachable from other blocks. A better solution would be to change ARM to avoid
+ // creating retless calls in a different way, not by adding BBJ_ALWAYS to fgEnterBlks.
+ //
+ // So, let us make sure at least fgFirstBB is still there, even if it participates in a loop.
+ BlockSetOps::AddElemD(this, startNodes, 1);
+ assert(fgFirstBB->bbNum == 1);
+#else
+ BlockSetOps::UnionD(this, startNodes, fgEnterBlks);
+#endif
+
+ assert(BlockSetOps::IsMember(this, startNodes, fgFirstBB->bbNum));
+
+ // Call the flowgraph DFS traversal helper.
+ unsigned postIndex = 1;
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If the block has no predecessors, and we haven't already visited it (because it's in fgEnterBlks but also
+ // reachable from the first block), go ahead and traverse starting from this block.
+ if (BlockSetOps::IsMember(this, startNodes, block->bbNum) &&
+ !BlockSetOps::IsMember(this, visited, block->bbNum))
+ {
+ fgDfsInvPostOrderHelper(block, visited, &postIndex);
+ }
+ }
+
+ // After the DFS reverse postorder is completed, we must have visited all the basic blocks.
+ noway_assert(postIndex == fgBBcount + 1);
+ noway_assert(fgBBNumMax == fgBBcount);
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n");
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ printf("%02u -> BB%02u\n", i, fgBBInvPostOrder[i]->bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+BlockSet_ValRet_T Compiler::fgDomFindStartNodes()
+{
+ unsigned j;
+ BasicBlock* block;
+
+ // startNodes :: A set that represents which basic blocks in the flow graph don't have incoming edges.
+ // We begin assuming everything is a start block and remove any block that is being referenced by another in its
+ // successor list.
+
+ BlockSet BLOCKSET_INIT_NOCOPY(startNodes, BlockSetOps::MakeFull(this));
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ unsigned cSucc = block->NumSucc(this);
+ for (j = 0; j < cSucc; ++j)
+ {
+ BasicBlock* succ = block->GetSucc(j, this);
+ BlockSetOps::RemoveElemD(this, startNodes, succ->bbNum);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDominator computation start blocks (those blocks with no incoming edges):\n");
+ BLOCKSET_ITER_INIT(this, iter, startNodes, bbNum);
+ while (iter.NextElem(this, &bbNum))
+ {
+ printf("BB%02u ", bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ return startNodes;
+}
+
+//------------------------------------------------------------------------
+// fgDfsInvPostOrderHelper: Helper to assign post-order numbers to blocks.
+//
+// Arguments:
+// block - The starting entry block
+// visited - The set of visited blocks
+// count - Pointer to the Dfs counter
+//
+// Notes:
+// Compute a non-recursive DFS traversal of the flow graph using an
+// evaluation stack to assign post-order numbers.
+
+void Compiler::fgDfsInvPostOrderHelper(BasicBlock* block, BlockSet& visited, unsigned* count)
+{
+ // Assume we haven't visited this node yet (callers ensure this).
+ assert(!BlockSetOps::IsMember(this, visited, block->bbNum));
+
+ // Allocate a local stack to hold the DFS traversal actions necessary
+ // to compute pre/post-ordering of the control flowgraph.
+ ArrayStack<DfsBlockEntry> stack(this);
+
+ // Push the first block on the stack to seed the traversal.
+ stack.Push(DfsBlockEntry(DSS_Pre, block));
+ // Flag the node we just visited to avoid backtracking.
+ BlockSetOps::AddElemD(this, visited, block->bbNum);
+
+ // The search is terminated once all the actions have been processed.
+ while (stack.Height() != 0)
+ {
+ DfsBlockEntry current = stack.Pop();
+ BasicBlock* currentBlock = current.dfsBlock;
+
+ if (current.dfsStackState == DSS_Pre)
+ {
+ // This is a pre-visit that corresponds to the first time the
+ // node is encountered in the spanning tree and receives pre-order
+ // numberings. By pushing the post-action on the stack here we
+ // are guaranteed to only process it after all of its successors
+ // pre and post actions are processed.
+ stack.Push(DfsBlockEntry(DSS_Post, currentBlock));
+
+ unsigned cSucc = currentBlock->NumSucc(this);
+ for (unsigned j = 0; j < cSucc; ++j)
+ {
+ BasicBlock* succ = currentBlock->GetSucc(j, this);
+
+ // If this is a node we haven't seen before, go ahead and process
+ if (!BlockSetOps::IsMember(this, visited, succ->bbNum))
+ {
+ // Push a pre-visit action for this successor onto the stack and
+ // mark it as visited in case this block has multiple successors
+ // to the same node (multi-graph).
+ stack.Push(DfsBlockEntry(DSS_Pre, succ));
+ BlockSetOps::AddElemD(this, visited, succ->bbNum);
+ }
+ }
+ }
+ else
+ {
+ // This is a post-visit that corresponds to the last time the
+ // node is visited in the spanning tree and only happens after
+ // all descendants in the spanning tree have had pre and post
+ // actions applied.
+
+ assert(current.dfsStackState == DSS_Post);
+
+ unsigned invCount = fgBBcount - *count + 1;
+ assert(1 <= invCount && invCount <= fgBBNumMax);
+ fgBBInvPostOrder[invCount] = currentBlock;
+ currentBlock->bbDfsNum = invCount;
+ ++(*count);
+ }
+ }
+}
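+
+// Illustrative trace (hypothetical three-block graph BB01 -> {BB02, BB03}, BB02 -> {BB03}, BB03 with no
+// successors, fgBBcount == 3): the stack processes Pre(BB01), pushes Post(BB01) plus Pre entries for BB02
+// and BB03, and then pops post-actions in the order BB03, BB02, BB01. With *count starting at 1, invCount
+// comes out as 3, 2, 1 respectively, so fgBBInvPostOrder[] holds BB01, BB02, BB03 at indices 1, 2, 3 --
+// a reverse post order in which BB01, the DFS root, gets the smallest bbDfsNum.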
+
+void Compiler::fgComputeDoms()
+{
+ assert(!fgCheapPredsValid);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgComputeDoms\n");
+ }
+
+ fgVerifyHandlerTab();
+
+ // Make sure that the predecessor lists are accurate.
+ // Also check that the blocks are properly, densely numbered (so calling fgRenumberBlocks is not necessary).
+ fgDebugCheckBBlist(true);
+
+ // Assert things related to the BlockSet epoch.
+ assert(fgBBcount == fgBBNumMax);
+ assert(BasicBlockBitSetTraits::GetSize(this) == fgBBNumMax + 1);
+#endif // DEBUG
+
+ BlockSet BLOCKSET_INIT_NOCOPY(processedBlks, BlockSetOps::MakeEmpty(this));
+
+ fgBBInvPostOrder = new (this, CMK_DominatorMemory) BasicBlock*[fgBBNumMax + 1];
+ memset(fgBBInvPostOrder, 0, sizeof(BasicBlock*) * (fgBBNumMax + 1));
+
+ fgDfsInvPostOrder();
+ noway_assert(fgBBInvPostOrder[0] == nullptr);
+
+ // flRoot and bbRoot represent an imaginary unique entry point in the flow graph.
+ // All the orphaned EH blocks and fgFirstBB will temporarily have their predecessor lists
+ // (with bbRoot as the only basic block in them) set to flRoot.
+ // Later on, we clear those predecessor lists and let them be nullptr again.
+ // Since we number basic blocks starting at one, the imaginary entry block is conveniently numbered as zero.
+ flowList flRoot;
+ BasicBlock bbRoot;
+
+ bbRoot.bbPreds = nullptr;
+ bbRoot.bbNum = 0;
+ bbRoot.bbIDom = &bbRoot;
+ bbRoot.bbDfsNum = 0;
+ flRoot.flNext = nullptr;
+ flRoot.flBlock = &bbRoot;
+
+ fgBBInvPostOrder[0] = &bbRoot;
+
+ // Mark both bbRoot and fgFirstBB processed
+ BlockSetOps::AddElemD(this, processedBlks, 0); // bbRoot == block #0
+ BlockSetOps::AddElemD(this, processedBlks, 1); // fgFirstBB == block #1
+ assert(fgFirstBB->bbNum == 1);
+
+ // Special case fgFirstBB to say its IDom is bbRoot.
+ fgFirstBB->bbIDom = &bbRoot;
+
+ BasicBlock* block = nullptr;
+
+ for (block = fgFirstBB->bbNext; block != nullptr; block = block->bbNext)
+ {
+ // If any basic block has no predecessors then we flag it as processed and temporarily
+ // mark its predecessor list to be flRoot. This makes the flowgraph connected,
+ // a precondition that is needed by the dominance algorithm to operate properly.
+ if (block->bbPreds == nullptr)
+ {
+ block->bbPreds = &flRoot;
+ block->bbIDom = &bbRoot;
+ BlockSetOps::AddElemD(this, processedBlks, block->bbNum);
+ }
+ else
+ {
+ block->bbIDom = nullptr;
+ }
+ }
+
+ // Mark the EH blocks as entry blocks and also flag them as processed.
+ if (compHndBBtabCount > 0)
+ {
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ HBtab->ebdFilter->bbIDom = &bbRoot;
+ BlockSetOps::AddElemD(this, processedBlks, HBtab->ebdFilter->bbNum);
+ }
+ HBtab->ebdHndBeg->bbIDom = &bbRoot;
+ BlockSetOps::AddElemD(this, processedBlks, HBtab->ebdHndBeg->bbNum);
+ }
+ }
+
+ // Now proceed to compute the immediate dominators for each basic block.
+ bool changed = true;
+ while (changed)
+ {
+ changed = false;
+ for (unsigned i = 1; i <= fgBBNumMax;
+ ++i) // Process each actual block; don't process the imaginary predecessor block.
+ {
+ flowList* first = nullptr;
+ BasicBlock* newidom = nullptr;
+ block = fgBBInvPostOrder[i];
+
+ // If a block has bbRoot as its bbIDom, it has already been flagged
+ // as processed and as an entry block, so we're all set.
+ if (block->bbIDom == &bbRoot)
+ {
+ continue;
+ }
+
+ // Pick up the first processed predecessor of the current block.
+ for (first = block->bbPreds; first != nullptr; first = first->flNext)
+ {
+ if (BlockSetOps::IsMember(this, processedBlks, first->flBlock->bbNum))
+ {
+ break;
+ }
+ }
+ noway_assert(first != nullptr);
+
+ // We assume the first processed predecessor will be the
+ // immediate dominator and then compute the forward flow analysis.
+ newidom = first->flBlock;
+ for (flowList* p = block->bbPreds; p != nullptr; p = p->flNext)
+ {
+ if (p->flBlock == first->flBlock)
+ {
+ continue;
+ }
+ if (p->flBlock->bbIDom != nullptr)
+ {
+ // fgIntersectDom is basically the set intersection between
+ // the dominance sets of the new IDom and the current predecessor
+ // Since the nodes are ordered in DFS inverse post order and
+ // IDom induces a tree, fgIntersectDom actually computes
+ // the lowest common ancestor in the dominator tree.
+ newidom = fgIntersectDom(p->flBlock, newidom);
+ }
+ }
+
+ // If the Immediate dominator changed, assign the new one
+ // to the current working basic block.
+ if (block->bbIDom != newidom)
+ {
+ noway_assert(newidom != nullptr);
+ block->bbIDom = newidom;
+ changed = true;
+ }
+ BlockSetOps::AddElemD(this, processedBlks, block->bbNum);
+ }
+ }
+
+ // As stated before, once we have computed immediate dominance we need to clear
+ // all the basic blocks whose predecessor list was set to flRoot. This
+ // reverts that and leaves the blocks the same as before.
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbPreds == &flRoot)
+ {
+ block->bbPreds = nullptr;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispDoms();
+ }
+#endif
+
+ fgBuildDomTree();
+
+ fgModified = false;
+ fgDomBBcount = fgBBcount;
+ assert(fgBBcount == fgBBNumMax);
+ assert(BasicBlockBitSetTraits::GetSize(this) == fgDomBBcount + 1);
+
+ fgDomsComputed = true;
+}
+
+void Compiler::fgBuildDomTree()
+{
+ unsigned i;
+ BasicBlock* block;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nInside fgBuildDomTree\n");
+ }
+#endif // DEBUG
+
+ // domTree :: The dominance tree represented using adjacency lists. We use BasicBlockList to represent edges.
+ // Indexed by basic block number.
+ unsigned bbArraySize = fgBBNumMax + 1;
+ BasicBlockList** domTree = new (this, CMK_DominatorMemory) BasicBlockList*[bbArraySize];
+
+ fgDomTreePreOrder = new (this, CMK_DominatorMemory) unsigned[bbArraySize];
+ fgDomTreePostOrder = new (this, CMK_DominatorMemory) unsigned[bbArraySize];
+
+ // Initialize all the data structures.
+ for (i = 0; i < bbArraySize; ++i)
+ {
+ domTree[i] = nullptr;
+ fgDomTreePreOrder[i] = fgDomTreePostOrder[i] = 0;
+ }
+
+ // Build the dominance tree.
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ // If the immediate dominator is not the imaginary root (bbRoot)
+ // we proceed to append this block to the children of the dominator node.
+ if (block->bbIDom->bbNum != 0)
+ {
+ int bbNum = block->bbIDom->bbNum;
+ domTree[bbNum] = new (this, CMK_DominatorMemory) BasicBlockList(block, domTree[bbNum]);
+ }
+ else
+ {
+ // This means this block had bbRoot set as its IDom. We clear it out
+ // and convert the tree back to a forest.
+ block->bbIDom = nullptr;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter computing the Dominance Tree:\n");
+ fgDispDomTree(domTree);
+ }
+#endif // DEBUG
+
+ // Get the bitset that represents the roots of the dominance tree.
+ // Something to note here is that the dominance tree has been converted from a forest to a tree
+ // by using the bbRoot trick on fgComputeDoms. The reason we have a forest instead of a real tree
+ // is because we treat the EH blocks as entry nodes so the real dominance tree is not necessarily connected.
+ BlockSet_ValRet_T domTreeEntryNodes = fgDomTreeEntryNodes(domTree);
+
+ // The preorder and postorder numbers.
+ // We start from 1 to match the bbNum ordering.
+ unsigned preNum = 1;
+ unsigned postNum = 1;
+
+ // There will be nodes in the dominance tree that will not be reachable:
+ // the catch blocks that return since they don't have any predecessor.
+ // To account for this, we keep track of how many nodes we can
+ // reach and assert at the end that we visited all of them.
+ unsigned domTreeReachable = fgBBcount;
+
+ // Once we have the dominance tree computed, we need to traverse it
+ // to get the preorder and postorder numbers for each node. The purpose of
+ // this is to achieve O(1) queries of the form "A dominates B".
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ if (BlockSetOps::IsMember(this, domTreeEntryNodes, i))
+ {
+ if (domTree[i] == nullptr)
+ {
+ // If this is an entry node but there's no children on this
+ // node, it means it's unreachable so we decrement the reachable
+ // counter.
+ --domTreeReachable;
+ }
+ else
+ {
+ // Otherwise, we do a DFS traversal of the dominator tree.
+ fgTraverseDomTree(i, domTree, &preNum, &postNum);
+ }
+ }
+ }
+
+ noway_assert(preNum == domTreeReachable + 1);
+ noway_assert(postNum == domTreeReachable + 1);
+
+ // Once we have all the reachable nodes numbered, we proceed to
+ // assign numbers to the non-reachable ones, just assign incrementing
+ // values. We must reach fgBBcount at the end.
+
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ if (BlockSetOps::IsMember(this, domTreeEntryNodes, i))
+ {
+ if (domTree[i] == nullptr)
+ {
+ fgDomTreePreOrder[i] = preNum++;
+ fgDomTreePostOrder[i] = postNum++;
+ }
+ }
+ }
+
+ noway_assert(preNum == fgBBNumMax + 1);
+ noway_assert(postNum == fgBBNumMax + 1);
+ noway_assert(fgDomTreePreOrder[0] == 0); // Unused first element
+ noway_assert(fgDomTreePostOrder[0] == 0); // Unused first element
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nAfter traversing the dominance tree:\n");
+ printf("PreOrder:\n");
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ printf("BB%02u : %02u\n", i, fgDomTreePreOrder[i]);
+ }
+ printf("PostOrder:\n");
+ for (i = 1; i <= fgBBNumMax; ++i)
+ {
+ printf("BB%02u : %02u\n", i, fgDomTreePostOrder[i]);
+ }
+ }
+#endif // DEBUG
+}
+
+BlockSet_ValRet_T Compiler::fgDomTreeEntryNodes(BasicBlockList** domTree)
+{
+ // domTreeEntryNodes :: Set that represents which basic blocks are roots of the dominator forest.
+
+ BlockSet BLOCKSET_INIT_NOCOPY(domTreeEntryNodes, BlockSetOps::MakeFull(this));
+
+ // First of all we need to find all the roots of the dominance forest.
+
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ for (BasicBlockList* current = domTree[i]; current != nullptr; current = current->next)
+ {
+ BlockSetOps::RemoveElemD(this, domTreeEntryNodes, current->block->bbNum);
+ }
+ }
+
+ return domTreeEntryNodes;
+}
+
+#ifdef DEBUG
+void Compiler::fgDispDomTree(BasicBlockList** domTree)
+{
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ if (domTree[i] != nullptr)
+ {
+ printf("BB%02u : ", i);
+ for (BasicBlockList* current = domTree[i]; current != nullptr; current = current->next)
+ {
+ assert(current->block);
+ printf("BB%02u ", current->block->bbNum);
+ }
+ printf("\n");
+ }
+ }
+ printf("\n");
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// fgTraverseDomTree: Assign pre/post-order numbers to the dominator tree.
+//
+// Arguments:
+// bbNum - The basic block number of the starting block
+// domTree - The dominator tree (as child block lists)
+// preNum - Pointer to the pre-number counter
+// postNum - Pointer to the post-number counter
+//
+// Notes:
+// Runs a non-recursive DFS traversal of the dominator tree using an
+// evaluation stack to assign pre-order and post-order numbers.
+// These numberings are used to provide constant time lookup for
+// ancestor/descendent tests between pairs of nodes in the tree.
+
+void Compiler::fgTraverseDomTree(unsigned bbNum, BasicBlockList** domTree, unsigned* preNum, unsigned* postNum)
+{
+ noway_assert(bbNum <= fgBBNumMax);
+
+ // If the block preorder number is not zero it means we already visited
+ // that node, so we skip it.
+ if (fgDomTreePreOrder[bbNum] == 0)
+ {
+ // If this is the first time we visit this node, both preorder and postnumber
+ // values must be zero.
+ noway_assert(fgDomTreePostOrder[bbNum] == 0);
+
+ // Allocate a local stack to hold the Dfs traversal actions necessary
+ // to compute pre/post-ordering of the dominator tree.
+ ArrayStack<DfsNumEntry> stack(this);
+
+ // Push the first entry number on the stack to seed the traversal.
+ stack.Push(DfsNumEntry(DSS_Pre, bbNum));
+
+ // The search is terminated once all the actions have been processed.
+ while (stack.Height() != 0)
+ {
+ DfsNumEntry current = stack.Pop();
+ unsigned currentNum = current.dfsNum;
+
+ if (current.dfsStackState == DSS_Pre)
+ {
+ // This pre-visit action corresponds to the first time the
+ // node is encountered during the spanning traversal.
+ noway_assert(fgDomTreePreOrder[currentNum] == 0);
+ noway_assert(fgDomTreePostOrder[currentNum] == 0);
+
+ // Assign the preorder number on the first visit.
+ fgDomTreePreOrder[currentNum] = (*preNum)++;
+
+ // Push this node's post-action on the stack so that all successors'
+ // pre-order visits occur before this node's post-action. We will assign
+ // its post-order number when we pop it off the stack.
+ stack.Push(DfsNumEntry(DSS_Post, currentNum));
+
+ // For each child in the dominator tree process its pre-actions.
+ for (BasicBlockList* child = domTree[currentNum]; child != nullptr; child = child->next)
+ {
+ unsigned childNum = child->block->bbNum;
+
+ // This is a tree so never could have been visited
+ assert(fgDomTreePreOrder[childNum] == 0);
+
+ // Push the successor in the dominator tree for pre-actions.
+ stack.Push(DfsNumEntry(DSS_Pre, childNum));
+ }
+ }
+ else
+ {
+ // This post-visit action corresponds to the last time the node
+ // is encountered and only after all descendants in the spanning
+ // tree have had pre and post-order numbers assigned.
+
+ assert(current.dfsStackState == DSS_Post);
+ assert(fgDomTreePreOrder[currentNum] != 0);
+ assert(fgDomTreePostOrder[currentNum] == 0);
+
+ // Now assign this node's post-order number.
+ fgDomTreePostOrder[currentNum] = (*postNum)++;
+ }
+ }
+ }
+}
+
+// This code finds the lowest common ancestor in the
+// dominator tree between two basic blocks. The LCA in the dominance tree
+// represents the closest common dominator of the two basic blocks. Used to
+// adjust the IDom value in fgComputeDoms.
+BasicBlock* Compiler::fgIntersectDom(BasicBlock* a, BasicBlock* b)
+{
+ BasicBlock* finger1 = a;
+ BasicBlock* finger2 = b;
+ while (finger1 != finger2)
+ {
+ while (finger1->bbDfsNum > finger2->bbDfsNum)
+ {
+ finger1 = finger1->bbIDom;
+ }
+ while (finger2->bbDfsNum > finger1->bbDfsNum)
+ {
+ finger2 = finger2->bbIDom;
+ }
+ }
+ return finger1;
+}
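+
+// Illustrative walk (hypothetical blocks, with bbDfsNum values in parentheses): take a = BB06(6), whose
+// IDom chain is BB06(6) -> BB03(3) -> BB01(1), and b = BB04(4), whose IDom chain is
+// BB04(4) -> BB02(2) -> BB01(1). finger1 climbs BB06 -> BB03, finger2 climbs BB04 -> BB02, finger1 climbs
+// BB03 -> BB01, finger2 climbs BB02 -> BB01, and the fingers meet at BB01: the closest common dominator
+// of the two blocks.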
+
+// Return a BlockSet containing all the blocks that dominate 'block'.
+BlockSet_ValRet_T Compiler::fgGetDominatorSet(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ BlockSet BLOCKSET_INIT_NOCOPY(domSet, BlockSetOps::MakeEmpty(this));
+
+ do
+ {
+ BlockSetOps::AddElemD(this, domSet, block->bbNum);
+ if (block == block->bbIDom)
+ {
+ break; // We found a cycle in the IDom list, so we're done.
+ }
+ block = block->bbIDom;
+ } while (block != nullptr);
+
+ return domSet;
+}
+
+/*****************************************************************************
+ *
+ * fgComputeCheapPreds: Function called to compute the BasicBlock::bbCheapPreds lists.
+ *
+ * No other block data is changed (e.g., bbRefs, bbFlags).
+ *
+ * The cheap preds lists are similar to the normal (bbPreds) predecessor lists, but are cheaper to
+ * compute and store, as follows:
+ * 1. A flow edge is typed BasicBlockList, which only has a block pointer and 'next' pointer. It doesn't
+ * have weights or a dup count.
+ * 2. The preds list for a block is not sorted by block number.
+ * 3. The predecessors of the block following a BBJ_CALLFINALLY (the corresponding BBJ_ALWAYS,
+ * for normal, non-retless calls to the finally) are not computed.
+ * 4. The cheap preds lists will contain duplicates if a single switch table has multiple branches
+ * to the same block. Thus, we don't spend the time looking for duplicates for every edge we insert.
+ */
+void Compiler::fgComputeCheapPreds()
+{
+ noway_assert(!fgComputePredsDone); // We can't do this if we've got the full preds.
+ noway_assert(fgFirstBB != nullptr);
+
+ BasicBlock* block;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgComputeCheapPreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Clear out the cheap preds lists.
+ fgRemovePreds();
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ fgAddCheapPred(block->bbJumpDest, block);
+ fgAddCheapPred(block->bbNext, block);
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_LEAVE: // If fgComputeCheapPreds is called before all blocks are imported, BBJ_LEAVE blocks are
+ // still in the BB list.
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ fgAddCheapPred(block->bbJumpDest, block);
+ break;
+
+ case BBJ_NONE:
+ fgAddCheapPred(block->bbNext, block);
+ break;
+
+ case BBJ_EHFILTERRET:
+ // Connect end of filter to catch handler.
+ // In a well-formed program, this cannot be null. Tolerate here, so that we can call
+ // fgComputeCheapPreds before fgImport on an ill-formed program; the problem will be detected in
+ // fgImport.
+ if (block->bbJumpDest != nullptr)
+ {
+ fgAddCheapPred(block->bbJumpDest, block);
+ }
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ fgAddCheapPred(*jumpTab, block);
+ } while (++jumpTab, --jumpCnt);
+
+ break;
+
+ case BBJ_EHFINALLYRET: // It's expensive to compute the preds for this case, so we don't for the cheap
+ // preds.
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ fgCheapPredsValid = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgComputeCheapPreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+}
+
+/*****************************************************************************
+ * Add 'blockPred' to the cheap predecessor list of 'block'.
+ */
+
+void Compiler::fgAddCheapPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(!fgComputePredsDone);
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+
+ block->bbCheapPreds = new (this, CMK_FlowList) BasicBlockList(blockPred, block->bbCheapPreds);
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(BasicBlockList);
+#endif // MEASURE_BLOCK_SIZE
+}
+
+/*****************************************************************************
+ * Remove 'blockPred' from the cheap predecessor list of 'block'.
+ * If there are duplicate edges, only remove one of them.
+ */
+void Compiler::fgRemoveCheapPred(BasicBlock* block, BasicBlock* blockPred)
+{
+ assert(!fgComputePredsDone);
+ assert(fgCheapPredsValid);
+
+ flowList* oldEdge = nullptr;
+
+ assert(block != nullptr);
+ assert(blockPred != nullptr);
+ assert(block->bbCheapPreds != nullptr);
+
+ /* Is this the first block in the pred list? */
+ if (blockPred == block->bbCheapPreds->block)
+ {
+ block->bbCheapPreds = block->bbCheapPreds->next;
+ }
+ else
+ {
+ BasicBlockList* pred;
+ for (pred = block->bbCheapPreds; pred->next != nullptr; pred = pred->next)
+ {
+ if (blockPred == pred->next->block)
+ {
+ break;
+ }
+ }
+ noway_assert(pred->next != nullptr); // we better have found it!
+ pred->next = pred->next->next; // splice it out
+ }
+}
+
+void Compiler::fgRemovePreds()
+{
+ C_ASSERT(offsetof(BasicBlock, bbPreds) ==
+ offsetof(BasicBlock, bbCheapPreds)); // bbPreds and bbCheapPreds are at the same place in a union,
+ C_ASSERT(sizeof(((BasicBlock*)0)->bbPreds) ==
+ sizeof(((BasicBlock*)0)->bbCheapPreds)); // and are the same size. So, this function removes both.
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ block->bbPreds = nullptr;
+ }
+ fgComputePredsDone = false;
+ fgCheapPredsValid = false;
+}
+
+/*****************************************************************************
+ *
+ * Function called to compute the bbPreds lists.
+ */
+void Compiler::fgComputePreds()
+{
+ noway_assert(fgFirstBB);
+
+ BasicBlock* block;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgComputePreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // reset the refs count for each basic block
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbRefs = 0;
+ }
+
+ /* the first block is always reachable! */
+ fgFirstBB->bbRefs = 1;
+
+ /* Treat the initial block as a jump target */
+ fgFirstBB->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ fgRemovePreds();
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ /* Mark the next block as being a jump target,
+ since the call target will return there */
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ block->bbNext->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+ }
+
+ __fallthrough;
+
+ case BBJ_LEAVE: // Sometimes fgComputePreds is called before all blocks are imported, so BBJ_LEAVE
+ // blocks are still in the BB list.
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+
+ /* Mark the jump dest block as being a jump target */
+ block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ fgAddRefPred(block->bbJumpDest, block, nullptr, true);
+
+ /* Is the next block reachable? */
+
+ if (block->bbJumpKind != BBJ_COND)
+ {
+ break;
+ }
+
+ noway_assert(block->bbNext);
+
+ /* Fall through, the next block is also reachable */
+ __fallthrough;
+
+ case BBJ_NONE:
+
+ fgAddRefPred(block->bbNext, block, nullptr, true);
+ break;
+
+ case BBJ_EHFILTERRET:
+
+ // Connect end of filter to catch handler.
+ // In a well-formed program, this cannot be null. Tolerate here, so that we can call
+ // fgComputePreds before fgImport on an ill-formed program; the problem will be detected in fgImport.
+ if (block->bbJumpDest != nullptr)
+ {
+ fgAddRefPred(block->bbJumpDest, block, nullptr, true);
+ }
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ /* Connect the end of the finally to the successor of
+ the call to this finally */
+
+ if (!block->hasHndIndex())
+ {
+ NO_WAY("endfinally outside a finally/fault block.");
+ }
+
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ if (!ehDsc->HasFinallyOrFaultHandler())
+ {
+ NO_WAY("endfinally outside a finally/fault block.");
+ }
+
+ if (ehDsc->HasFinallyHandler())
+ {
+ // Find all BBJ_CALLFINALLY that branched to this finally handler.
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ noway_assert(bcall->isBBCallAlwaysPair());
+ fgAddRefPred(bcall->bbNext, block, nullptr, true);
+ }
+ }
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ /* Mark the target block as being a jump target */
+ (*jumpTab)->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ fgAddRefPred(*jumpTab, block, nullptr, true);
+ } while (++jumpTab, --jumpCnt);
+
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ for (unsigned EHnum = 0; EHnum < compHndBBtabCount; EHnum++)
+ {
+ EHblkDsc* ehDsc = ehGetDsc(EHnum);
+
+ if (ehDsc->HasFilter())
+ {
+ ehDsc->ebdFilter->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ ehDsc->ebdHndBeg->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+
+ fgModified = false;
+ fgComputePredsDone = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgComputePreds()\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+}
+
+unsigned Compiler::fgNSuccsOfFinallyRet(BasicBlock* block)
+{
+ BasicBlock* bb;
+ unsigned res;
+ fgSuccOfFinallyRetWork(block, ~0, &bb, &res);
+ return res;
+}
+
+BasicBlock* Compiler::fgSuccOfFinallyRet(BasicBlock* block, unsigned i)
+{
+ BasicBlock* bb;
+ unsigned res;
+ fgSuccOfFinallyRetWork(block, i, &bb, &res);
+ return bb;
+}
+
+void Compiler::fgSuccOfFinallyRetWork(BasicBlock* block, unsigned i, BasicBlock** bres, unsigned* nres)
+{
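+ // Shared worker for fgNSuccsOfFinallyRet (called with i == ~0, which only counts
+ // the successors and returns the count through 'nres') and fgSuccOfFinallyRet
+ // (which returns the i'th successor through 'bres').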
+ assert(block->hasHndIndex()); // Otherwise, endfinally outside a finally/fault block?
+
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ assert(ehDsc->HasFinallyOrFaultHandler()); // Otherwise, endfinally outside a finally/fault block.
+
+ *bres = nullptr;
+ unsigned succNum = 0;
+
+ if (ehDsc->HasFinallyHandler())
+ {
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ assert(bcall->isBBCallAlwaysPair());
+
+ if (succNum == i)
+ {
+ *bres = bcall->bbNext;
+ return;
+ }
+ succNum++;
+ }
+ }
+ assert(i == ~0u || ehDsc->HasFaultHandler()); // Should reach here only for fault blocks.
+ if (i == ~0u)
+ {
+ *nres = succNum;
+ }
+}
+
+Compiler::SwitchUniqueSuccSet Compiler::GetDescriptorForSwitch(BasicBlock* switchBlk)
+{
+ assert(switchBlk->bbJumpKind == BBJ_SWITCH);
+ BlockToSwitchDescMap* switchMap = GetSwitchDescMap();
+ SwitchUniqueSuccSet res;
+ if (switchMap->Lookup(switchBlk, &res))
+ {
+ return res;
+ }
+ else
+ {
+ // We must compute the descriptor. Find which targets are duplicates by creating a bit set of the unique
+ // successors.
+ // We create a temporary bitset of blocks to compute the unique set of successor blocks,
+ // since adding a block's number twice leaves just one "copy" in the bitset. Note that
+ // we specifically don't use the BlockSet type, because doing so would require making a
+ // call to EnsureBasicBlockEpoch() to make sure the epoch is up-to-date. However, that
+ // can create a new epoch, thus invalidating all existing BlockSet objects, such as
+ // reachability information stored in the blocks. To avoid that, we just use a local BitVec.
+
+ BitVecTraits blockVecTraits(fgBBNumMax + 1, this);
+ BitVec BITVEC_INIT_NOCOPY(uniqueSuccBlocks, BitVecOps::MakeEmpty(&blockVecTraits));
+ BasicBlock** jumpTable = switchBlk->bbJumpSwt->bbsDstTab;
+ unsigned jumpCount = switchBlk->bbJumpSwt->bbsCount;
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* targ = jumpTable[i];
+ BitVecOps::AddElemD(&blockVecTraits, uniqueSuccBlocks, targ->bbNum);
+ }
+ // Now we have a set of unique successors.
+ unsigned numNonDups = BitVecOps::Count(&blockVecTraits, uniqueSuccBlocks);
+
+ typedef BasicBlock* BasicBlockPtr;
+ BasicBlockPtr* nonDups = new (getAllocator()) BasicBlockPtr[numNonDups];
+
+ unsigned nonDupInd = 0;
+ // At this point, all unique targets are in "uniqueSuccBlocks". As we encounter each,
+ // add to nonDups, remove from "uniqueSuccBlocks".
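+ // Removing each target from the set as it is recorded keeps only the first
+ // occurrence of each successor, in jump-table order, and skips later duplicates.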
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BasicBlock* targ = jumpTable[i];
+ if (BitVecOps::IsMember(&blockVecTraits, uniqueSuccBlocks, targ->bbNum))
+ {
+ nonDups[nonDupInd] = targ;
+ nonDupInd++;
+ BitVecOps::RemoveElemD(&blockVecTraits, uniqueSuccBlocks, targ->bbNum);
+ }
+ }
+
+ assert(nonDupInd == numNonDups);
+ assert(BitVecOps::Count(&blockVecTraits, uniqueSuccBlocks) == 0);
+ res.numDistinctSuccs = numNonDups;
+ res.nonDuplicates = nonDups;
+ switchMap->Set(switchBlk, res);
+ return res;
+ }
+}
+
+void Compiler::SwitchUniqueSuccSet::UpdateTarget(IAllocator* alloc,
+ BasicBlock* switchBlk,
+ BasicBlock* from,
+ BasicBlock* to)
+{
+ assert(switchBlk->bbJumpKind == BBJ_SWITCH); // Precondition.
+ unsigned jmpTabCnt = switchBlk->bbJumpSwt->bbsCount;
+ BasicBlock** jmpTab = switchBlk->bbJumpSwt->bbsDstTab;
+
+ // Is "from" still in the switch table (because it had more than one entry before?)
+ bool fromStillPresent = false;
+ for (unsigned i = 0; i < jmpTabCnt; i++)
+ {
+ if (jmpTab[i] == from)
+ {
+ fromStillPresent = true;
+ break;
+ }
+ }
+
+ // Is "to" already in "this"?
+ bool toAlreadyPresent = false;
+ for (unsigned i = 0; i < numDistinctSuccs; i++)
+ {
+ if (nonDuplicates[i] == to)
+ {
+ toAlreadyPresent = true;
+ break;
+ }
+ }
+
+ // Four cases:
+ // If "from" is still present, and "to" is already present, do nothing
+ // If "from" is still present, and "to" is not, must reallocate to add an entry.
+ // If "from" is not still present, and "to" is not present, write "to" where "from" was.
+ // If "from" is not still present, but "to" is present, remove "from".
+ if (fromStillPresent && toAlreadyPresent)
+ {
+ return;
+ }
+ else if (fromStillPresent && !toAlreadyPresent)
+ {
+ // reallocate to add an entry
+ typedef BasicBlock* BasicBlockPtr;
+ BasicBlockPtr* newNonDups = new (alloc) BasicBlockPtr[numDistinctSuccs + 1];
+ memcpy(newNonDups, nonDuplicates, numDistinctSuccs * sizeof(BasicBlock*));
+ newNonDups[numDistinctSuccs] = to;
+ numDistinctSuccs++;
+ nonDuplicates = newNonDups;
+ }
+ else if (!fromStillPresent && !toAlreadyPresent)
+ {
+#ifdef DEBUG
+ // write "to" where "from" was
+ bool foundFrom = false;
+#endif // DEBUG
+ for (unsigned i = 0; i < numDistinctSuccs; i++)
+ {
+ if (nonDuplicates[i] == from)
+ {
+ nonDuplicates[i] = to;
+#ifdef DEBUG
+ foundFrom = true;
+#endif // DEBUG
+ break;
+ }
+ }
+ assert(foundFrom);
+ }
+ else
+ {
+ assert(!fromStillPresent && toAlreadyPresent);
+#ifdef DEBUG
+ // remove "from".
+ bool foundFrom = false;
+#endif // DEBUG
+ for (unsigned i = 0; i < numDistinctSuccs; i++)
+ {
+ if (nonDuplicates[i] == from)
+ {
+ nonDuplicates[i] = nonDuplicates[numDistinctSuccs - 1];
+ numDistinctSuccs--;
+#ifdef DEBUG
+ foundFrom = true;
+#endif // DEBUG
+ break;
+ }
+ }
+ assert(foundFrom);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Simple utility function to remove the entry for a block from the switch
+ * descriptor map, so that it can be called from other phases.
+ *
+ */
+void Compiler::fgInvalidateSwitchDescMapEntry(BasicBlock* block)
+{
+ // Check if map has no entries yet.
+ if (m_switchDescMap != nullptr)
+ {
+ m_switchDescMap->Remove(block);
+ }
+}
+
+void Compiler::UpdateSwitchTableTarget(BasicBlock* switchBlk, BasicBlock* from, BasicBlock* to)
+{
+ if (m_switchDescMap == nullptr)
+ {
+ return; // No mappings, nothing to do.
+ }
+
+ // Otherwise...
+ BlockToSwitchDescMap* switchMap = GetSwitchDescMap();
+ SwitchUniqueSuccSet* res = switchMap->LookupPointer(switchBlk);
+ if (res != nullptr)
+ {
+ // If no result, nothing to do. Otherwise, update it.
+ res->UpdateTarget(getAllocator(), switchBlk, from, to);
+ }
+}
+
+/*****************************************************************************
+ * For a block that is in a handler region, find the first block of the most-nested
+ * handler containing the block.
+ */
+BasicBlock* Compiler::fgFirstBlockOfHandler(BasicBlock* block)
+{
+ assert(block->hasHndIndex());
+ return ehGetDsc(block->getHndIndex())->ebdHndBeg;
+}
+
+/*****************************************************************************
+ *
+ * Function called to find back edges and return blocks and mark them as needing GC Polls. All blocks
+ * are examined; only those with a backward branch (or a return) are marked.
+ */
+void Compiler::fgMarkGCPollBlocks()
+{
+ if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+
+ BasicBlock* block;
+
+ // Return blocks always need GC polls. In addition, all back edges (including those from switch
+ // statements) need GC polls. The poll is on the block with the outgoing back edge (or ret), rather than
+ // on the destination or on the edge itself.
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ bool blockNeedsPoll = false;
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ blockNeedsPoll = (block->bbJumpDest->bbNum <= block->bbNum);
+ break;
+
+ case BBJ_RETURN:
+ blockNeedsPoll = true;
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ if ((*jumpTab)->bbNum <= block->bbNum)
+ {
+ blockNeedsPoll = true;
+ break;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+
+ if (blockNeedsPoll)
+ {
+ block->bbFlags |= BBF_NEEDS_GCPOLL;
+ }
+ }
+}
+
+void Compiler::fgInitBlockVarSets()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->InitVarSets(this);
+ }
+
+ // QMarks are much like blocks, and need their VarSets initialized.
+ assert(!compIsForInlining());
+ for (unsigned i = 0; i < compQMarks->Size(); i++)
+ {
+ GenTreePtr qmark = compQMarks->Get(i);
+ // Perhaps the gtOper of a QMark node was changed to something else since it was created and put on this list.
+ // So can't hurt to check.
+ if (qmark->OperGet() == GT_QMARK)
+ {
+ VarSetOps::AssignAllowUninitRhs(this, qmark->gtQmark.gtThenLiveSet, VarSetOps::UninitVal());
+ VarSetOps::AssignAllowUninitRhs(this, qmark->gtQmark.gtElseLiveSet, VarSetOps::UninitVal());
+ }
+ }
+ fgBBVarSetsInited = true;
+}
+
+/*****************************************************************************
+ *
+ * The following does the final pass on BBF_NEEDS_GCPOLL and then actually creates the GC Polls.
+ */
+void Compiler::fgCreateGCPolls()
+{
+ if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ return;
+ }
+
+ bool createdPollBlocks = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgCreateGCPolls() for %s\n", info.compFullName);
+ }
+#endif // DEBUG
+
+ if (!(opts.MinOpts() || opts.compDbgCode))
+ {
+ // Remove polls from well formed loops with a constant upper bound.
+ for (unsigned lnum = 0; lnum < optLoopCount; ++lnum)
+ {
+ // Look for constant counted loops that run for a short duration. This logic is very similar to
+ // what's in code:Compiler::optUnrollLoops, since they have similar constraints. However, this
+ // logic is much more permissive since we're not doing a complex transformation.
+
+ /* TODO-Cleanup:
+ * I feel bad cloning so much logic from optUnrollLoops
+ */
+
+ // Filter out loops not meeting the obvious preconditions.
+ //
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (!(optLoopTable[lnum].lpFlags & LPFLG_CONST))
+ {
+ continue;
+ }
+
+ BasicBlock* head = optLoopTable[lnum].lpHead;
+ BasicBlock* bottom = optLoopTable[lnum].lpBottom;
+
+ // Loops dominated by GC_SAFE_POINT won't have this set.
+ if (!(bottom->bbFlags & BBF_NEEDS_GCPOLL))
+ {
+ continue;
+ }
+
+ /* Get the loop data:
+ - initial constant
+ - limit constant
+ - iterator
+ - iterator increment
+ - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
+ - loop test type (i.e. GT_GE, GT_LT, etc...)
+ */
+
+ int lbeg = optLoopTable[lnum].lpConstInit;
+ int llim = optLoopTable[lnum].lpConstLimit();
+ genTreeOps testOper = optLoopTable[lnum].lpTestOper();
+
+ int lvar = optLoopTable[lnum].lpIterVar();
+ int iterInc = optLoopTable[lnum].lpIterConst();
+ genTreeOps iterOper = optLoopTable[lnum].lpIterOper();
+
+ var_types iterOperType = optLoopTable[lnum].lpIterOperType();
+ bool unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
+ if (lvaTable[lvar].lvAddrExposed)
+ { // Can't reason about the value of the iteration variable.
+ continue;
+ }
+
+ unsigned totalIter;
+
+ /* Find the number of iterations - the function returns false if not a constant number */
+
+ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest,
+ // The value here doesn't matter for this variation of the optimization
+ true, &totalIter))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Could not compute loop iterations for loop from BB%02u to BB%02u", head->bbNum,
+ bottom->bbNum);
+ }
+#endif // DEBUG
+ (void)head; // suppress gcc error.
+
+ continue;
+ }
+
+ /* Forget it if there are too many repetitions or not a constant loop */
+
+ static const unsigned ITER_LIMIT = 256;
+ if (totalIter > ITER_LIMIT)
+ {
+ continue;
+ }
+
+ // It is safe to eliminate the poll from this loop.
+ bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in block BB%02u because it forms a bounded counted loop\n", bottom->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+
+ // Final chance to optimize the polls. Move all polls in loops from the bottom of the loop up to the
+ // loop head. Also eliminate all epilog polls in non-leaf methods. This only works if we have dominator
+ // information.
+ if (fgDomsComputed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (!(block->bbFlags & BBF_NEEDS_GCPOLL))
+ {
+ continue;
+ }
+
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS)
+ {
+ // make sure that this is loop-like
+ if (!fgReachable(block->bbJumpDest, block))
+ {
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in block BB%02u because it is not loop\n", block->bbNum);
+ }
+#endif // DEBUG
+ continue;
+ }
+ }
+ else if (!(block->bbJumpKind == BBJ_RETURN || block->bbJumpKind == BBJ_SWITCH))
+ {
+ noway_assert(!"GC Poll on a block that has no control transfer.");
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in block BB%02u because it is not a jump\n", block->bbNum);
+ }
+#endif // DEBUG
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ continue;
+ }
+
+ // Because of block compaction, it's possible to end up with a block that is both poll and safe.
+ // Clean those up now.
+
+ if (block->bbFlags & BBF_GC_SAFE_POINT)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in return block BB%02u because it is GC Safe\n", block->bbNum);
+ }
+#endif // DEBUG
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ continue;
+ }
+
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ if (!optReachWithoutCall(fgFirstBB, block))
+ {
+ // check to see if there is a call along the path between the first block and the return
+ // block.
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing poll in return block BB%02u because it dominated by a call\n", block->bbNum);
+ }
+#endif // DEBUG
+ continue;
+ }
+ }
+ }
+ }
+
+ noway_assert(!fgGCPollsCreated);
+ BasicBlock* block;
+ fgGCPollsCreated = true;
+
+ // Walk through the blocks and hunt for a block that has BBF_NEEDS_GCPOLL
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Because of block compaction, it's possible to end up with a block that is both poll and safe.
+ // And if !fgDomsComputed, we won't have cleared them, so skip them now
+ if (!(block->bbFlags & BBF_NEEDS_GCPOLL) || (block->bbFlags & BBF_GC_SAFE_POINT))
+ {
+ continue;
+ }
+
+ // This block needs a poll. We either just insert a callout or we split the block and inline part of
+ // the test. This depends on the value of opts.compGCPollType.
+
+ // If we're doing GCPOLL_CALL, just insert a GT_CALL node before the last node in the block.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ switch (block->bbJumpKind)
+ {
+ case BBJ_RETURN:
+ case BBJ_ALWAYS:
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ break;
+ default:
+ noway_assert(!"Unknown block type for BBF_NEEDS_GCPOLL");
+ }
+#endif // DEBUG
+
+ noway_assert(opts.compGCPollType);
+
+ GCPollType pollType = opts.compGCPollType;
+ // pollType is set to either CALL or INLINE at this point. Below is the list of places where we
+ // can't or don't want to emit an inline check. Check all of those. If after all of that we still
+ // have INLINE, then emit an inline check.
+
+ if (opts.MinOpts() || opts.compDbgCode)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Selecting CALL poll in block BB%02u because of debug/minopts\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // Don't split blocks and create inlined polls unless we're optimizing.
+ pollType = GCPOLL_CALL;
+ }
+ else if (genReturnBB == block)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Selecting CALL poll in block BB%02u because it is the single return block\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // we don't want to split the single return block
+ pollType = GCPOLL_CALL;
+ }
+ else if (BBJ_SWITCH == block->bbJumpKind)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Selecting CALL poll in block BB%02u because it is a loop formed by a SWITCH\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // I don't want to deal with all the outgoing edges of a switch block.
+ pollType = GCPOLL_CALL;
+ }
+
+ // TODO-Cleanup: potentially don't split if we're in an EH region.
+
+ createdPollBlocks |= fgCreateGCPoll(pollType, block);
+ }
+
+ // If we split a block to create a GC Poll, then rerun fgReorderBlocks to push the rarely run blocks out
+ // past the epilog. We should never split blocks unless we're optimizing.
+ if (createdPollBlocks)
+ {
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ fgReorderBlocks();
+ }
+}
+
+/*****************************************************************************
+ *
+ * Actually create a GCPoll in the given block. Returns true if it created
+ * a basic block.
+ */
+
+bool Compiler::fgCreateGCPoll(GCPollType pollType, BasicBlock* block)
+{
+ assert(!(block->bbFlags & BBF_GC_SAFE_POINT));
+ bool createdPollBlocks;
+
+ void* addrTrap;
+ void* pAddrOfCaptureThreadGlobal;
+
+ addrTrap = info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
+
+#ifdef ENABLE_FAST_GCPOLL_HELPER
+ // I never want to split blocks if we've got two indirections here.
+ // This is a size trade-off assuming the VM has ENABLE_FAST_GCPOLL_HELPER,
+ // so don't do it when that is off.
+ if (pAddrOfCaptureThreadGlobal != NULL)
+ {
+ pollType = GCPOLL_CALL;
+ }
+#endif // ENABLE_FAST_GCPOLL_HELPER
+
+ if (GCPOLL_CALL == pollType)
+ {
+ createdPollBlocks = false;
+ GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+#if GTF_CALL_REG_SAVE
+ tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+#endif // GTF_CALL_REG_SAVE
+
+ // for BBJ_ALWAYS I don't need to insert it before the condition. Just append it.
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ fgInsertStmtAtEnd(block, tree);
+ }
+ else
+ {
+ GenTreeStmt* newStmt = fgInsertStmtNearEnd(block, tree);
+ // For DDB156656, we need to associate the GC Poll with the IL offset (and therefore sequence
+ // point) of the tree before which we inserted the poll. One example of when this is a
+ // problem:
+ // if (...) { //1
+ // ...
+ // } //2
+ // else { //3
+ // ...
+ // }
+ // (gcpoll) //4
+ // return. //5
+ //
+ // If we take the if statement at 1, we encounter a jump at 2. This jumps over the else
+ // and lands at 4. 4 is where we inserted the gcpoll. However, that is associated with
+ // the sequence point at 3. Therefore, the debugger displays the wrong source line at the
+ // gc poll location.
+ //
+ // More formally, if control flow targets an instruction, that instruction must be the
+ // start of a new sequence point.
+ if (newStmt->gtNext)
+ {
+ // Is it possible for gtNext to be NULL?
+ noway_assert(newStmt->gtNext->gtOper == GT_STMT);
+ newStmt->gtStmtILoffsx = newStmt->gtNextStmt->gtStmtILoffsx;
+ }
+ }
+
+ block->bbFlags |= BBF_GC_SAFE_POINT;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*** creating GC Poll in block BB%02u\n", block->bbNum);
+ gtDispTreeList(block->bbTreeList);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ createdPollBlocks = true;
+ // if we're doing GCPOLL_INLINE, then:
+ // 1) Create two new blocks: Poll and Bottom. The original block is called Top.
+
+ // I want to create:
+ // top -> poll -> bottom (lexically)
+ // so that we jump over poll to get to bottom.
+ BasicBlock* top = block;
+ BasicBlock* poll = fgNewBBafter(BBJ_NONE, top, true);
+ BasicBlock* bottom = fgNewBBafter(top->bbJumpKind, poll, true);
+ BBjumpKinds oldJumpKind = top->bbJumpKind;
+
+ // Update block flags
+ unsigned originalFlags;
+ originalFlags = top->bbFlags | BBF_GC_SAFE_POINT;
+
+ // Unlike Fei's inliner from puclr, I'm allowed to split loops.
+ // And we keep a few other flags...
+ noway_assert((originalFlags & (BBF_SPLIT_NONEXIST & ~(BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1))) == 0);
+ top->bbFlags = originalFlags & (~BBF_SPLIT_LOST | BBF_GC_SAFE_POINT);
+ bottom->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT);
+ bottom->inheritWeight(top);
+ poll->bbFlags |= originalFlags & (BBF_SPLIT_GAINED | BBF_IMPORTED | BBF_GC_SAFE_POINT);
+
+ // 9) Mark Poll as rarely run.
+ poll->bbSetRunRarely();
+
+ // 5) Bottom gets all the outgoing edges and inherited flags of Original.
+ bottom->bbJumpDest = top->bbJumpDest;
+
+ // 2) Add a GC_CALL node to Poll.
+ GenTreePtr tree = gtNewHelperCallNode(CORINFO_HELP_POLL_GC, TYP_VOID);
+#if GTF_CALL_REG_SAVE
+ tree->gtCall.gtCallMoreFlags |= GTF_CALL_REG_SAVE;
+#endif // GTF_CALL_REG_SAVE
+ fgInsertStmtAtEnd(poll, tree);
+
+ // 3) Remove the last statement from Top and add it to Bottom.
+ if (oldJumpKind != BBJ_ALWAYS)
+ {
+ // if I'm always jumping to the target, then this is not a condition that needs moving.
+ GenTreeStmt* stmt = top->firstStmt();
+ while (stmt->gtNext)
+ {
+ stmt = stmt->gtNextStmt;
+ }
+ fgRemoveStmt(top, stmt);
+ fgInsertStmtAtEnd(bottom, stmt);
+ }
+
+ // for BBJ_ALWAYS blocks, bottom is an empty block.
+
+ // 4) Create a GT_EQ node that checks against g_TrapReturningThreads. True jumps to Bottom,
+ // false falls through to poll. Add this to the end of Top. Top is now BBJ_COND. Bottom is
+ // now a jump target
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef ENABLE_FAST_GCPOLL_HELPER
+ // Prefer the fast GC poll helper over the double indirection
+ noway_assert(pAddrOfCaptureThreadGlobal == nullptr);
+#endif
+
+ GenTreePtr trap;
+ if (pAddrOfCaptureThreadGlobal != nullptr)
+ {
+ trap = gtNewOperNode(GT_IND, TYP_I_IMPL,
+ gtNewIconHandleNode((size_t)pAddrOfCaptureThreadGlobal, GTF_ICON_PTR_HDL));
+ }
+ else
+ {
+ trap = gtNewIconHandleNode((size_t)addrTrap, GTF_ICON_PTR_HDL);
+ }
+
+ GenTreePtr trapRelop = gtNewOperNode(GT_EQ, TYP_INT,
+ // lhs [g_TrapReturningThreads]
+ gtNewOperNode(GT_IND, TYP_INT, trap),
+ // rhs 0
+ gtNewIconNode(0, TYP_INT));
+ trapRelop->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE; // Treat reading g_TrapReturningThreads as volatile.
+ GenTreePtr trapCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, trapRelop);
+ fgInsertStmtAtEnd(top, trapCheck);
+ top->bbJumpDest = bottom;
+ top->bbJumpKind = BBJ_COND;
+ bottom->bbFlags |= BBF_JMP_TARGET;
+
+ // 7) Bottom has Top and Poll as its predecessors. Poll has just Top as a predecessor.
+ fgAddRefPred(bottom, poll);
+ fgAddRefPred(bottom, top);
+ fgAddRefPred(poll, top);
+
+ // 8) Replace Top with Bottom in the predecessor list of all outgoing edges from Bottom (1 for
+ // jumps, 2 for conditional branches, N for switches).
+ switch (oldJumpKind)
+ {
+ case BBJ_RETURN:
+ // no successors
+ break;
+ case BBJ_COND:
+ // replace predecessor in the fall through block.
+ noway_assert(bottom->bbNext);
+ fgReplacePred(bottom->bbNext, top, bottom);
+
+ // fall through for the jump target
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ fgReplacePred(bottom->bbJumpDest, top, bottom);
+ break;
+ case BBJ_SWITCH:
+ NO_WAY("SWITCH should be a call rather than an inlined poll.");
+ break;
+ default:
+ NO_WAY("Unknown block type for updating predecessor lists.");
+ }
+
+ top->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ noway_assert(!(poll->bbFlags & BBF_NEEDS_GCPOLL));
+ noway_assert(!(bottom->bbFlags & BBF_NEEDS_GCPOLL));
+
+ if (compCurBB == top)
+ {
+ compCurBB = bottom;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*** creating inlined GC Poll in top block BB%02u\n", top->bbNum);
+ gtDispTreeList(top->bbTreeList);
+ printf(" poll block is BB%02u\n", poll->bbNum);
+ gtDispTreeList(poll->bbTreeList);
+ printf(" bottom block is BB%02u\n", bottom->bbNum);
+ gtDispTreeList(bottom->bbTreeList);
+ }
+#endif // DEBUG
+ }
+
+ return createdPollBlocks;
+}
+
+/*****************************************************************************
+ *
+ * The following helps find a basic block given its PC offset.
+ */
+
+void Compiler::fgInitBBLookup()
+{
+ BasicBlock** dscBBptr;
+ BasicBlock* tmpBBdesc;
+
+ /* Allocate the basic block table */
+
+ dscBBptr = fgBBs = new (this, CMK_BasicBlock) BasicBlock*[fgBBcount];
+
+ /* Walk all the basic blocks, filling in the table */
+
+ for (tmpBBdesc = fgFirstBB; tmpBBdesc; tmpBBdesc = tmpBBdesc->bbNext)
+ {
+ *dscBBptr++ = tmpBBdesc;
+ }
+
+ noway_assert(dscBBptr == fgBBs + fgBBcount);
+}
+
+BasicBlock* Compiler::fgLookupBB(unsigned addr)
+{
+ unsigned lo;
+ unsigned hi;
+
+ /* Do a binary search */
+
+ for (lo = 0, hi = fgBBcount - 1;;)
+ {
+
+ AGAIN:;
+
+ if (lo > hi)
+ {
+ break;
+ }
+
+ unsigned mid = (lo + hi) / 2;
+ BasicBlock* dsc = fgBBs[mid];
+
+ // We introduce internal blocks for BBJ_CALLFINALLY. Skip over these.
+
+ while (dsc->bbFlags & BBF_INTERNAL)
+ {
+ dsc = dsc->bbNext;
+ mid++;
+
+ // We skipped over too many; set hi back to the original mid - 1
+
+ if (mid > hi)
+ {
+ mid = (lo + hi) / 2;
+ hi = mid - 1;
+ goto AGAIN;
+ }
+ }
+
+ unsigned pos = dsc->bbCodeOffs;
+
+ if (pos < addr)
+ {
+ if ((lo == hi) && (lo == (fgBBcount - 1)))
+ {
+ noway_assert(addr == dsc->bbCodeOffsEnd);
+ return nullptr; // NULL means the end of method
+ }
+ lo = mid + 1;
+ continue;
+ }
+
+ if (pos > addr)
+ {
+ hi = mid - 1;
+ continue;
+ }
+
+ return dsc;
+ }
+#ifdef DEBUG
+ printf("ERROR: Couldn't find basic block at offset %04X\n", addr);
+#endif // DEBUG
+ NO_WAY("fgLookupBB failed.");
+}
+
+/*****************************************************************************
+ *
+ * The 'jump target' array uses the following flags to indicate what kind
+ * of label is present.
+ */
+
+#define JT_NONE 0x00 // This IL offset is never used
+#define JT_ADDR 0x01 // merely make sure this is an OK address
+#define JT_JUMP 0x02 // 'normal' jump target
+#define JT_MULTI 0x04 // target of multiple jumps
+
+inline void Compiler::fgMarkJumpTarget(BYTE* jumpTarget, unsigned offs)
+{
+ /* Make sure we set JT_MULTI if target of multiple jumps */
+
+ noway_assert(JT_MULTI == JT_JUMP << 1);
+
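+ // If JT_JUMP was already set for this offset, the shifted bit below also sets
+ // JT_MULTI; otherwise only JT_JUMP is set.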
+ jumpTarget[offs] |= (jumpTarget[offs] & JT_JUMP) << 1 | JT_JUMP;
+}
+
+//------------------------------------------------------------------------
+// FgStack: simple stack model for the inlinee's evaluation stack.
+//
+// Model the inputs available to various operations in the inline body.
+// Tracks constants, arguments, array lengths.
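+//
+// For example, after the IL sequence "ldarg.0; ldc.i4 5", slot1 models the
+// argument and slot0 the constant; at a following compare or branch this feeds
+// the CALLEE_ARG_FEEDS_CONSTANT_TEST observation in fgObserveInlineConstants.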
+
+class FgStack
+{
+public:
+ FgStack() : slot0(SLOT_INVALID), slot1(SLOT_INVALID), depth(0)
+ {
+ // Empty
+ }
+
+ void Clear()
+ {
+ depth = 0;
+ }
+ void PushUnknown()
+ {
+ Push(SLOT_UNKNOWN);
+ }
+ void PushConstant()
+ {
+ Push(SLOT_CONSTANT);
+ }
+ void PushArrayLen()
+ {
+ Push(SLOT_ARRAYLEN);
+ }
+ void PushArgument(unsigned arg)
+ {
+ Push(SLOT_ARGUMENT + arg);
+ }
+ unsigned GetSlot0() const
+ {
+ assert(depth >= 1);
+ return slot0;
+ }
+ unsigned GetSlot1() const
+ {
+ assert(depth >= 2);
+ return slot1;
+ }
+ static bool IsConstant(unsigned value)
+ {
+ return value == SLOT_CONSTANT;
+ }
+ static bool IsArrayLen(unsigned value)
+ {
+ return value == SLOT_ARRAYLEN;
+ }
+ static bool IsArgument(unsigned value)
+ {
+ return value >= SLOT_ARGUMENT;
+ }
+ static unsigned SlotTypeToArgNum(unsigned value)
+ {
+ assert(IsArgument(value));
+ return value - SLOT_ARGUMENT;
+ }
+ bool IsStackTwoDeep() const
+ {
+ return depth == 2;
+ }
+ bool IsStackOneDeep() const
+ {
+ return depth == 1;
+ }
+ bool IsStackAtLeastOneDeep() const
+ {
+ return depth >= 1;
+ }
+
+private:
+ enum
+ {
+ SLOT_INVALID = UINT_MAX,
+ SLOT_UNKNOWN = 0,
+ SLOT_CONSTANT = 1,
+ SLOT_ARRAYLEN = 2,
+ SLOT_ARGUMENT = 3
+ };
+
+ void Push(int type)
+ {
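+ // Only the top two stack slots are modeled: on a push at depth 2 the old slot1
+ // value is discarded, and the tracked depth saturates at 2.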
+ switch (depth)
+ {
+ case 0:
+ ++depth;
+ slot0 = type;
+ break;
+ case 1:
+ ++depth;
+ __fallthrough;
+ case 2:
+ slot1 = slot0;
+ slot0 = type;
+ }
+ }
+
+ unsigned slot0;
+ unsigned slot1;
+ unsigned depth;
+};
+
+//------------------------------------------------------------------------
+// fgFindJumpTargets: walk the IL stream, determining jump target offsets
+//
+// Arguments:
+// codeAddr - base address of the IL code buffer
+// codeSize - number of bytes in the IL code buffer
+// jumpTarget - [OUT] byte array for flagging jump targets
+//
+// Notes:
+// If inlining or prejitting the root, this method also makes
+// various observations about the method that factor into inline
+// decisions.
+//
+// May throw an exception if the IL is malformed.
+//
+// jumpTarget[N] is set to a JT_* value if IL offset N is a
+// jump target in the method.
+//
+// Also sets lvAddrExposed and lvArgWrite in lvaTable[].
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+
+void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget)
+{
+ const BYTE* codeBegp = codeAddr;
+ const BYTE* codeEndp = codeAddr + codeSize;
+ unsigned varNum;
+ bool seenJump = false;
+ var_types varType = DUMMY_INIT(TYP_UNDEF); // TYP_ type
+ typeInfo ti; // Verifier type.
+ bool typeIsNormed = false;
+ FgStack pushedStack;
+ const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
+ const bool makeInlineObservations = (compInlineResult != nullptr);
+ const bool isInlining = compIsForInlining();
+
+ if (makeInlineObservations)
+ {
+ // Observe force inline state and code size.
+ compInlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline);
+ compInlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize);
+
+#ifdef DEBUG
+
+ // If inlining, this method should still be a candidate.
+ if (isInlining)
+ {
+ assert(compInlineResult->IsCandidate());
+ }
+
+#endif // DEBUG
+
+ // note that we're starting to look at the opcodes.
+ compInlineResult->Note(InlineObservation::CALLEE_BEGIN_OPCODE_SCAN);
+ }
+
+ while (codeAddr < codeEndp)
+ {
+ OPCODE opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+ opts.instrCount++;
+ typeIsNormed = false;
+
+ DECODE_OPCODE:
+
+ if (opcode >= CEE_COUNT)
+ {
+ BADCODE3("Illegal opcode", ": %02X", (int)opcode);
+ }
+
+ if ((opcode >= CEE_LDARG_0 && opcode <= CEE_STLOC_S) || (opcode >= CEE_LDARG && opcode <= CEE_STLOC))
+ {
+ opts.lvRefCount++;
+ }
+
+ if (makeInlineObservations && (opcode >= CEE_LDNULL) && (opcode <= CEE_LDC_R8))
+ {
+ pushedStack.PushConstant();
+ }
+
+ unsigned sz = opcodeSizes[opcode];
+
+ switch (opcode)
+ {
+ case CEE_PREFIX1:
+ {
+ if (codeAddr >= codeEndp)
+ {
+ goto TOO_FAR;
+ }
+ opcode = (OPCODE)(256 + getU1LittleEndian(codeAddr));
+ codeAddr += sizeof(__int8);
+ goto DECODE_OPCODE;
+ }
+
+ case CEE_PREFIX2:
+ case CEE_PREFIX3:
+ case CEE_PREFIX4:
+ case CEE_PREFIX5:
+ case CEE_PREFIX6:
+ case CEE_PREFIX7:
+ case CEE_PREFIXREF:
+ {
+ BADCODE3("Illegal opcode", ": %02X", (int)opcode);
+ }
+
+ case CEE_CALL:
+ case CEE_CALLVIRT:
+ {
+ // There has to be code after the call, otherwise the inlinee is unverifiable.
+ if (isInlining)
+ {
+
+ noway_assert(codeAddr < codeEndp - sz);
+ }
+
+ // If the method has a call followed by a ret, assume that
+ // it is a wrapper method.
+ if (makeInlineObservations)
+ {
+ if ((OPCODE)getU1LittleEndian(codeAddr + sz) == CEE_RET)
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER);
+ }
+ }
+ }
+ break;
+
+ case CEE_LEAVE:
+ case CEE_LEAVE_S:
+ case CEE_BR:
+ case CEE_BR_S:
+ case CEE_BRFALSE:
+ case CEE_BRFALSE_S:
+ case CEE_BRTRUE:
+ case CEE_BRTRUE_S:
+ case CEE_BEQ:
+ case CEE_BEQ_S:
+ case CEE_BGE:
+ case CEE_BGE_S:
+ case CEE_BGE_UN:
+ case CEE_BGE_UN_S:
+ case CEE_BGT:
+ case CEE_BGT_S:
+ case CEE_BGT_UN:
+ case CEE_BGT_UN_S:
+ case CEE_BLE:
+ case CEE_BLE_S:
+ case CEE_BLE_UN:
+ case CEE_BLE_UN_S:
+ case CEE_BLT:
+ case CEE_BLT_S:
+ case CEE_BLT_UN:
+ case CEE_BLT_UN_S:
+ case CEE_BNE_UN:
+ case CEE_BNE_UN_S:
+ {
+ seenJump = true;
+
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ // Compute jump target address
+ signed jmpDist = (sz == 1) ? getI1LittleEndian(codeAddr) : getI4LittleEndian(codeAddr);
+
+ if (compIsForInlining() && jmpDist == 0 &&
+ (opcode == CEE_LEAVE || opcode == CEE_LEAVE_S || opcode == CEE_BR || opcode == CEE_BR_S))
+ {
+ break; /* NOP */
+ }
+
+ unsigned jmpAddr = (IL_OFFSET)(codeAddr - codeBegp) + sz + jmpDist;
+
+ // Make sure target is reasonable
+ if (jmpAddr >= codeSize)
+ {
+ BADCODE3("code jumps to outer space", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ // Mark the jump target
+ fgMarkJumpTarget(jumpTarget, jmpAddr);
+
+ // See if jump might be sensitive to inlining
+ if (makeInlineObservations && (opcode != CEE_BR_S) && (opcode != CEE_BR))
+ {
+ fgObserveInlineConstants(opcode, pushedStack, isInlining);
+ }
+ }
+ break;
+
+ case CEE_SWITCH:
+ {
+ seenJump = true;
+
+ if (makeInlineObservations)
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_HAS_SWITCH);
+
+ // Fail fast, if we're inlining and can't handle this.
+ if (isInlining && compInlineResult->IsFailure())
+ {
+ return;
+ }
+ }
+
+ // Make sure we don't go past the end reading the number of cases
+ if (codeAddr > codeEndp - sizeof(DWORD))
+ {
+ goto TOO_FAR;
+ }
+
+ // Read the number of cases
+ unsigned jmpCnt = getU4LittleEndian(codeAddr);
+ codeAddr += sizeof(DWORD);
+
+ if (jmpCnt > codeSize / sizeof(DWORD))
+ {
+ goto TOO_FAR;
+ }
+
+ // Find the end of the switch table
+ unsigned jmpBase = (unsigned)((codeAddr - codeBegp) + jmpCnt * sizeof(DWORD));
+
+ // Make sure there is more code after the switch
+ if (jmpBase >= codeSize)
+ {
+ goto TOO_FAR;
+ }
+
+ // jmpBase is also the target of the default case, so mark it
+ fgMarkJumpTarget(jumpTarget, jmpBase);
+
+ // Process table entries
+ while (jmpCnt > 0)
+ {
+ unsigned jmpAddr = jmpBase + getI4LittleEndian(codeAddr);
+ codeAddr += 4;
+
+ if (jmpAddr >= codeSize)
+ {
+ BADCODE3("jump target out of range", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ fgMarkJumpTarget(jumpTarget, jmpAddr);
+ jmpCnt--;
+ }
+
+ // We've advanced past all the bytes in this instruction
+ sz = 0;
+ }
+ break;
+
+ case CEE_UNALIGNED:
+ case CEE_CONSTRAINED:
+ case CEE_READONLY:
+ case CEE_VOLATILE:
+ case CEE_TAILCALL:
+ {
+ if (codeAddr >= codeEndp)
+ {
+ goto TOO_FAR;
+ }
+ }
+ break;
+
+ case CEE_STARG:
+ case CEE_STARG_S:
+ {
+ noway_assert(sz == sizeof(BYTE) || sz == sizeof(WORD));
+
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+ varNum = compMapILargNum(varNum); // account for possible hidden param
+
+ // This check is only intended to prevent an AV. Bad varNum values will later
+ // be handled properly by the verifier.
+ if (varNum < lvaTableCnt)
+ {
+ if (isInlining)
+ {
+ impInlineInfo->inlArgInfo[varNum].argHasStargOp = true;
+ }
+ else
+ {
+ // In non-inline cases, note written-to locals.
+ lvaTable[varNum].lvArgWrite = 1;
+ }
+ }
+ }
+ break;
+
+ case CEE_LDARGA:
+ case CEE_LDARGA_S:
+ case CEE_LDLOCA:
+ case CEE_LDLOCA_S:
+ {
+ // Handle address-taken args or locals
+ noway_assert(sz == sizeof(BYTE) || sz == sizeof(WORD));
+
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+
+ if (isInlining)
+ {
+ if (opcode == CEE_LDLOCA || opcode == CEE_LDLOCA_S)
+ {
+ varType = impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt].lclTypeInfo;
+ ti = impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt].lclVerTypeInfo;
+
+ impInlineInfo->lclVarInfo[varNum + impInlineInfo->argCnt].lclHasLdlocaOp = true;
+ }
+ else
+ {
+ noway_assert(opcode == CEE_LDARGA || opcode == CEE_LDARGA_S);
+
+ varType = impInlineInfo->lclVarInfo[varNum].lclTypeInfo;
+ ti = impInlineInfo->lclVarInfo[varNum].lclVerTypeInfo;
+
+ impInlineInfo->inlArgInfo[varNum].argHasLdargaOp = true;
+
+ pushedStack.PushArgument(varNum);
+ }
+ }
+ else
+ {
+ if (opcode == CEE_LDLOCA || opcode == CEE_LDLOCA_S)
+ {
+ if (varNum >= info.compMethodInfo->locals.numArgs)
+ {
+ BADCODE("bad local number");
+ }
+
+ varNum += info.compArgsCount;
+ }
+ else
+ {
+ noway_assert(opcode == CEE_LDARGA || opcode == CEE_LDARGA_S);
+
+ if (varNum >= info.compILargsCount)
+ {
+ BADCODE("bad argument number");
+ }
+
+ varNum = compMapILargNum(varNum); // account for possible hidden param
+ }
+
+ varType = (var_types)lvaTable[varNum].lvType;
+ ti = lvaTable[varNum].lvVerTypeInfo;
+
+ // Determine if the next instruction will consume
+ // the address. If so we won't mark this var as
+ // address taken.
+ //
+ // We will put structs on the stack and changing
+ // the addrTaken of a local requires an extra pass
+ // in the morpher so we won't apply this
+ // optimization to structs.
+ //
+ // Debug code spills for every IL instruction, and
+ // therefore it will split statements, so we will
+ // need the address. Note that this optimization
+ // is based on the fact that we know what trees we
+ // will generate for this ldfld, and we require that
+ // we won't need the address of this local at all.
+ noway_assert(varNum < lvaTableCnt);
+
+ const bool notStruct = !varTypeIsStruct(&lvaTable[varNum]);
+ const bool notLastInstr = (codeAddr < codeEndp - sz);
+ const bool notDebugCode = !opts.compDbgCode;
+
+ if (notStruct && notLastInstr && notDebugCode &&
+ impILConsumesAddr(codeAddr + sz, impTokenLookupContextHandle, info.compScopeHnd))
+ {
+ // We can skip the addrtaken, as next IL instruction consumes
+ // the address.
+ }
+ else
+ {
+ lvaTable[varNum].lvHasLdAddrOp = 1;
+ if (!info.compIsStatic && (varNum == 0))
+ {
+ // Addr taken on "this" pointer is significant,
+ // go ahead to mark it as permanently addr-exposed here.
+ lvaSetVarAddrExposed(0);
+ // This may be conservative, but probably not very.
+ }
+ }
+ } // isInlining
+
+ typeIsNormed = ti.IsValueClass() && !varTypeIsStruct(varType);
+ }
+ break;
+
+ case CEE_JMP:
+
+#if !defined(_TARGET_X86_) && !defined(_TARGET_ARM_)
+ if (!isInlining)
+ {
+ // We transform this into a set of ldarg's + tail call and
+ // thus may push more onto the stack than originally thought.
+ // This doesn't interfere with verification because CEE_JMP
+ // is never verifiable, and there's nothing unsafe you can
+ // do with an IL stack overflow if the JIT is expecting it.
+ info.compMaxStack = max(info.compMaxStack, info.compILargsCount);
+ break;
+ }
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+
+ // If we are inlining, we need to fail for a CEE_JMP opcode, just like
+ // the list of other opcodes (for all platforms).
+
+ __fallthrough;
+
+ case CEE_CALLI:
+ case CEE_LOCALLOC:
+ case CEE_MKREFANY:
+ case CEE_RETHROW:
+ // CEE_CALLI should not be inlined because the JIT cannot generate an inlined call frame. If the
+ // call target is a no-marshal CALLI P/Invoke we end up calling the IL stub. We don't NGEN these
+ // stubs, so we'll have to JIT an IL stub for a trivial func. It's almost certainly a better choice
+ // to leave out the inline candidate so we can generate an inlined call frame. It might be nice to
+ // call getCallInfo to figure out what kind of call we have here.
+
+ // Consider making this only for not force inline.
+ if (makeInlineObservations)
+ {
+ // Arguably this should be NoteFatal, but the legacy behavior is
+ // to ignore this for the prejit root.
+ compInlineResult->Note(InlineObservation::CALLEE_UNSUPPORTED_OPCODE);
+
+ // Fail fast if we're inlining...
+ if (isInlining)
+ {
+ assert(compInlineResult->IsFailure());
+ return;
+ }
+ }
+ break;
+
+ case CEE_LDARG_0:
+ case CEE_LDARG_1:
+ case CEE_LDARG_2:
+ case CEE_LDARG_3:
+ if (makeInlineObservations)
+ {
+ pushedStack.PushArgument(opcode - CEE_LDARG_0);
+ }
+ break;
+
+ case CEE_LDARG_S:
+ case CEE_LDARG:
+ {
+ if (codeAddr > codeEndp - sz)
+ {
+ goto TOO_FAR;
+ }
+
+ varNum = (sz == sizeof(BYTE)) ? getU1LittleEndian(codeAddr) : getU2LittleEndian(codeAddr);
+
+ if (makeInlineObservations)
+ {
+ pushedStack.PushArgument(varNum);
+ }
+ }
+ break;
+
+ case CEE_LDLEN:
+ if (makeInlineObservations)
+ {
+ pushedStack.PushArrayLen();
+ }
+ break;
+
+ case CEE_CEQ:
+ case CEE_CGT:
+ case CEE_CGT_UN:
+ case CEE_CLT:
+ case CEE_CLT_UN:
+ if (makeInlineObservations)
+ {
+ fgObserveInlineConstants(opcode, pushedStack, isInlining);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Skip any remaining operands this opcode may have
+ codeAddr += sz;
+
+ // Note the opcode we just saw
+ if (makeInlineObservations)
+ {
+ InlineObservation obs =
+ typeIsNormed ? InlineObservation::CALLEE_OPCODE_NORMED : InlineObservation::CALLEE_OPCODE;
+ compInlineResult->NoteInt(obs, opcode);
+ }
+ }
+
+ if (codeAddr != codeEndp)
+ {
+ TOO_FAR:
+ BADCODE3("Code ends in the middle of an opcode, or there is a branch past the end of the method",
+ " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ if (makeInlineObservations)
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_END_OPCODE_SCAN);
+
+ // If the inline is viable and discretionary, do the
+ // profitability screening.
+ if (compInlineResult->IsDiscretionaryCandidate())
+ {
+ // Make some callsite specific observations that will feed
+ // into the profitability model.
+ impMakeDiscretionaryInlineObservations(impInlineInfo, compInlineResult);
+
+ // None of those observations should have changed the
+ // inline's viability.
+ assert(compInlineResult->IsCandidate());
+
+ if (isInlining)
+ {
+ // Assess profitability...
+ CORINFO_METHOD_INFO* methodInfo = &impInlineInfo->inlineCandidateInfo->methInfo;
+ compInlineResult->DetermineProfitability(methodInfo);
+
+ if (compInlineResult->IsFailure())
+ {
+ impInlineRoot()->m_inlineStrategy->NoteUnprofitable();
+ JITDUMP("\n\nInline expansion aborted, inline not profitable\n");
+ return;
+ }
+ else
+ {
+ // The inline is still viable.
+ assert(compInlineResult->IsCandidate());
+ }
+ }
+ else
+ {
+ // Prejit root case. Profitability assessment for this
+ // is done over in compCompileHelper.
+ }
+ }
+ }
+
+ // None of the local vars in the inlinee should have their address taken or been written to.
+ // Therefore we should NOT need to enter this "if" statement.
+ if (!isInlining && !info.compIsStatic)
+ {
+ fgAdjustForAddressExposedOrWrittenThis();
+ }
+}
+
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+//------------------------------------------------------------------------
+// fgAdjustForAddressExposedOrWrittenThis: update var table for cases
+// where the this pointer value can change.
+//
+// Notes:
+// Modifies lvaArg0Var to refer to a temp if the value of 'this' can
+// change. The original this (info.compThisArg) then remains
+// unmodified in the method. fgAddInternal is responsible for
+// adding the code to copy the initial this into the temp.
+
+void Compiler::fgAdjustForAddressExposedOrWrittenThis()
+{
+ // Optionally enable adjustment during stress.
+ if (!tiVerificationNeeded && compStressCompile(STRESS_GENERIC_VARN, 15))
+ {
+ lvaTable[info.compThisArg].lvArgWrite = true;
+ }
+
+ // If this is exposed or written to, create a temp for the modifiable this
+ if (lvaTable[info.compThisArg].lvAddrExposed || lvaTable[info.compThisArg].lvArgWrite)
+ {
+ // If there is a "ldarga 0" or "starg 0", grab and use the temp.
+ lvaArg0Var = lvaGrabTemp(false DEBUGARG("Address-exposed, or written this pointer"));
+ noway_assert(lvaArg0Var > (unsigned)info.compThisArg);
+ lvaTable[lvaArg0Var].lvType = lvaTable[info.compThisArg].TypeGet();
+ lvaTable[lvaArg0Var].lvAddrExposed = lvaTable[info.compThisArg].lvAddrExposed;
+ lvaTable[lvaArg0Var].lvDoNotEnregister = lvaTable[info.compThisArg].lvDoNotEnregister;
+#ifdef DEBUG
+ lvaTable[lvaArg0Var].lvVMNeedsStackAddr = lvaTable[info.compThisArg].lvVMNeedsStackAddr;
+ lvaTable[lvaArg0Var].lvLiveInOutOfHndlr = lvaTable[info.compThisArg].lvLiveInOutOfHndlr;
+ lvaTable[lvaArg0Var].lvLclFieldExpr = lvaTable[info.compThisArg].lvLclFieldExpr;
+ lvaTable[lvaArg0Var].lvLiveAcrossUCall = lvaTable[info.compThisArg].lvLiveAcrossUCall;
+#endif
+ lvaTable[lvaArg0Var].lvArgWrite = lvaTable[info.compThisArg].lvArgWrite;
+ lvaTable[lvaArg0Var].lvVerTypeInfo = lvaTable[info.compThisArg].lvVerTypeInfo;
+
+ // Clear the TI_FLAG_THIS_PTR in the original 'this' pointer.
+ noway_assert(lvaTable[lvaArg0Var].lvVerTypeInfo.IsThisPtr());
+ lvaTable[info.compThisArg].lvVerTypeInfo.ClearThisPtr();
+ lvaTable[info.compThisArg].lvAddrExposed = false;
+ lvaTable[info.compThisArg].lvArgWrite = false;
+ }
+}
+
+//------------------------------------------------------------------------
+// fgObserveInlineConstants: look for operations that might get optimized
+// if this method were to be inlined, and report these to the inliner.
+//
+// Arguments:
+// opcode -- MSIL opcode under consideration
+// stack -- abstract stack model at this point in the IL
+// isInlining -- true if we're inlining (vs compiling a prejit root)
+//
+// Notes:
+// Currently only invoked on compare and branch opcodes.
+//
+// If we're inlining we also look at the argument values supplied by
+// the caller at this call site.
+//
+// The crude stack model may overestimate stack depth.
+
+void Compiler::fgObserveInlineConstants(OPCODE opcode, const FgStack& stack, bool isInlining)
+{
+ // We should be able to record inline observations.
+ assert(compInlineResult != nullptr);
+
+ // The stack only has to be 1 deep for BRTRUE/FALSE
+ bool lookForBranchCases = stack.IsStackAtLeastOneDeep();
+
+ if (compInlineResult->UsesLegacyPolicy())
+ {
+ // LegacyPolicy misses cases where the stack is really one
+ // deep but the model says it's two deep. We need to do
+ // likewise to preserve old behavior.
+ lookForBranchCases &= !stack.IsStackTwoDeep();
+ }
+
+ if (lookForBranchCases)
+ {
+ if (opcode == CEE_BRFALSE || opcode == CEE_BRFALSE_S || opcode == CEE_BRTRUE || opcode == CEE_BRTRUE_S)
+ {
+ unsigned slot0 = stack.GetSlot0();
+ if (FgStack::IsArgument(slot0))
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST);
+
+ if (isInlining)
+ {
+ // Check for the double whammy of an incoming constant argument
+ // feeding a constant test.
+ unsigned varNum = FgStack::SlotTypeToArgNum(slot0);
+ if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST);
+ }
+ }
+ }
+
+ return;
+ }
+ }
+
+ // Remaining cases require at least two things on the stack.
+ if (!stack.IsStackTwoDeep())
+ {
+ return;
+ }
+
+ unsigned slot0 = stack.GetSlot0();
+ unsigned slot1 = stack.GetSlot1();
+
+ // Arg feeds constant test
+ if ((FgStack::IsConstant(slot0) && FgStack::IsArgument(slot1)) ||
+ (FgStack::IsConstant(slot1) && FgStack::IsArgument(slot0)))
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST);
+ }
+
+ // Arg feeds range check
+ if ((FgStack::IsArrayLen(slot0) && FgStack::IsArgument(slot1)) ||
+ (FgStack::IsArrayLen(slot1) && FgStack::IsArgument(slot0)))
+ {
+ compInlineResult->Note(InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK);
+ }
+
+ // Check for an incoming arg that's a constant
+ if (isInlining)
+ {
+ if (FgStack::IsArgument(slot0))
+ {
+ unsigned varNum = FgStack::SlotTypeToArgNum(slot0);
+ if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST);
+ }
+ }
+
+ if (FgStack::IsArgument(slot1))
+ {
+ unsigned varNum = FgStack::SlotTypeToArgNum(slot1);
+ if (impInlineInfo->inlArgInfo[varNum].argNode->OperIsConst())
+ {
+ compInlineResult->Note(InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST);
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Mark all blocks in the range [startBlock .. endBlock] as lying within the span of a backward jump.
+ */
+
+void Compiler::fgMarkBackwardJump(BasicBlock* startBlock, BasicBlock* endBlock)
+{
+ noway_assert(startBlock->bbNum <= endBlock->bbNum);
+
+ for (BasicBlock* block = startBlock; block != endBlock->bbNext; block = block->bbNext)
+ {
+ if ((block->bbFlags & BBF_BACKWARD_JUMP) == 0)
+ {
+ block->bbFlags |= BBF_BACKWARD_JUMP;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Finally link up the bbJumpDest of the blocks together
+ */
+
+void Compiler::fgLinkBasicBlocks()
+{
+ /* Create the basic block lookup tables */
+
+ fgInitBBLookup();
+
+ /* First block is always reachable */
+
+ fgFirstBB->bbRefs = 1;
+
+ /* Walk all the basic blocks, filling in the target addresses */
+
+ for (BasicBlock* curBBdesc = fgFirstBB; curBBdesc; curBBdesc = curBBdesc->bbNext)
+ {
+ switch (curBBdesc->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ curBBdesc->bbJumpDest = fgLookupBB(curBBdesc->bbJumpOffs);
+ curBBdesc->bbJumpDest->bbRefs++;
+ if (curBBdesc->bbJumpDest->bbNum <= curBBdesc->bbNum)
+ {
+ fgMarkBackwardJump(curBBdesc->bbJumpDest, curBBdesc);
+ }
+
+ /* Is the next block reachable? */
+
+ if (curBBdesc->bbJumpKind == BBJ_ALWAYS || curBBdesc->bbJumpKind == BBJ_LEAVE)
+ {
+ break;
+ }
+
+ if (!curBBdesc->bbNext)
+ {
+ BADCODE("Fall thru the end of a method");
+ }
+
+ // Fall through, the next block is also reachable
+
+ case BBJ_NONE:
+ curBBdesc->bbNext->bbRefs++;
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = curBBdesc->bbJumpSwt->bbsCount;
+ BasicBlock** jumpPtr;
+ jumpPtr = curBBdesc->bbJumpSwt->bbsDstTab;
+
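+ // Each table entry currently holds an IL offset that fgMakeBasicBlocks()
+ // stashed as a pointer-sized value; convert it to the target BasicBlock*
+ // and bump that block's reference count.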
+ do
+ {
+ *jumpPtr = fgLookupBB((unsigned)*(size_t*)jumpPtr);
+ (*jumpPtr)->bbRefs++;
+ if ((*jumpPtr)->bbNum <= curBBdesc->bbNum)
+ {
+ fgMarkBackwardJump(*jumpPtr, curBBdesc);
+ }
+ } while (++jumpPtr, --jumpCnt);
+
+ /* The default case of CEE_SWITCH (the next block) is at the end of jumpTab[] */
+
+ noway_assert(*(jumpPtr - 1) == curBBdesc->bbNext);
+ break;
+
+ case BBJ_CALLFINALLY: // BBJ_CALLFINALLY and BBJ_EHCATCHRET don't appear until later
+ case BBJ_EHCATCHRET:
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Walk the instrs to create the basic blocks.
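+ *
+ * A new block begins at every IL offset recorded in jumpTarget[] and after
+ * every opcode that ends a block (branch, leave, return, throw, switch, etc.);
+ * once all blocks are created, fgLinkBasicBlocks() resolves the branch targets.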
+ */
+
+void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* jumpTarget)
+{
+ const BYTE* codeBegp = codeAddr;
+ const BYTE* codeEndp = codeAddr + codeSize;
+ bool tailCall = false;
+ unsigned curBBoffs;
+ BasicBlock* curBBdesc;
+
+ /* Clear the beginning offset for the first BB */
+
+ curBBoffs = 0;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ compResetScopeLists();
+
+ // Ignore scopes beginning at offset 0
+ while (compGetNextEnterScope(0))
+ { /* do nothing */
+ }
+ while (compGetNextExitScope(0))
+ { /* do nothing */
+ }
+ }
+#endif
+
+ BBjumpKinds jmpKind;
+
+ do
+ {
+ OPCODE opcode;
+ unsigned sz;
+ unsigned jmpAddr = DUMMY_INIT(BAD_IL_OFFSET);
+ unsigned bbFlags = 0;
+ BBswtDesc* swtDsc = nullptr;
+ unsigned nxtBBoffs;
+
+ opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+ jmpKind = BBJ_NONE;
+
+ DECODE_OPCODE:
+
+ /* Get the size of additional parameters */
+
+ noway_assert(opcode < CEE_COUNT);
+
+ sz = opcodeSizes[opcode];
+
+ switch (opcode)
+ {
+ signed jmpDist;
+
+ case CEE_PREFIX1:
+ if (jumpTarget[codeAddr - codeBegp] != JT_NONE)
+ {
+ BADCODE3("jump target between prefix 0xFE and opcode", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ opcode = (OPCODE)(256 + getU1LittleEndian(codeAddr));
+ codeAddr += sizeof(__int8);
+ goto DECODE_OPCODE;
+
+ /* Check to see if we have a jump/return opcode */
+
+ case CEE_BRFALSE:
+ case CEE_BRFALSE_S:
+ case CEE_BRTRUE:
+ case CEE_BRTRUE_S:
+
+ case CEE_BEQ:
+ case CEE_BEQ_S:
+ case CEE_BGE:
+ case CEE_BGE_S:
+ case CEE_BGE_UN:
+ case CEE_BGE_UN_S:
+ case CEE_BGT:
+ case CEE_BGT_S:
+ case CEE_BGT_UN:
+ case CEE_BGT_UN_S:
+ case CEE_BLE:
+ case CEE_BLE_S:
+ case CEE_BLE_UN:
+ case CEE_BLE_UN_S:
+ case CEE_BLT:
+ case CEE_BLT_S:
+ case CEE_BLT_UN:
+ case CEE_BLT_UN_S:
+ case CEE_BNE_UN:
+ case CEE_BNE_UN_S:
+
+ jmpKind = BBJ_COND;
+ goto JMP;
+
+ case CEE_LEAVE:
+ case CEE_LEAVE_S:
+
+ // We need to check if we are jumping out of a finally-protected try.
+ jmpKind = BBJ_LEAVE;
+ goto JMP;
+
+ case CEE_BR:
+ case CEE_BR_S:
+ jmpKind = BBJ_ALWAYS;
+ goto JMP;
+
+ JMP:
+
+ /* Compute the target address of the jump */
+
+ jmpDist = (sz == 1) ? getI1LittleEndian(codeAddr) : getI4LittleEndian(codeAddr);
+
+ if (compIsForInlining() && jmpDist == 0 && (opcode == CEE_BR || opcode == CEE_BR_S))
+ {
+ continue; /* NOP */
+ }
+
+ jmpAddr = (IL_OFFSET)(codeAddr - codeBegp) + sz + jmpDist;
+ break;
+
+ case CEE_SWITCH:
+ {
+ unsigned jmpBase;
+ unsigned jmpCnt; // # of switch cases (excluding default)
+
+ BasicBlock** jmpTab;
+ BasicBlock** jmpPtr;
+
+ /* Allocate the switch descriptor */
+
+ swtDsc = new (this, CMK_BasicBlock) BBswtDesc;
+
+ /* Read the number of entries in the table */
+
+ jmpCnt = getU4LittleEndian(codeAddr);
+ codeAddr += 4;
+
+ /* Compute the base offset for the opcode */
+
+ jmpBase = (IL_OFFSET)((codeAddr - codeBegp) + jmpCnt * sizeof(DWORD));
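+
+ // jmpBase is the IL offset of the instruction that follows the entire
+ // switch operand (the count plus jmpCnt DWORD-sized deltas); each case
+ // target below is encoded as a signed delta relative to this base.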
+
+ /* Allocate the jump table */
+
+ jmpPtr = jmpTab = new (this, CMK_BasicBlock) BasicBlock*[jmpCnt + 1];
+
+ /* Fill in the jump table */
+
+ for (unsigned count = jmpCnt; count; count--)
+ {
+ jmpDist = getI4LittleEndian(codeAddr);
+ codeAddr += 4;
+
+ // store the offset in the pointer. We change these in fgLinkBasicBlocks().
+ *jmpPtr++ = (BasicBlock*)(size_t)(jmpBase + jmpDist);
+ }
+
+ /* Append the default label to the target table */
+
+ *jmpPtr++ = (BasicBlock*)(size_t)jmpBase;
+
+ /* Make sure we found the right number of labels */
+
+ noway_assert(jmpPtr == jmpTab + jmpCnt + 1);
+
+ /* Compute the size of the switch opcode operands */
+
+ sz = sizeof(DWORD) + jmpCnt * sizeof(DWORD);
+
+ /* Fill in the remaining fields of the switch descriptor */
+
+ swtDsc->bbsCount = jmpCnt + 1;
+ swtDsc->bbsDstTab = jmpTab;
+
+ /* This is definitely a jump */
+
+ jmpKind = BBJ_SWITCH;
+ fgHasSwitch = true;
+
+#ifndef LEGACY_BACKEND
+ if (opts.compProcedureSplitting)
+ {
+ // TODO-CQ: We might need to create a switch table; we won't know for sure until much later.
+ // However, switch tables don't work with hot/cold splitting, currently. The switch table data needs
+ // a relocation such that if the base (the first block after the prolog) and target of the switch
+ // branch are put in different sections, the difference stored in the table is updated. However, our
+ // relocation implementation doesn't support three different pointers (relocation address, base, and
+ // target). So, we need to change our switch table implementation to be more like
+ // JIT64: put the table in the code section, in the same hot/cold section as the switch jump itself
+ // (maybe immediately after the switch jump), and make the "base" address be also in that section,
+ // probably the address after the switch jump.
+ opts.compProcedureSplitting = false;
+ JITDUMP("Turning off procedure splitting for this method, as it might need switch tables; "
+ "implementation limitation.\n");
+ }
+#endif // !LEGACY_BACKEND
+ }
+ goto GOT_ENDP;
+
+ case CEE_ENDFILTER:
+ bbFlags |= BBF_DONT_REMOVE;
+ jmpKind = BBJ_EHFILTERRET;
+ break;
+
+ case CEE_ENDFINALLY:
+ jmpKind = BBJ_EHFINALLYRET;
+ break;
+
+ case CEE_TAILCALL:
+ if (compIsForInlining())
+ {
+ // TODO-CQ: We can inline some callees with explicit tail calls if we can guarantee that the calls
+ // can be dispatched as tail calls from the caller.
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX);
+ return;
+ }
+
+ __fallthrough;
+
+ case CEE_READONLY:
+ case CEE_CONSTRAINED:
+ case CEE_VOLATILE:
+ case CEE_UNALIGNED:
+ // fgFindJumpTargets should have ruled out this possibility
+ // (i.e. a prefix opcode as the last instruction in a block)
+ noway_assert(codeAddr < codeEndp);
+
+ if (jumpTarget[codeAddr - codeBegp] != JT_NONE)
+ {
+ BADCODE3("jump target between prefix and an opcode", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+ }
+ break;
+
+ case CEE_CALL:
+ case CEE_CALLVIRT:
+ case CEE_CALLI:
+ {
+ // Ignore tail calls in an inlinee. Period.
+ // Also bail if there is no .tailcall prefix and we aren't stressing tail calls:
+ // a new BB with BBJ_RETURN would have been created after a tailcall statement.
+ // We need to keep this invariant if we want to stress the tailcall.
+ // That way, the potential (tail)call statement is always the last
+ // statement in the block.
+ // Otherwise, we will assert at the following line in fgMorphCall():
+ // noway_assert(fgMorphStmt->gtNext == NULL);
+ if (compIsForInlining() || (!tailCall && !compTailCallStress()))
+ {
+ // Neither a .tailcall prefix nor tailcall stress, so move on.
+ break;
+ }
+
+ // Make sure the code sequence is legal for the tail call.
+ // If so, mark this BB as having a BBJ_RETURN.
+
+ if (codeAddr >= codeEndp - sz)
+ {
+ BADCODE3("No code found after the call instruction", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ if (tailCall)
+ {
+ bool isCallPopAndRet = false;
+
+ // impIsTailCallILPattern uses isRecursive flag to determine whether ret in a fallthrough block is
+ // allowed. We don't know at this point whether the call is recursive so we conservatively pass
+ // false. This will only affect explicit tail calls when IL verification is not needed for the
+ // method.
+ bool isRecursive = false;
+ if (!impIsTailCallILPattern(tailCall, opcode, codeAddr + sz, codeEndp, isRecursive,
+ &isCallPopAndRet))
+ {
+#ifdef _TARGET_AMD64_
+ BADCODE3("tail call not followed by ret or pop+ret", " at offset %04X",
+ (IL_OFFSET)(codeAddr - codeBegp));
+#else
+ BADCODE3("tail call not followed by ret", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+#endif //_TARGET_AMD64_
+ }
+
+#ifdef _TARGET_AMD64_
+ if (isCallPopAndRet)
+ {
+ // By breaking here, we let the pop and ret opcodes be
+ // imported after the tail call. If the tail prefix is honored,
+ // the stmts corresponding to pop and ret will be removed
+ // in fgMorphCall().
+ break;
+ }
+#endif //_TARGET_AMD64_
+ }
+ else
+ {
+ OPCODE nextOpcode = (OPCODE)getU1LittleEndian(codeAddr + sz);
+
+ if (nextOpcode != CEE_RET)
+ {
+ noway_assert(compTailCallStress());
+ // The next opcode is not a CEE_RET; bail on the attempt to stress the tailcall.
+ // (I.e. we will not make a new BB after the "call" statement.)
+ break;
+ }
+ }
+ }
+
+ /* For a tail call we just call CORINFO_HELP_TAILCALL, and it jumps to the
+ target. So we don't need an epilog - just like CORINFO_HELP_THROW.
+ Make the block BBJ_RETURN, but we will change it to BBJ_THROW
+ if the tailness of the call is satisfied.
+ NOTE: The next instruction is guaranteed to be a CEE_RET
+ and it will create another BasicBlock. But there may be a
+ jump directly to that CEE_RET. If we want to avoid creating
+ an unnecessary block, we need to check if the CEE_RET is
+ the target of a jump.
+ */
+
+ // fall-through
+
+ case CEE_JMP:
+ /* These are equivalent to a return from the current method
+ But instead of directly returning to the caller we jump and
+ execute something else in between */
+ case CEE_RET:
+ jmpKind = BBJ_RETURN;
+ break;
+
+ case CEE_THROW:
+ case CEE_RETHROW:
+ jmpKind = BBJ_THROW;
+ break;
+
+#ifdef DEBUG
+// make certain we did not forget any flow of control instructions
+// by checking the 'ctrl' field in opcode.def. First filter out all
+// non-ctrl instructions
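+// Opcodes whose 'ctrl' kind is BREAK or NEXT expand to harmless 'case ...: break;'
+// labels here; the control-flow kinds (CALL, RETURN, BRANCH, COND_BRANCH, THROW,
+// META, PHI) expand to nothing, so any such opcode not handled explicitly above
+// falls into the 'default' case and is rejected as unrecognized.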
+#define BREAK(name) \
+ case name: \
+ break;
+#define NEXT(name) \
+ case name: \
+ break;
+#define CALL(name)
+#define THROW(name)
+#undef RETURN // undef contract RETURN macro
+#define RETURN(name)
+#define META(name)
+#define BRANCH(name)
+#define COND_BRANCH(name)
+#define PHI(name)
+
+#define OPDEF(name, string, pop, push, oprType, opcType, l, s1, s2, ctrl) ctrl(name)
+#include "opcode.def"
+#undef OPDEF
+
+#undef PHI
+#undef BREAK
+#undef CALL
+#undef NEXT
+#undef THROW
+#undef RETURN
+#undef META
+#undef BRANCH
+#undef COND_BRANCH
+
+ // These ctrl-flow opcodes don't need any special handling
+ case CEE_NEWOBJ: // CTRL_CALL
+ break;
+
+ // what's left are forgotten instructions
+ default:
+ BADCODE("Unrecognized control Opcode");
+ break;
+#else // !DEBUG
+ default:
+ break;
+#endif // !DEBUG
+ }
+
+ /* Jump over the operand */
+
+ codeAddr += sz;
+
+ GOT_ENDP:
+
+ tailCall = (opcode == CEE_TAILCALL);
+
+ /* Make sure a jump target isn't in the middle of our opcode */
+
+ if (sz)
+ {
+ IL_OFFSET offs = (IL_OFFSET)(codeAddr - codeBegp) - sz; // offset of the operand
+
+ for (unsigned i = 0; i < sz; i++, offs++)
+ {
+ if (jumpTarget[offs] != JT_NONE)
+ {
+ BADCODE3("jump into the middle of an opcode", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+ }
+ }
+
+ /* Compute the offset of the next opcode */
+
+ nxtBBoffs = (IL_OFFSET)(codeAddr - codeBegp);
+
+#ifdef DEBUGGING_SUPPORT
+
+ bool foundScope = false;
+
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ while (compGetNextEnterScope(nxtBBoffs))
+ {
+ foundScope = true;
+ }
+ while (compGetNextExitScope(nxtBBoffs))
+ {
+ foundScope = true;
+ }
+ }
+#endif
+
+ /* Do we have a jump? */
+
+ if (jmpKind == BBJ_NONE)
+ {
+ /* No jump; make sure we don't fall off the end of the function */
+
+ if (codeAddr == codeEndp)
+ {
+ BADCODE3("missing return opcode", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
+ }
+
+ /* If a label follows this opcode, we'll have to make a new BB */
+
+ bool makeBlock = (jumpTarget[nxtBBoffs] != JT_NONE);
+
+#ifdef DEBUGGING_SUPPORT
+ if (!makeBlock && foundScope)
+ {
+ makeBlock = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Splitting at BBoffs = %04u\n", nxtBBoffs);
+ }
+#endif // DEBUG
+ }
+#endif // DEBUGGING_SUPPORT
+
+ if (!makeBlock)
+ {
+ continue;
+ }
+ }
+
+ /* We need to create a new basic block */
+
+ curBBdesc = fgNewBasicBlock(jmpKind);
+
+ curBBdesc->bbFlags |= bbFlags;
+ curBBdesc->bbRefs = 0;
+
+ curBBdesc->bbCodeOffs = curBBoffs;
+ curBBdesc->bbCodeOffsEnd = nxtBBoffs;
+
+ unsigned profileWeight;
+ if (fgGetProfileWeightForBasicBlock(curBBoffs, &profileWeight))
+ {
+ curBBdesc->setBBProfileWeight(profileWeight);
+ if (profileWeight == 0)
+ {
+ curBBdesc->bbSetRunRarely();
+ }
+ else
+ {
+ // Note that bbNewBasicBlock (called from fgNewBasicBlock) may have
+ // already marked the block as rarely run. In that case (and when we know
+ // that the block profile weight is non-zero) we want to unmark that.
+
+ curBBdesc->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+
+ switch (jmpKind)
+ {
+ case BBJ_SWITCH:
+ curBBdesc->bbJumpSwt = swtDsc;
+ break;
+
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ noway_assert(jmpAddr != DUMMY_INIT(BAD_IL_OFFSET));
+ curBBdesc->bbJumpOffs = jmpAddr;
+ break;
+
+ default:
+ break;
+ }
+
+ DBEXEC(verbose, curBBdesc->dspBlockHeader(this, false, false, false));
+
+ /* Remember where the next BB will start */
+
+ curBBoffs = nxtBBoffs;
+ } while (codeAddr < codeEndp);
+
+ noway_assert(codeAddr == codeEndp);
+
+ /* Finally link up the bbJumpDest of the blocks together */
+
+ fgLinkBasicBlocks();
+}
+
+/*****************************************************************************
+ *
+ * Main entry point to discover the basic blocks for the current function.
+ */
+
+void Compiler::fgFindBasicBlocks()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgFindBasicBlocks() for %s\n", info.compFullName);
+ }
+#endif
+
+ /* Allocate the 'jump target' vector
+ *
+ * We need one extra byte as we mark
+ * jumpTarget[info.compILCodeSize] with JT_ADDR
+ * when we need to add a dummy block
+ * to record the end of a try or handler region.
+ */
+ BYTE* jumpTarget = new (this, CMK_Unknown) BYTE[info.compILCodeSize + 1];
+ memset(jumpTarget, JT_NONE, info.compILCodeSize + 1);
+ noway_assert(JT_NONE == 0);
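+
+ // A non-JT_NONE entry at jumpTarget[offs] means IL offset 'offs' must start
+ // a new basic block: either it is the target of a branch (marked by
+ // fgFindJumpTargets) or it delimits a try/handler/filter region (marked
+ // JT_ADDR below).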
+
+ /* Walk the instrs to find all jump targets */
+
+ fgFindJumpTargets(info.compCode, info.compILCodeSize, jumpTarget);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ unsigned XTnum;
+
+ /* Are there any exception handlers? */
+
+ if (info.compXcptnsCount > 0)
+ {
+ noway_assert(!compIsForInlining());
+
+ /* Check and mark all the exception handlers */
+
+ for (XTnum = 0; XTnum < info.compXcptnsCount; XTnum++)
+ {
+ DWORD tmpOffset;
+ CORINFO_EH_CLAUSE clause;
+ info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause);
+ noway_assert(clause.HandlerLength != (unsigned)-1);
+
+ if (clause.TryLength <= 0)
+ {
+ BADCODE("try block length <=0");
+ }
+
+ /* Mark the 'try' block extent and the handler itself */
+
+ if (clause.TryOffset > info.compILCodeSize)
+ {
+ BADCODE("try offset is > codesize");
+ }
+ if (jumpTarget[clause.TryOffset] == JT_NONE)
+ {
+ jumpTarget[clause.TryOffset] = JT_ADDR;
+ }
+
+ tmpOffset = clause.TryOffset + clause.TryLength;
+ if (tmpOffset > info.compILCodeSize)
+ {
+ BADCODE("try end is > codesize");
+ }
+ if (jumpTarget[tmpOffset] == JT_NONE)
+ {
+ jumpTarget[tmpOffset] = JT_ADDR;
+ }
+
+ if (clause.HandlerOffset > info.compILCodeSize)
+ {
+ BADCODE("handler offset > codesize");
+ }
+ if (jumpTarget[clause.HandlerOffset] == JT_NONE)
+ {
+ jumpTarget[clause.HandlerOffset] = JT_ADDR;
+ }
+
+ tmpOffset = clause.HandlerOffset + clause.HandlerLength;
+ if (tmpOffset > info.compILCodeSize)
+ {
+ BADCODE("handler end > codesize");
+ }
+ if (jumpTarget[tmpOffset] == JT_NONE)
+ {
+ jumpTarget[tmpOffset] = JT_ADDR;
+ }
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ if (clause.FilterOffset > info.compILCodeSize)
+ {
+ BADCODE("filter offset > codesize");
+ }
+ if (jumpTarget[clause.FilterOffset] == JT_NONE)
+ {
+ jumpTarget[clause.FilterOffset] = JT_ADDR;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ bool anyJumpTargets = false;
+ printf("Jump targets:\n");
+ for (unsigned i = 0; i < info.compILCodeSize + 1; i++)
+ {
+ if (jumpTarget[i] == JT_NONE)
+ {
+ continue;
+ }
+
+ anyJumpTargets = true;
+ printf(" IL_%04x", i);
+
+ if (jumpTarget[i] & JT_ADDR)
+ {
+ printf(" addr");
+ }
+ if (jumpTarget[i] & JT_MULTI)
+ {
+ printf(" multi");
+ }
+ printf("\n");
+ }
+ if (!anyJumpTargets)
+ {
+ printf(" none\n");
+ }
+ }
+#endif // DEBUG
+
+ /* Now create the basic blocks */
+
+ fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
+
+ if (compIsForInlining())
+ {
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+
+ bool hasReturnBlocks = false;
+ bool hasMoreThanOneReturnBlock = false;
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ if (hasReturnBlocks)
+ {
+ hasMoreThanOneReturnBlock = true;
+ break;
+ }
+
+ hasReturnBlocks = true;
+ }
+ }
+
+ if (!hasReturnBlocks && !compInlineResult->UsesLegacyPolicy())
+ {
+ //
+ // Mark the call node as "no return". The inliner might ignore CALLEE_DOES_NOT_RETURN and
+ // fail the inline for a different reason. In that case we still want to make the "no return"
+ // information available to the caller, as it can impact the caller's code quality.
+ //
+
+ impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN;
+ }
+
+ compInlineResult->NoteBool(InlineObservation::CALLEE_DOES_NOT_RETURN, !hasReturnBlocks);
+
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+
+ noway_assert(info.compXcptnsCount == 0);
+ compHndBBtab = impInlineInfo->InlinerCompiler->compHndBBtab;
+ compHndBBtabAllocCount =
+ impInlineInfo->InlinerCompiler->compHndBBtabAllocCount; // we probably only use the table, and don't add to it.
+ compHndBBtabCount = impInlineInfo->InlinerCompiler->compHndBBtabCount;
+ info.compXcptnsCount = impInlineInfo->InlinerCompiler->info.compXcptnsCount;
+
+ if (info.compRetNativeType != TYP_VOID && hasMoreThanOneReturnBlock)
+ {
+ // The lifetime of this var might span multiple BBs, so it is a long-lifetime compiler temp.
+ lvaInlineeReturnSpillTemp = lvaGrabTemp(false DEBUGARG("Inline candidate multiple BBJ_RETURN spill temp"));
+ lvaTable[lvaInlineeReturnSpillTemp].lvType = info.compRetNativeType;
+ }
+ return;
+ }
+
+ /* Mark all blocks within 'try' blocks as such */
+
+ if (info.compXcptnsCount == 0)
+ {
+ return;
+ }
+
+ if (info.compXcptnsCount > MAX_XCPTN_INDEX)
+ {
+ IMPL_LIMITATION("too many exception clauses");
+ }
+
+ /* Allocate the exception handler table */
+
+ fgAllocEHTable();
+
+ /* Assume we don't need to sort the EH table (such that nested try/catch
+ * appear before their try or handler parent). The EH verifier will notice
+ * when we do need to sort it.
+ */
+
+ fgNeedToSortEHTable = false;
+
+ verInitEHTree(info.compXcptnsCount);
+ EHNodeDsc* initRoot = ehnNext; // remember the original root since
+ // it may get modified during insertion
+
+ // Annotate BBs with exception handling information required for generating correct eh code
+ // as well as checking for correct IL
+
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ CORINFO_EH_CLAUSE clause;
+ info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause);
+ noway_assert(clause.HandlerLength != (unsigned)-1); // @DEPRECATED
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ dispIncomingEHClause(XTnum, clause);
+ }
+#endif // DEBUG
+
+ IL_OFFSET tryBegOff = clause.TryOffset;
+ IL_OFFSET tryEndOff = tryBegOff + clause.TryLength;
+ IL_OFFSET filterBegOff = 0;
+ IL_OFFSET hndBegOff = clause.HandlerOffset;
+ IL_OFFSET hndEndOff = hndBegOff + clause.HandlerLength;
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filterBegOff = clause.FilterOffset;
+ }
+
+ if (tryEndOff > info.compILCodeSize)
+ {
+ BADCODE3("end of try block beyond end of method for try", " at offset %04X", tryBegOff);
+ }
+ if (hndEndOff > info.compILCodeSize)
+ {
+ BADCODE3("end of hnd block beyond end of method for try", " at offset %04X", tryBegOff);
+ }
+
+ HBtab->ebdTryBegOffset = tryBegOff;
+ HBtab->ebdTryEndOffset = tryEndOff;
+ HBtab->ebdFilterBegOffset = filterBegOff;
+ HBtab->ebdHndBegOffset = hndBegOff;
+ HBtab->ebdHndEndOffset = hndEndOff;
+
+ /* Convert the various addresses to basic blocks */
+
+ BasicBlock* tryBegBB = fgLookupBB(tryBegOff);
+ BasicBlock* tryEndBB =
+ fgLookupBB(tryEndOff); // note: this can be NULL if the try region is at the end of the function
+ BasicBlock* hndBegBB = fgLookupBB(hndBegOff);
+ BasicBlock* hndEndBB = nullptr;
+ BasicBlock* filtBB = nullptr;
+ BasicBlock* block;
+
+ //
+ // Assert that the try/hnd beginning blocks are set up correctly
+ //
+ if (tryBegBB == nullptr)
+ {
+ BADCODE("Try Clause is invalid");
+ }
+
+ if (hndBegBB == nullptr)
+ {
+ BADCODE("Handler Clause is invalid");
+ }
+
+ tryBegBB->bbFlags |= BBF_HAS_LABEL;
+ hndBegBB->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+
+#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+ // This will change the block weight from 0 to 1
+ // and clear the rarely run flag
+ hndBegBB->makeBlockHot();
+#else
+ hndBegBB->bbSetRunRarely(); // handler entry points are rarely executed
+#endif
+
+ if (hndEndOff < info.compILCodeSize)
+ {
+ hndEndBB = fgLookupBB(hndEndOff);
+ }
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filtBB = HBtab->ebdFilter = fgLookupBB(clause.FilterOffset);
+
+ filtBB->bbCatchTyp = BBCT_FILTER;
+ filtBB->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+
+ hndBegBB->bbCatchTyp = BBCT_FILTER_HANDLER;
+
+#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+ // This will change the block weight from 0 to 1
+ // and clear the rarely run flag
+ filtBB->makeBlockHot();
+#else
+ filtBB->bbSetRunRarely(); // filter entry points are rarely executed
+#endif
+
+ // Mark all BBs that belong to the filter with the XTnum of the corresponding handler
+ for (block = filtBB; /**/; block = block->bbNext)
+ {
+ if (block == nullptr)
+ {
+ BADCODE3("Missing endfilter for filter", " at offset %04X", filtBB->bbCodeOffs);
+ return;
+ }
+
+ // Still inside the filter
+ block->setHndIndex(XTnum);
+
+ if (block->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ // Mark catch handler as successor.
+ block->bbJumpDest = hndBegBB;
+ assert(block->bbJumpDest->bbCatchTyp == BBCT_FILTER_HANDLER);
+ break;
+ }
+ }
+
+ if (!block->bbNext || block->bbNext != hndBegBB)
+ {
+ BADCODE3("Filter does not immediately precede handler for filter", " at offset %04X",
+ filtBB->bbCodeOffs);
+ }
+ }
+ else
+ {
+ HBtab->ebdTyp = clause.ClassToken;
+
+ /* Set bbCatchTyp as appropriate */
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FINALLY)
+ {
+ hndBegBB->bbCatchTyp = BBCT_FINALLY;
+ }
+ else
+ {
+ if (clause.Flags & CORINFO_EH_CLAUSE_FAULT)
+ {
+ hndBegBB->bbCatchTyp = BBCT_FAULT;
+ }
+ else
+ {
+ hndBegBB->bbCatchTyp = clause.ClassToken;
+
+ // These values should be non-zero values that will
+ // not collide with real tokens for bbCatchTyp
+ if (clause.ClassToken == 0)
+ {
+ BADCODE("Exception catch type is Null");
+ }
+
+ noway_assert(clause.ClassToken != BBCT_FAULT);
+ noway_assert(clause.ClassToken != BBCT_FINALLY);
+ noway_assert(clause.ClassToken != BBCT_FILTER);
+ noway_assert(clause.ClassToken != BBCT_FILTER_HANDLER);
+ }
+ }
+ }
+
+ /* Mark the initial block and last blocks in the 'try' region */
+
+ tryBegBB->bbFlags |= BBF_TRY_BEG | BBF_HAS_LABEL;
+
+ /* Prevent future optimizations of removing the first block */
+ /* of a TRY block and the first block of an exception handler */
+
+ tryBegBB->bbFlags |= BBF_DONT_REMOVE;
+ hndBegBB->bbFlags |= BBF_DONT_REMOVE;
+ hndBegBB->bbRefs++; // The first block of a handler gets an extra, "artificial" reference count.
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filtBB->bbFlags |= BBF_DONT_REMOVE;
+ filtBB->bbRefs++; // The first block of a filter gets an extra, "artificial" reference count.
+ }
+
+ tryBegBB->bbFlags |= BBF_DONT_REMOVE;
+ hndBegBB->bbFlags |= BBF_DONT_REMOVE;
+
+ //
+ // Store the info to the table of EH block handlers
+ //
+
+ HBtab->ebdHandlerType = ToEHHandlerType(clause.Flags);
+
+ HBtab->ebdTryBeg = tryBegBB;
+ HBtab->ebdTryLast = (tryEndBB == nullptr) ? fgLastBB : tryEndBB->bbPrev;
+
+ HBtab->ebdHndBeg = hndBegBB;
+ HBtab->ebdHndLast = (hndEndBB == nullptr) ? fgLastBB : hndEndBB->bbPrev;
+
+ //
+ // Assert that all of our try/hnd blocks are setup correctly.
+ //
+ if (HBtab->ebdTryLast == nullptr)
+ {
+ BADCODE("Try Clause is invalid");
+ }
+
+ if (HBtab->ebdHndLast == nullptr)
+ {
+ BADCODE("Handler Clause is invalid");
+ }
+
+ //
+ // Verify that it's legal
+ //
+
+ verInsertEhNode(&clause, HBtab);
+
+ } // end foreach handler table entry
+
+ fgSortEHTable();
+
+ // Next, set things related to nesting that depend on the sorting being complete.
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ /* Mark all blocks in the finally/fault or catch clause */
+
+ BasicBlock* tryBegBB = HBtab->ebdTryBeg;
+ BasicBlock* hndBegBB = HBtab->ebdHndBeg;
+
+ IL_OFFSET tryBegOff = HBtab->ebdTryBegOffset;
+ IL_OFFSET tryEndOff = HBtab->ebdTryEndOffset;
+
+ IL_OFFSET hndBegOff = HBtab->ebdHndBegOffset;
+ IL_OFFSET hndEndOff = HBtab->ebdHndEndOffset;
+
+ BasicBlock* block;
+
+ for (block = hndBegBB; block && (block->bbCodeOffs < hndEndOff); block = block->bbNext)
+ {
+ if (!block->hasHndIndex())
+ {
+ block->setHndIndex(XTnum);
+ }
+
+ // All blocks in a catch handler or filter are rarely run, except the entry
+ if ((block != hndBegBB) && (hndBegBB->bbCatchTyp != BBCT_FINALLY))
+ {
+ block->bbSetRunRarely();
+ }
+ }
+
+ /* Mark all blocks within the covered range of the try */
+
+ for (block = tryBegBB; block && (block->bbCodeOffs < tryEndOff); block = block->bbNext)
+ {
+ /* Mark this BB as belonging to a 'try' block */
+
+ if (!block->hasTryIndex())
+ {
+ block->setTryIndex(XTnum);
+ }
+
+#ifdef DEBUG
+ /* Note: the BB can't span the 'try' block */
+
+ if (!(block->bbFlags & BBF_INTERNAL))
+ {
+ noway_assert(tryBegOff <= block->bbCodeOffs);
+ noway_assert(tryEndOff >= block->bbCodeOffsEnd || tryEndOff == tryBegOff);
+ }
+#endif
+ }
+
+/* Init ebdHandlerNestingLevel of current clause, and bump up value for all
+ * enclosed clauses (which have to be before it in the table).
+ * Innermost try-finally blocks must precede outermost
+ * try-finally blocks.
+ */
+
+#if !FEATURE_EH_FUNCLETS
+ HBtab->ebdHandlerNestingLevel = 0;
+#endif // !FEATURE_EH_FUNCLETS
+
+ HBtab->ebdEnclosingTryIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+ HBtab->ebdEnclosingHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+
+ noway_assert(XTnum < compHndBBtabCount);
+ noway_assert(XTnum == ehGetIndex(HBtab));
+
+ for (EHblkDsc* xtab = compHndBBtab; xtab < HBtab; xtab++)
+ {
+#if !FEATURE_EH_FUNCLETS
+ if (jitIsBetween(xtab->ebdHndBegOffs(), hndBegOff, hndEndOff))
+ {
+ xtab->ebdHandlerNestingLevel++;
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ /* If we haven't recorded an enclosing try index for xtab then see
+ * if this EH region should be recorded. We check if the
+ * first offset in the xtab lies within our region. If so,
+ * the last offset also must lie within the region, due to
+ * nesting rules. verInsertEhNode(), called above, checks for proper nesting.
+ */
+ if (xtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ bool begBetween = jitIsBetween(xtab->ebdTryBegOffs(), tryBegOff, tryEndOff);
+ if (begBetween)
+ {
+ // Record the enclosing scope link
+ xtab->ebdEnclosingTryIndex = (unsigned short)XTnum;
+ }
+ }
+
+ /* Do the same for the enclosing handler index.
+ */
+ if (xtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ bool begBetween = jitIsBetween(xtab->ebdTryBegOffs(), hndBegOff, hndEndOff);
+ if (begBetween)
+ {
+ // Record the enclosing scope link
+ xtab->ebdEnclosingHndIndex = (unsigned short)XTnum;
+ }
+ }
+ }
+
+ } // end foreach handler table entry
+
+#if !FEATURE_EH_FUNCLETS
+
+ EHblkDsc* HBtabEnd;
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (ehMaxHndNestingCount <= HBtab->ebdHandlerNestingLevel)
+ ehMaxHndNestingCount = HBtab->ebdHandlerNestingLevel + 1;
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
+#ifndef DEBUG
+ if (tiVerificationNeeded)
+#endif
+ {
+ // always run these checks for a debug build
+ verCheckNestingLevel(initRoot);
+ }
+
+#ifndef DEBUG
+ // fgNormalizeEH assumes that this test has been passed. And SSA assumes that fgNormalizeEHTable
+ // has been run. So do this unless we're in minOpts mode (and always in debug).
+ if (tiVerificationNeeded || !opts.MinOpts())
+#endif
+ {
+ fgCheckBasicBlockControlFlow();
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("*************** After fgFindBasicBlocks() has created the EH table\n");
+ fgDispHandlerTab();
+ }
+
+ // We can't verify the handler table until all the IL legality checks have been done (above), since bad IL
+ // (such as illegal nesting of regions) will trigger asserts here.
+ fgVerifyHandlerTab();
+#endif
+
+ fgNormalizeEH();
+}
+
+/*****************************************************************************
+ * Check control flow constraints for well formed IL. Bail if any of the constraints
+ * are violated.
+ */
+
+void Compiler::fgCheckBasicBlockControlFlow()
+{
+ assert(!fgNormalizeEHDone); // These rules aren't quite correct after EH normalization has introduced new blocks
+
+ EHblkDsc* HBtab;
+
+ for (BasicBlock* blk = fgFirstBB; blk; blk = blk->bbNext)
+ {
+ if (blk->bbFlags & BBF_INTERNAL)
+ {
+ continue;
+ }
+
+ switch (blk->bbJumpKind)
+ {
+ case BBJ_NONE: // block flows into the next one (no jump)
+
+ fgControlFlowPermitted(blk, blk->bbNext);
+
+ break;
+
+ case BBJ_ALWAYS: // block does unconditional jump to target
+
+ fgControlFlowPermitted(blk, blk->bbJumpDest);
+
+ break;
+
+ case BBJ_COND: // block conditionally jumps to the target
+
+ fgControlFlowPermitted(blk, blk->bbNext);
+
+ fgControlFlowPermitted(blk, blk->bbJumpDest);
+
+ break;
+
+ case BBJ_RETURN: // block ends with 'ret'
+
+ if (blk->hasTryIndex() || blk->hasHndIndex())
+ {
+ BADCODE3("Return from a protected block", ". Before offset %04X", blk->bbCodeOffsEnd);
+ }
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+
+ if (!blk->hasHndIndex()) // must be part of a handler
+ {
+ BADCODE3("Missing handler", ". Before offset %04X", blk->bbCodeOffsEnd);
+ }
+
+ HBtab = ehGetDsc(blk->getHndIndex());
+
+ // Endfilter allowed only in a filter block
+ if (blk->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ if (!HBtab->HasFilter())
+ {
+ BADCODE("Unexpected endfilter");
+ }
+ }
+ // endfinally allowed only in a finally/fault block
+ else if (!HBtab->HasFinallyOrFaultHandler())
+ {
+ BADCODE("Unexpected endfinally");
+ }
+
+ // The handler block should be the innermost block
+ // Exception blocks are listed, innermost first.
+ if (blk->hasTryIndex() && (blk->getTryIndex() < blk->getHndIndex()))
+ {
+ BADCODE("endfinally / endfilter in nested try block");
+ }
+
+ break;
+
+ case BBJ_THROW: // block ends with 'throw'
+ /* throw is permitted from every BB, so nothing to check */
+ /* importer makes sure that rethrow is done from a catch */
+ break;
+
+ case BBJ_LEAVE: // block always jumps to the target, maybe out of guarded
+ // region. Used temporarily until importing
+ fgControlFlowPermitted(blk, blk->bbJumpDest, TRUE);
+
+ break;
+
+ case BBJ_SWITCH: // block ends with a switch statement
+
+ BBswtDesc* swtDesc;
+ swtDesc = blk->bbJumpSwt;
+
+ assert(swtDesc);
+
+ unsigned i;
+ for (i = 0; i < swtDesc->bbsCount; i++)
+ {
+ fgControlFlowPermitted(blk, swtDesc->bbsDstTab[i]);
+ }
+
+ break;
+
+ case BBJ_EHCATCHRET: // block ends with a leave out of a catch (only #if FEATURE_EH_FUNCLETS)
+ case BBJ_CALLFINALLY: // block always calls the target finally
+ default:
+ noway_assert(!"Unexpected bbJumpKind"); // these blocks don't get created until importing
+ break;
+ }
+ }
+}
+
+/****************************************************************************
+ * Check that the leave from the block is legal.
+ * Consider removing this check here if we can do it cheaply during importing
+ */
+
+void Compiler::fgControlFlowPermitted(BasicBlock* blkSrc, BasicBlock* blkDest, BOOL isLeave)
+{
+ assert(!fgNormalizeEHDone); // These rules aren't quite correct after EH normalization has introduced new blocks
+
+ unsigned srcHndBeg, destHndBeg;
+ unsigned srcHndEnd, destHndEnd;
+ bool srcInFilter, destInFilter;
+ bool srcInCatch = false;
+
+ EHblkDsc* srcHndTab;
+
+ srcHndTab = ehInitHndRange(blkSrc, &srcHndBeg, &srcHndEnd, &srcInFilter);
+ ehInitHndRange(blkDest, &destHndBeg, &destHndEnd, &destInFilter);
+
+ /* Impose the rules for leaving or jumping from handler blocks */
+
+ if (blkSrc->hasHndIndex())
+ {
+ srcInCatch = srcHndTab->HasCatchHandler() && srcHndTab->InHndRegionILRange(blkSrc);
+
+ /* Are we jumping within the same handler index? */
+ if (BasicBlock::sameHndRegion(blkSrc, blkDest))
+ {
+ /* Do we have a filter clause? */
+ if (srcHndTab->HasFilter())
+ {
+ /* filters and catch handlers share the same EH index */
+ /* we need to check for control flow between them. */
+ if (srcInFilter != destInFilter)
+ {
+ if (!jitIsBetween(blkDest->bbCodeOffs, srcHndBeg, srcHndEnd))
+ {
+ BADCODE3("Illegal control flow between filter and handler", ". Before offset %04X",
+ blkSrc->bbCodeOffsEnd);
+ }
+ }
+ }
+ }
+ else
+ {
+ /* The handler indexes of blkSrc and blkDest are different */
+ if (isLeave)
+ {
+ /* A leave instruction must not enter the dest handler from outside */
+ if (!jitIsBetween(srcHndBeg, destHndBeg, destHndEnd))
+ {
+ BADCODE3("Illegal use of leave to enter handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else
+ {
+ /* We must use a leave to exit a handler */
+ BADCODE3("Illegal control flow out of a handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+
+ /* Do we have a filter clause? */
+ if (srcHndTab->HasFilter())
+ {
+ /* It is ok to leave from the handler block of a filter, */
+ /* but not from the filter block of a filter */
+ if (srcInFilter != destInFilter)
+ {
+ BADCODE3("Illegal to leave a filter handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+
+ /* We should never leave a finally handler */
+ if (srcHndTab->HasFinallyHandler())
+ {
+ BADCODE3("Illegal to leave a finally handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+
+ /* We should never leave a fault handler */
+ if (srcHndTab->HasFaultHandler())
+ {
+ BADCODE3("Illegal to leave a fault handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ }
+ else if (blkDest->hasHndIndex())
+ {
+ /* blkSrc was not inside a handler, but blkDst is inside a handler */
+ BADCODE3("Illegal control flow into a handler", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+
+ /* Are we jumping from a catch handler into the corresponding try? */
+ /* VB uses this for "On Error GoTo" */
+
+ if (isLeave && srcInCatch)
+ {
+ // inspect all handlers containing the jump source
+
+ bool bValidJumpToTry = false; // are we jumping in a valid way from a catch to the corresponding try?
+ bool bCatchHandlerOnly = true; // false if we are jumping out of a non-catch handler
+ EHblkDsc* ehTableEnd;
+ EHblkDsc* ehDsc;
+
+ for (ehDsc = compHndBBtab, ehTableEnd = compHndBBtab + compHndBBtabCount;
+ bCatchHandlerOnly && ehDsc < ehTableEnd; ehDsc++)
+ {
+ if (ehDsc->InHndRegionILRange(blkSrc))
+ {
+ if (ehDsc->HasCatchHandler())
+ {
+ if (ehDsc->InTryRegionILRange(blkDest))
+ {
+ // If we already considered the jump for a different try/catch,
+ // we would have two overlapping try regions with two overlapping catch
+ // regions, which is illegal.
+ noway_assert(!bValidJumpToTry);
+
+ // Allowed if it is the first instruction of an inner try
+ // (and all trys in between)
+ //
+ // try {
+ // ..
+ // _tryAgain:
+ // ..
+ // try {
+ // _tryNestedInner:
+ // ..
+ // try {
+ // _tryNestedIllegal:
+ // ..
+ // } catch {
+ // ..
+ // }
+ // ..
+ // } catch {
+ // ..
+ // }
+ // ..
+ // } catch {
+ // ..
+ // leave _tryAgain // Allowed
+ // ..
+ // leave _tryNestedInner // Allowed
+ // ..
+ // leave _tryNestedIllegal // Not Allowed
+ // ..
+ // }
+ //
+ // Note: The leave is allowed also from catches nested inside the catch shown above.
+
+ /* The common case where leave is to the corresponding try */
+ if (ehDsc->ebdIsSameTry(this, blkDest->getTryIndex()) ||
+ /* Also allowed is a leave to the start of a try which starts in the handler's try */
+ fgFlowToFirstBlockOfInnerTry(ehDsc->ebdTryBeg, blkDest, false))
+ {
+ bValidJumpToTry = true;
+ }
+ }
+ }
+ else
+ {
+ // We are jumping from a handler which is not a catch handler.
+
+ // If it's a handler, but not a catch handler, it must be either a finally or fault
+ if (!ehDsc->HasFinallyOrFaultHandler())
+ {
+ BADCODE3("Handlers must be catch, finally, or fault", ". Before offset %04X",
+ blkSrc->bbCodeOffsEnd);
+ }
+
+ // Are we jumping out of this handler?
+ if (!ehDsc->InHndRegionILRange(blkDest))
+ {
+ bCatchHandlerOnly = false;
+ }
+ }
+ }
+ else if (ehDsc->InFilterRegionILRange(blkSrc))
+ {
+ // Are we jumping out of a filter?
+ if (!ehDsc->InFilterRegionILRange(blkDest))
+ {
+ bCatchHandlerOnly = false;
+ }
+ }
+ }
+
+ if (bCatchHandlerOnly)
+ {
+ if (bValidJumpToTry)
+ {
+ return;
+ }
+ else
+ {
+ // FALL THROUGH
+ // This is either the case of a leave to outside the try/catch,
+ // or a leave to a try not nested in this try/catch.
+ // The first case is allowed, the second one will be checked
+ // later when we check the try block rules (it is illegal if we
+ // jump to the middle of the destination try).
+ }
+ }
+ else
+ {
+ BADCODE3("illegal leave to exit a finally, fault or filter", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+
+ /* Check all the try block rules */
+
+ IL_OFFSET srcTryBeg;
+ IL_OFFSET srcTryEnd;
+ IL_OFFSET destTryBeg;
+ IL_OFFSET destTryEnd;
+
+ ehInitTryRange(blkSrc, &srcTryBeg, &srcTryEnd);
+ ehInitTryRange(blkDest, &destTryBeg, &destTryEnd);
+
+ /* Are we jumping between try indexes? */
+ if (!BasicBlock::sameTryRegion(blkSrc, blkDest))
+ {
+ // Are we exiting from an inner to outer try?
+ if (jitIsBetween(srcTryBeg, destTryBeg, destTryEnd) && jitIsBetween(srcTryEnd - 1, destTryBeg, destTryEnd))
+ {
+ if (!isLeave)
+ {
+ BADCODE3("exit from try block without a leave", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else if (jitIsBetween(destTryBeg, srcTryBeg, srcTryEnd))
+ {
+ // check that the dest Try is first instruction of an inner try
+ if (!fgFlowToFirstBlockOfInnerTry(blkSrc, blkDest, false))
+ {
+ BADCODE3("control flow into middle of try", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else // there is no nesting relationship between src and dest
+ {
+ if (isLeave)
+ {
+ // check that the dest Try is first instruction of an inner try sibling
+ if (!fgFlowToFirstBlockOfInnerTry(blkSrc, blkDest, true))
+ {
+ BADCODE3("illegal leave into middle of try", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ else
+ {
+ BADCODE3("illegal control flow in to/out of try block", ". Before offset %04X", blkSrc->bbCodeOffsEnd);
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * Check that blkDest is the first block of an inner try or a sibling
+ * with no intervening trys in between
+ */
+
+bool Compiler::fgFlowToFirstBlockOfInnerTry(BasicBlock* blkSrc, BasicBlock* blkDest, bool sibling)
+{
+ assert(!fgNormalizeEHDone); // These rules aren't quite correct after EH normalization has introduced new blocks
+
+ noway_assert(blkDest->hasTryIndex());
+
+ unsigned XTnum = blkDest->getTryIndex();
+ unsigned lastXTnum = blkSrc->hasTryIndex() ? blkSrc->getTryIndex() : compHndBBtabCount;
+ noway_assert(XTnum < compHndBBtabCount);
+ noway_assert(lastXTnum <= compHndBBtabCount);
+
+ EHblkDsc* HBtab = ehGetDsc(XTnum);
+
+ // check that we are not jumping into middle of try
+ if (HBtab->ebdTryBeg != blkDest)
+ {
+ return false;
+ }
+
+ if (sibling)
+ {
+ noway_assert(!BasicBlock::sameTryRegion(blkSrc, blkDest));
+
+ // Find the least upper bound (l.u.b.) of the two try ranges
+ // and set lastXTnum to it.
+
+ HBtab = ehGetDsc(lastXTnum);
+
+ for (lastXTnum++, HBtab++; lastXTnum < compHndBBtabCount; lastXTnum++, HBtab++)
+ {
+ if (jitIsBetweenInclusive(blkDest->bbNum, HBtab->ebdTryBeg->bbNum, HBtab->ebdTryLast->bbNum))
+ {
+ break;
+ }
+ }
+ }
+
+ // now check there are no intervening trys between dest and l.u.b
+ // (it is ok to have intervening trys as long as they all start at
+ // the same code offset)
+
+ HBtab = ehGetDsc(XTnum);
+
+ for (XTnum++, HBtab++; XTnum < lastXTnum; XTnum++, HBtab++)
+ {
+ if (HBtab->ebdTryBeg->bbNum < blkDest->bbNum && blkDest->bbNum <= HBtab->ebdTryLast->bbNum)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*****************************************************************************
+ * Returns the handler nesting level of the block.
+ * *pFinallyNesting is set to the nesting level of the inner-most
+ * finally-protected try the block is in.
+ */
+
+unsigned Compiler::fgGetNestingLevel(BasicBlock* block, unsigned* pFinallyNesting)
+{
+ unsigned curNesting = 0; // How many handlers is the block in
+ unsigned tryFin = (unsigned)-1; // curNesting when we see innermost finally-protected try
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ /* We find the block's handler nesting level by walking over the
+ complete exception table and finding the enclosing clauses. */
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ noway_assert(HBtab->ebdTryBeg && HBtab->ebdHndBeg);
+
+ if (HBtab->HasFinallyHandler() && (tryFin == (unsigned)-1) && bbInTryRegions(XTnum, block))
+ {
+ tryFin = curNesting;
+ }
+ else if (bbInHandlerRegions(XTnum, block))
+ {
+ curNesting++;
+ }
+ }
+
+ if (tryFin == (unsigned)-1)
+ {
+ tryFin = curNesting;
+ }
+
+ if (pFinallyNesting)
+ {
+ *pFinallyNesting = curNesting - tryFin;
+ }
+
+ return curNesting;
+}
+
+/*****************************************************************************
+ *
+ * Import the basic blocks of the procedure.
+ */
+
+void Compiler::fgImport()
+{
+ fgHasPostfix = false;
+
+ impImport(fgFirstBB);
+
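+ // Unless verification was skipped entirely, report back to the EE whether
+ // the importer found the method's IL to be verifiable.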
+ if (!(opts.eeFlags & CORJIT_FLG_SKIP_VERIFICATION))
+ {
+ CorInfoMethodRuntimeFlags verFlag;
+ verFlag = tiIsVerifiableCode ? CORINFO_FLG_VERIFIABLE : CORINFO_FLG_UNVERIFIABLE;
+ info.compCompHnd->setMethodAttribs(info.compMethodHnd, verFlag);
+ }
+}
+
+/*****************************************************************************
+ * This function returns true if tree is a call to a helper
+ * that unconditionally throws an exception
+ */
+
+bool Compiler::fgIsThrow(GenTreePtr tree)
+{
+ if ((tree->gtOper != GT_CALL) || (tree->gtCall.gtCallType != CT_HELPER))
+ {
+ return false;
+ }
+
+ // TODO-Throughput: Replace all these calls to eeFindHelper() with a table based lookup
+
+ if ((tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_OVERFLOW)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VERIFICATION)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RNGCHKFAIL)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWDIVZERO)) ||
+#if COR_JIT_EE_VERSION > 460
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROWNULLREF)) ||
+#endif // COR_JIT_EE_VERSION
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_THROW)) ||
+ (tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_RETHROW)))
+ {
+ noway_assert(tree->gtFlags & GTF_CALL);
+ noway_assert(tree->gtFlags & GTF_EXCEPT);
+ return true;
+ }
+
+ // TODO-CQ: there are a bunch of managed methods in [mscorlib]System.ThrowHelper
+ // that would be nice to recognize.
+
+ return false;
+}
+
+/*****************************************************************************
+ * This function returns true for blocks that are in different hot-cold regions.
+ * It returns false when the blocks are both in the same region.
+ */
+
+bool Compiler::fgInDifferentRegions(BasicBlock* blk1, BasicBlock* blk2)
+{
+ noway_assert(blk1 != nullptr);
+ noway_assert(blk2 != nullptr);
+
+ if (fgFirstColdBlock == nullptr)
+ {
+ return false;
+ }
+
+ // If one block is Hot and the other is Cold then we are in different regions
+ return ((blk1->bbFlags & BBF_COLD) != (blk2->bbFlags & BBF_COLD));
+}
+
+bool Compiler::fgIsBlockCold(BasicBlock* blk)
+{
+ noway_assert(blk != nullptr);
+
+ if (fgFirstColdBlock == nullptr)
+ {
+ return false;
+ }
+
+ return ((blk->bbFlags & BBF_COLD) != 0);
+}
+
+/*****************************************************************************
+ * This function returns true if tree is a GT_COMMA node with a call
+ * that unconditionally throws an exception
+ */
+
+bool Compiler::fgIsCommaThrow(GenTreePtr tree, bool forFolding /* = false */)
+{
+ // Instead of always folding comma throws,
+ // with stress enabled we only fold half the time
+
+ if (forFolding && compStressCompile(STRESS_FOLD, 50))
+ {
+ return false; /* Don't fold */
+ }
+
+ /* Check for a GT_COMMA whose first operand is a call that unconditionally throws */
+ if ((tree->gtOper == GT_COMMA) && (tree->gtFlags & GTF_CALL) && (tree->gtFlags & GTF_EXCEPT))
+ {
+ return (fgIsThrow(tree->gtOp.gtOp1));
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// fgIsIndirOfAddrOfLocal: Determine whether "tree" is an indirection of a local.
+//
+// Arguments:
+// tree - The tree node under consideration
+//
+// Return Value:
+// If "tree" is a indirection (GT_IND, GT_BLK, or GT_OBJ) whose arg is an ADDR,
+// whose arg in turn is a LCL_VAR, return that LCL_VAR node, else nullptr.
+//
+// static
+GenTreePtr Compiler::fgIsIndirOfAddrOfLocal(GenTreePtr tree)
+{
+ GenTreePtr res = nullptr;
+ if (tree->OperIsIndir())
+ {
+ GenTreePtr addr = tree->AsIndir()->Addr();
+
+ // Post rationalization, we can have Indir(Lea(..)) trees. Therefore to recognize
+ // Indir of addr of a local, skip over Lea in Indir(Lea(base, index, scale, offset))
+ // to get to base variable.
+ if (addr->OperGet() == GT_LEA)
+ {
+ // We use this method in backward dataflow after liveness computation - fgInterBlockLocalVarLiveness().
+ // Therefore it is critical that we don't miss 'uses' of any local. It may seem that this method
+ // overlooks a use when the index part of the LEA contains indir( someAddrOperator ( lclVar ) ),
+ // but that case is covered because we traverse the expression in execution order and also visit the index.
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ GenTreePtr base = lea->Base();
+
+ if (base != nullptr)
+ {
+ if (base->OperGet() == GT_IND)
+ {
+ return fgIsIndirOfAddrOfLocal(base);
+ }
+ // else use base as addr
+ addr = base;
+ }
+ }
+
+ if (addr->OperGet() == GT_ADDR)
+ {
+ GenTreePtr lclvar = addr->gtOp.gtOp1;
+ if (lclvar->OperGet() == GT_LCL_VAR)
+ {
+ res = lclvar;
+ }
+ }
+ else if (addr->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ res = addr;
+ }
+ }
+ return res;
+}
+
+GenTreePtr Compiler::fgGetStaticsCCtorHelper(CORINFO_CLASS_HANDLE cls, CorInfoHelpFunc helper)
+{
+ bool bNeedClassID = true;
+ unsigned callFlags = 0;
+
+ var_types type = TYP_BYREF;
+
+ // This is sort of ugly, as we have knowledge of what the helper is returning.
+ // We need the return type.
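+ // The helpers that return a GC-tracked statics base produce TYP_BYREF;
+ // the rest produce TYP_I_IMPL. The *_NOCTOR variants are additionally
+ // marked GTF_CALL_HOISTABLE.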
+ switch (helper)
+ {
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+ bNeedClassID = false;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR:
+ callFlags |= GTF_CALL_HOISTABLE;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS:
+ // type = TYP_BYREF;
+ break;
+
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+ bNeedClassID = false;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR:
+ callFlags |= GTF_CALL_HOISTABLE;
+ __fallthrough;
+
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS:
+ type = TYP_I_IMPL;
+ break;
+
+ default:
+ assert(!"unknown shared statics helper");
+ break;
+ }
+
+ GenTreeArgList* argList = nullptr;
+
+ GenTreePtr opModuleIDArg;
+ GenTreePtr opClassIDArg;
+
+ // Get the class ID
+ unsigned clsID;
+ size_t moduleID;
+ void* pclsID;
+ void* pmoduleID;
+
+ clsID = info.compCompHnd->getClassDomainID(cls, &pclsID);
+
+ moduleID = info.compCompHnd->getClassModuleIdForStatics(cls, nullptr, &pmoduleID);
+
+ if (!(callFlags & GTF_CALL_HOISTABLE))
+ {
+ if (info.compCompHnd->getClassAttribs(cls) & CORINFO_FLG_BEFOREFIELDINIT)
+ {
+ callFlags |= GTF_CALL_HOISTABLE;
+ }
+ }
+
+ if (pmoduleID)
+ {
+ opModuleIDArg = gtNewIconHandleNode((size_t)pmoduleID, GTF_ICON_CIDMID_HDL);
+ opModuleIDArg = gtNewOperNode(GT_IND, TYP_I_IMPL, opModuleIDArg);
+ opModuleIDArg->gtFlags |= GTF_IND_INVARIANT;
+ }
+ else
+ {
+ opModuleIDArg = gtNewIconNode((size_t)moduleID, TYP_I_IMPL);
+ }
+
+ if (bNeedClassID)
+ {
+ if (pclsID)
+ {
+ opClassIDArg = gtNewIconHandleNode((size_t)pclsID, GTF_ICON_CIDMID_HDL);
+ opClassIDArg = gtNewOperNode(GT_IND, TYP_INT, opClassIDArg);
+ opClassIDArg->gtFlags |= GTF_IND_INVARIANT;
+ }
+ else
+ {
+ opClassIDArg = gtNewIconNode(clsID, TYP_INT);
+ }
+
+ // call the helper to get the base
+ argList = gtNewArgList(opModuleIDArg, opClassIDArg);
+ }
+ else
+ {
+ argList = gtNewArgList(opModuleIDArg);
+ }
+
+ if (!s_helperCallProperties.NoThrow(helper))
+ {
+ callFlags |= GTF_EXCEPT;
+ }
+
+ return gtNewHelperCallNode(helper, type, callFlags, argList);
+}
+
+GenTreePtr Compiler::fgGetSharedCCtor(CORINFO_CLASS_HANDLE cls)
+{
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ ZeroMemory(&resolvedToken, sizeof(resolvedToken));
+ resolvedToken.hClass = cls;
+
+ return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF);
+ }
+#endif
+
+ // Call the shared non-GC static helper, as it's the fastest
+ return fgGetStaticsCCtorHelper(cls, info.compCompHnd->getSharedCCtorHelper(cls));
+}
+
+//
+// Returns true if the address expression could evaluate to NULL;
+// returns false only when it provably can never be NULL.
+//
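+// For example, &lclVar (and &lclVar plus a small constant offset) can never
+// be null, whereas an address formed from arbitrary non-constant operands is
+// conservatively treated as possibly null.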
+bool Compiler::fgAddrCouldBeNull(GenTreePtr addr)
+{
+ if (addr->gtOper == GT_ADDR)
+ {
+ if (addr->gtOp.gtOp1->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns1Tree = addr->gtOp.gtOp1;
+ if (!cns1Tree->IsIconHandle())
+ {
+ // Indirection of some random constant...
+ // It is safest just to return true
+ return true;
+ }
+ }
+ else if (addr->gtOp.gtOp1->OperIsLocalAddr())
+ {
+ return false;
+ }
+ return false; // we can't have a null address
+ }
+ else if (addr->gtOper == GT_ADD)
+ {
+ if (addr->gtOp.gtOp1->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns1Tree = addr->gtOp.gtOp1;
+ if (!cns1Tree->IsIconHandle())
+ {
+ if (!fgIsBigOffset(cns1Tree->gtIntCon.gtIconVal))
+ {
+ // Op1 was an ordinary small constant
+ return fgAddrCouldBeNull(addr->gtOp.gtOp2);
+ }
+ }
+ else // Op1 was a handle represented as a constant
+ {
+ // Is Op2 also a constant?
+ if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns2Tree = addr->gtOp.gtOp2;
+ // Is this an addition of a handle and constant
+ if (!cns2Tree->IsIconHandle())
+ {
+ if (!fgIsBigOffset(cns2Tree->gtIntCon.gtIconVal))
+ {
+ // Op2 was an ordinary small constant
+ return false; // we can't have a null address
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ // Op1 is not a constant
+ // What about Op2?
+ if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ GenTreePtr cns2Tree = addr->gtOp.gtOp2;
+ // Is this an addition of a small constant
+ if (!cns2Tree->IsIconHandle())
+ {
+ if (!fgIsBigOffset(cns2Tree->gtIntCon.gtIconVal))
+ {
+ // Op2 was an ordinary small constant
+ return fgAddrCouldBeNull(addr->gtOp.gtOp1);
+ }
+ }
+ }
+ }
+ }
+ return true; // default result: addr could be null
+}
+
+/*****************************************************************************
+ * Optimize the call to the delegate constructor.
+ */
+
+GenTreePtr Compiler::fgOptimizeDelegateConstructor(GenTreePtr call, CORINFO_CONTEXT_HANDLE* ExactContextHnd)
+{
+ noway_assert(call->gtOper == GT_CALL);
+
+ noway_assert(call->gtCall.gtCallType == CT_USER_FUNC);
+ CORINFO_METHOD_HANDLE methHnd = call->gtCall.gtCallMethHnd;
+ CORINFO_CLASS_HANDLE clsHnd = info.compCompHnd->getMethodClass(methHnd);
+
+ GenTreePtr targetMethod = call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+ noway_assert(targetMethod->TypeGet() == TYP_I_IMPL);
+ genTreeOps oper = targetMethod->OperGet();
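+
+ // The target-method argument can take three shapes: a direct ldftn
+ // (GT_FTN_ADDR), an ldvirtftn lowered to a helper call (GT_CALL), or a
+ // generic dictionary lookup (GT_QMARK); each is unwrapped below to recover
+ // the compile-time method handle when possible.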
+ if (oper == GT_FTN_ADDR || oper == GT_CALL || oper == GT_QMARK)
+ {
+ CORINFO_METHOD_HANDLE targetMethodHnd = nullptr;
+ GenTreePtr qmarkNode = nullptr;
+ if (oper == GT_FTN_ADDR)
+ {
+ targetMethodHnd = targetMethod->gtFptrVal.gtFptrMethod;
+ }
+ else if (oper == GT_CALL && targetMethod->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR))
+ {
+ GenTreePtr handleNode = targetMethod->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp1;
+
+ if (handleNode->OperGet() == GT_CNS_INT)
+ {
+ // It's an ldvirtftn case; fetch the method handle off the helper for ldvirtftn. It's the 3rd arg.
+ targetMethodHnd = CORINFO_METHOD_HANDLE(handleNode->gtIntCon.gtCompileTimeHandle);
+ }
+ // Sometimes the argument to this is the result of a generic dictionary lookup, which shows
+ // up as a GT_QMARK.
+ else if (handleNode->OperGet() == GT_QMARK)
+ {
+ qmarkNode = handleNode;
+ }
+ }
+ // Sometimes we don't call CORINFO_HELP_VIRTUAL_FUNC_PTR but instead just call
+ // CORINFO_HELP_RUNTIMEHANDLE_METHOD directly.
+ else if (oper == GT_QMARK)
+ {
+ qmarkNode = targetMethod;
+ }
+ if (qmarkNode)
+ {
+ noway_assert(qmarkNode->OperGet() == GT_QMARK);
+ // The argument is actually a generic dictionary lookup. For delegate creation it looks
+ // like:
+ // GT_QMARK
+ // GT_COLON
+ // op1 -> call
+ // Arg 1 -> token (has compile time handle)
+ // op2 -> lclvar
+ //
+ //
+ // In this case I can find the token (which is a method handle) and that is the compile time
+ // handle.
+ noway_assert(qmarkNode->gtOp.gtOp2->OperGet() == GT_COLON);
+ noway_assert(qmarkNode->gtOp.gtOp2->gtOp.gtOp1->OperGet() == GT_CALL);
+ GenTreePtr runtimeLookupCall = qmarkNode->gtOp.gtOp2->gtOp.gtOp1;
+
+ // This could be any of CORINFO_HELP_RUNTIMEHANDLE_(METHOD|CLASS)(_LOG?)
+ GenTreePtr tokenNode = runtimeLookupCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+ noway_assert(tokenNode->OperGet() == GT_CNS_INT);
+ targetMethodHnd = CORINFO_METHOD_HANDLE(tokenNode->gtIntCon.gtCompileTimeHandle);
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ // ReadyToRun has this optimization for non-virtual function pointers only for now.
+ if (oper == GT_FTN_ADDR)
+ {
+ // The first argument of the helper is the delegate's 'this' pointer
+ GenTreeArgList* helperArgs = gtNewArgList(call->gtCall.gtCallObjp);
+ CORINFO_CONST_LOOKUP entryPoint;
+
+ // The second argument of the helper is the target object pointer
+ helperArgs->gtOp.gtOp2 = gtNewArgList(call->gtCall.gtCallArgs->gtOp.gtOp1);
+
+ call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_DELEGATE_CTOR, TYP_VOID, GTF_EXCEPT, helperArgs);
+#if COR_JIT_EE_VERSION > 460
+ info.compCompHnd->getReadyToRunDelegateCtorHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken, clsHnd,
+ &entryPoint);
+#else
+ info.compCompHnd->getReadyToRunHelper(targetMethod->gtFptrVal.gtLdftnResolvedToken,
+ CORINFO_HELP_READYTORUN_DELEGATE_CTOR, &entryPoint);
+#endif
+ call->gtCall.setEntryPoint(entryPoint);
+ }
+ }
+ else
+#endif
+ if (targetMethodHnd != nullptr)
+ {
+ CORINFO_METHOD_HANDLE alternateCtor = nullptr;
+ DelegateCtorArgs ctorData;
+ ctorData.pMethod = info.compMethodHnd;
+ ctorData.pArg3 = nullptr;
+ ctorData.pArg4 = nullptr;
+ ctorData.pArg5 = nullptr;
+
+ alternateCtor = info.compCompHnd->GetDelegateCtor(methHnd, clsHnd, targetMethodHnd, &ctorData);
+ if (alternateCtor != methHnd)
+ {
+ // We erase any inline info that may have been set for generics, as it is not needed here,
+ // and in fact it would pass the wrong info to the inliner code.
+ *ExactContextHnd = nullptr;
+
+ call->gtCall.gtCallMethHnd = alternateCtor;
+
+ noway_assert(call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 == nullptr);
+ if (ctorData.pArg3)
+ {
+ call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2 =
+ gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg3), GTF_ICON_FTN_ADDR));
+
+ if (ctorData.pArg4)
+ {
+ call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
+ gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg4), GTF_ICON_FTN_ADDR));
+
+ if (ctorData.pArg5)
+ {
+ call->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2->gtOp.gtOp2 =
+ gtNewArgList(gtNewIconHandleNode(size_t(ctorData.pArg5), GTF_ICON_FTN_ADDR));
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return call;
+}
+
+bool Compiler::fgCastNeeded(GenTreePtr tree, var_types toType)
+{
+ //
+ // If tree is a relop and we need a 4-byte integer
+ // then we never need to insert a cast
+ //
+ if ((tree->OperKind() & GTK_RELOP) && (genActualType(toType) == TYP_INT))
+ {
+ return false;
+ }
+
+ var_types fromType;
+
+ //
+ // Is the tree a GT_CAST or a GT_CALL?
+ //
+ if (tree->OperGet() == GT_CAST)
+ {
+ fromType = tree->CastToType();
+ }
+ else if (tree->OperGet() == GT_CALL)
+ {
+ fromType = (var_types)tree->gtCall.gtReturnType;
+ }
+ else
+ {
+ fromType = tree->TypeGet();
+ }
+
+ //
+ // If both types are the same then an additional cast is not necessary
+ //
+ if (toType == fromType)
+ {
+ return false;
+ }
+ //
+ // If the signedness of the two types differs, then a cast is necessary
+ //
+ if (varTypeIsUnsigned(toType) != varTypeIsUnsigned(fromType))
+ {
+ return true;
+ }
+ //
+ // If the from type is the same size or smaller then an additional cast is not necessary
+ //
+ if (genTypeSize(toType) >= genTypeSize(fromType))
+ {
+ return false;
+ }
+
+ //
+ // Looks like we will need the cast
+ //
+ return true;
+}
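+
+// Illustrative sketch (not part of the JIT sources): once 'fromType' has been determined,
+// the size/sign portion of the decision above can be viewed as a small stand-alone predicate.
+// The name 'castNeededForSizes' is hypothetical and only mirrors the logic of fgCastNeeded.
+//
+//     bool castNeededForSizes(bool toUnsigned, bool fromUnsigned, unsigned toSize, unsigned fromSize)
+//     {
+//         if (toUnsigned != fromUnsigned)
+//             return true;  // signedness differs: cast needed
+//         if (toSize >= fromSize)
+//             return false; // same size or widening: no extra cast
+//         return true;      // narrowing: cast needed
+//     }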
+
+// If assigning to a local var, add a cast on the RHS if the target is
+// marked as normalize-on-store. Returns the (possibly updated) tree.
+GenTreePtr Compiler::fgDoNormalizeOnStore(GenTreePtr tree)
+{
+ //
+ // Only normalize the stores in the global morph phase
+ //
+ if (fgGlobalMorph)
+ {
+ noway_assert(tree->OperGet() == GT_ASG);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op1->gtOper == GT_LCL_VAR && genActualType(op1->TypeGet()) == TYP_INT)
+ {
+ // Small-typed arguments and aliased locals are normalized on load.
+ // Other small-typed locals are normalized on store.
+ // If it is an assignment to one of the latter, insert the cast on RHS
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ if (varDsc->lvNormalizeOnStore())
+ {
+ noway_assert(op1->gtType <= TYP_INT);
+ op1->gtType = TYP_INT;
+
+ if (fgCastNeeded(op2, varDsc->TypeGet()))
+ {
+ op2 = gtNewCastNode(TYP_INT, op2, varDsc->TypeGet());
+ tree->gtOp.gtOp2 = op2;
+
+ // Propagate GTF_COLON_COND
+ op2->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
+ }
+ }
+ }
+ }
+
+ return tree;
+}
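+
+// Illustrative sketch (not part of the JIT sources): for a normalize-on-store local of a
+// small type (e.g. short), the rewrite above turns, approximately,
+//
+//     ASG(lclVar<short> x, expr)
+// into
+//     ASG(lclVar<int> x, CAST<short>(expr))
+//
+// i.e. the destination is widened to TYP_INT and the narrowing cast is pushed onto the RHS,
+// and the cast is only inserted when fgCastNeeded(expr, short) says it is required.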
+
+/*****************************************************************************
+ *
+ * Mark whether the edge "srcBB -> dstBB" forms a loop that will always
+ * execute a call or not.
+ */
+
+inline void Compiler::fgLoopCallTest(BasicBlock* srcBB, BasicBlock* dstBB)
+{
+ /* Bail if this is not a backward edge */
+
+ if (srcBB->bbNum < dstBB->bbNum)
+ {
+ return;
+ }
+
+ /* Unless we already know that there is a loop without a call here ... */
+
+ if (!(dstBB->bbFlags & BBF_LOOP_CALL0))
+ {
+ /* Check whether there is a loop path that doesn't call */
+
+ if (optReachWithoutCall(dstBB, srcBB))
+ {
+ dstBB->bbFlags |= BBF_LOOP_CALL0;
+ dstBB->bbFlags &= ~BBF_LOOP_CALL1;
+ }
+ else
+ {
+ dstBB->bbFlags |= BBF_LOOP_CALL1;
+ }
+ }
+ // if this loop will always call, then we can omit the GC Poll
+ if ((GCPOLL_NONE != opts.compGCPollType) && (dstBB->bbFlags & BBF_LOOP_CALL1))
+ {
+ srcBB->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+}
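+
+// Illustrative sketch (not part of the JIT sources): the flag update above is roughly the
+// following hypothetical helper, where 'hasCallFreePath' stands for optReachWithoutCall():
+//
+//     void markLoopCallFlags(BasicBlock* dstBB, bool hasCallFreePath)
+//     {
+//         if (hasCallFreePath)
+//         {
+//             dstBB->bbFlags |= BBF_LOOP_CALL0;  // some path around the loop makes no call
+//             dstBB->bbFlags &= ~BBF_LOOP_CALL1;
+//         }
+//         else
+//         {
+//             dstBB->bbFlags |= BBF_LOOP_CALL1;  // every path around the loop makes a call
+//         }
+//     }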
+
+/*****************************************************************************
+ *
+ * Mark which loops are guaranteed to execute a call.
+ */
+
+void Compiler::fgLoopCallMark()
+{
+ BasicBlock* block;
+
+ /* If we've already marked all the blocks, bail */
+
+ if (fgLoopCallMarked)
+ {
+ return;
+ }
+
+ fgLoopCallMarked = true;
+
+ /* Walk the blocks, looking for backward edges */
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ fgLoopCallTest(block, block->bbJumpDest);
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpPtr;
+ jumpPtr = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ fgLoopCallTest(block, *jumpPtr);
+ } while (++jumpPtr, --jumpCnt);
+
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Note the fact that the given block is a loop header.
+ */
+
+inline void Compiler::fgMarkLoopHead(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgMarkLoopHead: Checking loop head block BB%02u: ", block->bbNum);
+ }
+#endif
+
+ /* Have we decided to generate fully interruptible code already? */
+
+ if (genInterruptible)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("method is already fully interruptible\n");
+ }
+#endif
+ return;
+ }
+
+ /* Is the loop head block known to execute a method call? */
+
+ if (block->bbFlags & BBF_GC_SAFE_POINT)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("this block will execute a call\n");
+ }
+#endif
+ // single block loops that contain GC safe points don't need polls.
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ return;
+ }
+
+ /* Are dominator sets available? */
+
+ if (fgDomsComputed)
+ {
+ /* Make sure that we know which loops will always execute calls */
+
+ if (!fgLoopCallMarked)
+ {
+ fgLoopCallMark();
+ }
+
+ /* Will every trip through our loop execute a call? */
+
+ if (block->bbFlags & BBF_LOOP_CALL1)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("this block dominates a block that will execute a call\n");
+ }
+#endif
+ return;
+ }
+ }
+
+ /*
+ * We have to make this method fully interruptible since we can not
+ * ensure that this loop will execute a call every time it loops.
+ *
+ * We'll also need to generate a full register map for this method.
+ */
+
+ assert(!codeGen->isGCTypeFixed());
+
+ if (!compCanEncodePtrArgCntMax())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("a callsite with more than 1023 pushed args exists\n");
+ }
+#endif
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("no guaranteed callsite exists, marking method as fully interruptible\n");
+ }
+#endif
+
+ // Only enable fully interruptible code if we're hijacking.
+ if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ genInterruptible = true;
+ }
+}
+
+GenTreePtr Compiler::fgGetCritSectOfStaticMethod()
+{
+ noway_assert(!compIsForInlining());
+
+ noway_assert(info.compIsStatic); // This method should only be called for static methods.
+
+ GenTreePtr tree = nullptr;
+
+ CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
+
+ if (!kind.needsRuntimeLookup)
+ {
+ void *critSect = nullptr, **pCrit = nullptr;
+ critSect = info.compCompHnd->getMethodSync(info.compMethodHnd, (void**)&pCrit);
+ noway_assert((!critSect) != (!pCrit));
+
+ tree = gtNewIconEmbHndNode(critSect, pCrit, GTF_ICON_METHOD_HDL);
+ }
+ else
+ {
+ // Collectible types require that, for shared generic code, we report the generic context parameter
+ // if we use it. (This is a conservative approach; we could detect some cases, particularly when the
+ // context parameter is 'this', where the eager reporting logic is not needed.)
+ lvaGenericsContextUsed = true;
+
+ switch (kind.runtimeLookupKind)
+ {
+ case CORINFO_LOOKUP_THISOBJ:
+ {
+ noway_assert(!"Should never get this for static method.");
+ break;
+ }
+
+ case CORINFO_LOOKUP_CLASSPARAM:
+ {
+ // In this case, the hidden param is the class handle.
+ tree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ break;
+ }
+
+ case CORINFO_LOOKUP_METHODPARAM:
+ {
+ // In this case, the hidden param is the method handle.
+ tree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ // Call helper CORINFO_HELP_GETCLASSFROMMETHODPARAM to get the class handle
+ // from the method handle.
+ tree = gtNewHelperCallNode(CORINFO_HELP_GETCLASSFROMMETHODPARAM, TYP_I_IMPL, 0, gtNewArgList(tree));
+ break;
+ }
+
+ default:
+ {
+ noway_assert(!"Unknown LOOKUP_KIND");
+ break;
+ }
+ }
+
+ noway_assert(tree); // tree should now contain the CORINFO_CLASS_HANDLE for the exact class.
+
+ // Given the class handle, get the pointer to the Monitor.
+ tree = gtNewHelperCallNode(CORINFO_HELP_GETSYNCFROMCLASSHANDLE, TYP_I_IMPL, 0, gtNewArgList(tree));
+ }
+
+ noway_assert(tree);
+ return tree;
+}
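+
+// Illustrative sketch (not part of the JIT sources) of the tree shapes produced above for
+// the runtime-lookup cases; the helper names are real, the shapes are approximate:
+//
+//     CORINFO_LOOKUP_CLASSPARAM:
+//         call CORINFO_HELP_GETSYNCFROMCLASSHANDLE(lclVar compTypeCtxtArg)
+//
+//     CORINFO_LOOKUP_METHODPARAM:
+//         call CORINFO_HELP_GETSYNCFROMCLASSHANDLE(
+//             call CORINFO_HELP_GETCLASSFROMMETHODPARAM(lclVar compTypeCtxtArg))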
+
+#if !defined(_TARGET_X86_)
+
+/*****************************************************************************
+ *
+ * Add monitor enter/exit calls for synchronized methods, and a try/fault
+ * to ensure the 'exit' is called if the 'enter' was successful. On x86, we
+ * generate monitor enter/exit calls and tell the VM the code location of
+ * these calls. When an exception occurs between those locations, the VM
+ * automatically releases the lock. For non-x86 platforms, the JIT is
+ * responsible for creating a try/finally to protect the monitor enter/exit,
+ * and the VM doesn't need to know anything special about the method during
+ * exception processing -- it's just a normal try/finally.
+ *
+ * We generate the following code:
+ *
+ * void Foo()
+ * {
+ * unsigned byte acquired = 0;
+ * try {
+ * JIT_MonEnterWorker(<lock object>, &acquired);
+ *
+ * *** all the preexisting user code goes here ***
+ *
+ * JIT_MonExitWorker(<lock object>, &acquired);
+ * } fault {
+ * JIT_MonExitWorker(<lock object>, &acquired);
+ * }
+ * L_return:
+ * ret
+ * }
+ *
+ * If the lock is actually acquired, then the 'acquired' variable is set to 1
+ * by the helper call. During normal exit, the monitor exit call at the end of the
+ * 'try' runs, 'acquired' is 1, and the lock is released. If an exception occurs before the lock is
+ * acquired, but within the 'try' (extremely unlikely, but possible), 'acquired'
+ * will be 0, and the monitor exit call will quickly return without attempting
+ * to release the lock. Otherwise, 'acquired' will be 1, and the lock will be
+ * released during exception processing.
+ *
+ * For synchronized methods, we generate a single return block.
+ * We can do this without creating additional "step" blocks because "ret" blocks
+ * must occur at the top-level (of the original code), not nested within any EH
+ * constructs. From the CLI spec, 12.4.2.8.2.3 "ret": "Shall not be enclosed in any
+ * protected block, filter, or handler." Also, 3.57: "The ret instruction cannot be
+ * used to transfer control out of a try, filter, catch, or finally block. From within
+ * a try or catch, use the leave instruction with a destination of a ret instruction
+ * that is outside all enclosing exception blocks."
+ *
+ * In addition, we can add a "fault" at the end of a method and be guaranteed that no
+ * control falls through. From the CLI spec, section 12.4 "Control flow": "Control is not
+ * permitted to simply fall through the end of a method. All paths shall terminate with one
+ * of these instructions: ret, throw, jmp, or (tail. followed by call, calli, or callvirt)."
+ *
+ * We only need to worry about "ret" and "throw", as the CLI spec prevents any other
+ * alternatives. Section 15.4.3.3 "Implementation information" states about exiting
+ * synchronized methods: "Exiting a synchronized method using a tail. call shall be
+ * implemented as though the tail. had not been specified." Section 3.37 "jmp" states:
+ * "The jmp instruction cannot be used to transfer control out of a try, filter,
+ * catch, fault or finally block; or out of a synchronized region." And, "throw" will
+ * be handled naturally; no additional work is required.
+ */
+
+void Compiler::fgAddSyncMethodEnterExit()
+{
+ assert((info.compFlags & CORINFO_FLG_SYNCH) != 0);
+
+ // We need to do this transformation before funclets are created.
+ assert(!fgFuncletsCreated);
+
+ // Assume we don't need to update the bbPreds lists.
+ assert(!fgComputePredsDone);
+
+#if !FEATURE_EH
+ // If we don't support EH, we can't add the EH needed by synchronized methods.
+ // Of course, we could simply ignore adding the EH constructs, since we don't
+ // support exceptions being thrown in this mode, but we would still need to add
+ // the monitor enter/exit, and that doesn't seem worth it for this minor case.
+ // By the time EH is working, we can just enable the whole thing.
+ NYI("No support for synchronized methods");
+#endif // !FEATURE_EH
+
+ // Create a scratch first BB where we can put the new variable initialization.
+ // Don't put the scratch BB in the protected region.
+
+ fgEnsureFirstBBisScratch();
+
+ // Create a block for the start of the try region, where the monitor enter call
+ // will go.
+
+ assert(fgFirstBB->bbFallsThrough());
+
+ BasicBlock* tryBegBB = fgNewBBafter(BBJ_NONE, fgFirstBB, false);
+ BasicBlock* tryLastBB = fgLastBB;
+
+ // Create a block for the fault.
+
+ assert(!tryLastBB->bbFallsThrough());
+ BasicBlock* faultBB = fgNewBBafter(BBJ_EHFINALLYRET, tryLastBB, false);
+
+ assert(tryLastBB->bbNext == faultBB);
+ assert(faultBB->bbNext == nullptr);
+ assert(faultBB == fgLastBB);
+
+ { // Scope the EH region creation
+
+ // Add the new EH region at the end, since it is the least nested,
+ // and thus should be last.
+
+ EHblkDsc* newEntry;
+ unsigned XTnew = compHndBBtabCount;
+
+ newEntry = fgAddEHTableEntry(XTnew);
+
+ // Initialize the new entry
+
+ newEntry->ebdHandlerType = EH_HANDLER_FAULT;
+
+ newEntry->ebdTryBeg = tryBegBB;
+ newEntry->ebdTryLast = tryLastBB;
+
+ newEntry->ebdHndBeg = faultBB;
+ newEntry->ebdHndLast = faultBB;
+
+ newEntry->ebdTyp = 0; // unused for fault
+
+ newEntry->ebdEnclosingTryIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+ newEntry->ebdEnclosingHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+
+ newEntry->ebdTryBegOffset = tryBegBB->bbCodeOffs;
+ newEntry->ebdTryEndOffset = tryLastBB->bbCodeOffsEnd;
+ newEntry->ebdFilterBegOffset = 0;
+ newEntry->ebdHndBegOffset = 0; // handler doesn't correspond to any IL
+ newEntry->ebdHndEndOffset = 0; // handler doesn't correspond to any IL
+
+ // Set some flags on the new region. This is the same as when we set up
+ // EH regions in fgFindBasicBlocks(). Note that the try has no enclosing
+ // handler, and the fault has no enclosing try.
+
+ tryBegBB->bbFlags |= BBF_HAS_LABEL | BBF_DONT_REMOVE | BBF_TRY_BEG | BBF_IMPORTED;
+
+ faultBB->bbFlags |= BBF_HAS_LABEL | BBF_DONT_REMOVE | BBF_IMPORTED;
+ faultBB->bbCatchTyp = BBCT_FAULT;
+
+ tryBegBB->setTryIndex(XTnew);
+ tryBegBB->clearHndIndex();
+
+ faultBB->clearTryIndex();
+ faultBB->setHndIndex(XTnew);
+
+ // Walk the user code blocks and set all blocks that don't already have a try handler
+ // to point to the new try handler.
+
+ BasicBlock* tmpBB;
+ for (tmpBB = tryBegBB->bbNext; tmpBB != faultBB; tmpBB = tmpBB->bbNext)
+ {
+ if (!tmpBB->hasTryIndex())
+ {
+ tmpBB->setTryIndex(XTnew);
+ }
+ }
+
+ // Walk the EH table. Make every EH entry that doesn't already have an enclosing
+ // try index mark this new entry as their enclosing try index.
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < XTnew; XTnum++, HBtab++)
+ {
+ if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ HBtab->ebdEnclosingTryIndex =
+ (unsigned short)XTnew; // This EH region wasn't previously nested, but now it is.
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Synchronized method - created additional EH descriptor EH#%u for try/fault wrapping monitor "
+ "enter/exit\n",
+ XTnew);
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ fgVerifyHandlerTab();
+#endif // DEBUG
+ }
+
+ // Create a 'monitor acquired' boolean (actually, an unsigned byte: 1 = acquired, 0 = not acquired).
+
+ var_types typeMonAcquired = TYP_UBYTE;
+ this->lvaMonAcquired = lvaGrabTemp(true DEBUGARG("Synchronized method monitor acquired boolean"));
+
+ lvaTable[lvaMonAcquired].lvType = typeMonAcquired;
+
+ { // Scope the variables of the variable initialization
+
+ // Initialize the 'acquired' boolean.
+
+ GenTreePtr zero = gtNewZeroConNode(genActualType(typeMonAcquired));
+ GenTreePtr varNode = gtNewLclvNode(lvaMonAcquired, typeMonAcquired);
+ GenTreePtr initNode = gtNewAssignNode(varNode, zero);
+
+ fgInsertStmtAtEnd(fgFirstBB, initNode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add 'acquired' initialization in first block BB%02u [%08p]\n",
+ fgFirstBB->bbNum, dspPtr(fgFirstBB));
+ gtDispTree(initNode);
+ printf("\n");
+ }
+#endif
+ }
+
+ // Make a copy of the 'this' pointer to be used in the handler so it does not inhibit enregistration
+ // of all uses of the variable.
+ unsigned lvaCopyThis = 0;
+ if (!info.compIsStatic)
+ {
+ lvaCopyThis = lvaGrabTemp(true DEBUGARG("Synchronized method copy of this for handler"));
+ lvaTable[lvaCopyThis].lvType = TYP_REF;
+
+ GenTreePtr thisNode = gtNewLclvNode(info.compThisArg, TYP_REF);
+ GenTreePtr copyNode = gtNewLclvNode(lvaCopyThis, TYP_REF);
+ GenTreePtr initNode = gtNewAssignNode(copyNode, thisNode);
+
+ fgInsertStmtAtEnd(tryBegBB, initNode);
+ }
+
+ fgCreateMonitorTree(lvaMonAcquired, info.compThisArg, tryBegBB, true /*enter*/);
+
+ // exceptional case
+ fgCreateMonitorTree(lvaMonAcquired, lvaCopyThis, faultBB, false /*exit*/);
+
+ // non-exceptional cases
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ fgCreateMonitorTree(lvaMonAcquired, info.compThisArg, block, false /*exit*/);
+ }
+ }
+}
+
+// fgCreateMonitorTree: Create tree to execute a monitor enter or exit operation for synchronized methods
+// lvaMonAcquired: lvaNum of boolean variable that tracks if monitor has been acquired.
+// lvaThisVar: lvaNum of variable being used as 'this' pointer, may not be the original one. Is only used for
+// nonstatic methods
+// block: block to insert the tree in. It is inserted at the end or in the case of a return, immediately before the
+// GT_RETURN
+// enter: whether to create a monitor enter or exit
+
+GenTree* Compiler::fgCreateMonitorTree(unsigned lvaMonAcquired, unsigned lvaThisVar, BasicBlock* block, bool enter)
+{
+ // Insert the expression "enter/exitCrit(this, &acquired)" or "enter/exitCrit(handle, &acquired)"
+
+ var_types typeMonAcquired = TYP_UBYTE;
+ GenTreePtr varNode = gtNewLclvNode(lvaMonAcquired, typeMonAcquired);
+ GenTreePtr varAddrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, varNode);
+ GenTreePtr tree;
+
+ if (info.compIsStatic)
+ {
+ tree = fgGetCritSectOfStaticMethod();
+ tree = gtNewHelperCallNode(enter ? CORINFO_HELP_MON_ENTER_STATIC : CORINFO_HELP_MON_EXIT_STATIC, TYP_VOID, 0,
+ gtNewArgList(tree, varAddrNode));
+ }
+ else
+ {
+ tree = gtNewLclvNode(lvaThisVar, TYP_REF);
+ tree = gtNewHelperCallNode(enter ? CORINFO_HELP_MON_ENTER : CORINFO_HELP_MON_EXIT, TYP_VOID, 0,
+ gtNewArgList(tree, varAddrNode));
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add monitor %s call to block BB%02u [%08p]\n", enter ? "enter" : "exit",
+ block->bbNum, dspPtr(block));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+ if (block->bbJumpKind == BBJ_RETURN && block->lastStmt()->gtStmtExpr->gtOper == GT_RETURN)
+ {
+ GenTree* retNode = block->lastStmt()->gtStmtExpr;
+ GenTree* retExpr = retNode->gtOp.gtOp1;
+
+ if (retExpr != nullptr)
+ {
+ // have to insert this immediately before the GT_RETURN so we transform:
+ // ret(...) ->
+ // ret(comma(comma(tmp=..., call mon_exit), tmp))
+ //
+ //
+ // Before the morph stage, it is possible to have a case of GT_RETURN(TYP_LONG, op1) where op1's type is
+ // TYP_STRUCT (of 8 bytes) and op1 is a call node. See the big comment block in impReturnInstruction()
+ // for details of the case where info.compRetType is not the same as info.compRetNativeType. For
+ // this reason we pass compMethodInfo->args.retTypeClass, which is guaranteed to be a valid class handle
+ // if the return type is a value class. Note that fgInsertCommaFormTemp() in turn uses this class handle,
+ // if the type of op1 is TYP_STRUCT, to perform lvaSetStruct() on the new temp that is created, which
+ // in turn passes it to the VM so it knows the size of the value type.
+ GenTree* temp = fgInsertCommaFormTemp(&retNode->gtOp.gtOp1, info.compMethodInfo->args.retTypeClass);
+
+ GenTree* lclVar = retNode->gtOp.gtOp1->gtOp.gtOp2;
+ retNode->gtOp.gtOp1->gtOp.gtOp2 = gtNewOperNode(GT_COMMA, retExpr->TypeGet(), tree, lclVar);
+ }
+ else
+ {
+ // Insert this immediately before the GT_RETURN
+ fgInsertStmtNearEnd(block, tree);
+ }
+ }
+ else
+ {
+ fgInsertStmtAtEnd(block, tree);
+ }
+
+ return tree;
+}
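+
+// Illustrative sketch (not part of the JIT sources): for a BBJ_RETURN block that returns a
+// value, the rewrite above produces, approximately,
+//
+//     GT_RETURN(expr)
+// becomes
+//     GT_RETURN(COMMA(COMMA(tmp = expr, call MonitorExit(obj, &acquired)), tmp))
+//
+// so the return value is evaluated into a temp before the monitor is released, and the temp
+// is what the method actually returns.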
+
+// Convert a BBJ_RETURN block in a synchronized method to a BBJ_ALWAYS.
+// We've previously added a 'try' block around the original program code using fgAddSyncMethodEnterExit().
+// Thus, we put BBJ_RETURN blocks inside a 'try'. In IL this is illegal. Instead, we would
+// see a 'leave' inside a 'try' that would get transformed into BBJ_CALLFINALLY/BBJ_ALWAYS blocks
+// during importing, and the BBJ_ALWAYS would point at an outer block with the BBJ_RETURN.
+// Here, we mimic some of the logic of importing a LEAVE to get the same effect for synchronized methods.
+void Compiler::fgConvertSyncReturnToLeave(BasicBlock* block)
+{
+ assert(!fgFuncletsCreated);
+ assert(info.compFlags & CORINFO_FLG_SYNCH);
+ assert(genReturnBB != nullptr);
+ assert(genReturnBB != block);
+ assert(fgReturnCount <= 1); // We have a single return for synchronized methods
+ assert(block->bbJumpKind == BBJ_RETURN);
+ assert((block->bbFlags & BBF_HAS_JMP) == 0);
+ assert(block->hasTryIndex());
+ assert(!block->hasHndIndex());
+ assert(compHndBBtabCount >= 1);
+
+ unsigned tryIndex = block->getTryIndex();
+ assert(tryIndex == compHndBBtabCount - 1); // The BBJ_RETURN must be at the top-level before we inserted the
+ // try/finally, which must be the last EH region.
+
+ EHblkDsc* ehDsc = ehGetDsc(tryIndex);
+ assert(ehDsc->ebdEnclosingTryIndex ==
+ EHblkDsc::NO_ENCLOSING_INDEX); // There are no enclosing regions of the BBJ_RETURN block
+ assert(ehDsc->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX);
+
+ // Convert the BBJ_RETURN to BBJ_ALWAYS, jumping to genReturnBB.
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = genReturnBB;
+ block->bbJumpDest->bbRefs++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Synchronized method - convert block BB%02u to BBJ_ALWAYS [targets BB%02u]\n", block->bbNum,
+ block->bbJumpDest->bbNum);
+ }
+#endif
+}
+
+#endif // !_TARGET_X86_
+
+//------------------------------------------------------------------------
+// fgAddReversePInvokeEnterExit: Add enter/exit calls for reverse PInvoke methods
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+
+void Compiler::fgAddReversePInvokeEnterExit()
+{
+ assert(opts.IsReversePInvoke());
+
+#if COR_JIT_EE_VERSION > 460
+ lvaReversePInvokeFrameVar = lvaGrabTempWithImplicitUse(false DEBUGARG("Reverse Pinvoke FrameVar"));
+
+ LclVarDsc* varDsc = &lvaTable[lvaReversePInvokeFrameVar];
+ varDsc->lvType = TYP_BLK;
+ varDsc->lvExactSize = eeGetEEInfo()->sizeOfReversePInvokeFrame;
+
+ GenTreePtr tree;
+
+ // Add enter pinvoke exit callout at the start of prolog
+
+ tree = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK));
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, TYP_VOID, 0, gtNewArgList(tree));
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtBeg(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReverse PInvoke method - Add reverse pinvoke enter in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+ // Add reverse pinvoke exit callout at the end of epilog
+
+ tree = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaReversePInvokeFrameVar, TYP_BLK));
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT, TYP_VOID, 0, gtNewArgList(tree));
+
+ assert(genReturnBB != nullptr);
+
+ fgInsertStmtAtEnd(genReturnBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReverse PInvoke method - Add reverse pinvoke exit in return basic block [%08p]\n",
+ dspPtr(genReturnBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+#endif // COR_JIT_EE_VERSION > 460
+}
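+
+// Illustrative sketch (not part of the JIT sources) of the code shape added above, written
+// as pseudo-C in the style of the synchronized-method comment earlier in this file:
+//
+//     void ReversePInvokeMethod()
+//     {
+//         byte frame[sizeOfReversePInvokeFrame];           // lvaReversePInvokeFrameVar
+//         CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER(&frame);  // in the scratch first block
+//
+//         ... original method body ...
+//
+//         CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT(&frame);   // in genReturnBB
+//         return;
+//     }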
+
+/*****************************************************************************
+ *
+ * Return 'true' if there is more than one BBJ_RETURN block.
+ */
+
+bool Compiler::fgMoreThanOneReturnBlock()
+{
+ unsigned retCnt = 0;
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ retCnt++;
+ if (retCnt > 1)
+ {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Add any internal blocks/trees we may need
+ */
+
+void Compiler::fgAddInternal()
+{
+ noway_assert(!compIsForInlining());
+
+ /*
+ <BUGNUM> VSW441487 </BUGNUM>
+
+ The "this" pointer is implicitly used in the following cases:
+ 1. Locking of synchronized methods
+ 2. Dictionary access of shared generics code
+ 3. If a method has "catch(FooException<T>)", the EH code accesses "this" to determine T.
+ 4. Initializing the type from generic methods which require precise cctor semantics
+ 5. Verifier does special handling of "this" in the .ctor
+
+ However, we might overwrite it with a "starg 0".
+ In this case, we will redirect all "ldarg(a)/starg(a) 0" to a temp lvaTable[lvaArg0Var]
+ */
+
+ if (!info.compIsStatic)
+ {
+ if (lvaArg0Var != info.compThisArg)
+ {
+ // When we're using the general encoder, we mark compThisArg address-taken to ensure that it is not
+ // enregistered (since the decoder always reports a stack location for "this" for generics
+ // context vars).
+ bool lva0CopiedForGenericsCtxt;
+#ifndef JIT32_GCENCODER
+ lva0CopiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0);
+#else // JIT32_GCENCODER
+ lva0CopiedForGenericsCtxt = false;
+#endif // JIT32_GCENCODER
+ noway_assert(lva0CopiedForGenericsCtxt || !lvaTable[info.compThisArg].lvAddrExposed);
+ noway_assert(!lvaTable[info.compThisArg].lvArgWrite);
+ noway_assert(lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite ||
+ lva0CopiedForGenericsCtxt);
+
+ var_types thisType = lvaTable[info.compThisArg].TypeGet();
+
+ // Now assign the original input "this" to the temp
+
+ GenTreePtr tree;
+
+ tree = gtNewLclvNode(lvaArg0Var, thisType);
+
+ tree = gtNewAssignNode(tree, // dst
+ gtNewLclvNode(info.compThisArg, thisType) // src
+ );
+
+ /* Create a new basic block and stick the assignment in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCopy \"this\" to lvaArg0Var in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+ }
+
+ // Grab a temp for the security object.
+ // (Note: opts.compDbgEnC currently also causes the security object to be generated. See Compiler::compCompile)
+ if (opts.compNeedSecurityCheck)
+ {
+ noway_assert(lvaSecurityObject == BAD_VAR_NUM);
+ lvaSecurityObject = lvaGrabTempWithImplicitUse(false DEBUGARG("security check"));
+ lvaTable[lvaSecurityObject].lvType = TYP_REF;
+ }
+
+ /* Assume we will generate a single shared return sequence */
+
+ ULONG returnWeight = 0;
+ bool oneReturn;
+ bool allProfWeight;
+
+ //
+ // We will generate just one epilog (return block)
+ // when we are asked to generate enter/leave callbacks
+ // or for methods with PInvoke
+ // or for methods calling into unmanaged code
+ // or for synchronized methods.
+ //
+ if (compIsProfilerHookNeeded() || (info.compCallUnmanaged != 0) || opts.IsReversePInvoke() ||
+ ((info.compFlags & CORINFO_FLG_SYNCH) != 0))
+ {
+ // We will generate only one return block
+ // We will transform the BBJ_RETURN blocks
+ // into jumps to the one return block
+ //
+ oneReturn = true;
+ allProfWeight = false;
+ }
+ else
+ {
+ //
+ // We are allowed to have multiple individual exits
+ // However we can still decide to have a single return
+ //
+ oneReturn = false;
+ allProfWeight = true;
+
+ // Count the BBJ_RETURN blocks and set the returnWeight to the
+ // sum of all these blocks.
+ //
+ fgReturnCount = 0;
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ //
+ // returnCount is the count of BBJ_RETURN blocks in this method
+ //
+ fgReturnCount++;
+ //
+ // If all BBJ_RETURN blocks have valid profile weights
+ // then allProfWeight will be true, else it is false
+ //
+ if ((block->bbFlags & BBF_PROF_WEIGHT) == 0)
+ {
+ allProfWeight = false;
+ }
+ //
+ // returnWeight is the sum of the weights of all BBJ_RETURN blocks
+ returnWeight += block->bbWeight;
+ }
+ }
+
+ //
+ // If we only have one (or zero) return blocks then
+ // we do not need a special one return block
+ //
+ if (fgReturnCount > 1)
+ {
+ //
+ // should we generate a single return block?
+ //
+ if (fgReturnCount > 4)
+ {
+ // Our epilog encoding only supports up to 4 epilogs
+ // TODO-CQ: support >4 return points for ARM/AMD64, which presumably support any number of epilogs?
+ //
+ oneReturn = true;
+ }
+ else if (compCodeOpt() == SMALL_CODE)
+ {
+ // For the Small_Code case we always generate a
+ // single return block when we have multiple
+ // return points
+ //
+ oneReturn = true;
+ }
+ }
+ }
+
+#if !defined(_TARGET_X86_)
+ // Add the synchronized method enter/exit calls and try/finally protection. Note
+ // that this must happen before the one BBJ_RETURN block is created below, so the
+ // BBJ_RETURN block gets placed at the top-level, not within an EH region. (Otherwise,
+ // we'd have to be really careful when creating the synchronized method try/finally
+ // not to include the BBJ_RETURN block.)
+ if ((info.compFlags & CORINFO_FLG_SYNCH) != 0)
+ {
+ fgAddSyncMethodEnterExit();
+ }
+#endif // !_TARGET_X86_
+
+ if (oneReturn)
+ {
+ genReturnBB = fgNewBBinRegion(BBJ_RETURN);
+ genReturnBB->bbRefs = 1; // bbRefs gets updated later; for now it should be 1
+ fgReturnCount++;
+
+ if (allProfWeight)
+ {
+ //
+ // if we have profile data for all BBJ_RETURN blocks
+ // then we can set BBF_PROF_WEIGHT for genReturnBB
+ //
+ genReturnBB->bbFlags |= BBF_PROF_WEIGHT;
+ }
+ else
+ {
+ //
+ // We can't rely upon the calculated returnWeight unless
+ // all of the BBJ_RETURN blocks had valid profile weights,
+ // so we will use the weight of the first block instead.
+ //
+ returnWeight = fgFirstBB->bbWeight;
+ }
+
+ //
+ // Set the weight of the oneReturn block
+ //
+ genReturnBB->bbWeight = min(returnWeight, BB_MAX_WEIGHT);
+
+ if (returnWeight == 0)
+ {
+ //
+ // If necessary set the Run Rarely flag
+ //
+ genReturnBB->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ // Make sure that the RunRarely flag is clear
+ // because fgNewBBinRegion will set it to true
+ //
+ genReturnBB->bbFlags &= ~BBF_RUN_RARELY;
+ }
+
+ genReturnBB->bbFlags |= (BBF_INTERNAL | BBF_DONT_REMOVE);
+
+ noway_assert(genReturnBB->bbNext == nullptr);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n genReturnBB [BB%02u] created\n", genReturnBB->bbNum);
+ }
+#endif
+ }
+ else
+ {
+ //
+ // We don't have a oneReturn block for this method
+ //
+ genReturnBB = nullptr;
+ }
+
+ // If there is a return value, then create a temp for it. Real returns will store the value in there and
+ // it'll be reloaded by the single return.
+ if (genReturnBB && compMethodHasRetVal())
+ {
+ genReturnLocal = lvaGrabTemp(true DEBUGARG("Single return block return value"));
+
+ if (compMethodReturnsNativeScalarType())
+ {
+ lvaTable[genReturnLocal].lvType = genActualType(info.compRetNativeType);
+ }
+ else if (compMethodReturnsRetBufAddr())
+ {
+ lvaTable[genReturnLocal].lvType = TYP_BYREF;
+ }
+ else if (compMethodReturnsMultiRegRetType())
+ {
+ lvaTable[genReturnLocal].lvType = TYP_STRUCT;
+ lvaSetStruct(genReturnLocal, info.compMethodInfo->args.retTypeClass, true);
+ lvaTable[genReturnLocal].lvIsMultiRegRet = true;
+ }
+ else
+ {
+ assert(!"unreached");
+ }
+
+ if (varTypeIsFloating(lvaTable[genReturnLocal].lvType))
+ {
+ this->compFloatingPointUsed = true;
+ }
+
+ if (!varTypeIsFloating(info.compRetType))
+ {
+ lvaTable[genReturnLocal].setPrefReg(REG_INTRET, this);
+#ifdef REG_FLOATRET
+ }
+ else
+ {
+ lvaTable[genReturnLocal].setPrefReg(REG_FLOATRET, this);
+ }
+#endif
+
+#ifdef DEBUG
+ // This temporary should not be converted to a double in stress mode,
+ // because we introduce assigns to it after the stress conversion
+ lvaTable[genReturnLocal].lvKeepType = 1;
+#endif
+ }
+ else
+ {
+ genReturnLocal = BAD_VAR_NUM;
+ }
+
+ if (info.compCallUnmanaged != 0)
+ {
+ // The P/Invoke helpers only require a frame variable, so only allocate the
+ // TCB variable if we're not using them.
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ info.compLvFrameListRoot = lvaGrabTemp(false DEBUGARG("Pinvoke FrameListRoot"));
+ }
+
+ lvaInlinedPInvokeFrameVar = lvaGrabTempWithImplicitUse(false DEBUGARG("Pinvoke FrameVar"));
+
+ LclVarDsc* varDsc = &lvaTable[lvaInlinedPInvokeFrameVar];
+ varDsc->addPrefReg(RBM_PINVOKE_TCB, this);
+ varDsc->lvType = TYP_BLK;
+ // Make room for the inlined frame.
+ varDsc->lvExactSize = eeGetEEInfo()->inlinedCallFrameInfo.size;
+#if FEATURE_FIXED_OUT_ARGS
+ // Grab and reserve space for TCB, Frame regs used in PInvoke epilog to pop the inlined frame.
+ // See genPInvokeMethodEpilog() for use of the grabbed var. This is only necessary if we are
+ // not using the P/Invoke helpers.
+ if (!opts.ShouldUsePInvokeHelpers() && compJmpOpUsed)
+ {
+ lvaPInvokeFrameRegSaveVar = lvaGrabTempWithImplicitUse(false DEBUGARG("PInvokeFrameRegSave Var"));
+ varDsc = &lvaTable[lvaPInvokeFrameRegSaveVar];
+ varDsc->lvType = TYP_BLK;
+ varDsc->lvExactSize = 2 * REGSIZE_BYTES;
+ }
+#endif
+ }
+
+ // Do we need to insert a "JustMyCode" callback?
+
+ CORINFO_JUST_MY_CODE_HANDLE* pDbgHandle = nullptr;
+ CORINFO_JUST_MY_CODE_HANDLE dbgHandle = nullptr;
+ if (opts.compDbgCode && !(opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ dbgHandle = info.compCompHnd->getJustMyCodeHandle(info.compMethodHnd, &pDbgHandle);
+ }
+
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: don't do just-my-code
+ dbgHandle = nullptr;
+ pDbgHandle = nullptr;
+#endif // _TARGET_ARM64_
+
+ noway_assert(!dbgHandle || !pDbgHandle);
+
+ if (dbgHandle || pDbgHandle)
+ {
+ GenTreePtr guardCheckVal =
+ gtNewOperNode(GT_IND, TYP_INT, gtNewIconEmbHndNode(dbgHandle, pDbgHandle, GTF_ICON_TOKEN_HDL));
+ GenTreePtr guardCheckCond = gtNewOperNode(GT_EQ, TYP_INT, guardCheckVal, gtNewZeroConNode(TYP_INT));
+ guardCheckCond->gtFlags |= GTF_RELOP_QMARK;
+
+ // Create the callback which will yield the final answer
+
+ GenTreePtr callback = gtNewHelperCallNode(CORINFO_HELP_DBG_IS_JUST_MY_CODE, TYP_VOID);
+ callback = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), callback);
+
+ // Stick the conditional call at the start of the method
+
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtEnd(fgFirstBB, gtNewQmarkNode(TYP_VOID, guardCheckCond, callback));
+ }
+
+ /* Do we need to call out for security ? */
+
+ if (tiSecurityCalloutNeeded)
+ {
+ // We must have grabbed this local.
+ noway_assert(opts.compNeedSecurityCheck);
+ noway_assert(lvaSecurityObject != BAD_VAR_NUM);
+
+ GenTreePtr tree;
+
+ /* Insert the expression "call JIT_Security_Prolog(MethodHnd, &SecurityObject)" */
+
+ tree = gtNewIconEmbMethHndNode(info.compMethodHnd);
+
+ tree = gtNewHelperCallNode(info.compCompHnd->getSecurityPrologHelper(info.compMethodHnd), TYP_VOID, 0,
+ gtNewArgList(tree, gtNewOperNode(GT_ADDR, TYP_BYREF,
+ gtNewLclvNode(lvaSecurityObject, TYP_REF))));
+
+ /* Create a new basic block and stick the call in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\ntiSecurityCalloutNeeded - Add call JIT_Security_Prolog(%08p) statement ",
+ dspPtr(info.compMethodHnd));
+ printTreeID(tree);
+ printf(" in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+
+#if defined(_TARGET_X86_)
+
+ /* Is this a 'synchronized' method? */
+
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ GenTreePtr tree = nullptr;
+
+ /* Insert the expression "enterCrit(this)" or "enterCrit(handle)" */
+
+ if (info.compIsStatic)
+ {
+ tree = fgGetCritSectOfStaticMethod();
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_ENTER_STATIC, TYP_VOID, 0, gtNewArgList(tree));
+ }
+ else
+ {
+ noway_assert(lvaTable[info.compThisArg].lvType == TYP_REF);
+
+ tree = gtNewLclvNode(info.compThisArg, TYP_REF);
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_ENTER, TYP_VOID, 0, gtNewArgList(tree));
+ }
+
+ /* Create a new basic block and stick the call in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add enterCrit statement in first basic block [%08p]\n", dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+
+ /* We must be generating a single exit point for this to work */
+
+ noway_assert(oneReturn);
+ noway_assert(genReturnBB);
+
+ /* Create the expression "exitCrit(this)" or "exitCrit(handle)" */
+
+ if (info.compIsStatic)
+ {
+ tree = fgGetCritSectOfStaticMethod();
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_EXIT_STATIC, TYP_VOID, 0, gtNewArgList(tree));
+ }
+ else
+ {
+ tree = gtNewLclvNode(info.compThisArg, TYP_REF);
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_MON_EXIT, TYP_VOID, 0, gtNewArgList(tree));
+ }
+
+ fgInsertStmtAtEnd(genReturnBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSynchronized method - Add exit expression ");
+ printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ // Reset cookies used to track start and end of the protected region in synchronized methods
+ syncStartEmitCookie = nullptr;
+ syncEndEmitCookie = nullptr;
+ }
+
+#endif // _TARGET_X86_
+
+ /* Do we need to do runtime call out to check the security? */
+
+ if (tiRuntimeCalloutNeeded)
+ {
+ GenTreePtr tree;
+
+ /* Insert the expression "call verificationRuntimeCheck(MethodHnd)" */
+
+ tree = gtNewIconEmbMethHndNode(info.compMethodHnd);
+
+ tree = gtNewHelperCallNode(CORINFO_HELP_VERIFICATION_RUNTIME_CHECK, TYP_VOID, 0, gtNewArgList(tree));
+
+ /* Create a new basic block and stick the call in it */
+
+ fgEnsureFirstBBisScratch();
+
+ fgInsertStmtAtEnd(fgFirstBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\ntiRuntimeCalloutNeeded - Call verificationRuntimeCheck(%08p) statement in first basic block "
+ "[%08p]\n",
+ dspPtr(info.compMethodHnd), dspPtr(fgFirstBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+
+ if (opts.IsReversePInvoke())
+ {
+ fgAddReversePInvokeEnterExit();
+ }
+
+ //
+ // Add 'return' expression to the return block if we made it as "oneReturn" before.
+ //
+ if (oneReturn)
+ {
+ GenTreePtr tree;
+
+ //
+ // Make the 'return' expression.
+ //
+
+ // make sure to reload the return value as part of the return (it is saved by the "real return").
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ noway_assert(compMethodHasRetVal());
+
+ GenTreePtr retTemp = gtNewLclvNode(genReturnLocal, lvaTable[genReturnLocal].TypeGet());
+
+ // make sure copy prop ignores this node (make sure it always does a reload from the temp).
+ retTemp->gtFlags |= GTF_DONT_CSE;
+ tree = gtNewOperNode(GT_RETURN, retTemp->gtType, retTemp);
+ }
+ else
+ {
+ noway_assert(info.compRetType == TYP_VOID || varTypeIsStruct(info.compRetType));
+ // return void
+ tree = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID);
+ }
+
+ /* Add 'return' expression to the return block */
+
+ noway_assert(genReturnBB);
+
+ fgInsertStmtAtEnd(genReturnBB, tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\noneReturn statement tree ");
+ printTreeID(tree);
+ printf(" added to genReturnBB [%08p]\n", dspPtr(genReturnBB));
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgAddInternal()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif
+}
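+
+// Illustrative sketch (not part of the JIT sources): when 'oneReturn' is chosen above, the
+// overall effect on the method is approximately
+//
+//     ...
+//     retTemp = <return expression>;   // original BBJ_RETURN blocks store into genReturnLocal
+//     goto genReturnBB;                // and are converted to jumps to the shared block
+//     ...
+//     genReturnBB:
+//         return retTemp;              // the single GT_RETURN added above reloads the temp
+//
+// where 'retTemp' stands for genReturnLocal (BAD_VAR_NUM when the method returns void).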
+
+/*****************************************************************************
+ *
+ * Create a new statement from tree and wire the links up.
+ */
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block, IL_OFFSETX offs)
+{
+ GenTreeStmt* stmt = gtNewStmt(tree, offs);
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+
+#if DEBUG
+ if (block != nullptr)
+ {
+ fgDebugCheckNodeLinks(block, stmt);
+ }
+#endif
+
+ return stmt;
+}
+
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree)
+{
+ return fgNewStmtFromTree(tree, nullptr, BAD_IL_OFFSET);
+}
+
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block)
+{
+ return fgNewStmtFromTree(tree, block, BAD_IL_OFFSET);
+}
+
+GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, IL_OFFSETX offs)
+{
+ return fgNewStmtFromTree(tree, nullptr, offs);
+}
+
+//------------------------------------------------------------------------
+// fgFindBlockILOffset: Given a block, find the IL offset corresponding to the first statement
+// in the block with a legal IL offset. Skip any leading statements that have BAD_IL_OFFSET.
+// If no statement has an initialized statement offset (including the case where there are
+// no statements in the block), then return BAD_IL_OFFSET. This function is used when
+// blocks are split or modified, and we want to maintain the IL offset as much as possible
+// to preserve good debugging behavior.
+//
+// Arguments:
+// block - The block to check.
+//
+// Return Value:
+// The first good IL offset of a statement in the block, or BAD_IL_OFFSET if such an IL offset
+// cannot be found.
+//
+// If we are not built with DEBUGGING_SUPPORT or DEBUG, then always report BAD_IL_OFFSET,
+// since in that case statements don't contain an IL offset. The effect will be that split
+// blocks will lose their IL offset information.
+
+IL_OFFSET Compiler::fgFindBlockILOffset(BasicBlock* block)
+{
+ // This function searches for IL offsets in statement nodes, so it can't be used in LIR. We
+ // could have a similar function for LIR that searches for GT_IL_OFFSET nodes.
+ assert(!block->IsLIR());
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ for (GenTree* stmt = block->bbTreeList; stmt != nullptr; stmt = stmt->gtNext)
+ {
+ assert(stmt->IsStatement());
+ if (stmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ return jitGetILoffs(stmt->gtStmt.gtStmtILoffsx);
+ }
+ }
+#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ return BAD_IL_OFFSET;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAtEnd - split the given block into two blocks.
+// All code in the block stays in the original block.
+// Control falls through from original to new block, and
+// the new block is returned.
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAtEnd(BasicBlock* curr)
+{
+ // We'd like to use fgNewBBafter(), but we need to update the preds list before linking in the new block.
+ // (We need the successors of 'curr' to be correct when we do this.)
+ BasicBlock* newBlock = bbNewBasicBlock(curr->bbJumpKind);
+
+ // Start the new block with no refs. When we set the preds below, this will get updated correctly.
+ newBlock->bbRefs = 0;
+
+ // For each successor of the original block, set the new block as their predecessor.
+ // Note we are using the "rational" version of the successor iterator that does not hide the finallyret arcs.
+ // Without these arcs, a block 'b' may not be a member of succs(preds(b))
+ if (curr->bbJumpKind != BBJ_SWITCH)
+ {
+ unsigned numSuccs = curr->NumSucc(this);
+ for (unsigned i = 0; i < numSuccs; i++)
+ {
+ BasicBlock* succ = curr->GetSucc(i, this);
+ if (succ != newBlock)
+ {
+ JITDUMP("BB%02u previous predecessor was BB%02u, now is BB%02u\n", succ->bbNum, curr->bbNum,
+ newBlock->bbNum);
+ fgReplacePred(succ, curr, newBlock);
+ }
+ }
+
+ newBlock->bbJumpDest = curr->bbJumpDest;
+ curr->bbJumpDest = nullptr;
+ }
+ else
+ {
+ // In the case of a switch statement there's more complicated logic in order to wire up the predecessor lists
+ // but fortunately there's an existing method that implements this functionality.
+ newBlock->bbJumpSwt = curr->bbJumpSwt;
+
+ fgChangeSwitchBlock(curr, newBlock);
+
+ curr->bbJumpSwt = nullptr;
+ }
+
+ newBlock->inheritWeight(curr);
+
+ // Set the new block's flags. Note that the new block isn't BBF_INTERNAL unless the old block is.
+ newBlock->bbFlags = curr->bbFlags;
+
+ // Remove flags that the new block can't have.
+ newBlock->bbFlags &= ~(BBF_TRY_BEG | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_HAS_LABEL |
+ BBF_JMP_TARGET | BBF_FUNCLET_BEG | BBF_LOOP_PREHEADER | BBF_KEEP_BBJ_ALWAYS);
+
+ // Remove the GC safe bit on the new block. It seems clear that if we split 'curr' at the end,
+ // such that all the code is left in 'curr', and 'newBlock' just gets the control flow, then
+ // both 'curr' and 'newBlock' could accurately retain an existing GC safe bit. However, callers
+ // use this function to split blocks in the middle, or at the beginning, and they don't seem to
+ // be careful about updating this flag appropriately. So, removing the GC safe bit is simply
+ // conservative: some functions might end up being fully interruptible that could be partially
+ // interruptible if we exercised more care here.
+ newBlock->bbFlags &= ~BBF_GC_SAFE_POINT;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ newBlock->bbFlags &= ~(BBF_FINALLY_TARGET);
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // The new block has no code, so we leave bbCodeOffs/bbCodeOffsEnd set to BAD_IL_OFFSET. If a caller
+ // puts code in the block, then it needs to update these.
+
+ // Insert the new block in the block list after the 'curr' block.
+ fgInsertBBafter(curr, newBlock);
+ fgExtendEHRegionAfter(curr); // The new block is in the same EH region as the old block.
+
+ // Remove flags from the old block that are no longer possible.
+ curr->bbFlags &= ~(BBF_HAS_JMP | BBF_RETLESS_CALL);
+
+ // Default to fallthru, and add the arc for that.
+ curr->bbJumpKind = BBJ_NONE;
+ fgAddRefPred(newBlock, curr);
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAfterStatement - Split the given block, with all code after
+// the given statement going into the second block.
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAfterStatement(BasicBlock* curr, GenTree* stmt)
+{
+ assert(!curr->IsLIR()); // No statements in LIR, so you can't use this function.
+
+ BasicBlock* newBlock = fgSplitBlockAtEnd(curr);
+
+ if (stmt)
+ {
+ newBlock->bbTreeList = stmt->gtNext;
+ if (newBlock->bbTreeList)
+ {
+ newBlock->bbTreeList->gtPrev = curr->bbTreeList->gtPrev;
+ }
+ curr->bbTreeList->gtPrev = stmt;
+ stmt->gtNext = nullptr;
+
+ // Update the IL offsets of the blocks to match the split.
+
+ assert(newBlock->bbCodeOffs == BAD_IL_OFFSET);
+ assert(newBlock->bbCodeOffsEnd == BAD_IL_OFFSET);
+
+ // curr->bbCodeOffs remains the same
+ newBlock->bbCodeOffsEnd = curr->bbCodeOffsEnd;
+
+ IL_OFFSET splitPointILOffset = fgFindBlockILOffset(newBlock);
+
+ curr->bbCodeOffsEnd = splitPointILOffset;
+ newBlock->bbCodeOffs = splitPointILOffset;
+ }
+ else
+ {
+ assert(curr->bbTreeList == nullptr); // if no tree was given then it better be an empty block
+ }
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAfterNode - Split the given block, with all code after
+// the given node going into the second block.
+// This function is only used in LIR.
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAfterNode(BasicBlock* curr, GenTree* node)
+{
+ assert(curr->IsLIR());
+
+ BasicBlock* newBlock = fgSplitBlockAtEnd(curr);
+
+ if (node != nullptr)
+ {
+ LIR::Range& currBBRange = LIR::AsRange(curr);
+
+ if (node != currBBRange.LastNode())
+ {
+ LIR::Range nodesToMove = currBBRange.Remove(node->gtNext, currBBRange.LastNode());
+ LIR::AsRange(newBlock).InsertAtBeginning(std::move(nodesToMove));
+ }
+
+ // Update the IL offsets of the blocks to match the split.
+
+ assert(newBlock->bbCodeOffs == BAD_IL_OFFSET);
+ assert(newBlock->bbCodeOffsEnd == BAD_IL_OFFSET);
+
+ // curr->bbCodeOffs remains the same
+ newBlock->bbCodeOffsEnd = curr->bbCodeOffsEnd;
+
+ // Search backwards from the end of the current block looking for the IL offset to use
+ // for the end IL offset for the original block.
+ IL_OFFSET splitPointILOffset = BAD_IL_OFFSET;
+ LIR::Range::ReverseIterator riter;
+ LIR::Range::ReverseIterator riterEnd;
+ for (riter = currBBRange.rbegin(), riterEnd = currBBRange.rend(); riter != riterEnd; ++riter)
+ {
+ if ((*riter)->gtOper == GT_IL_OFFSET)
+ {
+ GenTreeStmt* stmt = (*riter)->AsStmt();
+ if (stmt->gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ splitPointILOffset = jitGetILoffs(stmt->gtStmtILoffsx);
+ break;
+ }
+ }
+ }
+
+ curr->bbCodeOffsEnd = splitPointILOffset;
+
+ // Also use this as the beginning offset of the next block. Presumably we could/should
+ // look to see if the first node is a GT_IL_OFFSET node, and use that instead.
+ newBlock->bbCodeOffs = splitPointILOffset;
+ }
+ else
+ {
+ assert(curr->bbTreeList == nullptr); // if no node was given then it better be an empty block
+ }
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------------
+// fgSplitBlockAtBeginning - Split the given block into two blocks.
+// Control falls through from original to new block,
+// and the new block is returned.
+// All code in the original block goes into the new block
+//------------------------------------------------------------------------------
+BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr)
+{
+ BasicBlock* newBlock = fgSplitBlockAtEnd(curr);
+
+ newBlock->bbTreeList = curr->bbTreeList;
+ curr->bbTreeList = nullptr;
+
+ // The new block now has all the code, and the old block has none. Update the
+ // IL offsets for the block to reflect this.
+
+ newBlock->bbCodeOffs = curr->bbCodeOffs;
+ newBlock->bbCodeOffsEnd = curr->bbCodeOffsEnd;
+
+ curr->bbCodeOffs = BAD_IL_OFFSET;
+ curr->bbCodeOffsEnd = BAD_IL_OFFSET;
+
+ return newBlock;
+}
+
+//------------------------------------------------------------------------
+// fgSplitEdge: Splits the edge between a block 'curr' and its successor 'succ' by creating a new block
+// that replaces 'succ' as a successor of 'curr', and which branches unconditionally
+// to (or falls through to) 'succ'. Note that for a BBJ_COND block 'curr',
+// 'succ' might be the fall-through path or the branch path from 'curr'.
+//
+// Arguments:
+// curr - A block which branches conditionally to 'succ'
+// succ - The target block
+//
+// Return Value:
+// Returns a new block, that is a successor of 'curr' and which branches unconditionally to 'succ'
+//
+// Assumptions:
+// 'curr' must have a bbJumpKind of BBJ_COND or BBJ_SWITCH
+//
+// Notes:
+// The returned block is empty.
+
+BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ)
+{
+ assert(curr->bbJumpKind == BBJ_COND || curr->bbJumpKind == BBJ_SWITCH);
+ assert(fgGetPredForBlock(succ, curr) != nullptr);
+
+ BasicBlock* newBlock;
+ if (succ == curr->bbNext)
+ {
+ // The successor is the fall-through path of a BBJ_COND, or
+ // an immediately following block of a BBJ_SWITCH (which has
+ // no fall-through path). For this case, simply insert a new
+ // fall-through block after 'curr'.
+ newBlock = fgNewBBafter(BBJ_NONE, curr, true /*extendRegion*/);
+ }
+ else
+ {
+ newBlock = fgNewBBinRegion(BBJ_ALWAYS, curr, curr->isRunRarely());
+ // The new block always jumps to 'succ'
+ newBlock->bbJumpDest = succ;
+ }
+ newBlock->bbFlags |= (curr->bbFlags & succ->bbFlags & (BBF_BACKWARD_JUMP));
+
+ JITDUMP("Splitting edge from BB%02u to BB%02u; adding BB%02u\n", curr->bbNum, succ->bbNum, newBlock->bbNum);
+
+ if (curr->bbJumpKind == BBJ_COND)
+ {
+ fgReplacePred(succ, curr, newBlock);
+ if (curr->bbJumpDest == succ)
+ {
+ // Now 'curr' jumps to newBlock
+ curr->bbJumpDest = newBlock;
+ newBlock->bbFlags |= BBF_JMP_TARGET;
+ }
+ fgAddRefPred(newBlock, curr);
+ }
+ else
+ {
+ assert(curr->bbJumpKind == BBJ_SWITCH);
+
+ // newBlock replaces 'succ' in the switch.
+ fgReplaceSwitchJumpTarget(curr, newBlock, succ);
+
+ // And 'succ' has 'newBlock' as a new predecessor.
+ fgAddRefPred(succ, newBlock);
+ }
+
+ // This isn't accurate, but it is complex to compute a reasonable number so just assume that we take the
+ // branch 50% of the time.
+ newBlock->inheritWeightPercentage(curr, 50);
+
+ // The bbLiveIn and bbLiveOut are both equal to the bbLiveIn of 'succ'
+ if (fgLocalVarLivenessDone)
+ {
+ VarSetOps::Assign(this, newBlock->bbLiveIn, succ->bbLiveIn);
+ VarSetOps::Assign(this, newBlock->bbLiveOut, succ->bbLiveIn);
+ }
+
+ return newBlock;
+}
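+
+// Illustrative sketch (not part of the JIT sources): splitting the taken edge of a BBJ_COND
+// block B1 whose branch target is B3 gives, approximately,
+//
+//     before:                               after:
+//         B1 (BBJ_COND) --taken--> B3           B1 --taken--> Bnew (BBJ_ALWAYS) --> B3
+//         B1 --fall-through--> B2               B1 --fall-through--> B2
+//
+// The new block is empty, is assumed to run about 50% as often as B1, and gives callers a
+// place to insert code that must execute only along the B1 -> B3 edge.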
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+void Compiler::fgFindOperOrder()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgFindOperOrder()\n");
+ }
+#endif
+
+ BasicBlock* block;
+ GenTreeStmt* stmt;
+
+ /* Walk the basic blocks and for each statement determine
+ * the evaluation order, cost, FP levels, etc... */
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+ for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Recursively process the statement */
+
+ compCurStmt = stmt;
+ gtSetStmtInfo(stmt);
+ }
+ }
+}
+
+/*****************************************************************************/
+void Compiler::fgSimpleLowering()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Walk the statement trees in this basic block, converting ArrLength nodes.
+ compCurBB = block; // Used in fgRngChkTarget.
+
+#ifdef LEGACY_BACKEND
+ for (GenTreeStmt* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ for (GenTreePtr tree = stmt->gtStmtList; tree; tree = tree->gtNext)
+ {
+#else
+ LIR::Range& range = LIR::AsRange(block);
+ for (GenTree* tree : range)
+ {
+ {
+#endif
+ if (tree->gtOper == GT_ARR_LENGTH)
+ {
+ GenTreeArrLen* arrLen = tree->AsArrLen();
+ GenTreePtr arr = arrLen->gtArrLen.ArrRef();
+ GenTreePtr add;
+ GenTreePtr con;
+
+ /* Create the expression "*(array_addr + ArrLenOffs)" */
+
+ noway_assert(arr->gtNext == tree);
+
+ noway_assert(arrLen->ArrLenOffset() == offsetof(CORINFO_Array, length) ||
+ arrLen->ArrLenOffset() == offsetof(CORINFO_String, stringLen));
+
+ if ((arr->gtOper == GT_CNS_INT) && (arr->gtIntCon.gtIconVal == 0))
+ {
+ // If the array is NULL, then we should get a NULL reference
+ // exception when computing its length. We need to maintain
+ // an invariant where there is no sum of two constants node, so
+ // let's simply return an indirection of NULL.
+
+ add = arr;
+ }
+ else
+ {
+ con = gtNewIconNode(arrLen->ArrLenOffset(), TYP_I_IMPL);
+ con->gtRsvdRegs = 0;
+
+ add = gtNewOperNode(GT_ADD, TYP_REF, arr, con);
+ add->gtRsvdRegs = arr->gtRsvdRegs;
+
+#ifdef LEGACY_BACKEND
+ con->gtCopyFPlvl(arr);
+
+ add->gtCopyFPlvl(arr);
+ add->CopyCosts(arr);
+
+ arr->gtNext = con;
+ con->gtPrev = arr;
+
+ con->gtNext = add;
+ add->gtPrev = con;
+
+ add->gtNext = tree;
+ tree->gtPrev = add;
+#else
+ range.InsertAfter(arr, con, add);
+#endif
+ }
+
+ // Change to a GT_IND.
+ tree->ChangeOperUnchecked(GT_IND);
+
+ tree->gtOp.gtOp1 = add;
+ }
+ else if (tree->OperGet() == GT_ARR_BOUNDS_CHECK
+#ifdef FEATURE_SIMD
+ || tree->OperGet() == GT_SIMD_CHK
+#endif // FEATURE_SIMD
+ )
+ {
+ // Add in a call to an error routine.
+ fgSetRngChkTarget(tree, false);
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose && fgRngChkThrowAdded)
+ {
+ printf("\nAfter fgSimpleLowering() added some RngChk throw blocks");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif
+}
+
+/*****************************************************************************
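+ *
+ *  'clonedTree' has been added to 'addedToBlock'; if local variable ref counts are
+ *  being kept, increment the counts for every local referenced by the cloned tree.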
+ */
+
+void Compiler::fgUpdateRefCntForClone(BasicBlock* addedToBlock, GenTreePtr clonedTree)
+{
+ assert(clonedTree->gtOper != GT_STMT);
+
+ if (lvaLocalVarRefCounted)
+ {
+ compCurBB = addedToBlock;
+ fgWalkTreePre(&clonedTree, Compiler::lvaIncRefCntsCB, (void*)this, true);
+ }
+}
+
+/*****************************************************************************
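+ *
+ *  Update local variable ref counts after extracting 'keptTree' (the side effects we
+ *  keep, possibly null) from 'wholeTree' (the tree being removed): first increment the
+ *  counts for 'keptTree', then decrement the counts for everything in 'wholeTree'.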
+ */
+
+void Compiler::fgUpdateRefCntForExtract(GenTreePtr wholeTree, GenTreePtr keptTree)
+{
+ if (lvaLocalVarRefCounted)
+ {
+ /* Update the refCnts of removed lcl vars - The problem is that
+ * we have to consider back the side effects trees so we first
+ * increment all refCnts for side effects then decrement everything
+ * in the statement
+ */
+ if (keptTree)
+ {
+ fgWalkTreePre(&keptTree, Compiler::lvaIncRefCntsCB, (void*)this, true);
+ }
+
+ fgWalkTreePre(&wholeTree, Compiler::lvaDecRefCntsCB, (void*)this, true);
+ }
+}
+
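+/*****************************************************************************
+ *
+ *  Return the set of tracked variable indices referenced by 'tree', which must be
+ *  a local variable node. For a promoted struct local this includes all of the
+ *  tracked field locals.
+ */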
+VARSET_VALRET_TP Compiler::fgGetVarBits(GenTreePtr tree)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(varBits, VarSetOps::MakeEmpty(this));
+
+ assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_REG_VAR);
+
+ unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, varBits, varDsc->lvVarIndex);
+ }
+ else if (varDsc->lvType == TYP_STRUCT && varDsc->lvPromoted)
+ {
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ if (lvaTable[i].lvTracked)
+ {
+ unsigned varIndex = lvaTable[i].lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, varBits, varIndex);
+ }
+ }
+ }
+ return varBits;
+}
+
+/*****************************************************************************
+ *
+ * Find and remove any basic blocks that are useless (e.g. they have not been
+ * imported because they are not reachable, or they have been optimized away).
+ */
+
+void Compiler::fgRemoveEmptyBlocks()
+{
+ BasicBlock* cur;
+ BasicBlock* nxt;
+
+ /* If we remove any blocks, we'll have to do additional work */
+
+ unsigned removedBlks = 0;
+
+ for (cur = fgFirstBB; cur != nullptr; cur = nxt)
+ {
+ /* Get hold of the next block (in case we delete 'cur') */
+
+ nxt = cur->bbNext;
+
+ /* Should this block be removed? */
+
+ if (!(cur->bbFlags & BBF_IMPORTED))
+ {
+ noway_assert(cur->isEmpty());
+
+ if (ehCanDeleteEmptyBlock(cur))
+ {
+ /* Mark the block as removed */
+
+ cur->bbFlags |= BBF_REMOVED;
+
+ /* Remember that we've removed a block from the list */
+
+ removedBlks++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u was not imported, marked as removed (%d)\n", cur->bbNum, removedBlks);
+ }
+#endif // DEBUG
+
+ /* Drop the block from the list */
+
+ fgUnlinkBlock(cur);
+ }
+ else
+ {
+ // We were prevented from deleting this block by EH normalization. Mark the block as imported.
+ cur->bbFlags |= BBF_IMPORTED;
+ }
+ }
+ }
+
+ /* If no blocks were removed, we're done */
+
+ if (removedBlks == 0)
+ {
+ return;
+ }
+
+ /* Update all references in the exception handler table.
+ * Mark the new blocks as non-removable.
+ *
+ * We may have made the entire try block unreachable.
+ * Check for this case and remove the entry from the EH table.
+ */
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+ INDEBUG(unsigned delCnt = 0;)
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ AGAIN:
+ /* If the beginning of the try block was not imported, we
+ * need to remove the entry from the EH table. */
+
+ if (HBtab->ebdTryBeg->bbFlags & BBF_REMOVED)
+ {
+ noway_assert(!(HBtab->ebdTryBeg->bbFlags & BBF_IMPORTED));
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Beginning of try block (BB%02u) not imported "
+ "- remove index #%u from the EH table\n",
+ HBtab->ebdTryBeg->bbNum, XTnum + delCnt);
+ }
+ delCnt++;
+#endif // DEBUG
+
+ fgRemoveEHTableEntry(XTnum);
+
+ if (XTnum < compHndBBtabCount)
+ {
+ // There are more entries left to process, so do more. Note that
+ // HBtab now points to the next entry, that we copied down to the
+ // current slot. XTnum also stays the same.
+ goto AGAIN;
+ }
+
+ break; // no more entries (we deleted the last one), so exit the loop
+ }
+
+/* At this point we know we have a valid try block */
+
+#ifdef DEBUG
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_IMPORTED);
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_DONT_REMOVE);
+
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_IMPORTED);
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_DONT_REMOVE);
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbFlags & BBF_IMPORTED);
+ assert(HBtab->ebdFilter->bbFlags & BBF_DONT_REMOVE);
+ }
+#endif // DEBUG
+
+ fgSkipRmvdBlocks(HBtab);
+ } /* end of the for loop over XTnum */
+
+ // Renumber the basic blocks
+ JITDUMP("\nRenumbering the basic blocks for fgRemoveEmptyBlocks\n");
+ fgRenumberBlocks();
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Remove a useless statement from a basic block.
+ *  By default, the ref counts of the local vars it references are decremented.
+ *
+ */
+
+void Compiler::fgRemoveStmt(BasicBlock* block,
+ GenTreePtr node,
+ // whether to decrement ref counts for tracked vars in statement
+ bool updateRefCount)
+{
+ noway_assert(node);
+ assert(fgOrder == FGOrderTree);
+
+ GenTreeStmt* tree = block->firstStmt();
+ GenTreeStmt* stmt = node->AsStmt();
+
+#ifdef DEBUG
+ if (verbose &&
+ stmt->gtStmtExpr->gtOper != GT_NOP) // Don't print if it is a GT_NOP. Too much noise from the inliner.
+ {
+ printf("\nRemoving statement ");
+ printTreeID(stmt);
+ printf(" in BB%02u as useless:\n", block->bbNum);
+ gtDispTree(stmt);
+ }
+#endif // DEBUG
+
+ if (opts.compDbgCode && stmt->gtPrev != stmt && stmt->gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+        /* TODO: For debuggable code, should we remove significant
+           statement boundaries? Or should we leave a GT_NO_OP in their place? */
+ }
+
+ /* Is it the first statement in the list? */
+
+ GenTreeStmt* firstStmt = block->firstStmt();
+ if (firstStmt == stmt)
+ {
+ if (firstStmt->gtNext == nullptr)
+ {
+ assert(firstStmt == block->lastStmt());
+
+ /* this is the only statement - basic block becomes empty */
+ block->bbTreeList = nullptr;
+ }
+ else
+ {
+ block->bbTreeList = tree->gtNext;
+ block->bbTreeList->gtPrev = tree->gtPrev;
+ }
+ goto DONE;
+ }
+
+ /* Is it the last statement in the list? */
+
+ if (stmt == block->lastStmt())
+ {
+ stmt->gtPrev->gtNext = nullptr;
+ block->bbTreeList->gtPrev = stmt->gtPrev;
+ goto DONE;
+ }
+
+ tree = stmt->gtPrevStmt;
+ noway_assert(tree);
+
+ tree->gtNext = stmt->gtNext;
+ stmt->gtNext->gtPrev = tree;
+
+DONE:
+ fgStmtRemoved = true;
+
+ if (optValnumCSE_phase)
+ {
+ optValnumCSE_UnmarkCSEs(stmt->gtStmtExpr, nullptr);
+ }
+ else
+ {
+ if (updateRefCount)
+ {
+ if (fgStmtListThreaded)
+ {
+ fgWalkTreePre(&stmt->gtStmtExpr, Compiler::lvaDecRefCntsCB, (void*)this, true);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (block->bbTreeList == nullptr)
+ {
+ printf("\nBB%02u becomes empty", block->bbNum);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+/******************************************************************************/
+// Returns true if the operator is involved in control-flow
+// TODO-Cleanup: Move this into genTreeKinds in genTree.h
+
+inline bool OperIsControlFlow(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_JTRUE:
+ case GT_SWITCH:
+ case GT_LABEL:
+
+ case GT_CALL:
+ case GT_JMP:
+
+ case GT_RETURN:
+ case GT_RETFILT:
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+#endif // !FEATURE_EH_FUNCLETS
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/******************************************************************************
+ * Tries to throw away a stmt. The statement can be anywhere in block->bbTreeList.
+ * Returns true if it did remove the statement.
+ */
+
+bool Compiler::fgCheckRemoveStmt(BasicBlock* block, GenTreePtr node)
+{
+ if (opts.compDbgCode)
+ {
+ return false;
+ }
+
+ GenTreeStmt* stmt = node->AsStmt();
+
+ GenTreePtr tree = stmt->gtStmtExpr;
+ genTreeOps oper = tree->OperGet();
+
+ if (OperIsControlFlow(oper) || oper == GT_NO_OP)
+ {
+ return false;
+ }
+
+ // TODO: Use a recursive version of gtNodeHasSideEffects()
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ return false;
+ }
+
+ fgRemoveStmt(block, stmt);
+ return true;
+}
+
+/****************************************************************************************************
+ *
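+ *  Returns true if 'block' and the lexically following block 'bNext' can be merged
+ *  by fgCompactBlocks: 'block' must end in BBJ_NONE, and none of the EH, hot/cold
+ *  region, or predecessor constraints checked below may prevent the merge.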
+ *
+ */
+bool Compiler::fgCanCompactBlocks(BasicBlock* block, BasicBlock* bNext)
+{
+ if ((block == nullptr) || (bNext == nullptr))
+ {
+ return false;
+ }
+
+ noway_assert(block->bbNext == bNext);
+
+ if (block->bbJumpKind != BBJ_NONE)
+ {
+ return false;
+ }
+
+ // If the next block has multiple incoming edges, we can still compact if the first block is empty.
+ // However, not if it is the beginning of a handler.
+ if (bNext->countOfInEdges() != 1 &&
+ (!block->isEmpty() || (block->bbFlags & BBF_FUNCLET_BEG) || (block->bbCatchTyp != BBCT_NONE)))
+ {
+ return false;
+ }
+
+ if (bNext->bbFlags & BBF_DONT_REMOVE)
+ {
+ return false;
+ }
+
+ // Don't compact the first block if it was specially created as a scratch block.
+ if (fgBBisScratch(block))
+ {
+ return false;
+ }
+
+#if defined(_TARGET_ARM_)
+ // We can't compact a finally target block, as we need to generate special code for such blocks during code
+ // generation
+ if ((bNext->bbFlags & BBF_FINALLY_TARGET) != 0)
+ return false;
+#endif
+
+ // We don't want to compact blocks that are in different Hot/Cold regions
+ //
+ if (fgInDifferentRegions(block, bNext))
+ {
+ return false;
+ }
+
+ // We cannot compact two blocks in different EH regions.
+ //
+ if (fgCanRelocateEHRegions)
+ {
+ if (!BasicBlock::sameEHRegion(block, bNext))
+ {
+ return false;
+ }
+ }
+ // if there is a switch predecessor don't bother because we'd have to update the uniquesuccs as well
+ // (if they are valid)
+ for (flowList* pred = bNext->bbPreds; pred; pred = pred->flNext)
+ {
+ if (pred->flBlock->bbJumpKind == BBJ_SWITCH)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to compact two given blocks in the flowgraph
+ * Assumes that all necessary checks have been performed,
+ * i.e. fgCanCompactBlocks returns true.
+ *
+ *  Use this function whenever we change links, insert blocks, etc.;
+ *  it keeps the flowgraph data (bbNum, bbRefs, bbPreds) in sync.
+ */
+
+void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
+{
+ noway_assert(block != nullptr);
+ noway_assert((block->bbFlags & BBF_REMOVED) == 0);
+ noway_assert(block->bbJumpKind == BBJ_NONE);
+
+ noway_assert(bNext == block->bbNext);
+ noway_assert(bNext != nullptr);
+ noway_assert((bNext->bbFlags & BBF_REMOVED) == 0);
+ noway_assert(bNext->countOfInEdges() == 1 || block->isEmpty());
+ noway_assert(bNext->bbPreds);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ noway_assert((bNext->bbFlags & BBF_FINALLY_TARGET) == 0);
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // Make sure the second block is not the start of a TRY block or an exception handler
+
+ noway_assert(bNext->bbCatchTyp == BBCT_NONE);
+ noway_assert((bNext->bbFlags & BBF_TRY_BEG) == 0);
+ noway_assert((bNext->bbFlags & BBF_DONT_REMOVE) == 0);
+
+ /* both or none must have an exception handler */
+ noway_assert(block->hasTryIndex() == bNext->hasTryIndex());
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCompacting blocks BB%02u and BB%02u:\n", block->bbNum, bNext->bbNum);
+ }
+#endif
+
+ if (bNext->countOfInEdges() > 1)
+ {
+ JITDUMP("Second block has multiple incoming edges\n");
+
+ assert(block->isEmpty());
+ block->bbFlags |= BBF_JMP_TARGET;
+ for (flowList* pred = bNext->bbPreds; pred; pred = pred->flNext)
+ {
+ fgReplaceJumpTarget(pred->flBlock, block, bNext);
+
+ if (pred->flBlock != block)
+ {
+ fgAddRefPred(block, pred->flBlock);
+ }
+ }
+ bNext->bbPreds = nullptr;
+ }
+ else
+ {
+ noway_assert(bNext->bbPreds->flNext == nullptr);
+ noway_assert(bNext->bbPreds->flBlock == block);
+ }
+
+ /* Start compacting - move all the statements in the second block to the first block */
+
+ // First move any phi definitions of the second block after the phi defs of the first.
+ // TODO-CQ: This may be the wrong thing to do. If we're compacting blocks, it's because a
+ // control-flow choice was constant-folded away. So probably phi's need to go away,
+ // as well, in favor of one of the incoming branches. Or at least be modified.
+
+ assert(block->IsLIR() == bNext->IsLIR());
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ LIR::Range& nextRange = LIR::AsRange(bNext);
+
+ // Does the next block have any phis?
+ GenTree* nextFirstNonPhi = nullptr;
+ LIR::ReadOnlyRange nextPhis = nextRange.PhiNodes();
+ if (!nextPhis.IsEmpty())
+ {
+ GenTree* blockLastPhi = blockRange.LastPhiNode();
+ nextFirstNonPhi = nextPhis.LastNode()->gtNext;
+
+ LIR::Range phisToMove = nextRange.Remove(std::move(nextPhis));
+ blockRange.InsertAfter(blockLastPhi, std::move(phisToMove));
+ }
+ else
+ {
+ nextFirstNonPhi = nextRange.FirstNode();
+ }
+
+ // Does the block have any other code?
+ if (nextFirstNonPhi != nullptr)
+ {
+ LIR::Range nextNodes = nextRange.Remove(nextFirstNonPhi, nextRange.LastNode());
+ blockRange.InsertAtEnd(std::move(nextNodes));
+ }
+ }
+ else
+ {
+ GenTreePtr blkNonPhi1 = block->FirstNonPhiDef();
+ GenTreePtr bNextNonPhi1 = bNext->FirstNonPhiDef();
+ GenTreePtr blkFirst = block->firstStmt();
+ GenTreePtr bNextFirst = bNext->firstStmt();
+
+ // Does the second have any phis?
+ if (bNextFirst != nullptr && bNextFirst != bNextNonPhi1)
+ {
+ GenTreePtr bNextLast = bNextFirst->gtPrev;
+ assert(bNextLast->gtNext == nullptr);
+
+ // Does "blk" have phis?
+ if (blkNonPhi1 != blkFirst)
+ {
+ // Yes, has phis.
+ // Insert after the last phi of "block."
+ // First, bNextPhis after last phi of block.
+ GenTreePtr blkLastPhi;
+ if (blkNonPhi1 != nullptr)
+ {
+ blkLastPhi = blkNonPhi1->gtPrev;
+ }
+ else
+ {
+ blkLastPhi = blkFirst->gtPrev;
+ }
+
+ blkLastPhi->gtNext = bNextFirst;
+ bNextFirst->gtPrev = blkLastPhi;
+
+ // Now, rest of "block" after last phi of "bNext".
+ GenTreePtr bNextLastPhi = nullptr;
+ if (bNextNonPhi1 != nullptr)
+ {
+ bNextLastPhi = bNextNonPhi1->gtPrev;
+ }
+ else
+ {
+ bNextLastPhi = bNextFirst->gtPrev;
+ }
+
+ bNextLastPhi->gtNext = blkNonPhi1;
+ if (blkNonPhi1 != nullptr)
+ {
+ blkNonPhi1->gtPrev = bNextLastPhi;
+ }
+ else
+ {
+ // block has no non phis, so make the last statement be the last added phi.
+ blkFirst->gtPrev = bNextLastPhi;
+ }
+
+ // Now update the bbTreeList of "bNext".
+ bNext->bbTreeList = bNextNonPhi1;
+ if (bNextNonPhi1 != nullptr)
+ {
+ bNextNonPhi1->gtPrev = bNextLast;
+ }
+ }
+ else
+ {
+ if (blkFirst != nullptr) // If "block" has no statements, fusion will work fine...
+ {
+ // First, bNextPhis at start of block.
+ GenTreePtr blkLast = blkFirst->gtPrev;
+ block->bbTreeList = bNextFirst;
+ // Now, rest of "block" (if it exists) after last phi of "bNext".
+ GenTreePtr bNextLastPhi = nullptr;
+ if (bNextNonPhi1 != nullptr)
+ {
+ // There is a first non phi, so the last phi is before it.
+ bNextLastPhi = bNextNonPhi1->gtPrev;
+ }
+ else
+ {
+ // All the statements are phi defns, so the last one is the prev of the first.
+ bNextLastPhi = bNextFirst->gtPrev;
+ }
+ bNextFirst->gtPrev = blkLast;
+ bNextLastPhi->gtNext = blkFirst;
+ blkFirst->gtPrev = bNextLastPhi;
+ // Now update the bbTreeList of "bNext"
+ bNext->bbTreeList = bNextNonPhi1;
+ if (bNextNonPhi1 != nullptr)
+ {
+ bNextNonPhi1->gtPrev = bNextLast;
+ }
+ }
+ }
+ }
+
+ // Now proceed with the updated bbTreeLists.
+ GenTreePtr stmtList1 = block->firstStmt();
+ GenTreePtr stmtList2 = bNext->firstStmt();
+
+ /* the block may have an empty list */
+
+ if (stmtList1)
+ {
+ GenTreePtr stmtLast1 = block->lastStmt();
+
+ /* The second block may be a GOTO statement or something with an empty bbTreeList */
+ if (stmtList2)
+ {
+ GenTreePtr stmtLast2 = bNext->lastStmt();
+
+ /* append list2 to list 1 */
+
+ stmtLast1->gtNext = stmtList2;
+ stmtList2->gtPrev = stmtLast1;
+ stmtList1->gtPrev = stmtLast2;
+ }
+ }
+ else
+ {
+ /* block was formerly empty and now has bNext's statements */
+ block->bbTreeList = stmtList2;
+ }
+ }
+
+ // Note we could update the local variable weights here by
+ // calling lvaMarkLocalVars, with the block and weight adjustment.
+
+ // If either block or bNext has a profile weight
+ // or if both block and bNext have non-zero weights
+ // then we select the highest weight block.
+
+ if ((block->bbFlags & BBF_PROF_WEIGHT) || (bNext->bbFlags & BBF_PROF_WEIGHT) ||
+ (block->bbWeight && bNext->bbWeight))
+ {
+ // We are keeping block so update its fields
+ // when bNext has a greater weight
+
+ if (block->bbWeight < bNext->bbWeight)
+ {
+ block->bbWeight = bNext->bbWeight;
+
+ block->bbFlags |= (bNext->bbFlags & BBF_PROF_WEIGHT); // Set the profile weight flag (if necessary)
+ if (block->bbWeight != 0)
+ {
+ block->bbFlags &= ~BBF_RUN_RARELY; // Clear any RarelyRun flag
+ }
+ }
+ }
+ // otherwise if either block has a zero weight we select the zero weight
+ else
+ {
+ noway_assert((block->bbWeight == BB_ZERO_WEIGHT) || (bNext->bbWeight == BB_ZERO_WEIGHT));
+ block->bbWeight = BB_ZERO_WEIGHT;
+ block->bbFlags |= BBF_RUN_RARELY; // Set the RarelyRun flag
+ }
+
+ /* set the right links */
+
+ block->bbJumpKind = bNext->bbJumpKind;
+ VarSetOps::AssignAllowUninitRhs(this, block->bbLiveOut, bNext->bbLiveOut);
+
+ // Update the beginning and ending IL offsets (bbCodeOffs and bbCodeOffsEnd).
+ // Set the beginning IL offset to the minimum, and the ending offset to the maximum, of the respective blocks.
+ // If one block has an unknown offset, we take the other block.
+ // We are merging into 'block', so if its values are correct, just leave them alone.
+ // TODO: we should probably base this on the statements within.
+
+ if (block->bbCodeOffs == BAD_IL_OFFSET)
+ {
+ block->bbCodeOffs = bNext->bbCodeOffs; // If they are both BAD_IL_OFFSET, this doesn't change anything.
+ }
+ else if (bNext->bbCodeOffs != BAD_IL_OFFSET)
+ {
+        // They are both valid offsets; compare them.
+ if (block->bbCodeOffs > bNext->bbCodeOffs)
+ {
+ block->bbCodeOffs = bNext->bbCodeOffs;
+ }
+ }
+
+ if (block->bbCodeOffsEnd == BAD_IL_OFFSET)
+ {
+ block->bbCodeOffsEnd = bNext->bbCodeOffsEnd; // If they are both BAD_IL_OFFSET, this doesn't change anything.
+ }
+ else if (bNext->bbCodeOffsEnd != BAD_IL_OFFSET)
+ {
+        // They are both valid offsets; compare them.
+ if (block->bbCodeOffsEnd < bNext->bbCodeOffsEnd)
+ {
+ block->bbCodeOffsEnd = bNext->bbCodeOffsEnd;
+ }
+ }
+
+ if (((block->bbFlags & BBF_INTERNAL) != 0) && ((bNext->bbFlags & BBF_INTERNAL) == 0))
+ {
+ // If 'block' is an internal block and 'bNext' isn't, then adjust the flags set on 'block'.
+ block->bbFlags &= ~BBF_INTERNAL; // Clear the BBF_INTERNAL flag
+ block->bbFlags |= BBF_IMPORTED; // Set the BBF_IMPORTED flag
+ }
+
+ /* Update the flags for block with those found in bNext */
+
+ block->bbFlags |= (bNext->bbFlags & BBF_COMPACT_UPD);
+
+ /* mark bNext as removed */
+
+ bNext->bbFlags |= BBF_REMOVED;
+
+ /* Unlink bNext and update all the marker pointers if necessary */
+
+ fgUnlinkRange(block->bbNext, bNext);
+
+ // If bNext was the last block of a try or handler, update the EH table.
+
+ ehUpdateForDeletedBlock(bNext);
+
+ /* If we're collapsing a block created after the dominators are
+ computed, rename the block and reuse dominator information from
+ the other block */
+ if (fgDomsComputed && block->bbNum > fgDomBBcount)
+ {
+ BlockSetOps::Assign(this, block->bbReach, bNext->bbReach);
+ BlockSetOps::ClearD(this, bNext->bbReach);
+
+ block->bbIDom = bNext->bbIDom;
+ bNext->bbIDom = nullptr;
+
+        // In this case, there's no need to update the preorder and postorder numbering,
+        // since we're taking over bNext's bbNum; with that the basic block is all set.
+ block->bbNum = bNext->bbNum;
+ }
+
+ /* Set the jump targets */
+
+ switch (bNext->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ // Propagate RETLESS property
+ block->bbFlags |= (bNext->bbFlags & BBF_RETLESS_CALL);
+
+ __fallthrough;
+
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ block->bbJumpDest = bNext->bbJumpDest;
+
+ /* Update the predecessor list for 'bNext->bbJumpDest' */
+ fgReplacePred(bNext->bbJumpDest, bNext, block);
+
+ /* Update the predecessor list for 'bNext->bbNext' if it is different than 'bNext->bbJumpDest' */
+ if (bNext->bbJumpKind == BBJ_COND && bNext->bbJumpDest != bNext->bbNext)
+ {
+ fgReplacePred(bNext->bbNext, bNext, block);
+ }
+ break;
+
+ case BBJ_NONE:
+ /* Update the predecessor list for 'bNext->bbNext' */
+ fgReplacePred(bNext->bbNext, bNext, block);
+ break;
+
+ case BBJ_EHFILTERRET:
+ fgReplacePred(bNext->bbJumpDest, bNext, block);
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ unsigned hndIndex = block->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+
+ if (ehDsc->HasFinallyHandler()) // No need to do this for fault handlers
+ {
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg)
+ {
+ continue;
+ }
+
+ noway_assert(bcall->isBBCallAlwaysPair());
+ fgReplacePred(bcall->bbNext, bNext, block);
+ }
+ }
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ /* no jumps or fall through blocks to set here */
+ break;
+
+ case BBJ_SWITCH:
+ block->bbJumpSwt = bNext->bbJumpSwt;
+ // We are moving the switch jump from bNext to block. Examine the jump targets
+ // of the BBJ_SWITCH at bNext and replace the predecessor to 'bNext' with ones to 'block'
+ fgChangeSwitchBlock(bNext, block);
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ fgUpdateLoopsAfterCompacting(block, bNext);
+
+#if DEBUG
+ if (verbose && 0)
+ {
+ printf("\nAfter compacting:\n");
+ fgDispBasicBlocks(false);
+ }
+#endif
+
+#if DEBUG
+ if (JitConfig.JitSlowDebugChecksEnabled() != 0)
+ {
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+}
+
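+/*****************************************************************************************************
+ *
+ *  Called by fgCompactBlocks once 'bNext' has been merged into 'block': any loop table
+ *  entry whose head, bottom, exit or entry pointed at 'bNext' is updated to point at 'block'.
+ */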
+void Compiler::fgUpdateLoopsAfterCompacting(BasicBlock* block, BasicBlock* bNext)
+{
+    /* Check whether the removed block appears in the loop table; if so, update it */
+ noway_assert(bNext);
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+ /* Some loops may have been already removed by
+ * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ /* Check the loop head (i.e. the block preceding the loop) */
+
+ if (optLoopTable[loopNum].lpHead == bNext)
+ {
+ optLoopTable[loopNum].lpHead = block;
+ }
+
+ /* Check the loop bottom */
+
+ if (optLoopTable[loopNum].lpBottom == bNext)
+ {
+ optLoopTable[loopNum].lpBottom = block;
+ }
+
+ /* Check the loop exit */
+
+ if (optLoopTable[loopNum].lpExit == bNext)
+ {
+ noway_assert(optLoopTable[loopNum].lpExitCnt == 1);
+ optLoopTable[loopNum].lpExit = block;
+ }
+
+ /* Check the loop entry */
+
+ if (optLoopTable[loopNum].lpEntry == bNext)
+ {
+ optLoopTable[loopNum].lpEntry = block;
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to remove a block when it is unreachable.
+ *
+ * This function cannot remove the first block.
+ */
+
+void Compiler::fgUnreachableBlock(BasicBlock* block)
+{
+ // genReturnBB should never be removed, as we might have special hookups there.
+ // Therefore, we should never come here to remove the statements in the genReturnBB block.
+ // For example, <BUGNUM> in VSW 364383, </BUGNUM>
+ // the profiler hookup needs to have the "void GT_RETURN" statement
+ // to properly set the info.compProfilerCallback flag.
+ noway_assert(block != genReturnBB);
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ return;
+ }
+
+/* Removing an unreachable block */
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving unreachable BB%02u\n", block->bbNum);
+ }
+#endif // DEBUG
+
+    noway_assert(block->bbPrev != nullptr); // Can't use this function to remove the first block
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ assert(!block->bbPrev->isBBCallAlwaysPair()); // can't remove the BBJ_ALWAYS of a BBJ_CALLFINALLY / BBJ_ALWAYS pair
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ /* First walk the statement trees in this basic block and delete each stmt */
+
+ /* Make the block publicly available */
+ compCurBB = block;
+
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ if (!blockRange.IsEmpty())
+ {
+ blockRange.Delete(this, block, blockRange.FirstNode(), blockRange.LastNode());
+ }
+ }
+ else
+ {
+ // TODO-Cleanup: I'm not sure why this happens -- if the block is unreachable, why does it have phis?
+ // Anyway, remove any phis.
+
+ GenTreePtr firstNonPhi = block->FirstNonPhiDef();
+ if (block->bbTreeList != firstNonPhi)
+ {
+ if (firstNonPhi != nullptr)
+ {
+ firstNonPhi->gtPrev = block->lastStmt();
+ }
+ block->bbTreeList = firstNonPhi;
+ }
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ fgRemoveStmt(block, stmt);
+ }
+ noway_assert(block->bbTreeList == nullptr);
+ }
+
+ /* Next update the loop table and bbWeights */
+ optUpdateLoopsBeforeRemoveBlock(block);
+
+ /* Mark the block as removed */
+ block->bbFlags |= BBF_REMOVED;
+
+ /* update bbRefs and bbPreds for the blocks reached by this block */
+ fgRemoveBlockAsPred(block);
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to remove or morph a GT_JTRUE statement when we jump to the same
+ * block when both the condition is true or false.
+ */
+void Compiler::fgRemoveJTrue(BasicBlock* block)
+{
+ noway_assert(block->bbJumpKind == BBJ_COND && block->bbJumpDest == block->bbNext);
+ assert(compRationalIRForm == block->IsLIR());
+
+ flowList* flow = fgGetPredForBlock(block->bbNext, block);
+ noway_assert(flow->flDupCount == 2);
+
+ // Change the BBJ_COND to BBJ_NONE, and adjust the refCount and dupCount.
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ --block->bbNext->bbRefs;
+ --flow->flDupCount;
+
+#ifdef DEBUG
+ block->bbJumpDest = nullptr;
+ if (verbose)
+ {
+ printf("Block BB%02u becoming a BBJ_NONE to BB%02u (jump target is the same whether the condition is true or "
+ "false)\n",
+ block->bbNum, block->bbNext->bbNum);
+ }
+#endif
+
+ /* Remove the block jump condition */
+
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ GenTree* test = blockRange.LastNode();
+ assert(test->OperGet() == GT_JTRUE);
+
+ bool isClosed;
+ unsigned sideEffects;
+ LIR::ReadOnlyRange testRange = blockRange.GetTreeRange(test, &isClosed, &sideEffects);
+
+ // TODO-LIR: this should really be checking GTF_ALL_EFFECT, but that produces unacceptable
+ // diffs compared to the existing backend.
+ if (isClosed && ((sideEffects & GTF_SIDE_EFFECT) == 0))
+ {
+ // If the jump and its operands form a contiguous, side-effect-free range,
+ // remove them.
+ blockRange.Delete(this, block, std::move(testRange));
+ }
+ else
+ {
+ // Otherwise, just remove the jump node itself.
+ blockRange.Remove(test);
+ }
+ }
+ else
+ {
+ GenTreeStmt* test = block->lastStmt();
+ GenTree* tree = test->gtStmtExpr;
+
+ noway_assert(tree->gtOper == GT_JTRUE);
+
+ GenTree* sideEffList = nullptr;
+
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ gtExtractSideEffList(tree, &sideEffList);
+
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list from condition...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif
+ }
+ }
+
+ // Delete the cond test or replace it with the side effect tree
+ if (sideEffList == nullptr)
+ {
+ fgRemoveStmt(block, test);
+ }
+ else
+ {
+ test->gtStmtExpr = sideEffList;
+
+ fgMorphBlockStmt(block, test DEBUGARG("fgRemoveJTrue"));
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function to return the last basic block in the main part of the function. With funclets, it is
+ * the block immediately before the first funclet.
+ * An inclusive end of the main method.
+ */
+
+BasicBlock* Compiler::fgLastBBInMainFunction()
+{
+#if FEATURE_EH_FUNCLETS
+
+ if (fgFirstFuncletBB != nullptr)
+ {
+ return fgFirstFuncletBB->bbPrev;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ assert(fgLastBB->bbNext == nullptr);
+
+ return fgLastBB;
+}
+
+/*****************************************************************************************************
+ *
+ * Function to return the first basic block after the main part of the function. With funclets, it is
+ *  the first funclet block. If there are no funclets, it is NULL (i.e. fgLastBB->bbNext).
+ * This is equivalent to fgLastBBInMainFunction()->bbNext
+ * An exclusive end of the main method.
+ */
+
+BasicBlock* Compiler::fgEndBBAfterMainFunction()
+{
+#if FEATURE_EH_FUNCLETS
+
+ if (fgFirstFuncletBB != nullptr)
+ {
+ return fgFirstFuncletBB;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ assert(fgLastBB->bbNext == nullptr);
+
+ return nullptr;
+}
+
+// Removes the block from the bbPrev/bbNext chain
+// Updates fgFirstBB and fgLastBB if necessary
+// Does not update fgFirstFuncletBB or fgFirstColdBlock (fgUnlinkRange does)
+
+void Compiler::fgUnlinkBlock(BasicBlock* block)
+{
+ if (block->bbPrev)
+ {
+ block->bbPrev->bbNext = block->bbNext;
+ if (block->bbNext)
+ {
+ block->bbNext->bbPrev = block->bbPrev;
+ }
+ else
+ {
+ fgLastBB = block->bbPrev;
+ }
+ }
+ else
+ {
+ assert(block == fgFirstBB);
+ assert(block != fgLastBB);
+ assert((fgFirstBBScratch == nullptr) || (fgFirstBBScratch == fgFirstBB));
+
+ fgFirstBB = block->bbNext;
+ fgFirstBB->bbPrev = nullptr;
+
+ if (fgFirstBBScratch != nullptr)
+ {
+#ifdef DEBUG
+ // We had created an initial scratch BB, but now we're deleting it.
+ if (verbose)
+ {
+ printf("Unlinking scratch BB%02u\n", block->bbNum);
+ }
+#endif // DEBUG
+ fgFirstBBScratch = nullptr;
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to unlink basic block range [bBeg .. bEnd] from the basic block list.
+ *
+ * 'bBeg' can't be the first block.
+ */
+
+void Compiler::fgUnlinkRange(BasicBlock* bBeg, BasicBlock* bEnd)
+{
+ assert(bBeg != nullptr);
+ assert(bEnd != nullptr);
+
+ BasicBlock* bPrev = bBeg->bbPrev;
+ assert(bPrev != nullptr); // Can't unlink a range starting with the first block
+
+ bPrev->setNext(bEnd->bbNext);
+
+ /* If we removed the last block in the method then update fgLastBB */
+ if (fgLastBB == bEnd)
+ {
+ fgLastBB = bPrev;
+ noway_assert(fgLastBB->bbNext == nullptr);
+ }
+
+ // If bEnd was the first Cold basic block update fgFirstColdBlock
+ if (fgFirstColdBlock == bEnd)
+ {
+ fgFirstColdBlock = bPrev->bbNext;
+ }
+
+#if FEATURE_EH_FUNCLETS
+#ifdef DEBUG
+ // You can't unlink a range that includes the first funclet block. A range certainly
+ // can't cross the non-funclet/funclet region. And you can't unlink the first block
+ // of the first funclet with this, either. (If that's necessary, it could be allowed
+ // by updating fgFirstFuncletBB to bEnd->bbNext.)
+ for (BasicBlock* tempBB = bBeg; tempBB != bEnd->bbNext; tempBB = tempBB->bbNext)
+ {
+ assert(tempBB != fgFirstFuncletBB);
+ }
+#endif // DEBUG
+#endif // FEATURE_EH_FUNCLETS
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to remove a basic block
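+ *
+ * If 'unreachable' is true, the block's contents are deleted and its flow edges removed;
+ * otherwise the block must be empty (BBJ_NONE or BBJ_ALWAYS) and all of its predecessors
+ * are redirected to the block's successor.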
+ */
+
+void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
+{
+ BasicBlock* bPrev = block->bbPrev;
+
+ /* The block has to be either unreachable or empty */
+
+ PREFIX_ASSUME(block != nullptr);
+
+ JITDUMP("fgRemoveBlock BB%02u\n", block->bbNum);
+
+ // If we've cached any mappings from switch blocks to SwitchDesc's (which contain only the
+ // *unique* successors of the switch block), invalidate that cache, since an entry in one of
+ // the SwitchDescs might be removed.
+ InvalidateUniqueSwitchSuccMap();
+
+ noway_assert((block == fgFirstBB) || (bPrev && (bPrev->bbNext == block)));
+ noway_assert(!(block->bbFlags & BBF_DONT_REMOVE));
+
+ // Should never remove a genReturnBB, as we might have special hookups there.
+ noway_assert(block != genReturnBB);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Don't remove a finally target
+ assert(!(block->bbFlags & BBF_FINALLY_TARGET));
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ if (unreachable)
+ {
+ PREFIX_ASSUME(bPrev != nullptr);
+
+ fgUnreachableBlock(block);
+
+ /* If this is the last basic block update fgLastBB */
+ if (block == fgLastBB)
+ {
+ fgLastBB = bPrev;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // If block was the fgFirstFuncletBB then set fgFirstFuncletBB to block->bbNext
+ if (block == fgFirstFuncletBB)
+ {
+ fgFirstFuncletBB = block->bbNext;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (bPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // bPrev CALL becomes RETLESS as the BBJ_ALWAYS block is unreachable
+ bPrev->bbFlags |= BBF_RETLESS_CALL;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ NO_WAY("No retless call finally blocks; need unwind target instead");
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ }
+ else if (bPrev->bbJumpKind == BBJ_ALWAYS && bPrev->bbJumpDest == block->bbNext &&
+ !(bPrev->bbFlags & BBF_KEEP_BBJ_ALWAYS) && (block != fgFirstColdBlock) &&
+ (block->bbNext != fgFirstColdBlock))
+ {
+ // previous block is a BBJ_ALWAYS to the next block: change to BBJ_NONE.
+ // Note that we don't do it if bPrev follows a BBJ_CALLFINALLY block (BBF_KEEP_BBJ_ALWAYS),
+ // because that would violate our invariant that BBJ_CALLFINALLY blocks are followed by
+ // BBJ_ALWAYS blocks.
+ bPrev->bbJumpKind = BBJ_NONE;
+ bPrev->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+
+ // If this is the first Cold basic block update fgFirstColdBlock
+ if (block == fgFirstColdBlock)
+ {
+ fgFirstColdBlock = block->bbNext;
+ }
+
+ /* Unlink this block from the bbNext chain */
+ fgUnlinkBlock(block);
+
+ /* At this point the bbPreds and bbRefs had better be zero */
+ noway_assert((block->bbRefs == 0) && (block->bbPreds == nullptr));
+
+ /* A BBJ_CALLFINALLY is usually paired with a BBJ_ALWAYS.
+ * If we delete such a BBJ_CALLFINALLY we also delete the BBJ_ALWAYS
+ */
+ if (block->isBBCallAlwaysPair())
+ {
+ BasicBlock* leaveBlk = block->bbNext;
+ noway_assert(leaveBlk->bbJumpKind == BBJ_ALWAYS);
+
+ leaveBlk->bbFlags &= ~BBF_DONT_REMOVE;
+ leaveBlk->bbRefs = 0;
+ leaveBlk->bbPreds = nullptr;
+
+ fgRemoveBlock(leaveBlk, true);
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ fgClearFinallyTargetBit(leaveBlk->bbJumpDest);
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ }
+ else if (block->bbJumpKind == BBJ_RETURN)
+ {
+ fgRemoveReturnBlock(block);
+ }
+ }
+ else // block is empty
+ {
+ noway_assert(block->isEmpty());
+
+ /* The block cannot follow a non-retless BBJ_CALLFINALLY (because we don't know who may jump to it) */
+ noway_assert((bPrev == nullptr) || !bPrev->isBBCallAlwaysPair());
+
+ /* This cannot be the last basic block */
+ noway_assert(block != fgLastBB);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing empty BB%02u\n", block->bbNum);
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ /* Some extra checks for the empty case */
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_NONE:
+ break;
+
+ case BBJ_ALWAYS:
+ /* Do not remove a block that jumps to itself - used for while (true){} */
+ noway_assert(block->bbJumpDest != block);
+
+ /* Empty GOTO can be removed iff bPrev is BBJ_NONE */
+ noway_assert(bPrev && bPrev->bbJumpKind == BBJ_NONE);
+ break;
+
+ default:
+ noway_assert(!"Empty block of this type cannot be removed!");
+ break;
+ }
+#endif // DEBUG
+
+ noway_assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+
+ /* Who is the "real" successor of this block? */
+
+ BasicBlock* succBlock;
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ succBlock = block->bbJumpDest;
+ }
+ else
+ {
+ succBlock = block->bbNext;
+ }
+
+ bool skipUnmarkLoop = false;
+
+ // If block is the backedge for a loop and succBlock precedes block
+ // then the succBlock becomes the new LOOP HEAD
+ // NOTE: there's an assumption here that the blocks are numbered in increasing bbNext order.
+ // NOTE 2: if fgDomsComputed is false, then we can't check reachability. However, if this is
+ // the case, then the loop structures probably are also invalid, and shouldn't be used. This
+ // can be the case late in compilation (such as Lower), where remnants of earlier created
+ // structures exist, but haven't been maintained.
+ if (block->isLoopHead() && (succBlock->bbNum <= block->bbNum))
+ {
+ succBlock->bbFlags |= BBF_LOOP_HEAD;
+ if (fgDomsComputed && fgReachable(succBlock, block))
+ {
+ /* Mark all the reachable blocks between 'succBlock' and 'block', excluding 'block' */
+ optMarkLoopBlocks(succBlock, block, true);
+ }
+ }
+ else if (succBlock->isLoopHead() && bPrev && (succBlock->bbNum <= bPrev->bbNum))
+ {
+ skipUnmarkLoop = true;
+ }
+
+ noway_assert(succBlock);
+
+ // If this is the first Cold basic block update fgFirstColdBlock
+ if (block == fgFirstColdBlock)
+ {
+ fgFirstColdBlock = block->bbNext;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // Update fgFirstFuncletBB if necessary
+ if (block == fgFirstFuncletBB)
+ {
+ fgFirstFuncletBB = block->bbNext;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ /* First update the loop table and bbWeights */
+ optUpdateLoopsBeforeRemoveBlock(block, skipUnmarkLoop);
+
+ /* Remove the block */
+
+ if (bPrev == nullptr)
+ {
+ /* special case if this is the first BB */
+
+ noway_assert(block == fgFirstBB);
+
+ /* Must be a fall through to next block */
+
+ noway_assert(block->bbJumpKind == BBJ_NONE);
+
+ /* old block no longer gets the extra ref count for being the first block */
+ block->bbRefs--;
+ succBlock->bbRefs++;
+
+ /* Set the new firstBB */
+ fgUnlinkBlock(block);
+
+ /* Always treat the initial block as a jump target */
+ fgFirstBB->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+ else
+ {
+ fgUnlinkBlock(block);
+ }
+
+ /* mark the block as removed and set the change flag */
+
+ block->bbFlags |= BBF_REMOVED;
+
+ /* Update bbRefs and bbPreds.
+ * All blocks jumping to 'block' now jump to 'succBlock'.
+ * First, remove 'block' from the predecessor list of succBlock.
+ */
+
+ fgRemoveRefPred(succBlock, block);
+
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ /* Are we changing a loop backedge into a forward jump? */
+
+ if (block->isLoopHead() && (predBlock->bbNum >= block->bbNum) && (predBlock->bbNum <= succBlock->bbNum))
+ {
+ /* First update the loop table and bbWeights */
+ optUpdateLoopsBeforeRemoveBlock(predBlock);
+ }
+
+ /* If predBlock is a new predecessor, then add it to succBlock's
+ predecessor's list. */
+ if (predBlock->bbJumpKind != BBJ_SWITCH)
+ {
+ // Even if the pred is not a switch, we could have a conditional branch
+                // to the fallthrough, so there could be duplicate preds
+ for (unsigned i = 0; i < pred->flDupCount; i++)
+ {
+ fgAddRefPred(succBlock, predBlock);
+ }
+ }
+
+ /* change all jumps to the removed block */
+ switch (predBlock->bbJumpKind)
+ {
+ default:
+ noway_assert(!"Unexpected bbJumpKind in fgRemoveBlock()");
+ break;
+
+ case BBJ_NONE:
+ noway_assert(predBlock == bPrev);
+ PREFIX_ASSUME(bPrev != nullptr);
+
+ /* In the case of BBJ_ALWAYS we have to change the type of its predecessor */
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ /* bPrev now becomes a BBJ_ALWAYS */
+ bPrev->bbJumpKind = BBJ_ALWAYS;
+ bPrev->bbJumpDest = succBlock;
+ }
+ break;
+
+ case BBJ_COND:
+ /* The links for the direct predecessor case have already been updated above */
+ if (predBlock->bbJumpDest != block)
+ {
+ succBlock->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+ break;
+ }
+
+ /* Check if both side of the BBJ_COND now jump to the same block */
+ if (predBlock->bbNext == succBlock)
+ {
+ // Make sure we are replacing "block" with "succBlock" in predBlock->bbJumpDest.
+ noway_assert(predBlock->bbJumpDest == block);
+ predBlock->bbJumpDest = succBlock;
+ fgRemoveJTrue(predBlock);
+ break;
+ }
+
+ /* Fall through for the jump case */
+ __fallthrough;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(predBlock->bbJumpDest == block);
+ predBlock->bbJumpDest = succBlock;
+ succBlock->bbFlags |= BBF_HAS_LABEL | BBF_JMP_TARGET;
+ break;
+
+ case BBJ_SWITCH:
+ // Change any jumps from 'predBlock' (a BBJ_SWITCH) to 'block' to jump to 'succBlock'
+ //
+ // For the jump targets of 'predBlock' (a BBJ_SWITCH) that jump to 'block'
+ // remove the old predecessor at 'block' from 'predBlock' and
+ // add the new predecessor at 'succBlock' from 'predBlock'
+ //
+ fgReplaceSwitchJumpTarget(predBlock, succBlock, block);
+ break;
+ }
+ }
+ }
+
+ if (bPrev != nullptr)
+ {
+ switch (bPrev->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ // If prev is a BBJ_CALLFINALLY it better be marked as RETLESS
+ noway_assert(bPrev->bbFlags & BBF_RETLESS_CALL);
+ break;
+
+ case BBJ_ALWAYS:
+ // Check for branch to next block. Just make sure the BBJ_ALWAYS block is not
+ // part of a BBJ_CALLFINALLY/BBJ_ALWAYS pair. We do this here and don't rely on fgUpdateFlowGraph
+ // because we can be called by ComputeDominators and it expects it to remove this jump to
+ // the next block. This is the safest fix. We should remove all this BBJ_CALLFINALLY/BBJ_ALWAYS
+ // pairing.
+
+ if ((bPrev->bbJumpDest == bPrev->bbNext) &&
+ !fgInDifferentRegions(bPrev, bPrev->bbJumpDest)) // We don't remove a branch from Hot -> Cold
+ {
+ if ((bPrev == fgFirstBB) || !bPrev->bbPrev->isBBCallAlwaysPair())
+ {
+ // It's safe to change the jump type
+ bPrev->bbJumpKind = BBJ_NONE;
+ bPrev->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ }
+ break;
+
+ case BBJ_COND:
+ /* Check for branch to next block */
+ if (bPrev->bbJumpDest == bPrev->bbNext)
+ {
+ fgRemoveJTrue(bPrev);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ ehUpdateForDeletedBlock(block);
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Function called to connect 'bSrc' to 'bDst' when 'bSrc' previously fell through to it
+ */
+
+BasicBlock* Compiler::fgConnectFallThrough(BasicBlock* bSrc, BasicBlock* bDst)
+{
+ BasicBlock* jmpBlk = nullptr;
+
+ /* If bSrc is non-NULL */
+
+ if (bSrc != nullptr)
+ {
+ /* If bSrc falls through to a block that is not bDst, we will insert a jump to bDst */
+
+ if (bSrc->bbFallsThrough() && (bSrc->bbNext != bDst))
+ {
+ switch (bSrc->bbJumpKind)
+ {
+
+ case BBJ_NONE:
+ bSrc->bbJumpKind = BBJ_ALWAYS;
+ bSrc->bbJumpDest = bDst;
+ bSrc->bbJumpDest->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Block BB%02u ended with a BBJ_NONE, Changed to an unconditional jump to BB%02u\n",
+ bSrc->bbNum, bSrc->bbJumpDest->bbNum);
+ }
+#endif
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+
+ // Add a new block after bSrc which jumps to 'bDst'
+ jmpBlk = fgNewBBafter(BBJ_ALWAYS, bSrc, true);
+
+ if (fgComputePredsDone)
+ {
+ fgAddRefPred(jmpBlk, bSrc, fgGetPredForBlock(bDst, bSrc));
+ }
+
+ // When adding a new jmpBlk we will set the bbWeight and bbFlags
+ //
+ if (fgHaveValidEdgeWeights)
+ {
+ noway_assert(fgComputePredsDone);
+
+ flowList* newEdge = fgGetPredForBlock(jmpBlk, bSrc);
+
+ jmpBlk->bbWeight = (newEdge->flEdgeWeightMin + newEdge->flEdgeWeightMax) / 2;
+ if (bSrc->bbWeight == 0)
+ {
+ jmpBlk->bbWeight = 0;
+ }
+
+ if (jmpBlk->bbWeight == 0)
+ {
+ jmpBlk->bbFlags |= BBF_RUN_RARELY;
+ }
+
+ BasicBlock::weight_t weightDiff = (newEdge->flEdgeWeightMax - newEdge->flEdgeWeightMin);
+ BasicBlock::weight_t slop = BasicBlock::GetSlopFraction(bSrc, bDst);
+
+ //
+ // If the [min/max] values for our edge weight is within the slop factor
+ // then we will set the BBF_PROF_WEIGHT flag for the block
+ //
+ if (weightDiff <= slop)
+ {
+ jmpBlk->bbFlags |= BBF_PROF_WEIGHT;
+ }
+ }
+ else
+ {
+ // We set the bbWeight to the smaller of bSrc->bbWeight or bDst->bbWeight
+ if (bSrc->bbWeight < bDst->bbWeight)
+ {
+ jmpBlk->bbWeight = bSrc->bbWeight;
+ jmpBlk->bbFlags |= (bSrc->bbFlags & BBF_RUN_RARELY);
+ }
+ else
+ {
+ jmpBlk->bbWeight = bDst->bbWeight;
+ jmpBlk->bbFlags |= (bDst->bbFlags & BBF_RUN_RARELY);
+ }
+ }
+
+ jmpBlk->bbJumpDest = bDst;
+ jmpBlk->bbJumpDest->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+
+ if (fgComputePredsDone)
+ {
+ fgReplacePred(bDst, bSrc, jmpBlk);
+ }
+ else
+ {
+ jmpBlk->bbFlags |= BBF_IMPORTED;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Added an unconditional jump to BB%02u after block BB%02u\n", jmpBlk->bbJumpDest->bbNum,
+ bSrc->bbNum);
+ }
+#endif // DEBUG
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+ else
+ {
+ // If bSrc is an unconditional branch to the next block
+ // then change it to a BBJ_NONE block
+ //
+ if ((bSrc->bbJumpKind == BBJ_ALWAYS) && !(bSrc->bbFlags & BBF_KEEP_BBJ_ALWAYS) &&
+ (bSrc->bbJumpDest == bSrc->bbNext))
+ {
+ bSrc->bbJumpKind = BBJ_NONE;
+ bSrc->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Changed an unconditional jump from BB%02u to the next block BB%02u into a BBJ_NONE block\n",
+ bSrc->bbNum, bSrc->bbNext->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ return jmpBlk;
+}
+
+/*****************************************************************************
+ Walk the flow graph, reassign block numbers to keep them in ascending order.
+ Returns 'true' if any renumbering was actually done, OR if we change the
+ maximum number of assigned basic blocks (this can happen if we do inlining,
+ create a new, high-numbered block, then that block goes away. We go to
+ renumber the blocks, none of them actually change number, but we shrink the
+ maximum assigned block number. This affects the block set epoch).
+*/
+
+bool Compiler::fgRenumberBlocks()
+{
+ // If we renumber the blocks the dominator information will be out-of-date
+ if (fgDomsComputed)
+ {
+ noway_assert(!"Can't call Compiler::fgRenumberBlocks() when fgDomsComputed==true");
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before renumbering the basic blocks\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+
+ bool renumbered = false;
+ bool newMaxBBNum = false;
+ BasicBlock* block;
+
+ unsigned numStart = 1 + (compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : 0);
+ unsigned num;
+
+ for (block = fgFirstBB, num = numStart; block != nullptr; block = block->bbNext, num++)
+ {
+ noway_assert((block->bbFlags & BBF_REMOVED) == 0);
+
+ if (block->bbNum != num)
+ {
+ renumbered = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Renumber BB%02u to BB%02u\n", block->bbNum, num);
+ }
+#endif // DEBUG
+ block->bbNum = num;
+ }
+
+ if (block->bbNext == nullptr)
+ {
+ fgLastBB = block;
+ fgBBcount = num - numStart + 1;
+ if (compIsForInlining())
+ {
+ if (impInlineInfo->InlinerCompiler->fgBBNumMax != num)
+ {
+ impInlineInfo->InlinerCompiler->fgBBNumMax = num;
+ newMaxBBNum = true;
+ }
+ }
+ else
+ {
+ if (fgBBNumMax != num)
+ {
+ fgBBNumMax = num;
+ newMaxBBNum = true;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After renumbering the basic blocks\n");
+ if (renumbered)
+ {
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+ else
+ {
+ printf("=============== No blocks renumbered!\n");
+ }
+ }
+#endif // DEBUG
+
+ // Now update the BlockSet epoch, which depends on the block numbers.
+ // If any blocks have been renumbered then create a new BlockSet epoch.
+ // Even if we have not renumbered any blocks, we might still need to force
+ // a new BlockSet epoch, for one of several reasons. If there are any new
+ // blocks with higher numbers than the former maximum numbered block, then we
+ // need a new epoch with a new size matching the new largest numbered block.
+ // Also, if the number of blocks is different from the last time we set the
+ // BlockSet epoch, then we need a new epoch. This wouldn't happen if we
+ // renumbered blocks after every block addition/deletion, but it might be
+ // the case that we can change the number of blocks, then set the BlockSet
+ // epoch without renumbering, then change the number of blocks again, then
+ // renumber.
+ if (renumbered || newMaxBBNum)
+ {
+ NewBasicBlockEpoch();
+
+ // The key in the unique switch successor map is dependent on the block number, so invalidate that cache.
+ InvalidateUniqueSwitchSuccMap();
+ }
+ else
+ {
+ EnsureBasicBlockEpoch();
+ }
+
+ // Tell our caller if any blocks actually were renumbered.
+ return renumbered || newMaxBBNum;
+}
+
+/*****************************************************************************
+ *
+ * Is the BasicBlock bJump a forward branch?
+ * Optionally bSrc can be supplied to indicate that
+ * bJump must be forward with respect to bSrc
+ */
+bool Compiler::fgIsForwardBranch(BasicBlock* bJump, BasicBlock* bSrc /* = NULL */)
+{
+ bool result = false;
+
+ if ((bJump->bbJumpKind == BBJ_COND) || (bJump->bbJumpKind == BBJ_ALWAYS))
+ {
+ BasicBlock* bDest = bJump->bbJumpDest;
+ BasicBlock* bTemp = (bSrc == nullptr) ? bJump : bSrc;
+
+ while (true)
+ {
+ bTemp = bTemp->bbNext;
+
+ if (bTemp == nullptr)
+ {
+ break;
+ }
+
+ if (bTemp == bDest)
+ {
+ result = true;
+ break;
+ }
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Function called to expand the set of rarely run blocks
+ */
+
+bool Compiler::fgExpandRarelyRunBlocks()
+{
+ bool result = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgExpandRarelyRunBlocks()\n");
+ }
+
+ const char* reason = nullptr;
+#endif
+
+    // We expand the set of rarely run blocks by observing that a block which
+    // falls into or jumps to a rarely run block must itself be rarely run,
+    // and that when both targets of a conditional jump are rarely run, the
+    // branching block must itself be rarely run.
+
+ BasicBlock* block;
+ BasicBlock* bPrev;
+
+ for (bPrev = fgFirstBB, block = bPrev->bbNext; block != nullptr; bPrev = block, block = block->bbNext)
+ {
+ if (bPrev->isRunRarely())
+ {
+ continue;
+ }
+
+ /* bPrev is known to be a normal block here */
+ switch (bPrev->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+
+ /* Is the jump target rarely run? */
+ if (bPrev->bbJumpDest->isRunRarely())
+ {
+ INDEBUG(reason = "Unconditional jump to a rarely run block";)
+ goto NEW_RARELY_RUN;
+ }
+ break;
+
+ case BBJ_CALLFINALLY:
+
+ // Check for a BBJ_CALLFINALLY followed by a rarely run paired BBJ_ALWAYS
+ //
+ if (bPrev->isBBCallAlwaysPair())
+ {
+ /* Is the next block rarely run? */
+ if (block->isRunRarely())
+ {
+ INDEBUG(reason = "Call of finally followed by a rarely run block";)
+ goto NEW_RARELY_RUN;
+ }
+ }
+ break;
+
+ case BBJ_NONE:
+
+ /* is fall through target rarely run? */
+ if (block->isRunRarely())
+ {
+ INDEBUG(reason = "Falling into a rarely run block";)
+ goto NEW_RARELY_RUN;
+ }
+ break;
+
+ case BBJ_COND:
+
+ if (!block->isRunRarely())
+ {
+ continue;
+ }
+
+ /* If both targets of the BBJ_COND are run rarely then don't reorder */
+ if (bPrev->bbJumpDest->isRunRarely())
+ {
+ /* bPrev should also be marked as run rarely */
+ if (!bPrev->isRunRarely())
+ {
+ INDEBUG(reason = "Both sides of a conditional jump are rarely run";)
+
+ NEW_RARELY_RUN:
+ /* If the weight of the block was obtained from a profile run,
+                           then it's more accurate than our static analysis */
+ if (bPrev->bbFlags & BBF_PROF_WEIGHT)
+ {
+ continue;
+ }
+ result = true;
+
+#ifdef DEBUG
+ assert(reason != nullptr);
+ if (verbose)
+ {
+ printf("%s, marking BB%02u as rarely run\n", reason, bPrev->bbNum);
+ }
+#endif // DEBUG
+
+ /* Must not have previously been marked */
+ noway_assert(!bPrev->isRunRarely());
+
+ /* Mark bPrev as a new rarely run block */
+ bPrev->bbSetRunRarely();
+
+ BasicBlock* bPrevPrev = nullptr;
+ BasicBlock* tmpbb;
+
+ if ((bPrev->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0)
+ {
+ // If we've got a BBJ_CALLFINALLY/BBJ_ALWAYS pair, treat the BBJ_CALLFINALLY as an
+ // additional predecessor for the BBJ_ALWAYS block
+ tmpbb = bPrev->bbPrev;
+ noway_assert(tmpbb != nullptr);
+#if FEATURE_EH_FUNCLETS
+ noway_assert(tmpbb->isBBCallAlwaysPair());
+ bPrevPrev = tmpbb;
+#else
+ if (tmpbb->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ bPrevPrev = tmpbb;
+ }
+#endif
+ }
+
+                        /* Now go back to its earliest predecessor to see  */
+ /* if it too should now be marked as rarely run */
+ flowList* pred = bPrev->bbPreds;
+
+ if ((pred != nullptr) || (bPrevPrev != nullptr))
+ {
+ // bPrevPrev will be set to the lexically
+ // earliest predecessor of bPrev.
+
+ while (pred != nullptr)
+ {
+ if (bPrevPrev == nullptr)
+ {
+ // Initially we select the first block in the bbPreds list
+ bPrevPrev = pred->flBlock;
+ continue;
+ }
+
+ // Walk the flow graph lexically forward from pred->flBlock
+ // if we find (block == bPrevPrev) then
+ // pred->flBlock is an earlier predecessor.
+ for (tmpbb = pred->flBlock; tmpbb != nullptr; tmpbb = tmpbb->bbNext)
+ {
+ if (tmpbb == bPrevPrev)
+ {
+                                        /* We found an earlier predecessor */
+ bPrevPrev = pred->flBlock;
+ break;
+ }
+ else if (tmpbb == bPrev)
+ {
+ // We have reached bPrev so stop walking
+ // as this cannot be an earlier predecessor
+ break;
+ }
+ }
+
+ // Onto the next predecessor
+ pred = pred->flNext;
+ }
+
+ // Walk the flow graph forward from bPrevPrev
+ // if we don't find (tmpbb == bPrev) then our candidate
+ // bPrevPrev is lexically after bPrev and we do not
+ // want to select it as our new block
+
+ for (tmpbb = bPrevPrev; tmpbb != nullptr; tmpbb = tmpbb->bbNext)
+ {
+ if (tmpbb == bPrev)
+ {
+ // Set up block back to the lexically
+                                    // earliest predecessor of bPrev
+
+ block = bPrevPrev;
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+
+ // Now iterate over every block to see if we can prove that a block is rarely run
+ // (i.e. when all predecessors to the block are rarely run)
+ //
+ for (bPrev = fgFirstBB, block = bPrev->bbNext; block != nullptr; bPrev = block, block = block->bbNext)
+ {
+ // If block is not run rarely, then check to make sure that it has
+        // at least one non-rarely run predecessor.
+
+ if (!block->isRunRarely())
+ {
+ bool rare = true;
+
+ /* Make sure that block has at least one normal predecessor */
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ /* Find the fall through predecessor, if any */
+ if (!pred->flBlock->isRunRarely())
+ {
+ rare = false;
+ break;
+ }
+ }
+
+ if (rare)
+ {
+ // If 'block' is the start of a handler or filter then we cannot make it
+ // rarely run because we may have an exceptional edge that
+ // branches here.
+ //
+ if (bbIsHandlerBeg(block))
+ {
+ rare = false;
+ }
+ }
+
+ if (rare)
+ {
+ block->bbSetRunRarely();
+ result = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("All branches to BB%02u are from rarely run blocks, marking as rarely run\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // When marking a BBJ_CALLFINALLY as rarely run we also mark
+ // the BBJ_ALWAYS that comes after it as rarely run
+ //
+ if (block->isBBCallAlwaysPair())
+ {
+ BasicBlock* bNext = block->bbNext;
+ PREFIX_ASSUME(bNext != nullptr);
+ bNext->bbSetRunRarely();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Also marking the BBJ_ALWAYS at BB%02u as rarely run\n", bNext->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ /* COMPACT blocks if possible */
+ if (bPrev->bbJumpKind == BBJ_NONE)
+ {
+ if (fgCanCompactBlocks(bPrev, block))
+ {
+ fgCompactBlocks(bPrev, block);
+
+ block = bPrev;
+ continue;
+ }
+ }
+ //
+ // if bPrev->bbWeight is not based upon profile data we can adjust
+ // the weights of bPrev and block
+ //
+        else if (bPrev->isBBCallAlwaysPair() && // we must have a BBJ_CALLFINALLY and BBJ_ALWAYS pair
+ (bPrev->bbWeight != block->bbWeight) && // the weights are currently different
+ ((bPrev->bbFlags & BBF_PROF_WEIGHT) == 0)) // and the BBJ_CALLFINALLY block is not using profiled
+ // weights
+ {
+ if (block->isRunRarely())
+ {
+ bPrev->bbWeight =
+ block->bbWeight; // the BBJ_CALLFINALLY block now has the same weight as the BBJ_ALWAYS block
+ bPrev->bbFlags |= BBF_RUN_RARELY; // and is now rarely run
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Marking the BBJ_CALLFINALLY block at BB%02u as rarely run because BB%02u is rarely run\n",
+ bPrev->bbNum, block->bbNum);
+ }
+#endif // DEBUG
+ }
+ else if (bPrev->isRunRarely())
+ {
+ block->bbWeight =
+ bPrev->bbWeight; // the BBJ_ALWAYS block now has the same weight as the BBJ_CALLFINALLY block
+ block->bbFlags |= BBF_RUN_RARELY; // and is now rarely run
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Marking the BBJ_ALWAYS block at BB%02u as rarely run because BB%02u is rarely run\n",
+ block->bbNum, bPrev->bbNum);
+ }
+#endif // DEBUG
+ }
+ else // Both blocks are hot, bPrev is known not to be using profiled weight
+ {
+ bPrev->bbWeight =
+ block->bbWeight; // the BBJ_CALLFINALLY block now has the same weight as the BBJ_ALWAYS block
+ }
+ noway_assert(block->bbWeight == bPrev->bbWeight);
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Returns true if it is allowable (based upon the EH regions)
+ * to place block bAfter immediately after bBefore. It is allowable
+ * if the 'bBefore' and 'bAfter' blocks are in the exact same EH region.
+ */
+
+bool Compiler::fgEhAllowsMoveBlock(BasicBlock* bBefore, BasicBlock* bAfter)
+{
+ return BasicBlock::sameEHRegion(bBefore, bAfter);
+}
+
+/*****************************************************************************
+ *
+ * Function called to move the range of blocks [bStart .. bEnd].
+ * The blocks are placed immediately after the insertAfterBlk.
+ * fgFirstFuncletBB is not updated; that is the responsibility of the caller, if necessary.
+ */
+
+void Compiler::fgMoveBlocksAfter(BasicBlock* bStart, BasicBlock* bEnd, BasicBlock* insertAfterBlk)
+{
+ /* We have decided to insert the block(s) after 'insertAfterBlk' */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Relocated block%s [BB%02u..BB%02u] inserted after BB%02u%s\n", (bStart == bEnd) ? "" : "s",
+ bStart->bbNum, bEnd->bbNum, insertAfterBlk->bbNum,
+ (insertAfterBlk->bbNext == nullptr) ? " at the end of method" : "");
+ }
+#endif // DEBUG
+
+ /* relink [bStart .. bEnd] into the flow graph */
+
+ bEnd->bbNext = insertAfterBlk->bbNext;
+ if (insertAfterBlk->bbNext)
+ {
+ insertAfterBlk->bbNext->bbPrev = bEnd;
+ }
+ insertAfterBlk->setNext(bStart);
+
+ /* If insertAfterBlk was fgLastBB then update fgLastBB */
+ if (insertAfterBlk == fgLastBB)
+ {
+ fgLastBB = bEnd;
+ noway_assert(fgLastBB->bbNext == nullptr);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Function called to relocate a single range to the end of the method.
+ * Only an entire consecutive region can be moved and it will be kept together.
+ * Except for the first block, the range cannot have any blocks that jump into or out of the region.
+ * When successful we return the bLast block which is the last block that we relocated.
+ * When unsuccessful we return NULL.
+
+ =============================================================
+ NOTE: This function can invalidate all pointers into the EH table, as well as change the size of the EH table!
+ =============================================================
+ */
+
+BasicBlock* Compiler::fgRelocateEHRange(unsigned regionIndex, FG_RELOCATE_TYPE relocateType)
+{
+ INDEBUG(const char* reason = "None";)
+
+ // Figure out the range of blocks we're going to move
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+ BasicBlock* bStart = nullptr;
+ BasicBlock* bMiddle = nullptr;
+ BasicBlock* bLast = nullptr;
+ BasicBlock* bPrev = nullptr;
+
+#if FEATURE_EH_FUNCLETS
+ // We don't support moving try regions... yet?
+ noway_assert(relocateType == FG_RELOCATE_HANDLER);
+#endif // FEATURE_EH_FUNCLETS
+
+ HBtab = ehGetDsc(regionIndex);
+
+ if (relocateType == FG_RELOCATE_TRY)
+ {
+ bStart = HBtab->ebdTryBeg;
+ bLast = HBtab->ebdTryLast;
+ }
+ else if (relocateType == FG_RELOCATE_HANDLER)
+ {
+ if (HBtab->HasFilter())
+ {
+ // The filter and handler funclets must be moved together, and remain contiguous.
+ bStart = HBtab->ebdFilter;
+ bMiddle = HBtab->ebdHndBeg;
+ bLast = HBtab->ebdHndLast;
+ }
+ else
+ {
+ bStart = HBtab->ebdHndBeg;
+ bLast = HBtab->ebdHndLast;
+ }
+ }
+
+ // Our range must contain either all rarely run blocks or all non-rarely run blocks
+ bool inTheRange = false;
+ bool validRange = false;
+
+ BasicBlock* block;
+
+ noway_assert(bStart != nullptr && bLast != nullptr);
+ if (bStart == fgFirstBB)
+ {
+ INDEBUG(reason = "can not relocate first block";)
+ goto FAILURE;
+ }
+
+#if !FEATURE_EH_FUNCLETS
+ // With funclets we still need to set some information on the handler blocks,
+ // so we relocate the region even when it is already at the end of the method.
+ if (bLast->bbNext == NULL)
+ {
+ INDEBUG(reason = "region is already at the end of the method";)
+ goto FAILURE;
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ // Walk the block list for this purpose:
+ // 1. Verify that all the blocks in the range are either all rarely run or not rarely run.
+ // When creating funclets, we ignore the run rarely flag, as we need to be able to move any blocks
+ // in the range.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !FEATURE_EH_FUNCLETS
+ bool isRare;
+ isRare = bStart->isRunRarely();
+#endif // !FEATURE_EH_FUNCLETS
+ block = fgFirstBB;
+ while (true)
+ {
+ if (block == bStart)
+ {
+ noway_assert(inTheRange == false);
+ inTheRange = true;
+ }
+ else if (block == bLast->bbNext)
+ {
+ noway_assert(inTheRange == true);
+ inTheRange = false;
+ break; // we found the end, so we're done
+ }
+
+ if (inTheRange)
+ {
+#if !FEATURE_EH_FUNCLETS
+ // All blocks in the range must have the same rarely-run status, otherwise we must fail.
+ if (isRare != block->isRunRarely())
+ {
+ INDEBUG(reason = "this region contains both rarely run and non-rarely run blocks";)
+ goto FAILURE;
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ validRange = true;
+ }
+
+ if (block == nullptr)
+ {
+ break;
+ }
+
+ block = block->bbNext;
+ }
+ // Ensure that bStart .. bLast defined a valid range
+ noway_assert((validRange == true) && (inTheRange == false));
+
+ bPrev = bStart->bbPrev;
+ noway_assert(bPrev != nullptr); // Can't move a range that includes the first block of the function.
+
+ JITDUMP("Relocating %s range BB%02u..BB%02u (EH#%u) to end of BBlist\n",
+ (relocateType == FG_RELOCATE_TRY) ? "try" : "handler", bStart->bbNum, bLast->bbNum, regionIndex);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ if (!FEATURE_EH_FUNCLETS)
+ {
+ // This is really expensive, and quickly becomes O(n^n) with funclets
+ // so only do it once after we've created them (see fgCreateFunclets)
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+
+ bStart->bbFlags |= BBF_FUNCLET_BEG; // Mark the start block of the funclet
+
+ if (bMiddle != nullptr)
+ {
+ bMiddle->bbFlags |= BBF_FUNCLET_BEG; // Also mark the start block of a filter handler as a funclet
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ BasicBlock* bNext;
+ bNext = bLast->bbNext;
+
+ /* Temporarily unlink [bStart .. bLast] from the flow graph */
+ fgUnlinkRange(bStart, bLast);
+
+ BasicBlock* insertAfterBlk;
+ insertAfterBlk = fgLastBB;
+
+#if FEATURE_EH_FUNCLETS
+
+ // There are several cases we need to consider when moving an EH range.
+ // If moving a range X, we must consider its relationship to every other EH
+ // range A in the table. Note that each entry in the table represents both
+ // a protected region and a handler region (possibly including a filter region
+ // that must live before and adjacent to the handler region), so we must
+ // consider try and handler regions independently. These are the cases:
+ // 1. A is completely contained within X (where "completely contained" means
+ // that the 'begin' and 'last' parts of A are strictly between the 'begin'
+ // and 'end' parts of X, and aren't equal to either, for example, they don't
+ // share 'last' blocks). In this case, when we move X, A moves with it, and
+ // the EH table doesn't need to change.
+ // 2. X is completely contained within A. In this case, X gets extracted from A,
+ // and the range of A shrinks, but because A is strictly within X, the EH
+ // table doesn't need to change.
+ // 3. A and X have exactly the same range. In this case, A is moving with X and
+ // the EH table doesn't need to change.
+ // 4. A and X share the 'last' block. There are two sub-cases:
+ // (a) A is a larger range than X (such that the beginning of A precedes the
+ // beginning of X): in this case, we are moving the tail of A. We set the
+ // 'last' block of A to the block preceding the beginning block of X.
+ // (b) A is a smaller range than X. Thus, we are moving the entirety of A along
+ // with X. In this case, nothing in the EH record for A needs to change.
+ // 5. A and X share the 'beginning' block (but aren't the same range, as in #3).
+ // This can never happen here, because we are only moving handler ranges (we don't
+ // move try ranges), and handler regions cannot start at the beginning of a try
+ // range or handler range and be a subset.
+ //
+ // Note that A and X must properly nest for the table to be well-formed. For example,
+ // the beginning of A can't be strictly within the range of X (that is, the beginning
+ // of A isn't shared with the beginning of X) and the end of A outside the range.
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ if (XTnum != regionIndex) // we don't need to update our 'last' pointer
+ {
+ if (HBtab->ebdTryLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different try region then we may need to update ebdTryLast
+ for (block = HBtab->ebdTryBeg; block != nullptr; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ // We were contained within it, so shrink its region by
+ // setting its 'last'
+ fgSetTryEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdTryLast->bbNext)
+ {
+ // bPrev does not come after the TryBeg, thus we are larger, and
+ // it is moving with us.
+ break;
+ }
+ }
+ }
+ if (HBtab->ebdHndLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different handler region then we must update ebdHndLast
+ for (block = HBtab->ebdHndBeg; block != nullptr; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ fgSetHndEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdHndLast->bbNext)
+ {
+ // bPrev does not come after the HndBeg
+ break;
+ }
+ }
+ }
+ }
+ } // end exception table iteration
+
+ // Insert the block(s) we are moving after fgLastBB
+ fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
+
+ if (fgFirstFuncletBB == nullptr) // The funclet region isn't set yet
+ {
+ fgFirstFuncletBB = bStart;
+ }
+ else
+ {
+ assert(fgFirstFuncletBB !=
+ insertAfterBlk->bbNext); // We insert at the end, not at the beginning, of the funclet region.
+ }
+
+ // These asserts assume we aren't moving try regions (which we might need to do). Only
+ // try regions can have fall through into or out of the region.
+
+ noway_assert(!bPrev->bbFallsThrough()); // There can be no fall through into a filter or handler region
+ noway_assert(!bLast->bbFallsThrough()); // There can be no fall through out of a handler region
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Create funclets: moved region\n");
+ fgDispHandlerTab();
+ }
+
+ // We have to wait to do this until we've created all the additional regions
+ // Because this relies on ebdEnclosingTryIndex and ebdEnclosingHndIndex
+ if (!FEATURE_EH_FUNCLETS)
+ {
+ // This is really expensive, and quickly becomes O(n^n) with funclets
+ // so only do it once after we've created them (see fgCreateFunclets)
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+
+#else // FEATURE_EH_FUNCLETS
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ if (XTnum == regionIndex)
+ {
+ // Don't update our handler's Last info
+ continue;
+ }
+
+ if (HBtab->ebdTryLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different try region then we may need to update ebdTryLast
+ for (block = HBtab->ebdTryBeg; block != NULL; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ fgSetTryEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdTryLast->bbNext)
+ {
+ // bPrev does not come after the TryBeg
+ break;
+ }
+ }
+ }
+ if (HBtab->ebdHndLast == bLast)
+ {
+ // If we moved a set of blocks that were at the end of
+ // a different handler region then we must update ebdHndLast
+ for (block = HBtab->ebdHndBeg; block != NULL; block = block->bbNext)
+ {
+ if (block == bPrev)
+ {
+ fgSetHndEnd(HBtab, bPrev);
+ break;
+ }
+ else if (block == HBtab->ebdHndLast->bbNext)
+ {
+ // bPrev does not come after the HndBeg
+ break;
+ }
+ }
+ }
+ } // end exception table iteration
+
+ // We have decided to insert the block(s) after fgLastBB
+ fgMoveBlocksAfter(bStart, bLast, insertAfterBlk);
+
+ // If bPrev falls through, we will insert a jump to block
+ fgConnectFallThrough(bPrev, bStart);
+
+ // If bLast falls through, we will insert a jump to bNext
+ fgConnectFallThrough(bLast, bNext);
+
+#endif // FEATURE_EH_FUNCLETS
+
+ goto DONE;
+
+FAILURE:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** Failed fgRelocateEHRange(BB%02u..BB%02u) because %s\n", bStart->bbNum, bLast->bbNum,
+ reason);
+ }
+#endif // DEBUG
+
+ bLast = nullptr;
+
+DONE:
+
+ return bLast;
+}
+
+#if FEATURE_EH_FUNCLETS
+
+#if defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * We just removed a BBJ_CALLFINALLY/BBJ_ALWAYS pair. If this was the only such pair
+ * targeting the BBJ_ALWAYS target, then we need to clear the BBF_FINALLY_TARGET bit
+ * so that target can also be removed. 'block' is the finally target. Since we just
+ * removed the BBJ_ALWAYS, it better have the BBF_FINALLY_TARGET bit set.
+ */
+
+void Compiler::fgClearFinallyTargetBit(BasicBlock* block)
+{
+ assert((block->bbFlags & BBF_FINALLY_TARGET) != 0);
+
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ if (pred->flBlock->bbJumpKind == BBJ_ALWAYS && pred->flBlock->bbJumpDest == block)
+ {
+ BasicBlock* pPrev = pred->flBlock->bbPrev;
+ if (pPrev != NULL)
+ {
+ if (pPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // We found a BBJ_CALLFINALLY / BBJ_ALWAYS that still points to this finally target
+ return;
+ }
+ }
+ }
+ }
+
+ // Didn't find any BBJ_CALLFINALLY / BBJ_ALWAYS that still points here, so clear the bit
+
+ block->bbFlags &= ~BBF_FINALLY_TARGET;
+}
+
+#endif // defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * Is this an intra-handler control flow edge?
+ *
+ * 'block' is the head block of a funclet/handler region.
+ * 'predBlock' is a predecessor block of 'block' in the predecessor list.
+ *
+ * 'predBlock' can legally only be one of three things:
+ * 1. in the same handler region (e.g., the source of a back-edge of a loop from
+ * 'predBlock' to 'block'), including in nested regions within the handler,
+ * 2. if 'block' begins a handler that is a filter-handler, 'predBlock' must be in the 'filter' region,
+ * 3. for other handlers, 'predBlock' must be in the 'try' region corresponding to the handler (or any
+ * region nested in the 'try' region).
+ *
+ * Note that on AMD64/ARM64, the BBJ_CALLFINALLY block that calls a finally handler is not
+ * within the corresponding 'try' region: it is placed in the corresponding 'try' region's
+ * parent (which might be the main function body). This is how it is represented to the VM
+ * (with a special "cloned finally" EH table entry).
+ *
+ * Return 'true' for case #1, and 'false' otherwise.
+ */
+bool Compiler::fgIsIntraHandlerPred(BasicBlock* predBlock, BasicBlock* block)
+{
+ // Some simple preconditions (as stated above)
+ assert(!fgFuncletsCreated);
+ assert(fgGetPredForBlock(block, predBlock) != nullptr);
+ assert(block->hasHndIndex());
+
+ EHblkDsc* xtab = ehGetDsc(block->getHndIndex());
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (xtab->HasFinallyHandler())
+ {
+ assert((xtab->ebdHndBeg == block) || // The normal case
+ ((xtab->ebdHndBeg->bbNext == block) &&
+ (xtab->ebdHndBeg->bbFlags & BBF_INTERNAL))); // After we've already inserted a header block, and we're
+ // trying to decide how to split up the predecessor edges.
+ if (predBlock->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert(predBlock->bbJumpDest == block);
+
+ // A BBJ_CALLFINALLY predecessor of the handler can only come from the corresponding try,
+ // not from any EH clauses nested in this handler. However, we represent the BBJ_CALLFINALLY
+ // as being in the 'try' region's parent EH region, which might be the main function body.
+
+ unsigned tryIndex = xtab->ebdEnclosingTryIndex;
+ if (tryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(!predBlock->hasTryIndex());
+ }
+ else
+ {
+ assert(predBlock->hasTryIndex());
+ assert(tryIndex == predBlock->getTryIndex());
+ assert(ehGetDsc(tryIndex)->InTryRegionBBRange(predBlock));
+ }
+ return false;
+ }
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ assert(predBlock->hasHndIndex() || predBlock->hasTryIndex());
+
+ // We could search the try region looking for predBlock by using bbInTryRegions
+ // but that does a lexical search for the block, and then assumes funclets
+ // have been created and does a lexical search of all funclets that were pulled
+ // out of the parent try region.
+ // First, funclets haven't been created yet, and even if they had been, we shouldn't
+ // have any funclet directly branching to another funclet (they have to return first).
+ // So we can safely use CheckIsTryRegion instead of bbInTryRegions.
+ // Second, I believe the depth of any EH graph will on average be smaller than the
+ // breadth of the blocks within a try body. Thus it is faster to get our answer by
+ // looping outward over the region graph. However, I have added asserts, as a
+ // precaution, to ensure both algorithms agree. The asserts also check that the only
+ // way to reach the head of a funclet is from the corresponding try body or from
+ // within the funclet (and *not* any nested funclets).
+
+ if (predBlock->hasTryIndex())
+ {
+ // Because the EH clauses are listed inside-out, any nested trys will be at a
+ // lower index than the current try and if there's no enclosing try, tryIndex
+ // will terminate at NO_ENCLOSING_INDEX
+
+ unsigned tryIndex = predBlock->getTryIndex();
+ while (tryIndex < block->getHndIndex())
+ {
+ tryIndex = ehGetEnclosingTryIndex(tryIndex);
+ }
+ // tryIndex should enclose predBlock
+ assert((tryIndex == EHblkDsc::NO_ENCLOSING_INDEX) || ehGetDsc(tryIndex)->InTryRegionBBRange(predBlock));
+
+ // At this point tryIndex is either block's handler's corresponding try body
+ // or some outer try region that contains both predBlock & block or
+ // NO_ENCLOSING_INDEX (because there was no try body that encloses both).
+ if (tryIndex == block->getHndIndex())
+ {
+ assert(xtab->InTryRegionBBRange(predBlock));
+ assert(!xtab->InHndRegionBBRange(predBlock));
+ return false;
+ }
+ // tryIndex should enclose block (and predBlock as previously asserted)
+ assert((tryIndex == EHblkDsc::NO_ENCLOSING_INDEX) || ehGetDsc(tryIndex)->InTryRegionBBRange(block));
+ }
+ if (xtab->HasFilter())
+ {
+ // The block is a handler. Check if the pred block is from its filter. We only need to
+ // check the end filter flag, as there is only a single filter for any handler, and we
+ // already know predBlock is a predecessor of block.
+ if (predBlock->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ assert(!xtab->InHndRegionBBRange(predBlock));
+ return false;
+ }
+ }
+ // It is not in our try region (or filter), so it must be within this handler (or try bodies
+ // within this handler)
+ assert(!xtab->InTryRegionBBRange(predBlock));
+ assert(xtab->InHndRegionBBRange(predBlock));
+ return true;
+}
+
+/*****************************************************************************
+ * Does this block, first block of a handler region, have any predecessor edges
+ * that are not from its corresponding try region?
+ */
+
+bool Compiler::fgAnyIntraHandlerPreds(BasicBlock* block)
+{
+ assert(block->hasHndIndex());
+ assert(fgFirstBlockOfHandler(block) == block); // this block is the first block of a handler
+
+ flowList* pred;
+
+ for (pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ if (fgIsIntraHandlerPred(predBlock, block))
+ {
+ // We have a predecessor that is not from our try region
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * Introduce a new head block of the handler for the prolog to be put in, ahead
+ * of the current handler head 'block'.
+ * Note that this code has some similarities to fgCreateLoopPreHeader().
+ */
+
+void Compiler::fgInsertFuncletPrologBlock(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCreating funclet prolog header for BB%02u\n", block->bbNum);
+ }
+#endif
+
+ assert(block->hasHndIndex());
+ assert(fgFirstBlockOfHandler(block) == block); // this block is the first block of a handler
+
+ /* Allocate a new basic block */
+
+ BasicBlock* newHead = bbNewBasicBlock(BBJ_NONE);
+
+ // In fgComputePreds() we set the BBF_JMP_TARGET and BBF_HAS_LABEL for all of the handler entry points
+ //
+ newHead->bbFlags |= (BBF_INTERNAL | BBF_JMP_TARGET | BBF_HAS_LABEL);
+ newHead->inheritWeight(block);
+ newHead->bbRefs = 0;
+
+ fgInsertBBbefore(block, newHead); // insert the new block in the block list
+ fgExtendEHRegionBefore(block); // Update the EH table to make the prolog block the first block in the block's EH
+ // region.
+
+ // fgExtendEHRegionBefore mucks with the bbRefs without updating the pred list, which we will
+ // do below for this block. So, undo that change.
+ assert(newHead->bbRefs > 0);
+ newHead->bbRefs--;
+ block->bbRefs++;
+
+ // Distribute the pred list between newHead and block. Incoming edges coming from outside
+ // the handler go to the prolog. Edges coming from within the handler are back-edges, and
+ // go to the existing 'block'.
+
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!fgIsIntraHandlerPred(predBlock, block))
+ {
+ // It's a jump from outside the handler; add it to the newHead preds list and remove
+ // it from the block preds list.
+
+ switch (predBlock->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ noway_assert(predBlock->bbJumpDest == block);
+ predBlock->bbJumpDest = newHead;
+ fgRemoveRefPred(block, predBlock);
+ fgAddRefPred(newHead, predBlock);
+ break;
+
+ default:
+ // The only way into the handler is via a BBJ_CALLFINALLY (to a finally handler), or
+ // via exception handling.
+ noway_assert(false);
+ break;
+ }
+ }
+ }
+
+ assert(nullptr == fgGetPredForBlock(block, newHead));
+ fgAddRefPred(block, newHead);
+
+ assert((newHead->bbFlags & (BBF_INTERNAL | BBF_JMP_TARGET | BBF_HAS_LABEL)) ==
+ (BBF_INTERNAL | BBF_JMP_TARGET | BBF_HAS_LABEL));
+}
+
+/*****************************************************************************
+ *
+ * Every funclet will have a prolog. That prolog will be inserted as the first instructions
+ * in the first block of the funclet. If the prolog is also the head block of a loop, we
+ * would end up with the prolog instructions being executed more than once.
+ * Check for this by searching the predecessor list for loops, and create a new prolog header
+ * block when needed. We detect a loop by looking for any predecessor that isn't in the
+ * handler's try region, since the only way to get into a handler is via that try region.
+ */
+
+void Compiler::fgCreateFuncletPrologBlocks()
+{
+ noway_assert(fgComputePredsDone);
+ noway_assert(!fgDomsComputed); // this function doesn't maintain the dom sets
+ assert(!fgFuncletsCreated);
+
+ bool prologBlocksCreated = false;
+ EHblkDsc* HBtabEnd;
+ EHblkDsc* HBtab;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ BasicBlock* head = HBtab->ebdHndBeg;
+
+ if (fgAnyIntraHandlerPreds(head))
+ {
+ // We need to create a new block in which to place the prolog, and split the existing
+ // head block predecessor edges into those that should point to the prolog, and those
+ // that shouldn't.
+ //
+ // It's arguable that we should just always do this, and not only when we "need to",
+ // so there aren't two different code paths. However, it's unlikely to be necessary
+ // for catch handlers because they have an incoming argument (the exception object)
+ // that needs to get stored or saved, so back-arcs won't normally go to the head. It's
+ // possible when writing in IL to generate a legal loop (e.g., push an Exception object
+ // on the stack before jumping back to the catch head), but C# probably won't. This will
+ // most commonly only be needed for finallys with a do/while loop at the top of the
+ // finally.
+ //
+ // Note that we don't check filters. This might be a bug, but filters always have a filter
+ // object live on entry, so it's at least unlikely (illegal?) that a loop edge targets the
+ // filter head.
+
+ fgInsertFuncletPrologBlock(head);
+ prologBlocksCreated = true;
+ }
+ }
+
+ if (prologBlocksCreated)
+ {
+ // If we've modified the graph, reset the 'modified' flag, since the dominators haven't
+ // been computed.
+ fgModified = false;
+
+#if DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter fgCreateFuncletPrologBlocks()");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist();
+#endif // DEBUG
+ }
+}
+
+/*****************************************************************************
+ *
+ * Function to create funclets out of all EH catch/finally/fault blocks.
+ * We only move filter and handler blocks, not try blocks.
+ */
+
+void Compiler::fgCreateFunclets()
+{
+ assert(!fgFuncletsCreated);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgCreateFunclets()\n");
+ }
+#endif
+
+ fgCreateFuncletPrologBlocks();
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
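+ // We need one FuncInfoDsc for the root function plus one for each funclet:
+ // every handler becomes a funclet, and every filter contributes an additional funclet.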
+ const unsigned int funcCnt = ehFuncletCount() + 1;
+
+ if (!FitsIn<unsigned short>(funcCnt))
+ {
+ IMPL_LIMITATION("Too many funclets");
+ }
+
+ FuncInfoDsc* funcInfo = new (this, CMK_BasicBlock) FuncInfoDsc[funcCnt];
+
+ unsigned short funcIdx;
+
+ // Setup the root FuncInfoDsc and prepare to start associating
+ // FuncInfoDsc's with their corresponding EH region
+ memset((void*)funcInfo, 0, funcCnt * sizeof(FuncInfoDsc));
+ assert(funcInfo[0].funKind == FUNC_ROOT);
+ funcIdx = 1;
+
+ // Because we iterate from the top to the bottom of the compHndBBtab array, we are iterating
+ // from most nested (innermost) to least nested (outermost) EH region. It would be reasonable
+ // to iterate in the opposite order, but the order of funclets shouldn't matter.
+ //
+ // We move every handler region to the end of the function: each handler will become a funclet.
+ //
+ // Note that fgRelocateEHRange() can add new entries to the EH table. However, they will always
+ // be added *after* the current index, so our iteration here is not invalidated.
+ // It *can* invalidate the compHndBBtab pointer itself, though, if it gets reallocated!
+
+ for (XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ HBtab = ehGetDsc(XTnum); // must re-compute this every loop, since fgRelocateEHRange changes the table
+ if (HBtab->HasFilter())
+ {
+ assert(funcIdx < funcCnt);
+ funcInfo[funcIdx].funKind = FUNC_FILTER;
+ funcInfo[funcIdx].funEHIndex = (unsigned short)XTnum;
+ funcIdx++;
+ }
+ assert(funcIdx < funcCnt);
+ funcInfo[funcIdx].funKind = FUNC_HANDLER;
+ funcInfo[funcIdx].funEHIndex = (unsigned short)XTnum;
+ HBtab->ebdFuncIndex = funcIdx;
+ funcIdx++;
+ fgRelocateEHRange(XTnum, FG_RELOCATE_HANDLER);
+ }
+
+ // We better have populated all of them by now
+ assert(funcIdx == funcCnt);
+
+ // Publish
+ compCurrFuncIdx = 0;
+ compFuncInfos = funcInfo;
+ compFuncInfoCount = (unsigned short)funcCnt;
+
+ fgFuncletsCreated = true;
+
+#if DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter fgCreateFunclets()");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist();
+#endif // DEBUG
+}
+
+#else // !FEATURE_EH_FUNCLETS
+
+ /*****************************************************************************
+ *
+ * Function called to relocate any and all EH regions.
+ * Only entire consecutive EH regions will be moved and they will be kept together.
+ * Except for the first block, the range can not have any blocks that jump into or out of the region.
+ */
+
+ bool Compiler::fgRelocateEHRegions()
+ {
+ bool result = false; // Our return value
+
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In fgRelocateEHRegions()\n");
+#endif
+
+ if (fgCanRelocateEHRegions)
+ {
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Nested EH regions cannot be moved.
+ // Also we don't want to relocate an EH region that has a filter
+ if ((HBtab->ebdHandlerNestingLevel == 0) && !HBtab->HasFilter())
+ {
+ bool movedTry = false;
+#if DEBUG
+ bool movedHnd = false;
+#endif // DEBUG
+
+ // Only try to move the outermost try region
+ if (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // Move the entire try region if it can be moved
+ if (HBtab->ebdTryBeg->isRunRarely())
+ {
+ BasicBlock* bTryLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_TRY);
+ if (bTryLastBB != NULL)
+ {
+ result = true;
+ movedTry = true;
+ }
+ }
+#if DEBUG
+ if (verbose && movedTry)
+ {
+ printf("\nAfter relocating an EH try region");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+ }
+
+ // Currently it is not good to move the rarely run handler regions to the end of the method
+ // because fgDetermineFirstColdBlock() must put the start of any handler region in the hot section.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if 0
+ // Now try to move the entire handler region if it can be moved.
+ // Don't try to move a finally handler unless we already moved the try region.
+ if (HBtab->ebdHndBeg->isRunRarely() &&
+ !HBtab->ebdHndBeg->hasTryIndex() &&
+ (movedTry || !HBtab->HasFinallyHandler()))
+ {
+ BasicBlock* bHndLastBB = fgRelocateEHRange(XTnum, FG_RELOCATE_HANDLER);
+ if (bHndLastBB != NULL)
+ {
+ result = true;
+ movedHnd = true;
+ }
+ }
+#endif // 0
+
+#if DEBUG
+ if (verbose && movedHnd)
+ {
+ printf("\nAfter relocating an EH handler region");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+#if DEBUG
+ fgVerifyHandlerTab();
+
+ if (verbose && result)
+ {
+ printf("\nAfter fgRelocateEHRegions()");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+
+ return result;
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
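+// setEdgeWeightMinChecked: try to set flEdgeWeightMin to newWeight.
+// Returns true (and updates the range) when newWeight is compatible with the current
+// [flEdgeWeightMin..flEdgeWeightMax] range, allowing up to 'slop' units of inaccuracy.
+// When the slop is needed to make the assignment, *wbUsedSlop is set to true (if non-null).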
+bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop)
+{
+ bool result = false;
+ if ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin))
+ {
+ flEdgeWeightMin = newWeight;
+ result = true;
+ }
+ else if (slop > 0)
+ {
+ // We allow for a small amount of inaccuracy in block weight counts.
+ if (flEdgeWeightMax < newWeight)
+ {
+ // We have already determined that this edge's weight
+ // is less than newWeight, so we just allow for the slop
+ if (newWeight <= (flEdgeWeightMax + slop))
+ {
+ result = true;
+
+ if (flEdgeWeightMax != 0)
+ {
+ // We will raise flEdgeWeightMin and Max towards newWeight
+ flEdgeWeightMin = flEdgeWeightMax;
+ flEdgeWeightMax = newWeight;
+ }
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+ else
+ {
+ assert(flEdgeWeightMin > newWeight);
+
+ // We have already determined that this edge's weight
+ // is more than newWeight, so we just allow for the slop
+ if ((newWeight + slop) >= flEdgeWeightMin)
+ {
+ result = true;
+
+ assert(flEdgeWeightMax != 0);
+
+ // We will lower flEdgeWeightMin towards newWeight
+ flEdgeWeightMin = newWeight;
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+
+ // If we are returning true then we should have adjusted the range so that
+ // the newWeight is in the new range [Min..Max] or flEdgeWeightMax is zero.
+ // Also we should have set *wbUsedSlop to true, unless wbUsedSlop is nullptr.
+ if (result == true)
+ {
+ assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin)));
+
+ if (wbUsedSlop != nullptr)
+ {
+ assert(*wbUsedSlop == true);
+ }
+ }
+ }
+
+#if DEBUG
+ if (result == false)
+ {
+ result = false; // break here
+ }
+#endif // DEBUG
+
+ return result;
+}
+
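+// setEdgeWeightMaxChecked: try to set flEdgeWeightMax to newWeight.
+// Returns true (and updates the range) when newWeight is compatible with the current
+// [flEdgeWeightMin..flEdgeWeightMax] range, allowing up to 'slop' units of inaccuracy.
+// When the slop is needed to make the assignment, *wbUsedSlop is set to true (if non-null).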
+bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight, BasicBlock::weight_t slop, bool* wbUsedSlop)
+{
+ bool result = false;
+ if ((newWeight >= flEdgeWeightMin) && (newWeight <= flEdgeWeightMax))
+ {
+ flEdgeWeightMax = newWeight;
+ result = true;
+ }
+ else if (slop > 0)
+ {
+ // We allow for a small amount of inaccuracy in block weight counts.
+ if (flEdgeWeightMax < newWeight)
+ {
+ // We have already determined that this edge's weight
+ // is less than newWeight, so we just allow for the slop
+ if (newWeight <= (flEdgeWeightMax + slop))
+ {
+ result = true;
+
+ if (flEdgeWeightMax != 0)
+ {
+ // We will allow this to raise flEdgeWeightMax towards newWeight
+ flEdgeWeightMax = newWeight;
+ }
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+ else
+ {
+ assert(flEdgeWeightMin > newWeight);
+
+ // We have already determined that this edge's weight
+ // is more than newWeight, so we just allow for the slop
+ if ((newWeight + slop) >= flEdgeWeightMin)
+ {
+ result = true;
+
+ assert(flEdgeWeightMax != 0);
+
+ // We will allow this to lower flEdgeWeightMin and Max towards newWeight
+ flEdgeWeightMax = flEdgeWeightMin;
+ flEdgeWeightMin = newWeight;
+
+ if (wbUsedSlop != nullptr)
+ {
+ *wbUsedSlop = true;
+ }
+ }
+ }
+
+ // If we are returning true then we should have adjusted the range so that
+ // the newWeight is in the new range [Min..Max] or flEdgeWeightMax is zero.
+ // Also we should have set *wbUsedSlop to true, unless wbUsedSlop is nullptr.
+ if (result == true)
+ {
+ assert((flEdgeWeightMax == 0) || ((newWeight <= flEdgeWeightMax) && (newWeight >= flEdgeWeightMin)));
+
+ assert((wbUsedSlop == nullptr) || (*wbUsedSlop == true));
+ }
+ }
+
+#if DEBUG
+ if (result == false)
+ {
+ result = false; // break here
+ }
+#endif // DEBUG
+
+ return result;
+}
+
+#ifdef DEBUG
+void Compiler::fgPrintEdgeWeights()
+{
+ BasicBlock* bSrc;
+ BasicBlock* bDst;
+ flowList* edge;
+
+ // Print out all of the edge weights
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ if (bDst->bbPreds != nullptr)
+ {
+ printf(" Edge weights into BB%02u :", bDst->bbNum);
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bSrc = edge->flBlock;
+ // This is the control flow edge (bSrc -> bDst)
+
+ printf("BB%02u ", bSrc->bbNum);
+
+ if (edge->flEdgeWeightMin < BB_MAX_WEIGHT)
+ {
+ printf("(%s", refCntWtd2str(edge->flEdgeWeightMin));
+ }
+ else
+ {
+ printf("(MAX");
+ }
+ if (edge->flEdgeWeightMin != edge->flEdgeWeightMax)
+ {
+ if (edge->flEdgeWeightMax < BB_MAX_WEIGHT)
+ {
+ printf("..%s", refCntWtd2str(edge->flEdgeWeightMax));
+ }
+ else
+ {
+ printf("..MAX");
+ }
+ }
+ printf(")");
+ if (edge->flNext != nullptr)
+ {
+ printf(", ");
+ }
+ }
+ printf("\n");
+ }
+ }
+}
+#endif // DEBUG
+
+// return true if there is a possibility that the method has a loop (a backedge is present)
+bool Compiler::fgMightHaveLoop()
+{
+ // Don't use a BlockSet for this temporary bitset of blocks: we don't want to have to call EnsureBasicBlockEpoch()
+ // and potentially change the block epoch.
+
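+ // Walk the blocks in lexical order, recording each block we visit; any successor edge
+ // that targets an already-visited block is a lexical back edge, so a loop is possible.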
+ BitVecTraits blockVecTraits(fgBBNumMax + 1, this);
+ BitVec BLOCKSET_INIT_NOCOPY(blocksSeen, BitVecOps::MakeEmpty(&blockVecTraits));
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ BitVecOps::AddElemD(&blockVecTraits, blocksSeen, block->bbNum);
+
+ AllSuccessorIter succsEnd = block->GetAllSuccs(this).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(this).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ if (BitVecOps::IsMember(&blockVecTraits, blocksSeen, succ->bbNum))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+void Compiler::fgComputeEdgeWeights()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgComputeEdgeWeights()\n");
+ }
+#endif // DEBUG
+
+ if (fgIsUsingProfileWeights() == false)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgComputeEdgeWeights() we do not have any profile data so we are not using the edge weights\n");
+ }
+#endif // DEBUG
+ fgHaveValidEdgeWeights = false;
+ fgCalledWeight = BB_UNITY_WEIGHT;
+ }
+
+#if DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* bSrc;
+ BasicBlock* bDst;
+ flowList* edge;
+ unsigned iterations = 0;
+ unsigned goodEdgeCountCurrent = 0;
+ unsigned goodEdgeCountPrevious = 0;
+ bool inconsistentProfileData = false;
+ bool hasIncompleteEdgeWeights = false;
+ unsigned numEdges = 0;
+ bool usedSlop = false;
+ bool changed;
+ bool modified;
+
+ BasicBlock::weight_t returnWeight;
+ BasicBlock::weight_t slop;
+
+ // If we have any blocks that did not have profile derived weight
+ // we will try to fix their weight up here
+ //
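+ // A block whose only predecessor flows exclusively to it, or whose only successor is
+ // reached exclusively from it, must have the same weight as that neighbor; we repeat
+ // the pass until no more weights change (or the iteration limit below is reached).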
+ modified = false;
+ do // while (changed)
+ {
+ changed = false;
+ returnWeight = 0;
+ iterations++;
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ if (((bDst->bbFlags & BBF_PROF_WEIGHT) == 0) && (bDst->bbPreds != nullptr))
+ {
+ BasicBlock* bOnlyNext;
+
+ // This block does not have a profile derived weight
+ //
+ BasicBlock::weight_t newWeight = BB_MAX_WEIGHT;
+
+ if (bDst->countOfInEdges() == 1)
+ {
+ // Only one block flows into bDst
+ bSrc = bDst->bbPreds->flBlock;
+
+ // Does this block flow into only one other block
+ if (bSrc->bbJumpKind == BBJ_NONE)
+ {
+ bOnlyNext = bSrc->bbNext;
+ }
+ else if (bSrc->bbJumpKind == BBJ_ALWAYS)
+ {
+ bOnlyNext = bSrc->bbJumpDest;
+ }
+ else
+ {
+ bOnlyNext = nullptr;
+ }
+
+ if ((bOnlyNext == bDst) && ((bSrc->bbFlags & BBF_PROF_WEIGHT) != 0))
+ {
+ // We know the exact weight of bDst
+ newWeight = bSrc->bbWeight;
+ }
+ }
+
+ // Does this block flow into only one other block
+ if (bDst->bbJumpKind == BBJ_NONE)
+ {
+ bOnlyNext = bDst->bbNext;
+ }
+ else if (bDst->bbJumpKind == BBJ_ALWAYS)
+ {
+ bOnlyNext = bDst->bbJumpDest;
+ }
+ else
+ {
+ bOnlyNext = nullptr;
+ }
+
+ if ((bOnlyNext != nullptr) && (bOnlyNext->bbPreds != nullptr))
+ {
+ // Does only one block flow into bOnlyNext
+ if (bOnlyNext->countOfInEdges() == 1)
+ {
+ noway_assert(bOnlyNext->bbPreds->flBlock == bDst);
+
+ // We know the exact weight of bDst
+ newWeight = bOnlyNext->bbWeight;
+ }
+ }
+
+ if ((newWeight != BB_MAX_WEIGHT) && (bDst->bbWeight != newWeight))
+ {
+ changed = true;
+ modified = true;
+ bDst->bbWeight = newWeight;
+ if (newWeight == 0)
+ {
+ bDst->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ bDst->bbFlags &= ~BBF_RUN_RARELY;
+ }
+ }
+ }
+
+ // Sum up the weights of all of the return blocks and throw blocks
+ // This is used when we have a back-edge into block 1
+ //
+ if (((bDst->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((bDst->bbJumpKind == BBJ_RETURN) || (bDst->bbJumpKind == BBJ_THROW)))
+ {
+ returnWeight += bDst->bbWeight;
+ }
+ }
+ }
+ // Generally when we synthesize profile estimates we do it in a way where this algorithm will converge
+ // but downstream opts that remove conditional branches may create a situation where this is not the case.
+ // For instance a loop that becomes unreachable creates a sort of 'ring oscillator' (See test b539509)
+ while (changed && iterations < 10);
+
+#if DEBUG
+ if (verbose && modified)
+ {
+ printf("fgComputeEdgeWeights() adjusted the weight of some blocks\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+
+ // When we are not using profile data we have already setup fgCalledWeight
+ // only set it here if we are using profile data
+ //
+ if (fgIsUsingProfileWeights())
+ {
+ // If the first block has one ref then its weight is the fgCalledWeight;
+ // otherwise we have back edges into the first block, so instead
+ // we use the sum of the return block weights.
+ // If the profile data has a 0 for the returnWeight
+ // then just use the first block weight rather than the 0.
+ //
+ if ((fgFirstBB->countOfInEdges() == 1) || (returnWeight == 0))
+ {
+ fgCalledWeight = fgFirstBB->bbWeight;
+ }
+ else
+ {
+ fgCalledWeight = returnWeight;
+ }
+ }
+
+ // Now we will compute the initial flEdgeWeightMin and flEdgeWeightMax values
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ BasicBlock::weight_t bDstWeight = bDst->bbWeight;
+
+ // We subtract out the called count so that bDstWeight is
+ // the sum of all edges that go into this block from this method.
+ //
+ if (bDst == fgFirstBB)
+ {
+ bDstWeight -= fgCalledWeight;
+ }
+
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bool assignOK = true;
+
+ bSrc = edge->flBlock;
+ // We are processing the control flow edge (bSrc -> bDst)
+
+ numEdges++;
+
+ //
+ // If the bSrc or bDst blocks do not have exact profile weights
+ // then we must reset any values that they currently have
+ //
+
+ if (((bSrc->bbFlags & BBF_PROF_WEIGHT) == 0) || ((bDst->bbFlags & BBF_PROF_WEIGHT) == 0))
+ {
+ edge->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edge->flEdgeWeightMax = BB_MAX_WEIGHT;
+ }
+
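+ // Allow for a small amount of inaccuracy ('slop') when checking the edge weight
+ // against the block weights; see setEdgeWeightMinChecked/setEdgeWeightMaxChecked above.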
+ slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
+ switch (bSrc->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_NONE:
+ case BBJ_CALLFINALLY:
+ // We know the exact edge weight
+ assignOK &= edge->setEdgeWeightMinChecked(bSrc->bbWeight, slop, &usedSlop);
+ assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop);
+ break;
+
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ if (edge->flEdgeWeightMax > bSrc->bbWeight)
+ {
+ // The maximum weight of this edge can't be greater than the weight of bSrc
+ assignOK &= edge->setEdgeWeightMaxChecked(bSrc->bbWeight, slop, &usedSlop);
+ }
+ break;
+
+ default:
+ // We should never have an edge that starts from one of these jump kinds
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ // The maximum weight of this edge can't be greater than the weight of bDst
+ if (edge->flEdgeWeightMax > bDstWeight)
+ {
+ assignOK &= edge->setEdgeWeightMaxChecked(bDstWeight, slop, &usedSlop);
+ }
+
+ if (!assignOK)
+ {
+ // Here we have inconsistent profile data
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+ }
+ }
+
+ fgEdgeCount = numEdges;
+
+ iterations = 0;
+
+ do
+ {
+ iterations++;
+ goodEdgeCountPrevious = goodEdgeCountCurrent;
+ goodEdgeCountCurrent = 0;
+ hasIncompleteEdgeWeights = false;
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bool assignOK = true;
+
+ // We are processing the control flow edge (bSrc -> bDst)
+ bSrc = edge->flBlock;
+
+ slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
+ if (bSrc->bbJumpKind == BBJ_COND)
+ {
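+ // A BBJ_COND block has two successor edges (the jump and the fall-through), and
+ // their weights should sum to bSrc->bbWeight (within the slop), so we can use the
+ // bound on one edge to tighten the [min..max] range of the other.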
+ int diff;
+ flowList* otherEdge;
+ if (bSrc->bbNext == bDst)
+ {
+ otherEdge = fgGetPredForBlock(bSrc->bbJumpDest, bSrc);
+ }
+ else
+ {
+ otherEdge = fgGetPredForBlock(bSrc->bbNext, bSrc);
+ }
+ noway_assert(edge->flEdgeWeightMin <= edge->flEdgeWeightMax);
+ noway_assert(otherEdge->flEdgeWeightMin <= otherEdge->flEdgeWeightMax);
+
+ // Adjust edge->flEdgeWeightMin up or adjust otherEdge->flEdgeWeightMax down
+ diff = ((int)bSrc->bbWeight) - ((int)edge->flEdgeWeightMin + (int)otherEdge->flEdgeWeightMax);
+ if (diff > 0)
+ {
+ assignOK &= edge->setEdgeWeightMinChecked(edge->flEdgeWeightMin + diff, slop, &usedSlop);
+ }
+ else if (diff < 0)
+ {
+ assignOK &=
+ otherEdge->setEdgeWeightMaxChecked(otherEdge->flEdgeWeightMax + diff, slop, &usedSlop);
+ }
+
+ // Adjust otherEdge->flEdgeWeightMin up or adjust edge->flEdgeWeightMax down
+ diff = ((int)bSrc->bbWeight) - ((int)otherEdge->flEdgeWeightMin + (int)edge->flEdgeWeightMax);
+ if (diff > 0)
+ {
+ assignOK &=
+ otherEdge->setEdgeWeightMinChecked(otherEdge->flEdgeWeightMin + diff, slop, &usedSlop);
+ }
+ else if (diff < 0)
+ {
+ assignOK &= edge->setEdgeWeightMaxChecked(edge->flEdgeWeightMax + diff, slop, &usedSlop);
+ }
+
+ if (!assignOK)
+ {
+ // Here we have inconsistent profile data
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+#ifdef DEBUG
+ // Now edge->flEdgeWeightMin and otherEdge->flEdgeWeightMax) should add up to bSrc->bbWeight
+ diff = ((int)bSrc->bbWeight) - ((int)edge->flEdgeWeightMin + (int)otherEdge->flEdgeWeightMax);
+ noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+
+ // Now otherEdge->flEdgeWeightMin and edge->flEdgeWeightMax) should add up to bSrc->bbWeight
+ diff = ((int)bSrc->bbWeight) - ((int)otherEdge->flEdgeWeightMin + (int)edge->flEdgeWeightMax);
+ noway_assert((-((int)slop) <= diff) && (diff <= ((int)slop)));
+#endif // DEBUG
+ }
+ }
+ }
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ BasicBlock::weight_t bDstWeight = bDst->bbWeight;
+
+ if (bDstWeight == BB_MAX_WEIGHT)
+ {
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+ else
+ {
+ // We subtract out the called count so that bDstWeight is
+ // the sum of all edges that go into this block from this method.
+ //
+ if (bDst == fgFirstBB)
+ {
+ bDstWeight -= fgCalledWeight;
+ }
+
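+ // The incoming edge weights should sum to bDstWeight (within the slop), so each
+ // edge's [min..max] range can be tightened using the other edges' combined bounds.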
+ UINT64 minEdgeWeightSum = 0;
+ UINT64 maxEdgeWeightSum = 0;
+
+ // Calculate the sums of the minimum and maximum edge weights
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ // We are processing the control flow edge (bSrc -> bDst)
+ bSrc = edge->flBlock;
+
+ maxEdgeWeightSum += edge->flEdgeWeightMax;
+ minEdgeWeightSum += edge->flEdgeWeightMin;
+ }
+
+ // maxEdgeWeightSum is the sum of all flEdgeWeightMax values into bDst
+ // minEdgeWeightSum is the sum of all flEdgeWeightMin values into bDst
+
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bool assignOK = true;
+
+ // We are processing the control flow edge (bSrc -> bDst)
+ bSrc = edge->flBlock;
+ slop = BasicBlock::GetSlopFraction(bSrc, bDst) + 1;
+
+ // otherMaxEdgesWeightSum is the sum of all of the other edges flEdgeWeightMax values
+ // This can be used to compute a lower bound for our minimum edge weight
+ noway_assert(maxEdgeWeightSum >= edge->flEdgeWeightMax);
+ UINT64 otherMaxEdgesWeightSum = maxEdgeWeightSum - edge->flEdgeWeightMax;
+
+ // otherMinEdgesWeightSum is the sum of all of the other edges flEdgeWeightMin values
+ // This can be used to compute an upper bound for our maximum edge weight
+ noway_assert(minEdgeWeightSum >= edge->flEdgeWeightMin);
+ UINT64 otherMinEdgesWeightSum = minEdgeWeightSum - edge->flEdgeWeightMin;
+
+ if (bDstWeight >= otherMaxEdgesWeightSum)
+ {
+ // minWeightCalc is our minWeight when every other path to bDst takes its flEdgeWeightMax value
+ BasicBlock::weight_t minWeightCalc =
+ (BasicBlock::weight_t)(bDstWeight - otherMaxEdgesWeightSum);
+ if (minWeightCalc > edge->flEdgeWeightMin)
+ {
+ assignOK &= edge->setEdgeWeightMinChecked(minWeightCalc, slop, &usedSlop);
+ }
+ }
+
+ if (bDstWeight >= otherMinEdgesWeightSum)
+ {
+ // maxWeightCalc is our maxWeight when every other path to bDst takes its flEdgeWeightMin value
+ BasicBlock::weight_t maxWeightCalc =
+ (BasicBlock::weight_t)(bDstWeight - otherMinEdgesWeightSum);
+ if (maxWeightCalc < edge->flEdgeWeightMax)
+ {
+ assignOK &= edge->setEdgeWeightMaxChecked(maxWeightCalc, slop, &usedSlop);
+ }
+ }
+
+ if (!assignOK)
+ {
+ // Here we have inconsistent profile data
+ inconsistentProfileData = true;
+ // No point in continuing
+ goto EARLY_EXIT;
+ }
+
+ // When flEdgeWeightMin equals flEdgeWeightMax we have a "good" edge weight
+ if (edge->flEdgeWeightMin == edge->flEdgeWeightMax)
+ {
+ // Count how many "good" edge weights we have
+ // Each time through we should have more "good" weights
+ // We exit the while loop when we no longer find any new "good" edges
+ goodEdgeCountCurrent++;
+ }
+ else
+ {
+ // Remember that we have seen at least one "Bad" edge weight
+ // so that we will repeat the while loop again
+ hasIncompleteEdgeWeights = true;
+ }
+ }
+ }
+ }
+
+ if (inconsistentProfileData)
+ {
+ hasIncompleteEdgeWeights = true;
+ break;
+ }
+
+ if (numEdges == goodEdgeCountCurrent)
+ {
+ noway_assert(hasIncompleteEdgeWeights == false);
+ break;
+ }
+
+ } while (hasIncompleteEdgeWeights && (goodEdgeCountCurrent > goodEdgeCountPrevious) && (iterations < 8));
+
+EARLY_EXIT:;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (inconsistentProfileData)
+ {
+ printf("fgComputeEdgeWeights() found inconsistent profile data, not using the edge weights\n");
+ }
+ else
+ {
+ if (hasIncompleteEdgeWeights)
+ {
+ printf("fgComputeEdgeWeights() was able to compute exact edge weights for %3d of the %3d edges, using "
+ "%d passes.\n",
+ goodEdgeCountCurrent, numEdges, iterations);
+ }
+ else
+ {
+ printf("fgComputeEdgeWeights() was able to compute exact edge weights for all of the %3d edges, using "
+ "%d passes.\n",
+ numEdges, iterations);
+ }
+
+ fgPrintEdgeWeights();
+ }
+ }
+#endif // DEBUG
+
+ fgSlopUsedInEdgeWeights = usedSlop;
+ fgRangeUsedInEdgeWeights = false;
+
+ // See if any edge weights are expressed in [min..max] form
+
+ for (bDst = fgFirstBB; bDst != nullptr; bDst = bDst->bbNext)
+ {
+ if (bDst->bbPreds != nullptr)
+ {
+ for (edge = bDst->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ bSrc = edge->flBlock;
+ // This is the control flow edge (bSrc -> bDst)
+
+ if (edge->flEdgeWeightMin != edge->flEdgeWeightMax)
+ {
+ fgRangeUsedInEdgeWeights = true;
+ break;
+ }
+ }
+ if (fgRangeUsedInEdgeWeights)
+ {
+ break;
+ }
+ }
+ }
+
+ fgHaveValidEdgeWeights = !inconsistentProfileData;
+ fgEdgeWeightsComputed = true;
+}
+
+// fgOptimizeBranchToEmptyUnconditional:
+// optimize a jump to an empty block which ends in an unconditional branch.
+// Args:
+// block: source block
+// bDest: destination
+// Returns: true if we changed the code
+//
+bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBlock* bDest)
+{
+ bool optimizeJump = true;
+
+ assert(bDest->isEmpty());
+ assert(bDest->bbJumpKind == BBJ_ALWAYS);
+
+ // We do not optimize jumps between two different try regions.
+ // However jumping to a block that is not in any try region is OK
+ //
+ if (bDest->hasTryIndex() && !BasicBlock::sameTryRegion(block, bDest))
+ {
+ optimizeJump = false;
+ }
+
+ // Don't optimize a jump to a removed block
+ if (bDest->bbJumpDest->bbFlags & BBF_REMOVED)
+ {
+ optimizeJump = false;
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Don't optimize a jump to a finally target. For BB1->BB2->BB3, where
+ // BB2 is a finally target, if we changed BB1 to jump directly to BB3,
+ // it would skip the finally target. BB1 might be a BBJ_ALWAYS block part
+ // of a BBJ_CALLFINALLY/BBJ_ALWAYS pair, so changing the finally target
+ // would change the unwind behavior.
+ if (bDest->bbFlags & BBF_FINALLY_TARGET)
+ {
+ optimizeJump = false;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // Must optimize jump if bDest has been removed
+ //
+ if (bDest->bbFlags & BBF_REMOVED)
+ {
+ optimizeJump = true;
+ }
+
+ // If we are optimizing using real profile weights
+ // then don't optimize a conditional jump to an unconditional jump
+ // until after we have computed the edge weights
+ //
+ if (fgIsUsingProfileWeights() && !fgEdgeWeightsComputed)
+ {
+ fgNeedsUpdateFlowGraph = true;
+ optimizeJump = false;
+ }
+
+ if (optimizeJump)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nOptimizing a jump to an unconditional jump (BB%02u -> BB%02u -> BB%02u)\n", block->bbNum,
+ bDest->bbNum, bDest->bbJumpDest->bbNum);
+ }
+#endif // DEBUG
+
+ //
+ // When we optimize a branch to branch we need to update the profile weight
+ // of bDest by subtracting out the block/edge weight of the path that is being optimized.
+ //
+ if (fgHaveValidEdgeWeights && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+ {
+ flowList* edge1 = fgGetPredForBlock(bDest, block);
+ noway_assert(edge1 != nullptr);
+
+ BasicBlock::weight_t edgeWeight;
+
+ if (edge1->flEdgeWeightMin != edge1->flEdgeWeightMax)
+ {
+ //
+ // We only have an estimate for the edge weight
+ //
+ edgeWeight = (edge1->flEdgeWeightMin + edge1->flEdgeWeightMax) / 2;
+ //
+ // Clear the profile weight flag
+ //
+ bDest->bbFlags &= ~BBF_PROF_WEIGHT;
+ }
+ else
+ {
+ //
+ // We only have the exact edge weight
+ //
+ edgeWeight = edge1->flEdgeWeightMin;
+ }
+
+ //
+ // Update the bDest->bbWeight
+ //
+ if (bDest->bbWeight > edgeWeight)
+ {
+ bDest->bbWeight -= edgeWeight;
+ }
+ else
+ {
+ bDest->bbWeight = BB_ZERO_WEIGHT;
+ bDest->bbFlags |= BBF_RUN_RARELY; // Set the RarelyRun flag
+ }
+
+ flowList* edge2 = fgGetPredForBlock(bDest->bbJumpDest, bDest);
+
+ if (edge2 != nullptr)
+ {
+ //
+ // Update the edge2 min/max weights
+ //
+ if (edge2->flEdgeWeightMin > edge1->flEdgeWeightMin)
+ {
+ edge2->flEdgeWeightMin -= edge1->flEdgeWeightMin;
+ }
+ else
+ {
+ edge2->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ }
+
+ if (edge2->flEdgeWeightMax > edge1->flEdgeWeightMin)
+ {
+ edge2->flEdgeWeightMax -= edge1->flEdgeWeightMin;
+ }
+ else
+ {
+ edge2->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ }
+ }
+ }
+
+ // Optimize the JUMP to empty unconditional JUMP to go to the new target
+ block->bbJumpDest = bDest->bbJumpDest;
+
+ fgAddRefPred(bDest->bbJumpDest, block, fgRemoveRefPred(bDest, block));
+
+ return true;
+ }
+ return false;
+}
+
+// fgOptimizeEmptyBlock:
+// Does flow optimization of an empty block (can remove it in some cases)
+//
+// Args:
+// block: an empty block
+// Returns: true if we changed the code
+
+bool Compiler::fgOptimizeEmptyBlock(BasicBlock* block)
+{
+ assert(block->isEmpty());
+
+ BasicBlock* bPrev = block->bbPrev;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_SWITCH:
+ case BBJ_THROW:
+
+ /* can never happen */
+ noway_assert(!"Conditional, switch, or throw block with empty body!");
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_RETURN:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+
+ /* leave them as is */
+ /* some compilers generate multiple returns and put all of them at the end -
+ * to solve that we need the predecessor list */
+
+ break;
+
+ case BBJ_ALWAYS:
+
+ // A GOTO cannot be to the next block since that
+ // should have been fixed by the optimization above
+ // An exception is made for a jump from Hot to Cold
+ noway_assert(block->bbJumpDest != block->bbNext || ((bPrev != nullptr) && bPrev->isBBCallAlwaysPair()) ||
+ fgInDifferentRegions(block, block->bbNext));
+
+ /* Cannot remove the first BB */
+ if (!bPrev)
+ {
+ break;
+ }
+
+ /* Do not remove a block that jumps to itself - used for while (true){} */
+ if (block->bbJumpDest == block)
+ {
+ break;
+ }
+
+ /* Empty GOTO can be removed iff bPrev is BBJ_NONE */
+ if (bPrev->bbJumpKind != BBJ_NONE)
+ {
+ break;
+ }
+
+ // can't allow fall through into cold code
+ if (block->bbNext == fgFirstColdBlock)
+ {
+ break;
+ }
+
+ /* Can fall through since this is similar to removing
+ * a BBJ_NONE block; only the successor is different */
+
+ __fallthrough;
+
+ case BBJ_NONE:
+
+ /* special case if this is the first BB */
+ if (!bPrev)
+ {
+ assert(block == fgFirstBB);
+ }
+ else
+ {
+ /* If this block follows a BBJ_CALLFINALLY do not remove it
+ * (because we don't know who may jump to it) */
+ if (bPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ break;
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ /* Don't remove finally targets */
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ break;
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+#if FEATURE_EH_FUNCLETS
+ /* Don't remove an empty block that is in a different EH region
+ * from its successor block, if the block is the target of a
+ * catch return. It is required that the return address of a
+ * catch be in the correct EH region, for re-raise of thread
+ * abort exceptions to work. Insert a NOP in the empty block
+ * to ensure we generate code for the block, if we keep it.
+ */
+ {
+ BasicBlock* succBlock;
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ succBlock = block->bbJumpDest;
+ }
+ else
+ {
+ succBlock = block->bbNext;
+ }
+
+ if ((succBlock != nullptr) && !BasicBlock::sameEHRegion(block, succBlock))
+ {
+ // The empty block and the block that follows it are in different
+ // EH regions. Is this a case where they can't be merged?
+
+ bool okToMerge = true; // assume it's ok
+ for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ if (pred->flBlock->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ assert(pred->flBlock->bbJumpDest == block);
+ okToMerge = false; // we can't get rid of the empty block
+ break;
+ }
+ }
+
+ if (!okToMerge)
+ {
+ // Insert a NOP in the empty block to ensure we generate code
+ // for the catchret target in the right EH region.
+ GenTree* nop = new (this, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+
+ if (block->IsLIR())
+ {
+ LIR::AsRange(block).InsertAtEnd(nop);
+ }
+ else
+ {
+ GenTreePtr nopStmt = fgInsertStmtAtEnd(block, nop);
+ fgSetStmtSeq(nopStmt);
+ gtSetStmtInfo(nopStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nKeeping empty block BB%02u - it is the target of a catch return\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ break; // go to the next block
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (!ehCanDeleteEmptyBlock(block))
+ {
+ // We're not allowed to remove this block due to reasons related to the EH table.
+ break;
+ }
+
+ /* special case if this is the last BB */
+ if (block == fgLastBB)
+ {
+ if (!bPrev)
+ {
+ break;
+ }
+ fgLastBB = bPrev;
+ }
+
+ /* Remove the block */
+ compCurBB = block;
+ fgRemoveBlock(block, false);
+ return true;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ return false;
+}
+
+// fgOptimizeSwitchBranches:
+// Does flow optimization for a switch - bypasses jumps to empty unconditional branches,
+// and transforms degenerate switch cases like those with 1 or 2 targets
+//
+// Args:
+// block: BasicBlock that contains the switch
+// Returns: true if we changed the code
+//
+bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block)
+{
+ assert(block->bbJumpKind == BBJ_SWITCH);
+
+ unsigned jmpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jmpTab = block->bbJumpSwt->bbsDstTab;
+ BasicBlock* bNewDest; // the new jump target for the current switch case
+ BasicBlock* bDest;
+ bool returnvalue = false;
+
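+ // Walk every entry in the switch's jump table, retargeting any entry that jumps
+ // through an empty unconditional-jump block straight to that block's destination.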
+ do
+ {
+ REPEAT_SWITCH:;
+ bDest = *jmpTab;
+ bNewDest = bDest;
+
+ // Do we have a JUMP to an empty unconditional JUMP block?
+ if (bDest->isEmpty() && (bDest->bbJumpKind == BBJ_ALWAYS) &&
+ (bDest != bDest->bbJumpDest)) // special case for self jumps
+ {
+ bool optimizeJump = true;
+
+ // We do not optimize jumps between two different try regions.
+ // However jumping to a block that is not in any try region is OK
+ //
+ if (bDest->hasTryIndex() && !BasicBlock::sameTryRegion(block, bDest))
+ {
+ optimizeJump = false;
+ }
+
+ // If we are optimizing using real profile weights
+ // then don't optimize a switch jump to an unconditional jump
+ // until after we have computed the edge weights
+ //
+ if (fgIsUsingProfileWeights() && !fgEdgeWeightsComputed)
+ {
+ fgNeedsUpdateFlowGraph = true;
+ optimizeJump = false;
+ }
+
+ if (optimizeJump)
+ {
+ bNewDest = bDest->bbJumpDest;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nOptimizing a switch jump to an empty block with an unconditional jump (BB%02u -> BB%02u "
+ "-> BB%02u)\n",
+ block->bbNum, bDest->bbNum, bNewDest->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+
+ if (bNewDest != bDest)
+ {
+ //
+ // When we optimize a branch to branch we need to update the profile weight
+ // of bDest by subtracting out the block/edge weight of the path that is being optimized.
+ //
+ if (fgIsUsingProfileWeights() && ((bDest->bbFlags & BBF_PROF_WEIGHT) != 0))
+ {
+ if (fgHaveValidEdgeWeights)
+ {
+ flowList* edge = fgGetPredForBlock(bDest, block);
+ BasicBlock::weight_t branchThroughWeight = edge->flEdgeWeightMin;
+
+ if (bDest->bbWeight > branchThroughWeight)
+ {
+ bDest->bbWeight -= branchThroughWeight;
+ }
+ else
+ {
+ bDest->bbWeight = BB_ZERO_WEIGHT;
+ bDest->bbFlags |= BBF_RUN_RARELY;
+ }
+ }
+ }
+
+ // Update the switch jump table
+ *jmpTab = bNewDest;
+
+ // Maintain, if necessary, the set of unique targets of "block."
+ UpdateSwitchTableTarget(block, bDest, bNewDest);
+
+ fgAddRefPred(bNewDest, block, fgRemoveRefPred(bDest, block));
+
+ // we optimized a Switch label - goto REPEAT_SWITCH to follow this new jump
+ returnvalue = true;
+
+ goto REPEAT_SWITCH;
+ }
+ } while (++jmpTab, --jmpCnt);
+
+ GenTreeStmt* switchStmt = nullptr;
+ LIR::Range* blockRange = nullptr;
+
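+ // Locate the switch node at the end of the block (GT_SWITCH_TABLE in LIR, GT_SWITCH
+ // otherwise) so that the degenerate-switch transformations below can rewrite it.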
+ GenTree* switchTree;
+ if (block->IsLIR())
+ {
+ blockRange = &LIR::AsRange(block);
+ switchTree = blockRange->LastNode();
+
+ assert(switchTree->OperGet() == GT_SWITCH_TABLE);
+ }
+ else
+ {
+ switchStmt = block->lastStmt();
+ switchTree = switchStmt->gtStmtExpr;
+
+ assert(switchTree->OperGet() == GT_SWITCH);
+ }
+
+ noway_assert(switchTree->gtType == TYP_VOID);
+
+ // At this point all of the case jump targets have been updated such
+ // that none of them goes to a block that is an empty unconditional jump block
+ //
+ jmpTab = block->bbJumpSwt->bbsDstTab;
+ jmpCnt = block->bbJumpSwt->bbsCount;
+ // Now check for two trivial switch jumps.
+ //
+ if (block->NumSucc(this) == 1)
+ {
+ // Use BBJ_ALWAYS for a switch with only a default clause, or with only one unique successor.
+ BasicBlock* uniqueSucc = jmpTab[0];
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving a switch jump with a single target (BB%02u)\n", block->bbNum);
+ printf("BEFORE:\n");
+ }
+#endif // DEBUG
+
+ if (block->IsLIR())
+ {
+ bool isClosed;
+ unsigned sideEffects;
+ LIR::ReadOnlyRange switchTreeRange = blockRange->GetTreeRange(switchTree, &isClosed, &sideEffects);
+
+ // The switch tree should form a contiguous, side-effect free range by construction. See
+ // Lowering::LowerSwitch for details.
+ assert(isClosed);
+ assert((sideEffects & GTF_ALL_EFFECT) == 0);
+
+ blockRange->Delete(this, block, std::move(switchTreeRange));
+ }
+ else
+ {
+ /* check for SIDE_EFFECTS */
+ if (switchTree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ /* Extract the side effects from the conditional */
+ GenTreePtr sideEffList = nullptr;
+
+ gtExtractSideEffList(switchTree, &sideEffList);
+
+ if (sideEffList == nullptr)
+ {
+ goto NO_SWITCH_SIDE_EFFECT;
+ }
+
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nSwitch expression has side effects! Extracting side effects...\n");
+ gtDispTree(switchTree);
+ printf("\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Replace the conditional statement with the list of side effects */
+ noway_assert(sideEffList->gtOper != GT_STMT);
+ noway_assert(sideEffList->gtOper != GT_SWITCH);
+
+ switchStmt->gtStmtExpr = sideEffList;
+
+ if (fgStmtListThreaded)
+ {
+ /* Update the lclvar ref counts */
+ compCurBB = block;
+ fgUpdateRefCntForExtract(switchTree, sideEffList);
+
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(switchStmt);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(switchStmt);
+ }
+ }
+ else
+ {
+
+ NO_SWITCH_SIDE_EFFECT:
+
+ /* conditional has NO side effect - remove it */
+ fgRemoveStmt(block, switchStmt);
+ }
+ }
+
+ // Change the switch jump into a BBJ_ALWAYS
+ block->bbJumpDest = block->bbJumpSwt->bbsDstTab[0];
+ block->bbJumpKind = BBJ_ALWAYS;
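+ // All switch entries now target the same successor; entry 0 supplied the BBJ_ALWAYS
+ // destination, so drop the pred references contributed by the remaining duplicate entries.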
+ if (jmpCnt > 1)
+ {
+ for (unsigned i = 1; i < jmpCnt; ++i)
+ {
+ (void)fgRemoveRefPred(jmpTab[i], block);
+ }
+ }
+
+ return true;
+ }
+ else if (block->bbJumpSwt->bbsCount == 2 && block->bbJumpSwt->bbsDstTab[1] == block->bbNext)
+ {
+ /* Use a BBJ_COND(switchVal==0) for a switch with only one
+ significant clause besides the default clause, if the
+ default clause is bbNext */
+ GenTree* switchVal = switchTree->gtOp.gtOp1;
+ noway_assert(genActualTypeIsIntOrI(switchVal->TypeGet()));
+
+ // If we are in LIR, remove the jump table from the block.
+ if (block->IsLIR())
+ {
+ GenTree* jumpTable = switchTree->gtOp.gtOp2;
+ assert(jumpTable->OperGet() == GT_JMPTABLE);
+ blockRange->Remove(jumpTable);
+ }
+
+ // Change the GT_SWITCH(switchVal) into GT_JTRUE(GT_EQ(switchVal==0)).
+ // Also mark the node as GTF_DONT_CSE as further down JIT is not capable of handling it.
+ // For example CSE could determine that the expression rooted at GT_EQ is a candidate cse and
+ // replace it with a COMMA node. In such a case we will end up with GT_JTRUE node pointing to
+ // a COMMA node which results in noway asserts in fgMorphSmpOp(), optAssertionGen() and rpPredictTreeRegUse().
+ // For the same reason fgMorphSmpOp() marks GT_JTRUE nodes with RELOP children as GTF_DONT_CSE.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConverting a switch (BB%02u) with only one significant clause besides a default target to a "
+ "conditional branch\n",
+ block->bbNum);
+ }
+#endif // DEBUG
+
+ switchTree->ChangeOper(GT_JTRUE);
+ GenTree* zeroConstNode = gtNewZeroConNode(genActualType(switchVal->TypeGet()));
+ GenTree* condNode = gtNewOperNode(GT_EQ, TYP_INT, switchVal, zeroConstNode);
+ switchTree->gtOp.gtOp1 = condNode;
+ switchTree->gtOp.gtOp1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+
+ if (block->IsLIR())
+ {
+ blockRange->InsertAfter(switchVal, zeroConstNode, condNode);
+ }
+ else
+ {
+ // Re-link the nodes for this statement.
+ fgSetStmtSeq(switchStmt);
+ }
+
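+ // The branch is taken when switchVal == 0, i.e. to the single significant case (entry 0);
+ // the default entry is bbNext and is reached by falling through.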
+ block->bbJumpDest = block->bbJumpSwt->bbsDstTab[0];
+ block->bbJumpKind = BBJ_COND;
+
+ return true;
+ }
+ return returnvalue;
+}
+
+// fgBlockEndFavorsTailDuplication:
+// Heuristic function that returns true if this block ends in a statement that looks favorable
+// for tail-duplicating its successor (such as assigning a constant to a local).
+// Args:
+// block: BasicBlock we are considering duplicating the successor of
+// Returns:
+// true if it seems like a good idea
+//
+bool Compiler::fgBlockEndFavorsTailDuplication(BasicBlock* block)
+{
+ if (block->isRunRarely())
+ {
+ return false;
+ }
+
+ if (!block->lastStmt())
+ {
+ return false;
+ }
+ else
+ {
+ // Tail duplication tends to pay off when the last statement
+ // is an assignment of a constant, arraylength, or a relop.
+ // This is because these statements produce information about values
+ // that would otherwise be lost at the upcoming merge point.
+
+ GenTreeStmt* lastStmt = block->lastStmt();
+ GenTree* tree = lastStmt->gtStmtExpr;
+ if (tree->gtOper != GT_ASG)
+ {
+ return false;
+ }
+
+ if (tree->OperIsBlkOp())
+ {
+ return false;
+ }
+
+ GenTree* op2 = tree->gtOp.gtOp2;
+ if (op2->gtOper != GT_ARR_LENGTH && !op2->OperIsConst() && ((op2->OperKind() & GTK_RELOP) == 0))
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
+// fgBlockIsGoodTailDuplicationCandidate:
+// Heuristic function that examines a block (presumably one that is a merge point) to determine
+// if it should be duplicated.
+// args:
+// target - the tail block (candidate for duplication)
+// returns:
+// true if this block seems like a good candidate for duplication
+//
+bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target)
+{
+ GenTreeStmt* stmt = target->FirstNonPhiDef();
+
+ // Here we are looking for blocks with a single statement feeding a conditional branch.
+ // These blocks are small, and when duplicated onto the tail of blocks that end in
+ // assignments, there is a high probability of the branch completely going away.
+
+ // This is by no means the only kind of tail that it is beneficial to duplicate,
+ // just the only one we recognize for now.
+
+ if (stmt != target->lastStmt())
+ {
+ return false;
+ }
+
+ if (target->bbJumpKind != BBJ_COND)
+ {
+ return false;
+ }
+
+ GenTree* tree = stmt->gtStmtExpr;
+
+ if (tree->gtOper != GT_JTRUE)
+ {
+ return false;
+ }
+
+ // must be some kind of relational operator
+ GenTree* cond = tree->gtOp.gtOp1;
+ if (!(cond->OperKind() & GTK_RELOP))
+ {
+ return false;
+ }
+
+ // op1 must be some combination of casts of a local or a constant
+ GenTree* op1 = cond->gtOp.gtOp1;
+ while (op1->gtOper == GT_CAST)
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+ if (!op1->IsLocal() && !op1->OperIsConst())
+ {
+ return false;
+ }
+
+ // op2 must be some combination of casts of a local or a constant
+ GenTree* op2 = cond->gtOp.gtOp2;
+ while (op2->gtOper == GT_CAST)
+ {
+ op2 = op2->gtOp.gtOp1;
+ }
+ if (!op2->IsLocal() && !op2->OperIsConst())
+ {
+ return false;
+ }
+
+ return true;
+}
+
+// fgOptimizeUncondBranchToSimpleCond:
+// For a block which has an unconditional branch, look to see if its target block
+// is a good candidate for tail duplication, and if so do that duplication.
+//
+// Args:
+// block - block with uncond branch
+// target - block which is target of first block
+//
+// returns: true if changes were made
+
+bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* target)
+{
+ assert(block->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbJumpDest == target);
+
+ // TODO-Review: OK if they are in the same region?
+ if (compHndBBtabCount > 0)
+ {
+ return false;
+ }
+
+ if (!fgBlockIsGoodTailDuplicationCandidate(target))
+ {
+ return false;
+ }
+
+ if (!fgBlockEndFavorsTailDuplication(block))
+ {
+ return false;
+ }
+
+ // NOTE: we do not currently hit this assert because this function is only called when
+ // `fgUpdateFlowGraph` has been called with `doTailDuplication` set to true, and the
+ // backend always calls `fgUpdateFlowGraph` with `doTailDuplication` set to false.
+ assert(!block->IsLIR());
+
+ GenTreeStmt* stmt = target->FirstNonPhiDef();
+ assert(stmt == target->lastStmt());
+
+ // Duplicate the target block at the end of this block
+
+ GenTree* cloned = gtCloneExpr(stmt->gtStmtExpr);
+ noway_assert(cloned);
+ GenTree* jmpStmt = gtNewStmt(cloned);
+
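+ // block takes over target's conditional jump: it becomes BBJ_COND and branches to
+ // target's jump destination; the duplicated JTRUE statement is appended to it below.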
+ block->bbJumpKind = BBJ_COND;
+ block->bbJumpDest = target->bbJumpDest;
+ fgAddRefPred(block->bbJumpDest, block);
+ fgRemoveRefPred(target, block);
+
+ // add an unconditional block after this block to jump to the target block's fallthrough block
+
+ BasicBlock* next = fgNewBBafter(BBJ_ALWAYS, block, true);
+ next->bbFlags = block->bbFlags | BBF_INTERNAL;
+ next->bbFlags &= ~(BBF_TRY_BEG | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1 | BBF_HAS_LABEL | BBF_JMP_TARGET |
+ BBF_FUNCLET_BEG | BBF_LOOP_PREHEADER | BBF_KEEP_BBJ_ALWAYS);
+
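+ // The new block jumps to target's fall-through successor; block (now BBJ_COND) falls
+ // through into it, so the duplicated condition reaches the same two successors as target.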
+ next->bbJumpDest = target->bbNext;
+ target->bbNext->bbFlags |= BBF_JMP_TARGET;
+ fgAddRefPred(next, block);
+ fgAddRefPred(next->bbJumpDest, next);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgOptimizeUncondBranchToSimpleCond(from BB%02u to cond BB%02u), created new uncond BB%02u\n",
+ block->bbNum, target->bbNum, next->bbNum);
+ }
+#endif // DEBUG
+
+ if (fgStmtListThreaded)
+ {
+ gtSetStmtInfo(jmpStmt);
+ }
+
+ fgInsertStmtAtEnd(block, jmpStmt);
+
+ return true;
+}
+
+// fgOptimizeBranchToNext:
+// Optimize a block which has a branch to the following block
+// Args:
+// block - block with a branch
+// bNext - block which is both next and the target of the first block
+// bPrev - block which is prior to the first block
+//
+// returns: true if changes were made
+//
+bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, BasicBlock* bPrev)
+{
+ assert(block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS);
+ assert(block->bbJumpDest == bNext);
+ assert(block->bbNext == bNext);
+ assert(block->bbPrev == bPrev);
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ // We can't remove it if it is a branch from hot => cold
+ if (!fgInDifferentRegions(block, bNext))
+ {
+ // We can't remove if it is marked as BBF_KEEP_BBJ_ALWAYS
+ if (!(block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
+ {
+ // We can't remove if the BBJ_ALWAYS is part of a BBJ_CALLFINALLY pair
+ if ((bPrev == nullptr) || !bPrev->isBBCallAlwaysPair())
+ {
+ /* the unconditional jump is to the next BB */
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving unconditional jump to next block (BB%02u -> BB%02u) (converted BB%02u to "
+ "fall-through)\n",
+ block->bbNum, bNext->bbNum, block->bbNum);
+ }
+#endif // DEBUG
+ return true;
+ }
+ }
+ }
+ }
+ else
+ {
+ /* remove the conditional statement at the end of block */
+ noway_assert(block->bbJumpKind == BBJ_COND);
+ noway_assert(block->bbTreeList);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving conditional jump to next block (BB%02u -> BB%02u)\n", block->bbNum, bNext->bbNum);
+ }
+#endif // DEBUG
+
+ if (block->IsLIR())
+ {
+ LIR::Range& blockRange = LIR::AsRange(block);
+ GenTree* jmp = blockRange.LastNode();
+ assert(jmp->OperGet() == GT_JTRUE);
+
+ bool isClosed;
+ unsigned sideEffects;
+ LIR::ReadOnlyRange jmpRange = blockRange.GetTreeRange(jmp, &isClosed, &sideEffects);
+
+ // TODO-LIR: this should really be checking GTF_ALL_EFFECT, but that produces unacceptable
+ // diffs compared to the existing backend.
+ if (isClosed && ((sideEffects & GTF_SIDE_EFFECT) == 0))
+ {
+ // If the jump and its operands form a contiguous, side-effect-free range,
+ // remove them.
+ blockRange.Delete(this, block, std::move(jmpRange));
+ }
+ else
+ {
+ // Otherwise, just remove the jump node itself.
+ blockRange.Remove(jmp);
+ }
+ }
+ else
+ {
+ GenTreeStmt* cond = block->lastStmt();
+ noway_assert(cond->gtStmtExpr->gtOper == GT_JTRUE);
+
+ /* check for SIDE_EFFECTS */
+ if (cond->gtStmtExpr->gtFlags & GTF_SIDE_EFFECT)
+ {
+ /* Extract the side effects from the conditional */
+ GenTreePtr sideEffList = nullptr;
+
+ gtExtractSideEffList(cond->gtStmtExpr, &sideEffList);
+
+ if (sideEffList == nullptr)
+ {
+ compCurBB = block;
+ fgRemoveStmt(block, cond);
+ }
+ else
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional has side effects! Extracting side effects...\n");
+ gtDispTree(cond);
+ printf("\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Replace the conditional statement with the list of side effects */
+ noway_assert(sideEffList->gtOper != GT_STMT);
+ noway_assert(sideEffList->gtOper != GT_JTRUE);
+
+ cond->gtStmtExpr = sideEffList;
+
+ if (fgStmtListThreaded)
+ {
+ /* Update the lclvar ref counts */
+ compCurBB = block;
+ fgUpdateRefCntForExtract(cond->gtStmtExpr, sideEffList);
+
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(cond);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(cond);
+ }
+ }
+ }
+ else
+ {
+ compCurBB = block;
+ /* conditional has NO side effect - remove it */
+ fgRemoveStmt(block, cond);
+ }
+ }
+
+ /* Conditional is gone - simply fall into the next block */
+
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+
+ /* Update bbRefs and bbNum - Conditional predecessors to the same
+ * block are counted twice so we have to remove one of them */
+
+ noway_assert(bNext->countOfInEdges() > 1);
+ fgRemoveRefPred(bNext, block);
+
+ return true;
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Function called to optimize an unconditional branch that branches
+ * to a conditional branch.
+ * Currently we require that the conditional branch jump back to the
+ * block that follows the unconditional branch.
+ *
+ * We can improve the code execution and layout by concatenating a copy
+ * of the conditional branch block at the end of the conditional branch
+ * and reversing the sense of the branch.
+ *
+ * This is only done when the amount of code to be copied is smaller than
+ * our calculated threshold in maxDupCostSz.
+ *
+ */
+
+bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
+{
+ if (opts.MinOpts())
+ {
+ return false;
+ }
+
+ if (bJump->bbJumpKind != BBJ_ALWAYS)
+ {
+ return false;
+ }
+
+ if (bJump->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ return false;
+ }
+
+ // Don't hoist a conditional branch into the scratch block; we'd prefer it stay
+ // either BBJ_NONE or BBJ_ALWAYS.
+ if (fgBBisScratch(bJump))
+ {
+ return false;
+ }
+
+ BasicBlock* bDest = bJump->bbJumpDest;
+
+ if (bDest->bbJumpKind != BBJ_COND)
+ {
+ return false;
+ }
+
+ if (bDest->bbJumpDest != bJump->bbNext)
+ {
+ return false;
+ }
+
+ // 'bJump' must be in the same try region as the condition, since we're going to insert
+ // a duplicated condition in 'bJump', and the condition might include exception throwing code.
+ if (!BasicBlock::sameTryRegion(bJump, bDest))
+ {
+ return false;
+ }
+
+ // do not jump into another try region
+ BasicBlock* bDestNext = bDest->bbNext;
+ if (bDestNext->hasTryIndex() && !BasicBlock::sameTryRegion(bJump, bDestNext))
+ {
+ return false;
+ }
+
+ // This function is only called by fgReorderBlocks, which we do not run in the backend.
+ // If we wanted to run block reordering in the backend, we would need to be able to
+ // calculate cost information for LIR on a per-node basis in order for this function
+ // to work.
+ assert(!bJump->IsLIR());
+ assert(!bDest->IsLIR());
+
+ GenTreeStmt* stmt;
+ unsigned estDupCostSz = 0;
+ for (stmt = bDest->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr expr = stmt->gtStmtExpr;
+
+ /* We call gtPrepareCost to measure the cost of duplicating this tree */
+ gtPrepareCost(expr);
+
+ estDupCostSz += expr->gtCostSz;
+ }
+
+ bool allProfileWeightsAreValid = false;
+ BasicBlock::weight_t weightJump = bJump->bbWeight;
+ BasicBlock::weight_t weightDest = bDest->bbWeight;
+ BasicBlock::weight_t weightNext = bJump->bbNext->bbWeight;
+ bool rareJump = bJump->isRunRarely();
+ bool rareDest = bDest->isRunRarely();
+ bool rareNext = bJump->bbNext->isRunRarely();
+
+ // If we have profile data then we use it to decide whether bJump, bDest,
+ // and the fall-through block should be treated as rarely run
+ if (fgIsUsingProfileWeights())
+ {
+ // Only rely upon the profile weight when all three of these blocks
+ // have either good profile weights or are rarelyRun
+ //
+ if ((bJump->bbFlags & (BBF_PROF_WEIGHT | BBF_RUN_RARELY)) &&
+ (bDest->bbFlags & (BBF_PROF_WEIGHT | BBF_RUN_RARELY)) &&
+ (bJump->bbNext->bbFlags & (BBF_PROF_WEIGHT | BBF_RUN_RARELY)))
+ {
+ allProfileWeightsAreValid = true;
+
+ if ((weightJump * 100) < weightDest)
+ {
+ rareJump = true;
+ }
+
+ if ((weightNext * 100) < weightDest)
+ {
+ rareNext = true;
+ }
+
+ if (((weightDest * 100) < weightJump) && ((weightDest * 100) < weightNext))
+ {
+ rareDest = true;
+ }
+ }
+ }
+
+ unsigned maxDupCostSz = 6;
+
+ //
+ // Branches between the hot and rarely run regions
+ // should be minimized. So we allow a larger size
+ //
+ if (rareDest != rareJump)
+ {
+ maxDupCostSz += 6;
+ }
+
+ if (rareDest != rareNext)
+ {
+ maxDupCostSz += 6;
+ }
+
+ //
+ // When we are ngen-ing:
+ // If the unconditional branch is a rarely run block then
+ // we are willing to have more code expansion since we
+ // won't be running code from this page
+ //
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ if (rareJump)
+ {
+ maxDupCostSz *= 2;
+ }
+ }
+
+ // If the compare has too high a cost then we don't want to dup
+
+ bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplication of the conditional block BB%02u (always branch from BB%02u) %s, because the cost of "
+ "duplication (%i) is %s than %i,"
+ " validProfileWeights = %s\n",
+ bDest->bbNum, bJump->bbNum, costIsTooHigh ? "not done" : "performed", estDupCostSz,
+ costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, allProfileWeightsAreValid ? "true" : "false");
+ }
+#endif // DEBUG
+
+ if (costIsTooHigh)
+ {
+ return false;
+ }
+
+ /* Looks good - duplicate the conditional block */
+
+ GenTree* newStmtList = nullptr; // new stmt list to be added to bJump
+ GenTree* newStmtLast = nullptr;
+ bool cloneExprFailed = false;
+
+ /* Visit all the statements in bDest */
+
+ for (GenTree* curStmt = bDest->bbTreeList; curStmt; curStmt = curStmt->gtNext)
+ {
+ /* Clone/substitute the expression */
+
+ stmt = gtCloneExpr(curStmt)->AsStmt();
+
+ // cloneExpr doesn't handle everything
+
+ if (stmt == nullptr)
+ {
+ cloneExprFailed = true;
+ break;
+ }
+
+ /* Append the expression to our list */
+
+ if (newStmtList != nullptr)
+ {
+ newStmtLast->gtNext = stmt;
+ }
+ else
+ {
+ newStmtList = stmt;
+ }
+
+ stmt->gtPrev = newStmtLast;
+ newStmtLast = stmt;
+ }
+
+ if (cloneExprFailed)
+ {
+ return false;
+ }
+
+ noway_assert(newStmtLast != nullptr);
+ noway_assert(stmt != nullptr);
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ if ((newStmtLast == nullptr) || (stmt == nullptr) || (stmt->gtOper != GT_STMT))
+ {
+ return false;
+ }
+
+ /* Get to the condition node from the statement tree */
+
+ GenTreePtr condTree = stmt->gtStmtExpr;
+ noway_assert(condTree->gtOper == GT_JTRUE);
+
+ if (condTree->gtOper != GT_JTRUE)
+ {
+ return false;
+ }
+
+ //
+ // Set condTree to the operand to the GT_JTRUE
+ //
+ condTree = condTree->gtOp.gtOp1;
+
+ //
+ // This condTree has to be a RelOp comparison
+ //
+ if (condTree->OperIsCompare() == false)
+ {
+ return false;
+ }
+
+ // Bump up the ref-counts of any variables in 'stmt'
+ fgUpdateRefCntForClone(bJump, stmt->gtStmtExpr);
+
+ //
+ // Find the last statement in the bJump block
+ //
+ GenTreeStmt* lastStmt = nullptr;
+ for (stmt = bJump->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ lastStmt = stmt;
+ }
+ stmt = bJump->firstStmt();
+
+ /* Join the two linked lists */
+ newStmtLast->gtNext = nullptr;
+
+ if (lastStmt != nullptr)
+ {
+ stmt->gtPrev = newStmtLast;
+ lastStmt->gtNext = newStmtList;
+ newStmtList->gtPrev = lastStmt;
+ }
+ else
+ {
+ bJump->bbTreeList = newStmtList;
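+ // By convention the head statement's gtPrev points at the last statement of the list.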
+ newStmtList->gtPrev = newStmtLast;
+ }
+
+ //
+ // Reverse the sense of the compare
+ //
+ gtReverseCond(condTree);
+
+ bJump->bbJumpKind = BBJ_COND;
+ bJump->bbJumpDest = bDest->bbNext;
+
+ /* Mark the jump dest block as being a jump target */
+ bJump->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ // We need to update the following flags of the bJump block if they were set in the bbJumpDest block
+ bJump->bbFlags |= (bJump->bbJumpDest->bbFlags &
+ (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY | BBF_HAS_NULLCHECK | BBF_HAS_IDX_LEN | BBF_HAS_VTABREF));
+
+ /* Update bbRefs and bbPreds */
+
+ // bJump now falls through into the next block
+ //
+ fgAddRefPred(bJump->bbNext, bJump);
+
+ // bJump no longer jumps to bDest
+ //
+ fgRemoveRefPred(bDest, bJump);
+
+ // bJump now jumps to bDest->bbNext
+ //
+ fgAddRefPred(bDest->bbNext, bJump);
+
+ if (weightJump > 0)
+ {
+ if (allProfileWeightsAreValid)
+ {
+ if (weightDest > weightJump)
+ {
+ bDest->bbWeight = (weightDest - weightJump);
+ }
+ else if (!bDest->isRunRarely())
+ {
+ bDest->bbWeight = BB_UNITY_WEIGHT;
+ }
+ }
+ else
+ {
+ BasicBlock::weight_t newWeightDest = 0;
+ BasicBlock::weight_t unloopWeightDest = 0;
+
+ if (weightDest > weightJump)
+ {
+ newWeightDest = (weightDest - weightJump);
+ }
+ if (weightDest >= (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT) / 2)
+ {
+ newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT * BB_UNITY_WEIGHT);
+ }
+ if ((newWeightDest > 0) || (unloopWeightDest > 0))
+ {
+ bDest->bbWeight = Max(newWeightDest, unloopWeightDest);
+ }
+ }
+ }
+
+#if DEBUG
+ if (verbose)
+ {
+ printf("\nAfter this change in fgOptimizeBranch");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Function called to optimize switch statements
+ */
+
+bool Compiler::fgOptimizeSwitchJumps()
+{
+ bool result = false; // Our return value
+
+#if 0
+ // TODO-CQ: Add switch jump optimizations?
+ if (!fgHasSwitch)
+ return false;
+
+ if (!fgHaveValidEdgeWeights)
+ return false;
+
+ for (BasicBlock* bSrc = fgFirstBB; bSrc != NULL; bSrc = bSrc->bbNext)
+ {
+ if (bSrc->bbJumpKind == BBJ_SWITCH)
+ {
+ unsigned jumpCnt; jumpCnt = bSrc->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab; jumpTab = bSrc->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ BasicBlock* bDst = *jumpTab;
+ flowList* edgeToDst = fgGetPredForBlock(bDst, bSrc);
+ double outRatio = (double) edgeToDst->flEdgeWeightMin / (double) bSrc->bbWeight;
+
+ if (outRatio >= 0.60)
+ {
+ // straighten switch here...
+ }
+ }
+ while (++jumpTab, --jumpCnt);
+ }
+ }
+#endif
+
+ return result;
+}
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+/*****************************************************************************
+ *
+ * Function called to reorder the flowgraph of BasicBlocks such that any
+ * rarely run blocks are placed at the end of the block list.
+ * If we have profile information we also use that information to reverse
+ * all conditional jumps that would benefit.
+ */
+
+void Compiler::fgReorderBlocks()
+{
+ noway_assert(opts.compDbgCode == false);
+
+#if FEATURE_EH_FUNCLETS
+ assert(fgFuncletsCreated);
+#endif // FEATURE_EH_FUNCLETS
+
+ // We can't relocate anything if we only have one block
+ if (fgFirstBB->bbNext == nullptr)
+ {
+ return;
+ }
+
+ bool newRarelyRun = false;
+ bool movedBlocks = false;
+ bool optimizedSwitches = false;
+
+ // First let us expand the set of run rarely blocks
+ newRarelyRun |= fgExpandRarelyRunBlocks();
+
+#if !FEATURE_EH_FUNCLETS
+ movedBlocks |= fgRelocateEHRegions();
+#endif // !FEATURE_EH_FUNCLETS
+
+ //
+ // If we are using profile weights we can change some
+ // switch jumps into conditional test and jump
+ //
+ if (fgIsUsingProfileWeights())
+ {
+ //
+ // Note that this is not yet implemented
+ //
+ optimizedSwitches = fgOptimizeSwitchJumps();
+ if (optimizedSwitches)
+ {
+ fgUpdateFlowGraph();
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgReorderBlocks()\n");
+
+ printf("\nInitial BasicBlocks");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* bNext;
+ BasicBlock* bPrev;
+ BasicBlock* block;
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ // Iterate over every block, remembering our previous block in bPrev
+ for (bPrev = fgFirstBB, block = bPrev->bbNext; block != nullptr; bPrev = block, block = block->bbNext)
+ {
+ //
+ // Consider relocating the rarely run blocks such that they are at the end of the method.
+ // We also consider reversing conditional branches so that they become a not-taken forward branch.
+ //
+
+ // If block is marked with a BBF_KEEP_BBJ_ALWAYS flag then we don't move the block
+ if ((block->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0)
+ {
+ continue;
+ }
+
+ // Finally and handlers blocks are to be kept contiguous.
+ // TODO-CQ: Allow reordering within the handler region
+ if (block->hasHndIndex() == true)
+ {
+ continue;
+ }
+
+ bool reorderBlock = true; // This is set to false if we decide not to reorder 'block'
+ bool isRare = block->isRunRarely();
+ BasicBlock* bDest = nullptr;
+ bool forwardBranch = false;
+ bool backwardBranch = false;
+
+ // Setup bDest
+ if ((bPrev->bbJumpKind == BBJ_COND) || (bPrev->bbJumpKind == BBJ_ALWAYS))
+ {
+ bDest = bPrev->bbJumpDest;
+ forwardBranch = fgIsForwardBranch(bPrev);
+ backwardBranch = !forwardBranch;
+ }
+
+ // We will look for bPrev as a non rarely run block followed by block as a rarely run block
+ //
+ if (bPrev->isRunRarely())
+ {
+ reorderBlock = false;
+ }
+
+ // If the weights of the bPrev, block and bDest were all obtained from a profile run
+ // then we can use them to decide if it is useful to reverse this conditional branch
+
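+ // profHotWeight is the weight threshold used below: blocks whose weight is less than
+ // profHotWeight are treated as uncommonly run and become candidates for relocation.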
+ BasicBlock::weight_t profHotWeight = -1;
+
+ if ((bPrev->bbFlags & BBF_PROF_WEIGHT) && (block->bbFlags & BBF_PROF_WEIGHT) &&
+ ((bDest == nullptr) || (bDest->bbFlags & BBF_PROF_WEIGHT)))
+ {
+ //
+ // All blocks have profile information
+ //
+ if (forwardBranch)
+ {
+ if (bPrev->bbJumpKind == BBJ_ALWAYS)
+ {
+ // We can pull up the blocks that the unconditional jump branches to
+ // if the weight of bDest is greater or equal to the weight of block
+ // also the weight of bDest can't be zero.
+ //
+ if ((bDest->bbWeight < block->bbWeight) || (bDest->bbWeight == 0))
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ //
+ // If this remains true then we will try to pull up bDest to succeed bPrev
+ //
+ bool moveDestUp = true;
+
+ if (fgHaveValidEdgeWeights)
+ {
+ //
+ // The edge bPrev -> bDest must have a higher minimum weight
+ // than every other edge into bDest
+ //
+ flowList* edgeFromPrev = fgGetPredForBlock(bDest, bPrev);
+ noway_assert(edgeFromPrev != nullptr);
+
+ // Examine all of the other edges into bDest
+ for (flowList* edge = bDest->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ if (edge != edgeFromPrev)
+ {
+ if (edge->flEdgeWeightMax >= edgeFromPrev->flEdgeWeightMin)
+ {
+ moveDestUp = false;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ //
+ // The block bPrev must have a higher weight
+ // than every other block that goes into bDest
+ //
+
+ // Examine all of the other edges into bDest
+ for (flowList* edge = bDest->bbPreds; edge != nullptr; edge = edge->flNext)
+ {
+ BasicBlock* bTemp = edge->flBlock;
+
+ if ((bTemp != bPrev) && (bTemp->bbWeight >= bPrev->bbWeight))
+ {
+ moveDestUp = false;
+ break;
+ }
+ }
+ }
+
+ // Are we still good to move bDest up to bPrev?
+ if (moveDestUp)
+ {
+ //
+ // We will consider all blocks that have less weight than profHotWeight to be
+ // uncommonly run blocks as compared with the hot path of bPrev taken-jump to bDest
+ //
+ profHotWeight = bDest->bbWeight - 1;
+ }
+ else
+ {
+ if (block->isRunRarely())
+ {
+ // We will move any rarely run blocks
+ profHotWeight = 0;
+ }
+ else
+ {
+ // We will move all blocks that have a weight less or equal to our fall through block
+ profHotWeight = block->bbWeight + 1;
+ }
+ // But we won't try to connect with bDest
+ bDest = nullptr;
+ }
+ }
+ }
+ else // (bPrev->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(bPrev->bbJumpKind == BBJ_COND);
+ //
+ // We will reverse branch if the taken-jump to bDest ratio (i.e. 'takenRatio')
+ // is more than 51%
+ //
+ // We will setup profHotWeight to be maximum bbWeight that a block
+ // could have for us not to want to reverse the conditional branch
+ //
+ // We will consider all blocks that have less weight than profHotWeight to be
+ // uncommonly run blocks as compared with the hot path of bPrev taken-jump to bDest
+ //
+ if (fgHaveValidEdgeWeights)
+ {
+ // We have valid edge weights, however even with valid edge weights
+ // we may only have a minimum and maximum range for each edge's value
+ //
+ // We compare the average weight of the bPrev to bDest edge against
+ // the average weight of the bPrev to block edge to compute the taken ratio.
+ //
+ // bPrev --> [BB04, weight 31]
+ // | \
+ // edgeToBlock -------------> O \
+ // [min=8,max=10] V \
+ // block --> [BB05, weight 10] \
+ // \
+ // edgeToDest ----------------------------> O
+ // [min=21,max=23] |
+ // V
+ // bDest ---------------> [BB08, weight 21]
+ //
+ flowList* edgeToDest = fgGetPredForBlock(bDest, bPrev);
+ flowList* edgeToBlock = fgGetPredForBlock(block, bPrev);
+ noway_assert(edgeToDest != nullptr);
+ noway_assert(edgeToBlock != nullptr);
+ //
+ // Calculate the taken ratio
+ // A takenRatio of 0.10 means taken 10% of the time, not taken 90% of the time
+ // A takenRatio of 0.50 means taken 50% of the time, not taken 50% of the time
+ // A takenRatio of 0.90 means taken 90% of the time, not taken 10% of the time
+ //
+ double takenCount =
+ ((double)edgeToDest->flEdgeWeightMin + (double)edgeToDest->flEdgeWeightMax) / 2.0;
+ double notTakenCount =
+ ((double)edgeToBlock->flEdgeWeightMin + (double)edgeToBlock->flEdgeWeightMax) / 2.0;
+ double totalCount = takenCount + notTakenCount;
+ double takenRatio = takenCount / totalCount;
+
+ // If the takenRatio is greater or equal to 51% then we will reverse the branch
+ if (takenRatio < 0.51)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ // set profHotWeight
+ profHotWeight = (edgeToBlock->flEdgeWeightMin + edgeToBlock->flEdgeWeightMax) / 2 - 1;
+ }
+ }
+ else
+ {
+ // We don't have valid edge weight so we will be more conservative
+ // We could have bPrev, block or bDest as part of a loop and thus have extra weight
+ //
+ // We will do two checks:
+ // 1. Check that the weight of bDest is at least two times more than block
+ // 2. Check that the weight of bPrev is at least three times more than block
+ //
+ // bPrev --> [BB04, weight 31]
+ // | \
+ // V \
+ // block --> [BB05, weight 10] \
+ // \
+ // |
+ // V
+ // bDest ---------------> [BB08, weight 21]
+ //
+ // For this case weightDest is calculated as (21+1)/2 or 11
+ // and weightPrev is calculated as (31+2)/3 also 11
+ //
+ // Generally both weightDest and weightPrev should calculate
+ // the same value unless bPrev or bDest are part of a loop
+ //
+ BasicBlock::weight_t weightDest =
+ bDest->isMaxBBWeight() ? bDest->bbWeight : (bDest->bbWeight + 1) / 2;
+ BasicBlock::weight_t weightPrev =
+ bPrev->isMaxBBWeight() ? bPrev->bbWeight : (bPrev->bbWeight + 2) / 3;
+
+ // select the lower of weightDest and weightPrev
+ profHotWeight = (weightDest < weightPrev) ? weightDest : weightPrev;
+
+ // if the weight of block is greater (or equal) to profHotWeight then we don't reverse the cond
+ if (block->bbWeight >= profHotWeight)
+ {
+ reorderBlock = false;
+ }
+ }
+ }
+ }
+ else // not a forwardBranch
+ {
+ if (bPrev->bbFallsThrough())
+ {
+ goto CHECK_FOR_RARE;
+ }
+
+ // Here we should pull up the highest weight block remaining
+ // and place it here since bPrev does not fall through.
+
+ BasicBlock::weight_t highestWeight = 0;
+ BasicBlock* candidateBlock = nullptr;
+ BasicBlock* lastNonFallThroughBlock = bPrev;
+ BasicBlock* bTmp = bPrev->bbNext;
+
+ while (bTmp != nullptr)
+ {
+ // Don't try to split a Call/Always pair
+ //
+ if (bTmp->isBBCallAlwaysPair())
+ {
+ // Move bTmp forward
+ bTmp = bTmp->bbNext;
+ }
+
+ //
+ // Check for loop exit condition
+ //
+ if (bTmp == nullptr)
+ {
+ break;
+ }
+
+ //
+ // if its weight is the highest one we've seen and
+ // the EH regions allow for us to place bTmp after bPrev
+ //
+ if ((bTmp->bbWeight > highestWeight) && fgEhAllowsMoveBlock(bPrev, bTmp))
+ {
+ // When we have a current candidateBlock that is a conditional (or unconditional) jump
+ // to bTmp (which is a higher weighted block) then it is better to keep our current
+ // candidateBlock and have it fall into bTmp
+ //
+ if ((candidateBlock == nullptr) ||
+ ((candidateBlock->bbJumpKind != BBJ_COND) && (candidateBlock->bbJumpKind != BBJ_ALWAYS)) ||
+ (candidateBlock->bbJumpDest != bTmp))
+ {
+ // otherwise we have a new candidateBlock
+ //
+ highestWeight = bTmp->bbWeight;
+ candidateBlock = lastNonFallThroughBlock->bbNext;
+ }
+ }
+
+ if ((bTmp->bbFallsThrough() == false) || (bTmp->bbWeight == 0))
+ {
+ lastNonFallThroughBlock = bTmp;
+ }
+
+ bTmp = bTmp->bbNext;
+ }
+
+ // If we didn't find a suitable block then skip this
+ if (highestWeight == 0)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ noway_assert(candidateBlock != nullptr);
+
+ // If the candidateBlock is the same as block then skip this
+ if (candidateBlock == block)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ // Set bDest to the block that we want to come after bPrev
+ bDest = candidateBlock;
+
+ // set profHotWeight
+ profHotWeight = highestWeight - 1;
+ }
+ }
+ }
+ }
+ else // we don't have good profile info (or we are falling through)
+ {
+
+ CHECK_FOR_RARE:;
+
+ /* We only want to reorder when we have a rarely run */
+ /* block right after a normal block, */
+ /* (bPrev is known to be a normal block at this point) */
+ if (!isRare)
+ {
+ reorderBlock = false;
+ }
+ else
+ {
+ /* If the jump target bDest is also a rarely run block then we don't want to do the reversal */
+ if (bDest && bDest->isRunRarely())
+ {
+ reorderBlock = false; /* Both block and bDest are rarely run */
+ }
+ else
+ {
+ // We will move any rarely run blocks
+ profHotWeight = 0;
+ }
+ }
+ }
+
+ if (reorderBlock == false)
+ {
+ //
+ // Check for an unconditional branch to a conditional branch
+ // which also branches back to our next block
+ //
+ if (fgOptimizeBranch(bPrev))
+ {
+ noway_assert(bPrev->bbJumpKind == BBJ_COND);
+ }
+ continue;
+ }
+
+ // Now we need to determine which blocks should be moved
+ //
+ // We consider one of two choices:
+ //
+ // 1. Moving the fall-through blocks (or rarely run blocks) down to
+ // later in the method and hopefully connecting the jump dest block
+ // so that it becomes the fall through block
+ //
+ // And when bDest in not NULL, we also consider:
+ //
+ // 2. Moving the bDest block (or blocks) up to bPrev
+ // so that it could be used as a fall through block
+ //
+ // We will prefer option #1 if we are able to connect the jump dest
+ // block as the fall though block otherwise will we try to use option #2
+ //
+
+ //
+ // Consider option #1: relocating blocks starting at 'block'
+ // to later in flowgraph
+ //
+ // We set bStart to the first block that will be relocated
+ // and bEnd to the last block that will be relocated
+
+ BasicBlock* bStart = block;
+ BasicBlock* bEnd = bStart;
+ bNext = bEnd->bbNext;
+ bool connected_bDest = false;
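+ // connected_bDest will record whether relocating [bStart..bEnd] leaves bDest as the
+ // lexical successor of bPrev, allowing bPrev to fall through instead of jumping.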
+
+ if ((backwardBranch && !isRare) ||
+ ((block->bbFlags & BBF_DONT_REMOVE) != 0)) // Don't choose option #1 when block is the start of a try region
+ {
+ bStart = nullptr;
+ bEnd = nullptr;
+ }
+ else
+ {
+ while (true)
+ {
+ // Don't try to split a Call/Always pair
+ //
+ if (bEnd->isBBCallAlwaysPair())
+ {
+ // Move bEnd and bNext forward
+ bEnd = bNext;
+ bNext = bNext->bbNext;
+ }
+
+ //
+ // Check for loop exit condition
+ //
+ if (bNext == nullptr)
+ {
+ break;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // Check if we've reached the funclets region, at the end of the function
+ if (fgFirstFuncletBB == bEnd->bbNext)
+ {
+ break;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (bNext == bDest)
+ {
+ connected_bDest = true;
+ break;
+ }
+
+ // All the blocks must have the same try index
+ // and must not have the BBF_DONT_REMOVE flag set
+
+ if (!BasicBlock::sameTryRegion(bStart, bNext) || ((bNext->bbFlags & BBF_DONT_REMOVE) != 0))
+ {
+ // exit the loop, bEnd is now set to the
+ // last block that we want to relocate
+ break;
+ }
+
+ // If we are relocating rarely run blocks..
+ if (isRare)
+ {
+ // ... then all blocks must be rarely run
+ if (!bNext->isRunRarely())
+ {
+ // exit the loop, bEnd is now set to the
+ // last block that we want to relocate
+ break;
+ }
+ }
+ else
+ {
+ // If we are moving blocks that are hot then all
+ // of the blocks moved must be less than profHotWeight
+ if (bNext->bbWeight >= profHotWeight)
+ {
+ // exit the loop, bEnd is now set to the
+ // last block that we would relocate
+ break;
+ }
+ }
+
+ // Move bEnd and bNext forward
+ bEnd = bNext;
+ bNext = bNext->bbNext;
+ }
+
+ // Set connected_bDest to true if moving blocks [bStart .. bEnd]
+ // connects with the jump dest of bPrev (i.e. bDest) and
+ // thus allows bPrev to fall through instead of jumping.
+ if (bNext == bDest)
+ {
+ connected_bDest = true;
+ }
+ }
+
+ // Now consider option #2: Moving the jump dest block (or blocks)
+ // up to bPrev
+ //
+ // The variables bStart2, bEnd2 and bPrev2 are used for option #2
+ //
+ // We will setup bStart2 to the first block that will be relocated
+ // and bEnd2 to the last block that will be relocated
+ // and bPrev2 to be the lexical pred of bDest
+ //
+ // If after this calculation bStart2 is NULL we cannot use option #2,
+ // otherwise bStart2, bEnd2 and bPrev2 are all non-NULL and we will use option #2
+
+ BasicBlock* bStart2 = nullptr;
+ BasicBlock* bEnd2 = nullptr;
+ BasicBlock* bPrev2 = nullptr;
+
+ // If option #1 didn't connect bDest and bDest isn't NULL
+ if ((connected_bDest == false) && (bDest != nullptr) &&
+ // The jump target cannot be moved if it has the BBF_DONT_REMOVE flag set
+ ((bDest->bbFlags & BBF_DONT_REMOVE) == 0))
+ {
+ // We will consider option #2: relocating blocks starting at 'bDest' to succeed bPrev
+ //
+ // setup bPrev2 to be the lexical pred of bDest
+
+ bPrev2 = block;
+ while (bPrev2 != nullptr)
+ {
+ if (bPrev2->bbNext == bDest)
+ {
+ break;
+ }
+
+ bPrev2 = bPrev2->bbNext;
+ }
+
+ if ((bPrev2 != nullptr) && fgEhAllowsMoveBlock(bPrev, bDest))
+ {
+ // We have decided that relocating bDest to be after bPrev is best
+ // Set bStart2 to the first block that will be relocated
+ // and bEnd2 to the last block that will be relocated
+ //
+ // Assigning to bStart2 selects option #2
+ //
+ bStart2 = bDest;
+ bEnd2 = bStart2;
+ bNext = bEnd2->bbNext;
+
+ while (true)
+ {
+ // Don't try to split a Call/Always pair
+ //
+ if (bEnd2->isBBCallAlwaysPair())
+ {
+ noway_assert(bNext->bbJumpKind == BBJ_ALWAYS);
+ // Move bEnd2 and bNext forward
+ bEnd2 = bNext;
+ bNext = bNext->bbNext;
+ }
+
+ // Check for the Loop exit conditions
+
+ if (bNext == nullptr)
+ {
+ break;
+ }
+
+ if (bEnd2->bbFallsThrough() == false)
+ {
+ break;
+ }
+
+ // All the blocks must have the same try index,
+ // and must not have the BBF_DONT_REMOVE flag set
+
+ if (!BasicBlock::sameTryRegion(bStart2, bNext) || ((bNext->bbFlags & BBF_DONT_REMOVE) != 0))
+ {
+ // exit the loop, bEnd2 is now set to the
+ // last block that we want to relocate
+ break;
+ }
+
+ if (isRare)
+ {
+ /* ... then all blocks must not be rarely run */
+ if (bNext->isRunRarely())
+ {
+ // exit the loop, bEnd2 is now set to the
+ // last block that we want to relocate
+ break;
+ }
+ }
+ else
+ {
+ // If we are relocating hot blocks
+ // all blocks moved must be greater than profHotWeight
+ if (bNext->bbWeight <= profHotWeight)
+ {
+ // exit the loop, bEnd2 is now set to the
+ // last block that we want to relocate
+ break;
+ }
+ }
+
+ // Move bEnd2 and bNext forward
+ bEnd2 = bNext;
+ bNext = bNext->bbNext;
+ }
+ }
+ }
+
+ // If we are using option #1 then ...
+ if (bStart2 == nullptr)
+ {
+ // Don't use option #1 for a backwards branch
+ if (bStart == nullptr)
+ {
+ continue;
+ }
+
+ // .... Don't move a set of blocks that are already at the end of the main method
+ if (bEnd == fgLastBBInMainFunction())
+ {
+ continue;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (bDest != nullptr)
+ {
+ if (bPrev->bbJumpKind == BBJ_COND)
+ {
+ printf("Decided to reverse conditional branch at block BB%02u branch to BB%02u ", bPrev->bbNum,
+ bDest->bbNum);
+ }
+ else if (bPrev->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf("Decided to straighten unconditional branch at block BB%02u branch to BB%02u ", bPrev->bbNum,
+ bDest->bbNum);
+ }
+ else
+ {
+ printf("Decided to place hot code after BB%02u, placed BB%02u after this block ", bPrev->bbNum,
+ bDest->bbNum);
+ }
+
+ if (profHotWeight > 0)
+ {
+ printf("because of IBC profile data\n");
+ }
+ else
+ {
+ if (bPrev->bbFallsThrough())
+ {
+ printf("since it falls into a rarely run block\n");
+ }
+ else
+ {
+ printf("since it is succeeded by a rarely run block\n");
+ }
+ }
+ }
+ else
+ {
+ printf("Decided to relocate block(s) after block BB%02u since they are %s block(s)\n", bPrev->bbNum,
+ block->isRunRarely() ? "rarely run" : "uncommonly run");
+ }
+ }
+#endif // DEBUG
+
+ // We will set insertAfterBlk to the block that precedes our insertion range
+ // We will set bStartPrev to be the block that precedes the set of blocks that we are moving
+ BasicBlock* insertAfterBlk;
+ BasicBlock* bStartPrev;
+
+ if (bStart2 != nullptr)
+ {
+ // Option #2: relocating blocks starting at 'bDest' to follow bPrev
+
+ // Update bStart and bEnd so that we can use these two for all later operations
+ bStart = bStart2;
+ bEnd = bEnd2;
+
+ // Set bStartPrev to be the block that comes before bStart
+ bStartPrev = bPrev2;
+
+ // We will move [bStart..bEnd] to immediately after bPrev
+ insertAfterBlk = bPrev;
+ }
+ else
+ {
+ // option #1: Moving the fall-through blocks (or rarely run blocks) down to later in the method
+
+ // Set bStartPrev to be the block that comes before bStart
+ bStartPrev = bPrev;
+
+ // We will move [bStart..bEnd] but we will pick the insert location later
+ insertAfterBlk = nullptr;
+ }
+
+ // We are going to move [bStart..bEnd] so they can't be NULL
+ noway_assert(bStart != nullptr);
+ noway_assert(bEnd != nullptr);
+
+ // bEnd can't be a BBJ_CALLFINALLY unless it is a RETLESS call
+ noway_assert((bEnd->bbJumpKind != BBJ_CALLFINALLY) || (bEnd->bbFlags & BBF_RETLESS_CALL));
+
+ // bStartPrev must be set to the block that precedes bStart
+ noway_assert(bStartPrev->bbNext == bStart);
+
+ // Since we will be unlinking [bStart..bEnd],
+ // we need to compute and remember if bStart is in each of
+ // the try and handler regions
+ //
+ bool* fStartIsInTry = nullptr;
+ bool* fStartIsInHnd = nullptr;
+
+ if (compHndBBtabCount > 0)
+ {
+ fStartIsInTry = new (this, CMK_Unknown) bool[compHndBBtabCount];
+ fStartIsInHnd = new (this, CMK_Unknown) bool[compHndBBtabCount];
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ fStartIsInTry[XTnum] = HBtab->InTryRegionBBRange(bStart);
+ fStartIsInHnd[XTnum] = HBtab->InHndRegionBBRange(bStart);
+ }
+ }
+
+ /* Temporarily unlink [bStart..bEnd] from the flow graph */
+ fgUnlinkRange(bStart, bEnd);
+
+ if (insertAfterBlk == nullptr)
+ {
+ // Find new location for the unlinked block(s)
+ // Set insertAfterBlk to the block which will precede the insertion point
+
+ if (!bStart->hasTryIndex() && isRare)
+ {
+ // We'll just insert the blocks at the end of the method. If the method
+ // has funclets, we will insert at the end of the main method but before
+ // any of the funclets. Note that we create funclets before we call
+ // fgReorderBlocks().
+
+ insertAfterBlk = fgLastBBInMainFunction();
+ noway_assert(insertAfterBlk != bPrev);
+ }
+ else
+ {
+ BasicBlock* startBlk;
+ BasicBlock* lastBlk;
+ EHblkDsc* ehDsc = ehInitTryBlockRange(bStart, &startBlk, &lastBlk);
+
+ BasicBlock* endBlk;
+
+ /* Setup startBlk and endBlk as the range to search */
+
+ if (ehDsc != nullptr)
+ {
+ endBlk = lastBlk->bbNext;
+
+ /*
+ Multiple (nested) try regions might start from the same BB.
+ For example,
+
+ try3 try2 try1
+ |--- |--- |--- BB01
+ | | | BB02
+ | | |--- BB03
+ | | BB04
+ | |------------ BB05
+ | BB06
+ |------------------- BB07
+
+ Now if we want to insert in try2 region, we will start with startBlk=BB01.
+ The following loop will allow us to start from startBlk==BB04.
+ */
+ while (!BasicBlock::sameTryRegion(startBlk, bStart) && (startBlk != endBlk))
+ {
+ startBlk = startBlk->bbNext;
+ }
+
+ // startBlk cannot equal endBlk as it must come before endBlk
+ if (startBlk == endBlk)
+ {
+ goto CANNOT_MOVE;
+ }
+
+ // we also can't start searching the try region at bStart
+ if (startBlk == bStart)
+ {
+ // if bEnd is the last block in the method or
+ // or if bEnd->bbNext is in a different try region
+ // then we cannot move the blocks
+ //
+ if ((bEnd->bbNext == nullptr) || !BasicBlock::sameTryRegion(startBlk, bEnd->bbNext))
+ {
+ goto CANNOT_MOVE;
+ }
+
+ startBlk = bEnd->bbNext;
+
+ // Check that the new startBlk still comes before endBlk
+
+ // startBlk cannot equal endBlk as it must come before endBlk
+ if (startBlk == endBlk)
+ {
+ goto CANNOT_MOVE;
+ }
+
+ BasicBlock* tmpBlk = startBlk;
+ while ((tmpBlk != endBlk) && (tmpBlk != nullptr))
+ {
+ tmpBlk = tmpBlk->bbNext;
+ }
+
+ // when tmpBlk is NULL that means startBlk is after endBlk
+ // so there is no way to move bStart..bEnd within the try region
+ if (tmpBlk == nullptr)
+ {
+ goto CANNOT_MOVE;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(isRare == false);
+
+ /* We'll search through the entire main method */
+ startBlk = fgFirstBB;
+ endBlk = fgEndBBAfterMainFunction();
+ }
+
+ // Calculate nearBlk and jumpBlk and then call fgFindInsertPoint()
+ // to find our insertion block
+ //
+ {
+ // If the set of blocks that we are moving ends with a BBJ_ALWAYS to
+ // another [rarely run] block that comes after bPrev (forward branch)
+ // then we can set up nearBlk to eliminate this jump sometimes
+ //
+ BasicBlock* nearBlk = nullptr;
+ BasicBlock* jumpBlk = nullptr;
+
+ if ((bEnd->bbJumpKind == BBJ_ALWAYS) && (!isRare || bEnd->bbJumpDest->isRunRarely()) &&
+ fgIsForwardBranch(bEnd, bPrev))
+ {
+ // Set nearBlk to be the block in [startBlk..endBlk]
+ // such that nearBlk->bbNext == bEnd->JumpDest
+ // if no such block exists then set nearBlk to NULL
+ nearBlk = startBlk;
+ jumpBlk = bEnd;
+ do
+ {
+ // We do not want to set nearBlk to bPrev
+ // since then we will not move [bStart..bEnd]
+ //
+ if (nearBlk != bPrev)
+ {
+ // Check if nearBlk satisfies our requirement
+ if (nearBlk->bbNext == bEnd->bbJumpDest)
+ {
+ break;
+ }
+ }
+
+ // Did we reach the endBlk?
+ if (nearBlk == endBlk)
+ {
+ nearBlk = nullptr;
+ break;
+ }
+
+ // advance nearBlk to the next block
+ nearBlk = nearBlk->bbNext;
+
+ } while (nearBlk != nullptr);
+ }
+
+ // if nearBlk is NULL then we set nearBlk to be the
+ // first block that we want to insert after.
+ if (nearBlk == nullptr)
+ {
+ if (bDest != nullptr)
+ {
+ // we want to insert after bDest
+ nearBlk = bDest;
+ }
+ else
+ {
+ // we want to insert after bPrev
+ nearBlk = bPrev;
+ }
+ }
+
+ /* Set insertAfterBlk to the block which we will insert after. */
+
+ insertAfterBlk =
+ fgFindInsertPoint(bStart->bbTryIndex,
+ true, // Insert in the try region.
+ startBlk, endBlk, nearBlk, jumpBlk, bStart->bbWeight == BB_ZERO_WEIGHT);
+ }
+
+ /* See if insertAfterBlk is the same as where we started, */
+ /* or if we could not find any insertion point */
+
+ if ((insertAfterBlk == bPrev) || (insertAfterBlk == nullptr))
+ {
+ CANNOT_MOVE:;
+ /* We couldn't move the blocks, so put everything back */
+ /* relink [bStart .. bEnd] into the flow graph */
+
+ bPrev->setNext(bStart);
+ if (bEnd->bbNext)
+ {
+ bEnd->bbNext->bbPrev = bEnd;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (bStart != bEnd)
+ {
+ printf("Could not relocate blocks (BB%02u .. BB%02u)\n", bStart->bbNum, bEnd->bbNum);
+ }
+ else
+ {
+ printf("Could not relocate block BB%02u\n", bStart->bbNum);
+ }
+ }
+#endif // DEBUG
+ continue;
+ }
+ }
+ }
+
+ noway_assert(insertAfterBlk != nullptr);
+ noway_assert(bStartPrev != nullptr);
+ noway_assert(bStartPrev != insertAfterBlk);
+
+ movedBlocks = true;
+
+#ifdef DEBUG
+
+ if (verbose)
+ {
+ const char* msg;
+ if (bStart2 != nullptr)
+ {
+ msg = "hot";
+ }
+ else
+ {
+ if (isRare)
+ {
+ msg = "rarely run";
+ }
+ else
+ {
+ msg = "uncommon";
+ }
+ }
+
+ printf("Relocated %s ", msg);
+ if (bStart != bEnd)
+ {
+ printf("blocks (BB%02u .. BB%02u)", bStart->bbNum, bEnd->bbNum);
+ }
+ else
+ {
+ printf("block BB%02u", bStart->bbNum);
+ }
+
+ if (bPrev->bbJumpKind == BBJ_COND)
+ {
+ printf(" by reversing conditional jump at BB%02u\n", bPrev->bbNum);
+ }
+ else
+ {
+ printf("\n", bPrev->bbNum);
+ }
+ }
+#endif // DEBUG
+
+ if (bPrev->bbJumpKind == BBJ_COND)
+ {
+ /* Reverse the bPrev jump condition */
+ GenTree* condTest = bPrev->lastStmt();
+
+ condTest = condTest->gtStmt.gtStmtExpr;
+ noway_assert(condTest->gtOper == GT_JTRUE);
+
+ condTest->gtOp.gtOp1 = gtReverseCond(condTest->gtOp.gtOp1);
+
+ if (bStart2 == nullptr)
+ {
+ /* Set the new jump dest for bPrev to the rarely run or uncommon block(s) */
+ bPrev->bbJumpDest = bStart;
+ bStart->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+ }
+ else
+ {
+ noway_assert(insertAfterBlk == bPrev);
+ noway_assert(insertAfterBlk->bbNext == block);
+
+ /* Set the new jump dest for bPrev to the rarely run or uncommon block(s) */
+ bPrev->bbJumpDest = block;
+ block->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+ }
+ }
+
+ // If we are moving blocks that are at the end of a try or handler
+ // we will need to shorten ebdTryLast or ebdHndLast
+ //
+ ehUpdateLastBlocks(bEnd, bStartPrev);
+
+ // If we are moving blocks into the end of a try region or handler region
+ // we will need to extend ebdTryLast or ebdHndLast so the blocks that we
+ // are moving are part of this try or handler region.
+ //
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Are we moving blocks to the end of a try region?
+ if (HBtab->ebdTryLast == insertAfterBlk)
+ {
+ if (fStartIsInTry[XTnum])
+ {
+ // bStart..bEnd is in the try, so extend the try region
+ fgSetTryEnd(HBtab, bEnd);
+ }
+ }
+
+ // Are we moving blocks to the end of a handler region?
+ if (HBtab->ebdHndLast == insertAfterBlk)
+ {
+ if (fStartIsInHnd[XTnum])
+ {
+ // bStart..bEnd is in the handler, so extend the handler region
+ fgSetHndEnd(HBtab, bEnd);
+ }
+ }
+ }
+
+ /* We have decided to insert the block(s) after 'insertAfterBlk' */
+ fgMoveBlocksAfter(bStart, bEnd, insertAfterBlk);
+
+ if (bDest)
+ {
+ /* We may need to insert an unconditional branch after bPrev to bDest */
+ fgConnectFallThrough(bPrev, bDest);
+ }
+ else
+ {
+ /* If bPrev falls through, we must insert a jump to block */
+ fgConnectFallThrough(bPrev, block);
+ }
+
+ BasicBlock* bSkip = bEnd->bbNext;
+
+ /* If bEnd falls through, we must insert a jump to bNext */
+ fgConnectFallThrough(bEnd, bNext);
+
+ if (bStart2 == nullptr)
+ {
+ /* If insertAfterBlk falls through, we are forced to */
+ /* add a jump around the block(s) we just inserted */
+ fgConnectFallThrough(insertAfterBlk, bSkip);
+ }
+ else
+ {
+ /* We may need to insert an unconditional branch after bPrev2 to bStart */
+ fgConnectFallThrough(bPrev2, bStart);
+ }
+
+#if DEBUG
+ if (verbose)
+ {
+ printf("\nAfter this change in fgReorderBlocks");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+ fgVerifyHandlerTab();
+
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+
+ // Set our iteration point 'block' to be the new bPrev->bbNext
+ // It will be used as the next bPrev
+ block = bPrev->bbNext;
+
+ } // end of for loop(bPrev,block)
+
+ bool changed = movedBlocks || newRarelyRun || optimizedSwitches;
+
+ if (changed)
+ {
+ fgNeedsUpdateFlowGraph = true;
+#if DEBUG
+ // Make sure that the predecessor lists are accurate
+ if (expensiveDebugCheckLevel >= 2)
+ {
+ fgDebugCheckBBlist();
+ }
+#endif // DEBUG
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*-------------------------------------------------------------------------
+ *
+ * Walk the basic blocks list to determine the first block to place in the
+ * cold section. This would be the first of a series of rarely executed blocks
+ * such that no succeeding blocks are in a try region or an exception handler
+ * or are frequently executed.
+ */
+
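+// For illustration (hypothetical block numbers and weights): given the layout BB01 (hot),
+// BB02 (hot), BB03 (rarely run), BB04 (rarely run), BB03 becomes the candidate first cold
+// block; if a later block turns out to be hot, or must stay in the hot section (e.g. a
+// handler entry), the search restarts from scratch.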
+void Compiler::fgDetermineFirstColdBlock()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgDetermineFirstColdBlock()\n");
+ }
+#endif // DEBUG
+
+    // Since we may need to create a new transition block
+ // we assert that it is OK to create new blocks.
+ //
+ assert(fgSafeBasicBlockCreation);
+
+ fgFirstColdBlock = nullptr;
+
+#if FEATURE_STACK_FP_X87
+ if (compMayHaveTransitionBlocks)
+ {
+ opts.compProcedureSplitting = false;
+
+ // See comment above declaration of compMayHaveTransitionBlocks for comments on this
+        // See the comment above the declaration of compMayHaveTransitionBlocks for details on this
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ if (!opts.compProcedureSplitting)
+ {
+ JITDUMP("No procedure splitting will be done for this method\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if ((compHndBBtabCount > 0) && !opts.compProcedureSplittingEH)
+ {
+ JITDUMP("No procedure splitting will be done for this method with EH (by request)\n");
+ return;
+ }
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+ // TODO-CQ: handle hot/cold splitting in functions with EH (including synchronized methods
+ // that create EH in methods without explicit EH clauses).
+
+ if (compHndBBtabCount > 0)
+ {
+ JITDUMP("No procedure splitting will be done for this method with EH (implementation limitation)\n");
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ BasicBlock* firstColdBlock = nullptr;
+ BasicBlock* prevToFirstColdBlock = nullptr;
+ BasicBlock* block;
+ BasicBlock* lblk;
+
+ for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ {
+ bool blockMustBeInHotSection = false;
+
+#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+ if (bbIsHandlerBeg(block))
+ {
+ blockMustBeInHotSection = true;
+ }
+#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
+
+ // Do we have a candidate for the first cold block?
+ if (firstColdBlock != nullptr)
+ {
+ // We have a candidate for first cold block
+
+ // Is this a hot block?
+ if (blockMustBeInHotSection || (block->isRunRarely() == false))
+ {
+ // We have to restart the search for the first cold block
+ firstColdBlock = nullptr;
+ prevToFirstColdBlock = nullptr;
+ }
+ }
+ else // (firstColdBlock == NULL)
+ {
+ // We don't have a candidate for first cold block
+
+ // Is this a cold block?
+ if (!blockMustBeInHotSection && (block->isRunRarely() == true))
+ {
+ //
+ // If the last block that was hot was a BBJ_COND
+ // then we will have to add an unconditional jump
+                // so the code size for block needs to be large
+ // enough to make it worth our while
+ //
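+                // (fgGetCodeEstimate() gives a rough code size; the ">= 8" threshold mirrors the
+                // "7 bytes or less stays in the Hot section" rule used later in this function,
+                // since the extra jump we would have to add costs about 5 bytes.)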
+ if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8))
+ {
+ // This block is now a candidate for first cold block
+ // Also remember the predecessor to this block
+ firstColdBlock = block;
+ prevToFirstColdBlock = lblk;
+ }
+ }
+ }
+ }
+
+ if (firstColdBlock == fgFirstBB)
+ {
+ // If the first block is Cold then we can't move any blocks
+ // into the cold section
+
+ firstColdBlock = nullptr;
+ }
+
+ if (firstColdBlock != nullptr)
+ {
+ noway_assert(prevToFirstColdBlock != nullptr);
+
+ if (prevToFirstColdBlock == nullptr)
+ {
+ return; // To keep Prefast happy
+ }
+
+ // If we only have one cold block
+ // then it may not be worth it to move it
+ // into the Cold section as a jump to the
+ // Cold section is 5 bytes in size.
+ //
+ if (firstColdBlock->bbNext == nullptr)
+ {
+ // If the size of the cold block is 7 or less
+ // then we will keep it in the Hot section.
+ //
+ if (fgGetCodeEstimate(firstColdBlock) < 8)
+ {
+ firstColdBlock = nullptr;
+ goto EXIT;
+ }
+ }
+
+        // When the last Hot block falls through into the Cold section
+        // we may need to add a jump
+ // we may need to add a jump
+ //
+ if (prevToFirstColdBlock->bbFallsThrough())
+ {
+ switch (prevToFirstColdBlock->bbJumpKind)
+ {
+ default:
+ noway_assert(!"Unhandled jumpkind in fgDetermineFirstColdBlock()");
+
+ case BBJ_CALLFINALLY:
+ // A BBJ_CALLFINALLY that falls through is always followed
+ // by an empty BBJ_ALWAYS.
+ //
+ assert(prevToFirstColdBlock->isBBCallAlwaysPair());
+ firstColdBlock =
+ firstColdBlock->bbNext; // Note that this assignment could make firstColdBlock == nullptr
+ break;
+
+ case BBJ_COND:
+ //
+ // This is a slightly more complicated case, because we will
+ // probably need to insert a block to jump to the cold section.
+ //
+ if (firstColdBlock->isEmpty() && (firstColdBlock->bbJumpKind == BBJ_ALWAYS))
+ {
+ // We can just use this block as the transitionBlock
+ firstColdBlock = firstColdBlock->bbNext;
+ // Note that this assignment could make firstColdBlock == NULL
+ }
+ else
+ {
+ BasicBlock* transitionBlock = fgNewBBafter(BBJ_ALWAYS, prevToFirstColdBlock, true);
+ transitionBlock->bbJumpDest = firstColdBlock;
+ transitionBlock->inheritWeight(firstColdBlock);
+
+ noway_assert(fgComputePredsDone);
+
+ // Update the predecessor list for firstColdBlock
+ fgReplacePred(firstColdBlock, prevToFirstColdBlock, transitionBlock);
+
+ // Add prevToFirstColdBlock as a predecessor for transitionBlock
+ fgAddRefPred(transitionBlock, prevToFirstColdBlock);
+ }
+ break;
+
+ case BBJ_NONE:
+ // If the block preceding the first cold block is BBJ_NONE,
+ // convert it to BBJ_ALWAYS to force an explicit jump.
+
+ prevToFirstColdBlock->bbJumpDest = firstColdBlock;
+ prevToFirstColdBlock->bbJumpKind = BBJ_ALWAYS;
+ break;
+ }
+ }
+ }
+
+ if (firstColdBlock != nullptr)
+ {
+ firstColdBlock->bbFlags |= BBF_JMP_TARGET;
+
+ for (block = firstColdBlock; block; block = block->bbNext)
+ {
+ block->bbFlags |= BBF_COLD;
+ }
+ }
+
+EXIT:;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (firstColdBlock)
+ {
+ printf("fgFirstColdBlock is BB%02u.\n", firstColdBlock->bbNum);
+ }
+ else
+ {
+ printf("fgFirstColdBlock is NULL.\n");
+ }
+
+ fgDispBasicBlocks();
+ }
+
+ fgVerifyHandlerTab();
+#endif // DEBUG
+
+ fgFirstColdBlock = firstColdBlock;
+}
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+/*****************************************************************************
+ *
+ * Function called to "comb" the basic block list.
+ * Removes any empty blocks, unreachable blocks and redundant jumps.
+ * Most of those appear after dead store removal and folding of conditionals.
+ *
+ * Returns: true if the flowgraph has been modified
+ *
+ * It also compacts basic blocks
+ * (consecutive basic blocks that should in fact be one).
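+ *
+ * For illustration (hypothetical block numbers): a BBJ_COND block BB03 that jumps around an
+ * empty BBJ_ALWAYS block BB04 whose target is BB07 gets its condition reversed and is
+ * retargeted straight to BB07, after which BB04 is removed; a jump whose target is simply
+ * the next block is removed via fgOptimizeBranchToNext().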
+ *
+ * NOTE:
+ * Debuggable code and Min Optimization JIT also introduce basic blocks
+ * but we do not optimize those!
+ */
+
+bool Compiler::fgUpdateFlowGraph(bool doTailDuplication)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgUpdateFlowGraph()");
+ }
+#endif // DEBUG
+
+ /* This should never be called for debuggable code */
+
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBefore updating the flow graph:\n");
+ fgDispBasicBlocks(verboseTrees);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Walk all the basic blocks - look for unconditional jumps, empty blocks, blocks to compact, etc...
+ *
+ * OBSERVATION:
+     * Once a block is removed, the predecessor lists are no longer accurate (assuming they were at the beginning).
+     * For now we will only use the information in bbRefs because it is easier to keep updated.
+ */
+
+ bool modified = false;
+ bool change;
+ do
+ {
+ change = false;
+
+ BasicBlock* block; // the current block
+ BasicBlock* bPrev = nullptr; // the previous non-worthless block
+ BasicBlock* bNext; // the successor of the current block
+ BasicBlock* bDest; // the jump target of the current block
+
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+            /* Some blocks may already be marked as removed by other optimizations
+             * (e.g. worthless loop removal), without being explicitly removed
+ * from the list.
+ */
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ if (bPrev)
+ {
+ bPrev->setNext(block->bbNext);
+ }
+ else
+ {
+ /* WEIRD first basic block is removed - should have an assert here */
+ noway_assert(!"First basic block marked as BBF_REMOVED???");
+
+ fgFirstBB = block->bbNext;
+ }
+ continue;
+ }
+
+ /* We jump to the REPEAT label if we performed a change involving the current block
+ * This is in case there are other optimizations that can show up
+ * (e.g. - compact 3 blocks in a row)
+ * If nothing happens, we then finish the iteration and move to the next block
+ */
+
+ REPEAT:;
+
+ bNext = block->bbNext;
+ bDest = nullptr;
+
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ bDest = block->bbJumpDest;
+ if (doTailDuplication && fgOptimizeUncondBranchToSimpleCond(block, bDest))
+ {
+ change = true;
+ modified = true;
+ bDest = block->bbJumpDest;
+ bNext = block->bbNext;
+ }
+ }
+
+ // Remove JUMPS to the following block
+ // and optimize any JUMPS to JUMPS
+
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS)
+ {
+ bDest = block->bbJumpDest;
+ if (bDest == bNext)
+ {
+ if (fgOptimizeBranchToNext(block, bNext, bPrev))
+ {
+ change = true;
+ modified = true;
+ bDest = nullptr;
+ }
+ }
+ }
+
+ if (bDest != nullptr)
+ {
+ // Do we have a JUMP to an empty unconditional JUMP block?
+ if (bDest->isEmpty() && (bDest->bbJumpKind == BBJ_ALWAYS) &&
+ (bDest != bDest->bbJumpDest)) // special case for self jumps
+ {
+ if (fgOptimizeBranchToEmptyUnconditional(block, bDest))
+ {
+ change = true;
+ modified = true;
+ goto REPEAT;
+ }
+ }
+
+ // Check for a conditional branch that just skips over an empty BBJ_ALWAYS block
+
+ if ((block->bbJumpKind == BBJ_COND) && // block is a BBJ_COND block
+ (bNext != nullptr) && // block is not the last block
+ (bNext->bbRefs == 1) && // No other block jumps to bNext
+ (bNext->bbNext == bDest) && // The block after bNext is the BBJ_COND jump dest
+ (bNext->bbJumpKind == BBJ_ALWAYS) && // The next block is a BBJ_ALWAYS block
+                    bNext->isEmpty() &&                   // and it is an empty block
+ (bNext != bNext->bbJumpDest) && // special case for self jumps
+ (bDest != fgFirstColdBlock))
+ {
+ bool optimizeJump = true;
+
+ // We do not optimize jumps between two different try regions.
+ // However jumping to a block that is not in any try region is OK
+ //
+ if (bDest->hasTryIndex() && !BasicBlock::sameTryRegion(block, bDest))
+ {
+ optimizeJump = false;
+ }
+
+ // Also consider bNext's try region
+ //
+ if (bNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bNext))
+ {
+ optimizeJump = false;
+ }
+
+ // If we are optimizing using real profile weights
+ // then don't optimize a conditional jump to an unconditional jump
+ // until after we have computed the edge weights
+ //
+ if (fgIsUsingProfileWeights())
+ {
+                        // if block and bDest are in different hot/cold regions we can't do this optimization
+ // because we can't allow fall-through into the cold region.
+ if (!fgEdgeWeightsComputed || fgInDifferentRegions(block, bDest))
+ {
+ fgNeedsUpdateFlowGraph = true;
+ optimizeJump = false;
+ }
+ }
+
+ if (optimizeJump)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReversing a conditional jump around an unconditional jump (BB%02u -> BB%02u -> "
+ "BB%02u)\n",
+ block->bbNum, bDest->bbNum, bNext->bbJumpDest->bbNum);
+ }
+#endif // DEBUG
+ /* Reverse the jump condition */
+
+ GenTree* test = block->lastNode();
+ noway_assert(test->gtOper == GT_JTRUE);
+
+ GenTree* cond = gtReverseCond(test->gtOp.gtOp1);
+ assert(cond == test->gtOp.gtOp1); // Ensure `gtReverseCond` did not create a new node.
+ test->gtOp.gtOp1 = cond;
+
+ // Optimize the Conditional JUMP to go to the new target
+ block->bbJumpDest = bNext->bbJumpDest;
+
+ fgAddRefPred(bNext->bbJumpDest, block, fgRemoveRefPred(bNext->bbJumpDest, bNext));
+
+ /*
+ Unlink bNext from the BasicBlock list; note that we can
+ do this even though other blocks could jump to it - the
+                                reason is that elsewhere in this function we always
+                                redirect jumps-to-jumps so that they target the final label,
+ so even if another block jumps to bNext it won't matter
+ once we're done since any such jump will be redirected
+ to the final target by the time we're done here.
+ */
+
+ fgRemoveRefPred(bNext, block);
+ fgUnlinkBlock(bNext);
+
+ /* Mark the block as removed */
+ bNext->bbFlags |= BBF_REMOVED;
+
+ // If this is the first Cold basic block update fgFirstColdBlock
+ if (bNext == fgFirstColdBlock)
+ {
+ fgFirstColdBlock = bNext->bbNext;
+ }
+
+ //
+ // If we removed the end of a try region or handler region
+ // we will need to update ebdTryLast or ebdHndLast.
+ //
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd;
+ HBtab++)
+ {
+ if ((HBtab->ebdTryLast == bNext) || (HBtab->ebdHndLast == bNext))
+ {
+ fgSkipRmvdBlocks(HBtab);
+ }
+ }
+
+ // we optimized this JUMP - goto REPEAT to catch similar cases
+ change = true;
+ modified = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter reversing the jump:\n");
+ fgDispBasicBlocks(verboseTrees);
+ }
+#endif // DEBUG
+
+ /*
+                                For a rare special case we cannot jump to REPEAT, as
+                                jumping to REPEAT would cause us to delete 'block': it
+                                currently appears to be unreachable because it is a self
+                                loop with only a single bbRef (itself). However, since the
+                                unlinked bNext has additional bbRefs (that we will later
+                                connect to 'block'), it is not really unreachable.
+ */
+ if ((bNext->bbRefs > 0) && (bNext->bbJumpDest == block) && (block->bbRefs == 1))
+ {
+ continue;
+ }
+
+ goto REPEAT;
+ }
+ }
+ }
+
+ //
+ // Update the switch jump table such that it follows jumps to jumps:
+ //
+ if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ if (fgOptimizeSwitchBranches(block))
+ {
+ change = true;
+ modified = true;
+ goto REPEAT;
+ }
+ }
+
+ noway_assert(!(block->bbFlags & BBF_REMOVED));
+
+ /* COMPACT blocks if possible */
+
+ if (fgCanCompactBlocks(block, bNext))
+ {
+ fgCompactBlocks(block, bNext);
+
+ /* we compacted two blocks - goto REPEAT to catch similar cases */
+ change = true;
+ modified = true;
+ goto REPEAT;
+ }
+
+ /* Remove unreachable or empty blocks - do not consider blocks marked BBF_DONT_REMOVE or genReturnBB block
+             * These include the first and last blocks of a TRY, exception handlers, and RANGE_CHECK_FAIL THROW blocks */
+
+ if ((block->bbFlags & BBF_DONT_REMOVE) == BBF_DONT_REMOVE || block == genReturnBB)
+ {
+ bPrev = block;
+ continue;
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Don't remove the BBJ_ALWAYS block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair.
+ if (block->countOfInEdges() == 0 && bPrev->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert(bPrev->isBBCallAlwaysPair());
+ noway_assert(!(bPrev->bbFlags & BBF_RETLESS_CALL));
+ noway_assert(block->bbJumpKind == BBJ_ALWAYS);
+ bPrev = block;
+ continue;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ noway_assert(!block->bbCatchTyp);
+ noway_assert(!(block->bbFlags & BBF_TRY_BEG));
+
+ /* Remove unreachable blocks
+ *
+ * We'll look for blocks that have countOfInEdges() = 0 (blocks may become
+ * unreachable due to a BBJ_ALWAYS introduced by conditional folding for example)
+ */
+
+ if (block->countOfInEdges() == 0)
+ {
+ /* no references -> unreachable - remove it */
+ /* For now do not update the bbNum, do it at the end */
+
+ fgRemoveBlock(block, true);
+
+ change = true;
+ modified = true;
+
+ /* we removed the current block - the rest of the optimizations won't have a target
+ * continue with the next one */
+
+ continue;
+ }
+ else if (block->countOfInEdges() == 1)
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ if (block->bbJumpDest == block)
+ {
+ fgRemoveBlock(block, true);
+
+ change = true;
+ modified = true;
+
+ /* we removed the current block - the rest of the optimizations
+ * won't have a target so continue with the next block */
+
+ continue;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ noway_assert(!(block->bbFlags & BBF_REMOVED));
+
+ /* Remove EMPTY blocks */
+
+ if (block->isEmpty())
+ {
+ assert(bPrev == block->bbPrev);
+ if (fgOptimizeEmptyBlock(block))
+ {
+ change = true;
+ modified = true;
+ }
+
+ /* Have we removed the block? */
+
+ if (block->bbFlags & BBF_REMOVED)
+ {
+ /* block was removed - no change to bPrev */
+ continue;
+ }
+ }
+
+ /* Set the predecessor of the last reachable block
+             * If we removed the current block, the predecessor remains unchanged;
+             * otherwise, since the current block is OK, it becomes the predecessor */
+
+ noway_assert(!(block->bbFlags & BBF_REMOVED));
+
+ bPrev = block;
+ }
+ } while (change);
+
+ fgNeedsUpdateFlowGraph = false;
+
+#ifdef DEBUG
+ if (verbose && modified)
+ {
+ printf("\nAfter updating the flow graph:\n");
+ fgDispBasicBlocks(verboseTrees);
+ fgDispHandlerTab();
+ }
+
+ if (compRationalIRForm)
+ {
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ LIR::AsRange(block).CheckLIR(this);
+ }
+ }
+
+ fgVerifyHandlerTab();
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ fgDebugCheckUpdate();
+#endif // DEBUG
+
+ return modified;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ * Check that the flow graph is really updated
+ */
+
+#ifdef DEBUG
+
+void Compiler::fgDebugCheckUpdate()
+{
+ if (!compStressCompile(STRESS_CHK_FLOW_UPDATE, 30))
+ {
+ return;
+ }
+
+ /* We check for these conditions:
+ * no unreachable blocks -> no blocks have countOfInEdges() = 0
+ * no empty blocks -> no blocks have bbTreeList = 0
+ * no un-imported blocks -> no blocks have BBF_IMPORTED not set (this is
+     * kind of redundant with the above, but to make sure)
+ * no un-compacted blocks -> BBJ_NONE followed by block with no jumps to it (countOfInEdges() = 1)
+ */
+
+ BasicBlock* prev;
+ BasicBlock* block;
+ for (prev = nullptr, block = fgFirstBB; block != nullptr; prev = block, block = block->bbNext)
+ {
+ /* no unreachable blocks */
+
+ if ((block->countOfInEdges() == 0) && !(block->bbFlags & BBF_DONT_REMOVE)
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // With funclets, we never get rid of the BBJ_ALWAYS part of a BBJ_CALLFINALLY/BBJ_ALWAYS pair,
+ // even if we can prove that the finally block never returns.
+ && (prev == NULL || block->bbJumpKind != BBJ_ALWAYS || !prev->isBBCallAlwaysPair())
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ )
+ {
+ noway_assert(!"Unreachable block not removed!");
+ }
+
+ /* no empty blocks */
+
+ if (block->isEmpty() && !(block->bbFlags & BBF_DONT_REMOVE))
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_CALLFINALLY:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_RETURN:
+                /* a BBJ_ALWAYS is probably just a GOTO, but it will still have to be handled */
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ /* These jump kinds are allowed to have empty tree lists */
+ break;
+
+ default:
+ /* it may be the case that the block had more than one reference to it
+ * so we couldn't remove it */
+
+ if (block->countOfInEdges() == 0)
+ {
+ noway_assert(!"Empty block not removed!");
+ }
+ break;
+ }
+ }
+
+ /* no un-imported blocks */
+
+ if (!(block->bbFlags & BBF_IMPORTED))
+ {
+ /* internal blocks do not count */
+
+ if (!(block->bbFlags & BBF_INTERNAL))
+ {
+ noway_assert(!"Non IMPORTED block not removed!");
+ }
+ }
+
+ bool prevIsCallAlwaysPair = ((prev != nullptr) && prev->isBBCallAlwaysPair());
+
+        // Check for an unnecessary jump to the next block
+        bool doAssertOnJumpToNextBlock = false; // unless we have a BBJ_COND or BBJ_ALWAYS we cannot assert
+
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ // A conditional branch should never jump to the next block
+ // as it can be folded into a BBJ_NONE;
+ doAssertOnJumpToNextBlock = true;
+ }
+ else if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ // Generally we will want to assert if a BBJ_ALWAYS branches to the next block
+ doAssertOnJumpToNextBlock = true;
+
+ // If the BBF_KEEP_BBJ_ALWAYS flag is set we allow it to jump to the next block
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ doAssertOnJumpToNextBlock = false;
+ }
+
+ // A call/always pair is also allowed to jump to the next block
+ if (prevIsCallAlwaysPair)
+ {
+ doAssertOnJumpToNextBlock = false;
+ }
+
+ // We are allowed to have a branch from a hot 'block' to a cold 'bbNext'
+ //
+ if ((block->bbNext != nullptr) && fgInDifferentRegions(block, block->bbNext))
+ {
+ doAssertOnJumpToNextBlock = false;
+ }
+ }
+
+ if (doAssertOnJumpToNextBlock)
+ {
+ if (block->bbJumpDest == block->bbNext)
+ {
+ noway_assert(!"Unnecessary jump to the next block!");
+ }
+ }
+
+ /* Make sure BBF_KEEP_BBJ_ALWAYS is set correctly */
+
+ if ((block->bbJumpKind == BBJ_ALWAYS) && prevIsCallAlwaysPair)
+ {
+ noway_assert(block->bbFlags & BBF_KEEP_BBJ_ALWAYS);
+ }
+
+        /* For a BBJ_CALLFINALLY block we make sure that it is followed by */
+        /* a BBJ_ALWAYS block with BBF_INTERNAL set, */
+        /* or that it is a BBF_RETLESS_CALL */
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ assert((block->bbFlags & BBF_RETLESS_CALL) || block->isBBCallAlwaysPair());
+ }
+
+ /* no un-compacted blocks */
+
+ if (fgCanCompactBlocks(block, block->bbNext))
+ {
+ noway_assert(!"Found un-compacted blocks!");
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * We've inserted a new block before 'block' that should be part of the same EH region as 'block'.
+ * Update the EH table to make this so. Also, set the new block to have the right EH region data
+ * (copy the bbTryIndex, bbHndIndex, and bbCatchTyp from 'block' to the new predecessor, and clear
+ * 'bbCatchTyp' from 'block').
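+ *
+ * For example (hypothetical block numbers): if BB09 is inserted just before BB10 and BB10 is the
+ * 'ebdHndBeg' of some handler, then after this call BB09 becomes the handler's first block: it
+ * receives BB10's bbCatchTyp and the artificial extra ref count, and any BBJ_EHFILTERRET that
+ * previously targeted BB10 is retargeted to BB09.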
+ */
+void Compiler::fgExtendEHRegionBefore(BasicBlock* block)
+{
+ assert(block->bbPrev != nullptr);
+
+ BasicBlock* bPrev = block->bbPrev;
+
+ bPrev->copyEHRegion(block);
+
+ // The first block (and only the first block) of a handler has bbCatchTyp set
+ bPrev->bbCatchTyp = block->bbCatchTyp;
+ block->bbCatchTyp = BBCT_NONE;
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+        /* Multiple pointers in EHblkDsc can point to the same block. We cannot return early after the first match. */
+ if (HBtab->ebdTryBeg == block)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New first block of try: BB%02u\n", ehGetIndex(HBtab), bPrev->bbNum);
+ }
+#endif // DEBUG
+ HBtab->ebdTryBeg = bPrev;
+ bPrev->bbFlags |= BBF_TRY_BEG | BBF_DONT_REMOVE | BBF_HAS_LABEL;
+ // clear the TryBeg flag unless it begins another try region
+ if (!bbIsTryBeg(block))
+ {
+ block->bbFlags &= ~BBF_TRY_BEG;
+ }
+ }
+
+ if (HBtab->ebdHndBeg == block)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New first block of handler: BB%02u\n", ehGetIndex(HBtab), bPrev->bbNum);
+ }
+#endif // DEBUG
+
+ // The first block of a handler has an artificial extra refcount. Transfer that to the new block.
+ assert(block->bbRefs > 0);
+ block->bbRefs--;
+
+ HBtab->ebdHndBeg = bPrev;
+ bPrev->bbFlags |= BBF_DONT_REMOVE | BBF_HAS_LABEL;
+ bPrev->bbRefs++;
+
+ // If this is a handler for a filter, the last block of the filter will end with
+            // a BBJ_EHFILTERRET block whose bbJumpDest jumps to the first block of
+            // its handler. So we need to update it to keep things in sync.
+ //
+ if (HBtab->HasFilter())
+ {
+ BasicBlock* bFilterLast = HBtab->BBFilterLast();
+ assert(bFilterLast != nullptr);
+ assert(bFilterLast->bbJumpKind == BBJ_EHFILTERRET);
+ assert(bFilterLast->bbJumpDest == block);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: Updating bbJumpDest for filter ret block: BB%02u => BB%02u\n", ehGetIndex(HBtab),
+ bFilterLast->bbNum, bPrev->bbNum);
+ }
+#endif // DEBUG
+ // Change the bbJumpDest for bFilterLast from the old first 'block' to the new first 'bPrev'
+ bFilterLast->bbJumpDest = bPrev;
+ }
+ }
+
+ if (HBtab->HasFilter() && (HBtab->ebdFilter == block))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New first block of filter: BB%02u\n", ehGetIndex(HBtab), bPrev->bbNum);
+ }
+#endif // DEBUG
+
+ // The first block of a filter has an artificial extra refcount. Transfer that to the new block.
+ assert(block->bbRefs > 0);
+ block->bbRefs--;
+
+ HBtab->ebdFilter = bPrev;
+ bPrev->bbFlags |= BBF_DONT_REMOVE | BBF_HAS_LABEL;
+ bPrev->bbRefs++;
+ }
+ }
+}
+
+/*****************************************************************************
+ * We've inserted a new block after 'block' that should be part of the same EH region as 'block'.
+ * Update the EH table to make this so. Also, set the new block to have the right EH region data.
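+ *
+ * For example (hypothetical block numbers): if a new block BB21 was just inserted after BB20, and
+ * BB20 happens to be the 'ebdTryLast' or 'ebdHndLast' of some region, then ehUpdateLastBlocks
+ * below extends that region so that BB21 becomes its new last block.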
+ */
+
+void Compiler::fgExtendEHRegionAfter(BasicBlock* block)
+{
+ BasicBlock* newBlk = block->bbNext;
+ assert(newBlk != nullptr);
+
+ newBlk->copyEHRegion(block);
+ newBlk->bbCatchTyp =
+ BBCT_NONE; // Only the first block of a catch has this set, and 'newBlk' can't be the first block of a catch.
+
+ // TODO-Throughput: if the block is not in an EH region, then we don't need to walk the EH table looking for 'last'
+ // block pointers to update.
+ ehUpdateLastBlocks(block, newBlk);
+}
+
+/*****************************************************************************
+ *
+ * Insert a BasicBlock before the given block.
+ */
+
+BasicBlock* Compiler::fgNewBBbefore(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion)
+{
+ // Create a new BasicBlock and chain it in
+
+ BasicBlock* newBlk = bbNewBasicBlock(jumpKind);
+ newBlk->bbFlags |= BBF_INTERNAL;
+
+ fgInsertBBbefore(block, newBlk);
+
+ newBlk->bbRefs = 0;
+
+ if (newBlk->bbFallsThrough() && block->isRunRarely())
+ {
+ newBlk->bbSetRunRarely();
+ }
+
+ if (extendRegion)
+ {
+ fgExtendEHRegionBefore(block);
+ }
+ else
+ {
+ // When extendRegion is false the caller is responsible for setting these two values
+ newBlk->setTryIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ newBlk->setHndIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ }
+
+ // We assume that if the block we are inserting before is in the cold region, then this new
+ // block will also be in the cold region.
+ newBlk->bbFlags |= (block->bbFlags & BBF_COLD);
+
+ return newBlk;
+}
+
+/*****************************************************************************
+ *
+ * Insert a BasicBlock after the given block.
+ */
+
+BasicBlock* Compiler::fgNewBBafter(BBjumpKinds jumpKind, BasicBlock* block, bool extendRegion)
+{
+ // Create a new BasicBlock and chain it in
+
+ BasicBlock* newBlk = bbNewBasicBlock(jumpKind);
+ newBlk->bbFlags |= BBF_INTERNAL;
+
+ fgInsertBBafter(block, newBlk);
+
+ newBlk->bbRefs = 0;
+
+ if (block->bbFallsThrough() && block->isRunRarely())
+ {
+ newBlk->bbSetRunRarely();
+ }
+
+ if (extendRegion)
+ {
+ fgExtendEHRegionAfter(block);
+ }
+ else
+ {
+ // When extendRegion is false the caller is responsible for setting these two values
+ newBlk->setTryIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ newBlk->setHndIndex(MAX_XCPTN_INDEX); // Note: this is still a legal index, just unlikely
+ }
+
+ // If the new block is in the cold region (because the block we are inserting after
+ // is in the cold region), mark it as such.
+ newBlk->bbFlags |= (block->bbFlags & BBF_COLD);
+
+ return newBlk;
+}
+
+/*****************************************************************************
+ * Inserts basic block before existing basic block.
+ *
+ * If insertBeforeBlk is in the funclet region, then newBlk will be in the funclet region.
+ * (If insertBeforeBlk is the first block of the funclet region, then 'newBlk' will be the
+ * new first block of the funclet region.)
+ */
+void Compiler::fgInsertBBbefore(BasicBlock* insertBeforeBlk, BasicBlock* newBlk)
+{
+ if (insertBeforeBlk->bbPrev)
+ {
+ fgInsertBBafter(insertBeforeBlk->bbPrev, newBlk);
+ }
+ else
+ {
+ newBlk->setNext(fgFirstBB);
+
+ fgFirstBB = newBlk;
+ newBlk->bbPrev = nullptr;
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ /* Update fgFirstFuncletBB if insertBeforeBlk is the first block of the funclet region. */
+
+ if (fgFirstFuncletBB == insertBeforeBlk)
+ {
+ fgFirstFuncletBB = newBlk;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+}
+
+/*****************************************************************************
+ * Inserts basic block after existing basic block.
+ *
+ * If insertAfterBlk is in the funclet region, then newBlk will be in the funclet region.
+ * (It can't be used to insert a block as the first block of the funclet region).
+ */
+void Compiler::fgInsertBBafter(BasicBlock* insertAfterBlk, BasicBlock* newBlk)
+{
+ newBlk->bbNext = insertAfterBlk->bbNext;
+
+ if (insertAfterBlk->bbNext)
+ {
+ insertAfterBlk->bbNext->bbPrev = newBlk;
+ }
+
+ insertAfterBlk->bbNext = newBlk;
+ newBlk->bbPrev = insertAfterBlk;
+
+ if (fgLastBB == insertAfterBlk)
+ {
+ fgLastBB = newBlk;
+ assert(fgLastBB->bbNext == nullptr);
+ }
+}
+
+// We have two edges (bAlt => bCur) and (bCur => bNext).
+//
+// Returns true if the weight of (bAlt => bCur)
+// is greater than the weight of (bCur => bNext).
+// We compare the edge weights if we have valid edge weights
+// otherwise we compare block weights.
+//
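+// For illustration (hypothetical weights): with valid edge weights, if edge (bAlt => bCur) has
+// flEdgeWeightMin = 40 and edge (bCur => bNext) has flEdgeWeightMax = 30, we return true and
+// prefer making bAlt fall through into bCur. Without valid edge weights, a BBJ_ALWAYS bAlt with
+// bbWeight = 80 beats a bCur with bbWeight = 60, while a BBJ_COND bAlt would need more than
+// twice bCur's weight.
+//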
+bool Compiler::fgIsBetterFallThrough(BasicBlock* bCur, BasicBlock* bAlt)
+{
+ // bCur can't be NULL and must be a fall through bbJumpKind
+ noway_assert(bCur != nullptr);
+ noway_assert(bCur->bbFallsThrough());
+ noway_assert(bAlt != nullptr);
+
+ // We only handle the cases when bAlt is a BBJ_ALWAYS or a BBJ_COND
+ if ((bAlt->bbJumpKind != BBJ_ALWAYS) && (bAlt->bbJumpKind != BBJ_COND))
+ {
+ return false;
+ }
+
+ // if bAlt doesn't jump to bCur it can't be a better fall through than bCur
+ if (bAlt->bbJumpDest != bCur)
+ {
+ return false;
+ }
+
+ // Currently bNext is the fall through for bCur
+ BasicBlock* bNext = bCur->bbNext;
+ noway_assert(bNext != nullptr);
+
+ // We will set result to true if bAlt is a better fall through than bCur
+ bool result;
+ if (fgHaveValidEdgeWeights)
+ {
+ // We will compare the edge weight for our two choices
+ flowList* edgeFromAlt = fgGetPredForBlock(bCur, bAlt);
+ flowList* edgeFromCur = fgGetPredForBlock(bNext, bCur);
+ noway_assert(edgeFromCur != nullptr);
+ noway_assert(edgeFromAlt != nullptr);
+
+ result = (edgeFromAlt->flEdgeWeightMin > edgeFromCur->flEdgeWeightMax);
+ }
+ else
+ {
+ if (bAlt->bbJumpKind == BBJ_ALWAYS)
+ {
+ // Our result is true if bAlt's weight is more than bCur's weight
+ result = (bAlt->bbWeight > bCur->bbWeight);
+ }
+ else
+ {
+ noway_assert(bAlt->bbJumpKind == BBJ_COND);
+ // Our result is true if bAlt's weight is more than twice bCur's weight
+ result = (bAlt->bbWeight > (2 * bCur->bbWeight));
+ }
+ }
+ return result;
+}
+
+//------------------------------------------------------------------------
+// fgCheckEHCanInsertAfterBlock: Determine if a block can be inserted after
+// 'blk' and legally be put in the EH region specified by 'regionIndex'. This
+// can be true if the most nested region the block is in is already 'regionIndex',
+// as we'll just extend the most nested region (and any region ending at the same block).
+// It can also be true if it is the end of (a set of) EH regions, such that
+// inserting the block and properly extending some EH regions (if necessary)
+// puts the block in the correct region. We only consider the case of extending
+// an EH region after 'blk' (that is, to include 'blk' and the newly insert block);
+// we don't consider inserting a block as the first block of an EH region following 'blk'.
+//
+// Consider this example:
+//
+// try3 try2 try1
+// |--- | | BB01
+// | |--- | BB02
+// | | |--- BB03
+// | | | BB04
+// | |--- |--- BB05
+// | BB06
+// |----------------- BB07
+//
+// Passing BB05 with putInTryRegion==true and any of try1/try2/try3 as the region to insert into
+// will return 'true' in each case. Here are the cases:
+// 1. Insert into try1: the most nested EH region BB05 is in is already try1, so we can insert after
+// it and extend try1 (and try2).
+// 2. Insert into try2: we can extend try2, but leave try1 alone.
+// 3. Insert into try3: we can leave try1 and try2 alone, and put the new block just in try3. Note that
+// in this case, after we "loop outwards" in the EH nesting, we get to a place where we're in the middle
+// of the try3 region, not at the end of it.
+// In all cases, it is possible to put a block after BB05 and put it in any of these three 'try' regions legally.
+//
+// Filters are ignored; if 'blk' is in a filter, the answer will be false.
+//
+// Arguments:
+// blk - the BasicBlock we are checking to see if we can insert after.
+// regionIndex - the EH region we want to insert a block into. regionIndex is
+// in the range [0..compHndBBtabCount]; 0 means "main method".
+// putInTryRegion - 'true' if the new block should be inserted in the 'try' region of 'regionIndex'.
+// For regionIndex 0 (the "main method"), this should be 'true'.
+//
+// Return Value:
+// 'true' if a block can be inserted after 'blk' and put in EH region 'regionIndex', else 'false'.
+//
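+// For the diagram above, a caller could ask (hypothetical pointer and indices, with try1 as region 1):
+//
+//   bool canInsert = fgCheckEHCanInsertAfterBlock(bb05, /* regionIndex */ 1, /* putInTryRegion */ true);
+//
+// and would get 'true', since BB05 is the last block of the nested try1 and try2 regions.
+//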
+bool Compiler::fgCheckEHCanInsertAfterBlock(BasicBlock* blk, unsigned regionIndex, bool putInTryRegion)
+{
+ assert(blk != nullptr);
+ assert(regionIndex <= compHndBBtabCount);
+
+ if (regionIndex == 0)
+ {
+ assert(putInTryRegion);
+ }
+
+ bool inTryRegion;
+ unsigned nestedRegionIndex = ehGetMostNestedRegionIndex(blk, &inTryRegion);
+
+ bool insertOK = true;
+ for (;;)
+ {
+ if (nestedRegionIndex == regionIndex)
+ {
+ // This block is in the region we want to be in. We can insert here if it's the right type of region.
+ // (If we want to be in the 'try' region, but the block is in the handler region, then inserting a
+ // new block after 'blk' can't put it in the 'try' region, and vice-versa, since we only consider
+ // extending regions after, not prepending to regions.)
+ // This check will be 'true' if we are trying to put something in the main function (as putInTryRegion
+ // must be 'true' if regionIndex is zero, and inTryRegion will also be 'true' if nestedRegionIndex is zero).
+ insertOK = (putInTryRegion == inTryRegion);
+ break;
+ }
+ else if (nestedRegionIndex == 0)
+ {
+ // The block is in the main function, but we want to put something in a nested region. We can't do that.
+ insertOK = false;
+ break;
+ }
+
+ assert(nestedRegionIndex > 0);
+ EHblkDsc* ehDsc = ehGetDsc(nestedRegionIndex - 1); // ehGetDsc uses [0..compHndBBtabCount) form.
+
+ if (inTryRegion)
+ {
+ if (blk != ehDsc->ebdTryLast)
+ {
+ // Not the last block? Then it must be somewhere else within the try region, so we can't insert here.
+ insertOK = false;
+ break; // exit the 'for' loop
+ }
+ }
+ else
+ {
+ // We ignore filters.
+ if (blk != ehDsc->ebdHndLast)
+ {
+ // Not the last block? Then it must be somewhere else within the handler region, so we can't insert
+ // here.
+ insertOK = false;
+ break; // exit the 'for' loop
+ }
+ }
+
+ // Things look good for this region; check the enclosing regions, if any.
+
+ nestedRegionIndex =
+ ehGetEnclosingRegionIndex(nestedRegionIndex - 1,
+ &inTryRegion); // ehGetEnclosingRegionIndex uses [0..compHndBBtabCount) form.
+
+ // Convert to [0..compHndBBtabCount] form.
+ nestedRegionIndex = (nestedRegionIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : nestedRegionIndex + 1;
+ } // end of for(;;)
+
+ return insertOK;
+}
+
+//------------------------------------------------------------------------
+// Finds the block closest to endBlk in the range [startBlk..endBlk) after which a block can be
+// inserted easily. Note that endBlk cannot be returned; its predecessor is the last block that can
+// be returned. The new block will be put in an EH region described by the arguments regionIndex,
+// putInTryRegion, startBlk, and endBlk (explained below), so it must be legal to put the
+// new block after the insertion location block, give it the specified EH region index, and not break
+// EH nesting rules. This function is careful to choose a block in the correct EH region. However,
+// it assumes that the new block can ALWAYS be placed at the end (just before endBlk). That means
+// that the caller must ensure that is true.
+//
+// Below are the possible cases for the arguments to this method:
+// 1. putInTryRegion == true and regionIndex > 0:
+// Search in the try region indicated by regionIndex.
+// 2. putInTryRegion == false and regionIndex > 0:
+// a. If startBlk is the first block of a filter and endBlk is the block after the end of the
+//         filter (that is, startBlk and endBlk match the filter's bounds exactly), then choose a
+// location within this filter region. (Note that, due to IL rules, filters do not have any
+// EH nested within them.) Otherwise, filters are skipped.
+// b. Else, search in the handler region indicated by regionIndex.
+// 3. regionIndex = 0:
+// Search in the entire main method, excluding all EH regions. In this case, putInTryRegion must be true.
+//
+// This method makes sure to find an insertion point which would not cause the inserted block to
+// be put inside any inner try/filter/handler regions.
+//
+// The actual insertion occurs after the returned block. Note that the returned insertion point might
+// be the last block of a more nested EH region, because the new block will be inserted after the insertion
+// point, and will not extend the more nested EH region. For example:
+//
+// try3 try2 try1
+// |--- | | BB01
+// | |--- | BB02
+// | | |--- BB03
+// | | | BB04
+// | |--- |--- BB05
+// | BB06
+// |----------------- BB07
+//
+// for regionIndex==try3, putInTryRegion==true, we might return BB05, even though BB05 will have a try index
+// for try1 (the most nested 'try' region the block is in). That's because when we insert after BB05, the new
+// block will be in the correct, desired EH region, since try1 and try2 regions will not be extended to include
+// the inserted block. Furthermore, for regionIndex==try2, putInTryRegion==true, we can also return BB05. In this
+// case, when the new block is inserted, the try1 region remains the same, but we need extend region 'try2' to
+// include the inserted block. (We also need to check all parent regions as well, just in case any parent regions
+// also end on the same block, in which case we would also need to extend the parent regions. This is standard
+// procedure when inserting a block at the end of an EH region.)
+//
+// If nearBlk is non-nullptr then we return the closest block after nearBlk that will work best.
+//
+// We try to find a block in the appropriate region that is not a fallthrough block, so we can insert after it
+// without the need to insert a jump around the inserted block.
+//
+// Note that regionIndex is numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is, "0" is
+// "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
+//
+// Arguments:
+// regionIndex - the region index where the new block will be inserted. Zero means entire method;
+// non-zero means either a "try" or a "handler" region, depending on what putInTryRegion says.
+// putInTryRegion - 'true' to put the block in the 'try' region corresponding to 'regionIndex', 'false'
+// to put the block in the handler region. Should be 'true' if regionIndex==0.
+// startBlk - start block of range to search.
+// endBlk - end block of range to search (don't include this block in the range). Can be nullptr to indicate
+// the end of the function.
+// nearBlk - If non-nullptr, try to find an insertion location closely after this block. If nullptr, we insert
+// at the best location found towards the end of the acceptable block range.
+// jumpBlk - When nearBlk is set, this can be set to the block which jumps to bNext->bbNext (TODO: need to review
+// this?)
+// runRarely - true if the block being inserted is expected to be rarely run. This helps determine
+// the best place to put the new block, by putting in a place that has the same 'rarely run' characteristic.
+//
+// Return Value:
+// A block with the desired characteristics, so the new block will be inserted after this one.
+// If there is no suitable location, return nullptr. This should basically never happen.
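+//
+// For example, fgNewBBinRegion() below searches a 'try' region with a call of this shape
+// (a sketch only; the actual arguments are computed from the EH table):
+//
+//   afterBlk = fgFindInsertPoint(tryIndex, /* putInTryRegion */ true,
+//                                ehDsc->ebdTryBeg, ehDsc->ebdTryLast->bbNext,
+//                                nearBlk, /* jumpBlk */ nullptr, runRarely);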
+
+BasicBlock* Compiler::fgFindInsertPoint(unsigned regionIndex,
+ bool putInTryRegion,
+ BasicBlock* startBlk,
+ BasicBlock* endBlk,
+ BasicBlock* nearBlk,
+ BasicBlock* jumpBlk,
+ bool runRarely)
+{
+ noway_assert(startBlk != nullptr);
+ noway_assert(startBlk != endBlk);
+ noway_assert((regionIndex == 0 && putInTryRegion) || // Search in the main method
+ (putInTryRegion && regionIndex > 0 &&
+ startBlk->bbTryIndex == regionIndex) || // Search in the specified try region
+ (!putInTryRegion && regionIndex > 0 &&
+ startBlk->bbHndIndex == regionIndex)); // Search in the specified handler region
+
+#ifdef DEBUG
+ // Assert that startBlk precedes endBlk in the block list.
+ // We don't want to use bbNum to assert this condition, as we cannot depend on the block numbers being
+ // sequential at all times.
+ for (BasicBlock* b = startBlk; b != endBlk; b = b->bbNext)
+ {
+ assert(b != nullptr); // We reached the end of the block list, but never found endBlk.
+ }
+#endif // DEBUG
+
+ JITDUMP("fgFindInsertPoint(regionIndex=%u, putInTryRegion=%s, startBlk=BB%02u, endBlk=BB%02u, nearBlk=BB%02u, "
+ "jumpBlk=BB%02u, runRarely=%s)\n",
+ regionIndex, dspBool(putInTryRegion), startBlk->bbNum, (endBlk == nullptr) ? 0 : endBlk->bbNum,
+ (nearBlk == nullptr) ? 0 : nearBlk->bbNum, (jumpBlk == nullptr) ? 0 : jumpBlk->bbNum, dspBool(runRarely));
+
+ bool reachedNear = false; // Have we reached 'nearBlk' in our search? If not, we'll keep searching.
+ bool inFilter = false; // Are we in a filter region that we need to skip?
+ BasicBlock* bestBlk =
+ nullptr; // Set to the best insertion point we've found so far that meets all the EH requirements.
+ BasicBlock* goodBlk =
+ nullptr; // Set to an acceptable insertion point that we'll use if we don't find a 'best' option.
+ BasicBlock* blk;
+
+ if (nearBlk != nullptr)
+ {
+ // Does the nearBlk precede the startBlk?
+ for (blk = nearBlk; blk != nullptr; blk = blk->bbNext)
+ {
+ if (blk == startBlk)
+ {
+ reachedNear = true;
+ break;
+ }
+ else if (blk == endBlk)
+ {
+ break;
+ }
+ }
+ }
+
+ for (blk = startBlk; blk != endBlk; blk = blk->bbNext)
+ {
+ // The only way (blk == nullptr) could be true is if the caller passed an endBlk that preceded startBlk in the
+ // block list, or if endBlk isn't in the block list at all. In DEBUG, we'll instead hit the similar
+ // well-formedness assert earlier in this function.
+ noway_assert(blk != nullptr);
+
+ if (blk == nearBlk)
+ {
+ reachedNear = true;
+ }
+
+ if (blk->bbCatchTyp == BBCT_FILTER)
+ {
+ // Record the fact that we entered a filter region, so we don't insert into filters...
+ // Unless the caller actually wanted the block inserted in this exact filter region.
+ // Detect this by the fact that startBlk and endBlk point to the filter begin and end.
+ if (putInTryRegion || (blk != startBlk) || (startBlk != ehGetDsc(regionIndex - 1)->ebdFilter) ||
+ (endBlk != ehGetDsc(regionIndex - 1)->ebdHndBeg))
+ {
+ inFilter = true;
+ }
+ }
+ else if (blk->bbCatchTyp == BBCT_FILTER_HANDLER)
+ {
+ // Record the fact that we exited a filter region.
+ inFilter = false;
+ }
+
+ // Don't insert a block inside this filter region.
+ if (inFilter)
+ {
+ continue;
+ }
+
+ // Note that the new block will be inserted AFTER "blk". We check to make sure that doing so
+ // would put the block in the correct EH region. We make an assumption here that you can
+ // ALWAYS insert the new block before "endBlk" (that is, at the end of the search range)
+        // and be in the correct EH region. This must be guaranteed by the caller (as it is by
+ // fgNewBBinRegion(), which passes the search range as an exact EH region block range).
+ // Because of this assumption, we only check the EH information for blocks before the last block.
+ if (blk->bbNext != endBlk)
+ {
+ // We are in the middle of the search range. We can't insert the new block in
+ // an inner try or handler region. We can, however, set the insertion
+ // point to the last block of an EH try/handler region, if the enclosing
+ // region is the region we wish to insert in. (Since multiple regions can
+ // end at the same block, we need to search outwards, checking that the
+ // block is the last block of every EH region out to the region we want
+ // to insert in.) This is especially useful for putting a call-to-finally
+ // block on AMD64 immediately after its corresponding 'try' block, so in the
+ // common case, we'll just fall through to it. For example:
+ //
+ // BB01
+ // BB02 -- first block of try
+ // BB03
+ // BB04 -- last block of try
+ // BB05 -- first block of finally
+ // BB06
+ // BB07 -- last block of handler
+ // BB08
+ //
+ // Assume there is only one try/finally, so BB01 and BB08 are in the "main function".
+ // For AMD64 call-to-finally, we'll want to insert the BBJ_CALLFINALLY in
+ // the main function, immediately after BB04. This allows us to do that.
+
+ if (!fgCheckEHCanInsertAfterBlock(blk, regionIndex, putInTryRegion))
+ {
+ // Can't insert here.
+ continue;
+ }
+ }
+
+ // Look for an insert location:
+ // 1. We want blocks that don't end with a fall through,
+ // 2. Also, when blk equals nearBlk we may want to insert here.
+ if (!blk->bbFallsThrough() || (blk == nearBlk))
+ {
+ bool updateBestBlk = true; // We will probably update the bestBlk
+
+ // If blk falls through then we must decide whether to use the nearBlk
+ // hint
+ if (blk->bbFallsThrough())
+ {
+ noway_assert(blk == nearBlk);
+ if (jumpBlk != nullptr)
+ {
+ updateBestBlk = fgIsBetterFallThrough(blk, jumpBlk);
+ }
+ else
+ {
+ updateBestBlk = false;
+ }
+ }
+
+ // If we already have a best block, see if the 'runRarely' flags influences
+ // our choice. If we want a runRarely insertion point, and the existing best
+ // block is run rarely but the current block isn't run rarely, then don't
+ // update the best block.
+ // TODO-CQ: We should also handle the reverse case, where runRarely is false (we
+ // want a non-rarely-run block), but bestBlock->isRunRarely() is true. In that
+ // case, we should update the block, also. Probably what we want is:
+ // (bestBlk->isRunRarely() != runRarely) && (blk->isRunRarely() == runRarely)
+ if (updateBestBlk && (bestBlk != nullptr) && runRarely && bestBlk->isRunRarely() && !blk->isRunRarely())
+ {
+ updateBestBlk = false;
+ }
+
+ if (updateBestBlk)
+ {
+ // We found a 'best' insertion location, so save it away.
+ bestBlk = blk;
+
+ // If we've reached nearBlk, we've satisfied all the criteria,
+ // so we're done.
+ if (reachedNear)
+ {
+ goto DONE;
+ }
+
+ // If we haven't reached nearBlk, keep looking for a 'best' location, just
+ // in case we'll find one at or after nearBlk. If no nearBlk was specified,
+ // we prefer inserting towards the end of the given range, so keep looking
+ // for more acceptable insertion locations.
+ }
+ }
+
+ // No need to update goodBlk after we have set bestBlk, but we could still find a better
+ // bestBlk, so keep looking.
+ if (bestBlk != nullptr)
+ {
+ continue;
+ }
+
+ // Set the current block as a "good enough" insertion point, if it meets certain criteria.
+ // We'll return this block if we don't find a "best" block in the search range. The block
+ // can't be a BBJ_CALLFINALLY of a BBJ_CALLFINALLY/BBJ_ALWAYS pair (since we don't want
+ // to insert anything between these two blocks). Otherwise, we can use it. However,
+ // if we'd previously chosen a BBJ_COND block, then we'd prefer the "good" block to be
+ // something else. We keep updating it until we've reached the 'nearBlk', to push it as
+ // close to endBlk as possible.
+ if (!blk->isBBCallAlwaysPair())
+ {
+ if (goodBlk == nullptr)
+ {
+ goodBlk = blk;
+ }
+ else if ((goodBlk->bbJumpKind == BBJ_COND) || (blk->bbJumpKind != BBJ_COND))
+ {
+ if ((blk == nearBlk) || !reachedNear)
+ {
+ goodBlk = blk;
+ }
+ }
+ }
+ }
+
+    // If we didn't find a non-fall-through block, then insert at the last good block.
+
+ if (bestBlk == nullptr)
+ {
+ bestBlk = goodBlk;
+ }
+
+DONE:;
+
+ return bestBlk;
+}
+
+//------------------------------------------------------------------------
+// Creates a new BasicBlock and inserts it in a specific EH region, given by 'tryIndex', 'hndIndex', and 'putInFilter'.
+//
+// If 'putInFilter' is true, then the block is inserted in the filter region given by 'hndIndex'. In this case, tryIndex
+// must be a less nested EH region (that is, tryIndex > hndIndex).
+//
+// Otherwise, the block is inserted in either the try region or the handler region, depending on which one is the inner
+// region. In other words, if the try region indicated by tryIndex is nested in the handler region indicated by
+// hndIndex, then the new BB will be created in the try region, and vice versa.
+//
+// Note that tryIndex and hndIndex are numbered the same as BasicBlock::bbTryIndex and BasicBlock::bbHndIndex, that is,
+// "0" is "main method" and otherwise is +1 from normal, so we can call, e.g., ehGetDsc(tryIndex - 1).
+//
+// To be more specific, this function will create a new BB in one of the following 5 regions (if putInFilter is false):
+// 1. When tryIndex = 0 and hndIndex = 0:
+// The new BB will be created in the method region.
+// 2. When tryIndex != 0 and hndIndex = 0:
+// The new BB will be created in the try region indicated by tryIndex.
+// 3. When tryIndex == 0 and hndIndex != 0:
+// The new BB will be created in the handler region indicated by hndIndex.
+// 4. When tryIndex != 0 and hndIndex != 0 and tryIndex < hndIndex:
+// In this case, the try region is nested inside the handler region. Therefore, the new BB will be created
+// in the try region indicated by tryIndex.
+// 5. When tryIndex != 0 and hndIndex != 0 and tryIndex > hndIndex:
+// In this case, the handler region is nested inside the try region. Therefore, the new BB will be created
+// in the handler region indicated by hndIndex.
+//
+// Note that if tryIndex != 0 and hndIndex != 0 then tryIndex must not be equal to hndIndex (this makes sense because
+// if they are equal, you are asking to put the new block in both the try and handler, which is impossible).
+//
+// The BasicBlock will not be inserted inside an EH region that is more nested than the requested tryIndex/hndIndex
+// region (so the function is careful to skip more nested EH regions when searching for a place to put the new block).
+//
+// This function cannot be used to insert a block as the first block of any region. It always inserts a block after
+// an existing block in the given region.
+//
+// If nearBlk is nullptr, or the block is run rarely, then the new block is assumed to be run rarely.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+// tryIndex - the try region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// hndIndex - the handler region to insert the new block in, described above. This must be a number in the range
+// [0..compHndBBtabCount].
+// nearBlk - insert the new block closely after this block, if possible. If nullptr, put the new block anywhere
+// in the requested region.
+// putInFilter - put the new block in the filter region given by hndIndex, as described above.
+// runRarely - 'true' if the new block is run rarely.
+// insertAtEnd - 'true' if the block should be inserted at the end of the region. Note: this is currently only
+// implemented when inserting into the main function (not into any EH region).
+//
+// Return Value:
+// The new block.
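+//
+// For example (a sketch only, hypothetical caller): to create a rarely run block in the same
+// try/handler region as an existing block 'blk', and close to it:
+//
+//   BasicBlock* newBlk = fgNewBBinRegion(BBJ_THROW, blk->bbTryIndex, blk->bbHndIndex,
+//                                        /* nearBlk */ blk, /* putInFilter */ false,
+//                                        /* runRarely */ true, /* insertAtEnd */ false);
+//
+// (The srcBlk overload below does essentially this, plus the check for filter regions.)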
+
+BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind,
+ unsigned tryIndex,
+ unsigned hndIndex,
+ BasicBlock* nearBlk,
+ bool putInFilter /* = false */,
+ bool runRarely /* = false */,
+ bool insertAtEnd /* = false */)
+{
+ assert(tryIndex <= compHndBBtabCount);
+ assert(hndIndex <= compHndBBtabCount);
+
+ /* afterBlk is the block which will precede the newBB */
+ BasicBlock* afterBlk;
+
+ // start and end limit for inserting the block
+ BasicBlock* startBlk = nullptr;
+ BasicBlock* endBlk = nullptr;
+
+ bool putInTryRegion = true;
+ unsigned regionIndex = 0;
+
+ // First, figure out which region (the "try" region or the "handler" region) to put the newBB in.
+ if ((tryIndex == 0) && (hndIndex == 0))
+ {
+ assert(!putInFilter);
+
+ endBlk = fgEndBBAfterMainFunction(); // don't put new BB in funclet region
+
+ if (insertAtEnd || (nearBlk == nullptr))
+ {
+ /* We'll just insert the block at the end of the method, before the funclets */
+
+ afterBlk = fgLastBBInMainFunction();
+ goto _FoundAfterBlk;
+ }
+ else
+ {
+ // We'll search through the entire method
+ startBlk = fgFirstBB;
+ }
+
+ noway_assert(regionIndex == 0);
+ }
+ else
+ {
+ noway_assert(tryIndex > 0 || hndIndex > 0);
+ PREFIX_ASSUME(tryIndex <= compHndBBtabCount);
+ PREFIX_ASSUME(hndIndex <= compHndBBtabCount);
+
+ // Decide which region to put in, the "try" region or the "handler" region.
+ if (tryIndex == 0)
+ {
+ noway_assert(hndIndex > 0);
+ putInTryRegion = false;
+ }
+ else if (hndIndex == 0)
+ {
+ noway_assert(tryIndex > 0);
+ noway_assert(putInTryRegion);
+ assert(!putInFilter);
+ }
+ else
+ {
+ noway_assert(tryIndex > 0 && hndIndex > 0 && tryIndex != hndIndex);
+ putInTryRegion = (tryIndex < hndIndex);
+ }
+
+ if (putInTryRegion)
+ {
+ // Try region is the inner region.
+ // In other words, try region must be nested inside the handler region.
+ noway_assert(hndIndex == 0 || bbInHandlerRegions(hndIndex - 1, ehGetDsc(tryIndex - 1)->ebdTryBeg));
+ assert(!putInFilter);
+ }
+ else
+ {
+ // Handler region is the inner region.
+ // In other words, handler region must be nested inside the try region.
+ noway_assert(tryIndex == 0 || bbInTryRegions(tryIndex - 1, ehGetDsc(hndIndex - 1)->ebdHndBeg));
+ }
+
+ // Figure out the start and end block range to search for an insertion location. Pick the beginning and
+ // ending blocks of the target EH region (the 'endBlk' is one past the last block of the EH region, to make
+ // loop iteration easier). Note that, after funclets have been created (for FEATURE_EH_FUNCLETS),
+ // this linear block range will not include blocks of handlers for try/handler clauses nested within
+ // this EH region, as those blocks have been extracted as funclets. That is ok, though, because we don't
+ // want to insert a block in any nested EH region.
+
+ if (putInTryRegion)
+ {
+ // We will put the newBB in the try region.
+ EHblkDsc* ehDsc = ehGetDsc(tryIndex - 1);
+ startBlk = ehDsc->ebdTryBeg;
+ endBlk = ehDsc->ebdTryLast->bbNext;
+ regionIndex = tryIndex;
+ }
+ else if (putInFilter)
+ {
+ // We will put the newBB in the filter region.
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex - 1);
+ startBlk = ehDsc->ebdFilter;
+ endBlk = ehDsc->ebdHndBeg;
+ regionIndex = hndIndex;
+ }
+ else
+ {
+ // We will put the newBB in the handler region.
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex - 1);
+ startBlk = ehDsc->ebdHndBeg;
+ endBlk = ehDsc->ebdHndLast->bbNext;
+ regionIndex = hndIndex;
+ }
+
+ noway_assert(regionIndex > 0);
+ }
+
+ // Now find the insertion point.
+ afterBlk = fgFindInsertPoint(regionIndex, putInTryRegion, startBlk, endBlk, nearBlk, nullptr, runRarely);
+
+_FoundAfterBlk:;
+
+ /* We have decided to insert the block after 'afterBlk'. */
+ noway_assert(afterBlk != nullptr);
+
+ JITDUMP("fgNewBBinRegion(jumpKind=%u, tryIndex=%u, hndIndex=%u, putInFilter=%s, runRarely=%s, insertAtEnd=%s): "
+ "inserting after BB%02u\n",
+ jumpKind, tryIndex, hndIndex, dspBool(putInFilter), dspBool(runRarely), dspBool(insertAtEnd),
+ afterBlk->bbNum);
+
+ return fgNewBBinRegionWorker(jumpKind, afterBlk, regionIndex, putInTryRegion);
+}
+
+//------------------------------------------------------------------------
+// Creates a new BasicBlock and inserts it in the same EH region as 'srcBlk'.
+//
+// See the implementation of fgNewBBinRegion() used by this one for more notes.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+// srcBlk - insert the new block in the same EH region as this block, and closely after it if possible.
+//
+// Return Value:
+// The new block.
+
+BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind,
+ BasicBlock* srcBlk,
+ bool runRarely /* = false */,
+ bool insertAtEnd /* = false */)
+{
+ assert(srcBlk != nullptr);
+
+ const unsigned tryIndex = srcBlk->bbTryIndex;
+ const unsigned hndIndex = srcBlk->bbHndIndex;
+ bool putInFilter = false;
+
+ // Check to see if we need to put the new block in a filter. We do if srcBlk is in a filter.
+ // This can only be true if there is a handler index, and the handler region is more nested than the
+ // try region (if any). This is because no EH regions can be nested within a filter.
+ if (BasicBlock::ehIndexMaybeMoreNested(hndIndex, tryIndex))
+ {
+ assert(hndIndex != 0); // If hndIndex is more nested, we must be in some handler!
+ putInFilter = ehGetDsc(hndIndex - 1)->InFilterRegionBBRange(srcBlk);
+ }
+
+ return fgNewBBinRegion(jumpKind, tryIndex, hndIndex, srcBlk, putInFilter, runRarely, insertAtEnd);
+}
+
+//------------------------------------------------------------------------
+// fgNewBBinRegion: Creates a new BasicBlock and inserts it at the end of the function.
+//
+// See the implementation of fgNewBBinRegion() used by this one for more notes.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+//
+// Return Value:
+// The new block.
+
+BasicBlock* Compiler::fgNewBBinRegion(BBjumpKinds jumpKind)
+{
+ return fgNewBBinRegion(jumpKind, 0, 0, nullptr, /* putInFilter */ false, /* runRarely */ false,
+ /* insertAtEnd */ true);
+}
+
+//------------------------------------------------------------------------
+// fgNewBBinRegionWorker: Creates a new BasicBlock, and inserts it after 'afterBlk'.
+//
+// The block cannot be inserted into a more nested try/handler region than that specified by 'regionIndex'.
+// (It is given exactly 'regionIndex'.) Thus, the parameters must be passed to ensure proper EH nesting
+// rules are followed.
+//
+// Arguments:
+// jumpKind - the jump kind of the new block to create.
+// afterBlk - insert the new block after this one.
+// regionIndex - the block will be put in this EH region.
+// putInTryRegion - If true, put the new block in the 'try' region corresponding to 'regionIndex', and
+// set its handler index to the most nested handler region enclosing that 'try' region.
+// Otherwise, put the block in the handler region specified by 'regionIndex', and set its 'try'
+// index to the most nested 'try' region enclosing that handler region.
+//
+// Return Value:
+// The new block.
+
+BasicBlock* Compiler::fgNewBBinRegionWorker(BBjumpKinds jumpKind,
+ BasicBlock* afterBlk,
+ unsigned regionIndex,
+ bool putInTryRegion)
+{
+ /* Insert the new block */
+ BasicBlock* afterBlkNext = afterBlk->bbNext;
+ (void)afterBlkNext; // prevent "unused variable" error from GCC
+ BasicBlock* newBlk = fgNewBBafter(jumpKind, afterBlk, false);
+
+ if (putInTryRegion)
+ {
+ noway_assert(regionIndex <= MAX_XCPTN_INDEX);
+ newBlk->bbTryIndex = (unsigned short)regionIndex;
+ newBlk->bbHndIndex = bbFindInnermostHandlerRegionContainingTryRegion(regionIndex);
+ }
+ else
+ {
+ newBlk->bbTryIndex = bbFindInnermostTryRegionContainingHandlerRegion(regionIndex);
+ noway_assert(regionIndex <= MAX_XCPTN_INDEX);
+ newBlk->bbHndIndex = (unsigned short)regionIndex;
+ }
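+
+    // At this point newBlk has both of its EH indices set: the region given by 'regionIndex',
+    // plus the most nested enclosing region of the other kind (or 0 if there is none). For
+    // example, if putInTryRegion is true and regionIndex is 2, bbTryIndex becomes 2 and
+    // bbHndIndex becomes the innermost handler region (if any) enclosing that try region.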
+
+ // We're going to compare for equal try regions (to handle the case of 'mutually protect'
+ // regions). We need to save off the current try region, otherwise we might change it
+ // before it gets compared later, thereby making future comparisons fail.
+
+ BasicBlock* newTryBeg;
+ BasicBlock* newTryLast;
+ (void)ehInitTryBlockRange(newBlk, &newTryBeg, &newTryLast);
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Is afterBlk at the end of a try region?
+ if (HBtab->ebdTryLast == afterBlk)
+ {
+ noway_assert(afterBlkNext == newBlk->bbNext);
+
+ bool extendTryRegion = false;
+ if (newBlk->hasTryIndex())
+ {
+ // We're adding a block after the last block of some try region. Do
+ // we extend the try region to include the block, or not?
+ // If the try region is exactly the same as the try region
+ // associated with the new block (based on the block's try index,
+ // which represents the innermost try the block is a part of), then
+ // we extend it.
+ // If the try region is a "parent" try region -- an enclosing try region
+ // that has the same last block as the new block's try region -- then
+ // we also extend. For example:
+ // try { // 1
+ // ...
+ // try { // 2
+ // ...
+ // } /* 2 */ } /* 1 */
+ // This example is meant to indicate that both try regions 1 and 2 end at
+ // the same block, and we're extending 2. Thus, we must also extend 1. If we
+ // only extended 2, we would break proper nesting. (Dev11 bug 137967)
+
+ extendTryRegion = HBtab->ebdIsSameTry(newTryBeg, newTryLast) || bbInTryRegions(XTnum, newBlk);
+ }
+
+ // Does newBlk extend this try region?
+ if (extendTryRegion)
+ {
+ // Yes, newBlk extends this try region
+
+                // newBlk is now the last block of the try region.
+ fgSetTryEnd(HBtab, newBlk);
+ }
+ }
+
+ // Is afterBlk at the end of a handler region?
+ if (HBtab->ebdHndLast == afterBlk)
+ {
+ noway_assert(afterBlkNext == newBlk->bbNext);
+
+ // Does newBlk extend this handler region?
+ bool extendHndRegion = false;
+ if (newBlk->hasHndIndex())
+ {
+ // We're adding a block after the last block of some handler region. Do
+ // we extend the handler region to include the block, or not?
+ // If the handler region is exactly the same as the handler region
+ // associated with the new block (based on the block's handler index,
+ // which represents the innermost handler the block is a part of), then
+ // we extend it.
+ // If the handler region is a "parent" handler region -- an enclosing
+ // handler region that has the same last block as the new block's handler
+ // region -- then we also extend. For example:
+ // catch { // 1
+ // ...
+ // catch { // 2
+ // ...
+ // } /* 2 */ } /* 1 */
+ // This example is meant to indicate that both handler regions 1 and 2 end at
+ // the same block, and we're extending 2. Thus, we must also extend 1. If we
+ // only extended 2, we would break proper nesting. (Dev11 bug 372051)
+
+ extendHndRegion = bbInHandlerRegions(XTnum, newBlk);
+ }
+
+ if (extendHndRegion)
+ {
+ // Yes, newBlk extends this handler region
+
+ // newBlk is now the last block of the handler.
+ fgSetHndEnd(HBtab, newBlk);
+ }
+ }
+ }
+
+ /* If afterBlk falls through, we insert a jump around newBlk */
+ fgConnectFallThrough(afterBlk, newBlk->bbNext);
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+#endif
+
+ return newBlk;
+}
+
+/*****************************************************************************
+ *
+ *  Map a SpecialCodeKind to the corresponding CORINFO_HELP_XXX throw helper.
+ */
+
+/* static */
+unsigned Compiler::acdHelper(SpecialCodeKind codeKind)
+{
+ switch (codeKind)
+ {
+ case SCK_RNGCHK_FAIL:
+ return CORINFO_HELP_RNGCHKFAIL;
+#if COR_JIT_EE_VERSION > 460
+ case SCK_ARG_EXCPN:
+ return CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
+ case SCK_ARG_RNG_EXCPN:
+ return CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
+#endif // COR_JIT_EE_VERSION
+ case SCK_DIV_BY_ZERO:
+ return CORINFO_HELP_THROWDIVZERO;
+ case SCK_ARITH_EXCPN:
+ return CORINFO_HELP_OVERFLOW;
+ default:
+ assert(!"Bad codeKind");
+ return 0;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Find/create an added code entry associated with the given block and with
+ * the given kind.
+ */
+
+BasicBlock* Compiler::fgAddCodeRef(BasicBlock* srcBlk, unsigned refData, SpecialCodeKind kind, unsigned stkDepth)
+{
+ // Record that the code will call a THROW_HELPER
+ // so on Windows Amd64 we can allocate the 4 outgoing
+ // arg slots on the stack frame if there are no other calls.
+ compUsesThrowHelper = true;
+
+ // For debuggable code, genJumpToThrowHlpBlk() will generate the 'throw'
+ // code inline. It has to be kept consistent with fgAddCodeRef()
+ if (opts.compDbgCode)
+ {
+ return nullptr;
+ }
+
+ const static BBjumpKinds jumpKinds[] = {
+ BBJ_NONE, // SCK_NONE
+ BBJ_THROW, // SCK_RNGCHK_FAIL
+ BBJ_ALWAYS, // SCK_PAUSE_EXEC
+ BBJ_THROW, // SCK_DIV_BY_ZERO
+ BBJ_THROW, // SCK_ARITH_EXCP, SCK_OVERFLOW
+ BBJ_THROW, // SCK_ARG_EXCPN
+ BBJ_THROW, // SCK_ARG_RNG_EXCPN
+ };
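+    // This table is indexed by SpecialCodeKind and must be kept in sync with that enum;
+    // the assert below is a guard against the two getting out of step.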
+
+ noway_assert(sizeof(jumpKinds) == SCK_COUNT); // sanity check
+
+ /* First look for an existing entry that matches what we're looking for */
+
+ AddCodeDsc* add = fgFindExcptnTarget(kind, refData);
+
+ if (add) // found it
+ {
+#ifdef _TARGET_X86_
+ // If different range checks happen at different stack levels,
+ // they can't all jump to the same "call @rngChkFailed" AND have
+ // frameless methods, as the rngChkFailed may need to unwind the
+ // stack, and we have to be able to report the stack level.
+ //
+ // The following check forces most methods that reference an
+ // array element in a parameter list to have an EBP frame,
+ // this restriction could be removed with more careful code
+ // generation for BBJ_THROW (i.e. range check failed).
+ //
+ if (add->acdStkLvl != stkDepth)
+ {
+ codeGen->setFrameRequired(true);
+ }
+#endif // _TARGET_X86_
+
+ return add->acdDstBlk;
+ }
+
+ /* We have to allocate a new entry and prepend it to the list */
+
+ add = new (this, CMK_Unknown) AddCodeDsc;
+ add->acdData = refData;
+ add->acdKind = kind;
+ add->acdStkLvl = (unsigned short)stkDepth;
+ noway_assert(add->acdStkLvl == stkDepth);
+ add->acdNext = fgAddCodeList;
+ fgAddCodeList = add;
+
+ /* Create the target basic block */
+
+ BasicBlock* newBlk;
+
+ newBlk = add->acdDstBlk = fgNewBBinRegion(jumpKinds[kind], srcBlk, /* runRarely */ true, /* insertAtEnd */ true);
+
+ add->acdDstBlk->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ const char* msgWhere = "";
+ if (!srcBlk->hasTryIndex() && !srcBlk->hasHndIndex())
+ {
+ msgWhere = "non-EH region";
+ }
+ else if (!srcBlk->hasTryIndex())
+ {
+ msgWhere = "handler";
+ }
+ else if (!srcBlk->hasHndIndex())
+ {
+ msgWhere = "try";
+ }
+ else if (srcBlk->getTryIndex() < srcBlk->getHndIndex())
+ {
+ msgWhere = "try";
+ }
+ else
+ {
+ msgWhere = "handler";
+ }
+
+ const char* msg;
+ switch (kind)
+ {
+ case SCK_RNGCHK_FAIL:
+ msg = " for RNGCHK_FAIL";
+ break;
+ case SCK_PAUSE_EXEC:
+ msg = " for PAUSE_EXEC";
+ break;
+ case SCK_DIV_BY_ZERO:
+ msg = " for DIV_BY_ZERO";
+ break;
+ case SCK_OVERFLOW:
+ msg = " for OVERFLOW";
+ break;
+#if COR_JIT_EE_VERSION > 460
+ case SCK_ARG_EXCPN:
+ msg = " for ARG_EXCPN";
+ break;
+ case SCK_ARG_RNG_EXCPN:
+ msg = " for ARG_RNG_EXCPN";
+ break;
+#endif // COR_JIT_EE_VERSION
+ default:
+ msg = " for ??";
+ break;
+ }
+
+ printf("\nfgAddCodeRef -"
+ " Add BB in %s%s, new block BB%02u [%08p], stkDepth is %d\n",
+ msgWhere, msg, add->acdDstBlk->bbNum, dspPtr(add->acdDstBlk), stkDepth);
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ newBlk->bbTgtStkDepth = stkDepth;
+#endif // DEBUG
+
+ /* Mark the block as added by the compiler and not removable by future flow
+ graph optimizations. Note that no bbJumpDest points to these blocks. */
+
+ newBlk->bbFlags |= BBF_IMPORTED;
+ newBlk->bbFlags |= BBF_DONT_REMOVE;
+
+ /* Remember that we're adding a new basic block */
+
+ fgAddCodeModf = true;
+ fgRngChkThrowAdded = true;
+
+ /* Now figure out what code to insert */
+
+ GenTreeCall* tree;
+ int helper = CORINFO_HELP_UNDEF;
+
+ switch (kind)
+ {
+ case SCK_RNGCHK_FAIL:
+ helper = CORINFO_HELP_RNGCHKFAIL;
+ break;
+
+ case SCK_DIV_BY_ZERO:
+ helper = CORINFO_HELP_THROWDIVZERO;
+ break;
+
+ case SCK_ARITH_EXCPN:
+ helper = CORINFO_HELP_OVERFLOW;
+ noway_assert(SCK_OVERFLOW == SCK_ARITH_EXCPN);
+ break;
+
+#if COR_JIT_EE_VERSION > 460
+ case SCK_ARG_EXCPN:
+ helper = CORINFO_HELP_THROW_ARGUMENTEXCEPTION;
+ break;
+
+ case SCK_ARG_RNG_EXCPN:
+ helper = CORINFO_HELP_THROW_ARGUMENTOUTOFRANGEEXCEPTION;
+ break;
+#endif // COR_JIT_EE_VERSION
+
+ // case SCK_PAUSE_EXEC:
+ // noway_assert(!"add code to pause exec");
+
+ default:
+ noway_assert(!"unexpected code addition kind");
+ return nullptr;
+ }
+
+ noway_assert(helper != CORINFO_HELP_UNDEF);
+
+ // Add the appropriate helper call.
+ tree = gtNewHelperCallNode(helper, TYP_VOID, GTF_EXCEPT);
+
+ // There are no args here but fgMorphArgs has side effects
+ // such as setting the outgoing arg area (which is necessary
+    // on AMD64 if there are any calls).
+ tree = fgMorphArgs(tree);
+
+ // Store the tree in the new basic block.
+ assert(!srcBlk->isEmpty());
+ if (!srcBlk->IsLIR())
+ {
+ fgInsertStmtAtEnd(newBlk, fgNewStmtFromTree(tree));
+ }
+ else
+ {
+ LIR::AsRange(newBlk).InsertAtEnd(LIR::SeqTree(this, tree));
+ }
+
+ return add->acdDstBlk;
+}
+
+/*****************************************************************************
+ * Finds the block to jump to in order to throw a given kind of exception.
+ * We maintain a cache of one AddCodeDsc for each kind, to make searching fast.
+ * Note: every block that throws a given kind of exception (with the same
+ * refData) jumps to the same (possibly shared) target block.
+ */
+
+Compiler::AddCodeDsc* Compiler::fgFindExcptnTarget(SpecialCodeKind kind, unsigned refData)
+{
+ if (!(fgExcptnTargetCache[kind] && // Try the cached value first
+ fgExcptnTargetCache[kind]->acdData == refData))
+ {
+ // Too bad, have to search for the jump target for the exception
+
+ AddCodeDsc* add = nullptr;
+
+ for (add = fgAddCodeList; add != nullptr; add = add->acdNext)
+ {
+ if (add->acdData == refData && add->acdKind == kind)
+ {
+ break;
+ }
+ }
+
+ fgExcptnTargetCache[kind] = add; // Cache it
+ }
+
+ return fgExcptnTargetCache[kind];
+}
+
+/*****************************************************************************
+ *
+ * The given basic block contains an array range check; return the label this
+ * range check is to jump to upon failure.
+ */
+
+BasicBlock* Compiler::fgRngChkTarget(BasicBlock* block, unsigned stkDepth, SpecialCodeKind kind)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*** Computing fgRngChkTarget for block BB%02u to stkDepth %d\n", block->bbNum, stkDepth);
+ if (!block->IsLIR())
+ {
+ gtDispTree(compCurStmt);
+ }
+ }
+#endif // DEBUG
+
+ /* We attach the target label to the containing try block (if any) */
+ noway_assert(!compIsForInlining());
+ return fgAddCodeRef(block, bbThrowIndex(block), kind, stkDepth);
+}
+
+// Sequences the tree.
+// prevTree is what gtPrev of the first node in execution order gets set to.
+// Returns the first node (execution order) in the sequenced tree.
+GenTree* Compiler::fgSetTreeSeq(GenTree* tree, GenTree* prevTree, bool isLIR)
+{
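+    // 'list' is a scratch head node used when the caller passes no 'prevTree'; after sequencing,
+    // its gtNext is the first node in execution order, and that node's gtPrev is nulled out below
+    // so the scratch node never appears in the returned chain.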
+ GenTree list;
+
+ if (prevTree == nullptr)
+ {
+ prevTree = &list;
+ }
+ fgTreeSeqLst = prevTree;
+ fgTreeSeqNum = 0;
+ fgTreeSeqBeg = nullptr;
+ fgSetTreeSeqHelper(tree, isLIR);
+
+ GenTree* result = prevTree->gtNext;
+ if (prevTree == &list)
+ {
+ list.gtNext->gtPrev = nullptr;
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Assigns sequence numbers to the given tree and its sub-operands, and
+ * threads all the nodes together via the 'gtNext' and 'gtPrev' fields.
+ * Uses 'global' - fgTreeSeqLst
+ */
+
+void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ noway_assert(tree);
+ assert(!IsUninitialized(tree));
+ noway_assert(tree->gtOper != GT_STMT);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a leaf/constant node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ // Special handling for dynamic block ops.
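+    // The dynamic size operand is sequenced either first or last, as controlled by gtEvalSizeFirst;
+    // in between, the destination address and the source/value are sequenced according to the
+    // GTF_REVERSE_OPS flag on the tree.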
+ if (tree->OperIsDynBlkOp())
+ {
+ GenTreeDynBlk* dynBlk;
+ GenTree* src;
+ GenTree* asg = tree;
+ if (tree->OperGet() == GT_ASG)
+ {
+ dynBlk = tree->gtGetOp1()->AsDynBlk();
+ src = tree->gtGetOp2();
+ }
+ else
+ {
+ dynBlk = tree->AsDynBlk();
+ src = dynBlk->Data();
+ asg = nullptr;
+ }
+ GenTree* sizeNode = dynBlk->gtDynamicSize;
+ GenTree* dstAddr = dynBlk->Addr();
+ if (dynBlk->gtEvalSizeFirst)
+ {
+ fgSetTreeSeqHelper(sizeNode, isLIR);
+ }
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ fgSetTreeSeqHelper(src, isLIR);
+ fgSetTreeSeqHelper(dstAddr, isLIR);
+ }
+ else
+ {
+ fgSetTreeSeqHelper(dstAddr, isLIR);
+ fgSetTreeSeqHelper(src, isLIR);
+ }
+ if (!dynBlk->gtEvalSizeFirst)
+ {
+ fgSetTreeSeqHelper(sizeNode, isLIR);
+ }
+ fgSetTreeSeqFinish(dynBlk, isLIR);
+ if (asg != nullptr)
+ {
+ fgSetTreeSeqFinish(asg, isLIR);
+ }
+ return;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ // Special handling for GT_LIST
+ if (tree->OperGet() == GT_LIST)
+ {
+ // First, handle the list items, which will be linked in forward order.
+ // As we go, we will link the GT_LIST nodes in reverse order - we will number
+            // them and update fgTreeSeqLst in a subsequent traversal.
+ GenTreePtr nextList = tree;
+ GenTreePtr list = nullptr;
+ while (nextList != nullptr && nextList->OperGet() == GT_LIST)
+ {
+ list = nextList;
+ GenTreePtr listItem = list->gtOp.gtOp1;
+ fgSetTreeSeqHelper(listItem, isLIR);
+ nextList = list->gtOp.gtOp2;
+ if (nextList != nullptr)
+ {
+ nextList->gtNext = list;
+ }
+ list->gtPrev = nextList;
+ }
+ // Next, handle the GT_LIST nodes.
+ // Note that fgSetTreeSeqFinish() sets the gtNext to null, so we need to capture the nextList
+ // before we call that method.
+ nextList = list;
+ do
+ {
+ assert(list != nullptr);
+ list = nextList;
+ nextList = list->gtNext;
+ fgSetTreeSeqFinish(list, isLIR);
+ } while (list != tree);
+ return;
+ }
+
+ /* Special handling for AddrMode */
+ if (tree->OperIsAddrMode())
+ {
+ bool reverse = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+ if (reverse)
+ {
+ assert(op1 != nullptr && op2 != nullptr);
+ fgSetTreeSeqHelper(op2, isLIR);
+ }
+ if (op1 != nullptr)
+ {
+ fgSetTreeSeqHelper(op1, isLIR);
+ }
+ if (!reverse && op2 != nullptr)
+ {
+ fgSetTreeSeqHelper(op2, isLIR);
+ }
+
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* Check for a nilary operator */
+
+ if (op1 == nullptr)
+ {
+ noway_assert(op2 == nullptr);
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* Is this a unary operator?
+ * Although UNARY GT_IND has a special structure */
+
+ if (oper == GT_IND)
+ {
+ /* Visit the indirection first - op2 may point to the
+ * jump Label for array-index-out-of-range */
+
+ fgSetTreeSeqHelper(op1, isLIR);
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* Now this is REALLY a unary operator */
+
+ if (!op2)
+ {
+ /* Visit the (only) operand and we're done */
+
+ fgSetTreeSeqHelper(op1, isLIR);
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /*
+ For "real" ?: operators, we make sure the order is
+ as follows:
+
+ condition
+ 1st operand
+ GT_COLON
+ 2nd operand
+ GT_QMARK
+ */
+
+ if (oper == GT_QMARK)
+ {
+ noway_assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+
+ fgSetTreeSeqHelper(op1, isLIR);
+ // Here, for the colon, the sequence does not actually represent "order of evaluation":
+ // one or the other of the branches is executed, not both. Still, to make debugging checks
+ // work, we want the sequence to match the order in which we'll generate code, which means
+ // "else" clause then "then" clause.
+ fgSetTreeSeqHelper(op2->AsColon()->ElseNode(), isLIR);
+ fgSetTreeSeqHelper(op2, isLIR);
+ fgSetTreeSeqHelper(op2->AsColon()->ThenNode(), isLIR);
+
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ if (oper == GT_COLON)
+ {
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* This is a binary operator */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ fgSetTreeSeqHelper(op2, isLIR);
+ fgSetTreeSeqHelper(op1, isLIR);
+ }
+ else
+ {
+ fgSetTreeSeqHelper(op1, isLIR);
+ fgSetTreeSeqHelper(op2, isLIR);
+ }
+
+ fgSetTreeSeqFinish(tree, isLIR);
+ return;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ noway_assert(tree->gtField.gtFldObj == nullptr);
+ break;
+
+ case GT_CALL:
+
+ /* We'll evaluate the 'this' argument value first */
+ if (tree->gtCall.gtCallObjp)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallObjp, isLIR);
+ }
+
+ /* We'll evaluate the arguments next, left to right
+ * NOTE: setListOrder needs cleanup - eliminate the #ifdef afterwards */
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallArgs, isLIR);
+ }
+
+ /* Evaluate the temp register arguments list
+ * This is a "hidden" list and its only purpose is to
+ * extend the life of temps until we make the call */
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallLateArgs, isLIR);
+ }
+
+ if ((tree->gtCall.gtCallType == CT_INDIRECT) && (tree->gtCall.gtCallCookie != nullptr))
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallCookie, isLIR);
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtCallAddr, isLIR);
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ fgSetTreeSeqHelper(tree->gtCall.gtControlExpr, isLIR);
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ fgSetTreeSeqHelper(tree->gtArrElem.gtArrObj, isLIR);
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ fgSetTreeSeqHelper(tree->gtArrElem.gtArrInds[dim], isLIR);
+ }
+
+ break;
+
+ case GT_ARR_OFFSET:
+ fgSetTreeSeqHelper(tree->gtArrOffs.gtOffset, isLIR);
+ fgSetTreeSeqHelper(tree->gtArrOffs.gtIndex, isLIR);
+ fgSetTreeSeqHelper(tree->gtArrOffs.gtArrObj, isLIR);
+ break;
+
+ case GT_CMPXCHG:
+ // Evaluate the trees left to right
+ fgSetTreeSeqHelper(tree->gtCmpXchg.gtOpLocation, isLIR);
+ fgSetTreeSeqHelper(tree->gtCmpXchg.gtOpValue, isLIR);
+ fgSetTreeSeqHelper(tree->gtCmpXchg.gtOpComparand, isLIR);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ // Evaluate the trees left to right
+ fgSetTreeSeqHelper(tree->gtBoundsChk.gtArrLen, isLIR);
+ fgSetTreeSeqHelper(tree->gtBoundsChk.gtIndex, isLIR);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ noway_assert(!"DYN_BLK nodes should be sequenced as a special case");
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+ noway_assert(!"unexpected operator");
+#endif // DEBUG
+ break;
+ }
+
+ fgSetTreeSeqFinish(tree, isLIR);
+}
+
+void Compiler::fgSetTreeSeqFinish(GenTreePtr tree, bool isLIR)
+{
+ // If we are sequencing a node that does not appear in LIR,
+ // do not add it to the list.
+ if (isLIR && (((tree->OperGet() == GT_LIST) && !tree->AsArgList()->IsAggregate()) || tree->OperGet() == GT_ARGPLACE))
+ {
+ return;
+ }
+
+ /* Append to the node list */
+ ++fgTreeSeqNum;
+
+#ifdef DEBUG
+ tree->gtSeqNum = fgTreeSeqNum;
+
+ if (verbose & 0)
+ {
+ printf("SetTreeOrder: ");
+ printTreeID(fgTreeSeqLst);
+ printf(" followed by ");
+ printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ fgTreeSeqLst->gtNext = tree;
+ tree->gtNext = nullptr;
+ tree->gtPrev = fgTreeSeqLst;
+ fgTreeSeqLst = tree;
+
+ /* Remember the very first node */
+
+ if (!fgTreeSeqBeg)
+ {
+ fgTreeSeqBeg = tree;
+ assert(tree->gtSeqNum == 1);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Figure out the order in which operators should be evaluated, along with
+ * other information (such as the register sets trashed by each subtree).
+ * Also finds blocks that need GC polls and inserts them as needed.
+ */
+
+void Compiler::fgSetBlockOrder()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgSetBlockOrder()\n");
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ BasicBlock::s_nMaxTrees = 0;
+#endif
+
+ /* Walk the basic blocks to assign sequence numbers */
+
+ /* If we don't compute the doms, then we never mark blocks as loops. */
+ if (fgDomsComputed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ /* If this block is a loop header, mark it appropriately */
+
+ if (block->isLoopHead())
+ {
+ fgMarkLoopHead(block);
+ }
+ }
+ }
+    // only enable fully interruptible code if we're hijacking.
+ else if (GCPOLL_NONE == opts.compGCPollType)
+ {
+ /* If we don't have the dominators, use an abbreviated test for fully interruptible. If there are
+ * any back edges, check the source and destination blocks to see if they're GC Safe. If not, then
+ * go fully interruptible. */
+
+ /* XXX Mon 1/21/2008
+ * Wouldn't it be nice to have a block iterator that can do this loop?
+ */
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+// true if the edge is forward, or if it is a back edge and either the source or the dest is GC safe.
+#define EDGE_IS_GC_SAFE(src, dst) \
+ (((src)->bbNum < (dst)->bbNum) || (((src)->bbFlags | (dst)->bbFlags) & BBF_GC_SAFE_POINT))
+
+ bool partiallyInterruptible = true;
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_ALWAYS:
+ partiallyInterruptible = EDGE_IS_GC_SAFE(block, block->bbJumpDest);
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpPtr;
+ jumpPtr = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ partiallyInterruptible &= EDGE_IS_GC_SAFE(block, *jumpPtr);
+ } while (++jumpPtr, --jumpCnt);
+
+ break;
+
+ default:
+ break;
+ }
+
+ if (!partiallyInterruptible)
+ {
+ // DDB 204533:
+ // The GC encoding for fully interruptible methods does not
+ // support more than 1023 pushed arguments, so we can't set
+ // genInterruptible here when we have 1024 or more pushed args
+ //
+ if (compCanEncodePtrArgCntMax())
+ {
+ genInterruptible = true;
+ }
+ break;
+ }
+#undef EDGE_IS_GC_SAFE
+ }
+ }
+
+ if (!fgGCPollsCreated)
+ {
+ fgCreateGCPolls();
+ }
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+
+#if FEATURE_FASTTAILCALL
+#ifndef JIT32_GCENCODER
+ if (block->endsWithTailCallOrJmp(this, true) && !(block->bbFlags & BBF_GC_SAFE_POINT) &&
+ optReachWithoutCall(fgFirstBB, block))
+ {
+ // We have a tail call that is reachable without making any other
+ // 'normal' call that would have counted as a GC Poll. If we were
+ // using polls, all return blocks meeting this criteria would have
+ // already added polls and then marked as being GC safe
+ // (BBF_GC_SAFE_POINT). Thus we can only reach here when *NOT*
+ // using GC polls, but instead relying on the JIT to generate
+ // fully-interruptible code.
+ noway_assert(GCPOLL_NONE == opts.compGCPollType);
+
+ // This tail call might combine with other tail calls to form a
+ // loop. Thus we need to either add a poll, or make the method
+ // fully interruptible. I chose the later because that's what
+            // fully interruptible. I chose the latter because that's what
+ genInterruptible = true;
+ }
+#endif // !JIT32_GCENCODER
+#endif // FEATURE_FASTTAILCALL
+
+ fgSetBlockOrder(block);
+ }
+
+ /* Remember that now the tree list is threaded */
+
+ fgStmtListThreaded = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("The biggest BB has %4u tree nodes\n", BasicBlock::s_nMaxTrees);
+ }
+ fgDebugCheckLinks();
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+
+void Compiler::fgSetStmtSeq(GenTreePtr tree)
+{
+ GenTree list; // helper node that we use to start the StmtList
+ // It's located in front of the first node in the list
+
+ noway_assert(tree->gtOper == GT_STMT);
+
+ /* Assign numbers and next/prev links for this tree */
+
+ fgTreeSeqNum = 0;
+ fgTreeSeqLst = &list;
+ fgTreeSeqBeg = nullptr;
+
+ fgSetTreeSeqHelper(tree->gtStmt.gtStmtExpr, false);
+
+ /* Record the address of the first node */
+
+ tree->gtStmt.gtStmtList = fgTreeSeqBeg;
+
+#ifdef DEBUG
+
+ if (list.gtNext->gtPrev != &list)
+ {
+ printf("&list ");
+ printTreeID(&list);
+ printf(" != list.next->prev ");
+ printTreeID(list.gtNext->gtPrev);
+ printf("\n");
+ goto BAD_LIST;
+ }
+
+ GenTreePtr temp;
+ GenTreePtr last;
+ for (temp = list.gtNext, last = &list; temp; last = temp, temp = temp->gtNext)
+ {
+ if (temp->gtPrev != last)
+ {
+ printTreeID(temp);
+ printf("->gtPrev = ");
+ printTreeID(temp->gtPrev);
+ printf(", but last = ");
+ printTreeID(last);
+ printf("\n");
+
+ BAD_LIST:;
+
+ printf("\n");
+ gtDispTree(tree->gtStmt.gtStmtExpr);
+ printf("\n");
+
+ for (GenTreePtr bad = &list; bad; bad = bad->gtNext)
+ {
+ printf(" entry at ");
+ printTreeID(bad);
+ printf(" (prev=");
+ printTreeID(bad->gtPrev);
+                printf(",next=");
+                printTreeID(bad->gtNext);
+                printf(")\n");
+ }
+
+ printf("\n");
+ noway_assert(!"Badly linked tree");
+ break;
+ }
+ }
+#endif // DEBUG
+
+ /* Fix the first node's 'prev' link */
+
+ noway_assert(list.gtNext->gtPrev == &list);
+ list.gtNext->gtPrev = nullptr;
+
+#ifdef DEBUG
+ /* Keep track of the highest # of tree nodes */
+
+ if (BasicBlock::s_nMaxTrees < fgTreeSeqNum)
+ {
+ BasicBlock::s_nMaxTrees = fgTreeSeqNum;
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+
+void Compiler::fgSetBlockOrder(BasicBlock* block)
+{
+ GenTreePtr tree;
+
+ tree = block->bbTreeList;
+ if (!tree)
+ {
+ return;
+ }
+
+ for (;;)
+ {
+ fgSetStmtSeq(tree);
+
+ /* Are there any more trees in this basic block? */
+
+ if (tree->gtNext == nullptr)
+ {
+ /* last statement in the tree list */
+ noway_assert(block->lastStmt() == tree);
+ break;
+ }
+
+#ifdef DEBUG
+ if (block->bbTreeList == tree)
+ {
+ /* first statement in the list */
+ noway_assert(tree->gtPrev->gtNext == nullptr);
+ }
+ else
+ {
+ noway_assert(tree->gtPrev->gtNext == tree);
+ }
+
+ noway_assert(tree->gtNext->gtPrev == tree);
+#endif // DEBUG
+
+ tree = tree->gtNext;
+ }
+}
+
+#ifdef LEGACY_BACKEND
+//------------------------------------------------------------------------
+// fgOrderBlockOps: Get the execution order for a block assignment
+//
+// Arguments:
+// tree - The block assignment
+// reg0 - The register for the destination
+// reg1 - The register for the source
+// reg2 - The register for the size
+// opsPtr - An array of 3 GenTreePtr's, an out argument for the operands, in order
+// regsPtr - An array of three regMaskTP - an out argument for the registers, in order
+//
+// Return Value:
+// The return values go into the arrays that are passed in, and provide the
+// operands and associated registers, in execution order.
+//
+// Notes:
+// This method is somewhat convoluted in order to preserve old behavior from when
+// block assignments had their dst and src in a GT_LIST as op1, and their size as op2.
+// The old tree was like this:
+// tree->gtOp
+// / \
+// GT_LIST [size/clsHnd]
+// / \
+// [dest] [val/src]
+//
+// The new tree looks like this:
+// GT_ASG
+// / \
+// blk/obj [val/src]
+// / \
+// [destAddr] [*size/clsHnd] *only for GT_DYN_BLK
+//
+// For the (usual) case of GT_BLK or GT_OBJ, the size is always "evaluated" (i.e.
+// instantiated into a register) last. In those cases, the GTF_REVERSE_OPS flag
+// on the assignment works as usual.
+// In order to preserve previous possible orderings, the order for evaluating
+// the size of a GT_DYN_BLK node is controlled by its gtEvalSizeFirst flag. If
+// that is set, the size is evaluated first, and then the src and dst are evaluated
+// according to the GTF_REVERSE_OPS flag on the assignment.
+
+void Compiler::fgOrderBlockOps(GenTreePtr tree,
+ regMaskTP reg0,
+ regMaskTP reg1,
+ regMaskTP reg2,
+ GenTreePtr* opsPtr, // OUT
+ regMaskTP* regsPtr) // OUT
+{
+ assert(tree->OperIsBlkOp());
+
+ GenTreeBlk* destBlk = tree->gtOp.gtOp1->AsBlk();
+ GenTreePtr destAddr = destBlk->Addr();
+ GenTreePtr srcPtrOrVal = tree->gtOp.gtOp2;
+ if (tree->OperIsCopyBlkOp())
+ {
+ assert(srcPtrOrVal->OperIsIndir());
+ srcPtrOrVal = srcPtrOrVal->AsIndir()->Addr();
+ }
+ GenTreePtr sizeNode = (destBlk->gtOper == GT_DYN_BLK) ? destBlk->AsDynBlk()->gtDynamicSize : nullptr;
+ noway_assert((sizeNode != nullptr) || ((destBlk->gtFlags & GTF_REVERSE_OPS) == 0));
+ assert(destAddr != nullptr);
+ assert(srcPtrOrVal != nullptr);
+
+ GenTreePtr ops[3] = {
+ destAddr, // Dest address
+ srcPtrOrVal, // Val / Src address
+ sizeNode // Size of block
+ };
+
+ regMaskTP regs[3] = {reg0, reg1, reg2};
+
+ static int blockOpsOrder[4][3] =
+ // destBlk->gtEvalSizeFirst | tree->gtFlags
+ {
+ // -------------------------+----------------------------
+ {0, 1, 2}, // false | -
+ {2, 0, 1}, // true | -
+ {1, 0, 2}, // false | GTF_REVERSE_OPS
+ {2, 1, 0} // true | GTF_REVERSE_OPS
+ };
+
+ int orderNum = ((destBlk->gtFlags & GTF_REVERSE_OPS) != 0) * 1 + ((tree->gtFlags & GTF_REVERSE_OPS) != 0) * 2;
+
+ assert(orderNum < 4);
+
+ int* order = blockOpsOrder[orderNum];
+
+ PREFIX_ASSUME(order != NULL);
+
+ // Fill in the OUT arrays according to the order we have selected
+
+ opsPtr[0] = ops[order[0]];
+ opsPtr[1] = ops[order[1]];
+ opsPtr[2] = ops[order[2]];
+
+ regsPtr[0] = regs[order[0]];
+ regsPtr[1] = regs[order[1]];
+ regsPtr[2] = regs[order[2]];
+}
+#endif // LEGACY_BACKEND
+
+//------------------------------------------------------------------------
+// fgGetFirstNode: Get the first node in the tree, in execution order
+//
+// Arguments:
+// tree - The top node of the tree of interest
+//
+// Return Value:
+// The first node in execution order, that belongs to tree.
+//
+// Assumptions:
+// 'tree' must either be a leaf, or all of its constituent nodes must be contiguous
+// in execution order.
+// TODO-Cleanup: Add a debug-only method that verifies this.
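+//    For example (illustrative only): for ADD(a, MUL(b, c)) with GTF_REVERSE_OPS set nowhere,
+//    the walk follows first children and returns 'a'; with GTF_REVERSE_OPS set on the ADD, it
+//    descends into the MUL and returns 'b'.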
+
+/* static */
+GenTreePtr Compiler::fgGetFirstNode(GenTreePtr tree)
+{
+ GenTreePtr child = tree;
+ while (child->NumChildren() > 0)
+ {
+ if (child->OperIsBinary() && child->IsReverseOp())
+ {
+ child = child->GetChild(1);
+ }
+ else
+ {
+ child = child->GetChild(0);
+ }
+ }
+ return child;
+}
+
+// Examine the bbTreeList and return the estimated code size for this block
+unsigned Compiler::fgGetCodeEstimate(BasicBlock* block)
+{
+ unsigned costSz = 0; // estimate of blocks code size cost
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_NONE:
+ costSz = 0;
+ break;
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_LEAVE:
+ case BBJ_COND:
+ costSz = 2;
+ break;
+ case BBJ_CALLFINALLY:
+ costSz = 5;
+ break;
+ case BBJ_SWITCH:
+ costSz = 10;
+ break;
+ case BBJ_THROW:
+            costSz = 1; // We place an int3 after the code for a throw block
+ break;
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ costSz = 1;
+ break;
+ case BBJ_RETURN: // return from method
+ costSz = 3;
+ break;
+ default:
+ noway_assert(!"Bad bbJumpKind");
+ break;
+ }
+
+ GenTreePtr tree = block->FirstNonPhiDef();
+ if (tree)
+ {
+ do
+ {
+ noway_assert(tree->gtOper == GT_STMT);
+
+ if (tree->gtCostSz < MAX_COST)
+ {
+ costSz += tree->gtCostSz;
+ }
+ else
+ {
+ // We could walk the tree to find out the real gtCostSz,
+                // but just using MAX_COST for this tree's code size works OK
+ costSz += tree->gtCostSz;
+ }
+
+ tree = tree->gtNext;
+ } while (tree);
+ }
+
+ return costSz;
+}
+
+#if DUMP_FLOWGRAPHS
+
+struct escapeMapping_t
+{
+ char ch;
+ const char* sub;
+};
+
+// clang-format off
+static escapeMapping_t s_EscapeFileMapping[] =
+{
+ {':', "="},
+ {'<', "["},
+ {'>', "]"},
+ {';', "~semi~"},
+ {'|', "~bar~"},
+ {'&', "~amp~"},
+ {'"', "~quot~"},
+ {'*', "~star~"},
+ {0, nullptr}
+};
+
+static escapeMapping_t s_EscapeMapping[] =
+{
+ {'<', "&lt;"},
+ {'>', "&gt;"},
+ {'&', "&amp;"},
+ {'"', "&quot;"},
+ {0, nullptr}
+};
+// clang-format on
+
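+//------------------------------------------------------------------------
+// fgProcessEscapes: Replace characters in a name according to an escape mapping table.
+//
+// Arguments:
+//    nameIn - the string to escape.
+//    map    - a zero-terminated array of (character, substitution) pairs.
+//
+// Return Value:
+//    'nameIn' itself if no character required substitution; otherwise a newly allocated
+//    string in which every mapped character is replaced by its substitution (for example,
+//    '<' becomes "[" under s_EscapeFileMapping and "&lt;" under s_EscapeMapping).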
+const char* Compiler::fgProcessEscapes(const char* nameIn, escapeMapping_t* map)
+{
+ const char* nameOut = nameIn;
+ unsigned lengthOut;
+ unsigned index;
+ bool match;
+ bool subsitutionRequired;
+ const char* pChar;
+
+ lengthOut = 1;
+ subsitutionRequired = false;
+ pChar = nameIn;
+ while (*pChar != '\0')
+ {
+ match = false;
+ index = 0;
+ while (map[index].ch != 0)
+ {
+ if (*pChar == map[index].ch)
+ {
+ match = true;
+ break;
+ }
+ index++;
+ }
+ if (match)
+ {
+ subsitutionRequired = true;
+ lengthOut += (unsigned)strlen(map[index].sub);
+ }
+ else
+ {
+ lengthOut += 1;
+ }
+ pChar++;
+ }
+
+ if (subsitutionRequired)
+ {
+ char* newName = (char*) compGetMemA(lengthOut, CMK_DebugOnly);
+ char* pDest;
+ pDest = newName;
+ pChar = nameIn;
+ while (*pChar != '\0')
+ {
+ match = false;
+ index = 0;
+ while (map[index].ch != 0)
+ {
+ if (*pChar == map[index].ch)
+ {
+ match = true;
+ break;
+ }
+ index++;
+ }
+ if (match)
+ {
+ strcpy(pDest, map[index].sub);
+ pDest += strlen(map[index].sub);
+ }
+ else
+ {
+ *pDest++ = *pChar;
+ }
+ pChar++;
+ }
+ *pDest++ = '\0';
+ nameOut = (const char*) newName;
+ }
+
+ return nameOut;
+}
+
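+// Print a non-negative double to the flowgraph file as a quoted attribute value, using
+// fixed-point notation (3 or 5 decimal places) for larger values and scientific notation
+// for very small nonzero values.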
+static void fprintfDouble(FILE* fgxFile, double value)
+{
+ assert(value >= 0.0);
+
+ if ((value >= 0.010) || (value == 0.0))
+ {
+ fprintf(fgxFile, "\"%7.3f\"", value);
+ }
+ else if (value >= 0.00010)
+ {
+ fprintf(fgxFile, "\"%7.5f\"", value);
+ }
+ else
+ {
+ fprintf(fgxFile, "\"%7E\"", value);
+ }
+}
+
+//------------------------------------------------------------------------
+// fgOpenFlowGraphFile: Open a file to dump either the xml or dot format flow graph
+//
+// Arguments:
+// wbDontClose - A boolean out argument that indicates whether the caller should close the file
+// phase - A phase identifier to indicate which phase is associated with the dump
+// type - A (wide) string indicating the type of dump, "dot" or "xml"
+//
+// Return Value:
+// Opens a file to which a flowgraph can be dumped, whose name is based on the current
+// config vales.
+//    config values.
+FILE* Compiler::fgOpenFlowGraphFile(bool* wbDontClose, Phases phase, LPCWSTR type)
+{
+ FILE* fgxFile;
+ LPCWSTR pattern = nullptr;
+ LPCWSTR filename = nullptr;
+ LPCWSTR pathname = nullptr;
+ const char* escapedString;
+ bool createDuplicateFgxFiles = true;
+
+#ifdef DEBUG
+ if (opts.eeFlags & CORJIT_FLG_PREJIT)
+ {
+ pattern = JitConfig.NgenDumpFg();
+ filename = JitConfig.NgenDumpFgFile();
+ pathname = JitConfig.NgenDumpFgDir();
+ }
+ else
+ {
+ pattern = JitConfig.JitDumpFg();
+ filename = JitConfig.JitDumpFgFile();
+ pathname = JitConfig.JitDumpFgDir();
+ }
+#endif // DEBUG
+
+    if (fgBBcount <= 1)
+    {
+        return nullptr;
+    }
+
+    if (pattern == nullptr)
+    {
+        return nullptr;
+    }
+
+    if (wcslen(pattern) == 0)
+    {
+        return nullptr;
+    }
+
+ LPCWSTR phasePattern = JitConfig.JitDumpFgPhase();
+ LPCWSTR phaseName = PhaseShortNames[phase];
+ if (phasePattern == nullptr)
+ {
+ if (phase != PHASE_DETERMINE_FIRST_COLD_BLOCK)
+ {
+ return nullptr;
+ }
+ }
+ else if (*phasePattern != W('*'))
+ {
+ if (wcsstr(phasePattern, phaseName) == nullptr)
+ {
+ return nullptr;
+ }
+ }
+
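+    // Match the method against the pattern. A pattern that starts with '*' matches every method
+    // (the block below is skipped entirely). Otherwise the pattern has the form
+    // [ClassName:]MethodName, where either component may end in '*' to match any suffix of the
+    // corresponding name.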
+ if (*pattern != W('*'))
+ {
+ bool hasColon = (wcschr(pattern, W(':')) != nullptr);
+
+ if (hasColon)
+ {
+ const char* className = info.compClassName;
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+ while ((*pattern != W(':')) && (*pattern != W('*')))
+ {
+                    if (*pattern != *className)
+                    {
+                        return nullptr;
+                    }
+
+ pattern++;
+ className++;
+ }
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+                    if (*className != 0)
+                    {
+                        return nullptr;
+                    }
+ }
+ }
+            if (*pattern != W(':'))
+            {
+                return nullptr;
+            }
+
+ pattern++;
+ }
+
+ const char* methodName = info.compMethodName;
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+ while ((*pattern != 0) && (*pattern != W('*')))
+ {
+                if (*pattern != *methodName)
+                {
+                    return nullptr;
+                }
+
+ pattern++;
+ methodName++;
+ }
+ if (*pattern == W('*'))
+ {
+ pattern++;
+ }
+ else
+ {
+                if (*methodName != 0)
+                {
+                    return nullptr;
+                }
+ }
+ }
+        if (*pattern != 0)
+        {
+            return nullptr;
+        }
+ }
+
+ if (filename == nullptr)
+ {
+ filename = W("default");
+ }
+
+ if (wcscmp(filename, W("profiled")) == 0)
+ {
+ if ((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0)
+ {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ if (wcscmp(filename, W("hot")) == 0)
+ {
+        if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT)
+        {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (wcscmp(filename, W("cold")) == 0)
+ {
+ if (info.compMethodInfo->regionKind == CORINFO_REGION_COLD)
+ {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (wcscmp(filename, W("jit")) == 0)
+ {
+ if (info.compMethodInfo->regionKind == CORINFO_REGION_JIT)
+ {
+ createDuplicateFgxFiles = true;
+ goto ONE_FILE_PER_METHOD;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (wcscmp(filename, W("all")) == 0)
+ {
+ createDuplicateFgxFiles = true;
+
+ONE_FILE_PER_METHOD:;
+
+ escapedString = fgProcessEscapes(info.compFullName, s_EscapeFileMapping);
+ size_t wCharCount = strlen(escapedString) + wcslen(phaseName) + 1 + strlen("~999") + wcslen(type) + 1;
+ if (pathname != nullptr)
+ {
+ wCharCount += wcslen(pathname) + 1;
+ }
+ filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR));
+ if (pathname != nullptr)
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S-%s.%s"), pathname, escapedString, phaseName, type);
+ }
+ else
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%S.%s"), escapedString, type);
+ }
+ fgxFile = _wfopen(filename, W("r")); // Check if this file already exists
+ if (fgxFile != nullptr)
+ {
+ // For Generic methods we will have both hot and cold versions
+ if (createDuplicateFgxFiles == false)
+ {
+ fclose(fgxFile);
+ return nullptr;
+ }
+ // Yes, this filename already exists, so create a different one by appending ~2, ~3, etc...
+ for (int i = 2; i < 1000; i++)
+ {
+ fclose(fgxFile);
+ if (pathname != nullptr)
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%S~%d.%s"), pathname, escapedString, i, type);
+ }
+ else
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%S~%d.%s"), escapedString, i, type);
+ }
+ fgxFile = _wfopen(filename, W("r")); // Check if this file exists
+                if (fgxFile == nullptr)
+                {
+                    break;
+                }
+ }
+ // If we have already created 1000 files with this name then just fail
+ if (fgxFile != nullptr)
+ {
+ fclose(fgxFile);
+ return nullptr;
+ }
+ }
+ fgxFile = _wfopen(filename, W("a+"));
+ *wbDontClose = false;
+ }
+ else if (wcscmp(filename, W("stdout")) == 0)
+ {
+ fgxFile = jitstdout;
+ *wbDontClose = true;
+ }
+ else if (wcscmp(filename, W("stderr")) == 0)
+ {
+ fgxFile = stderr;
+ *wbDontClose = true;
+ }
+ else
+ {
+ LPCWSTR origFilename = filename;
+ size_t wCharCount = wcslen(origFilename) + wcslen(type) + 2;
+ if (pathname != nullptr)
+ {
+ wCharCount += wcslen(pathname) + 1;
+ }
+ filename = (LPCWSTR) alloca(wCharCount * sizeof(WCHAR));
+ if (pathname != nullptr)
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s\\%s.%s"), pathname, origFilename, type);
+ }
+ else
+ {
+ swprintf_s((LPWSTR)filename, wCharCount, W("%s.%s"), origFilename, type);
+ }
+ fgxFile = _wfopen(filename, W("a+"));
+ *wbDontClose = false;
+ }
+
+ return fgxFile;
+}
+
+//------------------------------------------------------------------------
+// fgDumpFlowGraph: Dump the xml or dot format flow graph, if enabled for this phase.
+//
+// Arguments:
+// phase - A phase identifier to indicate which phase is associated with the dump,
+// i.e. which phase has just completed.
+//
+// Return Value:
+// True iff a flowgraph has been dumped.
+//
+// Notes:
+// The xml dumps are the historical mechanism for dumping the flowgraph.
+// The dot format can be viewed by:
+// - Graphviz (http://www.graphviz.org/)
+// - The command "C:\Program Files (x86)\Graphviz2.38\bin\dot.exe" -Tsvg -oFoo.svg -Kdot Foo.dot
+// will produce a Foo.svg file that can be opened with any svg-capable browser (e.g. IE).
+// - http://rise4fun.com/Agl/
+// - Cut and paste the graph from your .dot file, replacing the digraph on the page, and then click the play
+// button.
+// - It will show a rotating '/' and then render the graph in the browser.
+// MSAGL has also been open-sourced to https://github.com/Microsoft/automatic-graph-layout.git.
+//
+// Here are the config values that control it:
+// COMPlus_JitDumpFg A string (ala the COMPlus_JitDump string) indicating what methods to dump flowgraphs
+// for.
+// COMPlus_JitDumpFgDir A path to a directory into which the flowgraphs will be dumped.
+// COMPlus_JitDumpFgFile The filename to use. The default is "default.[xml|dot]".
+// Note that the new graphs will be appended to this file if it already exists.
+// COMPlus_JitDumpFgPhase Phase(s) after which to dump the flowgraph.
+// Set to the short name of a phase to see the flowgraph after that phase.
+// Leave unset to dump after COLD-BLK (determine first cold block) or set to * for all
+// phases.
+// COMPlus_JitDumpFgDot Set to non-zero to emit Dot instead of Xml Flowgraph dump. (Default is xml format.)
+
+bool Compiler::fgDumpFlowGraph(Phases phase)
+{
+ bool result = false;
+ bool dontClose = false;
+ bool createDotFile = false;
+ if (JitConfig.JitDumpFgDot())
+ {
+ createDotFile = true;
+ }
+
+ FILE* fgxFile = fgOpenFlowGraphFile(&dontClose, phase, createDotFile ? W("dot") : W("fgx"));
+
+ if (fgxFile == nullptr)
+ {
+ return false;
+ }
+ bool validWeights = fgHaveValidEdgeWeights;
+ unsigned calledCount = max(fgCalledWeight, BB_UNITY_WEIGHT) / BB_UNITY_WEIGHT;
+ double weightDivisor = (double) (calledCount * BB_UNITY_WEIGHT);
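+    // Block and edge weights below are scaled by 'weightDivisor', so the printed numbers are
+    // (roughly) execution counts per call of the method.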
+ const char* escapedString;
+ const char* regionString = "NONE";
+
+ if (info.compMethodInfo->regionKind == CORINFO_REGION_HOT)
+ {
+ regionString="HOT";
+ }
+ else if (info.compMethodInfo->regionKind == CORINFO_REGION_COLD)
+ {
+ regionString="COLD";
+ }
+ else if (info.compMethodInfo->regionKind == CORINFO_REGION_JIT)
+ {
+ regionString="JIT";
+ }
+
+ if (createDotFile)
+ {
+ fprintf(fgxFile, "digraph %s\n{\n", info.compMethodName);
+ fprintf(fgxFile, "/* Method %d, after phase %s */", Compiler::jitTotalMethodCompiled, PhaseNames[phase]);
+ }
+ else
+ {
+ fprintf(fgxFile, "<method");
+
+ escapedString = fgProcessEscapes(info.compFullName, s_EscapeMapping);
+ fprintf(fgxFile, "\n name=\"%s\"", escapedString);
+
+ escapedString = fgProcessEscapes(info.compClassName, s_EscapeMapping);
+ fprintf(fgxFile, "\n className=\"%s\"", escapedString);
+
+ escapedString = fgProcessEscapes(info.compMethodName, s_EscapeMapping);
+ fprintf(fgxFile, "\n methodName=\"%s\"", escapedString);
+ fprintf(fgxFile, "\n ngenRegion=\"%s\"", regionString);
+
+ fprintf(fgxFile, "\n bytesOfIL=\"%d\"", info.compILCodeSize);
+ fprintf(fgxFile, "\n localVarCount=\"%d\"", lvaCount);
+
+ if (fgHaveProfileData())
+ {
+ fprintf(fgxFile, "\n calledCount=\"%d\"", calledCount);
+ fprintf(fgxFile, "\n profileData=\"true\"");
+ }
+ if (compHndBBtabCount > 0)
+ {
+ fprintf(fgxFile, "\n hasEHRegions=\"true\"");
+ }
+ if (fgHasLoops)
+ {
+ fprintf(fgxFile, "\n hasLoops=\"true\"");
+ }
+ if (validWeights)
+ {
+ fprintf(fgxFile, "\n validEdgeWeights=\"true\"");
+ if (!fgSlopUsedInEdgeWeights && !fgRangeUsedInEdgeWeights)
+ {
+ fprintf(fgxFile, "\n exactEdgeWeights=\"true\"");
+ }
+ }
+ if (fgFirstColdBlock != nullptr)
+ {
+ fprintf(fgxFile, "\n firstColdBlock=\"%d\"", fgFirstColdBlock->bbNum);
+ }
+
+ fprintf(fgxFile, ">");
+
+ fprintf(fgxFile, "\n <blocks");
+ fprintf(fgxFile, "\n blockCount=\"%d\"", fgBBcount);
+ fprintf(fgxFile, ">");
+ }
+
+ static const char* kindImage[] = { "EHFINALLYRET", "EHFILTERRET", "EHCATCHRET",
+ "THROW", "RETURN", "NONE", "ALWAYS", "LEAVE",
+ "CALLFINALLY", "COND", "SWITCH" };
+
+ BasicBlock* block;
+ unsigned blockOrdinal;
+    for (block = fgFirstBB, blockOrdinal = 1;
+ block != nullptr;
+ block = block->bbNext, blockOrdinal++)
+ {
+ if (createDotFile)
+ {
+ // Add constraint edges to try to keep nodes ordered.
+ // It seems to work best if these edges are all created first.
+ switch(block->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_NONE:
+ assert(block->bbNext != nullptr);
+ fprintf(fgxFile, " BB%02u -> BB%02u\n", block->bbNum, block->bbNext->bbNum);
+ break;
+ default:
+ // These may or may not have an edge to the next block.
+ // Add a transparent edge to keep nodes ordered.
+ if (block->bbNext != nullptr)
+ {
+ fprintf(fgxFile, " BB%02u -> BB%02u [arrowtail=none,color=transparent]\n", block->bbNum, block->bbNext->bbNum);
+ }
+ }
+ }
+ else
+ {
+ fprintf(fgxFile,"\n <block");
+ fprintf(fgxFile,"\n id=\"%d\"", block->bbNum);
+ fprintf(fgxFile,"\n ordinal=\"%d\"", blockOrdinal);
+ fprintf(fgxFile,"\n jumpKind=\"%s\"", kindImage[block->bbJumpKind]);
+ if (block->hasTryIndex())
+ {
+ fprintf(fgxFile,"\n inTry=\"%s\"", "true");
+ }
+ if (block->hasHndIndex())
+ {
+ fprintf(fgxFile,"\n inHandler=\"%s\"", "true");
+ }
+ if (((fgFirstBB->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((block->bbFlags & BBF_COLD) == 0) )
+ {
+ fprintf(fgxFile,"\n hot=\"true\"");
+ }
+ if (block->bbFlags & (BBF_HAS_NEWOBJ | BBF_HAS_NEWARRAY))
+ {
+ fprintf(fgxFile,"\n callsNew=\"true\"");
+ }
+ if (block->bbFlags & BBF_LOOP_HEAD)
+ {
+ fprintf(fgxFile,"\n loopHead=\"true\"");
+ }
+ fprintf(fgxFile,"\n weight=");
+ fprintfDouble(fgxFile, ((double) block->bbWeight) / weightDivisor);
+ fprintf(fgxFile,"\n codeEstimate=\"%d\"", fgGetCodeEstimate(block));
+ fprintf(fgxFile,"\n startOffset=\"%d\"", block->bbCodeOffs);
+ fprintf(fgxFile,"\n endOffset=\"%d\"", block->bbCodeOffsEnd);
+ fprintf(fgxFile, ">");
+ fprintf(fgxFile,"\n </block>");
+ }
+ }
+
+ if (!createDotFile)
+ {
+ fprintf(fgxFile, "\n </blocks>");
+
+ fprintf(fgxFile, "\n <edges");
+ fprintf(fgxFile, "\n edgeCount=\"%d\"", fgEdgeCount);
+ fprintf(fgxFile, ">");
+ }
+
+ unsigned edgeNum = 1;
+ BasicBlock* bTarget;
+ for (bTarget = fgFirstBB; bTarget != nullptr; bTarget = bTarget->bbNext)
+ {
+ double targetWeightDivisor;
+ if (bTarget->bbWeight == BB_ZERO_WEIGHT)
+ {
+ targetWeightDivisor = 1.0;
+ }
+ else
+ {
+ targetWeightDivisor = (double) bTarget->bbWeight;
+ }
+
+ flowList* edge;
+ for (edge = bTarget->bbPreds; edge != nullptr; edge = edge->flNext, edgeNum++)
+ {
+ BasicBlock* bSource = edge->flBlock;
+ double sourceWeightDivisor;
+ if (bSource->bbWeight == BB_ZERO_WEIGHT)
+ {
+ sourceWeightDivisor = 1.0;
+ }
+ else
+ {
+ sourceWeightDivisor = (double) bSource->bbWeight;
+ }
+ if (createDotFile)
+ {
+ // Don't duplicate the edges we added above.
+ if ((bSource->bbNum == (bTarget->bbNum - 1)) &&
+ ((bSource->bbJumpKind == BBJ_NONE) || (bSource->bbJumpKind == BBJ_COND)))
+ {
+ continue;
+ }
+ fprintf(fgxFile, " BB%02u -> BB%02u", bSource->bbNum, bTarget->bbNum);
+ if ((bSource->bbNum > bTarget->bbNum))
+ {
+ fprintf(fgxFile, "[arrowhead=normal,arrowtail=none,color=green]\n");
+ }
+ else
+ {
+ fprintf(fgxFile, "\n");
+ }
+ }
+ else
+ {
+ fprintf(fgxFile,"\n <edge");
+ fprintf(fgxFile,"\n id=\"%d\"", edgeNum);
+ fprintf(fgxFile,"\n source=\"%d\"", bSource->bbNum);
+ fprintf(fgxFile,"\n target=\"%d\"", bTarget->bbNum);
+ if (bSource->bbJumpKind == BBJ_SWITCH)
+ {
+ if (edge->flDupCount >= 2)
+ {
+ fprintf(fgxFile,"\n switchCases=\"%d\"", edge->flDupCount);
+ }
+ if (bSource->bbJumpSwt->getDefault() == bTarget)
+ {
+ fprintf(fgxFile,"\n switchDefault=\"true\"");
+ }
+ }
+ if (validWeights)
+ {
+ unsigned edgeWeight = (edge->flEdgeWeightMin + edge->flEdgeWeightMax) / 2;
+ fprintf(fgxFile,"\n weight=");
+ fprintfDouble(fgxFile, ((double) edgeWeight) / weightDivisor);
+
+ if (edge->flEdgeWeightMin != edge->flEdgeWeightMax)
+ {
+ fprintf(fgxFile,"\n minWeight=");
+ fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMin) / weightDivisor);
+ fprintf(fgxFile,"\n maxWeight=");
+ fprintfDouble(fgxFile, ((double) edge->flEdgeWeightMax) / weightDivisor);
+ }
+
+ if (edgeWeight > 0)
+ {
+ if (edgeWeight < bSource->bbWeight)
+ {
+ fprintf(fgxFile,"\n out=");
+ fprintfDouble(fgxFile, ((double) edgeWeight) / sourceWeightDivisor );
+ }
+ if (edgeWeight < bTarget->bbWeight)
+ {
+ fprintf(fgxFile,"\n in=");
+ fprintfDouble(fgxFile, ((double) edgeWeight) / targetWeightDivisor);
+ }
+ }
+ }
+ }
+ if (!createDotFile)
+ {
+ fprintf(fgxFile, ">");
+ fprintf(fgxFile,"\n </edge>");
+ }
+ }
+ }
+ if (createDotFile)
+ {
+ fprintf(fgxFile, "}\n");
+ }
+ else
+ {
+ fprintf(fgxFile, "\n </edges>");
+ fprintf(fgxFile, "\n</method>\n");
+ }
+
+ if (dontClose)
+ {
+ // fgxFile is jitstdout or stderr
+ fprintf(fgxFile, "\n");
+ }
+ else
+ {
+ fclose(fgxFile);
+ }
+
+ return result;
+}
+
+#endif // DUMP_FLOWGRAPHS
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+void Compiler::fgDispReach()
+{
+ printf("------------------------------------------------\n");
+ printf("BBnum Reachable by \n");
+ printf("------------------------------------------------\n");
+
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ printf("BB%02u : ", block->bbNum);
+ BLOCKSET_ITER_INIT(this, iter, block->bbReach, bbNum);
+ while (iter.NextElem(this, &bbNum))
+ {
+ printf("BB%02u ", bbNum);
+ }
+ printf("\n");
+ }
+}
+
+void Compiler::fgDispDoms()
+{
+ // Don't bother printing this when we have a large number of BasicBlocks in the method
+ if (fgBBcount > 256)
+ {
+ return;
+ }
+
+ printf("------------------------------------------------\n");
+ printf("BBnum Dominated by\n");
+ printf("------------------------------------------------\n");
+
+ for (unsigned i = 1; i <= fgBBNumMax; ++i)
+ {
+ BasicBlock* current = fgBBInvPostOrder[i];
+ printf("BB%02u: ", current->bbNum);
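+        // Walk up the bbIDom chain; the root of the chain is its own immediate dominator,
+        // which terminates the loop.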
+ while (current != current->bbIDom)
+ {
+ printf("BB%02u ", current->bbNum);
+ current = current->bbIDom;
+ }
+ printf("\n");
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::fgTableDispBasicBlock(BasicBlock* block,
+ int ibcColWidth /* = 0 */)
+{
+ unsigned flags = block->bbFlags;
+
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+ int maxBlockNumWidth = CountDigits(bbNumMax);
+ maxBlockNumWidth = max(maxBlockNumWidth, 2);
+ int blockNumWidth = CountDigits(block->bbNum);
+ blockNumWidth = max(blockNumWidth, 2);
+ int blockNumPadding = maxBlockNumWidth - blockNumWidth;
+
+ printf("BB%02u%*s [%08p] %2u",
+ block->bbNum,
+ blockNumPadding, "",
+ dspPtr(block),
+ block->bbRefs);
+
+ //
+ // Display EH 'try' region index
+ //
+
+ if (block->hasTryIndex())
+ {
+ printf(" %2u", block->getTryIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ //
+ // Display EH handler region index
+ //
+
+ if (block->hasHndIndex())
+ {
+ printf(" %2u", block->getHndIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ printf(" ");
+
+ //
+ // Display block predecessor list
+ //
+
+ unsigned charCnt;
+ if (fgCheapPredsValid)
+ {
+ charCnt = block->dspCheapPreds();
+ }
+ else
+ {
+ charCnt = block->dspPreds();
+ }
+
+ if (charCnt < 19)
+ {
+ printf("%*s", 19 - charCnt, "");
+ }
+
+ printf(" ");
+
+ //
+ // Display block weight
+ //
+
+ if (block->isMaxBBWeight())
+ {
+ printf(" MAX ");
+ }
+ else
+ {
+ printf("%6s", refCntWtd2str(block->getBBWeight(this)));
+ }
+
+ //
+ // Display optional IBC weight column.
+ // Note that iColWidth includes one character for a leading space, if there is an IBC column.
+ //
+
+ if (ibcColWidth > 0)
+ {
+ if (block->bbFlags & BBF_PROF_WEIGHT)
+ {
+ printf("%*u", ibcColWidth, block->bbWeight);
+ }
+ else
+ {
+ // No IBC data. Just print spaces to align the column.
+ printf("%*s", ibcColWidth, "");
+ }
+ }
+
+ printf(" ");
+
+ //
+ // Display block IL range
+ //
+
+ block->dspBlockILRange();
+
+ //
+ // Display block branch target
+ //
+
+ if (flags & BBF_REMOVED)
+ {
+ printf( "[removed] ");
+ }
+ else
+ {
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ printf("-> BB%02u%*s ( cond )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_CALLFINALLY:
+ printf("-> BB%02u%*s (callf )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_ALWAYS:
+ if (flags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ printf("-> BB%02u%*s (ALWAYS)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ }
+ else
+ {
+ printf("-> BB%02u%*s (always)", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ }
+ break;
+
+ case BBJ_LEAVE:
+ printf("-> BB%02u%*s (leave )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_EHFINALLYRET:
+ printf( "%*s (finret)", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_EHFILTERRET:
+ printf( "%*s (fltret)", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_EHCATCHRET:
+ printf("-> BB%02u%*s ( cret )", block->bbJumpDest->bbNum, maxBlockNumWidth - max(CountDigits(block->bbJumpDest->bbNum), 2), "");
+ break;
+
+ case BBJ_THROW:
+ printf( "%*s (throw )", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_RETURN:
+ printf( "%*s (return)", maxBlockNumWidth - 2, "");
+ break;
+
+ default:
+ printf( "%*s ", maxBlockNumWidth - 2, "");
+ break;
+
+ case BBJ_SWITCH:
+ printf("->");
+
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ int switchWidth;
+ switchWidth = 0;
+ do
+ {
+ printf("%cBB%02u",
+ (jumpTab == block->bbJumpSwt->bbsDstTab) ? ' ' : ',',
+ (*jumpTab)->bbNum);
+ switchWidth += 1 /* space/comma */ + 2 /* BB */ + max(CountDigits((*jumpTab)->bbNum), 2);
+ }
+ while (++jumpTab, --jumpCnt);
+
+ if (switchWidth < 7)
+ {
+ printf("%*s", 8 - switchWidth, "");
+ }
+
+ printf(" (switch)");
+ break;
+ }
+ }
+
+ printf(" ");
+
+ //
+ // Display block EH region and type, including nesting indicator
+ //
+
+ if (block->hasTryIndex())
+ {
+ printf("T%d ", block->getTryIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ if (block->hasHndIndex())
+ {
+ printf("H%d ", block->getHndIndex());
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ if (flags & BBF_FUNCLET_BEG)
+ {
+ printf("F ");
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ int cnt = 0;
+
+ switch (block->bbCatchTyp)
+ {
+ case BBCT_NONE: break;
+ case BBCT_FAULT: printf("fault "); cnt += 6; break;
+ case BBCT_FINALLY: printf("finally "); cnt += 8; break;
+ case BBCT_FILTER: printf("filter "); cnt += 7; break;
+ case BBCT_FILTER_HANDLER: printf("filtHnd "); cnt += 8; break;
+ default: printf("catch "); cnt += 6; break;
+ }
+
+ if (block->bbCatchTyp != BBCT_NONE)
+ {
+ cnt += 2;
+ printf("{ ");
+ /* brace matching editor workaround to compensate for the preceding line: } */
+ }
+
+ if (flags & BBF_TRY_BEG)
+ {
+ // Output a brace for every try region that this block opens
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount;
+ HBtab < HBtabEnd;
+ HBtab++)
+ {
+ if (HBtab->ebdTryBeg == block)
+ {
+ cnt += 6;
+ printf("try { ");
+ /* brace matching editor workaround to compensate for the preceding line: } */
+ }
+ }
+ }
+
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount;
+ HBtab < HBtabEnd;
+ HBtab++)
+ {
+ if (HBtab->ebdTryLast == block)
+ {
+ cnt += 2;
+ /* brace matching editor workaround to compensate for the following line: { */
+ printf("} ");
+ }
+ if (HBtab->ebdHndLast == block)
+ {
+ cnt += 2;
+ /* brace matching editor workaround to compensate for the following line: { */
+ printf("} ");
+ }
+ if (HBtab->HasFilter() && block->bbNext == HBtab->ebdHndBeg)
+ {
+ cnt += 2;
+ /* brace matching editor workaround to compensate for the following line: { */
+ printf("} ");
+ }
+ }
+
+ while (cnt < 12)
+ {
+ cnt++;
+ printf(" ");
+ }
+
+ //
+ // Display block flags
+ //
+
+ block->dspFlags();
+
+ printf("\n");
+}
+
+/****************************************************************************
+ Dump blocks from firstBlock to lastBlock.
+*/
+
+void Compiler::fgDispBasicBlocks(BasicBlock* firstBlock,
+ BasicBlock* lastBlock,
+ bool dumpTrees)
+{
+ BasicBlock* block;
+
+ int padWidth = 0;
+#ifdef _TARGET_AMD64_
+ padWidth = 8;
+#endif // _TARGET_AMD64_
+
+ // If any block has IBC data, we add an "IBC weight" column just before the 'IL range' column. This column is as
+ // wide as necessary to accommodate all the various IBC weights. It's at least 4 characters wide, to accommodate
+ // the "IBC" title and leading space.
+ int ibcColWidth = 0;
+ for (block = firstBlock; block != nullptr; block = block->bbNext)
+ {
+ if (block->bbFlags & BBF_PROF_WEIGHT)
+ {
+ int thisIbcWidth = CountDigits(block->bbWeight);
+ ibcColWidth = max(ibcColWidth, thisIbcWidth);
+ }
+
+ if (block == lastBlock) {
+ break;
+ }
+ }
+ if (ibcColWidth > 0)
+ {
+ ibcColWidth = max(ibcColWidth, 3) + 1; // + 1 for the leading space
+ }
+
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+ int maxBlockNumWidth = CountDigits(bbNumMax);
+ maxBlockNumWidth = max(maxBlockNumWidth, 2);
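+    // Jump targets print as "BB%02u" (at least two digits), so the block-number width is never less than 2.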
+
+ padWidth += maxBlockNumWidth - 2; // Account for functions with a large number of blocks.
+
+ printf("\n");
+ printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ padWidth, "------------",
+ ibcColWidth, "------------",
+ maxBlockNumWidth, "----");
+ printf("BBnum %*sdescAddr ref try hnd %s weight %*s%s [IL range] [jump]%*s [EH region] [flags]\n",
+ padWidth, "",
+ fgCheapPredsValid ? "cheap preds" :
+ (fgComputePredsDone ? "preds "
+ : " "),
+ ((ibcColWidth > 0) ? ibcColWidth - 3 : 0), "", // Subtract 3 for the width of "IBC", printed next.
+ ((ibcColWidth > 0) ? "IBC"
+ : ""),
+ maxBlockNumWidth, ""
+ );
+ printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ padWidth, "------------",
+ ibcColWidth, "------------",
+ maxBlockNumWidth, "----");
+
+ for (block = firstBlock;
+ block;
+ block = block->bbNext)
+ {
+ // First, do some checking on the bbPrev links
+ if (block->bbPrev)
+ {
+ if (block->bbPrev->bbNext != block)
+ {
+ printf("bad prev link\n");
+ }
+ }
+ else if (block != fgFirstBB)
+ {
+ printf("bad prev link!\n");
+ }
+
+ if (block == fgFirstColdBlock)
+ {
+ printf("~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~%*s~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+ padWidth, "~~~~~~~~~~~~",
+ ibcColWidth, "~~~~~~~~~~~~",
+ maxBlockNumWidth, "~~~~");
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (block == fgFirstFuncletBB)
+ {
+ printf("++++++%*s++++++++++++++++++++++++++++++++++++%*s+++++++++++++++++++++++%*s++++++++++++++++++++++++++++++++++++++++ funclets follow\n",
+ padWidth, "++++++++++++",
+ ibcColWidth, "++++++++++++",
+ maxBlockNumWidth, "++++");
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ fgTableDispBasicBlock(block, ibcColWidth);
+
+ if (block == lastBlock) {
+ break;
+ }
+ }
+
+ printf("------%*s------------------------------------%*s-----------------------%*s----------------------------------------\n",
+ padWidth, "------------",
+ ibcColWidth, "------------",
+ maxBlockNumWidth, "----");
+
+ if (dumpTrees)
+ {
+ fgDumpTrees(firstBlock, lastBlock);
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::fgDispBasicBlocks(bool dumpTrees)
+{
+ fgDispBasicBlocks(fgFirstBB, nullptr, dumpTrees);
+}
+
+/*****************************************************************************/
+// Increment the stmtNum and dump the tree using gtDispTree
+//
+void Compiler::fgDumpStmtTree(GenTreePtr stmt, unsigned blkNum)
+{
+ compCurStmtNum++; // Increment the current stmtNum
+
+ printf("\n***** BB%02u, stmt %d\n", blkNum, compCurStmtNum);
+
+ if (fgOrder == FGOrderLinear || opts.compDbgInfo)
+ {
+ gtDispTree(stmt);
+ }
+ else
+ {
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgDumpBlock: dumps the contents of the given block to stdout.
+//
+// Arguments:
+// block - The block to dump.
+//
+void Compiler::fgDumpBlock(BasicBlock* block)
+{
+ printf("\n------------ ");
+ block->dspBlockHeader(this);
+
+ if (!block->IsLIR())
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ fgDumpStmtTree(stmt, block->bbNum);
+ if (stmt == block->bbTreeList)
+ {
+ block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
+ }
+ }
+ }
+ else
+ {
+ gtDispRange(LIR::AsRange(block));
+ }
+}
+
+/*****************************************************************************/
+// Walk the BasicBlock list calling fgDumpTree once per Stmt
+//
+void Compiler::fgDumpTrees(BasicBlock* firstBlock,
+ BasicBlock* lastBlock)
+{
+ compCurStmtNum = 0; // Reset the current stmtNum
+
+ /* Walk the basic blocks */
+
+ // Note that typically we have already called fgDispBasicBlocks()
+ // so we don't need to print the preds and succs again here
+ //
+ for (BasicBlock* block = firstBlock; block; block = block->bbNext)
+ {
+ fgDumpBlock(block);
+
+ if (block == lastBlock) {
+ break;
+ }
+ }
+ printf("\n-------------------------------------------------------------------------------------------------------------------\n");
+}
+
+
+/*****************************************************************************
+ * Try to create as many candidates for GTF_MUL_64RSLT as possible.
+ * We convert 'intOp1*intOp2' into 'int(long(nop(intOp1))*long(intOp2))'.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::fgStress64RsltMulCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* pComp = data->compiler;
+
+ if (tree->gtOper != GT_MUL || tree->gtType != TYP_INT || (tree->gtOverflow())) {
+ return WALK_CONTINUE;
+    }
+
+    // Wrap op1 in a GT_NOP to ensure optNarrowTree() doesn't fold the tree back to the original.
+ tree->gtOp.gtOp1 = pComp->gtNewOperNode(GT_NOP, TYP_LONG, tree->gtOp.gtOp1);
+ tree->gtOp.gtOp1 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp1, TYP_LONG);
+ tree->gtOp.gtOp2 = pComp->gtNewCastNode(TYP_LONG, tree->gtOp.gtOp2, TYP_LONG);
+ tree->gtType = TYP_LONG;
+ *pTree = pComp->gtNewCastNode(TYP_INT, tree, TYP_INT);
+
+ return WALK_SKIP_SUBTREES;
+}
+
+void Compiler::fgStress64RsltMul()
+{
+ if (!compStressCompile(STRESS_64RSLT_MUL, 20)) {
+ return;
+    }
+
+ fgWalkAllTreesPre(fgStress64RsltMulCB, (void*)this);
+}
+
+
+// This variable is used to generate "traversal labels": one-time constants with which
+// we label basic blocks that are members of the basic block list, in order to have a
+// fast, high-probability test for membership in that list. The type is "volatile" because
+// it's incremented with an atomic operation, which wants a volatile type; wrap-around to 0
+// (which likely has the highest probability of accidental collision) is postponed for a
+// very long time.
+static volatile int bbTraverseLabel = 1;
+
+/*****************************************************************************
+ *
+ * A DEBUG routine to check the consistency of the flowgraph,
+ * i.e. bbNum, bbRefs, bbPreds have to be up to date.
+ *
+ *****************************************************************************/
+
+void Compiler::fgDebugCheckBBlist(bool checkBBNum /* = false */,
+ bool checkBBRefs /* = true */)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgDebugCheckBBlist\n");
+ }
+#endif // DEBUG
+
+ fgDebugCheckBlockLinks();
+
+ if (fgBBcount > 10000 && expensiveDebugCheckLevel < 1)
+ {
+ // The basic block checks are too expensive if there are too many blocks,
+ // so give up unless we've been told to try hard.
+ return;
+ }
+
+ DWORD startTickCount = GetTickCount();
+
+ BasicBlock* block;
+ BasicBlock* prevBlock;
+ BasicBlock* blockPred;
+ flowList* pred;
+ unsigned blockRefs;
+
+#if FEATURE_EH_FUNCLETS
+ bool reachedFirstFunclet = false;
+ if (fgFuncletsCreated)
+ {
+ //
+ // Make sure that fgFirstFuncletBB is accurate.
+ // It should be the first basic block in a handler region.
+ //
+ if (fgFirstFuncletBB != nullptr)
+ {
+ assert(fgFirstFuncletBB->hasHndIndex() == true);
+ assert(fgFirstFuncletBB->bbFlags & BBF_FUNCLET_BEG);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ /* Check bbNum, bbRefs and bbPreds */
+ // First, pick a traversal stamp, and label all the blocks with it.
+ unsigned curTraversalStamp = unsigned(InterlockedIncrement((LONG*)&bbTraverseLabel));
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbTraversalStamp = curTraversalStamp;
+ }
+
+ for (prevBlock = nullptr, block = fgFirstBB;
+ block;
+ prevBlock = block, block = block->bbNext)
+ {
+ blockRefs = 0;
+
+ /* First basic block has countOfInEdges() >= 1 */
+
+ if (block == fgFirstBB)
+ {
+ noway_assert(block->countOfInEdges() >= 1);
+ blockRefs = 1;
+ }
+
+ if (checkBBNum)
+ {
+ // Check that bbNum is sequential
+ noway_assert(block->bbNext == nullptr || (block->bbNum + 1 == block->bbNext->bbNum));
+ }
+
+ // If the block is a BBJ_COND, a BBJ_SWITCH or a
+ // lowered GT_SWITCH_TABLE node then make sure it
+ // ends with a GT_JTRUE or a GT_SWITCH
+
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(block->lastNode()->gtNext == nullptr && block->lastNode()->gtOper == GT_JTRUE);
+ }
+ else if (block->bbJumpKind == BBJ_SWITCH)
+ {
+#ifndef LEGACY_BACKEND
+ noway_assert(block->lastNode()->gtNext == nullptr &&
+ (block->lastNode()->gtOper == GT_SWITCH ||
+ block->lastNode()->gtOper == GT_SWITCH_TABLE));
+#else // LEGACY_BACKEND
+ noway_assert(block->lastStmt()->gtNext == NULL &&
+ block->lastStmt()->gtStmtExpr->gtOper == GT_SWITCH);
+#endif // LEGACY_BACKEND
+ }
+ else if (!( block->bbJumpKind == BBJ_ALWAYS
+ || block->bbJumpKind == BBJ_RETURN))
+ {
+ //this block cannot have a poll
+ noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
+ }
+
+ if (block->bbCatchTyp == BBCT_FILTER)
+ {
+ if (!fgCheapPredsValid) // Don't check cheap preds
+ {
+ // A filter has no predecessors
+ noway_assert(block->bbPreds == nullptr);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ //
+ // There should be no handler blocks until
+ // we get to the fgFirstFuncletBB block,
+ // then every block should be a handler block
+ //
+ if (!reachedFirstFunclet)
+ {
+ if (block == fgFirstFuncletBB)
+ {
+ assert(block->hasHndIndex() == true);
+ reachedFirstFunclet = true;
+ }
+ else
+ {
+ assert(block->hasHndIndex() == false);
+ }
+ }
+ else // reachedFirstFunclet
+ {
+ assert(block->hasHndIndex() == true);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Don't check cheap preds.
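+        // Walk the flow pred list, accumulating each pred's flDupCount into blockRefs so it can be compared against bbRefs below.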
+ for (pred = (fgCheapPredsValid ? nullptr : block->bbPreds); pred != nullptr; blockRefs += pred->flDupCount, pred = pred->flNext)
+ {
+ assert(fgComputePredsDone); // If this isn't set, why do we have a preds list?
+
+ /* make sure this pred is part of the BB list */
+
+ blockPred = pred->flBlock;
+ noway_assert(blockPred->bbTraversalStamp == curTraversalStamp);
+
+ EHblkDsc* ehTryDsc = ehGetBlockTryDsc(block);
+ if (ehTryDsc != nullptr)
+ {
+ // You can jump to the start of a try
+ if (ehTryDsc->ebdTryBeg == block) {
+ goto CHECK_HND;
+                }
+
+ // You can jump within the same try region
+ if (bbInTryRegions(block->getTryIndex(), blockPred)) {
+ goto CHECK_HND;
+                }
+
+ // The catch block can jump back into the middle of the try
+ if (bbInCatchHandlerRegions(block, blockPred)) {
+ goto CHECK_HND;
+                }
+
+ // The end of a finally region is a BBJ_EHFINALLYRET block (during importing, BBJ_LEAVE) which
+ // is marked as "returning" to the BBJ_ALWAYS block following the BBJ_CALLFINALLY
+ // block that does a local call to the finally. This BBJ_ALWAYS is within
+ // the try region protected by the finally (for x86, ARM), but that's ok.
+ if (prevBlock->bbJumpKind == BBJ_CALLFINALLY &&
+ block->bbJumpKind == BBJ_ALWAYS &&
+ blockPred->bbJumpKind == BBJ_EHFINALLYRET) {
+ goto CHECK_HND;
+                }
+
+ printf("Jump into the middle of try region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum);
+ noway_assert(!"Jump into middle of try region");
+ }
+
+CHECK_HND:;
+
+ EHblkDsc* ehHndDsc = ehGetBlockHndDsc(block);
+ if (ehHndDsc != nullptr)
+ {
+ // You can do a BBJ_EHFINALLYRET or BBJ_EHFILTERRET into a handler region
+ if ( (blockPred->bbJumpKind == BBJ_EHFINALLYRET)
+ || (blockPred->bbJumpKind == BBJ_EHFILTERRET)) {
+ goto CHECK_JUMP;
+                }
+
+ // Our try block can call our finally block
+ if ((block->bbCatchTyp == BBCT_FINALLY) &&
+ (blockPred->bbJumpKind == BBJ_CALLFINALLY) &&
+ ehCallFinallyInCorrectRegion(blockPred, block->getHndIndex()))
+ {
+ goto CHECK_JUMP;
+ }
+
+ // You can jump within the same handler region
+ if (bbInHandlerRegions(block->getHndIndex(), blockPred)) {
+ goto CHECK_JUMP;
+                }
+
+ // A filter can jump to the start of the filter handler
+ if (ehHndDsc->HasFilter()) {
+ goto CHECK_JUMP;
+                }
+
+ printf("Jump into the middle of handler region: BB%02u branches to BB%02u\n", blockPred->bbNum, block->bbNum);
+ noway_assert(!"Jump into the middle of handler region");
+ }
+
+CHECK_JUMP:;
+
+ switch (blockPred->bbJumpKind)
+ {
+ case BBJ_COND:
+ noway_assert(blockPred->bbNext == block || blockPred->bbJumpDest == block);
+ break;
+
+ case BBJ_NONE:
+ noway_assert(blockPred->bbNext == block);
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFILTERRET:
+ noway_assert(blockPred->bbJumpDest == block);
+ break;
+
+ case BBJ_EHFINALLYRET:
+ {
+ // If the current block is a successor to a BBJ_EHFINALLYRET (return from finally),
+ // then the lexically previous block should be a call to the same finally.
+ // Verify all of that.
+
+ unsigned hndIndex = blockPred->getHndIndex();
+ EHblkDsc* ehDsc = ehGetDsc(hndIndex);
+ BasicBlock* finBeg = ehDsc->ebdHndBeg;
+
+ // Because there is no bbPrev, we have to search for the lexically previous
+ // block. We can shorten the search by only looking in places where it is legal
+ // to have a call to the finally.
+
+ BasicBlock* begBlk;
+ BasicBlock* endBlk;
+ ehGetCallFinallyBlockRange(hndIndex, &begBlk, &endBlk);
+
+ for (BasicBlock* bcall = begBlk; bcall != endBlk; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) {
+ continue;
+                    }
+
+ if (block == bcall->bbNext) {
+ goto PRED_OK;
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+
+ if (fgFuncletsCreated)
+ {
+ // There is no easy way to search just the funclets that were pulled out of
+ // the corresponding try body, so instead we search all the funclets, and if
+ // we find a potential 'hit' we check if the funclet we're looking at is
+ // from the correct try region.
+
+ for (BasicBlock* bcall = fgFirstFuncletBB; bcall; bcall = bcall->bbNext)
+ {
+ if (bcall->bbJumpKind != BBJ_CALLFINALLY || bcall->bbJumpDest != finBeg) {
+ continue;
+                        }
+
+ if (block != bcall->bbNext) {
+ continue;
+                        }
+
+ if (ehCallFinallyInCorrectRegion(bcall, hndIndex)) {
+ goto PRED_OK;
+ }
+ }
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ noway_assert(!"BBJ_EHFINALLYRET predecessor of block that doesn't follow a BBJ_CALLFINALLY!");
+ }
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ noway_assert(!"THROW and RETURN block cannot be in the predecessor list!");
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt; jumpCnt = blockPred->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab; jumpTab = blockPred->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ if (block == *jumpTab)
+ {
+ goto PRED_OK;
+ }
+ }
+ while (++jumpTab, --jumpCnt);
+
+ noway_assert(!"SWITCH in the predecessor list with no jump label to BLOCK!");
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+PRED_OK:;
+
+ }
+
+ /* Check the bbRefs */
+ noway_assert(!checkBBRefs || block->bbRefs == blockRefs);
+
+ /* Check that BBF_HAS_HANDLER is valid bbTryIndex */
+ if (block->hasTryIndex())
+ {
+ noway_assert(block->getTryIndex() < compHndBBtabCount);
+ }
+
+ /* Check if BBF_RUN_RARELY is set that we have bbWeight of zero */
+ if (block->isRunRarely())
+ {
+ noway_assert(block->bbWeight == BB_ZERO_WEIGHT);
+ }
+ else
+ {
+ noway_assert(block->bbWeight > BB_ZERO_WEIGHT);
+ }
+ }
+
+ // Make sure the one return BB is not changed.
+ if (genReturnBB)
+ {
+ noway_assert(genReturnBB->bbTreeList);
+ noway_assert(genReturnBB->IsLIR() || genReturnBB->bbTreeList->gtOper == GT_STMT);
+ noway_assert(genReturnBB->IsLIR() || genReturnBB->bbTreeList->gtType == TYP_VOID);
+ }
+
+ // The general encoder/decoder (currently) only reports "this" as a generics context as a stack location,
+ // so we mark info.compThisArg as lvAddrTaken to ensure that it is not enregistered. Otherwise, it should
+ // not be address-taken. This variable determines if the address-taken-ness of "thisArg" is "OK".
+ bool copiedForGenericsCtxt;
+#ifndef JIT32_GCENCODER
+ copiedForGenericsCtxt = ((info.compMethodInfo->options & CORINFO_GENERICS_CTXT_FROM_THIS) != 0);
+#else // JIT32_GCENCODER
+ copiedForGenericsCtxt = FALSE;
+#endif // JIT32_GCENCODER
+
+ // This if only in support of the noway_asserts it contains.
+ if (info.compIsStatic)
+ {
+ // For static method, should have never grabbed the temp.
+ noway_assert(lvaArg0Var == BAD_VAR_NUM);
+ }
+ else
+ {
+ // For instance method:
+ assert(info.compThisArg != BAD_VAR_NUM);
+ bool compThisArgAddrExposedOK = !lvaTable[info.compThisArg].lvAddrExposed;
+#ifndef JIT32_GCENCODER
+ compThisArgAddrExposedOK = compThisArgAddrExposedOK || copiedForGenericsCtxt;
+#endif // !JIT32_GCENCODER
+ noway_assert(compThisArgAddrExposedOK && // should never expose the address of arg 0 or
+ !lvaTable[info.compThisArg].lvArgWrite && // write to arg 0.
+ ( // In addition,
+ lvaArg0Var == info.compThisArg || // lvArg0Var should remain 0 if arg0 is not written to or address-exposed.
+ lvaArg0Var != info.compThisArg &&
+ (lvaTable[lvaArg0Var].lvAddrExposed || lvaTable[lvaArg0Var].lvArgWrite || copiedForGenericsCtxt)
+ ));
+ }
+}
+
+/*****************************************************************************
+ *
+ * A DEBUG routine to check that the exception flags are correctly set.
+ *
+ ****************************************************************************/
+
+void Compiler::fgDebugCheckFlags(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper != GT_STMT);
+
+ genTreeOps oper = tree->OperGet();
+ unsigned kind = tree->OperKind();
+ unsigned treeFlags = tree->gtFlags & GTF_ALL_EFFECT;
+ unsigned chkFlags = 0;
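+    // chkFlags accumulates the effect flags implied by this node and its children; at the end we
+    // require treeFlags (the flags actually set on the node) to include every bit in chkFlags.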
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ case GT_CLS_VAR:
+ chkFlags |= GTF_GLOB_REF;
+ break;
+
+ case GT_CATCH_ARG:
+ chkFlags |= GTF_ORDER_SIDEEFF;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ else if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ // During GS work, we make shadow copies for params.
+ // In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param.
+ // Then in gsReplaceShadowParams(), we change the gtLclNum to the shadow var.
+ // We also change the types of the local var tree and the assignment tree to TYP_INT if necessary.
+        // However, since we don't morph the tree at this late stage, manually propagating
+ // TYP_INT up to the GT_ASG tree is only correct if we don't need to propagate the TYP_INT back up.
+ // The following checks will ensure this.
+
+        // Is the left child of "tree" a GT_ASG?
+ if (op1 && op1->gtOper == GT_ASG)
+ {
+            assert(tree->gtType == TYP_VOID ||    // If the parent is TYP_VOID, we don't need to propagate TYP_INT up. We are fine.
+ tree->gtOper == GT_COMMA); // (or) If GT_ASG is the left child of a GT_COMMA, the type of the GT_COMMA node will
+ } // be determined by its right child. So we don't need to propagate TYP_INT up either. We are fine.
+
+        // Is the right child of "tree" a GT_ASG?
+ if (op2 && op2->gtOper == GT_ASG)
+ {
+            assert(tree->gtType == TYP_VOID); // If the parent is TYP_VOID, we don't need to propagate TYP_INT up. We are fine.
+ }
+
+ switch (oper)
+ {
+ case GT_QMARK:
+ if (op1->OperIsCompare())
+ {
+ noway_assert(op1->gtFlags & GTF_DONT_CSE);
+ }
+ else
+ {
+ noway_assert( (op1->gtOper == GT_CNS_INT) &&
+ ((op1->gtIntCon.gtIconVal == 0) || (op1->gtIntCon.gtIconVal == 1)) );
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ /* Recursively check the subtrees */
+
+        if (op1) {
+            fgDebugCheckFlags(op1);
+        }
+        if (op2) {
+            fgDebugCheckFlags(op2);
+        }
+
+        if (op1) {
+            chkFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
+        }
+        if (op2) {
+            chkFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
+        }
+
+ // We reuse the value of GTF_REVERSE_OPS for a GT_IND-specific flag,
+ // so exempt that (unary) operator.
+ if (tree->OperGet() != GT_IND && tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Must have two operands if GTF_REVERSE is set */
+ noway_assert(op1 && op2);
+
+ /* Make sure that the order of side effects has not been swapped. */
+
+ /* However CSE may introduce an assignment after the reverse flag
+ was set and thus GTF_ASG cannot be considered here. */
+
+ /* For a GT_ASG(GT_IND(x), y) we are interested in the side effects of x */
+ GenTreePtr op1p;
+ if ((kind & GTK_ASGOP) && (op1->gtOper == GT_IND))
+ {
+ op1p = op1->gtOp.gtOp1;
+ }
+ else
+ {
+ op1p = op1;
+ }
+
+ /* This isn't true any more with the sticky GTF_REVERSE */
+ /*
+ // if op1p has side effects, then op2 cannot have side effects
+ if (op1p->gtFlags & (GTF_SIDE_EFFECT & ~GTF_ASG))
+ {
+ if (op2->gtFlags & (GTF_SIDE_EFFECT & ~GTF_ASG))
+ gtDispTree(tree);
+ noway_assert(!(op2->gtFlags & (GTF_SIDE_EFFECT & ~GTF_ASG)));
+ }
+ */
+ }
+
+ if (kind & GTK_ASGOP)
+ {
+ chkFlags |= GTF_ASG;
+ }
+
+ /* Note that it is OK for treeFlags not to have a GTF_EXCEPT,
+ AssertionProp's non-Null may have cleared it */
+ if (tree->OperMayThrow())
+ {
+ chkFlags |= (treeFlags & GTF_EXCEPT);
+ }
+
+ if (oper == GT_ADDR &&
+ (op1->OperIsLocal() ||
+ op1->gtOper == GT_CLS_VAR ||
+ (op1->gtOper == GT_IND && op1->gtOp.gtOp1->gtOper == GT_CLS_VAR_ADDR)))
+ {
+            /* &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does.
+ Similarly for clsVar */
+ treeFlags |= GTF_GLOB_REF;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ else { switch (tree->OperGet())
+ {
+ case GT_CALL:
+
+ GenTreePtr args;
+ GenTreePtr argx;
+ GenTreeCall* call;
+
+ call = tree->AsCall();
+
+ chkFlags |= GTF_CALL;
+
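+        // Below we accumulate side effects from the receiver ('this'), the argument lists, the indirect-call cookie, and the call target.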
+ if ((treeFlags & GTF_EXCEPT) && !(chkFlags & GTF_EXCEPT))
+ {
+ switch (eeGetHelperNum(tree->gtCall.gtCallMethHnd))
+ {
+ // Is this a helper call that can throw an exception ?
+ case CORINFO_HELP_LDIV:
+ case CORINFO_HELP_LMOD:
+ case CORINFO_HELP_METHOD_ACCESS_CHECK:
+ case CORINFO_HELP_FIELD_ACCESS_CHECK:
+ case CORINFO_HELP_CLASS_ACCESS_CHECK:
+ case CORINFO_HELP_DELEGATE_SECURITY_CHECK:
+ chkFlags |= GTF_EXCEPT;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (call->gtCallObjp)
+ {
+ fgDebugCheckFlags(call->gtCallObjp);
+ chkFlags |= (call->gtCallObjp->gtFlags & GTF_SIDE_EFFECT);
+
+ if (call->gtCallObjp->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
+
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+ fgDebugCheckFlags(argx);
+
+ chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
+
+ if (argx->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
+
+ for (args = call->gtCallLateArgs; args; args = args->gtOp.gtOp2)
+ {
+ argx = args->gtOp.gtOp1;
+ fgDebugCheckFlags(argx);
+
+ chkFlags |= (argx->gtFlags & GTF_SIDE_EFFECT);
+
+ if (argx->gtFlags & GTF_ASG)
+ {
+ treeFlags |= GTF_ASG;
+ }
+ }
+
+ if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
+ {
+ fgDebugCheckFlags(call->gtCallCookie);
+ chkFlags |= (call->gtCallCookie->gtFlags & GTF_SIDE_EFFECT);
+ }
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ fgDebugCheckFlags(call->gtCallAddr);
+ chkFlags |= (call->gtCallAddr->gtFlags & GTF_SIDE_EFFECT);
+ }
+
+ if (call->IsUnmanaged() &&
+ (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL))
+ {
+ if (call->gtCallArgs->gtOp.gtOp1->OperGet() == GT_NOP)
+ {
+ noway_assert(call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallLateArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
+ }
+ else
+ {
+ noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF);
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+
+ GenTreePtr arrObj;
+ unsigned dim;
+
+ arrObj = tree->gtArrElem.gtArrObj;
+ fgDebugCheckFlags(arrObj);
+ chkFlags |= (arrObj->gtFlags & GTF_ALL_EFFECT);
+
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ fgDebugCheckFlags(tree->gtArrElem.gtArrInds[dim]);
+ chkFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ fgDebugCheckFlags(tree->gtArrOffs.gtOffset);
+ chkFlags |= (tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT);
+ fgDebugCheckFlags(tree->gtArrOffs.gtIndex);
+ chkFlags |= (tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT);
+ fgDebugCheckFlags(tree->gtArrOffs.gtArrObj);
+ chkFlags |= (tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT);
+ break;
+
+ default:
+ break;
+ }
+    }
+
+ if (chkFlags & ~treeFlags)
+ {
+ // Print the tree so we can see it in the log.
+ printf("Missing flags on tree [%06d]: ", dspTreeID(tree));
+ GenTree::gtDispFlags(chkFlags & ~treeFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+
+ noway_assert(!"Missing flags on tree");
+
+ // Print the tree again so we can see it right after we hook up the debugger.
+ printf("Missing flags on tree [%06d]: ", dspTreeID(tree));
+ GenTree::gtDispFlags(chkFlags & ~treeFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+ }
+ else if (treeFlags & ~chkFlags)
+ {
+#if 0
+ // TODO-Cleanup:
+ /* The tree has extra flags set. However, this will happen if we
+ replace a subtree with something, but don't clear the flags up
+ the tree. Can't flag this unless we start clearing flags above.
+
+ Note: we need this working for GTF_CALL and CSEs, so I'm enabling
+ it for calls.
+ */
+ if (tree->OperGet() != GT_CALL && (treeFlags & GTF_CALL) && !(chkFlags & GTF_CALL))
+ {
+ // Print the tree so we can see it in the log.
+ printf("Extra GTF_CALL flags on parent tree [%X]: ", tree);
+ GenTree::gtDispFlags(treeFlags & ~chkFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+
+ noway_assert(!"Extra flags on tree");
+
+ // Print the tree again so we can see it right after we hook up the debugger.
+ printf("Extra GTF_CALL flags on parent tree [%X]: ", tree);
+ GenTree::gtDispFlags(treeFlags & ~chkFlags, GTF_DEBUG_NONE);
+ printf("\n");
+ gtDispTree(tree);
+ }
+#endif // 0
+ }
+}
+
+// DEBUG routine to check correctness of the internal gtNext, gtPrev threading of a statement.
+// This threading is only valid when fgStmtListThreaded is true.
+// This calls an alternate method for FGOrderLinear.
+void Compiler::fgDebugCheckNodeLinks(BasicBlock* block, GenTree* node)
+{
+    // LIR blocks are checked using LIR::Range::CheckLIR().
+ if (block->IsLIR())
+ {
+ LIR::AsRange(block).CheckLIR(this);
+ // TODO: return?
+ }
+
+ GenTreeStmt* stmt = node->AsStmt();
+
+ assert(fgStmtListThreaded);
+
+ noway_assert(stmt->gtStmtList);
+
+ // The first node's gtPrev must be nullptr (the gtPrev list is not circular).
+ // The last node's gtNext must be nullptr (the gtNext list is not circular). This is tested if the loop below terminates.
+ assert(stmt->gtStmtList->gtPrev == nullptr);
+
+ for (GenTreePtr tree = stmt->gtStmtList;
+ tree != nullptr;
+ tree = tree->gtNext)
+ {
+ if (tree->gtPrev)
+ {
+ noway_assert(tree->gtPrev->gtNext == tree);
+ }
+ else
+ {
+ noway_assert(tree == stmt->gtStmtList);
+ }
+
+ if (tree->gtNext)
+ {
+ noway_assert(tree->gtNext->gtPrev == tree);
+ }
+ else
+ {
+ noway_assert(tree == stmt->gtStmtExpr);
+ }
+
+ /* Cross-check gtPrev,gtNext with gtOp for simple trees */
+
+ GenTreePtr expectedPrevTree = nullptr;
+
+ if (tree->OperIsLeaf())
+ {
+ if (tree->gtOper == GT_CATCH_ARG)
+ {
+ // The GT_CATCH_ARG should always have GTF_ORDER_SIDEEFF set
+ noway_assert(tree->gtFlags & GTF_ORDER_SIDEEFF);
+ // The GT_CATCH_ARG has to be the first thing evaluated
+ noway_assert(stmt == block->FirstNonPhiDef());
+ noway_assert(stmt->gtStmtList->gtOper == GT_CATCH_ARG);
+ // The root of the tree should have GTF_ORDER_SIDEEFF set
+ noway_assert(stmt->gtStmtExpr->gtFlags & GTF_ORDER_SIDEEFF);
+ }
+ }
+
+ if (tree->OperIsUnary() && tree->gtOp.gtOp1)
+ {
+ GenTreePtr lclVarTree;
+ expectedPrevTree = tree->gtOp.gtOp1;
+ }
+ else if (tree->OperIsBinary() && tree->gtOp.gtOp1)
+ {
+ switch (tree->gtOper)
+ {
+ case GT_QMARK:
+ expectedPrevTree = tree->gtOp.gtOp2->AsColon()->ThenNode(); // "then" operand of the GT_COLON (generated second).
+ break;
+
+ case GT_COLON:
+ expectedPrevTree = tree->AsColon()->ElseNode(); // "else" branch result (generated first).
+ break;
+
+ default:
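+                    // With GTF_REVERSE_OPS, op2 is evaluated before op1, so op1 is the node linked immediately before this one; otherwise it is op2.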
+ if (tree->gtOp.gtOp2)
+ {
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ expectedPrevTree = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ expectedPrevTree = tree->gtOp.gtOp2;
+ }
+ }
+ else
+ {
+ expectedPrevTree = tree->gtOp.gtOp1;
+ }
+ break;
+ }
+ }
+
+ noway_assert(expectedPrevTree == nullptr || // No expectations about the prev node
+ tree->gtPrev == expectedPrevTree); // The "normal" case
+ }
+}
+
+
+/*****************************************************************************
+ *
+ * A DEBUG routine to check the correctness of the links between GT_STMT nodes
+ * and ordinary nodes within a statement.
+ *
+ ****************************************************************************/
+
+void Compiler::fgDebugCheckLinks(bool morphTrees)
+{
+ // This used to be only on for stress, and there was a comment stating that
+ // it was "quite an expensive operation" but I did not find that to be true.
+ // Set DO_SANITY_DEBUG_CHECKS to false to revert to that behavior.
+ const bool DO_SANITY_DEBUG_CHECKS = true;
+
+ if (!DO_SANITY_DEBUG_CHECKS &&
+ !compStressCompile(STRESS_CHK_FLOW_UPDATE, 30))
+ {
+ return;
+ }
+
+ fgDebugCheckBlockLinks();
+
+ /* For each basic block check the bbTreeList links */
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+PROCESS_BLOCK_AGAIN:;
+ if (block->IsLIR())
+ {
+ LIR::AsRange(block).CheckLIR(this);
+ }
+ else
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Verify that bbTreeList is threaded correctly */
+ /* Note that for the GT_STMT list, the gtPrev list is circular. The gtNext list is not: gtNext of the last GT_STMT in a block is nullptr. */
+
+ noway_assert(stmt->gtPrev);
+
+ if (stmt == block->bbTreeList)
+ {
+ noway_assert(stmt->gtPrev->gtNext == nullptr);
+ }
+ else
+ {
+ noway_assert(stmt->gtPrev->gtNext == stmt);
+ }
+
+ if (stmt->gtNext)
+ {
+ noway_assert(stmt->gtNext->gtPrev == stmt);
+ }
+ else
+ {
+ noway_assert(block->lastStmt() == stmt);
+ }
+
+ /* For each statement check that the exception flags are properly set */
+
+ noway_assert(stmt->gtStmtExpr);
+
+ if (verbose && 0)
+ {
+ gtDispTree(stmt->gtStmtExpr);
+ }
+
+ fgDebugCheckFlags(stmt->gtStmtExpr);
+
+ // Not only will this stress fgMorphBlockStmt(), but we also get all the checks
+ // done by fgMorphTree()
+
+ if (morphTrees)
+ {
+ // If 'stmt' is removed from the block, restart
+ if (fgMorphBlockStmt(block, stmt DEBUGARG("test morphing")))
+ {
+ goto PROCESS_BLOCK_AGAIN;
+ }
+ }
+
+                /* For each GT_STMT node check that the nodes are threaded correctly - gtStmtList */
+
+ if (fgStmtListThreaded)
+ {
+ fgDebugCheckNodeLinks(block, stmt);
+ }
+ }
+ }
+ }
+}
+
+// ensure that bbNext and bbPrev are consistent
+void Compiler::fgDebugCheckBlockLinks()
+{
+ assert(fgFirstBB->bbPrev == nullptr);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbNext)
+ {
+ assert(block->bbNext->bbPrev == block);
+ }
+ else
+ {
+ assert(block == fgLastBB);
+ }
+
+ if (block->bbPrev)
+ {
+ assert(block->bbPrev->bbNext == block);
+ }
+ else
+ {
+ assert(block == fgFirstBB);
+ }
+
+ // If this is a switch, check that the tables are consistent.
+ // Note that we don't call GetSwitchDescMap(), because it has the side-effect
+ // of allocating it if it is not present.
+ if (block->bbJumpKind == BBJ_SWITCH && m_switchDescMap != nullptr)
+ {
+ SwitchUniqueSuccSet uniqueSuccSet;
+ if (m_switchDescMap->Lookup(block, &uniqueSuccSet))
+ {
+ // Create a set with all the successors. Don't use BlockSet, so we don't need to worry
+ // about the BlockSet epoch.
+ BitVecTraits bitVecTraits(fgBBNumMax + 1, this);
+ BitVec BITVEC_INIT_NOCOPY(succBlocks, BitVecOps::MakeEmpty(&bitVecTraits));
+ BasicBlock** jumpTable = block->bbJumpSwt->bbsDstTab;
+ unsigned jumpCount = block->bbJumpSwt->bbsCount;
+ for (unsigned i = 0; i < jumpCount; i++)
+ {
+ BitVecOps::AddElemD(&bitVecTraits, succBlocks, jumpTable[i]->bbNum);
+ }
+ // Now we should have a set of unique successors that matches what's in the switchMap.
+ // First, check the number of entries, then make sure all the blocks in uniqueSuccSet
+ // are in the BlockSet.
+ unsigned count = BitVecOps::Count(&bitVecTraits, succBlocks);
+ assert(uniqueSuccSet.numDistinctSuccs == count);
+ for (unsigned i = 0; i < uniqueSuccSet.numDistinctSuccs; i++)
+ {
+ assert(BitVecOps::IsMember(&bitVecTraits, succBlocks, uniqueSuccSet.nonDuplicates[i]->bbNum));
+ }
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+//------------------------------------------------------------------------
+// fgCheckInlineDepthAndRecursion: compute depth of the candidate, and
+// check for recursion.
+//
+// Return Value:
+//    The depth of the inline candidate. The root method is at depth 0, top-level
+//    candidates are at depth 1, etc.
+//
+// Notes:
+// We generally disallow recursive inlines by policy. However, they are
+// supported by the underlying machinery.
+//
+// Likewise the depth limit is a policy consideration, and serves mostly
+// as a safeguard to prevent runaway inlining of small methods.
+
+unsigned Compiler::fgCheckInlineDepthAndRecursion(InlineInfo* inlineInfo)
+{
+ BYTE* candidateCode = inlineInfo->inlineCandidateInfo->methInfo.ILCode;
+ InlineContext* inlineContext = inlineInfo->iciStmt->gtStmt.gtInlineContext;
+ InlineResult* inlineResult = inlineInfo->inlineResult;
+
+ // There should be a context for all candidates.
+ assert(inlineContext != nullptr);
+ int depth = 0;
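+    // Walk up the chain of inline contexts; each ancestor adds one to the depth, and an ancestor
+    // that was inlined from the same IL code buffer indicates a recursive inline.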
+
+ for (; inlineContext != nullptr; inlineContext = inlineContext->GetParent())
+ {
+
+ depth++;
+
+ if (inlineContext->GetCode() == candidateCode)
+ {
+ // This inline candidate has the same IL code buffer as an already
+ // inlined method does.
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_IS_RECURSIVE);
+ break;
+ }
+
+ if (depth > InlineStrategy::IMPLEMENTATION_MAX_INLINE_DEPTH)
+ {
+ break;
+ }
+ }
+
+ inlineResult->NoteInt(InlineObservation::CALLSITE_DEPTH, depth);
+ return depth;
+}
+
+/*****************************************************************************
+ *
+ * Inlining phase
+ */
+
+
+void Compiler::fgInline()
+{
+ if (!opts.OptEnabled(CLFLG_INLINING)) {
+ return;
+    }
+
+#ifdef DEBUG
+ if (verbose) {
+ printf("*************** In fgInline()\n");
+    }
+#endif // DEBUG
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block != nullptr);
+
+ // Set the root inline context on all statements
+ InlineContext* rootContext = m_inlineStrategy->GetRootContext();
+
+ for (; block != nullptr; block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt();
+ stmt;
+ stmt = stmt->gtNextStmt)
+ {
+ stmt->gtInlineContext = rootContext;
+ }
+ }
+
+ // Reset block back to start for inlining
+ block = fgFirstBB;
+
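+    // Walk every statement in every block: expand inline candidates in place, replace any GT_RET_EXPR
+    // placeholders with the inlined return expressions, and remove statements that become nothing nodes.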
+ do
+ {
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
+ GenTreeStmt* stmt;
+ GenTreePtr expr;
+
+ for (stmt = block->firstStmt();
+ stmt != nullptr;
+ stmt = stmt->gtNextStmt)
+ {
+ expr = stmt->gtStmtExpr;
+
+ // See if we can expand the inline candidate
+ if ((expr->gtOper == GT_CALL) && ((expr->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0))
+ {
+ GenTreeCall* call = expr->AsCall();
+ InlineResult inlineResult(this, call, stmt, "fgInline");
+
+ fgMorphStmt = stmt;
+
+ fgMorphCallInline(call, &inlineResult);
+
+ if (stmt->gtStmtExpr->IsNothingNode())
+ {
+ fgRemoveStmt(block, stmt);
+ continue;
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ // Look for non-candidates.
+ fgWalkTreePre(&stmt->gtStmtExpr, fgFindNonInlineCandidate, stmt);
+#endif
+ }
+
+ // See if we need to replace the return value place holder.
+ fgWalkTreePre(&stmt->gtStmtExpr,
+ fgUpdateInlineReturnExpressionPlaceHolder,
+ (void *) this);
+
+ // See if stmt is of the form GT_COMMA(call, nop)
+ // If yes, we can get rid of GT_COMMA.
+ if (expr->OperGet() == GT_COMMA &&
+ expr->gtOp.gtOp1->OperGet() == GT_CALL &&
+ expr->gtOp.gtOp2->OperGet() == GT_NOP)
+ {
+ stmt->gtStmtExpr = expr->gtOp.gtOp1;
+ }
+ }
+
+ block = block->bbNext;
+
+ } while (block);
+
+#ifdef DEBUG
+
+ // Check that we should not have any inline candidate or return value place holder left.
+
+ block = fgFirstBB;
+ noway_assert(block);
+
+ do
+ {
+ GenTreeStmt* stmt;
+
+ for (stmt = block->firstStmt();
+ stmt;
+ stmt = stmt->gtNextStmt)
+ {
+ // Call Compiler::fgDebugCheckInlineCandidates on each node
+ fgWalkTreePre(&stmt->gtStmtExpr, fgDebugCheckInlineCandidates);
+ }
+
+ block = block->bbNext;
+
+ } while (block);
+
+ fgVerifyHandlerTab();
+
+ if (verbose)
+ {
+ printf("*************** After fgInline()\n");
+ fgDispBasicBlocks(true);
+ fgDispHandlerTab();
+ }
+
+ if (verbose || fgPrintInlinedMethods)
+ {
+ printf("**************** Inline Tree\n");
+ m_inlineStrategy->Dump();
+ }
+
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// fgFindNonInlineCandidate: tree walk helper to ensure that a tree node
+// that is not an inline candidate is noted as a failed inline.
+//
+// Arguments:
+// pTree - pointer to pointer tree node being walked
+// data - contextual data for the walk
+//
+// Return Value:
+// walk result
+//
+// Note:
+// Invokes fgNoteNonInlineCandidate on the nodes it finds.
+
+Compiler::fgWalkResult Compiler::fgFindNonInlineCandidate(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if (tree->gtOper == GT_CALL)
+ {
+ Compiler* compiler = data->compiler;
+ GenTreePtr stmt = (GenTreePtr) data->pCallbackData;
+ GenTreeCall* call = tree->AsCall();
+
+ compiler->fgNoteNonInlineCandidate(stmt, call);
+ }
+ return WALK_CONTINUE;
+}
+
+//------------------------------------------------------------------------
+// fgNoteNonInlineCandidate: account for inlining failures in calls
+// not marked as inline candidates.
+//
+// Arguments:
+// tree - statement containing the call
+// call - the call itself
+//
+// Notes:
+// Used in debug only to try and place descriptions of inline failures
+// into the proper context in the inline tree.
+
+void Compiler::fgNoteNonInlineCandidate(GenTreePtr tree,
+ GenTreeCall* call)
+{
+ InlineResult inlineResult(this, call, nullptr, "fgNotInlineCandidate");
+ InlineObservation currentObservation = InlineObservation::CALLSITE_NOT_CANDIDATE;
+
+ // Try and recover the reason left behind when the jit decided
+ // this call was not a candidate.
+ InlineObservation priorObservation = call->gtInlineObservation;
+
+ if (InlIsValidObservation(priorObservation))
+ {
+ currentObservation = priorObservation;
+ }
+
+ // Would like to just call noteFatal here, since this
+ // observation blocked candidacy, but policy comes into play
+ // here too. Also note there's no need to re-report these
+ // failures, since we reported them during the initial
+ // candidate scan.
+ InlineImpact impact = InlGetImpact(currentObservation);
+
+ if (impact == InlineImpact::FATAL)
+ {
+ inlineResult.NoteFatal(currentObservation);
+ }
+ else
+ {
+ inlineResult.Note(currentObservation);
+ }
+
+ inlineResult.SetReported();
+
+ if (call->gtCallType == CT_USER_FUNC)
+ {
+ // Create InlineContext for the failure
+ m_inlineStrategy->NewFailure(tree, &inlineResult);
+ }
+}
+
+#endif
+
+#if FEATURE_MULTIREG_RET
+
+/*********************************************************************************
+ *
+ * tree - The node which needs to be converted to a struct pointer.
+ *
+ * Return the pointer by either __replacing__ the tree node with a suitable pointer
+ * type, by returning a subtree __without replacing__ anything, or by __modifying__
+ * a subtree.
+ */
+GenTreePtr Compiler::fgGetStructAsStructPtr(GenTreePtr tree)
+{
+ noway_assert((tree->gtOper == GT_LCL_VAR) ||
+ (tree->gtOper == GT_FIELD) ||
+ (tree->gtOper == GT_IND) ||
+ (tree->gtOper == GT_BLK) ||
+ (tree->gtOper == GT_OBJ) ||
+ tree->OperIsSIMD() ||
+ // tree->gtOper == GT_CALL || cannot get address of call.
+ // tree->gtOper == GT_MKREFANY || inlining should've been aborted due to mkrefany opcode.
+ // tree->gtOper == GT_RET_EXPR || cannot happen after fgUpdateInlineReturnExpressionPlaceHolder
+ (tree->gtOper == GT_COMMA));
+
+ switch (tree->OperGet())
+ {
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_IND:
+ return tree->gtOp.gtOp1;
+
+ case GT_COMMA:
+ tree->gtOp.gtOp2 = fgGetStructAsStructPtr(tree->gtOp.gtOp2);
+ tree->gtType = TYP_BYREF;
+ return tree;
+
+ default:
+ return gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ }
+}
+
+/***************************************************************************************************
+ * child - The inlinee of the retExpr node.
+ * retClsHnd - The struct class handle of the type of the inlinee.
+ *
+ * Assign the inlinee to a tmp: if it is a call, just assign it to a lclVar; otherwise
+ * use a copy block to do the assignment.
+ */
+GenTreePtr Compiler::fgAssignStructInlineeToVar(GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(child->gtOper != GT_RET_EXPR && child->gtOper != GT_MKREFANY);
+
+ unsigned tmpNum = lvaGrabTemp(false DEBUGARG("RetBuf for struct inline return candidates."));
+ lvaSetStruct(tmpNum, retClsHnd, false);
+ var_types structType = lvaTable[tmpNum].lvType;
+
+ GenTreePtr dst = gtNewLclvNode(tmpNum, structType);
+
+ // If we have a call, we'd like it to be: V00 = call(), but first check if
+ // we have a ", , , call()" -- this is very defensive as we may never get
+ // an inlinee that is made of commas. If the inlinee is not a call, then
+ // we use a copy block to do the assignment.
+ GenTreePtr src = child;
+ GenTreePtr lastComma = NULL;
+ while (src->gtOper == GT_COMMA)
+ {
+ lastComma = src;
+ src = src->gtOp.gtOp2;
+ }
+
+ GenTreePtr newInlinee = NULL;
+ if (src->gtOper == GT_CALL)
+ {
+ // If inlinee was just a call, new inlinee is v05 = call()
+ newInlinee = gtNewAssignNode(dst, src);
+
+ // When returning a multi-register value in a local var, make sure the variable is
+ // marked as lvIsMultiRegRet, so it does not get promoted.
+ if (src->AsCall()->HasMultiRegRetVal())
+ {
+ lvaTable[tmpNum].lvIsMultiRegRet = true;
+ }
+
+ // If inlinee was comma, but a deeper call, new inlinee is (, , , v05 = call())
+ if (child->gtOper == GT_COMMA)
+ {
+ lastComma->gtOp.gtOp2 = newInlinee;
+ newInlinee = child;
+ }
+ }
+ else
+ {
+ // Inlinee is not a call, so just create a copy block to the tmp.
+ src = child;
+ GenTreePtr dstAddr = fgGetStructAsStructPtr(dst);
+ GenTreePtr srcAddr = fgGetStructAsStructPtr(src);
+ newInlinee = gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false);
+ }
+
+ GenTreePtr production = gtNewLclvNode(tmpNum, structType);
+ return gtNewOperNode(GT_COMMA, structType, newInlinee, production);
+}
+
+/***************************************************************************************************
+ * tree - The tree pointer that has one of its child nodes as retExpr.
+ * child - The inlinee child.
+ * retClsHnd - The struct class handle of the type of the inlinee.
+ *
+ * V04 = call() assignments are okay as we codegen them directly. Everything else needs to be a copy
+ * block or a temp. For example, a cast(ldobj) then becomes cast(v05 = ldobj, v05). But it is a very
+ * rare (or impossible) scenario that a retExpr transforms into an ldobj other than a lclVar/call,
+ * so it is not worthwhile to do pattern matching optimizations like reducing addr(ldobj(op1))
+ * to just op1.
+ */
+void Compiler::fgAttachStructInlineeToAsg(GenTreePtr tree, GenTreePtr child, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ // We are okay to have:
+ // 1. V02 = call();
+ // 2. copyBlk(dstAddr, srcAddr);
+ assert(tree->gtOper == GT_ASG);
+
+ // We have an assignment, we codegen only V05 = call().
+ if (child->gtOper == GT_CALL && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ return;
+ }
+
+ GenTreePtr dstAddr = fgGetStructAsStructPtr(tree->gtOp.gtOp1);
+ GenTreePtr srcAddr = fgGetStructAsStructPtr((child->gtOper == GT_CALL)
+ ? fgAssignStructInlineeToVar(child, retClsHnd) // Assign to a variable if it is a call.
+ : child); // Just get the address, if not a call.
+
+ tree->CopyFrom(gtNewCpObjNode(dstAddr, srcAddr, retClsHnd, false), this);
+}
+
+#endif // FEATURE_MULTIREG_RET
+
+/*****************************************************************************
+ * Callback to replace the inline return expression place holder (GT_RET_EXPR)
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+ CORINFO_CLASS_HANDLE retClsHnd = NO_CLASS_HANDLE;
+
+ if (tree->gtOper == GT_RET_EXPR)
+ {
+ // We are going to copy the tree from the inlinee,
+ // so record the handle now.
+ //
+ if (varTypeIsStruct(tree))
+ {
+ retClsHnd = tree->gtRetExpr.gtRetClsHnd;
+ }
+
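+        // The expanded candidate may itself be another GT_RET_EXPR (from a nested inline), so keep
+        // substituting until we reach the actual inlined expression.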
+ do
+ {
+ // Obtained the expanded inline candidate
+ GenTreePtr inlineCandidate = tree->gtRetExpr.gtInlineCandidate;
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("\nReplacing the return expression placeholder ");
+ printTreeID(tree);
+ printf(" with ");
+ printTreeID(inlineCandidate);
+ printf("\n");
+                // Dump out the old return expression placeholder; it will be overwritten by the CopyFrom below
+ comp->gtDispTree(tree);
+ }
+#endif // DEBUG
+
+ tree->CopyFrom(inlineCandidate, comp);
+
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("\nInserting the inline return expression\n");
+ comp->gtDispTree(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ while (tree->gtOper == GT_RET_EXPR);
+ }
+
+#if FEATURE_MULTIREG_RET
+
+ // Did we record a struct return class handle above?
+ //
+ if (retClsHnd != NO_CLASS_HANDLE)
+ {
+ // Is this a type that is returned in multiple registers?
+        // If so, we need to force it into a form we accept,
+ // i.e. LclVar = call()
+ //
+ if (comp->IsMultiRegReturnedType(retClsHnd))
+ {
+ GenTreePtr parent = data->parent;
+ // See assert below, we only look one level above for an asg parent.
+ if (parent->gtOper == GT_ASG)
+ {
+ // Either lhs is a call V05 = call(); or lhs is addr, and asg becomes a copyBlk.
+ comp->fgAttachStructInlineeToAsg(parent, tree, retClsHnd);
+ }
+ else
+ {
+ // Just assign the inlinee to a variable to keep it simple.
+ tree->CopyFrom(comp->fgAssignStructInlineeToVar(tree, retClsHnd), comp);
+ }
+ }
+ }
+
+#if defined(DEBUG)
+
+ // Make sure we don't have a tree like so: V05 = (, , , retExpr);
+    // since we only look one level above for the '=' parent and
+    // do not check for a series of COMMAs (see above).
+ // Importer and FlowGraph will not generate such a tree, so just
+ // leaving an assert in here. This can be fixed by looking ahead
+ // when we visit GT_ASG similar to fgAttachStructInlineeToAsg.
+ //
+ if ((tree->gtOper == GT_ASG) && (tree->gtOp.gtOp2->gtOper == GT_COMMA))
+ {
+ GenTreePtr comma;
+ for (comma = tree->gtOp.gtOp2;
+ comma->gtOper == GT_COMMA;
+ comma = comma->gtOp.gtOp2)
+ {
+ // empty
+ }
+
+ noway_assert(!varTypeIsStruct(comma) ||
+ comma->gtOper != GT_RET_EXPR ||
+ !comp->IsMultiRegReturnedType(comma->gtRetExpr.gtRetClsHnd));
+ }
+
+#endif // defined(DEBUG)
+#endif // FEATURE_MULTIREG_RET
+
+ return WALK_CONTINUE;
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Callback to make sure there are no more GT_RET_EXPR or GTF_CALL_INLINE_CANDIDATE nodes.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::fgDebugCheckInlineCandidates(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if (tree->gtOper == GT_CALL)
+ {
+ assert((tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0);
+ }
+ else
+ {
+ assert(tree->gtOper != GT_RET_EXPR);
+ }
+
+ return WALK_CONTINUE;
+}
+
+#endif // DEBUG
+
+
+void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call,
+ InlineResult* inlineResult)
+{
+ noway_assert(call->gtOper == GT_CALL);
+ noway_assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
+ noway_assert(opts.OptEnabled(CLFLG_INLINING));
+
+ // This is the InlineInfo struct representing a method to be inlined.
+ InlineInfo inlineInfo = {nullptr};
+
+ CORINFO_METHOD_HANDLE fncHandle = call->gtCallMethHnd;
+
+ inlineInfo.fncHandle = fncHandle;
+ inlineInfo.iciCall = call;
+ inlineInfo.iciStmt = fgMorphStmt;
+ inlineInfo.iciBlock = compCurBB;
+ inlineInfo.thisDereferencedFirst = false;
+ inlineInfo.retExpr = nullptr;
+ inlineInfo.inlineResult = inlineResult;
+#ifdef FEATURE_SIMD
+ inlineInfo.hasSIMDTypeArgLocalOrReturn = false;
+#endif // FEATURE_SIMD
+
+ InlineCandidateInfo* inlineCandidateInfo = call->gtInlineCandidateInfo;
+ noway_assert(inlineCandidateInfo);
+ // Store the link to inlineCandidateInfo into inlineInfo
+ inlineInfo.inlineCandidateInfo = inlineCandidateInfo;
+
+ unsigned inlineDepth = fgCheckInlineDepthAndRecursion(&inlineInfo);
+
+ if (inlineResult->IsFailure())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+            printf("Recursion or excessive inline depth detected. Will not expand this INLINECANDIDATE.\n");
+ }
+#endif // DEBUG
+ return;
+ }
+
+ // Set the trap to catch all errors (including recoverable ones from the EE)
+ struct Param
+ {
+ Compiler* pThis;
+ GenTree* call;
+ CORINFO_METHOD_HANDLE fncHandle;
+ InlineCandidateInfo* inlineCandidateInfo;
+ InlineInfo* inlineInfo;
+ } param = {nullptr};
+
+ param.pThis = this;
+ param.call = call;
+ param.fncHandle = fncHandle;
+ param.inlineCandidateInfo = inlineCandidateInfo;
+ param.inlineInfo = &inlineInfo;
+ bool success = eeRunWithErrorTrap<Param>([](Param* pParam)
+ {
+ // Init the local var info of the inlinee
+ pParam->pThis->impInlineInitVars(pParam->inlineInfo);
+
+ if (pParam->inlineInfo->inlineResult->IsCandidate())
+ {
+ /* Clear the temp table */
+ memset(pParam->inlineInfo->lclTmpNum, -1, sizeof(pParam->inlineInfo->lclTmpNum));
+
+ //
+ // Prepare the call to jitNativeCode
+ //
+
+ pParam->inlineInfo->InlinerCompiler = pParam->pThis;
+ if (pParam->pThis->impInlineInfo == nullptr)
+ {
+ pParam->inlineInfo->InlineRoot = pParam->pThis;
+ }
+ else
+ {
+ pParam->inlineInfo->InlineRoot = pParam->pThis->impInlineInfo->InlineRoot;
+ }
+ pParam->inlineInfo->argCnt = pParam->inlineCandidateInfo->methInfo.args.totalILArgs();
+ pParam->inlineInfo->tokenLookupContextHandle = pParam->inlineCandidateInfo->exactContextHnd;
+
+ JITLOG_THIS(pParam->pThis,
+ (LL_INFO100000,
+ "INLINER: inlineInfo.tokenLookupContextHandle for %s set to 0x%p:\n",
+ pParam->pThis->eeGetMethodFullName(pParam->fncHandle),
+ pParam->pThis->dspPtr(pParam->inlineInfo->tokenLookupContextHandle)));
+
+ CORJIT_FLAGS compileFlagsForInlinee;
+ memcpy(&compileFlagsForInlinee, pParam->pThis->opts.jitFlags, sizeof(compileFlagsForInlinee));
+ compileFlagsForInlinee.corJitFlags &= ~CORJIT_FLG_LOST_WHEN_INLINING;
+ compileFlagsForInlinee.corJitFlags |= CORJIT_FLG_SKIP_VERIFICATION;
+
+#ifdef DEBUG
+ if (pParam->pThis->verbose)
+ {
+ printf("\nInvoking compiler for the inlinee method %s :\n",
+ pParam->pThis->eeGetMethodFullName(pParam->fncHandle));
+ }
+#endif // DEBUG
+
+ int result = jitNativeCode(pParam->fncHandle,
+ pParam->inlineCandidateInfo->methInfo.scope,
+ pParam->pThis->info.compCompHnd,
+ &pParam->inlineCandidateInfo->methInfo,
+ (void**)pParam->inlineInfo,
+ nullptr,
+ &compileFlagsForInlinee,
+ pParam->inlineInfo);
+
+ if (result != CORJIT_OK)
+ {
+ // If we haven't yet determined why this inline fails, use
+ // a catch-all something bad happened observation.
+ InlineResult* innerInlineResult = pParam->inlineInfo->inlineResult;
+
+ if (!innerInlineResult->IsFailure())
+ {
+ innerInlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_FAILURE);
+ }
+ }
+ }
+ }, &param);
+ if (!success)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nInlining failed due to an exception during invoking the compiler for the inlinee method %s.\n",
+ eeGetMethodFullName(fncHandle));
+ }
+#endif // DEBUG
+
+ // If we haven't yet determined why this inline fails, use
+ // a catch-all something bad happened observation.
+ if (!inlineResult->IsFailure())
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR);
+ }
+ }
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nDone invoking compiler for the inlinee method %s\n",
+ eeGetMethodFullName(fncHandle));
+ }
+#endif // DEBUG
+
+    // If there is a non-void return but we haven't set pInlineInfo->retExpr,
+    // that means we haven't imported any BB that contains a CEE_RET opcode.
+    // (This could happen, for example, for a BBJ_THROW block falling through to a BBJ_RETURN block,
+    // which causes the BBJ_RETURN block not to be imported at all.)
+    // Fail the inlining attempt.
+ if (inlineCandidateInfo->fncRetType != TYP_VOID && inlineInfo.retExpr == nullptr)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nInlining failed because pInlineInfo->retExpr is not set in the inlinee method %s.\n",
+ eeGetMethodFullName(fncHandle));
+ }
+#endif // DEBUG
+ inlineResult->NoteFatal(InlineObservation::CALLEE_LACKS_RETURN);
+ return;
+ }
+
+ if (inlineCandidateInfo->initClassResult & CORINFO_INITCLASS_SPECULATIVE)
+ {
+ // we defer the call to initClass() until inlining is completed in case it fails. If inlining succeeds,
+ // we will call initClass().
+ if (!(info.compCompHnd->initClass(nullptr /* field */, fncHandle /* method */,
+ inlineCandidateInfo->exactContextHnd /* context */) & CORINFO_INITCLASS_INITIALIZED))
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_CLASS_INIT_FAILURE);
+ return;
+ }
+ }
+
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ // The inlining attempt cannot be failed starting from this point.
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+    // We've successfully obtained the list of the inlinee's basic blocks.
+    // Let's insert it into the inliner's basic block list.
+ fgInsertInlineeBlocks(&inlineInfo);
+
+#ifdef DEBUG
+
+ if (verbose || fgPrintInlinedMethods)
+ {
+ printf("Successfully inlined %s (%d IL bytes) (depth %d) [%s]\n",
+ eeGetMethodFullName(fncHandle),
+ inlineCandidateInfo->methInfo.ILCodeSize,
+ inlineDepth,
+ inlineResult->ReasonString());
+ }
+
+ if (verbose)
+ {
+ printf("--------------------------------------------------------------------------------------------\n");
+ }
+#endif // DEBUG
+
+#if defined(DEBUG)
+ impInlinedCodeSize += inlineCandidateInfo->methInfo.ILCodeSize;
+#endif
+
+ // We inlined...
+ inlineResult->NoteSuccess();
+}
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+// The inlining attempt cannot be failed starting from this point.
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
+{
+ GenTreePtr iciCall = pInlineInfo->iciCall;
+ GenTreePtr iciStmt = pInlineInfo->iciStmt;
+ BasicBlock* iciBlock = pInlineInfo->iciBlock;
+ BasicBlock* block;
+
+    // We could write a better assert here. For example, we could check that
+    // iciBlock contains iciStmt, which in turn contains iciCall.
+ noway_assert(iciBlock->bbTreeList != nullptr);
+ noway_assert(iciStmt->gtStmt.gtStmtExpr != nullptr);
+ noway_assert(iciCall->gtOper == GT_CALL);
+
+#ifdef DEBUG
+
+ GenTreePtr currentDumpStmt = nullptr;
+
+ if (verbose)
+ {
+ printf("\n\n----------- Statements (and blocks) added due to the inlining of call ");
+ printTreeID(iciCall);
+ printf(" -----------\n");
+ // gtDispTree(iciStmt);
+ }
+
+#endif // DEBUG
+
+ //
+ // Create a new inline context and mark the inlined statements with it
+ //
+ InlineContext* calleeContext = m_inlineStrategy->NewSuccess(pInlineInfo);
+
+ for (block = InlineeCompiler->fgFirstBB;
+ block != nullptr;
+ block = block->bbNext)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt();
+ stmt;
+ stmt = stmt->gtNextStmt)
+ {
+ stmt->gtInlineContext = calleeContext;
+ }
+ }
+
+ //
+ // Prepend statements.
+ //
+ GenTreePtr stmtAfter;
+ stmtAfter = fgInlinePrependStatements(pInlineInfo);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ currentDumpStmt = stmtAfter;
+ printf("\nInlinee method body:");
+ }
+#endif // DEBUG
+
+ if (InlineeCompiler->fgBBcount == 1)
+ {
+        // When fgBBcount is 1 we will always have a non-NULL fgFirstBB
+ //
+ PREFAST_ASSUME(InlineeCompiler->fgFirstBB != nullptr);
+
+        // DDB 91389: Don't throw away the (only) inlinee block
+        // when its jump kind is not BBJ_RETURN.
+        // In other words, we need its bbJumpKind to do the right thing.
+ if (InlineeCompiler->fgFirstBB->bbJumpKind == BBJ_RETURN)
+ {
+            // Inlinee contains just one BB. So just insert its statement list into topBlock.
+ if (InlineeCompiler->fgFirstBB->bbTreeList)
+ {
+ stmtAfter = fgInsertStmtListAfter(iciBlock,
+ stmtAfter,
+ InlineeCompiler->fgFirstBB->bbTreeList);
+
+ // Copy inlinee bbFlags to caller bbFlags.
+ const unsigned int inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags;
+ noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0);
+ noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0);
+ iciBlock->bbFlags |= inlineeBlockFlags;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ noway_assert(currentDumpStmt);
+
+ if (currentDumpStmt != stmtAfter)
+ {
+ do
+ {
+ currentDumpStmt = currentDumpStmt->gtNext;
+
+ printf("\n");
+
+ noway_assert(currentDumpStmt->gtOper == GT_STMT);
+
+ gtDispTree(currentDumpStmt);
+ printf("\n");
+
+ } while (currentDumpStmt != stmtAfter);
+ }
+ }
+#endif // DEBUG
+ goto _Done;
+ }
+ }
+
+ //
+ // ======= Inserting inlinee's basic blocks ===============
+ //
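+    // The caller block (iciBlock) is split at the call site: topBlock keeps the
+    // statements up to and including the inlinee setup statements prepended above,
+    // falls through into the inlinee's blocks, and a new bottomBlock receives the
+    // remaining statements along with topBlock's original jump kind and jump target.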
+
+ BasicBlock* topBlock;
+ BasicBlock* bottomBlock;
+
+ topBlock = iciBlock;
+
+ bottomBlock = fgNewBBafter(topBlock->bbJumpKind, topBlock, true);
+ bottomBlock->bbRefs = 1;
+ bottomBlock->bbJumpDest = topBlock->bbJumpDest;
+ bottomBlock->inheritWeight(topBlock);
+
+ topBlock->bbJumpKind = BBJ_NONE;
+
+ // Update block flags
+ unsigned originalFlags;
+ originalFlags = topBlock->bbFlags;
+ noway_assert((originalFlags & BBF_SPLIT_NONEXIST) == 0);
+ topBlock->bbFlags &= ~(BBF_SPLIT_LOST);
+ bottomBlock->bbFlags |= originalFlags & BBF_SPLIT_GAINED;
+
+ //
+ // Split statements between topBlock and bottomBlock
+ //
+ GenTreePtr topBlock_Begin;
+ GenTreePtr topBlock_End;
+ GenTreePtr bottomBlock_Begin;
+ GenTreePtr bottomBlock_End;
+
+ topBlock_Begin = nullptr;
+ topBlock_End = nullptr;
+ bottomBlock_Begin = nullptr;
+ bottomBlock_End = nullptr;
+
+ //
+ // First figure out bottomBlock_Begin
+ //
+
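+    // stmtAfter is the last statement that stays in topBlock; the statement that
+    // follows it (if any) becomes the first statement of bottomBlock.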
+ bottomBlock_Begin = stmtAfter->gtNext;
+
+ if (topBlock->bbTreeList == nullptr)
+ {
+ // topBlock is empty before the split.
+ // In this case, both topBlock and bottomBlock should be empty
+ noway_assert(bottomBlock_Begin == nullptr);
+ topBlock->bbTreeList = nullptr;
+ bottomBlock->bbTreeList = nullptr;
+ }
+ else if (topBlock->bbTreeList == bottomBlock_Begin)
+ {
+ noway_assert(bottomBlock_Begin);
+
+ // topBlock contains at least one statement before the split.
+ // And the split is before the first statement.
+        // In this case, topBlock should be empty, and everything else should be moved to the bottomBlock.
+ bottomBlock->bbTreeList = topBlock->bbTreeList;
+ topBlock->bbTreeList = nullptr;
+ }
+ else if (bottomBlock_Begin == nullptr)
+ {
+ noway_assert(topBlock->bbTreeList);
+
+ // topBlock contains at least one statement before the split.
+ // And the split is at the end of the topBlock.
+ // In this case, everything should be kept in the topBlock, and the bottomBlock should be empty
+
+ bottomBlock->bbTreeList = nullptr;
+ }
+ else
+ {
+ noway_assert(topBlock->bbTreeList);
+ noway_assert(bottomBlock_Begin);
+
+ // This is the normal case where both blocks should contain at least one statement.
+ topBlock_Begin = topBlock->bbTreeList;
+ noway_assert(topBlock_Begin);
+ topBlock_End = bottomBlock_Begin->gtPrev;
+ noway_assert(topBlock_End);
+ bottomBlock_End = topBlock->lastStmt();
+ noway_assert(bottomBlock_End);
+
+ // Break the linkage between 2 blocks.
+ topBlock_End->gtNext = nullptr;
+
+ // Fix up all the pointers.
+ topBlock->bbTreeList = topBlock_Begin;
+ topBlock->bbTreeList->gtPrev = topBlock_End;
+
+ bottomBlock->bbTreeList = bottomBlock_Begin;
+ bottomBlock->bbTreeList->gtPrev = bottomBlock_End;
+ }
+
+ //
+ // Set the try and handler index and fix the jump types of inlinee's blocks.
+ //
+
+ bool inheritWeight;
+ inheritWeight = true; // The firstBB does inherit the weight from the iciBlock
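+    // Weight heuristic: the first inlinee block and each BBJ_RETURN block inherit the
+    // call-site block's weight; every other inlinee block is given half of that weight.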
+
+ for (block = InlineeCompiler->fgFirstBB;
+ block != nullptr;
+ block = block->bbNext)
+ {
+ noway_assert(!block->hasTryIndex());
+ noway_assert(!block->hasHndIndex());
+ block->copyEHRegion(iciBlock);
+ block->bbFlags |= iciBlock->bbFlags & BBF_BACKWARD_JUMP;
+
+ if (iciStmt->gtStmt.gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ block->bbCodeOffs = jitGetILoffs(iciStmt->gtStmt.gtStmtILoffsx);
+ block->bbCodeOffsEnd = block->bbCodeOffs + 1; // TODO: is code size of 1 some magic number for inlining?
+ }
+ else
+ {
+ block->bbCodeOffs = 0; // TODO: why not BAD_IL_OFFSET?
+ block->bbCodeOffsEnd = 0;
+ block->bbFlags |= BBF_INTERNAL;
+ }
+
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ inheritWeight = true; // A return block does inherit the weight from the iciBlock
+ noway_assert((block->bbFlags & BBF_HAS_JMP) == 0);
+ if (block->bbNext)
+ {
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = bottomBlock;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConvert bbJumpKind of BB%02u to BBJ_ALWAYS to bottomBlock BB%02u\n",
+ block->bbNum, bottomBlock->bbNum);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConvert bbJumpKind of BB%02u to BBJ_NONE\n", block->bbNum);
+ }
+#endif // DEBUG
+ block->bbJumpKind = BBJ_NONE;
+ }
+ }
+ if (inheritWeight)
+ {
+ block->inheritWeight(iciBlock);
+ inheritWeight = false;
+ }
+ else
+ {
+ block->modifyBBWeight(iciBlock->bbWeight / 2);
+ }
+ }
+
+ // Insert inlinee's blocks into inliner's block list.
+ topBlock->setNext(InlineeCompiler->fgFirstBB);
+ InlineeCompiler->fgLastBB->setNext(bottomBlock);
+
+ //
+ // Add inlinee's block count to inliner's.
+ //
+ fgBBcount += InlineeCompiler->fgBBcount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispBasicBlocks(InlineeCompiler->fgFirstBB, InlineeCompiler->fgLastBB, true);
+ }
+#endif // DEBUG
+
+_Done:
+
+ //
+    // At this point, we have successfully inserted the inlinee's code.
+ //
+
+ //
+ // Copy out some flags
+ //
+ compLongUsed |= InlineeCompiler->compLongUsed;
+ compFloatingPointUsed |= InlineeCompiler->compFloatingPointUsed;
+ compLocallocUsed |= InlineeCompiler->compLocallocUsed;
+ compQmarkUsed |= InlineeCompiler->compQmarkUsed;
+ compUnsafeCastUsed |= InlineeCompiler->compUnsafeCastUsed;
+ compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie;
+ compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout;
+
+ // Update optMethodFlags
+
+#ifdef DEBUG
+ unsigned optMethodFlagsBefore = optMethodFlags;
+#endif
+
+ optMethodFlags |= InlineeCompiler->optMethodFlags;
+
+#ifdef DEBUG
+ if (optMethodFlags != optMethodFlagsBefore)
+ {
+ JITDUMP("INLINER: Updating optMethodFlags -- root:%0x callee:%0x new:%0x\n",
+ optMethodFlagsBefore, InlineeCompiler->optMethodFlags, optMethodFlags);
+ }
+#endif
+
+    // If there is a non-void return value, replace the GT_CALL with its return value expression,
+    // so that it will later be picked up by the GT_RET_EXPR node.
+ if ((pInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID) || (iciCall->gtCall.gtReturnType == TYP_STRUCT))
+ {
+ noway_assert(pInlineInfo->retExpr);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReturn expression for call at ");
+ printTreeID(iciCall);
+ printf(" is\n");
+ gtDispTree(pInlineInfo->retExpr);
+ }
+#endif // DEBUG
+ // Replace the call with the return expression
+ iciCall->CopyFrom(pInlineInfo->retExpr, this);
+ }
+
+ //
+ // Detach the GT_CALL node from the original statement by hanging a "nothing" node under it,
+ // so that fgMorphStmts can remove the statement once we return from here.
+ //
+ iciStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
+}
+
+// Prepend the statements that are needed before the inlined call.
+// Return the last statement that is prepended.
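+//
+// The statements are inserted after the call statement in this order: assignments of
+// arguments to temps (or appends of side-effecting argument trees), the shared class
+// constructor helper call if one is needed, the 'this' null check if one is needed,
+// and finally zero-initialization of any inlinee locals that require it.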
+
+GenTreePtr Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
+{
+ BasicBlock* block = inlineInfo->iciBlock;
+
+ GenTreePtr callStmt = inlineInfo->iciStmt;
+ noway_assert(callStmt->gtOper == GT_STMT);
+ IL_OFFSETX callILOffset = callStmt->gtStmt.gtStmtILoffsx;
+
+ GenTreePtr afterStmt = callStmt; // afterStmt is the place where the new statements should be inserted after.
+ GenTreePtr newStmt;
+
+ GenTreePtr call = inlineInfo->iciCall;
+ noway_assert(call->gtOper == GT_CALL);
+
+#ifdef DEBUG
+ if (0 && verbose)
+ {
+ printf("\nfgInlinePrependStatements for iciCall= ");
+ printTreeID(call);
+ printf(":\n");
+ }
+#endif
+
+ // Prepend statements for any initialization / side effects
+
+ InlArgInfo* inlArgInfo = inlineInfo->inlArgInfo;
+ InlLclVarInfo* lclVarInfo = inlineInfo->lclVarInfo;
+
+ GenTreePtr tree;
+
+    // Create the null check statement for the 'this' pointer if necessary (but do not append it to the statement list yet).
+    // The NULL check should be done after the "argument setup statements".
+    // The only reason we create it here is so that the call to "impInlineFetchArg(0,..." reserves a temp
+    // for the "this" pointer.
+    // Note: Here we no longer do the optimization that was done by thisDereferencedFirst in the old inliner.
+    // However, the assertionProp logic will remove any unnecessary null checks that we may have added.
+ //
+ GenTreePtr nullcheck = nullptr;
+
+ if (call->gtFlags & GTF_CALL_NULLCHECK && !inlineInfo->thisDereferencedFirst)
+ {
+ // Call impInlineFetchArg to "reserve" a temp for the "this" pointer.
+ nullcheck = gtNewOperNode(GT_IND, TYP_INT,
+ impInlineFetchArg(0, inlArgInfo, lclVarInfo));
+ nullcheck->gtFlags |= GTF_EXCEPT;
+
+        // The NULL-check statement will be inserted into the statement list after the statements
+        // that assign arguments to temps, and before the actual body of the inlinee method.
+ }
+
+ /* Treat arguments that had to be assigned to temps */
+ if (inlineInfo->argCnt)
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nArguments setup:\n");
+ }
+#endif // DEBUG
+
+ for (unsigned argNum = 0; argNum < inlineInfo->argCnt; argNum++)
+ {
+ if (inlArgInfo[argNum].argHasTmp)
+ {
+ noway_assert(inlArgInfo[argNum].argIsUsed);
+
+ /* argBashTmpNode is non-NULL iff the argument's value was
+ referenced exactly once by the original IL. This offers an
+                   opportunity to avoid an intermediate temp and just insert
+ the original argument tree.
+
+ However, if the temp node has been cloned somewhere while
+ importing (e.g. when handling isinst or dup), or if the IL
+ took the address of the argument, then argBashTmpNode will
+ be set (because the value was only explicitly retrieved
+ once) but the optimization cannot be applied.
+ */
+
+ GenTreePtr argSingleUseNode = inlArgInfo[argNum].argBashTmpNode;
+
+ if (argSingleUseNode &&
+ !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) &&
+ !inlArgInfo[argNum].argHasLdargaOp &&
+ !inlArgInfo[argNum].argHasStargOp)
+ {
+ // Change the temp in-place to the actual argument.
+ // We currently do not support this for struct arguments, so it must not be a GT_OBJ.
+ GenTree* argNode = inlArgInfo[argNum].argNode;
+ assert(argNode->gtOper != GT_OBJ);
+ argSingleUseNode->CopyFrom(argNode, this);
+ continue;
+ }
+ else
+ {
+ /* Create the temp assignment for this argument */
+
+ CORINFO_CLASS_HANDLE structHnd = DUMMY_INIT(0);
+
+ if (varTypeIsStruct(lclVarInfo[argNum].lclTypeInfo))
+ {
+ structHnd = gtGetStructHandleIfPresent(inlArgInfo[argNum].argNode);
+ noway_assert(structHnd != NO_CLASS_HANDLE);
+ }
+
+                // Unsafe value cls check is not needed for argTmpNum here since the inlinee compiler instance would have
+                // iterated over these and marked them accordingly.
+ impAssignTempGen(inlArgInfo[argNum].argTmpNum,
+ inlArgInfo[argNum].argNode,
+ structHnd,
+ (unsigned)CHECK_SPILL_NONE,
+ & afterStmt,
+ callILOffset,
+ block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(afterStmt);
+ }
+#endif // DEBUG
+
+ }
+ }
+ else if (inlArgInfo[argNum].argIsByRefToStructLocal)
+ {
+ // Do nothing.
+ }
+ else
+ {
+ /* The argument is either not used or a const or lcl var */
+
+ noway_assert(!inlArgInfo[argNum].argIsUsed ||
+ inlArgInfo[argNum].argIsInvariant ||
+ inlArgInfo[argNum].argIsLclVar );
+
+                /* Make sure we didn't change argNode along the way, or else
+                   subsequent uses of the arg would have worked with the bashed value */
+ if (inlArgInfo[argNum].argIsInvariant)
+ {
+ assert(inlArgInfo[argNum].argNode->OperIsConst() ||
+ inlArgInfo[argNum].argNode->gtOper == GT_ADDR);
+ }
+ noway_assert((inlArgInfo[argNum].argIsLclVar == 0) ==
+ (inlArgInfo[argNum].argNode->gtOper != GT_LCL_VAR || (inlArgInfo[argNum].argNode->gtFlags & GTF_GLOB_REF)));
+
+ /* If the argument has side effects, append it */
+
+ if (inlArgInfo[argNum].argHasSideEff)
+ {
+ noway_assert(inlArgInfo[argNum].argIsUsed == false);
+
+ if (inlArgInfo[argNum].argNode->gtOper == GT_OBJ ||
+ inlArgInfo[argNum].argNode->gtOper == GT_MKREFANY)
+ {
+                        // Don't put a GT_OBJ node under a GT_COMMA.
+                        // Codegen can't deal with it.
+                        // Just hang the address here in case there are side effects.
+ newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode->gtOp.gtOp1), callILOffset);
+ }
+ else
+ {
+ newStmt = gtNewStmt(gtUnusedValNode(inlArgInfo[argNum].argNode), callILOffset);
+ }
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(afterStmt);
+ }
+#endif // DEBUG
+
+ }
+ }
+ }
+ }
+
+ // Add the CCTOR check if asked for.
+    // Note: We no longer do the optimization that was done by staticAccessedFirstUsingHelper in the old inliner.
+    //       Therefore we might prepend a redundant call to HELPER.CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
+    //       before the inlined method body, even if a static field of this type was accessed in the inlinee
+    //       using a helper before any other observable side effect.
+
+ if (inlineInfo->inlineCandidateInfo->initClassResult & CORINFO_INITCLASS_USE_HELPER)
+ {
+ CORINFO_CONTEXT_HANDLE exactContext = inlineInfo->inlineCandidateInfo->exactContextHnd;
+ CORINFO_CLASS_HANDLE exactClass;
+
+ if (((SIZE_T)exactContext & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS)
+ {
+ exactClass = CORINFO_CLASS_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK);
+ }
+ else
+ {
+ exactClass = info.compCompHnd->getMethodClass(CORINFO_METHOD_HANDLE((SIZE_T)exactContext & ~CORINFO_CONTEXTFLAGS_MASK));
+ }
+
+ tree = fgGetSharedCCtor(exactClass);
+ newStmt = gtNewStmt(tree, callILOffset);
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+ }
+
+ // Insert the nullcheck statement now.
+ if (nullcheck)
+ {
+ newStmt = gtNewStmt(nullcheck, callILOffset);
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+ }
+
+ //
+ // Now zero-init inlinee locals
+ //
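+    // Locals are zero-initialized only when the inlinee was compiled with
+    // CORINFO_OPT_INIT_LOCALS: scalar locals get an assignment of a zero constant,
+    // while struct locals get a zero init-block of the full struct size.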
+
+ CORINFO_METHOD_INFO* InlineeMethodInfo = InlineeCompiler->info.compMethodInfo;
+
+ unsigned lclCnt = InlineeMethodInfo->locals.numArgs;
+
+ // Does callee contain any zero-init local?
+ if ((lclCnt != 0) &&
+ (InlineeMethodInfo->options & CORINFO_OPT_INIT_LOCALS) != 0)
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nZero init inlinee locals:\n");
+ }
+#endif // DEBUG
+
+ for (unsigned lclNum = 0; lclNum < lclCnt; lclNum++)
+ {
+ unsigned tmpNum = inlineInfo->lclTmpNum[lclNum];
+
+ // Is the local used at all?
+ if (tmpNum != BAD_VAR_NUM)
+ {
+ var_types lclTyp = (var_types)lvaTable[tmpNum].lvType;
+ noway_assert(lclTyp == lclVarInfo[lclNum + inlineInfo->argCnt].lclTypeInfo);
+
+ if (!varTypeIsStruct(lclTyp))
+ {
+                    // Unsafe value cls check is not needed here since the inlinee compiler instance would have
+                    // iterated over the locals and marked them accordingly.
+ impAssignTempGen(tmpNum,
+ gtNewZeroConNode(genActualType(lclTyp)),
+ NO_CLASS_HANDLE,
+ (unsigned)CHECK_SPILL_NONE,
+ & afterStmt,
+ callILOffset,
+ block);
+ }
+ else
+ {
+ CORINFO_CLASS_HANDLE structType = lclVarInfo[lclNum + inlineInfo->argCnt].lclVerTypeInfo.GetClassHandle();
+
+ tree = gtNewBlkOpNode(gtNewLclvNode(tmpNum, lclTyp), // Dest
+ gtNewIconNode(0), // Value
+ info.compCompHnd->getClassSize(structType), // Size
+ false, // isVolatile
+ false); // not copyBlock
+
+ newStmt = gtNewStmt(tree, callILOffset);
+ afterStmt = fgInsertStmtAfter(block, afterStmt, newStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(afterStmt);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ return afterStmt;
+}
+
+
+/*****************************************************************************/
+/*static*/
+Compiler::fgWalkResult Compiler::fgChkThrowCB(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ // If this tree doesn't have the EXCEPT flag set, then there is no
+ // way any of the child nodes could throw, so we can stop recursing.
+ if (!(tree->gtFlags & GTF_EXCEPT))
+ {
+ return Compiler::WALK_SKIP_SUBTREES;
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_MUL:
+ case GT_ADD:
+ case GT_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_CAST:
+ if (tree->gtOverflow()) {
+ return Compiler::WALK_ABORT;
+}
+ break;
+
+ case GT_INDEX:
+ if (tree->gtFlags & GTF_INX_RNGCHK) {
+ return Compiler::WALK_ABORT;
+}
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ return Compiler::WALK_ABORT;
+
+ default:
+ break;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+/*static*/
+Compiler::fgWalkResult Compiler::fgChkLocAllocCB(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->gtOper == GT_LCLHEAP) {
+ return Compiler::WALK_ABORT;
+}
+
+ return Compiler::WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+/*static*/
+Compiler::fgWalkResult Compiler::fgChkQmarkCB(GenTreePtr* pTree,
+ fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->gtOper == GT_QMARK) {
+ return Compiler::WALK_ABORT;
+}
+
+ return Compiler::WALK_CONTINUE;
+}
+
+
+void Compiler::fgLclFldAssign(unsigned lclNum)
+{
+ assert(varTypeIsStruct(lvaTable[lclNum].lvType));
+ if (lvaTable[lclNum].lvPromoted && lvaTable[lclNum].lvFieldCnt > 1)
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+ }
+}
diff --git a/src/jit/fp.h b/src/jit/fp.h
new file mode 100644
index 0000000000..f1cee9581a
--- /dev/null
+++ b/src/jit/fp.h
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _JIT_FP
+
+#define _JIT_FP
+
+// Auxiliary structures.
+#if FEATURE_STACK_FP_X87
+
+enum dummyFPenum
+{
+#define REGDEF(name, rnum, mask, sname) dummmy_##name = rnum,
+#include "registerfp.h"
+
+ FP_VIRTUALREGISTERS,
+};
+
+// FlatFPStateX87 holds the state of the virtual register file. For each
+// virtual register we keep track of which physical register we're
+// mapping it to. We also keep track of the physical stack.
+
+#define FP_PHYSICREGISTERS FP_VIRTUALREGISTERS
+#define FP_VRNOTMAPPED -1
+
+struct FlatFPStateX87
+{
+public:
+ void Init(FlatFPStateX87* pFrom = 0);
+ bool Mapped(unsigned uEntry); // Is virtual register mapped
+ void Unmap(unsigned uEntry); // Unmaps a virtual register
+ void Associate(unsigned uEntry, unsigned uStack);
+ unsigned StackToST(unsigned uEntry); // Maps the stack to a ST(x) entry
+ unsigned VirtualToST(unsigned uEntry);
+ unsigned STToVirtual(unsigned uST);
+ unsigned TopIndex();
+ unsigned TopVirtual();
+ void Rename(unsigned uVirtualTo, unsigned uVirtualFrom);
+ unsigned Pop();
+ void Push(unsigned uEntry);
+ bool IsEmpty();
+
+ // Debug/test methods
+ static bool AreEqual(FlatFPStateX87* pSrc, FlatFPStateX87* pDst);
+#ifdef DEBUG
+ bool IsValidEntry(unsigned uEntry);
+ bool IsConsistent();
+ void UpdateMappingFromStack();
+ void Dump();
+
+    // In some optimizations the stack will be inconsistent during some transactions. We want to keep
+    // the checks for everything else, so if you have the stack in an inconsistent state, you must
+    // ignore it on purpose.
+ bool m_bIgnoreConsistencyChecks;
+
+ inline void IgnoreConsistencyChecks(bool bIgnore)
+ {
+ m_bIgnoreConsistencyChecks = bIgnore;
+ }
+#else
+ inline void IgnoreConsistencyChecks(bool bIgnore)
+ {
+ }
+#endif
+
+ unsigned m_uVirtualMap[FP_VIRTUALREGISTERS];
+ unsigned m_uStack[FP_PHYSICREGISTERS];
+ unsigned m_uStackSize;
+};
+
+#endif // FEATURE_STACK_FP_X87
+#endif
diff --git a/src/jit/gcdecode.cpp b/src/jit/gcdecode.cpp
new file mode 100644
index 0000000000..0722917490
--- /dev/null
+++ b/src/jit/gcdecode.cpp
@@ -0,0 +1,15 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/* Precompiled header nonsense requires that we do it this way */
+
+/* GCDecoder.cpp is a common source file between VM and JIT/IL */
+/* GCDecoder.cpp is located in $COM99/inc */
+
+#include "gcdecoder.cpp"
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
new file mode 100644
index 0000000000..f20183b25a
--- /dev/null
+++ b/src/jit/gcencode.cpp
@@ -0,0 +1,4725 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GCEncode XX
+XX XX
+XX Logic to encode the JIT method header and GC pointer tables XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+
+#pragma warning(disable : 4244) // loss of data int -> char ..
+
+#endif
+
+#include "gcinfotypes.h"
+
+#ifdef JIT32_GCENCODER
+
+#include "emit.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************/
+// (see jit.h) #define REGEN_SHORTCUTS 0
+// To regenerate the compressed info header shortcuts, define REGEN_SHORTCUTS
+// and use the following command line pipe/filter to give you the 128
+// most useful encodings.
+//
+// find . -name regen.txt | xargs cat | grep InfoHdr | sort | uniq -c | sort -r | head -128
+
+// (see jit.h) #define REGEN_CALLPAT 0
+// To regenerate the compressed call pattern table, define REGEN_CALLPAT
+// and use the following command line pipe/filter to give you the 80
+// most useful encodings.
+//
+// find . -name regen.txt | xargs cat | grep CallSite | sort | uniq -c | sort -r | head -80
+
+#if REGEN_SHORTCUTS || REGEN_CALLPAT
+static FILE* logFile = NULL;
+CRITICAL_SECTION logFileLock;
+#endif
+
+#if REGEN_CALLPAT
+static void regenLog(unsigned codeDelta,
+ unsigned argMask,
+ unsigned regMask,
+ unsigned argCnt,
+ unsigned byrefArgMask,
+ unsigned byrefRegMask,
+ BYTE* base,
+ unsigned enSize)
+{
+ CallPattern pat;
+
+ pat.fld.argCnt = (argCnt < 0xff) ? argCnt : 0xff;
+ pat.fld.regMask = (regMask < 0xff) ? regMask : 0xff;
+ pat.fld.argMask = (argMask < 0xff) ? argMask : 0xff;
+ pat.fld.codeDelta = (codeDelta < 0xff) ? codeDelta : 0xff;
+
+ if (logFile == NULL)
+ {
+ logFile = fopen("regen.txt", "a");
+ InitializeCriticalSection(&logFileLock);
+ }
+
+ assert(((enSize > 0) && (enSize < 256)) && ((pat.val & 0xffffff) != 0xffffff));
+
+ EnterCriticalSection(&logFileLock);
+
+ fprintf(logFile, "CallSite( 0x%08x, 0x%02x%02x, 0x", pat.val, byrefArgMask, byrefRegMask);
+
+ while (enSize > 0)
+ {
+ fprintf(logFile, "%02x", *base++);
+ enSize--;
+ }
+ fprintf(logFile, "),\n");
+ fflush(logFile);
+
+ LeaveCriticalSection(&logFileLock);
+}
+#endif
+
+#if REGEN_SHORTCUTS
+static void regenLog(unsigned encoding, InfoHdr* header, InfoHdr* state)
+{
+ if (logFile == NULL)
+ {
+ logFile = fopen("regen.txt", "a");
+ InitializeCriticalSection(&logFileLock);
+ }
+
+ EnterCriticalSection(&logFileLock);
+
+ fprintf(logFile, "InfoHdr( %2d, %2d, %1d, %1d, %1d,"
+ " %1d, %1d, %1d, %1d, %1d,"
+ " %1d, %1d, %1d, %1d, %1d,"
+ " %1d, %2d, %2d, %2d, %2d,"
+ " %2d, %2d), \n",
+ state->prologSize, state->epilogSize, state->epilogCount, state->epilogAtEnd, state->ediSaved,
+ state->esiSaved, state->ebxSaved, state->ebpSaved, state->ebpFrame, state->interruptible,
+ state->doubleAlign, state->security, state->handlers, state->localloc, state->editNcontinue, state->varargs,
+ state->profCallbacks, state->argCount, state->frameSize,
+ (state->untrackedCnt <= SET_UNTRACKED_MAX) ? state->untrackedCnt : HAS_UNTRACKED,
+ (state->varPtrTableSize == 0) ? 0 : HAS_VARPTR,
+ (state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET) ? 0 : HAS_GS_COOKIE_OFFSET,
+ (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET,
+ (state->syncStartOffset == INVALID_SYNC_OFFSET) ? 0 : HAS_SYNC_OFFSET);
+
+ fflush(logFile);
+
+ LeaveCriticalSection(&logFileLock);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Given the four parameters return the index into the callPatternTable[]
+ * that is used to encode these four items. If an exact match cannot be
+ * found, then ignore the codeDelta and search the table again for a near
+ * match.
+ * Returns 0..79 for an exact match or
+ * (delta<<8) | (0..79) for a near match.
+ * A near match will be encoded using two bytes, the first byte will
+ * skip the adjustment delta that prevented an exact match and the
+ * rest of the delta plus the other three items are encoded in the
+ * second byte.
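+ * For example, if the closest table entry matches everything except codeDelta,
+ * and the requested codeDelta is larger by 3, the near match is returned
+ * as (3 << 8) | tableIndex.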
+ */
+int FASTCALL lookupCallPattern(unsigned argCnt, unsigned regMask, unsigned argMask, unsigned codeDelta)
+{
+ if ((argCnt <= CP_MAX_ARG_CNT) && (argMask <= CP_MAX_ARG_MASK))
+ {
+ CallPattern pat;
+
+ pat.fld.argCnt = argCnt;
+ pat.fld.regMask = regMask; // EBP,EBX,ESI,EDI
+ pat.fld.argMask = argMask;
+ pat.fld.codeDelta = codeDelta;
+
+ bool codeDeltaOK = (pat.fld.codeDelta == codeDelta);
+ unsigned bestDelta2 = 0xff;
+ unsigned bestPattern = 0xff;
+ unsigned patval = pat.val;
+ assert(sizeof(CallPattern) == sizeof(unsigned));
+
+ const unsigned* curp = &callPatternTable[0];
+ for (unsigned inx = 0; inx < 80; inx++, curp++)
+ {
+ unsigned curval = *curp;
+ if ((patval == curval) && codeDeltaOK)
+ return inx;
+
+ if (((patval ^ curval) & 0xffffff) == 0)
+ {
+ unsigned delta2 = codeDelta - (curval >> 24);
+ if (delta2 < bestDelta2)
+ {
+ bestDelta2 = delta2;
+ bestPattern = inx;
+ }
+ }
+ }
+
+ if (bestPattern != 0xff)
+ {
+ return (bestDelta2 << 8) | bestPattern;
+ }
+ }
+ return -1;
+}
+
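+// initNeeded3/initNeeded4 decide how to move the encoder state from 'cur' to 'tgt'
+// when 'tgt' is too large for a one-byte SET_xxx encoding. If 'tgt' can be reached
+// from 'cur' by shifting in 3-bit (or 4-bit) nibbles, *hint receives the next nibble
+// to shift in and the function returns false (the caller emits a NEXT_xxx encoding);
+// otherwise *hint receives a small base value to re-initialize with and the function
+// returns true (the caller emits a SET_xxx encoding). bigEncoding3/bigEncoding4 below
+// estimate how many such nibble bytes a value would take.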
+static bool initNeeded3(unsigned cur, unsigned tgt, unsigned max, unsigned* hint)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x07;
+ tmp >>= 3;
+ if (tmp == cur)
+ {
+ *hint = nib;
+ return false;
+ }
+ cnt++;
+ }
+
+ *hint = tmp;
+ return true;
+}
+
+static bool initNeeded4(unsigned cur, unsigned tgt, unsigned max, unsigned* hint)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x0f;
+ tmp >>= 4;
+ if (tmp == cur)
+ {
+ *hint = nib;
+ return false;
+ }
+ cnt++;
+ }
+
+ *hint = tmp;
+ return true;
+}
+
+static int bigEncoding3(unsigned cur, unsigned tgt, unsigned max)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x07;
+ tmp >>= 3;
+ if (tmp == cur)
+ break;
+ cnt++;
+ }
+ return cnt;
+}
+
+static int bigEncoding4(unsigned cur, unsigned tgt, unsigned max)
+{
+ assert(cur != tgt);
+
+ unsigned tmp = tgt;
+ unsigned nib = 0;
+ unsigned cnt = 0;
+
+ while (tmp > max)
+ {
+ nib = tmp & 0x0f;
+ tmp >>= 4;
+ if (tmp == cur)
+ break;
+ cnt++;
+ }
+ return cnt;
+}
+
+BYTE FASTCALL encodeHeaderNext(const InfoHdr& header, InfoHdr* state)
+{
+ BYTE encoding = 0xff;
+
+ if (state->argCount != header.argCount)
+ {
+ // We have one-byte encodings for 0..8
+ if (header.argCount <= SET_ARGCOUNT_MAX)
+ {
+ state->argCount = header.argCount;
+ encoding = SET_ARGCOUNT + header.argCount;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ if (initNeeded4(state->argCount, header.argCount, SET_ARGCOUNT_MAX, &hint))
+ {
+ assert(hint <= SET_ARGCOUNT_MAX);
+ state->argCount = hint;
+ encoding = SET_ARGCOUNT + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0xf);
+ state->argCount <<= 4;
+ state->argCount += hint;
+ encoding = NEXT_FOUR_ARGCOUNT + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if (state->frameSize != header.frameSize)
+ {
+ // We have one-byte encodings for 0..7
+ if (header.frameSize <= SET_FRAMESIZE_MAX)
+ {
+ state->frameSize = header.frameSize;
+ encoding = SET_FRAMESIZE + header.frameSize;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ if (initNeeded4(state->frameSize, header.frameSize, SET_FRAMESIZE_MAX, &hint))
+ {
+ assert(hint <= SET_FRAMESIZE_MAX);
+ state->frameSize = hint;
+ encoding = SET_FRAMESIZE + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0xf);
+ state->frameSize <<= 4;
+ state->frameSize += hint;
+ encoding = NEXT_FOUR_FRAMESIZE + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if ((state->epilogCount != header.epilogCount) || (state->epilogAtEnd != header.epilogAtEnd))
+ {
+ if (header.epilogCount > SET_EPILOGCNT_MAX)
+ IMPL_LIMITATION("More than SET_EPILOGCNT_MAX epilogs");
+
+ state->epilogCount = header.epilogCount;
+ state->epilogAtEnd = header.epilogAtEnd;
+ encoding = SET_EPILOGCNT + header.epilogCount * 2;
+ if (header.epilogAtEnd)
+ encoding++;
+ goto DO_RETURN;
+ }
+
+ if (state->varPtrTableSize != header.varPtrTableSize)
+ {
+ assert(state->varPtrTableSize == 0 || state->varPtrTableSize == HAS_VARPTR);
+
+ if (state->varPtrTableSize == 0)
+ {
+ state->varPtrTableSize = HAS_VARPTR;
+ encoding = FLIP_VAR_PTR_TABLE_SZ;
+ goto DO_RETURN;
+ }
+ else if (header.varPtrTableSize == 0)
+ {
+ state->varPtrTableSize = 0;
+ encoding = FLIP_VAR_PTR_TABLE_SZ;
+ goto DO_RETURN;
+ }
+ }
+
+ if (state->untrackedCnt != header.untrackedCnt)
+ {
+ assert(state->untrackedCnt <= SET_UNTRACKED_MAX || state->untrackedCnt == HAS_UNTRACKED);
+
+ // We have one-byte encodings for 0..3
+ if (header.untrackedCnt <= SET_UNTRACKED_MAX)
+ {
+ state->untrackedCnt = header.untrackedCnt;
+ encoding = SET_UNTRACKED + header.untrackedCnt;
+ goto DO_RETURN;
+ }
+ else if (state->untrackedCnt != HAS_UNTRACKED)
+ {
+ state->untrackedCnt = HAS_UNTRACKED;
+ encoding = FFFF_UNTRACKED_CNT;
+ goto DO_RETURN;
+ }
+ }
+
+ if (state->epilogSize != header.epilogSize)
+ {
+ // We have one-byte encodings for 0..10
+ if (header.epilogSize <= SET_EPILOGSIZE_MAX)
+ {
+ state->epilogSize = header.epilogSize;
+ encoding = SET_EPILOGSIZE + header.epilogSize;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ if (initNeeded3(state->epilogSize, header.epilogSize, SET_EPILOGSIZE_MAX, &hint))
+ {
+ assert(hint <= SET_EPILOGSIZE_MAX);
+ state->epilogSize = hint;
+ encoding = SET_EPILOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0x7);
+ state->epilogSize <<= 3;
+ state->epilogSize += hint;
+ encoding = NEXT_THREE_EPILOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if (state->prologSize != header.prologSize)
+ {
+ // We have one-byte encodings for 0..16
+ if (header.prologSize <= SET_PROLOGSIZE_MAX)
+ {
+ state->prologSize = header.prologSize;
+ encoding = SET_PROLOGSIZE + header.prologSize;
+ goto DO_RETURN;
+ }
+ else
+ {
+ unsigned hint;
+ assert(SET_PROLOGSIZE_MAX > 15);
+ if (initNeeded3(state->prologSize, header.prologSize, 15, &hint))
+ {
+ assert(hint <= 15);
+ state->prologSize = hint;
+ encoding = SET_PROLOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ else
+ {
+ assert(hint <= 0x7);
+ state->prologSize <<= 3;
+ state->prologSize += hint;
+ encoding = NEXT_THREE_PROLOGSIZE + hint;
+ goto DO_RETURN;
+ }
+ }
+ }
+
+ if (state->ediSaved != header.ediSaved)
+ {
+ state->ediSaved = header.ediSaved;
+ encoding = FLIP_EDI_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->esiSaved != header.esiSaved)
+ {
+ state->esiSaved = header.esiSaved;
+ encoding = FLIP_ESI_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->ebxSaved != header.ebxSaved)
+ {
+ state->ebxSaved = header.ebxSaved;
+ encoding = FLIP_EBX_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->ebpSaved != header.ebpSaved)
+ {
+ state->ebpSaved = header.ebpSaved;
+ encoding = FLIP_EBP_SAVED;
+ goto DO_RETURN;
+ }
+
+ if (state->ebpFrame != header.ebpFrame)
+ {
+ state->ebpFrame = header.ebpFrame;
+ encoding = FLIP_EBP_FRAME;
+ goto DO_RETURN;
+ }
+
+ if (state->interruptible != header.interruptible)
+ {
+ state->interruptible = header.interruptible;
+ encoding = FLIP_INTERRUPTIBLE;
+ goto DO_RETURN;
+ }
+
+#if DOUBLE_ALIGN
+ if (state->doubleAlign != header.doubleAlign)
+ {
+ state->doubleAlign = header.doubleAlign;
+ encoding = FLIP_DOUBLE_ALIGN;
+ goto DO_RETURN;
+ }
+#endif
+
+ if (state->security != header.security)
+ {
+ state->security = header.security;
+ encoding = FLIP_SECURITY;
+ goto DO_RETURN;
+ }
+
+ if (state->handlers != header.handlers)
+ {
+ state->handlers = header.handlers;
+ encoding = FLIP_HANDLERS;
+ goto DO_RETURN;
+ }
+
+ if (state->localloc != header.localloc)
+ {
+ state->localloc = header.localloc;
+ encoding = FLIP_LOCALLOC;
+ goto DO_RETURN;
+ }
+
+ if (state->editNcontinue != header.editNcontinue)
+ {
+ state->editNcontinue = header.editNcontinue;
+ encoding = FLIP_EDITnCONTINUE;
+ goto DO_RETURN;
+ }
+
+ if (state->varargs != header.varargs)
+ {
+ state->varargs = header.varargs;
+ encoding = FLIP_VARARGS;
+ goto DO_RETURN;
+ }
+
+ if (state->profCallbacks != header.profCallbacks)
+ {
+ state->profCallbacks = header.profCallbacks;
+ encoding = FLIP_PROF_CALLBACKS;
+ goto DO_RETURN;
+ }
+
+ if (state->genericsContext != header.genericsContext)
+ {
+ state->genericsContext = header.genericsContext;
+ encoding = FLIP_HAS_GENERICS_CONTEXT;
+ goto DO_RETURN;
+ }
+
+ if (state->genericsContextIsMethodDesc != header.genericsContextIsMethodDesc)
+ {
+ state->genericsContextIsMethodDesc = header.genericsContextIsMethodDesc;
+ encoding = FLIP_GENERICS_CONTEXT_IS_METHODDESC;
+ goto DO_RETURN;
+ }
+
+ if (state->gsCookieOffset != header.gsCookieOffset)
+ {
+ assert(state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET || state->gsCookieOffset == HAS_GS_COOKIE_OFFSET);
+
+ if (state->gsCookieOffset == INVALID_GS_COOKIE_OFFSET)
+ {
+ // header.gsCookieOffset is non-zero. We can set it
+ // to zero using FLIP_HAS_GS_COOKIE
+ state->gsCookieOffset = HAS_GS_COOKIE_OFFSET;
+ encoding = FLIP_HAS_GS_COOKIE;
+ goto DO_RETURN;
+ }
+ else if (header.gsCookieOffset == INVALID_GS_COOKIE_OFFSET)
+ {
+ state->gsCookieOffset = INVALID_GS_COOKIE_OFFSET;
+ encoding = FLIP_HAS_GS_COOKIE;
+ goto DO_RETURN;
+ }
+ }
+
+ if (state->syncStartOffset != header.syncStartOffset)
+ {
+ assert(state->syncStartOffset == INVALID_SYNC_OFFSET || state->syncStartOffset == HAS_SYNC_OFFSET);
+
+ if (state->syncStartOffset == INVALID_SYNC_OFFSET)
+ {
+ // header.syncStartOffset is non-zero. We can set it
+ // to zero using FLIP_SYNC
+ state->syncStartOffset = HAS_SYNC_OFFSET;
+ encoding = FLIP_SYNC;
+ goto DO_RETURN;
+ }
+ else if (header.syncStartOffset == INVALID_SYNC_OFFSET)
+ {
+ state->syncStartOffset = INVALID_SYNC_OFFSET;
+ encoding = FLIP_SYNC;
+ goto DO_RETURN;
+ }
+ }
+
+DO_RETURN:
+ assert(encoding < 0x80);
+ if (!state->isHeaderMatch(header))
+ encoding |= 0x80;
+ return encoding;
+}
+
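+// measureDistance estimates how many encoding bytes would be needed to adjust the
+// canned header 'p' (an infoHdrShortcut entry) so that it matches 'header'. The scan
+// stops early once the estimate reaches 'closeness', since a closer entry is already known.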
+static int measureDistance(const InfoHdr& header, const InfoHdrSmall* p, int closeness)
+{
+ int distance = 0;
+
+ if (p->untrackedCnt != header.untrackedCnt)
+ {
+ if (header.untrackedCnt > 3)
+ {
+ if (p->untrackedCnt != HAS_UNTRACKED)
+ distance += 1;
+ }
+ else
+ {
+ distance += 1;
+ }
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->varPtrTableSize != header.varPtrTableSize)
+ {
+ if (header.varPtrTableSize != 0)
+ {
+ if (p->varPtrTableSize != HAS_VARPTR)
+ distance += 1;
+ }
+ else
+ {
+ assert(p->varPtrTableSize == HAS_VARPTR);
+ distance += 1;
+ }
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->frameSize != header.frameSize)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ // We have one-byte encodings for 0..7
+ if (header.frameSize > SET_FRAMESIZE_MAX)
+ {
+ distance += bigEncoding4(p->frameSize, header.frameSize, SET_FRAMESIZE_MAX);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if (p->argCount != header.argCount)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ // We have one-byte encodings for 0..8
+ if (header.argCount > SET_ARGCOUNT_MAX)
+ {
+ distance += bigEncoding4(p->argCount, header.argCount, SET_ARGCOUNT_MAX);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if (p->prologSize != header.prologSize)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ // We have one-byte encodings for 0..16
+ if (header.prologSize > SET_PROLOGSIZE_MAX)
+ {
+ assert(SET_PROLOGSIZE_MAX > 15);
+ distance += bigEncoding3(p->prologSize, header.prologSize, 15);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if (p->epilogSize != header.epilogSize)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ // We have one-byte encodings for 0..10
+ if (header.epilogSize > SET_EPILOGSIZE_MAX)
+ {
+ distance += bigEncoding3(p->epilogSize, header.epilogSize, SET_EPILOGSIZE_MAX);
+ if (distance >= closeness)
+ return distance;
+ }
+ }
+
+ if ((p->epilogCount != header.epilogCount) || (p->epilogAtEnd != header.epilogAtEnd))
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+
+ if (header.epilogCount > SET_EPILOGCNT_MAX)
+ IMPL_LIMITATION("More than SET_EPILOGCNT_MAX epilogs");
+ }
+
+ if (p->ediSaved != header.ediSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->esiSaved != header.esiSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->ebxSaved != header.ebxSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->ebpSaved != header.ebpSaved)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->ebpFrame != header.ebpFrame)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->interruptible != header.interruptible)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+#if DOUBLE_ALIGN
+ if (p->doubleAlign != header.doubleAlign)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+#endif
+
+ if (p->security != header.security)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->handlers != header.handlers)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->localloc != header.localloc)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->editNcontinue != header.editNcontinue)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->varargs != header.varargs)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->profCallbacks != header.profCallbacks)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->genericsContext != header.genericsContext)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (p->genericsContextIsMethodDesc != header.genericsContextIsMethodDesc)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (header.gsCookieOffset != INVALID_GS_COOKIE_OFFSET)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ if (header.syncStartOffset != INVALID_SYNC_OFFSET)
+ {
+ distance += 1;
+ if (distance >= closeness)
+ return distance;
+ }
+
+ return distance;
+}
+
+// DllMain calls gcInitEncoderLookupTable to fill in this table
+/* extern */ int infoHdrLookup[IH_MAX_PROLOG_SIZE + 2];
+
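+// After initialization, the half-open range [infoHdrLookup[psz], infoHdrLookup[psz + 1])
+// indexes the infoHdrShortcut[] entries whose prologSize equals psz, which lets
+// encodeHeaderFirst() restrict its search to entries with the right prolog size.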
+/* static */ void GCInfo::gcInitEncoderLookupTable()
+{
+ const InfoHdrSmall* p = &infoHdrShortcut[0];
+ int lo = -1;
+ int hi = 0;
+ int n;
+
+ for (n = 0; n < 128; n++, p++)
+ {
+ if (p->prologSize != lo)
+ {
+ if (p->prologSize < lo)
+ {
+ assert(p->prologSize == 0);
+ hi = IH_MAX_PROLOG_SIZE;
+ }
+ else
+ hi = p->prologSize;
+
+ assert(hi <= IH_MAX_PROLOG_SIZE);
+
+ while (lo < hi)
+ infoHdrLookup[++lo] = n;
+
+ if (lo == IH_MAX_PROLOG_SIZE)
+ break;
+ }
+ }
+
+ assert(lo == IH_MAX_PROLOG_SIZE);
+ assert(infoHdrLookup[IH_MAX_PROLOG_SIZE] < 128);
+
+ while (p->prologSize == lo)
+ {
+ n++;
+ if (n >= 128)
+ break;
+ p++;
+ }
+
+ infoHdrLookup[++lo] = n;
+
+#ifdef DEBUG
+ //
+ // We do some other DEBUG only validity checks here
+ //
+ assert(callCommonDelta[0] < callCommonDelta[1]);
+ assert(callCommonDelta[1] < callCommonDelta[2]);
+ assert(callCommonDelta[2] < callCommonDelta[3]);
+ assert(sizeof(CallPattern) == sizeof(unsigned));
+ unsigned maxMarks = 0;
+ for (unsigned inx = 0; inx < 80; inx++)
+ {
+ CallPattern pat;
+ pat.val = callPatternTable[inx];
+
+ assert(pat.fld.codeDelta <= CP_MAX_CODE_DELTA);
+ if (pat.fld.codeDelta == CP_MAX_CODE_DELTA)
+ maxMarks |= 0x01;
+
+ assert(pat.fld.argCnt <= CP_MAX_ARG_CNT);
+ if (pat.fld.argCnt == CP_MAX_ARG_CNT)
+ maxMarks |= 0x02;
+
+ assert(pat.fld.argMask <= CP_MAX_ARG_MASK);
+ if (pat.fld.argMask == CP_MAX_ARG_MASK)
+ maxMarks |= 0x04;
+ }
+ assert(maxMarks == 0x07);
+#endif
+}
+
+const int NO_CACHED_HEADER = -1;
+
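+// encodeHeaderFirst picks the infoHdrShortcut[] entry that most closely matches 'header'.
+// The low 7 bits of the returned byte index that entry; the 0x80 bit is set when the entry
+// is not an exact match, in which case *more estimates the number of additional adjustment
+// bytes and the caller keeps calling encodeHeaderNext() until the full state is encoded.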
+BYTE FASTCALL encodeHeaderFirst(const InfoHdr& header, InfoHdr* state, int* more, int* pCached)
+{
+ // First try the cached value for an exact match, if there is one
+ //
+ int n = *pCached;
+ const InfoHdrSmall* p;
+
+ if (n != NO_CACHED_HEADER)
+ {
+ p = &infoHdrShortcut[n];
+ if (p->isHeaderMatch(header))
+ {
+ // exact match found
+ GetInfoHdr(n, state);
+ *more = 0;
+ return n;
+ }
+ }
+
+ // Next search the table for an exact match
+ // Only search entries that have a matching prolog size
+ // Note: lo and hi are saved here as they specify the
+ // range of entries that have the correct prolog size
+ //
+ unsigned psz = header.prologSize;
+ int lo = 0;
+ int hi = 0;
+
+ if (psz <= IH_MAX_PROLOG_SIZE)
+ {
+ lo = infoHdrLookup[psz];
+ hi = infoHdrLookup[psz + 1];
+ p = &infoHdrShortcut[lo];
+ for (n = lo; n < hi; n++, p++)
+ {
+ assert(psz == p->prologSize);
+ if (p->isHeaderMatch(header))
+ {
+ // exact match found
+ GetInfoHdr(n, state);
+ *pCached = n; // cache the value
+ *more = 0;
+ return n;
+ }
+ }
+ }
+
+ //
+ // no exact match in infoHdrShortcut[]
+ //
+ // find the nearest entry in the table
+ //
+ int nearest = -1;
+ int closeness = 255; // (i.e. not very close)
+
+ //
+ // Calculate the minimum acceptable distance
+ // if we find an entry that is at least this close
+ // we will stop the search and use that value
+ //
+ int min_acceptable_distance = 1;
+
+ if (header.frameSize > SET_FRAMESIZE_MAX)
+ {
+ ++min_acceptable_distance;
+ if (header.frameSize > 32)
+ ++min_acceptable_distance;
+ }
+ if (header.argCount > SET_ARGCOUNT_MAX)
+ {
+ ++min_acceptable_distance;
+ if (header.argCount > 32)
+ ++min_acceptable_distance;
+ }
+
+ // First try the cached value
+ // and see if it meets the minimum acceptable distance
+ //
+ if (*pCached != NO_CACHED_HEADER)
+ {
+ p = &infoHdrShortcut[*pCached];
+ int distance = measureDistance(header, p, closeness);
+ assert(distance > 0);
+ if (distance <= min_acceptable_distance)
+ {
+ GetInfoHdr(*pCached, state);
+ *more = distance;
+ return 0x80 | *pCached;
+ }
+ else
+ {
+ closeness = distance;
+ nearest = *pCached;
+ }
+ }
+
+ // Then try the ones pointed to by [lo..hi),
+ // (i.e. the ones that have the correct prolog size)
+ //
+ p = &infoHdrShortcut[lo];
+ for (n = lo; n < hi; n++, p++)
+ {
+ if (n == *pCached)
+ continue; // already tried this one
+ int distance = measureDistance(header, p, closeness);
+ assert(distance > 0);
+ if (distance <= min_acceptable_distance)
+ {
+ GetInfoHdr(n, state);
+ *pCached = n; // Cache this value
+ *more = distance;
+ return 0x80 | n;
+ }
+ else if (distance < closeness)
+ {
+ closeness = distance;
+ nearest = n;
+ }
+ }
+
+ int last = infoHdrLookup[IH_MAX_PROLOG_SIZE + 1];
+ assert(last <= 128);
+
+ // Then try all the rest [0..last-1]
+ p = &infoHdrShortcut[0];
+ for (n = 0; n < last; n++, p++)
+ {
+ if (n == *pCached)
+ continue; // already tried this one
+ if ((n >= lo) && (n < hi))
+ continue; // already tried these
+ int distance = measureDistance(header, p, closeness);
+ assert(distance > 0);
+ if (distance <= min_acceptable_distance)
+ {
+ GetInfoHdr(n, state);
+ *pCached = n; // Cache this value
+ *more = distance;
+ return 0x80 | n;
+ }
+ else if (distance < closeness)
+ {
+ closeness = distance;
+ nearest = n;
+ }
+ }
+
+ //
+ // If we reach here then there was no adjacent neighbor
+    // in infoHdrShortcut[]; closeness indicates how many extra
+ // bytes we will need to encode this item.
+ //
+ assert((nearest >= 0) && (nearest <= 127));
+ GetInfoHdr(nearest, state);
+ *pCached = nearest; // Cache this value
+ *more = closeness;
+ return 0x80 | nearest;
+}
+
+/*****************************************************************************
+ *
+ * Write the initial part of the method info block. This is called twice;
+ * first to compute the size needed for the info (mask=0), the second time
+ * to actually generate the contents of the table (mask=-1,dest!=NULL).
+ */
+
+size_t GCInfo::gcInfoBlockHdrSave(
+ BYTE* dest, int mask, unsigned methodSize, unsigned prologSize, unsigned epilogSize, InfoHdr* header, int* pCached)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ printf("*************** In gcInfoBlockHdrSave()\n");
+#endif
+ size_t size = 0;
+
+#if VERIFY_GC_TABLES
+ *castto(dest, unsigned short*)++ = 0xFEEF;
+ size += sizeof(short);
+#endif
+
+ /* Write the method size first (using between 1 and 5 bytes) */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (mask)
+ printf("GCINFO: methodSize = %04X\n", methodSize);
+ if (mask)
+ printf("GCINFO: prologSize = %04X\n", prologSize);
+ if (mask)
+ printf("GCINFO: epilogSize = %04X\n", epilogSize);
+ }
+#endif
+
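+    // 'mask' is either 0 (size-only pass) or -1 (emit pass): "dest += (sz & mask)" advances
+    // the output pointer only on the emit pass, while 'size' accumulates the total on both passes.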
+ size_t methSz = encodeUnsigned(dest, methodSize);
+ size += methSz;
+ dest += methSz & mask;
+
+ //
+ // New style InfoBlk Header
+ //
+    // Typically only uses one byte to store everything.
+ //
+
+ if (mask == 0)
+ {
+ memset(header, 0, sizeof(InfoHdr));
+ *pCached = NO_CACHED_HEADER;
+ }
+
+ assert(FitsIn<unsigned char>(prologSize));
+ header->prologSize = static_cast<unsigned char>(prologSize);
+ assert(FitsIn<unsigned char>(epilogSize));
+ header->epilogSize = static_cast<unsigned char>(epilogSize);
+ header->epilogCount = compiler->getEmitter()->emitGetEpilogCnt();
+ if (header->epilogCount != compiler->getEmitter()->emitGetEpilogCnt())
+ IMPL_LIMITATION("emitGetEpilogCnt() does not fit in InfoHdr::epilogCount");
+ header->epilogAtEnd = compiler->getEmitter()->emitHasEpilogEnd();
+
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_EDI))
+ header->ediSaved = 1;
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_ESI))
+ header->esiSaved = 1;
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_EBX))
+ header->ebxSaved = 1;
+
+ header->interruptible = compiler->codeGen->genInterruptible;
+
+ if (!compiler->isFramePointerUsed())
+ {
+#if DOUBLE_ALIGN
+ if (compiler->genDoubleAlign())
+ {
+ header->ebpSaved = true;
+ assert(!compiler->codeGen->regSet.rsRegsModified(RBM_EBP));
+ }
+#endif
+ if (compiler->codeGen->regSet.rsRegsModified(RBM_EBP))
+ {
+ header->ebpSaved = true;
+ }
+ }
+ else
+ {
+ header->ebpSaved = true;
+ header->ebpFrame = true;
+ }
+
+#if DOUBLE_ALIGN
+ header->doubleAlign = compiler->genDoubleAlign();
+#endif
+
+ header->security = compiler->opts.compNeedSecurityCheck;
+
+ header->handlers = compiler->ehHasCallableHandlers();
+ header->localloc = compiler->compLocallocUsed;
+
+ header->varargs = compiler->info.compIsVarArgs;
+ header->profCallbacks = compiler->info.compProfilerCallback;
+ header->editNcontinue = compiler->opts.compDbgEnC;
+ header->genericsContext = compiler->lvaReportParamTypeArg();
+ header->genericsContextIsMethodDesc =
+ header->genericsContext && (compiler->info.compMethodInfo->options & (CORINFO_GENERICS_CTXT_FROM_METHODDESC));
+ header->gsCookieOffset = INVALID_GS_COOKIE_OFFSET;
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ assert(compiler->lvaGSSecurityCookie != BAD_VAR_NUM);
+ int stkOffs = compiler->lvaTable[compiler->lvaGSSecurityCookie].lvStkOffs;
+ header->gsCookieOffset = compiler->isFramePointerUsed() ? -stkOffs : stkOffs;
+ assert(header->gsCookieOffset != INVALID_GS_COOKIE_OFFSET);
+ }
+
+ header->syncStartOffset = INVALID_SYNC_OFFSET;
+ header->syncEndOffset = INVALID_SYNC_OFFSET;
+ if (compiler->info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ assert(compiler->syncStartEmitCookie != NULL);
+ header->syncStartOffset = compiler->getEmitter()->emitCodeOffset(compiler->syncStartEmitCookie, 0);
+ assert(header->syncStartOffset != INVALID_SYNC_OFFSET);
+
+ assert(compiler->syncEndEmitCookie != NULL);
+ header->syncEndOffset = compiler->getEmitter()->emitCodeOffset(compiler->syncEndEmitCookie, 0);
+ assert(header->syncEndOffset != INVALID_SYNC_OFFSET);
+
+ assert(header->syncStartOffset < header->syncEndOffset);
+ // synchronized methods can't have more than 1 epilog
+ assert(header->epilogCount <= 1);
+ }
+
+ assert((compiler->compArgSize & 0x3) == 0);
+
+ size_t argCount =
+ (compiler->compArgSize - (compiler->codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*))) / sizeof(void*);
+ assert(argCount <= MAX_USHORT_SIZE_T);
+ header->argCount = static_cast<unsigned short>(argCount);
+
+ header->frameSize = compiler->compLclFrameSize / sizeof(int);
+ if (header->frameSize != (compiler->compLclFrameSize / sizeof(int)))
+ IMPL_LIMITATION("compLclFrameSize does not fit in InfoHdr::frameSize");
+
+ if (mask == 0)
+ {
+ gcCountForHeader((UNALIGNED unsigned int*)&header->untrackedCnt,
+ (UNALIGNED unsigned int*)&header->varPtrTableSize);
+ }
+
+ //
+ // If the high-order bit of headerEncoding is set
+ // then additional bytes will update the InfoHdr state
+    // until the full state is encoded
+ //
+ InfoHdr state;
+ int more = 0;
+ BYTE headerEncoding = encodeHeaderFirst(*header, &state, &more, pCached);
+ ++size;
+ if (mask)
+ {
+#if REGEN_SHORTCUTS
+ regenLog(headerEncoding, header, &state);
+#endif
+ *dest++ = headerEncoding;
+
+ BYTE encoding = headerEncoding;
+ while (encoding & 0x80)
+ {
+ encoding = encodeHeaderNext(*header, &state);
+#if REGEN_SHORTCUTS
+ regenLog(headerEncoding, header, &state);
+#endif
+ *dest++ = encoding;
+ ++size;
+ }
+ }
+ else
+ {
+ size += more;
+ }
+
+ if (header->untrackedCnt > SET_UNTRACKED_MAX)
+ {
+ unsigned count = header->untrackedCnt;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, count);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ if (header->varPtrTableSize != 0)
+ {
+ unsigned count = header->varPtrTableSize;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, count);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ if (header->gsCookieOffset != INVALID_GS_COOKIE_OFFSET)
+ {
+ assert(mask == 0 || state.gsCookieOffset == HAS_GS_COOKIE_OFFSET);
+ unsigned offset = header->gsCookieOffset;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, offset);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ if (header->syncStartOffset != INVALID_SYNC_OFFSET)
+ {
+ assert(mask == 0 || state.syncStartOffset == HAS_SYNC_OFFSET);
+
+ {
+ unsigned offset = header->syncStartOffset;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, offset);
+ size += sz;
+ dest += (sz & mask);
+ }
+
+ {
+ unsigned offset = header->syncEndOffset;
+ unsigned sz = encodeUnsigned(mask ? dest : NULL, offset);
+ size += sz;
+ dest += (sz & mask);
+ }
+ }
+
+ if (header->epilogCount)
+ {
+ /* Generate table unless one epilog at the end of the method */
+
+ if (header->epilogAtEnd == 0 || header->epilogCount != 1)
+ {
+#if VERIFY_GC_TABLES
+ *castto(dest, unsigned short*)++ = 0xFACE;
+ size += sizeof(short);
+#endif
+
+ /* Simply write a sorted array of offsets using encodeUDelta */
+
+ gcEpilogTable = mask ? dest : NULL;
+ gcEpilogPrevOffset = 0;
+
+ size_t sz = compiler->getEmitter()->emitGenEpilogLst(gcRecordEpilog, this);
+
+ /* Add the size of the epilog table to the total size */
+
+ size += sz;
+ dest += (sz & mask);
+ }
+ }
+
+#if DISPLAY_SIZES
+
+ if (mask)
+ {
+ if (compiler->codeGen->genInterruptible)
+ {
+ genMethodICnt++;
+ }
+ else
+ {
+ genMethodNCnt++;
+ }
+ }
+
+#endif // DISPLAY_SIZES
+
+ return size;
+}
+
+/*****************************************************************************
+ *
+ * Return the size of the pointer tracking tables.
+ */
+
+size_t GCInfo::gcPtrTableSize(const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset)
+{
+ BYTE temp[16 + 1];
+#ifdef DEBUG
+ temp[16] = 0xAB; // Set some marker
+#endif
+
+ /* Compute the total size of the tables */
+
+ size_t size = gcMakeRegPtrTable(temp, 0, header, codeSize, pArgTabOffset);
+
+    assert(temp[16] == 0xAB); // Check that marker didn't get overwritten
+
+ return size;
+}
+
+/*****************************************************************************
+ * Encode the callee-saved registers into 3 bits.
+ */
+
+unsigned gceEncodeCalleeSavedRegs(unsigned regs)
+{
+ unsigned encodedRegs = 0;
+
+ if (regs & RBM_EBX)
+ encodedRegs |= 0x04;
+ if (regs & RBM_ESI)
+ encodedRegs |= 0x02;
+ if (regs & RBM_EDI)
+ encodedRegs |= 0x01;
+
+ return encodedRegs;
+}
+
+/*****************************************************************************
+ * Is the next entry for a byref pointer. If so, emit the prefix for the
+ * Is the next entry for a byref pointer? If so, emit the prefix for the
+ * interruptible encoding. Check only for pushes and registers.
+
+inline BYTE* gceByrefPrefixI(GCInfo::regPtrDsc* rpd, BYTE* dest)
+{
+ // For registers, we don't need a prefix if it is going dead.
+ assert(rpd->rpdArg || rpd->rpdCompiler.rpdDel == 0);
+
+ if (!rpd->rpdArg || rpd->rpdArgType == GCInfo::rpdARG_PUSH)
+ if (rpd->rpdGCtypeGet() == GCT_BYREF)
+ *dest++ = 0xBF;
+
+ return dest;
+}
+
+/*****************************************************************************/
+
+/* These functions are needed to work around a VC5.0 compiler bug */
+/* DO NOT REMOVE, unless you are sure that the free build works */
+static int zeroFN()
+{
+ return 0;
+}
+static int (*zeroFunc)() = zeroFN;
+
+/*****************************************************************************
+ * Modelling of the GC ptrs pushed on the stack
+ */
+
+typedef unsigned pasMaskType;
+#define BITS_IN_pasMask (BITS_IN_BYTE * sizeof(pasMaskType))
+#define HIGHEST_pasMask_BIT (((pasMaskType)0x1) << (BITS_IN_pasMask - 1))
+
+//-----------------------------------------------------------------------------
+
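+// PendingArgsStack tracks the GC-ness of arguments pushed on the stack at a call site.
+// The bottom BITS_IN_pasMask entries are kept as bit masks (pasBottomMask, with
+// pasByrefBottomMask qualifying which of those are byrefs); any deeper entries spill
+// into pasTopArray, one byte of GCtype per slot.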
+class PendingArgsStack
+{
+public:
+ PendingArgsStack(unsigned maxDepth, Compiler* pComp);
+
+ void pasPush(GCtype gcType);
+ void pasPop(unsigned count);
+ void pasKill(unsigned gcCount);
+
+ unsigned pasCurDepth()
+ {
+ return pasDepth;
+ }
+ pasMaskType pasArgMask()
+ {
+ assert(pasDepth <= BITS_IN_pasMask);
+ return pasBottomMask;
+ }
+ pasMaskType pasByrefArgMask()
+ {
+ assert(pasDepth <= BITS_IN_pasMask);
+ return pasByrefBottomMask;
+ }
+ bool pasHasGCptrs();
+
+    // Use these in the case where there actually are more ptrs than pasArgMask can represent
+ unsigned pasEnumGCoffsCount();
+#define pasENUM_START ((unsigned)-1)
+#define pasENUM_LAST ((unsigned)-2)
+#define pasENUM_END ((unsigned)-3)
+ unsigned pasEnumGCoffs(unsigned iter, unsigned* offs);
+
+protected:
+ unsigned pasMaxDepth;
+
+ unsigned pasDepth;
+
+ pasMaskType pasBottomMask; // The first 32 args
+ pasMaskType pasByrefBottomMask; // byref qualifier for pasBottomMask
+
+ BYTE* pasTopArray; // More than 32 args are represented here
+ unsigned pasPtrsInTopArray; // How many GCptrs here
+};
+
+//-----------------------------------------------------------------------------
+
+PendingArgsStack::PendingArgsStack(unsigned maxDepth, Compiler* pComp)
+ : pasMaxDepth(maxDepth)
+ , pasDepth(0)
+ , pasBottomMask(0)
+ , pasByrefBottomMask(0)
+ , pasTopArray(NULL)
+ , pasPtrsInTopArray(0)
+{
+ /* Do we need an array as well as the mask ? */
+
+ if (pasMaxDepth > BITS_IN_pasMask)
+ pasTopArray = (BYTE*)pComp->compGetMemA(pasMaxDepth - BITS_IN_pasMask);
+}
+
+//-----------------------------------------------------------------------------
+
+void PendingArgsStack::pasPush(GCtype gcType)
+{
+ assert(pasDepth < pasMaxDepth);
+
+ if (pasDepth < BITS_IN_pasMask)
+ {
+ /* Shift the mask */
+
+ pasBottomMask <<= 1;
+ pasByrefBottomMask <<= 1;
+
+ if (needsGC(gcType))
+ {
+ pasBottomMask |= 1;
+
+ if (gcType == GCT_BYREF)
+ pasByrefBottomMask |= 1;
+ }
+ }
+ else
+ {
+ /* Push on array */
+
+ pasTopArray[pasDepth - BITS_IN_pasMask] = (BYTE)gcType;
+
+ if (gcType)
+ pasPtrsInTopArray++;
+ }
+
+ pasDepth++;
+}
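+
+// Worked example (illustrative only): pushing GCT_GCREF, then GCT_NONE, then GCT_BYREF (with
+// the depth staying within BITS_IN_pasMask) leaves pasBottomMask == 0x5 and
+// pasByrefBottomMask == 0x1; bit 0 describes the most recently pushed of the mask-tracked slots.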
+
+//-----------------------------------------------------------------------------
+
+void PendingArgsStack::pasPop(unsigned count)
+{
+ assert(pasDepth >= count);
+
+ /* First pop from array (if applicable) */
+
+ for (/**/; (pasDepth > BITS_IN_pasMask) && count; pasDepth--, count--)
+ {
+ unsigned topIndex = pasDepth - BITS_IN_pasMask - 1;
+
+ GCtype topArg = (GCtype)pasTopArray[topIndex];
+
+ if (needsGC(topArg))
+ pasPtrsInTopArray--;
+ }
+ if (count == 0)
+ return;
+
+ /* Now un-shift the mask */
+
+ assert(pasPtrsInTopArray == 0);
+ assert(count <= BITS_IN_pasMask);
+
+ if (count == BITS_IN_pasMask) // (x>>32) is a nop on x86. So special-case it
+ {
+ pasBottomMask = pasByrefBottomMask = 0;
+ pasDepth = 0;
+ }
+ else
+ {
+ pasBottomMask >>= count;
+ pasByrefBottomMask >>= count;
+ pasDepth -= count;
+ }
+}
+
+//-----------------------------------------------------------------------------
+// Kill (but don't pop) the top 'gcCount' args
+
+void PendingArgsStack::pasKill(unsigned gcCount)
+{
+ assert(gcCount != 0);
+
+ /* First kill args in array (if any) */
+
+ for (unsigned curPos = pasDepth; (curPos > BITS_IN_pasMask) && gcCount; curPos--)
+ {
+ unsigned curIndex = curPos - BITS_IN_pasMask - 1;
+
+ GCtype curArg = (GCtype)pasTopArray[curIndex];
+
+ if (needsGC(curArg))
+ {
+ pasTopArray[curIndex] = GCT_NONE;
+ pasPtrsInTopArray--;
+ gcCount--;
+ }
+ }
+
+ /* Now kill bits from the mask */
+
+ assert(pasPtrsInTopArray == 0);
+ assert(gcCount <= BITS_IN_pasMask);
+
+ for (unsigned bitPos = 1; gcCount; bitPos <<= 1)
+ {
+ assert(pasBottomMask != 0);
+
+ if (pasBottomMask & bitPos)
+ {
+ pasBottomMask &= ~bitPos;
+ pasByrefBottomMask &= ~bitPos;
+ --gcCount;
+ }
+ else
+ {
+ assert(bitPos != HIGHEST_pasMask_BIT);
+ }
+ }
+}
+
+//-----------------------------------------------------------------------------
+// Used for the case where there are more than BITS_IN_pasMask args on stack,
+// but none of them are pointers. This may let us avoid reporting anything to the GC info.
+
+bool PendingArgsStack::pasHasGCptrs()
+{
+ if (pasDepth <= BITS_IN_pasMask)
+ return pasBottomMask != 0;
+ else
+ return pasBottomMask != 0 || pasPtrsInTopArray != 0;
+}
+
+//-----------------------------------------------------------------------------
+// Iterates over mask and array to return total count.
+// Use only when you are going to emit a table of the offsets
+
+unsigned PendingArgsStack::pasEnumGCoffsCount()
+{
+ /* Should only be used in the worst case, when just the mask can't be used */
+
+ assert(pasDepth > BITS_IN_pasMask && pasHasGCptrs());
+
+ /* Count number of set bits in mask */
+
+ unsigned count = 0;
+
+ for (pasMaskType mask = 0x1, i = 0; i < BITS_IN_pasMask; mask <<= 1, i++)
+ {
+ if (mask & pasBottomMask)
+ count++;
+ }
+
+ return count + pasPtrsInTopArray;
+}
+
+//-----------------------------------------------------------------------------
+// Initialize the enumeration by passing in iter=pasENUM_START.
+// Continue by passing in the return value as the new value of iter.
+// The enumeration is over when pasENUM_END is returned.
+// If the return value != pasENUM_END, *offs is set to the offset for GCinfo
+
+unsigned PendingArgsStack::pasEnumGCoffs(unsigned iter, unsigned* offs)
+{
+ if (iter == pasENUM_LAST)
+ return pasENUM_END;
+
+ unsigned i = (iter == pasENUM_START) ? pasDepth : iter;
+
+ for (/**/; i > BITS_IN_pasMask; i--)
+ {
+ GCtype curArg = (GCtype)pasTopArray[i - BITS_IN_pasMask - 1];
+ if (needsGC(curArg))
+ {
+ unsigned offset;
+
+ offset = (pasDepth - i) * sizeof(void*);
+ if (curArg == GCT_BYREF)
+ offset |= byref_OFFSET_FLAG;
+
+ *offs = offset;
+ return i - 1;
+ }
+ }
+
+ if (!pasBottomMask)
+ return pasENUM_END;
+
+ // Have we already processed some of the bits in pasBottomMask ?
+
+ i = (iter == pasENUM_START || iter >= BITS_IN_pasMask) ? 0 // no
+ : iter; // yes
+
+ for (pasMaskType mask = 0x1 << i; mask; i++, mask <<= 1)
+ {
+ if (mask & pasBottomMask)
+ {
+ unsigned lvl = (pasDepth > BITS_IN_pasMask) ? (pasDepth - BITS_IN_pasMask) : 0; // How many in pasTopArray[]
+ lvl += i;
+
+ unsigned offset;
+ offset = lvl * sizeof(void*);
+ if (mask & pasByrefBottomMask)
+ offset |= byref_OFFSET_FLAG;
+
+ *offs = offset;
+
+ unsigned remMask = -int(mask << 1);
+ return ((pasBottomMask & remMask) ? (i + 1) : pasENUM_LAST);
+ }
+ }
+
+    assert(!"Shouldn't reach here");
+ return pasENUM_END;
+}
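+
+// Worked example (illustrative only): with pasDepth == 3, pasBottomMask == 0x5 and
+// pasByrefBottomMask == 0x1, starting the enumeration with iter=pasENUM_START yields offset 0
+// (tagged with byref_OFFSET_FLAG) for bit 0, then offset 2 * sizeof(void*) for bit 2, and then
+// pasENUM_LAST/pasENUM_END; offsets count up from the most recently pushed slot.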
+
+/*****************************************************************************
+ *
+ * Generate the register pointer map, and return its total size in bytes. If
+ * 'mask' is 0, we don't actually store any data in 'dest' (except for one
+ * entry, which is never more than 10 bytes), so this can be used to merely
+ * compute the size of the table.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+size_t GCInfo::gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset)
+{
+ unsigned count;
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ unsigned pass;
+
+ size_t totalSize = 0;
+ unsigned lastOffset;
+
+ bool thisKeptAliveIsInUntracked = false;
+
+ /* The mask should be all 0's or all 1's */
+
+ assert(mask == 0 || mask == -1);
+
+ /* Start computing the total size of the table */
+
+ BOOL emitArgTabOffset = (header.varPtrTableSize != 0 || header.untrackedCnt > SET_UNTRACKED_MAX);
+ if (mask != 0 && emitArgTabOffset)
+ {
+ assert(*pArgTabOffset <= MAX_UNSIGNED_SIZE_T);
+ unsigned sz = encodeUnsigned(dest, static_cast<unsigned>(*pArgTabOffset));
+ dest += sz;
+ totalSize += sz;
+ }
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xBEEF;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+ /**************************************************************************
+ *
+ * Untracked ptr variables
+ *
+ **************************************************************************
+ */
+
+ count = 0;
+ for (pass = 0; pass < 2; pass++)
+ {
+ /* If pass==0, generate the count
+ * If pass==1, write the table of untracked pointer variables.
+ */
+
+ int lastoffset = 0;
+ if (pass == 1)
+ {
+ assert(count == header.untrackedCnt);
+ if (header.untrackedCnt == 0)
+ break; // No entries, break exits the loop since pass==1
+ }
+
+ /* Count&Write untracked locals and non-enregistered args */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct must have been
+ // reported through its parent local
+ continue;
+ }
+
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ /* Do we have an argument or local variable? */
+ if (!varDsc->lvIsParam)
+ {
+                    // If it is pinned, it must be an untracked local
+ assert(!varDsc->lvPinned || !varDsc->lvTracked);
+
+ if (varDsc->lvTracked || !varDsc->lvOnFrame)
+ continue;
+ }
+ else
+ {
+ /* Stack-passed arguments which are not enregistered
+ * are always reported in this "untracked stack
+ * pointers" section of the GC info even if lvTracked==true
+ */
+
+ /* Has this argument been enregistered? */
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+#else // LEGACY_BACKEND
+ if (varDsc->lvRegister)
+#endif // LEGACY_BACKEND
+ {
+ /* if a CEE_JMP has been used, then we need to report all the arguments
+ even if they are enregistered, since we will be using this value
+                           in the JMP call. Note that this is subtle as we require that
+ argument offsets are always fixed up properly even if lvRegister
+ is set */
+ if (!compiler->compJmpOpUsed)
+ continue;
+ }
+ else
+ {
+ if (!varDsc->lvOnFrame)
+ {
+ /* If this non-enregistered pointer arg is never
+ * used, we don't need to report it
+ */
+ assert(varDsc->lvRefCnt == 0); // This assert is currently a known issue for X86-RyuJit
+ continue;
+ }
+ else if (varDsc->lvIsRegArg && varDsc->lvTracked)
+ {
+ /* If this register-passed arg is tracked, then
+ * it has been allocated space near the other
+ * pointer variables and we have accurate life-
+ * time info. It will be reported with
+ * gcVarPtrList in the "tracked-pointer" section
+ */
+
+ continue;
+ }
+ }
+ }
+
+ if (compiler->lvaIsOriginalThisArg(varNum) && compiler->lvaKeepAliveAndReportThis())
+ {
+ // Encoding of untracked variables does not support reporting
+ // "this". So report it as a tracked variable with a liveness
+ // extending over the entire method.
+
+ thisKeptAliveIsInUntracked = true;
+ continue;
+ }
+
+ if (pass == 0)
+ count++;
+ else
+ {
+ int offset;
+ assert(pass == 1);
+
+ offset = varDsc->lvStkOffs;
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (compiler->genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+
+ // The lower bits of the offset encode properties of the stk ptr
+
+ assert(~OFFSET_MASK % sizeof(offset) == 0);
+
+ if (varDsc->TypeGet() == TYP_BYREF)
+ {
+ // Or in byref_OFFSET_FLAG for 'byref' pointer tracking
+ offset |= byref_OFFSET_FLAG;
+ }
+
+ if (varDsc->lvPinned)
+ {
+ // Or in pinned_OFFSET_FLAG for 'pinned' pointer tracking
+ offset |= pinned_OFFSET_FLAG;
+ }
+
+ int encodedoffset = lastoffset - offset;
+ lastoffset = offset;
+
+ if (mask == 0)
+ totalSize += encodeSigned(NULL, encodedoffset);
+ else
+ {
+ unsigned sz = encodeSigned(dest, encodedoffset);
+ dest += sz;
+ totalSize += sz;
+ }
+ }
+ }
+
+ // A struct will have gcSlots only if it is at least TARGET_POINTER_SIZE.
+ if (varDsc->lvType == TYP_STRUCT && varDsc->lvOnFrame && (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*);
+ BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ // walk each member of the array
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] == TYPE_GC_NONE) // skip non-gc slots
+ continue;
+
+ if (pass == 0)
+ count++;
+ else
+ {
+ assert(pass == 1);
+
+ unsigned offset = varDsc->lvStkOffs + i * sizeof(void*);
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (compiler->genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+ if (gcPtrs[i] == TYPE_GC_BYREF)
+ offset |= byref_OFFSET_FLAG; // indicate it is a byref GC pointer
+
+ int encodedoffset = lastoffset - offset;
+ lastoffset = offset;
+
+ if (mask == 0)
+ totalSize += encodeSigned(NULL, encodedoffset);
+ else
+ {
+ unsigned sz = encodeSigned(dest, encodedoffset);
+ dest += sz;
+ totalSize += sz;
+ }
+ }
+ }
+ }
+ }
+
+ /* Count&Write spill temps that hold pointers */
+
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempItem = compiler->tmpListBeg(); tempItem != nullptr; tempItem = compiler->tmpListNxt(tempItem))
+ {
+ if (varTypeIsGC(tempItem->tdTempType()))
+ {
+ if (pass == 0)
+ count++;
+ else
+ {
+ int offset;
+ assert(pass == 1);
+
+ offset = tempItem->tdTempOffs();
+
+ if (tempItem->tdTempType() == TYP_BYREF)
+ {
+ offset |= byref_OFFSET_FLAG;
+ }
+
+ int encodedoffset = lastoffset - offset;
+ lastoffset = offset;
+
+ if (mask == 0)
+ {
+ totalSize += encodeSigned(NULL, encodedoffset);
+ }
+ else
+ {
+ unsigned sz = encodeSigned(dest, encodedoffset);
+ dest += sz;
+ totalSize += sz;
+ }
+ }
+ }
+ }
+ }
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xCAFE;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+ /**************************************************************************
+ *
+ * Generate the table of stack pointer variable lifetimes.
+ *
+ * In the first pass we'll count the lifetime entries and note
+ * whether there are any that don't fit in a small encoding. In
+ * the second pass we actually generate the table contents.
+ *
+ **************************************************************************
+ */
+
+ // First we check for the most common case - no lifetimes at all.
+
+ if (header.varPtrTableSize == 0)
+ goto DONE_VLT;
+
+ varPtrDsc* varTmp;
+ count = 0;
+
+ if (thisKeptAliveIsInUntracked)
+ {
+ count = 1;
+
+ // Encoding of untracked variables does not support reporting
+ // "this". So report it as a tracked variable with a liveness
+ // extending over the entire method.
+
+ assert(compiler->lvaTable[compiler->info.compThisArg].TypeGet() == TYP_REF);
+
+ unsigned varOffs = compiler->lvaTable[compiler->info.compThisArg].lvStkOffs;
+
+ /* For negative stack offsets we must reset the low bits,
+ * take abs and then set them back */
+
+ varOffs = abs(static_cast<int>(varOffs));
+ varOffs |= this_OFFSET_FLAG;
+
+ size_t sz = 0;
+ sz = encodeUnsigned(mask ? (dest + sz) : NULL, varOffs);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, 0, 0);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, codeSize, 0);
+
+ dest += (sz & mask);
+ totalSize += sz;
+ }
+
+ for (pass = 0; pass < 2; pass++)
+ {
+        /* On the second pass, verify the count that was computed during the first pass */
+
+ if (pass)
+ {
+ assert(header.varPtrTableSize > 0);
+ assert(header.varPtrTableSize == count);
+ }
+
+ /* We'll use a delta encoding for the lifetime offsets */
+
+ lastOffset = 0;
+
+ for (varTmp = gcVarPtrList; varTmp; varTmp = varTmp->vpdNext)
+ {
+ unsigned varOffs;
+ unsigned lowBits;
+
+ unsigned begOffs;
+ unsigned endOffs;
+
+ assert(~OFFSET_MASK % sizeof(void*) == 0);
+
+ /* Get hold of the variable's stack offset */
+
+ lowBits = varTmp->vpdVarNum & OFFSET_MASK;
+
+ /* For negative stack offsets we must reset the low bits,
+ * take abs and then set them back */
+
+ varOffs = abs(static_cast<int>(varTmp->vpdVarNum & ~OFFSET_MASK));
+ varOffs |= lowBits;
+
+ /* Compute the actual lifetime offsets */
+
+ begOffs = varTmp->vpdBegOfs;
+ endOffs = varTmp->vpdEndOfs;
+
+ /* Special case: skip any 0-length lifetimes */
+
+ if (endOffs == begOffs)
+ continue;
+
+ /* Are we counting or generating? */
+
+ if (!pass)
+ {
+ count++;
+ }
+ else
+ {
+ size_t sz = 0;
+ sz = encodeUnsigned(mask ? (dest + sz) : NULL, varOffs);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, begOffs, lastOffset);
+ sz += encodeUDelta(mask ? (dest + sz) : NULL, endOffs, begOffs);
+
+ dest += (sz & mask);
+ totalSize += sz;
+ }
+
+ /* The next entry will be relative to the one we just processed */
+
+ lastOffset = begOffs;
+ }
+ }
+
+DONE_VLT:
+
+ if (pArgTabOffset != NULL)
+ *pArgTabOffset = totalSize;
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xBABE;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+ if (!mask && emitArgTabOffset)
+ {
+ assert(*pArgTabOffset <= MAX_UNSIGNED_SIZE_T);
+ totalSize += encodeUnsigned(NULL, static_cast<unsigned>(*pArgTabOffset));
+ }
+
+ /**************************************************************************
+ *
+ * Prepare to generate the pointer register/argument map
+ *
+ **************************************************************************
+ */
+
+ lastOffset = 0;
+
+ if (compiler->codeGen->genInterruptible)
+ {
+#ifdef _TARGET_X86_
+ assert(compiler->genFullPtrRegMap);
+
+ unsigned ptrRegs = 0;
+
+ regPtrDsc* genRegPtrTemp;
+
+ /* Walk the list of pointer register/argument entries */
+
+ for (genRegPtrTemp = gcRegPtrList; genRegPtrTemp; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ BYTE* base = dest;
+
+ unsigned nextOffset;
+ DWORD codeDelta;
+
+ nextOffset = genRegPtrTemp->rpdOffs;
+
+ /*
+ Encoding table for methods that are fully interruptible
+
+ The encoding used is as follows:
+
+ ptr reg dead 00RRRDDD [RRR != 100]
+ ptr reg live 01RRRDDD [RRR != 100]
+
+ non-ptr arg push 10110DDD [SSS == 110]
+ ptr arg push 10SSSDDD [SSS != 110] && [SSS != 111]
+ ptr arg pop 11CCCDDD [CCC != 000] && [CCC != 110] && [CCC != 111]
+ little skip 11000DDD [CCC == 000]
+ bigger skip 11110BBB [CCC == 110]
+
+ The values used in the above encodings are as follows:
+
+ DDD code offset delta from previous entry (0-7)
+ BBB bigger delta 000=8,001=16,010=24,...,111=64
+ RRR register number (EAX=000,ECX=001,EDX=010,EBX=011,
+ EBP=101,ESI=110,EDI=111), ESP=100 is reserved
+ SSS argument offset from base of stack. This is
+ redundant for frameless methods as we can
+ infer it from the previous pushes+pops. However,
+ for EBP-methods, we only report GC pushes, and
+ so we need SSS
+ CCC argument count being popped (includes only ptrs for EBP methods)
+
+ The following are the 'large' versions:
+
+ large delta skip 10111000 [0xB8] , encodeUnsigned(delta)
+
+ large ptr arg push 11111000 [0xF8] , encodeUnsigned(pushCount)
+ large non-ptr arg push 11111001 [0xF9] , encodeUnsigned(pushCount)
+ large ptr arg pop 11111100 [0xFC] , encodeUnsigned(popCount)
+ large arg dead 11111101 [0xFD] , encodeUnsigned(popCount) for caller-pop args.
+ Any GC args go dead after the call,
+ but are still sitting on the stack
+
+ this pointer prefix 10111100 [0xBC] the next encoding is a ptr live
+ or a ptr arg push
+ and contains the this pointer
+
+ interior or by-ref 10111111 [0xBF] the next encoding is a ptr live
+ pointer prefix or a ptr arg push
+ and contains an interior
+ or by-ref pointer
+
+
+ The value 11111111 [0xFF] indicates the end of the table.
+ */
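+
+            // Worked example (illustrative only): a GC ref becoming live in EBX at a code
+            // delta of 3 emits 0x40 | (3 << 3) | 3 == 0x5B ("ptr reg live", RRR=011,
+            // DDD=011); a byref value would additionally get the 0xBF prefix from
+            // gceByrefPrefixI.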
+
+ codeDelta = nextOffset - lastOffset;
+ assert((int)codeDelta >= 0);
+
+ // If the code delta is between 8 and (64+7),
+ // generate a 'bigger delta' encoding
+
+ if ((codeDelta >= 8) && (codeDelta <= (64 + 7)))
+ {
+ unsigned biggerDelta = ((codeDelta - 8) & 0x38) + 8;
+ *dest++ = 0xF0 | ((biggerDelta - 8) >> 3);
+ lastOffset += biggerDelta;
+ codeDelta &= 0x07;
+ }
+
+ // If the code delta is still bigger than 7,
+ // generate a 'large code delta' encoding
+
+ if (codeDelta > 7)
+ {
+ *dest++ = 0xB8;
+ dest += encodeUnsigned(dest, codeDelta);
+ codeDelta = 0;
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+
+ /* Is this a pointer argument or register entry? */
+
+ if (genRegPtrTemp->rpdArg)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_KILL)
+ {
+ if (codeDelta)
+ {
+ /*
+ Use the small encoding:
+ little delta skip 11000DDD [0xC0]
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0xC0 | (BYTE)codeDelta;
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+
+ /* Caller-pop arguments are dead after call but are still
+ sitting on the stack */
+
+ *dest++ = 0xFD;
+ assert(genRegPtrTemp->rpdPtrArg != 0);
+ dest += encodeUnsigned(dest, genRegPtrTemp->rpdPtrArg);
+ }
+ else if (genRegPtrTemp->rpdPtrArg < 6 && genRegPtrTemp->rpdGCtypeGet())
+ {
+ /* Is the argument offset/count smaller than 6 ? */
+
+ dest = gceByrefPrefixI(genRegPtrTemp, dest);
+
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH || (genRegPtrTemp->rpdPtrArg != 0))
+ {
+ /*
+ Use the small encoding:
+
+ ptr arg push 10SSSDDD [SSS != 110] && [SSS != 111]
+ ptr arg pop 11CCCDDD [CCC != 110] && [CCC != 111]
+ */
+
+ bool isPop = genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP;
+
+ *dest++ = 0x80 | (BYTE)codeDelta | genRegPtrTemp->rpdPtrArg << 3 | isPop << 6;
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+ else
+ {
+ assert(!"Check this");
+ }
+ }
+ else if (genRegPtrTemp->rpdGCtypeGet() == GCT_NONE)
+ {
+ /*
+ Use the small encoding:
+                        non-ptr arg push 10110DDD [0xB0] (push of sizeof(int))
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0xB0 | (BYTE)codeDelta;
+ assert(!compiler->isFramePointerUsed());
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+ else
+ {
+ /* Will have to use large encoding;
+ * first do the code delta
+ */
+
+ if (codeDelta)
+ {
+ /*
+ Use the small encoding:
+ little delta skip 11000DDD [0xC0]
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0xC0 | (BYTE)codeDelta;
+ }
+
+ /*
+ Now append a large argument record:
+
+ large ptr arg push 11111000 [0xF8]
+ large ptr arg pop 11111100 [0xFC]
+ */
+
+ bool isPop = genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP;
+
+ dest = gceByrefPrefixI(genRegPtrTemp, dest);
+
+ *dest++ = 0xF8 | (isPop << 2);
+ dest += encodeUnsigned(dest, genRegPtrTemp->rpdPtrArg);
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+ }
+ }
+ else
+ {
+ unsigned regMask;
+
+ /* Record any registers that are becoming dead */
+
+ regMask = genRegPtrTemp->rpdCompiler.rpdDel & ptrRegs;
+
+ while (regMask) // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+ {
+ unsigned tmpMask;
+ regNumber regNum;
+
+ /* Get hold of the next register bit */
+
+ tmpMask = genFindLowestReg(regMask);
+ assert(tmpMask);
+
+ /* Remember the new state of this register */
+
+ ptrRegs &= ~tmpMask;
+
+ /* Figure out which register the next bit corresponds to */
+
+ regNum = genRegNumFromMask(tmpMask);
+ assert(regNum <= 7);
+
+ /* Reserve ESP, regNum==4 for future use */
+
+ assert(regNum != 4);
+
+ /*
+ Generate a small encoding:
+
+ ptr reg dead 00RRRDDD
+ */
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0x00 | regNum << 3 | (BYTE)codeDelta;
+
+ /* Turn the bit we've just generated off and continue */
+
+ regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ /* Any entries that follow will be at the same offset */
+
+ codeDelta = zeroFunc(); /* DO NOT REMOVE */
+ }
+
+ /* Record any registers that are becoming live */
+
+ regMask = genRegPtrTemp->rpdCompiler.rpdAdd & ~ptrRegs;
+
+ while (regMask) // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+ {
+ unsigned tmpMask;
+ regNumber regNum;
+
+ /* Get hold of the next register bit */
+
+ tmpMask = genFindLowestReg(regMask);
+ assert(tmpMask);
+
+ /* Remember the new state of this register */
+
+ ptrRegs |= tmpMask;
+
+ /* Figure out which register the next bit corresponds to */
+
+ regNum = genRegNumFromMask(tmpMask);
+ assert(regNum <= 7);
+
+ /*
+ Generate a small encoding:
+
+ ptr reg live 01RRRDDD
+ */
+
+ dest = gceByrefPrefixI(genRegPtrTemp, dest);
+
+ if (!thisKeptAliveIsInUntracked && genRegPtrTemp->rpdIsThis)
+ {
+ // Mark with 'this' pointer prefix
+ *dest++ = 0xBC;
+ // Can only have one bit set in regMask
+ assert(regMask == tmpMask);
+ }
+
+ assert((codeDelta & 0x7) == codeDelta);
+ *dest++ = 0x40 | (regNum << 3) | (BYTE)codeDelta;
+
+ /* Turn the bit we've just generated off and continue */
+
+ regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ /* Any entries that follow will be at the same offset */
+
+ codeDelta = zeroFunc(); /* DO NOT REMOVE */
+ }
+ }
+
+ /* Keep track of the total amount of generated stuff */
+
+ totalSize += dest - base;
+
+ /* Go back to the buffer start if we're not generating a table */
+
+ if (!mask)
+ dest = base;
+ }
+#endif // _TARGET_X86_
+
+ /* Terminate the table with 0xFF */
+
+ *dest = 0xFF;
+ dest -= mask;
+ totalSize++;
+ }
+ else if (compiler->isFramePointerUsed()) // genInterruptible is false
+ {
+#ifdef _TARGET_X86_
+ /*
+ Encoding table for methods with an EBP frame and
+ that are not fully interruptible
+
+ The encoding used is as follows:
+
+ this pointer encodings:
+
+ 01000000 this pointer in EBX
+ 00100000 this pointer in ESI
+ 00010000 this pointer in EDI
+
+ tiny encoding:
+
+ 0bsdDDDD
+ requires code delta > 0 & delta < 16 (4-bits)
+ requires pushed argmask == 0
+
+ where DDDD is code delta
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+
+
+ small encoding:
+
+ 1DDDDDDD bsdAAAAA
+
+ requires code delta < 120 (7-bits)
+            requires pushed argmask < 32 (5-bits)
+
+ where DDDDDDD is code delta
+ AAAAA is the pushed args mask
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+
+ medium encoding
+
+             0xFD aaaaaaaa AAAAdddd bsdDDDDD
+
+ requires code delta < 512 (9-bits)
+            requires pushed argmask < 4096 (12-bits)
+
+ where DDDDD is the upper 5-bits of the code delta
+ dddd is the low 4-bits of the code delta
+ AAAA is the upper 4-bits of the pushed arg mask
+ aaaaaaaa is the low 8-bits of the pushed arg mask
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+            d indicates that register EDI is a live pointer
+
+ medium encoding with interior pointers
+
+            0xF9 DDDDDDDD bsdAAAAA iiiIIIII
+
+            requires code delta < 256 (8-bits)
+            requires pushed argmask < 32 (5-bits)
+
+            where DDDDDDDD is the code delta
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ AAAAA is the pushed arg mask
+ iii indicates that EBX,EDI,ESI are interior pointers
+ IIIII indicates that bits in the arg mask are interior
+ pointers
+
+ large encoding
+
+ 0xFE [0BSD0bsd][32-bit code delta][32-bit argMask]
+
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ B indicates that register EBX is an interior pointer
+ S indicates that register ESI is an interior pointer
+ D indicates that register EDI is an interior pointer
+ requires pushed argmask < 32-bits
+
+ large encoding with interior pointers
+
+ 0xFA [0BSD0bsd][32-bit code delta][32-bit argMask][32-bit interior pointer mask]
+
+
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ B indicates that register EBX is an interior pointer
+ S indicates that register ESI is an interior pointer
+ D indicates that register EDI is an interior pointer
+ requires pushed argmask < 32-bits
+ requires pushed iArgmask < 32-bits
+
+
+ huge encoding This is the only encoding that supports
+ a pushed argmask which is greater than
+ 32-bits.
+
+ 0xFB [0BSD0bsd][32-bit code delta]
+ [32-bit table count][32-bit table size]
+ [pushed ptr offsets table...]
+
+ b indicates that register EBX is a live pointer
+ s indicates that register ESI is a live pointer
+ d indicates that register EDI is a live pointer
+ B indicates that register EBX is an interior pointer
+ S indicates that register ESI is an interior pointer
+ D indicates that register EDI is an interior pointer
+ the list count is the number of entries in the list
+ the list size gives the byte-length of the list
+ the offsets in the list are variable-length
+ */
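+
+        // Worked example (illustrative only): a call site 5 bytes past the previous entry with
+        // only EDI live and nothing pushed fits the tiny encoding: (regMask << 4) | codeDelta
+        // == (0x01 << 4) | 5 == 0x15 (0bsdDDDD with d set). With an argmask of 0x3 it would
+        // instead use the small encoding: 0x80 | 5, then 0x3 | (0x01 << 5) == 0x23.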
+
+ /* If "this" is enregistered, note it. We do this explicitly here as
+ genFullPtrRegMap==false, and so we don't have any regPtrDsc's. */
+
+ if (compiler->lvaKeepAliveAndReportThis() && compiler->lvaTable[compiler->info.compThisArg].lvRegister)
+ {
+ unsigned thisRegMask = genRegMask(compiler->lvaTable[compiler->info.compThisArg].lvRegNum);
+ unsigned thisPtrRegEnc = gceEncodeCalleeSavedRegs(thisRegMask) << 4;
+
+ if (thisPtrRegEnc)
+ {
+ totalSize += 1;
+ if (mask)
+ *dest++ = thisPtrRegEnc;
+ }
+ }
+
+ CallDsc* call;
+
+ assert(compiler->genFullPtrRegMap == false);
+
+ /* Walk the list of pointer register/argument entries */
+
+ for (call = gcCallDescList; call; call = call->cdNext)
+ {
+ BYTE* base = dest;
+ unsigned nextOffset;
+
+ /* Figure out the code offset of this entry */
+
+ nextOffset = call->cdOffs;
+
+ /* Compute the distance from the previous call */
+
+ DWORD codeDelta = nextOffset - lastOffset;
+
+ assert((int)codeDelta >= 0);
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ /* Compute the register mask */
+
+ unsigned gcrefRegMask = 0;
+ unsigned byrefRegMask = 0;
+
+ gcrefRegMask |= gceEncodeCalleeSavedRegs(call->cdGCrefRegs);
+ byrefRegMask |= gceEncodeCalleeSavedRegs(call->cdByrefRegs);
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ unsigned regMask = gcrefRegMask | byrefRegMask;
+
+ bool byref = (byrefRegMask | call->u1.cdByrefArgMask) != 0;
+
+ /* Check for the really large argument offset case */
+ /* The very rare Huge encodings */
+
+ if (call->cdArgCnt)
+ {
+ unsigned argNum;
+ DWORD argCnt = call->cdArgCnt;
+ DWORD argBytes = 0;
+ BYTE* pArgBytes = DUMMY_INIT(NULL);
+
+ if (mask != 0)
+ {
+ *dest++ = 0xFB;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = argCnt;
+ dest += sizeof(DWORD);
+ // skip the byte-size for now. Just note where it will go
+ pArgBytes = dest;
+ dest += sizeof(DWORD);
+ }
+
+ for (argNum = 0; argNum < argCnt; argNum++)
+ {
+ unsigned eltSize;
+ eltSize = encodeUnsigned(dest, call->cdArgTable[argNum]);
+ argBytes += eltSize;
+ if (mask)
+ dest += eltSize;
+ }
+
+ if (mask == 0)
+ {
+ dest = base + 1 + 1 + 3 * sizeof(DWORD) + argBytes;
+ }
+ else
+ {
+ assert(dest == pArgBytes + sizeof(argBytes) + argBytes);
+ *(DWORD*)pArgBytes = argBytes;
+ }
+ }
+
+ /* Check if we can use a tiny encoding */
+ else if ((codeDelta < 16) && (codeDelta != 0) && (call->u1.cdArgMask == 0) && !byref)
+ {
+ *dest++ = (regMask << 4) | (BYTE)codeDelta;
+ }
+
+ /* Check if we can use the small encoding */
+ else if ((codeDelta < 0x79) && (call->u1.cdArgMask <= 0x1F) && !byref)
+ {
+ *dest++ = 0x80 | (BYTE)codeDelta;
+ *dest++ = call->u1.cdArgMask | (regMask << 5);
+ }
+
+ /* Check if we can use the medium encoding */
+ else if (codeDelta <= 0x01FF && call->u1.cdArgMask <= 0x0FFF && !byref)
+ {
+ *dest++ = 0xFD;
+ *dest++ = call->u1.cdArgMask;
+ *dest++ = ((call->u1.cdArgMask >> 4) & 0xF0) | ((BYTE)codeDelta & 0x0F);
+ *dest++ = (regMask << 5) | (BYTE)((codeDelta >> 4) & 0x1F);
+ }
+
+ /* Check if we can use the medium encoding with byrefs */
+ else if (codeDelta <= 0x0FF && call->u1.cdArgMask <= 0x01F)
+ {
+ *dest++ = 0xF9;
+ *dest++ = (BYTE)codeDelta;
+ *dest++ = (regMask << 5) | call->u1.cdArgMask;
+ *dest++ = (byrefRegMask << 5) | call->u1.cdByrefArgMask;
+ }
+
+ /* We'll use the large encoding */
+ else if (!byref)
+ {
+ *dest++ = 0xFE;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = call->u1.cdArgMask;
+ dest += sizeof(DWORD);
+ }
+
+ /* We'll use the large encoding with byrefs */
+ else
+ {
+ *dest++ = 0xFA;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = call->u1.cdArgMask;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = call->u1.cdByrefArgMask;
+ dest += sizeof(DWORD);
+ }
+
+ /* Keep track of the total amount of generated stuff */
+
+ totalSize += dest - base;
+
+ /* Go back to the buffer start if we're not generating a table */
+
+ if (!mask)
+ dest = base;
+ }
+#endif // _TARGET_X86_
+
+ /* Terminate the table with 0xFF */
+
+ *dest = 0xFF;
+ dest -= mask;
+ totalSize++;
+ }
+ else // genInterruptible is false and we have an EBP-less frame
+ {
+ assert(compiler->genFullPtrRegMap);
+
+#ifdef _TARGET_X86_
+
+ regPtrDsc* genRegPtrTemp;
+ regNumber thisRegNum = regNumber(0);
+ PendingArgsStack pasStk(compiler->getEmitter()->emitMaxStackDepth, compiler);
+
+ /* Walk the list of pointer register/argument entries */
+
+ for (genRegPtrTemp = gcRegPtrList; genRegPtrTemp; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+
+ /*
+ * Encoding table for methods without an EBP frame and
+ * that are not fully interruptible
+ *
+ * The encoding used is as follows:
+ *
+ * push 000DDDDD ESP push one item with 5-bit delta
+ * push 00100000 [pushCount] ESP push multiple items
+ * reserved 0010xxxx xxxx != 0000
+ * reserved 0011xxxx
+ * skip 01000000 [Delta] Skip Delta, arbitrary sized delta
+ * skip 0100DDDD Skip small Delta, for call (DDDD != 0)
+ * pop 01CCDDDD ESP pop CC items with 4-bit delta (CC != 00)
+ * call 1PPPPPPP Call Pattern, P=[0..79]
+ * call 1101pbsd DDCCCMMM Call RegMask=pbsd,ArgCnt=CCC,
+ * ArgMask=MMM Delta=commonDelta[DD]
+ * call 1110pbsd [ArgCnt] [ArgMask] Call ArgCnt,RegMask=pbsd,ArgMask
+ * call 11111000 [PBSDpbsd][32-bit delta][32-bit ArgCnt]
+ * [32-bit PndCnt][32-bit PndSize][PndOffs...]
+ * iptr 11110000 [IPtrMask] Arbitrary Interior Pointer Mask
+ * thisptr 111101RR This pointer is in Register RR
+ * 00=EDI,01=ESI,10=EBX,11=EBP
+ * reserved 111100xx xx != 00
+ * reserved 111110xx xx != 00
+ * reserved 11111xxx xxx != 000 && xxx != 111(EOT)
+ *
+ * The value 11111111 [0xFF] indicates the end of the table. (EOT)
+ *
+ * An offset (at which stack-walking is performed) without an explicit encoding
+ * is assumed to be a trivial call-site (no GC registers, stack empty before and
+ * after) to avoid having to encode all trivial calls.
+ *
+ * Note on the encoding used for interior pointers
+ *
+ * The iptr encoding must immediately precede a call encoding. It is used
+             * to transform normal GC pointer addresses into interior pointers for
+ * GC purposes. The mask supplied to the iptr encoding is read from the
+             * least significant bit to the most significant bit. (i.e. the lowest bit is
+             * read first)
+ *
+ * p indicates that register EBP is a live pointer
+ * b indicates that register EBX is a live pointer
+ * s indicates that register ESI is a live pointer
+ * d indicates that register EDI is a live pointer
+ * P indicates that register EBP is an interior pointer
+ * B indicates that register EBX is an interior pointer
+ * S indicates that register ESI is an interior pointer
+ * D indicates that register EDI is an interior pointer
+ *
+             *  As an example the following sequence indicates that EDI, ESI and the
+             *  second pushed pointer in ArgMask are really interior pointers. The
+             *  pointer in EBP is a normal pointer:
+ *
+ * iptr 11110000 00010011 => read Interior Ptr, Interior Ptr,
+ * Normal Ptr, Normal Ptr, Interior Ptr
+ *
+             *      call 11011011 DDCCC011 RRRR=1011 => read EDI is a GC-pointer,
+ * ESI is a GC-pointer.
+ * EBP is a GC-pointer
+ * MMM=0011 => read two GC-pointers arguments
+ * on the stack (nested call)
+ *
+ * Since the call instruction mentions 5 GC-pointers we list them in
+ * the required order: EDI, ESI, EBP, 1st-pushed pointer, 2nd-pushed pointer
+ *
+ * And we apply the Interior Pointer mask mmmm=10011 to the five GC-pointers
+ * we learn that EDI and ESI are interior GC-pointers and that
+ * the second push arg is an interior GC-pointer.
+ */
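+
+                // Worked example (illustrative only): a GC pointer pushed 2 bytes after the
+                // previous entry is written as the single byte 0x02 (push 000DDDDD); a
+                // one-item pop 4 bytes later is written as 0x54 (pop1 0101DDDD). Larger
+                // deltas are first consumed by a "skip 01000000 [Delta]" record.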
+
+ BYTE* base = dest;
+
+ bool usePopEncoding;
+ unsigned regMask;
+ unsigned argMask;
+ unsigned byrefRegMask;
+ unsigned byrefArgMask;
+ DWORD callArgCnt;
+
+ unsigned nextOffset;
+ DWORD codeDelta;
+
+ nextOffset = genRegPtrTemp->rpdOffs;
+
+ /* Compute the distance from the previous call */
+
+ codeDelta = nextOffset - lastOffset;
+ assert((int)codeDelta >= 0);
+
+#if REGEN_CALLPAT
+ // Must initialize this flag to true when REGEN_CALLPAT is on
+ usePopEncoding = true;
+ unsigned origCodeDelta = codeDelta;
+#endif
+
+ if (!thisKeptAliveIsInUntracked && genRegPtrTemp->rpdIsThis)
+ {
+ unsigned tmpMask = genRegPtrTemp->rpdCompiler.rpdAdd;
+
+ /* tmpMask must have exactly one bit set */
+
+ assert(tmpMask && ((tmpMask & (tmpMask - 1)) == 0));
+
+ thisRegNum = genRegNumFromMask(tmpMask);
+ switch (thisRegNum)
+ {
+ case 0: // EAX
+ case 1: // ECX
+ case 2: // EDX
+ case 4: // ESP
+ break;
+ case 7: // EDI
+ *dest++ = 0xF4; /* 11110100 This pointer is in EDI */
+ break;
+ case 6: // ESI
+                        *dest++ = 0xF5; /* 11110101 This pointer is in ESI */
+ break;
+ case 3: // EBX
+                        *dest++ = 0xF6; /* 11110110 This pointer is in EBX */
+ break;
+ case 5: // EBP
+                        *dest++ = 0xF7; /* 11110111 This pointer is in EBP */
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Is this a stack pointer change or call? */
+
+ if (genRegPtrTemp->rpdArg)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_KILL)
+ {
+ // kill 'rpdPtrArg' number of pointer variables in pasStk
+ pasStk.pasKill(genRegPtrTemp->rpdPtrArg);
+ }
+ /* Is this a call site? */
+ else if (genRegPtrTemp->rpdCall)
+ {
+ /* This is a true call site */
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ callArgCnt = genRegPtrTemp->rpdPtrArg;
+
+ unsigned gcrefRegMask = genRegPtrTemp->rpdCallGCrefRegs;
+
+ byrefRegMask = genRegPtrTemp->rpdCallByrefRegs;
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ regMask = gcrefRegMask | byrefRegMask;
+
+ /* adjust argMask for this call-site */
+ pasStk.pasPop(callArgCnt);
+
+ /* Do we have to use the fat encoding */
+
+ if (pasStk.pasCurDepth() > BITS_IN_pasMask && pasStk.pasHasGCptrs())
+ {
+ /* use fat encoding:
+ * 11111000 [PBSDpbsd][32-bit delta][32-bit ArgCnt]
+ * [32-bit PndCnt][32-bit PndSize][PndOffs...]
+ */
+
+ DWORD pndCount = pasStk.pasEnumGCoffsCount();
+ DWORD pndSize = 0;
+ BYTE* pPndSize = DUMMY_INIT(NULL);
+
+ if (mask)
+ {
+ *dest++ = 0xF8;
+ *dest++ = (byrefRegMask << 4) | regMask;
+ *(DWORD*)dest = codeDelta;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = callArgCnt;
+ dest += sizeof(DWORD);
+ *(DWORD*)dest = pndCount;
+ dest += sizeof(DWORD);
+ pPndSize = dest;
+ dest += sizeof(DWORD); // Leave space for pndSize
+ }
+
+ unsigned offs, iter;
+
+ for (iter = pasStk.pasEnumGCoffs(pasENUM_START, &offs); pndCount;
+ iter = pasStk.pasEnumGCoffs(iter, &offs), pndCount--)
+ {
+ unsigned eltSize = encodeUnsigned(dest, offs);
+
+ pndSize += eltSize;
+ if (mask)
+ dest += eltSize;
+ }
+ assert(iter == pasENUM_END);
+
+ if (mask == 0)
+ {
+ dest = base + 2 + 4 * sizeof(DWORD) + pndSize;
+ }
+ else
+ {
+ assert(pPndSize + sizeof(pndSize) + pndSize == dest);
+ *(DWORD*)pPndSize = pndSize;
+ }
+
+ goto NEXT_RPD;
+ }
+
+ argMask = byrefArgMask = 0;
+
+ if (pasStk.pasHasGCptrs())
+ {
+ assert(pasStk.pasCurDepth() <= BITS_IN_pasMask);
+
+ argMask = pasStk.pasArgMask();
+ byrefArgMask = pasStk.pasByrefArgMask();
+ }
+
+ /* Shouldn't be reporting trivial call-sites */
+
+ assert(regMask || argMask || callArgCnt || pasStk.pasCurDepth());
+
+// Emit IPtrMask if needed
+
+#define CHK_NON_INTRPT_ESP_IPtrMask \
+ \
+ if (byrefRegMask || byrefArgMask) \
+ { \
+ *dest++ = 0xF0; \
+ unsigned imask = (byrefArgMask << 4) | byrefRegMask; \
+ dest += encodeUnsigned(dest, imask); \
+ }
+
+ /* When usePopEncoding is true:
+ * this is not an interesting call site
+ * because nothing is live here.
+ */
+ usePopEncoding = ((callArgCnt < 4) && (regMask == 0) && (argMask == 0));
+
+ if (!usePopEncoding)
+ {
+ int pattern = lookupCallPattern(callArgCnt, regMask, argMask, codeDelta);
+ if (pattern != -1)
+ {
+ if (pattern > 0xff)
+ {
+ codeDelta = pattern >> 8;
+ pattern &= 0xff;
+ if (codeDelta >= 16)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta);
+ codeDelta = 0;
+ }
+ else
+ {
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)codeDelta;
+ }
+ }
+
+ // Emit IPtrMask if needed
+ CHK_NON_INTRPT_ESP_IPtrMask;
+
+ assert((pattern >= 0) && (pattern < 80));
+ *dest++ = 0x80 | pattern;
+ goto NEXT_RPD;
+ }
+
+ /* See if we can use 2nd call encoding
+ * 1101RRRR DDCCCMMM encoding */
+
+ if ((callArgCnt <= 7) && (argMask <= 7))
+ {
+ unsigned inx; // callCommonDelta[] index
+ unsigned maxCommonDelta = callCommonDelta[3];
+
+ if (codeDelta > maxCommonDelta)
+ {
+ if (codeDelta > maxCommonDelta + 15)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - maxCommonDelta);
+ }
+ else
+ {
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)(codeDelta - maxCommonDelta);
+ }
+
+ codeDelta = maxCommonDelta;
+ inx = 3;
+ goto EMIT_2ND_CALL_ENCODING;
+ }
+
+ for (inx = 0; inx < 4; inx++)
+ {
+ if (codeDelta == callCommonDelta[inx])
+ {
+ EMIT_2ND_CALL_ENCODING:
+ // Emit IPtrMask if needed
+ CHK_NON_INTRPT_ESP_IPtrMask;
+
+ *dest++ = 0xD0 | regMask;
+ *dest++ = (inx << 6) | (callArgCnt << 3) | argMask;
+ goto NEXT_RPD;
+ }
+ }
+
+ unsigned minCommonDelta = callCommonDelta[0];
+
+ if ((codeDelta > minCommonDelta) && (codeDelta < maxCommonDelta))
+ {
+ assert((minCommonDelta + 16) > maxCommonDelta);
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)(codeDelta - minCommonDelta);
+
+ codeDelta = minCommonDelta;
+ inx = 0;
+ goto EMIT_2ND_CALL_ENCODING;
+ }
+ }
+ }
+
+ if (codeDelta >= 16)
+ {
+ unsigned i = (usePopEncoding ? 15 : 0);
+ /* use encoding: */
+ /* skip 01000000 [Delta] arbitrary sized delta */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - i);
+ codeDelta = i;
+ }
+
+ if ((codeDelta > 0) || usePopEncoding)
+ {
+ if (usePopEncoding)
+ {
+ /* use encoding: */
+ /* pop 01CCDDDD ESP pop CC items, 4-bit delta */
+ if (callArgCnt || codeDelta)
+ *dest++ = (BYTE)(0x40 | (callArgCnt << 4) | codeDelta);
+ goto NEXT_RPD;
+ }
+ else
+ {
+ /* use encoding: */
+ /* skip 0100DDDD small delta=DDDD */
+ *dest++ = 0x40 | (BYTE)codeDelta;
+ }
+ }
+
+ // Emit IPtrMask if needed
+ CHK_NON_INTRPT_ESP_IPtrMask;
+
+ /* use encoding: */
+ /* call 1110RRRR [ArgCnt] [ArgMask] */
+
+ *dest++ = 0xE0 | regMask;
+ dest += encodeUnsigned(dest, callArgCnt);
+
+ dest += encodeUnsigned(dest, argMask);
+ }
+ else
+ {
+ /* This is a push or a pop site */
+
+ /* Remember the new 'last' offset */
+
+ lastOffset = nextOffset;
+
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP)
+ {
+ /* This must be a gcArgPopSingle */
+
+ assert(genRegPtrTemp->rpdPtrArg == 1);
+
+ if (codeDelta >= 16)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - 15);
+ codeDelta = 15;
+ }
+
+ /* use encoding: */
+ /* pop1 0101DDDD ESP pop one item, 4-bit delta */
+
+ *dest++ = 0x50 | (BYTE)codeDelta;
+
+ /* adjust argMask for this pop */
+ pasStk.pasPop(1);
+ }
+ else
+ {
+ /* This is a push */
+
+ if (codeDelta >= 32)
+ {
+ /* use encoding: */
+ /* skip 01000000 [Delta] */
+ *dest++ = 0x40;
+ dest += encodeUnsigned(dest, codeDelta - 31);
+ codeDelta = 31;
+ }
+
+ assert(codeDelta < 32);
+
+ /* use encoding: */
+ /* push 000DDDDD ESP push one item, 5-bit delta */
+
+ *dest++ = (BYTE)codeDelta;
+
+ /* adjust argMask for this push */
+ pasStk.pasPush(genRegPtrTemp->rpdGCtypeGet());
+ }
+ }
+ }
+
+ /* We ignore the register live/dead information, since the
+ * rpdCallRegMask contains all the liveness information
+ * that we need
+ */
+ NEXT_RPD:
+
+ totalSize += dest - base;
+
+ /* Go back to the buffer start if we're not generating a table */
+
+ if (!mask)
+ dest = base;
+
+#if REGEN_CALLPAT
+ if ((mask == -1) && (usePopEncoding == false) && ((dest - base) > 0))
+ regenLog(origCodeDelta, argMask, regMask, callArgCnt, byrefArgMask, byrefRegMask, base, (dest - base));
+#endif
+ }
+
+ /* Verify that we pop every arg that was pushed and that argMask is 0 */
+
+ assert(pasStk.pasCurDepth() == 0);
+
+#endif // _TARGET_X86_
+
+ /* Terminate the table with 0xFF */
+
+ *dest = 0xFF;
+ dest -= mask;
+ totalSize++;
+ }
+
+#if VERIFY_GC_TABLES
+ if (mask)
+ {
+ *(short*)dest = (short)0xBEEB;
+ dest += sizeof(short);
+ }
+ totalSize += sizeof(short);
+#endif
+
+#if MEASURE_PTRTAB_SIZE
+
+ if (mask)
+ s_gcTotalPtrTabSize += totalSize;
+
+#endif
+
+ return totalSize;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************/
+#if DUMP_GC_TABLES
+/*****************************************************************************
+ *
+ * Dump the contents of a GC pointer table.
+ */
+
+#include "gcdump.h"
+
+#if VERIFY_GC_TABLES
+const bool verifyGCTables = true;
+#else
+const bool verifyGCTables = false;
+#endif
+
+/*****************************************************************************
+ *
+ * Dump the info block header.
+ */
+
+unsigned GCInfo::gcInfoBlockHdrDump(const BYTE* table, InfoHdr* header, unsigned* methodSize)
+{
+ GCDump gcDump(GCINFO_VERSION);
+
+ gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
+ printf("Method info block:\n");
+
+ return gcDump.DumpInfoHdr(table, header, methodSize, verifyGCTables);
+}
+
+/*****************************************************************************/
+
+unsigned GCInfo::gcDumpPtrTable(const BYTE* table, const InfoHdr& header, unsigned methodSize)
+{
+ printf("Pointer table:\n");
+
+ GCDump gcDump(GCINFO_VERSION);
+ gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
+
+ return gcDump.DumpGCTable(table, header, methodSize, verifyGCTables);
+}
+
+/*****************************************************************************
+ *
+ * Find all the live pointers in a stack frame.
+ */
+
+void GCInfo::gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, unsigned offs)
+{
+ GCDump gcDump(GCINFO_VERSION);
+ gcDump.gcPrintf = gcDump_logf; // use my printf (which logs to VM)
+
+ gcDump.DumpPtrsInFrame((const BYTE*)infoBlock, (const BYTE*)codeBlock, offs, verifyGCTables);
+}
+
+#endif // DUMP_GC_TABLES
+
+#else // !JIT32_GCENCODER
+
+#include "gcinfoencoder.h"
+#include "simplerhash.h"
+
+// Do explicit instantiation.
+template class SimplerHashTable<RegSlotIdKey, RegSlotIdKey, GcSlotId, JitSimplerHashBehavior>;
+template class SimplerHashTable<StackSlotIdKey, StackSlotIdKey, GcSlotId, JitSimplerHashBehavior>;
+
+#ifdef DEBUG
+
+void GCInfo::gcDumpVarPtrDsc(varPtrDsc* desc)
+{
+ const int offs = (desc->vpdVarNum & ~OFFSET_MASK);
+ const GCtype gcType = (desc->vpdVarNum & byref_OFFSET_FLAG) ? GCT_BYREF : GCT_GCREF;
+ const bool isPin = (desc->vpdVarNum & pinned_OFFSET_FLAG) != 0;
+
+ printf("[%08X] %s%s var at [%s", dspPtr(desc), GCtypeStr(gcType), isPin ? "pinned-ptr" : "",
+ compiler->isFramePointerUsed() ? STR_FPBASE : STR_SPBASE);
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("] live from %04X to %04X\n", desc->vpdBegOfs, desc->vpdEndOfs);
+}
+
+static const char* const GcSlotFlagsNames[] = {"",
+ "(byref) ",
+ "(pinned) ",
+ "(byref, pinned) ",
+ "(untracked) ",
+ "(byref, untracked) ",
+ "(pinned, untracked) ",
+ "(byref, pinned, untracked) "};
+
+// I'm making a local wrapper class for GcInfoEncoder so that I can add logging of my own (DLD).
+class GcInfoEncoderWithLogging
+{
+ GcInfoEncoder* m_gcInfoEncoder;
+ bool m_doLogging;
+
+public:
+ GcInfoEncoderWithLogging(GcInfoEncoder* gcInfoEncoder, bool verbose)
+ : m_gcInfoEncoder(gcInfoEncoder), m_doLogging(verbose || JitConfig.JitGCInfoLogging() != 0)
+ {
+ }
+
+ GcSlotId GetStackSlotId(INT32 spOffset, GcSlotFlags flags, GcStackSlotBase spBase = GC_CALLER_SP_REL)
+ {
+ GcSlotId newSlotId = m_gcInfoEncoder->GetStackSlotId(spOffset, flags, spBase);
+ if (m_doLogging)
+ {
+ printf("Stack slot id for offset %d (0x%x) (%s) %s= %d.\n", spOffset, spOffset,
+ GcStackSlotBaseNames[spBase], GcSlotFlagsNames[flags & 7], newSlotId);
+ }
+ return newSlotId;
+ }
+
+ GcSlotId GetRegisterSlotId(UINT32 regNum, GcSlotFlags flags)
+ {
+ GcSlotId newSlotId = m_gcInfoEncoder->GetRegisterSlotId(regNum, flags);
+ if (m_doLogging)
+ {
+ printf("Register slot id for reg %s %s= %d.\n", getRegName(regNum), GcSlotFlagsNames[flags & 7], newSlotId);
+ }
+ return newSlotId;
+ }
+
+ void SetSlotState(UINT32 instructionOffset, GcSlotId slotId, GcSlotState slotState)
+ {
+ m_gcInfoEncoder->SetSlotState(instructionOffset, slotId, slotState);
+ if (m_doLogging)
+ {
+ printf("Set state of slot %d at instr offset 0x%x to %s.\n", slotId, instructionOffset,
+ (slotState == GC_SLOT_LIVE ? "Live" : "Dead"));
+ }
+ }
+
+ void DefineCallSites(UINT32* pCallSites, BYTE* pCallSiteSizes, UINT32 numCallSites)
+ {
+ m_gcInfoEncoder->DefineCallSites(pCallSites, pCallSiteSizes, numCallSites);
+ if (m_doLogging)
+ {
+ printf("Defining %d call sites:\n", numCallSites);
+ for (UINT32 k = 0; k < numCallSites; k++)
+ {
+ printf(" Offset 0x%x, size %d.\n", pCallSites[k], pCallSiteSizes[k]);
+ }
+ }
+ }
+
+ void DefineInterruptibleRange(UINT32 startInstructionOffset, UINT32 length)
+ {
+ m_gcInfoEncoder->DefineInterruptibleRange(startInstructionOffset, length);
+ if (m_doLogging)
+ {
+ printf("Defining interruptible range: [0x%x, 0x%x).\n", startInstructionOffset,
+ startInstructionOffset + length);
+ }
+ }
+
+ void SetCodeLength(UINT32 length)
+ {
+ m_gcInfoEncoder->SetCodeLength(length);
+ if (m_doLogging)
+ {
+ printf("Set code length to %d.\n", length);
+ }
+ }
+
+ void SetReturnKind(ReturnKind returnKind)
+ {
+ m_gcInfoEncoder->SetReturnKind(returnKind);
+ if (m_doLogging)
+ {
+ printf("Set ReturnKind to %s.\n", ReturnKindToString(returnKind));
+ }
+ }
+
+ void SetStackBaseRegister(UINT32 registerNumber)
+ {
+ m_gcInfoEncoder->SetStackBaseRegister(registerNumber);
+ if (m_doLogging)
+ {
+ printf("Set stack base register to %s.\n", getRegName(registerNumber));
+ }
+ }
+
+ void SetPrologSize(UINT32 prologSize)
+ {
+ m_gcInfoEncoder->SetPrologSize(prologSize);
+ if (m_doLogging)
+ {
+ printf("Set prolog size 0x%x.\n", prologSize);
+ }
+ }
+
+ void SetGSCookieStackSlot(INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd)
+ {
+ m_gcInfoEncoder->SetGSCookieStackSlot(spOffsetGSCookie, validRangeStart, validRangeEnd);
+ if (m_doLogging)
+ {
+ printf("Set GS Cookie stack slot to %d, valid from 0x%x to 0x%x.\n", spOffsetGSCookie, validRangeStart,
+ validRangeEnd);
+ }
+ }
+
+ void SetPSPSymStackSlot(INT32 spOffsetPSPSym)
+ {
+ m_gcInfoEncoder->SetPSPSymStackSlot(spOffsetPSPSym);
+ if (m_doLogging)
+ {
+ printf("Set PSPSym stack slot to %d.\n", spOffsetPSPSym);
+ }
+ }
+
+ void SetGenericsInstContextStackSlot(INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type)
+ {
+ m_gcInfoEncoder->SetGenericsInstContextStackSlot(spOffsetGenericsContext, type);
+ if (m_doLogging)
+ {
+ printf("Set generic instantiation context stack slot to %d, type is %s.\n", spOffsetGenericsContext,
+ (type == GENERIC_CONTEXTPARAM_THIS
+ ? "THIS"
+ : (type == GENERIC_CONTEXTPARAM_MT ? "MT"
+ : (type == GENERIC_CONTEXTPARAM_MD ? "MD" : "UNKNOWN!"))));
+ }
+ }
+
+ void SetSecurityObjectStackSlot(INT32 spOffset)
+ {
+ m_gcInfoEncoder->SetSecurityObjectStackSlot(spOffset);
+ if (m_doLogging)
+ {
+ printf("Set security object stack slot to %d.\n", spOffset);
+ }
+ }
+
+ void SetIsVarArg()
+ {
+ m_gcInfoEncoder->SetIsVarArg();
+ if (m_doLogging)
+ {
+ printf("SetIsVarArg.\n");
+ }
+ }
+
+ void SetWantsReportOnlyLeaf()
+ {
+ m_gcInfoEncoder->SetWantsReportOnlyLeaf();
+ if (m_doLogging)
+ {
+ printf("Set WantsReportOnlyLeaf.\n");
+ }
+ }
+
+ void SetSizeOfStackOutgoingAndScratchArea(UINT32 size)
+ {
+ m_gcInfoEncoder->SetSizeOfStackOutgoingAndScratchArea(size);
+ if (m_doLogging)
+ {
+ printf("Set Outgoing stack arg area size to %d.\n", size);
+ }
+ }
+};
+
+#define GCENCODER_WITH_LOGGING(withLog, realEncoder) \
+ GcInfoEncoderWithLogging withLog##Var(realEncoder, compiler->verbose || compiler->opts.dspGCtbls); \
+ GcInfoEncoderWithLogging* withLog = &withLog##Var;
+
+#else // DEBUG
+
+#define GCENCODER_WITH_LOGGING(withLog, realEncoder) GcInfoEncoder* withLog = realEncoder;
+
+#endif // DEBUG
+
+ReturnKind GCTypeToReturnKind(CorInfoGCType gcType)
+{
+
+ switch (gcType)
+ {
+ case TYPE_GC_NONE:
+ return RT_Scalar;
+ case TYPE_GC_REF:
+ return RT_Object;
+ case TYPE_GC_BYREF:
+ return RT_ByRef;
+ default:
+ _ASSERTE(!"TYP_GC_OTHER is unexpected");
+ return RT_Illegal;
+ }
+}
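+
+// Worked example (illustrative only): for a two-slot struct whose GC layout is
+// { TYPE_GC_REF, TYPE_GC_NONE }, the mapping above gives RT_Object for the first slot and
+// RT_Scalar for the second; gcInfoBlockHdrSave below combines the pair via GetStructReturnKind.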
+
+void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSize, unsigned prologSize)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("*************** In gcInfoBlockHdrSave()\n");
+ }
+#endif
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ // Can't create tables if we've not saved code.
+
+ gcInfoEncoderWithLog->SetCodeLength(methodSize);
+
+ ReturnKind returnKind = RT_Illegal;
+
+ switch (compiler->info.compRetType)
+ {
+ case TYP_REF:
+ case TYP_ARRAY:
+ returnKind = RT_Object;
+ break;
+ case TYP_BYREF:
+ returnKind = RT_ByRef;
+ break;
+ case TYP_STRUCT:
+ {
+ CORINFO_CLASS_HANDLE structType = compiler->info.compMethodInfo->args.retTypeClass;
+ var_types retType = compiler->getReturnTypeForStruct(structType);
+
+ switch (retType)
+ {
+ case TYP_ARRAY:
+ _ASSERTE(false && "TYP_ARRAY unexpected from getReturnTypeForStruct()");
+
+ case TYP_REF:
+ returnKind = RT_Object;
+ break;
+
+ case TYP_BYREF:
+ returnKind = RT_ByRef;
+ break;
+
+ case TYP_STRUCT:
+ if (compiler->IsHfa(structType))
+ {
+ returnKind = RT_Scalar;
+ }
+ else
+ {
+ // Multi-reg return
+ BYTE gcPtrs[2] = { TYPE_GC_NONE, TYPE_GC_NONE };
+ compiler->info.compCompHnd->getClassGClayout(structType, gcPtrs);
+
+ ReturnKind first = GCTypeToReturnKind((CorInfoGCType)gcPtrs[0]);
+ ReturnKind second = GCTypeToReturnKind((CorInfoGCType)gcPtrs[1]);
+
+ returnKind = GetStructReturnKind(first, second);
+ }
+ break;
+
+ default:
+ returnKind = RT_Scalar;
+ break;
+ }
+ break;
+ }
+ default:
+ returnKind = RT_Scalar;
+ }
+
+ _ASSERTE(returnKind != RT_Illegal);
+ gcInfoEncoderWithLog->SetReturnKind(returnKind);
+
+ if (compiler->isFramePointerUsed())
+ {
+ gcInfoEncoderWithLog->SetStackBaseRegister(REG_FPBASE);
+ }
+
+ if (compiler->info.compIsVarArgs)
+ {
+ gcInfoEncoderWithLog->SetIsVarArg();
+ }
+ // No equivalents.
+ // header->profCallbacks = compiler->info.compProfilerCallback;
+ // header->editNcontinue = compiler->opts.compDbgEnC;
+ //
+ if (compiler->lvaReportParamTypeArg())
+ {
+ // The predicate above is true only if there is an extra generic context parameter, not for
+ // the case where the generic context is provided by "this."
+ assert(compiler->info.compTypeCtxtArg != BAD_VAR_NUM);
+ GENERIC_CONTEXTPARAM_TYPE ctxtParamType = GENERIC_CONTEXTPARAM_NONE;
+ switch (compiler->info.compMethodInfo->options & CORINFO_GENERICS_CTXT_MASK)
+ {
+ case CORINFO_GENERICS_CTXT_FROM_METHODDESC:
+ ctxtParamType = GENERIC_CONTEXTPARAM_MD;
+ break;
+ case CORINFO_GENERICS_CTXT_FROM_METHODTABLE:
+ ctxtParamType = GENERIC_CONTEXTPARAM_MT;
+ break;
+
+ case CORINFO_GENERICS_CTXT_FROM_THIS: // See comment above.
+ default:
+ // If we have a generic context parameter, then we should have
+ // one of the two options flags handled above.
+ assert(false);
+ }
+
+ gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(
+ compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
+ compiler->isFramePointerUsed()),
+ ctxtParamType);
+ }
+ // As discussed above, handle the case where the generics context is obtained via
+ // the method table of "this".
+ else if (compiler->lvaKeepAliveAndReportThis())
+ {
+ assert(compiler->info.compThisArg != BAD_VAR_NUM);
+ gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(
+ compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(),
+ compiler->isFramePointerUsed()),
+ GENERIC_CONTEXTPARAM_THIS);
+ }
+
+ if (compiler->getNeedsGSSecurityCookie())
+ {
+ assert(compiler->lvaGSSecurityCookie != BAD_VAR_NUM);
+
+ // The lv offset is FP-relative, and the using code expects caller-sp relative, so translate.
+ // The code offset ranges assume that the GS Cookie slot is initialized in the prolog, and is valid
+ // through the remainder of the method. We will not query for the GS Cookie while we're in an epilog,
+ // so the question of where in the epilog it becomes invalid is moot.
+ gcInfoEncoderWithLog->SetGSCookieStackSlot(compiler->lvaGetCallerSPRelativeOffset(
+ compiler->lvaGSSecurityCookie),
+ prologSize, methodSize);
+ }
+ else if (compiler->opts.compNeedSecurityCheck || compiler->lvaReportParamTypeArg() ||
+ compiler->lvaKeepAliveAndReportThis())
+ {
+ gcInfoEncoderWithLog->SetPrologSize(prologSize);
+ }
+
+ if (compiler->opts.compNeedSecurityCheck)
+ {
+ assert(compiler->lvaSecurityObject != BAD_VAR_NUM);
+
+ // A VM requirement due to how the decoder works (it ignores partially interruptible frames when
+ // an exception has escaped, but the VM requires the security object to live on).
+ assert(compiler->codeGen->genInterruptible);
+
+ // The lv offset is FP-relative, and the using code expects caller-sp relative, so translate.
+ // The normal GC lifetime reporting mechanisms will report a proper lifetime to the GC.
+ // The security subsystem can safely assume that anywhere it might walk the stack, it will be
+ // valid (null or a live GC ref).
+ gcInfoEncoderWithLog->SetSecurityObjectStackSlot(
+ compiler->lvaGetCallerSPRelativeOffset(compiler->lvaSecurityObject));
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (compiler->ehNeedsPSPSym())
+ {
+ assert(compiler->lvaPSPSym != BAD_VAR_NUM);
+
+#ifdef _TARGET_AMD64_
+ // The PSPSym is relative to InitialSP on X64 and CallerSP on other platforms.
+ gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym));
+#else // !_TARGET_AMD64_
+ gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+#endif // !_TARGET_AMD64_
+ }
+
+ if (compiler->ehAnyFunclets())
+ {
+ // Set this to avoid double-reporting the parent frame (unlike JIT64)
+ gcInfoEncoderWithLog->SetWantsReportOnlyLeaf();
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // outgoing stack area size
+ gcInfoEncoderWithLog->SetSizeOfStackOutgoingAndScratchArea(compiler->lvaOutgoingArgSpaceSize);
+
+#if DISPLAY_SIZES
+
+ if (compiler->codeGen->genInterruptible)
+ {
+ genMethodICnt++;
+ }
+ else
+ {
+ genMethodNCnt++;
+ }
+
+#endif // DISPLAY_SIZES
+}
+
+#ifdef DEBUG
+#define Encoder GcInfoEncoderWithLogging
+#else
+#define Encoder GcInfoEncoder
+#endif
+
+// Small helper class to handle the No-GC-Interrupt callbacks
+// when reporting interruptible ranges.
+//
+// Encoder should be either GcInfoEncoder or GcInfoEncoderWithLogging
+//
+struct InterruptibleRangeReporter
+{
+ unsigned prevStart;
+ Encoder* gcInfoEncoderWithLog;
+
+ InterruptibleRangeReporter(unsigned _prevStart, Encoder* _gcInfo)
+ : prevStart(_prevStart), gcInfoEncoderWithLog(_gcInfo)
+ {
+ }
+
+ // This callback is called for each insGroup marked with
+ // IGF_NOGCINTERRUPT (currently just prologs and epilogs).
+ // Report everything between the previous region and the current
+ // region as interruptible.
+
+ bool operator()(unsigned igFuncIdx, unsigned igOffs, unsigned igSize)
+ {
+ if (igOffs < prevStart)
+ {
+ // We're still in the main method prolog, which has already
+            // had its interruptible range reported.
+ assert(igFuncIdx == 0);
+ assert(igOffs + igSize <= prevStart);
+ return true;
+ }
+
+ assert(igOffs >= prevStart);
+ if (igOffs > prevStart)
+ {
+ gcInfoEncoderWithLog->DefineInterruptibleRange(prevStart, igOffs - prevStart);
+ }
+ prevStart = igOffs + igSize;
+ return true;
+ }
+};
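+
+// Illustrative sketch (added for exposition; not part of the original change).
+// Suppose the main prolog covers [0, 0x10), a single no-GC epilog group covers
+// [0x40, 0x48), and codeSize is 0x50. Then the reporter behaves as follows
+// (all offsets here are hypothetical):
+//
+//   InterruptibleRangeReporter r(0x10 /* prologSize */, encoder);
+//   r(0 /* igFuncIdx */, 0x40, 0x08); // defines [0x10, 0x40) as interruptible
+//   // r.prevStart is now 0x48; the caller reports the tail [0x48, 0x50).
+//
+// See the use in gcMakeRegPtrTable below, which passes the reporter to
+// emitGenNoGCLst and then defines any remaining range up to codeSize.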
+
+void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
+ unsigned codeSize,
+ unsigned prologSize,
+ MakeRegPtrMode mode)
+{
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ m_regSlotMap = new (compiler->getAllocator()) RegSlotMap(compiler->getAllocator());
+ m_stackSlotMap = new (compiler->getAllocator()) StackSlotMap(compiler->getAllocator());
+ }
+
+ /**************************************************************************
+ *
+ * Untracked ptr variables
+ *
+ **************************************************************************
+ */
+
+ unsigned count = 0;
+
+ int lastoffset = 0;
+
+ /* Count&Write untracked locals and non-enregistered args */
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct must have been
+ // reported through its parent local.
+ continue;
+ }
+
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ // Do we have an argument or local variable?
+ if (!varDsc->lvIsParam)
+ {
+                // If it is pinned, it must be an untracked local.
+ assert(!varDsc->lvPinned || !varDsc->lvTracked);
+
+ if (varDsc->lvTracked || !varDsc->lvOnFrame)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ // Stack-passed arguments which are not enregistered
+ // are always reported in this "untracked stack
+ // pointers" section of the GC info even if lvTracked==true
+
+ // Has this argument been fully enregistered?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+#else // LEGACY_BACKEND
+ if (varDsc->lvRegister)
+#endif // LEGACY_BACKEND
+ {
+ // If a CEE_JMP has been used, then we need to report all the arguments
+ // even if they are enregistered, since we will be using this value
+ // in a JMP call. Note that this is subtle as we require that
+ // argument offsets are always fixed up properly even if lvRegister
+ // is set.
+ if (!compiler->compJmpOpUsed)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ if (!varDsc->lvOnFrame)
+ {
+ // If this non-enregistered pointer arg is never
+ // used, we don't need to report it.
+ assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+ else if (varDsc->lvIsRegArg && varDsc->lvTracked)
+ {
+ // If this register-passed arg is tracked, then
+ // it has been allocated space near the other
+ // pointer variables and we have accurate life-
+ // time info. It will be reported with
+ // gcVarPtrList in the "tracked-pointer" section.
+ continue;
+ }
+ }
+ }
+
+ // If we haven't continued to the next variable, we should report this as an untracked local.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+
+ if (varDsc->TypeGet() == TYP_BYREF)
+ {
+                // Or in GC_SLOT_INTERIOR for 'byref' pointer tracking
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ if (varDsc->lvPinned)
+ {
+                // Or in GC_SLOT_PINNED for 'pinned' pointer tracking
+ flags = (GcSlotFlags)(flags | GC_SLOT_PINNED);
+ }
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (varDsc->lvFramePointerBased)
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(varDsc->lvStkOffs, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(varDsc->lvStkOffs, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ }
+
+ // If this is a TYP_STRUCT, handle its GC pointers.
+ // Note that the enregisterable struct types cannot have GC pointers in them.
+ if ((varDsc->lvType == TYP_STRUCT) && varDsc->lvOnFrame && (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*);
+ BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ // walk each member of the array
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] == TYPE_GC_NONE)
+ { // skip non-gc slots
+ continue;
+ }
+
+ int offset = varDsc->lvStkOffs + i * sizeof(void*);
+#if DOUBLE_ALIGN
+ // For genDoubleAlign(), locals are addressed relative to ESP and
+ // arguments are addressed relative to EBP.
+
+ if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ offset += compiler->codeGen->genTotalFrameSize();
+#endif
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+ if (gcPtrs[i] == TYPE_GC_BYREF)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (varDsc->lvFramePointerBased)
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(offset, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(offset, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ }
+ }
+ }
+
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ // Count&Write spill temps that hold pointers.
+
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempItem = compiler->tmpListBeg(); tempItem != nullptr; tempItem = compiler->tmpListNxt(tempItem))
+ {
+ if (varTypeIsGC(tempItem->tdTempType()))
+ {
+ int offset = tempItem->tdTempOffs();
+
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+ if (tempItem->tdTempType() == TYP_BYREF)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (compiler->isFramePointerUsed())
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(offset, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(offset, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ }
+
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ // We need to report the cached copy as an untracked pointer
+ assert(compiler->info.compThisArg != BAD_VAR_NUM);
+ assert(!compiler->lvaReportParamTypeArg());
+ GcSlotFlags flags = GC_SLOT_UNTRACKED;
+
+ if (compiler->lvaTable[compiler->info.compThisArg].TypeGet() == TYP_BYREF)
+ {
+ // Or in GC_SLOT_INTERIOR for 'byref' pointer tracking
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+
+ GcStackSlotBase stackSlotBase = compiler->isFramePointerUsed() ? GC_FRAMEREG_REL : GC_SP_REL;
+
+ gcInfoEncoderWithLog->GetStackSlotId(compiler->lvaCachedGenericContextArgOffset(), flags, stackSlotBase);
+ }
+ }
+
+ // Generate the table of tracked stack pointer variable lifetimes.
+ gcMakeVarPtrTable(gcInfoEncoder, mode);
+
+ /**************************************************************************
+ *
+ * Prepare to generate the pointer register/argument map
+ *
+ **************************************************************************
+ */
+
+ if (compiler->codeGen->genInterruptible)
+ {
+ assert(compiler->genFullPtrRegMap);
+
+ regMaskSmall ptrRegs = 0;
+ regPtrDsc* regStackArgFirst = nullptr;
+
+ // Walk the list of pointer register/argument entries.
+
+ for (regPtrDsc* genRegPtrTemp = gcRegPtrList; genRegPtrTemp != nullptr; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ int nextOffset = genRegPtrTemp->rpdOffs;
+
+ if (genRegPtrTemp->rpdArg)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_KILL)
+ {
+ // Kill all arguments for a call
+ if ((mode == MAKE_REG_PTR_MODE_DO_WORK) && (regStackArgFirst != nullptr))
+ {
+ // Record any outgoing arguments as becoming dead
+ gcInfoRecordGCStackArgsDead(gcInfoEncoder, genRegPtrTemp->rpdOffs, regStackArgFirst,
+ genRegPtrTemp);
+ }
+ regStackArgFirst = nullptr;
+ }
+ else if (genRegPtrTemp->rpdGCtypeGet() != GCT_NONE)
+ {
+ if (genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH || (genRegPtrTemp->rpdPtrArg != 0))
+ {
+ bool isPop = genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP;
+ assert(!isPop);
+ gcInfoRecordGCStackArgLive(gcInfoEncoder, mode, genRegPtrTemp);
+ if (regStackArgFirst == nullptr)
+ {
+ regStackArgFirst = genRegPtrTemp;
+ }
+ }
+ else
+ {
+ // We know it's a POP. Sometimes we'll record a POP for a call, just to make sure
+ // the call site is recorded.
+ // This is just the negation of the condition:
+ assert(genRegPtrTemp->rpdArgTypeGet() == rpdARG_POP && genRegPtrTemp->rpdPtrArg == 0);
+ // This asserts that we only get here when we're recording a call site.
+ assert(genRegPtrTemp->rpdArg && genRegPtrTemp->rpdIsCallInstr());
+
+ // Kill all arguments for a call
+ if ((mode == MAKE_REG_PTR_MODE_DO_WORK) && (regStackArgFirst != nullptr))
+ {
+ // Record any outgoing arguments as becoming dead
+ gcInfoRecordGCStackArgsDead(gcInfoEncoder, genRegPtrTemp->rpdOffs, regStackArgFirst,
+ genRegPtrTemp);
+ }
+ regStackArgFirst = nullptr;
+ }
+ }
+ }
+ else
+ {
+ // Record any registers that are becoming dead.
+
+ regMaskSmall regMask = genRegPtrTemp->rpdCompiler.rpdDel & ptrRegs;
+ regMaskSmall byRefMask = 0;
+ if (genRegPtrTemp->rpdGCtypeGet() == GCT_BYREF)
+ {
+ byRefMask = regMask;
+ }
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, genRegPtrTemp->rpdOffs, regMask, GC_SLOT_DEAD,
+ byRefMask, &ptrRegs);
+
+ // Record any registers that are becoming live.
+ regMask = genRegPtrTemp->rpdCompiler.rpdAdd & ~ptrRegs;
+ byRefMask = 0;
+ // As far as I (DLD, 2010) can tell, there's one GCtype for the entire genRegPtrTemp, so if
+ // it says byref then all the registers in "regMask" contain byrefs.
+ if (genRegPtrTemp->rpdGCtypeGet() == GCT_BYREF)
+ {
+ byRefMask = regMask;
+ }
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, genRegPtrTemp->rpdOffs, regMask, GC_SLOT_LIVE,
+ byRefMask, &ptrRegs);
+ }
+ }
+
+ // Now we can declare the entire method body fully interruptible.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ assert(prologSize <= codeSize);
+
+ // Now exempt any other region marked as IGF_NOGCINTERRUPT
+ // Currently just prologs and epilogs.
+
+ InterruptibleRangeReporter reporter(prologSize, gcInfoEncoderWithLog);
+ compiler->getEmitter()->emitGenNoGCLst(reporter);
+ prologSize = reporter.prevStart;
+
+ // Report any remainder
+ if (prologSize < codeSize)
+ {
+ gcInfoEncoderWithLog->DefineInterruptibleRange(prologSize, codeSize - prologSize);
+ }
+ }
+ }
+ else if (compiler->isFramePointerUsed()) // genInterruptible is false, and we're using EBP as a frame pointer.
+ {
+ assert(compiler->genFullPtrRegMap == false);
+
+ // Walk the list of pointer register/argument entries.
+
+ // First count them.
+ unsigned numCallSites = 0;
+
+ // Now we can allocate the information.
+ unsigned* pCallSites = nullptr;
+ BYTE* pCallSiteSizes = nullptr;
+ unsigned callSiteNum = 0;
+
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ if (gcCallDescList != nullptr)
+ {
+ for (CallDsc* call = gcCallDescList; call != nullptr; call = call->cdNext)
+ {
+ numCallSites++;
+ }
+ pCallSites = new (compiler, CMK_GC) unsigned[numCallSites];
+ pCallSiteSizes = new (compiler, CMK_GC) BYTE[numCallSites];
+ }
+ }
+
+ // Now consider every call.
+ for (CallDsc* call = gcCallDescList; call != nullptr; call = call->cdNext)
+ {
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ pCallSites[callSiteNum] = call->cdOffs - call->cdCallInstrSize;
+ pCallSiteSizes[callSiteNum] = call->cdCallInstrSize;
+ callSiteNum++;
+ }
+
+ unsigned nextOffset;
+
+ // Figure out the code offset of this entry.
+ nextOffset = call->cdOffs;
+
+ // As far as I (DLD, 2010) can determine by asking around, the "call->u1.cdArgMask"
+ // and "cdArgCnt" cases are to handle x86 situations in which a call expression is nested as an
+ // argument to an outer call. The "natural" (evaluation-order-preserving) thing to do is to
+ // evaluate the outer call's arguments, pushing those that are not enregistered, until you
+ // encounter the nested call. These parts of the call description, then, describe the "pending"
+ // pushed arguments. This situation does not exist outside of x86, where we're going to use a
+ // fixed-size stack frame: in situations like this nested call, we would evaluate the pending
+ // arguments to temporaries, and only "push" them (really, write them to the outgoing argument section
+ // of the stack frame) when it's the outer call's "turn." So we can assert that these
+ // situations never occur.
+ assert(call->u1.cdArgMask == 0 && call->cdArgCnt == 0);
+
+ // Other than that, we just have to deal with the regmasks.
+ regMaskSmall gcrefRegMask = call->cdGCrefRegs & RBM_CALLEE_SAVED;
+ regMaskSmall byrefRegMask = call->cdByrefRegs & RBM_CALLEE_SAVED;
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ regMaskSmall regMask = gcrefRegMask | byrefRegMask;
+
+ assert(call->cdOffs >= call->cdCallInstrSize);
+ // call->cdOffs is actually the offset of the instruction *following* the call, so subtract
+ // the call instruction size to get the offset of the actual call instruction...
+ unsigned callOffset = call->cdOffs - call->cdCallInstrSize;
+ // Record that these registers are live before the call...
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, callOffset, regMask, GC_SLOT_LIVE, byrefRegMask, nullptr);
+ // ...and dead after.
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, call->cdOffs, regMask, GC_SLOT_DEAD, byrefRegMask,
+ nullptr);
+ }
+ // OK, define the call sites.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ gcInfoEncoderWithLog->DefineCallSites(pCallSites, pCallSiteSizes, numCallSites);
+ }
+ }
+ else // genInterruptible is false and we have an EBP-less frame
+ {
+ assert(compiler->genFullPtrRegMap);
+
+        // Walk the list of pointer register/argument entries.
+ // First count them.
+ unsigned numCallSites = 0;
+
+ // Now we can allocate the information (if we're in the "DO_WORK" pass...)
+ unsigned* pCallSites = nullptr;
+ BYTE* pCallSiteSizes = nullptr;
+ unsigned callSiteNum = 0;
+
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ for (regPtrDsc* genRegPtrTemp = gcRegPtrList; genRegPtrTemp != nullptr;
+ genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ if (genRegPtrTemp->rpdArg && genRegPtrTemp->rpdIsCallInstr())
+ {
+ numCallSites++;
+ }
+ }
+
+ if (numCallSites > 0)
+ {
+ pCallSites = new (compiler, CMK_GC) unsigned[numCallSites];
+ pCallSiteSizes = new (compiler, CMK_GC) BYTE[numCallSites];
+ }
+ }
+
+ for (regPtrDsc* genRegPtrTemp = gcRegPtrList; genRegPtrTemp != nullptr; genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ if (genRegPtrTemp->rpdArg)
+ {
+ // Is this a call site?
+ if (genRegPtrTemp->rpdIsCallInstr())
+ {
+ // This is a true call site.
+
+ regMaskSmall gcrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallGCrefRegs);
+
+ regMaskSmall byrefRegMask = genRegMaskFromCalleeSavedMask(genRegPtrTemp->rpdCallByrefRegs);
+
+ assert((gcrefRegMask & byrefRegMask) == 0);
+
+ regMaskSmall regMask = gcrefRegMask | byrefRegMask;
+
+ // The "rpdOffs" is (apparently) the offset of the following instruction already.
+ // GcInfoEncoder wants the call instruction, so subtract the width of the call instruction.
+ assert(genRegPtrTemp->rpdOffs >= genRegPtrTemp->rpdCallInstrSize);
+ unsigned callOffset = genRegPtrTemp->rpdOffs - genRegPtrTemp->rpdCallInstrSize;
+
+ // Tell the GCInfo encoder about these registers. We say that the registers become live
+ // before the call instruction, and dead after.
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, callOffset, regMask, GC_SLOT_LIVE, byrefRegMask,
+ nullptr);
+ gcInfoRecordGCRegStateChange(gcInfoEncoder, mode, genRegPtrTemp->rpdOffs, regMask, GC_SLOT_DEAD,
+ byrefRegMask, nullptr);
+
+ // Also remember the call site.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ assert(pCallSites != nullptr && pCallSiteSizes != nullptr);
+ pCallSites[callSiteNum] = callOffset;
+ pCallSiteSizes[callSiteNum] = genRegPtrTemp->rpdCallInstrSize;
+ callSiteNum++;
+ }
+ }
+ else
+ {
+ // These are reporting outgoing stack arguments, but we don't need to report anything
+                    // for partially interruptible methods.
+ assert(genRegPtrTemp->rpdGCtypeGet() != GCT_NONE);
+ assert(genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH);
+ }
+ }
+ }
+        // OK, define the call sites.
+ if (mode == MAKE_REG_PTR_MODE_DO_WORK)
+ {
+ gcInfoEncoderWithLog->DefineCallSites(pCallSites, pCallSiteSizes, numCallSites);
+ }
+ }
+}
+
+void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
+ MakeRegPtrMode mode,
+ unsigned instrOffset,
+ regMaskSmall regMask,
+ GcSlotState newState,
+ regMaskSmall byRefMask,
+ regMaskSmall* pPtrRegs)
+{
+ // Precondition: byRefMask is a subset of regMask.
+ assert((byRefMask & ~regMask) == 0);
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ while (regMask)
+ {
+ // Get hold of the next register bit.
+ regMaskTP tmpMask = genFindLowestReg(regMask);
+ assert(tmpMask);
+
+ // Remember the new state of this register.
+ if (pPtrRegs != nullptr)
+ {
+ if (newState == GC_SLOT_DEAD)
+ {
+ *pPtrRegs &= ~tmpMask;
+ }
+ else
+ {
+ *pPtrRegs |= tmpMask;
+ }
+ }
+
+ // Figure out which register the next bit corresponds to.
+ regNumber regNum = genRegNumFromMask(tmpMask);
+
+        /* Reserve SP for future use */
+ assert(regNum != REG_SPBASE);
+
+ GcSlotFlags regFlags = GC_SLOT_BASE;
+ if ((tmpMask & byRefMask) != 0)
+ {
+ regFlags = (GcSlotFlags)(regFlags | GC_SLOT_INTERIOR);
+ }
+
+ RegSlotIdKey rskey(regNum, regFlags);
+ GcSlotId regSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_regSlotMap->Lookup(rskey, &regSlotId))
+ {
+ regSlotId = gcInfoEncoderWithLog->GetRegisterSlotId(regNum, regFlags);
+ m_regSlotMap->Set(rskey, regSlotId);
+ }
+ }
+ else
+ {
+ BOOL b = m_regSlotMap->Lookup(rskey, &regSlotId);
+ assert(b); // Should have been added in the first pass.
+ gcInfoEncoderWithLog->SetSlotState(instrOffset, regSlotId, newState);
+ }
+
+ // Turn the bit we've just generated off and continue.
+ regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
+ }
+}
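+
+// Note (illustrative, not part of the original change): the loop above peels one
+// register off "regMask" at a time via the usual lowest-set-bit idiom that
+// genFindLowestReg implements, conceptually:
+//
+//   regMaskTP tmpMask = regMask & (0 - regMask); // isolate the lowest set bit
+//   regMask -= tmpMask;                          // and clear it for the next pass
+//
+// so each register in the mask gets exactly one slot-state report per call.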
+
+/**************************************************************************
+ *
+ * gcMakeVarPtrTable - Generate the table of tracked stack pointer
+ * variable lifetimes.
+ *
+ * In the first pass we'll allocate slot Ids
+ * In the second pass we actually generate the lifetimes.
+ *
+ **************************************************************************
+ */
+
+void GCInfo::gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode)
+{
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ // Make sure any flags we hide in the offset are in the bits guaranteed
+ // unused by alignment
+ C_ASSERT((OFFSET_MASK + 1) <= sizeof(int));
+
+#ifdef DEBUG
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ // Tracked variables can't be pinned, and the encoding takes
+ // advantage of that by using the same bit for 'pinned' and 'this'
+ // Since we don't track 'this', we should never see either flag here.
+ // Check it now before we potentially add some pinned flags.
+ for (varPtrDsc* varTmp = gcVarPtrList; varTmp != nullptr; varTmp = varTmp->vpdNext)
+ {
+ const unsigned flags = varTmp->vpdVarNum & OFFSET_MASK;
+ assert((flags & pinned_OFFSET_FLAG) == 0);
+ assert((flags & this_OFFSET_FLAG) == 0);
+ }
+ }
+#endif // DEBUG
+
+ // Only need to do this once, and only if we have EH.
+ if ((mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS) && compiler->ehAnyFunclets())
+ {
+ gcMarkFilterVarsPinned();
+ }
+
+ for (varPtrDsc* varTmp = gcVarPtrList; varTmp != nullptr; varTmp = varTmp->vpdNext)
+ {
+ C_ASSERT((OFFSET_MASK + 1) <= sizeof(int));
+
+ // Get hold of the variable's stack offset.
+
+ unsigned lowBits = varTmp->vpdVarNum & OFFSET_MASK;
+
+ // For negative stack offsets we must reset the low bits
+ int varOffs = static_cast<int>(varTmp->vpdVarNum & ~OFFSET_MASK);
+
+ // Compute the actual lifetime offsets.
+ unsigned begOffs = varTmp->vpdBegOfs;
+ unsigned endOffs = varTmp->vpdEndOfs;
+
+ // Special case: skip any 0-length lifetimes.
+ if (endOffs == begOffs)
+ {
+ continue;
+ }
+
+ GcSlotFlags flags = GC_SLOT_BASE;
+ if ((lowBits & byref_OFFSET_FLAG) != 0)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_INTERIOR);
+ }
+ if ((lowBits & pinned_OFFSET_FLAG) != 0)
+ {
+ flags = (GcSlotFlags)(flags | GC_SLOT_PINNED);
+ }
+
+ GcStackSlotBase stackSlotBase = GC_SP_REL;
+ if (compiler->isFramePointerUsed())
+ {
+ stackSlotBase = GC_FRAMEREG_REL;
+ }
+ StackSlotIdKey sskey(varOffs, (stackSlotBase == GC_FRAMEREG_REL), flags);
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(varOffs, flags, stackSlotBase);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ else
+ {
+ BOOL b = m_stackSlotMap->Lookup(sskey, &varSlotId);
+ assert(b); // Should have been added in the first pass.
+ // Live from the beginning to the end.
+ gcInfoEncoderWithLog->SetSlotState(begOffs, varSlotId, GC_SLOT_LIVE);
+ gcInfoEncoderWithLog->SetSlotState(endOffs, varSlotId, GC_SLOT_DEAD);
+ }
+ }
+}
+
+// gcMarkFilterVarsPinned - Walk all lifetimes and make it so that anything
+// live in a filter is marked as pinned (often by splitting the lifetime
+// so that *only* the filter region is pinned). This should only be
+// called once (after generating all lifetimes, but before slot ids are
+// finalized).
+//
+// DevDiv 376329 - The VM has to double report filters and their parent frame
+// because they occur during the 1st pass and the parent frame doesn't go dead
+// until we start unwinding in the 2nd pass.
+//
+// Untracked locals will only be reported in non-filter funclets and the
+// parent.
+// Registers can't be double reported by 2 frames since they're different.
+// That just leaves stack variables which might be double reported.
+//
+// Technically double reporting is only a problem when the GC has to relocate a
+// reference. So we avoid that problem by marking all live tracked stack
+// variables as pinned inside the filter. Thus if they are double reported, it
+// won't be a problem since they won't be double relocated.
+//
+void GCInfo::gcMarkFilterVarsPinned()
+{
+ assert(compiler->ehAnyFunclets());
+ const EHblkDsc* endHBtab = &(compiler->compHndBBtab[compiler->compHndBBtabCount]);
+
+ for (EHblkDsc* HBtab = compiler->compHndBBtab; HBtab < endHBtab; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ const UNATIVE_OFFSET filterBeg = compiler->ehCodeOffset(HBtab->ebdFilter);
+ const UNATIVE_OFFSET filterEnd = compiler->ehCodeOffset(HBtab->ebdHndBeg);
+
+ for (varPtrDsc* varTmp = gcVarPtrList; varTmp != nullptr; varTmp = varTmp->vpdNext)
+ {
+ // Get hold of the variable's flags.
+ const unsigned lowBits = varTmp->vpdVarNum & OFFSET_MASK;
+
+ // Compute the actual lifetime offsets.
+ const unsigned begOffs = varTmp->vpdBegOfs;
+ const unsigned endOffs = varTmp->vpdEndOfs;
+
+ // Special case: skip any 0-length lifetimes.
+ if (endOffs == begOffs)
+ {
+ continue;
+ }
+
+ // Skip lifetimes with no overlap with the filter
+ if ((endOffs <= filterBeg) || (begOffs >= filterEnd))
+ {
+ continue;
+ }
+
+ // Because there is no nesting within filters, nothing
+ // should be already pinned.
+ assert((lowBits & pinned_OFFSET_FLAG) == 0);
+
+ if (begOffs < filterBeg)
+ {
+ if (endOffs > filterEnd)
+ {
+                        // The variable lifetime starts before AND ends after
+ // the filter, so we need to create 2 new lifetimes:
+ // (1) a pinned one for the filter
+ // (2) a regular one for after the filter
+ // and then adjust the original lifetime to end before
+ // the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Splitting lifetime for filter: [%04X, %04X).\nOld: ", filterBeg, filterEnd);
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varPtrDsc* desc1 = new (compiler, CMK_GC) varPtrDsc;
+ desc1->vpdNext = gcVarPtrList;
+ desc1->vpdVarNum = varTmp->vpdVarNum | pinned_OFFSET_FLAG;
+ desc1->vpdBegOfs = filterBeg;
+ desc1->vpdEndOfs = filterEnd;
+
+ varPtrDsc* desc2 = new (compiler, CMK_GC) varPtrDsc;
+ desc2->vpdNext = desc1;
+ desc2->vpdVarNum = varTmp->vpdVarNum;
+ desc2->vpdBegOfs = filterEnd;
+ desc2->vpdEndOfs = endOffs;
+ gcVarPtrList = desc2;
+
+ varTmp->vpdEndOfs = filterBeg;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New (1 of 3): ");
+ gcDumpVarPtrDsc(varTmp);
+ printf("New (2 of 3): ");
+ gcDumpVarPtrDsc(desc1);
+ printf("New (3 of 3): ");
+ gcDumpVarPtrDsc(desc2);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // The variable lifetime started before the filter and ends
+ // somewhere inside it, so we only create 1 new lifetime,
+ // and then adjust the original lifetime to end before
+ // the filter.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Splitting lifetime for filter.\nOld: ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varPtrDsc* desc = new (compiler, CMK_GC) varPtrDsc;
+ desc->vpdNext = gcVarPtrList;
+ desc->vpdVarNum = varTmp->vpdVarNum | pinned_OFFSET_FLAG;
+ desc->vpdBegOfs = filterBeg;
+ desc->vpdEndOfs = endOffs;
+ gcVarPtrList = desc;
+
+ varTmp->vpdEndOfs = filterBeg;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New (1 of 2): ");
+ gcDumpVarPtrDsc(varTmp);
+ printf("New (2 of 2): ");
+ gcDumpVarPtrDsc(desc);
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ if (endOffs > filterEnd)
+ {
+ // The variable lifetime starts inside the filter and
+ // ends somewhere after it, so we create 1 new
+ // lifetime for the part inside the filter and adjust
+ // the start of the original lifetime to be the end
+ // of the filter
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Splitting lifetime for filter.\nOld: ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varPtrDsc* desc = new (compiler, CMK_GC) varPtrDsc;
+ desc->vpdNext = gcVarPtrList;
+ desc->vpdVarNum = varTmp->vpdVarNum | pinned_OFFSET_FLAG;
+ desc->vpdBegOfs = begOffs;
+ desc->vpdEndOfs = filterEnd;
+ gcVarPtrList = desc;
+
+ varTmp->vpdBegOfs = filterEnd;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New (1 of 2): ");
+ gcDumpVarPtrDsc(desc);
+ printf("New (2 of 2): ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // The variable lifetime is completely within the filter,
+ // so just add the pinned flag.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Pinning lifetime for filter.\nOld: ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+
+ varTmp->vpdVarNum |= pinned_OFFSET_FLAG;
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("New : ");
+ gcDumpVarPtrDsc(varTmp);
+ }
+#endif // DEBUG
+ }
+ }
+ }
+ } // HasFilter
+ } // Foreach EH
+}
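+
+// Worked example (illustrative only; the offsets are hypothetical): for a filter
+// covering [0x40, 0x60) and an original lifetime [0x10, 0x90), the first case
+// above produces three descriptors:
+//
+//   [0x10, 0x40)  the original descriptor, unpinned (vpdEndOfs trimmed to filterBeg)
+//   [0x40, 0x60)  new descriptor with pinned_OFFSET_FLAG set (exactly the filter)
+//   [0x60, 0x90)  new unpinned descriptor for the tail after the filter
+//
+// Lifetimes that only partially overlap the filter are split into two descriptors,
+// and lifetimes entirely inside the filter are simply marked pinned in place.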
+
+void GCInfo::gcInfoRecordGCStackArgLive(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode, regPtrDsc* genStackPtr)
+{
+    // On non-x86 platforms, we don't have pointer argument push/pop/kill declarations.
+ // But we use the same mechanism to record writes into the outgoing argument space...
+ assert(genStackPtr->rpdGCtypeGet() != GCT_NONE);
+ assert(genStackPtr->rpdArg);
+ assert(genStackPtr->rpdArgTypeGet() == rpdARG_PUSH);
+
+    // We only need to report these when we're doing fully-interruptible
+ assert(compiler->codeGen->genInterruptible);
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ StackSlotIdKey sskey(genStackPtr->rpdPtrArg, FALSE,
+ GcSlotFlags(genStackPtr->rpdGCtypeGet() == GCT_BYREF ? GC_SLOT_INTERIOR : GC_SLOT_BASE));
+ GcSlotId varSlotId;
+ if (mode == MAKE_REG_PTR_MODE_ASSIGN_SLOTS)
+ {
+ if (!m_stackSlotMap->Lookup(sskey, &varSlotId))
+ {
+ varSlotId = gcInfoEncoderWithLog->GetStackSlotId(sskey.m_offset, (GcSlotFlags)sskey.m_flags, GC_SP_REL);
+ m_stackSlotMap->Set(sskey, varSlotId);
+ }
+ }
+ else
+ {
+ BOOL b = m_stackSlotMap->Lookup(sskey, &varSlotId);
+ assert(b); // Should have been added in the first pass.
+ // Live until the call.
+ gcInfoEncoderWithLog->SetSlotState(genStackPtr->rpdOffs, varSlotId, GC_SLOT_LIVE);
+ }
+}
+
+void GCInfo::gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder,
+ unsigned instrOffset,
+ regPtrDsc* genStackPtrFirst,
+ regPtrDsc* genStackPtrLast)
+{
+ // After a call all of the outgoing arguments are marked as dead.
+ // The calling loop keeps track of the first argument pushed for this call
+ // and passes it in as genStackPtrFirst.
+ // genStackPtrLast is the call.
+    // Re-walk that list and mark all outgoing arguments that were marked as live
+ // earlier, as going dead after the call.
+
+    // We only need to report these when we're doing fully-interruptible
+ assert(compiler->codeGen->genInterruptible);
+
+ GCENCODER_WITH_LOGGING(gcInfoEncoderWithLog, gcInfoEncoder);
+
+ for (regPtrDsc* genRegPtrTemp = genStackPtrFirst; genRegPtrTemp != genStackPtrLast;
+ genRegPtrTemp = genRegPtrTemp->rpdNext)
+ {
+ if (!genRegPtrTemp->rpdArg)
+ {
+ continue;
+ }
+
+ assert(genRegPtrTemp->rpdGCtypeGet() != GCT_NONE);
+ assert(genRegPtrTemp->rpdArgTypeGet() == rpdARG_PUSH);
+
+ StackSlotIdKey sskey(genRegPtrTemp->rpdPtrArg, FALSE,
+ genRegPtrTemp->rpdGCtypeGet() == GCT_BYREF ? GC_SLOT_INTERIOR : GC_SLOT_BASE);
+ GcSlotId varSlotId;
+ BOOL b = m_stackSlotMap->Lookup(sskey, &varSlotId);
+ assert(b); // Should have been added in the first pass.
+        // The outgoing argument slot goes dead at the call.
+ gcInfoEncoderWithLog->SetSlotState(instrOffset, varSlotId, GC_SLOT_DEAD);
+ }
+}
+
+#undef GCENCODER_WITH_LOGGING
+
+#endif // !JIT32_GCENCODER
+
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp
new file mode 100644
index 0000000000..b64fd0a174
--- /dev/null
+++ b/src/jit/gcinfo.cpp
@@ -0,0 +1,867 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GCInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "gcinfo.h"
+#include "emit.h"
+#include "jitgcinfo.h"
+
+#ifdef _TARGET_AMD64_
+#include "gcinfoencoder.h" //this includes a LOT of other files too
+#endif
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************/
+
+extern int JITGcBarrierCall;
+
+/*****************************************************************************/
+
+#if MEASURE_PTRTAB_SIZE
+/* static */ size_t GCInfo::s_gcRegPtrDscSize = 0;
+/* static */ size_t GCInfo::s_gcTotalPtrTabSize = 0;
+#endif // MEASURE_PTRTAB_SIZE
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX GCInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+GCInfo::GCInfo(Compiler* theCompiler) : compiler(theCompiler)
+{
+ regSet = nullptr;
+ gcVarPtrList = nullptr;
+ gcVarPtrLast = nullptr;
+ gcRegPtrList = nullptr;
+ gcRegPtrLast = nullptr;
+ gcPtrArgCnt = 0;
+ gcCallDescList = nullptr;
+ gcCallDescLast = nullptr;
+#ifdef JIT32_GCENCODER
+ gcEpilogTable = nullptr;
+#else // !JIT32_GCENCODER
+ m_regSlotMap = nullptr;
+ m_stackSlotMap = nullptr;
+#endif // JIT32_GCENCODER
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ * Reset tracking info at the start of a basic block.
+ */
+
+void GCInfo::gcResetForBB()
+{
+ gcRegGCrefSetCur = RBM_NONE;
+ gcRegByrefSetCur = RBM_NONE;
+ VarSetOps::AssignNoCopy(compiler, gcVarPtrSetCur, VarSetOps::MakeEmpty(compiler));
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Print the changes in the gcRegGCrefSetCur sets.
+ */
+
+void GCInfo::gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput))
+{
+ if (compiler->verbose)
+ {
+ if (forceOutput || (gcRegGCrefSetCur != gcRegGCrefSetNew))
+ {
+ printf("\t\t\t\t\t\t\tGC regs: ");
+ if (gcRegGCrefSetCur == gcRegGCrefSetNew)
+ {
+ printf("(unchanged) ");
+ }
+ else
+ {
+ printRegMaskInt(gcRegGCrefSetCur);
+ compiler->getEmitter()->emitDispRegSet(gcRegGCrefSetCur);
+ printf(" => ");
+ }
+ printRegMaskInt(gcRegGCrefSetNew);
+ compiler->getEmitter()->emitDispRegSet(gcRegGCrefSetNew);
+ printf("\n");
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Print the changes in the gcRegByrefSetCur sets.
+ */
+
+void GCInfo::gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput))
+{
+ if (compiler->verbose)
+ {
+ if (forceOutput || (gcRegByrefSetCur != gcRegByrefSetNew))
+ {
+ printf("\t\t\t\t\t\t\tByref regs: ");
+ if (gcRegByrefSetCur == gcRegByrefSetNew)
+ {
+ printf("(unchanged) ");
+ }
+ else
+ {
+ printRegMaskInt(gcRegByrefSetCur);
+ compiler->getEmitter()->emitDispRegSet(gcRegByrefSetCur);
+ printf(" => ");
+ }
+ printRegMaskInt(gcRegByrefSetNew);
+ compiler->getEmitter()->emitDispRegSet(gcRegByrefSetNew);
+ printf("\n");
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Mark the set of registers given by the specified mask as holding
+ * GCref pointer values.
+ */
+
+void GCInfo::gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput))
+{
+#ifdef DEBUG
+ if (compiler->compRegSetCheckLevel == 0)
+ {
+ // This set of registers are going to hold REFs.
+ // Make sure they were not holding BYREFs.
+ assert((gcRegByrefSetCur & regMask) == 0);
+ }
+#endif
+
+ regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~regMask; // Clear it if set in Byref mask
+ regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur | regMask; // Set it in GCref mask
+
+ INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput));
+ INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew));
+
+ gcRegByrefSetCur = gcRegByrefSetNew;
+ gcRegGCrefSetCur = gcRegGCrefSetNew;
+}
+
+/*****************************************************************************
+ *
+ * Mark the set of registers given by the specified mask as holding
+ * Byref pointer values.
+ */
+
+void GCInfo::gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput))
+{
+ regMaskTP gcRegByrefSetNew = gcRegByrefSetCur | regMask; // Set it in Byref mask
+ regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~regMask; // Clear it if set in GCref mask
+
+ INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew));
+ INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput));
+
+ gcRegByrefSetCur = gcRegByrefSetNew;
+ gcRegGCrefSetCur = gcRegGCrefSetNew;
+}
+
+/*****************************************************************************
+ *
+ * Mark the set of registers given by the specified mask as holding
+ * non-pointer values.
+ */
+
+void GCInfo::gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput))
+{
+ /* NOTE: don't unmark any live register variables */
+
+ regMaskTP gcRegByrefSetNew = gcRegByrefSetCur & ~(regMask & ~regSet->rsMaskVars);
+ regMaskTP gcRegGCrefSetNew = gcRegGCrefSetCur & ~(regMask & ~regSet->rsMaskVars);
+
+ INDEBUG(gcDspGCrefSetChanges(gcRegGCrefSetNew, forceOutput));
+ INDEBUG(gcDspByrefSetChanges(gcRegByrefSetNew, forceOutput));
+
+ gcRegByrefSetCur = gcRegByrefSetNew;
+ gcRegGCrefSetCur = gcRegGCrefSetNew;
+}
+
+/*****************************************************************************
+ *
+ * Mark the specified register as now holding a value of the given type.
+ */
+
+void GCInfo::gcMarkRegPtrVal(regNumber reg, var_types type)
+{
+ regMaskTP regMask = genRegMask(reg);
+
+ switch (type)
+ {
+ case TYP_REF:
+ gcMarkRegSetGCref(regMask);
+ break;
+ case TYP_BYREF:
+ gcMarkRegSetByref(regMask);
+ break;
+ default:
+ gcMarkRegSetNpt(regMask);
+ break;
+ }
+}
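+
+// For instance (illustrative): gcMarkRegPtrVal(REG_ESI, TYP_REF) moves RBM_ESI into
+// gcRegGCrefSetCur (clearing it from gcRegByrefSetCur), whereas a non-pointer type
+// drops RBM_ESI from both sets via gcMarkRegSetNpt, unless ESI currently holds a
+// live register variable.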
+
+/*****************************************************************************/
+
+GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTreePtr tgt, GenTreePtr assignVal)
+{
+#if FEATURE_WRITE_BARRIER
+
+ /* Are we storing a GC ptr? */
+
+ if (!varTypeIsGC(tgt->TypeGet()))
+ {
+ return WBF_NoBarrier;
+ }
+
+ /* Ignore any assignments of NULL */
+
+ // 'assignVal' can be the constant Null or something else (LclVar, etc..)
+ // that is known to be null via Value Numbering.
+ if (assignVal->GetVN(VNK_Liberal) == ValueNumStore::VNForNull())
+ {
+ return WBF_NoBarrier;
+ }
+
+ if (assignVal->gtOper == GT_CNS_INT && assignVal->gtIntCon.gtIconVal == 0)
+ {
+ return WBF_NoBarrier;
+ }
+
+ /* Where are we storing into? */
+
+ tgt = tgt->gtEffectiveVal();
+
+ switch (tgt->gtOper)
+ {
+
+#ifndef LEGACY_BACKEND
+ case GT_STOREIND:
+#endif // !LEGACY_BACKEND
+ case GT_IND: /* Could be the managed heap */
+ return gcWriteBarrierFormFromTargetAddress(tgt->gtOp.gtOp1);
+
+ case GT_LEA:
+ return gcWriteBarrierFormFromTargetAddress(tgt->AsAddrMode()->Base());
+
+ case GT_ARR_ELEM: /* Definitely in the managed heap */
+ case GT_CLS_VAR:
+ return WBF_BarrierUnchecked;
+
+ case GT_REG_VAR: /* Definitely not in the managed heap */
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ return WBF_NoBarrier;
+
+ default:
+ break;
+ }
+
+ assert(!"Missing case in gcIsWriteBarrierCandidate");
+#endif
+
+ return WBF_NoBarrier;
+}
+
+bool GCInfo::gcIsWriteBarrierAsgNode(GenTreePtr op)
+{
+ if (op->gtOper == GT_ASG)
+ {
+ return gcIsWriteBarrierCandidate(op->gtOp.gtOp1, op->gtOp.gtOp2) != WBF_NoBarrier;
+ }
+#ifndef LEGACY_BACKEND
+ else if (op->gtOper == GT_STOREIND)
+ {
+ return gcIsWriteBarrierCandidate(op, op->gtOp.gtOp2) != WBF_NoBarrier;
+ }
+#endif // !LEGACY_BACKEND
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * If the given tree value is sitting in a register, free it now.
+ */
+
+void GCInfo::gcMarkRegPtrVal(GenTreePtr tree)
+{
+ if (varTypeIsGC(tree->TypeGet()))
+ {
+#ifdef LEGACY_BACKEND
+ if (tree->gtOper == GT_LCL_VAR)
+ compiler->codeGen->genMarkLclVar(tree);
+#endif // LEGACY_BACKEND
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ gcMarkRegSetNpt(genRegMask(tree->gtRegNum));
+ }
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************
+ *
+ * Initialize the non-register pointer variable tracking logic.
+ */
+
+void GCInfo::gcVarPtrSetInit()
+{
+ VarSetOps::AssignNoCopy(compiler, gcVarPtrSetCur, VarSetOps::MakeEmpty(compiler));
+
+ /* Initialize the list of lifetime entries */
+ gcVarPtrList = gcVarPtrLast = nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Allocate a new pointer register set / pointer argument entry and append
+ * it to the list.
+ */
+
+GCInfo::regPtrDsc* GCInfo::gcRegPtrAllocDsc()
+{
+ regPtrDsc* regPtrNext;
+
+ assert(compiler->genFullPtrRegMap);
+
+ /* Allocate a new entry and initialize it */
+
+ regPtrNext = new (compiler, CMK_GC) regPtrDsc;
+
+ regPtrNext->rpdIsThis = FALSE;
+
+ regPtrNext->rpdOffs = 0;
+ regPtrNext->rpdNext = nullptr;
+
+ // Append the entry to the end of the list.
+ if (gcRegPtrLast == nullptr)
+ {
+ assert(gcRegPtrList == nullptr);
+ gcRegPtrList = gcRegPtrLast = regPtrNext;
+ }
+ else
+ {
+ assert(gcRegPtrList != nullptr);
+ gcRegPtrLast->rpdNext = regPtrNext;
+ gcRegPtrLast = regPtrNext;
+ }
+
+#if MEASURE_PTRTAB_SIZE
+ s_gcRegPtrDscSize += sizeof(*regPtrNext);
+#endif
+
+ return regPtrNext;
+}
+
+/*****************************************************************************
+ *
+ * Compute the various counts that get stored in the info block header.
+ */
+
+void GCInfo::gcCountForHeader(UNALIGNED unsigned int* untrackedCount, UNALIGNED unsigned int* varPtrTableSize)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ varPtrDsc* varTmp;
+
+ bool thisKeptAliveIsInUntracked = false; // did we track "this" in a synchronized method?
+ unsigned int count = 0;
+
+ /* Count the untracked locals and non-enregistered args */
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct must have been
+ // reported through its parent local
+ continue;
+ }
+
+ /* Do we have an argument or local variable? */
+ if (!varDsc->lvIsParam)
+ {
+ if (varDsc->lvTracked || !varDsc->lvOnFrame)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ /* Stack-passed arguments which are not enregistered
+ * are always reported in this "untracked stack
+ * pointers" section of the GC info even if lvTracked==true
+ */
+
+ /* Has this argument been fully enregistered? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+#else // LEGACY_BACKEND
+ if (varDsc->lvRegister)
+#endif // LEGACY_BACKEND
+ {
+ /* if a CEE_JMP has been used, then we need to report all the arguments
+ even if they are enregistered, since we will be using this value
+                       in a JMP call. Note that this is subtle as we require that
+ argument offsets are always fixed up properly even if lvRegister
+ is set */
+ if (!compiler->compJmpOpUsed)
+ {
+ continue;
+ }
+ }
+ else
+ {
+ if (!varDsc->lvOnFrame)
+ {
+ /* If this non-enregistered pointer arg is never
+ * used, we don't need to report it
+ */
+ assert(varDsc->lvRefCnt == 0);
+ continue;
+ }
+ else if (varDsc->lvIsRegArg && varDsc->lvTracked)
+ {
+ /* If this register-passed arg is tracked, then
+ * it has been allocated space near the other
+ * pointer variables and we have accurate life-
+ * time info. It will be reported with
+ * gcVarPtrList in the "tracked-pointer" section
+ */
+
+ continue;
+ }
+ }
+ }
+
+ if (compiler->lvaIsOriginalThisArg(varNum) && compiler->lvaKeepAliveAndReportThis())
+ {
+ // Encoding of untracked variables does not support reporting
+ // "this". So report it as a tracked variable with a liveness
+ // extending over the entire method.
+
+ thisKeptAliveIsInUntracked = true;
+ continue;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ int offs = varDsc->lvStkOffs;
+
+ printf("GCINFO: untrckd %s lcl at [%s", varTypeGCstring(varDsc->TypeGet()),
+ compiler->genEmitter->emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ count++;
+ }
+ else if (varDsc->lvType == TYP_STRUCT && varDsc->lvOnFrame && (varDsc->lvExactSize >= TARGET_POINTER_SIZE))
+ {
+ unsigned slots = compiler->lvaLclSize(varNum) / sizeof(void*);
+ BYTE* gcPtrs = compiler->lvaGetGcLayout(varNum);
+
+ // walk each member of the array
+ for (unsigned i = 0; i < slots; i++)
+ {
+ if (gcPtrs[i] != TYPE_GC_NONE)
+ { // count only gc slots
+ count++;
+ }
+ }
+ }
+ }
+
+ /* Also count spill temps that hold pointers */
+
+ assert(compiler->tmpAllFree());
+ for (TempDsc* tempThis = compiler->tmpListBeg(); tempThis != nullptr; tempThis = compiler->tmpListNxt(tempThis))
+ {
+ if (varTypeIsGC(tempThis->tdTempType()) == false)
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ int offs = tempThis->tdTempOffs();
+
+            printf("GCINFO: untrck %s Temp at [%s", varTypeGCstring(tempThis->tdTempType()),
+ compiler->genEmitter->emitGetFrameReg());
+
+ if (offs < 0)
+ {
+ printf("-%02XH", -offs);
+ }
+ else if (offs > 0)
+ {
+ printf("+%02XH", +offs);
+ }
+
+ printf("]\n");
+ }
+#endif
+
+ count++;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("GCINFO: untrckVars = %u\n", count);
+ }
+#endif
+
+ *untrackedCount = count;
+
+ /* Count the number of entries in the table of non-register pointer
+ variable lifetimes. */
+
+ count = 0;
+
+ if (thisKeptAliveIsInUntracked)
+ {
+ count++;
+ }
+
+ if (gcVarPtrList)
+ {
+ /* We'll use a delta encoding for the lifetime offsets */
+
+ for (varTmp = gcVarPtrList; varTmp; varTmp = varTmp->vpdNext)
+ {
+ /* Special case: skip any 0-length lifetimes */
+
+ if (varTmp->vpdBegOfs == varTmp->vpdEndOfs)
+ {
+ continue;
+ }
+
+ count++;
+ }
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("GCINFO: trackdLcls = %u\n", count);
+ }
+#endif
+
+ *varPtrTableSize = count;
+}
+
+#ifdef JIT32_GCENCODER
+/*****************************************************************************
+ *
+ * Shutdown the 'pointer value' register tracking logic and save the necessary
+ * info (which will be used at runtime to locate all pointers) at the specified
+ * address. The number of bytes written to 'destPtr' must be identical to that
+ * returned from gcPtrTableSize().
+ */
+
+BYTE* GCInfo::gcPtrTableSave(BYTE* destPtr, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset)
+{
+ /* Write the tables to the info block */
+
+ return destPtr + gcMakeRegPtrTable(destPtr, -1, header, codeSize, pArgTabOffset);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Initialize the 'pointer value' register/argument tracking logic.
+ */
+
+void GCInfo::gcRegPtrSetInit()
+{
+ gcRegGCrefSetCur = gcRegByrefSetCur = 0;
+
+ if (compiler->genFullPtrRegMap)
+ {
+ gcRegPtrList = gcRegPtrLast = nullptr;
+ }
+ else
+ {
+ /* Initialize the 'call descriptor' list */
+ gcCallDescList = gcCallDescLast = nullptr;
+ }
+}
+
+#ifdef JIT32_GCENCODER
+
+/*****************************************************************************
+ *
+ * Helper passed to genEmitter.emitGenEpilogLst() to generate
+ * the table of epilogs.
+ */
+
+/* static */ size_t GCInfo::gcRecordEpilog(void* pCallBackData, unsigned offset)
+{
+ GCInfo* gcInfo = (GCInfo*)pCallBackData;
+
+ assert(gcInfo);
+
+ size_t result = encodeUDelta(gcInfo->gcEpilogTable, offset, gcInfo->gcEpilogPrevOffset);
+
+ if (gcInfo->gcEpilogTable)
+ gcInfo->gcEpilogTable += result;
+
+ gcInfo->gcEpilogPrevOffset = offset;
+
+ return result;
+}
+
+#endif // JIT32_GCENCODER
+
+GCInfo::WriteBarrierForm GCInfo::gcWriteBarrierFormFromTargetAddress(GenTreePtr tgtAddr)
+{
+ GCInfo::WriteBarrierForm result = GCInfo::WBF_BarrierUnknown; // Default case, we have no information.
+
+    // If we store through an int to a GC_REF field, we'll assume that it needs to use a checked barrier.
+ if (tgtAddr->TypeGet() == TYP_I_IMPL)
+ {
+ return GCInfo::WBF_BarrierChecked; // Why isn't this GCInfo::WBF_BarrierUnknown?
+ }
+
+ // Otherwise...
+ assert(tgtAddr->TypeGet() == TYP_BYREF);
+ bool simplifiedExpr = true;
+ while (simplifiedExpr)
+ {
+ simplifiedExpr = false;
+
+ tgtAddr = tgtAddr->gtSkipReloadOrCopy();
+
+ while (tgtAddr->OperGet() == GT_ADDR && tgtAddr->gtOp.gtOp1->OperGet() == GT_IND)
+ {
+ tgtAddr = tgtAddr->gtOp.gtOp1->gtOp.gtOp1;
+ simplifiedExpr = true;
+ assert(tgtAddr->TypeGet() == TYP_BYREF);
+ }
+ // For additions, one of the operands is a byref or a ref (and the other is not). Follow this down to its
+ // source.
+ while (tgtAddr->OperGet() == GT_ADD || tgtAddr->OperGet() == GT_LEA)
+ {
+ if (tgtAddr->OperGet() == GT_ADD)
+ {
+ if (tgtAddr->gtOp.gtOp1->TypeGet() == TYP_BYREF || tgtAddr->gtOp.gtOp1->TypeGet() == TYP_REF)
+ {
+ assert(!(tgtAddr->gtOp.gtOp2->TypeGet() == TYP_BYREF || tgtAddr->gtOp.gtOp2->TypeGet() == TYP_REF));
+ tgtAddr = tgtAddr->gtOp.gtOp1;
+ simplifiedExpr = true;
+ }
+ else if (tgtAddr->gtOp.gtOp2->TypeGet() == TYP_BYREF || tgtAddr->gtOp.gtOp2->TypeGet() == TYP_REF)
+ {
+ tgtAddr = tgtAddr->gtOp.gtOp2;
+ simplifiedExpr = true;
+ }
+ else
+ {
+ // We might have a native int. For example:
+ // const int 0
+ // + byref
+ // lclVar int V06 loc5 // this is a local declared "valuetype VType*"
+ return GCInfo::WBF_BarrierUnknown;
+ }
+ }
+ else
+ {
+ // Must be an LEA (i.e., an AddrMode)
+ assert(tgtAddr->OperGet() == GT_LEA);
+ tgtAddr = tgtAddr->AsAddrMode()->Base();
+ if (tgtAddr->TypeGet() == TYP_BYREF || tgtAddr->TypeGet() == TYP_REF)
+ {
+ simplifiedExpr = true;
+ }
+ else
+ {
+ // We might have a native int.
+ return GCInfo::WBF_BarrierUnknown;
+ }
+ }
+ }
+ }
+ if (tgtAddr->IsLocalAddrExpr() != nullptr)
+ {
+ // No need for a GC barrier when writing to a local variable.
+ return GCInfo::WBF_NoBarrier;
+ }
+ if (tgtAddr->OperGet() == GT_LCL_VAR || tgtAddr->OperGet() == GT_REG_VAR)
+ {
+ unsigned lclNum = 0;
+ if (tgtAddr->gtOper == GT_LCL_VAR)
+ {
+ lclNum = tgtAddr->gtLclVar.gtLclNum;
+ }
+ else
+ {
+ assert(tgtAddr->gtOper == GT_REG_VAR);
+ lclNum = tgtAddr->gtRegVar.gtLclNum;
+ }
+
+ LclVarDsc* varDsc = &compiler->lvaTable[lclNum];
+
+        // Instead of marking the LclVar with 'lvStackByref', consider decomposing the
+        // Value Number given to this LclVar to see if it was created using a
+        // GT_ADDR(GT_LCLVAR) or a GT_ADD(GT_ADDR(GT_LCLVAR), Constant).
+
+ // We may have an internal compiler temp created in fgMorphCopyBlock() that we know
+        // points at one of our stack local variables; it will have lvStackByref set to true.
+ //
+ if (varDsc->lvStackByref)
+ {
+ assert(varDsc->TypeGet() == TYP_BYREF);
+ return GCInfo::WBF_NoBarrier;
+ }
+
+ // We don't eliminate for inlined methods, where we (can) know where the "retBuff" points.
+ if (!compiler->compIsForInlining() && lclNum == compiler->info.compRetBuffArg)
+ {
+ assert(compiler->info.compRetType == TYP_STRUCT); // Else shouldn't have a ret buff.
+
+ // Are we assured that the ret buff pointer points into the stack of a caller?
+ if (compiler->info.compRetBuffDefStack)
+ {
+#if 0
+ // This is an optional debugging mode. If the #if 0 above is changed to #if 1,
+            // every barrier we remove for stores to GC ref fields of a retbuff uses a special
+ // helper that asserts that the target is not in the heap.
+#ifdef DEBUG
+ return WBF_NoBarrier_CheckNotHeapInDebug;
+#else
+ return WBF_NoBarrier;
+#endif
+#else // 0
+ return GCInfo::WBF_NoBarrier;
+#endif // 0
+ }
+ }
+ }
+ if (tgtAddr->TypeGet() == TYP_REF)
+ {
+ return GCInfo::WBF_BarrierUnchecked;
+ }
+ // Otherwise, we have no information.
+ return GCInfo::WBF_BarrierUnknown;
+}
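+
+// Illustrative examples (not part of the original change) of how the shape of the
+// target address maps to a barrier form under the logic above:
+//
+//   GT_ADD(GT_ADDR(GT_LCL_VAR), GT_CNS_INT)   -> WBF_NoBarrier        (local address)
+//   GT_LEA with a TYP_REF base                -> WBF_BarrierUnchecked (known heap object)
+//   address typed TYP_I_IMPL                  -> WBF_BarrierChecked
+//   TYP_BYREF of unknown provenance           -> WBF_BarrierUnknown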
+
+#ifndef LEGACY_BACKEND
+//------------------------------------------------------------------------
+// gcUpdateForRegVarMove: Update the masks when a variable is moved
+//
+// Arguments:
+// srcMask - The register mask for the register(s) from which it is being moved
+// dstMask - The register mask for the register(s) to which it is being moved
+// type - The type of the variable
+//
+// Return Value:
+// None
+//
+// Notes:
+// This is called during codegen when a var is moved due to an LSRA_ASG.
+// It is also called by LinearScan::recordVarLocationAtStartOfBB() which is in turn called by
+// CodeGen::genCodeForBBList() at the block boundary.
+
+void GCInfo::gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc)
+{
+ var_types type = varDsc->TypeGet();
+ bool isGCRef = (type == TYP_REF);
+ bool isByRef = (type == TYP_BYREF);
+
+ if (srcMask != RBM_NONE)
+ {
+ regSet->RemoveMaskVars(srcMask);
+ if (isGCRef)
+ {
+ assert((gcRegByrefSetCur & srcMask) == 0);
+ gcRegGCrefSetCur &= ~srcMask;
+ gcRegGCrefSetCur |= dstMask; // safe if no dst, i.e. RBM_NONE
+ }
+ else if (isByRef)
+ {
+ assert((gcRegGCrefSetCur & srcMask) == 0);
+ gcRegByrefSetCur &= ~srcMask;
+ gcRegByrefSetCur |= dstMask; // safe if no dst, i.e. RBM_NONE
+ }
+ }
+ else if (isGCRef || isByRef)
+ {
+ // In this case, we are moving it from the stack to a register,
+ // so remove it from the set of live stack gc refs
+ VarSetOps::RemoveElemD(compiler, gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+ if (dstMask != RBM_NONE)
+ {
+ regSet->AddMaskVars(dstMask);
+ // If the source is a reg, then the gc sets have been set appropriately
+ // Otherwise, we have to determine whether to set them
+ if (srcMask == RBM_NONE)
+ {
+ if (isGCRef)
+ {
+ gcRegGCrefSetCur |= dstMask;
+ }
+ else if (isByRef)
+ {
+ gcRegByrefSetCur |= dstMask;
+ }
+ }
+ }
+ else if (isGCRef || isByRef)
+ {
+ VarSetOps::AddElemD(compiler, gcVarPtrSetCur, varDsc->lvVarIndex);
+ }
+}
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
new file mode 100644
index 0000000000..67474e11ec
--- /dev/null
+++ b/src/jit/gentree.cpp
@@ -0,0 +1,16748 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GenTree XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#include "simd.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************/
+
+const unsigned short GenTree::gtOperKindTable[] = {
+#define GTNODE(en, sn, cm, ok) ok + GTK_COMMUTE *cm,
+#include "gtlist.h"
+};
+
+/*****************************************************************************/
+// static
+genTreeOps GenTree::OpAsgToOper(genTreeOps op)
+{
+ // Precondition.
+ assert(OperIsAssignment(op) && op != GT_ASG);
+ switch (op)
+ {
+ case GT_ASG_ADD:
+ return GT_ADD;
+ case GT_ASG_SUB:
+ return GT_SUB;
+ case GT_ASG_MUL:
+ return GT_MUL;
+ case GT_ASG_DIV:
+ return GT_DIV;
+ case GT_ASG_MOD:
+ return GT_MOD;
+
+ case GT_ASG_UDIV:
+ return GT_UDIV;
+ case GT_ASG_UMOD:
+ return GT_UMOD;
+
+ case GT_ASG_OR:
+ return GT_OR;
+ case GT_ASG_XOR:
+ return GT_XOR;
+ case GT_ASG_AND:
+ return GT_AND;
+ case GT_ASG_LSH:
+ return GT_LSH;
+ case GT_ASG_RSH:
+ return GT_RSH;
+ case GT_ASG_RSZ:
+ return GT_RSZ;
+
+ case GT_CHS:
+ return GT_NEG;
+
+ default:
+ unreached(); // Precondition implies we don't get here.
+ }
+}
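+
+// For example (illustrative): the mapping gives
+//
+//   assert(GenTree::OpAsgToOper(GT_ASG_ADD) == GT_ADD);
+//   assert(GenTree::OpAsgToOper(GT_CHS) == GT_NEG);
+//
+// while passing GT_ASG itself violates the precondition and asserts.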
+
+/*****************************************************************************
+ *
+ * The types of different GenTree nodes
+ */
+
+#ifdef DEBUG
+
+#define INDENT_SIZE 3
+
+//--------------------------------------------
+//
+// IndentStack: This struct is used, along with its related enums and strings,
+// to control both the indendtation and the printing of arcs.
+//
+// Notes:
+// The mode of printing is set in the Constructor, using its 'compiler' argument.
+// Currently it only prints arcs when fgOrder == fgOrderLinear.
+// The type of arc to print is specified by the IndentInfo enum, and is controlled
+// by the caller of the Push() method.
+
+enum IndentChars
+{
+ ICVertical,
+ ICBottom,
+ ICTop,
+ ICMiddle,
+ ICDash,
+ ICEmbedded,
+ ICTerminal,
+ ICError,
+ IndentCharCount
+};
+
+// clang-format off
+// Sets of strings for different dumping options vert bot top mid dash embedded terminal error
+static const char* emptyIndents[IndentCharCount] = { " ", " ", " ", " ", " ", "{", "", "?" };
+static const char* asciiIndents[IndentCharCount] = { "|", "\\", "/", "+", "-", "{", "*", "?" };
+static const char* unicodeIndents[IndentCharCount] = { "\xe2\x94\x82", "\xe2\x94\x94", "\xe2\x94\x8c", "\xe2\x94\x9c", "\xe2\x94\x80", "{", "\xe2\x96\x8c", "?" };
+// clang-format on
+
+typedef ArrayStack<Compiler::IndentInfo> IndentInfoStack;
+struct IndentStack
+{
+ IndentInfoStack stack;
+ const char** indents;
+
+ // Constructor for IndentStack. Uses 'compiler' to determine the mode of printing.
+ IndentStack(Compiler* compiler) : stack(compiler)
+ {
+ if (compiler->asciiTrees)
+ {
+ indents = asciiIndents;
+ }
+ else
+ {
+ indents = unicodeIndents;
+ }
+ }
+
+ // Return the depth of the current indentation.
+ unsigned Depth()
+ {
+ return stack.Height();
+ }
+
+ // Push a new indentation onto the stack, of the given type.
+ void Push(Compiler::IndentInfo info)
+ {
+ stack.Push(info);
+ }
+
+ // Pop the most recent indentation type off the stack.
+ Compiler::IndentInfo Pop()
+ {
+ return stack.Pop();
+ }
+
+ // Print the current indentation and arcs.
+ void print()
+ {
+ unsigned indentCount = Depth();
+ for (unsigned i = 0; i < indentCount; i++)
+ {
+ unsigned index = indentCount - 1 - i;
+ switch (stack.Index(index))
+ {
+ case Compiler::IndentInfo::IINone:
+ printf(" ");
+ break;
+ case Compiler::IndentInfo::IIEmbedded:
+ printf("%s ", indents[ICEmbedded]);
+ break;
+ case Compiler::IndentInfo::IIArc:
+ if (index == 0)
+ {
+ printf("%s%s%s", indents[ICMiddle], indents[ICDash], indents[ICDash]);
+ }
+ else
+ {
+ printf("%s ", indents[ICVertical]);
+ }
+ break;
+ case Compiler::IndentInfo::IIArcBottom:
+ printf("%s%s%s", indents[ICBottom], indents[ICDash], indents[ICDash]);
+ break;
+ case Compiler::IndentInfo::IIArcTop:
+ printf("%s%s%s", indents[ICTop], indents[ICDash], indents[ICDash]);
+ break;
+ case Compiler::IndentInfo::IIError:
+ printf("%s%s%s", indents[ICError], indents[ICDash], indents[ICDash]);
+ break;
+ default:
+ unreached();
+ }
+ }
+ printf("%s", indents[ICTerminal]);
+ }
+};
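+
+// Illustration (editorial note): with the asciiIndents strings selected above,
+// print() renders an innermost IIArc as "+--", an IIArcBottom as "\--", an
+// IIArcTop as "/--", vertical "|" padding for enclosing arcs, and a trailing
+// "*" terminal, so a dumped node line typically begins with arcs such as
+// "\--*". The exact arcs depend on what the caller of Push() recorded, so this
+// is only illustrative.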
+
+//------------------------------------------------------------------------
+// printIndent: This is a static method which simply invokes the 'print'
+// method on its 'indentStack' argument.
+//
+// Arguments:
+// indentStack - specifies the information for the indentation & arcs to be printed
+//
+// Notes:
+// This method exists to localize the checking for the case where indentStack is null.
+
+static void printIndent(IndentStack* indentStack)
+{
+ if (indentStack == nullptr)
+ {
+ return;
+ }
+ indentStack->print();
+}
+
+static const char* nodeNames[] = {
+#define GTNODE(en, sn, cm, ok) sn,
+#include "gtlist.h"
+};
+
+const char* GenTree::NodeName(genTreeOps op)
+{
+ assert((unsigned)op < sizeof(nodeNames) / sizeof(nodeNames[0]));
+
+ return nodeNames[op];
+}
+
+static const char* opNames[] = {
+#define GTNODE(en, sn, cm, ok) #en,
+#include "gtlist.h"
+};
+
+const char* GenTree::OpName(genTreeOps op)
+{
+ assert((unsigned)op < sizeof(opNames) / sizeof(opNames[0]));
+
+ return opNames[op];
+}
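+
+// Note (editorial): nodeNames and opNames are parallel tables built from the
+// same GTNODE list in gtlist.h, with each GTNODE(en, sn, cm, ok) entry
+// contributing its printable name 'sn' to the former and the stringized enum
+// spelling 'en' to the latter, so NodeName(op) and OpName(op) index the same
+// slot for a given oper.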
+
+#endif
+
+/*****************************************************************************
+ *
+ * When 'SMALL_TREE_NODES' is enabled, we allocate tree nodes in 2 different
+ * sizes: 'GTF_DEBUG_NODE_SMALL' for most nodes and 'GTF_DEBUG_NODE_LARGE' for
+ * the few nodes (such as calls and statement list nodes) that have more fields
+ * and take up a lot more space.
+ */
+
+#if SMALL_TREE_NODES
+
+/* GT_COUNT'th oper is overloaded as 'undefined oper', so allocate storage for GT_COUNT'th oper also */
+/* static */
+unsigned char GenTree::s_gtNodeSizes[GT_COUNT + 1];
+
+/* static */
+void GenTree::InitNodeSize()
+{
+ /* 'GT_LCL_VAR' often gets changed to 'GT_REG_VAR' */
+
+ assert(GenTree::s_gtNodeSizes[GT_LCL_VAR] >= GenTree::s_gtNodeSizes[GT_REG_VAR]);
+
+ /* Set all sizes to 'small' first */
+
+ for (unsigned op = 0; op <= GT_COUNT; op++)
+ {
+ GenTree::s_gtNodeSizes[op] = TREE_NODE_SZ_SMALL;
+ }
+
+ // Now set all of the appropriate entries to 'large'
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // On ARM32, ARM64 and System V, struct returns generate code that turns
+ // a GT_ASG tree into a CopyObj call.
+ // CopyObj is a large node while GT_ASG is small, which would otherwise trigger an assert.
+ GenTree::s_gtNodeSizes[GT_ASG] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RETURN] = TREE_NODE_SZ_LARGE;
+#endif // defined(FEATURE_HFA) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ GenTree::s_gtNodeSizes[GT_CALL] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_CAST] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_FTN_ADDR] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_BOX] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_INDEX] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_BOUNDS_CHECK] = TREE_NODE_SZ_LARGE;
+#ifdef FEATURE_SIMD
+ GenTree::s_gtNodeSizes[GT_SIMD_CHK] = TREE_NODE_SZ_LARGE;
+#endif // FEATURE_SIMD
+ GenTree::s_gtNodeSizes[GT_ARR_ELEM] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_INDEX] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ARR_OFFSET] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_RET_EXPR] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_OBJ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_FIELD] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STMT] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_CMPXCHG] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_QMARK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_LEA] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STORE_OBJ] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_DYN_BLK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_STORE_DYN_BLK] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_INTRINSIC] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_ALLOCOBJ] = TREE_NODE_SZ_LARGE;
+#if USE_HELPERS_FOR_INT_DIV
+ GenTree::s_gtNodeSizes[GT_DIV] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_UDIV] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_MOD] = TREE_NODE_SZ_LARGE;
+ GenTree::s_gtNodeSizes[GT_UMOD] = TREE_NODE_SZ_LARGE;
+#endif
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ GenTree::s_gtNodeSizes[GT_PUTARG_STK] = TREE_NODE_SZ_LARGE;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ assert(GenTree::s_gtNodeSizes[GT_RETURN] == GenTree::s_gtNodeSizes[GT_ASG]);
+
+ // This list of assertions should come to contain all GenTree subtypes that are declared
+ // "small".
+ assert(sizeof(GenTreeLclFld) <= GenTree::s_gtNodeSizes[GT_LCL_FLD]);
+ assert(sizeof(GenTreeLclVar) <= GenTree::s_gtNodeSizes[GT_LCL_VAR]);
+
+ static_assert_no_msg(sizeof(GenTree) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeUnOp) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeOp) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeVal) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeIntConCommon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreePhysReg) <= TREE_NODE_SZ_SMALL);
+#ifndef LEGACY_BACKEND
+ static_assert_no_msg(sizeof(GenTreeJumpTable) <= TREE_NODE_SZ_SMALL);
+#endif // !LEGACY_BACKEND
+ static_assert_no_msg(sizeof(GenTreeIntCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLngCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeDblCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeStrCon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLclVarCommon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeRegVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArgList) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeColon) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeCall) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeCmpXchg) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeFptrVal) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeQmark) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIntrinsic) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIndex) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrLen) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBoundsChk) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrElem) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrIndex) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeArrOffs) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeIndir) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeStoreInd) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeAddrMode) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeObj) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeBlk) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeRetExpr) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeStmt) <= TREE_NODE_SZ_LARGE); // *** large node
+ static_assert_no_msg(sizeof(GenTreeClsVar) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeArgPlace) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeLabel) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreePhiArg) <= TREE_NODE_SZ_SMALL);
+ static_assert_no_msg(sizeof(GenTreeAllocObj) <= TREE_NODE_SZ_LARGE); // *** large node
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_SMALL);
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ static_assert_no_msg(sizeof(GenTreePutArgStk) <= TREE_NODE_SZ_LARGE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef FEATURE_SIMD
+ static_assert_no_msg(sizeof(GenTreeSIMD) <= TREE_NODE_SZ_SMALL);
+#endif // FEATURE_SIMD
+}
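+
+// Note (editorial): the practical effect of the table above is that a node
+// allocated small can never be mutated in place into an oper registered as
+// TREE_NODE_SZ_LARGE (such as GT_CALL), whereas a large node can always be
+// reused for a small oper; IsNodeProperlySized() below checks exactly this
+// using the GTF_DEBUG_NODE_SMALL / GTF_DEBUG_NODE_LARGE debug flags.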
+
+size_t GenTree::GetNodeSize() const
+{
+ return GenTree::s_gtNodeSizes[gtOper];
+}
+
+#ifdef DEBUG
+bool GenTree::IsNodeProperlySized() const
+{
+ size_t size;
+
+ if (gtDebugFlags & GTF_DEBUG_NODE_SMALL)
+ {
+ size = TREE_NODE_SZ_SMALL;
+ }
+ else
+ {
+ assert(gtDebugFlags & GTF_DEBUG_NODE_LARGE);
+ size = TREE_NODE_SZ_LARGE;
+ }
+
+ return GenTree::s_gtNodeSizes[gtOper] <= size;
+}
+#endif
+
+#else // SMALL_TREE_NODES
+
+#ifdef DEBUG
+bool GenTree::IsNodeProperlySized() const
+{
+ return true;
+}
+#endif
+
+#endif // SMALL_TREE_NODES
+
+/*****************************************************************************/
+
+// Make sure these template functions get instantiated, because they're not defined in a header file
+// (emulating the C++ 'export' keyword here).
+// VC appears to be somewhat unpredictable about whether they end up in the .obj file without this.
+template Compiler::fgWalkResult Compiler::fgWalkTreePostRec<true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreePostRec<false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreePreRec<true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreePreRec<false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<true, true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<false, false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<true, false>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+template Compiler::fgWalkResult Compiler::fgWalkTreeRec<false, true>(GenTreePtr* pTree, fgWalkData* fgWalkData);
+
+//******************************************************************************
+// fgWalkTreePreRec - Helper function for fgWalkTreePre.
+// Walks the tree in pre-order, executing the callback on every node.
+// Template parameter 'computeStack' specifies whether to maintain
+// a stack of ancestor nodes which can be viewed in the callback.
+//
+template <bool computeStack>
+// static
+Compiler::fgWalkResult Compiler::fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData* fgWalkData)
+{
+ fgWalkResult result = WALK_CONTINUE;
+ GenTreePtr currentParent = fgWalkData->parent;
+
+ genTreeOps oper;
+ unsigned kind;
+
+ do
+ {
+ GenTreePtr tree = *pTree;
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ GenTreeArgList* args; // For call node arg lists.
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Push(tree);
+ }
+
+ /* Visit this node */
+
+ // if we are not in the mode where we only do the callback for local var nodes,
+ // visit the node unconditionally. Otherwise we will visit it under leaf handling.
+ if (!fgWalkData->wtprLclsOnly)
+ {
+ assert(tree == *pTree);
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result != WALK_CONTINUE)
+ {
+ break;
+ }
+ }
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ if (fgWalkData->wtprLclsOnly && (oper == GT_LCL_VAR || oper == GT_LCL_FLD))
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ }
+ break;
+ }
+ else if (fgWalkData->wtprLclsOnly && GenTree::OperIsLocalStore(oper))
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result != WALK_CONTINUE)
+ {
+ break;
+ }
+ }
+
+ fgWalkData->parent = tree;
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (tree->gtGetOp2())
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtOp.gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ else
+ {
+ assert(tree->NullOp1Legal());
+ }
+
+ pTree = &tree->gtOp.gtOp2;
+ continue;
+ }
+ else
+ {
+ pTree = &tree->gtOp.gtOp1;
+ if (*pTree)
+ {
+ continue;
+ }
+
+ break;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ pTree = &tree->gtField.gtFldObj;
+ break;
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ /* Is this a call to unmanaged code ? */
+ if (fgWalkData->wtprLclsOnly && (tree->gtFlags & GTF_CALL_UNMANAGED))
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCall.gtCallObjp, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePreRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePreRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCall.gtControlExpr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ if (tree->gtCall.gtCallCookie)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCall.gtCallCookie, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ pTree = &tree->gtCall.gtCallAddr;
+ }
+ else
+ {
+ pTree = nullptr;
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrElem.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrElem.gtArrInds[dim], fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_ARR_OFFSET:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrOffs.gtOffset, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrOffs.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtArrOffs.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_CMPXCHG:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCmpXchg.gtOpLocation, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCmpXchg.gtOpValue, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtCmpXchg.gtOpComparand, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ case GT_STORE_DYN_BLK:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtDynBlk.Data(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ __fallthrough;
+
+ case GT_DYN_BLK:
+ result = fgWalkTreePreRec<computeStack>(&tree->gtDynBlk.Addr(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePreRec<computeStack>(&tree->gtDynBlk.gtDynamicSize, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ pTree = nullptr;
+ break;
+
+ default:
+#ifdef DEBUG
+ fgWalkData->compiler->gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ }
+ } while (pTree != nullptr && *pTree != nullptr);
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Pop();
+ }
+
+ if (result != WALK_ABORT)
+ {
+ //
+ // Restore fgWalkData->parent
+ //
+ fgWalkData->parent = currentParent;
+ }
+ return result;
+}
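+
+// Usage sketch (editorial note): callers reach this helper through
+// fgWalkTreePre() with a visitor of type fgWalkPreFn plus optional callback
+// data. 'CountLclVarsCB' and 'count' below are hypothetical names used only
+// for illustration; gtHasLocalsWithAddrOpCB() later in this file follows the
+// same pattern.
+//
+//     // hypothetical static member of Compiler, declared alongside the other callbacks
+//     Compiler::fgWalkResult Compiler::CountLclVarsCB(GenTreePtr* pTree, fgWalkData* data)
+//     {
+//         if ((*pTree)->gtOper == GT_LCL_VAR)
+//         {
+//             (*static_cast<unsigned*>(data->pCallbackData))++;
+//         }
+//         return WALK_CONTINUE;
+//     }
+//
+//     // at a call site inside Compiler:
+//     unsigned count = 0;
+//     fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, CountLclVarsCB, &count);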
+
+/*****************************************************************************
+ *
+ * Walk all basic blocks and call the given function pointer for all tree
+ * nodes contained therein.
+ */
+
+void Compiler::fgWalkAllTreesPre(fgWalkPreFn* visitor, void* pCallBackData)
+{
+ BasicBlock* block;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr tree;
+
+ for (tree = block->bbTreeList; tree; tree = tree->gtNext)
+ {
+ assert(tree->gtOper == GT_STMT);
+
+ fgWalkTreePre(&tree->gtStmt.gtStmtExpr, visitor, pCallBackData);
+ }
+ }
+}
+
+//******************************************************************************
+// fgWalkTreePostRec - Helper function for fgWalkTreePost.
+// Walks the tree in post-order, executing the callback on every node.
+// Template parameter 'computeStack' specifies whether to maintain
+// a stack of ancestor nodes which can be viewed in the callback.
+//
+template <bool computeStack>
+// static
+Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData* fgWalkData)
+{
+ fgWalkResult result;
+ GenTreePtr currentParent = fgWalkData->parent;
+
+ genTreeOps oper;
+ unsigned kind;
+
+ GenTree* tree = *pTree;
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ GenTreeArgList* args;
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Push(tree);
+ }
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ fgWalkData->parent = tree;
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (tree->gtField.gtFldObj)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtField.gtFldObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ break;
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtCallObjp, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ if (tree->gtCall.gtCallCookie)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtCallCookie, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtCallAddr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr != nullptr)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCall.gtControlExpr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrElem.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrElem.gtArrInds[dim], fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrOffs.gtOffset, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrOffs.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtArrOffs.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_CMPXCHG:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCmpXchg.gtOpComparand, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCmpXchg.gtOpValue, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtCmpXchg.gtOpLocation, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtDynBlk.Data(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ __fallthrough;
+
+ case GT_DYN_BLK:
+ result = fgWalkTreePostRec<computeStack>(&tree->gtDynBlk.Addr(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreePostRec<computeStack>(&tree->gtDynBlk.gtDynamicSize, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_PHI:
+ {
+ GenTreeUnOp* phi = tree->AsUnOp();
+ if (phi->gtOp1 != nullptr)
+ {
+ for (GenTreeArgList* args = phi->gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(&args->gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ }
+ }
+ break;
+
+ case GT_LIST:
+ {
+ GenTreeArgList* list = tree->AsArgList();
+ if (list->IsAggregate())
+ {
+ for (; list != nullptr; list = list->Rest())
+ {
+ result = fgWalkTreePostRec<computeStack>(&list->gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+ }
+
+ // GT_LIST nodes that do not represent aggregate arguments intentionally fall through to the
+ // default node processing below.
+ __fallthrough;
+ }
+
+ default:
+ if (kind & GTK_SMPOP)
+ {
+ GenTree** op1Slot = &tree->gtOp.gtOp1;
+
+ GenTree** op2Slot;
+ if (tree->OperIsBinary())
+ {
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ op2Slot = &tree->gtOp.gtOp2;
+ }
+ else
+ {
+ op2Slot = op1Slot;
+ op1Slot = &tree->gtOp.gtOp2;
+ }
+ }
+ else
+ {
+ op2Slot = nullptr;
+ }
+
+ if (*op1Slot != nullptr)
+ {
+ result = fgWalkTreePostRec<computeStack>(op1Slot, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (op2Slot != nullptr && *op2Slot != nullptr)
+ {
+ result = fgWalkTreePostRec<computeStack>(op2Slot, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ }
+#ifdef DEBUG
+ else
+ {
+ fgWalkData->compiler->gtDispTree(tree);
+ assert(!"unexpected operator");
+ }
+#endif
+ break;
+ }
+
+DONE:
+
+ fgWalkData->parent = currentParent;
+
+ /* Finally, visit the current node */
+ result = fgWalkData->wtpoVisitorFn(pTree, fgWalkData);
+
+ if (computeStack)
+ {
+ fgWalkData->parentStack->Pop();
+ }
+
+ return result;
+}
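+
+// Note (editorial): for simple unary/binary nodes, the default case above
+// honors GTF_REVERSE_OPS by visiting gtOp2 before gtOp1 when the flag is set,
+// so the post-order callback sees operands in their evaluation order.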
+
+// ****************************************************************************
+// walk tree doing callbacks in both pre- and post- order (both optional)
+
+template <bool doPreOrder, bool doPostOrder>
+// static
+Compiler::fgWalkResult Compiler::fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fgWalkData)
+{
+ fgWalkResult result = WALK_CONTINUE;
+
+ genTreeOps oper;
+ unsigned kind;
+
+ GenTree* tree = *pTree;
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ GenTreeArgList* args;
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ fgWalkData->parentStack->Push(tree);
+
+ if (doPreOrder)
+ {
+ result = fgWalkData->wtprVisitorFn(pTree, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ else
+ {
+ tree = *pTree;
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+ }
+ }
+
+ // If we're skipping subtrees, we're done.
+ if (result == WALK_SKIP_SUBTREES)
+ {
+ goto DONE;
+ }
+
+ /* Is this a constant or leaf node? */
+
+ if ((kind & (GTK_CONST | GTK_LEAF)) != 0)
+ {
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (tree->gtOp.gtOp1)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtOp.gtOp1, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtGetOp2())
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtOp.gtOp2, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (tree->gtField.gtFldObj)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtField.gtFldObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ break;
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtCallObjp, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ for (args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(args->pCurrent(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ if (tree->gtCall.gtCallCookie)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtCallCookie, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtCallAddr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCall.gtControlExpr, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrElem.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrElem.gtArrInds[dim], fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrOffs.gtOffset, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrOffs.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtArrOffs.gtArrObj, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_CMPXCHG:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCmpXchg.gtOpComparand, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCmpXchg.gtOpValue, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtCmpXchg.gtOpLocation, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtDynBlk.Data(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ __fallthrough;
+
+ case GT_DYN_BLK:
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtDynBlk.Addr(), fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtDynBlk.gtDynamicSize, fgWalkData);
+ if (result == WALK_ABORT)
+ {
+ return result;
+ }
+ break;
+
+ default:
+#ifdef DEBUG
+ fgWalkData->compiler->gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ }
+
+DONE:
+
+ /* Finally, visit the current node */
+ if (doPostOrder)
+ {
+ result = fgWalkData->wtpoVisitorFn(pTree, fgWalkData);
+ }
+
+ fgWalkData->parentStack->Pop();
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Call the given function pointer for all nodes in the tree. The 'visitor'
+ * fn should return one of the following values:
+ *
+ * WALK_ABORT stop walking and return immediately
+ * WALK_CONTINUE continue walking
+ * WALK_SKIP_SUBTREES don't walk any subtrees of the node just visited
+ */
+
+Compiler::fgWalkResult Compiler::fgWalkTree(GenTreePtr* pTree,
+ fgWalkPreFn* preVisitor,
+ fgWalkPreFn* postVisitor,
+ void* callBackData)
+
+{
+ fgWalkData walkData;
+
+ walkData.compiler = this;
+ walkData.wtprVisitorFn = preVisitor;
+ walkData.wtpoVisitorFn = postVisitor;
+ walkData.pCallbackData = callBackData;
+ walkData.parent = nullptr;
+ walkData.wtprLclsOnly = false;
+#ifdef DEBUG
+ walkData.printModified = false;
+#endif
+ ArrayStack<GenTree*> parentStack(this);
+ walkData.parentStack = &parentStack;
+
+ fgWalkResult result;
+
+ assert(preVisitor || postVisitor);
+
+ if (preVisitor && postVisitor)
+ {
+ result = fgWalkTreeRec<true, true>(pTree, &walkData);
+ }
+ else if (preVisitor)
+ {
+ result = fgWalkTreeRec<true, false>(pTree, &walkData);
+ }
+ else
+ {
+ result = fgWalkTreeRec<false, true>(pTree, &walkData);
+ }
+
+#ifdef DEBUG
+ if (verbose && walkData.printModified)
+ {
+ gtDispTree(*pTree);
+ }
+#endif
+
+ return result;
+}
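+
+// Usage sketch (editorial note): to run both a pre-order and a post-order
+// visitor over a statement's expression, a caller inside Compiler might write
+//
+//     fgWalkTree(&stmt->gtStmt.gtStmtExpr, PreVisitCB, PostVisitCB, &myData);
+//
+// where PreVisitCB/PostVisitCB are hypothetical fgWalkPreFn callbacks and
+// myData is reachable in each callback through fgWalkData::pCallbackData.
+// Passing nullptr for one of the two visitors selects the corresponding
+// single-order instantiation of fgWalkTreeRec above.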
+
+// ------------------------------------------------------------------------------------------
+// gtClearReg: Sets the register to the "no register assignment" value, depending upon
+// the type of the node, and whether it fits any of the special cases for register pairs
+// or multi-reg call nodes.
+//
+// Arguments:
+// compiler - compiler instance
+//
+// Return Value:
+// None
+void GenTree::gtClearReg(Compiler* compiler)
+{
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(TypeGet()) ||
+ // (IsLocal() && isRegPairType(compiler->lvaTable[gtLclVarCommon.gtLclNum].TypeGet())) ||
+ (OperGet() == GT_MUL && (gtFlags & GTF_MUL_64RSLT)))
+ {
+ gtRegPair = REG_PAIR_NONE;
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ gtRegNum = REG_NA;
+ }
+
+ // Also clear multi-reg state if this is a call node
+ if (IsCall())
+ {
+ this->AsCall()->ClearOtherRegs();
+ }
+ else if (IsCopyOrReload())
+ {
+ this->AsCopyOrReload()->ClearOtherRegs();
+ }
+}
+
+//-----------------------------------------------------------
+// CopyReg: Copy the _gtRegNum/_gtRegPair/gtRegTag fields.
+//
+// Arguments:
+// from - GenTree node from which to copy
+//
+// Return Value:
+// None
+void GenTree::CopyReg(GenTreePtr from)
+{
+ // To do the copy, use _gtRegPair, which must be bigger than _gtRegNum. Note that the values
+ // might be undefined (so gtRegTag == GT_REGTAG_NONE).
+ _gtRegPair = from->_gtRegPair;
+ C_ASSERT(sizeof(_gtRegPair) >= sizeof(_gtRegNum));
+ INDEBUG(gtRegTag = from->gtRegTag;)
+
+ // Also copy multi-reg state if this is a call node
+ if (IsCall())
+ {
+ assert(from->IsCall());
+ this->AsCall()->CopyOtherRegs(from->AsCall());
+ }
+ else if (IsCopyOrReload())
+ {
+ this->AsCopyOrReload()->CopyOtherRegs(from->AsCopyOrReload());
+ }
+}
+
+//------------------------------------------------------------------
+// gtHasReg: Whether the node has been assigned a register by LSRA
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if the node was assigned a register.
+//
+// A multi-reg call node is considered to have a reg
+// only if registers have been allocated for all of its
+// return values.
+//
+// A GT_COPY or GT_RELOAD of a multi-reg call is
+// considered to have a reg if a reg is assigned to
+// any of its positions.
+//
+// Assumption:
+// In order for this to work properly, gtClearReg must be called
+// prior to setting the register value.
+//
+bool GenTree::gtHasReg() const
+{
+ bool hasReg;
+
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(TypeGet()))
+ {
+ assert(_gtRegNum != REG_NA);
+ INDEBUG(assert(gtRegTag == GT_REGTAG_REGPAIR));
+ hasReg = (gtRegPair != REG_PAIR_NONE);
+ }
+ else
+#endif
+ {
+ assert(_gtRegNum != REG_PAIR_NONE);
+ INDEBUG(assert(gtRegTag == GT_REGTAG_REG));
+
+ if (IsMultiRegCall())
+ {
+ // Has to cast away const-ness because GetReturnTypeDesc() is a non-const method
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCall* call = tree->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ hasReg = false;
+
+ // A Multi-reg call node is said to have regs, if it has
+ // reg assigned to each of its result registers.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ hasReg = (call->GetRegNumByIdx(i) != REG_NA);
+ if (!hasReg)
+ {
+ break;
+ }
+ }
+ }
+ else if (IsCopyOrReloadOfMultiRegCall())
+ {
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ hasReg = false;
+
+ // A Multi-reg copy or reload node is said to have regs,
+ // if it has valid regs in any of the positions.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ hasReg = (copyOrReload->GetRegNumByIdx(i) != REG_NA);
+ if (hasReg)
+ {
+ break;
+ }
+ }
+ }
+ else
+ {
+ hasReg = (gtRegNum != REG_NA);
+ }
+ }
+
+ return hasReg;
+}
+
+//---------------------------------------------------------------
+// gtGetRegMask: Get the reg mask of the node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Reg Mask of GenTree node.
+//
+regMaskTP GenTree::gtGetRegMask() const
+{
+ regMaskTP resultMask;
+
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(TypeGet()))
+ {
+ resultMask = genRegPairMask(gtRegPair);
+ }
+ else
+#endif
+ {
+ if (IsMultiRegCall())
+ {
+ // temporarily cast away const-ness as AsCall() method is not declared const
+ resultMask = genRegMask(gtRegNum);
+ GenTree* temp = const_cast<GenTree*>(this);
+ resultMask |= temp->AsCall()->GetOtherRegMask();
+ }
+ else if (IsCopyOrReloadOfMultiRegCall())
+ {
+ // A multi-reg copy or reload will have valid regs only for those
+ // positions that need to be copied or reloaded. Hence we need
+ // to consider only those registers when computing the reg mask.
+
+ GenTree* tree = const_cast<GenTree*>(this);
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = copyOrReload->gtGetOp1()->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+
+ resultMask = RBM_NONE;
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ regNumber reg = copyOrReload->GetRegNumByIdx(i);
+ if (reg != REG_NA)
+ {
+ resultMask |= genRegMask(reg);
+ }
+ }
+ }
+ else
+ {
+ resultMask = genRegMask(gtRegNum);
+ }
+ }
+
+ return resultMask;
+}
+
+//---------------------------------------------------------------
+// GetOtherRegMask: Get the reg mask of gtOtherRegs of call node
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Reg mask of gtOtherRegs of call node.
+//
+regMaskTP GenTreeCall::GetOtherRegMask() const
+{
+ regMaskTP resultMask = RBM_NONE;
+
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ if (gtOtherRegs[i] != REG_NA)
+ {
+ resultMask |= genRegMask(gtOtherRegs[i]);
+ continue;
+ }
+ break;
+ }
+#endif
+
+ return resultMask;
+}
+
+//-------------------------------------------------------------------------
+// IsPure:
+// Returns true if this call is pure. For now, this uses the same
+// definition of "pure" as that used by HelperCallProperties: a
+// pure call does not read or write any aliased (e.g. heap) memory or
+// have other global side effects (e.g. class constructors, finalizers),
+// but is allowed to throw an exception.
+//
+// NOTE: this call currently only returns true if the call target is a
+// helper method that is known to be pure. No other analysis is
+// performed.
+//
+// Arguments:
+// compiler - the compiler context.
+//
+// Returns:
+// True if the call is pure; false otherwise.
+//
+bool GenTreeCall::IsPure(Compiler* compiler) const
+{
+ return (gtCallType == CT_HELPER) &&
+ compiler->s_helperCallProperties.IsPure(compiler->eeGetHelperNum(gtCallMethHnd));
+}
+
+#ifndef LEGACY_BACKEND
+
+//-------------------------------------------------------------------------
+// HasNonStandardAddedArgs: Return true if the method has non-standard args added to the call
+// argument list during argument morphing (fgMorphArgs), e.g., passed in R10 or R11 on AMD64.
+// See also GetNonStandardAddedArgCount().
+//
+// Arguments:
+// compiler - the compiler instance
+//
+// Return Value:
+// true if there are any such args, false otherwise.
+//
+bool GenTreeCall::HasNonStandardAddedArgs(Compiler* compiler) const
+{
+ return GetNonStandardAddedArgCount(compiler) != 0;
+}
+
+//-------------------------------------------------------------------------
+// GetNonStandardAddedArgCount: Get the count of non-standard arguments that have been added
+// during call argument morphing (fgMorphArgs). Do not count non-standard args that are already
+// counted in the argument list prior to morphing.
+//
+// This function is used to help map the caller and callee arguments during tail call setup.
+//
+// Arguments:
+// compiler - the compiler instance
+//
+// Return Value:
+// The count of args, as described.
+//
+// Notes:
+// It would be more general to have fgMorphArgs set a bit on the call node when such
+// args are added to a call, and a bit on each such arg, and then have this code loop
+// over the call args when the special call bit is set, counting the args with the special
+// arg bit. This seems pretty heavyweight, though. Instead, this logic needs to be kept
+// in sync with fgMorphArgs.
+//
+int GenTreeCall::GetNonStandardAddedArgCount(Compiler* compiler) const
+{
+ if (IsUnmanaged() && !compiler->opts.ShouldUsePInvokeHelpers())
+ {
+ // R11 = PInvoke cookie param
+ return 1;
+ }
+ else if (gtCallType == CT_INDIRECT)
+ {
+ if (IsVirtualStub())
+ {
+ // R11 = Virtual stub param
+ return 1;
+ }
+ else if (gtCallCookie != nullptr)
+ {
+ // R10 = PInvoke target param
+ // R11 = PInvoke cookie param
+ return 2;
+ }
+ }
+ return 0;
+}
+
+#endif // !LEGACY_BACKEND
+
+//-------------------------------------------------------------------------
+// TreatAsHasRetBufArg:
+//
+// Arguments:
+// compiler, the compiler instance so that we can call eeGetHelperNum
+//
+// Return Value:
+// Returns true if we treat the call as if it has a retBuf argument.
+// The call may actually have a retBuf argument, or it could be a
+// JIT helper that we are still transforming during the importer phase.
+//
+// Notes:
+// On ARM64 marking the method with the GTF_CALL_M_RETBUFFARG flag
+// will make HasRetBufArg() return true, but will also force the
+// use of register x8 to pass the RetBuf argument.
+//
+// These two Jit Helpers that we handle here by returning true
+// aren't actually defined to return a struct, so they don't expect
+// their RetBuf to be passed in x8; instead, they expect it in x0.
+//
+bool GenTreeCall::TreatAsHasRetBufArg(Compiler* compiler) const
+{
+ if (HasRetBufArg())
+ {
+ return true;
+ }
+ else
+ {
+ // If we see a Jit helper call that returns a TYP_STRUCT we will
+ // transform it as if it has a Return Buffer Argument
+ //
+ if (IsHelperCall() && (gtReturnType == TYP_STRUCT))
+ {
+ // There are two possible helper calls that use this path:
+ // CORINFO_HELP_GETFIELDSTRUCT and CORINFO_HELP_UNBOX_NULLABLE
+ //
+ CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(gtCallMethHnd);
+
+ if (helpFunc == CORINFO_HELP_GETFIELDSTRUCT)
+ {
+ return true;
+ }
+ else if (helpFunc == CORINFO_HELP_UNBOX_NULLABLE)
+ {
+ return true;
+ }
+ else
+ {
+ assert(!"Unexpected JIT helper in TreatAsHasRetBufArg");
+ }
+ }
+ }
+ return false;
+}
+
+//-------------------------------------------------------------------------
+// IsHelperCall: Determine if this GT_CALL node is a specific helper call.
+//
+// Arguments:
+// compiler - the compiler instance so that we can call eeFindHelper
+//
+// Return Value:
+// Returns true if this GT_CALL node is a call to the specified helper.
+//
+bool GenTreeCall::IsHelperCall(Compiler* compiler, unsigned helper) const
+{
+ return IsHelperCall(compiler->eeFindHelper(helper));
+}
+
+/*****************************************************************************
+ *
+ * Returns non-zero if the two trees are identical.
+ */
+
+bool GenTree::Compare(GenTreePtr op1, GenTreePtr op2, bool swapOK)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+// printf("tree1:\n"); gtDispTree(op1);
+// printf("tree2:\n"); gtDispTree(op2);
+
+AGAIN:
+
+ if (op1 == nullptr)
+ {
+ return (op2 == nullptr);
+ }
+ if (op2 == nullptr)
+ {
+ return false;
+ }
+ if (op1 == op2)
+ {
+ return true;
+ }
+
+ assert(op1->gtOper != GT_STMT);
+ assert(op2->gtOper != GT_STMT);
+
+ oper = op1->OperGet();
+
+ /* The operators must be equal */
+
+ if (oper != op2->gtOper)
+ {
+ return false;
+ }
+
+ /* The types must be equal */
+
+ if (op1->gtType != op2->gtType)
+ {
+ return false;
+ }
+
+ /* Overflow must be equal */
+ if (op1->gtOverflowEx() != op2->gtOverflowEx())
+ {
+ return false;
+ }
+
+ /* Sensible flags must be equal */
+ if ((op1->gtFlags & (GTF_UNSIGNED)) != (op2->gtFlags & (GTF_UNSIGNED)))
+ {
+ return false;
+ }
+
+ /* Figure out what kind of nodes we're comparing */
+
+ kind = op1->OperKind();
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ switch (oper)
+ {
+ case GT_CNS_INT:
+ if (op1->gtIntCon.gtIconVal == op2->gtIntCon.gtIconVal)
+ {
+ return true;
+ }
+ break;
+#if 0
+ // TODO-CQ: Enable this in the future
+ case GT_CNS_LNG:
+ if (op1->gtLngCon.gtLconVal == op2->gtLngCon.gtLconVal)
+ return true;
+ break;
+
+ case GT_CNS_DBL:
+ if (op1->gtDblCon.gtDconVal == op2->gtDblCon.gtDconVal)
+ return true;
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
+ {
+ break;
+ }
+
+ return true;
+
+ case GT_LCL_FLD:
+ if (op1->gtLclFld.gtLclNum != op2->gtLclFld.gtLclNum ||
+ op1->gtLclFld.gtLclOffs != op2->gtLclFld.gtLclOffs)
+ {
+ break;
+ }
+
+ return true;
+
+ case GT_CLS_VAR:
+ if (op1->gtClsVar.gtClsVarHnd != op2->gtClsVar.gtClsVarHnd)
+ {
+ break;
+ }
+
+ return true;
+
+ case GT_LABEL:
+ return true;
+
+ case GT_ARGPLACE:
+ if ((op1->gtType == TYP_STRUCT) &&
+ (op1->gtArgPlace.gtArgPlaceClsHnd != op2->gtArgPlace.gtArgPlaceClsHnd))
+ {
+ break;
+ }
+ return true;
+
+ default:
+ break;
+ }
+
+ return false;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_UNOP)
+ {
+ if (IsExOp(kind))
+ {
+ // ExOp operators extend unary operator with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the comparison.
+ switch (oper)
+ {
+ case GT_ARR_LENGTH:
+ if (op1->gtArrLen.ArrLenOffset() != op2->gtArrLen.ArrLenOffset())
+ {
+ return false;
+ }
+ break;
+ case GT_CAST:
+ if (op1->gtCast.gtCastType != op2->gtCast.gtCastType)
+ {
+ return false;
+ }
+ break;
+ case GT_OBJ:
+ if (op1->AsObj()->gtClass != op2->AsObj()->gtClass)
+ {
+ return false;
+ }
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_BOX:
+ break;
+
+ default:
+ assert(!"unexpected unary ExOp operator");
+ }
+ }
+ return Compare(op1->gtOp.gtOp1, op2->gtOp.gtOp1);
+ }
+
+ if (kind & GTK_BINOP)
+ {
+ if (IsExOp(kind))
+ {
+ // ExOp operators extend binary operators with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the comparison.
+ switch (oper)
+ {
+ case GT_INTRINSIC:
+ if (op1->gtIntrinsic.gtIntrinsicId != op2->gtIntrinsic.gtIntrinsicId)
+ {
+ return false;
+ }
+ break;
+ case GT_LEA:
+ if (op1->gtAddrMode.gtScale != op2->gtAddrMode.gtScale)
+ {
+ return false;
+ }
+ if (op1->gtAddrMode.gtOffset != op2->gtAddrMode.gtOffset)
+ {
+ return false;
+ }
+ break;
+ case GT_INDEX:
+ if (op1->gtIndex.gtIndElemSize != op2->gtIndex.gtIndElemSize)
+ {
+ return false;
+ }
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_QMARK:
+ break;
+
+ default:
+ assert(!"unexpected binary ExOp operator");
+ }
+ }
+
+ if (op1->gtOp.gtOp2)
+ {
+ if (!Compare(op1->gtOp.gtOp1, op2->gtOp.gtOp1, swapOK))
+ {
+ if (swapOK && OperIsCommutative(oper) &&
+ ((op1->gtOp.gtOp1->gtFlags | op1->gtOp.gtOp2->gtFlags | op2->gtOp.gtOp1->gtFlags |
+ op2->gtOp.gtOp2->gtFlags) &
+ GTF_ALL_EFFECT) == 0)
+ {
+ if (Compare(op1->gtOp.gtOp1, op2->gtOp.gtOp2, swapOK))
+ {
+ op1 = op1->gtOp.gtOp2;
+ op2 = op2->gtOp.gtOp1;
+ goto AGAIN;
+ }
+ }
+
+ return false;
+ }
+
+ op1 = op1->gtOp.gtOp2;
+ op2 = op2->gtOp.gtOp2;
+
+ goto AGAIN;
+ }
+ else
+ {
+
+ op1 = op1->gtOp.gtOp1;
+ op2 = op2->gtOp.gtOp1;
+
+ if (!op1)
+ {
+ return (op2 == nullptr);
+ }
+ if (!op2)
+ {
+ return false;
+ }
+
+ goto AGAIN;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (op1->gtField.gtFldHnd != op2->gtField.gtFldHnd)
+ {
+ break;
+ }
+
+ op1 = op1->gtField.gtFldObj;
+ op2 = op2->gtField.gtFldObj;
+
+ if (op1 || op2)
+ {
+ if (op1 && op2)
+ {
+ goto AGAIN;
+ }
+ }
+
+ return true;
+
+ case GT_CALL:
+
+ if (op1->gtCall.gtCallType != op2->gtCall.gtCallType)
+ {
+ return false;
+ }
+
+ if (op1->gtCall.gtCallType != CT_INDIRECT)
+ {
+ if (op1->gtCall.gtCallMethHnd != op2->gtCall.gtCallMethHnd)
+ {
+ return false;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (op1->gtCall.gtEntryPoint.addr != op2->gtCall.gtEntryPoint.addr)
+ return false;
+#endif
+ }
+ else
+ {
+ if (!Compare(op1->gtCall.gtCallAddr, op2->gtCall.gtCallAddr))
+ {
+ return false;
+ }
+ }
+
+ if (Compare(op1->gtCall.gtCallLateArgs, op2->gtCall.gtCallLateArgs) &&
+ Compare(op1->gtCall.gtCallArgs, op2->gtCall.gtCallArgs) &&
+ Compare(op1->gtCall.gtControlExpr, op2->gtCall.gtControlExpr) &&
+ Compare(op1->gtCall.gtCallObjp, op2->gtCall.gtCallObjp))
+ {
+ return true;
+ }
+ break;
+
+ case GT_ARR_ELEM:
+
+ if (op1->gtArrElem.gtArrRank != op2->gtArrElem.gtArrRank)
+ {
+ return false;
+ }
+
+ // NOTE: gtArrElemSize may need to be handled
+
+ unsigned dim;
+ for (dim = 0; dim < op1->gtArrElem.gtArrRank; dim++)
+ {
+ if (!Compare(op1->gtArrElem.gtArrInds[dim], op2->gtArrElem.gtArrInds[dim]))
+ {
+ return false;
+ }
+ }
+
+ op1 = op1->gtArrElem.gtArrObj;
+ op2 = op2->gtArrElem.gtArrObj;
+ goto AGAIN;
+
+ case GT_ARR_OFFSET:
+ if (op1->gtArrOffs.gtCurrDim != op2->gtArrOffs.gtCurrDim ||
+ op1->gtArrOffs.gtArrRank != op2->gtArrOffs.gtArrRank)
+ {
+ return false;
+ }
+ return (Compare(op1->gtArrOffs.gtOffset, op2->gtArrOffs.gtOffset) &&
+ Compare(op1->gtArrOffs.gtIndex, op2->gtArrOffs.gtIndex) &&
+ Compare(op1->gtArrOffs.gtArrObj, op2->gtArrOffs.gtArrObj));
+
+ case GT_CMPXCHG:
+ return Compare(op1->gtCmpXchg.gtOpLocation, op2->gtCmpXchg.gtOpLocation) &&
+ Compare(op1->gtCmpXchg.gtOpValue, op2->gtCmpXchg.gtOpValue) &&
+ Compare(op1->gtCmpXchg.gtOpComparand, op2->gtCmpXchg.gtOpComparand);
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ return Compare(op1->gtBoundsChk.gtArrLen, op2->gtBoundsChk.gtArrLen) &&
+ Compare(op1->gtBoundsChk.gtIndex, op2->gtBoundsChk.gtIndex) &&
+ (op1->gtBoundsChk.gtThrowKind == op2->gtBoundsChk.gtThrowKind);
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ return Compare(op1->gtDynBlk.Addr(), op2->gtDynBlk.Addr()) &&
+ Compare(op1->gtDynBlk.Data(), op2->gtDynBlk.Data()) &&
+ Compare(op1->gtDynBlk.gtDynamicSize, op2->gtDynBlk.gtDynamicSize);
+
+ default:
+ assert(!"unexpected operator");
+ }
+
+ return false;
+}
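+
+// Example (editorial note): two structurally identical trees compare equal even
+// when they are distinct allocations - e.g. two separate GT_LCL_VAR nodes of
+// the same type referring to the same lclNum. With swapOK, an expression such
+// as "V01 + V02" also matches "V02 + V01" for a commutative oper, but only
+// when none of the operands carry GTF_ALL_EFFECT side-effect flags, as checked
+// above.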
+
+/*****************************************************************************
+ *
+ * Returns non-zero if the given tree contains a use of a local #lclNum.
+ */
+
+bool Compiler::gtHasRef(GenTreePtr tree, ssize_t lclNum, bool defOnly)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+AGAIN:
+
+ assert(tree);
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ assert(oper != GT_STMT);
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ return false;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ if (oper == GT_LCL_VAR)
+ {
+ if (tree->gtLclVarCommon.gtLclNum == (unsigned)lclNum)
+ {
+ if (!defOnly)
+ {
+ return true;
+ }
+ }
+ }
+ else if (oper == GT_RET_EXPR)
+ {
+ return gtHasRef(tree->gtRetExpr.gtInlineCandidate, lclNum, defOnly);
+ }
+
+ return false;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (tree->gtGetOp2())
+ {
+ if (gtHasRef(tree->gtOp.gtOp1, lclNum, defOnly))
+ {
+ return true;
+ }
+
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+ }
+ else
+ {
+ tree = tree->gtOp.gtOp1;
+
+ if (!tree)
+ {
+ return false;
+ }
+
+ if (kind & GTK_ASGOP)
+ {
+ // 'tree' is the gtOp1 of an assignment node. So we can handle
+ // the case where defOnly is either true or false.
+
+ if (tree->gtOper == GT_LCL_VAR && tree->gtLclVarCommon.gtLclNum == (unsigned)lclNum)
+ {
+ return true;
+ }
+ else if (tree->gtOper == GT_FIELD && lclNum == (ssize_t)tree->gtField.gtFldHnd)
+ {
+ return true;
+ }
+ }
+
+ goto AGAIN;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ if (lclNum == (ssize_t)tree->gtField.gtFldHnd)
+ {
+ if (!defOnly)
+ {
+ return true;
+ }
+ }
+
+ tree = tree->gtField.gtFldObj;
+ if (tree)
+ {
+ goto AGAIN;
+ }
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ if (gtHasRef(tree->gtCall.gtCallObjp, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ if (gtHasRef(tree->gtCall.gtCallArgs, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ if (gtHasRef(tree->gtCall.gtCallLateArgs, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtControlExpr)
+ {
+ if (gtHasRef(tree->gtCall.gtControlExpr, lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ // pinvoke-calli cookie is a constant, or constant indirection
+ assert(tree->gtCall.gtCallCookie == nullptr || tree->gtCall.gtCallCookie->gtOper == GT_CNS_INT ||
+ tree->gtCall.gtCallCookie->gtOper == GT_IND);
+
+ tree = tree->gtCall.gtCallAddr;
+ }
+ else
+ {
+ tree = nullptr;
+ }
+
+ if (tree)
+ {
+ goto AGAIN;
+ }
+
+ break;
+
+ case GT_ARR_ELEM:
+ if (gtHasRef(tree->gtArrElem.gtArrObj, lclNum, defOnly))
+ {
+ return true;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ if (gtHasRef(tree->gtArrElem.gtArrInds[dim], lclNum, defOnly))
+ {
+ return true;
+ }
+ }
+
+ break;
+
+ case GT_ARR_OFFSET:
+ if (gtHasRef(tree->gtArrOffs.gtOffset, lclNum, defOnly) ||
+ gtHasRef(tree->gtArrOffs.gtIndex, lclNum, defOnly) ||
+ gtHasRef(tree->gtArrOffs.gtArrObj, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ case GT_CMPXCHG:
+ if (gtHasRef(tree->gtCmpXchg.gtOpLocation, lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtCmpXchg.gtOpValue, lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtCmpXchg.gtOpComparand, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ if (gtHasRef(tree->gtBoundsChk.gtArrLen, lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtBoundsChk.gtIndex, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ if (gtHasRef(tree->gtDynBlk.Data(), lclNum, defOnly))
+ {
+ return true;
+ }
+ __fallthrough;
+ case GT_DYN_BLK:
+ if (gtHasRef(tree->gtDynBlk.Addr(), lclNum, defOnly))
+ {
+ return true;
+ }
+ if (gtHasRef(tree->gtDynBlk.gtDynamicSize, lclNum, defOnly))
+ {
+ return true;
+ }
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ }
+
+ return false;
+}
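+
+// Note (editorial): gtHasRef(tree, lclNum, /*defOnly*/ false) answers "does
+// V<lclNum> (or, when lclNum is really a field handle, that field) appear
+// anywhere in this tree?", while defOnly == true restricts the search to
+// positions where the local or field is the destination of an assignment.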
+
+struct AddrTakenDsc
+{
+ Compiler* comp;
+ bool hasAddrTakenLcl;
+};
+
+/* static */
+Compiler::fgWalkResult Compiler::gtHasLocalsWithAddrOpCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+
+ if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
+ {
+ ((AddrTakenDsc*)data->pCallbackData)->hasAddrTakenLcl = true;
+ return WALK_ABORT;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Return true if this tree contains locals with lvHasLdAddrOp or lvAddrExposed
+ * flag(s) set.
+ */
+
+bool Compiler::gtHasLocalsWithAddrOp(GenTreePtr tree)
+{
+ AddrTakenDsc desc;
+
+ desc.comp = this;
+ desc.hasAddrTakenLcl = false;
+
+ fgWalkTreePre(&tree, gtHasLocalsWithAddrOpCB, &desc);
+
+ return desc.hasAddrTakenLcl;
+}
+
+/*****************************************************************************
+ *
+ * Helper used to compute hash values for trees.
+ */
+
+inline unsigned genTreeHashAdd(unsigned old, unsigned add)
+{
+ return (old + old / 2) ^ add;
+}
+
+inline unsigned genTreeHashAdd(unsigned old, void* add)
+{
+ return genTreeHashAdd(old, (unsigned)(size_t)add);
+}
+
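+// Combines two operand hash values symmetrically, so that the operands of a
+// commutative operator hash to the same value regardless of their order.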
+inline unsigned genTreeHashAdd(unsigned old, unsigned add1, unsigned add2)
+{
+ return (old + old / 2) ^ add1 ^ add2;
+}
+
+/*****************************************************************************
+ *
+ * Given an arbitrary expression tree, compute a hash value for it.
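+ * Commutative operators combine their operand hashes symmetrically (see
+ * genTreeHashAdd above), so swapping such operands does not change the hash.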
+ */
+
+unsigned Compiler::gtHashValue(GenTree* tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ unsigned hash = 0;
+
+ GenTreePtr temp;
+
+AGAIN:
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Include the operator value in the hash */
+
+ hash = genTreeHashAdd(hash, oper);
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ size_t add;
+
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ add = tree->gtLclVar.gtLclNum;
+ break;
+ case GT_LCL_FLD:
+ hash = genTreeHashAdd(hash, tree->gtLclFld.gtLclNum);
+ add = tree->gtLclFld.gtLclOffs;
+ break;
+
+ case GT_CNS_INT:
+ add = (int)tree->gtIntCon.gtIconVal;
+ break;
+ case GT_CNS_LNG:
+ add = (int)tree->gtLngCon.gtLconVal;
+ break;
+ case GT_CNS_DBL:
+ add = (int)tree->gtDblCon.gtDconVal;
+ break;
+ case GT_CNS_STR:
+ add = (int)tree->gtStrCon.gtSconCPX;
+ break;
+
+ case GT_JMP:
+ add = tree->gtVal.gtVal1;
+ break;
+
+ default:
+ add = 0;
+ break;
+ }
+
+ // narrowing cast, but for hashing.
+ hash = genTreeHashAdd(hash, (unsigned)add);
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ GenTreePtr op1;
+
+ if (kind & GTK_UNOP)
+ {
+ op1 = tree->gtOp.gtOp1;
+ /* Special case: no sub-operand at all */
+
+ if (GenTree::IsExOp(kind))
+ {
+ // ExOp operators extend operators with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the hash code.
+ switch (oper)
+ {
+ case GT_ARR_LENGTH:
+ hash += tree->gtArrLen.ArrLenOffset();
+ break;
+ case GT_CAST:
+ hash ^= tree->gtCast.gtCastType;
+ break;
+ case GT_INDEX:
+ hash += tree->gtIndex.gtIndElemSize;
+ break;
+ case GT_ALLOCOBJ:
+ hash = genTreeHashAdd(hash, static_cast<unsigned>(
+ reinterpret_cast<uintptr_t>(tree->gtAllocObj.gtAllocObjClsHnd)));
+ hash = genTreeHashAdd(hash, tree->gtAllocObj.gtNewHelper);
+ break;
+ case GT_OBJ:
+ hash = genTreeHashAdd(hash, static_cast<unsigned>(
+ reinterpret_cast<uintptr_t>(tree->gtObj.gtClass)));
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_BOX:
+ break;
+
+ default:
+ assert(!"unexpected unary ExOp operator");
+ }
+ }
+
+ if (!op1)
+ {
+ goto DONE;
+ }
+
+ tree = op1;
+ goto AGAIN;
+ }
+
+ if (kind & GTK_BINOP)
+ {
+ if (GenTree::IsExOp(kind))
+ {
+ // ExOp operators extend operators with extra, non-GenTreePtr members. In many cases,
+ // these should be included in the hash code.
+ switch (oper)
+ {
+ case GT_INTRINSIC:
+ hash += tree->gtIntrinsic.gtIntrinsicId;
+ break;
+ case GT_LEA:
+ hash += (tree->gtAddrMode.gtOffset << 3) + tree->gtAddrMode.gtScale;
+ break;
+
+ case GT_BLK:
+ case GT_STORE_BLK:
+ hash += tree->gtBlk.gtBlkSize;
+ break;
+
+ case GT_OBJ:
+ case GT_STORE_OBJ:
+ hash ^= static_cast<unsigned>(reinterpret_cast<uintptr_t>(tree->AsObj()->gtClass));
+ break;
+
+ case GT_DYN_BLK:
+ case GT_STORE_DYN_BLK:
+ hash += gtHashValue(tree->AsDynBlk()->gtDynamicSize);
+ break;
+
+ // For the ones below no extra argument matters for comparison.
+ case GT_ARR_INDEX:
+ case GT_QMARK:
+ case GT_INDEX:
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ hash += tree->gtSIMD.gtSIMDIntrinsicID;
+ hash += tree->gtSIMD.gtSIMDBaseType;
+ break;
+#endif // FEATURE_SIMD
+
+ default:
+ assert(!"unexpected binary ExOp operator");
+ }
+ }
+
+ op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Is there a second sub-operand? */
+
+ if (!op2)
+ {
+ /* Special case: no sub-operands at all */
+
+ if (!op1)
+ {
+ goto DONE;
+ }
+
+ /* This is a unary operator */
+
+ tree = op1;
+ goto AGAIN;
+ }
+
+ /* This is a binary operator */
+
+ unsigned hsh1 = gtHashValue(op1);
+
+ /* Special case: addition of two values */
+
+ if (GenTree::OperIsCommutative(oper))
+ {
+ unsigned hsh2 = gtHashValue(op2);
+
+ /* Produce a hash that allows swapping the operands */
+
+ hash = genTreeHashAdd(hash, hsh1, hsh2);
+ goto DONE;
+ }
+
+ /* Add op1's hash to the running value and continue with op2 */
+
+ hash = genTreeHashAdd(hash, hsh1);
+
+ tree = op2;
+ goto AGAIN;
+ }
+
+ /* See what kind of a special operator we have here */
+ switch (tree->gtOper)
+ {
+ case GT_FIELD:
+ if (tree->gtField.gtFldObj)
+ {
+ temp = tree->gtField.gtFldObj;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+ break;
+
+ case GT_STMT:
+ temp = tree->gtStmt.gtStmtExpr;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ break;
+
+ case GT_ARR_ELEM:
+
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrElem.gtArrObj));
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrElem.gtArrInds[dim]));
+ }
+
+ break;
+
+ case GT_ARR_OFFSET:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrOffs.gtOffset));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrOffs.gtIndex));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtArrOffs.gtArrObj));
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallObjp && tree->gtCall.gtCallObjp->gtOper != GT_NOP)
+ {
+ temp = tree->gtCall.gtCallObjp;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ temp = tree->gtCall.gtCallArgs;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ temp = tree->gtCall.gtCallAddr;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+ else
+ {
+ hash = genTreeHashAdd(hash, tree->gtCall.gtCallMethHnd);
+ }
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ temp = tree->gtCall.gtCallLateArgs;
+ assert(temp);
+ hash = genTreeHashAdd(hash, gtHashValue(temp));
+ }
+ break;
+
+ case GT_CMPXCHG:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtCmpXchg.gtOpLocation));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtCmpXchg.gtOpValue));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtCmpXchg.gtOpComparand));
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtArrLen));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtIndex));
+ hash = genTreeHashAdd(hash, tree->gtBoundsChk.gtThrowKind);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtDynBlk.Data()));
+ __fallthrough;
+ case GT_DYN_BLK:
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtDynBlk.Addr()));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtDynBlk.gtDynamicSize));
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ assert(!"unexpected operator");
+ break;
+ }
+
+DONE:
+
+ return hash;
+}
+
+/*****************************************************************************
+ *
+ * Given an arbitrary expression tree, attempts to find the set of all local variables
+ * referenced by the tree, and return them as "*result".
+ * If "findPtr" is null, this is a tracked variable set;
+ * if it is non-null, this is an "all var set."
+ * The "*result" value is valid only if the call returns "true." It may return "false"
+ * for several reasons:
+ * If "findPtr" is NULL, and the expression contains an untracked variable.
+ * If "findPtr" is non-NULL, and the expression contains a variable that can't be represented
+ * in an "all var set."
+ * If the expression accesses address-exposed variables.
+ *
+ * If there are any indirections or global refs in the expression, the "*refsPtr"
+ * argument will be assigned the appropriate bit set based on the 'varRefKinds' type.
+ * It won't be assigned anything when there are no indirections or global
+ * references, though, so this value should be initialized before the call.
+ * If we encounter an expression that is equal to *findPtr we set *findPtr
+ * to NULL.
+ */
+bool Compiler::lvaLclVarRefs(GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, void* result)
+{
+ genTreeOps oper;
+ unsigned kind;
+ varRefKinds refs = VR_NONE;
+ ALLVARSET_TP ALLVARSET_INIT_NOCOPY(allVars, AllVarSetOps::UninitVal());
+ VARSET_TP VARSET_INIT_NOCOPY(trkdVars, VarSetOps::UninitVal());
+ if (findPtr)
+ {
+ AllVarSetOps::AssignNoCopy(this, allVars, AllVarSetOps::MakeEmpty(this));
+ }
+ else
+ {
+ VarSetOps::AssignNoCopy(this, trkdVars, VarSetOps::MakeEmpty(this));
+ }
+
+AGAIN:
+
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+ /* Remember whether we've come across the expression we're looking for */
+
+ if (findPtr && *findPtr == tree)
+ {
+ *findPtr = nullptr;
+ }
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ if (oper == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ /* Should we use the variable table? */
+
+ if (findPtr)
+ {
+ if (lclNum >= lclMAX_ALLSET_TRACKED)
+ {
+ return false;
+ }
+
+ AllVarSetOps::AddElemD(this, allVars, lclNum);
+ }
+ else
+ {
+ assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvTracked == false)
+ {
+ return false;
+ }
+
+ // Don't deal with expressions with address-exposed variables.
+ if (varDsc->lvAddrExposed)
+ {
+ return false;
+ }
+
+ VarSetOps::AddElemD(this, trkdVars, varDsc->lvVarIndex);
+ }
+ }
+ else if (oper == GT_LCL_FLD)
+ {
+ /* We can't track every field of every var. Moreover, indirections
+ may access different parts of the var as different (but
+ overlapping) fields. So just treat them as indirect accesses */
+
+ if (varTypeIsGC(tree->TypeGet()))
+ {
+ refs = VR_IND_REF;
+ }
+ else
+ {
+ refs = VR_IND_SCL;
+ }
+ }
+ else if (oper == GT_CLS_VAR)
+ {
+ refs = VR_GLB_VAR;
+ }
+
+ if (refs != VR_NONE)
+ {
+ /* Write it back to the caller's parameter using an 'or' */
+ *refsPtr = varRefKinds((*refsPtr) | refs);
+ }
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ if (oper == GT_IND)
+ {
+ assert(tree->gtOp.gtOp2 == nullptr);
+
+ /* Set the proper indirection bit */
+
+ if ((tree->gtFlags & GTF_IND_INVARIANT) == 0)
+ {
+ if (varTypeIsGC(tree->TypeGet()))
+ {
+ refs = VR_IND_REF;
+ }
+ else
+ {
+ refs = VR_IND_SCL;
+ }
+
+ // If the flag GTF_IND_TGTANYWHERE is set this indirection
+ // could also point at a global variable
+
+ if (tree->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ refs = varRefKinds(((int)refs) | ((int)VR_GLB_VAR));
+ }
+ }
+
+ /* Write it back to the caller's parameter using an 'or' */
+ *refsPtr = varRefKinds((*refsPtr) | refs);
+
+ // For IL volatile memory accesses we mark the GT_IND node
+ // with a GTF_DONT_CSE flag.
+ //
+ // This flag is also set for the left hand side of an assignment.
+ //
+ // If this flag is set then we return false
+ //
+ if (tree->gtFlags & GTF_DONT_CSE)
+ {
+ return false;
+ }
+ }
+
+ if (tree->gtGetOp2())
+ {
+ /* It's a binary operator */
+ if (!lvaLclVarRefsAccum(tree->gtOp.gtOp1, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ tree = tree->gtOp.gtOp2;
+ assert(tree);
+ goto AGAIN;
+ }
+ else
+ {
+ /* It's a unary (or nilary) operator */
+
+ tree = tree->gtOp.gtOp1;
+ if (tree)
+ {
+ goto AGAIN;
+ }
+
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+ }
+
+ switch (oper)
+ {
+ case GT_ARR_ELEM:
+ if (!lvaLclVarRefsAccum(tree->gtArrElem.gtArrObj, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(tmpVs, VarSetOps::UninitVal());
+ if (!lvaLclVarRefsAccum(tree->gtArrElem.gtArrInds[dim], findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ }
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+
+ case GT_ARR_OFFSET:
+ if (!lvaLclVarRefsAccum(tree->gtArrOffs.gtOffset, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtArrOffs.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtArrOffs.gtArrObj, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtArrLen, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+
+ case GT_STORE_DYN_BLK:
+ if (!lvaLclVarRefsAccum(tree->gtDynBlk.Data(), findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ __fallthrough;
+ case GT_DYN_BLK:
+ if (!lvaLclVarRefsAccum(tree->gtDynBlk.Addr(), findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ if (!lvaLclVarRefsAccum(tree->gtDynBlk.gtDynamicSize, findPtr, refsPtr, &allVars, &trkdVars))
+ {
+ return false;
+ }
+ // Otherwise...
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ break;
+
+ case GT_CALL:
+ /* Allow calls to the Shared Static helper */
+ if (IsSharedStaticHelper(tree))
+ {
+ *refsPtr = varRefKinds((*refsPtr) | VR_INVARIANT);
+ lvaLclVarRefsAccumIntoRes(findPtr, result, allVars, trkdVars);
+ return true;
+ }
+ break;
+ default:
+ break;
+
+ } // end switch (oper)
+
+ return false;
+}
+
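+// Helper for lvaLclVarRefs: computes the variable references of "tree" via a recursive
+// call and unions them into "*allVars" (when searching with findPtr) or "*trkdVars".
+// Returns false if the recursive call fails for any of the reasons described above.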
+bool Compiler::lvaLclVarRefsAccum(
+ GenTreePtr tree, GenTreePtr* findPtr, varRefKinds* refsPtr, ALLVARSET_TP* allVars, VARSET_TP* trkdVars)
+{
+ if (findPtr)
+ {
+ ALLVARSET_TP ALLVARSET_INIT_NOCOPY(tmpVs, AllVarSetOps::UninitVal());
+ if (!lvaLclVarRefs(tree, findPtr, refsPtr, &tmpVs))
+ {
+ return false;
+ }
+ // Otherwise...
+ AllVarSetOps::UnionD(this, *allVars, tmpVs);
+ }
+ else
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(tmpVs, VarSetOps::UninitVal());
+ if (!lvaLclVarRefs(tree, findPtr, refsPtr, &tmpVs))
+ {
+ return false;
+ }
+ // Otherwise...
+ VarSetOps::UnionD(this, *trkdVars, tmpVs);
+ }
+ return true;
+}
+
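+// Helper for lvaLclVarRefs: copies the accumulated variable set into the caller's
+// "result" buffer -- an ALLVARSET_TP when findPtr is non-null, a VARSET_TP otherwise.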
+void Compiler::lvaLclVarRefsAccumIntoRes(GenTreePtr* findPtr,
+ void* result,
+ ALLVARSET_VALARG_TP allVars,
+ VARSET_VALARG_TP trkdVars)
+{
+ if (findPtr)
+ {
+ ALLVARSET_TP* avsPtr = (ALLVARSET_TP*)result;
+ AllVarSetOps::AssignNoCopy(this, (*avsPtr), allVars);
+ }
+ else
+ {
+ VARSET_TP* vsPtr = (VARSET_TP*)result;
+ VarSetOps::AssignNoCopy(this, (*vsPtr), trkdVars);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Return a relational operator that is the reverse of the given one.
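+ * For example, the reverse of GT_LT ("a < b") is GT_GE ("a >= b").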
+ */
+
+/* static */
+genTreeOps GenTree::ReverseRelop(genTreeOps relop)
+{
+ static const genTreeOps reverseOps[] = {
+ GT_NE, // GT_EQ
+ GT_EQ, // GT_NE
+ GT_GE, // GT_LT
+ GT_GT, // GT_LE
+ GT_LT, // GT_GE
+ GT_LE, // GT_GT
+ };
+
+ assert(reverseOps[GT_EQ - GT_EQ] == GT_NE);
+ assert(reverseOps[GT_NE - GT_EQ] == GT_EQ);
+
+ assert(reverseOps[GT_LT - GT_EQ] == GT_GE);
+ assert(reverseOps[GT_LE - GT_EQ] == GT_GT);
+ assert(reverseOps[GT_GE - GT_EQ] == GT_LT);
+ assert(reverseOps[GT_GT - GT_EQ] == GT_LE);
+
+ assert(OperIsCompare(relop));
+ assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(reverseOps));
+
+ return reverseOps[relop - GT_EQ];
+}
+
+/*****************************************************************************
+ *
+ * Return a relational operator that will work for swapped operands.
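+ * For example, "a < b" is equivalent to "b > a", so swapping GT_LT yields GT_GT.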
+ */
+
+/* static */
+genTreeOps GenTree::SwapRelop(genTreeOps relop)
+{
+ static const genTreeOps swapOps[] = {
+ GT_EQ, // GT_EQ
+ GT_NE, // GT_NE
+ GT_GT, // GT_LT
+ GT_GE, // GT_LE
+ GT_LE, // GT_GE
+ GT_LT, // GT_GT
+ };
+
+ assert(swapOps[GT_EQ - GT_EQ] == GT_EQ);
+ assert(swapOps[GT_NE - GT_EQ] == GT_NE);
+
+ assert(swapOps[GT_LT - GT_EQ] == GT_GT);
+ assert(swapOps[GT_LE - GT_EQ] == GT_GE);
+ assert(swapOps[GT_GE - GT_EQ] == GT_LE);
+ assert(swapOps[GT_GT - GT_EQ] == GT_LT);
+
+ assert(OperIsCompare(relop));
+ assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(swapOps));
+
+ return swapOps[relop - GT_EQ];
+}
+
+/*****************************************************************************
+ *
+ * Reverse the meaning of the given test condition.
+ */
+
+GenTreePtr Compiler::gtReverseCond(GenTree* tree)
+{
+ if (tree->OperIsCompare())
+ {
+ tree->SetOper(GenTree::ReverseRelop(tree->OperGet()));
+
+ // Flip the GTF_RELOP_NAN_UN bit
+ // a ord b === (a != NaN && b != NaN)
+ // a unord b === (a == NaN || b == NaN)
+ // => !(a ord b) === (a unord b)
+ if (varTypeIsFloating(tree->gtOp.gtOp1->TypeGet()))
+ {
+ tree->gtFlags ^= GTF_RELOP_NAN_UN;
+ }
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_NOT, TYP_INT, tree);
+ }
+
+ return tree;
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
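+// Checks that a GT_MUL flagged with GTF_MUL_64RSLT has the expected shape: a
+// non-overflow TYP_LONG multiply whose operands are both casts from int, with
+// matching signedness on the casts and on the multiply itself.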
+bool GenTree::gtIsValid64RsltMul()
+{
+ if ((gtOper != GT_MUL) || !(gtFlags & GTF_MUL_64RSLT))
+ {
+ return false;
+ }
+
+ GenTreePtr op1 = gtOp.gtOp1;
+ GenTreePtr op2 = gtOp.gtOp2;
+
+ if (TypeGet() != TYP_LONG || op1->TypeGet() != TYP_LONG || op2->TypeGet() != TYP_LONG)
+ {
+ return false;
+ }
+
+ if (gtOverflow())
+ {
+ return false;
+ }
+
+ // op1 has to be conv.i8(i4Expr)
+ if ((op1->gtOper != GT_CAST) || (genActualType(op1->CastFromType()) != TYP_INT))
+ {
+ return false;
+ }
+
+ // op2 has to be conv.i8(i4Expr)
+ if ((op2->gtOper != GT_CAST) || (genActualType(op2->CastFromType()) != TYP_INT))
+ {
+ return false;
+ }
+
+ // The signedness of both casts must be the same
+ if (((op1->gtFlags & GTF_UNSIGNED) != 0) != ((op2->gtFlags & GTF_UNSIGNED) != 0))
+ {
+ return false;
+ }
+
+ // Do unsigned mul iff both the casts are unsigned
+ if (((op1->gtFlags & GTF_UNSIGNED) != 0) != ((gtFlags & GTF_UNSIGNED) != 0))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Figure out the evaluation order for a list of values.
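+ * Returns the highest Sethi complexity among the list elements, and sets each
+ * GT_LIST node's costs to the accumulated costs of its element and the rest of the list.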
+ */
+
+unsigned Compiler::gtSetListOrder(GenTree* list, bool regs)
+{
+ assert(list && list->IsList());
+
+ unsigned level = 0;
+ unsigned ftreg = 0;
+ unsigned costSz = 0;
+ unsigned costEx = 0;
+
+#if FEATURE_STACK_FP_X87
+ /* Save the current FP stack level since an argument list
+ * will implicitly pop the FP stack when pushing the argument */
+ unsigned FPlvlSave = codeGen->genGetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+
+ GenTreePtr next = list->gtOp.gtOp2;
+
+ if (next)
+ {
+ unsigned nxtlvl = gtSetListOrder(next, regs);
+
+ ftreg |= next->gtRsvdRegs;
+
+ if (level < nxtlvl)
+ {
+ level = nxtlvl;
+ }
+ costEx += next->gtCostEx;
+ costSz += next->gtCostSz;
+ }
+
+ GenTreePtr op1 = list->gtOp.gtOp1;
+ unsigned lvl = gtSetEvalOrder(op1);
+
+#if FEATURE_STACK_FP_X87
+ /* restore the FP level */
+ codeGen->genResetFPstkLevel(FPlvlSave);
+#endif // FEATURE_STACK_FP_X87
+
+ list->gtRsvdRegs = (regMaskSmall)(ftreg | op1->gtRsvdRegs);
+
+ if (level < lvl)
+ {
+ level = lvl;
+ }
+
+ if (op1->gtCostEx != 0)
+ {
+ costEx += op1->gtCostEx;
+ costEx += regs ? 0 : IND_COST_EX;
+ }
+
+ if (op1->gtCostSz != 0)
+ {
+ costSz += op1->gtCostSz;
+#ifdef _TARGET_XARCH_
+ if (regs) // push is smaller than mov to reg
+#endif
+ {
+ costSz += 1;
+ }
+ }
+
+ list->SetCosts(costEx, costSz);
+
+ return level;
+}
+
+/*****************************************************************************
+ *
+ * This routine is a helper routine for gtSetEvalOrder() and is used to
+ * mark the interior address computation nodes with the GTF_ADDRMODE_NO_CSE flag
+ * which prevents them from being considered for CSE's.
+ *
+ * Furthermore this routine is a factoring of the logic used to walk down
+ * the child nodes of a GT_IND tree, similar to optParseArrayRef().
+ *
+ * Previously we had this logic repeated three times inside of gtSetEvalOrder().
+ * Here we combine those three repeats into this routine and use the
+ * bool constOnly to modify the behavior of this routine for the first call.
+ *
+ * The object here is to mark all of the interior GT_ADD's and GT_NOP's
+ * with the GTF_ADDRMODE_NO_CSE flag and to set op1 and op2 to the terminal nodes
+ * which are later matched against 'adr' and 'idx'.
+ *
+ * *pbHasRangeCheckBelow is set to false if we traverse a range check GT_NOP
+ * node in our walk. It remains unchanged otherwise.
+ *
+ * TODO-Cleanup: It is essentially impossible to determine
+ * what it is supposed to do, or to write a reasonable specification comment
+ * for it that describes what it is supposed to do. There are obviously some
+ * very specific tree patterns that it expects to see, but those are not documented.
+ * The fact that it writes back to its op1WB and op2WB arguments, and traverses
+ * down both op1 and op2 trees, but op2 is only related to op1 in the (!constOnly)
+ * case (which really seems like a bug) is very confusing.
+ */
+
+void Compiler::gtWalkOp(GenTree** op1WB, GenTree** op2WB, GenTree* adr, bool constOnly)
+{
+ GenTreePtr op1 = *op1WB;
+ GenTreePtr op2 = *op2WB;
+ GenTreePtr op1EffectiveVal;
+
+ if (op1->gtOper == GT_COMMA)
+ {
+ op1EffectiveVal = op1->gtEffectiveVal();
+ if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) &&
+ (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI())))
+ {
+ op1 = op1EffectiveVal;
+ }
+ }
+
+ // Now we look for op1's with non-overflow GT_ADDs [of constants]
+ while ((op1->gtOper == GT_ADD) && (!op1->gtOverflow()) && (!constOnly || (op1->gtOp.gtOp2->IsCnsIntOrI())))
+ {
+ // mark it with GTF_ADDRMODE_NO_CSE
+ op1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+
+ if (!constOnly)
+ { // TODO-Cleanup: It seems bizarre that this is !constOnly
+ op2 = op1->gtOp.gtOp2;
+ }
+ op1 = op1->gtOp.gtOp1;
+
+ // If op1 is a GT_NOP then swap op1 and op2.
+ // (Why? Also, presumably op2 is not a GT_NOP in this case?)
+ if (op1->gtOper == GT_NOP)
+ {
+ GenTreePtr tmp;
+
+ tmp = op1;
+ op1 = op2;
+ op2 = tmp;
+ }
+
+ if (op1->gtOper == GT_COMMA)
+ {
+ op1EffectiveVal = op1->gtEffectiveVal();
+ if ((op1EffectiveVal->gtOper == GT_ADD) && (!op1EffectiveVal->gtOverflow()) &&
+ (!constOnly || (op1EffectiveVal->gtOp.gtOp2->IsCnsIntOrI())))
+ {
+ op1 = op1EffectiveVal;
+ }
+ }
+
+ if (!constOnly && ((op2 == adr) || (!op2->IsCnsIntOrI())))
+ {
+ break;
+ }
+ }
+
+ *op1WB = op1;
+ *op2WB = op2;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ * This is a workaround. It is to help implement an assert in gtSetEvalOrder() that the values
+ * gtWalkOp() leaves in op1 and op2 correspond with the values of adr, idx, mul, and cns
+ * that are returned by genCreateAddrMode(). It's essentially impossible to determine
+ * what gtWalkOp() *should* return for all possible trees. This simply loosens one assert
+ * to handle the following case:
+
+ indir int
+ const(h) int 4 field
+ + byref
+ lclVar byref V00 this <-- op2
+ comma byref <-- adr (base)
+ indir byte
+ lclVar byref V00 this
+ + byref
+ const int 2 <-- mul == 4
+ << int <-- op1
+ lclVar int V01 arg1 <-- idx
+
+ * Here, we are planning to generate the address mode [edx+4*eax], where eax = idx and edx = the GT_COMMA expression.
+ * To check adr equivalence with op2, we need to walk down the GT_ADD tree just like gtWalkOp() does.
+ */
+GenTreePtr Compiler::gtWalkOpEffectiveVal(GenTreePtr op)
+{
+ for (;;)
+ {
+ if (op->gtOper == GT_COMMA)
+ {
+ GenTreePtr opEffectiveVal = op->gtEffectiveVal();
+ if ((opEffectiveVal->gtOper == GT_ADD) && (!opEffectiveVal->gtOverflow()) &&
+ (opEffectiveVal->gtOp.gtOp2->IsCnsIntOrI()))
+ {
+ op = opEffectiveVal;
+ }
+ }
+
+ if ((op->gtOper != GT_ADD) || op->gtOverflow() || !op->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ break;
+ }
+
+ op = op->gtOp.gtOp1;
+ }
+
+ return op;
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Given a tree, set the gtCostEx and gtCostSz fields which
+ * are used to measure the relative costs of the codegen of the tree
+ *
+ */
+
+void Compiler::gtPrepareCost(GenTree* tree)
+{
+#if FEATURE_STACK_FP_X87
+ codeGen->genResetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ gtSetEvalOrder(tree);
+}
+
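+// Returns true if the given tree is a GT_LCL_VAR that is likely to end up in a
+// register: the local must not be marked lvDoNotEnregister and must have a weighted
+// ref count of at least 3 * BB_UNITY_WEIGHT; on x86, floating-point and long locals
+// are excluded.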
+bool Compiler::gtIsLikelyRegVar(GenTree* tree)
+{
+ if (tree->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+
+ assert(tree->gtLclVar.gtLclNum < lvaTableCnt);
+ LclVarDsc* varDsc = lvaTable + tree->gtLclVar.gtLclNum;
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ return false;
+ }
+
+ if (varDsc->lvRefCntWtd < (BB_UNITY_WEIGHT * 3))
+ {
+ return false;
+ }
+
+#ifdef _TARGET_X86_
+ if (varTypeIsFloating(tree->TypeGet()))
+ return false;
+ if (varTypeIsLong(tree->TypeGet()))
+ return false;
+#endif
+
+ return true;
+}
+
+//------------------------------------------------------------------------
+// gtCanSwapOrder: Returns true iff the secondNode can be swapped with firstNode.
+//
+// Arguments:
+// firstNode - An operand of a tree that can have GTF_REVERSE_OPS set.
+// secondNode - The other operand of the tree.
+//
+// Return Value:
+// Returns a boolean indicating whether it is safe to reverse the execution
+// order of the two trees, considering any exception, global effects, or
+// ordering constraints.
+//
+bool Compiler::gtCanSwapOrder(GenTree* firstNode, GenTree* secondNode)
+{
+ // The relative order of global / side effects can't be swapped.
+
+ bool canSwap = true;
+
+ if (optValnumCSE_phase)
+ {
+ canSwap = optCSE_canSwap(firstNode, secondNode);
+ }
+
+ // We cannot swap in the presence of special side effects such as GT_CATCH_ARG.
+
+ if (canSwap && (firstNode->gtFlags & GTF_ORDER_SIDEEFF))
+ {
+ canSwap = false;
+ }
+
+ // When strict side effect order is disabled we allow GTF_REVERSE_OPS to be set
+ // when one or both sides contains a GTF_CALL or GTF_EXCEPT.
+ // Currently only the C and C++ languages allow non strict side effect order.
+
+ unsigned strictEffects = GTF_GLOB_EFFECT;
+
+ if (canSwap && (firstNode->gtFlags & strictEffects))
+ {
+ // op1 has side effects that can't be reordered.
+ // Check for some special cases where we still may be able to swap.
+
+ if (secondNode->gtFlags & strictEffects)
+ {
+ // op2 also has non-reorderable side effects - can't swap.
+ canSwap = false;
+ }
+ else
+ {
+ // No side effects in op2 - we can swap iff op1 has no way of modifying op2,
+ // (i.e. through byref assignments or calls), or if op2 is a constant.
+
+ if (firstNode->gtFlags & strictEffects & GTF_PERSISTENT_SIDE_EFFECTS)
+ {
+ // We have to be conservative - can swap iff op2 is constant.
+ if (!secondNode->OperIsConst())
+ {
+ canSwap = false;
+ }
+ }
+ }
+ }
+ return canSwap;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree, figure out the order in which its sub-operands should be
+ * evaluated. If the second operand of a binary operator is more expensive
+ * than the first operand, then try to swap the operand trees. Updates the
+ * GTF_REVERSE_OPS bit if necessary in this case.
+ *
+ * Returns the Sethi 'complexity' estimate for this tree (the higher
+ * the number, the higher is the tree's resources requirement).
+ *
+ * This function sets:
+ * 1. gtCostEx to the execution complexity estimate
+ * 2. gtCostSz to the code size estimate
+ * 3. gtRsvdRegs to the set of fixed registers trashed by the tree
+ * 4. gtFPlvl to the "floating point depth" value for node, i.e. the max. number
+ * of operands the tree will push on the x87 (coprocessor) stack. Also sets
+ * genFPstkLevel, tmpDoubleSpillMax, and possibly gtFPstLvlRedo.
+ * 5. Sometimes sets GTF_ADDRMODE_NO_CSE on nodes in the tree.
+ * 6. DEBUG-only: clears GTF_DEBUG_NODE_MORPHED.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+unsigned Compiler::gtSetEvalOrder(GenTree* tree)
+{
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+#ifdef DEBUG
+ /* Clear the GTF_DEBUG_NODE_MORPHED flag as well */
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ /* Is this a FP value? */
+
+ bool isflt = varTypeIsFloating(tree->TypeGet());
+ unsigned FPlvlSave;
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = tree->OperGet();
+ unsigned kind = tree->OperKind();
+
+ /* Assume no fixed registers will be trashed */
+
+ regMaskTP ftreg = RBM_NONE; // Set of registers that will be used by the subtree
+ unsigned level;
+ int costEx;
+ int costSz;
+
+ bool bRngChk;
+
+#ifdef DEBUG
+ costEx = -1;
+ costSz = -1;
+#endif
+
+ /* Is this a constant or a leaf node? */
+
+ if (kind & (GTK_LEAF | GTK_CONST))
+ {
+ switch (oper)
+ {
+ bool iconNeedsReloc;
+
+#ifdef _TARGET_ARM_
+ case GT_CNS_LNG:
+ costSz = 9;
+ costEx = 4;
+ goto COMMON_CNS;
+
+ case GT_CNS_STR:
+ // Uses movw/movt
+ costSz = 7;
+ costEx = 3;
+ goto COMMON_CNS;
+
+ case GT_CNS_INT:
+
+ // If the constant is a handle then it will need to have a relocation
+ // applied to it.
+ // Any constant that requires a reloc must use the movw/movt sequence
+ //
+ iconNeedsReloc = opts.compReloc && tree->IsIconHandle() && !tree->IsIconHandle(GTF_ICON_FIELD_HDL);
+
+ if (iconNeedsReloc || !codeGen->validImmForInstr(INS_mov, tree->gtIntCon.gtIconVal))
+ {
+ // Uses movw/movt
+ costSz = 7;
+ costEx = 3;
+ }
+ else if (((unsigned)tree->gtIntCon.gtIconVal) <= 0x00ff)
+ {
+ // mov Rd, <const8>
+ costSz = 1;
+ costEx = 1;
+ }
+ else
+ {
+ // Uses movw/mvn
+ costSz = 3;
+ costEx = 1;
+ }
+ goto COMMON_CNS;
+
+#elif defined _TARGET_XARCH_
+
+ case GT_CNS_LNG:
+ costSz = 10;
+ costEx = 3;
+ goto COMMON_CNS;
+
+ case GT_CNS_STR:
+ costSz = 4;
+ costEx = 1;
+ goto COMMON_CNS;
+
+ case GT_CNS_INT:
+
+ // If the constant is a handle then it will need to have a relocation
+ // applied to it.
+ // Any constant that requires a reloc cannot be encoded as a small immediate
+ //
+ iconNeedsReloc = opts.compReloc && tree->IsIconHandle() && !tree->IsIconHandle(GTF_ICON_FIELD_HDL);
+
+ if (!iconNeedsReloc && (((signed char)tree->gtIntCon.gtIconVal) == tree->gtIntCon.gtIconVal))
+ {
+ costSz = 1;
+ costEx = 1;
+ }
+#if defined(_TARGET_AMD64_)
+ else if (iconNeedsReloc || ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0))
+ {
+ costSz = 10;
+ costEx = 3;
+ }
+#endif // _TARGET_AMD64_
+ else
+ {
+ costSz = 4;
+ costEx = 1;
+ }
+ goto COMMON_CNS;
+
+#elif defined(_TARGET_ARM64_)
+ case GT_CNS_LNG:
+ case GT_CNS_STR:
+ case GT_CNS_INT:
+ // TODO-ARM64-NYI: Need cost estimates.
+ costSz = 1;
+ costEx = 1;
+ goto COMMON_CNS;
+
+#else
+ case GT_CNS_LNG:
+ case GT_CNS_STR:
+ case GT_CNS_INT:
+#error "Unknown _TARGET_"
+#endif
+
+ COMMON_CNS:
+ /*
+ Note that some code below depends on constants always getting
+ moved to be the second operand of a binary operator. This is
+ easily accomplished by giving constants a level of 0, which
+ we do on the next line. If you ever decide to change this, be
+ aware that unless you make other arrangements for integer
+ constants to be moved, stuff will break.
+ */
+
+ level = 0;
+ break;
+
+ case GT_CNS_DBL:
+ level = 0;
+ /* We use fldz and fld1 to load 0.0 and 1.0, but all other */
+ /* floating point constants are loaded using an indirection */
+ if ((*((__int64*)&(tree->gtDblCon.gtDconVal)) == 0) ||
+ (*((__int64*)&(tree->gtDblCon.gtDconVal)) == I64(0x3ff0000000000000)))
+ {
+ costEx = 1;
+ costSz = 1;
+ }
+ else
+ {
+ costEx = IND_COST_EX;
+ costSz = 4;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ level = 1;
+ if (gtIsLikelyRegVar(tree))
+ {
+ costEx = 1;
+ costSz = 1;
+ /* Sign-extend and zero-extend are more expensive to load */
+ if (lvaTable[tree->gtLclVar.gtLclNum].lvNormalizeOnLoad())
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+ }
+ else
+ {
+ costEx = IND_COST_EX;
+ costSz = 2;
+ /* Sign-extend and zero-extend are more expensive to load */
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+ }
+#if defined(_TARGET_AMD64_)
+ // increase costSz for floating point locals
+ if (isflt)
+ {
+ costSz += 1;
+ if (!gtIsLikelyRegVar(tree))
+ {
+ costSz += 1;
+ }
+ }
+#endif
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ costEx *= 2; // Longs are twice as expensive
+ costSz *= 2;
+ }
+#endif
+ break;
+
+ case GT_CLS_VAR:
+#ifdef _TARGET_ARM_
+ // We generate movw/movt/ldr
+ level = 1;
+ costEx = 3 + IND_COST_EX; // 6
+ costSz = 4 + 4 + 2; // 10
+ break;
+#endif
+ case GT_LCL_FLD:
+ level = 1;
+ costEx = IND_COST_EX;
+ costSz = 4;
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+ break;
+
+ case GT_PHI_ARG:
+ case GT_ARGPLACE:
+ level = 0;
+ costEx = 0;
+ costSz = 0;
+ break;
+
+ default:
+ level = 1;
+ costEx = 1;
+ costSz = 1;
+ break;
+ }
+#if FEATURE_STACK_FP_X87
+ if (isflt && (oper != GT_PHI_ARG))
+ {
+ codeGen->genIncrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ int lvlb; // preference for op2
+ unsigned lvl2; // scratch variable
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ costEx = 0;
+ costSz = 0;
+
+ if (tree->OperIsAddrMode())
+ {
+ if (op1 == nullptr)
+ {
+ op1 = op2;
+ op2 = nullptr;
+ }
+ }
+
+ /* Check for a nilary operator */
+
+ if (op1 == nullptr)
+ {
+ assert(op2 == nullptr);
+
+ level = 0;
+
+ goto DONE;
+ }
+
+ /* Is this a unary operator? */
+
+ if (op2 == nullptr)
+ {
+ /* Process the operand of the operator */
+
+ /* Most Unary ops have costEx of 1 */
+ costEx = 1;
+ costSz = 1;
+
+ level = gtSetEvalOrder(op1);
+ ftreg |= op1->gtRsvdRegs;
+
+ /* Special handling for some operators */
+
+ switch (oper)
+ {
+ case GT_JTRUE:
+ costEx = 2;
+ costSz = 2;
+ break;
+
+ case GT_SWITCH:
+ costEx = 10;
+ costSz = 5;
+ break;
+
+ case GT_CAST:
+#if defined(_TARGET_ARM_)
+ costEx = 1;
+ costSz = 1;
+ if (isflt || varTypeIsFloating(op1->TypeGet()))
+ {
+ costEx = 3;
+ costSz = 4;
+ }
+#elif defined(_TARGET_ARM64_)
+ costEx = 1;
+ costSz = 2;
+ if (isflt || varTypeIsFloating(op1->TypeGet()))
+ {
+ costEx = 2;
+ costSz = 4;
+ }
+#elif defined(_TARGET_XARCH_)
+ costEx = 1;
+ costSz = 2;
+
+ if (isflt || varTypeIsFloating(op1->TypeGet()))
+ {
+ /* cast involving floats always go through memory */
+ costEx = IND_COST_EX * 2;
+ costSz = 6;
+
+#if FEATURE_STACK_FP_X87
+ if (isflt != varTypeIsFloating(op1->TypeGet()))
+ {
+ isflt ? codeGen->genIncrementFPstkLevel() // Cast from int to float
+ : codeGen->genDecrementFPstkLevel(); // Cast from float to int
+ }
+#endif // FEATURE_STACK_FP_X87
+ }
+#else
+#error "Unknown _TARGET_"
+#endif
+
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ if (varTypeIsUnsigned(tree->TypeGet()))
+ {
+ /* Cast to unsigned long */
+ costEx += 1;
+ costSz += 2;
+ }
+ else
+ {
+ /* Cast to signed long is slightly more costly */
+ costEx += 2;
+ costSz += 3;
+ }
+ }
+#endif // CPU_LONG_USES_REGPAIR
+
+ /* Overflow casts are a lot more expensive */
+ if (tree->gtOverflow())
+ {
+ costEx += 6;
+ costSz += 6;
+ }
+
+ break;
+
+ case GT_LIST:
+ case GT_NOP:
+ costEx = 0;
+ costSz = 0;
+ break;
+
+ case GT_INTRINSIC:
+ // GT_INTRINSIC intrinsics Sin, Cos, Sqrt, Abs ... have higher costs.
+ // TODO: tune these costs per target, as some of these are
+ // target intrinsics and would be cheaper to generate code for.
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ default:
+ assert(!"missing case for gtIntrinsicId");
+ costEx = 12;
+ costSz = 12;
+ break;
+
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+ case CORINFO_INTRINSIC_Object_GetType:
+ // We give these intrinsics a large fixed execution cost because we'd like to CSE
+ // them, even if they are implemented by calls. This is different from modeling
+ // user calls since we never CSE user calls.
+ costEx = 36;
+ costSz = 4;
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ costEx = 5;
+ costSz = 15;
+ break;
+
+ case CORINFO_INTRINSIC_Round:
+ costEx = 3;
+ costSz = 4;
+#if FEATURE_STACK_FP_X87
+ if (tree->TypeGet() == TYP_INT)
+ {
+ // This is a special case to handle the following
+ // optimization: conv.i4(round.d(d)) -> round.i(d)
+ codeGen->genDecrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+ break;
+ }
+ level++;
+ break;
+
+ case GT_NOT:
+ case GT_NEG:
+ // We need to ensure that -x is evaluated before x or else
+ // we get burned while adjusting genFPstkLevel in x*-x where
+ // the rhs x is the last use of the enregistered x.
+ //
+ // Even in the integer case we want to prefer to
+ // evaluate the side without the GT_NEG node, all other things
+ // being equal. Also a GT_NOT requires a scratch register
+
+ level++;
+ break;
+
+ case GT_ADDR:
+
+#if FEATURE_STACK_FP_X87
+ /* If the operand was floating point, pop the value from the stack */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ codeGen->genDecrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+ costEx = 0;
+ costSz = 1;
+
+ // If we have a GT_ADDR of a GT_IND we can just copy the costs from indOp1
+ if (op1->OperGet() == GT_IND)
+ {
+ GenTreePtr indOp1 = op1->gtOp.gtOp1;
+ costEx = indOp1->gtCostEx;
+ costSz = indOp1->gtCostSz;
+ }
+ break;
+
+ case GT_ARR_LENGTH:
+ level++;
+
+ /* Array length should cost the same as an indirection, which has a costEx of IND_COST_EX */
+ costEx = IND_COST_EX - 1;
+ costSz = 2;
+ break;
+
+ case GT_MKREFANY:
+ case GT_OBJ:
+ // We estimate the cost of a GT_OBJ or GT_MKREFANY to be two loads (GT_INDs)
+ costEx = 2 * IND_COST_EX;
+ costSz = 2 * 2;
+ break;
+
+ case GT_BOX:
+ // We estimate the cost of a GT_BOX to be two stores (GT_INDs)
+ costEx = 2 * IND_COST_EX;
+ costSz = 2 * 2;
+ break;
+
+ case GT_BLK:
+ case GT_IND:
+
+ /* An indirection should always have a non-zero level.
+ * Only constant leaf nodes have level 0.
+ */
+
+ if (level == 0)
+ {
+ level = 1;
+ }
+
+ /* Indirections have a costEx of IND_COST_EX */
+ costEx = IND_COST_EX;
+ costSz = 2;
+
+ /* If we have to sign-extend or zero-extend, bump the cost */
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ costEx += 1;
+ costSz += 1;
+ }
+
+ if (isflt)
+ {
+#if FEATURE_STACK_FP_X87
+ /* Indirect loads of FP values push a new value on the FP stack */
+ codeGen->genIncrementFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ if (tree->TypeGet() == TYP_DOUBLE)
+ {
+ costEx += 1;
+ }
+#ifdef _TARGET_ARM_
+ costSz += 2;
+#endif // _TARGET_ARM_
+ }
+
+ // Can we form an addressing mode with this indirection?
+ // TODO-CQ: Consider changing this to op1->gtEffectiveVal() to take into account
+ // addressing modes hidden under a comma node.
+
+ if (op1->gtOper == GT_ADD)
+ {
+ bool rev;
+#if SCALED_ADDR_MODES
+ unsigned mul;
+#endif
+ unsigned cns;
+ GenTreePtr base;
+ GenTreePtr idx;
+
+ // See if we can form a complex addressing mode.
+
+ GenTreePtr addr = op1->gtEffectiveVal();
+
+ bool doAddrMode = true;
+ // Always use an addrMode for an array index indirection.
+ // TODO-1stClassStructs: Always do this, but first make sure it's
+ // done in Lowering as well.
+ if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ doAddrMode = false;
+ }
+ else if (varTypeIsStruct(tree))
+ {
+ // This is a heuristic attempting to match prior behavior when indirections
+ // under a struct assignment would not be considered for addressing modes.
+ if (compCurStmt != nullptr)
+ {
+ GenTree* expr = compCurStmt->gtStmt.gtStmtExpr;
+ if ((expr->OperGet() == GT_ASG) &&
+ ((expr->gtGetOp1() == tree) || (expr->gtGetOp2() == tree)))
+ {
+ doAddrMode = false;
+ }
+ }
+ }
+ }
+ if ((doAddrMode) &&
+ codeGen->genCreateAddrMode(addr, // address
+ 0, // mode
+ false, // fold
+ RBM_NONE, // reg mask
+ &rev, // reverse ops
+ &base, // base addr
+ &idx, // index val
+#if SCALED_ADDR_MODES
+ &mul, // scaling
+#endif
+ &cns, // displacement
+ true)) // don't generate code
+ {
+ // We can form a complex addressing mode, so mark each of the interior
+ // nodes with GTF_ADDRMODE_NO_CSE and calculate a more accurate cost.
+
+ addr->gtFlags |= GTF_ADDRMODE_NO_CSE;
+#ifdef _TARGET_XARCH_
+ // addrmodeCount is the count of items that we used to form
+ // an addressing mode. The maximum value is 4 when we have
+ // all of these: { base, idx, cns, mul }
+ //
+ unsigned addrmodeCount = 0;
+ if (base)
+ {
+ costEx += base->gtCostEx;
+ costSz += base->gtCostSz;
+ addrmodeCount++;
+ }
+
+ if (idx)
+ {
+ costEx += idx->gtCostEx;
+ costSz += idx->gtCostSz;
+ addrmodeCount++;
+ }
+
+ if (cns)
+ {
+ if (((signed char)cns) == ((int)cns))
+ {
+ costSz += 1;
+ }
+ else
+ {
+ costSz += 4;
+ }
+ addrmodeCount++;
+ }
+ if (mul)
+ {
+ addrmodeCount++;
+ }
+ // When we form a complex addressing mode we can reduce the costs
+ // associated with the interior GT_ADD and GT_LSH nodes:
+ //
+ // GT_ADD -- reduce this interior GT_ADD by (-3,-3)
+ // / \ --
+ // GT_ADD 'cns' -- reduce this interior GT_ADD by (-2,-2)
+ // / \ --
+ // 'base' GT_LSH -- reduce this interior GT_LSH by (-1,-1)
+ // / \ --
+ // 'idx' 'mul'
+ //
+ if (addrmodeCount > 1)
+ {
+ // The number of interior GT_ADD and GT_LSH nodes will always be one less than addrmodeCount
+ //
+ addrmodeCount--;
+
+ GenTreePtr tmp = addr;
+ while (addrmodeCount > 0)
+ {
+ // decrement the gtCosts for the interior GT_ADD or GT_LSH node by the remaining
+ // addrmodeCount
+ tmp->SetCosts(tmp->gtCostEx - addrmodeCount, tmp->gtCostSz - addrmodeCount);
+
+ addrmodeCount--;
+ if (addrmodeCount > 0)
+ {
+ GenTreePtr tmpOp1 = tmp->gtOp.gtOp1;
+ GenTreePtr tmpOp2 = tmp->gtGetOp2();
+ assert(tmpOp2 != nullptr);
+
+ if ((tmpOp1 != base) && (tmpOp1->OperGet() == GT_ADD))
+ {
+ tmp = tmpOp1;
+ }
+ else if (tmpOp2->OperGet() == GT_LSH)
+ {
+ tmp = tmpOp2;
+ }
+ else if (tmpOp1->OperGet() == GT_LSH)
+ {
+ tmp = tmpOp1;
+ }
+ else if (tmpOp2->OperGet() == GT_ADD)
+ {
+ tmp = tmpOp2;
+ }
+ else
+ {
+ // We can very rarely encounter a tree that has a GT_COMMA node
+ // that is difficult to walk, so we just early out without decrementing.
+ addrmodeCount = 0;
+ }
+ }
+ }
+ }
+#elif defined _TARGET_ARM_
+ if (base)
+ {
+ costEx += base->gtCostEx;
+ costSz += base->gtCostSz;
+ if ((base->gtOper == GT_LCL_VAR) && ((idx == NULL) || (cns == 0)))
+ {
+ costSz -= 1;
+ }
+ }
+
+ if (idx)
+ {
+ costEx += idx->gtCostEx;
+ costSz += idx->gtCostSz;
+ if (mul > 0)
+ {
+ costSz += 2;
+ }
+ }
+
+ if (cns)
+ {
+ if (cns >= 128) // small offsets fit into a 16-bit instruction
+ {
+ if (cns < 4096) // medium offsets require a 32-bit instruction
+ {
+ if (!isflt)
+ costSz += 2;
+ }
+ else
+ {
+ costEx += 2; // Very large offsets require movw/movt instructions
+ costSz += 8;
+ }
+ }
+ }
+#elif defined _TARGET_ARM64_
+ if (base)
+ {
+ costEx += base->gtCostEx;
+ costSz += base->gtCostSz;
+ }
+
+ if (idx)
+ {
+ costEx += idx->gtCostEx;
+ costSz += idx->gtCostSz;
+ }
+
+ if (cns != 0)
+ {
+ if (cns >= (4096 * genTypeSize(tree->TypeGet())))
+ {
+ costEx += 1;
+ costSz += 4;
+ }
+ }
+#else
+#error "Unknown _TARGET_"
+#endif
+
+ assert(addr->gtOper == GT_ADD);
+ assert(!addr->gtOverflow());
+ assert(op2 == nullptr);
+ assert(mul != 1);
+
+ // If we have an addressing mode, we have one of:
+ // [base + cns]
+ // [ idx * mul ] // mul >= 2, else we would use base instead of idx
+ // [ idx * mul + cns] // mul >= 2, else we would use base instead of idx
+ // [base + idx * mul ] // mul can be 0, 2, 4, or 8
+ // [base + idx * mul + cns] // mul can be 0, 2, 4, or 8
+ // Note that mul == 0 is semantically equivalent to mul == 1.
+ // Note that cns can be zero.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if SCALED_ADDR_MODES
+ assert((base != nullptr) || (idx != nullptr && mul >= 2));
+#else
+ assert(base != NULL);
+#endif
+
+ INDEBUG(GenTreePtr op1Save = addr);
+
+ /* Walk addr looking for non-overflow GT_ADDs */
+ gtWalkOp(&addr, &op2, base, false);
+
+ // addr and op2 are now children of the root GT_ADD of the addressing mode
+ assert(addr != op1Save);
+ assert(op2 != nullptr);
+
+ /* Walk addr looking for non-overflow GT_ADDs of constants */
+ gtWalkOp(&addr, &op2, nullptr, true);
+
+ // TODO-Cleanup: It seems very strange that we might walk down op2 now, even though
+ // the prior call to gtWalkOp() may have altered op2.
+
+ /* Walk op2 looking for non-overflow GT_ADDs of constants */
+ gtWalkOp(&op2, &addr, nullptr, true);
+
+ // OK we are done walking the tree
+ // Now assert that addr and op2 correspond with base and idx
+ // in one of the several acceptable ways.
+
+ // Note that sometimes addr/op2 is equal to idx/base
+ // and other times addr/op2 is a GT_COMMA node with
+ // an effective value that is idx/base
+
+ if (mul > 1)
+ {
+ if ((addr != base) && (addr->gtOper == GT_LSH))
+ {
+ addr->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ if (addr->gtOp.gtOp1->gtOper == GT_MUL)
+ {
+ addr->gtOp.gtOp1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ }
+ assert((base == nullptr) || (op2 == base) ||
+ (op2->gtEffectiveVal() == base->gtEffectiveVal()) ||
+ (gtWalkOpEffectiveVal(op2) == gtWalkOpEffectiveVal(base)));
+ }
+ else
+ {
+ assert(op2);
+ assert(op2->gtOper == GT_LSH || op2->gtOper == GT_MUL);
+ op2->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ // We may have eliminated multiple shifts and multiplies in the addressing mode,
+ // so navigate down through them to get to "idx".
+ GenTreePtr op2op1 = op2->gtOp.gtOp1;
+ while ((op2op1->gtOper == GT_LSH || op2op1->gtOper == GT_MUL) && op2op1 != idx)
+ {
+ op2op1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ op2op1 = op2op1->gtOp.gtOp1;
+ }
+ assert(addr->gtEffectiveVal() == base);
+ assert(op2op1 == idx);
+ }
+ }
+ else
+ {
+ assert(mul == 0);
+
+ if ((addr == idx) || (addr->gtEffectiveVal() == idx))
+ {
+ if (idx != nullptr)
+ {
+ if ((addr->gtOper == GT_MUL) || (addr->gtOper == GT_LSH))
+ {
+ if ((addr->gtOp.gtOp1->gtOper == GT_NOP) ||
+ (addr->gtOp.gtOp1->gtOper == GT_MUL &&
+ addr->gtOp.gtOp1->gtOp.gtOp1->gtOper == GT_NOP))
+ {
+ addr->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ if (addr->gtOp.gtOp1->gtOper == GT_MUL)
+ {
+ addr->gtOp.gtOp1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ }
+ }
+ }
+ }
+ assert((op2 == base) || (op2->gtEffectiveVal() == base));
+ }
+ else if ((addr == base) || (addr->gtEffectiveVal() == base))
+ {
+ if (idx != nullptr)
+ {
+ assert(op2);
+ if ((op2->gtOper == GT_MUL) || (op2->gtOper == GT_LSH))
+ {
+ if ((op2->gtOp.gtOp1->gtOper == GT_NOP) ||
+ (op2->gtOp.gtOp1->gtOper == GT_MUL &&
+ op2->gtOp.gtOp1->gtOp.gtOp1->gtOper == GT_NOP))
+ {
+ // assert(bRngChk);
+ op2->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ if (op2->gtOp.gtOp1->gtOper == GT_MUL)
+ {
+ op2->gtOp.gtOp1->gtFlags |= GTF_ADDRMODE_NO_CSE;
+ }
+ }
+ }
+ assert((op2 == idx) || (op2->gtEffectiveVal() == idx));
+ }
+ }
+ else
+ {
+ // addr isn't base or idx. Is this possible? Or should there be an assert?
+ }
+ }
+ goto DONE;
+
+ } // end if (genCreateAddrMode(...))
+
+ } // end if (op1->gtOper == GT_ADD)
+ else if (gtIsLikelyRegVar(op1))
+ {
+ /* Indirection of an enregistered LCL_VAR, don't increase costEx/costSz */
+ goto DONE;
+ }
+#ifdef _TARGET_XARCH_
+ else if (op1->IsCnsIntOrI())
+ {
+ // Indirection of a CNS_INT: subtract 1 from costEx,
+ // which makes costEx 3 for x86 and 4 for amd64
+ //
+ costEx += (op1->gtCostEx - 1);
+ costSz += op1->gtCostSz;
+ goto DONE;
+ }
+#endif
+ break;
+
+ default:
+ break;
+ }
+ costEx += op1->gtCostEx;
+ costSz += op1->gtCostSz;
+ goto DONE;
+ }
+
+ /* Binary operator - check for certain special cases */
+
+ lvlb = 0;
+
+ /* Default Binary ops have a cost of 1,1 */
+ costEx = 1;
+ costSz = 1;
+
+#ifdef _TARGET_ARM_
+ if (isflt)
+ {
+ costSz += 2;
+ }
+#endif
+#ifndef _TARGET_64BIT_
+ if (varTypeIsLong(op1->TypeGet()))
+ {
+ /* Operations on longs are more expensive */
+ costEx += 3;
+ costSz += 3;
+ }
+#endif
+ switch (oper)
+ {
+ case GT_MOD:
+ case GT_UMOD:
+
+ /* Modulo by a power of 2 is easy */
+
+ if (op2->IsCnsIntOrI())
+ {
+ size_t ival = op2->gtIntConCommon.IconValue();
+
+ if (ival > 0 && ival == genFindLowestBit(ival))
+ {
+ break;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_DIV:
+ case GT_UDIV:
+
+ if (isflt)
+ {
+ /* fp division is very expensive to execute */
+ costEx = 36; // TYP_DOUBLE
+ costSz += 3;
+ }
+ else
+ {
+ /* integer division is also very expensive */
+ costEx = 20;
+ costSz += 2;
+
+ // Encourage the first operand to be evaluated (into EAX/EDX) first
+ lvlb -= 3;
+
+#ifdef _TARGET_XARCH_
+ // the idiv and div instructions require EAX/EDX
+ ftreg |= RBM_EAX | RBM_EDX;
+#endif
+ }
+ break;
+
+ case GT_MUL:
+
+ if (isflt)
+ {
+ /* FP multiplication instructions are more expensive */
+ costEx += 4;
+ costSz += 3;
+ }
+ else
+ {
+ /* Integer multiplication instructions are more expensive */
+ costEx += 3;
+ costSz += 2;
+
+ if (tree->gtOverflow())
+ {
+ /* Overflow checks are more expensive */
+ costEx += 3;
+ costSz += 3;
+ }
+
+#ifdef _TARGET_X86_
+ if ((tree->gtType == TYP_LONG) || tree->gtOverflow())
+ {
+ /* We use imulEAX for TYP_LONG and overflow multiplications */
+ // Encourage the first operand to be evaluated (into EAX/EDX) first
+ lvlb -= 4;
+
+ // the imulEAX instruction on x86 requires EDX:EAX
+ ftreg |= (RBM_EAX | RBM_EDX);
+
+ /* The 64-bit imul instruction costs more */
+ costEx += 4;
+ }
+#endif // _TARGET_X86_
+ }
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+
+ if (isflt)
+ {
+ /* FP instructions are a bit more expensive */
+ costEx += 4;
+ costSz += 3;
+ break;
+ }
+
+ /* Overflow checks are more expensive */
+ if (tree->gtOverflow())
+ {
+ costEx += 3;
+ costSz += 3;
+ }
+ break;
+
+ case GT_COMMA:
+
+ /* Comma tosses the result of the left operand */
+ gtSetEvalOrderAndRestoreFPstkLevel(op1);
+ level = gtSetEvalOrder(op2);
+
+ ftreg |= op1->gtRsvdRegs | op2->gtRsvdRegs;
+
+ /* GT_COMMA cost is the sum of op1 and op2 costs */
+ costEx = (op1->gtCostEx + op2->gtCostEx);
+ costSz = (op1->gtCostSz + op2->gtCostSz);
+
+ goto DONE;
+
+ case GT_COLON:
+
+ level = gtSetEvalOrderAndRestoreFPstkLevel(op1);
+ lvl2 = gtSetEvalOrder(op2);
+
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ else if (level == lvl2)
+ {
+ level += 1;
+ }
+
+ ftreg |= op1->gtRsvdRegs | op2->gtRsvdRegs;
+ costEx = op1->gtCostEx + op2->gtCostEx;
+ costSz = op1->gtCostSz + op2->gtCostSz;
+
+ goto DONE;
+
+ default:
+ break;
+ }
+
+ /* Assignments need a bit of special handling */
+
+ if (kind & GTK_ASGOP)
+ {
+ /* Process the target */
+
+ level = gtSetEvalOrder(op1);
+
+#if FEATURE_STACK_FP_X87
+
+ /* If assigning an FP value, the target won't get pushed */
+
+ if (isflt && !tree->IsPhiDefn())
+ {
+ op1->gtFPlvl--;
+ codeGen->genDecrementFPstkLevel();
+ }
+
+#endif // FEATURE_STACK_FP_X87
+
+ if (gtIsLikelyRegVar(op1))
+ {
+ assert(lvlb == 0);
+ lvl2 = gtSetEvalOrder(op2);
+ if (oper != GT_ASG)
+ {
+ ftreg |= op2->gtRsvdRegs;
+ }
+
+ /* Assignment to an enregistered LCL_VAR */
+ costEx = op2->gtCostEx;
+ costSz = max(3, op2->gtCostSz); // 3 is an estimate for a reg-reg assignment
+ goto DONE_OP1_AFTER_COST;
+ }
+ else if (oper != GT_ASG)
+ {
+ // Assign-Op instructions read and write op1
+ //
+ costEx += op1->gtCostEx;
+#ifdef _TARGET_ARM_
+ costSz += op1->gtCostSz;
+#endif
+ }
+
+ goto DONE_OP1;
+ }
+
+ /* Process the sub-operands */
+
+ level = gtSetEvalOrder(op1);
+ if (lvlb < 0)
+ {
+ level -= lvlb; // lvlb is negative, so this increases level
+ lvlb = 0;
+ }
+
+ DONE_OP1:
+ assert(lvlb >= 0);
+ lvl2 = gtSetEvalOrder(op2) + lvlb;
+ ftreg |= op1->gtRsvdRegs;
+ // For assignment, we execute op2 before op1, except that for block
+ // ops the destination address is evaluated first.
+ if ((oper != GT_ASG) || tree->OperIsBlkOp())
+ {
+ ftreg |= op2->gtRsvdRegs;
+ }
+
+ costEx += (op1->gtCostEx + op2->gtCostEx);
+ costSz += (op1->gtCostSz + op2->gtCostSz);
+
+ DONE_OP1_AFTER_COST:
+#if FEATURE_STACK_FP_X87
+ /*
+ Binary FP operators pop 2 operands and produce 1 result;
+ FP comparisons pop 2 operands and produces 0 results.
+ assignments consume 1 value and don't produce anything.
+ */
+
+ if (isflt && !tree->IsPhiDefn())
+ {
+ assert(oper != GT_COMMA);
+ codeGen->genDecrementFPstkLevel();
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ bool bReverseInAssignment = false;
+ if (kind & GTK_ASGOP)
+ {
+ GenTreePtr op1Val = op1;
+
+ if (tree->gtOper == GT_ASG)
+ {
+ // Skip over the GT_IND/GT_ADDR tree (if one exists)
+ //
+ if ((op1->gtOper == GT_IND) && (op1->gtOp.gtOp1->gtOper == GT_ADDR))
+ {
+ op1Val = op1->gtOp.gtOp1->gtOp.gtOp1;
+ }
+ }
+
+ switch (op1Val->gtOper)
+ {
+ case GT_IND:
+
+ // Struct assignments are different from scalar assignments in that semantically
+ // the address of op1 is evaluated prior to op2.
+ if (!varTypeIsStruct(op1))
+ {
+ // If we have any side effects on the GT_IND child node
+ // we have to evaluate op1 first.
+ if (op1Val->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT)
+ {
+ break;
+ }
+ }
+
+ // In case op2 assigns to a local var that is used in op1Val, we have to evaluate op1Val first.
+ if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+
+ // If op2 is simple then evaluate op1 first
+
+ if (op2->OperKind() & GTK_LEAF)
+ {
+ break;
+ }
+
+ // fall through and set GTF_REVERSE_OPS
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+
+ // We evaluate op2 before op1
+ bReverseInAssignment = true;
+ tree->gtFlags |= GTF_REVERSE_OPS;
+ break;
+
+ default:
+ break;
+ }
+ }
+ else if (kind & GTK_RELOP)
+ {
+ /* Float compares remove both operands from the FP stack */
+ /* Also FP comparison uses EAX for flags */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+#if FEATURE_STACK_FP_X87
+ codeGen->genDecrementFPstkLevel(2);
+#endif // FEATURE_STACK_FP_X87
+#ifdef _TARGET_XARCH_
+ ftreg |= RBM_EAX;
+#endif
+ level++;
+ lvl2++;
+ }
+#if CPU_LONG_USES_REGPAIR
+ if (varTypeIsLong(op1->TypeGet()))
+ {
+ costEx *= 2; // Longs are twice as expensive
+ costSz *= 2;
+ }
+#endif
+ if ((tree->gtFlags & GTF_RELOP_JMP_USED) == 0)
+ {
+ /* Using a setcc instruction is more expensive */
+ costEx += 3;
+ }
+ }
+
+ /* Check for other interesting cases */
+
+ switch (oper)
+ {
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+
+ /* Variable sized shifts are more expensive and use REG_SHIFT */
+
+ if (!op2->IsCnsIntOrI())
+ {
+ costEx += 3;
+ if (REG_SHIFT != REG_NA)
+ {
+ ftreg |= RBM_SHIFT;
+ }
+
+#ifndef _TARGET_64BIT_
+ // Variable sized LONG shifts require the use of a helper call
+ //
+ if (tree->gtType == TYP_LONG)
+ {
+ level += 5;
+ lvl2 += 5;
+ costEx += 3 * IND_COST_EX;
+ costSz += 4;
+ ftreg |= RBM_CALLEE_TRASH;
+ }
+#endif // !_TARGET_64BIT_
+ }
+ break;
+
+ case GT_INTRINSIC:
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Pow:
+ // These math intrinsics are actually implemented by user calls.
+ // Increase the Sethi 'complexity' by two to reflect the argument
+ // register requirement.
+ level += 2;
+ break;
+ default:
+ assert(!"Unknown binary GT_INTRINSIC operator");
+ break;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* We need to evaluate constants later as many places in codegen
+ can't handle op1 being a constant. This is normally naturally
+ enforced as constants have the lowest level of 0. However,
+ sometimes we end up with a tree like "cns1 < nop(cns2)". In
+ such cases, both sides have a level of 0. So encourage constants
+ to be evaluated last in such cases */
+
+ if ((level == 0) && (level == lvl2) && (op1->OperKind() & GTK_CONST) &&
+ (tree->OperIsCommutative() || tree->OperIsCompare()))
+ {
+ lvl2++;
+ }
+
+ /* We try to swap operands if the second one is more expensive */
+ bool tryToSwap;
+ GenTreePtr opA, opB;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ opA = op2;
+ opB = op1;
+ }
+ else
+ {
+ opA = op1;
+ opB = op2;
+ }
+
+ if (fgOrder == FGOrderLinear)
+ {
+ // Don't swap anything if we're in linear order; we're really just interested in the costs.
+ tryToSwap = false;
+ }
+ else if (bReverseInAssignment)
+ {
+ // Assignments are special: we rely on the GTF_REVERSE_OPS flag,
+ // which, if applicable, was already set above.
+ tryToSwap = false;
+ }
+ else
+ {
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tryToSwap = (level > lvl2);
+ }
+ else
+ {
+ tryToSwap = (level < lvl2);
+ }
+
+ // Try to force extra swapping when in the stress mode:
+ if (compStressCompile(STRESS_REVERSE_FLAG, 60) && ((tree->gtFlags & GTF_REVERSE_OPS) == 0) &&
+ ((op2->OperKind() & GTK_CONST) == 0))
+ {
+ tryToSwap = true;
+ }
+ }
+
+ if (tryToSwap)
+ {
+ bool canSwap = gtCanSwapOrder(opA, opB);
+
+ if (canSwap)
+ {
+ /* Can we swap the order by commuting the operands? */
+
+ switch (oper)
+ {
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (GenTree::SwapRelop(oper) != oper)
+ {
+ // SetOper will obliterate the VN for the underlying expression.
+ // If we're in VN CSE phase, we don't want to lose that information,
+ // so save the value numbers and put them back after the SetOper.
+ ValueNumPair vnp = tree->gtVNPair;
+ tree->SetOper(GenTree::SwapRelop(oper));
+ if (optValnumCSE_phase)
+ {
+ tree->gtVNPair = vnp;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_ADD:
+ case GT_MUL:
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ /* Swap the operands */
+
+ tree->gtOp.gtOp1 = op2;
+ tree->gtOp.gtOp2 = op1;
+
+#if FEATURE_STACK_FP_X87
+ /* We may have to recompute FP levels */
+ if (op1->gtFPlvl || op2->gtFPlvl)
+ gtFPstLvlRedo = true;
+#endif // FEATURE_STACK_FP_X87
+ break;
+
+ case GT_QMARK:
+ case GT_COLON:
+ case GT_MKREFANY:
+ break;
+
+ case GT_LIST:
+ break;
+
+ case GT_SUB:
+#ifdef LEGACY_BACKEND
+ // For LSRA we require that LclVars be "evaluated" just prior to their use,
+ // so that if they must be reloaded, it is done at the right place.
+ // This means that we allow reverse evaluation for all BINOPs.
+ // (Note that this doesn't affect the order of the operands in the instruction).
+ if (!isflt)
+ break;
+#endif // LEGACY_BACKEND
+
+ __fallthrough;
+
+ default:
+
+ /* Mark the operand's evaluation order to be swapped */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tree->gtFlags &= ~GTF_REVERSE_OPS;
+ }
+ else
+ {
+ tree->gtFlags |= GTF_REVERSE_OPS;
+ }
+
+#if FEATURE_STACK_FP_X87
+ /* We may have to recompute FP levels */
+ if (op1->gtFPlvl || op2->gtFPlvl)
+ gtFPstLvlRedo = true;
+#endif // FEATURE_STACK_FP_X87
+
+ break;
+ }
+ }
+ }
+
+ /* Swap the level counts */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ unsigned tmpl;
+
+ tmpl = level;
+ level = lvl2;
+ lvl2 = tmpl;
+ }
+
+ /* Compute the sethi number for this binary operator */
+
+ if (level < 1)
+ {
+ level = lvl2;
+ }
+ else if (level == lvl2)
+ {
+ level += 1;
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ unsigned lvl2; // Scratch variable
+
+ case GT_CALL:
+
+ assert(tree->gtFlags & GTF_CALL);
+
+ level = 0;
+ costEx = 5;
+ costSz = 2;
+
+ /* Evaluate the 'this' argument, if present */
+
+ if (tree->gtCall.gtCallObjp)
+ {
+ GenTreePtr thisVal = tree->gtCall.gtCallObjp;
+
+ lvl2 = gtSetEvalOrder(thisVal);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += thisVal->gtCostEx;
+ costSz += thisVal->gtCostSz + 1;
+ ftreg |= thisVal->gtRsvdRegs;
+ }
+
+ /* Evaluate the arguments, right to left */
+
+ if (tree->gtCall.gtCallArgs)
+ {
+#if FEATURE_STACK_FP_X87
+ FPlvlSave = codeGen->genGetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ lvl2 = gtSetListOrder(tree->gtCall.gtCallArgs, false);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtCall.gtCallArgs->gtCostEx;
+ costSz += tree->gtCall.gtCallArgs->gtCostSz;
+ ftreg |= tree->gtCall.gtCallArgs->gtRsvdRegs;
+#if FEATURE_STACK_FP_X87
+ codeGen->genResetFPstkLevel(FPlvlSave);
+#endif // FEATURE_STACK_FP_X87
+ }
+
+ /* Evaluate the temp register arguments list
+ * This is a "hidden" list and its only purpose is to
+ * extend the life of temps until we make the call */
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+#if FEATURE_STACK_FP_X87
+ FPlvlSave = codeGen->genGetFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ lvl2 = gtSetListOrder(tree->gtCall.gtCallLateArgs, true);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtCall.gtCallLateArgs->gtCostEx;
+ costSz += tree->gtCall.gtCallLateArgs->gtCostSz;
+ ftreg |= tree->gtCall.gtCallLateArgs->gtRsvdRegs;
+#if FEATURE_STACK_FP_X87
+ codeGen->genResetFPstkLevel(FPlvlSave);
+#endif // FEATURE_STACK_FP_X87
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ // pinvoke-calli cookie is a constant, or constant indirection
+ assert(tree->gtCall.gtCallCookie == nullptr || tree->gtCall.gtCallCookie->gtOper == GT_CNS_INT ||
+ tree->gtCall.gtCallCookie->gtOper == GT_IND);
+
+ GenTreePtr indirect = tree->gtCall.gtCallAddr;
+
+ lvl2 = gtSetEvalOrder(indirect);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += indirect->gtCostEx + IND_COST_EX;
+ costSz += indirect->gtCostSz;
+ ftreg |= indirect->gtRsvdRegs;
+ }
+ else
+ {
+#ifdef _TARGET_ARM_
+ if ((tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB)
+ {
+ // We generate movw/movt/ldr
+ costEx += (1 + IND_COST_EX);
+ costSz += 8;
+ if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT)
+ {
+ // Must use R12 for the ldr target -- REG_JUMP_THUNK_PARAM
+ costSz += 2;
+ }
+ }
+ else if ((opts.eeFlags & CORJIT_FLG_PREJIT) == 0)
+ {
+ costEx += 2;
+ costSz += 6;
+ }
+ costSz += 2;
+#endif
+#ifdef _TARGET_XARCH_
+ costSz += 3;
+#endif
+ }
+
+ level += 1;
+
+ unsigned callKind;
+ callKind = (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK);
+
+ /* Virtual calls are a bit more expensive */
+ if (callKind != GTF_CALL_NONVIRT)
+ {
+ costEx += 2 * IND_COST_EX;
+ costSz += 2;
+ }
+
+ /* Virtual stub calls also must reserve the VIRTUAL_STUB_PARAM reg */
+ if (callKind == GTF_CALL_VIRT_STUB)
+ {
+ ftreg |= RBM_VIRTUAL_STUB_PARAM;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+#ifdef _TARGET_ARM64_
+ if (tree->gtCall.IsR2RRelativeIndir())
+ {
+ ftreg |= RBM_R2R_INDIRECT_PARAM;
+ }
+#endif
+#endif
+
+#if GTF_CALL_REG_SAVE
+ // Normally function calls don't preserve caller save registers
+ // and thus are much more expensive.
+ // However a few function calls do preserve these registers
+ // such as the GC WriteBarrier helper calls.
+
+ if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
+#endif
+ {
+ level += 5;
+ costEx += 3 * IND_COST_EX;
+ ftreg |= RBM_CALLEE_TRASH;
+ }
+
+#if FEATURE_STACK_FP_X87
+ if (isflt)
+ codeGen->genIncrementFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+
+ break;
+
+ case GT_ARR_ELEM:
+
+ level = gtSetEvalOrder(tree->gtArrElem.gtArrObj);
+ costEx = tree->gtArrElem.gtArrObj->gtCostEx;
+ costSz = tree->gtArrElem.gtArrObj->gtCostSz;
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ lvl2 = gtSetEvalOrder(tree->gtArrElem.gtArrInds[dim]);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtArrElem.gtArrInds[dim]->gtCostEx;
+ costSz += tree->gtArrElem.gtArrInds[dim]->gtCostSz;
+ }
+
+#if FEATURE_STACK_FP_X87
+ if (isflt)
+ codeGen->genIncrementFPstkLevel();
+#endif // FEATURE_STACK_FP_X87
+ level += tree->gtArrElem.gtArrRank;
+ costEx += 2 + (tree->gtArrElem.gtArrRank * (IND_COST_EX + 1));
+ costSz += 2 + (tree->gtArrElem.gtArrRank * 2);
+ break;
+
+ case GT_ARR_OFFSET:
+ level = gtSetEvalOrder(tree->gtArrOffs.gtOffset);
+ costEx = tree->gtArrOffs.gtOffset->gtCostEx;
+ costSz = tree->gtArrOffs.gtOffset->gtCostSz;
+ lvl2 = gtSetEvalOrder(tree->gtArrOffs.gtIndex);
+ level = max(level, lvl2);
+ costEx += tree->gtArrOffs.gtIndex->gtCostEx;
+ costSz += tree->gtArrOffs.gtIndex->gtCostSz;
+ lvl2 = gtSetEvalOrder(tree->gtArrOffs.gtArrObj);
+ level = max(level, lvl2);
+ costEx += tree->gtArrOffs.gtArrObj->gtCostEx;
+ costSz += tree->gtArrOffs.gtArrObj->gtCostSz;
+ break;
+
+ case GT_CMPXCHG:
+
+ level = gtSetEvalOrder(tree->gtCmpXchg.gtOpLocation);
+ costSz = tree->gtCmpXchg.gtOpLocation->gtCostSz;
+
+ lvl2 = gtSetEvalOrder(tree->gtCmpXchg.gtOpValue);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costSz += tree->gtCmpXchg.gtOpValue->gtCostSz;
+
+ lvl2 = gtSetEvalOrder(tree->gtCmpXchg.gtOpComparand);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costSz += tree->gtCmpXchg.gtOpComparand->gtCostSz;
+
+ costEx = MAX_COST; // Seriously, what could be more expensive than lock cmpxchg?
+ costSz += 5; // size of lock cmpxchg [reg+C], reg
+#ifdef _TARGET_XARCH_
+ ftreg |= RBM_EAX; // cmpxchg must be evaluated into eax.
+#endif
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ costEx = 4; // cmp reg,reg and jae throw (not taken)
+ costSz = 7; // jump to cold section
+
+ level = gtSetEvalOrder(tree->gtBoundsChk.gtArrLen);
+ costEx += tree->gtBoundsChk.gtArrLen->gtCostEx;
+ costSz += tree->gtBoundsChk.gtArrLen->gtCostSz;
+
+ lvl2 = gtSetEvalOrder(tree->gtBoundsChk.gtIndex);
+ if (level < lvl2)
+ {
+ level = lvl2;
+ }
+ costEx += tree->gtBoundsChk.gtIndex->gtCostEx;
+ costSz += tree->gtBoundsChk.gtIndex->gtCostSz;
+
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ {
+ costEx = 0;
+ costSz = 0;
+ level = 0;
+ if (oper == GT_STORE_DYN_BLK)
+ {
+ lvl2 = gtSetEvalOrder(tree->gtDynBlk.Data());
+ level = max(level, lvl2);
+ costEx += tree->gtDynBlk.Data()->gtCostEx;
+ costSz += tree->gtDynBlk.Data()->gtCostSz;
+ }
+ lvl2 = gtSetEvalOrder(tree->gtDynBlk.Addr());
+ level = max(level, lvl2);
+            costEx += tree->gtDynBlk.Addr()->gtCostEx;
+            costSz += tree->gtDynBlk.Addr()->gtCostSz;
+ unsigned sizeLevel = gtSetEvalOrder(tree->gtDynBlk.gtDynamicSize);
+
+ // Determine whether the size node should be evaluated first.
+ // We would like to do this if the sizeLevel is larger than the current level,
+ // but we have to ensure that we obey ordering constraints.
+ if (tree->AsDynBlk()->gtEvalSizeFirst != (level < sizeLevel))
+ {
+ bool canChange = true;
+
+ GenTree* sizeNode = tree->AsDynBlk()->gtDynamicSize;
+ GenTree* dst = tree->AsDynBlk()->Addr();
+ GenTree* src = tree->AsDynBlk()->Data();
+
+ if (tree->AsDynBlk()->gtEvalSizeFirst)
+ {
+ canChange = gtCanSwapOrder(sizeNode, dst);
+ if (canChange && (src != nullptr))
+ {
+ canChange = gtCanSwapOrder(sizeNode, src);
+ }
+ }
+ else
+ {
+ canChange = gtCanSwapOrder(dst, sizeNode);
+ if (canChange && (src != nullptr))
+ {
+                        canChange = gtCanSwapOrder(src, sizeNode);
+ }
+ }
+ if (canChange)
+ {
+ tree->AsDynBlk()->gtEvalSizeFirst = (level < sizeLevel);
+ }
+ }
+ level = max(level, sizeLevel);
+ costEx += tree->gtDynBlk.gtDynamicSize->gtCostEx;
+ costSz += tree->gtDynBlk.gtDynamicSize->gtCostSz;
+ }
+ break;
+
+ default:
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("unexpected operator in this tree:\n");
+ gtDispTree(tree);
+ }
+#endif
+ NO_WAY("unexpected operator");
+ }
+
+DONE:
+
+#if FEATURE_STACK_FP_X87
+ // printf("[FPlvl=%2u] ", genGetFPstkLevel()); gtDispTree(tree, 0, true);
+ noway_assert((unsigned char)codeGen->genFPstkLevel == codeGen->genFPstkLevel);
+ tree->gtFPlvl = (unsigned char)codeGen->genFPstkLevel;
+
+ if (codeGen->genFPstkLevel > tmpDoubleSpillMax)
+ tmpDoubleSpillMax = codeGen->genFPstkLevel;
+#endif // FEATURE_STACK_FP_X87
+
+ tree->gtRsvdRegs = (regMaskSmall)ftreg;
+
+ // Some path through this function must have set the costs.
+ assert(costEx != -1);
+ assert(costSz != -1);
+
+ tree->SetCosts(costEx, costSz);
+
+ return level;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
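+
+// Note on the level combination at the end of the binary-operator path above (a restatement,
+// not new behavior): with 'level'/'lvl2' already swapped to match the chosen evaluation order,
+// the parent keeps the first operand's level, except that a first operand of level 0 simply
+// adopts the second operand's level, and equal levels cost one extra. Illustrative cases:
+//
+//     level == 0, lvl2 == 2  ->  parent level 2   (a constant/leaf first operand adds no pressure)
+//     level == 2, lvl2 == 2  ->  parent level 3   (both subtrees' values must be live at once)
+//     level == 3, lvl2 == 1  ->  parent level 3   (the deeper first operand dominates)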
+
+#if FEATURE_STACK_FP_X87
+
+/*****************************************************************************/
+void Compiler::gtComputeFPlvls(GenTreePtr tree)
+{
+ genTreeOps oper;
+ unsigned kind;
+ bool isflt;
+ unsigned savFPstkLevel;
+
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+
+ /* Figure out what kind of a node we have */
+
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+ isflt = varTypeIsFloating(tree->TypeGet()) ? 1 : 0;
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ codeGen->genFPstkLevel += isflt;
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /* Check for some special cases */
+
+ switch (oper)
+ {
+ case GT_IND:
+
+ gtComputeFPlvls(op1);
+
+ /* Indirect loads of FP values push a new value on the FP stack */
+
+ codeGen->genFPstkLevel += isflt;
+ goto DONE;
+
+ case GT_CAST:
+
+ gtComputeFPlvls(op1);
+
+ /* Casts between non-FP and FP push on / pop from the FP stack */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ if (isflt == false)
+ codeGen->genFPstkLevel--;
+ }
+ else
+ {
+ if (isflt != false)
+ codeGen->genFPstkLevel++;
+ }
+
+ goto DONE;
+
+ case GT_LIST: /* GT_LIST presumably part of an argument list */
+ case GT_COMMA: /* Comma tosses the result of the left operand */
+
+ savFPstkLevel = codeGen->genFPstkLevel;
+ gtComputeFPlvls(op1);
+ codeGen->genFPstkLevel = savFPstkLevel;
+
+ if (op2)
+ gtComputeFPlvls(op2);
+
+ goto DONE;
+
+ default:
+ break;
+ }
+
+ if (!op1)
+ {
+ if (!op2)
+ goto DONE;
+
+ gtComputeFPlvls(op2);
+ goto DONE;
+ }
+
+ if (!op2)
+ {
+ gtComputeFPlvls(op1);
+ if (oper == GT_ADDR)
+ {
+ /* If the operand was floating point pop the value from the stack */
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ noway_assert(codeGen->genFPstkLevel);
+ codeGen->genFPstkLevel--;
+ }
+ }
+
+ // This is a special case to handle the following
+ // optimization: conv.i4(round.d(d)) -> round.i(d)
+
+ if (oper == GT_INTRINSIC && tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round &&
+ tree->TypeGet() == TYP_INT)
+ {
+ codeGen->genFPstkLevel--;
+ }
+
+ goto DONE;
+ }
+
+        /* FP assignments need a bit of special handling */
+
+ if (isflt && (kind & GTK_ASGOP))
+ {
+ /* The target of the assignment won't get pushed */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ gtComputeFPlvls(op2);
+ gtComputeFPlvls(op1);
+ op1->gtFPlvl--;
+ codeGen->genFPstkLevel--;
+ }
+ else
+ {
+ gtComputeFPlvls(op1);
+ op1->gtFPlvl--;
+ codeGen->genFPstkLevel--;
+ gtComputeFPlvls(op2);
+ }
+
+ codeGen->genFPstkLevel--;
+ goto DONE;
+ }
+
+ /* Here we have a binary operator; visit operands in proper order */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ gtComputeFPlvls(op2);
+ gtComputeFPlvls(op1);
+ }
+ else
+ {
+ gtComputeFPlvls(op1);
+ gtComputeFPlvls(op2);
+ }
+
+ /*
+ Binary FP operators pop 2 operands and produce 1 result;
+ assignments consume 1 value and don't produce any.
+ */
+
+ if (isflt)
+ codeGen->genFPstkLevel--;
+
+ /* Float compares remove both operands from the FP stack */
+
+ if (kind & GTK_RELOP)
+ {
+ if (varTypeIsFloating(op1->TypeGet()))
+ codeGen->genFPstkLevel -= 2;
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_FIELD:
+ gtComputeFPlvls(tree->gtField.gtFldObj);
+ codeGen->genFPstkLevel += isflt;
+ break;
+
+ case GT_CALL:
+
+ if (tree->gtCall.gtCallObjp)
+ gtComputeFPlvls(tree->gtCall.gtCallObjp);
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ savFPstkLevel = codeGen->genFPstkLevel;
+ gtComputeFPlvls(tree->gtCall.gtCallArgs);
+ codeGen->genFPstkLevel = savFPstkLevel;
+ }
+
+ if (tree->gtCall.gtCallLateArgs)
+ {
+ savFPstkLevel = codeGen->genFPstkLevel;
+ gtComputeFPlvls(tree->gtCall.gtCallLateArgs);
+ codeGen->genFPstkLevel = savFPstkLevel;
+ }
+
+ codeGen->genFPstkLevel += isflt;
+ break;
+
+ case GT_ARR_ELEM:
+
+ gtComputeFPlvls(tree->gtArrElem.gtArrObj);
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ gtComputeFPlvls(tree->gtArrElem.gtArrInds[dim]);
+
+ /* Loads of FP values push a new value on the FP stack */
+ codeGen->genFPstkLevel += isflt;
+ break;
+
+ case GT_CMPXCHG:
+ // Evaluate the trees left to right
+ gtComputeFPlvls(tree->gtCmpXchg.gtOpLocation);
+ gtComputeFPlvls(tree->gtCmpXchg.gtOpValue);
+ gtComputeFPlvls(tree->gtCmpXchg.gtOpComparand);
+ noway_assert(!isflt);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ gtComputeFPlvls(tree->gtBoundsChk.gtArrLen);
+ gtComputeFPlvls(tree->gtBoundsChk.gtIndex);
+ noway_assert(!isflt);
+ break;
+
+#ifdef DEBUG
+ default:
+ noway_assert(!"Unhandled special operator in gtComputeFPlvls()");
+ break;
+#endif
+ }
+
+DONE:
+
+ noway_assert((unsigned char)codeGen->genFPstkLevel == codeGen->genFPstkLevel);
+
+ tree->gtFPlvl = (unsigned char)codeGen->genFPstkLevel;
+}
+
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * If the given tree is an integer constant that can be used
+ * in a scaled index address mode as a multiplier (e.g. "[4*index]"), then return
+ * the scale factor: 2, 4, or 8. Otherwise, return 0. Note that we never return 1,
+ * to match the behavior of GetScaleIndexShf().
+ */
+
+unsigned GenTree::GetScaleIndexMul()
+{
+ if (IsCnsIntOrI() && jitIsScaleIndexMul(gtIntConCommon.IconValue()) && gtIntConCommon.IconValue() != 1)
+ {
+ return (unsigned)gtIntConCommon.IconValue();
+ }
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ *  If the given tree is the right-hand side of a left shift (that is,
+ *  'y' in the tree 'x' << 'y'), and it is an integer constant shift amount whose
+ *  corresponding multiplier can be used in a scaled index address mode
+ *  (e.g. "[4*index]"), then return that scale factor: 2, 4, or 8. Otherwise, return 0.
+ */
+
+unsigned GenTree::GetScaleIndexShf()
+{
+ if (IsCnsIntOrI() && jitIsScaleIndexShift(gtIntConCommon.IconValue()))
+ {
+ return (unsigned)(1 << gtIntConCommon.IconValue());
+ }
+
+ return 0;
+}
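+
+// Illustrative sketch (comments only, assuming a Compiler instance 'comp'): the two accessors
+// interpret the constant differently: GetScaleIndexMul() treats it as the multiplier itself,
+// while GetScaleIndexShf() treats it as a shift amount and returns the corresponding multiplier.
+//
+//     GenTreePtr mulCns = comp->gtNewIconNode(4);   // constant from an "index * 4" tree
+//     mulCns->GetScaleIndexMul();                   // returns 4
+//
+//     GenTreePtr shfCns = comp->gtNewIconNode(2);   // constant from an "index << 2" tree
+//     shfCns->GetScaleIndexShf();                   // returns 1 << 2 == 4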
+
+/*****************************************************************************
+ *
+ * If the given tree is a scaled index (i.e. "op * 4" or "op << 2"), returns
+ * the multiplier: 2, 4, or 8; otherwise returns 0. Note that "1" is never
+ * returned.
+ */
+
+unsigned GenTree::GetScaledIndex()
+{
+    // With !opts.OptEnabled(CLFLG_CONSTANTFOLD) we can have
+ // CNS_INT * CNS_INT
+ //
+ if (gtOp.gtOp1->IsCnsIntOrI())
+ {
+ return 0;
+ }
+
+ switch (gtOper)
+ {
+ case GT_MUL:
+ return gtOp.gtOp2->GetScaleIndexMul();
+
+ case GT_LSH:
+ return gtOp.gtOp2->GetScaleIndexShf();
+
+ default:
+ assert(!"GenTree::GetScaledIndex() called with illegal gtOper");
+ break;
+ }
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ *  Returns true if this node is a GT_ADD, at least one of whose arguments is an integer (<= 32 bit)
+ *  constant. If so, it sets "*offset" to (one of the) constant value(s), and
+ *  "*addr" to the other argument.
+ */
+
+bool GenTree::IsAddWithI32Const(GenTreePtr* addr, int* offset)
+{
+ if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->IsIntCnsFitsInI32())
+ {
+ *offset = (int)gtOp.gtOp1->gtIntCon.gtIconVal;
+ *addr = gtOp.gtOp2;
+ return true;
+ }
+ else if (gtOp.gtOp2->IsIntCnsFitsInI32())
+ {
+ *offset = (int)gtOp.gtOp2->gtIntCon.gtIconVal;
+ *addr = gtOp.gtOp1;
+ return true;
+ }
+ }
+ // Otherwise...
+ return false;
+}
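+
+// Usage sketch (illustrative only; 'tree' is a hypothetical address expression):
+//
+//     GenTreePtr base   = tree;
+//     int        offset = 0;
+//     if (tree->IsAddWithI32Const(&base, &offset))
+//     {
+//         // For "x + 16" this leaves base == x and offset == 16; for a non-matching
+//         // tree, base/offset are untouched and the call returns false.
+//     }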
+
+//------------------------------------------------------------------------
+// gtGetChildPointer: If 'parent' is the parent of this node, return the pointer
+// to the child node so that it can be modified; otherwise, return nullptr.
+//
+// Arguments:
+// parent - The possible parent of this node
+//
+// Return Value:
+// If "child" is a child of "parent", returns a pointer to the child node in the parent
+// (i.e. a pointer to a GenTree pointer).
+// Otherwise, returns nullptr.
+//
+// Assumptions:
+// 'parent' must be non-null
+//
+// Notes:
+//    When FEATURE_MULTIREG_ARGS is defined, we can get here with a GT_LDOBJ tree.
+//    This happens when we have a struct that is passed in multiple registers.
+//
+//    Also note that when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined, the GT_LDOBJ
+//    later gets converted to a GT_LIST with two GT_LCL_FLDs in Lower/LowerXArch.
+//
+
+GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
+{
+ switch (parent->OperGet())
+ {
+ default:
+ if (!parent->OperIsSimple())
+ {
+ return nullptr;
+ }
+ if (this == parent->gtOp.gtOp1)
+ {
+ return &(parent->gtOp.gtOp1);
+ }
+ if (this == parent->gtOp.gtOp2)
+ {
+ return &(parent->gtOp.gtOp2);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ if (this == parent->gtCmpXchg.gtOpLocation)
+ {
+ return &(parent->gtCmpXchg.gtOpLocation);
+ }
+ if (this == parent->gtCmpXchg.gtOpValue)
+ {
+ return &(parent->gtCmpXchg.gtOpValue);
+ }
+ if (this == parent->gtCmpXchg.gtOpComparand)
+ {
+ return &(parent->gtCmpXchg.gtOpComparand);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ if (this == parent->gtBoundsChk.gtArrLen)
+ {
+ return &(parent->gtBoundsChk.gtArrLen);
+ }
+ if (this == parent->gtBoundsChk.gtIndex)
+ {
+ return &(parent->gtBoundsChk.gtIndex);
+ }
+ if (this == parent->gtBoundsChk.gtIndRngFailBB)
+ {
+ return &(parent->gtBoundsChk.gtIndRngFailBB);
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ if (this == parent->gtArrElem.gtArrObj)
+ {
+ return &(parent->gtArrElem.gtArrObj);
+ }
+ for (int i = 0; i < GT_ARR_MAX_RANK; i++)
+ {
+ if (this == parent->gtArrElem.gtArrInds[i])
+ {
+ return &(parent->gtArrElem.gtArrInds[i]);
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ if (this == parent->gtArrOffs.gtOffset)
+ {
+ return &(parent->gtArrOffs.gtOffset);
+ }
+ if (this == parent->gtArrOffs.gtIndex)
+ {
+ return &(parent->gtArrOffs.gtIndex);
+ }
+ if (this == parent->gtArrOffs.gtArrObj)
+ {
+ return &(parent->gtArrOffs.gtArrObj);
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ if (this == parent->gtDynBlk.gtOp1)
+ {
+ return &(parent->gtDynBlk.gtOp1);
+ }
+ if (this == parent->gtDynBlk.gtOp2)
+ {
+ return &(parent->gtDynBlk.gtOp2);
+ }
+ if (this == parent->gtDynBlk.gtDynamicSize)
+ {
+ return &(parent->gtDynBlk.gtDynamicSize);
+ }
+ break;
+
+ case GT_FIELD:
+ if (this == parent->AsField()->gtFldObj)
+ {
+ return &(parent->AsField()->gtFldObj);
+ }
+ break;
+
+ case GT_RET_EXPR:
+ if (this == parent->gtRetExpr.gtInlineCandidate)
+ {
+ return &(parent->gtRetExpr.gtInlineCandidate);
+ }
+ break;
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = parent->AsCall();
+
+ if (this == call->gtCallObjp)
+ {
+ return &(call->gtCallObjp);
+ }
+ if (this == call->gtCallArgs)
+ {
+ return reinterpret_cast<GenTreePtr*>(&(call->gtCallArgs));
+ }
+ if (this == call->gtCallLateArgs)
+ {
+ return reinterpret_cast<GenTreePtr*>(&(call->gtCallLateArgs));
+ }
+ if (this == call->gtControlExpr)
+ {
+ return &(call->gtControlExpr);
+ }
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ if (this == call->gtCallCookie)
+ {
+ return &(call->gtCallCookie);
+ }
+ if (this == call->gtCallAddr)
+ {
+ return &(call->gtCallAddr);
+ }
+ }
+ }
+ break;
+
+ case GT_STMT:
+ noway_assert(!"Illegal node for gtGetChildPointer()");
+ unreached();
+ }
+
+ return nullptr;
+}
+
+bool GenTree::TryGetUse(GenTree* def, GenTree*** use)
+{
+ for (GenTree** useEdge : UseEdges())
+ {
+ if (*useEdge == def)
+ {
+ *use = useEdge;
+ return true;
+ }
+ }
+
+ return false;
+}
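+
+// Usage sketch (illustrative only; 'parent', 'oldChild' and 'newChild' are hypothetical nodes):
+//
+//     GenTree** use = nullptr;
+//     if (parent->TryGetUse(oldChild, &use))
+//     {
+//         *use = newChild; // redirect the parent's operand edge to the replacement node
+//     }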
+
+//------------------------------------------------------------------------
+// gtGetParent: Get the parent of this node, and optionally capture the
+// pointer to the child so that it can be modified.
+//
+// Arguments:
+//    parentChildPtrPtr - A pointer to a GenTreePtr* (yes, that's three
+//                        levels, i.e. GenTree ***), which, if non-null,
+//                        will be set to point to the field in the parent
+//                        that points to this node.
+//
+// Return Value:
+//    The parent of this node.
+//
+// Notes:
+//    This requires that the execution order be defined (i.e. gtSetEvalOrder() has been called).
+//    To enable the child to be replaced, the 'parentChildPtrPtr' argument, if non-null,
+//    will be set to point to the child pointer in the parent that points to this node.
+
+GenTreePtr GenTree::gtGetParent(GenTreePtr** parentChildPtrPtr)
+{
+ // Find the parent node; it must be after this node in the execution order.
+ GenTreePtr* parentChildPtr = nullptr;
+ GenTreePtr parent;
+ for (parent = gtNext; parent != nullptr; parent = parent->gtNext)
+ {
+ parentChildPtr = gtGetChildPointer(parent);
+ if (parentChildPtr != nullptr)
+ {
+ break;
+ }
+ }
+ if (parentChildPtrPtr != nullptr)
+ {
+ *parentChildPtrPtr = parentChildPtr;
+ }
+ return parent;
+}
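+
+// Usage sketch (illustrative only; 'node' and 'replacement' are hypothetical trees in a
+// statement whose execution order has already been set by gtSetEvalOrder()):
+//
+//     GenTreePtr* useEdge = nullptr;
+//     GenTreePtr  parent  = node->gtGetParent(&useEdge);
+//     if ((parent != nullptr) && (useEdge != nullptr))
+//     {
+//         *useEdge = replacement; // splice 'replacement' in where 'node' was referenced
+//     }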
+
+/*****************************************************************************
+ *
+ * Returns true if the given operator may cause an exception.
+ */
+
+bool GenTree::OperMayThrow()
+{
+ GenTreePtr op;
+
+ switch (gtOper)
+ {
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+
+ /* Division with a non-zero, non-minus-one constant does not throw an exception */
+
+ op = gtOp.gtOp2;
+
+ if (varTypeIsFloating(op->TypeGet()))
+ {
+ return false; // Floating point division does not throw.
+ }
+
+ // For integers only division by 0 or by -1 can throw
+ if (op->IsIntegralConst() && !op->IsIntegralConst(0) && !op->IsIntegralConst(-1))
+ {
+ return false;
+ }
+ return true;
+
+ case GT_IND:
+ op = gtOp.gtOp1;
+
+ /* Indirections of handles are known to be safe */
+ if (op->gtOper == GT_CNS_INT)
+ {
+ if (op->IsIconHandle())
+ {
+ /* No exception is thrown on this indirection */
+ return false;
+ }
+ }
+ if (this->gtFlags & GTF_IND_NONFAULTING)
+ {
+ return false;
+ }
+ // Non-Null AssertionProp will remove the GTF_EXCEPT flag and mark the GT_IND with GTF_ORDER_SIDEEFF flag
+ if ((this->gtFlags & GTF_ALL_EFFECT) == GTF_ORDER_SIDEEFF)
+ {
+ return false;
+ }
+
+ return true;
+
+ case GT_INTRINSIC:
+            // If this is an intrinsic that represents the object.GetType(), it can throw a NullReferenceException.
+            // Report it as "may throw".
+            // Note: Some of the other existing intrinsics could potentially throw an exception (for example
+            //       the array and string element access ones). They are handled differently from the GetType
+            //       intrinsic and are not marked with GTF_EXCEPT. If these are revisited at some point to be
+            //       marked as GTF_EXCEPT, the code below might need to be specialized to handle them properly.
+ if ((this->gtFlags & GTF_EXCEPT) != 0)
+ {
+ return true;
+ }
+
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ return !Compiler::fgIsIndirOfAddrOfLocal(this);
+
+ case GT_ARR_BOUNDS_CHECK:
+ case GT_ARR_ELEM:
+ case GT_ARR_INDEX:
+ case GT_CATCH_ARG:
+ case GT_ARR_LENGTH:
+ case GT_LCLHEAP:
+ case GT_CKFINITE:
+ case GT_NULLCHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ return true;
+ default:
+ break;
+ }
+
+ /* Overflow arithmetic operations also throw exceptions */
+
+ if (gtOverflowEx())
+ {
+ return true;
+ }
+
+ return false;
+}
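+
+// For example (restating the cases above): a GT_DIV whose second operand is the constant 7
+// is known not to throw, while a GT_DIV by a variable may throw (divide-by-zero, or overflow
+// for INT_MIN / -1), and a GT_IND of a handle constant is treated as non-faulting.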
+
+#if DEBUGGABLE_GENTREE
+// static
+GenTree::VtablePtr GenTree::s_vtablesForOpers[] = {nullptr};
+GenTree::VtablePtr GenTree::s_vtableForOp = nullptr;
+
+GenTree::VtablePtr GenTree::GetVtableForOper(genTreeOps oper)
+{
+ noway_assert(oper < GT_COUNT);
+
+ if (s_vtablesForOpers[oper] != nullptr)
+ {
+ return s_vtablesForOpers[oper];
+ }
+ // Otherwise...
+ VtablePtr res = nullptr;
+ switch (oper)
+ {
+#define GTSTRUCT_0(nm, tag) /*handle explicitly*/
+#define GTSTRUCT_1(nm, tag) \
+ case tag: \
+ { \
+ GenTree##nm gt; \
+ res = *reinterpret_cast<VtablePtr*>(&gt); \
+ } \
+ break;
+#define GTSTRUCT_2(nm, tag, tag2) /*handle explicitly*/
+#define GTSTRUCT_3(nm, tag, tag2, tag3) /*handle explicitly*/
+#define GTSTRUCT_4(nm, tag, tag2, tag3, tag4) /*handle explicitly*/
+#define GTSTRUCT_N(nm, ...) /*handle explicitly*/
+#include "gtstructs.h"
+
+#if !FEATURE_EH_FUNCLETS
+ // If FEATURE_EH_FUNCLETS is set, then GT_JMP becomes the only member of Val, and will be handled above.
+ case GT_END_LFIN:
+ case GT_JMP:
+ {
+ GenTreeVal gt(GT_JMP, TYP_INT, 0);
+ res = *reinterpret_cast<VtablePtr*>(&gt);
+ break;
+ }
+#endif
+ case GT_OBJ:
+ {
+ GenTreeIntCon dummyOp(TYP_I_IMPL, 0);
+ GenTreeObj obj(TYP_STRUCT, &dummyOp, NO_CLASS_HANDLE, 0);
+ res = *reinterpret_cast<VtablePtr*>(&obj);
+ }
+ break;
+
+ default:
+ {
+ // Should be unary or binary op.
+ if (s_vtableForOp == nullptr)
+ {
+ unsigned opKind = OperKind(oper);
+ assert(!IsExOp(opKind));
+ assert(OperIsSimple(oper) || OperIsLeaf(oper));
+ // Need to provide non-null operands.
+ Compiler* comp = (Compiler*)_alloca(sizeof(Compiler));
+ GenTreeIntCon dummyOp(TYP_INT, 0);
+ GenTreeOp gt(oper, TYP_INT, &dummyOp, ((opKind & GTK_UNOP) ? nullptr : &dummyOp));
+ s_vtableForOp = *reinterpret_cast<VtablePtr*>(&gt);
+ }
+ res = s_vtableForOp;
+ break;
+ }
+ }
+ s_vtablesForOpers[oper] = res;
+ return res;
+}
+
+void GenTree::SetVtableForOper(genTreeOps oper)
+{
+ *reinterpret_cast<VtablePtr*>(this) = GetVtableForOper(oper);
+}
+#endif // DEBUGGABLE_GENTREE
+
+GenTreePtr Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2)
+{
+ assert(op1 != nullptr);
+ assert(op2 != nullptr);
+
+ // We should not be allocating nodes that extend GenTreeOp with this;
+ // should call the appropriate constructor for the extended type.
+ assert(!GenTree::IsExOp(GenTree::OperKind(oper)));
+
+ GenTreePtr node = new (this, oper) GenTreeOp(oper, type, op1, op2);
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewQmarkNode(var_types type, GenTreePtr cond, GenTreePtr colon)
+{
+ compQmarkUsed = true;
+ GenTree* result = new (this, GT_QMARK) GenTreeQmark(type, cond, colon, this);
+#ifdef DEBUG
+ if (compQmarkRationalized)
+ {
+ fgCheckQmarkAllowedForm(result);
+ }
+#endif
+ return result;
+}
+
+GenTreeQmark::GenTreeQmark(var_types type, GenTreePtr cond, GenTreePtr colonOp, Compiler* comp)
+ : GenTreeOp(GT_QMARK, type, cond, colonOp)
+ , gtThenLiveSet(VarSetOps::UninitVal())
+ , gtElseLiveSet(VarSetOps::UninitVal())
+{
+ // These must follow a specific form.
+ assert(cond != nullptr && cond->TypeGet() == TYP_INT);
+ assert(colonOp != nullptr && colonOp->OperGet() == GT_COLON);
+
+ comp->impInlineRoot()->compQMarks->Push(this);
+}
+
+GenTreeIntCon* Compiler::gtNewIconNode(ssize_t value, var_types type)
+{
+ return new (this, GT_CNS_INT) GenTreeIntCon(type, value);
+}
+
+// return a new node representing the value in a physical register
+GenTree* Compiler::gtNewPhysRegNode(regNumber reg, var_types type)
+{
+ assert(genIsValidIntReg(reg) || (reg == REG_SPBASE));
+ GenTree* result = new (this, GT_PHYSREG) GenTreePhysReg(reg, type);
+ return result;
+}
+
+// Return a new node representing a store of a value to a physical register
+// modifies: child's gtRegNum
+GenTree* Compiler::gtNewPhysRegNode(regNumber reg, GenTree* src)
+{
+ assert(genIsValidIntReg(reg));
+ GenTree* result = new (this, GT_PHYSREGDST) GenTreeOp(GT_PHYSREGDST, TYP_I_IMPL, src, nullptr);
+ result->gtRegNum = reg;
+ src->gtRegNum = reg;
+ return result;
+}
+
+#ifndef LEGACY_BACKEND
+GenTreePtr Compiler::gtNewJmpTableNode()
+{
+ GenTreePtr node = new (this, GT_JMPTABLE) GenTreeJumpTable(TYP_INT);
+ node->gtJumpTable.gtJumpTableAddr = 0;
+ return node;
+}
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Converts an annotated token into an icon flags (so that we will later be
+ *  Converts an annotated token into icon flags (so that we will later be
+ *  able to tell the type of the handle that will be embedded in the icon
+ *  node).
+
+unsigned Compiler::gtTokenToIconFlags(unsigned token)
+{
+ unsigned flags = 0;
+
+ switch (TypeFromToken(token))
+ {
+ case mdtTypeRef:
+ case mdtTypeDef:
+ case mdtTypeSpec:
+ flags = GTF_ICON_CLASS_HDL;
+ break;
+
+ case mdtMethodDef:
+ flags = GTF_ICON_METHOD_HDL;
+ break;
+
+ case mdtFieldDef:
+ flags = GTF_ICON_FIELD_HDL;
+ break;
+
+ default:
+ flags = GTF_ICON_TOKEN_HDL;
+ break;
+ }
+
+ return flags;
+}
+
+/*****************************************************************************
+ *
+ *  Allocates an integer constant entry that represents a HANDLE to something.
+ *  It may not be allowed to embed HANDLEs directly into the JITed code (for example,
+ *  as arguments to JIT helpers). Get a corresponding value that can be embedded.
+ * If the handle needs to be accessed via an indirection, pValue points to it.
+ */
+
+GenTreePtr Compiler::gtNewIconEmbHndNode(
+ void* value, void* pValue, unsigned flags, unsigned handle1, void* handle2, void* compileTimeHandle)
+{
+ GenTreePtr node;
+
+ assert((!value) != (!pValue));
+
+ if (value)
+ {
+ node = gtNewIconHandleNode((size_t)value, flags, /*fieldSeq*/ FieldSeqStore::NotAField(), handle1, handle2);
+ node->gtIntCon.gtCompileTimeHandle = (size_t)compileTimeHandle;
+ }
+ else
+ {
+ node = gtNewIconHandleNode((size_t)pValue, flags, /*fieldSeq*/ FieldSeqStore::NotAField(), handle1, handle2);
+ node->gtIntCon.gtCompileTimeHandle = (size_t)compileTimeHandle;
+ node = gtNewOperNode(GT_IND, TYP_I_IMPL, node);
+ }
+
+ return node;
+}
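+
+// Shape of the result (a restatement of the two paths above): when the handle value itself can
+// be embedded, the result is a single handle-flavored GT_CNS_INT; when only an indirection cell
+// is available ('pValue'), the constant holds the cell's address and is wrapped in a GT_IND,
+// i.e. roughly IND(CNS_INT(pValue)).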
+
+/*****************************************************************************/
+GenTreePtr Compiler::gtNewStringLiteralNode(InfoAccessType iat, void* pValue)
+{
+ GenTreePtr tree = nullptr;
+
+ switch (iat)
+ {
+ case IAT_VALUE: // The info value is directly available
+ tree = gtNewIconEmbHndNode(pValue, nullptr, GTF_ICON_STR_HDL);
+ tree->gtType = TYP_REF;
+ tree = gtNewOperNode(GT_NOP, TYP_REF, tree); // prevents constant folding
+ break;
+
+ case IAT_PVALUE: // The value needs to be accessed via an indirection
+ tree = gtNewIconHandleNode((size_t)pValue, GTF_ICON_STR_HDL);
+ // An indirection of a string handle can't cause an exception so don't set GTF_EXCEPT
+ tree = gtNewOperNode(GT_IND, TYP_REF, tree);
+ tree->gtFlags |= GTF_GLOB_REF;
+ break;
+
+ case IAT_PPVALUE: // The value needs to be accessed via a double indirection
+ tree = gtNewIconHandleNode((size_t)pValue, GTF_ICON_PSTR_HDL);
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ tree->gtFlags |= GTF_IND_INVARIANT;
+ // An indirection of a string handle can't cause an exception so don't set GTF_EXCEPT
+ tree = gtNewOperNode(GT_IND, TYP_REF, tree);
+ tree->gtFlags |= GTF_GLOB_REF;
+ break;
+
+ default:
+ assert(!"Unexpected InfoAccessType");
+ }
+
+ return tree;
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::gtNewLconNode(__int64 value)
+{
+#ifdef _TARGET_64BIT_
+ GenTreePtr node = new (this, GT_CNS_INT) GenTreeIntCon(TYP_LONG, value);
+#else
+ GenTreePtr node = new (this, GT_CNS_LNG) GenTreeLngCon(value);
+#endif
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewDconNode(double value)
+{
+ GenTreePtr node = new (this, GT_CNS_DBL) GenTreeDblCon(value);
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewSconNode(int CPX, CORINFO_MODULE_HANDLE scpHandle)
+{
+
+#if SMALL_TREE_NODES
+
+ /* 'GT_CNS_STR' nodes later get transformed into 'GT_CALL' */
+
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_CNS_STR]);
+
+ GenTreePtr node = new (this, GT_CALL) GenTreeStrCon(CPX, scpHandle DEBUGARG(/*largeNode*/ true));
+#else
+ GenTreePtr node = new (this, GT_CNS_STR) GenTreeStrCon(CPX, scpHandle DEBUGARG(/*largeNode*/ true));
+#endif
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewZeroConNode(var_types type)
+{
+ GenTreePtr zero;
+ switch (type)
+ {
+ case TYP_INT:
+ zero = gtNewIconNode(0);
+ break;
+
+ case TYP_BYREF:
+ __fallthrough;
+
+ case TYP_REF:
+ zero = gtNewIconNode(0);
+ zero->gtType = type;
+ break;
+
+ case TYP_LONG:
+ zero = gtNewLconNode(0);
+ break;
+
+ case TYP_FLOAT:
+ zero = gtNewDconNode(0.0);
+ zero->gtType = type;
+ break;
+
+ case TYP_DOUBLE:
+ zero = gtNewDconNode(0.0);
+ break;
+
+ default:
+ assert(!"Bad type");
+ zero = nullptr;
+ break;
+ }
+ return zero;
+}
+
+GenTreePtr Compiler::gtNewOneConNode(var_types type)
+{
+ switch (type)
+ {
+ case TYP_INT:
+ case TYP_UINT:
+ return gtNewIconNode(1);
+
+ case TYP_LONG:
+ case TYP_ULONG:
+ return gtNewLconNode(1);
+
+ case TYP_FLOAT:
+ {
+ GenTreePtr one = gtNewDconNode(1.0);
+ one->gtType = type;
+ return one;
+ }
+
+ case TYP_DOUBLE:
+ return gtNewDconNode(1.0);
+
+ default:
+ assert(!"Bad type");
+ return nullptr;
+ }
+}
+
+GenTreeCall* Compiler::gtNewIndCallNode(GenTreePtr addr, var_types type, GenTreeArgList* args, IL_OFFSETX ilOffset)
+{
+ return gtNewCallNode(CT_INDIRECT, (CORINFO_METHOD_HANDLE)addr, type, args, ilOffset);
+}
+
+GenTreeCall* Compiler::gtNewCallNode(
+ gtCallTypes callType, CORINFO_METHOD_HANDLE callHnd, var_types type, GenTreeArgList* args, IL_OFFSETX ilOffset)
+{
+ GenTreeCall* node = new (this, GT_CALL) GenTreeCall(genActualType(type));
+
+ node->gtFlags |= (GTF_CALL | GTF_GLOB_REF);
+ if (args)
+ {
+ node->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
+ }
+ node->gtCallType = callType;
+ node->gtCallMethHnd = callHnd;
+ node->gtCallArgs = args;
+ node->gtCallObjp = nullptr;
+ node->fgArgInfo = nullptr;
+ node->callSig = nullptr;
+ node->gtRetClsHnd = nullptr;
+ node->gtControlExpr = nullptr;
+ node->gtCallMoreFlags = 0;
+
+ if (callType == CT_INDIRECT)
+ {
+ node->gtCallCookie = nullptr;
+ }
+ else
+ {
+ node->gtInlineCandidateInfo = nullptr;
+ }
+ node->gtCallLateArgs = nullptr;
+ node->gtReturnType = type;
+
+#ifdef LEGACY_BACKEND
+ node->gtCallRegUsedMask = RBM_NONE;
+#endif // LEGACY_BACKEND
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ node->gtCall.gtEntryPoint.addr = nullptr;
+#endif
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ // These get updated after call node is built.
+ node->gtCall.gtInlineObservation = InlineObservation::CALLEE_UNUSED_INITIAL;
+ node->gtCall.gtRawILOffset = BAD_IL_OFFSET;
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+    // Spec: Managed Retval sequence points need to be generated while generating debug info for debuggable code.
+ //
+ // Implementation note: if not generating MRV info genCallSite2ILOffsetMap will be NULL and
+ // codegen will pass BAD_IL_OFFSET as IL offset of a call node to emitter, which will cause emitter
+ // not to emit IP mapping entry.
+ if (opts.compDbgCode && opts.compDbgInfo)
+ {
+ // Managed Retval - IL offset of the call. This offset is used to emit a
+ // CALL_INSTRUCTION type sequence point while emitting corresponding native call.
+ //
+ // TODO-Cleanup:
+ // a) (Opt) We need not store this offset if the method doesn't return a
+ // value. Rather it can be made BAD_IL_OFFSET to prevent a sequence
+ // point being emitted.
+ //
+ // b) (Opt) Add new sequence points only if requested by debugger through
+ // a new boundary type - ICorDebugInfo::BoundaryTypes
+ if (genCallSite2ILOffsetMap == nullptr)
+ {
+ genCallSite2ILOffsetMap = new (getAllocator()) CallSiteILOffsetTable(getAllocator());
+ }
+
+ // Make sure that there are no duplicate entries for a given call node
+ IL_OFFSETX value;
+ assert(!genCallSite2ILOffsetMap->Lookup(node, &value));
+ genCallSite2ILOffsetMap->Set(node, ilOffset);
+ }
+#endif
+
+ // Initialize gtOtherRegs
+ node->ClearOtherRegs();
+
+ // Initialize spill flags of gtOtherRegs
+ node->ClearOtherRegFlags();
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewLclvNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs)
+{
+ // We need to ensure that all struct values are normalized.
+ // It might be nice to assert this in general, but we have assignments of int to long.
+ if (varTypeIsStruct(type))
+ {
+ assert(type == lvaTable[lnum].lvType);
+ }
+ GenTreePtr node = new (this, GT_LCL_VAR) GenTreeLclVar(type, lnum, ILoffs);
+
+ /* Cannot have this assert because the inliner uses this function
+ * to add temporaries */
+
+ // assert(lnum < lvaCount);
+
+ return node;
+}
+
+GenTreePtr Compiler::gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs)
+{
+ // We need to ensure that all struct values are normalized.
+ // It might be nice to assert this in general, but we have assignments of int to long.
+ if (varTypeIsStruct(type))
+ {
+ assert(type == lvaTable[lnum].lvType);
+ }
+#if SMALL_TREE_NODES
+ /* This local variable node may later get transformed into a large node */
+
+ // assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_LCL_VAR]);
+
+ GenTreePtr node = new (this, GT_CALL) GenTreeLclVar(type, lnum, ILoffs DEBUGARG(/*largeNode*/ true));
+#else
+ GenTreePtr node = new (this, GT_LCL_VAR) GenTreeLclVar(type, lnum, ILoffs DEBUGARG(/*largeNode*/ true));
+#endif
+
+ return node;
+}
+
+GenTreeLclFld* Compiler::gtNewLclFldNode(unsigned lnum, var_types type, unsigned offset)
+{
+ GenTreeLclFld* node = new (this, GT_LCL_FLD) GenTreeLclFld(type, lnum, offset);
+
+ /* Cannot have this assert because the inliner uses this function
+ * to add temporaries */
+
+ // assert(lnum < lvaCount);
+
+ node->gtFieldSeq = FieldSeqStore::NotAField();
+ return node;
+}
+
+GenTreePtr Compiler::gtNewInlineCandidateReturnExpr(GenTreePtr inlineCandidate, var_types type)
+{
+ assert(GenTree::s_gtNodeSizes[GT_RET_EXPR] == TREE_NODE_SZ_LARGE);
+
+ GenTreePtr node = new (this, GT_RET_EXPR) GenTreeRetExpr(type);
+
+ node->gtRetExpr.gtInlineCandidate = inlineCandidate;
+
+ if (varTypeIsStruct(inlineCandidate) && !inlineCandidate->OperIsBlkOp())
+ {
+ node->gtRetExpr.gtRetClsHnd = gtGetStructHandle(inlineCandidate);
+ }
+
+ // GT_RET_EXPR node eventually might be bashed back to GT_CALL (when inlining is aborted for example).
+ // Therefore it should carry the GTF_CALL flag so that all the rules about spilling can apply to it as well.
+    // For example, impImportLeave or CEE_POP need to spill GT_RET_EXPR before emptying the evaluation stack.
+ node->gtFlags |= GTF_CALL;
+
+ return node;
+}
+
+GenTreeArgList* Compiler::gtNewListNode(GenTreePtr op1, GenTreeArgList* op2)
+{
+ assert((op1 != nullptr) && (op1->OperGet() != GT_LIST));
+
+ return new (this, GT_LIST) GenTreeArgList(op1, op2);
+}
+
+/*****************************************************************************
+ *
+ * Create a list out of one value.
+ */
+
+GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg)
+{
+ return new (this, GT_LIST) GenTreeArgList(arg);
+}
+
+/*****************************************************************************
+ *
+ * Create a list out of the two values.
+ */
+
+GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2)
+{
+ return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2));
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtNewAggregate:
+// Creates a new aggregate argument node. These nodes are used to
+// represent arguments that are composed of multiple values (e.g.
+// the lclVars that represent the fields of a promoted struct).
+//
+// Note that aggregate arguments are currently represented by GT_LIST
+// nodes that are marked with the GTF_LIST_AGGREGATE flag. This
+// representation may be changed in the future to instead use its own
+// node type (e.g. GT_AGGREGATE).
+//
+// Arguments:
+// firstElement - The first element in the aggregate's list of values.
+//
+// Returns:
+// The newly-created aggregate node.
+GenTreeArgList* Compiler::gtNewAggregate(GenTree* firstElement)
+{
+ GenTreeArgList* agg = gtNewArgList(firstElement);
+ agg->gtFlags |= GTF_LIST_AGGREGATE;
+ return agg;
+}
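+
+// For example (illustrative): a struct argument promoted into two field lclVars would be
+// represented as a GT_LIST carrying the GTF_LIST_AGGREGATE flag whose elements are those two
+// field lclVar nodes, which is what downstream code recognizes as an "aggregate" argument.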
+
+/*****************************************************************************
+ *
+ * Create a list out of the three values.
+ */
+
+GenTreeArgList* Compiler::gtNewArgList(GenTreePtr arg1, GenTreePtr arg2, GenTreePtr arg3)
+{
+ return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2, arg3));
+}
+
+/*****************************************************************************
+ *
+ * Given a GT_CALL node, access the fgArgInfo and find the entry
+ * that has the matching argNum and return the fgArgTableEntryPtr
+ */
+
+fgArgTabEntryPtr Compiler::gtArgEntryByArgNum(GenTreePtr call, unsigned argNum)
+{
+ noway_assert(call->IsCall());
+ fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ noway_assert(argInfo != nullptr);
+
+ unsigned argCount = argInfo->ArgCount();
+ fgArgTabEntryPtr* argTable = argInfo->ArgTable();
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+
+ for (unsigned i = 0; i < argCount; i++)
+ {
+ curArgTabEntry = argTable[i];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ return curArgTabEntry;
+ }
+ }
+ noway_assert(!"gtArgEntryByArgNum: argNum not found");
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given a GT_CALL node, access the fgArgInfo and find the entry
+ * that has the matching node and return the fgArgTableEntryPtr
+ */
+
+fgArgTabEntryPtr Compiler::gtArgEntryByNode(GenTreePtr call, GenTreePtr node)
+{
+ noway_assert(call->IsCall());
+ fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ noway_assert(argInfo != nullptr);
+
+ unsigned argCount = argInfo->ArgCount();
+ fgArgTabEntryPtr* argTable = argInfo->ArgTable();
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+
+ for (unsigned i = 0; i < argCount; i++)
+ {
+ curArgTabEntry = argTable[i];
+
+ if (curArgTabEntry->node == node)
+ {
+ return curArgTabEntry;
+ }
+#ifdef PROTO_JIT
+ else if (node->OperGet() == GT_RELOAD && node->gtOp.gtOp1 == curArgTabEntry->node)
+ {
+ return curArgTabEntry;
+ }
+#endif // PROTO_JIT
+ else if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ if (curArgTabEntry->parent->Current() == node)
+ {
+ return curArgTabEntry;
+ }
+ }
+ else // (curArgTabEntry->parent == NULL)
+ {
+ if (call->gtCall.gtCallObjp == node)
+ {
+ return curArgTabEntry;
+ }
+ }
+ }
+ noway_assert(!"gtArgEntryByNode: node not found");
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Find and return the entry with the given "lateArgInx". Requires that one is found
+ * (asserts this).
+ */
+fgArgTabEntryPtr Compiler::gtArgEntryByLateArgIndex(GenTreePtr call, unsigned lateArgInx)
+{
+ noway_assert(call->IsCall());
+ fgArgInfoPtr argInfo = call->gtCall.fgArgInfo;
+ noway_assert(argInfo != nullptr);
+
+ unsigned argCount = argInfo->ArgCount();
+ fgArgTabEntryPtr* argTable = argInfo->ArgTable();
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+
+ for (unsigned i = 0; i < argCount; i++)
+ {
+ curArgTabEntry = argTable[i];
+ if (curArgTabEntry->lateArgInx == lateArgInx)
+ {
+ return curArgTabEntry;
+ }
+ }
+    noway_assert(!"gtArgEntryByLateArgIndex: lateArgInx not found");
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Given an fgArgTabEntryPtr, return true if it is the 'this' pointer argument.
+ */
+bool Compiler::gtArgIsThisPtr(fgArgTabEntryPtr argEntry)
+{
+ return (argEntry->parent == nullptr);
+}
+
+/*****************************************************************************
+ *
+ * Create a node that will assign 'src' to 'dst'.
+ */
+
+GenTreePtr Compiler::gtNewAssignNode(GenTreePtr dst, GenTreePtr src)
+{
+ /* Mark the target as being assigned */
+
+ if ((dst->gtOper == GT_LCL_VAR) || (dst->OperGet() == GT_LCL_FLD))
+ {
+ dst->gtFlags |= GTF_VAR_DEF;
+ if (dst->IsPartialLclFld(this))
+ {
+ // We treat these partial writes as combined uses and defs.
+ dst->gtFlags |= GTF_VAR_USEASG;
+ }
+ }
+ dst->gtFlags |= GTF_DONT_CSE;
+
+ /* Create the assignment node */
+
+ GenTreePtr asg = gtNewOperNode(GT_ASG, dst->TypeGet(), dst, src);
+
+ /* Mark the expression as containing an assignment */
+
+ asg->gtFlags |= GTF_ASG;
+
+ return asg;
+}
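+
+// Usage sketch (illustrative only; 'tmpLcl' is a hypothetical int-typed local number):
+//
+//     GenTreePtr dst = gtNewLclvNode(tmpLcl, TYP_INT);
+//     GenTreePtr add = gtNewOperNode(GT_ADD, TYP_INT, gtNewLclvNode(tmpLcl, TYP_INT), gtNewIconNode(1));
+//     GenTreePtr asg = gtNewAssignNode(dst, add); // marks 'dst' as a def and sets GTF_ASG on 'asg'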
+
+//------------------------------------------------------------------------
+// gtNewObjNode: Creates a new Obj node.
+//
+// Arguments:
+// structHnd - The class handle of the struct type.
+// addr - The address of the struct.
+//
+// Return Value:
+// Returns a node representing the struct value at the given address.
+//
+// Notes:
+// It will currently return a GT_OBJ node for any struct type, but may
+// return a GT_IND or a non-indirection for a scalar type.
+// The node will not yet have its GC info initialized. This is because
+// we may not need this info if this is an r-value.
+
+GenTree* Compiler::gtNewObjNode(CORINFO_CLASS_HANDLE structHnd, GenTree* addr)
+{
+ var_types nodeType = impNormStructType(structHnd);
+ assert(varTypeIsStruct(nodeType));
+ unsigned size = info.compCompHnd->getClassSize(structHnd);
+
+ // It would be convenient to set the GC info at this time, but we don't actually require
+ // it unless this is going to be a destination.
+ if (!varTypeIsStruct(nodeType))
+ {
+ if ((addr->gtOper == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == nodeType))
+ {
+ return addr->gtGetOp1();
+ }
+ else
+ {
+ return gtNewOperNode(GT_IND, nodeType, addr);
+ }
+ }
+ GenTreeBlk* newBlkOrObjNode = new (this, GT_OBJ) GenTreeObj(nodeType, addr, structHnd, size);
+
+    // An Obj is not a global reference if it is known to be a local struct.
+ if ((addr->gtFlags & GTF_GLOB_REF) == 0)
+ {
+ GenTreeLclVarCommon* lclNode = addr->IsLocalAddrExpr();
+ if ((lclNode != nullptr) && !lvaIsImplicitByRefLocal(lclNode->gtLclNum))
+ {
+ newBlkOrObjNode->gtFlags &= ~GTF_GLOB_REF;
+ }
+ }
+ return newBlkOrObjNode;
+}
+
+//------------------------------------------------------------------------
+// gtSetObjGcInfo: Set the GC info on an object node
+//
+// Arguments:
+// objNode - The object node of interest
+
+void Compiler::gtSetObjGcInfo(GenTreeObj* objNode)
+{
+ CORINFO_CLASS_HANDLE structHnd = objNode->gtClass;
+ var_types nodeType = objNode->TypeGet();
+ unsigned size = objNode->gtBlkSize;
+ unsigned slots = 0;
+ unsigned gcPtrCount = 0;
+ BYTE* gcPtrs = nullptr;
+
+ assert(varTypeIsStruct(nodeType));
+ assert(size == info.compCompHnd->getClassSize(structHnd));
+ assert(nodeType == impNormStructType(structHnd));
+
+ if (nodeType == TYP_STRUCT)
+ {
+ if (size >= TARGET_POINTER_SIZE)
+ {
+ // Get the GC fields info
+ var_types simdBaseType; // Dummy argument
+ slots = (unsigned)(roundUp(size, TARGET_POINTER_SIZE) / TARGET_POINTER_SIZE);
+ gcPtrs = new (this, CMK_ASTNode) BYTE[slots];
+ nodeType = impNormStructType(structHnd, gcPtrs, &gcPtrCount, &simdBaseType);
+ }
+ }
+ objNode->SetGCInfo(gcPtrs, gcPtrCount, slots);
+ assert(objNode->gtType == nodeType);
+}
+
+//------------------------------------------------------------------------
+// gtNewStructVal: Return a node that represents a struct value
+//
+// Arguments:
+// structHnd - The class for the struct
+// addr - The address of the struct
+//
+// Return Value:
+// A block, object or local node that represents the struct value pointed to by 'addr'.
+
+GenTree* Compiler::gtNewStructVal(CORINFO_CLASS_HANDLE structHnd, GenTreePtr addr)
+{
+ if (addr->gtOper == GT_ADDR)
+ {
+ GenTree* val = addr->gtGetOp1();
+ if (val->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = addr->gtGetOp1()->AsLclVarCommon()->gtLclNum;
+ LclVarDsc* varDsc = &(lvaTable[lclNum]);
+ if (varTypeIsStruct(varDsc) && (varDsc->lvVerTypeInfo.GetClassHandle() == structHnd) &&
+ !lvaIsImplicitByRefLocal(lclNum))
+ {
+ return addr->gtGetOp1();
+ }
+ }
+ }
+ return gtNewObjNode(structHnd, addr);
+}
+
+//------------------------------------------------------------------------
+// gtNewBlockVal: Return a node that represents a possibly untyped block value
+//
+// Arguments:
+// addr - The address of the block
+// size - The size of the block
+//
+// Return Value:
+// A block, object or local node that represents the block value pointed to by 'addr'.
+
+GenTree* Compiler::gtNewBlockVal(GenTreePtr addr, unsigned size)
+{
+ // By default we treat this as an opaque struct type with known size.
+ var_types blkType = TYP_STRUCT;
+#if FEATURE_SIMD
+ if ((addr->gtOper == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ GenTree* val = addr->gtGetOp1();
+ if (varTypeIsSIMD(val) && (genTypeSize(val->TypeGet()) == size))
+ {
+ blkType = val->TypeGet();
+ return addr->gtGetOp1();
+ }
+ }
+#endif // FEATURE_SIMD
+ return new (this, GT_BLK) GenTreeBlk(GT_BLK, blkType, addr, size);
+}
+
+// Creates a new assignment node for a CpObj.
+// Parameters (exactly the same as MSIL CpObj):
+//
+// dstAddr - The target to copy the struct to
+// srcAddr - The source to copy the struct from
+// structHnd - A class token that represents the type of object being copied. May be null
+// if FEATURE_SIMD is enabled and the source has a SIMD type.
+// isVolatile - Is this marked as volatile memory?
+
+GenTree* Compiler::gtNewCpObjNode(GenTreePtr dstAddr, GenTreePtr srcAddr, CORINFO_CLASS_HANDLE structHnd, bool isVolatile)
+{
+ GenTreePtr lhs = gtNewStructVal(structHnd, dstAddr);
+ GenTree* src = nullptr;
+ unsigned size;
+
+ if (lhs->OperIsBlk())
+ {
+ size = lhs->AsBlk()->gtBlkSize;
+ if (lhs->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(lhs->AsObj());
+ }
+ }
+ else
+ {
+ size = genTypeSize(lhs->gtType);
+ }
+
+ if (srcAddr->OperGet() == GT_ADDR)
+ {
+ src = srcAddr->gtOp.gtOp1;
+ }
+ else
+ {
+ src = gtNewOperNode(GT_IND, lhs->TypeGet(), srcAddr);
+ }
+
+ GenTree* result = gtNewBlkOpNode(lhs, src, size, isVolatile, true);
+ return result;
+}
+
+//------------------------------------------------------------------------
+// FixupInitBlkValue: Fixup the init value for an initBlk operation
+//
+// Arguments:
+// asgType - The type of assignment that the initBlk is being transformed into
+//
+// Return Value:
+// Modifies the constant value on this node to be the appropriate "fill"
+// value for the initblk.
+//
+// Notes:
+// The initBlk MSIL instruction takes a byte value, which must be
+// extended to the size of the assignment when an initBlk is transformed
+// to an assignment of a primitive type.
+// This performs the appropriate extension.
+
+void GenTreeIntCon::FixupInitBlkValue(var_types asgType)
+{
+ assert(varTypeIsIntegralOrI(asgType));
+ unsigned size = genTypeSize(asgType);
+ if (size > 1)
+ {
+ size_t cns = gtIconVal;
+ cns = cns & 0xFF;
+ cns |= cns << 8;
+ if (size >= 4)
+ {
+ cns |= cns << 16;
+#ifdef _TARGET_64BIT_
+ if (size == 8)
+ {
+ cns |= cns << 32;
+ }
+#endif // _TARGET_64BIT_
+
+            // Make the type used in the GT_IND node match the evaluation type of the assignment.
+ gtType = asgType;
+
+            // If we are using a GT_INITBLK on a GC type, the value being assigned must be zero (null).
+ assert(!varTypeIsGC(asgType) || (cns == 0));
+ }
+
+ gtIconVal = cns;
+ }
+}
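+
+// Worked example (restating the widening above): for a fill byte of 0xAB, a 2-byte assignment
+// produces 0xABAB, a 4-byte assignment 0xABABABAB, and (on 64-bit targets) an 8-byte assignment
+// 0xABABABABABABABAB; for sizes of 4 or more the constant is also retyped to the assignment type.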
+
+//------------------------------------------------------------------------
+// gtBlockOpInit: Initializes a BlkOp GenTree
+//
+// Arguments:
+// result - an assignment node that is to be initialized.
+// dst - the target (destination) we want to either initialize or copy to.
+// src - the init value for InitBlk or the source struct for CpBlk/CpObj.
+// isVolatile - specifies whether this node is a volatile memory operation.
+//
+// Assumptions:
+// 'result' is an assignment that is newly constructed.
+// If 'dst' is TYP_STRUCT, then it must be a block node or lclVar.
+//
+// Notes:
+// This procedure centralizes all the logic to both enforce proper structure and
+// to properly construct any InitBlk/CpBlk node.
+
+void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOrFillVal, bool isVolatile)
+{
+ if (!result->OperIsBlkOp())
+ {
+ assert(dst->TypeGet() != TYP_STRUCT);
+ return;
+ }
+#ifdef DEBUG
+    // If the copy involves GC pointers, the caller must have already set
+    // the node's additional members (gtGcPtrs, gtGcPtrCount, gtSlots) on the dst.
+ if ((dst->gtOper == GT_OBJ) && dst->AsBlk()->HasGCPtr())
+ {
+ GenTreeObj* objNode = dst->AsObj();
+ assert(objNode->gtGcPtrs != nullptr);
+ assert(!IsUninitialized(objNode->gtGcPtrs));
+ assert(!IsUninitialized(objNode->gtGcPtrCount));
+ assert(!IsUninitialized(objNode->gtSlots) && objNode->gtSlots > 0);
+
+ for (unsigned i = 0; i < objNode->gtGcPtrCount; ++i)
+ {
+ CorInfoGCType t = (CorInfoGCType)objNode->gtGcPtrs[i];
+ switch (t)
+ {
+ case TYPE_GC_NONE:
+ case TYPE_GC_REF:
+ case TYPE_GC_BYREF:
+ case TYPE_GC_OTHER:
+ break;
+ default:
+ unreached();
+ }
+ }
+ }
+#endif // DEBUG
+
+    /* In the case of CpBlk, we want to avoid generating nodes where the source
+     * and destination are the same, for two reasons: first, such a copy is useless;
+     * second, it introduces liveness issues, and copying memory between overlapping
+     * locations is undefined behavior both per the ECMA standard and per the
+     * semantics of memcpy.
+     *
+     * NOTE: We only detect the case of the address of a local being copied to
+     * the local itself; any other, more complex expressions won't be caught.
+     *
+     * TODO-Cleanup: though having this logic is goodness (i.e. it avoids self-assignment
+     * of struct vars very early), it was added because fgInterBlockLocalVarLiveness()
+     * isn't handling self-assignment of struct variables correctly. This issue may not
+     * surface if struct promotion is ON (which is the case on x86/arm). But the
+     * fundamental issue still exists and needs to be addressed.
+     */
+ if (result->OperIsCopyBlkOp())
+ {
+ GenTreePtr currSrc = srcOrFillVal;
+ GenTreePtr currDst = dst;
+
+ if (currSrc->OperIsBlk() && (currSrc->AsBlk()->Addr()->OperGet() == GT_ADDR))
+ {
+ currSrc = currSrc->AsBlk()->Addr()->gtGetOp1();
+ }
+ if (currDst->OperIsBlk() && (currDst->AsBlk()->Addr()->OperGet() == GT_ADDR))
+ {
+ currDst = currDst->AsBlk()->Addr()->gtGetOp1();
+ }
+
+ if (currSrc->OperGet() == GT_LCL_VAR && currDst->OperGet() == GT_LCL_VAR &&
+ currSrc->gtLclVarCommon.gtLclNum == currDst->gtLclVarCommon.gtLclNum)
+ {
+ // Make this a NOP
+ // TODO-Cleanup: probably doesn't matter, but could do this earlier and avoid creating a GT_ASG
+ result->gtBashToNOP();
+ return;
+ }
+ }
+
+ // Propagate all effect flags from children
+ result->gtFlags |= dst->gtFlags & GTF_ALL_EFFECT;
+ result->gtFlags |= result->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT;
+
+ // TODO-1stClassStructs: This should be done only if the destination is non-local.
+ result->gtFlags |= (GTF_GLOB_REF | GTF_ASG);
+
+ // REVERSE_OPS is necessary because the use must occur before the def
+ result->gtFlags |= GTF_REVERSE_OPS;
+
+ result->gtFlags |= (dst->gtFlags & GTF_EXCEPT) | (srcOrFillVal->gtFlags & GTF_EXCEPT);
+
+ if (isVolatile)
+ {
+ result->gtFlags |= GTF_BLK_VOLATILE;
+ }
+
+#ifdef FEATURE_SIMD
+ if (result->OperIsCopyBlkOp() && varTypeIsSIMD(srcOrFillVal))
+ {
+ // If the source is a GT_SIMD node of SIMD type, then the dst lclvar struct
+ // should be labeled as simd intrinsic related struct.
+ // This is done so that the morpher can transform any field accesses into
+ // intrinsics, thus avoiding conflicting access methods (fields vs. whole-register).
+
+ GenTree* src = srcOrFillVal;
+ if (src->OperIsIndir() && (src->AsIndir()->Addr()->OperGet() == GT_ADDR))
+ {
+ src = src->AsIndir()->Addr()->gtGetOp1();
+ }
+ if (src->OperGet() == GT_SIMD)
+ {
+ if (dst->OperIsBlk() && (dst->AsIndir()->Addr()->OperGet() == GT_ADDR))
+ {
+ dst = dst->AsIndir()->Addr()->gtGetOp1();
+ }
+
+ if (dst->OperIsLocal() && varTypeIsStruct(dst))
+ {
+ unsigned lclNum = dst->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+ }
+ }
+#endif // FEATURE_SIMD
+}
+
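+// Example (illustrative only; 'lclNum' and 'structSize' are assumed to describe an
+// existing TYP_STRUCT local): one observable effect of the self-copy check above is
+// that a block copy of a struct local onto itself degenerates into a no-op:
+//
+//     GenTreePtr dst = gtNewLclvNode(lclNum, TYP_STRUCT);
+//     GenTreePtr src = gtNewLclvNode(lclNum, TYP_STRUCT);
+//     GenTree*   asg = gtNewBlkOpNode(dst, src, structSize,
+//                                     false /* isVolatile */, true /* isCopyBlock */);
+//     // gtBlockOpInit (called by gtNewBlkOpNode, defined just below) sees that the
+//     // source and destination are the same local and bashes 'asg' to GT_NOP.
+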
+//------------------------------------------------------------------------
+// gtNewBlkOpNode: Creates a GenTree for a block (struct) assignment.
+//
+// Arguments:
+// dst - Destination or target to copy to / initialize the buffer.
+//    srcOrFillVal - the fill value for an init (InitBlk), or the source for a copy (CpBlk/CpObj).
+//    size         - The size of the buffer, or zero for a CpObj (the class is taken from the GT_OBJ dst).
+// isVolatile - Whether this is a volatile memory operation or not.
+// isCopyBlock - True if this is a block copy (rather than a block init).
+//
+// Return Value:
+// Returns the newly constructed and initialized block operation.
+//
+// Notes:
+// If size is zero, the dst must be a GT_OBJ with the class handle.
+// 'dst' must be a block node or lclVar.
+//
+GenTree* Compiler::gtNewBlkOpNode(
+ GenTreePtr dst, GenTreePtr srcOrFillVal, unsigned size, bool isVolatile, bool isCopyBlock)
+{
+ assert(dst->OperIsBlk() || dst->OperIsLocal());
+ if (isCopyBlock)
+ {
+ srcOrFillVal->gtFlags |= GTF_DONT_CSE;
+ if (srcOrFillVal->OperIsIndir() && (srcOrFillVal->gtGetOp1()->gtOper == GT_ADDR))
+ {
+ srcOrFillVal = srcOrFillVal->gtGetOp1()->gtGetOp1();
+ }
+ }
+
+ GenTree* result = gtNewAssignNode(dst, srcOrFillVal);
+ if (!isCopyBlock)
+ {
+ result->gtFlags |= GTF_BLK_INIT;
+ }
+ gtBlockOpInit(result, dst, srcOrFillVal, isVolatile);
+ return result;
+}
+
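+// Example (illustrative sketch; 'lclNum' and 'structSize' are assumed to describe an
+// existing TYP_STRUCT local): zero-initializing a struct local is an InitBlk, i.e.
+// isCopyBlock is false and srcOrFillVal is the fill value:
+//
+//     GenTreePtr dest = gtNewLclvNode(lclNum, TYP_STRUCT);
+//     GenTreePtr zero = gtNewIconNode(0);
+//     GenTree*   init = gtNewBlkOpNode(dest, zero, structSize,
+//                                      false /* isVolatile */, false /* isCopyBlock */);
+//
+// The result is marked with GTF_BLK_INIT here, and gtBlockOpInit (above) takes care
+// of propagating the effect flags.
+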
+/*****************************************************************************
+ *
+ * Clones the given tree and returns a copy of it.
+ * If 'complexOK' is false, the cloning is done only if the tree
+ * is not too complex (whatever that may mean);
+ * if 'complexOK' is true, we try slightly harder to clone the tree.
+ * In either case, nullptr is returned if the tree cannot be cloned.
+ *
+ * Note that there is the function gtCloneExpr() which does a more
+ * complete job if you can't handle this function failing.
+ */
+
+GenTreePtr Compiler::gtClone(GenTree* tree, bool complexOK)
+{
+ GenTreePtr copy;
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+
+#if defined(LATE_DISASM)
+ if (tree->IsIconHandle())
+ {
+ copy = gtNewIconHandleNode(tree->gtIntCon.gtIconVal, tree->gtFlags, tree->gtIntCon.gtFieldSeq,
+ tree->gtIntCon.gtIconHdl.gtIconHdl1, tree->gtIntCon.gtIconHdl.gtIconHdl2);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ copy->gtType = tree->gtType;
+ }
+ else
+#endif
+ {
+ copy = new (this, GT_CNS_INT)
+ GenTreeIntCon(tree->gtType, tree->gtIntCon.gtIconVal, tree->gtIntCon.gtFieldSeq);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ // Remember that the LclVar node has been cloned. The flag will be set
+ // on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = gtNewLclvNode(tree->gtLclVarCommon.gtLclNum, tree->gtType, tree->gtLclVar.gtLclILoffs);
+ break;
+
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ // Remember that the LclVar node has been cloned. The flag will be set
+ // on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = new (this, tree->gtOper)
+ GenTreeLclFld(tree->gtOper, tree->TypeGet(), tree->gtLclFld.gtLclNum, tree->gtLclFld.gtLclOffs);
+ copy->gtLclFld.gtFieldSeq = tree->gtLclFld.gtFieldSeq;
+ break;
+
+ case GT_CLS_VAR:
+ copy = new (this, GT_CLS_VAR)
+ GenTreeClsVar(tree->gtType, tree->gtClsVar.gtClsVarHnd, tree->gtClsVar.gtFieldSeq);
+ break;
+
+ case GT_REG_VAR:
+ assert(!"clone regvar");
+
+ default:
+ if (!complexOK)
+ {
+ return nullptr;
+ }
+
+ if (tree->gtOper == GT_FIELD)
+ {
+ GenTreePtr objp;
+
+ // copied from line 9850
+
+ objp = nullptr;
+ if (tree->gtField.gtFldObj)
+ {
+ objp = gtClone(tree->gtField.gtFldObj, false);
+ if (!objp)
+ {
+ return objp;
+ }
+ }
+
+ copy = gtNewFieldRef(tree->TypeGet(), tree->gtField.gtFldHnd, objp, tree->gtField.gtFldOffset);
+ copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap;
+ }
+ else if (tree->gtOper == GT_ADD)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op1->OperIsLeaf() && op2->OperIsLeaf())
+ {
+ op1 = gtClone(op1);
+ if (op1 == nullptr)
+ {
+ return nullptr;
+ }
+ op2 = gtClone(op2);
+ if (op2 == nullptr)
+ {
+ return nullptr;
+ }
+
+ copy = gtNewOperNode(GT_ADD, tree->TypeGet(), op1, op2);
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+ else if (tree->gtOper == GT_ADDR)
+ {
+ GenTreePtr op1 = gtClone(tree->gtOp.gtOp1);
+ if (op1 == nullptr)
+ {
+ return nullptr;
+ }
+ copy = gtNewOperNode(GT_ADDR, tree->TypeGet(), op1);
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ break;
+ }
+
+ copy->gtFlags |= tree->gtFlags & ~GTF_NODE_MASK;
+#if defined(DEBUG)
+ copy->gtDebugFlags |= tree->gtDebugFlags & ~GTF_DEBUG_NODE_MASK;
+#endif // defined(DEBUG)
+
+ return copy;
+}
+
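+// Example (illustrative; 'baseLclNum' is an assumed byref local and 'someCall' stands
+// for any call tree): cloning a simple address computation succeeds because both
+// operands are leaves, while an arbitrarily complex tree makes gtClone return nullptr:
+//
+//     GenTreePtr addr = gtNewOperNode(GT_ADD, TYP_BYREF,
+//                                     gtNewLclvNode(baseLclNum, TYP_BYREF),
+//                                     gtNewIconNode(8, TYP_I_IMPL));
+//     GenTreePtr copy = gtClone(addr);      // non-null
+//     GenTreePtr big  = gtClone(someCall);  // nullptr: too complex for gtClone
+//
+// Callers that cannot tolerate failure should use gtCloneExpr() instead.
+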
+/*****************************************************************************
+ *
+ * Clones the given tree and returns a copy of it. Any references to the
+ * local variable 'varNum' are replaced with the integer constant 'varVal'.
+ */
+
+GenTreePtr Compiler::gtCloneExpr(GenTree* tree,
+ unsigned addFlags,
+ unsigned varNum, // = (unsigned)-1
+ int varVal)
+{
+ if (tree == nullptr)
+ {
+ return nullptr;
+ }
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = tree->OperGet();
+ unsigned kind = tree->OperKind();
+ GenTree* copy;
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ switch (oper)
+ {
+ case GT_CNS_INT:
+
+#if defined(LATE_DISASM)
+ if (tree->IsIconHandle())
+ {
+ copy = gtNewIconHandleNode(tree->gtIntCon.gtIconVal, tree->gtFlags, tree->gtIntCon.gtFieldSeq,
+ tree->gtIntCon.gtIconFld.gtIconCPX, tree->gtIntCon.gtIconFld.gtIconCls);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ copy->gtType = tree->gtType;
+ }
+ else
+#endif
+ {
+ copy = gtNewIconNode(tree->gtIntCon.gtIconVal, tree->gtType);
+ copy->gtIntCon.gtCompileTimeHandle = tree->gtIntCon.gtCompileTimeHandle;
+ copy->gtIntCon.gtFieldSeq = tree->gtIntCon.gtFieldSeq;
+ }
+ goto DONE;
+
+ case GT_CNS_LNG:
+ copy = gtNewLconNode(tree->gtLngCon.gtLconVal);
+ goto DONE;
+
+ case GT_CNS_DBL:
+ copy = gtNewDconNode(tree->gtDblCon.gtDconVal);
+ copy->gtType = tree->gtType; // keep the same type
+ goto DONE;
+
+ case GT_CNS_STR:
+ copy = gtNewSconNode(tree->gtStrCon.gtSconCPX, tree->gtStrCon.gtScpHnd);
+ goto DONE;
+
+ case GT_LCL_VAR:
+
+ if (tree->gtLclVarCommon.gtLclNum == varNum)
+ {
+ copy = gtNewIconNode(varVal, tree->gtType);
+ }
+ else
+ {
+ // Remember that the LclVar node has been cloned. The flag will
+ // be set on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = gtNewLclvNode(tree->gtLclVar.gtLclNum, tree->gtType, tree->gtLclVar.gtLclILoffs);
+ copy->AsLclVarCommon()->SetSsaNum(tree->AsLclVarCommon()->GetSsaNum());
+ }
+ copy->gtFlags = tree->gtFlags;
+ goto DONE;
+
+ case GT_LCL_FLD:
+ if (tree->gtLclFld.gtLclNum == varNum)
+ {
+ IMPL_LIMITATION("replacing GT_LCL_FLD with a constant");
+ }
+ else
+ {
+ // Remember that the LclVar node has been cloned. The flag will
+ // be set on 'copy' as well.
+ tree->gtFlags |= GTF_VAR_CLONED;
+ copy = new (this, GT_LCL_FLD)
+ GenTreeLclFld(tree->TypeGet(), tree->gtLclFld.gtLclNum, tree->gtLclFld.gtLclOffs);
+ copy->gtLclFld.gtFieldSeq = tree->gtLclFld.gtFieldSeq;
+ copy->gtFlags = tree->gtFlags;
+ }
+ goto DONE;
+
+ case GT_CLS_VAR:
+ copy = new (this, GT_CLS_VAR)
+ GenTreeClsVar(tree->TypeGet(), tree->gtClsVar.gtClsVarHnd, tree->gtClsVar.gtFieldSeq);
+ goto DONE;
+
+ case GT_RET_EXPR:
+ copy = gtNewInlineCandidateReturnExpr(tree->gtRetExpr.gtInlineCandidate, tree->gtType);
+ goto DONE;
+
+ case GT_MEMORYBARRIER:
+ copy = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
+ goto DONE;
+
+ case GT_ARGPLACE:
+ copy = gtNewArgPlaceHolderNode(tree->gtType, tree->gtArgPlace.gtArgPlaceClsHnd);
+ goto DONE;
+
+ case GT_REG_VAR:
+ NO_WAY("Cloning of GT_REG_VAR node not supported");
+ goto DONE;
+
+ case GT_FTN_ADDR:
+ copy = new (this, oper) GenTreeFptrVal(tree->gtType, tree->gtFptrVal.gtFptrMethod);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtFptrVal.gtEntryPoint = tree->gtFptrVal.gtEntryPoint;
+ copy->gtFptrVal.gtLdftnResolvedToken = tree->gtFptrVal.gtLdftnResolvedToken;
+#endif
+ goto DONE;
+
+ case GT_CATCH_ARG:
+ case GT_NO_OP:
+ copy = new (this, oper) GenTree(oper, tree->gtType);
+ goto DONE;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+#endif // !FEATURE_EH_FUNCLETS
+ case GT_JMP:
+ copy = new (this, oper) GenTreeVal(oper, tree->gtType, tree->gtVal.gtVal1);
+ goto DONE;
+
+ case GT_LABEL:
+ copy = new (this, oper) GenTreeLabel(tree->gtLabel.gtLabBB);
+ goto DONE;
+
+ default:
+ NO_WAY("Cloning of node not supported");
+ goto DONE;
+ }
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ /* If necessary, make sure we allocate a "fat" tree node */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if SMALL_TREE_NODES
+ switch (oper)
+ {
+ /* These nodes sometimes get bashed to "fat" ones */
+
+ case GT_MUL:
+ case GT_DIV:
+ case GT_MOD:
+
+ case GT_UDIV:
+ case GT_UMOD:
+
+ // In the implementation of gtNewLargeOperNode you have
+ // to give an oper that will create a small node,
+ // otherwise it asserts.
+ //
+ if (GenTree::s_gtNodeSizes[oper] == TREE_NODE_SZ_SMALL)
+ {
+ copy = gtNewLargeOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1,
+ tree->OperIsBinary() ? tree->gtOp.gtOp2 : nullptr);
+ }
+ else // Always a large tree
+ {
+ if (tree->OperIsBinary())
+ {
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ }
+ else
+ {
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1);
+ }
+ }
+ break;
+
+ case GT_CAST:
+ copy = new (this, LargeOpOpcode()) GenTreeCast(tree->TypeGet(), tree->gtCast.CastOp(),
+ tree->gtCast.gtCastType DEBUGARG(/*largeNode*/ TRUE));
+ break;
+
+ // The nodes below this are not bashed, so they can be allocated at their individual sizes.
+
+ case GT_LIST:
+ // This is ridiculous, but would go away if we made a stronger distinction between argument lists, whose
+ // second argument *must* be an arglist*, and the uses of LIST in copyblk and initblk.
+ if (tree->gtOp.gtOp2 != nullptr && tree->gtOp.gtOp2->OperGet() == GT_LIST)
+ {
+ copy = new (this, GT_LIST) GenTreeArgList(tree->gtOp.gtOp1, tree->gtOp.gtOp2->AsArgList());
+ }
+ else
+ {
+ copy = new (this, GT_LIST) GenTreeOp(GT_LIST, TYP_VOID, tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ }
+ break;
+
+ case GT_INDEX:
+ {
+ GenTreeIndex* asInd = tree->AsIndex();
+ copy = new (this, GT_INDEX)
+ GenTreeIndex(asInd->TypeGet(), asInd->Arr(), asInd->Index(), asInd->gtIndElemSize);
+ copy->AsIndex()->gtStructElemClass = asInd->gtStructElemClass;
+ }
+ break;
+
+ case GT_ALLOCOBJ:
+ {
+ GenTreeAllocObj* asAllocObj = tree->AsAllocObj();
+ copy = new (this, GT_ALLOCOBJ) GenTreeAllocObj(tree->TypeGet(), asAllocObj->gtNewHelper,
+ asAllocObj->gtAllocObjClsHnd, asAllocObj->gtOp1);
+ }
+ break;
+
+ case GT_ARR_LENGTH:
+ copy = new (this, GT_ARR_LENGTH)
+ GenTreeArrLen(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtArrLen.ArrLenOffset());
+ break;
+
+ case GT_ARR_INDEX:
+ copy = new (this, GT_ARR_INDEX)
+ GenTreeArrIndex(tree->TypeGet(), gtCloneExpr(tree->gtArrIndex.ArrObj(), addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtArrIndex.IndexExpr(), addFlags, varNum, varVal),
+ tree->gtArrIndex.gtCurrDim, tree->gtArrIndex.gtArrRank,
+ tree->gtArrIndex.gtArrElemType);
+ break;
+
+ case GT_QMARK:
+ copy = new (this, GT_QMARK) GenTreeQmark(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2, this);
+ VarSetOps::AssignAllowUninitRhs(this, copy->gtQmark.gtThenLiveSet, tree->gtQmark.gtThenLiveSet);
+ VarSetOps::AssignAllowUninitRhs(this, copy->gtQmark.gtElseLiveSet, tree->gtQmark.gtElseLiveSet);
+ break;
+
+ case GT_OBJ:
+ copy = new (this, GT_OBJ)
+ GenTreeObj(tree->TypeGet(), tree->gtOp.gtOp1, tree->AsObj()->gtClass, tree->gtBlk.gtBlkSize);
+ copy->AsObj()->CopyGCInfo(tree->AsObj());
+ copy->gtBlk.gtBlkOpGcUnsafe = tree->gtBlk.gtBlkOpGcUnsafe;
+ break;
+
+ case GT_BLK:
+ copy = new (this, GT_BLK) GenTreeBlk(GT_BLK, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBlk.gtBlkSize);
+ copy->gtBlk.gtBlkOpGcUnsafe = tree->gtBlk.gtBlkOpGcUnsafe;
+ break;
+
+ case GT_DYN_BLK:
+ copy = new (this, GT_DYN_BLK) GenTreeDynBlk(tree->gtOp.gtOp1, tree->gtDynBlk.gtDynamicSize);
+ copy->gtBlk.gtBlkOpGcUnsafe = tree->gtBlk.gtBlkOpGcUnsafe;
+ break;
+
+ case GT_BOX:
+ copy = new (this, GT_BOX)
+ GenTreeBox(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtBox.gtAsgStmtWhenInlinedBoxValue);
+ break;
+
+ case GT_INTRINSIC:
+ copy = new (this, GT_INTRINSIC)
+ GenTreeIntrinsic(tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2,
+ tree->gtIntrinsic.gtIntrinsicId, tree->gtIntrinsic.gtMethodHandle);
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtIntrinsic.gtEntryPoint = tree->gtIntrinsic.gtEntryPoint;
+#endif
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* addrModeOp = tree->AsAddrMode();
+ copy =
+ new (this, GT_LEA) GenTreeAddrMode(addrModeOp->TypeGet(), addrModeOp->Base(), addrModeOp->Index(),
+ addrModeOp->gtScale, addrModeOp->gtOffset);
+ }
+ break;
+
+ case GT_COPY:
+ case GT_RELOAD:
+ {
+ copy = new (this, oper) GenTreeCopyOrReload(oper, tree->TypeGet(), tree->gtGetOp1());
+ }
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ {
+ GenTreeSIMD* simdOp = tree->AsSIMD();
+ copy = gtNewSIMDNode(simdOp->TypeGet(), simdOp->gtGetOp1(), simdOp->gtGetOp2(),
+ simdOp->gtSIMDIntrinsicID, simdOp->gtSIMDBaseType, simdOp->gtSIMDSize);
+ }
+ break;
+#endif
+
+ default:
+ assert(!GenTree::IsExOp(tree->OperKind()) && tree->OperIsSimple());
+ // We're in the SimpleOp case, so it's always unary or binary.
+ if (GenTree::OperIsUnary(tree->OperGet()))
+ {
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, /*doSimplifications*/ false);
+ }
+ else
+ {
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ }
+ break;
+ }
+#else
+ // We're in the SimpleOp case, so it's always unary or binary.
+ copy = gtNewOperNode(oper, tree->TypeGet(), tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+#endif
+
+ // Some flags are conceptually part of the gtOper, and should be copied immediately.
+ if (tree->gtOverflowEx())
+ {
+ copy->gtFlags |= GTF_OVERFLOW;
+ }
+ if (copy->OperGet() == GT_CAST)
+ {
+ copy->gtFlags |= (tree->gtFlags & GTF_UNSIGNED);
+ }
+
+ if (tree->gtOp.gtOp1)
+ {
+ copy->gtOp.gtOp1 = gtCloneExpr(tree->gtOp.gtOp1, addFlags, varNum, varVal);
+ }
+
+ if (tree->gtGetOp2())
+ {
+ copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, varNum, varVal);
+ }
+
+ /* Flags */
+ addFlags |= tree->gtFlags;
+
+ // Copy any node annotations, if necessary.
+ switch (tree->gtOper)
+ {
+ case GT_ASG:
+ {
+ IndirectAssignmentAnnotation* pIndirAnnot = nullptr;
+ if (m_indirAssignMap != nullptr && GetIndirAssignMap()->Lookup(tree, &pIndirAnnot))
+ {
+ IndirectAssignmentAnnotation* pNewIndirAnnot = new (this, CMK_Unknown)
+ IndirectAssignmentAnnotation(pIndirAnnot->m_lclNum, pIndirAnnot->m_fieldSeq,
+ pIndirAnnot->m_isEntire);
+ GetIndirAssignMap()->Set(copy, pNewIndirAnnot);
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ case GT_IND:
+ case GT_OBJ:
+ case GT_STORE_OBJ:
+ if (tree->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+ GetArrayInfoMap()->Set(copy, arrInfo);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+#ifdef DEBUG
+ /* GTF_NODE_MASK should not be propagated from 'tree' to 'copy' */
+ addFlags &= ~GTF_NODE_MASK;
+#endif
+
+ // Effects flags propagate upwards.
+ if (copy->gtOp.gtOp1 != nullptr)
+ {
+ copy->gtFlags |= (copy->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+ }
+ if (copy->gtGetOp2() != nullptr)
+ {
+ copy->gtFlags |= (copy->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
+ }
+
+        // The early morph for TailCall creates a GT_NOP with the GTF_REG_VAL flag set.
+        // Thus we have to copy the gtRegNum/gtRegPair value if we clone it here.
+ //
+ if (addFlags & GTF_REG_VAL)
+ {
+ copy->CopyReg(tree);
+ }
+
+        // We can call gtCloneExpr() before we have called fgMorph, e.g. when we expand a GT_INDEX node in
+        // fgMorphArrayIndex(). The method gtFoldExpr() expects to be run after fgMorph, so it will set the
+        // GTF_DEBUG_NODE_MORPHED flag on nodes that it adds/modifies; when we later call fgMorph we would assert.
+        // We really only need to fold when this method is used to replace references to
+        // a local variable with an integer.
+ //
+ if (varNum != (unsigned)-1)
+ {
+ /* Try to do some folding */
+ copy = gtFoldExpr(copy);
+ }
+
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
+ case GT_STMT:
+ copy = gtCloneExpr(tree->gtStmt.gtStmtExpr, addFlags, varNum, varVal);
+ copy = gtNewStmt(copy, tree->gtStmt.gtStmtILoffsx);
+ goto DONE;
+
+ case GT_CALL:
+
+ copy = new (this, GT_CALL) GenTreeCall(tree->TypeGet());
+
+ copy->gtCall.gtCallObjp =
+ tree->gtCall.gtCallObjp ? gtCloneExpr(tree->gtCall.gtCallObjp, addFlags, varNum, varVal) : nullptr;
+ copy->gtCall.gtCallArgs = tree->gtCall.gtCallArgs
+ ? gtCloneExpr(tree->gtCall.gtCallArgs, addFlags, varNum, varVal)->AsArgList()
+ : nullptr;
+ copy->gtCall.gtCallMoreFlags = tree->gtCall.gtCallMoreFlags;
+ copy->gtCall.gtCallLateArgs =
+ tree->gtCall.gtCallLateArgs
+ ? gtCloneExpr(tree->gtCall.gtCallLateArgs, addFlags, varNum, varVal)->AsArgList()
+ : nullptr;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ copy->gtCall.regArgList = tree->gtCall.regArgList;
+ copy->gtCall.regArgListCount = tree->gtCall.regArgListCount;
+#endif
+
+ // The call sig comes from the EE and doesn't change throughout the compilation process, meaning
+ // we only really need one physical copy of it. Therefore a shallow pointer copy will suffice.
+ // (Note that this still holds even if the tree we are cloning was created by an inlinee compiler,
+ // because the inlinee still uses the inliner's memory allocator anyway.)
+ copy->gtCall.callSig = tree->gtCall.callSig;
+
+ copy->gtCall.gtCallType = tree->gtCall.gtCallType;
+ copy->gtCall.gtReturnType = tree->gtCall.gtReturnType;
+ copy->gtCall.gtControlExpr = tree->gtCall.gtControlExpr;
+
+ /* Copy the union */
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ copy->gtCall.gtCallCookie = tree->gtCall.gtCallCookie
+ ? gtCloneExpr(tree->gtCall.gtCallCookie, addFlags, varNum, varVal)
+ : nullptr;
+ copy->gtCall.gtCallAddr =
+ tree->gtCall.gtCallAddr ? gtCloneExpr(tree->gtCall.gtCallAddr, addFlags, varNum, varVal) : nullptr;
+ }
+ else if (tree->gtFlags & GTF_CALL_VIRT_STUB)
+ {
+ copy->gtCall.gtCallMethHnd = tree->gtCall.gtCallMethHnd;
+ copy->gtCall.gtStubCallStubAddr = tree->gtCall.gtStubCallStubAddr;
+ }
+ else
+ {
+ copy->gtCall.gtCallMethHnd = tree->gtCall.gtCallMethHnd;
+ copy->gtCall.gtInlineCandidateInfo = tree->gtCall.gtInlineCandidateInfo;
+ }
+
+ if (tree->gtCall.fgArgInfo)
+ {
+ // Create and initialize the fgArgInfo for our copy of the call tree
+ copy->gtCall.fgArgInfo = new (this, CMK_Unknown) fgArgInfo(copy, tree);
+ }
+ else
+ {
+ copy->gtCall.fgArgInfo = nullptr;
+ }
+ copy->gtCall.gtRetClsHnd = tree->gtCall.gtRetClsHnd;
+
+#if FEATURE_MULTIREG_RET
+ copy->gtCall.gtReturnTypeDesc = tree->gtCall.gtReturnTypeDesc;
+#endif
+
+#ifdef LEGACY_BACKEND
+ copy->gtCall.gtCallRegUsedMask = tree->gtCall.gtCallRegUsedMask;
+#endif // LEGACY_BACKEND
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtCall.setEntryPoint(tree->gtCall.gtEntryPoint);
+#endif
+
+#ifdef DEBUG
+ copy->gtCall.gtInlineObservation = tree->gtCall.gtInlineObservation;
+#endif
+
+ copy->AsCall()->CopyOtherRegFlags(tree->AsCall());
+ break;
+
+ case GT_FIELD:
+
+ copy = gtNewFieldRef(tree->TypeGet(), tree->gtField.gtFldHnd, nullptr, tree->gtField.gtFldOffset);
+
+ copy->gtField.gtFldObj =
+ tree->gtField.gtFldObj ? gtCloneExpr(tree->gtField.gtFldObj, addFlags, varNum, varVal) : nullptr;
+ copy->gtField.gtFldMayOverlap = tree->gtField.gtFldMayOverlap;
+#ifdef FEATURE_READYTORUN_COMPILER
+ copy->gtField.gtFieldLookup = tree->gtField.gtFieldLookup;
+#endif
+
+ break;
+
+ case GT_ARR_ELEM:
+ {
+ GenTreePtr inds[GT_ARR_MAX_RANK];
+ for (unsigned dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ inds[dim] = gtCloneExpr(tree->gtArrElem.gtArrInds[dim], addFlags, varNum, varVal);
+ }
+ copy = new (this, GT_ARR_ELEM)
+ GenTreeArrElem(tree->TypeGet(), gtCloneExpr(tree->gtArrElem.gtArrObj, addFlags, varNum, varVal),
+ tree->gtArrElem.gtArrRank, tree->gtArrElem.gtArrElemSize, tree->gtArrElem.gtArrElemType,
+ &inds[0]);
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ {
+ copy = new (this, GT_ARR_OFFSET)
+ GenTreeArrOffs(tree->TypeGet(), gtCloneExpr(tree->gtArrOffs.gtOffset, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtArrOffs.gtIndex, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtArrOffs.gtArrObj, addFlags, varNum, varVal),
+ tree->gtArrOffs.gtCurrDim, tree->gtArrOffs.gtArrRank, tree->gtArrOffs.gtArrElemType);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ copy = new (this, GT_CMPXCHG)
+ GenTreeCmpXchg(tree->TypeGet(), gtCloneExpr(tree->gtCmpXchg.gtOpLocation, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtCmpXchg.gtOpValue, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtCmpXchg.gtOpComparand, addFlags, varNum, varVal));
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ copy = new (this, oper) GenTreeBoundsChk(oper, tree->TypeGet(),
+ gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, varNum, varVal),
+ tree->gtBoundsChk.gtThrowKind);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ copy = new (this, oper) GenTreeDynBlk(gtCloneExpr(tree->gtDynBlk.Addr(), addFlags, varNum, varVal),
+ gtCloneExpr(tree->gtDynBlk.gtDynamicSize, addFlags, varNum, varVal));
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ NO_WAY("unexpected operator");
+ }
+
+DONE:
+
+ // If it has a zero-offset field seq, copy annotation.
+ if (tree->TypeGet() == TYP_BYREF)
+ {
+ FieldSeqNode* fldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(tree, &fldSeq))
+ {
+ GetZeroOffsetFieldMap()->Set(copy, fldSeq);
+ }
+ }
+
+    copy->gtVNPair = tree->gtVNPair; // A cloned tree gets the original's Value number pair
+
+ /* We assume the FP stack level will be identical */
+
+ copy->gtCopyFPlvl(tree);
+
+    /* Compute the flags for the copied node. Note that we can do this only
+       if we didn't call gtFoldExpr(copy) */
+
+ if (copy->gtOper == oper)
+ {
+ addFlags |= tree->gtFlags;
+
+#ifdef DEBUG
+ /* GTF_NODE_MASK should not be propagated from 'tree' to 'copy' */
+ addFlags &= ~GTF_NODE_MASK;
+#endif
+ // Some other flags depend on the context of the expression, and should not be preserved.
+ // For example, GTF_RELOP_QMARK:
+ if (copy->OperKind() & GTK_RELOP)
+ {
+ addFlags &= ~GTF_RELOP_QMARK;
+ }
+ // On the other hand, if we're creating such a context, restore this flag.
+ if (copy->OperGet() == GT_QMARK)
+ {
+ copy->gtOp.gtOp1->gtFlags |= GTF_RELOP_QMARK;
+ }
+
+ copy->gtFlags |= addFlags;
+ }
+
+ /* GTF_COLON_COND should be propagated from 'tree' to 'copy' */
+ copy->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
+
+#if defined(DEBUG)
+ // Non-node debug flags should be propagated from 'tree' to 'copy'
+ copy->gtDebugFlags |= (tree->gtDebugFlags & ~GTF_DEBUG_NODE_MASK);
+#endif
+
+ /* Make sure to copy back fields that may have been initialized */
+
+ copy->CopyRawCosts(tree);
+ copy->gtRsvdRegs = tree->gtRsvdRegs;
+ copy->CopyReg(tree);
+ return copy;
+}
+
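+// Example (illustrative): the varNum/varVal substitution is useful when specializing
+// a tree for a known value of a local, e.g. a loop iteration variable. Assuming
+// 'cond' is a comparison tree that references local V03:
+//
+//     // Clone 'cond', replacing every use of V03 with the constant 0.
+//     GenTreePtr specialized = gtCloneExpr(cond, 0, 3 /* varNum */, 0 /* varVal */);
+//
+// Because a substitution was requested, gtCloneExpr also runs gtFoldExpr on the
+// copy, so the result may already be folded down to a constant.
+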
+//------------------------------------------------------------------------
+// gtReplaceTree: Replace a tree with a new tree.
+//
+// Arguments:
+// stmt - The top-level root stmt of the tree being replaced.
+// Must not be null.
+// tree - The tree being replaced. Must not be null.
+// replacementTree - The replacement tree. Must not be null.
+//
+// Return Value:
+// The tree node that replaces the old tree.
+//
+// Assumptions:
+// The sequencing of the stmt has been done.
+//
+// Notes:
+// The caller must ensure that the original statement has been sequenced,
+// but this method will sequence 'replacementTree', and insert it into the
+// proper place in the statement sequence.
+
+GenTreePtr Compiler::gtReplaceTree(GenTreePtr stmt, GenTreePtr tree, GenTreePtr replacementTree)
+{
+ assert(fgStmtListThreaded);
+ assert(tree != nullptr);
+ assert(stmt != nullptr);
+ assert(replacementTree != nullptr);
+
+ GenTreePtr* treePtr = nullptr;
+ GenTreePtr treeParent = tree->gtGetParent(&treePtr);
+
+ assert(treeParent != nullptr || tree == stmt->gtStmt.gtStmtExpr);
+
+ if (treePtr == nullptr)
+ {
+ // Replace the stmt expr and rebuild the linear order for "stmt".
+ assert(treeParent == nullptr);
+ assert(fgOrder != FGOrderLinear);
+        stmt->gtStmt.gtStmtExpr = replacementTree;
+ fgSetStmtSeq(stmt);
+ }
+ else
+ {
+ assert(treeParent != nullptr);
+
+ GenTreePtr treeFirstNode = fgGetFirstNode(tree);
+ GenTreePtr treeLastNode = tree;
+ GenTreePtr treePrevNode = treeFirstNode->gtPrev;
+ GenTreePtr treeNextNode = treeLastNode->gtNext;
+
+ *treePtr = replacementTree;
+
+ // Build the linear order for "replacementTree".
+ fgSetTreeSeq(replacementTree, treePrevNode);
+
+ // Restore linear-order Prev and Next for "replacementTree".
+ if (treePrevNode != nullptr)
+ {
+ treeFirstNode = fgGetFirstNode(replacementTree);
+ treeFirstNode->gtPrev = treePrevNode;
+ treePrevNode->gtNext = treeFirstNode;
+ }
+ else
+ {
+            // Update the linear order start of "stmt" if treeFirstNode
+            // appears to have replaced the original first node.
+ assert(treeFirstNode == stmt->gtStmt.gtStmtList);
+ stmt->gtStmt.gtStmtList = fgGetFirstNode(replacementTree);
+ }
+
+ if (treeNextNode != nullptr)
+ {
+ treeLastNode = replacementTree;
+ treeLastNode->gtNext = treeNextNode;
+ treeNextNode->gtPrev = treeLastNode;
+ }
+
+ bool needFixupCallArg = false;
+ GenTreePtr node = treeParent;
+
+ // If we have replaced an arg, then update pointers in argtable.
+ do
+ {
+ // Look for the first enclosing callsite
+ switch (node->OperGet())
+ {
+ case GT_LIST:
+ case GT_ARGPLACE:
+ // "tree" is likely an argument of a call.
+ needFixupCallArg = true;
+ break;
+
+ case GT_CALL:
+ if (needFixupCallArg)
+ {
+ // We have replaced an arg, so update pointers in argtable.
+ fgFixupArgTabEntryPtr(node, tree, replacementTree);
+ needFixupCallArg = false;
+ }
+ break;
+
+ default:
+                    // "tree" is unlikely to be an argument of a call.
+ needFixupCallArg = false;
+ break;
+ }
+
+ if (needFixupCallArg)
+ {
+ // Keep tracking to update the first enclosing call.
+ node = node->gtGetParent(nullptr);
+ }
+ else
+ {
+ // Stop tracking.
+ node = nullptr;
+ }
+ } while (node != nullptr);
+
+ // Propagate side-effect flags of "replacementTree" to its parents if needed.
+ gtUpdateSideEffects(treeParent, tree->gtFlags, replacementTree->gtFlags);
+ }
+
+ return replacementTree;
+}
+
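+// Example (illustrative sketch): replacing a subtree with a constant inside an
+// already-sequenced statement, where 'stmt' is the enclosing GT_STMT, 'oldTree' is an
+// integral-typed node somewhere under stmt->gtStmt.gtStmtExpr:
+//
+//     GenTreePtr newTree = gtNewIconNode(0, oldTree->TypeGet());
+//     gtReplaceTree(stmt, oldTree, newTree);
+//
+// gtReplaceTree re-threads the gtPrev/gtNext links around 'newTree', fixes up the
+// call argument table if 'oldTree' was a call argument, and lets gtUpdateSideEffects
+// (below) refresh the effect flags on the ancestors.
+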
+//------------------------------------------------------------------------
+// gtUpdateSideEffects: Update the side effects for ancestors.
+//
+// Arguments:
+// treeParent - The immediate parent node.
+// oldGtFlags - The stale gtFlags.
+// newGtFlags - The new gtFlags.
+//
+// Assumptions:
+// Linear order of the stmt has been established.
+//
+// Notes:
+// The routine is used for updating the stale side effect flags for ancestor
+// nodes starting from treeParent up to the top-level stmt expr.
+
+void Compiler::gtUpdateSideEffects(GenTreePtr treeParent, unsigned oldGtFlags, unsigned newGtFlags)
+{
+ assert(fgStmtListThreaded);
+
+ oldGtFlags = oldGtFlags & GTF_ALL_EFFECT;
+ newGtFlags = newGtFlags & GTF_ALL_EFFECT;
+
+ if (oldGtFlags != newGtFlags)
+ {
+ while (treeParent)
+ {
+ treeParent->gtFlags &= ~oldGtFlags;
+ treeParent->gtFlags |= newGtFlags;
+ treeParent = treeParent->gtGetParent(nullptr);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Compares two trees and returns true when both trees are the same.
+ * Instead of fully comparing the two trees this method may simply return false,
+ * so callers should not assume that the trees are different when false is returned.
+ * Only when true is returned can the caller perform code optimizations.
+ * The current implementation only compares a limited set of LEAF/CONST nodes
+ * and returns false for all other trees.
+ */
+bool Compiler::gtCompareTree(GenTree* op1, GenTree* op2)
+{
+ /* Make sure that both trees are of the same GT node kind */
+ if (op1->OperGet() != op2->OperGet())
+ {
+ return false;
+ }
+
+ /* Make sure that both trees are returning the same type */
+ if (op1->gtType != op2->gtType)
+ {
+ return false;
+ }
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = op1->OperGet();
+ unsigned kind = op1->OperKind();
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ switch (oper)
+ {
+ case GT_CNS_INT:
+ if ((op1->gtIntCon.gtIconVal == op2->gtIntCon.gtIconVal) && GenTree::SameIconHandleFlag(op1, op2))
+ {
+ return true;
+ }
+ break;
+
+ case GT_CNS_LNG:
+ if (op1->gtLngCon.gtLconVal == op2->gtLngCon.gtLconVal)
+ {
+ return true;
+ }
+ break;
+
+ case GT_CNS_STR:
+ if (op1->gtStrCon.gtSconCPX == op2->gtStrCon.gtSconCPX)
+ {
+ return true;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ if (op1->gtLclVarCommon.gtLclNum == op2->gtLclVarCommon.gtLclNum)
+ {
+ return true;
+ }
+ break;
+
+ case GT_CLS_VAR:
+ if (op1->gtClsVar.gtClsVarHnd == op2->gtClsVar.gtClsVarHnd)
+ {
+ return true;
+ }
+ break;
+
+ default:
+ // we return false for these unhandled 'oper' kinds
+ break;
+ }
+ }
+ return false;
+}
+
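+// Example (illustrative): because the comparison is conservative, only a 'true'
+// result carries information:
+//
+//     if (gtCompareTree(tree1, tree2))
+//     {
+//         // Safe: the two trees are known to denote the same value.
+//     }
+//     else
+//     {
+//         // No conclusion: the trees may still be identical.
+//     }
+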
+GenTreePtr Compiler::gtGetThisArg(GenTreePtr call)
+{
+ assert(call->gtOper == GT_CALL);
+
+ if (call->gtCall.gtCallObjp != nullptr)
+ {
+ if (call->gtCall.gtCallObjp->gtOper != GT_NOP && call->gtCall.gtCallObjp->gtOper != GT_ASG)
+ {
+ if (!(call->gtCall.gtCallObjp->gtFlags & GTF_LATE_ARG))
+ {
+ return call->gtCall.gtCallObjp;
+ }
+ }
+
+ if (call->gtCall.gtCallLateArgs)
+ {
+ regNumber thisReg = REG_ARG_0;
+ unsigned argNum = 0;
+ fgArgTabEntryPtr thisArgTabEntry = gtArgEntryByArgNum(call, argNum);
+ GenTreePtr result = thisArgTabEntry->node;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ GenTreePtr lateArgs = call->gtCall.gtCallLateArgs;
+ regList list = call->gtCall.regArgList;
+ int index = 0;
+            while (lateArgs != nullptr)
+ {
+ assert(lateArgs->gtOper == GT_LIST);
+ assert(index < call->gtCall.regArgListCount);
+ regNumber curArgReg = list[index];
+ if (curArgReg == thisReg)
+ {
+                    if (optAssertionPropagatedCurrentStmt)
+                    {
+                        result = lateArgs->gtOp.gtOp1;
+                    }
+
+ assert(result == lateArgs->gtOp.gtOp1);
+ }
+
+ lateArgs = lateArgs->gtOp.gtOp2;
+ index++;
+ }
+#endif
+ return result;
+ }
+ }
+ return nullptr;
+}
+
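+// Example (illustrative; 'callTree' is an assumed GT_CALL node): gtGetThisArg finds
+// the 'this' argument of a call whether it is still attached as gtCallObjp or has
+// been moved to the late argument list:
+//
+//     GenTreePtr thisPtr = gtGetThisArg(callTree);
+//     if ((thisPtr != nullptr) && (thisPtr->gtOper == GT_LCL_VAR))
+//     {
+//         unsigned thisLclNum = thisPtr->gtLclVarCommon.gtLclNum;
+//     }
+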
+bool GenTree::gtSetFlags() const
+{
+    //
+    // When FEATURE_SET_FLAGS (_TARGET_ARM_) is active, this method returns true
+    // only when gtFlags has the GTF_SET_FLAGS flag set.
+    // Otherwise, the architecture has instructions that typically set the flags
+    // as a side effect, and this method returns true for them.
+    //
+    // Exceptions: GT_IND (load/store) is not allowed to set the flags,
+    // and on XARCH GT_MUL/GT_DIV and all overflow instructions
+    // do not set the condition flags.
+    //
+ // Precondition we have a GTK_SMPOP
+ //
+ assert(OperIsSimple());
+
+ if (!varTypeIsIntegralOrI(TypeGet()))
+ {
+ return false;
+ }
+
+#if FEATURE_SET_FLAGS
+
+ if ((gtFlags & GTF_SET_FLAGS) && gtOper != GT_IND)
+ {
+ // GTF_SET_FLAGS is not valid on GT_IND and is overlaid with GTF_NONFAULTING_IND
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+
+#else // !FEATURE_SET_FLAGS
+
+#ifdef _TARGET_XARCH_
+ // Return true if/when the codegen for this node will set the flags
+ //
+ //
+ if ((gtOper == GT_IND) || (gtOper == GT_MUL) || (gtOper == GT_DIV))
+ {
+ return false;
+ }
+ else if (gtOverflowEx())
+ {
+ return false;
+ }
+ else
+ {
+ return true;
+ }
+#else
+ // Otherwise for other architectures we should return false
+ return false;
+#endif
+
+#endif // !FEATURE_SET_FLAGS
+}
+
+bool GenTree::gtRequestSetFlags()
+{
+ bool result = false;
+
+#if FEATURE_SET_FLAGS
+ // This method is a Nop unless FEATURE_SET_FLAGS is defined
+
+ // In order to set GTF_SET_FLAGS
+ // we must have a GTK_SMPOP
+    // and we have an integer or machine size type (not floating point or TYP_LONG on 32-bit)
+ //
+ if (!OperIsSimple())
+ return false;
+
+ if (!varTypeIsIntegralOrI(TypeGet()))
+ return false;
+
+ switch (gtOper)
+ {
+ case GT_IND:
+ case GT_ARR_LENGTH:
+ // These will turn into simple load from memory instructions
+ // and we can't force the setting of the flags on load from memory
+ break;
+
+ case GT_MUL:
+ case GT_DIV:
+ // These instructions don't set the flags (on x86/x64)
+ //
+ break;
+
+ default:
+ // Otherwise we can set the flags for this gtOper
+ // and codegen must set the condition flags.
+ //
+ gtFlags |= GTF_SET_FLAGS;
+ result = true;
+ break;
+ }
+#endif // FEATURE_SET_FLAGS
+
+ // Codegen for this tree must set the condition flags if
+ // this method returns true.
+ //
+ return result;
+}
+
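+// Example (illustrative): on targets with FEATURE_SET_FLAGS (ARM), a phase that wants
+// to remove a compare-against-zero can ask the operand to produce the flags instead.
+// Assuming 'op' is the integral GTK_SMPOP node feeding the compare:
+//
+//     if (op->gtRequestSetFlags())
+//     {
+//         // GTF_SET_FLAGS is now set on 'op'; gtSetFlags() reports true and codegen
+//         // must emit the flag-setting form of the instruction.
+//         assert(op->gtSetFlags());
+//     }
+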
+/*****************************************************************************/
+void GenTree::CopyTo(class Compiler* comp, const GenTree& gt)
+{
+ gtOper = gt.gtOper;
+ gtType = gt.gtType;
+ gtAssertionNum = gt.gtAssertionNum;
+
+ gtRegNum = gt.gtRegNum; // one union member.
+ CopyCosts(&gt);
+
+ gtFlags = gt.gtFlags;
+ gtVNPair = gt.gtVNPair;
+
+ gtRsvdRegs = gt.gtRsvdRegs;
+
+#ifdef LEGACY_BACKEND
+ gtUsedRegs = gt.gtUsedRegs;
+#endif // LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+ gtFPlvl = gt.gtFPlvl;
+#endif // FEATURE_STACK_FP_X87
+
+ gtNext = gt.gtNext;
+ gtPrev = gt.gtPrev;
+#ifdef DEBUG
+ gtTreeID = gt.gtTreeID;
+ gtSeqNum = gt.gtSeqNum;
+#endif
+ // Largest node subtype:
+ void* remDst = reinterpret_cast<char*>(this) + sizeof(GenTree);
+ void* remSrc = reinterpret_cast<char*>(const_cast<GenTree*>(&gt)) + sizeof(GenTree);
+ memcpy(remDst, remSrc, TREE_NODE_SZ_LARGE - sizeof(GenTree));
+}
+
+void GenTree::CopyToSmall(const GenTree& gt)
+{
+ // Small node size is defined by GenTreeOp.
+ void* remDst = reinterpret_cast<char*>(this) + sizeof(GenTree);
+ void* remSrc = reinterpret_cast<char*>(const_cast<GenTree*>(&gt)) + sizeof(GenTree);
+ memcpy(remDst, remSrc, TREE_NODE_SZ_SMALL - sizeof(GenTree));
+}
+
+unsigned GenTree::NumChildren()
+{
+ if (OperIsConst() || OperIsLeaf())
+ {
+ return 0;
+ }
+ else if (OperIsUnary())
+ {
+ if (OperGet() == GT_NOP || OperGet() == GT_RETURN || OperGet() == GT_RETFILT)
+ {
+ if (gtOp.gtOp1 == nullptr)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
+ }
+ else
+ {
+ return 1;
+ }
+ }
+ else if (OperIsBinary())
+ {
+ // All binary operators except LEA have at least one arg; the second arg may sometimes be null, however.
+ if (OperGet() == GT_LEA)
+ {
+ unsigned childCount = 0;
+ if (gtOp.gtOp1 != nullptr)
+ {
+ childCount++;
+ }
+ if (gtOp.gtOp2 != nullptr)
+ {
+ childCount++;
+ }
+ return childCount;
+ }
+ // Special case for assignment of dynamic block.
+ // This is here to duplicate the former case where the size may be evaluated prior to the
+ // source and destination addresses. In order to do this, we treat the size as a child of the
+ // assignment.
+ // TODO-1stClassStructs-Cleanup: Remove all this special casing, and ensure that the diffs are reasonable.
+ if ((OperGet() == GT_ASG) && (gtOp.gtOp1->OperGet() == GT_DYN_BLK) && (gtOp.gtOp1->AsDynBlk()->gtEvalSizeFirst))
+ {
+ return 3;
+ }
+ assert(gtOp.gtOp1 != nullptr);
+ if (gtOp.gtOp2 == nullptr)
+ {
+ return 1;
+ }
+ else
+ {
+ return 2;
+ }
+ }
+ else
+ {
+ // Special
+ switch (OperGet())
+ {
+ case GT_CMPXCHG:
+ return 3;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ return 2;
+
+ case GT_FIELD:
+ case GT_STMT:
+ return 1;
+
+ case GT_ARR_ELEM:
+ return 1 + AsArrElem()->gtArrRank;
+
+ // This really has two children, but if the size is evaluated first, we treat it as a child of the
+ // parent assignment.
+ case GT_DYN_BLK:
+ if (AsDynBlk()->gtEvalSizeFirst)
+ {
+ return 1;
+ }
+ else
+ {
+ return 2;
+ }
+
+ case GT_ARR_OFFSET:
+ case GT_STORE_DYN_BLK:
+ return 3;
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = AsCall();
+ unsigned res = 0; // arg list(s) (including late args).
+ if (call->gtCallObjp != nullptr)
+ {
+ res++; // Add objp?
+ }
+ if (call->gtCallArgs != nullptr)
+ {
+ res++; // Add args?
+ }
+ if (call->gtCallLateArgs != nullptr)
+ {
+ res++; // Add late args?
+ }
+ if (call->gtControlExpr != nullptr)
+ {
+ res++;
+ }
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ if (call->gtCallCookie != nullptr)
+ {
+ res++;
+ }
+ if (call->gtCallAddr != nullptr)
+ {
+ res++;
+ }
+ }
+ return res;
+ }
+ case GT_NONE:
+ return 0;
+ default:
+ unreached();
+ }
+ }
+}
+
+GenTreePtr GenTree::GetChild(unsigned childNum)
+{
+ assert(childNum < NumChildren()); // Precondition.
+ assert(NumChildren() <= MAX_CHILDREN);
+ assert(!(OperIsConst() || OperIsLeaf()));
+ if (OperIsUnary())
+ {
+ return AsUnOp()->gtOp1;
+ }
+ // Special case for assignment of dynamic block.
+ // This code is here to duplicate the former case where the size may be evaluated prior to the
+ // source and destination addresses. In order to do this, we treat the size as a child of the
+ // assignment.
+ // TODO-1stClassStructs: Revisit the need to duplicate former behavior, so that we can remove
+ // these special cases.
+ if ((OperGet() == GT_ASG) && (gtOp.gtOp1->OperGet() == GT_DYN_BLK) && (childNum == 2))
+ {
+ return gtOp.gtOp1->AsDynBlk()->gtDynamicSize;
+ }
+ else if (OperIsBinary())
+ {
+ if (OperIsAddrMode())
+ {
+ // If this is the first (0th) child, only return op1 if it is non-null
+ // Otherwise, we return gtOp2.
+ if (childNum == 0 && AsOp()->gtOp1 != nullptr)
+ {
+ return AsOp()->gtOp1;
+ }
+ return AsOp()->gtOp2;
+ }
+ // TODO-Cleanup: Consider handling ReverseOps here, and then we wouldn't have to handle it in
+ // fgGetFirstNode(). However, it seems that it causes loop hoisting behavior to change.
+ if (childNum == 0)
+ {
+ return AsOp()->gtOp1;
+ }
+ else
+ {
+ return AsOp()->gtOp2;
+ }
+ }
+ else
+ {
+ // Special
+ switch (OperGet())
+ {
+ case GT_CMPXCHG:
+ switch (childNum)
+ {
+ case 0:
+ return AsCmpXchg()->gtOpLocation;
+ case 1:
+ return AsCmpXchg()->gtOpValue;
+ case 2:
+ return AsCmpXchg()->gtOpComparand;
+ default:
+ unreached();
+ }
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ switch (childNum)
+ {
+ case 0:
+ return AsBoundsChk()->gtArrLen;
+ case 1:
+ return AsBoundsChk()->gtIndex;
+ default:
+ unreached();
+ }
+
+ case GT_STORE_DYN_BLK:
+ switch (childNum)
+ {
+ case 0:
+ return AsDynBlk()->Addr();
+ case 1:
+ return AsDynBlk()->Data();
+ case 2:
+ return AsDynBlk()->gtDynamicSize;
+ default:
+ unreached();
+ }
+ case GT_DYN_BLK:
+ switch (childNum)
+ {
+ case 0:
+ return AsDynBlk()->Addr();
+ case 1:
+ assert(!AsDynBlk()->gtEvalSizeFirst);
+ return AsDynBlk()->gtDynamicSize;
+ default:
+ unreached();
+ }
+
+ case GT_FIELD:
+ return AsField()->gtFldObj;
+
+ case GT_STMT:
+ return AsStmt()->gtStmtExpr;
+
+ case GT_ARR_ELEM:
+ if (childNum == 0)
+ {
+ return AsArrElem()->gtArrObj;
+ }
+ else
+ {
+ return AsArrElem()->gtArrInds[childNum - 1];
+ }
+
+ case GT_ARR_OFFSET:
+ switch (childNum)
+ {
+ case 0:
+ return AsArrOffs()->gtOffset;
+ case 1:
+ return AsArrOffs()->gtIndex;
+ case 2:
+ return AsArrOffs()->gtArrObj;
+ default:
+ unreached();
+ }
+
+ case GT_CALL:
+ {
+                // The if chain below assumes that all possible children are non-null.
+                // If some are null, we "virtually skip" them by bumping 'childNum'
+                // past each missing child before indexing into the remaining ones.
+ GenTreeCall* call = AsCall();
+
+ if (call->gtCallObjp == nullptr)
+ {
+ childNum++;
+ }
+ if (childNum >= 1 && call->gtCallArgs == nullptr)
+ {
+ childNum++;
+ }
+ if (childNum >= 2 && call->gtCallLateArgs == nullptr)
+ {
+ childNum++;
+ }
+ if (childNum >= 3 && call->gtControlExpr == nullptr)
+ {
+ childNum++;
+ }
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ if (childNum >= 4 && call->gtCallCookie == nullptr)
+ {
+ childNum++;
+ }
+ }
+
+ if (childNum == 0)
+ {
+ return call->gtCallObjp;
+ }
+ else if (childNum == 1)
+ {
+ return call->gtCallArgs;
+ }
+ else if (childNum == 2)
+ {
+ return call->gtCallLateArgs;
+ }
+ else if (childNum == 3)
+ {
+ return call->gtControlExpr;
+ }
+ else
+ {
+ assert(call->gtCallType == CT_INDIRECT);
+ if (childNum == 4)
+ {
+ return call->gtCallCookie;
+ }
+ else
+ {
+ assert(childNum == 5);
+ return call->gtCallAddr;
+ }
+ }
+ }
+ case GT_NONE:
+ unreached();
+ default:
+ unreached();
+ }
+ }
+}
+
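+// Example (illustrative): NumChildren/GetChild provide a uniform, index-based view of
+// a node's children, e.g. for a simple recursive walk:
+//
+//     void WalkChildren(GenTree* node)
+//     {
+//         for (unsigned i = 0; i < node->NumChildren(); i++)
+//         {
+//             GenTreePtr child = node->GetChild(i);
+//             if (child != nullptr) // e.g. GT_FIELD may have a null object child
+//             {
+//                 WalkChildren(child);
+//             }
+//         }
+//     }
+//
+// Newer code generally prefers the use-edge/operand iterators defined below, which
+// also expand call argument lists into the individual arguments.
+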
+GenTreeUseEdgeIterator::GenTreeUseEdgeIterator()
+ : m_node(nullptr)
+ , m_edge(nullptr)
+ , m_argList(nullptr)
+ , m_state(-1)
+{
+}
+
+GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
+ : m_node(node)
+ , m_edge(nullptr)
+ , m_argList(nullptr)
+ , m_state(0)
+{
+ assert(m_node != nullptr);
+
+ // Advance to the first operand.
+ ++(*this);
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::GetNextUseEdge:
+// Gets the next operand of a node with a fixed number of operands.
+//    This covers all nodes besides GT_CALL, GT_PHI, GT_SIMD, and aggregate GT_LIST
+//    nodes, which have their own MoveToNext*UseEdge methods. For the node types
+//    handled by this method, the `m_state` field indicates the index of the next
+//    operand to produce.
+//
+// Returns:
+// The node's next operand or nullptr if all operands have been
+// produced.
+//
+GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const
+{
+ switch (m_node->OperGet())
+ {
+ case GT_CMPXCHG:
+ switch (m_state)
+ {
+ case 0:
+ return &m_node->AsCmpXchg()->gtOpLocation;
+ case 1:
+ return &m_node->AsCmpXchg()->gtOpValue;
+ case 2:
+ return &m_node->AsCmpXchg()->gtOpComparand;
+ default:
+ return nullptr;
+ }
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ switch (m_state)
+ {
+ case 0:
+ return &m_node->AsBoundsChk()->gtArrLen;
+ case 1:
+ return &m_node->AsBoundsChk()->gtIndex;
+ default:
+ return nullptr;
+ }
+
+ case GT_FIELD:
+ if (m_state == 0)
+ {
+ return &m_node->AsField()->gtFldObj;
+ }
+ return nullptr;
+
+ case GT_STMT:
+ if (m_state == 0)
+ {
+ return &m_node->AsStmt()->gtStmtExpr;
+ }
+ return nullptr;
+
+ case GT_ARR_ELEM:
+ if (m_state == 0)
+ {
+ return &m_node->AsArrElem()->gtArrObj;
+ }
+ else if (m_state <= m_node->AsArrElem()->gtArrRank)
+ {
+ return &m_node->AsArrElem()->gtArrInds[m_state - 1];
+ }
+ return nullptr;
+
+ case GT_ARR_OFFSET:
+ switch (m_state)
+ {
+ case 0:
+ return &m_node->AsArrOffs()->gtOffset;
+ case 1:
+ return &m_node->AsArrOffs()->gtIndex;
+ case 2:
+ return &m_node->AsArrOffs()->gtArrObj;
+ default:
+ return nullptr;
+ }
+
+        // Call, phi, and SIMD nodes are handled by MoveToNext{Call,Phi,SIMD}UseEdge, respectively.
+ case GT_CALL:
+ case GT_PHI:
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+#endif
+ break;
+
+ case GT_ASG:
+ {
+ bool operandsReversed = (m_node->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ return !operandsReversed ? &(m_node->AsOp()->gtOp1) : &(m_node->AsOp()->gtOp2);
+ case 1:
+ return !operandsReversed ? &(m_node->AsOp()->gtOp2) : &(m_node->AsOp()->gtOp1);
+ default:
+ return nullptr;
+ }
+ }
+
+ case GT_DYN_BLK:
+ switch (m_state)
+ {
+ case 0:
+ return &(m_node->AsDynBlk()->gtOp1);
+ case 1:
+ return &(m_node->AsDynBlk()->gtDynamicSize);
+ default:
+ return nullptr;
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ switch (m_state)
+ {
+ case 0:
+ return &(m_node->AsDynBlk()->gtOp1);
+ case 1:
+ return &(m_node->AsDynBlk()->gtOp2);
+ case 2:
+ return &(m_node->AsDynBlk()->gtDynamicSize);
+ default:
+ return nullptr;
+ }
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* lea = m_node->AsAddrMode();
+
+ bool hasOp1 = lea->gtOp1 != nullptr;
+ if (!hasOp1)
+ {
+ return m_state == 0 ? &lea->gtOp2 : nullptr;
+ }
+
+ bool operandsReversed = (lea->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ return !operandsReversed ? &lea->gtOp1 : &lea->gtOp2;
+ case 1:
+ return !operandsReversed ? &lea->gtOp2 : &lea->gtOp1;
+ default:
+ return nullptr;
+ }
+ }
+ break;
+
+ case GT_LIST:
+ if (m_node->AsArgList()->IsAggregate())
+ {
+                // List nodes that represent aggregates are handled by MoveToNextAggregateUseEdge.
+ break;
+ }
+ __fallthrough;
+
+ default:
+ if (m_node->OperIsConst() || m_node->OperIsLeaf())
+ {
+ return nullptr;
+ }
+ else if (m_node->OperIsUnary())
+ {
+ return m_state == 0 ? &m_node->AsUnOp()->gtOp1 : nullptr;
+ }
+ else if (m_node->OperIsBinary())
+ {
+ bool operandsReversed = (m_node->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ return !operandsReversed ? &m_node->AsOp()->gtOp1 : &m_node->AsOp()->gtOp2;
+ case 1:
+ return !operandsReversed ? &m_node->AsOp()->gtOp2 : &m_node->AsOp()->gtOp1;
+ default:
+ return nullptr;
+ }
+ }
+ }
+
+ unreached();
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextCallUseEdge:
+// Moves to the next operand of a call node. Unlike the simple nodes
+// handled by `GetNextUseEdge`, call nodes have a variable number of
+// operands stored in cons lists. This method expands the cons lists
+// into the operands stored within.
+//
+void GenTreeUseEdgeIterator::MoveToNextCallUseEdge()
+{
+ enum
+ {
+ CALL_INSTANCE = 0,
+ CALL_ARGS = 1,
+ CALL_LATE_ARGS = 2,
+ CALL_CONTROL_EXPR = 3,
+ CALL_COOKIE = 4,
+ CALL_ADDRESS = 5,
+ CALL_TERMINAL = 6,
+ };
+
+ GenTreeCall* call = m_node->AsCall();
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case CALL_INSTANCE:
+ m_state = CALL_ARGS;
+ m_argList = call->gtCallArgs;
+
+ if (call->gtCallObjp != nullptr)
+ {
+ m_edge = &call->gtCallObjp;
+ return;
+ }
+ break;
+
+ case CALL_ARGS:
+ case CALL_LATE_ARGS:
+ if (m_argList == nullptr)
+ {
+ m_state++;
+
+ if (m_state == CALL_LATE_ARGS)
+ {
+ m_argList = call->gtCallLateArgs;
+ }
+ }
+ else
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ break;
+
+ case CALL_CONTROL_EXPR:
+ m_state = call->gtCallType == CT_INDIRECT ? CALL_COOKIE : CALL_TERMINAL;
+
+ if (call->gtControlExpr != nullptr)
+ {
+ m_edge = &call->gtControlExpr;
+ return;
+ }
+ break;
+
+            case CALL_COOKIE:
+ assert(call->gtCallType == CT_INDIRECT);
+
+ m_state = CALL_ADDRESS;
+
+ if (call->gtCallCookie != nullptr)
+ {
+ m_edge = &call->gtCallCookie;
+ return;
+ }
+ break;
+
+            case CALL_ADDRESS:
+ assert(call->gtCallType == CT_INDIRECT);
+
+ m_state = CALL_TERMINAL;
+ if (call->gtCallAddr != nullptr)
+ {
+ m_edge = &call->gtCallAddr;
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextPhiUseEdge:
+// Moves to the next operand of a phi node. Unlike the simple nodes
+// handled by `GetNextUseEdge`, phi nodes have a variable number of
+// operands stored in a cons list. This method expands the cons list
+// into the operands stored within.
+//
+void GenTreeUseEdgeIterator::MoveToNextPhiUseEdge()
+{
+ GenTreeUnOp* phi = m_node->AsUnOp();
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case 0:
+ m_state = 1;
+ m_argList = phi->gtOp1;
+ break;
+
+ case 1:
+ if (m_argList == nullptr)
+ {
+ m_state = 2;
+ }
+ else
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge:
+// Moves to the next operand of a SIMD node. Most SIMD nodes have a
+// fixed number of operands and are handled accordingly.
+// `SIMDIntrinsicInitN` nodes, however, have a variable number of
+// operands stored in a cons list. This method expands the cons list
+// into the operands stored within.
+//
+void GenTreeUseEdgeIterator::MoveToNextSIMDUseEdge()
+{
+ GenTreeSIMD* simd = m_node->AsSIMD();
+
+ if (simd->gtSIMDIntrinsicID != SIMDIntrinsicInitN)
+ {
+ bool operandsReversed = (simd->gtFlags & GTF_REVERSE_OPS) != 0;
+ switch (m_state)
+ {
+ case 0:
+ m_edge = !operandsReversed ? &simd->gtOp1 : &simd->gtOp2;
+ break;
+ case 1:
+ m_edge = !operandsReversed ? &simd->gtOp2 : &simd->gtOp1;
+ break;
+ default:
+ m_edge = nullptr;
+ break;
+ }
+
+ if (m_edge != nullptr && *m_edge != nullptr)
+ {
+ m_state++;
+ }
+ else
+ {
+ m_node = nullptr;
+ m_state = -1;
+ }
+
+ return;
+ }
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case 0:
+ m_state = 1;
+ m_argList = simd->gtOp1;
+ break;
+
+ case 1:
+ if (m_argList == nullptr)
+ {
+ m_state = 2;
+ }
+ else
+ {
+ GenTreeArgList* argNode = m_argList->AsArgList();
+ m_edge = &argNode->gtOp1;
+ m_argList = argNode->Rest();
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+#endif // FEATURE_SIMD
+
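+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge:
+//    Moves to the next operand of an aggregate GT_LIST node. Like calls and
+//    phis, these nodes store a variable number of operands in a cons list,
+//    which this method expands into the operands stored within.
+//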
+void GenTreeUseEdgeIterator::MoveToNextAggregateUseEdge()
+{
+ assert(m_node->OperGet() == GT_LIST);
+ assert(m_node->AsArgList()->IsAggregate());
+
+ for (;;)
+ {
+ switch (m_state)
+ {
+ case 0:
+ m_state = 1;
+ m_argList = m_node;
+ break;
+
+ case 1:
+ if (m_argList == nullptr)
+ {
+ m_state = 2;
+ }
+ else
+ {
+ GenTreeArgList* aggNode = m_argList->AsArgList();
+ m_edge = &aggNode->gtOp1;
+ m_argList = aggNode->Rest();
+ return;
+ }
+ break;
+
+ default:
+ m_node = nullptr;
+ m_edge = nullptr;
+ m_argList = nullptr;
+ m_state = -1;
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator::operator++:
+// Advances the iterator to the next operand.
+//
+GenTreeUseEdgeIterator& GenTreeUseEdgeIterator::operator++()
+{
+ if (m_state == -1)
+ {
+ // If we've reached the terminal state, do nothing.
+ assert(m_node == nullptr);
+ assert(m_edge == nullptr);
+ assert(m_argList == nullptr);
+ }
+ else
+ {
+ // Otherwise, move to the next operand in the node.
+ genTreeOps op = m_node->OperGet();
+ if (op == GT_CALL)
+ {
+ MoveToNextCallUseEdge();
+ }
+ else if (op == GT_PHI)
+ {
+ MoveToNextPhiUseEdge();
+ }
+#ifdef FEATURE_SIMD
+ else if (op == GT_SIMD)
+ {
+ MoveToNextSIMDUseEdge();
+ }
+#endif
+ else if ((op == GT_LIST) && (m_node->AsArgList()->IsAggregate()))
+ {
+ MoveToNextAggregateUseEdge();
+ }
+ else
+ {
+ m_edge = GetNextUseEdge();
+ if (m_edge != nullptr && *m_edge != nullptr)
+ {
+ m_state++;
+ }
+ else
+ {
+ m_edge = nullptr;
+ m_node = nullptr;
+ m_state = -1;
+ }
+ }
+ }
+
+ return *this;
+}
+
+GenTreeUseEdgeIterator GenTree::UseEdgesBegin()
+{
+ return GenTreeUseEdgeIterator(this);
+}
+
+GenTreeUseEdgeIterator GenTree::UseEdgesEnd()
+{
+ return GenTreeUseEdgeIterator();
+}
+
+IteratorPair<GenTreeUseEdgeIterator> GenTree::UseEdges()
+{
+ return MakeIteratorPair(UseEdgesBegin(), UseEdgesEnd());
+}
+
+GenTreeOperandIterator GenTree::OperandsBegin()
+{
+ return GenTreeOperandIterator(this);
+}
+
+GenTreeOperandIterator GenTree::OperandsEnd()
+{
+ return GenTreeOperandIterator();
+}
+
+IteratorPair<GenTreeOperandIterator> GenTree::Operands()
+{
+ return MakeIteratorPair(OperandsBegin(), OperandsEnd());
+}
+
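+// Example (illustrative): the Begin/End pairs above make the operands and use edges
+// of a node available to range-based for loops:
+//
+//     for (GenTree* operand : node->Operands())
+//     {
+//         // ... visit each operand; call argument lists are expanded into their
+//         // individual arguments ...
+//     }
+//
+// UseEdges() iterates the corresponding GenTree** edges instead, which lets a caller
+// overwrite an operand in place through the edge.
+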
+bool GenTree::Precedes(GenTree* other)
+{
+ assert(other != nullptr);
+
+ for (GenTree* node = gtNext; node != nullptr; node = node->gtNext)
+ {
+ if (node == other)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#ifdef DEBUG
+
+/* static */ int GenTree::gtDispFlags(unsigned flags, unsigned debugFlags)
+{
+ printf("%c", (flags & GTF_ASG) ? 'A' : '-');
+ printf("%c", (flags & GTF_CALL) ? 'C' : '-');
+ printf("%c", (flags & GTF_EXCEPT) ? 'X' : '-');
+ printf("%c", (flags & GTF_GLOB_REF) ? 'G' : '-');
+ printf("%c", (debugFlags & GTF_DEBUG_NODE_MORPHED) ? '+' : // First print '+' if GTF_DEBUG_NODE_MORPHED is set
+ (flags & GTF_ORDER_SIDEEFF) ? 'O' : '-'); // otherwise print 'O' or '-'
+ printf("%c", (flags & GTF_COLON_COND) ? '?' : '-');
+ printf("%c", (flags & GTF_DONT_CSE) ? 'N' : // N is for No cse
+ (flags & GTF_MAKE_CSE) ? 'H' : '-'); // H is for Hoist this expr
+ printf("%c", (flags & GTF_REVERSE_OPS) ? 'R' : '-');
+ printf("%c", (flags & GTF_UNSIGNED) ? 'U' : (flags & GTF_BOOLEAN) ? 'B' : '-');
+#if FEATURE_SET_FLAGS
+ printf("%c", (flags & GTF_SET_FLAGS) ? 'S' : '-');
+#endif
+ printf("%c", (flags & GTF_LATE_ARG) ? 'L' : '-');
+ printf("%c", (flags & GTF_SPILLED) ? 'z' : (flags & GTF_SPILL) ? 'Z' : '-');
+ return 12; // displayed 12 flag characters
+}
+
+/*****************************************************************************/
+
+void Compiler::gtDispNodeName(GenTree* tree)
+{
+ /* print the node name */
+
+ const char* name;
+
+ assert(tree);
+ if (tree->gtOper < GT_COUNT)
+ {
+ name = GenTree::NodeName(tree->OperGet());
+ }
+ else
+ {
+ name = "<ERROR>";
+ }
+ char buf[32];
+ char* bufp = &buf[0];
+
+ if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
+ {
+ sprintf_s(bufp, sizeof(buf), " %s(h)%c", name, 0);
+ }
+ else if (tree->gtOper == GT_PUTARG_STK)
+ {
+ sprintf_s(bufp, sizeof(buf), " %s [+0x%02x]%c", name, tree->AsPutArgStk()->getArgOffset(), 0);
+ }
+ else if (tree->gtOper == GT_CALL)
+ {
+ const char* callType = "call";
+ const char* gtfType = "";
+ const char* ctType = "";
+ char gtfTypeBuf[100];
+
+ if (tree->gtCall.gtCallType == CT_USER_FUNC)
+ {
+ if ((tree->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT)
+ {
+ callType = "callv";
+ }
+ }
+ else if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ ctType = " help";
+ }
+ else if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ ctType = " ind";
+ }
+ else
+ {
+ assert(!"Unknown gtCallType");
+ }
+
+ if (tree->gtFlags & GTF_CALL_NULLCHECK)
+ {
+ gtfType = " nullcheck";
+ }
+ if (tree->gtFlags & GTF_CALL_VIRT_VTABLE)
+ {
+ gtfType = " ind";
+ }
+ else if (tree->gtFlags & GTF_CALL_VIRT_STUB)
+ {
+ gtfType = " stub";
+ }
+#ifdef FEATURE_READYTORUN_COMPILER
+ else if (tree->gtCall.IsR2RRelativeIndir())
+ {
+ gtfType = " r2r_ind";
+ }
+#endif // FEATURE_READYTORUN_COMPILER
+ else if (tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ char* gtfTypeBufWalk = gtfTypeBuf;
+ gtfTypeBufWalk += SimpleSprintf_s(gtfTypeBufWalk, gtfTypeBuf, sizeof(gtfTypeBuf), " unman");
+ if (tree->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ gtfTypeBufWalk += SimpleSprintf_s(gtfTypeBufWalk, gtfTypeBuf, sizeof(gtfTypeBuf), " popargs");
+ }
+ if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ gtfTypeBufWalk += SimpleSprintf_s(gtfTypeBufWalk, gtfTypeBuf, sizeof(gtfTypeBuf), " thiscall");
+ }
+ gtfType = gtfTypeBuf;
+ }
+
+ sprintf_s(bufp, sizeof(buf), " %s%s%s%c", callType, ctType, gtfType, 0);
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s[", name);
+ for (unsigned rank = tree->gtArrElem.gtArrRank - 1; rank; rank--)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), ",");
+ }
+ SimpleSprintf_s(bufp, buf, sizeof(buf), "]");
+ }
+ else if (tree->gtOper == GT_ARR_OFFSET || tree->gtOper == GT_ARR_INDEX)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s[", name);
+ unsigned char currDim;
+ unsigned char rank;
+ if (tree->gtOper == GT_ARR_OFFSET)
+ {
+ currDim = tree->gtArrOffs.gtCurrDim;
+ rank = tree->gtArrOffs.gtArrRank;
+ }
+ else
+ {
+ currDim = tree->gtArrIndex.gtCurrDim;
+ rank = tree->gtArrIndex.gtArrRank;
+ }
+
+ for (unsigned char dim = 0; dim < rank; dim++)
+ {
+            // Use a de facto standard i,j,k for the dimensions.
+ // Note that we only support up to rank 3 arrays with these nodes, so we won't run out of characters.
+ char dimChar = '*';
+ if (dim == currDim)
+ {
+ dimChar = 'i' + dim;
+ }
+ else if (dim > currDim)
+ {
+ dimChar = ' ';
+ }
+
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "%c", dimChar);
+ if (dim != rank - 1)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), ",");
+ }
+ }
+ SimpleSprintf_s(bufp, buf, sizeof(buf), "]");
+ }
+ else if (tree->gtOper == GT_LEA)
+ {
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), " %s(", name);
+ if (lea->Base() != nullptr)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "b+");
+ }
+ if (lea->Index() != nullptr)
+ {
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "(i*%d)+", lea->gtScale);
+ }
+ bufp += SimpleSprintf_s(bufp, buf, sizeof(buf), "%d)", lea->gtOffset);
+ }
+ else if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
+ {
+ switch (tree->gtBoundsChk.gtThrowKind)
+ {
+ case SCK_RNGCHK_FAIL:
+ sprintf_s(bufp, sizeof(buf), " %s_Rng", name);
+ break;
+ case SCK_ARG_EXCPN:
+ sprintf_s(bufp, sizeof(buf), " %s_Arg", name);
+ break;
+ case SCK_ARG_RNG_EXCPN:
+ sprintf_s(bufp, sizeof(buf), " %s_ArgRng", name);
+ break;
+ default:
+ unreached();
+ }
+ }
+ else if (tree->gtOverflowEx())
+ {
+ sprintf_s(bufp, sizeof(buf), " %s_ovfl%c", name, 0);
+ }
+ else if (tree->OperIsBlk() && (tree->AsBlk()->gtBlkSize != 0))
+ {
+ sprintf_s(bufp, sizeof(buf), " %s(%d)", name, tree->AsBlk()->gtBlkSize);
+ }
+ else
+ {
+ sprintf_s(bufp, sizeof(buf), " %s%c", name, 0);
+ }
+
+ if (strlen(buf) < 10)
+ {
+ printf(" %-10s", buf);
+ }
+ else
+ {
+ printf(" %s", buf);
+ }
+}
+
+void Compiler::gtDispVN(GenTree* tree)
+{
+ if (tree->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ assert(tree->gtVNPair.GetConservative() != ValueNumStore::NoVN);
+ printf(" ");
+ vnpPrint(tree->gtVNPair, 0);
+ }
+}
+
+//------------------------------------------------------------------------
+// gtDispNode: Print a tree to jitstdout.
+//
+// Arguments:
+// tree - the tree to be printed
+// indentStack - the specification for the current level of indentation & arcs
+//    msg         - a contextual message (i.e. from the parent) to print
+//
+// Return Value:
+// None.
+//
+// Notes:
+// 'indentStack' may be null, in which case no indentation or arcs are printed
+// 'msg' may be null
+
+void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z __in_opt const char* msg, bool isLIR)
+{
+ bool printPointer = true; // always true..
+ bool printFlags = true; // always true..
+ bool printCost = true; // always true..
+
+ int msgLength = 25;
+
+ GenTree* prev;
+
+ if (tree->gtSeqNum)
+ {
+ printf("N%03u ", tree->gtSeqNum);
+ if (tree->gtCostsInitialized)
+ {
+ printf("(%3u,%3u) ", tree->gtCostEx, tree->gtCostSz);
+ }
+ else
+ {
+ printf("(???"
+ ",???"
+ ") "); // This probably indicates a bug: the node has a sequence number, but not costs.
+ }
+ }
+ else
+ {
+ if (tree->gtOper == GT_STMT)
+ {
+ prev = tree->gtStmt.gtStmtExpr;
+ }
+ else
+ {
+ prev = tree;
+ }
+
+ bool hasSeqNum = true;
+ unsigned dotNum = 0;
+ do
+ {
+ dotNum++;
+ prev = prev->gtPrev;
+
+ if ((prev == nullptr) || (prev == tree))
+ {
+ hasSeqNum = false;
+ break;
+ }
+
+ assert(prev);
+ } while (prev->gtSeqNum == 0);
+
+ // If we have an indent stack, don't add additional characters,
+ // as it will mess up the alignment.
+ bool displayDotNum = tree->gtOper != GT_STMT && hasSeqNum && (indentStack == nullptr);
+ if (displayDotNum)
+ {
+ printf("N%03u.%02u ", prev->gtSeqNum, dotNum);
+ }
+ else
+ {
+ printf(" ");
+ }
+
+ if (tree->gtCostsInitialized)
+ {
+ printf("(%3u,%3u) ", tree->gtCostEx, tree->gtCostSz);
+ }
+ else
+ {
+ if (displayDotNum)
+ {
+ // Do better alignment in this case
+ printf(" ");
+ }
+ else
+ {
+ printf(" ");
+ }
+ }
+ }
+
+ if (optValnumCSE_phase)
+ {
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ printf("CSE #%02d (%s)", GET_CSE_INDEX(tree->gtCSEnum), (IS_CSE_USE(tree->gtCSEnum) ? "use" : "def"));
+ }
+ else
+ {
+ printf(" ");
+ }
+ }
+
+ /* Print the node ID */
+ printTreeID(tree);
+ printf(" ");
+
+ if (tree->gtOper >= GT_COUNT)
+ {
+ printf(" **** ILLEGAL NODE ****");
+ return;
+ }
+
+ if (printFlags)
+ {
+ /* First print the flags associated with the node */
+ switch (tree->gtOper)
+ {
+ case GT_LEA:
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+
+ case GT_IND:
+ // We prefer printing R, V or U
+ if ((tree->gtFlags & (GTF_IND_REFARR_LAYOUT | GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0)
+ {
+ if (tree->gtFlags & GTF_IND_TGTANYWHERE)
+ {
+ printf("*");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_IND_INVARIANT)
+ {
+ printf("#");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ printf("a");
+ --msgLength;
+ break;
+ }
+ }
+ __fallthrough;
+
+ case GT_INDEX:
+
+ if ((tree->gtFlags & (GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0) // We prefer printing V or U over R
+ {
+ if (tree->gtFlags & GTF_IND_REFARR_LAYOUT)
+ {
+ printf("R");
+ --msgLength;
+ break;
+ } // R means RefArray
+ }
+ __fallthrough;
+
+ case GT_FIELD:
+ case GT_CLS_VAR:
+ if (tree->gtFlags & GTF_IND_VOLATILE)
+ {
+ printf("V");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_IND_UNALIGNED)
+ {
+ printf("U");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_ASG:
+ if (tree->OperIsInitBlkOp())
+ {
+ printf("I");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_CALL:
+ if (tree->gtFlags & GTF_CALL_INLINE_CANDIDATE)
+ {
+ printf("I");
+ --msgLength;
+ break;
+ }
+ if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_RETBUFFARG)
+ {
+ printf("S");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_CALL_HOISTABLE)
+ {
+ printf("H");
+ --msgLength;
+ break;
+ }
+
+ goto DASH;
+
+ case GT_MUL:
+ if (tree->gtFlags & GTF_MUL_64RSLT)
+ {
+ printf("L");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_ADDR:
+ if (tree->gtFlags & GTF_ADDR_ONSTACK)
+ {
+ printf("L");
+ --msgLength;
+ break;
+ } // L means LclVar
+ goto DASH;
+
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_REG_VAR:
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ printf("U");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_USEDEF)
+ {
+ printf("B");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ printf("D");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_CAST)
+ {
+ printf("C");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_VAR_ARR_INDEX)
+ {
+ printf("i");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (tree->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ printf("N");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_RELOP_JMP_USED)
+ {
+ printf("J");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_RELOP_QMARK)
+ {
+ printf("Q");
+ --msgLength;
+ break;
+ }
+ if (tree->gtFlags & GTF_RELOP_SMALL)
+ {
+ printf("S");
+ --msgLength;
+ break;
+ }
+ goto DASH;
+
+ default:
+ DASH:
+ printf("-");
+ --msgLength;
+ break;
+ }
+
+ /* Then print the general purpose flags */
+ unsigned flags = tree->gtFlags;
+
+ if (tree->OperIsBinary())
+ {
+ genTreeOps oper = tree->OperGet();
+
+ // Check for GTF_ADDRMODE_NO_CSE flag on add/mul/shl Binary Operators
+ if ((oper == GT_ADD) || (oper == GT_MUL) || (oper == GT_LSH))
+ {
+ if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
+ {
+ flags |= GTF_DONT_CSE; // Force the GTF_ADDRMODE_NO_CSE flag to print out like GTF_DONT_CSE
+ }
+ }
+ }
+ else // !tree->OperIsBinary()
+ {
+ // the GTF_REVERSE_OPS flag only applies to binary operations
+ flags &= ~GTF_REVERSE_OPS; // this bit is reused as GTF_VAR_ARR_INDEX above
+ }
+
+ msgLength -= GenTree::gtDispFlags(flags, tree->gtDebugFlags);
+/*
+ printf("%c", (flags & GTF_ASG ) ? 'A' : '-');
+ printf("%c", (flags & GTF_CALL ) ? 'C' : '-');
+ printf("%c", (flags & GTF_EXCEPT ) ? 'X' : '-');
+ printf("%c", (flags & GTF_GLOB_REF ) ? 'G' : '-');
+ printf("%c", (flags & GTF_ORDER_SIDEEFF ) ? 'O' : '-');
+ printf("%c", (flags & GTF_COLON_COND ) ? '?' : '-');
+ printf("%c", (flags & GTF_DONT_CSE ) ? 'N' : // N is for No cse
+ (flags & GTF_MAKE_CSE ) ? 'H' : '-'); // H is for Hoist this expr
+ printf("%c", (flags & GTF_REVERSE_OPS ) ? 'R' : '-');
+ printf("%c", (flags & GTF_UNSIGNED ) ? 'U' :
+ (flags & GTF_BOOLEAN ) ? 'B' : '-');
+ printf("%c", (flags & GTF_SET_FLAGS ) ? 'S' : '-');
+ printf("%c", (flags & GTF_SPILLED ) ? 'z' : '-');
+ printf("%c", (flags & GTF_SPILL ) ? 'Z' : '-');
+*/
+
+#if FEATURE_STACK_FP_X87
+ BYTE fpLvl = (BYTE)tree->gtFPlvl;
+ if (IsUninitialized(fpLvl) || fpLvl == 0x00)
+ {
+ printf("-");
+ }
+ else
+ {
+ printf("%1u", tree->gtFPlvl);
+ }
+#endif // FEATURE_STACK_FP_X87
+ }
+
+ // If we're printing a node for LIR, we use the space normally associated with the message
+ // to display the node's temp name (if any)
+ const bool hasOperands = tree->OperandsBegin() != tree->OperandsEnd();
+ if (isLIR)
+ {
+ assert(msg == nullptr);
+
+ // If the tree does not have any operands, we do not display the indent stack. This gives us
+ // two additional characters for alignment.
+ if (!hasOperands)
+ {
+ msgLength += 1;
+ }
+
+ if (tree->IsValue())
+ {
+ const size_t bufLength = msgLength - 1;
+ msg = reinterpret_cast<char*>(alloca(bufLength * sizeof(char)));
+ sprintf_s(const_cast<char*>(msg), bufLength, "t%d = %s", tree->gtTreeID, hasOperands ? "" : " ");
+ }
+ }
+
+ /* print the msg associated with the node */
+
+ if (msg == nullptr)
+ {
+ msg = "";
+ }
+ if (msgLength < 0)
+ {
+ msgLength = 0;
+ }
+
+ printf(isLIR ? " %+*s" : " %-*s", msgLength, msg);
+
+ /* Indent the node accordingly */
+ if (!isLIR || hasOperands)
+ {
+ printIndent(indentStack);
+ }
+
+ gtDispNodeName(tree);
+
+ assert(tree == nullptr || tree->gtOper < GT_COUNT);
+
+ if (tree)
+ {
+ /* print the type of the node */
+ if (tree->gtOper != GT_CAST)
+ {
+ printf(" %-6s", varTypeName(tree->TypeGet()));
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_STORE_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &lvaTable[tree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvAddrExposed)
+ {
+ printf("(AX)"); // Variable has address exposed.
+ }
+
+ if (varDsc->lvUnusedStruct)
+ {
+ assert(varDsc->lvPromoted);
+ printf("(U)"); // Unused struct
+ }
+ else if (varDsc->lvPromoted)
+ {
+ assert(varTypeIsPromotable(varDsc));
+ printf("(P)"); // Promoted struct
+ }
+ }
+
+ if (tree->gtOper == GT_STMT)
+ {
+ if (opts.compDbgInfo)
+ {
+ IL_OFFSET endIL = tree->gtStmt.gtStmtLastILoffs;
+
+ printf("(IL ");
+ if (tree->gtStmt.gtStmtILoffsx == BAD_IL_OFFSET)
+ {
+ printf(" ???");
+ }
+ else
+ {
+ printf("0x%03X", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
+ }
+ printf("...");
+ if (endIL == BAD_IL_OFFSET)
+ {
+ printf(" ???");
+ }
+ else
+ {
+ printf("0x%03X", endIL);
+ }
+ printf(")");
+ }
+ }
+
+ if (tree->IsArgPlaceHolderNode() && (tree->gtArgPlace.gtArgPlaceClsHnd != nullptr))
+ {
+ printf(" => [clsHnd=%08X]", dspPtr(tree->gtArgPlace.gtArgPlaceClsHnd));
+ }
+ }
+
+ // for tracking down problems in reguse prediction or liveness tracking
+
+ if (verbose && 0)
+ {
+ printf(" RR=");
+ dspRegMask(tree->gtRsvdRegs);
+#ifdef LEGACY_BACKEND
+ printf(",UR=");
+ dspRegMask(tree->gtUsedRegs);
+#endif // LEGACY_BACKEND
+ printf("\n");
+ }
+ }
+}
+
+void Compiler::gtDispRegVal(GenTree* tree)
+{
+ switch (tree->GetRegTag())
+ {
+ // Don't display NOREG; the absence of this tag will imply this state
+ // case GenTree::GT_REGTAG_NONE: printf(" NOREG"); break;
+
+ case GenTree::GT_REGTAG_REG:
+ printf(" REG %s", compRegVarName(tree->gtRegNum));
+ break;
+
+#if CPU_LONG_USES_REGPAIR
+ case GenTree::GT_REGTAG_REGPAIR:
+ printf(" PAIR %s", compRegPairName(tree->gtRegPair));
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ if (tree->IsMultiRegCall())
+ {
+ // 0th reg is gtRegNum, which is already printed above.
+ // Print the remaining regs of a multi-reg call node.
+ GenTreeCall* call = tree->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ for (unsigned i = 1; i < regCount; ++i)
+ {
+ printf(",%s", compRegVarName(call->GetRegNumByIdx(i)));
+ }
+ }
+ else if (tree->IsCopyOrReloadOfMultiRegCall())
+ {
+ GenTreeCopyOrReload* copyOrReload = tree->AsCopyOrReload();
+ GenTreeCall* call = tree->gtGetOp1()->AsCall();
+ unsigned regCount = call->GetReturnTypeDesc()->GetReturnRegCount();
+ for (unsigned i = 1; i < regCount; ++i)
+ {
+ printf(",%s", compRegVarName(copyOrReload->GetRegNumByIdx(i)));
+ }
+ }
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ printf(" RV");
+ }
+}
+
+// We usually don't expect to print anything longer than this string.
+#define LONGEST_COMMON_LCL_VAR_DISPLAY "V99 PInvokeFrame"
+#define LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH (sizeof(LONGEST_COMMON_LCL_VAR_DISPLAY))
+#define BUF_SIZE (LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH * 2)
+
+void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, const char** ilNameOut, unsigned* ilNumOut)
+{
+ const char* ilKind = nullptr;
+ const char* ilName = nullptr;
+
+ unsigned ilNum = compMap2ILvarNum(lclNum);
+
+ if (ilNum == (unsigned)ICorDebugInfo::RETBUF_ILNUM)
+ {
+ ilName = "RetBuf";
+ }
+ else if (ilNum == (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM)
+ {
+ ilName = "VarArgHandle";
+ }
+ else if (ilNum == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)
+ {
+ ilName = "TypeCtx";
+ }
+ else if (ilNum == (unsigned)ICorDebugInfo::UNKNOWN_ILNUM)
+ {
+#if FEATURE_ANYCSE
+ if (lclNumIsTrueCSE(lclNum))
+ {
+ ilKind = "cse";
+ ilNum = lclNum - optCSEstart;
+ }
+ else if (lclNum >= optCSEstart)
+ {
+ // Currently any new LclVars introduced after the CSE phase
+ // are believed to be created by the "rationalizer"; that is what the "rat" prefix refers to.
+ ilKind = "rat";
+ ilNum = lclNum - (optCSEstart + optCSEcount);
+ }
+ else
+#endif // FEATURE_ANYCSE
+ {
+ if (lclNum == info.compLvFrameListRoot)
+ {
+ ilName = "FramesRoot";
+ }
+ else if (lclNum == lvaInlinedPInvokeFrameVar)
+ {
+ ilName = "PInvokeFrame";
+ }
+ else if (lclNum == lvaGSSecurityCookie)
+ {
+ ilName = "GsCookie";
+ }
+#if FEATURE_FIXED_OUT_ARGS
+ else if (lclNum == lvaPInvokeFrameRegSaveVar)
+ {
+ ilName = "PInvokeFrameRegSave";
+ }
+ else if (lclNum == lvaOutgoingArgSpaceVar)
+ {
+ ilName = "OutArgs";
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+#ifdef _TARGET_ARM_
+ else if (lclNum == lvaPromotedStructAssemblyScratchVar)
+ {
+ ilName = "PromotedStructScratch";
+ }
+#endif // _TARGET_ARM_
+#if !FEATURE_EH_FUNCLETS
+ else if (lclNum == lvaShadowSPslotsVar)
+ {
+ ilName = "EHSlots";
+ }
+#endif // !FEATURE_EH_FUNCLETS
+ else if (lclNum == lvaLocAllocSPvar)
+ {
+ ilName = "LocAllocSP";
+ }
+#if FEATURE_EH_FUNCLETS
+ else if (lclNum == lvaPSPSym)
+ {
+ ilName = "PSPSym";
+ }
+#endif // FEATURE_EH_FUNCLETS
+ else
+ {
+ ilKind = "tmp";
+ if (compIsForInlining())
+ {
+ ilNum = lclNum - impInlineInfo->InlinerCompiler->info.compLocalsCount;
+ }
+ else
+ {
+ ilNum = lclNum - info.compLocalsCount;
+ }
+ }
+ }
+ }
+ else if (lclNum < (compIsForInlining() ? impInlineInfo->InlinerCompiler->info.compArgsCount : info.compArgsCount))
+ {
+ if (ilNum == 0 && !info.compIsStatic)
+ {
+ ilName = "this";
+ }
+ else
+ {
+ ilKind = "arg";
+ }
+ }
+ else
+ {
+ if (!lvaTable[lclNum].lvIsStructField)
+ {
+ ilKind = "loc";
+ }
+ if (compIsForInlining())
+ {
+ ilNum -= impInlineInfo->InlinerCompiler->info.compILargsCount;
+ }
+ else
+ {
+ ilNum -= info.compILargsCount;
+ }
+ }
+
+ *ilKindOut = ilKind;
+ *ilNameOut = ilName;
+ *ilNumOut = ilNum;
+}
+
+/*****************************************************************************/
+int Compiler::gtGetLclVarName(unsigned lclNum, char* buf, unsigned buf_remaining)
+{
+ char* bufp_next = buf;
+ unsigned charsPrinted = 0;
+ int sprintf_result;
+
+ sprintf_result = sprintf_s(bufp_next, buf_remaining, "V%02u", lclNum);
+
+ if (sprintf_result < 0)
+ {
+ return sprintf_result;
+ }
+
+ charsPrinted += sprintf_result;
+ bufp_next += sprintf_result;
+ buf_remaining -= sprintf_result;
+
+ const char* ilKind = nullptr;
+ const char* ilName = nullptr;
+ unsigned ilNum = 0;
+
+ Compiler::gtGetLclVarNameInfo(lclNum, &ilKind, &ilName, &ilNum);
+
+ if (ilName != nullptr)
+ {
+ sprintf_result = sprintf_s(bufp_next, buf_remaining, " %s", ilName);
+ if (sprintf_result < 0)
+ {
+ return sprintf_result;
+ }
+ charsPrinted += sprintf_result;
+ bufp_next += sprintf_result;
+ buf_remaining -= sprintf_result;
+ }
+ else if (ilKind != nullptr)
+ {
+ sprintf_result = sprintf_s(bufp_next, buf_remaining, " %s%d", ilKind, ilNum);
+ if (sprintf_result < 0)
+ {
+ return sprintf_result;
+ }
+ charsPrinted += sprintf_result;
+ bufp_next += sprintf_result;
+ buf_remaining -= sprintf_result;
+ }
+
+ assert(charsPrinted > 0);
+ assert(buf_remaining > 0);
+
+ return (int)charsPrinted;
+}
+
+/*****************************************************************************
+ * Get the local var name, and create a copy of the string that can be used in debug output.
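+ * The result typically looks like "V02 arg1", "V07 tmp0", or "V03 PInvokeFrame".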
+ */
+char* Compiler::gtGetLclVarName(unsigned lclNum)
+{
+ char buf[BUF_SIZE];
+ int charsPrinted = gtGetLclVarName(lclNum, buf, sizeof(buf) / sizeof(buf[0]));
+ if (charsPrinted < 0)
+ {
+ return nullptr;
+ }
+
+ char* retBuf = new (this, CMK_DebugOnly) char[charsPrinted + 1];
+ strcpy_s(retBuf, charsPrinted + 1, buf);
+ return retBuf;
+}
+
+/*****************************************************************************/
+void Compiler::gtDispLclVar(unsigned lclNum, bool padForBiggestDisp)
+{
+ char buf[BUF_SIZE];
+ int charsPrinted = gtGetLclVarName(lclNum, buf, sizeof(buf) / sizeof(buf[0]));
+
+ if (charsPrinted < 0)
+ {
+ return;
+ }
+
+ printf("%s", buf);
+
+ if (padForBiggestDisp && (charsPrinted < LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH))
+ {
+ printf("%*c", LONGEST_COMMON_LCL_VAR_DISPLAY_LENGTH - charsPrinted, ' ');
+ }
+}
+
+/*****************************************************************************/
+void Compiler::gtDispConst(GenTree* tree)
+{
+ assert(tree->OperKind() & GTK_CONST);
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ if (tree->IsIconHandle(GTF_ICON_STR_HDL))
+ {
+ printf(" 0x%X \"%S\"", dspPtr(tree->gtIntCon.gtIconVal), eeGetCPString(tree->gtIntCon.gtIconVal));
+ }
+ else
+ {
+ ssize_t dspIconVal = tree->IsIconHandle() ? dspPtr(tree->gtIntCon.gtIconVal) : tree->gtIntCon.gtIconVal;
+
+ if (tree->TypeGet() == TYP_REF)
+ {
+ assert(tree->gtIntCon.gtIconVal == 0);
+ printf(" null");
+ }
+ else if ((tree->gtIntCon.gtIconVal > -1000) && (tree->gtIntCon.gtIconVal < 1000))
+ {
+ printf(" %ld", dspIconVal);
+#ifdef _TARGET_64BIT_
+ }
+ else if ((tree->gtIntCon.gtIconVal & 0xFFFFFFFF00000000LL) != 0)
+ {
+ printf(" 0x%llx", dspIconVal);
+#endif
+ }
+ else
+ {
+ printf(" 0x%X", dspIconVal);
+ }
+
+ if (tree->IsIconHandle())
+ {
+ switch (tree->GetIconHandleFlag())
+ {
+ case GTF_ICON_SCOPE_HDL:
+ printf(" scope");
+ break;
+ case GTF_ICON_CLASS_HDL:
+ printf(" class");
+ break;
+ case GTF_ICON_METHOD_HDL:
+ printf(" method");
+ break;
+ case GTF_ICON_FIELD_HDL:
+ printf(" field");
+ break;
+ case GTF_ICON_STATIC_HDL:
+ printf(" static");
+ break;
+ case GTF_ICON_STR_HDL:
+ unreached(); // This case is handled above
+ break;
+ case GTF_ICON_PSTR_HDL:
+ printf(" pstr");
+ break;
+ case GTF_ICON_PTR_HDL:
+ printf(" ptr");
+ break;
+ case GTF_ICON_VARG_HDL:
+ printf(" vararg");
+ break;
+ case GTF_ICON_PINVKI_HDL:
+ printf(" pinvoke");
+ break;
+ case GTF_ICON_TOKEN_HDL:
+ printf(" token");
+ break;
+ case GTF_ICON_TLS_HDL:
+ printf(" tls");
+ break;
+ case GTF_ICON_FTN_ADDR:
+ printf(" ftn");
+ break;
+ case GTF_ICON_CIDMID_HDL:
+ printf(" cid");
+ break;
+ case GTF_ICON_BBC_PTR:
+ printf(" bbc");
+ break;
+ default:
+ printf(" UNKNOWN");
+ break;
+ }
+ }
+
+ if ((tree->gtFlags & GTF_ICON_FIELD_OFF) != 0)
+ {
+ printf(" field offset");
+ }
+
+ if ((tree->IsReuseRegVal()) != 0)
+ {
+ printf(" reuse reg val");
+ }
+ }
+
+ gtDispFieldSeq(tree->gtIntCon.gtFieldSeq);
+
+ break;
+
+ case GT_CNS_LNG:
+ printf(" 0x%016I64x", tree->gtLngCon.gtLconVal);
+ break;
+
+ case GT_CNS_DBL:
+ if (*((__int64*)&tree->gtDblCon.gtDconVal) == (__int64)I64(0x8000000000000000))
+ {
+ printf(" -0.00000");
+ }
+ else
+ {
+ printf(" %#.17g", tree->gtDblCon.gtDconVal);
+ }
+ break;
+ case GT_CNS_STR:
+ printf("<string constant>");
+ break;
+ default:
+ assert(!"unexpected constant node");
+ }
+
+ gtDispRegVal(tree);
+}
+
+void Compiler::gtDispFieldSeq(FieldSeqNode* pfsn)
+{
+ if (pfsn == FieldSeqStore::NotAField() || (pfsn == nullptr))
+ {
+ return;
+ }
+
+ // Otherwise...
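+ // Print the sequence as, e.g., " Fseq[#FirstElem, myField]" (the field name here is illustrative).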
+ printf(" Fseq[");
+ while (pfsn != nullptr)
+ {
+ assert(pfsn != FieldSeqStore::NotAField()); // Can't exist in a field sequence list except alone
+ CORINFO_FIELD_HANDLE fldHnd = pfsn->m_fieldHnd;
+ // First check the "pseudo" field handles...
+ if (fldHnd == FieldSeqStore::FirstElemPseudoField)
+ {
+ printf("#FirstElem");
+ }
+ else if (fldHnd == FieldSeqStore::ConstantIndexPseudoField)
+ {
+ printf("#ConstantIndex");
+ }
+ else
+ {
+ printf("%s", eeGetFieldName(fldHnd));
+ }
+ pfsn = pfsn->m_next;
+ if (pfsn != nullptr)
+ {
+ printf(", ");
+ }
+ }
+ printf("]");
+}
+
+//------------------------------------------------------------------------
+// gtDispLeaf: Print a single leaf node to jitstdout.
+//
+// Arguments:
+// tree - the tree to be printed
+// indentStack - the specification for the current level of indentation & arcs
+//
+// Return Value:
+// None.
+//
+// Notes:
+// 'indentStack' may be null, in which case no indentation or arcs are printed
+
+void Compiler::gtDispLeaf(GenTree* tree, IndentStack* indentStack)
+{
+ if (tree->OperKind() & GTK_CONST)
+ {
+ gtDispConst(tree);
+ return;
+ }
+
+ bool isLclFld = false;
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_FLD:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_FLD:
+ isLclFld = true;
+ __fallthrough;
+
+ case GT_PHI_ARG:
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_STORE_LCL_VAR:
+ printf(" ");
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ varDsc = &lvaTable[varNum];
+ gtDispLclVar(varNum);
+ if (tree->gtLclVarCommon.HasSsaName())
+ {
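+ // Print the SSA number as "u:N" (use), "d:N" (def), or "ud:N->M" for a partial def.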
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ assert(tree->gtFlags & GTF_VAR_DEF);
+ printf("ud:%d->%d", tree->gtLclVarCommon.gtSsaNum, GetSsaNumForLocalVarDef(tree));
+ }
+ else
+ {
+ printf("%s:%d", (tree->gtFlags & GTF_VAR_DEF) ? "d" : "u", tree->gtLclVarCommon.gtSsaNum);
+ }
+ }
+
+ if (isLclFld)
+ {
+ printf("[+%u]", tree->gtLclFld.gtLclOffs);
+ gtDispFieldSeq(tree->gtLclFld.gtFieldSeq);
+ }
+
+ if (varDsc->lvRegister)
+ {
+ printf(" ");
+ varDsc->PrintVarReg();
+ }
+#ifndef LEGACY_BACKEND
+ else if (tree->InReg())
+ {
+#if CPU_LONG_USES_REGPAIR
+ if (isRegPairType(tree->TypeGet()))
+ printf(" %s", compRegPairName(tree->gtRegPair));
+ else
+#endif
+ printf(" %s", compRegVarName(tree->gtRegNum));
+ }
+#endif // !LEGACY_BACKEND
+
+ if (varDsc->lvPromoted)
+ {
+ assert(varTypeIsPromotable(varDsc) || varDsc->lvUnusedStruct);
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fldHnd;
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ LclVarDsc* fieldVarDsc = &lvaTable[i];
+ const char* fieldName;
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(varDsc))
+ {
+ fieldName = (i == 0) ? "lo" : "hi";
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ fldHnd = info.compCompHnd->getFieldInClass(typeHnd, fieldVarDsc->lvFldOrdinal);
+ fieldName = eeGetFieldName(fldHnd);
+ }
+
+ printf("\n");
+ printf(" ");
+ printIndent(indentStack);
+ printf(" %-6s V%02u.%s (offs=0x%02x) -> ", varTypeName(fieldVarDsc->TypeGet()),
+ tree->gtLclVarCommon.gtLclNum, fieldName, fieldVarDsc->lvFldOffset);
+ gtDispLclVar(i);
+
+ if (fieldVarDsc->lvRegister)
+ {
+ printf(" ");
+ fieldVarDsc->PrintVarReg();
+ }
+
+ if (fieldVarDsc->lvTracked && fgLocalVarLivenessDone && // Includes local variable liveness
+ ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ printf(" (last use)");
+ }
+ }
+ }
+ else // a normal not-promoted lclvar
+ {
+ if (varDsc->lvTracked && fgLocalVarLivenessDone && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ printf(" (last use)");
+ }
+ }
+ break;
+
+ case GT_REG_VAR:
+ printf(" ");
+ gtDispLclVar(tree->gtRegVar.gtLclNum);
+ if (isFloatRegType(tree->gtType))
+ {
+ assert(tree->gtRegVar.gtRegNum == tree->gtRegNum);
+ printf(" FPV%u", tree->gtRegNum);
+ }
+ else
+ {
+ printf(" %s", compRegVarName(tree->gtRegVar.gtRegNum));
+ }
+
+ varNum = tree->gtRegVar.gtLclNum;
+ varDsc = &lvaTable[varNum];
+
+ if (varDsc->lvTracked && fgLocalVarLivenessDone && ((tree->gtFlags & GTF_VAR_DEATH) != 0))
+ {
+ printf(" (last use)");
+ }
+
+ break;
+
+ case GT_JMP:
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtVal.gtVal1, &className);
+ printf(" %s.%s\n", className, methodName);
+ }
+ break;
+
+ case GT_CLS_VAR:
+ printf(" Hnd=%#x", dspPtr(tree->gtClsVar.gtClsVarHnd));
+ gtDispFieldSeq(tree->gtClsVar.gtFieldSeq);
+ break;
+
+ case GT_CLS_VAR_ADDR:
+ printf(" Hnd=%#x", dspPtr(tree->gtClsVar.gtClsVarHnd));
+ break;
+
+ case GT_LABEL:
+ if (tree->gtLabel.gtLabBB)
+ {
+ printf(" dst=BB%02u", tree->gtLabel.gtLabBB->bbNum);
+ }
+ else
+ {
+ printf(" dst=<null>");
+ }
+
+ break;
+
+ case GT_FTN_ADDR:
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = eeGetMethodName((CORINFO_METHOD_HANDLE)tree->gtFptrVal.gtFptrMethod, &className);
+ printf(" %s.%s\n", className, methodName);
+ }
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+ printf(" endNstLvl=%d", tree->gtVal.gtVal1);
+ break;
+#endif // !FEATURE_EH_FUNCLETS
+
+ // Vanilla leaves. No qualifying information available. So do nothing
+
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ case GT_CATCH_ARG:
+ case GT_MEMORYBARRIER:
+ case GT_ARGPLACE:
+ case GT_PINVOKE_PROLOG:
+#ifndef LEGACY_BACKEND
+ case GT_JMPTABLE:
+#endif // !LEGACY_BACKEND
+ break;
+
+ case GT_RET_EXPR:
+ printf("(inl return from call ");
+ printTreeID(tree->gtRetExpr.gtInlineCandidate);
+ printf(")");
+ break;
+
+ case GT_PHYSREG:
+ printf(" %s", getRegName(tree->gtPhysReg.gtSrcReg, varTypeIsFloating(tree)));
+ break;
+
+ case GT_IL_OFFSET:
+ printf(" IL offset: ");
+ if (tree->gtStmt.gtStmtILoffsx == BAD_IL_OFFSET)
+ {
+ printf("???");
+ }
+ else
+ {
+ printf("%d", jitGetILoffs(tree->gtStmt.gtStmtILoffsx));
+ }
+ break;
+
+ default:
+ assert(!"don't know how to display tree leaf node");
+ }
+
+ gtDispRegVal(tree);
+}
+
+//------------------------------------------------------------------------
+// gtDispChild: Print a child node to jitstdout.
+//
+// Arguments:
+//    child       - the child node to be printed
+//    indentStack - the specification for the current level of indentation & arcs
+//    arcType     - the type of arc to use for this child
+//    msg         - a contextual message (i.e. from the parent) to print
+//    topOnly     - a boolean indicating whether to print the children, or just the top node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// 'indentStack' may be null, in which case no indentation or arcs are printed
+// 'msg' has a default value of null
+// 'topOnly' is an optional argument that defaults to false
+
+void Compiler::gtDispChild(GenTreePtr child,
+ IndentStack* indentStack,
+ IndentInfo arcType,
+ __in_opt const char* msg, /* = nullptr */
+ bool topOnly) /* = false */
+{
+ IndentInfo info;
+ indentStack->Push(arcType);
+ gtDispTree(child, indentStack, msg, topOnly);
+ indentStack->Pop();
+}
+
+#ifdef FEATURE_SIMD
+// Intrinsic Id to name map
+extern const char* const simdIntrinsicNames[] = {
+#define SIMD_INTRINSIC(mname, inst, id, name, r, ac, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) name,
+#include "simdintrinsiclist.h"
+};
+#endif // FEATURE_SIMD
+
+/*****************************************************************************/
+
+void Compiler::gtDispTree(GenTreePtr tree,
+ IndentStack* indentStack, /* = nullptr */
+ __in __in_z __in_opt const char* msg, /* = nullptr */
+ bool topOnly, /* = false */
+ bool isLIR) /* = false */
+{
+ if (tree == nullptr)
+ {
+ printf(" [%08X] <NULL>\n", tree);
+ printf(""); // null string means flush
+ return;
+ }
+
+ if (indentStack == nullptr)
+ {
+ indentStack = new (this, CMK_DebugOnly) IndentStack(this);
+ }
+
+ if (IsUninitialized(tree))
+ {
+ /* Value used to initialize nodes */
+ printf("Uninitialized tree node!");
+ return;
+ }
+
+ if (tree->gtOper >= GT_COUNT)
+ {
+ gtDispNode(tree, indentStack, msg, isLIR);
+ printf("Bogus operator!");
+ return;
+ }
+
+ /* Is tree a leaf node? */
+
+ if (tree->OperIsLeaf() || tree->OperIsLocalStore()) // local stores used to be leaves
+ {
+ gtDispNode(tree, indentStack, msg, isLIR);
+ gtDispLeaf(tree, indentStack);
+ gtDispVN(tree);
+ printf("\n");
+ if (tree->OperIsLocalStore() && !topOnly)
+ {
+ gtDispChild(tree->gtOp.gtOp1, indentStack, IINone);
+ }
+ return;
+ }
+
+ // Determine what kind of arc to propagate.
+ IndentInfo myArc = IINone;
+ IndentInfo lowerArc = IINone;
+ if (indentStack->Depth() > 0)
+ {
+ myArc = indentStack->Pop();
+ switch (myArc)
+ {
+ case IIArcBottom:
+ indentStack->Push(IIArc);
+ lowerArc = IINone;
+ break;
+ case IIArc:
+ indentStack->Push(IIArc);
+ lowerArc = IIArc;
+ break;
+ case IIArcTop:
+ indentStack->Push(IINone);
+ lowerArc = IIArc;
+ break;
+ case IIEmbedded:
+ indentStack->Push(IIEmbedded);
+ lowerArc = IIEmbedded;
+ break;
+ default:
+ // Should never get here; just use IINone.
+ break;
+ }
+ }
+
+ // Special case formatting for PHI nodes -- arg lists like calls.
+
+ if (tree->OperGet() == GT_PHI)
+ {
+ gtDispNode(tree, indentStack, msg, isLIR);
+ gtDispVN(tree);
+ printf("\n");
+
+ if (!topOnly)
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ IndentInfo arcType = IIArcTop;
+ for (GenTreeArgList* args = tree->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ if (args->Rest() == nullptr)
+ {
+ arcType = IIArcBottom;
+ }
+ gtDispChild(args->Current(), indentStack, arcType);
+ arcType = IIArc;
+ }
+ }
+ }
+ return;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ const char* childMsg = nullptr;
+
+ if (tree->OperIsSimple())
+ {
+ if (!topOnly)
+ {
+ if (tree->gtGetOp2())
+ {
+ // Label the childMsgs of the GT_COLON operator
+ // op2 is the then part
+
+ if (tree->gtOper == GT_COLON)
+ {
+ childMsg = "then";
+ }
+ gtDispChild(tree->gtOp.gtOp2, indentStack, IIArcTop, childMsg, topOnly);
+ }
+ }
+
+ // Now, get the right type of arc for this node
+ if (myArc != IINone)
+ {
+ indentStack->Pop();
+ indentStack->Push(myArc);
+ }
+
+ gtDispNode(tree, indentStack, msg, isLIR);
+
+ // Propagate lowerArc to the lower children.
+ if (indentStack->Depth() > 0)
+ {
+ (void)indentStack->Pop();
+ indentStack->Push(lowerArc);
+ }
+
+ if (tree->gtOper == GT_CAST)
+ {
+ /* Format a message that explains the effect of this GT_CAST */
+
+ var_types fromType = genActualType(tree->gtCast.CastOp()->TypeGet());
+ var_types toType = tree->CastToType();
+ var_types finalType = tree->TypeGet();
+
+ /* if GTF_UNSIGNED is set then force fromType to an unsigned type */
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ fromType = genUnsignedType(fromType);
+ }
+
+ if (finalType != toType)
+ {
+ printf(" %s <-", varTypeName(finalType));
+ }
+
+ printf(" %s <- %s", varTypeName(toType), varTypeName(fromType));
+ }
+
+ if (tree->gtOper == GT_OBJ && (tree->gtFlags & GTF_VAR_DEATH))
+ {
+ printf(" (last use)");
+ }
+ if (tree->OperIsCopyBlkOp())
+ {
+ printf(" (copy)");
+ }
+ else if (tree->OperIsInitBlkOp())
+ {
+ printf(" (init)");
+ }
+
+ IndirectAssignmentAnnotation* pIndirAnnote;
+ if (tree->gtOper == GT_ASG && GetIndirAssignMap()->Lookup(tree, &pIndirAnnote))
+ {
+ printf(" indir assign of V%02d:", pIndirAnnote->m_lclNum);
+ if (pIndirAnnote->m_isEntire)
+ {
+ printf("d:%d", pIndirAnnote->m_defSsaNum);
+ }
+ else
+ {
+ printf("ud:%d->%d", pIndirAnnote->m_useSsaNum, pIndirAnnote->m_defSsaNum);
+ }
+ }
+
+ if (tree->gtOper == GT_INTRINSIC)
+ {
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ printf(" sin");
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ printf(" cos");
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ printf(" sqrt");
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ printf(" abs");
+ break;
+ case CORINFO_INTRINSIC_Round:
+ printf(" round");
+ break;
+ case CORINFO_INTRINSIC_Cosh:
+ printf(" cosh");
+ break;
+ case CORINFO_INTRINSIC_Sinh:
+ printf(" sinh");
+ break;
+ case CORINFO_INTRINSIC_Tan:
+ printf(" tan");
+ break;
+ case CORINFO_INTRINSIC_Tanh:
+ printf(" tanh");
+ break;
+ case CORINFO_INTRINSIC_Asin:
+ printf(" asin");
+ break;
+ case CORINFO_INTRINSIC_Acos:
+ printf(" acos");
+ break;
+ case CORINFO_INTRINSIC_Atan:
+ printf(" atan");
+ break;
+ case CORINFO_INTRINSIC_Atan2:
+ printf(" atan2");
+ break;
+ case CORINFO_INTRINSIC_Log10:
+ printf(" log10");
+ break;
+ case CORINFO_INTRINSIC_Pow:
+ printf(" pow");
+ break;
+ case CORINFO_INTRINSIC_Exp:
+ printf(" exp");
+ break;
+ case CORINFO_INTRINSIC_Ceiling:
+ printf(" ceiling");
+ break;
+ case CORINFO_INTRINSIC_Floor:
+ printf(" floor");
+ break;
+ case CORINFO_INTRINSIC_Object_GetType:
+ printf(" objGetType");
+ break;
+
+ default:
+ unreached();
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (tree->gtOper == GT_SIMD)
+ {
+ printf(" %s %s", varTypeName(tree->gtSIMD.gtSIMDBaseType),
+ simdIntrinsicNames[tree->gtSIMD.gtSIMDIntrinsicID]);
+ }
+#endif // FEATURE_SIMD
+
+ gtDispRegVal(tree);
+ gtDispVN(tree);
+ printf("\n");
+
+ if (!topOnly && tree->gtOp.gtOp1)
+ {
+
+ // Label the child of the GT_COLON operator
+ // op1 is the else part
+
+ if (tree->gtOper == GT_COLON)
+ {
+ childMsg = "else";
+ }
+ else if (tree->gtOper == GT_QMARK)
+ {
+ childMsg = " if";
+ }
+ gtDispChild(tree->gtOp.gtOp1, indentStack, IIArcBottom, childMsg, topOnly);
+ }
+
+ return;
+ }
+
+ // Now, get the right type of arc for this node
+ if (myArc != IINone)
+ {
+ indentStack->Pop();
+ indentStack->Push(myArc);
+ }
+ gtDispNode(tree, indentStack, msg, isLIR);
+
+ // Propagate lowerArc to the lower children.
+ if (indentStack->Depth() > 0)
+ {
+ (void)indentStack->Pop();
+ indentStack->Push(lowerArc);
+ }
+
+ // See what kind of a special operator we have here, and handle its special children.
+
+ switch (tree->gtOper)
+ {
+ case GT_FIELD:
+ printf(" %s", eeGetFieldName(tree->gtField.gtFldHnd), 0);
+
+ if (tree->gtField.gtFldObj && !topOnly)
+ {
+ gtDispVN(tree);
+ printf("\n");
+ gtDispChild(tree->gtField.gtFldObj, indentStack, IIArcBottom);
+ }
+ else
+ {
+ gtDispRegVal(tree);
+ gtDispVN(tree);
+ printf("\n");
+ }
+ break;
+
+ case GT_CALL:
+ {
+ assert(tree->gtFlags & GTF_CALL);
+ unsigned numChildren = tree->NumChildren();
+ GenTree* lastChild = nullptr;
+ if (numChildren != 0)
+ {
+ lastChild = tree->GetChild(numChildren - 1);
+ }
+
+ if (tree->gtCall.gtCallType != CT_INDIRECT)
+ {
+ const char* methodName;
+ const char* className;
+
+ methodName = eeGetMethodName(tree->gtCall.gtCallMethHnd, &className);
+
+ printf(" %s.%s", className, methodName);
+ }
+
+ if ((tree->gtFlags & GTF_CALL_UNMANAGED) && (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_FRAME_VAR_DEATH))
+ {
+ printf(" (FramesRoot last use)");
+ }
+
+ if (((tree->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0) && (tree->gtCall.gtInlineCandidateInfo != nullptr) &&
+ (tree->gtCall.gtInlineCandidateInfo->exactContextHnd != nullptr))
+ {
+ printf(" (exactContextHnd=0x%p)", dspPtr(tree->gtCall.gtInlineCandidateInfo->exactContextHnd));
+ }
+
+ gtDispVN(tree);
+ if (tree->IsMultiRegCall())
+ {
+ gtDispRegVal(tree);
+ }
+ printf("\n");
+
+ if (!topOnly)
+ {
+ char buf[64];
+ char* bufp;
+
+ bufp = &buf[0];
+
+ if ((tree->gtCall.gtCallObjp != nullptr) && (tree->gtCall.gtCallObjp->gtOper != GT_NOP) &&
+ (!tree->gtCall.gtCallObjp->IsArgPlaceHolderNode()))
+ {
+ if (tree->gtCall.gtCallObjp->gtOper == GT_ASG)
+ {
+ sprintf_s(bufp, sizeof(buf), "this SETUP%c", 0);
+ }
+ else
+ {
+ sprintf_s(bufp, sizeof(buf), "this in %s%c", compRegVarName(REG_ARG_0), 0);
+ }
+ gtDispChild(tree->gtCall.gtCallObjp, indentStack,
+ (tree->gtCall.gtCallObjp == lastChild) ? IIArcBottom : IIArc, bufp, topOnly);
+ }
+
+ if (tree->gtCall.gtCallArgs)
+ {
+ gtDispArgList(tree, indentStack);
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ gtDispChild(tree->gtCall.gtCallAddr, indentStack,
+ (tree->gtCall.gtCallAddr == lastChild) ? IIArcBottom : IIArc, "calli tgt", topOnly);
+ }
+
+ if (tree->gtCall.gtControlExpr != nullptr)
+ {
+ gtDispChild(tree->gtCall.gtControlExpr, indentStack,
+ (tree->gtCall.gtControlExpr == lastChild) ? IIArcBottom : IIArc, "control expr",
+ topOnly);
+ }
+
+#if !FEATURE_FIXED_OUT_ARGS
+ regList list = tree->gtCall.regArgList;
+#endif
+ /* process the late argument list */
+ int lateArgIndex = 0;
+ for (GenTreeArgList* lateArgs = tree->gtCall.gtCallLateArgs; lateArgs;
+ (lateArgIndex++, lateArgs = lateArgs->Rest()))
+ {
+ GenTreePtr argx;
+
+ argx = lateArgs->Current();
+
+ IndentInfo arcType = (lateArgs->Rest() == nullptr) ? IIArcBottom : IIArc;
+ gtGetLateArgMsg(tree, argx, lateArgIndex, -1, bufp, sizeof(buf));
+ gtDispChild(argx, indentStack, arcType, bufp, topOnly);
+ }
+ }
+ }
+ break;
+
+ case GT_STMT:
+ printf("\n");
+
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtStmt.gtStmtExpr, indentStack, IIArcBottom);
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ gtDispVN(tree);
+ printf("\n");
+
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtArrElem.gtArrObj, indentStack, IIArc, nullptr, topOnly);
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ IndentInfo arcType = ((dim + 1) == tree->gtArrElem.gtArrRank) ? IIArcBottom : IIArc;
+ gtDispChild(tree->gtArrElem.gtArrInds[dim], indentStack, arcType, nullptr, topOnly);
+ }
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtArrOffs.gtOffset, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtArrOffs.gtIndex, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtArrOffs.gtArrObj, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtCmpXchg.gtOpLocation, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtCmpXchg.gtOpValue, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtCmpXchg.gtOpComparand, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ gtDispChild(tree->gtBoundsChk.gtArrLen, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtBoundsChk.gtIndex, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ break;
+
+ case GT_STORE_DYN_BLK:
+ case GT_DYN_BLK:
+ gtDispVN(tree);
+ printf("\n");
+ if (!topOnly)
+ {
+ if (tree->gtDynBlk.Data() != nullptr)
+ {
+ gtDispChild(tree->gtDynBlk.Data(), indentStack, IIArc, nullptr, topOnly);
+ }
+ gtDispChild(tree->gtDynBlk.Addr(), indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtDynBlk.gtDynamicSize, indentStack, IIArcBottom, nullptr, topOnly);
+ }
+ if (tree->OperIsCopyBlkOp())
+ {
+ printf(" (copy)");
+ }
+ else if (tree->OperIsInitBlkOp())
+ {
+ printf(" (init)");
+ }
+ break;
+
+ default:
+ printf("<DON'T KNOW HOW TO DISPLAY THIS NODE> :");
+ printf(""); // null string means flush
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// gtGetArgMsg: Construct a message about the given argument
+//
+// Arguments:
+// call - The call for which 'arg' is an argument
+// arg - The argument for which a message should be constructed
+// argNum - The ordinal number of the arg in the argument list
+// listCount - When printing in LIR form this is the count for a multireg GT_LIST
+// or -1 if we are not printing in LIR form
+// bufp - A pointer to the buffer into which the message is written
+// bufLength - The length of the buffer pointed to by bufp
+//
+// Return Value:
+// No return value, but bufp is written.
+//
+// Assumptions:
+// 'call' must be a call node
+//    'arg' must be an argument to 'call' (else gtArgEntryByArgNum will assert)
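+//
+// Notes:
+//    For illustration, the resulting message typically reads "arg2 SETUP", "arg3 out+18",
+//    "arg1 on STK", or simply "arg1", depending on how the argument is passed.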
+
+void Compiler::gtGetArgMsg(
+ GenTreePtr call, GenTreePtr arg, unsigned argNum, int listCount, char* bufp, unsigned bufLength)
+{
+ if (call->gtCall.gtCallLateArgs != nullptr)
+ {
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(call, argNum);
+ assert(curArgTabEntry);
+
+ if (arg->gtFlags & GTF_LATE_ARG)
+ {
+ sprintf_s(bufp, bufLength, "arg%d SETUP%c", argNum, 0);
+ }
+ else
+ {
+#if FEATURE_FIXED_OUT_ARGS
+ if (listCount == -1)
+ {
+ sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum, curArgTabEntry->slotNum * TARGET_POINTER_SIZE,
+ 0);
+ }
+ else // listCount is 0,1,2 or 3
+ {
+ assert(listCount <= MAX_ARG_REG_COUNT);
+ sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum,
+ (curArgTabEntry->slotNum + listCount) * TARGET_POINTER_SIZE, 0);
+ }
+#else
+ sprintf_s(bufp, bufLength, "arg%d on STK%c", argNum, 0);
+#endif
+ }
+ }
+ else
+ {
+ sprintf_s(bufp, bufLength, "arg%d%c", argNum, 0);
+ }
+}
+
+//------------------------------------------------------------------------
+// gtGetLateArgMsg: Construct a message about the given argument
+//
+// Arguments:
+// call - The call for which 'arg' is an argument
+// argx - The argument for which a message should be constructed
+//    lateArgIndex - The ordinal number of the arg in the lateArg list
+// listCount - When printing in LIR form this is the count for a multireg GT_LIST
+// or -1 if we are not printing in LIR form
+// bufp - A pointer to the buffer into which the message is written
+// bufLength - The length of the buffer pointed to by bufp
+//
+// Return Value:
+// No return value, but bufp is written.
+//
+// Assumptions:
+// 'call' must be a call node
+//    'argx' must be an argument to 'call' (else gtArgEntryByLateArgIndex will assert)
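+//
+// Notes:
+//    For illustration, the resulting message typically reads "this in rcx", "arg2 in rdx", or
+//    "arg1 in out+18" (the register names here assume an x64 target).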
+
+void Compiler::gtGetLateArgMsg(
+ GenTreePtr call, GenTreePtr argx, int lateArgIndex, int listCount, char* bufp, unsigned bufLength)
+{
+ assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
+
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(call, lateArgIndex);
+ assert(curArgTabEntry);
+ regNumber argReg = curArgTabEntry->regNum;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ assert(lateArgIndex < call->gtCall.regArgListCount);
+ assert(argReg == call->gtCall.regArgList[lateArgIndex]);
+#else
+ if (argReg == REG_STK)
+ {
+ sprintf_s(bufp, bufLength, "arg%d in out+%02x%c", curArgTabEntry->argNum,
+ curArgTabEntry->slotNum * TARGET_POINTER_SIZE, 0);
+ }
+ else
+#endif
+ {
+ if (gtArgIsThisPtr(curArgTabEntry))
+ {
+ sprintf_s(bufp, bufLength, "this in %s%c", compRegVarName(argReg), 0);
+ }
+ else
+ {
+#if FEATURE_MULTIREG_ARGS
+ if (curArgTabEntry->numRegs >= 2)
+ {
+ regNumber otherRegNum;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(curArgTabEntry->numRegs == 2);
+ otherRegNum = curArgTabEntry->otherRegNum;
+#else
+ otherRegNum = (regNumber)(((unsigned)curArgTabEntry->regNum) + curArgTabEntry->numRegs - 1);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (listCount == -1)
+ {
+ char separator = (curArgTabEntry->numRegs == 2) ? ',' : '-';
+
+ sprintf_s(bufp, bufLength, "arg%d %s%c%s%c", curArgTabEntry->argNum, compRegVarName(argReg),
+ separator, compRegVarName(otherRegNum), 0);
+ }
+ else // listCount is 0,1,2 or 3
+ {
+ assert(listCount <= MAX_ARG_REG_COUNT);
+ regNumber curReg = (listCount == 1) ? otherRegNum : (regNumber)((unsigned)(argReg) + listCount);
+ sprintf_s(bufp, bufLength, "arg%d m%d %s%c", curArgTabEntry->argNum, listCount,
+ compRegVarName(curReg), 0);
+ }
+ }
+ else
+#endif
+ {
+ sprintf_s(bufp, bufLength, "arg%d in %s%c", curArgTabEntry->argNum, compRegVarName(argReg), 0);
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// gtDispArgList: Dump the tree for a call arg list
+//
+// Arguments:
+// tree - The call for which 'arg' is an argument
+// indentStack - the specification for the current level of indentation & arcs
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'tree' must be a call node
+
+void Compiler::gtDispArgList(GenTreePtr tree, IndentStack* indentStack)
+{
+ GenTree* args = tree->gtCall.gtCallArgs;
+ unsigned argnum = 0;
+ const int BufLength = 256;
+ char buf[BufLength];
+ char* bufp = &buf[0];
+ unsigned numChildren = tree->NumChildren();
+ assert(numChildren != 0);
+ bool argListIsLastChild = (args == tree->GetChild(numChildren - 1));
+
+ IndentInfo arcType = IIArc;
+ if (tree->gtCall.gtCallObjp != nullptr)
+ {
+ argnum++;
+ }
+
+ while (args != nullptr)
+ {
+ assert(args->gtOper == GT_LIST);
+ GenTree* arg = args->gtOp.gtOp1;
+ if (!arg->IsNothingNode() && !arg->IsArgPlaceHolderNode())
+ {
+ gtGetArgMsg(tree, arg, argnum, -1, bufp, BufLength);
+ if (argListIsLastChild && (args->gtOp.gtOp2 == nullptr))
+ {
+ arcType = IIArcBottom;
+ }
+ gtDispChild(arg, indentStack, arcType, bufp, false);
+ }
+ args = args->gtOp.gtOp2;
+ argnum++;
+ }
+}
+
+//------------------------------------------------------------------------
+// gtDispTreeList: Dump a sequence of trees linked via gtNext
+//
+// Arguments:
+//    tree        - The first tree in the gtNext-linked sequence to be printed
+//    indentStack - the specification for the current level of indentation & arcs
+//
+// Return Value:
+//    None.
+//
+// Assumptions:
+//    'tree' heads a gtNext-linked list of trees (e.g. the statement list of a basic block)
+
+void Compiler::gtDispTreeList(GenTreePtr tree, IndentStack* indentStack /* = nullptr */)
+{
+ for (/*--*/; tree != nullptr; tree = tree->gtNext)
+ {
+ gtDispTree(tree, indentStack);
+ printf("\n");
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtDispRange: dumps a range of LIR.
+//
+// Arguments:
+// range - the range of LIR to display.
+//
+void Compiler::gtDispRange(LIR::ReadOnlyRange const& range)
+{
+ for (GenTree* node : range)
+ {
+ gtDispLIRNode(node);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtDispTreeRange: dumps the LIR range that contains all of the
+// nodes in the dataflow tree rooted at a given
+// node.
+//
+// Arguments:
+// containingRange - the LIR range that contains the root node.
+// tree - the root of the dataflow tree.
+//
+void Compiler::gtDispTreeRange(LIR::Range& containingRange, GenTree* tree)
+{
+ bool unused;
+ gtDispRange(containingRange.GetTreeRange(tree, &unused));
+}
+
+//------------------------------------------------------------------------
+// Compiler::gtDispLIRNode: dumps a single LIR node.
+//
+// Arguments:
+// node - the LIR node to dump.
+//
+void Compiler::gtDispLIRNode(GenTree* node)
+{
+ auto displayOperand = [](GenTree* operand, const char* message, IndentInfo operandArc, IndentStack& indentStack) {
+ assert(operand != nullptr);
+ assert(message != nullptr);
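+ // Each operand prints as its temp number, type, and message, e.g. " t42    int    arg1 in rcx".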
+
+ // 49 spaces for alignment
+ printf("%-49s", "");
+
+ indentStack.Push(operandArc);
+ indentStack.print();
+ indentStack.Pop();
+ operandArc = IIArc;
+
+ printf(" t%-5d %-6s %s\n", operand->gtTreeID, varTypeName(operand->TypeGet()), message);
+
+ };
+
+ IndentStack indentStack(this);
+
+ const int bufLength = 256;
+ char buf[bufLength];
+
+ const bool nodeIsCall = node->IsCall();
+
+ int numCallEarlyArgs = 0;
+ if (nodeIsCall)
+ {
+ GenTreeCall* call = node->AsCall();
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ if (!args->Current()->IsArgPlaceHolderNode() && args->Current()->IsValue())
+ {
+ numCallEarlyArgs++;
+ }
+ }
+ }
+
+ // Visit operands
+ IndentInfo operandArc = IIArcTop;
+ int callArgNumber = 0;
+ for (GenTree* operand : node->Operands())
+ {
+ if (operand->IsArgPlaceHolderNode() || !operand->IsValue())
+ {
+ // Either of these situations may happen with calls.
+ continue;
+ }
+
+ if (nodeIsCall)
+ {
+ GenTreeCall* call = node->AsCall();
+ if (operand == call->gtCallObjp)
+ {
+ sprintf_s(buf, sizeof(buf), "this in %s", compRegVarName(REG_ARG_0));
+ displayOperand(operand, buf, operandArc, indentStack);
+ }
+ else if (operand == call->gtCallAddr)
+ {
+ displayOperand(operand, "calli tgt", operandArc, indentStack);
+ }
+ else if (operand == call->gtControlExpr)
+ {
+ displayOperand(operand, "control expr", operandArc, indentStack);
+ }
+ else if (operand == call->gtCallCookie)
+ {
+ displayOperand(operand, "cookie", operandArc, indentStack);
+ }
+ else
+ {
+ int callLateArgNumber = callArgNumber - numCallEarlyArgs;
+ if (operand->OperGet() == GT_LIST)
+ {
+ int listIndex = 0;
+ for (GenTreeArgList* element = operand->AsArgList(); element != nullptr; element = element->Rest())
+ {
+ operand = element->Current();
+ if (callLateArgNumber < 0)
+ {
+ gtGetArgMsg(call, operand, callArgNumber, listIndex, buf, sizeof(buf));
+ }
+ else
+ {
+ gtGetLateArgMsg(call, operand, callLateArgNumber, listIndex, buf, sizeof(buf));
+ }
+
+ displayOperand(operand, buf, operandArc, indentStack);
+ operandArc = IIArc;
+ }
+ }
+ else
+ {
+ if (callLateArgNumber < 0)
+ {
+ gtGetArgMsg(call, operand, callArgNumber, -1, buf, sizeof(buf));
+ }
+ else
+ {
+ gtGetLateArgMsg(call, operand, callLateArgNumber, -1, buf, sizeof(buf));
+ }
+
+ displayOperand(operand, buf, operandArc, indentStack);
+ }
+
+ callArgNumber++;
+ }
+ }
+ else if (node->OperIsDynBlkOp())
+ {
+ if (operand == node->AsBlk()->Addr())
+ {
+ displayOperand(operand, "lhs", operandArc, indentStack);
+ }
+ else if (operand == node->AsBlk()->Data())
+ {
+ displayOperand(operand, "rhs", operandArc, indentStack);
+ }
+ else
+ {
+ assert(operand == node->AsDynBlk()->gtDynamicSize);
+ displayOperand(operand, "size", operandArc, indentStack);
+ }
+ }
+ else if (node->OperGet() == GT_DYN_BLK)
+ {
+ if (operand == node->AsBlk()->Addr())
+ {
+ displayOperand(operand, "lhs", operandArc, indentStack);
+ }
+ else
+ {
+ assert(operand == node->AsDynBlk()->gtDynamicSize);
+ displayOperand(operand, "size", operandArc, indentStack);
+ }
+ }
+ else if (node->OperIsAssignment())
+ {
+ if (operand == node->gtGetOp1())
+ {
+ displayOperand(operand, "lhs", operandArc, indentStack);
+ }
+ else
+ {
+ displayOperand(operand, "rhs", operandArc, indentStack);
+ }
+ }
+ else
+ {
+ displayOperand(operand, "", operandArc, indentStack);
+ }
+
+ operandArc = IIArc;
+ }
+
+ // Visit the operator
+ const bool topOnly = true;
+ const bool isLIR = true;
+ gtDispTree(node, &indentStack, nullptr, topOnly, isLIR);
+
+ printf("\n");
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Check if the given node can be folded,
+ * and call the methods to perform the folding
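+ *
+ *  Folding is dispatched to gtFoldExprConst when all operands are constants,
+ *  to gtFoldExprSpecial when only one operand is a constant, and to
+ *  gtFoldExprCompare for comparisons (which can fold two identical subtrees).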
+ */
+
+GenTreePtr Compiler::gtFoldExpr(GenTreePtr tree)
+{
+ unsigned kind = tree->OperKind();
+
+ /* We must have a simple operation to fold */
+
+ // If we're in CSE, it's not safe to perform tree folding here,
+ // since it could potentially change the set of CSE candidates being considered.
+ if (optValnumCSE_phase)
+ {
+ return tree;
+ }
+
+ if (!(kind & GTK_SMPOP))
+ {
+ return tree;
+ }
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ /* Filter out non-foldable trees that can have constant children */
+
+ assert(kind & (GTK_UNOP | GTK_BINOP));
+ switch (tree->gtOper)
+ {
+ case GT_RETFILT:
+ case GT_RETURN:
+ case GT_IND:
+ return tree;
+ default:
+ break;
+ }
+
+ /* try to fold the current node */
+
+ if ((kind & GTK_UNOP) && op1)
+ {
+ if (op1->OperKind() & GTK_CONST)
+ {
+ return gtFoldExprConst(tree);
+ }
+ }
+ else if ((kind & GTK_BINOP) && op1 && tree->gtOp.gtOp2 &&
+ // Don't take out conditionals for debugging
+ !((opts.compDbgCode || opts.MinOpts()) && tree->OperIsCompare()))
+ {
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ // The atomic operations are exempted here because they are never computable statically;
+ // one of their arguments is an address.
+ if (((op1->OperKind() & op2->OperKind()) & GTK_CONST) && !tree->OperIsAtomicOp())
+ {
+ /* both nodes are constants - fold the expression */
+ return gtFoldExprConst(tree);
+ }
+ else if ((op1->OperKind() | op2->OperKind()) & GTK_CONST)
+ {
+ /* at least one is a constant - see if we have a
+ * special operator that can use only one constant
+ * to fold - e.g. booleans */
+
+ return gtFoldExprSpecial(tree);
+ }
+ else if (tree->OperIsCompare())
+ {
+ /* comparisons of two local variables can sometimes be folded */
+
+ return gtFoldExprCompare(tree);
+ }
+ else if (op2->OperGet() == GT_COLON)
+ {
+ assert(tree->OperGet() == GT_QMARK);
+
+ GenTreePtr colon_op1 = op2->gtOp.gtOp1;
+ GenTreePtr colon_op2 = op2->gtOp.gtOp2;
+
+ if (gtCompareTree(colon_op1, colon_op2))
+ {
+ // Both sides of the GT_COLON are the same tree
+
+ GenTreePtr sideEffList = nullptr;
+ gtExtractSideEffList(op1, &sideEffList);
+
+ fgUpdateRefCntForExtract(op1, sideEffList); // Decrement refcounts for op1, keeping any side effects
+ fgUpdateRefCntForExtract(colon_op1, nullptr); // Decrement refcounts for colon_op1
+
+ // Clear colon flags only if the qmark itself is not conditionally executed
+ if ((tree->gtFlags & GTF_COLON_COND) == 0)
+ {
+ fgWalkTreePre(&colon_op2, gtClearColonCond);
+ }
+
+ if (sideEffList == nullptr)
+ {
+ // No side-effects, just return colon_op2
+ return colon_op2;
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nIdentical GT_COLON trees with side effects! Extracting side effects...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif
+ // Change the GT_COLON into a GT_COMMA node with the side-effects
+ op2->ChangeOper(GT_COMMA);
+ op2->gtFlags |= (sideEffList->gtFlags & GTF_ALL_EFFECT);
+ op2->gtOp.gtOp1 = sideEffList;
+ return op2;
+ }
+ }
+ }
+ }
+
+ /* Return the original node (folded/bashed or not) */
+
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * Some comparisons can be folded:
+ *
+ * locA == locA
+ * classVarA == classVarA
+ * locA + locB == locB + locA
+ *
+ */
+
+GenTreePtr Compiler::gtFoldExprCompare(GenTreePtr tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ assert(tree->OperIsCompare());
+
+ /* Filter out cases that cannot be folded here */
+
+ /* Do not fold floats or doubles (e.g. NaN != NaN) */
+
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ return tree;
+ }
+
+ /* Currently we can only fold when the two subtrees exactly match */
+
+ if ((tree->gtFlags & GTF_SIDE_EFFECT) || GenTree::Compare(op1, op2, true) == false)
+ {
+ return tree; /* return unfolded tree */
+ }
+
+ GenTreePtr cons;
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ case GT_LE:
+ case GT_GE:
+ cons = gtNewIconNode(true); /* Folds to GT_CNS_INT(true) */
+ break;
+
+ case GT_NE:
+ case GT_LT:
+ case GT_GT:
+ cons = gtNewIconNode(false); /* Folds to GT_CNS_INT(false) */
+ break;
+
+ default:
+ assert(!"Unexpected relOp");
+ return tree;
+ }
+
+ /* The node has been folded into 'cons' */
+
+ if (fgGlobalMorph)
+ {
+ if (!fgIsInlining())
+ {
+ fgMorphTreeDone(cons);
+ }
+ }
+ else
+ {
+ cons->gtNext = tree->gtNext;
+ cons->gtPrev = tree->gtPrev;
+ }
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(tree);
+ }
+ return cons;
+}
+
+/*****************************************************************************
+ *
+ * Some binary operators can be folded even if they have only one
+ * operand constant - e.g. boolean operators, add with 0,
+ * multiply with 1, etc.
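+ *
+ *  For example (when the non-constant operand has no side effects):
+ *    x + 0 ==> x        x * 1 ==> x        x * 0 ==> 0
+ *    x & 0 ==> 0        x | 0 ==> x        x >> 0 ==> x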
+ */
+
+GenTreePtr Compiler::gtFoldExprSpecial(GenTreePtr tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ genTreeOps oper = tree->OperGet();
+
+ GenTreePtr op, cons;
+ ssize_t val;
+
+ assert(tree->OperKind() & GTK_BINOP);
+
+ /* Filter out operators that cannot be folded here */
+ if (oper == GT_CAST)
+ {
+ return tree;
+ }
+
+ /* We only consider TYP_INT for folding
+ * Do not fold pointer arithmetic (e.g. addressing modes!) */
+
+ if (oper != GT_QMARK && !varTypeIsIntOrI(tree->gtType))
+ {
+ return tree;
+ }
+
+ /* Find out which is the constant node */
+
+ if (op1->IsCnsIntOrI())
+ {
+ op = op2;
+ cons = op1;
+ }
+ else if (op2->IsCnsIntOrI())
+ {
+ op = op1;
+ cons = op2;
+ }
+ else
+ {
+ return tree;
+ }
+
+ /* Get the constant value */
+
+ val = cons->gtIntConCommon.IconValue();
+
+ /* Here 'op' is the non-constant operand, 'cons' is the constant operand,
+ and 'val' is the constant's value */
+
+ switch (oper)
+ {
+
+ case GT_EQ:
+ case GT_NE:
+ // Optimize boxed value classes; these are always false. This IL is
+ // generated when a generic value is tested against null:
+ // <T> ... foo(T x) { ... if ((object)x == null) ...
+ if (val == 0 && op->IsBoxedValue())
+ {
+ // Change the assignment node so we don't generate any code for it.
+
+ GenTreePtr asgStmt = op->gtBox.gtAsgStmtWhenInlinedBoxValue;
+ assert(asgStmt->gtOper == GT_STMT);
+ GenTreePtr asg = asgStmt->gtStmt.gtStmtExpr;
+ assert(asg->gtOper == GT_ASG);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashing ");
+ printTreeID(asg);
+ printf(" to NOP as part of dead box operation\n");
+ gtDispTree(tree);
+ }
+#endif
+ asg->gtBashToNOP();
+
+ op = gtNewIconNode(oper == GT_NE);
+ if (fgGlobalMorph)
+ {
+ if (!fgIsInlining())
+ {
+ fgMorphTreeDone(op);
+ }
+ }
+ else
+ {
+ op->gtNext = tree->gtNext;
+ op->gtPrev = tree->gtPrev;
+ }
+ fgSetStmtSeq(asgStmt);
+ return op;
+ }
+ break;
+
+ case GT_ADD:
+ case GT_ASG_ADD:
+ if (val == 0)
+ {
+ goto DONE_FOLD;
+ }
+ break;
+
+ case GT_MUL:
+ case GT_ASG_MUL:
+ if (val == 1)
+ {
+ goto DONE_FOLD;
+ }
+ else if (val == 0)
+ {
+ /* Multiply by zero - return the 'zero' node, but not if side effects */
+ if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_DIV:
+ case GT_UDIV:
+ case GT_ASG_DIV:
+ if ((op2 == cons) && (val == 1) && !(op1->OperKind() & GTK_CONST))
+ {
+ goto DONE_FOLD;
+ }
+ break;
+
+ case GT_SUB:
+ case GT_ASG_SUB:
+ if ((op2 == cons) && (val == 0) && !(op1->OperKind() & GTK_CONST))
+ {
+ goto DONE_FOLD;
+ }
+ break;
+
+ case GT_AND:
+ if (val == 0)
+ {
+ /* AND with zero - return the 'zero' node, but not if side effects */
+
+ if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ else
+ {
+ /* The GTF_BOOLEAN flag is set for nodes that are part
+ * of a boolean expression, thus all their children
+ * are known to evaluate to only 0 or 1 */
+
+ if (tree->gtFlags & GTF_BOOLEAN)
+ {
+
+ /* The constant value must be 1
+ * AND with 1 stays the same */
+ assert(val == 1);
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_OR:
+ if (val == 0)
+ {
+ goto DONE_FOLD;
+ }
+ else if (tree->gtFlags & GTF_BOOLEAN)
+ {
+ /* The constant value must be 1 - OR with 1 is 1 */
+
+ assert(val == 1);
+
+ /* OR with one - return the 'one' node, but not if side effects */
+
+ if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ if (val == 0)
+ {
+ if (op2 == cons)
+ {
+ goto DONE_FOLD;
+ }
+ else if (!(op->gtFlags & GTF_SIDE_EFFECT))
+ {
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op);
+ }
+ op = cons;
+ goto DONE_FOLD;
+ }
+ }
+ break;
+
+ case GT_QMARK:
+ {
+ assert(op1 == cons && op2 == op && op2->gtOper == GT_COLON);
+ assert(op2->gtOp.gtOp1 && op2->gtOp.gtOp2);
+
+ assert(val == 0 || val == 1);
+
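+ // The qmark condition is a constant, so keep only the arm that will execute and,
+ // if local ref counts are being tracked, decrement the counts in the discarded arm.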
+ GenTree* opToDelete;
+ if (val)
+ {
+ op = op2->AsColon()->ThenNode();
+ opToDelete = op2->AsColon()->ElseNode();
+ }
+ else
+ {
+ op = op2->AsColon()->ElseNode();
+ opToDelete = op2->AsColon()->ThenNode();
+ }
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(opToDelete);
+ }
+
+ // Clear colon flags only if the qmark itself is not conditionally executed
+ if ((tree->gtFlags & GTF_COLON_COND) == 0)
+ {
+ fgWalkTreePre(&op, gtClearColonCond);
+ }
+ }
+
+ goto DONE_FOLD;
+
+ default:
+ break;
+ }
+
+ /* The node is not foldable */
+
+ return tree;
+
+DONE_FOLD:
+
+ /* The node has been folded into 'op' */
+
+ // If there was an assignment update, we just morphed it into
+ // a use; update the flags appropriately
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ assert((tree->OperKind() & GTK_ASGOP) || (op->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF | GTF_VAR_DEF)) == 0);
+
+ op->gtFlags &= ~(GTF_VAR_USEASG | GTF_VAR_USEDEF | GTF_VAR_DEF);
+ }
+
+ op->gtNext = tree->gtNext;
+ op->gtPrev = tree->gtPrev;
+
+ return op;
+}
+
+/*****************************************************************************
+ *
+ * Fold the given constant tree.
+ */
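+// Both operands (or the single operand of a unary oper) must already be constants.
+// On success the original 'tree' node is changed in place into a GT_CNS_INT,
+// GT_CNS_NATIVELONG or GT_CNS_DBL node; overflow cases instead become a
+// GT_COMMA(overflow helper call, dummy constant).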
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
+{
+ unsigned kind = tree->OperKind();
+
+ SSIZE_T i1, i2, itemp;
+ INT64 lval1, lval2, ltemp;
+ float f1, f2;
+ double d1, d2;
+ var_types switchType;
+ FieldSeqNode* fieldSeq = FieldSeqStore::NotAField(); // default unless we override it when folding
+
+ assert(kind & (GTK_UNOP | GTK_BINOP));
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (!opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ {
+ return tree;
+ }
+
+ if (tree->OperGet() == GT_NOP)
+ {
+ return tree;
+ }
+
+#ifdef FEATURE_SIMD
+ if (tree->OperGet() == GT_SIMD)
+ {
+ return tree;
+ }
+#endif // FEATURE_SIMD
+
+ if (tree->gtOper == GT_ALLOCOBJ)
+ {
+ return tree;
+ }
+
+ if (kind & GTK_UNOP)
+ {
+ assert(op1->OperKind() & GTK_CONST);
+
+ switch (op1->gtType)
+ {
+ case TYP_INT:
+
+ /* Fold constant INT unary operator */
+ assert(op1->gtIntCon.ImmedValCanBeFolded(this, tree->OperGet()));
+ i1 = (int)op1->gtIntCon.gtIconVal;
+
+ // If we fold a unary oper, then the folded constant
+ // is considered a ConstantIndexField if op1 was one
+ //
+
+ if ((op1->gtIntCon.gtFieldSeq != nullptr) && op1->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ fieldSeq = op1->gtIntCon.gtFieldSeq;
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_NOT:
+ i1 = ~i1;
+ break;
+
+ case GT_NEG:
+ case GT_CHS:
+ i1 = -i1;
+ break;
+
+ case GT_CAST:
+ // assert (genActualType(tree->CastToType()) == tree->gtType);
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ itemp = INT32(INT8(i1));
+ goto CHK_OVF;
+
+ case TYP_SHORT:
+ itemp = INT32(INT16(i1));
+ CHK_OVF:
+ if (tree->gtOverflow() && ((itemp != i1) || ((tree->gtFlags & GTF_UNSIGNED) && i1 < 0)))
+ {
+ goto INT_OVF;
+ }
+ i1 = itemp;
+ goto CNS_INT;
+
+ case TYP_CHAR:
+ itemp = INT32(UINT16(i1));
+ if (tree->gtOverflow())
+ {
+ if (itemp != i1)
+ {
+ goto INT_OVF;
+ }
+ }
+ i1 = itemp;
+ goto CNS_INT;
+
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ itemp = INT32(UINT8(i1));
+ if (tree->gtOverflow())
+ {
+ if (itemp != i1)
+ {
+ goto INT_OVF;
+ }
+ }
+ i1 = itemp;
+ goto CNS_INT;
+
+ case TYP_UINT:
+ if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0)
+ {
+ goto INT_OVF;
+ }
+ goto CNS_INT;
+
+ case TYP_INT:
+ if ((tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0)
+ {
+ goto INT_OVF;
+ }
+ goto CNS_INT;
+
+ case TYP_ULONG:
+ if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && i1 < 0)
+ {
+ op1->ChangeOperConst(GT_CNS_NATIVELONG); // need type of oper to be same as tree
+ op1->gtType = TYP_LONG;
+ // We don't care about the value as we are throwing an exception
+ goto LNG_OVF;
+ }
+ lval1 = UINT64(UINT32(i1));
+ goto CNS_LONG;
+
+ case TYP_LONG:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ lval1 = INT64(UINT32(i1));
+ }
+ else
+ {
+ lval1 = INT64(INT32(i1));
+ }
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ f1 = forceCastToFloat(UINT32(i1));
+ }
+ else
+ {
+ f1 = forceCastToFloat(INT32(i1));
+ }
+ d1 = f1;
+ goto CNS_DOUBLE;
+
+ case TYP_DOUBLE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ d1 = (double)UINT32(i1);
+ }
+ else
+ {
+ d1 = (double)INT32(i1);
+ }
+ goto CNS_DOUBLE;
+
+ default:
+ assert(!"BAD_TYP");
+ break;
+ }
+ return tree;
+
+ default:
+ return tree;
+ }
+
+ goto CNS_INT;
+
+ case TYP_LONG:
+
+ /* Fold constant LONG unary operator */
+
+ assert(op1->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+ lval1 = op1->gtIntConCommon.LngValue();
+
+ switch (tree->gtOper)
+ {
+ case GT_NOT:
+ lval1 = ~lval1;
+ break;
+
+ case GT_NEG:
+ case GT_CHS:
+ lval1 = -lval1;
+ break;
+
+ case GT_CAST:
+ assert(genActualType(tree->CastToType()) == tree->gtType);
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ i1 = INT32(INT8(lval1));
+ goto CHECK_INT_OVERFLOW;
+
+ case TYP_SHORT:
+ i1 = INT32(INT16(lval1));
+ goto CHECK_INT_OVERFLOW;
+
+ case TYP_CHAR:
+ i1 = INT32(UINT16(lval1));
+ goto CHECK_UINT_OVERFLOW;
+
+ case TYP_UBYTE:
+ i1 = INT32(UINT8(lval1));
+ goto CHECK_UINT_OVERFLOW;
+
+ case TYP_INT:
+ i1 = INT32(lval1);
+
+ CHECK_INT_OVERFLOW:
+ if (tree->gtOverflow())
+ {
+ if (i1 != lval1)
+ {
+ goto INT_OVF;
+ }
+ if ((tree->gtFlags & GTF_UNSIGNED) && i1 < 0)
+ {
+ goto INT_OVF;
+ }
+ }
+ goto CNS_INT;
+
+ case TYP_UINT:
+ i1 = UINT32(lval1);
+
+ CHECK_UINT_OVERFLOW:
+ if (tree->gtOverflow() && UINT32(i1) != lval1)
+ {
+ goto INT_OVF;
+ }
+ goto CNS_INT;
+
+ case TYP_ULONG:
+ if (!(tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && lval1 < 0)
+ {
+ goto LNG_OVF;
+ }
+ goto CNS_LONG;
+
+ case TYP_LONG:
+ if ((tree->gtFlags & GTF_UNSIGNED) && tree->gtOverflow() && lval1 < 0)
+ {
+ goto LNG_OVF;
+ }
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if ((tree->gtFlags & GTF_UNSIGNED) && lval1 < 0)
+ {
+ d1 = FloatingPointUtils::convertUInt64ToDouble((unsigned __int64)lval1);
+ }
+ else
+ {
+ d1 = (double)lval1;
+ }
+
+ if (tree->CastToType() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1); // truncate precision
+ d1 = f1;
+ }
+ goto CNS_DOUBLE;
+ default:
+ assert(!"BAD_TYP");
+ break;
+ }
+ return tree;
+
+ default:
+ return tree;
+ }
+
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ assert(op1->gtOper == GT_CNS_DBL);
+
+ /* Fold constant DOUBLE unary operator */
+
+ d1 = op1->gtDblCon.gtDconVal;
+
+ switch (tree->gtOper)
+ {
+ case GT_NEG:
+ case GT_CHS:
+ d1 = -d1;
+ break;
+
+ case GT_CAST:
+
+ if (tree->gtOverflowEx())
+ {
+ return tree;
+ }
+
+ assert(genActualType(tree->CastToType()) == tree->gtType);
+
+ if ((op1->gtType == TYP_FLOAT && !_finite(forceCastToFloat(d1))) ||
+ (op1->gtType == TYP_DOUBLE && !_finite(d1)))
+ {
+ // The floating point constant is not finite. The ECMA spec says, in
+ // III 3.27, that "...if overflow occurs converting a floating point type
+ // to an integer, ..., the value returned is unspecified." However, it would
+ // at least be desirable to have the same value returned for casting an overflowing
+ // constant to an int as would obtained by passing that constant as a parameter
+ // then casting that parameter to an int type. We will assume that the C compiler's
+ // cast logic will yield the desired result (and trust testing to tell otherwise).
+ // Cross-compilation is an issue here; if that becomes an important scenario, we should
+ // capture the target-specific values of overflow casts to the various integral types as
+ // constants in a target-specific function.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_XARCH_
+ // Don't fold conversions of +inf/-inf to integral value as the value returned by JIT helper
+ // doesn't match with the C compiler's cast result.
+ return tree;
+#else //!_TARGET_XARCH_
+
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ i1 = ssize_t(INT8(d1));
+ goto CNS_INT;
+ case TYP_UBYTE:
+ i1 = ssize_t(UINT8(d1));
+ goto CNS_INT;
+ case TYP_SHORT:
+ i1 = ssize_t(INT16(d1));
+ goto CNS_INT;
+ case TYP_CHAR:
+ i1 = ssize_t(UINT16(d1));
+ goto CNS_INT;
+ case TYP_INT:
+ i1 = ssize_t(INT32(d1));
+ goto CNS_INT;
+ case TYP_UINT:
+ i1 = ssize_t(UINT32(d1));
+ goto CNS_INT;
+ case TYP_LONG:
+ lval1 = INT64(d1);
+ goto CNS_LONG;
+ case TYP_ULONG:
+ lval1 = UINT64(d1);
+ goto CNS_LONG;
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (op1->gtType == TYP_FLOAT)
+ d1 = forceCastToFloat(d1); // it's only !_finite() after this conversion
+ goto CNS_DOUBLE;
+ default:
+ unreached();
+ }
+#endif //!_TARGET_XARCH_
+ }
+
+ switch (tree->CastToType())
+ {
+ case TYP_BYTE:
+ i1 = INT32(INT8(d1));
+ goto CNS_INT;
+
+ case TYP_SHORT:
+ i1 = INT32(INT16(d1));
+ goto CNS_INT;
+
+ case TYP_CHAR:
+ i1 = INT32(UINT16(d1));
+ goto CNS_INT;
+
+ case TYP_UBYTE:
+ i1 = INT32(UINT8(d1));
+ goto CNS_INT;
+
+ case TYP_INT:
+ i1 = INT32(d1);
+ goto CNS_INT;
+
+ case TYP_UINT:
+ i1 = forceCastToUInt32(d1);
+ goto CNS_INT;
+
+ case TYP_LONG:
+ lval1 = INT64(d1);
+ goto CNS_LONG;
+
+ case TYP_ULONG:
+ lval1 = FloatingPointUtils::convertDoubleToUInt64(d1);
+ goto CNS_LONG;
+
+ case TYP_FLOAT:
+ d1 = forceCastToFloat(d1);
+ goto CNS_DOUBLE;
+
+ case TYP_DOUBLE:
+ if (op1->gtType == TYP_FLOAT)
+ {
+ d1 = forceCastToFloat(d1); // truncate precision
+ }
+ goto CNS_DOUBLE; // redundant cast
+
+ default:
+ assert(!"BAD_TYP");
+ break;
+ }
+ return tree;
+
+ default:
+ return tree;
+ }
+ goto CNS_DOUBLE;
+
+ default:
+ /* not a foldable typ - e.g. RET const */
+ return tree;
+ }
+ }
+
+ /* We have a binary operator */
+
+ assert(kind & GTK_BINOP);
+ assert(op2);
+ assert(op1->OperKind() & GTK_CONST);
+ assert(op2->OperKind() & GTK_CONST);
+
+ if (tree->gtOper == GT_COMMA)
+ {
+ return op2;
+ }
+
+ if (tree->gtOper == GT_LIST)
+ {
+ return tree;
+ }
+
+ switchType = op1->gtType;
+
+ // Normally we will just switch on op1 types, but for the case where
+ // only op2 is a GC type and op1 is not a GC type, we use the op2 type.
+ // This makes us handle this as a case of folding for GC type.
+ //
+ if (varTypeIsGC(op2->gtType) && !varTypeIsGC(op1->gtType))
+ {
+ switchType = op2->gtType;
+ }
+
+ switch (switchType)
+ {
+
+ /*-------------------------------------------------------------------------
+ * Fold constant REF or BYREF binary operator
+ * These can only be comparisons or null pointers
+ */
+
+ case TYP_REF:
+
+ /* String nodes are an RVA at this point */
+
+ if (op1->gtOper == GT_CNS_STR || op2->gtOper == GT_CNS_STR)
+ {
+ return tree;
+ }
+
+ __fallthrough;
+
+ case TYP_BYREF:
+
+ i1 = op1->gtIntConCommon.IconValue();
+ i2 = op2->gtIntConCommon.IconValue();
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (i1 == i2);
+ goto FOLD_COND;
+
+ case GT_NE:
+ i1 = (i1 != i2);
+ goto FOLD_COND;
+
+ case GT_ADD:
+ noway_assert(tree->gtType != TYP_REF);
+ // We only fold a GT_ADD that involves a null reference.
+ if (((op1->TypeGet() == TYP_REF) && (i1 == 0)) || ((op2->TypeGet() == TYP_REF) && (i2 == 0)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding operator with constant nodes into a constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ // Fold into GT_IND of null byref
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtType = TYP_BYREF;
+ tree->gtIntCon.gtIconVal = 0;
+ tree->gtIntCon.gtFieldSeq = FieldSeqStore::NotAField();
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolded to null byref:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+ }
+
+ default:
+ break;
+ }
+
+ return tree;
+
+ /*-------------------------------------------------------------------------
+ * Fold constant INT binary operator
+ */
+
+ case TYP_INT:
+
+ if (tree->OperIsCompare() && (tree->gtType == TYP_BYTE))
+ {
+ tree->gtType = TYP_INT;
+ }
+
+ assert(tree->gtType == TYP_INT || varTypeIsGC(tree->TypeGet()) || tree->gtOper == GT_MKREFANY);
+
+ // No GC pointer types should be folded here...
+ //
+ assert(!varTypeIsGC(op1->gtType) && !varTypeIsGC(op2->gtType));
+
+ assert(op1->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+ assert(op2->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+
+ i1 = op1->gtIntConCommon.IconValue();
+ i2 = op2->gtIntConCommon.IconValue();
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (INT32(i1) == INT32(i2));
+ break;
+ case GT_NE:
+ i1 = (INT32(i1) != INT32(i2));
+ break;
+
+ case GT_LT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) < UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) < INT32(i2));
+ }
+ break;
+
+ case GT_LE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) <= UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) <= INT32(i2));
+ }
+ break;
+
+ case GT_GE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) >= UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) >= INT32(i2));
+ }
+ break;
+
+ case GT_GT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT32(i1) > UINT32(i2));
+ }
+ else
+ {
+ i1 = (INT32(i1) > INT32(i2));
+ }
+ break;
+
+ case GT_ADD:
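+ // For checked adds, redo the addition in 64 bits and verify that truncating
+ // the result back to 32 bits (signed or unsigned) preserves the value.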
+ itemp = i1 + i2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (INT64(UINT32(itemp)) != INT64(UINT32(i1)) + INT64(UINT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ else
+ {
+ if (INT64(INT32(itemp)) != INT64(INT32(i1)) + INT64(INT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ }
+ i1 = itemp;
+ fieldSeq = GetFieldSeqStore()->Append(op1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
+ break;
+ case GT_SUB:
+ itemp = i1 - i2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (INT64(UINT32(itemp)) != ((INT64)((UINT32)i1) - (INT64)((UINT32)i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ else
+ {
+ if (INT64(INT32(itemp)) != INT64(INT32(i1)) - INT64(INT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ }
+ i1 = itemp;
+ break;
+ case GT_MUL:
+ itemp = i1 * i2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (INT64(UINT32(itemp)) != ((INT64)((UINT32)i1) * (INT64)((UINT32)i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ else
+ {
+ if (INT64(INT32(itemp)) != INT64(INT32(i1)) * INT64(INT32(i2)))
+ {
+ goto INT_OVF;
+ }
+ }
+ }
+ // For the very particular case of the "constant array index" pseudo-field, we
+ // assume that multiplication is by the field width, and preserves that field.
+ // This could obviously be made more robust by a more complicated set of annotations...
+ if ((op1->gtIntCon.gtFieldSeq != nullptr) && op1->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(op2->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
+ fieldSeq = op1->gtIntCon.gtFieldSeq;
+ }
+ else if ((op2->gtIntCon.gtFieldSeq != nullptr) &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(op1->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
+ fieldSeq = op2->gtIntCon.gtFieldSeq;
+ }
+ i1 = itemp;
+ break;
+
+ case GT_OR:
+ i1 |= i2;
+ break;
+ case GT_XOR:
+ i1 ^= i2;
+ break;
+ case GT_AND:
+ i1 &= i2;
+ break;
+
+ case GT_LSH:
+ i1 <<= (i2 & 0x1f);
+ break;
+ case GT_RSH:
+ i1 >>= (i2 & 0x1f);
+ break;
+ case GT_RSZ:
+ /* logical shift -> make it unsigned to not propagate the sign bit */
+ i1 = UINT32(i1) >> (i2 & 0x1f);
+ break;
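+ // Rotations mask the count to 0..31 and recombine the two shifted halves of the value.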
+ case GT_ROL:
+ i1 = (i1 << (i2 & 0x1f)) | (UINT32(i1) >> ((32 - i2) & 0x1f));
+ break;
+ case GT_ROR:
+ i1 = (i1 << ((32 - i2) & 0x1f)) | (UINT32(i1) >> (i2 & 0x1f));
+ break;
+
+ /* DIV and MOD can throw an exception - for division by 0,
+ * or for overflow when dividing MIN_INT by -1 */
+
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ if (INT32(i2) == 0)
+ {
+ // Division by zero:
+ // We have to evaluate this expression and throw an exception
+ return tree;
+ }
+ else if ((INT32(i2) == -1) && (UINT32(i1) == 0x80000000))
+ {
+ // Overflow Division:
+ // We have to evaluate this expression and throw an exception
+ return tree;
+ }
+
+ if (tree->gtOper == GT_DIV)
+ {
+ i1 = INT32(i1) / INT32(i2);
+ }
+ else if (tree->gtOper == GT_MOD)
+ {
+ i1 = INT32(i1) % INT32(i2);
+ }
+ else if (tree->gtOper == GT_UDIV)
+ {
+ i1 = UINT32(i1) / UINT32(i2);
+ }
+ else
+ {
+ assert(tree->gtOper == GT_UMOD);
+ i1 = UINT32(i1) % UINT32(i2);
+ }
+ break;
+
+ default:
+ return tree;
+ }
+
+ /* We get here after folding to a GT_CNS_INT type.
+ * Change the node to the new type / value and make sure the node sizes are OK */
+ CNS_INT:
+ FOLD_COND:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding operator with constant nodes into a constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+#ifdef _TARGET_64BIT_
+ // we need to properly re-sign-extend or truncate as needed.
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = UINT32(i1);
+ }
+ else
+ {
+ i1 = INT32(i1);
+ }
+#endif // _TARGET_64BIT_
+
+ /* Also all conditional folding jumps here since the node hanging from
+ * GT_JTRUE has to be a GT_CNS_INT - value 0 or 1 */
+
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = i1;
+ tree->gtIntCon.gtFieldSeq = fieldSeq;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashed to int constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+
+ /* This operation is going to cause an overflow exception. Morph into
+ an overflow helper. Put a dummy constant value for code generation.
+
+ We could remove all subsequent trees in the current basic block,
+ unless this node is a child of GT_COLON
+
+ NOTE: Since the folded value is not constant we should not change the
+ "tree" node - otherwise we confuse the logic that checks if the folding
+ was successful - instead use one of the operands, e.g. op1
+ */
+
+ LNG_OVF:
+ // Don't fold overflow operations if not global morph phase.
+ // The reason for this is that this optimization is replacing a gentree node
+ // with another new gentree node. Say a GT_CALL(arglist) has one 'arg'
+ // involving overflow arithmetic. During assertion prop, it is possible
+ // that the 'arg' could be constant folded and the result could lead to an
+ // overflow. In such a case 'arg' will get replaced with GT_COMMA node
+ // but fgMorphArgs() - see the logic around "if(lateArgsComputed)" - doesn't
+ // update args table. For this reason this optimization is enabled only
+ // for global morphing phase.
+ //
+ // X86/Arm32 legacy codegen note: This is not an issue on x86 for the reason that
+ // it doesn't use arg table for calls. In addition x86/arm32 legacy codegen doesn't
+ // expect long constants to show up as an operand of overflow cast operation.
+ //
+ // TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ if (!fgGlobalMorph)
+ {
+ assert(tree->gtOverflow());
+ return tree;
+ }
+#endif // !LEGACY_BACKEND
+
+ op1 = gtNewLconNode(0);
+ if (vnStore != nullptr)
+ {
+ op1->gtVNPair.SetBoth(vnStore->VNZeroForType(TYP_LONG));
+ }
+ goto OVF;
+
+ INT_OVF:
+#ifndef LEGACY_BACKEND
+ // Don't fold overflow operations if not global morph phase.
+ // The reason for this is that this optimization is replacing a gentree node
+ // with another new gentree node. Say a GT_CALL(arglist) has one 'arg'
+ // involving overflow arithmetic. During assertion prop, it is possible
+ // that the 'arg' could be constant folded and the result could lead to an
+ // overflow. In such a case 'arg' will get replaced with GT_COMMA node
+ // but fgMorphArgs() - see the logic around "if(lateArgsComputed)" - doesn't
+ // update args table. For this reason this optimization is enabled only
+ // for global morphing phase.
+ //
+ // X86/Arm32 legacy codegen note: This is not an issue on x86 for the reason that
+ // it doesn't use arg table for calls. In addition x86/arm32 legacy codegen doesn't
+ // expect long constants to show up as an operand of overflow cast operation.
+ //
+ // TODO-CQ: Once fgMorphArgs() is fixed this restriction could be removed.
+
+ if (!fgGlobalMorph)
+ {
+ assert(tree->gtOverflow());
+ return tree;
+ }
+#endif // !LEGACY_BACKEND
+
+ op1 = gtNewIconNode(0);
+ if (vnStore != nullptr)
+ {
+ op1->gtVNPair.SetBoth(vnStore->VNZeroForType(TYP_INT));
+ }
+ goto OVF;
+
+ OVF:
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding binary operator with constant nodes into a comma throw:\n");
+ gtDispTree(tree);
+ }
+#endif
+ /* We will change the cast to a GT_COMMA and attach the exception helper as gtOp.gtOp1.
+ * The constant expression zero becomes op2. */
+
+ assert(tree->gtOverflow());
+ assert(tree->gtOper == GT_ADD || tree->gtOper == GT_SUB || tree->gtOper == GT_CAST ||
+ tree->gtOper == GT_MUL);
+ assert(op1);
+
+ op2 = op1;
+ op1 = gtNewHelperCallNode(CORINFO_HELP_OVERFLOW, TYP_VOID, GTF_EXCEPT,
+ gtNewArgList(gtNewIconNode(compCurBB->bbTryIndex)));
+
+ if (vnStore != nullptr)
+ {
+ op1->gtVNPair =
+ vnStore->VNPWithExc(ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid()),
+ vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_OverflowExc)));
+ }
+
+ tree = gtNewOperNode(GT_COMMA, tree->gtType, op1, op2);
+
+ return tree;
+
+ /*-------------------------------------------------------------------------
+ * Fold constant LONG binary operator
+ */
+
+ case TYP_LONG:
+
+ // No GC pointer types should be folded here...
+ //
+ assert(!varTypeIsGC(op1->gtType) && !varTypeIsGC(op2->gtType));
+
+ // op1 is known to be a TYP_LONG, op2 is normally a TYP_LONG, unless we have a shift operator in which case
+ // it is a TYP_INT
+ //
+ assert((op2->gtType == TYP_LONG) || (op2->gtType == TYP_INT));
+
+ assert(op1->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+ assert(op2->gtIntConCommon.ImmedValCanBeFolded(this, tree->OperGet()));
+
+ lval1 = op1->gtIntConCommon.LngValue();
+
+ // For the shift operators we can have a op2 that is a TYP_INT and thus will be GT_CNS_INT
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ lval2 = op2->gtIntConCommon.IconValue();
+ }
+ else
+ {
+ lval2 = op2->gtIntConCommon.LngValue();
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (lval1 == lval2);
+ goto FOLD_COND;
+ case GT_NE:
+ i1 = (lval1 != lval2);
+ goto FOLD_COND;
+
+ case GT_LT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) < UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 < lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_LE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) <= UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 <= lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_GE:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) >= UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 >= lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_GT:
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ i1 = (UINT64(lval1) > UINT64(lval2));
+ }
+ else
+ {
+ i1 = (lval1 > lval2);
+ }
+ goto FOLD_COND;
+
+ case GT_ADD:
+ ltemp = lval1 + lval2;
+
+ LNG_ADD_CHKOVF:
+ /* For the SIGNED case - If there is one positive and one negative operand, there can
+ * be no overflow. If both are positive, the result has to be positive, and similarly
+ * for negatives.
+ *
+ * For the UNSIGNED case - If either UINT64 operand is bigger than the result then OVF */
+
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if ((UINT64(lval1) > UINT64(ltemp)) || (UINT64(lval2) > UINT64(ltemp)))
+ {
+ goto LNG_OVF;
+ }
+ }
+ else if (((lval1 < 0) == (lval2 < 0)) && ((lval1 < 0) != (ltemp < 0)))
+ {
+ goto LNG_OVF;
+ }
+ }
+ lval1 = ltemp;
+ break;
+
+ case GT_SUB:
+ ltemp = lval1 - lval2;
+ if (tree->gtOverflow())
+ {
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ if (UINT64(lval2) > UINT64(lval1))
+ {
+ goto LNG_OVF;
+ }
+ }
+ else
+ {
+ /* If both operands are +ve or both are -ve, there can be no
+ overflow. Else use the logic for : lval1 + (-lval2) */
+
+ if ((lval1 < 0) != (lval2 < 0))
+ {
+ if (lval2 == INT64_MIN)
+ {
+ goto LNG_OVF;
+ }
+ lval2 = -lval2;
+ goto LNG_ADD_CHKOVF;
+ }
+ }
+ }
+ lval1 = ltemp;
+ break;
+
+ case GT_MUL:
+ ltemp = lval1 * lval2;
+
+ if (tree->gtOverflow() && lval2 != 0)
+ {
+
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ UINT64 ultemp = ltemp;
+ UINT64 ulval1 = lval1;
+ UINT64 ulval2 = lval2;
+ if ((ultemp / ulval2) != ulval1)
+ {
+ goto LNG_OVF;
+ }
+ }
+ else
+ {
+ // This does a multiply and then reverses it with a divide. The test works except
+ // for MIN_LONG * -1, where the sign of ltemp comes out wrong, so make sure to
+ // double check the sign.
+ // if either is 0, then no overflow
+ if (lval1 != 0) // lval2 checked above.
+ {
+ if (((lval1 < 0) == (lval2 < 0)) && (ltemp < 0))
+ {
+ goto LNG_OVF;
+ }
+ if (((lval1 < 0) != (lval2 < 0)) && (ltemp > 0))
+ {
+ goto LNG_OVF;
+ }
+
+ // TODO-Amd64-Unix: Remove the code that disables optimizations for this method when
+ // the clang optimizer is fixed and/or the method implementation is refactored into
+ // simpler code.
+ // There is a bug in the clang-3.5 optimizer: in release builds it mistypes the
+ // operands of (ltemp / lval2) and performs a 32 bit divide instead of a 64 bit one
+ // in the corner case of MIN_LONG. When lval1 and lval2 are both equal to MIN_LONG
+ // (0x8000000000000000) this raises a SIGFPE.
+ // Optimizations are disabled for now. See compiler.h.
+ if ((ltemp / lval2) != lval1)
+ {
+ goto LNG_OVF;
+ }
+ }
+ }
+ }
+
+ lval1 = ltemp;
+ break;
+
+ case GT_OR:
+ lval1 |= lval2;
+ break;
+ case GT_XOR:
+ lval1 ^= lval2;
+ break;
+ case GT_AND:
+ lval1 &= lval2;
+ break;
+
+ case GT_LSH:
+ lval1 <<= (lval2 & 0x3f);
+ break;
+ case GT_RSH:
+ lval1 >>= (lval2 & 0x3f);
+ break;
+ case GT_RSZ:
+ /* logical shift -> make it unsigned to not propagate the sign bit */
+ lval1 = UINT64(lval1) >> (lval2 & 0x3f);
+ break;
+ case GT_ROL:
+ lval1 = (lval1 << (lval2 & 0x3f)) | (UINT64(lval1) >> ((64 - lval2) & 0x3f));
+ break;
+ case GT_ROR:
+ lval1 = (lval1 << ((64 - lval2) & 0x3f)) | (UINT64(lval1) >> (lval2 & 0x3f));
+ break;
+
+ // Both DIV and IDIV on x86 raise an exception for min_int (and min_long) / -1. So we preserve
+ // that behavior here.
+ case GT_DIV:
+ if (!lval2)
+ {
+ return tree;
+ }
+
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 /= lval2;
+ break;
+
+ case GT_MOD:
+ if (!lval2)
+ {
+ return tree;
+ }
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 %= lval2;
+ break;
+
+ case GT_UDIV:
+ if (!lval2)
+ {
+ return tree;
+ }
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 = UINT64(lval1) / UINT64(lval2);
+ break;
+
+ case GT_UMOD:
+ if (!lval2)
+ {
+ return tree;
+ }
+ if (UINT64(lval1) == UI64(0x8000000000000000) && lval2 == INT64(-1))
+ {
+ return tree;
+ }
+ lval1 = UINT64(lval1) % UINT64(lval2);
+ break;
+ default:
+ return tree;
+ }
+
+ CNS_LONG:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding long operator with constant nodes into a constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ assert((GenTree::s_gtNodeSizes[GT_CNS_NATIVELONG] == TREE_NODE_SZ_SMALL) ||
+ (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE));
+
+ tree->ChangeOperConst(GT_CNS_NATIVELONG);
+ tree->gtIntConCommon.SetLngValue(lval1);
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashed to long constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+
+ /*-------------------------------------------------------------------------
+ * Fold constant FLOAT or DOUBLE binary operator
+ */
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+ if (tree->gtOverflowEx())
+ {
+ return tree;
+ }
+
+ assert(op1->gtOper == GT_CNS_DBL);
+ d1 = op1->gtDblCon.gtDconVal;
+
+ assert(varTypeIsFloating(op2->gtType));
+ assert(op2->gtOper == GT_CNS_DBL);
+ d2 = op2->gtDblCon.gtDconVal;
+
+ /* Special case - check if we have NaN operands.
+ * For comparisons, an ordered operation (one without GTF_RELOP_NAN_UN) always returns 0.
+ * For unordered operations (i.e. the GTF_RELOP_NAN_UN flag is set)
+ * the result is always true - return 1. */
+
+ if (_isnan(d1) || _isnan(d2))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Double operator(s) is NaN\n");
+ }
+#endif
+ if (tree->OperKind() & GTK_RELOP)
+ {
+ if (tree->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ /* Unordered comparison with NaN always succeeds */
+ i1 = 1;
+ goto FOLD_COND;
+ }
+ else
+ {
+ /* Normal comparison with NaN always fails */
+ i1 = 0;
+ goto FOLD_COND;
+ }
+ }
+ }
+
+ switch (tree->gtOper)
+ {
+ case GT_EQ:
+ i1 = (d1 == d2);
+ goto FOLD_COND;
+ case GT_NE:
+ i1 = (d1 != d2);
+ goto FOLD_COND;
+
+ case GT_LT:
+ i1 = (d1 < d2);
+ goto FOLD_COND;
+ case GT_LE:
+ i1 = (d1 <= d2);
+ goto FOLD_COND;
+ case GT_GE:
+ i1 = (d1 >= d2);
+ goto FOLD_COND;
+ case GT_GT:
+ i1 = (d1 > d2);
+ goto FOLD_COND;
+
+#if FEATURE_STACK_FP_X87
+ case GT_ADD:
+ d1 += d2;
+ break;
+ case GT_SUB:
+ d1 -= d2;
+ break;
+ case GT_MUL:
+ d1 *= d2;
+ break;
+ case GT_DIV:
+ if (!d2)
+ return tree;
+ d1 /= d2;
+ break;
+#else //! FEATURE_STACK_FP_X87
+ // non-x86 arch: floating point arithmetic should be done in the declared
+ // precision while doing constant folding. Although TYP_FLOAT constants are
+ // stored as double constants, float arithmetic must be performed on values
+ // converted back to float. Here is an example case where performing the
+ // arithmetic in double precision would lead to incorrect results.
+ //
+ // Example:
+ // float a = float.MaxValue;
+ // float b = a*a; This will produce +inf in single precision and 1.1579207543382391e+077 in double
+ // precision.
+ // float c = b/b; This will produce NaN in single precision and 1 in double precision.
+ case GT_ADD:
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 + f2;
+ }
+ else
+ {
+ d1 += d2;
+ }
+ break;
+
+ case GT_SUB:
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 - f2;
+ }
+ else
+ {
+ d1 -= d2;
+ }
+ break;
+
+ case GT_MUL:
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 * f2;
+ }
+ else
+ {
+ d1 *= d2;
+ }
+ break;
+
+ case GT_DIV:
+ if (!d2)
+ {
+ return tree;
+ }
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ f1 = forceCastToFloat(d1);
+ f2 = forceCastToFloat(d2);
+ d1 = f1 / f2;
+ }
+ else
+ {
+ d1 /= d2;
+ }
+ break;
+#endif //! FEATURE_STACK_FP_X87
+
+ default:
+ return tree;
+ }
+
+ CNS_DOUBLE:
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFolding fp operator with constant nodes into a fp constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ assert((GenTree::s_gtNodeSizes[GT_CNS_DBL] == TREE_NODE_SZ_SMALL) ||
+ (tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE));
+
+ tree->ChangeOperConst(GT_CNS_DBL);
+ tree->gtDblCon.gtDconVal = d1;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Bashed to fp constant:\n");
+ gtDispTree(tree);
+ }
+#endif
+ goto DONE;
+
+ default:
+ /* not a foldable typ */
+ return tree;
+ }
+
+//-------------------------------------------------------------------------
+
+DONE:
+
+ /* Make sure no side effect flags are set on this constant node */
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+
+ return tree;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Create an assignment of the given value to a temp.
+ */
+
+GenTreePtr Compiler::gtNewTempAssign(unsigned tmp, GenTreePtr val)
+{
+ LclVarDsc* varDsc = lvaTable + tmp;
+
+ if (varDsc->TypeGet() == TYP_I_IMPL && val->TypeGet() == TYP_BYREF)
+ {
+ impBashVarAddrsToI(val);
+ }
+
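+ // If the value is a use of a normalize-on-load local, re-create the use with the local's real (small) type.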
+ var_types valTyp = val->TypeGet();
+ if (val->OperGet() == GT_LCL_VAR && lvaTable[val->gtLclVar.gtLclNum].lvNormalizeOnLoad())
+ {
+ valTyp = lvaGetRealType(val->gtLclVar.gtLclNum);
+ val = gtNewLclvNode(val->gtLclVar.gtLclNum, valTyp, val->gtLclVar.gtLclILoffs);
+ }
+ var_types dstTyp = varDsc->TypeGet();
+
+ /* If the variable's lvType is not yet set then set it here */
+ if (dstTyp == TYP_UNDEF)
+ {
+ varDsc->lvType = dstTyp = genActualType(valTyp);
+ if (varTypeIsGC(dstTyp))
+ {
+ varDsc->lvStructGcCount = 1;
+ }
+#if FEATURE_SIMD
+ else if (varTypeIsSIMD(dstTyp))
+ {
+ varDsc->lvSIMDType = 1;
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ /* Make sure the actual types match */
+ if (genActualType(valTyp) != genActualType(dstTyp))
+ {
+ // Plus some other exceptions that are apparently legal:
+ // 1) TYP_REF or BYREF = TYP_I_IMPL
+ bool ok = false;
+ if (varTypeIsGC(dstTyp) && (valTyp == TYP_I_IMPL))
+ {
+ ok = true;
+ }
+ // 2) TYP_DOUBLE = TYP_FLOAT or TYP_FLOAT = TYP_DOUBLE
+ else if (varTypeIsFloating(dstTyp) && varTypeIsFloating(valTyp))
+ {
+ ok = true;
+ }
+
+ if (!ok)
+ {
+ gtDispTree(val);
+ assert(!"Incompatible types for gtNewTempAssign");
+ }
+ }
+#endif
+
+ // Floating Point assignments can be created during inlining
+ // see "Zero init inlinee locals:" in fgInlinePrependStatements
+ // thus we may need to set compFloatingPointUsed to true here.
+ //
+ if (varTypeIsFloating(dstTyp) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+
+ /* Create the assignment node */
+
+ GenTreePtr asg;
+ GenTreePtr dest = gtNewLclvNode(tmp, dstTyp);
+ dest->gtFlags |= GTF_VAR_DEF;
+
+ // With first-class structs, we should be propagating the class handle on all non-primitive
+ // struct types. We don't have a convenient way to do that for all SIMD temps, since some
+ // internal trees use SIMD types that are not used by the input IL. In this case, we allow
+ // a null type handle and derive the necessary information about the type from its varType.
+ CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(val);
+ if (varTypeIsStruct(valTyp) && ((structHnd != NO_CLASS_HANDLE) || (varTypeIsSIMD(valTyp))))
+ {
+ // The GT_OBJ may be a child of a GT_COMMA.
+ GenTreePtr valx = val->gtEffectiveVal(/*commaOnly*/ true);
+
+ if (valx->gtOper == GT_OBJ)
+ {
+ assert(structHnd != nullptr);
+ lvaSetStruct(tmp, structHnd, false);
+ }
+ dest->gtFlags |= GTF_DONT_CSE;
+ valx->gtFlags |= GTF_DONT_CSE;
+ asg = impAssignStruct(dest, val, structHnd, (unsigned)CHECK_SPILL_NONE);
+ }
+ else
+ {
+ asg = gtNewAssignNode(dest, val);
+ }
+
+#ifndef LEGACY_BACKEND
+ if (compRationalIRForm)
+ {
+ Rationalizer::RewriteAssignmentIntoStoreLcl(asg->AsOp());
+ }
+#endif // !LEGACY_BACKEND
+
+ return asg;
+}
+
+/*****************************************************************************
+ *
+ * Create a helper call to access a COM field (iff 'assg' is non-null this is
+ * an assignment and 'assg' is the new value).
+ */
+
+GenTreePtr Compiler::gtNewRefCOMfield(GenTreePtr objPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp,
+ CORINFO_CLASS_HANDLE structType,
+ GenTreePtr assg)
+{
+ assert(pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_HELPER ||
+ pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_ADDR_HELPER ||
+ pFieldInfo->fieldAccessor == CORINFO_FIELD_STATIC_ADDR_HELPER);
+
+ /* If we can't access it directly, we need to call a helper function */
+ GenTreeArgList* args = nullptr;
+ var_types helperType = TYP_BYREF;
+
+ if (pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_HELPER)
+ {
+ if (access & CORINFO_ACCESS_SET)
+ {
+ assert(assg != nullptr);
+ // helper needs pointer to struct, not struct itself
+ if (pFieldInfo->helper == CORINFO_HELP_SETFIELDSTRUCT)
+ {
+ assert(structType != nullptr);
+ assg = impGetStructAddr(assg, structType, (unsigned)CHECK_SPILL_ALL, true);
+ }
+ else if (lclTyp == TYP_DOUBLE && assg->TypeGet() == TYP_FLOAT)
+ {
+ assg = gtNewCastNode(TYP_DOUBLE, assg, TYP_DOUBLE);
+ }
+ else if (lclTyp == TYP_FLOAT && assg->TypeGet() == TYP_DOUBLE)
+ {
+ assg = gtNewCastNode(TYP_FLOAT, assg, TYP_FLOAT);
+ }
+
+ args = gtNewArgList(assg);
+ helperType = TYP_VOID;
+ }
+ else if (access & CORINFO_ACCESS_GET)
+ {
+ helperType = lclTyp;
+
+ // The calling convention for the helper does not take into
+ // account optimization of primitive structs.
+ if ((pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT) && !varTypeIsStruct(lclTyp))
+ {
+ helperType = TYP_STRUCT;
+ }
+ }
+ }
+
+ if (pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT || pFieldInfo->helper == CORINFO_HELP_SETFIELDSTRUCT)
+ {
+ assert(pFieldInfo->structType != nullptr);
+ args = gtNewListNode(gtNewIconEmbClsHndNode(pFieldInfo->structType), args);
+ }
+
+ GenTreePtr fieldHnd = impTokenToHandle(pResolvedToken);
+ if (fieldHnd == nullptr)
+ { // compDonotInline()
+ return nullptr;
+ }
+
+ args = gtNewListNode(fieldHnd, args);
+
+ // If it's a static field, we shouldn't have an object node
+ // If it's an instance field, we have an object node
+ assert((pFieldInfo->fieldAccessor != CORINFO_FIELD_STATIC_ADDR_HELPER) ^ (objPtr == nullptr));
+
+ if (objPtr != nullptr)
+ {
+ args = gtNewListNode(objPtr, args);
+ }
+
+ GenTreePtr tree = gtNewHelperCallNode(pFieldInfo->helper, genActualType(helperType), 0, args);
+
+ if (pFieldInfo->fieldAccessor == CORINFO_FIELD_INSTANCE_HELPER)
+ {
+ if (access & CORINFO_ACCESS_GET)
+ {
+ if (pFieldInfo->helper == CORINFO_HELP_GETFIELDSTRUCT)
+ {
+ if (!varTypeIsStruct(lclTyp))
+ {
+ // get the result as primitive type
+ tree = impGetStructAddr(tree, structType, (unsigned)CHECK_SPILL_ALL, true);
+ tree = gtNewOperNode(GT_IND, lclTyp, tree);
+ }
+ }
+ else if (varTypeIsIntegral(lclTyp) && genTypeSize(lclTyp) < genTypeSize(TYP_INT))
+ {
+ // The helper does not extend the small return types.
+ tree = gtNewCastNode(genActualType(lclTyp), tree, lclTyp);
+ }
+ }
+ }
+ else
+ {
+ // OK, now do the indirection
+ if (access & CORINFO_ACCESS_GET)
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ tree = gtNewObjNode(structType, tree);
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_IND, lclTyp, tree);
+ }
+ tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF);
+ }
+ else if (access & CORINFO_ACCESS_SET)
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ tree = impAssignStructPtr(tree, assg, structType, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_IND, lclTyp, tree);
+ tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ tree = gtNewAssignNode(tree, assg);
+ }
+ }
+ }
+
+ return (tree);
+}
+
+/*****************************************************************************
+ *
+ * Return true if the given node (excluding its child trees) contains side effects.
+ * Note that it does not recurse, and children need to be handled separately.
+ * It may return false even if the node has GTF_SIDE_EFFECT (because of its children).
+ *
+ * Similar to OperMayThrow() (but handles GT_CALLs specially), but considers
+ * assignments too.
+ */
+
+bool Compiler::gtNodeHasSideEffects(GenTreePtr tree, unsigned flags)
+{
+ if (flags & GTF_ASG)
+ {
+ if ((tree->OperKind() & GTK_ASGOP))
+ {
+ return true;
+ }
+ }
+
+ // Are there only GTF_CALL side effects remaining? (and no other side effect kinds)
+ if (flags & GTF_CALL)
+ {
+ if (tree->OperGet() == GT_CALL)
+ {
+ // Generally all GT_CALL nodes are considered to have side-effects.
+ // But we may have a helper call that doesn't have any important side effects.
+ //
+ if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ // But if this tree is a helper call we may not care about the side-effects
+ //
+ CorInfoHelpFunc helper = eeGetHelperNum(tree->AsCall()->gtCallMethHnd);
+
+ // We definitely care about the side effects if MutatesHeap is true
+ //
+ if (s_helperCallProperties.MutatesHeap(helper))
+ {
+ return true;
+ }
+
+ // with GTF_IS_IN_CSE we will CSE helper calls that can run cctors.
+ //
+ if (((flags & GTF_IS_IN_CSE) == 0) && (s_helperCallProperties.MayRunCctor(helper)))
+ {
+ return true;
+ }
+
+ // If we also care about exceptions then check if the helper can throw
+ //
+ if (((flags & GTF_EXCEPT) != 0) && !s_helperCallProperties.NoThrow(helper))
+ {
+ return true;
+ }
+
+ // If this is a Pure helper call or an allocator (that will not need to run a finalizer)
+ // then we don't need to preserve the side effects (of this call -- we may care about those of the
+ // arguments).
+ if (s_helperCallProperties.IsPure(helper) ||
+ (s_helperCallProperties.IsAllocator(helper) && !s_helperCallProperties.MayFinalize(helper)))
+ {
+ GenTreeCall* call = tree->AsCall();
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ if (gtTreeHasSideEffects(args->Current(), flags))
+ {
+ return true;
+ }
+ }
+ // I'm a little worried that args that assign to temps that are late args will look like
+ // side effects...but better to be conservative for now.
+ for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
+ {
+ if (gtTreeHasSideEffects(args->Current(), flags))
+ {
+ return true;
+ }
+ }
+ // Otherwise:
+ return false;
+ }
+ }
+
+ // Otherwise the GT_CALL is considered to have side-effects.
+ return true;
+ }
+ }
+
+ if (flags & GTF_EXCEPT)
+ {
+ if (tree->OperMayThrow())
+ {
+ return true;
+ }
+ }
+
+ // Expressions declared as CSE by (e.g.) hoisting code are considered to have relevant side
+ // effects (if we care about GTF_MAKE_CSE).
+ if ((flags & GTF_MAKE_CSE) && (tree->gtFlags & GTF_MAKE_CSE))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ * Returns true if the expr tree has any side effects.
+ */
+
+bool Compiler::gtTreeHasSideEffects(GenTreePtr tree, unsigned flags /* = GTF_SIDE_EFFECT*/)
+{
+ // These are the side effect flags that we care about for this tree
+ unsigned sideEffectFlags = tree->gtFlags & flags;
+
+ // Does this tree have any Side-effect flags set that we care about?
+ if (sideEffectFlags == 0)
+ {
+ // no it doesn't..
+ return false;
+ }
+
+ if (sideEffectFlags == GTF_CALL)
+ {
+ if (tree->OperGet() == GT_CALL)
+ {
+ // Generally all trees that contain GT_CALL nodes are considered to have side-effects.
+ //
+ if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ // If this node is a helper call we may not care about the side-effects.
+ // Note that gtNodeHasSideEffects checks the side effects of the helper itself
+ // as well as the side effects of its arguments.
+ return gtNodeHasSideEffects(tree, flags);
+ }
+ }
+ else if (tree->OperGet() == GT_INTRINSIC)
+ {
+ if (gtNodeHasSideEffects(tree, flags))
+ {
+ return true;
+ }
+
+ if (gtNodeHasSideEffects(tree->gtOp.gtOp1, flags))
+ {
+ return true;
+ }
+
+ if ((tree->gtOp.gtOp2 != nullptr) && gtNodeHasSideEffects(tree->gtOp.gtOp2, flags))
+ {
+ return true;
+ }
+
+ return false;
+ }
+ }
+
+ return true;
+}
+
+GenTreePtr Compiler::gtBuildCommaList(GenTreePtr list, GenTreePtr expr)
+{
+ // 'list' starts off as null,
+ // and when it is null we haven't started the list yet.
+ //
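+ // Each call prepends 'expr', so successive calls with e1, e2, e3 produce
+ // GT_COMMA(e3, GT_COMMA(e2, e1)).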
+ if (list != nullptr)
+ {
+ // Create a GT_COMMA that appends 'expr' in front of the remaining set of expressions in (*list)
+ GenTreePtr result = gtNewOperNode(GT_COMMA, TYP_VOID, expr, list);
+
+ // Set the flags in the comma node
+ result->gtFlags |= (list->gtFlags & GTF_ALL_EFFECT);
+ result->gtFlags |= (expr->gtFlags & GTF_ALL_EFFECT);
+
+ // 'list' and 'expr' should have valuenumbers defined for both or for neither one
+ noway_assert(list->gtVNPair.BothDefined() == expr->gtVNPair.BothDefined());
+
+ // Set the ValueNumber 'gtVNPair' for the new GT_COMMA node
+ //
+ if (expr->gtVNPair.BothDefined())
+ {
+ // The result of a GT_COMMA node is op2, the normal value number is op2vnp
+ // But we also need to include the union of side effects from op1 and op2.
+ // we compute this value into exceptions_vnp.
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+
+ vnStore->VNPUnpackExc(expr->gtVNPair, &op1vnp, &op1Xvnp);
+ vnStore->VNPUnpackExc(list->gtVNPair, &op2vnp, &op2Xvnp);
+
+ ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
+
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ result->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
+ }
+
+ return result;
+ }
+ else
+ {
+ // The 'expr' will start the list of expressions
+ return expr;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Extracts side effects from the given expression
+ * and appends them to a given list (actually a GT_COMMA list)
+ * If 'ignoreRoot' is specified, the method doesn't treat the top
+ * level tree node as having side effects.
+ */
+
+void Compiler::gtExtractSideEffList(GenTreePtr expr,
+ GenTreePtr* pList,
+ unsigned flags /* = GTF_SIDE_EFFECT*/,
+ bool ignoreRoot /* = false */)
+{
+ assert(expr);
+ assert(expr->gtOper != GT_STMT);
+
+ /* If no side effect in the expression return */
+
+ if (!gtTreeHasSideEffects(expr, flags))
+ {
+ return;
+ }
+
+ genTreeOps oper = expr->OperGet();
+ unsigned kind = expr->OperKind();
+
+ // Look for any side effects that we care about
+ //
+ if (!ignoreRoot && gtNodeHasSideEffects(expr, flags))
+ {
+ // Add the side effect to the list and return
+ //
+ *pList = gtBuildCommaList(*pList, expr);
+ return;
+ }
+
+ if (kind & GTK_LEAF)
+ {
+ return;
+ }
+
+ if (oper == GT_LOCKADD || oper == GT_XADD || oper == GT_XCHG || oper == GT_CMPXCHG)
+ {
+ // XADD both adds to the memory location and also fetches the old value. If we only need the side
+ // effect of this instruction, change it into a GT_LOCKADD node (the add only)
+ if (oper == GT_XADD)
+ {
+ expr->gtOper = GT_LOCKADD;
+ expr->gtType = TYP_VOID;
+ }
+
+ // These operations are kind of important to keep
+ *pList = gtBuildCommaList(*pList, expr);
+ return;
+ }
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = expr->gtOp.gtOp1;
+ GenTreePtr op2 = expr->gtGetOp2();
+
+ if (flags & GTF_EXCEPT)
+ {
+ // Special case - GT_ADDR of GT_IND nodes of TYP_STRUCT
+ // have to be kept together
+
+ if (oper == GT_ADDR && op1->OperIsIndir() && op1->gtType == TYP_STRUCT)
+ {
+ *pList = gtBuildCommaList(*pList, expr);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Keep the GT_ADDR and GT_IND together:\n");
+ }
+#endif
+ return;
+ }
+ }
+
+ /* Continue searching for side effects in the subtrees of the expression
+ * NOTE: Be careful to preserve the right ordering - side effects are prepended
+ * to the list */
+
+ if (expr->gtFlags & GTF_REVERSE_OPS)
+ {
+ assert(oper != GT_COMMA);
+ if (op1)
+ {
+ gtExtractSideEffList(op1, pList, flags);
+ }
+ if (op2)
+ {
+ gtExtractSideEffList(op2, pList, flags);
+ }
+ }
+ else
+ {
+ if (op2)
+ {
+ gtExtractSideEffList(op2, pList, flags);
+ }
+ if (op1)
+ {
+ gtExtractSideEffList(op1, pList, flags);
+ }
+ }
+ }
+
+ if (expr->OperGet() == GT_CALL)
+ {
+ // Generally all GT_CALL nodes are considered to have side-effects.
+ // So if we get here it must be a Helper call that we decided does
+ // not have side effects that we needed to keep
+ //
+ assert(expr->gtCall.gtCallType == CT_HELPER);
+
+ // We can remove this Helper call, but there still could be
+ // side-effects in the arguments that we may need to keep
+ //
+ GenTreePtr args;
+ for (args = expr->gtCall.gtCallArgs; args; args = args->gtOp.gtOp2)
+ {
+ assert(args->IsList());
+ gtExtractSideEffList(args->Current(), pList, flags);
+ }
+ for (args = expr->gtCall.gtCallLateArgs; args; args = args->gtOp.gtOp2)
+ {
+ assert(args->IsList());
+ gtExtractSideEffList(args->Current(), pList, flags);
+ }
+ }
+
+ if (expr->OperGet() == GT_ARR_BOUNDS_CHECK
+#ifdef FEATURE_SIMD
+ || expr->OperGet() == GT_SIMD_CHK
+#endif // FEATURE_SIMD
+ )
+ {
+ gtExtractSideEffList(expr->AsBoundsChk()->gtArrLen, pList, flags);
+ gtExtractSideEffList(expr->AsBoundsChk()->gtIndex, pList, flags);
+ }
+
+ if (expr->OperGet() == GT_DYN_BLK || expr->OperGet() == GT_STORE_DYN_BLK)
+ {
+ if (expr->AsDynBlk()->Data() != nullptr)
+ {
+ gtExtractSideEffList(expr->AsDynBlk()->Data(), pList, flags);
+ }
+ gtExtractSideEffList(expr->AsDynBlk()->Addr(), pList, flags);
+ gtExtractSideEffList(expr->AsDynBlk()->gtDynamicSize, pList, flags);
+ }
+}
+
+/*****************************************************************************
+ *
+ * For debugging only - displays a tree node list and makes sure all the
+ * links are correctly set.
+ */
+
+#ifdef DEBUG
+
+void dispNodeList(GenTreePtr list, bool verbose)
+{
+ GenTreePtr last = nullptr;
+ GenTreePtr next;
+
+ if (!list)
+ {
+ return;
+ }
+
+ for (;;)
+ {
+ next = list->gtNext;
+
+ if (verbose)
+ {
+ printf("%08X -> %08X -> %08X\n", last, list, next);
+ }
+
+ assert(!last || last->gtNext == list);
+
+ assert(next == nullptr || next->gtPrev == list);
+
+ if (!next)
+ {
+ break;
+ }
+
+ last = list;
+ list = next;
+ }
+ printf(""); // null string means flush
+}
+
+/*****************************************************************************
+ * Callback to assert that the nodes of a qmark-colon subtree are marked
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::gtAssertColonCond(GenTreePtr* pTree, fgWalkData* data)
+{
+ assert(data->pCallbackData == nullptr);
+
+ assert((*pTree)->gtFlags & GTF_COLON_COND);
+
+ return WALK_CONTINUE;
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ * Callback to mark the nodes of a qmark-colon subtree that are conditionally
+ * executed.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::gtMarkColonCond(GenTreePtr* pTree, fgWalkData* data)
+{
+ assert(data->pCallbackData == nullptr);
+
+ (*pTree)->gtFlags |= GTF_COLON_COND;
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ * Callback to clear the conditionally executed flags of nodes that no longer
+ will be conditionally executed. Note that when we find another colon we must
+ stop, as the nodes below this one WILL be conditionally executed. This callback
+ is called when folding a qmark condition (i.e. the condition is constant).
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::gtClearColonCond(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ assert(data->pCallbackData == nullptr);
+
+ if (tree->OperGet() == GT_COLON)
+ {
+ // Nodes below this will be conditionally executed.
+ return WALK_SKIP_SUBTREES;
+ }
+
+ tree->gtFlags &= ~GTF_COLON_COND;
+ return WALK_CONTINUE;
+}
+
+struct FindLinkData
+{
+ GenTreePtr nodeToFind;
+ GenTreePtr* result;
+};
+
+/*****************************************************************************
+ *
+ * Callback used by the tree walker to implement fgFindLink()
+ */
+static Compiler::fgWalkResult gtFindLinkCB(GenTreePtr* pTree, Compiler::fgWalkData* cbData)
+{
+ FindLinkData* data = (FindLinkData*)cbData->pCallbackData;
+ if (*pTree == data->nodeToFind)
+ {
+ data->result = pTree;
+ return Compiler::WALK_ABORT;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
+GenTreePtr* Compiler::gtFindLink(GenTreePtr stmt, GenTreePtr node)
+{
+ assert(stmt->gtOper == GT_STMT);
+
+ FindLinkData data = {node, nullptr};
+
+ fgWalkResult result = fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, gtFindLinkCB, &data);
+
+ if (result == WALK_ABORT)
+ {
+ assert(data.nodeToFind == *data.result);
+ return data.result;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Callback that checks if a tree node has oper type GT_CATCH_ARG
+ */
+
+static Compiler::fgWalkResult gtFindCatchArg(GenTreePtr* pTree, Compiler::fgWalkData* /* data */)
+{
+ return ((*pTree)->OperGet() == GT_CATCH_ARG) ? Compiler::WALK_ABORT : Compiler::WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+bool Compiler::gtHasCatchArg(GenTreePtr tree)
+{
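+ // The GTF_ORDER_SIDEEFF flag is used here as a cheap filter; the tree walk then
+ // confirms whether a GT_CATCH_ARG node is actually present.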
+ if (((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0) && (fgWalkTreePre(&tree, gtFindCatchArg) == WALK_ABORT))
+ {
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// gtHasCallOnStack:
+//
+// Arguments:
+// parentStack: a context (stack of parent nodes)
+//
+// Return Value:
+// returns true if any of the parent nodes are a GT_CALL
+//
+// Assumptions:
+// We have a stack of parent nodes. This generally requires that
+// we are performing a recursive tree walk using struct fgWalkData
+//
+//------------------------------------------------------------------------
+/* static */ bool Compiler::gtHasCallOnStack(GenTreeStack* parentStack)
+{
+ for (int i = 0; i < parentStack->Height(); i++)
+ {
+ GenTree* node = parentStack->Index(i);
+ if (node->OperGet() == GT_CALL)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// gtCheckQuirkAddrExposedLclVar:
+//
+// Arguments:
+// tree: an address taken GenTree node that is a GT_LCL_VAR
+// parentStack: a context (stack of parent nodes)
+// The 'parentStack' is used to ensure that we are in an argument context.
+//
+// Return Value:
+// None
+//
+// Notes:
+// When the allocation size of this LclVar is 32 bits we will quirk the size to 64 bits
+// because some PInvoke signatures incorrectly specify a ByRef to an INT32
+// when they actually write a SIZE_T or INT64. There are cases where overwriting
+// these extra 4 bytes corrupts some data (such as a saved register), which leads to an A/V,
+// whereas the previous JIT64 codegen did not lead to an A/V.
+//
+// Assumptions:
+// 'tree' is known to be address taken and that we have a stack
+// of parent nodes. Both of these generally requires that
+// we are performing a recursive tree walk using struct fgWalkData
+//------------------------------------------------------------------------
+void Compiler::gtCheckQuirkAddrExposedLclVar(GenTreePtr tree, GenTreeStack* parentStack)
+{
+#ifdef _TARGET_64BIT_
+ // We only need to Quirk for _TARGET_64BIT_
+
+ // Do we have a parent node that is a Call?
+ if (!Compiler::gtHasCallOnStack(parentStack))
+ {
+ // No, so we don't apply the Quirk
+ return;
+ }
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ var_types vartype = varDsc->TypeGet();
+
+ if (varDsc->lvIsParam)
+ {
+ // We can't Quirk the size of an incoming parameter
+ return;
+ }
+
+ // We may need to Quirk the storage size for this LCL_VAR
+ if (genActualType(vartype) == TYP_INT)
+ {
+ varDsc->lvQuirkToLong = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAdding a Quirk for the storage size of LclVar V%02d:", lclNum);
+ printf(" (%s ==> %s)\n", varTypeName(vartype), varTypeName(TYP_LONG));
+ }
+#endif // DEBUG
+ }
+#endif
+}
+
+// Checks to see if we're allowed to optimize Type::op_Equality or Type::op_Inequality on this operand.
+// We're allowed to convert to GT_EQ/GT_NE if one of the operands is:
+// 1) The result of Object::GetType
+// 2) The result of typeof(...)
+// 3) a local variable of type RuntimeType.
+bool Compiler::gtCanOptimizeTypeEquality(GenTreePtr tree)
+{
+ if (tree->gtOper == GT_CALL)
+ {
+ if (tree->gtCall.gtCallType == CT_HELPER)
+ {
+ if (gtIsTypeHandleToRuntimeTypeHelper(tree))
+ {
+ return true;
+ }
+ }
+ else if (tree->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+ {
+ if (info.compCompHnd->getIntrinsicID(tree->gtCall.gtCallMethHnd) == CORINFO_INTRINSIC_Object_GetType)
+ {
+ return true;
+ }
+ }
+ }
+ else if ((tree->gtOper == GT_INTRINSIC) && (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType))
+ {
+ return true;
+ }
+ else if (tree->gtOper == GT_LCL_VAR)
+ {
+ LclVarDsc* lcl = &(lvaTable[tree->gtLclVarCommon.gtLclNum]);
+ if (lcl->TypeGet() == TYP_REF)
+ {
+ if (lcl->lvVerTypeInfo.GetClassHandle() == info.compCompHnd->getBuiltinClass(CLASSID_RUNTIME_TYPE))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+}
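+
+// Editorial illustration (not part of the original change): the pattern this check enables
+// is the common C# idiom
+//
+//     if (obj.GetType() == typeof(string)) { ... }
+//
+// where each operand is known to be an exact RuntimeType, so the Type::op_Equality call can
+// be converted into a simple GT_EQ/GT_NE comparison as described above.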
+
+bool Compiler::gtIsTypeHandleToRuntimeTypeHelper(GenTreePtr tree)
+{
+ return tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE) ||
+ tree->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL);
+}
+
+bool Compiler::gtIsActiveCSE_Candidate(GenTreePtr tree)
+{
+ return (optValnumCSE_phase && IS_CSE_INDEX(tree->gtCSEnum));
+}
+
+/*****************************************************************************/
+
+struct ComplexityStruct
+{
+ unsigned m_numNodes;
+ unsigned m_nodeLimit;
+ ComplexityStruct(unsigned nodeLimit) : m_numNodes(0), m_nodeLimit(nodeLimit)
+ {
+ }
+};
+
+static Compiler::fgWalkResult ComplexityExceedsWalker(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ ComplexityStruct* pComplexity = (ComplexityStruct*)data->pCallbackData;
+ if (++pComplexity->m_numNodes > pComplexity->m_nodeLimit)
+ {
+ return Compiler::WALK_ABORT;
+ }
+ else
+ {
+ return Compiler::WALK_CONTINUE;
+ }
+}
+
+bool Compiler::gtComplexityExceeds(GenTreePtr* tree, unsigned limit)
+{
+ ComplexityStruct complexity(limit);
+ if (fgWalkTreePre(tree, &ComplexityExceedsWalker, &complexity) == WALK_ABORT)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
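+
+// Editorial sketch (assumed call site, not from the original change): this is typically used
+// as a cheap size guard before a transformation whose cost grows with tree size, e.g.
+//
+//     if (gtComplexityExceeds(&expr, 100)) // 100 is an arbitrary node budget
+//     {
+//         return; // tree too large; skip the optimization
+//     }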
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX BasicBlock XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#if MEASURE_BLOCK_SIZE
+/* static */
+size_t BasicBlock::s_Size;
+/* static */
+size_t BasicBlock::s_Count;
+#endif // MEASURE_BLOCK_SIZE
+
+#ifdef DEBUG
+// The max # of tree nodes in any BB
+/* static */
+unsigned BasicBlock::s_nMaxTrees;
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Allocate a basic block but don't append it to the current BB list.
+ */
+
+BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
+{
+ BasicBlock* block;
+
+ /* Allocate the block descriptor and zero it out */
+ assert(fgSafeBasicBlockCreation);
+
+ block = new (this, CMK_BasicBlock) BasicBlock;
+
+#if MEASURE_BLOCK_SIZE
+ BasicBlock::s_Count += 1;
+ BasicBlock::s_Size += sizeof(*block);
+#endif
+
+#ifdef DEBUG
+ // fgLookupBB() is invalid until fgInitBBLookup() is called again.
+ fgBBs = (BasicBlock**)0xCDCD;
+#endif
+
+ // TODO-Throughput: The following memset is pretty expensive - do something else?
+ // Note that some fields have to be initialized to 0 (like bbFPStateX87)
+ memset(block, 0, sizeof(*block));
+
+ // scopeInfo needs to be able to differentiate between blocks which
+ // correspond to some instrs (and so may have some LocalVarInfo
+ // boundaries) and blocks that have been inserted by the JIT.
+ block->bbCodeOffs = BAD_IL_OFFSET;
+ block->bbCodeOffsEnd = BAD_IL_OFFSET;
+
+ /* Give the block a number, set the ancestor count and weight */
+
+ ++fgBBcount;
+
+ if (compIsForInlining())
+ {
+ block->bbNum = ++impInlineInfo->InlinerCompiler->fgBBNumMax;
+ }
+ else
+ {
+ block->bbNum = ++fgBBNumMax;
+ }
+
+#ifndef LEGACY_BACKEND
+ if (compRationalIRForm)
+ {
+ block->bbFlags |= BBF_IS_LIR;
+ }
+#endif // !LEGACY_BACKEND
+
+ block->bbRefs = 1;
+ block->bbWeight = BB_UNITY_WEIGHT;
+
+ block->bbStkTempsIn = NO_BASE_TMP;
+ block->bbStkTempsOut = NO_BASE_TMP;
+
+ block->bbEntryState = nullptr;
+
+ /* Record the jump kind in the block */
+
+ block->bbJumpKind = jumpKind;
+
+ if (jumpKind == BBJ_THROW)
+ {
+ block->bbSetRunRarely();
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("New Basic Block BB%02u [%p] created.\n", block->bbNum, dspPtr(block));
+ }
+#endif
+
+ // We will give all the blocks var sets after the number of tracked variables
+ // is determined and frozen. After that, if we dynamically create a basic block,
+ // we will initialize its var sets.
+ if (fgBBVarSetsInited)
+ {
+ VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::MakeEmpty(this));
+ }
+ else
+ {
+ VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::UninitVal());
+ VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::UninitVal());
+ }
+
+ block->bbHeapUse = false;
+ block->bbHeapDef = false;
+ block->bbHeapLiveIn = false;
+ block->bbHeapLiveOut = false;
+
+ block->bbHeapSsaPhiFunc = nullptr;
+ block->bbHeapSsaNumIn = 0;
+ block->bbHeapSsaNumOut = 0;
+
+ // Make sure we reserve a NOT_IN_LOOP value that isn't a legal table index.
+ static_assert_no_msg(MAX_LOOP_NUM < BasicBlock::NOT_IN_LOOP);
+
+ block->bbNatLoopNum = BasicBlock::NOT_IN_LOOP;
+
+ return block;
+}
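+
+// Editorial note (illustrative, not from the original change): callers are expected to link
+// the returned block into the flowgraph themselves, e.g.
+//
+//     BasicBlock* newBlk = bbNewBasicBlock(BBJ_NONE);
+//     // ... set bbFlags, EH region, etc. ...
+//     // then insert it via a wrapper such as fgNewBBafter/fgNewBBbefore (assumed helpers).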
+
+//------------------------------------------------------------------------------
+// containsStatement - return true if the block contains the given statement
+//------------------------------------------------------------------------------
+
+bool BasicBlock::containsStatement(GenTree* statement)
+{
+ assert(statement->gtOper == GT_STMT);
+
+ GenTree* curr = bbTreeList;
+ do
+ {
+ if (curr == statement)
+ {
+ break;
+ }
+ curr = curr->gtNext;
+ } while (curr);
+ return curr != nullptr;
+}
+
+GenTreeStmt* BasicBlock::FirstNonPhiDef()
+{
+ GenTreePtr stmt = bbTreeList;
+ if (stmt == nullptr)
+ {
+ return nullptr;
+ }
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ while ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_PHI) ||
+ (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_PHI))
+ {
+ stmt = stmt->gtNext;
+ if (stmt == nullptr)
+ {
+ return nullptr;
+ }
+ tree = stmt->gtStmt.gtStmtExpr;
+ }
+ return stmt->AsStmt();
+}
+
+GenTreePtr BasicBlock::FirstNonPhiDefOrCatchArgAsg()
+{
+ GenTreePtr stmt = FirstNonPhiDef();
+ if (stmt == nullptr)
+ {
+ return nullptr;
+ }
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ if ((tree->OperGet() == GT_ASG && tree->gtOp.gtOp2->OperGet() == GT_CATCH_ARG) ||
+ (tree->OperGet() == GT_STORE_LCL_VAR && tree->gtOp.gtOp1->OperGet() == GT_CATCH_ARG))
+ {
+ stmt = stmt->gtNext;
+ }
+ return stmt;
+}
+
+/*****************************************************************************
+ *
+ * Mark a block as rarely run: we set its weight to zero, and we also
+ * don't want to have a loop in a rarely run block.
+ */
+
+void BasicBlock::bbSetRunRarely()
+{
+ setBBWeight(BB_ZERO_WEIGHT);
+ if (bbWeight == BB_ZERO_WEIGHT)
+ {
+ bbFlags |= BBF_RUN_RARELY; // This block is never/rarely run
+ }
+}
+
+/*****************************************************************************
+ *
+ * Can a BasicBlock be inserted after this without altering the flowgraph?
+ */
+
+bool BasicBlock::bbFallsThrough()
+{
+ switch (bbJumpKind)
+ {
+
+ case BBJ_THROW:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHCATCHRET:
+ case BBJ_RETURN:
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_SWITCH:
+ return false;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ return true;
+
+ case BBJ_CALLFINALLY:
+ return ((bbFlags & BBF_RETLESS_CALL) == 0);
+
+ default:
+ assert(!"Unknown bbJumpKind in bbFallsThrough()");
+ return true;
+ }
+}
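+
+// Editorial example (not part of the original change): a BBJ_COND block implicitly continues
+// into bbNext when its condition is false, so placing a new block immediately after it changes
+// the flow; a BBJ_ALWAYS block transfers control explicitly, so a block can be inserted after
+// it without altering the flowgraph. This is what bbFallsThrough distinguishes.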
+
+unsigned BasicBlock::NumSucc(Compiler* comp)
+{
+ // As described in the spec comment of NumSucc at its declaration, whether "comp" is null determines
+ // whether NumSucc and GetSucc yield successors of finally blocks.
+
+ switch (bbJumpKind)
+ {
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ return 0;
+
+ case BBJ_EHFILTERRET:
+ if (comp == nullptr)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
+
+ case BBJ_EHFINALLYRET:
+ {
+ if (comp == nullptr)
+ {
+ return 0;
+ }
+ else
+ {
+ // The first block of the handler is labelled with the catch type.
+ BasicBlock* hndBeg = comp->fgFirstBlockOfHandler(this);
+ if (hndBeg->bbCatchTyp == BBCT_FINALLY)
+ {
+ return comp->fgNSuccsOfFinallyRet(this);
+ }
+ else
+ {
+ assert(hndBeg->bbCatchTyp == BBCT_FAULT); // We can only BBJ_EHFINALLYRET from FINALLY and FAULT.
+ // A FAULT block has no successors.
+ return 0;
+ }
+ }
+ }
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_LEAVE:
+ case BBJ_NONE:
+ return 1;
+ case BBJ_COND:
+ if (bbJumpDest == bbNext)
+ {
+ return 1;
+ }
+ else
+ {
+ return 2;
+ }
+ case BBJ_SWITCH:
+ if (comp == nullptr)
+ {
+ return bbJumpSwt->bbsCount;
+ }
+ else
+ {
+ Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
+ return sd.numDistinctSuccs;
+ }
+
+ default:
+ unreached();
+ }
+}
+
+BasicBlock* BasicBlock::GetSucc(unsigned i, Compiler* comp)
+{
+ // As described in the spec comment of GetSucc at its declaration, whether "comp" is null determines
+ // whether NumSucc and GetSucc yield successors of finally blocks.
+
+ assert(i < NumSucc(comp)); // Index bounds check.
+ // printf("bbjk=%d\n", bbJumpKind);
+ switch (bbJumpKind)
+ {
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ unreached(); // Should have been covered by assert above.
+
+ case BBJ_EHFILTERRET:
+ {
+ assert(comp != nullptr); // Or else we're not looking for successors.
+ BasicBlock* result = comp->fgFirstBlockOfHandler(this);
+ noway_assert(result == bbJumpDest);
+ // Handler is the (sole) normal successor of the filter.
+ return result;
+ }
+
+ case BBJ_EHFINALLYRET:
+ return comp->fgSuccOfFinallyRet(this, i);
+
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_LEAVE:
+ return bbJumpDest;
+
+ case BBJ_NONE:
+ return bbNext;
+ case BBJ_COND:
+ if (i == 0)
+ {
+ return bbNext;
+ }
+ else
+ {
+ assert(i == 1);
+ return bbJumpDest;
+ }
+ case BBJ_SWITCH:
+ if (comp == nullptr)
+ {
+ assert(i < bbJumpSwt->bbsCount); // Range check.
+ return bbJumpSwt->bbsDstTab[i];
+ }
+ else
+ {
+ // Remove duplicates.
+ Compiler::SwitchUniqueSuccSet sd = comp->GetDescriptorForSwitch(this);
+ assert(i < sd.numDistinctSuccs); // Range check.
+ return sd.nonDuplicates[i];
+ }
+
+ default:
+ unreached();
+ }
+}
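+
+// Editorial sketch (illustrative only): the usual way to visit a block's successors is to pair
+// NumSucc and GetSucc with the same 'comp' argument, e.g.
+//
+//     for (unsigned i = 0; i < block->NumSucc(comp); i++)
+//     {
+//         BasicBlock* succ = block->GetSucc(i, comp);
+//         // ... process succ ...
+//     }
+//
+// Passing nullptr instead of 'comp' changes how finally/filter and switch successors are
+// reported, as described in the comments above.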
+
+// -------------------------------------------------------------------------
+// IsRegOptional: Returns true if this gentree node is marked by lowering to
+// indicate that codegen can still generate code even if it wasn't allocated
+// a register.
+bool GenTree::IsRegOptional() const
+{
+#ifdef LEGACY_BACKEND
+ return false;
+#else
+ return gtLsraInfo.regOptional;
+#endif
+}
+
+bool GenTree::IsPhiNode()
+{
+ return (OperGet() == GT_PHI_ARG) || (OperGet() == GT_PHI) || IsPhiDefn();
+}
+
+bool GenTree::IsPhiDefn()
+{
+ bool res = ((OperGet() == GT_ASG) && (gtOp.gtOp2 != nullptr) && (gtOp.gtOp2->OperGet() == GT_PHI)) ||
+ ((OperGet() == GT_STORE_LCL_VAR) && (gtOp.gtOp1 != nullptr) && (gtOp.gtOp1->OperGet() == GT_PHI));
+ assert(!res || OperGet() == GT_STORE_LCL_VAR || gtOp.gtOp1->OperGet() == GT_LCL_VAR);
+ return res;
+}
+
+bool GenTree::IsPhiDefnStmt()
+{
+ if (OperGet() != GT_STMT)
+ {
+ return false;
+ }
+ GenTreePtr asg = gtStmt.gtStmtExpr;
+ return asg->IsPhiDefn();
+}
+
+// IsPartialLclFld: Check for a GT_LCL_FLD whose type is a different size than the lclVar.
+//
+// Arguments:
+// comp - the Compiler object.
+//
+// Return Value:
+// Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type
+// is not the same size as the type of the GT_LCL_VAR
+
+bool GenTree::IsPartialLclFld(Compiler* comp)
+{
+ return ((gtOper == GT_LCL_FLD) &&
+ (comp->lvaTable[this->gtLclVarCommon.gtLclNum].lvExactSize != genTypeSize(gtType)));
+}
+
+bool GenTree::DefinesLocal(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire)
+{
+ GenTreeBlk* blkNode = nullptr;
+ if (OperIsAssignment())
+ {
+ if (gtOp.gtOp1->IsLocal())
+ {
+ GenTreeLclVarCommon* lclVarTree = gtOp.gtOp1->AsLclVarCommon();
+ *pLclVarTree = lclVarTree;
+ if (pIsEntire != nullptr)
+ {
+ if (lclVarTree->IsPartialLclFld(comp))
+ {
+ *pIsEntire = false;
+ }
+ else
+ {
+ *pIsEntire = true;
+ }
+ }
+ return true;
+ }
+ else if (gtOp.gtOp1->OperGet() == GT_IND)
+ {
+ GenTreePtr indArg = gtOp.gtOp1->gtOp.gtOp1;
+ return indArg->DefinesLocalAddr(comp, genTypeSize(gtOp.gtOp1->TypeGet()), pLclVarTree, pIsEntire);
+ }
+ else if (gtOp.gtOp1->OperIsBlk())
+ {
+ blkNode = gtOp.gtOp1->AsBlk();
+ }
+ }
+ else if (OperIsBlk())
+ {
+ blkNode = this->AsBlk();
+ }
+ if (blkNode != nullptr)
+ {
+ GenTreePtr destAddr = blkNode->Addr();
+ unsigned width = blkNode->gtBlkSize;
+ // Do we care about whether this assigns the entire variable?
+ if (pIsEntire != nullptr && width == 0)
+ {
+ assert(blkNode->gtOper == GT_DYN_BLK);
+ GenTreePtr blockWidth = blkNode->AsDynBlk()->gtDynamicSize;
+ if (blockWidth->IsCnsIntOrI())
+ {
+ if (blockWidth->IsIconHandle())
+ {
+ // If it's a handle, it must be a class handle. We only create such block operations
+ // for initialization of struct types, so the type of the argument(s) will match this
+ // type, by construction, and be "entire".
+ assert(blockWidth->IsIconHandle(GTF_ICON_CLASS_HDL));
+ width = comp->info.compCompHnd->getClassSize(
+ CORINFO_CLASS_HANDLE(blockWidth->gtIntConCommon.IconValue()));
+ }
+ else
+ {
+ ssize_t swidth = blockWidth->AsIntConCommon()->IconValue();
+ assert(swidth >= 0);
+ // cpblk of size zero exists in the wild (in yacc-generated code in SQL) and is valid IL.
+ if (swidth == 0)
+ {
+ return false;
+ }
+ width = unsigned(swidth);
+ }
+ }
+ }
+ return destAddr->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire);
+ }
+ // Otherwise...
+ return false;
+}
+
+// Returns true if this GenTree defines a result which is based on the address of a local.
+bool GenTree::DefinesLocalAddr(Compiler* comp, unsigned width, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire)
+{
+ if (OperGet() == GT_ADDR || OperGet() == GT_LCL_VAR_ADDR)
+ {
+ GenTreePtr addrArg = this;
+ if (OperGet() == GT_ADDR)
+ {
+ addrArg = gtOp.gtOp1;
+ }
+
+ if (addrArg->IsLocal() || addrArg->OperIsLocalAddr())
+ {
+ GenTreeLclVarCommon* addrArgLcl = addrArg->AsLclVarCommon();
+ *pLclVarTree = addrArgLcl;
+ if (pIsEntire != nullptr)
+ {
+ unsigned lclOffset = 0;
+ if (addrArg->OperIsLocalField())
+ {
+ lclOffset = addrArg->gtLclFld.gtLclOffs;
+ }
+
+ if (lclOffset != 0)
+ {
+ // We aren't updating the bytes at [0..lclOffset-1] so *pIsEntire should be set to false
+ *pIsEntire = false;
+ }
+ else
+ {
+ unsigned lclNum = addrArgLcl->GetLclNum();
+ unsigned varWidth = comp->lvaLclExactSize(lclNum);
+ if (comp->lvaTable[lclNum].lvNormalizeOnStore())
+ {
+ // It's normalize on store, so use the full storage width -- writing to low bytes won't
+ // necessarily yield a normalized value.
+ varWidth = genTypeStSz(var_types(comp->lvaTable[lclNum].lvType)) * sizeof(int);
+ }
+ *pIsEntire = (varWidth == width);
+ }
+ }
+ return true;
+ }
+ else if (addrArg->OperGet() == GT_IND)
+ {
+ // A GT_ADDR of a GT_IND can both be optimized away, recurse using the child of the GT_IND
+ return addrArg->gtOp.gtOp1->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire);
+ }
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->IsCnsIntOrI())
+ {
+ // If we are just adding a zero then we allow an IsEntire match against width;
+ // otherwise we change width to zero to disallow an IsEntire match.
+ return gtOp.gtOp2->DefinesLocalAddr(comp, gtOp.gtOp1->IsIntegralConst(0) ? width : 0, pLclVarTree,
+ pIsEntire);
+ }
+ else if (gtOp.gtOp2->IsCnsIntOrI())
+ {
+ // If we are just adding a zero then we allow an IsEntire match against width;
+ // otherwise we change width to zero to disallow an IsEntire match.
+ return gtOp.gtOp1->DefinesLocalAddr(comp, gtOp.gtOp2->IsIntegralConst(0) ? width : 0, pLclVarTree,
+ pIsEntire);
+ }
+ }
+ // Post rationalization we could have GT_IND(GT_LEA(..)) trees.
+ else if (OperGet() == GT_LEA)
+ {
+ // This method gets invoked during liveness computation and therefore it is critical
+ // that we don't miss the 'use' of any local. The logic below assumes
+ // that in the case of LEA(base, index, offset) only the base can be a GT_LCL_VAR_ADDR;
+ // the index cannot.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ GenTreePtr index = gtOp.gtOp2;
+ if (index != nullptr)
+ {
+ assert(!index->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire));
+ }
+#endif // DEBUG
+
+ // base
+ GenTreePtr base = gtOp.gtOp1;
+ if (base != nullptr)
+ {
+ // Lea could have an Indir as its base.
+ if (base->OperGet() == GT_IND)
+ {
+ base = base->gtOp.gtOp1->gtEffectiveVal(/*commas only*/ true);
+ }
+ return base->DefinesLocalAddr(comp, width, pLclVarTree, pIsEntire);
+ }
+ }
+ // Otherwise...
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsLocalExpr: Determine if this is a LclVarCommon node and return some
+// additional info about it in the two out parameters.
+//
+// Arguments:
+// comp - The Compiler instance
+// pLclVarTree - An "out" argument that returns the local tree as a
+// LclVarCommon, if it is indeed local.
+// pFldSeq - An "out" argument that returns the value numbering field
+// sequence for the node, if any.
+//
+// Return Value:
+// Returns true, and sets the out arguments accordingly, if this is
+// a LclVarCommon node.
+
+bool GenTree::IsLocalExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq)
+{
+ if (IsLocal()) // Note that this covers "GT_LCL_FLD."
+ {
+ *pLclVarTree = AsLclVarCommon();
+ if (OperGet() == GT_LCL_FLD)
+ {
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(AsLclFld()->gtFieldSeq, *pFldSeq);
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+// If this tree evaluates some sum of a local address and some constants,
+// return the node for the local being addressed
+
+GenTreeLclVarCommon* GenTree::IsLocalAddrExpr()
+{
+ if (OperGet() == GT_ADDR)
+ {
+ return gtOp.gtOp1->IsLocal() ? gtOp.gtOp1->AsLclVarCommon() : nullptr;
+ }
+ else if (OperIsLocalAddr())
+ {
+ return this->AsLclVarCommon();
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ return gtOp.gtOp2->IsLocalAddrExpr();
+ }
+ else if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ return gtOp.gtOp1->IsLocalAddrExpr();
+ }
+ }
+ // Otherwise...
+ return nullptr;
+}
+
+bool GenTree::IsLocalAddrExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq)
+{
+ if (OperGet() == GT_ADDR)
+ {
+ assert(!comp->compRationalIRForm);
+ GenTreePtr addrArg = gtOp.gtOp1;
+ if (addrArg->IsLocal()) // Note that this covers "GT_LCL_FLD."
+ {
+ *pLclVarTree = addrArg->AsLclVarCommon();
+ if (addrArg->OperGet() == GT_LCL_FLD)
+ {
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(addrArg->AsLclFld()->gtFieldSeq, *pFldSeq);
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else if (OperIsLocalAddr())
+ {
+ *pLclVarTree = this->AsLclVarCommon();
+ if (this->OperGet() == GT_LCL_FLD_ADDR)
+ {
+ *pFldSeq = comp->GetFieldSeqStore()->Append(this->AsLclFld()->gtFieldSeq, *pFldSeq);
+ }
+ return true;
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ if (gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ if (gtOp.gtOp1->AsIntCon()->gtFieldSeq == nullptr)
+ {
+ return false;
+ }
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(gtOp.gtOp1->AsIntCon()->gtFieldSeq, *pFldSeq);
+ return gtOp.gtOp2->IsLocalAddrExpr(comp, pLclVarTree, pFldSeq);
+ }
+ else if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ if (gtOp.gtOp2->AsIntCon()->gtFieldSeq == nullptr)
+ {
+ return false;
+ }
+ // Otherwise, prepend this field to whatever we've already accumulated outside in.
+ *pFldSeq = comp->GetFieldSeqStore()->Append(gtOp.gtOp2->AsIntCon()->gtFieldSeq, *pFldSeq);
+ return gtOp.gtOp1->IsLocalAddrExpr(comp, pLclVarTree, pFldSeq);
+ }
+ }
+ // Otherwise...
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsLclVarUpdateTree: Determine whether this is an assignment tree of the
+// form Vn = Vn 'oper' 'otherTree' where Vn is a lclVar
+//
+// Arguments:
+// pOtherTree - An "out" argument in which 'otherTree' will be returned.
+// pOper - An "out" argument in which 'oper' will be returned.
+//
+// Return Value:
+// If the tree is of the above form, the lclNum of the variable being
+// updated is returned, and 'pOtherTree' and 'pOper' are set.
+// Otherwise, returns BAD_VAR_NUM.
+//
+// Notes:
+// 'otherTree' can have any shape.
+// We avoid worrying about whether the op is commutative by only considering the
+// first operand of the rhs. It is expected that most trees of this form will
+// already have the lclVar on the lhs.
+// TODO-CQ: Evaluate whether there are missed opportunities due to this, or
+// whether gtSetEvalOrder will already have put the lclVar on the lhs in
+// the cases of interest.
+
+unsigned GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps* pOper)
+{
+ unsigned lclNum = BAD_VAR_NUM;
+ if (OperIsAssignment())
+ {
+ GenTree* lhs = gtOp.gtOp1;
+ if (lhs->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lhsLclNum = lhs->AsLclVarCommon()->gtLclNum;
+ if (gtOper == GT_ASG)
+ {
+ GenTree* rhs = gtOp.gtOp2;
+ if (rhs->OperIsBinary() && (rhs->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ (rhs->gtOp.gtOp1->AsLclVarCommon()->gtLclNum == lhsLclNum))
+ {
+ lclNum = lhsLclNum;
+ *pOtherTree = rhs->gtOp.gtOp2;
+ *pOper = rhs->gtOper;
+ }
+ }
+ else
+ {
+ lclNum = lhsLclNum;
+ *pOper = GenTree::OpAsgToOper(gtOper);
+ *pOtherTree = gtOp.gtOp2;
+ }
+ }
+ }
+ return lclNum;
+}
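+
+// Editorial illustration (not part of the original change): for source like "x = x + y" the
+// matched shape is
+//
+//     ASG(LCL_VAR V01, ADD(LCL_VAR V01, <otherTree>))
+//
+// and IsLclVarUpdateTree returns V01's lclNum with *pOper set to GT_ADD and *pOtherTree set
+// to the tree for 'y'.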
+
+// return true if this tree node is a subcomponent of parent for codegen purposes
+// (essentially, will be rolled into the same instruction)
+// Note that this method relies upon the value of the gtRegNum field to determine
+// if the treenode is contained or not. Therefore you cannot call this method
+// until after the LSRA phase has allocated physical registers to the treenodes.
+bool GenTree::isContained() const
+{
+ if (isContainedSpillTemp())
+ {
+ return true;
+ }
+
+ if (gtHasReg())
+ {
+ return false;
+ }
+
+ // these actually produce a register (the flags reg, we just don't model it)
+ // and are a separate instruction from the branch that consumes the result
+ if (OperKind() & GTK_RELOP)
+ {
+ return false;
+ }
+
+ // TODO-Cleanup: this is not clean; it would be nice to have some way of marking this.
+ switch (OperGet())
+ {
+ case GT_STOREIND:
+ case GT_JTRUE:
+ case GT_RETURN:
+ case GT_RETFILT:
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ case GT_ARR_BOUNDS_CHECK:
+ case GT_LOCKADD:
+ case GT_NOP:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ case GT_RETURNTRAP:
+ case GT_COMMA:
+ case GT_PINVOKE_PROLOG:
+ case GT_PHYSREGDST:
+ case GT_PUTARG_STK:
+ case GT_MEMORYBARRIER:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ case GT_SWITCH:
+ case GT_JMPTABLE:
+ case GT_SWITCH_TABLE:
+ case GT_SWAP:
+ case GT_LCLHEAP:
+ case GT_CKFINITE:
+ case GT_JMP:
+ case GT_IL_OFFSET:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+#endif
+ return false;
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ // GT_LONG nodes are normally contained. The only exception is when the result
+ // of a TYP_LONG operation is not used and this can only happen if the GT_LONG
+ // is the last node in the statement (in linear order).
+ return gtNext != nullptr;
+#endif
+
+ case GT_CALL:
+ // Note: if you hit this assert you are probably calling isContained()
+ // before the LSRA phase has allocated physical registers to the tree nodes.
+ //
+ assert(gtType == TYP_VOID);
+ return false;
+
+ default:
+ // if it's contained it better have a parent
+ assert(gtNext || OperIsLocal());
+ return true;
+ }
+}
+
+// return true if node is contained and an indir
+bool GenTree::isContainedIndir() const
+{
+ return isContained() && isIndir();
+}
+
+bool GenTree::isIndirAddrMode()
+{
+ return isIndir() && AsIndir()->Addr()->OperIsAddrMode() && AsIndir()->Addr()->isContained();
+}
+
+bool GenTree::isIndir() const
+{
+ return OperGet() == GT_IND || OperGet() == GT_STOREIND;
+}
+
+bool GenTreeIndir::HasBase()
+{
+ return Base() != nullptr;
+}
+
+bool GenTreeIndir::HasIndex()
+{
+ return Index() != nullptr;
+}
+
+GenTreePtr GenTreeIndir::Base()
+{
+ GenTreePtr addr = Addr();
+
+ if (isIndirAddrMode())
+ {
+ GenTree* result = addr->AsAddrMode()->Base();
+ if (result != nullptr)
+ {
+ result = result->gtEffectiveVal();
+ }
+ return result;
+ }
+ else
+ {
+ return addr; // TODO: why do we return 'addr' here, but we return 'nullptr' in the equivalent Index() case?
+ }
+}
+
+GenTree* GenTreeIndir::Index()
+{
+ if (isIndirAddrMode())
+ {
+ GenTree* result = Addr()->AsAddrMode()->Index();
+ if (result != nullptr)
+ {
+ result = result->gtEffectiveVal();
+ }
+ return result;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+unsigned GenTreeIndir::Scale()
+{
+ if (HasIndex())
+ {
+ return Addr()->AsAddrMode()->gtScale;
+ }
+ else
+ {
+ return 1;
+ }
+}
+
+size_t GenTreeIndir::Offset()
+{
+ if (isIndirAddrMode())
+ {
+ return Addr()->AsAddrMode()->gtOffset;
+ }
+ else if (Addr()->gtOper == GT_CLS_VAR_ADDR)
+ {
+ return (size_t)Addr()->gtClsVar.gtClsVarHnd;
+ }
+ else if (Addr()->IsCnsIntOrI() && Addr()->isContained())
+ {
+ return Addr()->AsIntConCommon()->IconValue();
+ }
+ else
+ {
+ return 0;
+ }
+}
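+
+// Editorial example (assumed values, not from the original change): for an indirection whose
+// contained address mode computes [rbx + rsi*4 + 24], Base() yields the rbx tree, Index() the
+// rsi tree, Scale() returns 4 and Offset() returns 24; a plain indirection with no address
+// mode reports Scale() == 1 and Offset() == 0.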
+
+//------------------------------------------------------------------------
+// GenTreeIntConCommon::ImmedValNeedsReloc: does this immediate value need recording a relocation with the VM?
+//
+// Arguments:
+// comp - Compiler instance
+//
+// Return Value:
+// True if this immediate value needs recording a relocation with the VM; false otherwise.
+
+bool GenTreeIntConCommon::ImmedValNeedsReloc(Compiler* comp)
+{
+#ifdef RELOC_SUPPORT
+ return comp->opts.compReloc && (gtOper == GT_CNS_INT) && IsIconHandle();
+#else
+ return false;
+#endif
+}
+
+//------------------------------------------------------------------------
+// ImmedValCanBeFolded: can this immediate value be folded for op?
+//
+// Arguments:
+// comp - Compiler instance
+// op - Tree operator
+//
+// Return Value:
+// True if this immediate value can be folded for op; false otherwise.
+
+bool GenTreeIntConCommon::ImmedValCanBeFolded(Compiler* comp, genTreeOps op)
+{
+ // In general, immediate values that need relocations can't be folded.
+ // There are cases where we do want to allow folding of handle comparisons
+ // (e.g., typeof(T) == typeof(int)).
+ return !ImmedValNeedsReloc(comp) || (op == GT_EQ) || (op == GT_NE);
+}
+
+#ifdef _TARGET_AMD64_
+// Returns true if this absolute address fits within the base of an addr mode.
+// On Amd64 this effectively means whether an absolute indirect address can
+// be encoded as a 32-bit offset relative to IP or to zero.
+bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp)
+{
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ // Early out if PC-rel encoding of absolute addr is disabled.
+ if (!comp->opts.compEnablePCRelAddr)
+ {
+ return false;
+ }
+#endif
+#endif //! LEGACY_BACKEND
+
+ if (comp->opts.compReloc)
+ {
+ // During Ngen the JIT is always asked to generate relocatable code.
+ // Hence the JIT will try to encode only icon handles as pc-relative offsets.
+ return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue()));
+ }
+ else
+ {
+ // During Jitting, we are allowed to generate non-relocatable code.
+ // On Amd64 we can encode an absolute indirect addr as an offset relative to zero or RIP.
+ // An absolute indir addr that can fit within 32-bits can be encoded as an offset relative
+ // to zero. All other absolute indir addr could be attempted to be encoded as RIP relative
+ // based on reloc hint provided by VM. RIP relative encoding is preferred over relative
+ // to zero, because the former is one byte smaller than the latter. For this reason
+ // we check for reloc hint first and then whether addr fits in 32-bits next.
+ //
+ // VM starts off with an initial state to allow both data and code address to be encoded as
+ // pc-relative offsets. Hence JIT will attempt to encode all absolute addresses as pc-relative
+ // offsets. It is possible that while jitting a method, an address cannot be encoded as a
+ // pc-relative offset. In that case VM will note the overflow and will trigger re-jitting
+ // of the method with reloc hints turned off for all future methods. Second time around
+ // jitting will succeed since JIT will not attempt to encode data addresses as pc-relative
+ // offsets. Note that the JIT will always attempt to relocate code addresses (e.g. call addr).
+ // After an overflow, VM will assume any relocation recorded is for a code address and will
+ // emit jump thunk if it cannot be encoded as pc-relative offset.
+ return (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue())) || FitsInI32();
+ }
+}
+
+// Returns true if this icon value, encoded as an address, needs recording a relocation with the VM
+bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
+{
+ if (comp->opts.compReloc)
+ {
+ // During Ngen the JIT is always asked to generate relocatable code.
+ // Hence the JIT will try to encode only icon handles as pc-relative offsets.
+ return IsIconHandle() && (IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue()));
+ }
+ else
+ {
+ return IMAGE_REL_BASED_REL32 == comp->eeGetRelocTypeHint((void*)IconValue());
+ }
+}
+
+#elif defined(_TARGET_X86_)
+// Returns true if this absolute address fits within the base of an addr mode.
+// On x86 all addresses are 4-bytes and can be directly encoded in an addr mode.
+bool GenTreeIntConCommon::FitsInAddrBase(Compiler* comp)
+{
+#ifndef LEGACY_BACKEND
+#ifdef DEBUG
+ // Early out if PC-rel encoding of absolute addr is disabled.
+ if (!comp->opts.compEnablePCRelAddr)
+ {
+ return false;
+ }
+#endif
+#endif //! LEGACY_BACKEND
+
+ // TODO-x86 - TLS field handles are excluded for now as they are accessed relative to FS segment.
+ // Handling of TLS field handles is a NYI and this needs to be revisited after implementing it.
+ return IsCnsIntOrI() && !IsIconHandle(GTF_ICON_TLS_HDL);
+}
+
+// Returns true if this icon value, encoded as an address, needs recording a relocation with the VM
+bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
+{
+ // If generating relocatable code, icons should be reported for recording relocations.
+ return comp->opts.compReloc && IsIconHandle();
+}
+#endif //_TARGET_X86_
+
+bool GenTree::IsFieldAddr(Compiler* comp, GenTreePtr* pObj, GenTreePtr* pStatic, FieldSeqNode** pFldSeq)
+{
+ FieldSeqNode* newFldSeq = nullptr;
+ GenTreePtr baseAddr = nullptr;
+ bool mustBeStatic = false;
+
+ FieldSeqNode* statStructFldSeq = nullptr;
+ if (TypeGet() == TYP_REF)
+ {
+ // Recognize struct static field patterns...
+ if (OperGet() == GT_IND)
+ {
+ GenTreePtr addr = gtOp.gtOp1;
+ GenTreeIntCon* icon = nullptr;
+ if (addr->OperGet() == GT_CNS_INT)
+ {
+ icon = addr->AsIntCon();
+ }
+ else if (addr->OperGet() == GT_ADD)
+ {
+ // op1 should never be a field sequence (or any other kind of handle)
+ assert((addr->gtOp.gtOp1->gtOper != GT_CNS_INT) || !addr->gtOp.gtOp1->IsIconHandle());
+ if (addr->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ icon = addr->gtOp.gtOp2->AsIntCon();
+ }
+ }
+ if (icon != nullptr && !icon->IsIconHandle(GTF_ICON_STR_HDL) // String handles are a source of TYP_REFs.
+ && icon->gtFieldSeq != nullptr &&
+ icon->gtFieldSeq->m_next == nullptr // A static field should be a singleton
+ // TODO-Review: A pseudoField here indicates an issue - this requires investigation
+ // See test case src\ddsuites\src\clr\x86\CoreMangLib\Dev\Globalization\CalendarRegressions.exe
+ && !(FieldSeqStore::IsPseudoField(icon->gtFieldSeq->m_fieldHnd)) &&
+ icon->gtFieldSeq != FieldSeqStore::NotAField()) // Ignore non-fields.
+ {
+ statStructFldSeq = icon->gtFieldSeq;
+ }
+ else
+ {
+ addr = addr->gtEffectiveVal();
+
+ // Perhaps it's a direct indirection of a helper call or a cse with a zero offset annotation.
+ if ((addr->OperGet() == GT_CALL) || (addr->OperGet() == GT_LCL_VAR))
+ {
+ FieldSeqNode* zeroFieldSeq = nullptr;
+ if (comp->GetZeroOffsetFieldMap()->Lookup(addr, &zeroFieldSeq))
+ {
+ if (zeroFieldSeq->m_next == nullptr)
+ {
+ statStructFldSeq = zeroFieldSeq;
+ }
+ }
+ }
+ }
+ }
+ else if (OperGet() == GT_CLS_VAR)
+ {
+ GenTreeClsVar* clsVar = AsClsVar();
+ if (clsVar->gtFieldSeq != nullptr && clsVar->gtFieldSeq->m_next == nullptr)
+ {
+ statStructFldSeq = clsVar->gtFieldSeq;
+ }
+ }
+ else if (OperIsLocal())
+ {
+ // If we have a GT_LCL_VAR, it can be result of a CSE substitution
+ // If it is then the CSE assignment will have a ValueNum that
+ // describes the RHS of the CSE assignment.
+ //
+ // The CSE could be a pointer to a boxed struct
+ //
+ GenTreeLclVarCommon* lclVar = AsLclVarCommon();
+ ValueNum vn = gtVNPair.GetLiberal();
+ if (vn != ValueNumStore::NoVN)
+ {
+ // Is the ValueNum a MapSelect involving a SharedStatic helper?
+ VNFuncApp funcApp1;
+ if (comp->vnStore->GetVNFunc(vn, &funcApp1) && (funcApp1.m_func == VNF_MapSelect) &&
+ (comp->vnStore->IsSharedStatic(funcApp1.m_args[1])))
+ {
+ ValueNum mapVN = funcApp1.m_args[0];
+ // Is this new 'mapVN' ValueNum, a MapSelect involving a handle?
+ VNFuncApp funcApp2;
+ if (comp->vnStore->GetVNFunc(mapVN, &funcApp2) && (funcApp2.m_func == VNF_MapSelect) &&
+ (comp->vnStore->IsVNHandle(funcApp2.m_args[1])))
+ {
+ ValueNum fldHndVN = funcApp2.m_args[1];
+ // Is this new 'fldHndVN' VNhandle a FieldHandle?
+ unsigned flags = comp->vnStore->GetHandleFlags(fldHndVN);
+ if (flags == GTF_ICON_FIELD_HDL)
+ {
+ CORINFO_FIELD_HANDLE fieldHnd =
+ CORINFO_FIELD_HANDLE(comp->vnStore->ConstantValue<ssize_t>(fldHndVN));
+
+ // Record this field sequence in 'statStructFldSeq' as it is likely to be a Boxed Struct
+ // field access.
+ statStructFldSeq = comp->GetFieldSeqStore()->CreateSingleton(fieldHnd);
+ }
+ }
+ }
+ }
+ }
+
+ if (statStructFldSeq != nullptr)
+ {
+ assert(statStructFldSeq->m_next == nullptr);
+ // Is this a pointer to a boxed struct?
+ if (comp->gtIsStaticFieldPtrToBoxedStruct(TYP_REF, statStructFldSeq->m_fieldHnd))
+ {
+ *pFldSeq = comp->GetFieldSeqStore()->Append(statStructFldSeq, *pFldSeq);
+ *pObj = nullptr;
+ *pStatic = this;
+ return true;
+ }
+ }
+
+ // Otherwise...
+ *pObj = this;
+ *pStatic = nullptr;
+ return true;
+ }
+ else if (OperGet() == GT_ADD)
+ {
+ // op1 should never be a field sequence (or any other kind of handle)
+ assert((gtOp.gtOp1->gtOper != GT_CNS_INT) || !gtOp.gtOp1->IsIconHandle());
+ if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ newFldSeq = gtOp.gtOp2->AsIntCon()->gtFieldSeq;
+ baseAddr = gtOp.gtOp1;
+ }
+ }
+ else
+ {
+ // Check if "this" has a zero-offset annotation.
+ if (!comp->GetZeroOffsetFieldMap()->Lookup(this, &newFldSeq))
+ {
+ // If not, this is not a field address.
+ return false;
+ }
+ else
+ {
+ baseAddr = this;
+ mustBeStatic = true;
+ }
+ }
+
+ // If we don't have a field seq, it's not a field address.
+ if (newFldSeq == nullptr || newFldSeq == FieldSeqStore::NotAField())
+ {
+ return false;
+ }
+
+ // Prepend this field to whatever we've already accumulated (outside-in).
+ *pFldSeq = comp->GetFieldSeqStore()->Append(newFldSeq, *pFldSeq);
+
+ // Is it a static or instance field?
+ if (!FieldSeqStore::IsPseudoField(newFldSeq->m_fieldHnd) &&
+ comp->info.compCompHnd->isFieldStatic(newFldSeq->m_fieldHnd))
+ {
+ // It is a static field. We're done.
+ *pObj = nullptr;
+ *pStatic = baseAddr;
+ return true;
+ }
+ else if ((baseAddr != nullptr) && !mustBeStatic)
+ {
+ // It's an instance field...but it must be for a struct field, since we've not yet encountered
+ // a "TYP_REF" address. Analyze the reset of the address.
+ return baseAddr->gtEffectiveVal()->IsFieldAddr(comp, pObj, pStatic, pFldSeq);
+ }
+
+ // Otherwise...
+ return false;
+}
+
+bool Compiler::gtIsStaticFieldPtrToBoxedStruct(var_types fieldNodeType, CORINFO_FIELD_HANDLE fldHnd)
+{
+ if (fieldNodeType != TYP_REF)
+ {
+ return false;
+ }
+ CORINFO_CLASS_HANDLE fldCls = nullptr;
+ noway_assert(fldHnd != nullptr);
+ CorInfoType cit = info.compCompHnd->getFieldType(fldHnd, &fldCls);
+ var_types fieldTyp = JITtype2varType(cit);
+ return fieldTyp != TYP_REF;
+}
+
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
+{
+ CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
+ tree = tree->gtEffectiveVal();
+ if (varTypeIsStruct(tree->gtType))
+ {
+ switch (tree->gtOper)
+ {
+ default:
+ break;
+ case GT_MKREFANY:
+ structHnd = impGetRefAnyClass();
+ break;
+ case GT_OBJ:
+ structHnd = tree->gtObj.gtClass;
+ break;
+ case GT_CALL:
+ structHnd = tree->gtCall.gtRetClsHnd;
+ break;
+ case GT_RET_EXPR:
+ structHnd = tree->gtRetExpr.gtRetClsHnd;
+ break;
+ case GT_ARGPLACE:
+ structHnd = tree->gtArgPlace.gtArgPlaceClsHnd;
+ break;
+ case GT_INDEX:
+ structHnd = tree->gtIndex.gtStructElemClass;
+ break;
+ case GT_FIELD:
+ info.compCompHnd->getFieldType(tree->gtField.gtFldHnd, &structHnd);
+ break;
+ case GT_ASG:
+ structHnd = gtGetStructHandleIfPresent(tree->gtGetOp1());
+ break;
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ structHnd = lvaTable[tree->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
+ break;
+ case GT_RETURN:
+ structHnd = gtGetStructHandleIfPresent(tree->gtOp.gtOp1);
+ break;
+ case GT_IND:
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(tree))
+ {
+ structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
+ }
+ else
+#endif
+ if (tree->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+ structHnd = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ }
+ break;
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ structHnd = gtGetStructHandleForSIMD(tree->gtType, tree->AsSIMD()->gtSIMDBaseType);
+#endif // FEATURE_SIMD
+ break;
+ }
+ }
+ return structHnd;
+}
+
+CORINFO_CLASS_HANDLE Compiler::gtGetStructHandle(GenTree* tree)
+{
+ CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(tree);
+ assert(structHnd != NO_CLASS_HANDLE);
+ return structHnd;
+}
+
+void GenTree::ParseArrayAddress(
+ Compiler* comp, ArrayInfo* arrayInfo, GenTreePtr* pArr, ValueNum* pInxVN, FieldSeqNode** pFldSeq)
+{
+ *pArr = nullptr;
+ ValueNum inxVN = ValueNumStore::NoVN;
+ ssize_t offset = 0;
+ FieldSeqNode* fldSeq = nullptr;
+
+ ParseArrayAddressWork(comp, 1, pArr, &inxVN, &offset, &fldSeq);
+
+ // If we didn't find an array reference (perhaps it is the constant null?) we will give up.
+ if (*pArr == nullptr)
+ {
+ return;
+ }
+
+ // OK, now we have to figure out if any part of the "offset" is a constant contribution to the index.
+ // First, sum the offsets of any fields in fldSeq.
+ unsigned fieldOffsets = 0;
+ FieldSeqNode* fldSeqIter = fldSeq;
+ // Also, find the first non-pseudo field...
+ assert(*pFldSeq == nullptr);
+ while (fldSeqIter != nullptr)
+ {
+ if (fldSeqIter == FieldSeqStore::NotAField())
+ {
+ // TODO-Review: A NotAField here indicates a failure to properly maintain the field sequence
+ // See test case self_host_tests_x86\jit\regression\CLR-x86-JIT\v1-m12-beta2\ b70992\ b70992.exe
+ // Safest thing to do here is to drop back to MinOpts
+ noway_assert(!"fldSeqIter is NotAField() in ParseArrayAddress");
+ }
+
+ if (!FieldSeqStore::IsPseudoField(fldSeqIter->m_fieldHnd))
+ {
+ if (*pFldSeq == nullptr)
+ {
+ *pFldSeq = fldSeqIter;
+ }
+ CORINFO_CLASS_HANDLE fldCls = nullptr;
+ noway_assert(fldSeqIter->m_fieldHnd != nullptr);
+ CorInfoType cit = comp->info.compCompHnd->getFieldType(fldSeqIter->m_fieldHnd, &fldCls);
+ fieldOffsets += comp->compGetTypeSize(cit, fldCls);
+ }
+ fldSeqIter = fldSeqIter->m_next;
+ }
+
+ // Is there some portion of the "offset" beyond the first-elem offset and the struct field suffix we just computed?
+ if (!FitsIn<ssize_t>(fieldOffsets + arrayInfo->m_elemOffset) || !FitsIn<ssize_t>(arrayInfo->m_elemSize))
+ {
+ // This seems unlikely, but no harm in being safe...
+ *pInxVN = comp->GetValueNumStore()->VNForExpr(nullptr, TYP_INT);
+ return;
+ }
+ // Otherwise...
+ ssize_t offsetAccountedFor = static_cast<ssize_t>(fieldOffsets + arrayInfo->m_elemOffset);
+ ssize_t elemSize = static_cast<ssize_t>(arrayInfo->m_elemSize);
+
+ ssize_t constIndOffset = offset - offsetAccountedFor;
+ // This should be divisible by the element size...
+ assert((constIndOffset % elemSize) == 0);
+ ssize_t constInd = constIndOffset / elemSize;
+
+ ValueNumStore* vnStore = comp->GetValueNumStore();
+
+ if (inxVN == ValueNumStore::NoVN)
+ {
+ // Must be a constant index.
+ *pInxVN = vnStore->VNForPtrSizeIntCon(constInd);
+ }
+ else
+ {
+ //
+ // Perform ((inxVN / elemSizeVN) + vnForConstInd)
+ //
+
+ // The value associated with the index value number (inxVN) is the offset into the array,
+ // which has been scaled by element size. We need to recover the array index from that offset
+ if (vnStore->IsVNConstant(inxVN))
+ {
+ ssize_t index = vnStore->CoercedConstantValue<ssize_t>(inxVN);
+ noway_assert(elemSize > 0 && ((index % elemSize) == 0));
+ *pInxVN = vnStore->VNForPtrSizeIntCon((index / elemSize) + constInd);
+ }
+ else
+ {
+ bool canFoldDiv = false;
+
+ // If the index VN is a MUL by elemSize, see if we can eliminate it instead of adding
+ // the division by elemSize.
+ VNFuncApp funcApp;
+ if (vnStore->GetVNFunc(inxVN, &funcApp) && funcApp.m_func == (VNFunc)GT_MUL)
+ {
+ ValueNum vnForElemSize = vnStore->VNForLongCon(elemSize);
+
+ // One of the multiply operands is elemSize, so the resulting
+ // index VN should simply be the other operand.
+ if (funcApp.m_args[1] == vnForElemSize)
+ {
+ *pInxVN = funcApp.m_args[0];
+ canFoldDiv = true;
+ }
+ else if (funcApp.m_args[0] == vnForElemSize)
+ {
+ *pInxVN = funcApp.m_args[1];
+ canFoldDiv = true;
+ }
+ }
+
+ // Perform ((inxVN / elemSizeVN) + vnForConstInd)
+ if (!canFoldDiv)
+ {
+ ValueNum vnForElemSize = vnStore->VNForPtrSizeIntCon(elemSize);
+ ValueNum vnForScaledInx =
+ vnStore->VNForFunc(TYP_I_IMPL, GetVNFuncForOper(GT_DIV, false), inxVN, vnForElemSize);
+ *pInxVN = vnForScaledInx;
+ }
+
+ if (constInd != 0)
+ {
+ ValueNum vnForConstInd = comp->GetValueNumStore()->VNForPtrSizeIntCon(constInd);
+ *pInxVN = comp->GetValueNumStore()->VNForFunc(TYP_I_IMPL,
+ GetVNFuncForOper(GT_ADD, (gtFlags & GTF_UNSIGNED) != 0),
+ *pInxVN, vnForConstInd);
+ }
+ }
+ }
+}
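+
+// Editorial worked example (hypothetical numbers): suppose the element size is 4, the
+// first-element offset is 16, and the address tree is "arr + 16 + 4*i + 8". The walk yields
+// offset == 24 and inxVN == VN(4*i); with no field suffix, offsetAccountedFor == 16, so
+// constIndOffset == 8 and constInd == 2. The MUL by the element size is then folded away,
+// producing an overall index value number of i + 2.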
+
+void GenTree::ParseArrayAddressWork(
+ Compiler* comp, ssize_t inputMul, GenTreePtr* pArr, ValueNum* pInxVN, ssize_t* pOffset, FieldSeqNode** pFldSeq)
+{
+ if (TypeGet() == TYP_REF)
+ {
+ // This must be the array pointer.
+ *pArr = this;
+ assert(inputMul == 1); // Can't multiply the array pointer by anything.
+ }
+ else
+ {
+ switch (OperGet())
+ {
+ case GT_CNS_INT:
+ *pFldSeq = comp->GetFieldSeqStore()->Append(*pFldSeq, gtIntCon.gtFieldSeq);
+ *pOffset += (inputMul * gtIntCon.gtIconVal);
+ return;
+
+ case GT_ADD:
+ case GT_SUB:
+ gtOp.gtOp1->ParseArrayAddressWork(comp, inputMul, pArr, pInxVN, pOffset, pFldSeq);
+ if (OperGet() == GT_SUB)
+ {
+ inputMul = -inputMul;
+ }
+ gtOp.gtOp2->ParseArrayAddressWork(comp, inputMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+
+ case GT_MUL:
+ {
+ // If one op is a constant, continue parsing down.
+ ssize_t subMul = 0;
+ GenTreePtr nonConst = nullptr;
+ if (gtOp.gtOp1->IsCnsIntOrI())
+ {
+ // If the other arg is an int constant, and is a "not-a-field", choose
+ // that as the multiplier, thus preserving constant index offsets...
+ if (gtOp.gtOp2->OperGet() == GT_CNS_INT &&
+ gtOp.gtOp2->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField())
+ {
+ subMul = gtOp.gtOp2->gtIntConCommon.IconValue();
+ nonConst = gtOp.gtOp1;
+ }
+ else
+ {
+ subMul = gtOp.gtOp1->gtIntConCommon.IconValue();
+ nonConst = gtOp.gtOp2;
+ }
+ }
+ else if (gtOp.gtOp2->IsCnsIntOrI())
+ {
+ subMul = gtOp.gtOp2->gtIntConCommon.IconValue();
+ nonConst = gtOp.gtOp1;
+ }
+ if (nonConst != nullptr)
+ {
+ nonConst->ParseArrayAddressWork(comp, inputMul * subMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+ }
+ // Otherwise, exit the switch, treat as a contribution to the index.
+ }
+ break;
+
+ case GT_LSH:
+ // If one op is a constant, continue parsing down.
+ if (gtOp.gtOp2->IsCnsIntOrI())
+ {
+ ssize_t subMul = 1 << gtOp.gtOp2->gtIntConCommon.IconValue();
+ gtOp.gtOp1->ParseArrayAddressWork(comp, inputMul * subMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+ }
+ // Otherwise, exit the switch, treat as a contribution to the index.
+ break;
+
+ case GT_COMMA:
+ // We don't care about exceptions for this purpose.
+ if ((gtOp.gtOp1->OperGet() == GT_ARR_BOUNDS_CHECK) || gtOp.gtOp1->IsNothingNode())
+ {
+ gtOp.gtOp2->ParseArrayAddressWork(comp, inputMul, pArr, pInxVN, pOffset, pFldSeq);
+ return;
+ }
+ break;
+
+ default:
+ break;
+ }
+ // If we didn't return above, this must be a contribution to the non-constant part of the index VN.
+ ValueNum vn = comp->GetValueNumStore()->VNNormVal(gtVNPair.GetLiberal()); // We don't care about exceptions for
+ // this purpose.
+ if (inputMul != 1)
+ {
+ ValueNum mulVN = comp->GetValueNumStore()->VNForLongCon(inputMul);
+ vn = comp->GetValueNumStore()->VNForFunc(TypeGet(), GetVNFuncForOper(GT_MUL, false), mulVN, vn);
+ }
+ if (*pInxVN == ValueNumStore::NoVN)
+ {
+ *pInxVN = vn;
+ }
+ else
+ {
+ *pInxVN = comp->GetValueNumStore()->VNForFunc(TypeGet(), GetVNFuncForOper(GT_ADD, false), *pInxVN, vn);
+ }
+ }
+}
+
+bool GenTree::ParseArrayElemForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq)
+{
+ if (OperIsIndir())
+ {
+ if (gtFlags & GTF_IND_ARR_INDEX)
+ {
+ bool b = comp->GetArrayInfoMap()->Lookup(this, arrayInfo);
+ assert(b);
+ return true;
+ }
+
+ // Otherwise...
+ GenTreePtr addr = AsIndir()->Addr();
+ return addr->ParseArrayElemAddrForm(comp, arrayInfo, pFldSeq);
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool GenTree::ParseArrayElemAddrForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq)
+{
+ switch (OperGet())
+ {
+ case GT_ADD:
+ {
+ GenTreePtr arrAddr = nullptr;
+ GenTreePtr offset = nullptr;
+ if (gtOp.gtOp1->TypeGet() == TYP_BYREF)
+ {
+ arrAddr = gtOp.gtOp1;
+ offset = gtOp.gtOp2;
+ }
+ else if (gtOp.gtOp2->TypeGet() == TYP_BYREF)
+ {
+ arrAddr = gtOp.gtOp2;
+ offset = gtOp.gtOp1;
+ }
+ else
+ {
+ return false;
+ }
+ if (!offset->ParseOffsetForm(comp, pFldSeq))
+ {
+ return false;
+ }
+ return arrAddr->ParseArrayElemAddrForm(comp, arrayInfo, pFldSeq);
+ }
+
+ case GT_ADDR:
+ {
+ GenTreePtr addrArg = gtOp.gtOp1;
+ if (addrArg->OperGet() != GT_IND)
+ {
+ return false;
+ }
+ else
+ {
+ // The "Addr" node might be annotated with a zero-offset field sequence.
+ FieldSeqNode* zeroOffsetFldSeq = nullptr;
+ if (comp->GetZeroOffsetFieldMap()->Lookup(this, &zeroOffsetFldSeq))
+ {
+ *pFldSeq = comp->GetFieldSeqStore()->Append(*pFldSeq, zeroOffsetFldSeq);
+ }
+ return addrArg->ParseArrayElemForm(comp, arrayInfo, pFldSeq);
+ }
+ }
+
+ default:
+ return false;
+ }
+}
+
+bool GenTree::ParseOffsetForm(Compiler* comp, FieldSeqNode** pFldSeq)
+{
+ switch (OperGet())
+ {
+ case GT_CNS_INT:
+ {
+ GenTreeIntCon* icon = AsIntCon();
+ *pFldSeq = comp->GetFieldSeqStore()->Append(*pFldSeq, icon->gtFieldSeq);
+ return true;
+ }
+
+ case GT_ADD:
+ if (!gtOp.gtOp1->ParseOffsetForm(comp, pFldSeq))
+ {
+ return false;
+ }
+ return gtOp.gtOp2->ParseOffsetForm(comp, pFldSeq);
+
+ default:
+ return false;
+ }
+}
+
+void GenTree::LabelIndex(Compiler* comp, bool isConst)
+{
+ switch (OperGet())
+ {
+ case GT_CNS_INT:
+ // If we got here, this is a contribution to the constant part of the index.
+ if (isConst)
+ {
+ gtIntCon.gtFieldSeq =
+ comp->GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ }
+ return;
+
+ case GT_LCL_VAR:
+ gtFlags |= GTF_VAR_ARR_INDEX;
+ return;
+
+ case GT_ADD:
+ case GT_SUB:
+ gtOp.gtOp1->LabelIndex(comp, isConst);
+ gtOp.gtOp2->LabelIndex(comp, isConst);
+ break;
+
+ case GT_CAST:
+ gtOp.gtOp1->LabelIndex(comp, isConst);
+ break;
+
+ case GT_ARR_LENGTH:
+ gtFlags |= GTF_ARRLEN_ARR_IDX;
+ return;
+
+ default:
+ // For all other operators, peel off one constant; and then label the other if it's also a constant.
+ if (OperIsArithmetic() || OperIsCompare())
+ {
+ if (gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ gtOp.gtOp1->LabelIndex(comp, isConst);
+ break;
+ }
+ else if (gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ gtOp.gtOp2->LabelIndex(comp, isConst);
+ break;
+ }
+ // Otherwise continue downward on both, labeling vars.
+ gtOp.gtOp1->LabelIndex(comp, false);
+ gtOp.gtOp2->LabelIndex(comp, false);
+ }
+ break;
+ }
+}
+
+// Note that the value of the below field doesn't matter; it exists only to provide a distinguished address.
+//
+// static
+FieldSeqNode FieldSeqStore::s_notAField(nullptr, nullptr);
+
+// FieldSeqStore methods.
+FieldSeqStore::FieldSeqStore(IAllocator* alloc) : m_alloc(alloc), m_canonMap(new (alloc) FieldSeqNodeCanonMap(alloc))
+{
+}
+
+FieldSeqNode* FieldSeqStore::CreateSingleton(CORINFO_FIELD_HANDLE fieldHnd)
+{
+ FieldSeqNode fsn(fieldHnd, nullptr);
+ FieldSeqNode* res = nullptr;
+ if (m_canonMap->Lookup(fsn, &res))
+ {
+ return res;
+ }
+ else
+ {
+ res = reinterpret_cast<FieldSeqNode*>(m_alloc->Alloc(sizeof(FieldSeqNode)));
+ *res = fsn;
+ m_canonMap->Set(fsn, res);
+ return res;
+ }
+}
+
+FieldSeqNode* FieldSeqStore::Append(FieldSeqNode* a, FieldSeqNode* b)
+{
+ if (a == nullptr)
+ {
+ return b;
+ }
+ else if (a == NotAField())
+ {
+ return NotAField();
+ }
+ else if (b == nullptr)
+ {
+ return a;
+ }
+ else if (b == NotAField())
+ {
+ return NotAField();
+ // Extremely special case for ConstantIndex pseudo-fields -- appending two consecutive
+ // ones collapses them to one.
+ }
+ else if (a->m_next == nullptr && a->m_fieldHnd == ConstantIndexPseudoField &&
+ b->m_fieldHnd == ConstantIndexPseudoField)
+ {
+ return b;
+ }
+ else
+ {
+ FieldSeqNode* tmp = Append(a->m_next, b);
+ FieldSeqNode fsn(a->m_fieldHnd, tmp);
+ FieldSeqNode* res = nullptr;
+ if (m_canonMap->Lookup(fsn, &res))
+ {
+ return res;
+ }
+ else
+ {
+ res = reinterpret_cast<FieldSeqNode*>(m_alloc->Alloc(sizeof(FieldSeqNode)));
+ *res = fsn;
+ m_canonMap->Set(fsn, res);
+ return res;
+ }
+ }
+}
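+
+// Editorial illustration (not part of the original change): Append returns canonical
+// (interned) sequences, e.g. Append([a,b], [c]) yields the shared [a,b,c] node chain;
+// Append with NotAField() on either side yields NotAField(); and two adjacent ConstantIndex
+// pseudo-fields collapse into one, as noted above.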
+
+// Static vars.
+int FieldSeqStore::FirstElemPseudoFieldStruct;
+int FieldSeqStore::ConstantIndexPseudoFieldStruct;
+
+CORINFO_FIELD_HANDLE FieldSeqStore::FirstElemPseudoField =
+ (CORINFO_FIELD_HANDLE)&FieldSeqStore::FirstElemPseudoFieldStruct;
+CORINFO_FIELD_HANDLE FieldSeqStore::ConstantIndexPseudoField =
+ (CORINFO_FIELD_HANDLE)&FieldSeqStore::ConstantIndexPseudoFieldStruct;
+
+bool FieldSeqNode::IsFirstElemFieldSeq()
+{
+ // this must be non-null per ISO C++
+ return m_fieldHnd == FieldSeqStore::FirstElemPseudoField;
+}
+
+bool FieldSeqNode::IsConstantIndexFieldSeq()
+{
+ // this must be non-null per ISO C++
+ return m_fieldHnd == FieldSeqStore::ConstantIndexPseudoField;
+}
+
+bool FieldSeqNode::IsPseudoField()
+{
+ if (this == nullptr)
+ {
+ return false;
+ }
+ return m_fieldHnd == FieldSeqStore::FirstElemPseudoField || m_fieldHnd == FieldSeqStore::ConstantIndexPseudoField;
+}
+
+#ifdef FEATURE_SIMD
+GenTreeSIMD* Compiler::gtNewSIMDNode(
+ var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
+{
+ // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR)), in which case it should be
+ // marked lvUsedInSIMDIntrinsic.
+ assert(op1 != nullptr);
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+
+ return new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
+}
+
+GenTreeSIMD* Compiler::gtNewSIMDNode(
+ var_types type, GenTreePtr op1, GenTreePtr op2, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
+{
+ // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR)), in which case it should be
+ // marked lvUsedInSIMDIntrinsic.
+ assert(op1 != nullptr);
+ if (op1->OperIsLocal())
+ {
+ unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+
+ if (op2 != nullptr && op2->OperIsLocal())
+ {
+ unsigned lclNum = op2->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ }
+
+ return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+}
+
+bool GenTree::isCommutativeSIMDIntrinsic()
+{
+ assert(gtOper == GT_SIMD);
+ switch (AsSIMD()->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicMax:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ return true;
+ default:
+ return false;
+ }
+}
+#endif // FEATURE_SIMD
+
+//---------------------------------------------------------------------------------------
+// GenTreeArgList::Prepend:
+// Prepends an element to a GT_LIST.
+//
+// Arguments:
+// compiler - The compiler context.
+// element - The element to prepend.
+//
+// Returns:
+// The new head of the list.
+GenTreeArgList* GenTreeArgList::Prepend(Compiler* compiler, GenTree* element)
+{
+ GenTreeArgList* head = compiler->gtNewListNode(element, this);
+ head->gtFlags |= (gtFlags & GTF_LIST_AGGREGATE);
+ gtFlags &= ~GTF_LIST_AGGREGATE;
+ return head;
+}
+
+//---------------------------------------------------------------------------------------
+// InitializeStructReturnType:
+// Initialize the Return Type Descriptor for a method that returns a struct type
+//
+// Arguments
+// comp - Compiler Instance
+// retClsHnd - VM handle to the struct type returned by the method
+//
+// Return Value
+// None
+//
+void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(!m_inited);
+
+#if FEATURE_MULTIREG_RET
+
+ assert(retClsHnd != NO_CLASS_HANDLE);
+ unsigned structSize = comp->info.compCompHnd->getClassSize(retClsHnd);
+
+ Compiler::structPassingKind howToReturnStruct;
+ var_types returnType = comp->getReturnTypeForStruct(retClsHnd, &howToReturnStruct, structSize);
+
+ switch (howToReturnStruct)
+ {
+ case Compiler::SPK_PrimitiveType:
+ {
+ assert(returnType != TYP_UNKNOWN);
+ assert(returnType != TYP_STRUCT);
+ m_regType[0] = returnType;
+ break;
+ }
+
+ case Compiler::SPK_ByValueAsHfa:
+ {
+ assert(returnType == TYP_STRUCT);
+ var_types hfaType = comp->GetHfaType(retClsHnd);
+
+ // We should have an hfa struct type
+ assert(varTypeIsFloating(hfaType));
+
+ // Note that the retail build issues a warning about a potential division by zero without this Max function
+ unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
+
+ // The size of this struct should be evenly divisible by elemSize
+ assert((structSize % elemSize) == 0);
+
+ unsigned hfaCount = (structSize / elemSize);
+ for (unsigned i = 0; i < hfaCount; ++i)
+ {
+ m_regType[i] = hfaType;
+ }
+
+ if (comp->compFloatingPointUsed == false)
+ {
+ comp->compFloatingPointUsed = true;
+ }
+ break;
+ }
+
+ case Compiler::SPK_ByValue:
+ {
+ assert(returnType == TYP_STRUCT);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(retClsHnd, &structDesc);
+
+ assert(structDesc.passedInRegisters);
+ for (int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ assert(i < MAX_RET_REG_COUNT);
+ m_regType[i] = comp->GetEightByteType(structDesc, i);
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // a non-HFA struct returned using two registers
+ //
+ assert((structSize > TARGET_POINTER_SIZE) && (structSize <= (2 * TARGET_POINTER_SIZE)));
+
+ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+ comp->info.compCompHnd->getClassGClayout(retClsHnd, &gcPtrs[0]);
+ for (unsigned i = 0; i < 2; ++i)
+ {
+ m_regType[i] = comp->getJitGCType(gcPtrs[i]);
+ }
+
+#else // _TARGET_XXX_
+
+ // This target needs support here!
+ //
+ NYI("Unsupported TARGET returning a TYP_STRUCT in InitializeStructReturnType");
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ break; // for case SPK_ByValue
+ }
+
+ case Compiler::SPK_ByReference:
+
+ // We are returning using the return buffer argument
+ // There are no return registers
+ break;
+
+ default:
+
+ unreached(); // By the contract of getReturnTypeForStruct we should never get here.
+
+ } // end of switch (howToReturnStruct)
+
+#endif // FEATURE_MULTIREG_RET
+
+#ifdef DEBUG
+ m_inited = true;
+#endif
+}
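+
+// Illustrative note (not part of the original source): on Arm64, for example, an HFA of four floats
+// (structSize == 16, hfaType == TYP_FLOAT) ends up with m_regType[0..3] == TYP_FLOAT, while a
+// 16-byte non-HFA struct gets two entries derived from its GC layout (e.g. TYP_I_IMPL or TYP_REF
+// per returned register).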
+
+//---------------------------------------------------------------------------------------
+// InitializeLongReturnType:
+// Initialize the Return Type Descriptor for a method that returns a TYP_LONG
+//
+// Arguments:
+// comp - Compiler instance
+//
+// Return Value:
+// None
+//
+void ReturnTypeDesc::InitializeLongReturnType(Compiler* comp)
+{
+#if defined(_TARGET_X86_)
+
+ // Sets up a ReturnTypeDesc for returning a long using two registers
+ //
+ assert(MAX_RET_REG_COUNT >= 2);
+ m_regType[0] = TYP_INT;
+ m_regType[1] = TYP_INT;
+
+#else // not _TARGET_X86_
+
+ m_regType[0] = TYP_LONG;
+
+#endif // _TARGET_X86_
+
+#ifdef DEBUG
+ m_inited = true;
+#endif
+}
+
+//-------------------------------------------------------------------
+// GetABIReturnReg: Return ith return register as per target ABI
+//
+// Arguments:
+// idx - Index of the return register.
+// The first return register has an index of 0 and so on.
+//
+// Return Value:
+// Returns ith return register as per target ABI.
+//
+// Notes:
+// Implemented for x64 Unix (SysV multi-reg struct returns), x86 (long returns)
+// and Arm64 (two-register struct and HFA returns); Arm32 HFA returns are not
+// yet supported.
+//
+// TODO-ARM: Implement this routine to support HFA returns.
+regNumber ReturnTypeDesc::GetABIReturnReg(unsigned idx)
+{
+ unsigned count = GetReturnRegCount();
+ assert(idx < count);
+
+ regNumber resultReg = REG_NA;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ var_types regType0 = GetReturnRegType(0);
+
+ if (idx == 0)
+ {
+ if (varTypeIsIntegralOrI(regType0))
+ {
+ resultReg = REG_INTRET;
+ }
+ else
+ {
+ noway_assert(varTypeIsFloating(regType0));
+ resultReg = REG_FLOATRET;
+ }
+ }
+ else if (idx == 1)
+ {
+ var_types regType1 = GetReturnRegType(1);
+
+ if (varTypeIsIntegralOrI(regType1))
+ {
+ if (varTypeIsIntegralOrI(regType0))
+ {
+ resultReg = REG_INTRET_1;
+ }
+ else
+ {
+ resultReg = REG_INTRET;
+ }
+ }
+ else
+ {
+ noway_assert(varTypeIsFloating(regType1));
+
+ if (varTypeIsFloating(regType0))
+ {
+ resultReg = REG_FLOATRET_1;
+ }
+ else
+ {
+ resultReg = REG_FLOATRET;
+ }
+ }
+ }
+
+#elif defined(_TARGET_X86_)
+
+ if (idx == 0)
+ {
+ resultReg = REG_LNGRET_LO;
+ }
+ else if (idx == 1)
+ {
+ resultReg = REG_LNGRET_HI;
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ var_types regType = GetReturnRegType(idx);
+ if (varTypeIsIntegralOrI(regType))
+ {
+ noway_assert(idx < 2); // Up to 2 return registers for 16-byte structs
+ resultReg = (idx == 0) ? REG_INTRET : REG_INTRET_1; // X0 or X1
+ }
+ else
+ {
+ noway_assert(idx < 4); // Up to 4 return registers for HFA's
+ resultReg = (regNumber)((unsigned)(REG_FLOATRET) + idx); // V0, V1, V2 or V3
+ }
+
+#endif // TARGET_XXX
+
+ assert(resultReg != REG_NA);
+ return resultReg;
+}
+
+//--------------------------------------------------------------------------------
+// GetABIReturnRegs: get the mask of return registers as per target arch ABI.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// reg mask of return registers in which the return type is returned.
+//
+// Note:
+// This simply accumulates the registers returned by GetABIReturnReg, so it covers
+// the same targets (x64 Unix, x86 long returns and Arm64); Arm32 HFA returns are
+// not yet supported.
+//
+// This routine can be used when the caller is not particular about the order
+// of return registers and only wants to know the set of return registers.
+//
+// TODO-ARM: Implement this routine to support HFA returns.
+//
+regMaskTP ReturnTypeDesc::GetABIReturnRegs()
+{
+ regMaskTP resultMask = RBM_NONE;
+
+ unsigned count = GetReturnRegCount();
+ for (unsigned i = 0; i < count; ++i)
+ {
+ resultMask |= genRegMask(GetABIReturnReg(i));
+ }
+
+ return resultMask;
+}
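+
+// Illustrative sketch (not part of the original source): a hypothetical consumer that fills in a
+// descriptor for a struct-returning method and then queries the ABI return registers
+// ('comp' and 'retClsHnd' are assumed to be in scope):
+//
+//     ReturnTypeDesc retDesc;
+//     retDesc.InitializeStructReturnType(comp, retClsHnd);
+//     for (unsigned i = 0; i < retDesc.GetReturnRegCount(); ++i)
+//     {
+//         regNumber reg = retDesc.GetABIReturnReg(i);    // i'th register, in ABI order
+//     }
+//     regMaskTP regs = retDesc.GetABIReturnRegs();       // the same registers as a mask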
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
new file mode 100644
index 0000000000..4efeeae620
--- /dev/null
+++ b/src/jit/gentree.h
@@ -0,0 +1,5124 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GenTree XX
+XX XX
+XX This is the node in the semantic tree graph. It represents the operation XX
+XX corresponding to the node, and other information during code-gen. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _GENTREE_H_
+#define _GENTREE_H_
+/*****************************************************************************/
+
+#include "vartype.h" // For "var_types"
+#include "target.h" // For "regNumber"
+#include "ssaconfig.h" // For "SsaConfig::RESERVED_SSA_NUM"
+#include "reglist.h"
+#include "valuenumtype.h"
+#include "simplerhash.h"
+#include "nodeinfo.h"
+#include "simd.h"
+
+// Debugging GenTree is much easier if we add a magic virtual function to make the debugger able to figure out what type
+// it's got. This is enabled by default in DEBUG. To enable it in retail (non-DEBUG) builds (temporarily!), you need to change the
+// build to define DEBUGGABLE_GENTREE=1, as well as pass /OPT:NOICF to the linker (or else all the vtables get merged,
+// making the debugging value supplied by them useless). See protojit.nativeproj for a commented example of setting the
+// build flags correctly.
+#ifndef DEBUGGABLE_GENTREE
+#ifdef DEBUG
+#define DEBUGGABLE_GENTREE 1
+#else // !DEBUG
+#define DEBUGGABLE_GENTREE 0
+#endif // !DEBUG
+#endif // !DEBUGGABLE_GENTREE
+
+// The SpecialCodeKind enum is used to indicate the type of special (unique)
+// target block that will be targeted by an instruction.
+// These are used by:
+// GenTreeBoundsChk nodes (SCK_RNGCHK_FAIL, SCK_ARG_EXCPN, SCK_ARG_RNG_EXCPN)
+// - these nodes have a field (gtThrowKind) to indicate which kind
+// GenTreeOps nodes, for which codegen will generate the branch
+// - it will use the appropriate kind based on the opcode, though it's not
+// clear why SCK_OVERFLOW == SCK_ARITH_EXCPN
+// SCK_PAUSE_EXEC is not currently used.
+//
+enum SpecialCodeKind
+{
+ SCK_NONE,
+ SCK_RNGCHK_FAIL, // target when range check fails
+ SCK_PAUSE_EXEC, // target to stop (e.g. to allow GC)
+ SCK_DIV_BY_ZERO, // target for divide by zero (Not used on X86/X64)
+ SCK_ARITH_EXCPN, // target on arithmetic exception
+ SCK_OVERFLOW = SCK_ARITH_EXCPN, // target on overflow
+ SCK_ARG_EXCPN, // target on ArgumentException (currently used only for SIMD intrinsics)
+ SCK_ARG_RNG_EXCPN, // target on ArgumentOutOfRangeException (currently used only for SIMD intrinsics)
+ SCK_COUNT
+};
+
+/*****************************************************************************/
+
+DECLARE_TYPED_ENUM(genTreeOps, BYTE)
+{
+#define GTNODE(en, sn, cm, ok) GT_##en,
+#include "gtlist.h"
+
+ GT_COUNT,
+
+#ifdef _TARGET_64BIT_
+ // GT_CNS_NATIVELONG is the gtOper symbol for GT_CNS_LNG or GT_CNS_INT, depending on the target.
+ // For the 64-bit targets we will only use GT_CNS_INT as it is used to represent all the possible sizes
+ GT_CNS_NATIVELONG = GT_CNS_INT,
+#else
+ // For the 32-bit targets we use a GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
+ // In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
+ GT_CNS_NATIVELONG = GT_CNS_LNG,
+#endif
+}
+END_DECLARE_TYPED_ENUM(genTreeOps, BYTE)
+
+/*****************************************************************************
+ *
+ * The following enum defines a set of bit flags that can be used
+ * to classify expression tree nodes. Note that some operators will
+ * have more than one bit set, as follows:
+ *
+ * GTK_CONST implies GTK_LEAF
+ * GTK_RELOP implies GTK_BINOP
+ * GTK_LOGOP implies GTK_BINOP
+ */
+
+enum genTreeKinds
+{
+ GTK_SPECIAL = 0x0000, // unclassified operator (special handling reqd)
+
+ GTK_CONST = 0x0001, // constant operator
+ GTK_LEAF = 0x0002, // leaf operator
+ GTK_UNOP = 0x0004, // unary operator
+ GTK_BINOP = 0x0008, // binary operator
+ GTK_RELOP = 0x0010, // comparison operator
+ GTK_LOGOP = 0x0020, // logical operator
+ GTK_ASGOP = 0x0040, // assignment operator
+
+ GTK_KINDMASK = 0x007F, // operator kind mask
+
+ GTK_COMMUTE = 0x0080, // commutative operator
+
+ GTK_EXOP = 0x0100, // Indicates an oper for a node type that extends GenTreeOp (or GenTreeUnOp)
+ // by adding non-node fields to a unary or binary operator.
+
+ GTK_LOCAL = 0x0200, // is a local access (load, store, phi)
+
+ GTK_NOVALUE = 0x0400, // node does not produce a value
+ GTK_NOTLIR = 0x0800, // node is not allowed in LIR
+
+ /* Define composite value(s) */
+
+ GTK_SMPOP = (GTK_UNOP | GTK_BINOP | GTK_RELOP | GTK_LOGOP)
+};
+
+/*****************************************************************************/
+
+#define SMALL_TREE_NODES 1
+
+/*****************************************************************************/
+
+DECLARE_TYPED_ENUM(gtCallTypes, BYTE)
+{
+ CT_USER_FUNC, // User function
+ CT_HELPER, // Jit-helper
+ CT_INDIRECT, // Indirect call
+
+ CT_COUNT // fake entry (must be last)
+}
+END_DECLARE_TYPED_ENUM(gtCallTypes, BYTE)
+
+/*****************************************************************************/
+
+struct BasicBlock;
+
+struct InlineCandidateInfo;
+
+/*****************************************************************************/
+
+// GT_FIELD nodes will be lowered into more "code-gen-able" representations, like
+// GT_IND's of addresses, or GT_LCL_FLD nodes. We'd like to preserve the more abstract
+// information, and will therefore annotate such lowered nodes with FieldSeq's. A FieldSeq
+// represents a (possibly) empty sequence of fields. The fields are in the order
+// in which they are dereferenced. The first field may be an object field or a struct field;
+// all subsequent fields must be struct fields.
+struct FieldSeqNode
+{
+ CORINFO_FIELD_HANDLE m_fieldHnd;
+ FieldSeqNode* m_next;
+
+ FieldSeqNode(CORINFO_FIELD_HANDLE fieldHnd, FieldSeqNode* next) : m_fieldHnd(fieldHnd), m_next(next)
+ {
+ }
+
+ // returns true when this is the pseudo #FirstElem field sequence
+ bool IsFirstElemFieldSeq();
+
+ // returns true when this is the pseudo #ConstantIndex field sequence
+ bool IsConstantIndexFieldSeq();
+
+ // returns true when this is the pseudo #FirstElem field sequence or the pseudo #ConstantIndex field sequence
+ bool IsPseudoField();
+
+ // Make sure this provides methods that allow it to be used as a KeyFuncs type in SimplerHash.
+ static int GetHashCode(FieldSeqNode fsn)
+ {
+ return static_cast<int>(reinterpret_cast<intptr_t>(fsn.m_fieldHnd)) ^
+ static_cast<int>(reinterpret_cast<intptr_t>(fsn.m_next));
+ }
+
+ static bool Equals(FieldSeqNode fsn1, FieldSeqNode fsn2)
+ {
+ return fsn1.m_fieldHnd == fsn2.m_fieldHnd && fsn1.m_next == fsn2.m_next;
+ }
+};
+
+// This class canonicalizes field sequences.
+class FieldSeqStore
+{
+ typedef SimplerHashTable<FieldSeqNode, /*KeyFuncs*/ FieldSeqNode, FieldSeqNode*, JitSimplerHashBehavior>
+ FieldSeqNodeCanonMap;
+
+ IAllocator* m_alloc;
+ FieldSeqNodeCanonMap* m_canonMap;
+
+ static FieldSeqNode s_notAField; // No value, just exists to provide an address.
+
+ // Dummy variables to provide the addresses for the "pseudo field handle" statics below.
+ static int FirstElemPseudoFieldStruct;
+ static int ConstantIndexPseudoFieldStruct;
+
+public:
+ FieldSeqStore(IAllocator* alloc);
+
+ // Returns the (canonical in the store) singleton field sequence for the given handle.
+ FieldSeqNode* CreateSingleton(CORINFO_FIELD_HANDLE fieldHnd);
+
+ // This is a special distinguished FieldSeqNode indicating that a constant does *not*
+ // represent a valid field sequence. This is "infectious", in the sense that appending it
+ // (on either side) to any field sequence yields the "NotAField()" sequence.
+ static FieldSeqNode* NotAField()
+ {
+ return &s_notAField;
+ }
+
+ // Returns the (canonical in the store) field sequence representing the concatenation of
+ // the sequences represented by "a" and "b". Assumes that "a" and "b" are canonical; that is,
+ // they are the results of CreateSingleton, NotAField, or Append calls. If either of the arguments
+ // are the "NotAField" value, so is the result.
+ FieldSeqNode* Append(FieldSeqNode* a, FieldSeqNode* b);
+
+ // We have a few "pseudo" field handles:
+
+ // This treats the constant offset of the first element of something as if it were a field.
+ // Works for method table offsets of boxed structs, or first elem offset of arrays/strings.
+ static CORINFO_FIELD_HANDLE FirstElemPseudoField;
+
+ // If there is a constant index, we make a pseudo field to correspond to the constant added to
+ // offset of the indexed field. This keeps the field sequence structure "normalized", especially in the
+ // case where the element type is a struct, so we might add a further struct field offset.
+ static CORINFO_FIELD_HANDLE ConstantIndexPseudoField;
+
+ static bool IsPseudoField(CORINFO_FIELD_HANDLE hnd)
+ {
+ return hnd == FirstElemPseudoField || hnd == ConstantIndexPseudoField;
+ }
+};
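+
+// Illustrative sketch (not part of the original source): building the canonical field sequence for
+// an access like "obj.a.b" ('store' is a hypothetical FieldSeqStore*, 'hndA'/'hndB' are
+// hypothetical CORINFO_FIELD_HANDLEs):
+//
+//     FieldSeqNode* seqA  = store->CreateSingleton(hndA);
+//     FieldSeqNode* seqAB = store->Append(seqA, store->CreateSingleton(hndB));
+//
+//     // Appending NotAField() on either side yields NotAField() again:
+//     assert(store->Append(seqAB, FieldSeqStore::NotAField()) == FieldSeqStore::NotAField());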
+
+class GenTreeUseEdgeIterator;
+class GenTreeOperandIterator;
+
+/*****************************************************************************/
+
+typedef struct GenTree* GenTreePtr;
+struct GenTreeArgList;
+
+// Forward declarations of the subtypes
+#define GTSTRUCT_0(fn, en) struct GenTree##fn;
+#define GTSTRUCT_1(fn, en) struct GenTree##fn;
+#define GTSTRUCT_2(fn, en, en2) struct GenTree##fn;
+#define GTSTRUCT_3(fn, en, en2, en3) struct GenTree##fn;
+#define GTSTRUCT_4(fn, en, en2, en3, en4) struct GenTree##fn;
+#define GTSTRUCT_N(fn, ...) struct GenTree##fn;
+#include "gtstructs.h"
+
+/*****************************************************************************/
+
+#ifndef _HOST_64BIT_
+#include <pshpack4.h>
+#endif
+
+struct GenTree
+{
+// We use GTSTRUCT_0 only for the category of simple ops.
+#define GTSTRUCT_0(fn, en) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->OperIsSimple()); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#define GTSTRUCT_1(fn, en) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#define GTSTRUCT_2(fn, en, en2) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en || this->gtOper == en2); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#define GTSTRUCT_3(fn, en, en2, en3) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en || this->gtOper == en2 || this->gtOper == en3); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+
+#define GTSTRUCT_4(fn, en, en2, en3, en4) \
+ GenTree##fn* As##fn() \
+ { \
+ assert(this->gtOper == en || this->gtOper == en2 || this->gtOper == en3 || this->gtOper == en4); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+
+#ifdef DEBUG
+// VC does not optimize out this loop in retail even though the value it computes is unused
+// so we need a separate version for non-debug
+#define GTSTRUCT_N(fn, ...) \
+ GenTree##fn* As##fn() \
+ { \
+ genTreeOps validOps[] = {__VA_ARGS__}; \
+ bool found = false; \
+ for (unsigned i = 0; i < ArrLen(validOps); i++) \
+ { \
+ if (this->gtOper == validOps[i]) \
+ { \
+ found = true; \
+ break; \
+ } \
+ } \
+ assert(found); \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#else
+#define GTSTRUCT_N(fn, ...) \
+ GenTree##fn* As##fn() \
+ { \
+ return reinterpret_cast<GenTree##fn*>(this); \
+ } \
+ GenTree##fn& As##fn##Ref() \
+ { \
+ return *As##fn(); \
+ } \
+ __declspec(property(get = As##fn##Ref)) GenTree##fn& gt##fn;
+#endif
+
+#include "gtstructs.h"
+
+ genTreeOps gtOper; // enum subtype BYTE
+ var_types gtType; // enum subtype BYTE
+
+ genTreeOps OperGet() const
+ {
+ return gtOper;
+ }
+ var_types TypeGet() const
+ {
+ return gtType;
+ }
+
+#ifdef DEBUG
+ genTreeOps gtOperSave; // Only used to save gtOper when we destroy a node, to aid debugging.
+#endif
+
+#if FEATURE_ANYCSE
+
+#define NO_CSE (0)
+
+#define IS_CSE_INDEX(x) (x != 0)
+#define IS_CSE_USE(x) (x > 0)
+#define IS_CSE_DEF(x) (x < 0)
+#define GET_CSE_INDEX(x) ((x > 0) ? x : -x)
+#define TO_CSE_DEF(x) (-x)
+
+ signed char gtCSEnum; // 0 or the CSE index (negated if def)
+ // valid only for CSE expressions
+
+#endif // FEATURE_ANYCSE
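+
+ // Illustrative note (not part of the original source): with this encoding, a node that is a use of
+ // CSE #3 carries gtCSEnum == 3 and the corresponding def carries gtCSEnum == -3, so (for a
+ // hypothetical 'tree'):
+ //
+ //     if (IS_CSE_INDEX(tree->gtCSEnum))
+ //     {
+ //         unsigned index = GET_CSE_INDEX(tree->gtCSEnum);   // 3 for both the use and the def
+ //         bool     isDef = IS_CSE_DEF(tree->gtCSEnum);
+ //     }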
+
+ unsigned char gtLIRFlags; // Used for nodes that are in LIR. See LIR::Flags in lir.h for the various flags.
+
+#if ASSERTION_PROP
+ unsigned short gtAssertionNum; // 0 or Assertion table index
+ // valid only for non-GT_STMT nodes
+
+ bool HasAssertion() const
+ {
+ return gtAssertionNum != 0;
+ }
+ void ClearAssertion()
+ {
+ gtAssertionNum = 0;
+ }
+
+ unsigned short GetAssertion() const
+ {
+ return gtAssertionNum;
+ }
+ void SetAssertion(unsigned short value)
+ {
+ assert((unsigned short)value == value);
+ gtAssertionNum = (unsigned short)value;
+ }
+
+#endif
+
+#if FEATURE_STACK_FP_X87
+ unsigned char gtFPlvl; // x87 stack depth at this node
+ void gtCopyFPlvl(GenTree* other)
+ {
+ gtFPlvl = other->gtFPlvl;
+ }
+ void gtSetFPlvl(unsigned level)
+ {
+ noway_assert(FitsIn<unsigned char>(level));
+ gtFPlvl = (unsigned char)level;
+ }
+#else // FEATURE_STACK_FP_X87
+ void gtCopyFPlvl(GenTree* other)
+ {
+ }
+ void gtSetFPlvl(unsigned level)
+ {
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ //
+ // Cost metrics on the node. Don't allow direct access to the variable for setting.
+ //
+
+public:
+#ifdef DEBUG
+ // You are not allowed to read the cost values before they have been set in gtSetEvalOrder().
+ // Keep track of whether the costs have been initialized, and assert if they are read before being initialized.
+ // Obviously, this information does need to be initialized when a node is created.
+ // This is public so the dumpers can see it.
+
+ bool gtCostsInitialized;
+#endif // DEBUG
+
+#define MAX_COST UCHAR_MAX
+#define IND_COST_EX 3 // execution cost for an indirection
+
+ __declspec(property(get = GetCostEx)) unsigned char gtCostEx; // estimate of expression execution cost
+
+ __declspec(property(get = GetCostSz)) unsigned char gtCostSz; // estimate of expression code size cost
+
+ unsigned char GetCostEx() const
+ {
+ assert(gtCostsInitialized);
+ return _gtCostEx;
+ }
+ unsigned char GetCostSz() const
+ {
+ assert(gtCostsInitialized);
+ return _gtCostSz;
+ }
+
+ // Set the costs. They are always both set at the same time.
+ // Don't use the "put" property: force calling this function, to make it more obvious in the few places
+ // that set the values.
+ // Note that costs are only set in gtSetEvalOrder() and its callees.
+ void SetCosts(unsigned costEx, unsigned costSz)
+ {
+ assert(costEx != (unsigned)-1); // looks bogus
+ assert(costSz != (unsigned)-1); // looks bogus
+ INDEBUG(gtCostsInitialized = true;)
+
+ _gtCostEx = (costEx > MAX_COST) ? MAX_COST : (unsigned char)costEx;
+ _gtCostSz = (costSz > MAX_COST) ? MAX_COST : (unsigned char)costSz;
+ }
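+
+ // Illustrative note (not part of the original source): costs are written only via SetCosts() and
+ // read back through the gtCostEx/gtCostSz properties, which assert in DEBUG if the costs were
+ // never initialized (hypothetical 'tree'):
+ //
+ //     tree->SetCosts(IND_COST_EX, 2);
+ //     unsigned ex = tree->gtCostEx;   // 3
+ //     unsigned sz = tree->gtCostSz;   // 2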
+
+ // Optimized copy function, to avoid the SetCosts() function comparisons, and make it more clear that a node copy is
+ // happening.
+ void CopyCosts(const GenTree* const tree)
+ {
+ INDEBUG(gtCostsInitialized =
+ tree->gtCostsInitialized;) // If the 'tree' costs aren't initialized, we'll hit an assert below.
+ _gtCostEx = tree->gtCostEx;
+ _gtCostSz = tree->gtCostSz;
+ }
+
+ // Same as CopyCosts, but avoids asserts if the costs we are copying have not been initialized.
+ // This is because the importer, for example, clones nodes, before these costs have been initialized.
+ // Note that we directly access the 'tree' costs, not going through the accessor functions (either
+ // directly or through the properties).
+ void CopyRawCosts(const GenTree* const tree)
+ {
+ INDEBUG(gtCostsInitialized = tree->gtCostsInitialized;)
+ _gtCostEx = tree->_gtCostEx;
+ _gtCostSz = tree->_gtCostSz;
+ }
+
+private:
+ unsigned char _gtCostEx; // estimate of expression execution cost
+ unsigned char _gtCostSz; // estimate of expression code size cost
+
+ //
+ // Register or register pair number of the node.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+public:
+ enum genRegTag
+ {
+ GT_REGTAG_NONE, // Nothing has been assigned to _gtRegNum/_gtRegPair
+ GT_REGTAG_REG, // _gtRegNum has been assigned
+#if CPU_LONG_USES_REGPAIR
+ GT_REGTAG_REGPAIR // _gtRegPair has been assigned
+#endif
+ };
+ genRegTag GetRegTag() const
+ {
+#if CPU_LONG_USES_REGPAIR
+ assert(gtRegTag == GT_REGTAG_NONE || gtRegTag == GT_REGTAG_REG || gtRegTag == GT_REGTAG_REGPAIR);
+#else
+ assert(gtRegTag == GT_REGTAG_NONE || gtRegTag == GT_REGTAG_REG);
+#endif
+ return gtRegTag;
+ }
+
+private:
+ genRegTag gtRegTag; // What is in _gtRegNum/_gtRegPair?
+#endif // DEBUG
+
+private:
+ union {
+ // NOTE: After LSRA, one of these values may be valid even if GTF_REG_VAL is not set in gtFlags.
+ // They store the register assigned to the node. If a register is not assigned, _gtRegNum is set to REG_NA
+ // or _gtRegPair is set to REG_PAIR_NONE, depending on the node type.
+ regNumberSmall _gtRegNum; // which register the value is in
+ regPairNoSmall _gtRegPair; // which register pair the value is in
+ };
+
+public:
+ // The register number is stored in a small format (8 bits), but the getters return and the setters take
+ // a full-size (unsigned) format, to localize the casts here.
+
+ __declspec(property(get = GetRegNum, put = SetRegNum)) regNumber gtRegNum;
+
+ // for codegen purposes, is this node a subnode of its parent
+ bool isContained() const;
+
+ bool isContainedIndir() const;
+
+ bool isIndirAddrMode();
+
+ bool isIndir() const;
+
+ bool isContainedIntOrIImmed() const
+ {
+ return isContained() && IsCnsIntOrI();
+ }
+
+ bool isContainedFltOrDblImmed() const
+ {
+ return isContained() && (OperGet() == GT_CNS_DBL);
+ }
+
+ bool isLclField() const
+ {
+ return OperGet() == GT_LCL_FLD || OperGet() == GT_STORE_LCL_FLD;
+ }
+
+ bool isContainedLclField() const
+ {
+ return isContained() && isLclField();
+ }
+
+ bool isContainedLclVar() const
+ {
+ return isContained() && (OperGet() == GT_LCL_VAR);
+ }
+
+ bool isContainedSpillTemp() const;
+
+ // Indicates whether it is a memory op.
+ // Right now it includes Indir and LclField ops.
+ bool isMemoryOp() const
+ {
+ return isIndir() || isLclField();
+ }
+
+ bool isContainedMemoryOp() const
+ {
+ return (isContained() && isMemoryOp()) || isContainedLclVar() || isContainedSpillTemp();
+ }
+
+ regNumber GetRegNum() const
+ {
+ assert((gtRegTag == GT_REGTAG_REG) || (gtRegTag == GT_REGTAG_NONE)); // TODO-Cleanup: get rid of the NONE case,
+ // and fix everyplace that reads undefined
+ // values
+ regNumber reg = (regNumber)_gtRegNum;
+ assert((gtRegTag == GT_REGTAG_NONE) || // TODO-Cleanup: get rid of the NONE case, and fix everyplace that reads
+ // undefined values
+ (reg >= REG_FIRST && reg <= REG_COUNT));
+ return reg;
+ }
+
+ void SetRegNum(regNumber reg)
+ {
+ assert(reg >= REG_FIRST && reg <= REG_COUNT);
+ // Make sure the upper bits of _gtRegPair are clear
+ _gtRegPair = (regPairNoSmall)0;
+ _gtRegNum = (regNumberSmall)reg;
+ INDEBUG(gtRegTag = GT_REGTAG_REG;)
+ assert(_gtRegNum == reg);
+ }
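+
+ // Illustrative note (not part of the original source): because gtRegNum is declared as a
+ // __declspec(property) above, reads and writes route through these accessors (hypothetical
+ // 'tree' and 'reg'):
+ //
+ //     tree->gtRegNum = reg;              // invokes SetRegNum(reg)
+ //     regNumber r = tree->gtRegNum;      // invokes GetRegNum()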
+
+#if CPU_LONG_USES_REGPAIR
+ __declspec(property(get = GetRegPair, put = SetRegPair)) regPairNo gtRegPair;
+
+ regPairNo GetRegPair() const
+ {
+ assert((gtRegTag == GT_REGTAG_REGPAIR) || (gtRegTag == GT_REGTAG_NONE)); // TODO-Cleanup: get rid of the NONE
+ // case, and fix everyplace that reads
+ // undefined values
+ regPairNo regPair = (regPairNo)_gtRegPair;
+ assert((gtRegTag == GT_REGTAG_NONE) || // TODO-Cleanup: get rid of the NONE case, and fix everyplace that reads
+ // undefined values
+ (regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST) ||
+ (regPair == REG_PAIR_NONE)); // allow initializing to an undefined value
+ return regPair;
+ }
+
+ void SetRegPair(regPairNo regPair)
+ {
+ assert((regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST) ||
+ (regPair == REG_PAIR_NONE)); // allow initializing to an undefined value
+ _gtRegPair = (regPairNoSmall)regPair;
+ INDEBUG(gtRegTag = GT_REGTAG_REGPAIR;)
+ assert(_gtRegPair == regPair);
+ }
+#endif
+
+ // Copy the _gtRegNum/_gtRegPair/gtRegTag fields
+ void CopyReg(GenTreePtr from);
+
+ void gtClearReg(Compiler* compiler);
+
+ bool gtHasReg() const;
+
+ regMaskTP gtGetRegMask() const;
+
+ unsigned gtFlags; // see GTF_xxxx below
+
+#if defined(DEBUG)
+ unsigned gtDebugFlags; // see GTF_DEBUG_xxx below
+#endif // defined(DEBUG)
+
+ ValueNumPair gtVNPair;
+
+ regMaskSmall gtRsvdRegs; // set of fixed trashed registers
+#ifdef LEGACY_BACKEND
+ regMaskSmall gtUsedRegs; // set of used (trashed) registers
+#endif // LEGACY_BACKEND
+
+#ifndef LEGACY_BACKEND
+ TreeNodeInfo gtLsraInfo;
+#endif // !LEGACY_BACKEND
+
+ void SetVNsFromNode(GenTreePtr tree)
+ {
+ gtVNPair = tree->gtVNPair;
+ }
+
+ ValueNum GetVN(ValueNumKind vnk) const
+ {
+ if (vnk == VNK_Liberal)
+ {
+ return gtVNPair.GetLiberal();
+ }
+ else
+ {
+ assert(vnk == VNK_Conservative);
+ return gtVNPair.GetConservative();
+ }
+ }
+ void SetVN(ValueNumKind vnk, ValueNum vn)
+ {
+ if (vnk == VNK_Liberal)
+ {
+ return gtVNPair.SetLiberal(vn);
+ }
+ else
+ {
+ assert(vnk == VNK_Conservative);
+ return gtVNPair.SetConservative(vn);
+ }
+ }
+ void SetVNs(ValueNumPair vnp)
+ {
+ gtVNPair = vnp;
+ }
+ void ClearVN()
+ {
+ gtVNPair = ValueNumPair(); // Initializes both elements to "NoVN".
+ }
+
+//---------------------------------------------------------------------
+// The first set of flags can be used with a large set of nodes, and
+// thus they must all have distinct values. That is, one can test any
+// expression node for one of these flags.
+//---------------------------------------------------------------------
+
+#define GTF_ASG 0x00000001 // sub-expression contains an assignment
+#define GTF_CALL 0x00000002 // sub-expression contains a func. call
+#define GTF_EXCEPT 0x00000004 // sub-expression might throw an exception
+#define GTF_GLOB_REF 0x00000008 // sub-expression uses global variable(s)
+#define GTF_ORDER_SIDEEFF 0x00000010 // sub-expression has a re-ordering side effect
+
+// If you set these flags, make sure that code:gtExtractSideEffList knows how to find the tree,
+// otherwise C# code such as (compiled with csc /o-)
+//     var v = side_eff_operation;
+// with no use of v will drop your tree on the floor.
+#define GTF_PERSISTENT_SIDE_EFFECTS (GTF_ASG | GTF_CALL)
+#define GTF_SIDE_EFFECT (GTF_PERSISTENT_SIDE_EFFECTS | GTF_EXCEPT)
+#define GTF_GLOB_EFFECT (GTF_SIDE_EFFECT | GTF_GLOB_REF)
+#define GTF_ALL_EFFECT (GTF_GLOB_EFFECT | GTF_ORDER_SIDEEFF)
+
+// The extra flag GTF_IS_IN_CSE is used to tell the consumer of these flags
+// that we are calling in the context of performing a CSE, thus we
+// should allow the run-once side effects of running a class constructor.
+//
+// The only requirement of this flag is that it not overlap any of the
+// side-effect flags. The actual bit used is otherwise arbitrary.
+#define GTF_IS_IN_CSE GTF_BOOLEAN
+#define GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE (GTF_ASG | GTF_CALL | GTF_IS_IN_CSE)
+
+// Can any side-effects be observed externally, say by a caller method?
+// For assignments, only assignments to global memory can be observed
+// externally, whereas simple assignments to local variables can not.
+//
+// Be careful when using this inside a "try" protected region as the
+// order of assignments to local variables would need to be preserved
+// wrt side effects if the variables are alive on entry to the
+// "catch/finally" region. In such cases, even assignments to locals
+// will have to be restricted.
+#define GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(flags) \
+ (((flags) & (GTF_CALL | GTF_EXCEPT)) || (((flags) & (GTF_ASG | GTF_GLOB_REF)) == (GTF_ASG | GTF_GLOB_REF)))
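+
+// Illustrative note (not part of the original source): for example, a store that only writes a
+// local variable (gtFlags == GTF_ASG) is not globally visible, while one that also references
+// global memory, or any tree containing a call or a possible exception, is:
+//
+//     GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(GTF_ASG)                  // false
+//     GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(GTF_ASG | GTF_GLOB_REF)   // true
+//     GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(GTF_CALL)                 // true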
+
+#define GTF_REVERSE_OPS \
+ 0x00000020 // operand op2 should be evaluated before op1 (normally, op1 is evaluated first and op2 is evaluated
+ // second)
+#define GTF_REG_VAL \
+ 0x00000040 // operand is sitting in a register (or part of a TYP_LONG operand is sitting in a register)
+
+#define GTF_SPILLED 0x00000080 // the value has been spilled
+
+#ifdef LEGACY_BACKEND
+#define GTF_SPILLED_OPER 0x00000100 // op1 has been spilled
+#define GTF_SPILLED_OP2 0x00000200 // op2 has been spilled
+#else
+#define GTF_NOREG_AT_USE 0x00000100 // tree node is in memory at the point of use
+#endif // LEGACY_BACKEND
+
+#define GTF_ZSF_SET 0x00000400 // the zero (ZF) and sign (SF) flags are set to match the value of this operand
+#if FEATURE_SET_FLAGS
+#define GTF_SET_FLAGS 0x00000800 // Requires that codegen for this node set the flags
+ // Use gtSetFlags() to check this flag
+#endif
+#define GTF_IND_NONFAULTING 0x00000800 // An indir that cannot fault. GTF_SET_FLAGS is not used on indirs
+
+#define GTF_MAKE_CSE 0x00002000 // Hoisted Expression: try hard to make this into CSE (see optPerformHoistExpr)
+#define GTF_DONT_CSE 0x00004000 // don't bother CSE'ing this expr
+#define GTF_COLON_COND 0x00008000 // this node is conditionally executed (part of ? :)
+
+#define GTF_NODE_MASK (GTF_COLON_COND)
+
+#define GTF_BOOLEAN 0x00040000 // value is known to be 0/1
+
+#define GTF_SMALL_OK 0x00080000 // actual small int sufficient
+
+#define GTF_UNSIGNED 0x00100000 // with GT_CAST: the source operand is an unsigned type
+ // with operators: the specified node is an unsigned operator
+
+#define GTF_LATE_ARG \
+ 0x00200000 // the specified node is evaluated to a temp in the arg list, and this temp is added to gtCallLateArgs.
+
+#define GTF_SPILL 0x00400000 // needs to be spilled here
+#define GTF_SPILL_HIGH 0x00040000 // shared with GTF_BOOLEAN
+
+#define GTF_COMMON_MASK 0x007FFFFF // mask of all the flags above
+
+#define GTF_REUSE_REG_VAL 0x00800000 // This is set by the register allocator on nodes whose value already exists in the
+ // register assigned to this node, so the code generator does not have to generate
+ // code to produce the value.
+ // It is currently used only on constant nodes.
+// It CANNOT be set on var (GT_LCL*) nodes, or on indir (GT_IND or GT_STOREIND) nodes, since
+// it is not needed for lclVars and is highly unlikely to be useful for indir nodes
+
+//---------------------------------------------------------------------
+// The following flags can be used only with a small set of nodes, and
+// thus their values need not be distinct (other than within the set
+// that goes with a particular node/nodes, of course). That is, one can
+// only test for one of these flags if the 'gtOper' value is tested as
+// well to make sure it's the right operator for the particular flag.
+//---------------------------------------------------------------------
+
+// NB: GTF_VAR_* and GTF_REG_* share the same namespace of flags, because
+// GT_LCL_VAR nodes may be changed to GT_REG_VAR nodes without resetting
+// the flags. These are also used by GT_LCL_FLD.
+#define GTF_VAR_DEF 0x80000000 // GT_LCL_VAR -- this is a definition
+#define GTF_VAR_USEASG 0x40000000 // GT_LCL_VAR -- this is a use/def for a x<op>=y
+#define GTF_VAR_USEDEF 0x20000000 // GT_LCL_VAR -- this is a use/def as in x=x+y (only the lhs x is tagged)
+#define GTF_VAR_CAST 0x10000000 // GT_LCL_VAR -- has been explicitly cast (variable node may not be type of local)
+#define GTF_VAR_ITERATOR 0x08000000 // GT_LCL_VAR -- this is an iterator reference in the loop condition
+#define GTF_VAR_CLONED 0x01000000 // GT_LCL_VAR -- this node has been cloned or is a clone
+ // Relevant for inlining optimizations (see fgInlinePrependStatements)
+
+// TODO-Cleanup: Currently, GTF_REG_BIRTH is used only by stackfp
+// We should consider using it more generally for VAR_BIRTH, instead of
+// GTF_VAR_DEF && !GTF_VAR_USEASG
+#define GTF_REG_BIRTH 0x04000000 // GT_REG_VAR -- enregistered variable born here
+#define GTF_VAR_DEATH 0x02000000 // GT_LCL_VAR, GT_REG_VAR -- variable dies here (last use)
+
+#define GTF_VAR_ARR_INDEX 0x00000020 // The variable is part of (the index portion of) an array index expression.
+ // Shares a value with GTF_REVERSE_OPS, which is meaningless for local var.
+
+#define GTF_LIVENESS_MASK (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF | GTF_REG_BIRTH | GTF_VAR_DEATH)
+
+#define GTF_CALL_UNMANAGED 0x80000000 // GT_CALL -- direct call to unmanaged code
+#define GTF_CALL_INLINE_CANDIDATE 0x40000000 // GT_CALL -- this call has been marked as an inline candidate
+
+#define GTF_CALL_VIRT_KIND_MASK 0x30000000
+#define GTF_CALL_NONVIRT 0x00000000 // GT_CALL -- a non virtual call
+#define GTF_CALL_VIRT_STUB 0x10000000 // GT_CALL -- a stub-dispatch virtual call
+#define GTF_CALL_VIRT_VTABLE 0x20000000 // GT_CALL -- a vtable-based virtual call
+
+#define GTF_CALL_NULLCHECK 0x08000000 // GT_CALL -- must check instance pointer for null
+#define GTF_CALL_POP_ARGS 0x04000000 // GT_CALL -- caller pop arguments?
+#define GTF_CALL_HOISTABLE 0x02000000 // GT_CALL -- call is hoistable
+#define GTF_CALL_REG_SAVE 0x01000000 // GT_CALL -- This call preserves all integer regs
+ // For additional flags for GT_CALL node see GTF_CALL_M_
+
+#define GTF_NOP_DEATH 0x40000000 // GT_NOP -- operand dies here
+
+#define GTF_FLD_NULLCHECK 0x80000000 // GT_FIELD -- need to nullcheck the "this" pointer
+#define GTF_FLD_VOLATILE 0x40000000 // GT_FIELD/GT_CLS_VAR -- same as GTF_IND_VOLATILE
+
+#define GTF_INX_RNGCHK 0x80000000 // GT_INDEX -- the array reference should be range-checked.
+#define GTF_INX_REFARR_LAYOUT 0x20000000 // GT_INDEX -- same as GTF_IND_REFARR_LAYOUT
+#define GTF_INX_STRING_LAYOUT 0x40000000 // GT_INDEX -- this uses the special string array layout
+
+#define GTF_IND_VOLATILE 0x40000000 // GT_IND -- the load or store must use volatile semantics (this is a nop
+ // on X86)
+#define GTF_IND_REFARR_LAYOUT 0x20000000 // GT_IND -- the array holds object refs (only affects layout of Arrays)
+#define GTF_IND_TGTANYWHERE 0x10000000 // GT_IND -- the target could be anywhere
+#define GTF_IND_TLS_REF 0x08000000 // GT_IND -- the target is accessed via TLS
+#define GTF_IND_ASG_LHS 0x04000000 // GT_IND -- this GT_IND node is (the effective val) of the LHS of an
+ // assignment; don't evaluate it independently.
+#define GTF_IND_UNALIGNED 0x02000000 // GT_IND -- the load or store is unaligned (we assume worst case
+ // alignment of 1 byte)
+#define GTF_IND_INVARIANT 0x01000000 // GT_IND -- the target is invariant (a prejit indirection)
+#define GTF_IND_ARR_LEN 0x80000000 // GT_IND -- the indirection represents an array length (of the REF
+ // contribution to its argument).
+#define GTF_IND_ARR_INDEX 0x00800000 // GT_IND -- the indirection represents an (SZ) array index
+
+#define GTF_IND_FLAGS \
+ (GTF_IND_VOLATILE | GTF_IND_REFARR_LAYOUT | GTF_IND_TGTANYWHERE | GTF_IND_NONFAULTING | GTF_IND_TLS_REF | \
+ GTF_IND_UNALIGNED | GTF_IND_INVARIANT | GTF_IND_ARR_INDEX)
+
+#define GTF_CLS_VAR_ASG_LHS 0x04000000 // GT_CLS_VAR -- this GT_CLS_VAR node is (the effective val) of the LHS
+ // of an assignment; don't evaluate it independently.
+
+#define GTF_ADDR_ONSTACK 0x80000000 // GT_ADDR -- this expression is guaranteed to be on the stack
+
+#define GTF_ADDRMODE_NO_CSE 0x80000000 // GT_ADD/GT_MUL/GT_LSH -- Do not CSE this node only, forms complex
+ // addressing mode
+
+#define GTF_MUL_64RSLT 0x40000000 // GT_MUL -- produce 64-bit result
+
+#define GTF_MOD_INT_RESULT 0x80000000 // GT_MOD, -- the real tree represented by this
+ // GT_UMOD node evaluates to an int even though
+ // its type is long. The result is
+ // placed in the low member of the
+ // reg pair
+
+#define GTF_RELOP_NAN_UN 0x80000000 // GT_<relop> -- Is branch taken if ops are NaN?
+#define GTF_RELOP_JMP_USED 0x40000000 // GT_<relop> -- result of compare used for jump or ?:
+#define GTF_RELOP_QMARK 0x20000000 // GT_<relop> -- the node is the condition for ?:
+#define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType
+ // is the small type)
+#define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while
+ // with explicit "loop test" in the header block.
+
+#define GTF_QMARK_CAST_INSTOF 0x80000000 // GT_QMARK -- Is this a top (not nested) level qmark created for
+ // castclass or instanceof?
+
+#define GTF_BOX_VALUE 0x80000000 // GT_BOX -- "box" is on a value type
+
+#define GTF_ICON_HDL_MASK 0xF0000000 // Bits used by handle types below
+
+#define GTF_ICON_SCOPE_HDL 0x10000000 // GT_CNS_INT -- constant is a scope handle
+#define GTF_ICON_CLASS_HDL 0x20000000 // GT_CNS_INT -- constant is a class handle
+#define GTF_ICON_METHOD_HDL 0x30000000 // GT_CNS_INT -- constant is a method handle
+#define GTF_ICON_FIELD_HDL 0x40000000 // GT_CNS_INT -- constant is a field handle
+#define GTF_ICON_STATIC_HDL 0x50000000 // GT_CNS_INT -- constant is a handle to static data
+#define GTF_ICON_STR_HDL 0x60000000 // GT_CNS_INT -- constant is a string handle
+#define GTF_ICON_PSTR_HDL 0x70000000 // GT_CNS_INT -- constant is a ptr to a string handle
+#define GTF_ICON_PTR_HDL 0x80000000 // GT_CNS_INT -- constant is a ldptr handle
+#define GTF_ICON_VARG_HDL 0x90000000 // GT_CNS_INT -- constant is a var arg cookie handle
+#define GTF_ICON_PINVKI_HDL 0xA0000000 // GT_CNS_INT -- constant is a pinvoke calli handle
+#define GTF_ICON_TOKEN_HDL 0xB0000000 // GT_CNS_INT -- constant is a token handle
+#define GTF_ICON_TLS_HDL 0xC0000000 // GT_CNS_INT -- constant is a TLS ref with offset
+#define GTF_ICON_FTN_ADDR 0xD0000000 // GT_CNS_INT -- constant is a function address
+#define GTF_ICON_CIDMID_HDL 0xE0000000 // GT_CNS_INT -- constant is a class or module ID handle
+#define GTF_ICON_BBC_PTR 0xF0000000 // GT_CNS_INT -- constant is a basic block count pointer
+
+#define GTF_ICON_FIELD_OFF 0x08000000 // GT_CNS_INT -- constant is a field offset
+
+#define GTF_BLK_VOLATILE 0x40000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
+ // -- is a volatile block operation
+#define GTF_BLK_UNALIGNED 0x02000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK
+ // -- is an unaligned block operation
+#define GTF_BLK_INIT 0x01000000 // GT_ASG, GT_STORE_BLK, GT_STORE_OBJ, GT_STORE_DYNBLK -- is an init block operation
+
+#define GTF_OVERFLOW 0x10000000 // GT_ADD, GT_SUB, GT_MUL, - Need overflow check
+ // GT_ASG_ADD, GT_ASG_SUB,
+ // GT_CAST
+ // Use gtOverflow(Ex)() to check this flag
+
+#define GTF_NO_OP_NO 0x80000000 // GT_NO_OP -- have the code generator generate a special nop
+
+#define GTF_ARR_BOUND_INBND 0x80000000 // GT_ARR_BOUNDS_CHECK -- have proved this check is always in-bounds
+
+#define GTF_ARRLEN_ARR_IDX 0x80000000 // GT_ARR_LENGTH -- Length which feeds into an array index expression
+
+#define GTF_LIST_AGGREGATE 0x80000000 // GT_LIST -- Indicates that this list should be treated as an
+ // anonymous aggregate value (e.g. a multi-value argument).
+
+//----------------------------------------------------------------
+
+#define GTF_STMT_CMPADD 0x80000000 // GT_STMT -- added by compiler
+#define GTF_STMT_HAS_CSE 0x40000000 // GT_STMT -- CSE def or use was substituted
+
+//----------------------------------------------------------------
+
+#if defined(DEBUG)
+#define GTF_DEBUG_NONE 0x00000000 // No debug flags.
+
+#define GTF_DEBUG_NODE_MORPHED 0x00000001 // the node has been morphed (in the global morphing phase)
+#define GTF_DEBUG_NODE_SMALL 0x00000002
+#define GTF_DEBUG_NODE_LARGE 0x00000004
+
+#define GTF_DEBUG_NODE_MASK 0x00000007 // These flags are all node (rather than operation) properties.
+
+#define GTF_DEBUG_VAR_CSE_REF 0x00800000 // GT_LCL_VAR -- This is a CSE LCL_VAR node
+#endif // defined(DEBUG)
+
+ GenTreePtr gtNext;
+ GenTreePtr gtPrev;
+
+#ifdef DEBUG
+ unsigned gtTreeID;
+ unsigned gtSeqNum; // liveness traversal order within the current statement
+#endif
+
+ static const unsigned short gtOperKindTable[];
+
+ static unsigned OperKind(unsigned gtOper)
+ {
+ assert(gtOper < GT_COUNT);
+
+ return gtOperKindTable[gtOper];
+ }
+
+ unsigned OperKind() const
+ {
+ assert(gtOper < GT_COUNT);
+
+ return gtOperKindTable[gtOper];
+ }
+
+ static bool IsExOp(unsigned opKind)
+ {
+ return (opKind & GTK_EXOP) != 0;
+ }
+ // Returns the operKind with the GTK_EXOP bit removed (the
+ // kind of operator, unary or binary, that is extended).
+ static unsigned StripExOp(unsigned opKind)
+ {
+ return opKind & ~GTK_EXOP;
+ }
+
+ bool IsValue() const
+ {
+ if ((OperKind(gtOper) & GTK_NOVALUE) != 0)
+ {
+ return false;
+ }
+
+ if (gtOper == GT_NOP || gtOper == GT_CALL)
+ {
+ return gtType != TYP_VOID;
+ }
+
+ if (gtOper == GT_LIST)
+ {
+ return (gtFlags & GTF_LIST_AGGREGATE) != 0;
+ }
+
+ return true;
+ }
+
+ bool IsLIR() const
+ {
+ if ((OperKind(gtOper) & GTK_NOTLIR) != 0)
+ {
+ return false;
+ }
+
+ switch (gtOper)
+ {
+ case GT_NOP:
+ // NOPs may only be present in LIR if they do not produce a value.
+ return IsNothingNode();
+
+ case GT_ARGPLACE:
+ // ARGPLACE nodes may not be present in a block's LIR sequence, but they may
+ // be present as children of an LIR node.
+ return (gtNext == nullptr) && (gtPrev == nullptr);
+
+ case GT_LIST:
+ // LIST nodes may only be present in an LIR sequence if they represent aggregates.
+ // They are always allowed, however, as children of an LIR node.
+ return ((gtFlags & GTF_LIST_AGGREGATE) != 0) || ((gtNext == nullptr) && (gtPrev == nullptr));
+
+ case GT_ADDR:
+ {
+ // ADDR nodes may only be present in LIR if the location they refer to is not a
+ // local, class variable, or IND node.
+ GenTree* location = const_cast<GenTree*>(this)->gtGetOp1();
+ genTreeOps locationOp = location->OperGet();
+ return !location->IsLocal() && (locationOp != GT_CLS_VAR) && (locationOp != GT_IND);
+ }
+
+ default:
+ // All other nodes are assumed to be correct.
+ return true;
+ }
+ }
+
+ static bool OperIsConst(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_CONST) != 0;
+ }
+
+ bool OperIsConst() const
+ {
+ return (OperKind(gtOper) & GTK_CONST) != 0;
+ }
+
+ static bool OperIsLeaf(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_LEAF) != 0;
+ }
+
+ bool OperIsLeaf() const
+ {
+ return (OperKind(gtOper) & GTK_LEAF) != 0;
+ }
+
+ static bool OperIsCompare(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_RELOP) != 0;
+ }
+
+ static bool OperIsLocal(genTreeOps gtOper)
+ {
+ bool result = (OperKind(gtOper) & GTK_LOCAL) != 0;
+ assert(result == (gtOper == GT_LCL_VAR || gtOper == GT_PHI_ARG || gtOper == GT_REG_VAR ||
+ gtOper == GT_LCL_FLD || gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD));
+ return result;
+ }
+
+ static bool OperIsLocalAddr(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LCL_VAR_ADDR || gtOper == GT_LCL_FLD_ADDR);
+ }
+
+ static bool OperIsLocalField(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LCL_FLD || gtOper == GT_LCL_FLD_ADDR || gtOper == GT_STORE_LCL_FLD);
+ }
+
+ inline bool OperIsLocalField() const
+ {
+ return OperIsLocalField(gtOper);
+ }
+
+ static bool OperIsScalarLocal(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LCL_VAR || gtOper == GT_REG_VAR || gtOper == GT_STORE_LCL_VAR);
+ }
+
+ static bool OperIsNonPhiLocal(genTreeOps gtOper)
+ {
+ return OperIsLocal(gtOper) && (gtOper != GT_PHI_ARG);
+ }
+
+ static bool OperIsLocalRead(genTreeOps gtOper)
+ {
+ return (OperIsLocal(gtOper) && !OperIsLocalStore(gtOper));
+ }
+
+ static bool OperIsLocalStore(genTreeOps gtOper)
+ {
+ return (gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD);
+ }
+
+ static bool OperIsAddrMode(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LEA);
+ }
+
+ bool OperIsBlkOp();
+ bool OperIsCopyBlkOp();
+ bool OperIsInitBlkOp();
+ bool OperIsDynBlkOp();
+
+ static bool OperIsBlk(genTreeOps gtOper)
+ {
+ return ((gtOper == GT_BLK) || (gtOper == GT_OBJ) || (gtOper == GT_DYN_BLK) || (gtOper == GT_STORE_BLK) ||
+ (gtOper == GT_STORE_OBJ) || (gtOper == GT_STORE_DYN_BLK));
+ }
+
+ bool OperIsBlk() const
+ {
+ return OperIsBlk(OperGet());
+ }
+
+ static bool OperIsStoreBlk(genTreeOps gtOper)
+ {
+ return ((gtOper == GT_STORE_BLK) || (gtOper == GT_STORE_OBJ) || (gtOper == GT_STORE_DYN_BLK));
+ }
+
+ bool OperIsStoreBlk() const
+ {
+ return OperIsStoreBlk(OperGet());
+ }
+
+ bool OperIsPutArgStk() const
+ {
+ return gtOper == GT_PUTARG_STK;
+ }
+
+ bool OperIsPutArgReg() const
+ {
+ return gtOper == GT_PUTARG_REG;
+ }
+
+ bool OperIsPutArg() const
+ {
+ return OperIsPutArgStk() || OperIsPutArgReg();
+ }
+
+ bool OperIsAddrMode() const
+ {
+ return OperIsAddrMode(OperGet());
+ }
+
+ bool OperIsLocal() const
+ {
+ return OperIsLocal(OperGet());
+ }
+
+ bool OperIsLocalAddr() const
+ {
+ return OperIsLocalAddr(OperGet());
+ }
+
+ bool OperIsScalarLocal() const
+ {
+ return OperIsScalarLocal(OperGet());
+ }
+
+ bool OperIsNonPhiLocal() const
+ {
+ return OperIsNonPhiLocal(OperGet());
+ }
+
+ bool OperIsLocalStore() const
+ {
+ return OperIsLocalStore(OperGet());
+ }
+
+ bool OperIsLocalRead() const
+ {
+ return OperIsLocalRead(OperGet());
+ }
+
+ bool OperIsCompare()
+ {
+ return (OperKind(gtOper) & GTK_RELOP) != 0;
+ }
+
+ static bool OperIsLogical(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_LOGOP) != 0;
+ }
+
+ bool OperIsLogical() const
+ {
+ return (OperKind(gtOper) & GTK_LOGOP) != 0;
+ }
+
+ static bool OperIsShift(genTreeOps gtOper)
+ {
+ return (gtOper == GT_LSH) || (gtOper == GT_RSH) || (gtOper == GT_RSZ);
+ }
+
+ bool OperIsShift() const
+ {
+ return OperIsShift(OperGet());
+ }
+
+ static bool OperIsRotate(genTreeOps gtOper)
+ {
+ return (gtOper == GT_ROL) || (gtOper == GT_ROR);
+ }
+
+ bool OperIsRotate() const
+ {
+ return OperIsRotate(OperGet());
+ }
+
+ static bool OperIsShiftOrRotate(genTreeOps gtOper)
+ {
+ return OperIsShift(gtOper) || OperIsRotate(gtOper);
+ }
+
+ bool OperIsShiftOrRotate() const
+ {
+ return OperIsShiftOrRotate(OperGet());
+ }
+
+ bool OperIsArithmetic() const
+ {
+ genTreeOps op = OperGet();
+ return op == GT_ADD || op == GT_SUB || op == GT_MUL || op == GT_DIV || op == GT_MOD
+
+ || op == GT_UDIV || op == GT_UMOD
+
+ || op == GT_OR || op == GT_XOR || op == GT_AND
+
+ || OperIsShiftOrRotate(op);
+ }
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+ static bool OperIsHigh(genTreeOps gtOper)
+ {
+ switch (gtOper)
+ {
+ case GT_ADD_HI:
+ case GT_SUB_HI:
+ case GT_MUL_HI:
+ case GT_DIV_HI:
+ case GT_MOD_HI:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool OperIsHigh() const
+ {
+ return OperIsHigh(OperGet());
+ }
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+
+ static bool OperIsUnary(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_UNOP) != 0;
+ }
+
+ bool OperIsUnary() const
+ {
+ return OperIsUnary(gtOper);
+ }
+
+ static bool OperIsBinary(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_BINOP) != 0;
+ }
+
+ bool OperIsBinary() const
+ {
+ return OperIsBinary(gtOper);
+ }
+
+ static bool OperIsSimple(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_SMPOP) != 0;
+ }
+
+ static bool OperIsSpecial(genTreeOps gtOper)
+ {
+ return ((OperKind(gtOper) & GTK_KINDMASK) == GTK_SPECIAL);
+ }
+
+ bool OperIsSimple() const
+ {
+ return OperIsSimple(gtOper);
+ }
+
+#ifdef FEATURE_SIMD
+ bool isCommutativeSIMDIntrinsic();
+#else // !
+ bool isCommutativeSIMDIntrinsic()
+ {
+ return false;
+ }
+#endif // FEATURE_SIMD
+
+ static bool OperIsCommutative(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_COMMUTE) != 0;
+ }
+
+ bool OperIsCommutative()
+ {
+ return OperIsCommutative(gtOper) || (OperIsSIMD(gtOper) && isCommutativeSIMDIntrinsic());
+ }
+
+ static bool OperIsAssignment(genTreeOps gtOper)
+ {
+ return (OperKind(gtOper) & GTK_ASGOP) != 0;
+ }
+
+ bool OperIsAssignment() const
+ {
+ return OperIsAssignment(gtOper);
+ }
+
+ static bool OperIsIndir(genTreeOps gtOper)
+ {
+ return gtOper == GT_IND || gtOper == GT_STOREIND || gtOper == GT_NULLCHECK || OperIsBlk(gtOper);
+ }
+
+ bool OperIsIndir() const
+ {
+ return OperIsIndir(gtOper);
+ }
+
+ static bool OperIsImplicitIndir(genTreeOps gtOper)
+ {
+ switch (gtOper)
+ {
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ case GT_BOX:
+ case GT_ARR_INDEX:
+ case GT_ARR_ELEM:
+ case GT_ARR_OFFSET:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ bool OperIsImplicitIndir() const
+ {
+ return OperIsImplicitIndir(gtOper);
+ }
+
+ bool OperIsStore() const
+ {
+ return OperIsStore(gtOper);
+ }
+
+ static bool OperIsStore(genTreeOps gtOper)
+ {
+ return (gtOper == GT_STOREIND || gtOper == GT_STORE_LCL_VAR || gtOper == GT_STORE_LCL_FLD ||
+ gtOper == GT_STORE_CLS_VAR || gtOper == GT_STORE_BLK || gtOper == GT_STORE_OBJ ||
+ gtOper == GT_STORE_DYN_BLK);
+ }
+
+ static bool OperIsAtomicOp(genTreeOps gtOper)
+ {
+ return (gtOper == GT_XADD || gtOper == GT_XCHG || gtOper == GT_LOCKADD || gtOper == GT_CMPXCHG);
+ }
+
+ bool OperIsAtomicOp() const
+ {
+ return OperIsAtomicOp(gtOper);
+ }
+
+ // This is basically here for cleaner FEATURE_SIMD #ifdefs.
+ static bool OperIsSIMD(genTreeOps gtOper)
+ {
+#ifdef FEATURE_SIMD
+ return gtOper == GT_SIMD;
+#else // !FEATURE_SIMD
+ return false;
+#endif // !FEATURE_SIMD
+ }
+
+ bool OperIsSIMD()
+ {
+ return OperIsSIMD(gtOper);
+ }
+
+ bool OperIsAggregate()
+ {
+ return (gtOper == GT_LIST) && ((gtFlags & GTF_LIST_AGGREGATE) != 0);
+ }
+
+ // Requires that "op" is an op= operator. Returns
+ // the corresponding "op".
+ static genTreeOps OpAsgToOper(genTreeOps op);
+
+#ifdef DEBUG
+ bool NullOp1Legal() const
+ {
+ assert(OperIsSimple(gtOper));
+ switch (gtOper)
+ {
+ case GT_PHI:
+ case GT_LEA:
+ case GT_RETFILT:
+ case GT_NOP:
+ return true;
+ case GT_RETURN:
+ return gtType == TYP_VOID;
+ default:
+ return false;
+ }
+ }
+
+ bool NullOp2Legal() const
+ {
+ assert(OperIsSimple(gtOper) || OperIsBlk(gtOper));
+ if (!OperIsBinary(gtOper))
+ {
+ return true;
+ }
+ switch (gtOper)
+ {
+ case GT_LIST:
+ case GT_INTRINSIC:
+ case GT_LEA:
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+#endif // FEATURE_SIMD
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ static inline bool RequiresNonNullOp2(genTreeOps oper);
+ bool IsListForMultiRegArg();
+#endif // DEBUG
+
+ inline bool IsFPZero();
+ inline bool IsIntegralConst(ssize_t constVal);
+
+ inline bool IsBoxedValue();
+
+ bool IsList() const
+ {
+ return gtOper == GT_LIST;
+ }
+
+ inline GenTreePtr MoveNext();
+
+ inline GenTreePtr Current();
+
+ inline GenTreePtr* pCurrent();
+
+ inline GenTreePtr gtGetOp1();
+
+ inline GenTreePtr gtGetOp2();
+
+ // Given a tree node, if this is a child of that node, return the pointer to the child node so that it
+ // can be modified; otherwise, return null.
+ GenTreePtr* gtGetChildPointer(GenTreePtr parent);
+
+ // Given a tree node, if this node uses that node, return the use as an out parameter and return true.
+ // Otherwise, return false.
+ bool TryGetUse(GenTree* def, GenTree*** use);
+
+ // Get the parent of this node, and optionally capture the pointer to the child so that it can be modified.
+ GenTreePtr gtGetParent(GenTreePtr** parentChildPtrPtr);
+
+ inline GenTreePtr gtEffectiveVal(bool commaOnly = false);
+
+ // Return the child of this node if it is a GT_RELOAD or GT_COPY; otherwise simply return the node itself
+ inline GenTree* gtSkipReloadOrCopy();
+
+ // Returns true if it is a call node returning its value in more than one register
+ inline bool IsMultiRegCall() const;
+
+ // Returns true if it is a GT_COPY or GT_RELOAD node
+ inline bool IsCopyOrReload() const;
+
+ // Returns true if it is a GT_COPY or GT_RELOAD of a multi-reg call node
+ inline bool IsCopyOrReloadOfMultiRegCall() const;
+
+ bool OperMayThrow();
+
+ unsigned GetScaleIndexMul();
+ unsigned GetScaleIndexShf();
+ unsigned GetScaledIndex();
+
+ // Returns true if "addr" is a GT_ADD node, at least one of whose arguments is an integer
+ // (<= 32 bit) constant. If it returns true, it sets "*offset" to (one of the) constant value(s), and
+ // "*addr" to the other argument.
+ bool IsAddWithI32Const(GenTreePtr* addr, int* offset);
+
+public:
+#if SMALL_TREE_NODES
+ static unsigned char s_gtNodeSizes[];
+#endif
+
+ static void InitNodeSize();
+
+ size_t GetNodeSize() const;
+
+ bool IsNodeProperlySized() const;
+
+ void CopyFrom(const GenTree* src, Compiler* comp);
+
+ static genTreeOps ReverseRelop(genTreeOps relop);
+
+ static genTreeOps SwapRelop(genTreeOps relop);
+
+ //---------------------------------------------------------------------
+
+ static bool Compare(GenTreePtr op1, GenTreePtr op2, bool swapOK = false);
+
+//---------------------------------------------------------------------
+#ifdef DEBUG
+ //---------------------------------------------------------------------
+
+ static const char* NodeName(genTreeOps op);
+
+ static const char* OpName(genTreeOps op);
+
+//---------------------------------------------------------------------
+#endif
+ //---------------------------------------------------------------------
+
+ bool IsNothingNode() const;
+ void gtBashToNOP();
+
+ // Value number update action enumeration
+ enum ValueNumberUpdate
+ {
+ CLEAR_VN, // Clear value number
+ PRESERVE_VN // Preserve value number
+ };
+
+ void SetOper(genTreeOps oper, ValueNumberUpdate vnUpdate = CLEAR_VN); // set gtOper
+ void SetOperResetFlags(genTreeOps oper); // set gtOper and reset flags
+
+ void ChangeOperConst(genTreeOps oper); // ChangeOper(constOper)
+ // set gtOper and only keep GTF_COMMON_MASK flags
+ void ChangeOper(genTreeOps oper, ValueNumberUpdate vnUpdate = CLEAR_VN);
+ void ChangeOperUnchecked(genTreeOps oper);
+
+ void ChangeType(var_types newType)
+ {
+ var_types oldType = gtType;
+ gtType = newType;
+ GenTree* node = this;
+ while (node->gtOper == GT_COMMA)
+ {
+ node = node->gtGetOp2();
+ assert(node->gtType == oldType);
+ node->gtType = newType;
+ }
+ }
+
+ bool IsLocal() const
+ {
+ return OperIsLocal(OperGet());
+ }
+
+ // Returns "true" iff 'this' is a GT_LCL_FLD or GT_STORE_LCL_FLD on which the type
+ // is not the same size as the type of the GT_LCL_VAR.
+ bool IsPartialLclFld(Compiler* comp);
+
+ // Returns "true" iff "this" defines a local variable. Requires "comp" to be the
+ // current compilation. If returns "true", sets "*pLclVarTree" to the
+ // tree for the local that is defined, and, if "pIsEntire" is non-null, sets "*pIsEntire" to
+ // true or false, depending on whether the assignment writes to the entirety of the local
+ // variable, or just a portion of it.
+ bool DefinesLocal(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire = nullptr);
+
+ // Returns true if "this" represents the address of a local, or a field of a local. If returns true, sets
+ // "*pLclVarTree" to the node indicating the local variable. If the address is that of a field of this node,
+ // sets "*pFldSeq" to the field sequence representing that field, else null.
+ bool IsLocalAddrExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq);
+
+ // Simpler variant of the above which just returns the local node if this is an expression that
+ // yields an address into a local
+ GenTreeLclVarCommon* IsLocalAddrExpr();
+
+ // Determine if this is a LclVarCommon node and return some additional info about it in the
+ // two out parameters.
+ bool IsLocalExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq);
+
+ // Determine whether this is an assignment tree of the form X = X (op) Y,
+ // where Y is an arbitrary tree, and X is a lclVar.
+ unsigned IsLclVarUpdateTree(GenTree** otherTree, genTreeOps* updateOper);
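+ // Example (illustrative only): for a tree representing "x = x + y", where 'x' is a lclVar, this
+ // recognizes the update form and reports 'y' through "otherTree" and GT_ADD through "updateOper".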
+
+ // If returns "true", "this" may represent the address of a static or instance field
+ // (or a field of such a field, in the case of an object field of type struct).
+ // If returns "true", then either "*pObj" is set to the object reference,
+ // or "*pStatic" is set to the baseAddr or offset to be added to the "*pFldSeq"
+ // or "*pStatic" is set to the baseAddr or offset to be added to the "*pFldSeq".
+ // The boolean return value only indicates that "this" *may* be a field address
+ // -- the field sequence must also be checked.
+ // If it is a field address, the field sequence will be a sequence of length >= 1,
+ // starting with an instance or static field, and optionally continuing with struct fields.
+ bool IsFieldAddr(Compiler* comp, GenTreePtr* pObj, GenTreePtr* pStatic, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be the address of an array (the child of a GT_IND labeled with GTF_IND_ARR_INDEX).
+ // Sets "pArr" to the node representing the array (either an array object pointer, or perhaps a byref to some
+ // element).
+ // Sets "*pArrayType" to the class handle for the array type.
+ // Sets "*inxVN" to the value number inferred for the array index.
+ // Sets "*pFldSeq" to the sequence, if any, of struct fields used to index into the array element.
+ void ParseArrayAddress(
+ Compiler* comp, struct ArrayInfo* arrayInfo, GenTreePtr* pArr, ValueNum* pInxVN, FieldSeqNode** pFldSeq);
+
+ // Helper method for the above.
+ void ParseArrayAddressWork(
+ Compiler* comp, ssize_t inputMul, GenTreePtr* pArr, ValueNum* pInxVN, ssize_t* pOffset, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be a GT_IND. Requires the outermost caller to set "*pFldSeq" to nullptr.
+ // Returns true if it is an array index expression, or access to a (sequence of) struct field(s)
+ // within a struct array element. If it returns true, sets *arrayInfo to the array information, and sets *pFldSeq
+ // to the sequence of struct field accesses.
+ bool ParseArrayElemForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be the address of a (possible) array element (or struct field within that).
+ // If it is, sets "*arrayInfo" to the array access info, "*pFldSeq" to the sequence of struct fields
+ // accessed within the array element, and returns true. If not, returns "false".
+ bool ParseArrayElemAddrForm(Compiler* comp, ArrayInfo* arrayInfo, FieldSeqNode** pFldSeq);
+
+ // Requires "this" to be an int expression. If it is a sequence of one or more integer constants added together,
+ // returns true and sets "*pFldSeq" to the sequence of fields with which those constants are annotated.
+ bool ParseOffsetForm(Compiler* comp, FieldSeqNode** pFldSeq);
+
+ // Labels "*this" as an array index expression: label all constants and variables that could contribute, as part of
+ // an affine expression, to the value of the index.
+ void LabelIndex(Compiler* comp, bool isConst = true);
+
+ // Assumes that "this" occurs in a context where it is being dereferenced as the LHS of an assignment-like
+ // statement (assignment, initblk, or copyblk). The "width" should be the number of bytes copied by the
+ // operation. Returns "true" if "this" is an address of (or within)
+ // a local variable; sets "*pLclVarTree" to that local variable instance; and, if "pIsEntire" is non-null,
+ // sets "*pIsEntire" to true if this assignment writes the full width of the local.
+ bool DefinesLocalAddr(Compiler* comp, unsigned width, GenTreeLclVarCommon** pLclVarTree, bool* pIsEntire);
+
+ bool IsRegVar() const
+ {
+ return OperGet() == GT_REG_VAR ? true : false;
+ }
+ bool InReg() const
+ {
+ return (gtFlags & GTF_REG_VAL) ? true : false;
+ }
+ void SetInReg()
+ {
+ gtFlags |= GTF_REG_VAL;
+ }
+
+ regNumber GetReg() const
+ {
+ return InReg() ? gtRegNum : REG_NA;
+ }
+ bool IsRegVarDeath() const
+ {
+ assert(OperGet() == GT_REG_VAR);
+ return (gtFlags & GTF_VAR_DEATH) ? true : false;
+ }
+ bool IsRegVarBirth() const
+ {
+ assert(OperGet() == GT_REG_VAR);
+ return (gtFlags & GTF_REG_BIRTH) ? true : false;
+ }
+ bool IsReverseOp() const
+ {
+ return (gtFlags & GTF_REVERSE_OPS) ? true : false;
+ }
+
+ inline bool IsCnsIntOrI() const;
+
+ inline bool IsIntegralConst() const;
+
+ inline bool IsIntCnsFitsInI32();
+
+ inline bool IsCnsFltOrDbl() const;
+
+ inline bool IsCnsNonZeroFltOrDbl();
+
+ bool IsIconHandle() const
+ {
+ assert(gtOper == GT_CNS_INT);
+ return (gtFlags & GTF_ICON_HDL_MASK) ? true : false;
+ }
+
+ bool IsIconHandle(unsigned handleType) const
+ {
+ assert(gtOper == GT_CNS_INT);
+ assert((handleType & GTF_ICON_HDL_MASK) != 0); // check that handleType is one of the valid GTF_ICON_* values
+ assert((handleType & ~GTF_ICON_HDL_MASK) == 0);
+ return (gtFlags & GTF_ICON_HDL_MASK) == handleType;
+ }
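+ // Example (illustrative only): IsIconHandle(GTF_ICON_SCOPE_HDL) tests whether this constant carries
+ // a scope handle specifically, whereas the parameterless IsIconHandle() above tests whether it
+ // carries any handle kind at all.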
+
+ // Return just the part of the flags corresponding to the GTF_ICON_*_HDL flag. For example,
+ // GTF_ICON_SCOPE_HDL. The tree node must be a const int, but it might not be a handle, in which
+ // case we'll return zero.
+ unsigned GetIconHandleFlag() const
+ {
+ assert(gtOper == GT_CNS_INT);
+ return (gtFlags & GTF_ICON_HDL_MASK);
+ }
+
+ // Mark this node as no longer being a handle; clear its GTF_ICON_*_HDL bits.
+ void ClearIconHandleMask()
+ {
+ assert(gtOper == GT_CNS_INT);
+ gtFlags &= ~GTF_ICON_HDL_MASK;
+ }
+
+ // Return true if the two GT_CNS_INT trees have the same handle flag (GTF_ICON_*_HDL).
+ static bool SameIconHandleFlag(GenTree* t1, GenTree* t2)
+ {
+ return t1->GetIconHandleFlag() == t2->GetIconHandleFlag();
+ }
+
+ bool IsArgPlaceHolderNode() const
+ {
+ return OperGet() == GT_ARGPLACE;
+ }
+ bool IsCall() const
+ {
+ return OperGet() == GT_CALL;
+ }
+ bool IsStatement() const
+ {
+ return OperGet() == GT_STMT;
+ }
+ inline bool IsHelperCall();
+
+ bool IsVarAddr() const;
+ bool gtOverflow() const;
+ bool gtOverflowEx() const;
+ bool gtSetFlags() const;
+ bool gtRequestSetFlags();
+#ifdef DEBUG
+ bool gtIsValid64RsltMul();
+ static int gtDispFlags(unsigned flags, unsigned debugFlags);
+#endif
+
+ // cast operations
+ inline var_types CastFromType();
+ inline var_types& CastToType();
+
+ // Returns true if this gentree node is marked by lowering to indicate
+ // that codegen can still generate code even if it wasn't allocated a
+ // register.
+ bool IsRegOptional() const;
+
+ // Returns "true" iff "this" is a phi-related node (i.e. a GT_PHI_ARG, GT_PHI, or a PhiDefn).
+ bool IsPhiNode();
+
+ // Returns "true" iff "*this" is an assignment (GT_ASG) tree that defines an SSA name (lcl = phi(...));
+ bool IsPhiDefn();
+
+ // Returns "true" iff "*this" is a statement containing an assignment that defines an SSA name (lcl = phi(...));
+ bool IsPhiDefnStmt();
+
+ // Can't use an assignment operator, because we need the extra "comp" argument
+ // (to provide the allocator necessary for the VarSet assignment).
+ // TODO-Cleanup: Not really needed now, w/o liveset on tree nodes
+ void CopyTo(class Compiler* comp, const GenTree& gt);
+
+ // Like the above, except it assumes copying from a small node to a small node.
+ // (Following the code it replaces, it does *not* copy the GenTree fields,
+ // which CopyTo does.)
+ void CopyToSmall(const GenTree& gt);
+
+ // Because of the fact that we hid the assignment operator of "BitSet" (in DEBUG),
+ // we can't synthesize an assignment operator.
+ // TODO-Cleanup: Could change this w/o liveset on tree nodes
+ // (This is also necessary for the VTable trick.)
+ GenTree()
+ {
+ }
+
+ // Returns the number of children of the current node.
+ unsigned NumChildren();
+
+ // Requires "childNum < NumChildren()". Returns the "n"th child of "this."
+ GenTreePtr GetChild(unsigned childNum);
+
+ // Returns an iterator that will produce the use edge to each operand of this node. Differs
+ // from the sequence of nodes produced by a loop over `GetChild` in its handling of call, phi,
+ // and block op nodes.
+ GenTreeUseEdgeIterator UseEdgesBegin();
+ GenTreeUseEdgeIterator UseEdgesEnd();
+
+ IteratorPair<GenTreeUseEdgeIterator> UseEdges();
+
+ // Returns an iterator that will produce each operand of this node. Differs from the sequence
+ // of nodes produced by a loop over `GetChild` in its handling of call, phi, and block op
+ // nodes.
+ GenTreeOperandIterator OperandsBegin();
+ GenTreeOperandIterator OperandsEnd();
+
+ // Returns a range that will produce the operands of this node in use order.
+ IteratorPair<GenTreeOperandIterator> Operands();
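+
+ // Usage sketch (illustrative only, assuming IteratorPair exposes begin()/end() and 'node' is some
+ // GenTree*): the ranges above support range-based iteration, e.g.
+ //
+ //     for (GenTree* operand : node->Operands())
+ //     {
+ //         // visit each operand in use order
+ //     }
+ //
+ //     for (GenTree** use : node->UseEdges())
+ //     {
+ //         // '*use' is the operand; the use edge itself can be rewritten to replace it
+ //     }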
+
+ bool Precedes(GenTree* other);
+
+ // The maximum possible # of children of any node.
+ static const int MAX_CHILDREN = 6;
+
+ bool IsReuseRegVal() const
+ {
+ // This can be extended to non-constant nodes, but not to local or indir nodes.
+ if (OperIsConst() && ((gtFlags & GTF_REUSE_REG_VAL) != 0))
+ {
+ return true;
+ }
+ return false;
+ }
+ void SetReuseRegVal()
+ {
+ assert(OperIsConst());
+ gtFlags |= GTF_REUSE_REG_VAL;
+ }
+ void ResetReuseRegVal()
+ {
+ assert(OperIsConst());
+ gtFlags &= ~GTF_REUSE_REG_VAL;
+ }
+
+#ifdef DEBUG
+
+private:
+ GenTree& operator=(const GenTree& gt)
+ {
+ assert(!"Don't copy");
+ return *this;
+ }
+#endif // DEBUG
+
+#if DEBUGGABLE_GENTREE
+ // In DEBUG builds, add a dummy virtual method, to give the debugger run-time type information.
+ virtual void DummyVirt()
+ {
+ }
+
+ typedef void* VtablePtr;
+
+ VtablePtr GetVtableForOper(genTreeOps oper);
+ void SetVtableForOper(genTreeOps oper);
+
+ static VtablePtr s_vtablesForOpers[GT_COUNT];
+ static VtablePtr s_vtableForOp;
+#endif // DEBUGGABLE_GENTREE
+
+public:
+ inline void* operator new(size_t sz, class Compiler*, genTreeOps oper);
+
+ inline GenTree(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false));
+};
+
+//------------------------------------------------------------------------
+// GenTreeUseEdgeIterator: an iterator that will produce each use edge of a
+// GenTree node in the order in which they are
+// used. Note that the use edges of a node may not
+// correspond exactly to the nodes on the other
+// ends of its use edges: in particular, GT_LIST
+// nodes are expanded into their component parts
+// (with the optional exception of multi-reg
+// arguments). This differs from the behavior of
+// GenTree::GetChildPointer(), which does not expand
+// lists.
+//
+// Note: valid values of this type may be obtained by calling
+// `GenTree::UseEdgesBegin` and `GenTree::UseEdgesEnd`.
+//
+class GenTreeUseEdgeIterator final
+{
+ friend class GenTreeOperandIterator;
+ friend GenTreeUseEdgeIterator GenTree::UseEdgesBegin();
+ friend GenTreeUseEdgeIterator GenTree::UseEdgesEnd();
+
+ GenTree* m_node;
+ GenTree** m_edge;
+ GenTree* m_argList;
+ int m_state;
+
+ GenTreeUseEdgeIterator(GenTree* node);
+
+ GenTree** GetNextUseEdge() const;
+ void MoveToNextCallUseEdge();
+ void MoveToNextPhiUseEdge();
+#ifdef FEATURE_SIMD
+ void MoveToNextSIMDUseEdge();
+#endif
+ void MoveToNextAggregateUseEdge();
+
+public:
+ GenTreeUseEdgeIterator();
+
+ inline GenTree** operator*()
+ {
+ return m_edge;
+ }
+
+ inline GenTree** operator->()
+ {
+ return m_edge;
+ }
+
+ inline bool operator==(const GenTreeUseEdgeIterator& other) const
+ {
+ if (m_state == -1 || other.m_state == -1)
+ {
+ return m_state == other.m_state;
+ }
+
+ return (m_node == other.m_node) && (m_edge == other.m_edge) && (m_argList == other.m_argList) &&
+ (m_state == other.m_state);
+ }
+
+ inline bool operator!=(const GenTreeUseEdgeIterator& other) const
+ {
+ return !(operator==(other));
+ }
+
+ GenTreeUseEdgeIterator& operator++();
+};
+
+//------------------------------------------------------------------------
+// GenTreeOperandIterator: an iterator that will produce each operand of a
+// GenTree node in the order in which they are
+// used. This uses `GenTreeUseEdgeIterator` under
+// the covers and comes with the same caveats
+// w.r.t. `GetChild`.
+//
+// Note: valid values of this type may be obtained by calling
+// `GenTree::OperandsBegin` and `GenTree::OperandsEnd`.
+class GenTreeOperandIterator final
+{
+ friend GenTreeOperandIterator GenTree::OperandsBegin();
+ friend GenTreeOperandIterator GenTree::OperandsEnd();
+
+ GenTreeUseEdgeIterator m_useEdges;
+
+ GenTreeOperandIterator(GenTree* node) : m_useEdges(node)
+ {
+ }
+
+public:
+ GenTreeOperandIterator() : m_useEdges()
+ {
+ }
+
+ inline GenTree* operator*()
+ {
+ return *(*m_useEdges);
+ }
+
+ inline GenTree* operator->()
+ {
+ return *(*m_useEdges);
+ }
+
+ inline bool operator==(const GenTreeOperandIterator& other) const
+ {
+ return m_useEdges == other.m_useEdges;
+ }
+
+ inline bool operator!=(const GenTreeOperandIterator& other) const
+ {
+ return !(operator==(other));
+ }
+
+ inline GenTreeOperandIterator& operator++()
+ {
+ ++m_useEdges;
+ return *this;
+ }
+};
+
+/*****************************************************************************/
+// In the current design, we never instantiate GenTreeUnOp: it exists only to be
+// used as a base class. For unary operators, we instantiate GenTreeOp, with a NULL second
+// argument. We check that this is true dynamically. We could tighten this and get static
+// checking, but that would entail accessing the first child of a unary operator via something
+// like gtUnOp.gtOp1 instead of gtOp.gtOp1.
+struct GenTreeUnOp : public GenTree
+{
+ GenTreePtr gtOp1;
+
+protected:
+ GenTreeUnOp(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
+ : GenTree(oper, type DEBUGARG(largeNode)), gtOp1(nullptr)
+ {
+ }
+
+ GenTreeUnOp(genTreeOps oper, var_types type, GenTreePtr op1 DEBUGARG(bool largeNode = false))
+ : GenTree(oper, type DEBUGARG(largeNode)), gtOp1(op1)
+ {
+ assert(op1 != nullptr || NullOp1Legal());
+ if (op1 != nullptr)
+ { // Propagate effects flags from child.
+ gtFlags |= op1->gtFlags & GTF_ALL_EFFECT;
+ }
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeUnOp() : GenTree(), gtOp1(nullptr)
+ {
+ }
+#endif
+};
+
+struct GenTreeOp : public GenTreeUnOp
+{
+ GenTreePtr gtOp2;
+
+ GenTreeOp(genTreeOps oper, var_types type, GenTreePtr op1, GenTreePtr op2 DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode)), gtOp2(op2)
+ {
+ // comparisons are always integral types
+ assert(!GenTree::OperIsCompare(oper) || varTypeIsIntegral(type));
+ // Binary operators, with a few exceptions, require a non-nullptr
+ // second argument.
+ assert(op2 != nullptr || NullOp2Legal());
+ // Unary operators, on the other hand, require a null second argument.
+ assert(!OperIsUnary(oper) || op2 == nullptr);
+ // Propagate effects flags from child. (UnOp handled this for first child.)
+ if (op2 != nullptr)
+ {
+ gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
+ }
+ }
+
+ // A small set of types are unary operators with optional arguments. We use
+ // this constructor to build those.
+ GenTreeOp(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode)), gtOp2(nullptr)
+ {
+ // Unary operators with optional arguments:
+ assert(oper == GT_NOP || oper == GT_RETURN || oper == GT_RETFILT || OperIsBlk(oper));
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeOp() : GenTreeUnOp(), gtOp2(nullptr)
+ {
+ }
+#endif
+};
+
+struct GenTreeVal : public GenTree
+{
+ size_t gtVal1;
+
+ GenTreeVal(genTreeOps oper, var_types type, ssize_t val) : GenTree(oper, type), gtVal1(val)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeVal() : GenTree()
+ {
+ }
+#endif
+};
+
+struct GenTreeIntConCommon : public GenTree
+{
+ inline INT64 LngValue();
+ inline void SetLngValue(INT64 val);
+ inline ssize_t IconValue();
+ inline void SetIconValue(ssize_t val);
+
+ GenTreeIntConCommon(genTreeOps oper, var_types type DEBUGARG(bool largeNode = false))
+ : GenTree(oper, type DEBUGARG(largeNode))
+ {
+ }
+
+ bool FitsInI32()
+ {
+ return FitsInI32(IconValue());
+ }
+
+ static bool FitsInI32(ssize_t val)
+ {
+#ifdef _TARGET_64BIT_
+ return (int)val == val;
+#else
+ return true;
+#endif
+ }
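+
+ // Example: on a 64-bit target, FitsInI32(0x7FFFFFFF) and FitsInI32(-1) return true, while
+ // FitsInI32(0x80000000) returns false, since truncating that value to 32 bits and sign-extending
+ // it back does not reproduce the original.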
+
+ bool ImmedValNeedsReloc(Compiler* comp);
+ bool ImmedValCanBeFolded(Compiler* comp, genTreeOps op);
+
+#ifdef _TARGET_XARCH_
+ bool FitsInAddrBase(Compiler* comp);
+ bool AddrNeedsReloc(Compiler* comp);
+#endif
+
+#if DEBUGGABLE_GENTREE
+ GenTreeIntConCommon() : GenTree()
+ {
+ }
+#endif
+};
+
+// node representing a read from a physical register
+struct GenTreePhysReg : public GenTree
+{
+ // physregs need a field beyond gtRegNum because
+ // gtRegNum indicates the destination (and can be changed)
+ // whereas reg indicates the source
+ regNumber gtSrcReg;
+ GenTreePhysReg(regNumber r, var_types type = TYP_I_IMPL) : GenTree(GT_PHYSREG, type), gtSrcReg(r)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreePhysReg() : GenTree()
+ {
+ }
+#endif
+};
+
+#ifndef LEGACY_BACKEND
+// gtJumpTable - Switch Jump Table
+//
+// This node stores a DWORD constant that represents the
+// absolute address of a jump table for switches. The code
+// generator uses this table to emit the destination for every case;
+// the table is an array of addresses whose starting position is stored
+// in this constant.
+struct GenTreeJumpTable : public GenTreeIntConCommon
+{
+ ssize_t gtJumpTableAddr;
+
+ GenTreeJumpTable(var_types type DEBUGARG(bool largeNode = false))
+ : GenTreeIntConCommon(GT_JMPTABLE, type DEBUGARG(largeNode))
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeJumpTable() : GenTreeIntConCommon()
+ {
+ }
+#endif // DEBUG
+};
+#endif // !LEGACY_BACKEND
+
+/* gtIntCon -- integer constant (GT_CNS_INT) */
+struct GenTreeIntCon : public GenTreeIntConCommon
+{
+ /*
+ * This is the GT_CNS_INT struct definition.
+ * It's used to hold both int constants and pointer handle constants.
+ * For 64-bit targets we use only GT_CNS_INT, as it can represent all the possible sizes.
+ * For 32-bit targets we use GT_CNS_LNG to hold a 64-bit integer constant and GT_CNS_INT for all others.
+ * In the future when we retarget the JIT for x86 we should consider eliminating GT_CNS_LNG
+ */
+ ssize_t gtIconVal; // Must overlap and have the same offset with the gtLconVal field in GenTreeLngCon below.
+
+ /* The InitializeArray intrinsic needs to go back to the newarray statement
+ to find the class handle of the array so that we can get its size. However,
+ in ngen mode, the handle in that statement does not correspond to the compile
+ time handle (rather it lets you get a handle at run-time). In that case, we also
+ need to store a compile time handle, which goes in this gtCompileTimeHandle field.
+ */
+ ssize_t gtCompileTimeHandle;
+
+ // TODO-Cleanup: It's not clear what characterizes the cases where the field
+ // above is used. It may be that its uses and those of the "gtFieldSeq" field below
+ // are mutually exclusive, and they could be put in a union. Or else we should separate
+ // this type into three subtypes.
+
+ // If this constant represents the offset of one or more fields, "gtFieldSeq" represents that
+ // sequence of fields.
+ FieldSeqNode* gtFieldSeq;
+
+#if defined(LATE_DISASM)
+
+ /* If the constant was morphed from some other node,
+ these fields enable us to get back to what the node
+ originally represented. See use of gtNewIconHandleNode()
+ */
+
+ union {
+ /* Template struct - The significant field of the other
+ * structs should overlap exactly with this struct
+ */
+
+ struct
+ {
+ unsigned gtIconHdl1;
+ void* gtIconHdl2;
+ } gtIconHdl;
+
+ /* GT_FIELD, etc */
+
+ struct
+ {
+ unsigned gtIconCPX;
+ CORINFO_CLASS_HANDLE gtIconCls;
+ } gtIconFld;
+ };
+#endif
+
+ GenTreeIntCon(var_types type, ssize_t value DEBUGARG(bool largeNode = false))
+ : GenTreeIntConCommon(GT_CNS_INT, type DEBUGARG(largeNode))
+ , gtIconVal(value)
+ , gtCompileTimeHandle(0)
+ , gtFieldSeq(FieldSeqStore::NotAField())
+ {
+ }
+
+ GenTreeIntCon(var_types type, ssize_t value, FieldSeqNode* fields DEBUGARG(bool largeNode = false))
+ : GenTreeIntConCommon(GT_CNS_INT, type DEBUGARG(largeNode))
+ , gtIconVal(value)
+ , gtCompileTimeHandle(0)
+ , gtFieldSeq(fields)
+ {
+ assert(fields != nullptr);
+ }
+
+ void FixupInitBlkValue(var_types asgType);
+
+#ifdef _TARGET_64BIT_
+ void TruncateOrSignExtend32()
+ {
+ if (gtFlags & GTF_UNSIGNED)
+ {
+ gtIconVal = UINT32(gtIconVal);
+ }
+ else
+ {
+ gtIconVal = INT32(gtIconVal);
+ }
+ }
+#endif // _TARGET_64BIT_
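+
+ // Example (illustrative only): with gtIconVal == 0xFFFFFFFF on a 64-bit target,
+ // TruncateOrSignExtend32() leaves an unsigned (GTF_UNSIGNED) constant as 4294967295 but turns a
+ // signed constant into -1, sign-extending from the low 32 bits.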
+
+#if DEBUGGABLE_GENTREE
+ GenTreeIntCon() : GenTreeIntConCommon()
+ {
+ }
+#endif
+};
+
+/* gtLngCon -- long constant (GT_CNS_LNG) */
+
+struct GenTreeLngCon : public GenTreeIntConCommon
+{
+ INT64 gtLconVal; // Must overlap and have the same offset with the gtIconVal field in GenTreeIntCon above.
+ INT32 LoVal()
+ {
+ return (INT32)(gtLconVal & 0xffffffff);
+ }
+
+ INT32 HiVal()
+ {
+ return (INT32)(gtLconVal >> 32);
+ }
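+
+ // Example: for gtLconVal == 0x1122334455667788LL, HiVal() returns 0x11223344 and LoVal()
+ // returns 0x55667788.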
+
+ GenTreeLngCon(INT64 val) : GenTreeIntConCommon(GT_CNS_NATIVELONG, TYP_LONG)
+ {
+ SetLngValue(val);
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeLngCon() : GenTreeIntConCommon()
+ {
+ }
+#endif
+};
+
+inline INT64 GenTreeIntConCommon::LngValue()
+{
+#ifndef _TARGET_64BIT_
+ assert(gtOper == GT_CNS_LNG);
+ return AsLngCon()->gtLconVal;
+#else
+ return IconValue();
+#endif
+}
+
+inline void GenTreeIntConCommon::SetLngValue(INT64 val)
+{
+#ifndef _TARGET_64BIT_
+ assert(gtOper == GT_CNS_LNG);
+ AsLngCon()->gtLconVal = val;
+#else
+ // Compile time asserts that these two fields overlap and have the same offsets: gtIconVal and gtLconVal
+ C_ASSERT(offsetof(GenTreeLngCon, gtLconVal) == offsetof(GenTreeIntCon, gtIconVal));
+ C_ASSERT(sizeof(AsLngCon()->gtLconVal) == sizeof(AsIntCon()->gtIconVal));
+
+ SetIconValue(ssize_t(val));
+#endif
+}
+
+inline ssize_t GenTreeIntConCommon::IconValue()
+{
+ assert(gtOper == GT_CNS_INT); // We should never see a GT_CNS_LNG for a 64-bit target!
+ return AsIntCon()->gtIconVal;
+}
+
+inline void GenTreeIntConCommon::SetIconValue(ssize_t val)
+{
+ assert(gtOper == GT_CNS_INT); // We should never see a GT_CNS_LNG for a 64-bit target!
+ AsIntCon()->gtIconVal = val;
+}
+
+/* gtDblCon -- double constant (GT_CNS_DBL) */
+
+struct GenTreeDblCon : public GenTree
+{
+ double gtDconVal;
+
+ bool isBitwiseEqual(GenTreeDblCon* other)
+ {
+ unsigned __int64 bits = *(unsigned __int64*)(&gtDconVal);
+ unsigned __int64 otherBits = *(unsigned __int64*)(&(other->gtDconVal));
+ return (bits == otherBits);
+ }
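+
+ // Note: this is a raw bit-pattern comparison, so +0.0 and -0.0 compare as different, while two
+ // NaNs with identical bit patterns compare as equal.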
+
+ GenTreeDblCon(double val) : GenTree(GT_CNS_DBL, TYP_DOUBLE), gtDconVal(val)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeDblCon() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtStrCon -- string constant (GT_CNS_STR) */
+
+struct GenTreeStrCon : public GenTree
+{
+ unsigned gtSconCPX;
+ CORINFO_MODULE_HANDLE gtScpHnd;
+
+ // Because this node can come from an inlined method we need to
+ // have the scope handle, since it will become a helper call.
+ GenTreeStrCon(unsigned sconCPX, CORINFO_MODULE_HANDLE mod DEBUGARG(bool largeNode = false))
+ : GenTree(GT_CNS_STR, TYP_REF DEBUGARG(largeNode)), gtSconCPX(sconCPX), gtScpHnd(mod)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeStrCon() : GenTree()
+ {
+ }
+#endif
+};
+
+// Common supertype of LCL_VAR, LCL_FLD, REG_VAR, PHI_ARG
+// This inherits from UnOp because lclvar stores are Unops
+struct GenTreeLclVarCommon : public GenTreeUnOp
+{
+private:
+ unsigned _gtLclNum; // The local number. An index into the Compiler::lvaTable array.
+ unsigned _gtSsaNum; // The SSA number.
+
+public:
+ GenTreeLclVarCommon(genTreeOps oper, var_types type, unsigned lclNum DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+ {
+ SetLclNum(lclNum);
+ }
+
+ unsigned GetLclNum() const
+ {
+ return _gtLclNum;
+ }
+ __declspec(property(get = GetLclNum)) unsigned gtLclNum;
+
+ void SetLclNum(unsigned lclNum)
+ {
+ _gtLclNum = lclNum;
+ _gtSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ }
+
+ unsigned GetSsaNum() const
+ {
+ return _gtSsaNum;
+ }
+ __declspec(property(get = GetSsaNum)) unsigned gtSsaNum;
+
+ void SetSsaNum(unsigned ssaNum)
+ {
+ _gtSsaNum = ssaNum;
+ }
+
+ bool HasSsaName()
+ {
+ return (gtSsaNum != SsaConfig::RESERVED_SSA_NUM);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeLclVarCommon() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// gtLclVar -- load/store/addr of local variable
+
+struct GenTreeLclVar : public GenTreeLclVarCommon
+{
+ IL_OFFSET gtLclILoffs; // instr offset of ref (only for debug info)
+
+ GenTreeLclVar(var_types type, unsigned lclNum, IL_OFFSET ilOffs DEBUGARG(bool largeNode = false))
+ : GenTreeLclVarCommon(GT_LCL_VAR, type, lclNum DEBUGARG(largeNode)), gtLclILoffs(ilOffs)
+ {
+ }
+
+ GenTreeLclVar(genTreeOps oper, var_types type, unsigned lclNum, IL_OFFSET ilOffs DEBUGARG(bool largeNode = false))
+ : GenTreeLclVarCommon(oper, type, lclNum DEBUGARG(largeNode)), gtLclILoffs(ilOffs)
+ {
+ assert(OperIsLocal(oper) || OperIsLocalAddr(oper));
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeLclVar() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+// gtLclFld -- load/store/addr of local variable field
+
+struct GenTreeLclFld : public GenTreeLclVarCommon
+{
+ unsigned gtLclOffs; // offset into the variable to access
+
+ FieldSeqNode* gtFieldSeq; // This LclFld node represents some sequences of accesses.
+
+ // old/FE style constructor where load/store/addr share same opcode
+ GenTreeLclFld(var_types type, unsigned lclNum, unsigned lclOffs)
+ : GenTreeLclVarCommon(GT_LCL_FLD, type, lclNum), gtLclOffs(lclOffs), gtFieldSeq(nullptr)
+ {
+ assert(sizeof(*this) <= s_gtNodeSizes[GT_LCL_FLD]);
+ }
+
+ GenTreeLclFld(genTreeOps oper, var_types type, unsigned lclNum, unsigned lclOffs)
+ : GenTreeLclVarCommon(oper, type, lclNum), gtLclOffs(lclOffs), gtFieldSeq(nullptr)
+ {
+ assert(sizeof(*this) <= s_gtNodeSizes[GT_LCL_FLD]);
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeLclFld() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+struct GenTreeRegVar : public GenTreeLclVarCommon
+{
+ // TODO-Cleanup: Note that the base class GenTree already has a gtRegNum field.
+ // It's not clear exactly why a GT_REG_VAR has a separate field. When
+ // GT_REG_VAR is created, the two are identical. It appears that they may
+ // or may not remain so. In particular, there is a comment in stackfp.cpp
+ // that states:
+ //
+ // There used to be an assertion: assert(src->gtRegNum == src->gtRegVar.gtRegNum, ...)
+ // here, but there's actually no reason to assume that. AFAICT, for FP vars under stack FP,
+ // src->gtRegVar.gtRegNum is the allocated stack pseudo-register, but src->gtRegNum is the
+ // FP stack position into which that is loaded to represent a particular use of the variable.
+ //
+ // It might be the case that only for stackfp do they ever differ.
+ //
+ // The following might be possible: the GT_REG_VAR node has a last use prior to a complex
+ // subtree being evaluated. It could then be spilled from the register. Later,
+ // it could be unspilled into a different register, which would be recorded at
+ // the unspill time in the GenTree::gtRegNum, whereas GenTreeRegVar::gtRegNum
+ // is left alone. It's not clear why that is useful.
+ //
+ // Assuming there is a particular use, like stack fp, that requires it, maybe we
+ // can get rid of GT_REG_VAR and just leave it as GT_LCL_VAR, using the base class gtRegNum field.
+ // If we need it for stackfp, we could add a GenTreeStackFPRegVar type, which carries both the
+ // pieces of information, in a clearer and more specific way (in particular, with
+ // a different member name).
+ //
+
+private:
+ regNumberSmall _gtRegNum;
+
+public:
+ GenTreeRegVar(var_types type, unsigned lclNum, regNumber regNum) : GenTreeLclVarCommon(GT_REG_VAR, type, lclNum)
+ {
+ gtRegNum = regNum;
+ }
+
+ // The register number is stored in a small format (8 bits), but the getters return and the setters take
+ // a full-size (unsigned) format, to localize the casts here.
+
+ __declspec(property(get = GetRegNum, put = SetRegNum)) regNumber gtRegNum;
+
+ regNumber GetRegNum() const
+ {
+ return (regNumber)_gtRegNum;
+ }
+
+ void SetRegNum(regNumber reg)
+ {
+ _gtRegNum = (regNumberSmall)reg;
+ assert(_gtRegNum == reg);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeRegVar() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+/* gtCast -- conversion to a different type (GT_CAST) */
+
+struct GenTreeCast : public GenTreeOp
+{
+ GenTreePtr& CastOp()
+ {
+ return gtOp1;
+ }
+ var_types gtCastType;
+
+ GenTreeCast(var_types type, GenTreePtr op, var_types castType DEBUGARG(bool largeNode = false))
+ : GenTreeOp(GT_CAST, type, op, nullptr DEBUGARG(largeNode)), gtCastType(castType)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeCast() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+// GT_BOX nodes are place markers for boxed values. The "real" tree
+// for most purposes is in gtBoxOp.
+struct GenTreeBox : public GenTreeUnOp
+{
+ // An expanded helper call to implement the "box" if we don't get
+ // rid of it any other way. Must be in same position as op1.
+
+ GenTreePtr& BoxOp()
+ {
+ return gtOp1;
+ }
+ // This is the statement that contains the assignment tree when the node is an inlined GT_BOX on a value
+ // type
+ GenTreePtr gtAsgStmtWhenInlinedBoxValue;
+
+ GenTreeBox(var_types type, GenTreePtr boxOp, GenTreePtr asgStmtWhenInlinedBoxValue)
+ : GenTreeUnOp(GT_BOX, type, boxOp), gtAsgStmtWhenInlinedBoxValue(asgStmtWhenInlinedBoxValue)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeBox() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+/* gtField -- data member ref (GT_FIELD) */
+
+struct GenTreeField : public GenTree
+{
+ GenTreePtr gtFldObj;
+ CORINFO_FIELD_HANDLE gtFldHnd;
+ DWORD gtFldOffset;
+ bool gtFldMayOverlap;
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP gtFieldLookup;
+#endif
+
+ GenTreeField(var_types type) : GenTree(GT_FIELD, type)
+ {
+ gtFldMayOverlap = false;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeField() : GenTree()
+ {
+ }
+#endif
+};
+
+// Represents the Argument list of a call node, as a Lisp-style linked list.
+// (Originally I had hoped that this could have *only* the m_arg/m_rest fields, but it turns out
+// that enough of the GenTree mechanism is used that it makes sense just to make it a subtype. But
+// note that in many ways, this is *not* a "real" node of the tree, but rather a mechanism for
+// giving call nodes a flexible number of children. GenTreeArgListNodes never evaluate to registers,
+// for example.)
+
+// Note that while this extends GenTreeOp, it is *not* an EXOP. We don't add any new fields, and one
+// is free to allocate a GenTreeOp of type GT_LIST. If you use this type, you get the convenient Current/Rest
+// method names for the arguments.
+struct GenTreeArgList : public GenTreeOp
+{
+ bool IsAggregate() const
+ {
+ return (gtFlags & GTF_LIST_AGGREGATE) != 0;
+ }
+
+ GenTreePtr& Current()
+ {
+ return gtOp1;
+ }
+ GenTreeArgList*& Rest()
+ {
+ assert(gtOp2 == nullptr || gtOp2->OperGet() == GT_LIST);
+ return *reinterpret_cast<GenTreeArgList**>(&gtOp2);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeArgList() : GenTreeOp()
+ {
+ }
+#endif
+
+ GenTreeArgList(GenTreePtr arg) : GenTreeArgList(arg, nullptr)
+ {
+ }
+
+ GenTreeArgList(GenTreePtr arg, GenTreeArgList* rest) : GenTreeOp(GT_LIST, TYP_VOID, arg, rest)
+ {
+ // With structs passed in multiple args we could have an arg
+ // GT_LIST containing a list of LCL_FLDs, see IsListForMultiRegArg()
+ //
+ assert((arg != nullptr) && ((!arg->IsList()) || (arg->IsListForMultiRegArg())));
+ gtFlags |= arg->gtFlags & GTF_ALL_EFFECT;
+ if (rest != nullptr)
+ {
+ gtFlags |= rest->gtFlags & GTF_ALL_EFFECT;
+ }
+ }
+
+ GenTreeArgList* Prepend(Compiler* compiler, GenTree* element);
+};
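+
+// Usage sketch (illustrative only): the Lisp-style list above is typically walked via
+// Current()/Rest(); e.g., given some GenTreeCall* 'call', its original argument list can be
+// visited as:
+//
+//     for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+//     {
+//         GenTree* arg = args->Current();
+//         // process 'arg'
+//     }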
+
+// There was quite a bit of confusion in the code base about which of gtOp1 and gtOp2 was the
+// 'then' and 'else' clause of a colon node. Adding these accessors, while not enforcing anything,
+// at least *allows* the programmer to be obviously correct.
+// However, these conventions seem backward.
+// TODO-Cleanup: If we could get these accessors used everywhere, then we could switch them.
+struct GenTreeColon : public GenTreeOp
+{
+ GenTreePtr& ThenNode()
+ {
+ return gtOp2;
+ }
+ GenTreePtr& ElseNode()
+ {
+ return gtOp1;
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeColon() : GenTreeOp()
+ {
+ }
+#endif
+
+ GenTreeColon(var_types typ, GenTreePtr thenNode, GenTreePtr elseNode) : GenTreeOp(GT_COLON, typ, elseNode, thenNode)
+ {
+ }
+};
+
+// gtCall -- method call (GT_CALL)
+typedef class fgArgInfo* fgArgInfoPtr;
+enum class InlineObservation;
+
+// Return type descriptor of a GT_CALL node.
+// x64 Unix, Arm64, Arm32 and x86 allow a value to be returned in multiple
+// registers. For such calls this struct provides the following info
+// on their return type
+// - type of value returned in each return register
+// - ABI return register numbers in which the value is returned
+// - count of return registers in which the value is returned
+//
+// TODO-ARM: Update this to meet the needs of Arm64 and Arm32
+//
+// TODO-AllArch: Right now it is used for describing multi-reg returned types.
+// Eventually we would want to use it for describing even single-reg
+// returned types (e.g. structs returned in single register x64/arm).
+// This would allow us not to lie or normalize single struct return
+// values in importer/morph.
+struct ReturnTypeDesc
+{
+private:
+ var_types m_regType[MAX_RET_REG_COUNT];
+
+#ifdef DEBUG
+ bool m_inited;
+#endif
+
+public:
+ ReturnTypeDesc()
+ {
+ Reset();
+ }
+
+ // Initialize the Return Type Descriptor for a method that returns a struct type
+ void InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HANDLE retClsHnd);
+
+ // Initialize the Return Type Descriptor for a method that returns a TYP_LONG
+ // Only needed for X86
+ void InitializeLongReturnType(Compiler* comp);
+
+ // Reset type descriptor to defaults
+ void Reset()
+ {
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ m_regType[i] = TYP_UNKNOWN;
+ }
+#ifdef DEBUG
+ m_inited = false;
+#endif
+ }
+
+ //--------------------------------------------------------------------------------------------
+ // GetReturnRegCount: Get the count of return registers in which the return value is returned.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // Count of return registers.
+ // Returns 0 if the return type is not returned in registers.
+ unsigned GetReturnRegCount() const
+ {
+ assert(m_inited);
+
+ int regCount = 0;
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ if (m_regType[i] == TYP_UNKNOWN)
+ {
+ break;
+ }
+ // otherwise
+ regCount++;
+ }
+
+#ifdef DEBUG
+ // Any remaining elements in m_regType[] should also be TYP_UNKNOWN
+ for (unsigned i = regCount + 1; i < MAX_RET_REG_COUNT; ++i)
+ {
+ assert(m_regType[i] == TYP_UNKNOWN);
+ }
+#endif
+
+ return regCount;
+ }
+
+ //-----------------------------------------------------------------------
+ // IsMultiRegRetType: check whether the type is returned in multiple
+ // return registers.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // Returns true if the type is returned in multiple return registers.
+ // False otherwise.
+ // Note that we only have to examine the first two values to determine this
+ //
+ bool IsMultiRegRetType() const
+ {
+ if (MAX_RET_REG_COUNT < 2)
+ {
+ return false;
+ }
+ else
+ {
+ return ((m_regType[0] != TYP_UNKNOWN) && (m_regType[1] != TYP_UNKNOWN));
+ }
+ }
+
+ //--------------------------------------------------------------------------
+ // GetReturnRegType: Get var_type of the return register specified by index.
+ //
+ // Arguments:
+ // index - Index of the return register.
+ // First return register will have an index 0 and so on.
+ //
+ // Return Value:
+ // var_type of the return register specified by its index.
+ // asserts if the index does not have a valid register return type.
+
+ var_types GetReturnRegType(unsigned index)
+ {
+ var_types result = m_regType[index];
+ assert(result != TYP_UNKNOWN);
+
+ return result;
+ }
+
+ // Get ith ABI return register
+ regNumber GetABIReturnReg(unsigned idx);
+
+ // Get reg mask of ABI return registers
+ regMaskTP GetABIReturnRegs();
+};
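+
+// Usage sketch (illustrative only): given a GenTreeCall* 'call' whose value comes back in multiple
+// registers, the descriptor above can be queried roughly as follows (note that GetReturnTypeDesc()
+// returns nullptr when FEATURE_MULTIREG_RET is not defined):
+//
+//     ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+//     unsigned        regCount    = retTypeDesc->GetReturnRegCount();
+//     for (unsigned i = 0; i < regCount; ++i)
+//     {
+//         var_types regType = retTypeDesc->GetReturnRegType(i);
+//         regNumber reg     = retTypeDesc->GetABIReturnReg(i);
+//     }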
+
+struct GenTreeCall final : public GenTree
+{
+ GenTreePtr gtCallObjp; // The instance argument ('this' pointer)
+ GenTreeArgList* gtCallArgs; // The list of arguments in original evaluation order
+ GenTreeArgList* gtCallLateArgs; // On x86: The register arguments in an optimal order
+ // On ARM/x64: - also includes any outgoing arg space arguments
+ // - that were evaluated into a temp LclVar
+ fgArgInfoPtr fgArgInfo;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ int regArgListCount;
+ regList regArgList;
+#endif
+
+ // TODO-Throughput: Revisit this (this used to be only defined if
+ // FEATURE_FIXED_OUT_ARGS was enabled, so this makes GenTreeCall 4 bytes bigger on x86).
+ CORINFO_SIG_INFO* callSig; // Used by tail calls and to register callsites with the EE
+
+#ifdef LEGACY_BACKEND
+ regMaskTP gtCallRegUsedMask; // mask of registers used to pass parameters
+#endif // LEGACY_BACKEND
+
+#if FEATURE_MULTIREG_RET
+ // State required to support multi-reg returning call nodes.
+ // For now it is enabled only for x64 unix.
+ //
+ // TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns.
+ ReturnTypeDesc gtReturnTypeDesc;
+
+ // gtRegNum would always be the first return reg.
+ // The following array holds the other reg numbers of multi-reg return.
+ regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1];
+
+ // GTF_SPILL or GTF_SPILLED flag on a multi-reg call node indicates that one or
+ // more of its result regs are in that state. The spill flag of each of the
+ // return register is stored in the below array.
+ unsigned gtSpillFlags[MAX_RET_REG_COUNT];
+#endif
+
+ //-----------------------------------------------------------------------
+ // GetReturnTypeDesc: get the type descriptor of return value of the call
+ //
+ // Arguments:
+ // None
+ //
+ // Returns
+ // Type descriptor of the value returned by call
+ //
+ // Note:
+ // Right now this is implemented only for x64 Unix; it has yet to be
+ // implemented for other multi-reg target architectures (Arm64/Arm32/x86).
+ //
+ // TODO-AllArch: enable for all call nodes to unify single-reg and multi-reg returns.
+ ReturnTypeDesc* GetReturnTypeDesc()
+ {
+#if FEATURE_MULTIREG_RET
+ return &gtReturnTypeDesc;
+#else
+ return nullptr;
+#endif
+ }
+
+ //---------------------------------------------------------------------------
+ // GetRegNumByIdx: get ith return register allocated to this call node.
+ //
+ // Arguments:
+ // idx - index of the return register
+ //
+ // Return Value:
+ // Return regNumber of ith return register of call node.
+ // Returns REG_NA if there is no valid return register for the given index.
+ //
+ regNumber GetRegNumByIdx(unsigned idx) const
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ return gtRegNum;
+ }
+
+#if FEATURE_MULTIREG_RET
+ return gtOtherRegs[idx - 1];
+#else
+ return REG_NA;
+#endif
+ }
+
+ //----------------------------------------------------------------------
+ // SetRegNumByIdx: set ith return register of this call node
+ //
+ // Arguments:
+ // reg - reg number
+ // idx - index of the return register
+ //
+ // Return Value:
+ // None
+ //
+ void SetRegNumByIdx(regNumber reg, unsigned idx)
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ gtRegNum = reg;
+ }
+#if FEATURE_MULTIREG_RET
+ else
+ {
+ gtOtherRegs[idx - 1] = reg;
+ assert(gtOtherRegs[idx - 1] == reg);
+ }
+#else
+ unreached();
+#endif
+ }
+
+ //----------------------------------------------------------------------------
+ // ClearOtherRegs: clear multi-reg state to indicate no regs are allocated
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // None
+ //
+ void ClearOtherRegs()
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ gtOtherRegs[i] = REG_NA;
+ }
+#endif
+ }
+
+ //----------------------------------------------------------------------------
+ // CopyOtherRegs: copy multi-reg state from the given call node to this node
+ //
+ // Arguments:
+ // fromCall - GenTreeCall node from which to copy multi-reg state
+ //
+ // Return Value:
+ // None
+ //
+ void CopyOtherRegs(GenTreeCall* fromCall)
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ this->gtOtherRegs[i] = fromCall->gtOtherRegs[i];
+ }
+#endif
+ }
+
+ // Get reg mask of all the valid registers of gtOtherRegs array
+ regMaskTP GetOtherRegMask() const;
+
+ //----------------------------------------------------------------------
+ // GetRegSpillFlagByIdx: get spill flag associated with the return register
+ // specified by its index.
+ //
+ // Arguments:
+ // idx - Position or index of the return register
+ //
+ // Return Value:
+ // Returns the GTF_* spill flags associated with the return register at that index.
+ unsigned GetRegSpillFlagByIdx(unsigned idx) const
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+#if FEATURE_MULTIREG_RET
+ return gtSpillFlags[idx];
+#else
+ assert(!"unreached");
+ return 0;
+#endif
+ }
+
+ //----------------------------------------------------------------------
+ // SetRegSpillFlagByIdx: set spill flags for the return register
+ // specified by its index.
+ //
+ // Arguments:
+ // flags - GTF_* flags
+ // idx - Position or index of the return register
+ //
+ // Return Value:
+ // None
+ void SetRegSpillFlagByIdx(unsigned flags, unsigned idx)
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+#if FEATURE_MULTIREG_RET
+ gtSpillFlags[idx] = flags;
+#else
+ unreached();
+#endif
+ }
+
+ //-------------------------------------------------------------------
+ // clearOtherRegFlags: clear GTF_* flags associated with gtOtherRegs
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // None
+ void ClearOtherRegFlags()
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ gtSpillFlags[i] = 0;
+ }
+#endif
+ }
+
+ //-------------------------------------------------------------------------
+ // CopyOtherRegFlags: copy GTF_* flags associated with gtOtherRegs from
+ // the given call node.
+ //
+ // Arguments:
+ // fromCall - GenTreeCall node from which to copy
+ //
+ // Return Value:
+ // None
+ //
+ void CopyOtherRegFlags(GenTreeCall* fromCall)
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT; ++i)
+ {
+ this->gtSpillFlags[i] = fromCall->gtSpillFlags[i];
+ }
+#endif
+ }
+
+#define GTF_CALL_M_EXPLICIT_TAILCALL \
+ 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall
+#define GTF_CALL_M_VARARGS 0x0004 // GT_CALL -- the call uses varargs ABI
+#define GTF_CALL_M_RETBUFFARG 0x0008 // GT_CALL -- first parameter is the return buffer argument
+#define GTF_CALL_M_DELEGATE_INV 0x0010 // GT_CALL -- call to Delegate.Invoke
+#define GTF_CALL_M_NOGCCHECK 0x0020 // GT_CALL -- not a call for computing full interruptibility
+#define GTF_CALL_M_SPECIAL_INTRINSIC 0x0040 // GT_CALL -- function that could be optimized as an intrinsic
+ // in special cases. Used to optimize fast way out in morphing
+#define GTF_CALL_M_UNMGD_THISCALL \
+ 0x0080 // "this" pointer (first argument) should be enregistered (only for GTF_CALL_UNMANAGED)
+#define GTF_CALL_M_VIRTSTUB_REL_INDIRECT \
+ 0x0080 // the virtstub is indirected through a relative address (only for GTF_CALL_VIRT_STUB)
+#define GTF_CALL_M_NONVIRT_SAME_THIS \
+ 0x0080 // callee "this" pointer is equal to caller this pointer (only for GTF_CALL_NONVIRT)
+#define GTF_CALL_M_FRAME_VAR_DEATH 0x0100 // GT_CALL -- the compLvFrameListRoot variable dies here (last use)
+
+#ifndef LEGACY_BACKEND
+#define GTF_CALL_M_TAILCALL_VIA_HELPER 0x0200 // GT_CALL -- call is a tail call dispatched via tail call JIT helper.
+#endif // !LEGACY_BACKEND
+
+#if FEATURE_TAILCALL_OPT
+#define GTF_CALL_M_IMPLICIT_TAILCALL \
+ 0x0400 // GT_CALL -- call is an opportunistic tail call and importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL_TO_LOOP \
+ 0x0800 // GT_CALL -- call is a fast recursive tail call that can be converted into a loop
+#endif
+
+#define GTF_CALL_M_PINVOKE 0x1000 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE.
+ // A call marked as Pinvoke is not necessarily a GT_CALL_UNMANAGED. For e.g.
+ // an IL Stub dynamically generated for a PInvoke declaration is flagged as
+ // a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
+ // know when these flags are set.
+
+#define GTF_CALL_M_R2R_REL_INDIRECT 0x2000 // GT_CALL -- ready to run call is indirected through a relative address
+#define GTF_CALL_M_DOES_NOT_RETURN 0x4000 // GT_CALL -- call does not return
+#define GTF_CALL_M_SECURE_DELEGATE_INV 0x8000 // GT_CALL -- call is in secure delegate
+
+ bool IsUnmanaged() const
+ {
+ return (gtFlags & GTF_CALL_UNMANAGED) != 0;
+ }
+ bool NeedsNullCheck() const
+ {
+ return (gtFlags & GTF_CALL_NULLCHECK) != 0;
+ }
+ bool CallerPop() const
+ {
+ return (gtFlags & GTF_CALL_POP_ARGS) != 0;
+ }
+ bool IsVirtual() const
+ {
+ return (gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT;
+ }
+ bool IsVirtualStub() const
+ {
+ return (gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB;
+ }
+ bool IsVirtualVtable() const
+ {
+ return (gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_VTABLE;
+ }
+ bool IsInlineCandidate() const
+ {
+ return (gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0;
+ }
+
+#ifndef LEGACY_BACKEND
+ bool HasNonStandardAddedArgs(Compiler* compiler) const;
+ int GetNonStandardAddedArgCount(Compiler* compiler) const;
+#endif // !LEGACY_BACKEND
+
+ // Returns true if this call uses a retBuf argument as part of its calling convention.
+ bool HasRetBufArg() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_RETBUFFARG) != 0;
+ }
+
+ //-------------------------------------------------------------------------
+ // TreatAsHasRetBufArg:
+ //
+ // Arguments:
+ // compiler, the compiler instance so that we can call eeGetHelperNum
+ //
+ // Return Value:
+ // Returns true if we treat the call as if it has a retBuf argument.
+ // The call may actually have a retBuf argument, or it could be a JIT
+ // helper that we are still transforming during the importer phase.
+ //
+ // Notes:
+ // On ARM64 marking the method with the GTF_CALL_M_RETBUFFARG flag
+ // will make HasRetBufArg() return true, but will also force the
+ // use of register x8 to pass the RetBuf argument.
+ //
+ bool TreatAsHasRetBufArg(Compiler* compiler) const;
+
+ //-----------------------------------------------------------------------------------------
+ // HasMultiRegRetVal: whether the call node returns its value in multiple return registers.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // True if the call is returning a multi-reg return value. False otherwise.
+ //
+ // Note:
+ // This is implemented only for x64 Unix; it has yet to be implemented for
+ // other multi-reg return target architectures (arm64/arm32/x86).
+ //
+ bool HasMultiRegRetVal() const
+ {
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // LEGACY_BACKEND does not use multi reg returns for calls with long return types
+ return varTypeIsLong(gtType);
+#elif FEATURE_MULTIREG_RET
+ return varTypeIsStruct(gtType) && !HasRetBufArg();
+#else
+ return false;
+#endif
+ }
+
+ // Returns true if VM has flagged this method as CORINFO_FLG_PINVOKE.
+ bool IsPInvoke() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_PINVOKE) != 0;
+ }
+
+ // Note that the distinction between a "tail" prefixed call and an implicit tail call
+ // is maintained on the call node until fgMorphCall(), after which it will be
+ // either a tail call (i.e. IsTailCall() is true) or a non-tail call.
+ bool IsTailPrefixedCall() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) != 0;
+ }
+
+ // This method returning "true" implies that tail call flowgraph morphing has
+ // performed final checks and committed to making a tail call.
+ bool IsTailCall() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_TAILCALL) != 0;
+ }
+
+ // This method returning "true" implies that the importer has performed tail call checks
+ // and is providing a hint that this can be converted to a tail call.
+ bool CanTailCall() const
+ {
+ return IsTailPrefixedCall() || IsImplicitTailCall();
+ }
+
+#ifndef LEGACY_BACKEND
+ bool IsTailCallViaHelper() const
+ {
+ return IsTailCall() && (gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_HELPER);
+ }
+#else // LEGACY_BACKEND
+ bool IsTailCallViaHelper() const
+ {
+ return true;
+ }
+#endif // LEGACY_BACKEND
+
+#if FEATURE_FASTTAILCALL
+ bool IsFastTailCall() const
+ {
+ return IsTailCall() && !(gtCallMoreFlags & GTF_CALL_M_TAILCALL_VIA_HELPER);
+ }
+#else // !FEATURE_FASTTAILCALL
+ bool IsFastTailCall() const
+ {
+ return false;
+ }
+#endif // !FEATURE_FASTTAILCALL
+
+#if FEATURE_TAILCALL_OPT
+ // Returns true if this is marked for opportunistic tail calling.
+ // That is, can be tail called though not explicitly prefixed with "tail" prefix.
+ bool IsImplicitTailCall() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_IMPLICIT_TAILCALL) != 0;
+ }
+ bool IsTailCallConvertibleToLoop() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_TAILCALL_TO_LOOP) != 0;
+ }
+#else // !FEATURE_TAILCALL_OPT
+ bool IsImplicitTailCall() const
+ {
+ return false;
+ }
+ bool IsTailCallConvertibleToLoop() const
+ {
+ return false;
+ }
+#endif // !FEATURE_TAILCALL_OPT
+
+ bool IsSameThis() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_NONVIRT_SAME_THIS) != 0;
+ }
+ bool IsDelegateInvoke() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_DELEGATE_INV) != 0;
+ }
+ bool IsVirtualStubRelativeIndir() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ bool IsR2RRelativeIndir() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_R2R_REL_INDIRECT) != 0;
+ }
+ void setEntryPoint(CORINFO_CONST_LOOKUP entryPoint)
+ {
+ gtEntryPoint = entryPoint;
+ if (gtEntryPoint.accessType == IAT_PVALUE)
+ {
+ gtCallMoreFlags |= GTF_CALL_M_R2R_REL_INDIRECT;
+ }
+ }
+#endif // FEATURE_READYTORUN_COMPILER
+
+ bool IsVarargs() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_VARARGS) != 0;
+ }
+
+ bool IsNoReturn() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0;
+ }
+
+ bool IsPure(Compiler* compiler) const;
+
+ unsigned short gtCallMoreFlags; // in addition to gtFlags
+
+ unsigned char gtCallType : 3; // value from the gtCallTypes enumeration
+ unsigned char gtReturnType : 5; // exact return type
+
+ CORINFO_CLASS_HANDLE gtRetClsHnd; // The return type handle of the call if it is a struct; always available
+
+ union {
+ // only used for CALLI unmanaged calls (CT_INDIRECT)
+ GenTreePtr gtCallCookie;
+ // gtInlineCandidateInfo is only used when inlining methods
+ InlineCandidateInfo* gtInlineCandidateInfo;
+ void* gtStubCallStubAddr; // GTF_CALL_VIRT_STUB - these are never inlined
+ CORINFO_GENERIC_HANDLE compileTimeHelperArgumentHandle; // Used to track type handle argument of dynamic helpers
+ void* gtDirectCallAddress; // Used to pass direct call address between lower and codegen
+ };
+
+ // expression evaluated after args are placed which determines the control target
+ GenTree* gtControlExpr;
+
+ union {
+ CORINFO_METHOD_HANDLE gtCallMethHnd; // CT_USER_FUNC
+ GenTreePtr gtCallAddr; // CT_INDIRECT
+ };
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ // Call target lookup info for method call from a Ready To Run module
+ CORINFO_CONST_LOOKUP gtEntryPoint;
+#endif
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ // For non-inline candidates, track the first observation
+ // that blocks candidacy.
+ InlineObservation gtInlineObservation;
+
+ // IL offset of the call wrt its parent method.
+ IL_OFFSET gtRawILOffset;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ bool IsHelperCall() const
+ {
+ return gtCallType == CT_HELPER;
+ }
+
+ bool IsHelperCall(CORINFO_METHOD_HANDLE callMethHnd) const
+ {
+ return IsHelperCall() && (callMethHnd == gtCallMethHnd);
+ }
+
+ bool IsHelperCall(Compiler* compiler, unsigned helper) const;
+
+ GenTreeCall(var_types type) : GenTree(GT_CALL, type)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeCall() : GenTree()
+ {
+ }
+#endif
+};
+
+struct GenTreeCmpXchg : public GenTree
+{
+ GenTreePtr gtOpLocation;
+ GenTreePtr gtOpValue;
+ GenTreePtr gtOpComparand;
+
+ GenTreeCmpXchg(var_types type, GenTreePtr loc, GenTreePtr val, GenTreePtr comparand)
+ : GenTree(GT_CMPXCHG, type), gtOpLocation(loc), gtOpValue(val), gtOpComparand(comparand)
+ {
+ // There's no reason to do a compare-exchange on a local location, so we'll assume that all of these
+ // have global effects.
+ gtFlags |= GTF_GLOB_EFFECT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeCmpXchg() : GenTree()
+ {
+ }
+#endif
+};
+
+struct GenTreeFptrVal : public GenTree
+{
+ CORINFO_METHOD_HANDLE gtFptrMethod;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP gtEntryPoint;
+ CORINFO_RESOLVED_TOKEN* gtLdftnResolvedToken;
+#endif
+
+ GenTreeFptrVal(var_types type, CORINFO_METHOD_HANDLE meth) : GenTree(GT_FTN_ADDR, type), gtFptrMethod(meth)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeFptrVal() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtQmark */
+struct GenTreeQmark : public GenTreeOp
+{
+ // Livesets on entry to then and else subtrees
+ VARSET_TP gtThenLiveSet;
+ VARSET_TP gtElseLiveSet;
+
+ // The "Compiler*" argument is not a DEBUGARG here because we use it to keep track of the set of
+ // (possible) QMark nodes.
+ GenTreeQmark(var_types type, GenTreePtr cond, GenTreePtr colonOp, class Compiler* comp);
+
+#if DEBUGGABLE_GENTREE
+ GenTreeQmark() : GenTreeOp(GT_QMARK, TYP_INT, nullptr, nullptr)
+ {
+ }
+#endif
+};
+
+/* gtIntrinsic -- intrinsic (possibly-binary op [NULL op2 is allowed] with an additional field) */
+
+struct GenTreeIntrinsic : public GenTreeOp
+{
+ CorInfoIntrinsics gtIntrinsicId;
+ CORINFO_METHOD_HANDLE gtMethodHandle; // Method handle of the method which is treated as an intrinsic.
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ // Call target lookup info for method call from a Ready To Run module
+ CORINFO_CONST_LOOKUP gtEntryPoint;
+#endif
+
+ GenTreeIntrinsic(var_types type, GenTreePtr op1, CorInfoIntrinsics intrinsicId, CORINFO_METHOD_HANDLE methodHandle)
+ : GenTreeOp(GT_INTRINSIC, type, op1, nullptr), gtIntrinsicId(intrinsicId), gtMethodHandle(methodHandle)
+ {
+ }
+
+ GenTreeIntrinsic(var_types type,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ CorInfoIntrinsics intrinsicId,
+ CORINFO_METHOD_HANDLE methodHandle)
+ : GenTreeOp(GT_INTRINSIC, type, op1, op2), gtIntrinsicId(intrinsicId), gtMethodHandle(methodHandle)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeIntrinsic() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+#ifdef FEATURE_SIMD
+
+/* gtSIMD -- SIMD intrinsic (possibly-binary op [NULL op2 is allowed] with additional fields) */
+struct GenTreeSIMD : public GenTreeOp
+{
+ SIMDIntrinsicID gtSIMDIntrinsicID; // operation Id
+ var_types gtSIMDBaseType; // SIMD vector base type
+ unsigned gtSIMDSize; // SIMD vector size in bytes
+
+ GenTreeSIMD(var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
+ : GenTreeOp(GT_SIMD, type, op1, nullptr)
+ , gtSIMDIntrinsicID(simdIntrinsicID)
+ , gtSIMDBaseType(baseType)
+ , gtSIMDSize(size)
+ {
+ }
+
+ GenTreeSIMD(var_types type,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ SIMDIntrinsicID simdIntrinsicID,
+ var_types baseType,
+ unsigned size)
+ : GenTreeOp(GT_SIMD, type, op1, op2)
+ , gtSIMDIntrinsicID(simdIntrinsicID)
+ , gtSIMDBaseType(baseType)
+ , gtSIMDSize(size)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeSIMD() : GenTreeOp()
+ {
+ }
+#endif
+};
+#endif // FEATURE_SIMD
+
+/* gtIndex -- array access */
+
+struct GenTreeIndex : public GenTreeOp
+{
+ GenTreePtr& Arr()
+ {
+ return gtOp1;
+ }
+ GenTreePtr& Index()
+ {
+ return gtOp2;
+ }
+
+ unsigned gtIndElemSize; // size of elements in the array
+ CORINFO_CLASS_HANDLE gtStructElemClass; // If the element type is a struct, this is the struct type.
+
+ GenTreeIndex(var_types type, GenTreePtr arr, GenTreePtr ind, unsigned indElemSize)
+ : GenTreeOp(GT_INDEX, type, arr, ind)
+ , gtIndElemSize(indElemSize)
+ , gtStructElemClass(nullptr) // We always initialize this after construction.
+ {
+#ifdef DEBUG
+ if (JitConfig.JitSkipArrayBoundCheck() == 1)
+ {
+ // Skip bounds check
+ }
+ else
+#endif
+ {
+ // Do bounds check
+ gtFlags |= GTF_INX_RNGCHK;
+ }
+
+ if (type == TYP_REF)
+ {
+ gtFlags |= GTF_INX_REFARR_LAYOUT;
+ }
+
+ gtFlags |= GTF_EXCEPT | GTF_GLOB_REF;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeIndex() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+/* gtArrLen -- array length (GT_ARR_LENGTH)
+ GT_ARR_LENGTH is used for "arr.length" */
+
+struct GenTreeArrLen : public GenTreeUnOp
+{
+ GenTreePtr& ArrRef()
+ {
+ return gtOp1;
+ } // the array address node
+private:
+ int gtArrLenOffset; // constant to add to "gtArrRef" to get the address of the array length.
+
+public:
+ inline int ArrLenOffset()
+ {
+ return gtArrLenOffset;
+ }
+
+ GenTreeArrLen(var_types type, GenTreePtr arrRef, int lenOffset)
+ : GenTreeUnOp(GT_ARR_LENGTH, type, arrRef), gtArrLenOffset(lenOffset)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeArrLen() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// This takes:
+// - a comparison value (generally an array length),
+// - an index value,
+// - the label to jump to if the index is out of range, and
+// - the "kind" of the throw block to branch to on failure.
+// It generates no result.
+
+struct GenTreeBoundsChk : public GenTree
+{
+ GenTreePtr gtArrLen; // An expression for the length of the array being indexed.
+ GenTreePtr gtIndex; // The index expression.
+
+ GenTreePtr gtIndRngFailBB; // Label to jump to for array-index-out-of-range
+ SpecialCodeKind gtThrowKind; // Kind of throw block to branch to on failure
+
+ /* Only out-of-range checks at the same stack depth can jump to the same label (finding the return
+ address is easier). Used for delayed calling of fgSetRngChkTarget() so that the
+ optimizer has a chance of eliminating some of the range checks. */
+ unsigned gtStkDepth;
+
+ GenTreeBoundsChk(genTreeOps oper, var_types type, GenTreePtr arrLen, GenTreePtr index, SpecialCodeKind kind)
+ : GenTree(oper, type)
+ , gtArrLen(arrLen)
+ , gtIndex(index)
+ , gtIndRngFailBB(nullptr)
+ , gtThrowKind(kind)
+ , gtStkDepth(0)
+ {
+ // Effects flags propagate upwards.
+ gtFlags |= (arrLen->gtFlags & GTF_ALL_EFFECT);
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeBoundsChk() : GenTree()
+ {
+ }
+#endif
+
+ // If gtArrLen is really an array length (GT_ARR_LENGTH), returns the array reference; otherwise returns nullptr.
+ GenTreePtr GetArray()
+ {
+ if (gtArrLen->OperGet() == GT_ARR_LENGTH)
+ {
+ return gtArrLen->gtArrLen.ArrRef();
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+};
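+
+// A minimal construction sketch (illustrative; "comp", "arr", "index", and "lengthOffset" are
+// assumed names, and GT_ARR_BOUNDS_CHECK / SCK_RNGCHK_FAIL are the usual oper and throw kind):
+//
+//     GenTreeArrLen* arrLen = new (comp, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arr, lengthOffset);
+//     GenTreeBoundsChk* chk = new (comp, GT_ARR_BOUNDS_CHECK)
+//         GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
+//
+// Because gtArrLen is a GT_ARR_LENGTH node here, chk->GetArray() returns "arr".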
+
+// gtArrElem -- general array element (GT_ARR_ELEM), for non "SZ_ARRAYS"
+// -- multidimensional arrays, or 1-d arrays with non-zero lower bounds.
+
+struct GenTreeArrElem : public GenTree
+{
+ GenTreePtr gtArrObj;
+
+#define GT_ARR_MAX_RANK 3
+ GenTreePtr gtArrInds[GT_ARR_MAX_RANK]; // Indices
+ unsigned char gtArrRank; // Rank of the array
+
+ unsigned char gtArrElemSize; // !!! Caution, this is an "unsigned char", it is used only
+ // on the optimization path of array intrinsics.
+ // It stores the size of array elements WHEN it can fit
+ // into an "unsigned char".
+ // This has caused VSW 571394.
+ var_types gtArrElemType; // The array element type
+
+ // Requires that "inds" is a pointer to an array of "rank" GenTreePtrs for the indices.
+ GenTreeArrElem(var_types type,
+ GenTreePtr arr,
+ unsigned char rank,
+ unsigned char elemSize,
+ var_types elemType,
+ GenTreePtr* inds)
+ : GenTree(GT_ARR_ELEM, type), gtArrObj(arr), gtArrRank(rank), gtArrElemSize(elemSize), gtArrElemType(elemType)
+ {
+ for (unsigned char i = 0; i < rank; i++)
+ {
+ gtArrInds[i] = inds[i];
+ }
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeArrElem() : GenTree()
+ {
+ }
+#endif
+};
+
+//--------------------------------------------
+//
+// GenTreeArrIndex (gtArrIndex): Expression to bounds-check the index for one dimension of a
+// multi-dimensional or non-zero-based array, and to compute the effective index
+// (i.e., subtracting the lower bound).
+//
+// Notes:
+// This node is similar in some ways to GenTreeBoundsChk, which ONLY performs the check.
+// The reason that this node incorporates the check into the effective index computation is
+// to avoid duplicating the codegen, as the effective index is required to compute the
+// offset anyway.
+// TODO-CQ: Enable optimization of the lower bound and length by replacing this:
+// /--* <arrObj>
+// +--* <index0>
+// +--* ArrIndex[i, ]
+// with something like:
+// /--* <arrObj>
+// /--* ArrLowerBound[i, ]
+// | /--* <arrObj>
+// +--* ArrLen[i, ] (either generalize GT_ARR_LENGTH or add a new node)
+// +--* <index0>
+// +--* ArrIndex[i, ]
+// Which could, for example, be optimized to the following when known to be within bounds:
+// /--* TempForLowerBoundDim0
+// +--* <index0>
+// +--* - (GT_SUB)
+//
+struct GenTreeArrIndex : public GenTreeOp
+{
+ // The array object - may be any expression producing an Array reference, but is likely to be a lclVar.
+ GenTreePtr& ArrObj()
+ {
+ return gtOp1;
+ }
+ // The index expression - may be any integral expression.
+ GenTreePtr& IndexExpr()
+ {
+ return gtOp2;
+ }
+ unsigned char gtCurrDim; // The current dimension
+ unsigned char gtArrRank; // Rank of the array
+ var_types gtArrElemType; // The array element type
+
+ GenTreeArrIndex(var_types type,
+ GenTreePtr arrObj,
+ GenTreePtr indexExpr,
+ unsigned char currDim,
+ unsigned char arrRank,
+ var_types elemType)
+ : GenTreeOp(GT_ARR_INDEX, type, arrObj, indexExpr)
+ , gtCurrDim(currDim)
+ , gtArrRank(arrRank)
+ , gtArrElemType(elemType)
+ {
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeArrIndex() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+//--------------------------------------------
+//
+// GenTreeArrOffset (gtArrOffset): Expression to compute the accumulated offset for the address
+// of an element of a multi-dimensional or non-zero-based array.
+//
+// Notes:
+// The result of this expression is (gtOffset * dimSize) + gtIndex
+// where dimSize is the length/stride/size of the dimension, and is obtained from gtArrObj.
+// This node is generated in conjunction with the GenTreeArrIndex node, which computes the
+// effective index for a single dimension. The sub-trees can be separately optimized, e.g.
+// within a loop body where the expression for the 0th dimension may be invariant.
+//
+// Here is an example of how the tree might look for a two-dimension array reference:
+// /--* const 0
+// | /--* <arrObj>
+// | +--* <index0>
+// +--* ArrIndex[i, ]
+// +--* <arrObj>
+// /--| arrOffs[i, ]
+// | +--* <arrObj>
+// | +--* <index1>
+// +--* ArrIndex[*,j]
+// +--* <arrObj>
+// /--| arrOffs[*,j]
+// TODO-CQ: see comment on GenTreeArrIndex for how its representation may change. When that
+// is done, we will also want to replace the <arrObj> argument to arrOffs with the
+// ArrLen as for GenTreeArrIndex.
+//
+struct GenTreeArrOffs : public GenTree
+{
+ GenTreePtr gtOffset; // The accumulated offset for lower dimensions - must be TYP_I_IMPL, and
+ // will either be a CSE temp, the constant 0, or another GenTreeArrOffs node.
+ GenTreePtr gtIndex; // The effective index for the current dimension - must be non-negative
+ // and can be any expression (though it is likely to be either a GenTreeArrIndex
+ // node, a lclVar, or a constant).
+ GenTreePtr gtArrObj; // The array object - may be any expression producing an Array reference,
+ // but is likely to be a lclVar.
+ unsigned char gtCurrDim; // The current dimension
+ unsigned char gtArrRank; // Rank of the array
+ var_types gtArrElemType; // The array element type
+
+ GenTreeArrOffs(var_types type,
+ GenTreePtr offset,
+ GenTreePtr index,
+ GenTreePtr arrObj,
+ unsigned char currDim,
+ unsigned char rank,
+ var_types elemType)
+ : GenTree(GT_ARR_OFFSET, type)
+ , gtOffset(offset)
+ , gtIndex(index)
+ , gtArrObj(arrObj)
+ , gtCurrDim(currDim)
+ , gtArrRank(rank)
+ , gtArrElemType(elemType)
+ {
+ assert(index->gtFlags & GTF_EXCEPT);
+ gtFlags |= GTF_EXCEPT;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeArrOffs() : GenTree()
+ {
+ }
+#endif
+};
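+
+// Worked example (illustrative): for a 2-D access arr[i][j], the first GT_ARR_OFFSET has
+// gtOffset == 0 and gtIndex == the effective index for i, so its value is
+//     0 * dimSize0 + effIndex(i) = effIndex(i).
+// The second GT_ARR_OFFSET takes that result as gtOffset and effIndex(j) as gtIndex, yielding
+//     effIndex(i) * dimSize1 + effIndex(j)
+// which is the flattened element index used to form the final element address.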
+
+/* gtAddrMode -- Target-specific canonicalized addressing expression (GT_LEA) */
+
+struct GenTreeAddrMode : public GenTreeOp
+{
+ // Address is Base + Index*Scale + Offset.
+ // These are the legal patterns:
+ //
+ // Base // Base != nullptr && Index == nullptr && Scale == 0 && Offset == 0
+ // Base + Index*Scale // Base != nullptr && Index != nullptr && Scale != 0 && Offset == 0
+ // Base + Offset // Base != nullptr && Index == nullptr && Scale == 0 && Offset != 0
+ // Base + Index*Scale + Offset // Base != nullptr && Index != nullptr && Scale != 0 && Offset != 0
+ // Index*Scale // Base == nullptr && Index != nullptr && Scale > 1 && Offset == 0
+ // Index*Scale + Offset // Base == nullptr && Index != nullptr && Scale > 1 && Offset != 0
+ // Offset // Base == nullptr && Index == nullptr && Scale == 0 && Offset != 0
+ //
+ // So, for example:
+ // 1. Base + Index is legal with Scale==1
+ // 2. If Index is null, Scale should be zero (or uninitialized / unused)
+ // 3. If Scale==1, then we should have "Base" instead of "Index*Scale", and "Base + Offset" instead of
+ // "Index*Scale + Offset".
+
+ // First operand is base address/pointer
+ bool HasBase() const
+ {
+ return gtOp1 != nullptr;
+ }
+ GenTreePtr& Base()
+ {
+ return gtOp1;
+ }
+
+ // Second operand is scaled index value
+ bool HasIndex() const
+ {
+ return gtOp2 != nullptr;
+ }
+ GenTreePtr& Index()
+ {
+ return gtOp2;
+ }
+
+ unsigned gtScale; // The scale factor
+ unsigned gtOffset; // The offset to add
+
+ GenTreeAddrMode(var_types type, GenTreePtr base, GenTreePtr index, unsigned scale, unsigned offset)
+ : GenTreeOp(GT_LEA, type, base, index)
+ {
+ gtScale = scale;
+ gtOffset = offset;
+ }
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeAddrMode() : GenTreeOp()
+ {
+ }
+#endif
+};
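+
+// Illustrative mapping (a sketch; "comp", "base", and "index" are assumed names): the x86/x64
+// address form [base + index*8 + 24] corresponds to
+//
+//     GenTreeAddrMode* lea = new (comp, GT_LEA) GenTreeAddrMode(TYP_BYREF, base, index, 8, 24);
+//
+// i.e. HasBase() and HasIndex() are both true, gtScale == 8 and gtOffset == 24, matching the
+// "Base + Index*Scale + Offset" pattern documented above.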
+
+// Indir is just an op, no additional data, but some additional abstractions
+struct GenTreeIndir : public GenTreeOp
+{
+ // The address for the indirection.
+ // Since GenTreeDynBlk derives from this, but is an "EXOP" (i.e. it has extra fields),
+ // we can't access Op1 and Op2 in the normal manner if we may have a DynBlk.
+ GenTreePtr& Addr()
+ {
+ return gtOp1;
+ }
+
+ // These methods provide an interface to the indirection's address, exposing its base, index, scale, and offset components when the address is an address mode (GT_LEA).
+ bool HasBase();
+ bool HasIndex();
+ GenTree* Base();
+ GenTree* Index();
+ unsigned Scale();
+ size_t Offset();
+
+ GenTreeIndir(genTreeOps oper, var_types type, GenTree* addr, GenTree* data) : GenTreeOp(oper, type, addr, data)
+ {
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeIndir() : GenTreeOp()
+ {
+ }
+#endif
+};
+
+// gtBlk -- 'block' (GT_BLK, GT_STORE_BLK).
+//
+// This is the base type for all of the nodes that represent block or struct
+// values.
+// Since it can be a store, it includes gtBlkOpKind to specify the type of
+// code generation that will be used for the block operation.
+
+struct GenTreeBlk : public GenTreeIndir
+{
+public:
+ // The data to be stored (null for GT_BLK)
+ GenTree*& Data()
+ {
+ return gtOp2;
+ }
+ void SetData(GenTree* dataNode)
+ {
+ gtOp2 = dataNode;
+ }
+
+ // The size of the buffer to be copied.
+ unsigned Size() const
+ {
+ return gtBlkSize;
+ }
+
+ unsigned gtBlkSize;
+
+ // Return true iff the object being copied contains one or more GC pointers.
+ bool HasGCPtr();
+
+ // True if this BlkOpNode is a volatile memory operation.
+ bool IsVolatile() const
+ {
+ return (gtFlags & GTF_BLK_VOLATILE) != 0;
+ }
+
+ // True if this BlkOpNode is an unaligned memory operation.
+ bool IsUnaligned() const
+ {
+ return (gtFlags & GTF_BLK_UNALIGNED) != 0;
+ }
+
+ // Instruction selection: during codegen time, what code sequence we will be using
+ // to encode this operation.
+ enum
+ {
+ BlkOpKindInvalid,
+ BlkOpKindHelper,
+ BlkOpKindRepInstr,
+ BlkOpKindUnroll,
+ } gtBlkOpKind;
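+
+ // For example (illustrative; the exact size thresholds are target-dependent): lowering
+ // typically chooses BlkOpKindUnroll for small fixed-size copies, BlkOpKindRepInstr when a
+ // rep movs/stos sequence is profitable, and BlkOpKindHelper for large or dynamically-sized copies.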
+
+ bool gtBlkOpGcUnsafe;
+
+ GenTreeBlk(genTreeOps oper, var_types type, GenTreePtr addr, unsigned size)
+ : GenTreeIndir(oper, type, addr, nullptr)
+ , gtBlkSize(size)
+ , gtBlkOpKind(BlkOpKindInvalid)
+ , gtBlkOpGcUnsafe(false)
+ {
+ assert(OperIsBlk(oper));
+ gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
+ }
+
+ GenTreeBlk(genTreeOps oper, var_types type, GenTreePtr addr, GenTreePtr data, unsigned size)
+ : GenTreeIndir(oper, type, addr, data), gtBlkSize(size), gtBlkOpKind(BlkOpKindInvalid), gtBlkOpGcUnsafe(false)
+ {
+ assert(OperIsBlk(oper));
+ gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
+ gtFlags |= (data->gtFlags & GTF_ALL_EFFECT);
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ GenTreeBlk() : GenTreeIndir()
+ {
+ }
+#endif // DEBUGGABLE_GENTREE
+};
+
+// gtObj -- 'object' (GT_OBJ).
+//
+// This node is used for block values that may have GC pointers.
+
+struct GenTreeObj : public GenTreeBlk
+{
+ CORINFO_CLASS_HANDLE gtClass; // the class of the object
+
+ // If non-null, this array represents the gc-layout of the class.
+ // This may be simply copied when cloning this node, because it is not changed once computed.
+ BYTE* gtGcPtrs;
+
+ // If non-zero, this is the number of slots in the class layout that
+ // contain gc-pointers.
+ __declspec(property(get = GetGcPtrCount)) unsigned gtGcPtrCount;
+ unsigned GetGcPtrCount() const
+ {
+ assert(_gtGcPtrCount != UINT32_MAX);
+ return _gtGcPtrCount;
+ }
+ unsigned _gtGcPtrCount;
+
+ // If non-zero, the number of pointer-sized slots that make up the class layout.
+ unsigned gtSlots;
+
+ bool IsGCInfoInitialized()
+ {
+ return (_gtGcPtrCount != UINT32_MAX);
+ }
+
+ void SetGCInfo(BYTE* gcPtrs, unsigned gcPtrCount, unsigned slots)
+ {
+ gtGcPtrs = gcPtrs;
+ _gtGcPtrCount = gcPtrCount;
+ gtSlots = slots;
+ if (gtGcPtrCount != 0)
+ {
+ // We assume that we cannot have a struct with GC pointers that is not a multiple
+ // of the register size.
+ // The EE currently does not allow this, but it could change.
+ // Let's assert it just to be safe.
+ noway_assert(roundUp(gtBlkSize, REGSIZE_BYTES) == gtBlkSize);
+ }
+ }
+
+ void CopyGCInfo(GenTreeObj* srcObj)
+ {
+ if (srcObj->IsGCInfoInitialized())
+ {
+ gtGcPtrs = srcObj->gtGcPtrs;
+ _gtGcPtrCount = srcObj->gtGcPtrCount;
+ gtSlots = srcObj->gtSlots;
+ }
+ }
+
+ GenTreeObj(var_types type, GenTreePtr addr, CORINFO_CLASS_HANDLE cls, unsigned size)
+ : GenTreeBlk(GT_OBJ, type, addr, size), gtClass(cls)
+ {
+ // By default, an OBJ is assumed to be a global reference.
+ gtFlags |= GTF_GLOB_REF;
+ noway_assert(cls != NO_CLASS_HANDLE);
+ _gtGcPtrCount = UINT32_MAX;
+ }
+
+ GenTreeObj(var_types type, GenTreePtr addr, GenTreePtr data, CORINFO_CLASS_HANDLE cls, unsigned size)
+ : GenTreeBlk(GT_STORE_OBJ, type, addr, data, size), gtClass(cls)
+ {
+ // By default, an OBJ is assumed to be a global reference.
+ gtFlags |= GTF_GLOB_REF;
+ noway_assert(cls != NO_CLASS_HANDLE);
+ _gtGcPtrCount = UINT32_MAX;
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeObj() : GenTreeBlk()
+ {
+ }
+#endif
+};
+
+// gtDynBlk -- 'dynamic block' (GT_DYN_BLK).
+//
+// This node is used for block values that have a dynamic size.
+// Note that such a value can never have GC pointers.
+
+struct GenTreeDynBlk : public GenTreeBlk
+{
+public:
+ GenTreePtr gtDynamicSize;
+ bool gtEvalSizeFirst;
+
+ GenTreeDynBlk(GenTreePtr addr, GenTreePtr dynamicSize)
+ : GenTreeBlk(GT_DYN_BLK, TYP_STRUCT, addr, 0), gtDynamicSize(dynamicSize), gtEvalSizeFirst(false)
+ {
+ gtFlags |= (dynamicSize->gtFlags & GTF_ALL_EFFECT);
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ GenTreeDynBlk() : GenTreeBlk()
+ {
+ }
+#endif // DEBUGGABLE_GENTREE
+};
+
+// Read-modify-write status of a RMW memory op rooted at a storeInd
+enum RMWStatus
+{
+ STOREIND_RMW_STATUS_UNKNOWN, // RMW status of storeInd unknown
+ // Default status unless modified by IsRMWMemOpRootedAtStoreInd()
+
+ // One of these denotes that the storeInd is an RMW memory operation.
+ STOREIND_RMW_DST_IS_OP1, // StoreInd is known to be a RMW memory op and dst candidate is op1
+ STOREIND_RMW_DST_IS_OP2, // StoreInd is known to be a RMW memory op and dst candidate is op2
+
+ // One of these denotes the reason the storeInd is marked as a non-RMW operation.
+ STOREIND_RMW_UNSUPPORTED_ADDR, // Addr mode is not yet supported for RMW memory
+ STOREIND_RMW_UNSUPPORTED_OPER, // Operation is not supported for RMW memory
+ STOREIND_RMW_UNSUPPORTED_TYPE, // Type is not supported for RMW memory
+ STOREIND_RMW_INDIR_UNEQUAL // Indir to read value is not equivalent to indir that writes the value
+};
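+
+// Illustrative example of the pattern this status tracks (a sketch, not taken from the source):
+// on xarch, a tree of the shape
+//
+//     GT_STOREIND(addr, GT_ADD(GT_IND(addr), value))
+//
+// can be encoded as a single "add [mem], reg" instruction. The indirection that re-reads the
+// destination is op1 of the ADD, so lowering would mark the storeInd STOREIND_RMW_DST_IS_OP1;
+// with the operands swapped it would be STOREIND_RMW_DST_IS_OP2.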
+
+// StoreInd is just a BinOp, with additional RMW status
+struct GenTreeStoreInd : public GenTreeIndir
+{
+#if !CPU_LOAD_STORE_ARCH
+ // The below flag is set and used during lowering
+ RMWStatus gtRMWStatus;
+
+ bool IsRMWStatusUnknown()
+ {
+ return gtRMWStatus == STOREIND_RMW_STATUS_UNKNOWN;
+ }
+ bool IsNonRMWMemoryOp()
+ {
+ return gtRMWStatus == STOREIND_RMW_UNSUPPORTED_ADDR || gtRMWStatus == STOREIND_RMW_UNSUPPORTED_OPER ||
+ gtRMWStatus == STOREIND_RMW_UNSUPPORTED_TYPE || gtRMWStatus == STOREIND_RMW_INDIR_UNEQUAL;
+ }
+ bool IsRMWMemoryOp()
+ {
+ return gtRMWStatus == STOREIND_RMW_DST_IS_OP1 || gtRMWStatus == STOREIND_RMW_DST_IS_OP2;
+ }
+ bool IsRMWDstOp1()
+ {
+ return gtRMWStatus == STOREIND_RMW_DST_IS_OP1;
+ }
+ bool IsRMWDstOp2()
+ {
+ return gtRMWStatus == STOREIND_RMW_DST_IS_OP2;
+ }
+#endif //! CPU_LOAD_STORE_ARCH
+
+ RMWStatus GetRMWStatus()
+ {
+#if !CPU_LOAD_STORE_ARCH
+ return gtRMWStatus;
+#else
+ return STOREIND_RMW_STATUS_UNKNOWN;
+#endif
+ }
+
+ void SetRMWStatusDefault()
+ {
+#if !CPU_LOAD_STORE_ARCH
+ gtRMWStatus = STOREIND_RMW_STATUS_UNKNOWN;
+#endif
+ }
+
+ void SetRMWStatus(RMWStatus status)
+ {
+#if !CPU_LOAD_STORE_ARCH
+ gtRMWStatus = status;
+#endif
+ }
+
+ GenTreePtr& Data()
+ {
+ return gtOp2;
+ }
+
+ GenTreeStoreInd(var_types type, GenTree* destPtr, GenTree* data) : GenTreeIndir(GT_STOREIND, type, destPtr, data)
+ {
+ SetRMWStatusDefault();
+ }
+
+#if DEBUGGABLE_GENTREE
+protected:
+ friend GenTree;
+ // Used only for GenTree::GetVtableForOper()
+ GenTreeStoreInd() : GenTreeIndir()
+ {
+ SetRMWStatusDefault();
+ }
+#endif
+};
+
+/* gtRetExp -- Place holder for the return expression from an inline candidate (GT_RET_EXPR) */
+
+struct GenTreeRetExpr : public GenTree
+{
+ GenTreePtr gtInlineCandidate;
+
+ CORINFO_CLASS_HANDLE gtRetClsHnd;
+
+ GenTreeRetExpr(var_types type) : GenTree(GT_RET_EXPR, type)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeRetExpr() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtStmt -- 'statement expr' (GT_STMT) */
+
+class InlineContext;
+
+struct GenTreeStmt : public GenTree
+{
+ GenTreePtr gtStmtExpr; // root of the expression tree
+ GenTreePtr gtStmtList; // first node (for forward walks)
+ InlineContext* gtInlineContext; // The inline context for this statement.
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ IL_OFFSETX gtStmtILoffsx; // instr offset (if available)
+#endif
+
+#ifdef DEBUG
+ IL_OFFSET gtStmtLastILoffs; // instr offset at end of stmt
+#endif
+
+ __declspec(property(get = getNextStmt)) GenTreeStmt* gtNextStmt;
+
+ __declspec(property(get = getPrevStmt)) GenTreeStmt* gtPrevStmt;
+
+ GenTreeStmt* getNextStmt()
+ {
+ if (gtNext == nullptr)
+ {
+ return nullptr;
+ }
+ else
+ {
+ return gtNext->AsStmt();
+ }
+ }
+
+ GenTreeStmt* getPrevStmt()
+ {
+ if (gtPrev == nullptr)
+ {
+ return nullptr;
+ }
+ else
+ {
+ return gtPrev->AsStmt();
+ }
+ }
+
+ GenTreeStmt(GenTreePtr expr, IL_OFFSETX offset)
+ : GenTree(GT_STMT, TYP_VOID)
+ , gtStmtExpr(expr)
+ , gtStmtList(nullptr)
+ , gtInlineContext(nullptr)
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+ , gtStmtILoffsx(offset)
+#endif
+#ifdef DEBUG
+ , gtStmtLastILoffs(BAD_IL_OFFSET)
+#endif
+ {
+ // Statements can't have statements as part of their expression tree.
+ assert(expr->gtOper != GT_STMT);
+
+ // Set the statement to have the same costs as the top node of the tree.
+ // This is used long before costs have been assigned, so we need to copy
+ // the raw costs.
+ CopyRawCosts(expr);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeStmt() : GenTree(GT_STMT, TYP_VOID)
+ {
+ }
+#endif
+};
+
+/* NOTE: Any tree nodes that are larger than 8 bytes (two ints or
+ pointers) must be flagged as 'large' in GenTree::InitNodeSize().
+ */
+
+/* gtClsVar -- 'static data member' (GT_CLS_VAR) */
+
+struct GenTreeClsVar : public GenTree
+{
+ CORINFO_FIELD_HANDLE gtClsVarHnd;
+ FieldSeqNode* gtFieldSeq;
+
+ GenTreeClsVar(var_types type, CORINFO_FIELD_HANDLE clsVarHnd, FieldSeqNode* fldSeq)
+ : GenTree(GT_CLS_VAR, type), gtClsVarHnd(clsVarHnd), gtFieldSeq(fldSeq)
+ {
+ gtFlags |= GTF_GLOB_REF;
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeClsVar() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtArgPlace -- 'register argument placeholder' (GT_ARGPLACE) */
+
+struct GenTreeArgPlace : public GenTree
+{
+ CORINFO_CLASS_HANDLE gtArgPlaceClsHnd; // Needed when we have a TYP_STRUCT argument
+
+ GenTreeArgPlace(var_types type, CORINFO_CLASS_HANDLE clsHnd) : GenTree(GT_ARGPLACE, type), gtArgPlaceClsHnd(clsHnd)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeArgPlace() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtLabel -- code label target (GT_LABEL) */
+
+struct GenTreeLabel : public GenTree
+{
+ BasicBlock* gtLabBB;
+
+ GenTreeLabel(BasicBlock* bb) : GenTree(GT_LABEL, TYP_VOID), gtLabBB(bb)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeLabel() : GenTree()
+ {
+ }
+#endif
+};
+
+/* gtPhiArg -- phi node rhs argument, var = phi(phiarg, phiarg, phiarg...); GT_PHI_ARG */
+struct GenTreePhiArg : public GenTreeLclVarCommon
+{
+ BasicBlock* gtPredBB;
+
+ GenTreePhiArg(var_types type, unsigned lclNum, unsigned snum, BasicBlock* block)
+ : GenTreeLclVarCommon(GT_PHI_ARG, type, lclNum), gtPredBB(block)
+ {
+ SetSsaNum(snum);
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreePhiArg() : GenTreeLclVarCommon()
+ {
+ }
+#endif
+};
+
+/* gtPutArgStk -- Argument passed on stack */
+
+struct GenTreePutArgStk : public GenTreeUnOp
+{
+ unsigned gtSlotNum; // Slot number of the argument to be passed on stack
+
+#if FEATURE_FASTTAILCALL
+ bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
+ // By default this is false and will be placed in out-going arg area.
+ // Fast tail calls set this to true.
+ // In the future, if we need to add more such bool fields, consider using bit fields.
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+ , putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct),
+ bool _putInIncomingArgArea = false DEBUGARG(GenTreePtr callNode = nullptr)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+ , putInIncomingArgArea(_putInIncomingArgArea)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+
+#else // !FEATURE_FASTTAILCALL
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+
+ GenTreePutArgStk(genTreeOps oper,
+ var_types type,
+ GenTreePtr op1,
+ unsigned slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(unsigned numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(bool isStruct) DEBUGARG(GenTreePtr callNode = NULL)
+ DEBUGARG(bool largeNode = false))
+ : GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
+ , gtSlotNum(slotNum)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ , gtPutArgStkKind(PutArgStkKindInvalid)
+ , gtNumSlots(numSlots)
+ , gtIsStruct(isStruct)
+ , gtNumberReferenceSlots(0)
+ , gtGcPtrs(nullptr)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+#ifdef DEBUG
+ gtCall = callNode;
+#endif
+ }
+#endif // FEATURE_FASTTAILCALL
+
+ unsigned getArgOffset()
+ {
+ return gtSlotNum * TARGET_POINTER_SIZE;
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ unsigned getArgSize()
+ {
+ return gtNumSlots * TARGET_POINTER_SIZE;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
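+
+ // For example (illustrative): with TARGET_POINTER_SIZE == 8, an argument with gtSlotNum == 3
+ // and gtNumSlots == 2 starts at stack offset 24 (getArgOffset) and spans 16 bytes (getArgSize).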
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ //------------------------------------------------------------------------
+ // setGcPointers: Sets the number of references and the layout of the struct object returned by the VM.
+ //
+ // Arguments:
+ // numPointers - Number of pointer references.
+ // pointers - layout of the struct (with pointers marked.)
+ //
+ // Return Value:
+ // None
+ //
+ // Notes:
+ // This data is used in the codegen for GT_PUTARG_STK to decide how to copy the struct to the stack by value.
+ // If no pointer references are present, block copying instructions are used.
+ // Otherwise the pointer reference slots are copied atomically, so that GC info is emitted for them.
+ // Any non-pointer slots between the pointer reference slots are copied in block fashion.
+ //
+ void setGcPointers(unsigned numPointers, BYTE* pointers)
+ {
+ gtNumberReferenceSlots = numPointers;
+ gtGcPtrs = pointers;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ GenTreePtr gtCall; // the call node to which this argument belongs
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Instruction selection: during codegen time, what code sequence we will be using
+ // to encode this operation.
+
+ enum PutArgStkKind : __int8{
+ PutArgStkKindInvalid, PutArgStkKindRepInstr, PutArgStkKindUnroll,
+ };
+
+ PutArgStkKind gtPutArgStkKind;
+
+ unsigned gtNumSlots; // Number of slots for the argument to be passed on stack
+ bool gtIsStruct; // This stack arg is a struct.
+ unsigned gtNumberReferenceSlots; // Number of reference slots.
+ BYTE* gtGcPtrs; // gcPointers
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#if DEBUGGABLE_GENTREE
+ GenTreePutArgStk() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// Represents GT_COPY or GT_RELOAD node
+struct GenTreeCopyOrReload : public GenTreeUnOp
+{
+#if FEATURE_MULTIREG_RET
+ // State required to support copy/reload of a multi-reg call node.
+ // The first register is always given by gtRegNum.
+ //
+ regNumber gtOtherRegs[MAX_RET_REG_COUNT - 1];
+#endif
+
+ //----------------------------------------------------------
+ // ClearOtherRegs: set gtOtherRegs to REG_NA.
+ //
+ // Arguments:
+ // None
+ //
+ // Return Value:
+ // None
+ //
+ void ClearOtherRegs()
+ {
+#if FEATURE_MULTIREG_RET
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ gtOtherRegs[i] = REG_NA;
+ }
+#endif
+ }
+
+ //-----------------------------------------------------------
+ // GetRegNumByIdx: Get regNumber of ith position.
+ //
+ // Arguments:
+ // idx - register position.
+ //
+ // Return Value:
+ // Returns regNumber assigned to ith position.
+ //
+ regNumber GetRegNumByIdx(unsigned idx) const
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ return gtRegNum;
+ }
+
+#if FEATURE_MULTIREG_RET
+ return gtOtherRegs[idx - 1];
+#else
+ return REG_NA;
+#endif
+ }
+
+ //-----------------------------------------------------------
+ // SetRegNumByIdx: Set the regNumber for ith position.
+ //
+ // Arguments:
+ // reg - reg number
+ // idx - register position.
+ //
+ // Return Value:
+ // None.
+ //
+ void SetRegNumByIdx(regNumber reg, unsigned idx)
+ {
+ assert(idx < MAX_RET_REG_COUNT);
+
+ if (idx == 0)
+ {
+ gtRegNum = reg;
+ }
+#if FEATURE_MULTIREG_RET
+ else
+ {
+ gtOtherRegs[idx - 1] = reg;
+ assert(gtOtherRegs[idx - 1] == reg);
+ }
+#else
+ else
+ {
+ unreached();
+ }
+#endif
+ }
+
+ //----------------------------------------------------------------------------
+ // CopyOtherRegs: copy multi-reg state from the given copy/reload node to this
+ // node.
+ //
+ // Arguments:
+ // from - GenTree node from which to copy multi-reg state
+ //
+ // Return Value:
+ // None
+ //
+ // TODO-ARM: Implement this routine for Arm64 and Arm32
+ // TODO-X86: Implement this routine for x86
+ void CopyOtherRegs(GenTreeCopyOrReload* from)
+ {
+ assert(OperGet() == from->OperGet());
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ for (unsigned i = 0; i < MAX_RET_REG_COUNT - 1; ++i)
+ {
+ gtOtherRegs[i] = from->gtOtherRegs[i];
+ }
+#endif
+ }
+
+ GenTreeCopyOrReload(genTreeOps oper, var_types type, GenTree* op1) : GenTreeUnOp(oper, type, op1)
+ {
+ gtRegNum = REG_NA;
+ ClearOtherRegs();
+ }
+
+#if DEBUGGABLE_GENTREE
+ GenTreeCopyOrReload() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+// Represents GT_ALLOCOBJ node
+
+struct GenTreeAllocObj final : public GenTreeUnOp
+{
+ unsigned int gtNewHelper; // Value returned by ICorJitInfo::getNewHelper
+ CORINFO_CLASS_HANDLE gtAllocObjClsHnd;
+
+ GenTreeAllocObj(var_types type, unsigned int helper, CORINFO_CLASS_HANDLE clsHnd, GenTreePtr op)
+ : GenTreeUnOp(GT_ALLOCOBJ, type, op DEBUGARG(/*largeNode*/ TRUE))
+ , // This node in most cases will be changed to a call node
+ gtNewHelper(helper)
+ , gtAllocObjClsHnd(clsHnd)
+ {
+ }
+#if DEBUGGABLE_GENTREE
+ GenTreeAllocObj() : GenTreeUnOp()
+ {
+ }
+#endif
+};
+
+//------------------------------------------------------------------------
+// Deferred inline functions of GenTree -- these need the subtypes above to
+// be defined already.
+//------------------------------------------------------------------------
+
+inline bool GenTree::OperIsBlkOp()
+{
+ return (((gtOper == GT_ASG) && varTypeIsStruct(gtOp.gtOp1))
+#ifndef LEGACY_BACKEND
+ || (OperIsBlk() && (AsBlk()->Data() != nullptr))
+#endif
+ );
+}
+
+inline bool GenTree::OperIsDynBlkOp()
+{
+ if (gtOper == GT_ASG)
+ {
+ return gtGetOp1()->OperGet() == GT_DYN_BLK;
+ }
+#ifndef LEGACY_BACKEND
+ else if (gtOper == GT_STORE_DYN_BLK)
+ {
+ return true;
+ }
+#endif
+ return false;
+}
+
+inline bool GenTree::OperIsCopyBlkOp()
+{
+ if (gtOper == GT_ASG)
+ {
+ return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) == 0));
+ }
+#ifndef LEGACY_BACKEND
+ else if (OperIsStoreBlk())
+ {
+ return ((gtFlags & GTF_BLK_INIT) == 0);
+ }
+#endif
+ return false;
+}
+
+inline bool GenTree::OperIsInitBlkOp()
+{
+ if (gtOper == GT_ASG)
+ {
+ return (varTypeIsStruct(gtGetOp1()) && ((gtFlags & GTF_BLK_INIT) != 0));
+ }
+#ifndef LEGACY_BACKEND
+ else if (OperIsStoreBlk())
+ {
+ return ((gtFlags & GTF_BLK_INIT) != 0);
+ }
+#endif
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsFPZero: Checks whether this is a floating point constant with value 0.0
+//
+// Return Value:
+// Returns true iff the tree is a GT_CNS_DBL with a value of 0.0.
+
+inline bool GenTree::IsFPZero()
+{
+ if ((gtOper == GT_CNS_DBL) && (gtDblCon.gtDconVal == 0.0))
+ {
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsIntegralConst: Checks whether this is a constant node with the given value
+//
+// Arguments:
+// constVal - the value of interest
+//
+// Return Value:
+// Returns true iff the tree is an integral constant opcode, with
+// the given value.
+//
+// Notes:
+// Like gtIconVal, the argument is of type ssize_t, so this cannot check for
+// long constants in a target-independent way.
+
+inline bool GenTree::IsIntegralConst(ssize_t constVal)
+
+{
+ if ((gtOper == GT_CNS_INT) && (gtIntConCommon.IconValue() == constVal))
+ {
+ return true;
+ }
+
+ if ((gtOper == GT_CNS_LNG) && (gtIntConCommon.LngValue() == constVal))
+ {
+ return true;
+ }
+
+ return false;
+}
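+
+// A minimal usage sketch (illustrative; "op2" is an assumed GenTreePtr): constant folding and
+// lowering code often guards on specific constant values, e.g.
+//
+//     if (op2->IsIntegralConst(0)) { /* handle "x op 0" */ }
+//     if (op2->IsIntegralConst(1)) { /* handle "x op 1" */ }
+//
+// which matches both GT_CNS_INT nodes and (on 32-bit targets) GT_CNS_LNG nodes with that value.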
+
+inline bool GenTree::IsBoxedValue()
+{
+ assert(gtOper != GT_BOX || gtBox.BoxOp() != nullptr);
+ return (gtOper == GT_BOX) && (gtFlags & GTF_BOX_VALUE);
+}
+
+inline GenTreePtr GenTree::MoveNext()
+{
+ assert(IsList());
+ return gtOp.gtOp2;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// IsListForMultiRegArg: Given a GenTree node that represents an argument,
+// check that it satisfies the following invariant.
+//
+// For LEGACY_BACKEND or architectures that don't support MultiReg args
+// we don't allow a GT_LIST at all.
+//
+// Currently for AMD64 UNIX we allow a limited case where a GT_LIST is
+// allowed but every element must be a GT_LCL_FLD.
+//
+// For the future targets that allow for Multireg args (and this includes
+// the current ARM64 target) we allow a GT_LIST of arbitrary nodes, these
+// would typically start out as GT_LCL_VARs or GT_LCL_FLDS or GT_INDs,
+// but could be changed into constants or GT_COMMA trees by the later
+// optimization phases.
+//
+// Arguments:
+// 'this' - the GenTree argument node being checked (instance method)
+//
+// Return values:
+// true: the GenTree node is accepted as a valid argument
+// false: the GenTree node is not accepted as a valid argument
+//
+inline bool GenTree::IsListForMultiRegArg()
+{
+ if (!IsList())
+ {
+ // We don't have a GT_LIST, so just return true.
+ return true;
+ }
+ else // We do have a GT_LIST
+ {
+#if defined(LEGACY_BACKEND) || !FEATURE_MULTIREG_ARGS
+
+ // Not allowed to have a GT_LIST for an argument
+ // unless we have a RyuJIT backend and FEATURE_MULTIREG_ARGS
+
+ return false;
+
+#else // we have RyuJIT backend and FEATURE_MULTIREG_ARGS
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // For UNIX ABI we currently only allow a GT_LIST of GT_LCL_FLDs nodes
+ GenTree* gtListPtr = this;
+ while (gtListPtr != nullptr)
+ {
+ // ToDo: fix UNIX_AMD64 so that we do not generate this kind of list
+ // Note the list as currently created is malformed, as the last entry is a nullptr
+ if (gtListPtr->Current() == nullptr)
+ break;
+
+ // Only a list of GT_LCL_FLDs is allowed
+ if (gtListPtr->Current()->OperGet() != GT_LCL_FLD)
+ {
+ return false;
+ }
+ gtListPtr = gtListPtr->MoveNext();
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // Note that for non-UNIX ABI the GT_LIST may contain any node
+ //
+ // We allow this GT_LIST as an argument
+ return true;
+
+#endif // RyuJIT backend and FEATURE_MULTIREG_ARGS
+ }
+}
+#endif // DEBUG
+
+inline GenTreePtr GenTree::Current()
+{
+ assert(IsList());
+ return gtOp.gtOp1;
+}
+
+inline GenTreePtr* GenTree::pCurrent()
+{
+ assert(IsList());
+ return &(gtOp.gtOp1);
+}
+
+inline GenTreePtr GenTree::gtGetOp1()
+{
+ return gtOp.gtOp1;
+}
+
+#ifdef DEBUG
+/* static */
+inline bool GenTree::RequiresNonNullOp2(genTreeOps oper)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_INDEX:
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ case GT_COMMA:
+ case GT_QMARK:
+ case GT_COLON:
+ case GT_MKREFANY:
+ return true;
+ default:
+ return false;
+ }
+}
+#endif // DEBUG
+
+inline GenTreePtr GenTree::gtGetOp2()
+{
+ /* gtOp.gtOp2 is only valid for GTK_BINOP nodes. */
+
+ GenTreePtr op2 = OperIsBinary() ? gtOp.gtOp2 : nullptr;
+
+ // This documents the genTreeOps for which gtOp.gtOp2 cannot be nullptr.
+ // This helps prefix in its analysis of code which calls gtGetOp2()
+
+ assert((op2 != nullptr) || !RequiresNonNullOp2(gtOper));
+
+ return op2;
+}
+
+inline GenTreePtr GenTree::gtEffectiveVal(bool commaOnly)
+{
+ switch (gtOper)
+ {
+ case GT_COMMA:
+ return gtOp.gtOp2->gtEffectiveVal(commaOnly);
+
+ case GT_NOP:
+ if (!commaOnly && gtOp.gtOp1 != nullptr)
+ {
+ return gtOp.gtOp1->gtEffectiveVal();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return this;
+}
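+
+// For example (illustrative): given the tree GT_COMMA(sideEffect, GT_COMMA(other, lclVar)),
+// gtEffectiveVal() follows the second operand of each comma and returns the lclVar node; with
+// commaOnly == false it additionally looks through a GT_NOP wrapper that has a non-null operand.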
+
+inline GenTree* GenTree::gtSkipReloadOrCopy()
+{
+ // There can be only one reload or copy (we can't have a reload/copy of a reload/copy)
+ if (gtOper == GT_RELOAD || gtOper == GT_COPY)
+ {
+ assert(gtGetOp1()->OperGet() != GT_RELOAD && gtGetOp1()->OperGet() != GT_COPY);
+ return gtGetOp1();
+ }
+ return this;
+}
+
+//-----------------------------------------------------------------------------------
+// IsMultiRegCall: whether this is a call node that returns its value in more than one register
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if this GenTree is a multi register returning call
+inline bool GenTree::IsMultiRegCall() const
+{
+ if (this->IsCall())
+ {
+ // We cannot use AsCall() as it is not declared const
+ const GenTreeCall* call = reinterpret_cast<const GenTreeCall*>(this);
+ return call->HasMultiRegRetVal();
+ }
+
+ return false;
+}
+
+//-------------------------------------------------------------------------
+// IsCopyOrReload: whether this is a GT_COPY or GT_RELOAD node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if this GenTree is a copy or reload node.
+inline bool GenTree::IsCopyOrReload() const
+{
+ return (gtOper == GT_COPY || gtOper == GT_RELOAD);
+}
+
+//-----------------------------------------------------------------------------------
+// IsCopyOrReloadOfMultiRegCall: whether this is a GT_COPY or GT_RELOAD of a multi-reg
+// call node.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true if this GenTree is a copy or reload of multi-reg call node.
+inline bool GenTree::IsCopyOrReloadOfMultiRegCall() const
+{
+ if (IsCopyOrReload())
+ {
+ GenTree* t = const_cast<GenTree*>(this);
+ return t->gtGetOp1()->IsMultiRegCall();
+ }
+
+ return false;
+}
+
+inline bool GenTree::IsCnsIntOrI() const
+{
+ return (gtOper == GT_CNS_INT);
+}
+
+inline bool GenTree::IsIntegralConst() const
+{
+#ifdef _TARGET_64BIT_
+ return IsCnsIntOrI();
+#else // !_TARGET_64BIT_
+ return ((gtOper == GT_CNS_INT) || (gtOper == GT_CNS_LNG));
+#endif // !_TARGET_64BIT_
+}
+
+inline bool GenTree::IsIntCnsFitsInI32()
+{
+#ifdef _TARGET_64BIT_
+ return IsCnsIntOrI() && ((int)gtIntConCommon.IconValue() == gtIntConCommon.IconValue());
+#else // !_TARGET_64BIT_
+ return IsCnsIntOrI();
+#endif // !_TARGET_64BIT_
+}
+
+inline bool GenTree::IsCnsFltOrDbl() const
+{
+ return OperGet() == GT_CNS_DBL;
+}
+
+inline bool GenTree::IsCnsNonZeroFltOrDbl()
+{
+ if (OperGet() == GT_CNS_DBL)
+ {
+ double constValue = gtDblCon.gtDconVal;
+ return *(__int64*)&constValue != 0;
+ }
+
+ return false;
+}
+
+inline bool GenTree::IsHelperCall()
+{
+ return OperGet() == GT_CALL && gtCall.gtCallType == CT_HELPER;
+}
+
+inline var_types GenTree::CastFromType()
+{
+ return this->gtCast.CastOp()->TypeGet();
+}
+inline var_types& GenTree::CastToType()
+{
+ return this->gtCast.gtCastType;
+}
+
+//-----------------------------------------------------------------------------------
+// HasGCPtr: determine whether this block op involves GC pointers
+//
+// Arguments:
+// None
+//
+// Return Value:
+// Returns true iff the object being copied contains one or more GC pointers.
+//
+// Notes:
+// Of the block nodes, only GT_OBJ and GT_STORE_OBJ are allowed to have GC pointers.
+//
+inline bool GenTreeBlk::HasGCPtr()
+{
+ if ((gtOper == GT_OBJ) || (gtOper == GT_STORE_OBJ))
+ {
+ return (AsObj()->gtGcPtrCount != 0);
+ }
+ return false;
+}
+
+inline bool GenTree::isContainedSpillTemp() const
+{
+#if !defined(LEGACY_BACKEND)
+ // If spilled and no reg at use, then it is treated as contained.
+ if (((gtFlags & GTF_SPILLED) != 0) && ((gtFlags & GTF_NOREG_AT_USE) != 0))
+ {
+ return true;
+ }
+#endif //! LEGACY_BACKEND
+
+ return false;
+}
+
+/*****************************************************************************/
+
+#ifndef _HOST_64BIT_
+#include <poppack.h>
+#endif
+
+/*****************************************************************************/
+
+#if SMALL_TREE_NODES
+
+// In debug, on some platforms (e.g., when LATE_DISASM is defined), GenTreeIntCon is bigger than GenTreeLclFld.
+const size_t TREE_NODE_SZ_SMALL = max(sizeof(GenTreeIntCon), sizeof(GenTreeLclFld));
+
+#endif // SMALL_TREE_NODES
+
+const size_t TREE_NODE_SZ_LARGE = sizeof(GenTreeCall);
+
+/*****************************************************************************
+ * Types returned by GenTree::lvaLclVarRefs()
+ */
+
+enum varRefKinds
+{
+ VR_INVARIANT = 0x00, // an invariant value
+ VR_NONE = 0x00,
+ VR_IND_REF = 0x01, // an object reference
+ VR_IND_SCL = 0x02, // a non-object reference
+ VR_GLB_VAR = 0x04, // a global (clsVar)
+};
+// Add a temp define to avoid merge conflict.
+#define VR_IND_PTR VR_IND_REF
+
+/*****************************************************************************/
+#endif // !GENTREE_H
+/*****************************************************************************/
diff --git a/src/jit/gschecks.cpp b/src/jit/gschecks.cpp
new file mode 100644
index 0000000000..43cbb892e9
--- /dev/null
+++ b/src/jit/gschecks.cpp
@@ -0,0 +1,583 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX GSChecks XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************
+ * gsGSChecksInitCookie
+ * Grabs the cookie for detecting overflow of unsafe buffers.
+ */
+void Compiler::gsGSChecksInitCookie()
+{
+ var_types type = TYP_I_IMPL;
+
+ lvaGSSecurityCookie = lvaGrabTemp(false DEBUGARG("GSSecurityCookie"));
+
+ // Prevent cookie init/check from being optimized
+ lvaSetVarAddrExposed(lvaGSSecurityCookie);
+ lvaTable[lvaGSSecurityCookie].lvType = type;
+
+ info.compCompHnd->getGSCookie(&gsGlobalSecurityCookieVal, &gsGlobalSecurityCookieAddr);
+}
+
+const unsigned NO_SHADOW_COPY = UINT_MAX;
+
+/*****************************************************************************
+ * gsCopyShadowParams
+ * The current function has an unsafe buffer on the stack. Search for vulnerable
+ * parameters which could be used to modify a code address and take over the process
+ * in the case of a buffer overrun. Create a safe local copy for each vulnerable parameter,
+ * which will be allocated below the unsafe buffer. Change uses of the param to the
+ * shadow copy.
+ *
+ * A pointer under indirection is considered vulnerable. A malicious user could read from
+ * protected memory or write to it. If a parameter is assigned/computed into another variable,
+ * and is a pointer (i.e., under indirection), then we consider the variable to be part of the
+ * equivalence class with the parameter. All parameters in the equivalence class are shadowed.
+ */
+void Compiler::gsCopyShadowParams()
+{
+ if (info.compIsVarArgs)
+ {
+ return;
+ }
+
+ // Allocate array for shadow param info
+ gsShadowVarInfo = new (this, CMK_Unknown) ShadowParamVarInfo[lvaCount]();
+
+ // Find groups of variables assigned to each other, and also
+ // tracks variables which are dereferenced and marks them as ptrs.
+ // Look for assignments to *p, and ptrs passed to functions
+ if (gsFindVulnerableParams())
+ {
+ // Replace vulnerable params by shadow copies.
+ gsParamsToShadows();
+ }
+}
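+
+// Illustrative scenario (a sketch, not from the source): for a method along the lines of
+//
+//     void M(char* p, int i) { char buf[16]; /* ... */ *p = (char)i; }
+//
+// "buf" is an unsafe buffer and "p" is dereferenced, so gsFindVulnerableParams() marks "p" as
+// lvIsPtr. gsParamsToShadows() then grabs a shadow local for "p", copies the incoming value into
+// it at the start of the scratch first block, and rewrites uses of "p" to the shadow copy, which
+// the frame layout places below the unsafe buffer.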
+
+// This struct tracks how a tree is being used
+
+struct MarkPtrsInfo
+{
+ Compiler* comp;
+ unsigned lvAssignDef; // Which local variable is the tree being assigned to?
+ bool isAssignSrc; // Is this the source value for an assignment?
+ bool isUnderIndir; // Is this a pointer value tree that is being dereferenced?
+ bool skipNextNode; // Skip a single node during the tree-walk
+
+#ifdef DEBUG
+ void Print()
+ {
+ printf(
+ "[MarkPtrsInfo] = {comp = %p, lvAssignDef = %d, isAssignSrc = %d, isUnderIndir = %d, skipNextNode = %d}\n",
+ comp, lvAssignDef, isAssignSrc, isUnderIndir, skipNextNode);
+ }
+#endif
+};
+
+/*****************************************************************************
+ * gsMarkPtrsAndAssignGroups
+ * Walk a tree looking for assignment groups, variables whose value is used
+ * in a *p store or use, and variables passed to calls. This info is then used
+ * to determine which parameters are vulnerable.
+ * This function carries state indicating whether it is under an assign node, call node,
+ * or indirection node. It starts a new tree walk for its subtrees when the state
+ * changes.
+ */
+Compiler::fgWalkResult Compiler::gsMarkPtrsAndAssignGroups(GenTreePtr* pTree, fgWalkData* data)
+{
+ struct MarkPtrsInfo* pState = (MarkPtrsInfo*)data->pCallbackData;
+ struct MarkPtrsInfo newState = *pState;
+ Compiler* comp = data->compiler;
+ GenTreePtr tree = *pTree;
+ ShadowParamVarInfo* shadowVarInfo = pState->comp->gsShadowVarInfo;
+ assert(shadowVarInfo);
+ bool fIsBlk = false;
+ unsigned lclNum;
+
+ assert(!pState->isAssignSrc || pState->lvAssignDef != (unsigned)-1);
+
+ if (pState->skipNextNode)
+ {
+ pState->skipNextNode = false;
+ return WALK_CONTINUE;
+ }
+
+ switch (tree->OperGet())
+ {
+ // Indirections - look for *p uses and defs
+ case GT_IND:
+ case GT_OBJ:
+ case GT_ARR_ELEM:
+ case GT_ARR_INDEX:
+ case GT_ARR_OFFSET:
+ case GT_FIELD:
+
+ newState.isUnderIndir = true;
+ {
+ newState.skipNextNode = true; // Don't have to worry about which kind of node we're dealing with
+ comp->fgWalkTreePre(&tree, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ return WALK_SKIP_SUBTREES;
+
+ // local vars and param uses
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ if (pState->isUnderIndir)
+ {
+ // The variable is being dereferenced for a read or a write.
+ comp->lvaTable[lclNum].lvIsPtr = 1;
+ }
+
+ if (pState->isAssignSrc)
+ {
+ //
+ // Add lvAssignDef and lclNum to a common assign group
+ if (shadowVarInfo[pState->lvAssignDef].assignGroup)
+ {
+ if (shadowVarInfo[lclNum].assignGroup)
+ {
+ // OR both bit vector
+ shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectOr(shadowVarInfo[lclNum].assignGroup);
+ }
+ else
+ {
+ shadowVarInfo[pState->lvAssignDef].assignGroup->bitVectSet(lclNum);
+ }
+
+ // Point both to the same bit vector
+ shadowVarInfo[lclNum].assignGroup = shadowVarInfo[pState->lvAssignDef].assignGroup;
+ }
+ else if (shadowVarInfo[lclNum].assignGroup)
+ {
+ shadowVarInfo[lclNum].assignGroup->bitVectSet(pState->lvAssignDef);
+
+ // Point both to the same bit vector
+ shadowVarInfo[pState->lvAssignDef].assignGroup = shadowVarInfo[lclNum].assignGroup;
+ }
+ else
+ {
+ FixedBitVect* bv = FixedBitVect::bitVectInit(pState->comp->lvaCount, pState->comp);
+
+ // (shadowVarInfo[pState->lvAssignDef] == NULL && shadowVarInfo[lclNew] == NULL);
+ // Neither of them has an assign group yet. Make a new one.
+ shadowVarInfo[pState->lvAssignDef].assignGroup = bv;
+ shadowVarInfo[lclNum].assignGroup = bv;
+ bv->bitVectSet(pState->lvAssignDef);
+ bv->bitVectSet(lclNum);
+ }
+ }
+ return WALK_CONTINUE;
+
+ // Calls - Mark arg variables
+ case GT_CALL:
+
+ newState.isUnderIndir = false;
+ newState.isAssignSrc = false;
+ {
+ if (tree->gtCall.gtCallObjp)
+ {
+ newState.isUnderIndir = true;
+ comp->fgWalkTreePre(&tree->gtCall.gtCallObjp, gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ for (GenTreeArgList* args = tree->gtCall.gtCallArgs; args; args = args->Rest())
+ {
+ comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ for (GenTreeArgList* args = tree->gtCall.gtCallLateArgs; args; args = args->Rest())
+ {
+ comp->fgWalkTreePre(&args->Current(), gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ newState.isUnderIndir = true;
+
+ // A function pointer is treated like a write-through pointer since
+ // it controls what code gets executed, and so indirectly can cause
+ // a write to memory.
+ comp->fgWalkTreePre(&tree->gtCall.gtCallAddr, gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ }
+ return WALK_SKIP_SUBTREES;
+
+ case GT_ADDR:
+ newState.isUnderIndir = false;
+ // We'll assume p in "**p = " can be vulnerable because by changing 'p', someone
+ // could control where **p stores to.
+ {
+ comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ return WALK_SKIP_SUBTREES;
+
+ default:
+ // Assignments - track assign groups and *p defs.
+ if (tree->OperIsAssignment())
+ {
+ bool isLocVar;
+ bool isLocFld;
+
+ if (tree->OperIsBlkOp())
+ {
+ // Blk assignments are always handled as if they have implicit indirections.
+ // TODO-1stClassStructs: improve this.
+ newState.isUnderIndir = true;
+ comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+
+ if (tree->OperIsInitBlkOp())
+ {
+ newState.isUnderIndir = false;
+ }
+ comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+ else
+ {
+ // Walk dst side
+ comp->fgWalkTreePre(&tree->gtOp.gtOp1, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+
+ // Now handle src side
+ isLocVar = tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR;
+ isLocFld = tree->gtOp.gtOp1->OperGet() == GT_LCL_FLD;
+
+ if ((isLocVar || isLocFld) && tree->gtOp.gtOp2)
+ {
+ lclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ newState.lvAssignDef = lclNum;
+ newState.isAssignSrc = true;
+ }
+
+ comp->fgWalkTreePre(&tree->gtOp.gtOp2, comp->gsMarkPtrsAndAssignGroups, (void*)&newState);
+ }
+
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
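+
+// Worked example (illustrative): for the statements "a = p; b = a; *b = 0;", the first assignment
+// places {a, p} in one assign-group bit vector, the second merges "b" into the same vector, and
+// the "*b = 0" store sets lvIsPtr on "b". gsFindVulnerableParams() later propagates lvIsPtr across
+// the whole group, so the parameter "p" is also treated as vulnerable and gets a shadow copy.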
+
+/*****************************************************************************
+ * gsFindVulnerableParams
+ * Walk all the trees looking for ptrs, args, assign groups, *p stores, etc.
+ * Then use that info to figure out vulnerable pointers.
+ *
+ * It returns true if it found at least one vulnerable pointer parameter that
+ * needs to be shadow-copied.
+ */
+
+bool Compiler::gsFindVulnerableParams()
+{
+ MarkPtrsInfo info;
+
+ info.comp = this;
+ info.lvAssignDef = (unsigned)-1;
+ info.isUnderIndir = false;
+ info.isAssignSrc = false;
+ info.skipNextNode = false;
+
+ // Walk all the trees setting lvIsWritePtr, lvIsOutgoingArg, lvIsPtr and assignGroup.
+ fgWalkAllTreesPre(gsMarkPtrsAndAssignGroups, &info);
+
+ // Track whether at least one vulnerable local was found by the end of the loop.
+ bool hasOneVulnerable = false;
+
+ // Initialize propagated[v0...vn] = {0}^n, so we can skip the ones propagated through
+ // some assign group.
+ FixedBitVect* propagated = (lvaCount > 0) ? FixedBitVect::bitVectInit(lvaCount, this) : nullptr;
+
+ for (UINT lclNum = 0; lclNum < lvaCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ ShadowParamVarInfo* shadowInfo = &gsShadowVarInfo[lclNum];
+
+ // If the local was dereferenced (lvIsPtr) or is an unsafe buffer, we consider it vulnerable.
+ if (varDsc->lvIsPtr || varDsc->lvIsUnsafeBuffer)
+ {
+ hasOneVulnerable = true;
+ }
+
+ // Now, propagate the info through the assign group (an equivalence class of vars transitively assigned.)
+ if (shadowInfo->assignGroup == nullptr || propagated->bitVectTest(lclNum))
+ {
+ continue;
+ }
+
+ // Propagate lvIsPtr, so that:
+ // 1. Any parameter in the equivalence class can be identified as lvIsPtr and hence shadowed.
+ // 2. Buffers with pointers are placed at lower memory addresses than buffers without pointers.
+ UINT isUnderIndir = varDsc->lvIsPtr;
+
+ // First pass -- find if any variable is vulnerable.
+ FixedBitVect* assignGroup = shadowInfo->assignGroup;
+ for (UINT lclNum = assignGroup->bitVectGetFirst(); lclNum != (unsigned)-1 && !isUnderIndir;
+ lclNum = assignGroup->bitVectGetNext(lclNum))
+ {
+ isUnderIndir |= lvaTable[lclNum].lvIsPtr;
+ }
+
+ // Vulnerable, so propagate to all members of the equivalence class.
+ if (isUnderIndir)
+ {
+ hasOneVulnerable = true;
+ }
+ // Nothing to propagate.
+ else
+ {
+ continue;
+ }
+
+ // Second pass -- mark all members of the group as vulnerable.
+ assert(isUnderIndir);
+ for (UINT lclNum = assignGroup->bitVectGetFirst(); lclNum != (unsigned)-1;
+ lclNum = assignGroup->bitVectGetNext(lclNum))
+ {
+ lvaTable[lclNum].lvIsPtr = TRUE;
+ propagated->bitVectSet(lclNum);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Equivalence assign group %s: ", isUnderIndir ? "isPtr " : "");
+ for (UINT lclNum = assignGroup->bitVectGetFirst(); lclNum != (unsigned)-1;
+ lclNum = assignGroup->bitVectGetNext(lclNum))
+ {
+ gtDispLclVar(lclNum, false);
+ printf(" ");
+ }
+ printf("\n");
+ }
+#endif
+ }
+
+ return hasOneVulnerable;
+}
+
+/*****************************************************************************
+ * gsParamsToShadows
+ * Copy each vulnerable param ptr or buffer to a local shadow copy and replace
+ * uses of the param by the shadow copy
+ */
+void Compiler::gsParamsToShadows()
+{
+ // Cache old count since we'll add new variables, and
+ // gsShadowVarInfo will not grow to accommodate the new ones.
+ UINT lvaOldCount = lvaCount;
+
+ // Create shadow copy for each param candidate
+ for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ gsShadowVarInfo[lclNum].shadowCopy = NO_SHADOW_COPY;
+
+ // Only care about params whose values are on the stack
+ if (!ShadowParamVarInfo::mayNeedShadowCopy(varDsc))
+ {
+ continue;
+ }
+
+ if (!varDsc->lvIsPtr && !varDsc->lvIsUnsafeBuffer)
+ {
+ continue;
+ }
+
+ int shadowVar = lvaGrabTemp(false DEBUGARG("shadowVar"));
+ // Copy some info
+
+ var_types type = varTypeIsSmall(varDsc->TypeGet()) ? TYP_INT : varDsc->TypeGet();
+ lvaTable[shadowVar].lvType = type;
+
+#ifdef FEATURE_SIMD
+ lvaTable[shadowVar].lvSIMDType = varDsc->lvSIMDType;
+ lvaTable[shadowVar].lvUsedInSIMDIntrinsic = varDsc->lvUsedInSIMDIntrinsic;
+ if (varDsc->lvSIMDType)
+ {
+ lvaTable[shadowVar].lvBaseType = varDsc->lvBaseType;
+ }
+#endif
+ lvaTable[shadowVar].lvRegStruct = varDsc->lvRegStruct;
+
+ lvaTable[shadowVar].lvAddrExposed = varDsc->lvAddrExposed;
+ lvaTable[shadowVar].lvDoNotEnregister = varDsc->lvDoNotEnregister;
+#ifdef DEBUG
+ lvaTable[shadowVar].lvVMNeedsStackAddr = varDsc->lvVMNeedsStackAddr;
+ lvaTable[shadowVar].lvLiveInOutOfHndlr = varDsc->lvLiveInOutOfHndlr;
+ lvaTable[shadowVar].lvLclFieldExpr = varDsc->lvLclFieldExpr;
+ lvaTable[shadowVar].lvLiveAcrossUCall = varDsc->lvLiveAcrossUCall;
+#endif
+ lvaTable[shadowVar].lvVerTypeInfo = varDsc->lvVerTypeInfo;
+ lvaTable[shadowVar].lvGcLayout = varDsc->lvGcLayout;
+ lvaTable[shadowVar].lvIsUnsafeBuffer = varDsc->lvIsUnsafeBuffer;
+ lvaTable[shadowVar].lvIsPtr = varDsc->lvIsPtr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Var V%02u is shadow param candidate. Shadow copy is V%02u.\n", lclNum, shadowVar);
+ }
+#endif
+
+ gsShadowVarInfo[lclNum].shadowCopy = shadowVar;
+ }
+
+ // Replace param uses with shadow copy
+ fgWalkAllTreesPre(gsReplaceShadowParams, (void*)this);
+
+ // Now insert code to copy the params to their shadow copy.
+ for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ unsigned shadowVar = gsShadowVarInfo[lclNum].shadowCopy;
+ if (shadowVar == NO_SHADOW_COPY)
+ {
+ continue;
+ }
+
+ var_types type = lvaTable[shadowVar].TypeGet();
+
+ GenTreePtr src = gtNewLclvNode(lclNum, varDsc->TypeGet());
+ GenTreePtr dst = gtNewLclvNode(shadowVar, type);
+
+ src->gtFlags |= GTF_DONT_CSE;
+ dst->gtFlags |= GTF_DONT_CSE;
+
+ GenTreePtr opAssign = nullptr;
+ if (type == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+
+ // We don't need unsafe value cls check here since we are copying the params and this flag
+ // would have been set on the original param before reaching here.
+ lvaSetStruct(shadowVar, clsHnd, false);
+
+ src = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ dst = gtNewOperNode(GT_ADDR, TYP_BYREF, dst);
+
+ opAssign = gtNewCpObjNode(dst, src, clsHnd, false);
+ lvaTable[shadowVar].lvIsMultiRegArg = lvaTable[lclNum].lvIsMultiRegArg;
+ lvaTable[shadowVar].lvIsMultiRegRet = lvaTable[lclNum].lvIsMultiRegRet;
+ }
+ else
+ {
+ opAssign = gtNewAssignNode(dst, src);
+ }
+ fgEnsureFirstBBisScratch();
+ (void)fgInsertStmtAtBeg(fgFirstBB, fgMorphTree(opAssign));
+ }
+
+ // If the method has "Jmp CalleeMethod", then we need to copy shadow params back to original
+ // params before "jmp" to CalleeMethod.
+ if (compJmpOpUsed)
+ {
+ // There could be more than one basic block ending with a "Jmp" type tail call.
+ // We would have to insert assignments in all such blocks, just before the GT_JMP statement.
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbJumpKind != BBJ_RETURN)
+ {
+ continue;
+ }
+
+ if ((block->bbFlags & BBF_HAS_JMP) == 0)
+ {
+ continue;
+ }
+
+ for (UINT lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ unsigned shadowVar = gsShadowVarInfo[lclNum].shadowCopy;
+ if (shadowVar == NO_SHADOW_COPY)
+ {
+ continue;
+ }
+
+ GenTreePtr src = gtNewLclvNode(shadowVar, lvaTable[shadowVar].TypeGet());
+ GenTreePtr dst = gtNewLclvNode(lclNum, varDsc->TypeGet());
+
+ src->gtFlags |= GTF_DONT_CSE;
+ dst->gtFlags |= GTF_DONT_CSE;
+
+ GenTreePtr opAssign = nullptr;
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ src = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ dst = gtNewOperNode(GT_ADDR, TYP_BYREF, dst);
+
+ opAssign = gtNewCpObjNode(dst, src, clsHnd, false);
+ }
+ else
+ {
+ opAssign = gtNewAssignNode(dst, src);
+ }
+
+ (void)fgInsertStmtNearEnd(block, fgMorphTree(opAssign));
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * gsReplaceShadowParams (tree-walk call-back)
+ * Replace all vulnerable param uses with their shadow copies.
+ */
+
+Compiler::fgWalkResult Compiler::gsReplaceShadowParams(GenTreePtr* pTree, fgWalkData* data)
+{
+ Compiler* comp = data->compiler;
+ GenTreePtr tree = *pTree;
+ GenTreePtr asg = nullptr;
+
+ if (tree->gtOper == GT_ASG)
+ {
+ asg = tree; // "asg" is the assignment tree.
+ tree = tree->gtOp.gtOp1; // "tree" is the local var tree at the left-hand side of the assignment.
+ }
+
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD)
+ {
+ UINT paramNum = tree->gtLclVarCommon.gtLclNum;
+
+ if (!ShadowParamVarInfo::mayNeedShadowCopy(&comp->lvaTable[paramNum]) ||
+ comp->gsShadowVarInfo[paramNum].shadowCopy == NO_SHADOW_COPY)
+ {
+ return WALK_CONTINUE;
+ }
+
+ tree->gtLclVarCommon.SetLclNum(comp->gsShadowVarInfo[paramNum].shadowCopy);
+
+ // In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param.
+ // Make sure we update the type of the local var tree as well.
+ if (varTypeIsSmall(comp->lvaTable[paramNum].TypeGet()))
+ {
+ tree->gtType = TYP_INT;
+ if (asg)
+ {
+ // If this is an assignment tree, propagate the type to it as well.
+ asg->gtType = TYP_INT;
+ }
+ }
+ }
+
+ return WALK_CONTINUE;
+}
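+
+// For illustration only (hypothetical local numbers): if V01 is a vulnerable TYP_SHORT
+// param whose shadow copy is V05, the walk above rewrites a use such as
+//
+//     GT_ASG(short)
+//         GT_LCL_VAR(short) V01
+//         <expr>
+//
+// into
+//
+//     GT_ASG(int)
+//         GT_LCL_VAR(int) V05
+//         <expr>
+//
+// since shadow copies of small-typed params are created as TYP_INT and the type of the
+// assignment node is widened to match.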
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
new file mode 100644
index 0000000000..a03bcfe4b0
--- /dev/null
+++ b/src/jit/gtlist.h
@@ -0,0 +1,255 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+/*****************************************************************************/
+#ifndef GTNODE
+#error Define GTNODE before including this file.
+#endif
+/*****************************************************************************/
+//
+// Node enum
+// , "Node name"
+// ,commutative
+// ,operKind
+
+GTNODE(NONE , "<none>" ,0,GTK_SPECIAL)
+
+//-----------------------------------------------------------------------------
+// Leaf nodes (i.e. these nodes have no sub-operands):
+//-----------------------------------------------------------------------------
+
+GTNODE(LCL_VAR , "lclVar" ,0,GTK_LEAF|GTK_LOCAL) // local variable
+GTNODE(LCL_FLD , "lclFld" ,0,GTK_LEAF|GTK_LOCAL) // field in a non-primitive variable
+GTNODE(LCL_VAR_ADDR , "&lclVar" ,0,GTK_LEAF) // address of local variable
+GTNODE(LCL_FLD_ADDR , "&lclFld" ,0,GTK_LEAF) // address of field in a non-primitive variable
+GTNODE(STORE_LCL_VAR , "st.lclVar" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to local variable
+GTNODE(STORE_LCL_FLD , "st.lclFld" ,0,GTK_UNOP|GTK_LOCAL|GTK_NOVALUE) // store to field in a non-primitive variable
+GTNODE(CATCH_ARG , "catchArg" ,0,GTK_LEAF) // Exception object in a catch block
+GTNODE(LABEL , "codeLabel" ,0,GTK_LEAF) // Jump-target
+GTNODE(FTN_ADDR , "ftnAddr" ,0,GTK_LEAF) // Address of a function
+GTNODE(RET_EXPR , "retExpr" ,0,GTK_LEAF) // Place holder for the return expression from an inline candidate
+
+//-----------------------------------------------------------------------------
+// Constant nodes:
+//-----------------------------------------------------------------------------
+
+GTNODE(CNS_INT , "const" ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_LNG , "lconst" ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_DBL , "dconst" ,0,GTK_LEAF|GTK_CONST)
+GTNODE(CNS_STR , "sconst" ,0,GTK_LEAF|GTK_CONST)
+
+//-----------------------------------------------------------------------------
+// Unary operators (1 operand):
+//-----------------------------------------------------------------------------
+
+GTNODE(NOT , "~" ,0,GTK_UNOP)
+GTNODE(NOP , "nop" ,0,GTK_UNOP)
+GTNODE(NEG , "unary -" ,0,GTK_UNOP)
+GTNODE(COPY , "copy" ,0,GTK_UNOP) // Copies a variable from its current location to a register that satisfies
+ // code generation constraints. The child is the actual lclVar node.
+GTNODE(RELOAD , "reload" ,0,GTK_UNOP)
+GTNODE(CHS , "flipsign" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR) // GT_CHS is actually unary -- op2 is ignored.
+ // Changing it to unary presently causes problems, though -- it would take a little work to fix.
+
+GTNODE(ARR_LENGTH , "arrLen" ,0,GTK_UNOP|GTK_EXOP) // array-length
+
+GTNODE(INTRINSIC , "intrinsic" ,0,GTK_BINOP|GTK_EXOP) // intrinsics
+
+GTNODE(LOCKADD , "lockAdd" ,0,GTK_BINOP|GTK_NOVALUE)
+GTNODE(XADD , "XAdd" ,0,GTK_BINOP)
+GTNODE(XCHG , "Xchg" ,0,GTK_BINOP)
+GTNODE(CMPXCHG , "cmpxchg" ,0,GTK_SPECIAL)
+GTNODE(MEMORYBARRIER , "memoryBarrier" ,0,GTK_LEAF|GTK_NOVALUE)
+
+GTNODE(CAST , "cast" ,0,GTK_UNOP|GTK_EXOP) // conversion to another type
+GTNODE(CKFINITE , "ckfinite" ,0,GTK_UNOP) // Check for NaN
+GTNODE(LCLHEAP , "lclHeap" ,0,GTK_UNOP) // alloca()
+GTNODE(JMP , "jump" ,0,GTK_LEAF|GTK_NOVALUE) // Jump to another function
+
+
+GTNODE(ADDR , "addr" ,0,GTK_UNOP) // address of
+GTNODE(IND , "indir" ,0,GTK_UNOP) // load indirection
+GTNODE(STOREIND , "storeIndir" ,0,GTK_BINOP|GTK_NOVALUE) // store indirection
+
+ // TODO-Cleanup: GT_ARR_BOUNDS_CHECK should be made a GTK_BINOP now that it has only two child nodes
+GTNODE(ARR_BOUNDS_CHECK , "arrBndsChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // array bounds check
+GTNODE(OBJ , "obj" ,0,GTK_UNOP|GTK_EXOP) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
+GTNODE(STORE_OBJ , "storeObj" ,0,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // Object that MAY have gc pointers, and thus includes the relevant gc layout info.
+GTNODE(BLK , "blk" ,0,GTK_UNOP) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
+GTNODE(STORE_BLK , "storeBlk" ,0,GTK_BINOP|GTK_NOVALUE) // Block/object with no gc pointers, and with a known size (e.g. a struct with no gc fields)
+GTNODE(DYN_BLK , "DynBlk" ,0,GTK_SPECIAL) // Dynamically sized block object
+GTNODE(STORE_DYN_BLK , "storeDynBlk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Dynamically sized block object
+GTNODE(BOX , "box" ,0,GTK_UNOP|GTK_EXOP|GTK_NOTLIR)
+
+#ifdef FEATURE_SIMD
+GTNODE(SIMD_CHK , "simdChk" ,0,GTK_SPECIAL|GTK_NOVALUE) // Compare whether an index is less than the given SIMD vector length, and call CORINFO_HELP_RNGCHKFAIL if not.
+ // TODO-CQ: In future may want to add a field that specifies different exceptions but we'll
+ // need VM assistance for that.
+ // TODO-CQ: It would actually be very nice to make this an unconditional throw, and expose the control flow that
+ // does the compare, so that it can be more easily optimized. But that involves generating qmarks at import time...
+#endif // FEATURE_SIMD
+
+GTNODE(ALLOCOBJ , "allocObj" ,0,GTK_UNOP|GTK_EXOP) // object allocator
+
+//-----------------------------------------------------------------------------
+// Binary operators (2 operands):
+//-----------------------------------------------------------------------------
+
+GTNODE(ADD , "+" ,1,GTK_BINOP)
+GTNODE(SUB , "-" ,0,GTK_BINOP)
+GTNODE(MUL , "*" ,1,GTK_BINOP)
+GTNODE(DIV , "/" ,0,GTK_BINOP)
+GTNODE(MOD , "%" ,0,GTK_BINOP)
+
+GTNODE(UDIV , "un-/" ,0,GTK_BINOP)
+GTNODE(UMOD , "un-%" ,0,GTK_BINOP)
+
+GTNODE(OR , "|" ,1,GTK_BINOP|GTK_LOGOP)
+GTNODE(XOR , "^" ,1,GTK_BINOP|GTK_LOGOP)
+GTNODE(AND , "&" ,1,GTK_BINOP|GTK_LOGOP)
+
+GTNODE(LSH , "<<" ,0,GTK_BINOP)
+GTNODE(RSH , ">>" ,0,GTK_BINOP)
+GTNODE(RSZ , ">>>" ,0,GTK_BINOP)
+GTNODE(ROL , "rol" ,0,GTK_BINOP)
+GTNODE(ROR , "ror" ,0,GTK_BINOP)
+GTNODE(MULHI , "mulhi" ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply)
+
+GTNODE(ASG , "=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_ADD , "+=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_SUB , "-=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_MUL , "*=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_DIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_MOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+
+GTNODE(ASG_UDIV , "/=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_UMOD , "%=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+
+GTNODE(ASG_OR , "|=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_XOR , "^=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_AND , "&=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_LSH , "<<=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_RSH , ">>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+GTNODE(ASG_RSZ , ">>>=" ,0,GTK_BINOP|GTK_ASGOP|GTK_NOTLIR)
+
+GTNODE(EQ , "==" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(NE , "!=" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(LT , "<" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(LE , "<=" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(GE , ">=" ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(GT , ">" ,0,GTK_BINOP|GTK_RELOP)
+
+GTNODE(COMMA , "comma" ,0,GTK_BINOP|GTK_NOTLIR)
+
+GTNODE(QMARK , "qmark" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR)
+GTNODE(COLON , "colon" ,0,GTK_BINOP|GTK_NOTLIR)
+
+GTNODE(INDEX , "[]" ,0,GTK_BINOP|GTK_EXOP|GTK_NOTLIR) // SZ-array-element
+
+GTNODE(MKREFANY , "mkrefany" ,0,GTK_BINOP)
+
+GTNODE(LEA , "lea" ,0,GTK_BINOP|GTK_EXOP)
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+// A GT_LONG node simply represents the long value produced by the concatenation
+// of its two (lower and upper half) operands. Some GT_LONG nodes are transient,
+// created during the decomposition of longs; others are handled by codegen as
+// operands of nodes such as calls, returns and stores of long lclVars.
+GTNODE(LONG , "gt_long" ,0,GTK_BINOP)
+
+// The following nodes represent the lower and upper halves of a 64-bit operation
+// that requires a carry/borrow; the upper-half nodes are named GT_XXX_HI for
+// consistency.
+GTNODE(ADD_LO , "+Lo" ,1,GTK_BINOP)
+GTNODE(ADD_HI , "+Hi" ,1,GTK_BINOP)
+GTNODE(SUB_LO , "-Lo" ,0,GTK_BINOP)
+GTNODE(SUB_HI , "-Hi" ,0,GTK_BINOP)
+GTNODE(MUL_HI , "*Hi" ,1,GTK_BINOP)
+GTNODE(DIV_HI , "/Hi" ,0,GTK_BINOP)
+GTNODE(MOD_HI , "%Hi" ,0,GTK_BINOP)
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_64BIT_)
+
+#ifdef FEATURE_SIMD
+GTNODE(SIMD , "simd" ,0,GTK_BINOP|GTK_EXOP) // SIMD functions/operators/intrinsics
+#endif // FEATURE_SIMD
+
+//-----------------------------------------------------------------------------
+// Other nodes that look like unary/binary operators:
+//-----------------------------------------------------------------------------
+
+GTNODE(JTRUE , "jmpTrue" ,0,GTK_UNOP|GTK_NOVALUE)
+
+GTNODE(LIST , "<list>" ,0,GTK_BINOP)
+
+//-----------------------------------------------------------------------------
+// Other nodes that have special structure:
+//-----------------------------------------------------------------------------
+
+GTNODE(FIELD , "field" ,0,GTK_SPECIAL) // Member-field
+GTNODE(ARR_ELEM , "arrMD&" ,0,GTK_SPECIAL) // Multi-dimensional array-element address
+GTNODE(ARR_INDEX , "arrMDIdx" ,0,GTK_BINOP|GTK_EXOP) // Effective, bounds-checked index for one dimension of a multi-dimensional array element
+GTNODE(ARR_OFFSET , "arrMDOffs" ,0,GTK_SPECIAL) // Flattened offset of multi-dimensional array element
+GTNODE(CALL , "call()" ,0,GTK_SPECIAL)
+
+//-----------------------------------------------------------------------------
+// Statement operator nodes:
+//-----------------------------------------------------------------------------
+
+GTNODE(BEG_STMTS , "begStmts" ,0,GTK_SPECIAL|GTK_NOVALUE) // used only temporarily in importer by impBegin/EndTreeList()
+GTNODE(STMT , "stmtExpr" ,0,GTK_SPECIAL|GTK_NOVALUE) // top-level list nodes in bbTreeList
+
+GTNODE(RETURN , "return" ,0,GTK_UNOP|GTK_NOVALUE) // return from current function
+GTNODE(SWITCH , "switch" ,0,GTK_UNOP|GTK_NOVALUE) // switch
+
+GTNODE(NO_OP , "no_op" ,0,GTK_LEAF|GTK_NOVALUE) // nop!
+
+GTNODE(START_NONGC, "start_nongc",0,GTK_LEAF|GTK_NOVALUE) // starts a new instruction group that will be non-gc interruptible
+
+GTNODE(PROF_HOOK , "prof_hook" ,0,GTK_LEAF|GTK_NOVALUE) // profiler Enter/Leave/TailCall hook
+
+GTNODE(RETFILT , "retfilt", 0,GTK_UNOP|GTK_NOVALUE) // end filter with TYP_I_IMPL return value
+#if !FEATURE_EH_FUNCLETS
+GTNODE(END_LFIN , "endLFin" ,0,GTK_LEAF|GTK_NOVALUE) // end locally-invoked finally
+#endif // !FEATURE_EH_FUNCLETS
+
+//-----------------------------------------------------------------------------
+// Nodes used for optimizations.
+//-----------------------------------------------------------------------------
+
+GTNODE(PHI , "phi" ,0,GTK_UNOP) // phi node for ssa.
+GTNODE(PHI_ARG , "phiArg" ,0,GTK_LEAF|GTK_LOCAL) // phi(phiarg, phiarg, phiarg)
+
+//-----------------------------------------------------------------------------
+// Nodes used by Lower to generate a closer CPU representation of other nodes
+//-----------------------------------------------------------------------------
+
+GTNODE(JMPTABLE , "jumpTable" , 0, GTK_LEAF) // Generates the jump table for switches
+GTNODE(SWITCH_TABLE, "tableSwitch", 0, GTK_BINOP|GTK_NOVALUE) // Jump Table based switch construct
+
+//-----------------------------------------------------------------------------
+// Nodes used only within the code generator:
+//-----------------------------------------------------------------------------
+
+GTNODE(REG_VAR , "regVar" ,0,GTK_LEAF|GTK_LOCAL) // register variable
+GTNODE(CLS_VAR , "clsVar" ,0,GTK_LEAF) // static data member
+GTNODE(CLS_VAR_ADDR , "&clsVar" ,0,GTK_LEAF) // static data member address
+GTNODE(STORE_CLS_VAR, "st.clsVar" ,0,GTK_LEAF|GTK_NOVALUE) // store to static data member
+GTNODE(ARGPLACE , "argPlace" ,0,GTK_LEAF) // placeholder for a register arg
+GTNODE(NULLCHECK , "nullcheck" ,0,GTK_UNOP|GTK_NOVALUE) // null checks the source
+GTNODE(PHYSREG , "physregSrc" ,0,GTK_LEAF) // read from a physical register
+GTNODE(PHYSREGDST , "physregDst" ,0,GTK_UNOP|GTK_NOVALUE) // write to a physical register
+GTNODE(EMITNOP , "emitnop" ,0,GTK_LEAF|GTK_NOVALUE) // emitter-placed nop
+GTNODE(PINVOKE_PROLOG,"pinvoke_prolog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq
+GTNODE(PINVOKE_EPILOG,"pinvoke_epilog",0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq
+GTNODE(PUTARG_REG , "putarg_reg" ,0,GTK_UNOP) // operator that places outgoing arg in register
+GTNODE(PUTARG_STK , "putarg_stk" ,0,GTK_UNOP) // operator that places outgoing arg in stack
+GTNODE(RETURNTRAP , "returnTrap" ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc
+GTNODE(SWAP , "swap" ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers)
+GTNODE(IL_OFFSET , "il_offset" ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes
+
+/*****************************************************************************/
+#undef GTNODE
+/*****************************************************************************/
+// clang-format on
diff --git a/src/jit/gtstructs.h b/src/jit/gtstructs.h
new file mode 100644
index 0000000000..895d3b6598
--- /dev/null
+++ b/src/jit/gtstructs.h
@@ -0,0 +1,112 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+
+#ifndef GTSTRUCT_0
+#error Define GTSTRUCT_0 before including this file.
+#endif
+
+#ifndef GTSTRUCT_1
+#error Define GTSTRUCT_1 before including this file.
+#endif
+
+#ifndef GTSTRUCT_2
+#error Define GTSTRUCT_2 before including this file.
+#endif
+
+#ifndef GTSTRUCT_3
+#error Define GTSTRUCT_3 before including this file.
+#endif
+
+#ifndef GTSTRUCT_4
+#error Define GTSTRUCT_4 before including this file.
+#endif
+
+#ifndef GTSTRUCT_N
+#error Define GTSTRUCT_N before including this file.
+#endif
+
+/*****************************************************************************/
+
+//
+// Field name , Allowed node enum(s)
+//
+
+GTSTRUCT_0(UnOp , GT_OP)
+GTSTRUCT_0(Op , GT_OP)
+#if !FEATURE_EH_FUNCLETS
+GTSTRUCT_2(Val , GT_END_LFIN, GT_JMP)
+#else
+GTSTRUCT_1(Val , GT_JMP)
+#endif
+#ifndef LEGACY_BACKEND
+GTSTRUCT_3(IntConCommon, GT_CNS_INT, GT_CNS_LNG, GT_JMPTABLE)
+GTSTRUCT_1(JumpTable , GT_JMPTABLE)
+#else // LEGACY_BACKEND
+GTSTRUCT_2(IntConCommon, GT_CNS_INT, GT_CNS_LNG)
+#endif // LEGACY_BACKEND
+GTSTRUCT_1(IntCon , GT_CNS_INT)
+GTSTRUCT_1(LngCon , GT_CNS_LNG)
+GTSTRUCT_1(DblCon , GT_CNS_DBL)
+GTSTRUCT_1(StrCon , GT_CNS_STR)
+GTSTRUCT_N(LclVarCommon, GT_LCL_VAR, GT_LCL_FLD, GT_REG_VAR, GT_PHI_ARG, GT_STORE_LCL_VAR, GT_STORE_LCL_FLD, GT_LCL_VAR_ADDR, GT_LCL_FLD_ADDR)
+GTSTRUCT_3(LclVar , GT_LCL_VAR, GT_LCL_VAR_ADDR, GT_STORE_LCL_VAR)
+#ifndef LEGACY_BACKEND
+GTSTRUCT_3(LclFld , GT_LCL_FLD, GT_STORE_LCL_FLD, GT_LCL_FLD_ADDR)
+#else // LEGACY_BACKEND
+GTSTRUCT_1(LclFld , GT_LCL_FLD)
+#endif // LEGACY_BACKEND
+GTSTRUCT_1(RegVar , GT_REG_VAR)
+GTSTRUCT_1(Cast , GT_CAST)
+GTSTRUCT_1(Box , GT_BOX)
+GTSTRUCT_1(Field , GT_FIELD)
+GTSTRUCT_1(Call , GT_CALL)
+GTSTRUCT_1(ArgList , GT_LIST)
+GTSTRUCT_1(Colon , GT_COLON)
+GTSTRUCT_1(FptrVal , GT_FTN_ADDR)
+GTSTRUCT_1(Intrinsic , GT_INTRINSIC)
+GTSTRUCT_1(Index , GT_INDEX)
+#ifdef FEATURE_SIMD
+GTSTRUCT_2(BoundsChk , GT_ARR_BOUNDS_CHECK, GT_SIMD_CHK)
+#else // !FEATURE_SIMD
+GTSTRUCT_1(BoundsChk , GT_ARR_BOUNDS_CHECK)
+#endif // !FEATURE_SIMD
+GTSTRUCT_1(ArrLen , GT_ARR_LENGTH)
+GTSTRUCT_1(ArrElem , GT_ARR_ELEM)
+GTSTRUCT_1(ArrOffs , GT_ARR_OFFSET)
+GTSTRUCT_1(ArrIndex , GT_ARR_INDEX)
+GTSTRUCT_1(RetExpr , GT_RET_EXPR)
+GTSTRUCT_2(Stmt , GT_STMT, GT_IL_OFFSET)
+GTSTRUCT_2(CopyOrReload, GT_COPY, GT_RELOAD)
+GTSTRUCT_2(ClsVar , GT_CLS_VAR, GT_CLS_VAR_ADDR)
+GTSTRUCT_1(ArgPlace , GT_ARGPLACE)
+GTSTRUCT_1(Label , GT_LABEL)
+GTSTRUCT_1(CmpXchg , GT_CMPXCHG)
+GTSTRUCT_1(AddrMode , GT_LEA)
+GTSTRUCT_N(Blk , GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_DYN_BLK, GT_STORE_DYN_BLK)
+GTSTRUCT_2(Obj , GT_OBJ, GT_STORE_OBJ)
+GTSTRUCT_2(DynBlk , GT_DYN_BLK, GT_STORE_DYN_BLK)
+GTSTRUCT_1(Qmark , GT_QMARK)
+GTSTRUCT_1(PhiArg , GT_PHI_ARG)
+GTSTRUCT_1(StoreInd , GT_STOREIND)
+GTSTRUCT_N(Indir , GT_STOREIND, GT_IND, GT_NULLCHECK, GT_BLK, GT_STORE_BLK, GT_OBJ, GT_STORE_OBJ, GT_DYN_BLK, GT_STORE_DYN_BLK)
+GTSTRUCT_1(PutArgStk , GT_PUTARG_STK)
+GTSTRUCT_1(PhysReg , GT_PHYSREG)
+#ifdef FEATURE_SIMD
+GTSTRUCT_1(SIMD , GT_SIMD)
+#endif // FEATURE_SIMD
+GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
+/*****************************************************************************/
+#undef GTSTRUCT_0
+#undef GTSTRUCT_1
+#undef GTSTRUCT_2
+#undef GTSTRUCT_3
+#undef GTSTRUCT_4
+#undef GTSTRUCT_N
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/hashbv.cpp b/src/jit/hashbv.cpp
new file mode 100644
index 0000000000..fa06ec7b1e
--- /dev/null
+++ b/src/jit/hashbv.cpp
@@ -0,0 +1,2028 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+#ifdef DEBUG
+void hashBvNode::dump()
+{
+ printf("base: %d { ", baseIndex);
+ this->foreachBit(pBit);
+ printf("}\n");
+}
+#endif // DEBUG
+
+void hashBvNode::Reconstruct(indexType base)
+{
+ baseIndex = base;
+
+ assert(!(baseIndex % BITS_PER_NODE));
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elements[i] = 0;
+ }
+ next = nullptr;
+}
+
+hashBvNode::hashBvNode(indexType base)
+{
+ this->Reconstruct(base);
+}
+
+hashBvNode* hashBvNode::Create(indexType base, Compiler* compiler)
+{
+ hashBvNode* result = nullptr;
+
+ if (compiler->hbvGlobalData.hbvNodeFreeList)
+ {
+ result = compiler->hbvGlobalData.hbvNodeFreeList;
+ compiler->hbvGlobalData.hbvNodeFreeList = result->next;
+ }
+ else
+ {
+ result = new (compiler, CMK_hashBv) hashBvNode;
+ }
+ result->Reconstruct(base);
+ return result;
+}
+
+void hashBvNode::freeNode(hashBvGlobalData* glob)
+{
+ this->next = glob->hbvNodeFreeList;
+ glob->hbvNodeFreeList = this;
+}
+
+void hashBvNode::setBit(indexType base)
+{
+ assert(base >= baseIndex);
+ assert(base - baseIndex < BITS_PER_NODE);
+
+ base -= baseIndex;
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ elements[elem] |= indexType(1) << posi;
+}
+
+void hashBvNode::setLowest(indexType numToSet)
+{
+ assert(numToSet <= BITS_PER_NODE);
+
+ int elemIndex = 0;
+ while (numToSet > BITS_PER_ELEMENT)
+ {
+ elements[elemIndex] = ~(elemType(0));
+ numToSet -= BITS_PER_ELEMENT;
+ elemIndex++;
+ }
+ if (numToSet)
+ {
+ elemType allOnes = ~(elemType(0));
+ int numToShift = (int)(BITS_PER_ELEMENT - numToSet);
+ elements[elemIndex] = allOnes >> numToShift;
+ }
+}
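+
+// Worked example (assuming BITS_PER_ELEMENT == 32, for illustration only): setLowest(40)
+// first fills elements[0] with ~elemType(0) == 0xFFFFFFFF and reduces numToSet to 8,
+// then sets elements[1] = allOnes >> (32 - 8) == 0x000000FF, leaving exactly the
+// lowest 40 bits of the node set.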
+
+void hashBvNode::clrBit(indexType base)
+{
+ assert(base >= baseIndex);
+ assert(base - baseIndex < BITS_PER_NODE);
+
+ base -= baseIndex;
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ elements[elem] &= ~(indexType(1) << posi);
+}
+
+bool hashBvNode::belongsIn(indexType index)
+{
+ if (index < baseIndex)
+ {
+ return false;
+ }
+ if (index >= baseIndex + BITS_PER_NODE)
+ {
+ return false;
+ }
+ return true;
+}
+
+int countBitsInWord(unsigned int bits)
+{
+ // In-place adder tree: perform 16 1-bit adds, 8 2-bit adds,
+ // 4 4-bit adds, 2 8-bit adds, and 1 16-bit add.
+ bits = ((bits >> 1) & 0x55555555) + (bits & 0x55555555);
+ bits = ((bits >> 2) & 0x33333333) + (bits & 0x33333333);
+ bits = ((bits >> 4) & 0x0F0F0F0F) + (bits & 0x0F0F0F0F);
+ bits = ((bits >> 8) & 0x00FF00FF) + (bits & 0x00FF00FF);
+ bits = ((bits >> 16) & 0x0000FFFF) + (bits & 0x0000FFFF);
+ return (int)bits;
+}
+
+int countBitsInWord(unsigned __int64 bits)
+{
+ bits = ((bits >> 1) & 0x5555555555555555) + (bits & 0x5555555555555555);
+ bits = ((bits >> 2) & 0x3333333333333333) + (bits & 0x3333333333333333);
+ bits = ((bits >> 4) & 0x0F0F0F0F0F0F0F0F) + (bits & 0x0F0F0F0F0F0F0F0F);
+ bits = ((bits >> 8) & 0x00FF00FF00FF00FF) + (bits & 0x00FF00FF00FF00FF);
+ bits = ((bits >> 16) & 0x0000FFFF0000FFFF) + (bits & 0x0000FFFF0000FFFF);
+ bits = ((bits >> 32) & 0x00000000FFFFFFFF) + (bits & 0x00000000FFFFFFFF);
+ return (int)bits;
+}
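+
+// Worked example of the adder tree above (illustration only), using an 8-bit value
+// 0b11010110 and the same fold pattern:
+//     1-bit adds : pairs 11|01|01|10 become 10|01|01|01 (each pair now holds its count)
+//     2-bit adds : 10+01 = 0011, 01+01 = 0010
+//     4-bit add  : 0011 + 0010 = 0101 == 5, the number of set bits
+// The 32-bit and 64-bit overloads apply the identical masks and shifts, just widened.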
+
+int hashBvNode::countBits()
+{
+ int result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType bits = elements[i];
+
+ result += countBitsInWord(bits);
+ }
+ return result;
+}
+
+bool hashBvNode::anyBits()
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if (elements[i])
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool hashBvNode::getBit(indexType base)
+{
+ assert(base >= baseIndex);
+ assert(base - baseIndex < BITS_PER_NODE);
+ base -= baseIndex;
+
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ if (elements[elem] & (indexType(1) << posi))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool hashBvNode::anySet()
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if (elements[i])
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+void hashBvNode::copyFrom(hashBvNode* other)
+{
+ this->baseIndex = other->baseIndex;
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] = other->elements[i];
+ }
+}
+
+void hashBvNode::foreachBit(bitAction a)
+{
+ indexType base;
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ base = baseIndex + i * BITS_PER_ELEMENT;
+ elemType e = elements[i];
+ while (e)
+ {
+ if (e & 1)
+ {
+ a(base);
+ }
+ e >>= 1;
+ base++;
+ }
+ }
+}
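+
+// For illustration: with baseIndex == 64 and elements[0] == 0b101, foreachBit invokes
+// the action with indices 64 and 66 (bits 0 and 2 of the first element); elements that
+// are zero contribute nothing because the inner loop exits as soon as e becomes 0.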
+
+elemType hashBvNode::AndWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src & other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+elemType hashBvNode::OrWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src | other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+elemType hashBvNode::XorWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src ^ other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+elemType hashBvNode::SubtractWithChange(hashBvNode* other)
+{
+ elemType result = 0;
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ elemType src = this->elements[i];
+ elemType dst;
+
+ dst = src & ~other->elements[i];
+ result |= src ^ dst;
+ this->elements[i] = dst;
+ }
+ return result;
+}
+
+bool hashBvNode::Intersects(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if ((this->elements[i] & other->elements[i]) != 0)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void hashBvNode::AndWith(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] &= other->elements[i];
+ }
+}
+
+void hashBvNode::OrWith(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] |= other->elements[i];
+ }
+}
+
+void hashBvNode::XorWith(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] ^= other->elements[i];
+ }
+}
+
+void hashBvNode::Subtract(hashBvNode* other)
+{
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ this->elements[i] &= ~other->elements[i];
+ }
+}
+
+bool hashBvNode::sameAs(hashBvNode* other)
+{
+ if (this->baseIndex != other->baseIndex)
+ {
+ return false;
+ }
+
+ for (int i = 0; i < this->numElements(); i++)
+ {
+ if (this->elements[i] != other->elements[i])
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+hashBv::hashBv(Compiler* comp)
+{
+ this->compiler = comp;
+ this->log2_hashSize = globalData()->hbvHashSizeLog2;
+
+ int hts = hashtable_size();
+ nodeArr = getNewVector(hts);
+
+ for (int i = 0; i < hts; i++)
+ {
+ nodeArr[i] = nullptr;
+ }
+ this->numNodes = 0;
+}
+
+hashBv* hashBv::Create(Compiler* compiler)
+{
+ hashBv* result;
+ hashBvGlobalData* gd = &compiler->hbvGlobalData;
+
+ if (hbvFreeList(gd))
+ {
+ result = hbvFreeList(gd);
+ hbvFreeList(gd) = result->next;
+ assert(result->nodeArr);
+ }
+ else
+ {
+ result = new (compiler, CMK_hashBv) hashBv(compiler);
+ memset(result, 0, sizeof(hashBv));
+ result->nodeArr = result->initialVector;
+ }
+
+ result->compiler = compiler;
+ result->log2_hashSize = 0;
+ result->numNodes = 0;
+
+ return result;
+}
+
+void hashBv::Init(Compiler* compiler)
+{
+ memset(&compiler->hbvGlobalData, 0, sizeof(hashBvGlobalData));
+}
+
+hashBvGlobalData* hashBv::globalData()
+{
+ return &compiler->hbvGlobalData;
+}
+
+hashBvNode** hashBv::getNewVector(int vectorLength)
+{
+ assert(vectorLength > 0);
+ assert(isPow2(vectorLength));
+
+ hashBvNode** newVector = new (compiler, CMK_hashBv) hashBvNode*[vectorLength]();
+ return newVector;
+}
+
+hashBvNode*& hashBv::nodeFreeList(hashBvGlobalData* data)
+{
+ return data->hbvNodeFreeList;
+}
+
+hashBv*& hashBv::hbvFreeList(hashBvGlobalData* data)
+{
+ return data->hbvFreeList;
+}
+
+void hashBv::freeVector(hashBvNode* vect, int vectorLength)
+{
+ // not enough space to do anything with it
+ if (vectorLength < 2)
+ {
+ return;
+ }
+
+ hbvFreeListNode* f = (hbvFreeListNode*)vect;
+ f->next = globalData()->hbvFreeVectorList;
+ globalData()->hbvFreeVectorList = f;
+ f->size = vectorLength;
+}
+
+void hashBv::hbvFree()
+{
+ Compiler* comp = this->compiler;
+
+ int hts = hashtable_size();
+ for (int i = 0; i < hts; i++)
+ {
+ while (nodeArr[i])
+ {
+ hashBvNode* curr = nodeArr[i];
+ nodeArr[i] = curr->next;
+ curr->freeNode(globalData());
+ }
+ }
+ // keep the vector attached because the whole thing is freelisted
+ // plus you don't even know if it's freeable
+
+ this->next = hbvFreeList(globalData());
+ hbvFreeList(globalData()) = this;
+}
+
+hashBv* hashBv::CreateFrom(hashBv* other, Compiler* comp)
+{
+ hashBv* result = hashBv::Create(comp);
+ result->copyFrom(other, comp);
+ return result;
+}
+
+void hashBv::MergeLists(hashBvNode** root1, hashBvNode** root2)
+{
+}
+
+bool hashBv::TooSmall()
+{
+ return this->numNodes > this->hashtable_size() * 4;
+}
+
+bool hashBv::TooBig()
+{
+ return this->hashtable_size() > this->numNodes * 4;
+}
+
+int hashBv::getNodeCount()
+{
+ int size = hashtable_size();
+ int result = 0;
+
+ for (int i = 0; i < size; i++)
+ {
+ hashBvNode* last = nodeArr[i];
+
+ while (last)
+ {
+ last = last->next;
+ result++;
+ }
+ }
+ return result;
+}
+
+bool hashBv::IsValid()
+{
+ int size = hashtable_size();
+ // is power of 2
+ assert(((size - 1) & size) == 0);
+
+ for (int i = 0; i < size; i++)
+ {
+ hashBvNode* last = nodeArr[i];
+ hashBvNode* curr;
+ int lastIndex = -1;
+
+ while (last)
+ {
+ // the node has been hashed correctly
+ assert((int)last->baseIndex > lastIndex);
+ lastIndex = (int)last->baseIndex;
+ assert(i == getHashForIndex(last->baseIndex, size));
+ curr = last->next;
+ // the order is monotonically increasing bases
+ if (curr)
+ {
+ assert(curr->baseIndex > last->baseIndex);
+ }
+ last = curr;
+ }
+ }
+ return true;
+}
+
+void hashBv::Resize()
+{
+ // resize to 'optimal' size
+
+ this->Resize(this->numNodes);
+}
+
+void hashBv::Resize(int newSize)
+{
+ assert(newSize > 0);
+ newSize = nearest_pow2(newSize);
+
+ int oldSize = hashtable_size();
+
+ if (newSize == oldSize)
+ {
+ return;
+ }
+
+ int oldSizeLog2 = log2_hashSize;
+ int log2_newSize = genLog2((unsigned)newSize);
+ int size;
+
+ hashBvNode** newNodes = this->getNewVector(newSize);
+
+ hashBvNode*** insertionPoints = (hashBvNode***)alloca(sizeof(hashBvNode*) * newSize);
+ memset(insertionPoints, 0, sizeof(hashBvNode*) * newSize);
+
+ for (int i = 0; i < newSize; i++)
+ {
+ insertionPoints[i] = &(newNodes[i]);
+ }
+
+ if (newSize > oldSize)
+ {
+ // for each src list, expand it into multiple dst lists
+ for (int i = 0; i < oldSize; i++)
+ {
+ hashBvNode* next = nodeArr[i];
+
+ while (next)
+ {
+ hashBvNode* curr = next;
+ next = curr->next;
+ int destination = getHashForIndex(curr->baseIndex, newSize);
+
+ // ...
+
+ // stick the current node on the end of the selected list
+ *(insertionPoints[destination]) = curr;
+ insertionPoints[destination] = &(curr->next);
+ curr->next = nullptr;
+ }
+ }
+ nodeArr = newNodes;
+ log2_hashSize = (unsigned short)log2_newSize;
+ }
+ else if (oldSize > newSize)
+ {
+ int shrinkFactor = oldSize / newSize;
+
+ // shrink multiple lists into one list
+ // There are more efficient ways to do this, but if the lists are long,
+ // you shouldn't be shrinking.
+ for (int i = 0; i < oldSize; i++)
+ {
+ hashBvNode* next = nodeArr[i];
+
+ if (next)
+ {
+ // all nodes in this list should have the same destination list
+ int destination = getHashForIndex(next->baseIndex, newSize);
+ hashBvNode** insertionPoint = &newNodes[destination];
+ do
+ {
+ hashBvNode* curr = next;
+ // figure out where to insert it
+ while (*insertionPoint && (*insertionPoint)->baseIndex < curr->baseIndex)
+ {
+ insertionPoint = &((*insertionPoint)->next);
+ }
+ next = curr->next;
+
+ hashBvNode* temp = *insertionPoint;
+ *insertionPoint = curr;
+ curr->next = temp;
+
+ } while (next);
+ }
+ }
+ nodeArr = newNodes;
+ log2_hashSize = (unsigned short)log2_newSize;
+ }
+ else
+ {
+ // same size
+ assert(oldSize == newSize);
+ }
+ assert(this->IsValid());
+}
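+
+// For illustration: since getHashForIndex hashes on (baseIndex >> LOG2_BITS_PER_NODE)
+// masked by (table_size - 1), growing from 2 buckets to 8 splits old bucket 0 across
+// new buckets 0, 2, 4 and 6 (the buckets that alias it), while shrinking from 8 back
+// to 2 merges those four lists into one, keeping the nodes sorted by baseIndex as it
+// splices.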
+
+#ifdef DEBUG
+void hashBv::dump()
+{
+ bool first = true;
+ indexType index;
+
+ // uncomment to print internal implementation details
+ // DBEXEC(TRUE, printf("[%d(%d)(nodes:%d)]{ ", hashtable_size(), countBits(), this->numNodes));
+
+ printf("{");
+ FOREACH_HBV_BIT_SET(index, this)
+ {
+ if (!first)
+ {
+ printf(" ");
+ }
+ printf("%d", index);
+ first = false;
+ }
+ NEXT_HBV_BIT_SET;
+ printf("}\n");
+}
+
+void hashBv::dumpFancy()
+{
+ indexType index;
+ indexType last_1 = -1;
+ indexType last_0 = -1;
+
+ printf("{");
+ printf("count:%d", this->countBits());
+ FOREACH_HBV_BIT_SET(index, this)
+ {
+ if (last_1 != index - 1)
+ {
+ if (last_0 + 1 != last_1)
+ {
+ printf(" %d-%d", last_0 + 1, last_1);
+ }
+ else
+ {
+ printf(" %d", last_1);
+ }
+ last_0 = index - 1;
+ }
+ last_1 = index;
+ }
+ NEXT_HBV_BIT_SET;
+
+ // Print the last one
+ if (last_0 + 1 != last_1)
+ {
+ printf(" %d-%d", last_0 + 1, last_1);
+ }
+ else
+ {
+ printf(" %d", last_1);
+ }
+
+ printf("}\n");
+}
+#endif // DEBUG
+
+void hashBv::removeNodeAtBase(indexType index)
+{
+ hashBvNode** insertionPoint = this->getInsertionPointForIndex(index);
+
+ hashBvNode* node = *insertionPoint;
+
+ // make sure that we were called to remove something
+ // that really was there
+ assert(node);
+
+ // splice it out
+ *insertionPoint = node->next;
+ this->numNodes--;
+}
+
+int hashBv::getHashForIndex(indexType index, int table_size)
+{
+ indexType hashIndex;
+
+ hashIndex = index >> LOG2_BITS_PER_NODE;
+ hashIndex &= (table_size - 1);
+
+ return (int)hashIndex;
+}
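+
+// Worked example (hypothetical sizes): if BITS_PER_NODE were 64 (LOG2_BITS_PER_NODE == 6)
+// and table_size were 4, then index 200 lives in the node with baseIndex 192 and hashes
+// to (200 >> 6) & (4 - 1) == 3 & 3 == 3, i.e. bucket nodeArr[3].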
+
+int hashBv::getRehashForIndex(indexType thisIndex, int thisTableSize, int newTableSize)
+{
+ assert(0);
+ return 0;
+}
+
+hashBvNode** hashBv::getInsertionPointForIndex(indexType index)
+{
+ indexType indexInNode;
+ indexType hashIndex;
+ indexType baseIndex;
+
+ hashBvNode* result;
+
+ hashIndex = getHashForIndex(index, hashtable_size());
+
+ baseIndex = index & ~(BITS_PER_NODE - 1);
+ indexInNode = index & (BITS_PER_NODE - 1);
+
+ // printf("(%x) : hsh=%x, base=%x, index=%x\n", index,
+ // hashIndex, baseIndex, indexInNode);
+
+ // find the node
+ hashBvNode** prev = &nodeArr[hashIndex];
+ result = nodeArr[hashIndex];
+
+ while (result)
+ {
+ if (result->baseIndex == baseIndex)
+ {
+ return prev;
+ }
+ else if (result->baseIndex > baseIndex)
+ {
+ return prev;
+ }
+ else
+ {
+ prev = &(result->next);
+ result = result->next;
+ }
+ }
+ return prev;
+}
+
+hashBvNode* hashBv::getNodeForIndexHelper(indexType index, bool canAdd)
+{
+ // determine the base index of the node containing this index
+ index = index & ~(BITS_PER_NODE - 1);
+
+ hashBvNode** prev = getInsertionPointForIndex(index);
+
+ hashBvNode* node = *prev;
+
+ if (node && node->belongsIn(index))
+ {
+ return node;
+ }
+ else if (canAdd)
+ {
+ // missing node, insert it before the current one
+ hashBvNode* temp = hashBvNode::Create(index, this->compiler);
+ temp->next = node;
+ *prev = temp;
+ this->numNodes++;
+ return temp;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+hashBvNode* hashBv::getNodeForIndex(indexType index)
+{
+ // determine the base index of the node containing this index
+ index = index & ~(BITS_PER_NODE - 1);
+
+ hashBvNode** prev = getInsertionPointForIndex(index);
+
+ hashBvNode* node = *prev;
+
+ if (node && node->belongsIn(index))
+ {
+ return node;
+ }
+ else
+ {
+ return nullptr;
+ }
+}
+
+void hashBv::setBit(indexType index)
+{
+ assert(index >= 0);
+ assert(this->numNodes == this->getNodeCount());
+ hashBvNode* result = nullptr;
+
+ indexType baseIndex = index & ~(BITS_PER_NODE - 1);
+ indexType base = index - baseIndex;
+ indexType elem = base / BITS_PER_ELEMENT;
+ indexType posi = base % BITS_PER_ELEMENT;
+
+ // this should be the 99% case: when there is only one node in the structure
+ if ((result = nodeArr[0]) && result->baseIndex == baseIndex)
+ {
+ result->elements[elem] |= indexType(1) << posi;
+ return;
+ }
+
+ result = getOrAddNodeForIndex(index);
+ result->setBit(index);
+
+ assert(this->numNodes == this->getNodeCount());
+
+ // if it's getting out of control resize it
+ if (this->numNodes > this->hashtable_size() * 4)
+ {
+ this->Resize();
+ }
+
+ return;
+}
+
+void hashBv::setAll(indexType numToSet)
+{
+ // TODO-Throughput: this could be more efficient
+ for (unsigned int i = 0; i < numToSet; i += BITS_PER_NODE)
+ {
+ hashBvNode* node = getOrAddNodeForIndex(i);
+ indexType bits_to_set = min(BITS_PER_NODE, numToSet - i);
+ node->setLowest(bits_to_set);
+ }
+}
+
+void hashBv::clearBit(indexType index)
+{
+ assert(index >= 0);
+ assert(this->numNodes == this->getNodeCount());
+ hashBvNode* result = nullptr;
+
+ indexType baseIndex = index & ~(BITS_PER_NODE - 1);
+ indexType hashIndex = getHashForIndex(index, hashtable_size());
+
+ hashBvNode** prev = &nodeArr[hashIndex];
+ result = nodeArr[hashIndex];
+
+ while (result)
+ {
+ if (result->baseIndex == baseIndex)
+ {
+ result->clrBit(index);
+ // if nothing left set free it
+ if (!result->anySet())
+ {
+ *prev = result->next;
+ result->freeNode(globalData());
+ this->numNodes--;
+ }
+ return;
+ }
+ else if (result->baseIndex > baseIndex)
+ {
+ return;
+ }
+ else
+ {
+ prev = &(result->next);
+ result = result->next;
+ }
+ }
+ assert(this->numNodes == this->getNodeCount());
+ return;
+}
+
+bool hashBv::testBit(indexType index)
+{
+ // determine the base index of the node containing this index
+ indexType baseIndex = index & ~(BITS_PER_NODE - 1);
+ // 99% case
+ if (nodeArr[0] && nodeArr[0]->baseIndex == baseIndex)
+ {
+ return nodeArr[0]->getBit(index);
+ }
+
+ indexType hashIndex = getHashForIndex(baseIndex, hashtable_size());
+
+ hashBvNode* iter = nodeArr[hashIndex];
+
+ while (iter)
+ {
+ if (iter->baseIndex == baseIndex)
+ {
+ return iter->getBit(index);
+ }
+ else
+ {
+ iter = iter->next;
+ }
+ }
+ return false;
+}
+
+int hashBv::countBits()
+{
+ int result = 0;
+ int hts = this->hashtable_size();
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ hashBvNode* node = nodeArr[hashNum];
+ while (node)
+ {
+ result += node->countBits();
+ node = node->next;
+ }
+ }
+ return result;
+}
+
+bool hashBv::anySet()
+{
+ int result = 0;
+
+ int hts = this->hashtable_size();
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ hashBvNode* node = nodeArr[hashNum];
+ while (node)
+ {
+ if (node->anySet())
+ {
+ return true;
+ }
+ node = node->next;
+ }
+ }
+ return false;
+}
+
+class AndAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so skip it
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in LHS, not RHS
+ // so have to remove it
+ hashBvNode* old = *l;
+ *l = (*l)->next;
+ // splice it out
+ old->freeNode(lhs->globalData());
+ lhs->numNodes--;
+ result = true;
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->AndWithChange(r))
+ {
+ r = r->next;
+ result = true;
+
+ if ((*l)->anySet())
+ {
+ l = &((*l)->next);
+ }
+ else
+ {
+ hashBvNode* old = *l;
+ *l = (*l)->next;
+ old->freeNode(lhs->globalData());
+ lhs->numNodes--;
+ }
+ }
+ else
+ {
+ r = r->next;
+ l = &((*l)->next);
+ }
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ r = r->next;
+ }
+};
+
+class SubtractAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so skip it
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ l = &((*l)->next);
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->SubtractWithChange(r))
+ {
+ r = r->next;
+ result = true;
+
+ if ((*l)->anySet())
+ {
+ l = &((*l)->next);
+ }
+ else
+ {
+ hashBvNode* old = *l;
+ *l = (*l)->next;
+ old->freeNode(lhs->globalData());
+ lhs->numNodes--;
+ }
+ }
+ else
+ {
+ r = r->next;
+ l = &((*l)->next);
+ }
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ r = r->next;
+ }
+};
+
+class XorAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so put one in
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->XorWith(r);
+ temp->next = *l; // keep the existing lhs node after the newly inserted one
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in LHS, not RHS
+ // so LHS remains the same
+ l = &((*l)->next);
+ }
+
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->XorWithChange(r))
+ {
+ result = true;
+ }
+ l = &((*l)->next);
+ r = r->next;
+ }
+
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so put one in
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->XorWith(r);
+ temp->next = nullptr;
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+};
+
+class OrAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ if (lhs->log2_hashSize + 2 < rhs->log2_hashSize)
+ {
+ lhs->Resize(rhs->numNodes);
+ }
+ if (rhs->numNodes > rhs->hashtable_size() * 4)
+ {
+ rhs->Resize(rhs->numNodes);
+ }
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // it's in other, not this
+ // so put one in
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->OrWith(r);
+ temp->next = *l;
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ l = &((*l)->next);
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->OrWithChange(r))
+ {
+ result = true;
+ }
+ l = &((*l)->next);
+ r = r->next;
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // other contains something this does not
+ // copy it
+ // LeftGap(lhs, l, r, result, terminate);
+ result = true;
+ hashBvNode* temp = hashBvNode::Create(r->baseIndex, lhs->compiler);
+ lhs->numNodes++;
+ temp->OrWith(r);
+ temp->next = nullptr;
+ *l = temp;
+ l = &(temp->next);
+
+ r = r->next;
+ }
+};
+
+class CompareAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return true;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ terminate = true;
+ result = false;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ terminate = true;
+ result = false;
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if (!(*l)->sameAs(r))
+ {
+ terminate = true;
+ result = false;
+ }
+ l = &((*l)->next);
+ r = r->next;
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ terminate = true;
+ result = false;
+ }
+};
+
+class IntersectsAction
+{
+public:
+ static inline void PreAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline void PostAction(hashBv* lhs, hashBv* rhs)
+ {
+ }
+ static inline bool DefaultResult()
+ {
+ return false;
+ }
+
+ static inline void LeftGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in rhs, not lhs
+ // so skip rhs
+ r = r->next;
+ }
+ static inline void RightGap(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ // in lhs, not rhs
+ // so skip lhs
+ l = &((*l)->next);
+ }
+ static inline void BothPresent(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ if ((*l)->Intersects(r))
+ {
+ terminate = true;
+ result = true;
+ }
+ else
+ {
+ // disjoint nodes: advance both sides so the traversal makes progress
+ l = &((*l)->next);
+ r = r->next;
+ }
+ }
+ static inline void LeftEmpty(hashBv* lhs, hashBvNode**& l, hashBvNode*& r, bool& result, bool& terminate)
+ {
+ r = r->next;
+ }
+};
+
+template <typename Action>
+bool hashBv::MultiTraverseLHSBigger(hashBv* other)
+{
+ int hts = this->hashtable_size();
+ int ots = other->hashtable_size();
+
+ bool result = Action::DefaultResult();
+ bool terminate = false;
+
+ // this is larger
+ hashBvNode*** cursors;
+ int shiftFactor = this->log2_hashSize - other->log2_hashSize;
+ int expansionFactor = hts / ots;
+ cursors = (hashBvNode***)alloca(expansionFactor * sizeof(void*));
+
+ for (int h = 0; h < other->hashtable_size(); h++)
+ {
+ // set up cursors for the expansion of nodes
+ for (int i = 0; i < expansionFactor; i++)
+ {
+ // ex: for [1024] &= [8]
+ // for rhs in bin 0
+ // cursors point to lhs: 0, 8, 16, 24, ...
+ cursors[i] = &nodeArr[ots * i + h];
+ }
+
+ hashBvNode* o = other->nodeArr[h];
+ while (o)
+ {
+ hashBvNode* next = o->next;
+ // figure out what dst list this goes to
+ int hash = getHashForIndex(o->baseIndex, hts);
+ int dstIndex = (hash - h) >> other->log2_hashSize;
+ hashBvNode** cursor = cursors[dstIndex];
+ hashBvNode* c = *cursor;
+
+ // figure out where o fits in the cursor
+
+ if (!c)
+ {
+ Action::LeftEmpty(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (c->baseIndex == o->baseIndex)
+ {
+ Action::BothPresent(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (c->baseIndex > o->baseIndex)
+ {
+ Action::LeftGap(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (c->baseIndex < o->baseIndex)
+ {
+ Action::RightGap(this, cursors[dstIndex], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ for (int i = 0; i < expansionFactor; i++)
+ {
+ while (*(cursors[i]))
+ {
+ Action::RightGap(this, cursors[i], o, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ }
+ return result;
+}
+
+template <typename Action>
+bool hashBv::MultiTraverseRHSBigger(hashBv* other)
+{
+ int hts = this->hashtable_size();
+ int ots = other->hashtable_size();
+
+ bool result = Action::DefaultResult();
+ bool terminate = false;
+
+ for (int hashNum = 0; hashNum < ots; hashNum++)
+ {
+ int destination = getHashForIndex(BITS_PER_NODE * hashNum, this->hashtable_size());
+ assert(hashNum == getHashForIndex(BITS_PER_NODE * hashNum, other->hashtable_size()));
+
+ hashBvNode** pa = &this->nodeArr[destination];
+ hashBvNode** pb = &other->nodeArr[hashNum];
+ hashBvNode* b = *pb;
+
+ while (*pa && b)
+ {
+ hashBvNode* a = *pa;
+ if (a->baseIndex < b->baseIndex)
+ {
+ // in a but not in b
+ // but maybe it's someplace else in b
+ if (getHashForIndex(a->baseIndex, ots) == hashNum)
+ {
+ // this contains something other does not
+ // need to erase it
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else
+ {
+ // other might contain this, we don't know yet
+ pa = &a->next;
+ }
+ }
+ else if (a->baseIndex == b->baseIndex)
+ {
+ Action::BothPresent(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (a->baseIndex > b->baseIndex)
+ {
+ // other contains something this does not
+ Action::LeftGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ while (*pa)
+ {
+ // if it's in the dest but not in src
+ // then make sure it's expected to be in this list
+ if (getHashForIndex((*pa)->baseIndex, ots) == hashNum)
+ {
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else
+ {
+ pa = &((*pa)->next);
+ }
+ }
+ while (b)
+ {
+ Action::LeftEmpty(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ assert(this->numNodes == this->getNodeCount());
+ return result;
+}
+
+// The LHSBigger and RHSBigger algorithms both work when the sizes are equal.
+// This is a specialized version of RHSBigger that is simpler (and faster),
+// because equal sizes are the 99% case.
+template <typename Action>
+bool hashBv::MultiTraverseEqual(hashBv* other)
+{
+ int hts = this->hashtable_size();
+ assert(other->hashtable_size() == hts);
+
+ bool result = Action::DefaultResult();
+ bool terminate = false;
+
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ int destination = getHashForIndex(BITS_PER_NODE * hashNum, this->hashtable_size());
+
+ hashBvNode** pa = &this->nodeArr[hashNum];
+ hashBvNode** pb = &other->nodeArr[hashNum];
+ hashBvNode* b = *pb;
+
+ while (*pa && b)
+ {
+ hashBvNode* a = *pa;
+ if (a->baseIndex < b->baseIndex)
+ {
+ // in a but not in b
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (a->baseIndex == b->baseIndex)
+ {
+ Action::BothPresent(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ else if (a->baseIndex > b->baseIndex)
+ {
+ // other contains something this does not
+ Action::LeftGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ while (*pa)
+ {
+ // if it's in the dest but not in src
+ Action::RightGap(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ while (b)
+ {
+ Action::LeftEmpty(this, pa, b, result, terminate);
+ if (terminate)
+ {
+ return result;
+ }
+ }
+ }
+ assert(this->numNodes == this->getNodeCount());
+ return result;
+}
+
+template <class Action>
+bool hashBv::MultiTraverse(hashBv* other)
+{
+ bool result = false;
+
+ assert(this->numNodes == this->getNodeCount());
+
+ Action::PreAction(this, other);
+
+ int hts = this->log2_hashSize;
+ int ots = other->log2_hashSize;
+
+ if (hts == ots)
+ {
+ return MultiTraverseEqual<Action>(other);
+ }
+ else if (hts > ots)
+ {
+ return MultiTraverseLHSBigger<Action>(other);
+ }
+ else
+ {
+ return MultiTraverseRHSBigger<Action>(other);
+ }
+}
+
+bool hashBv::Intersects(hashBv* other)
+{
+ return MultiTraverse<IntersectsAction>(other);
+}
+
+bool hashBv::AndWithChange(hashBv* other)
+{
+ return MultiTraverse<AndAction>(other);
+}
+
+// same as AND ~x
+bool hashBv::SubtractWithChange(hashBv* other)
+{
+ return MultiTraverse<SubtractAction>(other);
+}
+
+void hashBv::Subtract(hashBv* other)
+{
+ this->SubtractWithChange(other);
+}
+
+void hashBv::Subtract3(hashBv* o1, hashBv* o2)
+{
+ this->copyFrom(o1, compiler);
+ this->Subtract(o2);
+}
+
+void hashBv::UnionMinus(hashBv* src1, hashBv* src2, hashBv* src3)
+{
+ this->Subtract3(src1, src2);
+ this->OrWithChange(src3);
+}
+
+void hashBv::ZeroAll()
+{
+ int hts = this->hashtable_size();
+
+ for (int hashNum = 0; hashNum < hts; hashNum++)
+ {
+ while (nodeArr[hashNum])
+ {
+ hashBvNode* n = nodeArr[hashNum];
+ nodeArr[hashNum] = n->next;
+ n->freeNode(globalData());
+ }
+ }
+ this->numNodes = 0;
+}
+
+bool hashBv::OrWithChange(hashBv* other)
+{
+ return MultiTraverse<OrAction>(other);
+}
+
+bool hashBv::XorWithChange(hashBv* other)
+{
+ return MultiTraverse<XorAction>(other);
+}
+void hashBv::OrWith(hashBv* other)
+{
+ this->OrWithChange(other);
+}
+
+void hashBv::AndWith(hashBv* other)
+{
+ this->AndWithChange(other);
+}
+
+bool hashBv::CompareWith(hashBv* other)
+{
+ return MultiTraverse<CompareAction>(other);
+}
+
+void hashBv::copyFrom(hashBv* other, Compiler* comp)
+{
+ assert(this != other);
+
+ hashBvNode* freeList = nullptr;
+
+ this->ZeroAll();
+
+ if (this->log2_hashSize != other->log2_hashSize)
+ {
+ this->nodeArr = this->getNewVector(other->hashtable_size());
+ this->log2_hashSize = other->log2_hashSize;
+ assert(this->hashtable_size() == other->hashtable_size());
+ }
+
+ int hts = this->hashtable_size();
+ // printf("in copyfrom\n");
+ for (int h = 0; h < hts; h++)
+ {
+ // put the current list on the free list
+ freeList = this->nodeArr[h];
+ this->nodeArr[h] = nullptr;
+
+ hashBvNode** splicePoint = &(this->nodeArr[h]);
+ hashBvNode* otherNode = other->nodeArr[h];
+ hashBvNode* newNode = nullptr;
+
+ while (otherNode)
+ {
+ // printf("otherNode is True...\n");
+ hashBvNode* next = *splicePoint;
+
+ this->numNodes++;
+
+ if (freeList)
+ {
+ newNode = freeList;
+ freeList = freeList->next;
+ newNode->Reconstruct(otherNode->baseIndex);
+ }
+ else
+ {
+ newNode = hashBvNode::Create(otherNode->baseIndex, this->compiler);
+ }
+ newNode->copyFrom(otherNode);
+
+ newNode->next = *splicePoint;
+ *splicePoint = newNode;
+ splicePoint = &(newNode->next);
+
+ otherNode = otherNode->next;
+ }
+ }
+ while (freeList)
+ {
+ hashBvNode* next = freeList->next;
+ freeList->freeNode(globalData());
+ freeList = next;
+ }
+#if 0
+ for (int h=0; h<hashtable_size(); h++)
+ {
+ printf("%p %p\n", this->nodeArr[h], other->nodeArr[h]);
+ }
+#endif
+}
+
+int nodeSort(const void* x, const void* y)
+{
+ hashBvNode* a = (hashBvNode*)x;
+ hashBvNode* b = (hashBvNode*)y;
+ return (int)(b->baseIndex - a->baseIndex);
+}
+
+void hashBv::InorderTraverse(nodeAction n)
+{
+ int hts = hashtable_size();
+
+ hashBvNode** x = new (compiler, CMK_hashBv) hashBvNode*[hts];
+
+ {
+ // keep an array of the current pointers
+ // into each of the bitvector lists
+ // in the hashtable
+ for (int i = 0; i < hts; i++)
+ {
+ x[i] = nodeArr[i];
+ }
+
+ while (1)
+ {
+ // pick the lowest node in the hashtable
+
+ indexType lowest = INT_MAX;
+ int lowest_index = -1;
+ for (int i = 0; i < hts; i++)
+ {
+ if (x[i] && x[i]->baseIndex < lowest)
+ {
+ lowest = x[i]->baseIndex;
+ lowest_index = i;
+ }
+ }
+ // if there was anything left, use it and update
+ // the list pointers; otherwise we are done
+ if (lowest_index != -1)
+ {
+ n(x[lowest_index]);
+ x[lowest_index] = x[lowest_index]->next;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ delete[] x;
+}
+
+void hashBv::InorderTraverseTwo(hashBv* other, dualNodeAction a)
+{
+ int sizeThis, sizeOther;
+ hashBvNode **nodesThis, **nodesOther;
+
+ sizeThis = this->hashtable_size();
+ sizeOther = other->hashtable_size();
+
+ nodesThis = new (compiler, CMK_hashBv) hashBvNode*[sizeThis];
+ nodesOther = new (compiler, CMK_hashBv) hashBvNode*[sizeOther];
+
+ // populate the arrays
+ for (int i = 0; i < sizeThis; i++)
+ {
+ nodesThis[i] = this->nodeArr[i];
+ }
+
+ for (int i = 0; i < sizeOther; i++)
+ {
+ nodesOther[i] = other->nodeArr[i];
+ }
+
+ while (1)
+ {
+ indexType lowestThis = INT_MAX;
+ indexType lowestOther = INT_MAX;
+ int lowestHashIndexThis = -1;
+ int lowestHashIndexOther = -1;
+
+ // find the lowest remaining node in each BV
+ for (int i = 0; i < sizeThis; i++)
+ {
+ if (nodesThis[i] && nodesThis[i]->baseIndex < lowestThis)
+ {
+ lowestHashIndexThis = i;
+ lowestThis = nodesThis[i]->baseIndex;
+ }
+ }
+ for (int i = 0; i < sizeOther; i++)
+ {
+ if (nodesOther[i] && nodesOther[i]->baseIndex < lowestOther)
+ {
+ lowestHashIndexOther = i;
+ lowestOther = nodesOther[i]->baseIndex;
+ }
+ }
+ hashBvNode *nodeThis, *nodeOther;
+ nodeThis = lowestHashIndexThis == -1 ? nullptr : nodesThis[lowestHashIndexThis];
+ nodeOther = lowestHashIndexOther == -1 ? nullptr : nodesOther[lowestHashIndexOther];
+ // no nodes left in either, so return
+ if ((!nodeThis) && (!nodeOther))
+ {
+ break;
+
+ // there are only nodes left in one bitvector
+ }
+ else if ((!nodeThis) || (!nodeOther))
+ {
+ a(this, other, nodeThis, nodeOther);
+ if (nodeThis)
+ {
+ nodesThis[lowestHashIndexThis] = nodesThis[lowestHashIndexThis]->next;
+ }
+ if (nodeOther)
+ {
+ nodesOther[lowestHashIndexOther] = nodesOther[lowestHashIndexOther]->next;
+ }
+ }
+ // nodes are left in both, so determine whether the lowest ones
+ // match; if so, process them as a pair, otherwise
+ // process the lower of the two alone
+ else if (nodeThis && nodeOther)
+ {
+ if (nodeThis->baseIndex == nodeOther->baseIndex)
+ {
+ a(this, other, nodeThis, nodeOther);
+ nodesThis[lowestHashIndexThis] = nodesThis[lowestHashIndexThis]->next;
+ nodesOther[lowestHashIndexOther] = nodesOther[lowestHashIndexOther]->next;
+ }
+ else if (nodeThis->baseIndex < nodeOther->baseIndex)
+ {
+ a(this, other, nodeThis, nullptr);
+ nodesThis[lowestHashIndexThis] = nodesThis[lowestHashIndexThis]->next;
+ }
+ else if (nodeOther->baseIndex < nodeThis->baseIndex)
+ {
+ a(this, other, nullptr, nodeOther);
+ nodesOther[lowestHashIndexOther] = nodesOther[lowestHashIndexOther]->next;
+ }
+ }
+ }
+ delete[] nodesThis;
+ delete[] nodesOther;
+}
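
A hedged usage sketch (not taken from this change) of how the dual traversal above can be driven: InorderTraverseTwo takes a dualNodeAction callback, and SimpleDumpDualNode, defined a few lines below, is one such callback. The function name and the liveIn/liveOut pointers are illustrative assumptions.

    #ifdef DEBUG
    // Visits the nodes of both vectors in ascending baseIndex order, pairing
    // nodes that share a base index and passing nullptr for the missing side.
    void DumpPairedLiveness(hashBv* liveIn, hashBv* liveOut)
    {
        liveIn->InorderTraverseTwo(liveOut, SimpleDumpDualNode);
    }
    #endif // DEBUG
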
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+#ifdef DEBUG
+void SimpleDumpNode(hashBvNode* n)
+{
+ printf("base: %d\n", n->baseIndex);
+}
+
+void DumpNode(hashBvNode* n)
+{
+ n->dump();
+}
+
+void SimpleDumpDualNode(hashBv* a, hashBv* b, hashBvNode* n, hashBvNode* m)
+{
+ printf("nodes: ");
+ if (n)
+ {
+ printf("%d,", n->baseIndex);
+ }
+ else
+ {
+ printf("----,");
+ }
+ if (m)
+ {
+ printf("%d\n", m->baseIndex);
+ }
+ else
+ {
+ printf("----\n");
+ }
+}
+#endif // DEBUG
+
+hashBvIterator::hashBvIterator()
+{
+ this->bv = nullptr;
+}
+
+hashBvIterator::hashBvIterator(hashBv* bv)
+{
+ this->bv = bv;
+ this->hashtable_index = 0;
+ this->current_element = 0;
+ this->current_base = 0;
+ this->current_data = 0;
+
+ if (bv)
+ {
+ this->hashtable_size = bv->hashtable_size();
+ this->currNode = bv->nodeArr[0];
+
+ if (!this->currNode)
+ {
+ this->nextNode();
+ }
+ }
+}
+
+void hashBvIterator::initFrom(hashBv* bv)
+{
+ this->bv = bv;
+ this->hashtable_size = bv->hashtable_size();
+ this->hashtable_index = 0;
+ this->currNode = bv->nodeArr[0];
+ this->current_element = 0;
+ this->current_base = 0;
+ this->current_data = 0;
+
+ if (!this->currNode)
+ {
+ this->nextNode();
+ }
+ if (this->currNode)
+ {
+ this->current_data = this->currNode->elements[0];
+ }
+}
+
+void hashBvIterator::nextNode()
+{
+ // if we have a valid node then just get the next one in the chain
+ if (this->currNode)
+ {
+ this->currNode = this->currNode->next;
+ }
+
+ // else step to the next one in the hash table
+ while (!this->currNode)
+ {
+ hashtable_index++;
+ // no more
+ if (hashtable_index >= hashtable_size)
+ {
+ // printf("nextnode bailed\n");
+ return;
+ }
+
+ this->currNode = bv->nodeArr[hashtable_index];
+ }
+ // first element in the new node
+ this->current_element = 0;
+ this->current_base = this->currNode->baseIndex;
+ this->current_data = this->currNode->elements[0];
+ // printf("nextnode returned base %d\n", this->current_base);
+ // printf("hti = %d ", hashtable_index);
+}
+
+indexType hashBvIterator::nextBit()
+{
+
+ // printf("in nextbit for bv:\n");
+ // this->bv->dump();
+
+ if (!this->currNode)
+ {
+ this->nextNode();
+ }
+
+top:
+
+ if (!this->currNode)
+ {
+ return NOMOREBITS;
+ }
+
+more_data:
+ if (!this->current_data)
+ {
+ current_element++;
+ // printf("current element is %d\n", current_element);
+ // reached the end of this node
+ if (current_element == (indexType) this->currNode->numElements())
+ {
+ // printf("going to next node\n");
+ this->nextNode();
+ goto top;
+ }
+ else
+ {
+ assert(current_element < (indexType) this->currNode->numElements());
+ // printf("getting more data\n");
+ current_data = this->currNode->elements[current_element];
+ current_base = this->currNode->baseIndex + current_element * BITS_PER_ELEMENT;
+ goto more_data;
+ }
+ }
+ else
+ {
+ while (current_data)
+ {
+ if (current_data & 1)
+ {
+ current_data >>= 1;
+ current_base++;
+
+ return current_base - 1;
+ }
+ else
+ {
+ current_data >>= 1;
+ current_base++;
+ }
+ }
+ goto more_data;
+ }
+}
+
+indexType HbvNext(hashBv* bv, Compiler* comp)
+{
+ if (bv)
+ {
+ bv->globalData()->hashBvNextIterator.initFrom(bv);
+ }
+ return bv->globalData()->hashBvNextIterator.nextBit();
+}
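
A minimal, standalone sketch (not part of the commit) of the bit-walking loop at the core of hashBvIterator::nextBit above: shift the element right one bit at a time and report the running base index whenever the low bit is set. The function name and the fixed 32-bit element width are assumptions made for the example.

    #include <cstdint>
    #include <cstdio>

    // Walk the set bits of a single 32-bit element the same way nextBit does:
    // shift right one bit at a time, reporting the running base index
    // whenever the low bit is set.
    static void enumerateElementBits(uint32_t data, uint32_t base)
    {
        while (data)
        {
            if (data & 1)
            {
                printf("bit %u is set\n", base);
            }
            data >>= 1;
            base++;
        }
    }

    int main()
    {
        enumerateElementBits(0x16, 64); // bits 1, 2, 4 -> prints 65, 66, 68
        return 0;
    }
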
diff --git a/src/jit/hashbv.h b/src/jit/hashbv.h
new file mode 100644
index 0000000000..cadb182cc6
--- /dev/null
+++ b/src/jit/hashbv.h
@@ -0,0 +1,363 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef HASHBV_H
+#define HASHBV_H
+
+#if defined(_M_AMD64) || defined(_M_X86)
+#include <xmmintrin.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <memory.h>
+#include <windows.h>
+
+//#define TESTING 1
+
+#define LOG2_BITS_PER_ELEMENT 5
+#define LOG2_ELEMENTS_PER_NODE 2
+#define LOG2_BITS_PER_NODE (LOG2_BITS_PER_ELEMENT + LOG2_ELEMENTS_PER_NODE)
+
+#define BITS_PER_ELEMENT (1 << LOG2_BITS_PER_ELEMENT)
+#define ELEMENTS_PER_NODE (1 << LOG2_ELEMENTS_PER_NODE)
+#define BITS_PER_NODE (1 << LOG2_BITS_PER_NODE)
+
+#ifdef _TARGET_AMD64_
+typedef unsigned __int64 elemType;
+typedef unsigned __int64 indexType;
+#else
+typedef unsigned int elemType;
+typedef unsigned int indexType;
+#endif
+
+class hashBvNode;
+class hashBv;
+class hashBvIterator;
+class hashBvGlobalData;
+
+typedef void bitAction(indexType);
+typedef void nodeAction(hashBvNode*);
+typedef void dualNodeAction(hashBv* left, hashBv* right, hashBvNode* a, hashBvNode* b);
+
+#define NOMOREBITS -1
+
+#ifdef DEBUG
+inline void pBit(indexType i)
+{
+ printf("%d ", i);
+}
+#endif // DEBUG
+
+// ------------------------------------------------------------
+// this is essentially a hashtable of small, fixed-size bitvectors.
+// for any index, the bits of the index select a position as follows:
+// 32 0
+// ------------------------------------------------------------
+// | ... ... ... | hash | element in node | index in element |
+// ------------------------------------------------------------
+//
+//
+// hashBv
+// | // hashtable
+// v
+// []->node->node->node
+// []->node
+// []
+// []->node->node
+//
+//
+
+#if TESTING
+inline int log2(int number)
+{
+ int result = 0;
+ number >>= 1;
+ while (number)
+ {
+ result++;
+ number >>= 1;
+ }
+ return result;
+}
+#endif
+
+// return the greatest power of 2 that is less than or equal to the given number
+// (e.g., nearest_pow2(100) == 64)
+inline int nearest_pow2(unsigned number)
+{
+ int result = 0;
+
+ if (number > 0xffff)
+ {
+ number >>= 16;
+ result += 16;
+ }
+ if (number > 0xff)
+ {
+ number >>= 8;
+ result += 8;
+ }
+ if (number > 0xf)
+ {
+ number >>= 4;
+ result += 4;
+ }
+ if (number > 0x3)
+ {
+ number >>= 2;
+ result += 2;
+ }
+ if (number > 0x1)
+ {
+ number >>= 1;
+ result += 1;
+ }
+ return 1 << result;
+}
+
+class hashBvNode
+{
+public:
+ hashBvNode* next;
+ indexType baseIndex;
+ elemType elements[ELEMENTS_PER_NODE];
+
+public:
+ hashBvNode(indexType base);
+ hashBvNode()
+ {
+ }
+ static hashBvNode* Create(indexType base, Compiler* comp);
+ void Reconstruct(indexType base);
+ int numElements()
+ {
+ return ELEMENTS_PER_NODE;
+ }
+ void setBit(indexType base);
+ void setLowest(indexType numToSet);
+ bool getBit(indexType base);
+ void clrBit(indexType base);
+ bool anySet();
+ bool belongsIn(indexType index);
+ int countBits();
+ bool anyBits();
+ void foreachBit(bitAction x);
+ void freeNode(hashBvGlobalData* glob);
+ bool sameAs(hashBvNode* other);
+ void copyFrom(hashBvNode* other);
+
+ void AndWith(hashBvNode* other);
+ void OrWith(hashBvNode* other);
+ void XorWith(hashBvNode* other);
+ void Subtract(hashBvNode* other);
+
+ elemType AndWithChange(hashBvNode* other);
+ elemType OrWithChange(hashBvNode* other);
+ elemType XorWithChange(hashBvNode* other);
+ elemType SubtractWithChange(hashBvNode* other);
+
+ bool Intersects(hashBvNode* other);
+
+#ifdef DEBUG
+ void dump();
+#endif // DEBUG
+};
+
+class hashBv
+{
+public:
+ // --------------------------------------
+ // data
+ // --------------------------------------
+ hashBvNode** nodeArr;
+ hashBvNode* initialVector[1];
+
+ union {
+ Compiler* compiler;
+ // for freelist
+ hashBv* next;
+ };
+
+ unsigned short log2_hashSize;
+ // used for heuristic resizing... could be overflowed in rare circumstances
+ // but should not affect correctness
+ unsigned short numNodes;
+
+public:
+ hashBv(Compiler* comp);
+ hashBv(hashBv* other);
+ // hashBv() {}
+ static hashBv* Create(Compiler* comp);
+ static void Init(Compiler* comp);
+ static hashBv* CreateFrom(hashBv* other, Compiler* comp);
+ void hbvFree();
+#ifdef DEBUG
+ void dump();
+ void dumpFancy();
+#endif // DEBUG
+ __forceinline int hashtable_size()
+ {
+ return 1 << this->log2_hashSize;
+ }
+
+ hashBvGlobalData* globalData();
+
+ static hashBvNode*& nodeFreeList(hashBvGlobalData* globalData);
+ static hashBv*& hbvFreeList(hashBvGlobalData* data);
+
+ hashBvNode** getInsertionPointForIndex(indexType index);
+
+private:
+ hashBvNode* getNodeForIndexHelper(indexType index, bool canAdd);
+ int getHashForIndex(indexType index, int table_size);
+ int getRehashForIndex(indexType thisIndex, int thisTableSize, int newTableSize);
+
+ // maintain free lists for vectors
+ hashBvNode** getNewVector(int vectorLength);
+ void freeVector(hashBvNode* vect, int vectorLength);
+ int getNodeCount();
+
+ hashBvNode* getFreeList();
+
+public:
+ inline hashBvNode* getOrAddNodeForIndex(indexType index)
+ {
+ hashBvNode* temp = getNodeForIndexHelper(index, true);
+ return temp;
+ }
+ hashBvNode* getNodeForIndex(indexType index);
+ void removeNodeAtBase(indexType index);
+
+public:
+ void setBit(indexType index);
+ void setAll(indexType numToSet);
+ bool testBit(indexType index);
+ void clearBit(indexType index);
+ int countBits();
+ bool anySet();
+ void copyFrom(hashBv* other, Compiler* comp);
+ void ZeroAll();
+ bool CompareWith(hashBv* other);
+
+ void AndWith(hashBv* other);
+ void OrWith(hashBv* other);
+ void XorWith(hashBv* other);
+ void Subtract(hashBv* other);
+ void Subtract3(hashBv* other, hashBv* other2);
+
+ void UnionMinus(hashBv* a, hashBv* b, hashBv* c);
+
+ bool AndWithChange(hashBv* other);
+ bool OrWithChange(hashBv* other);
+ bool OrWithChangeRight(hashBv* other);
+ bool OrWithChangeLeft(hashBv* other);
+ bool XorWithChange(hashBv* other);
+ bool SubtractWithChange(hashBv* other);
+
+ bool Intersects(hashBv* other);
+
+ template <class Action>
+ bool MultiTraverseLHSBigger(hashBv* other);
+ template <class Action>
+ bool MultiTraverseRHSBigger(hashBv* other);
+ template <class Action>
+ bool MultiTraverseEqual(hashBv* other);
+ template <class Action>
+ bool MultiTraverse(hashBv* other);
+
+ void InorderTraverse(nodeAction a);
+ void InorderTraverseTwo(hashBv* other, dualNodeAction a);
+
+ void Resize(int newSize);
+ void Resize();
+ void MergeLists(hashBvNode** a, hashBvNode** b);
+
+ bool TooSmall();
+ bool TooBig();
+ bool IsValid();
+};
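
A hedged sketch of how the interface declared above is typically exercised; "comp" stands in for a Compiler instance, and the function itself is illustrative rather than part of the header.

    inline void hashBvUsageSketch(Compiler* comp)
    {
        hashBv* live = hashBv::Create(comp); // starts empty
        live->setBit(100);
        live->setBit(1000000);               // sparse indices only allocate the nodes they touch
        assert(live->testBit(100));

        hashBv* arg = hashBv::Create(comp);
        arg->setBit(7);

        bool changed = live->OrWithChange(arg); // reports whether any new bit was added
        assert(changed && (live->countBits() == 3));
    }
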
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+class hbvFreeListNode
+{
+public:
+ hbvFreeListNode* next;
+ int size;
+};
+
+// --------------------------------------------------------------------
+// --------------------------------------------------------------------
+
+class hashBvIterator
+{
+public:
+ unsigned hashtable_size;
+ unsigned hashtable_index;
+ hashBv* bv;
+ hashBvNode* currNode;
+ indexType current_element;
+ // base index of current node
+ indexType current_base;
+ // working data of current element
+ elemType current_data;
+
+ hashBvIterator(hashBv* bv);
+ void initFrom(hashBv* bv);
+ hashBvIterator();
+ indexType nextBit();
+
+private:
+ void nextNode();
+};
+
+class hashBvGlobalData
+{
+ friend class hashBv;
+ friend class hashBvNode;
+
+ hashBvNode* hbvNodeFreeList;
+ hashBv* hbvFreeList;
+ unsigned short hbvHashSizeLog2;
+ hbvFreeListNode* hbvFreeVectorList;
+
+public:
+ hashBvIterator hashBvNextIterator;
+};
+
+indexType HbvNext(hashBv* bv, Compiler* comp);
+
+// clang-format off
+#define FOREACH_HBV_BIT_SET(index, bv) \
+ { \
+ for (int hashNum=0; hashNum<(bv)->hashtable_size(); hashNum++) {\
+ hashBvNode *node = (bv)->nodeArr[hashNum];\
+ while (node) { \
+ indexType base = node->baseIndex; \
+ for (int el=0; el<node->numElements(); el++) {\
+ elemType _i = 0; \
+ elemType _e = node->elements[el]; \
+ while (_e) { \
+ int _result = BitScanForwardPtr((DWORD *) &_i, _e); \
+ assert(_result); \
+ (index) = base + (el*BITS_PER_ELEMENT) + _i; \
+ _e ^= (elemType(1) << _i);
+
+#define NEXT_HBV_BIT_SET \
+ }\
+ }\
+ node = node->next; \
+ }\
+ }\
+ } \
+//clang-format on
+
+#ifdef DEBUG
+void SimpleDumpNode(hashBvNode *n);
+void DumpNode(hashBvNode *n);
+void SimpleDumpDualNode(hashBv *a, hashBv *b, hashBvNode *n, hashBvNode *m);
+#endif // DEBUG
+
+#endif
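
The FOREACH_HBV_BIT_SET / NEXT_HBV_BIT_SET pair above is meant to read like a for-each over the set bits of a vector. A hedged sketch of the intended usage, with liveSet as a hypothetical hashBv pointer:

    void foreachBitSketch(hashBv* liveSet)
    {
        indexType lclNum;
        FOREACH_HBV_BIT_SET(lclNum, liveSet)
        {
            // runs once per set bit, with lclNum bound to that bit's index
            printf("V%02u is live\n", (unsigned)lclNum);
        }
        NEXT_HBV_BIT_SET;
    }
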
diff --git a/src/jit/host.h b/src/jit/host.h
new file mode 100644
index 0000000000..87e13d4180
--- /dev/null
+++ b/src/jit/host.h
@@ -0,0 +1,68 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#ifndef printf
+#define printf logf
+#endif
+
+#ifndef fprintf
+#define fprintf flogf
+#endif
+
+class Compiler;
+class LogEnv
+{
+public:
+ LogEnv(ICorJitInfo* aCompHnd);
+ void setCompiler(Compiler* val)
+ {
+ const_cast<Compiler*&>(compiler) = val;
+ }
+
+ ICorJitInfo* const compHnd;
+ Compiler* const compiler;
+};
+
+BOOL vlogf(unsigned level, const char* fmt, va_list args);
+int vflogf(FILE* file, const char* fmt, va_list args);
+
+int logf(const char* fmt, ...);
+int flogf(FILE* file, const char* fmt, ...);
+void gcDump_logf(const char* fmt, ...);
+
+void logf(unsigned level, const char* fmt, ...);
+
+extern "C" void __cdecl assertAbort(const char* why, const char* file, unsigned line);
+
+#undef assert
+#define assert(p) (void)((p) || (assertAbort(#p, __FILE__, __LINE__), 0))
+
+#else // DEBUG
+
+#undef assert
+#define assert(p) (void)0
+#endif // DEBUG
+
+/*****************************************************************************/
+#ifndef _HOST_H_
+#define _HOST_H_
+/*****************************************************************************/
+
+const size_t OS_page_size = (4 * 1024);
+
+extern FILE* jitstdout;
+
+inline FILE* procstdout()
+{
+ return stdout;
+}
+#undef stdout
+#define stdout use_jitstdout
+
+/*****************************************************************************/
+#endif
+/*****************************************************************************/
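
Under DEBUG, the printf/fprintf redirection above routes diagnostic output through the JIT's own logging (logf/flogf) rather than the process stdout. A hedged sketch of the effect; dumpPhaseName is an illustrative helper, not part of this header.

    #ifdef DEBUG
    void dumpPhaseName(const char* name)
    {
        // With host.h included, this printf expands to logf(...), so the text
        // ends up in the JIT log stream (jitstdout) instead of raw stdout.
        printf("*** phase: %s\n", name);
    }
    #endif // DEBUG
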
diff --git a/src/jit/hostallocator.cpp b/src/jit/hostallocator.cpp
new file mode 100644
index 0000000000..b737424ee8
--- /dev/null
+++ b/src/jit/hostallocator.cpp
@@ -0,0 +1,40 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#include "hostallocator.h"
+
+HostAllocator HostAllocator::s_hostAllocator;
+
+void* HostAllocator::Alloc(size_t size)
+{
+ assert(g_jitHost != nullptr);
+ return g_jitHost->allocateMemory(size, false);
+}
+
+void* HostAllocator::ArrayAlloc(size_t elemSize, size_t numElems)
+{
+ assert(g_jitHost != nullptr);
+
+ ClrSafeInt<size_t> safeElemSize(elemSize);
+ ClrSafeInt<size_t> safeNumElems(numElems);
+ ClrSafeInt<size_t> size = safeElemSize * safeNumElems;
+ if (size.IsOverflow())
+ {
+ return nullptr;
+ }
+
+ return g_jitHost->allocateMemory(size.Value(), false);
+}
+
+void HostAllocator::Free(void* p)
+{
+ assert(g_jitHost != nullptr);
+ g_jitHost->freeMemory(p, false);
+}
+
+HostAllocator* HostAllocator::getHostAllocator()
+{
+ return &s_hostAllocator;
+}
diff --git a/src/jit/hostallocator.h b/src/jit/hostallocator.h
new file mode 100644
index 0000000000..c48ed45b8c
--- /dev/null
+++ b/src/jit/hostallocator.h
@@ -0,0 +1,22 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+class HostAllocator : public IAllocator
+{
+private:
+ static HostAllocator s_hostAllocator;
+
+ HostAllocator()
+ {
+ }
+
+public:
+ void* Alloc(size_t size) override;
+
+ void* ArrayAlloc(size_t elemSize, size_t numElems) override;
+
+ void Free(void* p) override;
+
+ static HostAllocator* getHostAllocator();
+};
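
A hedged usage sketch (illustrative only) of the host allocator declared above; because ArrayAlloc does its element math with ClrSafeInt, an impossible request yields nullptr rather than an undersized block.

    void hostAllocSketch()
    {
        HostAllocator* alloc = HostAllocator::getHostAllocator();

        // elemSize * numElems is computed with overflow checking, so overflow
        // surfaces as a nullptr return instead of a short allocation.
        void* buffer = alloc->ArrayAlloc(sizeof(int), 1024);
        if (buffer != nullptr)
        {
            // ... use the buffer ...
            alloc->Free(buffer);
        }
    }
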
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
new file mode 100644
index 0000000000..d04ded78fa
--- /dev/null
+++ b/src/jit/importer.cpp
@@ -0,0 +1,17997 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Importer XX
+XX XX
+XX Imports the given method and converts it to semantic trees XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "corexcep.h"
+
+#define Verify(cond, msg) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ verRaiseVerifyExceptionIfNeeded(INDEBUG(msg) DEBUGARG(__FILE__) DEBUGARG(__LINE__)); \
+ } \
+ } while (0)
+
+#define VerifyOrReturn(cond, msg) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ verRaiseVerifyExceptionIfNeeded(INDEBUG(msg) DEBUGARG(__FILE__) DEBUGARG(__LINE__)); \
+ return; \
+ } \
+ } while (0)
+
+#define VerifyOrReturnSpeculative(cond, msg, speculative) \
+ do \
+ { \
+ if (speculative) \
+ { \
+ if (!(cond)) \
+ { \
+ return false; \
+ } \
+ } \
+ else \
+ { \
+ if (!(cond)) \
+ { \
+ verRaiseVerifyExceptionIfNeeded(INDEBUG(msg) DEBUGARG(__FILE__) DEBUGARG(__LINE__)); \
+ return false; \
+ } \
+ } \
+ } while (0)
+
+/*****************************************************************************/
+
+void Compiler::impInit()
+{
+#ifdef DEBUG
+ impTreeList = impTreeLast = nullptr;
+#endif
+
+#if defined(DEBUG)
+ impInlinedCodeSize = 0;
+#endif
+
+ seenConditionalJump = false;
+}
+
+/*****************************************************************************
+ *
+ * Pushes the given tree on the stack.
+ */
+
+void Compiler::impPushOnStack(GenTreePtr tree, typeInfo ti)
+{
+ /* Check for overflow. If inlining, we may be using a bigger stack */
+
+ if ((verCurrentState.esStackDepth >= info.compMaxStack) &&
+ (verCurrentState.esStackDepth >= impStkSize || ((compCurBB->bbFlags & BBF_IMPORTED) == 0)))
+ {
+ BADCODE("stack overflow");
+ }
+
+#ifdef DEBUG
+ // If we are pushing a struct, make certain we know the precise type!
+ if (tree->TypeGet() == TYP_STRUCT)
+ {
+ assert(ti.IsType(TI_STRUCT));
+ CORINFO_CLASS_HANDLE clsHnd = ti.GetClassHandle();
+ assert(clsHnd != NO_CLASS_HANDLE);
+ }
+
+ if (tiVerificationNeeded && !ti.IsDead())
+ {
+ assert(typeInfo::AreEquivalent(NormaliseForStack(ti), ti)); // types are normalized
+
+ // The ti type is consistent with the tree type.
+ //
+
+ // On 64-bit systems, nodes whose "proper" type is "native int" get labeled TYP_LONG.
+ // In the verification type system, we always transform "native int" to "TI_INT".
+ // Ideally, we would keep track of which nodes labeled "TYP_LONG" are really "native int", but
+ // attempts to do that have proved too difficult. Instead, we'll assume that in checks like this,
+ // when there's a mismatch, it's because of this reason -- the typeInfo::AreEquivalentModuloNativeInt
+ // method used in the last disjunct allows exactly this mismatch.
+ assert(ti.IsDead() || ti.IsByRef() && (tree->TypeGet() == TYP_I_IMPL || tree->TypeGet() == TYP_BYREF) ||
+ ti.IsUnboxedGenericTypeVar() && tree->TypeGet() == TYP_REF ||
+ ti.IsObjRef() && tree->TypeGet() == TYP_REF || ti.IsMethod() && tree->TypeGet() == TYP_I_IMPL ||
+ ti.IsType(TI_STRUCT) && tree->TypeGet() != TYP_REF ||
+ typeInfo::AreEquivalentModuloNativeInt(NormaliseForStack(ti),
+ NormaliseForStack(typeInfo(tree->TypeGet()))));
+
+ // If it is a struct type, make certain we normalized the primitive types
+ assert(!ti.IsType(TI_STRUCT) ||
+ info.compCompHnd->getTypeForPrimitiveValueClass(ti.GetClassHandle()) == CORINFO_TYPE_UNDEF);
+ }
+
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf("\n");
+ printf(TI_DUMP_PADDING);
+ printf("About to push to stack: ");
+ ti.Dump();
+ }
+#endif // VERBOSE_VERIFY
+
+#endif // DEBUG
+
+ verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo = ti;
+ verCurrentState.esStack[verCurrentState.esStackDepth++].val = tree;
+
+ if ((tree->gtType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((tree->gtType == TYP_FLOAT) || (tree->gtType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+}
+
+/******************************************************************************/
+// used in the inliner, where we can assume typesafe code. please don't use in the importer!!
+inline void Compiler::impPushOnStackNoType(GenTreePtr tree)
+{
+ assert(verCurrentState.esStackDepth < impStkSize);
+ INDEBUG(verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo = typeInfo());
+ verCurrentState.esStack[verCurrentState.esStackDepth++].val = tree;
+
+ if ((tree->gtType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((tree->gtType == TYP_FLOAT) || (tree->gtType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+}
+
+inline void Compiler::impPushNullObjRefOnStack()
+{
+ impPushOnStack(gtNewIconNode(0, TYP_REF), typeInfo(TI_NULL));
+}
+
+// This method gets called when we run into unverifiable code
+// (and we are verifying the method)
+
+inline void Compiler::verRaiseVerifyExceptionIfNeeded(INDEBUG(const char* msg) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line))
+{
+ // Remember that the code is not verifiable
+ // Note that the method may yet pass canSkipMethodVerification(),
+ // and so the presence of unverifiable code may not be an issue.
+ tiIsVerifiableCode = FALSE;
+
+#ifdef DEBUG
+ const char* tail = strrchr(file, '\\');
+ if (tail)
+ {
+ file = tail + 1;
+ }
+
+ if (JitConfig.JitBreakOnUnsafeCode())
+ {
+ assert(!"Unsafe code detected");
+ }
+#endif
+
+ JITLOG((LL_INFO10000, "Detected unsafe code: %s:%d : %s, while compiling %s opcode %s, IL offset %x\n", file, line,
+ msg, info.compFullName, impCurOpcName, impCurOpcOffs));
+
+ if (verNeedsVerification() || compIsForImportOnly())
+ {
+ JITLOG((LL_ERROR, "Verification failure: %s:%d : %s, while compiling %s opcode %s, IL offset %x\n", file, line,
+ msg, info.compFullName, impCurOpcName, impCurOpcOffs));
+ verRaiseVerifyException(INDEBUG(msg) DEBUGARG(file) DEBUGARG(line));
+ }
+}
+
+inline void DECLSPEC_NORETURN Compiler::verRaiseVerifyException(INDEBUG(const char* msg) DEBUGARG(const char* file)
+ DEBUGARG(unsigned line))
+{
+ JITLOG((LL_ERROR, "Verification failure: %s:%d : %s, while compiling %s opcode %s, IL offset %x\n", file, line,
+ msg, info.compFullName, impCurOpcName, impCurOpcOffs));
+
+#ifdef DEBUG
+ // BreakIfDebuggerPresent();
+ if (getBreakOnBadCode())
+ {
+ assert(!"Typechecking error");
+ }
+#endif
+
+ RaiseException(SEH_VERIFICATION_EXCEPTION, EXCEPTION_NONCONTINUABLE, 0, nullptr);
+ UNREACHABLE();
+}
+
+// helper function that tells us whether the IL instruction at the given address
+// consumes an address from the top of the stack. We use it to avoid marking
+// locals as lvAddrTaken unnecessarily
+bool Compiler::impILConsumesAddr(const BYTE* codeAddr, CORINFO_METHOD_HANDLE fncHandle, CORINFO_MODULE_HANDLE scpHandle)
+{
+ assert(!compIsForInlining());
+
+ OPCODE opcode;
+
+ opcode = (OPCODE)getU1LittleEndian(codeAddr);
+
+ switch (opcode)
+ {
+ // case CEE_LDFLDA: We're taking this one out as if you have a sequence
+ // like
+ //
+ // ldloca.0
+ // ldflda whatever
+ //
+        // of a primitive-like struct, you end up after morphing with the address of a local
+        // that's not marked as addrtaken, which is wrong. Also, ldflda is usually used
+        // for structs that contain other structs, which isn't a case we handle very
+        // well now for other reasons.
+
+ case CEE_LDFLD:
+ {
+ // We won't collapse small fields. This is probably not the right place to have this
+            // check, but we're only using the function for this purpose, and it is easy to factor
+ // out if we need to do so.
+
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ impResolveToken(codeAddr + sizeof(__int8), &resolvedToken, CORINFO_TOKENKIND_Field);
+
+ CORINFO_CLASS_HANDLE clsHnd;
+ var_types lclTyp = JITtype2varType(info.compCompHnd->getFieldType(resolvedToken.hField, &clsHnd));
+
+ // Preserve 'small' int types
+ if (lclTyp > TYP_INT)
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ if (varTypeIsSmall(lclTyp))
+ {
+ return false;
+ }
+
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
+void Compiler::impResolveToken(const BYTE* addr, CORINFO_RESOLVED_TOKEN* pResolvedToken, CorInfoTokenKind kind)
+{
+ pResolvedToken->tokenContext = impTokenLookupContextHandle;
+ pResolvedToken->tokenScope = info.compScopeHnd;
+ pResolvedToken->token = getU4LittleEndian(addr);
+ pResolvedToken->tokenType = kind;
+
+ if (!tiVerificationNeeded)
+ {
+ info.compCompHnd->resolveToken(pResolvedToken);
+ }
+ else
+ {
+ Verify(eeTryResolveToken(pResolvedToken), "Token resolution failed");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Pop one tree from the stack.
+ */
+
+StackEntry Compiler::impPopStack()
+{
+ if (verCurrentState.esStackDepth == 0)
+ {
+ BADCODE("stack underflow");
+ }
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ JITDUMP("\n");
+ printf(TI_DUMP_PADDING);
+ printf("About to pop from the stack: ");
+ const typeInfo& ti = verCurrentState.esStack[verCurrentState.esStackDepth - 1].seTypeInfo;
+ ti.Dump();
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ return verCurrentState.esStack[--verCurrentState.esStackDepth];
+}
+
+StackEntry Compiler::impPopStack(CORINFO_CLASS_HANDLE& structType)
+{
+ StackEntry ret = impPopStack();
+ structType = verCurrentState.esStack[verCurrentState.esStackDepth].seTypeInfo.GetClassHandle();
+ return (ret);
+}
+
+GenTreePtr Compiler::impPopStack(typeInfo& ti)
+{
+ StackEntry ret = impPopStack();
+ ti = ret.seTypeInfo;
+ return (ret.val);
+}
+
+/*****************************************************************************
+ *
+ * Peek at the n'th (0-based) tree from the top of the stack.
+ */
+
+StackEntry& Compiler::impStackTop(unsigned n)
+{
+ if (verCurrentState.esStackDepth <= n)
+ {
+ BADCODE("stack underflow");
+ }
+
+ return verCurrentState.esStack[verCurrentState.esStackDepth - n - 1];
+}
+/*****************************************************************************
+ * Some of the trees are spilled in a special way. While unspilling them, or
+ * making a copy, they need to be handled specially. The following function
+ * enumerates the operators possible after spilling.
+ */
+
+#ifdef DEBUG // only used in asserts
+static bool impValidSpilledStackEntry(GenTreePtr tree)
+{
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ return true;
+ }
+
+ if (tree->OperIsConst())
+ {
+ return true;
+ }
+
+ return false;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * The following logic is used to save/restore stack contents.
+ * If 'copy' is true, then we make a copy of the trees on the stack. These
+ * have to all be cloneable/spilled values.
+ */
+
+void Compiler::impSaveStackState(SavedStack* savePtr, bool copy)
+{
+ savePtr->ssDepth = verCurrentState.esStackDepth;
+
+ if (verCurrentState.esStackDepth)
+ {
+ savePtr->ssTrees = new (this, CMK_ImpStack) StackEntry[verCurrentState.esStackDepth];
+ size_t saveSize = verCurrentState.esStackDepth * sizeof(*savePtr->ssTrees);
+
+ if (copy)
+ {
+ StackEntry* table = savePtr->ssTrees;
+
+ /* Make a fresh copy of all the stack entries */
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++, table++)
+ {
+ table->seTypeInfo = verCurrentState.esStack[level].seTypeInfo;
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ assert(impValidSpilledStackEntry(tree));
+
+ switch (tree->gtOper)
+ {
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ case GT_LCL_VAR:
+ table->val = gtCloneExpr(tree);
+ break;
+
+ default:
+ assert(!"Bad oper - Not covered by impValidSpilledStackEntry()");
+ break;
+ }
+ }
+ }
+ else
+ {
+ memcpy(savePtr->ssTrees, verCurrentState.esStack, saveSize);
+ }
+ }
+}
+
+void Compiler::impRestoreStackState(SavedStack* savePtr)
+{
+ verCurrentState.esStackDepth = savePtr->ssDepth;
+
+ if (verCurrentState.esStackDepth)
+ {
+ memcpy(verCurrentState.esStack, savePtr->ssTrees,
+ verCurrentState.esStackDepth * sizeof(*verCurrentState.esStack));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Get the tree list started for a new basic block.
+ */
+inline void Compiler::impBeginTreeList()
+{
+ assert(impTreeList == nullptr && impTreeLast == nullptr);
+
+ impTreeList = impTreeLast = new (this, GT_BEG_STMTS) GenTree(GT_BEG_STMTS, TYP_VOID);
+}
+
+/*****************************************************************************
+ *
+ * Store the given start and end stmt in the given basic block. This is
+ * mostly called by impEndTreeList(BasicBlock *block). It is called
+ * directly only for handling CEE_LEAVEs out of finally-protected try's.
+ */
+
+inline void Compiler::impEndTreeList(BasicBlock* block, GenTreePtr firstStmt, GenTreePtr lastStmt)
+{
+ assert(firstStmt->gtOper == GT_STMT);
+ assert(lastStmt->gtOper == GT_STMT);
+
+ /* Make the list circular, so that we can easily walk it backwards */
+
+ firstStmt->gtPrev = lastStmt;
+
+ /* Store the tree list in the basic block */
+
+ block->bbTreeList = firstStmt;
+
+ /* The block should not already be marked as imported */
+ assert((block->bbFlags & BBF_IMPORTED) == 0);
+
+ block->bbFlags |= BBF_IMPORTED;
+}
+
+/*****************************************************************************
+ *
+ * Store the current tree list in the given basic block.
+ */
+
+inline void Compiler::impEndTreeList(BasicBlock* block)
+{
+ assert(impTreeList->gtOper == GT_BEG_STMTS);
+
+ GenTreePtr firstTree = impTreeList->gtNext;
+
+ if (!firstTree)
+ {
+ /* The block should not already be marked as imported */
+ assert((block->bbFlags & BBF_IMPORTED) == 0);
+
+ // Empty block. Just mark it as imported
+ block->bbFlags |= BBF_IMPORTED;
+ }
+ else
+ {
+ // Ignore the GT_BEG_STMTS
+ assert(firstTree->gtPrev == impTreeList);
+
+ impEndTreeList(block, firstTree, impTreeLast);
+ }
+
+#ifdef DEBUG
+ if (impLastILoffsStmt != nullptr)
+ {
+ impLastILoffsStmt->gtStmt.gtStmtLastILoffs = compIsForInlining() ? BAD_IL_OFFSET : impCurOpcOffs;
+ impLastILoffsStmt = nullptr;
+ }
+
+ impTreeList = impTreeLast = nullptr;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Check that storing the given tree doesn't mess up the semantic order. Note
+ * that this has only limited value as we can only check [0..chkLevel).
+ */
+
+inline void Compiler::impAppendStmtCheck(GenTreePtr stmt, unsigned chkLevel)
+{
+#ifndef DEBUG
+ return;
+#else
+ assert(stmt->gtOper == GT_STMT);
+
+ if (chkLevel == (unsigned)CHECK_SPILL_ALL)
+ {
+ chkLevel = verCurrentState.esStackDepth;
+ }
+
+ if (verCurrentState.esStackDepth == 0 || chkLevel == 0 || chkLevel == (unsigned)CHECK_SPILL_NONE)
+ {
+ return;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+    // Calls can only be appended if there are no trees with GTF_GLOB_EFFECT on the stack
+
+ if (tree->gtFlags & GTF_CALL)
+ {
+ for (unsigned level = 0; level < chkLevel; level++)
+ {
+ assert((verCurrentState.esStack[level].val->gtFlags & GTF_GLOB_EFFECT) == 0);
+ }
+ }
+
+ if (tree->gtOper == GT_ASG)
+ {
+ // For an assignment to a local variable, all references of that
+ // variable have to be spilled. If it is aliased, all calls and
+ // indirect accesses have to be spilled
+
+ if (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ for (unsigned level = 0; level < chkLevel; level++)
+ {
+ assert(!gtHasRef(verCurrentState.esStack[level].val, lclNum, false));
+ assert(!lvaTable[lclNum].lvAddrExposed ||
+ (verCurrentState.esStack[level].val->gtFlags & GTF_SIDE_EFFECT) == 0);
+ }
+ }
+
+ // If the access may be to global memory, all side effects have to be spilled.
+
+ else if (tree->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
+ {
+ for (unsigned level = 0; level < chkLevel; level++)
+ {
+ assert((verCurrentState.esStack[level].val->gtFlags & GTF_GLOB_REF) == 0);
+ }
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Append the given GT_STMT node to the current block's tree list.
+ * [0..chkLevel) is the portion of the stack which we will check for
+ * interference with stmt and spill if needed.
+ */
+
+inline void Compiler::impAppendStmt(GenTreePtr stmt, unsigned chkLevel)
+{
+ assert(stmt->gtOper == GT_STMT);
+ noway_assert(impTreeLast != nullptr);
+
+ /* If the statement being appended has any side-effects, check the stack
+ to see if anything needs to be spilled to preserve correct ordering. */
+
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ unsigned flags = expr->gtFlags & GTF_GLOB_EFFECT;
+
+    // Assignments to (unaliased) locals don't count as side-effects, as
+ // we handle them specially using impSpillLclRefs(). Temp locals should
+ // be fine too.
+ // TODO-1stClassStructs: The check below should apply equally to struct assignments,
+ // but previously the block ops were always being marked GTF_GLOB_REF, even if
+ // the operands could not be global refs.
+
+ if ((expr->gtOper == GT_ASG) && (expr->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ !(expr->gtOp.gtOp1->gtFlags & GTF_GLOB_REF) && !gtHasLocalsWithAddrOp(expr->gtOp.gtOp2) &&
+ !varTypeIsStruct(expr->gtOp.gtOp1))
+ {
+ unsigned op2Flags = expr->gtOp.gtOp2->gtFlags & GTF_GLOB_EFFECT;
+ assert(flags == (op2Flags | GTF_ASG));
+ flags = op2Flags;
+ }
+
+ if (chkLevel == (unsigned)CHECK_SPILL_ALL)
+ {
+ chkLevel = verCurrentState.esStackDepth;
+ }
+
+ if (chkLevel && chkLevel != (unsigned)CHECK_SPILL_NONE)
+ {
+ assert(chkLevel <= verCurrentState.esStackDepth);
+
+ if (flags)
+ {
+ // If there is a call, we have to spill global refs
+ bool spillGlobEffects = (flags & GTF_CALL) ? true : false;
+
+ if (expr->gtOper == GT_ASG)
+ {
+ GenTree* lhs = expr->gtGetOp1();
+ // If we are assigning to a global ref, we have to spill global refs on stack.
+ // TODO-1stClassStructs: Previously, spillGlobEffects was set to true for
+ // GT_INITBLK and GT_COPYBLK, but this is overly conservative, and should be
+ // revisited. (Note that it was NOT set to true for GT_COPYOBJ.)
+ if (!expr->OperIsBlkOp())
+ {
+ // If we are assigning to a global ref, we have to spill global refs on stack
+ if ((lhs->gtFlags & GTF_GLOB_REF) != 0)
+ {
+ spillGlobEffects = true;
+ }
+ }
+ else if ((lhs->OperIsBlk() && !lhs->AsBlk()->HasGCPtr()) ||
+ ((lhs->OperGet() == GT_LCL_VAR) &&
+ (lvaTable[lhs->AsLclVarCommon()->gtLclNum].lvStructGcCount == 0)))
+ {
+ spillGlobEffects = true;
+ }
+ }
+
+ impSpillSideEffects(spillGlobEffects, chkLevel DEBUGARG("impAppendStmt"));
+ }
+ else
+ {
+ impSpillSpecialSideEff();
+ }
+ }
+
+ impAppendStmtCheck(stmt, chkLevel);
+
+ /* Point 'prev' at the previous node, so that we can walk backwards */
+
+ stmt->gtPrev = impTreeLast;
+
+ /* Append the expression statement to the list */
+
+ impTreeLast->gtNext = stmt;
+ impTreeLast = stmt;
+
+#ifdef FEATURE_SIMD
+ impMarkContiguousSIMDFieldAssignments(stmt);
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* Once we set impCurStmtOffs in an appended tree, we are ready to
+ report the following offsets. So reset impCurStmtOffs */
+
+ if (impTreeLast->gtStmt.gtStmtILoffsx == impCurStmtOffs)
+ {
+ impCurStmtOffsSet(BAD_IL_OFFSET);
+ }
+
+#endif
+
+#ifdef DEBUG
+ if (impLastILoffsStmt == nullptr)
+ {
+ impLastILoffsStmt = stmt;
+ }
+
+ if (verbose)
+ {
+ printf("\n\n");
+ gtDispTree(stmt);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Insert the given GT_STMT "stmt" before GT_STMT "stmtBefore"
+ */
+
+inline void Compiler::impInsertStmtBefore(GenTreePtr stmt, GenTreePtr stmtBefore)
+{
+ assert(stmt->gtOper == GT_STMT);
+ assert(stmtBefore->gtOper == GT_STMT);
+
+ GenTreePtr stmtPrev = stmtBefore->gtPrev;
+ stmt->gtPrev = stmtPrev;
+ stmt->gtNext = stmtBefore;
+ stmtPrev->gtNext = stmt;
+ stmtBefore->gtPrev = stmt;
+}
+
+/*****************************************************************************
+ *
+ * Append the given expression tree to the current block's tree list.
+ * Return the newly created statement.
+ */
+
+GenTreePtr Compiler::impAppendTree(GenTreePtr tree, unsigned chkLevel, IL_OFFSETX offset)
+{
+ assert(tree);
+
+ /* Allocate an 'expression statement' node */
+
+ GenTreePtr expr = gtNewStmt(tree, offset);
+
+ /* Append the statement to the current block's stmt list */
+
+ impAppendStmt(expr, chkLevel);
+
+ return expr;
+}
+
+/*****************************************************************************
+ *
+ * Insert the given expression tree before GT_STMT "stmtBefore"
+ */
+
+void Compiler::impInsertTreeBefore(GenTreePtr tree, IL_OFFSETX offset, GenTreePtr stmtBefore)
+{
+ assert(stmtBefore->gtOper == GT_STMT);
+
+ /* Allocate an 'expression statement' node */
+
+ GenTreePtr expr = gtNewStmt(tree, offset);
+
+ /* Append the statement to the current block's stmt list */
+
+ impInsertStmtBefore(expr, stmtBefore);
+}
+
+/*****************************************************************************
+ *
+ * Append an assignment of the given value to a temp to the current tree list.
+ * curLevel is the stack level for which the spill to the temp is being done.
+ */
+
+void Compiler::impAssignTempGen(unsigned tmp,
+ GenTreePtr val,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ IL_OFFSETX ilOffset, /* = BAD_IL_OFFSET */
+ BasicBlock* block /* = NULL */
+ )
+{
+ GenTreePtr asg = gtNewTempAssign(tmp, val);
+
+ if (!asg->IsNothingNode())
+ {
+ if (pAfterStmt)
+ {
+ GenTreePtr asgStmt = gtNewStmt(asg, ilOffset);
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, asgStmt);
+ }
+ else
+ {
+ impAppendTree(asg, curLevel, impCurStmtOffs);
+ }
+ }
+}
+
+/*****************************************************************************
+ * same as above, but handle the valueclass case too
+ */
+
+void Compiler::impAssignTempGen(unsigned tmpNum,
+ GenTreePtr val,
+ CORINFO_CLASS_HANDLE structType,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ IL_OFFSETX ilOffset, /* = BAD_IL_OFFSET */
+ BasicBlock* block /* = NULL */
+ )
+{
+ GenTreePtr asg;
+
+ if (varTypeIsStruct(val))
+ {
+ assert(tmpNum < lvaCount);
+ assert(structType != NO_CLASS_HANDLE);
+
+ // if the method is non-verifiable the assert is not true
+ // so at least ignore it in the case when verification is turned on
+ // since any block that tries to use the temp would have failed verification.
+ var_types varType = lvaTable[tmpNum].lvType;
+ assert(tiVerificationNeeded || varType == TYP_UNDEF || varTypeIsStruct(varType));
+ lvaSetStruct(tmpNum, structType, false);
+
+ // Now, set the type of the struct value. Note that lvaSetStruct may modify the type
+ // of the lclVar to a specialized type (e.g. TYP_SIMD), based on the handle (structType)
+ // that has been passed in for the value being assigned to the temp, in which case we
+ // need to set 'val' to that same type.
+ // Note also that if we always normalized the types of any node that might be a struct
+ // type, this would not be necessary - but that requires additional JIT/EE interface
+ // calls that may not actually be required - e.g. if we only access a field of a struct.
+
+ val->gtType = lvaTable[tmpNum].lvType;
+
+ GenTreePtr dst = gtNewLclvNode(tmpNum, val->gtType);
+ asg = impAssignStruct(dst, val, structType, curLevel, pAfterStmt, block);
+ }
+ else
+ {
+ asg = gtNewTempAssign(tmpNum, val);
+ }
+
+ if (!asg->IsNothingNode())
+ {
+ if (pAfterStmt)
+ {
+ GenTreePtr asgStmt = gtNewStmt(asg, ilOffset);
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, asgStmt);
+ }
+ else
+ {
+ impAppendTree(asg, curLevel, impCurStmtOffs);
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Pop the given number of values from the stack and return a list node with
+ * their values.
+ * The 'prefixTree' argument may optionally contain an argument
+ * list that is prepended to the list returned from this function.
+ *
+ * The notion of prepended is a bit misleading in that the list is backwards
+ * from the way I would expect: The first element popped is at the end of
+ * the returned list, and prefixTree is 'before' that, meaning closer to
+ * the end of the list. To get to prefixTree, you have to walk to the
+ * end of the list.
+ *
+ * For ARG_ORDER_R2L prefixTree is only used to insert extra arguments, as
+ * such we reverse its meaning such that returnValue has a reversed
+ * prefixTree at the head of the list.
+ */
+
+GenTreeArgList* Compiler::impPopList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ GenTreeArgList* prefixTree)
+{
+ assert(sig == nullptr || count == sig->numArgs);
+
+ unsigned flags = 0;
+ CORINFO_CLASS_HANDLE structType;
+ GenTreeArgList* treeList;
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L)
+ {
+ treeList = nullptr;
+ }
+ else
+ { // ARG_ORDER_L2R
+ treeList = prefixTree;
+ }
+
+ while (count--)
+ {
+ StackEntry se = impPopStack();
+ typeInfo ti = se.seTypeInfo;
+ GenTreePtr temp = se.val;
+
+ if (varTypeIsStruct(temp))
+ {
+ // Morph trees that aren't already OBJs or MKREFANY to be OBJs
+ assert(ti.IsType(TI_STRUCT));
+ structType = ti.GetClassHandleForValueClass();
+ temp = impNormStructVal(temp, structType, (unsigned)CHECK_SPILL_ALL);
+ }
+
+ /* NOTE: we defer bashing the type for I_IMPL to fgMorphArgs */
+ flags |= temp->gtFlags;
+ treeList = gtNewListNode(temp, treeList);
+ }
+
+ *flagsPtr = flags;
+
+ if (sig != nullptr)
+ {
+ if (sig->retTypeSigClass != nullptr && sig->retType != CORINFO_TYPE_CLASS &&
+ sig->retType != CORINFO_TYPE_BYREF && sig->retType != CORINFO_TYPE_PTR && sig->retType != CORINFO_TYPE_VAR)
+ {
+ // Make sure that all valuetypes (including enums) that we push are loaded.
+            // This is to guarantee that if a GC is triggered from the prestub of this method,
+ // all valuetypes in the method signature are already loaded.
+ // We need to be able to find the size of the valuetypes, but we cannot
+ // do a class-load from within GC.
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(sig->retTypeSigClass);
+ }
+
+ CORINFO_ARG_LIST_HANDLE argLst = sig->args;
+ CORINFO_CLASS_HANDLE argClass;
+ CORINFO_CLASS_HANDLE argRealClass;
+ GenTreeArgList* args;
+ unsigned sigSize;
+
+ for (args = treeList, count = sig->numArgs; count > 0; args = args->Rest(), count--)
+ {
+ PREFIX_ASSUME(args != nullptr);
+
+ CorInfoType corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass));
+
+ // insert implied casts (from float to double or double to float)
+
+ if (corType == CORINFO_TYPE_DOUBLE && args->Current()->TypeGet() == TYP_FLOAT)
+ {
+ args->Current() = gtNewCastNode(TYP_DOUBLE, args->Current(), TYP_DOUBLE);
+ }
+ else if (corType == CORINFO_TYPE_FLOAT && args->Current()->TypeGet() == TYP_DOUBLE)
+ {
+ args->Current() = gtNewCastNode(TYP_FLOAT, args->Current(), TYP_FLOAT);
+ }
+
+ // insert any widening or narrowing casts for backwards compatibility
+
+ args->Current() = impImplicitIorI4Cast(args->Current(), JITtype2varType(corType));
+
+ if (corType != CORINFO_TYPE_CLASS && corType != CORINFO_TYPE_BYREF && corType != CORINFO_TYPE_PTR &&
+ corType != CORINFO_TYPE_VAR && (argRealClass = info.compCompHnd->getArgClass(sig, argLst)) != nullptr)
+ {
+                // Everett MC++ could generate IL with mismatched valuetypes. It used to work with the Everett JIT,
+                // but it stopped working in Whidbey when we started passing simple valuetypes as underlying
+ // primitive types.
+ // We will try to adjust for this case here to avoid breaking customers code (see VSW 485789 for
+ // details).
+ if (corType == CORINFO_TYPE_VALUECLASS && !varTypeIsStruct(args->Current()))
+ {
+ args->Current() = impNormStructVal(args->Current(), argRealClass, (unsigned)CHECK_SPILL_ALL, true);
+ }
+
+ // Make sure that all valuetypes (including enums) that we push are loaded.
+                // This is to guarantee that if a GC is triggered from the prestub of this method,
+ // all valuetypes in the method signature are already loaded.
+ // We need to be able to find the size of the valuetypes, but we cannot
+ // do a class-load from within GC.
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(argRealClass);
+ }
+
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L)
+ {
+ // Prepend the prefixTree
+
+ // Simple in-place reversal to place treeList
+ // at the end of a reversed prefixTree
+ while (prefixTree != nullptr)
+ {
+ GenTreeArgList* next = prefixTree->Rest();
+ prefixTree->Rest() = treeList;
+ treeList = prefixTree;
+ prefixTree = next;
+ }
+ }
+ return treeList;
+}
+
+/*****************************************************************************
+ *
+ * Pop the given number of values from the stack in reverse order (STDCALL/CDECL etc.)
+ * The first "skipReverseCount" items are not reversed.
+ */
+
+GenTreeArgList* Compiler::impPopRevList(unsigned count,
+ unsigned* flagsPtr,
+ CORINFO_SIG_INFO* sig,
+ unsigned skipReverseCount)
+
+{
+ assert(skipReverseCount <= count);
+
+ GenTreeArgList* list = impPopList(count, flagsPtr, sig);
+
+ // reverse the list
+ if (list == nullptr || skipReverseCount == count)
+ {
+ return list;
+ }
+
+ GenTreeArgList* ptr = nullptr; // Initialized to the first node that needs to be reversed
+ GenTreeArgList* lastSkipNode = nullptr; // Will be set to the last node that does not need to be reversed
+
+ if (skipReverseCount == 0)
+ {
+ ptr = list;
+ }
+ else
+ {
+ lastSkipNode = list;
+ // Get to the first node that needs to be reversed
+ for (unsigned i = 0; i < skipReverseCount - 1; i++)
+ {
+ lastSkipNode = lastSkipNode->Rest();
+ }
+
+ PREFIX_ASSUME(lastSkipNode != nullptr);
+ ptr = lastSkipNode->Rest();
+ }
+
+ GenTreeArgList* reversedList = nullptr;
+
+ do
+ {
+ GenTreeArgList* tmp = ptr->Rest();
+ ptr->Rest() = reversedList;
+ reversedList = ptr;
+ ptr = tmp;
+ } while (ptr != nullptr);
+
+ if (skipReverseCount)
+ {
+ lastSkipNode->Rest() = reversedList;
+ return list;
+ }
+ else
+ {
+ return reversedList;
+ }
+}
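
The core of impPopRevList above is a classic in-place reversal of a singly linked list. A minimal standalone sketch of that pointer dance (Node and reverseList are illustrative names; the skip-prefix handling is omitted for brevity):

    #include <cstdio>

    struct Node
    {
        int   val;
        Node* next;
    };

    // In-place reversal of a singly linked list: the same pointer dance the
    // do/while loop in impPopRevList performs on GenTreeArgList nodes.
    static Node* reverseList(Node* head)
    {
        Node* reversed = nullptr;
        while (head != nullptr)
        {
            Node* rest = head->next; // remember the rest of the list
            head->next = reversed;   // hook the current node onto the reversed part
            reversed   = head;
            head       = rest;
        }
        return reversed;
    }

    int main()
    {
        Node c = {3, nullptr}, b = {2, &c}, a = {1, &b};
        for (Node* n = reverseList(&a); n != nullptr; n = n->next)
        {
            printf("%d ", n->val); // prints: 3 2 1
        }
        return 0;
    }
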
+
+/*****************************************************************************
+ Assign (copy) the structure from 'src' to 'dest'. The structure is a value
+ class of type 'clsHnd'. It returns the tree that should be appended to the
+ statement list that represents the assignment.
+ Temp assignments may be appended to impTreeList if spilling is necessary.
+ curLevel is the stack level for which a spill may be being done.
+ */
+
+GenTreePtr Compiler::impAssignStruct(GenTreePtr dest,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ BasicBlock* block /* = NULL */
+ )
+{
+ assert(varTypeIsStruct(dest));
+
+ while (dest->gtOper == GT_COMMA)
+ {
+ assert(varTypeIsStruct(dest->gtOp.gtOp2)); // Second thing is the struct
+
+ // Append all the op1 of GT_COMMA trees before we evaluate op2 of the GT_COMMA tree.
+ if (pAfterStmt)
+ {
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, gtNewStmt(dest->gtOp.gtOp1, impCurStmtOffs));
+ }
+ else
+ {
+ impAppendTree(dest->gtOp.gtOp1, curLevel, impCurStmtOffs); // do the side effect
+ }
+
+ // set dest to the second thing
+ dest = dest->gtOp.gtOp2;
+ }
+
+ assert(dest->gtOper == GT_LCL_VAR || dest->gtOper == GT_RETURN || dest->gtOper == GT_FIELD ||
+ dest->gtOper == GT_IND || dest->gtOper == GT_OBJ || dest->gtOper == GT_INDEX);
+
+ if (dest->OperGet() == GT_LCL_VAR && src->OperGet() == GT_LCL_VAR &&
+ src->gtLclVarCommon.gtLclNum == dest->gtLclVarCommon.gtLclNum)
+ {
+ // Make this a NOP
+ return gtNewNothingNode();
+ }
+
+ // TODO-1stClassStructs: Avoid creating an address if it is not needed,
+ // or re-creating a Blk node if it is.
+ GenTreePtr destAddr;
+
+ if (dest->gtOper == GT_IND || dest->OperIsBlk())
+ {
+ destAddr = dest->gtOp.gtOp1;
+ }
+ else
+ {
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+
+ return (impAssignStructPtr(destAddr, src, structHnd, curLevel, pAfterStmt, block));
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::impAssignStructPtr(GenTreePtr destAddr,
+ GenTreePtr src,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt, /* = NULL */
+ BasicBlock* block /* = NULL */
+ )
+{
+ var_types destType;
+ GenTreePtr dest = nullptr;
+ unsigned destFlags = 0;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(varTypeIsStruct(src) || (src->gtOper == GT_ADDR && src->TypeGet() == TYP_BYREF));
+ // TODO-ARM-BUG: Does ARM need this?
+ // TODO-ARM64-BUG: Does ARM64 need this?
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
+ src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
+ src->gtOper == GT_COMMA || src->gtOper == GT_ADDR ||
+ (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ assert(varTypeIsStruct(src));
+
+ assert(src->gtOper == GT_LCL_VAR || src->gtOper == GT_FIELD || src->gtOper == GT_IND || src->gtOper == GT_OBJ ||
+ src->gtOper == GT_CALL || src->gtOper == GT_MKREFANY || src->gtOper == GT_RET_EXPR ||
+ src->gtOper == GT_COMMA ||
+ (src->TypeGet() != TYP_STRUCT && (GenTree::OperIsSIMD(src->gtOper) || src->gtOper == GT_LCL_FLD)));
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (destAddr->OperGet() == GT_ADDR)
+ {
+ GenTree* destNode = destAddr->gtGetOp1();
+ // If the actual destination is already a block node, or is a node that
+ // will be morphed, don't insert an OBJ(ADDR).
+ if (destNode->gtOper == GT_INDEX || destNode->OperIsBlk())
+ {
+ dest = destNode;
+ }
+ destType = destNode->TypeGet();
+ }
+ else
+ {
+ destType = src->TypeGet();
+ }
+
+ var_types asgType = src->TypeGet();
+
+ if (src->gtOper == GT_CALL)
+ {
+ if (src->AsCall()->TreatAsHasRetBufArg(this))
+ {
+ // Case of call returning a struct via hidden retbuf arg
+
+ // insert the return value buffer into the argument list as first byref parameter
+ src->gtCall.gtCallArgs = gtNewListNode(destAddr, src->gtCall.gtCallArgs);
+
+ // now returns void, not a struct
+ src->gtType = TYP_VOID;
+
+ // return the morphed call node
+ return src;
+ }
+ else
+ {
+ // Case of call returning a struct in one or more registers.
+
+ var_types returnType = (var_types)src->gtCall.gtReturnType;
+
+ // We won't use a return buffer, so change the type of src->gtType to 'returnType'
+ src->gtType = genActualType(returnType);
+
+ // First we try to change this to "LclVar/LclFld = call"
+ //
+ if ((destAddr->gtOper == GT_ADDR) && (destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR))
+ {
+ // If it is a multi-reg struct return, don't change the oper to GT_LCL_FLD.
+ // That is, the IR will be of the form lclVar = call for multi-reg return
+ //
+ GenTreePtr lcl = destAddr->gtOp.gtOp1;
+ if (src->AsCall()->HasMultiRegRetVal())
+ {
+ // Mark the struct LclVar as used in a MultiReg return context
+ // which currently makes it non promotable.
+ lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
+ }
+ else // The call result is not a multireg return
+ {
+ // We change this to a GT_LCL_FLD (from a GT_ADDR of a GT_LCL_VAR)
+ lcl->ChangeOper(GT_LCL_FLD);
+ fgLclFldAssign(lcl->gtLclVarCommon.gtLclNum);
+ }
+
+ lcl->gtType = src->gtType;
+ asgType = src->gtType;
+ dest = lcl;
+
+#if defined(_TARGET_ARM_)
+ impMarkLclDstNotPromotable(lcl->gtLclVarCommon.gtLclNum, src, structHnd);
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Not allowed for FEATURE_CORCLR which is the only SKU available for System V OSs.
+ assert(!src->gtCall.IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // Make the struct non promotable. The eightbytes could contain multiple fields.
+ lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
+#endif
+ }
+ else // we don't have a GT_ADDR of a GT_LCL_VAR
+ {
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ asgType = returnType;
+ destFlags = GTF_IND_TGTANYWHERE;
+ }
+ }
+ }
+ else if (src->gtOper == GT_RET_EXPR)
+ {
+ GenTreePtr call = src->gtRetExpr.gtInlineCandidate;
+ noway_assert(call->gtOper == GT_CALL);
+
+ if (call->AsCall()->HasRetBufArg())
+ {
+ // insert the return value buffer into the argument list as first byref parameter
+ call->gtCall.gtCallArgs = gtNewListNode(destAddr, call->gtCall.gtCallArgs);
+
+ // now returns void, not a struct
+ src->gtType = TYP_VOID;
+ call->gtType = TYP_VOID;
+
+ // We already have appended the write to 'dest' GT_CALL's args
+ // So now we just return an empty node (pruning the GT_RET_EXPR)
+ return src;
+ }
+ else
+ {
+ // Case of inline method returning a struct in one or more registers.
+ //
+ var_types returnType = (var_types)call->gtCall.gtReturnType;
+
+ // We won't need a return buffer
+ asgType = returnType;
+ src->gtType = genActualType(returnType);
+ call->gtType = src->gtType;
+
+ // 1stClassStructToDo: We shouldn't necessarily need this.
+ if (dest != nullptr)
+ {
+ dest = gtNewOperNode(GT_IND, returnType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
+ }
+
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ destFlags = GTF_IND_TGTANYWHERE;
+ }
+ }
+ else if (src->OperIsBlk())
+ {
+ asgType = impNormStructType(structHnd);
+ if (src->gtOper == GT_OBJ)
+ {
+ assert(src->gtObj.gtClass == structHnd);
+ }
+ }
+ else if (src->gtOper == GT_INDEX)
+ {
+ asgType = impNormStructType(structHnd);
+ assert(src->gtIndex.gtStructElemClass == structHnd);
+ }
+ else if (src->gtOper == GT_MKREFANY)
+ {
+ // Since we are assigning the result of a GT_MKREFANY,
+ // "destAddr" must point to a refany.
+
+ GenTreePtr destAddrClone;
+ destAddr =
+ impCloneExpr(destAddr, &destAddrClone, structHnd, curLevel, pAfterStmt DEBUGARG("MKREFANY assignment"));
+
+ assert(offsetof(CORINFO_RefAny, dataPtr) == 0);
+ assert(destAddr->gtType == TYP_I_IMPL || destAddr->gtType == TYP_BYREF);
+ GetZeroOffsetFieldMap()->Set(destAddr, GetFieldSeqStore()->CreateSingleton(GetRefanyDataField()));
+ GenTreePtr ptrSlot = gtNewOperNode(GT_IND, TYP_I_IMPL, destAddr);
+ GenTreeIntCon* typeFieldOffset = gtNewIconNode(offsetof(CORINFO_RefAny, type), TYP_I_IMPL);
+ typeFieldOffset->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
+ GenTreePtr typeSlot =
+ gtNewOperNode(GT_IND, TYP_I_IMPL, gtNewOperNode(GT_ADD, destAddr->gtType, destAddrClone, typeFieldOffset));
+
+ // append the assign of the pointer value
+ GenTreePtr asg = gtNewAssignNode(ptrSlot, src->gtOp.gtOp1);
+ if (pAfterStmt)
+ {
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, gtNewStmt(asg, impCurStmtOffs));
+ }
+ else
+ {
+ impAppendTree(asg, curLevel, impCurStmtOffs);
+ }
+
+ // return the assign of the type value, to be appended
+ return gtNewAssignNode(typeSlot, src->gtOp.gtOp2);
+ }
+ else if (src->gtOper == GT_COMMA)
+ {
+ // The second thing is the struct or its address.
+ assert(varTypeIsStruct(src->gtOp.gtOp2) || src->gtOp.gtOp2->gtType == TYP_BYREF);
+ if (pAfterStmt)
+ {
+ *pAfterStmt = fgInsertStmtAfter(block, *pAfterStmt, gtNewStmt(src->gtOp.gtOp1, impCurStmtOffs));
+ }
+ else
+ {
+ impAppendTree(src->gtOp.gtOp1, curLevel, impCurStmtOffs); // do the side effect
+ }
+
+ // Evaluate the second thing using recursion.
+ return impAssignStructPtr(destAddr, src->gtOp.gtOp2, structHnd, curLevel, pAfterStmt, block);
+ }
+ else if (src->IsLocal())
+ {
+ // TODO-1stClassStructs: Eliminate this; it is only here to minimize diffs in the
+ // initial implementation. Previously the source would have been under a GT_ADDR, which
+ // would cause it to be marked GTF_DONT_CSE.
+ asgType = src->TypeGet();
+ src->gtFlags |= GTF_DONT_CSE;
+ if (asgType == TYP_STRUCT)
+ {
+ GenTree* srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr);
+ }
+ }
+ else if (asgType == TYP_STRUCT)
+ {
+ asgType = impNormStructType(structHnd);
+ src->gtType = asgType;
+ }
+ if (dest == nullptr)
+ {
+ // TODO-1stClassStructs: We shouldn't really need a block node as the destination
+ // if this is a known struct type.
+ if (asgType == TYP_STRUCT)
+ {
+ dest = gtNewObjNode(structHnd, destAddr);
+ gtSetObjGcInfo(dest->AsObj());
+ // Although an obj as a call argument was always assumed to be a globRef
+ // (which is itself overly conservative), that is not true of the operands
+ // of a block assignment.
+ dest->gtFlags &= ~GTF_GLOB_REF;
+ dest->gtFlags |= (destAddr->gtFlags & GTF_GLOB_REF);
+ }
+ else if (varTypeIsStruct(asgType))
+ {
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, destAddr, genTypeSize(asgType));
+ }
+ else
+ {
+ dest = gtNewOperNode(GT_IND, asgType, destAddr);
+ }
+ }
+ else
+ {
+ dest->gtType = asgType;
+ }
+
+ dest->gtFlags |= destFlags;
+ destFlags = dest->gtFlags;
+
+ // return an assignment node, to be appended
+ GenTree* asgNode = gtNewAssignNode(dest, src);
+ gtBlockOpInit(asgNode, dest, src, false);
+
+ // TODO-1stClassStructs: Clean up the settings of GTF_DONT_CSE on the lhs
+ // of assignments.
+ if ((destFlags & GTF_DONT_CSE) == 0)
+ {
+ dest->gtFlags &= ~(GTF_DONT_CSE);
+ }
+ return asgNode;
+}
+
+/*****************************************************************************
+ Given a struct value, and the class handle for that structure, return
+ the expression for the address of that struct value.
+
+ willDeref - true if the caller guarantees that it will dereference the returned pointer.
+*/
+
+GenTreePtr Compiler::impGetStructAddr(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool willDeref)
+{
+ assert(varTypeIsStruct(structVal) || eeIsValueClass(structHnd));
+
+ var_types type = structVal->TypeGet();
+
+ genTreeOps oper = structVal->gtOper;
+
+ if (oper == GT_OBJ && willDeref)
+ {
+ assert(structVal->gtObj.gtClass == structHnd);
+ return (structVal->gtObj.Addr());
+ }
+ else if (oper == GT_CALL || oper == GT_RET_EXPR || oper == GT_OBJ || oper == GT_MKREFANY)
+ {
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("struct address for call/obj"));
+
+ impAssignTempGen(tmpNum, structVal, structHnd, curLevel);
+
+ // The 'return value' is now the temp itself
+
+ type = genActualType(lvaTable[tmpNum].TypeGet());
+ GenTreePtr temp = gtNewLclvNode(tmpNum, type);
+ temp = gtNewOperNode(GT_ADDR, TYP_BYREF, temp);
+ return temp;
+ }
+ else if (oper == GT_COMMA)
+ {
+ assert(structVal->gtOp.gtOp2->gtType == type); // Second thing is the struct
+
+ GenTreePtr oldTreeLast = impTreeLast;
+ structVal->gtOp.gtOp2 = impGetStructAddr(structVal->gtOp.gtOp2, structHnd, curLevel, willDeref);
+ structVal->gtType = TYP_BYREF;
+
+ if (oldTreeLast != impTreeLast)
+ {
+ // Some temp assignment statement was placed on the statement list
+ // for Op2, but that would be out of order with op1, so we need to
+ // spill op1 onto the statement list after whatever was last
+ // before we recursed on Op2 (i.e. before whatever Op2 appended).
+ impInsertTreeBefore(structVal->gtOp.gtOp1, impCurStmtOffs, oldTreeLast->gtNext);
+ structVal->gtOp.gtOp1 = gtNewNothingNode();
+ }
+
+ return (structVal);
+ }
+
+ return (gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+}
+
+//------------------------------------------------------------------------
+// impNormStructType: Given a (known to be) struct class handle structHnd, normalize its type,
+// and optionally determine the GC layout of the struct.
+//
+// Arguments:
+// structHnd - The class handle for the struct type of interest.
+// gcLayout - (optional, default nullptr) - a BYTE pointer, allocated by the caller,
+// into which the gcLayout will be written.
+// pNumGCVars - (optional, default nullptr) - if non-null, a pointer to an unsigned,
+// which will be set to the number of GC fields in the struct.
+//
+// Return Value:
+// The JIT type for the struct (e.g. TYP_STRUCT, or TYP_SIMD*).
+// The gcLayout will be returned using the pointers provided by the caller, if non-null.
+// It may also modify the compFloatingPointUsed flag if the type is a SIMD type.
+//
+// Assumptions:
+// The caller must set gcLayout to nullptr OR ensure that it is large enough
+// (see ICorStaticInfo::getClassGClayout in corinfo.h).
+//
+// Notes:
+// Normalizing the type involves examining the struct type to determine if it should
+// be modified to one that is handled specially by the JIT, possibly being a candidate
+// for full enregistration, e.g. TYP_SIMD16.
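+//
+// As an illustrative example (assuming FEATURE_SIMD and 16-byte vector registers):
+// a struct such as System.Numerics.Vector4 (four floats, 16 bytes, no GC fields) is
+// normalized from TYP_STRUCT to TYP_SIMD16, whereas a struct containing an object
+// reference keeps TYP_STRUCT because structs with GC pointers are not considered for
+// SIMD handling.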
+
+var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
+ BYTE* gcLayout,
+ unsigned* pNumGCVars,
+ var_types* pSimdBaseType)
+{
+ assert(structHnd != NO_CLASS_HANDLE);
+ unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
+ unsigned numGCVars = 0;
+ var_types structType = TYP_STRUCT;
+ var_types simdBaseType = TYP_UNKNOWN;
+ bool definitelyHasGCPtrs = false;
+
+#ifdef FEATURE_SIMD
+ // We don't want to consider this as a possible SIMD type if it has GC pointers.
+ // (Saves querying about the SIMD assembly.)
+ BYTE gcBytes[maxPossibleSIMDStructBytes / TARGET_POINTER_SIZE];
+ if ((gcLayout == nullptr) && (originalSize >= minSIMDStructBytes()) && (originalSize <= maxSIMDStructBytes()))
+ {
+ gcLayout = gcBytes;
+ }
+#endif // FEATURE_SIMD
+
+ if (gcLayout != nullptr)
+ {
+ numGCVars = info.compCompHnd->getClassGClayout(structHnd, gcLayout);
+ definitelyHasGCPtrs = (numGCVars != 0);
+ }
+#ifdef FEATURE_SIMD
+ // Check to see if this is a SIMD type.
+ if (featureSIMD && (originalSize <= getSIMDVectorRegisterByteLength()) && (originalSize >= TARGET_POINTER_SIZE) &&
+ !definitelyHasGCPtrs)
+ {
+ unsigned int sizeBytes;
+ simdBaseType = getBaseTypeAndSizeOfSIMDType(structHnd, &sizeBytes);
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ assert(sizeBytes == originalSize);
+ structType = getSIMDTypeForSize(sizeBytes);
+ if (pSimdBaseType != nullptr)
+ {
+ *pSimdBaseType = simdBaseType;
+ }
+#ifdef _TARGET_AMD64_
+ // Amd64: also indicate that we use floating point registers
+ compFloatingPointUsed = true;
+#endif
+ }
+ }
+#endif // FEATURE_SIMD
+ if (pNumGCVars != nullptr)
+ {
+ *pNumGCVars = numGCVars;
+ }
+ return structType;
+}
+
+//****************************************************************************
+// Given a TYP_STRUCT value 'structVal', make sure it is 'canonical'; that is, it is either
+// an OBJ or a MKREFANY node, or a node (e.g. GT_INDEX) that will be morphed.
+//
+GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ bool forceNormalization /*=false*/)
+{
+ assert(forceNormalization || varTypeIsStruct(structVal));
+ assert(structHnd != NO_CLASS_HANDLE);
+ var_types structType = structVal->TypeGet();
+ bool makeTemp = false;
+ if (structType == TYP_STRUCT)
+ {
+ structType = impNormStructType(structHnd);
+ }
+ bool alreadyNormalized = false;
+ GenTreeLclVarCommon* structLcl = nullptr;
+
+ genTreeOps oper = structVal->OperGet();
+ switch (oper)
+ {
+ // GT_RETURN and GT_MKREFANY don't capture the handle.
+ case GT_RETURN:
+ break;
+ case GT_MKREFANY:
+ alreadyNormalized = true;
+ break;
+
+ case GT_CALL:
+ structVal->gtCall.gtRetClsHnd = structHnd;
+ makeTemp = true;
+ break;
+
+ case GT_RET_EXPR:
+ structVal->gtRetExpr.gtRetClsHnd = structHnd;
+ makeTemp = true;
+ break;
+
+ case GT_ARGPLACE:
+ structVal->gtArgPlace.gtArgPlaceClsHnd = structHnd;
+ break;
+
+ case GT_INDEX:
+ // This will be transformed to an OBJ later.
+ alreadyNormalized = true;
+ structVal->gtIndex.gtStructElemClass = structHnd;
+ structVal->gtIndex.gtIndElemSize = info.compCompHnd->getClassSize(structHnd);
+ break;
+
+ case GT_FIELD:
+ // Wrap it in a GT_OBJ.
+ structVal->gtType = structType;
+ structVal = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+ break;
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ structLcl = structVal->AsLclVarCommon();
+ // Wrap it in a GT_OBJ.
+ structVal = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+ __fallthrough;
+
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_ASG:
+ // These should already have the appropriate type.
+ assert(structVal->gtType == structType);
+ alreadyNormalized = true;
+ break;
+
+ case GT_IND:
+ assert(structVal->gtType == structType);
+ structVal = gtNewObjNode(structHnd, structVal->gtGetOp1());
+ alreadyNormalized = true;
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ assert(varTypeIsSIMD(structVal) && (structVal->gtType == structType));
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_COMMA:
+ {
+ // The second thing is the block node.
+ GenTree* blockNode = structVal->gtOp.gtOp2;
+ assert(blockNode->gtType == structType);
+ // It had better be a block node - any others should not occur here.
+ assert(blockNode->OperIsBlk());
+
+ // Sink the GT_COMMA below the blockNode addr.
+ GenTree* blockNodeAddr = blockNode->gtOp.gtOp1;
+ assert(blockNodeAddr->gtType == TYP_BYREF);
+ GenTree* commaNode = structVal;
+ commaNode->gtType = TYP_BYREF;
+ commaNode->gtOp.gtOp2 = blockNodeAddr;
+ blockNode->gtOp.gtOp1 = commaNode;
+ structVal = blockNode;
+ alreadyNormalized = true;
+ }
+ break;
+
+ default:
+ assert(!"Unexpected node in impNormStructVal()");
+ break;
+ }
+ structVal->gtType = structType;
+ GenTree* structObj = structVal;
+
+ if (!alreadyNormalized || forceNormalization)
+ {
+ if (makeTemp)
+ {
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("struct address for call/obj"));
+
+ impAssignTempGen(tmpNum, structVal, structHnd, curLevel);
+
+ // The structVal is now the temp itself
+
+ structLcl = gtNewLclvNode(tmpNum, structType)->AsLclVarCommon();
+ // TODO-1stClassStructs: Avoid always wrapping in GT_OBJ.
+ structObj = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structLcl));
+ }
+ else if (varTypeIsStruct(structType) && !structVal->OperIsBlk())
+ {
+ // Wrap it in a GT_OBJ
+ structObj = gtNewObjNode(structHnd, gtNewOperNode(GT_ADDR, TYP_BYREF, structVal));
+ }
+ }
+
+ if (structLcl != nullptr)
+ {
+ // An OBJ on an ADDR(LCL_VAR) can never raise an exception
+ // so we don't set GTF_EXCEPT here.
+ if (!lvaIsImplicitByRefLocal(structLcl->gtLclNum))
+ {
+ structObj->gtFlags &= ~GTF_GLOB_REF;
+ }
+ }
+ else
+ {
+ // In general an OBJ is an indirection and could raise an exception.
+ structObj->gtFlags |= GTF_EXCEPT;
+ }
+ return (structObj);
+}
+
+/******************************************************************************/
+// Given a type token, generate code that will evaluate to the correct
+// handle representation of that token (type handle, field handle, or method handle)
+//
+// For most cases, the handle is determined at compile-time, and the code
+// generated is simply an embedded handle.
+//
+// Run-time lookup is required if the enclosing method is shared between instantiations
+// and the token refers to formal type parameters whose instantiation is not known
+// at compile-time.
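+//
+// As an illustrative example (not tied to a particular method): in code shared across
+// instantiations, such as a method of List<T> compiled once for all reference-type T,
+// a token that refers to the formal type parameter T (e.g. from 'typeof(T)') requires a
+// runtime lookup, while a token for System.String in the same method can be embedded
+// directly as a compile-time handle.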
+//
+GenTreePtr Compiler::impTokenToHandle(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ BOOL* pRuntimeLookup /* = NULL */,
+ BOOL mustRestoreHandle /* = FALSE */,
+ BOOL importParent /* = FALSE */)
+{
+ assert(!fgGlobalMorph);
+
+ CORINFO_GENERICHANDLE_RESULT embedInfo;
+ info.compCompHnd->embedGenericHandle(pResolvedToken, importParent, &embedInfo);
+
+ if (pRuntimeLookup)
+ {
+ *pRuntimeLookup = embedInfo.lookup.lookupKind.needsRuntimeLookup;
+ }
+
+ if (mustRestoreHandle && !embedInfo.lookup.lookupKind.needsRuntimeLookup)
+ {
+ switch (embedInfo.handleType)
+ {
+ case CORINFO_HANDLETYPE_CLASS:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun((CORINFO_CLASS_HANDLE)embedInfo.compileTimeHandle);
+ break;
+
+ case CORINFO_HANDLETYPE_METHOD:
+ info.compCompHnd->methodMustBeLoadedBeforeCodeIsRun((CORINFO_METHOD_HANDLE)embedInfo.compileTimeHandle);
+ break;
+
+ case CORINFO_HANDLETYPE_FIELD:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(
+ info.compCompHnd->getFieldClass((CORINFO_FIELD_HANDLE)embedInfo.compileTimeHandle));
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return impLookupToTree(pResolvedToken, &embedInfo.lookup, gtTokenToIconFlags(pResolvedToken->token),
+ embedInfo.compileTimeHandle);
+}
+
+GenTreePtr Compiler::impLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ unsigned handleFlags,
+ void* compileTimeHandle)
+{
+ if (!pLookup->lookupKind.needsRuntimeLookup)
+ {
+ // No runtime lookup is required.
+ // Access is direct or memory-indirect (of a fixed address) reference
+
+ CORINFO_GENERIC_HANDLE handle = nullptr;
+ void* pIndirection = nullptr;
+ assert(pLookup->constLookup.accessType != IAT_PPVALUE);
+
+ if (pLookup->constLookup.accessType == IAT_VALUE)
+ {
+ handle = pLookup->constLookup.handle;
+ }
+ else if (pLookup->constLookup.accessType == IAT_PVALUE)
+ {
+ pIndirection = pLookup->constLookup.addr;
+ }
+ return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, nullptr, compileTimeHandle);
+ }
+ else if (compIsForInlining())
+ {
+ // Don't import runtime lookups when inlining
+ // Inlining has to be aborted in such a case
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_GENERIC_DICTIONARY_LOOKUP);
+ return nullptr;
+ }
+ else
+ {
+ // Need to use dictionary-based access which depends on the typeContext
+ // which is only available at runtime, not at compile-time.
+
+ return impRuntimeLookupToTree(pResolvedToken, pLookup, compileTimeHandle);
+ }
+}
+
+#ifdef FEATURE_READYTORUN_COMPILER
+GenTreePtr Compiler::impReadyToRunLookupToTree(CORINFO_CONST_LOOKUP* pLookup,
+ unsigned handleFlags,
+ void* compileTimeHandle)
+{
+ CORINFO_GENERIC_HANDLE handle = 0;
+ void* pIndirection = 0;
+ assert(pLookup->accessType != IAT_PPVALUE);
+
+ if (pLookup->accessType == IAT_VALUE)
+ handle = pLookup->handle;
+ else if (pLookup->accessType == IAT_PVALUE)
+ pIndirection = pLookup->addr;
+ return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, 0, 0, compileTimeHandle);
+}
+
+GenTreePtr Compiler::impReadyToRunHelperToTree(
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CorInfoHelpFunc helper,
+ var_types type,
+ GenTreeArgList* args /* =NULL*/,
+ CORINFO_LOOKUP_KIND* pGenericLookupKind /* =NULL. Only used with generics */)
+{
+ CORINFO_CONST_LOOKUP lookup;
+#if COR_JIT_EE_VERSION > 460
+ if (!info.compCompHnd->getReadyToRunHelper(pResolvedToken, pGenericLookupKind, helper, &lookup))
+ return NULL;
+#else
+ info.compCompHnd->getReadyToRunHelper(pResolvedToken, helper, &lookup);
+#endif
+
+ GenTreePtr op1 = gtNewHelperCallNode(helper, type, GTF_EXCEPT, args);
+
+ op1->gtCall.setEntryPoint(lookup);
+
+ return op1;
+}
+#endif
+
+GenTreePtr Compiler::impMethodPointer(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo)
+{
+ GenTreePtr op1 = nullptr;
+
+ switch (pCallInfo->kind)
+ {
+ case CORINFO_CALL:
+ op1 = new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, pCallInfo->hMethod);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ op1->gtFptrVal.gtEntryPoint = pCallInfo->codePointerLookup.constLookup;
+ op1->gtFptrVal.gtLdftnResolvedToken = new (this, CMK_Unknown) CORINFO_RESOLVED_TOKEN;
+ *op1->gtFptrVal.gtLdftnResolvedToken = *pResolvedToken;
+ }
+ else
+ op1->gtFptrVal.gtEntryPoint.addr = nullptr;
+#endif
+ break;
+
+ case CORINFO_CALL_CODE_POINTER:
+ if (compIsForInlining())
+ {
+ // Don't import runtime lookups when inlining
+ // Inlining has to be aborted in such a case
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_GENERIC_DICTIONARY_LOOKUP);
+ return nullptr;
+ }
+
+ op1 = impLookupToTree(pResolvedToken, &pCallInfo->codePointerLookup, GTF_ICON_FTN_ADDR, pCallInfo->hMethod);
+ break;
+
+ default:
+ noway_assert(!"unknown call kind");
+ break;
+ }
+
+ return op1;
+}
+
+/*****************************************************************************/
+/* Import a dictionary lookup to access a handle in code shared between
+ generic instantiations.
+ The lookup depends on the typeContext which is only available at
+ runtime, and not at compile-time.
+ pLookup->token1 and pLookup->token2 specify the handle that is needed.
+ The cases are:
+
+ 1. pLookup->indirections == CORINFO_USEHELPER : Call a helper passing it the
+ instantiation-specific handle, and the tokens to look up the handle.
+ 2. pLookup->indirections != CORINFO_USEHELPER :
+ 2a. pLookup->testForNull == false : Dereference the instantiation-specific handle
+ to get the handle.
+ 2b. pLookup->testForNull == true : Dereference the instantiation-specific handle.
+ If it is non-NULL, it is the handle required. Else, call a helper
+ to look up the handle.
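+
+ As an illustrative sketch of case 2a with two indirections, the importer builds a tree
+ equivalent to
+
+ *( *(ctx + offsets[0]) + offsets[1] )
+
+ where ctx is the generic context (the vtable pointer of 'this', or the type/method
+ context argument). Case 2b additionally tests the dereferenced handle for null under a
+ QMARK and calls the helper only when it is null.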
+ */
+
+GenTreePtr Compiler::impRuntimeLookupToTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_LOOKUP* pLookup,
+ void* compileTimeHandle)
+{
+ CORINFO_RUNTIME_LOOKUP_KIND kind = pLookup->lookupKind.runtimeLookupKind;
+ CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup;
+
+ // This method can only be called from the importer instance of the Compiler.
+ // In other words, it cannot be called on the Compiler instance created for the inlinee.
+ assert(!compIsForInlining());
+
+ GenTreePtr ctxTree;
+
+ // Collectible types require that, for shared generic code, any use of the generic context parameter
+ // be reported. (This is a conservative approach; in some cases, particularly when the context
+ // parameter is 'this', we could detect that the eager reporting logic is not needed.)
+ lvaGenericsContextUsed = true;
+
+ if (kind == CORINFO_LOOKUP_THISOBJ)
+ {
+ // this Object
+ ctxTree = gtNewLclvNode(info.compThisArg, TYP_REF);
+
+ // Vtable pointer of this object
+ ctxTree = gtNewOperNode(GT_IND, TYP_I_IMPL, ctxTree);
+ ctxTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
+ ctxTree->gtFlags |= GTF_IND_INVARIANT;
+ }
+ else
+ {
+ assert(kind == CORINFO_LOOKUP_METHODPARAM || kind == CORINFO_LOOKUP_CLASSPARAM);
+
+ ctxTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL); // Exact method descriptor as passed in as last arg
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ return impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_GENERIC_HANDLE, TYP_I_IMPL,
+ gtNewArgList(ctxTree), &pLookup->lookupKind);
+ }
+#endif
+
+ // It's available only via the run-time helper function
+ if (pRuntimeLookup->indirections == CORINFO_USEHELPER)
+ {
+ GenTreeArgList* helperArgs =
+ gtNewArgList(ctxTree, gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_TOKEN_HDL, 0,
+ nullptr, compileTimeHandle));
+
+ return gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, GTF_EXCEPT, helperArgs);
+ }
+
+ // Slot pointer
+ GenTreePtr slotPtrTree = ctxTree;
+
+ if (pRuntimeLookup->testForNull)
+ {
+ slotPtrTree = impCloneExpr(ctxTree, &ctxTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup slot"));
+ }
+
+ // Apply repeated indirections
+ for (WORD i = 0; i < pRuntimeLookup->indirections; i++)
+ {
+ if (i != 0)
+ {
+ slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree);
+ slotPtrTree->gtFlags |= GTF_IND_NONFAULTING;
+ slotPtrTree->gtFlags |= GTF_IND_INVARIANT;
+ }
+ if (pRuntimeLookup->offsets[i] != 0)
+ {
+ slotPtrTree =
+ gtNewOperNode(GT_ADD, TYP_I_IMPL, slotPtrTree, gtNewIconNode(pRuntimeLookup->offsets[i], TYP_I_IMPL));
+ }
+ }
+
+ // No null test required
+ if (!pRuntimeLookup->testForNull)
+ {
+ if (pRuntimeLookup->indirections == 0)
+ {
+ return slotPtrTree;
+ }
+
+ slotPtrTree = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree);
+ slotPtrTree->gtFlags |= GTF_IND_NONFAULTING;
+
+ if (!pRuntimeLookup->testForFixup)
+ {
+ return slotPtrTree;
+ }
+
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark0"));
+
+ GenTreePtr op1 = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup test"));
+ op1 = impImplicitIorI4Cast(op1, TYP_INT); // downcast the pointer to a TYP_INT on 64-bit targets
+
+ // Use a GT_AND to check for the lowest bit and indirect if it is set
+ GenTreePtr testTree = gtNewOperNode(GT_AND, TYP_INT, op1, gtNewIconNode(1));
+ GenTreePtr relop = gtNewOperNode(GT_EQ, TYP_INT, testTree, gtNewIconNode(0));
+ relop->gtFlags |= GTF_RELOP_QMARK;
+
+ op1 = impCloneExpr(slotPtrTree, &slotPtrTree, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup indir"));
+ op1 = gtNewOperNode(GT_ADD, TYP_I_IMPL, op1, gtNewIconNode(-1, TYP_I_IMPL)); // subtract 1 from the pointer
+ GenTreePtr indirTree = gtNewOperNode(GT_IND, TYP_I_IMPL, op1);
+ GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL, slotPtrTree, indirTree);
+
+ GenTreePtr qmark = gtNewQmarkNode(TYP_I_IMPL, relop, colon);
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling QMark0"));
+ impAssignTempGen(tmp, qmark, (unsigned)CHECK_SPILL_NONE);
+ return gtNewLclvNode(tmp, TYP_I_IMPL);
+ }
+
+ assert(pRuntimeLookup->indirections != 0);
+
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark1"));
+
+ // Extract the handle
+ GenTreePtr handle = gtNewOperNode(GT_IND, TYP_I_IMPL, slotPtrTree);
+ handle->gtFlags |= GTF_IND_NONFAULTING;
+
+ GenTreePtr handleCopy = impCloneExpr(handle, &handle, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("impRuntimeLookup typehandle"));
+
+ // Call to helper
+ GenTreeArgList* helperArgs =
+ gtNewArgList(ctxTree, gtNewIconEmbHndNode(pRuntimeLookup->signature, nullptr, GTF_ICON_TOKEN_HDL, 0, nullptr,
+ compileTimeHandle));
+ GenTreePtr helperCall = gtNewHelperCallNode(pRuntimeLookup->helper, TYP_I_IMPL, GTF_EXCEPT, helperArgs);
+
+ // Check for null and possibly call helper
+ GenTreePtr relop = gtNewOperNode(GT_NE, TYP_INT, handle, gtNewIconNode(0, TYP_I_IMPL));
+ relop->gtFlags |= GTF_RELOP_QMARK;
+
+ GenTreePtr colon = new (this, GT_COLON) GenTreeColon(TYP_I_IMPL,
+ gtNewNothingNode(), // do nothing if nonnull
+ helperCall);
+
+ GenTreePtr qmark = gtNewQmarkNode(TYP_I_IMPL, relop, colon);
+
+ unsigned tmp;
+ if (handleCopy->IsLocal())
+ {
+ tmp = handleCopy->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ tmp = lvaGrabTemp(true DEBUGARG("spilling QMark1"));
+ }
+
+ impAssignTempGen(tmp, qmark, (unsigned)CHECK_SPILL_NONE);
+ return gtNewLclvNode(tmp, TYP_I_IMPL);
+}
+
+/******************************************************************************
+ * Spills the stack at verCurrentState.esStack[level] and replaces it with a temp.
+ * If tnum!=BAD_VAR_NUM, the temp var used to replace the tree is tnum,
+ * else a new temp is grabbed.
+ * For structs (which can be pushed on the stack using obj, etc.),
+ * special handling is needed.
+ */
+
+struct RecursiveGuard
+{
+public:
+ RecursiveGuard()
+ {
+ m_pAddress = nullptr;
+ }
+
+ ~RecursiveGuard()
+ {
+ if (m_pAddress)
+ {
+ *m_pAddress = false;
+ }
+ }
+
+ void Init(bool* pAddress, bool bInitialize)
+ {
+ assert(pAddress && *pAddress == false && "Recursive guard violation");
+ m_pAddress = pAddress;
+
+ if (bInitialize)
+ {
+ *m_pAddress = true;
+ }
+ }
+
+protected:
+ bool* m_pAddress;
+};
+
+bool Compiler::impSpillStackEntry(unsigned level,
+ unsigned tnum
+#ifdef DEBUG
+ ,
+ bool bAssertOnRecursion,
+ const char* reason
+#endif
+ )
+{
+
+#ifdef DEBUG
+ RecursiveGuard guard;
+ guard.Init(&impNestedStackSpill, bAssertOnRecursion);
+#endif
+
+ assert(!fgGlobalMorph); // use impInlineSpillStackEntry() during inlining
+
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ /* Allocate a temp if we haven't been asked to use a particular one */
+
+ if (tiVerificationNeeded)
+ {
+ // Ignore bad temp requests (they will happen with bad code and will be
+ // caught when importing the dest block)
+ if ((tnum != BAD_VAR_NUM && tnum >= lvaCount) && verNeedsVerification())
+ {
+ return false;
+ }
+ }
+ else
+ {
+ if (tnum != BAD_VAR_NUM && (tnum >= lvaCount))
+ {
+ return false;
+ }
+ }
+
+ if (tnum == BAD_VAR_NUM)
+ {
+ tnum = lvaGrabTemp(true DEBUGARG(reason));
+ }
+ else if (tiVerificationNeeded && lvaTable[tnum].TypeGet() != TYP_UNDEF)
+ {
+ // if verification is needed and tnum's type is incompatible with
+ // the type on the stack, we grab a new temp. This is safe since
+ // we will throw a verification exception in the dest block.
+
+ var_types valTyp = tree->TypeGet();
+ var_types dstTyp = lvaTable[tnum].TypeGet();
+
+ // if the two types are different, we return. This will only happen with bad code and will
+ // be caught when importing the dest block. We still allow int/byref and float/double differences.
+ if ((genActualType(valTyp) != genActualType(dstTyp)) &&
+ !(
+#ifndef _TARGET_64BIT_
+ (valTyp == TYP_I_IMPL && dstTyp == TYP_BYREF) || (valTyp == TYP_BYREF && dstTyp == TYP_I_IMPL) ||
+#endif // !_TARGET_64BIT_
+ (varTypeIsFloating(dstTyp) && varTypeIsFloating(valTyp))))
+ {
+ if (verNeedsVerification())
+ {
+ return false;
+ }
+ }
+ }
+
+ /* Assign the spilled entry to the temp */
+ impAssignTempGen(tnum, tree, verCurrentState.esStack[level].seTypeInfo.GetClassHandle(), level);
+
+ // The tree type may be modified by impAssignTempGen, so use the type of the lclVar.
+ var_types type = genActualType(lvaTable[tnum].TypeGet());
+ GenTreePtr temp = gtNewLclvNode(tnum, type);
+ verCurrentState.esStack[level].val = temp;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Ensure that the stack has only spilled values
+ */
+
+void Compiler::impSpillStackEnsure(bool spillLeaves)
+{
+ assert(!spillLeaves || opts.compDbgCode);
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ if (!spillLeaves && tree->OperIsLeaf())
+ {
+ continue;
+ }
+
+ // Temps introduced by the importer itself don't need to be spilled
+
+ bool isTempLcl = (tree->OperGet() == GT_LCL_VAR) && (tree->gtLclVarCommon.gtLclNum >= info.compLocalsCount);
+
+ if (isTempLcl)
+ {
+ continue;
+ }
+
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillStackEnsure"));
+ }
+}
+
+void Compiler::impSpillEvalStack()
+{
+ assert(!fgGlobalMorph); // use impInlineSpillEvalStack() during inlining
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillEvalStack"));
+ }
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with side effects in them, assign those
+ * trees to temps and append the assignments to the statement list.
+ * On return the stack is guaranteed to be empty.
+ */
+
+inline void Compiler::impEvalSideEffects()
+{
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG("impEvalSideEffects"));
+ verCurrentState.esStackDepth = 0;
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with side effects in them, assign those
+ * trees to temps and replace them on the stack with refs to their temps.
+ * [0..chkLevel) is the portion of the stack which will be checked and spilled.
+ */
+
+inline void Compiler::impSpillSideEffects(bool spillGlobEffects, unsigned chkLevel DEBUGARG(const char* reason))
+{
+ assert(chkLevel != (unsigned)CHECK_SPILL_NONE);
+
+ /* Before we make any appends to the tree list we must spill the
+ * "special" side effects (GTF_ORDER_SIDEEFF on a GT_CATCH_ARG) */
+
+ impSpillSpecialSideEff();
+
+ if (chkLevel == (unsigned)CHECK_SPILL_ALL)
+ {
+ chkLevel = verCurrentState.esStackDepth;
+ }
+
+ assert(chkLevel <= verCurrentState.esStackDepth);
+
+ unsigned spillFlags = spillGlobEffects ? GTF_GLOB_EFFECT : GTF_SIDE_EFFECT;
+
+ for (unsigned i = 0; i < chkLevel; i++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[i].val;
+
+ GenTreePtr lclVarTree;
+
+ if ((tree->gtFlags & spillFlags) != 0 ||
+ (spillGlobEffects && // Only consider the following when spillGlobEffects == TRUE
+ !impIsAddressInLocal(tree, &lclVarTree) && // No need to spill the GT_ADDR node on a local.
+ gtHasLocalsWithAddrOp(tree))) // Spill if we still see a GT_LCL_VAR that has the lvHasLdAddrOp or
+ // lvAddrTaken flag set.
+ {
+ impSpillStackEntry(i, BAD_VAR_NUM DEBUGARG(false) DEBUGARG(reason));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with special side effects in them, assign
+ * those trees to temps and replace them on the stack with refs to their temps.
+ */
+
+inline void Compiler::impSpillSpecialSideEff()
+{
+ // Only exception objects need to be carefully handled
+
+ if (!compCurBB->bbCatchTyp)
+ {
+ return;
+ }
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+ // If the tree contains an exception object (GT_CATCH_ARG), make sure we spill this stack entry.
+ if (gtHasCatchArg(tree))
+ {
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillSpecialSideEff"));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Spill all stack references to value classes (TYP_STRUCT nodes)
+ */
+
+void Compiler::impSpillValueClasses()
+{
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ if (fgWalkTreePre(&tree, impFindValueClasses) == WALK_ABORT)
+ {
+ // Tree walk was aborted, which means that we found a
+ // value class on the stack. Need to spill that
+ // stack entry.
+
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillValueClasses"));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Callback that checks if a tree node is TYP_STRUCT
+ */
+
+Compiler::fgWalkResult Compiler::impFindValueClasses(GenTreePtr* pTree, fgWalkData* data)
+{
+ fgWalkResult walkResult = WALK_CONTINUE;
+
+ if ((*pTree)->gtType == TYP_STRUCT)
+ {
+ // Abort the walk and indicate that we found a value class
+
+ walkResult = WALK_ABORT;
+ }
+
+ return walkResult;
+}
+
+/*****************************************************************************
+ *
+ * If the stack contains any trees with references to local #lclNum, assign
+ * those trees to temps and replace them on the stack with refs to
+ * their temps.
+ */
+
+void Compiler::impSpillLclRefs(ssize_t lclNum)
+{
+ assert(!fgGlobalMorph); // use impInlineSpillLclRefs() during inlining
+
+ /* Before we make any appends to the tree list we must spill the
+ * "special" side effects (GTF_ORDER_SIDEEFF) - GT_CATCH_ARG */
+
+ impSpillSpecialSideEff();
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ /* If the tree may throw an exception, and the block has a handler,
+ then we need to spill assignments to the local if the local is
+ live on entry to the handler.
+ Just spill them all without considering liveness. */
+
+ bool xcptnCaught = ehBlockHasExnFlowDsc(compCurBB) && (tree->gtFlags & (GTF_CALL | GTF_EXCEPT));
+
+ /* Skip the tree if it doesn't have an affected reference,
+ unless xcptnCaught */
+
+ if (xcptnCaught || gtHasRef(tree, lclNum, false))
+ {
+ impSpillStackEntry(level, BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impSpillLclRefs"));
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Push catch arg onto the stack.
+ * If there are jumps to the beginning of the handler, insert basic block
+ * and spill catch arg to a temp. Update the handler block if necessary.
+ *
+ * Returns the basic block of the actual handler.
+ */
+
+BasicBlock* Compiler::impPushCatchArgOnStack(BasicBlock* hndBlk, CORINFO_CLASS_HANDLE clsHnd)
+{
+ // Do not inject the basic block twice on reimport. This should be
+ // hit only under JIT stress. See if the block is the one we injected.
+ // Note that EH canonicalization can inject internal blocks here. We might
+ // be able to re-use such a block (but we don't, right now).
+ if ((hndBlk->bbFlags & (BBF_IMPORTED | BBF_INTERNAL | BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_JMP_TARGET)) ==
+ (BBF_IMPORTED | BBF_INTERNAL | BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_JMP_TARGET))
+ {
+ GenTreePtr tree = hndBlk->bbTreeList;
+
+ if (tree != nullptr && tree->gtOper == GT_STMT)
+ {
+ tree = tree->gtStmt.gtStmtExpr;
+ assert(tree != nullptr);
+
+ if ((tree->gtOper == GT_ASG) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) &&
+ (tree->gtOp.gtOp2->gtOper == GT_CATCH_ARG))
+ {
+ tree = gtNewLclvNode(tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum, TYP_REF);
+
+ impPushOnStack(tree, typeInfo(TI_REF, clsHnd));
+
+ return hndBlk->bbNext;
+ }
+ }
+
+ // If we get here, it must have been some other kind of internal block. It's possible that
+ // someone prepended something to our injected block, but that's unlikely.
+ }
+
+ /* Push the exception address value on the stack */
+ GenTreePtr arg = new (this, GT_CATCH_ARG) GenTree(GT_CATCH_ARG, TYP_REF);
+
+ /* Mark the node as having a side-effect - i.e. cannot be
+ * moved around since it is tied to a fixed location (EAX) */
+ arg->gtFlags |= GTF_ORDER_SIDEEFF;
+
+ /* Spill GT_CATCH_ARG to a temp if there are jumps to the beginning of the handler */
+ if (hndBlk->bbRefs > 1 || compStressCompile(STRESS_CATCH_ARG, 5))
+ {
+ if (hndBlk->bbRefs == 1)
+ {
+ hndBlk->bbRefs++;
+ }
+
+ /* Create extra basic block for the spill */
+ BasicBlock* newBlk = fgNewBBbefore(BBJ_NONE, hndBlk, /* extendRegion */ true);
+ newBlk->bbFlags |= BBF_IMPORTED | BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_JMP_TARGET;
+ newBlk->setBBWeight(hndBlk->bbWeight);
+ newBlk->bbCodeOffs = hndBlk->bbCodeOffs;
+
+ /* Account for the new link we are about to create */
+ hndBlk->bbRefs++;
+
+ /* Spill into a temp */
+ unsigned tempNum = lvaGrabTemp(false DEBUGARG("SpillCatchArg"));
+ lvaTable[tempNum].lvType = TYP_REF;
+ arg = gtNewTempAssign(tempNum, arg);
+
+ hndBlk->bbStkTempsIn = tempNum;
+
+ /* Report the debug info. impImportBlockCode won't treat
+ * the actual handler as an exception block and thus won't do it for us. */
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
+ {
+ impCurStmtOffs = newBlk->bbCodeOffs | IL_OFFSETX_STKBIT;
+ arg = gtNewStmt(arg, impCurStmtOffs);
+ }
+
+ fgInsertStmtAtEnd(newBlk, arg);
+
+ arg = gtNewLclvNode(tempNum, TYP_REF);
+ }
+
+ impPushOnStack(arg, typeInfo(TI_REF, clsHnd));
+
+ return hndBlk;
+}
+
+/*****************************************************************************
+ *
+ * Given a tree, clone it. *pClone is set to the cloned tree.
+ * Returns the original tree if the cloning was easy,
+ * else returns a use of the temp to which the tree had to be spilled.
+ * If the tree has side-effects, it will be spilled to a temp.
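+ *
+ * Illustrative sketch of the behavior implemented below: cloning a side-effect-free
+ * GT_LCL_VAR returns the original tree and sets *pClone to a fresh copy of the node,
+ * whereas cloning a GT_CALL spills it to a new temp and returns a GT_LCL_VAR use of
+ * that temp for both the original and the clone.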
+ */
+
+GenTreePtr Compiler::impCloneExpr(GenTreePtr tree,
+ GenTreePtr* pClone,
+ CORINFO_CLASS_HANDLE structHnd,
+ unsigned curLevel,
+ GenTreePtr* pAfterStmt DEBUGARG(const char* reason))
+{
+ if (!(tree->gtFlags & GTF_GLOB_EFFECT))
+ {
+ GenTreePtr clone = gtClone(tree, true);
+
+ if (clone)
+ {
+ *pClone = clone;
+ return tree;
+ }
+ }
+
+ /* Store the operand in a temp and return the temp */
+
+ unsigned temp = lvaGrabTemp(true DEBUGARG(reason));
+
+ // impAssignTempGen() may change tree->gtType to TYP_VOID for calls which
+ // return a struct type. It also may modify the struct type to a more
+ // specialized type (e.g. a SIMD type). So we will get the type from
+ // the lclVar AFTER calling impAssignTempGen().
+
+ impAssignTempGen(temp, tree, structHnd, curLevel, pAfterStmt, impCurStmtOffs);
+ var_types type = genActualType(lvaTable[temp].TypeGet());
+
+ *pClone = gtNewLclvNode(temp, type);
+ return gtNewLclvNode(temp, type);
+}
+
+/*****************************************************************************
+ * Remember the IL offset (including stack-empty info) for the trees we will
+ * generate now.
+ */
+
+inline void Compiler::impCurStmtOffsSet(IL_OFFSET offs)
+{
+ if (compIsForInlining())
+ {
+ GenTreePtr callStmt = impInlineInfo->iciStmt;
+ assert(callStmt->gtOper == GT_STMT);
+ impCurStmtOffs = callStmt->gtStmt.gtStmtILoffsx;
+ }
+ else
+ {
+ assert(offs == BAD_IL_OFFSET || (offs & IL_OFFSETX_BITS) == 0);
+ IL_OFFSETX stkBit = (verCurrentState.esStackDepth > 0) ? IL_OFFSETX_STKBIT : 0;
+ impCurStmtOffs = offs | stkBit;
+ }
+}
+
+/*****************************************************************************
+ * Returns current IL offset with stack-empty and call-instruction info incorporated
+ */
+inline IL_OFFSETX Compiler::impCurILOffset(IL_OFFSET offs, bool callInstruction)
+{
+ if (compIsForInlining())
+ {
+ return BAD_IL_OFFSET;
+ }
+ else
+ {
+ assert(offs == BAD_IL_OFFSET || (offs & IL_OFFSETX_BITS) == 0);
+ IL_OFFSETX stkBit = (verCurrentState.esStackDepth > 0) ? IL_OFFSETX_STKBIT : 0;
+ IL_OFFSETX callInstructionBit = callInstruction ? IL_OFFSETX_CALLINSTRUCTIONBIT : 0;
+ return offs | stkBit | callInstructionBit;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Remember the instr offset for the statements
+ *
+ * When we do impAppendTree(tree), we can't set tree->gtStmtLastILoffs to
+ * impCurOpcOffs, if the append was done because of a partial stack spill,
+ * as some of the trees corresponding to code up to impCurOpcOffs might
+ * still be sitting on the stack.
+ * So we delay marking of gtStmtLastILoffs until impNoteLastILoffs().
+ * This should be called when an opcode finally/explicitly causes
+ * impAppendTree(tree) to be called (as opposed to being called because of
+ * a spill caused by the opcode)
+ */
+
+#ifdef DEBUG
+
+void Compiler::impNoteLastILoffs()
+{
+ if (impLastILoffsStmt == nullptr)
+ {
+ // We should have added a statement for the current basic block
+ // Is this assert correct?
+
+ assert(impTreeLast);
+ assert(impTreeLast->gtOper == GT_STMT);
+
+ impTreeLast->gtStmt.gtStmtLastILoffs = compIsForInlining() ? BAD_IL_OFFSET : impCurOpcOffs;
+ }
+ else
+ {
+ impLastILoffsStmt->gtStmt.gtStmtLastILoffs = compIsForInlining() ? BAD_IL_OFFSET : impCurOpcOffs;
+ impLastILoffsStmt = nullptr;
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * We don't create any GenTree (excluding spills) for a branch.
+ * For debugging info, we need a placeholder so that we can note
+ * the IL offset in gtStmt.gtStmtOffs. So append an empty statement.
+ */
+
+void Compiler::impNoteBranchOffs()
+{
+ if (opts.compDbgCode)
+ {
+ impAppendTree(gtNewNothingNode(), (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+}
+
+/*****************************************************************************
+ * Locate the next stmt boundary for which we need to record info.
+ * We will have to spill the stack at such boundaries if it is not
+ * already empty.
+ * Returns the next stmt boundary (after the start of the block)
+ */
+
+unsigned Compiler::impInitBlockLineInfo()
+{
+ /* Assume the block does not correspond with any IL offset. This prevents
+ us from reporting extra offsets. Extra mappings can cause confusing
+ stepping, especially if the extra mapping is a jump-target, and the
+ debugger does not ignore extra mappings, but instead rewinds to the
+ nearest known offset */
+
+ impCurStmtOffsSet(BAD_IL_OFFSET);
+
+ if (compIsForInlining())
+ {
+ return ~0;
+ }
+
+ IL_OFFSET blockOffs = compCurBB->bbCodeOffs;
+
+ if ((verCurrentState.esStackDepth == 0) && (info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES))
+ {
+ impCurStmtOffsSet(blockOffs);
+ }
+
+ if (false && (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES))
+ {
+ impCurStmtOffsSet(blockOffs);
+ }
+
+ /* Always report IL offset 0 or some tests get confused.
+ Probably a good idea anyway. */
+
+ if (blockOffs == 0)
+ {
+ impCurStmtOffsSet(blockOffs);
+ }
+
+ if (!info.compStmtOffsetsCount)
+ {
+ return ~0;
+ }
+
+ /* Find the lowest explicit stmt boundary within the block */
+
+ /* Start looking at an entry that is based on our instr offset */
+
+ unsigned index = (info.compStmtOffsetsCount * blockOffs) / info.compILCodeSize;
+
+ if (index >= info.compStmtOffsetsCount)
+ {
+ index = info.compStmtOffsetsCount - 1;
+ }
+
+ /* If we've guessed too far, back up */
+
+ while (index > 0 && info.compStmtOffsets[index - 1] >= blockOffs)
+ {
+ index--;
+ }
+
+ /* If we guessed short, advance ahead */
+
+ while (info.compStmtOffsets[index] < blockOffs)
+ {
+ index++;
+
+ if (index == info.compStmtOffsetsCount)
+ {
+ return info.compStmtOffsetsCount;
+ }
+ }
+
+ assert(index < info.compStmtOffsetsCount);
+
+ if (info.compStmtOffsets[index] == blockOffs)
+ {
+ /* There is an explicit boundary for the start of this basic block.
+ So we will start with bbCodeOffs. Else we will wait until we
+ get to the next explicit boundary */
+
+ impCurStmtOffsSet(blockOffs);
+
+ index++;
+ }
+
+ return index;
+}
+
+/*****************************************************************************/
+
+static inline bool impOpcodeIsCallOpcode(OPCODE opcode)
+{
+ switch (opcode)
+ {
+ case CEE_CALL:
+ case CEE_CALLI:
+ case CEE_CALLVIRT:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+
+static inline bool impOpcodeIsCallSiteBoundary(OPCODE opcode)
+{
+ switch (opcode)
+ {
+ case CEE_CALL:
+ case CEE_CALLI:
+ case CEE_CALLVIRT:
+ case CEE_JMP:
+ case CEE_NEWOBJ:
+ case CEE_NEWARR:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+#endif // DEBUGGING_SUPPORT
+
+/*****************************************************************************/
+
+// One might think it would be worth caching these values, but results indicate
+// that it is not.
+// In addition, caching them causes SuperPMI to be unable to completely
+// encapsulate an individual method context.
+CORINFO_CLASS_HANDLE Compiler::impGetRefAnyClass()
+{
+ CORINFO_CLASS_HANDLE refAnyClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPED_BYREF);
+ assert(refAnyClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return refAnyClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetTypeHandleClass()
+{
+ CORINFO_CLASS_HANDLE typeHandleClass = info.compCompHnd->getBuiltinClass(CLASSID_TYPE_HANDLE);
+ assert(typeHandleClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return typeHandleClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetRuntimeArgumentHandle()
+{
+ CORINFO_CLASS_HANDLE argIteratorClass = info.compCompHnd->getBuiltinClass(CLASSID_ARGUMENT_HANDLE);
+ assert(argIteratorClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return argIteratorClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetStringClass()
+{
+ CORINFO_CLASS_HANDLE stringClass = info.compCompHnd->getBuiltinClass(CLASSID_STRING);
+ assert(stringClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return stringClass;
+}
+
+CORINFO_CLASS_HANDLE Compiler::impGetObjectClass()
+{
+ CORINFO_CLASS_HANDLE objectClass = info.compCompHnd->getBuiltinClass(CLASSID_SYSTEM_OBJECT);
+ assert(objectClass != (CORINFO_CLASS_HANDLE) nullptr);
+ return objectClass;
+}
+
+/*****************************************************************************
+ * "&var" can be used either as TYP_BYREF or TYP_I_IMPL, but we
+ * set its type to TYP_BYREF when we create it. Only at the point where we
+ * use it do we know whether it can be changed to TYP_I_IMPL.
+ */
+
+/* static */
+void Compiler::impBashVarAddrsToI(GenTreePtr tree1, GenTreePtr tree2)
+{
+ if (tree1->IsVarAddr())
+ {
+ tree1->gtType = TYP_I_IMPL;
+ }
+
+ if (tree2 && tree2->IsVarAddr())
+ {
+ tree2->gtType = TYP_I_IMPL;
+ }
+}
+
+/*****************************************************************************
+ * TYP_INT and TYP_I_IMPL can be used almost interchangeably, but we want
+ * to make that an explicit cast in our trees, so any implicit casts that
+ * exist in the IL (at least on 64-bit where TYP_I_IMPL != TYP_INT) are
+ * turned into explicit casts here.
+ * We also allow an implicit conversion of an ldnull into a TYP_I_IMPL(0).
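+ *
+ * For example (illustrative, 64-bit targets only): when a TYP_INT value is used where
+ * a native int (TYP_I_IMPL) is expected, it is wrapped in an explicit GT_CAST to
+ * TYP_I_IMPL here rather than being converted implicitly.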
+ */
+
+GenTreePtr Compiler::impImplicitIorI4Cast(GenTreePtr tree, var_types dstTyp)
+{
+ var_types currType = genActualType(tree->gtType);
+ var_types wantedType = genActualType(dstTyp);
+
+ if (wantedType != currType)
+ {
+ // Automatic upcast for a GT_CNS_INT into TYP_I_IMPL
+ if ((tree->OperGet() == GT_CNS_INT) && varTypeIsI(dstTyp))
+ {
+ if (!varTypeIsI(tree->gtType) || ((tree->gtType == TYP_REF) && (tree->gtIntCon.gtIconVal == 0)))
+ {
+ tree->gtType = TYP_I_IMPL;
+ }
+ }
+#ifdef _TARGET_64BIT_
+ else if (varTypeIsI(wantedType) && (currType == TYP_INT))
+ {
+ // Note that this allows TYP_INT to be cast to a TYP_I_IMPL when wantedType is a TYP_BYREF or TYP_REF
+ tree = gtNewCastNode(TYP_I_IMPL, tree, TYP_I_IMPL);
+ }
+ else if ((wantedType == TYP_INT) && varTypeIsI(currType))
+ {
+ // Note that this allows TYP_BYREF or TYP_REF to be cast to a TYP_INT
+ tree = gtNewCastNode(TYP_INT, tree, TYP_INT);
+ }
+#endif // _TARGET_64BIT_
+ }
+
+ return tree;
+}
+
+/*****************************************************************************
+ * TYP_FLOAT and TYP_DOUBLE can be used almost interchangeably in some cases,
+ * but we want to make that an explicit cast in our trees, so any implicit casts
+ * that exist in the IL are turned into explicit casts here.
+ */
+
+GenTreePtr Compiler::impImplicitR4orR8Cast(GenTreePtr tree, var_types dstTyp)
+{
+#ifndef LEGACY_BACKEND
+ if (varTypeIsFloating(tree) && varTypeIsFloating(dstTyp) && (dstTyp != tree->gtType))
+ {
+ tree = gtNewCastNode(dstTyp, tree, dstTyp);
+ }
+#endif // !LEGACY_BACKEND
+
+ return tree;
+}
+
+/*****************************************************************************/
+BOOL Compiler::impLocAllocOnStack()
+{
+ if (!compLocallocUsed)
+ {
+ return (FALSE);
+ }
+
+ // Returns true if a GT_LCLHEAP node is encountered in any of the trees
+ // that have been pushed on the importer evaluation stack.
+ //
+ for (unsigned i = 0; i < verCurrentState.esStackDepth; i++)
+ {
+ if (fgWalkTreePre(&verCurrentState.esStack[i].val, Compiler::fgChkLocAllocCB) == WALK_ABORT)
+ {
+ return (TRUE);
+ }
+ }
+ return (FALSE);
+}
+
+//------------------------------------------------------------------------
+// impInitializeArrayIntrinsic: Attempts to replace a call to InitializeArray
+// with a GT_COPYBLK node.
+//
+// Arguments:
+// sig - The InitializeArray signature.
+//
+// Return Value:
+// A pointer to the newly created GT_COPYBLK node if the replacement succeeds or
+// nullptr otherwise.
+//
+// Notes:
+// The function recognizes the following IL pattern:
+// ldc <length> or a list of ldc <lower bound>/<length>
+// newarr or newobj
+// dup
+// ldtoken <field handle>
+// call InitializeArray
+// The lower bounds need not be constant except when the array rank is 1.
+// The function recognizes all kinds of arrays thus enabling a small runtime
+// such as CoreRT to skip providing an implementation for InitializeArray.
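+//
+// As an illustrative example (hypothetical C# source, not taken from a specific test):
+// static readonly int[] Primes = { 2, 3, 5, 7, 11 };
+// typically compiles to the newarr/dup/ldtoken/call InitializeArray sequence above, and
+// this routine replaces the call with a block copy from the static data blob into the
+// newly allocated array.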
+
+GenTreePtr Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
+{
+ assert(sig->numArgs == 2);
+
+ GenTreePtr fieldTokenNode = impStackTop(0).val;
+ GenTreePtr arrayLocalNode = impStackTop(1).val;
+
+ //
+ // Verify that the field token is known and valid. Note that it's also
+ // possible for the token to come from reflection, in which case we cannot do
+ // the optimization and must therefore revert to calling the helper. You can
+ // see an example of this in bvt\DynIL\initarray2.exe (in Main).
+ //
+
+ // Check to see if the ldtoken helper call is what we see here.
+ if (fieldTokenNode->gtOper != GT_CALL || (fieldTokenNode->gtCall.gtCallType != CT_HELPER) ||
+ (fieldTokenNode->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD)))
+ {
+ return nullptr;
+ }
+
+ // Strip helper call away
+ fieldTokenNode = fieldTokenNode->gtCall.gtCallArgs->Current();
+
+ if (fieldTokenNode->gtOper == GT_IND)
+ {
+ fieldTokenNode = fieldTokenNode->gtOp.gtOp1;
+ }
+
+ // Check for constant
+ if (fieldTokenNode->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ CORINFO_FIELD_HANDLE fieldToken = (CORINFO_FIELD_HANDLE)fieldTokenNode->gtIntCon.gtCompileTimeHandle;
+ if (!fieldTokenNode->IsIconHandle(GTF_ICON_FIELD_HDL) || (fieldToken == nullptr))
+ {
+ return nullptr;
+ }
+
+ //
+ // We need to get the number of elements in the array and the size of each element.
+ // We verify that the newarr statement is exactly what we expect it to be.
+ // If it's not, then we just return nullptr and don't optimize this call.
+ //
+
+ //
+ // It is possible that we don't have any statements in the block yet.
+ //
+ if (impTreeLast->gtOper != GT_STMT)
+ {
+ assert(impTreeLast->gtOper == GT_BEG_STMTS);
+ return nullptr;
+ }
+
+ //
+ // We start by looking at the last statement, making sure it's an assignment, and
+ // that the target of the assignment is the array passed to InitializeArray.
+ //
+ GenTreePtr arrayAssignment = impTreeLast->gtStmt.gtStmtExpr;
+ if ((arrayAssignment->gtOper != GT_ASG) || (arrayAssignment->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (arrayLocalNode->gtOper != GT_LCL_VAR) ||
+ (arrayAssignment->gtOp.gtOp1->gtLclVarCommon.gtLclNum != arrayLocalNode->gtLclVarCommon.gtLclNum))
+ {
+ return nullptr;
+ }
+
+ //
+ // Make sure that the object being assigned is a helper call.
+ //
+
+ GenTreePtr newArrayCall = arrayAssignment->gtOp.gtOp2;
+ if ((newArrayCall->gtOper != GT_CALL) || (newArrayCall->gtCall.gtCallType != CT_HELPER))
+ {
+ return nullptr;
+ }
+
+ //
+ // Verify that it is one of the new array helpers.
+ //
+
+ bool isMDArray = false;
+
+ if (newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_DIRECT) &&
+ newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_OBJ) &&
+ newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_VC) &&
+ newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEWARR_1_ALIGN8)
+#ifdef FEATURE_READYTORUN_COMPILER
+ && newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_READYTORUN_NEWARR_1)
+#endif
+ )
+ {
+#if COR_JIT_EE_VERSION > 460
+ if (newArrayCall->gtCall.gtCallMethHnd != eeFindHelper(CORINFO_HELP_NEW_MDARR_NONVARARG))
+ {
+ return nullptr;
+ }
+
+ isMDArray = true;
+#endif
+ }
+
+ CORINFO_CLASS_HANDLE arrayClsHnd = (CORINFO_CLASS_HANDLE)newArrayCall->gtCall.compileTimeHelperArgumentHandle;
+
+ //
+ // Make sure we found a compile time handle to the array
+ //
+
+ if (!arrayClsHnd)
+ {
+ return nullptr;
+ }
+
+ unsigned rank = 0;
+ S_UINT32 numElements;
+
+ if (isMDArray)
+ {
+ rank = info.compCompHnd->getArrayRank(arrayClsHnd);
+
+ if (rank == 0)
+ {
+ return nullptr;
+ }
+
+ GenTreeArgList* tokenArg = newArrayCall->gtCall.gtCallArgs;
+ assert(tokenArg != nullptr);
+ GenTreeArgList* numArgsArg = tokenArg->Rest();
+ assert(numArgsArg != nullptr);
+ GenTreeArgList* argsArg = numArgsArg->Rest();
+ assert(argsArg != nullptr);
+
+ //
+ // The number of arguments should be a constant between 1 and 64. The rank can't be 0
+ // so at least one length must be present and the rank can't exceed 32 so there can
+ // be at most 64 arguments - 32 lengths and 32 lower bounds.
+ //
+
+ if ((!numArgsArg->Current()->IsCnsIntOrI()) || (numArgsArg->Current()->AsIntCon()->IconValue() < 1) ||
+ (numArgsArg->Current()->AsIntCon()->IconValue() > 64))
+ {
+ return nullptr;
+ }
+
+ unsigned numArgs = static_cast<unsigned>(numArgsArg->Current()->AsIntCon()->IconValue());
+ bool lowerBoundsSpecified;
+
+ if (numArgs == rank * 2)
+ {
+ lowerBoundsSpecified = true;
+ }
+ else if (numArgs == rank)
+ {
+ lowerBoundsSpecified = false;
+
+ //
+ // If the rank is 1 and a lower bound isn't specified then the runtime creates
+ // an SDArray. Note that even if a lower bound is specified it can be 0, and then
+ // we get an SDArray as well; see the for loop below.
+ //
+
+ if (rank == 1)
+ {
+ isMDArray = false;
+ }
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ //
+ // The rank is known to be at least 1 so we can start with numElements being 1
+ // to avoid the need to special case the first dimension.
+ //
+
+ numElements = S_UINT32(1);
+
+ struct Match
+ {
+ static bool IsArgsFieldInit(GenTree* tree, unsigned index, unsigned lvaNewObjArrayArgs)
+ {
+ return (tree->OperGet() == GT_ASG) && IsArgsFieldIndir(tree->gtGetOp1(), index, lvaNewObjArrayArgs) &&
+ IsArgsAddr(tree->gtGetOp1()->gtGetOp1()->gtGetOp1(), lvaNewObjArrayArgs);
+ }
+
+ static bool IsArgsFieldIndir(GenTree* tree, unsigned index, unsigned lvaNewObjArrayArgs)
+ {
+ return (tree->OperGet() == GT_IND) && (tree->gtGetOp1()->OperGet() == GT_ADD) &&
+ (tree->gtGetOp1()->gtGetOp2()->IsIntegralConst(sizeof(INT32) * index)) &&
+ IsArgsAddr(tree->gtGetOp1()->gtGetOp1(), lvaNewObjArrayArgs);
+ }
+
+ static bool IsArgsAddr(GenTree* tree, unsigned lvaNewObjArrayArgs)
+ {
+ return (tree->OperGet() == GT_ADDR) && (tree->gtGetOp1()->OperGet() == GT_LCL_VAR) &&
+ (tree->gtGetOp1()->AsLclVar()->GetLclNum() == lvaNewObjArrayArgs);
+ }
+
+ static bool IsComma(GenTree* tree)
+ {
+ return (tree != nullptr) && (tree->OperGet() == GT_COMMA);
+ }
+ };
+
+ unsigned argIndex = 0;
+ GenTree* comma;
+
+ for (comma = argsArg->Current(); Match::IsComma(comma); comma = comma->gtGetOp2())
+ {
+ if (lowerBoundsSpecified)
+ {
+ //
+ // In general lower bounds can be ignored because they're not needed to
+ // calculate the total number of elements. But for single-dimensional arrays
+ // we need to know if the lower bound is 0 because in this case the runtime
+ // creates an SDArray and this affects the way the array data offset is calculated.
+ //
+
+ if (rank == 1)
+ {
+ GenTree* lowerBoundAssign = comma->gtGetOp1();
+ assert(Match::IsArgsFieldInit(lowerBoundAssign, argIndex, lvaNewObjArrayArgs));
+ GenTree* lowerBoundNode = lowerBoundAssign->gtGetOp2();
+
+ if (lowerBoundNode->IsIntegralConst(0))
+ {
+ isMDArray = false;
+ }
+ }
+
+ comma = comma->gtGetOp2();
+ argIndex++;
+ }
+
+ GenTree* lengthNodeAssign = comma->gtGetOp1();
+ assert(Match::IsArgsFieldInit(lengthNodeAssign, argIndex, lvaNewObjArrayArgs));
+ GenTree* lengthNode = lengthNodeAssign->gtGetOp2();
+
+ if (!lengthNode->IsCnsIntOrI())
+ {
+ return nullptr;
+ }
+
+ numElements *= S_SIZE_T(lengthNode->AsIntCon()->IconValue());
+ argIndex++;
+ }
+
+ assert((comma != nullptr) && Match::IsArgsAddr(comma, lvaNewObjArrayArgs));
+
+ if (argIndex != numArgs)
+ {
+ return nullptr;
+ }
+ }
+ else
+ {
+ //
+ // Make sure there are exactly two arguments: the array class and
+ // the number of elements.
+ //
+
+ GenTreePtr arrayLengthNode;
+
+ GenTreeArgList* args = newArrayCall->gtCall.gtCallArgs;
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (newArrayCall->gtCall.gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_NEWARR_1))
+ {
+ // Array length is 1st argument for readytorun helper
+ arrayLengthNode = args->Current();
+ }
+ else
+#endif
+ {
+ // Array length is 2nd argument for regular helper
+ arrayLengthNode = args->Rest()->Current();
+ }
+
+ //
+ // Make sure that the number of elements looks valid.
+ //
+ if (arrayLengthNode->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ numElements = S_SIZE_T(arrayLengthNode->gtIntCon.gtIconVal);
+
+ if (!info.compCompHnd->isSDArray(arrayClsHnd))
+ {
+ return nullptr;
+ }
+ }
+
+ CORINFO_CLASS_HANDLE elemClsHnd;
+ var_types elementType = JITtype2varType(info.compCompHnd->getChildType(arrayClsHnd, &elemClsHnd));
+
+ //
+ // Note that genTypeSize will return zero for non-primitive types, which is exactly
+ // what we want (size will then be 0, and we will catch this in the conditional below).
+ // Note that we don't expect this to fail for valid binaries, so we assert in the
+ // non-verification case (the verification case should not assert but rather correctly
+ // handle bad binaries). This assert is not guarding any specific invariant, but rather
+ // saying that we don't expect this to happen, and if it is hit, we need to investigate
+ // why.
+ //
+
+ S_UINT32 elemSize(genTypeSize(elementType));
+ S_UINT32 size = elemSize * S_UINT32(numElements);
+
+ if (size.IsOverflow())
+ {
+ return nullptr;
+ }
+
+ if ((size.Value() == 0) || (varTypeIsGC(elementType)))
+ {
+ assert(verNeedsVerification());
+ return nullptr;
+ }
+
+ void* initData = info.compCompHnd->getArrayInitializationData(fieldToken, size.Value());
+ if (!initData)
+ {
+ return nullptr;
+ }
+
+ //
+ // At this point we are ready to commit to implementing the InitializeArray
+ // intrinsic using a struct assignment. Pop the arguments from the stack and
+ // return the struct assignment node.
+ //
+
+ impPopStack();
+ impPopStack();
+
+ const unsigned blkSize = size.Value();
+ GenTreePtr dst;
+
+ if (isMDArray)
+ {
+ unsigned dataOffset = eeGetMDArrayDataOffset(elementType, rank);
+
+ dst = gtNewOperNode(GT_ADD, TYP_BYREF, arrayLocalNode, gtNewIconNode(dataOffset, TYP_I_IMPL));
+ }
+ else
+ {
+ dst = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewIndexRef(elementType, arrayLocalNode, gtNewIconNode(0)));
+ }
+ GenTreePtr blk = gtNewBlockVal(dst, blkSize);
+ GenTreePtr srcAddr = gtNewIconHandleNode((size_t)initData, GTF_ICON_STATIC_HDL);
+ GenTreePtr src = gtNewOperNode(GT_IND, TYP_STRUCT, srcAddr);
+
+ return gtNewBlkOpNode(blk, // dst
+ src, // src
+ blkSize, // size
+ false, // volatil
+ true); // copyBlock
+}
+
+/*****************************************************************************/
+// Returns the GenTree that should be used to do the intrinsic instead of the call.
+// Returns NULL if an intrinsic cannot be used
+
+GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE method,
+ CORINFO_SIG_INFO* sig,
+ int memberRef,
+ bool readonlyCall,
+ bool tailCall,
+ CorInfoIntrinsics* pIntrinsicID)
+{
+ bool mustExpand = false;
+#if COR_JIT_EE_VERSION > 460
+ CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method, &mustExpand);
+#else
+ CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
+#endif
+ *pIntrinsicID = intrinsicID;
+
+#ifndef _TARGET_ARM_
+ genTreeOps interlockedOperator;
+#endif
+
+ if (intrinsicID == CORINFO_INTRINSIC_StubHelpers_GetStubContext)
+ {
+ // must be done regardless of DbgCode and MinOpts
+ return gtNewLclvNode(lvaStubArgumentVar, TYP_I_IMPL);
+ }
+#ifdef _TARGET_64BIT_
+ if (intrinsicID == CORINFO_INTRINSIC_StubHelpers_GetStubContextAddr)
+ {
+ // must be done regardless of DbgCode and MinOpts
+ return gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(lvaStubArgumentVar, TYP_I_IMPL));
+ }
+#else
+ assert(intrinsicID != CORINFO_INTRINSIC_StubHelpers_GetStubContextAddr);
+#endif
+
+ GenTreePtr retNode = nullptr;
+
+ //
+    // We disable the inlining of intrinsics for MinOpts.
+ //
+ if (!mustExpand && (opts.compDbgCode || opts.MinOpts()))
+ {
+ *pIntrinsicID = CORINFO_INTRINSIC_Illegal;
+ return retNode;
+ }
+
+    // Currently we don't expand CORINFO_INTRINSIC_Exp because it does not
+    // seem to work properly for Infinity values, and we don't expand
+    // CORINFO_INTRINSIC_Pow because it needs a helper which we currently don't have.
+
+ var_types callType = JITtype2varType(sig->retType);
+
+ /* First do the intrinsics which are always smaller than a call */
+
+ switch (intrinsicID)
+ {
+ GenTreePtr op1, op2;
+
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Round:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+
+ // These are math intrinsics
+
+ assert(callType != TYP_STRUCT);
+
+ op1 = nullptr;
+
+#ifdef LEGACY_BACKEND
+ if (IsTargetIntrinsic(intrinsicID))
+#else
+        // Intrinsics that are not implemented directly by target instructions will
+        // be re-materialized as user calls in the rationalizer. For tail-prefixed calls,
+        // don't do this optimization because
+        //  a) we want back compatibility with desktop .NET 4.6 / 4.6.1, and
+        //  b) it would be a non-trivial task, or too late, to re-materialize a surviving
+        //     tail-prefixed GT_INTRINSIC as a tail call in the rationalizer.
+ if (!IsIntrinsicImplementedByUserCall(intrinsicID) || !tailCall)
+#endif
+ {
+ switch (sig->numArgs)
+ {
+ case 1:
+ op1 = impPopStack().val;
+
+#if FEATURE_X87_DOUBLES
+
+ // X87 stack doesn't differentiate between float/double
+ // so it doesn't need a cast, but everybody else does
+ // Just double check it is at least a FP type
+ noway_assert(varTypeIsFloating(op1));
+
+#else // FEATURE_X87_DOUBLES
+
+ if (op1->TypeGet() != callType)
+ {
+ op1 = gtNewCastNode(callType, op1, callType);
+ }
+
+#endif // FEATURE_X87_DOUBLES
+
+ op1 = new (this, GT_INTRINSIC)
+ GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method);
+ break;
+
+ case 2:
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+#if FEATURE_X87_DOUBLES
+
+ // X87 stack doesn't differentiate between float/double
+ // so it doesn't need a cast, but everybody else does
+ // Just double check it is at least a FP type
+ noway_assert(varTypeIsFloating(op2));
+ noway_assert(varTypeIsFloating(op1));
+
+#else // FEATURE_X87_DOUBLES
+
+ if (op2->TypeGet() != callType)
+ {
+ op2 = gtNewCastNode(callType, op2, callType);
+ }
+ if (op1->TypeGet() != callType)
+ {
+ op1 = gtNewCastNode(callType, op1, callType);
+ }
+
+#endif // FEATURE_X87_DOUBLES
+
+ op1 = new (this, GT_INTRINSIC)
+ GenTreeIntrinsic(genActualType(callType), op1, op2, intrinsicID, method);
+ break;
+
+ default:
+                    NO_WAY("Unsupported number of args for Math Intrinsic");
+ }
+
+#ifndef LEGACY_BACKEND
+ if (IsIntrinsicImplementedByUserCall(intrinsicID))
+ {
+ op1->gtFlags |= GTF_CALL;
+ }
+#endif
+ }
+
+ retNode = op1;
+ break;
+
+#ifdef _TARGET_XARCH_
+ // TODO-ARM-CQ: reenable treating Interlocked operation as intrinsic
+ case CORINFO_INTRINSIC_InterlockedAdd32:
+ interlockedOperator = GT_LOCKADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXAdd32:
+ interlockedOperator = GT_XADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXchg32:
+ interlockedOperator = GT_XCHG;
+ goto InterlockedBinOpCommon;
+
+#ifdef _TARGET_AMD64_
+ case CORINFO_INTRINSIC_InterlockedAdd64:
+ interlockedOperator = GT_LOCKADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXAdd64:
+ interlockedOperator = GT_XADD;
+ goto InterlockedBinOpCommon;
+ case CORINFO_INTRINSIC_InterlockedXchg64:
+ interlockedOperator = GT_XCHG;
+ goto InterlockedBinOpCommon;
+#endif // _TARGET_AMD64_
+
+ InterlockedBinOpCommon:
+ assert(callType != TYP_STRUCT);
+ assert(sig->numArgs == 2);
+
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+ // This creates:
+ // val
+ // XAdd
+ // addr
+ // field (for example)
+ //
+ // In the case where the first argument is the address of a local, we might
+ // want to make this *not* make the var address-taken -- but atomic instructions
+ // on a local are probably pretty useless anyway, so we probably don't care.
+
+ op1 = gtNewOperNode(interlockedOperator, genActualType(callType), op1, op2);
+ op1->gtFlags |= GTF_GLOB_EFFECT;
+ retNode = op1;
+ break;
+#endif // _TARGET_XARCH_
+
+ case CORINFO_INTRINSIC_MemoryBarrier:
+
+ assert(sig->numArgs == 0);
+
+ op1 = new (this, GT_MEMORYBARRIER) GenTree(GT_MEMORYBARRIER, TYP_VOID);
+ op1->gtFlags |= GTF_GLOB_EFFECT;
+ retNode = op1;
+ break;
+
+#ifdef _TARGET_XARCH_
+ // TODO-ARM-CQ: reenable treating InterlockedCmpXchg32 operation as intrinsic
+ case CORINFO_INTRINSIC_InterlockedCmpXchg32:
+#ifdef _TARGET_AMD64_
+ case CORINFO_INTRINSIC_InterlockedCmpXchg64:
+#endif
+ {
+ assert(callType != TYP_STRUCT);
+ assert(sig->numArgs == 3);
+ GenTreePtr op3;
+
+ op3 = impPopStack().val; // comparand
+ op2 = impPopStack().val; // value
+ op1 = impPopStack().val; // location
+
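+            // Build the compare-exchange node; the operand order is (location, value, comparand),
+            // matching the pops above.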
+ GenTreePtr node = new (this, GT_CMPXCHG) GenTreeCmpXchg(genActualType(callType), op1, op2, op3);
+
+ node->gtCmpXchg.gtOpLocation->gtFlags |= GTF_DONT_CSE;
+ retNode = node;
+ break;
+ }
+#endif
+
+ case CORINFO_INTRINSIC_StringLength:
+ op1 = impPopStack().val;
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ GenTreeArrLen* arrLen =
+ new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, op1, offsetof(CORINFO_String, stringLen));
+ op1 = arrLen;
+ }
+ else
+ {
+ /* Create the expression "*(str_addr + stringLengthOffset)" */
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ gtNewIconNode(offsetof(CORINFO_String, stringLen), TYP_I_IMPL));
+ op1 = gtNewOperNode(GT_IND, TYP_INT, op1);
+ }
+ retNode = op1;
+ break;
+
+ case CORINFO_INTRINSIC_StringGetChar:
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
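+            // Index into the string's characters; GTF_INX_STRING_LAYOUT marks the index node
+            // as using the string layout rather than the array layout.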
+ op1 = gtNewIndexRef(TYP_CHAR, op1, op2);
+ op1->gtFlags |= GTF_INX_STRING_LAYOUT;
+ retNode = op1;
+ break;
+
+ case CORINFO_INTRINSIC_InitializeArray:
+ retNode = impInitializeArrayIntrinsic(sig);
+ break;
+
+ case CORINFO_INTRINSIC_Array_Address:
+ case CORINFO_INTRINSIC_Array_Get:
+ case CORINFO_INTRINSIC_Array_Set:
+ retNode = impArrayAccessIntrinsic(clsHnd, sig, memberRef, readonlyCall, intrinsicID);
+ break;
+
+ case CORINFO_INTRINSIC_GetTypeFromHandle:
+ op1 = impStackTop(0).val;
+ if (op1->gtOper == GT_CALL && (op1->gtCall.gtCallType == CT_HELPER) &&
+ gtIsTypeHandleToRuntimeTypeHelper(op1))
+ {
+ op1 = impPopStack().val;
+ // Change call to return RuntimeType directly.
+ op1->gtType = TYP_REF;
+ retNode = op1;
+ }
+ // Call the regular function.
+ break;
+
+ case CORINFO_INTRINSIC_RTH_GetValueInternal:
+ op1 = impStackTop(0).val;
+ if (op1->gtOper == GT_CALL && (op1->gtCall.gtCallType == CT_HELPER) &&
+ gtIsTypeHandleToRuntimeTypeHelper(op1))
+ {
+ // Old tree
+ // Helper-RuntimeTypeHandle -> TreeToGetNativeTypeHandle
+ //
+ // New tree
+ // TreeToGetNativeTypeHandle
+
+ // Remove call to helper and return the native TypeHandle pointer that was the parameter
+ // to that helper.
+
+ op1 = impPopStack().val;
+
+ // Get native TypeHandle argument to old helper
+ op1 = op1->gtCall.gtCallArgs;
+ assert(op1->IsList());
+ assert(op1->gtOp.gtOp2 == nullptr);
+ op1 = op1->gtOp.gtOp1;
+ retNode = op1;
+ }
+ // Call the regular function.
+ break;
+
+#ifndef LEGACY_BACKEND
+ case CORINFO_INTRINSIC_Object_GetType:
+
+ op1 = impPopStack().val;
+ op1 = new (this, GT_INTRINSIC) GenTreeIntrinsic(genActualType(callType), op1, intrinsicID, method);
+
+ // Set the CALL flag to indicate that the operator is implemented by a call.
+ // Set also the EXCEPTION flag because the native implementation of
+ // CORINFO_INTRINSIC_Object_GetType intrinsic can throw NullReferenceException.
+ op1->gtFlags |= (GTF_CALL | GTF_EXCEPT);
+ retNode = op1;
+ break;
+#endif
+
+ default:
+ /* Unknown intrinsic */
+ break;
+ }
+
+ if (mustExpand)
+ {
+ if (retNode == nullptr)
+ {
+ NO_WAY("JIT must expand the intrinsic!");
+ }
+ }
+
+ return retNode;
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::impArrayAccessIntrinsic(
+ CORINFO_CLASS_HANDLE clsHnd, CORINFO_SIG_INFO* sig, int memberRef, bool readonlyCall, CorInfoIntrinsics intrinsicID)
+{
+ /* If we are generating SMALL_CODE, we don't want to use intrinsics for
+ the following, as it generates fatter code.
+ */
+
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ return nullptr;
+ }
+
+ /* These intrinsics generate fatter (but faster) code and are only
+ done if we don't need SMALL_CODE */
+
+ unsigned rank = (intrinsicID == CORINFO_INTRINSIC_Array_Set) ? (sig->numArgs - 1) : sig->numArgs;
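+    // For Array_Set the last argument is the value being stored, so it does not count
+    // towards the rank.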
+
+    // The rank 1 case is special because it has to handle two array formats,
+    // so we simply don't handle that case.
+ if (rank > GT_ARR_MAX_RANK || rank <= 1)
+ {
+ return nullptr;
+ }
+
+ CORINFO_CLASS_HANDLE arrElemClsHnd = nullptr;
+ var_types elemType = JITtype2varType(info.compCompHnd->getChildType(clsHnd, &arrElemClsHnd));
+
+    // For the ref case, we will only be able to inline if the types match
+    // (the verifier checks for this; we don't care for the nonverified case) and the
+    // type is final (so we don't need to do the cast).
+ if ((intrinsicID != CORINFO_INTRINSIC_Array_Get) && !readonlyCall && varTypeIsGC(elemType))
+ {
+ // Get the call site signature
+ CORINFO_SIG_INFO LocalSig;
+ eeGetCallSiteSig(memberRef, info.compScopeHnd, impTokenLookupContextHandle, &LocalSig);
+ assert(LocalSig.hasThis());
+
+ CORINFO_CLASS_HANDLE actualElemClsHnd;
+
+ if (intrinsicID == CORINFO_INTRINSIC_Array_Set)
+ {
+ // Fetch the last argument, the one that indicates the type we are setting.
+ CORINFO_ARG_LIST_HANDLE argType = LocalSig.args;
+ for (unsigned r = 0; r < rank; r++)
+ {
+ argType = info.compCompHnd->getArgNext(argType);
+ }
+
+ typeInfo argInfo = verParseArgSigToTypeInfo(&LocalSig, argType);
+ actualElemClsHnd = argInfo.GetClassHandle();
+ }
+ else
+ {
+ assert(intrinsicID == CORINFO_INTRINSIC_Array_Address);
+
+ // Fetch the return type
+ typeInfo retInfo = verMakeTypeInfo(LocalSig.retType, LocalSig.retTypeClass);
+ assert(retInfo.IsByRef());
+ actualElemClsHnd = retInfo.GetClassHandle();
+ }
+
+ // if it's not final, we can't do the optimization
+ if (!(info.compCompHnd->getClassAttribs(actualElemClsHnd) & CORINFO_FLG_FINAL))
+ {
+ return nullptr;
+ }
+ }
+
+ unsigned arrayElemSize;
+ if (elemType == TYP_STRUCT)
+ {
+ assert(arrElemClsHnd);
+
+ arrayElemSize = info.compCompHnd->getClassSize(arrElemClsHnd);
+ }
+ else
+ {
+ arrayElemSize = genTypeSize(elemType);
+ }
+
+ if ((unsigned char)arrayElemSize != arrayElemSize)
+ {
+ // arrayElemSize would be truncated as an unsigned char.
+ // This means the array element is too large. Don't do the optimization.
+ return nullptr;
+ }
+
+ GenTreePtr val = nullptr;
+
+ if (intrinsicID == CORINFO_INTRINSIC_Array_Set)
+ {
+ // Assignment of a struct is more work, and there are more gets than sets.
+ if (elemType == TYP_STRUCT)
+ {
+ return nullptr;
+ }
+
+ val = impPopStack().val;
+ assert(genActualType(elemType) == genActualType(val->gtType) ||
+ (elemType == TYP_FLOAT && val->gtType == TYP_DOUBLE) ||
+ (elemType == TYP_INT && val->gtType == TYP_BYREF) ||
+ (elemType == TYP_DOUBLE && val->gtType == TYP_FLOAT));
+ }
+
+ noway_assert((unsigned char)GT_ARR_MAX_RANK == GT_ARR_MAX_RANK);
+
+ GenTreePtr inds[GT_ARR_MAX_RANK];
+ for (unsigned k = rank; k > 0; k--)
+ {
+ inds[k - 1] = impPopStack().val;
+ }
+
+ GenTreePtr arr = impPopStack().val;
+ assert(arr->gtType == TYP_REF);
+
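+    // Build a GT_ARR_ELEM node that yields the byref address of arr[inds[0], ..., inds[rank-1]].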
+ GenTreePtr arrElem =
+ new (this, GT_ARR_ELEM) GenTreeArrElem(TYP_BYREF, arr, static_cast<unsigned char>(rank),
+ static_cast<unsigned char>(arrayElemSize), elemType, &inds[0]);
+
+ if (intrinsicID != CORINFO_INTRINSIC_Array_Address)
+ {
+ arrElem = gtNewOperNode(GT_IND, elemType, arrElem);
+ }
+
+ if (intrinsicID == CORINFO_INTRINSIC_Array_Set)
+ {
+ assert(val != nullptr);
+ return gtNewAssignNode(arrElem, val);
+ }
+ else
+ {
+ return arrElem;
+ }
+}
+
+BOOL Compiler::verMergeEntryStates(BasicBlock* block, bool* changed)
+{
+ unsigned i;
+
+ // do some basic checks first
+ if (block->bbStackDepthOnEntry() != verCurrentState.esStackDepth)
+ {
+ return FALSE;
+ }
+
+ if (verCurrentState.esStackDepth > 0)
+ {
+ // merge stack types
+ StackEntry* parentStack = block->bbStackOnEntry();
+ StackEntry* childStack = verCurrentState.esStack;
+
+ for (i = 0; i < verCurrentState.esStackDepth; i++, parentStack++, childStack++)
+ {
+ if (tiMergeToCommonParent(&parentStack->seTypeInfo, &childStack->seTypeInfo, changed) == FALSE)
+ {
+ return FALSE;
+ }
+ }
+ }
+
+ // merge initialization status of this ptr
+
+ if (verTrackObjCtorInitState)
+ {
+ // If we're tracking the CtorInitState, then it must not be unknown in the current state.
+ assert(verCurrentState.thisInitialized != TIS_Bottom);
+
+ // If the successor block's thisInit state is unknown, copy it from the current state.
+ if (block->bbThisOnEntry() == TIS_Bottom)
+ {
+ *changed = true;
+ verSetThisInit(block, verCurrentState.thisInitialized);
+ }
+ else if (verCurrentState.thisInitialized != block->bbThisOnEntry())
+ {
+ if (block->bbThisOnEntry() != TIS_Top)
+ {
+ *changed = true;
+ verSetThisInit(block, TIS_Top);
+
+ if (block->bbFlags & BBF_FAILED_VERIFICATION)
+ {
+ // The block is bad. Control can flow through the block to any handler that catches the
+ // verification exception, but the importer ignores bad blocks and therefore won't model
+ // this flow in the normal way. To complete the merge into the bad block, the new state
+ // needs to be manually pushed to the handlers that may be reached after the verification
+ // exception occurs.
+ //
+ // Usually, the new state was already propagated to the relevant handlers while processing
+ // the predecessors of the bad block. The exception is when the bad block is at the start
+ // of a try region, meaning it is protected by additional handlers that do not protect its
+ // predecessors.
+ //
+ if (block->hasTryIndex() && ((block->bbFlags & BBF_TRY_BEG) != 0))
+ {
+ // Push TIS_Top to the handlers that protect the bad block. Note that this can cause
+ // recursive calls back into this code path (if successors of the current bad block are
+ // also bad blocks).
+ //
+ ThisInitState origTIS = verCurrentState.thisInitialized;
+ verCurrentState.thisInitialized = TIS_Top;
+ impVerifyEHBlock(block, true);
+ verCurrentState.thisInitialized = origTIS;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ assert(verCurrentState.thisInitialized == TIS_Bottom && block->bbThisOnEntry() == TIS_Bottom);
+ }
+
+ return TRUE;
+}
+
+/*****************************************************************************
+ * 'logMsg' is true if a log message needs to be logged. false if the caller has
+ * already logged it (presumably in a more detailed fashion than done here)
+ * 'bVerificationException' is true for a verification exception, false for a
+ * "call unauthorized by host" exception.
+ */
+
+void Compiler::verConvertBBToThrowVerificationException(BasicBlock* block DEBUGARG(bool logMsg))
+{
+ block->bbJumpKind = BBJ_THROW;
+ block->bbFlags |= BBF_FAILED_VERIFICATION;
+
+ impCurStmtOffsSet(block->bbCodeOffs);
+
+#ifdef DEBUG
+ // we need this since BeginTreeList asserts otherwise
+ impTreeList = impTreeLast = nullptr;
+ block->bbFlags &= ~BBF_IMPORTED;
+
+ if (logMsg)
+ {
+ JITLOG((LL_ERROR, "Verification failure: while compiling %s near IL offset %x..%xh \n", info.compFullName,
+ block->bbCodeOffs, block->bbCodeOffsEnd));
+ if (verbose)
+ {
+ printf("\n\nVerification failure: %s near IL %xh \n", info.compFullName, block->bbCodeOffs);
+ }
+ }
+
+ if (JitConfig.DebugBreakOnVerificationFailure())
+ {
+ DebugBreak();
+ }
+#endif
+
+ impBeginTreeList();
+
+ // if the stack is non-empty evaluate all the side-effects
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impEvalSideEffects();
+ }
+ assert(verCurrentState.esStackDepth == 0);
+
+ GenTreePtr op1 = gtNewHelperCallNode(CORINFO_HELP_VERIFICATION, TYP_VOID, GTF_EXCEPT,
+ gtNewArgList(gtNewIconNode(block->bbCodeOffs)));
+ // verCurrentState.esStackDepth = 0;
+ impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+    // The inliner is not able to handle methods that require a throw block, so
+    // make sure this method never gets inlined.
+ info.compCompHnd->setMethodAttribs(info.compMethodHnd, CORINFO_FLG_BAD_INLINEE);
+}
+
+/*****************************************************************************
+ *
+ */
+void Compiler::verHandleVerificationFailure(BasicBlock* block DEBUGARG(bool logMsg))
+
+{
+ // In AMD64, for historical reasons involving design limitations of JIT64, the VM has a
+ // slightly different mechanism in which it calls the JIT to perform IL verification:
+ // in the case of transparent methods the VM calls for a predicate IsVerifiable()
+ // that consists of calling the JIT with the IMPORT_ONLY flag and with the IL verify flag on.
+ // If the JIT determines the method is not verifiable, it should raise the exception to the VM and let
+    // it bubble up until reported by the runtime. Currently in RyuJIT, this method doesn't bubble
+    // up the exception; instead it embeds a throw inside the offending basic block and lets the
+    // failure occur when the jitted method runs.
+ //
+ // For AMD64 we don't want this behavior when the JIT has been called only for verification (i.e.
+ // with the IMPORT_ONLY and IL Verification flag set) because this won't actually generate code,
+ // just try to find out whether to fail this method before even actually jitting it. So, in case
+ // we detect these two conditions, instead of generating a throw statement inside the offending
+    // basic block, we immediately fail to JIT and notify the VM so that the IsVerifiable() predicate
+    // returns false, making RyuJIT behave the same way JIT64 does.
+ //
+ // The rationale behind this workaround is to avoid modifying the VM and maintain compatibility between JIT64 and
+ // RyuJIT for the time being until we completely replace JIT64.
+    // TODO-ARM64-Cleanup: We probably want to actually modify the VM in the future to avoid the unnecessary two passes.
+
+ // In AMD64 we must make sure we're behaving the same way as JIT64, meaning we should only raise the verification
+ // exception if we are only importing and verifying. The method verNeedsVerification() can also modify the
+ // tiVerificationNeeded flag in the case it determines it can 'skip verification' during importation and defer it
+ // to a runtime check. That's why we must assert one or the other (since the flag tiVerificationNeeded can
+ // be turned off during importation).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+
+#ifdef DEBUG
+ bool canSkipVerificationResult =
+ info.compCompHnd->canSkipMethodVerification(info.compMethodHnd) != CORINFO_VERIFICATION_CANNOT_SKIP;
+ assert(tiVerificationNeeded || canSkipVerificationResult);
+#endif // DEBUG
+
+ // Add the non verifiable flag to the compiler
+ if ((opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0)
+ {
+ tiIsVerifiableCode = FALSE;
+ }
+#endif //_TARGET_64BIT_
+ verResetCurrentState(block, &verCurrentState);
+ verConvertBBToThrowVerificationException(block DEBUGARG(logMsg));
+
+#ifdef DEBUG
+ impNoteLastILoffs(); // Remember at which BC offset the tree was finished
+#endif // DEBUG
+}
+
+/******************************************************************************/
+typeInfo Compiler::verMakeTypeInfo(CorInfoType ciType, CORINFO_CLASS_HANDLE clsHnd)
+{
+ assert(ciType < CORINFO_TYPE_COUNT);
+
+ typeInfo tiResult;
+ switch (ciType)
+ {
+ case CORINFO_TYPE_STRING:
+ case CORINFO_TYPE_CLASS:
+ tiResult = verMakeTypeInfo(clsHnd);
+ if (!tiResult.IsType(TI_REF))
+ { // type must be consistent with element type
+ return typeInfo();
+ }
+ break;
+
+#ifdef _TARGET_64BIT_
+ case CORINFO_TYPE_NATIVEINT:
+ case CORINFO_TYPE_NATIVEUINT:
+ if (clsHnd)
+ {
+ // If we have more precise information, use it
+ return verMakeTypeInfo(clsHnd);
+ }
+ else
+ {
+ return typeInfo::nativeInt();
+ }
+ break;
+#endif // _TARGET_64BIT_
+
+ case CORINFO_TYPE_VALUECLASS:
+ case CORINFO_TYPE_REFANY:
+ tiResult = verMakeTypeInfo(clsHnd);
+            // type must be consistent with element type;
+ if (!tiResult.IsValueClass())
+ {
+ return typeInfo();
+ }
+ break;
+ case CORINFO_TYPE_VAR:
+ return verMakeTypeInfo(clsHnd);
+
+ case CORINFO_TYPE_PTR: // for now, pointers are treated as an error
+ case CORINFO_TYPE_VOID:
+ return typeInfo();
+ break;
+
+ case CORINFO_TYPE_BYREF:
+ {
+ CORINFO_CLASS_HANDLE childClassHandle;
+ CorInfoType childType = info.compCompHnd->getChildType(clsHnd, &childClassHandle);
+ return ByRef(verMakeTypeInfo(childType, childClassHandle));
+ }
+ break;
+
+ default:
+ if (clsHnd)
+ { // If we have more precise information, use it
+ return typeInfo(TI_STRUCT, clsHnd);
+ }
+ else
+ {
+ return typeInfo(JITtype2tiType(ciType));
+ }
+ }
+ return tiResult;
+}
+
+/******************************************************************************/
+
+typeInfo Compiler::verMakeTypeInfo(CORINFO_CLASS_HANDLE clsHnd, bool bashStructToRef /* = false */)
+{
+ if (clsHnd == nullptr)
+ {
+ return typeInfo();
+ }
+
+ // Byrefs should only occur in method and local signatures, which are accessed
+ // using ICorClassInfo and ICorClassInfo.getChildType.
+ // So findClass() and getClassAttribs() should not be called for byrefs
+
+ if (JITtype2varType(info.compCompHnd->asCorInfoType(clsHnd)) == TYP_BYREF)
+ {
+ assert(!"Did findClass() return a Byref?");
+ return typeInfo();
+ }
+
+ unsigned attribs = info.compCompHnd->getClassAttribs(clsHnd);
+
+ if (attribs & CORINFO_FLG_VALUECLASS)
+ {
+ CorInfoType t = info.compCompHnd->getTypeForPrimitiveValueClass(clsHnd);
+
+        // Meta-data validation should ensure that CORINFO_TYPE_BYREF does
+        // not occur here, so we may want to change this to an assert instead.
+ if (t == CORINFO_TYPE_VOID || t == CORINFO_TYPE_BYREF || t == CORINFO_TYPE_PTR)
+ {
+ return typeInfo();
+ }
+
+#ifdef _TARGET_64BIT_
+ if (t == CORINFO_TYPE_NATIVEINT || t == CORINFO_TYPE_NATIVEUINT)
+ {
+ return typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+
+ if (t != CORINFO_TYPE_UNDEF)
+ {
+ return (typeInfo(JITtype2tiType(t)));
+ }
+ else if (bashStructToRef)
+ {
+ return (typeInfo(TI_REF, clsHnd));
+ }
+ else
+ {
+ return (typeInfo(TI_STRUCT, clsHnd));
+ }
+ }
+ else if (attribs & CORINFO_FLG_GENERIC_TYPE_VARIABLE)
+ {
+ // See comment in _typeInfo.h for why we do it this way.
+ return (typeInfo(TI_REF, clsHnd, true));
+ }
+ else
+ {
+ return (typeInfo(TI_REF, clsHnd));
+ }
+}
+
+/******************************************************************************/
+BOOL Compiler::verIsSDArray(typeInfo ti)
+{
+ if (ti.IsNullObjRef())
+ { // nulls are SD arrays
+ return TRUE;
+ }
+
+ if (!ti.IsType(TI_REF))
+ {
+ return FALSE;
+ }
+
+ if (!info.compCompHnd->isSDArray(ti.GetClassHandleForObjRef()))
+ {
+ return FALSE;
+ }
+ return TRUE;
+}
+
+/******************************************************************************/
+/* Given 'arrayObjectType' which is an array type, fetch the element type. */
+/* Returns an error type if anything goes wrong */
+
+typeInfo Compiler::verGetArrayElemType(typeInfo arrayObjectType)
+{
+    assert(!arrayObjectType.IsNullObjRef()); // you need to check for null explicitly since that is a success case
+
+ if (!verIsSDArray(arrayObjectType))
+ {
+ return typeInfo();
+ }
+
+ CORINFO_CLASS_HANDLE childClassHandle = nullptr;
+ CorInfoType ciType = info.compCompHnd->getChildType(arrayObjectType.GetClassHandleForObjRef(), &childClassHandle);
+
+ return verMakeTypeInfo(ciType, childClassHandle);
+}
+
+/*****************************************************************************
+ */
+typeInfo Compiler::verParseArgSigToTypeInfo(CORINFO_SIG_INFO* sig, CORINFO_ARG_LIST_HANDLE args)
+{
+ CORINFO_CLASS_HANDLE classHandle;
+ CorInfoType ciType = strip(info.compCompHnd->getArgType(sig, args, &classHandle));
+
+ var_types type = JITtype2varType(ciType);
+ if (varTypeIsGC(type))
+ {
+ // For efficiency, getArgType only returns something in classHandle for
+        // value types.  For other types that have additional type info, you
+ // have to call back explicitly
+ classHandle = info.compCompHnd->getArgClass(sig, args);
+ if (!classHandle)
+ {
+ NO_WAY("Could not figure out Class specified in argument or local signature");
+ }
+ }
+
+ return verMakeTypeInfo(ciType, classHandle);
+}
+
+/*****************************************************************************/
+
+// This does the expensive check to figure out whether the method
+// needs to be verified. It is called only when we fail verification,
+// just before throwing the verification exception.
+
+BOOL Compiler::verNeedsVerification()
+{
+ // If we have previously determined that verification is NOT needed
+ // (for example in Compiler::compCompile), that means verification is really not needed.
+ // Return the same decision we made before.
+ // (Note: This literally means that tiVerificationNeeded can never go from 0 to 1.)
+
+ if (!tiVerificationNeeded)
+ {
+ return tiVerificationNeeded;
+ }
+
+ assert(tiVerificationNeeded);
+
+ // Ok, we haven't concluded that verification is NOT needed. Consult the EE now to
+ // obtain the answer.
+ CorInfoCanSkipVerificationResult canSkipVerificationResult =
+ info.compCompHnd->canSkipMethodVerification(info.compMethodHnd);
+
+ // canSkipVerification will return one of the following three values:
+ // CORINFO_VERIFICATION_CANNOT_SKIP = 0, // Cannot skip verification during jit time.
+ // CORINFO_VERIFICATION_CAN_SKIP = 1, // Can skip verification during jit time.
+ // CORINFO_VERIFICATION_RUNTIME_CHECK = 2, // Skip verification during jit time,
+ // but need to insert a callout to the VM to ask during runtime
+ // whether to skip verification or not.
+
+ // Set tiRuntimeCalloutNeeded if canSkipVerification() instructs us to insert a callout for runtime check
+ if (canSkipVerificationResult == CORINFO_VERIFICATION_RUNTIME_CHECK)
+ {
+ tiRuntimeCalloutNeeded = true;
+ }
+
+ if (canSkipVerificationResult == CORINFO_VERIFICATION_DONT_JIT)
+ {
+ // Dev10 706080 - Testers don't like the assert, so just silence it
+ // by not using the macros that invoke debugAssert.
+ badCode();
+ }
+
+ // When tiVerificationNeeded is true, JIT will do the verification during JIT time.
+ // The following line means we will NOT do jit time verification if canSkipVerification
+ // returns CORINFO_VERIFICATION_CAN_SKIP or CORINFO_VERIFICATION_RUNTIME_CHECK.
+ tiVerificationNeeded = (canSkipVerificationResult == CORINFO_VERIFICATION_CANNOT_SKIP);
+ return tiVerificationNeeded;
+}
+
+BOOL Compiler::verIsByRefLike(const typeInfo& ti)
+{
+ if (ti.IsByRef())
+ {
+ return TRUE;
+ }
+ if (!ti.IsType(TI_STRUCT))
+ {
+ return FALSE;
+ }
+ return info.compCompHnd->getClassAttribs(ti.GetClassHandleForValueClass()) & CORINFO_FLG_CONTAINS_STACK_PTR;
+}
+
+BOOL Compiler::verIsSafeToReturnByRef(const typeInfo& ti)
+{
+ if (ti.IsPermanentHomeByRef())
+ {
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+}
+
+BOOL Compiler::verIsBoxable(const typeInfo& ti)
+{
+ return (ti.IsPrimitiveType() || ti.IsObjRef() // includes boxed generic type variables
+ || ti.IsUnboxedGenericTypeVar() ||
+ (ti.IsType(TI_STRUCT) &&
+ // exclude byreflike structs
+ !(info.compCompHnd->getClassAttribs(ti.GetClassHandleForValueClass()) & CORINFO_FLG_CONTAINS_STACK_PTR)));
+}
+
+// Is it a boxed value type?
+bool Compiler::verIsBoxedValueType(typeInfo ti)
+{
+ if (ti.GetType() == TI_REF)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = ti.GetClassHandleForObjRef();
+ return !!eeIsValueClass(clsHnd);
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Check if a TailCall is legal.
+ */
+
+bool Compiler::verCheckTailCallConstraint(
+ OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken, // Is this a "constrained." call on a type parameter?
+                                        bool                    speculative // If true, won't throw if verification fails. Instead it will
+ // return false to the caller.
+ // If false, it will throw.
+ )
+{
+ DWORD mflags;
+ CORINFO_SIG_INFO sig;
+ unsigned int popCount = 0; // we can't pop the stack since impImportCall needs it, so
+ // this counter is used to keep track of how many items have been
+ // virtually popped
+
+ CORINFO_METHOD_HANDLE methodHnd = nullptr;
+ CORINFO_CLASS_HANDLE methodClassHnd = nullptr;
+ unsigned methodClassFlgs = 0;
+
+ assert(impOpcodeIsCallOpcode(opcode));
+
+ if (compIsForInlining())
+ {
+ return false;
+ }
+
+ // for calli, VerifyOrReturn that this is not a virtual method
+ if (opcode == CEE_CALLI)
+ {
+ /* Get the call sig */
+ eeGetSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &sig);
+
+ // We don't know the target method, so we have to infer the flags, or
+ // assume the worst-case.
+ mflags = (sig.callConv & CORINFO_CALLCONV_HASTHIS) ? 0 : CORINFO_FLG_STATIC;
+ }
+ else
+ {
+ methodHnd = pResolvedToken->hMethod;
+
+ mflags = info.compCompHnd->getMethodAttribs(methodHnd);
+
+ // When verifying generic code we pair the method handle with its
+ // owning class to get the exact method signature.
+ methodClassHnd = pResolvedToken->hClass;
+ assert(methodClassHnd);
+
+ eeGetMethodSig(methodHnd, &sig, methodClassHnd);
+
+ // opcode specific check
+ methodClassFlgs = info.compCompHnd->getClassAttribs(methodClassHnd);
+ }
+
+ // We must have got the methodClassHnd if opcode is not CEE_CALLI
+ assert((methodHnd != nullptr && methodClassHnd != nullptr) || opcode == CEE_CALLI);
+
+ if ((sig.callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ eeGetCallSiteSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &sig);
+ }
+
+ // check compatibility of the arguments
+ unsigned int argCount;
+ argCount = sig.numArgs;
+ CORINFO_ARG_LIST_HANDLE args;
+ args = sig.args;
+ while (argCount--)
+ {
+ typeInfo tiDeclared = verParseArgSigToTypeInfo(&sig, args).NormaliseForStack();
+
+ // check that the argument is not a byref for tailcalls
+ VerifyOrReturnSpeculative(!verIsByRefLike(tiDeclared), "tailcall on byrefs", speculative);
+
+        // For unsafe code, we might have parameters containing a pointer to a stack location.
+ // Disallow the tailcall for this kind.
+ CORINFO_CLASS_HANDLE classHandle;
+ CorInfoType ciType = strip(info.compCompHnd->getArgType(&sig, args, &classHandle));
+ VerifyOrReturnSpeculative(ciType != CORINFO_TYPE_PTR, "tailcall on CORINFO_TYPE_PTR", speculative);
+
+ args = info.compCompHnd->getArgNext(args);
+ }
+
+ // update popCount
+ popCount += sig.numArgs;
+
+    // check for 'this', which is present on non-static methods not called via NEWOBJ
+ if (!(mflags & CORINFO_FLG_STATIC))
+ {
+ // Always update the popCount.
+ // This is crucial for the stack calculation to be correct.
+ typeInfo tiThis = impStackTop(popCount).seTypeInfo;
+ popCount++;
+
+ if (opcode == CEE_CALLI)
+ {
+ // For CALLI, we don't know the methodClassHnd. Therefore, let's check the "this" object
+ // on the stack.
+ if (tiThis.IsValueClass())
+ {
+ tiThis.MakeByRef();
+ }
+ VerifyOrReturnSpeculative(!verIsByRefLike(tiThis), "byref in tailcall", speculative);
+ }
+ else
+ {
+ // Check type compatibility of the this argument
+ typeInfo tiDeclaredThis = verMakeTypeInfo(methodClassHnd);
+ if (tiDeclaredThis.IsValueClass())
+ {
+ tiDeclaredThis.MakeByRef();
+ }
+
+ VerifyOrReturnSpeculative(!verIsByRefLike(tiDeclaredThis), "byref in tailcall", speculative);
+ }
+ }
+
+ // Tail calls on constrained calls should be illegal too:
+ // when instantiated at a value type, a constrained call may pass the address of a stack allocated value
+ VerifyOrReturnSpeculative(!pConstrainedResolvedToken, "byref in constrained tailcall", speculative);
+
+ // Get the exact view of the signature for an array method
+ if (sig.retType != CORINFO_TYPE_VOID)
+ {
+ if (methodClassFlgs & CORINFO_FLG_ARRAY)
+ {
+ assert(opcode != CEE_CALLI);
+ eeGetCallSiteSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &sig);
+ }
+ }
+
+ typeInfo tiCalleeRetType = verMakeTypeInfo(sig.retType, sig.retTypeClass);
+ typeInfo tiCallerRetType =
+ verMakeTypeInfo(info.compMethodInfo->args.retType, info.compMethodInfo->args.retTypeClass);
+
+ // void return type gets morphed into the error type, so we have to treat them specially here
+ if (sig.retType == CORINFO_TYPE_VOID)
+ {
+ VerifyOrReturnSpeculative(info.compMethodInfo->args.retType == CORINFO_TYPE_VOID, "tailcall return mismatch",
+ speculative);
+ }
+ else
+ {
+ VerifyOrReturnSpeculative(tiCompatibleWith(NormaliseForStack(tiCalleeRetType),
+ NormaliseForStack(tiCallerRetType), true),
+ "tailcall return mismatch", speculative);
+ }
+
+ // for tailcall, stack must be empty
+ VerifyOrReturnSpeculative(verCurrentState.esStackDepth == popCount, "stack non-empty on tailcall", speculative);
+
+ return true; // Yes, tailcall is legal
+}
+
+/*****************************************************************************
+ *
+ * Checks the IL verification rules for the call
+ */
+
+void Compiler::verVerifyCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ bool tailCall,
+ bool readonlyCall,
+ const BYTE* delegateCreateStart,
+ const BYTE* codeAddr,
+ CORINFO_CALL_INFO* callInfo DEBUGARG(const char* methodName))
+{
+ DWORD mflags;
+ CORINFO_SIG_INFO* sig = nullptr;
+ unsigned int popCount = 0; // we can't pop the stack since impImportCall needs it, so
+ // this counter is used to keep track of how many items have been
+ // virtually popped
+
+ // for calli, VerifyOrReturn that this is not a virtual method
+ if (opcode == CEE_CALLI)
+ {
+ Verify(false, "Calli not verifiable");
+ return;
+ }
+
+ //<NICE> It would be nice to cache the rest of it, but eeFindMethod is the big ticket item.
+ mflags = callInfo->verMethodFlags;
+
+ sig = &callInfo->verSig;
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ eeGetCallSiteSig(pResolvedToken->token, pResolvedToken->tokenScope, pResolvedToken->tokenContext, sig);
+ }
+
+ // opcode specific check
+ unsigned methodClassFlgs = callInfo->classFlags;
+ switch (opcode)
+ {
+ case CEE_CALLVIRT:
+ // cannot do callvirt on valuetypes
+ VerifyOrReturn(!(methodClassFlgs & CORINFO_FLG_VALUECLASS), "callVirt on value class");
+ VerifyOrReturn(sig->hasThis(), "CallVirt on static method");
+ break;
+
+ case CEE_NEWOBJ:
+ {
+ assert(!tailCall); // Importer should not allow this
+ VerifyOrReturn((mflags & CORINFO_FLG_CONSTRUCTOR) && !(mflags & CORINFO_FLG_STATIC),
+ "newobj must be on instance");
+
+ if (methodClassFlgs & CORINFO_FLG_DELEGATE)
+ {
+ VerifyOrReturn(sig->numArgs == 2, "wrong number args to delegate ctor");
+ typeInfo tiDeclaredObj = verParseArgSigToTypeInfo(sig, sig->args).NormaliseForStack();
+ typeInfo tiDeclaredFtn =
+ verParseArgSigToTypeInfo(sig, info.compCompHnd->getArgNext(sig->args)).NormaliseForStack();
+ VerifyOrReturn(tiDeclaredFtn.IsNativeIntType(), "ftn arg needs to be a native int type");
+
+ assert(popCount == 0);
+ typeInfo tiActualObj = impStackTop(1).seTypeInfo;
+ typeInfo tiActualFtn = impStackTop(0).seTypeInfo;
+
+ VerifyOrReturn(tiActualFtn.IsMethod(), "delegate needs method as first arg");
+ VerifyOrReturn(tiCompatibleWith(tiActualObj, tiDeclaredObj, true), "delegate object type mismatch");
+ VerifyOrReturn(tiActualObj.IsNullObjRef() || tiActualObj.IsType(TI_REF),
+ "delegate object type mismatch");
+
+ CORINFO_CLASS_HANDLE objTypeHandle =
+ tiActualObj.IsNullObjRef() ? nullptr : tiActualObj.GetClassHandleForObjRef();
+
+ // the method signature must be compatible with the delegate's invoke method
+
+ // check that for virtual functions, the type of the object used to get the
+ // ftn ptr is the same as the type of the object passed to the delegate ctor.
+ // since this is a bit of work to determine in general, we pattern match stylized
+ // code sequences
+
+ // the delegate creation code check, which used to be done later, is now done here
+                // so we can read delegateMethodRef directly
+ // from the preceding LDFTN or CEE_LDVIRTFN instruction sequence;
+ // we then use it in our call to isCompatibleDelegate().
+
+ mdMemberRef delegateMethodRef = mdMemberRefNil;
+ VerifyOrReturn(verCheckDelegateCreation(delegateCreateStart, codeAddr, delegateMethodRef),
+ "must create delegates with certain IL");
+
+ CORINFO_RESOLVED_TOKEN delegateResolvedToken;
+ delegateResolvedToken.tokenContext = impTokenLookupContextHandle;
+ delegateResolvedToken.tokenScope = info.compScopeHnd;
+ delegateResolvedToken.token = delegateMethodRef;
+ delegateResolvedToken.tokenType = CORINFO_TOKENKIND_Method;
+ info.compCompHnd->resolveToken(&delegateResolvedToken);
+
+ CORINFO_CALL_INFO delegateCallInfo;
+ eeGetCallInfo(&delegateResolvedToken, nullptr /* constraint typeRef */,
+ addVerifyFlag(CORINFO_CALLINFO_SECURITYCHECKS), &delegateCallInfo);
+
+ BOOL isOpenDelegate = FALSE;
+ VerifyOrReturn(info.compCompHnd->isCompatibleDelegate(objTypeHandle, delegateResolvedToken.hClass,
+ tiActualFtn.GetMethod(), pResolvedToken->hClass,
+ &isOpenDelegate),
+ "function incompatible with delegate");
+
+ // check the constraints on the target method
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(delegateResolvedToken.hClass),
+ "delegate target has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(delegateResolvedToken.hClass,
+ tiActualFtn.GetMethod()),
+ "delegate target has unsatisfied method constraints");
+
+ // See ECMA spec section 1.8.1.5.2 (Delegating via instance dispatch)
+ // for additional verification rules for delegates
+ CORINFO_METHOD_HANDLE actualMethodHandle = tiActualFtn.GetMethod();
+ DWORD actualMethodAttribs = info.compCompHnd->getMethodAttribs(actualMethodHandle);
+ if (impIsLDFTN_TOKEN(delegateCreateStart, codeAddr))
+ {
+
+ if ((actualMethodAttribs & CORINFO_FLG_VIRTUAL) && ((actualMethodAttribs & CORINFO_FLG_FINAL) == 0)
+#ifdef DEBUG
+ && StrictCheckForNonVirtualCallToVirtualMethod()
+#endif
+ )
+ {
+ if (info.compCompHnd->shouldEnforceCallvirtRestriction(info.compScopeHnd))
+ {
+ VerifyOrReturn(tiActualObj.IsThisPtr() && lvaIsOriginalThisReadOnly() ||
+ verIsBoxedValueType(tiActualObj),
+ "The 'this' parameter to the call must be either the calling method's "
+ "'this' parameter or "
+ "a boxed value type.");
+ }
+ }
+ }
+
+ if (actualMethodAttribs & CORINFO_FLG_PROTECTED)
+ {
+ BOOL targetIsStatic = actualMethodAttribs & CORINFO_FLG_STATIC;
+
+ Verify(targetIsStatic || !isOpenDelegate,
+ "Unverifiable creation of an open instance delegate for a protected member.");
+
+ CORINFO_CLASS_HANDLE instanceClassHnd = (tiActualObj.IsNullObjRef() || targetIsStatic)
+ ? info.compClassHnd
+ : tiActualObj.GetClassHandleForObjRef();
+
+ // In the case of protected methods, it is a requirement that the 'this'
+ // pointer be a subclass of the current context. Perform this check.
+ Verify(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClassHnd),
+ "Accessing protected method through wrong type.");
+ }
+ goto DONE_ARGS;
+ }
+ }
+ // fall thru to default checks
+ default:
+ VerifyOrReturn(!(mflags & CORINFO_FLG_ABSTRACT), "method abstract");
+ }
+ VerifyOrReturn(!((mflags & CORINFO_FLG_CONSTRUCTOR) && (methodClassFlgs & CORINFO_FLG_DELEGATE)),
+ "can only newobj a delegate constructor");
+
+ // check compatibility of the arguments
+ unsigned int argCount;
+ argCount = sig->numArgs;
+ CORINFO_ARG_LIST_HANDLE args;
+ args = sig->args;
+ while (argCount--)
+ {
+ typeInfo tiActual = impStackTop(popCount + argCount).seTypeInfo;
+
+ typeInfo tiDeclared = verParseArgSigToTypeInfo(sig, args).NormaliseForStack();
+ VerifyOrReturn(tiCompatibleWith(tiActual, tiDeclared, true), "type mismatch");
+
+ args = info.compCompHnd->getArgNext(args);
+ }
+
+DONE_ARGS:
+
+ // update popCount
+ popCount += sig->numArgs;
+
+    // check for 'this', which is present on non-static methods not called via NEWOBJ
+ CORINFO_CLASS_HANDLE instanceClassHnd = info.compClassHnd;
+ if (!(mflags & CORINFO_FLG_STATIC) && (opcode != CEE_NEWOBJ))
+ {
+ typeInfo tiThis = impStackTop(popCount).seTypeInfo;
+ popCount++;
+
+ // If it is null, we assume we can access it (since it will AV shortly)
+ // If it is anything but a reference class, there is no hierarchy, so
+ // again, we don't need the precise instance class to compute 'protected' access
+ if (tiThis.IsType(TI_REF))
+ {
+ instanceClassHnd = tiThis.GetClassHandleForObjRef();
+ }
+
+ // Check type compatibility of the this argument
+ typeInfo tiDeclaredThis = verMakeTypeInfo(pResolvedToken->hClass);
+ if (tiDeclaredThis.IsValueClass())
+ {
+ tiDeclaredThis.MakeByRef();
+ }
+
+ // If this is a call to the base class .ctor, set thisPtr Init for
+ // this block.
+ if (mflags & CORINFO_FLG_CONSTRUCTOR)
+ {
+ if (verTrackObjCtorInitState && tiThis.IsThisPtr() &&
+ verIsCallToInitThisPtr(info.compClassHnd, pResolvedToken->hClass))
+ {
+ assert(verCurrentState.thisInitialized !=
+ TIS_Bottom); // This should never be the case just from the logic of the verifier.
+ VerifyOrReturn(verCurrentState.thisInitialized == TIS_Uninit,
+ "Call to base class constructor when 'this' is possibly initialized");
+ // Otherwise, 'this' is now initialized.
+ verCurrentState.thisInitialized = TIS_Init;
+ tiThis.SetInitialisedObjRef();
+ }
+ else
+ {
+ // We allow direct calls to value type constructors
+ // NB: we have to check that the contents of tiThis is a value type, otherwise we could use a
+ // constrained callvirt to illegally re-enter a .ctor on a value of reference type.
+ VerifyOrReturn(tiThis.IsByRef() && DereferenceByRef(tiThis).IsValueClass(),
+ "Bad call to a constructor");
+ }
+ }
+
+ if (pConstrainedResolvedToken != nullptr)
+ {
+ VerifyOrReturn(tiThis.IsByRef(), "non-byref this type in constrained call");
+
+ typeInfo tiConstraint = verMakeTypeInfo(pConstrainedResolvedToken->hClass);
+
+ // We just dereference this and test for equality
+ tiThis.DereferenceByRef();
+ VerifyOrReturn(typeInfo::AreEquivalent(tiThis, tiConstraint),
+ "this type mismatch with constrained type operand");
+
+ // Now pretend the this type is the boxed constrained type, for the sake of subsequent checks
+ tiThis = typeInfo(TI_REF, pConstrainedResolvedToken->hClass);
+ }
+
+ // To support direct calls on readonly byrefs, just pretend tiDeclaredThis is readonly too
+ if (tiDeclaredThis.IsByRef() && tiThis.IsReadonlyByRef())
+ {
+ tiDeclaredThis.SetIsReadonlyByRef();
+ }
+
+ VerifyOrReturn(tiCompatibleWith(tiThis, tiDeclaredThis, true), "this type mismatch");
+
+ if (tiThis.IsByRef())
+ {
+ // Find the actual type where the method exists (as opposed to what is declared
+ // in the metadata). This is to prevent passing a byref as the "this" argument
+ // while calling methods like System.ValueType.GetHashCode() which expect boxed objects.
+
+ CORINFO_CLASS_HANDLE actualClassHnd = info.compCompHnd->getMethodClass(pResolvedToken->hMethod);
+ VerifyOrReturn(eeIsValueClass(actualClassHnd),
+ "Call to base type of valuetype (which is never a valuetype)");
+ }
+
+ // Rules for non-virtual call to a non-final virtual method:
+
+ // Define:
+ // The "this" pointer is considered to be "possibly written" if
+            //   1. Its address has been taken (LDARGA 0) anywhere in the method.
+ // (or)
+ // 2. It has been stored to (STARG.0) anywhere in the method.
+
+ // A non-virtual call to a non-final virtual method is only allowed if
+ // 1. The this pointer passed to the callee is an instance of a boxed value type.
+ // (or)
+ // 2. The this pointer passed to the callee is the current method's this pointer.
+ // (and) The current method's this pointer is not "possibly written".
+
+ // Thus the rule is that if you assign to this ANYWHERE you can't make "base" calls to
+            // virtual methods.  (Luckily this does not affect .ctors, since they are not virtual.)
+            // This is stronger than is strictly needed, but implementing a laxer rule is significantly
+            // harder and more error prone.
+
+ if (opcode == CEE_CALL && (mflags & CORINFO_FLG_VIRTUAL) && ((mflags & CORINFO_FLG_FINAL) == 0)
+#ifdef DEBUG
+ && StrictCheckForNonVirtualCallToVirtualMethod()
+#endif
+ )
+ {
+ if (info.compCompHnd->shouldEnforceCallvirtRestriction(info.compScopeHnd))
+ {
+ VerifyOrReturn(
+ tiThis.IsThisPtr() && lvaIsOriginalThisReadOnly() || verIsBoxedValueType(tiThis),
+ "The 'this' parameter to the call must be either the calling method's 'this' parameter or "
+ "a boxed value type.");
+ }
+ }
+ }
+
+ // check any constraints on the callee's class and type parameters
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(pResolvedToken->hClass),
+ "method has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(pResolvedToken->hClass, pResolvedToken->hMethod),
+ "method has unsatisfied method constraints");
+
+ if (mflags & CORINFO_FLG_PROTECTED)
+ {
+ VerifyOrReturn(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClassHnd),
+ "Can't access protected method");
+ }
+
+ // Get the exact view of the signature for an array method
+ if (sig->retType != CORINFO_TYPE_VOID)
+ {
+ eeGetMethodSig(pResolvedToken->hMethod, sig, pResolvedToken->hClass);
+ }
+
+ // "readonly." prefixed calls only allowed for the Address operation on arrays.
+ // The methods supported by array types are under the control of the EE
+ // so we can trust that only the Address operation returns a byref.
+ if (readonlyCall)
+ {
+ typeInfo tiCalleeRetType = verMakeTypeInfo(sig->retType, sig->retTypeClass);
+ VerifyOrReturn((methodClassFlgs & CORINFO_FLG_ARRAY) && tiCalleeRetType.IsByRef(),
+ "unexpected use of readonly prefix");
+ }
+
+ // Verify the tailcall
+ if (tailCall)
+ {
+ verCheckTailCallConstraint(opcode, pResolvedToken, pConstrainedResolvedToken, false);
+ }
+}
+
+/*****************************************************************************
+ * Checks that a delegate creation is done using the following pattern:
+ * dup
+ * ldvirtftn targetMemberRef
+ * OR
+ * ldftn targetMemberRef
+ *
+ * 'delegateCreateStart' points at the last dup or ldftn in this basic block (null if
+ * not in this basic block)
+ *
+ * targetMemberRef is read from the code sequence.
+ * targetMemberRef is validated iff verificationNeeded.
+ */
+
+BOOL Compiler::verCheckDelegateCreation(const BYTE* delegateCreateStart,
+ const BYTE* codeAddr,
+ mdMemberRef& targetMemberRef)
+{
+ if (impIsLDFTN_TOKEN(delegateCreateStart, codeAddr))
+ {
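+        // ldftn is a two-byte (0xFE-prefixed) opcode, so the metadata token follows at offset 2.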
+ targetMemberRef = getU4LittleEndian(&delegateCreateStart[2]);
+ return TRUE;
+ }
+ else if (impIsDUP_LDVIRTFTN_TOKEN(delegateCreateStart, codeAddr))
+ {
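+        // dup is one byte and ldvirtftn is two bytes, so the metadata token follows at offset 3.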
+ targetMemberRef = getU4LittleEndian(&delegateCreateStart[3]);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+typeInfo Compiler::verVerifySTIND(const typeInfo& tiTo, const typeInfo& value, const typeInfo& instrType)
+{
+ Verify(!tiTo.IsReadonlyByRef(), "write to readonly byref");
+ typeInfo ptrVal = verVerifyLDIND(tiTo, instrType);
+ typeInfo normPtrVal = typeInfo(ptrVal).NormaliseForStack();
+ if (!tiCompatibleWith(value, normPtrVal, true))
+ {
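+        // The compatibility check failed: report the verification failure via Verify and
+        // record that an unverifiable cast was used.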
+ Verify(tiCompatibleWith(value, normPtrVal, true), "type mismatch");
+ compUnsafeCastUsed = true;
+ }
+ return ptrVal;
+}
+
+typeInfo Compiler::verVerifyLDIND(const typeInfo& ptr, const typeInfo& instrType)
+{
+ assert(!instrType.IsStruct());
+
+ typeInfo ptrVal;
+ if (ptr.IsByRef())
+ {
+ ptrVal = DereferenceByRef(ptr);
+ if (instrType.IsObjRef() && !ptrVal.IsObjRef())
+ {
+ Verify(false, "bad pointer");
+ compUnsafeCastUsed = true;
+ }
+ else if (!instrType.IsObjRef() && !typeInfo::AreEquivalent(instrType, ptrVal))
+ {
+ Verify(false, "pointer not consistent with instr");
+ compUnsafeCastUsed = true;
+ }
+ }
+ else
+ {
+ Verify(false, "pointer not byref");
+ compUnsafeCastUsed = true;
+ }
+
+ return ptrVal;
+}
+
+// Verify that the field is used properly. 'tiThis' is NULL for statics,
+// 'fieldFlags' is the field's attributes, and mutator is TRUE if it is a
+// ld*flda or a st*fld.
+// 'enclosingClass' is given if we are accessing a field in some specific type.
+
+void Compiler::verVerifyField(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ const CORINFO_FIELD_INFO& fieldInfo,
+ const typeInfo* tiThis,
+ BOOL mutator,
+ BOOL allowPlainStructAsThis)
+{
+ CORINFO_CLASS_HANDLE enclosingClass = pResolvedToken->hClass;
+ unsigned fieldFlags = fieldInfo.fieldFlags;
+ CORINFO_CLASS_HANDLE instanceClass =
+ info.compClassHnd; // for statics, we imagine the instance is the current class.
+
+ bool isStaticField = ((fieldFlags & CORINFO_FLG_FIELD_STATIC) != 0);
+ if (mutator)
+ {
+        Verify(!(fieldFlags & CORINFO_FLG_FIELD_UNMANAGED), "mutating an RVA based static");
+ if ((fieldFlags & CORINFO_FLG_FIELD_FINAL))
+ {
+ Verify((info.compFlags & CORINFO_FLG_CONSTRUCTOR) && enclosingClass == info.compClassHnd &&
+ info.compIsStatic == isStaticField,
+ "bad use of initonly field (set or address taken)");
+ }
+ }
+
+ if (tiThis == nullptr)
+ {
+ Verify(isStaticField, "used static opcode with non-static field");
+ }
+ else
+ {
+ typeInfo tThis = *tiThis;
+
+ if (allowPlainStructAsThis && tThis.IsValueClass())
+ {
+ tThis.MakeByRef();
+ }
+
+ // If it is null, we assume we can access it (since it will AV shortly)
+        // If it is anything but a reference class, there is no hierarchy, so
+ // again, we don't need the precise instance class to compute 'protected' access
+ if (tiThis->IsType(TI_REF))
+ {
+ instanceClass = tiThis->GetClassHandleForObjRef();
+ }
+
+ // Note that even if the field is static, we require that the this pointer
+        // satisfy the same constraints as a non-static field.  This happens to
+        // be simpler and seems reasonable.
+ typeInfo tiDeclaredThis = verMakeTypeInfo(enclosingClass);
+ if (tiDeclaredThis.IsValueClass())
+ {
+ tiDeclaredThis.MakeByRef();
+
+ // we allow read-only tThis, on any field access (even stores!), because if the
+ // class implementor wants to prohibit stores he should make the field private.
+ // we do this by setting the read-only bit on the type we compare tThis to.
+ tiDeclaredThis.SetIsReadonlyByRef();
+ }
+ else if (verTrackObjCtorInitState && tThis.IsThisPtr())
+ {
+ // Any field access is legal on "uninitialized" this pointers.
+ // The easiest way to implement this is to simply set the
+ // initialized bit for the duration of the type check on the
+ // field access only. It does not change the state of the "this"
+ // for the function as a whole. Note that the "tThis" is a copy
+ // of the original "this" type (*tiThis) passed in.
+ tThis.SetInitialisedObjRef();
+ }
+
+ Verify(tiCompatibleWith(tThis, tiDeclaredThis, true), "this type mismatch");
+ }
+
+ // Presently the JIT does not check that we don't store or take the address of init-only fields
+ // since we cannot guarantee their immutability and it is not a security issue.
+
+    // check any constraints on the field's class --- accessing the field might cause a class constructor to run.
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(enclosingClass),
+ "field has unsatisfied class constraints");
+ if (fieldFlags & CORINFO_FLG_FIELD_PROTECTED)
+ {
+ Verify(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClass),
+ "Accessing protected method through wrong type.");
+ }
+}
+
+void Compiler::verVerifyCond(const typeInfo& tiOp1, const typeInfo& tiOp2, unsigned opcode)
+{
+ if (tiOp1.IsNumberType())
+ {
+#ifdef _TARGET_64BIT_
+ Verify(tiCompatibleWith(tiOp1, tiOp2, true), "Cond type mismatch");
+#else // _TARGET_64BIT
+ // [10/17/2013] Consider changing this: to put on my verification lawyer hat,
+ // this is non-conforming to the ECMA Spec: types don't have to be equivalent,
+ // but compatible, since we can coalesce native int with int32 (see section III.1.5).
+ Verify(typeInfo::AreEquivalent(tiOp1, tiOp2), "Cond type mismatch");
+#endif // !_TARGET_64BIT_
+ }
+ else if (tiOp1.IsObjRef())
+ {
+ switch (opcode)
+ {
+ case CEE_BEQ_S:
+ case CEE_BEQ:
+ case CEE_BNE_UN_S:
+ case CEE_BNE_UN:
+ case CEE_CEQ:
+ case CEE_CGT_UN:
+ break;
+ default:
+ Verify(FALSE, "Cond not allowed on object types");
+ }
+ Verify(tiOp2.IsObjRef(), "Cond type mismatch");
+ }
+ else if (tiOp1.IsByRef())
+ {
+ Verify(tiOp2.IsByRef(), "Cond type mismatch");
+ }
+ else
+ {
+ Verify(tiOp1.IsMethod() && tiOp2.IsMethod(), "Cond type mismatch");
+ }
+}
+
+void Compiler::verVerifyThisPtrInitialised()
+{
+ if (verTrackObjCtorInitState)
+ {
+ Verify(verCurrentState.thisInitialized == TIS_Init, "this ptr is not initialized");
+ }
+}
+
+BOOL Compiler::verIsCallToInitThisPtr(CORINFO_CLASS_HANDLE context, CORINFO_CLASS_HANDLE target)
+{
+ // Either target == context, in this case calling an alternate .ctor
+ // Or target is the immediate parent of context
+
+ return ((target == context) || (target == info.compCompHnd->getParentType(context)));
+}
+
+GenTreePtr Compiler::impImportLdvirtftn(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_CALL_INFO* pCallInfo)
+{
+ if ((pCallInfo->methodFlags & CORINFO_FLG_EnC) && !(pCallInfo->classFlags & CORINFO_FLG_INTERFACE))
+ {
+ NO_WAY("Virtual call to a function added via EnC is not supported");
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun() && !pCallInfo->exactContextNeedsRuntimeLookup)
+ {
+ GenTreeCall* call = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT,
+ gtNewArgList(thisPtr));
+
+ call->setEntryPoint(pCallInfo->codePointerLookup.constLookup);
+
+ return call;
+ }
+#endif
+
+ // Get the exact descriptor for the static callsite
+ GenTreePtr exactTypeDesc = impParentClassTokenToHandle(pResolvedToken);
+ if (exactTypeDesc == nullptr)
+ { // compDonotInline()
+ return nullptr;
+ }
+
+ GenTreePtr exactMethodDesc = impTokenToHandle(pResolvedToken);
+ if (exactMethodDesc == nullptr)
+ { // compDonotInline()
+ return nullptr;
+ }
+
+ GenTreeArgList* helpArgs = gtNewArgList(exactMethodDesc);
+
+ helpArgs = gtNewListNode(exactTypeDesc, helpArgs);
+
+ helpArgs = gtNewListNode(thisPtr, helpArgs);
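+    // gtNewListNode prepends, so the helper argument list ends up as
+    // (thisPtr, exactTypeDesc, exactMethodDesc).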
+
+ // Call helper function. This gets the target address of the final destination callsite.
+
+ return gtNewHelperCallNode(CORINFO_HELP_VIRTUAL_FUNC_PTR, TYP_I_IMPL, GTF_EXCEPT, helpArgs);
+}
+
+/*****************************************************************************
+ *
+ * Build and import a box node
+ */
+
+void Compiler::impImportAndPushBox(CORINFO_RESOLVED_TOKEN* pResolvedToken)
+{
+ // Get the tree for the type handle for the boxed object. In the case
+ // of shared generic code or ngen'd code this might be an embedded
+ // computation.
+    // Note we can only do this if the class constructor has been called.
+    // We can always do it on primitive types.
+
+ GenTreePtr op1 = nullptr;
+ GenTreePtr op2 = nullptr;
+ var_types lclTyp;
+
+ impSpillSpecialSideEff();
+
+ // Now get the expression to box from the stack.
+ CORINFO_CLASS_HANDLE operCls;
+ GenTreePtr exprToBox = impPopStack(operCls).val;
+
+ CorInfoHelpFunc boxHelper = info.compCompHnd->getBoxHelper(pResolvedToken->hClass);
+ if (boxHelper == CORINFO_HELP_BOX)
+ {
+ // we are doing 'normal' boxing. This means that we can inline the box operation
+ // Box(expr) gets morphed into
+ // temp = new(clsHnd)
+ // cpobj(temp+4, expr, clsHnd)
+ // push temp
+ // The code paths differ slightly below for structs and primitives because
+ // "cpobj" differs in these cases. In one case you get
+ // impAssignStructPtr(temp+4, expr, clsHnd)
+ // and the other you get
+ // *(temp+4) = expr
+
+ if (impBoxTempInUse || impBoxTemp == BAD_VAR_NUM)
+ {
+ impBoxTemp = lvaGrabTemp(true DEBUGARG("Box Helper"));
+ }
+
+ // The box temp needs to stay in use until this box expression is appended to
+ // some other node. We approximate this by keeping it alive until
+ // the opcode stack becomes empty.
+ impBoxTempInUse = true;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ bool usingReadyToRunHelper = false;
+
+ if (opts.IsReadyToRun())
+ {
+ op1 = impReadyToRunHelperToTree(pResolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF);
+ usingReadyToRunHelper = (op1 != NULL);
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the newfast call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Allocate and return the new object for boxing
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ // Ensure that the value class is restored
+ op2 = impTokenToHandle(pResolvedToken, nullptr, TRUE /* mustRestoreHandle */);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ op1 = gtNewHelperCallNode(info.compCompHnd->getNewHelper(pResolvedToken, info.compMethodHnd), TYP_REF, 0,
+ gtNewArgList(op2));
+ }
+
+ /* Remember that this basic block contains 'new' of an object */
+ compCurBB->bbFlags |= BBF_HAS_NEWOBJ;
+
+ GenTreePtr asg = gtNewTempAssign(impBoxTemp, op1);
+
+ GenTreePtr asgStmt = impAppendTree(asg, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ op1 = gtNewLclvNode(impBoxTemp, TYP_REF);
+ op2 = gtNewIconNode(sizeof(void*), TYP_I_IMPL);
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1, op2);
+
+ if (varTypeIsStruct(exprToBox))
+ {
+ assert(info.compCompHnd->getClassSize(pResolvedToken->hClass) == info.compCompHnd->getClassSize(operCls));
+ op1 = impAssignStructPtr(op1, exprToBox, operCls, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+ lclTyp = exprToBox->TypeGet();
+ if (lclTyp == TYP_BYREF)
+ {
+ lclTyp = TYP_I_IMPL;
+ }
+ CorInfoType jitType = info.compCompHnd->asCorInfoType(pResolvedToken->hClass);
+ if (impIsPrimitive(jitType))
+ {
+ lclTyp = JITtype2varType(jitType);
+ }
+ assert(genActualType(exprToBox->TypeGet()) == genActualType(lclTyp) ||
+ varTypeIsFloating(lclTyp) == varTypeIsFloating(exprToBox->TypeGet()));
+ var_types srcTyp = exprToBox->TypeGet();
+ var_types dstTyp = lclTyp;
+
+ if (srcTyp != dstTyp)
+ {
+ assert((varTypeIsFloating(srcTyp) && varTypeIsFloating(dstTyp)) ||
+ (varTypeIsIntegral(srcTyp) && varTypeIsIntegral(dstTyp)));
+ exprToBox = gtNewCastNode(dstTyp, exprToBox, dstTyp);
+ }
+ op1 = gtNewAssignNode(gtNewOperNode(GT_IND, lclTyp, op1), exprToBox);
+ }
+
+ op2 = gtNewLclvNode(impBoxTemp, TYP_REF);
+ op1 = gtNewOperNode(GT_COMMA, TYP_REF, op1, op2);
+
+ // Record that this is a "box" node.
+ op1 = new (this, GT_BOX) GenTreeBox(TYP_REF, op1, asgStmt);
+
+ // If it is a value class, mark the "box" node. We can use this information
+ // to optimise several cases:
+ // "box(x) == null" --> false
+ // "(box(x)).CallAnInterfaceMethod(...)" --> "(&x).CallAValueTypeMethod"
+ // "(box(x)).CallAnObjectMethod(...)" --> "(&x).CallAValueTypeMethod"
+
+ op1->gtFlags |= GTF_BOX_VALUE;
+ assert(op1->IsBoxedValue());
+ assert(asg->gtOper == GT_ASG);
+ }
+ else
+ {
+ // Don't optimize, just call the helper and be done with it
+
+ // Ensure that the value class is restored
+ op2 = impTokenToHandle(pResolvedToken, nullptr, TRUE /* mustRestoreHandle */);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ GenTreeArgList* args = gtNewArgList(op2, impGetStructAddr(exprToBox, operCls, (unsigned)CHECK_SPILL_ALL, true));
+ op1 = gtNewHelperCallNode(boxHelper, TYP_REF, GTF_EXCEPT, args);
+ }
+
+ /* Push the result back on the stack, */
+ /* even if clsHnd is a value class we want the TI_REF */
+ typeInfo tiRetVal = typeInfo(TI_REF, info.compCompHnd->getTypeForBox(pResolvedToken->hClass));
+ impPushOnStack(op1, tiRetVal);
+}
+
+//------------------------------------------------------------------------
+ // impImportNewObjArray: Build and import `new` of multi-dimensional array
+//
+// Arguments:
+// pResolvedToken - The CORINFO_RESOLVED_TOKEN that has been initialized
+// by a call to CEEInfo::resolveToken().
+// pCallInfo - The CORINFO_CALL_INFO that has been initialized
+// by a call to CEEInfo::getCallInfo().
+//
+// Assumptions:
+// The multi-dimensional array constructor arguments (array dimensions) are
+// pushed on the IL stack on entry to this method.
+//
+// Notes:
+// Multi-dimensional array constructors are imported as calls to a JIT
+// helper, not as regular calls.
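+ //
+ // Illustrative example (assuming typical C# compiler output): for
+ //     var a = new int[2, 3];
+ // the IL is roughly
+ //     ldc.i4.2
+ //     ldc.i4.3
+ //     newobj instance void int32[0...,0...]::.ctor(int32, int32)
+ // and the two dimension values are what this method finds on the IL
+ // stack on entry.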
+
+void Compiler::impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo)
+{
+ GenTreePtr classHandle = impParentClassTokenToHandle(pResolvedToken);
+ if (classHandle == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ assert(pCallInfo->sig.numArgs);
+
+ GenTreePtr node;
+ GenTreeArgList* args;
+
+ //
+ // There are two different JIT helpers that can be used to allocate
+ // multi-dimensional arrays:
+ //
+ // - CORINFO_HELP_NEW_MDARR - takes the array dimensions as varargs.
+ // This variant is deprecated. It should be eventually removed.
+ //
+ // - CORINFO_HELP_NEW_MDARR_NONVARARG - takes the array dimensions as
+ // pointer to block of int32s. This variant is more portable.
+ //
+ // The non-varargs helper is enabled for CoreRT only for now. Enabling this
+ // unconditionally would require ReadyToRun version bump.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if COR_JIT_EE_VERSION > 460
+ if (!opts.IsReadyToRun() || (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI))
+ {
+ LclVarDsc* newObjArrayArgsVar;
+
+ // Reuse the temp used to pass the array dimensions to avoid bloating
+ // the stack frame in case there are multiple calls to multi-dim array
+ // constructors within a single method.
+ if (lvaNewObjArrayArgs == BAD_VAR_NUM)
+ {
+ lvaNewObjArrayArgs = lvaGrabTemp(false DEBUGARG("NewObjArrayArgs"));
+ lvaTable[lvaNewObjArrayArgs].lvType = TYP_BLK;
+ lvaTable[lvaNewObjArrayArgs].lvExactSize = 0;
+ }
+
+ // Increase size of lvaNewObjArrayArgs to be the largest size needed to hold 'numArgs' integers
+ // for our call to CORINFO_HELP_NEW_MDARR_NONVARARG.
+ lvaTable[lvaNewObjArrayArgs].lvExactSize =
+ max(lvaTable[lvaNewObjArrayArgs].lvExactSize, pCallInfo->sig.numArgs * sizeof(INT32));
+
+ // The side-effects may include allocation of more multi-dimensional arrays. Spill all side-effects
+ // to ensure that the shared lvaNewObjArrayArgs local variable is only ever used to pass arguments
+ // to one allocation at a time.
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("impImportNewObjArray"));
+
+ //
+ // The arguments of the CORINFO_HELP_NEW_MDARR_NONVARARG helper are:
+ // - Array class handle
+ // - Number of dimension arguments
+ // - Pointer to block of int32 dimensions - address of lvaNewObjArrayArgs temp.
+ //
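+ //
+ // Illustrative layout (hypothetical values): for 'new int[2, 3]' the loop
+ // below fills the TYP_BLK temp with two int32 slots, { 2, 3 }, and the
+ // resulting helper call is conceptually
+ //     CORINFO_HELP_NEW_MDARR_NONVARARG(arrayClsHnd, 2, &lvaNewObjArrayArgs)
+ //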
+
+ node = gtNewLclvNode(lvaNewObjArrayArgs, TYP_BLK);
+ node = gtNewOperNode(GT_ADDR, TYP_I_IMPL, node);
+
+ // Pop the dimension arguments from the stack one at a time and store them
+ // into the lvaNewObjArrayArgs temp.
+ for (int i = pCallInfo->sig.numArgs - 1; i >= 0; i--)
+ {
+ GenTreePtr arg = impImplicitIorI4Cast(impPopStack().val, TYP_INT);
+
+ GenTreePtr dest = gtNewLclvNode(lvaNewObjArrayArgs, TYP_BLK);
+ dest = gtNewOperNode(GT_ADDR, TYP_I_IMPL, dest);
+ dest = gtNewOperNode(GT_ADD, TYP_I_IMPL, dest,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, sizeof(INT32) * i));
+ dest = gtNewOperNode(GT_IND, TYP_INT, dest);
+
+ node = gtNewOperNode(GT_COMMA, node->TypeGet(), gtNewAssignNode(dest, arg), node);
+ }
+
+ args = gtNewArgList(node);
+
+ // pass number of arguments to the helper
+ args = gtNewListNode(gtNewIconNode(pCallInfo->sig.numArgs), args);
+
+ args = gtNewListNode(classHandle, args);
+
+ node = gtNewHelperCallNode(CORINFO_HELP_NEW_MDARR_NONVARARG, TYP_REF, 0, args);
+ }
+ else
+#endif
+ {
+ //
+ // The varargs helper needs the type and method handles as last
+ // and last-1 param (this is a cdecl call, so args will be
+ // pushed in reverse order on the CPU stack)
+ //
+
+ args = gtNewArgList(classHandle);
+
+ // pass number of arguments to the helper
+ args = gtNewListNode(gtNewIconNode(pCallInfo->sig.numArgs), args);
+
+ unsigned argFlags = 0;
+ args = impPopList(pCallInfo->sig.numArgs, &argFlags, &pCallInfo->sig, args);
+
+ node = gtNewHelperCallNode(CORINFO_HELP_NEW_MDARR, TYP_REF, 0, args);
+
+ // varargs, so we pop the arguments
+ node->gtFlags |= GTF_CALL_POP_ARGS;
+
+#ifdef DEBUG
+ // At the present time we don't track Caller pop arguments
+ // that have GC references in them
+ for (GenTreeArgList* temp = args; temp; temp = temp->Rest())
+ {
+ assert(temp->Current()->gtType != TYP_REF);
+ }
+#endif
+ }
+
+ node->gtFlags |= args->gtFlags & GTF_GLOB_EFFECT;
+ node->gtCall.compileTimeHelperArgumentHandle = (CORINFO_GENERIC_HANDLE)pResolvedToken->hClass;
+
+ // Remember that this basic block contains 'new' of a md array
+ compCurBB->bbFlags |= BBF_HAS_NEWARRAY;
+
+ impPushOnStack(node, typeInfo(TI_REF, pResolvedToken->hClass));
+}
+
+GenTreePtr Compiler::impTransformThis(GenTreePtr thisPtr,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ CORINFO_THIS_TRANSFORM transform)
+{
+ switch (transform)
+ {
+ case CORINFO_DEREF_THIS:
+ {
+ GenTreePtr obj = thisPtr;
+
+ // This does an LDIND on the obj, which should be a byref pointing to a ref.
+ impBashVarAddrsToI(obj);
+ assert(genActualType(obj->gtType) == TYP_I_IMPL || obj->gtType == TYP_BYREF);
+ CorInfoType constraintTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass);
+
+ obj = gtNewOperNode(GT_IND, JITtype2varType(constraintTyp), obj);
+ // The ldind could point anywhere, for example a boxed class static int.
+ obj->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+
+ return obj;
+ }
+
+ case CORINFO_BOX_THIS:
+ {
+ // Constraint calls where there might be no
+ // unboxed entry point require us to implement the call via helper.
+ // These only occur when a possible target of the call
+ // may have inherited an implementation of an interface
+ // method from System.Object or System.ValueType. The EE does not provide us with
+ // "unboxed" versions of these methods.
+
+ GenTreePtr obj = thisPtr;
+
+ assert(obj->TypeGet() == TYP_BYREF || obj->TypeGet() == TYP_I_IMPL);
+ obj = gtNewObjNode(pConstrainedResolvedToken->hClass, obj);
+ obj->gtFlags |= GTF_EXCEPT;
+
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(pConstrainedResolvedToken->hClass);
+ var_types objType = JITtype2varType(jitTyp);
+ if (impIsPrimitive(jitTyp))
+ {
+ if (obj->OperIsBlk())
+ {
+ obj->ChangeOperUnchecked(GT_IND);
+
+ // Obj could point anywhere, for example a boxed class static int.
+ obj->gtFlags |= GTF_IND_TGTANYWHERE;
+ obj->gtOp.gtOp2 = nullptr; // must be zero for tree walkers
+ }
+
+ obj->gtType = JITtype2varType(jitTyp);
+ assert(varTypeIsArithmetic(obj->gtType));
+ }
+
+ // This pushes on the dereferenced byref
+ // This is then used immediately to box.
+ impPushOnStack(obj, verMakeTypeInfo(pConstrainedResolvedToken->hClass).NormaliseForStack());
+
+ // This pops off the byref-to-a-value-type remaining on the stack and
+ // replaces it with a boxed object.
+ // This is then used as the object to the virtual call immediately below.
+ impImportAndPushBox(pConstrainedResolvedToken);
+ if (compDonotInline())
+ {
+ return nullptr;
+ }
+
+ obj = impPopStack().val;
+ return obj;
+ }
+ case CORINFO_NO_THIS_TRANSFORM:
+ default:
+ return thisPtr;
+ }
+}
+
+bool Compiler::impCanPInvokeInline(var_types callRetTyp)
+{
+ return impCanPInvokeInlineCallSite(callRetTyp) && getInlinePInvokeEnabled() && (!opts.compDbgCode) &&
+ (compCodeOpt() != SMALL_CODE) && (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke
+ ;
+}
+
+ // Returns false only if the callsite really cannot be inlined. Ignores global
+ // conditions such as the debugger and profiler settings.
+bool Compiler::impCanPInvokeInlineCallSite(var_types callRetTyp)
+{
+ return
+ // We have to disable pinvoke inlining inside of filters
+ // because in case the main execution (i.e. in the try block) is inside
+ // unmanaged code, we cannot reuse the inlined stub (we still need the
+ // original state until we are in the catch handler)
+ (!bbInFilterILRange(compCurBB)) &&
+ // We disable pinvoke inlining inside handlers since the GSCookie is
+ // in the inlined Frame (see CORINFO_EE_INFO::InlinedCallFrameInfo::offsetOfGSCookie),
+ // but this would not protect framelets/return-address of handlers.
+ !compCurBB->hasHndIndex() &&
+#ifdef _TARGET_AMD64_
+ // Turns out JIT64 doesn't perform PInvoke inlining inside try regions, here's an excerpt of
+ // the comment from JIT64 explaining why:
+ //
+ //// [VSWhidbey: 611015] - because the jitted code links in the Frame (instead
+ //// of the stub) we rely on the Frame not being 'active' until inside the
+ //// stub. This normally happens by the stub setting the return address
+ //// pointer in the Frame object inside the stub. On a normal return, the
+ //// return address pointer is zeroed out so the Frame can be safely re-used,
+ //// but if an exception occurs, nobody zeros out the return address pointer.
+ //// Thus if we re-used the Frame object, it would go 'active' as soon as we
+ //// link it into the Frame chain.
+ ////
+ //// Technically we only need to disable PInvoke inlining if we're in a
+ //// handler or if we're
+ //// in a try body with a catch or filter/except where other non-handler code
+ //// in this method might run and try to re-use the dirty Frame object.
+ //
+ // Now, because of this, the VM actually assumes that in 64 bit we never PInvoke
+ // inline calls on any EH construct, you can verify that on VM\ExceptionHandling.cpp:203
+ // The method responsible for resuming execution is UpdateObjectRefInResumeContextCallback
+ // you can see how it aligns with JIT64 policy of not inlining PInvoke calls almost right
+ // at the beginning of the body of the method.
+ !compCurBB->hasTryIndex() &&
+#endif
+ (!impLocAllocOnStack()) && (callRetTyp != TYP_STRUCT);
+}
+
+void Compiler::impCheckForPInvokeCall(GenTreePtr call,
+ CORINFO_METHOD_HANDLE methHnd,
+ CORINFO_SIG_INFO* sig,
+ unsigned mflags)
+{
+ var_types callRetTyp = JITtype2varType(sig->retType);
+ CorInfoUnmanagedCallConv unmanagedCallConv;
+
+ // If VM flagged it as Pinvoke, flag the call node accordingly
+ if ((mflags & CORINFO_FLG_PINVOKE) != 0)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_PINVOKE;
+ }
+
+ if (methHnd)
+ {
+ if ((mflags & CORINFO_FLG_PINVOKE) == 0 || (mflags & CORINFO_FLG_NOSECURITYWRAP) == 0)
+ {
+ return;
+ }
+
+ unmanagedCallConv = info.compCompHnd->getUnmanagedCallConv(methHnd);
+ }
+ else
+ {
+ CorInfoCallConv callConv = CorInfoCallConv(sig->callConv & CORINFO_CALLCONV_MASK);
+ if (callConv == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ // Used by the IL Stubs.
+ callConv = CORINFO_CALLCONV_C;
+ }
+ static_assert_no_msg((unsigned)CORINFO_CALLCONV_C == (unsigned)CORINFO_UNMANAGED_CALLCONV_C);
+ static_assert_no_msg((unsigned)CORINFO_CALLCONV_STDCALL == (unsigned)CORINFO_UNMANAGED_CALLCONV_STDCALL);
+ static_assert_no_msg((unsigned)CORINFO_CALLCONV_THISCALL == (unsigned)CORINFO_UNMANAGED_CALLCONV_THISCALL);
+ unmanagedCallConv = CorInfoUnmanagedCallConv(callConv);
+
+ assert(!call->gtCall.gtCallCookie);
+ }
+
+ if (unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_C && unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_STDCALL &&
+ unmanagedCallConv != CORINFO_UNMANAGED_CALLCONV_THISCALL)
+ {
+ return;
+ }
+ optNativeCallCount++;
+
+ if (opts.compMustInlinePInvokeCalli && methHnd == nullptr)
+ {
+#ifdef _TARGET_X86_
+ // CALLI in IL stubs must be inlined
+ assert(impCanPInvokeInlineCallSite(callRetTyp));
+ assert(!info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig));
+#endif // _TARGET_X86_
+ }
+ else
+ {
+ if (!impCanPInvokeInline(callRetTyp))
+ {
+ return;
+ }
+
+ if (info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig))
+ {
+ return;
+ }
+ }
+
+ JITLOG((LL_INFO1000000, "\nInline a CALLI PINVOKE call from method %s", info.compFullName));
+
+ call->gtFlags |= GTF_CALL_UNMANAGED;
+ info.compCallUnmanaged++;
+
+ assert(!compIsForInlining());
+
+ // AMD64 convention is same for native and managed
+ if (unmanagedCallConv == CORINFO_UNMANAGED_CALLCONV_C)
+ {
+ call->gtFlags |= GTF_CALL_POP_ARGS;
+ }
+
+ if (unmanagedCallConv == CORINFO_UNMANAGED_CALLCONV_THISCALL)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_UNMGD_THISCALL;
+ }
+}
+
+GenTreePtr Compiler::impImportIndirectCall(CORINFO_SIG_INFO* sig, IL_OFFSETX ilOffset)
+{
+ var_types callRetTyp = JITtype2varType(sig->retType);
+
+ /* The function pointer is on top of the stack - It may be a
+ * complex expression. As it is evaluated after the args,
+ * it may cause registered args to be spilled. Simply spill it.
+ */
+
+ // Ignore this trivial case.
+ if (impStackTop().val->gtOper != GT_LCL_VAR)
+ {
+ impSpillStackEntry(verCurrentState.esStackDepth - 1,
+ BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impImportIndirectCall"));
+ }
+
+ /* Get the function pointer */
+
+ GenTreePtr fptr = impPopStack().val;
+ assert(genActualType(fptr->gtType) == TYP_I_IMPL);
+
+#ifdef DEBUG
+ // This temporary must never be converted to a double in stress mode,
+ // because that can introduce a call to the cast helper after the
+ // arguments have already been evaluated.
+
+ if (fptr->OperGet() == GT_LCL_VAR)
+ {
+ lvaTable[fptr->gtLclVarCommon.gtLclNum].lvKeepType = 1;
+ }
+#endif
+
+ /* Create the call node */
+
+ GenTreePtr call = gtNewIndCallNode(fptr, callRetTyp, nullptr, ilOffset);
+
+ call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
+
+ return call;
+}
+
+/*****************************************************************************/
+
+void Compiler::impPopArgsForUnmanagedCall(GenTreePtr call, CORINFO_SIG_INFO* sig)
+{
+ assert(call->gtFlags & GTF_CALL_UNMANAGED);
+
+ /* Since we push the arguments in reverse order (i.e. right -> left)
+ * spill any side effects from the stack
+ *
+ * OBS: If there is only one side effect, we do not need to spill it;
+ * thus we have to spill all side effects except the last one
+ */
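+
+ // Illustrative (hypothetical) example of why the spilling below matters:
+ // for an unmanaged call f(g(), h()) the IL evaluates g() and then h(), but on
+ // x86 the argument list is reversed to match the native right-to-left push
+ // order, so h() would end up evaluated before g() unless the earlier side
+ // effect (here g()) is spilled to a temp first.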
+
+ unsigned lastLevelWithSideEffects = UINT_MAX;
+
+ unsigned argsToReverse = sig->numArgs;
+
+ // For "thiscall", the first argument goes in a register. Since its
+ // order does not need to be changed, we do not need to spill it
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ assert(argsToReverse);
+ argsToReverse--;
+ }
+
+#ifndef _TARGET_X86_
+ // Don't reverse args on ARM or x64 - first four args always placed in regs in order
+ argsToReverse = 0;
+#endif
+
+ for (unsigned level = verCurrentState.esStackDepth - argsToReverse; level < verCurrentState.esStackDepth; level++)
+ {
+ if (verCurrentState.esStack[level].val->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ assert(lastLevelWithSideEffects == UINT_MAX);
+
+ impSpillStackEntry(level,
+ BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impPopArgsForUnmanagedCall - other side effect"));
+ }
+ else if (verCurrentState.esStack[level].val->gtFlags & GTF_SIDE_EFFECT)
+ {
+ if (lastLevelWithSideEffects != UINT_MAX)
+ {
+ /* We had a previous side effect - must spill it */
+ impSpillStackEntry(lastLevelWithSideEffects,
+ BAD_VAR_NUM DEBUGARG(false) DEBUGARG("impPopArgsForUnmanagedCall - side effect"));
+
+ /* Record the level for the current side effect in case we will spill it */
+ lastLevelWithSideEffects = level;
+ }
+ else
+ {
+ /* This is the first side effect encountered - record its level */
+
+ lastLevelWithSideEffects = level;
+ }
+ }
+ }
+
+ /* The argument list is now "clean" - no out-of-order side effects
+ * Pop the argument list in reverse order */
+
+ unsigned argFlags = 0;
+ GenTreePtr args = call->gtCall.gtCallArgs =
+ impPopRevList(sig->numArgs, &argFlags, sig, sig->numArgs - argsToReverse);
+
+ if (call->gtCall.gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ GenTreePtr thisPtr = args->Current();
+ impBashVarAddrsToI(thisPtr);
+ assert(thisPtr->TypeGet() == TYP_I_IMPL || thisPtr->TypeGet() == TYP_BYREF);
+ }
+
+ if (args)
+ {
+ call->gtFlags |= args->gtFlags & GTF_GLOB_EFFECT;
+ }
+}
+
+//------------------------------------------------------------------------
+// impInitClass: Build a node to initialize the class before accessing the
+// field if necessary
+//
+// Arguments:
+// pResolvedToken - The CORINFO_RESOLVED_TOKEN that has been initialized
+// by a call to CEEInfo::resolveToken().
+//
+// Return Value: If needed, a pointer to the node that will perform the class
+ // initialization. Otherwise, nullptr.
+//
+
+GenTreePtr Compiler::impInitClass(CORINFO_RESOLVED_TOKEN* pResolvedToken)
+{
+ CorInfoInitClassResult initClassResult =
+ info.compCompHnd->initClass(pResolvedToken->hField, info.compMethodHnd, impTokenLookupContextHandle);
+
+ if ((initClassResult & CORINFO_INITCLASS_USE_HELPER) == 0)
+ {
+ return nullptr;
+ }
+ BOOL runtimeLookup;
+
+ GenTreePtr node = impParentClassTokenToHandle(pResolvedToken, &runtimeLookup);
+
+ if (node == nullptr)
+ {
+ assert(compDonotInline());
+ return nullptr;
+ }
+
+ if (runtimeLookup)
+ {
+ node = gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(node));
+ }
+ else
+ {
+ // Call the shared non-GC static helper, as it's the fastest.
+ node = fgGetSharedCCtor(pResolvedToken->hClass);
+ }
+
+ return node;
+}
+
+GenTreePtr Compiler::impImportStaticReadOnlyField(void* fldAddr, var_types lclTyp)
+{
+ GenTreePtr op1 = nullptr;
+
+ switch (lclTyp)
+ {
+ int ival;
+ __int64 lval;
+ double dval;
+
+ case TYP_BOOL:
+ ival = *((bool*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_BYTE:
+ ival = *((signed char*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_UBYTE:
+ ival = *((unsigned char*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_SHORT:
+ ival = *((short*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_CHAR:
+ case TYP_USHORT:
+ ival = *((unsigned short*)fldAddr);
+ goto IVAL_COMMON;
+
+ case TYP_UINT:
+ case TYP_INT:
+ ival = *((int*)fldAddr);
+ IVAL_COMMON:
+ op1 = gtNewIconNode(ival);
+ break;
+
+ case TYP_LONG:
+ case TYP_ULONG:
+ lval = *((__int64*)fldAddr);
+ op1 = gtNewLconNode(lval);
+ break;
+
+ case TYP_FLOAT:
+ dval = *((float*)fldAddr);
+ op1 = gtNewDconNode(dval);
+#if !FEATURE_X87_DOUBLES
+ // X87 stack doesn't differentiate between float/double
+ // so R4 is treated as R8, but everybody else does
+ op1->gtType = TYP_FLOAT;
+#endif // FEATURE_X87_DOUBLES
+ break;
+
+ case TYP_DOUBLE:
+ dval = *((double*)fldAddr);
+ op1 = gtNewDconNode(dval);
+ break;
+
+ default:
+ assert(!"Unexpected lclTyp");
+ break;
+ }
+
+ return op1;
+}
+
+GenTreePtr Compiler::impImportStaticFieldAccess(CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_ACCESS_FLAGS access,
+ CORINFO_FIELD_INFO* pFieldInfo,
+ var_types lclTyp)
+{
+ GenTreePtr op1;
+
+ switch (pFieldInfo->fieldAccessor)
+ {
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+ {
+ assert(!compIsForInlining());
+
+ // We first call a special helper to get the statics base pointer
+ op1 = impParentClassTokenToHandle(pResolvedToken);
+
+ // compIsForInlining() is false, so we should never get NULL here.
+ assert(op1 != nullptr);
+
+ var_types type = TYP_BYREF;
+
+ switch (pFieldInfo->helper)
+ {
+ case CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE:
+ type = TYP_I_IMPL;
+ break;
+ case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE:
+ break;
+ default:
+ assert(!"unknown generic statics helper");
+ break;
+ }
+
+ op1 = gtNewHelperCallNode(pFieldInfo->helper, type, 0, gtNewArgList(op1));
+
+ FieldSeqNode* fs = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
+ op1 = gtNewOperNode(GT_ADD, type, op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, pFieldInfo->offset, fs));
+ }
+ break;
+
+ case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ unsigned callFlags = 0;
+
+ if (info.compCompHnd->getClassAttribs(pResolvedToken->hClass) & CORINFO_FLG_BEFOREFIELDINIT)
+ {
+ callFlags |= GTF_CALL_HOISTABLE;
+ }
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_READYTORUN_STATIC_BASE, TYP_BYREF, callFlags);
+
+ op1->gtCall.setEntryPoint(pFieldInfo->fieldLookup);
+ }
+ else
+#endif
+ {
+ op1 = fgGetStaticsCCtorHelper(pResolvedToken->hClass, pFieldInfo->helper);
+ }
+
+ {
+ FieldSeqNode* fs = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
+ op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, pFieldInfo->offset, fs));
+ }
+ break;
+
+ default:
+ if (!(access & CORINFO_ACCESS_ADDRESS))
+ {
+ // In future, it may be better to just create the right tree here instead of folding it later.
+ op1 = gtNewFieldRef(lclTyp, pResolvedToken->hField);
+
+ if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP)
+ {
+ op1->gtType = TYP_REF; // points at boxed object
+ FieldSeqNode* firstElemFldSeq =
+ GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
+ op1 =
+ gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, sizeof(void*), firstElemFldSeq));
+
+ if (varTypeIsStruct(lclTyp))
+ {
+ // Constructor adds GTF_GLOB_REF. Note that this is *not* GTF_EXCEPT.
+ op1 = gtNewObjNode(pFieldInfo->structType, op1);
+ }
+ else
+ {
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+ op1->gtFlags |= GTF_GLOB_REF | GTF_IND_NONFAULTING;
+ }
+ }
+
+ return op1;
+ }
+ else
+ {
+ void** pFldAddr = nullptr;
+ void* fldAddr = info.compCompHnd->getFieldAddress(pResolvedToken->hField, (void**)&pFldAddr);
+
+ FieldSeqNode* fldSeq = GetFieldSeqStore()->CreateSingleton(pResolvedToken->hField);
+
+ /* Create the data member node */
+ if (pFldAddr == nullptr)
+ {
+ op1 = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL, fldSeq);
+ }
+ else
+ {
+ op1 = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL, fldSeq);
+
+ // There are two cases here: either the static is RVA-based, in which case
+ // the type of the FIELD node is not a GC type and the handle to the RVA is
+ // a TYP_I_IMPL; or the FIELD node is a GC type and the handle to it is a
+ // TYP_BYREF into the GC heap, because handles to statics now go into the
+ // large object heap.
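+ //
+ // Sketch of the two shapes built below (illustrative, not from the sources):
+ //   RVA-based static (non-GC field type): GT_IND(TYP_I_IMPL, icon(pFldAddr))
+ //   GC-type static:                        GT_IND(TYP_BYREF,  icon(pFldAddr))
+ // In both cases the indirection yields the address of the actual field.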
+
+ var_types handleTyp = (var_types)(varTypeIsGC(lclTyp) ? TYP_BYREF : TYP_I_IMPL);
+ op1 = gtNewOperNode(GT_IND, handleTyp, op1);
+ op1->gtFlags |= GTF_IND_INVARIANT | GTF_IND_NONFAULTING;
+ }
+ }
+ break;
+ }
+
+ if (pFieldInfo->fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP)
+ {
+ op1 = gtNewOperNode(GT_IND, TYP_REF, op1);
+
+ FieldSeqNode* fldSeq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
+
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, sizeof(void*), fldSeq));
+ }
+
+ if (!(access & CORINFO_ACCESS_ADDRESS))
+ {
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+ op1->gtFlags |= GTF_GLOB_REF;
+ }
+
+ return op1;
+}
+
+ // In general, try to call this before most of the verification work, because most people expect the access
+ // exceptions before the verification exceptions. If you do this afterwards, that usually doesn't happen: it turns
+ // out that if you can't access something, we also consider you unverifiable for other reasons.
+void Compiler::impHandleAccessAllowed(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall)
+{
+ if (result != CORINFO_ACCESS_ALLOWED)
+ {
+ impHandleAccessAllowedInternal(result, helperCall);
+ }
+}
+
+void Compiler::impHandleAccessAllowedInternal(CorInfoIsAccessAllowedResult result, CORINFO_HELPER_DESC* helperCall)
+{
+ switch (result)
+ {
+ case CORINFO_ACCESS_ALLOWED:
+ break;
+ case CORINFO_ACCESS_ILLEGAL:
+ // if we're verifying, then we need to reject the illegal access to ensure that we don't think the
+ // method is verifiable. Otherwise, delay the exception to runtime.
+ if (compIsForImportOnly())
+ {
+ info.compCompHnd->ThrowExceptionForHelper(helperCall);
+ }
+ else
+ {
+ impInsertHelperCall(helperCall);
+ }
+ break;
+ case CORINFO_ACCESS_RUNTIME_CHECK:
+ impInsertHelperCall(helperCall);
+ break;
+ }
+}
+
+void Compiler::impInsertHelperCall(CORINFO_HELPER_DESC* helperInfo)
+{
+ // Construct the argument list
+ GenTreeArgList* args = nullptr;
+ assert(helperInfo->helperNum != CORINFO_HELP_UNDEF);
+ for (unsigned i = helperInfo->numArgs; i > 0; --i)
+ {
+ const CORINFO_HELPER_ARG& helperArg = helperInfo->args[i - 1];
+ GenTreePtr currentArg = nullptr;
+ switch (helperArg.argType)
+ {
+ case CORINFO_HELPER_ARG_TYPE_Field:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(
+ info.compCompHnd->getFieldClass(helperArg.fieldHandle));
+ currentArg = gtNewIconEmbFldHndNode(helperArg.fieldHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Method:
+ info.compCompHnd->methodMustBeLoadedBeforeCodeIsRun(helperArg.methodHandle);
+ currentArg = gtNewIconEmbMethHndNode(helperArg.methodHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Class:
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(helperArg.classHandle);
+ currentArg = gtNewIconEmbClsHndNode(helperArg.classHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Module:
+ currentArg = gtNewIconEmbScpHndNode(helperArg.moduleHandle);
+ break;
+ case CORINFO_HELPER_ARG_TYPE_Const:
+ currentArg = gtNewIconNode(helperArg.constant);
+ break;
+ default:
+ NO_WAY("Illegal helper arg type");
+ }
+ args = (currentArg == nullptr) ? gtNewArgList(currentArg) : gtNewListNode(currentArg, args);
+ }
+
+ /* TODO-Review:
+ * Mark as CSE'able, and hoistable. Consider marking hoistable unless you're in the inlinee.
+ * Also, consider sticking this in the first basic block.
+ */
+ GenTreePtr callout = gtNewHelperCallNode(helperInfo->helperNum, TYP_VOID, GTF_EXCEPT, args);
+ impAppendTree(callout, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+}
+
+void Compiler::impInsertCalloutForDelegate(CORINFO_METHOD_HANDLE callerMethodHnd,
+ CORINFO_METHOD_HANDLE calleeMethodHnd,
+ CORINFO_CLASS_HANDLE delegateTypeHnd)
+{
+#ifdef FEATURE_CORECLR
+ if (!info.compCompHnd->isDelegateCreationAllowed(delegateTypeHnd, calleeMethodHnd))
+ {
+ // Call the JIT_DelegateSecurityCheck helper before calling the actual function.
+ // This helper throws an exception if the CLR host disallows the call.
+
+ GenTreePtr helper = gtNewHelperCallNode(CORINFO_HELP_DELEGATE_SECURITY_CHECK, TYP_VOID, GTF_EXCEPT,
+ gtNewArgList(gtNewIconEmbClsHndNode(delegateTypeHnd),
+ gtNewIconEmbMethHndNode(calleeMethodHnd)));
+ // Append the callout statement
+ impAppendTree(helper, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+#endif // FEATURE_CORECLR
+}
+
+ // Checks whether the return types of caller and callee are compatible
+ // so that the callee can be tail called. Note that here we don't check
+ // compatibility in the IL Verifier sense, but rather whether the return type
+ // sizes are equal and the values are returned in the same return register.
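+ //
+ // Illustrative (hypothetical) examples:
+ //   caller returns int,    callee returns int -> compatible (exact match)
+ //   caller returns void,   callee returns int -> compatible on AMD64/ARM64 only
+ //                                                (Jit64 compat, call+pop+ret pattern)
+ //   caller returns double, callee returns int -> not compatible (different return registers)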
+bool Compiler::impTailCallRetTypeCompatible(var_types callerRetType,
+ CORINFO_CLASS_HANDLE callerRetTypeClass,
+ var_types calleeRetType,
+ CORINFO_CLASS_HANDLE calleeRetTypeClass)
+{
+ // Note that we cannot relax this condition with genActualType() as the
+ // calling convention dictates that the caller of a function with a small
+ // typed return value is responsible for normalizing the return value.
+ if (callerRetType == calleeRetType)
+ {
+ return true;
+ }
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // Jit64 compat:
+ if (callerRetType == TYP_VOID)
+ {
+ // This needs to be allowed to support the following IL pattern that Jit64 allows:
+ // tail.call
+ // pop
+ // ret
+ //
+ // Note that the above IL pattern is not valid as per IL verification rules.
+ // Therefore, only full trust code can take advantage of this pattern.
+ return true;
+ }
+
+ // These checks return true if the return value type sizes are the same and
+ // get returned in the same return register i.e. caller doesn't need to normalize
+ // return value. Some of the tail calls permitted by below checks would have
+ // been rejected by IL Verifier before we reached here. Therefore, only full
+ // trust code can make those tail calls.
+ unsigned callerRetTypeSize = 0;
+ unsigned calleeRetTypeSize = 0;
+ bool isCallerRetTypMBEnreg =
+ VarTypeIsMultiByteAndCanEnreg(callerRetType, callerRetTypeClass, &callerRetTypeSize, true);
+ bool isCalleeRetTypMBEnreg =
+ VarTypeIsMultiByteAndCanEnreg(calleeRetType, calleeRetTypeClass, &calleeRetTypeSize, true);
+
+ if (varTypeIsIntegral(callerRetType) || isCallerRetTypMBEnreg)
+ {
+ return (varTypeIsIntegral(calleeRetType) || isCalleeRetTypMBEnreg) && (callerRetTypeSize == calleeRetTypeSize);
+ }
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+
+ return false;
+}
+
+// For prefixFlags
+enum
+{
+ PREFIX_TAILCALL_EXPLICIT = 0x00000001, // call has "tail" IL prefix
+ PREFIX_TAILCALL_IMPLICIT =
+ 0x00000010, // call is treated as having "tail" prefix even though there is no "tail" IL prefix
+ PREFIX_TAILCALL = (PREFIX_TAILCALL_EXPLICIT | PREFIX_TAILCALL_IMPLICIT),
+ PREFIX_VOLATILE = 0x00000100,
+ PREFIX_UNALIGNED = 0x00001000,
+ PREFIX_CONSTRAINED = 0x00010000,
+ PREFIX_READONLY = 0x00100000
+};
+
+/********************************************************************************
+ *
+ * Returns true if the current opcode and the opcodes following it correspond
+ * to a supported tail call IL pattern.
+ *
+ */
+bool Compiler::impIsTailCallILPattern(bool tailPrefixed,
+ OPCODE curOpcode,
+ const BYTE* codeAddrOfNextOpcode,
+ const BYTE* codeEnd,
+ bool isRecursive,
+ bool* isCallPopAndRet /* = nullptr */)
+{
+ // Bail out if the current opcode is not a call.
+ if (!impOpcodeIsCallOpcode(curOpcode))
+ {
+ return false;
+ }
+
+#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // If shared ret tail opt is not enabled, we will enable
+ // it for recursive methods.
+ if (isRecursive)
+#endif
+ {
+ // We can actually handle the case where the ret is in a fall-through block, as long as that is the only
+ // remaining part of the sequence. Make sure we don't go past the end of the IL, however.
+ codeEnd = min(codeEnd + 1, info.compCode + info.compILCodeSize);
+ }
+
+ // Bail out if there is no next opcode after call
+ if (codeAddrOfNextOpcode >= codeEnd)
+ {
+ return false;
+ }
+
+ // Scan the opcodes to look for the following IL patterns if either
+ // i) the call is not tail prefixed (i.e. implicit tail call) or
+ // ii) if tail prefixed, IL verification is not needed for the method.
+ //
+ // Only in the above two cases we can allow the below tail call patterns
+ // violating ECMA spec.
+ //
+ // Pattern1:
+ // call
+ // nop*
+ // ret
+ //
+ // Pattern2:
+ // call
+ // nop*
+ // pop
+ // nop*
+ // ret
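+ //
+ // Illustrative (hypothetical) IL for Pattern2, i.e. an implicit tail call in
+ // a method returning void that discards the callee's result:
+ //     call int32 SomeClass::SomeMethod()
+ //     pop
+ //     ret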
+ int cntPop = 0;
+ OPCODE nextOpcode;
+
+#ifdef _TARGET_AMD64_
+ do
+ {
+ nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
+ codeAddrOfNextOpcode += sizeof(__int8);
+ } while ((codeAddrOfNextOpcode < codeEnd) && // Haven't reached end of method
+ (!tailPrefixed || !tiVerificationNeeded) && // Not ".tail" prefixed or method requires no IL verification
+ ((nextOpcode == CEE_NOP) || ((nextOpcode == CEE_POP) && (++cntPop == 1)))); // Next opcode = nop or exactly
+ // one pop seen so far.
+#else
+ nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
+#endif
+
+ if (isCallPopAndRet)
+ {
+ // Allow call+pop+ret to be tail call optimized if caller ret type is void
+ *isCallPopAndRet = (nextOpcode == CEE_RET) && (cntPop == 1);
+ }
+
+#ifdef _TARGET_AMD64_
+ // Jit64 Compat:
+ // Tail call IL pattern could be either of the following
+ // 1) call/callvirt/calli + ret
+ // 2) call/callvirt/calli + pop + ret in a method returning void.
+ return (nextOpcode == CEE_RET) && ((cntPop == 0) || ((cntPop == 1) && (info.compRetType == TYP_VOID)));
+#else //!_TARGET_AMD64_
+ return (nextOpcode == CEE_RET) && (cntPop == 0);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Determine whether the call could be converted to an implicit tail call
+ *
+ */
+bool Compiler::impIsImplicitTailCallCandidate(
+ OPCODE opcode, const BYTE* codeAddrOfNextOpcode, const BYTE* codeEnd, int prefixFlags, bool isRecursive)
+{
+
+#if FEATURE_TAILCALL_OPT
+ if (!opts.compTailCallOpt)
+ {
+ return false;
+ }
+
+ if (opts.compDbgCode || opts.MinOpts())
+ {
+ return false;
+ }
+
+ // must not be tail prefixed
+ if (prefixFlags & PREFIX_TAILCALL_EXPLICIT)
+ {
+ return false;
+ }
+
+#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // the block containing call is marked as BBJ_RETURN
+ // We allow shared ret tail call optimization on recursive calls even under
+ // !FEATURE_TAILCALL_OPT_SHARED_RETURN.
+ if (!isRecursive && (compCurBB->bbJumpKind != BBJ_RETURN))
+ return false;
+#endif // !FEATURE_TAILCALL_OPT_SHARED_RETURN
+
+ // must be call+ret or call+pop+ret
+ if (!impIsTailCallILPattern(false, opcode, codeAddrOfNextOpcode, codeEnd, isRecursive))
+ {
+ return false;
+ }
+
+ return true;
+#else
+ return false;
+#endif // FEATURE_TAILCALL_OPT
+}
+
+//------------------------------------------------------------------------
+// impImportCall: import a call-inspiring opcode
+//
+// Arguments:
+// opcode - opcode that inspires the call
+// pResolvedToken - resolved token for the call target
+// pConstrainedResolvedToken - resolved constraint token (or nullptr)
+ // newObjThis - tree for this pointer or uninitialized newobj temp (or nullptr)
+// prefixFlags - IL prefix flags for the call
+// callInfo - EE supplied info for the call
+// rawILOffset - IL offset of the opcode
+//
+// Returns:
+// Type of the call's return value.
+//
+// Notes:
+// opcode can be CEE_CALL, CEE_CALLI, CEE_CALLVIRT, or CEE_NEWOBJ.
+//
+// For CEE_NEWOBJ, newobjThis should be the temp grabbed for the allocated
+ // uninitialized object.
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+
+var_types Compiler::impImportCall(OPCODE opcode,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ CORINFO_RESOLVED_TOKEN* pConstrainedResolvedToken,
+ GenTreePtr newobjThis,
+ int prefixFlags,
+ CORINFO_CALL_INFO* callInfo,
+ IL_OFFSET rawILOffset)
+{
+ assert(opcode == CEE_CALL || opcode == CEE_CALLVIRT || opcode == CEE_NEWOBJ || opcode == CEE_CALLI);
+
+ IL_OFFSETX ilOffset = impCurILOffset(rawILOffset, true);
+ var_types callRetTyp = TYP_COUNT;
+ CORINFO_SIG_INFO* sig = nullptr;
+ CORINFO_METHOD_HANDLE methHnd = nullptr;
+ CORINFO_CLASS_HANDLE clsHnd = nullptr;
+ unsigned clsFlags = 0;
+ unsigned mflags = 0;
+ unsigned argFlags = 0;
+ GenTreePtr call = nullptr;
+ GenTreeArgList* args = nullptr;
+ CORINFO_THIS_TRANSFORM constraintCallThisTransform = CORINFO_NO_THIS_TRANSFORM;
+ CORINFO_CONTEXT_HANDLE exactContextHnd = nullptr;
+ BOOL exactContextNeedsRuntimeLookup = FALSE;
+ bool canTailCall = true;
+ const char* szCanTailCallFailReason = nullptr;
+ int tailCall = prefixFlags & PREFIX_TAILCALL;
+ bool readonlyCall = (prefixFlags & PREFIX_READONLY) != 0;
+
+ // Synchronized methods need to call CORINFO_HELP_MON_EXIT at the end. We could
+ // do that before tailcalls, but that is probably not the intended
+ // semantic. So just disallow tailcalls from synchronized methods.
+ // Also, popping arguments in a varargs function is more work and NYI
+ // If we have a security object, we have to keep our frame around for callers
+ // to see any imperative security.
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Caller is synchronized";
+ }
+#if !FEATURE_FIXED_OUT_ARGS
+ else if (info.compIsVarArgs)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Caller is varargs";
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+ else if (opts.compNeedSecurityCheck)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Caller requires a security check.";
+ }
+
+ // We only need to cast the return value of pinvoke inlined calls that return small types
+
+ // TODO-AMD64-Cleanup: Remove this when we stop interoperating with JIT64, or if we decide to stop
+ // widening everything! CoreCLR does not support JIT64 interoperation so no need to widen there.
+ // The existing x64 JIT doesn't bother widening all types to int, so we have to assume for
+ // the time being that the callee might be compiled by the other JIT and thus the return
+ // value will need to be widened by us (or not widened at all...)
+
+ // ReadyToRun code sticks with default calling convention that does not widen small return types.
+
+ bool checkForSmallType = opts.IsJit64Compat() || opts.IsReadyToRun();
+ bool bIntrinsicImported = false;
+
+ CORINFO_SIG_INFO calliSig;
+ GenTreeArgList* extraArg = nullptr;
+
+ /*-------------------------------------------------------------------------
+ * First create the call node
+ */
+
+ if (opcode == CEE_CALLI)
+ {
+ /* Get the call site sig */
+ eeGetSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &calliSig);
+
+ callRetTyp = JITtype2varType(calliSig.retType);
+
+ call = impImportIndirectCall(&calliSig, ilOffset);
+
+ // We don't know the target method, so we have to infer the flags, or
+ // assume the worst-case.
+ mflags = (calliSig.callConv & CORINFO_CALLCONV_HASTHIS) ? 0 : CORINFO_FLG_STATIC;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ unsigned structSize =
+ (callRetTyp == TYP_STRUCT) ? info.compCompHnd->getClassSize(calliSig.retTypeSigClass) : 0;
+ printf("\nIn Compiler::impImportCall: opcode is %s, kind=%d, callRetType is %s, structSize is %d\n",
+ opcodeNames[opcode], callInfo->kind, varTypeName(callRetTyp), structSize);
+ }
+#endif
+ // This should be checked in impImportBlockCode.
+ assert(!compIsForInlining() || !(impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY));
+
+ sig = &calliSig;
+
+#ifdef DEBUG
+ // We cannot lazily obtain the signature of a CALLI call because it has no method
+ // handle that we can use, so we need to save its full call signature here.
+ assert(call->gtCall.callSig == nullptr);
+ call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
+ *call->gtCall.callSig = calliSig;
+#endif // DEBUG
+ }
+ else // (opcode != CEE_CALLI)
+ {
+ CorInfoIntrinsics intrinsicID = CORINFO_INTRINSIC_Count;
+
+ // Passing CORINFO_CALLINFO_ALLOWINSTPARAM indicates that this JIT is prepared to
+ // supply the instantiation parameters necessary to make direct calls to underlying
+ // shared generic code, rather than calling through instantiating stubs. If the
+ // returned signature has CORINFO_CALLCONV_PARAMTYPE then this indicates that the JIT
+ // must indeed pass an instantiation parameter.
+
+ methHnd = callInfo->hMethod;
+
+ sig = &(callInfo->sig);
+ callRetTyp = JITtype2varType(sig->retType);
+
+ mflags = callInfo->methodFlags;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ unsigned structSize = (callRetTyp == TYP_STRUCT) ? info.compCompHnd->getClassSize(sig->retTypeSigClass) : 0;
+ printf("\nIn Compiler::impImportCall: opcode is %s, kind=%d, callRetType is %s, structSize is %d\n",
+ opcodeNames[opcode], callInfo->kind, varTypeName(callRetTyp), structSize);
+ }
+#endif
+ if (compIsForInlining())
+ {
+ /* Does this call site have security boundary restrictions? */
+
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CROSS_BOUNDARY_SECURITY);
+ return callRetTyp;
+ }
+
+ /* Does the inlinee need a security check token on the frame */
+
+ if (mflags & CORINFO_FLG_SECURITYCHECK)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_NEEDS_SECURITY_CHECK);
+ return callRetTyp;
+ }
+
+ /* Does the inlinee use StackCrawlMark */
+
+ if (mflags & CORINFO_FLG_DONT_INLINE_CALLER)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_STACK_CRAWL_MARK);
+ return callRetTyp;
+ }
+
+ /* For now ignore delegate invoke */
+
+ if (mflags & CORINFO_FLG_DELEGATE_INVOKE)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_DELEGATE_INVOKE);
+ return callRetTyp;
+ }
+
+ /* For now ignore varargs */
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NATIVE_VARARGS);
+ return callRetTyp;
+ }
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_MANAGED_VARARGS);
+ return callRetTyp;
+ }
+
+ if ((mflags & CORINFO_FLG_VIRTUAL) && (sig->sigInst.methInstCount != 0) && (opcode == CEE_CALLVIRT))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_IS_GENERIC_VIRTUAL);
+ return callRetTyp;
+ }
+ }
+
+ clsHnd = pResolvedToken->hClass;
+
+ clsFlags = callInfo->classFlags;
+
+#ifdef DEBUG
+ // If this is a call to JitTestLabel.Mark, do "early inlining", and record the test attribute.
+
+ // This recognition should really be done by knowing the methHnd of the relevant Mark method(s).
+ // These should be in mscorlib.h, and available through a JIT/EE interface call.
+ const char* modName;
+ const char* className;
+ const char* methodName;
+ if ((className = eeGetClassName(clsHnd)) != nullptr &&
+ strcmp(className, "System.Runtime.CompilerServices.JitTestLabel") == 0 &&
+ (methodName = eeGetMethodName(methHnd, &modName)) != nullptr && strcmp(methodName, "Mark") == 0)
+ {
+ return impImportJitTestLabelMark(sig->numArgs);
+ }
+#endif // DEBUG
+
+ // <NICE> Factor this into getCallInfo </NICE>
+ if ((mflags & CORINFO_FLG_INTRINSIC) && !pConstrainedResolvedToken)
+ {
+ call = impIntrinsic(clsHnd, methHnd, sig, pResolvedToken->token, readonlyCall,
+ (canTailCall && (tailCall != 0)), &intrinsicID);
+
+ if (call != nullptr)
+ {
+ assert(!(mflags & CORINFO_FLG_VIRTUAL) || (mflags & CORINFO_FLG_FINAL) ||
+ (clsFlags & CORINFO_FLG_FINAL));
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (call->OperGet() == GT_INTRINSIC)
+ {
+ if (opts.IsReadyToRun())
+ {
+ noway_assert(callInfo->kind == CORINFO_CALL);
+ call->gtIntrinsic.gtEntryPoint = callInfo->codePointerLookup.constLookup;
+ }
+ else
+ {
+ call->gtIntrinsic.gtEntryPoint.addr = nullptr;
+ }
+ }
+#endif
+
+ bIntrinsicImported = true;
+ goto DONE_CALL;
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD)
+ {
+ call = impSIMDIntrinsic(opcode, newobjThis, clsHnd, methHnd, sig, pResolvedToken->token);
+ if (call != nullptr)
+ {
+ bIntrinsicImported = true;
+ goto DONE_CALL;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ if ((mflags & CORINFO_FLG_VIRTUAL) && (mflags & CORINFO_FLG_EnC) && (opcode == CEE_CALLVIRT))
+ {
+ NO_WAY("Virtual call to a function added via EnC is not supported");
+ goto DONE_CALL;
+ }
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_DEFAULT &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ BADCODE("Bad calling convention");
+ }
+
+ //-------------------------------------------------------------------------
+ // Construct the call node
+ //
+ // Work out what sort of call we're making.
+ // Dispense with virtual calls implemented via LDVIRTFTN immediately.
+
+ constraintCallThisTransform = callInfo->thisTransform;
+
+ exactContextHnd = callInfo->contextHandle;
+ exactContextNeedsRuntimeLookup = callInfo->exactContextNeedsRuntimeLookup;
+
+ // A recursive call is treated as a loop back to the beginning of the method.
+ if (methHnd == info.compMethodHnd)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nFound recursive call in the method. Mark BB%02u to BB%02u as having a backward branch.\n",
+ fgFirstBB->bbNum, compCurBB->bbNum);
+ }
+#endif
+ fgMarkBackwardJump(fgFirstBB, compCurBB);
+ }
+
+ switch (callInfo->kind)
+ {
+
+ case CORINFO_VIRTUALCALL_STUB:
+ {
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(!(clsFlags & CORINFO_FLG_VALUECLASS));
+ if (callInfo->stubLookup.lookupKind.needsRuntimeLookup)
+ {
+
+ if (compIsForInlining())
+ {
+ // Don't import runtime lookups when inlining
+ // Inlining has to be aborted in such a case
+ /* XXX Fri 3/20/2009
+ * By the way, this would never succeed. If the handle lookup is into the generic
+ * dictionary for a candidate, you'll generate different dictionary offsets and the
+ * inlined code will crash.
+ *
+ * To anyone reviewing this code: when could this ever succeed in the future? It'll
+ * always have a handle lookup. These lookups are safe intra-module, but we're just
+ * failing here.
+ */
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_COMPLEX_HANDLE);
+ return callRetTyp;
+ }
+
+ GenTreePtr stubAddr = impRuntimeLookupToTree(pResolvedToken, &callInfo->stubLookup, methHnd);
+ assert(!compDonotInline());
+
+ // This is the rough code to set up an indirect stub call
+ assert(stubAddr != nullptr);
+
+ // The stubAddr may be a
+ // complex expression. As it is evaluated after the args,
+ // it may cause registered args to be spilled. Simply spill it.
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("VirtualCall with runtime lookup"));
+ impAssignTempGen(lclNum, stubAddr, (unsigned)CHECK_SPILL_ALL);
+ stubAddr = gtNewLclvNode(lclNum, TYP_I_IMPL);
+
+ // Create the actual call node
+
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);
+
+ call = gtNewIndCallNode(stubAddr, callRetTyp, nullptr);
+
+ call->gtFlags |= GTF_EXCEPT | (stubAddr->gtFlags & GTF_GLOB_EFFECT);
+ call->gtFlags |= GTF_CALL_VIRT_STUB;
+
+#ifdef _TARGET_X86_
+ // No tailcalls allowed for these yet...
+ canTailCall = false;
+ szCanTailCallFailReason = "VirtualCall with runtime lookup";
+#endif
+ }
+ else
+ {
+ // OK, the stub is available at compile time.
+
+ call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset);
+ call->gtCall.gtStubCallStubAddr = callInfo->stubLookup.constLookup.addr;
+ call->gtFlags |= GTF_CALL_VIRT_STUB;
+ assert(callInfo->stubLookup.constLookup.accessType != IAT_PPVALUE);
+ if (callInfo->stubLookup.constLookup.accessType == IAT_PVALUE)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_VIRTSTUB_REL_INDIRECT;
+ }
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ // Null check is sometimes needed for ready to run to handle
+ // non-virtual <-> virtual changes between versions
+ if (callInfo->nullInstanceCheck)
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+ }
+#endif
+
+ break;
+ }
+
+ case CORINFO_VIRTUALCALL_VTABLE:
+ {
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(!(clsFlags & CORINFO_FLG_VALUECLASS));
+ call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset);
+ call->gtFlags |= GTF_CALL_VIRT_VTABLE;
+ break;
+ }
+
+ case CORINFO_VIRTUALCALL_LDVIRTFTN:
+ {
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_CALL_VIA_LDVIRTFTN);
+ return callRetTyp;
+ }
+
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(!(clsFlags & CORINFO_FLG_VALUECLASS));
+ // OK, we've been told to call via LDVIRTFTN, so just
+ // make the call now....
+
+ args = impPopList(sig->numArgs, &argFlags, sig);
+
+ GenTreePtr thisPtr = impPopStack().val;
+ thisPtr = impTransformThis(thisPtr, pConstrainedResolvedToken, callInfo->thisTransform);
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+
+ // Clone the (possibly transformed) "this" pointer
+ GenTreePtr thisPtrCopy;
+ thisPtr = impCloneExpr(thisPtr, &thisPtrCopy, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("LDVIRTFTN this pointer"));
+
+ GenTreePtr fptr = impImportLdvirtftn(thisPtr, pResolvedToken, callInfo);
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+
+ thisPtr = nullptr; // can't reuse it
+
+ // Now make an indirect call through the function pointer
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("VirtualCall through function pointer"));
+ impAssignTempGen(lclNum, fptr, (unsigned)CHECK_SPILL_ALL);
+ fptr = gtNewLclvNode(lclNum, TYP_I_IMPL);
+
+ // Create the actual call node
+
+ call = gtNewIndCallNode(fptr, callRetTyp, args, ilOffset);
+ call->gtCall.gtCallObjp = thisPtrCopy;
+ call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ // Null check is needed for ready to run to handle
+ // non-virtual <-> virtual changes between versions
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+#endif
+
+ // Since we are jumping over some code, check that it's OK to skip that code.
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG &&
+ (sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);
+ goto DONE;
+ }
+
+ case CORINFO_CALL:
+ {
+ // This is for a non-virtual, non-interface etc. call
+ call = gtNewCallNode(CT_USER_FUNC, callInfo->hMethod, callRetTyp, nullptr, ilOffset);
+
+ // We remove the nullcheck for the GetType call intrinsic.
+ // TODO-CQ: JIT64 does not introduce the null check for many more helper calls
+ // and intrinsics.
+ if (callInfo->nullInstanceCheck &&
+ !((mflags & CORINFO_FLG_INTRINSIC) != 0 && (intrinsicID == CORINFO_INTRINSIC_Object_GetType)))
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ call->gtCall.setEntryPoint(callInfo->codePointerLookup.constLookup);
+ }
+#endif
+ break;
+ }
+
+ case CORINFO_CALL_CODE_POINTER:
+ {
+ // The EE has asked us to call by computing a code pointer and then doing an
+ // indirect call. This is because a runtime lookup is required to get the code entry point.
+
+ // These calls always follow a uniform calling convention, i.e. no extra hidden params
+ assert((sig->callConv & CORINFO_CALLCONV_PARAMTYPE) == 0);
+
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_VARARG);
+ assert((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_NATIVEVARARG);
+
+ GenTreePtr fptr =
+ impLookupToTree(pResolvedToken, &callInfo->codePointerLookup, GTF_ICON_FTN_ADDR, callInfo->hMethod);
+
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+
+ // Now make an indirect call through the function pointer
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Indirect call through function pointer"));
+ impAssignTempGen(lclNum, fptr, (unsigned)CHECK_SPILL_ALL);
+ fptr = gtNewLclvNode(lclNum, TYP_I_IMPL);
+
+ call = gtNewIndCallNode(fptr, callRetTyp, nullptr, ilOffset);
+ call->gtFlags |= GTF_EXCEPT | (fptr->gtFlags & GTF_GLOB_EFFECT);
+ if (callInfo->nullInstanceCheck)
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+
+ break;
+ }
+
+ default:
+ assert(!"unknown call kind");
+ break;
+ }
+
+ //-------------------------------------------------------------------------
+ // Set more flags
+
+ PREFIX_ASSUME(call != nullptr);
+
+ if (mflags & CORINFO_FLG_NOGCCHECK)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_NOGCCHECK;
+ }
+
+ // Mark call if it's one of the ones we will maybe treat as an intrinsic
+ if (intrinsicID == CORINFO_INTRINSIC_Object_GetType || intrinsicID == CORINFO_INTRINSIC_TypeEQ ||
+ intrinsicID == CORINFO_INTRINSIC_TypeNEQ || intrinsicID == CORINFO_INTRINSIC_GetCurrentManagedThread ||
+ intrinsicID == CORINFO_INTRINSIC_GetManagedThreadId)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_SPECIAL_INTRINSIC;
+ }
+ }
+ assert(sig);
+ assert(clsHnd || (opcode == CEE_CALLI)); // We're never verifying for CALLI, so this is not set.
+
+ /* Some sanity checks */
+
+ // CALL_VIRT and NEWOBJ must have a THIS pointer
+ assert((opcode != CEE_CALLVIRT && opcode != CEE_NEWOBJ) || (sig->callConv & CORINFO_CALLCONV_HASTHIS));
+ // static bit and hasThis are negations of one another
+ assert(((mflags & CORINFO_FLG_STATIC) != 0) == ((sig->callConv & CORINFO_CALLCONV_HASTHIS) == 0));
+ assert(call != nullptr);
+
+ /*-------------------------------------------------------------------------
+ * Check special-cases etc
+ */
+
+ /* Special case - Check if it is a call to Delegate.Invoke(). */
+
+ if (mflags & CORINFO_FLG_DELEGATE_INVOKE)
+ {
+ assert(!compIsForInlining());
+ assert(!(mflags & CORINFO_FLG_STATIC)); // can't call a static method
+ assert(mflags & CORINFO_FLG_FINAL);
+
+ /* Set the delegate flag */
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_DELEGATE_INV;
+
+ if (callInfo->secureDelegateInvoke)
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_SECURE_DELEGATE_INV;
+ }
+
+ if (opcode == CEE_CALLVIRT)
+ {
+ assert(mflags & CORINFO_FLG_FINAL);
+
+ /* It should have the GTF_CALL_NULLCHECK flag set. Reset it */
+ assert(call->gtFlags & GTF_CALL_NULLCHECK);
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+ }
+
+ CORINFO_CLASS_HANDLE actualMethodRetTypeSigClass;
+ actualMethodRetTypeSigClass = sig->retTypeSigClass;
+ if (varTypeIsStruct(callRetTyp))
+ {
+ callRetTyp = impNormStructType(actualMethodRetTypeSigClass);
+ call->gtType = callRetTyp;
+ }
+
+#if !FEATURE_VARARG
+ /* Check for varargs */
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
+ (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ BADCODE("Varargs not supported.");
+ }
+#endif // !FEATURE_VARARG
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG ||
+ (sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_NATIVEVARARG)
+ {
+ assert(!compIsForInlining());
+
+ /* Set the right flags */
+
+ call->gtFlags |= GTF_CALL_POP_ARGS;
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_VARARGS;
+
+ /* Can't allow tailcall for varargs as it is caller-pop. The caller
+ will be expecting to pop a certain number of arguments, but if we
+ tailcall to a function with a different number of arguments, we
+ are hosed. There are ways around this (caller remembers esp value,
+ varargs is not caller-pop, etc), but not worth it. */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ if (canTailCall)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Callee is varargs";
+ }
+#endif
+
+ /* Get the total number of arguments - this is already correct
+ * for CALLI - for methods we have to get it from the call site */
+
+ if (opcode != CEE_CALLI)
+ {
+#ifdef DEBUG
+ unsigned numArgsDef = sig->numArgs;
+#endif
+ eeGetCallSiteSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, sig);
+
+#ifdef DEBUG
+ // We cannot lazily obtain the signature of a vararg call because using its method
+ // handle will give us only the declared argument list, not the full argument list.
+ assert(call->gtCall.callSig == nullptr);
+ call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
+ *call->gtCall.callSig = *sig;
+#endif
+
+            // For vararg calls we must be sure to load the return type of the
+            // method actually being called, as well as the return type specified
+            // in the vararg signature. With type equivalency, these types may not
+            // be the same.
+ if (sig->retTypeSigClass != actualMethodRetTypeSigClass)
+ {
+ if (actualMethodRetTypeSigClass != nullptr && sig->retType != CORINFO_TYPE_CLASS &&
+ sig->retType != CORINFO_TYPE_BYREF && sig->retType != CORINFO_TYPE_PTR &&
+ sig->retType != CORINFO_TYPE_VAR)
+ {
+ // Make sure that all valuetypes (including enums) that we push are loaded.
+                    // This is to guarantee that if a GC is triggered from the prestub of this method,
+ // all valuetypes in the method signature are already loaded.
+ // We need to be able to find the size of the valuetypes, but we cannot
+ // do a class-load from within GC.
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(actualMethodRetTypeSigClass);
+ }
+ }
+
+ assert(numArgsDef <= sig->numArgs);
+ }
+
+        /* We will have "cookie" as the last argument but we cannot push
+         * it on the operand stack because we may overflow, so we append it
+         * to the arg list after we pop the other arguments */
+ }
+
+ if (mflags & CORINFO_FLG_SECURITYCHECK)
+ {
+ assert(!compIsForInlining());
+
+ // Need security prolog/epilog callouts when there is
+ // imperative security in the method. This is to give security a
+ // chance to do any setup in the prolog and cleanup in the epilog if needed.
+
+ if (compIsForInlining())
+ {
+            // Cannot handle this if the method being imported is itself an inlinee,
+            // because an inlinee method does not have its own frame.
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_NEEDS_SECURITY_CHECK);
+ return callRetTyp;
+ }
+ else
+ {
+ tiSecurityCalloutNeeded = true;
+
+ // If the current method calls a method which needs a security check,
+ // (i.e. the method being compiled has imperative security)
+ // we need to reserve a slot for the security object in
+ // the current method's stack frame
+ opts.compNeedSecurityCheck = true;
+ }
+ }
+
+ //--------------------------- Inline NDirect ------------------------------
+
+ if (!compIsForInlining())
+ {
+ impCheckForPInvokeCall(call, methHnd, sig, mflags);
+ }
+
+ if (call->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ // We set up the unmanaged call by linking the frame, disabling GC, etc
+ // This needs to be cleaned up on return
+ if (canTailCall)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Callee is native";
+ }
+
+ checkForSmallType = true;
+
+ impPopArgsForUnmanagedCall(call, sig);
+
+ goto DONE;
+ }
+ else if ((opcode == CEE_CALLI) && (((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_STDCALL) ||
+ ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_C) ||
+ ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_THISCALL) ||
+ ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_FASTCALL)))
+ {
+ if (!info.compCompHnd->canGetCookieForPInvokeCalliSig(sig))
+ {
+ // Normally this only happens with inlining.
+ // However, a generic method (or type) being NGENd into another module
+            // can run into this issue as well. There's not an easy fall-back for NGEN,
+            // so instead we fall back to JIT.
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CANT_EMBED_PINVOKE_COOKIE);
+ }
+ else
+ {
+ IMPL_LIMITATION("Can't get PInvoke cookie (cross module generics)");
+ }
+
+ return callRetTyp;
+ }
+
+ GenTreePtr cookie = eeGetPInvokeCookie(sig);
+
+ // This cookie is required to be either a simple GT_CNS_INT or
+ // an indirection of a GT_CNS_INT
+ //
+ GenTreePtr cookieConst = cookie;
+ if (cookie->gtOper == GT_IND)
+ {
+ cookieConst = cookie->gtOp.gtOp1;
+ }
+ assert(cookieConst->gtOper == GT_CNS_INT);
+
+ // Setting GTF_DONT_CSE on the GT_CNS_INT as well as on the GT_IND (if it exists) will ensure that
+ // we won't allow this tree to participate in any CSE logic
+ //
+ cookie->gtFlags |= GTF_DONT_CSE;
+ cookieConst->gtFlags |= GTF_DONT_CSE;
+
+ call->gtCall.gtCallCookie = cookie;
+
+ if (canTailCall)
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "PInvoke calli";
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Create the argument list
+ */
+
+ //-------------------------------------------------------------------------
+ // Special case - for varargs we have an implicit last argument
+
+ if ((sig->callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG)
+ {
+ assert(!compIsForInlining());
+
+ void *varCookie, *pVarCookie;
+ if (!info.compCompHnd->canGetVarArgsHandle(sig))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CANT_EMBED_VARARGS_COOKIE);
+ return callRetTyp;
+ }
+
+ varCookie = info.compCompHnd->getVarArgsHandle(sig, &pVarCookie);
+ assert((!varCookie) != (!pVarCookie));
+ GenTreePtr cookie = gtNewIconEmbHndNode(varCookie, pVarCookie, GTF_ICON_VARG_HDL);
+
+ assert(extraArg == nullptr);
+ extraArg = gtNewArgList(cookie);
+ }
+
+ //-------------------------------------------------------------------------
+ // Extra arg for shared generic code and array methods
+ //
+ // Extra argument containing instantiation information is passed in the
+ // following circumstances:
+ // (a) To the "Address" method on array classes; the extra parameter is
+ // the array's type handle (a TypeDesc)
+ // (b) To shared-code instance methods in generic structs; the extra parameter
+ // is the struct's type handle (a vtable ptr)
+ // (c) To shared-code per-instantiation non-generic static methods in generic
+ // classes and structs; the extra parameter is the type handle
+ // (d) To shared-code generic methods; the extra parameter is an
+ // exact-instantiation MethodDesc
+ //
+ // We also set the exact type context associated with the call so we can
+ // inline the call correctly later on.
+
+ if (sig->callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ assert(call->gtCall.gtCallType == CT_USER_FUNC);
+ if (clsHnd == nullptr)
+ {
+ NO_WAY("CALLI on parameterized type");
+ }
+
+ assert(opcode != CEE_CALLI);
+
+ GenTreePtr instParam;
+ BOOL runtimeLookup;
+
+ // Instantiated generic method
+ if (((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_METHOD)
+ {
+ CORINFO_METHOD_HANDLE exactMethodHandle =
+ (CORINFO_METHOD_HANDLE)((SIZE_T)exactContextHnd & ~CORINFO_CONTEXTFLAGS_MASK);
+
+ if (!exactContextNeedsRuntimeLookup)
+ {
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ instParam =
+ impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_METHOD_HDL, exactMethodHandle);
+ if (instParam == nullptr)
+ {
+ return callRetTyp;
+ }
+ }
+ else
+#endif
+ {
+ instParam = gtNewIconEmbMethHndNode(exactMethodHandle);
+ info.compCompHnd->methodMustBeLoadedBeforeCodeIsRun(exactMethodHandle);
+ }
+ }
+ else
+ {
+ instParam = impTokenToHandle(pResolvedToken, &runtimeLookup, TRUE /*mustRestoreHandle*/);
+ if (instParam == nullptr)
+ {
+ return callRetTyp;
+ }
+ }
+ }
+
+ // otherwise must be an instance method in a generic struct,
+ // a static method in a generic type, or a runtime-generated array method
+ else
+ {
+ assert(((SIZE_T)exactContextHnd & CORINFO_CONTEXTFLAGS_MASK) == CORINFO_CONTEXTFLAGS_CLASS);
+ CORINFO_CLASS_HANDLE exactClassHandle =
+ (CORINFO_CLASS_HANDLE)((SIZE_T)exactContextHnd & ~CORINFO_CONTEXTFLAGS_MASK);
+
+ if (compIsForInlining() && (clsFlags & CORINFO_FLG_ARRAY) != 0)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_IS_ARRAY_METHOD);
+ return callRetTyp;
+ }
+
+ if ((clsFlags & CORINFO_FLG_ARRAY) && readonlyCall)
+ {
+ // We indicate "readonly" to the Address operation by using a null
+ // instParam.
+ instParam = gtNewIconNode(0, TYP_REF);
+ }
+
+ if (!exactContextNeedsRuntimeLookup)
+ {
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ instParam =
+ impReadyToRunLookupToTree(&callInfo->instParamLookup, GTF_ICON_CLASS_HDL, exactClassHandle);
+ if (instParam == NULL)
+ {
+ return callRetTyp;
+ }
+ }
+ else
+#endif
+ {
+ instParam = gtNewIconEmbClsHndNode(exactClassHandle);
+ info.compCompHnd->classMustBeLoadedBeforeCodeIsRun(exactClassHandle);
+ }
+ }
+ else
+ {
+ instParam = impParentClassTokenToHandle(pResolvedToken, &runtimeLookup, TRUE /*mustRestoreHandle*/);
+ if (instParam == nullptr)
+ {
+ return callRetTyp;
+ }
+ }
+ }
+
+ assert(extraArg == nullptr);
+ extraArg = gtNewArgList(instParam);
+ }
+
+ // Inlining may need the exact type context (exactContextHnd) if we're inlining shared generic code, in particular
+ // to inline 'polytypic' operations such as static field accesses, type tests and method calls which
+ // rely on the exact context. The exactContextHnd is passed back to the JitInterface at appropriate points.
+ // exactContextHnd is not currently required when inlining shared generic code into shared
+ // generic code, since the inliner aborts whenever shared code polytypic operations are encountered
+ // (e.g. anything marked needsRuntimeLookup)
+ if (exactContextNeedsRuntimeLookup)
+ {
+ exactContextHnd = nullptr;
+ }
+
+ //-------------------------------------------------------------------------
+ // The main group of arguments
+
+ args = call->gtCall.gtCallArgs = impPopList(sig->numArgs, &argFlags, sig, extraArg);
+
+ if (args)
+ {
+ call->gtFlags |= args->gtFlags & GTF_GLOB_EFFECT;
+ }
+
+ //-------------------------------------------------------------------------
+ // The "this" pointer
+
+ if (!(mflags & CORINFO_FLG_STATIC) && !((opcode == CEE_NEWOBJ) && (newobjThis == nullptr)))
+ {
+ GenTreePtr obj;
+
+ if (opcode == CEE_NEWOBJ)
+ {
+ obj = newobjThis;
+ }
+ else
+ {
+ obj = impPopStack().val;
+ obj = impTransformThis(obj, pConstrainedResolvedToken, constraintCallThisTransform);
+ if (compDonotInline())
+ {
+ return callRetTyp;
+ }
+ }
+
+ /* Is this a virtual or interface call? */
+
+ if ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT)
+ {
+ /* only true object pointers can be virtual */
+
+ assert(obj->gtType == TYP_REF);
+ }
+ else
+ {
+ if (impIsThis(obj))
+ {
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_NONVIRT_SAME_THIS;
+ }
+ }
+
+ /* Store the "this" value in the call */
+
+ call->gtFlags |= obj->gtFlags & GTF_GLOB_EFFECT;
+ call->gtCall.gtCallObjp = obj;
+ }
+
+ //-------------------------------------------------------------------------
+ // The "this" pointer for "newobj"
+
+ if (opcode == CEE_NEWOBJ)
+ {
+ if (clsFlags & CORINFO_FLG_VAROBJSIZE)
+ {
+ assert(!(clsFlags & CORINFO_FLG_ARRAY)); // arrays handled separately
+            // This is a 'new' of a variable-sized object, where
+            // the constructor is to return the object. In this case
+            // the constructor claims to return VOID but we know it
+            // actually returns the new object.
+ assert(callRetTyp == TYP_VOID);
+ callRetTyp = TYP_REF;
+ call->gtType = TYP_REF;
+ impSpillSpecialSideEff();
+
+ impPushOnStack(call, typeInfo(TI_REF, clsHnd));
+ }
+ else
+ {
+ if (clsFlags & CORINFO_FLG_DELEGATE)
+ {
+                // The new inliner morphs it in impImportCall.
+ // This will allow us to inline the call to the delegate constructor.
+ call = fgOptimizeDelegateConstructor(call, &exactContextHnd);
+ }
+
+ if (!bIntrinsicImported)
+ {
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Keep track of the raw IL offset of the call
+ call->gtCall.gtRawILOffset = rawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Is it an inline candidate?
+ impMarkInlineCandidate(call, exactContextHnd, callInfo);
+ }
+
+ // append the call node.
+ impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+            // Now push the value of the 'new' onto the stack.
+
+            // This is a 'new' of a non-variable-sized object.
+ // Append the new node (op1) to the statement list,
+ // and then push the local holding the value of this
+ // new instruction on the stack.
+
+ if (clsFlags & CORINFO_FLG_VALUECLASS)
+ {
+ assert(newobjThis->gtOper == GT_ADDR && newobjThis->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+
+ unsigned tmp = newobjThis->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ impPushOnStack(gtNewLclvNode(tmp, lvaGetRealType(tmp)), verMakeTypeInfo(clsHnd).NormaliseForStack());
+ }
+ else
+ {
+ if (newobjThis->gtOper == GT_COMMA)
+ {
+ // In coreclr the callout can be inserted even if verification is disabled
+ // so we cannot rely on tiVerificationNeeded alone
+
+ // We must have inserted the callout. Get the real newobj.
+ newobjThis = newobjThis->gtOp.gtOp2;
+ }
+
+ assert(newobjThis->gtOper == GT_LCL_VAR);
+ impPushOnStack(gtNewLclvNode(newobjThis->gtLclVarCommon.gtLclNum, TYP_REF), typeInfo(TI_REF, clsHnd));
+ }
+ }
+ return callRetTyp;
+ }
+
+DONE:
+
+ if (tailCall)
+ {
+        // This check cannot be performed for implicit tail calls because
+        // impIsImplicitTailCallCandidate() does not check whether return
+        // types are compatible before marking a call node with PREFIX_TAILCALL_IMPLICIT.
+        // As a result, in the following case we may find that the type stack
+        // is non-empty if Callee() is considered for implicit tail calling:
+        // int Caller(..) { .... void Callee(); ret val; ... }
+        //
+        // Note that we cannot check return type compatibility before impImportCall()
+        // as we don't have the required info, or we would need to duplicate some of
+        // the logic of impImportCall().
+ //
+ // For implicit tail calls, we perform this check after return types are
+ // known to be compatible.
+ if ((tailCall & PREFIX_TAILCALL_EXPLICIT) && (verCurrentState.esStackDepth != 0))
+ {
+ BADCODE("Stack should be empty after tailcall");
+ }
+
+        // Note that we cannot relax this condition with genActualType() as
+        // the calling convention dictates that the caller of a function with
+        // a small-typed return value is responsible for normalizing the return value.
+
+ if (canTailCall &&
+ !impTailCallRetTypeCompatible(info.compRetType, info.compMethodInfo->args.retTypeClass, callRetTyp,
+ callInfo->sig.retTypeClass))
+ {
+ canTailCall = false;
+ szCanTailCallFailReason = "Return types are not tail call compatible";
+ }
+
+ // Stack empty check for implicit tail calls.
+ if (canTailCall && (tailCall & PREFIX_TAILCALL_IMPLICIT) && (verCurrentState.esStackDepth != 0))
+ {
+#ifdef _TARGET_AMD64_
+ // JIT64 Compatibility: Opportunistic tail call stack mismatch throws a VerificationException
+ // in JIT64, not an InvalidProgramException.
+ Verify(false, "Stack should be empty after tailcall");
+#else // _TARGET_64BIT_
+ BADCODE("Stack should be empty after tailcall");
+#endif //!_TARGET_64BIT_
+ }
+
+ // assert(compCurBB is not a catch, finally or filter block);
+ // assert(compCurBB is not a try block protected by a finally block);
+
+ // Check for permission to tailcall
+ bool explicitTailCall = (tailCall & PREFIX_TAILCALL_EXPLICIT) != 0;
+
+ assert(!explicitTailCall || compCurBB->bbJumpKind == BBJ_RETURN);
+
+ if (canTailCall)
+ {
+            // True virtual or indirect calls shouldn't pass in a callee handle.
+ CORINFO_METHOD_HANDLE exactCalleeHnd = ((call->gtCall.gtCallType != CT_USER_FUNC) ||
+ ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT))
+ ? nullptr
+ : methHnd;
+ GenTreePtr thisArg = call->gtCall.gtCallObjp;
+
+ if (info.compCompHnd->canTailCall(info.compMethodHnd, methHnd, exactCalleeHnd, explicitTailCall))
+ {
+ canTailCall = true;
+ if (explicitTailCall)
+ {
+ // In case of explicit tail calls, mark it so that it is not considered
+ // for in-lining.
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_EXPLICIT_TAILCALL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nGTF_CALL_M_EXPLICIT_TAILCALL bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+#endif
+ }
+ else
+ {
+#if FEATURE_TAILCALL_OPT
+ // Must be an implicit tail call.
+ assert((tailCall & PREFIX_TAILCALL_IMPLICIT) != 0);
+
+ // It is possible that a call node is both an inline candidate and marked
+                    // for opportunistic tail calling. In-lining happens before morphing of
+ // trees. If in-lining of an in-line candidate gets aborted for whatever
+ // reason, it will survive to the morphing stage at which point it will be
+ // transformed into a tail call after performing additional checks.
+
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_IMPLICIT_TAILCALL;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nGTF_CALL_M_IMPLICIT_TAILCALL bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+#endif
+
+#else //! FEATURE_TAILCALL_OPT
+ NYI("Implicit tail call prefix on a target which doesn't support opportunistic tail calls");
+
+#endif // FEATURE_TAILCALL_OPT
+ }
+
+ // we can't report success just yet...
+ }
+ else
+ {
+ canTailCall = false;
+// canTailCall reported its reasons already
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\ninfo.compCompHnd->canTailCall returned false for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+#endif
+ }
+ }
+ else
+ {
+ // If this assert fires it means that canTailCall was set to false without setting a reason!
+ assert(szCanTailCallFailReason != nullptr);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRejecting %splicit tail call for call ", explicitTailCall ? "ex" : "im");
+ printTreeID(call);
+ printf(": %s\n", szCanTailCallFailReason);
+ }
+#endif
+ info.compCompHnd->reportTailCallDecision(info.compMethodHnd, methHnd, explicitTailCall, TAILCALL_FAIL,
+ szCanTailCallFailReason);
+ }
+ }
+
+// Note: we assume that small return types are already normalized by the managed callee
+// or by the pinvoke stub for calls to unmanaged code.
+
+DONE_CALL:
+
+ if (!bIntrinsicImported)
+ {
+ //
+ // Things needed to be checked when bIntrinsicImported is false.
+ //
+
+ assert(call->gtOper == GT_CALL);
+ assert(sig != nullptr);
+
+ // Tail calls require us to save the call site's sig info so we can obtain an argument
+ // copying thunk from the EE later on.
+ if (call->gtCall.callSig == nullptr)
+ {
+ call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
+ *call->gtCall.callSig = *sig;
+ }
+
+ if (compIsForInlining() && opcode == CEE_CALLVIRT)
+ {
+ GenTreePtr callObj = call->gtCall.gtCallObjp;
+ assert(callObj != nullptr);
+
+ unsigned callKind = call->gtFlags & GTF_CALL_VIRT_KIND_MASK;
+
+ if (((callKind != GTF_CALL_NONVIRT) || (call->gtFlags & GTF_CALL_NULLCHECK)) &&
+ impInlineIsGuaranteedThisDerefBeforeAnySideEffects(call->gtCall.gtCallArgs, callObj,
+ impInlineInfo->inlArgInfo))
+ {
+ impInlineInfo->thisDereferencedFirst = true;
+ }
+ }
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Keep track of the raw IL offset of the call
+ call->gtCall.gtRawILOffset = rawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Is it an inline candidate?
+ impMarkInlineCandidate(call, exactContextHnd, callInfo);
+ }
+
+ // Push or append the result of the call
+ if (callRetTyp == TYP_VOID)
+ {
+ if (opcode == CEE_NEWOBJ)
+ {
+ // we actually did push something, so don't spill the thing we just pushed.
+ assert(verCurrentState.esStackDepth > 0);
+ impAppendTree(call, verCurrentState.esStackDepth - 1, impCurStmtOffs);
+ }
+ else
+ {
+ impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ }
+ else
+ {
+ impSpillSpecialSideEff();
+
+ if (clsFlags & CORINFO_FLG_ARRAY)
+ {
+ eeGetCallSiteSig(pResolvedToken->token, pResolvedToken->tokenScope, pResolvedToken->tokenContext, sig);
+ }
+
+ // Find the return type used for verification by interpreting the method signature.
+ // NB: we are clobbering the already established sig.
+ if (tiVerificationNeeded)
+ {
+ // Actually, we never get the sig for the original method.
+ sig = &(callInfo->verSig);
+ }
+
+ typeInfo tiRetVal = verMakeTypeInfo(sig->retType, sig->retTypeClass);
+ tiRetVal.NormaliseForStack();
+
+ // The CEE_READONLY prefix modifies the verification semantics of an Address
+ // operation on an array type.
+ if ((clsFlags & CORINFO_FLG_ARRAY) && readonlyCall && tiRetVal.IsByRef())
+ {
+ tiRetVal.SetIsReadonlyByRef();
+ }
+
+ if (tiVerificationNeeded)
+ {
+ // We assume all calls return permanent home byrefs. If they
+ // didn't they wouldn't be verifiable. This is also covering
+ // the Address() helper for multidimensional arrays.
+ if (tiRetVal.IsByRef())
+ {
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+ }
+
+ if (call->gtOper == GT_CALL)
+ {
+ // Sometimes "call" is not a GT_CALL (if we imported an intrinsic that didn't turn into a call)
+ if (varTypeIsStruct(callRetTyp))
+ {
+ call = impFixupCallStructReturn(call, sig->retTypeClass);
+ }
+ else if (varTypeIsLong(callRetTyp))
+ {
+ call = impInitCallLongReturn(call);
+ }
+
+ if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0)
+ {
+ assert(opts.OptEnabled(CLFLG_INLINING));
+
+ // Make the call its own tree (spill the stack if needed).
+ impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+ // TODO: Still using the widened type.
+ call = gtNewInlineCandidateReturnExpr(call, genActualType(callRetTyp));
+ }
+ }
+
+ if (!bIntrinsicImported)
+ {
+ //-------------------------------------------------------------------------
+ //
+ /* If the call is of a small type and the callee is managed, the callee will normalize the result
+ before returning.
+ However, we need to normalize small type values returned by unmanaged
+ functions (pinvoke). The pinvoke stub does the normalization, but we need to do it here
+ if we use the shorter inlined pinvoke stub. */
+
+ if (checkForSmallType && varTypeIsIntegral(callRetTyp) && genTypeSize(callRetTyp) < genTypeSize(TYP_INT))
+ {
+ call = gtNewCastNode(genActualType(callRetTyp), call, callRetTyp);
+ }
+ }
+
+ impPushOnStack(call, tiRetVal);
+ }
+
+ // VSD functions get a new call target each time we getCallInfo, so clear the cache.
+ // Also, the call info cache for CALLI instructions is largely incomplete, so clear it out.
+ // if ( (opcode == CEE_CALLI) || (callInfoCache.fetchCallInfo().kind == CORINFO_VIRTUALCALL_STUB))
+ // callInfoCache.uncacheCallInfo();
+
+ return callRetTyp;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+bool Compiler::impMethodInfo_hasRetBuffArg(CORINFO_METHOD_INFO* methInfo)
+{
+ CorInfoType corType = methInfo->args.retType;
+
+ if ((corType == CORINFO_TYPE_VALUECLASS) || (corType == CORINFO_TYPE_REFANY))
+ {
+ // We have some kind of STRUCT being returned
+
+ structPassingKind howToReturnStruct = SPK_Unknown;
+
+ var_types returnType = getReturnTypeForStruct(methInfo->args.retTypeClass, &howToReturnStruct);
+
+ if (howToReturnStruct == SPK_ByReference)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
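+// Illustrative sketch (not part of the original sources): the check above ultimately asks whether
+// a struct return uses a hidden return buffer or comes back in registers. The real answer comes
+// from getReturnTypeForStruct() and is target/ABI specific; the guarded snippet below only models
+// the idea for a hypothetical ABI, and sketchNeedsRetBuffArg is a made-up name.
+#if 0 // illustrative only
+static bool sketchNeedsRetBuffArg(unsigned structSize)
+{
+    // Assumed rule for the sketch: 1/2/4/8-byte structs are returned in a register;
+    // anything else is returned through a caller-supplied hidden buffer.
+    bool fitsInReg = (structSize == 1) || (structSize == 2) || (structSize == 4) || (structSize == 8);
+    return !fitsInReg;
+}
+#endif // illustrative only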
+
+#ifdef DEBUG
+//
+var_types Compiler::impImportJitTestLabelMark(int numArgs)
+{
+ TestLabelAndNum tlAndN;
+ if (numArgs == 2)
+ {
+ tlAndN.m_num = 0;
+ StackEntry se = impPopStack();
+ assert(se.seTypeInfo.GetType() == TI_INT);
+ GenTreePtr val = se.val;
+ assert(val->IsCnsIntOrI());
+ tlAndN.m_tl = (TestLabel)val->AsIntConCommon()->IconValue();
+ }
+ else if (numArgs == 3)
+ {
+ StackEntry se = impPopStack();
+ assert(se.seTypeInfo.GetType() == TI_INT);
+ GenTreePtr val = se.val;
+ assert(val->IsCnsIntOrI());
+ tlAndN.m_num = val->AsIntConCommon()->IconValue();
+ se = impPopStack();
+ assert(se.seTypeInfo.GetType() == TI_INT);
+ val = se.val;
+ assert(val->IsCnsIntOrI());
+ tlAndN.m_tl = (TestLabel)val->AsIntConCommon()->IconValue();
+ }
+ else
+ {
+ assert(false);
+ }
+
+ StackEntry expSe = impPopStack();
+ GenTreePtr node = expSe.val;
+
+ // There are a small number of special cases, where we actually put the annotation on a subnode.
+ if (tlAndN.m_tl == TL_LoopHoist && tlAndN.m_num >= 100)
+ {
+ // A loop hoist annotation with value >= 100 means that the expression should be a static field access,
+ // a GT_IND of a static field address, which should be the sum of a (hoistable) helper call and possibly some
+        // offset within the static field block whose address is returned by the helper call.
+ // The annotation is saying that this address calculation, but not the entire access, should be hoisted.
+ GenTreePtr helperCall = nullptr;
+ assert(node->OperGet() == GT_IND);
+ tlAndN.m_num -= 100;
+ GetNodeTestData()->Set(node->gtOp.gtOp1, tlAndN);
+ GetNodeTestData()->Remove(node);
+ }
+ else
+ {
+ GetNodeTestData()->Set(node, tlAndN);
+ }
+
+ impPushOnStack(node, expSe.seTypeInfo);
+ return node->TypeGet();
+}
+#endif // DEBUG
+
+//-----------------------------------------------------------------------------------
+// impFixupCallStructReturn: For a call node that returns a struct type either
+// adjust the return type to an enregisterable type, or set the flag to indicate
+// struct return via retbuf arg.
+//
+// Arguments:
+// call - GT_CALL GenTree node
+// retClsHnd - Class handle of return type of the call
+//
+// Return Value:
+// Returns new GenTree node after fixing struct return of call node
+//
+GenTreePtr Compiler::impFixupCallStructReturn(GenTreePtr call, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(call->gtOper == GT_CALL);
+
+ if (!varTypeIsStruct(call))
+ {
+ return call;
+ }
+
+ call->gtCall.gtRetClsHnd = retClsHnd;
+
+ GenTreeCall* callNode = call->AsCall();
+
+#if FEATURE_MULTIREG_RET
+ // Initialize Return type descriptor of call node
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeStructReturnType(this, retClsHnd);
+#endif // FEATURE_MULTIREG_RET
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+    // Not allowed for FEATURE_CORECLR, which is the only SKU available for System V OSs.
+ assert(!callNode->IsVarargs() && "varargs not allowed for System V OSs.");
+
+ // The return type will remain as the incoming struct type unless normalized to a
+ // single eightbyte return type below.
+ callNode->gtReturnType = call->gtType;
+
+ unsigned retRegCount = retTypeDesc->GetReturnRegCount();
+ if (retRegCount != 0)
+ {
+ if (retRegCount == 1)
+ {
+ // struct returned in a single register
+ callNode->gtReturnType = retTypeDesc->GetReturnRegType(0);
+ }
+ else
+ {
+ // must be a struct returned in two registers
+ assert(retRegCount == 2);
+
+ if ((!callNode->CanTailCall()) && (!callNode->IsInlineCandidate()))
+ {
+ // Force a call returning multi-reg struct to be always of the IR form
+ // tmp = call
+ //
+ // No need to assign a multi-reg struct to a local var if:
+ // - It is a tail call or
+ // - The call is marked for in-lining later
+ return impAssignMultiRegTypeToVar(call, retClsHnd);
+ }
+ }
+ }
+ else
+ {
+        // Struct not returned in registers, i.e., returned via a hidden retbuf arg.
+ callNode->gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ }
+
+#else // not FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#if FEATURE_MULTIREG_RET && defined(_TARGET_ARM_)
+    // There is no fixup necessary if the return type is an HFA struct.
+    // HFA structs are returned in registers for ARM32 and ARM64.
+ //
+ if (!call->gtCall.IsVarargs() && IsHfa(retClsHnd))
+ {
+ if (call->gtCall.CanTailCall())
+ {
+ if (info.compIsVarArgs)
+ {
+ // We cannot tail call because control needs to return to fixup the calling
+ // convention for result return.
+ call->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+ }
+ else
+ {
+                // If we can tail call a method returning an HFA, then don't bounce the
+                // result through a variable and back.
+ return call;
+ }
+ }
+
+ if (call->gtFlags & GTF_CALL_INLINE_CANDIDATE)
+ {
+ return call;
+ }
+
+ unsigned retRegCount = retTypeDesc->GetReturnRegCount();
+ if (retRegCount >= 2)
+ {
+ return impAssignMultiRegTypeToVar(call, retClsHnd);
+ }
+ }
+#endif // _TARGET_ARM_
+
+    // Check for a TYP_STRUCT type that wraps a primitive type.
+    // Such structs are returned using a single register,
+    // and we change the return type on those calls here.
+ //
+ structPassingKind howToReturnStruct;
+ var_types returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
+
+ if (howToReturnStruct == SPK_ByReference)
+ {
+ assert(returnType == TYP_UNKNOWN);
+ call->gtCall.gtCallMoreFlags |= GTF_CALL_M_RETBUFFARG;
+ }
+ else
+ {
+ assert(returnType != TYP_UNKNOWN);
+ call->gtCall.gtReturnType = returnType;
+
+ // ToDo: Refactor this common code sequence into its own method as it is used 4+ times
+ if ((returnType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((returnType == TYP_FLOAT) || (returnType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+
+#if FEATURE_MULTIREG_RET
+ unsigned retRegCount = retTypeDesc->GetReturnRegCount();
+ assert(retRegCount != 0);
+
+ if (retRegCount >= 2)
+ {
+ if ((!callNode->CanTailCall()) && (!callNode->IsInlineCandidate()))
+ {
+ // Force a call returning multi-reg struct to be always of the IR form
+ // tmp = call
+ //
+ // No need to assign a multi-reg struct to a local var if:
+ // - It is a tail call or
+ // - The call is marked for in-lining later
+ return impAssignMultiRegTypeToVar(call, retClsHnd);
+ }
+ }
+#endif // FEATURE_MULTIREG_RET
+ }
+
+#endif // not FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ return call;
+}
+
+//-------------------------------------------------------------------------------------
+// impInitCallLongReturn:
+//    Initialize the ReturnTypeDesc for a call that returns a TYP_LONG
+//
+// Arguments:
+// call - GT_CALL GenTree node
+//
+// Return Value:
+// Returns new GenTree node after initializing the ReturnTypeDesc of call node
+//
+GenTreePtr Compiler::impInitCallLongReturn(GenTreePtr call)
+{
+ assert(call->gtOper == GT_CALL);
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // LEGACY_BACKEND does not use multi reg returns for calls with long return types
+
+ if (varTypeIsLong(call))
+ {
+ GenTreeCall* callNode = call->AsCall();
+
+ // The return type will remain as the incoming long type
+ callNode->gtReturnType = call->gtType;
+
+ // Initialize Return type descriptor of call node
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->InitializeLongReturnType(this);
+
+ // must be a long returned in two registers
+ assert(retTypeDesc->GetReturnRegCount() == 2);
+ }
+#endif // _TARGET_X86_ && !LEGACY_BACKEND
+
+ return call;
+}
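+// Illustrative sketch (not part of the original sources): on 32-bit x86 a 64-bit integer return
+// value is split across a register pair (conventionally EDX:EAX), which is why the ReturnTypeDesc
+// above reports two return registers. The guarded snippet below only shows the lo/hi split; the
+// names are made up for the sketch.
+#if 0 // illustrative only
+static void sketchSplitLongReturn(unsigned long long value, unsigned* lo, unsigned* hi)
+{
+    // Low 32 bits go in the first return register, high 32 bits in the second.
+    *lo = (unsigned)(value & 0xFFFFFFFFu);
+    *hi = (unsigned)(value >> 32);
+}
+#endif // illustrative only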
+
+/*****************************************************************************
+   For struct return values, re-type the operand in the case where the ABI
+   does not use a struct return buffer.
+   Note that this method is only called for !_TARGET_X86_.
+ */
+
+GenTreePtr Compiler::impFixupStructReturnType(GenTreePtr op, CORINFO_CLASS_HANDLE retClsHnd)
+{
+ assert(varTypeIsStruct(info.compRetType));
+ assert(info.compRetBuffArg == BAD_VAR_NUM);
+
+#if defined(_TARGET_XARCH_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // No VarArgs for CoreCLR on x64 Unix
+ assert(!info.compIsVarArgs);
+
+ // Is method returning a multi-reg struct?
+ if (varTypeIsStruct(info.compRetNativeType) && IsMultiRegReturnedType(retClsHnd))
+ {
+ // In case of multi-reg struct return, we force IR to be one of the following:
+ // GT_RETURN(lclvar) or GT_RETURN(call). If op is anything other than a
+ // lclvar or call, it is assigned to a temp to create: temp = op and GT_RETURN(tmp).
+
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // Make sure that this struct stays in memory and doesn't get promoted.
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+ lvaTable[lclNum].lvIsMultiRegRet = true;
+
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ return op;
+ }
+
+ return impAssignMultiRegTypeToVar(op, retClsHnd);
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(info.compRetNativeType != TYP_STRUCT);
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM_)
+
+ if (varTypeIsStruct(info.compRetNativeType) && !info.compIsVarArgs && IsHfa(retClsHnd))
+ {
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // This LCL_VAR is an HFA return value, it stays as a TYP_STRUCT
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+ // Make sure this struct type stays as struct so that we can return it as an HFA
+ lvaTable[lclNum].lvIsMultiRegRet = true;
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ if (op->gtCall.IsVarargs())
+ {
+ // We cannot tail call because control needs to return to fixup the calling
+ // convention for result return.
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_TAILCALL;
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+ }
+ else
+ {
+ return op;
+ }
+ }
+ return impAssignMultiRegTypeToVar(op, retClsHnd);
+ }
+
+#elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM64_)
+
+ // Is method returning a multi-reg struct?
+ if (IsMultiRegReturnedType(retClsHnd))
+ {
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ // This LCL_VAR stays as a TYP_STRUCT
+ unsigned lclNum = op->gtLclVarCommon.gtLclNum;
+
+ // Make sure this struct type is not struct promoted
+ lvaTable[lclNum].lvIsMultiRegRet = true;
+ return op;
+ }
+
+ if (op->gtOper == GT_CALL)
+ {
+ if (op->gtCall.IsVarargs())
+ {
+ // We cannot tail call because control needs to return to fixup the calling
+ // convention for result return.
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_TAILCALL;
+ op->gtCall.gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+ }
+ else
+ {
+ return op;
+ }
+ }
+ return impAssignMultiRegTypeToVar(op, retClsHnd);
+ }
+
+#endif // FEATURE_MULTIREG_RET && FEATURE_HFA
+
+REDO_RETURN_NODE:
+    // Adjust the type away from struct to integral,
+    // with no normalizing.
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ op->ChangeOper(GT_LCL_FLD);
+ }
+ else if (op->gtOper == GT_OBJ)
+ {
+ GenTreePtr op1 = op->AsObj()->Addr();
+
+ // We will fold away OBJ/ADDR
+ // except for OBJ/ADDR/INDEX
+ // as the array type influences the array element's offset
+ // Later in this method we change op->gtType to info.compRetNativeType
+ // This is not correct when op is a GT_INDEX as the starting offset
+ // for the array elements 'elemOffs' is different for an array of
+ // TYP_REF than an array of TYP_STRUCT (which simply wraps a TYP_REF)
+ // Also refer to the GTF_INX_REFARR_LAYOUT flag
+ //
+ if ((op1->gtOper == GT_ADDR) && (op1->gtOp.gtOp1->gtOper != GT_INDEX))
+ {
+ // Change '*(&X)' to 'X' and see if we can do better
+ op = op1->gtOp.gtOp1;
+ goto REDO_RETURN_NODE;
+ }
+ op->gtObj.gtClass = NO_CLASS_HANDLE;
+ op->ChangeOperUnchecked(GT_IND);
+ op->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+ else if (op->gtOper == GT_CALL)
+ {
+ if (op->AsCall()->TreatAsHasRetBufArg(this))
+ {
+ // This must be one of those 'special' helpers that don't
+ // really have a return buffer, but instead use it as a way
+ // to keep the trees cleaner with fewer address-taken temps.
+ //
+            // Well, now we have to materialize the return buffer as
+ // an address-taken temp. Then we can return the temp.
+ //
+ // NOTE: this code assumes that since the call directly
+ // feeds the return, then the call must be returning the
+ // same structure/class/type.
+ //
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("pseudo return buffer"));
+
+ // No need to spill anything as we're about to return.
+ impAssignTempGen(tmpNum, op, info.compMethodInfo->args.retTypeClass, (unsigned)CHECK_SPILL_NONE);
+
+ // Don't create both a GT_ADDR & GT_OBJ just to undo all of that; instead,
+ // jump directly to a GT_LCL_FLD.
+ op = gtNewLclvNode(tmpNum, info.compRetNativeType);
+ op->ChangeOper(GT_LCL_FLD);
+ }
+ else
+ {
+ assert(info.compRetNativeType == op->gtCall.gtReturnType);
+
+ // Don't change the gtType of the node just yet, it will get changed later.
+ return op;
+ }
+ }
+ else if (op->gtOper == GT_COMMA)
+ {
+ op->gtOp.gtOp2 = impFixupStructReturnType(op->gtOp.gtOp2, retClsHnd);
+ }
+
+ op->gtType = info.compRetNativeType;
+
+ return op;
+}
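+// Illustrative sketch (not part of the original sources): the retyping above treats a struct that
+// wraps a single primitive as that primitive for return purposes, since the bits are identical.
+// The guarded snippet below shows the same reinterpretation in isolation; SketchIntWrapper and
+// sketchReturnAsPrimitive are made-up names for the sketch, not JIT code.
+#if 0 // illustrative only
+#include <cstring>
+struct SketchIntWrapper // assumed single-field struct, same size and layout as its payload
+{
+    int value;
+};
+static int sketchReturnAsPrimitive(SketchIntWrapper wrapper)
+{
+    // Reinterpret the 4-byte struct as the 4-byte integer it wraps; no conversion beyond a
+    // copy is required, which is the property the retyping relies on.
+    int asInt;
+    std::memcpy(&asInt, &wrapper, sizeof(asInt));
+    return asInt;
+}
+#endif // illustrative only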
+
+/*****************************************************************************
+ CEE_LEAVE may be jumping out of a protected block, viz, a catch or a
+ finally-protected try. We find the finally blocks protecting the current
+ offset (in order) by walking over the complete exception table and
+ finding enclosing clauses. This assumes that the table is sorted.
+ This will create a series of BBJ_CALLFINALLY -> BBJ_CALLFINALLY ... -> BBJ_ALWAYS.
+
+ If we are leaving a catch handler, we need to attach the
+ CPX_ENDCATCHes to the correct BBJ_CALLFINALLY blocks.
+
+ After this function, the BBJ_LEAVE block has been converted to a different type.
+ */
+
+#if !FEATURE_EH_FUNCLETS
+
+void Compiler::impImportLeave(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBefore import CEE_LEAVE:\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+
+ bool invalidatePreds = false; // If we create new blocks, invalidate the predecessor lists (if created)
+ unsigned blkAddr = block->bbCodeOffs;
+ BasicBlock* leaveTarget = block->bbJumpDest;
+ unsigned jmpAddr = leaveTarget->bbCodeOffs;
+
+    // LEAVE clears the stack: spill side effects and set the stack depth to 0
+
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("impImportLeave"));
+ verCurrentState.esStackDepth = 0;
+
+ assert(block->bbJumpKind == BBJ_LEAVE);
+ assert(fgBBs == (BasicBlock**)0xCDCD || fgLookupBB(jmpAddr) != NULL); // should be a BB boundary
+
+ BasicBlock* step = DUMMY_INIT(NULL);
+ unsigned encFinallies = 0; // Number of enclosing finallies.
+ GenTreePtr endCatches = NULL;
+ GenTreePtr endLFin = NULL; // The statement tree to indicate the end of locally-invoked finally.
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Grab the handler offsets
+
+ IL_OFFSET tryBeg = HBtab->ebdTryBegOffs();
+ IL_OFFSET tryEnd = HBtab->ebdTryEndOffs();
+ IL_OFFSET hndBeg = HBtab->ebdHndBegOffs();
+ IL_OFFSET hndEnd = HBtab->ebdHndEndOffs();
+
+ /* Is this a catch-handler we are CEE_LEAVEing out of?
+ * If so, we need to call CORINFO_HELP_ENDCATCH.
+ */
+
+ if (jitIsBetween(blkAddr, hndBeg, hndEnd) && !jitIsBetween(jmpAddr, hndBeg, hndEnd))
+ {
+ // Can't CEE_LEAVE out of a finally/fault handler
+ if (HBtab->HasFinallyOrFaultHandler())
+ BADCODE("leave out of fault/finally block");
+
+ // Create the call to CORINFO_HELP_ENDCATCH
+ GenTreePtr endCatch = gtNewHelperCallNode(CORINFO_HELP_ENDCATCH, TYP_VOID);
+
+ // Make a list of all the currently pending endCatches
+ if (endCatches)
+ endCatches = gtNewOperNode(GT_COMMA, TYP_VOID, endCatches, endCatch);
+ else
+ endCatches = endCatch;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - BB%02u jumping out of catch handler EH#%u, adding call to "
+ "CORINFO_HELP_ENDCATCH\n",
+ block->bbNum, XTnum);
+ }
+#endif
+ }
+ else if (HBtab->HasFinallyHandler() && jitIsBetween(blkAddr, tryBeg, tryEnd) &&
+ !jitIsBetween(jmpAddr, tryBeg, tryEnd))
+ {
+ /* This is a finally-protected try we are jumping out of */
+
+ /* If there are any pending endCatches, and we have already
+ jumped out of a finally-protected try, then the endCatches
+ have to be put in a block in an outer try for async
+ exceptions to work correctly.
+               Else, just append to the original block */
+
+ BasicBlock* callBlock;
+
+ assert(!encFinallies == !endLFin); // if we have finallies, we better have an endLFin tree, and vice-versa
+
+ if (encFinallies == 0)
+ {
+ assert(step == DUMMY_INIT(NULL));
+ callBlock = block;
+ callBlock->bbJumpKind = BBJ_CALLFINALLY; // convert the BBJ_LEAVE to BBJ_CALLFINALLY
+
+ if (endCatches)
+ impAppendTree(endCatches, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try, convert block to BBJ_CALLFINALLY "
+ "block BB%02u [%08p]\n",
+ callBlock->bbNum, dspPtr(callBlock));
+ }
+#endif
+ }
+ else
+ {
+ assert(step != DUMMY_INIT(NULL));
+
+ /* Calling the finally block */
+ callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, XTnum + 1, 0, step);
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
+ step->bbJumpDest->bbRefs++;
+
+ /* The new block will inherit this block's weight */
+ callBlock->setBBWeight(block->bbWeight);
+ callBlock->bbFlags |= block->bbFlags & BBF_RUN_RARELY;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try, new BBJ_CALLFINALLY block BB%02u "
+ "[%08p]\n",
+ callBlock->bbNum, dspPtr(callBlock));
+ }
+#endif
+
+ GenTreePtr lastStmt;
+
+ if (endCatches)
+ {
+ lastStmt = gtNewStmt(endCatches);
+ endLFin->gtNext = lastStmt;
+ lastStmt->gtPrev = endLFin;
+ }
+ else
+ {
+ lastStmt = endLFin;
+ }
+
+ // note that this sets BBF_IMPORTED on the block
+ impEndTreeList(callBlock, endLFin, lastStmt);
+ }
+
+ step = fgNewBBafter(BBJ_ALWAYS, callBlock, true);
+ /* The new block will inherit this block's weight */
+ step->setBBWeight(block->bbWeight);
+ step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try, created step (BBJ_ALWAYS) block "
+ "BB%02u [%08p]\n",
+ step->bbNum, dspPtr(step));
+ }
+#endif
+
+ unsigned finallyNesting = compHndBBtab[XTnum].ebdHandlerNestingLevel;
+ assert(finallyNesting <= compHndBBtabCount);
+
+ callBlock->bbJumpDest = HBtab->ebdHndBeg; // This callBlock will call the "finally" handler.
+ endLFin = new (this, GT_END_LFIN) GenTreeVal(GT_END_LFIN, TYP_VOID, finallyNesting);
+ endLFin = gtNewStmt(endLFin);
+ endCatches = NULL;
+
+ encFinallies++;
+
+ invalidatePreds = true;
+ }
+ }
+
+ /* Append any remaining endCatches, if any */
+
+ assert(!encFinallies == !endLFin);
+
+ if (encFinallies == 0)
+ {
+ assert(step == DUMMY_INIT(NULL));
+ block->bbJumpKind = BBJ_ALWAYS; // convert the BBJ_LEAVE to a BBJ_ALWAYS
+
+ if (endCatches)
+ impAppendTree(endCatches, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - no enclosing finally-protected try blocks; convert CEE_LEAVE block to BBJ_ALWAYS "
+ "block BB%02u [%08p]\n",
+ block->bbNum, dspPtr(block));
+ }
+#endif
+ }
+ else
+ {
+ // If leaveTarget is the start of another try block, we want to make sure that
+ // we do not insert finalStep into that try block. Hence, we find the enclosing
+ // try block.
+ unsigned tryIndex = bbFindInnermostCommonTryRegion(step, leaveTarget);
+
+ // Insert a new BB either in the try region indicated by tryIndex or
+ // the handler region indicated by leaveTarget->bbHndIndex,
+ // depending on which is the inner region.
+ BasicBlock* finalStep = fgNewBBinRegion(BBJ_ALWAYS, tryIndex, leaveTarget->bbHndIndex, step);
+ finalStep->bbFlags |= BBF_KEEP_BBJ_ALWAYS;
+ step->bbJumpDest = finalStep;
+
+ /* The new block will inherit this block's weight */
+ finalStep->setBBWeight(block->bbWeight);
+ finalStep->bbFlags |= block->bbFlags & BBF_RUN_RARELY;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - finalStep block required (encFinallies(%d) > 0), new block BB%02u [%08p]\n",
+ encFinallies, finalStep->bbNum, dspPtr(finalStep));
+ }
+#endif
+
+ GenTreePtr lastStmt;
+
+ if (endCatches)
+ {
+ lastStmt = gtNewStmt(endCatches);
+ endLFin->gtNext = lastStmt;
+ lastStmt->gtPrev = endLFin;
+ }
+ else
+ {
+ lastStmt = endLFin;
+ }
+
+ impEndTreeList(finalStep, endLFin, lastStmt);
+
+ finalStep->bbJumpDest = leaveTarget; // this is the ultimate destination of the LEAVE
+
+ // Queue up the jump target for importing
+
+ impImportBlockPending(leaveTarget);
+
+ invalidatePreds = true;
+ }
+
+ if (invalidatePreds && fgComputePredsDone)
+ {
+ JITDUMP("\n**** impImportLeave - Removing preds after creating new blocks\n");
+ fgRemovePreds();
+ }
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+
+ if (verbose)
+ {
+ printf("\nAfter import CEE_LEAVE:\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+}
+
+#else // FEATURE_EH_FUNCLETS
+
+void Compiler::impImportLeave(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+        printf("\nBefore import CEE_LEAVE in BB%02u (targeting BB%02u):\n", block->bbNum, block->bbJumpDest->bbNum);
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+
+ bool invalidatePreds = false; // If we create new blocks, invalidate the predecessor lists (if created)
+ unsigned blkAddr = block->bbCodeOffs;
+ BasicBlock* leaveTarget = block->bbJumpDest;
+ unsigned jmpAddr = leaveTarget->bbCodeOffs;
+
+    // LEAVE clears the stack: spill side effects and set the stack depth to 0
+
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("impImportLeave"));
+ verCurrentState.esStackDepth = 0;
+
+ assert(block->bbJumpKind == BBJ_LEAVE);
+ assert(fgBBs == (BasicBlock**)0xCDCD || fgLookupBB(jmpAddr) != nullptr); // should be a BB boundary
+
+ BasicBlock* step = nullptr;
+
+ enum StepType
+ {
+ // No step type; step == NULL.
+ ST_None,
+
+ // Is the step block the BBJ_ALWAYS block of a BBJ_CALLFINALLY/BBJ_ALWAYS pair?
+ // That is, is step->bbJumpDest where a finally will return to?
+ ST_FinallyReturn,
+
+ // The step block is a catch return.
+ ST_Catch,
+
+ // The step block is in a "try", created as the target for a finally return or the target for a catch return.
+ ST_Try
+ };
+ StepType stepType = ST_None;
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Grab the handler offsets
+
+ IL_OFFSET tryBeg = HBtab->ebdTryBegOffs();
+ IL_OFFSET tryEnd = HBtab->ebdTryEndOffs();
+ IL_OFFSET hndBeg = HBtab->ebdHndBegOffs();
+ IL_OFFSET hndEnd = HBtab->ebdHndEndOffs();
+
+ /* Is this a catch-handler we are CEE_LEAVEing out of?
+ */
+
+ if (jitIsBetween(blkAddr, hndBeg, hndEnd) && !jitIsBetween(jmpAddr, hndBeg, hndEnd))
+ {
+ // Can't CEE_LEAVE out of a finally/fault handler
+ if (HBtab->HasFinallyOrFaultHandler())
+ {
+ BADCODE("leave out of fault/finally block");
+ }
+
+ /* We are jumping out of a catch */
+
+ if (step == nullptr)
+ {
+ step = block;
+ step->bbJumpKind = BBJ_EHCATCHRET; // convert the BBJ_LEAVE to BBJ_EHCATCHRET
+ stepType = ST_Catch;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a catch (EH#%u), convert block BB%02u to BBJ_EHCATCHRET "
+ "block\n",
+ XTnum, step->bbNum);
+ }
+#endif
+ }
+ else
+ {
+ BasicBlock* exitBlock;
+
+ /* Create a new catch exit block in the catch region for the existing step block to jump to in this
+ * scope */
+ exitBlock = fgNewBBinRegion(BBJ_EHCATCHRET, 0, XTnum + 1, step);
+
+ assert(step->bbJumpKind == BBJ_ALWAYS || step->bbJumpKind == BBJ_EHCATCHRET);
+ step->bbJumpDest = exitBlock; // the previous step (maybe a call to a nested finally, or a nested catch
+ // exit) returns to this block
+ step->bbJumpDest->bbRefs++;
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+ /* The new block will inherit this block's weight */
+ exitBlock->setBBWeight(block->bbWeight);
+ exitBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+ /* This exit block is the new step */
+ step = exitBlock;
+ stepType = ST_Catch;
+
+ invalidatePreds = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a catch (EH#%u), new BBJ_EHCATCHRET block BB%02u\n", XTnum,
+ exitBlock->bbNum);
+ }
+#endif
+ }
+ }
+ else if (HBtab->HasFinallyHandler() && jitIsBetween(blkAddr, tryBeg, tryEnd) &&
+ !jitIsBetween(jmpAddr, tryBeg, tryEnd))
+ {
+ /* We are jumping out of a finally-protected try */
+
+ BasicBlock* callBlock;
+
+ if (step == nullptr)
+ {
+#if FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Put the call to the finally in the enclosing region.
+ unsigned callFinallyTryIndex =
+ (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingTryIndex + 1;
+ unsigned callFinallyHndIndex =
+ (HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingHndIndex + 1;
+ callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, block);
+
+ // Convert the BBJ_LEAVE to BBJ_ALWAYS, jumping to the new BBJ_CALLFINALLY. This is because
+ // the new BBJ_CALLFINALLY is in a different EH region, thus it can't just replace the BBJ_LEAVE,
+ // which might be in the middle of the "try". In most cases, the BBJ_ALWAYS will jump to the
+ // next block, and flow optimizations will remove it.
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = callBlock;
+ block->bbJumpDest->bbRefs++;
+
+ /* The new block will inherit this block's weight */
+ callBlock->setBBWeight(block->bbWeight);
+ callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), convert block BB%02u to "
+ "BBJ_ALWAYS, add BBJ_CALLFINALLY block BB%02u\n",
+ XTnum, block->bbNum, callBlock->bbNum);
+ }
+#endif
+
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+
+ callBlock = block;
+ callBlock->bbJumpKind = BBJ_CALLFINALLY; // convert the BBJ_LEAVE to BBJ_CALLFINALLY
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), convert block BB%02u to "
+ "BBJ_CALLFINALLY block\n",
+ XTnum, callBlock->bbNum);
+ }
+#endif
+
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+ }
+ else
+ {
+ // Calling the finally block. We already have a step block that is either the call-to-finally from a
+ // more nested try/finally (thus we are jumping out of multiple nested 'try' blocks, each protected by
+ // a 'finally'), or the step block is the return from a catch.
+ //
+ // Due to ThreadAbortException, we can't have the catch return target the call-to-finally block
+ // directly. Note that if a 'catch' ends without resetting the ThreadAbortException, the VM will
+ // automatically re-raise the exception, using the return address of the catch (that is, the target
+ // block of the BBJ_EHCATCHRET) as the re-raise address. If this address is in a finally, the VM will
+ // refuse to do the re-raise, and the ThreadAbortException will get eaten (and lost). On AMD64/ARM64,
+ // we put the call-to-finally thunk in a special "cloned finally" EH region that does look like a
+ // finally clause to the VM. Thus, on these platforms, we can't have BBJ_EHCATCHRET target a
+ // BBJ_CALLFINALLY directly. (Note that on ARM32, we don't mark the thunk specially -- it lives directly
+ // within the 'try' region protected by the finally, since we generate code in such a way that execution
+ // never returns to the call-to-finally call, and the finally-protected 'try' region doesn't appear on
+ // stack walks.)
+
+ assert(step->bbJumpKind == BBJ_ALWAYS || step->bbJumpKind == BBJ_EHCATCHRET);
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (step->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ // Need to create another step block in the 'try' region that will actually branch to the
+ // call-to-finally thunk.
+ BasicBlock* step2 = fgNewBBinRegion(BBJ_ALWAYS, XTnum + 1, 0, step);
+ step->bbJumpDest = step2;
+ step->bbJumpDest->bbRefs++;
+ step2->setBBWeight(block->bbWeight);
+ step2->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), step block is "
+ "BBJ_EHCATCHRET (BB%02u), new BBJ_ALWAYS step-step block BB%02u\n",
+ XTnum, step->bbNum, step2->bbNum);
+ }
+#endif
+
+ step = step2;
+ assert(stepType == ST_Catch); // Leave it as catch type for now.
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ unsigned callFinallyTryIndex =
+ (HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingTryIndex + 1;
+ unsigned callFinallyHndIndex =
+ (HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX) ? 0 : HBtab->ebdEnclosingHndIndex + 1;
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+ unsigned callFinallyTryIndex = XTnum + 1;
+ unsigned callFinallyHndIndex = 0; // don't care
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+
+ callBlock = fgNewBBinRegion(BBJ_CALLFINALLY, callFinallyTryIndex, callFinallyHndIndex, step);
+ step->bbJumpDest = callBlock; // the previous call to a finally returns to this call (to the next
+ // finally in the chain)
+ step->bbJumpDest->bbRefs++;
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+ /* The new block will inherit this block's weight */
+ callBlock->setBBWeight(block->bbWeight);
+ callBlock->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), new BBJ_CALLFINALLY block "
+ "BB%02u\n",
+ XTnum, callBlock->bbNum);
+ }
+#endif
+ }
+
+ step = fgNewBBafter(BBJ_ALWAYS, callBlock, true);
+ stepType = ST_FinallyReturn;
+
+ /* The new block will inherit this block's weight */
+ step->setBBWeight(block->bbWeight);
+ step->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED | BBF_KEEP_BBJ_ALWAYS;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - jumping out of a finally-protected try (EH#%u), created step (BBJ_ALWAYS) "
+ "block BB%02u\n",
+ XTnum, step->bbNum);
+ }
+#endif
+
+ callBlock->bbJumpDest = HBtab->ebdHndBeg; // This callBlock will call the "finally" handler.
+
+ invalidatePreds = true;
+ }
+ else if (HBtab->HasCatchHandler() && jitIsBetween(blkAddr, tryBeg, tryEnd) &&
+ !jitIsBetween(jmpAddr, tryBeg, tryEnd))
+ {
+ // We are jumping out of a catch-protected try.
+ //
+ // If we are returning from a call to a finally, then we must have a step block within a try
+ // that is protected by a catch. This is so when unwinding from that finally (e.g., if code within the
+ // finally raises an exception), the VM will find this step block, notice that it is in a protected region,
+ // and invoke the appropriate catch.
+ //
+ // We also need to handle a special case with the handling of ThreadAbortException. If a try/catch
+ // catches a ThreadAbortException (which might be because it catches a parent, e.g. System.Exception),
+ // and the catch doesn't call System.Threading.Thread::ResetAbort(), then when the catch returns to the VM,
+ // the VM will automatically re-raise the ThreadAbortException. When it does this, it uses the target
+ // address of the catch return as the new exception address. That is, the re-raised exception appears to
+ // occur at the catch return address. If this exception return address skips an enclosing try/catch that
+ // catches ThreadAbortException, then the enclosing try/catch will not catch the exception, as it should.
+ // For example:
+ //
+ // try {
+ // try {
+ // // something here raises ThreadAbortException
+ // LEAVE LABEL_1; // no need to stop at LABEL_2
+ // } catch (Exception) {
+ // // This catches ThreadAbortException, but doesn't call System.Threading.Thread::ResetAbort(), so
+ // // ThreadAbortException is re-raised by the VM at the address specified by the LEAVE opcode.
+ // // This is bad, since it means the outer try/catch won't get a chance to catch the re-raised
+ // // ThreadAbortException. So, instead, create step block LABEL_2 and LEAVE to that. We only
+ // // need to do this transformation if the current EH block is a try/catch that catches
+ // // ThreadAbortException (or one of its parents), however we might not be able to find that
+ // // information, so currently we do it for all catch types.
+ // LEAVE LABEL_1; // Convert this to LEAVE LABEL2;
+ // }
+ // LABEL_2: LEAVE LABEL_1; // inserted by this step creation code
+ // } catch (ThreadAbortException) {
+ // }
+ // LABEL_1:
+ //
+ // Note that this pattern isn't theoretical: it occurs in ASP.NET, in IL code generated by the Roslyn C#
+ // compiler.
+
+ if ((stepType == ST_FinallyReturn) || (stepType == ST_Catch))
+ {
+ BasicBlock* catchStep;
+
+ assert(step);
+
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ }
+ else
+ {
+ assert(stepType == ST_Catch);
+ assert(step->bbJumpKind == BBJ_EHCATCHRET);
+ }
+
+ /* Create a new exit block in the try region for the existing step block to jump to in this scope */
+ catchStep = fgNewBBinRegion(BBJ_ALWAYS, XTnum + 1, 0, step);
+ step->bbJumpDest = catchStep;
+ step->bbJumpDest->bbRefs++;
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+ /* The new block will inherit this block's weight */
+ catchStep->setBBWeight(block->bbWeight);
+ catchStep->bbFlags |= (block->bbFlags & BBF_RUN_RARELY) | BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (stepType == ST_FinallyReturn)
+ {
+ printf("impImportLeave - return from finally jumping out of a catch-protected try (EH#%u), new "
+ "BBJ_ALWAYS block BB%02u\n",
+ XTnum, catchStep->bbNum);
+ }
+ else
+ {
+ assert(stepType == ST_Catch);
+ printf("impImportLeave - return from catch jumping out of a catch-protected try (EH#%u), new "
+ "BBJ_ALWAYS block BB%02u\n",
+ XTnum, catchStep->bbNum);
+ }
+ }
+#endif // DEBUG
+
+ /* This block is the new step */
+ step = catchStep;
+ stepType = ST_Try;
+
+ invalidatePreds = true;
+ }
+ }
+ }
+
+ if (step == nullptr)
+ {
+ block->bbJumpKind = BBJ_ALWAYS; // convert the BBJ_LEAVE to a BBJ_ALWAYS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - no enclosing finally-protected try blocks or catch handlers; convert CEE_LEAVE "
+ "block BB%02u to BBJ_ALWAYS\n",
+ block->bbNum);
+ }
+#endif
+ }
+ else
+ {
+ step->bbJumpDest = leaveTarget; // this is the ultimate destination of the LEAVE
+
+#if defined(_TARGET_ARM_)
+ if (stepType == ST_FinallyReturn)
+ {
+ assert(step->bbJumpKind == BBJ_ALWAYS);
+ // Mark the target of a finally return
+ step->bbJumpDest->bbFlags |= BBF_FINALLY_TARGET;
+ }
+#endif // defined(_TARGET_ARM_)
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("impImportLeave - final destination of step blocks set to BB%02u\n", leaveTarget->bbNum);
+ }
+#endif
+
+ // Queue up the jump target for importing
+
+ impImportBlockPending(leaveTarget);
+ }
+
+ if (invalidatePreds && fgComputePredsDone)
+ {
+ JITDUMP("\n**** impImportLeave - Removing preds after creating new blocks\n");
+ fgRemovePreds();
+ }
+
+#ifdef DEBUG
+ fgVerifyHandlerTab();
+
+ if (verbose)
+ {
+ printf("\nAfter import CEE_LEAVE:\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************/
+// This is called when reimporting a leave block. It resets the JumpKind,
+// JumpDest, and bbNext to the original values
+
+void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr)
+{
+#if FEATURE_EH_FUNCLETS
+    // With EH funclets, while importing the leave opcode we create another block ending with BBJ_ALWAYS (call it B1),
+    // and the block containing the leave (say B0) is marked as BBJ_CALLFINALLY. If for some reason we reimport B0,
+    // it is reset (in this routine) to end with BBJ_LEAVE; further down, when B0 is reimported, we create another
+    // BBJ_ALWAYS (call it B2). In this process B1 gets orphaned, and any blocks for which B1 is the only predecessor
+    // are also considered orphans and attempted to be deleted.
+ //
+ // try {
+ // ....
+ // try
+ // {
+ // ....
+ // leave OUTSIDE; // B0 is the block containing this leave, following this would be B1
+ // } finally { }
+ // } finally { }
+ // OUTSIDE:
+ //
+    // In the above nested try-finally example, we create a step block (call it Bstep) which branches to the block
+    // that the finally would branch to (and that block is marked as a finally target). Block B1 branches to the step
+    // block. Because of the re-import of B0, Bstep is also orphaned. Since Bstep is a finally target, it cannot be
+    // removed. To work around this we duplicate B0 (call it B0Dup) before resetting. B0Dup is marked as
+    // BBJ_CALLFINALLY and only serves to pair up with B1 (the BBJ_ALWAYS that got orphaned). Now, during orphan block
+    // deletion, B0Dup and B1 will be treated as a pair and handled correctly.
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ BasicBlock* dupBlock = bbNewBasicBlock(block->bbJumpKind);
+ dupBlock->bbFlags = block->bbFlags;
+ dupBlock->bbJumpDest = block->bbJumpDest;
+ dupBlock->copyEHRegion(block);
+ dupBlock->bbCatchTyp = block->bbCatchTyp;
+
+        // Mark this block:
+        // a) as not referenced by any other block, to make sure that it gets deleted
+        // b) with weight zero
+        // c) as already imported, to prevent it from being imported again
+        // d) as internal
+        // e) as rarely run
+ dupBlock->bbRefs = 0;
+ dupBlock->bbWeight = 0;
+ dupBlock->bbFlags |= BBF_IMPORTED | BBF_INTERNAL | BBF_RUN_RARELY;
+
+ // Insert the block right after the block which is getting reset so that BBJ_CALLFINALLY and BBJ_ALWAYS
+ // will be next to each other.
+ fgInsertBBafter(block, dupBlock);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("New Basic Block BB%02u duplicate of BB%02u created.\n", dupBlock->bbNum, block->bbNum);
+ }
+#endif
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ block->bbJumpKind = BBJ_LEAVE;
+ fgInitBBLookup();
+ block->bbJumpDest = fgLookupBB(jmpAddr);
+
+    // We will leave the BBJ_ALWAYS block we introduced. When it's reimported
+    // the BBJ_ALWAYS block will be unreachable, and will be removed afterwards. The
+    // reason we don't want to remove the block at this point is that if we call
+    // fgInitBBLookup() again we will do it wrong, as the BBJ_ALWAYS block won't be
+    // added and the linked list length will be different from fgBBcount.
+}
+
+/*****************************************************************************/
+// Get the first non-prefix opcode. Used for verification of valid combinations
+// of prefixes and actual opcodes.
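+//
+// As an illustrative (assumed) IL fragment: for the byte sequence
+//     unaligned. 1 volatile. ldind.i4
+// the loop below steps over both prefixes (skipping their operands via opcodeSizes[])
+// and returns CEE_LDIND_I4; if no non-prefix opcode is found before codeEndp, it
+// returns CEE_ILLEGAL.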
+
+static OPCODE impGetNonPrefixOpcode(const BYTE* codeAddr, const BYTE* codeEndp)
+{
+ while (codeAddr < codeEndp)
+ {
+ OPCODE opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+
+ if (opcode == CEE_PREFIX1)
+ {
+ if (codeAddr >= codeEndp)
+ {
+ break;
+ }
+ opcode = (OPCODE)(getU1LittleEndian(codeAddr) + 256);
+ codeAddr += sizeof(__int8);
+ }
+
+ switch (opcode)
+ {
+ case CEE_UNALIGNED:
+ case CEE_VOLATILE:
+ case CEE_TAILCALL:
+ case CEE_CONSTRAINED:
+ case CEE_READONLY:
+ break;
+ default:
+ return opcode;
+ }
+
+ codeAddr += opcodeSizes[opcode];
+ }
+
+ return CEE_ILLEGAL;
+}
+
+/*****************************************************************************/
+// Checks whether the opcode is a valid opcode for volatile. and unaligned. prefixes
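+//
+// For example (a sketch of the check below): "volatile. ldsfld" passes because ldsfld/stsfld
+// are explicitly allowed when volatilePrefix is true, whereas "unaligned. ldsfld" fails with
+// BADCODE, since that allowance does not apply to the unaligned. prefix.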
+
+static void impValidateMemoryAccessOpcode(const BYTE* codeAddr, const BYTE* codeEndp, bool volatilePrefix)
+{
+ OPCODE opcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+
+ if (!(
+            // The opcodes for all the ldind and stind variants happen to be contiguous, except for stind.i.
+ ((CEE_LDIND_I1 <= opcode) && (opcode <= CEE_STIND_R8)) || (opcode == CEE_STIND_I) ||
+ (opcode == CEE_LDFLD) || (opcode == CEE_STFLD) || (opcode == CEE_LDOBJ) || (opcode == CEE_STOBJ) ||
+ (opcode == CEE_INITBLK) || (opcode == CEE_CPBLK) ||
+ // volatile. prefix is allowed with the ldsfld and stsfld
+ (volatilePrefix && ((opcode == CEE_LDSFLD) || (opcode == CEE_STSFLD)))))
+ {
+ BADCODE("Invalid opcode for unaligned. or volatile. prefix");
+ }
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+#undef RETURN // undef contracts RETURN macro
+
+enum controlFlow_t
+{
+ NEXT,
+ CALL,
+ RETURN,
+ THROW,
+ BRANCH,
+ COND_BRANCH,
+ BREAK,
+ PHI,
+ META,
+};
+
+const static controlFlow_t controlFlow[] = {
+#define OPDEF(c, s, pop, push, args, type, l, s1, s2, flow) flow,
+#include "opcode.def"
+#undef OPDEF
+};
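+
+// The X-macro above keeps only the control-flow column of each OPDEF entry in opcode.def, so
+// (assuming the OPCODE enumeration is generated from the same file in the same order) the table
+// can be indexed by opcode to get its control-flow kind, e.g. controlFlow[CEE_NOP] == NEXT.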
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * Determine the result type of an arithmetic operation.
+ * On 64-bit targets, inserts upcasts when native int is mixed with int32.
+ */
+var_types Compiler::impGetByRefResultType(genTreeOps oper, bool fUnsigned, GenTreePtr* pOp1, GenTreePtr* pOp2)
+{
+ var_types type = TYP_UNDEF;
+ GenTreePtr op1 = *pOp1, op2 = *pOp2;
+
+    // Arithmetic operations are generally only allowed with
+    // primitive types, but certain operations are allowed
+    // with byrefs
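+    //
+    // A rough summary of the cases handled below (assuming valid IL; the code has the exact checks):
+    //
+    //    byref - byref            => native int
+    //    byref - [native] int     => byref
+    //    [native] int - byref     => native int
+    //    byref + [native] int     => byref      (either operand order)
+    //    int32 op native int      => native int (on 64-bit targets, after upcasting the int32 operand)
+    //    int op int, float op float, long op long => the operands' actual type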
+
+ if ((oper == GT_SUB) && (genActualType(op1->TypeGet()) == TYP_BYREF || genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ if ((genActualType(op1->TypeGet()) == TYP_BYREF) && (genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ // byref1-byref2 => gives a native int
+ type = TYP_I_IMPL;
+ }
+ else if (genActualTypeIsIntOrI(op1->TypeGet()) && (genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ // [native] int - byref => gives a native int
+
+ //
+ // The reason is that it is possible, in managed C++,
+ // to have a tree like this:
+ //
+ // -
+ // / \
+ // / \
+ // / \
+ // / \
+ // const(h) int addr byref
+ //
+ // <BUGNUM> VSW 318822 </BUGNUM>
+ //
+ // So here we decide to make the resulting type to be a native int.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ type = TYP_I_IMPL;
+ }
+ else
+ {
+ // byref - [native] int => gives a byref
+ assert(genActualType(op1->TypeGet()) == TYP_BYREF && genActualTypeIsIntOrI(op2->TypeGet()));
+
+#ifdef _TARGET_64BIT_
+ if ((genActualType(op2->TypeGet()) != TYP_I_IMPL))
+ {
+ // insert an explicit upcast
+ op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ type = TYP_BYREF;
+ }
+ }
+ else if ((oper == GT_ADD) &&
+ (genActualType(op1->TypeGet()) == TYP_BYREF || genActualType(op2->TypeGet()) == TYP_BYREF))
+ {
+ // byref + [native] int => gives a byref
+ // (or)
+ // [native] int + byref => gives a byref
+
+ // only one can be a byref : byref op byref not allowed
+ assert(genActualType(op1->TypeGet()) != TYP_BYREF || genActualType(op2->TypeGet()) != TYP_BYREF);
+ assert(genActualTypeIsIntOrI(op1->TypeGet()) || genActualTypeIsIntOrI(op2->TypeGet()));
+
+#ifdef _TARGET_64BIT_
+ if (genActualType(op2->TypeGet()) == TYP_BYREF)
+ {
+ if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ }
+ else if (genActualType(op2->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ type = TYP_BYREF;
+ }
+#ifdef _TARGET_64BIT_
+ else if (genActualType(op1->TypeGet()) == TYP_I_IMPL || genActualType(op2->TypeGet()) == TYP_I_IMPL)
+ {
+ assert(!varTypeIsFloating(op1->gtType) && !varTypeIsFloating(op2->gtType));
+
+        // int + native int => gives native int
+        // native int + int => gives native int
+        // we get this mixing because in the IL the "long" here isn't an Int64, it's just IntPtr (native int)
+
+ if (genActualType(op1->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op1 = *pOp1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ else if (genActualType(op2->TypeGet()) != TYP_I_IMPL)
+ {
+ // insert an explicit upcast
+ op2 = *pOp2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(fUnsigned ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+
+ type = TYP_I_IMPL;
+ }
+#else // 32-bit TARGET
+ else if (genActualType(op1->TypeGet()) == TYP_LONG || genActualType(op2->TypeGet()) == TYP_LONG)
+ {
+ assert(!varTypeIsFloating(op1->gtType) && !varTypeIsFloating(op2->gtType));
+
+ // int + long => gives long
+ // long + int => gives long
+
+ type = TYP_LONG;
+ }
+#endif // _TARGET_64BIT_
+ else
+ {
+ // int + int => gives an int
+ assert(genActualType(op1->TypeGet()) != TYP_BYREF && genActualType(op2->TypeGet()) != TYP_BYREF);
+
+ assert(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) ||
+ varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType));
+
+ type = genActualType(op1->gtType);
+
+#if FEATURE_X87_DOUBLES
+
+ // For x87, since we only have 1 size of registers, prefer double
+ // For everybody else, be more precise
+ if (type == TYP_FLOAT)
+ type = TYP_DOUBLE;
+
+#else // !FEATURE_X87_DOUBLES
+
+ // If both operands are TYP_FLOAT, then leave it as TYP_FLOAT.
+ // Otherwise, turn floats into doubles
+ if ((type == TYP_FLOAT) && (genActualType(op2->gtType) != TYP_FLOAT))
+ {
+ assert(genActualType(op2->gtType) == TYP_DOUBLE);
+ type = TYP_DOUBLE;
+ }
+
+#endif // FEATURE_X87_DOUBLES
+ }
+
+#if FEATURE_X87_DOUBLES
+ assert(type == TYP_BYREF || type == TYP_DOUBLE || type == TYP_LONG || type == TYP_INT);
+#else // FEATURE_X87_DOUBLES
+ assert(type == TYP_BYREF || type == TYP_DOUBLE || type == TYP_FLOAT || type == TYP_LONG || type == TYP_INT);
+#endif // FEATURE_X87_DOUBLES
+
+ return type;
+}
+
+/*****************************************************************************
+ * Casting Helper Function to service both CEE_CASTCLASS and CEE_ISINST
+ *
+ *  pResolvedToken contains the token, op1 contains the value being cast,
+ *  and op2 contains code that creates the type handle corresponding to that token;
+ * isCastClass = true means CEE_CASTCLASS, false means CEE_ISINST
+ */
+GenTreePtr Compiler::impCastClassOrIsInstToTree(GenTreePtr op1,
+ GenTreePtr op2,
+ CORINFO_RESOLVED_TOKEN* pResolvedToken,
+ bool isCastClass)
+{
+ bool expandInline;
+
+ assert(op1->TypeGet() == TYP_REF);
+
+ CorInfoHelpFunc helper = info.compCompHnd->getCastingHelper(pResolvedToken, isCastClass);
+
+ if (isCastClass)
+ {
+ // We only want to expand inline the normal CHKCASTCLASS helper;
+ expandInline = (helper == CORINFO_HELP_CHKCASTCLASS);
+ }
+ else
+ {
+ if (helper == CORINFO_HELP_ISINSTANCEOFCLASS)
+ {
+            // Get the class handle and class attributes for the type we are casting to
+ //
+ DWORD flags = info.compCompHnd->getClassAttribs(pResolvedToken->hClass);
+
+ //
+ // If the class handle is marked as final we can also expand the IsInst check inline
+ //
+ expandInline = ((flags & CORINFO_FLG_FINAL) != 0);
+
+ //
+ // But don't expand inline these two cases
+ //
+ if (flags & CORINFO_FLG_MARSHAL_BYREF)
+ {
+ expandInline = false;
+ }
+ else if (flags & CORINFO_FLG_CONTEXTFUL)
+ {
+ expandInline = false;
+ }
+ }
+ else
+ {
+ //
+ // We can't expand inline any other helpers
+ //
+ expandInline = false;
+ }
+ }
+
+ if (expandInline)
+ {
+ if (compCurBB->isRunRarely())
+ {
+ expandInline = false; // not worth the code expansion in a rarely run block
+ }
+
+ if ((op1->gtFlags & GTF_GLOB_EFFECT) && lvaHaveManyLocals())
+ {
+ expandInline = false; // not worth creating an untracked local variable
+ }
+ }
+
+ if (!expandInline)
+ {
+ // If we CSE this class handle we prevent assertionProp from making SubType assertions
+ // so instead we force the CSE logic to not consider CSE-ing this class handle.
+ //
+ op2->gtFlags |= GTF_DONT_CSE;
+
+ return gtNewHelperCallNode(helper, TYP_REF, 0, gtNewArgList(op2, op1));
+ }
+
+ impSpillSideEffects(true, CHECK_SPILL_ALL DEBUGARG("bubbling QMark2"));
+
+ GenTreePtr temp;
+ GenTreePtr condMT;
+ //
+ // expand the methodtable match:
+ //
+ // condMT ==> GT_NE
+ // / \
+ // GT_IND op2 (typically CNS_INT)
+ // |
+ // op1Copy
+ //
+
+ // This can replace op1 with a GT_COMMA that evaluates op1 into a local
+ //
+ op1 = impCloneExpr(op1, &temp, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("CASTCLASS eval op1"));
+ //
+ // op1 is now known to be a non-complex tree
+ // thus we can use gtClone(op1) from now on
+ //
+
+ GenTreePtr op2Var = op2;
+ if (isCastClass)
+ {
+ op2Var = fgInsertCommaFormTemp(&op2);
+ lvaTable[op2Var->AsLclVarCommon()->GetLclNum()].lvIsCSE = true;
+ }
+ temp = gtNewOperNode(GT_IND, TYP_I_IMPL, temp);
+ temp->gtFlags |= GTF_EXCEPT;
+ condMT = gtNewOperNode(GT_NE, TYP_INT, temp, op2);
+
+ GenTreePtr condNull;
+ //
+ // expand the null check:
+ //
+ // condNull ==> GT_EQ
+ // / \
+ // op1Copy CNS_INT
+ // null
+ //
+ condNull = gtNewOperNode(GT_EQ, TYP_INT, gtClone(op1), gtNewIconNode(0, TYP_REF));
+
+ //
+ // expand the true and false trees for the condMT
+ //
+ GenTreePtr condFalse = gtClone(op1);
+ GenTreePtr condTrue;
+ if (isCastClass)
+ {
+ //
+ // use the special helper that skips the cases checked by our inlined cast
+ //
+ helper = CORINFO_HELP_CHKCASTCLASS_SPECIAL;
+
+ condTrue = gtNewHelperCallNode(helper, TYP_REF, 0, gtNewArgList(op2Var, gtClone(op1)));
+ }
+ else
+ {
+ condTrue = gtNewIconNode(0, TYP_REF);
+ }
+
+#define USE_QMARK_TREES
+
+#ifdef USE_QMARK_TREES
+ GenTreePtr qmarkMT;
+ //
+ // Generate first QMARK - COLON tree
+ //
+ // qmarkMT ==> GT_QMARK
+ // / \
+ // condMT GT_COLON
+ // / \
+ // condFalse condTrue
+ //
+ temp = new (this, GT_COLON) GenTreeColon(TYP_REF, condTrue, condFalse);
+ qmarkMT = gtNewQmarkNode(TYP_REF, condMT, temp);
+ condMT->gtFlags |= GTF_RELOP_QMARK;
+
+ GenTreePtr qmarkNull;
+ //
+ // Generate second QMARK - COLON tree
+ //
+ // qmarkNull ==> GT_QMARK
+ // / \
+ // condNull GT_COLON
+ // / \
+ // qmarkMT op1Copy
+ //
+ temp = new (this, GT_COLON) GenTreeColon(TYP_REF, gtClone(op1), qmarkMT);
+ qmarkNull = gtNewQmarkNode(TYP_REF, condNull, temp);
+ qmarkNull->gtFlags |= GTF_QMARK_CAST_INSTOF;
+ condNull->gtFlags |= GTF_RELOP_QMARK;
+
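+    // Taken together, the two QMARK trees evaluate to something like the following sketch
+    // (pseudo-code, not a literal tree dump; "mt(op1)" stands for the method table loaded by
+    // the GT_IND above, and "clsHnd" for op2):
+    //
+    //    (op1 == null) ? op1
+    //                  : (mt(op1) != clsHnd)
+    //                        ? (isCastClass ? CORINFO_HELP_CHKCASTCLASS_SPECIAL(clsHnd, op1) : null)
+    //                        : op1
+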
+ // Make QMark node a top level node by spilling it.
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("spilling QMark2"));
+ impAssignTempGen(tmp, qmarkNull, (unsigned)CHECK_SPILL_NONE);
+ return gtNewLclvNode(tmp, TYP_REF);
+#endif
+}
+
+#ifndef DEBUG
+#define assertImp(cond) ((void)0)
+#else
+#define assertImp(cond) \
+ do \
+ { \
+ if (!(cond)) \
+ { \
+ const int cchAssertImpBuf = 600; \
+ char* assertImpBuf = (char*)alloca(cchAssertImpBuf); \
+ _snprintf_s(assertImpBuf, cchAssertImpBuf, cchAssertImpBuf - 1, \
+ "%s : Possibly bad IL with CEE_%s at offset %04Xh (op1=%s op2=%s stkDepth=%d)", #cond, \
+ impCurOpcName, impCurOpcOffs, op1 ? varTypeName(op1->TypeGet()) : "NULL", \
+ op2 ? varTypeName(op2->TypeGet()) : "NULL", verCurrentState.esStackDepth); \
+ assertAbort(assertImpBuf, __FILE__, __LINE__); \
+ } \
+ } while (0)
+#endif // DEBUG
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+/*****************************************************************************
+ * Import the instr for the given basic block
+ */
+void Compiler::impImportBlockCode(BasicBlock* block)
+{
+#define _impResolveToken(kind) impResolveToken(codeAddr, &resolvedToken, kind)
+
+#ifdef DEBUG
+
+ if (verbose)
+ {
+ printf("\nImporting BB%02u (PC=%03u) of '%s'", block->bbNum, block->bbCodeOffs, info.compFullName);
+ }
+#endif
+
+ unsigned nxtStmtIndex = impInitBlockLineInfo();
+ IL_OFFSET nxtStmtOffs;
+
+ GenTreePtr arrayNodeFrom, arrayNodeTo, arrayNodeToIndex;
+ bool expandInline;
+ CorInfoHelpFunc helper;
+ CorInfoIsAccessAllowedResult accessAllowedResult;
+ CORINFO_HELPER_DESC calloutHelper;
+ const BYTE* lastLoadToken = nullptr;
+
+ // reject cyclic constraints
+ if (tiVerificationNeeded)
+ {
+ Verify(!info.hasCircularClassConstraints, "Method parent has circular class type parameter constraints.");
+ Verify(!info.hasCircularMethodConstraints, "Method has circular method type parameter constraints.");
+ }
+
+ /* Get the tree list started */
+
+ impBeginTreeList();
+
+ /* Walk the opcodes that comprise the basic block */
+
+ const BYTE* codeAddr = info.compCode + block->bbCodeOffs;
+ const BYTE* codeEndp = info.compCode + block->bbCodeOffsEnd;
+
+ IL_OFFSET opcodeOffs = block->bbCodeOffs;
+ IL_OFFSET lastSpillOffs = opcodeOffs;
+
+ signed jmpDist;
+
+ /* remember the start of the delegate creation sequence (used for verification) */
+ const BYTE* delegateCreateStart = nullptr;
+
+ int prefixFlags = 0;
+ bool explicitTailCall, constraintCall, readonlyCall;
+
+ bool insertLdloc = false; // set by CEE_DUP and cleared by following store
+ typeInfo tiRetVal;
+
+ unsigned numArgs = info.compArgsCount;
+
+ /* Now process all the opcodes in the block */
+
+ var_types callTyp = TYP_COUNT;
+ OPCODE prevOpcode = CEE_ILLEGAL;
+
+ if (block->bbCatchTyp)
+ {
+ if (info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES)
+ {
+ impCurStmtOffsSet(block->bbCodeOffs);
+ }
+
+ // We will spill the GT_CATCH_ARG and the input of the BB_QMARK block
+ // to a temp. This is a trade off for code simplicity
+ impSpillSpecialSideEff();
+ }
+
+ while (codeAddr < codeEndp)
+ {
+ bool usingReadyToRunHelper = false;
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+ CORINFO_RESOLVED_TOKEN constrainedResolvedToken;
+ CORINFO_CALL_INFO callInfo;
+ CORINFO_FIELD_INFO fieldInfo;
+
+ tiRetVal = typeInfo(); // Default type info
+
+ //---------------------------------------------------------------------
+
+ /* We need to restrict the max tree depth as many of the Compiler
+ functions are recursive. We do this by spilling the stack */
+
+ if (verCurrentState.esStackDepth)
+ {
+        /* Has it been a while since we last saw an empty stack or spilled (which
+           would guarantee that the tree depth isn't accumulating)? */
+
+ if ((opcodeOffs - lastSpillOffs) > 200)
+ {
+ impSpillStackEnsure();
+ lastSpillOffs = opcodeOffs;
+ }
+ }
+ else
+ {
+ lastSpillOffs = opcodeOffs;
+ impBoxTempInUse = false; // nothing on the stack, box temp OK to use again
+ }
+
+ /* Compute the current instr offset */
+
+ opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode);
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+#ifndef DEBUG
+ if (opts.compDbgInfo)
+#endif
+ {
+ if (!compIsForInlining())
+ {
+ nxtStmtOffs =
+ (nxtStmtIndex < info.compStmtOffsetsCount) ? info.compStmtOffsets[nxtStmtIndex] : BAD_IL_OFFSET;
+
+ /* Have we reached the next stmt boundary ? */
+
+ if (nxtStmtOffs != BAD_IL_OFFSET && opcodeOffs >= nxtStmtOffs)
+ {
+ assert(nxtStmtOffs == info.compStmtOffsets[nxtStmtIndex]);
+
+ if (verCurrentState.esStackDepth != 0 && opts.compDbgCode)
+ {
+ /* We need to provide accurate IP-mapping at this point.
+ So spill anything on the stack so that it will form
+ gtStmts with the correct stmt offset noted */
+
+ impSpillStackEnsure(true);
+ }
+
+ // Has impCurStmtOffs been reported in any tree?
+
+ if (impCurStmtOffs != BAD_IL_OFFSET && opts.compDbgCode)
+ {
+ GenTreePtr placeHolder = new (this, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+ impAppendTree(placeHolder, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ assert(impCurStmtOffs == BAD_IL_OFFSET);
+ }
+
+ if (impCurStmtOffs == BAD_IL_OFFSET)
+ {
+ /* Make sure that nxtStmtIndex is in sync with opcodeOffs.
+ If opcodeOffs has gone past nxtStmtIndex, catch up */
+
+ while ((nxtStmtIndex + 1) < info.compStmtOffsetsCount &&
+ info.compStmtOffsets[nxtStmtIndex + 1] <= opcodeOffs)
+ {
+ nxtStmtIndex++;
+ }
+
+ /* Go to the new stmt */
+
+ impCurStmtOffsSet(info.compStmtOffsets[nxtStmtIndex]);
+
+ /* Update the stmt boundary index */
+
+ nxtStmtIndex++;
+ assert(nxtStmtIndex <= info.compStmtOffsetsCount);
+
+ /* Are there any more line# entries after this one? */
+
+ if (nxtStmtIndex < info.compStmtOffsetsCount)
+ {
+ /* Remember where the next line# starts */
+
+ nxtStmtOffs = info.compStmtOffsets[nxtStmtIndex];
+ }
+ else
+ {
+ /* No more line# entries */
+
+ nxtStmtOffs = BAD_IL_OFFSET;
+ }
+ }
+ }
+ else if ((info.compStmtOffsetsImplicit & ICorDebugInfo::STACK_EMPTY_BOUNDARIES) &&
+ (verCurrentState.esStackDepth == 0))
+ {
+ /* At stack-empty locations, we have already added the tree to
+ the stmt list with the last offset. We just need to update
+ impCurStmtOffs
+ */
+
+ impCurStmtOffsSet(opcodeOffs);
+ }
+ else if ((info.compStmtOffsetsImplicit & ICorDebugInfo::CALL_SITE_BOUNDARIES) &&
+ impOpcodeIsCallSiteBoundary(prevOpcode))
+ {
+ /* Make sure we have a type cached */
+ assert(callTyp != TYP_COUNT);
+
+ if (callTyp == TYP_VOID)
+ {
+ impCurStmtOffsSet(opcodeOffs);
+ }
+ else if (opts.compDbgCode)
+ {
+ impSpillStackEnsure(true);
+ impCurStmtOffsSet(opcodeOffs);
+ }
+ }
+ else if ((info.compStmtOffsetsImplicit & ICorDebugInfo::NOP_BOUNDARIES) && (prevOpcode == CEE_NOP))
+ {
+ if (opts.compDbgCode)
+ {
+ impSpillStackEnsure(true);
+ }
+
+ impCurStmtOffsSet(opcodeOffs);
+ }
+
+ assert(impCurStmtOffs == BAD_IL_OFFSET || nxtStmtOffs == BAD_IL_OFFSET ||
+ jitGetILoffs(impCurStmtOffs) <= nxtStmtOffs);
+ }
+ }
+
+#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ CORINFO_CLASS_HANDLE clsHnd = DUMMY_INIT(NULL);
+ CORINFO_CLASS_HANDLE ldelemClsHnd = DUMMY_INIT(NULL);
+ CORINFO_CLASS_HANDLE stelemClsHnd = DUMMY_INIT(NULL);
+
+ var_types lclTyp, ovflType = TYP_UNKNOWN;
+ GenTreePtr op1 = DUMMY_INIT(NULL);
+ GenTreePtr op2 = DUMMY_INIT(NULL);
+ GenTreeArgList* args = nullptr; // What good do these "DUMMY_INIT"s do?
+ GenTreePtr newObjThisPtr = DUMMY_INIT(NULL);
+ bool uns = DUMMY_INIT(false);
+
+ /* Get the next opcode and the size of its parameters */
+
+ OPCODE opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+
+#ifdef DEBUG
+ impCurOpcOffs = (IL_OFFSET)(codeAddr - info.compCode - 1);
+ JITDUMP("\n [%2u] %3u (0x%03x) ", verCurrentState.esStackDepth, impCurOpcOffs, impCurOpcOffs);
+#endif
+
+ DECODE_OPCODE:
+
+ // Return if any previous code has caused inline to fail.
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ /* Get the size of additional parameters */
+
+ signed int sz = opcodeSizes[opcode];
+
+#ifdef DEBUG
+ clsHnd = NO_CLASS_HANDLE;
+ lclTyp = TYP_COUNT;
+ callTyp = TYP_COUNT;
+
+ impCurOpcOffs = (IL_OFFSET)(codeAddr - info.compCode - 1);
+ impCurOpcName = opcodeNames[opcode];
+
+ if (verbose && (opcode != CEE_PREFIX1))
+ {
+ printf("%s", impCurOpcName);
+ }
+
+ /* Use assertImp() to display the opcode */
+
+ op1 = op2 = nullptr;
+#endif
+
+ /* See what kind of an opcode we have, then */
+
+ unsigned mflags = 0;
+ unsigned clsFlags = 0;
+
+ switch (opcode)
+ {
+ unsigned lclNum;
+ var_types type;
+
+ GenTreePtr op3;
+ genTreeOps oper;
+ unsigned size;
+
+ int val;
+
+ CORINFO_SIG_INFO sig;
+ unsigned flags;
+ IL_OFFSET jmpAddr;
+ bool ovfl, unordered, callNode;
+ bool ldstruct;
+ CORINFO_CLASS_HANDLE tokenType;
+
+ union {
+ int intVal;
+ float fltVal;
+ __int64 lngVal;
+ double dblVal;
+ } cval;
+
+ case CEE_PREFIX1:
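+                // Two-byte opcodes are encoded as the CEE_PREFIX1 escape byte (0xFE) followed by a
+                // second byte; biasing that second byte by 256 maps it into the two-byte range of
+                // the OPCODE enumeration (the same decoding impGetNonPrefixOpcode uses above).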
+ opcode = (OPCODE)(getU1LittleEndian(codeAddr) + 256);
+ codeAddr += sizeof(__int8);
+ opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode);
+ goto DECODE_OPCODE;
+
+ SPILL_APPEND:
+
+ /* Append 'op1' to the list of statements */
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ goto DONE_APPEND;
+
+ APPEND:
+
+ /* Append 'op1' to the list of statements */
+
+ impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ goto DONE_APPEND;
+
+ DONE_APPEND:
+
+#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
+ impNoteLastILoffs();
+#endif
+ break;
+
+ case CEE_LDNULL:
+ impPushNullObjRefOnStack();
+ break;
+
+ case CEE_LDC_I4_M1:
+ case CEE_LDC_I4_0:
+ case CEE_LDC_I4_1:
+ case CEE_LDC_I4_2:
+ case CEE_LDC_I4_3:
+ case CEE_LDC_I4_4:
+ case CEE_LDC_I4_5:
+ case CEE_LDC_I4_6:
+ case CEE_LDC_I4_7:
+ case CEE_LDC_I4_8:
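+                // CEE_LDC_I4_M1 through CEE_LDC_I4_8 are consecutive opcode values, with
+                // CEE_LDC_I4_M1 immediately preceding CEE_LDC_I4_0, so the subtraction below
+                // yields -1 through 8 (as the assert checks).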
+ cval.intVal = (opcode - CEE_LDC_I4_0);
+ assert(-1 <= cval.intVal && cval.intVal <= 8);
+ goto PUSH_I4CON;
+
+ case CEE_LDC_I4_S:
+ cval.intVal = getI1LittleEndian(codeAddr);
+ goto PUSH_I4CON;
+ case CEE_LDC_I4:
+ cval.intVal = getI4LittleEndian(codeAddr);
+ goto PUSH_I4CON;
+ PUSH_I4CON:
+ JITDUMP(" %d", cval.intVal);
+ impPushOnStack(gtNewIconNode(cval.intVal), typeInfo(TI_INT));
+ break;
+
+ case CEE_LDC_I8:
+ cval.lngVal = getI8LittleEndian(codeAddr);
+ JITDUMP(" 0x%016llx", cval.lngVal);
+ impPushOnStack(gtNewLconNode(cval.lngVal), typeInfo(TI_LONG));
+ break;
+
+ case CEE_LDC_R8:
+ cval.dblVal = getR8LittleEndian(codeAddr);
+ JITDUMP(" %#.17g", cval.dblVal);
+ impPushOnStack(gtNewDconNode(cval.dblVal), typeInfo(TI_DOUBLE));
+ break;
+
+ case CEE_LDC_R4:
+ cval.dblVal = getR4LittleEndian(codeAddr);
+ JITDUMP(" %#.17g", cval.dblVal);
+ {
+ GenTreePtr cnsOp = gtNewDconNode(cval.dblVal);
+#if !FEATURE_X87_DOUBLES
+ // X87 stack doesn't differentiate between float/double
+ // so R4 is treated as R8, but everybody else does
+ cnsOp->gtType = TYP_FLOAT;
+#endif // FEATURE_X87_DOUBLES
+ impPushOnStack(cnsOp, typeInfo(TI_DOUBLE));
+ }
+ break;
+
+ case CEE_LDSTR:
+
+ if (compIsForInlining())
+ {
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_NO_CALLEE_LDSTR)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_HAS_LDSTR_RESTRICTION);
+ return;
+ }
+ }
+
+ val = getU4LittleEndian(codeAddr);
+ JITDUMP(" %08X", val);
+ if (tiVerificationNeeded)
+ {
+ Verify(info.compCompHnd->isValidStringRef(info.compScopeHnd, val), "bad string");
+ tiRetVal = typeInfo(TI_REF, impGetStringClass());
+ }
+ impPushOnStack(gtNewSconNode(val, info.compScopeHnd), tiRetVal);
+
+ break;
+
+ case CEE_LDARG:
+ lclNum = getU2LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadArg(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDARG_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadArg(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDARG_0:
+ case CEE_LDARG_1:
+ case CEE_LDARG_2:
+ case CEE_LDARG_3:
+ lclNum = (opcode - CEE_LDARG_0);
+ assert(lclNum >= 0 && lclNum < 4);
+ impLoadArg(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDLOC:
+ lclNum = getU2LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadLoc(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDLOC_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ impLoadLoc(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_LDLOC_0:
+ case CEE_LDLOC_1:
+ case CEE_LDLOC_2:
+ case CEE_LDLOC_3:
+ lclNum = (opcode - CEE_LDLOC_0);
+ assert(lclNum >= 0 && lclNum < 4);
+ impLoadLoc(lclNum, opcodeOffs + sz + 1);
+ break;
+
+ case CEE_STARG:
+ lclNum = getU2LittleEndian(codeAddr);
+ goto STARG;
+
+ case CEE_STARG_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ STARG:
+ JITDUMP(" %u", lclNum);
+
+ if (tiVerificationNeeded)
+ {
+ Verify(lclNum < info.compILargsCount, "bad arg num");
+ }
+
+ if (compIsForInlining())
+ {
+ op1 = impInlineFetchArg(lclNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo);
+ noway_assert(op1->gtOper == GT_LCL_VAR);
+ lclNum = op1->AsLclVar()->gtLclNum;
+
+ goto VAR_ST_VALID;
+ }
+
+ lclNum = compMapILargNum(lclNum); // account for possible hidden param
+ assertImp(lclNum < numArgs);
+
+ if (lclNum == info.compThisArg)
+ {
+ lclNum = lvaArg0Var;
+ }
+ lvaTable[lclNum].lvArgWrite = 1;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo& tiLclVar = lvaTable[lclNum].lvVerTypeInfo;
+ Verify(tiCompatibleWith(impStackTop().seTypeInfo, NormaliseForStack(tiLclVar), true),
+ "type mismatch");
+
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ Verify(!tiLclVar.IsThisPtr(), "storing to uninit this ptr");
+ }
+ }
+
+ goto VAR_ST;
+
+ case CEE_STLOC:
+ lclNum = getU2LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ goto LOC_ST;
+
+ case CEE_STLOC_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ JITDUMP(" %u", lclNum);
+ goto LOC_ST;
+
+ case CEE_STLOC_0:
+ case CEE_STLOC_1:
+ case CEE_STLOC_2:
+ case CEE_STLOC_3:
+ lclNum = (opcode - CEE_STLOC_0);
+ assert(lclNum >= 0 && lclNum < 4);
+
+ LOC_ST:
+ if (tiVerificationNeeded)
+ {
+ Verify(lclNum < info.compMethodInfo->locals.numArgs, "bad local num");
+ Verify(tiCompatibleWith(impStackTop().seTypeInfo,
+ NormaliseForStack(lvaTable[lclNum + numArgs].lvVerTypeInfo), true),
+ "type mismatch");
+ }
+
+ if (compIsForInlining())
+ {
+ lclTyp = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ /* Have we allocated a temp for this local? */
+
+ lclNum = impInlineFetchLocal(lclNum DEBUGARG("Inline stloc first use temp"));
+
+ goto _PopValue;
+ }
+
+ lclNum += numArgs;
+
+ VAR_ST:
+
+ if (lclNum >= info.compLocalsCount && lclNum != lvaArg0Var)
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ BADCODE("Bad IL");
+ }
+
+ VAR_ST_VALID:
+
+ /* if it is a struct assignment, make certain we don't overflow the buffer */
+ assert(lclTyp != TYP_STRUCT || lvaLclSize(lclNum) >= info.compCompHnd->getClassSize(clsHnd));
+
+ if (lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ lclTyp = lvaGetRealType(lclNum);
+ }
+ else
+ {
+ lclTyp = lvaGetActualType(lclNum);
+ }
+
+ _PopValue:
+ /* Pop the value being assigned */
+
+ {
+ StackEntry se = impPopStack(clsHnd);
+ op1 = se.val;
+ tiRetVal = se.seTypeInfo;
+ }
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(lclTyp) && (lclTyp != op1->TypeGet()))
+ {
+ assert(op1->TypeGet() == TYP_STRUCT);
+ op1->gtType = lclTyp;
+ }
+#endif // FEATURE_SIMD
+
+ op1 = impImplicitIorI4Cast(op1, lclTyp);
+
+#ifdef _TARGET_64BIT_
+                // Downcast the TYP_I_IMPL into a 32-bit Int for x86 JIT compatibility
+ if (varTypeIsI(op1->TypeGet()) && (genActualType(lclTyp) == TYP_INT))
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
+ }
+#endif // _TARGET_64BIT_
+
+ // We had better assign it a value of the correct type
+ assertImp(
+ genActualType(lclTyp) == genActualType(op1->gtType) ||
+ genActualType(lclTyp) == TYP_I_IMPL && op1->IsVarAddr() ||
+ (genActualType(lclTyp) == TYP_I_IMPL && (op1->gtType == TYP_BYREF || op1->gtType == TYP_REF)) ||
+ (genActualType(op1->gtType) == TYP_I_IMPL && lclTyp == TYP_BYREF) ||
+ (varTypeIsFloating(lclTyp) && varTypeIsFloating(op1->TypeGet())) ||
+ ((genActualType(lclTyp) == TYP_BYREF) && genActualType(op1->TypeGet()) == TYP_REF));
+
+ /* If op1 is "&var" then its type is the transient "*" and it can
+ be used either as TYP_BYREF or TYP_I_IMPL */
+
+ if (op1->IsVarAddr())
+ {
+ assertImp(genActualType(lclTyp) == TYP_I_IMPL || lclTyp == TYP_BYREF);
+
+ /* When "&var" is created, we assume it is a byref. If it is
+ being assigned to a TYP_I_IMPL var, change the type to
+ prevent unnecessary GC info */
+
+ if (genActualType(lclTyp) == TYP_I_IMPL)
+ {
+ op1->gtType = TYP_I_IMPL;
+ }
+ }
+
+ /* Filter out simple assignments to itself */
+
+ if (op1->gtOper == GT_LCL_VAR && lclNum == op1->gtLclVarCommon.gtLclNum)
+ {
+ if (insertLdloc)
+ {
+ // This is a sequence of (ldloc, dup, stloc). Can simplify
+ // to (ldloc, stloc). Goto LDVAR to reconstruct the ldloc node.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (tiVerificationNeeded)
+ {
+ assert(
+ typeInfo::AreEquivalent(tiRetVal, NormaliseForStack(lvaTable[lclNum].lvVerTypeInfo)));
+ }
+#endif
+
+ op1 = nullptr;
+ insertLdloc = false;
+
+ impLoadVar(lclNum, opcodeOffs + sz + 1);
+ break;
+ }
+ else if (opts.compDbgCode)
+ {
+ op1 = gtNewNothingNode();
+ goto SPILL_APPEND;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ /* Create the assignment node */
+
+ op2 = gtNewLclvNode(lclNum, lclTyp, opcodeOffs + sz + 1);
+
+ /* If the local is aliased, we need to spill calls and
+ indirections from the stack. */
+
+ if ((lvaTable[lclNum].lvAddrExposed || lvaTable[lclNum].lvHasLdAddrOp) &&
+ verCurrentState.esStackDepth > 0)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG("Local could be aliased"));
+ }
+
+ /* Spill any refs to the local from the stack */
+
+ impSpillLclRefs(lclNum);
+
+#if !FEATURE_X87_DOUBLES
+ // We can generate an assignment to a TYP_FLOAT from a TYP_DOUBLE
+ // We insert a cast to the dest 'op2' type
+ //
+ if ((op1->TypeGet() != op2->TypeGet()) && varTypeIsFloating(op1->gtType) &&
+ varTypeIsFloating(op2->gtType))
+ {
+ op1 = gtNewCastNode(op2->TypeGet(), op1, op2->TypeGet());
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+ if (varTypeIsStruct(lclTyp))
+ {
+ op1 = impAssignStruct(op2, op1, clsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+                        // The code generator generates GC tracking information
+                        // based on the RHS of the assignment. Later the LHS (which
+                        // is a BYREF) gets used and the emitter checks that that variable
+                        // is being tracked. It is not (since the RHS was an int and did
+                        // not need tracking). To keep this assert happy, we change the RHS.
+ if (lclTyp == TYP_BYREF && !varTypeIsGC(op1->gtType))
+ {
+ op1->gtType = TYP_BYREF;
+ }
+ op1 = gtNewAssignNode(op2, op1);
+ }
+
+ /* If insertLdloc is true, then we need to insert a ldloc following the
+ stloc. This is done when converting a (dup, stloc) sequence into
+ a (stloc, ldloc) sequence. */
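+                /* For example, the IL sequence "dup; stloc.1" leaves the stored value on the stack;
+                   rather than cloning the value tree, the importer emits the store and then reloads
+                   local 1 via the ldloc inserted here (insertLdloc is set when importing CEE_DUP). */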
+
+ if (insertLdloc)
+ {
+ // From SPILL_APPEND
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+#ifdef DEBUG
+ // From DONE_APPEND
+ impNoteLastILoffs();
+#endif
+ op1 = nullptr;
+ insertLdloc = false;
+
+ impLoadVar(lclNum, opcodeOffs + sz + 1, tiRetVal);
+ break;
+ }
+
+ goto SPILL_APPEND;
+
+ case CEE_LDLOCA:
+ lclNum = getU2LittleEndian(codeAddr);
+ goto LDLOCA;
+
+ case CEE_LDLOCA_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ LDLOCA:
+ JITDUMP(" %u", lclNum);
+ if (tiVerificationNeeded)
+ {
+ Verify(lclNum < info.compMethodInfo->locals.numArgs, "bad local num");
+ Verify(info.compInitMem, "initLocals not set");
+ }
+
+ if (compIsForInlining())
+ {
+ // Get the local type
+ lclTyp = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ /* Have we allocated a temp for this local? */
+
+ lclNum = impInlineFetchLocal(lclNum DEBUGARG("Inline ldloca(s) first use temp"));
+
+ op1 = gtNewLclvNode(lclNum, lvaGetActualType(lclNum));
+
+ goto _PUSH_ADRVAR;
+ }
+
+ lclNum += numArgs;
+ assertImp(lclNum < info.compLocalsCount);
+ goto ADRVAR;
+
+ case CEE_LDARGA:
+ lclNum = getU2LittleEndian(codeAddr);
+ goto LDARGA;
+
+ case CEE_LDARGA_S:
+ lclNum = getU1LittleEndian(codeAddr);
+ LDARGA:
+ JITDUMP(" %u", lclNum);
+ Verify(lclNum < info.compILargsCount, "bad arg num");
+
+ if (compIsForInlining())
+ {
+ // In IL, LDARGA(_S) is used to load the byref managed pointer of struct argument,
+ // followed by a ldfld to load the field.
+
+ op1 = impInlineFetchArg(lclNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo);
+ if (op1->gtOper != GT_LCL_VAR)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDARGA_NOT_LOCAL_VAR);
+ return;
+ }
+
+ assert(op1->gtOper == GT_LCL_VAR);
+
+ goto _PUSH_ADRVAR;
+ }
+
+ lclNum = compMapILargNum(lclNum); // account for possible hidden param
+ assertImp(lclNum < numArgs);
+
+ if (lclNum == info.compThisArg)
+ {
+ lclNum = lvaArg0Var;
+ }
+
+ goto ADRVAR;
+
+ ADRVAR:
+
+ op1 = gtNewLclvNode(lclNum, lvaGetActualType(lclNum), opcodeOffs + sz + 1);
+
+ _PUSH_ADRVAR:
+ assert(op1->gtOper == GT_LCL_VAR);
+
+ /* Note that this is supposed to create the transient type "*"
+ which may be used as a TYP_I_IMPL. However we catch places
+ where it is used as a TYP_I_IMPL and change the node if needed.
+ Thus we are pessimistic and may report byrefs in the GC info
+ where it was not absolutely needed, but it is safer this way.
+ */
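+                // For example, "ldloca.s 0" of an int local produces a TYP_BYREF GT_ADDR here; if that
+                // address is later stored to a native-int-typed location, the IsVarAddr() handling on
+                // the store paths above retypes it to TYP_I_IMPL so no byref is reported unnecessarily.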
+ op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, op1);
+
+                // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
+ assert((op1->gtFlags & GTF_GLOB_REF) == 0);
+
+ tiRetVal = lvaTable[lclNum].lvVerTypeInfo;
+ if (tiVerificationNeeded)
+ {
+ // Don't allow taking address of uninit this ptr.
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ Verify(!tiRetVal.IsThisPtr(), "address of uninit this ptr");
+ }
+
+ if (!tiRetVal.IsByRef())
+ {
+ tiRetVal.MakeByRef();
+ }
+ else
+ {
+ Verify(false, "byref to byref");
+ }
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_ARGLIST:
+
+ if (!info.compIsVarArgs)
+ {
+ BADCODE("arglist in non-vararg method");
+ }
+
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = typeInfo(TI_STRUCT, impGetRuntimeArgumentHandle());
+ }
+ assertImp((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_MASK) == CORINFO_CALLCONV_VARARG);
+
+                /* The ARGLIST cookie is a hidden 'last' parameter; we have already
+                   adjusted the arg count because this is like fetching the last param */
+ assertImp(0 < numArgs);
+ assert(lvaTable[lvaVarargsHandleArg].lvAddrExposed);
+ lclNum = lvaVarargsHandleArg;
+ op1 = gtNewLclvNode(lclNum, TYP_I_IMPL, opcodeOffs + sz + 1);
+ op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, op1);
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_ENDFINALLY:
+
+ if (compIsForInlining())
+ {
+ assert(!"Shouldn't have exception handlers in the inliner!");
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFINALLY);
+ return;
+ }
+
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impEvalSideEffects();
+ }
+
+ if (info.compXcptnsCount == 0)
+ {
+ BADCODE("endfinally outside finally");
+ }
+
+ assert(verCurrentState.esStackDepth == 0);
+
+ op1 = gtNewOperNode(GT_RETFILT, TYP_VOID, nullptr);
+ goto APPEND;
+
+ case CEE_ENDFILTER:
+
+ if (compIsForInlining())
+ {
+ assert(!"Shouldn't have exception handlers in the inliner!");
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_ENDFILTER);
+ return;
+ }
+
+ block->bbSetRunRarely(); // filters are rare
+
+ if (info.compXcptnsCount == 0)
+ {
+ BADCODE("endfilter outside filter");
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsType(TI_INT), "bad endfilt arg");
+ }
+
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_INT);
+ if (!bbInFilterILRange(block))
+ {
+ BADCODE("EndFilter outside a filter handler");
+ }
+
+ /* Mark current bb as end of filter */
+
+ assert(compCurBB->bbFlags & BBF_DONT_REMOVE);
+ assert(compCurBB->bbJumpKind == BBJ_EHFILTERRET);
+
+ /* Mark catch handler as successor */
+
+ op1 = gtNewOperNode(GT_RETFILT, op1->TypeGet(), op1);
+ if (verCurrentState.esStackDepth != 0)
+ {
+ verRaiseVerifyException(INDEBUG("stack must be 1 on end of filter") DEBUGARG(__FILE__)
+ DEBUGARG(__LINE__));
+ }
+ goto APPEND;
+
+ case CEE_RET:
+ prefixFlags &= ~PREFIX_TAILCALL; // ret without call before it
+ RET:
+ if (!impReturnInstruction(block, prefixFlags, opcode))
+ {
+ return; // abort
+ }
+ else
+ {
+ break;
+ }
+
+ case CEE_JMP:
+
+ assert(!compIsForInlining());
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "Invalid opcode: CEE_JMP");
+ }
+
+ if ((info.compFlags & CORINFO_FLG_SYNCH) || block->hasTryIndex() || block->hasHndIndex())
+ {
+ /* CEE_JMP does not make sense in some "protected" regions. */
+
+ BADCODE("Jmp not allowed in protected region");
+ }
+
+ if (verCurrentState.esStackDepth != 0)
+ {
+ BADCODE("Stack must be empty after CEE_JMPs");
+ }
+
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ /* The signature of the target has to be identical to ours.
+ At least check that argCnt and returnType match */
+
+ eeGetMethodSig(resolvedToken.hMethod, &sig);
+ if (sig.numArgs != info.compMethodInfo->args.numArgs ||
+ sig.retType != info.compMethodInfo->args.retType ||
+ sig.callConv != info.compMethodInfo->args.callConv)
+ {
+ BADCODE("Incompatible target for CEE_JMPs");
+ }
+
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARMARCH_)
+
+ op1 = new (this, GT_JMP) GenTreeVal(GT_JMP, TYP_VOID, (size_t)resolvedToken.hMethod);
+
+ /* Mark the basic block as being a JUMP instead of RETURN */
+
+ block->bbFlags |= BBF_HAS_JMP;
+
+ /* Set this flag to make sure register arguments have a location assigned
+ * even if we don't use them inside the method */
+
+ compJmpOpUsed = true;
+
+ fgNoStructPromotion = true;
+
+ goto APPEND;
+
+#else // !_TARGET_XARCH_ && !_TARGET_ARMARCH_
+
+ // Import this just like a series of LDARGs + tail. + call + ret
+
+ if (info.compIsVarArgs)
+ {
+ // For now we don't implement true tail calls, so this breaks varargs.
+ // So warn the user instead of generating bad code.
+ // This is a semi-temporary workaround for DevDiv 173860, until we can properly
+ // implement true tail calls.
+                    IMPL_LIMITATION("varargs + CEE_JMP doesn't work yet");
+ }
+
+ // First load up the arguments (0 - N)
+ for (unsigned argNum = 0; argNum < info.compILargsCount; argNum++)
+ {
+ impLoadArg(argNum, opcodeOffs + sz + 1);
+ }
+
+ // Now generate the tail call
+ noway_assert(prefixFlags == 0);
+ prefixFlags = PREFIX_TAILCALL_EXPLICIT;
+ opcode = CEE_CALL;
+
+ eeGetCallInfo(&resolvedToken, NULL,
+ combine(CORINFO_CALLINFO_ALLOWINSTPARAM, CORINFO_CALLINFO_SECURITYCHECKS), &callInfo);
+
+ // All calls and delegates need a security callout.
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+ callTyp = impImportCall(CEE_CALL, &resolvedToken, NULL, NULL, PREFIX_TAILCALL_EXPLICIT, &callInfo,
+ opcodeOffs);
+
+ // And finish with the ret
+ goto RET;
+
+#endif // _TARGET_XARCH_ || _TARGET_ARMARCH_
+
+ case CEE_LDELEMA:
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ ldelemClsHnd = resolvedToken.hClass;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(1).seTypeInfo;
+ typeInfo tiIndex = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+
+ typeInfo arrayElemType = verMakeTypeInfo(ldelemClsHnd);
+ Verify(tiArray.IsNullObjRef() ||
+ typeInfo::AreEquivalent(verGetArrayElemType(tiArray), arrayElemType),
+ "bad array");
+
+ tiRetVal = arrayElemType;
+ tiRetVal.MakeByRef();
+ if (prefixFlags & PREFIX_READONLY)
+ {
+ tiRetVal.SetIsReadonlyByRef();
+ }
+
+ // an array interior pointer is always in the heap
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+
+ // If it's a value class array we just do a simple address-of
+ if (eeIsValueClass(ldelemClsHnd))
+ {
+ CorInfoType cit = info.compCompHnd->getTypeForPrimitiveValueClass(ldelemClsHnd);
+ if (cit == CORINFO_TYPE_UNDEF)
+ {
+ lclTyp = TYP_STRUCT;
+ }
+ else
+ {
+ lclTyp = JITtype2varType(cit);
+ }
+ goto ARR_LD_POST_VERIFY;
+ }
+
+                // Similarly, if it's a readonly access, we can do a simple address-of
+ // without doing a runtime type-check
+ if (prefixFlags & PREFIX_READONLY)
+ {
+ lclTyp = TYP_REF;
+ goto ARR_LD_POST_VERIFY;
+ }
+
+ // Otherwise we need the full helper function with run-time type check
+ op1 = impTokenToHandle(&resolvedToken);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ args = gtNewArgList(op1); // Type
+ args = gtNewListNode(impPopStack().val, args); // index
+ args = gtNewListNode(impPopStack().val, args); // array
+ op1 = gtNewHelperCallNode(CORINFO_HELP_LDELEMA_REF, TYP_BYREF, GTF_EXCEPT, args);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ // ldelem for reference and value types
+ case CEE_LDELEM:
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ ldelemClsHnd = resolvedToken.hClass;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(1).seTypeInfo;
+ typeInfo tiIndex = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ tiRetVal = verMakeTypeInfo(ldelemClsHnd);
+
+ Verify(tiArray.IsNullObjRef() || tiCompatibleWith(verGetArrayElemType(tiArray), tiRetVal, false),
+ "type of array incompatible with type operand");
+ tiRetVal.NormaliseForStack();
+ }
+
+ // If it's a reference type or generic variable type
+ // then just generate code as though it's a ldelem.ref instruction
+ if (!eeIsValueClass(ldelemClsHnd))
+ {
+ lclTyp = TYP_REF;
+ opcode = CEE_LDELEM_REF;
+ }
+ else
+ {
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(ldelemClsHnd);
+ lclTyp = JITtype2varType(jitTyp);
+ tiRetVal = verMakeTypeInfo(ldelemClsHnd); // precise type always needed for struct
+ tiRetVal.NormaliseForStack();
+ }
+ goto ARR_LD_POST_VERIFY;
+
+ case CEE_LDELEM_I1:
+ lclTyp = TYP_BYTE;
+ goto ARR_LD;
+ case CEE_LDELEM_I2:
+ lclTyp = TYP_SHORT;
+ goto ARR_LD;
+ case CEE_LDELEM_I:
+ lclTyp = TYP_I_IMPL;
+ goto ARR_LD;
+
+ // Should be UINT, but since no platform widens 4->8 bytes it doesn't matter
+ // and treating it as TYP_INT avoids other asserts.
+ case CEE_LDELEM_U4:
+ lclTyp = TYP_INT;
+ goto ARR_LD;
+
+ case CEE_LDELEM_I4:
+ lclTyp = TYP_INT;
+ goto ARR_LD;
+ case CEE_LDELEM_I8:
+ lclTyp = TYP_LONG;
+ goto ARR_LD;
+ case CEE_LDELEM_REF:
+ lclTyp = TYP_REF;
+ goto ARR_LD;
+ case CEE_LDELEM_R4:
+ lclTyp = TYP_FLOAT;
+ goto ARR_LD;
+ case CEE_LDELEM_R8:
+ lclTyp = TYP_DOUBLE;
+ goto ARR_LD;
+ case CEE_LDELEM_U1:
+ lclTyp = TYP_UBYTE;
+ goto ARR_LD;
+ case CEE_LDELEM_U2:
+ lclTyp = TYP_CHAR;
+ goto ARR_LD;
+
+ ARR_LD:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(1).seTypeInfo;
+ typeInfo tiIndex = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ if (tiArray.IsNullObjRef())
+ {
+ if (lclTyp == TYP_REF)
+ { // we will say a deref of a null array yields a null ref
+ tiRetVal = typeInfo(TI_NULL);
+ }
+ else
+ {
+ tiRetVal = typeInfo(lclTyp);
+ }
+ }
+ else
+ {
+ tiRetVal = verGetArrayElemType(tiArray);
+ typeInfo arrayElemTi = typeInfo(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_LDELEM_I)
+ {
+ arrayElemTi = typeInfo::nativeInt();
+ }
+
+ if (lclTyp != TYP_REF && lclTyp != TYP_STRUCT)
+ {
+ Verify(typeInfo::AreEquivalent(tiRetVal, arrayElemTi), "bad array");
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ Verify(tiRetVal.IsType(arrayElemTi.GetType()), "bad array");
+ }
+ }
+ tiRetVal.NormaliseForStack();
+ }
+ ARR_LD_POST_VERIFY:
+
+ /* Pull the index value and array address */
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_REF);
+
+ /* Check for null pointer - in the inliner case we simply abort */
+
+ if (compIsForInlining())
+ {
+ if (op1->gtOper == GT_CNS_INT)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NULL_FOR_LDELEM);
+ return;
+ }
+ }
+
+ op1 = impCheckForNullPointer(op1);
+
+ /* Mark the block as containing an index expression */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ if (op2->gtOper == GT_LCL_VAR || op2->gtOper == GT_CNS_INT || op2->gtOper == GT_ADD)
+ {
+ block->bbFlags |= BBF_HAS_IDX_LEN;
+ optMethodFlags |= OMF_HAS_ARRAYREF;
+ }
+ }
+
+ /* Create the index node and push it on the stack */
+
+ op1 = gtNewIndexRef(lclTyp, op1, op2);
+
+ ldstruct = (opcode == CEE_LDELEM && lclTyp == TYP_STRUCT);
+
+ if ((opcode == CEE_LDELEMA) || ldstruct ||
+ (ldelemClsHnd != DUMMY_INIT(NULL) && eeIsValueClass(ldelemClsHnd)))
+ {
+ assert(ldelemClsHnd != DUMMY_INIT(NULL));
+
+ // remember the element size
+ if (lclTyp == TYP_REF)
+ {
+ op1->gtIndex.gtIndElemSize = sizeof(void*);
+ }
+ else
+ {
+                            // If ldelemClsHnd is precisely a primitive type, use that; otherwise, preserve the struct type.
+ if (info.compCompHnd->getTypeForPrimitiveValueClass(ldelemClsHnd) == CORINFO_TYPE_UNDEF)
+ {
+ op1->gtIndex.gtStructElemClass = ldelemClsHnd;
+ }
+ assert(lclTyp != TYP_STRUCT || op1->gtIndex.gtStructElemClass != nullptr);
+ if (lclTyp == TYP_STRUCT)
+ {
+ size = info.compCompHnd->getClassSize(ldelemClsHnd);
+ op1->gtIndex.gtIndElemSize = size;
+ op1->gtType = lclTyp;
+ }
+ }
+
+ if ((opcode == CEE_LDELEMA) || ldstruct)
+ {
+ // wrap it in a &
+ lclTyp = TYP_BYREF;
+
+ op1 = gtNewOperNode(GT_ADDR, lclTyp, op1);
+ }
+ else
+ {
+ assert(lclTyp != TYP_STRUCT);
+ }
+ }
+
+ if (ldstruct)
+ {
+ // Create an OBJ for the result
+ op1 = gtNewObjNode(ldelemClsHnd, op1);
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ // stelem for reference and value types
+ case CEE_STELEM:
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ stelemClsHnd = resolvedToken.hClass;
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(2).seTypeInfo;
+ typeInfo tiIndex = impStackTop(1).seTypeInfo;
+ typeInfo tiValue = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ typeInfo arrayElem = verMakeTypeInfo(stelemClsHnd);
+
+ Verify(tiArray.IsNullObjRef() || tiCompatibleWith(arrayElem, verGetArrayElemType(tiArray), false),
+ "type operand incompatible with array element type");
+ arrayElem.NormaliseForStack();
+ Verify(tiCompatibleWith(tiValue, arrayElem, true), "value incompatible with type operand");
+ }
+
+ // If it's a reference type just behave as though it's a stelem.ref instruction
+ if (!eeIsValueClass(stelemClsHnd))
+ {
+ goto STELEM_REF_POST_VERIFY;
+ }
+
+ // Otherwise extract the type
+ {
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(stelemClsHnd);
+ lclTyp = JITtype2varType(jitTyp);
+ goto ARR_ST_POST_VERIFY;
+ }
+
+ case CEE_STELEM_REF:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(2).seTypeInfo;
+ typeInfo tiIndex = impStackTop(1).seTypeInfo;
+ typeInfo tiValue = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ Verify(tiValue.IsObjRef(), "bad value");
+
+                    // We only check that it is an object reference; the helper does additional checks
+ Verify(tiArray.IsNullObjRef() || verGetArrayElemType(tiArray).IsType(TI_REF), "bad array");
+ }
+
+ arrayNodeTo = impStackTop(2).val;
+ arrayNodeToIndex = impStackTop(1).val;
+ arrayNodeFrom = impStackTop().val;
+
+ //
+ // Note that it is not legal to optimize away CORINFO_HELP_ARRADDR_ST in a
+                // lot of cases because of covariance, i.e. foo[] can be cast to object[].
+ //
+
+                // Check for assignment to the same array, i.e. arrLcl[i] = arrLcl[j].
+ // This does not need CORINFO_HELP_ARRADDR_ST
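+                // (The value being stored was just loaded from the same, non-address-exposed array
+                // local, so it necessarily has that array's element type and the covariance check
+                // done by the helper would be redundant.)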
+
+ if (arrayNodeFrom->OperGet() == GT_INDEX && arrayNodeFrom->gtOp.gtOp1->gtOper == GT_LCL_VAR &&
+ arrayNodeTo->gtOper == GT_LCL_VAR &&
+ arrayNodeTo->gtLclVarCommon.gtLclNum == arrayNodeFrom->gtOp.gtOp1->gtLclVarCommon.gtLclNum &&
+ !lvaTable[arrayNodeTo->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ lclTyp = TYP_REF;
+ goto ARR_ST_POST_VERIFY;
+ }
+
+ // Check for assignment of NULL. This does not need CORINFO_HELP_ARRADDR_ST
+
+ if (arrayNodeFrom->OperGet() == GT_CNS_INT)
+ {
+ assert(arrayNodeFrom->gtType == TYP_REF && arrayNodeFrom->gtIntCon.gtIconVal == 0);
+
+ lclTyp = TYP_REF;
+ goto ARR_ST_POST_VERIFY;
+ }
+
+ STELEM_REF_POST_VERIFY:
+
+ /* Call a helper function to do the assignment */
+ op1 = gtNewHelperCallNode(CORINFO_HELP_ARRADDR_ST, TYP_VOID, 0, impPopList(3, &flags, nullptr));
+
+ goto SPILL_APPEND;
+
+ case CEE_STELEM_I1:
+ lclTyp = TYP_BYTE;
+ goto ARR_ST;
+ case CEE_STELEM_I2:
+ lclTyp = TYP_SHORT;
+ goto ARR_ST;
+ case CEE_STELEM_I:
+ lclTyp = TYP_I_IMPL;
+ goto ARR_ST;
+ case CEE_STELEM_I4:
+ lclTyp = TYP_INT;
+ goto ARR_ST;
+ case CEE_STELEM_I8:
+ lclTyp = TYP_LONG;
+ goto ARR_ST;
+ case CEE_STELEM_R4:
+ lclTyp = TYP_FLOAT;
+ goto ARR_ST;
+ case CEE_STELEM_R8:
+ lclTyp = TYP_DOUBLE;
+ goto ARR_ST;
+
+ ARR_ST:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop(2).seTypeInfo;
+ typeInfo tiIndex = impStackTop(1).seTypeInfo;
+ typeInfo tiValue = impStackTop().seTypeInfo;
+
+ // As per ECMA 'index' specified can be either int32 or native int.
+ Verify(tiIndex.IsIntOrNativeIntType(), "bad index");
+ typeInfo arrayElem = typeInfo(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_STELEM_I)
+ {
+ arrayElem = typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+ Verify(tiArray.IsNullObjRef() || typeInfo::AreEquivalent(verGetArrayElemType(tiArray), arrayElem),
+ "bad array");
+
+ Verify(tiCompatibleWith(NormaliseForStack(tiValue), arrayElem.NormaliseForStack(), true),
+ "bad value");
+ }
+
+ ARR_ST_POST_VERIFY:
+ /* The strict order of evaluation is LHS-operands, RHS-operands,
+ range-check, and then assignment. However, codegen currently
+                   does the range-check before evaluating the RHS-operands. So to
+ maintain strict ordering, we spill the stack. */
+
+ if (impStackTop().val->gtFlags & GTF_SIDE_EFFECT)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
+ "Strict ordering of exceptions for Array store"));
+ }
+
+ /* Pull the new value from the stack */
+ op2 = impPopStack().val;
+
+ /* Pull the index value */
+ op1 = impPopStack().val;
+
+ /* Pull the array address */
+ op3 = impPopStack().val;
+
+ assertImp(op3->gtType == TYP_REF);
+ if (op2->IsVarAddr())
+ {
+ op2->gtType = TYP_I_IMPL;
+ }
+
+ op3 = impCheckForNullPointer(op3);
+
+ // Mark the block as containing an index expression
+
+ if (op3->gtOper == GT_LCL_VAR)
+ {
+ if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CNS_INT || op1->gtOper == GT_ADD)
+ {
+ block->bbFlags |= BBF_HAS_IDX_LEN;
+ optMethodFlags |= OMF_HAS_ARRAYREF;
+ }
+ }
+
+ /* Create the index node */
+
+ op1 = gtNewIndexRef(lclTyp, op3, op1);
+
+ /* Create the assignment node and append it */
+
+ if (lclTyp == TYP_STRUCT)
+ {
+ assert(stelemClsHnd != DUMMY_INIT(NULL));
+
+ op1->gtIndex.gtStructElemClass = stelemClsHnd;
+ op1->gtIndex.gtIndElemSize = info.compCompHnd->getClassSize(stelemClsHnd);
+ }
+ if (varTypeIsStruct(op1))
+ {
+ op1 = impAssignStruct(op1, op2, stelemClsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ else
+ {
+ op2 = impImplicitR4orR8Cast(op2, op1->TypeGet());
+ op1 = gtNewAssignNode(op1, op2);
+ }
+
+ /* Mark the expression as containing an assignment */
+
+ op1->gtFlags |= GTF_ASG;
+
+ goto SPILL_APPEND;
+
+ case CEE_ADD:
+ oper = GT_ADD;
+ goto MATH_OP2;
+
+ case CEE_ADD_OVF:
+ uns = false;
+ goto ADD_OVF;
+ case CEE_ADD_OVF_UN:
+ uns = true;
+ goto ADD_OVF;
+
+ ADD_OVF:
+ ovfl = true;
+ callNode = false;
+ oper = GT_ADD;
+ goto MATH_OP2_FLAGS;
+
+ case CEE_SUB:
+ oper = GT_SUB;
+ goto MATH_OP2;
+
+ case CEE_SUB_OVF:
+ uns = false;
+ goto SUB_OVF;
+ case CEE_SUB_OVF_UN:
+ uns = true;
+ goto SUB_OVF;
+
+ SUB_OVF:
+ ovfl = true;
+ callNode = false;
+ oper = GT_SUB;
+ goto MATH_OP2_FLAGS;
+
+ case CEE_MUL:
+ oper = GT_MUL;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_MUL_OVF:
+ uns = false;
+ goto MUL_OVF;
+ case CEE_MUL_OVF_UN:
+ uns = true;
+ goto MUL_OVF;
+
+ MUL_OVF:
+ ovfl = true;
+ oper = GT_MUL;
+ goto MATH_MAYBE_CALL_OVF;
+
+ // Other binary math operations
+
+ case CEE_DIV:
+ oper = GT_DIV;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_DIV_UN:
+ oper = GT_UDIV;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_REM:
+ oper = GT_MOD;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ case CEE_REM_UN:
+ oper = GT_UMOD;
+ goto MATH_MAYBE_CALL_NO_OVF;
+
+ MATH_MAYBE_CALL_NO_OVF:
+ ovfl = false;
+ MATH_MAYBE_CALL_OVF:
+ // Morpher has some complex logic about when to turn different
+ // typed nodes on different platforms into helper calls. We
+ // need to either duplicate that logic here, or just
+ // pessimistically make all the nodes large enough to become
+ // call nodes. Since call nodes aren't that much larger and
+ // these opcodes are infrequent enough I chose the latter.
+ callNode = true;
+ goto MATH_OP2_FLAGS;
+
+ case CEE_AND:
+ oper = GT_AND;
+ goto MATH_OP2;
+ case CEE_OR:
+ oper = GT_OR;
+ goto MATH_OP2;
+ case CEE_XOR:
+ oper = GT_XOR;
+ goto MATH_OP2;
+
+ MATH_OP2: // For default values of 'ovfl' and 'callNode'
+
+ ovfl = false;
+ callNode = false;
+
+ MATH_OP2_FLAGS: // If 'ovfl' and 'callNode' have already been set
+
+ /* Pull two values and push back the result */
+
+ if (tiVerificationNeeded)
+ {
+ const typeInfo& tiOp1 = impStackTop(1).seTypeInfo;
+ const typeInfo& tiOp2 = impStackTop().seTypeInfo;
+
+ Verify(tiCompatibleWith(tiOp1, tiOp2, true), "different arg type");
+ if (oper == GT_ADD || oper == GT_DIV || oper == GT_SUB || oper == GT_MUL || oper == GT_MOD)
+ {
+ Verify(tiOp1.IsNumberType(), "not number");
+ }
+ else
+ {
+ Verify(tiOp1.IsIntegerType(), "not integer");
+ }
+
+ Verify(!ovfl || tiOp1.IsIntegerType(), "not integer");
+
+ tiRetVal = tiOp1;
+
+#ifdef _TARGET_64BIT_
+ if (tiOp2.IsNativeIntType())
+ {
+ tiRetVal = tiOp2;
+ }
+#endif // _TARGET_64BIT_
+ }
+
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+#if !CPU_HAS_FP_SUPPORT
+ if (varTypeIsFloating(op1->gtType))
+ {
+ callNode = true;
+ }
+#endif
+ /* Can't do arithmetic with references */
+ assertImp(genActualType(op1->TypeGet()) != TYP_REF && genActualType(op2->TypeGet()) != TYP_REF);
+
+                // Change both to TYP_I_IMPL (impBashVarAddrsToI won't change a true byref; it only
+                // changes ones that point into the stack)
+ impBashVarAddrsToI(op1, op2);
+
+ type = impGetByRefResultType(oper, uns, &op1, &op2);
+
+ assert(!ovfl || !varTypeIsFloating(op1->gtType));
+
+ /* Special case: "int+0", "int-0", "int*1", "int/1" */
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ if ((op2->IsIntegralConst(0) && (oper == GT_ADD || oper == GT_SUB)) ||
+ (op2->IsIntegralConst(1) && (oper == GT_MUL || oper == GT_DIV)))
+
+ {
+ impPushOnStack(op1, tiRetVal);
+ break;
+ }
+ }
+
+#if !FEATURE_X87_DOUBLES
+ // We can generate a TYP_FLOAT operation that has a TYP_DOUBLE operand
+ //
+ if (varTypeIsFloating(type) && varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType))
+ {
+ if (op1->TypeGet() != type)
+ {
+ // We insert a cast of op1 to 'type'
+ op1 = gtNewCastNode(type, op1, type);
+ }
+ if (op2->TypeGet() != type)
+ {
+ // We insert a cast of op2 to 'type'
+ op2 = gtNewCastNode(type, op2, type);
+ }
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+#if SMALL_TREE_NODES
+ if (callNode)
+ {
+ /* These operators can later be transformed into 'GT_CALL' */
+
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_MUL]);
+#ifndef _TARGET_ARM_
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_DIV]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_UDIV]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_MOD]);
+ assert(GenTree::s_gtNodeSizes[GT_CALL] > GenTree::s_gtNodeSizes[GT_UMOD]);
+#endif
+ // It's tempting to use LargeOpOpcode() here, but this logic is *not* saying
+ // that we'll need to transform into a general large node, but rather specifically
+ // to a call: by doing it this way, things keep working if there are multiple sizes,
+ // and a CALL is no longer the largest.
+ // That said, as of now it *is* a large node, so we'll do this with an assert rather
+ // than an "if".
+ assert(GenTree::s_gtNodeSizes[GT_CALL] == TREE_NODE_SZ_LARGE);
+ op1 = new (this, GT_CALL) GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ true));
+ }
+ else
+#endif // SMALL_TREE_NODES
+ {
+ op1 = gtNewOperNode(oper, type, op1, op2);
+ }
+
+ /* Special case: integer/long division may throw an exception */
+
+ if (varTypeIsIntegral(op1->TypeGet()) && op1->OperMayThrow())
+ {
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+
+ if (ovfl)
+ {
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL);
+ if (ovflType != TYP_UNKNOWN)
+ {
+ op1->gtType = ovflType;
+ }
+ op1->gtFlags |= (GTF_EXCEPT | GTF_OVERFLOW);
+ if (uns)
+ {
+ op1->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_SHL:
+ oper = GT_LSH;
+ goto CEE_SH_OP2;
+
+ case CEE_SHR:
+ oper = GT_RSH;
+ goto CEE_SH_OP2;
+ case CEE_SHR_UN:
+ oper = GT_RSZ;
+ goto CEE_SH_OP2;
+
+ CEE_SH_OP2:
+ if (tiVerificationNeeded)
+ {
+ const typeInfo& tiVal = impStackTop(1).seTypeInfo;
+ const typeInfo& tiShift = impStackTop(0).seTypeInfo;
+ Verify(tiVal.IsIntegerType() && tiShift.IsType(TI_INT), "Bad shift args");
+ tiRetVal = tiVal;
+ }
+ op2 = impPopStack().val;
+ op1 = impPopStack().val; // operand to be shifted
+ impBashVarAddrsToI(op1, op2);
+
+ type = genActualType(op1->TypeGet());
+ op1 = gtNewOperNode(oper, type, op1, op2);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_NOT:
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsIntegerType(), "bad int value");
+ }
+
+ op1 = impPopStack().val;
+ impBashVarAddrsToI(op1, nullptr);
+ type = genActualType(op1->TypeGet());
+ impPushOnStack(gtNewOperNode(GT_NOT, type, op1), tiRetVal);
+ break;
+
+ case CEE_CKFINITE:
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsType(TI_DOUBLE), "bad R value");
+ }
+ op1 = impPopStack().val;
+ type = op1->TypeGet();
+ op1 = gtNewOperNode(GT_CKFINITE, type, op1);
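+                    // ckfinite throws an ArithmeticException at run time for NaN or infinity,
+                    // so mark the node as throwing.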
+ op1->gtFlags |= GTF_EXCEPT;
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_LEAVE:
+
+ val = getI4LittleEndian(codeAddr); // jump distance
+ jmpAddr = (IL_OFFSET)((codeAddr - info.compCode + sizeof(__int32)) + val);
+ goto LEAVE;
+
+ case CEE_LEAVE_S:
+ val = getI1LittleEndian(codeAddr); // jump distance
+ jmpAddr = (IL_OFFSET)((codeAddr - info.compCode + sizeof(__int8)) + val);
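+                    // In both forms the branch offset is relative to the start of the next IL instruction.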
+
+ LEAVE:
+
+ if (compIsForInlining())
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_LEAVE);
+ return;
+ }
+
+ JITDUMP(" %04X", jmpAddr);
+ if (block->bbJumpKind != BBJ_LEAVE)
+ {
+ impResetLeaveBlock(block, jmpAddr);
+ }
+
+ assert(jmpAddr == block->bbJumpDest->bbCodeOffs);
+ impImportLeave(block);
+ impNoteBranchOffs();
+
+ break;
+
+ case CEE_BR:
+ case CEE_BR_S:
+ jmpDist = (sz == 1) ? getI1LittleEndian(codeAddr) : getI4LittleEndian(codeAddr);
+
+ if (compIsForInlining() && jmpDist == 0)
+ {
+ break; /* NOP */
+ }
+
+ impNoteBranchOffs();
+ break;
+
+ case CEE_BRTRUE:
+ case CEE_BRTRUE_S:
+ case CEE_BRFALSE:
+ case CEE_BRFALSE_S:
+
+ /* Pop the comparand (now there's a neat term) from the stack */
+ if (tiVerificationNeeded)
+ {
+ typeInfo& tiVal = impStackTop().seTypeInfo;
+ Verify(tiVal.IsObjRef() || tiVal.IsByRef() || tiVal.IsIntegerType() || tiVal.IsMethod(),
+ "bad value");
+ }
+
+ op1 = impPopStack().val;
+ type = op1->TypeGet();
+
+                // brfalse and brtrue are only allowed on I4, refs, and byrefs.
+ if (!opts.MinOpts() && !opts.compDbgCode && block->bbJumpDest == block->bbNext)
+ {
+ block->bbJumpKind = BBJ_NONE;
+
+ if (op1->gtFlags & GTF_GLOB_EFFECT)
+ {
+ op1 = gtUnusedValNode(op1);
+ goto SPILL_APPEND;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if (op1->OperIsCompare())
+ {
+ if (opcode == CEE_BRFALSE || opcode == CEE_BRFALSE_S)
+ {
+ // Flip the sense of the compare
+
+ op1 = gtReverseCond(op1);
+ }
+ }
+ else
+ {
+ /* We'll compare against an equally-sized integer 0 */
+ /* For small types, we always compare against int */
+ op2 = gtNewZeroConNode(genActualType(op1->gtType));
+
+ /* Create the comparison operator and try to fold it */
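+                        // (e.g. 'brtrue x' on a plain value becomes the relop NE(x, 0), which feeds
+                        // the conditional jump handling below)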
+
+ oper = (opcode == CEE_BRTRUE || opcode == CEE_BRTRUE_S) ? GT_NE : GT_EQ;
+ op1 = gtNewOperNode(oper, TYP_INT, op1, op2);
+ }
+
+ // fall through
+
+ COND_JUMP:
+
+ seenConditionalJump = true;
+
+ /* Fold comparison if we can */
+
+ op1 = gtFoldExpr(op1);
+
+ /* Try to fold the really simple cases like 'iconst *, ifne/ifeq'*/
+ /* Don't make any blocks unreachable in import only mode */
+
+ if ((op1->gtOper == GT_CNS_INT) && !compIsForImportOnly())
+ {
+ /* gtFoldExpr() should prevent this as we don't want to make any blocks
+ unreachable under compDbgCode */
+ assert(!opts.compDbgCode);
+
+ BBjumpKinds foldedJumpKind = (BBjumpKinds)(op1->gtIntCon.gtIconVal ? BBJ_ALWAYS : BBJ_NONE);
+ assertImp((block->bbJumpKind == BBJ_COND) // normal case
+ || (block->bbJumpKind == foldedJumpKind)); // this can happen if we are reimporting the
+ // block for the second time
+
+ block->bbJumpKind = foldedJumpKind;
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (op1->gtIntCon.gtIconVal)
+ {
+ printf("\nThe conditional jump becomes an unconditional jump to BB%02u\n",
+ block->bbJumpDest->bbNum);
+ }
+ else
+ {
+ printf("\nThe block falls through into the next BB%02u\n", block->bbNext->bbNum);
+ }
+ }
+#endif
+ break;
+ }
+
+ op1 = gtNewOperNode(GT_JTRUE, TYP_VOID, op1);
+
+ /* GT_JTRUE is handled specially for non-empty stacks. See 'addStmt'
+ in impImportBlock(block). For correct line numbers, spill stack. */
+
+ if (opts.compDbgCode && impCurStmtOffs != BAD_IL_OFFSET)
+ {
+ impSpillStackEnsure(true);
+ }
+
+ goto SPILL_APPEND;
+
+ case CEE_CEQ:
+ oper = GT_EQ;
+ uns = false;
+ goto CMP_2_OPs;
+ case CEE_CGT_UN:
+ oper = GT_GT;
+ uns = true;
+ goto CMP_2_OPs;
+ case CEE_CGT:
+ oper = GT_GT;
+ uns = false;
+ goto CMP_2_OPs;
+ case CEE_CLT_UN:
+ oper = GT_LT;
+ uns = true;
+ goto CMP_2_OPs;
+ case CEE_CLT:
+ oper = GT_LT;
+ uns = false;
+ goto CMP_2_OPs;
+
+ CMP_2_OPs:
+ if (tiVerificationNeeded)
+ {
+ verVerifyCond(impStackTop(1).seTypeInfo, impStackTop().seTypeInfo, opcode);
+ tiRetVal = typeInfo(TI_INT);
+ }
+
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
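+                    // If one operand is native-int sized and the other is a 32-bit int, widen the int
+                    // side so both operands of the compare have the same width.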
+#ifdef _TARGET_64BIT_
+ if (varTypeIsI(op1->TypeGet()) && (genActualType(op2->TypeGet()) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ else if (varTypeIsI(op2->TypeGet()) && (genActualType(op1->TypeGet()) == TYP_INT))
+ {
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) ||
+ varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) ||
+ varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType));
+
+ /* Create the comparison node */
+
+ op1 = gtNewOperNode(oper, TYP_INT, op1, op2);
+
+ /* TODO: setting both flags when only one is appropriate */
+ if (opcode == CEE_CGT_UN || opcode == CEE_CLT_UN)
+ {
+ op1->gtFlags |= GTF_RELOP_NAN_UN | GTF_UNSIGNED;
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_BEQ_S:
+ case CEE_BEQ:
+ oper = GT_EQ;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BGE_S:
+ case CEE_BGE:
+ oper = GT_GE;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BGE_UN_S:
+ case CEE_BGE_UN:
+ oper = GT_GE;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BGT_S:
+ case CEE_BGT:
+ oper = GT_GT;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BGT_UN_S:
+ case CEE_BGT_UN:
+ oper = GT_GT;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BLE_S:
+ case CEE_BLE:
+ oper = GT_LE;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BLE_UN_S:
+ case CEE_BLE_UN:
+ oper = GT_LE;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BLT_S:
+ case CEE_BLT:
+ oper = GT_LT;
+ goto CMP_2_OPs_AND_BR;
+
+ case CEE_BLT_UN_S:
+ case CEE_BLT_UN:
+ oper = GT_LT;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ case CEE_BNE_UN_S:
+ case CEE_BNE_UN:
+ oper = GT_NE;
+ goto CMP_2_OPs_AND_BR_UN;
+
+ CMP_2_OPs_AND_BR_UN:
+ uns = true;
+ unordered = true;
+ goto CMP_2_OPs_AND_BR_ALL;
+ CMP_2_OPs_AND_BR:
+ uns = false;
+ unordered = false;
+ goto CMP_2_OPs_AND_BR_ALL;
+ CMP_2_OPs_AND_BR_ALL:
+
+ if (tiVerificationNeeded)
+ {
+ verVerifyCond(impStackTop(1).seTypeInfo, impStackTop().seTypeInfo, opcode);
+ }
+
+ /* Pull two values */
+ op2 = impPopStack().val;
+ op1 = impPopStack().val;
+
+#ifdef _TARGET_64BIT_
+ if ((op1->TypeGet() == TYP_I_IMPL) && (genActualType(op2->TypeGet()) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+ else if ((op2->TypeGet() == TYP_I_IMPL) && (genActualType(op1->TypeGet()) == TYP_INT))
+ {
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, (var_types)(uns ? TYP_U_IMPL : TYP_I_IMPL));
+ }
+#endif // _TARGET_64BIT_
+
+ assertImp(genActualType(op1->TypeGet()) == genActualType(op2->TypeGet()) ||
+ varTypeIsI(op1->TypeGet()) && varTypeIsI(op2->TypeGet()) ||
+ varTypeIsFloating(op1->gtType) && varTypeIsFloating(op2->gtType));
+
+ if (!opts.MinOpts() && !opts.compDbgCode && block->bbJumpDest == block->bbNext)
+ {
+ block->bbJumpKind = BBJ_NONE;
+
+ if (op1->gtFlags & GTF_GLOB_EFFECT)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
+ "Branch to next Optimization, op1 side effect"));
+ impAppendTree(gtUnusedValNode(op1), (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+ if (op2->gtFlags & GTF_GLOB_EFFECT)
+ {
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG(
+ "Branch to next Optimization, op2 side effect"));
+ impAppendTree(gtUnusedValNode(op2), (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+ }
+
+#ifdef DEBUG
+ if ((op1->gtFlags | op2->gtFlags) & GTF_GLOB_EFFECT)
+ {
+ impNoteLastILoffs();
+ }
+#endif
+ break;
+ }
+#if !FEATURE_X87_DOUBLES
+                // We can generate a compare of differently sized floating point op1 and op2.
+                // We insert a cast to make the sizes match.
+ //
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ if (op1->TypeGet() != op2->TypeGet())
+ {
+ assert(varTypeIsFloating(op2->TypeGet()));
+
+ // say op1=double, op2=float. To avoid loss of precision
+ // while comparing, op2 is converted to double and double
+ // comparison is done.
+ if (op1->TypeGet() == TYP_DOUBLE)
+ {
+ // We insert a cast of op2 to TYP_DOUBLE
+ op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
+ }
+ else if (op2->TypeGet() == TYP_DOUBLE)
+ {
+ // We insert a cast of op1 to TYP_DOUBLE
+ op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
+ }
+ }
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+ /* Create and append the operator */
+
+ op1 = gtNewOperNode(oper, TYP_INT, op1, op2);
+
+ if (uns)
+ {
+ op1->gtFlags |= GTF_UNSIGNED;
+ }
+
+ if (unordered)
+ {
+ op1->gtFlags |= GTF_RELOP_NAN_UN;
+ }
+
+ goto COND_JUMP;
+
+ case CEE_SWITCH:
+ assert(!compIsForInlining());
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsType(TI_INT), "Bad switch val");
+ }
+ /* Pop the switch value off the stack */
+ op1 = impPopStack().val;
+ assertImp(genActualTypeIsIntOrI(op1->TypeGet()));
+
+#ifdef _TARGET_64BIT_
+ // Widen 'op1' on 64-bit targets
+ if (op1->TypeGet() != TYP_I_IMPL)
+ {
+ if (op1->OperGet() == GT_CNS_INT)
+ {
+ op1->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+ assert(genActualType(op1->TypeGet()) == TYP_I_IMPL);
+
+ /* We can create a switch node */
+
+ op1 = gtNewOperNode(GT_SWITCH, TYP_VOID, op1);
+
+ val = (int)getU4LittleEndian(codeAddr);
+ codeAddr += 4 + val * 4; // skip over the switch-table
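+                    // (4 bytes for the case count just read plus 4 bytes per branch target)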
+
+ goto SPILL_APPEND;
+
+ /************************** Casting OPCODES ***************************/
+
+ case CEE_CONV_OVF_I1:
+ lclTyp = TYP_BYTE;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I2:
+ lclTyp = TYP_SHORT;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I:
+ lclTyp = TYP_I_IMPL;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I4:
+ lclTyp = TYP_INT;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_I8:
+ lclTyp = TYP_LONG;
+ goto CONV_OVF;
+
+ case CEE_CONV_OVF_U1:
+ lclTyp = TYP_UBYTE;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U2:
+ lclTyp = TYP_CHAR;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U:
+ lclTyp = TYP_U_IMPL;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U4:
+ lclTyp = TYP_UINT;
+ goto CONV_OVF;
+ case CEE_CONV_OVF_U8:
+ lclTyp = TYP_ULONG;
+ goto CONV_OVF;
+
+ case CEE_CONV_OVF_I1_UN:
+ lclTyp = TYP_BYTE;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I2_UN:
+ lclTyp = TYP_SHORT;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I_UN:
+ lclTyp = TYP_I_IMPL;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I4_UN:
+ lclTyp = TYP_INT;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_I8_UN:
+ lclTyp = TYP_LONG;
+ goto CONV_OVF_UN;
+
+ case CEE_CONV_OVF_U1_UN:
+ lclTyp = TYP_UBYTE;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U2_UN:
+ lclTyp = TYP_CHAR;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U_UN:
+ lclTyp = TYP_U_IMPL;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U4_UN:
+ lclTyp = TYP_UINT;
+ goto CONV_OVF_UN;
+ case CEE_CONV_OVF_U8_UN:
+ lclTyp = TYP_ULONG;
+ goto CONV_OVF_UN;
+
+ CONV_OVF_UN:
+ uns = true;
+ goto CONV_OVF_COMMON;
+ CONV_OVF:
+ uns = false;
+ goto CONV_OVF_COMMON;
+
+ CONV_OVF_COMMON:
+ ovfl = true;
+ goto _CONV;
+
+ case CEE_CONV_I1:
+ lclTyp = TYP_BYTE;
+ goto CONV;
+ case CEE_CONV_I2:
+ lclTyp = TYP_SHORT;
+ goto CONV;
+ case CEE_CONV_I:
+ lclTyp = TYP_I_IMPL;
+ goto CONV;
+ case CEE_CONV_I4:
+ lclTyp = TYP_INT;
+ goto CONV;
+ case CEE_CONV_I8:
+ lclTyp = TYP_LONG;
+ goto CONV;
+
+ case CEE_CONV_U1:
+ lclTyp = TYP_UBYTE;
+ goto CONV;
+ case CEE_CONV_U2:
+ lclTyp = TYP_CHAR;
+ goto CONV;
+#if (REGSIZE_BYTES == 8)
+ case CEE_CONV_U:
+ lclTyp = TYP_U_IMPL;
+ goto CONV_UN;
+#else
+ case CEE_CONV_U:
+ lclTyp = TYP_U_IMPL;
+ goto CONV;
+#endif
+ case CEE_CONV_U4:
+ lclTyp = TYP_UINT;
+ goto CONV;
+ case CEE_CONV_U8:
+ lclTyp = TYP_ULONG;
+ goto CONV_UN;
+
+ case CEE_CONV_R4:
+ lclTyp = TYP_FLOAT;
+ goto CONV;
+ case CEE_CONV_R8:
+ lclTyp = TYP_DOUBLE;
+ goto CONV;
+
+ case CEE_CONV_R_UN:
+ lclTyp = TYP_DOUBLE;
+ goto CONV_UN;
+
+ CONV_UN:
+ uns = true;
+ ovfl = false;
+ goto _CONV;
+
+ CONV:
+ uns = false;
+ ovfl = false;
+ goto _CONV;
+
+ _CONV:
+ // just check that we have a number on the stack
+ if (tiVerificationNeeded)
+ {
+ const typeInfo& tiVal = impStackTop().seTypeInfo;
+ Verify(tiVal.IsNumberType(), "bad arg");
+
+#ifdef _TARGET_64BIT_
+ bool isNative = false;
+
+ switch (opcode)
+ {
+ case CEE_CONV_OVF_I:
+ case CEE_CONV_OVF_I_UN:
+ case CEE_CONV_I:
+ case CEE_CONV_OVF_U:
+ case CEE_CONV_OVF_U_UN:
+ case CEE_CONV_U:
+ isNative = true;
+ default:
+ // leave 'isNative' = false;
+ break;
+ }
+ if (isNative)
+ {
+ tiRetVal = typeInfo::nativeInt();
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ tiRetVal = typeInfo(lclTyp).NormaliseForStack();
+ }
+ }
+
+                // Only conversions from FLOAT or DOUBLE to an integer type, and conversions from
+                // ULONG (or LONG on ARM) to DOUBLE, are morphed into helper calls.
+
+ if (varTypeIsFloating(lclTyp))
+ {
+ callNode = varTypeIsLong(impStackTop().val) || uns // uint->dbl gets turned into uint->long->dbl
+#ifdef _TARGET_64BIT_
+ // TODO-ARM64-Bug?: This was AMD64; I enabled it for ARM64 also. OK?
+ // TYP_BYREF could be used as TYP_I_IMPL which is long.
+ // TODO-CQ: remove this when we lower casts long/ulong --> float/double
+ // and generate SSE2 code instead of going through helper calls.
+ || (impStackTop().val->TypeGet() == TYP_BYREF)
+#endif
+ ;
+ }
+ else
+ {
+ callNode = varTypeIsFloating(impStackTop().val->TypeGet());
+ }
+
+ // At this point uns, ovf, callNode all set
+
+ op1 = impPopStack().val;
+ impBashVarAddrsToI(op1);
+
+ if (varTypeIsSmall(lclTyp) && !ovfl && op1->gtType == TYP_INT && op1->gtOper == GT_AND)
+ {
+ op2 = op1->gtOp.gtOp2;
+
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ ssize_t mask, umask;
+
+ switch (lclTyp)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ mask = 0x00FF;
+ umask = 0x007F;
+ break;
+ case TYP_CHAR:
+ case TYP_SHORT:
+ mask = 0xFFFF;
+ umask = 0x7FFF;
+ break;
+
+ default:
+ assert(!"unexpected type");
+ return;
+ }
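+                            // If the AND mask already confines the value to the target small type's range
+                            // (e.g. (x & 0x7F) before a conv.i1), the cast below is redundant.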
+
+ if (((ival & umask) == ival) || ((ival & mask) == ival && uns))
+ {
+ /* Toss the cast, it's a waste of time */
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+ }
+ else if (ival == mask)
+ {
+                            /* Toss the masking, it's a waste of time, since
+                               we sign-extend from the small value anyway */
+
+ op1 = op1->gtOp.gtOp1;
+ }
+ }
+ }
+
+ /* The 'op2' sub-operand of a cast is the 'real' type number,
+ since the result of a cast to one of the 'small' integer
+ types is an integer.
+ */
+
+ type = genActualType(lclTyp);
+
+#if SMALL_TREE_NODES
+ if (callNode)
+ {
+ op1 = gtNewCastNodeL(type, op1, lclTyp);
+ }
+ else
+#endif // SMALL_TREE_NODES
+ {
+ op1 = gtNewCastNode(type, op1, lclTyp);
+ }
+
+ if (ovfl)
+ {
+ op1->gtFlags |= (GTF_OVERFLOW | GTF_EXCEPT);
+ }
+ if (uns)
+ {
+ op1->gtFlags |= GTF_UNSIGNED;
+ }
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_NEG:
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsNumberType(), "Bad arg");
+ }
+
+ op1 = impPopStack().val;
+ impBashVarAddrsToI(op1, nullptr);
+ impPushOnStack(gtNewOperNode(GT_NEG, genActualType(op1->gtType), op1), tiRetVal);
+ break;
+
+ case CEE_POP:
+ if (tiVerificationNeeded)
+ {
+ impStackTop(0);
+ }
+
+ /* Pull the top value from the stack */
+
+ op1 = impPopStack(clsHnd).val;
+
+ /* Get hold of the type of the value being duplicated */
+
+ lclTyp = genActualType(op1->gtType);
+
+ /* Does the value have any side effects? */
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) || opts.compDbgCode)
+ {
+ // Since we are throwing away the value, just normalize
+ // it to its address. This is more efficient.
+
+ if (varTypeIsStruct(op1))
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Non-calls, such as obj or ret_expr, have to go through this.
+ // Calls with large struct return value have to go through this.
+ // Helper calls with small struct return value also have to go
+ // through this since they do not follow Unix calling convention.
+ if (op1->gtOper != GT_CALL || !IsMultiRegReturnedType(clsHnd) ||
+ op1->AsCall()->gtCallType == CT_HELPER)
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ op1 = impGetStructAddr(op1, clsHnd, (unsigned)CHECK_SPILL_ALL, false);
+ }
+ }
+
+                    // If op1 is a non-overflow cast, throw it away since it is useless.
+ // Another reason for throwing away the useless cast is in the context of
+ // implicit tail calls when the operand of pop is GT_CAST(GT_CALL(..)).
+ // The cast gets added as part of importing GT_CALL, which gets in the way
+ // of fgMorphCall() on the forms of tail call nodes that we assert.
+ if ((op1->gtOper == GT_CAST) && !op1->gtOverflow())
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+
+ // If 'op1' is an expression, create an assignment node.
+ // Helps analyses (like CSE) to work fine.
+
+ if (op1->gtOper != GT_CALL)
+ {
+ op1 = gtUnusedValNode(op1);
+ }
+
+ /* Append the value to the tree list */
+ goto SPILL_APPEND;
+ }
+
+ /* No side effects - just throw the <BEEP> thing away */
+ break;
+
+ case CEE_DUP:
+
+ if (tiVerificationNeeded)
+ {
+                        // Dup could start the beginning of a delegate creation sequence; remember that
+ delegateCreateStart = codeAddr - 1;
+ impStackTop(0);
+ }
+
+ // Convert a (dup, stloc) sequence into a (stloc, ldloc) sequence in the following cases:
+ // - If this is non-debug code - so that CSE will recognize the two as equal.
+ // This helps eliminate a redundant bounds check in cases such as:
+ // ariba[i+3] += some_value;
+ // - If the top of the stack is a non-leaf that may be expensive to clone.
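+                //   For example, the IL sequence 'dup; stloc.0' is imported as 'stloc.0; ldloc.0'.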
+
+ if (codeAddr < codeEndp)
+ {
+ OPCODE nextOpcode = (OPCODE)getU1LittleEndian(codeAddr);
+ if (impIsAnySTLOC(nextOpcode))
+ {
+ if (!opts.compDbgCode)
+ {
+ insertLdloc = true;
+ break;
+ }
+ GenTree* stackTop = impStackTop().val;
+ if (!stackTop->IsIntegralConst(0) && !stackTop->IsFPZero() && !stackTop->IsLocal())
+ {
+ insertLdloc = true;
+ break;
+ }
+ }
+ }
+
+ /* Pull the top value from the stack */
+ op1 = impPopStack(tiRetVal);
+
+ /* Clone the value */
+ op1 = impCloneExpr(op1, &op2, tiRetVal.GetClassHandle(), (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("DUP instruction"));
+
+ /* Either the tree started with no global effects, or impCloneExpr
+ evaluated the tree to a temp and returned two copies of that
+ temp. Either way, neither op1 nor op2 should have side effects.
+ */
+ assert(!(op1->gtFlags & GTF_GLOB_EFFECT) && !(op2->gtFlags & GTF_GLOB_EFFECT));
+
+ /* Push the tree/temp back on the stack */
+ impPushOnStack(op1, tiRetVal);
+
+ /* Push the copy on the stack */
+ impPushOnStack(op2, tiRetVal);
+
+ break;
+
+ case CEE_STIND_I1:
+ lclTyp = TYP_BYTE;
+ goto STIND;
+ case CEE_STIND_I2:
+ lclTyp = TYP_SHORT;
+ goto STIND;
+ case CEE_STIND_I4:
+ lclTyp = TYP_INT;
+ goto STIND;
+ case CEE_STIND_I8:
+ lclTyp = TYP_LONG;
+ goto STIND;
+ case CEE_STIND_I:
+ lclTyp = TYP_I_IMPL;
+ goto STIND;
+ case CEE_STIND_REF:
+ lclTyp = TYP_REF;
+ goto STIND;
+ case CEE_STIND_R4:
+ lclTyp = TYP_FLOAT;
+ goto STIND;
+ case CEE_STIND_R8:
+ lclTyp = TYP_DOUBLE;
+ goto STIND;
+ STIND:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo instrType(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_STIND_I)
+ {
+ instrType = typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+ verVerifySTIND(impStackTop(1).seTypeInfo, impStackTop(0).seTypeInfo, instrType);
+ }
+ else
+ {
+ compUnsafeCastUsed = true; // Have to go conservative
+ }
+
+ STIND_POST_VERIFY:
+
+ op2 = impPopStack().val; // value to store
+ op1 = impPopStack().val; // address to store to
+
+ // you can indirect off of a TYP_I_IMPL (if we are in C) or a BYREF
+ assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF);
+
+ impBashVarAddrsToI(op1, op2);
+
+ op2 = impImplicitR4orR8Cast(op2, lclTyp);
+
+#ifdef _TARGET_64BIT_
+ // Automatic upcast for a GT_CNS_INT into TYP_I_IMPL
+ if ((op2->OperGet() == GT_CNS_INT) && varTypeIsI(lclTyp) && !varTypeIsI(op2->gtType))
+ {
+ op2->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+                    // Allow a downcast of op2 from TYP_I_IMPL into a 32-bit Int for x86 JIT compatibility
+ //
+ if (varTypeIsI(op2->gtType) && (genActualType(lclTyp) == TYP_INT))
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op2 = gtNewCastNode(TYP_INT, op2, TYP_INT);
+ }
+                    // Allow an upcast of op2 from a 32-bit Int into TYP_I_IMPL for x86 JIT compatibility
+ //
+ if (varTypeIsI(lclTyp) && (genActualType(op2->gtType) == TYP_INT))
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ if (opcode == CEE_STIND_REF)
+ {
+ // STIND_REF can be used to store TYP_INT, TYP_I_IMPL, TYP_REF, or TYP_BYREF
+ assertImp(varTypeIsIntOrI(op2->gtType) || varTypeIsGC(op2->gtType));
+ lclTyp = genActualType(op2->TypeGet());
+ }
+
+// Check target type.
+#ifdef DEBUG
+ if (op2->gtType == TYP_BYREF || lclTyp == TYP_BYREF)
+ {
+ if (op2->gtType == TYP_BYREF)
+ {
+ assertImp(lclTyp == TYP_BYREF || lclTyp == TYP_I_IMPL);
+ }
+ else if (lclTyp == TYP_BYREF)
+ {
+ assertImp(op2->gtType == TYP_BYREF || varTypeIsIntOrI(op2->gtType));
+ }
+ }
+ else
+ {
+ assertImp(genActualType(op2->gtType) == genActualType(lclTyp) ||
+ ((lclTyp == TYP_I_IMPL) && (genActualType(op2->gtType) == TYP_INT)) ||
+ (varTypeIsFloating(op2->gtType) && varTypeIsFloating(lclTyp)));
+ }
+#endif
+
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+
+                // stind could point anywhere, for example a boxed class static int
+ op1->gtFlags |= GTF_IND_TGTANYWHERE;
+
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+
+ op1 = gtNewAssignNode(op1, op2);
+ op1->gtFlags |= GTF_EXCEPT | GTF_GLOB_REF;
+
+ // Spill side-effects AND global-data-accesses
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impSpillSideEffects(true, (unsigned)CHECK_SPILL_ALL DEBUGARG("spill side effects before STIND"));
+ }
+
+ goto APPEND;
+
+ case CEE_LDIND_I1:
+ lclTyp = TYP_BYTE;
+ goto LDIND;
+ case CEE_LDIND_I2:
+ lclTyp = TYP_SHORT;
+ goto LDIND;
+ case CEE_LDIND_U4:
+ case CEE_LDIND_I4:
+ lclTyp = TYP_INT;
+ goto LDIND;
+ case CEE_LDIND_I8:
+ lclTyp = TYP_LONG;
+ goto LDIND;
+ case CEE_LDIND_REF:
+ lclTyp = TYP_REF;
+ goto LDIND;
+ case CEE_LDIND_I:
+ lclTyp = TYP_I_IMPL;
+ goto LDIND;
+ case CEE_LDIND_R4:
+ lclTyp = TYP_FLOAT;
+ goto LDIND;
+ case CEE_LDIND_R8:
+ lclTyp = TYP_DOUBLE;
+ goto LDIND;
+ case CEE_LDIND_U1:
+ lclTyp = TYP_UBYTE;
+ goto LDIND;
+ case CEE_LDIND_U2:
+ lclTyp = TYP_CHAR;
+ goto LDIND;
+ LDIND:
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo lclTiType(lclTyp);
+#ifdef _TARGET_64BIT_
+ if (opcode == CEE_LDIND_I)
+ {
+ lclTiType = typeInfo::nativeInt();
+ }
+#endif // _TARGET_64BIT_
+ tiRetVal = verVerifyLDIND(impStackTop().seTypeInfo, lclTiType);
+ tiRetVal.NormaliseForStack();
+ }
+ else
+ {
+ compUnsafeCastUsed = true; // Have to go conservative
+ }
+
+ LDIND_POST_VERIFY:
+
+ op1 = impPopStack().val; // address to load from
+ impBashVarAddrsToI(op1);
+
+#ifdef _TARGET_64BIT_
+                // Allow an upcast of op1 from a 32-bit Int into TYP_I_IMPL for x86 JIT compatibility
+ //
+ if (genActualType(op1->gtType) == TYP_INT)
+ {
+ assert(!tiVerificationNeeded); // We should have thrown the VerificationException before.
+ op1 = gtNewCastNode(TYP_I_IMPL, op1, TYP_I_IMPL);
+ }
+#endif
+
+ assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF);
+
+ op1 = gtNewOperNode(GT_IND, lclTyp, op1);
+
+                // ldind could point anywhere, for example a boxed class static int
+ op1->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ assert(op1->OperGet() == GT_IND);
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+
+ impPushOnStack(op1, tiRetVal);
+
+ break;
+
+ case CEE_UNALIGNED:
+
+ assert(sz == 1);
+ val = getU1LittleEndian(codeAddr);
+ ++codeAddr;
+ JITDUMP(" %u", val);
+ if ((val != 1) && (val != 2) && (val != 4))
+ {
+ BADCODE("Alignment unaligned. must be 1, 2, or 4");
+ }
+
+ Verify(!(prefixFlags & PREFIX_UNALIGNED), "Multiple unaligned. prefixes");
+ prefixFlags |= PREFIX_UNALIGNED;
+
+ impValidateMemoryAccessOpcode(codeAddr, codeEndp, false);
+
+ PREFIX:
+ opcode = (OPCODE)getU1LittleEndian(codeAddr);
+ codeAddr += sizeof(__int8);
+ opcodeOffs = (IL_OFFSET)(codeAddr - info.compCode);
+ goto DECODE_OPCODE;
+
+ case CEE_VOLATILE:
+
+ Verify(!(prefixFlags & PREFIX_VOLATILE), "Multiple volatile. prefixes");
+ prefixFlags |= PREFIX_VOLATILE;
+
+ impValidateMemoryAccessOpcode(codeAddr, codeEndp, true);
+
+ assert(sz == 0);
+ goto PREFIX;
+
+ case CEE_LDFTN:
+ {
+ // Need to do a lookup here so that we perform an access check
+ // and do a NOWAY if protections are violated
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ eeGetCallInfo(&resolvedToken, nullptr /* constraint typeRef*/,
+ addVerifyFlag(combine(CORINFO_CALLINFO_SECURITYCHECKS, CORINFO_CALLINFO_LDFTN)),
+ &callInfo);
+
+ // This check really only applies to intrinsic Array.Address methods
+ if (callInfo.sig.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ NO_WAY("Currently do not support LDFTN of Parameterized functions");
+ }
+
+ // Do this before DO_LDFTN since CEE_LDVIRTFN does it on its own.
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+ if (tiVerificationNeeded)
+ {
+                    // LDFTN could start the beginning of a delegate creation sequence; remember that
+ delegateCreateStart = codeAddr - 2;
+
+ // check any constraints on the callee's class and type parameters
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(resolvedToken.hClass),
+ "method has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(resolvedToken.hClass,
+ resolvedToken.hMethod),
+ "method has unsatisfied method constraints");
+
+ mflags = callInfo.verMethodFlags;
+ Verify(!(mflags & CORINFO_FLG_CONSTRUCTOR), "LDFTN on a constructor");
+ }
+
+ DO_LDFTN:
+ op1 = impMethodPointer(&resolvedToken, &callInfo);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ impPushOnStack(op1, typeInfo(resolvedToken.hMethod));
+
+ break;
+ }
+
+ case CEE_LDVIRTFTN:
+ {
+ /* Get the method token */
+
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ eeGetCallInfo(&resolvedToken, nullptr /* constraint typeRef */,
+ addVerifyFlag(combine(combine(CORINFO_CALLINFO_SECURITYCHECKS, CORINFO_CALLINFO_LDFTN),
+ CORINFO_CALLINFO_CALLVIRT)),
+ &callInfo);
+
+ // This check really only applies to intrinsic Array.Address methods
+ if (callInfo.sig.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ NO_WAY("Currently do not support LDFTN of Parameterized functions");
+ }
+
+ mflags = callInfo.methodFlags;
+
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+ if (compIsForInlining())
+ {
+ if (mflags & (CORINFO_FLG_FINAL | CORINFO_FLG_STATIC) || !(mflags & CORINFO_FLG_VIRTUAL))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDVIRTFN_ON_NON_VIRTUAL);
+ return;
+ }
+ }
+
+ CORINFO_SIG_INFO& ftnSig = callInfo.sig;
+
+ if (tiVerificationNeeded)
+ {
+
+ Verify(ftnSig.hasThis(), "ldvirtftn on a static method");
+ Verify(!(mflags & CORINFO_FLG_CONSTRUCTOR), "LDVIRTFTN on a constructor");
+
+ // JIT32 verifier rejects verifiable ldvirtftn pattern
+ typeInfo declType =
+ verMakeTypeInfo(resolvedToken.hClass, true); // Change TI_STRUCT to TI_REF when necessary
+
+ typeInfo arg = impStackTop().seTypeInfo;
+ Verify((arg.IsType(TI_REF) || arg.IsType(TI_NULL)) && tiCompatibleWith(arg, declType, true),
+ "bad ldvirtftn");
+
+ CORINFO_CLASS_HANDLE instanceClassHnd = info.compClassHnd;
+ if (!(arg.IsType(TI_NULL) || (mflags & CORINFO_FLG_STATIC)))
+ {
+ instanceClassHnd = arg.GetClassHandleForObjRef();
+ }
+
+ // check any constraints on the method's class and type parameters
+ VerifyOrReturn(info.compCompHnd->satisfiesClassConstraints(resolvedToken.hClass),
+ "method has unsatisfied class constraints");
+ VerifyOrReturn(info.compCompHnd->satisfiesMethodConstraints(resolvedToken.hClass,
+ resolvedToken.hMethod),
+ "method has unsatisfied method constraints");
+
+ if (mflags & CORINFO_FLG_PROTECTED)
+ {
+ Verify(info.compCompHnd->canAccessFamily(info.compMethodHnd, instanceClassHnd),
+ "Accessing protected method through wrong type.");
+ }
+ }
+
+ /* Get the object-ref */
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_REF);
+
+ if (opts.IsReadyToRun())
+ {
+ if (callInfo.kind != CORINFO_VIRTUALCALL_LDVIRTFTN)
+ {
+ if (op1->gtFlags & GTF_SIDE_EFFECT)
+ {
+ op1 = gtUnusedValNode(op1);
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ goto DO_LDFTN;
+ }
+ }
+ else if (mflags & (CORINFO_FLG_FINAL | CORINFO_FLG_STATIC) || !(mflags & CORINFO_FLG_VIRTUAL))
+ {
+ if (op1->gtFlags & GTF_SIDE_EFFECT)
+ {
+ op1 = gtUnusedValNode(op1);
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ goto DO_LDFTN;
+ }
+
+ GenTreePtr fptr = impImportLdvirtftn(op1, &resolvedToken, &callInfo);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ impPushOnStack(fptr, typeInfo(resolvedToken.hMethod));
+
+ break;
+ }
+
+ case CEE_CONSTRAINED:
+
+ assertImp(sz == sizeof(unsigned));
+ impResolveToken(codeAddr, &constrainedResolvedToken, CORINFO_TOKENKIND_Constrained);
+ codeAddr += sizeof(unsigned); // prefix instructions must increment codeAddr manually
+ JITDUMP(" (%08X) ", constrainedResolvedToken.token);
+
+ Verify(!(prefixFlags & PREFIX_CONSTRAINED), "Multiple constrained. prefixes");
+ prefixFlags |= PREFIX_CONSTRAINED;
+
+ {
+ OPCODE actualOpcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+ if (actualOpcode != CEE_CALLVIRT)
+ {
+ BADCODE("constrained. has to be followed by callvirt");
+ }
+ }
+
+ goto PREFIX;
+
+ case CEE_READONLY:
+ JITDUMP(" readonly.");
+
+ Verify(!(prefixFlags & PREFIX_READONLY), "Multiple readonly. prefixes");
+ prefixFlags |= PREFIX_READONLY;
+
+ {
+ OPCODE actualOpcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+ if (actualOpcode != CEE_LDELEMA && !impOpcodeIsCallOpcode(actualOpcode))
+ {
+ BADCODE("readonly. has to be followed by ldelema or call");
+ }
+ }
+
+ assert(sz == 0);
+ goto PREFIX;
+
+ case CEE_TAILCALL:
+ JITDUMP(" tail.");
+
+ Verify(!(prefixFlags & PREFIX_TAILCALL_EXPLICIT), "Multiple tailcall. prefixes");
+ prefixFlags |= PREFIX_TAILCALL_EXPLICIT;
+
+ {
+ OPCODE actualOpcode = impGetNonPrefixOpcode(codeAddr, codeEndp);
+ if (!impOpcodeIsCallOpcode(actualOpcode))
+ {
+ BADCODE("tailcall. has to be followed by call, callvirt or calli");
+ }
+ }
+ assert(sz == 0);
+ goto PREFIX;
+
+ case CEE_NEWOBJ:
+
+ /* Since we will implicitly insert newObjThisPtr at the start of the
+ argument list, spill any GTF_ORDER_SIDEEFF */
+ impSpillSpecialSideEff();
+
+ /* NEWOBJ does not respond to TAIL */
+ prefixFlags &= ~PREFIX_TAILCALL_EXPLICIT;
+
+ /* NEWOBJ does not respond to CONSTRAINED */
+ prefixFlags &= ~PREFIX_CONSTRAINED;
+
+#if COR_JIT_EE_VERSION > 460
+ _impResolveToken(CORINFO_TOKENKIND_NewObj);
+#else
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+#endif
+
+ eeGetCallInfo(&resolvedToken, nullptr /* constraint typeRef*/,
+ addVerifyFlag(combine(CORINFO_CALLINFO_SECURITYCHECKS, CORINFO_CALLINFO_ALLOWINSTPARAM)),
+ &callInfo);
+
+ if (compIsForInlining())
+ {
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY)
+ {
+ // Check to see if this call violates the boundary.
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CROSS_BOUNDARY_SECURITY);
+ return;
+ }
+ }
+
+ mflags = callInfo.methodFlags;
+
+ if ((mflags & (CORINFO_FLG_STATIC | CORINFO_FLG_ABSTRACT)) != 0)
+ {
+ BADCODE("newobj on static or abstract method");
+ }
+
+ // Insert the security callout before any actual code is generated
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+                // There are three different cases for 'new'. In the first two the object size
+                // is variable (it depends on the arguments):
+                // 1) The object is an array (arrays are treated specially by the EE)
+                // 2) The object is some other variable-sized object (e.g. String)
+                // 3) The class size can be determined beforehand (the normal case)
+                // In the first case we need to call a NEWOBJ helper (multinewarray),
+                // in the second case we call the constructor with a null 'this' pointer,
+                // and in the third case we allocate the memory and then call the constructor.
+
+ clsFlags = callInfo.classFlags;
+ if (clsFlags & CORINFO_FLG_ARRAY)
+ {
+ if (tiVerificationNeeded)
+ {
+ CORINFO_CLASS_HANDLE elemTypeHnd;
+ INDEBUG(CorInfoType corType =)
+ info.compCompHnd->getChildType(resolvedToken.hClass, &elemTypeHnd);
+ assert(!(elemTypeHnd == nullptr && corType == CORINFO_TYPE_VALUECLASS));
+ Verify(elemTypeHnd == nullptr ||
+ !(info.compCompHnd->getClassAttribs(elemTypeHnd) & CORINFO_FLG_CONTAINS_STACK_PTR),
+ "newarr of byref-like objects");
+ verVerifyCall(opcode, &resolvedToken, nullptr, ((prefixFlags & PREFIX_TAILCALL_EXPLICIT) != 0),
+ ((prefixFlags & PREFIX_READONLY) != 0), delegateCreateStart, codeAddr - 1,
+ &callInfo DEBUGARG(info.compFullName));
+ }
+ // Arrays need to call the NEWOBJ helper.
+ assertImp(clsFlags & CORINFO_FLG_VAROBJSIZE);
+
+ impImportNewObjArray(&resolvedToken, &callInfo);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ callTyp = TYP_REF;
+ break;
+ }
+ // At present this can only be String
+ else if (clsFlags & CORINFO_FLG_VAROBJSIZE)
+ {
+#if COR_JIT_EE_VERSION > 460
+ if (eeGetEEInfo()->targetAbi == CORINFO_CORERT_ABI)
+ {
+ // The dummy argument does not exist in CoreRT
+ newObjThisPtr = nullptr;
+ }
+ else
+#endif
+ {
+ // This is the case for variable-sized objects that are not
+ // arrays. In this case, call the constructor with a null 'this'
+ // pointer
+ newObjThisPtr = gtNewIconNode(0, TYP_REF);
+ }
+
+ /* Remember that this basic block contains 'new' of an object */
+ block->bbFlags |= BBF_HAS_NEWOBJ;
+ optMethodFlags |= OMF_HAS_NEWOBJ;
+ }
+ else
+ {
+ // This is the normal case where the size of the object is
+ // fixed. Allocate the memory and call the constructor.
+
+                    // Note: We cannot add a peephole to avoid the use of a temp here
+                    // because we don't have enough interference info to detect when the
+                    // source and destination interfere, e.g.: s = new S(ref);
+
+                    // TODO: Find the correct place to introduce a general
+                    // reverse copy prop for struct return values from newobj or
+                    // any function returning structs.
+
+ /* get a temporary for the new object */
+ lclNum = lvaGrabTemp(true DEBUGARG("NewObj constructor temp"));
+
+ // In the value class case we only need clsHnd for size calcs.
+ //
+ // The lookup of the code pointer will be handled by CALL in this case
+ if (clsFlags & CORINFO_FLG_VALUECLASS)
+ {
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
+ unsigned size = info.compCompHnd->getClassSize(resolvedToken.hClass);
+
+ if (impIsPrimitive(jitTyp))
+ {
+ lvaTable[lclNum].lvType = JITtype2varType(jitTyp);
+ }
+ else
+ {
+                            // The local variable itself is the allocated space.
+                            // Here we need the unsafe value class check, since the address of the struct
+                            // is taken for further use and is potentially exploitable.
+ lvaSetStruct(lclNum, resolvedToken.hClass, true /* unsafe value cls check */);
+ }
+
+ // Append a tree to zero-out the temp
+ newObjThisPtr = gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet());
+
+ newObjThisPtr = gtNewBlkOpNode(newObjThisPtr, // Dest
+ gtNewIconNode(0), // Value
+ size, // Size
+ false, // isVolatile
+ false); // not copyBlock
+ impAppendTree(newObjThisPtr, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ // Obtain the address of the temp
+ newObjThisPtr =
+ gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet()));
+ }
+ else
+ {
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEW, TYP_REF);
+ usingReadyToRunHelper = (op1 != NULL);
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ op1 = impParentClassTokenToHandle(&resolvedToken, nullptr, TRUE);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the newfast call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate
+ // stub
+ // 3) Allocate and return the new object
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ op1 = gtNewAllocObjNode(info.compCompHnd->getNewHelper(&resolvedToken, info.compMethodHnd),
+ resolvedToken.hClass, TYP_REF, op1);
+ }
+
+ // Remember that this basic block contains 'new' of an object
+ block->bbFlags |= BBF_HAS_NEWOBJ;
+ optMethodFlags |= OMF_HAS_NEWOBJ;
+
+                        // Append the assignment to the temp/local. We don't need to spill
+                        // at all as we are just calling an EE-Jit helper which can only
+                        // cause an (async) OutOfMemoryException.
+
+                        // We assign the newly allocated object (by a GT_ALLOCOBJ node)
+                        // to a temp. Note that the pattern "temp = allocObj" is required
+                        // by the ObjectAllocator phase to be able to determine GT_ALLOCOBJ nodes
+                        // without an exhaustive walk over all expressions.
+
+ impAssignTempGen(lclNum, op1, (unsigned)CHECK_SPILL_NONE);
+
+ newObjThisPtr = gtNewLclvNode(lclNum, TYP_REF);
+ }
+ }
+ goto CALL;
+
+ case CEE_CALLI:
+
+ /* CALLI does not respond to CONSTRAINED */
+ prefixFlags &= ~PREFIX_CONSTRAINED;
+
+ if (compIsForInlining())
+ {
+ // CALLI doesn't have a method handle, so assume the worst.
+ if (impInlineInfo->inlineCandidateInfo->dwRestrictions & INLINE_RESPECT_BOUNDARY)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CROSS_BOUNDARY_CALLI);
+ return;
+ }
+ }
+
+ // fall through
+
+ case CEE_CALLVIRT:
+ case CEE_CALL:
+
+ // We can't call getCallInfo on the token from a CALLI, but we need it in
+ // many other places. We unfortunately embed that knowledge here.
+ if (opcode != CEE_CALLI)
+ {
+ _impResolveToken(CORINFO_TOKENKIND_Method);
+
+ eeGetCallInfo(&resolvedToken,
+ (prefixFlags & PREFIX_CONSTRAINED) ? &constrainedResolvedToken : nullptr,
+ // this is how impImportCall invokes getCallInfo
+ addVerifyFlag(
+ combine(combine(CORINFO_CALLINFO_ALLOWINSTPARAM, CORINFO_CALLINFO_SECURITYCHECKS),
+ (opcode == CEE_CALLVIRT) ? CORINFO_CALLINFO_CALLVIRT
+ : CORINFO_CALLINFO_NONE)),
+ &callInfo);
+ }
+ else
+ {
+ // Suppress uninitialized use warning.
+ memset(&resolvedToken, 0, sizeof(resolvedToken));
+ memset(&callInfo, 0, sizeof(callInfo));
+
+ resolvedToken.token = getU4LittleEndian(codeAddr);
+ }
+
+ CALL: // memberRef should be set.
+ // newObjThisPtr should be set for CEE_NEWOBJ
+
+ JITDUMP(" %08X", resolvedToken.token);
+ constraintCall = (prefixFlags & PREFIX_CONSTRAINED) != 0;
+
+ bool newBBcreatedForTailcallStress;
+
+ newBBcreatedForTailcallStress = false;
+
+ if (compIsForInlining())
+ {
+ // We rule out inlinees with explicit tail calls in fgMakeBasicBlocks.
+ assert((prefixFlags & PREFIX_TAILCALL_EXPLICIT) == 0);
+ }
+ else
+ {
+ if (compTailCallStress())
+ {
+                        // Have we created a new BB after the "call" instruction in fgMakeBasicBlocks()?
+                        // Tail call stress only recognizes call+ret patterns and forces them to be
+                        // explicit tail prefixed calls. Also, under tail call stress fgMakeBasicBlocks()
+                        // doesn't import the 'ret' opcode following the call into the basic block containing
+                        // the call; instead it imports it into a new basic block. Note that fgMakeBasicBlocks()
+                        // already checks that there is an opcode following the call, hence it is safe here
+                        // to read the next opcode without a bounds check.
+ newBBcreatedForTailcallStress =
+ impOpcodeIsCallOpcode(opcode) && // Current opcode is a CALL, (not a CEE_NEWOBJ). So, don't
+ // make it jump to RET.
+ (OPCODE)getU1LittleEndian(codeAddr + sz) == CEE_RET; // Next opcode is a CEE_RET
+
+ if (newBBcreatedForTailcallStress &&
+ !(prefixFlags & PREFIX_TAILCALL_EXPLICIT) && // User hasn't set "tail." prefix yet.
+ verCheckTailCallConstraint(opcode, &resolvedToken,
+ constraintCall ? &constrainedResolvedToken : nullptr,
+                                                       true) // Is it legal to do a tailcall?
+ )
+ {
+ // Stress the tailcall.
+ JITDUMP(" (Tailcall stress: prefixFlags |= PREFIX_TAILCALL_EXPLICIT)");
+ prefixFlags |= PREFIX_TAILCALL_EXPLICIT;
+ }
+ }
+
+ // Note that when running under tail call stress, a call will be marked as explicit tail prefixed
+ // hence will not be considered for implicit tail calling.
+ bool isRecursive = (callInfo.hMethod == info.compMethodHnd);
+ if (impIsImplicitTailCallCandidate(opcode, codeAddr + sz, codeEndp, prefixFlags, isRecursive))
+ {
+ JITDUMP(" (Implicit Tail call: prefixFlags |= PREFIX_TAILCALL_IMPLICIT)");
+ prefixFlags |= PREFIX_TAILCALL_IMPLICIT;
+ }
+ }
+
+ // Treat this call as tail call for verification only if "tail" prefixed (i.e. explicit tail call).
+ explicitTailCall = (prefixFlags & PREFIX_TAILCALL_EXPLICIT) != 0;
+ readonlyCall = (prefixFlags & PREFIX_READONLY) != 0;
+
+ if (opcode != CEE_CALLI && opcode != CEE_NEWOBJ)
+ {
+ // All calls and delegates need a security callout.
+ // For delegates, this is the call to the delegate constructor, not the access check on the
+ // LD(virt)FTN.
+ impHandleAccessAllowed(callInfo.accessAllowed, &callInfo.callsiteCalloutHelper);
+
+#if 0 // DevDiv 410397 - This breaks too many obfuscated apps to do this in an in-place release
+
+                    // DevDiv 291703 - we need to check for accessibility between the caller of InitializeArray
+                    // and the field it is reading; thus it is now unverifiable not to immediately precede it with
+                    // ldtoken <field token>, and we now check accessibility
+ if ((callInfo.methodFlags & CORINFO_FLG_INTRINSIC) &&
+ (info.compCompHnd->getIntrinsicID(callInfo.hMethod) == CORINFO_INTRINSIC_InitializeArray))
+ {
+ if (prevOpcode != CEE_LDTOKEN)
+ {
+ Verify(prevOpcode == CEE_LDTOKEN, "Need ldtoken for InitializeArray");
+ }
+ else
+ {
+ assert(lastLoadToken != NULL);
+ // Now that we know we have a token, verify that it is accessible for loading
+ CORINFO_RESOLVED_TOKEN resolvedLoadField;
+ impResolveToken(lastLoadToken, &resolvedLoadField, CORINFO_TOKENKIND_Field);
+ eeGetFieldInfo(&resolvedLoadField, CORINFO_ACCESS_INIT_ARRAY, &fieldInfo);
+ impHandleAccessAllowed(fieldInfo.accessAllowed, &fieldInfo.accessCalloutHelper);
+ }
+ }
+
+#endif // DevDiv 410397
+ }
+
+ if (tiVerificationNeeded)
+ {
+ verVerifyCall(opcode, &resolvedToken, constraintCall ? &constrainedResolvedToken : nullptr,
+ explicitTailCall, readonlyCall, delegateCreateStart, codeAddr - 1,
+ &callInfo DEBUGARG(info.compFullName));
+ }
+
+ // Insert delegate callout here.
+ if (opcode == CEE_NEWOBJ && (mflags & CORINFO_FLG_CONSTRUCTOR) && (clsFlags & CORINFO_FLG_DELEGATE))
+ {
+#ifdef DEBUG
+ // We should do this only if verification is enabled
+ // If verification is disabled, delegateCreateStart will not be initialized correctly
+ if (tiVerificationNeeded)
+ {
+ mdMemberRef delegateMethodRef = mdMemberRefNil;
+ // We should get here only for well formed delegate creation.
+ assert(verCheckDelegateCreation(delegateCreateStart, codeAddr - 1, delegateMethodRef));
+ }
+#endif
+
+#ifdef FEATURE_CORECLR
+ // In coreclr the delegate transparency rule needs to be enforced even if verification is disabled
+ typeInfo tiActualFtn = impStackTop(0).seTypeInfo;
+ CORINFO_METHOD_HANDLE delegateMethodHandle = tiActualFtn.GetMethod2();
+
+ impInsertCalloutForDelegate(info.compMethodHnd, delegateMethodHandle, resolvedToken.hClass);
+#endif // FEATURE_CORECLR
+ }
+
+ callTyp = impImportCall(opcode, &resolvedToken, constraintCall ? &constrainedResolvedToken : nullptr,
+ newObjThisPtr, prefixFlags, &callInfo, opcodeOffs);
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ if (explicitTailCall || newBBcreatedForTailcallStress) // If newBBcreatedForTailcallStress is true, we
+ // have created a new BB after the "call"
+ // instruction in fgMakeBasicBlocks(). So we need to jump to RET regardless.
+ {
+ assert(!compIsForInlining());
+ goto RET;
+ }
+
+ break;
+
+ case CEE_LDFLD:
+ case CEE_LDSFLD:
+ case CEE_LDFLDA:
+ case CEE_LDSFLDA:
+ {
+
+ BOOL isLoadAddress = (opcode == CEE_LDFLDA || opcode == CEE_LDSFLDA);
+ BOOL isLoadStatic = (opcode == CEE_LDSFLD || opcode == CEE_LDSFLDA);
+
+ /* Get the CP_Fieldref index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Field);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ int aflags = isLoadAddress ? CORINFO_ACCESS_ADDRESS : CORINFO_ACCESS_GET;
+
+ GenTreePtr obj = nullptr;
+ typeInfo* tiObj = nullptr;
+ CORINFO_CLASS_HANDLE objType = nullptr; // used for fields
+
+ if (opcode == CEE_LDFLD || opcode == CEE_LDFLDA)
+ {
+ tiObj = &impStackTop().seTypeInfo;
+ obj = impPopStack(objType).val;
+
+ if (impIsThis(obj))
+ {
+ aflags |= CORINFO_ACCESS_THIS;
+
+ // An optimization for Contextful classes:
+ // we unwrap the proxy when we have a 'this reference'
+
+ if (info.compUnwrapContextful)
+ {
+ aflags |= CORINFO_ACCESS_UNWRAP;
+ }
+ }
+ }
+
+ eeGetFieldInfo(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo);
+
+ // Figure out the type of the member. We always call canAccessField, so you always need this
+ // handle
+ CorInfoType ciType = fieldInfo.fieldType;
+ clsHnd = fieldInfo.structType;
+
+ lclTyp = JITtype2varType(ciType);
+
+#ifdef _TARGET_AMD64_
+                noway_assert(varTypeIsIntegralOrI(lclTyp) || varTypeIsFloating(lclTyp) || lclTyp == TYP_STRUCT);
+#endif // _TARGET_AMD64_
+
+ if (compIsForInlining())
+ {
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_TLS:
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_LDFLD_NEEDS_HELPER);
+ return;
+
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+
+ /* We may be able to inline the field accessors in specific instantiations of generic
+ * methods */
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_LDFLD_NEEDS_HELPER);
+ return;
+
+ default:
+ break;
+ }
+
+ if (!isLoadAddress && (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) && lclTyp == TYP_STRUCT &&
+ clsHnd)
+ {
+ if ((info.compCompHnd->getTypeForPrimitiveValueClass(clsHnd) == CORINFO_TYPE_UNDEF) &&
+ !(info.compFlags & CORINFO_FLG_FORCEINLINE))
+ {
+ // Loading a static valuetype field usually will cause a JitHelper to be called
+ // for the static base. This will bloat the code.
+ compInlineResult->Note(InlineObservation::CALLEE_LDFLD_STATIC_VALUECLASS);
+
+ if (compInlineResult->IsFailure())
+ {
+ return;
+ }
+ }
+ }
+ }
+
+ tiRetVal = verMakeTypeInfo(ciType, clsHnd);
+ if (isLoadAddress)
+ {
+ tiRetVal.MakeByRef();
+ }
+ else
+ {
+ tiRetVal.NormaliseForStack();
+ }
+
+ // Perform this check always to ensure that we get field access exceptions even with
+ // SkipVerification.
+ impHandleAccessAllowed(fieldInfo.accessAllowed, &fieldInfo.accessCalloutHelper);
+
+ if (tiVerificationNeeded)
+ {
+ // You can also pass the unboxed struct to LDFLD
+ BOOL bAllowPlainValueTypeAsThis = FALSE;
+ if (opcode == CEE_LDFLD && impIsValueType(tiObj))
+ {
+ bAllowPlainValueTypeAsThis = TRUE;
+ }
+
+ verVerifyField(&resolvedToken, fieldInfo, tiObj, isLoadAddress, bAllowPlainValueTypeAsThis);
+
+ // If we're doing this on a heap object or from a 'safe' byref
+ // then the result is a safe byref too
+ if (isLoadAddress) // load address
+ {
+ if (fieldInfo.fieldFlags &
+ CORINFO_FLG_FIELD_STATIC) // statics marked as safe will have permanent home
+ {
+ if (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_SAFESTATIC_BYREF_RETURN)
+ {
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+ }
+ else if (tiObj->IsObjRef() || tiObj->IsPermanentHomeByRef())
+ {
+ // ldflda of byref is safe if done on a gc object or on a
+ // safe byref
+ tiRetVal.SetIsPermanentHomeByRef();
+ }
+ }
+ }
+ else
+ {
+ // tiVerificationNeeded is false.
+ // Raise InvalidProgramException if static load accesses non-static field
+ if (isLoadStatic && ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) == 0))
+ {
+ BADCODE("static access on an instance field");
+ }
+ }
+
+                // We are using ldfld/a on a static field. We allow it, but need to evaluate any side effects of obj.
+ if ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) && obj != nullptr)
+ {
+ if (obj->gtFlags & GTF_SIDE_EFFECT)
+ {
+ obj = gtUnusedValNode(obj);
+ impAppendTree(obj, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ obj = nullptr;
+ }
+
+ /* Preserve 'small' int types */
+ if (lclTyp > TYP_INT)
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ bool usesHelper = false;
+
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE:
+#ifdef FEATURE_READYTORUN_COMPILER
+ case CORINFO_FIELD_INSTANCE_WITH_BASE:
+#endif
+ {
+ bool nullcheckNeeded = false;
+
+ obj = impCheckForNullPointer(obj);
+
+ if (isLoadAddress && (obj->gtType == TYP_BYREF) && fgAddrCouldBeNull(obj))
+ {
+ nullcheckNeeded = true;
+ }
+
+ // If the object is a struct, what we really want is
+ // for the field to operate on the address of the struct.
+ if (!varTypeGCtype(obj->TypeGet()) && impIsValueType(tiObj))
+ {
+ assert(opcode == CEE_LDFLD && objType != nullptr);
+
+ obj = impGetStructAddr(obj, objType, (unsigned)CHECK_SPILL_ALL, true);
+ }
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, obj, fieldInfo.offset, nullcheckNeeded);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE)
+ op1->gtField.gtFieldLookup = fieldInfo.fieldLookup;
+#endif
+
+ op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT);
+
+ if (fgAddrCouldBeNull(obj))
+ {
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+
+ // If gtFldObj is a BYREF then our target is a value class and
+                        // it could point anywhere, for example a boxed class static int
+ if (obj->gtType == TYP_BYREF)
+ {
+ op1->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass);
+ if (StructHasOverlappingFields(typeFlags))
+ {
+ op1->gtField.gtFldMayOverlap = true;
+ }
+
+ // wrap it in an address-of operator if necessary
+ if (isLoadAddress)
+ {
+ op1 = gtNewOperNode(GT_ADDR,
+ (var_types)(varTypeIsGC(obj->TypeGet()) ? TYP_BYREF : TYP_I_IMPL), op1);
+ }
+ else
+ {
+ if (compIsForInlining() &&
+ impInlineIsGuaranteedThisDerefBeforeAnySideEffects(nullptr, obj,
+ impInlineInfo->inlArgInfo))
+ {
+ impInlineInfo->thisDereferencedFirst = true;
+ }
+ }
+ }
+ break;
+
+ case CORINFO_FIELD_STATIC_TLS:
+#ifdef _TARGET_X86_
+ // Legacy TLS access is implemented as intrinsic on x86 only
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
+ op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
+
+ if (isLoadAddress)
+ {
+ op1 = gtNewOperNode(GT_ADDR, (var_types)TYP_I_IMPL, op1);
+ }
+ break;
+#else
+ fieldInfo.fieldAccessor = CORINFO_FIELD_STATIC_ADDR_HELPER;
+
+ __fallthrough;
+#endif
+
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ op1 = gtNewRefCOMfield(obj, &resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo, lclTyp,
+ clsHnd, nullptr);
+ usesHelper = true;
+ break;
+
+ case CORINFO_FIELD_STATIC_ADDRESS:
+ // Replace static read-only fields with constant if possible
+ if ((aflags & CORINFO_ACCESS_GET) && (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_FINAL) &&
+ !(fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC_IN_HEAP) &&
+ (varTypeIsIntegral(lclTyp) || varTypeIsFloating(lclTyp)))
+ {
+ CorInfoInitClassResult initClassResult =
+ info.compCompHnd->initClass(resolvedToken.hField, info.compMethodHnd,
+ impTokenLookupContextHandle);
+
+ if (initClassResult & CORINFO_INITCLASS_INITIALIZED)
+ {
+ void** pFldAddr = nullptr;
+ void* fldAddr =
+ info.compCompHnd->getFieldAddress(resolvedToken.hField, (void**)&pFldAddr);
+
+ // We should always be able to access this static's address directly
+ assert(pFldAddr == nullptr);
+
+ op1 = impImportStaticReadOnlyField(fldAddr, lclTyp);
+ goto FIELD_DONE;
+ }
+ }
+
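+ // Not folded to a constant: fall through to the generic static field access path.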
+ __fallthrough;
+
+ case CORINFO_FIELD_STATIC_RVA_ADDRESS:
+ case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+ op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
+ lclTyp);
+ break;
+
+ case CORINFO_FIELD_INTRINSIC_ZERO:
+ {
+ assert(aflags & CORINFO_ACCESS_GET);
+ op1 = gtNewIconNode(0, lclTyp);
+ goto FIELD_DONE;
+ }
+ break;
+
+ case CORINFO_FIELD_INTRINSIC_EMPTY_STRING:
+ {
+ assert(aflags & CORINFO_ACCESS_GET);
+
+ LPVOID pValue;
+ InfoAccessType iat = info.compCompHnd->emptyStringLiteral(&pValue);
+ op1 = gtNewStringLiteralNode(iat, pValue);
+ goto FIELD_DONE;
+ }
+ break;
+
+ default:
+ assert(!"Unexpected fieldAccessor");
+ }
+
+ if (!isLoadAddress)
+ {
+
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+
+ if (!usesHelper)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND) ||
+ (op1->OperGet() == GT_OBJ));
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+ }
+
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ if (!usesHelper)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND) ||
+ (op1->OperGet() == GT_OBJ));
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+ }
+ }
+
+ /* Check if the class needs explicit initialization */
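+ // If so, the class-init helper call is prepended via a GT_COMMA so the static constructor
+ // runs before the field access.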
+
+ if (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_INITCLASS)
+ {
+ GenTreePtr helperNode = impInitClass(&resolvedToken);
+ if (compDonotInline())
+ {
+ return;
+ }
+ if (helperNode != nullptr)
+ {
+ op1 = gtNewOperNode(GT_COMMA, op1->TypeGet(), helperNode, op1);
+ }
+ }
+
+ FIELD_DONE:
+ impPushOnStack(op1, tiRetVal);
+ }
+ break;
+
+ case CEE_STFLD:
+ case CEE_STSFLD:
+ {
+
+ BOOL isStoreStatic = (opcode == CEE_STSFLD);
+
+ CORINFO_CLASS_HANDLE fieldClsHnd; // class of the field (if it's a ref type)
+
+ /* Get the CP_Fieldref index */
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Field);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ int aflags = CORINFO_ACCESS_SET;
+ GenTreePtr obj = nullptr;
+ typeInfo* tiObj = nullptr;
+ typeInfo tiVal;
+
+ /* Pull the value from the stack */
+ op2 = impPopStack(tiVal);
+ clsHnd = tiVal.GetClassHandle();
+
+ if (opcode == CEE_STFLD)
+ {
+ tiObj = &impStackTop().seTypeInfo;
+ obj = impPopStack().val;
+
+ if (impIsThis(obj))
+ {
+ aflags |= CORINFO_ACCESS_THIS;
+
+ // An optimization for Contextful classes:
+ // we unwrap the proxy when we have a 'this reference'
+
+ if (info.compUnwrapContextful)
+ {
+ aflags |= CORINFO_ACCESS_UNWRAP;
+ }
+ }
+ }
+
+ eeGetFieldInfo(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo);
+
+ // Figure out the type of the member. We always call canAccessField, so you always need this
+ // handle
+ CorInfoType ciType = fieldInfo.fieldType;
+ fieldClsHnd = fieldInfo.structType;
+
+ lclTyp = JITtype2varType(ciType);
+
+ if (compIsForInlining())
+ {
+ /* Is this a 'special' (COM) field? a TLS ref static field? a field stored in the GC heap?
+    or a per-instantiation static? */
+
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_STATIC_TLS:
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_STFLD_NEEDS_HELPER);
+ return;
+
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+
+ /* We may be able to inline the field accessors in specific instantiations of generic
+ * methods */
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_STFLD_NEEDS_HELPER);
+ return;
+
+ default:
+ break;
+ }
+ }
+
+ impHandleAccessAllowed(fieldInfo.accessAllowed, &fieldInfo.accessCalloutHelper);
+
+ if (tiVerificationNeeded)
+ {
+ verVerifyField(&resolvedToken, fieldInfo, tiObj, TRUE);
+ typeInfo fieldType = verMakeTypeInfo(ciType, fieldClsHnd);
+ Verify(tiCompatibleWith(tiVal, fieldType.NormaliseForStack(), true), "type mismatch");
+ }
+ else
+ {
+ // tiVerificationNeeded is false.
+ // Raise InvalidProgramException if static store accesses non-static field
+ if (isStoreStatic && ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) == 0))
+ {
+ BADCODE("static access on an instance field");
+ }
+ }
+
+ // We are using stfld on a static field.
+ // We allow it, but need to eval any side-effects for obj
+ if ((fieldInfo.fieldFlags & CORINFO_FLG_FIELD_STATIC) && obj != nullptr)
+ {
+ if (obj->gtFlags & GTF_SIDE_EFFECT)
+ {
+ obj = gtUnusedValNode(obj);
+ impAppendTree(obj, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+ }
+ obj = nullptr;
+ }
+
+ /* Preserve 'small' int types */
+ if (lclTyp > TYP_INT)
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ switch (fieldInfo.fieldAccessor)
+ {
+ case CORINFO_FIELD_INSTANCE:
+#ifdef FEATURE_READYTORUN_COMPILER
+ case CORINFO_FIELD_INSTANCE_WITH_BASE:
+#endif
+ {
+ obj = impCheckForNullPointer(obj);
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, obj, fieldInfo.offset);
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(resolvedToken.hClass);
+ if (StructHasOverlappingFields(typeFlags))
+ {
+ op1->gtField.gtFldMayOverlap = true;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (fieldInfo.fieldAccessor == CORINFO_FIELD_INSTANCE_WITH_BASE)
+ op1->gtField.gtFieldLookup = fieldInfo.fieldLookup;
+#endif
+
+ op1->gtFlags |= (obj->gtFlags & GTF_GLOB_EFFECT);
+
+ if (fgAddrCouldBeNull(obj))
+ {
+ op1->gtFlags |= GTF_EXCEPT;
+ }
+
+ // If gtFldObj is a BYREF then our target is a value class and
+ // it could point anywhere, for example a boxed class static int
+ if (obj->gtType == TYP_BYREF)
+ {
+ op1->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+
+ if (compIsForInlining() &&
+ impInlineIsGuaranteedThisDerefBeforeAnySideEffects(op2, obj, impInlineInfo->inlArgInfo))
+ {
+ impInlineInfo->thisDereferencedFirst = true;
+ }
+ }
+ break;
+
+ case CORINFO_FIELD_STATIC_TLS:
+#ifdef _TARGET_X86_
+ // Legacy TLS access is implemented as intrinsic on x86 only
+
+ /* Create the data member node */
+ op1 = gtNewFieldRef(lclTyp, resolvedToken.hField, NULL, fieldInfo.offset);
+ op1->gtFlags |= GTF_IND_TLS_REF; // fgMorphField will handle the transformation
+
+ break;
+#else
+ fieldInfo.fieldAccessor = CORINFO_FIELD_STATIC_ADDR_HELPER;
+
+ __fallthrough;
+#endif
+
+ case CORINFO_FIELD_STATIC_ADDR_HELPER:
+ case CORINFO_FIELD_INSTANCE_HELPER:
+ case CORINFO_FIELD_INSTANCE_ADDR_HELPER:
+ op1 = gtNewRefCOMfield(obj, &resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo, lclTyp,
+ clsHnd, op2);
+ goto SPILL_APPEND;
+
+ case CORINFO_FIELD_STATIC_ADDRESS:
+ case CORINFO_FIELD_STATIC_RVA_ADDRESS:
+ case CORINFO_FIELD_STATIC_SHARED_STATIC_HELPER:
+ case CORINFO_FIELD_STATIC_GENERICS_STATIC_HELPER:
+ op1 = impImportStaticFieldAccess(&resolvedToken, (CORINFO_ACCESS_FLAGS)aflags, &fieldInfo,
+ lclTyp);
+ break;
+
+ default:
+ assert(!"Unexpected fieldAccessor");
+ }
+
+ // Create the member assignment, unless we have a struct.
+ // TODO-1stClassStructs: This could be limited to TYP_STRUCT, to avoid extra copies.
+ bool deferStructAssign = varTypeIsStruct(lclTyp);
+
+ if (!deferStructAssign)
+ {
+ if (prefixFlags & PREFIX_VOLATILE)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND));
+ op1->gtFlags |= GTF_DONT_CSE; // Can't CSE a volatile
+ op1->gtFlags |= GTF_ORDER_SIDEEFF; // Prevent this from being reordered
+ op1->gtFlags |= GTF_IND_VOLATILE;
+ }
+ if (prefixFlags & PREFIX_UNALIGNED)
+ {
+ assert((op1->OperGet() == GT_FIELD) || (op1->OperGet() == GT_IND));
+ op1->gtFlags |= GTF_IND_UNALIGNED;
+ }
+
+ /* V4.0 allows assignment of i4 constant values to i8 type vars when the IL verifier is
+    bypassed (full-trust apps). The reason this works is that the JIT stores an i4 constant
+    in the GenTree union during importation and reads from the union as if it were a long
+    during code generation. Though this can potentially read garbage, one can get lucky and
+    have it work correctly.
+
+    This code pattern is generated by the Dev10 MC++ compiler when storing to fields compiled
+    with the /O2 switch (the default for retail configs in Dev10), and a customer app has
+    taken a dependency on it. To be backward compatible, we explicitly add an upward cast
+    here so that it always works correctly.
+
+    Note that this is limited to x86 alone, as there is no back-compat to be addressed for
+    the ARM JIT for V4.0.
+ */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ if (op1->TypeGet() != op2->TypeGet() && op2->OperIsConst() && varTypeIsIntOrI(op2->TypeGet()) &&
+ varTypeIsLong(op1->TypeGet()))
+ {
+ op2 = gtNewCastNode(op1->TypeGet(), op2, op1->TypeGet());
+ }
+#endif
+
+#ifdef _TARGET_64BIT_
+ // Automatic upcast for a GT_CNS_INT into TYP_I_IMPL
+ if ((op2->OperGet() == GT_CNS_INT) && varTypeIsI(lclTyp) && !varTypeIsI(op2->gtType))
+ {
+ op2->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+ // Allow a downcast of op2 from TYP_I_IMPL into a 32-bit Int for x86 JIT compatibility
+ //
+ if (varTypeIsI(op2->gtType) && (genActualType(lclTyp) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_INT, op2, TYP_INT);
+ }
+ // Allow an upcast of op2 from a 32-bit Int into TYP_I_IMPL for x86 JIT compatibility
+ //
+ if (varTypeIsI(lclTyp) && (genActualType(op2->gtType) == TYP_INT))
+ {
+ op2 = gtNewCastNode(TYP_I_IMPL, op2, TYP_I_IMPL);
+ }
+ }
+#endif
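+ // Example: on a 64-bit target, storing a 32-bit int value into a native-int sized field gets
+ // an explicit upcast here instead of relying on an implicit widening later.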
+
+#if !FEATURE_X87_DOUBLES
+ // We can generate an assignment to a TYP_FLOAT from a TYP_DOUBLE
+ // We insert a cast to the dest 'op1' type
+ //
+ if ((op1->TypeGet() != op2->TypeGet()) && varTypeIsFloating(op1->gtType) &&
+ varTypeIsFloating(op2->gtType))
+ {
+ op2 = gtNewCastNode(op1->TypeGet(), op2, op1->TypeGet());
+ }
+#endif // !FEATURE_X87_DOUBLES
+
+ op1 = gtNewAssignNode(op1, op2);
+
+ /* Mark the expression as containing an assignment */
+
+ op1->gtFlags |= GTF_ASG;
+ }
+
+ /* Check if the class needs explicit initialization */
+
+ if (fieldInfo.fieldFlags & CORINFO_FLG_FIELD_INITCLASS)
+ {
+ GenTreePtr helperNode = impInitClass(&resolvedToken);
+ if (compDonotInline())
+ {
+ return;
+ }
+ if (helperNode != nullptr)
+ {
+ op1 = gtNewOperNode(GT_COMMA, op1->TypeGet(), helperNode, op1);
+ }
+ }
+
+ /* stfld can interfere with value classes (consider the sequence
+ ldloc, ldloca, ..., stfld, stloc). We will be conservative and
+ spill all value class references from the stack. */
+
+ if (obj && ((obj->gtType == TYP_BYREF) || (obj->gtType == TYP_I_IMPL)))
+ {
+ assert(tiObj);
+
+ if (impIsValueType(tiObj))
+ {
+ impSpillEvalStack();
+ }
+ else
+ {
+ impSpillValueClasses();
+ }
+ }
+
+ /* Spill any refs to the same member from the stack */
+
+ impSpillLclRefs((ssize_t)resolvedToken.hField);
+
+ /* stsfld also interferes with indirect accesses (for aliased
+ statics) and calls. But don't need to spill other statics
+ as we have explicitly spilled this particular static field. */
+
+ impSpillSideEffects(false, (unsigned)CHECK_SPILL_ALL DEBUGARG("spill side effects before STFLD"));
+
+ if (deferStructAssign)
+ {
+ op1 = impAssignStruct(op1, op2, clsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ }
+ goto APPEND;
+
+ case CEE_NEWARR:
+ {
+
+ /* Get the class type index operand */
+
+ _impResolveToken(CORINFO_TOKENKIND_Newarr);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (!opts.IsReadyToRun())
+ {
+ // Need to restore array classes before creating array objects on the heap
+ op1 = impTokenToHandle(&resolvedToken, nullptr, TRUE /*mustRestoreHandle*/);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ // As per ECMA, the 'numElems' operand can be either an int32 or a native int.
+ Verify(impStackTop().seTypeInfo.IsIntOrNativeIntType(), "bad bound");
+
+ CORINFO_CLASS_HANDLE elemTypeHnd;
+ info.compCompHnd->getChildType(resolvedToken.hClass, &elemTypeHnd);
+ Verify(elemTypeHnd == nullptr ||
+ !(info.compCompHnd->getClassAttribs(elemTypeHnd) & CORINFO_FLG_CONTAINS_STACK_PTR),
+ "array of byref-like type");
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ /* Form the arglist: array class handle, size */
+ op2 = impPopStack().val;
+ assertImp(genActualTypeIsIntOrI(op2->gtType));
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ op1 = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_NEWARR_1, TYP_REF,
+ gtNewArgList(op2));
+ usingReadyToRunHelper = (op1 != NULL);
+
+ if (!usingReadyToRunHelper)
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the newarr call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Allocate the new array
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ // Need to restore array classes before creating array objects on the heap
+ op1 = impTokenToHandle(&resolvedToken, NULL, TRUE /*mustRestoreHandle*/);
+ if (op1 == NULL) // compDonotInline()
+ return;
+ }
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ args = gtNewArgList(op1, op2);
+
+ /* Create a call to 'new' */
+
+ // Note that this only works for shared generic code because the same helper is used for all
+ // reference array types
+ op1 =
+ gtNewHelperCallNode(info.compCompHnd->getNewArrHelper(resolvedToken.hClass), TYP_REF, 0, args);
+ }
+
+ op1->gtCall.compileTimeHelperArgumentHandle = (CORINFO_GENERIC_HANDLE)resolvedToken.hClass;
+
+ /* Remember that this basic block contains a 'new' of a single-dimension (SD) array */
+
+ block->bbFlags |= BBF_HAS_NEWARRAY;
+ optMethodFlags |= OMF_HAS_NEWARRAY;
+
+ /* Push the result of the call on the stack */
+
+ impPushOnStack(op1, tiRetVal);
+
+ callTyp = TYP_REF;
+ }
+ break;
+
+ case CEE_LOCALLOC:
+ assert(!compIsForInlining());
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "bad opcode");
+ }
+
+ // We don't allow locallocs inside handlers
+ if (block->hasHndIndex())
+ {
+ BADCODE("Localloc can't be inside handler");
+ }
+
+ /* The FP register may not be back to the original value at the end
+ of the method, even if the frame size is 0, as localloc may
+ have modified it. So we will HAVE to reset it */
+
+ compLocallocUsed = true;
+ setNeedsGSSecurityCookie();
+
+ // Get the size to allocate
+
+ op2 = impPopStack().val;
+ assertImp(genActualTypeIsIntOrI(op2->gtType));
+
+ if (verCurrentState.esStackDepth != 0)
+ {
+ BADCODE("Localloc can only be used when the stack is empty");
+ }
+
+ op1 = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, op2);
+
+ // May throw a stack overflow exception. Obviously, we don't want locallocs to be CSE'd.
+
+ op1->gtFlags |= (GTF_EXCEPT | GTF_DONT_CSE);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_ISINST:
+
+ /* Get the type token */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Casting);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (!opts.IsReadyToRun())
+ {
+ op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsObjRef(), "obj reference needed");
+ // Even if this is a value class, we know it is boxed.
+ tiRetVal = typeInfo(TI_REF, resolvedToken.hClass);
+ }
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ op1 = impPopStack().val;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ GenTreePtr opLookup =
+ impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_ISINSTANCEOF, TYP_REF,
+ gtNewArgList(op1));
+ usingReadyToRunHelper = (opLookup != NULL);
+ op1 = (usingReadyToRunHelper ? opLookup : op1);
+
+ if (!usingReadyToRunHelper)
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the isinstanceof_any call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Perform the 'is instance' check on the input object
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ op2 = impTokenToHandle(&resolvedToken, NULL, FALSE);
+ if (op2 == NULL) // compDonotInline()
+ return;
+ }
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, false);
+ }
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ impPushOnStack(op1, tiRetVal);
+
+ break;
+
+ case CEE_REFANYVAL:
+
+ // get the class handle and make a ICON node out of it
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ op2 = impTokenToHandle(&resolvedToken);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(typeInfo::AreEquivalent(impStackTop().seTypeInfo, verMakeTypeInfo(impGetRefAnyClass())),
+ "need refany");
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass).MakeByRef();
+ }
+
+ op1 = impPopStack().val;
+ // make certain it is normalized;
+ op1 = impNormStructVal(op1, impGetRefAnyClass(), (unsigned)CHECK_SPILL_ALL);
+
+ // Call helper GETREFANY(classHandle, op1);
+ args = gtNewArgList(op2, op1);
+ op1 = gtNewHelperCallNode(CORINFO_HELP_GETREFANY, TYP_BYREF, 0, args);
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_REFANYTYPE:
+
+ if (tiVerificationNeeded)
+ {
+ Verify(typeInfo::AreEquivalent(impStackTop().seTypeInfo, verMakeTypeInfo(impGetRefAnyClass())),
+ "need refany");
+ }
+
+ op1 = impPopStack().val;
+
+ // make certain it is normalized;
+ op1 = impNormStructVal(op1, impGetRefAnyClass(), (unsigned)CHECK_SPILL_ALL);
+
+ if (op1->gtOper == GT_OBJ)
+ {
+ // Get the address of the refany
+ op1 = op1->gtOp.gtOp1;
+
+ // Fetch the type from the correct slot
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ gtNewIconNode(offsetof(CORINFO_RefAny, type), TYP_I_IMPL));
+ op1 = gtNewOperNode(GT_IND, TYP_BYREF, op1);
+ }
+ else
+ {
+ assertImp(op1->gtOper == GT_MKREFANY);
+
+ // The pointer may have side-effects
+ if (op1->gtOp.gtOp1->gtFlags & GTF_SIDE_EFFECT)
+ {
+ impAppendTree(op1->gtOp.gtOp1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+#ifdef DEBUG
+ impNoteLastILoffs();
+#endif
+ }
+
+ // We already have the class handle
+ op1 = op1->gtOp.gtOp2;
+ }
+
+ // convert native TypeHandle to RuntimeTypeHandle
+ {
+ GenTreeArgList* helperArgs = gtNewArgList(op1);
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL, TYP_STRUCT, GTF_EXCEPT,
+ helperArgs);
+
+ // The handle struct is returned in register
+ op1->gtCall.gtReturnType = TYP_REF;
+
+ tiRetVal = typeInfo(TI_STRUCT, impGetTypeHandleClass());
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_LDTOKEN:
+ {
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+ lastLoadToken = codeAddr;
+ _impResolveToken(CORINFO_TOKENKIND_Ldtoken);
+
+ tokenType = info.compCompHnd->getTokenTypeAsHandle(&resolvedToken);
+
+ op1 = impTokenToHandle(&resolvedToken, nullptr, TRUE);
+ if (op1 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ helper = CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE;
+ assert(resolvedToken.hClass != nullptr);
+
+ if (resolvedToken.hMethod != nullptr)
+ {
+ helper = CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD;
+ }
+ else if (resolvedToken.hField != nullptr)
+ {
+ helper = CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD;
+ }
+
+ GenTreeArgList* helperArgs = gtNewArgList(op1);
+
+ op1 = gtNewHelperCallNode(helper, TYP_STRUCT, GTF_EXCEPT, helperArgs);
+
+ // The handle struct is returned in register
+ op1->gtCall.gtReturnType = TYP_REF;
+
+ tiRetVal = verMakeTypeInfo(tokenType);
+ impPushOnStack(op1, tiRetVal);
+ }
+ break;
+
+ case CEE_UNBOX:
+ case CEE_UNBOX_ANY:
+ {
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ BOOL runtimeLookup;
+ op2 = impTokenToHandle(&resolvedToken, &runtimeLookup);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ // Run this always so we can get access exceptions even with SkipVerification.
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ if (opcode == CEE_UNBOX_ANY && !eeIsValueClass(resolvedToken.hClass))
+ {
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiUnbox = impStackTop().seTypeInfo;
+ Verify(tiUnbox.IsObjRef(), "bad unbox.any arg");
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ tiRetVal.NormaliseForStack();
+ }
+ op1 = impPopStack().val;
+ goto CASTCLASS;
+ }
+
+ /* Pop the object and create the unbox helper call */
+ /* You might think that for UNBOX_ANY we need to push a different */
+ /* (non-byref) type, but here we're making the tiRetVal that is used */
+ /* for the intermediate pointer which we then transfer onto the OBJ */
+ /* instruction. OBJ then creates the appropriate tiRetVal. */
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiUnbox = impStackTop().seTypeInfo;
+ Verify(tiUnbox.IsObjRef(), "Bad unbox arg");
+
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ Verify(tiRetVal.IsValueClass(), "not value class");
+ tiRetVal.MakeByRef();
+
+ // We always come from an objref, so this is safe byref
+ tiRetVal.SetIsPermanentHomeByRef();
+ tiRetVal.SetIsReadonlyByRef();
+ }
+
+ op1 = impPopStack().val;
+ assertImp(op1->gtType == TYP_REF);
+
+ helper = info.compCompHnd->getUnBoxHelper(resolvedToken.hClass);
+ assert(helper == CORINFO_HELP_UNBOX || helper == CORINFO_HELP_UNBOX_NULLABLE);
+
+ // We only want to expand inline the normal UNBOX helper;
+ expandInline = (helper == CORINFO_HELP_UNBOX);
+
+ if (expandInline)
+ {
+ if (compCurBB->isRunRarely())
+ {
+ expandInline = false; // not worth the code expansion
+ }
+ }
+
+ if (expandInline)
+ {
+ // we are doing normal unboxing
+ // inline the common case of the unbox helper
+ // UNBOX(exp) morphs into
+ // clone = pop(exp);
+ // ((*clone == typeToken) ? nop : helper(clone, typeToken));
+ // push(clone + sizeof(void*))
+ //
+ GenTreePtr cloneOperand;
+ op1 = impCloneExpr(op1, &cloneOperand, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("inline UNBOX clone1"));
+ op1 = gtNewOperNode(GT_IND, TYP_I_IMPL, op1);
+
+ GenTreePtr condBox = gtNewOperNode(GT_EQ, TYP_INT, op1, op2);
+
+ op1 = impCloneExpr(cloneOperand, &cloneOperand, NO_CLASS_HANDLE, (unsigned)CHECK_SPILL_ALL,
+ nullptr DEBUGARG("inline UNBOX clone2"));
+ op2 = impTokenToHandle(&resolvedToken);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ args = gtNewArgList(op2, op1);
+ op1 = gtNewHelperCallNode(helper, TYP_VOID, 0, args);
+
+ op1 = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), op1);
+ op1 = gtNewQmarkNode(TYP_VOID, condBox, op1);
+ condBox->gtFlags |= GTF_RELOP_QMARK;
+
+ // QMARK nodes cannot reside on the evaluation stack. Because there
+ // may be other trees on the evaluation stack that side-effect the
+ // sources of the UNBOX operation we must spill the stack.
+
+ impAppendTree(op1, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
+
+ // Create the address-expression to reference past the object header
+ // to the beginning of the value-type. Today this means adjusting
+ // past the base of the object's vtable field, which is pointer sized.
+
+ op2 = gtNewIconNode(sizeof(void*), TYP_I_IMPL);
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, cloneOperand, op2);
+ }
+ else
+ {
+ unsigned callFlags = (helper == CORINFO_HELP_UNBOX) ? 0 : GTF_EXCEPT;
+
+ // Don't optimize, just call the helper and be done with it
+ args = gtNewArgList(op2, op1);
+ op1 = gtNewHelperCallNode(helper,
+ (var_types)((helper == CORINFO_HELP_UNBOX) ? TYP_BYREF : TYP_STRUCT),
+ callFlags, args);
+ }
+
+ assert(((helper == CORINFO_HELP_UNBOX) && (op1->gtType == TYP_BYREF)) || // Unbox helper returns a byref.
+        ((helper == CORINFO_HELP_UNBOX_NULLABLE) &&
+         varTypeIsStruct(op1))); // UnboxNullable helper returns a struct.
+
+ /*
+   -------------------------------------------------------------------------------
+   |  \ helper   |                           |                                    |
+   |   \         |   CORINFO_HELP_UNBOX      |   CORINFO_HELP_UNBOX_NULLABLE      |
+   |    \        |   (which returns a BYREF) |   (which returns a STRUCT)         |
+   | opcode \    |                           |                                    |
+   |-----------------------------------------------------------------------------|
+   | UNBOX       | push the BYREF            | spill the STRUCT to a local,       |
+   |             |                           | push the BYREF to this local       |
+   |-----------------------------------------------------------------------------|
+   | UNBOX_ANY   | push a GT_OBJ of          | push the STRUCT                    |
+   |             | the BYREF                 | For Linux, when the struct is      |
+   |             |                           | returned in two registers, create  |
+   |             |                           | a temp whose address is passed to  |
+   |             |                           | the unbox_nullable helper.         |
+   -------------------------------------------------------------------------------
+ */
+
+ if (opcode == CEE_UNBOX)
+ {
+ if (helper == CORINFO_HELP_UNBOX_NULLABLE)
+ {
+ // Unbox nullable helper returns a struct type.
+ // We need to spill it to a temp so that we can take its address.
+ // Here we need the unsafe value cls check, since the address of the struct is taken to be
+ // used further along and could potentially be exploited.
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a nullable"));
+ lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */);
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp.
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2);
+ op1 = gtNewOperNode(GT_COMMA, TYP_BYREF, op1, op2);
+ }
+
+ assert(op1->gtType == TYP_BYREF);
+ assert(!tiVerificationNeeded || tiRetVal.IsByRef());
+ }
+ else
+ {
+ assert(opcode == CEE_UNBOX_ANY);
+
+ if (helper == CORINFO_HELP_UNBOX)
+ {
+ // Normal unbox helper returns a TYP_BYREF.
+ impPushOnStack(op1, tiRetVal);
+ oper = GT_OBJ;
+ goto OBJ;
+ }
+
+ assert(helper == CORINFO_HELP_UNBOX_NULLABLE && "Make sure the helper is nullable!");
+
+#if FEATURE_MULTIREG_RET
+
+ if (varTypeIsStruct(op1) && IsMultiRegReturnedType(resolvedToken.hClass))
+ {
+ // Unbox nullable helper returns a TYP_STRUCT.
+ // For the multi-reg case we need to spill it to a temp so that
+ // we can pass the address to the unbox_nullable jit helper.
+
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("UNBOXing a register returnable nullable"));
+ lvaTable[tmp].lvIsMultiRegArg = true;
+ lvaSetStruct(tmp, resolvedToken.hClass, true /* unsafe value cls check */);
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op1 = impAssignStruct(op2, op1, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ assert(op1->gtType == TYP_VOID); // We must be assigning the return struct to the temp.
+
+ op2 = gtNewLclvNode(tmp, TYP_STRUCT);
+ op2 = gtNewOperNode(GT_ADDR, TYP_BYREF, op2);
+ op1 = gtNewOperNode(GT_COMMA, TYP_BYREF, op1, op2);
+
+ // In this case the return value of the unbox helper is TYP_BYREF.
+ // Make sure the right type is placed on the operand type stack.
+ impPushOnStack(op1, tiRetVal);
+
+ // Load the struct.
+ oper = GT_OBJ;
+
+ assert(op1->gtType == TYP_BYREF);
+ assert(!tiVerificationNeeded || tiRetVal.IsByRef());
+
+ goto OBJ;
+ }
+ else
+
+#endif // !FEATURE_MULTIREG_RET
+
+ {
+ // If the struct is not returned in registers, it has been materialized in the RetBuf.
+ assert(op1->gtType == TYP_STRUCT);
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+ assert(tiRetVal.IsValueClass());
+ }
+ }
+
+ impPushOnStack(op1, tiRetVal);
+ }
+ break;
+
+ case CEE_BOX:
+ {
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Box);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiActual = impStackTop().seTypeInfo;
+ typeInfo tiBox = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(verIsBoxable(tiBox), "boxable type expected");
+
+ // check the class constraints of the boxed type in case we are boxing an uninitialized value
+ Verify(info.compCompHnd->satisfiesClassConstraints(resolvedToken.hClass),
+ "boxed type has unsatisfied class constraints");
+
+ Verify(tiCompatibleWith(tiActual, tiBox.NormaliseForStack(), true), "type mismatch");
+
+ // Observation: the following code introduces a boxed value class on the stack, but,
+ // according to the ECMA spec, one would simply expect: tiRetVal =
+ // typeInfo(TI_REF,impGetObjectClass());
+
+ // Push the result back on the stack: even if clsHnd is a value class we want the TI_REF.
+ // We call back to the EE to find out what type we should push (for Nullable<T> we push T).
+ tiRetVal = typeInfo(TI_REF, info.compCompHnd->getTypeForBox(resolvedToken.hClass));
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ // Note BOX can be used on things that are not value classes, in which
+ // case we get a NOP. However the verifier's view of the type on the
+ // stack changes (in generic code a 'T' becomes a 'boxed T')
+ if (!eeIsValueClass(resolvedToken.hClass))
+ {
+ verCurrentState.esStack[verCurrentState.esStackDepth - 1].seTypeInfo = tiRetVal;
+ break;
+ }
+
+ // Look ahead for a following unbox.any: "box T; unbox.any T" on a non-shared T cancels
+ // out, so both instructions can be elided and the value left on the stack unchanged.
+ if (codeAddr + (sz + 1 + sizeof(mdToken)) <= codeEndp && codeAddr[sz] == CEE_UNBOX_ANY)
+ {
+ DWORD classAttribs = info.compCompHnd->getClassAttribs(resolvedToken.hClass);
+ if (!(classAttribs & CORINFO_FLG_SHAREDINST))
+ {
+ CORINFO_RESOLVED_TOKEN unboxResolvedToken;
+
+ impResolveToken(codeAddr + (sz + 1), &unboxResolvedToken, CORINFO_TOKENKIND_Class);
+
+ if (unboxResolvedToken.hClass == resolvedToken.hClass)
+ {
+ // Skip the next unbox.any instruction
+ sz += sizeof(mdToken) + 1;
+ break;
+ }
+ }
+ }
+
+ impImportAndPushBox(&resolvedToken);
+ if (compDonotInline())
+ {
+ return;
+ }
+ }
+ break;
+
+ case CEE_SIZEOF:
+
+ /* Get the Class index */
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = typeInfo(TI_INT);
+ }
+
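+ // The size is known at JIT time, so sizeof imports as an integer constant node.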
+ op1 = gtNewIconNode(info.compCompHnd->getClassSize(resolvedToken.hClass));
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_CASTCLASS:
+
+ /* Get the Class index */
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Casting);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (!opts.IsReadyToRun())
+ {
+ op2 = impTokenToHandle(&resolvedToken, nullptr, FALSE);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(impStackTop().seTypeInfo.IsObjRef(), "object ref expected");
+ // box it
+ tiRetVal = typeInfo(TI_REF, resolvedToken.hClass);
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ op1 = impPopStack().val;
+
+ /* Pop the address and create the 'checked cast' helper call */
+
+ // At this point we expect typeRef to contain the token, op1 to contain the value being cast,
+ // and op2 to contain code that creates the type handle corresponding to typeRef
+ CASTCLASS:
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (opts.IsReadyToRun())
+ {
+ GenTreePtr opLookup = impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_CHKCAST,
+ TYP_REF, gtNewArgList(op1));
+ usingReadyToRunHelper = (opLookup != NULL);
+ op1 = (usingReadyToRunHelper ? opLookup : op1);
+
+ if (!usingReadyToRunHelper)
+ {
+ // TODO: ReadyToRun: When generic dictionary lookups are necessary, replace the lookup call
+ // and the chkcastany call with a single call to a dynamic R2R cell that will:
+ // 1) Load the context
+ // 2) Perform the generic dictionary lookup and caching, and generate the appropriate stub
+ // 3) Check the object on the stack for the type-cast
+ // Reason: performance (today, we'll always use the slow helper for the R2R generics case)
+
+ op2 = impTokenToHandle(&resolvedToken, NULL, FALSE);
+ if (op2 == NULL) // compDonotInline()
+ return;
+ }
+ }
+
+ if (!usingReadyToRunHelper)
+#endif
+ {
+ op1 = impCastClassOrIsInstToTree(op1, op2, &resolvedToken, true);
+ }
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ /* Push the result back on the stack */
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_THROW:
+
+ if (compIsForInlining())
+ {
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ // TODO: Will this be too strict, given that we will inline many basic blocks?
+ // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ /* Do we have just the exception on the stack ?*/
+
+ if (verCurrentState.esStackDepth != 1)
+ {
+ /* if not, just don't inline the method */
+
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_THROW_WITH_INVALID_STACK);
+ return;
+ }
+
+ /* Don't inline non-void conditionals that have a throw in one of the branches */
+
+ /* NOTE: If we do allow this, note that we can't simply do a
+ checkLiveness() to match the liveness at the end of the "then"
+ and "else" branches of the GT_COLON. The branch with the throw
+ will keep nothing live, so we should use the liveness at the
+ end of the non-throw branch. */
+
+ if (seenConditionalJump && (impInlineInfo->inlineCandidateInfo->fncRetType != TYP_VOID))
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_CONDITIONAL_THROW);
+ return;
+ }
+ }
+
+ if (tiVerificationNeeded)
+ {
+ tiRetVal = impStackTop().seTypeInfo;
+ Verify(tiRetVal.IsObjRef(), "object ref expected");
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ Verify(!tiRetVal.IsThisPtr(), "throw uninitialized this");
+ }
+ }
+
+ block->bbSetRunRarely(); // any block with a throw is rare
+ /* Pop the exception object and create the 'throw' helper call */
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_THROW, TYP_VOID, GTF_EXCEPT, gtNewArgList(impPopStack().val));
+
+ EVAL_APPEND:
+ if (verCurrentState.esStackDepth > 0)
+ {
+ impEvalSideEffects();
+ }
+
+ assert(verCurrentState.esStackDepth == 0);
+
+ goto APPEND;
+
+ case CEE_RETHROW:
+
+ assert(!compIsForInlining());
+
+ if (info.compXcptnsCount == 0)
+ {
+ BADCODE("rethrow outside catch");
+ }
+
+ if (tiVerificationNeeded)
+ {
+ Verify(block->hasHndIndex(), "rethrow outside catch");
+ if (block->hasHndIndex())
+ {
+ EHblkDsc* HBtab = ehGetDsc(block->getHndIndex());
+ Verify(!HBtab->HasFinallyOrFaultHandler(), "rethrow in finally or fault");
+ if (HBtab->HasFilter())
+ {
+ // we better be in the handler clause part, not the filter part
+ Verify(jitIsBetween(compCurBB->bbCodeOffs, HBtab->ebdHndBegOffs(), HBtab->ebdHndEndOffs()),
+ "rethrow in filter");
+ }
+ }
+ }
+
+ /* Create the 'rethrow' helper call */
+
+ op1 = gtNewHelperCallNode(CORINFO_HELP_RETHROW, TYP_VOID, GTF_EXCEPT);
+
+ goto EVAL_APPEND;
+
+ case CEE_INITOBJ:
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiTo = impStackTop().seTypeInfo;
+ typeInfo tiInstr = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(tiTo.IsByRef(), "byref expected");
+ Verify(!tiTo.IsReadonlyByRef(), "write to readonly byref");
+
+ Verify(tiCompatibleWith(tiInstr, tiTo.DereferenceByRef(), false),
+ "type operand incompatible with type of address");
+ }
+
+ size = info.compCompHnd->getClassSize(resolvedToken.hClass); // Size
+ op2 = gtNewIconNode(0); // Value
+ op1 = impPopStack().val; // Dest
+ op1 = gtNewBlockVal(op1, size);
+ op1 = gtNewBlkOpNode(op1, op2, size, (prefixFlags & PREFIX_VOLATILE) != 0, false);
+ goto SPILL_APPEND;
+
+ case CEE_INITBLK:
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "bad opcode");
+ }
+
+ op3 = impPopStack().val; // Size
+ op2 = impPopStack().val; // Value
+ op1 = impPopStack().val; // Dest
+
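+ // Use a fixed-size GT_BLK when the size is a constant; otherwise a GT_DYN_BLK carries the
+ // size tree.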
+ if (op3->IsCnsIntOrI())
+ {
+ size = (unsigned)op3->AsIntConCommon()->IconValue();
+ op1 = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, op1, size);
+ }
+ else
+ {
+ op1 = new (this, GT_DYN_BLK) GenTreeDynBlk(op1, op3);
+ size = 0;
+ }
+ op1 = gtNewBlkOpNode(op1, op2, size, (prefixFlags & PREFIX_VOLATILE) != 0, false);
+
+ goto SPILL_APPEND;
+
+ case CEE_CPBLK:
+
+ if (tiVerificationNeeded)
+ {
+ Verify(false, "bad opcode");
+ }
+ op3 = impPopStack().val; // Size
+ op2 = impPopStack().val; // Src
+ op1 = impPopStack().val; // Dest
+
+ if (op3->IsCnsIntOrI())
+ {
+ size = (unsigned)op3->AsIntConCommon()->IconValue();
+ op1 = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, op1, size);
+ }
+ else
+ {
+ op1 = new (this, GT_DYN_BLK) GenTreeDynBlk(op1, op3);
+ size = 0;
+ }
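+ // The source must be a location rather than an address: strip a GT_ADDR if present,
+ // otherwise wrap the address in a GT_IND.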
+ if (op2->OperGet() == GT_ADDR)
+ {
+ op2 = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ op2 = gtNewOperNode(GT_IND, TYP_STRUCT, op2);
+ }
+
+ op1 = gtNewBlkOpNode(op1, op2, size, (prefixFlags & PREFIX_VOLATILE) != 0, true);
+ goto SPILL_APPEND;
+
+ case CEE_CPOBJ:
+
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiFrom = impStackTop().seTypeInfo;
+ typeInfo tiTo = impStackTop(1).seTypeInfo;
+ typeInfo tiInstr = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(tiFrom.IsByRef(), "expected byref source");
+ Verify(tiTo.IsByRef(), "expected byref destination");
+
+ Verify(tiCompatibleWith(tiFrom.DereferenceByRef(), tiInstr, false),
+ "type of source address incompatible with type operand");
+ Verify(!tiTo.IsReadonlyByRef(), "write to readonly byref");
+ Verify(tiCompatibleWith(tiInstr, tiTo.DereferenceByRef(), false),
+ "type operand incompatible with type of destination address");
+ }
+
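+ // cpobj on a reference type is just an object-reference copy, so reduce it to a ldind.ref
+ // of the source followed by the stind.ref path.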
+ if (!eeIsValueClass(resolvedToken.hClass))
+ {
+ op1 = impPopStack().val; // address to load from
+
+ impBashVarAddrsToI(op1);
+
+ assertImp(genActualType(op1->gtType) == TYP_I_IMPL || op1->gtType == TYP_BYREF);
+
+ op1 = gtNewOperNode(GT_IND, TYP_REF, op1);
+ op1->gtFlags |= GTF_EXCEPT | GTF_GLOB_REF;
+
+ impPushOnStackNoType(op1);
+ opcode = CEE_STIND_REF;
+ lclTyp = TYP_REF;
+ goto STIND_POST_VERIFY;
+ }
+
+ op2 = impPopStack().val; // Src
+ op1 = impPopStack().val; // Dest
+ op1 = gtNewCpObjNode(op1, op2, resolvedToken.hClass, ((prefixFlags & PREFIX_VOLATILE) != 0));
+ goto SPILL_APPEND;
+
+ case CEE_STOBJ:
+ {
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ if (eeIsValueClass(resolvedToken.hClass))
+ {
+ lclTyp = TYP_STRUCT;
+ }
+ else
+ {
+ lclTyp = TYP_REF;
+ }
+
+ if (tiVerificationNeeded)
+ {
+
+ typeInfo tiPtr = impStackTop(1).seTypeInfo;
+
+ // Make sure we have a good looking byref
+ Verify(tiPtr.IsByRef(), "pointer not byref");
+ Verify(!tiPtr.IsReadonlyByRef(), "write to readonly byref");
+ if (!tiPtr.IsByRef() || tiPtr.IsReadonlyByRef())
+ {
+ compUnsafeCastUsed = true;
+ }
+
+ typeInfo ptrVal = DereferenceByRef(tiPtr);
+ typeInfo argVal = verMakeTypeInfo(resolvedToken.hClass);
+
+ if (!tiCompatibleWith(impStackTop(0).seTypeInfo, NormaliseForStack(argVal), true))
+ {
+ Verify(false, "type of value incompatible with type operand");
+ compUnsafeCastUsed = true;
+ }
+
+ if (!tiCompatibleWith(argVal, ptrVal, false))
+ {
+ Verify(false, "type operand incompatible with type of address");
+ compUnsafeCastUsed = true;
+ }
+ }
+ else
+ {
+ compUnsafeCastUsed = true;
+ }
+
+ if (lclTyp == TYP_REF)
+ {
+ opcode = CEE_STIND_REF;
+ goto STIND_POST_VERIFY;
+ }
+
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
+ if (impIsPrimitive(jitTyp))
+ {
+ lclTyp = JITtype2varType(jitTyp);
+ goto STIND_POST_VERIFY;
+ }
+
+ op2 = impPopStack().val; // Value
+ op1 = impPopStack().val; // Ptr
+
+ assertImp(varTypeIsStruct(op2));
+
+ op1 = impAssignStructPtr(op1, op2, resolvedToken.hClass, (unsigned)CHECK_SPILL_ALL);
+ goto SPILL_APPEND;
+ }
+
+ case CEE_MKREFANY:
+
+ assert(!compIsForInlining());
+
+ // Being lazy here. Refanys are tricky in terms of gc tracking.
+ // Since it is uncommon, just don't perform struct promotion in any method that contains mkrefany.
+
+ JITDUMP("disabling struct promotion because of mkrefany\n");
+ fgNoStructPromotion = true;
+
+ oper = GT_MKREFANY;
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ op2 = impTokenToHandle(&resolvedToken, nullptr, TRUE);
+ if (op2 == nullptr)
+ { // compDonotInline()
+ return;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiPtr = impStackTop().seTypeInfo;
+ typeInfo tiInstr = verMakeTypeInfo(resolvedToken.hClass);
+
+ Verify(!verIsByRefLike(tiInstr), "mkrefany of byref-like class");
+ Verify(!tiPtr.IsReadonlyByRef(), "readonly byref used with mkrefany");
+ Verify(typeInfo::AreEquivalent(tiPtr.DereferenceByRef(), tiInstr), "type mismatch");
+ }
+
+ accessAllowedResult =
+ info.compCompHnd->canAccessClass(&resolvedToken, info.compMethodHnd, &calloutHelper);
+ impHandleAccessAllowed(accessAllowedResult, &calloutHelper);
+
+ op1 = impPopStack().val;
+
+ // @SPECVIOLATION: TYP_INT should not be allowed here by a strict reading of the spec.
+ // But JIT32 allowed it, so we continue to allow it.
+ assertImp(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_INT);
+
+ // MKREFANY returns a struct. op2 is the class token.
+ op1 = gtNewOperNode(oper, TYP_STRUCT, op1, op2);
+
+ impPushOnStack(op1, verMakeTypeInfo(impGetRefAnyClass()));
+ break;
+
+ case CEE_LDOBJ:
+ {
+ oper = GT_OBJ;
+ assertImp(sz == sizeof(unsigned));
+
+ _impResolveToken(CORINFO_TOKENKIND_Class);
+
+ JITDUMP(" %08X", resolvedToken.token);
+
+ OBJ:
+
+ tiRetVal = verMakeTypeInfo(resolvedToken.hClass);
+
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiPtr = impStackTop().seTypeInfo;
+
+ // Make sure we have a byref
+ if (!tiPtr.IsByRef())
+ {
+ Verify(false, "pointer not byref");
+ compUnsafeCastUsed = true;
+ }
+ typeInfo tiPtrVal = DereferenceByRef(tiPtr);
+
+ if (!tiCompatibleWith(tiPtrVal, tiRetVal, false))
+ {
+ Verify(false, "type of address incompatible with type operand");
+ compUnsafeCastUsed = true;
+ }
+ tiRetVal.NormaliseForStack();
+ }
+ else
+ {
+ compUnsafeCastUsed = true;
+ }
+
+ if (eeIsValueClass(resolvedToken.hClass))
+ {
+ lclTyp = TYP_STRUCT;
+ }
+ else
+ {
+ lclTyp = TYP_REF;
+ opcode = CEE_LDIND_REF;
+ goto LDIND_POST_VERIFY;
+ }
+
+ op1 = impPopStack().val;
+
+ assertImp(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL);
+
+ CorInfoType jitTyp = info.compCompHnd->asCorInfoType(resolvedToken.hClass);
+ if (impIsPrimitive(jitTyp))
+ {
+ op1 = gtNewOperNode(GT_IND, JITtype2varType(jitTyp), op1);
+
+ // Could point anywhere, for example a boxed class static int
+ op1->gtFlags |= GTF_IND_TGTANYWHERE | GTF_GLOB_REF;
+ assertImp(varTypeIsArithmetic(op1->gtType));
+ }
+ else
+ {
+ // OBJ returns a struct
+ // and an inline argument which is the class token of the loaded obj
+ op1 = gtNewObjNode(resolvedToken.hClass, op1);
+ }
+ op1->gtFlags |= GTF_EXCEPT;
+
+ impPushOnStack(op1, tiRetVal);
+ break;
+ }
+
+ case CEE_LDLEN:
+ if (tiVerificationNeeded)
+ {
+ typeInfo tiArray = impStackTop().seTypeInfo;
+ Verify(verIsSDArray(tiArray), "bad array");
+ tiRetVal = typeInfo(TI_INT);
+ }
+
+ op1 = impPopStack().val;
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ /* Use GT_ARR_LENGTH operator so rng check opts see this */
+ GenTreeArrLen* arrLen =
+ new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, op1, offsetof(CORINFO_Array, length));
+
+ /* Mark the block as containing a length expression */
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ block->bbFlags |= BBF_HAS_IDX_LEN;
+ }
+
+ op1 = arrLen;
+ }
+ else
+ {
+ /* Create the expression "*(array_addr + ArrLenOffs)" */
+ op1 = gtNewOperNode(GT_ADD, TYP_BYREF, op1,
+ gtNewIconNode(offsetof(CORINFO_Array, length), TYP_I_IMPL));
+ op1 = gtNewOperNode(GT_IND, TYP_INT, op1);
+ op1->gtFlags |= GTF_IND_ARR_LEN;
+ }
+
+ /* An indirection will cause a GPF if the address is null */
+ op1->gtFlags |= GTF_EXCEPT;
+
+ /* Push the result back on the stack */
+ impPushOnStack(op1, tiRetVal);
+ break;
+
+ case CEE_BREAK:
+ op1 = gtNewHelperCallNode(CORINFO_HELP_USER_BREAKPOINT, TYP_VOID);
+ goto SPILL_APPEND;
+
+ case CEE_NOP:
+ if (opts.compDbgCode)
+ {
+ op1 = new (this, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+ goto SPILL_APPEND;
+ }
+ break;
+
+ /******************************** NYI *******************************/
+
+ case 0xCC:
+ OutputDebugStringA("CLR: Invalid x86 breakpoint in IL stream\n");
+
+ case CEE_ILLEGAL:
+ case CEE_MACRO_END:
+
+ default:
+ BADCODE3("unknown opcode", ": %02X", (int)opcode);
+ }
+
+ codeAddr += sz;
+ prevOpcode = opcode;
+
+ prefixFlags = 0;
+ assert(!insertLdloc || opcode == CEE_DUP);
+ }
+
+ assert(!insertLdloc);
+
+ return;
+#undef _impResolveToken
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+// Push a local/argument tree on the operand stack
+void Compiler::impPushVar(GenTree* op, typeInfo tiRetVal)
+{
+ tiRetVal.NormaliseForStack();
+
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init) && tiRetVal.IsThisPtr())
+ {
+ tiRetVal.SetUninitialisedObjRef();
+ }
+
+ impPushOnStack(op, tiRetVal);
+}
+
+// Load a local/argument on the operand stack
+// lclNum is an index into lvaTable *NOT* the arg/lcl index in the IL
+void Compiler::impLoadVar(unsigned lclNum, IL_OFFSET offset, typeInfo tiRetVal)
+{
+ var_types lclTyp;
+
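+ // Normalize-on-load locals are loaded with their small (e.g. byte/short) type so the load
+ // itself re-normalizes the value; other locals are loaded with their widened stack type.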
+ if (lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ lclTyp = lvaGetRealType(lclNum);
+ }
+ else
+ {
+ lclTyp = lvaGetActualType(lclNum);
+ }
+
+ impPushVar(gtNewLclvNode(lclNum, lclTyp, offset), tiRetVal);
+}
+
+// Load an argument on the operand stack
+// Shared by the various CEE_LDARG opcodes
+// ilArgNum is the argument index as specified in IL.
+// It will be mapped to the correct lvaTable index
+void Compiler::impLoadArg(unsigned ilArgNum, IL_OFFSET offset)
+{
+ Verify(ilArgNum < info.compILargsCount, "bad arg num");
+
+ if (compIsForInlining())
+ {
+ if (ilArgNum >= info.compArgsCount)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_BAD_ARGUMENT_NUMBER);
+ return;
+ }
+
+ impPushVar(impInlineFetchArg(ilArgNum, impInlineInfo->inlArgInfo, impInlineInfo->lclVarInfo),
+ impInlineInfo->lclVarInfo[ilArgNum].lclVerTypeInfo);
+ }
+ else
+ {
+ if (ilArgNum >= info.compArgsCount)
+ {
+ BADCODE("Bad IL");
+ }
+
+ unsigned lclNum = compMapILargNum(ilArgNum); // account for possible hidden param
+
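+ // Loads of the incoming 'this' argument are redirected to lvaArg0Var, which may be a
+ // shadow copy of 'this'.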
+ if (lclNum == info.compThisArg)
+ {
+ lclNum = lvaArg0Var;
+ }
+
+ impLoadVar(lclNum, offset);
+ }
+}
+
+// Load a local on the operand stack
+// Shared by the various CEE_LDLOC opcodes
+// ilLclNum is the local index as specified in IL.
+// It will be mapped to the correct lvaTable index
+void Compiler::impLoadLoc(unsigned ilLclNum, IL_OFFSET offset)
+{
+ if (tiVerificationNeeded)
+ {
+ Verify(ilLclNum < info.compMethodInfo->locals.numArgs, "bad loc num");
+ Verify(info.compInitMem, "initLocals not set");
+ }
+
+ if (compIsForInlining())
+ {
+ if (ilLclNum >= info.compMethodInfo->locals.numArgs)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_BAD_LOCAL_NUMBER);
+ return;
+ }
+
+ // Get the local type
+ var_types lclTyp = impInlineInfo->lclVarInfo[ilLclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ typeInfo tiRetVal = impInlineInfo->lclVarInfo[ilLclNum + impInlineInfo->argCnt].lclVerTypeInfo;
+
+ /* Have we allocated a temp for this local? */
+
+ unsigned lclNum = impInlineFetchLocal(ilLclNum DEBUGARG("Inline ldloc first use temp"));
+
+ // All vars of inlined methods should be !lvNormalizeOnLoad()
+
+ assert(!lvaTable[lclNum].lvNormalizeOnLoad());
+ lclTyp = genActualType(lclTyp);
+
+ impPushVar(gtNewLclvNode(lclNum, lclTyp), tiRetVal);
+ }
+ else
+ {
+ if (ilLclNum >= info.compMethodInfo->locals.numArgs)
+ {
+ BADCODE("Bad IL");
+ }
+
+ unsigned lclNum = info.compArgsCount + ilLclNum;
+
+ impLoadVar(lclNum, offset);
+ }
+}
+
+#ifdef _TARGET_ARM_
+/**************************************************************************************
+ *
+ * When assigning a vararg call src to a HFA lcl dest, mark that we cannot promote the
+ * dst struct, because struct promotion will turn it into a float/double variable while
+ * the rhs will be an int/long variable. We don't generate code for assigning an int into
+ * a float register, yet nothing would prevent such a tree from being produced. The tree
+ * would look like: (=, (typ_float, typ_int)) or (GT_TRANSFER, (typ_float, typ_int))
+ *
+ * tmpNum - the lcl dst variable num that is a struct.
+ * src - the src tree assigned to the dest that is a struct/int (when varargs call.)
+ * hClass - the type handle for the struct variable.
+ *
+ * TODO-ARM-CQ: [301608] This is a rare scenario with varargs and struct promotion coming into play,
+ * however, we could do a codegen of transferring from int to float registers
+ * (transfer, not a cast.)
+ *
+ */
+void Compiler::impMarkLclDstNotPromotable(unsigned tmpNum, GenTreePtr src, CORINFO_CLASS_HANDLE hClass)
+{
+ if (src->gtOper == GT_CALL && src->gtCall.IsVarargs() && IsHfa(hClass))
+ {
+ int hfaSlots = GetHfaCount(hClass);
+ var_types hfaType = GetHfaType(hClass);
+
+ // If we have varargs, the importer morphs the method's return type to "int" irrespective of
+ // its original struct/float type, because the ABI specifies that such returns are in integer registers.
+ // We don't want struct promotion to replace an expression like this:
+ // lclFld_int = callvar_int() into lclFld_float = callvar_int();
+ // This means an int is getting assigned to a float without a cast. Prevent the promotion.
+ if ((hfaType == TYP_DOUBLE && hfaSlots == sizeof(double) / REGSIZE_BYTES) ||
+ (hfaType == TYP_FLOAT && hfaSlots == sizeof(float) / REGSIZE_BYTES))
+ {
+ // Make sure this struct type stays as struct so we can receive the call in a struct.
+ lvaTable[tmpNum].lvIsMultiRegRet = true;
+ }
+ }
+}
+#endif // _TARGET_ARM_
+
+#if FEATURE_MULTIREG_RET
+GenTreePtr Compiler::impAssignMultiRegTypeToVar(GenTreePtr op, CORINFO_CLASS_HANDLE hClass)
+{
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Return value temp for multireg return."));
+ impAssignTempGen(tmpNum, op, hClass, (unsigned)CHECK_SPILL_NONE);
+ GenTreePtr ret = gtNewLclvNode(tmpNum, op->gtType);
+ assert(IsMultiRegReturnedType(hClass));
+
+ // Mark the var so that fields are not promoted and stay together.
+ lvaTable[tmpNum].lvIsMultiRegRet = true;
+
+ return ret;
+}
+#endif // FEATURE_MULTIREG_RET
+
+// Do the import for a return instruction.
+// Returns false if inlining was aborted.
+// 'opcode' can be CEE_RET, or a call opcode in the case of a tail call.
+bool Compiler::impReturnInstruction(BasicBlock* block, int prefixFlags, OPCODE& opcode)
+{
+ if (tiVerificationNeeded)
+ {
+ verVerifyThisPtrInitialised();
+
+ unsigned expectedStack = 0;
+ if (info.compRetType != TYP_VOID)
+ {
+ typeInfo tiVal = impStackTop().seTypeInfo;
+ typeInfo tiDeclared =
+ verMakeTypeInfo(info.compMethodInfo->args.retType, info.compMethodInfo->args.retTypeClass);
+
+ Verify(!verIsByRefLike(tiDeclared) || verIsSafeToReturnByRef(tiVal), "byref return");
+
+ Verify(tiCompatibleWith(tiVal, tiDeclared.NormaliseForStack(), true), "type mismatch");
+ expectedStack = 1;
+ }
+ Verify(verCurrentState.esStackDepth == expectedStack, "stack non-empty on return");
+ }
+
+ GenTree* op2 = nullptr;
+ GenTree* op1 = nullptr;
+ CORINFO_CLASS_HANDLE retClsHnd = nullptr;
+
+ if (info.compRetType != TYP_VOID)
+ {
+ StackEntry se = impPopStack(retClsHnd);
+ op2 = se.val;
+
+ if (!compIsForInlining())
+ {
+ impBashVarAddrsToI(op2);
+ op2 = impImplicitIorI4Cast(op2, info.compRetType);
+ op2 = impImplicitR4orR8Cast(op2, info.compRetType);
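+ // The implicit casts above reconcile int/native-int and float/double mismatches between
+ // the value on the stack and the declared return type before the assert below.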
+ assertImp((genActualType(op2->TypeGet()) == genActualType(info.compRetType)) ||
+ ((op2->TypeGet() == TYP_I_IMPL) && (info.compRetType == TYP_BYREF)) ||
+ ((op2->TypeGet() == TYP_BYREF) && (info.compRetType == TYP_I_IMPL)) ||
+ (varTypeIsFloating(op2->gtType) && varTypeIsFloating(info.compRetType)) ||
+ (varTypeIsStruct(op2) && varTypeIsStruct(info.compRetType)));
+
+#ifdef DEBUG
+ if (opts.compGcChecks && info.compRetType == TYP_REF)
+ {
+ // DDB 3483 : JIT Stress: early termination of GC ref's life time in exception code path
+ // VSW 440513: Incorrect gcinfo on the return value under COMPlus_JitGCChecks=1 for methods with
+ // one-return BB.
+
+ assert(op2->gtType == TYP_REF);
+
+ // confirm that the argument is a GC pointer (for debugging (GC stress))
+ GenTreeArgList* args = gtNewArgList(op2);
+ op2 = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_REF, 0, args);
+
+ if (verbose)
+ {
+ printf("\ncompGcChecks tree:\n");
+ gtDispTree(op2);
+ }
+ }
+#endif
+ }
+ else
+ {
+ // inlinee's stack should be empty now.
+ assert(verCurrentState.esStackDepth == 0);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n\n Inlinee Return expression (before normalization) =>\n");
+ gtDispTree(op2);
+ }
+#endif
+
+ // Make sure the type matches the original call.
+
+ var_types returnType = genActualType(op2->gtType);
+ var_types originalCallType = impInlineInfo->inlineCandidateInfo->fncRetType;
+ if ((returnType != originalCallType) && (originalCallType == TYP_STRUCT))
+ {
+ originalCallType = impNormStructType(impInlineInfo->inlineCandidateInfo->methInfo.args.retTypeClass);
+ }
+
+ if (returnType != originalCallType)
+ {
+ compInlineResult->NoteFatal(InlineObservation::CALLSITE_RETURN_TYPE_MISMATCH);
+ return false;
+ }
+
+ // Below, we are going to set impInlineInfo->retExpr to the tree with the return
+ // expression. At this point, retExpr could already be set if there are multiple
+ // return blocks (meaning lvaInlineeReturnSpillTemp != BAD_VAR_NUM) and one of
+ // the other blocks already set it. If there is only a single return block,
+ // retExpr shouldn't be set. However, this is not true if we reimport a block
+ // with a return. In that case, retExpr will be set, then the block will be
+ // reimported, but retExpr won't get cleared as part of setting the block to
+ // be reimported. The reimported retExpr value should be the same, so even if
+ // we don't unconditionally overwrite it, it shouldn't matter.
+ if (info.compRetNativeType != TYP_STRUCT)
+ {
+ // compRetNativeType is not TYP_STRUCT.
+ // This implies it could be either a scalar type or SIMD vector type or
+ // a struct type that can be normalized to a scalar type.
+
+ if (varTypeIsStruct(info.compRetType))
+ {
+ noway_assert(info.compRetBuffArg == BAD_VAR_NUM);
+ // adjust the type away from struct to integral
+ // and no normalizing
+ op2 = impFixupStructReturnType(op2, retClsHnd);
+ }
+ else
+ {
+ // Do we have to normalize?
+ var_types fncRealRetType = JITtype2varType(info.compMethodInfo->args.retType);
+ if ((varTypeIsSmall(op2->TypeGet()) || varTypeIsSmall(fncRealRetType)) &&
+ fgCastNeeded(op2, fncRealRetType))
+ {
+ // Small-typed return values are normalized by the callee
+ op2 = gtNewCastNode(TYP_INT, op2, fncRealRetType);
+ }
+ }
+
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ assert(info.compRetNativeType != TYP_VOID && fgMoreThanOneReturnBlock());
+
+ // This is a bit of a workaround...
+ // If we are inlining a call that returns a struct, where the actual "native" return type is
+ // not a struct (for example, the struct is composed of exactly one int, and the native
+ // return type is thus an int), and the inlinee has multiple return blocks (thus,
+ // lvaInlineeReturnSpillTemp is != BAD_VAR_NUM, and is the index of a local var that is set
+ // to the *native* return type), and at least one of the return blocks is the result of
+ // a call, then we have a problem. The situation is like this (from a failed test case):
+ //
+ // inliner:
+ // // Note: valuetype plinq_devtests.LazyTests/LIX is a struct with only a single int
+ // call !!0 [mscorlib]System.Threading.LazyInitializer::EnsureInitialized<valuetype
+ // plinq_devtests.LazyTests/LIX>(!!0&, bool&, object&, class [mscorlib]System.Func`1<!!0>)
+ //
+ // inlinee:
+ // ...
+ // ldobj !!T // this gets bashed to a GT_LCL_FLD, type TYP_INT
+ // ret
+ // ...
+ // call !!0 System.Threading.LazyInitializer::EnsureInitializedCore<!!0>(!!0&, bool&,
+ // object&, class System.Func`1<!!0>)
+ // ret
+ //
+ // In the code above, when we call impFixupStructReturnType(), we will change the op2 return type
+ // of the inlinee return node, but we don't do that for GT_CALL nodes, which we delay until
+ // morphing when we call fgFixupStructReturn(). We do this, apparently, to handle nested
+ // inlining properly by leaving the correct type on the GT_CALL node through importing.
+ //
+ // To fix this, for this case, we temporarily change the GT_CALL node type to the
+ // native return type, which is what it will be set to eventually. We generate the
+ // assignment to the return temp, using the correct type, and then restore the GT_CALL
+ // node type. During morphing, the GT_CALL will get the correct, final, native return type.
+
+ bool restoreType = false;
+ if ((op2->OperGet() == GT_CALL) && (info.compRetType == TYP_STRUCT))
+ {
+ noway_assert(op2->TypeGet() == TYP_STRUCT);
+ op2->gtType = info.compRetNativeType;
+ restoreType = true;
+ }
+
+ impAssignTempGen(lvaInlineeReturnSpillTemp, op2, se.seTypeInfo.GetClassHandle(),
+ (unsigned)CHECK_SPILL_ALL);
+
+ GenTreePtr tmpOp2 = gtNewLclvNode(lvaInlineeReturnSpillTemp, op2->TypeGet());
+
+ if (restoreType)
+ {
+ op2->gtType = TYP_STRUCT; // restore it to what it was
+ }
+
+ op2 = tmpOp2;
+
+#ifdef DEBUG
+ if (impInlineInfo->retExpr)
+ {
+ // Some other block(s) have seen the CEE_RET first.
+ // Better they spilled to the same temp.
+ assert(impInlineInfo->retExpr->gtOper == GT_LCL_VAR);
+ assert(impInlineInfo->retExpr->gtLclVarCommon.gtLclNum == op2->gtLclVarCommon.gtLclNum);
+ }
+#endif
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n\n Inlinee Return expression (after normalization) =>\n");
+ gtDispTree(op2);
+ }
+#endif
+
+ // Report the return expression
+ impInlineInfo->retExpr = op2;
+ }
+ else
+ {
+ // compRetNativeType is TYP_STRUCT.
+ // This implies a struct return via a RetBuf arg or a multi-reg struct return.
+
+ GenTreePtr iciCall = impInlineInfo->iciCall;
+ assert(iciCall->gtOper == GT_CALL);
+
+ // Assign the inlinee return into a spill temp.
+ // spill temp only exists if there are multiple return points
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ // in this case we have to insert multiple struct copies to the temp
+ // and the retexpr is just the temp.
+ assert(info.compRetNativeType != TYP_VOID);
+ assert(fgMoreThanOneReturnBlock());
+
+ impAssignTempGen(lvaInlineeReturnSpillTemp, op2, se.seTypeInfo.GetClassHandle(),
+ (unsigned)CHECK_SPILL_ALL);
+ }
+
+#if defined(_TARGET_ARM_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_ARM_)
+ // TODO-ARM64-NYI: HFA
+ // TODO-AMD64-Unix and TODO-ARM: once the ARM64 functionality is implemented, the
+ // next ifdefs could be refactored into a single method with the ifdef inside.
+ if (IsHfa(retClsHnd))
+ {
+// Same as !IsHfa but just don't bother with impAssignStructPtr.
+#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ ReturnTypeDesc retTypeDesc;
+ retTypeDesc.InitializeStructReturnType(this, retClsHnd);
+ unsigned retRegCount = retTypeDesc.GetReturnRegCount();
+
+ if (retRegCount != 0)
+ {
+ // If single eightbyte, the return type would have been normalized and there won't be a temp var.
+ // This code will be called only if the struct return has not been normalized (i.e., 2 eightbytes,
+ // the max allowed).
+ assert(retRegCount == MAX_RET_REG_COUNT);
+ // Same as !structDesc.passedInRegisters but just don't bother with impAssignStructPtr.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ if (!impInlineInfo->retExpr)
+ {
+#if defined(_TARGET_ARM_)
+ impInlineInfo->retExpr = gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType);
+#else // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The inlinee compiler has figured out the type of the temp already. Use it here.
+ impInlineInfo->retExpr =
+ gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ }
+ }
+ else
+ {
+ impInlineInfo->retExpr = op2;
+ }
+ }
+ else
+#elif defined(_TARGET_ARM64_)
+ ReturnTypeDesc retTypeDesc;
+ retTypeDesc.InitializeStructReturnType(this, retClsHnd);
+ unsigned retRegCount = retTypeDesc.GetReturnRegCount();
+
+ if (retRegCount != 0)
+ {
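+ // Multi-register struct return on ARM64: no return buffer and at least two return registers.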
+ assert(!iciCall->AsCall()->HasRetBufArg());
+ assert(retRegCount >= 2);
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ if (!impInlineInfo->retExpr)
+ {
+ // The inlinee compiler has figured out the type of the temp already. Use it here.
+ impInlineInfo->retExpr =
+ gtNewLclvNode(lvaInlineeReturnSpillTemp, lvaTable[lvaInlineeReturnSpillTemp].lvType);
+ }
+ }
+ else
+ {
+ impInlineInfo->retExpr = op2;
+ }
+ }
+ else
+#endif // defined(_TARGET_ARM64_)
+ {
+ assert(iciCall->AsCall()->HasRetBufArg());
+ GenTreePtr dest = gtCloneExpr(iciCall->gtCall.gtCallArgs->gtOp.gtOp1);
+ // spill temp only exists if there are multiple return points
+ if (lvaInlineeReturnSpillTemp != BAD_VAR_NUM)
+ {
+ // if this is the first return we have seen set the retExpr
+ if (!impInlineInfo->retExpr)
+ {
+ impInlineInfo->retExpr =
+ impAssignStructPtr(dest, gtNewLclvNode(lvaInlineeReturnSpillTemp, info.compRetType),
+ retClsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ }
+ else
+ {
+ impInlineInfo->retExpr = impAssignStructPtr(dest, op2, retClsHnd, (unsigned)CHECK_SPILL_ALL);
+ }
+ }
+ }
+ }
+ }
+
+ if (compIsForInlining())
+ {
+ return true;
+ }
+
+ if (info.compRetType == TYP_VOID)
+ {
+ // return void
+ op1 = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID);
+ }
+ else if (info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ // Assign value to return buff (first param)
+ GenTreePtr retBuffAddr = gtNewLclvNode(info.compRetBuffArg, TYP_BYREF, impCurStmtOffs);
+
+ op2 = impAssignStructPtr(retBuffAddr, op2, retClsHnd, (unsigned)CHECK_SPILL_ALL);
+ impAppendTree(op2, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+
+ // There are cases where the address of the implicit RetBuf should be returned explicitly (in RAX).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_)
+
+ // The x64 (System V and Win64) calling conventions require the
+ // implicit return buffer to be returned explicitly (in RAX).
+ // Change the return type to be BYREF.
+ op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF));
+#else // !defined(_TARGET_AMD64_)
+ // On non-AMD64 targets, the profiler hook requires the implicit RetBuf to be returned
+ // explicitly (in the return register); in that case the return type of the function is changed to BYREF.
+ // If the profiler hook is not needed, the return type of the function is TYP_VOID.
+ if (compIsProfilerHookNeeded())
+ {
+ op1 = gtNewOperNode(GT_RETURN, TYP_BYREF, gtNewLclvNode(info.compRetBuffArg, TYP_BYREF));
+ }
+ else
+ {
+ // return void
+ op1 = new (this, GT_RETURN) GenTreeOp(GT_RETURN, TYP_VOID);
+ }
+#endif // !defined(_TARGET_AMD64_)
+ }
+ else if (varTypeIsStruct(info.compRetType))
+ {
+#if !FEATURE_MULTIREG_RET
+ // For both ARM architectures the HFA native types are maintained as structs.
+ // Also on System V AMD64 the multireg struct returns are left as structs.
+ noway_assert(info.compRetNativeType != TYP_STRUCT);
+#endif
+ op2 = impFixupStructReturnType(op2, retClsHnd);
+ // return op2
+ op1 = gtNewOperNode(GT_RETURN, genActualType(info.compRetNativeType), op2);
+ }
+ else
+ {
+ // return op2
+ op1 = gtNewOperNode(GT_RETURN, genActualType(info.compRetType), op2);
+ }
+
+ // We must have imported a tailcall and jumped to RET
+ if (prefixFlags & PREFIX_TAILCALL)
+ {
+#ifndef _TARGET_AMD64_
+ // Jit64 compat:
+ // This cannot be asserted on Amd64 since we permit the following IL pattern:
+ // tail.call
+ // pop
+ // ret
+ assert(verCurrentState.esStackDepth == 0 && impOpcodeIsCallOpcode(opcode));
+#endif
+
+ opcode = CEE_RET; // To prevent trying to spill if CALL_SITE_BOUNDARIES
+
+ // impImportCall() would have already appended TYP_VOID calls
+ if (info.compRetType == TYP_VOID)
+ {
+ return true;
+ }
+ }
+
+ impAppendTree(op1, (unsigned)CHECK_SPILL_NONE, impCurStmtOffs);
+#ifdef DEBUG
+ // Remember at which BC offset the tree was finished
+ impNoteLastILoffs();
+#endif
+ return true;
+}
+
+/*****************************************************************************
+ * Mark the block as unimported.
+ * Note that the caller is responsible for calling impImportBlockPending(),
+ * with the appropriate stack-state
+ */
+
+inline void Compiler::impReimportMarkBlock(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose && (block->bbFlags & BBF_IMPORTED))
+ {
+ printf("\nBB%02u will be reimported\n", block->bbNum);
+ }
+#endif
+
+ block->bbFlags &= ~BBF_IMPORTED;
+}
+
+/*****************************************************************************
+ * Mark the successors of the given block as unimported.
+ * Note that the caller is responsible for calling impImportBlockPending()
+ * for all the successors, with the appropriate stack-state.
+ */
+
+void Compiler::impReimportMarkSuccessors(BasicBlock* block)
+{
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ impReimportMarkBlock(block->GetSucc(i));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Filter wrapper that handles only the verification exception code
+ * (any other exception continues the search).
+ */
+
+LONG FilterVerificationExceptions(PEXCEPTION_POINTERS pExceptionPointers, LPVOID lpvParam)
+{
+ if (pExceptionPointers->ExceptionRecord->ExceptionCode == SEH_VERIFICATION_EXCEPTION)
+ {
+ return EXCEPTION_EXECUTE_HANDLER;
+ }
+
+ return EXCEPTION_CONTINUE_SEARCH;
+}
+
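+// Propagate verification state from "block" into the handlers of its enclosing EH regions.
+// When "isTryStart", the try-entry constraints are checked (empty evaluation stack; 'this'
+// initialized except for try/fault regions) and the handler and filter begin blocks are
+// queued for importing. Otherwise only enclosing fault handlers are re-queued, so that the
+// 'this'-init state observed after this block reaches them.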
+void Compiler::impVerifyEHBlock(BasicBlock* block, bool isTryStart)
+{
+ assert(block->hasTryIndex());
+ assert(!compIsForInlining());
+
+ unsigned tryIndex = block->getTryIndex();
+ EHblkDsc* HBtab = ehGetDsc(tryIndex);
+
+ if (isTryStart)
+ {
+ assert(block->bbFlags & BBF_TRY_BEG);
+
+ // The Stack must be empty
+ //
+ if (block->bbStkDepth != 0)
+ {
+ BADCODE("Evaluation stack must be empty on entry into a try block");
+ }
+ }
+
+ // Save the stack contents, we'll need to restore it later
+ //
+ SavedStack blockState;
+ impSaveStackState(&blockState, false);
+
+ while (HBtab != nullptr)
+ {
+ if (isTryStart)
+ {
+ // Are we verifying that an instance constructor properly initializes its 'this' pointer once?
+ // We do not allow the 'this' pointer to be uninitialized when entering most kinds of try regions
+ //
+ if (verTrackObjCtorInitState && (verCurrentState.thisInitialized != TIS_Init))
+ {
+ // We trigger an invalid program exception here unless we have a try/fault region.
+ //
+ if (HBtab->HasCatchHandler() || HBtab->HasFinallyHandler() || HBtab->HasFilter())
+ {
+ BADCODE(
+ "The 'this' pointer of an instance constructor is not initialized upon entry to a try region");
+ }
+ else
+ {
+ // Allow a try/fault region to proceed.
+ assert(HBtab->HasFaultHandler());
+ }
+ }
+
+ /* Recursively process the handler block */
+ BasicBlock* hndBegBB = HBtab->ebdHndBeg;
+
+ // Construct the proper verification stack state
+ // either empty or one that contains just
+ // the Exception Object that we are dealing with
+ //
+ verCurrentState.esStackDepth = 0;
+
+ if (handlerGetsXcptnObj(hndBegBB->bbCatchTyp))
+ {
+ CORINFO_CLASS_HANDLE clsHnd;
+
+ if (HBtab->HasFilter())
+ {
+ clsHnd = impGetObjectClass();
+ }
+ else
+ {
+ CORINFO_RESOLVED_TOKEN resolvedToken;
+
+ resolvedToken.tokenContext = impTokenLookupContextHandle;
+ resolvedToken.tokenScope = info.compScopeHnd;
+ resolvedToken.token = HBtab->ebdTyp;
+ resolvedToken.tokenType = CORINFO_TOKENKIND_Class;
+ info.compCompHnd->resolveToken(&resolvedToken);
+
+ clsHnd = resolvedToken.hClass;
+ }
+
+ // push the catch arg on the stack, spilling to a temp if necessary
+ // Note: can update HBtab->ebdHndBeg!
+ hndBegBB = impPushCatchArgOnStack(hndBegBB, clsHnd);
+ }
+
+ // Queue up the handler for importing
+ //
+ impImportBlockPending(hndBegBB);
+
+ if (HBtab->HasFilter())
+ {
+ /* @VERIFICATION : Ideally the end of filter state should get
+ propagated to the catch handler; this is an incompleteness,
+ but not a security/compliance issue, since the only
+ interesting state is the 'thisInit' state.
+ */
+
+ verCurrentState.esStackDepth = 0;
+
+ BasicBlock* filterBB = HBtab->ebdFilter;
+
+ // push the catch arg on the stack, spilling to a temp if necessary
+ // Note: can update HBtab->ebdFilter!
+ filterBB = impPushCatchArgOnStack(filterBB, impGetObjectClass());
+
+ impImportBlockPending(filterBB);
+ }
+ }
+ else if (verTrackObjCtorInitState && HBtab->HasFaultHandler())
+ {
+ /* Recursively process the handler block */
+
+ verCurrentState.esStackDepth = 0;
+
+ // Queue up the fault handler for importing
+ //
+ impImportBlockPending(HBtab->ebdHndBeg);
+ }
+
+ // Now process our enclosing try index (if any)
+ //
+ tryIndex = HBtab->ebdEnclosingTryIndex;
+ if (tryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ HBtab = nullptr;
+ }
+ else
+ {
+ HBtab = ehGetDsc(tryIndex);
+ }
+ }
+
+ // Restore the stack contents
+ impRestoreStackState(&blockState);
+}
+
+//***************************************************************
+// Import the instructions for the given basic block. Perform
+// verification, throwing an exception on failure. Push any successor blocks that are enabled for the first
+// time, or whose verification pre-state is changed.
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void Compiler::impImportBlock(BasicBlock* block)
+{
+ // BBF_INTERNAL blocks only exist during importation due to EH canonicalization. We need to
+ // handle them specially. In particular, there is no IL to import for them, but we do need
+ // to mark them as imported and put their successors on the pending import list.
+ if (block->bbFlags & BBF_INTERNAL)
+ {
+ JITDUMP("Marking BBF_INTERNAL block BB%02u as BBF_IMPORTED\n", block->bbNum);
+ block->bbFlags |= BBF_IMPORTED;
+
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ impImportBlockPending(block->GetSucc(i));
+ }
+
+ return;
+ }
+
+ bool markImport;
+
+ assert(block);
+
+ /* Make the block globally available */
+
+ compCurBB = block;
+
+#ifdef DEBUG
+ /* Initialize the debug variables */
+ impCurOpcName = "unknown";
+ impCurOpcOffs = block->bbCodeOffs;
+#endif
+
+ /* Set the current stack state to the merged result */
+ verResetCurrentState(block, &verCurrentState);
+
+ /* Now walk the code and import the IL into GenTrees */
+
+ struct FilterVerificationExceptionsParam
+ {
+ Compiler* pThis;
+ BasicBlock* block;
+ };
+ FilterVerificationExceptionsParam param;
+
+ param.pThis = this;
+ param.block = block;
+
+ PAL_TRY(FilterVerificationExceptionsParam*, pParam, &param)
+ {
+ /* @VERIFICATION : For now, the only state propagation from try
+ to its handler is the "thisInit" state (the stack is empty at the start of a try).
+ In general, for state that we track in verification, we need to
+ model the possibility that an exception might happen at any IL
+ instruction, so we really need to merge all states that obtain
+ between IL instructions in a try block into the start states of
+ all handlers.
+
+ However we do not allow the 'this' pointer to be uninitialized when
+ entering most kinds of try regions (only try/fault are allowed to have
+ an uninitialized this pointer on entry to the try)
+
+ Fortunately, the stack is thrown away when an exception
+ leads to a handler, so we don't have to worry about that.
+ We DO, however, have to worry about the "thisInit" state.
+ But only for the try/fault case.
+
+ The only allowed transition is from TIS_Uninit to TIS_Init.
+
+ So, for the fault handler block of a try/fault region,
+ we will merge the start state of the try begin
+ and the post-state of each block that is part of this try region
+ */
+
+ // merge the start state of the try begin
+ //
+ if (pParam->block->bbFlags & BBF_TRY_BEG)
+ {
+ pParam->pThis->impVerifyEHBlock(pParam->block, true);
+ }
+
+ pParam->pThis->impImportBlockCode(pParam->block);
+
+ // As discussed above:
+ // merge the post-state of each block that is part of this try region
+ //
+ if (pParam->block->hasTryIndex())
+ {
+ pParam->pThis->impVerifyEHBlock(pParam->block, false);
+ }
+ }
+ PAL_EXCEPT_FILTER(FilterVerificationExceptions)
+ {
+ verHandleVerificationFailure(block DEBUGARG(false));
+ }
+ PAL_ENDTRY
+
+ if (compDonotInline())
+ {
+ return;
+ }
+
+ assert(!compDonotInline());
+
+ markImport = false;
+
+SPILLSTACK:
+
+ unsigned baseTmp = NO_BASE_TMP; // input temps assigned to successor blocks
+ bool reimportSpillClique = false;
+ BasicBlock* tgtBlock = nullptr;
+
+ /* If the stack is non-empty, we might have to spill its contents */
+
+ if (verCurrentState.esStackDepth != 0)
+ {
+ impBoxTemp = BAD_VAR_NUM; // if a box temp is used in a block that leaves something
+ // on the stack, its lifetime is hard to determine, simply
+ // don't reuse such temps.
+
+ GenTreePtr addStmt = nullptr;
+
+ /* Do the successors of 'block' have any other predecessors ?
+ We do not want to do some of the optimizations related to multiRef
+ if we can reimport blocks */
+
+ unsigned multRef = impCanReimport ? unsigned(~0) : 0;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+
+ /* Temporarily remove the 'jtrue' from the end of the tree list */
+
+ assert(impTreeLast);
+ assert(impTreeLast->gtOper == GT_STMT);
+ assert(impTreeLast->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ addStmt = impTreeLast;
+ impTreeLast = impTreeLast->gtPrev;
+
+ /* Note if the next block has more than one ancestor */
+
+ multRef |= block->bbNext->bbRefs;
+
+ /* Does the next block have temps assigned? */
+
+ baseTmp = block->bbNext->bbStkTempsIn;
+ tgtBlock = block->bbNext;
+
+ if (baseTmp != NO_BASE_TMP)
+ {
+ break;
+ }
+
+ /* Try the target of the jump then */
+
+ multRef |= block->bbJumpDest->bbRefs;
+ baseTmp = block->bbJumpDest->bbStkTempsIn;
+ tgtBlock = block->bbJumpDest;
+ break;
+
+ case BBJ_ALWAYS:
+ multRef |= block->bbJumpDest->bbRefs;
+ baseTmp = block->bbJumpDest->bbStkTempsIn;
+ tgtBlock = block->bbJumpDest;
+ break;
+
+ case BBJ_NONE:
+ multRef |= block->bbNext->bbRefs;
+ baseTmp = block->bbNext->bbStkTempsIn;
+ tgtBlock = block->bbNext;
+ break;
+
+ case BBJ_SWITCH:
+
+ BasicBlock** jmpTab;
+ unsigned jmpCnt;
+
+ /* Temporarily remove the GT_SWITCH from the end of the tree list */
+
+ assert(impTreeLast);
+ assert(impTreeLast->gtOper == GT_STMT);
+ assert(impTreeLast->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
+
+ addStmt = impTreeLast;
+ impTreeLast = impTreeLast->gtPrev;
+
+ jmpCnt = block->bbJumpSwt->bbsCount;
+ jmpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ tgtBlock = (*jmpTab);
+
+ multRef |= tgtBlock->bbRefs;
+
+ // Thanks to spill cliques, we should have assigned all or none
+ assert((baseTmp == NO_BASE_TMP) || (baseTmp == tgtBlock->bbStkTempsIn));
+ baseTmp = tgtBlock->bbStkTempsIn;
+ if (multRef > 1)
+ {
+ break;
+ }
+ } while (++jmpTab, --jmpCnt);
+
+ break;
+
+ case BBJ_CALLFINALLY:
+ case BBJ_EHCATCHRET:
+ case BBJ_RETURN:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_THROW:
+ NO_WAY("can't have 'unreached' end of BB with non-empty stack");
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ assert(multRef >= 1);
+
+ /* Do we have a base temp number? */
+
+ bool newTemps = (baseTmp == NO_BASE_TMP);
+
+ if (newTemps)
+ {
+ /* Grab enough temps for the whole stack */
+ baseTmp = impGetSpillTmpBase(block);
+ }
+
+ /* Spill all stack entries into temps */
+ unsigned level, tempNum;
+
+ JITDUMP("\nSpilling stack entries into temps\n");
+ for (level = 0, tempNum = baseTmp; level < verCurrentState.esStackDepth; level++, tempNum++)
+ {
+ GenTreePtr tree = verCurrentState.esStack[level].val;
+
+ /* VC generates code where it pushes a byref from one branch, and an int (ldc.i4 0) from
+ the other. This should merge to a byref in unverifiable code.
+ However, if the branch which leaves the TYP_I_IMPL on the stack is imported first, the
+ successor would be imported assuming there was a TYP_I_IMPL on
+ the stack. Thus the value would not get GC-tracked. Hence,
+ change the temp to TYP_BYREF and reimport the successors.
+ Note: We should only allow this in unverifiable code.
+ */
+ if (tree->gtType == TYP_BYREF && lvaTable[tempNum].lvType == TYP_I_IMPL && !verNeedsVerification())
+ {
+ lvaTable[tempNum].lvType = TYP_BYREF;
+ impReimportMarkSuccessors(block);
+ markImport = true;
+ }
+
+#ifdef _TARGET_64BIT_
+ if (genActualType(tree->gtType) == TYP_I_IMPL && lvaTable[tempNum].lvType == TYP_INT)
+ {
+ if (tiVerificationNeeded && tgtBlock->bbEntryState != nullptr &&
+ (tgtBlock->bbFlags & BBF_FAILED_VERIFICATION) == 0)
+ {
+ // Merge the current state into the entry state of block;
+ // the call to verMergeEntryStates must have changed
+ // the entry state of the block by merging the int local var
+ // and the native-int stack entry.
+ bool changed = false;
+ if (verMergeEntryStates(tgtBlock, &changed))
+ {
+ impRetypeEntryStateTemps(tgtBlock);
+ impReimportBlockPending(tgtBlock);
+ assert(changed);
+ }
+ else
+ {
+ tgtBlock->bbFlags |= BBF_FAILED_VERIFICATION;
+ break;
+ }
+ }
+
+ // Some other block in the spill clique set this to "int", but now we have "native int".
+ // Change the type and go back to re-import any blocks that used the wrong type.
+ lvaTable[tempNum].lvType = TYP_I_IMPL;
+ reimportSpillClique = true;
+ }
+ else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_I_IMPL)
+ {
+ // Spill clique has decided this should be "native int", but this block only pushes an "int".
+ // Insert a sign-extension to "native int" so we match the clique.
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, TYP_I_IMPL);
+ }
+
+ // Consider the case where one branch left a 'byref' on the stack and the other leaves
+ // an 'int'. On 32-bit, this is allowed (in non-verifiable code) since they are the same
+ // size. JIT64 managed to make this work on 64-bit. For compatibility, we support JIT64
+ // behavior instead of asserting and then generating bad code (where we save/restore the
+ // low 32 bits of a byref pointer to an 'int' sized local). If the 'int' side has been
+ // imported already, we need to change the type of the local and reimport the spill clique.
+ // If the 'byref' side has imported, we insert a cast from int to 'native int' to match
+ // the 'byref' size.
+ if (!tiVerificationNeeded)
+ {
+ if (genActualType(tree->gtType) == TYP_BYREF && lvaTable[tempNum].lvType == TYP_INT)
+ {
+ // Some other block in the spill clique set this to "int", but now we have "byref".
+ // Change the type and go back to re-import any blocks that used the wrong type.
+ lvaTable[tempNum].lvType = TYP_BYREF;
+ reimportSpillClique = true;
+ }
+ else if (genActualType(tree->gtType) == TYP_INT && lvaTable[tempNum].lvType == TYP_BYREF)
+ {
+ // Spill clique has decided this should be "byref", but this block only pushes an "int".
+ // Insert a sign-extension to "native int" so we match the clique size.
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_I_IMPL, tree, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+#if FEATURE_X87_DOUBLES
+ // X87 stack doesn't differentiate between float/double
+ // so promoting is no big deal.
+ // For everybody else, keep it as float until we have a collision and then promote,
+ // just like x64's TYP_INT<->TYP_I_IMPL.
+
+ if (multRef > 1 && tree->gtType == TYP_FLOAT)
+ {
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_DOUBLE, tree, TYP_DOUBLE);
+ }
+
+#else // !FEATURE_X87_DOUBLES
+
+ if (tree->gtType == TYP_DOUBLE && lvaTable[tempNum].lvType == TYP_FLOAT)
+ {
+ // Some other block in the spill clique set this to "float", but now we have "double".
+ // Change the type and go back to re-import any blocks that used the wrong type.
+ lvaTable[tempNum].lvType = TYP_DOUBLE;
+ reimportSpillClique = true;
+ }
+ else if (tree->gtType == TYP_FLOAT && lvaTable[tempNum].lvType == TYP_DOUBLE)
+ {
+ // Spill clique has decided this should be "double", but this block only pushes a "float".
+ // Insert a cast to "double" so we match the clique.
+ verCurrentState.esStack[level].val = gtNewCastNode(TYP_DOUBLE, tree, TYP_DOUBLE);
+ }
+
+#endif // FEATURE_X87_DOUBLES
+
+ /* If addStmt has a reference to tempNum (can only happen if we
+ are spilling to the temps already used by a previous block),
+ we need to spill addStmt */
+
+ if (addStmt && !newTemps && gtHasRef(addStmt->gtStmt.gtStmtExpr, tempNum, false))
+ {
+ GenTreePtr addTree = addStmt->gtStmt.gtStmtExpr;
+
+ if (addTree->gtOper == GT_JTRUE)
+ {
+ GenTreePtr relOp = addTree->gtOp.gtOp1;
+ assert(relOp->OperIsCompare());
+
+ var_types type = genActualType(relOp->gtOp.gtOp1->TypeGet());
+
+ if (gtHasRef(relOp->gtOp.gtOp1, tempNum, false))
+ {
+ unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op1"));
+ impAssignTempGen(temp, relOp->gtOp.gtOp1, level);
+ type = genActualType(lvaTable[temp].TypeGet());
+ relOp->gtOp.gtOp1 = gtNewLclvNode(temp, type);
+ }
+
+ if (gtHasRef(relOp->gtOp.gtOp2, tempNum, false))
+ {
+ unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt JTRUE ref Op2"));
+ impAssignTempGen(temp, relOp->gtOp.gtOp2, level);
+ type = genActualType(lvaTable[temp].TypeGet());
+ relOp->gtOp.gtOp2 = gtNewLclvNode(temp, type);
+ }
+ }
+ else
+ {
+ assert(addTree->gtOper == GT_SWITCH && genActualType(addTree->gtOp.gtOp1->gtType) == TYP_I_IMPL);
+
+ unsigned temp = lvaGrabTemp(true DEBUGARG("spill addStmt SWITCH"));
+ impAssignTempGen(temp, addTree->gtOp.gtOp1, level);
+ addTree->gtOp.gtOp1 = gtNewLclvNode(temp, TYP_I_IMPL);
+ }
+ }
+
+ /* Spill the stack entry, and replace with the temp */
+
+ if (!impSpillStackEntry(level, tempNum
+#ifdef DEBUG
+ ,
+ true, "Spill Stack Entry"
+#endif
+ ))
+ {
+ if (markImport)
+ {
+ BADCODE("bad stack state");
+ }
+
+ // Oops. Something went wrong when spilling. Bad code.
+ verHandleVerificationFailure(block DEBUGARG(true));
+
+ goto SPILLSTACK;
+ }
+ }
+
+ /* Put back the 'jtrue'/'switch' if we removed it earlier */
+
+ if (addStmt)
+ {
+ impAppendStmt(addStmt, (unsigned)CHECK_SPILL_NONE);
+ }
+ }
+
+ // Some of the append/spill logic works on compCurBB
+
+ assert(compCurBB == block);
+
+ /* Save the tree list in the block */
+ impEndTreeList(block);
+
+ // impEndTreeList sets BBF_IMPORTED on the block
+ // We do *NOT* want to set it later than this because
+ // impReimportSpillClique might clear it if this block is both a
+ // predecessor and successor in the current spill clique
+ assert(block->bbFlags & BBF_IMPORTED);
+
+ // If we had a int/native int, or float/double collision, we need to re-import
+ if (reimportSpillClique)
+ {
+ // This will re-import all the successors of block (as well as each of their predecessors)
+ impReimportSpillClique(block);
+
+ // For blocks that haven't been imported yet, we still need to mark them as pending import.
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ BasicBlock* succ = block->GetSucc(i);
+ if ((succ->bbFlags & BBF_IMPORTED) == 0)
+ {
+ impImportBlockPending(succ);
+ }
+ }
+ }
+ else // the normal case
+ {
+ // otherwise just import the successors of block
+
+ /* Does this block jump to any other blocks? */
+ for (unsigned i = 0; i < block->NumSucc(); i++)
+ {
+ impImportBlockPending(block->GetSucc(i));
+ }
+ }
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************/
+//
+// Ensures that "block" is a member of the list of BBs waiting to be imported, pushing it on the list if
+// necessary (and ensures that it is a member of the set of BBs on the list, by setting its byte in
+// impPendingBlockMembers). Merges the current verification state into the verification state of "block"
+// (its "pre-state").
+
+void Compiler::impImportBlockPending(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nimpImportBlockPending for BB%02u\n", block->bbNum);
+ }
+#endif
+
+ // We will add a block to the pending set if it has not already been imported (or needs to be re-imported),
+ // or if it has, but merging in a predecessor's post-state changes the block's pre-state.
+ // (When we're doing verification, we always attempt the merge to detect verification errors.)
+
+ // If the block has not been imported, add to pending set.
+ bool addToPending = ((block->bbFlags & BBF_IMPORTED) == 0);
+
+ // Initialize bbEntryState just the first time we try to add this block to the pending list
+ // Just because bbEntryState is NULL doesn't mean the pre-state wasn't previously set.
+ // We use NULL to indicate the 'common' state to avoid memory allocation
+ if ((block->bbEntryState == nullptr) && ((block->bbFlags & (BBF_IMPORTED | BBF_FAILED_VERIFICATION)) == 0) &&
+ (impGetPendingBlockMember(block) == 0))
+ {
+ verInitBBEntryState(block, &verCurrentState);
+ assert(block->bbStkDepth == 0);
+ block->bbStkDepth = static_cast<unsigned short>(verCurrentState.esStackDepth);
+ assert(addToPending);
+ assert(impGetPendingBlockMember(block) == 0);
+ }
+ else
+ {
+ // The stack should have the same height on entry to the block from all its predecessors.
+ if (block->bbStkDepth != verCurrentState.esStackDepth)
+ {
+#ifdef DEBUG
+ char buffer[400];
+ sprintf_s(buffer, sizeof(buffer),
+ "Block at offset %4.4x to %4.4x in %s entered with different stack depths.\n"
+ "Previous depth was %d, current depth is %d",
+ block->bbCodeOffs, block->bbCodeOffsEnd, info.compFullName, block->bbStkDepth,
+ verCurrentState.esStackDepth);
+ buffer[sizeof(buffer) - 1] = 0;
+ NO_WAY(buffer);
+#else
+ NO_WAY("Block entered with different stack depths");
+#endif
+ }
+
+ // Additionally, if we need to verify, merge the verification state.
+ if (tiVerificationNeeded)
+ {
+ // Merge the current state into the entry state of block; if this does not change the entry state
+ // by merging, do not add the block to the pending-list.
+ bool changed = false;
+ if (!verMergeEntryStates(block, &changed))
+ {
+ block->bbFlags |= BBF_FAILED_VERIFICATION;
+ addToPending = true; // We will pop it off, and check the flag set above.
+ }
+ else if (changed)
+ {
+ addToPending = true;
+
+ JITDUMP("Adding BB%02u to pending set due to new merge result\n", block->bbNum);
+ }
+ }
+
+ if (!addToPending)
+ {
+ return;
+ }
+
+ if (block->bbStkDepth > 0)
+ {
+ // We need to fix the types of any spill temps that might have changed:
+ // int->native int, float->double, int->byref, etc.
+ impRetypeEntryStateTemps(block);
+ }
+
+ // OK, we must add to the pending list, if it's not already in it.
+ if (impGetPendingBlockMember(block) != 0)
+ {
+ return;
+ }
+ }
+
+ // Get an entry to add to the pending list
+
+ PendingDsc* dsc;
+
+ if (impPendingFree)
+ {
+ // We can reuse one of the freed up dscs.
+ dsc = impPendingFree;
+ impPendingFree = dsc->pdNext;
+ }
+ else
+ {
+ // We have to create a new dsc
+ dsc = new (this, CMK_Unknown) PendingDsc;
+ }
+
+ dsc->pdBB = block;
+ dsc->pdSavedStack.ssDepth = verCurrentState.esStackDepth;
+ dsc->pdThisPtrInit = verCurrentState.thisInitialized;
+
+ // Save the stack trees for later
+
+ if (verCurrentState.esStackDepth)
+ {
+ impSaveStackState(&dsc->pdSavedStack, false);
+ }
+
+ // Add the entry to the pending list
+
+ dsc->pdNext = impPendingList;
+ impPendingList = dsc;
+ impSetPendingBlockMember(block, 1); // And indicate that it's now a member of the set.
+
+ // Various assertions require us to now consider the block as not imported (at least for
+ // the final time...)
+ block->bbFlags &= ~BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Added PendingDsc - %08p for BB%02u\n", dspPtr(dsc), block->bbNum);
+ }
+#endif
+}
+
+/*****************************************************************************/
+//
+// Ensures that "block" is a member of the list of BBs waiting to be imported, pushing it on the list if
+// necessary (and ensures that it is a member of the set of BBs on the list, by setting its byte in
+// impPendingBlockMembers). Does *NOT* change the existing "pre-state" of the block.
+
+void Compiler::impReimportBlockPending(BasicBlock* block)
+{
+ JITDUMP("\nimpReimportBlockPending for BB%02u", block->bbNum);
+
+ assert(block->bbFlags & BBF_IMPORTED);
+
+ // OK, we must add to the pending list, if it's not already in it.
+ if (impGetPendingBlockMember(block) != 0)
+ {
+ return;
+ }
+
+ // Get an entry to add to the pending list
+
+ PendingDsc* dsc;
+
+ if (impPendingFree)
+ {
+ // We can reuse one of the freed up dscs.
+ dsc = impPendingFree;
+ impPendingFree = dsc->pdNext;
+ }
+ else
+ {
+ // We have to create a new dsc
+ dsc = new (this, CMK_ImpStack) PendingDsc;
+ }
+
+ dsc->pdBB = block;
+
+ if (block->bbEntryState)
+ {
+ dsc->pdThisPtrInit = block->bbEntryState->thisInitialized;
+ dsc->pdSavedStack.ssDepth = block->bbEntryState->esStackDepth;
+ dsc->pdSavedStack.ssTrees = block->bbEntryState->esStack;
+ }
+ else
+ {
+ dsc->pdThisPtrInit = TIS_Bottom;
+ dsc->pdSavedStack.ssDepth = 0;
+ dsc->pdSavedStack.ssTrees = nullptr;
+ }
+
+ // Add the entry to the pending list
+
+ dsc->pdNext = impPendingList;
+ impPendingList = dsc;
+ impSetPendingBlockMember(block, 1); // And indicate that it's now a member of the set.
+
+ // Various assertions require us to now consider the block as not imported (at least for
+ // the final time...)
+ block->bbFlags &= ~BBF_IMPORTED;
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Added PendingDsc - %08p for BB%02u\n", dspPtr(dsc), block->bbNum);
+ }
+#endif
+}
+
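+// Allocate a BlockListNode, reusing an entry from the compiler's free list when available.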
+void* Compiler::BlockListNode::operator new(size_t sz, Compiler* comp)
+{
+ if (comp->impBlockListNodeFreeList == nullptr)
+ {
+ return (BlockListNode*)comp->compGetMem(sizeof(BlockListNode), CMK_BasicBlock);
+ }
+ else
+ {
+ BlockListNode* res = comp->impBlockListNodeFreeList;
+ comp->impBlockListNodeFreeList = res->m_next;
+ return res;
+ }
+}
+
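+// Return a BlockListNode to the free list so a later allocation can reuse it.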
+void Compiler::FreeBlockListNode(Compiler::BlockListNode* node)
+{
+ node->m_next = impBlockListNodeFreeList;
+ impBlockListNodeFreeList = node;
+}
+
+void Compiler::impWalkSpillCliqueFromPred(BasicBlock* block, SpillCliqueWalker* callback)
+{
+ bool toDo = true;
+
+ noway_assert(!fgComputePredsDone);
+ if (!fgCheapPredsValid)
+ {
+ fgComputeCheapPreds();
+ }
+
+ BlockListNode* succCliqueToDo = nullptr;
+ BlockListNode* predCliqueToDo = new (this) BlockListNode(block);
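+ // Compute the spill clique as a fixpoint: alternately add the successors of every block
+ // on the predecessor to-do list and the predecessors of every block on the successor
+ // to-do list, visiting each newly discovered member, until neither set grows.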
+ while (toDo)
+ {
+ toDo = false;
+ // Look at the successors of every member of the predecessor to-do list.
+ while (predCliqueToDo != nullptr)
+ {
+ BlockListNode* node = predCliqueToDo;
+ predCliqueToDo = node->m_next;
+ BasicBlock* blk = node->m_blk;
+ FreeBlockListNode(node);
+
+ for (unsigned succNum = 0; succNum < blk->NumSucc(); succNum++)
+ {
+ BasicBlock* succ = blk->GetSucc(succNum);
+ // If it's not already in the clique, add it, and also add it
+ // as a member of the successor "toDo" set.
+ if (impSpillCliqueGetMember(SpillCliqueSucc, succ) == 0)
+ {
+ callback->Visit(SpillCliqueSucc, succ);
+ impSpillCliqueSetMember(SpillCliqueSucc, succ, 1);
+ succCliqueToDo = new (this) BlockListNode(succ, succCliqueToDo);
+ toDo = true;
+ }
+ }
+ }
+ // Look at the predecessors of every member of the successor to-do list.
+ while (succCliqueToDo != nullptr)
+ {
+ BlockListNode* node = succCliqueToDo;
+ succCliqueToDo = node->m_next;
+ BasicBlock* blk = node->m_blk;
+ FreeBlockListNode(node);
+
+ for (BasicBlockList* pred = blk->bbCheapPreds; pred != nullptr; pred = pred->next)
+ {
+ BasicBlock* predBlock = pred->block;
+ // If it's not already in the clique, add it, and also add it
+ // as a member of the predecessor "toDo" set.
+ if (impSpillCliqueGetMember(SpillCliquePred, predBlock) == 0)
+ {
+ callback->Visit(SpillCliquePred, predBlock);
+ impSpillCliqueSetMember(SpillCliquePred, predBlock, 1);
+ predCliqueToDo = new (this) BlockListNode(predBlock, predCliqueToDo);
+ toDo = true;
+ }
+ }
+ }
+ }
+
+ // If this fails, it means we didn't walk the spill clique properly and somehow managed
+ // to miss walking back to include the predecessor we started from.
+ // The most likely cause: missing or out-of-date bbPreds.
+ assert(impSpillCliqueGetMember(SpillCliquePred, block) != 0);
+}
+
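+// Record the chosen spill temp base on a clique member: successors record it as the base
+// of their incoming stack temps, predecessors as the base of their outgoing stack temps.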
+void Compiler::SetSpillTempsBase::Visit(SpillCliqueDir predOrSucc, BasicBlock* blk)
+{
+ if (predOrSucc == SpillCliqueSucc)
+ {
+ assert(blk->bbStkTempsIn == NO_BASE_TMP); // Should not already be a member of a clique as a successor.
+ blk->bbStkTempsIn = m_baseTmp;
+ }
+ else
+ {
+ assert(predOrSucc == SpillCliquePred);
+ assert(blk->bbStkTempsOut == NO_BASE_TMP); // Should not already be a member of a clique as a predecessor.
+ blk->bbStkTempsOut = m_baseTmp;
+ }
+}
+
+void Compiler::ReimportSpillClique::Visit(SpillCliqueDir predOrSucc, BasicBlock* blk)
+{
+ // For Preds we could be a little smarter and just find the existing store
+ // and re-type it/add a cast, but that is complicated and hopefully very rare, so
+ // just re-import the whole block (just like we do for successors)
+
+ if (((blk->bbFlags & BBF_IMPORTED) == 0) && (m_pComp->impGetPendingBlockMember(blk) == 0))
+ {
+ // If we haven't imported this block and we're not going to (because it isn't on
+ // the pending list) then just ignore it for now.
+
+ // This block has either never been imported (EntryState == NULL) or it failed
+ // verification. Neither state requires us to force it to be imported now.
+ assert((blk->bbEntryState == nullptr) || (blk->bbFlags & BBF_FAILED_VERIFICATION));
+ return;
+ }
+
+ // For successors we have a valid verCurrentState, so just mark them for reimport
+ // the 'normal' way
+ // Unlike predecessors, we *DO* need to reimport the current block because the
+ // initial import had the wrong entry state types.
+ // Similarly, blocks that are currently on the pending list, still need to call
+ // impImportBlockPending to fixup their entry state.
+ if (predOrSucc == SpillCliqueSucc)
+ {
+ m_pComp->impReimportMarkBlock(blk);
+
+ // Set the current stack state to that of the blk->bbEntryState
+ m_pComp->verResetCurrentState(blk, &m_pComp->verCurrentState);
+ assert(m_pComp->verCurrentState.thisInitialized == blk->bbThisOnEntry());
+
+ m_pComp->impImportBlockPending(blk);
+ }
+ else if ((blk != m_pComp->compCurBB) && ((blk->bbFlags & BBF_IMPORTED) != 0))
+ {
+ // As described above, we are only visiting predecessors so they can
+ // add the appropriate casts, since we have already done that for the current
+ // block, it does not need to be reimported.
+ // Nor do we need to reimport blocks that are still pending, but not yet
+ // imported.
+ //
+ // For predecessors, we have no state to seed the EntryState, so we just have
+ // to assume the existing one is correct.
+ // If the block is also a successor, it will get the EntryState properly
+ // updated when it is visited as a successor in the above "if" block.
+ assert(predOrSucc == SpillCliquePred);
+ m_pComp->impReimportBlockPending(blk);
+ }
+}
+
+// Re-type the incoming lclVar nodes to match the varDsc.
+void Compiler::impRetypeEntryStateTemps(BasicBlock* blk)
+{
+ if (blk->bbEntryState != nullptr)
+ {
+ EntryState* es = blk->bbEntryState;
+ for (unsigned level = 0; level < es->esStackDepth; level++)
+ {
+ GenTreePtr tree = es->esStack[level].val;
+ if ((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_LCL_FLD))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ es->esStack[level].val->gtType = varDsc->TypeGet();
+ }
+ }
+ }
+}
+
+unsigned Compiler::impGetSpillTmpBase(BasicBlock* block)
+{
+ if (block->bbStkTempsOut != NO_BASE_TMP)
+ {
+ return block->bbStkTempsOut;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In impGetSpillTmpBase(BB%02u)\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // Otherwise, choose one, and propagate to all members of the spill clique.
+ // Grab enough temps for the whole stack.
+ unsigned baseTmp = lvaGrabTemps(verCurrentState.esStackDepth DEBUGARG("IL Stack Entries"));
+ SetSpillTempsBase callback(baseTmp);
+
+ // We do *NOT* need to reset the SpillClique*Members because a block can only be the predecessor
+ // to one spill clique, and similarly can only be the successor to one spill clique
+ impWalkSpillCliqueFromPred(block, &callback);
+
+ return baseTmp;
+}
+
+void Compiler::impReimportSpillClique(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In impReimportSpillClique(BB%02u)\n", block->bbNum);
+ }
+#endif // DEBUG
+
+ // If we get here, it is because this block is already part of a spill clique
+ // and one predecessor had an outgoing live stack slot of type int, and this
+ // block has an outgoing live stack slot of type native int.
+ // We need to reset these before traversal because they have already been set
+ // by the previous walk to determine all the members of the spill clique.
+ impInlineRoot()->impSpillCliquePredMembers.Reset();
+ impInlineRoot()->impSpillCliqueSuccMembers.Reset();
+
+ ReimportSpillClique callback(this);
+
+ impWalkSpillCliqueFromPred(block, &callback);
+}
+
+// Set the pre-state of "block" (which should not have a pre-state allocated) to
+// a copy of "srcState", cloning tree pointers as required.
+void Compiler::verInitBBEntryState(BasicBlock* block, EntryState* srcState)
+{
+ if (srcState->esStackDepth == 0 && srcState->thisInitialized == TIS_Bottom)
+ {
+ block->bbEntryState = nullptr;
+ return;
+ }
+
+ block->bbEntryState = (EntryState*)compGetMemA(sizeof(EntryState));
+
+ // block->bbEntryState.esRefcount = 1;
+
+ block->bbEntryState->esStackDepth = srcState->esStackDepth;
+ block->bbEntryState->thisInitialized = TIS_Bottom;
+
+ if (srcState->esStackDepth > 0)
+ {
+ block->bbSetStack(new (this, CMK_Unknown) StackEntry[srcState->esStackDepth]);
+ unsigned stackSize = srcState->esStackDepth * sizeof(StackEntry);
+
+ memcpy(block->bbEntryState->esStack, srcState->esStack, stackSize);
+ for (unsigned level = 0; level < srcState->esStackDepth; level++)
+ {
+ GenTreePtr tree = srcState->esStack[level].val;
+ block->bbEntryState->esStack[level].val = gtCloneExpr(tree);
+ }
+ }
+
+ if (verTrackObjCtorInitState)
+ {
+ verSetThisInit(block, srcState->thisInitialized);
+ }
+
+ return;
+}
+
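+// Record the 'this'-initialization state on entry to "block", allocating an entry state if needed.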
+void Compiler::verSetThisInit(BasicBlock* block, ThisInitState tis)
+{
+ assert(tis != TIS_Bottom); // Precondition.
+ if (block->bbEntryState == nullptr)
+ {
+ block->bbEntryState = new (this, CMK_Unknown) EntryState();
+ }
+
+ block->bbEntryState->thisInitialized = tis;
+}
+
+/*
+ * Resets the current state to the state at the start of the basic block
+ */
+void Compiler::verResetCurrentState(BasicBlock* block, EntryState* destState)
+{
+
+ if (block->bbEntryState == nullptr)
+ {
+ destState->esStackDepth = 0;
+ destState->thisInitialized = TIS_Bottom;
+ return;
+ }
+
+ destState->esStackDepth = block->bbEntryState->esStackDepth;
+
+ if (destState->esStackDepth > 0)
+ {
+ unsigned stackSize = destState->esStackDepth * sizeof(StackEntry);
+
+ memcpy(destState->esStack, block->bbStackOnEntry(), stackSize);
+ }
+
+ destState->thisInitialized = block->bbThisOnEntry();
+
+ return;
+}
+
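+// Accessors for the importer/verifier entry state stored on a BasicBlock.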
+ThisInitState BasicBlock::bbThisOnEntry()
+{
+ return bbEntryState ? bbEntryState->thisInitialized : TIS_Bottom;
+}
+
+unsigned BasicBlock::bbStackDepthOnEntry()
+{
+ return (bbEntryState ? bbEntryState->esStackDepth : 0);
+}
+
+void BasicBlock::bbSetStack(void* stackBuffer)
+{
+ assert(bbEntryState);
+ assert(stackBuffer);
+ bbEntryState->esStack = (StackEntry*)stackBuffer;
+}
+
+StackEntry* BasicBlock::bbStackOnEntry()
+{
+ assert(bbEntryState);
+ return bbEntryState->esStack;
+}
+
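+// Initialize the importer's verification state for the start of the method: enable
+// 'this'-init tracking for instance constructors when verifying, reset the evaluation
+// stack, and seed the entry state of the first basic block.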
+void Compiler::verInitCurrentState()
+{
+ verTrackObjCtorInitState = FALSE;
+ verCurrentState.thisInitialized = TIS_Bottom;
+
+ if (tiVerificationNeeded)
+ {
+ // Track this ptr initialization
+ if (!info.compIsStatic && (info.compFlags & CORINFO_FLG_CONSTRUCTOR) && lvaTable[0].lvVerTypeInfo.IsObjRef())
+ {
+ verTrackObjCtorInitState = TRUE;
+ verCurrentState.thisInitialized = TIS_Uninit;
+ }
+ }
+
+ // initialize stack info
+
+ verCurrentState.esStackDepth = 0;
+ assert(verCurrentState.esStack != nullptr);
+
+ // copy current state to entry state of first BB
+ verInitBBEntryState(fgFirstBB, &verCurrentState);
+}
+
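+// Return the Compiler instance at the root of the inlining tree ("this" when not inlining).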
+Compiler* Compiler::impInlineRoot()
+{
+ if (impInlineInfo == nullptr)
+ {
+ return this;
+ }
+ else
+ {
+ return impInlineInfo->InlineRoot;
+ }
+}
+
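+// Get the spill-clique membership byte for "blk" in the given direction; membership is
+// tracked on the root compiler of the inlining tree.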
+BYTE Compiler::impSpillCliqueGetMember(SpillCliqueDir predOrSucc, BasicBlock* blk)
+{
+ if (predOrSucc == SpillCliquePred)
+ {
+ return impInlineRoot()->impSpillCliquePredMembers.Get(blk->bbInd());
+ }
+ else
+ {
+ assert(predOrSucc == SpillCliqueSucc);
+ return impInlineRoot()->impSpillCliqueSuccMembers.Get(blk->bbInd());
+ }
+}
+
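+// Set the spill-clique membership byte for "blk" in the given direction.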
+void Compiler::impSpillCliqueSetMember(SpillCliqueDir predOrSucc, BasicBlock* blk, BYTE val)
+{
+ if (predOrSucc == SpillCliquePred)
+ {
+ impInlineRoot()->impSpillCliquePredMembers.Set(blk->bbInd(), val);
+ }
+ else
+ {
+ assert(predOrSucc == SpillCliqueSucc);
+ impInlineRoot()->impSpillCliqueSuccMembers.Set(blk->bbInd(), val);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Convert the instrs ("import") into our internal format (trees). The
+ * basic flowgraph has already been constructed and is passed in.
+ */
+
+void Compiler::impImport(BasicBlock* method)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In impImport() for %s\n", info.compFullName);
+ }
+#endif
+
+ /* Allocate the stack contents */
+
+ if (info.compMaxStack <= sizeof(impSmallStack) / sizeof(impSmallStack[0]))
+ {
+ /* Use local variable, don't waste time allocating on the heap */
+
+ impStkSize = sizeof(impSmallStack) / sizeof(impSmallStack[0]);
+ verCurrentState.esStack = impSmallStack;
+ }
+ else
+ {
+ impStkSize = info.compMaxStack;
+ verCurrentState.esStack = new (this, CMK_ImpStack) StackEntry[impStkSize];
+ }
+
+ // initialize the entry state at start of method
+ verInitCurrentState();
+
+ // Initialize stuff related to figuring "spill cliques" (see spec comment for impGetSpillTmpBase).
+ Compiler* inlineRoot = impInlineRoot();
+ if (this == inlineRoot) // These are only used on the root of the inlining tree.
+ {
+ // We have initialized these previously, but to size 0. Make them larger.
+ impPendingBlockMembers.Init(getAllocator(), fgBBNumMax * 2);
+ impSpillCliquePredMembers.Init(getAllocator(), fgBBNumMax * 2);
+ impSpillCliqueSuccMembers.Init(getAllocator(), fgBBNumMax * 2);
+ }
+ inlineRoot->impPendingBlockMembers.Reset(fgBBNumMax * 2);
+ inlineRoot->impSpillCliquePredMembers.Reset(fgBBNumMax * 2);
+ inlineRoot->impSpillCliqueSuccMembers.Reset(fgBBNumMax * 2);
+ impBlockListNodeFreeList = nullptr;
+
+#ifdef DEBUG
+ impLastILoffsStmt = nullptr;
+ impNestedStackSpill = false;
+#endif
+ impBoxTemp = BAD_VAR_NUM;
+
+ impPendingList = impPendingFree = nullptr;
+
+ /* Add the entry-point to the worker-list */
+
+ // Skip leading internal blocks. There can be one as a leading scratch BB, and more
+ // from EH normalization.
+ // NOTE: It might be possible to always just put fgFirstBB on the pending list, and let everything else just fall
+ // out.
+ for (; method->bbFlags & BBF_INTERNAL; method = method->bbNext)
+ {
+ // Treat these as imported.
+ assert(method->bbJumpKind == BBJ_NONE); // We assume all the leading ones are fallthrough.
+ JITDUMP("Marking leading BBF_INTERNAL block BB%02u as BBF_IMPORTED\n", method->bbNum);
+ method->bbFlags |= BBF_IMPORTED;
+ }
+
+ impImportBlockPending(method);
+
+ /* Import blocks in the worker-list until there are no more */
+
+ while (impPendingList)
+ {
+ /* Remove the entry at the front of the list */
+
+ PendingDsc* dsc = impPendingList;
+ impPendingList = impPendingList->pdNext;
+ impSetPendingBlockMember(dsc->pdBB, 0);
+
+ /* Restore the stack state */
+
+ verCurrentState.thisInitialized = dsc->pdThisPtrInit;
+ verCurrentState.esStackDepth = dsc->pdSavedStack.ssDepth;
+ if (verCurrentState.esStackDepth)
+ {
+ impRestoreStackState(&dsc->pdSavedStack);
+ }
+
+ /* Add the entry to the free list for reuse */
+
+ dsc->pdNext = impPendingFree;
+ impPendingFree = dsc;
+
+ /* Now import the block */
+
+ if (dsc->pdBB->bbFlags & BBF_FAILED_VERIFICATION)
+ {
+
+#ifdef _TARGET_64BIT_
+ // On AMD64, during verification we have to match JIT64 behavior since the VM is very tightly
+ // coupled with the JIT64 IL Verification logic. Look inside verHandleVerificationFailure
+ // method for further explanation on why we raise this exception instead of making the jitted
+ // code throw the verification exception during execution.
+ if (tiVerificationNeeded && (opts.eeFlags & CORJIT_FLG_IMPORT_ONLY) != 0)
+ {
+ BADCODE("Basic block marked as not verifiable");
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ verConvertBBToThrowVerificationException(dsc->pdBB DEBUGARG(true));
+ impEndTreeList(dsc->pdBB);
+ }
+ }
+ else
+ {
+ impImportBlock(dsc->pdBB);
+
+ if (compDonotInline())
+ {
+ return;
+ }
+ if (compIsForImportOnly() && !tiVerificationNeeded)
+ {
+ return;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose && info.compXcptnsCount)
+ {
+ printf("\nAfter impImport() added block for try,catch,finally");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+
+ // Used in impImportBlockPending() for STRESS_CHK_REIMPORT
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ block->bbFlags &= ~BBF_VISITED;
+ }
+#endif
+
+ assert(!compIsForInlining() || !tiVerificationNeeded);
+}
+
+// Checks if a typeinfo (usually stored in the type stack) is a struct.
+// The invariant here is that if it's not a ref or a method and has a class handle,
+// it's a value type.
+bool Compiler::impIsValueType(typeInfo* pTypeInfo)
+{
+ if (pTypeInfo && pTypeInfo->IsValueClassWithClsHnd())
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ * Check to see if the tree is the address of a local or
+ * the address of a field in a local.
+ *
+ * *lclVarTreeOut will contain the GT_LCL_VAR tree when it returns TRUE.
+ */
+
+BOOL Compiler::impIsAddressInLocal(GenTreePtr tree, GenTreePtr* lclVarTreeOut)
+{
+ if (tree->gtOper != GT_ADDR)
+ {
+ return FALSE;
+ }
+
+ GenTreePtr op = tree->gtOp.gtOp1;
+ while (op->gtOper == GT_FIELD)
+ {
+ op = op->gtField.gtFldObj;
+ if (op && op->gtOper == GT_ADDR) // Skip static fields where op will be NULL.
+ {
+ op = op->gtOp.gtOp1;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ if (op->gtOper == GT_LCL_VAR)
+ {
+ *lclVarTreeOut = op;
+ return TRUE;
+ }
+ else
+ {
+ return FALSE;
+ }
+}
+
+//------------------------------------------------------------------------
+// impMakeDiscretionaryInlineObservations: make observations that help
+// determine the profitability of a discretionary inline
+//
+// Arguments:
+// pInlineInfo -- InlineInfo for the inline, or null for the prejit root
+// inlineResult -- InlineResult accumulating information about this inline
+//
+// Notes:
+// If inlining or prejitting the root, this method also makes
+// various observations about the method that factor into inline
+// decisions. It sets `compNativeSizeEstimate` as a side effect.
+
+void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, InlineResult* inlineResult)
+{
+ assert((pInlineInfo != nullptr && compIsForInlining()) || // Perform the actual inlining.
+ (pInlineInfo == nullptr && !compIsForInlining()) // Calculate the static inlining hint for ngen.
+ );
+
+ // If we're really inlining, we should just have one result in play.
+ assert((pInlineInfo == nullptr) || (inlineResult == pInlineInfo->inlineResult));
+
+ // If this is a "forceinline" method, the JIT probably shouldn't have gone
+ // to the trouble of estimating the native code size. Even if it did, it
+ // shouldn't be relying on the result of this method.
+ assert(inlineResult->GetObservation() == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ // Note if the caller contains NEWOBJ or NEWARR.
+ Compiler* rootCompiler = impInlineRoot();
+
+ if ((rootCompiler->optMethodFlags & OMF_HAS_NEWARRAY) != 0)
+ {
+ inlineResult->Note(InlineObservation::CALLER_HAS_NEWARRAY);
+ }
+
+ if ((rootCompiler->optMethodFlags & OMF_HAS_NEWOBJ) != 0)
+ {
+ inlineResult->Note(InlineObservation::CALLER_HAS_NEWOBJ);
+ }
+
+ bool calleeIsStatic = (info.compFlags & CORINFO_FLG_STATIC) != 0;
+ bool isSpecialMethod = (info.compFlags & CORINFO_FLG_CONSTRUCTOR) != 0;
+
+ if (isSpecialMethod)
+ {
+ if (calleeIsStatic)
+ {
+ inlineResult->Note(InlineObservation::CALLEE_IS_CLASS_CTOR);
+ }
+ else
+ {
+ inlineResult->Note(InlineObservation::CALLEE_IS_INSTANCE_CTOR);
+ }
+ }
+ else if (!calleeIsStatic)
+ {
+ // Callee is an instance method.
+ //
+ // Check if the callee has the same 'this' as the root.
+ if (pInlineInfo != nullptr)
+ {
+ GenTreePtr thisArg = pInlineInfo->iciCall->gtCall.gtCallObjp;
+ assert(thisArg);
+ bool isSameThis = impIsThis(thisArg);
+ inlineResult->NoteBool(InlineObservation::CALLSITE_IS_SAME_THIS, isSameThis);
+ }
+ }
+
+ // Note if the callee's class is a promotable struct
+ if ((info.compClassAttr & CORINFO_FLG_VALUECLASS) != 0)
+ {
+ lvaStructPromotionInfo structPromotionInfo;
+ lvaCanPromoteStructType(info.compClassHnd, &structPromotionInfo, false);
+ if (structPromotionInfo.canPromote)
+ {
+ inlineResult->Note(InlineObservation::CALLEE_CLASS_PROMOTABLE);
+ }
+ }
+
+#ifdef FEATURE_SIMD
+
+ // Note if this method has SIMD args or a SIMD return value
+ if (pInlineInfo != nullptr && pInlineInfo->hasSIMDTypeArgLocalOrReturn)
+ {
+ inlineResult->Note(InlineObservation::CALLEE_HAS_SIMD);
+ }
+
+#endif // FEATURE_SIMD
+
+ // Roughly classify callsite frequency.
+ InlineCallsiteFrequency frequency = InlineCallsiteFrequency::UNUSED;
+
+ // If this is a prejit root, or a maximally hot block...
+ if ((pInlineInfo == nullptr) || (pInlineInfo->iciBlock->bbWeight >= BB_MAX_WEIGHT))
+ {
+ frequency = InlineCallsiteFrequency::HOT;
+ }
+ // No training data. Look for loop-like things.
+ // We consider a recursive call loop-like. Do not give the inlining boost to the method itself.
+ // However, give it to things nearby.
+ else if ((pInlineInfo->iciBlock->bbFlags & BBF_BACKWARD_JUMP) &&
+ (pInlineInfo->fncHandle != pInlineInfo->inlineCandidateInfo->ilCallerHandle))
+ {
+ frequency = InlineCallsiteFrequency::LOOP;
+ }
+ else if ((pInlineInfo->iciBlock->bbFlags & BBF_PROF_WEIGHT) && (pInlineInfo->iciBlock->bbWeight > BB_ZERO_WEIGHT))
+ {
+ frequency = InlineCallsiteFrequency::WARM;
+ }
+ // Now modify the multiplier based on where we're called from.
+ else if (pInlineInfo->iciBlock->isRunRarely() || ((info.compFlags & FLG_CCTOR) == FLG_CCTOR))
+ {
+ frequency = InlineCallsiteFrequency::RARE;
+ }
+ else
+ {
+ frequency = InlineCallsiteFrequency::BORING;
+ }
+
+ // Also capture the block weight of the call site. In the prejit
+ // root case, assume there's some hot call site for this method.
+ unsigned weight = 0;
+
+ if (pInlineInfo != nullptr)
+ {
+ weight = pInlineInfo->iciBlock->bbWeight;
+ }
+ else
+ {
+ weight = BB_MAX_WEIGHT;
+ }
+
+ inlineResult->NoteInt(InlineObservation::CALLSITE_FREQUENCY, static_cast<int>(frequency));
+ inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, static_cast<int>(weight));
+}
+
+/*****************************************************************************
+ This method makes STATIC inlining decision based on the IL code.
+ It should not make any inlining decision based on the context.
+ If forceInline is true, then the inlining decision should not depend on
+ performance heuristics (code size, etc.).
+ */
+
+void Compiler::impCanInlineIL(CORINFO_METHOD_HANDLE fncHandle,
+ CORINFO_METHOD_INFO* methInfo,
+ bool forceInline,
+ InlineResult* inlineResult)
+{
+ unsigned codeSize = methInfo->ILCodeSize;
+
+ // We shouldn't have made up our minds yet...
+ assert(!inlineResult->IsDecided());
+
+ if (methInfo->EHcount)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_EH);
+ return;
+ }
+
+ if ((methInfo->ILCode == nullptr) || (codeSize == 0))
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NO_BODY);
+ return;
+ }
+
+ // For now we don't inline varargs (import code can't handle it)
+
+ if (methInfo->args.isVarArg())
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_MANAGED_VARARGS);
+ return;
+ }
+
+ // Reject if it has too many locals.
+ // This is currently an implementation limit due to fixed-size arrays in the
+ // inline info, rather than a performance heuristic.
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_LOCALS, methInfo->locals.numArgs);
+
+ if (methInfo->locals.numArgs > MAX_INL_LCLS)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_TOO_MANY_LOCALS);
+ return;
+ }
+
+ // Make sure there aren't too many arguments.
+ // This is currently an implementation limit due to fixed-size arrays in the
+ // inline info, rather than a performance heuristic.
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_NUMBER_OF_ARGUMENTS, methInfo->args.numArgs);
+
+ if (methInfo->args.numArgs > MAX_INL_ARGS)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_TOO_MANY_ARGUMENTS);
+ return;
+ }
+
+ // Note force inline state
+
+ inlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, forceInline);
+
+ // Note IL code size
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+ // Make sure maxstack is not too big
+
+ inlineResult->NoteInt(InlineObservation::CALLEE_MAXSTACK, methInfo->maxStack);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+}
+
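+// Minimal usage sketch (mirrors the call made from impCheckCanInline below; the
+// surrounding setup is elided):
+//
+//     CORINFO_METHOD_INFO methInfo;
+//     if (info.compCompHnd->getMethodInfo(fncHandle, &methInfo))
+//     {
+//         impCanInlineIL(fncHandle, &methInfo, forceInline, inlineResult);
+//         if (inlineResult->IsFailure())
+//         {
+//             // Every failure reported here is a "never" failure.
+//             assert(inlineResult->IsNever());
+//         }
+//     }
+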
+/*****************************************************************************
+ */
+
+void Compiler::impCheckCanInline(GenTreePtr call,
+ CORINFO_METHOD_HANDLE fncHandle,
+ unsigned methAttr,
+ CORINFO_CONTEXT_HANDLE exactContextHnd,
+ InlineCandidateInfo** ppInlineCandidateInfo,
+ InlineResult* inlineResult)
+{
+ // Either EE or JIT might throw exceptions below.
+ // If that happens, just don't inline the method.
+
+ struct Param
+ {
+ Compiler* pThis;
+ GenTreePtr call;
+ CORINFO_METHOD_HANDLE fncHandle;
+ unsigned methAttr;
+ CORINFO_CONTEXT_HANDLE exactContextHnd;
+ InlineResult* result;
+ InlineCandidateInfo** ppInlineCandidateInfo;
+ } param = {nullptr};
+
+ param.pThis = this;
+ param.call = call;
+ param.fncHandle = fncHandle;
+ param.methAttr = methAttr;
+ param.exactContextHnd = (exactContextHnd != nullptr) ? exactContextHnd : MAKE_METHODCONTEXT(fncHandle);
+ param.result = inlineResult;
+ param.ppInlineCandidateInfo = ppInlineCandidateInfo;
+
+ bool success = eeRunWithErrorTrap<Param>(
+ [](Param* pParam) {
+ DWORD dwRestrictions = 0;
+ CorInfoInitClassResult initClassResult;
+
+#ifdef DEBUG
+ const char* methodName;
+ const char* className;
+ methodName = pParam->pThis->eeGetMethodName(pParam->fncHandle, &className);
+
+ if (JitConfig.JitNoInline())
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLEE_IS_JIT_NOINLINE);
+ goto _exit;
+ }
+#endif
+
+ /* Try to get the code address/size for the method */
+
+ CORINFO_METHOD_INFO methInfo;
+ if (!pParam->pThis->info.compCompHnd->getMethodInfo(pParam->fncHandle, &methInfo))
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLEE_NO_METHOD_INFO);
+ goto _exit;
+ }
+
+ bool forceInline;
+ forceInline = !!(pParam->methAttr & CORINFO_FLG_FORCEINLINE);
+
+ pParam->pThis->impCanInlineIL(pParam->fncHandle, &methInfo, forceInline, pParam->result);
+
+ if (pParam->result->IsFailure())
+ {
+ assert(pParam->result->IsNever());
+ goto _exit;
+ }
+
+ // Speculatively check if initClass() can be done.
+ // If it can be done, we will try to inline the method. If inlining
+ // succeeds, then we will do the non-speculative initClass() and commit it.
+ // If this speculative call to initClass() fails, there is no point
+ // trying to inline this method.
+ initClassResult =
+ pParam->pThis->info.compCompHnd->initClass(nullptr /* field */, pParam->fncHandle /* method */,
+ pParam->exactContextHnd /* context */,
+ TRUE /* speculative */);
+
+ if (initClassResult & CORINFO_INITCLASS_DONT_INLINE)
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLSITE_CLASS_INIT_FAILURE_SPEC);
+ goto _exit;
+ }
+
+ // Give the EE the final say in whether to inline or not.
+ // This should be last since for verifiable code, this can be expensive
+
+ /* VM Inline check also ensures that the method is verifiable if needed */
+ CorInfoInline vmResult;
+ vmResult = pParam->pThis->info.compCompHnd->canInline(pParam->pThis->info.compMethodHnd, pParam->fncHandle,
+ &dwRestrictions);
+
+ if (vmResult == INLINE_FAIL)
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLSITE_IS_VM_NOINLINE);
+ }
+ else if (vmResult == INLINE_NEVER)
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLEE_IS_VM_NOINLINE);
+ }
+
+ if (pParam->result->IsFailure())
+ {
+ // Make sure not to report this one. It was already reported by the VM.
+ pParam->result->SetReported();
+ goto _exit;
+ }
+
+ // check for unsupported inlining restrictions
+ assert((dwRestrictions & ~(INLINE_RESPECT_BOUNDARY | INLINE_NO_CALLEE_LDSTR | INLINE_SAME_THIS)) == 0);
+
+ if (dwRestrictions & INLINE_SAME_THIS)
+ {
+ GenTreePtr thisArg = pParam->call->gtCall.gtCallObjp;
+ assert(thisArg);
+
+ if (!pParam->pThis->impIsThis(thisArg))
+ {
+ pParam->result->NoteFatal(InlineObservation::CALLSITE_REQUIRES_SAME_THIS);
+ goto _exit;
+ }
+ }
+
+ /* Get the method properties */
+
+ CORINFO_CLASS_HANDLE clsHandle;
+ clsHandle = pParam->pThis->info.compCompHnd->getMethodClass(pParam->fncHandle);
+ unsigned clsAttr;
+ clsAttr = pParam->pThis->info.compCompHnd->getClassAttribs(clsHandle);
+
+ /* Get the return type */
+
+ var_types fncRetType;
+ fncRetType = pParam->call->TypeGet();
+
+#ifdef DEBUG
+ var_types fncRealRetType;
+ fncRealRetType = JITtype2varType(methInfo.args.retType);
+
+ assert((genActualType(fncRealRetType) == genActualType(fncRetType)) ||
+ // <BUGNUM> VSW 288602 </BUGNUM>
+ // In case of IJW, we allow to assign a native pointer to a BYREF.
+ (fncRetType == TYP_BYREF && methInfo.args.retType == CORINFO_TYPE_PTR) ||
+ (varTypeIsStruct(fncRetType) && (fncRealRetType == TYP_STRUCT)));
+#endif
+
+ //
+ // Allocate an InlineCandidateInfo structure
+ //
+ InlineCandidateInfo* pInfo;
+ pInfo = new (pParam->pThis, CMK_Inlining) InlineCandidateInfo;
+
+ pInfo->dwRestrictions = dwRestrictions;
+ pInfo->methInfo = methInfo;
+ pInfo->methAttr = pParam->methAttr;
+ pInfo->clsHandle = clsHandle;
+ pInfo->clsAttr = clsAttr;
+ pInfo->fncRetType = fncRetType;
+ pInfo->exactContextHnd = pParam->exactContextHnd;
+ pInfo->ilCallerHandle = pParam->pThis->info.compMethodHnd;
+ pInfo->initClassResult = initClassResult;
+
+ *(pParam->ppInlineCandidateInfo) = pInfo;
+
+ _exit:;
+ },
+ &param);
+ if (!success)
+ {
+ param.result->NoteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR);
+ }
+}
+
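+// The eeRunWithErrorTrap pattern used above, reduced to a skeleton (a sketch;
+// "DoChecks" is a placeholder): state is passed through a local struct so the
+// lambda can stay capture-free, and any EE or JIT exception converts into a
+// CALLSITE_COMPILATION_ERROR observation.
+//
+//     struct Param
+//     {
+//         Compiler*     pThis;
+//         InlineResult* result;
+//     } param = {this, inlineResult};
+//
+//     bool success = eeRunWithErrorTrap<Param>([](Param* p) { /* DoChecks(p); */ }, &param);
+//     if (!success)
+//     {
+//         param.result->NoteFatal(InlineObservation::CALLSITE_COMPILATION_ERROR);
+//     }
+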
+void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo,
+ GenTreePtr curArgVal,
+ unsigned argNum,
+ InlineResult* inlineResult)
+{
+ InlArgInfo* inlCurArgInfo = &pInlineInfo->inlArgInfo[argNum];
+
+ if (curArgVal->gtOper == GT_MKREFANY)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_IS_MKREFANY);
+ return;
+ }
+
+ inlCurArgInfo->argNode = curArgVal;
+
+ GenTreePtr lclVarTree;
+ if (impIsAddressInLocal(curArgVal, &lclVarTree) && varTypeIsStruct(lclVarTree))
+ {
+ inlCurArgInfo->argIsByRefToStructLocal = true;
+#ifdef FEATURE_SIMD
+ if (lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum].lvSIMDType)
+ {
+ pInlineInfo->hasSIMDTypeArgLocalOrReturn = true;
+ }
+#endif // FEATURE_SIMD
+ }
+
+ if (curArgVal->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ // Right now impInlineSpillLclRefs and impInlineSpillGlobEffects don't take
+ // into account special side effects, so we disallow them during inlining.
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_SIDE_EFFECT);
+ return;
+ }
+
+ if (curArgVal->gtFlags & GTF_GLOB_EFFECT)
+ {
+ inlCurArgInfo->argHasGlobRef = (curArgVal->gtFlags & GTF_GLOB_REF) != 0;
+ inlCurArgInfo->argHasSideEff = (curArgVal->gtFlags & GTF_SIDE_EFFECT) != 0;
+ }
+
+ if (curArgVal->gtOper == GT_LCL_VAR)
+ {
+ inlCurArgInfo->argIsLclVar = true;
+
+ /* Remember the "original" argument number */
+ curArgVal->gtLclVar.gtLclILoffs = argNum;
+ }
+
+ if ((curArgVal->OperKind() & GTK_CONST) ||
+ ((curArgVal->gtOper == GT_ADDR) && (curArgVal->gtOp.gtOp1->gtOper == GT_LCL_VAR)))
+ {
+ inlCurArgInfo->argIsInvariant = true;
+ if (inlCurArgInfo->argIsThis && (curArgVal->gtOper == GT_CNS_INT) && (curArgVal->gtIntCon.gtIconVal == 0))
+ {
+ /* Abort, but do not mark as not inlinable */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_HAS_NULL_THIS);
+ return;
+ }
+ }
+
+ if (!inlCurArgInfo->argIsInvariant && gtHasLocalsWithAddrOp(curArgVal))
+ {
+ inlCurArgInfo->argHasLdargaOp = true;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (inlCurArgInfo->argIsThis)
+ {
+ printf("thisArg:");
+ }
+ else
+ {
+ printf("\nArgument #%u:", argNum);
+ }
+ if (inlCurArgInfo->argIsLclVar)
+ {
+ printf(" is a local var");
+ }
+ if (inlCurArgInfo->argIsInvariant)
+ {
+ printf(" is a constant");
+ }
+ if (inlCurArgInfo->argHasGlobRef)
+ {
+ printf(" has global refs");
+ }
+ if (inlCurArgInfo->argHasSideEff)
+ {
+ printf(" has side effects");
+ }
+ if (inlCurArgInfo->argHasLdargaOp)
+ {
+ printf(" has ldarga effect");
+ }
+ if (inlCurArgInfo->argHasStargOp)
+ {
+ printf(" has starg effect");
+ }
+ if (inlCurArgInfo->argIsByRefToStructLocal)
+ {
+ printf(" is byref to a struct local");
+ }
+
+ printf("\n");
+ gtDispTree(curArgVal);
+ printf("\n");
+ }
+#endif
+}
+
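+// A few argument shapes and the flags they pick up above (the caller-side IL is
+// hypothetical):
+//
+//     ldc.i4 42         -> constant node:          argIsInvariant
+//     ldloca.s V_0      -> GT_ADDR(GT_LCL_VAR):    argIsInvariant, plus
+//                          argIsByRefToStructLocal if V_0 is a struct
+//     ldloc.0           -> GT_LCL_VAR:             argIsLclVar
+//     call Foo::Bar()   -> side-effecting tree:    argHasSideEff
+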
+/*****************************************************************************
+ *
+ */
+
+void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
+{
+ assert(!compIsForInlining());
+
+ GenTreePtr call = pInlineInfo->iciCall;
+ CORINFO_METHOD_INFO* methInfo = &pInlineInfo->inlineCandidateInfo->methInfo;
+ unsigned clsAttr = pInlineInfo->inlineCandidateInfo->clsAttr;
+ InlArgInfo* inlArgInfo = pInlineInfo->inlArgInfo;
+ InlLclVarInfo* lclVarInfo = pInlineInfo->lclVarInfo;
+ InlineResult* inlineResult = pInlineInfo->inlineResult;
+
+ const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(methInfo);
+
+ /* Init the argument info structs */
+
+ memset(inlArgInfo, 0, (MAX_INL_ARGS + 1) * sizeof(inlArgInfo[0]));
+
+ /* Get hold of the 'this' pointer and the argument list proper */
+
+ GenTreePtr thisArg = call->gtCall.gtCallObjp;
+ GenTreePtr argList = call->gtCall.gtCallArgs;
+ unsigned argCnt = 0; // Count of the arguments
+
+ assert((methInfo->args.hasThis()) == (thisArg != nullptr));
+
+ if (thisArg)
+ {
+ inlArgInfo[0].argIsThis = true;
+
+ impInlineRecordArgInfo(pInlineInfo, thisArg, argCnt, inlineResult);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+ /* Increment the argument count */
+ argCnt++;
+ }
+
+ /* Record some information about each of the arguments */
+ bool hasTypeCtxtArg = (methInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) != 0;
+
+#if USER_ARGS_COME_LAST
+ unsigned typeCtxtArg = thisArg ? 1 : 0;
+#else // USER_ARGS_COME_LAST
+ unsigned typeCtxtArg = methInfo->args.totalILArgs();
+#endif // USER_ARGS_COME_LAST
+
+ for (GenTreePtr argTmp = argList; argTmp; argTmp = argTmp->gtOp.gtOp2)
+ {
+ if (argTmp == argList && hasRetBuffArg)
+ {
+ continue;
+ }
+
+ // Ignore the type context argument
+ if (hasTypeCtxtArg && (argCnt == typeCtxtArg))
+ {
+ typeCtxtArg = 0xFFFFFFFF;
+ continue;
+ }
+
+ assert(argTmp->gtOper == GT_LIST);
+ GenTreePtr argVal = argTmp->gtOp.gtOp1;
+
+ impInlineRecordArgInfo(pInlineInfo, argVal, argCnt, inlineResult);
+
+ if (inlineResult->IsFailure())
+ {
+ return;
+ }
+
+ /* Increment the argument count */
+ argCnt++;
+ }
+
+ /* Make sure we got the arg number right */
+ assert(argCnt == methInfo->args.totalILArgs());
+
+#ifdef FEATURE_SIMD
+ bool foundSIMDType = pInlineInfo->hasSIMDTypeArgLocalOrReturn;
+#endif // FEATURE_SIMD
+
+ /* We have typeless opcodes, get type information from the signature */
+
+ if (thisArg)
+ {
+ var_types sigType;
+
+ if (clsAttr & CORINFO_FLG_VALUECLASS)
+ {
+ sigType = TYP_BYREF;
+ }
+ else
+ {
+ sigType = TYP_REF;
+ }
+
+ lclVarInfo[0].lclVerTypeInfo = verMakeTypeInfo(pInlineInfo->inlineCandidateInfo->clsHandle);
+ lclVarInfo[0].lclHasLdlocaOp = false;
+
+#ifdef FEATURE_SIMD
+ // We always want to check isSIMDClass, since we want to set foundSIMDType (to increase
+ // the inlining multiplier) for anything in that assembly.
+ // But we only need to normalize it if it is a TYP_STRUCT
+ // (which we need to do even if we have already set foundSIMDType).
+ if ((!foundSIMDType || (sigType == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[0].lclVerTypeInfo)))
+ {
+ if (sigType == TYP_STRUCT)
+ {
+ sigType = impNormStructType(lclVarInfo[0].lclVerTypeInfo.GetClassHandle());
+ }
+ foundSIMDType = true;
+ }
+#endif // FEATURE_SIMD
+ lclVarInfo[0].lclTypeInfo = sigType;
+
+ assert(varTypeIsGC(thisArg->gtType) || // "this" is managed
+ (thisArg->gtType == TYP_I_IMPL && // "this" is unmanaged but the method's class doesn't care
+ (clsAttr & CORINFO_FLG_VALUECLASS)));
+
+ if (genActualType(thisArg->gtType) != genActualType(sigType))
+ {
+ if (sigType == TYP_REF)
+ {
+ /* The argument cannot be bashed into a ref (see bug 750871) */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_NO_BASH_TO_REF);
+ return;
+ }
+
+ /* This can only happen with byrefs <-> ints/shorts */
+
+ assert(genActualType(sigType) == TYP_I_IMPL || sigType == TYP_BYREF);
+ assert(genActualType(thisArg->gtType) == TYP_I_IMPL || thisArg->gtType == TYP_BYREF);
+
+ if (sigType == TYP_BYREF)
+ {
+ lclVarInfo[0].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else if (thisArg->gtType == TYP_BYREF)
+ {
+ assert(sigType == TYP_I_IMPL);
+
+ /* If possible change the BYREF to an int */
+ if (thisArg->IsVarAddr())
+ {
+ thisArg->gtType = TYP_I_IMPL;
+ lclVarInfo[0].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else
+ {
+ /* Arguments 'int <- byref' cannot be bashed */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_NO_BASH_TO_INT);
+ return;
+ }
+ }
+ }
+ }
+
+ /* Init the types of the arguments and make sure the types
+ * from the trees match the types in the signature */
+
+ CORINFO_ARG_LIST_HANDLE argLst;
+ argLst = methInfo->args.args;
+
+ unsigned i;
+ for (i = (thisArg ? 1 : 0); i < argCnt; i++, argLst = info.compCompHnd->getArgNext(argLst))
+ {
+ var_types sigType = (var_types)eeGetArgType(argLst, &methInfo->args);
+
+ lclVarInfo[i].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->args, argLst);
+#ifdef FEATURE_SIMD
+ if ((!foundSIMDType || (sigType == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[i].lclVerTypeInfo)))
+ {
+ // If this is a SIMD class (i.e. in the SIMD assembly), then we will consider that we've
+ // found a SIMD type, even if this may not be a type we recognize (the assumption is that
+ // it is likely to use a SIMD type, and therefore we want to increase the inlining multiplier).
+ foundSIMDType = true;
+ if (sigType == TYP_STRUCT)
+ {
+ var_types structType = impNormStructType(lclVarInfo[i].lclVerTypeInfo.GetClassHandle());
+ sigType = structType;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ lclVarInfo[i].lclTypeInfo = sigType;
+ lclVarInfo[i].lclHasLdlocaOp = false;
+
+ /* Does the tree type match the signature type? */
+
+ GenTreePtr inlArgNode = inlArgInfo[i].argNode;
+
+ if (sigType != inlArgNode->gtType)
+ {
+ /* In valid IL, this can only happen for short integer types or byrefs <-> [native] ints,
+ but in bad IL cases with caller-callee signature mismatches we can see other types.
+ Intentionally reject mismatched cases so the jit remains robust when
+ encountering bad IL. */
+
+ bool isPlausibleTypeMatch = (genActualType(sigType) == genActualType(inlArgNode->gtType)) ||
+ (genActualTypeIsIntOrI(sigType) && inlArgNode->gtType == TYP_BYREF) ||
+ (sigType == TYP_BYREF && genActualTypeIsIntOrI(inlArgNode->gtType));
+
+ if (!isPlausibleTypeMatch)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_TYPES_INCOMPATIBLE);
+ return;
+ }
+
+ /* Is it a narrowing or widening cast?
+ * Widening casts are ok since the value computed is already
+ * normalized to an int (on the IL stack) */
+
+ if (genTypeSize(inlArgNode->gtType) >= genTypeSize(sigType))
+ {
+ if (sigType == TYP_BYREF)
+ {
+ lclVarInfo[i].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else if (inlArgNode->gtType == TYP_BYREF)
+ {
+ assert(varTypeIsIntOrI(sigType));
+
+ /* If possible bash the BYREF to an int */
+ if (inlArgNode->IsVarAddr())
+ {
+ inlArgNode->gtType = TYP_I_IMPL;
+ lclVarInfo[i].lclVerTypeInfo = typeInfo(varType2tiType(TYP_I_IMPL));
+ }
+ else
+ {
+ /* Arguments 'int <- byref' cannot be changed */
+ inlineResult->NoteFatal(InlineObservation::CALLSITE_ARG_NO_BASH_TO_INT);
+ return;
+ }
+ }
+ else if (genTypeSize(sigType) < EA_PTRSIZE)
+ {
+ /* Narrowing cast */
+
+ if (inlArgNode->gtOper == GT_LCL_VAR &&
+ !lvaTable[inlArgNode->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad() &&
+ sigType == lvaGetRealType(inlArgNode->gtLclVarCommon.gtLclNum))
+ {
+ /* We don't need to insert a cast here as the variable
+ was assigned a normalized value of the right type */
+
+ continue;
+ }
+
+ inlArgNode = inlArgInfo[i].argNode = gtNewCastNode(TYP_INT, inlArgNode, sigType);
+
+ inlArgInfo[i].argIsLclVar = false;
+
+ /* Try to fold the node in case we have constant arguments */
+
+ if (inlArgInfo[i].argIsInvariant)
+ {
+ inlArgNode = gtFoldExprConst(inlArgNode);
+ inlArgInfo[i].argNode = inlArgNode;
+ assert(inlArgNode->OperIsConst());
+ }
+ }
+#ifdef _TARGET_64BIT_
+ else if (genTypeSize(genActualType(inlArgNode->gtType)) < genTypeSize(sigType))
+ {
+ // This should only happen for int -> native int widening
+ inlArgNode = inlArgInfo[i].argNode = gtNewCastNode(genActualType(sigType), inlArgNode, sigType);
+
+ inlArgInfo[i].argIsLclVar = false;
+
+ /* Try to fold the node in case we have constant arguments */
+
+ if (inlArgInfo[i].argIsInvariant)
+ {
+ inlArgNode = gtFoldExprConst(inlArgNode);
+ inlArgInfo[i].argNode = inlArgNode;
+ assert(inlArgNode->OperIsConst());
+ }
+ }
+#endif // _TARGET_64BIT_
+ }
+ }
+ }
+
+ /* Init the types of the local variables */
+
+ CORINFO_ARG_LIST_HANDLE localsSig;
+ localsSig = methInfo->locals.args;
+
+ for (i = 0; i < methInfo->locals.numArgs; i++)
+ {
+ bool isPinned;
+ var_types type = (var_types)eeGetArgType(localsSig, &methInfo->locals, &isPinned);
+
+ lclVarInfo[i + argCnt].lclHasLdlocaOp = false;
+ lclVarInfo[i + argCnt].lclTypeInfo = type;
+
+ if (isPinned)
+ {
+ inlineResult->NoteFatal(InlineObservation::CALLEE_HAS_PINNED_LOCALS);
+ return;
+ }
+
+ lclVarInfo[i + argCnt].lclVerTypeInfo = verParseArgSigToTypeInfo(&methInfo->locals, localsSig);
+
+ localsSig = info.compCompHnd->getArgNext(localsSig);
+
+#ifdef FEATURE_SIMD
+ if ((!foundSIMDType || (type == TYP_STRUCT)) && isSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo)))
+ {
+ foundSIMDType = true;
+ if (featureSIMD && type == TYP_STRUCT)
+ {
+ var_types structType = impNormStructType(lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle());
+ lclVarInfo[i + argCnt].lclTypeInfo = structType;
+ }
+ }
+#endif // FEATURE_SIMD
+ }
+
+#ifdef FEATURE_SIMD
+ if (!foundSIMDType && (call->AsCall()->gtRetClsHnd != nullptr) && isSIMDClass(call->AsCall()->gtRetClsHnd))
+ {
+ foundSIMDType = true;
+ }
+ pInlineInfo->hasSIMDTypeArgLocalOrReturn = foundSIMDType;
+#endif // FEATURE_SIMD
+}
+
+unsigned Compiler::impInlineFetchLocal(unsigned lclNum DEBUGARG(const char* reason))
+{
+ assert(compIsForInlining());
+
+ unsigned tmpNum = impInlineInfo->lclTmpNum[lclNum];
+
+ if (tmpNum == BAD_VAR_NUM)
+ {
+ var_types lclTyp = impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclTypeInfo;
+
+ // The lifetime of this local might span multiple BBs.
+ // So it is a long lifetime local.
+ impInlineInfo->lclTmpNum[lclNum] = tmpNum = lvaGrabTemp(false DEBUGARG(reason));
+
+ lvaTable[tmpNum].lvType = lclTyp;
+ if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclHasLdlocaOp)
+ {
+ lvaTable[tmpNum].lvHasLdAddrOp = 1;
+ }
+
+ if (impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.IsStruct())
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ lvaSetStruct(tmpNum,
+ impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo.GetClassHandle(),
+ true /* unsafe value cls check */);
+ }
+ else
+ {
+ // This is a wrapped primitive. Make sure the verstate knows that
+ lvaTable[tmpNum].lvVerTypeInfo =
+ impInlineInfo->lclVarInfo[lclNum + impInlineInfo->argCnt].lclVerTypeInfo;
+ }
+ }
+ }
+
+ return tmpNum;
+}
+
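+// Usage sketch (the local number and reason string are illustrative): callers
+// fetch the temp lazily, so repeated requests for the same inlinee local return
+// the same lvaTable slot.
+//
+//     unsigned   tmpNum = impInlineFetchLocal(0 DEBUGARG("inlinee local V_0"));
+//     GenTreePtr use    = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType);
+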
+// Returns the GenTree (usually a GT_LCL_VAR) representing an argument of the inlined method.
+// Only use this method for the arguments of the inlinee method.
+// !!! Do not use it for the locals of the inlinee method; use impInlineFetchLocal for those. !!!
+
+GenTreePtr Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, InlLclVarInfo* lclVarInfo)
+{
+ /* Get the argument type */
+ var_types lclTyp = lclVarInfo[lclNum].lclTypeInfo;
+
+ GenTreePtr op1 = nullptr;
+
+ // constant or address of local
+ if (inlArgInfo[lclNum].argIsInvariant && !inlArgInfo[lclNum].argHasLdargaOp && !inlArgInfo[lclNum].argHasStargOp)
+ {
+ /* Clone the constant. Note that we cannot directly use argNode
+ in the trees even if inlArgInfo[lclNum].argIsUsed==false as this
+ would introduce aliasing between inlArgInfo[].argNode and
+ impInlineExpr. Then gtFoldExpr() could change it, causing further
+ references to the argument to work off of the bashed copy. */
+
+ op1 = gtCloneExpr(inlArgInfo[lclNum].argNode);
+ PREFIX_ASSUME(op1 != nullptr);
+ inlArgInfo[lclNum].argTmpNum = (unsigned)-1; // illegal temp
+ }
+ else if (inlArgInfo[lclNum].argIsLclVar && !inlArgInfo[lclNum].argHasLdargaOp && !inlArgInfo[lclNum].argHasStargOp)
+ {
+ /* Argument is a local variable (of the caller)
+ * Can we re-use the passed argument node? */
+
+ op1 = inlArgInfo[lclNum].argNode;
+ inlArgInfo[lclNum].argTmpNum = op1->gtLclVarCommon.gtLclNum;
+
+ if (inlArgInfo[lclNum].argIsUsed)
+ {
+ assert(op1->gtOper == GT_LCL_VAR);
+ assert(lclNum == op1->gtLclVar.gtLclILoffs);
+
+ if (!lvaTable[op1->gtLclVarCommon.gtLclNum].lvNormalizeOnLoad())
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+
+ /* Create a new lcl var node - remember the argument lclNum */
+ op1 = gtNewLclvNode(op1->gtLclVarCommon.gtLclNum, lclTyp, op1->gtLclVar.gtLclILoffs);
+ }
+ }
+ else if (inlArgInfo[lclNum].argIsByRefToStructLocal && !inlArgInfo[lclNum].argHasStargOp)
+ {
+ /* Argument is a by-ref address to a struct, a normed struct, or its field.
+ In these cases, don't spill the byref to a local; simply clone the tree and use it.
+ This way we increase the chance that this byref will be optimized away by
+ a subsequent "dereference" operation.
+
+ From Dev11 bug #139955: Argument node can also be TYP_I_IMPL if we've bashed the tree
+ (in impInlineInitVars()), if the arg has argHasLdargaOp as well as argIsByRefToStructLocal.
+ For example, if the caller is:
+ ldloca.s V_1 // V_1 is a local struct
+ call void Test.ILPart::RunLdargaOnPointerArg(int32*)
+ and the callee being inlined has:
+ .method public static void RunLdargaOnPointerArg(int32* ptrToInts) cil managed
+ ldarga.s ptrToInts
+ call void Test.FourInts::NotInlined_SetExpectedValuesThroughPointerToPointer(int32**)
+ then we change the argument tree (of "ldloca.s V_1") to TYP_I_IMPL to match the callee signature. We'll
+ soon afterwards reject the inlining anyway, since the tree we return isn't a GT_LCL_VAR.
+ */
+ assert(inlArgInfo[lclNum].argNode->TypeGet() == TYP_BYREF ||
+ inlArgInfo[lclNum].argNode->TypeGet() == TYP_I_IMPL);
+ op1 = gtCloneExpr(inlArgInfo[lclNum].argNode);
+ }
+ else
+ {
+ /* Argument is a complex expression - it must be evaluated into a temp */
+
+ if (inlArgInfo[lclNum].argHasTmp)
+ {
+ assert(inlArgInfo[lclNum].argIsUsed);
+ assert(inlArgInfo[lclNum].argTmpNum < lvaCount);
+
+ /* Create a new lcl var node - remember the argument lclNum */
+ op1 = gtNewLclvNode(inlArgInfo[lclNum].argTmpNum, genActualType(lclTyp));
+
+ /* This is the second or later use of this argument,
+ so we have to use the temp (instead of the actual arg) */
+ inlArgInfo[lclNum].argBashTmpNode = nullptr;
+ }
+ else
+ {
+ /* First time use */
+ assert(inlArgInfo[lclNum].argIsUsed == false);
+
+ /* Reserve a temp for the expression.
+ * Use a large size node as we may change it later */
+
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Inlining Arg"));
+
+ lvaTable[tmpNum].lvType = lclTyp;
+ assert(lvaTable[tmpNum].lvAddrExposed == 0);
+ if (inlArgInfo[lclNum].argHasLdargaOp)
+ {
+ lvaTable[tmpNum].lvHasLdAddrOp = 1;
+ }
+
+ if (lclVarInfo[lclNum].lclVerTypeInfo.IsStruct())
+ {
+ if (varTypeIsStruct(lclTyp))
+ {
+ lvaSetStruct(tmpNum, impInlineInfo->lclVarInfo[lclNum].lclVerTypeInfo.GetClassHandle(),
+ true /* unsafe value cls check */);
+ }
+ else
+ {
+ // This is a wrapped primitive. Make sure the verstate knows that
+ lvaTable[tmpNum].lvVerTypeInfo = impInlineInfo->lclVarInfo[lclNum].lclVerTypeInfo;
+ }
+ }
+
+ inlArgInfo[lclNum].argHasTmp = true;
+ inlArgInfo[lclNum].argTmpNum = tmpNum;
+
+ // If we require strict exception order, then arguments must
+ // be evaluated in sequence before the body of the inlined method.
+ // So we need to evaluate them to a temp.
+ // Also, if arguments have global references, we need to
+ // evaluate them to a temp before the inlined body as the
+ // inlined body may be modifying the global ref.
+ // TODO-1stClassStructs: We currently do not reuse an existing lclVar
+ // if it is a struct, because it requires some additional handling.
+
+ if (!varTypeIsStruct(lclTyp) && (!inlArgInfo[lclNum].argHasSideEff) && (!inlArgInfo[lclNum].argHasGlobRef))
+ {
+ /* Get a *LARGE* LCL_VAR node */
+ op1 = gtNewLclLNode(tmpNum, genActualType(lclTyp), lclNum);
+
+ /* Record op1 as the very first use of this argument.
+ If there are no further uses of the arg, we may be
+ able to use the actual arg node instead of the temp.
+ If we do see any further uses, we will clear this. */
+ inlArgInfo[lclNum].argBashTmpNode = op1;
+ }
+ else
+ {
+ /* Get a small LCL_VAR node */
+ op1 = gtNewLclvNode(tmpNum, genActualType(lclTyp));
+ /* No bashing of this argument */
+ inlArgInfo[lclNum].argBashTmpNode = nullptr;
+ }
+ }
+ }
+
+ /* Mark the argument as used */
+
+ inlArgInfo[lclNum].argIsUsed = true;
+
+ return op1;
+}
+
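+// Summary of the cases above (for reference): an invariant argument is cloned at
+// each use; a caller local is re-used directly; a byref to a struct local is
+// cloned so later dereferences can fold; anything else is spilled to a temp,
+// with argBashTmpNode remembering the first use so a single-use temp can later
+// be rewritten back to the actual argument tree. A typical call from the inlinee
+// importer looks like (ilArgNum is the IL argument number):
+//
+//     GenTreePtr argUse = impInlineFetchArg(ilArgNum, inlArgInfo, lclVarInfo);
+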
+/******************************************************************************
+ Is this the original "this" argument to the call being inlined?
+
+ Note that we do not inline methods with "starg 0", and so we do not need to
+ worry about it.
+*/
+
+BOOL Compiler::impInlineIsThis(GenTreePtr tree, InlArgInfo* inlArgInfo)
+{
+ assert(compIsForInlining());
+ return (tree->gtOper == GT_LCL_VAR && tree->gtLclVarCommon.gtLclNum == inlArgInfo[0].argTmpNum);
+}
+
+//-----------------------------------------------------------------------------
+// This function checks if a dereference in the inlinee can guarantee that
+// the "this" is non-NULL.
+// If we haven't hit a branch or a side effect, and we are dereferencing
+// from 'this' to access a field or make a GTF_CALL_NULLCHECK call,
+// then we can avoid a separate null pointer check.
+//
+// "additionalTreesToBeEvaluatedBefore"
+// is the set of pending trees that have not yet been added to the statement list,
+// and which have been removed from verCurrentState.esStack[]
+
+BOOL Compiler::impInlineIsGuaranteedThisDerefBeforeAnySideEffects(GenTreePtr additionalTreesToBeEvaluatedBefore,
+ GenTreePtr variableBeingDereferenced,
+ InlArgInfo* inlArgInfo)
+{
+ assert(compIsForInlining());
+ assert(opts.OptEnabled(CLFLG_INLINING));
+
+ BasicBlock* block = compCurBB;
+
+ GenTreePtr stmt;
+ GenTreePtr expr;
+
+ if (block != fgFirstBB)
+ {
+ return FALSE;
+ }
+
+ if (!impInlineIsThis(variableBeingDereferenced, inlArgInfo))
+ {
+ return FALSE;
+ }
+
+ if (additionalTreesToBeEvaluatedBefore &&
+ GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(additionalTreesToBeEvaluatedBefore->gtFlags))
+ {
+ return FALSE;
+ }
+
+ for (stmt = impTreeList->gtNext; stmt; stmt = stmt->gtNext)
+ {
+ expr = stmt->gtStmt.gtStmtExpr;
+
+ if (GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(expr->gtFlags))
+ {
+ return FALSE;
+ }
+ }
+
+ for (unsigned level = 0; level < verCurrentState.esStackDepth; level++)
+ {
+ unsigned stackTreeFlags = verCurrentState.esStack[level].val->gtFlags;
+ if (GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS(stackTreeFlags))
+ {
+ return FALSE;
+ }
+ }
+
+ return TRUE;
+}
+
+/******************************************************************************/
+// Check the inlining eligibility of this GT_CALL node.
+// Mark GTF_CALL_INLINE_CANDIDATE on the GT_CALL node
+
+// Todo: find a way to record the failure reasons in the IR (or
+// otherwise build tree context) so when we do the inlining pass we
+// can capture these reasons
+
+void Compiler::impMarkInlineCandidate(GenTreePtr callNode,
+ CORINFO_CONTEXT_HANDLE exactContextHnd,
+ CORINFO_CALL_INFO* callInfo)
+{
+ // Let the strategy know there's another call
+ impInlineRoot()->m_inlineStrategy->NoteCall();
+
+ if (!opts.OptEnabled(CLFLG_INLINING))
+ {
+ /* XXX Mon 8/18/2008
+ * This assert is misleading. The caller does not ensure that we have CLFLG_INLINING set before
+ * calling impMarkInlineCandidate. However, if this assert trips it means that we're an inlinee and
+ * CLFLG_MINOPT is set. That doesn't make a lot of sense. If you hit this assert, work back and
+ * figure out why we did not set MAXOPT for this compile.
+ */
+ assert(!compIsForInlining());
+ return;
+ }
+
+ if (compIsForImportOnly())
+ {
+ // Don't bother creating the inline candidate during verification.
+ // Otherwise the call to info.compCompHnd->canInline will trigger a recursive verification
+ // that leads to the creation of multiple instances of Compiler.
+ return;
+ }
+
+ GenTreeCall* call = callNode->AsCall();
+ InlineResult inlineResult(this, call, nullptr, "impMarkInlineCandidate");
+
+ // Don't inline if not optimizing root method
+ if (opts.compDbgCode)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLER_DEBUG_CODEGEN);
+ return;
+ }
+
+ // Don't inline if inlining into root method is disabled.
+ if (InlineStrategy::IsNoInline(info.compCompHnd, info.compMethodHnd))
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLER_IS_JIT_NOINLINE);
+ return;
+ }
+
+ // Inlining candidate determination needs to honor only the IL tail prefix.
+ // Inlining takes precedence over implicit tail call optimization (if the call is not directly recursive).
+ if (call->IsTailPrefixedCall())
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_EXPLICIT_TAIL_PREFIX);
+ return;
+ }
+
+ // Tail recursion elimination takes precedence over inlining.
+ // TODO: We may want to do some of the additional checks from fgMorphCall
+ // here to reduce the chance we don't inline a call that won't be optimized
+ // as a fast tail call or turned into a loop.
+ if (gtIsRecursiveCall(call) && call->IsImplicitTailCall())
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IMPLICIT_REC_TAIL_CALL);
+ return;
+ }
+
+ if ((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) != GTF_CALL_NONVIRT)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_NOT_DIRECT);
+ return;
+ }
+
+ /* Ignore helper calls */
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_CALL_TO_HELPER);
+ return;
+ }
+
+ /* Ignore indirect calls */
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_NOT_DIRECT_MANAGED);
+ return;
+ }
+
+ /* I removed the check for BBJ_THROW. BBJ_THROW is usually marked as rarely run. This more or less
+ * restricts the inliner to non-expanding inlines. I removed the check to allow for non-expanding
+ * inlining in throw blocks. I should consider the same thing for catch and filter regions. */
+
+ CORINFO_METHOD_HANDLE fncHandle = call->gtCallMethHnd;
+ unsigned methAttr;
+
+ // Reuse method flags from the original callInfo if possible
+ if (fncHandle == callInfo->hMethod)
+ {
+ methAttr = callInfo->methodFlags;
+ }
+ else
+ {
+ methAttr = info.compCompHnd->getMethodAttribs(fncHandle);
+ }
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_FORCE_INLINE, 0))
+ {
+ methAttr |= CORINFO_FLG_FORCEINLINE;
+ }
+#endif
+
+ // Check for COMPlus_AggressiveInlining
+ if (compDoAggressiveInlining)
+ {
+ methAttr |= CORINFO_FLG_FORCEINLINE;
+ }
+
+ if (!(methAttr & CORINFO_FLG_FORCEINLINE))
+ {
+ /* Don't bother inlining calls in blocks that are in the catch handler region */
+ if (bbInCatchHandlerILRange(compCurBB))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nWill not inline blocks that are in the catch handler region\n");
+ }
+
+#endif
+
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_WITHIN_CATCH);
+ return;
+ }
+
+ if (bbInFilterILRange(compCurBB))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nWill not inline blocks that are in the filter region\n");
+ }
+#endif
+
+ inlineResult.NoteFatal(InlineObservation::CALLSITE_IS_WITHIN_FILTER);
+ return;
+ }
+ }
+
+ /* If the caller's stack frame is marked, then we can't do any inlining. Period. */
+
+ if (opts.compNeedSecurityCheck)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
+ return;
+ }
+
+ /* Check if we tried to inline this method before */
+
+ if (methAttr & CORINFO_FLG_DONT_INLINE)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLEE_IS_NOINLINE);
+ return;
+ }
+
+ /* Cannot inline synchronized methods */
+
+ if (methAttr & CORINFO_FLG_SYNCH)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLEE_IS_SYNCHRONIZED);
+ return;
+ }
+
+ /* Do not inline if callee needs security checks (since they would then mark the wrong frame) */
+
+ if (methAttr & CORINFO_FLG_SECURITYCHECK)
+ {
+ inlineResult.NoteFatal(InlineObservation::CALLEE_NEEDS_SECURITY_CHECK);
+ return;
+ }
+
+ InlineCandidateInfo* inlineCandidateInfo = nullptr;
+ impCheckCanInline(call, fncHandle, methAttr, exactContextHnd, &inlineCandidateInfo, &inlineResult);
+
+ if (inlineResult.IsFailure())
+ {
+ return;
+ }
+
+ // The old value should be NULL
+ assert(call->gtInlineCandidateInfo == nullptr);
+
+ call->gtInlineCandidateInfo = inlineCandidateInfo;
+
+ // Mark the call node as inline candidate.
+ call->gtFlags |= GTF_CALL_INLINE_CANDIDATE;
+
+ // Let the strategy know there's another candidate.
+ impInlineRoot()->m_inlineStrategy->NoteCandidate();
+
+ // Since we're not actually inlining yet, and this call site is
+ // still just an inline candidate, there's nothing to report.
+ inlineResult.SetReported();
+}
+
+/******************************************************************************/
+// Returns true if the given intrinsic will be implemented by target-specific
+// instructions
+
+bool Compiler::IsTargetIntrinsic(CorInfoIntrinsics intrinsicId)
+{
+#if defined(_TARGET_AMD64_)
+ switch (intrinsicId)
+ {
+ // Amd64 only has SSE2 instructions to directly compute sqrt/abs.
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ return true;
+
+ default:
+ return false;
+ }
+#elif defined(_TARGET_ARM64_)
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Round:
+ return true;
+
+ default:
+ return false;
+ }
+#elif defined(_TARGET_ARM_)
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Round:
+ return true;
+
+ default:
+ return false;
+ }
+#elif defined(_TARGET_X86_)
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Round:
+ return true;
+
+ default:
+ return false;
+ }
+#else
+ // TODO: This portion of logic is not implemented for other architectures.
+ // The reason for returning true is that on all other architectures the only
+ // intrinsics enabled are target intrinsics.
+ return true;
+#endif //_TARGET_AMD64_
+}
+
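+// Example (illustrative): on AMD64, CORINFO_INTRINSIC_Sqrt returns true here and
+// is expanded with SSE2, while CORINFO_INTRINSIC_Sin returns false and is left
+// as a call to its System.Math implementation (see
+// IsIntrinsicImplementedByUserCall below).
+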
+/******************************************************************************/
+// Returns true if the given intrinsic will be implemented by calling System.Math
+// methods.
+
+bool Compiler::IsIntrinsicImplementedByUserCall(CorInfoIntrinsics intrinsicId)
+{
+ // Currently, if a math intrinsic is not implemented by target-specific
+ // instructions, it will be implemented by a System.Math call. In the
+ // future, if we turn to implementing some of them with helper calls,
+ // this predicate needs to be revisited.
+ return !IsTargetIntrinsic(intrinsicId);
+}
+
+bool Compiler::IsMathIntrinsic(CorInfoIntrinsics intrinsicId)
+{
+ switch (intrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Round:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool Compiler::IsMathIntrinsic(GenTreePtr tree)
+{
+ return (tree->OperGet() == GT_INTRINSIC) && IsMathIntrinsic(tree->gtIntrinsic.gtIntrinsicId);
+}
+/*****************************************************************************/
diff --git a/src/jit/inline.cpp b/src/jit/inline.cpp
new file mode 100644
index 0000000000..deccc0e84b
--- /dev/null
+++ b/src/jit/inline.cpp
@@ -0,0 +1,1640 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "inlinepolicy.h"
+
+// Lookup table for inline description strings
+
+static const char* InlineDescriptions[] = {
+#define INLINE_OBSERVATION(name, type, description, impact, target) description,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
+
+// Lookup table for inline targets
+
+static const InlineTarget InlineTargets[] = {
+#define INLINE_OBSERVATION(name, type, description, impact, target) InlineTarget::target,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
+
+// Lookup table for inline impacts
+
+static const InlineImpact InlineImpacts[] = {
+#define INLINE_OBSERVATION(name, type, description, impact, target) InlineImpact::impact,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
+
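+// The three tables above are generated by re-expanding inline.def. For a
+// hypothetical entry such as
+//
+//     INLINE_OBSERVATION(CALLEE_HAS_EH, bool, "callee has EH", FATAL, CALLEE)
+//
+// the expansions contribute "callee has EH" to InlineDescriptions,
+// InlineTarget::CALLEE to InlineTargets, and InlineImpact::FATAL to
+// InlineImpacts, all at the index corresponding to the enum value.
+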
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// InlIsValidObservation: run a validity check on an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// true if the observation is valid
+
+bool InlIsValidObservation(InlineObservation obs)
+{
+ return ((obs > InlineObservation::CALLEE_UNUSED_INITIAL) && (obs < InlineObservation::CALLEE_UNUSED_FINAL));
+}
+
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// InlGetObservationString: get a string describing this inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// string describing the observation
+
+const char* InlGetObservationString(InlineObservation obs)
+{
+ assert(InlIsValidObservation(obs));
+ return InlineDescriptions[static_cast<int>(obs)];
+}
+
+//------------------------------------------------------------------------
+// InlGetTarget: get the target of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// enum describing the target
+
+InlineTarget InlGetTarget(InlineObservation obs)
+{
+ assert(InlIsValidObservation(obs));
+ return InlineTargets[static_cast<int>(obs)];
+}
+
+//------------------------------------------------------------------------
+// InlGetTargetString: get a string describing the target of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// string describing the target
+
+const char* InlGetTargetString(InlineObservation obs)
+{
+ InlineTarget t = InlGetTarget(obs);
+ switch (t)
+ {
+ case InlineTarget::CALLER:
+ return "caller";
+ case InlineTarget::CALLEE:
+ return "callee";
+ case InlineTarget::CALLSITE:
+ return "call site";
+ default:
+ return "unexpected target";
+ }
+}
+
+//------------------------------------------------------------------------
+// InlGetImpact: get the impact of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// enum value describing the impact
+
+InlineImpact InlGetImpact(InlineObservation obs)
+{
+ assert(InlIsValidObservation(obs));
+ return InlineImpacts[static_cast<int>(obs)];
+}
+
+//------------------------------------------------------------------------
+// InlGetImpactString: get a string describing the impact of an inline observation
+//
+// Arguments:
+// obs - the observation in question
+//
+// Return Value:
+// string describing the impact
+
+const char* InlGetImpactString(InlineObservation obs)
+{
+ InlineImpact i = InlGetImpact(obs);
+ switch (i)
+ {
+ case InlineImpact::FATAL:
+ return "correctness -- fatal";
+ case InlineImpact::FUNDAMENTAL:
+ return "correctness -- fundamental limitation";
+ case InlineImpact::LIMITATION:
+ return "correctness -- jit limitation";
+ case InlineImpact::PERFORMANCE:
+ return "performance";
+ case InlineImpact::INFORMATION:
+ return "information";
+ default:
+ return "unexpected impact";
+ }
+}
+
+//------------------------------------------------------------------------
+// InlGetCorInfoInlineDecision: translate decision into a CorInfoInline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// CorInfoInline value representing the decision
+
+CorInfoInline InlGetCorInfoInlineDecision(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ return INLINE_PASS;
+ case InlineDecision::FAILURE:
+ return INLINE_FAIL;
+ case InlineDecision::NEVER:
+ return INLINE_NEVER;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlGetDecisionString: get a string representing this decision
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// string representing the decision
+
+const char* InlGetDecisionString(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ return "success";
+ case InlineDecision::FAILURE:
+ return "failed this call site";
+ case InlineDecision::NEVER:
+ return "failed this callee";
+ case InlineDecision::CANDIDATE:
+ return "candidate";
+ case InlineDecision::UNDECIDED:
+ return "undecided";
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsFailure: check if this decision describes a failing inline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if the inline is definitely a failure
+
+bool InlDecisionIsFailure(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ case InlineDecision::FAILURE:
+ case InlineDecision::NEVER:
+ return true;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsSuccess: check if this decision describes a successful inline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if the inline is definitely a success
+
+bool InlDecisionIsSuccess(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::SUCCESS:
+ return true;
+ case InlineDecision::FAILURE:
+ case InlineDecision::NEVER:
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsNever: check if this decision describes a never inline
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if the inline is a never inline case
+
+bool InlDecisionIsNever(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::NEVER:
+ return true;
+ case InlineDecision::FAILURE:
+ case InlineDecision::SUCCESS:
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsCandidate: check if this decision describes a viable candidate
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if this inline still might happen
+
+bool InlDecisionIsCandidate(InlineDecision d)
+{
+ return !InlDecisionIsFailure(d);
+}
+
+//------------------------------------------------------------------------
+// InlDecisionIsDecided: check if this decision has been made
+//
+// Arguments:
+// d - the decision in question
+//
+// Return Value:
+// true if this inline has been decided one way or another
+
+bool InlDecisionIsDecided(InlineDecision d)
+{
+ switch (d)
+ {
+ case InlineDecision::NEVER:
+ case InlineDecision::FAILURE:
+ case InlineDecision::SUCCESS:
+ return true;
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ return false;
+ default:
+ assert(!"Unexpected InlineDecision");
+ unreached();
+ }
+}
+
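+// How the predicates above partition InlineDecision (derived from the switches;
+// for reference):
+//
+//                 IsFailure  IsSuccess  IsNever  IsCandidate  IsDecided
+//   UNDECIDED       false      false     false      true        false
+//   CANDIDATE       false      false     false      true        false
+//   SUCCESS         false      true      false      true        true
+//   FAILURE         true       false     false      false       true
+//   NEVER           true       false     true       false       true
+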
+//------------------------------------------------------------------------
+// InlineContext: default constructor
+
+InlineContext::InlineContext(InlineStrategy* strategy)
+ : m_InlineStrategy(strategy)
+ , m_Parent(nullptr)
+ , m_Child(nullptr)
+ , m_Sibling(nullptr)
+ , m_Code(nullptr)
+ , m_ILSize(0)
+ , m_Offset(BAD_IL_OFFSET)
+ , m_Observation(InlineObservation::CALLEE_UNUSED_INITIAL)
+ , m_CodeSizeEstimate(0)
+ , m_Success(true)
+#if defined(DEBUG) || defined(INLINE_DATA)
+ , m_Policy(nullptr)
+ , m_Callee(nullptr)
+ , m_TreeID(0)
+ , m_Ordinal(0)
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+{
+ // Empty
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// Dump: Dump an InlineContext entry and all descendants to jitstdout
+//
+// Arguments:
+// indent - indentation level for this node
+
+void InlineContext::Dump(unsigned indent)
+{
+ // Handle fact that siblings are in reverse order.
+ if (m_Sibling != nullptr)
+ {
+ m_Sibling->Dump(indent);
+ }
+
+ // We may not know callee name in some of the failing cases
+ Compiler* compiler = m_InlineStrategy->GetCompiler();
+ const char* calleeName = nullptr;
+
+ if (m_Callee == nullptr)
+ {
+ assert(!m_Success);
+ calleeName = "<unknown>";
+ }
+ else
+ {
+
+#if defined(DEBUG)
+ calleeName = compiler->eeGetMethodFullName(m_Callee);
+#else
+ calleeName = "callee";
+#endif // defined(DEBUG)
+ }
+
+ mdMethodDef calleeToken = compiler->info.compCompHnd->getMethodDefFromMethod(m_Callee);
+
+ // Dump this node
+ if (m_Parent == nullptr)
+ {
+ // Root method
+ printf("Inlines into %08X %s\n", calleeToken, calleeName);
+ }
+ else
+ {
+ // Inline attempt.
+ const char* inlineReason = InlGetObservationString(m_Observation);
+ const char* inlineResult = m_Success ? "" : "FAILED: ";
+
+ if (m_Offset == BAD_IL_OFFSET)
+ {
+ printf("%*s[%u IL=???? TR=%06u %08X] [%s%s] %s\n", indent, "", m_Ordinal, m_TreeID, calleeToken,
+ inlineResult, inlineReason, calleeName);
+ }
+ else
+ {
+ IL_OFFSET offset = jitGetILoffs(m_Offset);
+ printf("%*s[%u IL=%04d TR=%06u %08X] [%s%s] %s\n", indent, "", m_Ordinal, offset, m_TreeID, calleeToken,
+ inlineResult, inlineReason, calleeName);
+ }
+ }
+
+ // Recurse to first child
+ if (m_Child != nullptr)
+ {
+ m_Child->Dump(indent + 2);
+ }
+}
+
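+// Shape of the Dump() output (method names, tokens, tree IDs, and reason strings
+// below are illustrative):
+//
+//     Inlines into 06000001 MyNamespace.MyClass:Main()
+//       [1 IL=0005 TR=000123 06000002] [some observation] MyNamespace.Helper:Add()
+//       [2 IL=???? TR=000456 06000003] [FAILED: some observation] MyNamespace.Helper:Risky()
+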
+//------------------------------------------------------------------------
+// DumpData: Dump a successful InlineContext entry, detailed data, and
+// any successful descendant inlines
+//
+// Arguments:
+// indent - indentation level for this node
+
+void InlineContext::DumpData(unsigned indent)
+{
+ // Handle fact that siblings are in reverse order.
+ if (m_Sibling != nullptr)
+ {
+ m_Sibling->DumpData(indent);
+ }
+
+ Compiler* compiler = m_InlineStrategy->GetCompiler();
+
+#if defined(DEBUG)
+ const char* calleeName = compiler->eeGetMethodFullName(m_Callee);
+#else
+ const char* calleeName = "callee";
+#endif // defined(DEBUG)
+
+ if (m_Parent == nullptr)
+ {
+ // Root method... cons up a policy so we can display the name
+ InlinePolicy* policy = InlinePolicy::GetPolicy(compiler, true);
+ printf("\nInlines [%u] into \"%s\" [%s]\n", m_InlineStrategy->GetInlineCount(), calleeName, policy->GetName());
+ }
+ else if (m_Success)
+ {
+ const char* inlineReason = InlGetObservationString(m_Observation);
+ printf("%*s%u,\"%s\",\"%s\"", indent, "", m_Ordinal, inlineReason, calleeName);
+ m_Policy->DumpData(jitstdout);
+ printf("\n");
+ }
+
+ // Recurse to first child
+ if (m_Child != nullptr)
+ {
+ m_Child->DumpData(indent + 2);
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpXml: Dump an InlineContext entry and all descendants in xml format
+//
+// Arguments:
+// file - file for output
+// indent - indentation level for this node
+
+void InlineContext::DumpXml(FILE* file, unsigned indent)
+{
+ // Handle fact that siblings are in reverse order.
+ if (m_Sibling != nullptr)
+ {
+ m_Sibling->DumpXml(file, indent);
+ }
+
+ const bool isRoot = m_Parent == nullptr;
+ const bool hasChild = m_Child != nullptr;
+ const char* inlineType = m_Success ? "Inline" : "FailedInline";
+ unsigned newIndent = indent;
+
+ if (!isRoot)
+ {
+ Compiler* compiler = m_InlineStrategy->GetCompiler();
+
+ mdMethodDef calleeToken = compiler->info.compCompHnd->getMethodDefFromMethod(m_Callee);
+ unsigned calleeHash = compiler->info.compCompHnd->getMethodHash(m_Callee);
+
+ const char* inlineReason = InlGetObservationString(m_Observation);
+
+ int offset = -1;
+ if (m_Offset != BAD_IL_OFFSET)
+ {
+ offset = (int)jitGetILoffs(m_Offset);
+ }
+
+ fprintf(file, "%*s<%s>\n", indent, "", inlineType);
+ fprintf(file, "%*s<Token>%u</Token>\n", indent + 2, "", calleeToken);
+ fprintf(file, "%*s<Hash>%u</Hash>\n", indent + 2, "", calleeHash);
+ fprintf(file, "%*s<Offset>%u</Offset>\n", indent + 2, "", offset);
+ fprintf(file, "%*s<Reason>%s</Reason>\n", indent + 2, "", inlineReason);
+
+ // Optionally, dump data about the last inline
+ if ((JitConfig.JitInlineDumpData() != 0) && (this == m_InlineStrategy->GetLastContext()))
+ {
+ fprintf(file, "%*s<Data>", indent + 2, "");
+ m_InlineStrategy->DumpDataContents(file);
+ fprintf(file, "</Data>\n");
+ }
+
+ newIndent = indent + 2;
+ }
+
+ // Handle children
+
+ if (hasChild)
+ {
+ fprintf(file, "%*s<Inlines>\n", newIndent, "");
+ m_Child->DumpXml(file, newIndent + 2);
+ fprintf(file, "%*s</Inlines>\n", newIndent, "");
+ }
+ else
+ {
+ fprintf(file, "%*s<Inlines />\n", newIndent, "");
+ }
+
+ // Close out
+
+ if (!isRoot)
+ {
+ fprintf(file, "%*s</%s>\n", indent, "", inlineType);
+ }
+}
+
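+// Shape of the XML produced by DumpXml() (token, hash, and reason values are
+// illustrative; failed attempts use a <FailedInline> element instead):
+//
+//     <Inline>
+//       <Token>100663298</Token>
+//       <Hash>12345678</Hash>
+//       <Offset>5</Offset>
+//       <Reason>some observation</Reason>
+//       <Inlines />
+//     </Inline>
+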
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// InlineResult: Construct an InlineResult to evaluate a particular call
+// for inlining.
+//
+// Arguments:
+// compiler - the compiler instance examining a call for inlining
+// call - the call in question
+// stmt - statement containing the call (if known)
+// description - string describing the context of the decision
+
+InlineResult::InlineResult(Compiler* compiler, GenTreeCall* call, GenTreeStmt* stmt, const char* description)
+ : m_RootCompiler(nullptr)
+ , m_Policy(nullptr)
+ , m_Call(call)
+ , m_InlineContext(nullptr)
+ , m_Caller(nullptr)
+ , m_Callee(nullptr)
+ , m_Description(description)
+ , m_Reported(false)
+{
+ // Set the compiler instance
+ m_RootCompiler = compiler->impInlineRoot();
+
+ // Set the policy
+ const bool isPrejitRoot = false;
+ m_Policy = InlinePolicy::GetPolicy(m_RootCompiler, isPrejitRoot);
+
+ // Pass along some optional information to the policy.
+ if (stmt != nullptr)
+ {
+ m_InlineContext = stmt->gtInlineContext;
+ m_Policy->NoteContext(m_InlineContext);
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ m_Policy->NoteOffset(call->gtRawILOffset);
+#else
+ m_Policy->NoteOffset(stmt->gtStmtILoffsx);
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+ }
+
+ // Get method handle for caller. Note we use the
+ // handle for the "immediate" caller here.
+ m_Caller = compiler->info.compMethodHnd;
+
+ // Get method handle for callee, if known
+ if (m_Call->gtCall.gtCallType == CT_USER_FUNC)
+ {
+ m_Callee = m_Call->gtCall.gtCallMethHnd;
+ }
+}
+
+//------------------------------------------------------------------------
+// InlineResult: Construct an InlineResult to evaluate a particular
+// method as a possible inline candidate, while prejitting.
+//
+// Arguments:
+// compiler - the compiler instance doing the prejitting
+// method - the method in question
+// description - string describing the context of the decision
+//
+// Notes:
+// Used only during prejitting to try to pre-identify methods that
+// cannot be inlined, to improve subsequent jit throughput.
+//
+// We use the m_Callee member to track the method since logically
+// it is the callee here.
+
+InlineResult::InlineResult(Compiler* compiler, CORINFO_METHOD_HANDLE method, const char* description)
+ : m_RootCompiler(nullptr)
+ , m_Policy(nullptr)
+ , m_Call(nullptr)
+ , m_InlineContext(nullptr)
+ , m_Caller(nullptr)
+ , m_Callee(method)
+ , m_Description(description)
+ , m_Reported(false)
+{
+ // Set the compiler instance
+ m_RootCompiler = compiler->impInlineRoot();
+
+ // Set the policy
+ const bool isPrejitRoot = true;
+ m_Policy = InlinePolicy::GetPolicy(m_RootCompiler, isPrejitRoot);
+}
+
+//------------------------------------------------------------------------
+// Report: Dump, log, and report information about an inline decision.
+//
+// Notes:
+// Called (automatically via the InlineResult dtor) when the
+// inliner is done evaluating a candidate.
+//
+// Dumps state of the inline candidate, and if a decision was
+// reached, sends it to the log and reports the decision back to the
+// EE. Optionally update the method attribute to NOINLINE if
+// observation and policy warrant.
+//
+// All this can be suppressed if desired by calling SetReported()
+// before the InlineResult goes out of scope.
+
+void InlineResult::Report()
+{
+ // If we weren't actually inlining, the user may have suppressed
+ // reporting via SetReported(). If so, do nothing.
+ if (m_Reported)
+ {
+ return;
+ }
+
+ m_Reported = true;
+
+#ifdef DEBUG
+ const char* callee = nullptr;
+
+ // Optionally dump the result
+ if (VERBOSE)
+ {
+ const char* format = "INLINER: during '%s' result '%s' reason '%s' for '%s' calling '%s'\n";
+ const char* caller = (m_Caller == nullptr) ? "n/a" : m_RootCompiler->eeGetMethodFullName(m_Caller);
+
+ callee = (m_Callee == nullptr) ? "n/a" : m_RootCompiler->eeGetMethodFullName(m_Callee);
+
+ JITDUMP(format, m_Description, ResultString(), ReasonString(), caller, callee);
+ }
+
+ // If the inline failed, leave information on the call so we can
+ // later recover which observation led to the failure.
+ if (IsFailure() && (m_Call != nullptr))
+ {
+ // compiler should have revoked candidacy on the call by now
+ assert((m_Call->gtFlags & GTF_CALL_INLINE_CANDIDATE) == 0);
+
+ m_Call->gtInlineObservation = m_Policy->GetObservation();
+ }
+
+#endif // DEBUG
+
+ // Was the result NEVER? If so we might want to propagate this to
+ // the runtime.
+
+ if (IsNever() && m_Policy->PropagateNeverToRuntime())
+ {
+ // If we know the callee, and if the observation that got us
+ // to this Never inline state is something *other* than
+ // IS_NOINLINE, then we've uncovered a reason why this method
+ // can't ever be inlined. Update the callee method attributes
+ // so that future inline attempts for this callee fail faster.
+
+ InlineObservation obs = m_Policy->GetObservation();
+
+ if ((m_Callee != nullptr) && (obs != InlineObservation::CALLEE_IS_NOINLINE))
+ {
+
+#ifdef DEBUG
+
+ if (VERBOSE)
+ {
+ const char* obsString = InlGetObservationString(obs);
+ JITDUMP("\nINLINER: Marking %s as NOINLINE because of %s\n", callee, obsString);
+ }
+
+#endif // DEBUG
+
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+ comp->setMethodAttribs(m_Callee, CORINFO_FLG_BAD_INLINEE);
+ }
+ }
+
+ if (IsDecided())
+ {
+ const char* format = "INLINER: during '%s' result '%s' reason '%s'\n";
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, format, m_Description, ResultString(), ReasonString()));
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+ comp->reportInliningDecision(m_Caller, m_Callee, Result(), ReasonString());
+ }
+}
+
+//------------------------------------------------------------------------
+// InlineStrategy constructor
+//
+// Arguments:
+// compiler - root compiler instance
+
+InlineStrategy::InlineStrategy(Compiler* compiler)
+ : m_Compiler(compiler)
+ , m_RootContext(nullptr)
+ , m_LastSuccessfulPolicy(nullptr)
+ , m_CallCount(0)
+ , m_CandidateCount(0)
+ , m_AlwaysCandidateCount(0)
+ , m_ForceCandidateCount(0)
+ , m_DiscretionaryCandidateCount(0)
+ , m_UnprofitableCandidateCount(0)
+ , m_ImportCount(0)
+ , m_InlineCount(0)
+ , m_MaxInlineSize(DEFAULT_MAX_INLINE_SIZE)
+ , m_MaxInlineDepth(DEFAULT_MAX_INLINE_DEPTH)
+ , m_InitialTimeBudget(0)
+ , m_InitialTimeEstimate(0)
+ , m_CurrentTimeBudget(0)
+ , m_CurrentTimeEstimate(0)
+ , m_InitialSizeEstimate(0)
+ , m_CurrentSizeEstimate(0)
+ , m_HasForceViaDiscretionary(false)
+#if defined(DEBUG) || defined(INLINE_DATA)
+ , m_MethodXmlFilePosition(0)
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+{
+ // Verify compiler is a root compiler instance
+ assert(m_Compiler->impInlineRoot() == m_Compiler);
+
+#ifdef DEBUG
+
+ // Possibly modify the max inline size.
+ //
+ // Default value of JitInlineSize is the same as our default.
+ // So normally this next line does not change the size.
+ m_MaxInlineSize = JitConfig.JitInlineSize();
+
+ // Up the max size under stress
+ if (m_Compiler->compInlineStress())
+ {
+ m_MaxInlineSize *= 10;
+ }
+
+ // But don't overdo it
+ if (m_MaxInlineSize > IMPLEMENTATION_MAX_INLINE_SIZE)
+ {
+ m_MaxInlineSize = IMPLEMENTATION_MAX_INLINE_SIZE;
+ }
+
+ // Verify: not too small, not too big.
+ assert(m_MaxInlineSize >= ALWAYS_INLINE_SIZE);
+ assert(m_MaxInlineSize <= IMPLEMENTATION_MAX_INLINE_SIZE);
+
+ // Possibly modify the max inline depth
+ //
+ // Default value of JitInlineDepth is the same as our default.
+    // So normally this next line does not change the depth.
+ m_MaxInlineDepth = JitConfig.JitInlineDepth();
+
+ // But don't overdo it
+ if (m_MaxInlineDepth > IMPLEMENTATION_MAX_INLINE_DEPTH)
+ {
+ m_MaxInlineDepth = IMPLEMENTATION_MAX_INLINE_DEPTH;
+ }
+
+#endif // DEBUG
+}
+
+//------------------------------------------------------------------------
+// GetRootContext: get the InlineContext for the root method
+//
+// Return Value:
+// Root context; describes the method being jitted.
+//
+// Note:
+// Also initializes the jit time estimate and budget.
+
+InlineContext* InlineStrategy::GetRootContext()
+{
+ if (m_RootContext == nullptr)
+ {
+ // Allocate on first demand.
+ m_RootContext = NewRoot();
+
+ // Estimate how long the jit will take if there's no inlining
+ // done to this method.
+ m_InitialTimeEstimate = EstimateTime(m_RootContext);
+ m_CurrentTimeEstimate = m_InitialTimeEstimate;
+
+ // Set the initial budget for inlining. Note this is
+ // deliberately set very high and is intended to catch
+ // only pathological runaway inline cases.
+ m_InitialTimeBudget = BUDGET * m_InitialTimeEstimate;
+ m_CurrentTimeBudget = m_InitialTimeBudget;
+
+ // Estimate the code size if there's no inlining
+ m_InitialSizeEstimate = EstimateSize(m_RootContext);
+ m_CurrentSizeEstimate = m_InitialSizeEstimate;
+
+ // Sanity check
+ assert(m_CurrentTimeEstimate > 0);
+ assert(m_CurrentSizeEstimate > 0);
+
+ // Cache as the "last" context created
+ m_LastContext = m_RootContext;
+ }
+
+ return m_RootContext;
+}
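+
+// Illustrative numbers (the BUDGET multiplier is defined elsewhere; assume
+// it is 10 purely for the sake of the example): a root method estimated at
+// 360 would get an initial time budget of 10 * 360 = 3600, which only a
+// pathological amount of inlining should ever exhaust.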
+
+//------------------------------------------------------------------------
+// NoteAttempt: do bookkeeping for an inline attempt
+//
+// Arguments:
+// result -- InlineResult for successful inline candidate
+
+void InlineStrategy::NoteAttempt(InlineResult* result)
+{
+ assert(result->IsCandidate());
+ InlineObservation obs = result->GetObservation();
+
+ if (obs == InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE)
+ {
+ m_AlwaysCandidateCount++;
+ }
+ else if (obs == InlineObservation::CALLEE_IS_FORCE_INLINE)
+ {
+ m_ForceCandidateCount++;
+ }
+ else
+ {
+ m_DiscretionaryCandidateCount++;
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpCsvHeader: dump header for csv inline stats
+//
+// Argument:
+// fp -- file for dump output
+
+void InlineStrategy::DumpCsvHeader(FILE* fp)
+{
+ fprintf(fp, "\"InlineCalls\",");
+ fprintf(fp, "\"InlineCandidates\",");
+ fprintf(fp, "\"InlineAlways\",");
+ fprintf(fp, "\"InlineForce\",");
+ fprintf(fp, "\"InlineDiscretionary\",");
+ fprintf(fp, "\"InlineUnprofitable\",");
+ fprintf(fp, "\"InlineEarlyFail\",");
+ fprintf(fp, "\"InlineImport\",");
+ fprintf(fp, "\"InlineLateFail\",");
+ fprintf(fp, "\"InlineSuccess\",");
+}
+
+//------------------------------------------------------------------------
+// DumpCsvData: dump data for csv inline stats
+//
+// Argument:
+// fp -- file for dump output
+
+void InlineStrategy::DumpCsvData(FILE* fp)
+{
+ fprintf(fp, "%u,", m_CallCount);
+ fprintf(fp, "%u,", m_CandidateCount);
+ fprintf(fp, "%u,", m_AlwaysCandidateCount);
+ fprintf(fp, "%u,", m_ForceCandidateCount);
+ fprintf(fp, "%u,", m_DiscretionaryCandidateCount);
+ fprintf(fp, "%u,", m_UnprofitableCandidateCount);
+
+    // Early failures are cases where candidates are rejected between
+    // the time the jit invokes the inlinee compiler and the time it
+    // starts to import the inlinee IL.
+    //
+    // So they are "cheaper" than late failures.
+
+ unsigned profitableCandidateCount = m_DiscretionaryCandidateCount - m_UnprofitableCandidateCount;
+
+ unsigned earlyFailCount =
+ m_CandidateCount - m_AlwaysCandidateCount - m_ForceCandidateCount - profitableCandidateCount;
+
+ fprintf(fp, "%u,", earlyFailCount);
+
+ unsigned lateFailCount = m_ImportCount - m_InlineCount;
+
+ fprintf(fp, "%u,", m_ImportCount);
+ fprintf(fp, "%u,", lateFailCount);
+ fprintf(fp, "%u,", m_InlineCount);
+}
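+
+// Illustrative accounting (hypothetical counts, not from a real run): with
+// 10 candidates of which 2 are always, 1 is force, and 5 are discretionary
+// (1 of those unprofitable), the profitable discretionary count is 4 and
+// earlyFailCount = 10 - 2 - 1 - 4 = 3. If 6 candidates were then imported
+// and 5 inlined, lateFailCount = 6 - 5 = 1.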
+
+//------------------------------------------------------------------------
+// EstimateTime: estimate impact of this inline on the method jit time
+//
+// Arguments:
+// context - context describing this inline
+//
+// Return Value:
+// Nominal estimate of jit time.
+
+int InlineStrategy::EstimateTime(InlineContext* context)
+{
+ // Simple linear models based on observations
+ // show time is fairly well predicted by IL size.
+ unsigned ilSize = context->GetILSize();
+
+ // Prediction varies for root and inlines.
+ if (context == m_RootContext)
+ {
+ return EstimateRootTime(ilSize);
+ }
+ else
+ {
+ return EstimateInlineTime(ilSize);
+ }
+}
+
+//------------------------------------------------------------------------
+// EstimateRootTime: estimate jit time for a method of this size with
+// no inlining.
+//
+// Arguments:
+// ilSize - size of the method's IL
+//
+// Return Value:
+// Nominal estimate of jit time.
+//
+// Notes:
+// Based on observational data. Time is nominally microseconds.
+
+int InlineStrategy::EstimateRootTime(unsigned ilSize)
+{
+ return 60 + 3 * ilSize;
+}
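+
+// Worked example (illustrative only): a root method with 100 bytes of IL
+// gets a nominal estimate of 60 + 3 * 100 = 360, roughly microseconds.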
+
+//------------------------------------------------------------------------
+// EstimateInlineTime: estimate time impact on jitting for an inline
+// of this size.
+//
+// Arguments:
+// ilSize - size of the method's IL
+//
+// Return Value:
+// Nominal increase in jit time.
+//
+// Notes:
+// Based on observational data. Time is nominally microseconds.
+// Small inlines will make the jit a bit faster.
+
+int InlineStrategy::EstimateInlineTime(unsigned ilSize)
+{
+ return -14 + 2 * ilSize;
+}
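+
+// Worked example (illustrative only): a 50-byte inlinee adds a nominal
+// -14 + 2 * 50 = 86 to the estimate, while a tiny 5-byte inlinee yields
+// -14 + 2 * 5 = -4, reflecting that very small inlines can make the jit
+// slightly faster.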
+
+//------------------------------------------------------------------------
+// EstimateSize: estimate impact of this inline on the method size
+//
+// Arguments:
+// context - context describing this inline
+//
+// Return Value:
+// Nominal estimate of method size (bytes * 10)
+
+int InlineStrategy::EstimateSize(InlineContext* context)
+{
+ // Prediction varies for root and inlines.
+ if (context == m_RootContext)
+ {
+ // Simple linear models based on observations show root method
+ // native code size is fairly well predicted by IL size.
+ //
+ // Model below is for x64 on windows.
+ unsigned ilSize = context->GetILSize();
+ int estimate = (1312 + 228 * ilSize) / 10;
+
+ return estimate;
+ }
+ else
+ {
+ // Use context's code size estimate.
+ return context->GetCodeSizeEstimate();
+ }
+}
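+
+// Worked example (illustrative only): a root method with 100 bytes of IL
+// gets a size estimate of (1312 + 228 * 100) / 10 = 2411 in the bytes * 10
+// scale used here, or roughly 241 bytes of native code.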
+
+//------------------------------------------------------------------------
+// NoteOutcome: do bookkeeping for an inline
+//
+// Arguments:
+//    context - context for the inline
+
+void InlineStrategy::NoteOutcome(InlineContext* context)
+{
+ // Note we can't generally count up failures here -- we only
+ // create contexts for failures in debug modes, and even then
+ // we may not get them all.
+ if (context->IsSuccess())
+ {
+ m_InlineCount++;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Keep track of the inline targeted for data collection or,
+ // if we don't have one (yet), the last successful inline.
+ bool updateLast = (m_LastSuccessfulPolicy == nullptr) || !m_LastSuccessfulPolicy->IsDataCollectionTarget();
+
+ if (updateLast)
+ {
+ m_LastContext = context;
+ m_LastSuccessfulPolicy = context->m_Policy;
+ }
+ else
+ {
+ // We only expect one inline to be a data collection
+ // target.
+ assert(!context->m_Policy->IsDataCollectionTarget());
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Budget update.
+ //
+ // If callee is a force inline, increase budget, provided all
+ // parent contexts are likewise force inlines.
+ //
+ // If callee is discretionary or has a discretionary ancestor,
+ // increase expense.
+
+ InlineContext* currentContext = context;
+ bool isForceInline = false;
+
+ while (currentContext != m_RootContext)
+ {
+ InlineObservation observation = currentContext->GetObservation();
+
+ if (observation != InlineObservation::CALLEE_IS_FORCE_INLINE)
+ {
+ if (isForceInline)
+ {
+ // Interesting case where discretionary inlines pull
+ // in a force inline...
+ m_HasForceViaDiscretionary = true;
+ }
+
+ isForceInline = false;
+ break;
+ }
+
+ isForceInline = true;
+ currentContext = currentContext->GetParent();
+ }
+
+ int timeDelta = EstimateTime(context);
+
+ if (isForceInline)
+ {
+ // Update budget since this inline was forced. Only allow
+ // budget to increase.
+ if (timeDelta > 0)
+ {
+ m_CurrentTimeBudget += timeDelta;
+ }
+ }
+
+ // Update time estimate.
+ m_CurrentTimeEstimate += timeDelta;
+
+ // Update size estimate.
+ //
+ // Sometimes estimates don't make sense. Don't let the method
+ // size go negative.
+ int sizeDelta = EstimateSize(context);
+
+ if (m_CurrentSizeEstimate + sizeDelta <= 0)
+ {
+ sizeDelta = 0;
+ }
+
+ // Update the code size estimate.
+ m_CurrentSizeEstimate += sizeDelta;
+ }
+}
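+
+// Illustrative walk of the budget update above (hypothetical call tree): if
+// the root inlines force-inline A and A inlines force-inline B, noting B's
+// outcome walks B -> A, sees only CALLEE_IS_FORCE_INLINE, and grows the time
+// budget by B's (positive) time estimate. If instead the root inlines
+// discretionary C and C inlines force-inline D, the walk from D stops at C,
+// the budget is unchanged, and m_HasForceViaDiscretionary is set.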
+
+//------------------------------------------------------------------------
+// BudgetCheck: return true if an inline of this size would exceed the
+// jit time budget for this method
+//
+// Arguments:
+// ilSize - size of the method's IL
+//
+// Return Value:
+// true if the inline would go over budget
+
+bool InlineStrategy::BudgetCheck(unsigned ilSize)
+{
+ int timeDelta = EstimateInlineTime(ilSize);
+ return (timeDelta + m_CurrentTimeEstimate > m_CurrentTimeBudget);
+}
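+
+// Illustrative check (hypothetical numbers): with m_CurrentTimeEstimate at
+// 360 and m_CurrentTimeBudget at 3600, a prospective 200-byte inlinee adds
+// EstimateInlineTime(200) = -14 + 2 * 200 = 386; since 360 + 386 does not
+// exceed 3600, BudgetCheck returns false and the inline stays within budget.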
+
+//------------------------------------------------------------------------
+// NewRoot: construct an InlineContext for the root method
+//
+// Return Value:
+// InlineContext for use as the root context
+//
+// Notes:
+// We leave m_Code as nullptr here (rather than the IL buffer
+// address of the root method) to preserve existing behavior, which
+// is to allow one recursive inline.
+
+InlineContext* InlineStrategy::NewRoot()
+{
+ InlineContext* rootContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
+
+ rootContext->m_ILSize = m_Compiler->info.compILCodeSize;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ rootContext->m_Callee = m_Compiler->info.compMethodHnd;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ return rootContext;
+}
+
+//------------------------------------------------------------------------
+// NewSuccess: construct an InlineContext for a successful inline
+// and link it into the context tree
+//
+// Arguments:
+// stmt - statement containing call being inlined
+// inlineInfo - information about this inline
+//
+// Return Value:
+// A new InlineContext for statements brought into the method by
+// this inline.
+
+InlineContext* InlineStrategy::NewSuccess(InlineInfo* inlineInfo)
+{
+ InlineContext* calleeContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
+ GenTree* stmt = inlineInfo->iciStmt;
+ BYTE* calleeIL = inlineInfo->inlineCandidateInfo->methInfo.ILCode;
+ unsigned calleeILSize = inlineInfo->inlineCandidateInfo->methInfo.ILCodeSize;
+ InlineContext* parentContext = stmt->gtStmt.gtInlineContext;
+
+ noway_assert(parentContext != nullptr);
+
+ calleeContext->m_Code = calleeIL;
+ calleeContext->m_ILSize = calleeILSize;
+ calleeContext->m_Parent = parentContext;
+    // Pushing on the front here puts siblings in reverse lexical
+    // order, which we undo in the dumper.
+ calleeContext->m_Sibling = parentContext->m_Child;
+ parentContext->m_Child = calleeContext;
+ calleeContext->m_Child = nullptr;
+ calleeContext->m_Offset = stmt->AsStmt()->gtStmtILoffsx;
+ calleeContext->m_Observation = inlineInfo->inlineResult->GetObservation();
+ calleeContext->m_Success = true;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ InlinePolicy* policy = inlineInfo->inlineResult->GetPolicy();
+
+ calleeContext->m_Policy = policy;
+ calleeContext->m_CodeSizeEstimate = policy->CodeSizeEstimate();
+ calleeContext->m_Callee = inlineInfo->fncHandle;
+ // +1 here since we set this before calling NoteOutcome.
+ calleeContext->m_Ordinal = m_InlineCount + 1;
+ // Update offset with more accurate info
+ calleeContext->m_Offset = inlineInfo->inlineResult->GetCall()->gtRawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if defined(DEBUG)
+
+ calleeContext->m_TreeID = inlineInfo->inlineResult->GetCall()->gtTreeID;
+
+#endif // defined(DEBUG)
+
+ NoteOutcome(calleeContext);
+
+ return calleeContext;
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// NewFailure: construct an InlineContext for a failing inline
+// and link it into the context tree
+//
+// Arguments:
+// stmt - statement containing the attempted inline
+// inlineResult - inlineResult for the attempt
+//
+// Return Value:
+// A new InlineContext for diagnostic purposes, or nullptr if
+// the desired context could not be created.
+
+InlineContext* InlineStrategy::NewFailure(GenTree* stmt, InlineResult* inlineResult)
+{
+ // Check for a parent context first. We may insert new statements
+ // between the caller and callee that do not pick up either's
+ // context, and these statements may have calls that we later
+ // examine and fail to inline.
+ //
+ // See fgInlinePrependStatements for examples.
+
+ InlineContext* parentContext = stmt->gtStmt.gtInlineContext;
+
+ if (parentContext == nullptr)
+ {
+ // Assume for now this is a failure to inline a call in a
+ // statement inserted between caller and callee. Just ignore
+ // it for the time being.
+
+ return nullptr;
+ }
+
+ InlineContext* failedContext = new (m_Compiler, CMK_Inlining) InlineContext(this);
+
+ failedContext->m_Parent = parentContext;
+    // Pushing on the front here puts siblings in reverse lexical
+    // order, which we undo in the dumper.
+ failedContext->m_Sibling = parentContext->m_Child;
+ parentContext->m_Child = failedContext;
+ failedContext->m_Child = nullptr;
+ failedContext->m_Offset = stmt->AsStmt()->gtStmtILoffsx;
+ failedContext->m_Observation = inlineResult->GetObservation();
+ failedContext->m_Callee = inlineResult->GetCallee();
+ failedContext->m_Success = false;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Update offset with more accurate info
+ failedContext->m_Offset = inlineResult->GetCall()->gtRawILOffset;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if defined(DEBUG)
+
+ failedContext->m_TreeID = inlineResult->GetCall()->gtTreeID;
+
+#endif // defined(DEBUG)
+
+ NoteOutcome(failedContext);
+
+ return failedContext;
+}
+
+//------------------------------------------------------------------------
+// Dump: dump description of inline behavior
+
+void InlineStrategy::Dump()
+{
+ m_RootContext->Dump();
+
+ printf("Budget: initialTime=%d, finalTime=%d, initialBudget=%d, currentBudget=%d\n", m_InitialTimeEstimate,
+ m_CurrentTimeEstimate, m_InitialTimeBudget, m_CurrentTimeBudget);
+
+ if (m_CurrentTimeBudget > m_InitialTimeBudget)
+ {
+ printf("Budget: increased by %d because of force inlines\n", m_CurrentTimeBudget - m_InitialTimeBudget);
+ }
+
+ if (m_CurrentTimeEstimate > m_CurrentTimeBudget)
+ {
+ printf("Budget: went over budget by %d\n", m_CurrentTimeEstimate - m_CurrentTimeBudget);
+ }
+
+ if (m_HasForceViaDiscretionary)
+ {
+ printf("Budget: discretionary inline caused a force inline\n");
+ }
+
+ printf("Budget: initialSize=%d, finalSize=%d\n", m_InitialSizeEstimate, m_CurrentSizeEstimate);
+}
+
+// Static to track emission of the inline data header
+
+bool InlineStrategy::s_HasDumpedDataHeader = false;
+
+//------------------------------------------------------------------------
+// DumpData: dump data about the last successful inline into this method
+// in a format suitable for automated analysis.
+
+void InlineStrategy::DumpData()
+{
+ // Is dumping enabled? If not, nothing to do.
+ if (JitConfig.JitInlineDumpData() == 0)
+ {
+ return;
+ }
+
+ // If we're also dumping inline XML, we'll let it dump the data.
+ if (JitConfig.JitInlineDumpXml() != 0)
+ {
+ return;
+ }
+
+ // Don't dump anything if limiting is on and we didn't reach
+ // the limit while inlining.
+ //
+ // This serves to filter out duplicate data.
+ const int limit = JitConfig.JitInlineLimit();
+
+ if ((limit >= 0) && (m_InlineCount < static_cast<unsigned>(limit)))
+ {
+ return;
+ }
+
+ // Dump header, if not already dumped
+ if (!s_HasDumpedDataHeader)
+ {
+ DumpDataHeader(stderr);
+ s_HasDumpedDataHeader = true;
+ }
+
+ // Dump contents
+ DumpDataContents(stderr);
+ fprintf(stderr, "\n");
+}
+
+//------------------------------------------------------------------------
+// DumpDataEnsurePolicyIsSet: ensure m_LastSuccessfulPolicy describes the
+// inline policy in effect.
+//
+// Notes:
+// Needed for methods that don't have any successful inlines.
+
+void InlineStrategy::DumpDataEnsurePolicyIsSet()
+{
+ // Cache references to compiler substructures.
+ const Compiler::Info& info = m_Compiler->info;
+ const Compiler::Options& opts = m_Compiler->opts;
+
+ // If there weren't any successful inlines, we won't have a
+ // successful policy, so fake one up.
+ if (m_LastSuccessfulPolicy == nullptr)
+ {
+ const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0;
+ m_LastSuccessfulPolicy = InlinePolicy::GetPolicy(m_Compiler, isPrejitRoot);
+
+ // Add in a bit of data....
+ const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
+ m_LastSuccessfulPolicy->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline);
+ m_LastSuccessfulPolicy->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, info.compMethodInfo->ILCodeSize);
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpDataHeader: dump header for inline data.
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::DumpDataHeader(FILE* file)
+{
+ DumpDataEnsurePolicyIsSet();
+ const int limit = JitConfig.JitInlineLimit();
+ fprintf(file, "*** Inline Data: Policy=%s JitInlineLimit=%d ***\n", m_LastSuccessfulPolicy->GetName(), limit);
+ DumpDataSchema(file);
+ fprintf(file, "\n");
+}
+
+//------------------------------------------------------------------------
+// DumpDataSchema: dump schema for inline data.
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::DumpDataSchema(FILE* file)
+{
+ DumpDataEnsurePolicyIsSet();
+ fprintf(file, "Method,Version,HotSize,ColdSize,JitTime,SizeEstimate,TimeEstimate");
+ m_LastSuccessfulPolicy->DumpSchema(file);
+}
+
+//------------------------------------------------------------------------
+// DumpDataContents: dump contents of inline data
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::DumpDataContents(FILE* file)
+{
+ DumpDataEnsurePolicyIsSet();
+
+ // Cache references to compiler substructures.
+ const Compiler::Info& info = m_Compiler->info;
+ const Compiler::Options& opts = m_Compiler->opts;
+
+ // We'd really like the method identifier to be unique and
+ // durable across crossgen invocations. Not clear how to
+ // accomplish this, so we'll use the token for now.
+ //
+ // Post processing will have to filter out all data from
+ // methods where the root entry appears multiple times.
+ mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+ // Convert time spent jitting into microseconds
+ unsigned microsecondsSpentJitting = 0;
+ unsigned __int64 compCycles = m_Compiler->getInlineCycleCount();
+ if (compCycles > 0)
+ {
+ double countsPerSec = CycleTimer::CyclesPerSecond();
+ double counts = (double)compCycles;
+ microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000);
+ }
+
+ fprintf(file, "%08X,%u,%u,%u,%u,%d,%d", currentMethodToken, m_InlineCount, info.compTotalHotCodeSize,
+ info.compTotalColdCodeSize, microsecondsSpentJitting, m_CurrentSizeEstimate / 10, m_CurrentTimeEstimate);
+ m_LastSuccessfulPolicy->DumpData(file);
+}
+
+// Static to track emission of the xml data header
+// and lock to prevent interleaved file writes
+
+bool InlineStrategy::s_HasDumpedXmlHeader = false;
+CritSecObject InlineStrategy::s_XmlWriterLock;
+
+//------------------------------------------------------------------------
+// DumpXml: dump xml-formatted version of the inline tree.
+//
+// Arguments:
+// file - file for data output
+// indent - indent level of this element
+
+void InlineStrategy::DumpXml(FILE* file, unsigned indent)
+{
+ if (JitConfig.JitInlineDumpXml() == 0)
+ {
+ return;
+ }
+
+ // Lock to prevent interleaving of trees.
+ CritSecHolder writeLock(s_XmlWriterLock);
+
+ // Dump header
+ if (!s_HasDumpedXmlHeader)
+ {
+ DumpDataEnsurePolicyIsSet();
+
+ fprintf(file, "<?xml version=\"1.0\"?>\n");
+ fprintf(file, "<InlineForest>\n");
+ fprintf(file, "<Policy>%s</Policy>\n", m_LastSuccessfulPolicy->GetName());
+
+ if (JitConfig.JitInlineDumpData() != 0)
+ {
+ fprintf(file, "<DataSchema>");
+ DumpDataSchema(file);
+ fprintf(file, "</DataSchema>\n");
+ }
+
+ fprintf(file, "<Methods>\n");
+ s_HasDumpedXmlHeader = true;
+ }
+
+ // If we're dumping "minimal" Xml, and we didn't do
+ // any inlines into this method, then there's nothing
+ // to emit here.
+ if ((m_InlineCount == 0) && (JitConfig.JitInlineDumpXml() == 2))
+ {
+ return;
+ }
+
+ // Cache references to compiler substructures.
+ const Compiler::Info& info = m_Compiler->info;
+ const Compiler::Options& opts = m_Compiler->opts;
+
+ const bool isPrejitRoot = (opts.eeFlags & CORJIT_FLG_PREJIT) != 0;
+ const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
+
+ // We'd really like the method identifier to be unique and
+ // durable across crossgen invocations. Not clear how to
+ // accomplish this, so we'll use the token for now.
+ //
+ // Post processing will have to filter out all data from
+ // methods where the root entry appears multiple times.
+ mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+
+ unsigned hash = info.compMethodHash();
+
+ // Convert time spent jitting into microseconds
+ unsigned microsecondsSpentJitting = 0;
+ unsigned __int64 compCycles = m_Compiler->getInlineCycleCount();
+ if (compCycles > 0)
+ {
+ double countsPerSec = CycleTimer::CyclesPerSecond();
+ double counts = (double)compCycles;
+ microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000);
+ }
+
+ // Get method name just for root method, to make it a bit easier
+ // to search for things in the inline xml.
+ const char* methodName = info.compCompHnd->getMethodName(info.compMethodHnd, nullptr);
+
+    // Cheap xml quoting for values. Only < and & are troublemakers,
+    // but we also change > for symmetry.
+    //
+    // Ok to truncate the name, just ensure it's null terminated.
+ char buf[64];
+ strncpy(buf, methodName, sizeof(buf));
+ buf[sizeof(buf) - 1] = 0;
+
+ for (int i = 0; i < sizeof(buf); i++)
+ {
+ switch (buf[i])
+ {
+ case '<':
+ buf[i] = '[';
+ break;
+ case '>':
+ buf[i] = ']';
+ break;
+ case '&':
+ buf[i] = '#';
+ break;
+ default:
+ break;
+ }
+ }
+
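+    // For illustration (hypothetical name): a compiler-generated method
+    // name such as "<Foo>b__0" is emitted as "[Foo]b__0" after the
+    // substitution above, keeping the XML well formed.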
+ fprintf(file, "%*s<Method>\n", indent, "");
+ fprintf(file, "%*s<Token>%u</Token>\n", indent + 2, "", currentMethodToken);
+ fprintf(file, "%*s<Hash>%u</Hash>\n", indent + 2, "", hash);
+ fprintf(file, "%*s<Name>%s</Name>\n", indent + 2, "", buf);
+ fprintf(file, "%*s<InlineCount>%u</InlineCount>\n", indent + 2, "", m_InlineCount);
+ fprintf(file, "%*s<HotSize>%u</HotSize>\n", indent + 2, "", info.compTotalHotCodeSize);
+ fprintf(file, "%*s<ColdSize>%u</ColdSize>\n", indent + 2, "", info.compTotalColdCodeSize);
+ fprintf(file, "%*s<JitTime>%u</JitTime>\n", indent + 2, "", microsecondsSpentJitting);
+ fprintf(file, "%*s<SizeEstimate>%u</SizeEstimate>\n", indent + 2, "", m_CurrentSizeEstimate / 10);
+ fprintf(file, "%*s<TimeEstimate>%u</TimeEstimate>\n", indent + 2, "", m_CurrentTimeEstimate);
+
+ // Root context will be null if we're not optimizing the method.
+ //
+ // Note there are cases of this in mscorlib even in release builds,
+ // eg Task.NotifyDebuggerOfWaitCompletion.
+ //
+ // For such methods there aren't any inlines.
+ if (m_RootContext != nullptr)
+ {
+ m_RootContext->DumpXml(file, indent + 2);
+ }
+ else
+ {
+ fprintf(file, "%*s<Inlines/>\n", indent + 2, "");
+ }
+
+ fprintf(file, "%*s</Method>\n", indent, "");
+}
+
+//------------------------------------------------------------------------
+// FinalizeXml: finalize the xml-formatted version of the inline tree.
+//
+// Arguments:
+// file - file for data output
+
+void InlineStrategy::FinalizeXml(FILE* file)
+{
+ // If we dumped the header, dump a footer
+ if (s_HasDumpedXmlHeader)
+ {
+ fprintf(file, "</Methods>\n");
+ fprintf(file, "</InlineForest>\n");
+ fflush(file);
+
+        // Workaround for compShutdown getting called twice.
+ s_HasDumpedXmlHeader = false;
+ }
+
+ // Finalize reading inline xml
+ ReplayPolicy::FinalizeXml();
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// IsNoInline: allow strategy to disable inlining in a method
+//
+// Arguments:
+// info -- compiler interface from the EE
+// method -- handle for the root method
+//
+// Notes:
+//    Will only return true in debug or special release builds.
+// Expects JitNoInlineRange to be set to the hashes of methods
+// where inlining is disabled.
+
+bool InlineStrategy::IsNoInline(ICorJitInfo* info, CORINFO_METHOD_HANDLE method)
+{
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ static ConfigMethodRange range;
+ const wchar_t* noInlineRange = JitConfig.JitNoInlineRange();
+
+ if (noInlineRange == nullptr)
+ {
+ return false;
+ }
+
+ // If we have a config string we have at least one entry. Count
+ // number of spaces in our config string to see if there are
+ // more. Number of ranges we need is 2x that value.
+ unsigned entryCount = 1;
+ for (const wchar_t* p = noInlineRange; *p != 0; p++)
+ {
+ if (*p == L' ')
+ {
+ entryCount++;
+ }
+ }
+
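+    // Illustrative count (hypothetical config value): a JitNoInlineRange
+    // string with two space-separated entries contains one space, so
+    // entryCount becomes 2 and we reserve room for 2 * 2 = 4 ranges.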
+ range.EnsureInit(noInlineRange, 2 * entryCount);
+ assert(!range.Error());
+ return range.Contains(info, method);
+
+#else
+
+ return false;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+}
diff --git a/src/jit/inline.def b/src/jit/inline.def
new file mode 100644
index 0000000000..2c933fb8a9
--- /dev/null
+++ b/src/jit/inline.def
@@ -0,0 +1,176 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Macro template for inline observations
+//
+// INLINE_OBSERVATION(name, type, description, impact, target)
+//
+// name will be used to create an InlineObservation enum member
+// (enum name prepends scope, eg CALLEE_MARKED_AS_SKIPPED)
+// type is the data type for the observation
+// description is a user string for diagnostics
+// impact is one of the members of InlineImpact
+// target is one of the members of InlineTarget
+//
+// Note: the impact classification is work in progress.
+//
+// Some subset of the FATAL cases here can be refined to SERIOUS,
+// LIMITATION, or PERFORMANCE. While the refined observations may
+// eventually veto inlining, the jit can safely keep making more
+// observations.
+
+// ------ Initial Sentinel -------
+
+INLINE_OBSERVATION(UNUSED_INITIAL, bool, "unused initial observation", FATAL, CALLEE)
+
+// ------ Callee Fatal -------
+
+INLINE_OBSERVATION(BAD_ARGUMENT_NUMBER, bool, "invalid argument number", FATAL, CALLEE)
+INLINE_OBSERVATION(BAD_LOCAL_NUMBER, bool, "invalid local number", FATAL, CALLEE)
+INLINE_OBSERVATION(CLASS_INIT_FAILURE, bool, "class init failed", FATAL, CALLEE)
+INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLEE)
+INLINE_OBSERVATION(EXCEEDS_THRESHOLD, bool, "exceeds profit threshold", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_DELEGATE_INVOKE, bool, "delegate invoke", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_EH, bool, "has exception handling", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_ENDFILTER, bool, "has endfilter", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_ENDFINALLY, bool, "has endfinally", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_LEAVE, bool, "has leave", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_PINNED_LOCALS, bool, "has pinned locals", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoinline", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_NOINLINE, bool, "noinline per IL/cached result", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_SYNCHRONIZED, bool, "is synchronized", FATAL, CALLEE)
+INLINE_OBSERVATION(IS_VM_NOINLINE, bool, "noinline per VM", FATAL, CALLEE)
+INLINE_OBSERVATION(LACKS_RETURN, bool, "no return opcode", FATAL, CALLEE)
+INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLEE)
+INLINE_OBSERVATION(LOG_REPLAY_REJECT, bool, "rejected by log replay", FATAL, CALLEE)
+INLINE_OBSERVATION(MARKED_AS_SKIPPED, bool, "skipped by complus request", FATAL, CALLEE)
+INLINE_OBSERVATION(MAXSTACK_TOO_BIG, bool, "maxstack too big" , FATAL, CALLEE)
+INLINE_OBSERVATION(NEEDS_SECURITY_CHECK, bool, "needs security check", FATAL, CALLEE)
+INLINE_OBSERVATION(NO_METHOD_INFO, bool, "cannot get method info", FATAL, CALLEE)
+INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLEE)
+INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLEE)
+INLINE_OBSERVATION(STACK_CRAWL_MARK, bool, "uses stack crawl mark", FATAL, CALLEE)
+INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLEE)
+INLINE_OBSERVATION(THROW_WITH_INVALID_STACK, bool, "throw with invalid stack", FATAL, CALLEE)
+INLINE_OBSERVATION(TOO_MANY_ARGUMENTS, bool, "too many arguments", FATAL, CALLEE)
+INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", FATAL, CALLEE)
+INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix in callee",FATAL, CALLEE)
+
+// ------ Callee Performance -------
+
+INLINE_OBSERVATION(LDFLD_STATIC_VALUECLASS, bool, "ldsfld of value class", PERFORMANCE, CALLEE)
+INLINE_OBSERVATION(TOO_MANY_BASIC_BLOCKS, bool, "too many basic blocks", PERFORMANCE, CALLEE)
+INLINE_OBSERVATION(TOO_MUCH_IL, bool, "too many il bytes", PERFORMANCE, CALLEE)
+
+// ------ Callee Information -------
+
+INLINE_OBSERVATION(ARG_FEEDS_CONSTANT_TEST, bool, "argument feeds constant test", INFORMATION, CALLEE)
+INLINE_OBSERVATION(ARG_FEEDS_RANGE_CHECK, bool, "argument feeds range check", INFORMATION, CALLEE)
+INLINE_OBSERVATION(BEGIN_OPCODE_SCAN, bool, "prepare to look at opcodes", INFORMATION, CALLEE)
+INLINE_OBSERVATION(BELOW_ALWAYS_INLINE_SIZE, bool, "below ALWAYS_INLINE size", INFORMATION, CALLEE)
+INLINE_OBSERVATION(CLASS_PROMOTABLE, bool, "promotable value class", INFORMATION, CALLEE)
+INLINE_OBSERVATION(DOES_NOT_RETURN, bool, "does not return", INFORMATION, CALLEE)
+INLINE_OBSERVATION(END_OPCODE_SCAN, bool, "done looking at opcodes", INFORMATION, CALLEE)
+INLINE_OBSERVATION(HAS_SIMD, bool, "has SIMD arg, local, or ret", INFORMATION, CALLEE)
+INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IL_CODE_SIZE, int, "number of bytes of IL", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_CLASS_CTOR, bool, "class constructor", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_DISCRETIONARY_INLINE, bool, "can inline, check heuristics", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_FORCE_INLINE, bool, "aggressive inline attribute", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_INSTANCE_CTOR, bool, "instance constructor", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_PROFITABLE_INLINE, bool, "profitable inline", INFORMATION, CALLEE)
+INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INFORMATION, CALLEE)
+INLINE_OBSERVATION(LOG_REPLAY_ACCEPT, bool, "accepted by log replay", INFORMATION, CALLEE)
+INLINE_OBSERVATION(LOOKS_LIKE_WRAPPER, bool, "thin wrapper around a call", INFORMATION, CALLEE)
+INLINE_OBSERVATION(MAXSTACK, int, "maxstack", INFORMATION, CALLEE)
+INLINE_OBSERVATION(OPCODE, int, "next opcode in IL stream", INFORMATION, CALLEE)
+INLINE_OBSERVATION(OPCODE_NORMED, int, "next opcode in IL stream", INFORMATION, CALLEE)
+INLINE_OBSERVATION(NUMBER_OF_ARGUMENTS, int, "number of arguments", INFORMATION, CALLEE)
+INLINE_OBSERVATION(NUMBER_OF_BASIC_BLOCKS, int, "number of basic blocks", INFORMATION, CALLEE)
+INLINE_OBSERVATION(NUMBER_OF_LOCALS, int, "number of locals", INFORMATION, CALLEE)
+INLINE_OBSERVATION(RANDOM_ACCEPT, bool, "random accept", INFORMATION, CALLEE)
+INLINE_OBSERVATION(UNSUPPORTED_OPCODE, bool, "unsupported opcode", INFORMATION, CALLEE)
+
+// ------ Caller Correctness -------
+
+INLINE_OBSERVATION(DEBUG_CODEGEN, bool, "debug codegen", FATAL, CALLER)
+INLINE_OBSERVATION(IS_JIT_NOINLINE, bool, "noinline per JitNoInlineRange", FATAL, CALLER)
+INLINE_OBSERVATION(NEEDS_SECURITY_CHECK, bool, "needs security check", FATAL, CALLER)
+
+// ------ Caller Information -------
+
+INLINE_OBSERVATION(HAS_NEWARRAY, bool, "has newarray", INFORMATION, CALLER)
+INLINE_OBSERVATION(HAS_NEWOBJ, bool, "has newobj", INFORMATION, CALLER)
+
+// ------ Call Site Correctness -------
+
+INLINE_OBSERVATION(ARG_HAS_NULL_THIS, bool, "this pointer argument is null", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_HAS_SIDE_EFFECT, bool, "argument has side effect", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_IS_MKREFANY, bool, "argument is mkrefany", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_NO_BASH_TO_INT, bool, "argument can't bash to int", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_NO_BASH_TO_REF, bool, "argument can't bash to ref", FATAL, CALLSITE)
+INLINE_OBSERVATION(ARG_TYPES_INCOMPATIBLE, bool, "argument types incompatible", FATAL, CALLSITE)
+INLINE_OBSERVATION(CANT_EMBED_PINVOKE_COOKIE, bool, "can't embed pinvoke cookie", FATAL, CALLSITE)
+INLINE_OBSERVATION(CANT_EMBED_VARARGS_COOKIE, bool, "can't embed varargs cookie", FATAL, CALLSITE)
+INLINE_OBSERVATION(CLASS_INIT_FAILURE_SPEC, bool, "speculative class init failed", FATAL, CALLSITE)
+INLINE_OBSERVATION(COMPILATION_ERROR, bool, "compilation error", FATAL, CALLSITE)
+INLINE_OBSERVATION(COMPILATION_FAILURE, bool, "failed to compile", FATAL, CALLSITE)
+INLINE_OBSERVATION(CONDITIONAL_THROW, bool, "conditional throw", FATAL, CALLSITE)
+INLINE_OBSERVATION(CROSS_BOUNDARY_CALLI, bool, "cross-boundary calli", FATAL, CALLSITE)
+INLINE_OBSERVATION(CROSS_BOUNDARY_SECURITY, bool, "cross-boundary security check", FATAL, CALLSITE)
+INLINE_OBSERVATION(EXCEEDS_THRESHOLD, bool, "exceeds profit threshold", FATAL, CALLSITE)
+INLINE_OBSERVATION(EXPLICIT_TAIL_PREFIX, bool, "explicit tail prefix", FATAL, CALLSITE)
+INLINE_OBSERVATION(GENERIC_DICTIONARY_LOOKUP, bool, "runtime dictionary lookup", FATAL, CALLSITE)
+INLINE_OBSERVATION(HAS_CALL_VIA_LDVIRTFTN, bool, "call via ldvirtftn", FATAL, CALLSITE)
+INLINE_OBSERVATION(HAS_COMPLEX_HANDLE, bool, "complex handle access", FATAL, CALLSITE)
+INLINE_OBSERVATION(HAS_LDSTR_RESTRICTION, bool, "has ldstr VM restriction", FATAL, CALLSITE)
+INLINE_OBSERVATION(IMPLICIT_REC_TAIL_CALL, bool, "implicit recursive tail call", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_CALL_TO_HELPER, bool, "target is helper", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_NOT_DIRECT, bool, "target not direct", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_NOT_DIRECT_MANAGED, bool, "target not direct managed", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_RECURSIVE, bool, "recursive", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_TOO_DEEP, bool, "too deep", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_VIRTUAL, bool, "virtual", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_VM_NOINLINE, bool, "noinline per VM", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_WITHIN_CATCH, bool, "within catch region", FATAL, CALLSITE)
+INLINE_OBSERVATION(IS_WITHIN_FILTER,         bool, "within filter region",        FATAL, CALLSITE)
+INLINE_OBSERVATION(LDARGA_NOT_LOCAL_VAR, bool, "ldarga not on local var", FATAL, CALLSITE)
+INLINE_OBSERVATION(LDFLD_NEEDS_HELPER, bool, "ldfld needs helper", FATAL, CALLSITE)
+INLINE_OBSERVATION(LDVIRTFN_ON_NON_VIRTUAL, bool, "ldvirtfn on non-virtual", FATAL, CALLSITE)
+INLINE_OBSERVATION(LOG_REPLAY_REJECT, bool, "rejected by log replay", FATAL, CALLSITE)
+INLINE_OBSERVATION(NOT_CANDIDATE, bool, "not inline candidate", FATAL, CALLSITE)
+INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLSITE)
+INLINE_OBSERVATION(OVER_BUDGET, bool, "inline exceeds budget", FATAL, CALLSITE)
+INLINE_OBSERVATION(OVER_INLINE_LIMIT, bool, "limited by JitInlineLimit", FATAL, CALLSITE)
+INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLSITE)
+INLINE_OBSERVATION(REQUIRES_SAME_THIS, bool, "requires same this", FATAL, CALLSITE)
+INLINE_OBSERVATION(RETURN_TYPE_MISMATCH, bool, "return type mismatch", FATAL, CALLSITE)
+INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLSITE)
+INLINE_OBSERVATION(TOO_MANY_LOCALS, bool, "too many locals", FATAL, CALLSITE)
+
+// ------ Call Site Performance -------
+
+
+// ------ Call Site Information -------
+
+INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(FREQUENCY, int, "rough call site frequency", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(IS_PROFITABLE_INLINE, bool, "profitable inline", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(IS_SAME_THIS, bool, "same this as root caller", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(IS_SIZE_DECREASING_INLINE, bool, "size decreasing inline", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(LOG_REPLAY_ACCEPT, bool, "accepted by log replay", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(RANDOM_ACCEPT, bool, "random accept", INFORMATION, CALLSITE)
+INLINE_OBSERVATION(WEIGHT, int, "call site frequency", INFORMATION, CALLSITE)
+
+// ------ Final Sentinel -------
+
+INLINE_OBSERVATION(UNUSED_FINAL, bool, "unused final observation", FATAL, CALLEE)
+
diff --git a/src/jit/inline.h b/src/jit/inline.h
new file mode 100644
index 0000000000..e3d5750754
--- /dev/null
+++ b/src/jit/inline.h
@@ -0,0 +1,894 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Inlining Support
+//
+// This file contains enum and class definitions and related
+// information that the jit uses to make inlining decisions.
+//
+// -- ENUMS --
+//
+// InlineCallsiteFrequency - rough assessment of call site frequency
+// InlineDecision - overall decision made about an inline
+// InlineTarget - target of a particular observation
+// InlineImpact - impact of a particular observation
+// InlineObservation - facts observed when considering an inline
+//
+// -- CLASSES --
+//
+// InlineResult - accumulates observations, consults with policy
+// InlineCandidateInfo - basic information needed for inlining
+// InlArgInfo - information about a candidate's argument
+// InlLclVarInfo - information about a candidate's local variable
+// InlineInfo - detailed information needed for inlining
+// InlineContext - class, remembers what inlines happened
+// InlinePolicy - class, determines policy for inlining
+// InlineStrategy - class, determines overall inline strategy
+//
+// Enums are used throughout to provide various descriptions.
+//
+// There are 4 situations where inline candidacy is evaluated. In each
+// case an InlineResult is allocated on the stack to collect
+// information about the inline candidate. Each InlineResult refers
+// to an InlinePolicy.
+//
+// 1. Importer Candidate Screen (impMarkInlineCandidate)
+//
+// Creates: InlineCandidateInfo
+//
+// During importing, the IL being imported is scanned to identify
+// inline candidates. This happens both when the root method is being
+// imported as well as when prospective inlines are being imported.
+// Candidates are marked in the IL and given an InlineCandidateInfo.
+//
+// 2. Inlining Optimization Pass -- candidates (fgInline)
+//
+// Creates / Uses: InlineContext
+//   Creates: InlineInfo, InlArgInfo, InlLclVarInfo
+//
+// During the inlining optimization pass, each candidate is further
+// analyzed. Viable candidates will eventually inspire creation of an
+// InlineInfo and a set of InlArgInfos (for call arguments) and
+// InlLclVarInfos (for callee locals).
+//
+// The analysis will also examine InlineContexts from relevant prior
+// inlines. If the inline is successful, a new InlineContext will be
+// created to remember this inline. In DEBUG builds, failing inlines
+// also create InlineContexts.
+//
+// 3. Inlining Optimization Pass -- non-candidates (fgNoteNotInlineCandidate)
+//
+// Creates / Uses: InlineContext
+//
+// In DEBUG, the jit also searches for non-candidate calls to try
+// and get a complete picture of the set of failed inlines.
+//
+// 4. Prejit suitability screen (compCompileHelper)
+//
+// When prejitting, each method is scanned to see if it is a viable
+// inline candidate.
+
+#ifndef _INLINE_H_
+#define _INLINE_H_
+
+#include "jit.h"
+#include "gentree.h"
+
+// Implementation limits
+
+#ifndef LEGACY_BACKEND
+const unsigned int MAX_INL_ARGS = 32; // does not include obj pointer
+const unsigned int MAX_INL_LCLS = 32;
+#else // LEGACY_BACKEND
+const unsigned int MAX_INL_ARGS = 10; // does not include obj pointer
+const unsigned int MAX_INL_LCLS = 8;
+#endif // LEGACY_BACKEND
+
+// Flags lost during inlining.
+
+#define CORJIT_FLG_LOST_WHEN_INLINING \
+ (CORJIT_FLG_BBOPT | CORJIT_FLG_BBINSTR | CORJIT_FLG_PROF_ENTERLEAVE | CORJIT_FLG_DEBUG_EnC | CORJIT_FLG_DEBUG_INFO)
+
+// Forward declarations
+
+class InlineStrategy;
+
+// InlineCallsiteFrequency gives a rough classification of how
+// often a call site will be executed at runtime.
+
+enum class InlineCallsiteFrequency
+{
+ UNUSED, // n/a
+ RARE, // once in a blue moon
+ BORING, // normal call site
+ WARM, // seen during profiling
+ LOOP, // in a loop
+ HOT // very frequent
+};
+
+// InlineDecision describes the various states the jit goes through when
+// evaluating an inline candidate. It is distinct from CorInfoInline
+// because it must capture internal states that don't get reported back
+// to the runtime.
+
+enum class InlineDecision
+{
+ UNDECIDED,
+ CANDIDATE,
+ SUCCESS,
+ FAILURE,
+ NEVER
+};
+
+// Translate a decision into a CorInfoInline for reporting back to the runtime.
+
+CorInfoInline InlGetCorInfoInlineDecision(InlineDecision d);
+
+// Get a string describing this InlineDecision
+
+const char* InlGetDecisionString(InlineDecision d);
+
+// True if this InlineDecision describes a failing inline
+
+bool InlDecisionIsFailure(InlineDecision d);
+
+// True if this decision describes a successful inline
+
+bool InlDecisionIsSuccess(InlineDecision d);
+
+// True if this InlineDecision is a never inline decision
+
+bool InlDecisionIsNever(InlineDecision d);
+
+// True if this InlineDecision describes a viable candidate
+
+bool InlDecisionIsCandidate(InlineDecision d);
+
+// True if this InlineDecision describes a decision
+
+bool InlDecisionIsDecided(InlineDecision d);
+
+// InlineTarget describes the possible targets of an inline observation.
+
+enum class InlineTarget
+{
+ CALLEE, // observation applies to all calls to this callee
+ CALLER, // observation applies to all calls made by this caller
+ CALLSITE // observation applies to a specific call site
+};
+
+// InlineImpact describes the possible impact of an inline observation.
+
+enum class InlineImpact
+{
+ FATAL, // inlining impossible, unsafe to evaluate further
+ FUNDAMENTAL, // inlining impossible for fundamental reasons, deeper exploration safe
+ LIMITATION, // inlining impossible because of jit limitations, deeper exploration safe
+ PERFORMANCE, // inlining inadvisable because of performance concerns
+ INFORMATION // policy-free observation to provide data for later decision making
+};
+
+// InlineObservation describes the set of possible inline observations.
+
+enum class InlineObservation
+{
+#define INLINE_OBSERVATION(name, type, description, impact, scope) scope##_##name,
+#include "inline.def"
+#undef INLINE_OBSERVATION
+};
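+
+// For illustration: an entry in inline.def such as
+//
+//    INLINE_OBSERVATION(HAS_SWITCH, bool, "has switch", INFORMATION, CALLEE)
+//
+// expands via the macro above to the enumerator CALLEE_HAS_SWITCH.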
+
+#ifdef DEBUG
+
+// Sanity check the observation value
+
+bool InlIsValidObservation(InlineObservation obs);
+
+#endif // DEBUG
+
+// Get a string describing this observation
+
+const char* InlGetObservationString(InlineObservation obs);
+
+// Get a string describing the target of this observation
+
+const char* InlGetTargetString(InlineObservation obs);
+
+// Get a string describing the impact of this observation
+
+const char* InlGetImpactString(InlineObservation obs);
+
+// Get the target of this observation
+
+InlineTarget InlGetTarget(InlineObservation obs);
+
+// Get the impact of this observation
+
+InlineImpact InlGetImpact(InlineObservation obs);
+
+// InlinePolicy is an abstract base class for a family of inline
+// policies.
+
+class InlinePolicy
+{
+public:
+ // Factory method for getting policies
+ static InlinePolicy* GetPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Obligatory virtual dtor
+ virtual ~InlinePolicy()
+ {
+ }
+
+ // Get the current decision
+ InlineDecision GetDecision() const
+ {
+ return m_Decision;
+ }
+
+ // Get the observation responsible for the result
+ InlineObservation GetObservation() const
+ {
+ return m_Observation;
+ }
+
+ // Policy observations
+ virtual void NoteSuccess() = 0;
+ virtual void NoteBool(InlineObservation obs, bool value) = 0;
+ virtual void NoteFatal(InlineObservation obs) = 0;
+ virtual void NoteInt(InlineObservation obs, int value) = 0;
+
+ // Optional observations. Most policies ignore these.
+ virtual void NoteContext(InlineContext* context)
+ {
+ (void)context;
+ }
+ virtual void NoteOffset(IL_OFFSETX offset)
+ {
+ (void)offset;
+ }
+
+ // Policy determinations
+ virtual void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) = 0;
+
+ // Policy policies
+ virtual bool PropagateNeverToRuntime() const = 0;
+ virtual bool IsLegacyPolicy() const = 0;
+
+ // Policy estimates
+ virtual int CodeSizeEstimate() = 0;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Name of the policy
+ virtual const char* GetName() const = 0;
+ // Detailed data value dump
+ virtual void DumpData(FILE* file) const
+ {
+ }
+ // Detailed data name dump
+ virtual void DumpSchema(FILE* file) const
+ {
+ }
+ // True if this is the inline targeted by data collection
+ bool IsDataCollectionTarget()
+ {
+ return m_IsDataCollectionTarget;
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+protected:
+ InlinePolicy(bool isPrejitRoot)
+ : m_Decision(InlineDecision::UNDECIDED)
+ , m_Observation(InlineObservation::CALLEE_UNUSED_INITIAL)
+ , m_IsPrejitRoot(isPrejitRoot)
+#if defined(DEBUG) || defined(INLINE_DATA)
+ , m_IsDataCollectionTarget(false)
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ {
+ // empty
+ }
+
+private:
+ // No copying or assignment supported
+ InlinePolicy(const InlinePolicy&) = delete;
+ InlinePolicy& operator=(const InlinePolicy&) = delete;
+
+protected:
+ InlineDecision m_Decision;
+ InlineObservation m_Observation;
+ bool m_IsPrejitRoot;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ bool m_IsDataCollectionTarget;
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+// InlineResult summarizes what is known about the viability of a
+// particular inline candidate.
+
+class InlineResult
+{
+public:
+ // Construct a new InlineResult to help evaluate a
+ // particular call for inlining.
+ InlineResult(Compiler* compiler, GenTreeCall* call, GenTreeStmt* stmt, const char* description);
+
+ // Construct a new InlineResult to evaluate a particular
+ // method to see if it is inlineable.
+ InlineResult(Compiler* compiler, CORINFO_METHOD_HANDLE method, const char* description);
+
+ // Has the policy determined this inline should fail?
+ bool IsFailure() const
+ {
+ return InlDecisionIsFailure(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline will succeed?
+ bool IsSuccess() const
+ {
+ return InlDecisionIsSuccess(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline will fail,
+ // and that the callee should never be inlined?
+ bool IsNever() const
+ {
+ return InlDecisionIsNever(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline attempt is still viable?
+ bool IsCandidate() const
+ {
+ return InlDecisionIsCandidate(m_Policy->GetDecision());
+ }
+
+ // Has the policy determined this inline attempt is still viable
+ // and is a discretionary inline?
+ bool IsDiscretionaryCandidate() const
+ {
+ bool result = InlDecisionIsCandidate(m_Policy->GetDecision()) &&
+ (m_Policy->GetObservation() == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ return result;
+ }
+
+ // Has the policy made a determination?
+ bool IsDecided() const
+ {
+ return InlDecisionIsDecided(m_Policy->GetDecision());
+ }
+
+    // NoteSuccess means that all the various checks have passed and
+ // the inline can happen.
+ void NoteSuccess()
+ {
+ assert(IsCandidate());
+ m_Policy->NoteSuccess();
+ }
+
+ // Make a true observation, and update internal state
+ // appropriately.
+ //
+    // Caller is expected to call IsFailure after this to see whether
+ // more observation is desired.
+ void Note(InlineObservation obs)
+ {
+ m_Policy->NoteBool(obs, true);
+ }
+
+ // Make a boolean observation, and update internal state
+ // appropriately.
+ //
+    // Caller is expected to call IsFailure after this to see whether
+ // more observation is desired.
+ void NoteBool(InlineObservation obs, bool value)
+ {
+ m_Policy->NoteBool(obs, value);
+ }
+
+ // Make an observation that must lead to immediate failure.
+ void NoteFatal(InlineObservation obs)
+ {
+ m_Policy->NoteFatal(obs);
+ assert(IsFailure());
+ }
+
+ // Make an observation with an int value
+ void NoteInt(InlineObservation obs, int value)
+ {
+ m_Policy->NoteInt(obs, value);
+ }
+
+ // Determine if this inline is profitable
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+ {
+ m_Policy->DetermineProfitability(methodInfo);
+ }
+
+ // Ensure details of this inlining process are appropriately
+ // reported when the result goes out of scope.
+ ~InlineResult()
+ {
+ Report();
+ }
+
+ // The observation leading to this particular result
+ InlineObservation GetObservation() const
+ {
+ return m_Policy->GetObservation();
+ }
+
+ // The callee handle for this result
+ CORINFO_METHOD_HANDLE GetCallee() const
+ {
+ return m_Callee;
+ }
+
+ // The call being considered
+ GenTreeCall* GetCall() const
+ {
+ return m_Call;
+ }
+
+ // Result that can be reported back to the runtime
+ CorInfoInline Result() const
+ {
+ return InlGetCorInfoInlineDecision(m_Policy->GetDecision());
+ }
+
+ // String describing the decision made
+ const char* ResultString() const
+ {
+ return InlGetDecisionString(m_Policy->GetDecision());
+ }
+
+ // String describing the reason for the decision
+ const char* ReasonString() const
+ {
+ return InlGetObservationString(m_Policy->GetObservation());
+ }
+
+ // Get the policy that evaluated this result.
+ InlinePolicy* GetPolicy() const
+ {
+ return m_Policy;
+ }
+
+ // True if the policy used for this result is (exactly) the legacy
+ // policy.
+ bool UsesLegacyPolicy() const
+ {
+ return m_Policy->IsLegacyPolicy();
+ }
+
+ // SetReported indicates that this particular result doesn't need
+ // to be reported back to the runtime, either because the runtime
+ // already knows, or we aren't actually inlining yet.
+ void SetReported()
+ {
+ m_Reported = true;
+ }
+
+ // Get the InlineContext for this inline
+ InlineContext* GetInlineContext() const
+ {
+ return m_InlineContext;
+ }
+
+private:
+ // No copying or assignment allowed.
+ InlineResult(const InlineResult&) = delete;
+ InlineResult& operator=(const InlineResult&) = delete;
+
+ // Report/log/dump decision as appropriate
+ void Report();
+
+ Compiler* m_RootCompiler;
+ InlinePolicy* m_Policy;
+ GenTreeCall* m_Call;
+ InlineContext* m_InlineContext;
+ CORINFO_METHOD_HANDLE m_Caller; // immediate caller's handle
+ CORINFO_METHOD_HANDLE m_Callee;
+ const char* m_Description;
+ bool m_Reported;
+};
+
+// InlineCandidateInfo provides basic information about a particular
+// inline candidate.
+
+struct InlineCandidateInfo
+{
+ DWORD dwRestrictions;
+ CORINFO_METHOD_INFO methInfo;
+ unsigned methAttr;
+ CORINFO_CLASS_HANDLE clsHandle;
+ unsigned clsAttr;
+ var_types fncRetType;
+ CORINFO_METHOD_HANDLE ilCallerHandle; // the logical IL caller of this inlinee.
+ CORINFO_CONTEXT_HANDLE exactContextHnd;
+ CorInfoInitClassResult initClassResult;
+};
+
+// InlArgInfo describes inline candidate argument properties.
+
+struct InlArgInfo
+{
+ unsigned argIsUsed : 1; // is this arg used at all?
+ unsigned argIsInvariant : 1; // the argument is a constant or a local variable address
+ unsigned argIsLclVar : 1; // the argument is a local variable
+ unsigned argIsThis : 1; // the argument is the 'this' pointer
+ unsigned argHasSideEff : 1; // the argument has side effects
+ unsigned argHasGlobRef : 1; // the argument has a global ref
+ unsigned argHasTmp : 1; // the argument will be evaluated to a temp
+ unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a
+ // field in them?
+ unsigned argHasLdargaOp : 1; // Is there LDARGA(s) operation on this argument?
+ unsigned argHasStargOp : 1; // Is there STARG(s) operation on this argument?
+
+ unsigned argTmpNum; // the argument tmp number
+ GenTreePtr argNode;
+ GenTreePtr argBashTmpNode; // tmp node created, if it may be replaced with actual arg
+};
+
+// InlLclVarInfo describes inline candidate local variable properties.
+
+struct InlLclVarInfo
+{
+ var_types lclTypeInfo;
+ typeInfo lclVerTypeInfo;
+    bool      lclHasLdlocaOp; // Is there LDLOCA(s) operation on this local variable?
+};
+
+// InlineInfo provides detailed information about a particular inline candidate.
+
+struct InlineInfo
+{
+ Compiler* InlinerCompiler; // The Compiler instance for the caller (i.e. the inliner)
+ Compiler* InlineRoot; // The Compiler instance that is the root of the inlining tree of which the owner of "this" is
+ // a member.
+
+ CORINFO_METHOD_HANDLE fncHandle;
+ InlineCandidateInfo* inlineCandidateInfo;
+
+ InlineResult* inlineResult;
+
+ GenTreePtr retExpr; // The return expression of the inlined candidate.
+
+ CORINFO_CONTEXT_HANDLE tokenLookupContextHandle; // The context handle that will be passed to
+ // impTokenLookupContextHandle in Inlinee's Compiler.
+
+ unsigned argCnt;
+ InlArgInfo inlArgInfo[MAX_INL_ARGS + 1];
+ int lclTmpNum[MAX_INL_LCLS]; // map local# -> temp# (-1 if unused)
+ InlLclVarInfo lclVarInfo[MAX_INL_LCLS + MAX_INL_ARGS + 1]; // type information from local sig
+
+ bool thisDereferencedFirst;
+#ifdef FEATURE_SIMD
+ bool hasSIMDTypeArgLocalOrReturn;
+#endif // FEATURE_SIMD
+
+ GenTreeCall* iciCall; // The GT_CALL node to be inlined.
+ GenTree* iciStmt; // The statement iciCall is in.
+ BasicBlock* iciBlock; // The basic block iciStmt is in.
+};
+
+// InlineContext tracks the inline history in a method.
+//
+// Notes:
+//
+// InlineContexts form a tree with the root method as the root and
+// inlines as children. Nested inlines are represented as grandchildren
+// and so on.
+//
+// Leaves in the tree represent successful inlines of leaf methods.
+// In DEBUG builds we also keep track of failed inline attempts.
+//
+// During inlining, all statements in the IR refer back to the
+// InlineContext that is responsible for those statements existing.
+// This makes it possible to detect recursion and to keep track of the
+// depth of each inline attempt.
+
+class InlineContext
+{
+ // InlineContexts are created by InlineStrategies
+ friend class InlineStrategy;
+
+public:
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Dump the full subtree, including failures
+ void Dump(unsigned indent = 0);
+
+ // Dump only the success subtree, with rich data
+ void DumpData(unsigned indent = 0);
+
+ // Dump full subtree in xml format
+ void DumpXml(FILE* file = stderr, unsigned indent = 0);
+
+ // Get callee handle
+ CORINFO_METHOD_HANDLE GetCallee() const
+ {
+ return m_Callee;
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Get the parent context for this context.
+ InlineContext* GetParent() const
+ {
+ return m_Parent;
+ }
+
+ // Get the code pointer for this context.
+ BYTE* GetCode() const
+ {
+ return m_Code;
+ }
+
+ // True if this context describes a successful inline.
+ bool IsSuccess() const
+ {
+ return m_Success;
+ }
+
+ // Get the observation that supported or disqualified this inline.
+ InlineObservation GetObservation() const
+ {
+ return m_Observation;
+ }
+
+ // Get the IL code size for this inline.
+ unsigned GetILSize() const
+ {
+ return m_ILSize;
+ }
+
+ // Get the native code size estimate for this inline.
+ unsigned GetCodeSizeEstimate() const
+ {
+ return m_CodeSizeEstimate;
+ }
+
+ // Get the offset of the call site
+ IL_OFFSETX GetOffset() const
+ {
+ return m_Offset;
+ }
+
+ // True if this is the root context
+ bool IsRoot() const
+ {
+ return m_Parent == nullptr;
+ }
+
+private:
+ InlineContext(InlineStrategy* strategy);
+
+private:
+ InlineStrategy* m_InlineStrategy; // overall strategy
+ InlineContext* m_Parent; // logical caller (parent)
+ InlineContext* m_Child; // first child
+ InlineContext* m_Sibling; // next child of the parent
+ BYTE* m_Code; // address of IL buffer for the method
+ unsigned m_ILSize; // size of IL buffer for the method
+ IL_OFFSETX m_Offset; // call site location within parent
+    InlineObservation m_Observation; // what led to this inline
+ int m_CodeSizeEstimate; // in bytes * 10
+ bool m_Success; // true if this was a successful inline
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ InlinePolicy* m_Policy; // policy that evaluated this inline
+ CORINFO_METHOD_HANDLE m_Callee; // handle to the method
+ unsigned m_TreeID; // ID of the GenTreeCall
+ unsigned m_Ordinal; // Ordinal number of this inline
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+// The InlineStrategy holds the per-method persistent inline state.
+// It is responsible for providing information that applies to
+// multiple inlining decisions.
+
+class InlineStrategy
+{
+
+public:
+ // Construct a new inline strategy.
+ InlineStrategy(Compiler* compiler);
+
+ // Create context for a successful inline.
+ InlineContext* NewSuccess(InlineInfo* inlineInfo);
+
+ // Create context for a failing inline.
+ InlineContext* NewFailure(GenTree* stmt, InlineResult* inlineResult);
+
+ // Compiler associated with this strategy
+ Compiler* GetCompiler() const
+ {
+ return m_Compiler;
+ }
+
+ // Root context
+ InlineContext* GetRootContext();
+
+    // Context for the last successful inline
+ // (or root if no inlines)
+ InlineContext* GetLastContext() const
+ {
+ return m_LastContext;
+ }
+
+ // Get IL size for maximum allowable inline
+ unsigned GetMaxInlineILSize() const
+ {
+ return m_MaxInlineSize;
+ }
+
+ // Get depth of maximum allowable inline
+ unsigned GetMaxInlineDepth() const
+ {
+ return m_MaxInlineDepth;
+ }
+
+ // Number of successful inlines into the root
+ unsigned GetInlineCount() const
+ {
+ return m_InlineCount;
+ }
+
+ // Return the current code size estimate for this method
+ int GetCurrentSizeEstimate() const
+ {
+ return m_CurrentSizeEstimate;
+ }
+
+ // Return the initial code size estimate for this method
+ int GetInitialSizeEstimate() const
+ {
+ return m_InitialSizeEstimate;
+ }
+
+ // Inform strategy that there's another call
+ void NoteCall()
+ {
+ m_CallCount++;
+ }
+
+ // Inform strategy that there's a new inline candidate.
+ void NoteCandidate()
+ {
+ m_CandidateCount++;
+ }
+
+ // Inform strategy that a candidate was assessed and determined to
+ // be unprofitable.
+ void NoteUnprofitable()
+ {
+ m_UnprofitableCandidateCount++;
+ }
+
+ // Inform strategy that a candidate has passed screening
+ // and that the jit will attempt to inline.
+ void NoteAttempt(InlineResult* result);
+
+ // Inform strategy that jit is about to import the inlinee IL.
+ void NoteImport()
+ {
+ m_ImportCount++;
+ }
+
+ // Dump csv header for inline stats to indicated file.
+ static void DumpCsvHeader(FILE* f);
+
+ // Dump csv data for inline stats to indicated file.
+ void DumpCsvData(FILE* f);
+
+ // See if an inline of this size would fit within the current jit
+ // time budget.
+ bool BudgetCheck(unsigned ilSize);
+
+ // Check if this method is not allowing inlines.
+ static bool IsNoInline(ICorJitInfo* info, CORINFO_METHOD_HANDLE method);
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Dump textual description of inlines done so far.
+ void Dump();
+
+ // Dump data-format description of inlines done so far.
+ void DumpData();
+ void DumpDataEnsurePolicyIsSet();
+ void DumpDataHeader(FILE* file);
+ void DumpDataSchema(FILE* file);
+ void DumpDataContents(FILE* file);
+
+ // Dump xml-formatted description of inlines
+ void DumpXml(FILE* file = stderr, unsigned indent = 0);
+ static void FinalizeXml(FILE* file = stderr);
+
+ // Cache for file position of this method in the inline xml
+ long GetMethodXmlFilePosition()
+ {
+ return m_MethodXmlFilePosition;
+ }
+
+ void SetMethodXmlFilePosition(long val)
+ {
+ m_MethodXmlFilePosition = val;
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Some inline limit values
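+    //
+    // For example, a callee whose IL size is at or below ALWAYS_INLINE_SIZE
+    // (16 bytes) is marked as an inline candidate based on size alone; see
+    // the CALLEE_IL_CODE_SIZE handling in LegacyPolicy::NoteInt.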
+ enum
+ {
+ ALWAYS_INLINE_SIZE = 16,
+ IMPLEMENTATION_MAX_INLINE_SIZE = _UI16_MAX,
+ IMPLEMENTATION_MAX_INLINE_DEPTH = 1000
+ };
+
+private:
+ // Create a context for the root method.
+ InlineContext* NewRoot();
+
+ // Accounting updates for a successful or failed inline.
+ void NoteOutcome(InlineContext* context);
+
+ // Cap on allowable increase in jit time due to inlining.
+ // Multiplicative, so BUDGET = 10 means up to 10x increase
+ // in jit time.
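+    //
+    // Illustrative sketch: if the root method's initial jit time estimate
+    // is T, the overall budget is roughly BUDGET * T, and BudgetCheck
+    // reports an inline as over budget once the accumulated time estimates
+    // for the method and its inlinees would exceed that cap.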
+ enum
+ {
+ BUDGET = 10
+ };
+
+ // Estimate the jit time change because of this inline.
+ int EstimateTime(InlineContext* context);
+
+ // EstimateTime helpers
+ int EstimateRootTime(unsigned ilSize);
+ int EstimateInlineTime(unsigned ilSize);
+
+ // Estimate native code size change because of this inline.
+ int EstimateSize(InlineContext* context);
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ static bool s_HasDumpedDataHeader;
+ static bool s_HasDumpedXmlHeader;
+ static CritSecObject s_XmlWriterLock;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ Compiler* m_Compiler;
+ InlineContext* m_RootContext;
+ InlinePolicy* m_LastSuccessfulPolicy;
+ InlineContext* m_LastContext;
+ unsigned m_CallCount;
+ unsigned m_CandidateCount;
+ unsigned m_AlwaysCandidateCount;
+ unsigned m_ForceCandidateCount;
+ unsigned m_DiscretionaryCandidateCount;
+ unsigned m_UnprofitableCandidateCount;
+ unsigned m_ImportCount;
+ unsigned m_InlineCount;
+ unsigned m_MaxInlineSize;
+ unsigned m_MaxInlineDepth;
+ int m_InitialTimeBudget;
+ int m_InitialTimeEstimate;
+ int m_CurrentTimeBudget;
+ int m_CurrentTimeEstimate;
+ int m_InitialSizeEstimate;
+ int m_CurrentSizeEstimate;
+ bool m_HasForceViaDiscretionary;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+ long m_MethodXmlFilePosition;
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+#endif // _INLINE_H_
diff --git a/src/jit/inlinepolicy.cpp b/src/jit/inlinepolicy.cpp
new file mode 100644
index 0000000000..f80f3a5ec0
--- /dev/null
+++ b/src/jit/inlinepolicy.cpp
@@ -0,0 +1,2857 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "inlinepolicy.h"
+#include "sm.h"
+
+//------------------------------------------------------------------------
+// getPolicy: Factory method for getting an InlinePolicy
+//
+// Arguments:
+// compiler - the compiler instance that will evaluate inlines
+// isPrejitRoot - true if this policy is evaluating a prejit root
+//
+// Return Value:
+// InlinePolicy to use in evaluating an inline.
+//
+// Notes:
+// Determines which of the various policies should apply,
+// and creates (or reuses) a policy instance to use.
+
+InlinePolicy* InlinePolicy::GetPolicy(Compiler* compiler, bool isPrejitRoot)
+{
+
+#ifdef DEBUG
+
+ // Optionally install the RandomPolicy.
+ bool useRandomPolicy = compiler->compRandomInlineStress();
+
+ if (useRandomPolicy)
+ {
+ unsigned seed = getJitStressLevel();
+ assert(seed != 0);
+ return new (compiler, CMK_Inlining) RandomPolicy(compiler, isPrejitRoot, seed);
+ }
+
+#endif // DEBUG
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Optionally install the ReplayPolicy.
+ bool useReplayPolicy = JitConfig.JitInlinePolicyReplay() != 0;
+
+ if (useReplayPolicy)
+ {
+ return new (compiler, CMK_Inlining) ReplayPolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally install the SizePolicy.
+ bool useSizePolicy = JitConfig.JitInlinePolicySize() != 0;
+
+ if (useSizePolicy)
+ {
+ return new (compiler, CMK_Inlining) SizePolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally install the FullPolicy.
+ bool useFullPolicy = JitConfig.JitInlinePolicyFull() != 0;
+
+ if (useFullPolicy)
+ {
+ return new (compiler, CMK_Inlining) FullPolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally install the DiscretionaryPolicy.
+ bool useDiscretionaryPolicy = JitConfig.JitInlinePolicyDiscretionary() != 0;
+
+ if (useDiscretionaryPolicy)
+ {
+ return new (compiler, CMK_Inlining) DiscretionaryPolicy(compiler, isPrejitRoot);
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+ // Optionally install the ModelPolicy.
+ bool useModelPolicy = JitConfig.JitInlinePolicyModel() != 0;
+
+ if (useModelPolicy)
+ {
+ return new (compiler, CMK_Inlining) ModelPolicy(compiler, isPrejitRoot);
+ }
+
+ // Optionally fallback to the original legacy policy
+ bool useLegacyPolicy = JitConfig.JitInlinePolicyLegacy() != 0;
+
+ if (useLegacyPolicy)
+ {
+ return new (compiler, CMK_Inlining) LegacyPolicy(compiler, isPrejitRoot);
+ }
+
+ // Use the enhanced legacy policy by default
+ return new (compiler, CMK_Inlining) EnhancedLegacyPolicy(compiler, isPrejitRoot);
+}
+
+//------------------------------------------------------------------------
+// NoteFatal: handle an observation with fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::NoteFatal(InlineObservation obs)
+{
+ // As a safeguard, all fatal impact must be
+ // reported via noteFatal.
+ assert(InlGetImpact(obs) == InlineImpact::FATAL);
+ NoteInternal(obs);
+ assert(InlDecisionIsFailure(m_Decision));
+}
+
+//------------------------------------------------------------------------
+// NoteInternal: helper for handling an observation
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::NoteInternal(InlineObservation obs)
+{
+ // Note any INFORMATION that reaches here will now cause failure.
+ // Non-fatal INFORMATION observations must be handled higher up.
+ InlineTarget target = InlGetTarget(obs);
+
+ if (target == InlineTarget::CALLEE)
+ {
+ this->SetNever(obs);
+ }
+ else
+ {
+ this->SetFailure(obs);
+ }
+}
+
+//------------------------------------------------------------------------
+// SetFailure: helper for setting a failing decision
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::SetFailure(InlineObservation obs)
+{
+ // Expect a valid observation
+ assert(InlIsValidObservation(obs));
+
+ switch (m_Decision)
+ {
+ case InlineDecision::FAILURE:
+ // Repeated failure only ok if evaluating a prejit root
+ // (since we can't fail fast because we're not inlining)
+ // or if inlining and the observation is CALLSITE_TOO_MANY_LOCALS
+ // (since we can't fail fast from lvaGrabTemp).
+ assert(m_IsPrejitRoot || (obs == InlineObservation::CALLSITE_TOO_MANY_LOCALS));
+ break;
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ m_Decision = InlineDecision::FAILURE;
+ m_Observation = obs;
+ break;
+ default:
+ // SUCCESS, NEVER, or ??
+ assert(!"Unexpected m_Decision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// SetNever: helper for setting a never decision
+//
+// Arguments:
+//    obs      - the current observation
+
+void LegalPolicy::SetNever(InlineObservation obs)
+{
+ // Expect a valid observation
+ assert(InlIsValidObservation(obs));
+
+ switch (m_Decision)
+ {
+ case InlineDecision::NEVER:
+ // Repeated never only ok if evaluating a prejit root
+ assert(m_IsPrejitRoot);
+ break;
+ case InlineDecision::UNDECIDED:
+ case InlineDecision::CANDIDATE:
+ m_Decision = InlineDecision::NEVER;
+ m_Observation = obs;
+ break;
+ default:
+ // SUCCESS, FAILURE or ??
+ assert(!"Unexpected m_Decision");
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// SetCandidate: helper updating candidacy
+//
+// Arguments:
+//    obs      - the current observation
+//
+// Note:
+// Candidate observations are handled here. If the inline has already
+// failed, they're ignored. If there's already a candidate reason,
+// this new reason trumps it.
+
+void LegalPolicy::SetCandidate(InlineObservation obs)
+{
+ // Ignore if this inline is going to fail.
+ if (InlDecisionIsFailure(m_Decision))
+ {
+ return;
+ }
+
+ // We should not have declared success yet.
+ assert(!InlDecisionIsSuccess(m_Decision));
+
+ // Update, overriding any previous candidacy.
+ m_Decision = InlineDecision::CANDIDATE;
+ m_Observation = obs;
+}
+
+//------------------------------------------------------------------------
+// NoteSuccess: handle finishing all the inlining checks successfully
+
+void LegacyPolicy::NoteSuccess()
+{
+ assert(InlDecisionIsCandidate(m_Decision));
+ m_Decision = InlineDecision::SUCCESS;
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle a boolean observation with non-fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value of the observation
+void LegacyPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ // Check the impact
+ InlineImpact impact = InlGetImpact(obs);
+
+ // As a safeguard, all fatal impact must be
+ // reported via noteFatal.
+ assert(impact != InlineImpact::FATAL);
+
+ // Handle most information here
+ bool isInformation = (impact == InlineImpact::INFORMATION);
+ bool propagate = !isInformation;
+
+ if (isInformation)
+ {
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_IS_FORCE_INLINE:
+ // We may make the force-inline observation more than
+ // once. All observations should agree.
+ assert(!m_IsForceInlineKnown || (m_IsForceInline == value));
+ m_IsForceInline = value;
+ m_IsForceInlineKnown = true;
+ break;
+
+ case InlineObservation::CALLEE_IS_INSTANCE_CTOR:
+ m_IsInstanceCtor = value;
+ break;
+
+ case InlineObservation::CALLEE_CLASS_PROMOTABLE:
+ m_IsFromPromotableValueClass = value;
+ break;
+
+ case InlineObservation::CALLEE_HAS_SIMD:
+ m_HasSimd = value;
+ break;
+
+ case InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER:
+ // LegacyPolicy ignores this for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ m_LooksLikeWrapperMethod = value;
+ }
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST:
+ // LegacyPolicy ignores this for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ m_ArgFeedsConstantTest++;
+ }
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK:
+ // LegacyPolicy ignores this for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ m_ArgFeedsRangeCheck++;
+ }
+ break;
+
+ case InlineObservation::CALLEE_HAS_SWITCH:
+ case InlineObservation::CALLEE_UNSUPPORTED_OPCODE:
+ // LegacyPolicy ignores these for prejit roots.
+ if (!m_IsPrejitRoot)
+ {
+ // Pass these on, they should cause inlining to fail.
+ propagate = true;
+ }
+ break;
+
+ case InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST:
+ // We shouldn't see this for a prejit root since
+ // we don't know anything about callers.
+ assert(!m_IsPrejitRoot);
+ m_ConstantArgFeedsConstantTest++;
+ break;
+
+ case InlineObservation::CALLEE_BEGIN_OPCODE_SCAN:
+ {
+ // Set up the state machine, if this inline is
+ // discretionary and is still a candidate.
+ if (InlDecisionIsCandidate(m_Decision) &&
+ (m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE))
+ {
+ // Better not have a state machine already.
+ assert(m_StateMachine == nullptr);
+ m_StateMachine = new (m_RootCompiler, CMK_Inlining) CodeSeqSM;
+ m_StateMachine->Start(m_RootCompiler);
+ }
+ break;
+ }
+
+ case InlineObservation::CALLEE_END_OPCODE_SCAN:
+ {
+ if (m_StateMachine != nullptr)
+ {
+ m_StateMachine->End();
+ }
+
+ // If this function is mostly loads and stores, we
+ // should try harder to inline it. You can't just use
+ // the percentage test because if the method has 8
+ // instructions and 6 are loads, it's only 75% loads.
+ // This allows for CALL, RET, and one more non-ld/st
+ // instruction.
+ if (((m_InstructionCount - m_LoadStoreCount) < 4) ||
+ (((double)m_LoadStoreCount / (double)m_InstructionCount) > .90))
+ {
+ m_MethodIsMostlyLoadStore = true;
+ }
+
+ // Budget check.
+ //
+ // Conceptually this should happen when we
+ // observe the candidate's IL size.
+ //
+ // However, we do this here to avoid potential
+ // inconsistency between the state of the budget
+ // during candidate scan and the state when the IL is
+ // being scanned.
+ //
+ // Consider the case where we're just below the budget
+ // during candidate scan, and we have three possible
+ // inlines, any two of which put us over budget. We
+ // allow them all to become candidates. We then move
+ // on to inlining and the first two get inlined and
+ // put us over budget. Now the third can't be inlined
+ // anymore, but we have a policy that when we replay
+ // the candidate IL size during the inlining pass it
+ // "reestablishes" candidacy rather than alters
+ // candidacy ... so instead we bail out here.
+
+ if (!m_IsPrejitRoot)
+ {
+ InlineStrategy* strategy = m_RootCompiler->m_inlineStrategy;
+ bool overBudget = strategy->BudgetCheck(m_CodeSize);
+ if (overBudget)
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_BUDGET);
+ }
+ }
+
+ break;
+ }
+
+ default:
+ // Ignore the remainder for now
+ break;
+ }
+ }
+
+ if (propagate)
+ {
+ NoteInternal(obs);
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void LegacyPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_MAXSTACK:
+ {
+ assert(m_IsForceInlineKnown);
+
+ unsigned calleeMaxStack = static_cast<unsigned>(value);
+
+ if (!m_IsForceInline && (calleeMaxStack > SMALL_STACK_SIZE))
+ {
+ SetNever(InlineObservation::CALLEE_MAXSTACK_TOO_BIG);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS:
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+
+ unsigned basicBlockCount = static_cast<unsigned>(value);
+
+ if (!m_IsForceInline && (basicBlockCount > MAX_BASIC_BLOCKS))
+ {
+ SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_IL_CODE_SIZE:
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+ m_CodeSize = static_cast<unsigned>(value);
+
+ // Now that we know size and forceinline state,
+ // update candidacy.
+ if (m_CodeSize <= InlineStrategy::ALWAYS_INLINE_SIZE)
+ {
+ // Candidate based on small size
+ SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE);
+ }
+ else if (m_IsForceInline)
+ {
+ // Candidate based on force inline
+ SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE);
+ }
+ else if (m_CodeSize <= m_RootCompiler->m_inlineStrategy->GetMaxInlineILSize())
+ {
+ // Candidate, pending profitability evaluation
+ SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+ }
+ else
+ {
+ // Callee too big, not a candidate
+ SetNever(InlineObservation::CALLEE_TOO_MUCH_IL);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLSITE_DEPTH:
+ {
+ unsigned depth = static_cast<unsigned>(value);
+
+ if (depth > m_RootCompiler->m_inlineStrategy->GetMaxInlineDepth())
+ {
+ SetFailure(InlineObservation::CALLSITE_IS_TOO_DEEP);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_OPCODE_NORMED:
+ case InlineObservation::CALLEE_OPCODE:
+ {
+ m_InstructionCount++;
+ OPCODE opcode = static_cast<OPCODE>(value);
+
+ if (m_StateMachine != nullptr)
+ {
+ SM_OPCODE smOpcode = CodeSeqSM::MapToSMOpcode(opcode);
+ noway_assert(smOpcode < SM_COUNT);
+ noway_assert(smOpcode != SM_PREFIX_N);
+ if (obs == InlineObservation::CALLEE_OPCODE_NORMED)
+ {
+ if (smOpcode == SM_LDARGA_S)
+ {
+ smOpcode = SM_LDARGA_S_NORMED;
+ }
+ else if (smOpcode == SM_LDLOCA_S)
+ {
+ smOpcode = SM_LDLOCA_S_NORMED;
+ }
+ }
+
+ m_StateMachine->Run(smOpcode DEBUGARG(0));
+ }
+
+ // Look for opcodes that imply loads and stores.
+            // The logic is kept as-is to match legacy behavior.
+ if ((opcode >= CEE_LDARG_0 && opcode <= CEE_STLOC_S) || (opcode >= CEE_LDARG && opcode <= CEE_STLOC) ||
+ (opcode >= CEE_LDNULL && opcode <= CEE_LDC_R8) || (opcode >= CEE_LDIND_I1 && opcode <= CEE_STIND_R8) ||
+ (opcode >= CEE_LDFLD && opcode <= CEE_STOBJ) || (opcode >= CEE_LDELEMA && opcode <= CEE_STELEM) ||
+ (opcode == CEE_POP))
+ {
+ m_LoadStoreCount++;
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLSITE_FREQUENCY:
+ assert(m_CallsiteFrequency == InlineCallsiteFrequency::UNUSED);
+ m_CallsiteFrequency = static_cast<InlineCallsiteFrequency>(value);
+ assert(m_CallsiteFrequency != InlineCallsiteFrequency::UNUSED);
+ break;
+
+ default:
+ // Ignore all other information
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// DetermineMultiplier: determine benefit multiplier for this inline
+//
+// Notes: uses the accumulated set of observations to compute a
+// profitability boost for the inline candidate.
+
+double LegacyPolicy::DetermineMultiplier()
+{
+ double multiplier = 0;
+
+ // Bump up the multiplier for instance constructors
+
+ if (m_IsInstanceCtor)
+ {
+ multiplier += 1.5;
+ JITDUMP("\nmultiplier in instance constructors increased to %g.", multiplier);
+ }
+
+ // Bump up the multiplier for methods in promotable struct
+
+ if (m_IsFromPromotableValueClass)
+ {
+ multiplier += 3;
+ JITDUMP("\nmultiplier in methods of promotable struct increased to %g.", multiplier);
+ }
+
+#ifdef FEATURE_SIMD
+
+ if (m_HasSimd)
+ {
+ multiplier += JitConfig.JitInlineSIMDMultiplier();
+ JITDUMP("\nInline candidate has SIMD type args, locals or return value. Multiplier increased to %g.",
+ multiplier);
+ }
+
+#endif // FEATURE_SIMD
+
+ if (m_LooksLikeWrapperMethod)
+ {
+ multiplier += 1.0;
+ JITDUMP("\nInline candidate looks like a wrapper method. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_ArgFeedsConstantTest > 0)
+ {
+ multiplier += 1.0;
+ JITDUMP("\nInline candidate has an arg that feeds a constant test. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_MethodIsMostlyLoadStore)
+ {
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate is mostly loads and stores. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_ArgFeedsRangeCheck > 0)
+ {
+ multiplier += 0.5;
+ JITDUMP("\nInline candidate has arg that feeds range check. Multiplier increased to %g.", multiplier);
+ }
+
+ if (m_ConstantArgFeedsConstantTest > 0)
+ {
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate has const arg that feeds a conditional. Multiplier increased to %g.", multiplier);
+ }
+
+ switch (m_CallsiteFrequency)
+ {
+ case InlineCallsiteFrequency::RARE:
+ // Note this one is not additive, it uses '=' instead of '+='
+ multiplier = 1.3;
+ JITDUMP("\nInline candidate callsite is rare. Multiplier limited to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::BORING:
+ multiplier += 1.3;
+ JITDUMP("\nInline candidate callsite is boring. Multiplier increased to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::WARM:
+ multiplier += 2.0;
+ JITDUMP("\nInline candidate callsite is warm. Multiplier increased to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::LOOP:
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate callsite is in a loop. Multiplier increased to %g.", multiplier);
+ break;
+ case InlineCallsiteFrequency::HOT:
+ multiplier += 3.0;
+ JITDUMP("\nInline candidate callsite is hot. Multiplier increased to %g.", multiplier);
+ break;
+ default:
+ assert(!"Unexpected callsite frequency");
+ break;
+ }
+
+#ifdef DEBUG
+
+ int additionalMultiplier = JitConfig.JitInlineAdditionalMultiplier();
+
+ if (additionalMultiplier != 0)
+ {
+ multiplier += additionalMultiplier;
+        JITDUMP("\nmultiplier increased via JitInlineAdditionalMultiplier=%d to %g.", additionalMultiplier, multiplier);
+ }
+
+ if (m_RootCompiler->compInlineStress())
+ {
+ multiplier += 10;
+ JITDUMP("\nmultiplier increased via inline stress to %g.", multiplier);
+ }
+
+#endif // DEBUG
+
+ return multiplier;
+}
+
+//------------------------------------------------------------------------
+// DetermineNativeSizeEstimate: return estimated native code size for
+// this inline candidate.
+//
+// Notes:
+// This is an estimate for the size of the inlined callee.
+// It does not include size impact on the caller side.
+//
+// Uses the results of a state machine model for discretionary
+// candidates. Should not be needed for forced or always
+// candidates.
+
+int LegacyPolicy::DetermineNativeSizeEstimate()
+{
+ // Should be a discretionary candidate.
+ assert(m_StateMachine != nullptr);
+
+ return m_StateMachine->NativeSize;
+}
+
+//------------------------------------------------------------------------
+// DetermineCallsiteNativeSizeEstimate: estimate native size for the
+// callsite.
+//
+// Arguments:
+// methInfo -- method info for the callee
+//
+// Notes:
+// Estimates the native size (in bytes, scaled up by 10x) for the
+// call site. While the quality of the estimate here is questionable
+// (especially for x64) it is being left as is for legacy compatibility.
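+//
+//    Illustrative example based on the constants below: a call that passes
+//    a 'this' pointer and one non-struct argument is estimated at
+//    55 + 30 + 30 = 115, i.e. roughly 11.5 native bytes.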
+
+int LegacyPolicy::DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methInfo)
+{
+    int callsiteSize = 55; // Direct call takes 5 native bytes; indirect call takes 6 native bytes.
+
+ bool hasThis = methInfo->args.hasThis();
+
+ if (hasThis)
+ {
+ callsiteSize += 30; // "mov" or "lea"
+ }
+
+ CORINFO_ARG_LIST_HANDLE argLst = methInfo->args.args;
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+
+ for (unsigned i = (hasThis ? 1 : 0); i < methInfo->args.totalILArgs(); i++, argLst = comp->getArgNext(argLst))
+ {
+ var_types sigType = (var_types)m_RootCompiler->eeGetArgType(argLst, &methInfo->args);
+
+ if (sigType == TYP_STRUCT)
+ {
+ typeInfo verType = m_RootCompiler->verParseArgSigToTypeInfo(&methInfo->args, argLst);
+
+ /*
+
+ IN0028: 00009B lea EAX, bword ptr [EBP-14H]
+ IN0029: 00009E push dword ptr [EAX+4]
+ IN002a: 0000A1 push gword ptr [EAX]
+ IN002b: 0000A3 call [MyStruct.staticGetX2(struct):int]
+
+ */
+
+ callsiteSize += 10; // "lea EAX, bword ptr [EBP-14H]"
+
+ // NB sizeof (void*) fails to convey intent when cross-jitting.
+
+ unsigned opsz = (unsigned)(roundUp(comp->getClassSize(verType.GetClassHandle()), sizeof(void*)));
+ unsigned slots = opsz / sizeof(void*);
+
+ callsiteSize += slots * 20; // "push gword ptr [EAX+offs] "
+ }
+ else
+ {
+ callsiteSize += 30; // push by average takes 3 bytes.
+ }
+ }
+
+ return callsiteSize;
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+//
+// Notes:
+// A profitable inline is one that is projected to have a beneficial
+// size/speed tradeoff.
+//
+// It is expected that this method is only invoked for discretionary
+// candidates, since it does not make sense to do this assessment for
+// failed, always, or forced inlines.
+
+void LegacyPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+
+#if defined(DEBUG)
+
+ // Punt if we're inlining and we've reached the acceptance limit.
+ int limit = JitConfig.JitInlineLimit();
+ unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
+
+ if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast<unsigned>(limit)))
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
+ return;
+ }
+
+#endif // defined(DEBUG)
+
+ assert(InlDecisionIsCandidate(m_Decision));
+ assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ m_CalleeNativeSizeEstimate = DetermineNativeSizeEstimate();
+ m_CallsiteNativeSizeEstimate = DetermineCallsiteNativeSizeEstimate(methodInfo);
+ m_Multiplier = DetermineMultiplier();
+ const int threshold = (int)(m_CallsiteNativeSizeEstimate * m_Multiplier);
+
+ // Note the LegacyPolicy estimates are scaled up by SIZE_SCALE
+    JITDUMP("\ncalleeNativeSizeEstimate=%d\n", m_CalleeNativeSizeEstimate);
+ JITDUMP("callsiteNativeSizeEstimate=%d\n", m_CallsiteNativeSizeEstimate);
+ JITDUMP("benefit multiplier=%g\n", m_Multiplier);
+ JITDUMP("threshold=%d\n", threshold);
+
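+    // Worked example with illustrative numbers: a callsite estimate of 115
+    // (~11.5 bytes) and a multiplier of 2.0 give a threshold of 230, so a
+    // callee estimate of 200 (~20 bytes) is accepted as profitable, while an
+    // estimate of 300 would be rejected.
+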
+ // Reject if callee size is over the threshold
+ if (m_CalleeNativeSizeEstimate > threshold)
+ {
+ // Inline appears to be unprofitable
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Native estimate for function size exceeds threshold"
+ " for inlining %g > %g (multiplier = %g)\n",
+ (double)m_CalleeNativeSizeEstimate / SIZE_SCALE, (double)threshold / SIZE_SCALE, m_Multiplier));
+
+ // Fail the inline
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_NOT_PROFITABLE_INLINE);
+ }
+ }
+ else
+ {
+ // Inline appears to be profitable
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Native estimate for function size is within threshold"
+ " for inlining %g <= %g (multiplier = %g)\n",
+ (double)m_CalleeNativeSizeEstimate / SIZE_SCALE, (double)threshold / SIZE_SCALE, m_Multiplier));
+
+ // Update candidacy
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_PROFITABLE_INLINE);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// CodeSizeEstimate: estimated code size impact of the inline
+//
+// Return Value:
+// Estimated code size impact, in bytes * 10
+//
+// Notes:
+// Only meaningful for discretionary inlines (whether successful or
+// not). For always or force inlines the legacy policy doesn't
+// estimate size impact.
+
+int LegacyPolicy::CodeSizeEstimate()
+{
+ if (m_StateMachine != nullptr)
+ {
+ // This is not something the LegacyPolicy explicitly computed,
+ // since it uses a blended evaluation model (mixing size and time
+        // together for overall profitability). But it's effectively an
+ // estimate of the size impact.
+ return (m_CalleeNativeSizeEstimate - m_CallsiteNativeSizeEstimate);
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle a boolean observation with non-fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value of the observation
+
+void EnhancedLegacyPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_DOES_NOT_RETURN:
+ m_IsNoReturn = value;
+ m_IsNoReturnKnown = true;
+ break;
+
+ default:
+ // Pass all other information to the legacy policy
+ LegacyPolicy::NoteBool(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void EnhancedLegacyPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS:
+ {
+ assert(value != 0);
+ assert(m_IsNoReturnKnown);
+
+ //
+ // Let's be conservative for now and reject inlining of "no return" methods only
+ // if the callee contains a single basic block. This covers most of the use cases
+ // (typical throw helpers simply do "throw new X();" and so they have a single block)
+ // without affecting more exotic cases (loops that do actual work for example) where
+ // failure to inline could negatively impact code quality.
+ //
+
+ unsigned basicBlockCount = static_cast<unsigned>(value);
+
+ if (m_IsNoReturn && (basicBlockCount == 1))
+ {
+ SetNever(InlineObservation::CALLEE_DOES_NOT_RETURN);
+ }
+ else
+ {
+ LegacyPolicy::NoteInt(obs, value);
+ }
+
+ break;
+ }
+
+ default:
+ // Pass all other information to the legacy policy
+ LegacyPolicy::NoteInt(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// PropagateNeverToRuntime: determine if a never result should cause the
+// method to be marked as un-inlinable.
+
+bool EnhancedLegacyPolicy::PropagateNeverToRuntime() const
+{
+ //
+ // Do not propagate the "no return" observation. If we do this then future inlining
+ // attempts will fail immediately without marking the call node as "no return".
+ // This can have an adverse impact on caller's code quality as it may have to preserve
+ // registers across the call.
+ // TODO-Throughput: We should persist the "no return" information in the runtime
+ // so we don't need to re-analyze the inlinee all the time.
+ //
+
+ bool propagate = (m_Observation != InlineObservation::CALLEE_DOES_NOT_RETURN);
+
+ propagate &= LegacyPolicy::PropagateNeverToRuntime();
+
+ return propagate;
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// RandomPolicy: construct a new RandomPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+// seed -- seed value for the random number generator
+
+RandomPolicy::RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed)
+ : LegalPolicy(isPrejitRoot)
+ , m_RootCompiler(compiler)
+ , m_Random(nullptr)
+ , m_CodeSize(0)
+ , m_IsForceInline(false)
+ , m_IsForceInlineKnown(false)
+{
+ // If necessary, setup and seed the random state.
+ if (compiler->inlRNG == nullptr)
+ {
+ compiler->inlRNG = new (compiler, CMK_Inlining) CLRRandom();
+
+ unsigned hash = m_RootCompiler->info.compMethodHash();
+ assert(hash != 0);
+ assert(seed != 0);
+ int hashSeed = static_cast<int>(hash ^ seed);
+ compiler->inlRNG->Init(hashSeed);
+ }
+
+ m_Random = compiler->inlRNG;
+}
+
+//------------------------------------------------------------------------
+// NoteSuccess: handle finishing all the inlining checks successfully
+
+void RandomPolicy::NoteSuccess()
+{
+ assert(InlDecisionIsCandidate(m_Decision));
+ m_Decision = InlineDecision::SUCCESS;
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle a boolean observation with non-fatal impact
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value of the observation
+void RandomPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ // Check the impact
+ InlineImpact impact = InlGetImpact(obs);
+
+ // As a safeguard, all fatal impact must be
+ // reported via noteFatal.
+ assert(impact != InlineImpact::FATAL);
+
+ // Handle most information here
+ bool isInformation = (impact == InlineImpact::INFORMATION);
+ bool propagate = !isInformation;
+
+ if (isInformation)
+ {
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_IS_FORCE_INLINE:
+ // The RandomPolicy still honors force inlines.
+ //
+ // We may make the force-inline observation more than
+ // once. All observations should agree.
+ assert(!m_IsForceInlineKnown || (m_IsForceInline == value));
+ m_IsForceInline = value;
+ m_IsForceInlineKnown = true;
+ break;
+
+ case InlineObservation::CALLEE_HAS_SWITCH:
+ case InlineObservation::CALLEE_UNSUPPORTED_OPCODE:
+ // Pass these on, they should cause inlining to fail.
+ propagate = true;
+ break;
+
+ default:
+ // Ignore the remainder for now
+ break;
+ }
+ }
+
+ if (propagate)
+ {
+ NoteInternal(obs);
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void RandomPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+
+ case InlineObservation::CALLEE_IL_CODE_SIZE:
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+ m_CodeSize = static_cast<unsigned>(value);
+
+ if (m_IsForceInline)
+ {
+ // Candidate based on force inline
+ SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE);
+ }
+ else
+ {
+ // Candidate, pending profitability evaluation
+ SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+ }
+
+ break;
+ }
+
+ default:
+ // Ignore all other information
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+//
+// Notes:
+//    The random policy makes random decisions about profitability.
+// Generally we aspire to inline differently, not necessarily to
+// inline more.
+
+void RandomPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ assert(InlDecisionIsCandidate(m_Decision));
+ assert(m_Observation == InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+
+ // Budget check.
+ if (!m_IsPrejitRoot)
+ {
+ InlineStrategy* strategy = m_RootCompiler->m_inlineStrategy;
+ bool overBudget = strategy->BudgetCheck(m_CodeSize);
+ if (overBudget)
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_BUDGET);
+ return;
+ }
+ }
+
+ // Use a probability curve that roughly matches the observed
+ // behavior of the LegacyPolicy. That way we're inlining
+ // differently but not creating enormous methods.
+ //
+ // We vary a bit at the extremes. The RandomPolicy won't always
+ // inline the small methods (<= 16 IL bytes) and won't always
+ // reject the large methods (> 100 IL bytes).
+
+ unsigned threshold = 0;
+
+ if (m_CodeSize <= 16)
+ {
+ threshold = 75;
+ }
+ else if (m_CodeSize <= 30)
+ {
+ threshold = 50;
+ }
+ else if (m_CodeSize <= 40)
+ {
+ threshold = 40;
+ }
+ else if (m_CodeSize <= 50)
+ {
+ threshold = 30;
+ }
+ else if (m_CodeSize <= 75)
+ {
+ threshold = 20;
+ }
+ else if (m_CodeSize <= 100)
+ {
+ threshold = 10;
+ }
+ else if (m_CodeSize <= 200)
+ {
+ threshold = 5;
+ }
+ else
+ {
+ threshold = 1;
+ }
+
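+    // For example (illustrative): a callee with 60 bytes of IL falls into
+    // the "<= 75" bucket above and so is accepted roughly 20% of the time.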
+ unsigned randomValue = m_Random->Next(1, 100);
+
+ // Reject if callee size is over the threshold
+ if (randomValue > threshold)
+ {
+ // Inline appears to be unprofitable
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Random rejection (r=%d > t=%d)\n", randomValue, threshold));
+
+ // Fail the inline
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_RANDOM_REJECT);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_RANDOM_REJECT);
+ }
+ }
+ else
+ {
+ // Inline appears to be profitable
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Random acceptance (r=%d <= t=%d)\n", randomValue, threshold));
+
+ // Update candidacy
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_RANDOM_ACCEPT);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_RANDOM_ACCEPT);
+ }
+ }
+}
+
+#endif // DEBUG
+
+#ifdef _MSC_VER
+// Disable warning about new array member initialization behavior
+#pragma warning(disable : 4351)
+#endif
+
+//------------------------------------------------------------------------
+// DiscretionaryPolicy: construct a new DiscretionaryPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+// clang-format off
+DiscretionaryPolicy::DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot)
+ : LegacyPolicy(compiler, isPrejitRoot)
+ , m_Depth(0)
+ , m_BlockCount(0)
+ , m_Maxstack(0)
+ , m_ArgCount(0)
+ , m_ArgType()
+ , m_ArgSize()
+ , m_LocalCount(0)
+ , m_ReturnType(CORINFO_TYPE_UNDEF)
+ , m_ReturnSize(0)
+ , m_ArgAccessCount(0)
+ , m_LocalAccessCount(0)
+ , m_IntConstantCount(0)
+ , m_FloatConstantCount(0)
+ , m_IntLoadCount(0)
+ , m_FloatLoadCount(0)
+ , m_IntStoreCount(0)
+ , m_FloatStoreCount(0)
+ , m_SimpleMathCount(0)
+ , m_ComplexMathCount(0)
+ , m_OverflowMathCount(0)
+ , m_IntArrayLoadCount(0)
+ , m_FloatArrayLoadCount(0)
+ , m_RefArrayLoadCount(0)
+ , m_StructArrayLoadCount(0)
+ , m_IntArrayStoreCount(0)
+ , m_FloatArrayStoreCount(0)
+ , m_RefArrayStoreCount(0)
+ , m_StructArrayStoreCount(0)
+ , m_StructOperationCount(0)
+ , m_ObjectModelCount(0)
+ , m_FieldLoadCount(0)
+ , m_FieldStoreCount(0)
+ , m_StaticFieldLoadCount(0)
+ , m_StaticFieldStoreCount(0)
+ , m_LoadAddressCount(0)
+ , m_ThrowCount(0)
+ , m_ReturnCount(0)
+ , m_CallCount(0)
+ , m_CallSiteWeight(0)
+ , m_ModelCodeSizeEstimate(0)
+ , m_PerCallInstructionEstimate(0)
+ , m_IsClassCtor(false)
+ , m_IsSameThis(false)
+ , m_CallerHasNewArray(false)
+ , m_CallerHasNewObj(false)
+{
+ // Empty
+}
+// clang-format on
+
+//------------------------------------------------------------------------
+// NoteBool: handle an observed boolean value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void DiscretionaryPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ switch (obs)
+ {
+ case InlineObservation::CALLEE_LOOKS_LIKE_WRAPPER:
+ m_LooksLikeWrapperMethod = value;
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_CONSTANT_TEST:
+ assert(value);
+ m_ArgFeedsConstantTest++;
+ break;
+
+ case InlineObservation::CALLEE_ARG_FEEDS_RANGE_CHECK:
+ assert(value);
+ m_ArgFeedsRangeCheck++;
+ break;
+
+ case InlineObservation::CALLSITE_CONSTANT_ARG_FEEDS_TEST:
+ assert(value);
+ m_ConstantArgFeedsConstantTest++;
+ break;
+
+ case InlineObservation::CALLEE_IS_CLASS_CTOR:
+ m_IsClassCtor = value;
+ break;
+
+ case InlineObservation::CALLSITE_IS_SAME_THIS:
+ m_IsSameThis = value;
+ break;
+
+ case InlineObservation::CALLER_HAS_NEWARRAY:
+ m_CallerHasNewArray = value;
+ break;
+
+ case InlineObservation::CALLER_HAS_NEWOBJ:
+ m_CallerHasNewObj = value;
+ break;
+
+ default:
+ LegacyPolicy::NoteBool(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+//    obs      - the current observation
+// value - the value being observed
+
+void DiscretionaryPolicy::NoteInt(InlineObservation obs, int value)
+{
+ switch (obs)
+ {
+
+ case InlineObservation::CALLEE_IL_CODE_SIZE:
+ // Override how code size is handled
+ {
+ assert(m_IsForceInlineKnown);
+ assert(value != 0);
+ m_CodeSize = static_cast<unsigned>(value);
+
+ if (m_IsForceInline)
+ {
+ // Candidate based on force inline
+ SetCandidate(InlineObservation::CALLEE_IS_FORCE_INLINE);
+ }
+ else
+ {
+ // Candidate, pending profitability evaluation
+ SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
+ }
+
+ break;
+ }
+
+ case InlineObservation::CALLEE_OPCODE:
+ {
+ // This tries to do a rough binning of opcodes based
+ // on similarity of impact on codegen.
+ OPCODE opcode = static_cast<OPCODE>(value);
+ ComputeOpcodeBin(opcode);
+ LegacyPolicy::NoteInt(obs, value);
+ break;
+ }
+
+ case InlineObservation::CALLEE_MAXSTACK:
+ m_Maxstack = value;
+ break;
+
+ case InlineObservation::CALLEE_NUMBER_OF_BASIC_BLOCKS:
+ m_BlockCount = value;
+ break;
+
+ case InlineObservation::CALLSITE_DEPTH:
+ m_Depth = value;
+ break;
+
+ case InlineObservation::CALLSITE_WEIGHT:
+ m_CallSiteWeight = static_cast<unsigned>(value);
+ break;
+
+ default:
+ // Delegate remainder to the LegacyPolicy.
+ LegacyPolicy::NoteInt(obs, value);
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// ComputeOpcodeBin: simple histogramming of opcodes based on presumably
+// similar codegen impact.
+//
+// Arguments:
+// opcode - an MSIL opcode from the callee
+
+void DiscretionaryPolicy::ComputeOpcodeBin(OPCODE opcode)
+{
+ switch (opcode)
+ {
+ case CEE_LDARG_0:
+ case CEE_LDARG_1:
+ case CEE_LDARG_2:
+ case CEE_LDARG_3:
+ case CEE_LDARG_S:
+ case CEE_LDARG:
+ case CEE_STARG_S:
+ case CEE_STARG:
+ m_ArgAccessCount++;
+ break;
+
+ case CEE_LDLOC_0:
+ case CEE_LDLOC_1:
+ case CEE_LDLOC_2:
+ case CEE_LDLOC_3:
+ case CEE_LDLOC_S:
+ case CEE_STLOC_0:
+ case CEE_STLOC_1:
+ case CEE_STLOC_2:
+ case CEE_STLOC_3:
+ case CEE_STLOC_S:
+ case CEE_LDLOC:
+ case CEE_STLOC:
+ m_LocalAccessCount++;
+ break;
+
+ case CEE_LDNULL:
+ case CEE_LDC_I4_M1:
+ case CEE_LDC_I4_0:
+ case CEE_LDC_I4_1:
+ case CEE_LDC_I4_2:
+ case CEE_LDC_I4_3:
+ case CEE_LDC_I4_4:
+ case CEE_LDC_I4_5:
+ case CEE_LDC_I4_6:
+ case CEE_LDC_I4_7:
+ case CEE_LDC_I4_8:
+ case CEE_LDC_I4_S:
+ m_IntConstantCount++;
+ break;
+
+ case CEE_LDC_R4:
+ case CEE_LDC_R8:
+ m_FloatConstantCount++;
+ break;
+
+ case CEE_LDIND_I1:
+ case CEE_LDIND_U1:
+ case CEE_LDIND_I2:
+ case CEE_LDIND_U2:
+ case CEE_LDIND_I4:
+ case CEE_LDIND_U4:
+ case CEE_LDIND_I8:
+ case CEE_LDIND_I:
+ m_IntLoadCount++;
+ break;
+
+ case CEE_LDIND_R4:
+ case CEE_LDIND_R8:
+ m_FloatLoadCount++;
+ break;
+
+ case CEE_STIND_I1:
+ case CEE_STIND_I2:
+ case CEE_STIND_I4:
+ case CEE_STIND_I8:
+ case CEE_STIND_I:
+ m_IntStoreCount++;
+ break;
+
+ case CEE_STIND_R4:
+ case CEE_STIND_R8:
+ m_FloatStoreCount++;
+ break;
+
+ case CEE_SUB:
+ case CEE_AND:
+ case CEE_OR:
+ case CEE_XOR:
+ case CEE_SHL:
+ case CEE_SHR:
+ case CEE_SHR_UN:
+ case CEE_NEG:
+ case CEE_NOT:
+ case CEE_CONV_I1:
+ case CEE_CONV_I2:
+ case CEE_CONV_I4:
+ case CEE_CONV_I8:
+ case CEE_CONV_U4:
+ case CEE_CONV_U8:
+ case CEE_CONV_U2:
+ case CEE_CONV_U1:
+ case CEE_CONV_I:
+ case CEE_CONV_U:
+ m_SimpleMathCount++;
+ break;
+
+ case CEE_MUL:
+ case CEE_DIV:
+ case CEE_DIV_UN:
+ case CEE_REM:
+ case CEE_REM_UN:
+ case CEE_CONV_R4:
+ case CEE_CONV_R8:
+ case CEE_CONV_R_UN:
+ m_ComplexMathCount++;
+ break;
+
+ case CEE_CONV_OVF_I1_UN:
+ case CEE_CONV_OVF_I2_UN:
+ case CEE_CONV_OVF_I4_UN:
+ case CEE_CONV_OVF_I8_UN:
+ case CEE_CONV_OVF_U1_UN:
+ case CEE_CONV_OVF_U2_UN:
+ case CEE_CONV_OVF_U4_UN:
+ case CEE_CONV_OVF_U8_UN:
+ case CEE_CONV_OVF_I_UN:
+ case CEE_CONV_OVF_U_UN:
+ case CEE_CONV_OVF_I1:
+ case CEE_CONV_OVF_U1:
+ case CEE_CONV_OVF_I2:
+ case CEE_CONV_OVF_U2:
+ case CEE_CONV_OVF_I4:
+ case CEE_CONV_OVF_U4:
+ case CEE_CONV_OVF_I8:
+ case CEE_CONV_OVF_U8:
+ case CEE_ADD_OVF:
+ case CEE_ADD_OVF_UN:
+ case CEE_MUL_OVF:
+ case CEE_MUL_OVF_UN:
+ case CEE_SUB_OVF:
+ case CEE_SUB_OVF_UN:
+ case CEE_CKFINITE:
+ m_OverflowMathCount++;
+ break;
+
+ case CEE_LDELEM_I1:
+ case CEE_LDELEM_U1:
+ case CEE_LDELEM_I2:
+ case CEE_LDELEM_U2:
+ case CEE_LDELEM_I4:
+ case CEE_LDELEM_U4:
+ case CEE_LDELEM_I8:
+ case CEE_LDELEM_I:
+ m_IntArrayLoadCount++;
+ break;
+
+ case CEE_LDELEM_R4:
+ case CEE_LDELEM_R8:
+ m_FloatArrayLoadCount++;
+ break;
+
+ case CEE_LDELEM_REF:
+ m_RefArrayLoadCount++;
+ break;
+
+ case CEE_LDELEM:
+ m_StructArrayLoadCount++;
+ break;
+
+ case CEE_STELEM_I:
+ case CEE_STELEM_I1:
+ case CEE_STELEM_I2:
+ case CEE_STELEM_I4:
+ case CEE_STELEM_I8:
+ m_IntArrayStoreCount++;
+ break;
+
+ case CEE_STELEM_R4:
+ case CEE_STELEM_R8:
+ m_FloatArrayStoreCount++;
+ break;
+
+ case CEE_STELEM_REF:
+ m_RefArrayStoreCount++;
+ break;
+
+ case CEE_STELEM:
+ m_StructArrayStoreCount++;
+ break;
+
+ case CEE_CPOBJ:
+ case CEE_LDOBJ:
+ case CEE_CPBLK:
+ case CEE_INITBLK:
+ case CEE_STOBJ:
+ m_StructOperationCount++;
+ break;
+
+ case CEE_CASTCLASS:
+ case CEE_ISINST:
+ case CEE_UNBOX:
+ case CEE_BOX:
+ case CEE_UNBOX_ANY:
+ case CEE_LDFTN:
+ case CEE_LDVIRTFTN:
+ case CEE_SIZEOF:
+ m_ObjectModelCount++;
+ break;
+
+ case CEE_LDFLD:
+ case CEE_LDLEN:
+ case CEE_REFANYTYPE:
+ case CEE_REFANYVAL:
+ m_FieldLoadCount++;
+ break;
+
+ case CEE_STFLD:
+ m_FieldStoreCount++;
+ break;
+
+ case CEE_LDSFLD:
+ m_StaticFieldLoadCount++;
+ break;
+
+ case CEE_STSFLD:
+ m_StaticFieldStoreCount++;
+ break;
+
+ case CEE_LDELEMA:
+ case CEE_LDSFLDA:
+ case CEE_LDFLDA:
+ case CEE_LDSTR:
+ case CEE_LDARGA:
+ case CEE_LDLOCA:
+ m_LoadAddressCount++;
+ break;
+
+ case CEE_CALL:
+ case CEE_CALLI:
+ case CEE_CALLVIRT:
+ case CEE_NEWOBJ:
+ case CEE_NEWARR:
+ case CEE_JMP:
+ m_CallCount++;
+ break;
+
+ case CEE_THROW:
+ case CEE_RETHROW:
+ m_ThrowCount++;
+ break;
+
+ case CEE_RET:
+            m_ReturnCount++;
+            break;
+
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// PropagateNeverToRuntime: determine if a never result should cause the
+// method to be marked as un-inlinable.
+
+bool DiscretionaryPolicy::PropagateNeverToRuntime() const
+{
+ // Propagate most failures, but don't propagate when the inline
+ // was viable but unprofitable.
+ bool propagate = (m_Observation != InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+
+ return propagate;
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void DiscretionaryPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+
+#if defined(DEBUG)
+
+ // Punt if we're inlining and we've reached the acceptance limit.
+ int limit = JitConfig.JitInlineLimit();
+ unsigned current = m_RootCompiler->m_inlineStrategy->GetInlineCount();
+
+ if (!m_IsPrejitRoot && (limit >= 0) && (current >= static_cast<unsigned>(limit)))
+ {
+ SetFailure(InlineObservation::CALLSITE_OVER_INLINE_LIMIT);
+ return;
+ }
+
+#endif // defined(DEBUG)
+
+ // Make additional observations based on the method info
+ MethodInfoObservations(methodInfo);
+
+ // Estimate the code size impact. This is just for model
+ // evaluation purposes -- we'll still use the legacy policy's
+ // model for actual inlining.
+ EstimateCodeSize();
+
+    // Estimate performance impact. This is just for model
+ // evaluation purposes -- we'll still use the legacy policy's
+ // model for actual inlining.
+ EstimatePerformanceImpact();
+
+ // Delegate to LegacyPolicy for the rest
+ LegacyPolicy::DetermineProfitability(methodInfo);
+}
+
+//------------------------------------------------------------------------
+// MethodInfoObservations: make observations based on information from
+// the method info for the callee.
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void DiscretionaryPolicy::MethodInfoObservations(CORINFO_METHOD_INFO* methodInfo)
+{
+ CORINFO_SIG_INFO& locals = methodInfo->locals;
+ m_LocalCount = locals.numArgs;
+
+ CORINFO_SIG_INFO& args = methodInfo->args;
+ const unsigned argCount = args.numArgs;
+ m_ArgCount = argCount;
+
+ const unsigned pointerSize = sizeof(void*);
+ unsigned i = 0;
+
+ // Implicit arguments
+
+ const bool hasThis = args.hasThis();
+
+ if (hasThis)
+ {
+ m_ArgType[i] = CORINFO_TYPE_CLASS;
+ m_ArgSize[i] = pointerSize;
+ i++;
+ m_ArgCount++;
+ }
+
+ const bool hasTypeArg = args.hasTypeArg();
+
+ if (hasTypeArg)
+ {
+ m_ArgType[i] = CORINFO_TYPE_NATIVEINT;
+ m_ArgSize[i] = pointerSize;
+ i++;
+ m_ArgCount++;
+ }
+
+ // Explicit arguments
+
+ unsigned j = 0;
+ CORINFO_ARG_LIST_HANDLE argListHandle = args.args;
+ COMP_HANDLE comp = m_RootCompiler->info.compCompHnd;
+
+ while ((i < MAX_ARGS) && (j < argCount))
+ {
+ CORINFO_CLASS_HANDLE classHandle;
+ CorInfoType type = strip(comp->getArgType(&args, argListHandle, &classHandle));
+
+ m_ArgType[i] = type;
+
+ if (type == CORINFO_TYPE_VALUECLASS)
+ {
+ assert(classHandle != nullptr);
+ m_ArgSize[i] = roundUp(comp->getClassSize(classHandle), pointerSize);
+ }
+ else
+ {
+ m_ArgSize[i] = pointerSize;
+ }
+
+ argListHandle = comp->getArgNext(argListHandle);
+ i++;
+ j++;
+ }
+
+ while (i < MAX_ARGS)
+ {
+ m_ArgType[i] = CORINFO_TYPE_UNDEF;
+ m_ArgSize[i] = 0;
+ i++;
+ }
+
+ // Return Type
+
+ m_ReturnType = args.retType;
+
+ if (m_ReturnType == CORINFO_TYPE_VALUECLASS)
+ {
+ assert(args.retTypeClass != nullptr);
+ m_ReturnSize = roundUp(comp->getClassSize(args.retTypeClass), pointerSize);
+ }
+ else if (m_ReturnType == CORINFO_TYPE_VOID)
+ {
+ m_ReturnSize = 0;
+ }
+ else
+ {
+ m_ReturnSize = pointerSize;
+ }
+}
+
+//------------------------------------------------------------------------
+// EstimateCodeSize: produce (various) code size estimates based on
+// observations.
+//
+// The "Baseline" code size model used by the legacy policy is
+// effectively
+//
+// 0.100 * m_CalleeNativeSizeEstimate +
+// -0.100 * m_CallsiteNativeSizeEstimate
+//
+// On the inlines in CoreCLR's mscorlib, release windows x64, this
+// yields scores of R=0.42, MSE=228, and MAE=7.25.
+//
+// This estimate can be improved slightly by refitting, resulting in
+//
+// -1.451 +
+// 0.095 * m_CalleeNativeSizeEstimate +
+// -0.104 * m_CallsiteNativeSizeEstimate
+//
+// With R=0.44, MSE=220, and MAE=6.93.
+
+void DiscretionaryPolicy::EstimateCodeSize()
+{
+ // Ensure we have this available.
+ m_CalleeNativeSizeEstimate = DetermineNativeSizeEstimate();
+
+ // Size estimate based on GLMNET model.
+ // R=0.55, MSE=177, MAE=6.59
+ //
+ // Suspect it doesn't handle factors properly...
+ // clang-format off
+ double sizeEstimate =
+ -13.532 +
+ 0.359 * (int) m_CallsiteFrequency +
+ -0.015 * m_ArgCount +
+ -1.553 * m_ArgSize[5] +
+ 2.326 * m_LocalCount +
+ 0.287 * m_ReturnSize +
+ 0.561 * m_IntConstantCount +
+ 1.932 * m_FloatConstantCount +
+ -0.822 * m_SimpleMathCount +
+ -7.591 * m_IntArrayLoadCount +
+ 4.784 * m_RefArrayLoadCount +
+ 12.778 * m_StructArrayLoadCount +
+ 1.452 * m_FieldLoadCount +
+ 8.811 * m_StaticFieldLoadCount +
+ 2.752 * m_StaticFieldStoreCount +
+ -6.566 * m_ThrowCount +
+ 6.021 * m_CallCount +
+ -0.238 * m_IsInstanceCtor +
+ -5.357 * m_IsFromPromotableValueClass +
+ -7.901 * (m_ConstantArgFeedsConstantTest > 0 ? 1 : 0) +
+ 0.065 * m_CalleeNativeSizeEstimate;
+ // clang-format on
+
+ // Scaled up and reported as an integer value.
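+    // For example, assuming SIZE_SCALE is 10 (estimates are kept in
+    // bytes * 10), a model estimate of 6.5 bytes is stored as 65.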
+ m_ModelCodeSizeEstimate = (int)(SIZE_SCALE * sizeEstimate);
+}
+
+//------------------------------------------------------------------------
+// EstimatePerformanceImpact: produce performance estimates based on
+// observations.
+//
+// Notes:
+// Attempts to predict the per-call savings in instructions executed.
+//
+//    A negative value indicates that doing the inline will save instructions
+// and likely time.
+
+void DiscretionaryPolicy::EstimatePerformanceImpact()
+{
+ // Performance estimate based on GLMNET model.
+ // R=0.24, RMSE=16.1, MAE=8.9.
+ // clang-format off
+ double perCallSavingsEstimate =
+ -7.35
+ + (m_CallsiteFrequency == InlineCallsiteFrequency::BORING ? 0.76 : 0)
+ + (m_CallsiteFrequency == InlineCallsiteFrequency::LOOP ? -2.02 : 0)
+ + (m_ArgType[0] == CORINFO_TYPE_CLASS ? 3.51 : 0)
+ + (m_ArgType[3] == CORINFO_TYPE_BOOL ? 20.7 : 0)
+ + (m_ArgType[4] == CORINFO_TYPE_CLASS ? 0.38 : 0)
+ + (m_ReturnType == CORINFO_TYPE_CLASS ? 2.32 : 0);
+ // clang-format on
+
+ // Scaled up and reported as an integer value.
+ m_PerCallInstructionEstimate = (int)(SIZE_SCALE * perCallSavingsEstimate);
+}
+
+//------------------------------------------------------------------------
+// CodeSizeEstimate: estimated code size impact of the inline
+//
+// Return Value:
+// Estimated code size impact, in bytes * 10
+
+int DiscretionaryPolicy::CodeSizeEstimate()
+{
+ return m_ModelCodeSizeEstimate;
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// DumpSchema: dump names for all the supporting data for the
+// inline decision in CSV format.
+//
+// Arguments:
+// file -- file to write to
+
+void DiscretionaryPolicy::DumpSchema(FILE* file) const
+{
+ fprintf(file, ",ILSize");
+ fprintf(file, ",CallsiteFrequency");
+ fprintf(file, ",InstructionCount");
+ fprintf(file, ",LoadStoreCount");
+ fprintf(file, ",Depth");
+ fprintf(file, ",BlockCount");
+ fprintf(file, ",Maxstack");
+ fprintf(file, ",ArgCount");
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",Arg%uType", i);
+ }
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",Arg%uSize", i);
+ }
+
+ fprintf(file, ",LocalCount");
+ fprintf(file, ",ReturnType");
+ fprintf(file, ",ReturnSize");
+ fprintf(file, ",ArgAccessCount");
+ fprintf(file, ",LocalAccessCount");
+ fprintf(file, ",IntConstantCount");
+ fprintf(file, ",FloatConstantCount");
+ fprintf(file, ",IntLoadCount");
+ fprintf(file, ",FloatLoadCount");
+ fprintf(file, ",IntStoreCount");
+ fprintf(file, ",FloatStoreCount");
+ fprintf(file, ",SimpleMathCount");
+ fprintf(file, ",ComplexMathCount");
+ fprintf(file, ",OverflowMathCount");
+ fprintf(file, ",IntArrayLoadCount");
+ fprintf(file, ",FloatArrayLoadCount");
+ fprintf(file, ",RefArrayLoadCount");
+ fprintf(file, ",StructArrayLoadCount");
+ fprintf(file, ",IntArrayStoreCount");
+ fprintf(file, ",FloatArrayStoreCount");
+ fprintf(file, ",RefArrayStoreCount");
+ fprintf(file, ",StructArrayStoreCount");
+ fprintf(file, ",StructOperationCount");
+ fprintf(file, ",ObjectModelCount");
+ fprintf(file, ",FieldLoadCount");
+ fprintf(file, ",FieldStoreCount");
+ fprintf(file, ",StaticFieldLoadCount");
+ fprintf(file, ",StaticFieldStoreCount");
+ fprintf(file, ",LoadAddressCount");
+ fprintf(file, ",ThrowCount");
+ fprintf(file, ",ReturnCount");
+ fprintf(file, ",CallCount");
+ fprintf(file, ",CallSiteWeight");
+ fprintf(file, ",IsForceInline");
+ fprintf(file, ",IsInstanceCtor");
+ fprintf(file, ",IsFromPromotableValueClass");
+ fprintf(file, ",HasSimd");
+ fprintf(file, ",LooksLikeWrapperMethod");
+ fprintf(file, ",ArgFeedsConstantTest");
+ fprintf(file, ",IsMostlyLoadStore");
+ fprintf(file, ",ArgFeedsRangeCheck");
+ fprintf(file, ",ConstantArgFeedsConstantTest");
+ fprintf(file, ",CalleeNativeSizeEstimate");
+ fprintf(file, ",CallsiteNativeSizeEstimate");
+ fprintf(file, ",ModelCodeSizeEstimate");
+ fprintf(file, ",ModelPerCallInstructionEstimate");
+ fprintf(file, ",IsClassCtor");
+ fprintf(file, ",IsSameThis");
+ fprintf(file, ",CallerHasNewArray");
+ fprintf(file, ",CallerHasNewObj");
+}
+
+//------------------------------------------------------------------------
+// DumpData: dump all the supporting data for the inline decision
+// in CSV format.
+//
+// Arguments:
+// file -- file to write to
+
+void DiscretionaryPolicy::DumpData(FILE* file) const
+{
+ fprintf(file, ",%u", m_CodeSize);
+ fprintf(file, ",%u", m_CallsiteFrequency);
+ fprintf(file, ",%u", m_InstructionCount);
+ fprintf(file, ",%u", m_LoadStoreCount);
+ fprintf(file, ",%u", m_Depth);
+ fprintf(file, ",%u", m_BlockCount);
+ fprintf(file, ",%u", m_Maxstack);
+ fprintf(file, ",%u", m_ArgCount);
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",%u", m_ArgType[i]);
+ }
+
+ for (unsigned i = 0; i < MAX_ARGS; i++)
+ {
+ fprintf(file, ",%u", (unsigned)m_ArgSize[i]);
+ }
+
+ fprintf(file, ",%u", m_LocalCount);
+ fprintf(file, ",%u", m_ReturnType);
+ fprintf(file, ",%u", (unsigned)m_ReturnSize);
+ fprintf(file, ",%u", m_ArgAccessCount);
+ fprintf(file, ",%u", m_LocalAccessCount);
+ fprintf(file, ",%u", m_IntConstantCount);
+ fprintf(file, ",%u", m_FloatConstantCount);
+ fprintf(file, ",%u", m_IntLoadCount);
+ fprintf(file, ",%u", m_FloatLoadCount);
+ fprintf(file, ",%u", m_IntStoreCount);
+ fprintf(file, ",%u", m_FloatStoreCount);
+ fprintf(file, ",%u", m_SimpleMathCount);
+ fprintf(file, ",%u", m_ComplexMathCount);
+ fprintf(file, ",%u", m_OverflowMathCount);
+ fprintf(file, ",%u", m_IntArrayLoadCount);
+ fprintf(file, ",%u", m_FloatArrayLoadCount);
+ fprintf(file, ",%u", m_RefArrayLoadCount);
+ fprintf(file, ",%u", m_StructArrayLoadCount);
+ fprintf(file, ",%u", m_IntArrayStoreCount);
+ fprintf(file, ",%u", m_FloatArrayStoreCount);
+ fprintf(file, ",%u", m_RefArrayStoreCount);
+ fprintf(file, ",%u", m_StructArrayStoreCount);
+ fprintf(file, ",%u", m_StructOperationCount);
+ fprintf(file, ",%u", m_ObjectModelCount);
+ fprintf(file, ",%u", m_FieldLoadCount);
+ fprintf(file, ",%u", m_FieldStoreCount);
+ fprintf(file, ",%u", m_StaticFieldLoadCount);
+ fprintf(file, ",%u", m_StaticFieldStoreCount);
+ fprintf(file, ",%u", m_LoadAddressCount);
+ fprintf(file, ",%u", m_ReturnCount);
+ fprintf(file, ",%u", m_ThrowCount);
+ fprintf(file, ",%u", m_CallCount);
+ fprintf(file, ",%u", m_CallSiteWeight);
+ fprintf(file, ",%u", m_IsForceInline ? 1 : 0);
+ fprintf(file, ",%u", m_IsInstanceCtor ? 1 : 0);
+ fprintf(file, ",%u", m_IsFromPromotableValueClass ? 1 : 0);
+ fprintf(file, ",%u", m_HasSimd ? 1 : 0);
+ fprintf(file, ",%u", m_LooksLikeWrapperMethod ? 1 : 0);
+ fprintf(file, ",%u", m_ArgFeedsConstantTest);
+ fprintf(file, ",%u", m_MethodIsMostlyLoadStore ? 1 : 0);
+ fprintf(file, ",%u", m_ArgFeedsRangeCheck);
+ fprintf(file, ",%u", m_ConstantArgFeedsConstantTest);
+ fprintf(file, ",%d", m_CalleeNativeSizeEstimate);
+ fprintf(file, ",%d", m_CallsiteNativeSizeEstimate);
+ fprintf(file, ",%d", m_ModelCodeSizeEstimate);
+ fprintf(file, ",%d", m_PerCallInstructionEstimate);
+ fprintf(file, ",%u", m_IsClassCtor ? 1 : 0);
+ fprintf(file, ",%u", m_IsSameThis ? 1 : 0);
+ fprintf(file, ",%u", m_CallerHasNewArray ? 1 : 0);
+ fprintf(file, ",%u", m_CallerHasNewObj ? 1 : 0);
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------/
+// ModelPolicy: construct a new ModelPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+ModelPolicy::ModelPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+{
+ // Empty
+}
+
+//------------------------------------------------------------------------
+// NoteInt: handle an observed integer value
+//
+// Arguments:
+// obs - the current observation
+// value - the value being observed
+//
+// Notes:
+// The ILSize threshold used here should be large enough that
+// it does not generally influence inlining decisions -- it only
+// helps to make them faster.
+//
+// The value is determined as follows. We figure out the maximum
+// possible code size estimate that will lead to an inline. This is
+// found by determining the maximum possible inline benefit and
+// working backwards.
+//
+// In the current ModelPolicy, the maximum benefit is -28.1, which
+// comes from a CallSiteWeight of 3 and a per call benefit of
+// -9.37. This implies that any candidate with code size larger
+// than (28.1/0.2) will not pass the threshold. So the maximum code
+// size estimate (in bytes) for any inlinee is 140.55, and hence the
+// maximum scaled estimate is 1405.
+//
+// Since we are trying to short circuit early in the evaluation
+// process we don't have the code size estimate in hand. We need to
+// estimate the possible code size estimate based on something we
+// know cheaply and early -- the ILSize. So we use quantile
+// regression to project how ILSize predicts the model code size
+// estimate. Note that ILSize does not currently directly enter
+// into the model.
+//
+// The median value for the model code size estimate based on
+// ILSize is given by -107 + 12.6 * ILSize for the V9 data. This
+// means an ILSize of 120 or more leads to a scaled size estimate of
+// at least 1405 at least 50% of the time. So we choose 120 as the
+// early rejection threshold.
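+//
+// Spelled out, the arithmetic behind the 120 threshold (using the
+// constants quoted above and SIZE_SCALE = 10) is roughly:
+//
+// max benefit = 3 * 9.37 = 28.11
+// max inlinee size (bytes) = 28.11 / 0.20 = 140.55
+// max scaled size estimate = 140.55 * 10 ~= 1405
+// -107 + 12.6 * ILSize >= 1405 ==> ILSize >= 120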
+
+void ModelPolicy::NoteInt(InlineObservation obs, int value)
+{
+ // Let underlying policy do its thing.
+ DiscretionaryPolicy::NoteInt(obs, value);
+
+ // Fail fast for inlinees that are too large to ever inline.
+ // The value of 120 is model-dependent; see notes above.
+ if (!m_IsForceInline && (obs == InlineObservation::CALLEE_IL_CODE_SIZE) && (value >= 120))
+ {
+ // Callee too big, not a candidate
+ SetNever(InlineObservation::CALLEE_TOO_MUCH_IL);
+ return;
+ }
+
+ // Safeguard against overly deep inlines
+ if (obs == InlineObservation::CALLSITE_DEPTH)
+ {
+ unsigned depthLimit = m_RootCompiler->m_inlineStrategy->GetMaxInlineDepth();
+
+ if (m_Depth > depthLimit)
+ {
+ SetFailure(InlineObservation::CALLSITE_IS_TOO_DEEP);
+ return;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+//
+// Notes:
+// There are currently two parameters that are ad-hoc: the
+// per-call-site weight and the size/speed threshold. Ideally this
+// policy would have just one tunable parameter, the threshold,
+// which describes how willing we are to trade size for speed.
+
+void ModelPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // Do some homework
+ MethodInfoObservations(methodInfo);
+ EstimateCodeSize();
+ EstimatePerformanceImpact();
+
+ // Preliminary inline model.
+ //
+ // If code size is estimated to increase, look at
+ // the profitability model for guidance.
+ //
+ // If code size will decrease, just inline.
+
+ if (m_ModelCodeSizeEstimate <= 0)
+ {
+ // Inline will likely decrease code size
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Inline profitable, will decrease code size by %g bytes\n",
+ (double)-m_ModelCodeSizeEstimate / SIZE_SCALE));
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_SIZE_DECREASING_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_SIZE_DECREASING_INLINE);
+ }
+ }
+ else
+ {
+ // We estimate that this inline will increase code size. Only
+ // inline if the performance win is sufficiently large to
+ // justify bigger code.
+
+ // First compute the number of instruction executions saved
+ // via inlining per call to the callee per byte of code size
+ // impact.
+ //
+ // The per call instruction estimate is negative if the inline
+ // will reduce instruction count. Flip the sign here to make
+ // positive be better and negative worse.
+ double perCallBenefit = -((double)m_PerCallInstructionEstimate / (double)m_ModelCodeSizeEstimate);
+
+ // Now estimate the local call frequency.
+ //
+ // Todo: use IBC data, or a better local profile estimate, or
+ // try and incorporate this into the model. For instance if we
+ // tried to predict the benefit per call to the root method
+ // then the model would have to incorporate the local call
+ // frequency, somehow.
+ double callSiteWeight = 1.0;
+
+ switch (m_CallsiteFrequency)
+ {
+ case InlineCallsiteFrequency::RARE:
+ callSiteWeight = 0.1;
+ break;
+ case InlineCallsiteFrequency::BORING:
+ callSiteWeight = 1.0;
+ break;
+ case InlineCallsiteFrequency::WARM:
+ callSiteWeight = 1.5;
+ break;
+ case InlineCallsiteFrequency::LOOP:
+ case InlineCallsiteFrequency::HOT:
+ callSiteWeight = 3.0;
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ // Determine the estimated number of instructions saved per
+ // call to the root method per byte of code size impact. This
+ // is our benefit figure of merit.
+ double benefit = callSiteWeight * perCallBenefit;
+
+ // Compare this to the threshold, and inline if greater.
+ //
+ // The threshold is interpretable as a size/speed tradeoff:
+ // the value of 0.2 below indicates we'll allow inlines that
+ // grow code by as many as 5 bytes to save 1 instruction
+ // execution (per call to the root method).
+ double threshold = 0.20;
+ bool shouldInline = (benefit > threshold);
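+
+ // For example (illustrative numbers only): at a LOOP call site
+ // (weight 3.0), an inline with a per call estimate of -2 instructions
+ // and a code size estimate of +20 bytes gives perCallBenefit =
+ // 2/20 = 0.1 and benefit = 3.0 * 0.1 = 0.3 > 0.2, so it is accepted;
+ // at a BORING call site (weight 1.0) the same inline scores 0.1 and
+ // is rejected.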
+
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Inline %s profitable: benefit=%g (weight=%g, percall=%g, size=%g)\n",
+ shouldInline ? "is" : "is not", benefit, callSiteWeight,
+ (double)m_PerCallInstructionEstimate / SIZE_SCALE, (double)m_ModelCodeSizeEstimate / SIZE_SCALE));
+
+ if (!shouldInline)
+ {
+ // Fail the inline
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_NOT_PROFITABLE_INLINE);
+ }
+ }
+ else
+ {
+ // Update candidacy
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_PROFITABLE_INLINE);
+ }
+ }
+ }
+}
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------/
+// FullPolicy: construct a new FullPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+FullPolicy::FullPolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+{
+ // Empty
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void FullPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // Check depth
+
+ unsigned depthLimit = m_RootCompiler->m_inlineStrategy->GetMaxInlineDepth();
+
+ if (m_Depth > depthLimit)
+ {
+ SetFailure(InlineObservation::CALLSITE_IS_TOO_DEEP);
+ return;
+ }
+
+ // Check size
+
+ unsigned sizeLimit = m_RootCompiler->m_inlineStrategy->GetMaxInlineILSize();
+
+ if (m_CodeSize > sizeLimit)
+ {
+ SetFailure(InlineObservation::CALLEE_TOO_MUCH_IL);
+ return;
+ }
+
+ // Otherwise, we're good to go
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_PROFITABLE_INLINE);
+ }
+
+ return;
+}
+
+//------------------------------------------------------------------------/
+// SizePolicy: construct a new SizePolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+SizePolicy::SizePolicy(Compiler* compiler, bool isPrejitRoot) : DiscretionaryPolicy(compiler, isPrejitRoot)
+{
+ // Empty
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void SizePolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // Do some homework
+ MethodInfoObservations(methodInfo);
+ EstimateCodeSize();
+
+ // Does this inline increase the estimated size beyond
+ // the original size estimate?
+ const InlineStrategy* strategy = m_RootCompiler->m_inlineStrategy;
+ const int initialSize = strategy->GetInitialSizeEstimate();
+ const int currentSize = strategy->GetCurrentSizeEstimate();
+ const int newSize = currentSize + m_ModelCodeSizeEstimate;
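+
+ // Note: the model estimate and the strategy's size estimates are all
+ // kept in the same scaled units (bytes * SIZE_SCALE), so they can be
+ // added and compared directly; the log message below divides by
+ // SIZE_SCALE to report plain bytes.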
+
+ if (newSize <= initialSize)
+ {
+ // Estimated size impact is acceptable, so inline here.
+ JITLOG_THIS(m_RootCompiler,
+ (LL_INFO100000, "Inline profitable, root size estimate %d is less than initial size %d\n",
+ newSize / SIZE_SCALE, initialSize / SIZE_SCALE));
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_IS_SIZE_DECREASING_INLINE);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_IS_SIZE_DECREASING_INLINE);
+ }
+ }
+ else
+ {
+ // Estimated size increase is too large, so no inline here.
+ //
+ // Note that we ought to reconsider this inline if we make
+ // room in the budget by inlining a bunch of size decreasing
+ // inlines after this one. But for now, we won't do this.
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_NOT_PROFITABLE_INLINE);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_NOT_PROFITABLE_INLINE);
+ }
+ }
+
+ return;
+}
+
+// Statics to track emission of the replay banner
+// and provide file access to the inline xml
+
+bool ReplayPolicy::s_WroteReplayBanner = false;
+FILE* ReplayPolicy::s_ReplayFile = nullptr;
+CritSecObject ReplayPolicy::s_XmlReaderLock;
+
+//------------------------------------------------------------------------/
+// ReplayPolicy: construct a new ReplayPolicy
+//
+// Arguments:
+// compiler -- compiler instance doing the inlining (root compiler)
+// isPrejitRoot -- true if this compiler is prejitting the root method
+
+ReplayPolicy::ReplayPolicy(Compiler* compiler, bool isPrejitRoot)
+ : DiscretionaryPolicy(compiler, isPrejitRoot)
+ , m_InlineContext(nullptr)
+ , m_Offset(BAD_IL_OFFSET)
+ , m_WasForceInline(false)
+{
+ // Is there a log file open already? If so, we can use it.
+ if (s_ReplayFile == nullptr)
+ {
+ // Did we already try and open and fail?
+ if (!s_WroteReplayBanner)
+ {
+ // Nope, open it up.
+ const wchar_t* replayFileName = JitConfig.JitInlineReplayFile();
+ s_ReplayFile = _wfopen(replayFileName, W("r"));
+
+ // Display banner to stderr, unless we're dumping inline Xml,
+ // in which case the policy name is captured in the Xml.
+ if (JitConfig.JitInlineDumpXml() == 0)
+ {
+ fprintf(stderr, "*** %s inlines from %ws\n", s_ReplayFile == nullptr ? "Unable to replay" : "Replaying",
+ replayFileName);
+ }
+
+ s_WroteReplayBanner = true;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// FinalizeXml: finalize reading of inline Xml
+//
+// Notes:
+// Called during jitShutdown()
+
+void ReplayPolicy::FinalizeXml()
+{
+ if (s_ReplayFile != nullptr)
+ {
+ fclose(s_ReplayFile);
+ s_ReplayFile = nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// FindMethod: find the root method in the inline Xml
+//
+// ReturnValue:
+// true if found. File position left pointing just after the
+// <Token> entry for the method.
+
+bool ReplayPolicy::FindMethod()
+{
+ if (s_ReplayFile == nullptr)
+ {
+ return false;
+ }
+
+ // See if we've already found this method.
+ InlineStrategy* inlineStrategy = m_RootCompiler->m_inlineStrategy;
+ long filePosition = inlineStrategy->GetMethodXmlFilePosition();
+
+ if (filePosition == -1)
+ {
+ // Past lookup failed
+ return false;
+ }
+ else if (filePosition > 0)
+ {
+ // Past lookup succeeded, jump there
+ fseek(s_ReplayFile, filePosition, SEEK_SET);
+ return true;
+ }
+
+ // Else, scan the file. Might be nice to build an index
+ // or something, someday.
+ const mdMethodDef methodToken =
+ m_RootCompiler->info.compCompHnd->getMethodDefFromMethod(m_RootCompiler->info.compMethodHnd);
+ const unsigned methodHash = m_RootCompiler->info.compMethodHash();
+
+ bool foundMethod = false;
+ char buffer[256];
+ fseek(s_ReplayFile, 0, SEEK_SET);
+
+ while (!foundMethod)
+ {
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Look for next method entry
+ if (strstr(buffer, "<Method>") == nullptr)
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // See if token matches
+ unsigned token = 0;
+ int count = sscanf(buffer, " <Token>%u</Token> ", &token);
+ if ((count != 1) || (token != methodToken))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // See if hash matches
+ unsigned hash = 0;
+ count = sscanf(buffer, " <Hash>%u</Hash> ", &hash);
+ if ((count != 1) || (hash != methodHash))
+ {
+ continue;
+ }
+
+ // Found a match...
+ foundMethod = true;
+ break;
+ }
+
+ // Update file position cache for this method
+ long foundPosition = -1;
+
+ if (foundMethod)
+ {
+ foundPosition = ftell(s_ReplayFile);
+ }
+
+ inlineStrategy->SetMethodXmlFilePosition(foundPosition);
+
+ return foundMethod;
+}
+
+//------------------------------------------------------------------------
+// FindContext: find an inline context in the inline Xml
+//
+// Arguments:
+// context -- context of interest
+//
+// ReturnValue:
+// true if found. File position left pointing just after the
+// <Token> entry for the context.
+//
+// Notes:
+// Assumes file position within the relevant method has just been
+// set by a successful call to FindMethod().
+
+bool ReplayPolicy::FindContext(InlineContext* context)
+{
+ // Make sure we've found the parent context.
+ if (context->IsRoot())
+ {
+ // We've already found the method context so we're good.
+ return true;
+ }
+
+ bool foundParent = FindContext(context->GetParent());
+
+ if (!foundParent)
+ {
+ return false;
+ }
+
+ // File pointer should be pointing at the parent context level.
+ // See if we see an inline entry for this context.
+ //
+ // Token and Hash we're looking for.
+ mdMethodDef contextToken = m_RootCompiler->info.compCompHnd->getMethodDefFromMethod(context->GetCallee());
+ unsigned contextHash = m_RootCompiler->info.compCompHnd->getMethodHash(context->GetCallee());
+ unsigned contextOffset = (unsigned)context->GetOffset();
+
+ return FindInline(contextToken, contextHash, contextOffset);
+}
+
+//------------------------------------------------------------------------
+// FindInline: find entry for the current inline in inline Xml.
+//
+// Arguments:
+// token -- token describing the inline
+// hash -- hash describing the inline
+// offset -- IL offset of the call site in the parent method
+//
+// ReturnValue:
+// true if the inline entry was found
+//
+// Notes:
+// Assumes file position has just been set by a successful call to
+// FindMethod or FindContext.
+//
+// Token and Hash will not be sufficiently unique to identify a
+// particular inline, if there are multiple calls to the same
+// method.
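+//
+// The replay Xml is expected to look roughly like the sketch below
+// (inferred from the sscanf patterns used here and in FindMethod; not
+// a formal schema):
+//
+//   <Method>
+//     <Token>...</Token>
+//     <Hash>...</Hash>
+//     <Inlines>
+//       <Inline>
+//         <Token>...</Token>
+//         <Hash>...</Hash>
+//         <Offset>...</Offset>
+//         <CollectData>...</CollectData>   (optional)
+//         <Inlines> ... </Inlines>         (nested inlines)
+//       </Inline>
+//     </Inlines>
+//   </Method>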
+
+bool ReplayPolicy::FindInline(unsigned token, unsigned hash, unsigned offset)
+{
+ char buffer[256];
+ bool foundInline = false;
+ int depth = 0;
+
+ while (!foundInline)
+ {
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // If we hit </Method> we've gone too far,
+ // and the XML is messed up.
+ if (strstr(buffer, "</Method>") != nullptr)
+ {
+ break;
+ }
+
+ // Look for <Inlines />....
+ if (strstr(buffer, "<Inlines />") != nullptr)
+ {
+ if (depth == 0)
+ {
+ // Exited depth 1, failed to find the context
+ break;
+ }
+ else
+ {
+ // Exited nested, keep looking
+ continue;
+ }
+ }
+
+ // Look for <Inlines>....
+ if (strstr(buffer, "<Inlines>") != nullptr)
+ {
+ depth++;
+ continue;
+ }
+
+ // If we hit </Inlines> we've exited a nested entry
+ // or the current entry.
+ if (strstr(buffer, "</Inlines>") != nullptr)
+ {
+ depth--;
+
+ if (depth == 0)
+ {
+ // Exited depth 1, failed to find the context
+ break;
+ }
+ else
+ {
+ // Exited nested, keep looking
+ continue;
+ }
+ }
+
+ // Look for start of inline section at the right depth
+ if ((depth != 1) || (strstr(buffer, "<Inline>") == nullptr))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Match token
+ unsigned inlineToken = 0;
+ int count = sscanf(buffer, " <Token>%u</Token> ", &inlineToken);
+
+ if ((count != 1) || (inlineToken != token))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Match hash
+ unsigned inlineHash = 0;
+ count = sscanf(buffer, " <Hash>%u</Hash> ", &inlineHash);
+
+ if ((count != 1) || (inlineHash != hash))
+ {
+ continue;
+ }
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) == nullptr)
+ {
+ break;
+ }
+
+ // Match offset
+ unsigned inlineOffset = 0;
+ count = sscanf(buffer, " <Offset>%u</Offset> ", &inlineOffset);
+ if ((count != 1) || (inlineOffset != offset))
+ {
+ continue;
+ }
+
+ // Token,Hash,Offset may still not be unique enough, but it's
+ // all we have right now.
+
+ // We're good!
+ foundInline = true;
+
+ // Check for a data collection marker. This does not affect
+ // matching...
+
+ // Get next line
+ if (fgets(buffer, sizeof(buffer), s_ReplayFile) != nullptr)
+ {
+ unsigned collectData = 0;
+ count = sscanf(buffer, " <CollectData>%u</CollectData> ", &collectData);
+
+ if (count == 1)
+ {
+ m_IsDataCollectionTarget = (collectData == 1);
+ }
+ }
+
+ break;
+ }
+
+ return foundInline;
+}
+
+//------------------------------------------------------------------------
+// FindInline: find entry for a particular callee in inline Xml.
+//
+// Arguments:
+// callee -- handle for the callee method
+//
+// ReturnValue:
+// true if the inline should be performed.
+//
+// Notes:
+// Assumes file position has just been set by a successful call to
+// FindContext(...);
+//
+// callee handle will not be sufficiently unique to identify a
+// particular inline, if there are multiple calls to the same
+// method.
+
+bool ReplayPolicy::FindInline(CORINFO_METHOD_HANDLE callee)
+{
+ // Token and Hash we're looking for
+ mdMethodDef calleeToken = m_RootCompiler->info.compCompHnd->getMethodDefFromMethod(callee);
+ unsigned calleeHash = m_RootCompiler->info.compCompHnd->getMethodHash(callee);
+
+ // Abstract this or just pass through raw bits
+ // See matching code in xml writer
+ int offset = -1;
+ if (m_Offset != BAD_IL_OFFSET)
+ {
+ offset = (int)jitGetILoffs(m_Offset);
+ }
+
+ unsigned calleeOffset = (unsigned)offset;
+
+ bool foundInline = FindInline(calleeToken, calleeHash, calleeOffset);
+
+ return foundInline;
+}
+
+//------------------------------------------------------------------------
+// NoteBool: handle an observed boolean value
+//
+// Arguments:
+// obs - the current observation
+// value - the value being observed
+//
+// Notes:
+// Overrides parent so Replay can control force inlines.
+
+void ReplayPolicy::NoteBool(InlineObservation obs, bool value)
+{
+ // When inlining, let log override force inline.
+ // Make a note of the actual value for later reporting during observations.
+ if (!m_IsPrejitRoot && (obs == InlineObservation::CALLEE_IS_FORCE_INLINE))
+ {
+ m_WasForceInline = value;
+ value = false;
+ }
+
+ DiscretionaryPolicy::NoteBool(obs, value);
+}
+
+//------------------------------------------------------------------------
+// DetermineProfitability: determine if this inline is profitable
+//
+// Arguments:
+// methodInfo -- method info for the callee
+
+void ReplayPolicy::DetermineProfitability(CORINFO_METHOD_INFO* methodInfo)
+{
+ // TODO: handle prejit root case....need to record this in the
+ // root method XML.
+ if (m_IsPrejitRoot)
+ {
+ // Fall back to discretionary policy for now.
+ return DiscretionaryPolicy::DetermineProfitability(methodInfo);
+ }
+
+ // If we're also dumping inline data, make additional observations
+ // based on the method info, and estimate code size and perf
+ // impact, so that the reports have the necessary data.
+ if (JitConfig.JitInlineDumpData() != 0)
+ {
+ MethodInfoObservations(methodInfo);
+ EstimateCodeSize();
+ EstimatePerformanceImpact();
+ m_IsForceInline = m_WasForceInline;
+ }
+
+ // Try to find this candidate in the Xml.
+ // If we fail to find it, then don't inline.
+ bool accept = false;
+
+ // Grab the reader lock, since we'll be manipulating
+ // the file pointer as we look for the relevant inline xml.
+ {
+ CritSecHolder readerLock(s_XmlReaderLock);
+
+ // First, locate the entries for the root method.
+ bool foundMethod = FindMethod();
+
+ if (foundMethod && (m_InlineContext != nullptr))
+ {
+ // Next, navigate the context tree to find the entries
+ // for the context that contains this candidate.
+ bool foundContext = FindContext(m_InlineContext);
+
+ if (foundContext)
+ {
+ // Finally, find this candidate within its context
+ CORINFO_METHOD_HANDLE calleeHandle = methodInfo->ftn;
+ accept = FindInline(calleeHandle);
+ }
+ }
+ }
+
+ if (accept)
+ {
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Inline accepted via log replay"));
+
+ if (m_IsPrejitRoot)
+ {
+ SetCandidate(InlineObservation::CALLEE_LOG_REPLAY_ACCEPT);
+ }
+ else
+ {
+ SetCandidate(InlineObservation::CALLSITE_LOG_REPLAY_ACCEPT);
+ }
+ }
+ else
+ {
+ JITLOG_THIS(m_RootCompiler, (LL_INFO100000, "Inline rejected via log replay"));
+
+ if (m_IsPrejitRoot)
+ {
+ SetNever(InlineObservation::CALLEE_LOG_REPLAY_REJECT);
+ }
+ else
+ {
+ SetFailure(InlineObservation::CALLSITE_LOG_REPLAY_REJECT);
+ }
+ }
+
+ return;
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
diff --git a/src/jit/inlinepolicy.h b/src/jit/inlinepolicy.h
new file mode 100644
index 0000000000..62031c86a0
--- /dev/null
+++ b/src/jit/inlinepolicy.h
@@ -0,0 +1,479 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Inlining Policies
+//
+// This file contains class definitions for various inlining
+// policies used by the jit.
+//
+// -- CLASSES --
+//
+// LegalPolicy - partial class providing common legality checks
+// LegacyPolicy - policy that provides legacy inline behavior
+// EnhancedLegacyPolicy - legacy variant with some enhancements
+// DiscretionaryPolicy - legacy variant with uniform size policy
+// ModelPolicy - policy based on statistical modelling
+//
+// These experimental policies are available only in
+// DEBUG or release+INLINE_DATA builds of the jit.
+//
+// RandomPolicy - randomized inlining
+// FullPolicy - inlines everything up to size and depth limits
+// SizePolicy - tries not to increase method sizes
+// ReplayPolicy - only performs inlines recorded in an external Xml replay log
+//
+// The default policy in use is the EnhancedLegacyPolicy.
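+//
+// Rough inheritance sketch of the classes declared in this file:
+//
+// InlinePolicy
+//   +-- LegalPolicy
+//        +-- LegacyPolicy
+//        |     +-- EnhancedLegacyPolicy
+//        |     +-- DiscretionaryPolicy
+//        |          +-- ModelPolicy
+//        |          +-- FullPolicy         (DEBUG / INLINE_DATA)
+//        |          +-- SizePolicy         (DEBUG / INLINE_DATA)
+//        |          +-- ReplayPolicy       (DEBUG / INLINE_DATA)
+//        +-- RandomPolicy                  (DEBUG)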
+
+#ifndef _INLINE_POLICY_H_
+#define _INLINE_POLICY_H_
+
+#include "jit.h"
+#include "inline.h"
+
+// LegalPolicy is a partial policy that encapsulates the common
+// legality and ability checks the inliner must make.
+//
+// Generally speaking, the legal policy expects the inlining attempt
+// to fail fast when a fatal or equivalent observation is made. So
+// once an observation causes failure, no more observations are
+// expected. However for the prejit scan case (where the jit is not
+// actually inlining, but is assessing a method's general
+// inlinability) the legal policy allows multiple failing
+// observations provided they have the same impact. Only the first
+// observation that puts the policy into a failing state is
+// remembered. Transitions from failing states to candidate or success
+// states are not allowed.
+
+class LegalPolicy : public InlinePolicy
+{
+
+public:
+ // Constructor
+ LegalPolicy(bool isPrejitRoot) : InlinePolicy(isPrejitRoot)
+ {
+ // empty
+ }
+
+ // Handle an observation that must cause inlining to fail.
+ void NoteFatal(InlineObservation obs) override;
+
+protected:
+ // Helper methods
+ void NoteInternal(InlineObservation obs);
+ void SetCandidate(InlineObservation obs);
+ void SetFailure(InlineObservation obs);
+ void SetNever(InlineObservation obs);
+};
+
+// Forward declaration for the state machine class used by the
+// LegacyPolicy
+
+class CodeSeqSM;
+
+// LegacyPolicy implements the inlining policy used by the jit in its
+// initial release.
+
+class LegacyPolicy : public LegalPolicy
+{
+public:
+ // Construct a LegacyPolicy
+ LegacyPolicy(Compiler* compiler, bool isPrejitRoot)
+ : LegalPolicy(isPrejitRoot)
+ , m_RootCompiler(compiler)
+ , m_StateMachine(nullptr)
+ , m_Multiplier(0.0)
+ , m_CodeSize(0)
+ , m_CallsiteFrequency(InlineCallsiteFrequency::UNUSED)
+ , m_InstructionCount(0)
+ , m_LoadStoreCount(0)
+ , m_ArgFeedsConstantTest(0)
+ , m_ArgFeedsRangeCheck(0)
+ , m_ConstantArgFeedsConstantTest(0)
+ , m_CalleeNativeSizeEstimate(0)
+ , m_CallsiteNativeSizeEstimate(0)
+ , m_IsForceInline(false)
+ , m_IsForceInlineKnown(false)
+ , m_IsInstanceCtor(false)
+ , m_IsFromPromotableValueClass(false)
+ , m_HasSimd(false)
+ , m_LooksLikeWrapperMethod(false)
+ , m_MethodIsMostlyLoadStore(false)
+ {
+ // empty
+ }
+
+ // Policy observations
+ void NoteSuccess() override;
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override
+ {
+ return true;
+ }
+ bool IsLegacyPolicy() const override
+ {
+ return true;
+ }
+
+ // Policy estimates
+ int CodeSizeEstimate() override;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ const char* GetName() const override
+ {
+ return "LegacyPolicy";
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+protected:
+ // Constants
+ enum
+ {
+ MAX_BASIC_BLOCKS = 5,
+ SIZE_SCALE = 10
+ };
+
+ // Helper methods
+ double DetermineMultiplier();
+ int DetermineNativeSizeEstimate();
+ int DetermineCallsiteNativeSizeEstimate(CORINFO_METHOD_INFO* methodInfo);
+
+ // Data members
+ Compiler* m_RootCompiler; // root compiler instance
+ CodeSeqSM* m_StateMachine;
+ double m_Multiplier;
+ unsigned m_CodeSize;
+ InlineCallsiteFrequency m_CallsiteFrequency;
+ unsigned m_InstructionCount;
+ unsigned m_LoadStoreCount;
+ unsigned m_ArgFeedsConstantTest;
+ unsigned m_ArgFeedsRangeCheck;
+ unsigned m_ConstantArgFeedsConstantTest;
+ int m_CalleeNativeSizeEstimate;
+ int m_CallsiteNativeSizeEstimate;
+ bool m_IsForceInline : 1;
+ bool m_IsForceInlineKnown : 1;
+ bool m_IsInstanceCtor : 1;
+ bool m_IsFromPromotableValueClass : 1;
+ bool m_HasSimd : 1;
+ bool m_LooksLikeWrapperMethod : 1;
+ bool m_MethodIsMostlyLoadStore : 1;
+};
+
+// EnhancedLegacyPolicy extends the legacy policy by rejecting
+// inlining of methods that never return because they throw.
+
+class EnhancedLegacyPolicy : public LegacyPolicy
+{
+public:
+ EnhancedLegacyPolicy(Compiler* compiler, bool isPrejitRoot)
+ : LegacyPolicy(compiler, isPrejitRoot), m_IsNoReturn(false), m_IsNoReturnKnown(false)
+ {
+ // empty
+ }
+
+ // Policy observations
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override;
+ bool IsLegacyPolicy() const override
+ {
+ return false;
+ }
+
+protected:
+ // Data members
+ bool m_IsNoReturn : 1;
+ bool m_IsNoReturnKnown : 1;
+};
+
+#ifdef DEBUG
+
+// RandomPolicy implements a policy that inlines at random.
+// It is mostly useful for stress testing.
+
+class RandomPolicy : public LegalPolicy
+{
+public:
+ // Construct a RandomPolicy
+ RandomPolicy(Compiler* compiler, bool isPrejitRoot, unsigned seed);
+
+ // Policy observations
+ void NoteSuccess() override;
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override
+ {
+ return true;
+ }
+ bool IsLegacyPolicy() const override
+ {
+ return false;
+ }
+
+ // Policy estimates
+ int CodeSizeEstimate() override
+ {
+ return 0;
+ }
+
+ const char* GetName() const override
+ {
+ return "RandomPolicy";
+ }
+
+private:
+ // Data members
+ Compiler* m_RootCompiler;
+ CLRRandom* m_Random;
+ unsigned m_CodeSize;
+ bool m_IsForceInline : 1;
+ bool m_IsForceInlineKnown : 1;
+};
+
+#endif // DEBUG
+
+// DiscretionaryPolicy is a variant of the legacy policy. It differs
+// in that there is no ALWAYS_INLINE class, there is no IL size limit,
+// it does not try to maintain legacy compatibility, and in prejit mode,
+// discretionary failures do not set the "NEVER" inline bit.
+//
+// It is useful for gathering data about inline costs.
+
+class DiscretionaryPolicy : public LegacyPolicy
+{
+public:
+ // Construct a DiscretionaryPolicy
+ DiscretionaryPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy observations
+ void NoteBool(InlineObservation obs, bool value) override;
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override;
+ bool IsLegacyPolicy() const override
+ {
+ return false;
+ }
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy estimates
+ int CodeSizeEstimate() override;
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Externalize data
+ void DumpData(FILE* file) const override;
+ void DumpSchema(FILE* file) const override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "DiscretionaryPolicy";
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+protected:
+ void ComputeOpcodeBin(OPCODE opcode);
+ void EstimateCodeSize();
+ void EstimatePerformanceImpact();
+ void MethodInfoObservations(CORINFO_METHOD_INFO* methodInfo);
+ enum
+ {
+ MAX_ARGS = 6
+ };
+
+ unsigned m_Depth;
+ unsigned m_BlockCount;
+ unsigned m_Maxstack;
+ unsigned m_ArgCount;
+ CorInfoType m_ArgType[MAX_ARGS];
+ size_t m_ArgSize[MAX_ARGS];
+ unsigned m_LocalCount;
+ CorInfoType m_ReturnType;
+ size_t m_ReturnSize;
+ unsigned m_ArgAccessCount;
+ unsigned m_LocalAccessCount;
+ unsigned m_IntConstantCount;
+ unsigned m_FloatConstantCount;
+ unsigned m_IntLoadCount;
+ unsigned m_FloatLoadCount;
+ unsigned m_IntStoreCount;
+ unsigned m_FloatStoreCount;
+ unsigned m_SimpleMathCount;
+ unsigned m_ComplexMathCount;
+ unsigned m_OverflowMathCount;
+ unsigned m_IntArrayLoadCount;
+ unsigned m_FloatArrayLoadCount;
+ unsigned m_RefArrayLoadCount;
+ unsigned m_StructArrayLoadCount;
+ unsigned m_IntArrayStoreCount;
+ unsigned m_FloatArrayStoreCount;
+ unsigned m_RefArrayStoreCount;
+ unsigned m_StructArrayStoreCount;
+ unsigned m_StructOperationCount;
+ unsigned m_ObjectModelCount;
+ unsigned m_FieldLoadCount;
+ unsigned m_FieldStoreCount;
+ unsigned m_StaticFieldLoadCount;
+ unsigned m_StaticFieldStoreCount;
+ unsigned m_LoadAddressCount;
+ unsigned m_ThrowCount;
+ unsigned m_ReturnCount;
+ unsigned m_CallCount;
+ unsigned m_CallSiteWeight;
+ int m_ModelCodeSizeEstimate;
+ int m_PerCallInstructionEstimate;
+ bool m_IsClassCtor;
+ bool m_IsSameThis;
+ bool m_CallerHasNewArray;
+ bool m_CallerHasNewObj;
+};
+
+// ModelPolicy is an experimental policy that uses the results
+// of data modelling to make estimates.
+
+class ModelPolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a ModelPolicy
+ ModelPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy observations
+ void NoteInt(InlineObservation obs, int value) override;
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Policy policies
+ bool PropagateNeverToRuntime() const override
+ {
+ return true;
+ }
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "ModelPolicy";
+ }
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+};
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+// FullPolicy is an experimental policy that will always inline if
+// possible, subject to externally settable depth and size limits.
+//
+// It's useful for uncovering the full set of possible inlines for
+// methods.
+
+class FullPolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a FullPolicy
+ FullPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "FullPolicy";
+ }
+};
+
+// SizePolicy is an experimental policy that will inline as much
+// as possible without increasing the (estimated) method size.
+//
+// It may be useful down the road as a policy to use for methods
+// that are rarely executed (e.g. class constructors).
+
+class SizePolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a SizePolicy
+ SizePolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "SizePolicy";
+ }
+};
+
+// The ReplayPolicy performs only inlines specified by an external
+// inline replay log.
+
+class ReplayPolicy : public DiscretionaryPolicy
+{
+public:
+ // Construct a ReplayPolicy
+ ReplayPolicy(Compiler* compiler, bool isPrejitRoot);
+
+ // Policy observations
+ void NoteBool(InlineObservation obs, bool value) override;
+
+ // Optional observations
+ void NoteContext(InlineContext* context) override
+ {
+ m_InlineContext = context;
+ }
+
+ void NoteOffset(IL_OFFSETX offset) override
+ {
+ m_Offset = offset;
+ }
+
+ // Policy determinations
+ void DetermineProfitability(CORINFO_METHOD_INFO* methodInfo) override;
+
+ // Miscellaneous
+ const char* GetName() const override
+ {
+ return "ReplayPolicy";
+ }
+
+ static void FinalizeXml();
+
+private:
+ bool FindMethod();
+ bool FindContext(InlineContext* context);
+ bool FindInline(CORINFO_METHOD_HANDLE callee);
+ bool FindInline(unsigned token, unsigned hash, unsigned offset);
+
+ static bool s_WroteReplayBanner;
+ static FILE* s_ReplayFile;
+ static CritSecObject s_XmlReaderLock;
+ InlineContext* m_InlineContext;
+ IL_OFFSETX m_Offset;
+ bool m_WasForceInline;
+};
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#endif // _INLINE_POLICY_H_
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
new file mode 100644
index 0000000000..d516e0dea4
--- /dev/null
+++ b/src/jit/instr.cpp
@@ -0,0 +1,4086 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Instruction XX
+XX XX
+XX The interface to generate a machine-instruction. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "codegen.h"
+#include "instr.h"
+#include "emit.h"
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Returns the string representation of the given CPU instruction.
+ */
+
+const char* CodeGen::genInsName(instruction ins)
+{
+ // clang-format off
+ static
+ const char * const insNames[] =
+ {
+#if defined(_TARGET_XARCH_)
+ #define INST0(id, nm, fp, um, rf, wf, mr ) nm,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) nm,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) nm,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) nm,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) nm,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr ) nm,
+ #include "instrs.h"
+
+#elif defined(_TARGET_ARM_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) nm,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) nm,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) nm,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) nm,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) nm,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) nm,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) nm,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm,
+ #include "instrs.h"
+
+#elif defined(_TARGET_ARM64_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) nm,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) nm,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) nm,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) nm,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) nm,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) nm,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9 ) nm,
+ #include "instrs.h"
+
+#else
+#error "Unknown _TARGET_"
+#endif
+ };
+ // clang-format on
+
+ assert((unsigned)ins < sizeof(insNames) / sizeof(insNames[0]));
+ assert(insNames[ins] != nullptr);
+
+ return insNames[ins];
+}
+
+void __cdecl CodeGen::instDisp(instruction ins, bool noNL, const char* fmt, ...)
+{
+ if (compiler->opts.dspCode)
+ {
+ /* Display the instruction offset within the emit block */
+
+ // printf("[%08X:%04X]", getEmitter().emitCodeCurBlock(), getEmitter().emitCodeOffsInBlock());
+
+ /* Display the FP stack depth (before the instruction is executed) */
+
+ // printf("[FP=%02u] ", genGetFPstkLevel());
+
+ /* Display the instruction mnemonic */
+ printf(" ");
+
+ printf(" %-8s", genInsName(ins));
+
+ if (fmt)
+ {
+ va_list args;
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ }
+
+ if (!noNL)
+ {
+ printf("\n");
+ }
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+void CodeGen::instInit()
+{
+}
+
+/*****************************************************************************
+ *
+ * Return the size string (e.g. "word ptr") appropriate for the given size.
+ */
+
+#ifdef DEBUG
+
+const char* CodeGen::genSizeStr(emitAttr attr)
+{
+ // clang-format off
+ static
+ const char * const sizes[] =
+ {
+ "",
+ "byte ptr ",
+ "word ptr ",
+ nullptr,
+ "dword ptr ",
+ nullptr,
+ nullptr,
+ nullptr,
+ "qword ptr ",
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ nullptr,
+ "xmmword ptr ",
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
+ "ymmword ptr"
+ };
+ // clang-format on
+
+ unsigned size = EA_SIZE(attr);
+
+ assert(size == 0 || size == 1 || size == 2 || size == 4 || size == 8 || size == 16 || size == 32);
+
+ if (EA_ATTR(size) == attr)
+ {
+ return sizes[size];
+ }
+ else if (attr == EA_GCREF)
+ {
+ return "gword ptr ";
+ }
+ else if (attr == EA_BYREF)
+ {
+ return "bword ptr ";
+ }
+ else if (EA_IS_DSP_RELOC(attr))
+ {
+ return "rword ptr ";
+ }
+ else
+ {
+ assert(!"Unexpected");
+ return "unknw ptr ";
+ }
+}
+
+#endif
+
+/*****************************************************************************
+ *
+ * Generate an instruction.
+ */
+
+void CodeGen::instGen(instruction ins)
+{
+
+ getEmitter()->emitIns(ins);
+
+#ifdef _TARGET_XARCH_
+ // A workaround necessitated by limitations of the emitter:
+ // if we are scheduled to insert a nop here, we have to delay it.
+ // Hopefully we have not missed any other prefix instructions or
+ // places where they could be inserted.
+ if (ins == INS_lock && getEmitter()->emitNextNop == 0)
+ {
+ getEmitter()->emitNextNop = 1;
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Returns non-zero if the given CPU instruction is a floating-point ins.
+ */
+
+// static inline
+bool CodeGenInterface::instIsFP(instruction ins)
+{
+ assert((unsigned)ins < sizeof(instInfo) / sizeof(instInfo[0]));
+
+ return (instInfo[ins] & INST_FP) != 0;
+}
+
+#ifdef _TARGET_XARCH_
+/*****************************************************************************
+ *
+ * Generate a multi-byte NOP instruction.
+ */
+
+void CodeGen::instNop(unsigned size)
+{
+ assert(size <= 15);
+ getEmitter()->emitIns_Nop(size);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Generate a jump instruction.
+ */
+
+void CodeGen::inst_JMP(emitJumpKind jmp, BasicBlock* tgtBlock)
+{
+#if !FEATURE_FIXED_OUT_ARGS
+ // On the x86 we are pushing (and changing the stack level), but on x64 and other archs we have
+ // a fixed outgoing args area that we store into and we never change the stack level when calling methods.
+ //
+ // Thus only on x86 do we need to assert that the stack level at the target block matches the current stack level.
+ //
+ assert(tgtBlock->bbTgtStkDepth * sizeof(int) == genStackLevel || compiler->rpFrameType != FT_ESP_FRAME);
+#endif
+
+ getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmp), tgtBlock);
+}
+
+/*****************************************************************************
+ *
+ * Generate a set instruction.
+ */
+
+void CodeGen::inst_SET(emitJumpKind condition, regNumber reg)
+{
+#ifdef _TARGET_XARCH_
+ instruction ins;
+
+ /* Convert the condition to an instruction opcode */
+
+ switch (condition)
+ {
+ case EJ_js:
+ ins = INS_sets;
+ break;
+ case EJ_jns:
+ ins = INS_setns;
+ break;
+ case EJ_je:
+ ins = INS_sete;
+ break;
+ case EJ_jne:
+ ins = INS_setne;
+ break;
+
+ case EJ_jl:
+ ins = INS_setl;
+ break;
+ case EJ_jle:
+ ins = INS_setle;
+ break;
+ case EJ_jge:
+ ins = INS_setge;
+ break;
+ case EJ_jg:
+ ins = INS_setg;
+ break;
+
+ case EJ_jb:
+ ins = INS_setb;
+ break;
+ case EJ_jbe:
+ ins = INS_setbe;
+ break;
+ case EJ_jae:
+ ins = INS_setae;
+ break;
+ case EJ_ja:
+ ins = INS_seta;
+ break;
+
+ case EJ_jpe:
+ ins = INS_setpe;
+ break;
+ case EJ_jpo:
+ ins = INS_setpo;
+ break;
+
+ default:
+ NO_WAY("unexpected condition type");
+ return;
+ }
+
+ assert(genRegMask(reg) & RBM_BYTE_REGS);
+
+ // These instructions only write the low byte of 'reg'
+ getEmitter()->emitIns_R(ins, EA_1BYTE, reg);
+#elif defined(_TARGET_ARM64_)
+ insCond cond;
+ /* Convert the condition to an insCond value */
+ switch (condition)
+ {
+ case EJ_eq:
+ cond = INS_COND_EQ;
+ break;
+ case EJ_ne:
+ cond = INS_COND_NE;
+ break;
+ case EJ_hs:
+ cond = INS_COND_HS;
+ break;
+ case EJ_lo:
+ cond = INS_COND_LO;
+ break;
+
+ case EJ_mi:
+ cond = INS_COND_MI;
+ break;
+ case EJ_pl:
+ cond = INS_COND_PL;
+ break;
+ case EJ_vs:
+ cond = INS_COND_VS;
+ break;
+ case EJ_vc:
+ cond = INS_COND_VC;
+ break;
+
+ case EJ_hi:
+ cond = INS_COND_HI;
+ break;
+ case EJ_ls:
+ cond = INS_COND_LS;
+ break;
+ case EJ_ge:
+ cond = INS_COND_GE;
+ break;
+ case EJ_lt:
+ cond = INS_COND_LT;
+ break;
+
+ case EJ_gt:
+ cond = INS_COND_GT;
+ break;
+ case EJ_le:
+ cond = INS_COND_LE;
+ break;
+
+ default:
+ NO_WAY("unexpected condition type");
+ return;
+ }
+ getEmitter()->emitIns_R_COND(INS_cset, EA_8BYTE, reg, cond);
+#else
+ NYI("inst_SET");
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg" instruction.
+ */
+
+void CodeGen::inst_RV(instruction ins, regNumber reg, var_types type, emitAttr size)
+{
+ if (size == EA_UNKNOWN)
+ {
+ size = emitActualTypeSize(type);
+ }
+
+ getEmitter()->emitIns_R(ins, size, reg);
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg1, reg2" instruction.
+ */
+
+void CodeGen::inst_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ var_types type,
+ emitAttr size,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ if (size == EA_UNKNOWN)
+ {
+ size = emitActualTypeSize(type);
+ }
+
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R(ins, size, reg1, reg2, flags);
+#else
+ getEmitter()->emitIns_R_R(ins, size, reg1, reg2);
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg1, reg2, reg3" instruction.
+ */
+
+void CodeGen::inst_RV_RV_RV(instruction ins,
+ regNumber reg1,
+ regNumber reg2,
+ regNumber reg3,
+ emitAttr size,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+#ifdef _TARGET_ARM_
+ getEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3, flags);
+#elif defined(_TARGET_XARCH_) && defined(FEATURE_AVX_SUPPORT)
+ getEmitter()->emitIns_R_R_R(ins, size, reg1, reg2, reg3);
+#else
+ NYI("inst_RV_RV_RV");
+#endif
+}
+/*****************************************************************************
+ *
+ * Generate a "op icon" instruction.
+ */
+
+void CodeGen::inst_IV(instruction ins, int val)
+{
+ getEmitter()->emitIns_I(ins, EA_PTRSIZE, val);
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op icon" instruction where icon is a handle of type specified
+ * by 'flags'
+ */
+
+void CodeGen::inst_IV_handle(instruction ins, int val)
+{
+ getEmitter()->emitIns_I(ins, EA_HANDLE_CNS_RELOC, val);
+}
+
+#if FEATURE_STACK_FP_X87
+/*****************************************************************************
+ *
+ * Generate a "op ST(n), ST(0)" instruction.
+ */
+
+void CodeGen::inst_FS(instruction ins, unsigned stk)
+{
+ assert(stk < 8);
+
+#ifdef DEBUG
+
+ switch (ins)
+ {
+ case INS_fcompp:
+ assert(stk == 1);
+ break; // Implicit operand of compp is ST(1)
+ case INS_fld:
+ case INS_fxch:
+ assert(!"don't do this. Do you want to use inst_FN() instead?");
+ break;
+ default:
+ break;
+ }
+
+#endif
+
+ getEmitter()->emitIns_F_F0(ins, stk);
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op ST(0), ST(n)" instruction
+ */
+
+void CodeGenInterface::inst_FN(instruction ins, unsigned stk)
+{
+ assert(stk < 8);
+
+#ifdef DEBUG
+
+ switch (ins)
+ {
+ case INS_fst:
+ case INS_fstp:
+ case INS_faddp:
+ case INS_fsubp:
+ case INS_fsubrp:
+ case INS_fmulp:
+ case INS_fdivp:
+ case INS_fdivrp:
+ case INS_fcompp:
+ assert(!"don't do this. Do you want to use inst_FS() instead?");
+ break;
+ default:
+ break;
+ }
+
+#endif // DEBUG
+
+ getEmitter()->emitIns_F0_F(ins, stk);
+}
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Display a stack frame reference.
+ */
+
+void CodeGen::inst_set_SV_var(GenTreePtr tree)
+{
+#ifdef DEBUG
+ assert(tree && (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_STORE_LCL_VAR));
+ assert(tree->gtLclVarCommon.gtLclNum < compiler->lvaCount);
+
+ getEmitter()->emitVarRefOffs = tree->gtLclVar.gtLclILoffs;
+
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Generate a "op reg, icon" instruction.
+ */
+
+void CodeGen::inst_RV_IV(
+ instruction ins, regNumber reg, ssize_t val, emitAttr size, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+#if !defined(_TARGET_64BIT_)
+ assert(size != EA_8BYTE);
+#endif
+
+#ifdef _TARGET_ARM_
+ if (arm_Valid_Imm_For_Instr(ins, val, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val, flags);
+ }
+ else if (ins == INS_mov)
+ {
+ instGen_Set_Reg_To_Imm(size, reg, val);
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+ // TODO-Cleanup: Add a comment about why this is unreached() for RyuJIT backend.
+ unreached();
+#else // LEGACY_BACKEND
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, tmpReg, val);
+ getEmitter()->emitIns_R_R(ins, size, reg, tmpReg, flags);
+#endif // LEGACY_BACKEND
+ }
+#elif defined(_TARGET_ARM64_)
+ // TODO-Arm64-Bug: handle large constants!
+ // Probably need something like the ARM case above: if (arm_Valid_Imm_For_Instr(ins, val)) ...
+ assert(ins != INS_cmp);
+ assert(ins != INS_tst);
+ assert(ins != INS_mov);
+ getEmitter()->emitIns_R_R_I(ins, size, reg, reg, val);
+#else // !_TARGET_ARM_
+#ifdef _TARGET_AMD64_
+ // Instead of an 8-byte immediate load, a 4-byte immediate will do fine
+ // as the high 4 bytes will be zero anyway.
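+ // For example, 'mov rax, 0x12345678' can be emitted as 'mov eax, 0x12345678';
+ // a 32-bit register write zero-extends into the full 64-bit register on x64.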
+ if (size == EA_8BYTE && ins == INS_mov && ((val & 0xFFFFFFFF00000000LL) == 0))
+ {
+ size = EA_4BYTE;
+ getEmitter()->emitIns_R_I(ins, size, reg, val);
+ }
+ else if (EA_SIZE(size) == EA_8BYTE && ins != INS_mov && (((int)val != val) || EA_IS_CNS_RELOC(size)))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"Invalid immediate for inst_RV_IV");
+#else // LEGACY_BACKEND
+ // We can't fit the immediate into this instruction, so move it into
+ // a register first
+ regNumber tmpReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, tmpReg, val);
+
+ // We might have to switch back from 3-operand imul to two operand form
+ if (instrIs3opImul(ins))
+ {
+ assert(getEmitter()->inst3opImulReg(ins) == reg);
+ ins = INS_imul;
+ }
+ getEmitter()->emitIns_R_R(ins, EA_TYPE(size), reg, tmpReg);
+#endif // LEGACY_BACKEND
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val);
+ }
+#endif // !_TARGET_ARM_
+}
+
+#if defined(LEGACY_BACKEND)
+/*****************************************************************************
+ * Figure out the operands to address the tree.
+ * 'addr' can be one of (1) a pointer to be indirected
+ * (2) a calculation to be done with LEA_AVAILABLE
+ * (3) GT_ARR_ELEM
+ *
+ * On return, *baseReg, *indScale, *indReg, and *cns are set.
+ */
+
+void CodeGen::instGetAddrMode(GenTreePtr addr, regNumber* baseReg, unsigned* indScale, regNumber* indReg, unsigned* cns)
+{
+ if (addr->gtOper == GT_ARR_ELEM)
+ {
+ /* For GT_ARR_ELEM, the addressability registers are marked on
+ gtArrObj and gtArrInds[0] */
+
+ assert(addr->gtArrElem.gtArrObj->gtFlags & GTF_REG_VAL);
+ *baseReg = addr->gtArrElem.gtArrObj->gtRegNum;
+
+ assert(addr->gtArrElem.gtArrInds[0]->gtFlags & GTF_REG_VAL);
+ *indReg = addr->gtArrElem.gtArrInds[0]->gtRegNum;
+
+ if (jitIsScaleIndexMul(addr->gtArrElem.gtArrElemSize))
+ *indScale = addr->gtArrElem.gtArrElemSize;
+ else
+ *indScale = 0;
+
+ *cns = compiler->eeGetMDArrayDataOffset(addr->gtArrElem.gtArrElemType, addr->gtArrElem.gtArrRank);
+ }
+ else if (addr->gtOper == GT_LEA)
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ GenTreePtr base = lea->Base();
+ assert(!base || (base->gtFlags & GTF_REG_VAL));
+ GenTreePtr index = lea->Index();
+ assert(!index || (index->gtFlags & GTF_REG_VAL));
+
+ *baseReg = base ? base->gtRegNum : REG_NA;
+ *indReg = index ? index->gtRegNum : REG_NA;
+ *indScale = lea->gtScale;
+ *cns = lea->gtOffset;
+ return;
+ }
+ else
+ {
+ /* Figure out what complex address mode to use */
+
+ GenTreePtr rv1 = NULL;
+ GenTreePtr rv2 = NULL;
+ bool rev = false;
+
+ INDEBUG(bool yes =)
+ genCreateAddrMode(addr, -1, true, RBM_NONE, &rev, &rv1, &rv2,
+#if SCALED_ADDR_MODES
+ indScale,
+#endif
+ cns);
+
+ assert(yes); // since we have called genMakeAddressable() on addr
+ // Ensure that the base and index, if used, are in registers.
+ if (rv1 && ((rv1->gtFlags & GTF_REG_VAL) == 0))
+ {
+ if (rv1->gtFlags & GTF_SPILLED)
+ {
+ genRecoverReg(rv1, RBM_ALLINT, RegSet::KEEP_REG);
+ }
+ else
+ {
+ genCodeForTree(rv1, RBM_NONE);
+ regSet.rsMarkRegUsed(rv1, addr);
+ }
+ assert(rv1->gtFlags & GTF_REG_VAL);
+ }
+ if (rv2 && ((rv2->gtFlags & GTF_REG_VAL) == 0))
+ {
+ if (rv2->gtFlags & GTF_SPILLED)
+ {
+ genRecoverReg(rv2, ~genRegMask(rv1->gtRegNum), RegSet::KEEP_REG);
+ }
+ else
+ {
+ genCodeForTree(rv2, RBM_NONE);
+ regSet.rsMarkRegUsed(rv2, addr);
+ }
+ assert(rv2->gtFlags & GTF_REG_VAL);
+ }
+ // If we did both, we might have spilled rv1.
+ if (rv1 && ((rv1->gtFlags & GTF_SPILLED) != 0))
+ {
+ regSet.rsLockUsedReg(genRegMask(rv2->gtRegNum));
+ genRecoverReg(rv1, ~genRegMask(rv2->gtRegNum), RegSet::KEEP_REG);
+ regSet.rsUnlockReg(genRegMask(rv2->gtRegNum));
+ }
+
+ *baseReg = rv1 ? rv1->gtRegNum : REG_NA;
+ *indReg = rv2 ? rv2->gtRegNum : REG_NA;
+ }
+}
+
+#if CPU_LOAD_STORE_ARCH
+/*****************************************************************************
+ *
+ * Originally this was somewhat specific to the x86 instruction format.
+ * For a Load/Store arch we generate the 1-8 instructions necessary to
+ * implement the single addressing mode instruction used on x86.
+ * We currently don't have an instruction scheduler enabled on any target.
+ *
+ * [Schedule] an "ins reg, [r/m]" (rdst=true), or "ins [r/m], reg" (rdst=false)
+ * instruction (the r/m operand given by a tree). We also allow instructions
+ * of the form "ins [r/m], icon", these are signaled by setting 'cons' to
+ * true.
+ *
+ * The longest instruction sequence emitted on the ARM is as follows:
+ *
+ * - the "addr" represents an array addressing mode,
+ * with a baseReg, indReg with a shift and a large offset
+ * (Note that typically array addressing modes do NOT have a large offset)
+ * - "ins" is an ALU instruction,
+ * - cons=true, and imm is a large constant that cannot be directly encoded with "ins"
+ * - We may need to grab up to four additional registers: regT, regVal, regOffs and regImm
+ *
+ * add regT, baseReg, indReg<<shift
+ * movw regOffs, offsLo
+ * movt regOffs, offsHi
+ * ldr regVal, [regT + regOffs]
+ * movw regImm, consLo
+ * movt regImm, consHi
+ * "ins" regVal, regImm
+ * str regVal, [regT + regOffs]
+ *
+ */
+
+void CodeGen::sched_AM(instruction ins,
+ emitAttr size,
+ regNumber ireg,
+ bool rdst,
+ GenTreePtr addr,
+ unsigned offs,
+ bool cons,
+ int imm,
+ insFlags flags)
+{
+ assert(addr);
+ assert(size != EA_UNKNOWN);
+
+ enum INS_TYPE
+ {
+ eIT_Lea,
+ eIT_Load,
+ eIT_Store,
+ eIT_Other
+ };
+ INS_TYPE insType = eIT_Other;
+
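+ // Classify the instruction: an LEA is rewritten as an "add", loads and stores
+ // can address memory directly, and anything else ("other") will need a
+ // load/modify/store sequence on a load/store architecture.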
+ if (ins == INS_lea)
+ {
+ insType = eIT_Lea;
+ ins = INS_add;
+ }
+ else if (getEmitter()->emitInsIsLoad(ins))
+ {
+ insType = eIT_Load;
+ }
+ else if (getEmitter()->emitInsIsStore(ins))
+ {
+ insType = eIT_Store;
+ }
+
+ regNumber baseReg = REG_NA;
+ regNumber indReg = REG_NA;
+ unsigned indScale = 0;
+
+ regMaskTP avoidMask = RBM_NONE;
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "[reg+offs]" */
+ baseReg = addr->gtRegNum;
+ }
+ else if (addr->IsCnsIntOrI())
+ {
+#ifdef RELOC_SUPPORT
+ // Do we need relocations?
+ if (compiler->opts.compReloc && addr->IsIconHandle())
+ {
+ size = EA_SET_FLG(size, EA_DSP_RELOC_FLG);
+ // offs should be smaller than ZapperModule::FixupPlaceHolder
+ // so that we can uniquely identify the handle
+ assert(offs <= 4);
+ }
+#endif
+ ssize_t disp = addr->gtIntCon.gtIconVal + offs;
+ if ((insType == eIT_Store) && (ireg != REG_NA))
+ {
+ // Can't use the ireg as the baseReg when we have a store instruction
+ avoidMask |= genRegMask(ireg);
+ }
+ baseReg = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+
+ avoidMask |= genRegMask(baseReg);
+ instGen_Set_Reg_To_Imm(size, baseReg, disp);
+ offs = 0;
+ }
+ else
+ {
+ unsigned cns = 0;
+
+ instGetAddrMode(addr, &baseReg, &indScale, &indReg, &cns);
+
+ /* Add the constant offset value, if present */
+
+ offs += cns;
+
+#if SCALED_ADDR_MODES
+ noway_assert((baseReg != REG_NA) || (indReg != REG_NA));
+ if (baseReg != REG_NA)
+#endif
+ {
+ avoidMask |= genRegMask(baseReg);
+ }
+
+ // I don't think this is necessary even in the non-proto-jit case, but better to be
+ // conservative here. It is only necessary to avoid using ireg if it is used as regT,
+ // in which case it will be added to avoidMask below.
+
+ if (ireg != REG_NA)
+ {
+ avoidMask |= genRegMask(ireg);
+ }
+
+ if (indReg != REG_NA)
+ {
+ avoidMask |= genRegMask(indReg);
+ }
+ }
+
+ unsigned shift = (indScale > 0) ? genLog2((unsigned)indScale) : 0;
+
+ regNumber regT = REG_NA; // the register where the address is computed into
+ regNumber regOffs = REG_NA; // a temporary register to use for the offs when it can't be directly encoded
+ regNumber regImm = REG_NA; // a temporary register to use for the imm when it can't be directly encoded
+ regNumber regVal = REG_NA; // a temporary register to use when we have to do a load/modify/store operation
+
+ // Setup regT
+ if (indReg == REG_NA)
+ {
+ regT = baseReg; // We can use the baseReg, regT is read-only
+ }
+ else // We have an index register (indReg != REG_NA)
+ {
+ // Check for special case that we can encode using one instruction
+ if ((offs == 0) && (insType != eIT_Other) && !instIsFP(ins) && baseReg != REG_NA)
+ {
+ // ins ireg, [baseReg + indReg << shift]
+ getEmitter()->emitIns_R_R_R_I(ins, size, ireg, baseReg, indReg, shift, flags, INS_OPTS_LSL);
+ return;
+ }
+
+ // Otherwise setup regT, regT is written once here
+ //
+ if (insType == eIT_Lea || (insType == eIT_Load && !instIsFP(ins)))
+ {
+ assert(ireg != REG_NA);
+ // ireg will be written, so we can take it as our temporary register
+ regT = ireg;
+ }
+ else
+ {
+ // need a new temporary reg
+ regT = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+ regTracker.rsTrackRegTrash(regT);
+ }
+
+#if SCALED_ADDR_MODES
+ if (baseReg == REG_NA)
+ {
+ assert(shift > 0);
+ // LSL regT, indReg, shift.
+ getEmitter()->emitIns_R_R_I(INS_lsl, EA_PTRSIZE, regT, indReg, shift & ((TARGET_POINTER_SIZE * 8) - 1));
+ }
+ else
+#endif // SCALED_ADDR_MODES
+ {
+ assert(baseReg != REG_NA);
+
+ // add regT, baseReg, indReg<<shift.
+ getEmitter()->emitIns_R_R_R_I(INS_add,
+ // The "add" operation will yield either a pointer or byref, depending on the
+ // type of "addr."
+ varTypeIsGC(addr->TypeGet()) ? EA_BYREF : EA_PTRSIZE, regT, baseReg, indReg,
+ shift, INS_FLAGS_NOT_SET, INS_OPTS_LSL);
+ }
+ }
+
+ // regT is the base register for a load/store or an operand for add when insType is eIT_Lea
+ //
+ assert(regT != REG_NA);
+ avoidMask |= genRegMask(regT);
+
+ if (insType != eIT_Other)
+ {
+ assert((flags != INS_FLAGS_SET) || (insType == eIT_Lea));
+ if ((insType == eIT_Lea) && (offs == 0))
+ {
+ // If we have the same register as src and dst and we do not need to set the flags
+ // then we can skip emitting the instruction
+ if ((ireg != regT) || (flags == INS_FLAGS_SET))
+ {
+ // mov ireg, regT
+ getEmitter()->emitIns_R_R(INS_mov, size, ireg, regT, flags);
+ }
+ }
+ else if (arm_Valid_Imm_For_Instr(ins, offs, flags))
+ {
+ // ins ireg, [regT + offs]
+ getEmitter()->emitIns_R_R_I(ins, size, ireg, regT, offs, flags);
+ }
+ else
+ {
+ regOffs = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+
+ // We cannot use [regT + regOffs] to load/store a floating register
+ if (emitter::isFloatReg(ireg))
+ {
+ if (arm_Valid_Imm_For_Instr(INS_add, offs, flags))
+ {
+ // add regOffs, regT, offs
+ getEmitter()->emitIns_R_R_I(INS_add, EA_4BYTE, regOffs, regT, offs, flags);
+ }
+ else
+ {
+ // movw regOffs, offs_lo16
+ // movt regOffs, offs_hi16
+ // add regOffs, regOffs, regT
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regOffs, offs);
+ getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE, regOffs, regOffs, regT, flags);
+ }
+ // ins ireg, [regOffs]
+ getEmitter()->emitIns_R_R_I(ins, size, ireg, regOffs, 0, flags);
+
+ regTracker.rsTrackRegTrash(regOffs);
+ }
+ else
+ {
+ // mov regOffs, offs
+ // ins ireg, [regT + regOffs]
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regOffs, offs);
+ getEmitter()->emitIns_R_R_R(ins, size, ireg, regT, regOffs, flags);
+ }
+ }
+ }
+ else // (insType == eIT_Other);
+ {
+ // Setup regVal
+ //
+
+ regVal = regSet.rsPickReg(RBM_ALLINT & ~avoidMask);
+ regTracker.rsTrackRegTrash(regVal);
+ avoidMask |= genRegMask(regVal);
+ var_types load_store_type;
+ switch (size)
+ {
+ case EA_4BYTE:
+ load_store_type = TYP_INT;
+ break;
+
+ case EA_2BYTE:
+ load_store_type = TYP_SHORT;
+ break;
+
+ case EA_1BYTE:
+ load_store_type = TYP_BYTE;
+ break;
+
+ default:
+ assert(!"Unexpected size in sched_AM, eIT_Other");
+ load_store_type = TYP_INT;
+ break;
+ }
+
+ // Load the content at addr into regVal using regT + offs
+ if (arm_Valid_Disp_For_LdSt(offs, load_store_type))
+ {
+ // ldrX regVal, [regT + offs]
+ getEmitter()->emitIns_R_R_I(ins_Load(load_store_type), size, regVal, regT, offs);
+ }
+ else
+ {
+ // mov regOffs, offs
+ // ldrX regVal, [regT + regOffs]
+ regOffs = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+ avoidMask |= genRegMask(regOffs);
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regOffs, offs);
+ getEmitter()->emitIns_R_R_R(ins_Load(load_store_type), size, regVal, regT, regOffs);
+ }
+
+ if (cons)
+ {
+ if (arm_Valid_Imm_For_Instr(ins, imm, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, regVal, imm, flags);
+ }
+ else
+ {
+ assert(regOffs == REG_NA);
+ regImm = regSet.rsPickFreeReg(RBM_ALLINT & ~avoidMask);
+ avoidMask |= genRegMask(regImm);
+ instGen_Set_Reg_To_Imm(size, regImm, imm);
+ getEmitter()->emitIns_R_R(ins, size, regVal, regImm, flags);
+ }
+ }
+ else if (rdst)
+ {
+ getEmitter()->emitIns_R_R(ins, size, ireg, regVal, flags);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(ins, size, regVal, ireg, flags);
+ }
+
+ // If we do not have a register destination we must perform the write-back store instruction
+ // (unless we have an instruction like INS_cmp that does not write a destination)
+ //
+ if (!rdst && ins_Writes_Dest(ins))
+ {
+ // Store regVal into [addr]
+ if (regOffs == REG_NA)
+ {
+ // strX regVal, [regT + offs]
+ getEmitter()->emitIns_R_R_I(ins_Store(load_store_type), size, regVal, regT, offs);
+ }
+ else
+ {
+ // strX regVal, [regT + regOffs]
+ getEmitter()->emitIns_R_R_R(ins_Store(load_store_type), size, regVal, regT, regOffs);
+ }
+ }
+ }
+}
+
+#else // !CPU_LOAD_STORE_ARCH
+
+/*****************************************************************************
+ *
+ * This is somewhat specific to the x86 instruction format.
+ * We currently don't have an instruction scheduler enabled on any target.
+ *
+ * [Schedule] an "ins reg, [r/m]" (rdst=true), or "ins [r/m], reg" (rdst=false)
+ * instruction (the r/m operand given by a tree). We also allow instructions
+ * of the form "ins [r/m], icon", these are signalled by setting 'cons' to
+ * true.
+ */
+
+void CodeGen::sched_AM(instruction ins,
+ emitAttr size,
+ regNumber ireg,
+ bool rdst,
+ GenTreePtr addr,
+ unsigned offs,
+ bool cons,
+ int imm,
+ insFlags flags)
+{
+#ifdef _TARGET_XARCH_
+ /* Don't use this method for issuing calls. Use instEmit_xxxCall() */
+ assert(ins != INS_call);
+#endif
+
+ assert(addr);
+ assert(size != EA_UNKNOWN);
+
+ regNumber reg;
+
+ /* Has the address been conveniently loaded into a register,
+ or is it an absolute value ? */
+
+ if ((addr->gtFlags & GTF_REG_VAL) || (addr->IsCnsIntOrI()))
+ {
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "[reg+offs]" */
+
+ reg = addr->gtRegNum;
+
+ if (cons)
+ getEmitter()->emitIns_I_AR(ins, size, imm, reg, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_AR(ins, size, ireg, reg, offs);
+ else
+ getEmitter()->emitIns_AR_R(ins, size, ireg, reg, offs);
+ }
+ else
+ {
+ /* The address is an absolute value */
+
+ assert(addr->IsCnsIntOrI());
+
+#ifdef RELOC_SUPPORT
+ // Do we need relocations?
+ if (compiler->opts.compReloc && addr->IsIconHandle())
+ {
+ size = EA_SET_FLG(size, EA_DSP_RELOC_FLG);
+ // offs should be smaller than ZapperModule::FixupPlaceHolder
+ // so that we can uniquely identify the handle
+ assert(offs <= 4);
+ }
+#endif
+ reg = REG_NA;
+ ssize_t disp = addr->gtIntCon.gtIconVal + offs;
+
+ // Cross our fingers and hope the code generator did the right
+ // thing and the constant address can be RIP-relative
+
+ if (cons)
+ getEmitter()->emitIns_I_AI(ins, size, imm, disp);
+ else if (rdst)
+ getEmitter()->emitIns_R_AI(ins, size, ireg, disp);
+ else
+ getEmitter()->emitIns_AI_R(ins, size, ireg, disp);
+ }
+
+ return;
+ }
+
+ /* Figure out what complex address mode to use */
+
+ regNumber baseReg, indReg;
+ unsigned indScale = 0, cns = 0;
+
+ instGetAddrMode(addr, &baseReg, &indScale, &indReg, &cns);
+
+ /* Add the constant offset value, if present */
+
+ offs += cns;
+
+ /* Is there an index reg operand? */
+
+ if (indReg != REG_NA)
+ {
+ /* Is the index reg operand scaled? */
+
+ if (indScale)
+ {
+ /* Is there a base address operand? */
+
+ if (baseReg != REG_NA)
+ {
+ reg = baseReg;
+
+ /* The address is "[reg + {2/4/8} * indReg + offs]" */
+
+ if (cons)
+ getEmitter()->emitIns_I_ARX(ins, size, imm, reg, indReg, indScale, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_ARX(ins, size, ireg, reg, indReg, indScale, offs);
+ else
+ getEmitter()->emitIns_ARX_R(ins, size, ireg, reg, indReg, indScale, offs);
+ }
+ else
+ {
+ /* The address is "[{2/4/8} * indReg + offs]" */
+
+ if (cons)
+ getEmitter()->emitIns_I_AX(ins, size, imm, indReg, indScale, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_AX(ins, size, ireg, indReg, indScale, offs);
+ else
+ getEmitter()->emitIns_AX_R(ins, size, ireg, indReg, indScale, offs);
+ }
+ }
+ else
+ {
+ assert(baseReg != REG_NA);
+ reg = baseReg;
+
+ /* The address is "[reg + indReg + offs]" */
+ if (cons)
+ getEmitter()->emitIns_I_ARR(ins, size, imm, reg, indReg, offs);
+ else if (rdst)
+ getEmitter()->emitIns_R_ARR(ins, size, ireg, reg, indReg, offs);
+ else
+ getEmitter()->emitIns_ARR_R(ins, size, ireg, reg, indReg, offs);
+ }
+ }
+ else
+ {
+ unsigned cpx = 0;
+ CORINFO_CLASS_HANDLE cls = 0;
+
+ /* No second operand: the address is "[reg + icon]" */
+
+ assert(baseReg != REG_NA);
+ reg = baseReg;
+
+#ifdef LATE_DISASM
+ /*
+ Keep in mind that non-static data members (GT_FIELD nodes) were
+ transformed into GT_IND nodes - we keep the CLS/CPX information
+ in the GT_CNS_INT node representing the field offset of the
+ class member
+ */
+
+ if (addr->gtOper != GT_LEA && (addr->gtOp.gtOp2->gtOper == GT_CNS_INT) &&
+ addr->gtOp.gtOp2->IsIconHandle(GTF_ICON_FIELD_HDL))
+ {
+ /* This is a field offset - set the CPX/CLS values to emit a fixup */
+
+ cpx = addr->gtOp.gtOp2->gtIntCon.gtIconFld.gtIconCPX;
+ cls = addr->gtOp.gtOp2->gtIntCon.gtIconFld.gtIconCls;
+ }
+#endif
+
+ if (cons)
+ {
+ getEmitter()->emitIns_I_AR(ins, size, imm, reg, offs, cpx, cls);
+ }
+ else if (rdst)
+ {
+ getEmitter()->emitIns_R_AR(ins, size, ireg, reg, offs, cpx, cls);
+ }
+ else
+ {
+ getEmitter()->emitIns_AR_R(ins, size, ireg, reg, offs, cpx, cls);
+ }
+ }
+}
+
+#endif // !CPU_LOAD_STORE_ARCH
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Emit a "call [r/m]" instruction (the r/m operand given by a tree).
+ */
+
+void CodeGen::instEmit_indCall(GenTreePtr call,
+ size_t argSize,
+ emitAttr retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(emitAttr secondRetSize))
+{
+ GenTreePtr addr;
+
+ emitter::EmitCallType emitCallType;
+
+ regNumber brg = REG_NA;
+ regNumber xrg = REG_NA;
+ unsigned mul = 0;
+ unsigned cns = 0;
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+
+ assert(call->gtOper == GT_CALL);
+
+ /* Get hold of the function address */
+
+ assert(call->gtCall.gtCallType == CT_INDIRECT);
+ addr = call->gtCall.gtCallAddr;
+ assert(addr);
+
+#ifdef DEBUG
+ // Pass the call signature information from the GenTree node so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ sigInfo = call->gtCall.callSig;
+#endif // DEBUG
+
+#if CPU_LOAD_STORE_ARCH
+
+ emitCallType = emitter::EC_INDIR_R;
+
+ if (!addr->OperIsIndir())
+ {
+ if (!(addr->gtFlags & GTF_REG_VAL) && (addr->OperGet() == GT_CNS_INT))
+ {
+ ssize_t funcPtr = addr->gtIntCon.gtIconVal;
+
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_ADDR,
+ NULL, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo)(void*) funcPtr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ return;
+ }
+ }
+ else
+ {
+ /* Get hold of the address of the function pointer */
+
+ addr = addr->gtOp.gtOp1;
+ }
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "reg" */
+
+ brg = addr->gtRegNum;
+ }
+ else
+ {
+ // Force the address into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ genCodeForTree(addr, RBM_NONE);
+#endif // LEGACY_BACKEND
+ assert(addr->gtFlags & GTF_REG_VAL);
+ brg = addr->gtRegNum;
+ }
+
+#else // CPU_LOAD_STORE_ARCH
+
+ /* Is there an indirection? */
+
+ if (!addr->OperIsIndir())
+ {
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ emitCallType = emitter::EC_INDIR_R;
+ brg = addr->gtRegNum;
+ }
+ else
+ {
+ if (addr->OperGet() != GT_CNS_INT)
+ {
+ assert(addr->OperGet() == GT_LCL_VAR);
+
+ emitCallType = emitter::EC_INDIR_SR;
+ cns = addr->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ ssize_t funcPtr = addr->gtIntCon.gtIconVal;
+
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_ADDR,
+ nullptr, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo)(void*) funcPtr, argSize,
+ retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur);
+ return;
+ }
+ }
+ }
+ else
+ {
+ /* This is an indirect call */
+
+ emitCallType = emitter::EC_INDIR_ARD;
+
+ /* Get hold of the address of the function pointer */
+
+ addr = addr->gtOp.gtOp1;
+
+ /* Has the address been conveniently loaded into a register? */
+
+ if (addr->gtFlags & GTF_REG_VAL)
+ {
+ /* The address is "reg" */
+
+ brg = addr->gtRegNum;
+ }
+ else
+ {
+ bool rev = false;
+
+ GenTreePtr rv1 = nullptr;
+ GenTreePtr rv2 = nullptr;
+
+ /* Figure out what complex address mode to use */
+
+ INDEBUG(bool yes =)
+ genCreateAddrMode(addr, -1, true, RBM_NONE, &rev, &rv1, &rv2, &mul, &cns);
+
+ INDEBUG(PREFIX_ASSUME(yes)); // since we have called genMakeAddressable() on call->gtCall.gtCallAddr
+
+ /* Get the additional operands if any */
+
+ if (rv1)
+ {
+ assert(rv1->gtFlags & GTF_REG_VAL);
+ brg = rv1->gtRegNum;
+ }
+
+ if (rv2)
+ {
+ assert(rv2->gtFlags & GTF_REG_VAL);
+ xrg = rv2->gtRegNum;
+ }
+ }
+ }
+
+ assert(emitCallType == emitter::EC_INDIR_R || emitCallType == emitter::EC_INDIR_SR ||
+ emitCallType == emitter::EC_INDIR_C || emitCallType == emitter::EC_INDIR_ARD);
+
+#endif // CPU_LOAD_STORE_ARCH
+
+ getEmitter()->emitIns_Call(emitCallType,
+ nullptr, // methHnd
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+ argSize, retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize),
+ gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // ilOffset
+ brg, xrg, mul,
+ cns); // addressing mode values
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Emit an "op [r/m]" instruction (the r/m operand given by a tree).
+ */
+
+void CodeGen::instEmit_RM(instruction ins, GenTreePtr tree, GenTreePtr addr, unsigned offs)
+{
+ emitAttr size;
+
+ if (!instIsFP(ins))
+ size = emitTypeSize(tree->TypeGet());
+ else
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+
+ sched_AM(ins, size, REG_NA, false, addr, offs);
+}
+
+/*****************************************************************************
+ *
+ * Emit an "op [r/m], reg" instruction (the r/m operand given by a tree).
+ */
+
+void CodeGen::instEmit_RM_RV(instruction ins, emitAttr size, GenTreePtr tree, regNumber reg, unsigned offs)
+{
+#ifdef _TARGET_XARCH_
+ assert(instIsFP(ins) == 0);
+#endif
+ sched_AM(ins, size, reg, false, tree, offs);
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a tree (which has
+ * been made addressable).
+ */
+
+void CodeGen::inst_TT(instruction ins, GenTreePtr tree, unsigned offs, int shfv, emitAttr size)
+{
+ bool sizeInferred = false;
+
+ if (size == EA_UNKNOWN)
+ {
+ sizeInferred = true;
+ if (instIsFP(ins))
+ {
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ }
+ else
+ {
+ size = emitTypeSize(tree->TypeGet());
+ }
+ }
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ regNumber reg;
+
+#ifndef _TARGET_64BIT_
+#ifdef LEGACY_BACKEND
+ LONGREG_TT:
+#endif // LEGACY_BACKEND
+#endif
+
+#if FEATURE_STACK_FP_X87
+
+ /* Is this a floating-point instruction? */
+
+ if (isFloatRegType(tree->gtType))
+ {
+ reg = tree->gtRegNum;
+
+ assert(instIsFP(ins) && ins != INS_fst && ins != INS_fstp);
+ assert(shfv == 0);
+
+ inst_FS(ins, reg + genGetFPstkLevel());
+ return;
+ }
+#endif // FEATURE_STACK_FP_X87
+
+ assert(!instIsFP(ins));
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs)
+ {
+ assert(offs == sizeof(int));
+ reg = genRegPairHi(tree->gtRegPair);
+ }
+ else
+ {
+ reg = genRegPairLo(tree->gtRegPair);
+ }
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ reg = tree->gtRegNum;
+ }
+
+ /* Make sure it is not the "stack-half" of an enregistered long */
+
+ if (reg != REG_STK)
+ {
+ // For short types, indicate that the value is promoted to 4 bytes.
+ // For longs, we are only emitting half of it so again set it to 4 bytes,
+ // but leave the GC tracking information alone.
+ if (sizeInferred && EA_SIZE(size) < EA_4BYTE)
+ {
+ size = EA_SET_SIZE(size, 4);
+ }
+
+ if (shfv)
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, shfv);
+ }
+ else
+ {
+ inst_RV(ins, reg, tree->TypeGet(), size);
+ }
+
+ return;
+ }
+ }
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'");
+ }
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+
+ case GT_LCL_VAR:
+
+#ifdef LEGACY_BACKEND
+ /* Is this an enregistered long ? */
+
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_TT;
+ }
+#endif // LEGACY_BACKEND
+
+ inst_set_SV_var(tree);
+ goto LCL;
+
+ case GT_LCL_FLD:
+
+ offs += tree->gtLclFld.gtLclOffs;
+ goto LCL;
+
+ LCL:
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ if (shfv)
+ {
+ getEmitter()->emitIns_S_I(ins, size, varNum, offs, shfv);
+ }
+ else
+ {
+ getEmitter()->emitIns_S(ins, size, varNum, offs);
+ }
+
+ return;
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can; we just want to
+ // make sure that we don't mistakenly use 8 bytes when the constant
+ // is smaller.)
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+ if (shfv)
+ {
+ getEmitter()->emitIns_C_I(ins, size, tree->gtClsVar.gtClsVarHnd, offs, shfv);
+ }
+ else
+ {
+ getEmitter()->emitIns_C(ins, size, tree->gtClsVar.gtClsVarHnd, offs);
+ }
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"inst_TT not supported for GT_IND, GT_NULLCHECK or GT_ARR_ELEM in !LEGACY_BACKEND");
+#else // LEGACY_BACKEND
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ if (shfv)
+ sched_AM(ins, size, REG_NA, false, addr, offs, true, shfv);
+ else
+ instEmit_RM(ins, tree, addr, offs);
+#endif // LEGACY_BACKEND
+ }
+ break;
+
+#ifdef _TARGET_X86_
+ case GT_CNS_INT:
+ // We will get here for GT_MKREFANY from CodeGen::genPushArgList
+ assert(offs == 0);
+ assert(!shfv);
+ if (tree->IsIconHandle())
+ inst_IV_handle(ins, tree->gtIntCon.gtIconVal);
+ else
+ inst_IV(ins, tree->gtIntCon.gtIconVal);
+ break;
+#endif
+
+ case GT_COMMA:
+ // tree->gtOp.gtOp1 - already processed by genCreateAddrMode()
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a tree (which has
+ * been made addressable) and another that is a register.
+ */
+
+void CodeGen::inst_TT_RV(instruction ins, GenTreePtr tree, regNumber reg, unsigned offs, emitAttr size, insFlags flags)
+{
+ assert(reg != REG_STK);
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ regNumber rg2;
+
+#ifdef _TARGET_64BIT_
+ assert(!instIsFP(ins));
+
+ rg2 = tree->gtRegNum;
+
+ assert(offs == 0);
+ assert(rg2 != REG_STK);
+
+ if (ins != INS_mov || rg2 != reg)
+ {
+ inst_RV_RV(ins, rg2, reg, tree->TypeGet());
+ }
+ return;
+
+#else // !_TARGET_64BIT_
+
+#ifdef LEGACY_BACKEND
+ LONGREG_TT_RV:
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_XARCH_
+ assert(!instIsFP(ins));
+#endif
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs)
+ {
+ assert(offs == sizeof(int));
+ rg2 = genRegPairHi(tree->gtRegPair);
+ }
+ else
+ {
+ rg2 = genRegPairLo(tree->gtRegPair);
+ }
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ rg2 = tree->gtRegNum;
+ }
+
+ if (rg2 != REG_STK)
+ {
+ if (ins != INS_mov || rg2 != reg)
+ inst_RV_RV(ins, rg2, reg, tree->TypeGet(), size, flags);
+ return;
+ }
+
+#endif // _TARGET_64BIT_
+ }
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'");
+ }
+
+ if (size == EA_UNKNOWN)
+ {
+ if (instIsFP(ins))
+ {
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ }
+ else
+ {
+ size = emitTypeSize(tree->TypeGet());
+ }
+ }
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+
+ case GT_LCL_VAR:
+
+#ifdef LEGACY_BACKEND
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_TT_RV;
+ }
+#endif // LEGACY_BACKEND
+
+ inst_set_SV_var(tree);
+ goto LCL;
+
+ case GT_LCL_FLD:
+ case GT_STORE_LCL_FLD:
+ offs += tree->gtLclFld.gtLclOffs;
+ goto LCL;
+
+ LCL:
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+#if CPU_LOAD_STORE_ARCH
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
+#ifndef LEGACY_BACKEND
+ // TODO-LdStArch-Bug: Should regTmp be a dst on the node or an internal reg?
+ // Either way, it is not currently being handled by Lowering.
+ regNumber regTmp = tree->gtRegNum;
+ assert(regTmp != REG_NA);
+#else // LEGACY_BACKEND
+ regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+#endif // LEGACY_BACKEND
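+ // Read-modify-write: load the local into regTmp, apply the operation
+ // with 'reg', then store the result back to the local.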
+ getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), size, regTmp, varNum, offs);
+ getEmitter()->emitIns_R_R(ins, size, regTmp, reg, flags);
+ getEmitter()->emitIns_S_R(ins_Store(tree->TypeGet()), size, regTmp, varNum, offs);
+
+ regTracker.rsTrackRegTrash(regTmp);
+ }
+ else
+#endif
+ {
+ // ins is a Store instruction
+ //
+ getEmitter()->emitIns_S_R(ins, size, reg, varNum, offs);
+#ifdef _TARGET_ARM_
+ // If we need to set the flags then add an extra movs reg,reg instruction
+ if (flags == INS_FLAGS_SET)
+ getEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET);
+#endif
+ }
+ return;
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can; we just want to
+ // make sure that we don't mistakenly use 8 bytes when the constant
+ // is smaller.)
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+#if CPU_LOAD_STORE_ARCH
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
+#ifndef LEGACY_BACKEND
+ NYI("Store of GT_CLS_VAR not supported for ARM RyuJIT Backend");
+#else // LEGACY_BACKEND
+ regNumber regTmpAddr = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+ regNumber regTmpArith = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg) & ~genRegMask(regTmpAddr));
+
+ getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, regTmpAddr, tree->gtClsVar.gtClsVarHnd, offs);
+ getEmitter()->emitIns_R_R(ins_Load(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+ getEmitter()->emitIns_R_R(ins, size, regTmpArith, reg, flags);
+ getEmitter()->emitIns_R_R(ins_Store(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+
+ regTracker.rsTrackRegTrash(regTmpAddr);
+ regTracker.rsTrackRegTrash(regTmpArith);
+#endif // LEGACY_BACKEND
+ }
+ else
+#endif // CPU_LOAD_STORE_ARCH
+ {
+ getEmitter()->emitIns_C_R(ins, size, tree->gtClsVar.gtClsVarHnd, reg, offs);
+ }
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"inst_TT_RV not supported for GT_IND, GT_NULLCHECK or GT_ARR_ELEM in RyuJIT Backend");
+#else // LEGACY_BACKEND
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ sched_AM(ins, size, reg, false, addr, offs, false, 0, flags);
+#endif // LEGACY_BACKEND
+ }
+ break;
+
+ case GT_COMMA:
+ // tree->gtOp.gtOp1 - already processed by genCreateAddrMode()
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+
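+/*****************************************************************************
+ *
+ * Return a register that is known to currently hold the constant zero, or
+ * REG_NA if no such register exists. On the legacy backend, when not
+ * optimizing for speed, we may also zero out a free register here so that
+ * it can be reused for subsequent zero stores.
+ */
+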
+regNumber CodeGen::genGetZeroRegister()
+{
+ regNumber zeroReg = REG_NA;
+
+#if REDUNDANT_LOAD
+
+ // Is the constant already in some register?
+
+ zeroReg = regTracker.rsIconIsInReg(0);
+#endif
+
+#ifdef LEGACY_BACKEND
+ if (zeroReg == REG_NA)
+ {
+ regMaskTP freeMask = regSet.rsRegMaskFree();
+
+ if ((freeMask != 0) && (compiler->compCodeOpt() != Compiler::FAST_CODE))
+ {
+ // For SMALL_CODE and BLENDED_CODE,
+ // we try to generate:
+ //
+ // xor reg, reg
+ // mov dest, reg
+ //
+ // When selecting a register to xor we try to avoid REG_TMP_0
+ // when we have another CALLEE_TRASH register available.
+ // This will often let us reuse the zeroed register in
+ // several back-to-back assignments
+ //
+ if ((freeMask & RBM_CALLEE_TRASH) != RBM_TMP_0)
+ freeMask &= ~RBM_TMP_0;
+ zeroReg = regSet.rsGrabReg(freeMask); // PickReg in stress will pick 'random' registers
+ // We want one in the freeMask set, so just use GrabReg
+ genSetRegToIcon(zeroReg, 0, TYP_INT);
+ }
+ }
+#endif // LEGACY_BACKEND
+
+ return zeroReg;
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a tree (which has
+ * been made addressable) and another that is an integer constant.
+ */
+#ifdef LEGACY_BACKEND
+void CodeGen::inst_TT_IV(instruction ins, GenTreePtr tree, ssize_t val, unsigned offs, emitAttr size, insFlags flags)
+{
+ bool sizeInferred = false;
+
+ if (size == EA_UNKNOWN)
+ {
+ sizeInferred = true;
+ if (instIsFP(ins))
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ else
+ size = emitTypeSize(tree->TypeGet());
+ }
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+#ifndef _TARGET_64BIT_
+ LONGREG_TT_IV:
+#endif
+ regNumber reg;
+
+ assert(instIsFP(ins) == 0);
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs == 0)
+ {
+ reg = genRegPairLo(tree->gtRegPair);
+ }
+ else // offs == 4
+ {
+ assert(offs == sizeof(int));
+ reg = genRegPairHi(tree->gtRegPair);
+ }
+#if CPU_LOAD_STORE_ARCH
+ if (reg == REG_STK && !getEmitter()->emitInsIsLoadOrStore(ins))
+ {
+ reg = regSet.rsPickFreeReg();
+ inst_RV_TT(INS_mov, reg, tree, offs, EA_4BYTE, flags);
+ regTracker.rsTrackRegTrash(reg);
+ }
+#endif
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ reg = tree->gtRegNum;
+ }
+
+ if (reg != REG_STK)
+ {
+ // We always widen as part of enregistering,
+ // so a smaller tree in a register can be
+ // treated as 4 bytes
+ if (sizeInferred && (size < EA_4BYTE))
+ {
+ size = EA_SET_SIZE(size, EA_4BYTE);
+ }
+
+ if ((ins == INS_mov) && !EA_IS_CNS_RELOC(size))
+ {
+ genSetRegToIcon(reg, val, tree->TypeGet(), flags);
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+ inst_RV_IV(ins, reg, val, size);
+#elif defined(_TARGET_ARM_)
+ if (!EA_IS_CNS_RELOC(size) && arm_Valid_Imm_For_Instr(ins, val, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val, flags);
+ }
+ else // We need a scratch register
+ {
+ // Load imm into a register
+ regMaskTP usedMask;
+ if (tree->gtType == TYP_LONG)
+ {
+ usedMask = genRegPairMask(tree->gtRegPair);
+#if CPU_LOAD_STORE_ARCH
+ // In gtRegPair, this part of the long may have been on the stack
+ // in which case, the code above would have loaded it into 'reg'
+ // and so we need to also include 'reg' in the set of registers
+ // that are already in use.
+ usedMask |= genRegMask(reg);
+#endif // CPU_LOAD_STORE_ARCH
+ }
+ else
+ {
+ usedMask = genRegMask(tree->gtRegNum);
+ }
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~usedMask);
+ noway_assert(reg != immReg);
+ instGen_Set_Reg_To_Imm(size, immReg, val);
+ if (getEmitter()->emitInsIsStore(ins))
+ ins = INS_mov;
+ getEmitter()->emitIns_R_R(ins, size, reg, immReg, flags);
+ }
+#else
+ NYI("inst_TT_IV - unknown target");
+#endif
+ }
+ return;
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+ /* Are we storing a zero? */
+
+ if ((ins == INS_mov) && (val == 0) &&
+ ((genTypeSize(tree->gtType) == sizeof(int)) || (genTypeSize(tree->gtType) == REGSIZE_BYTES)))
+ {
+ regNumber zeroReg;
+
+ zeroReg = genGetZeroRegister();
+
+ if (zeroReg != REG_NA)
+ {
+ inst_TT_RV(INS_mov, tree, zeroReg, offs);
+ return;
+ }
+ }
+#endif
+
+#if CPU_LOAD_STORE_ARCH
+ /* Are we storing/comparing with a constant? */
+
+ if (getEmitter()->emitInsIsStore(ins) || getEmitter()->emitInsIsCompare(ins))
+ {
+ // Load val into a register
+
+ regNumber valReg;
+ valReg = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, valReg, val);
+ inst_TT_RV(ins, tree, valReg, offs, size, flags);
+ return;
+ }
+ else if (ins == INS_mov)
+ {
+ assert(!"Please call ins_Store(type) to get the store instruction");
+ }
+ assert(!getEmitter()->emitInsIsLoad(ins));
+#endif // CPU_LOAD_STORE_ARCH
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill], icon'");
+ }
+
+#ifdef _TARGET_AMD64_
+ if ((EA_SIZE(size) == EA_8BYTE) && (((int)val != (ssize_t)val) || EA_IS_CNS_RELOC(size)))
+ {
+ // Load imm into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT);
+ instGen_Set_Reg_To_Imm(size, immReg, val);
+ inst_TT_RV(ins, tree, immReg, offs);
+ return;
+ }
+#endif // _TARGET_AMD64_
+
+ int ival = (int)val;
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_FLD:
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ offs += tree->gtLclFld.gtLclOffs;
+
+ goto LCL;
+
+ case GT_LCL_VAR:
+
+#ifndef _TARGET_64BIT_
+ /* Is this an enregistered long ? */
+
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_TT_IV;
+ }
+#endif // !_TARGET_64BIT_
+
+ inst_set_SV_var(tree);
+
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+ varDsc = &compiler->lvaTable[varNum];
+
+ // Fix the immediate by sign extending if needed
+ if (size < EA_4BYTE && !varTypeIsUnsigned(varDsc->TypeGet()))
+ {
+ if (size == EA_1BYTE)
+ {
+ if ((ival & 0x7f) != ival)
+ ival = ival | 0xffffff00;
+ }
+ else
+ {
+ assert(size == EA_2BYTE);
+ if ((ival & 0x7fff) != ival)
+ ival = ival | 0xffff0000;
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless the code generator told us a size, or it is a field
+ // of a promoted struct
+ if (sizeInferred && (size < EA_4BYTE) && !varDsc->lvIsStructField)
+ {
+ size = EA_SET_SIZE(size, EA_4BYTE);
+ }
+
+ LCL:
+
+ /* Integer instructions never operate on more than EA_PTRSIZE */
+
+ assert(instIsFP(ins) == false);
+
+#if CPU_LOAD_STORE_ARCH
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
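+ // Read-modify-write: load the local into regTmp, apply the operation with
+ // the immediate (or a register holding it), then store the result back.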
+ regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT);
+ getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), size, regTmp, varNum, offs);
+ regTracker.rsTrackRegTrash(regTmp);
+
+ if (arm_Valid_Imm_For_Instr(ins, val, flags))
+ {
+ getEmitter()->emitIns_R_I(ins, size, regTmp, ival, flags);
+ }
+ else // We need a scratch register
+ {
+ // Load imm into a register
+ regNumber regImm = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(regTmp));
+
+ instGen_Set_Reg_To_Imm(size, regImm, val);
+ getEmitter()->emitIns_R_R(ins, size, regTmp, regImm, flags);
+ }
+ getEmitter()->emitIns_S_R(ins_Store(tree->TypeGet()), size, regTmp, varNum, offs);
+ }
+ else
+#endif
+ {
+ getEmitter()->emitIns_S_I(ins, size, varNum, offs, ival);
+ }
+ return;
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can)
+ // We just want to make sure that we don't mistakenly
+ // use 8 bytes when the constant is smaller.
+ //
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+#if CPU_LOAD_STORE_ARCH
+ regNumber regTmpAddr;
+ regTmpAddr = regSet.rsPickFreeReg(RBM_ALLINT);
+
+ getEmitter()->emitIns_R_C(INS_lea, EA_PTRSIZE, regTmpAddr, tree->gtClsVar.gtClsVarHnd, offs);
+ regTracker.rsTrackRegTrash(regTmpAddr);
+
+ if (!getEmitter()->emitInsIsStore(ins))
+ {
+ regNumber regTmpArith = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(regTmpAddr));
+
+ getEmitter()->emitIns_R_R(ins_Load(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+
+ if (arm_Valid_Imm_For_Instr(ins, ival, flags))
+ {
+ getEmitter()->emitIns_R_R_I(ins, size, regTmpArith, regTmpArith, ival, flags);
+ }
+ else
+ {
+ regNumber regTmpImm =
+ regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(regTmpAddr) & ~genRegMask(regTmpArith));
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regTmpImm, (ssize_t)ival);
+ getEmitter()->emitIns_R_R(ins, size, regTmpArith, regTmpImm, flags);
+ }
+ regTracker.rsTrackRegTrash(regTmpArith);
+
+ getEmitter()->emitIns_R_R(ins_Store(tree->TypeGet()), size, regTmpArith, regTmpAddr);
+ }
+ else
+ {
+ regNumber regTmpImm = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(regTmpAddr));
+
+ instGen_Set_Reg_To_Imm(EA_4BYTE, regTmpImm, (ssize_t)ival, flags);
+ getEmitter()->emitIns_R_R(ins_Store(tree->TypeGet()), size, regTmpImm, regTmpAddr);
+ }
+#else // !CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_C_I(ins, size, tree->gtClsVar.gtClsVarHnd, offs, ival);
+#endif
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ {
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ sched_AM(ins, size, REG_NA, false, addr, offs, true, ival, flags);
+ }
+ return;
+
+ case GT_COMMA:
+ // tree->gtOp.gtOp1 - already processed by genCreateAddrMode()
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+#endif // LEGACY_BACKEND
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a register and the
+ * other one by an indirection tree (which has been made addressable).
+ */
+
+void CodeGen::inst_RV_AT(
+ instruction ins, emitAttr size, var_types type, regNumber reg, GenTreePtr tree, unsigned offs, insFlags flags)
+{
+#ifdef _TARGET_XARCH_
+#ifdef DEBUG
+ // If it is a GC type and the result is not, then either
+ // 1) it is an LEA
+ // 2) optOptimizeBools() optimized if (ref != 0 && ref != 0) to if (ref & ref)
+ // 3) optOptimizeBools() optimized if (ref == 0 || ref == 0) to if (ref | ref)
+ // 4) byref - byref = int
+ if (type == TYP_REF && !EA_IS_GCREF(size))
+ assert((EA_IS_BYREF(size) && ins == INS_add) || (ins == INS_lea || ins == INS_and || ins == INS_or));
+ if (type == TYP_BYREF && !EA_IS_BYREF(size))
+ assert(ins == INS_lea || ins == INS_and || ins == INS_or || ins == INS_sub);
+ assert(!instIsFP(ins));
+#endif
+#endif
+
+ // Integer instructions never operate on more than EA_PTRSIZE.
+ if (EA_SIZE(size) > EA_PTRSIZE && !instIsFP(ins))
+ size = EA_SET_SIZE(size, EA_PTRSIZE);
+
+ GenTreePtr addr = tree;
+ sched_AM(ins, size, reg, true, addr, offs, false, 0, flags);
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by an indirection tree
+ * (which has been made addressable) and an integer constant.
+ */
+
+void CodeGen::inst_AT_IV(instruction ins, emitAttr size, GenTreePtr baseTree, int icon, unsigned offs)
+{
+ sched_AM(ins, size, REG_NA, false, baseTree, offs, true, icon);
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Generate an instruction that has one operand given by a register and the
+ * other one by a tree (which has been made addressable).
+ */
+
+void CodeGen::inst_RV_TT(instruction ins,
+ regNumber reg,
+ GenTreePtr tree,
+ unsigned offs,
+ emitAttr size,
+ insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+ assert(reg != REG_STK);
+
+ if (size == EA_UNKNOWN)
+ {
+ if (!instIsFP(ins))
+ {
+ size = emitTypeSize(tree->TypeGet());
+ }
+ else
+ {
+ size = EA_ATTR(genTypeSize(tree->TypeGet()));
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+#ifdef DEBUG
+ // If it is a GC type and the result is not, then either
+ // 1) it is an LEA
+ // 2) optOptimizeBools() optimized if (ref != 0 && ref != 0) to if (ref & ref)
+ // 3) optOptimizeBools() optimized if (ref == 0 || ref == 0) to if (ref | ref)
+ // 4) byref - byref = int
+ if (tree->gtType == TYP_REF && !EA_IS_GCREF(size))
+ {
+ assert((EA_IS_BYREF(size) && ins == INS_add) || (ins == INS_lea || ins == INS_and || ins == INS_or));
+ }
+ if (tree->gtType == TYP_BYREF && !EA_IS_BYREF(size))
+ {
+ assert(ins == INS_lea || ins == INS_and || ins == INS_or || ins == INS_sub);
+ }
+#endif
+#endif
+
+#if CPU_LOAD_STORE_ARCH
+ if (ins == INS_mov)
+ {
+#if defined(_TARGET_ARM_)
+ if (tree->TypeGet() != TYP_LONG)
+ {
+ ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+ }
+ else if (offs == 0)
+ {
+ ins = ins_Move_Extend(TYP_INT,
+ (tree->gtFlags & GTF_REG_VAL) != 0 && genRegPairLo(tree->gtRegPair) != REG_STK);
+ }
+ else
+ {
+ ins = ins_Move_Extend(TYP_INT,
+ (tree->gtFlags & GTF_REG_VAL) != 0 && genRegPairHi(tree->gtRegPair) != REG_STK);
+ }
+#elif defined(_TARGET_ARM64_)
+ ins = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+#else
+ NYI("CodeGen::inst_RV_TT with INS_mov");
+#endif
+ }
+#endif // CPU_LOAD_STORE_ARCH
+
+AGAIN:
+
+ /* Is the value sitting in a register? */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+#ifdef _TARGET_64BIT_
+ assert(instIsFP(ins) == 0);
+
+ regNumber rg2 = tree->gtRegNum;
+
+ assert(offs == 0);
+ assert(rg2 != REG_STK);
+
+ if ((ins != INS_mov) || (rg2 != reg))
+ {
+ inst_RV_RV(ins, reg, rg2, tree->TypeGet(), size);
+ }
+ return;
+
+#else // !_TARGET_64BIT_
+
+#ifdef LEGACY_BACKEND
+ LONGREG_RVTT:
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_XARCH_
+ assert(instIsFP(ins) == 0);
+#endif
+
+ regNumber rg2;
+
+#if CPU_LONG_USES_REGPAIR
+ if (tree->gtType == TYP_LONG)
+ {
+ if (offs)
+ {
+ assert(offs == sizeof(int));
+
+ rg2 = genRegPairHi(tree->gtRegPair);
+ }
+ else
+ {
+ rg2 = genRegPairLo(tree->gtRegPair);
+ }
+ }
+ else
+#endif // CPU_LONG_USES_REGPAIR
+ {
+ rg2 = tree->gtRegNum;
+ }
+
+ if (rg2 != REG_STK)
+ {
+#ifdef _TARGET_ARM_
+ if (getEmitter()->emitInsIsLoad(ins) || (ins == INS_lea))
+ {
+ ins = ins_Copy(tree->TypeGet());
+ }
+#endif
+
+ bool isMoveIns = (ins == INS_mov);
+#ifdef _TARGET_ARM_
+ if (ins == INS_vmov)
+ isMoveIns = true;
+#endif
+ if (!isMoveIns || (rg2 != reg))
+ {
+ inst_RV_RV(ins, reg, rg2, tree->TypeGet(), size, flags);
+ }
+ return;
+ }
+
+#endif // _TARGET_64BIT_
+ }
+
+ /* Is this a spilled value? */
+
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ assert(!"ISSUE: If this can happen, we need to generate 'ins [ebp+spill]'");
+ }
+
+ switch (tree->gtOper)
+ {
+ unsigned varNum;
+
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+
+#ifdef LEGACY_BACKEND
+ /* Is this an enregistered long ? */
+
+ if (tree->gtType == TYP_LONG && !(tree->gtFlags & GTF_REG_VAL))
+ {
+
+ /* Avoid infinite loop */
+
+ if (genMarkLclVar(tree))
+ goto LONGREG_RVTT;
+ }
+#endif // LEGACY_BACKEND
+
+ inst_set_SV_var(tree);
+ goto LCL;
+
+ case GT_LCL_FLD_ADDR:
+ case GT_LCL_FLD:
+ offs += tree->gtLclFld.gtLclOffs;
+ goto LCL;
+
+ LCL:
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+#ifdef _TARGET_ARM_
+ switch (ins)
+ {
+ case INS_mov:
+ ins = ins_Load(tree->TypeGet());
+ __fallthrough;
+
+ case INS_lea:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_vldr:
+ assert(flags != INS_FLAGS_SET);
+ getEmitter()->emitIns_R_S(ins, size, reg, varNum, offs);
+ return;
+
+ default:
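+ // The instruction cannot take a stack operand directly, so load the
+ // local into a temporary register and apply the instruction
+ // register-to-register.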
+ regNumber regTmp;
+#ifndef LEGACY_BACKEND
+ if (tree->TypeGet() == TYP_LONG)
+ regTmp = (offs == 0) ? genRegPairLo(tree->gtRegPair) : genRegPairHi(tree->gtRegPair);
+ else
+ regTmp = tree->gtRegNum;
+#else // LEGACY_BACKEND
+ if (varTypeIsFloating(tree))
+ {
+ regTmp = regSet.PickRegFloat(tree->TypeGet());
+ }
+ else
+ {
+ regTmp = regSet.rsPickReg(RBM_ALLINT & ~genRegMask(reg));
+ }
+#endif // LEGACY_BACKEND
+
+ getEmitter()->emitIns_R_S(ins_Load(tree->TypeGet()), size, regTmp, varNum, offs);
+ getEmitter()->emitIns_R_R(ins, size, reg, regTmp, flags);
+
+ regTracker.rsTrackRegTrash(regTmp);
+ return;
+ }
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_S(ins, size, reg, varNum, offs);
+ return;
+#endif // !_TARGET_ARM_
+
+ case GT_CLS_VAR:
+ // Make sure FP instruction size matches the operand size
+ // (We optimize constant doubles to floats when we can; we just want to
+ // make sure that we don't mistakenly use 8 bytes when the constant
+ // is smaller.)
+ assert(!isFloatRegType(tree->gtType) || genTypeSize(tree->gtType) == EA_SIZE_IN_BYTES(size));
+
+#if CPU_LOAD_STORE_ARCH
+#ifndef LEGACY_BACKEND
+ assert(!"GT_CLS_VAR not supported in ARM RyuJIT backend");
+#else // LEGACY_BACKEND
+ switch (ins)
+ {
+ case INS_mov:
+ ins = ins_Load(tree->TypeGet());
+
+ __fallthrough;
+
+ case INS_lea:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_vldr:
+ assert(flags != INS_FLAGS_SET);
+ getEmitter()->emitIns_R_C(ins, size, reg, tree->gtClsVar.gtClsVarHnd, offs);
+ return;
+
+ default:
+ regNumber regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+ getEmitter()->emitIns_R_C(ins_Load(tree->TypeGet()), size, regTmp, tree->gtClsVar.gtClsVarHnd,
+ offs);
+ getEmitter()->emitIns_R_R(ins, size, reg, regTmp, flags);
+ regTracker.rsTrackRegTrash(regTmp);
+ return;
+ }
+#endif // LEGACY_BACKEND
+#else // CPU_LOAD_STORE_ARCH
+ getEmitter()->emitIns_R_C(ins, size, reg, tree->gtClsVar.gtClsVarHnd, offs);
+#endif // CPU_LOAD_STORE_ARCH
+ return;
+
+ case GT_IND:
+ case GT_NULLCHECK:
+ case GT_ARR_ELEM:
+ case GT_LEA:
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"inst_RV_TT not supported for GT_IND, GT_NULLCHECK, GT_ARR_ELEM or GT_LEA in !LEGACY_BACKEND");
+#else // LEGACY_BACKEND
+ GenTreePtr addr = tree->OperIsIndir() ? tree->gtOp.gtOp1 : tree;
+ inst_RV_AT(ins, size, tree->TypeGet(), reg, addr, offs, flags);
+#endif // LEGACY_BACKEND
+ }
+ break;
+
+ case GT_CNS_INT:
+
+ assert(offs == 0);
+
+ inst_RV_IV(ins, reg, tree->gtIntCon.gtIconVal, emitActualTypeSize(tree->TypeGet()), flags);
+ break;
+
+ case GT_CNS_LNG:
+
+ assert(size == EA_4BYTE || size == EA_8BYTE);
+
+#ifdef _TARGET_AMD64_
+ assert(offs == 0);
+#endif // _TARGET_AMD64_
+
+ ssize_t constVal;
+ emitAttr size;
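+ // Select the low half (offs == 0) or the high 32 bits of the 64-bit
+ // constant, along with the emit size to use for it.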
+ if (offs == 0)
+ {
+ constVal = (ssize_t)(tree->gtLngCon.gtLconVal);
+ size = EA_PTRSIZE;
+ }
+ else
+ {
+ constVal = (ssize_t)(tree->gtLngCon.gtLconVal >> 32);
+ size = EA_4BYTE;
+ }
+#ifndef LEGACY_BACKEND
+#ifdef _TARGET_ARM_
+ if ((ins != INS_mov) && !arm_Valid_Imm_For_Instr(ins, constVal, flags))
+ {
+ regNumber constReg = (offs == 0) ? genRegPairLo(tree->gtRegPair) : genRegPairHi(tree->gtRegPair);
+ instGen_Set_Reg_To_Imm(size, constReg, constVal);
+ getEmitter()->emitIns_R_R(ins, size, reg, constReg, flags);
+ break;
+ }
+#endif // _TARGET_ARM_
+#endif // !LEGACY_BACKEND
+
+ inst_RV_IV(ins, reg, constVal, size, flags);
+ break;
+
+ case GT_COMMA:
+ tree = tree->gtOp.gtOp2;
+ goto AGAIN;
+
+ default:
+ assert(!"invalid address");
+ }
+}
+
+/*****************************************************************************
+ *
+ * Generate the 3-operand imul instruction "imul reg, [tree], icon"
+ * which is reg=[tree]*icon
+ */
+#ifdef LEGACY_BACKEND
+void CodeGen::inst_RV_TT_IV(instruction ins, regNumber reg, GenTreePtr tree, int val)
+{
+ assert(tree->gtType <= TYP_I_IMPL);
+
+#ifdef _TARGET_XARCH_
+ /* Only 'imul' uses this instruction format. Since we don't represent
+ three operands for an instruction, we encode the target register as
+ an implicit operand */
+
+ assert(ins == INS_imul);
+ ins = getEmitter()->inst3opImulForReg(reg);
+
+ genUpdateLife(tree);
+ inst_TT_IV(ins, tree, val);
+#else
+ NYI("inst_RV_TT_IV - unknown target");
+#endif
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Generate a "shift reg, icon" instruction.
+ */
+
+void CodeGen::inst_RV_SH(
+ instruction ins, emitAttr size, regNumber reg, unsigned val, insFlags flags /* = INS_FLAGS_DONT_CARE */)
+{
+#if defined(_TARGET_ARM_)
+
+ if (val >= 32)
+ val &= 0x1f;
+
+ getEmitter()->emitIns_R_I(ins, size, reg, val, flags);
+
+#elif defined(_TARGET_XARCH_)
+
+#ifdef _TARGET_AMD64_
+ // X64 JB BE ensures only encodable values make it here.
+ // x86 can encode 8 bits, though it masks down to 5 or 6
+ // depending on whether 32-bit or 64-bit registers are used.
+ // Here we will allow anything that is encodable.
+ assert(val < 256);
+#endif
+
+ ins = genMapShiftInsToShiftByConstantIns(ins, val);
+
+ if (val == 1)
+ {
+ getEmitter()->emitIns_R(ins, size, reg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(ins, size, reg, val);
+ }
+
+#else
+ NYI("inst_RV_SH - unknown target");
+#endif // _TARGET_*
+}
+
+/*****************************************************************************
+ *
+ * Generate a "shift [r/m], icon" instruction.
+ */
+
+void CodeGen::inst_TT_SH(instruction ins, GenTreePtr tree, unsigned val, unsigned offs)
+{
+#ifdef _TARGET_XARCH_
+ if (val == 0)
+ {
+ // Shift by 0 - nothing to do.
+ return;
+ }
+
+ ins = genMapShiftInsToShiftByConstantIns(ins, val);
+ if (val == 1)
+ {
+ inst_TT(ins, tree, offs, 0, emitTypeSize(tree->TypeGet()));
+ }
+ else
+ {
+ inst_TT(ins, tree, offs, val, emitTypeSize(tree->TypeGet()));
+ }
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+ inst_TT(ins, tree, offs, val, emitTypeSize(tree->TypeGet()));
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Generate a "shift [addr], cl" instruction.
+ */
+
+void CodeGen::inst_TT_CL(instruction ins, GenTreePtr tree, unsigned offs)
+{
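+ // The caller passes the "shift by CL" form of the instruction; a zero
+ // shift count tells inst_TT not to append an immediate operand.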
+ inst_TT(ins, tree, offs, 0, emitTypeSize(tree->TypeGet()));
+}
+
+/*****************************************************************************
+ *
+ * Generate an instruction of the form "op reg1, reg2, icon".
+ */
+
+#if defined(_TARGET_XARCH_)
+void CodeGen::inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival)
+{
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ assert(ins == INS_shld || ins == INS_shrd || ins == INS_shufps || ins == INS_shufpd || ins == INS_pshufd ||
+ ins == INS_cmpps || ins == INS_cmppd || ins == INS_dppd || ins == INS_dpps || ins == INS_insertps);
+#else // LEGACY_BACKEND
+ assert(ins == INS_shld || ins == INS_shrd);
+#endif // defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ getEmitter()->emitIns_R_R_I(ins, size, reg1, reg2, ival);
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Generate an instruction with two registers, the second one being a byte
+ * or word register (i.e. this is something like "movzx eax, cl").
+ */
+
+void CodeGen::inst_RV_RR(instruction ins, emitAttr size, regNumber reg1, regNumber reg2)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE);
+#ifdef _TARGET_XARCH_
+ assert(ins == INS_movsx || ins == INS_movzx);
+ assert(size != EA_1BYTE || (genRegMask(reg2) & RBM_BYTE_REGS));
+#endif
+
+ getEmitter()->emitIns_R_R(ins, size, reg1, reg2);
+}
+
+/*****************************************************************************
+ *
+ * The following should all end up inline in compiler.hpp at some point.
+ */
+
+void CodeGen::inst_ST_RV(instruction ins, TempDsc* tmp, unsigned ofs, regNumber reg, var_types type)
+{
+ getEmitter()->emitIns_S_R(ins, emitActualTypeSize(type), reg, tmp->tdTempNum(), ofs);
+}
+
+void CodeGen::inst_ST_IV(instruction ins, TempDsc* tmp, unsigned ofs, int val, var_types type)
+{
+ getEmitter()->emitIns_S_I(ins, emitActualTypeSize(type), tmp->tdTempNum(), ofs, val);
+}
+
+#if FEATURE_FIXED_OUT_ARGS
+/*****************************************************************************
+ *
+ * Generate an instruction that references the outgoing argument space
+ * like "str r3, [sp+0x04]"
+ */
+
+void CodeGen::inst_SA_RV(instruction ins, unsigned ofs, regNumber reg, var_types type)
+{
+ assert(ofs < compiler->lvaOutgoingArgSpaceSize);
+
+ getEmitter()->emitIns_S_R(ins, emitActualTypeSize(type), reg, compiler->lvaOutgoingArgSpaceVar, ofs);
+}
+
+void CodeGen::inst_SA_IV(instruction ins, unsigned ofs, int val, var_types type)
+{
+ assert(ofs < compiler->lvaOutgoingArgSpaceSize);
+
+ getEmitter()->emitIns_S_I(ins, emitActualTypeSize(type), compiler->lvaOutgoingArgSpaceVar, ofs, val);
+}
+#endif // FEATURE_FIXED_OUT_ARGS
+
+/*****************************************************************************
+ *
+ * Generate an instruction with one register and one operand that is byte
+ * or short (e.g. something like "movzx eax, byte ptr [edx]").
+ */
+
+void CodeGen::inst_RV_ST(instruction ins, emitAttr size, regNumber reg, GenTreePtr tree)
+{
+ assert(size == EA_1BYTE || size == EA_2BYTE);
+
+ /* "movsx erx, rl" must be handled as a special case */
+
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ inst_RV_RR(ins, size, reg, tree->gtRegNum);
+ }
+ else
+ {
+ inst_RV_TT(ins, reg, tree, 0, size);
+ }
+}
+
+void CodeGen::inst_RV_ST(instruction ins, regNumber reg, TempDsc* tmp, unsigned ofs, var_types type, emitAttr size)
+{
+ if (size == EA_UNKNOWN)
+ {
+ size = emitActualTypeSize(type);
+ }
+
+#ifdef _TARGET_ARM_
+ switch (ins)
+ {
+ case INS_mov:
+ assert(!"Please call ins_Load(type) to get the load instruction");
+ break;
+
+ case INS_add:
+ case INS_ldr:
+ case INS_ldrh:
+ case INS_ldrb:
+ case INS_ldrsh:
+ case INS_ldrsb:
+ case INS_lea:
+ case INS_vldr:
+ getEmitter()->emitIns_R_S(ins, size, reg, tmp->tdTempNum(), ofs);
+ break;
+
+ default:
+#ifndef LEGACY_BACKEND
+ assert(!"Default inst_RV_ST case not supported for Arm !LEGACY_BACKEND");
+#else // LEGACY_BACKEND
+ regNumber regTmp;
+ if (varTypeIsFloating(type))
+ {
+ regTmp = regSet.PickRegFloat(type);
+ }
+ else
+ {
+ regTmp = regSet.rsPickFreeReg(RBM_ALLINT & ~genRegMask(reg));
+ }
+ getEmitter()->emitIns_R_S(ins_Load(type), size, regTmp, tmp->tdTempNum(), ofs);
+ regTracker.rsTrackRegTrash(regTmp);
+ getEmitter()->emitIns_R_R(ins, size, reg, regTmp);
+#endif // LEGACY_BACKEND
+ break;
+ }
+#else // !_TARGET_ARM_
+ getEmitter()->emitIns_R_S(ins, size, reg, tmp->tdTempNum(), ofs);
+#endif // !_TARGET_ARM_
+}
+
+void CodeGen::inst_mov_RV_ST(regNumber reg, GenTreePtr tree)
+{
+ /* Figure out the size of the value being loaded */
+
+ emitAttr size = EA_ATTR(genTypeSize(tree->gtType));
+ instruction loadIns = ins_Move_Extend(tree->TypeGet(), (tree->gtFlags & GTF_REG_VAL) != 0);
+
+ if (size < EA_4BYTE)
+ {
+ if ((tree->gtFlags & GTF_SMALL_OK) && (size == EA_1BYTE)
+#if CPU_HAS_BYTE_REGS
+ && (genRegMask(reg) & RBM_BYTE_REGS)
+#endif
+ )
+ {
+ /* We only need to load the actual size */
+
+ inst_RV_TT(INS_mov, reg, tree, 0, EA_1BYTE);
+ }
+ else
+ {
+ /* Generate the "movsx/movzx" opcode */
+
+ inst_RV_ST(loadIns, size, reg, tree);
+ }
+ }
+ else
+ {
+ /* Compute op1 into the target register */
+
+ inst_RV_TT(loadIns, reg, tree);
+ }
+}
+#ifdef _TARGET_XARCH_
+void CodeGen::inst_FS_ST(instruction ins, emitAttr size, TempDsc* tmp, unsigned ofs)
+{
+ getEmitter()->emitIns_S(ins, size, tmp->tdTempNum(), ofs);
+}
+#endif
+
+#ifdef _TARGET_ARM_
+bool CodeGenInterface::validImmForInstr(instruction ins, ssize_t imm, insFlags flags)
+{
+ if (getEmitter()->emitInsIsLoadOrStore(ins) && !instIsFP(ins))
+ {
+ return validDispForLdSt(imm, TYP_INT);
+ }
+
+ bool result = false;
+ switch (ins)
+ {
+ case INS_cmp:
+ case INS_cmn:
+ if (validImmForAlu(imm) || validImmForAlu(-imm))
+ result = true;
+ break;
+
+ case INS_and:
+ case INS_bic:
+ case INS_orr:
+ case INS_orn:
+ case INS_mvn:
+ if (validImmForAlu(imm) || validImmForAlu(~imm))
+ result = true;
+ break;
+
+ case INS_mov:
+ if (validImmForMov(imm))
+ result = true;
+ break;
+
+ case INS_addw:
+ case INS_subw:
+ if ((unsigned_abs(imm) <= 0x00000fff) && (flags != INS_FLAGS_SET)) // 12-bit immediate
+ result = true;
+ break;
+
+ case INS_add:
+ case INS_sub:
+ if (validImmForAdd(imm, flags))
+ result = true;
+ break;
+
+ case INS_tst:
+ case INS_eor:
+ case INS_teq:
+ case INS_adc:
+ case INS_sbc:
+ case INS_rsb:
+ if (validImmForAlu(imm))
+ result = true;
+ break;
+
+ case INS_asr:
+ case INS_lsl:
+ case INS_lsr:
+ case INS_ror:
+ if (imm > 0 && imm <= 32)
+ result = true;
+ break;
+
+ case INS_vstr:
+ case INS_vldr:
+ if ((imm & 0x3FC) == imm)
+ result = true;
+ break;
+
+ default:
+ break;
+ }
+ return result;
+}
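+
+// For example (illustrative only): validImmForInstr(INS_lsl, 33, INS_FLAGS_DONT_CARE)
+// is false because shift immediates must fall in [1..32], while
+// validImmForInstr(INS_vldr, 0x3FC, INS_FLAGS_DONT_CARE) is true because FP
+// load/store offsets must be word aligned and fit within the 0x3FC mask.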
+bool CodeGen::arm_Valid_Imm_For_Instr(instruction ins, ssize_t imm, insFlags flags)
+{
+ return validImmForInstr(ins, imm, flags);
+}
+
+bool CodeGenInterface::validDispForLdSt(ssize_t disp, var_types type)
+{
+ if (varTypeIsFloating(type))
+ {
+ if ((disp & 0x3FC) == disp)
+ return true;
+ else
+ return false;
+ }
+ else
+ {
+ if ((disp >= -0x00ff) && (disp <= 0x0fff))
+ return true;
+ else
+ return false;
+ }
+}
+bool CodeGen::arm_Valid_Disp_For_LdSt(ssize_t disp, var_types type)
+{
+ return validDispForLdSt(disp, type);
+}
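+
+// For example (illustrative only): integer loads/stores accept displacements in
+// [-255, 4095], so validDispForLdSt(4095, TYP_INT) is true and
+// validDispForLdSt(-256, TYP_INT) is false; FP displacements must additionally
+// be a multiple of 4 and no larger than 0x3FC.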
+
+bool CodeGenInterface::validImmForAlu(ssize_t imm)
+{
+ return emitter::emitIns_valid_imm_for_alu(imm);
+}
+bool CodeGen::arm_Valid_Imm_For_Alu(ssize_t imm)
+{
+ return validImmForAlu(imm);
+}
+
+bool CodeGenInterface::validImmForMov(ssize_t imm)
+{
+ return emitter::emitIns_valid_imm_for_mov(imm);
+}
+bool CodeGen::arm_Valid_Imm_For_Mov(ssize_t imm)
+{
+ return validImmForMov(imm);
+}
+
+bool CodeGen::arm_Valid_Imm_For_Small_Mov(regNumber reg, ssize_t imm, insFlags flags)
+{
+ return emitter::emitIns_valid_imm_for_small_mov(reg, imm, flags);
+}
+
+bool CodeGenInterface::validImmForAdd(ssize_t imm, insFlags flags)
+{
+ return emitter::emitIns_valid_imm_for_add(imm, flags);
+}
+bool CodeGen::arm_Valid_Imm_For_Add(ssize_t imm, insFlags flags)
+{
+ return emitter::emitIns_valid_imm_for_add(imm, flags);
+}
+
+// Check "add Rd,SP,i10"
+bool CodeGen::arm_Valid_Imm_For_Add_SP(ssize_t imm)
+{
+ return emitter::emitIns_valid_imm_for_add_sp(imm);
+}
+
+bool CodeGenInterface::validImmForBL(ssize_t addr)
+{
+ return
+ // If we are running the altjit for NGEN, then assume we can use the "BL" instruction.
+ // This matches the usual behavior for NGEN, since we normally do generate "BL".
+ (!compiler->info.compMatchedVM && (compiler->opts.eeFlags & CORJIT_FLG_PREJIT)) ||
+ (compiler->eeGetRelocTypeHint((void*)addr) == IMAGE_REL_BASED_THUMB_BRANCH24);
+}
+bool CodeGen::arm_Valid_Imm_For_BL(ssize_t addr)
+{
+ return validImmForBL(addr);
+}
+
+// Returns true if this instruction writes to a destination register
+//
+bool CodeGen::ins_Writes_Dest(instruction ins)
+{
+ switch (ins)
+ {
+
+ case INS_cmp:
+ case INS_cmn:
+ case INS_tst:
+ case INS_teq:
+ return false;
+
+ default:
+ return true;
+ }
+}
+#endif // _TARGET_ARM_
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing sign/zero extension.
+ *
+ * Parameters
+ * srcType - source type
+ * srcInReg - whether source is in a register
+ */
+instruction CodeGen::ins_Move_Extend(var_types srcType, bool srcInReg)
+{
+ instruction ins = INS_invalid;
+
+ if (varTypeIsSIMD(srcType))
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ // SSE2/AVX always requires the destination to be a register.
+ // If the source is already in a register, this is a reg-reg move.
+ //
+ // SSE2 Note: always prefer movaps/movups over movapd/movupd since the
+ // former doesn't require a 66h prefix and is one byte smaller than the
+ // latter.
+ //
+ // TODO-CQ: based on whether src type is aligned use movaps instead
+
+ return (srcInReg) ? INS_movaps : INS_movups;
+#else // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ assert(!"unhandled SIMD type");
+#endif // !defined(_TARGET_XARCH_) || defined(LEGACY_BACKEND)
+ }
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsFloating(srcType))
+ {
+ if (srcType == TYP_DOUBLE)
+ {
+ return (srcInReg) ? INS_movaps : INS_movsdsse2;
+ }
+ else if (srcType == TYP_FLOAT)
+ {
+ return (srcInReg) ? INS_movaps : INS_movss;
+ }
+ else
+ {
+ assert(!"unhandled floating type");
+ }
+ }
+#elif defined(_TARGET_ARM_)
+ if (varTypeIsFloating(srcType))
+ return INS_vmov;
+#else
+ assert(!varTypeIsFloating(srcType));
+#endif
+
+#if defined(_TARGET_XARCH_)
+ if (!varTypeIsSmall(srcType))
+ {
+ ins = INS_mov;
+ }
+ else if (varTypeIsUnsigned(srcType))
+ {
+ ins = INS_movzx;
+ }
+ else
+ {
+ ins = INS_movsx;
+ }
+#elif defined(_TARGET_ARM_)
+ //
+ // Register to Register zero/sign extend operation
+ //
+ if (srcInReg)
+ {
+ if (!varTypeIsSmall(srcType))
+ {
+ ins = INS_mov;
+ }
+ else if (varTypeIsUnsigned(srcType))
+ {
+ if (varTypeIsByte(srcType))
+ ins = INS_uxtb;
+ else
+ ins = INS_uxth;
+ }
+ else
+ {
+ if (varTypeIsByte(srcType))
+ ins = INS_sxtb;
+ else
+ ins = INS_sxth;
+ }
+ }
+ else
+ {
+ ins = ins_Load(srcType);
+ }
+#elif defined(_TARGET_ARM64_)
+ //
+ // Register to Register zero/sign extend operation
+ //
+ if (srcInReg)
+ {
+ if (varTypeIsUnsigned(srcType))
+ {
+ if (varTypeIsByte(srcType))
+ {
+ ins = INS_uxtb;
+ }
+ else if (varTypeIsShort(srcType))
+ {
+ ins = INS_uxth;
+ }
+ else
+ {
+ // A mov Rd, Rm instruction performs the zero extend
+ // for the upper 32 bits when the size is EA_4BYTE
+
+ ins = INS_mov;
+ }
+ }
+ else
+ {
+ if (varTypeIsByte(srcType))
+ {
+ ins = INS_sxtb;
+ }
+ else if (varTypeIsShort(srcType))
+ {
+ ins = INS_sxth;
+ }
+ else
+ {
+ if (srcType == TYP_INT)
+ {
+ ins = INS_sxtw;
+ }
+ else
+ {
+ ins = INS_mov;
+ }
+ }
+ }
+ }
+ else
+ {
+ ins = ins_Load(srcType);
+ }
+#else
+ NYI("ins_Move_Extend");
+#endif
+ assert(ins != INS_invalid);
+ return ins;
+}
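+
+// For example (illustrative only), a few of the mappings implied by the cases above:
+// ins_Move_Extend(TYP_UBYTE, true) -> INS_movzx on xarch, INS_uxtb on arm/arm64
+// ins_Move_Extend(TYP_SHORT, true) -> INS_movsx on xarch, INS_sxth on arm/arm64
+// ins_Move_Extend(TYP_INT, true) -> INS_mov on xarch/arm, INS_sxtw on arm64
+// On arm/arm64 a small-typed source that is not in a register falls back to ins_Load().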
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing a load for srcType
+ *
+ * Parameters
+ * srcType - source type
+ * aligned - whether source is 16-byte aligned if srcType is a SIMD type
+ */
+instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false*/)
+{
+ instruction ins = INS_invalid;
+
+ if (varTypeIsSIMD(srcType))
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+#ifdef FEATURE_SIMD
+ if (srcType == TYP_SIMD8)
+ {
+ return INS_movsdsse2;
+ }
+ else
+#endif // FEATURE_SIMD
+ if (compiler->canUseAVX())
+ {
+ // TODO-CQ: consider alignment of AVX vectors.
+ return INS_movupd;
+ }
+ else
+ {
+ // SSE2 Note: always prefer movaps/movups over movapd/movupd since the
+ // former doesn't require a 66h prefix and is one byte smaller than the
+ // latter.
+ return (aligned) ? INS_movaps : INS_movups;
+ }
+#else
+ assert(!"ins_Load with SIMD type");
+#endif
+ }
+
+ if (varTypeIsFloating(srcType))
+ {
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (srcType == TYP_DOUBLE)
+ {
+ return INS_movsdsse2;
+ }
+ else if (srcType == TYP_FLOAT)
+ {
+ return INS_movss;
+ }
+ else
+ {
+ assert(!"unhandled floating type");
+ }
+#elif defined(_TARGET_ARM64_)
+ return INS_ldr;
+#elif defined(_TARGET_ARM_)
+ return INS_vldr;
+#else
+ assert(!varTypeIsFloating(srcType));
+#endif
+ }
+
+#if defined(_TARGET_XARCH_)
+ if (!varTypeIsSmall(srcType))
+ {
+ ins = INS_mov;
+ }
+ else if (varTypeIsUnsigned(srcType))
+ {
+ ins = INS_movzx;
+ }
+ else
+ {
+ ins = INS_movsx;
+ }
+
+#elif defined(_TARGET_ARMARCH_)
+ if (!varTypeIsSmall(srcType))
+ {
+#if defined(_TARGET_ARM64_)
+ if (!varTypeIsI(srcType) && !varTypeIsUnsigned(srcType))
+ {
+ ins = INS_ldrsw;
+ }
+ else
+#endif // defined(_TARGET_ARM64_)
+ {
+ ins = INS_ldr;
+ }
+ }
+ else if (varTypeIsByte(srcType))
+ {
+ if (varTypeIsUnsigned(srcType))
+ ins = INS_ldrb;
+ else
+ ins = INS_ldrsb;
+ }
+ else if (varTypeIsShort(srcType))
+ {
+ if (varTypeIsUnsigned(srcType))
+ ins = INS_ldrh;
+ else
+ ins = INS_ldrsh;
+ }
+#else
+ NYI("ins_Load");
+#endif
+
+ assert(ins != INS_invalid);
+ return ins;
+}
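+
+// For example (illustrative only): ins_Load(TYP_UBYTE) yields INS_movzx on xarch
+// and INS_ldrb on arm/arm64; ins_Load(TYP_SHORT) yields INS_movsx on xarch and
+// INS_ldrsh on arm/arm64; ins_Load(TYP_DOUBLE) yields INS_movsdsse2 on xarch,
+// INS_vldr on arm, and INS_ldr on arm64.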
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing a reg-reg copy for dstType
+ *
+ * Parameters
+ * dstType - destination type
+ */
+instruction CodeGen::ins_Copy(var_types dstType)
+{
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsSIMD(dstType))
+ {
+ return INS_movaps;
+ }
+ else if (varTypeIsFloating(dstType))
+ {
+ // Both float and double copy can use movaps
+ return INS_movaps;
+ }
+ else
+ {
+ return INS_mov;
+ }
+#elif defined(_TARGET_ARM64_)
+ if (varTypeIsFloating(dstType))
+ {
+ return INS_fmov;
+ }
+ else
+ {
+ return INS_mov;
+ }
+#elif defined(_TARGET_ARM_)
+ assert(!varTypeIsSIMD(dstType));
+ if (varTypeIsFloating(dstType))
+ {
+ return INS_vmov;
+ }
+ else
+ {
+ return INS_mov;
+ }
+#elif defined(_TARGET_X86_)
+ assert(!varTypeIsSIMD(dstType));
+ assert(!varTypeIsFloating(dstType));
+ return INS_mov;
+#else // _TARGET_*
+#error "Unknown _TARGET_"
+#endif
+}
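+
+// For example (illustrative only): ins_Copy(TYP_INT) is INS_mov on every target,
+// while ins_Copy(TYP_DOUBLE) is INS_movaps on xarch, INS_fmov on arm64, and
+// INS_vmov on arm.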
+
+/*****************************************************************************
+ *
+ * Get the machine dependent instruction for performing a store for dstType
+ *
+ * Parameters
+ * dstType - destination type
+ * aligned - whether destination is 16-byte aligned if dstType is a SIMD type
+ */
+instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false*/)
+{
+ instruction ins = INS_invalid;
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsSIMD(dstType))
+ {
+#ifdef FEATURE_SIMD
+ if (dstType == TYP_SIMD8)
+ {
+ return INS_movsdsse2;
+ }
+ else
+#endif // FEATURE_SIMD
+ if (compiler->canUseAVX())
+ {
+ // TODO-CQ: consider alignment of AVX vectors.
+ return INS_movupd;
+ }
+ else
+ {
+ // SSE2 Note: always prefer movaps/movups over movapd/movupd since the
+ // former doesn't require a 66h prefix and is one byte smaller than the
+ // latter.
+ return (aligned) ? INS_movaps : INS_movups;
+ }
+ }
+ else if (varTypeIsFloating(dstType))
+ {
+ if (dstType == TYP_DOUBLE)
+ {
+ return INS_movsdsse2;
+ }
+ else if (dstType == TYP_FLOAT)
+ {
+ return INS_movss;
+ }
+ else
+ {
+ assert(!"unhandled floating type");
+ }
+ }
+#elif defined(_TARGET_ARM64_)
+ if (varTypeIsSIMD(dstType) || varTypeIsFloating(dstType))
+ {
+ // All sizes of SIMD and FP instructions use INS_str
+ return INS_str;
+ }
+#elif defined(_TARGET_ARM_)
+ assert(!varTypeIsSIMD(dstType));
+ if (varTypeIsFloating(dstType))
+ {
+ return INS_vstr;
+ }
+#else
+ assert(!varTypeIsSIMD(dstType));
+ assert(!varTypeIsFloating(dstType));
+#endif
+
+#if defined(_TARGET_XARCH_)
+ ins = INS_mov;
+#elif defined(_TARGET_ARMARCH_)
+ if (!varTypeIsSmall(dstType))
+ ins = INS_str;
+ else if (varTypeIsByte(dstType))
+ ins = INS_strb;
+ else if (varTypeIsShort(dstType))
+ ins = INS_strh;
+#else
+ NYI("ins_Store");
+#endif
+
+ assert(ins != INS_invalid);
+ return ins;
+}
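+
+// For example (illustrative only): ins_Store(TYP_INT) yields INS_mov on xarch and
+// INS_str on arm/arm64; ins_Store(TYP_UBYTE) yields INS_mov on xarch and INS_strb
+// on arm/arm64; ins_Store(TYP_FLOAT) yields INS_movss on xarch, INS_vstr on arm,
+// and INS_str on arm64.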
+
+#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+bool CodeGen::isMoveIns(instruction ins)
+{
+ return (ins == INS_mov);
+}
+
+instruction CodeGenInterface::ins_FloatLoad(var_types type)
+{
+ // Do Not use this routine in RyuJIT backend. Instead use ins_Load()/ins_Store()
+ unreached();
+}
+
+// everything is just an addressing mode variation on x64
+instruction CodeGen::ins_FloatStore(var_types type)
+{
+ // Do Not use this routine in RyuJIT backend. Instead use ins_Store()
+ unreached();
+}
+
+instruction CodeGen::ins_FloatCopy(var_types type)
+{
+ // Do Not use this routine in RyuJIT backend. Instead use ins_Copy().
+ unreached();
+}
+
+instruction CodeGen::ins_FloatCompare(var_types type)
+{
+ return (type == TYP_FLOAT) ? INS_ucomiss : INS_ucomisd;
+}
+
+instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
+{
+ // On SSE2/AVX - the same instruction is used for moving double/quad word to XMM/YMM register.
+ assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
+ return INS_mov_i2xmm;
+}
+
+instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
+{
+ // On SSE2/AVX - the same instruction is used for moving double/quad word of XMM/YMM to an integer register.
+ assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
+ return INS_mov_xmm2i;
+}
+
+instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ case GT_ASG_ADD:
+ return type == TYP_DOUBLE ? INS_addsd : INS_addss;
+ break;
+ case GT_SUB:
+ case GT_ASG_SUB:
+ return type == TYP_DOUBLE ? INS_subsd : INS_subss;
+ break;
+ case GT_MUL:
+ case GT_ASG_MUL:
+ return type == TYP_DOUBLE ? INS_mulsd : INS_mulss;
+ break;
+ case GT_DIV:
+ case GT_ASG_DIV:
+ return type == TYP_DOUBLE ? INS_divsd : INS_divss;
+ case GT_AND:
+ return type == TYP_DOUBLE ? INS_andpd : INS_andps;
+ case GT_OR:
+ return type == TYP_DOUBLE ? INS_orpd : INS_orps;
+ case GT_XOR:
+ return type == TYP_DOUBLE ? INS_xorpd : INS_xorps;
+ default:
+ unreached();
+ }
+}
+
+instruction CodeGen::ins_FloatSqrt(var_types type)
+{
+ instruction ins = INS_invalid;
+
+ if (type == TYP_DOUBLE)
+ {
+ ins = INS_sqrtsd;
+ }
+ else
+ {
+ // Right now sqrt of scalar single is not needed.
+ unreached();
+ }
+
+ return ins;
+}
+
+// Conversions to or from floating point values
+instruction CodeGen::ins_FloatConv(var_types to, var_types from)
+{
+ // AVX: For now we support only conversion from Int/Long -> float
+
+ switch (from)
+ {
+ // int/long -> float/double use the same instruction but type size would be different.
+ case TYP_INT:
+ case TYP_LONG:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ return INS_cvtsi2ss;
+ case TYP_DOUBLE:
+ return INS_cvtsi2sd;
+ default:
+ unreached();
+ }
+ break;
+
+ case TYP_FLOAT:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_cvttss2si;
+ case TYP_LONG:
+ return INS_cvttss2si;
+ case TYP_FLOAT:
+ return ins_Move_Extend(TYP_FLOAT, false);
+ case TYP_DOUBLE:
+ return INS_cvtss2sd;
+ default:
+ unreached();
+ }
+ break;
+
+ case TYP_DOUBLE:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_cvttsd2si;
+ case TYP_LONG:
+ return INS_cvttsd2si;
+ case TYP_FLOAT:
+ return INS_cvtsd2ss;
+ case TYP_DOUBLE:
+ return ins_Move_Extend(TYP_DOUBLE, false);
+ default:
+ unreached();
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
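+
+// For example (illustrative only): ins_FloatConv(TYP_DOUBLE, TYP_INT) yields
+// INS_cvtsi2sd, ins_FloatConv(TYP_INT, TYP_DOUBLE) yields INS_cvttsd2si, and
+// ins_FloatConv(TYP_DOUBLE, TYP_FLOAT) yields INS_cvtss2sd; the emitAttr passed
+// to the emitter is what distinguishes the int and long flavors.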
+
+#elif defined(_TARGET_ARM_)
+
+bool CodeGen::isMoveIns(instruction ins)
+{
+ return (ins == INS_vmov) || (ins == INS_mov);
+}
+
+instruction CodeGenInterface::ins_FloatLoad(var_types type)
+{
+ assert(type == TYP_DOUBLE || type == TYP_FLOAT);
+ return INS_vldr;
+}
+instruction CodeGen::ins_FloatStore(var_types type)
+{
+ assert(type == TYP_DOUBLE || type == TYP_FLOAT);
+ return INS_vstr;
+}
+instruction CodeGen::ins_FloatCopy(var_types type)
+{
+ assert(type == TYP_DOUBLE || type == TYP_FLOAT);
+ return INS_vmov;
+}
+
+instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_FloatCompare(var_types type)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_FloatSqrt(var_types type)
+{
+ // Not used and not implemented
+ unreached();
+}
+
+instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
+{
+ switch (oper)
+ {
+ case GT_ADD:
+ case GT_ASG_ADD:
+ return INS_vadd;
+ break;
+ case GT_SUB:
+ case GT_ASG_SUB:
+ return INS_vsub;
+ break;
+ case GT_MUL:
+ case GT_ASG_MUL:
+ return INS_vmul;
+ break;
+ case GT_DIV:
+ case GT_ASG_DIV:
+ return INS_vdiv;
+ case GT_NEG:
+ return INS_vneg;
+ default:
+ unreached();
+ }
+}
+
+instruction CodeGen::ins_FloatConv(var_types to, var_types from)
+{
+ switch (from)
+ {
+ case TYP_INT:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ return INS_vcvt_i2f;
+ case TYP_DOUBLE:
+ return INS_vcvt_i2d;
+ default:
+ unreached();
+ }
+ break;
+ case TYP_UINT:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ return INS_vcvt_u2f;
+ case TYP_DOUBLE:
+ return INS_vcvt_u2d;
+ default:
+ unreached();
+ }
+ break;
+ case TYP_LONG:
+ switch (to)
+ {
+ case TYP_FLOAT:
+ NYI("long to float");
+ case TYP_DOUBLE:
+ NYI("long to double");
+ default:
+ unreached();
+ }
+ break;
+ case TYP_FLOAT:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_vcvt_f2i;
+ case TYP_UINT:
+ return INS_vcvt_f2u;
+ case TYP_LONG:
+ NYI("float to long");
+ case TYP_DOUBLE:
+ return INS_vcvt_f2d;
+ case TYP_FLOAT:
+ return INS_vmov;
+ default:
+ unreached();
+ }
+ break;
+ case TYP_DOUBLE:
+ switch (to)
+ {
+ case TYP_INT:
+ return INS_vcvt_d2i;
+ case TYP_UINT:
+ return INS_vcvt_d2u;
+ case TYP_LONG:
+ NYI("double to long");
+ case TYP_FLOAT:
+ return INS_vcvt_d2f;
+ case TYP_DOUBLE:
+ return INS_vmov;
+ default:
+ unreached();
+ }
+ break;
+ default:
+ unreached();
+ }
+}
+
+#endif // #elif defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ *
+ * Machine independent way to return
+ */
+void CodeGen::instGen_Return(unsigned stkArgSize)
+{
+#if defined(_TARGET_XARCH_)
+ if (stkArgSize == 0)
+ {
+ instGen(INS_ret);
+ }
+ else
+ {
+ inst_IV(INS_ret, stkArgSize);
+ }
+#elif defined(_TARGET_ARM_)
+//
+// The return on ARM is folded into the "pop multiple" instruction, and since
+// we do not yet know the exact set of registers that will need to be restored
+// (popped) when instGen_Return is first called, we emit nothing here. The
+// return becomes part of the pop multiple emitted in the epilog generated by
+// genFnEpilog().
+#elif defined(_TARGET_ARM64_)
+ // This function shouldn't be used on ARM64.
+ unreached();
+#else
+ NYI("instGen_Return");
+#endif
+}
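+
+// For example (illustrative only), on x86 instGen_Return(8) emits "ret 8" to pop
+// the 8 bytes of caller-pushed arguments, while instGen_Return(0) emits a plain "ret".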
+
+/*****************************************************************************
+ *
+ * Emit a MemoryBarrier instruction
+ *
+ * Note: all MemoryBarrier instructions can be removed by setting
+ * COMPlus_JitNoMemoryBarriers=1 (checked only in DEBUG builds)
+ */
+void CodeGen::instGen_MemoryBarrier()
+{
+#ifdef DEBUG
+ if (JitConfig.JitNoMemoryBarriers() == 1)
+ {
+ return;
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_XARCH_)
+ instGen(INS_lock);
+ getEmitter()->emitIns_I_AR(INS_or, EA_4BYTE, 0, REG_SPBASE, 0);
+#elif defined(_TARGET_ARM_)
+ getEmitter()->emitIns_I(INS_dmb, EA_4BYTE, 0xf);
+#elif defined(_TARGET_ARM64_)
+ getEmitter()->emitIns_BARR(INS_dmb, INS_BARRIER_SY);
+#else
+#error "Unknown _TARGET_"
+#endif
+}
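+
+// For example (illustrative only), the barriers emitted above are
+// "lock or dword ptr [esp/rsp], 0" on xarch, "dmb" with option 0xf (SY, full
+// system) on arm, and "dmb sy" on arm64.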
+
+/*****************************************************************************
+ *
+ * Machine independent way to move a Zero value into a register
+ */
+void CodeGen::instGen_Set_Reg_To_Zero(emitAttr size, regNumber reg, insFlags flags)
+{
+#if defined(_TARGET_XARCH_)
+ getEmitter()->emitIns_R_R(INS_xor, size, reg, reg);
+#elif defined(_TARGET_ARMARCH_)
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, 0 ARM_ARG(flags));
+#else
+#error "Unknown _TARGET_"
+#endif
+ regTracker.rsTrackRegIntCns(reg, 0);
+}
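+
+// For example (illustrative only), zeroing a register this way produces
+// "xor reg, reg" on xarch and "mov reg, #0" on arm/arm64.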
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Machine independent way to move an immediate value into a register
+ */
+void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
+{
+#if RELOC_SUPPORT
+ if (!compiler->opts.compReloc)
+#endif // RELOC_SUPPORT
+ {
+ size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
+ }
+
+ if ((imm == 0) && !EA_IS_RELOC(size))
+ {
+ instGen_Set_Reg_To_Zero(size, reg, flags);
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+#elif defined(_TARGET_ARM_)
+
+ if (EA_IS_RELOC(size))
+ {
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm);
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm);
+ }
+ else if (arm_Valid_Imm_For_Mov(imm))
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm, flags);
+ }
+ else // We have to use a movw/movt pair of instructions
+ {
+ ssize_t imm_lo16 = (imm & 0xffff);
+ ssize_t imm_hi16 = (imm >> 16) & 0xffff;
+
+ assert(arm_Valid_Imm_For_Mov(imm_lo16));
+ assert(imm_hi16 != 0);
+
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm_lo16);
+
+ // If we've got a low register, the high word is all bits set, and
+ // the high bit of the low word is set, then we can sign-extend the
+ // halfword and save two bytes of encoding. This can happen for
+ // small-magnitude negative numbers 'n' with -32768 <= n <= -1.
+
+ if (getEmitter()->isLowRegister(reg) && (imm_hi16 == 0xffff) && ((imm_lo16 & 0x8000) == 0x8000))
+ {
+ getEmitter()->emitIns_R_R(INS_sxth, EA_2BYTE, reg, reg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm_hi16);
+ }
+
+ if (flags == INS_FLAGS_SET)
+ getEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET);
+ }
+#elif defined(_TARGET_ARM64_)
+ NYI_ARM64("instGen_Set_Reg_To_Imm");
+#else
+#error "Unknown _TARGET_"
+#endif
+ }
+ regTracker.rsTrackRegIntCns(reg, imm);
+}
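+
+// For example (illustrative only), loading 0x12345678 on ARM takes the movw/movt
+// path above: imm_lo16 = 0x5678 and imm_hi16 = 0x1234, so we emit
+// "movw reg, #0x5678" followed by "movt reg, #0x1234".
+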
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Machine independent way to set the flags based on
+ * comparing a register with zero
+ */
+void CodeGen::instGen_Compare_Reg_To_Zero(emitAttr size, regNumber reg)
+{
+#if defined(_TARGET_XARCH_)
+ getEmitter()->emitIns_R_R(INS_test, size, reg, reg);
+#elif defined(_TARGET_ARMARCH_)
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, 0);
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to set the flags based upon
+ * comparing a register with another register
+ */
+void CodeGen::instGen_Compare_Reg_To_Reg(emitAttr size, regNumber reg1, regNumber reg2)
+{
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARMARCH_)
+ getEmitter()->emitIns_R_R(INS_cmp, size, reg1, reg2);
+#else
+#error "Unknown _TARGET_"
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to set the flags based upon
+ * comparing a register with an immediate
+ */
+void CodeGen::instGen_Compare_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm)
+{
+ if (imm == 0)
+ {
+ instGen_Compare_Reg_To_Zero(size, reg);
+ }
+ else
+ {
+#if defined(_TARGET_XARCH_)
+#if defined(_TARGET_AMD64_)
+ if ((EA_SIZE(size) == EA_8BYTE) && (((int)imm != (ssize_t)imm) || EA_IS_CNS_RELOC(size)))
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"Invalid immediate for instGen_Compare_Reg_To_Imm");
+#else // LEGACY_BACKEND
+ // Load imm into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, immReg, (ssize_t)imm);
+ getEmitter()->emitIns_R_R(INS_cmp, EA_TYPE(size), reg, immReg);
+#endif // LEGACY_BACKEND
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, imm);
+ }
+#elif defined(_TARGET_ARM_)
+ if (arm_Valid_Imm_For_Alu(imm) || arm_Valid_Imm_For_Alu(-imm))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, imm);
+ }
+ else // We need a scratch register
+ {
+#ifndef LEGACY_BACKEND
+ assert(!"Invalid immediate for instGen_Compare_Reg_To_Imm");
+#else // LEGACY_BACKEND
+ // Load imm into a register
+ regNumber immReg = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg));
+ instGen_Set_Reg_To_Imm(size, immReg, (ssize_t)imm);
+ getEmitter()->emitIns_R_R(INS_cmp, size, reg, immReg);
+#endif // !LEGACY_BACKEND
+ }
+#elif defined(_TARGET_ARM64_)
+ if (true) // TODO-ARM64-NYI: arm_Valid_Imm_For_Alu(imm) || arm_Valid_Imm_For_Alu(-imm))
+ {
+ getEmitter()->emitIns_R_I(INS_cmp, size, reg, imm);
+ }
+ else // We need a scratch register
+ {
+ assert(!"Invalid immediate for instGen_Compare_Reg_To_Imm");
+ }
+#else
+#error "Unknown _TARGET_"
+#endif
+ }
+}
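+
+// For example (illustrative only), instGen_Compare_Reg_To_Imm(EA_4BYTE, reg, 10)
+// emits a "cmp reg, 10" style compare on all targets, while an immediate of zero
+// is routed through instGen_Compare_Reg_To_Zero and becomes "test reg, reg" on xarch.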
+
+/*****************************************************************************
+ *
+ * Machine independent way to move a stack based local variable into a register
+ */
+void CodeGen::instGen_Load_Reg_From_Lcl(var_types srcType, regNumber dstReg, int varNum, int offs)
+{
+ emitAttr size = emitTypeSize(srcType);
+
+ getEmitter()->emitIns_R_S(ins_Load(srcType), size, dstReg, varNum, offs);
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to move a register into a stack based local variable
+ */
+void CodeGen::instGen_Store_Reg_Into_Lcl(var_types dstType, regNumber srcReg, int varNum, int offs)
+{
+ emitAttr size = emitTypeSize(dstType);
+
+ getEmitter()->emitIns_S_R(ins_Store(dstType), size, srcReg, varNum, offs);
+}
+
+/*****************************************************************************
+ *
+ * Machine independent way to move an immediate into a stack based local variable
+ */
+void CodeGen::instGen_Store_Imm_Into_Lcl(
+ var_types dstType, emitAttr sizeAttr, ssize_t imm, int varNum, int offs, regNumber regToUse)
+{
+#ifdef _TARGET_XARCH_
+#ifdef _TARGET_AMD64_
+ if ((EA_SIZE(sizeAttr) == EA_8BYTE) && (((int)imm != (ssize_t)imm) || EA_IS_CNS_RELOC(sizeAttr)))
+ {
+ assert(!"Invalid immediate for instGen_Store_Imm_Into_Lcl");
+ }
+ else
+#endif // _TARGET_AMD64_
+ {
+ getEmitter()->emitIns_S_I(ins_Store(dstType), sizeAttr, varNum, offs, (int)imm);
+ }
+#elif defined(_TARGET_ARMARCH_)
+ // Load imm into a register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef LEGACY_BACKEND
+ regNumber immReg = regToUse;
+ assert(regToUse != REG_NA);
+#else // LEGACY_BACKEND
+ regNumber immReg = (regToUse == REG_NA) ? regSet.rsGrabReg(RBM_ALLINT) : regToUse;
+#endif // LEGACY_BACKEND
+ instGen_Set_Reg_To_Imm(sizeAttr, immReg, (ssize_t)imm);
+ instGen_Store_Reg_Into_Lcl(dstType, immReg, varNum, offs);
+ if (EA_IS_RELOC(sizeAttr))
+ {
+ regTracker.rsTrackRegTrash(immReg);
+ }
+#else // _TARGET_*
+#error "Unknown _TARGET_"
+#endif // _TARGET_*
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/*****************************************************************************/
diff --git a/src/jit/instr.h b/src/jit/instr.h
new file mode 100644
index 0000000000..c38f8d2073
--- /dev/null
+++ b/src/jit/instr.h
@@ -0,0 +1,301 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _INSTR_H_
+#define _INSTR_H_
+/*****************************************************************************/
+
+#define BAD_CODE 0x0BADC0DE // better not match a real encoding!
+
+/*****************************************************************************/
+
+// clang-format off
+DECLARE_TYPED_ENUM(instruction,unsigned)
+{
+#if defined(_TARGET_XARCH_)
+ #define INST0(id, nm, fp, um, rf, wf, mr ) INS_##id,
+ #define INST1(id, nm, fp, um, rf, wf, mr ) INS_##id,
+ #define INST2(id, nm, fp, um, rf, wf, mr, mi ) INS_##id,
+ #define INST3(id, nm, fp, um, rf, wf, mr, mi, rm ) INS_##id,
+ #define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 ) INS_##id,
+ #define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr) INS_##id,
+ #include "instrs.h"
+
+#elif defined(_TARGET_ARM_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) INS_##id,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) INS_##id,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) INS_##id,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) INS_##id,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) INS_##id,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) INS_##id,
+ #define INST8(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8 ) INS_##id,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) INS_##id,
+ #include "instrs.h"
+ #include "x86_instrs.h"
+
+#elif defined(_TARGET_ARM64_)
+ #define INST1(id, nm, fp, ldst, fmt, e1 ) INS_##id,
+ #define INST2(id, nm, fp, ldst, fmt, e1, e2 ) INS_##id,
+ #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3 ) INS_##id,
+ #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4 ) INS_##id,
+ #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5 ) INS_##id,
+ #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6 ) INS_##id,
+ #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) INS_##id,
+ #include "instrs.h"
+
+ INS_lea, // Not a real instruction. It is used to load the address of stack locals.
+
+#else
+#error Unsupported target architecture
+#endif
+
+ INS_none,
+ INS_count = INS_none
+}
+END_DECLARE_TYPED_ENUM(instruction,unsigned)
+
+/*****************************************************************************/
+
+enum insUpdateModes
+{
+ IUM_RD,
+ IUM_WR,
+ IUM_RW,
+};
+
+/*****************************************************************************/
+
+enum emitJumpKind
+{
+ EJ_NONE,
+
+ #define JMP_SMALL(en, rev, ins) EJ_##en,
+ #include "emitjmps.h"
+
+ EJ_COUNT
+};
+
+/*****************************************************************************/
+
+DECLARE_TYPED_ENUM(GCtype,unsigned)
+{
+ GCT_NONE,
+ GCT_GCREF,
+ GCT_BYREF
+}
+END_DECLARE_TYPED_ENUM(GCtype,unsigned)
+
+// TODO-Cleanup: Move 'insFlags' under _TARGET_ARM_
+DECLARE_TYPED_ENUM(insFlags,unsigned)
+{
+ INS_FLAGS_NOT_SET,
+ INS_FLAGS_SET,
+ INS_FLAGS_DONT_CARE
+}
+END_DECLARE_TYPED_ENUM(insFlags,unsigned)
+
+#if defined(_TARGET_ARM_)
+DECLARE_TYPED_ENUM(insOpts,unsigned)
+{
+ INS_OPTS_NONE,
+ INS_OPTS_LDST_PRE_DEC,
+ INS_OPTS_LDST_POST_INC,
+
+ INS_OPTS_RRX,
+ INS_OPTS_LSL,
+ INS_OPTS_LSR,
+ INS_OPTS_ASR,
+ INS_OPTS_ROR
+}
+END_DECLARE_TYPED_ENUM(insOpts,unsigned)
+#elif defined(_TARGET_ARM64_)
+DECLARE_TYPED_ENUM(insOpts,unsigned)
+{
+ INS_OPTS_NONE,
+
+ INS_OPTS_PRE_INDEX,
+ INS_OPTS_POST_INDEX,
+
+ INS_OPTS_LSL12,
+
+ INS_OPTS_LSL = 4,
+ INS_OPTS_LSR,
+ INS_OPTS_ASR,
+ INS_OPTS_ROR,
+
+ INS_OPTS_UXTB = 8,
+ INS_OPTS_UXTH,
+ INS_OPTS_UXTW,
+ INS_OPTS_UXTX,
+ INS_OPTS_SXTB,
+ INS_OPTS_SXTH,
+ INS_OPTS_SXTW,
+ INS_OPTS_SXTX,
+
+ INS_OPTS_8B = 16,
+ INS_OPTS_16B,
+ INS_OPTS_4H,
+ INS_OPTS_8H,
+ INS_OPTS_2S,
+ INS_OPTS_4S,
+ INS_OPTS_1D,
+ INS_OPTS_2D,
+
+ INS_OPTS_MSL, // Vector Immediate (shifting ones variant)
+
+ INS_OPTS_S_TO_4BYTE, // Single to INT32
+ INS_OPTS_D_TO_4BYTE, // Double to INT32
+
+ INS_OPTS_S_TO_8BYTE, // Single to INT64
+ INS_OPTS_D_TO_8BYTE, // Double to INT64
+
+ INS_OPTS_4BYTE_TO_S, // INT32 to Single
+ INS_OPTS_4BYTE_TO_D, // INT32 to Double
+
+ INS_OPTS_8BYTE_TO_S, // INT64 to Single
+ INS_OPTS_8BYTE_TO_D, // INT64 to Double
+
+ INS_OPTS_S_TO_D, // Single to Double
+ INS_OPTS_D_TO_S, // Double to Single
+
+ INS_OPTS_H_TO_S, // Half to Single
+ INS_OPTS_H_TO_D, // Half to Double
+
+ INS_OPTS_S_TO_H, // Single to Half
+ INS_OPTS_D_TO_H, // Double to Half
+}
+END_DECLARE_TYPED_ENUM(insOpts,unsigned)
+
+DECLARE_TYPED_ENUM(insCond,unsigned)
+{
+ INS_COND_EQ,
+ INS_COND_NE,
+ INS_COND_HS,
+ INS_COND_LO,
+
+ INS_COND_MI,
+ INS_COND_PL,
+ INS_COND_VS,
+ INS_COND_VC,
+
+ INS_COND_HI,
+ INS_COND_LS,
+ INS_COND_GE,
+ INS_COND_LT,
+
+ INS_COND_GT,
+ INS_COND_LE,
+}
+END_DECLARE_TYPED_ENUM(insCond,unsigned)
+
+DECLARE_TYPED_ENUM(insCflags,unsigned)
+{
+ INS_FLAGS_NONE,
+ INS_FLAGS_V,
+ INS_FLAGS_C,
+ INS_FLAGS_CV,
+
+ INS_FLAGS_Z,
+ INS_FLAGS_ZV,
+ INS_FLAGS_ZC,
+ INS_FLAGS_ZCV,
+
+ INS_FLAGS_N,
+ INS_FLAGS_NV,
+ INS_FLAGS_NC,
+ INS_FLAGS_NCV,
+
+ INS_FLAGS_NZ,
+ INS_FLAGS_NZV,
+ INS_FLAGS_NZC,
+ INS_FLAGS_NZCV,
+}
+END_DECLARE_TYPED_ENUM(insCflags,unsigned)
+
+DECLARE_TYPED_ENUM(insBarrier,unsigned)
+{
+ INS_BARRIER_OSHLD = 1,
+ INS_BARRIER_OSHST = 2,
+ INS_BARRIER_OSH = 3,
+
+ INS_BARRIER_NSHLD = 5,
+ INS_BARRIER_NSHST = 6,
+ INS_BARRIER_NSH = 7,
+
+ INS_BARRIER_ISHLD = 9,
+ INS_BARRIER_ISHST = 10,
+ INS_BARRIER_ISH = 11,
+
+ INS_BARRIER_LD = 13,
+ INS_BARRIER_ST = 14,
+ INS_BARRIER_SY = 15,
+}
+END_DECLARE_TYPED_ENUM(insBarrier,unsigned)
+#endif
+
+#undef EA_UNKNOWN
+DECLARE_TYPED_ENUM(emitAttr,unsigned)
+{
+ EA_UNKNOWN = 0x000,
+ EA_1BYTE = 0x001,
+ EA_2BYTE = 0x002,
+ EA_4BYTE = 0x004,
+ EA_8BYTE = 0x008,
+ EA_16BYTE = 0x010,
+ EA_32BYTE = 0x020,
+ EA_SIZE_MASK = 0x03F,
+
+#ifdef _TARGET_64BIT_
+ EA_PTRSIZE = EA_8BYTE,
+#else
+ EA_PTRSIZE = EA_4BYTE,
+#endif
+
+ EA_OFFSET_FLG = 0x040,
+ EA_OFFSET = EA_OFFSET_FLG | EA_PTRSIZE, /* size == 0 */
+ EA_GCREF_FLG = 0x080,
+ EA_GCREF = EA_GCREF_FLG | EA_PTRSIZE, /* size == -1 */
+ EA_BYREF_FLG = 0x100,
+ EA_BYREF = EA_BYREF_FLG | EA_PTRSIZE, /* size == -2 */
+ EA_DSP_RELOC_FLG = 0x200,
+ EA_CNS_RELOC_FLG = 0x400,
+}
+END_DECLARE_TYPED_ENUM(emitAttr,unsigned)
+
+#define EA_ATTR(x) ((emitAttr)(x))
+#define EA_SIZE(x) ((emitAttr)(((unsigned)(x)) & EA_SIZE_MASK))
+#define EA_SIZE_IN_BYTES(x) ((UNATIVE_OFFSET)(EA_SIZE(x)))
+#define EA_SET_SIZE(x, sz) ((emitAttr)((((unsigned)(x)) & ~EA_SIZE_MASK) | sz))
+#define EA_SET_FLG(x, flg) ((emitAttr)(((unsigned)(x)) | flg))
+#define EA_4BYTE_DSP_RELOC (EA_SET_FLG(EA_4BYTE, EA_DSP_RELOC_FLG))
+#define EA_PTR_DSP_RELOC (EA_SET_FLG(EA_PTRSIZE, EA_DSP_RELOC_FLG))
+#define EA_HANDLE_CNS_RELOC (EA_SET_FLG(EA_PTRSIZE, EA_CNS_RELOC_FLG))
+#define EA_IS_OFFSET(x) ((((unsigned)(x)) & ((unsigned)EA_OFFSET_FLG)) != 0)
+#define EA_IS_GCREF(x) ((((unsigned)(x)) & ((unsigned)EA_GCREF_FLG)) != 0)
+#define EA_IS_BYREF(x) ((((unsigned)(x)) & ((unsigned)EA_BYREF_FLG)) != 0)
+#define EA_IS_GCREF_OR_BYREF(x) ((((unsigned)(x)) & ((unsigned)(EA_BYREF_FLG | EA_GCREF_FLG))) != 0)
+#define EA_IS_DSP_RELOC(x) ((((unsigned)(x)) & ((unsigned)EA_DSP_RELOC_FLG)) != 0)
+#define EA_IS_CNS_RELOC(x) ((((unsigned)(x)) & ((unsigned)EA_CNS_RELOC_FLG)) != 0)
+#define EA_IS_RELOC(x) (EA_IS_DSP_RELOC(x) || EA_IS_CNS_RELOC(x))
+#define EA_TYPE(x) ((emitAttr)(((unsigned)(x)) & ~(EA_OFFSET_FLG | EA_DSP_RELOC_FLG | EA_CNS_RELOC_FLG)))
+
+#define EmitSize(x) (EA_ATTR(genTypeSize(TypeGet(x))))
+
+// Enum specifying the instruction set for generating floating point or SIMD code.
+enum InstructionSet
+{
+#ifdef _TARGET_XARCH_
+ InstructionSet_SSE2,
+ InstructionSet_AVX,
+#elif defined(_TARGET_ARM_)
+ InstructionSet_NEON,
+#endif
+ InstructionSet_NONE
+};
+// clang-format on
+
+/*****************************************************************************/
+#endif //_INSTR_H_
+/*****************************************************************************/
diff --git a/src/jit/instrs.h b/src/jit/instrs.h
new file mode 100644
index 0000000000..2f5c14fc6f
--- /dev/null
+++ b/src/jit/instrs.h
@@ -0,0 +1,13 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(_TARGET_XARCH_)
+#include "instrsxarch.h"
+#elif defined(_TARGET_ARM_)
+#include "instrsarm.h"
+#elif defined(_TARGET_ARM64_)
+#include "instrsarm64.h"
+#else
+#error Unsupported or unset target architecture
+#endif // target type
diff --git a/src/jit/instrsarm.h b/src/jit/instrsarm.h
new file mode 100644
index 0000000000..d1a77f8ebb
--- /dev/null
+++ b/src/jit/instrsarm.h
@@ -0,0 +1,557 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************
+ * Arm Thumb1/Thumb2 instructions for JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- floating point instruction
+ * ld/st/cmp -- load/store/compare instruction
+ * fmt -- encoding format used by this instruction
+ * e1 -- encoding 1
+ * e2 -- encoding 2
+ * e3 -- encoding 3
+ * e4 -- encoding 4
+ * e5 -- encoding 5
+ * e6 -- encoding 6
+ * e7 -- encoding 7
+ * e8 -- encoding 8
+ * e9 -- encoding 9
+ *
+******************************************************************************/
+
+#if !defined(_TARGET_ARM_)
+#error Unexpected target type
+#endif
+
+#ifndef INST1
+#error INST1 must be defined before including this file.
+#endif
+#ifndef INST2
+#error INST2 must be defined before including this file.
+#endif
+#ifndef INST3
+#error INST3 must be defined before including this file.
+#endif
+#ifndef INST4
+#error INST4 must be defined before including this file.
+#endif
+#ifndef INST5
+#error INST5 must be defined before including this file.
+#endif
+#ifndef INST6
+#error INST6 must be defined before including this file.
+#endif
+// No INST7
+// #ifndef INST7
+// #error INST7 must be defined before including this file.
+// #endif
+#ifndef INST8
+#error INST8 must be defined before including this file.
+#endif
+#ifndef INST9
+#error INST9 must be defined before including this file.
+#endif
+
+/*****************************************************************************/
+/* The following is ARM-specific */
+/*****************************************************************************/
+
+// If you're adding a new instruction:
+// You need not only to fill in one of these macros describing the instruction, but also:
+// * If the instruction writes to more than one destination register, update the function
+// emitInsMayWriteMultipleRegs in emitArm.cpp.
+
+// clang-format off
+INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
+
+// enum name FP LD/ST Rdn,Rm Rd,Rn,Rm Rdn,i8 Rd,Rn,i3 Rd,Rn,+i8<<i4 Rd,Rn,Rm{,sh} SP,i9 Rd,SP,i10 Rd,PC,i10
+// T1_D0 T1_H T1_J0 T1_G T2_L0 T2_C0 T1_F T1_J2 T1_J3
+INST9(add, "add", 0, 0, IF_EN9, 0x4400, 0x1800, 0x3000, 0x1C00, 0xF1000000, 0xEB000000, 0xB000, 0xA800, 0xA000)
+ // add Rdn,Rm T1_D0 01000100Dmmmmddd 4400 high
+ // adds Rd,Rn,Rm T1_H 0001100mmmnnnddd 1800 low
+ // adds Rdn,i8 T1_J0 00110dddiiiiiiii 3000 low imm(0-255)
+ // adds Rd,Rn,i3 T1_G 0001110iiinnnddd 1C00 low imm(0-7)
+ // add{s} Rd,Rn,Rm{,sh} T2_C0 11101011000Snnnn 0iiiddddiishmmmm EB00 0000
+ // add{s} Rd,Rn,+i8<<i4 T2_L0 11110i01000Snnnn 0iiiddddiiiiiiii F100 0000 imm(i8<<i4) *pref
+ // add SP,i9 T1_F 101100000iiiiiii B000 SP imm(0-508)
+ // add Rd,SP,i10 T1_J2 10101dddiiiiiiii A800 low imm(0-1020)
+ // add Rd,PC,i10 T1_J3 10100dddiiiiiiii A000 low imm(0-1020)
+INST9(sub, "sub", 0, 0, IF_EN9, BAD_CODE, 0x1A00, 0x3800, 0x1E00, 0xF1A00000, 0xEBA00000, 0xB080, BAD_CODE, BAD_CODE)
+ // subs Rd,Rn,Rm T1_H 0001101mmmnnnddd 1A00 low
+ // subs Rdn,i8 T1_J0 00111dddiiiiiiii 3800 low imm(0-255)
+ // subs Rd,Rn,i3 T1_G 0001111iiinnnddd 1E00 low imm(0-7)
+ // sub{s} Rd,Rn,+i8<<i4 T2_L0 11110i01101Snnnn 0iiiddddiiiiiiii F1A0 0000 imm(i8<<i4) *pref
+ // sub{s} Rd,Rn,Rm{,sh} T2_C0 11101011101Snnnn 0iiiddddiishmmmm EBA0 0000
+ // sub SP,i9 T1_F 101100001iiiiiii B080 SP imm(0-508)
+
+// enum name FP LD/ST Rt,[Rn+Rm] Rt,[Rn+i7] Rt,[Rn+Rm,sh] Rt,[Rn+=i8] Rt,[Rn+i12] Rt,[PC+-i12] Rd,[SP+i10] Rd,[PC+i10]
+// T1_H T1_C T2_E0 T2_H0 T2_K1 T2_K4 T1_J2 T1_J3
+INST8(ldr, "ldr", 0,LD, IF_EN8, 0x5800, 0x6800, 0xF8500000, 0xF8500800, 0xF8D00000, 0xF85F0000, 0x9800, 0x4800)
+ // ldr Rt,[Rn+Rm] T1_H 0101100mmmnnnttt 5800 low
+ // ldr Rt,[Rn+i7] T1_C 01101iiiiinnnttt 6800 low imm(0-124)
+ // ldr Rt,[Rn+Rm{,sh}] T2_E0 111110000101nnnn tttt000000shmmmm F850 0000 sh=(0,1,2,3)
+ // ldr Rt,[Rn],+-i8{!} T2_H0 111110000101nnnn tttt1PUWiiiiiiii F850 0800 imm(0-255)
+ // ldr Rt,[Rn+i12] T2_K1 111110001101nnnn ttttiiiiiiiiiiii F8D0 0000 imm(0-4095)
+ // ldr Rt,[PC+-i12] T2_K4 11111000U1011111 ttttiiiiiiiiiiii F85F 0000 imm(+-4095)
+ // ldr Rt,[SP+i10] T1_J2 10011tttiiiiiiii 9800 low imm(0-1020)
+ // ldr Rt,[PC+i10] T1_J3 01001tttiiiiiiii 4800 low imm(0-1020)
+
+// enum name FP LD/ST Rt,[Rn+Rm] Rt,[Rn+i7] Rt,[Rn+Rm,sh] Rt,[Rn+=i8] Rt,[Rn+i12] Rt,[PC+-i12] or Rt,[SP+-i10]
+// T1_H T1_C T2_E0 T2_H0 T2_K1 T2_K4 or T1_J2
+INST6(str, "str", 0,ST, IF_EN6B, 0x5000, 0x6000, 0xF8400000, 0xF8400800, 0xF8C00000, 0x9000)
+ // str Rt,[Rn+Rm] T1_H 0101000mmmnnnttt 5000 low
+ // str Rt,[Rn+i7] T1_C 01100iiiiinnnttt 6000 low imm(0-124)
+ // str Rt,[Rn+Rm{,sh}] T2_E0 111110000100nnnn tttt000000shmmmm F840 0000 sh=(0,1,2,3)
+ // str Rt,[Rn],+-i8{!} T2_H0 111110000100nnnn tttt1PUWiiiiiiii F840 0800 imm(0-255)
+ // str Rt,[Rn+i12] T2_K1 111110001100nnnn ttttiiiiiiiiiiii F8C0 0000 imm(0-4095)
+ // str Rt,[SP+-i10] T1_J2 10010tttiiiiiiii 9000 low imm(0-1020)
+INST6(ldrb, "ldrb", 0,LD, IF_EN6A, 0x5C00, 0x7800, 0xF8100000, 0xF8100800, 0xF8900000, 0xF81F0000)
+ // ldrb Rt,[Rn+Rm] T1_H 0101110mmmnnnttt 5C00 low
+ // ldrb Rt,[Rn+i5] T1_C 01111iiiiinnnttt 7800 low imm(0-31)
+ // ldrb Rt,[Rn+Rm{,sh}] T2_E0 111110000001nnnn tttt000000shmmmm F810 0000 sh=(0,1,2,3)
+ // ldrb Rt,[Rn],+-i8{!} T2_H0 111110000001nnnn tttt1PUWiiiiiiii F810 0800 imm(0-255)
+ // ldrb Rt,[Rn+i12] T2_K1 111110001001nnnn ttttiiiiiiiiiiii F890 0000 imm(0-4095)
+ // ldrb Rt,[PC+i12] T2_K4 11111000U0011111 ttttiiiiiiiiiiii F81F 0000 imm(+-4095)
+INST6(strb, "strb", 0,ST, IF_EN6B, 0x5400, 0x7000, 0xF8000000, 0xF8000800, 0xF8800000, BAD_CODE)
+ // strb Rt,[Rn+Rm] T1_H 0101010mmmnnnttt 5400 low
+ // strb Rt,[Rn+i5] T1_C 01110iiiiinnnttt 7000 low imm(0-31)
+ // strb Rt,[Rn+Rm{,sh}] T2_E0 111110000000nnnn tttt000000shmmmm F800 0000 sh=(0,1,2,3)
+ // strb Rt,[Rn],+-i8{!} T2_H0 111110000000nnnn tttt1PUWiiiiiiii F800 0800 imm(0-255)
+ // strb Rt,[Rn+i12] T2_K1 111110001000nnnn ttttiiiiiiiiiiii F880 0000 imm(0-4095)
+INST6(ldrh, "ldrh", 0,LD, IF_EN6A, 0x5A00, 0x8800, 0xF8300000, 0xF8300800, 0xF8B00000, 0xF83F0000)
+ // ldrh Rt,[Rn+Rm] T1_H 0101101mmmnnnttt 5A00 low
+ // ldrh Rt,[Rn+i6] T1_C 10001iiiiinnnttt 8800 low imm(0-62)
+ // ldrh Rt,[Rn+Rm{,sh}] T2_E0 111110000011nnnn tttt000000shmmmm F830 0000 sh=(0,1,2,3)
+ // ldrh Rt,[Rn],+-i8{!} T2_H0 111110000011nnnn tttt1PUWiiiiiiii F830 0800 imm(0-255)
+ // ldrh Rt,[Rn+i12] T2_K1 111110001011nnnn ttttiiiiiiiiiiii F8B0 0000 imm(0-4095)
+ // ldrh Rt,[PC+i12] T2_K4 11111000U0111111 ttttiiiiiiiiiiii F83F 0000 imm(+-4095)
+INST6(strh, "strh", 0,ST, IF_EN6B, 0x5200, 0x8000, 0xF8200000, 0xF8200800, 0xF8a00000, BAD_CODE)
+ // strh Rt,[Rn+Rm] T1_H 0101001mmmnnnttt 5200 low
+ // strh Rt,[Rn+i6] T1_C 10000iiiiinnnttt 8000 low imm(0-62)
+ // strh Rt,[Rn+Rm{,sh}] T2_E0 111110000010nnnn tttt000000shmmmm F820 0000 sh=(0,1,2,3)
+ // strh Rt,[Rn],+-i8{!} T2_H0 111110000010nnnn tttt1PUWiiiiiiii F820 0800 imm(0-255)
+ // strh Rt,[Rn+i12] T2_K1 111110001010nnnn ttttiiiiiiiiiiii F8A0 0000 imm(0-4095)
+INST6(ldrsb, "ldrsb", 0,LD, IF_EN6A, 0x5600, BAD_CODE, 0xF9100000, 0xF9100800, 0xF9900000, 0xF91F0000)
+ // ldrsb Rt,[Rn+Rm] T1_H 0101011mmmnnnttt 5600 low
+ // ldrsb Rt,[Rn+Rm{,sh}] T2_E0 111110010001nnnn tttt000000shmmmm F910 0000 sh=(0,1,2,3)
+ // ldrsb Rt,[Rn],+-i8{!} T2_H0 111110010001nnnn tttt1PUWiiiiiiii F910 0800 imm(0-255)
+ // ldrsb Rt,[Rn+i12] T2_K1 111110011001nnnn ttttiiiiiiiiiiii F990 0000 imm(0-4095)
+ // ldrsb Rt,[PC+i12] T2_K4 11111001U0011111 ttttiiiiiiiiiiii F91F 0000 imm(+-4095)
+INST6(ldrsh, "ldrsh", 0,LD, IF_EN6A, 0x5E00, BAD_CODE, 0xF9300000, 0xF9300800, 0xF9B00000, 0xF93F0000)
+ // ldrsh Rt,[Rn+Rm] T1_H 0101111mmmnnnttt 5E00 low
+ // ldrsh Rt,[Rn+Rm{,sh}] T2_E0 111110010011nnnn tttt000000shmmmm F930 0000 sh=(0,1,2,3)
+ // ldrsh Rt,[Rn],+-i8{!} T2_H0 111110010011nnnn tttt1PUWiiiiiiii F930 0800 imm(0-255)
+ // ldrsh Rt,[Rn+i12] T2_K1 111110011011nnnn ttttiiiiiiiiiiii F9B0 0000 imm(0-4095)
+ // ldrsh Rt,[PC+i12] T2_K4 11111001U0111111 ttttiiiiiiiiiiii F93F 0000 imm(+-4095)
+
+// enum name FP LD/ST Rd, Rm Rd,Rm Rd,i8 Rd,+i8<<i4 S / Rn,Rm{,sh}
+// T1_E T1_D0 T1_J0 T2_L1/L2 T2_C3/C8
+INST5(mov, "mov", 0, 0, IF_EN5A, 0x0000, 0x4600, 0x2000, 0xF04F0000, 0xEA5F0000)
+ // movs Rd,Rm T1_E 0000000000mmmddd 0000 low
+ // mov Rd,Rm T1_D0 01000110Dmmmmddd 4600 high
+ // movs Rd,i8 T1_J0 00100dddiiiiiiii 2000 low imm(0-255)
+ // mov{s} Rd,+i8<<i4 T2_L1 11110i00010S1111 0iiiddddiiiiiiii F04F 0000 imm(i8<<i4)
+ // mov{s} Rd,Rm T2_C3 1110101001011111 0000dddd0000mmmm EA5F 0000
+INST5(cmp, "cmp", 0,CMP,IF_EN5B, 0x4280, 0x4500, 0x2800, 0xF1B00F00, 0xEBB00F00)
+ // cmp Rn,Rm T1_E 0100001010mmmnnn 4280 low
+ // cmp Rn,Rm T1_D0 01000101Nmmmmnnn 4500 high
+ // cmp Rn,i8 T1_J0 00101nnniiiiiiii 2800 low imm(0-255)
+ // cmp Rn,+i8<<i4 T2_L2 11110i011011nnnn 0iii1111iiiiiiii F1B0 0F00 imm(i8<<i4)
+ // cmp Rn,Rm{,sh} T2_C8 111010111011nnnn 0iii1111iishmmmm EBB0 0F00
+
+// enum name FP LD/ST Rdn, Rn Rd,Rn,i5 Rd,Rn,Rm Rd,Rn,i5
+// T1_E T2_C T2_C4 T2_C2
+INST4(lsl, "lsl", 0, 0, IF_EN4A, 0x4080, 0x0000, 0xFA00F000, 0xEA4F0000)
+ // lsls Rdn,Rm T1_E 0100000010mmmddd 4080 low
+ // lsls Rd,Rm,i5 T1_C 00000iiiiimmmddd 0000 low imm(0-31)
+ // lsl{s} Rd,Rn,Rm T2_C4 11111010000Snnnn 1111dddd0000mmmm FA00 F000
+ // lsl{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii00mmmm EA4F 0000 imm(0-31)
+INST4(lsr, "lsr", 0, 0, IF_EN4A, 0x40C0, 0x0800, 0xFA20F000, 0xEA4F0010)
+ // lsrs Rdn,Rm T1_E 0100000011mmmddd 40C0 low
+ // lsrs Rd,Rm,i5 T1_C 00001iiiiimmmddd 0800 low imm(0-31)
+ // lsr{s} Rd,Rn,Rm T2_C4 11111010001Snnnn 1111dddd0000mmmm FA20 F000
+ // lsr{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii01mmmm EA4F 0010 imm(0-31)
+INST4(asr, "asr", 0, 0, IF_EN4A, 0x4100, 0x1000, 0xFA40F000, 0xEA4F0020)
+ // asrs Rdn,Rm T1_E 0100000100mmmddd 4100 low shift by Rm
+ // asrs Rd,Rm,i5 T1_C 00010iiiiimmmddd 1000 low imm(0-31)
+ // asr{s} Rd,Rn,Rm T2_C4 11111010010Snnnn 1111dddd0000mmmm FA40 F000
+ // asr{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii10mmmm EA4F 0020 imm(0-31)
+INST4(ror, "ror", 0, 0, IF_EN4A, 0x41C0, BAD_CODE, 0xFA60F000, 0xEA4F0030)
+ // rors Rdn,Rm T1_E 0100000111mmmddd 41C0 low
+ // ror{s} Rd,Rn,Rm T2_C4 11111010011Snnnn 1111dddd0000mmmm FA60 F000
+ // ror{s} Rd,Rm,i5 T2_C2 11101010010S1111 0iiiddddii11mmmm EA4F 0030 imm(0-31)
+
+// enum name FP LD/ST Rdn, Rn Rd,Rn,i5 Rd,Rn,Rm Rd,Rn,i5
+// T2_K2 T2_H2 T2_C7 T2_K3
+INST4(pld, "pld", 0,LD, IF_EN4B, 0xF890F000, 0xF810FC00, 0xF810F000, 0xF81FF000) // Cache Prefetch Data for Read
+ // pld [Rn+i12] T2_K2 111110001001nnnn 1111iiiiiiiiiiii F890 F000 imm(0-4095)
+ // pld [Rn-i8] T2_H2 111110000001nnnn 11111100iiiiiiii F810 FC00 imm(0-255)
+ // pld [Rn+Rm{,sh}] T2_C7 111110000001nnnn 1111000000shmmmm F810 F000 sh=(0,1,2,3)
+ // pld [PC+-i12] T2_K3 11111001U0011111 1111iiiiiiiiiiii F81F F000 imm(+-4095)
+INST4(pldw, "pldw", 0,LD, IF_EN4B, 0xF8B0F000, 0xF830FC00, 0xF830F000, BAD_CODE) // Cache Prefetch Data for Write
+ // pldw [Rn+i12] T2_K2 111110001011nnnn 1111iiiiiiiiiiii F8B0 F000 imm(0-4095)
+ // pldw [Rn-i8] T2_H2 111110000011nnnn 11111100iiiiiiii F830 FC00 imm(0-255)
+ // pldw [Rn+Rm{,sh}] T2_C7 111110000011nnnn 1111000000shmmmm F830 F000 sh=(0,1,2,3)
+#ifdef FEATURE_PLI_INSTRUCTION
+// NOTE: The PLI instruction had an erratum in early Krait implementations, so even though it's unlikely we would ever generate it, it is
+// #ifdef'ed out to prevent its use.
+INST4(pli, "pli", 0,LD, IF_EN4B, 0xF990F000, 0xF910FC00, 0xF910F000, 0xF91FF000) // Cache Prefetch Instructions for Execute
+ // pli [Rn+i12] T2_K2 111110011001nnnn 1111iiiiiiiiiiii F990 F000 imm(0-4095)
+ // pli [Rn-i8] T2_H2 111110010001nnnn 11111100iiiiiiii F910 FC00 imm(0-255)
+ // pli [Rn+Rm{,sh}] T2_C7 111110010001nnnn 1111000000shmmmm F910 F000 sh=(0,1,2,3)
+ // pli [PC+-i12] T2_K3 11111001U0011111 1111iiiiiiiiiiii F91F F000 imm(+-4095)
+#endif // FEATURE_PLI_INSTRUCTION
+
+// enum name FP LD/ST Rdn, Rm Rd,Rn,Rm,sh Rd,Rn,i12
+// T1_E T2_C0 T2_L0
+INST3(and, "and", 0, 0, IF_EN3A, 0x4000, 0xEA000000, 0xF0000000)
+ // ands Rdn,Rm T1_E 0100000000mmmddd 4000 low
+ // and{s} Rd,Rn,Rm{,sh} T2_C0 11101010000Snnnn 0iiiddddiishmmmm EA00 0000
+ // and{s} Rd,Rn,i12 T2_L0 11110i00000Snnnn 0iiiddddiiiiiiii F000 0000 imm(i8<<i4)
+INST3(eor, "eor", 0, 0, IF_EN3A, 0x4040, 0xEA800000, 0xF0800000)
+ // eors Rd,Rm T1_E 0100000001mmmddd 4040 low
+ // eor{s} Rd,Rn,Rm{,sh} T2_C0 11101010100Snnnn 0iiiddddiishmmmm EA80 0000
+ // eor{s} Rd,Rn,i12 T2_L0 11110i00100Snnnn 0iiiddddiiiiiiii F080 0000 imm(i8<<i4)
+INST3(orr, "orr", 0, 0, IF_EN3A, 0x4300, 0xEA400000, 0xF0400000)
+ // orrs Rdn,Rm T1_E 0100001100mmmddd 4300 low
+ // orr{s} Rd,Rn,Rm{,sh} T2_C0 11101010010Snnnn 0iiiddddiishmmmm EA40 0000
+ // orr{s} Rd,Rn,i12 T2_L0 11110i00010Snnnn 0iiiddddiiiiiiii F040 0000 imm(i8<<i4)
+INST3(orn, "orn", 0, 0, IF_EN3A, BAD_CODE, 0xEA600000, 0xF0600000)
+ // orn{s} Rd,Rn,Rm{,sh} T2_C0 11101010011Snnnn 0iiiddddiishmmmm EA60 0000
+ // orn{s} Rd,Rn,i12 T2_L0 11110i00011Snnnn 0iiiddddiiiiiiii F060 0000 imm(i8<<i4)
+INST3(bic, "bic", 0, 0, IF_EN3A, 0x4380, 0xEA200000, 0xF0200000)
+ // bics Rdn,Rm T1_E 0100001110mmmddd 4380 low
+ // bic{s} Rd,Rn,Rm{,sh} T2_C0 11101010001Snnnn 0iiiddddiishmmmm EA20 0000
+ // bic{s} Rd,Rn,i12 T2_L0 11110i00001Snnnn 0iiiddddiiiiiiii F020 0000 imm(i8<<i4)
+INST3(adc, "adc", 0, 0, IF_EN3A, 0x4140, 0xEB400000, 0xF1400000)
+ // adcs Rdn,Rm T1_E 0100000101mmmddd 4140 low
+ // adcs Rd,Rn,Rm{,sh} T2_C0 11101011010Snnnn 0iiiddddiishmmmm EB40 0000
+ // adcs Rd,Rn,i12 T2_L0 11110i01010Snnnn 0iiiddddiiiiiiii F140 0000 imm(0-4095)
+INST3(sbc, "sbc", 0, 0, IF_EN3A, 0x4180, 0xEB600000, 0xF1600000)
+ // sbcs Rd,Rm T1_E 0100000110mmmddd 4180 low
+ // sbc{s} Rd,Rn,Rm{,sh} T2_C0 11101011011Snnnn 0iiiddddiishmmmm EB60 0000
+ // sbc{s} Rd,Rn,+i8<<i4 T2_L0 11110i01011Snnnn 0iiiddddiiiiiiii F160 0000 imm(i8<<i4)
+INST3(rsb, "rsb", 0, 0, IF_EN3A, 0x4240, 0xEBC00000, 0xF1C00000)
+ // rsbs Rd,Rn,#0 T1_E 0100001001nnnddd 4240 low (Note: x86 NEG instr)
+ // rsb{s} Rd,Rn,Rm{,sh} T2_C0 11101011110Snnnn 0iiiddddiishmmmm EBC0 0000
+ // rsb{s} Rd,Rn,+i8<<i4 T2_L0 11110i01110Snnnn 0iiiddddiiiiiiii F1C0 0000 imm(i8<<i4)
+
+// enum name FP LD/ST Rn,Rm Rn,Rm,sh Rn,i12
+// T1_E T2_C8 T2_L2
+INST3(tst, "tst", 0,CMP,IF_EN3B, 0x4200, 0xEA100F00, 0xF0100F00)
+ // tst Rn,Rm T1_E 0100001000mmmnnn 4200 low
+ // tst Rn,Rm{,sh} T2_C8 111010100001nnnn 0iii1111iishmmmm EA10 0F00
+ // tst Rn,+i8<<i4 T2_L2 11110i000001nnnn 0iii1111iiiiiiii F010 0F00 imm(i8<<i4)
+INST3(teq, "teq", 0,CMP,IF_EN3B, BAD_CODE, 0xEA900F00, 0xF0900F00)
+ // teq Rn,Rm{,sh} T2_C8 111010101001nnnn 0iii1111iishmmmm EA90 0F00
+ // teq Rn,+i8<<i4 T2_L2 11110i001001nnnn 0iii1111iiiiiiii F090 0F00 imm(i8<<i4)
+INST3(cmn, "cmn", 0,CMP,IF_EN3B, 0x42C0, 0xEB100F00, 0xF1100F00)
+ // cmn Rn,Rm T1_E 0100001011mmmnnn 42C0 low
+ // cmn Rn,Rm{,sh} T2_C8 111010110001nnnn 0iii1111iishmmmm EB10 0F00
+ // cmn Rn,+i8<<i4 T2_L2 11110i010001nnnn 0iii1111iiiiiiii F110 0F00 imm(i8<<i4)
+
+// enum name FP LD/ST Rd,Rm Rd,Rm,sh Rd,Rn,i12
+// T1_E T2_C1 T2_L1
+INST3(mvn, "mvn", 0, 0, IF_EN3C, 0x43C0, 0xEA6F0000, 0xF06F0000)
+ // mvns Rd,Rm T1_E 0100001111mmmddd 43C0 low
+ // mvn{s} Rd,Rm{,sh} T2_C1 11101010011S1111 0iiiddddiishmmmm EA6F 0000
+ // mvn{s} Rd,+i8<<i4 T2_L1 11110i00011S1111 0iiiddddiiiiiiii F06F 0000 imm(i8<<i4)
+
+// enum name FP LD/ST SP,reg8 rT reg,reg16
+// T1_L1 T2_E2 T2_I1
+INST3(push, "push", 0, 0, IF_EN3D, 0xB400, 0xF84D0D04, 0xE92D0000)
+ // push {LR,}<reglist8> T1_L1 1011010Mrrrrrrrr B400 low
+ // push rT T2_E2 1111100001001101 tttt110100000100 F84D 0D04
+ // push <reglist16> T2_I1 1110100100101101 0M0rrrrrrrrrrrrr E92D 0000
+INST3(pop, "pop", 0, 0, IF_EN3D, 0xBC00, 0xF85D0B04, 0xE8BD0000)
+ // pop {PC,}<reglist8> T1_L1 1011110Prrrrrrrr BC00 low
+ // pop rT T2_E2 1111100001011101 tttt101100000100 F85D 0B04
+ // pop <reglist16> T2_I1 1110100010111101 PM0rrrrrrrrrrrrr E8BD 0000
+
+// enum name FP LD/ST Rd,i16 Rd,i16 Rd,i16
+// T2_N T2_N1 T2_N2
+INST3(movt, "movt", 0, 0, IF_EN3E, 0xF2C00000,0xF2C00000,0xF2C00000)
+ // Rd,i16 T2_N 11110i101100iiii 0iiiddddiiiiiiii F2C0 0000 imm(0-65535)
+ // Rd,i16 T2_N1 11110i101100iiii 0iiiddddiiiiiiii F2C0 0000 imm(0-65535)
+ // Rd,i16 T2_N2 11110i101100iiii 0iiiddddiiiiiiii F2C0 0000 imm(0-65535)
+INST3(movw, "movw", 0, 0, IF_EN3E, 0xF2400000,0xF2400000,0xF2400000)
+ // Rd,+i16 T2_N 11110i100100iiii 0iiiddddiiiiiiii F240 0000 imm(0-65535)
+ // Rd,+i16 T2_N1 11110i100100iiii 0iiiddddiiiiiiii F240 0000 imm(0-65535)
+ // Rd,+i16 T2_N2 11110i100100iiii 0iiiddddiiiiiiii F240 0000 imm(0-65535)
+
+// enum name FP LD/ST PC+-imm11 PC+-imm24 PC+-imm24
+// T1_M T2_J2 T2_J3
+INST3(b, "b", 0, 0, IF_EN3F, 0xE000, 0xF0009000, 0xF0009000)
+ // b PC+-i11 T1_M 11100iiiiiiiiiii E000 imm(-2048..2046)
+ // b PC+-i24 T2_J2 11110Siiiiiiiiii 10j1jiiiiiiiiiii F000 9000 imm(-16777216..16777214) (intra-procedure offset)
+ // b PC+-i24 T2_J3 11110Siiiiiiiiii 10j1jiiiiiiiiiii F000 9000 imm(-16777216..16777214) (inter-procedure offset)
+
+
+// enum name FP LD/ST PC+-imm8 PC+-imm20
+// T1_K T2_J1
+INST2(beq, "beq", 0, 0, IF_EN2A, 0xD000, 0xF0008000)
+ // beq PC+-i8 T1_K 11010000iiiiiiii D000 imm(-256..254)
+ // beq PC+-i20 T2_J1 11110S0000iiiiii 10j0jiiiiiiiiiii F000 8000 imm(-1048576..1048574)
+INST2(bne, "bne", 0, 0, IF_EN2A, 0xD100, 0xF0408000)
+ // bne PC+-i8 T1_K 11010001iiiiiiii D100 imm(-256..254)
+ // bne PC+-i20 T2_J1 11110S0001iiiiii 10j0jiiiiiiiiiii F040 8000 imm(-1048576..1048574)
+INST2(bhs, "bhs", 0, 0, IF_EN2A, 0xD200, 0xF0808000)
+ // bhs PC+-i8 T1_K 11010010iiiiiiii D200 imm(-256..254)
+ // bhs PC+-i20 T2_J1 11110S0010iiiiii 10j0jiiiiiiiiiii F080 8000 imm(-1048576..1048574)
+INST2(blo, "blo", 0, 0, IF_EN2A, 0xD300, 0xF0C08000)
+ // blo PC+-i8 T1_K 11010011iiiiiiii D300 imm(-256..254)
+ // blo PC+-i20 T2_J1 11110S0011iiiiii 10j0jiiiiiiiiiii F0C0 8000 imm(-1048576..1048574)
+INST2(bmi, "bmi", 0, 0, IF_EN2A, 0xD400, 0xF1008000)
+ // bmi PC+-i8 T1_K 11010100iiiiiiii D400 imm(-256..254)
+ // bmi PC+-i20 T2_J1 11110S0100iiiiii 10j0jiiiiiiiiiii F100 8000 imm(-1048576..1048574)
+INST2(bpl, "bpl", 0, 0, IF_EN2A, 0xD500, 0xF1408000)
+ // bpl PC+-i8 T1_K 11010101iiiiiiii D500 imm(-256..254)
+ // bpl PC+-i20 T2_J1 11110S0101iiiiii 10j0jiiiiiiiiiii F140 8000 imm(-1048576..1048574)
+INST2(bvs, "bvs", 0, 0, IF_EN2A, 0xD600, 0xF1808000)
+ // bvs PC+-i8 T1_K 11010110iiiiiiii D600 imm(-256..254)
+ // bvs PC+-i20 T2_J1 11110S0110iiiiii 10j0jiiiiiiiiiii F180 8000 imm(-1048576..1048574)
+INST2(bvc, "bvc", 0, 0, IF_EN2A, 0xD700, 0xF1C08000)
+ // bvc PC+-i8 T1_K 11010111iiiiiiii D700 imm(-256..254)
+ // bvc PC+-i20 T2_J1 11110S0111iiiiii 10j0jiiiiiiiiiii F1C0 8000 imm(-1048576..1048574)
+INST2(bhi, "bhi", 0, 0, IF_EN2A, 0xD800, 0xF2008000)
+ // bhi PC+-i8 T1_K 11011000iiiiiiii D800 imm(-256..254)
+ // bhi PC+-i20 T2_J1 11110S1000iiiiii 10j0jiiiiiiiiiii F200 8000 imm(-1048576..1048574)
+INST2(bls, "bls", 0, 0, IF_EN2A, 0xD900, 0xF2408000)
+ // bls PC+-i8 T1_K 11011001iiiiiiii D900 imm(-256..254)
+ // bls PC+-i20 T2_J1 11110S1001iiiiii 10j0jiiiiiiiiiii F240 8000 imm(-1048576..1048574)
+INST2(bge, "bge", 0, 0, IF_EN2A, 0xDA00, 0xF2808000)
+ // bge PC+-i8 T1_K 11011010iiiiiiii DA00 imm(-256..254)
+ // bge PC+-i20 T2_J1 11110S1010iiiiii 10j0jiiiiiiiiiii F280 8000 imm(-1048576..1048574)
+INST2(blt, "blt", 0, 0, IF_EN2A, 0xDB00, 0xF2C08000)
+ // blt PC+-i8 T1_K 11011011iiiiiiii DB00 imm(-256..254)
+ // blt PC+-i20 T2_J1 11110S1011iiiiii 10j0jiiiiiiiiiii F2C0 8000 imm(-1048576..1048574)
+INST2(bgt, "bgt", 0, 0, IF_EN2A, 0xDC00, 0xF3008000)
+ // bgt PC+-i8 T1_K 11011100iiiiiiii DC00 imm(-256..254)
+ // bgt PC+-i20 T2_J1 11110S1100iiiiii 10j0jiiiiiiiiiii F300 8000 imm(-1048576..1048574)
+INST2(ble, "ble", 0, 0, IF_EN2A, 0xDD00, 0xF3408000)
+ // ble PC+-i8 T1_K 11011101iiiiiiii DD00 imm(-256..254)
+ // ble PC+-i20 T2_J1 11110S1101iiiiii 10j0jiiiiiiiiiii F340 8000 imm(-1048576..1048574)
+
+// enum name FP LD/ST Rm Rm
+// T1_D1 T1_D2
+INST2(bx, "bx", 0, 0, IF_EN2B, 0x4700, 0x4700)
+ // bx Rm T1_D1 010001110mmmm000 4700 high
+ // bx Rm T1_D2 010001110mmmm000 4700 high
+
+// enum name FP LD/ST rM PC+-imm24
+// T1_D2 T2_J3
+INST2(blx, "blx", 0, 0, IF_EN2C, 0x4780, 0xF000C000)
+ // blx Rm T1_D2 010001111mmmm000 4780 high
+ // blx PC+-i24 T2_J3 11110Siiiiiiiiii 11j0jiiiiiiiiii0 F000 C000 imm(-16777216..16777214)
+
+// enum name FP LD/ST Rn,<reg8> Rn,<reg16>
+// T1_J1 T2_I0
+INST2(ldm, "ldm", 0,LD, IF_EN2D, 0xC800, 0xE8900000)
+ // ldm Rn,<reglist8> T1_J1 11001nnnrrrrrrrr C800 low
+ // ldm Rn{!},<reglist16> T2_I0 1110100010W1nnnn rr0rrrrrrrrrrrrr E890 0000
+INST2(stm, "stm", 0,ST, IF_EN2D, 0xC000, 0xE8800000)
+ // stm Rn!,<reglist8> T1_J1 11000nnnrrrrrrrr C000 low
+ // stm Rn{!},<reglist16> T2_I0 1110100010W0nnnn 0r0rrrrrrrrrrrrr E880 0000
+
+// enum name FP LD/ST Rn,Rm Rn,Rm,{sb}
+// T1_E T2_C6
+INST2(sxtb, "sxtb", 0, 0, IF_EN2E, 0xB240, 0xFA4FF080)
+ // Rd,Rm T1_E 1011001001mmmddd B240 low
+ // Rd,Rm{,sb} T2_C6 1111101001001111 1111dddd10sbmmmm FA4F F080
+INST2(sxth, "sxth", 0, 0, IF_EN2E, 0xB200, 0xFA0FF080)
+ // Rd,Rm T1_E 1011001000mmmddd B200 low
+ // Rd,Rm{,sb} T2_C6 1111101000001111 1111dddd10sbmmmm FA0F F080
+INST2(uxtb, "uxtb", 0, 0, IF_EN2E, 0xB2C0, 0xFA5FF080)
+ // Rd,Rm T1_E 1011001011mmmddd B2C0 low
+ // Rd,Rm{,sb} T2_C6 1111101001011111 1111dddd10sbmmmm FA5F F080
+INST2(uxth, "uxth", 0, 0, IF_EN2E, 0xB280, 0xFA1FF080)
+ // Rd,Rm T1_E 1011001010mmmddd B280 low
+ // Rd,Rm{,sb} T2_C6 1111101000011111 1111dddd10sbmmmm FA1F F080
+
+// enum name FP LD/ST Rdn,Rm Rd,Rn,Rm
+// T1_E T2_C5
+INST2(mul, "mul", 0, 0, IF_EN2F, 0x4340, 0xFB00F000)
+ // Rd,Rm T1_E 0100001101nnnddd 4340 low
+ // Rd,Rn,Rm T2_C5 111110110000nnnn 1111dddd0000mmmm FB00 F000
+
+// enum name FP LD/ST Rd,PC,i10 Rd,PC,+-i12
+// T1_J3 T2_M1
+INST2(adr, "adr", 0, 0, IF_EN2G, 0xA000, 0xF20F0000)
+ // Rd,PC+i10 T1_J3 10100dddiiiiiiii A000 low imm(0-1020)
+ // Rd,PC+-i12 T2_M1 11110i10U0U01111 0iiiddddiiiiiiii F20F 0000 imm(+-4095)
+
+INST1(addw, "addw", 0, 0, IF_T2_M0, 0xF2000000)
+ // Rd,Rn,i12 T2_M0 11110i100000nnnn 0iiiddddiiiiiiii F200 0000 imm(0-4095)
+INST1(bfc, "bfc", 0, 0, IF_T2_D1, 0xF36F0000)
+ // Rd,#b,#w T2_D1 1111001101101111 0iiiddddii0wwwww F36F 0000 imm(0-31),imm(0-31)
+INST1(bfi, "bfi", 0, 0, IF_T2_D0, 0xF3600000)
+ // Rd,Rn,#b,#w T2_D0 111100110110nnnn 0iiiddddii0wwwww F360 0000 imm(0-31),imm(0-31)
+INST1(bl, "bl", 0, 0, IF_T2_J3, 0xF000D000)
+ // PC+-i24 T2_J3 11110Siiiiiiiiii 11j1jiiiiiiiiiii F000 D000 imm(-16777216..16777214)
+INST1(bkpt, "bkpt", 0, 0, IF_T1_A, 0xDEFE)
+ // T1_A 1101111011111110 DEFE // Windows uses this
+ // i8 T1_L0 10111110iiiiiiii BE00 imm(0-255)
+INST1(cbnz, "cbnz", 0, 0, IF_T1_I, 0xB900)
+ // Rn,PC+i7 T1_I 101110i1iiiiinnn B900 low imm(0-126)
+INST1(cbz, "cbz", 0, 0, IF_T1_I, 0xB100)
+ // Rn,PC+i7 T1_I 101100i1iiiiinnn B100 low imm(0-126)
+INST1(clz, "clz", 0, 0, IF_T2_C10, 0xFAB0F080)
+ // Rd,Rm T2_C10 111110101011mmmm 1111dddd1000mmmm FAB0 F080
+INST1(dmb, "dmb", 0, 0, IF_T2_B, 0xF3BF8F50)
+ // #i4 T2_B 1111001110111111 100011110101iiii F3BF 8F50 imm(0-15)
+INST1(ism, "ism", 0, 0, IF_T2_B, 0xF3BF8F60)
+ // #i4 T2_B 1111001110111111 100011110110iiii F3BF 8F60 imm(0-15)
+INST1(ldmdb, "ldmdb", 0,LD, IF_T2_I0, 0xE9100000)
+ // Rn{!},<reglist16> T2_I0 1110100100W1nnnn rr0rrrrrrrrrrrrr E910 0000
+INST1(ldrd, "ldrd", 0,LD, IF_T2_G0, 0xE8500000)
+ // Rt,RT,[Rn],+-i8{!}T2_G0 1110100PU1W1nnnn ttttTTTTiiiiiiii E850 0000
+INST1(ldrex, "ldrex", 0,LD, IF_T2_H1, 0xE8500F00)
+ // Rt,[Rn+i8] T2_H1 111010000101nnnn tttt1111iiiiiiii E850 0F00 imm(0-1020)
+INST1(ldrexb, "ldrexb", 0,LD, IF_T2_E1, 0xE8D00F4F)
+ // Rt,[Rn] T2_E1 111010001101nnnn tttt111101001111 E8D0 0F4F
+INST1(ldrexd, "ldrexd", 0,LD, IF_T2_G1, 0xE8D0007F)
+ // Rt,RT,[Rn] T2_G1 111010001101nnnn ttttTTTT01111111 E8D0 007F
+INST1(ldrexh, "ldrexh", 0,LD, IF_T2_E1, 0xE8D00F5F)
+ // Rt,[Rn] T2_E1 111010001101nnnn tttt111101011111 E8D0 0F5F
+INST1(mla, "mla", 0, 0, IF_T2_F2, 0xFB000000)
+ // Rd,Rn,Rm,Ra T2_F2 111110110000nnnn aaaadddd0000mmmm FB00 0000
+INST1(mls, "mls", 0, 0, IF_T2_F2, 0xFB000010)
+ // Rd,Rn,Rm,Ra T2_F2 111110110000nnnn aaaadddd0001mmmm FB00 0010
+INST1(nop, "nop", 0, 0, IF_T1_A, 0xBF00)
+ // T1_A 1011111100000000 BF00
+INST1(nopw, "nop.w", 0, 0, IF_T2_A, 0xF3AF8000)
+ // T2_A 1111001110101111 1000000000000000 F3AF 8000
+INST1(sbfx, "sbfx", 0, 0, IF_T2_D0, 0xF3400000)
+ // Rd,Rn,#b,#w T2_D0 111100110100nnnn 0iiiddddii0wwwww F340 0000 imm(0-31),imm(0-31)
+INST1(sdiv, "sdiv", 0, 0, IF_T2_C5, 0xFB90F0F0)
+ // Rd,Rn,Rm T2_C5 111110111001nnnn 1111dddd1111mmmm FB90 F0F0
+INST1(smlal, "smlal", 0, 0, IF_T2_F1, 0xFBC00000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111100nnnn llllhhhh0000mmmm FBC0 0000
+INST1(smull, "smull", 0, 0, IF_T2_F1, 0xFB800000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111000nnnn llllhhhh0000mmmm FB80 0000
+INST1(stmdb, "stmdb", 0,ST, IF_T2_I0, 0xE9000000)
+ // Rn{!},<reglist16> T2_I0 1110100100W0nnnn 0r0rrrrrrrrrrrrr E900 0000
+INST1(strd, "strd", 0,ST, IF_T2_G0, 0xE8400000)
+ // Rt,RT,[Rn],+-i8{!}T2_G0 1110100PU1W0nnnn ttttTTTTiiiiiiii E840 0000
+INST1(strex, "strex", 0,ST, IF_T2_H1, 0xE8400F00)
+ // Rt,[Rn+i8] T2_H1 111010000100nnnn tttt1111iiiiiiii E840 0F00 imm(0-1020)
+INST1(strexb, "strexb", 0,ST, IF_T2_E1, 0xE8C00F4F)
+ // Rt,[Rn] T2_E1 111010001100nnnn tttt111101001111 E8C0 0F4F
+INST1(strexd, "strexd", 0,ST, IF_T2_G1, 0xE8C0007F)
+ // Rt,RT,[Rn] T2_G1 111010001100nnnn ttttTTTT01111111 E8C0 007F
+INST1(strexh, "strexh", 0,ST, IF_T2_E1, 0xE8C00F5F)
+ // Rt,[Rn] T2_E1 111010001100nnnn tttt111101011111 E8C0 0F5F
+INST1(subw, "subw", 0, 0, IF_T2_M0, 0xF2A00000)
+ // Rd,Rn,+i12 T2_M0 11110i101010nnnn 0iiiddddiiiiiiii F2A0 0000 imm(0-4095)
+INST1(tbb, "tbb", 0, 0, IF_T2_C9, 0xE8D0F000)
+ // Rn,Rm T2_C9 111010001101nnnn 111100000000mmmm E8D0 F000
+INST1(tbh, "tbh", 0, 0, IF_T2_C9, 0xE8D0F010)
+ // Rn,Rm,LSL #1 T2_C9 111010001101nnnn 111100000001mmmm E8D0 F010
+INST1(ubfx, "ubfx", 0, 0, IF_T2_D0, 0xF3C00000)
+ // Rd,Rn,#b,#w T2_D0 111100111100nnnn 0iiiddddii0wwwww F3C0 0000 imm(0-31),imm(0-31)
+INST1(udiv, "udiv", 0, 0, IF_T2_C5, 0xFBB0F0F0)
+ // Rd,Rn,Rm T2_C5 111110111011nnnn 1111dddd1111mmmm FBB0 F0F0
+INST1(umlal, "umlal", 0, 0, IF_T2_F1, 0xFBE00000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111110nnnn llllhhhh0000mmmm FBE0 0000
+INST1(umull, "umull", 0, 0, IF_T2_F1, 0xFBA00000)
+ // Rl,Rh,Rn,Rm T2_F1 111110111010nnnn llllhhhh0000mmmm FBA0 0000
+
+#ifdef FEATURE_ITINSTRUCTION
+INST1(it, "it", 0, 0, IF_T1_B, 0xBF08)
+ // cond T1_B 10111111cond1000 BF08 cond
+INST1(itt, "itt", 0, 0, IF_T1_B, 0xBF04)
+ // cond T1_B 10111111cond0100 BF04 cond
+INST1(ite, "ite", 0, 0, IF_T1_B, 0xBF0C)
+ // cond T1_B 10111111cond1100 BF0C cond
+INST1(ittt, "ittt", 0, 0, IF_T1_B, 0xBF02)
+ // cond T1_B 10111111cond0010 BF02 cond
+INST1(itte, "itte", 0, 0, IF_T1_B, 0xBF06)
+ // cond T1_B 10111111cond0110 BF06 cond
+INST1(itet, "itet", 0, 0, IF_T1_B, 0xBF0A)
+ // cond T1_B 10111111cond1010 BF0A cond
+INST1(itee, "itee", 0, 0, IF_T1_B, 0xBF0E)
+ // cond T1_B 10111111cond1110 BF0E cond
+INST1(itttt, "itttt", 0, 0, IF_T1_B, 0xBF01)
+ // cond T1_B 10111111cond0001 BF01 cond
+INST1(ittte, "ittte", 0, 0, IF_T1_B, 0xBF03)
+ // cond T1_B 10111111cond0011 BF03 cond
+INST1(ittet, "ittet", 0, 0, IF_T1_B, 0xBF05)
+ // cond T1_B 10111111cond0101 BF05 cond
+INST1(ittee, "ittee", 0, 0, IF_T1_B, 0xBF07)
+ // cond T1_B 10111111cond0111 BF07 cond
+INST1(itett, "itett", 0, 0, IF_T1_B, 0xBF09)
+ // cond T1_B 10111111cond1001 BF09 cond
+INST1(itete, "itete", 0, 0, IF_T1_B, 0xBF0B)
+ // cond T1_B 10111111cond1011 BF0B cond
+INST1(iteet, "iteet", 0, 0, IF_T1_B, 0xBF0D)
+ // cond T1_B 10111111cond1101 BF0D cond
+INST1(iteee, "iteee", 0, 0, IF_T1_B, 0xBF0F)
+ // cond T1_B 10111111cond1111 BF0F cond
+#endif // FEATURE_ITINSTRUCTION
+
+
+/*****************************************************************************/
+/* Floating Point Instructions */
+/*****************************************************************************/
+// enum name FP LD/ST
+ // Dd,[Rn+imm8] T2_VLDST 11101101UD0Lnnnn dddd101Ziiiiiiii ED00 0A00 imm(+-1020)
+INST1(vstr, "vstr", 1,ST, IF_T2_VLDST, 0xED000A00)
+INST1(vldr, "vldr", 1,LD, IF_T2_VLDST, 0xED100A00)
+INST1(vstm, "vstm", 1,ST, IF_T2_VLDST, 0xEC800A00) // A8.6.399 VSTM (to an address in ARM core register from consecutive floats)
+INST1(vldm, "vldm", 1,LD, IF_T2_VLDST, 0xEC900A00) // A8.6.399 VLDM (from an address in ARM core register to consecutive floats)
+INST1(vpush, "vpush", 1,ST, IF_T2_VLDST, 0xED2D0A00)
+INST1(vpop, "vpop", 1,LD, IF_T2_VLDST, 0xECBD0A00)
+
+ // vmrs rT T2_E2 1110111011110001 tttt101000010000 EEF1 0A10
+INST1(vmrs, "vmrs", 1, 0, IF_T2_E2, 0xEEF10A10)
+
+ // Dd,Dn,Dm T2_VFP3 11101110-D--nnnn dddd101ZN-M0mmmm EE30 0A00
+INST1(vadd, "vadd", 1, 0, IF_T2_VFP3, 0xEE300A00)
+INST1(vsub, "vsub", 1, 0, IF_T2_VFP3, 0xEE300A40)
+INST1(vmul, "vmul", 1, 0, IF_T2_VFP3, 0xEE200A00)
+INST1(vdiv, "vdiv", 1, 0, IF_T2_VFP3, 0xEE800A00)
+
+ // Dd,Dm T2_VFP2 111011101D110--- dddd101zp1M0mmmm EEB0 0A40
+INST1(vmov, "vmov", 1, 0, IF_T2_VFP2, 0xEEB00A40) // opc2 = '000', zp = 00
+INST1(vabs, "vabs", 1, 0, IF_T2_VFP2, 0xEEB00AC0) // opc2 = '000', zp = 01
+INST1(vsqrt, "vsqrt", 1, 0, IF_T2_VFP2, 0xEEB10AC0) // opc2 = '001', zp = 01
+INST1(vneg, "vneg", 1, 0, IF_T2_VFP2, 0xEEB10A40) // opc2 = '001', zp = 00
+INST1(vcmp, "vcmp", 1, CMP, IF_T2_VFP2, 0xEEB40A40) // opc2 = '100', zp = 00
+INST1(vcmp0, "vcmp.0", 1, CMP, IF_T2_VFP2, 0xEEB50A40) // opc2 = '101', zp = 00
+
+ // Dd,Dm T2_VFP2 111011101D111--- dddd101zp1M0mmmm EEB8 0A40
+INST1(vcvt_d2i, "vcvt.d2i", 1, 0, IF_T2_VFP2, 0xEEBD0BC0) // opc2 = '101', zp = 11
+INST1(vcvt_f2i, "vcvt.f2i", 1, 0, IF_T2_VFP2, 0xEEBD0AC0) // opc2 = '101', zp = 01
+INST1(vcvt_d2u, "vcvt.d2u", 1, 0, IF_T2_VFP2, 0xEEBC0BC0) // opc2 = '100', zp = 11
+INST1(vcvt_f2u, "vcvt.f2u", 1, 0, IF_T2_VFP2, 0xEEBC0AC0) // opc2 = '100', zp = 01
+
+INST1(vcvt_i2f, "vcvt.i2f", 1, 0, IF_T2_VFP2, 0xEEB80AC0) // opc2 = '000', zp = 01
+INST1(vcvt_i2d, "vcvt.i2d", 1, 0, IF_T2_VFP2, 0xEEB80BC0) // opc2 = '000', zp = 11
+INST1(vcvt_u2f, "vcvt.u2f", 1, 0, IF_T2_VFP2, 0xEEB80A40) // opc2 = '000', zp = 00
+INST1(vcvt_u2d, "vcvt.u2d", 1, 0, IF_T2_VFP2, 0xEEB80B40) // opc2 = '000', zp = 10
+
+ // Dd,Dm T2_VFP2 111011101D110111 dddd101z11M0mmmm EEB7 0AC0
+INST1(vcvt_d2f, "vcvt.d2f", 1, 0, IF_T2_VFP2, 0xEEB70BC0) // opc2 = '111' zp = 01
+INST1(vcvt_f2d, "vcvt.f2d", 1, 0, IF_T2_VFP2, 0xEEB70AC0) // opc2 = '111' zp = 11
+
+ // Dd,Dm T2_VMOVD 111011F100D0V0000
+INST1(vmov_i2d, "vmov.i2d", 1, 0, IF_T2_VMOVD, 0xEC400B10) // A8.6.332 VMOV from 2 int regs to a double
+INST1(vmov_d2i, "vmov.d2i", 1, 0, IF_T2_VMOVD, 0xEC500B10) // A8.6.332 VMOV from a double to 2 int regs
+INST1(vmov_i2f, "vmov.i2f", 1, 0, IF_T2_VMOVS, 0xEE000A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
+INST1(vmov_f2i, "vmov.f2i", 1, 0, IF_T2_VMOVS, 0xEE100A10) // A8.6.330 VMOV (between ARM core register and single-precision register)
+// clang-format on
+
+/*****************************************************************************/
+#undef INST1
+#undef INST2
+#undef INST3
+#undef INST4
+#undef INST5
+#undef INST6
+#undef INST7
+#undef INST8
+#undef INST9
+/*****************************************************************************/
diff --git a/src/jit/instrsarm64.h b/src/jit/instrsarm64.h
new file mode 100644
index 0000000000..e91aaa6836
--- /dev/null
+++ b/src/jit/instrsarm64.h
@@ -0,0 +1,954 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************
+ * Arm64 instructions for JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- floating point instruction
+ * ld/st/cmp -- load/store/compare instruction
+ * fmt -- encoding format used by this instruction
+ * e1 -- encoding 1
+ * e2 -- encoding 2
+ * e3 -- encoding 3
+ * e4 -- encoding 4
+ * e5 -- encoding 5
+ * e6 -- encoding 6
+ * e7 -- encoding 7
+ * e8 -- encoding 8
+ * e9 -- encoding 9
+ *
+******************************************************************************/
+
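+// The INSTn lines below form an X-macro table: a client #defines INST1..INST6
+// and INST9 with the argument shapes documented above and then #includes this
+// file, so each entry expands once per instruction. A minimal illustrative
+// sketch of such a client, building a name table (the 'armInsNames' array is
+// hypothetical; the real emitter expansions differ):
+//
+//   #define INST1(id, nm, fp, ldst, fmt, e1)                                 nm,
+//   #define INST2(id, nm, fp, ldst, fmt, e1, e2)                             nm,
+//   #define INST3(id, nm, fp, ldst, fmt, e1, e2, e3)                         nm,
+//   #define INST4(id, nm, fp, ldst, fmt, e1, e2, e3, e4)                     nm,
+//   #define INST5(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5)                 nm,
+//   #define INST6(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6)             nm,
+//   #define INST9(id, nm, fp, ldst, fmt, e1, e2, e3, e4, e5, e6, e7, e8, e9) nm,
+//
+//   static const char* const armInsNames[] = {
+//   #include "instrsarm64.h"
+//   };
+//
+//   // ... and then #undef each INSTn again, as instrsarm.h does at its end.
+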
+#if !defined(_TARGET_ARM64_)
+#error Unexpected target type
+#endif
+
+#ifndef INST1
+#error INST1 must be defined before including this file.
+#endif
+#ifndef INST2
+#error INST2 must be defined before including this file.
+#endif
+#ifndef INST3
+#error INST3 must be defined before including this file.
+#endif
+#ifndef INST4
+#error INST4 must be defined before including this file.
+#endif
+#ifndef INST5
+#error INST5 must be defined before including this file.
+#endif
+#ifndef INST6
+#error INST6 must be defined before including this file.
+#endif
+#ifndef INST9
+#error INST9 must be defined before including this file.
+#endif
+
+/*****************************************************************************/
+/* The following is ARM64-specific */
+/*****************************************************************************/
+
+// If you're adding a new instruction:
+// You need not only to fill in one of these macros describing the instruction, but also:
+// * If the instruction writes to more than one destination register, update the function
+// emitInsMayWriteMultipleRegs in emitArm64.cpp.
+
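+// A hedged sketch of the kind of check emitInsMayWriteMultipleRegs performs;
+// the exact signature and case list live in emitarm64.cpp and may differ, but
+// the pair loads named here do come from the table below:
+//
+//   bool emitter::emitInsMayWriteMultipleRegs(instrDesc* id)
+//   {
+//       switch (id->idIns())
+//       {
+//           case INS_ldp:   // load-pair forms write two destination registers
+//           case INS_ldpsw:
+//           case INS_ldnp:
+//               return true;
+//           default:
+//               return false;
+//       }
+//   }
+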
+// clang-format off
+INST9(invalid, "INVALID", 0, 0, IF_NONE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
+
+// enum name FP LD/ST DR_2E DR_2G DI_1B DI_1D DV_3C DV_2B DV_2C DV_2E DV_2F
+INST9(mov, "mov", 0, 0, IF_EN9, 0x2A0003E0, 0x11000000, 0x52800000, 0x320003E0, 0x0EA01C00, 0x0E003C00, 0x4E001C00, 0x5E000400, 0x6E000400)
+ // mov Rd,Rm DR_2E X0101010000mmmmm 00000011111ddddd 2A00 03E0
+ // mov Rd,Rn DR_2G X001000100000000 000000nnnnnddddd 1100 0000 mov to/from SP only
+ // mov Rd,imm(i16,hw) DI_1B X10100101hwiiiii iiiiiiiiiiiddddd 5280 0000 imm(i16,hw)
+ // mov Rd,imm(N,r,s) DI_1D X01100100Nrrrrrr ssssss11111ddddd 3200 03E0 imm(N,r,s)
+ // mov Vd,Vn DV_3C 0Q001110101nnnnn 000111nnnnnddddd 0EA0 1C00 Vd,Vn
+ // mov Rd,Vn[0] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[] (to general)
+ // mov Vd[],Rn DV_2C 01001110000iiiii 000111nnnnnddddd 4E00 1C00 Vd[],Rn (from general)
+ // mov Vd,Vn[] DV_2E 01011110000iiiii 000001nnnnnddddd 5E00 0400 Vd,Vn[] (scalar by elem)
+ // mov Vd[],Vn[] DV_2F 01101110000iiiii 0jjjj1nnnnnddddd 6E00 0400 Vd[],Vn[] (from/to elem)
+
+// enum name FP LD/ST DR_3A DR_3B DR_3C DI_2A DV_3A DV_3E
+INST6(add, "add", 0, 0, IF_EN6A, 0x0B000000, 0x0B000000, 0x0B200000, 0x11000000, 0x0E208400, 0x5EE08400)
+ // add Rd,Rn,Rm DR_3A X0001011000mmmmm 000000nnnnnddddd 0B00 0000 Rd,Rn,Rm
+ // add Rd,Rn,(Rm,shk,imm) DR_3B X0001011sh0mmmmm ssssssnnnnnddddd 0B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // add Rd,Rn,(Rm,ext,shl) DR_3C X0001011001mmmmm ooosssnnnnnddddd 0B20 0000 ext(Rm) LSL imm(0-4)
+ // add Rd,Rn,i12 DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12,sh)
+ // add Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 100001nnnnnddddd 0E20 8400 Vd,Vn,Vm (vector)
+ // add Vd,Vn,Vm DV_3E 01011110111mmmmm 100001nnnnnddddd 5EE0 8400 Vd,Vn,Vm (scalar)
+
+INST6(sub, "sub", 0, 0, IF_EN6A, 0x4B000000, 0x4B000000, 0x4B200000, 0x51000000, 0x2E208400, 0x7EE08400)
+ // sub Rd,Rn,Rm DR_3A X1001011000mmmmm 000000nnnnnddddd 4B00 0000 Rd,Rn,Rm
+ // sub Rd,Rn,(Rm,shk,imm) DR_3B X1001011sh0mmmmm ssssssnnnnnddddd 4B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // sub Rd,Rn,(Rm,ext,shl) DR_3C X1001011001mmmmm ooosssnnnnnddddd 4B20 0000 ext(Rm) LSL imm(0-4)
+ // sub Rd,Rn,i12 DI_2A X1010001shiiiiii iiiiiinnnnnddddd 5100 0000 imm(i12,sh)
+ // sub Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100001nnnnnddddd 2E20 8400 Vd,Vn,Vm (vector)
+ // sub Vd,Vn,Vm DV_3E 01111110111mmmmm 100001nnnnnddddd 7EE0 8400 Vd,Vn,Vm (scalar)
+
+// enum name FP LD/ST LS_2A LS_2B LS_2C LS_3A LS_1A
+INST5(ldr, "ldr", 0,LD, IF_EN5A, 0xB9400000, 0xB9400000, 0xB8400000, 0xB8600800, 0x18000000)
+ // ldr Rt,[Xn] LS_2A 1X11100101000000 000000nnnnnttttt B940 0000
+ // ldr Rt,[Xn+pimm12] LS_2B 1X11100101iiiiii iiiiiinnnnnttttt B940 0000 imm(0-4095<<{2,3})
+ // ldr Rt,[Xn+simm9] LS_2C 1X111000010iiiii iiiiPPnnnnnttttt B840 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldr Rt,[Xn,(Rm,ext,shl)] LS_3A 1X111000011mmmmm oooS10nnnnnttttt B860 0800 [Xn, ext(Rm) LSL {0,2,3}]
+ // ldr Vt/Rt,[PC+simm19<<2] LS_1A XX011V00iiiiiiii iiiiiiiiiiittttt 1800 0000 [PC +- imm(1MB)]
+
+INST5(ldrsw, "ldrsw", 0,LD, IF_EN5A, 0xB9800000, 0xB9800000, 0xB8800000, 0xB8A00800, 0x98000000)
+ // ldrsw Rt,[Xn] LS_2A 1011100110000000 000000nnnnnttttt B980 0000
+ // ldrsw Rt,[Xn+pimm12] LS_2B 1011100110iiiiii iiiiiinnnnnttttt B980 0000 imm(0-4095<<2)
+ // ldrsw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiiiPPnnnnnttttt B880 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrsw Rt,[Xn,(Rm,ext,shl)] LS_3A 10111000101mmmmm oooS10nnnnnttttt B8A0 0800 [Xn, ext(Rm) LSL {0,2}]
+ // ldrsw Rt,[PC+simm19<<2] LS_1A 10011000iiiiiiii iiiiiiiiiiittttt 9800 0000 [PC +- imm(1MB)]
+
+// enum name FP LD/ST DV_2G DV_2H DV_2I DV_1A DV_1B
+INST5(fmov, "fmov", 0, 0, IF_EN5B, 0x1E204000, 0x1E260000, 0x1E270000, 0x1E201000, 0x0F00F400)
+ // fmov Vd,Vn DV_2G 000111100X100000 010000nnnnnddddd 1E20 4000 Vd,Vn (scalar)
+ // fmov Rd,Vn DV_2H X00111100X100110 000000nnnnnddddd 1E26 0000 Rd,Vn (scalar, to general)
+ // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn (scalar, from general)
+ // fmov Vd,immfp DV_1A 000111100X1iiiii iii10000000ddddd 1E20 1000 Vd,immfp (scalar)
+ // fmov Vd,immfp DV_1B 0QX0111100000iii 111101iiiiiddddd 0F00 F400 Vd,immfp (immediate vector)
+
+// enum name FP LD/ST DR_3A DR_3B DI_2C DV_3C DV_1B
+INST5(orr, "orr", 0, 0, IF_EN5C, 0x2A000000, 0x2A000000, 0x32000000, 0x0EA01C00, 0x0F001400)
+ // orr Rd,Rn,Rm DR_3A X0101010000mmmmm 000000nnnnnddddd 2A00 0000
+ // orr Rd,Rn,(Rm,shk,imm) DR_3B X0101010sh0mmmmm iiiiiinnnnnddddd 2A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // orr Rd,Rn,imm(N,r,s) DI_2C X01100100Nrrrrrr ssssssnnnnnddddd 3200 0000 imm(N,r,s)
+ // orr Vd,Vn,Vm DV_3C 0Q001110101mmmmm 000111nnnnnddddd 0EA0 1C00 Vd,Vn,Vm
+ // orr Vd,imm8 DV_1B 0Q00111100000iii ---101iiiiiddddd 0F00 1400 Vd imm8 (immediate vector)
+
+// enum name FP LD/ST LS_2A LS_2B LS_2C LS_3A
+INST4(ldrb, "ldrb", 0,LD, IF_EN4A, 0x39400000, 0x39400000, 0x38400000, 0x38600800)
+ // ldrb Rt,[Xn] LS_2A 0011100101000000 000000nnnnnttttt 3940 0000
+ // ldrb Rt,[Xn+pimm12] LS_2B 0011100101iiiiii iiiiiinnnnnttttt 3940 0000 imm(0-4095)
+ // ldrb Rt,[Xn+simm9] LS_2C 00111000010iiiii iiiiPPnnnnnttttt 3840 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrb Rt,[Xn,(Rm,ext,shl)] LS_3A 00111000011mmmmm oooS10nnnnnttttt 3860 0800 [Xn, ext(Rm)]
+
+INST4(ldrh, "ldrh", 0,LD, IF_EN4A, 0x79400000, 0x79400000, 0x78400000, 0x78600800)
+ // ldrh Rt,[Xn] LS_2A 0111100101000000 000000nnnnnttttt 7940 0000
+ // ldrh Rt,[Xn+pimm12] LS_2B 0111100101iiiiii iiiiiinnnnnttttt 7940 0000 imm(0-4095<<1)
+ // ldrh Rt,[Xn+simm9] LS_2C 01111000010iiiii iiiiPPnnnnnttttt 7840 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrh Rt,[Xn,(Rm,ext,shl)] LS_3A 01111000011mmmmm oooS10nnnnnttttt 7860 0800 [Xn, ext(Rm) LSL {0,1}]
+
+INST4(ldrsb, "ldrsb", 0,LD, IF_EN4A, 0x39800000, 0x39800000, 0x38800000, 0x38A00800)
+ // ldrsb Rt,[Xn] LS_2A 001110011X000000 000000nnnnnttttt 3980 0000
+ // ldrsb Rt,[Xn+pimm12] LS_2B 001110011Xiiiiii iiiiiinnnnnttttt 3980 0000 imm(0-4095)
+ // ldrsb Rt,[Xn+simm9] LS_2C 001110001X0iiiii iiiiPPnnnnnttttt 3880 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrsb Rt,[Xn,(Rm,ext,shl)] LS_3A 001110001X1mmmmm oooS10nnnnnttttt 38A0 0800 [Xn, ext(Rm)]
+
+INST4(ldrsh, "ldrsh", 0,LD, IF_EN4A, 0x79800000, 0x79800000, 0x78800000, 0x78A00800)
+ // ldrsh Rt,[Xn] LS_2A 011110011X000000 000000nnnnnttttt 7980 0000
+ // ldrsh Rt,[Xn+pimm12] LS_2B 011110011Xiiiiii iiiiiinnnnnttttt 7980 0000 imm(0-4095<<1)
+ // ldrsh Rt,[Xn+simm9] LS_2C 011110001X0iiiii iiiiPPnnnnnttttt 7880 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // ldrsh Rt,[Xn,(Rm,ext,shl)] LS_3A 011110001X1mmmmm oooS10nnnnnttttt 78A0 0800 [Xn, ext(Rm) LSL {0,1}]
+
+INST4(str, "str", 0,ST, IF_EN4A, 0xB9000000, 0xB9000000, 0xB8000000, 0xB8200800)
+ // str Rt,[Xn] LS_2A 1X11100100000000 000000nnnnnttttt B900 0000
+ // str Rt,[Xn+pimm12] LS_2B 1X11100100iiiiii iiiiiinnnnnttttt B900 0000 imm(0-4095<<{2,3})
+ // str Rt,[Xn+simm9] LS_2C 1X111000000iiiii iiiiPPnnnnnttttt B800 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // str Rt,[Xn,(Rm,ext,shl)] LS_3A 1X111000001mmmmm oooS10nnnnnttttt B820 0800 [Xn, ext(Rm)]
+
+INST4(strb, "strb", 0,ST, IF_EN4A, 0x39000000, 0x39000000, 0x38000000, 0x38200800)
+ // strb Rt,[Xn] LS_2A 0011100100000000 000000nnnnnttttt 3900 0000
+ // strb Rt,[Xn+pimm12] LS_2B 0011100100iiiiii iiiiiinnnnnttttt 3900 0000 imm(0-4095)
+ // strb Rt,[Xn+simm9] LS_2C 00111000000iiiii iiiiPPnnnnnttttt 3800 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // strb Rt,[Xn,(Rm,ext,shl)] LS_3A 00111000001mmmmm oooS10nnnnnttttt 3820 0800 [Xn, ext(Rm)]
+
+INST4(strh, "strh", 0,ST, IF_EN4A, 0x79000000, 0x79000000, 0x78000000, 0x78200800)
+ // strh Rt,[Xn] LS_2A 0111100100000000 000000nnnnnttttt 7900 0000
+ // strh Rt,[Xn+pimm12] LS_2B 0111100100iiiiii iiiiiinnnnnttttt 7900 0000 imm(0-4095<<1)
+ // strh Rt,[Xn+simm9] LS_2C 01111000000iiiii iiiiPPnnnnnttttt 7800 0000 [Xn imm(-256..+255) pre/post/no inc]
+ // strh Rt,[Xn,(Rm,ext,shl)] LS_3A 01111000001mmmmm oooS10nnnnnttttt 7820 0800 [Xn, ext(Rm)]
+
+// enum name FP LD/ST DR_3A DR_3B DR_3C DI_2A
+INST4(adds, "adds", 0, 0, IF_EN4B, 0x2B000000, 0x2B000000, 0x2B200000, 0x31000000)
+ // adds Rd,Rn,Rm DR_3A X0101011000mmmmm 000000nnnnnddddd 2B00 0000
+ // adds Rd,Rn,(Rm,shk,imm) DR_3B X0101011sh0mmmmm ssssssnnnnnddddd 2B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // adds Rd,Rn,(Rm,ext,shl) DR_3C X0101011001mmmmm ooosssnnnnnddddd 2B20 0000 ext(Rm) LSL imm(0-4)
+ // adds Rd,Rn,i12 DI_2A X0110001shiiiiii iiiiiinnnnnddddd 3100 0000 imm(i12,sh)
+
+INST4(subs, "subs", 0, 0, IF_EN4B, 0x6B000000, 0x6B000000, 0x6B200000, 0x71000000)
+ // subs Rd,Rn,Rm DR_3A X1101011000mmmmm 000000nnnnnddddd 6B00 0000
+ // subs Rd,Rn,(Rm,shk,imm) DR_3B X1101011sh0mmmmm ssssssnnnnnddddd 6B00 0000 Rm {LSL,LSR,ASR} imm(0-63)
+ // subs Rd,Rn,(Rm,ext,shl) DR_3C X1101011001mmmmm ooosssnnnnnddddd 6B20 0000 ext(Rm) LSL imm(0-4)
+ // subs Rd,Rn,i12 DI_2A X1110001shiiiiii iiiiiinnnnnddddd 7100 0000 imm(i12,sh)
+
+// enum name FP LD/ST DR_2A DR_2B DR_2C DI_1A
+INST4(cmp, "cmp", 0,CMP,IF_EN4C, 0x6B00001F, 0x6B00001F, 0x6B20001F, 0x7100001F)
+ // cmp Rn,Rm DR_2A X1101011000mmmmm 000000nnnnn11111 6B00 001F
+ // cmp Rn,(Rm,shk,imm) DR_2B X1101011sh0mmmmm ssssssnnnnn11111 6B00 001F Rm {LSL,LSR,ASR} imm(0-63)
+ // cmp Rn,(Rm,ext,shl) DR_2C X1101011001mmmmm ooosssnnnnn11111 6B20 001F ext(Rm) LSL imm(0-4)
+ // cmp Rn,i12 DI_1A X111000100iiiiii iiiiiinnnnn11111 7100 001F imm(i12,sh)
+
+INST4(cmn, "cmn", 0,CMP,IF_EN4C, 0x2B00001F, 0x2B00001F, 0x2B20001F, 0x3100001F)
+ // cmn Rn,Rm DR_2A X0101011000mmmmm 000000nnnnn11111 2B00 001F
+ // cmn Rn,(Rm,shk,imm) DR_2B X0101011sh0mmmmm ssssssnnnnn11111 2B00 001F Rm {LSL,LSR,ASR} imm(0-63)
+ // cmn Rn,(Rm,ext,shl) DR_2C X0101011001mmmmm ooosssnnnnn11111 2B20 001F ext(Rm) LSL imm(0-4)
+ // cmn Rn,i12 DI_1A X0110001shiiiiii iiiiiinnnnn11111 3100 001F imm(0-4095)
+
+// enum name FP LD/ST DV_3B DV_3D DV_3BI DV_3DI
+INST4(fmul, "fmul", 0, 0, IF_EN4D, 0x2E20DC00, 0x1E200800, 0x0F809000, 0x5F809000)
+ // fmul Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 110111nnnnnddddd 2E20 DC00 Vd,Vn,Vm (vector)
+ // fmul Vd,Vn,Vm DV_3D 000111100X1mmmmm 000010nnnnnddddd 1E20 0800 Vd,Vn,Vm (scalar)
+ // fmul Vd,Vn,Vm[] DV_3BI 0Q0011111XLmmmmm 1001H0nnnnnddddd 0F80 9000 Vd,Vn,Vm[] (vector by elem)
+ // fmul Vd,Vn,Vm[] DV_3DI 010111111XLmmmmm 1001H0nnnnnddddd 5F80 9000 Vd,Vn,Vm[] (scalar by elem)
+
+INST4(fmulx, "fmulx", 0, 0, IF_EN4D, 0x0E20DC00, 0x5E20DC00, 0x2F809000, 0x7F809000)
+ // fmulx Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 110111nnnnnddddd 0E20 DC00 Vd,Vn,Vm (vector)
+ // fmulx Vd,Vn,Vm DV_3D 010111100X1mmmmm 110111nnnnnddddd 5E20 DC00 Vd,Vn,Vm (scalar)
+ // fmulx Vd,Vn,Vm[] DV_3BI 0Q1011111XLmmmmm 1001H0nnnnnddddd 2F80 9000 Vd,Vn,Vm[] (vector by elem)
+ // fmulx Vd,Vn,Vm[] DV_3DI 011111111XLmmmmm 1001H0nnnnnddddd 7F80 9000 Vd,Vn,Vm[] (scalar by elem)
+
+// enum name FP LD/ST DR_3A DR_3B DI_2C DV_3C
+INST4(and, "and", 0, 0, IF_EN4E, 0x0A000000, 0x0A000000, 0x12000000, 0x0E201C00)
+ // and Rd,Rn,Rm DR_3A X0001010000mmmmm 000000nnnnnddddd 0A00 0000
+ // and Rd,Rn,(Rm,shk,imm) DR_3B X0001010sh0mmmmm iiiiiinnnnnddddd 0A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // and Rd,Rn,imm(N,r,s) DI_2C X00100100Nrrrrrr ssssssnnnnnddddd 1200 0000 imm(N,r,s)
+ // and Vd,Vn,Vm DV_3C 0Q001110001mmmmm 000111nnnnnddddd 0E20 1C00 Vd,Vn,Vm
+
+INST4(eor, "eor", 0, 0, IF_EN4E, 0x4A000000, 0x4A000000, 0x52000000, 0x2E201C00)
+ // eor Rd,Rn,Rm DR_3A X1001010000mmmmm 000000nnnnnddddd 4A00 0000
+ // eor Rd,Rn,(Rm,shk,imm) DR_3B X1001010sh0mmmmm iiiiiinnnnnddddd 4A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // eor Rd,Rn,imm(N,r,s) DI_2C X10100100Nrrrrrr ssssssnnnnnddddd 5200 0000 imm(N,r,s)
+ // eor Vd,Vn,Vm DV_3C 0Q101110001mmmmm 000111nnnnnddddd 2E20 1C00 Vd,Vn,Vm
+
+// enum name FP LD/ST DR_3A DR_3B DV_3C DV_1B
+INST4(bic, "bic", 0, 0, IF_EN4F, 0x0A200000, 0x0A200000, 0x0E601C00, 0x2F001400)
+ // bic Rd,Rn,Rm DR_3A X0001010001mmmmm 000000nnnnnddddd 0A20 0000
+ // bic Rd,Rn,(Rm,shk,imm) DR_3B X0001010sh1mmmmm iiiiiinnnnnddddd 0A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // bic Vd,Vn,Vm DV_3C 0Q001110011mmmmm 000111nnnnnddddd 0E60 1C00 Vd,Vn,Vm
+ // bic Vd,imm8 DV_1B 0Q10111100000iii ---101iiiiiddddd 2F00 1400 Vd imm8 (immediate vector)
+
+// enum name FP LD/ST DR_2E DR_2F DV_2M DV_2L
+INST4(neg, "neg", 0, 0, IF_EN4G, 0x4B0003E0, 0x4B0003E0, 0x2E20B800, 0x7E20B800)
+ // neg Rd,Rm DR_2E X1001011000mmmmm 00000011111ddddd 4B00 03E0
+ // neg Rd,(Rm,shk,imm) DR_2F X1001011sh0mmmmm ssssss11111ddddd 4B00 03E0 Rm {LSL,LSR,ASR} imm(0-63)
+ // neg Vd,Vn DV_2M 0Q101110XX100000 101110nnnnnddddd 2E20 B800 Vd,Vn (vector)
+ // neg Vd,Vn DV_2L 01111110XX100000 101110nnnnnddddd 7E20 B800 Vd,Vn (scalar)
+
+// enum name FP LD/ST DR_3A DR_3B DI_2C
+INST3(ands, "ands", 0, 0, IF_EN3A, 0x6A000000, 0x6A000000, 0x72000000)
+ // ands Rd,Rn,Rm DR_3A X1101010000mmmmm 000000nnnnnddddd 6A00 0000
+ // ands Rd,Rn,(Rm,shk,imm) DR_3B X1101010sh0mmmmm iiiiiinnnnnddddd 6A00 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // ands Rd,Rn,imm(N,r,s) DI_2C X11100100Nrrrrrr ssssssnnnnnddddd 7200 0000 imm(N,r,s)
+
+// enum name FP LD/ST DR_2A DR_2B DI_1C
+INST3(tst, "tst", 0, 0, IF_EN3B, 0x6A00001F, 0x6A00001F, 0x7200001F)
+ // tst Rn,Rm DR_2A X1101010000mmmmm 000000nnnnn11111 6A00 001F
+ // tst Rn,(Rm,shk,imm) DR_2B X1101010sh0mmmmm iiiiiinnnnn11111 6A00 001F Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // tst Rn,imm(N,r,s) DI_1C X11100100Nrrrrrr ssssssnnnnn11111 7200 001F imm(N,r,s)
+
+// enum name FP LD/ST DR_3A DR_3B DV_3C
+INST3(orn, "orn", 0, 0, IF_EN3C, 0x2A200000, 0x2A200000, 0x0EE01C00)
+ // orn Rd,Rn,Rm DR_3A X0101010001mmmmm 000000nnnnnddddd 2A20 0000
+ // orn Rd,Rn,(Rm,shk,imm) DR_3B X0101010sh1mmmmm iiiiiinnnnnddddd 2A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+ // orn Vd,Vn,Vm DV_3C 0Q001110111mmmmm 000111nnnnnddddd 0EE0 1C00 Vd,Vn,Vm
+
+// enum name FP LD/ST DV_2C DV_2D DV_2E
+INST3(dup, "dup", 0, 0, IF_EN3D, 0x0E000C00, 0x0E000400, 0x5E000400)
+ // dup Vd,Rn DV_2C 0Q001110000iiiii 000011nnnnnddddd 0E00 0C00 Vd,Rn (vector from general)
+ // dup Vd,Vn[] DV_2D 0Q001110000iiiii 000001nnnnnddddd 0E00 0400 Vd,Vn[] (vector by elem)
+ // dup Vd,Vn[] DV_2E 01011110000iiiii 000001nnnnnddddd 5E00 0400 Vd,Vn[] (scalar by elem)
+
+// enum name FP LD/ST DV_3B DV_3BI DV_3DI
+INST3(fmla, "fmla", 0, 0, IF_EN3E, 0x0E20CC00, 0x0F801000, 0x5F801000)
+ // fmla Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 110011nnnnnddddd 0E20 CC00 Vd,Vn,Vm (vector)
+ // fmla Vd,Vn,Vm[] DV_3BI 0Q0011111XLmmmmm 0001H0nnnnnddddd 0F80 1000 Vd,Vn,Vm[] (vector by elem)
+ // fmla Vd,Vn,Vm[] DV_3DI 010111111XLmmmmm 0001H0nnnnnddddd 5F80 1000 Vd,Vn,Vm[] (scalar by elem)
+
+INST3(fmls, "fmls", 0, 0, IF_EN3E, 0x0EA0CC00, 0x0F805000, 0x5F805000)
+ // fmls Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 110011nnnnnddddd 0EA0 CC00 Vd,Vn,Vm (vector)
+ // fmls Vd,Vn,Vm[] DV_3BI 0Q0011111XLmmmmm 0101H0nnnnnddddd 0F80 5000 Vd,Vn,Vm[] (vector by elem)
+ // fmls Vd,Vn,Vm[] DV_3DI 010111111XLmmmmm 0101H0nnnnnddddd 5F80 5000 Vd,Vn,Vm[] (scalar by elem)
+
+// enum name FP LD/ST DV_2A DV_2G DV_2H
+INST3(fcvtas, "fcvtas", 0, 0, IF_EN3F, 0x0E21C800, 0x5E21C800, 0x1E240000)
+ // fcvtas Vd,Vn DV_2A 0Q0011100X100001 110010nnnnnddddd 0E21 C800 Vd,Vn (vector)
+ // fcvtas Vd,Vn DV_2G 010111100X100001 110010nnnnnddddd 5E21 C800 Vd,Vn (scalar)
+ // fcvtas Rd,Vn DV_2H X00111100X100100 000000nnnnnddddd 1E24 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtau, "fcvtau", 0, 0, IF_EN3F, 0x2E21C800, 0x7E21C800, 0x1E250000)
+ // fcvtau Vd,Vn DV_2A 0Q1011100X100001 110010nnnnnddddd 2E21 C800 Vd,Vn (vector)
+ // fcvtau Vd,Vn DV_2G 011111100X100001 110010nnnnnddddd 7E21 C800 Vd,Vn (scalar)
+ // fcvtau Rd,Vn DV_2H X00111100X100101 000000nnnnnddddd 1E25 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtms, "fcvtms", 0, 0, IF_EN3F, 0x0E21B800, 0x5E21B800, 0x1E300000)
+ // fcvtms Vd,Vn DV_2A 0Q0011100X100001 101110nnnnnddddd 0E21 B800 Vd,Vn (vector)
+ // fcvtms Vd,Vn DV_2G 010111100X100001 101110nnnnnddddd 5E21 B800 Vd,Vn (scalar)
+ // fcvtms Rd,Vn DV_2H X00111100X110000 000000nnnnnddddd 1E30 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtmu, "fcvtmu", 0, 0, IF_EN3F, 0x2E21B800, 0x7E21B800, 0x1E310000)
+ // fcvtmu Vd,Vn DV_2A 0Q1011100X100001 101110nnnnnddddd 2E21 B800 Vd,Vn (vector)
+ // fcvtmu Vd,Vn DV_2G 011111100X100001 101110nnnnnddddd 7E21 B800 Vd,Vn (scalar)
+ // fcvtmu Rd,Vn DV_2H X00111100X110001 000000nnnnnddddd 1E31 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtns, "fcvtns", 0, 0, IF_EN3F, 0x0E21A800, 0x5E21A800, 0x1E200000)
+ // fcvtns Vd,Vn DV_2A 0Q0011100X100001 101010nnnnnddddd 0E21 A800 Vd,Vn (vector)
+ // fcvtns Vd,Vn DV_2G 010111100X100001 101010nnnnnddddd 5E21 A800 Vd,Vn (scalar)
+ // fcvtns Rd,Vn DV_2H X00111100X100000 000000nnnnnddddd 1E20 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtnu, "fcvtnu", 0, 0, IF_EN3F, 0x2E21A800, 0x7E21A800, 0x1E210000)
+ // fcvtnu Vd,Vn DV_2A 0Q1011100X100001 101010nnnnnddddd 2E21 A800 Vd,Vn (vector)
+ // fcvtnu Vd,Vn DV_2G 011111100X100001 101010nnnnnddddd 7E21 A800 Vd,Vn (scalar)
+ // fcvtnu Rd,Vn DV_2H X00111100X100001 000000nnnnnddddd 1E21 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtps, "fcvtps", 0, 0, IF_EN3F, 0x0EA1A800, 0x5EA1A800, 0x1E280000)
+ // fcvtps Vd,Vn DV_2A 0Q0011101X100001 101010nnnnnddddd 0EA1 A800 Vd,Vn (vector)
+ // fcvtps Vd,Vn DV_2G 010111101X100001 101010nnnnnddddd 5EA1 A800 Vd,Vn (scalar)
+ // fcvtps Rd,Vn DV_2H X00111100X101000 000000nnnnnddddd 1E28 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtpu, "fcvtpu", 0, 0, IF_EN3F, 0x2EA1A800, 0x7EA1A800, 0x1E290000)
+ // fcvtpu Vd,Vn DV_2A 0Q1011101X100001 101010nnnnnddddd 2EA1 A800 Vd,Vn (vector)
+ // fcvtpu Vd,Vn DV_2G 011111101X100001 101010nnnnnddddd 7EA1 A800 Vd,Vn (scalar)
+ // fcvtpu Rd,Vn DV_2H X00111100X101001 000000nnnnnddddd 1E29 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtzs, "fcvtzs", 0, 0, IF_EN3F, 0x0EA1B800, 0x5EA1B800, 0x1E380000)
+ // fcvtzs Vd,Vn DV_2A 0Q0011101X100001 101110nnnnnddddd 0EA1 B800 Vd,Vn (vector)
+ // fcvtzs Vd,Vn DV_2G 010111101X100001 101110nnnnnddddd 5EA1 B800 Vd,Vn (scalar)
+ // fcvtzs Rd,Vn DV_2H X00111100X111000 000000nnnnnddddd 1E38 0000 Rd,Vn (scalar, to general)
+
+INST3(fcvtzu, "fcvtzu", 0, 0, IF_EN3F, 0x2EA1B800, 0x7EA1B800, 0x1E390000)
+ // fcvtzu Vd,Vn DV_2A 0Q1011101X100001 101110nnnnnddddd 2EA1 B800 Vd,Vn (vector)
+ // fcvtzu Vd,Vn DV_2G 011111101X100001 101110nnnnnddddd 7EA1 B800 Vd,Vn (scalar)
+ // fcvtzu Rd,Vn DV_2H X00111100X111001 000000nnnnnddddd 1E39 0000 Rd,Vn (scalar, to general)
+
+// enum name FP LD/ST DV_2A DV_2G DV_2I
+INST3(scvtf, "scvtf", 0, 0, IF_EN3G, 0x0E21D800, 0x5E21D800, 0x1E220000)
+ // scvtf Vd,Vn DV_2A 0Q0011100X100001 110110nnnnnddddd 0E21 D800 Vd,Vn (vector)
+ // scvtf Vd,Vn DV_2G 010111100X100001 110110nnnnnddddd 5E21 D800 Vd,Vn (scalar)
+ // scvtf Rd,Vn DV_2I X00111100X100010 000000nnnnnddddd 1E22 0000 Vd,Rn (scalar, from general)
+
+INST3(ucvtf, "ucvtf", 0, 0, IF_EN3G, 0x2E21D800, 0x7E21D800, 0x1E230000)
+ // ucvtf Vd,Vn DV_2A 0Q1011100X100001 110110nnnnnddddd 2E21 D800 Vd,Vn (vector)
+ // ucvtf Vd,Vn DV_2G 011111100X100001 110110nnnnnddddd 7E21 D800 Vd,Vn (scalar)
+ // ucvtf Rd,Vn DV_2I X00111100X100011 000000nnnnnddddd 1E23 0000 Vd,Rn (scalar, from general)
+
+INST3(mul, "mul", 0, 0, IF_EN3H, 0x1B007C00, 0x0E209C00, 0x0F008000)
+ // mul Rd,Rn,Rm DR_3A X0011011000mmmmm 011111nnnnnddddd 1B00 7C00
+ // mul Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 100111nnnnnddddd 0E20 9C00 Vd,Vn,Vm (vector)
+ // mul Vd,Vn,Vm[] DV_3AI 0Q001111XXLMmmmm 1000H0nnnnnddddd 0F00 8000 Vd,Vn,Vm[] (vector by elem)
+
+// enum name FP LD/ST DR_2E DR_2F DV_2M
+INST3(mvn, "mvn", 0, 0, IF_EN3I, 0x2A2003E0, 0x2A2003E0, 0x2E205800)
+ // mvn Rd,Rm DR_2E X0101010001mmmmm 00000011111ddddd 2A20 03E0
+ // mvn Rd,(Rm,shk,imm) DR_2F X0101010sh1mmmmm iiiiii11111ddddd 2A20 03E0 Rm {LSL,LSR,ASR} imm(0-63)
+ // mvn Vd,Vn DV_2M 0Q10111000100000 010110nnnnnddddd 2E20 5800 Vd,Vn (vector)
+
+
+// enum name FP LD/ST DR_2E DR_2F
+INST2(negs, "negs", 0, 0, IF_EN2A, 0x6B0003E0, 0x6B0003E0)
+ // negs Rd,Rm DR_2E X1101011000mmmmm 00000011111ddddd 6B00 03E0
+ // negs Rd,(Rm,shk,imm) DR_2F X1101011sh0mmmmm ssssss11111ddddd 6B00 03E0 Rm {LSL,LSR,ASR} imm(0-63)
+
+// enum name FP LD/ST DR_3A DR_3B
+INST2(bics, "bics", 0, 0, IF_EN2B, 0x6A200000, 0x6A200000)
+ // bics Rd,Rn,Rm DR_3A X1101010001mmmmm 000000nnnnnddddd 6A20 0000
+ // bics Rd,Rn,(Rm,shk,imm) DR_3B X1101010sh1mmmmm iiiiiinnnnnddddd 6A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+
+INST2(eon, "eon", 0, 0, IF_EN2B, 0x4A200000, 0x4A200000)
+ // eon Rd,Rn,Rm DR_3A X1001010001mmmmm 000000nnnnnddddd 4A20 0000
+ // eon Rd,Rn,(Rm,shk,imm) DR_3B X1001010sh1mmmmm iiiiiinnnnnddddd 4A20 0000 Rm {LSL,LSR,ASR,ROR} imm(0-63)
+
+// enum name FP LD/ST DR_3A DI_2D
+INST2(lsl, "lsl", 0, 0, IF_EN2C, 0x1AC02000, 0x53000000)
+ // lsl Rd,Rn,Rm DR_3A X0011010110mmmmm 001000nnnnnddddd 1AC0 2000
+ // lsl Rd,Rn,imm6 DI_2D X10100110Xrrrrrr ssssssnnnnnddddd 5300 0000 imm(N,r,s)
+
+INST2(lsr, "lsr", 0, 0, IF_EN2C, 0x1AC02400, 0x53000000)
+ // lsr Rd,Rn,Rm DR_3A X0011010110mmmmm 001001nnnnnddddd 1AC0 2400
+ // lsr Rd,Rn,imm6 DI_2D X10100110Xrrrrrr ssssssnnnnnddddd 5300 0000 imm(N,r,s)
+
+INST2(asr, "asr", 0, 0, IF_EN2C, 0x1AC02800, 0x13000000)
+ // asr Rd,Rn,Rm DR_3A X0011010110mmmmm 001010nnnnnddddd 1AC0 2800
+ // asr Rd,Rn,imm6 DI_2D X00100110Xrrrrrr ssssssnnnnnddddd 1300 0000 imm(N,r,s)
+
+// enum name FP LD/ST DR_3A DI_2B
+INST2(ror, "ror", 0, 0, IF_EN2D, 0x1AC02C00, 0x13800000)
+ // ror Rd,Rn,Rm DR_3A X0011010110mmmmm 001011nnnnnddddd 1AC0 2C00
+ // ror Rd,Rn,imm6 DI_2B X00100111X0nnnnn ssssssnnnnnddddd 1380 0000 imm(0-63)
+
+// enum name FP LD/ST LS_3B LS_3C
+INST2(ldp, "ldp", 0,LD, IF_EN2E, 0x29400000, 0x28400000)
+ // ldp Rt,Ra,[Xn] LS_3B X010100101000000 0aaaaannnnnttttt 2940 0000 [Xn imm7]
+ // ldp Rt,Ra,[Xn+simm7] LS_3C X010100PP1iiiiii iaaaaannnnnttttt 2840 0000 [Xn imm7 LSL {} pre/post/no inc]
+
+INST2(ldpsw, "ldpsw", 0,LD, IF_EN2E, 0x69400000, 0x68400000)
+ // ldpsw Rt,Ra,[Xn] LS_3B 0110100101000000 0aaaaannnnnttttt 6940 0000 [Xn imm7]
+ // ldpsw Rt,Ra,[Xn+simm7] LS_3C 0110100PP1iiiiii iaaaaannnnnttttt 6840 0000 [Xn imm7 LSL {} pre/post/no inc]
+
+INST2(stp, "stp", 0,ST, IF_EN2E, 0x29000000, 0x28000000)
+ // stp Rt,Ra,[Xn] LS_3B X010100100000000 0aaaaannnnnttttt 2900 0000 [Xn imm7]
+ // stp Rt,Ra,[Xn+simm7] LS_3C X010100PP0iiiiii iaaaaannnnnttttt 2800 0000 [Xn imm7 LSL {} pre/post/no inc]
+
+INST2(ldnp, "ldnp", 0,LD, IF_EN2E, 0x28400000, 0x28400000)
+ // ldnp Rt,Ra,[Xn] LS_3B X010100001000000 0aaaaannnnnttttt 2840 0000 [Xn imm7]
+ // ldnp Rt,Ra,[Xn+simm7] LS_3C X010100001iiiiii iaaaaannnnnttttt 2840 0000 [Xn imm7 LSL {}]
+
+INST2(stnp, "stnp", 0,ST, IF_EN2E, 0x28000000, 0x28000000)
+ // stnp Rt,Ra,[Xn] LS_3B X010100000000000 0aaaaannnnnttttt 2800 0000 [Xn imm7]
+ // stnp Rt,Ra,[Xn+simm7] LS_3C X010100000iiiiii iaaaaannnnnttttt 2800 0000 [Xn imm7 LSL {}]
+
+INST2(ccmp, "ccmp", 0,CMP,IF_EN2F, 0x7A400000, 0x7A400800)
+ // ccmp Rn,Rm, nzcv,cond DR_2I X1111010010mmmmm cccc00nnnnn0nzcv 7A40 0000 nzcv, cond
+ // ccmp Rn,imm5,nzcv,cond DI_1F X1111010010iiiii cccc10nnnnn0nzcv 7A40 0800 imm5, nzcv, cond
+
+INST2(ccmn, "ccmn", 0,CMP,IF_EN2F, 0x3A400000, 0x3A400800)
+ // ccmn Rn,Rm, nzcv,cond DR_2I X0111010010mmmmm cccc00nnnnn0nzcv 3A40 0000 nzcv, cond
+ // ccmn Rn,imm5,nzcv,cond DI_1F X0111010010iiiii cccc10nnnnn0nzcv 3A40 0800 imm5, nzcv, cond
+
+// enum name FP LD/ST DV_2C DV_2F
+INST2(ins, "ins", 0, 0, IF_EN2H, 0x4E001C00, 0x6E000400)
+ // ins Vd[],Rn DV_2C 01001110000iiiii 000111nnnnnddddd 4E00 1C00 Vd[],Rn (from general)
+ // ins Vd[],Vn[] DV_2F 01101110000iiiii 0jjjj1nnnnnddddd 6E00 0400 Vd[],Vn[] (from/to elem)
+
+// enum name FP LD/ST DV_3B DV_3D
+INST2(fadd, "fadd", 0, 0, IF_EN2G, 0x0E20D400, 0x1E202800)
+ // fadd Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 110101nnnnnddddd 0E20 D400 Vd,Vn,Vm (vector)
+ // fadd Vd,Vn,Vm DV_3D 000111100X1mmmmm 001010nnnnnddddd 1E20 2800 Vd,Vn,Vm (scalar)
+
+INST2(fsub, "fsub", 0, 0, IF_EN2G, 0x0EA0D400, 0x1E203800)
+ // fsub Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 110101nnnnnddddd 0EA0 D400 Vd,Vn,Vm (vector)
+ // fsub Vd,Vn,Vm DV_3D 000111100X1mmmmm 001110nnnnnddddd 1E20 3800 Vd,Vn,Vm (scalar)
+
+INST2(fdiv, "fdiv", 0, 0, IF_EN2G, 0x2E20FC00, 0x1E201800)
+ // fdiv Vd,Vn,Vm DV_3B 0Q1011100X1mmmmm 111111nnnnnddddd 2E20 FC00 Vd,Vn,Vm (vector)
+ // fdiv Vd,Vn,Vm DV_3D 000111100X1mmmmm 000110nnnnnddddd 1E20 1800 Vd,Vn,Vm (scalar)
+
+INST2(fmax, "fmax", 0, 0, IF_EN2G, 0x0E20F400, 0x1E204800)
+ // fmax Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 111101nnnnnddddd 0E20 F400 Vd,Vn,Vm (vector)
+ // fmax Vd,Vn,Vm DV_3D 000111100X1mmmmm 010010nnnnnddddd 1E20 4800 Vd,Vn,Vm (scalar)
+
+INST2(fmin, "fmin", 0, 0, IF_EN2G, 0x0EA0F400, 0x1E205800)
+ // fmin Vd,Vn,Vm DV_3B 0Q0011101X1mmmmm 111101nnnnnddddd 0EA0 F400 Vd,Vn,Vm (vector)
+ // fmin Vd,Vn,Vm DV_3D 000111100X1mmmmm 010110nnnnnddddd 1E20 5800 Vd,Vn,Vm (scalar)
+
+INST2(fabd, "fabd", 0, 0, IF_EN2G, 0x2EA0D400, 0x7EA0D400)
+ // fabd Vd,Vn,Vm DV_3B 0Q1011101X1mmmmm 110101nnnnnddddd 2EA0 D400 Vd,Vn,Vm (vector)
+ // fabd Vd,Vn,Vm DV_3D 011111101X1mmmmm 110101nnnnnddddd 7EA0 D400 Vd,Vn,Vm (scalar)
+
+// enum name FP LD/ST DV_2K DV_1C
+INST2(fcmp, "fcmp", 0, 0, IF_EN2I, 0x1E202000, 0x1E202008)
+ // fcmp Vn,Vm DV_2K 000111100X1mmmmm 001000nnnnn00000 1E20 2000 Vn Vm
+ // fcmp Vn,#0.0 DV_1C 000111100X100000 001000nnnnn01000 1E20 2008 Vn #0.0
+
+INST2(fcmpe, "fcmpe", 0, 0, IF_EN2I, 0x1E202010, 0x1E202018)
+ // fcmpe Vn,Vm DV_2K 000111100X1mmmmm 001000nnnnn10000 1E20 2010 Vn Vm
+ // fcmpe Vn,#0.0 DV_1C 000111100X100000 001000nnnnn11000 1E20 2018 Vn #0.0
+
+// enum name FP LD/ST DV_2A DV_2G
+INST2(fabs, "fabs", 0, 0, IF_EN2J, 0x0EA0F800, 0x1E20C000)
+ // fabs Vd,Vn DV_2A 0Q0011101X100000 111110nnnnnddddd 0EA0 F800 Vd,Vn (vector)
+ // fabs Vd,Vn DV_2G 000111100X100000 110000nnnnnddddd 1E20 C000 Vd,Vn (scalar)
+
+INST2(fneg, "fneg", 0, 0, IF_EN2J, 0x2EA0F800, 0x1E214000)
+ // fneg Vd,Vn DV_2A 0Q1011101X100000 111110nnnnnddddd 2EA0 F800 Vd,Vn (vector)
+ // fneg Vd,Vn DV_2G 000111100X100001 010000nnnnnddddd 1E21 4000 Vd,Vn (scalar)
+
+INST2(fsqrt, "fsqrt", 0, 0, IF_EN2J, 0x2EA1F800, 0x1E21C000)
+ // fsqrt Vd,Vn DV_2A 0Q1011101X100001 111110nnnnnddddd 2EA1 F800 Vd,Vn (vector)
+ // fsqrt Vd,Vn DV_2G 000111100X100001 110000nnnnnddddd 1E21 C000 Vd,Vn (scalar)
+
+INST2(frintn, "frintn", 0, 0, IF_EN2J, 0x0E218800, 0x1E244000)
+ // frintn Vd,Vn DV_2A 0Q0011100X100001 100010nnnnnddddd 0E21 8800 Vd,Vn (vector)
+ // frintn Vd,Vn DV_2G 000111100X100100 010000nnnnnddddd 1E24 4000 Vd,Vn (scalar)
+
+INST2(frintp, "frintp", 0, 0, IF_EN2J, 0x0EA18800, 0x1E24C000)
+ // frintp Vd,Vn DV_2A 0Q0011101X100001 100010nnnnnddddd 0EA1 8800 Vd,Vn (vector)
+ // frintp Vd,Vn DV_2G 000111100X100100 110000nnnnnddddd 1E24 C000 Vd,Vn (scalar)
+
+INST2(frintm, "frintm", 0, 0, IF_EN2J, 0x0E219800, 0x1E254000)
+ // frintm Vd,Vn DV_2A 0Q0011100X100001 100110nnnnnddddd 0E21 9800 Vd,Vn (vector)
+ // frintm Vd,Vn DV_2G 000111100X100101 010000nnnnnddddd 1E25 4000 Vd,Vn (scalar)
+
+INST2(frintz, "frintz", 0, 0, IF_EN2J, 0x0EA19800, 0x1E25C000)
+ // frintz Vd,Vn DV_2A 0Q0011101X100001 100110nnnnnddddd 0EA1 9800 Vd,Vn (vector)
+ // frintz Vd,Vn DV_2G 000111100X100101 110000nnnnnddddd 1E25 C000 Vd,Vn (scalar)
+
+INST2(frinta, "frinta", 0, 0, IF_EN2J, 0x2E218800, 0x1E264000)
+ // frinta Vd,Vn DV_2A 0Q1011100X100001 100010nnnnnddddd 2E21 8800 Vd,Vn (vector)
+ // frinta Vd,Vn DV_2G 000111100X100110 010000nnnnnddddd 1E26 4000 Vd,Vn (scalar)
+
+INST2(frintx, "frintx", 0, 0, IF_EN2J, 0x2E219800, 0x1E274000)
+ // frintx Vd,Vn DV_2A 0Q1011100X100001 100110nnnnnddddd 2E21 9800 Vd,Vn (vector)
+ // frintx Vd,Vn DV_2G 000111100X100111 010000nnnnnddddd 1E27 4000 Vd,Vn (scalar)
+
+INST2(frinti, "frinti", 0, 0, IF_EN2J, 0x2EA19800, 0x1E27C000)
+ // frinti Vd,Vn DV_2A 0Q1011101X100001 100110nnnnnddddd 2EA1 9800 Vd,Vn (vector)
+ // frinti Vd,Vn DV_2G 000111100X100111 110000nnnnnddddd 1E27 C000 Vd,Vn (scalar)
+
+// enum name FP LD/ST DV_2M DV_2L
+INST2(abs, "abs", 0, 0, IF_EN2K, 0x0E20B800, 0x5E20B800)
+ // abs Vd,Vn DV_2M 0Q001110XX100000 101110nnnnnddddd 0E20 B800 Vd,Vn (vector)
+ // abs Vd,Vn DV_2L 01011110XX100000 101110nnnnnddddd 5E20 B800 Vd,Vn (scalar)
+
+// enum name FP LD/ST DR_2G DV_2M
+INST2(cls, "cls", 0, 0, IF_EN2L, 0x5AC01400, 0x0E204800)
+ // cls Rd,Rm DR_2G X101101011000000 000101nnnnnddddd 5AC0 1400 Rd Rn (general)
+ // cls Vd,Vn DV_2M 0Q00111000100000 010010nnnnnddddd 0E20 4800 Vd,Vn (vector)
+
+INST2(clz, "clz", 0, 0, IF_EN2L, 0x5AC01000, 0x2E204800)
+ // clz Rd,Rm DR_2G X101101011000000 000100nnnnnddddd 5AC0 1000 Rd Rn (general)
+ // clz Vd,Vn DV_2M 0Q10111000100000 010010nnnnnddddd 2E20 4800 Vd,Vn (vector)
+
+INST2(rbit, "rbit", 0, 0, IF_EN2L, 0x5AC00000, 0x2E605800)
+ // rbit Rd,Rm DR_2G X101101011000000 000000nnnnnddddd 5AC0 0000 Rd Rn (general)
+ // rbit Vd,Vn DV_2M 0Q10111001100000 010110nnnnnddddd 2E60 5800 Vd,Vn (vector)
+
+INST2(rev16, "rev16", 0, 0, IF_EN2L, 0x5AC00400, 0x0E201800)
+ // rev16 Rd,Rm DR_2G X101101011000000 000001nnnnnddddd 5AC0 0400 Rd Rn (general)
+ // rev16 Vd,Vn DV_2M 0Q001110XX100000 000110nnnnnddddd 0E20 1800 Vd,Vn (vector)
+
+INST2(rev32, "rev32", 0, 0, IF_EN2L, 0xDAC00800, 0x2E200800)
+ // rev32 Rd,Rm DR_2G 1101101011000000 000010nnnnnddddd DAC0 0800 Rd Rn (general)
+ // rev32 Vd,Vn DV_2M 0Q101110XX100000 000010nnnnnddddd 2E20 0800 Vd,Vn (vector)
+
+// enum name FP LD/ST DV_3A DV_3AI
+INST2(mla, "mla", 0, 0, IF_EN2M, 0x0E209400, 0x2F000000)
+ // mla Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 100101nnnnnddddd 0E20 9400 Vd,Vn,Vm (vector)
+ // mla Vd,Vn,Vm[] DV_3AI 0Q101111XXLMmmmm 0000H0nnnnnddddd 2F00 0000 Vd,Vn,Vm[] (vector by elem)
+
+INST2(mls, "mls", 0, 0, IF_EN2M, 0x2E209400, 0x2F004000)
+ // mls Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100101nnnnnddddd 2E20 9400 Vd,Vn,Vm (vector)
+ // mls Vd,Vn,Vm[] DV_3AI 0Q101111XXLMmmmm 0100H0nnnnnddddd 2F00 4000 Vd,Vn,Vm[] (vector by elem)
+
+// enum name FP LD/ST DV_2N DV_2O
+INST2(sshr, "sshr", 0, 0, IF_EN2N, 0x5F000400, 0x0F000400)
+ // sshr Vd,Vn,imm DV_2N 010111110iiiiiii 000001nnnnnddddd 5F00 0400 Vd Vn imm (shift - scalar)
+ // sshr Vd,Vn,imm DV_2O 0Q0011110iiiiiii 000001nnnnnddddd 0F00 0400 Vd,Vn imm (shift - vector)
+
+INST2(ssra, "ssra", 0, 0, IF_EN2N, 0x5F001400, 0x0F001400)
+ // ssra Vd,Vn,imm DV_2N 010111110iiiiiii 000101nnnnnddddd 5F00 1400 Vd Vn imm (shift - scalar)
+ // ssra Vd,Vn,imm DV_2O 0Q0011110iiiiiii 000101nnnnnddddd 0F00 1400 Vd,Vn imm (shift - vector)
+
+INST2(srshr, "srshr", 0, 0, IF_EN2N, 0x5F002400, 0x0F002400)
+ // srshr Vd,Vn,imm DV_2N 010111110iiiiiii 001001nnnnnddddd 5F00 2400 Vd Vn imm (shift - scalar)
+ // srshr Vd,Vn,imm DV_2O 0Q0011110iiiiiii 001001nnnnnddddd 0F00 2400 Vd,Vn imm (shift - vector)
+
+INST2(srsra, "srsra", 0, 0, IF_EN2N, 0x5F003400, 0x0F003400)
+ // srsra Vd,Vn,imm DV_2N 010111110iiiiiii 001101nnnnnddddd 5F00 3400 Vd Vn imm (shift - scalar)
+ // srsra Vd,Vn,imm DV_2O 0Q0011110iiiiiii 001101nnnnnddddd 0F00 3400 Vd,Vn imm (shift - vector)
+
+INST2(shl, "shl", 0, 0, IF_EN2N, 0x5F005400, 0x0F005400)
+ // shl Vd,Vn,imm DV_2N 010111110iiiiiii 010101nnnnnddddd 5F00 5400 Vd Vn imm (shift - scalar)
+ // shl Vd,Vn,imm DV_2O 0Q0011110iiiiiii 010101nnnnnddddd 0F00 5400 Vd,Vn imm (shift - vector)
+
+INST2(ushr, "ushr", 0, 0, IF_EN2N, 0x7F000400, 0x2F000400)
+ // ushr Vd,Vn,imm DV_2N 011111110iiiiiii 000001nnnnnddddd 7F00 0400 Vd Vn imm (shift - scalar)
+ // ushr Vd,Vn,imm DV_2O 0Q1011110iiiiiii 000001nnnnnddddd 2F00 0400 Vd,Vn imm (shift - vector)
+
+INST2(usra, "usra", 0, 0, IF_EN2N, 0x7F001400, 0x2F001400)
+ // usra Vd,Vn,imm DV_2N 011111110iiiiiii 000101nnnnnddddd 7F00 1400 Vd Vn imm (shift - scalar)
+ // usra Vd,Vn,imm DV_2O 0Q1011110iiiiiii 000101nnnnnddddd 2F00 1400 Vd,Vn imm (shift - vector)
+
+INST2(urshr, "urshr", 0, 0, IF_EN2N, 0x7F002400, 0x2F002400)
+ // urshr Vd,Vn,imm DV_2N 011111110iiiiiii 001001nnnnnddddd 7F00 2400 Vd Vn imm (shift - scalar)
+ // urshr Vd,Vn,imm DV_2O 0Q1011110iiiiiii 001001nnnnnddddd 2F00 2400 Vd,Vn imm (shift - vector)
+
+INST2(ursra, "ursra", 0, 0, IF_EN2N, 0x7F003400, 0x2F003400)
+ // ursra Vd,Vn,imm DV_2N 011111110iiiiiii 001101nnnnnddddd 7F00 3400 Vd Vn imm (shift - scalar)
+ // ursra Vd,Vn,imm DV_2O 0Q1011110iiiiiii 001101nnnnnddddd 2F00 3400 Vd,Vn imm (shift - vector)
+
+INST2(sri, "sri", 0, 0, IF_EN2N, 0x7F004400, 0x2F004400)
+ // sri Vd,Vn,imm DV_2N 011111110iiiiiii 010001nnnnnddddd 7F00 4400 Vd Vn imm (shift - scalar)
+ // sri Vd,Vn,imm DV_2O 0Q1011110iiiiiii 010001nnnnnddddd 2F00 4400 Vd,Vn imm (shift - vector)
+
+INST2(sli, "sli", 0, 0, IF_EN2N, 0x7F005400, 0x2F005400)
+ // sli Vd,Vn,imm DV_2N 011111110iiiiiii 010101nnnnnddddd 7F00 5400 Vd Vn imm (shift - scalar)
+ // sli Vd,Vn,imm DV_2O 0Q1011110iiiiiii 010101nnnnnddddd 2F00 5400 Vd,Vn imm (shift - vector)
+
+INST1(ldur, "ldur", 0,LD, IF_LS_2C, 0xB8400000)
+ // ldur Rt,[Xn+simm9] LS_2C 1X111000010iiiii iiii00nnnnnttttt B840 0000 [Xn imm(-256..+255)]
+
+INST1(ldurb, "ldurb", 0,LD, IF_LS_2C, 0x38400000)
+ // ldurb Rt,[Xn+simm9] LS_2C 00111000010iiiii iiii00nnnnnttttt 3840 0000 [Xn imm(-256..+255)]
+
+INST1(ldurh, "ldurh", 0,LD, IF_LS_2C, 0x78400000)
+ // ldurh Rt,[Xn+simm9] LS_2C 01111000010iiiii iiii00nnnnnttttt 7840 0000 [Xn imm(-256..+255)]
+
+INST1(ldursb, "ldursb", 0,LD, IF_LS_2C, 0x38800000)
+ // ldursb Rt,[Xn+simm9] LS_2C 001110001X0iiiii iiii00nnnnnttttt 3880 0000 [Xn imm(-256..+255)]
+
+INST1(ldursh, "ldursh", 0,LD, IF_LS_2C, 0x78800000)
+ // ldursh Rt,[Xn+simm9] LS_2C 011110001X0iiiii iiii00nnnnnttttt 7880 0000 [Xn imm(-256..+255)]
+
+INST1(ldursw, "ldursw", 0,LD, IF_LS_2C, 0xB8800000)
+ // ldursw Rt,[Xn+simm9] LS_2C 10111000100iiiii iiii00nnnnnttttt B880 0000 [Xn imm(-256..+255)]
+
+INST1(stur, "stur", 0,ST, IF_LS_2C, 0xB8000000)
+ // stur Rt,[Xn+simm9] LS_2C 1X111000000iiiii iiii00nnnnnttttt B800 0000 [Xn imm(-256..+255)]
+
+INST1(sturb, "sturb", 0,ST, IF_LS_2C, 0x38000000)
+ // sturb Rt,[Xn+simm9] LS_2C 00111000000iiiii iiii00nnnnnttttt 3800 0000 [Xn imm(-256..+255)]
+
+INST1(sturh, "sturh", 0,ST, IF_LS_2C, 0x78000000)
+ // sturh Rt,[Xn+simm9] LS_2C 01111000000iiiii iiii00nnnnnttttt 7800 0000 [Xn imm(-256..+255)]
+
+INST1(adr, "adr", 0, 0, IF_DI_1E, 0x10000000)
+ // adr Rd, simm21 DI_1E 0ii10000iiiiiiii iiiiiiiiiiiddddd 1000 0000 Rd simm21
+
+INST1(adrp, "adrp", 0, 0, IF_DI_1E, 0x90000000)
+ // adrp Rd, simm21 DI_1E 1ii10000iiiiiiii iiiiiiiiiiiddddd 9000 0000 Rd simm21
+
+INST1(b, "b", 0, 0, IF_BI_0A, 0x14000000)
+ // b simm26 BI_0A 000101iiiiiiiiii iiiiiiiiiiiiiiii 1400 0000 simm26:00
+
+INST1(b_tail, "b", 0, 0, IF_BI_0C, 0x14000000)
+ // b simm26 BI_0A 000101iiiiiiiiii iiiiiiiiiiiiiiii 1400 0000 simm26:00, same as b representing a tail call of bl.
+
+INST1(bl_local,"bl", 0, 0, IF_BI_0A, 0x94000000)
+ // bl simm26 BI_0A 100101iiiiiiiiii iiiiiiiiiiiiiiii 9400 0000 simm26:00, same as bl, but with a BasicBlock target.
+
+INST1(bl, "bl", 0, 0, IF_BI_0C, 0x94000000)
+ // bl simm26 BI_0C 100101iiiiiiiiii iiiiiiiiiiiiiiii 9400 0000 simm26:00
+
+INST1(br, "br", 0, 0, IF_BR_1A, 0xD61F0000)
+ // br Rn BR_1A 1101011000011111 000000nnnnn00000 D61F 0000, an indirect branch like switch expansion
+
+INST1(br_tail, "br", 0, 0, IF_BR_1B, 0xD61F0000)
+ // br Rn BR_1B 1101011000011111 000000nnnnn00000 D61F 0000, same as br representing a tail call of blr. Encode target with Reg3.
+
+INST1(blr, "blr", 0, 0, IF_BR_1B, 0xD63F0000)
+ // blr Rn BR_1B 1101011000111111 000000nnnnn00000 D63F 0000, Encode target with Reg3.
+
+INST1(ret, "ret", 0, 0, IF_BR_1A, 0xD65F0000)
+ // ret Rn BR_1A 1101011001011111 000000nnnnn00000 D65F 0000
+
+INST1(beq, "beq", 0, 0, IF_BI_0B, 0x54000000)
+ // beq simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00000 5400 0000 simm19:00
+
+INST1(bne, "bne", 0, 0, IF_BI_0B, 0x54000001)
+ // bne simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00001 5400 0001 simm19:00
+
+INST1(bhs, "bhs", 0, 0, IF_BI_0B, 0x54000002)
+ // bhs simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00010 5400 0002 simm19:00
+
+INST1(blo, "blo", 0, 0, IF_BI_0B, 0x54000003)
+ // blo simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00011 5400 0003 simm19:00
+
+INST1(bmi, "bmi", 0, 0, IF_BI_0B, 0x54000004)
+ // bmi simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00100 5400 0004 simm19:00
+
+INST1(bpl, "bpl", 0, 0, IF_BI_0B, 0x54000005)
+ // bpl simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00101 5400 0005 simm19:00
+
+INST1(bvs, "bvs", 0, 0, IF_BI_0B, 0x54000006)
+ // bvs simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00110 5400 0006 simm19:00
+
+INST1(bvc, "bvc", 0, 0, IF_BI_0B, 0x54000007)
+ // bvc simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii00111 5400 0007 simm19:00
+
+INST1(bhi, "bhi", 0, 0, IF_BI_0B, 0x54000008)
+ // bhi simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01000 5400 0008 simm19:00
+
+INST1(bls, "bls", 0, 0, IF_BI_0B, 0x54000009)
+ // bls simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01001 5400 0009 simm19:00
+
+INST1(bge, "bge", 0, 0, IF_BI_0B, 0x5400000A)
+ // bge simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01010 5400 000A simm19:00
+
+INST1(blt, "blt", 0, 0, IF_BI_0B, 0x5400000B)
+ // blt simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01011 5400 000B simm19:00
+
+INST1(bgt, "bgt", 0, 0, IF_BI_0B, 0x5400000C)
+ // bgt simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01100 5400 000C simm19:00
+
+INST1(ble, "ble", 0, 0, IF_BI_0B, 0x5400000D)
+ // ble simm19 BI_0B 01010100iiiiiiii iiiiiiiiiii01101 5400 000D simm19:00
+
+INST1(cbz, "cbz", 0, 0, IF_BI_1A, 0x34000000)
+ // cbz Rt, simm19 BI_1A X0110100iiiiiiii iiiiiiiiiiittttt 3400 0000 Rt simm19:00
+
+INST1(cbnz, "cbnz", 0, 0, IF_BI_1A, 0x35000000)
+ // cbnz Rt, simm19 BI_1A X0110101iiiiiiii iiiiiiiiiiittttt 3500 0000 Rt simm19:00
+
+INST1(tbz, "tbz", 0, 0, IF_BI_1B, 0x36000000)
+ // tbz Rt, imm6, simm14 BI_1B B0110110bbbbbiii iiiiiiiiiiittttt 3600 0000 Rt imm6, simm14:00
+
+INST1(tbnz, "tbnz", 0, 0, IF_BI_1B, 0x37000000)
+ // tbnz Rt, imm6, simm14 BI_1B B0110111bbbbbiii iiiiiiiiiiittttt 3700 0000 Rt imm6, simm14:00
+
+INST1(movk, "movk", 0, 0, IF_DI_1B, 0x72800000)
+ // movk Rd,imm(i16,hw) DI_1B X11100101hwiiiii iiiiiiiiiiiddddd 7280 0000 imm(i16,hw)
+
+INST1(movn, "movn", 0, 0, IF_DI_1B, 0x12800000)
+ // movn Rd,imm(i16,hw) DI_1B X00100101hwiiiii iiiiiiiiiiiddddd 1280 0000 imm(i16,hw)
+
+INST1(movz, "movz", 0, 0, IF_DI_1B, 0x52800000)
+ // movz Rd,imm(i16,hw) DI_1B X10100101hwiiiii iiiiiiiiiiiddddd 5280 0000 imm(i16,hw)
+
+INST1(csel, "csel", 0, 0, IF_DR_3D, 0x1A800000)
+ // csel Rd,Rn,Rm,cond DR_3D X0011010100mmmmm cccc00nnnnnddddd 1A80 0000 cond
+
+INST1(csinc, "csinc", 0, 0, IF_DR_3D, 0x1A800400)
+ // csinc Rd,Rn,Rm,cond DR_3D X0011010100mmmmm cccc01nnnnnddddd 1A80 0400 cond
+
+INST1(csinv, "csinv", 0, 0, IF_DR_3D, 0x5A800000)
+ // csinv Rd,Rn,Rm,cond DR_3D X1011010100mmmmm cccc00nnnnnddddd 5A80 0000 cond
+
+INST1(csneg, "csneg", 0, 0, IF_DR_3D, 0x5A800400)
+ // csneg Rd,Rn,Rm,cond DR_3D X1011010100mmmmm cccc01nnnnnddddd 5A80 0400 cond
+
+INST1(cinc, "cinc", 0, 0, IF_DR_2D, 0x1A800400)
+ // cinc Rd,Rn,cond DR_2D X0011010100nnnnn cccc01nnnnnddddd 1A80 0400 cond
+
+INST1(cinv, "cinv", 0, 0, IF_DR_2D, 0x5A800000)
+ // cinv Rd,Rn,cond DR_2D X1011010100nnnnn cccc00nnnnnddddd 5A80 0000 cond
+
+INST1(cneg, "cneg", 0, 0, IF_DR_2D, 0x5A800400)
+ // cneg Rd,Rn,cond DR_2D X1011010100nnnnn cccc01nnnnnddddd 5A80 0400 cond
+
+INST1(cset, "cset", 0, 0, IF_DR_1D, 0x1A9F07E0)
+ // cset Rd,cond DR_1D X001101010011111 cccc0111111ddddd 1A9F 07E0 Rd cond
+
+INST1(csetm, "csetm", 0, 0, IF_DR_1D, 0x5A9F03E0)
+ // csetm Rd,cond DR_1D X101101010011111 cccc0011111ddddd 5A9F 03E0 Rd cond
+
+INST1(rev, "rev", 0, 0, IF_DR_2G, 0x5AC00800)
+ // rev Rd,Rm DR_2G X101101011000000 00001Xnnnnnddddd 5AC0 0800 Rd Rn
+
+INST1(rev64, "rev64", 0, 0, IF_DV_2M, 0x0E200800)
+ // rev64 Vd,Vn DV_2M 0Q001110XX100000 000010nnnnnddddd 0E20 0800 Vd,Vn (vector)
+
+INST1(adc, "adc", 0, 0, IF_DR_3A, 0x1A000000)
+ // adc Rd,Rn,Rm DR_3A X0011010000mmmmm 000000nnnnnddddd 1A00 0000
+
+INST1(adcs, "adcs", 0, 0, IF_DR_3A, 0x3A000000)
+ // adcs Rd,Rn,Rm DR_3A X0111010000mmmmm 000000nnnnnddddd 3A00 0000
+
+INST1(sbc, "sbc", 0, 0, IF_DR_3A, 0x5A000000)
+ // sbc Rd,Rn,Rm DR_3A X1011010000mmmmm 000000nnnnnddddd 5A00 0000
+
+INST1(sbcs, "sbcs", 0, 0, IF_DR_3A, 0x7A000000)
+ // sbcs Rd,Rn,Rm DR_3A X1111010000mmmmm 000000nnnnnddddd 7A00 0000
+
+INST1(udiv, "udiv", 0, 0, IF_DR_3A, 0x1AC00800)
+ // udiv Rd,Rn,Rm DR_3A X0011010110mmmmm 000010nnnnnddddd 1AC0 0800
+
+INST1(sdiv, "sdiv", 0, 0, IF_DR_3A, 0x1AC00C00)
+ // sdiv Rd,Rn,Rm DR_3A X0011010110mmmmm 000011nnnnnddddd 1AC0 0C00
+
+INST1(mneg, "mneg", 0, 0, IF_DR_3A, 0x1B00FC00)
+ // mneg Rd,Rn,Rm DR_3A X0011011000mmmmm 111111nnnnnddddd 1B00 FC00
+
+INST1(madd, "madd", 0, 0, IF_DR_4A, 0x1B000000)
+ // madd Rd,Rn,Rm,Ra DR_4A X0011011000mmmmm 0aaaaannnnnddddd 1B00 0000
+
+INST1(msub, "msub", 0, 0, IF_DR_4A, 0x1B008000)
+ // msub Rd,Rn,Rm,Ra DR_4A X0011011000mmmmm 1aaaaannnnnddddd 1B00 8000
+
+INST1(smull, "smull", 0, 0, IF_DR_3A, 0x9B207C00)
+ // smull Rd,Rn,Rm DR_3A 10011011001mmmmm 011111nnnnnddddd 9B20 7C00
+
+INST1(smaddl, "smaddl", 0, 0, IF_DR_4A, 0x9B200000)
+ // smaddl Rd,Rn,Rm,Ra DR_4A 10011011001mmmmm 0aaaaannnnnddddd 9B20 0000
+
+INST1(smnegl, "smnegl", 0, 0, IF_DR_3A, 0x9B20FC00)
+ // smnegl Rd,Rn,Rm DR_3A 10011011001mmmmm 111111nnnnnddddd 9B20 FC00
+
+INST1(smsubl, "smsubl", 0, 0, IF_DR_4A, 0x9B208000)
+ // smsubl Rd,Rn,Rm,Ra DR_4A 10011011001mmmmm 1aaaaannnnnddddd 9B20 8000
+
+INST1(smulh, "smulh", 0, 0, IF_DR_3A, 0x9B407C00)
+ // smulh Rd,Rn,Rm DR_3A 10011011010mmmmm 011111nnnnnddddd 9B40 7C00
+
+INST1(umull, "umull", 0, 0, IF_DR_3A, 0x9BA07C00)
+ // umull Rd,Rn,Rm DR_3A 10011011101mmmmm 011111nnnnnddddd 9BA0 7C00
+
+INST1(umaddl, "umaddl", 0, 0, IF_DR_4A, 0x9BA00000)
+ // umaddl Rd,Rn,Rm,Ra DR_4A 10011011101mmmmm 0aaaaannnnnddddd 9BA0 0000
+
+INST1(umnegl, "umnegl", 0, 0, IF_DR_3A, 0x9BA0FC00)
+ // umnegl Rd,Rn,Rm DR_3A 10011011101mmmmm 111111nnnnnddddd 9BA0 FC00
+
+INST1(umsubl, "umsubl", 0, 0, IF_DR_4A, 0x9BA08000)
+ // umsubl Rd,Rn,Rm,Ra DR_4A 10011011101mmmmm 1aaaaannnnnddddd 9BA0 8000
+
+INST1(umulh, "umulh", 0, 0, IF_DR_3A, 0x9BC07C00)
+ // umulh Rd,Rn,Rm DR_3A 10011011110mmmmm 011111nnnnnddddd 9BC0 7C00
+
+INST1(extr, "extr", 0, 0, IF_DR_3E, 0x13800000)
+ // extr Rd,Rn,Rm,imm6 DR_3E X00100111X0mmmmm ssssssnnnnnddddd 1380 0000 imm(0-63)
+
+INST1(lslv, "lslv", 0, 0, IF_DR_3A, 0x1AC02000)
+ // lslv Rd,Rn,Rm DR_3A X0011010110mmmmm 001000nnnnnddddd 1AC0 2000
+
+INST1(lsrv, "lsrv", 0, 0, IF_DR_3A, 0x1AC02400)
+ // lsrv Rd,Rn,Rm DR_3A X0011010110mmmmm 001001nnnnnddddd 1AC0 2400
+
+INST1(asrv, "asrv", 0, 0, IF_DR_3A, 0x1AC02800)
+ // asrv Rd,Rn,Rm DR_3A X0011010110mmmmm 001010nnnnnddddd 1AC0 2800
+
+INST1(rorv, "rorv", 0, 0, IF_DR_3A, 0x1AC02C00)
+ // rorv Rd,Rn,Rm DR_3A X0011010110mmmmm 001011nnnnnddddd 1AC0 2C00
+
+INST1(sbfm, "sbfm", 0, 0, IF_DI_2D, 0x13000000)
+ // sbfm Rd,Rn,imr,ims DI_2D X00100110Nrrrrrr ssssssnnnnnddddd 1300 0000 imr, ims
+
+INST1(bfm, "bfm", 0, 0, IF_DI_2D, 0x33000000)
+ // bfm Rd,Rn,imr,ims DI_2D X01100110Nrrrrrr ssssssnnnnnddddd 3300 0000 imr, ims
+
+INST1(ubfm, "ubfm", 0, 0, IF_DI_2D, 0x53000000)
+ // ubfm Rd,Rn,imr,ims DI_2D X10100110Nrrrrrr ssssssnnnnnddddd 5300 0000 imr, ims
+
+INST1(sbfiz, "sbfiz", 0, 0, IF_DI_2D, 0x13000000)
+ // sbfiz Rd,Rn,lsb,width DI_2D X00100110Nrrrrrr ssssssnnnnnddddd 1300 0000 imr, ims
+
+INST1(bfi, "bfi", 0, 0, IF_DI_2D, 0x33000000)
+ // bfi Rd,Rn,lsb,width DI_2D X01100110Nrrrrrr ssssssnnnnnddddd 3300 0000 imr, ims
+
+INST1(ubfiz, "ubfiz", 0, 0, IF_DI_2D, 0x53000000)
+ // ubfiz Rd,Rn,lsb,width DI_2D X10100110Nrrrrrr ssssssnnnnnddddd 5300 0000 imr, ims
+
+INST1(sbfx, "sbfx", 0, 0, IF_DI_2D, 0x13000000)
+ // sbfx Rd,Rn,lsb,width DI_2D X00100110Nrrrrrr ssssssnnnnnddddd 1300 0000 imr, ims
+
+INST1(bfxil, "bfxil", 0, 0, IF_DI_2D, 0x33000000)
+ // bfxil Rd,Rn,lsb,width DI_2D X01100110Nrrrrrr ssssssnnnnnddddd 3300 0000 imr, ims
+
+INST1(ubfx, "ubfx", 0, 0, IF_DI_2D, 0x53000000)
+ // ubfx Rd,Rn,lsb,width DI_2D X10100110Nrrrrrr ssssssnnnnnddddd 5300 0000 imr, ims
+
+INST1(sxtb, "sxtb", 0, 0, IF_DR_2H, 0x13001C00)
+ // sxtb Rd,Rn DR_2H X00100110X000000 000111nnnnnddddd 1300 1C00
+
+INST1(sxth, "sxth", 0, 0, IF_DR_2H, 0x13003C00)
+ // sxth Rd,Rn DR_2H X00100110X000000 001111nnnnnddddd 1300 3C00
+
+INST1(sxtw, "sxtw", 0, 0, IF_DR_2H, 0x13007C00)
+ // sxtw Rd,Rn DR_2H X00100110X000000 011111nnnnnddddd 1300 7C00
+
+INST1(uxtb, "uxtb", 0, 0, IF_DR_2H, 0x53001C00)
+ // uxtb Rd,Rn DR_2H 0101001100000000 000111nnnnnddddd 5300 1C00
+
+INST1(uxth, "uxth", 0, 0, IF_DR_2H, 0x53003C00)
+ // uxth Rd,Rn DR_2H 0101001100000000 001111nnnnnddddd 5300 3C00
+
+INST1(nop, "nop", 0, 0, IF_SN_0A, 0xD503201F)
+ // nop SN_0A 1101010100000011 0010000000011111 D503 201F
+
+INST1(bkpt, "bkpt", 0, 0, IF_SN_0A, 0xD43E0000)
+ // bkpt SN_0A 1101010000111110 0000000000000000 D43E 0000 0xF000
+
+INST1(brk, "brk", 0, 0, IF_SI_0A, 0xD4200000)
+ // brk imm16 SI_0A 11010100001iiiii iiiiiiiiiii00000 D420 0000 imm16
+
+INST1(dsb, "dsb", 0, 0, IF_SI_0B, 0xD503309F)
+ // dsb barrierKind SI_0B 1101010100000011 0011bbbb10011111 D503 309F imm4 - barrier kind
+
+INST1(dmb, "dmb", 0, 0, IF_SI_0B, 0xD50330BF)
+ // dmb barrierKind SI_0B 1101010100000011 0011bbbb10111111 D503 30BF imm4 - barrier kind
+
+INST1(isb, "isb", 0, 0, IF_SI_0B, 0xD50330DF)
+ // isb barrierKind SI_0B 1101010100000011 0011bbbb11011111 D503 30DF imm4 - barrier kind
+
+INST1(umov, "umov", 0, 0, IF_DV_2B, 0x0E003C00)
+ // umov Rd,Vn[] DV_2B 0Q001110000iiiii 001111nnnnnddddd 0E00 3C00 Rd,Vn[]
+
+INST1(smov, "smov", 0, 0, IF_DV_2B, 0x0E002C00)
+ // smov Rd,Vn[] DV_2B 0Q001110000iiiii 001011nnnnnddddd 0E00 2C00 Rd,Vn[]
+
+INST1(movi, "movi", 0, 0, IF_DV_1B, 0x0F000400)
+ // movi Vd,imm8 DV_1B 0QX0111100000iii cmod01iiiiiddddd 0F00 0400 Vd imm8 (immediate vector)
+
+INST1(mvni, "mvni", 0, 0, IF_DV_1B, 0x2F000400)
+ // mvni Vd,imm8 DV_1B 0Q10111100000iii cmod01iiiiiddddd 2F00 0400 Vd imm8 (immediate vector)
+
+INST1(bsl, "bsl", 0, 0, IF_DV_3C, 0x2E601C00)
+ // bsl Vd,Vn,Vm DV_3C 0Q101110011mmmmm 000111nnnnnddddd 2E60 1C00 Vd,Vn,Vm
+
+INST1(bit, "bit", 0, 0, IF_DV_3C, 0x2EA01C00)
+ // bit Vd,Vn,Vm DV_3C 0Q101110101mmmmm 000111nnnnnddddd 2EA0 1C00 Vd,Vn,Vm
+
+INST1(bif, "bif", 0, 0, IF_DV_3C, 0x2EE01C00)
+ // bif Vd,Vn,Vm DV_3C 0Q101110111mmmmm 000111nnnnnddddd 2EE0 1C00 Vd,Vn,Vm
+
+INST1(cnt, "cnt", 0, 0, IF_DV_2M, 0x0E205800)
+ // cnt Vd,Vn DV_2M 0Q00111000100000 010110nnnnnddddd 0E20 5800 Vd,Vn (vector)
+
+INST1(not, "not", 0, 0, IF_DV_2M, 0x2E205800)
+ // not Vd,Vn DV_2M 0Q10111000100000 010110nnnnnddddd 2E20 5800 Vd,Vn (vector)
+
+INST1(fnmul, "fnmul", 0, 0, IF_DV_3D, 0x1E208800)
+ // fnmul Vd,Vn,Vm DV_3D 000111100X1mmmmm 100010nnnnnddddd 1E20 8800 Vd,Vn,Vm (scalar)
+
+INST1(fmadd, "fmadd", 0, 0, IF_DV_4A, 0x1F000000)
+ // fmadd Vd,Va,Vn,Vm DV_4A 000111110X0mmmmm 0aaaaannnnnddddd 1F00 0000 Vd Vn Vm Va (scalar)
+
+INST1(fmsub, "fmsub", 0, 0, IF_DV_4A, 0x1F008000)
+ // fmsub Vd,Va,Vn,Vm DV_4A 000111110X0mmmmm 1aaaaannnnnddddd 1F00 8000 Vd Vn Vm Va (scalar)
+
+INST1(fnmadd, "fnmadd", 0, 0, IF_DV_4A, 0x1F200000)
+ // fnmadd Vd,Va,Vn,Vm DV_4A 000111110X1mmmmm 0aaaaannnnnddddd 1F20 0000 Vd Vn Vm Va (scalar)
+
+INST1(fnmsub, "fnmsub", 0, 0, IF_DV_4A, 0x1F208000)
+ // fnmsub Vd,Va,Vn,Vm DV_4A 000111110X1mmmmm 1aaaaannnnnddddd 1F20 8000 Vd Vn Vm Va (scalar)
+
+INST1(fcvt, "fcvt", 0, 0, IF_DV_2J, 0x1E224000)
+ // fcvt Vd,Vn DV_2J 00011110SS10001D D10000nnnnnddddd 1E22 4000 Vd,Vn
+
+INST1(pmul, "pmul", 0, 0, IF_DV_3A, 0x2E209C00)
+ // pmul Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100111nnnnnddddd 2E20 9C00 Vd,Vn,Vm (vector)
+
+INST1(saba, "saba", 0, 0, IF_DV_3A, 0x0E207C00)
+ // saba Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 011111nnnnnddddd 0E20 7C00 Vd,Vn,Vm (vector)
+
+INST1(sabd, "sabd", 0, 0, IF_DV_3A, 0x0E207400)
+ // sabd Vd,Vn,Vm DV_3A 0Q001110XX1mmmmm 011101nnnnnddddd 0E20 7400 Vd,Vn,Vm (vector)
+
+INST1(uaba, "uaba", 0, 0, IF_DV_3A, 0x2E207C00)
+ // uaba Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 011111nnnnnddddd 2E20 7C00 Vd,Vn,Vm (vector)
+
+INST1(uabd, "uabd", 0, 0, IF_DV_3A, 0x2E207400)
+ // uabd Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 011101nnnnnddddd 2E20 7400 Vd,Vn,Vm (vector)
+
+INST1(shll, "shll", 0, 0, IF_DV_2M, 0x2E213800)
+ // shll Vd,Vn,imm DV_2M 0Q101110XX100001 001110nnnnnddddd 2E21 3800 Vd,Vn, {8/16/32}
+
+INST1(shll2, "shll2", 0, 0, IF_DV_2M, 0x6E213800)
+ // shll2 Vd,Vn,imm DV_2M 0Q101110XX100001 001110nnnnnddddd 6E21 3800 Vd,Vn, {8/16/32}
+
+INST1(sshll, "sshll", 0, 0, IF_DV_2O, 0x0F00A400)
+ // sshll Vd,Vn,imm DV_2O 000011110iiiiiii 101001nnnnnddddd 0F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(sshll2, "sshll2", 0, 0, IF_DV_2O, 0x4F00A400)
+ // sshll2 Vd,Vn,imm DV_2O 010011110iiiiiii 101001nnnnnddddd 4F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(ushll, "ushll", 0, 0, IF_DV_2O, 0x2F00A400)
+ // ushll Vd,Vn,imm DV_2O 001011110iiiiiii 101001nnnnnddddd 2F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(ushll2, "ushll2", 0, 0, IF_DV_2O, 0x6F00A400)
+ // ushll2 Vd,Vn,imm DV_2O 011011110iiiiiii 101001nnnnnddddd 6F00 A400 Vd,Vn imm (shift - vector)
+
+INST1(shrn, "shrn", 0, 0, IF_DV_2O, 0x0F008400)
+ // shrn Vd,Vn,imm DV_2O 000011110iiiiiii 100001nnnnnddddd 0F00 8400 Vd,Vn imm (shift - vector)
+
+INST1(shrn2, "shrn2", 0, 0, IF_DV_2O, 0x4F008400)
+ // shrn2 Vd,Vn,imm DV_2O 010011110iiiiiii 100001nnnnnddddd 4F00 8400 Vd,Vn imm (shift - vector)
+
+INST1(rshrn, "rshrn", 0, 0, IF_DV_2O, 0x0F008C00)
+ // rshrn Vd,Vn,imm DV_2O 000011110iiiiiii 100011nnnnnddddd 0F00 8C00 Vd,Vn imm (shift - vector)
+
+INST1(rshrn2, "rshrn2", 0, 0, IF_DV_2O, 0x4F008C00)
+ // rshrn2 Vd,Vn,imm DV_2O 010011110iiiiiii 100011nnnnnddddd 4F00 8C00 Vd,Vn imm (shift - vector)
+
+INST1(sxtl, "sxtl", 0, 0, IF_DV_2O, 0x0F00A400)
+ // sxtl Vd,Vn DV_2O 000011110iiiiiii 101001nnnnnddddd 0F00 A400 Vd,Vn (shift - vector)
+
+INST1(sxtl2, "sxtl2", 0, 0, IF_DV_2O, 0x4F00A400)
+ // sxtl2 Vd,Vn DV_2O 010011110iiiiiii 101001nnnnnddddd 4F00 A400 Vd,Vn (shift - vector)
+
+INST1(uxtl, "uxtl", 0, 0, IF_DV_2O, 0x2F00A400)
+ // uxtl Vd,Vn DV_2O 001011110iiiiiii 101001nnnnnddddd 2F00 A400 Vd,Vn (shift - vector)
+
+INST1(uxtl2, "uxtl2", 0, 0, IF_DV_2O, 0x6F00A400)
+ // uxtl2 Vd,Vn DV_2O 011011110iiiiiii 101001nnnnnddddd 6F00 A400 Vd,Vn (shift - vector)
+// clang-format on
+
+/*****************************************************************************/
+#undef INST1
+#undef INST2
+#undef INST3
+#undef INST4
+#undef INST5
+#undef INST6
+#undef INST9
+/*****************************************************************************/
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
new file mode 100644
index 0000000000..436563babf
--- /dev/null
+++ b/src/jit/instrsxarch.h
@@ -0,0 +1,540 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This file was previously known as instrs.h
+//
+/*****************************************************************************
+ * x86 instructions for the JIT compiler
+ *
+ * id -- the enum name for the instruction
+ * nm -- textual name (for assembly display)
+ * fp -- 1 = floating point instruction, 0 = not floating point instruction
+ * um -- update mode, see IUM_xx enum (rd, wr, or rw)
+ * rf -- 1 = reads flags, 0 = doesn't read flags
+ * wf -- 1 = writes flags, 0 = doesn't write flags
+ * mr -- base encoding for R/M[reg] addressing mode
+ * mi -- base encoding for R/M,icon addressing mode
+ * rm -- base encoding for reg,R/M addressing mode
+ * a4 -- base encoding for eax,i32 addressing mode
+ * rr -- base encoding for register addressing mode
+ *
+******************************************************************************/
+
+// clang-format off
+#if !defined(_TARGET_XARCH_)
+ #error Unexpected target type
+#endif
+
+#ifndef INST1
+#error At least INST1 must be defined before including this file.
+#endif
+/*****************************************************************************/
+#ifndef INST0
+#define INST0(id, nm, fp, um, rf, wf, mr )
+#endif
+#ifndef INST2
+#define INST2(id, nm, fp, um, rf, wf, mr, mi )
+#endif
+#ifndef INST3
+#define INST3(id, nm, fp, um, rf, wf, mr, mi, rm )
+#endif
+#ifndef INST4
+#define INST4(id, nm, fp, um, rf, wf, mr, mi, rm, a4 )
+#endif
+#ifndef INST5
+#define INST5(id, nm, fp, um, rf, wf, mr, mi, rm, a4, rr)
+#endif
+
+/*****************************************************************************/
+/* The following is x86-specific */
+/*****************************************************************************/
+
+// enum name FP updmode rf wf R/M[reg] R/M,icon reg,R/M eax,i32 register
+INST5(invalid, "INVALID" , 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE, BAD_CODE)
+
+INST5(push , "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
+INST5(pop , "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
+// Does not affect the stack tracking in the emitter
+INST5(push_hide, "push" , 0, IUM_RD, 0, 0, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050)
+INST5(pop_hide, "pop" , 0, IUM_WR, 0, 0, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058)
+
+INST5(inc , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040)
+INST5(inc_l , "inc" , 0, IUM_RW, 0, 1, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE)
+INST5(dec , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048)
+INST5(dec_l , "dec" , 0, IUM_RW, 0, 1, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE)
+
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M eax,i32
+
+INST4(add , "add" , 0, IUM_RW, 0, 1, 0x000000, 0x000080, 0x000002, 0x000004)
+INST4(or , "or" , 0, IUM_RW, 0, 1, 0x000008, 0x000880, 0x00000A, 0x00000C)
+INST4(adc , "adc" , 0, IUM_RW, 1, 1, 0x000010, 0x001080, 0x000012, 0x000014)
+INST4(sbb , "sbb" , 0, IUM_RW, 1, 1, 0x000018, 0x001880, 0x00001A, 0x00001C)
+INST4(and , "and" , 0, IUM_RW, 0, 1, 0x000020, 0x002080, 0x000022, 0x000024)
+INST4(sub , "sub" , 0, IUM_RW, 0, 1, 0x000028, 0x002880, 0x00002A, 0x00002C)
+INST4(xor , "xor" , 0, IUM_RW, 0, 1, 0x000030, 0x003080, 0x000032, 0x000034)
+INST4(cmp , "cmp" , 0, IUM_RD, 0, 1, 0x000038, 0x003880, 0x00003A, 0x00003C)
+INST4(test , "test" , 0, IUM_RD, 0, 1, 0x000084, 0x0000F6, 0x000084, 0x0000A8)
+INST4(mov , "mov" , 0, IUM_WR, 0, 0, 0x000088, 0x0000C6, 0x00008A, 0x0000B0)
+
+INST4(lea , "lea" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE)
+
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
+
+INST3(movsx , "movsx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00BE)
+#ifdef _TARGET_AMD64_
+INST3(movsxd , "movsxd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x4800000063LL )
+#endif
+INST3(movzx , "movzx" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, 0x0F00B6)
+
+INST3(cmovo , "cmovo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0040)
+INST3(cmovno , "cmovno" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0041)
+INST3(cmovb , "cmovb" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0042)
+INST3(cmovae , "cmovae" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0043)
+INST3(cmove , "cmove" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0044)
+INST3(cmovne , "cmovne" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0045)
+INST3(cmovbe , "cmovbe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0046)
+INST3(cmova , "cmova" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0047)
+INST3(cmovs , "cmovs" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0048)
+INST3(cmovns , "cmovns" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F0049)
+INST3(cmovpe , "cmovpe" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004A)
+INST3(cmovpo , "cmovpo" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004B)
+INST3(cmovl , "cmovl" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004C)
+INST3(cmovge , "cmovge" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004D)
+INST3(cmovle , "cmovle" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004E)
+INST3(cmovg , "cmovg" , 0, IUM_WR, 1, 0, BAD_CODE, BAD_CODE, 0x0F004F)
+
+INST3(xchg , "xchg" , 0, IUM_RW, 0, 0, 0x000086, BAD_CODE, 0x000086)
+INST3(imul , "imul" , 0, IUM_RW, 0, 1, 0x0F00AC, BAD_CODE, 0x0F00AF) // op1 *= op2
+
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon reg,R/M
+
+// Instead of encoding these as 3-operand instructions, we encode them
+// as 2-operand instructions with the target register being implicit
+// implicit_reg = op1*op2_icon
+#define INSTMUL INST3
+INSTMUL(imul_AX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000068, BAD_CODE)
+INSTMUL(imul_CX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x000868, BAD_CODE)
+INSTMUL(imul_DX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001068, BAD_CODE)
+INSTMUL(imul_BX, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x001868, BAD_CODE)
+INSTMUL(imul_SP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, BAD_CODE, BAD_CODE)
+INSTMUL(imul_BP, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x002868, BAD_CODE)
+INSTMUL(imul_SI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003068, BAD_CODE)
+INSTMUL(imul_DI, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x003868, BAD_CODE)
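+// Illustrative note: the register named in each entry above is the implicit destination, so
+// imul_CX, for instance, stands for an encoding of the form "imul ecx, <op1>, <icon>"; the hex
+// codes differ only in the modrm reg field (the 0x0000ff00 position) that selects that register.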
+
+#ifdef _TARGET_AMD64_
+
+INSTMUL(imul_08, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000068, BAD_CODE)
+INSTMUL(imul_09, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400000868, BAD_CODE)
+INSTMUL(imul_10, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001068, BAD_CODE)
+INSTMUL(imul_11, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400001868, BAD_CODE)
+INSTMUL(imul_12, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002068, BAD_CODE)
+INSTMUL(imul_13, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400002868, BAD_CODE)
+INSTMUL(imul_14, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003068, BAD_CODE)
+INSTMUL(imul_15, "imul", 0, IUM_RD, 0, 1, BAD_CODE, 0x4400003868, BAD_CODE)
+
+#endif // _TARGET_AMD64_
+
+// the hex codes in this file represent the instruction encoding as follows:
+// 0x0000ff00 - modrm byte position
+// 0x000000ff - last byte of opcode (before modrm)
+// 0x00ff0000 - first byte of opcode
+// 0xff000000 - middle byte of opcode, if needed (after first, before last)
+//
+// So a 1-byte opcode is: and with modrm:
+// 0x00000011 0x0000RM11
+//
+// So a 2-byte opcode is: and with modrm:
+// 0x00002211 0x0011RM22
+//
+// So a 3-byte opcode is: and with modrm:
+// 0x00113322 0x2211RM33
+//
+// So a 4-byte opcode would be something like this:
+// 0x22114433
+
+#define PACK3(byte1,byte2,byte3) ((byte1 << 16) | (byte2 << 24) | byte3)
+#define PACK2(byte1,byte2) ((byte1 << 16) | byte2)
+#define SSEFLT(c) PACK3(0xf3, 0x0f, c)
+#define SSEDBL(c) PACK3(0xf2, 0x0f, c)
+#define PCKDBL(c) PACK3(0x66, 0x0f, c)
+#define PCKFLT(c) PACK2(0x0f,c)
+
+// These macros encode an extra byte that is implicit in the macro.
+#define PACK4(byte1,byte2,byte3,byte4) ((byte1 << 16) | (byte2 << 24) | byte3 | (byte4 << 8))
+#define SSE38(c) PACK4(0x66, 0x0f, 0x38, c)
+#define SSE3A(c) PACK4(0x66, 0x0f, 0x3A, c)
+
+// VEX* encodes the implied leading opcode bytes in c1:
+// 1: implied 0f, 2: implied 0f 38, 3: implied 0f 3a
+#define VEX2INT(c1,c2) PACK3(c1, 0xc5, c2)
+#define VEX3INT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
+#define VEX3FLT(c1,c2) PACK4(c1, 0xc5, 0x02, c2)
+
+// Please insert any SSE2 instructions between FIRST_SSE2_INSTRUCTION and LAST_SSE2_INSTRUCTION
+INST3(FIRST_SSE2_INSTRUCTION, "FIRST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
+// These are the SSE instructions used on x86
+INST3( mov_i2xmm, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6E)) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
+INST3( mov_xmm2i, "movd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7E)) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
+INST3( movq, "movq" , 0, IUM_WR, 0, 0, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E))
+INST3( movsdsse2, "movsd" , 0, IUM_WR, 0, 0, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10))
+
+INST3( punpckldq, "punpckldq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x62))
+
+INST3( xorps, "xorps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x57)) // XOR packed singles
+
+INST3( cvttsd2si, "cvttsd2si" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2C)) // cvt with trunc scalar double to signed DWORDs
+
+#ifndef LEGACY_BACKEND
+INST3( movdqu, "movdqu" , 0, IUM_WR, 0, 0, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F))
+INST3( movdqa, "movdqa" , 0, IUM_WR, 0, 0, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F))
+INST3( movlpd, "movlpd" , 0, IUM_WR, 0, 0, PCKDBL(0x13), BAD_CODE, PCKDBL(0x12))
+INST3( movlps, "movlps" , 0, IUM_WR, 0, 0, PCKFLT(0x13), BAD_CODE, PCKFLT(0x12))
+INST3( movhpd, "movhpd" , 0, IUM_WR, 0, 0, PCKDBL(0x17), BAD_CODE, PCKDBL(0x16))
+INST3( movhps, "movhps" , 0, IUM_WR, 0, 0, PCKFLT(0x17), BAD_CODE, PCKFLT(0x16))
+INST3( movss, "movss" , 0, IUM_WR, 0, 0, SSEFLT(0x11), BAD_CODE, SSEFLT(0x10))
+INST3( movapd, "movapd" , 0, IUM_WR, 0, 0, PCKDBL(0x29), BAD_CODE, PCKDBL(0x28))
+INST3( movaps, "movaps" , 0, IUM_WR, 0, 0, PCKFLT(0x29), BAD_CODE, PCKFLT(0x28))
+INST3( movupd, "movupd" , 0, IUM_WR, 0, 0, PCKDBL(0x11), BAD_CODE, PCKDBL(0x10))
+INST3( movups, "movups" , 0, IUM_WR, 0, 0, PCKFLT(0x11), BAD_CODE, PCKFLT(0x10))
+
+INST3( shufps, "shufps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC6))
+INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC6))
+
+// SSE 2 arith
+INST3( addps, "addps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x58)) // Add packed singles
+INST3( addss, "addss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x58)) // Add scalar singles
+INST3( addpd, "addpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x58)) // Add packed doubles
+INST3( addsd, "addsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x58)) // Add scalar doubles
+INST3( mulps, "mulps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x59)) // Multiply packed singles
+INST3( mulss, "mulss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x59)) // Multiply scalar single
+INST3( mulpd, "mulpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x59)) // Multiply packed doubles
+INST3( mulsd, "mulsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x59)) // Multiply scalar doubles
+INST3( subps, "subps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5C)) // Subtract packed singles
+INST3( subss, "subss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5C)) // Subtract scalar singles
+INST3( subpd, "subpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5C)) // Subtract packed doubles
+INST3( subsd, "subsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5C)) // Subtract scalar doubles
+INST3( minps, "minps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5D)) // Return Minimum packed singles
+INST3( minss, "minss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5D)) // Return Minimum scalar single
+INST3( minpd, "minpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5D)) // Return Minimum packed doubles
+INST3( minsd, "minsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5D)) // Return Minimum scalar double
+INST3( divps, "divps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5E)) // Divide packed singles
+INST3( divss, "divss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5E)) // Divide scalar singles
+INST3( divpd, "divpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5E)) // Divide packed doubles
+INST3( divsd, "divsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5E)) // Divide scalar doubles
+INST3( maxps, "maxps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5F)) // Return Maximum packed singles
+INST3( maxss, "maxss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5F)) // Return Maximum scalar single
+INST3( maxpd, "maxpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5F)) // Return Maximum packed doubles
+INST3( maxsd, "maxsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5F)) // Return Maximum scalar double
+INST3( xorpd, "xorpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x57)) // XOR packed doubles
+INST3( andps, "andps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x54)) // AND packed singles
+INST3( andpd, "andpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x54)) // AND packed doubles
+INST3( sqrtsd, "sqrtsd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x51)) // Sqrt of a scalar double
+INST3( sqrtps, "sqrtps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x51)) // Sqrt of a packed float
+INST3( sqrtpd, "sqrtpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x51)) // Sqrt of a packed double
+INST3( andnps, "andnps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x55)) // And-Not packed singles
+INST3( andnpd, "andnpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x55)) // And-Not packed doubles
+INST3( orps, "orps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x56)) // Or packed singles
+INST3( orpd, "orpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x56)) // Or packed doubles
+INST3( haddpd, "haddpd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x7C)) // Horizontal add packed doubles
+
+// SSE2 conversions
+INST3( cvtpi2ps, "cvtpi2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2A)) // cvt packed DWORDs to singles
+INST3( cvtsi2ss, "cvtsi2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2A)) // cvt DWORD to scalar single
+INST3( cvtpi2pd, "cvtpi2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2A)) // cvt packed DWORDs to doubles
+INST3( cvtsi2sd, "cvtsi2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2A)) // cvt DWORD to scalar double
+INST3( cvttps2pi, "cvttps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2C)) // cvt with trunc packed singles to DWORDs
+INST3( cvttss2si, "cvttss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2C)) // cvt with trunc scalar single to DWORD
+INST3( cvttpd2pi, "cvttpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2C)) // cvt with trunc packed doubles to DWORDs
+INST3( cvtps2pi, "cvtps2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2D)) // cvt packed singles to DWORDs
+INST3( cvtss2si, "cvtss2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x2D)) // cvt scalar single to DWORD
+INST3( cvtpd2pi, "cvtpd2pi", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2D)) // cvt packed doubles to DWORDs
+INST3( cvtsd2si, "cvtsd2si", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x2D)) // cvt scalar double to DWORD
+INST3( cvtps2pd, "cvtps2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5A)) // cvt packed singles to doubles
+INST3( cvtpd2ps, "cvtpd2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5A)) // cvt packed doubles to singles
+INST3( cvtss2sd, "cvtss2sd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5A)) // cvt scalar single to scalar doubles
+INST3( cvtsd2ss, "cvtsd2ss", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0x5A)) // cvt scalar double to scalar singles
+INST3( cvtdq2ps, "cvtdq2ps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x5B)) // cvt packed DWORDs to singles
+INST3( cvtps2dq, "cvtps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x5B)) // cvt packed singles to DWORDs
+INST3( cvttps2dq, "cvttps2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0x5B)) // cvt with trunc packed singles to DWORDs
+INST3( cvtpd2dq, "cvtpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEDBL(0xE6)) // cvt packed doubles to DWORDs
+INST3( cvttpd2dq, "cvttpd2dq", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xE6)) // cvt with trunc packed doubles to DWORDs
+INST3( cvtdq2pd, "cvtdq2pd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSEFLT(0xE6)) // cvt packed DWORDs to doubles
+
+// SSE2 comparison instructions
+INST3( ucomiss, "ucomiss", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0x2E)) // unordered compare singles
+INST3( ucomisd, "ucomisd", 0, IUM_RD, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x2E)) // unordered compare doubles
+
+// SSE2 packed single/double comparison operations.
+// Note that these instructions not only compare but also overwrite the first source.
+INST3( cmpps, "cmpps", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKFLT(0xC2)) // compare packed singles
+INST3( cmppd, "cmppd", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC2)) // compare packed doubles
+
+//SSE2 packed integer operations
+INST3( paddb, "paddb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFC)) // Add packed byte integers
+INST3( paddw, "paddw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFD)) // Add packed word (16-bit) integers
+INST3( paddd, "paddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFE)) // Add packed double-word (32-bit) integers
+INST3( paddq, "paddq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD4)) // Add packed quad-word (64-bit) integers
+INST3( psubb, "psubb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF8)) // Subtract packed byte integers
+INST3( psubw, "psubw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF9)) // Subtract packed word (16-bit) integers
+INST3( psubd, "psubd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFA)) // Subtract packed double-word (32-bit) integers
+INST3( psubq, "psubq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xFB)) // subtract packed quad-word (64-bit) integers
+INST3( pmuludq, "pmuludq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xF4)) // packed multiply 32-bit unsigned integers and store 64-bit result
+INST3( pmullw, "pmullw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xD5)) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result
+INST3( pand, "pand" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDB)) // Packed bit-wise AND of two xmm regs
+INST3( pandn, "pandn" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDF)) // Packed bit-wise AND NOT of two xmm regs
+INST3( por, "por" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEB)) // Packed bit-wise OR of two xmm regs
+INST3( pxor, "pxor" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEF)) // Packed bit-wise XOR of two xmm regs
+INST3( psrldq, "psrldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift right logical of xmm reg by given number of bytes
+INST3( pslldq, "pslldq" , 0, IUM_WR, 0, 0, BAD_CODE, PCKDBL(0x73), BAD_CODE ) // Shift left logical of xmm reg by given number of bytes
+INST3( pmaxub, "pmaxub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDE)) // packed maximum unsigned bytes
+INST3( pminub, "pminub" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xDA)) // packed minimum unsigned bytes
+INST3( pmaxsw, "pmaxsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEE)) // packed maximum signed words
+INST3( pminsw, "pminsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xEA)) // packed minimum signed words
+INST3( pcmpeqd, "pcmpeqd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x76)) // Packed compare 32-bit integers for equality
+INST3( pcmpgtd, "pcmpgtd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x66)) // Packed compare 32-bit signed integers for greater than
+INST3( pcmpeqw, "pcmpeqw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x75)) // Packed compare 16-bit integers for equality
+INST3( pcmpgtw, "pcmpgtw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x65)) // Packed compare 16-bit signed integers for greater than
+INST3( pcmpeqb, "pcmpeqb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x74)) // Packed compare 8-bit integers for equality
+INST3( pcmpgtb, "pcmpgtb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x64)) // Packed compare 8-bit signed integers for greater than
+
+INST3( pshufd, "pshufd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x70)) // Packed shuffle of 32-bit integers
+INST3( pextrw, "pextrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC5)) // Extract 16-bit value into a r32 with zero extended to 32-bits
+INST3( pinsrw, "pinsrw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0xC4)) // packed insert word
+
+#endif // !LEGACY_BACKEND
+INST3(LAST_SSE2_INSTRUCTION, "LAST_SSE2_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
+#ifndef LEGACY_BACKEND
+INST3(FIRST_SSE4_INSTRUCTION, "FIRST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+// Most of the following instructions should be included in the method Is4ByteAVXInstruction()
+// enum name FP updmode rf wf MR MI RM
+INST3( dpps, "dpps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x40)) // Packed dot product of packed single precision floating point values
+INST3( dppd, "dppd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x41)) // Packed dot product of packed double precision floating point values
+INST3( insertps, "insertps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x21)) // Insert packed single precision float value
+INST3( pcmpeqq, "pcmpeqq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x29)) // Packed compare 64-bit integers for equality
+INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x37)) // Packed compare 64-bit signed integers for greater than
+INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
+INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
+INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+// AVX only instructions
+INST3( vbroadcastss, "broadcastss" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x18)) // Broadcast float value read from memory to entire ymm register
+INST3( vbroadcastsd, "broadcastsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x19)) // Broadcast double value read from memory to entire ymm register
+INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x78)) // Broadcast int8 value from reg/memory to entire ymm register
+INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x79)) // Broadcast int16 value from reg/memory to entire ymm register
+INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x58)) // Broadcast int32 value from reg/memory to entire ymm register
+INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x59)) // Broadcast int64 value from reg/memory to entire ymm register
+INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, BAD_CODE) // Extract 128-bit packed floating point values
+INST3( vinsertf128, "insertf128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x18)) // Insert 128-bit packed floating point values
+INST3( vzeroupper, "zeroupper" , 0, IUM_WR, 0, 0, 0xC577F8, BAD_CODE, BAD_CODE) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
+
+INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+#endif // !LEGACY_BACKEND
+// enum name FP updmode rf wf R/M,R/M[reg] R/M,icon
+
+INST2(ret , "ret" , 0, IUM_RD, 0, 0, 0x0000C3, 0x0000C2)
+INST2(loop , "loop" , 0, IUM_RD, 0, 0, BAD_CODE, 0x0000E2)
+INST2(call , "call" , 0, IUM_RD, 0, 1, 0x0010FF, 0x0000E8)
+
+INST2(rol , "rol" , 0, IUM_RW, 0, 1, 0x0000D2, BAD_CODE)
+INST2(rol_1 , "rol" , 0, IUM_RW, 0, 1, 0x0000D0, 0x0000D0)
+INST2(rol_N , "rol" , 0, IUM_RW, 0, 1, 0x0000C0, 0x0000C0)
+INST2(ror , "ror" , 0, IUM_RW, 0, 1, 0x0008D2, BAD_CODE)
+INST2(ror_1 , "ror" , 0, IUM_RW, 0, 1, 0x0008D0, 0x0008D0)
+INST2(ror_N , "ror" , 0, IUM_RW, 0, 1, 0x0008C0, 0x0008C0)
+
+INST2(rcl , "rcl" , 0, IUM_RW, 1, 1, 0x0010D2, BAD_CODE)
+INST2(rcl_1 , "rcl" , 0, IUM_RW, 1, 1, 0x0010D0, 0x0010D0)
+INST2(rcl_N , "rcl" , 0, IUM_RW, 1, 1, 0x0010C0, 0x0010C0)
+INST2(rcr , "rcr" , 0, IUM_RW, 1, 1, 0x0018D2, BAD_CODE)
+INST2(rcr_1 , "rcr" , 0, IUM_RW, 1, 1, 0x0018D0, 0x0018D0)
+INST2(rcr_N , "rcr" , 0, IUM_RW, 1, 1, 0x0018C0, 0x0018C0)
+INST2(shl , "shl" , 0, IUM_RW, 0, 1, 0x0020D2, BAD_CODE)
+INST2(shl_1 , "shl" , 0, IUM_RW, 0, 1, 0x0020D0, 0x0020D0)
+INST2(shl_N , "shl" , 0, IUM_RW, 0, 1, 0x0020C0, 0x0020C0)
+INST2(shr , "shr" , 0, IUM_RW, 0, 1, 0x0028D2, BAD_CODE)
+INST2(shr_1 , "shr" , 0, IUM_RW, 0, 1, 0x0028D0, 0x0028D0)
+INST2(shr_N , "shr" , 0, IUM_RW, 0, 1, 0x0028C0, 0x0028C0)
+INST2(sar , "sar" , 0, IUM_RW, 0, 1, 0x0038D2, BAD_CODE)
+INST2(sar_1 , "sar" , 0, IUM_RW, 0, 1, 0x0038D0, 0x0038D0)
+INST2(sar_N , "sar" , 0, IUM_RW, 0, 1, 0x0038C0, 0x0038C0)
+
+
+// enum name FP updmode rf wf R/M,R/M[reg]
+
+INST1(r_movsb, "rep movsb" , 0, IUM_RD, 0, 0, 0x00A4F3)
+INST1(r_movsd, "rep movsd" , 0, IUM_RD, 0, 0, 0x00A5F3)
+#ifndef LEGACY_BACKEND
+INST1(r_movsq, "rep movsq" , 0, IUM_RD, 0, 0, 0xF3A548)
+#endif // !LEGACY_BACKEND
+INST1(movsb , "movsb" , 0, IUM_RD, 0, 0, 0x0000A4)
+INST1(movsd , "movsd" , 0, IUM_RD, 0, 0, 0x0000A5)
+#ifndef LEGACY_BACKEND
+INST1(movsq, "movsq" , 0, IUM_RD, 0, 0, 0x00A548)
+#endif // !LEGACY_BACKEND
+
+INST1(r_stosb, "rep stosb" , 0, IUM_RD, 0, 0, 0x00AAF3)
+INST1(r_stosd, "rep stosd" , 0, IUM_RD, 0, 0, 0x00ABF3)
+#ifndef LEGACY_BACKEND
+INST1(r_stosq, "rep stosq" , 0, IUM_RD, 0, 0, 0xF3AB48)
+#endif // !LEGACY_BACKEND
+INST1(stosb, "stosb" , 0, IUM_RD, 0, 0, 0x0000AA)
+INST1(stosd, "stosd" , 0, IUM_RD, 0, 0, 0x0000AB)
+#ifndef LEGACY_BACKEND
+INST1(stosq, "stosq" , 0, IUM_RD, 0, 0, 0x00AB48)
+#endif // !LEGACY_BACKEND
+
+INST1(int3 , "int3" , 0, IUM_RD, 0, 0, 0x0000CC)
+INST1(nop , "nop" , 0, IUM_RD, 0, 0, 0x000090)
+INST1(lock , "lock" , 0, IUM_RD, 0, 0, 0x0000F0)
+INST1(leave , "leave" , 0, IUM_RD, 0, 0, 0x0000C9)
+
+
+INST1(neg , "neg" , 0, IUM_RW, 0, 1, 0x0018F6)
+INST1(not , "not" , 0, IUM_RW, 0, 1, 0x0010F6)
+
+INST1(cdq , "cdq" , 0, IUM_RD, 0, 1, 0x000099)
+INST1(idiv , "idiv" , 0, IUM_RD, 0, 1, 0x0038F6)
+INST1(imulEAX, "imul" , 0, IUM_RD, 0, 1, 0x0028F6) // edx:eax = eax*op1
+INST1(div , "div" , 0, IUM_RD, 0, 1, 0x0030F6)
+INST1(mulEAX , "mul" , 0, IUM_RD, 0, 1, 0x0020F6)
+
+INST1(sahf , "sahf" , 0, IUM_RD, 0, 1, 0x00009E)
+
+INST1(xadd , "xadd" , 0, IUM_RW, 0, 1, 0x0F00C0)
+INST1(cmpxchg, "cmpxchg" , 0, IUM_RW, 0, 1, 0x0F00B0)
+
+INST1(shld , "shld" , 0, IUM_RW, 0, 1, 0x0F00A4)
+INST1(shrd , "shrd" , 0, IUM_RW, 0, 1, 0x0F00AC)
+
+// For RyuJIT/x86, we follow the x86 calling convention that requires
+// us to return floating point value on the x87 FP stack, so we need
+// these instructions regardless of whether we're using full stack fp.
+#ifdef _TARGET_X86_
+INST1(fld , "fld" , 1, IUM_WR, 0, 0, 0x0000D9)
+INST1(fstp , "fstp" , 1, IUM_WR, 0, 0, 0x0018D9)
+#endif // _TARGET_X86
+
+#if FEATURE_STACK_FP_X87
+INST1(fnstsw , "fnstsw" , 1, IUM_WR, 1, 0, 0x0020DF)
+INST1(fcom , "fcom" , 1, IUM_RD, 0, 1, 0x0010D8)
+INST1(fcomp , "fcomp" , 1, IUM_RD, 0, 1, 0x0018D8)
+INST1(fcompp , "fcompp" , 1, IUM_RD, 0, 1, 0x00D9DE)
+INST1(fcomi , "fcomi" , 1, IUM_RD, 0, 1, 0x00F0DB)
+INST1(fcomip , "fcomip" , 1, IUM_RD, 0, 1, 0x00F0DF)
+
+INST1(fchs , "fchs" , 1, IUM_RW, 0, 1, 0x00E0D9)
+INST1(fabs , "fabs" , 1, IUM_RW, 0, 1, 0x00E1D9)
+INST1(fsin , "fsin" , 1, IUM_RW, 0, 1, 0x00FED9)
+INST1(fcos , "fcos" , 1, IUM_RW, 0, 1, 0x00FFD9)
+INST1(fsqrt , "fsqrt" , 1, IUM_RW, 0, 1, 0x00FAD9)
+INST1(fldl2e , "fldl2e" , 1, IUM_RW, 0, 1, 0x00EAD9)
+INST1(frndint, "frndint" , 1, IUM_RW, 0, 1, 0x00FCD9)
+INST1(f2xm1 , "f2xm1" , 1, IUM_RW, 0, 1, 0x00F0D9)
+INST1(fscale , "fscale" , 1, IUM_RW, 0, 1, 0x00FDD9)
+
+INST1(fld1 , "fld1" , 1, IUM_WR, 0, 0, 0x00E8D9)
+INST1(fldz , "fldz" , 1, IUM_WR, 0, 0, 0x00EED9)
+INST1(fst , "fst" , 1, IUM_WR, 0, 0, 0x0010D9)
+
+INST1(fadd , "fadd" , 1, IUM_RW, 0, 0, 0x0000D8)
+INST1(faddp , "faddp" , 1, IUM_RW, 0, 0, 0x0000DA)
+INST1(fsub , "fsub" , 1, IUM_RW, 0, 0, 0x0020D8)
+INST1(fsubp , "fsubp" , 1, IUM_RW, 0, 0, 0x0028DA)
+INST1(fsubr , "fsubr" , 1, IUM_RW, 0, 0, 0x0028D8)
+INST1(fsubrp , "fsubrp" , 1, IUM_RW, 0, 0, 0x0020DA)
+INST1(fmul , "fmul" , 1, IUM_RW, 0, 0, 0x0008D8)
+INST1(fmulp , "fmulp" , 1, IUM_RW, 0, 0, 0x0008DA)
+INST1(fdiv , "fdiv" , 1, IUM_RW, 0, 0, 0x0030D8)
+INST1(fdivp , "fdivp" , 1, IUM_RW, 0, 0, 0x0038DA)
+INST1(fdivr , "fdivr" , 1, IUM_RW, 0, 0, 0x0038D8)
+INST1(fdivrp , "fdivrp" , 1, IUM_RW, 0, 0, 0x0030DA)
+
+INST1(fxch , "fxch" , 1, IUM_RW, 0, 0, 0x00C8D9)
+INST1(fprem , "fprem" , 0, IUM_RW, 0, 1, 0x00F8D9)
+
+INST1(fild , "fild" , 1, IUM_RD, 0, 0, 0x0000DB)
+INST1(fildl , "fild" , 1, IUM_RD, 0, 0, 0x0028DB)
+INST1(fistp , "fistp" , 1, IUM_WR, 0, 0, 0x0018DB)
+INST1(fistpl , "fistp" , 1, IUM_WR, 0, 0, 0x0038DB)
+
+INST1(fldcw , "fldcw" , 1, IUM_RD, 0, 0, 0x0028D9)
+INST1(fnstcw , "fnstcw" , 1, IUM_WR, 0, 0, 0x0038D9)
+#endif // FEATURE_STACK_FP_X87
+
+INST1(seto , "seto" , 0, IUM_WR, 1, 0, 0x0F0090)
+INST1(setno , "setno" , 0, IUM_WR, 1, 0, 0x0F0091)
+INST1(setb , "setb" , 0, IUM_WR, 1, 0, 0x0F0092)
+INST1(setae , "setae" , 0, IUM_WR, 1, 0, 0x0F0093)
+INST1(sete , "sete" , 0, IUM_WR, 1, 0, 0x0F0094)
+INST1(setne , "setne" , 0, IUM_WR, 1, 0, 0x0F0095)
+INST1(setbe , "setbe" , 0, IUM_WR, 1, 0, 0x0F0096)
+INST1(seta , "seta" , 0, IUM_WR, 1, 0, 0x0F0097)
+INST1(sets , "sets" , 0, IUM_WR, 1, 0, 0x0F0098)
+INST1(setns , "setns" , 0, IUM_WR, 1, 0, 0x0F0099)
+INST1(setpe , "setpe" , 0, IUM_WR, 1, 0, 0x0F009A)
+INST1(setpo , "setpo" , 0, IUM_WR, 1, 0, 0x0F009B)
+INST1(setl , "setl" , 0, IUM_WR, 1, 0, 0x0F009C)
+INST1(setge , "setge" , 0, IUM_WR, 1, 0, 0x0F009D)
+INST1(setle , "setle" , 0, IUM_WR, 1, 0, 0x0F009E)
+INST1(setg , "setg" , 0, IUM_WR, 1, 0, 0x0F009F)
+
+#ifdef _TARGET_AMD64_
+// A jump with rex prefix. This is used for register indirect
+// tail calls.
+INST1(rex_jmp, "rex.jmp" , 0, IUM_RD, 0, 0, 0x0020FE)
+#endif
+
+INST1(i_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0020FE)
+
+INST0(jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000EB)
+INST0(jo , "jo" , 0, IUM_RD, 1, 0, 0x000070)
+INST0(jno , "jno" , 0, IUM_RD, 1, 0, 0x000071)
+INST0(jb , "jb" , 0, IUM_RD, 1, 0, 0x000072)
+INST0(jae , "jae" , 0, IUM_RD, 1, 0, 0x000073)
+INST0(je , "je" , 0, IUM_RD, 1, 0, 0x000074)
+INST0(jne , "jne" , 0, IUM_RD, 1, 0, 0x000075)
+INST0(jbe , "jbe" , 0, IUM_RD, 1, 0, 0x000076)
+INST0(ja , "ja" , 0, IUM_RD, 1, 0, 0x000077)
+INST0(js , "js" , 0, IUM_RD, 1, 0, 0x000078)
+INST0(jns , "jns" , 0, IUM_RD, 1, 0, 0x000079)
+INST0(jpe , "jpe" , 0, IUM_RD, 1, 0, 0x00007A)
+INST0(jpo , "jpo" , 0, IUM_RD, 1, 0, 0x00007B)
+INST0(jl , "jl" , 0, IUM_RD, 1, 0, 0x00007C)
+INST0(jge , "jge" , 0, IUM_RD, 1, 0, 0x00007D)
+INST0(jle , "jle" , 0, IUM_RD, 1, 0, 0x00007E)
+INST0(jg , "jg" , 0, IUM_RD, 1, 0, 0x00007F)
+
+INST0(l_jmp , "jmp" , 0, IUM_RD, 0, 0, 0x0000E9)
+INST0(l_jo , "jo" , 0, IUM_RD, 1, 0, 0x00800F)
+INST0(l_jno , "jno" , 0, IUM_RD, 1, 0, 0x00810F)
+INST0(l_jb , "jb" , 0, IUM_RD, 1, 0, 0x00820F)
+INST0(l_jae , "jae" , 0, IUM_RD, 1, 0, 0x00830F)
+INST0(l_je , "je" , 0, IUM_RD, 1, 0, 0x00840F)
+INST0(l_jne , "jne" , 0, IUM_RD, 1, 0, 0x00850F)
+INST0(l_jbe , "jbe" , 0, IUM_RD, 1, 0, 0x00860F)
+INST0(l_ja , "ja" , 0, IUM_RD, 1, 0, 0x00870F)
+INST0(l_js , "js" , 0, IUM_RD, 1, 0, 0x00880F)
+INST0(l_jns , "jns" , 0, IUM_RD, 1, 0, 0x00890F)
+INST0(l_jpe , "jpe" , 0, IUM_RD, 1, 0, 0x008A0F)
+INST0(l_jpo , "jpo" , 0, IUM_RD, 1, 0, 0x008B0F)
+INST0(l_jl , "jl" , 0, IUM_RD, 1, 0, 0x008C0F)
+INST0(l_jge , "jge" , 0, IUM_RD, 1, 0, 0x008D0F)
+INST0(l_jle , "jle" , 0, IUM_RD, 1, 0, 0x008E0F)
+INST0(l_jg , "jg" , 0, IUM_RD, 1, 0, 0x008F0F)
+
+INST0(align , "align" , 0, IUM_RD, 0, 0, BAD_CODE)
+
+/*****************************************************************************/
+#undef INST0
+#undef INST1
+#undef INST2
+#undef INST3
+#undef INST4
+#undef INST5
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/jit.h b/src/jit/jit.h
new file mode 100644
index 0000000000..7bf5cd4051
--- /dev/null
+++ b/src/jit/jit.h
@@ -0,0 +1,891 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _JIT_H_
+#define _JIT_H_
+/*****************************************************************************/
+
+//
+// clr.sln only defines _DEBUG
+// The jit uses DEBUG rather than _DEBUG
+// So we make sure that _DEBUG implies DEBUG
+//
+#ifdef _DEBUG
+#ifndef DEBUG
+#define DEBUG 1
+#endif
+#endif
+
+// Clang-format messes with the indentation of comments if they directly precede an
+// ifdef. This macro allows us to anchor the comments to the regular flow of code.
+#define CLANG_FORMAT_COMMENT_ANCHOR ;
+
+// Clang-tidy replaces 0 with nullptr in some templated functions, causing a build
+// break. Replacing those instances with ZERO avoids this change
+#define ZERO 0
+
+#ifdef _MSC_VER
+// These don't seem useful, so turning them off is no big deal
+#pragma warning(disable : 4510) // can't generate default constructor
+#pragma warning(disable : 4511) // can't generate copy constructor
+#pragma warning(disable : 4512) // can't generate assignment operator
+#pragma warning(disable : 4610) // user defined constructor required
+#pragma warning(disable : 4211) // nonstandard extension used (char name[0] in structs)
+#pragma warning(disable : 4127) // conditional expression constant
+#pragma warning(disable : 4201) // "nonstandard extension used : nameless struct/union"
+
+// Depending on the code base, you may want to not disable these
+#pragma warning(disable : 4245) // assigning signed / unsigned
+#pragma warning(disable : 4146) // unary minus applied to unsigned
+
+#pragma warning(disable : 4100) // unreferenced formal parameter
+#pragma warning(disable : 4291) // new operator without delete (only in emitX86.cpp)
+#endif
+
+#ifdef _MSC_VER
+#define CHECK_STRUCT_PADDING 0 // Set this to '1' to enable warning C4820 "'bytes' bytes padding added after
+ // construct 'member_name'" on interesting structs/classes
+#else
+#define CHECK_STRUCT_PADDING 0 // Never enable it for non-MSFT compilers
+#endif
+
+#if defined(_X86_)
+#if defined(_ARM_)
+#error Cannot define both _X86_ and _ARM_
+#endif
+#if defined(_AMD64_)
+#error Cannot define both _X86_ and _AMD64_
+#endif
+#if defined(_ARM64_)
+#error Cannot define both _X86_ and _ARM64_
+#endif
+#define _HOST_X86_
+#elif defined(_AMD64_)
+#if defined(_X86_)
+#error Cannot define both _AMD64_ and _X86_
+#endif
+#if defined(_ARM_)
+#error Cannot define both _AMD64_ and _ARM_
+#endif
+#if defined(_ARM64_)
+#error Cannot define both _AMD64_ and _ARM64_
+#endif
+#define _HOST_AMD64_
+#elif defined(_ARM_)
+#if defined(_X86_)
+#error Cannot define both _ARM_ and _X86_
+#endif
+#if defined(_AMD64_)
+#error Cannot define both _ARM_ and _AMD64_
+#endif
+#if defined(_ARM64_)
+#error Cannot define both _ARM_ and _ARM64_
+#endif
+#define _HOST_ARM_
+#elif defined(_ARM64_)
+#if defined(_X86_)
+#error Cannot define both _ARM64_ and _X86_
+#endif
+#if defined(_AMD64_)
+#error Cannot define both _ARM64_ and _AMD64_
+#endif
+#if defined(_ARM_)
+#error Cannot define both _ARM64_ and _ARM_
+#endif
+#define _HOST_ARM64_
+#else
+#error Unsupported or unset host architecture
+#endif
+
+#if defined(_HOST_AMD64_) || defined(_HOST_ARM64_)
+#define _HOST_64BIT_
+#endif
+
+#if defined(_TARGET_X86_)
+#if defined(_TARGET_ARM_)
+#error Cannot define both _TARGET_X86_ and _TARGET_ARM_
+#endif
+#if defined(_TARGET_AMD64_)
+#error Cannot define both _TARGET_X86_ and _TARGET_AMD64_
+#endif
+#if defined(_TARGET_ARM64_)
+#error Cannot define both _TARGET_X86_ and _TARGET_ARM64_
+#endif
+#if !defined(_HOST_X86_)
+#define _CROSS_COMPILER_
+#endif
+#elif defined(_TARGET_AMD64_)
+#if defined(_TARGET_X86_)
+#error Cannot define both _TARGET_AMD64_ and _TARGET_X86_
+#endif
+#if defined(_TARGET_ARM_)
+#error Cannot define both _TARGET_AMD64_ and _TARGET_ARM_
+#endif
+#if defined(_TARGET_ARM64_)
+#error Cannot define both _TARGET_AMD64_ and _TARGET_ARM64_
+#endif
+#if !defined(_HOST_AMD64_)
+#define _CROSS_COMPILER_
+#endif
+#elif defined(_TARGET_ARM_)
+#if defined(_TARGET_X86_)
+#error Cannot define both _TARGET_ARM_ and _TARGET_X86_
+#endif
+#if defined(_TARGET_AMD64_)
+#error Cannot define both _TARGET_ARM_ and _TARGET_AMD64_
+#endif
+#if defined(_TARGET_ARM64_)
+#error Cannot define both _TARGET_ARM_ and _TARGET_ARM64_
+#endif
+#if !defined(_HOST_ARM_)
+#define _CROSS_COMPILER_
+#endif
+#elif defined(_TARGET_ARM64_)
+#if defined(_TARGET_X86_)
+#error Cannot define both _TARGET_ARM64_ and _TARGET_X86_
+#endif
+#if defined(_TARGET_AMD64_)
+#error Cannot define both _TARGET_ARM64_ and _TARGET_AMD64_
+#endif
+#if defined(_TARGET_ARM_)
+#error Cannot define both _TARGET_ARM64_ and _TARGET_ARM_
+#endif
+#if !defined(_HOST_ARM64_)
+#define _CROSS_COMPILER_
+#endif
+#else
+#error Unsupported or unset target architecture
+#endif
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+#define _TARGET_64BIT_
+#endif
+
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
+#define _TARGET_XARCH_
+#endif
+
+#if defined(_TARGET_ARM_) || defined(_TARGET_ARM64_)
+#define _TARGET_ARMARCH_
+#endif
+
+// --------------------------------------------------------------------------------
+// IMAGE_FILE_MACHINE_TARGET
+// --------------------------------------------------------------------------------
+
+#if defined(_TARGET_X86_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_I386
+#elif defined(_TARGET_AMD64_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_AMD64
+#elif defined(_TARGET_ARM_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARMNT
+#elif defined(_TARGET_ARM64_)
+#define IMAGE_FILE_MACHINE_TARGET IMAGE_FILE_MACHINE_ARM64 // 0xAA64
+#else
+#error Unsupported or unset target architecture
+#endif
+
+// Include the AMD64 unwind codes when appropriate.
+#if defined(_TARGET_AMD64_)
+#include "win64unwind.h"
+#endif
+
+// Macros for defining strongly-typed enums. Use as follows:
+//
+// DECLARE_TYPED_ENUM(FooEnum,BYTE)
+// {
+// fooTag1, fooTag2
+// }
+// END_DECLARE_TYPED_ENUM(FooEnum, BYTE)
+//
+// VC++ understands the syntax to declare these directly, e.g., "enum FooEnum : BYTE",
+// but GCC does not, so we use typedefs.
+
+#define DECLARE_TYPED_ENUM(tag, baseType) enum tag : baseType
+
+#define END_DECLARE_TYPED_ENUM(tag, baseType) ;
+
+#include "corhdr.h"
+#include "corjit.h"
+
+#define __OPERATOR_NEW_INLINE 1 // indicate that I will define these
+#define __PLACEMENT_NEW_INLINE // don't bring in the global placement new, it is easy to make a mistake
+ // with our new(compiler*) pattern.
+
+#if COR_JIT_EE_VER > 460
+#define NO_CLRCONFIG // Don't bring in the usual CLRConfig infrastructure, since the JIT uses the JIT/EE
+ // interface to retrieve config values.
+
+// This is needed for contract.inl when FEATURE_STACK_PROBE is enabled.
+struct CLRConfig
+{
+ static struct ConfigKey
+ {
+ } EXTERNAL_NO_SO_NOT_MAINLINE;
+ static DWORD GetConfigValue(const ConfigKey& key)
+ {
+ return 0;
+ }
+};
+#endif
+
+#include "utilcode.h" // this defines assert as _ASSERTE
+#include "host.h" // this redefines assert for the JIT to use assertAbort
+#include "utils.h"
+
+#ifdef DEBUG
+#define INDEBUG(x) x
+#define INDEBUG_COMMA(x) x,
+#define DEBUGARG(x) , x
+#else
+#define INDEBUG(x)
+#define INDEBUG_COMMA(x)
+#define DEBUGARG(x)
+#endif
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+#define INDEBUG_LDISASM_COMMA(x) x,
+#else
+#define INDEBUG_LDISASM_COMMA(x)
+#endif
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x) , x
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x) x
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(x)
+#define FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(x)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#if defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x) , x
+#define UNIX_AMD64_ABI_ONLY(x) x
+#else // !defined(UNIX_AMD64_ABI)
+#define UNIX_AMD64_ABI_ONLY_ARG(x)
+#define UNIX_AMD64_ABI_ONLY(x)
+#endif // defined(UNIX_AMD64_ABI)
+
+#if defined(UNIX_AMD64_ABI) || defined(_TARGET_ARM64_)
+#define MULTIREG_HAS_SECOND_GC_RET 1
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x) , x
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY(x) x
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#define MULTIREG_HAS_SECOND_GC_RET 0
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(x)
+#define MULTIREG_HAS_SECOND_GC_RET_ONLY(x)
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+// To get rid of warning 4701 : local variable may be used without being initialized
+#define DUMMY_INIT(x) (x)
+
+#define REGEN_SHORTCUTS 0
+#define REGEN_CALLPAT 0
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX jit.h XX
+XX XX
+XX Interface of the JIT with jit.cpp XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#if defined(DEBUG)
+#include "log.h"
+
+#define INFO6 LL_INFO10000 // Did Jit or Inline succeed?
+#define INFO7 LL_INFO100000 // NYI stuff
+#define INFO8 LL_INFO1000000 // Weird failures
+#define INFO9 LL_EVERYTHING // Info about incoming settings
+#define INFO10 LL_EVERYTHING // Totally verbose
+
+#endif // DEBUG
+
+typedef class ICorJitInfo* COMP_HANDLE;
+
+const CORINFO_CLASS_HANDLE NO_CLASS_HANDLE = (CORINFO_CLASS_HANDLE) nullptr;
+
+/*****************************************************************************/
+
+inline bool False()
+{
+ return false;
+} // Use to disable code while keeping prefast happy
+
+// We define two IL offset types, as follows:
+//
+// IL_OFFSET: either a distinguished value, or an IL offset.
+// IL_OFFSETX: either a distinguished value, or the top two bits are flags, and the remaining bottom
+// bits are an IL offset.
+//
+// In both cases, the set of legal distinguished values is:
+// BAD_IL_OFFSET -- A unique illegal IL offset number. Note that it must be different from
+// the ICorDebugInfo values, below, and must also not be a legal IL offset.
+// ICorDebugInfo::NO_MAPPING -- The IL offset corresponds to no source code (such as EH step blocks).
+// ICorDebugInfo::PROLOG -- The IL offset indicates a prolog
+// ICorDebugInfo::EPILOG -- The IL offset indicates an epilog
+//
+// The IL offset must be in the range [0 .. 0x3fffffff]. This is because we steal
+// the top two bits in IL_OFFSETX for flags, but we want the maximum range to be the same
+// for both types. The IL value can't be larger than the maximum IL offset of the function
+// being compiled.
+//
+// Blocks and statements never store one of the ICorDebugInfo values, even for IL_OFFSETX types. These are
+// only stored in the IPmappingDsc struct, ipmdILoffsx field.
+
+typedef unsigned IL_OFFSET;
+
+const IL_OFFSET BAD_IL_OFFSET = 0x80000000;
+const IL_OFFSET MAX_IL_OFFSET = 0x3fffffff;
+
+typedef unsigned IL_OFFSETX; // IL_OFFSET with stack-empty or call-instruction bit
+const IL_OFFSETX IL_OFFSETX_STKBIT = 0x80000000; // Note: this bit is set when the stack is NOT empty!
+const IL_OFFSETX IL_OFFSETX_CALLINSTRUCTIONBIT = 0x40000000; // Set when the IL offset is for a call instruction.
+const IL_OFFSETX IL_OFFSETX_BITS = IL_OFFSETX_STKBIT | IL_OFFSETX_CALLINSTRUCTIONBIT;
+
+IL_OFFSET jitGetILoffs(IL_OFFSETX offsx);
+IL_OFFSET jitGetILoffsAny(IL_OFFSETX offsx);
+bool jitIsStackEmpty(IL_OFFSETX offsx);
+bool jitIsCallInstruction(IL_OFFSETX offsx);
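+
+// A minimal illustrative sketch of the bit manipulation these accessors imply (not the actual
+// definitions, which live elsewhere in the jit sources and also handle the distinguished
+// ICorDebugInfo values described above); the helper names here are hypothetical:
+//
+//   inline IL_OFFSET StripILOffsetFlags(IL_OFFSETX offsx) // keep bits [0..29], drop the flag bits
+//   {
+//       return IL_OFFSET(offsx & ~IL_OFFSETX_BITS);
+//   }
+//
+//   inline bool HasEmptyStack(IL_OFFSETX offsx) // note: the STKBIT is set when the stack is NOT empty
+//   {
+//       return (offsx & IL_OFFSETX_STKBIT) == 0;
+//   }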
+
+const unsigned BAD_VAR_NUM = UINT_MAX;
+
+// Code can't be more than 2^31 in any direction. This is signed, so it should be used for anything that is
+// relative to something else.
+typedef int NATIVE_OFFSET;
+
+// This is the same as the above, but it's used in absolute contexts (i.e. offset from the start). Also,
+// this is used for native code sizes.
+typedef unsigned UNATIVE_OFFSET;
+
+typedef ptrdiff_t ssize_t;
+
+// For the following specially handled FIELD_HANDLES we need
+// values that are negative and have the low two bits zero
+// See eeFindJitDataOffs and eeGetJitDataOffs in Compiler.hpp
+#define FLD_GLOBAL_DS ((CORINFO_FIELD_HANDLE)-4)
+#define FLD_GLOBAL_FS ((CORINFO_FIELD_HANDLE)-8)
+
+/*****************************************************************************/
+
+#include "vartype.h"
+
+/*****************************************************************************/
+
+// Debugging support is ON by default. Can be turned OFF by
+// adding /DDEBUGGING_SUPPORT=0 on the command line.
+
+#ifndef DEBUGGING_SUPPORT
+#define DEBUGGING_SUPPORT
+#elif !DEBUGGING_SUPPORT
+#undef DEBUGGING_SUPPORT
+#endif
+
+/*****************************************************************************/
+
+// Late disassembly is OFF by default. Can be turned ON by
+// adding /DLATE_DISASM=1 on the command line.
+// Always OFF in the non-debug version
+
+#if defined(LATE_DISASM) && (LATE_DISASM == 0)
+#undef LATE_DISASM
+#endif
+
+/*****************************************************************************/
+
+/*****************************************************************************/
+
+#define FEATURE_VALNUM_CSE 1 // enable the Value Number CSE optimization logic
+
+// true if Value Number CSE is enabled
+#define FEATURE_ANYCSE FEATURE_VALNUM_CSE
+
+#define CSE_INTO_HANDLERS 0
+
+#define CAN_DISABLE_DFA 1 // disable data flow for minopts
+
+#define LARGE_EXPSET 1 // Track 64 or 32 assertions/copies/consts/rangechecks
+#define ASSERTION_PROP 1 // Enable value/assertion propagation
+
+#define LOCAL_ASSERTION_PROP ASSERTION_PROP // Enable local assertion propagation
+
+//=============================================================================
+
+#define FANCY_ARRAY_OPT 0 // optimize more complex index checks
+
+//=============================================================================
+
+#define LONG_ASG_OPS 0 // implementation isn't complete yet
+
+//=============================================================================
+
+#define OPT_MULT_ADDSUB 1 // optimize consecutive "lclVar += or -= icon"
+#define OPT_BOOL_OPS 1 // optimize boolean operations
+
+//=============================================================================
+
+#define REDUNDANT_LOAD 1 // track locals in regs, suppress loads
+#define STACK_PROBES 0 // Support for stack probes
+#define DUMP_FLOWGRAPHS DEBUG // Support for creating Xml Flowgraph reports in *.fgx files
+
+#define HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION 1 // if 1 we must have all handler entry points in the Hot code section
+
+/*****************************************************************************/
+
+#define VPTR_OFFS 0 // offset of vtable pointer from obj ptr
+
+/*****************************************************************************/
+
+#define DUMP_GC_TABLES DEBUG
+#define VERIFY_GC_TABLES 0
+#define REARRANGE_ADDS 1
+
+#define FUNC_INFO_LOGGING 1 // Support dumping function info to a file. In retail, only NYIs, with no function name,
+ // are dumped.
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* Set these to 1 to collect and output various statistics about the JIT */
+
+#define CALL_ARG_STATS 0 // Collect stats about calls and call arguments.
+#define COUNT_BASIC_BLOCKS 0 // Create a histogram of basic block sizes, and a histogram of IL sizes in the simple
+ // case of single block methods.
+#define COUNT_LOOPS 0 // Collect stats about loops, such as the total number of natural loops, a histogram of
+ // the number of loop exits, etc.
+#define COUNT_RANGECHECKS 0 // Count range checks removed (in lexical CSE?).
+#define DATAFLOW_ITER 0 // Count iterations in lexical CSE and constant folding dataflow.
+#define DISPLAY_SIZES 0 // Display generated code, data, and GC information sizes.
+#define MEASURE_BLOCK_SIZE 0 // Collect stats about basic block and flowList node sizes and memory allocations.
+#define MEASURE_FATAL 0 // Count the number of calls to fatal(), including NYIs and noway_asserts.
+#define MEASURE_NODE_SIZE 0 // Collect stats about GenTree node allocations.
+#define MEASURE_PTRTAB_SIZE 0 // Collect stats about GC pointer table allocations.
+#define EMITTER_STATS 0 // Collect stats on the emitter.
+
+#define VERBOSE_SIZES 0 // Always display GC info sizes. If set, DISPLAY_SIZES must also be set.
+#define VERBOSE_VERIFY 0 // Dump additional information when verifying code. Useful to debug verification bugs.
+
+#ifdef DEBUG
+#define MEASURE_MEM_ALLOC 1 // Collect memory allocation stats.
+#define LOOP_HOIST_STATS 1 // Collect loop hoisting stats.
+#else
+#define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well
+#define LOOP_HOIST_STATS 0 // You can set this to 1 to get loop hoist stats in retail, as well
+#endif
+
+/*****************************************************************************/
+/* Portability Defines */
+/*****************************************************************************/
+#ifdef _TARGET_X86_
+#define JIT32_GCENCODER
+#endif
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+#define DUMPER
+
+#else // !DEBUG
+
+#if DUMP_GC_TABLES
+#pragma message("NOTE: this non-debug build has GC ptr table dumping always enabled!")
+const bool dspGCtbls = true;
+#endif
+
+/*****************************************************************************/
+#endif // !DEBUG
+
+#ifdef DEBUG
+void JitDump(const char* pcFormat, ...);
+#define JITDUMP(...) \
+ { \
+ if (JitTls::GetCompiler()->verbose) \
+ JitDump(__VA_ARGS__); \
+ }
+#define JITLOG(x) \
+ { \
+ JitLogEE x; \
+ }
+#define JITLOG_THIS(t, x) \
+ { \
+ (t)->JitLogEE x; \
+ }
+#define DBEXEC(flg, expr) \
+ if (flg) \
+ { \
+ expr; \
+ }
+#define DISPNODE(t) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispTree(t, nullptr, nullptr, true);
+#define DISPTREE(t) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispTree(t);
+#define DISPRANGE(range) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispRange(range);
+#define DISPTREERANGE(range, t) \
+ if (JitTls::GetCompiler()->verbose) \
+ JitTls::GetCompiler()->gtDispTreeRange(range, t);
+#define VERBOSE JitTls::GetCompiler()->verbose
+#else // !DEBUG
+#define JITDUMP(...)
+#define JITLOG(x)
+#define JITLOG_THIS(t, x)
+#define DBEXEC(flg, expr)
+#define DISPNODE(t)
+#define DISPTREE(t)
+#define DISPRANGE(range)
+#define DISPTREERANGE(range, t)
+#define VERBOSE 0
+#endif // !DEBUG
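As a quick illustration of how these dump macros are meant to be used, here is a minimal sketch; the helper below is hypothetical and not part of the sources.

// Hypothetical helper, for illustration only.
static void ExampleDumpUsage(GenTree* tree)
{
    // Emitted only when the current compiler's 'verbose' flag is set (e.g. via
    // COMPlus_JitDump for this method); in non-DEBUG builds both macros expand
    // to nothing, so the calls cost nothing in release code.
    JITDUMP("About to display a tree:\n");
    DISPTREE(tree);
}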
+
+/*****************************************************************************
+ *
+ * Double alignment. This aligns ESP to 0 mod 8 in the function prolog, then uses ESP
+ * to reference locals and EBP to reference parameters.
+ * It only makes sense if frameless method support is on
+ * (frameless method support is now always on).
+ */
+
+#ifdef _TARGET_X86_
+#define DOUBLE_ALIGN 1 // permit the double alignment of ESP in prolog,
+ // and permit the double alignment of local offsets
+#else
+#define DOUBLE_ALIGN 0 // no special handling for double alignment
+#endif
+/*****************************************************************************/
+#ifdef DEBUG
+extern void _cdecl debugStop(const char* why, ...);
+#endif
+/*****************************************************************************/
+
+#ifdef DEBUG
+
+struct JitOptions
+{
+ const char* methodName; // Method to display output for
+ const char* className; // Class to display output for
+
+ double CGknob; // Tweakable knob for testing
+ unsigned testMask; // Tweakable mask for testing
+
+ JitOptions* lastDummyField; // Ensures instantiation uses right order of arguments
+};
+
+extern JitOptions jitOpts;
+
+/*****************************************************************************
+*
+* Returns a word filled with the JIT allocator's CHK fill value.
+*
+*/
+template <typename T>
+inline T UninitializedWord()
+{
+ __int64 word = 0x0101010101010101LL * (JitConfig.JitDefaultFill() & 0xFF);
+ return (T)word;
+}
+
+/*****************************************************************************
+*
+* Determines whether this value came from uninitialized JIT memory.
+*
+*/
+
+template <typename T>
+inline bool IsUninitialized(T data)
+{
+ return data == UninitializedWord<T>();
+}
+#endif // DEBUG
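A small DEBUG-only sanity-check sketch, assuming the default JitDefaultFill value of 0xFF (see jitconfigvalues.h); the helper name is made up.

// Hypothetical check: with JitDefaultFill left at its default of 0xFF,
// UninitializedWord<int>() yields 0xFFFFFFFF, so a value that still carries the
// allocator's fill pattern is flagged by IsUninitialized().
static void ExampleUninitializedCheck()
{
    int suspect = UninitializedWord<int>(); // stands in for never-written CHK memory
    assert(IsUninitialized(suspect));
}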
+
+/*****************************************************************************/
+
+enum accessLevel
+{
+ ACL_NONE,
+ ACL_PRIVATE,
+ ACL_DEFAULT,
+ ACL_PROTECTED,
+ ACL_PUBLIC,
+};
+
+/*****************************************************************************/
+
+#define castto(var, typ) (*(typ*)&var)
+
+#define sizeto(typ, mem) (offsetof(typ, mem) + sizeof(((typ*)0)->mem))
+
+/*****************************************************************************/
+
+#ifdef NO_MISALIGNED_ACCESS
+
+#define MISALIGNED_RD_I2(src) (*castto(src, char*) | *castto(src + 1, char*) << 8)
+
+#define MISALIGNED_RD_U2(src) (*castto(src, char*) | *castto(src + 1, char*) << 8)
+
+#define MISALIGNED_WR_I2(dst, val) \
+ *castto(dst, char*) = val; \
+ *castto(dst + 1, char*) = val >> 8;
+
+#define MISALIGNED_WR_I4(dst, val) \
+ *castto(dst, char*) = val; \
+ *castto(dst + 1, char*) = val >> 8; \
+ *castto(dst + 2, char*) = val >> 16; \
+ *castto(dst + 3, char*) = val >> 24;
+
+#else
+
+#define MISALIGNED_RD_I2(src) (*castto(src, short*))
+#define MISALIGNED_RD_U2(src) (*castto(src, unsigned short*))
+
+#define MISALIGNED_WR_I2(dst, val) *castto(dst, short*) = val;
+#define MISALIGNED_WR_I4(dst, val) *castto(dst, int*) = val;
+
+#define MISALIGNED_WR_ST(dst, val) *castto(dst, ssize_t*) = val;
+
+#endif
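For reference, a sketch of what the NO_MISALIGNED_ACCESS form of MISALIGNED_WR_I4 amounts to: a little-endian 4-byte store performed one byte at a time so that an unaligned destination never faults. The helper below is illustrative only.

// Illustrative equivalent of MISALIGNED_WR_I4 under NO_MISALIGNED_ACCESS.
static void ExampleMisalignedWriteI4(char* dst, int val) // 'dst' may be unaligned
{
    dst[0] = (char)(val);       // e.g. 0x78 for val == 0x12345678
    dst[1] = (char)(val >> 8);  //      0x56
    dst[2] = (char)(val >> 16); //      0x34
    dst[3] = (char)(val >> 24); //      0x12
}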
+
+/*****************************************************************************/
+
+inline size_t roundUp(size_t size, size_t mult = sizeof(size_t))
+{
+ assert(mult && ((mult & (mult - 1)) == 0)); // power of two test
+
+ return (size + (mult - 1)) & ~(mult - 1);
+}
+
+inline size_t roundDn(size_t size, size_t mult = sizeof(size_t))
+{
+ assert(mult && ((mult & (mult - 1)) == 0)); // power of two test
+
+ return (size) & ~(mult - 1);
+}
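A few worked values for the rounding helpers (illustrative only; both require a power-of-two multiple):

// Illustrative only.
static void ExampleRounding()
{
    assert(roundUp(13, 8) == 16); // bump up to the next multiple of 8
    assert(roundUp(16, 8) == 16); // already a multiple, unchanged
    assert(roundDn(13, 8) == 8);  // truncate down to the previous multiple
    assert(roundUp(13) == 16);    // default multiple is sizeof(size_t) (8 on 64-bit hosts)
}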
+
+inline unsigned int unsigned_abs(int x)
+{
+ return ((unsigned int)abs(x));
+}
+
+#ifdef _TARGET_64BIT_
+inline size_t unsigned_abs(ssize_t x)
+{
+#ifndef FEATURE_PAL
+ return ((size_t)abs(x));
+#else // !FEATURE_PAL
+ return ((size_t)labs(x));
+#endif // !FEATURE_PAL
+}
+#endif // _TARGET_64BIT_
+
+/*****************************************************************************/
+
+#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+
+class Histogram
+{
+public:
+ Histogram(IAllocator* allocator, const unsigned* const sizeTable);
+ ~Histogram();
+
+ void dump(FILE* output);
+ void record(unsigned size);
+
+private:
+ void ensureAllocated();
+
+ IAllocator* m_allocator;
+ unsigned m_sizeCount;
+ const unsigned* const m_sizeTable;
+ unsigned* m_counts;
+};
+
+#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
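A minimal sketch of how one of these statistics histograms is typically set up. The bucket values and helper are hypothetical, and the size table is assumed to be zero-terminated, following the tables used with this class elsewhere in the JIT.

// Hypothetical usage sketch; 'alloc' and 'out' come from the caller.
static unsigned exampleSizeBuckets[] = {10, 20, 50, 100, 0}; // 0 terminates the table

static void ExampleHistogramUsage(IAllocator* alloc, FILE* out)
{
    Histogram hist(alloc, exampleSizeBuckets);
    hist.record(7);  // lands in the bucket bounded by 10
    hist.record(42); // lands in the bucket bounded by 50
    hist.dump(out);  // print one line per bucket with its count
}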
+
+/*****************************************************************************/
+#ifdef ICECAP
+#include "icapexp.h"
+#include "icapctrl.h"
+#endif
+
+/*****************************************************************************/
+
+#define SECURITY_CHECK 1
+#define VERIFY_IMPORTER 1
+
+/*****************************************************************************/
+
+#if !defined(RELOC_SUPPORT)
+#define RELOC_SUPPORT 1
+#endif
+
+/*****************************************************************************/
+
+#include "error.h"
+
+/*****************************************************************************/
+
+#if CHECK_STRUCT_PADDING
+#pragma warning(push)
+#pragma warning(default : 4820) // 'bytes' bytes padding added after construct 'member_name'
+#endif // CHECK_STRUCT_PADDING
+
+#include "alloc.h"
+#include "target.h"
+
+#if FEATURE_TAILCALL_OPT
+
+#ifdef FEATURE_CORECLR
+// CoreCLR - enable tail call opt for the following IL pattern
+//
+// call someFunc
+// jmp/jcc RetBlock
+// ...
+// RetBlock:
+// ret
+#define FEATURE_TAILCALL_OPT_SHARED_RETURN 1
+#else
+// Desktop: Keep this set to zero, as one of the app-compat apps that uses GetCallingAssembly()
+// has an issue when this is turned ON.
+//
+// Refer to TF: Bug: 824625 and its associated regression TF Bug: 1113265
+#define FEATURE_TAILCALL_OPT_SHARED_RETURN 0
+#endif // FEATURE_CORECLR
+
+#else // !FEATURE_TAILCALL_OPT
+#define FEATURE_TAILCALL_OPT_SHARED_RETURN 0
+#endif // !FEATURE_TAILCALL_OPT
+
+#define CLFLG_CODESIZE 0x00001
+#define CLFLG_CODESPEED 0x00002
+#define CLFLG_CSE 0x00004
+#define CLFLG_REGVAR 0x00008
+#define CLFLG_RNGCHKOPT 0x00010
+#define CLFLG_DEADASGN 0x00020
+#define CLFLG_CODEMOTION 0x00040
+#define CLFLG_QMARK 0x00080
+#define CLFLG_TREETRANS 0x00100
+#define CLFLG_INLINING 0x00200
+#define CLFLG_CONSTANTFOLD 0x00800
+
+#if FEATURE_STRUCTPROMOTE
+#define CLFLG_STRUCTPROMOTE 0x00400
+#else
+#define CLFLG_STRUCTPROMOTE 0x00000
+#endif
+
+#define CLFLG_MAXOPT \
+ (CLFLG_CSE | CLFLG_REGVAR | CLFLG_RNGCHKOPT | CLFLG_DEADASGN | CLFLG_CODEMOTION | CLFLG_QMARK | CLFLG_TREETRANS | \
+ CLFLG_INLINING | CLFLG_STRUCTPROMOTE | CLFLG_CONSTANTFOLD)
+
+#define CLFLG_MINOPT (CLFLG_TREETRANS)
+
+#define JIT_RESERVED_STACK 64 // Reserved for arguments of calls and hidden
+ // pushes for finallys so that we don't
+ // probe on every call site. See the comment
+ // for CORINFO_STACKPROBE_DEPTH in corjit.h.
+
+/*****************************************************************************/
+
+extern void dumpILBytes(const BYTE* const codeAddr, unsigned codeSize, unsigned alignSize);
+
+extern unsigned dumpSingleInstr(const BYTE* const codeAddr, IL_OFFSET offs, const char* prefix = nullptr);
+
+extern void dumpILRange(const BYTE* const codeAddr, unsigned codeSize); // in bytes
+
+/*****************************************************************************/
+
+extern int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_MODULE_HANDLE classHnd,
+ COMP_HANDLE compHnd,
+ CORINFO_METHOD_INFO* methodInfo,
+ void** methodCodePtr,
+ ULONG* methodCodeSize,
+ CORJIT_FLAGS* compileFlags,
+ void* inlineInfoPtr);
+
+#ifdef _HOST_64BIT_
+const size_t INVALID_POINTER_VALUE = 0xFEEDFACEABADF00D;
+#else
+const size_t INVALID_POINTER_VALUE = 0xFEEDFACE;
+#endif
+
+// Constants for making sure size_t fit into smaller types.
+const size_t MAX_USHORT_SIZE_T = static_cast<size_t>(static_cast<unsigned short>(-1));
+const size_t MAX_UNSIGNED_SIZE_T = static_cast<size_t>(static_cast<unsigned>(-1));
+
+// These assume 2's complement...
+const int MAX_SHORT_AS_INT = 32767;
+const int MIN_SHORT_AS_INT = -32768;
+
+/*****************************************************************************/
+
+enum CompMemKind
+{
+#define CompMemKindMacro(kind) CMK_##kind,
+#include "compmemkind.h"
+ CMK_Count
+};
+
+class Compiler;
+class JitTls
+{
+#ifdef DEBUG
+ Compiler* m_compiler;
+ LogEnv m_logEnv;
+ JitTls* m_next;
+#endif
+
+public:
+ JitTls(ICorJitInfo* jitInfo);
+ ~JitTls();
+
+#ifdef DEBUG
+ static LogEnv* GetLogEnv();
+#endif
+
+ static Compiler* GetCompiler();
+ static void SetCompiler(Compiler* compiler);
+};
+
+#if defined(DEBUG)
+
+#include "compiler.h"
+
+template <typename T>
+T dspPtr(T p)
+{
+ return (p == ZERO) ? ZERO : (JitTls::GetCompiler()->opts.dspDiffable ? T(0xD1FFAB1E) : p);
+}
+
+template <typename T>
+T dspOffset(T o)
+{
+ return (o == ZERO) ? ZERO : (JitTls::GetCompiler()->opts.dspDiffable ? T(0xD1FFAB1E) : o);
+}
+
+#else // !defined(DEBUG)
+
+template <typename T>
+T dspPtr(T p)
+{
+ return p;
+}
+
+template <typename T>
+T dspOffset(T o)
+{
+ return o;
+}
+
+#endif // !defined(DEBUG)
+
+/*****************************************************************************/
+#endif //_JIT_H_
+/*****************************************************************************/
diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets
new file mode 100644
index 0000000000..9dbc225843
--- /dev/null
+++ b/src/jit/jit.settings.targets
@@ -0,0 +1,136 @@
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <PropertyGroup>
+ <ClWarningLevel>4</ClWarningLevel>
+
+ <UserIncludes>
+ $(UserIncludes);
+ ..;
+ ..\jitstd\;
+ $(Clrbase)\src\TraceLog;
+ </UserIncludes>
+
+ <!-- PCH baloney -->
+ <EnableCxxPCHHeaders>true</EnableCxxPCHHeaders>
+ <PCHCompile>..\jitpch.cpp</PCHCompile>
+ <PCHHeader>jitpch.h</PCHHeader>
+
+ <!-- JIT_BUILD disables certain PAL_TRY debugging features -->
+ <ClDefines>$(ClDefines);JIT_BUILD=1</ClDefines>
+
+ <ClDefines Condition="'$(DebugBuild)' == 'false'">$(ClDefines);FAST=1</ClDefines>
+ <ClDefines Condition="'$(DebugBuild)' == 'true'">$(ClDefines);DEBUG=1</ClDefines>
+ </PropertyGroup>
+
+ <!-- For debugging purposes only, temporarily enable these in RET builds so GenTree debugging is easier. -->
+ <!-- We need to link with /OPT:NOICF or our magic vtable debugging system for GenTree doesn't work. -->
+ <PropertyGroup Condition="'$(DebugBuild)' == 'true'">
+ <!-- This is already automatically defined in DEBUG builds.
+ <ClDefines>$(ClDefines);DEBUGGABLE_GENTREE=1</ClDefines>
+ -->
+ <LinkEnableCOMDATFolding>false</LinkEnableCOMDATFolding> <!-- /OPT:NOICF -->
+ <ClAdditionalOptions>$(ClAdditionalOptions) /Ob0</ClAdditionalOptions> <!-- no inlining -->
+ </PropertyGroup>
+
+ <!-- Leaf Project Items -->
+ <ItemGroup>
+ <CppCompile Include="..\alloc.cpp" />
+ <CppCompile Include="..\earlyprop.cpp" />
+ <CppCompile Include="..\bitset.cpp" />
+ <CppCompile Include="..\block.cpp" />
+ <CppCompile Include="..\Compiler.cpp" />
+ <CppCompile Include="..\DisAsm.cpp" />
+ <CppCompile Include="..\eeInterface.cpp" />
+ <CppCompile Include="..\ee_il_dll.cpp" />
+ <CppCompile Include="..\jiteh.cpp" />
+ <CppCompile Include="..\error.cpp" />
+ <CppCompile Include="..\FlowGraph.cpp" />
+ <CppCompile Include="..\GCInfo.cpp" />
+ <CppCompile Include="..\GCDecode.cpp" />
+ <CppCompile Include="..\GCEncode.cpp" />
+ <CppCompile Include="..\GenTree.cpp" />
+ <CppCompile Include="..\GSChecks.cpp" />
+ <CppCompile Include="..\hashbv.cpp" />
+ <CppCompile Include="..\Importer.cpp" />
+ <CppCompile Include="..\Instr.cpp" />
+ <CppCompile Include="..\JitTelemetry.cpp" />
+ <CppCompile Include="..\LclVars.cpp" />
+ <CppCompile Include="..\LIR.cpp" />
+ <CppCompile Include="..\Liveness.cpp" />
+ <CppCompile Include="..\Morph.cpp" />
+ <CppCompile Include="..\Optimizer.cpp" />
+ <CppCompile Include="..\OptCSE.cpp" />
+ <CppCompile Include="..\rationalize.cpp" />
+ <CppCompile Include="..\RegAlloc.cpp" />
+ <CppCompile Include="..\RegSet.cpp" />
+ <CppCompile Include="..\register_arg_convention.cpp" />
+ <CppCompile Include="..\emit.cpp" />
+ <CppCompile Include="..\ScopeInfo.cpp" />
+ <CppCompile Include="..\SharedFloat.cpp" />
+ <CppCompile Include="..\SM.cpp" />
+ <CppCompile Include="..\SMData.cpp" />
+ <CppCompile Include="..\SMWeights.cpp" />
+ <CppCompile Include="..\typeInfo.cpp" />
+ <CppCompile Include="..\unwind.cpp" />
+ <CppCompile Include="..\Utils.cpp" />
+ <CppCompile Include="..\SsaBuilder.cpp" />
+ <CppCompile Include="..\SsaRenameState.cpp" />
+ <CppCompile Include="..\ValueNum.cpp" />
+ <CppCompile Include="..\CopyProp.cpp" />
+ <CppCompile Include="..\CodeGenCommon.cpp" />
+ <CppCompile Include="..\AssertionProp.cpp" />
+ <CppCompile Include="..\RangeCheck.cpp" />
+ <CppCompile Include="..\LoopCloning.cpp" />
+ <CppCompile Include="..\inline.cpp" />
+ <CppCompile Include="..\inlinepolicy.cpp" />
+ <CppCompile Include="..\jitconfig.cpp" />
+ <CppCompile Include="..\hostallocator.cpp" />
+ <CppCompile Include="..\objectalloc.cpp" />
+ <CppCompile Include="..\sideeffects.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\CodeGenLegacy.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\Lower.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LSRA.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='i386'">
+ <CppCompile Include="..\emitXArch.cpp" />
+ <CppCompile Include="..\TargetX86.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\stackfp.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerXArch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenXArch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMD.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMDCodeGenXArch.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='amd64'">
+ <!-- AMD64 target is always RyuJIT backend -->
+ <CppCompile Include="..\emitXArch.cpp" />
+ <CppCompile Include="..\TargetAmd64.cpp" />
+ <CppCompile Include="..\LowerXArch.cpp" />
+ <CppCompile Include="..\CodeGenXArch.cpp" />
+ <CppCompile Include="..\SIMD.cpp" />
+ <CppCompile Include="..\SIMDCodeGenXArch.cpp" />
+ <CppCompile Include="..\unwindAmd64.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='arm'">
+ <CppCompile Include="..\emitarm.cpp" />
+ <CppCompile Include="..\TargetArm.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\registerfp.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerArm.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenArm.cpp" />
+ <CppCompile Include="..\unwindArm.cpp" />
+ </ItemGroup>
+ <ItemGroup Condition="'$(TargetArch)'=='arm64'">
+ <!-- ARM64 target is always RyuJIT backend -->
+ <CppCompile Include="..\emitarm64.cpp" />
+ <CppCompile Include="..\TargetArm64.cpp" />
+ <CppCompile Include="..\LowerArm64.cpp" />
+ <CppCompile Include="..\CodeGenArm64.cpp" />
+ <CppCompile Include="..\unwindArm.cpp" />
+ <CppCompile Include="..\unwindArm64.cpp" />
+ </ItemGroup>
+
+ <!-- Import the targets - this actually contains the full build rules -->
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.targets" />
+
+</Project>
diff --git a/src/jit/jitconfig.cpp b/src/jit/jitconfig.cpp
new file mode 100644
index 0000000000..9f0e226e3a
--- /dev/null
+++ b/src/jit/jitconfig.cpp
@@ -0,0 +1,344 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "jitconfig.h"
+
+JitConfigValues JitConfig;
+
+void JitConfigValues::MethodSet::initialize(const wchar_t* list, ICorJitHost* host)
+{
+ assert(m_list == nullptr);
+
+ enum State
+ {
+ NO_NAME,
+ CLS_NAME,
+ FUNC_NAME,
+ ARG_LIST
+ }; // parsing state machine
+
+ const char SEP_CHAR = ' '; // character used to separate each entry
+
+ wchar_t lastChar = '?'; // dummy
+ int nameStart = -1; // Index of the start of the current class or method name
+ MethodName currentName; // Buffer used while parsing the current entry
+ MethodName** lastName = &m_names; // Last entry inserted into the list
+ bool isQuoted = false;
+
+ currentName.m_next = nullptr;
+ currentName.m_methodNameStart = -1;
+ currentName.m_methodNameLen = -1;
+ currentName.m_classNameStart = -1;
+ currentName.m_classNameLen = -1;
+ currentName.m_numArgs = -1;
+
+ // Convert the input list to UTF-8
+ int utf8ListLen = WszWideCharToMultiByte(CP_UTF8, 0, list, -1, nullptr, 0, nullptr, nullptr);
+ m_list = (char*)host->allocateMemory(utf8ListLen);
+ if (WszWideCharToMultiByte(CP_UTF8, 0, list, -1, const_cast<LPSTR>(m_list), utf8ListLen, nullptr, nullptr) == 0)
+ {
+ // Failed to convert the list. Free the memory and ignore the list.
+ host->freeMemory(reinterpret_cast<void*>(const_cast<char*>(m_list)));
+ m_list = "";
+ return;
+ }
+
+ State state = NO_NAME;
+ for (int i = 0; lastChar != '\0'; i++)
+ {
+ lastChar = m_list[i];
+
+ switch (state)
+ {
+ case NO_NAME:
+ if (m_list[i] != SEP_CHAR)
+ {
+ nameStart = i;
+ state = CLS_NAME; // we have found the start of the next entry
+ }
+ break;
+
+ case CLS_NAME:
+ if (m_list[nameStart] == '"')
+ {
+ for (; m_list[i] != '\0' && m_list[i] != '"'; i++)
+ {
+ ;
+ }
+
+ nameStart++;
+ isQuoted = true;
+ }
+
+ if (m_list[i] == ':')
+ {
+ if (m_list[nameStart] == '*' && !isQuoted)
+ {
+ // The class name is a wildcard; mark it invalid.
+ currentName.m_classNameStart = -1;
+ currentName.m_classNameLen = -1;
+ }
+ else
+ {
+ currentName.m_classNameStart = nameStart;
+ currentName.m_classNameLen = i - nameStart;
+
+ // Remove the trailing quote, if any
+ if (isQuoted)
+ {
+ currentName.m_classNameLen--;
+ isQuoted = false;
+ }
+ }
+
+ // Accept class::name syntax as well
+ if (m_list[i + 1] == ':')
+ {
+ i++;
+ }
+
+ nameStart = i + 1;
+ state = FUNC_NAME;
+ }
+ else if (m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == '(')
+ {
+ // Treat this as a method name without a class name.
+ currentName.m_classNameStart = -1;
+ currentName.m_classNameLen = -1;
+ goto DONE_FUNC_NAME;
+ }
+ break;
+
+ case FUNC_NAME:
+ if (m_list[nameStart] == '"')
+ {
+ // The first half of the outer condition handles the case where the
+ // class name is valid.
+ for (; nameStart == i || (m_list[i] != '\0' && m_list[i] != '"'); i++)
+ {
+ ;
+ }
+
+ nameStart++;
+ isQuoted = true;
+ }
+
+ if (m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == '(')
+ {
+ DONE_FUNC_NAME:
+ assert(m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == '(');
+
+ if (m_list[nameStart] == '*' && !isQuoted)
+ {
+ // The method name is a wildcard; mark it invalid.
+ currentName.m_methodNameStart = -1;
+ currentName.m_methodNameLen = -1;
+ }
+ else
+ {
+ currentName.m_methodNameStart = nameStart;
+ currentName.m_methodNameLen = i - nameStart;
+
+ // Remove the trailing quote, if any
+ if (isQuoted)
+ {
+ currentName.m_methodNameLen--;
+ isQuoted = false;
+ }
+ }
+
+ if (m_list[i] == '\0' || m_list[i] == SEP_CHAR)
+ {
+ currentName.m_numArgs = -1;
+ goto DONE_ARG_LIST;
+ }
+ else
+ {
+ assert(m_list[i] == '(');
+ currentName.m_numArgs = -1;
+ state = ARG_LIST;
+ }
+ }
+ break;
+
+ case ARG_LIST:
+ if (m_list[i] == '\0' || m_list[i] == ')')
+ {
+ if (currentName.m_numArgs == -1)
+ {
+ currentName.m_numArgs = 0;
+ }
+
+ DONE_ARG_LIST:
+ assert(m_list[i] == '\0' || m_list[i] == SEP_CHAR || m_list[i] == ')');
+
+ // We have parsed an entire method name; create a new entry in the list for it.
+ MethodName* name = (MethodName*)host->allocateMemory(sizeof(MethodName));
+ *name = currentName;
+
+ assert(name->m_next == nullptr);
+ *lastName = name;
+ lastName = &name->m_next;
+
+ state = NO_NAME;
+
+ // Skip anything after the argument list until we find the next
+ // separator character. Otherwise, if we saw "func(a,b):foo" we would
+ // create entries for both "func(a,b)" and ":foo".
+ if (m_list[i] == ')')
+ {
+ for (; m_list[i] && m_list[i] != SEP_CHAR; i++)
+ {
+ ;
+ }
+
+ lastChar = m_list[i];
+ }
+ }
+ else
+ {
+ if (m_list[i] != SEP_CHAR && currentName.m_numArgs == -1)
+ {
+ currentName.m_numArgs = 1;
+ }
+
+ if (m_list[i] == ',')
+ {
+ currentName.m_numArgs++;
+ }
+ }
+ break;
+
+ default:
+ assert(!"Bad state");
+ break;
+ }
+ }
+}
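To make the accepted syntax concrete, here are a few hypothetical entries this parser handles, followed by a DEBUG-only query sketch against the resulting set; the names and helper are made up.

// Entries are space separated; '*' is a wildcard for the class or method name,
// quotes allow embedded spaces, and a parenthesized argument list constrains the
// argument count. For example (hypothetical names):
//
//   Main                any method named Main, on any class, any signature
//   Program:Main        Main on class Program ("Program::Main" also parses)
//   *:Helper(int,int)   any Helper overload with exactly two arguments (only the
//                       count is compared, not the types)
//
// A DEBUG-only query against the global JitConfig then looks like this:
static bool ExampleIsDumpEnabledFor(const char* methodName, const char* className)
{
    return JitConfig.JitDump().contains(methodName, className, nullptr);
}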
+
+void JitConfigValues::MethodSet::destroy(ICorJitHost* host)
+{
+ // Free method names, free the list string, and reset our state
+ for (MethodName *name = m_names, *next = nullptr; name != nullptr; name = next)
+ {
+ next = name->m_next;
+ host->freeMemory(reinterpret_cast<void*>(const_cast<MethodName*>(name)));
+ }
+
+ host->freeMemory(reinterpret_cast<void*>(const_cast<char*>(m_list)));
+
+ m_names = nullptr;
+ m_list = nullptr;
+}
+
+static bool matchesName(const char* const name, int nameLen, const char* const s2)
+{
+ return strncmp(name, s2, nameLen) == 0 && s2[nameLen] == '\0';
+}
+
+bool JitConfigValues::MethodSet::contains(const char* methodName,
+ const char* className,
+ CORINFO_SIG_INFO* sigInfo) const
+{
+ int numArgs = sigInfo != nullptr ? sigInfo->numArgs : -1;
+
+ // Try to match any of the entries in the list.
+ for (MethodName* name = m_names; name != nullptr; name = name->m_next)
+ {
+ // If m_numArgs is valid, check for a mismatch
+ if (name->m_numArgs != -1 && name->m_numArgs != numArgs)
+ {
+ continue;
+ }
+
+ // If m_methodNameStart is valid, check for a mismatch
+ if (name->m_methodNameStart != -1)
+ {
+ const char* expectedMethodName = &m_list[name->m_methodNameStart];
+ if (!matchesName(expectedMethodName, name->m_methodNameLen, methodName))
+ {
+ // C++ embeds the class name into the method name ("Class::Method");
+ // match the expected method name against the part after the "::".
+ const char* colon = strchr(methodName, ':');
+ if (colon != nullptr && colon[1] == ':' &&
+ matchesName(expectedMethodName, name->m_methodNameLen, colon + 2))
+ {
+ int classLen = (int)(colon - methodName);
+ if (name->m_classNameStart == -1 ||
+ (classLen == name->m_classNameLen &&
+ strncmp(&m_list[name->m_classNameStart], methodName, classLen) == 0))
+ {
+ return true;
+ }
+ }
+ continue;
+ }
+ }
+
+ // If m_classNameStart is valid, check for a mismatch
+ if (className == nullptr || name->m_classNameStart == -1 ||
+ matchesName(&m_list[name->m_classNameStart], name->m_classNameLen, className))
+ {
+ return true;
+ }
+
+ // Check for a class name with a trailing wildcard, e.g. "System.*"
+ if (name->m_classNameLen > 0 && m_list[name->m_classNameStart + name->m_classNameLen - 1] == '*' &&
+ strncmp(&m_list[name->m_classNameStart], className, name->m_classNameLen - 1) == 0)
+ {
+ return true;
+ }
+
+#ifdef _DEBUG
+ // Maybe className doesn't include the namespace. Try to match that
+ const char* nsSep = strrchr(className, '.');
+ if (nsSep != nullptr && nsSep != className)
+ {
+ const char* onlyClass = nsSep[-1] == '.' ? nsSep : &nsSep[1];
+ if (matchesName(&m_list[name->m_classNameStart], name->m_classNameLen, onlyClass))
+ {
+ return true;
+ }
+ }
+#endif
+ }
+
+ return false;
+}
+
+void JitConfigValues::initialize(ICorJitHost* host)
+{
+ assert(!m_isInitialized);
+
+#define CONFIG_INTEGER(name, key, defaultValue) m_##name = host->getIntConfigValue(key, defaultValue);
+#define CONFIG_STRING(name, key) m_##name = host->getStringConfigValue(key);
+#define CONFIG_METHODSET(name, key) \
+ const wchar_t* name##value = host->getStringConfigValue(key); \
+ m_##name.initialize(name##value, host); \
+ host->freeStringConfigValue(name##value);
+
+#include "jitconfigvalues.h"
+
+ m_isInitialized = true;
+}
+
+void JitConfigValues::destroy(ICorJitHost* host)
+{
+ if (!m_isInitialized)
+ {
+ return;
+ }
+
+#define CONFIG_INTEGER(name, key, defaultValue)
+#define CONFIG_STRING(name, key) host->freeStringConfigValue(m_##name);
+#define CONFIG_METHODSET(name, key) m_##name.destroy(host);
+
+#include "jitconfigvalues.h"
+
+ m_isInitialized = false;
+}
diff --git a/src/jit/jitconfig.h b/src/jit/jitconfig.h
new file mode 100644
index 0000000000..d5b4e30796
--- /dev/null
+++ b/src/jit/jitconfig.h
@@ -0,0 +1,97 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _JITCONFIG_H_
+#define _JITCONFIG_H_
+
+struct CORINFO_SIG_INFO;
+class ICorJitHost;
+
+class JitConfigValues
+{
+public:
+ class MethodSet
+ {
+ private:
+ struct MethodName
+ {
+ MethodName* m_next;
+ int m_methodNameStart;
+ int m_methodNameLen;
+ int m_classNameStart;
+ int m_classNameLen;
+ int m_numArgs;
+ };
+
+ const char* m_list;
+ MethodName* m_names;
+
+ MethodSet(const MethodSet& other) = delete;
+ MethodSet& operator=(const MethodSet& other) = delete;
+
+ public:
+ MethodSet()
+ {
+ }
+ inline const char* list() const
+ {
+ return m_list;
+ }
+
+ void initialize(const wchar_t* list, ICorJitHost* host);
+ void destroy(ICorJitHost* host);
+
+ inline bool isEmpty() const
+ {
+ return m_names == nullptr;
+ }
+ bool contains(const char* methodName, const char* className, CORINFO_SIG_INFO* sigInfo) const;
+ };
+
+private:
+#define CONFIG_INTEGER(name, key, defaultValue) int m_##name;
+#define CONFIG_STRING(name, key) const wchar_t* m_##name;
+#define CONFIG_METHODSET(name, key) MethodSet m_##name;
+#include "jitconfigvalues.h"
+
+public:
+#define CONFIG_INTEGER(name, key, defaultValue) \
+ inline int name() const \
+ { \
+ return m_##name; \
+ }
+#define CONFIG_STRING(name, key) \
+ inline const wchar_t* name() const \
+ { \
+ return m_##name; \
+ }
+#define CONFIG_METHODSET(name, key) \
+ inline const MethodSet& name() const \
+ { \
+ return m_##name; \
+ }
+#include "jitconfigvalues.h"
+
+private:
+ bool m_isInitialized;
+
+ JitConfigValues(const JitConfigValues& other) = delete;
+ JitConfigValues& operator=(const JitConfigValues& other) = delete;
+
+public:
+ JitConfigValues()
+ {
+ }
+
+ inline bool isInitialized() const
+ {
+ return m_isInitialized != 0;
+ }
+ void initialize(ICorJitHost* host);
+ void destroy(ICorJitHost* host);
+};
+
+extern JitConfigValues JitConfig;
+
+#endif
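The CONFIG_* macros above implement an X-macro pattern: jitconfigvalues.h is included once to declare the backing fields and once to declare the accessors, and jitconfig.cpp includes it again to generate the initialization code. Below is a simplified sketch of what a single entry expands to, using the real JitMinOpts knob; the class name is made up and this is not literal preprocessor output.

// Illustrative expansion of CONFIG_INTEGER(JitMinOpts, W("JITMinOpts"), 0).
class JitConfigValuesSketch
{
    int m_JitMinOpts; // produced by the first include (field-declaring form)

public:
    int JitMinOpts() const // produced by the second include (accessor form)
    {
        return m_JitMinOpts;
    }

    void initialize(ICorJitHost* host) // the include in jitconfig.cpp fills the field
    {
        m_JitMinOpts = host->getIntConfigValue(W("JITMinOpts"), 0);
    }
};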
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
new file mode 100644
index 0000000000..6579817249
--- /dev/null
+++ b/src/jit/jitconfigvalues.h
@@ -0,0 +1,255 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if !defined(CONFIG_INTEGER) || !defined(CONFIG_STRING) || !defined(CONFIG_METHODSET)
+#error CONFIG_INTEGER, CONFIG_STRING, and CONFIG_METHODSET must be defined before including this file.
+#endif // !defined(CONFIG_INTEGER) || !defined(CONFIG_STRING) || !defined(CONFIG_METHODSET)
+
+#if defined(DEBUG)
+CONFIG_INTEGER(AltJitLimit, W("AltJitLimit"), 0) // Max number of functions to use altjit for (decimal)
+CONFIG_INTEGER(AltJitSkipOnAssert, W("AltJitSkipOnAssert"), 0) // If AltJit hits an assert, fall back to the fallback
+ // JIT. Useful in conjunction with
+ // COMPlus_ContinueOnAssert=1
+CONFIG_INTEGER(BreakOnDumpToken, W("BreakOnDumpToken"), 0xffffffff) // Breaks when using internal logging on a
+ // particular token value.
+CONFIG_INTEGER(DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailure"), 0) // Halts the jit on
+ // verification failure
+CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0) // Make the disassembly diff-able
+CONFIG_INTEGER(DisplayLoopHoistStats, W("JitLoopHoistStats"), 0) // Display JIT loop hoisting statistics
+CONFIG_INTEGER(DisplayMemStats, W("JitMemStats"), 0) // Display JIT memory usage statistics
+CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0) // Prints all jitted methods to the console
+CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1) // Whether an absolute address may be encoded as a
+ // PC-relative offset by RyuJIT where possible
+CONFIG_INTEGER(InterpreterFallback, W("InterpreterFallback"), 0) // Fallback to the interpreter when the JIT compiler
+ // fails
+CONFIG_INTEGER(JitAssertOnMaxRAPasses, W("JitAssertOnMaxRAPasses"), 0)
+CONFIG_INTEGER(JitBreakEmitOutputInstr, W("JitBreakEmitOutputInstr"), -1)
+CONFIG_INTEGER(JitBreakMorphTree, W("JitBreakMorphTree"), 0xffffffff)
+CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0)
+CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts
+CONFIG_INTEGER(JitBreakOnUnsafeCode, W("JitBreakOnUnsafeCode"), 0)
+CONFIG_INTEGER(JitCanUseSSE2, W("JitCanUseSSE2"), -1)
+CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations.
+CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning
+ // optimizations are performed on the fast path.
+CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xff) // In debug builds, initialize the memory allocated by the nra
+ // with this byte.
+CONFIG_INTEGER(JitDirectAlloc, W("JitDirectAlloc"), 0)
+CONFIG_INTEGER(JitDoAssertionProp, W("JitDoAssertionProp"), 1) // Perform assertion propagation optimization
+CONFIG_INTEGER(JitDoCopyProp, W("JitDoCopyProp"), 1) // Perform copy propagation on variables that appear redundant
+CONFIG_INTEGER(JitDoEarlyProp, W("JitDoEarlyProp"), 1) // Perform early value propagation
+CONFIG_INTEGER(JitDoLoopHoisting, W("JitDoLoopHoisting"), 1) // Perform loop hoisting on loop invariant values
+CONFIG_INTEGER(JitDoRangeAnalysis, W("JitDoRangeAnalysis"), 1) // Perform range check analysis
+CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
+CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
+CONFIG_INTEGER(JitDoubleAlign, W("JitDoubleAlign"), 1)
+CONFIG_INTEGER(JitDumpASCII, W("JitDumpASCII"), 1) // Uses only ASCII characters in tree dumps
+CONFIG_INTEGER(JitDumpFgDot, W("JitDumpFgDot"), 0) // Set to non-zero to emit Dot instead of Xml Flowgraph dump
+CONFIG_INTEGER(JitDumpTerseLsra, W("JitDumpTerseLsra"), 1) // Produce terse dump output for LSRA
+CONFIG_INTEGER(JitDumpToDebugger, W("JitDumpToDebugger"), 0) // Output JitDump output to the debugger
+CONFIG_INTEGER(JitDumpVerboseSsa, W("JitDumpVerboseSsa"), 0) // Produce especially verbose dump output for SSA
+CONFIG_INTEGER(JitDumpVerboseTrees, W("JitDumpVerboseTrees"), 0) // Enable more verbose tree dumps
+CONFIG_INTEGER(JitEmitPrintRefRegs, W("JitEmitPrintRefRegs"), 0)
+CONFIG_INTEGER(JitExpensiveDebugCheckLevel, W("JitExpensiveDebugCheckLevel"), 0) // Level indicates how much checking
+ // beyond the default to do in debug
+ // builds (currently 1-2)
+CONFIG_INTEGER(JitForceFallback, W("JitForceFallback"), 0) // Set to non-zero to test NOWAY assert by forcing a retry
+CONFIG_INTEGER(JitForceVer, W("JitForceVer"), 0)
+CONFIG_INTEGER(JitFullyInt, W("JitFullyInt"), 0) // Forces fully interruptible code
+CONFIG_INTEGER(JitFunctionTrace, W("JitFunctionTrace"), 0) // If non-zero, print JIT start/end logging
+CONFIG_INTEGER(JitGCChecks, W("JitGCChecks"), 0)
+CONFIG_INTEGER(JitGCInfoLogging, W("JitGCInfoLogging"), 0) // If true, prints GCInfo-related output to standard output.
+CONFIG_INTEGER(JitHashBreak, W("JitHashBreak"), -1) // Same as JitBreak, but for a method hash
+CONFIG_INTEGER(JitHashDump, W("JitHashDump"), -1) // Same as JitDump, but for a method hash
+CONFIG_INTEGER(JitHashDumpIR, W("JitHashDumpIR"), -1) // Same as JitDumpIR, but for a method hash
+CONFIG_INTEGER(JitHashHalt, W("JitHashHalt"), -1) // Same as JitHalt, but for a method hash
+CONFIG_INTEGER(JitInlineAdditionalMultiplier, W("JitInlineAdditionalMultiplier"), 0)
+CONFIG_INTEGER(JitInlinePrintStats, W("JitInlinePrintStats"), 0)
+CONFIG_INTEGER(JitInlineSize, W("JITInlineSize"), DEFAULT_MAX_INLINE_SIZE)
+CONFIG_INTEGER(JitInlineDepth, W("JITInlineDepth"), DEFAULT_MAX_INLINE_DEPTH)
+CONFIG_INTEGER(JitLongAddress, W("JitLongAddress"), 0) // Force using the large pseudo-instruction form for long addresses
+CONFIG_INTEGER(JitMaxTempAssert, W("JITMaxTempAssert"), 1)
+CONFIG_INTEGER(JitMaxUncheckedOffset, W("JitMaxUncheckedOffset"), 8)
+CONFIG_INTEGER(JitMinOpts, W("JITMinOpts"), 0) // Forces MinOpts
+CONFIG_INTEGER(JitMinOptsBbCount, W("JITMinOptsBbCount"), DEFAULT_MIN_OPTS_BB_COUNT) // Internal jit control of MinOpts
+CONFIG_INTEGER(JitMinOptsCodeSize, W("JITMinOptsCodeSize"), DEFAULT_MIN_OPTS_CODE_SIZE) // Internal jit control of
+ // MinOpts
+CONFIG_INTEGER(JitMinOptsInstrCount, W("JITMinOptsInstrCount"), DEFAULT_MIN_OPTS_INSTR_COUNT) // Internal jit control of
+ // MinOpts
+CONFIG_INTEGER(JitMinOptsLvNumCount, W("JITMinOptsLvNumcount"), DEFAULT_MIN_OPTS_LV_NUM_COUNT) // Internal jit control
+ // of MinOpts
+CONFIG_INTEGER(JitMinOptsLvRefCount, W("JITMinOptsLvRefcount"), DEFAULT_MIN_OPTS_LV_REF_COUNT) // Internal jit control
+ // of MinOpts
+CONFIG_INTEGER(JitNoCMOV, W("JitNoCMOV"), 0)
+CONFIG_INTEGER(JitNoCSE, W("JitNoCSE"), 0)
+CONFIG_INTEGER(JitNoCSE2, W("JitNoCSE2"), 0)
+CONFIG_INTEGER(JitNoForceFallback, W("JitNoForceFallback"), 0) // Set to non-zero to prevent NOWAY assert testing.
+ // Overrides COMPlus_JitForceFallback and JIT stress
+ // flags.
+CONFIG_INTEGER(JitNoHoist, W("JitNoHoist"), 0)
+CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0) // Disables inlining of all methods
+CONFIG_INTEGER(JitNoMemoryBarriers, W("JitNoMemoryBarriers"), 0) // If 1, don't generate memory barriers
+CONFIG_INTEGER(JitNoRegLoc, W("JitNoRegLoc"), 0)
+CONFIG_INTEGER(JitNoStructPromotion, W("JitNoStructPromotion"), 0) // Disables struct promotion in Jit32
+CONFIG_INTEGER(JitNoUnroll, W("JitNoUnroll"), 0)
+CONFIG_INTEGER(JitOrder, W("JitOrder"), 0)
+CONFIG_INTEGER(JitPInvokeCheckEnabled, W("JITPInvokeCheckEnabled"), 0)
+CONFIG_INTEGER(JitPInvokeEnabled, W("JITPInvokeEnabled"), 1)
+CONFIG_INTEGER(JitPrintInlinedMethods, W("JitPrintInlinedMethods"), 0)
+CONFIG_INTEGER(JitRequired, W("JITRequired"), -1)
+CONFIG_INTEGER(JitRoundFloat, W("JITRoundFloat"), DEFAULT_ROUND_LEVEL)
+CONFIG_INTEGER(JitSkipArrayBoundCheck, W("JitSkipArrayBoundCheck"), 0)
+CONFIG_INTEGER(JitSlowDebugChecksEnabled, W("JitSlowDebugChecksEnabled"), 1) // Turn on slow debug checks
+CONFIG_INTEGER(JitSplitFunctionSize, W("JitSplitFunctionSize"), 0) // On ARM, use this as the maximum function/funclet
+ // size for creating function fragments (and creating
+ // multiple RUNTIME_FUNCTION entries)
+CONFIG_INTEGER(JitSsaStress, W("JitSsaStress"), 0) // Perturb order of processing of blocks in SSA; 0 = no stress; 1 =
+ // use method hash; * = supplied value as random hash
+CONFIG_INTEGER(JitStackChecks, W("JitStackChecks"), 0)
+CONFIG_INTEGER(JitStress, W("JitStress"), 0) // Internal Jit stress mode: 0 = no stress, 2 = all stress, other = vary
+ // stress based on a hash of the method and this value
+CONFIG_INTEGER(JitStressBBProf, W("JitStressBBProf"), 0) // Internal Jit stress mode
+CONFIG_INTEGER(JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101) // Internal Jit stress mode: decimal bias value
+ // between (0,100) to perform CSE on a candidate.
+ // 100% = All CSEs. 0% = 0 CSE. (> 100) means no
+ // stress.
+CONFIG_INTEGER(JitStressFP, W("JitStressFP"), 0) // Internal Jit stress mode
+CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable
+ // stress modes listed in JitStressModeNames
+CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0)
+CONFIG_INTEGER(JitStrictCheckForNonVirtualCallToVirtualMethod, W("JitStrictCheckForNonVirtualCallToVirtualMethod"), 1)
+CONFIG_INTEGER(JitVNMapSelLimit, W("JitVNMapSelLimit"), 0) // If non-zero, assert if # of VNF_MapSelect applications
+ // considered reaches this
+CONFIG_INTEGER(NgenHashDump, W("NgenHashDump"), -1) // same as JitHashDump, but for ngen
+CONFIG_INTEGER(NgenHashDumpIR, W("NgenHashDumpIR"), -1) // same as JitHashDumpIR, but for ngen
+CONFIG_INTEGER(NgenOrder, W("NgenOrder"), 0)
+CONFIG_INTEGER(RunAltJitCode, W("RunAltJitCode"), 1) // If non-zero, and the compilation succeeds for an AltJit, then
+ // use the code. If zero, then we always throw away the generated
+ // code and fall back to the default compiler.
+CONFIG_INTEGER(RunComponentUnitTests, W("JitComponentUnitTests"), 0) // Run JIT component unit tests
+CONFIG_INTEGER(ShouldInjectFault, W("InjectFault"), 0)
+CONFIG_INTEGER(StackProbesOverride, W("JitStackProbes"), 0)
+CONFIG_INTEGER(StressCOMCall, W("StressCOMCall"), 0)
+CONFIG_INTEGER(TailcallStress, W("TailcallStress"), 0)
+CONFIG_INTEGER(TreesBeforeAfterMorph, W("JitDumpBeforeAfterMorph"), 0) // If 1, display each tree before/after morphing
+CONFIG_METHODSET(JitBreak, W("JitBreak")) // Stops in the importer when compiling a specified method
+CONFIG_METHODSET(JitDebugBreak, W("JitDebugBreak"))
+CONFIG_METHODSET(JitDisasm, W("JitDisasm")) // Dumps disassembly for specified method
+CONFIG_METHODSET(JitDump, W("JitDump")) // Dumps trees for specified method
+CONFIG_METHODSET(JitDumpIR, W("JitDumpIR")) // Dumps trees (in linear IR form) for specified method
+CONFIG_METHODSET(JitEHDump, W("JitEHDump")) // Dump the EH table for the method, as reported to the VM
+CONFIG_METHODSET(JitExclude, W("JitExclude"))
+CONFIG_METHODSET(JitForceProcedureSplitting, W("JitForceProcedureSplitting"))
+CONFIG_METHODSET(JitGCDump, W("JitGCDump"))
+CONFIG_METHODSET(JitHalt, W("JitHalt")) // Emits break instruction into jitted code
+CONFIG_METHODSET(JitImportBreak, W("JitImportBreak"))
+CONFIG_METHODSET(JitInclude, W("JitInclude"))
+CONFIG_METHODSET(JitLateDisasm, W("JitLateDisasm"))
+CONFIG_METHODSET(JitMinOptsName, W("JITMinOptsName")) // Forces MinOpts for a named function
+CONFIG_METHODSET(JitNoProcedureSplitting, W("JitNoProcedureSplitting")) // Disallow procedure splitting for specified
+ // methods
+CONFIG_METHODSET(JitNoProcedureSplittingEH, W("JitNoProcedureSplittingEH")) // Disallow procedure splitting for
+ // specified methods if they contain
+ // exception handling
+CONFIG_METHODSET(JitStressOnly, W("JitStressOnly")) // Internal Jit stress mode: stress only the specified method(s)
+CONFIG_METHODSET(JitUnwindDump, W("JitUnwindDump")) // Dump the unwind codes for the method
+CONFIG_METHODSET(NgenDisasm, W("NgenDisasm")) // Same as JitDisasm, but for ngen
+CONFIG_METHODSET(NgenDump, W("NgenDump")) // Same as JitDump, but for ngen
+CONFIG_METHODSET(NgenDumpIR, W("NgenDumpIR")) // Same as JitDumpIR, but for ngen
+CONFIG_METHODSET(NgenEHDump, W("NgenEHDump")) // Dump the EH table for the method, as reported to the VM
+CONFIG_METHODSET(NgenGCDump, W("NgenGCDump"))
+CONFIG_METHODSET(NgenUnwindDump, W("NgenUnwindDump")) // Dump the unwind codes for the method
+CONFIG_STRING(JitDumpFg, W("JitDumpFg")) // Dumps Xml/Dot Flowgraph for specified method
+CONFIG_STRING(JitDumpFgDir, W("JitDumpFgDir")) // Directory for Xml/Dot flowgraph dump(s)
+CONFIG_STRING(JitDumpFgFile, W("JitDumpFgFile")) // Filename for Xml/Dot flowgraph dump(s)
+CONFIG_STRING(JitDumpFgPhase, W("JitDumpFgPhase")) // Phase-based Xml/Dot flowgraph support. Set to the short name of a
+ // phase to see the flowgraph after that phase. Leave unset to dump
+ // after COLD-BLK (determine first cold block) or set to * for all
+ // phases
+CONFIG_STRING(JitDumpIRFormat, W("JitDumpIRFormat")) // Comma separated format control for JitDumpIR, values = {types |
+ // locals | ssa | valnums | kinds | flags | nodes | nolists |
+ // nostmts | noleafs | trees | dataflow}
+CONFIG_STRING(JitDumpIRPhase, W("JitDumpIRPhase")) // Phase control for JitDumpIR, values = {* | phasename}
+CONFIG_STRING(JitLateDisasmTo, W("JITLateDisasmTo"))
+CONFIG_STRING(JitRange, W("JitRange"))
+CONFIG_STRING(JitStressModeNames, W("JitStressModeNames")) // Internal Jit stress mode: stress using the given set of
+ // stress mode names, e.g. STRESS_REGS, STRESS_TAILCALL
+CONFIG_STRING(JitStressModeNamesNot, W("JitStressModeNamesNot")) // Internal Jit stress mode: do NOT stress using the
+ // given set of stress mode names, e.g. STRESS_REGS,
+ // STRESS_TAILCALL
+CONFIG_STRING(JitStressRange, W("JitStressRange")) // Internal Jit stress mode
+CONFIG_STRING(NgenDumpFg, W("NgenDumpFg")) // Ngen Xml Flowgraph support
+CONFIG_STRING(NgenDumpFgDir, W("NgenDumpFgDir")) // Ngen Xml Flowgraph support
+CONFIG_STRING(NgenDumpFgFile, W("NgenDumpFgFile")) // Ngen Xml Flowgraph support
+CONFIG_STRING(NgenDumpIRFormat, W("NgenDumpIRFormat")) // Same as JitDumpIRFormat, but for ngen
+CONFIG_STRING(NgenDumpIRPhase, W("NgenDumpIRPhase")) // Same as JitDumpIRPhase, but for ngen
+#endif // defined(DEBUG)
+
+// AltJitAssertOnNYI should be 0 on targets where the JIT is under development or in a bring-up stage, so as to
+// facilitate falling back to the main JIT on hitting an NYI.
+#if defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
+CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 0) // Controls the AltJit behavior of NYI stuff
+#else // !defined(_TARGET_ARM64_) && !defined(_TARGET_X86_)
+CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the AltJit behavior of NYI stuff
+#endif // defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
+
+#if defined(_TARGET_AMD64_)
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default
+#else // !defined(_TARGET_AMD64_)
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) // Enable AVX instruction set for wide operations as default
+#endif // defined(_TARGET_AMD64_)
+
+#if !defined(DEBUG) && !defined(_DEBUG)
+CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0)
+#else // defined(DEBUG) || defined(_DEBUG)
+CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 1)
+#endif // !defined(DEBUG) && !defined(_DEBUG)
+
+CONFIG_INTEGER(JitAggressiveInlining, W("JitAggressiveInlining"), 0) // Aggressive inlining of all methods
+CONFIG_INTEGER(JitELTHookEnabled, W("JitELTHookEnabled"), 0) // On ARM, setting this will emit Enter/Leave/TailCall
+ // callbacks
+CONFIG_INTEGER(JitInlineSIMDMultiplier, W("JitInlineSIMDMultiplier"), 3)
+
+#if defined(FEATURE_ENABLE_NO_RANGE_CHECKS)
+CONFIG_INTEGER(JitNoRngChks, W("JitNoRngChks"), 0) // If 1, don't generate range checks
+#endif // defined(FEATURE_ENABLE_NO_RANGE_CHECKS)
+
+CONFIG_INTEGER(JitRegisterFP, W("JitRegisterFP"), 3) // Control FP enregistration
+CONFIG_INTEGER(JitTelemetry, W("JitTelemetry"), 1) // If non-zero, gather JIT telemetry data
+CONFIG_INTEGER(JitVNMapSelBudget, W("JitVNMapSelBudget"), 100) // Max # of MapSelect's considered for a particular
+ // top-level invocation.
+CONFIG_INTEGER(TailCallLoopOpt, W("TailCallLoopOpt"), 1) // Convert recursive tail calls to loops
+CONFIG_METHODSET(AltJit, W("AltJit")) // Enables AltJit and selectively limits it to the specified methods.
+CONFIG_METHODSET(AltJitNgen,
+ W("AltJitNgen")) // Enables AltJit for NGEN and selectively limits it to the specified methods.
+
+#if defined(ALT_JIT)
+CONFIG_STRING(AltJitExcludeAssemblies,
+ W("AltJitExcludeAssemblies")) // Do not use AltJit on this semicolon-delimited list of assemblies.
+#endif // defined(ALT_JIT)
+
+CONFIG_STRING(JitFuncInfoFile, W("JitFuncInfoLogFile")) // If set, gather JIT function info and write to this file.
+CONFIG_STRING(JitTimeLogCsv, W("JitTimeLogCsv")) // If set, gather JIT throughput data and write to a CSV file. This
+ // mode must be used in internal retail builds.
+CONFIG_STRING(TailCallOpt, W("TailCallOpt"))
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+CONFIG_INTEGER(JitInlineDumpData, W("JitInlineDumpData"), 0)
+CONFIG_INTEGER(JitInlineDumpXml, W("JitInlineDumpXml"), 0) // 1 = full xml (all methods), 2 = minimal xml (only method
+ // with inlines)
+CONFIG_INTEGER(JitInlineLimit, W("JitInlineLimit"), -1)
+CONFIG_INTEGER(JitInlinePolicyDiscretionary, W("JitInlinePolicyDiscretionary"), 0)
+CONFIG_INTEGER(JitInlinePolicyFull, W("JitInlinePolicyFull"), 0)
+CONFIG_INTEGER(JitInlinePolicySize, W("JitInlinePolicySize"), 0)
+CONFIG_INTEGER(JitInlinePolicyReplay, W("JitInlinePolicyReplay"), 0)
+CONFIG_STRING(JitNoInlineRange, W("JitNoInlineRange"))
+CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile"))
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+CONFIG_INTEGER(JitInlinePolicyLegacy, W("JitInlinePolicyLegacy"), 0)
+CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0)
+
+#undef CONFIG_INTEGER
+#undef CONFIG_STRING
+#undef CONFIG_METHODSET
diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp
new file mode 100644
index 0000000000..b20c2f8a9a
--- /dev/null
+++ b/src/jit/jiteh.cpp
@@ -0,0 +1,4056 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Exception Handling XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX "EHblkDsc" functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+
+BasicBlock* EHblkDsc::BBFilterLast()
+{
+ noway_assert(HasFilter());
+ noway_assert(ebdFilter != nullptr);
+ noway_assert(ebdHndBeg != nullptr);
+
+ // The last block of the filter is the block immediately preceding the first block of the handler.
+ return ebdHndBeg->bbPrev;
+}
+
+BasicBlock* EHblkDsc::ExFlowBlock()
+{
+ if (HasFilter())
+ {
+ return ebdFilter;
+ }
+ else
+ {
+ return ebdHndBeg;
+ }
+}
+
+bool EHblkDsc::InTryRegionILRange(BasicBlock* pBlk)
+{
+ // BBF_INTERNAL blocks may not have a valid bbCodeOffs. This function
+ // should only be used before any BBF_INTERNAL blocks have been added.
+ assert(!(pBlk->bbFlags & BBF_INTERNAL));
+
+ return Compiler::jitIsBetween(pBlk->bbCodeOffs, ebdTryBegOffs(), ebdTryEndOffs());
+}
+
+bool EHblkDsc::InFilterRegionILRange(BasicBlock* pBlk)
+{
+ // BBF_INTERNAL blocks may not have a valid bbCodeOffs. This function
+ // should only be used before any BBF_INTERNAL blocks have been added.
+ assert(!(pBlk->bbFlags & BBF_INTERNAL));
+
+ return HasFilter() && Compiler::jitIsBetween(pBlk->bbCodeOffs, ebdFilterBegOffs(), ebdFilterEndOffs());
+}
+
+bool EHblkDsc::InHndRegionILRange(BasicBlock* pBlk)
+{
+ // BBF_INTERNAL blocks may not have a valid bbCodeOffs. This function
+ // should only be used before any BBF_INTERNAL blocks have been added.
+ assert(!(pBlk->bbFlags & BBF_INTERNAL));
+
+ return Compiler::jitIsBetween(pBlk->bbCodeOffs, ebdHndBegOffs(), ebdHndEndOffs());
+}
+
+// HasCatchHandler: returns 'true' for either try/catch, or try/filter/filter-handler.
+bool EHblkDsc::HasCatchHandler()
+{
+ return (ebdHandlerType == EH_HANDLER_CATCH) || (ebdHandlerType == EH_HANDLER_FILTER);
+}
+
+bool EHblkDsc::HasFilter()
+{
+ return ebdHandlerType == EH_HANDLER_FILTER;
+}
+
+bool EHblkDsc::HasFinallyHandler()
+{
+ return ebdHandlerType == EH_HANDLER_FINALLY;
+}
+
+bool EHblkDsc::HasFaultHandler()
+{
+ return ebdHandlerType == EH_HANDLER_FAULT;
+}
+
+bool EHblkDsc::HasFinallyOrFaultHandler()
+{
+ return HasFinallyHandler() || HasFaultHandler();
+}
+
+/*****************************************************************************
+ * Returns true if pBlk is a block in the range [pStart..pEnd).
+ * The check is inclusive of pStart, exclusive of pEnd.
+ */
+
+bool EHblkDsc::InBBRange(BasicBlock* pBlk, BasicBlock* pStart, BasicBlock* pEnd)
+{
+ for (BasicBlock* pWalk = pStart; pWalk != pEnd; pWalk = pWalk->bbNext)
+ {
+ if (pWalk == pBlk)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool EHblkDsc::InTryRegionBBRange(BasicBlock* pBlk)
+{
+ return InBBRange(pBlk, ebdTryBeg, ebdTryLast->bbNext);
+}
+
+bool EHblkDsc::InFilterRegionBBRange(BasicBlock* pBlk)
+{
+ return HasFilter() && InBBRange(pBlk, ebdFilter, ebdHndBeg);
+}
+
+bool EHblkDsc::InHndRegionBBRange(BasicBlock* pBlk)
+{
+ return InBBRange(pBlk, ebdHndBeg, ebdHndLast->bbNext);
+}
+
+unsigned EHblkDsc::ebdGetEnclosingRegionIndex(bool* inTryRegion)
+{
+ if ((ebdEnclosingTryIndex == NO_ENCLOSING_INDEX) && (ebdEnclosingHndIndex == NO_ENCLOSING_INDEX))
+ {
+ return NO_ENCLOSING_INDEX;
+ }
+ else if (ebdEnclosingTryIndex == NO_ENCLOSING_INDEX)
+ {
+ assert(ebdEnclosingHndIndex != NO_ENCLOSING_INDEX);
+ *inTryRegion = false;
+ return ebdEnclosingHndIndex;
+ }
+ else if (ebdEnclosingHndIndex == NO_ENCLOSING_INDEX)
+ {
+ assert(ebdEnclosingTryIndex != NO_ENCLOSING_INDEX);
+ *inTryRegion = true;
+ return ebdEnclosingTryIndex;
+ }
+ else
+ {
+ assert(ebdEnclosingTryIndex != NO_ENCLOSING_INDEX);
+ assert(ebdEnclosingHndIndex != NO_ENCLOSING_INDEX);
+ assert(ebdEnclosingTryIndex != ebdEnclosingHndIndex);
+ if (ebdEnclosingTryIndex < ebdEnclosingHndIndex)
+ {
+ *inTryRegion = true;
+ return ebdEnclosingTryIndex;
+ }
+ else
+ {
+ *inTryRegion = false;
+ return ebdEnclosingHndIndex;
+ }
+ }
+}
+
+/*****************************************************************************/
+
+// We used to assert that the IL offsets in the EH table matched the IL offset stored
+// on the blocks pointed to by the try/filter/handler block pointers. This is true at
+// import time, but can fail to be true later in compilation when we start doing
+// flow optimizations.
+//
+// That being said, the IL offsets in the EH table should only be examined early,
+// during importing. After importing, use block info instead.
+
+IL_OFFSET EHblkDsc::ebdTryBegOffs()
+{
+ return ebdTryBegOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdTryEndOffs()
+{
+ return ebdTryEndOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdHndBegOffs()
+{
+ return ebdHndBegOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdHndEndOffs()
+{
+ return ebdHndEndOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdFilterBegOffs()
+{
+ assert(HasFilter());
+ return ebdFilterBegOffset;
+}
+
+IL_OFFSET EHblkDsc::ebdFilterEndOffs()
+{
+ assert(HasFilter());
+ return ebdHndBegOffs(); // end of filter is beginning of handler
+}
+
+/* static */
+bool EHblkDsc::ebdIsSameILTry(EHblkDsc* h1, EHblkDsc* h2)
+{
+ return ((h1->ebdTryBegOffset == h2->ebdTryBegOffset) && (h1->ebdTryEndOffset == h2->ebdTryEndOffset));
+}
+
+/*****************************************************************************/
+
+/* static */
+bool EHblkDsc::ebdIsSameTry(EHblkDsc* h1, EHblkDsc* h2)
+{
+ return ((h1->ebdTryBeg == h2->ebdTryBeg) && (h1->ebdTryLast == h2->ebdTryLast));
+}
+
+bool EHblkDsc::ebdIsSameTry(Compiler* comp, unsigned t2)
+{
+ EHblkDsc* h2 = comp->ehGetDsc(t2);
+ return ebdIsSameTry(this, h2);
+}
+
+bool EHblkDsc::ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast)
+{
+ return ((this->ebdTryBeg == ebdTryBeg) && (this->ebdTryLast == ebdTryLast));
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************/
+
+void EHblkDsc::DispEntry(unsigned XTnum)
+{
+ printf(" %2u ::", XTnum);
+
+#if !FEATURE_EH_FUNCLETS
+ printf(" %2u ", XTnum, ebdHandlerNestingLevel);
+#endif // !FEATURE_EH_FUNCLETS
+
+ if (ebdEnclosingTryIndex == NO_ENCLOSING_INDEX)
+ {
+ printf(" ");
+ }
+ else
+ {
+ printf(" %2u ", ebdEnclosingTryIndex);
+ }
+
+ if (ebdEnclosingHndIndex == NO_ENCLOSING_INDEX)
+ {
+ printf(" ");
+ }
+ else
+ {
+ printf(" %2u ", ebdEnclosingHndIndex);
+ }
+
+ //////////////
+ ////////////// Protected (try) region
+ //////////////
+
+ printf("- Try at BB%02u..BB%02u", ebdTryBeg->bbNum, ebdTryLast->bbNum);
+
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf(" [%03X..%03X), ", ebdTryBegOffset, ebdTryEndOffset);
+
+ //////////////
+ ////////////// Filter region
+ //////////////
+
+ if (HasFilter())
+ {
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf("Filter at BB%02u..BB%02u [%03X..%03X), ", ebdFilter->bbNum, BBFilterLast()->bbNum, ebdFilterBegOffset,
+ ebdHndBegOffset);
+ }
+
+ //////////////
+ ////////////// Handler region
+ //////////////
+
+ if (ebdHndBeg->bbCatchTyp == BBCT_FINALLY)
+ {
+ printf("Finally");
+ }
+ else if (ebdHndBeg->bbCatchTyp == BBCT_FAULT)
+ {
+ printf("Fault ");
+ }
+ else
+ {
+ printf("Handler");
+ }
+
+ printf(" at BB%02u..BB%02u", ebdHndBeg->bbNum, ebdHndLast->bbNum);
+
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf(" [%03X..%03X)", ebdHndBegOffset, ebdHndEndOffset);
+
+ printf("\n");
+}
+
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX "Compiler" functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+bool Compiler::bbInCatchHandlerILRange(BasicBlock* blk)
+{
+ EHblkDsc* HBtab = ehGetBlockHndDsc(blk);
+
+ if (HBtab == nullptr)
+ {
+ return false;
+ }
+
+ return HBtab->HasCatchHandler() && HBtab->InHndRegionILRange(blk);
+}
+
+bool Compiler::bbInFilterILRange(BasicBlock* blk)
+{
+ EHblkDsc* HBtab = ehGetBlockHndDsc(blk);
+
+ if (HBtab == nullptr)
+ {
+ return false;
+ }
+
+ return HBtab->InFilterRegionILRange(blk);
+}
+
+// Given a handler region, find the innermost try region that contains it.
+// NOTE: handlerIndex is 1-based (0 means no handler).
+unsigned short Compiler::bbFindInnermostTryRegionContainingHandlerRegion(unsigned handlerIndex)
+{
+ if (handlerIndex > 0)
+ {
+ unsigned XTnum;
+ EHblkDsc* ehDsc;
+ BasicBlock* blk = ehGetDsc(handlerIndex - 1)->ebdHndBeg;
+
+ // handlerIndex is 1 based, therefore our interesting clauses start from clause compHndBBtab[handlerIndex]
+ EHblkDsc* ehDscEnd = compHndBBtab + compHndBBtabCount;
+ for (ehDsc = compHndBBtab + handlerIndex, XTnum = handlerIndex; ehDsc < ehDscEnd; ehDsc++, XTnum++)
+ {
+ if (bbInTryRegions(XTnum, blk))
+ {
+ noway_assert(XTnum < MAX_XCPTN_INDEX);
+ return (unsigned short)(XTnum + 1); // Return the tryIndex
+ }
+ }
+ }
+
+ return 0;
+}
+
+// Given a try region, find the innermost handler region that contains it.
+// NOTE: tryIndex is 1-based (0 means no handler).
+unsigned short Compiler::bbFindInnermostHandlerRegionContainingTryRegion(unsigned tryIndex)
+{
+ if (tryIndex > 0)
+ {
+ unsigned XTnum;
+ EHblkDsc* ehDsc;
+ BasicBlock* blk = ehGetDsc(tryIndex - 1)->ebdTryBeg;
+
+ // tryIndex is 1 based, our interesting clauses start from clause compHndBBtab[tryIndex]
+ EHblkDsc* ehDscEnd = compHndBBtab + compHndBBtabCount;
+ for (ehDsc = compHndBBtab + tryIndex, XTnum = tryIndex; ehDsc < ehDscEnd; ehDsc++, XTnum++)
+ {
+ if (bbInHandlerRegions(XTnum, blk))
+ {
+ noway_assert(XTnum < MAX_XCPTN_INDEX);
+ return (unsigned short)(XTnum + 1); // Return the handlerIndex
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ Given a block and a try region index, check to see if the block is within
+ the try body. For this check, a funclet is considered to be in the region
+ it was extracted from.
+*/
+bool Compiler::bbInTryRegions(unsigned regionIndex, BasicBlock* blk)
+{
+ assert(regionIndex < EHblkDsc::NO_ENCLOSING_INDEX);
+ unsigned tryIndex = blk->hasTryIndex() ? blk->getTryIndex() : EHblkDsc::NO_ENCLOSING_INDEX;
+
+ // Loop outward until we find an enclosing try that is the same as the one
+ // we are looking for or an outer/later one
+ while (tryIndex < regionIndex)
+ {
+ tryIndex = ehGetEnclosingTryIndex(tryIndex);
+ }
+
+ // Now we have the indices of two try regions; either they match or they don't.
+ return (tryIndex == regionIndex);
+}
+
+//------------------------------------------------------------------------
+// bbInExnFlowRegions:
+// Check to see if an exception raised in the given block could be
+// handled by the given region (possibly after inner regions).
+//
+// Arguments:
+// regionIndex - Check if this region can handle exceptions from 'blk'
+// blk - Consider exceptions raised from this block
+//
+// Return Value:
+// true - The region with index 'regionIndex' can handle exceptions from 'blk'
+// false - The region with index 'regionIndex' can't handle exceptions from 'blk'
+//
+// Notes:
+// For this check, a funclet is considered to be in the region it was
+// extracted from.
+
+bool Compiler::bbInExnFlowRegions(unsigned regionIndex, BasicBlock* blk)
+{
+ assert(regionIndex < EHblkDsc::NO_ENCLOSING_INDEX);
+ EHblkDsc* ExnFlowRegion = ehGetBlockExnFlowDsc(blk);
+ unsigned tryIndex = (ExnFlowRegion == nullptr ? EHblkDsc::NO_ENCLOSING_INDEX : ehGetIndex(ExnFlowRegion));
+
+ // Loop outward until we find an enclosing try that is the same as the one
+ // we are looking for or an outer/later one
+ while (tryIndex < regionIndex)
+ {
+ tryIndex = ehGetEnclosingTryIndex(tryIndex);
+ }
+
+ // Now we have the indices of two 'try' regions; either they match or they don't.
+ return (tryIndex == regionIndex);
+}
+
+/*
+ Given a block, check to see if it is in the handler region of the EH descriptor with the given index.
+ For this check, a funclet is considered to be in the region it was extracted from.
+*/
+bool Compiler::bbInHandlerRegions(unsigned regionIndex, BasicBlock* blk)
+{
+ assert(regionIndex < EHblkDsc::NO_ENCLOSING_INDEX);
+ unsigned hndIndex = blk->hasHndIndex() ? blk->getHndIndex() : EHblkDsc::NO_ENCLOSING_INDEX;
+
+ // We can't use the same simple trick here because there is no required ordering
+ // of handlers (which also have no required ordering with respect to their try
+ // bodies).
+ while (hndIndex < EHblkDsc::NO_ENCLOSING_INDEX && hndIndex != regionIndex)
+ {
+ hndIndex = ehGetEnclosingHndIndex(hndIndex);
+ }
+
+ // Now we have the indices of two handler regions; either they match or they don't.
+ return (hndIndex == regionIndex);
+}
+
+/*
+ Given a hndBlk, see if it is in one of tryBlk's catch handler regions.
+
+ Since we create one EHblkDsc for each "catch" of a "try", we might end up
+ with multiple EHblkDsc's that have the same ebdTryBeg and ebdTryLast, but different
+ ebdHndBeg and ebdHndLast. Unfortunately getTryIndex() only returns the index of the first EHblkDsc.
+
+ E.g. The following example shows that BB02 has a catch in BB03 and another catch in BB04.
+
+ index nest, enclosing
+ 0 :: 0, 1 - Try at BB01..BB02 [000..008], Handler at BB03 [009..016]
+ 1 :: 0, - - Try at BB01..BB02 [000..008], Handler at BB04 [017..022]
+
+ This function will return true for
+ bbInCatchHandlerRegions(BB02, BB03) and bbInCatchHandlerRegions(BB02, BB04)
+
+*/
+bool Compiler::bbInCatchHandlerRegions(BasicBlock* tryBlk, BasicBlock* hndBlk)
+{
+ assert(tryBlk->hasTryIndex());
+ if (!hndBlk->hasHndIndex())
+ {
+ return false;
+ }
+
+ unsigned XTnum = tryBlk->getTryIndex();
+ EHblkDsc* firstEHblkDsc = ehGetDsc(XTnum);
+ EHblkDsc* ehDsc = firstEHblkDsc;
+
+ // Rather than searching the whole list, take advantage of our sorting.
+ // We will only match against blocks with the same try body (mutually
+ // protect regions). Because of our sort ordering, such regions will
+ // always be immediately adjacent, any nested regions will be before the
+ // first of the set, and any outer regions will be after the last.
+ // Also siblings will be before or after according to their location,
+ // but never in between.
+
+ while (XTnum > 0)
+ {
+ assert(EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc));
+
+ // Stop when the previous region is not mutually protect
+ if (!EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc - 1))
+ {
+ break;
+ }
+
+ ehDsc--;
+ XTnum--;
+ }
+
+ // XTnum and ehDsc are now referring to the first region in the set of
+ // mutually protect regions.
+ assert(EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc));
+ assert((ehDsc == compHndBBtab) || !EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc - 1));
+
+ do
+ {
+ if (ehDsc->HasCatchHandler() && bbInHandlerRegions(XTnum, hndBlk))
+ {
+ return true;
+ }
+ XTnum++;
+ ehDsc++;
+ } while (XTnum < compHndBBtabCount && EHblkDsc::ebdIsSameTry(firstEHblkDsc, ehDsc));
+
+ return false;
+}
+
+/******************************************************************************************
+ * Given two blocks, return the innermost enclosing try region that contains both of them.
+ * Return 0 if there is no such try region (which means the innermost common region
+ * is the method itself).
+ */
+
+unsigned short Compiler::bbFindInnermostCommonTryRegion(BasicBlock* bbOne, BasicBlock* bbTwo)
+{
+ unsigned XTnum;
+
+ for (XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ if (bbInTryRegions(XTnum, bbOne) && bbInTryRegions(XTnum, bbTwo))
+ {
+ noway_assert(XTnum < MAX_XCPTN_INDEX);
+ return (unsigned short)(XTnum + 1); // Return the tryIndex
+ }
+ }
+
+ return 0;
+}
+
+// bbIsTryBeg() returns true if this block is the start of any try region.
+// This is computed by examining the current values in the
+// EH table rather than just looking at the block->bbFlags.
+//
+// Note that a block is the beginning of any try region if it is the beginning of the
+// most nested try region it is a member of. Thus, we only need to check the EH
+// table entry related to the try index stored on the block.
+//
+bool Compiler::bbIsTryBeg(BasicBlock* block)
+{
+ EHblkDsc* ehDsc = ehGetBlockTryDsc(block);
+ return (ehDsc != nullptr) && (block == ehDsc->ebdTryBeg);
+}
+
+// bbIsHandlerBeg() returns true if "block" is the start of any handler or filter.
+// Note that if a block is the beginning of a handler or filter, it must be the beginning
+// of the most nested handler or filter region it is in. Thus, we only need to look at the EH
+// descriptor corresponding to the handler index on the block.
+//
+bool Compiler::bbIsHandlerBeg(BasicBlock* block)
+{
+ EHblkDsc* ehDsc = ehGetBlockHndDsc(block);
+ return (ehDsc != nullptr) && ((block == ehDsc->ebdHndBeg) || (ehDsc->HasFilter() && (block == ehDsc->ebdFilter)));
+}
+
+bool Compiler::bbIsExFlowBlock(BasicBlock* block, unsigned* regionIndex)
+{
+ if (block->hasHndIndex())
+ {
+ *regionIndex = block->getHndIndex();
+ return block == ehGetDsc(*regionIndex)->ExFlowBlock();
+ }
+ else
+ {
+ return false;
+ }
+}
+
+bool Compiler::ehHasCallableHandlers()
+{
+#if FEATURE_EH_FUNCLETS
+
+ // Any EH in the function?
+
+ return compHndBBtabCount > 0;
+
+#else // FEATURE_EH_FUNCLETS
+
+ return ehNeedsShadowSPslots();
+
+#endif // FEATURE_EH_FUNCLETS
+}
+
+/******************************************************************************************
+ * Determine if 'block' is the last block of an EH 'try' or handler (ignoring filters). If so,
+ * return the EH descriptor pointer for that EH region. Otherwise, return nullptr.
+ */
+EHblkDsc* Compiler::ehIsBlockTryLast(BasicBlock* block)
+{
+ EHblkDsc* HBtab = ehGetBlockTryDsc(block);
+ if ((HBtab != nullptr) && (HBtab->ebdTryLast == block))
+ {
+ return HBtab;
+ }
+ return nullptr;
+}
+
+EHblkDsc* Compiler::ehIsBlockHndLast(BasicBlock* block)
+{
+ EHblkDsc* HBtab = ehGetBlockHndDsc(block);
+ if ((HBtab != nullptr) && (HBtab->ebdHndLast == block))
+ {
+ return HBtab;
+ }
+ return nullptr;
+}
+
+bool Compiler::ehIsBlockEHLast(BasicBlock* block)
+{
+ return (ehIsBlockTryLast(block) != nullptr) || (ehIsBlockHndLast(block) != nullptr);
+}
+
+//------------------------------------------------------------------------
+// ehGetBlockExnFlowDsc:
+// Get the EH descriptor for the most nested region (if any) that may
+// handle exceptions raised in the given block
+//
+// Arguments:
+// block - Consider exceptions raised from this block
+//
+// Return Value:
+// nullptr - The given block's exceptions propagate to caller
+// non-null - This region is the innermost handler for exceptions raised in
+// the given block
+
+EHblkDsc* Compiler::ehGetBlockExnFlowDsc(BasicBlock* block)
+{
+ EHblkDsc* hndDesc = ehGetBlockHndDsc(block);
+
+ if ((hndDesc != nullptr) && hndDesc->InFilterRegionBBRange(block))
+ {
+ // If an exception is thrown in a filter (or escapes a callee in a filter),
+ // or if exception_continue_search (0/false) is returned at
+ // the end of a filter, the (original) exception is propagated to
+ // the next outer handler. The "next outer handler" is the handler
+ // of the try region enclosing the try that the filter protects.
+ // This may not be the same as the try region enclosing the filter,
+ // e.g. in cases like this:
+ // try {
+ // ...
+ // } filter (filter-part) {
+ // handler-part
+ // } catch { (or finally/fault/filter)
+ // which is represented as two EHblkDscs with the same try range,
+ // the inner protected by a filter and the outer protected by the
+ // other handler; exceptions in the filter-part propagate to the
+ // other handler, even though the other handler's try region does not
+ // enclose the filter.
+
+ unsigned outerIndex = hndDesc->ebdEnclosingTryIndex;
+
+ if (outerIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(!block->hasTryIndex());
+ return nullptr;
+ }
+ return ehGetDsc(outerIndex);
+ }
+
+ return ehGetBlockTryDsc(block);
+}
+
+bool Compiler::ehBlockHasExnFlowDsc(BasicBlock* block)
+{
+ if (block->hasTryIndex())
+ {
+ return true;
+ }
+
+ EHblkDsc* hndDesc = ehGetBlockHndDsc(block);
+
+ return ((hndDesc != nullptr) && hndDesc->InFilterRegionBBRange(block) &&
+ (hndDesc->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX));
+}
+
+//------------------------------------------------------------------------
+// ehGetMostNestedRegionIndex: Return the region index of the most nested EH region this block is in.
+// The return value is in the range [0..compHndBBtabCount]. It is the same scale as bbTryIndex/bbHndIndex:
+// 0 means the main method body, and N corresponds to compHndBBtab[N - 1]. If the return value is non-zero,
+// *inTryRegion indicates whether the most nested region containing the block is a 'try' clause or a
+// filter/handler clause. For a return value of 0, *inTryRegion is set to true.
+//
+// Arguments:
+// block - the BasicBlock we want the region index for.
+// inTryRegion - an out parameter. As described above.
+//
+// Return Value:
+// As described above.
+//
+unsigned Compiler::ehGetMostNestedRegionIndex(BasicBlock* block, bool* inTryRegion)
+{
+ assert(block != nullptr);
+ assert(inTryRegion != nullptr);
+
+ unsigned mostNestedRegion;
+ if (block->bbHndIndex == 0)
+ {
+ mostNestedRegion = block->bbTryIndex;
+ *inTryRegion = true;
+ }
+ else if (block->bbTryIndex == 0)
+ {
+ mostNestedRegion = block->bbHndIndex;
+ *inTryRegion = false;
+ }
+ else
+ {
+ if (block->bbTryIndex < block->bbHndIndex)
+ {
+ mostNestedRegion = block->bbTryIndex;
+ *inTryRegion = true;
+ }
+ else
+ {
+ assert(block->bbTryIndex != block->bbHndIndex); // A block can't be both in the 'try' and 'handler' region
+ // of the same EH region
+ mostNestedRegion = block->bbHndIndex;
+ *inTryRegion = false;
+ }
+ }
+
+ assert(mostNestedRegion <= compHndBBtabCount);
+ return mostNestedRegion;
+}
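+
+// Illustrative sketch (not part of the source): a typical caller converts the 1-based
+// return value back to an EH descriptor; the local names here are hypothetical.
+//
+//     bool     inTryRegion;
+//     unsigned region = ehGetMostNestedRegionIndex(block, &inTryRegion);
+//     if (region != 0)
+//     {
+//         EHblkDsc* ehDsc = ehGetDsc(region - 1); // same scale as bbTryIndex/bbHndIndex
+//         // 'inTryRegion' says whether 'block' is in the 'try' or in the filter/handler
+//     }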
+
+/*****************************************************************************
+ * Returns the try index of the enclosing try, skipping all EH regions with the
+ * same try region (that is, all 'mutual protect' regions). If there is no such
+ * enclosing try, returns EHblkDsc::NO_ENCLOSING_INDEX.
+ */
+unsigned Compiler::ehTrueEnclosingTryIndexIL(unsigned regionIndex)
+{
+ assert(regionIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+
+ EHblkDsc* ehDscRoot = ehGetDsc(regionIndex);
+ EHblkDsc* HBtab = ehDscRoot;
+
+ for (;;)
+ {
+ regionIndex = HBtab->ebdEnclosingTryIndex;
+ if (regionIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // No enclosing 'try'; we're done
+ break;
+ }
+
+ HBtab = ehGetDsc(regionIndex);
+ if (!EHblkDsc::ebdIsSameILTry(ehDscRoot, HBtab))
+ {
+ // Found an enclosing 'try' that has a different 'try' region (is not mutually-protect with the
+ // original region). Return it.
+ break;
+ }
+ }
+
+ return regionIndex;
+}
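+
+// Illustrative example (hypothetical EH table): a C# "try / catch / catch" produces two
+// EH entries with identical 'try' regions (mutually protect). If entries 0 and 1 share
+// the same 'try' and both are nested inside entry 2's 'try', then:
+//
+//     ehGetEnclosingTryIndex(0)    == 1   // the raw enclosing link points at the mutually-protect sibling
+//     ehTrueEnclosingTryIndexIL(0) == 2   // the "true" variant skips it
+//
+// so callers that care about distinct IL 'try' bodies use the "true" variant.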
+
+unsigned Compiler::ehGetEnclosingRegionIndex(unsigned regionIndex, bool* inTryRegion)
+{
+ assert(regionIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+
+ EHblkDsc* ehDsc = ehGetDsc(regionIndex);
+ return ehDsc->ebdGetEnclosingRegionIndex(inTryRegion);
+}
+
+/*****************************************************************************
+ * The argument 'block' has been deleted. Update the EH table so 'block' is no longer listed
+ * as a 'last' block. You can't delete a 'begin' block this way.
+ */
+void Compiler::ehUpdateForDeletedBlock(BasicBlock* block)
+{
+ assert(block->bbFlags & BBF_REMOVED);
+
+ if (!block->hasTryIndex() && !block->hasHndIndex())
+ {
+ // The block is not part of any EH region, so there is nothing to do.
+ return;
+ }
+
+ BasicBlock* bPrev = block->bbPrev;
+ assert(bPrev != nullptr);
+
+ ehUpdateLastBlocks(block, bPrev);
+}
+
+/*****************************************************************************
+ * Determine if an empty block can be deleted, and still preserve the EH normalization
+ * rules on blocks.
+ *
+ * We only consider the case where the block to be deleted is the last block of a region,
+ * and the region is being contracted such that the previous block will become the new
+ * 'last' block. If this previous block is already a 'last' block, then we can't do the
+ * delete, as that would cause a single block to be the 'last' block of multiple regions.
+ */
+bool Compiler::ehCanDeleteEmptyBlock(BasicBlock* block)
+{
+ assert(block->isEmpty());
+
+ return true;
+
+#if 0 // This is disabled while the "multiple last block" normalization is disabled
+ if (!fgNormalizeEHDone)
+ {
+ return true;
+ }
+
+ if (ehIsBlockEHLast(block))
+ {
+ BasicBlock* bPrev = block->bbPrev;
+ if ((bPrev != nullptr) && ehIsBlockEHLast(bPrev))
+ {
+ return false;
+ }
+ }
+
+ return true;
+#endif // 0
+}
+
+/*****************************************************************************
+ * The 'last' block of one or more EH regions might have changed. Update the EH table.
+ * This can happen if the EH region shrinks, where one or more blocks have been removed
+ * from the region. It can happen if the EH region grows, where one or more blocks
+ * have been added at the end of the region.
+ *
+ * We might like to verify the handler table integrity after doing this update, but we
+ * can't because this might just be one step by the caller in a transformation back to
+ * a legal state.
+ *
+ * Arguments:
+ * oldLast -- Search for this block as the 'last' block of one or more EH regions.
+ * newLast -- If 'oldLast' is found to be the 'last' block of an EH region, replace it by 'newLast'.
+ */
+void Compiler::ehUpdateLastBlocks(BasicBlock* oldLast, BasicBlock* newLast)
+{
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->ebdTryLast == oldLast)
+ {
+ fgSetTryEnd(HBtab, newLast);
+ }
+ if (HBtab->ebdHndLast == oldLast)
+ {
+ fgSetHndEnd(HBtab, newLast);
+ }
+ }
+}
+
+unsigned Compiler::ehGetCallFinallyRegionIndex(unsigned finallyIndex, bool* inTryRegion)
+{
+ assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(ehGetDsc(finallyIndex)->HasFinallyHandler());
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ return ehGetDsc(finallyIndex)->ebdGetEnclosingRegionIndex(inTryRegion);
+#else
+ *inTryRegion = true;
+ return finallyIndex;
+#endif
+}
+
+void Compiler::ehGetCallFinallyBlockRange(unsigned finallyIndex, BasicBlock** begBlk, BasicBlock** endBlk)
+{
+ assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(ehGetDsc(finallyIndex)->HasFinallyHandler());
+ assert(begBlk != nullptr);
+ assert(endBlk != nullptr);
+
+ EHblkDsc* ehDsc = ehGetDsc(finallyIndex);
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ bool inTryRegion;
+ unsigned callFinallyRegionIndex = ehGetCallFinallyRegionIndex(finallyIndex, &inTryRegion);
+
+ if (callFinallyRegionIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ *begBlk = fgFirstBB;
+ *endBlk = fgEndBBAfterMainFunction();
+ }
+ else
+ {
+ EHblkDsc* ehDsc = ehGetDsc(callFinallyRegionIndex);
+
+ if (inTryRegion)
+ {
+ *begBlk = ehDsc->ebdTryBeg;
+ *endBlk = ehDsc->ebdTryLast->bbNext;
+ }
+ else
+ {
+ *begBlk = ehDsc->ebdHndBeg;
+ *endBlk = ehDsc->ebdHndLast->bbNext;
+ }
+ }
+#else // !FEATURE_EH_CALLFINALLY_THUNKS
+ *begBlk = ehDsc->ebdTryBeg;
+ *endBlk = ehDsc->ebdTryLast->bbNext;
+#endif // !FEATURE_EH_CALLFINALLY_THUNKS
+}
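+
+// Illustrative usage (not part of the source; names are hypothetical): a caller can use
+// the helper above to ask where a BBJ_CALLFINALLY for 'finallyIndex' may legally live:
+//
+//     BasicBlock* begBlk;
+//     BasicBlock* endBlk;
+//     ehGetCallFinallyBlockRange(finallyIndex, &begBlk, &endBlk);
+//     // a legal BBJ_CALLFINALLY for 'finallyIndex' lies in the range [begBlk, endBlk)
+//
+// ehCallFinallyInCorrectRegion() below performs the equivalent region-index-based check in DEBUG.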
+
+#ifdef DEBUG
+
+bool Compiler::ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsigned finallyIndex)
+{
+ assert(blockCallFinally->bbJumpKind == BBJ_CALLFINALLY);
+ assert(finallyIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(finallyIndex < compHndBBtabCount);
+ assert(ehGetDsc(finallyIndex)->HasFinallyHandler());
+
+ bool inTryRegion;
+ unsigned callFinallyIndex = ehGetCallFinallyRegionIndex(finallyIndex, &inTryRegion);
+ if (callFinallyIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ if (blockCallFinally->hasTryIndex() || blockCallFinally->hasHndIndex())
+ {
+ // The BBJ_CALLFINALLY is supposed to be in the main function body, not in any EH region.
+ return false;
+ }
+ else
+ {
+ return true;
+ }
+ }
+ else
+ {
+ if (inTryRegion)
+ {
+ if (bbInTryRegions(callFinallyIndex, blockCallFinally))
+ {
+ return true;
+ }
+ }
+ else
+ {
+ if (bbInHandlerRegions(callFinallyIndex, blockCallFinally))
+ {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+#endif // DEBUG
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Are there (or will there be) any funclets in the function?
+ */
+
+bool Compiler::ehAnyFunclets()
+{
+ return compHndBBtabCount > 0; // if there is any EH, there will be funclets
+}
+
+/*****************************************************************************
+ *
+ * Count the number of EH funclets in the function. This will return the number
+ * there will be after funclets have been created, but because it runs over the
+ * EH table, it is accurate at any time.
+ */
+
+unsigned Compiler::ehFuncletCount()
+{
+ unsigned funcletCnt = 0;
+ EHblkDsc* HBtab;
+ EHblkDsc* HBtabEnd;
+
+ for (HBtab = compHndBBtab, HBtabEnd = compHndBBtab + compHndBBtabCount; HBtab < HBtabEnd; HBtab++)
+ {
+ if (HBtab->HasFilter())
+ {
+ ++funcletCnt;
+ }
+ ++funcletCnt;
+ }
+ return funcletCnt;
+}
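+
+// Illustrative example: a method with one try/catch and one try/filter/filter-handler has an
+// ehFuncletCount() of 3 -- one funclet for the catch, plus one each for the filter and its
+// filter-handler (the filter is the extra increment in the loop above).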
+
+/*****************************************************************************
+ *
+ * Get the index to use as the cache key for sharing throw blocks.
+ * For non-funclet platforms, this is just the block's bbTryIndex, to ensure
+ * that the throw is protected by the correct set of 'try' regions. However, when we have
+ * funclets we also have to ensure that the throw blocks are *not* shared
+ * across funclets, so we use the EHblkDsc index of either the funclet or
+ * the containing try region, whichever is inner-most. We differentiate
+ * between the 3 cases by setting the high bits (0 = try, 1 = filter,
+ * 2 = handler).
+ *
+ */
+unsigned Compiler::bbThrowIndex(BasicBlock* blk)
+{
+ if (!blk->hasTryIndex() && !blk->hasHndIndex())
+ {
+ return -1;
+ }
+
+ const unsigned tryIndex = blk->hasTryIndex() ? blk->getTryIndex() : USHRT_MAX;
+ const unsigned hndIndex = blk->hasHndIndex() ? blk->getHndIndex() : USHRT_MAX;
+ assert(tryIndex != hndIndex);
+ assert(tryIndex != USHRT_MAX || hndIndex != USHRT_MAX);
+
+ if (tryIndex < hndIndex)
+ {
+ // The most enclosing region is a try body, use it
+ assert(tryIndex <= 0x3FFFFFFF);
+ return tryIndex;
+ }
+
+ // The most enclosing region is a handler which will be a funclet
+ // Now we have to figure out if blk is in the filter or handler
+ assert(hndIndex <= 0x3FFFFFFF);
+ if (ehGetDsc(hndIndex)->InFilterRegionBBRange(blk))
+ {
+ return hndIndex | 0x40000000;
+ }
+
+ return hndIndex | 0x80000000;
+}
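+
+// Illustrative sketch (not part of the source): the cache key computed above can be decomposed
+// as follows; the local names are hypothetical and only show the bit layout used when comparing
+// keys for throw-block sharing.
+//
+//     unsigned key      = bbThrowIndex(blk);
+//     unsigned ehIndex  = key & 0x3FFFFFFF;          // try or handler/filter index
+//     bool     inFilter = (key & 0x40000000) != 0;   // block lives in a filter funclet
+//     bool     inHnd    = (key & 0x80000000) != 0;   // block lives in a handler funclet
+//
+// Blocks outside any EH region all map to the single key (unsigned)-1. Per the comment above,
+// the intent is that throw blocks are shared only between blocks with equal keys, i.e. blocks
+// protected by the same region and placed in the same funclet.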
+
+#endif // FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ * Determine the emitter code cookie for a block, for unwind purposes.
+ */
+
+void* Compiler::ehEmitCookie(BasicBlock* block)
+{
+ noway_assert(block);
+
+ void* cookie;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ if (block->bbFlags & BBF_FINALLY_TARGET)
+ {
+ // Use the offset of the beginning of the NOP padding, not the main block.
+ // This might include loop head padding, too, if this is a loop head.
+ assert(block->bbUnwindNopEmitCookie); // probably not null-initialized, though, so this might not tell us
+ // anything
+ cookie = block->bbUnwindNopEmitCookie;
+ }
+ else
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ {
+ cookie = block->bbEmitCookie;
+ }
+
+ noway_assert(cookie != nullptr);
+ return cookie;
+}
+
+/*****************************************************************************
+ * Determine the emitter code offset for a block. If the block is a finally
+ * target, choose the offset of the NOP padding that precedes the block.
+ */
+
+UNATIVE_OFFSET Compiler::ehCodeOffset(BasicBlock* block)
+{
+ return genEmitter->emitCodeOffset(ehEmitCookie(block), 0);
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitHndRange(BasicBlock* blk, IL_OFFSET* hndBeg, IL_OFFSET* hndEnd, bool* inFilter)
+{
+ EHblkDsc* hndTab = ehGetBlockHndDsc(blk);
+ if (hndTab != nullptr)
+ {
+ if (hndTab->InFilterRegionILRange(blk))
+ {
+ *hndBeg = hndTab->ebdFilterBegOffs();
+ *hndEnd = hndTab->ebdFilterEndOffs();
+ *inFilter = true;
+ }
+ else
+ {
+ *hndBeg = hndTab->ebdHndBegOffs();
+ *hndEnd = hndTab->ebdHndEndOffs();
+ *inFilter = false;
+ }
+ }
+ else
+ {
+ *hndBeg = 0;
+ *hndEnd = info.compILCodeSize;
+ *inFilter = false;
+ }
+ return hndTab;
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitTryRange(BasicBlock* blk, IL_OFFSET* tryBeg, IL_OFFSET* tryEnd)
+{
+ EHblkDsc* tryTab = ehGetBlockTryDsc(blk);
+ if (tryTab != nullptr)
+ {
+ *tryBeg = tryTab->ebdTryBegOffs();
+ *tryEnd = tryTab->ebdTryEndOffs();
+ }
+ else
+ {
+ *tryBeg = 0;
+ *tryEnd = info.compILCodeSize;
+ }
+ return tryTab;
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitHndBlockRange(BasicBlock* blk, BasicBlock** hndBeg, BasicBlock** hndLast, bool* inFilter)
+{
+ EHblkDsc* hndTab = ehGetBlockHndDsc(blk);
+ if (hndTab != nullptr)
+ {
+ if (hndTab->InFilterRegionBBRange(blk))
+ {
+ *hndBeg = hndTab->ebdFilter;
+ if (hndLast != nullptr)
+ {
+ *hndLast = hndTab->BBFilterLast();
+ }
+ *inFilter = true;
+ }
+ else
+ {
+ *hndBeg = hndTab->ebdHndBeg;
+ if (hndLast != nullptr)
+ {
+ *hndLast = hndTab->ebdHndLast;
+ }
+ *inFilter = false;
+ }
+ }
+ else
+ {
+ *hndBeg = nullptr;
+ if (hndLast != nullptr)
+ {
+ *hndLast = nullptr;
+ }
+ *inFilter = false;
+ }
+ return hndTab;
+}
+
+/****************************************************************************/
+
+EHblkDsc* Compiler::ehInitTryBlockRange(BasicBlock* blk, BasicBlock** tryBeg, BasicBlock** tryLast)
+{
+ EHblkDsc* tryTab = ehGetBlockTryDsc(blk);
+ if (tryTab != nullptr)
+ {
+ *tryBeg = tryTab->ebdTryBeg;
+ if (tryLast != nullptr)
+ {
+ *tryLast = tryTab->ebdTryLast;
+ }
+ }
+ else
+ {
+ *tryBeg = nullptr;
+ if (tryLast != nullptr)
+ {
+ *tryLast = nullptr;
+ }
+ }
+ return tryTab;
+}
+
+/*****************************************************************************
+ * This method updates the value of ebdTryLast.
+ */
+
+void Compiler::fgSetTryEnd(EHblkDsc* handlerTab, BasicBlock* newTryLast)
+{
+ assert(newTryLast != nullptr);
+
+ //
+ // Check if we are going to change the existing value of ebdTryLast
+ //
+ if (handlerTab->ebdTryLast != newTryLast)
+ {
+ // Update the EH table with the newTryLast block
+ handlerTab->ebdTryLast = newTryLast;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New last block of try: BB%02u\n", ehGetIndex(handlerTab), newTryLast->bbNum);
+ }
+#endif // DEBUG
+ }
+}
+
+/*****************************************************************************
+ *
+ * This method updates the value of ebdHndLast.
+ */
+
+void Compiler::fgSetHndEnd(EHblkDsc* handlerTab, BasicBlock* newHndLast)
+{
+ assert(newHndLast != nullptr);
+
+ //
+ // Check if we are going to change the existing value of ebdHndLast
+ //
+ if (handlerTab->ebdHndLast != newHndLast)
+ {
+ // Update the EH table with the newHndLast block
+ handlerTab->ebdHndLast = newHndLast;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("EH#%u: New last block of handler: BB%02u\n", ehGetIndex(handlerTab), newHndLast->bbNum);
+ }
+#endif // DEBUG
+ }
+}
+
+/*****************************************************************************
+ *
+ * Given an EH handler table entry, update the ebdTryLast and ebdHndLast pointers
+ * to skip basic blocks that have been removed. They are set to the last
+ * non-removed block in the 'try' and handler regions, respectively.
+ *
+ * Note that removed blocks are not in the global list of blocks (no block in the
+ * global list points to them). However, their pointers are still valid. We use
+ * this fact when we walk lists of removed blocks until we find a non-removed
+ * block, to be used for ending our iteration.
+ */
+
+void Compiler::fgSkipRmvdBlocks(EHblkDsc* handlerTab)
+{
+ BasicBlock* block;
+ BasicBlock* bEnd;
+ BasicBlock* bLast;
+
+ // Update ebdTryLast
+ bLast = nullptr;
+
+ // Find the first non-removed block after the 'try' region to end our iteration.
+ bEnd = handlerTab->ebdTryLast->bbNext;
+ while ((bEnd != nullptr) && (bEnd->bbFlags & BBF_REMOVED))
+ {
+ bEnd = bEnd->bbNext;
+ }
+
+ // Update bLast to account for any removed blocks
+ block = handlerTab->ebdTryBeg;
+ while (block != nullptr)
+ {
+ if ((block->bbFlags & BBF_REMOVED) == 0)
+ {
+ bLast = block;
+ }
+
+ block = block->bbNext;
+
+ if (block == bEnd)
+ {
+ break;
+ }
+ }
+
+ fgSetTryEnd(handlerTab, bLast);
+
+ // Update ebdHndLast
+ bLast = nullptr;
+
+ // Find the first non-removed block after the handler region to end our iteration.
+ bEnd = handlerTab->ebdHndLast->bbNext;
+ while ((bEnd != nullptr) && (bEnd->bbFlags & BBF_REMOVED))
+ {
+ bEnd = bEnd->bbNext;
+ }
+
+ // Update bLast to account for any removed blocks
+ block = handlerTab->ebdHndBeg;
+ while (block != nullptr)
+ {
+ if ((block->bbFlags & BBF_REMOVED) == 0)
+ {
+ bLast = block;
+ }
+
+ block = block->bbNext;
+ if (block == bEnd)
+ {
+ break;
+ }
+ }
+
+ fgSetHndEnd(handlerTab, bLast);
+}
+
+/*****************************************************************************
+ *
+ * Allocate the EH table
+ */
+void Compiler::fgAllocEHTable()
+{
+#if FEATURE_EH_FUNCLETS
+
+ // We need to allocate space for EH clauses that will be used by funclets
+ // as well as one for each EH clause from the IL. Nested EH clauses pulled
+ // out as funclets create one EH clause for each enclosing region. Thus,
+ // the maximum number of clauses we will need might be very large. We allocate
+ // twice the number of EH clauses in the IL, which should be good in practice.
+ // In extreme cases, we might need to abandon this and reallocate. See
+ // fgAddEHTableEntry() for more details.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG
+#else // DEBUG
+ compHndBBtabAllocCount = info.compXcptnsCount * 2;
+#endif // DEBUG
+
+#else // FEATURE_EH_FUNCLETS
+
+ compHndBBtabAllocCount = info.compXcptnsCount;
+
+#endif // FEATURE_EH_FUNCLETS
+
+ compHndBBtab = new (this, CMK_BasicBlock) EHblkDsc[compHndBBtabAllocCount];
+
+ compHndBBtabCount = info.compXcptnsCount;
+}
+
+/*****************************************************************************
+ *
+ * Remove a single exception table entry. Note that this changes the size of
+ * the exception table. If calling this within a loop over the exception table,
+ * be careful to iterate again on the current entry (at index XTnum) so that no entry is skipped.
+ */
+void Compiler::fgRemoveEHTableEntry(unsigned XTnum)
+{
+ assert(compHndBBtabCount > 0);
+ assert(XTnum < compHndBBtabCount);
+
+ EHblkDsc* HBtab;
+
+ /* Reduce the number of entries in the EH table by one */
+ compHndBBtabCount--;
+
+ if (compHndBBtabCount == 0)
+ {
+ // No more entries remaining.
+ INDEBUG(compHndBBtab = (EHblkDsc*)INVALID_POINTER_VALUE;)
+ }
+ else
+ {
+ /* If we recorded an enclosing index for xtab then see
+ * if it needs to be updated due to the removal of this entry
+ */
+
+ HBtab = compHndBBtab + XTnum;
+
+ EHblkDsc* xtabEnd;
+ EHblkDsc* xtab;
+ for (xtab = compHndBBtab, xtabEnd = compHndBBtab + compHndBBtabCount; xtab < xtabEnd; xtab++)
+ {
+ if ((xtab != HBtab) && (xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) &&
+ (xtab->ebdEnclosingTryIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ if (xtab->ebdEnclosingTryIndex == XTnum)
+ {
+ xtab->ebdEnclosingTryIndex = HBtab->ebdEnclosingTryIndex;
+ }
+ if ((xtab->ebdEnclosingTryIndex > XTnum) &&
+ (xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX))
+ {
+ xtab->ebdEnclosingTryIndex--;
+ }
+ }
+
+ if ((xtab != HBtab) && (xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) &&
+ (xtab->ebdEnclosingHndIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ if (xtab->ebdEnclosingHndIndex == XTnum)
+ {
+ xtab->ebdEnclosingHndIndex = HBtab->ebdEnclosingHndIndex;
+ }
+ if ((xtab->ebdEnclosingHndIndex > XTnum) &&
+ (xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX))
+ {
+ xtab->ebdEnclosingHndIndex--;
+ }
+ }
+ }
+
+ /* We need to update all of the blocks' bbTryIndex and bbHndIndex */
+
+ for (BasicBlock* blk = fgFirstBB; blk; blk = blk->bbNext)
+ {
+ if (blk->hasTryIndex())
+ {
+ if (blk->getTryIndex() == XTnum)
+ {
+ noway_assert(blk->bbFlags & BBF_REMOVED);
+ INDEBUG(blk->setTryIndex(MAX_XCPTN_INDEX);) // Note: this is still a legal index, just unlikely
+ }
+ else if (blk->getTryIndex() > XTnum)
+ {
+ blk->setTryIndex(blk->getTryIndex() - 1);
+ }
+ }
+
+ if (blk->hasHndIndex())
+ {
+ if (blk->getHndIndex() == XTnum)
+ {
+ noway_assert(blk->bbFlags & BBF_REMOVED);
+ INDEBUG(blk->setHndIndex(MAX_XCPTN_INDEX);) // Note: this is still a legal index, just unlikely
+ }
+ else if (blk->getHndIndex() > XTnum)
+ {
+ blk->setHndIndex(blk->getHndIndex() - 1);
+ }
+ }
+ }
+
+ /* Now remove the unused entry from the table */
+
+ if (XTnum < compHndBBtabCount)
+ {
+ /* We copy over the old entry */
+ memmove(HBtab, HBtab + 1, (compHndBBtabCount - XTnum) * sizeof(*HBtab));
+ }
+ else
+ {
+ /* Last entry. Don't need to do anything */
+ noway_assert(XTnum == compHndBBtabCount);
+ }
+ }
+}
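+
+// Illustrative example (hypothetical 5-entry table): removing entry 2 shifts entries 3 and 4
+// down to become entries 2 and 3. Any ebdEnclosingTryIndex / ebdEnclosingHndIndex or block
+// bbTryIndex / bbHndIndex that referred to an index greater than 2 is decremented by one; an
+// enclosing link that referred to exactly 2 is redirected to the removed entry's own enclosing
+// index, and any block that referred to exactly 2 must already be marked BBF_REMOVED, as
+// asserted above.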
+
+#if FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * Add a single exception table entry at index 'XTnum', [0 <= XTnum <= compHndBBtabCount].
+ * If 'XTnum' is compHndBBtabCount, then add the entry at the end.
+ * Note that this changes the size of the exception table.
+ * All the blocks referring to the various index values are updated.
+ * The table entry itself is not filled in.
+ * Returns a pointer to the new entry.
+ */
+EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum)
+{
+ if (XTnum != compHndBBtabCount)
+ {
+ // Update all enclosing links that will get invalidated by inserting an entry at 'XTnum'
+
+ EHblkDsc* xtabEnd;
+ EHblkDsc* xtab;
+ for (xtab = compHndBBtab, xtabEnd = compHndBBtab + compHndBBtabCount; xtab < xtabEnd; xtab++)
+ {
+ if ((xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingTryIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ xtab->ebdEnclosingTryIndex++;
+ }
+ if ((xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingHndIndex >= XTnum))
+ {
+ // Update the enclosing scope link
+ xtab->ebdEnclosingHndIndex++;
+ }
+ }
+
+ // We need to update the BasicBlock bbTryIndex and bbHndIndex fields for all blocks
+
+ for (BasicBlock* blk = fgFirstBB; blk; blk = blk->bbNext)
+ {
+ if (blk->hasTryIndex() && (blk->getTryIndex() >= XTnum))
+ {
+ blk->setTryIndex(blk->getTryIndex() + 1);
+ }
+
+ if (blk->hasHndIndex() && (blk->getHndIndex() >= XTnum))
+ {
+ blk->setHndIndex(blk->getHndIndex() + 1);
+ }
+ }
+ }
+
+ // Increase the number of entries in the EH table by one
+
+ if (compHndBBtabCount == compHndBBtabAllocCount)
+ {
+ // We need to reallocate the table
+
+ if (compHndBBtabAllocCount == MAX_XCPTN_INDEX)
+ { // We're already at the max size for indices to be unsigned short
+ IMPL_LIMITATION("too many exception clauses");
+ }
+
+ // Double the table size. For stress, we could use +1. Note that if the table isn't allocated
+ // yet, such as when we add an EH region for synchronized methods that don't already have one,
+ // we start at zero, so we need to make sure the new table has at least one entry.
+ unsigned newHndBBtabAllocCount = max(1, compHndBBtabAllocCount * 2);
+ noway_assert(compHndBBtabAllocCount < newHndBBtabAllocCount); // check for overflow
+
+ if (newHndBBtabAllocCount > MAX_XCPTN_INDEX)
+ {
+ newHndBBtabAllocCount = MAX_XCPTN_INDEX; // increase to the maximum size we allow
+ }
+
+ JITDUMP("*********** fgAddEHTableEntry: increasing EH table size from %d to %d\n", compHndBBtabAllocCount,
+ newHndBBtabAllocCount);
+
+ compHndBBtabAllocCount = newHndBBtabAllocCount;
+
+ EHblkDsc* newTable = new (this, CMK_BasicBlock) EHblkDsc[compHndBBtabAllocCount];
+
+ // Move over the stuff before the new entry
+
+ memcpy_s(newTable, compHndBBtabAllocCount * sizeof(*compHndBBtab), compHndBBtab, XTnum * sizeof(*compHndBBtab));
+
+ if (XTnum != compHndBBtabCount)
+ {
+ // Move over the stuff after the new entry
+ memcpy_s(newTable + XTnum + 1, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab),
+ compHndBBtab + XTnum, (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab));
+ }
+
+ // Now set the new table as the table to use. The old one gets lost, but we can't
+ // free it because we don't have a freeing allocator.
+
+ compHndBBtab = newTable;
+ }
+ else if (XTnum != compHndBBtabCount)
+ {
+ // Leave the elements before the new element alone. Move the ones after it, to make space.
+
+ EHblkDsc* HBtab = compHndBBtab + XTnum;
+
+ memmove_s(HBtab + 1, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), HBtab,
+ (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab));
+ }
+
+ // Now the entry is there, but not filled in
+
+ compHndBBtabCount++;
+ return compHndBBtab + XTnum;
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+#if !FEATURE_EH
+
+/*****************************************************************************
+ * fgRemoveEH: To facilitate the bring-up of new platforms without having to
+ * worry about fully implementing EH, we want to simply remove EH constructs
+ * from the IR. This works because a large percentage of our tests contain
+ * EH constructs but don't actually throw exceptions. This function removes
+ * 'catch', 'filter', 'filter-handler', and 'fault' clauses completely.
+ * It requires that the importer has created the EH table, and that normal
+ * EH well-formedness tests have been done, and 'leave' opcodes have been
+ * imported.
+ *
+ * It currently does not handle 'finally' clauses, so tests that include
+ * 'finally' will NYI(). To handle 'finally', we would need to inline the
+ * 'finally' clause IL at each exit from a finally-protected 'try', or
+ * else call the 'finally' clause, like normal.
+ *
+ * Walk the EH table from beginning to end. If a table entry is nested within
+ * a handler, we skip it, as we'll delete its code when we get to the enclosing
+ * handler. If a clause is enclosed within a 'try', or has no nesting, then we delete
+ * it (and its range of code blocks). We don't need to worry about cleaning up
+ * the EH table entries as we remove the individual handlers (such as calling
+ * fgRemoveEHTableEntry()), as we'll null out the entire table at the end.
+ *
+ * This function assumes FEATURE_EH_FUNCLETS is defined.
+ */
+void Compiler::fgRemoveEH()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("\n*************** In fgRemoveEH()\n");
+#endif // DEBUG
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH to remove\n\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgRemoveEH()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Make sure we're early in compilation, so we don't need to update lots of data structures.
+ assert(!fgComputePredsDone);
+ assert(!fgDomsComputed);
+ assert(!fgFuncletsCreated);
+ assert(fgFirstFuncletBB == nullptr); // this should follow from "!fgFuncletsCreated"
+ assert(!optLoopsMarked);
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ if (HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // This entry is nested within some other handler. So, don't delete the
+ // EH entry here; let the enclosing handler delete it. Note that for this
+ // EH entry, both the 'try' and handler portions are fully nested within
+ // the enclosing handler region, due to proper nesting rules.
+ continue;
+ }
+
+ if (HBtab->HasCatchHandler() || HBtab->HasFilter() || HBtab->HasFaultHandler())
+ {
+ // Remove all the blocks associated with the handler. Note that there is no
+ // fall-through into the handler, or fall-through out of the handler, so
+ // just deleting the blocks is sufficient. Note, however, that for every
+ // BBJ_EHCATCHRET we delete, we need to fix up the reference count of the
+ // block it points to (by subtracting one from its reference count).
+ // Note that the blocks for a filter immediately precede the blocks for its associated filter-handler.
+
+ BasicBlock* blkBeg = HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg;
+ BasicBlock* blkLast = HBtab->ebdHndLast;
+
+ // Splice out the range of blocks from blkBeg to blkLast (inclusive).
+ fgUnlinkRange(blkBeg, blkLast);
+
+ BasicBlock* blk;
+
+ // Walk the unlinked blocks and mark them as having been removed.
+ for (blk = blkBeg; blk != blkLast->bbNext; blk = blk->bbNext)
+ {
+ blk->bbFlags |= BBF_REMOVED;
+
+ if (blk->bbJumpKind == BBJ_EHCATCHRET)
+ {
+ assert(blk->bbJumpDest->bbRefs > 0);
+ blk->bbJumpDest->bbRefs -= 1;
+ }
+ }
+
+ // Walk the blocks of the 'try' and clear data that makes them appear to be within a 'try'.
+ for (blk = HBtab->ebdTryBeg; blk != HBtab->ebdTryLast->bbNext; blk = blk->bbNext)
+ {
+ blk->clearTryIndex();
+ blk->bbFlags &= ~BBF_TRY_BEG;
+ }
+
+ // If we are deleting a range of blocks whose last block is
+ // the 'last' block of an enclosing try/hnd region, we need to
+ // fix up the EH table. We only care about less nested
+ // EH table entries, since we've already deleted everything up to XTnum.
+
+ unsigned XTnum2;
+ EHblkDsc* HBtab2;
+ for (XTnum2 = XTnum + 1, HBtab2 = compHndBBtab + XTnum2; XTnum2 < compHndBBtabCount; XTnum2++, HBtab2++)
+ {
+ // Handle case where deleted range is at the end of a 'try'.
+ if (HBtab2->ebdTryLast == blkLast)
+ {
+ fgSetTryEnd(HBtab2, blkBeg->bbPrev);
+ }
+ // Handle case where deleted range is at the end of a handler.
+ // (This shouldn't happen, though, because we don't delete handlers
+ // nested within other handlers; we wait until we get to the
+ // enclosing handler.)
+ if (HBtab2->ebdHndLast == blkLast)
+ {
+ unreached();
+ }
+ }
+ }
+ else
+ {
+ // It must be a 'finally'. We still need to call the finally. Note that the
+ // 'finally' can be "called" from multiple locations (e.g., the 'try' block
+ // can have multiple 'leave' instructions, each leaving to different targets,
+ // and each going through the 'finally'). We could inline the 'finally' at each
+ // LEAVE site within a 'try'. If the 'try' exits at all (that is, no infinite loop),
+ // there will be at least one since there is no "fall through" at the end of
+ // the 'try'.
+
+ assert(HBtab->HasFinallyHandler());
+
+ NYI("remove finally blocks");
+ }
+ } /* end of the for loop over XTnum */
+
+#ifdef DEBUG
+ // Make sure none of the remaining blocks have any EH.
+
+ BasicBlock* blk;
+ foreach_block(this, blk)
+ {
+ assert(!blk->hasTryIndex());
+ assert(!blk->hasHndIndex());
+ assert((blk->bbFlags & BBF_TRY_BEG) == 0);
+ assert((blk->bbFlags & BBF_FUNCLET_BEG) == 0);
+ assert((blk->bbFlags & BBF_REMOVED) == 0);
+ assert(blk->bbCatchTyp == BBCT_NONE);
+ }
+#endif // DEBUG
+
+ // Delete the EH table
+
+ compHndBBtab = nullptr;
+ compHndBBtabCount = 0;
+ // Leave compHndBBtabAllocCount alone.
+
+ // Renumber the basic blocks
+ JITDUMP("\nRenumbering the basic blocks for fgRemoveEH\n");
+ fgRenumberBlocks();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgRemoveEH()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif
+}
+
+#endif // !FEATURE_EH
+
+/*****************************************************************************
+ *
+ * Sort the EH table if necessary.
+ */
+
+void Compiler::fgSortEHTable()
+{
+ if (!fgNeedToSortEHTable)
+ {
+ return;
+ }
+
+ // Now, all fields of the EH table are set except for those that are related
+ // to nesting. We need to first sort the table to ensure that an EH clause
+ // appears before any try or handler that it is nested within. The CLI spec
+ // requires this for nesting in 'try' clauses, but does not require this
+ // for handler clauses. However, parts of the JIT do assume this ordering.
+ //
+ // For example:
+ //
+ // try { // A
+ // } catch {
+ // try { // B
+ // } catch {
+ // }
+ // }
+ //
+ // In this case, the EH clauses for A and B have no required ordering: the
+ // clause for either A or B can come first, despite B being nested within
+ // the catch clause for A.
+ //
+ // The CLI spec, section 12.4.2.5 "Overview of exception handling", states:
+ // "The ordering of the exception clauses in the Exception Handler Table is
+ // important. If handlers are nested, the most deeply nested try blocks shall
+ // come before the try blocks that enclose them."
+ //
+ // Note, in particular, that it doesn't say "shall come before the *handler*
+ // blocks that enclose them".
+ //
+ // Also, the same section states, "When an exception occurs, the CLI searches
+ // the array for the first protected block that (1) Protects a region including the
+ // current instruction pointer and (2) Is a catch handler block and (3) Whose
+ // filter wishes to handle the exception."
+ //
+ // Once again, nothing about the ordering of the catch blocks.
+ //
+ // A more complicated example:
+ //
+ // try { // A
+ // } catch {
+ // try { // B
+ // try { // C
+ // } catch {
+ // }
+ // } catch {
+ // }
+ // }
+ //
+ // The clause for C must come before the clause for B, but the clause for A can
+ // be anywhere. Thus, we could have these orderings: ACB, CAB, CBA.
+ //
+ // One more example:
+ //
+ // try { // A
+ // } catch {
+ // try { // B
+ // } catch {
+ // try { // C
+ // } catch {
+ // }
+ // }
+ // }
+ //
+ // There is no ordering requirement: the EH clauses can come in any order.
+ //
+ // In Dev11 (Visual Studio 2012), x86 did not sort the EH table (it never had before)
+ // but ARM did. It turns out that not sorting the table can cause the bbHndIndex value to be
+ // set incorrectly in some nested cases, and that can lead to a security exploit
+ // that allows the execution of arbitrary code.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgSortEHTable: Sorting EH table\n");
+ }
+#endif // DEBUG
+
+ EHblkDsc* xtab1;
+ EHblkDsc* xtab2;
+ unsigned xtabnum1, xtabnum2;
+
+ for (xtabnum1 = 0, xtab1 = compHndBBtab; xtabnum1 < compHndBBtabCount; xtabnum1++, xtab1++)
+ {
+ for (xtabnum2 = xtabnum1 + 1, xtab2 = xtab1 + 1; xtabnum2 < compHndBBtabCount; xtabnum2++, xtab2++)
+ {
+ // If the nesting is wrong, swap them. The nesting is wrong if
+ // EH region 2 is nested in the try, handler, or filter of EH region 1.
+ // Note that due to proper nesting rules, if any of 2 is nested in
+ // the try or handler or filter of 1, then all of 2 is nested.
+ // We must be careful when comparing the offsets of the 'try' clause, because
+ // for "mutually-protect" try/catch, the 'try' bodies will be identical.
+ // For this reason, we use the handler region to check nesting. Note
+ // that we must check both beginning and end: a nested region can have a 'try'
+ // body that starts at the beginning of a handler. Thus, if we just compared the
+ // handler begin offset, we might get confused and think it is nested.
+
+ IL_OFFSET hndBegOff = xtab2->ebdHndBegOffset;
+ IL_OFFSET hndEndOff = xtab2->ebdHndEndOffset;
+ assert(hndEndOff > hndBegOff);
+
+ if ((hndBegOff >= xtab1->ebdTryBegOffset && hndEndOff <= xtab1->ebdTryEndOffset) ||
+ (hndBegOff >= xtab1->ebdHndBegOffset && hndEndOff <= xtab1->ebdHndEndOffset) ||
+ (xtab1->HasFilter() && (hndBegOff >= xtab1->ebdFilterBegOffset && hndEndOff <= xtab1->ebdHndBegOffset))
+ // Note that end of filter is beginning of handler
+ )
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgSortEHTable: Swapping out-of-order EH#%u and EH#%u\n", xtabnum1, xtabnum2);
+ }
+
+ // Assert that the 'try' region is also nested in the same place as the handler
+
+ IL_OFFSET tryBegOff = xtab2->ebdTryBegOffset;
+ IL_OFFSET tryEndOff = xtab2->ebdTryEndOffset;
+ assert(tryEndOff > tryBegOff);
+
+ if (hndBegOff >= xtab1->ebdTryBegOffset && hndEndOff <= xtab1->ebdTryEndOffset)
+ {
+ assert(tryBegOff >= xtab1->ebdTryBegOffset && tryEndOff <= xtab1->ebdTryEndOffset);
+ }
+ if (hndBegOff >= xtab1->ebdHndBegOffset && hndEndOff <= xtab1->ebdHndEndOffset)
+ {
+ assert(tryBegOff >= xtab1->ebdHndBegOffset && tryEndOff <= xtab1->ebdHndEndOffset);
+ }
+ if (xtab1->HasFilter() &&
+ (hndBegOff >= xtab1->ebdFilterBegOffset && hndEndOff <= xtab1->ebdHndBegOffset))
+ {
+ assert(tryBegOff >= xtab1->ebdFilterBegOffset && tryEndOff <= xtab1->ebdHndBegOffset);
+ }
+#endif // DEBUG
+
+ // Swap them!
+ EHblkDsc tmp = *xtab1;
+ *xtab1 = *xtab2;
+ *xtab2 = tmp;
+ }
+ }
+ }
+}
+
+// fgNormalizeEH: Enforce the following invariants:
+//
+// 1. No block is both the first block of a handler and the first block of a try. In IL (and on entry
+// to this function), this can happen if the "try" is more nested than the handler.
+//
+// For example, consider:
+//
+// try1 ----------------- BB01
+// | BB02
+// |--------------------- BB03
+// handler1
+// |----- try2 ---------- BB04
+// | | BB05
+// | handler2 ------ BB06
+// | | BB07
+// | --------------- BB08
+// |--------------------- BB09
+//
+// Thus, the start of handler1 and the start of try2 are the same block. We will transform this to:
+//
+// try1 ----------------- BB01
+// | BB02
+// |--------------------- BB03
+// handler1 ------------- BB10 // empty block
+// | try2 ---------- BB04
+// | | BB05
+// | handler2 ------ BB06
+// | | BB07
+// | --------------- BB08
+// |--------------------- BB09
+//
+// 2. No block is the first block of more than one try or handler region.
+// (Note that filters cannot have EH constructs nested within them, so there can be no nested try or
+// handler that shares the filter begin or last block. For try/filter/filter-handler constructs nested
+// within a try or handler region, note that the filter block cannot be the first block of the try,
+// nor can it be the first block of the handler, since you can't "fall into" a filter, which that situation
+// would require.)
+//
+// For example, we will transform this:
+//
+// try3 try2 try1
+// |--- |--- |--- BB01
+// | | | BB02
+// | | |--- BB03
+// | | BB04
+// | |------------ BB05
+// | BB06
+// |------------------- BB07
+//
+// to this:
+//
+// try3 ------------- BB08 // empty BBJ_NONE block
+// | try2 ------ BB09 // empty BBJ_NONE block
+// | | try1
+// | | |--- BB01
+// | | | BB02
+// | | |--- BB03
+// | | BB04
+// | |------------ BB05
+// | BB06
+// |------------------- BB07
+//
+// The benefit of this is that adding a block to an EH region will not require examining every EH region,
+// looking for possible shared "first" blocks to adjust. It also makes it easier to put code at the top
+// of a particular EH region, especially for loop optimizations.
+//
+// These empty blocks (BB08, BB09) will generate no code (unless some code is subsequently placed into them),
+// and will have the same native code offset as BB01 after code is generated. There may be labels generated
+// for them, if they are branch targets, so it is possible to have multiple labels targeting the same native
+// code offset. The blocks will not be merged with the blocks they are split from, because they will have a
+// different EH region, and we don't merge blocks from two different EH regions.
+//
+// In the example, if there are branches to BB01, we need to distribute them to BB01, BB08, or BB09, appropriately.
+// 1. A branch from BB01/BB02/BB03 to BB01 will still go to BB01. Branching to BB09 or BB08 would not be legal,
+// since it would branch out of a try region.
+// 2. A branch from BB04/BB05 to BB01 will instead branch to BB09. Branching to BB08 would not be legal. Note
+// that branching to BB01 would still be legal, so we have a choice. It makes the most sense to branch to BB09,
+// so the source and target of a branch are in the same EH region.
+// 3. Similarly, a branch from BB06/BB07 to BB01 will go to BB08, even though branching to BB09 would be legal.
+// 4. A branch from outside this loop (at the top-level) to BB01 will go to BB08. This is one case where the
+// source and target of the branch are not in the same EH region.
+//
+// The EH nesting rules for IL branches are described in the ECMA spec section 12.4.2.8.2.7 "Branches" and
+// section 12.4.2.8.2.9 "Examples".
+//
+// There is one exception to this normalization rule: we do not change "mutually protect" regions. These are cases
+// where two EH table entries have exactly the same 'try' region, used to implement C# "try / catch / catch".
+// The first handler appears by our nesting to be an "inner" handler, with ebdEnclosingTryIndex pointing to the
+// second one. It is not true nesting, though, since they both protect the same "try". Both of these EH table
+// entries must keep the same "try" region begin/last block pointers. A block in this "try" region has a try index
+// of the first ("most nested") EH table entry.
+//
+// 3. No block is the last block of more than one try or handler region. Again, as described above,
+// filters need not be considered.
+//
+// For example, we will transform this:
+//
+// try3 ----------------- BB01
+// | try2 ---------- BB02
+// | | handler1 BB03
+// | | | BB04
+// |----- |----- |------- BB05
+//
+// (where all three try regions end at BB05) to this:
+//
+// try3 ----------------- BB01
+// | try2 ---------- BB02
+// | | handler1 BB03
+// | | | BB04
+// | | |------- BB05
+// | |-------------- BB06 // empty BBJ_NONE block
+// |--------------------- BB07 // empty BBJ_NONE block
+//
+// No branches need to change: if something branched to BB05, it will still branch to BB05. If BB05 is a
+// BBJ_NONE block, then control flow will fall through the newly added blocks as well. If it is anything
+// else, it will retain that block branch type and BB06 and BB07 will be unreachable.
+//
+// The benefit of this is, once again, to remove the need to consider every EH region when adding new blocks.
+//
+// Overall, a block can appear in the EH table exactly once: as the begin or last block of a single try, filter, or
+// handler. There is one exception: for a single-block EH region, the block can appear as both the "begin" and "last"
+// block of the try, or the "begin" and "last" block of the handler (note that filters don't have a "last" block stored,
+// so this case doesn't apply.)
+// (Note: we could remove this special case if we wanted, and if it helps anything, but it doesn't appear that it will
+// help.)
+//
+// These invariants simplify a number of things. When inserting a new block into a region, it is not necessary to
+// traverse the entire EH table looking to see if any EH region needs to be updated. You only ever need to update a
+// single region (except for mutually-protect "try" regions).
+//
+// Also, for example, when we're trying to determine the successors of a block B1 that leads into a try T1, if a block
+// B2 violates invariant #3 by being the first block of both the handler of T1, and an enclosed try T2, inserting a
+// block to enforce this invariant prevents us from having to consider the first block of T2's handler as a possible
+// successor of B1. This is somewhat akin to breaking of "critical edges" in a flowgraph.
+
+void Compiler::fgNormalizeEH()
+{
+ if (compHndBBtabCount == 0)
+ {
+ // No EH? Nothing to do.
+ INDEBUG(fgNormalizeEHDone = true;)
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgNormalizeEH()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif
+
+ bool modified = false;
+
+ // Case #1: Prevent the first block of a handler from also being the first block of a 'try'.
+ if (fgNormalizeEHCase1())
+ {
+ modified = true;
+ }
+
+ // Case #2: Prevent any two EH regions from starting with the same block (after case #1, we only need to worry about
+ // 'try' begin blocks).
+ if (fgNormalizeEHCase2())
+ {
+ modified = true;
+ }
+
+#if 0
+ // Case 3 normalization is disabled. The JIT really doesn't like having extra empty blocks around, especially
+ // blocks that are unreachable. There are lots of asserts when such things occur. We will re-evaluate whether we
+ // can do this normalization.
+ // Note: there are cases in fgVerifyHandlerTab() that are also disabled to match this.
+
+ // Case #3: Prevent any two EH regions from ending with the same block.
+ if (fgNormalizeEHCase3())
+ {
+ modified = true;
+ }
+
+#endif // 0
+
+ INDEBUG(fgNormalizeEHDone = true;)
+
+ if (modified)
+ {
+ // If we computed the cheap preds, don't let them leak out, in case other code doesn't maintain them properly.
+ if (fgCheapPredsValid)
+ {
+ fgRemovePreds();
+ }
+
+ JITDUMP("Added at least one basic block in fgNormalizeEH.\n");
+ fgRenumberBlocks();
+#ifdef DEBUG
+ // fgRenumberBlocks() will dump all the blocks and the handler table, so we don't need to do it here.
+ fgVerifyHandlerTab();
+#endif
+ }
+ else
+ {
+ JITDUMP("No EH normalization performed.\n");
+ }
+}
+
+bool Compiler::fgNormalizeEHCase1()
+{
+ bool modified = false;
+
+ //
+ // Case #1: Is the first block of a handler also the first block of any try?
+ //
+ // Do this as a separate loop from case #2 to simplify the logic for cases where we have both multiple identical
+ // 'try' begin blocks as well as this case, e.g.:
+ // try {
+ // } finally { try { try {
+ // } catch {}
+ // } catch {}
+ // }
+ // where the finally/try/try are all the same block.
+ // We also do this before case #2, so when we get to case #2, we only need to worry about updating 'try' begin
+ // blocks (and only those within the 'try' region's parents), not handler begin blocks, when we are inserting new
+ // header blocks.
+ //
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* eh = ehGetDsc(XTnum);
+
+ BasicBlock* handlerStart = eh->ebdHndBeg;
+ EHblkDsc* handlerStartContainingTry = ehGetBlockTryDsc(handlerStart);
+ // If the handler start block is in a try, and is in fact the first block of that try...
+ if (handlerStartContainingTry != nullptr && handlerStartContainingTry->ebdTryBeg == handlerStart)
+ {
+ // ...then we want to insert an empty, non-removable block outside the try to be the new first block of the
+ // handler.
+ BasicBlock* newHndStart = bbNewBasicBlock(BBJ_NONE);
+ fgInsertBBbefore(eh->ebdHndBeg, newHndStart);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Handler begin for EH#%02u and 'try' begin for EH#%02u are the same block; inserted new BB%02u "
+ "before BB%02u as new handler begin for EH#%u.\n",
+ XTnum, ehGetIndex(handlerStartContainingTry), newHndStart->bbNum, eh->ebdHndBeg->bbNum, XTnum);
+ }
+#endif // DEBUG
+
+ // The new block is the new handler begin.
+ eh->ebdHndBeg = newHndStart;
+
+ // Try index is the same as the enclosing try, if any, of eh:
+ if (eh->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ newHndStart->clearTryIndex();
+ }
+ else
+ {
+ newHndStart->setTryIndex(eh->ebdEnclosingTryIndex);
+ }
+ newHndStart->setHndIndex(XTnum);
+ newHndStart->bbCatchTyp = handlerStart->bbCatchTyp;
+ handlerStart->bbCatchTyp = BBCT_NONE; // Now handlerStart is no longer the start of a handler...
+ newHndStart->bbCodeOffs = handlerStart->bbCodeOffs;
+ newHndStart->bbCodeOffsEnd = newHndStart->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
+ newHndStart->inheritWeight(handlerStart);
+#if FEATURE_STACK_FP_X87
+ newHndStart->bbFPStateX87 = codeGen->FlatFPAllocFPState(handlerStart->bbFPStateX87);
+#endif // FEATURE_STACK_FP_X87
+ newHndStart->bbFlags |= (BBF_DONT_REMOVE | BBF_INTERNAL | BBF_HAS_LABEL);
+ modified = true;
+
+#ifdef DEBUG
+ if (0 && verbose) // Normally this is way too verbose, but it is useful for debugging
+ {
+ printf("*************** fgNormalizeEH() made a change\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+ }
+ }
+
+ return modified;
+}
+
+bool Compiler::fgNormalizeEHCase2()
+{
+ bool modified = false;
+
+ //
+ // Case #2: Make sure no two 'try' have the same begin block (except for mutually-protect regions).
+ // Note that this can only happen for nested 'try' regions, so we only need to look through the
+ // 'try' nesting hierarchy.
+ //
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* eh = ehGetDsc(XTnum);
+
+ if (eh->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ BasicBlock* tryStart = eh->ebdTryBeg;
+ BasicBlock* insertBeforeBlk = tryStart; // If we need to insert new blocks, we insert before this block.
+
+ // We need to keep track of the last "mutually protect" region so we can properly not add additional header
+ // blocks to the second and subsequent mutually protect try blocks. We can't just keep track of the EH
+ // region pointer, because we're updating the 'try' begin blocks as we go. So, we need to keep track of the
+ // pre-update 'try' begin/last blocks themselves.
+ BasicBlock* mutualTryBeg = eh->ebdTryBeg;
+ BasicBlock* mutualTryLast = eh->ebdTryLast;
+ unsigned mutualProtectIndex = XTnum;
+
+ EHblkDsc* ehOuter = eh;
+ do
+ {
+ unsigned ehOuterTryIndex = ehOuter->ebdEnclosingTryIndex;
+ ehOuter = ehGetDsc(ehOuterTryIndex);
+ BasicBlock* outerTryStart = ehOuter->ebdTryBeg;
+ if (outerTryStart == tryStart)
+ {
+ // We found two EH regions with the same 'try' begin! Should we do something about it?
+
+ if (ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
+ {
+// clang-format off
+ // Don't touch mutually-protect regions: their 'try' regions must remain identical!
+ // We want to continue the looping outwards, in case we have something like this:
+ //
+ // try3 try2 try1
+ // |--- |---- |---- BB01
+ // | | | BB02
+ // | |---- |---- BB03
+ // | BB04
+ // |------------------- BB05
+ //
+ // (Thus, try1 & try2 are mutually-protect 'try' regions from BB01 to BB03. They are nested inside try3,
+ // which also starts at BB01. The 'catch' clauses have been elided.)
+ // In this case, we'll decline to add a new header block for try2, but we will add a new one for try3, ending with:
+ //
+ // try3 try2 try1
+ // |------------------- BB06
+ // | |---- |---- BB01
+ // | | | BB02
+ // | |---- |---- BB03
+ // | BB04
+ // |------------------- BB05
+ //
+ // More complicated (yes, this is real):
+ //
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // try {
+ // }
+ // catch {} // mutually-protect set #1
+ // catch {}
+ // } finally {}
+ // }
+ // catch {} // mutually-protect set #2
+ // catch {}
+ // catch {}
+ // } finally {}
+ // } catch {}
+ // } finally {}
+ // } catch {}
+ // } finally {}
+ //
+ // In this case, all the 'try' start at the same block! Note that there are two sets of mutually-protect regions,
+ // separated by some nesting.
+// clang-format on
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mutually protect regions EH#%u and EH#%u; leaving identical 'try' begin blocks.\n",
+ mutualProtectIndex, ehGetIndex(ehOuter));
+ }
+#endif // DEBUG
+
+ // We still need to update the tryBeg, in case a more nested region already inserted a new
+ // 'try' begin block (and thus moved insertBeforeBlk).
+ ehOuter->ebdTryBeg = insertBeforeBlk;
+ }
+ else
+ {
+ // We're in a new set of mutual protect regions, so don't compare against the original.
+ mutualTryBeg = ehOuter->ebdTryBeg;
+ mutualTryLast = ehOuter->ebdTryLast;
+ mutualProtectIndex = ehOuterTryIndex;
+
+ // We're going to need the preds. We compute them here, before inserting the new block,
+ // so our logic to add/remove preds below is the same for both the first time preds are
+ // created and subsequent times.
+ if (!fgCheapPredsValid)
+ {
+ fgComputeCheapPreds();
+ }
+
+ // We've got multiple 'try' blocks starting at the same place!
+ // Add a new first 'try' block for 'ehOuter' that will be outside 'eh'.
+
+ BasicBlock* newTryStart = bbNewBasicBlock(BBJ_NONE);
+ fgInsertBBbefore(insertBeforeBlk, newTryStart);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'try' begin for EH#%u and EH#%u are same block; inserted new BB%02u before BB%02u "
+ "as new 'try' begin for EH#%u.\n",
+ ehOuterTryIndex, XTnum, newTryStart->bbNum, insertBeforeBlk->bbNum, ehOuterTryIndex);
+ }
+#endif // DEBUG
+
+ // The new block is the new 'try' begin.
+ ehOuter->ebdTryBeg = newTryStart;
+
+ newTryStart->copyEHRegion(tryStart); // Copy the EH region info
+ newTryStart->setTryIndex(ehOuterTryIndex); // ... but overwrite the 'try' index
+ newTryStart->bbCatchTyp = BBCT_NONE;
+ newTryStart->bbCodeOffs = tryStart->bbCodeOffs;
+ newTryStart->bbCodeOffsEnd =
+ newTryStart->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
+ newTryStart->inheritWeight(tryStart);
+#if FEATURE_STACK_FP_X87
+ newTryStart->bbFPStateX87 = codeGen->FlatFPAllocFPState(tryStart->bbFPStateX87);
+#endif // FEATURE_STACK_FP_X87
+
+ // Note that we don't need to clear any flags on the old try start, since it is still a 'try'
+ // start.
+ newTryStart->bbFlags |= (BBF_TRY_BEG | BBF_DONT_REMOVE | BBF_INTERNAL | BBF_HAS_LABEL);
+
+ // Now we need to split any flow edges targeting the old try begin block between the old
+ // and new block. Note that if we are handling a multiply-nested 'try', we may have already
+ // split the inner set. So we need to split again, from the most enclosing block that we've
+ // already created, namely, insertBeforeBlk.
+ //
+ // For example:
+ //
+ // try3 try2 try1
+ // |---- |---- |---- BB01
+ // | | | BB02
+ // | | |---- BB03
+ // | |----------- BB04
+ // |------------------ BB05
+ //
+ // We'll loop twice, to create two header blocks, one for try2, and the second time for try3
+ // (in that order).
+ // After the first loop, we have:
+ //
+ // try3 try2 try1
+ // |---- BB06
+ // |---- | |---- BB01
+ // | | | BB02
+ // | | |---- BB03
+ // | |----------- BB04
+ // |------------------ BB05
+ //
+ // And all the external edges have been changed to point at try2. On the next loop, we'll create
+ // a unique header block for try3, and split the edges between try2 and try3, leaving us with:
+ //
+ // try3 try2 try1
+ // |---- BB07
+ // | |---- BB06
+ // | | |---- BB01
+ // | | | BB02
+ // | | |---- BB03
+ // | |----------- BB04
+ // |------------------ BB05
+
+ BasicBlockList* nextPred; // we're going to update the pred list as we go, so we need to keep
+ // track of the next pred in case it gets deleted.
+ for (BasicBlockList* pred = insertBeforeBlk->bbCheapPreds; pred != nullptr; pred = nextPred)
+ {
+ nextPred = pred->next;
+
+ // Who gets this predecessor?
+ BasicBlock* predBlock = pred->block;
+
+ if (!BasicBlock::sameTryRegion(insertBeforeBlk, predBlock))
+ {
+ // Move the edge to target newTryStart instead of insertBeforeBlk.
+ fgAddCheapPred(newTryStart, predBlock);
+ fgRemoveCheapPred(insertBeforeBlk, predBlock);
+
+ // Now change the branch. If it was a BBJ_NONE fall-through to the top block, this will
+ // do nothing. Since cheap preds contains dups (for switch duplicates), we will call
+ // this once per dup.
+ fgReplaceJumpTarget(predBlock, newTryStart, insertBeforeBlk);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Redirect BB%02u target from BB%02u to BB%02u.\n", predBlock->bbNum,
+ insertBeforeBlk->bbNum, newTryStart->bbNum);
+ }
+#endif // DEBUG
+ }
+ }
+
+ // The new block (a fall-through block) is a new predecessor.
+ fgAddCheapPred(insertBeforeBlk, newTryStart);
+
+ // We don't need to update the tryBeg block of other EH regions here because we are looping
+ // outwards in enclosing try index order, and we'll get to them later.
+
+ // Move the insert block backwards, to the one we just inserted.
+ insertBeforeBlk = insertBeforeBlk->bbPrev;
+ assert(insertBeforeBlk == newTryStart);
+
+ modified = true;
+
+#ifdef DEBUG
+ if (0 && verbose) // Normally this is way too verbose, but it is useful for debugging
+ {
+ printf("*************** fgNormalizeEH() made a change\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ // If the outer region's 'try' start block isn't the same, then none of the more-enclosing
+ // 'try' regions (if any) can have the same 'try' start block, so we're done.
+ // Note that we could have a situation like this:
+ //
+ // try4 try3 try2 try1
+ // |--- |--- | | BB01
+ // | | | | BB02
+ // | | |---- |---- BB03
+ // | | | BB04
+ // | | |------------ BB05
+ // | | BB06
+ // | |------------------- BB07
+ // |-------------------------- BB08
+ //
+ // (Thus, try1 & try2 start at BB03, and are nested inside try3 & try4, which both start at BB01.)
+ // In this case, we'll process try1 and try2, then break out. Later, we'll get to try3 and process
+ // it and try4.
+
+ break;
+ }
+ } while (ehOuter->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+ }
+
+ return modified;
+}
+
+bool Compiler::fgNormalizeEHCase3()
+{
+ bool modified = false;
+
+ //
+ // Case #3: Make sure no two 'try' or handler regions have the same 'last' block (except for mutually protect 'try'
+ // regions). As above, there has to be EH region nesting for this to occur. However, since we need to consider
+ // handlers, there are more cases.
+ //
+ // There are four cases to consider:
+ // (1) try nested in try
+ // (2) handler nested in try
+ // (3) try nested in handler
+ // (4) handler nested in handler
+ //
+ // Note that, before funclet generation, it would be unusual, though legal IL, for a 'try' to come at the end
+ // of an EH region (either 'try' or handler region), since that implies that its corresponding handler precedes it.
+ // That will never happen in C#, but is legal in IL.
+ //
+ // Only one of these cases can happen. For example, if we have case (2), where a try/catch is nested in a 'try' and
+ // the nested handler has the same 'last' block as the outer handler, then, due to nesting rules, the nested 'try'
+ // must also be within the outer handler, and obviously cannot share the same 'last' block.
+ //
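+ // As a minimal sketch for case (1), with hypothetical block numbers: if inner 'try' EH#0 and outer
+ // 'try' EH#1 both end at BB10, we insert a new empty BBJ_NONE block BB11 after BB10 and make it the
+ // new 'try' last block of EH#1 only, so the two regions no longer share a 'last' block. The other
+ // cases are handled analogously, updating the outer region's handler 'last' block where appropriate.
+ //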
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHblkDsc* eh = ehGetDsc(XTnum);
+
+ // Find the EH region 'eh' is most nested within, either 'try' or handler or none.
+ bool outerIsTryRegion;
+ unsigned ehOuterIndex = eh->ebdGetEnclosingRegionIndex(&outerIsTryRegion);
+
+ if (ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ EHblkDsc* ehInner = eh; // This gets updated as we loop outwards in the EH nesting
+ unsigned ehInnerIndex = XTnum; // This gets updated as we loop outwards in the EH nesting
+ bool innerIsTryRegion;
+
+ EHblkDsc* ehOuter = ehGetDsc(ehOuterIndex);
+
+ // Debugging: say what type of block we're updating.
+ INDEBUG(const char* outerType = ""; const char* innerType = "";)
+
+ // 'insertAfterBlk' is the place we will insert new "normalization" blocks. We don't know yet if we will
+ // insert them after the innermost 'try' or handler's "last" block, so we set it to nullptr. Once we
+ // determine the innermost region that is equivalent, we set this, and then update it incrementally as we
+ // loop outwards.
+ BasicBlock* insertAfterBlk = nullptr;
+
+ bool foundMatchingLastBlock = false;
+
+ // This is set to 'false' for mutual protect regions for which we will not insert a normalization block.
+ bool insertNormalizationBlock = true;
+
+ // Keep track of what the 'try' index and handler index should be for any new normalization block that we
+ // insert. If we have a sequence of alternating nested 'try' and handlers with the same 'last' block, we'll
+ // need to update these as we go. For example:
+ // try { // EH#5
+ // ...
+ // catch { // EH#4
+ // ...
+ // try { // EH#3
+ // ...
+ // catch { // EH#2
+ // ...
+ // try { // EH#1
+ // BB01 // try=1, hnd=2
+ // } } } } } // all the 'last' blocks are the same
+ //
+ // after normalization:
+ //
+ // try { // EH#5
+ // ...
+ // catch { // EH#4
+ // ...
+ // try { // EH#3
+ // ...
+ // catch { // EH#2
+ // ...
+ // try { // EH#1
+ // BB01 // try=1, hnd=2
+ // }
+ // BB02 // try=3, hnd=2
+ // }
+ // BB03 // try=3, hnd=4
+ // }
+ // BB04 // try=5, hnd=4
+ // }
+ // BB05 // try=5, hnd=0 (no enclosing hnd)
+ // }
+ //
+ unsigned nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // Initialization only needed to quell compiler
+ // warnings.
+ unsigned nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+
+ // We compare the outer region against the inner region's 'try' or handler, determined by the
+ // 'outerIsTryRegion' variable. Once we decide that, we know exactly the 'last' pointer that we will use to
+ // compare against all enclosing EH regions.
+ //
+ // For example, if we have these nested EH regions (omitting some corresponding try/catch clauses for each
+ // nesting level):
+ //
+ // try {
+ // ...
+ // catch {
+ // ...
+ // try {
+ // } } } // all the 'last' blocks are the same
+ //
+ // then we determine that the innermost region we are going to compare against is the 'try' region. There's
+ // no reason to compare against its handler region for any enclosing region (since it couldn't possibly
+ // share a 'last' block with the enclosing region). However, there's no harm, either (and it simplifies
+ // the code for the first set of comparisons to be the same as subsequent, more enclosing cases).
+ BasicBlock* lastBlockPtrToCompare = nullptr;
+
+ // We need to keep track of the last "mutual protect" region so we can properly not add additional blocks
+ // to the second and subsequent mutual protect try blocks. We can't just keep track of the EH region
+ // pointer, because we're updating the last blocks as we go. So, we need to keep track of the
+ // pre-update 'try' begin/last blocks themselves. These only matter if the "last" blocks that match are
+ // from two (or more) nested 'try' regions.
+ BasicBlock* mutualTryBeg = nullptr;
+ BasicBlock* mutualTryLast = nullptr;
+
+ if (outerIsTryRegion)
+ {
+ nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a 'try' region.
+
+ // The outer (enclosing) region is a 'try'
+ if (ehOuter->ebdTryLast == ehInner->ebdTryLast)
+ {
+ // Case (1) try nested in try.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "try"; outerType = "try";)
+ insertAfterBlk = ehOuter->ebdTryLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ if (EHblkDsc::ebdIsSameTry(ehOuter, ehInner))
+ {
+ // We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mutual protect regions EH#%u and EH#%u; leaving identical 'try' last blocks.\n",
+ ehOuterIndex, ehInnerIndex);
+ }
+#endif // DEBUG
+
+ insertNormalizationBlock = false;
+ }
+ else
+ {
+ nextHndIndex = ehInner->ebdTryLast->hasHndIndex() ? ehInner->ebdTryLast->getHndIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ }
+ else if (ehOuter->ebdTryLast == ehInner->ebdHndLast)
+ {
+ // Case (2) handler nested in try.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "handler"; outerType = "try";)
+ insertAfterBlk = ehOuter->ebdTryLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ assert(ehInner->ebdHndLast->getHndIndex() == ehInnerIndex);
+ nextHndIndex = ehInner->ebdEnclosingHndIndex;
+ }
+ else
+ {
+ // No "last" pointers match!
+ }
+
+ if (foundMatchingLastBlock)
+ {
+ // The outer might be part of a new set of mutual protect regions (if it isn't part of one already).
+ mutualTryBeg = ehOuter->ebdTryBeg;
+ mutualTryLast = ehOuter->ebdTryLast;
+ }
+ }
+ else
+ {
+ nextHndIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a handler region.
+
+ // The outer (enclosing) region is a handler (note that it can't be a filter; there is no nesting
+ // within a filter).
+ if (ehOuter->ebdHndLast == ehInner->ebdTryLast)
+ {
+ // Case (3) try nested in handler.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "try"; outerType = "handler";)
+ insertAfterBlk = ehOuter->ebdHndLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ assert(ehInner->ebdTryLast->getTryIndex() == ehInnerIndex);
+ nextTryIndex = ehInner->ebdEnclosingTryIndex;
+ }
+ else if (ehOuter->ebdHndLast == ehInner->ebdHndLast)
+ {
+ // Case (4) handler nested in handler.
+ foundMatchingLastBlock = true;
+ INDEBUG(innerType = "handler"; outerType = "handler";)
+ insertAfterBlk = ehOuter->ebdHndLast;
+ lastBlockPtrToCompare = insertAfterBlk;
+
+ nextTryIndex = ehInner->ebdTryLast->hasTryIndex() ? ehInner->ebdTryLast->getTryIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ else
+ {
+ // No "last" pointers match!
+ }
+ }
+
+ while (foundMatchingLastBlock)
+ {
+ assert(lastBlockPtrToCompare != nullptr);
+ assert(insertAfterBlk != nullptr);
+ assert(ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ assert(ehOuter != nullptr);
+
+ // Add a normalization block
+
+ if (insertNormalizationBlock)
+ {
+ // Add a new last block for 'ehOuter' that will be outside the EH region that it encloses and
+ // with which it shares a 'last' pointer.
+
+ BasicBlock* newLast = bbNewBasicBlock(BBJ_NONE);
+ assert(insertAfterBlk != nullptr);
+ fgInsertBBafter(insertAfterBlk, newLast);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("last %s block for EH#%u and last %s block for EH#%u are same block; inserted new "
+ "BB%02u after BB%02u as new last %s block for EH#%u.\n",
+ outerType, ehOuterIndex, innerType, ehInnerIndex, newLast->bbNum, insertAfterBlk->bbNum,
+ outerType, ehOuterIndex);
+ }
+#endif // DEBUG
+
+ if (outerIsTryRegion)
+ {
+ ehOuter->ebdTryLast = newLast;
+ newLast->setTryIndex(ehOuterIndex);
+ if (nextHndIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ newLast->clearHndIndex();
+ }
+ else
+ {
+ newLast->setHndIndex(nextHndIndex);
+ }
+ }
+ else
+ {
+ ehOuter->ebdHndLast = newLast;
+ if (nextTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ newLast->clearTryIndex();
+ }
+ else
+ {
+ newLast->setTryIndex(nextTryIndex);
+ }
+ newLast->setHndIndex(ehOuterIndex);
+ }
+
+ newLast->bbCatchTyp =
+ BBCT_NONE; // bbCatchTyp is only set on the first block of a handler, which this is not
+ newLast->bbCodeOffs = insertAfterBlk->bbCodeOffsEnd;
+ newLast->bbCodeOffsEnd = newLast->bbCodeOffs; // code size = 0. TODO: use BAD_IL_OFFSET instead?
+ newLast->inheritWeight(insertAfterBlk);
+#if FEATURE_STACK_FP_X87
+ newLast->bbFPStateX87 = codeGen->FlatFPAllocFPState(insertAfterBlk->bbFPStateX87);
+#endif // FEATURE_STACK_FP_X87
+
+ newLast->bbFlags |= BBF_INTERNAL;
+
+ // The new block (a fall-through block) is a new predecessor.
+ if (fgCheapPredsValid)
+ {
+ fgAddCheapPred(newLast, insertAfterBlk);
+ }
+
+ // Move the insert pointer. More enclosing equivalent 'last' blocks will be inserted after this.
+ insertAfterBlk = newLast;
+
+ modified = true;
+
+#ifdef DEBUG
+ if (verbose) // Normally this is way too verbose, but it is useful for debugging
+ {
+ printf("*************** fgNormalizeEH() made a change\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ }
+#endif // DEBUG
+ }
+
+ // Now find the next outer enclosing EH region and see if it also shares the last block.
+ foundMatchingLastBlock = false; // assume nothing will match
+ ehInner = ehOuter;
+ ehInnerIndex = ehOuterIndex;
+ innerIsTryRegion = outerIsTryRegion;
+
+ ehOuterIndex =
+ ehOuter->ebdGetEnclosingRegionIndex(&outerIsTryRegion); // Loop outwards in the EH nesting.
+ if (ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // There are more enclosing regions; check for equivalent 'last' pointers.
+
+ INDEBUG(innerType = outerType; outerType = "";)
+
+ ehOuter = ehGetDsc(ehOuterIndex);
+
+ insertNormalizationBlock = true; // assume it's not mutual protect
+
+ if (outerIsTryRegion)
+ {
+ nextTryIndex = EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a 'try' region.
+
+ // The outer (enclosing) region is a 'try'
+ if (ehOuter->ebdTryLast == lastBlockPtrToCompare)
+ {
+ // Case (1) and (2): try or handler nested in try.
+ foundMatchingLastBlock = true;
+ INDEBUG(outerType = "try";)
+
+ if (innerIsTryRegion && ehOuter->ebdIsSameTry(mutualTryBeg, mutualTryLast))
+ {
+ // We can't touch this 'try', since it's mutual protect.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mutual protect regions EH#%u and EH#%u; leaving identical 'try' last "
+ "blocks.\n",
+ ehOuterIndex, ehInnerIndex);
+ }
+#endif // DEBUG
+
+ insertNormalizationBlock = false;
+
+ // We still need to update the 'last' pointer, in case someone inserted a normalization
+ // block before the start of the mutual protect 'try' region.
+ ehOuter->ebdTryLast = insertAfterBlk;
+ }
+ else
+ {
+ if (innerIsTryRegion)
+ {
+ // Case (1) try nested in try.
+ nextHndIndex = ehInner->ebdTryLast->hasHndIndex()
+ ? ehInner->ebdTryLast->getHndIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ else
+ {
+ // Case (2) handler nested in try.
+ assert(ehInner->ebdHndLast->getHndIndex() == ehInnerIndex);
+ nextHndIndex = ehInner->ebdEnclosingHndIndex;
+ }
+ }
+
+ // The outer might be part of a new set of mutual protect regions (if it isn't part of one
+ // already).
+ mutualTryBeg = ehOuter->ebdTryBeg;
+ mutualTryLast = ehOuter->ebdTryLast;
+ }
+ }
+ else
+ {
+ nextHndIndex =
+ EHblkDsc::NO_ENCLOSING_INDEX; // unused, since the outer block is a handler region.
+
+ // The outer (enclosing) region is a handler (note that it can't be a filter; there is no
+ // nesting within a filter).
+ if (ehOuter->ebdHndLast == lastBlockPtrToCompare)
+ {
+ // Case (3) and (4): try nested in try or handler.
+ foundMatchingLastBlock = true;
+ INDEBUG(outerType = "handler";)
+
+ if (innerIsTryRegion)
+ {
+ // Case (3) try nested in handler.
+ assert(ehInner->ebdTryLast->getTryIndex() == ehInnerIndex);
+ nextTryIndex = ehInner->ebdEnclosingTryIndex;
+ }
+ else
+ {
+ // Case (4) handler nested in handler.
+ nextTryIndex = ehInner->ebdTryLast->hasTryIndex() ? ehInner->ebdTryLast->getTryIndex()
+ : EHblkDsc::NO_ENCLOSING_INDEX;
+ }
+ }
+ }
+ }
+
+ // If we get to here and foundMatchingLastBlock is false, then the inner and outer region don't share
+ // any 'last' blocks, so we're done. Note that we could have a situation like this:
+ //
+ // try4 try3 try2 try1
+ // |---- | | | BB01
+ // | |---- | | BB02
+ // | | |---- | BB03
+ // | | | |----- BB04
+ // | | |----- |----- BB05
+ // |---- |------------------- BB06
+ //
+ // (Thus, try1 & try2 end at BB05, and are nested inside try3 & try4, which both end at BB06.)
+ // In this case, we'll process try1 and try2, then break out. Later, as we iterate through the EH table,
+ // we'll get to try3 and process it and try4.
+
+ } // end while (foundMatchingLastBlock)
+ } // if (ehOuterIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ } // EH table iteration
+
+ return modified;
+}
+
+/*****************************************************************************/
+#ifdef DEBUG
+
+void Compiler::dispIncomingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause)
+{
+ printf("EH clause #%u:\n", num);
+ printf(" Flags: 0x%x", clause.Flags);
+
+ // Note: the flags field is kind of weird. It should be compared for equality
+ // to determine the type of clause, even though it looks like a bitfield. In
+ // particular, CORINFO_EH_CLAUSE_NONE is zero, so you can't use "&" to check it.
+ const DWORD CORINFO_EH_CLAUSE_TYPE_MASK = 0x7;
+ switch (clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK)
+ {
+ case CORINFO_EH_CLAUSE_NONE:
+ printf(" (catch)");
+ break;
+ case CORINFO_EH_CLAUSE_FILTER:
+ printf(" (filter)");
+ break;
+ case CORINFO_EH_CLAUSE_FINALLY:
+ printf(" (finally)");
+ break;
+ case CORINFO_EH_CLAUSE_FAULT:
+ printf(" (fault)");
+ break;
+ default:
+ printf(" (UNKNOWN type %u!)", clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK);
+ break;
+ }
+ if (clause.Flags & ~CORINFO_EH_CLAUSE_TYPE_MASK)
+ {
+ printf(" (extra unknown bits: 0x%x)", clause.Flags & ~CORINFO_EH_CLAUSE_TYPE_MASK);
+ }
+ printf("\n");
+
+ printf(" TryOffset: 0x%x\n", clause.TryOffset);
+ printf(" TryLength: 0x%x\n", clause.TryLength);
+ printf(" HandlerOffset: 0x%x\n", clause.HandlerOffset);
+ printf(" HandlerLength: 0x%x\n", clause.HandlerLength);
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ printf(" FilterOffset: 0x%x\n", clause.FilterOffset);
+ }
+ else
+ {
+ printf(" ClassToken: 0x%x\n", clause.ClassToken);
+ }
+}
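+
+// For reference, the routine above produces output of this shape for a plain catch clause
+// (the offsets and class token shown are hypothetical):
+//
+//   EH clause #0:
+//     Flags: 0x0 (catch)
+//     TryOffset: 0x2
+//     TryLength: 0x1a
+//     HandlerOffset: 0x1c
+//     HandlerLength: 0xa
+//     ClassToken: 0x1000002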
+
+void Compiler::dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause)
+{
+ if (opts.dspDiffable)
+ {
+ /* (( brace matching editor workaround to compensate for the following line */
+ printf("EH#%u: try [%s..%s) handled by [%s..%s) ", num, genEmitter->emitOffsetToLabel(clause.TryOffset),
+ genEmitter->emitOffsetToLabel(clause.TryLength), genEmitter->emitOffsetToLabel(clause.HandlerOffset),
+ genEmitter->emitOffsetToLabel(clause.HandlerLength));
+ }
+ else
+ {
+ /* (( brace matching editor workaround to compensate for the following line */
+ printf("EH#%u: try [%04X..%04X) handled by [%04X..%04X) ", num, dspOffset(clause.TryOffset),
+ dspOffset(clause.TryLength), dspOffset(clause.HandlerOffset), dspOffset(clause.HandlerLength));
+ }
+
+ // Note: the flags field is kind of weird. It should be compared for equality
+ // to determine the type of clause, even though it looks like a bitfield. In
+ // particular, CORINFO_EH_CLAUSE_NONE is zero, so you can't use "&" to check it.
+ // You do need to mask off the bits, though, because COR_ILEXCEPTION_CLAUSE_DUPLICATED
+ // is and'ed in.
+ const DWORD CORINFO_EH_CLAUSE_TYPE_MASK = 0x7;
+ switch (clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK)
+ {
+ case CORINFO_EH_CLAUSE_NONE:
+ printf("(class: %04X)", clause.ClassToken);
+ break;
+ case CORINFO_EH_CLAUSE_FILTER:
+ if (opts.dspDiffable)
+ {
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf("filter at [%s..%s)", genEmitter->emitOffsetToLabel(clause.ClassToken),
+ genEmitter->emitOffsetToLabel(clause.HandlerOffset));
+ }
+ else
+ {
+ /* ( brace matching editor workaround to compensate for the following line */
+ printf("filter at [%04X..%04X)", dspOffset(clause.ClassToken), dspOffset(clause.HandlerOffset));
+ }
+ break;
+ case CORINFO_EH_CLAUSE_FINALLY:
+ printf("(finally)");
+ break;
+ case CORINFO_EH_CLAUSE_FAULT:
+ printf("(fault)");
+ break;
+ default:
+ printf("(UNKNOWN type %u!)", clause.Flags & CORINFO_EH_CLAUSE_TYPE_MASK);
+ assert(!"unknown type");
+ break;
+ }
+
+ if ((clause.TryOffset == clause.TryLength) && (clause.TryOffset == clause.HandlerOffset) &&
+ ((clause.Flags & (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY)) ==
+ (COR_ILEXCEPTION_CLAUSE_DUPLICATED | COR_ILEXCEPTION_CLAUSE_FINALLY)))
+ {
+ printf(" cloned finally");
+ }
+ else if (clause.Flags & COR_ILEXCEPTION_CLAUSE_DUPLICATED)
+ {
+ printf(" duplicated");
+ }
+ printf("\n");
+}
+
+/*****************************************************************************/
+
+void Compiler::fgVerifyHandlerTab()
+{
+ if (compIsForInlining())
+ {
+ // We don't inline functions with EH. Don't bother verifying the EH table in the inlinee Compiler.
+ return;
+ }
+
+ if (compHndBBtabCount == 0)
+ {
+ return;
+ }
+
+ // Did we do the normalization that prevents the first block of a handler from being a 'try' block (case 1)?
+ bool handlerBegIsTryBegNormalizationDone = fgNormalizeEHDone;
+
+ // Did we do the normalization that prevents multiple EH regions (namely, 'try' blocks) from starting on the same
+ // block (case 2)?
+ bool multipleBegBlockNormalizationDone = fgNormalizeEHDone;
+
+ // Did we do the normalization that prevents multiple EH regions ('try' or handler blocks) from ending on the same
+ // block (case 3)?
+ bool multipleLastBlockNormalizationDone = false; // Currently disabled
+
+ assert(compHndBBtabCount <= compHndBBtabAllocCount);
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ assert(HBtab->ebdTryBeg != nullptr);
+ assert(HBtab->ebdTryLast != nullptr);
+ assert(HBtab->ebdHndBeg != nullptr);
+ assert(HBtab->ebdHndLast != nullptr);
+
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_TRY_BEG);
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_DONT_REMOVE);
+ assert(HBtab->ebdTryBeg->bbFlags & BBF_HAS_LABEL);
+
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_DONT_REMOVE);
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_HAS_LABEL);
+
+ assert((HBtab->ebdTryBeg->bbFlags & BBF_REMOVED) == 0);
+ assert((HBtab->ebdTryLast->bbFlags & BBF_REMOVED) == 0);
+ assert((HBtab->ebdHndBeg->bbFlags & BBF_REMOVED) == 0);
+ assert((HBtab->ebdHndLast->bbFlags & BBF_REMOVED) == 0);
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter != nullptr);
+ assert(HBtab->ebdFilter->bbFlags & BBF_DONT_REMOVE);
+ assert((HBtab->ebdFilter->bbFlags & BBF_REMOVED) == 0);
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ assert(HBtab->ebdHndBeg->bbFlags & BBF_FUNCLET_BEG);
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbFlags & BBF_FUNCLET_BEG);
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ // I want to assert things about the relative ordering of blocks in the block list using
+ // block number, but I don't want to renumber the basic blocks, which might cause a difference
+ // between debug and non-debug code paths. So, create a renumbered block mapping: map the
+ // existing block number to a renumbered block number that is ordered by block list order.
+
+ unsigned bbNumMax = compIsForInlining() ? impInlineInfo->InlinerCompiler->fgBBNumMax : fgBBNumMax;
+
+ // blockNumMap[old block number] => new block number
+ size_t blockNumBytes = (bbNumMax + 1) * sizeof(unsigned);
+ unsigned* blockNumMap = (unsigned*)_alloca(blockNumBytes);
+ memset(blockNumMap, 0, blockNumBytes);
+
+ BasicBlock* block;
+ unsigned newBBnum = 1;
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ assert((block->bbFlags & BBF_REMOVED) == 0);
+ assert(1 <= block->bbNum && block->bbNum <= bbNumMax);
+ assert(blockNumMap[block->bbNum] == 0); // If this fails, we have two blocks with the same block number.
+ blockNumMap[block->bbNum] = newBBnum++;
+ }
+// Note that there may be some blockNumMap[x] == 0, for a block number 'x' that has been deleted, if the blocks
+// haven't been renumbered since the deletion.
+
+#if 0 // Useful for debugging, but don't want to put this in the dump all the time
+ if (verbose)
+ {
+ printf("fgVerifyHandlerTab block number map: BB current => BB new\n");
+ for (unsigned i = 0; i <= bbNumMax; i++)
+ {
+ if (blockNumMap[i] != 0)
+ {
+ printf("BB%02u => BB%02u\n", i, blockNumMap[i]);
+ }
+ }
+ }
+#endif
+
+ // To verify that bbCatchTyp is set properly on all blocks, and that some BBF_* flags are only set on the
+ // first block of 'try' regions or handlers, create two bool arrays indexed by block number: one for the set
+ // of blocks that are the beginning blocks of 'try' regions, and one for blocks that are the beginning of
+ // handlers (including filters). Note that since this checking function runs before EH normalization, we have
+ // to handle the case where a block can be both the beginning of a 'try' as well as the beginning of a
+ // handler. After we've iterated over the EH table, loop over all blocks and verify that only handler begin
+ // blocks have a bbCatchTyp other than BBCT_NONE, and some other things.
+
+ size_t blockBoolSetBytes = (bbNumMax + 1) * sizeof(bool);
+ bool* blockTryBegSet = (bool*)_alloca(blockBoolSetBytes);
+ bool* blockHndBegSet = (bool*)_alloca(blockBoolSetBytes);
+ for (unsigned i = 0; i <= bbNumMax; i++)
+ {
+ blockTryBegSet[i] = false;
+ blockHndBegSet[i] = false;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ bool isLegalFirstFunclet = false;
+ unsigned bbNumFirstFunclet = 0;
+
+ if (fgFuncletsCreated)
+ {
+ // Assert some things about the "first funclet block" pointer.
+ assert(fgFirstFuncletBB != nullptr);
+ assert((fgFirstFuncletBB->bbFlags & BBF_REMOVED) == 0);
+ bbNumFirstFunclet = blockNumMap[fgFirstFuncletBB->bbNum];
+ assert(bbNumFirstFunclet != 0);
+ }
+ else
+ {
+ assert(fgFirstFuncletBB == nullptr);
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ unsigned bbNumTryBeg = blockNumMap[HBtab->ebdTryBeg->bbNum];
+ unsigned bbNumTryLast = blockNumMap[HBtab->ebdTryLast->bbNum];
+ unsigned bbNumHndBeg = blockNumMap[HBtab->ebdHndBeg->bbNum];
+ unsigned bbNumHndLast = blockNumMap[HBtab->ebdHndLast->bbNum];
+ unsigned bbNumFilter = 0; // This should never get used except under "if (HBtab->HasFilter())"
+ if (HBtab->HasFilter())
+ {
+ bbNumFilter = blockNumMap[HBtab->ebdFilter->bbNum];
+ }
+
+ // Assert that the EH blocks are in the main block list
+ assert(bbNumTryBeg != 0);
+ assert(bbNumTryLast != 0);
+ assert(bbNumHndBeg != 0);
+ assert(bbNumHndLast != 0);
+ if (HBtab->HasFilter())
+ {
+ assert(bbNumFilter != 0);
+ }
+
+ // Check relative ordering of the 'beg' and 'last' blocks. Note that in IL (and in our initial block list)
+ // there is no required ordering between the 'try' and handler regions: the handler might come first!
+ // After funclets have been created, all the handler blocks come in sequence at the end of the
+ // function (this is checked below, with checks for the first funclet block). Note that a handler
+ // might contain a nested 'try', which will also then be in the "funclet region".
+ // Also, the 'try' and handler regions do not need to be adjacent.
+ assert(bbNumTryBeg <= bbNumTryLast);
+ assert(bbNumHndBeg <= bbNumHndLast);
+ if (HBtab->HasFilter())
+ {
+ // Since the filter block must be different from the handler, this condition is "<", not "<=".
+ assert(bbNumFilter < bbNumHndBeg);
+ }
+
+ // The EH regions are disjoint: the handler (including the filter, if applicable) is strictly before or after
+ // the 'try'.
+ if (HBtab->HasFilter())
+ {
+ assert((bbNumHndLast < bbNumTryBeg) || (bbNumTryLast < bbNumFilter));
+ }
+ else
+ {
+ assert((bbNumHndLast < bbNumTryBeg) || (bbNumTryLast < bbNumHndBeg));
+ }
+
+#if FEATURE_EH_FUNCLETS
+ // If funclets have been created, check the first funclet block. The first funclet block must be the
+ // first block of a filter or handler. All filter/handler blocks must come after it.
+ // Note that 'try' blocks might come either before or after it. If after, they will be nested within
+ // a handler. If before, they might be nested within a try, but not within a handler.
+
+ if (fgFuncletsCreated)
+ {
+ if (bbNumTryLast < bbNumFirstFunclet)
+ {
+ // This EH region can't be nested in a handler, or else it would be in the funclet region.
+ assert(HBtab->ebdEnclosingHndIndex == EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+ else
+ {
+ // The last block of the 'try' is in the funclet region; make sure the whole thing is.
+ if (multipleBegBlockNormalizationDone)
+ {
+ assert(bbNumTryBeg > bbNumFirstFunclet); // ">" because a 'try' can't be the first block of a
+ // handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumTryBeg >= bbNumFirstFunclet);
+ }
+
+ // This EH region must be nested in a handler.
+ assert(HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX);
+ }
+
+ if (HBtab->HasFilter())
+ {
+ assert(bbNumFirstFunclet <= bbNumFilter);
+ if (fgFirstFuncletBB == HBtab->ebdFilter)
+ {
+ assert(!isLegalFirstFunclet); // We can't have already found a matching block for the first funclet.
+ isLegalFirstFunclet = true;
+ }
+ }
+ else
+ {
+ assert(bbNumFirstFunclet <= bbNumHndBeg);
+ if (fgFirstFuncletBB == HBtab->ebdHndBeg)
+ {
+ assert(!isLegalFirstFunclet); // We can't have already found a matching block for the first funclet.
+ isLegalFirstFunclet = true;
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Check the 'try' region nesting, using ebdEnclosingTryIndex.
+ // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it
+ // later.
+
+ if (HBtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(HBtab->ebdEnclosingTryIndex > XTnum); // The enclosing region must come after this one in the table
+ EHblkDsc* HBtabOuter = ehGetDsc(HBtab->ebdEnclosingTryIndex);
+ unsigned bbNumOuterTryBeg = blockNumMap[HBtabOuter->ebdTryBeg->bbNum];
+ unsigned bbNumOuterTryLast = blockNumMap[HBtabOuter->ebdTryLast->bbNum];
+
+ // A few basic asserts (that will also get covered later, when this outer region gets handled).
+ assert(bbNumOuterTryBeg != 0);
+ assert(bbNumOuterTryLast != 0);
+ assert(bbNumOuterTryBeg <= bbNumOuterTryLast);
+
+ if (!EHblkDsc::ebdIsSameTry(HBtab, HBtabOuter))
+ {
+ // If it's not a mutually protect region, then the outer 'try' must completely lexically contain all the
+ // blocks in the nested EH region. However, if funclets have been created, this is no longer true, since
+ // this 'try' might be in a handler that is pulled out to the funclet region, while the outer 'try'
+ // remains in the main function region.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ // If both the 'try' region and the outer 'try' region are in the main function area, then we can
+ // do the normal nesting check. Otherwise, it's harder to find a useful assert to make about their
+ // relationship.
+ if ((bbNumTryLast < bbNumFirstFunclet) && (bbNumOuterTryLast < bbNumFirstFunclet))
+ {
+ if (multipleBegBlockNormalizationDone)
+ {
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same
+ // block (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterTryBeg <= bbNumTryBeg);
+ }
+ if (multipleLastBlockNormalizationDone)
+ {
+ assert(bbNumTryLast < bbNumOuterTryLast); // Two 'try' regions can't end at the same block
+ //(by EH normalization).
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterTryLast);
+ }
+ }
+
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the
+ // enclosing try.
+ assert((bbNumHndLast < bbNumOuterTryBeg) || (bbNumOuterTryLast < bbNumHndBeg));
+ }
+ else
+#endif // FEATURE_EH_FUNCLETS
+ {
+ if (multipleBegBlockNormalizationDone)
+ {
+ assert(bbNumOuterTryBeg < bbNumTryBeg); // Two 'try' regions can't start at the same block
+ // (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterTryBeg <= bbNumTryBeg);
+ }
+ assert(bbNumOuterTryBeg < bbNumHndBeg); // An inner handler can never start at the same
+ // block as an outer 'try' (by IL rules).
+ if (multipleLastBlockNormalizationDone)
+ {
+ // An inner EH region can't share a 'last' block with the outer 'try' (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterTryLast);
+ assert(bbNumHndLast < bbNumOuterTryLast);
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterTryLast);
+ assert(bbNumHndLast <= bbNumOuterTryLast);
+ }
+ }
+ }
+ }
+
+ // Check the handler region nesting, using ebdEnclosingHndIndex.
+ // Only check one level of nesting, since we'll check the outer EH region (and its nesting) when we get to it
+ // later.
+
+ if (HBtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ assert(HBtab->ebdEnclosingHndIndex > XTnum); // The enclosing region must come after this one in the table
+ EHblkDsc* HBtabOuter = ehGetDsc(HBtab->ebdEnclosingHndIndex);
+ unsigned bbNumOuterHndBeg = blockNumMap[HBtabOuter->ebdHndBeg->bbNum];
+ unsigned bbNumOuterHndLast = blockNumMap[HBtabOuter->ebdHndLast->bbNum];
+
+ // A few basic asserts (that will also get covered later, when this outer region gets handled).
+ assert(bbNumOuterHndBeg != 0);
+ assert(bbNumOuterHndLast != 0);
+ assert(bbNumOuterHndBeg <= bbNumOuterHndLast);
+
+// The outer handler must completely contain all the blocks in the EH region nested within it. However, if
+// funclets have been created, it's harder to make any relationship asserts about the order of nested
+// handlers, which also have been made into funclets.
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ if (handlerBegIsTryBegNormalizationDone)
+ {
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterHndBeg <= bbNumTryBeg);
+ }
+ if (multipleLastBlockNormalizationDone)
+ {
+ assert(bbNumTryLast < bbNumOuterHndLast); // An inner 'try' can't end at the same block as an
+ // outer handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterHndLast);
+ }
+
+ // With funclets, all we can say about the handler blocks is that they are disjoint from the enclosing
+ // handler.
+ assert((bbNumHndLast < bbNumOuterHndBeg) || (bbNumOuterHndLast < bbNumHndBeg));
+ }
+ else
+#endif // FEATURE_EH_FUNCLETS
+ {
+ if (handlerBegIsTryBegNormalizationDone)
+ {
+ assert(bbNumOuterHndBeg < bbNumTryBeg); // An inner 'try' can't start at the same block as an
+ // outer handler (by EH normalization).
+ }
+ else
+ {
+ assert(bbNumOuterHndBeg <= bbNumTryBeg);
+ }
+ assert(bbNumOuterHndBeg < bbNumHndBeg); // An inner handler can never start at the same block
+ // as an outer handler (by IL rules).
+ if (multipleLastBlockNormalizationDone)
+ {
+ // An inner EH region can't share a 'last' block with the outer handler (by EH normalization).
+ assert(bbNumTryLast < bbNumOuterHndLast);
+ assert(bbNumHndLast < bbNumOuterHndLast);
+ }
+ else
+ {
+ assert(bbNumTryLast <= bbNumOuterHndLast);
+ assert(bbNumHndLast <= bbNumOuterHndLast);
+ }
+ }
+ }
+
+ // Set up blockTryBegSet and blockHndBegSet.
+ // We might want to have this assert:
+ // if (fgNormalizeEHDone) assert(!blockTryBegSet[HBtab->ebdTryBeg->bbNum]);
+ // But we can't, because if we have mutually-protect 'try' regions, we'll see exactly the same tryBeg twice
+ // (or more).
+ blockTryBegSet[HBtab->ebdTryBeg->bbNum] = true;
+ assert(!blockHndBegSet[HBtab->ebdHndBeg->bbNum]);
+ blockHndBegSet[HBtab->ebdHndBeg->bbNum] = true;
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdFilter->bbCatchTyp == BBCT_FILTER);
+ assert(!blockHndBegSet[HBtab->ebdFilter->bbNum]);
+ blockHndBegSet[HBtab->ebdFilter->bbNum] = true;
+ }
+
+ // Check the block bbCatchTyp for this EH region's filter and handler.
+
+ if (HBtab->HasFilter())
+ {
+ assert(HBtab->ebdHndBeg->bbCatchTyp == BBCT_FILTER_HANDLER);
+ }
+ else if (HBtab->HasCatchHandler())
+ {
+ assert((HBtab->ebdHndBeg->bbCatchTyp != BBCT_NONE) && (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FAULT) &&
+ (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FINALLY) && (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FILTER) &&
+ (HBtab->ebdHndBeg->bbCatchTyp != BBCT_FILTER_HANDLER));
+ }
+ else if (HBtab->HasFaultHandler())
+ {
+ assert(HBtab->ebdHndBeg->bbCatchTyp == BBCT_FAULT);
+ }
+ else if (HBtab->HasFinallyHandler())
+ {
+ assert(HBtab->ebdHndBeg->bbCatchTyp == BBCT_FINALLY);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+ assert(!fgFuncletsCreated || isLegalFirstFunclet);
+#endif // FEATURE_EH_FUNCLETS
+
+ // Figure out what 'try' and handler index each basic block should have,
+ // and check the blocks against that. This depends on the more nested EH
+ // clauses appearing first. For duplicate clauses, we use the duplicate
+ // clause 'try' region to set the try index, since a handler that has
+ // been pulled out of an enclosing 'try' wouldn't have had its try index
+ // otherwise set. The duplicate clause handler is truly a duplicate of
+ // a previously processed handler, so we ignore it.
+
+ size_t blockIndexBytes = (bbNumMax + 1) * sizeof(unsigned short);
+ unsigned short* blockTryIndex = (unsigned short*)_alloca(blockIndexBytes);
+ unsigned short* blockHndIndex = (unsigned short*)_alloca(blockIndexBytes);
+ memset(blockTryIndex, 0, blockIndexBytes);
+ memset(blockHndIndex, 0, blockIndexBytes);
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ BasicBlock* blockEnd;
+
+ for (block = HBtab->ebdTryBeg, blockEnd = HBtab->ebdTryLast->bbNext; block != blockEnd; block = block->bbNext)
+ {
+ if (blockTryIndex[block->bbNum] == 0)
+ {
+ blockTryIndex[block->bbNum] = (unsigned short)(XTnum + 1);
+ }
+ }
+
+ for (block = (HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg), blockEnd = HBtab->ebdHndLast->bbNext;
+ block != blockEnd; block = block->bbNext)
+ {
+ if (blockHndIndex[block->bbNum] == 0)
+ {
+ blockHndIndex[block->bbNum] = (unsigned short)(XTnum + 1);
+ }
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ // Mark all the funclet 'try' indices correctly, since they do not exist in the linear 'try' region that
+ // we looped over above. This is similar to duplicate clause logic, but we only need to look at the most
+ // nested enclosing try index, not the entire set of enclosing try indices, since that is what we store
+ // on the block.
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ unsigned enclosingTryIndex = ehTrueEnclosingTryIndexIL(XTnum); // find the true enclosing try index,
+ // ignoring 'mutual protect' trys
+ if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // The handler funclet for 'XTnum' has a try index of 'enclosingTryIndex' (at least, the parts of the
+ // funclet that don't already have a more nested 'try' index because a 'try' is nested within the
+ // handler).
+
+ BasicBlock* blockEnd;
+ for (block = (HBtab->HasFilter() ? HBtab->ebdFilter : HBtab->ebdHndBeg),
+ blockEnd = HBtab->ebdHndLast->bbNext;
+ block != blockEnd; block = block->bbNext)
+ {
+ if (blockTryIndex[block->bbNum] == 0)
+ {
+ blockTryIndex[block->bbNum] = (unsigned short)(enclosingTryIndex + 1);
+ }
+ }
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ // Make sure that all blocks have the right index, including those blocks that should have zero (no EH region).
+ for (block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ assert(block->bbTryIndex == blockTryIndex[block->bbNum]);
+ assert(block->bbHndIndex == blockHndIndex[block->bbNum]);
+
+ // Also, since we're walking the blocks, check that all blocks we didn't mark as EH handler 'begin' blocks
+ // already have bbCatchTyp set properly.
+ if (!blockHndBegSet[block->bbNum])
+ {
+ assert(block->bbCatchTyp == BBCT_NONE);
+
+#if FEATURE_EH_FUNCLETS
+ if (fgFuncletsCreated)
+ {
+ // Make sure blocks that aren't the first block of a funclet do not have the BBF_FUNCLET_BEG flag set.
+ assert((block->bbFlags & BBF_FUNCLET_BEG) == 0);
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ // Only the first block of 'try' regions should have BBF_TRY_BEG set.
+ if (!blockTryBegSet[block->bbNum])
+ {
+ assert((block->bbFlags & BBF_TRY_BEG) == 0);
+ }
+ }
+}
+
+void Compiler::fgDispHandlerTab()
+{
+ printf("\n*************** Exception Handling table");
+
+ if (compHndBBtabCount == 0)
+ {
+ printf(" is empty\n");
+ return;
+ }
+
+ printf("\nindex ");
+#if !FEATURE_EH_FUNCLETS
+ printf("nest, ");
+#endif // !FEATURE_EH_FUNCLETS
+ printf("eTry, eHnd\n");
+
+ unsigned XTnum;
+ EHblkDsc* HBtab;
+
+ for (XTnum = 0, HBtab = compHndBBtab; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ HBtab->DispEntry(XTnum);
+ }
+}
+
+#endif // DEBUG
+/*****************************************************************************/
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX "Compiler" functions: EH tree verification XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ * The following code checks the following rules for the EH table:
+ * 1. Overlapping of try blocks is not allowed.
+ * 2. Handler blocks cannot be shared between different try blocks.
+ * 3. Try blocks with Finally or Fault blocks cannot have other handlers.
+ * 4. If block A contains block B, A should also contain B's try/filter/handler.
+ * 5. A block cannot contain its related try/filter/handler.
+ * 6. A nested block must appear before its containing block.
+ *
+ */
+
+void Compiler::verInitEHTree(unsigned numEHClauses)
+{
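+ // Each EH clause contributes at most three nodes to the tree: a 'try' node, a handler node, and an
+ // optional filter node (see verInsertEhNode), hence the "* 3" allocation below.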
+ ehnNext = new (this, CMK_BasicBlock) EHNodeDsc[numEHClauses * 3];
+ ehnTree = nullptr;
+}
+
+/* Inserts the try, handler and filter (optional) clause information in a tree structure
+ * in order to catch incorrect eh formatting (e.g. illegal overlaps, incorrect order)
+ */
+
+void Compiler::verInsertEhNode(CORINFO_EH_CLAUSE* clause, EHblkDsc* handlerTab)
+{
+ EHNodeDsc* tryNode = ehnNext++;
+ EHNodeDsc* handlerNode = ehnNext++;
+ EHNodeDsc* filterNode = nullptr; // optional
+
+ tryNode->ehnSetTryNodeType();
+ tryNode->ehnStartOffset = clause->TryOffset;
+ tryNode->ehnEndOffset = clause->TryOffset + clause->TryLength - 1;
+ tryNode->ehnHandlerNode = handlerNode;
+
+ if (clause->Flags & CORINFO_EH_CLAUSE_FINALLY)
+ {
+ handlerNode->ehnSetFinallyNodeType();
+ }
+ else if (clause->Flags & CORINFO_EH_CLAUSE_FAULT)
+ {
+ handlerNode->ehnSetFaultNodeType();
+ }
+ else
+ {
+ handlerNode->ehnSetHandlerNodeType();
+ }
+
+ handlerNode->ehnStartOffset = clause->HandlerOffset;
+ handlerNode->ehnEndOffset = clause->HandlerOffset + clause->HandlerLength - 1;
+ handlerNode->ehnTryNode = tryNode;
+
+ if (clause->Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ filterNode = ehnNext++;
+ filterNode->ehnStartOffset = clause->FilterOffset;
+ BasicBlock* blk = handlerTab->BBFilterLast();
+ filterNode->ehnEndOffset = blk->bbCodeOffsEnd - 1;
+
+ noway_assert(filterNode->ehnEndOffset != 0);
+ filterNode->ehnSetFilterNodeType();
+ filterNode->ehnTryNode = tryNode;
+ tryNode->ehnFilterNode = filterNode;
+ }
+
+ verInsertEhNodeInTree(&ehnTree, tryNode);
+ verInsertEhNodeInTree(&ehnTree, handlerNode);
+ if (filterNode)
+ {
+ verInsertEhNodeInTree(&ehnTree, filterNode);
+ }
+}
+
+/*
+ The root node could be changed by this method.
+
+ node is inserted to
+
+ (a) right of root (root.right <-- node)
+ (b) left of root (node.right <-- root; node becomes root)
+ (c) child of root (root.child <-- node)
+ (d) parent of root (node.child <-- root; node becomes root)
+ (e) equivalent of root (root.equivalent <-- node)
+
+ such that siblings are ordered from left to right
+ child parent relationship and equivalence relationship are not violated
+
+
+ Here is a list of all possible cases
+
+ Case 1 2 3 4 5 6 7 8 9 10 11 12 13
+
+ | | | | |
+ | | | | |
+ .......|.|.|.|..................... [ root start ] .....
+ | | | | | | |
+ | | | | | | |
+ r| | | | | | | |
+ o| | | | | |
+ o| | | | | |
+ t| | | | | |
+ | | | | | | | |
+ | | | | | | |
+ |..........|.|.|.|.....|........|.. [ root end ] ........
+ | | | |
+ | | | | |
+ | | | | |
+
+ |<-- - - - n o d e - - - -->|
+
+
+ Case Operation
+ --------------
+ 1 (b)
+ 2 Error
+ 3 Error
+ 4 (d)
+ 5 (d)
+ 6 (d)
+ 7 Error
+ 8 Error
+ 9 (a)
+ 10 (c)
+ 11 (c)
+ 12 (c)
+ 13 (e)
+
+
+*/
+
+void Compiler::verInsertEhNodeInTree(EHNodeDsc** ppRoot, EHNodeDsc* node)
+{
+ unsigned nStart = node->ehnStartOffset;
+ unsigned nEnd = node->ehnEndOffset;
+
+ if (nStart > nEnd)
+ {
+ BADCODE("start offset greater or equal to end offset");
+ }
+ node->ehnNext = nullptr;
+ node->ehnChild = nullptr;
+ node->ehnEquivalent = nullptr;
+
+ while (TRUE)
+ {
+ if (*ppRoot == nullptr)
+ {
+ *ppRoot = node;
+ break;
+ }
+ unsigned rStart = (*ppRoot)->ehnStartOffset;
+ unsigned rEnd = (*ppRoot)->ehnEndOffset;
+
+ if (nStart < rStart)
+ {
+ // Case 1
+ if (nEnd < rStart)
+ {
+ // Left sibling
+ node->ehnNext = *ppRoot;
+ *ppRoot = node;
+ return;
+ }
+ // Case 2, 3
+ if (nEnd < rEnd)
+ {
+ //[Error]
+ BADCODE("Overlapping try regions");
+ }
+
+ // Case 4, 5
+ //[Parent]
+ verInsertEhNodeParent(ppRoot, node);
+ return;
+ }
+
+ // Cases 6 - 13 (nStart >= rStart)
+
+ if (nEnd > rEnd)
+ { // Case 6, 7, 8, 9
+
+ // Case 9
+ if (nStart > rEnd)
+ {
+ //[RightSibling]
+
+ // Recurse with Root.Sibling as the new root
+ ppRoot = &((*ppRoot)->ehnNext);
+ continue;
+ }
+
+ // Case 6
+ if (nStart == rStart)
+ {
+ //[Parent]
+ if (node->ehnIsTryBlock() || (*ppRoot)->ehnIsTryBlock())
+ {
+ verInsertEhNodeParent(ppRoot, node);
+ return;
+ }
+
+ // non try blocks are not allowed to start at the same offset
+ BADCODE("Handlers start at the same offset");
+ }
+
+ // Case 7, 8
+ BADCODE("Overlapping try regions");
+ }
+
+ // Case 10-13 (nStart >= rStart && nEnd <= rEnd)
+ if ((nStart != rStart) || (nEnd != rEnd))
+ { // Cases 10,11,12
+ //[Child]
+
+ if ((*ppRoot)->ehnIsTryBlock())
+ {
+ BADCODE("Inner try appears after outer try in exception handling table");
+ }
+ else
+ {
+ // We have an EH clause nested within a handler, but the parent
+ // handler clause came first in the table. The rest of the compiler
+ // doesn't expect this, so sort the EH table.
+
+ fgNeedToSortEHTable = true;
+
+ // Case 12 (nStart == rStart)
+ // non try blocks are not allowed to start at the same offset
+ if ((nStart == rStart) && !node->ehnIsTryBlock())
+ {
+ BADCODE("Handlers start at the same offset");
+ }
+
+ // check this!
+ ppRoot = &((*ppRoot)->ehnChild);
+ continue;
+ }
+ }
+
+ // Case 13
+ //[Equivalent]
+ if (!node->ehnIsTryBlock() && !(*ppRoot)->ehnIsTryBlock())
+ {
+ BADCODE("Handlers cannot be shared");
+ }
+
+ if (!node->ehnIsTryBlock() || !(*ppRoot)->ehnIsTryBlock())
+ {
+ // Equivalent is only allowed for try bodies
+ // If one is a handler, this means the nesting is wrong
+ BADCODE("Handler and try with the same offset");
+ }
+
+ node->ehnEquivalent = node->ehnNext = *ppRoot;
+
+ // check that the corresponding handler is either a catch handler
+ // or a filter
+ if (node->ehnHandlerNode->ehnIsFaultBlock() || node->ehnHandlerNode->ehnIsFinallyBlock() ||
+ (*ppRoot)->ehnHandlerNode->ehnIsFaultBlock() || (*ppRoot)->ehnHandlerNode->ehnIsFinallyBlock())
+ {
+ BADCODE("Try block with multiple non-filter/non-handler blocks");
+ }
+
+ break;
+ }
+}
+
+/**********************************************************************
+ * Make node the parent of *ppRoot. All siblings of *ppRoot that are
+ * fully or partially nested in node remain siblings of *ppRoot
+ */
+
+void Compiler::verInsertEhNodeParent(EHNodeDsc** ppRoot, EHNodeDsc* node)
+{
+ noway_assert(node->ehnNext == nullptr);
+ noway_assert(node->ehnChild == nullptr);
+
+ // Root is nested in Node
+ noway_assert(node->ehnStartOffset <= (*ppRoot)->ehnStartOffset);
+ noway_assert(node->ehnEndOffset >= (*ppRoot)->ehnEndOffset);
+
+ // Root is not the same as Node
+ noway_assert(node->ehnStartOffset != (*ppRoot)->ehnStartOffset || node->ehnEndOffset != (*ppRoot)->ehnEndOffset);
+
+ if (node->ehnIsFilterBlock())
+ {
+ BADCODE("Protected block appearing within filter block");
+ }
+
+ EHNodeDsc* lastChild = nullptr;
+ EHNodeDsc* sibling = (*ppRoot)->ehnNext;
+
+ while (sibling)
+ {
+ // siblings are ordered left to right, largest right.
+ // nodes have a width of at least one.
+ // Hence sibling start will always be after Node start.
+
+ noway_assert(sibling->ehnStartOffset > node->ehnStartOffset); // (1)
+
+ // disjoint
+ if (sibling->ehnStartOffset > node->ehnEndOffset)
+ {
+ break;
+ }
+
+ // partial containment.
+ if (sibling->ehnEndOffset > node->ehnEndOffset) // (2)
+ {
+ BADCODE("Overlapping try regions");
+ }
+ // else full containment (follows from (1) and (2))
+
+ lastChild = sibling;
+ sibling = sibling->ehnNext;
+ }
+
+ // All siblings of Root up to and including lastChild will continue to be
+ // siblings of Root (and children of Node). The node to the right of
+ // lastChild will become the first sibling of Node.
+ //
+
+ if (lastChild)
+ {
+ // Node has more than one child including Root
+
+ node->ehnNext = lastChild->ehnNext;
+ lastChild->ehnNext = nullptr;
+ }
+ else
+ {
+ // Root is the only child of Node
+ node->ehnNext = (*ppRoot)->ehnNext;
+ (*ppRoot)->ehnNext = nullptr;
+ }
+
+ node->ehnChild = *ppRoot;
+ *ppRoot = node;
+}
+
+/*****************************************************************************
+ * Checks the following two conditions:
+ * 1) If block A contains block B, A should also contain B's try/filter/handler.
+ * 2) A block cannot contain its related try/filter/handler.
+ * Both these conditions are checked by making sure that all the blocks for an
+ * exception clause are at the same level.
+ * The algorithm is: for each exception clause, determine the first block and
+ * search through the next links for its corresponding try/handler/filter as the
+ * case may be. If not found, then fail.
+ */
+void Compiler::verCheckNestingLevel(EHNodeDsc* root)
+{
+ EHNodeDsc* ehnNode = root;
+
+#define exchange(a, b) \
+ { \
+ temp = a; \
+ a = b; \
+ b = temp; \
+ }
+
+ for (unsigned XTnum = 0; XTnum < compHndBBtabCount; XTnum++)
+ {
+ EHNodeDsc *p1, *p2, *p3, *temp, *search;
+
+ p1 = ehnNode++;
+ p2 = ehnNode++;
+
+ // we are relying on the fact that ehn nodes are allocated sequentially.
+ noway_assert(p1->ehnHandlerNode == p2);
+ noway_assert(p2->ehnTryNode == p1);
+
+ // arrange p1 and p2 in sequential order
+ if (p1->ehnStartOffset == p2->ehnStartOffset)
+ {
+ BADCODE("shared exception handler");
+ }
+
+ if (p1->ehnStartOffset > p2->ehnStartOffset)
+ exchange(p1, p2);
+
+ temp = p1->ehnNext;
+ unsigned numSiblings = 0;
+
+ search = p2;
+ if (search->ehnEquivalent)
+ {
+ search = search->ehnEquivalent;
+ }
+
+ do
+ {
+ if (temp == search)
+ {
+ numSiblings++;
+ break;
+ }
+ if (temp)
+ {
+ temp = temp->ehnNext;
+ }
+ } while (temp);
+
+ CORINFO_EH_CLAUSE clause;
+ info.compCompHnd->getEHinfo(info.compMethodHnd, XTnum, &clause);
+
+ if (clause.Flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ p3 = ehnNode++;
+
+ noway_assert(p3->ehnTryNode == p1 || p3->ehnTryNode == p2);
+ noway_assert(p1->ehnFilterNode == p3 || p2->ehnFilterNode == p3);
+
+ if (p3->ehnStartOffset < p1->ehnStartOffset)
+ {
+ temp = p3;
+ search = p1;
+ }
+ else if (p3->ehnStartOffset < p2->ehnStartOffset)
+ {
+ temp = p1;
+ search = p3;
+ }
+ else
+ {
+ temp = p2;
+ search = p3;
+ }
+ if (search->ehnEquivalent)
+ {
+ search = search->ehnEquivalent;
+ }
+ do
+ {
+ if (temp == search)
+ {
+ numSiblings++;
+ break;
+ }
+ temp = temp->ehnNext;
+ } while (temp);
+ }
+ else
+ {
+ numSiblings++;
+ }
+
+ if (numSiblings != 2)
+ {
+ BADCODE("Outer block does not contain all code in inner handler");
+ }
+ }
+}
diff --git a/src/jit/jiteh.h b/src/jit/jiteh.h
new file mode 100644
index 0000000000..573116282c
--- /dev/null
+++ b/src/jit/jiteh.h
@@ -0,0 +1,180 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Exception Handling XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _EH_H_
+#define _EH_H_
+
+struct BasicBlock;
+class Compiler;
+
+/*****************************************************************************/
+
+// The following holds the table of exception handlers.
+
+enum EHHandlerType
+{
+ EH_HANDLER_CATCH = 0x1, // Don't use zero (to aid debugging uninitialized memory)
+ EH_HANDLER_FILTER,
+ EH_HANDLER_FAULT,
+ EH_HANDLER_FINALLY
+};
+
+// ToCORINFO_EH_CLAUSE_FLAGS: Convert an internal EHHandlerType to a CORINFO_EH_CLAUSE_FLAGS value
+// to pass back to the VM.
+inline CORINFO_EH_CLAUSE_FLAGS ToCORINFO_EH_CLAUSE_FLAGS(EHHandlerType type)
+{
+ switch (type)
+ {
+ case EH_HANDLER_CATCH:
+ return CORINFO_EH_CLAUSE_NONE;
+ case EH_HANDLER_FILTER:
+ return CORINFO_EH_CLAUSE_FILTER;
+ case EH_HANDLER_FAULT:
+ return CORINFO_EH_CLAUSE_FAULT;
+ case EH_HANDLER_FINALLY:
+ return CORINFO_EH_CLAUSE_FINALLY;
+ default:
+ unreached();
+ }
+}
+
+// ToEHHandlerType: Convert a CORINFO_EH_CLAUSE_FLAGS value obtained from the VM in the EH clause structure
+// to the internal EHHandlerType type.
+inline EHHandlerType ToEHHandlerType(CORINFO_EH_CLAUSE_FLAGS flags)
+{
+ if (flags & CORINFO_EH_CLAUSE_FAULT)
+ {
+ return EH_HANDLER_FAULT;
+ }
+ else if (flags & CORINFO_EH_CLAUSE_FINALLY)
+ {
+ return EH_HANDLER_FINALLY;
+ }
+ else if (flags & CORINFO_EH_CLAUSE_FILTER)
+ {
+ return EH_HANDLER_FILTER;
+ }
+ else
+ {
+ // If it's none of the others, assume it is a try/catch.
+ /* XXX Fri 11/7/2008
+ * The VM (and apparently VC) stick in extra bits in the flags field. We ignore any flags
+ * we don't know about.
+ */
+ return EH_HANDLER_CATCH;
+ }
+}
+
+struct EHblkDsc
+{
+ BasicBlock* ebdTryBeg; // First block of the try
+ BasicBlock* ebdTryLast; // Last block of the try
+ BasicBlock* ebdHndBeg; // First block of the handler
+ BasicBlock* ebdHndLast; // Last block of the handler
+ union {
+ BasicBlock* ebdFilter; // First block of filter, if HasFilter()
+ unsigned ebdTyp; // Exception type (a class token), otherwise
+ };
+
+ EHHandlerType ebdHandlerType;
+
+#if !FEATURE_EH_FUNCLETS
+ // How nested is the try/handler within other *handlers* - 0 for outermost clauses, 1 when nested within one
+ // handler, etc.
+ unsigned short ebdHandlerNestingLevel;
+#endif // !FEATURE_EH_FUNCLETS
+
+ static const unsigned short NO_ENCLOSING_INDEX = USHRT_MAX;
+
+ // The index of the enclosing outer try region, NO_ENCLOSING_INDEX if none.
+ // Be careful of 'mutually protect' catch and filter clauses (multiple
+ // handlers with the same try region): the try regions 'nest' so we set
+ // ebdEnclosingTryIndex, but the inner catch is *NOT* nested within the outer catch!
+ // That is, if the "inner catch" throws an exception, it won't be caught by
+ // the "outer catch" for mutually protect handlers.
+ unsigned short ebdEnclosingTryIndex;
+
+ // The index of the enclosing outer handler region, NO_ENCLOSING_INDEX if none.
+ unsigned short ebdEnclosingHndIndex;
+
+#if FEATURE_EH_FUNCLETS
+
+ // After funclets are created, this is the index of corresponding FuncInfoDsc
+ // Special case for Filter/Filter-handler:
+ // As in the IL, the filter funclet immediately precedes the filter-handler funclet.
+ // So this index points to the filter-handler funclet. If you want the filter
+ // funclet index, just subtract 1.
+ unsigned short ebdFuncIndex;
+
+#endif // FEATURE_EH_FUNCLETS
+
+ IL_OFFSET ebdTryBegOffset; // IL offsets of EH try/end regions as they are imported
+ IL_OFFSET ebdTryEndOffset;
+ IL_OFFSET ebdFilterBegOffset; // only set if HasFilter()
+ IL_OFFSET ebdHndBegOffset;
+ IL_OFFSET ebdHndEndOffset;
+
+ // Returns the last block of the filter. Assumes the EH clause is a try/filter/filter-handler type.
+ BasicBlock* BBFilterLast();
+
+ bool HasCatchHandler();
+ bool HasFilter();
+ bool HasFinallyHandler();
+ bool HasFaultHandler();
+ bool HasFinallyOrFaultHandler();
+
+ // Returns the block to which control will flow if an (otherwise-uncaught) exception is raised
+ // in the try. This is normally "ebdHndBeg", unless the try region has a filter, in which case that is returned.
+ // (This is, in some sense, the "true handler," at least in the sense of control flow. Note
+ // that we model the transition from a filter to its handler as normal, non-exceptional control flow.)
+ BasicBlock* ExFlowBlock();
+
+ bool InTryRegionILRange(BasicBlock* pBlk);
+ bool InFilterRegionILRange(BasicBlock* pBlk);
+ bool InHndRegionILRange(BasicBlock* pBlk);
+
+ bool InTryRegionBBRange(BasicBlock* pBlk);
+ bool InFilterRegionBBRange(BasicBlock* pBlk);
+ bool InHndRegionBBRange(BasicBlock* pBlk);
+
+ IL_OFFSET ebdTryBegOffs();
+ IL_OFFSET ebdTryEndOffs();
+ IL_OFFSET ebdFilterBegOffs();
+ IL_OFFSET ebdFilterEndOffs();
+ IL_OFFSET ebdHndBegOffs();
+ IL_OFFSET ebdHndEndOffs();
+
+ static bool ebdIsSameILTry(EHblkDsc* h1, EHblkDsc* h2); // Same 'try' region? Compare IL range.
+
+ // Return the region index of the most nested EH region that encloses this region, or NO_ENCLOSING_INDEX
+ // if this region is directly in the main function body. Set '*inTryRegion' to 'true' if this region is
+ // most nested within a 'try' region, or 'false' if this region is most nested within a handler. (Note
+ // that filters cannot contain nested EH regions.)
+ unsigned ebdGetEnclosingRegionIndex(bool* inTryRegion);
+
+ static bool ebdIsSameTry(EHblkDsc* h1, EHblkDsc* h2); // Same 'try' region? Compare begin/last blocks.
+ bool ebdIsSameTry(Compiler* comp, unsigned t2);
+ bool ebdIsSameTry(BasicBlock* ebdTryBeg, BasicBlock* ebdTryLast);
+
+#ifdef DEBUG
+ void DispEntry(unsigned num); // Display this table entry
+#endif // DEBUG
+
+private:
+ static bool InBBRange(BasicBlock* pBlk, BasicBlock* pStart, BasicBlock* pEnd);
+};
+
+/*****************************************************************************/
+#endif // _EH_H_
+/*****************************************************************************/
diff --git a/src/jit/jitgcinfo.h b/src/jit/jitgcinfo.h
new file mode 100644
index 0000000000..b93ac3376c
--- /dev/null
+++ b/src/jit/jitgcinfo.h
@@ -0,0 +1,452 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Garbage-collector information
+// Keeps track of which variables hold pointers.
+// Generates the GC-tables
+
+#ifndef _JITGCINFO_H_
+#define _JITGCINFO_H_
+
+#include "gcinfotypes.h"
+
+#ifndef JIT32_GCENCODER
+#include "gcinfoencoder.h"
+#endif
+
+/*****************************************************************************/
+
+#ifndef JIT32_GCENCODER
+// Shash typedefs
+struct RegSlotIdKey
+{
+ unsigned short m_regNum;
+ unsigned short m_flags;
+
+ RegSlotIdKey()
+ {
+ }
+
+ RegSlotIdKey(unsigned short regNum, unsigned short flags) : m_regNum(regNum), m_flags(flags)
+ {
+ }
+
+ static unsigned GetHashCode(RegSlotIdKey rsk)
+ {
+ return (rsk.m_flags << (8 * sizeof(unsigned short))) + rsk.m_regNum;
+ }
+
+ static bool Equals(RegSlotIdKey rsk1, RegSlotIdKey rsk2)
+ {
+ return rsk1.m_regNum == rsk2.m_regNum && rsk1.m_flags == rsk2.m_flags;
+ }
+};
+
+struct StackSlotIdKey
+{
+ int m_offset;
+ bool m_fpRel;
+ unsigned short m_flags;
+
+ StackSlotIdKey()
+ {
+ }
+
+ StackSlotIdKey(int offset, bool fpRel, unsigned short flags) : m_offset(offset), m_fpRel(fpRel), m_flags(flags)
+ {
+ }
+
+ static unsigned GetHashCode(StackSlotIdKey ssk)
+ {
+ return (ssk.m_flags << (8 * sizeof(unsigned short))) ^ (unsigned)ssk.m_offset ^ (ssk.m_fpRel ? 0x1000000 : 0);
+ }
+
+ static bool Equals(StackSlotIdKey ssk1, StackSlotIdKey ssk2)
+ {
+ return ssk1.m_offset == ssk2.m_offset && ssk1.m_fpRel == ssk2.m_fpRel && ssk1.m_flags == ssk2.m_flags;
+ }
+};
+
+typedef SimplerHashTable<RegSlotIdKey, RegSlotIdKey, GcSlotId, JitSimplerHashBehavior> RegSlotMap;
+typedef SimplerHashTable<StackSlotIdKey, StackSlotIdKey, GcSlotId, JitSimplerHashBehavior> StackSlotMap;
+#endif
+
+typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, VARSET_TP*, JitSimplerHashBehavior> NodeToVarsetPtrMap;
+
+class GCInfo
+{
+ friend class CodeGen;
+
+private:
+ Compiler* compiler;
+ RegSet* regSet;
+
+public:
+ GCInfo(Compiler* theCompiler);
+
+ void gcResetForBB();
+
+ void gcMarkRegSetGCref(regMaskTP regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegSetByref(regMaskTP regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegSetNpt(regMaskTP regMask DEBUGARG(bool forceOutput = false));
+ void gcMarkRegPtrVal(regNumber reg, var_types type);
+ void gcMarkRegPtrVal(GenTreePtr tree);
+
+#ifdef DEBUG
+ void gcDspGCrefSetChanges(regMaskTP gcRegGCrefSetNew DEBUGARG(bool forceOutput = false));
+ void gcDspByrefSetChanges(regMaskTP gcRegByrefSetNew DEBUGARG(bool forceOutput = false));
+#endif // DEBUG
+
+ /*****************************************************************************/
+
+ //-------------------------------------------------------------------------
+ //
+ // The following keeps track of which registers currently hold pointer
+ // values.
+ //
+
+ regMaskTP gcRegGCrefSetCur; // current regs holding GCrefs
+ regMaskTP gcRegByrefSetCur; // current regs holding Byrefs
+
+ VARSET_TP gcTrkStkPtrLcls; // set of tracked stack ptr lcls (GCref and Byref) - no args
+ VARSET_TP gcVarPtrSetCur; // currently live part of "gcTrkStkPtrLcls"
+
+ //-------------------------------------------------------------------------
+ //
+ // The following keeps track of the lifetimes of non-register variables that
+ // hold pointers.
+ //
+
+ struct varPtrDsc
+ {
+ varPtrDsc* vpdNext;
+
+ unsigned vpdVarNum; // which variable is this about?
+
+ unsigned vpdBegOfs; // the offset where life starts
+ unsigned vpdEndOfs; // the offset where life ends
+ };
+
+ varPtrDsc* gcVarPtrList;
+ varPtrDsc* gcVarPtrLast;
+
+ void gcVarPtrSetInit();
+
+ /*****************************************************************************/
+
+ // 'pointer value' register tracking and argument pushes/pops tracking.
+
+ enum rpdArgType_t
+ {
+ rpdARG_POP,
+ rpdARG_PUSH,
+ rpdARG_KILL
+ };
+
+ struct regPtrDsc
+ {
+ regPtrDsc* rpdNext; // next entry in the list
+ unsigned rpdOffs; // the offset of the instruction
+
+ union // 2-16 byte union (depending on architecture)
+ {
+ struct // 2-16 byte structure (depending on architecture)
+ {
+ regMaskSmall rpdAdd; // regptr bitset being added
+ regMaskSmall rpdDel; // regptr bitset being removed
+ } rpdCompiler;
+
+ unsigned short rpdPtrArg; // arg offset or popped arg count
+ };
+
+#ifndef JIT32_GCENCODER
+ unsigned char rpdCallInstrSize; // Length of the call instruction.
+#endif
+
+ unsigned short rpdArg : 1; // is this an argument descriptor?
+ unsigned short rpdArgType : 2; // is this an argument push, pop, or kill?
+ rpdArgType_t rpdArgTypeGet()
+ {
+ return (rpdArgType_t)rpdArgType;
+ }
+
+ unsigned short rpdGCtype : 2; // is this a pointer, after all?
+ GCtype rpdGCtypeGet()
+ {
+ return (GCtype)rpdGCtype;
+ }
+
+ unsigned short rpdIsThis : 1; // is it the 'this' pointer
+ unsigned short rpdCall : 1; // is this a true call site?
+ unsigned short : 1; // Padding bit, so next two start on a byte boundary
+ unsigned short rpdCallGCrefRegs : CNT_CALLEE_SAVED; // Callee-saved registers containing GC pointers.
+ unsigned short rpdCallByrefRegs : CNT_CALLEE_SAVED; // Callee-saved registers containing byrefs.
+
+#ifndef JIT32_GCENCODER
+ bool rpdIsCallInstr()
+ {
+ return rpdCall && rpdCallInstrSize != 0;
+ }
+#endif
+ };
+
+ regPtrDsc* gcRegPtrList;
+ regPtrDsc* gcRegPtrLast;
+ unsigned gcPtrArgCnt;
+
+#ifndef JIT32_GCENCODER
+ enum MakeRegPtrMode
+ {
+ MAKE_REG_PTR_MODE_ASSIGN_SLOTS,
+ MAKE_REG_PTR_MODE_DO_WORK
+ };
+
+ // This method has two modes. In the "assign slots" mode, it figures out what stack locations are
+ // used to contain GC references, and whether those locations contain byrefs or pinning references,
+ // building up mappings from tuples of <offset X byref/pinning> to the corresponding slot id.
+ // In the "do work" mode, we use these slot ids to actually declare live ranges to the encoder.
+ void gcMakeVarPtrTable(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode);
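+ //
+ // A minimal sketch of the assumed two-pass call shape (not the actual caller):
+ //     gcMakeVarPtrTable(gcInfoEncoder, MAKE_REG_PTR_MODE_ASSIGN_SLOTS); // discover stack slots, assign slot ids
+ //     gcMakeVarPtrTable(gcInfoEncoder, MAKE_REG_PTR_MODE_DO_WORK);      // report live ranges using those ids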
+
+ // This method expands the tracked stack variables lifetimes so that any lifetimes within filters
+ // are reported as pinned.
+ void gcMarkFilterVarsPinned();
+
+ // At instruction offset "instrOffset," the set of registers indicated by "regMask" is becoming live or dead,
+ // depending on whether "newState" is "GC_SLOT_DEAD" or "GC_SLOT_LIVE". The subset of registers whose corresponding
+ // bits are set in "byRefMask" contain by-refs rather than regular GC pointers. "*pPtrRegs" is the set of
+ // registers currently known to contain pointers. If "mode" is "ASSIGN_SLOTS", computes and records slot
+ // ids for the registers. If "mode" is "DO_WORK", informs "gcInfoEncoder" about the state transition,
+ // using the previously assigned slot ids, and updates "*pPtrRegs" appropriately.
+ void gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
+ MakeRegPtrMode mode,
+ unsigned instrOffset,
+ regMaskSmall regMask,
+ GcSlotState newState,
+ regMaskSmall byRefMask,
+ regMaskSmall* pPtrRegs);
+
+ // regPtrDsc is also used to encode writes to the outgoing argument space (as if they were pushes)
+ void gcInfoRecordGCStackArgLive(GcInfoEncoder* gcInfoEncoder, MakeRegPtrMode mode, regPtrDsc* genStackPtr);
+
+ // Walk all the pushes between genStackPtrFirst (inclusive) and genStackPtrLast (exclusive)
+ // and mark them as going dead at instrOffset
+ void gcInfoRecordGCStackArgsDead(GcInfoEncoder* gcInfoEncoder,
+ unsigned instrOffset,
+ regPtrDsc* genStackPtrFirst,
+ regPtrDsc* genStackPtrLast);
+
+#endif
+
+#if MEASURE_PTRTAB_SIZE
+ static size_t s_gcRegPtrDscSize;
+ static size_t s_gcTotalPtrTabSize;
+#endif
+
+ regPtrDsc* gcRegPtrAllocDsc();
+
+ /*****************************************************************************/
+
+ //-------------------------------------------------------------------------
+ //
+ // If we're not generating fully interruptible code, we create a simple
+ // linked list of call descriptors.
+ //
+
+ struct CallDsc
+ {
+ CallDsc* cdNext;
+ void* cdBlock; // the code block of the call
+ unsigned cdOffs; // the offset of the call
+#ifndef JIT32_GCENCODER
+ unsigned short cdCallInstrSize; // the size of the call instruction.
+#endif
+
+ unsigned short cdArgCnt;
+
+ union {
+ struct // used if cdArgCnt == 0
+ {
+ unsigned cdArgMask; // ptr arg bitfield
+ unsigned cdByrefArgMask; // byref qualifier for cdArgMask
+ } u1;
+
+ unsigned* cdArgTable; // used if cdArgCnt != 0
+ };
+
+ regMaskSmall cdGCrefRegs;
+ regMaskSmall cdByrefRegs;
+ };
+
+ CallDsc* gcCallDescList;
+ CallDsc* gcCallDescLast;
+
+ //-------------------------------------------------------------------------
+
+ void gcCountForHeader(UNALIGNED unsigned int* untrackedCount, UNALIGNED unsigned int* varPtrTableSize);
+
+#ifdef JIT32_GCENCODER
+ size_t gcMakeRegPtrTable(BYTE* dest, int mask, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
+#else
+ RegSlotMap* m_regSlotMap;
+ StackSlotMap* m_stackSlotMap;
+ // This method has two modes. In the "assign slots" mode, it figures out what registers and stack
+ // locations are used to contain GC references, and whether those locations contain byrefs or pinning
+ // references, building up mappings from tuples of <reg/offset X byref/pinning> to the corresponding
+ // slot id (in the two member fields declared above). In the "do work" mode, we use these slot ids to
+ // actually declare live ranges to the encoder.
+ void gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder, unsigned codeSize, unsigned prologSize, MakeRegPtrMode mode);
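+ //
+ // As with gcMakeVarPtrTable above, the assumed usage is two passes over the
+ // same data (a sketch, not the actual emission path):
+ //     gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, MAKE_REG_PTR_MODE_ASSIGN_SLOTS);
+ //     gcMakeRegPtrTable(gcInfoEncoder, codeSize, prologSize, MAKE_REG_PTR_MODE_DO_WORK);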
+#endif
+
+#ifdef JIT32_GCENCODER
+ size_t gcPtrTableSize(const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
+ BYTE* gcPtrTableSave(BYTE* destPtr, const InfoHdr& header, unsigned codeSize, size_t* pArgTabOffset);
+#endif
+ void gcRegPtrSetInit();
+ /*****************************************************************************/
+
+ // This enumeration yields the result of the analysis below, whether a store
+ // requires a write barrier:
+ enum WriteBarrierForm
+ {
+ WBF_NoBarrier, // No barrier is required
+ WBF_BarrierUnknown, // A barrier is required, no information on checked/unchecked.
+ WBF_BarrierChecked, // A checked barrier is required.
+ WBF_BarrierUnchecked, // An unchecked barrier is required.
+ WBF_NoBarrier_CheckNotHeapInDebug, // We believe that no barrier is required because the
+ // target is not in the heap -- but in debug build use a
+ // barrier call that verifies this property. (Because the
+ // target not being in the heap relies on a convention that
+ // might accidentally be violated in the future.)
+ };
+
+ WriteBarrierForm gcIsWriteBarrierCandidate(GenTreePtr tgt, GenTreePtr assignVal);
+ bool gcIsWriteBarrierAsgNode(GenTreePtr op);
+
+ // Returns a WriteBarrierForm decision based on the form of "tgtAddr", which is assumed to be the
+ // argument of a GT_IND LHS.
+ WriteBarrierForm gcWriteBarrierFormFromTargetAddress(GenTreePtr tgtAddr);
+
+ //-------------------------------------------------------------------------
+ //
+ // These record the info about the procedure in the info-block
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef JIT32_GCENCODER
+private:
+ BYTE* gcEpilogTable;
+
+ unsigned gcEpilogPrevOffset;
+
+ size_t gcInfoBlockHdrSave(BYTE* dest,
+ int mask,
+ unsigned methodSize,
+ unsigned prologSize,
+ unsigned epilogSize,
+ InfoHdr* header,
+ int* s_cached);
+
+public:
+ static void gcInitEncoderLookupTable();
+
+private:
+ static size_t gcRecordEpilog(void* pCallBackData, unsigned offset);
+#else // JIT32_GCENCODER
+ void gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSize, unsigned prologSize);
+
+#ifdef DEBUG
+ void gcDumpVarPtrDsc(varPtrDsc* desc);
+#endif // DEBUG
+
+#endif // JIT32_GCENCODER
+
+#if DUMP_GC_TABLES
+
+ void gcFindPtrsInFrame(const void* infoBlock, const void* codeBlock, unsigned offs);
+
+#ifdef JIT32_GCENCODER
+ unsigned gcInfoBlockHdrDump(const BYTE* table,
+ InfoHdr* header, /* OUT */
+ unsigned* methodSize); /* OUT */
+
+ unsigned gcDumpPtrTable(const BYTE* table, const InfoHdr& header, unsigned methodSize);
+
+#endif // JIT32_GCENCODER
+#endif // DUMP_GC_TABLES
+
+#ifndef LEGACY_BACKEND
+ // This method updates the appropriate reg masks when a variable is moved.
+public:
+ void gcUpdateForRegVarMove(regMaskTP srcMask, regMaskTP dstMask, LclVarDsc* varDsc);
+#endif // !LEGACY_BACKEND
+};
+
+inline unsigned char encodeUnsigned(BYTE* dest, unsigned value)
+{
+ unsigned char size = 1;
+ unsigned tmp = value;
+ while (tmp > 0x7F)
+ {
+ tmp >>= 7;
+ assert(size < 6); // A 32-bit unsigned value needs at most 5 bytes of 7-bit groups.
+ size++;
+ }
+ if (dest)
+ {
+ // write the bytes starting at the end of dest in LSB to MSB order
+ BYTE* p = dest + size;
+ BYTE cont = 0; // The last byte has no continuation flag
+ while (value > 0x7F)
+ {
+ *--p = cont | (value & 0x7f);
+ value >>= 7;
+ cont = 0x80; // Non last bytes have a continuation flag
+ }
+ *--p = cont | (BYTE)value; // Now write the first byte
+ assert(p == dest);
+ }
+ return size;
+}
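+
+// Worked example (derived from the code above): encodeUnsigned(dest, 300)
+// returns 2 and, when dest is non-null, writes 0x82 then 0x2C. The value is
+// split into 7-bit groups (2 and 44), emitted most-significant group first,
+// with the 0x80 continuation flag set on every byte except the last.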
+
+inline unsigned char encodeUDelta(BYTE* dest, unsigned value, unsigned lastValue)
+{
+ assert(value >= lastValue);
+ return encodeUnsigned(dest, value - lastValue);
+}
+
+inline unsigned char encodeSigned(BYTE* dest, int val)
+{
+ unsigned char size = 1;
+ unsigned value = val;
+ BYTE neg = 0;
+ if (val < 0)
+ {
+ value = -val;
+ neg = 0x40;
+ }
+ unsigned tmp = value;
+ while (tmp > 0x3F)
+ {
+ tmp >>= 7;
+ assert(size < 16); // Loose bound: a 32-bit magnitude needs at most 5 bytes, so this never fires.
+ size++;
+ }
+ if (dest)
+ {
+ // write the bytes starting at the end of dest in LSB to MSB order
+ BYTE* p = dest + size;
+ BYTE cont = 0; // The last byte has no continuation flag
+ while (value > 0x3F)
+ {
+ *--p = cont | (value & 0x7f);
+ value >>= 7;
+ cont = 0x80; // Non last bytes have a continuation flag
+ }
+ *--p = neg | cont | (BYTE)value; // Now write the first byte
+ assert(p == dest);
+ }
+ return size;
+}
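+
+// Worked example (derived from the code above): encodeSigned(dest, -300)
+// returns 2 and writes 0xC2 then 0x2C. The magnitude 300 is split into 7-bit
+// groups as for encodeUnsigned, and the first byte carries both the 0x80
+// continuation flag and the 0x40 sign flag.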
+
+#endif // _JITGCINFO_H_
diff --git a/src/jit/jitpch.cpp b/src/jit/jitpch.cpp
new file mode 100644
index 0000000000..74300aefc3
--- /dev/null
+++ b/src/jit/jitpch.cpp
@@ -0,0 +1,6 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#include "jitpch.h"
diff --git a/src/jit/jitpch.h b/src/jit/jitpch.h
new file mode 100644
index 0000000000..2e69e79208
--- /dev/null
+++ b/src/jit/jitpch.h
@@ -0,0 +1,36 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include <stdint.h>
+#include <windows.h>
+#include <wchar.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <float.h>
+#include <share.h>
+#include <cstdlib>
+#include <intrin.h>
+
+#if COR_JIT_EE_VERSION <= 460
+#include "corjithost.h"
+#include "jithost.h"
+#endif
+#include "jitconfig.h"
+#include "jit.h"
+#include "iallocator.h"
+#include "hashbv.h"
+#include "compiler.h"
+#include "dataflow.h"
+#include "block.h"
+#include "jiteh.h"
+#include "rationalize.h"
+#include "jitstd.h"
+#include "ssaconfig.h"
+#include "blockset.h"
+#include "bitvec.h"
+#include "inline.h"
+#include "objectalloc.h"
diff --git a/src/jit/jitstd.h b/src/jit/jitstd.h
new file mode 100644
index 0000000000..6b428679f0
--- /dev/null
+++ b/src/jit/jitstd.h
@@ -0,0 +1,10 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "allocator.h"
+#include "type_traits.h"
+#include "pair.h"
+#include "utility.h"
+#include "unordered_map.h"
+#include "unordered_set.h"
diff --git a/src/jit/jitstd/.gitmirror b/src/jit/jitstd/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/jitstd/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/jitstd/algorithm.h b/src/jit/jitstd/algorithm.h
new file mode 100644
index 0000000000..7b6447e1a5
--- /dev/null
+++ b/src/jit/jitstd/algorithm.h
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <typename InputIterator, typename CompareValue>
+InputIterator find(InputIterator first, InputIterator last,
+ const CompareValue& value)
+{
+ for (; first != last; ++first)
+ {
+ if (*first == value)
+ {
+ return first;
+ }
+ }
+ return last;
+}
+
+template <typename InputIterator, typename Pred>
+InputIterator find_if(InputIterator first, InputIterator last, const Pred& pred)
+{
+ for (; first != last; ++first)
+ {
+ if (pred(*first))
+ {
+ return first;
+ }
+ }
+ return last;
+}
+
+template<typename InputIterator, typename Function>
+Function for_each(InputIterator first, InputIterator last, Function func)
+{
+ for (; first != last; ++first)
+ {
+ func(*first);
+ }
+ return func;
+}
+
+}
diff --git a/src/jit/jitstd/allocator.h b/src/jit/jitstd/allocator.h
new file mode 100644
index 0000000000..2bd33daa98
--- /dev/null
+++ b/src/jit/jitstd/allocator.h
@@ -0,0 +1,211 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX allocator<T> XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#include "iallocator.h"
+#include "new.h"
+
+namespace jitstd
+{
+
+template <typename T>
+class allocator;
+
+template <>
+class allocator<void>
+{
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef void* pointer;
+ typedef const void* const_pointer;
+ typedef void value_type;
+
+ template <typename U>
+ struct rebind
+ {
+ typedef allocator<U> allocator;
+ };
+
+private:
+ allocator();
+
+public:
+ inline allocator(IAllocator* pAlloc);
+
+ template <typename U>
+ inline allocator(const allocator<U>& alloc);
+
+ inline allocator(const allocator& alloc);
+
+ template <typename U>
+ inline allocator& operator=(const allocator<U>& alloc);
+
+private:
+ IAllocator* m_pAlloc;
+ template <typename U>
+ friend class allocator;
+};
+
+allocator<void>::allocator(IAllocator* pAlloc)
+ : m_pAlloc(pAlloc)
+{
+}
+
+allocator<void>::allocator(const allocator& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename U>
+allocator<void>::allocator(const allocator<U>& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename U>
+allocator<void>& allocator<void>::operator=(const allocator<U>& alloc)
+{
+ m_pAlloc = alloc.m_pAlloc;
+ return *this;
+}
+
+template <typename T>
+class allocator
+{
+public:
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef const T* const_pointer;
+ typedef const T& const_reference;
+ typedef T value_type;
+
+private:
+ allocator();
+public:
+ allocator(IAllocator* pAlloc);
+
+ template <typename U>
+ allocator(const allocator<U>& alloc);
+
+ allocator(const allocator& alloc);
+
+ template <typename U>
+ allocator& operator=(const allocator<U>& alloc);
+
+ pointer address(reference val);
+ const_pointer address(const_reference val) const;
+ pointer allocate(size_type count, allocator<void>::const_pointer hint = nullptr);
+ void construct(pointer ptr, const_reference val);
+ void deallocate(pointer ptr, size_type size);
+ void destroy(pointer ptr);
+ size_type max_size() const;
+ template <typename U>
+ struct rebind
+ {
+ typedef allocator<U> allocator;
+ };
+
+private:
+ IAllocator* m_pAlloc;
+ template <typename U>
+ friend class allocator;
+};
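+
+// Example use (a sketch; assumes the usual allocator-taking container
+// constructors in this library, and a hypothetical IAllocator* named pIAllocator):
+//     jitstd::allocator<int> alloc(pIAllocator);
+//     jitstd::vector<int>    ints(alloc);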
+
+} // end of namespace jitstd
+
+
+namespace jitstd
+{
+
+template <typename T>
+allocator<T>::allocator(IAllocator* pAlloc)
+ : m_pAlloc(pAlloc)
+{
+}
+
+template <typename T>
+template <typename U>
+allocator<T>::allocator(const allocator<U>& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename T>
+allocator<T>::allocator(const allocator<T>& alloc)
+ : m_pAlloc(alloc.m_pAlloc)
+{
+}
+
+template <typename T>
+template <typename U>
+allocator<T>& allocator<T>::operator=(const allocator<U>& alloc)
+{
+ m_pAlloc = alloc.m_pAlloc;
+ return *this;
+}
+
+template <typename T>
+typename allocator<T>::pointer allocator<T>::address(reference val)
+{
+ return &val;
+}
+
+template <typename T>
+typename allocator<T>::const_pointer allocator<T>::address(const_reference val) const
+{
+ return &val;
+}
+
+template <typename T>
+T* allocator<T>::allocate(size_type count, allocator<void>::const_pointer hint)
+{
+ return (pointer) m_pAlloc->Alloc(sizeof(value_type) * count);
+}
+
+template <typename T>
+void allocator<T>::construct(pointer ptr, const_reference val)
+{
+ new (ptr, placement_t()) value_type(val);
+}
+
+template <typename T>
+void allocator<T>::deallocate(pointer ptr, size_type size)
+{
+ // m_pAlloc->Free(ptr);
+}
+
+template <typename T>
+void allocator<T>::destroy(pointer ptr)
+{
+ ptr->~T();
+}
+
+template <typename T>
+typename allocator<T>::size_type allocator<T>::max_size() const
+{
+ return (size_type) -1;
+}
+
+} // end of namespace jitstd
diff --git a/src/jit/jitstd/functional.h b/src/jit/jitstd/functional.h
new file mode 100644
index 0000000000..31456a870e
--- /dev/null
+++ b/src/jit/jitstd/functional.h
@@ -0,0 +1,62 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <typename T>
+void swap(T& a, T& b)
+{
+ T t(a);
+ a = b;
+ b = t;
+}
+
+template <typename Arg, typename Result>
+struct unary_function
+{
+ typedef Arg argument_type;
+ typedef Result result_type;
+};
+
+template <typename Arg1, typename Arg2, typename Result>
+struct binary_function
+{
+ typedef Arg1 first_argument_type;
+ typedef Arg2 second_argument_type;
+ typedef Result result_type;
+};
+
+template <typename T>
+struct greater : binary_function<T, T, bool>
+{
+ bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs > rhs;
+ }
+};
+
+template <typename T>
+struct equal_to : binary_function<T, T, bool>
+{
+ bool operator()(const T& lhs, const T& rhs) const
+ {
+ return lhs == rhs;
+ }
+};
+
+template <typename T>
+struct identity : unary_function<T, T>
+{
+ const T& operator()(const T& op) const
+ {
+ return op;
+ }
+};
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/hash.h b/src/jit/jitstd/hash.h
new file mode 100644
index 0000000000..18db74fc9f
--- /dev/null
+++ b/src/jit/jitstd/hash.h
@@ -0,0 +1,103 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+#include "type_traits.h"
+#include <stdio.h>
+
+namespace jitstd
+{
+template<typename Type>
+class hash
+{
+public:
+ size_t operator()(const Type& val) const
+ {
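+ // One step of the Park-Miller "minimal standard" LCG (multiplier 16807,
+ // modulus 2147483647), evaluated with Schrage's method to avoid overflow,
+ // used here purely as a bit mixer.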
+ div_t qrem = ::div((int)(size_t) val, 127773);
+ qrem.rem = 16807 * qrem.rem - 2836 * qrem.quot;
+ if (qrem.rem < 0)
+ {
+ qrem.rem += 2147483647;
+ }
+ return ((size_t) qrem.rem);
+ }
+};
+
+template<>
+class hash<int>
+{
+public:
+ size_t operator()(const int& val) const
+ {
+ return val;
+ }
+};
+
+template<>
+class hash<unsigned __int64>
+{
+private:
+ typedef unsigned __int64 Type;
+
+public:
+ size_t operator()(const Type& val) const
+ {
+ return (hash<int>()((int)(val & 0xffffffffUL)) ^ hash<int>()((int)(val >> 32)));
+ }
+};
+
+template<>
+class hash<__int64>
+{
+private:
+ typedef __int64 Type;
+
+public:
+ size_t operator()(const Type& val) const
+ {
+ return (hash<unsigned __int64>()((unsigned __int64) val));
+ }
+};
+
+template<typename Type>
+class hash<Type*>
+{
+private:
+ typedef typename conditional<sizeof (Type*) <= sizeof (int), int, __int64>::type TInteger;
+public:
+ size_t operator()(const Type* val) const
+ {
+ return (hash<TInteger>()((TInteger) val));
+ }
+};
+
+template<>
+class hash<float>
+{
+private:
+ typedef float Type;
+public:
+ size_t operator()(const Type& val) const
+ {
+ unsigned long bits = *(unsigned long*) &val;
+ return (hash<unsigned long>()(bits == 0x80000000 ? 0 : bits));
+ }
+};
+
+template<>
+class hash<double>
+{
+public:
+ typedef double Type;
+ size_t operator()(const Type& val) const
+ {
+ unsigned __int64 bits = *(unsigned __int64*)&val;
+ return (hash<unsigned __int64>()((bits & (((unsigned __int64) -1) >> 1)) == 0 ? 0 : bits));
+ }
+};
+
+}
diff --git a/src/jit/jitstd/hashtable.h b/src/jit/jitstd/hashtable.h
new file mode 100644
index 0000000000..05b033a746
--- /dev/null
+++ b/src/jit/jitstd/hashtable.h
@@ -0,0 +1,822 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX hashtable<K,V,H,P,A,KO> XX
+XX XX
+XX Implemented using a vector of (begin, end) list-iterator pairs, each of XX
+XX which delimits a single bucket. The key-value pairs themselves live in XX
+XX a doubly linked list, with each bucket's elements kept contiguous. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
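+
+// Illustrative layout (a sketch): with three buckets and elements A, B, C,
+// where A and C hash to bucket 0 and B hashes to bucket 2, the element list
+// might be A <-> C <-> B, and m_buckets would then hold the iterator ranges
+//     bucket 0: [A, B)    bucket 1: empty (first == second)    bucket 2: [B, end)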
+
+#pragma once
+
+#include "hash.h"
+#include "functional.h"
+#include "allocator.h"
+#include "vector.h"
+#include "list.h"
+#include "pair.h"
+
+namespace jitstd
+{
+
+static const float kflDefaultLoadFactor = 3.0f;
+
+template <typename Key,
+ typename Value = Key,
+ typename Hash = jitstd::hash<Key>,
+ typename Pred = jitstd::equal_to<Key>,
+ typename Alloc = jitstd::allocator<Value>,
+ typename KeyOf = jitstd::identity<Value>>
+class hashtable
+{
+public:
+ typedef Key key_type;
+ typedef Value value_type;
+ typedef Hash hasher;
+ typedef Pred key_equal;
+ typedef Alloc allocator_type;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef typename allocator_type::reference reference;
+ typedef typename allocator_type::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef typename list<Value, Alloc>::iterator iterator;
+ typedef typename list<Value, Alloc>::reverse_iterator reverse_iterator;
+ typedef typename list<Value, Alloc>::const_iterator const_iterator;
+ typedef typename list<Value, Alloc>::iterator local_iterator;
+
+protected:
+ hashtable();
+
+ typedef pair<iterator, iterator> BucketEntry;
+ typedef vector<BucketEntry, typename Alloc::template rebind<BucketEntry>::allocator> Buckets;
+ typedef list<Value, typename Alloc::template rebind<Value>::allocator> Elements;
+
+protected:
+ explicit hashtable(size_type,
+ const allocator_type& a,
+ const KeyOf& keyOf = KeyOf());
+
+ hashtable(size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& a,
+ const KeyOf& keyOf = KeyOf());
+
+ template<typename InputIterator>
+ hashtable(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& a,
+ const KeyOf& keyOf = KeyOf());
+
+ explicit hashtable(const allocator_type& a, const KeyOf& keyOf = KeyOf());
+
+ hashtable(const hashtable& other);
+
+ ~hashtable();
+
+public:
+ hashtable& operator=(const hashtable& other);
+
+ allocator_type get_allocator() const;
+
+ bool empty() const;
+
+ size_type size() const;
+ size_type max_size() const;
+
+ iterator begin();
+ iterator end();
+
+ // Even though we have an unordered set and there is no concept of forward and
+ // reverse, rbegin will just return the first element inserted. This is not in STL.
+ reverse_iterator rbegin();
+ reverse_iterator rend();
+
+ const_iterator begin() const;
+ const_iterator end() const;
+ const_iterator cbegin() const;
+ const_iterator cend() const;
+ local_iterator begin(size_type size);
+ local_iterator end(size_type size);
+
+ pair<iterator, bool> insert(const value_type& value);
+ iterator insert(const_iterator, const value_type& value);
+ template<typename InputIterator>
+ void insert(InputIterator first, InputIterator last);
+
+ iterator erase(iterator position);
+ size_type erase(const key_type& key);
+ iterator erase(iterator first, iterator last);
+
+ void clear();
+ void swap(hashtable& table);
+
+ hasher hash_function() const;
+ key_equal key_eq() const;
+
+ const_iterator find(const key_type& key) const;
+ iterator find(const key_type& key);
+
+ size_type count(const key_type& key) const;
+
+ size_type bucket_count() const;
+ size_type max_bucket_count() const;
+
+ size_type bucket_size(size_type size) const;
+ size_type bucket(const key_type& key) const;
+
+ float load_factor() const;
+ float max_load_factor() const;
+ void max_load_factor(float);
+
+ void rehash(size_type);
+
+protected:
+ template <typename Compare>
+ iterator find(const key_type&, const Compare& comp);
+
+ // helpers
+ bool check_load();
+ void copy_helper(const hashtable& other);
+ size_type hash_helper(const key_type& value, size_type buckets) const;
+ pair<iterator, bool> insert_helper(const value_type& value, Buckets& buckets, Elements& elements, bool fRehashing);
+ iterator erase_helper(const_iterator position);
+ void dump_helper();
+ void debug_check();
+
+private:
+
+ // member objects
+ Hash m_hasher;
+ Alloc m_allocator;
+ Pred m_pred;
+
+ Buckets m_buckets;
+ Elements m_elements;
+ size_type m_nSize;
+ KeyOf m_keyOf;
+
+ // metadata
+ float m_flMaxLoadFactor;
+};
+
+} // end of namespace jitstd
+
+
+namespace jitstd
+{
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::dump_helper()
+{
+ for (size_type i = 0; i < m_buckets.size(); ++i)
+ {
+ printf("\n");
+ printf("--------------=BEGIN=--------------\n");
+ printf("Load factor = %f\n", load_factor());
+ printf("-----------------------------------\n");
+ printf("Bucket number = %d %p %p\n", i, *((ptrdiff_t*)&(m_buckets[i].first)), *((ptrdiff_t*)&(m_buckets[i].second)));
+ printf("-----------------------------------\n");
+ for (typename Elements::iterator value = (m_buckets[i]).first; value != (m_buckets[i]).second; ++value)
+ {
+ printf("%d, ", *((ptrdiff_t*)&value), *value);
+ }
+ printf("-----------------------------------\n");
+ }
+}
+
+// We can't leave this permanently enabled -- it makes algorithms cubic, and causes tests to time out.
+// Enable when/if you have reason to believe there's a problem in hashtable.
+#define JITSTD_DO_HASHTABLE_DEBUGCHECK 0
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::debug_check()
+{
+#if JITSTD_DO_HASHTABLE_DEBUGCHECK
+ for (iterator iter = m_elements.begin(); iter != m_elements.end(); ++iter)
+ {
+ size_type nHash = hash_helper(m_keyOf(*iter), m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ iterator iter2 = entry.first;
+ bool present = false;
+ while (iter2 != entry.second)
+ {
+ if (iter2 == iter)
+ {
+ present = true;
+ }
+ iter2++;
+ }
+ if (!present)
+ {
+ present = false;
+ }
+ assert(present);
+ }
+#endif
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+template <typename Compare>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::find(const key_type& key, const Compare& comp)
+{
+ if (empty())
+ {
+ return end();
+ }
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ for (iterator i = entry.first; i != entry.second; ++i)
+ {
+ if (comp(m_keyOf(*i), key))
+ {
+ return i;
+ }
+ }
+ return end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+bool hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::check_load()
+{
+ float flLoadFactor = load_factor();
+ if (flLoadFactor > m_flMaxLoadFactor)
+ {
+ rehash(m_buckets.size());
+ return true;
+ }
+ return false;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase_helper(const_iterator position)
+{
+ const Key& key = m_keyOf(*position);
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ iterator eraseNext = end();
+ for (iterator first = entry.first; first != entry.second; ++first)
+ {
+ if (m_pred(m_keyOf(*first), key))
+ {
+ if (first == entry.first)
+ {
+ if (first != m_elements.begin())
+ {
+ iterator update = first;
+ update--;
+ size_type nUpdateHash = hash_helper(m_keyOf(*update), m_buckets.size());
+ if (nUpdateHash != nHash)
+ {
+ BucketEntry& updateEntry = m_buckets[nUpdateHash];
+ if (updateEntry.second == first)
+ {
+ updateEntry.second = first;
+ updateEntry.second++;
+ }
+ if (updateEntry.first == first)
+ {
+ updateEntry.first = first;
+ updateEntry.first++;
+ }
+ }
+ }
+ entry.first = m_elements.erase(first);
+ eraseNext = entry.first;
+ }
+ else
+ {
+ eraseNext = m_elements.erase(first);
+ }
+
+ --m_nSize;
+#ifdef DEBUG
+ debug_check();
+#endif
+ return eraseNext;
+ }
+ }
+ return end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+pair<typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator, bool>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert_helper(
+ const Value& value, Buckets& buckets, Elements& elements, bool fRehashing)
+{
+ const Key& key = m_keyOf(value);
+ size_t nHash = hash_helper(key, buckets.size());
+ BucketEntry& entry = buckets[nHash];
+
+ iterator ret;
+ if (entry.first == entry.second)
+ {
+ entry.first = elements.insert(elements.begin(), value);
+ entry.second = entry.first;
+ entry.second++; // end iterator is one past always.
+ ret = entry.first;
+ }
+ else
+ {
+ for (iterator first = entry.first; first != entry.second; ++first)
+ {
+ if (m_pred(m_keyOf(*first), key))
+ {
+ return pair<iterator, bool>(first, false);
+ }
+ }
+ iterator firstNext = entry.first;
+ firstNext++;
+ ret = elements.insert(firstNext, value);
+ if (entry.second == entry.first)
+ {
+ entry.second = firstNext;
+ }
+ }
+ bool fRehashed = false;
+ if (!fRehashing)
+ {
+ m_nSize += 1;
+ fRehashed = check_load();
+ }
+
+#ifdef DEBUG
+ debug_check();
+#endif
+
+ return pair<iterator, bool>(fRehashed ? find(key, m_pred) : ret, true);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hash_helper(
+ const key_type& key, size_type buckets) const
+{
+ return m_hasher(key) % buckets;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::rehash(size_type n)
+{
+ size_type nCurBuckets = m_buckets.size();
+ float flLoadFactor = load_factor();
+ if (nCurBuckets >= n && flLoadFactor <= m_flMaxLoadFactor)
+ {
+ return;
+ }
+
+ size_type nBuckets = max(nCurBuckets, 1);
+ if (flLoadFactor > m_flMaxLoadFactor)
+ {
+ nBuckets *= 2;
+ }
+
+ if (nBuckets < n)
+ {
+ nBuckets = n;
+ }
+
+ Buckets buckets(m_allocator);
+ Elements elements(m_allocator);
+
+ buckets.resize(nBuckets, BucketEntry(m_elements.end(), m_elements.end())); // both equal means empty.
+ for (typename Elements::iterator iter = m_elements.begin(); iter != m_elements.end(); ++iter)
+ {
+ (void) insert_helper(*iter, buckets, elements, true);
+ }
+ m_buckets.swap(buckets);
+ m_elements.swap(elements);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(
+ size_type n,
+ allocator_type const& allocator,
+ const KeyOf& keyOf)
+ : m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<hashtable::BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+ rehash(n);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(
+ size_type n,
+ hasher const& hf,
+ key_equal const& eq,
+ allocator_type const& allocator,
+ const KeyOf& keyOf)
+ : m_hasher(hf)
+ , m_pred(eq)
+ , m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+ rehash(n);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+template<typename InputIterator>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& allocator,
+ const KeyOf& keyOf)
+ : m_hasher(hf)
+ , m_pred(eq)
+ , m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+ rehash(n);
+ insert(f, l);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(const allocator_type& allocator, const KeyOf& keyOf)
+ : m_allocator(allocator)
+ , m_buckets(Alloc::template rebind<BucketEntry>::allocator(allocator))
+ , m_elements(allocator)
+ , m_flMaxLoadFactor(kflDefaultLoadFactor)
+ , m_nSize(0)
+ , m_keyOf(keyOf)
+{
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::copy_helper(const hashtable& other)
+{
+ m_buckets.clear();
+ m_elements.clear();
+ m_nSize = 0;
+
+ rehash(other.m_buckets.size());
+ for (const_iterator i = other.m_elements.begin(); i != other.m_elements.end(); ++i)
+ {
+ insert_helper(*i, m_buckets, m_elements, false);
+ }
+ m_nSize = other.m_nSize;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hashtable(const hashtable& other)
+ : m_hasher(other.m_hasher)
+ , m_pred(other.m_pred)
+ , m_allocator(other.m_allocator)
+ , m_flMaxLoadFactor(other.m_flMaxLoadFactor)
+ , m_keyOf(other.m_keyOf)
+ , m_elements(other.m_allocator)
+ , m_buckets(other.m_allocator)
+{
+ copy_helper(other);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::~hashtable()
+{
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>&
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::operator=(hashtable const& other)
+{
+ m_hasher = other.m_hasher;
+ m_pred = other.m_pred;
+ m_allocator = other.m_allocator;
+ m_flMaxLoadFactor = other.m_flMaxLoadFactor;
+ m_keyOf = other.m_keyOf;
+ copy_helper(other);
+ return *this;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::allocator_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::get_allocator() const
+{
+ return m_allocator;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+bool hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::empty() const
+{
+ return m_nSize == 0;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size() const
+{
+ return m_nSize;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_size() const
+{
+ return ((size_type)(-1)) >> 1;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::begin()
+{
+ return m_elements.begin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::reverse_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::rbegin()
+{
+ return m_elements.rbegin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::end()
+{
+ return m_elements.end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::reverse_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::rend()
+{
+ return m_elements.rend();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::begin() const
+{
+ return m_elements.begin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::end() const
+{
+ return m_elements.end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::cbegin() const
+{
+ return m_elements.begin();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::cend() const
+{
+ return m_elements.end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+jitstd::pair<typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator, bool>
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert(const Value& val)
+{
+ // Allocate some space first.
+ rehash(2);
+ return insert_helper(val, m_buckets, m_elements, false);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert(const_iterator position, const Value& value)
+{
+ // Allocate some space first.
+ rehash(2);
+
+ // We do not use the hint here; we may consider doing so later.
+ return insert_helper(value, m_buckets, m_elements, false).first;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+template<typename InputIterator>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::insert(InputIterator first, InputIterator last)
+{
+ // Allocate some space first.
+ rehash(2);
+ while (first != last)
+ {
+ (void) insert_helper(*first, m_buckets, m_elements, false);
+ ++first;
+ }
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase(iterator position)
+{
+ return erase_helper(position);
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase(const key_type& key)
+{
+ iterator iter = erase_helper(find(key));
+ return iter == end() ? 0 : 1;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::erase(iterator first, iterator last)
+{
+ iterator iter = end();
+ while (first != last)
+ {
+ iter = erase_helper(find(m_keyOf(*first)));
+ ++first;
+ }
+ return iter;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::clear()
+{
+ m_buckets.clear();
+ m_elements.clear();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::swap(hashtable& set)
+{
+ std::swap(set.m_buckets, m_buckets);
+ std::swap(set.m_elements, m_elements);
+ std::swap(set.m_nSize, this->m_nSize);
+ std::swap(set.m_flMaxLoadFactor, this->m_flMaxLoadFactor);
+ std::swap(set.m_keyOf, this->m_keyOf);
+}
+
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hasher
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::hash_function() const
+{
+ return m_hasher;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::key_equal
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::key_eq() const
+{
+ return m_pred;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::const_iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::find(const key_type& key) const
+{
+ if (empty())
+ {
+ return end();
+ }
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ for (iterator i = entry.first; i != entry.second; ++i)
+ {
+ if (m_pred(m_keyOf(*i), key))
+ {
+ return i;
+ }
+ }
+ return end();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::iterator
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::find(const key_type& key)
+{
+ if (empty())
+ {
+ return end();
+ }
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& entry = m_buckets[nHash];
+ for (iterator i = entry.first; i != entry.second; ++i)
+ {
+ if (m_pred(m_keyOf(*i), key))
+ {
+ return i;
+ }
+ }
+ return end();
+}
+
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+ hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::count(const key_type& key) const
+{
+ size_type nCount = 0;
+ size_type nHash = hash_helper(key, m_buckets.size());
+ BucketEntry& bucket = m_buckets[nHash];
+ for (iterator i = bucket.first; i != bucket.second; ++i)
+ {
+ if (m_pred(m_keyOf(*i), key))
+ {
+ ++nCount;
+ }
+ }
+ return nCount;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::bucket_count() const
+{
+ return m_buckets.size();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_bucket_count() const
+{
+ return m_buckets.size();
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::bucket_size(size_type size) const
+{
+ size_type nCount = 0;
+ BucketEntry& bucket = m_buckets[size];
+ for (iterator i = bucket.first; i != bucket.second; ++i)
+ {
+ ++nCount;
+ }
+ return nCount;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::size_type
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::bucket(const key_type& key) const
+{
+ return hash_helper(key, m_buckets.size());
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::local_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::begin(size_type size)
+{
+ return m_buckets[size].first;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+typename hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::local_iterator
+hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::end(size_type size)
+{
+ return m_buckets[size].second;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+float hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::load_factor() const
+{
+ return m_nSize ? (((float) m_nSize) / m_buckets.size()) : 0;
+}
+
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+float hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_load_factor() const
+{
+ return m_flMaxLoadFactor;
+}
+
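+// Record the new bound and rehash with the current bucket count so that the
+// bound takes effect immediately.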
+template <typename Key, typename Value, typename Hash, typename Pred, typename Alloc, typename KeyOf>
+void hashtable<Key, Value, Hash, Pred, Alloc, KeyOf>::max_load_factor(float flLoadFactor)
+{
+ m_flMaxLoadFactor = flLoadFactor;
+ rehash(m_buckets.size());
+}
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/iterator.h b/src/jit/jitstd/iterator.h
new file mode 100644
index 0000000000..975755c59c
--- /dev/null
+++ b/src/jit/jitstd/iterator.h
@@ -0,0 +1,144 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <class Category, class T, class Distance = ptrdiff_t, class Pointer = T*, class Reference = T&>
+struct iterator
+{
+ typedef T value_type;
+ typedef Distance difference_type;
+ typedef Pointer pointer;
+ typedef Reference reference;
+ typedef Category iterator_category;
+};
+
+struct input_iterator_tag
+{
+};
+
+struct forward_iterator_tag : public input_iterator_tag
+{
+};
+
+struct bidirectional_iterator_tag : public forward_iterator_tag
+{
+};
+
+struct random_access_iterator_tag : public bidirectional_iterator_tag
+{
+};
+
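+// Tag produced for integral types (see the specializations below) so that
+// tag-dispatching helpers can tell a (count, value) call apart from a genuine
+// iterator-range call.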
+struct int_not_an_iterator_tag
+{
+};
+
+template <typename Iterator>
+struct iterator_traits
+{
+ typedef typename Iterator::difference_type difference_type;
+ typedef typename Iterator::value_type value_type;
+ typedef typename Iterator::pointer pointer;
+ typedef typename Iterator::reference reference;
+ typedef typename Iterator::iterator_category iterator_category;
+};
+
+template <typename T>
+struct iterator_traits<T*>
+{
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef random_access_iterator_tag iterator_category;
+};
+
+template <typename T>
+struct iterator_traits<const T*>
+{
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+ typedef const T* pointer;
+ typedef const T& reference;
+ typedef random_access_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<bool>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<char>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<signed char>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned char>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<short>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned short>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<int>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned int>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<__int64>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+template<>
+struct iterator_traits<unsigned __int64>
+{
+ typedef int_not_an_iterator_tag iterator_category;
+};
+
+namespace util
+{
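+// Returns a default-constructed category tag object for the given iterator,
+// convenient for tag-dispatching to the appropriate overload.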
+template<class Iterator>
+inline
+typename iterator_traits<Iterator>::iterator_category
+ iterator_category(const Iterator&)
+{
+ typename iterator_traits<Iterator>::iterator_category categ;
+ return categ;
+}
+} // end of namespace util.
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/jitstd.cpp b/src/jit/jitstd/jitstd.cpp
new file mode 100644
index 0000000000..48d80e2245
--- /dev/null
+++ b/src/jit/jitstd/jitstd.cpp
@@ -0,0 +1,34 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// jitstd.cpp : Defines the entry point for the console application.
+//
+
+
+#include "stdafx.h"
+
+#include <iostream>
+#include <windows.h>
+#include <string>
+
+#include "iallocator.h"
+
+#include "algorithm.h"
+#include "functional.h"
+#include "hash.h"
+
+#include "unordered_map.h"
+#include "unordered_set.h"
+#include "hashtable.h"
+#include "list.h"
+#include "vector.h"
+#include "pair.h"
+
+int _tmain(int argc, _TCHAR* argv[])
+{
+ // return test1();
+ return 0;
+}
+
+
diff --git a/src/jit/jitstd/jitstd.sln b/src/jit/jitstd/jitstd.sln
new file mode 100644
index 0000000000..6aa099ce3c
--- /dev/null
+++ b/src/jit/jitstd/jitstd.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jitstd", "jitstd.vcxproj", "{A4576E91-78F0-4FD1-8323-8FA3BACE0581}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Debug|Win32.ActiveCfg = Debug|Win32
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Debug|Win32.Build.0 = Debug|Win32
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Release|Win32.ActiveCfg = Release|Win32
+ {A4576E91-78F0-4FD1-8323-8FA3BACE0581}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/src/jit/jitstd/jitstd.vcxproj b/src/jit/jitstd/jitstd.vcxproj
new file mode 100644
index 0000000000..bed1b3a12d
--- /dev/null
+++ b/src/jit/jitstd/jitstd.vcxproj
@@ -0,0 +1,103 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{A4576E91-78F0-4FD1-8323-8FA3BACE0581}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>jitstd</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>Use</PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <None Include="ReadMe.txt" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="algorithm.h" />
+ <ClInclude Include="allocator.h" />
+ <ClInclude Include="functional.h" />
+ <ClInclude Include="hash.h" />
+ <ClInclude Include="hashtable.h" />
+ <ClInclude Include="iterator.h" />
+ <ClInclude Include="list.h" />
+ <ClInclude Include="pair.h" />
+ <ClInclude Include="stdafx.h" />
+ <ClInclude Include="targetver.h" />
+ <ClInclude Include="type_traits.h" />
+ <ClInclude Include="unordered_map.h" />
+ <ClInclude Include="unordered_set.h" />
+ <ClInclude Include="vector.h" />
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="jitstd.cpp" />
+ <ClCompile Include="stdafx.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+ </ClCompile>
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/src/jit/jitstd/list.h b/src/jit/jitstd/list.h
new file mode 100644
index 0000000000..85545f741e
--- /dev/null
+++ b/src/jit/jitstd/list.h
@@ -0,0 +1,1243 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX list<T> XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
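+
+// A minimal usage sketch (assuming an already-constructed jitstd::allocator<int>
+// instance named "alloc"; obtaining such an allocator is outside this header):
+//
+//   jitstd::list<int> numbers(alloc);
+//   numbers.push_back(1);
+//   numbers.push_front(0);
+//   for (jitstd::list<int>::iterator i = numbers.begin(); i != numbers.end(); ++i)
+//   {
+//       // visits 0, then 1
+//   }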
+
+#pragma once
+
+#include "iterator.h"
+#include "functional.h"
+
+namespace jitstd
+{
+
+template <typename T, typename Allocator = jitstd::allocator<T>>
+class list
+{
+public:
+ typedef Allocator allocator_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef const T* const_pointer;
+ typedef const T& const_reference;
+
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+
+ // Forward declaration
+private:
+ struct Node;
+
+public:
+ // nested classes
+ class iterator;
+ class const_iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ private:
+ const_iterator(Node* ptr);
+ const_iterator();
+ public:
+ const_iterator(const const_iterator& it);
+ const_iterator(const typename list<T, Allocator>::iterator& it);
+
+ const_iterator& operator++();
+ const_iterator& operator++(int);
+ const_iterator& operator--();
+ const_iterator& operator--(int);
+ const_iterator operator+(difference_type n);
+ const_iterator operator-(difference_type n);
+ size_type operator-(const const_iterator& that);
+ bool operator==(const const_iterator& it) const;
+ bool operator!=(const const_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ const T* operator->() const;
+ operator const T*() const;
+
+ private:
+ friend class list<T, Allocator>;
+ Node* m_pNode;
+ };
+
+ class iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ iterator(Node* ptr);
+ public:
+ iterator();
+ iterator(const iterator& it);
+
+ iterator& operator++();
+ iterator& operator++(int);
+ iterator& operator--();
+ iterator& operator--(int);
+ iterator operator+(difference_type n);
+ iterator operator-(difference_type n);
+ size_type operator-(const iterator& that);
+ bool operator==(const iterator& it);
+ bool operator!=(const iterator& it);
+ T& operator*();
+ T* operator&();
+ T* operator->();
+ operator T*();
+
+ private:
+ friend class list<T, Allocator>;
+ friend class list<T, Allocator>::const_iterator;
+ Node* m_pNode;
+ };
+
+ class reverse_iterator;
+ class const_reverse_iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ private:
+ const_reverse_iterator(Node* ptr);
+ public:
+ const_reverse_iterator();
+ const_reverse_iterator(const const_reverse_iterator& it);
+ const_reverse_iterator(const reverse_iterator& it);
+
+ const_reverse_iterator& operator++();
+ const_reverse_iterator& operator++(int);
+ const_reverse_iterator& operator--();
+ const_reverse_iterator& operator--(int);
+ const_reverse_iterator operator+(difference_type n);
+ const_reverse_iterator operator-(difference_type n);
+ size_type operator-(const const_reverse_iterator& that);
+ bool operator==(const const_reverse_iterator& it) const;
+ bool operator!=(const const_reverse_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ const T* operator->() const;
+ operator const T*() const;
+
+ private:
+ friend class list<T, Allocator>;
+ Node* m_pNode;
+ };
+
+ class reverse_iterator : public jitstd::iterator<bidirectional_iterator_tag, T>
+ {
+ private:
+ reverse_iterator(Node* ptr);
+ public:
+ reverse_iterator();
+ reverse_iterator(const reverse_iterator& it);
+
+ reverse_iterator& operator++();
+ reverse_iterator& operator++(int);
+ reverse_iterator& operator--();
+ reverse_iterator& operator--(int);
+ reverse_iterator operator+(difference_type n);
+ reverse_iterator operator-(difference_type n);
+ size_type operator-(const reverse_iterator& that);
+ bool operator==(const reverse_iterator& it);
+ bool operator!=(const reverse_iterator& it);
+ T& operator*();
+ T* operator&();
+ T* operator->();
+ operator T*();
+ friend class list<T, Allocator>::const_reverse_iterator;
+
+ private:
+ friend class list<T, Allocator>;
+ Node* m_pNode;
+ };
+
+ explicit list(const Allocator&);
+ list(size_type n, const T& value, const Allocator&);
+
+ template <typename InputIterator>
+ list(InputIterator first, InputIterator last, const Allocator&);
+
+ list(const list<T, Allocator>&);
+
+ ~list();
+
+ template <class InputIterator>
+ void assign(InputIterator first, InputIterator last);
+
+ void assign(size_type size, const T& val);
+
+ reference back();
+ const_reference back() const;
+
+ iterator begin();
+ const_iterator begin() const;
+
+ void clear();
+ bool empty() const;
+
+ iterator end();
+ const_iterator end() const;
+
+ iterator erase(iterator position);
+ iterator erase(iterator first, iterator last);
+
+ reference front();
+ const_reference front() const;
+
+ allocator_type get_allocator() const;
+
+ iterator insert(iterator position, const T& x);
+ template <class... Args>
+ iterator emplace(iterator position, Args&&... args);
+ void insert(iterator position, size_type n, const T& x);
+ template <class InputIterator>
+ void insert(iterator position, InputIterator first, InputIterator last);
+
+ size_type max_size() const;
+
+ void merge(list<T, Allocator>& lst);
+ template <class Compare>
+ void merge (list<T, Allocator>& lst, Compare comp);
+
+ list<T, Allocator>& operator=(const list<T, Allocator>& lst);
+
+ void pop_back();
+ void pop_front();
+
+ void push_back(const T& val);
+ template <class... Args>
+ void emplace_back(Args&&... args);
+ void push_front (const T& val);
+ template <class... Args>
+ void emplace_front(Args&&... args);
+
+ reverse_iterator rbegin();
+ const_reverse_iterator rbegin() const;
+
+ void remove(const T& val);
+ template <class Predicate>
+ void remove_if(Predicate pred);
+
+ reverse_iterator rend();
+ const_reverse_iterator rend() const;
+
+ void resize(size_type sz, const T& c);
+ void reverse();
+
+ size_type size() const;
+ void sort();
+
+ template <class Compare>
+ void sort(Compare comp);
+
+ void splice(iterator position, list& lst);
+ void splice(iterator position, list& lst, iterator i);
+ void splice(iterator position, list& x, iterator first, iterator last);
+
+ void swap(list<T,Allocator>& lst);
+
+ void unique();
+
+ template <class BinaryPredicate>
+ void unique(const BinaryPredicate& binary_pred);
+
+private:
+ struct Node
+ {
+ T m_value;
+ Node* m_pNext;
+ Node* m_pPrev;
+
+ template <class... Args>
+ Node(Args&&... args)
+ : m_value(jitstd::forward<Args>(args)...)
+ {
+ }
+ };
+
+ void destroy_helper();
+
+ void construct_helper(size_type n, const T& value, int_not_an_iterator_tag);
+ template <typename InputIterator>
+ void construct_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+
+ void assign_helper(size_type n, const T& value, int_not_an_iterator_tag);
+ template <typename InputIterator>
+ void assign_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+
+ void insert_helper(iterator position, size_type n, const T& value, int_not_an_iterator_tag);
+ template <typename InputIterator>
+ void insert_helper(iterator position, InputIterator first, InputIterator last, forward_iterator_tag);
+
+ void insert_new_node_helper(Node* pInsert, Node* pNewNode);
+
+ Node* m_pHead;
+ Node* m_pTail;
+ size_type m_nSize;
+ typename Allocator::template rebind<T>::allocator m_allocator;
+ typename Allocator::template rebind<Node>::allocator m_nodeAllocator;
+};
+
+}
+
+namespace jitstd
+{
+template <typename T, typename Allocator>
+list<T, Allocator>::list(const Allocator& allocator)
+ : m_pHead(nullptr)
+ , m_pTail(nullptr)
+ , m_nSize(0)
+ , m_allocator(allocator)
+ , m_nodeAllocator(allocator)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::list(size_type n, const T& value, const Allocator& allocator)
+ : m_pHead(NULL)
+ , m_pTail(NULL)
+ , m_nSize(0)
+ , m_allocator(allocator)
+ , m_nodeAllocator(allocator)
+{
+ construct_helper(n, value, int_not_an_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+list<T, Allocator>::list(InputIterator first, InputIterator last, const Allocator& allocator)
+ : m_pHead(NULL)
+ , m_pTail(NULL)
+ , m_nSize(0)
+ , m_allocator(allocator)
+ , m_nodeAllocator(allocator)
+{
+ construct_helper(first, last, typename iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::list(const list<T, Allocator>& other)
+ : m_pHead(NULL)
+ , m_pTail(NULL)
+ , m_nSize(0)
+ , m_allocator(other.m_allocator)
+ , m_nodeAllocator(other.m_nodeAllocator)
+{
+ construct_helper(other.begin(), other.end(), forward_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::~list()
+{
+ destroy_helper();
+}
+
+template <typename T, typename Allocator>
+template <class InputIterator>
+void list<T, Allocator>::assign(InputIterator first, InputIterator last)
+{
+ assign_helper(first, last, typename iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::assign(size_type size, const T& val)
+{
+ assign_helper(size, val, int_not_an_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reference list<T, Allocator>::back()
+{
+ return m_pTail->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reference list<T, Allocator>::back() const
+{
+ return m_pTail->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::begin()
+{
+ return iterator(m_pHead);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator list<T, Allocator>::begin() const
+{
+ return const_iterator(m_pHead);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::clear()
+{
+ destroy_helper();
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::empty() const
+{
+ return (m_nSize == 0);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::end()
+{
+ return iterator(nullptr);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator list<T, Allocator>::end() const
+{
+ return const_iterator(NULL);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::erase(iterator position)
+{
+ // Nothing to erase.
+ assert(position.m_pNode != nullptr);
+
+ --m_nSize;
+
+ Node* pNode = position.m_pNode;
+ Node* pPrev = pNode->m_pPrev;
+ Node* pNext = pNode->m_pNext;
+
+ if (pPrev != nullptr)
+ {
+ pPrev->m_pNext = pNext;
+ }
+ else
+ {
+ m_pHead = pNext;
+ }
+
+ if (pNext != nullptr)
+ {
+ pNext->m_pPrev = pPrev;
+ }
+ else
+ {
+ m_pTail = pPrev;
+ }
+
+ pNode->~Node();
+ m_nodeAllocator.deallocate(pNode, 1);
+ return iterator(pNext);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator list<T, Allocator>::erase(iterator first, iterator last)
+{
+ while (first != last)
+ {
+ first = erase(first);
+ }
+ return first;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reference list<T, Allocator>::front()
+{
+ return m_pHead->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reference list<T, Allocator>::front() const
+{
+ return m_pHead->m_value;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::allocator_type list<T, Allocator>::get_allocator() const
+{
+ return m_allocator;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator
+ list<T, Allocator>::insert(iterator position, const T& val)
+{
+ Node* pNewNode = new (m_nodeAllocator.allocate(1), placement_t()) Node(val);
+ insert_new_node_helper(position.m_pNode, pNewNode);
+ return iterator(pNewNode);
+}
+
+template <typename T, typename Allocator>
+template <typename... Args>
+typename list<T, Allocator>::iterator
+ list<T, Allocator>::emplace(iterator position, Args&&... args)
+{
+ Node* pNewNode = new (m_nodeAllocator.allocate(1), placement_t()) Node(jitstd::forward<Args>(args)...);
+ insert_new_node_helper(position.m_pNode, pNewNode);
+ return iterator(pNewNode);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::insert(iterator position, size_type n, const T& val)
+{
+ insert_helper(position, n, val, int_not_an_iterator_tag());
+}
+
+template <typename T, typename Allocator>
+template <class InputIterator>
+void list<T, Allocator>::insert(iterator position, InputIterator first, InputIterator last)
+{
+ insert_helper(position, first, last, typename iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::size_type list<T, Allocator>::max_size() const
+{
+ return (((size_type)-1) >> 1) / sizeof(Node);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::merge(list<T, Allocator>& lst)
+{
+ merge(lst, jitstd::greater<T>());
+}
+
+template <typename T, typename Allocator>
+template <class Compare>
+void list<T, Allocator>::merge(list<T, Allocator>& lst, Compare comp)
+{
+ // Merge the elements of "lst" into this list, preserving the order implied
+ // by "comp". Note: unlike std::list::merge, the elements are copied into
+ // this list rather than spliced, and "lst" is left empty on return.
+ iterator i = begin();
+ iterator j = lst.begin();
+ while (i != end() && j != lst.end())
+ {
+ if (comp(*i, *j))
+ {
+ insert(i, *j);
+ ++j;
+ }
+ else
+ {
+ ++i;
+ }
+ }
+
+ while (j != lst.end())
+ {
+ push_back(*j);
+ ++j;
+ }
+
+ lst.clear();
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>& list<T, Allocator>::operator=(const list<T, Allocator>& lst)
+{
+ destroy_helper();
+ construct_helper(lst.begin(), lst.end(), forward_iterator_tag());
+ return *this;
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::pop_back()
+{
+ assert(m_nSize != 0);
+
+ --m_nSize;
+
+ Node* pDelete = m_pTail;
+ if (m_pHead != m_pTail)
+ {
+ m_pTail = m_pTail->m_pPrev;
+ m_pTail->m_pNext = nullptr;
+ }
+ else
+ {
+ m_pHead = nullptr;
+ m_pTail = nullptr;
+ }
+ pDelete->~Node();
+ m_nodeAllocator.deallocate(pDelete, 1);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::pop_front()
+{
+ assert(m_nSize != 0);
+
+ --m_nSize;
+
+ Node* pDelete = m_pHead;
+ if (m_pHead != m_pTail)
+ {
+ m_pHead = m_pHead->m_pNext;
+ m_pHead->m_pPrev = NULL;
+ }
+ else
+ {
+ m_pHead = NULL;
+ m_pTail = NULL;
+ }
+ pDelete->~Node();
+ m_nodeAllocator.deallocate(pDelete, 1);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::push_back(const T& val)
+{
+ insert(end(), val);
+}
+
+template <typename T, typename Allocator>
+template <typename... Args>
+void list<T, Allocator>::emplace_back(Args&&... args)
+{
+ emplace(end(), jitstd::forward<Args>(args)...);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::push_front(const T& val)
+{
+ insert(begin(), val);
+}
+
+template <typename T, typename Allocator>
+template <typename... Args>
+void list<T, Allocator>::emplace_front(Args&&... args)
+{
+ emplace(begin(), jitstd::forward<Args>(args)...);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator
+ list<T, Allocator>::rbegin()
+{
+ return reverse_iterator(m_pTail);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator
+ list<T, Allocator>::rbegin() const
+{
+ return const_reverse_iterator(m_pTail);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::remove(const T& val)
+{
+ for (iterator i = begin(); i != end();)
+ {
+ if (*i == val)
+ {
+ i = erase(i);
+ }
+ else
+ {
+ ++i;
+ }
+ }
+}
+
+template <typename T, typename Allocator>
+template <class Predicate>
+void list<T, Allocator>::remove_if(Predicate pred)
+{
+ for (iterator i = begin(); i != end();)
+ {
+ if (pred(*i))
+ {
+ i = erase(i);
+ }
+ else
+ {
+ ++i;
+ }
+ }
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator list<T, Allocator>::rend()
+{
+ return reverse_iterator(nullptr);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator list<T, Allocator>::rend() const
+{
+ return const_reverse_iterator(nullptr);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::resize(size_type sz, const T& c)
+{
+ while (m_nSize < sz)
+ {
+ insert(end(), c);
+ }
+
+ while (m_nSize > sz)
+ {
+ pop_back();
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::reverse()
+{
+ for (Node* p = m_pHead; p != nullptr;)
+ {
+ Node* pNext = p->m_pNext;
+ jitstd::swap(p->m_pPrev, p->m_pNext);
+ p = pNext;
+ }
+ jitstd::swap(m_pHead, m_pTail);
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::size_type list<T, Allocator>::size() const
+{
+ return m_nSize;
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::sort()
+{
+ assert(false && !"template method not implemented.");
+}
+
+template <typename T, typename Allocator>
+template <class Compare>
+void list<T, Allocator>::sort(Compare comp)
+{
+ assert(false && !"template method not implemented.");
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::splice(iterator position, list& lst)
+{
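+ // Note: only splicing an entire list into an empty destination list is
+ // supported (in which case "position" is necessarily end()); splicing into
+ // a non-empty list is not implemented and leaves both lists unchanged.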
+ if (lst.m_nSize == 0)
+ {
+ return;
+ }
+ if (m_nSize == 0)
+ {
+ std::swap(lst.m_pHead, m_pHead);
+ std::swap(lst.m_pTail, m_pTail);
+ std::swap(lst.m_nSize, m_nSize);
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::splice(iterator position, list& lst, iterator i)
+{
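+ // Not implemented; currently a no-op.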
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::splice(iterator position, list& x, iterator first, iterator last)
+{
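+ // Not implemented; currently a no-op.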
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::swap(list<T, Allocator>& lst)
+{
+ jitstd::swap(lst.m_pHead, m_pHead);
+ jitstd::swap(lst.m_pTail, m_pTail);
+ jitstd::swap(lst.m_nSize, m_nSize);
+ jitstd::swap(lst.m_allocator, m_allocator);
+ jitstd::swap(lst.m_nodeAllocator, m_nodeAllocator);
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::unique()
+{
+ assert(false && !"template method not implemented.");
+}
+
+template <typename T, typename Allocator>
+template <class BinaryPredicate>
+void list<T, Allocator>::unique(const BinaryPredicate& binary_pred)
+{
+ assert(false && !"template method not implemented.");
+}
+
+// private
+template <typename T, typename Allocator>
+void list<T, Allocator>::destroy_helper()
+{
+ while (m_pTail != nullptr)
+ {
+ Node* prev = m_pTail->m_pPrev;
+ m_pTail->~Node();
+ m_nodeAllocator.deallocate(m_pTail, 1);
+ m_pTail = prev;
+ }
+ m_pHead = nullptr;
+ m_nSize = 0;
+}
+
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::construct_helper(size_type n, const T& value, int_not_an_iterator_tag)
+{
+ for (size_type i = 0; i < n; ++i)
+ {
+ insert(end(), value);
+ }
+ assert(m_nSize == n);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void list<T, Allocator>::construct_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ while (first != last)
+ {
+ insert(end(), *first);
+ ++first;
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::assign_helper(size_type n, const T& value, int_not_an_iterator_tag)
+{
+ destroy_helper();
+ for (size_type i = 0; i < n; ++i)
+ {
+ insert(end(), value);
+ }
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void list<T, Allocator>::assign_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ destroy_helper();
+ while (first != last)
+ {
+ insert(end(), *first);
+ ++first;
+ }
+}
+
+template <typename T, typename Allocator>
+void list<T, Allocator>::insert_helper(iterator position, size_type n, const T& value, int_not_an_iterator_tag)
+{
+ for (size_type i = 0; i < n; ++i)
+ {
+ insert(position, value);
+ }
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void list<T, Allocator>::insert_helper(iterator position, InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ while (first != last)
+ {
+ insert(position, *first);
+ ++first;
+ }
+}
+
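+// Links "pNewNode" into the list immediately before "pInsert"; a null
+// "pInsert" means "insert at the end", i.e. the new node becomes the tail.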
+template <typename T, typename Allocator>
+void list<T, Allocator>::insert_new_node_helper(Node* pInsert, Node* pNewNode)
+{
+ ++m_nSize;
+
+ if (pInsert == nullptr)
+ {
+ pNewNode->m_pPrev = m_pTail;
+ pNewNode->m_pNext = nullptr;
+ if (m_pHead == nullptr)
+ {
+ m_pHead = pNewNode;
+ }
+ else
+ {
+ m_pTail->m_pNext = pNewNode;
+ }
+ m_pTail = pNewNode;
+ }
+ else
+ {
+ pNewNode->m_pPrev = pInsert->m_pPrev;
+ pNewNode->m_pNext = pInsert;
+ if (pInsert->m_pPrev == nullptr)
+ {
+ m_pHead = pNewNode;
+ }
+ else
+ {
+ pInsert->m_pPrev->m_pNext = pNewNode;
+ }
+ pInsert->m_pPrev = pNewNode;
+ }
+}
+
+} // end of namespace jitstd.
+
+
+
+
+
+// Implementation of list iterators
+
+namespace jitstd
+{
+
+// iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::iterator(const iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator++()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator--()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::iterator& list<T, Allocator>::iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::iterator::operator==(const iterator& it)
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::iterator::operator!=(const iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& list<T, Allocator>::iterator::operator*()
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::iterator::operator&()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::iterator::operator->()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::iterator::operator T*()
+{
+ return &(m_pNode->m_value);
+}
+
+
+
+
+// const_iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator(const const_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::const_iterator(const typename list<T, Allocator>::iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator++()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator--()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_iterator& list<T, Allocator>::const_iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_iterator::operator==(const const_iterator& it) const
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_iterator::operator!=(const const_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& list<T, Allocator>::const_iterator::operator*() const
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_iterator::operator&() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_iterator::operator->() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_iterator::operator const T*() const
+{
+ return &(m_pNode->m_value);
+}
+
+
+// reverse_iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::reverse_iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::reverse_iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::reverse_iterator(const reverse_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator++()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator--()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::reverse_iterator& list<T, Allocator>::reverse_iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::reverse_iterator::operator==(const reverse_iterator& it)
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::reverse_iterator::operator!=(const reverse_iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& list<T, Allocator>::reverse_iterator::operator*()
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::reverse_iterator::operator&()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+T* list<T, Allocator>::reverse_iterator::operator->()
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::reverse_iterator::operator T*()
+{
+ return &(m_pNode->m_value);
+}
+
+// const_reverse_iterator
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator()
+ : m_pNode(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator(Node* pNode)
+ : m_pNode(pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator(const const_reverse_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::const_reverse_iterator(const reverse_iterator& it)
+ : m_pNode(it.m_pNode)
+{
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator++()
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator++(int)
+{
+ m_pNode = m_pNode->m_pPrev;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator--()
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename list<T, Allocator>::const_reverse_iterator& list<T, Allocator>::const_reverse_iterator::operator--(int)
+{
+ m_pNode = m_pNode->m_pNext;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_reverse_iterator::operator==(const const_reverse_iterator& it) const
+{
+ return (m_pNode == it.m_pNode);
+}
+
+template <typename T, typename Allocator>
+bool list<T, Allocator>::const_reverse_iterator::operator!=(const const_reverse_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& list<T, Allocator>::const_reverse_iterator::operator*() const
+{
+ return m_pNode->m_value;
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_reverse_iterator::operator&() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+const T* list<T, Allocator>::const_reverse_iterator::operator->() const
+{
+ return &(m_pNode->m_value);
+}
+
+template <typename T, typename Allocator>
+list<T, Allocator>::const_reverse_iterator::operator const T*() const
+{
+ return &(m_pNode->m_value);
+}
+
+}
+
diff --git a/src/jit/jitstd/new.h b/src/jit/jitstd/new.h
new file mode 100644
index 0000000000..7054fbea0b
--- /dev/null
+++ b/src/jit/jitstd/new.h
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+struct placement_t
+{
+};
+
+}
diff --git a/src/jit/jitstd/pair.h b/src/jit/jitstd/pair.h
new file mode 100644
index 0000000000..f306000048
--- /dev/null
+++ b/src/jit/jitstd/pair.h
@@ -0,0 +1,57 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+template <typename Type1, typename Type2>
+class pair
+{
+public:
+ Type1 first;
+ Type2 second;
+
+ pair(const Type1& fst, const Type2& sec)
+ : first(fst)
+ , second(sec)
+ {
+ }
+
+ template <typename AltType1, typename AltType2>
+ pair(const AltType1& fst, const AltType2& sec)
+ : first((Type1) fst)
+ , second((Type2) sec)
+ {
+ }
+
+ template <typename AltType1, typename AltType2>
+ pair(const pair<AltType1, AltType2>& that)
+ : first((Type1) that.first)
+ , second((Type2) that.second)
+ {
+ }
+
+ pair(const pair& that)
+ : first(that.first)
+ , second(that.second)
+ {
+ }
+
+ template <typename AltType1, typename AltType2>
+ const pair<Type1, Type2>& operator=(const pair<AltType1, AltType2>& pair)
+ {
+ first = pair.first;
+ second = pair.second;
+ return *this;
+ }
+
+ bool operator==(const pair<Type1, Type2>& other) const
+ {
+ return (other.first == first && other.second == second);
+ }
+};
+}
diff --git a/src/jit/jitstd/stdafx.cpp b/src/jit/jitstd/stdafx.cpp
new file mode 100644
index 0000000000..1012ef98b8
--- /dev/null
+++ b/src/jit/jitstd/stdafx.cpp
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// stdafx.cpp : source file that includes just the standard includes
+// jitstd.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/src/jit/jitstd/stdafx.h b/src/jit/jitstd/stdafx.h
new file mode 100644
index 0000000000..6d2519d3dc
--- /dev/null
+++ b/src/jit/jitstd/stdafx.h
@@ -0,0 +1,20 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+
+#pragma once
+
+#include "targetver.h"
+
+#include <stdio.h>
+#include <tchar.h>
+
+
+
+// TODO: reference additional headers your program requires here
diff --git a/src/jit/jitstd/targetver.h b/src/jit/jitstd/targetver.h
new file mode 100644
index 0000000000..260ee4a21c
--- /dev/null
+++ b/src/jit/jitstd/targetver.h
@@ -0,0 +1,14 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+// Including SDKDDKVer.h defines the highest available Windows platform.
+
+// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
+// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
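+//
+// For example (illustrative only), to target Windows 7 one could write:
+//
+//   #include <WinSDKVer.h>
+//   #define _WIN32_WINNT _WIN32_WINNT_WIN7
+//   #include <SDKDDKVer.h>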
+
+#include <sdkddkver.h>
diff --git a/src/jit/jitstd/type_traits.h b/src/jit/jitstd/type_traits.h
new file mode 100644
index 0000000000..1e853e8cca
--- /dev/null
+++ b/src/jit/jitstd/type_traits.h
@@ -0,0 +1,196 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+#pragma once
+
+namespace jitstd
+{
+template <typename T>
+struct remove_const
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_const<const T>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_volatile
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_volatile<volatile T>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_cv : remove_const<typename remove_volatile<T>::type>
+{
+};
+
+template <typename T>
+struct remove_reference
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_reference<T&>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct remove_reference<T&&>
+{
+ typedef T type;
+};
+
+template <typename T>
+struct is_lvalue_reference
+{
+ enum { value = false };
+};
+
+template <typename T>
+struct is_lvalue_reference<T&>
+{
+ enum { value = true };
+};
+
+template <typename T>
+struct is_unqualified_pointer
+{
+ enum { value = false };
+};
+
+template <typename T>
+struct is_unqualified_pointer<T*>
+{
+ enum { value = true };
+};
+
+template <typename T>
+struct is_pointer : is_unqualified_pointer<typename remove_cv<T>::type>
+{
+};
+
+template <typename T>
+struct is_integral
+{
+ enum { value = false };
+};
+
+template<>
+struct is_integral<bool>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<char>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned char>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed char>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned short>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed short>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned int>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed int>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<unsigned __int64>
+{
+ enum { value = true };
+};
+
+template<>
+struct is_integral<signed __int64>
+{
+ enum { value = true };
+};
+
+
+template<bool Pred, typename Type1, typename Type2>
+struct conditional
+{
+};
+
+template<typename Type1, typename Type2>
+struct conditional<true, Type1, Type2>
+{
+ typedef Type1 type;
+};
+
+template<typename Type1, typename Type2>
+struct conditional<false, Type1, Type2>
+{
+ typedef Type2 type;
+};
+
+template<typename Type1>
+struct make_unsigned
+{
+};
+
+template<>
+struct make_unsigned<int>
+{
+ typedef unsigned int type;
+};
+
+#ifndef PLATFORM_UNIX
+
+template<>
+struct make_unsigned<long>
+{
+ typedef unsigned long type;
+};
+#endif // !PLATFORM_UNIX
+
+template<>
+struct make_unsigned<__int64>
+{
+ typedef unsigned __int64 type;
+};
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/unordered_map.h b/src/jit/jitstd/unordered_map.h
new file mode 100644
index 0000000000..05e97f450c
--- /dev/null
+++ b/src/jit/jitstd/unordered_map.h
@@ -0,0 +1,179 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX unordered_map<K,V,H,P,A> XX
+XX Derives from hashtable for most implementation. Inserted elements are XX
+XX value pairs and the hash key is provided by the helper method that XX
+XX extracts the key from the key value pair XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
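+
+// A minimal usage sketch (assuming an already-constructed
+// jitstd::allocator<jitstd::pair<const int, int>> instance named "alloc";
+// obtaining the allocator is outside this header):
+//
+//   jitstd::unordered_map<int, int> ages(/* size */ 16, alloc);
+//   ages.insert(1, 100);
+//   if (ages.find(1) != ages.end())
+//   {
+//       // found
+//   }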
+
+#pragma once
+
+#include "hashtable.h"
+
+namespace jitstd
+{
+
+template <typename Key, typename Value>
+struct pair_key
+{
+ Key& operator()(const jitstd::pair<Key, Value>& pair) const
+ {
+ return pair.first;
+ }
+};
+
+template<typename Key,
+ typename Value,
+ typename Hash = jitstd::hash<Key>,
+ typename Pred = jitstd::equal_to<Key>,
+ typename Alloc = jitstd::allocator<jitstd::pair<const Key, Value> > >
+class unordered_map
+ : public hashtable<Key, pair<const Key, Value>, Hash, Pred, Alloc, pair_key<const Key, Value>>
+{
+public:
+
+ typedef Key key_type;
+ typedef Value mapped_type;
+ typedef jitstd::pair<const Key, Value> value_type;
+ typedef Hash hasher;
+ typedef Pred key_equal;
+ typedef Alloc allocator_type;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef typename allocator_type::reference reference;
+ typedef typename allocator_type::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ explicit unordered_map(size_type size, const hasher& hasher, const key_equal& pred, const allocator_type& allocator);
+ explicit unordered_map(size_type size, const allocator_type& allocator);
+ template<typename InputIterator>
+ unordered_map(InputIterator, InputIterator,
+ size_type size,
+ const hasher& hasher,
+ const key_equal& pred,
+ const allocator_type& allocator);
+
+ unordered_map(const unordered_map& map);
+ explicit unordered_map(const allocator_type& allocator);
+ unordered_map(const unordered_map& map, const allocator_type& allocator);
+ ~unordered_map();
+
+ unordered_map& operator=(unordered_map const&);
+ mapped_type& operator[](const Key& key);
+ mapped_type& operator[](key_type&& key);
+
+ typename unordered_map<Key, Value, Hash, Pred, Alloc>::iterator insert(const key_type& key, const mapped_type& value);
+
+private:
+ typedef hashtable<Key, pair<const Key, Value>, Hash, Pred, Alloc, pair_key<const Key, Value>> base_type;
+};
+
+}
+
+
+namespace jitstd
+{
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(size_type size, const hasher& hasher, const key_equal& pred, const allocator_type& allocator)
+ : base_type(size, hasher, pred, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(size_type size, const allocator_type& allocator)
+ : base_type(size, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+template<typename InputIterator>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(InputIterator first, InputIterator last,
+ size_type size,
+ const hasher& hasher,
+ const key_equal& pred,
+ const allocator_type& allocator)
+ : base_type(first, last, size, hasher, pred, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(const unordered_map& map)
+ : base_type(map)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(const allocator_type& allocator)
+ : base_type(allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::unordered_map(const unordered_map& map, const allocator_type& allocator)
+ : base_type(map, allocator)
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>::~unordered_map()
+{
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_map<Key, Value, Hash, Pred, Alloc>& unordered_map<Key, Value, Hash, Pred, Alloc>::operator=(const unordered_map& map)
+{
+ base_type::operator=(map);
+ return *this;
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+Value& unordered_map<Key, Value, Hash, Pred, Alloc>::operator[](const Key& key)
+{
+ typename base_type::iterator iter = base_type::find(key);
+ if (iter == this->end())
+ {
+ iter = base_type::insert(jitstd::pair<const Key, mapped_type>(key, mapped_type())).first;
+ }
+ return (*iter).second;
+}
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+Value& unordered_map<Key, Value, Hash, Pred, Alloc>::operator[](key_type&& key)
+{
+ typename base_type::iterator iter = base_type::find(key);
+ if (iter == this->end())
+ {
+ iter = base_type::insert(jitstd::pair<const Key, mapped_type>(key, mapped_type())).first;
+ }
+ return (*iter).second;
+}
+
+
+template<typename Key, typename Value, typename Hash, typename Pred, typename Alloc>
+typename unordered_map<Key, Value, Hash, Pred, Alloc>::iterator
+unordered_map<Key, Value, Hash, Pred, Alloc>::insert(const key_type& key, const mapped_type& value)
+{
+ typename unordered_map<Key, Value, Hash, Pred, Alloc>::iterator iter = base_type::find(key);
+ if (iter == this->end())
+ {
+ iter = base_type::insert(jitstd::pair<const Key, mapped_type>(key, value)).first;
+ }
+ return iter;
+}
+
+}
diff --git a/src/jit/jitstd/unordered_set.h b/src/jit/jitstd/unordered_set.h
new file mode 100644
index 0000000000..388e72426c
--- /dev/null
+++ b/src/jit/jitstd/unordered_set.h
@@ -0,0 +1,156 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX unordered_set<V,H,P,A> XX
+XX XX
+XX Derives from hashtable for most implementation. The hash key is the XX
+XX elements themselves XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
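+
+// A minimal usage sketch (assuming an already-constructed jitstd::allocator<int>
+// instance named "alloc"):
+//
+//   jitstd::unordered_set<int> seen(/* size */ 16, alloc);
+//   seen.insert(42);
+//   bool found = (seen.find(42) != seen.end());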
+
+#pragma once
+
+#include "allocator.h"
+#include "hashtable.h"
+
+namespace jitstd
+{
+
+template <typename Value,
+ typename Hash = jitstd::hash<Value>,
+ typename Pred = jitstd::equal_to<Value>,
+ typename Alloc = jitstd::allocator<Value>>
+class unordered_set
+ : public hashtable<Value, Value, Hash, Pred, Alloc>
+{
+public:
+ typedef Value key_type;
+ typedef Value value_type;
+ typedef Hash hasher;
+ typedef Pred key_equal;
+ typedef Alloc allocator_type;
+ typedef typename allocator_type::pointer pointer;
+ typedef typename allocator_type::const_pointer const_pointer;
+ typedef typename allocator_type::reference reference;
+ typedef typename allocator_type::const_reference const_reference;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef typename list<Value, Alloc>::iterator iterator;
+ typedef typename list<Value, Alloc>::const_iterator const_iterator;
+ typedef typename list<Value, Alloc>::iterator local_iterator;
+
+private:
+ typedef hashtable<Value, Value, Hash, Pred, Alloc> base_type;
+ unordered_set();
+
+ typedef pair<iterator, iterator> BucketEntry;
+ typedef vector<BucketEntry, typename Alloc::template rebind<BucketEntry>::allocator> Buckets;
+ typedef list<Value, Alloc> Elements;
+
+public:
+ explicit unordered_set(size_type,
+ const allocator_type& a);
+
+ unordered_set(size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type&);
+
+ template<typename InputIterator>
+ unordered_set(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type&);
+
+ explicit unordered_set(const allocator_type&);
+
+ unordered_set(const unordered_set& other);
+
+ ~unordered_set();
+
+ unordered_set& operator=(unordered_set const&);
+};
+
+} // end of namespace jitstd
+
+
+namespace jitstd
+{
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(
+ size_type n,
+ allocator_type const& allocator)
+ : hashtable<Value>(n, allocator)
+{
+ this->rehash(n);
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(
+ size_type n,
+ hasher const& hf,
+ key_equal const& eq,
+ allocator_type const& allocator)
+ : hashtable<Value>(n, hf, eq, allocator)
+{
+ this->rehash(n);
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+template<typename InputIterator>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(
+ InputIterator f, InputIterator l,
+ size_type n,
+ const hasher& hf,
+ const key_equal& eq,
+ const allocator_type& allocator)
+ : hashtable<Value>(f, l, n, hf, eq, allocator)
+{
+ this->rehash(n);
+    this->insert(f, l);
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(const allocator_type& allocator)
+: hashtable<Value>(allocator)
+{
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::unordered_set(const unordered_set& other)
+: hashtable<Value>(other)
+{
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>::~unordered_set()
+{
+}
+
+template <typename Value, typename Hash, typename Pred, typename Alloc>
+unordered_set<Value, Hash, Pred, Alloc>&
+ unordered_set<Value, Hash, Pred, Alloc>::operator=(unordered_set const& other)
+{
+ base_type::operator=(other);
+ return *this;
+}
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/utility.h b/src/jit/jitstd/utility.h
new file mode 100644
index 0000000000..80ce58e4d7
--- /dev/null
+++ b/src/jit/jitstd/utility.h
@@ -0,0 +1,108 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+
+
+#pragma once
+
+namespace jitstd
+{
+
+template <typename T>
+inline
+T&& forward(typename jitstd::remove_reference<T>::type& arg)
+{
+ return static_cast<T&&>(arg);
+}
+
+template <typename T>
+inline
+T&& forward(typename jitstd::remove_reference<T>::type&& arg)
+{
+ static_assert(!jitstd::is_lvalue_reference<T>::value, "unexpected lvalue reference");
+ return static_cast<T&&>(arg);
+}
+
+namespace utility
+{
+ // Template class for scoped execution of a lambda.
+ // Usage:
+ //
+ // auto code = [&]
+ // {
+ // JITDUMP("finally()");
+ // };
+ // jitstd::utility::scoped_code<decltype(code)> finally(code);
+ // "code" will execute when "finally" goes out of scope.
+ template <typename T>
+ class scoped_code
+ {
+ public:
+ const T& l;
+ scoped_code(const T& l) : l(l) { }
+ ~scoped_code() { l(); }
+ };
+
+
+ // Helper to allocate objects of any type, given an allocator of void type.
+ //
+ // @param alloc An allocator of void type used to create an allocator of type T.
+ // @param count The number of objects of type T that need to be allocated.
+ //
+ // @return A pointer to an object or an array of objects that was allocated.
+ template <typename T>
+ inline
+ static T* allocate(jitstd::allocator<void>& alloc, size_t count = 1)
+ {
+ return jitstd::allocator<T>(alloc).allocate(count);
+ }
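+
+    // Illustrative usage (hypothetical variables, not part of the original change):
+    // given a jitstd::allocator<void> named "alloc", the helper rebinds it to the
+    // requested element type so callers need not spell out jitstd::allocator<T>:
+    //
+    //   int*    scratch = jitstd::utility::allocate<int>(alloc, 16); // 16 ints
+    //   double* single  = jitstd::utility::allocate<double>(alloc);  // one element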
+
+ // Ensures that "wset" is the union of the initial state of "wset" and "rset".
+ // Elements from "rset" that were not in "wset" are added to "cset."
+ template <typename Set>
+ bool set_union(Set& wset, const Set& rset, Set& cset)
+ {
+ bool change = false;
+ for (typename Set::const_iterator i = rset.begin(); i != rset.end(); ++i)
+ {
+ jitstd::pair<typename Set::iterator, bool> result = wset.insert(*i);
+ if (result.second)
+ {
+ change = true;
+ cset.insert(*i);
+ }
+ }
+ return change;
+ }
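+
+    // Worked example (hypothetical contents, for illustration only): with
+    // wset = {1, 2}, rset = {2, 3} and an empty cset, set_union(wset, rset, cset)
+    // leaves wset = {1, 2, 3}, adds the newly inserted element 3 to cset, and
+    // returns true because wset changed.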
+
+ template <typename Set>
+ bool set_union(Set& wset, const Set& rset)
+ {
+ bool change = false;
+ for (typename Set::const_iterator i = rset.begin(); i != rset.end(); ++i)
+ {
+ jitstd::pair<typename Set::iterator, bool> result = wset.insert(*i);
+ change |= result.second;
+ }
+ return change;
+ }
+
+ template <typename Set>
+ bool set_difference(Set& wset, const Set& rset)
+ {
+ bool change = false;
+ for (typename Set::const_iterator i = rset.begin(); i != rset.end(); ++i)
+ {
+ if (wset.find(*i) != wset.end())
+ {
+ wset.erase(*i);
+ change = true;
+ }
+ }
+
+ return change;
+ }
+} // end of namespace utility.
+
+} // end of namespace jitstd.
diff --git a/src/jit/jitstd/vector.h b/src/jit/jitstd/vector.h
new file mode 100644
index 0000000000..d252e18253
--- /dev/null
+++ b/src/jit/jitstd/vector.h
@@ -0,0 +1,1254 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX vector<T> XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#include "allocator.h"
+#include "iterator.h"
+
+namespace jitstd
+{
+
+template <typename T, typename Allocator = allocator<T> >
+class vector
+{
+public:
+ typedef Allocator allocator_type;
+ typedef T* pointer;
+ typedef T& reference;
+ typedef const T* const_pointer;
+ typedef const T& const_reference;
+
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+ typedef T value_type;
+
+ // nested classes
+ class iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ iterator(T* ptr);
+ public:
+ iterator();
+ iterator(const iterator& it);
+
+ iterator& operator++();
+ iterator& operator++(int);
+ iterator& operator--();
+ iterator& operator--(int);
+ iterator operator+(difference_type n);
+ iterator operator-(difference_type n);
+ size_type operator-(const iterator& that);
+ bool operator==(const iterator& it);
+ bool operator!=(const iterator& it);
+ T& operator*();
+ T* operator&();
+ operator T*();
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ class const_iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ private:
+ const_iterator(T* ptr);
+ const_iterator();
+ public:
+ const_iterator(const const_iterator& it);
+
+ const_iterator& operator++();
+ const_iterator& operator++(int);
+ const_iterator& operator--();
+ const_iterator& operator--(int);
+ const_iterator operator+(difference_type n);
+ const_iterator operator-(difference_type n);
+ size_type operator-(const const_iterator& that);
+ bool operator==(const const_iterator& it) const;
+ bool operator!=(const const_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ operator const T*() const;
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ class reverse_iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ private:
+ reverse_iterator(T* ptr);
+ public:
+ reverse_iterator();
+ reverse_iterator(const reverse_iterator& it);
+
+ reverse_iterator& operator++();
+ reverse_iterator& operator++(int);
+ reverse_iterator& operator--();
+ reverse_iterator& operator--(int);
+ reverse_iterator operator+(difference_type n);
+ reverse_iterator operator-(difference_type n);
+ size_type operator-(const reverse_iterator& that);
+ bool operator==(const reverse_iterator& it);
+ bool operator!=(const reverse_iterator& it);
+ T& operator*();
+ T* operator&();
+ operator T*();
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ class const_reverse_iterator : public jitstd::iterator<random_access_iterator_tag, T>
+ {
+ private:
+ const_reverse_iterator(T* ptr);
+ public:
+ const_reverse_iterator();
+ const_reverse_iterator(const const_reverse_iterator& it);
+
+ const_reverse_iterator& operator++();
+ const_reverse_iterator& operator++(int);
+ const_reverse_iterator& operator--();
+ const_reverse_iterator& operator--(int);
+ const_reverse_iterator operator+(difference_type n);
+ const_reverse_iterator operator-(difference_type n);
+ size_type operator-(const const_reverse_iterator& that);
+ bool operator==(const const_reverse_iterator& it) const;
+ bool operator!=(const const_reverse_iterator& it) const;
+ const T& operator*() const;
+ const T* operator&() const;
+ operator const T*() const;
+
+ private:
+ friend class vector<T, Allocator>;
+ pointer m_pElem;
+ };
+
+ // ctors
+ explicit vector(const Allocator& allocator);
+ explicit vector(size_type n, const T& value, const Allocator& allocator);
+
+ template <typename InputIterator>
+ vector(InputIterator first, InputIterator last, const Allocator& allocator);
+
+ // cctors
+ vector(const vector& vec);
+
+ template <typename Alt, typename AltAllocator>
+ explicit vector(const vector<Alt, AltAllocator>& vec);
+
+ // dtor
+ ~vector();
+
+ template <class InputIterator>
+ void assign(InputIterator first, InputIterator last);
+ void assign(size_type size, const T& value);
+
+ const_reference at(size_type n) const;
+ reference at(size_type n);
+
+ reference back();
+ const_reference back() const;
+
+ iterator begin();
+ const_iterator begin() const;
+ const_iterator cbegin() const;
+
+ size_type capacity() const;
+
+ void clear();
+ bool empty() const;
+
+ iterator end();
+ const_iterator end() const;
+ const_iterator cend() const;
+
+ iterator erase(iterator position);
+ iterator erase(iterator first, iterator last);
+
+ reference front();
+ const_reference front() const;
+
+ allocator_type get_allocator() const;
+
+ iterator insert(iterator position, const T& value);
+ void insert(iterator position, size_type size, const T& value);
+
+ template <typename InputIterator>
+ void insert(iterator position, InputIterator first, InputIterator last);
+
+ size_type max_size() const;
+
+ vector& operator=(const vector& vec);
+ template <typename Alt, typename AltAllocator>
+ vector<T, Allocator>& operator=(const vector<Alt, AltAllocator>& vec);
+
+ reference operator[](size_type n);
+ const_reference operator[](size_type n) const;
+
+ void pop_back();
+ void push_back(const T& value);
+
+ reverse_iterator rbegin();
+ const_reverse_iterator rbegin() const;
+
+ reverse_iterator rend();
+ const_reverse_iterator rend() const;
+
+ void reserve(size_type n);
+
+ void resize(size_type sz, const T&);
+
+ size_type size() const;
+
+ void swap(vector<T, Allocator>& vec);
+
+private:
+
+ typename Allocator::template rebind<T>::allocator m_allocator;
+ T* m_pArray;
+ size_type m_nSize;
+ size_type m_nCapacity;
+
+ inline
+ bool ensure_capacity(size_type capacity);
+
+ template <typename InputIterator>
+ void construct_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+ template <typename InputIterator>
+ void construct_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag);
+ void construct_helper(size_type size, const T& value);
+
+ template <typename InputIterator>
+ void insert_helper(iterator iter, InputIterator first, InputIterator last, forward_iterator_tag);
+ template <typename InputIterator>
+ void insert_helper(iterator iter, InputIterator first, InputIterator last, int_not_an_iterator_tag);
+ void insert_elements_helper(iterator iter, size_type size, const T& value);
+
+ template <typename InputIterator>
+ void assign_helper(InputIterator first, InputIterator last, forward_iterator_tag);
+ template <typename InputIterator>
+ void assign_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag);
+
+ template <typename Alt, typename AltAllocator>
+ friend class vector;
+};
+
+} // end of namespace jitstd.
+
+
+
+// Implementation of vector.
+
+namespace jitstd
+{
+
+namespace
+{
+
+template <typename InputIterator>
+size_t iterator_difference(InputIterator first, const InputIterator& last)
+{
+ size_t size = 0;
+ for (; first != last; ++first, ++size);
+ return size;
+}
+
+}
+
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::vector(const Allocator& allocator)
+ : m_allocator(allocator)
+ , m_pArray(nullptr)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::vector(size_type size, const T& value, const Allocator& allocator)
+ : m_allocator(allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ construct_helper(size, value);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+vector<T, Allocator>::vector(InputIterator first, InputIterator last, const Allocator& allocator)
+ : m_allocator(allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ construct_helper(first, last, iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+template <typename Alt, typename AltAllocator>
+vector<T, Allocator>::vector(const vector<Alt, AltAllocator>& vec)
+ : m_allocator(vec.m_allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ ensure_capacity(vec.m_nSize);
+ for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j)
+ {
+ new (m_pArray + i, placement_t()) T((T) vec.m_pArray[j]);
+ }
+
+ m_nSize = vec.m_nSize;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::vector(const vector<T, Allocator>& vec)
+ : m_allocator(vec.m_allocator)
+ , m_pArray(NULL)
+ , m_nSize(0)
+ , m_nCapacity(0)
+{
+ ensure_capacity(vec.m_nSize);
+ for (size_type i = 0, j = 0; i < vec.m_nSize; ++i, ++j)
+ {
+ new (m_pArray + i, placement_t()) T(vec.m_pArray[j]);
+ }
+
+ m_nSize = vec.m_nSize;
+}
+
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::~vector()
+{
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ m_pArray[i].~T();
+ }
+ m_allocator.deallocate(m_pArray, m_nCapacity);
+ m_nSize = 0;
+ m_nCapacity = 0;
+}
+
+
+// public methods
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::assign(InputIterator first, InputIterator last)
+{
+ construct_helper(first, last, iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::assign(size_type size, const T& value)
+{
+ ensure_capacity(size);
+    for (size_type i = 0; i < size; ++i)
+ {
+ m_pArray[i] = value;
+ }
+ m_nSize = size;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::at(size_type i) const
+{
+ return operator[](i);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference
+ vector<T, Allocator>::at(size_type i)
+{
+ return operator[](i);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference
+ vector<T, Allocator>::back()
+{
+ return operator[](m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::back() const
+{
+ return operator[](m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::begin()
+{
+ return iterator(m_pArray);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator
+ vector<T, Allocator>::begin() const
+{
+ return const_iterator(m_pArray);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator
+ vector<T, Allocator>::cbegin() const
+{
+ return const_iterator(m_pArray);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::capacity() const
+{
+ return m_nCapacity;
+}
+
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::clear()
+{
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ m_pArray[i].~T();
+ }
+ m_allocator.deallocate(m_pArray, m_nCapacity);
+ m_pArray = NULL;
+ m_nSize = 0;
+ m_nCapacity = 0;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::empty() const
+{
+ return m_nSize == 0;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator vector<T, Allocator>::end()
+{
+ return iterator(m_pArray + m_nSize);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator
+ vector<T, Allocator>::end() const
+{
+ return const_iterator(m_pArray + m_nSize);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator vector<T, Allocator>::cend() const
+{
+ return const_iterator(m_pArray + m_nSize);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::erase(
+ typename vector<T, Allocator>::iterator position)
+{
+ return erase(position, position + 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::erase(
+ typename vector<T, Allocator>::iterator first,
+ typename vector<T, Allocator>::iterator last)
+{
+ assert(m_nSize > 0);
+ assert(first.m_pElem >= m_pArray);
+ assert(last.m_pElem >= m_pArray);
+ assert(first.m_pElem <= m_pArray + m_nSize);
+ assert(last.m_pElem <= m_pArray + m_nSize);
+ assert(last.m_pElem > first.m_pElem);
+
+ pointer fptr = first.m_pElem;
+ pointer lptr = last.m_pElem;
+ pointer eptr = m_pArray + m_nSize;
+ for (; lptr != eptr; ++lptr, fptr++)
+ {
+ (*fptr).~T();
+ *fptr = *lptr;
+ }
+ m_nSize -= (size_type)(lptr - fptr);
+ return first;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference
+ vector<T, Allocator>::front()
+{
+ return operator[](0);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::front() const
+{
+ return operator[](0);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::allocator_type
+ vector<T, Allocator>::get_allocator() const
+{
+ return m_allocator;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator
+ vector<T, Allocator>::insert(
+ typename vector<T, Allocator>::iterator iter,
+ const T& value)
+{
+ size_type pos = (size_type) (iter.m_pElem - m_pArray);
+ insert_elements_helper(iter, 1, value);
+ return iterator(m_pArray + pos);
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::insert(
+ iterator iter,
+ size_type size,
+ const T& value)
+{
+ insert_elements_helper(iter, size, value);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::insert(
+ iterator iter,
+ InputIterator first,
+ InputIterator last)
+{
+ insert_helper(iter, first, last, iterator_traits<InputIterator>::iterator_category());
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::max_size() const
+{
+ return ((size_type) -1) >> 1;
+}
+
+template <typename T, typename Allocator>
+template <typename Alt, typename AltAllocator>
+vector<T, Allocator>& vector<T, Allocator>::operator=(const vector<Alt, AltAllocator>& vec)
+{
+ // We'll not observe copy-on-write for now.
+ m_allocator = vec.m_allocator;
+ ensure_capacity(vec.m_nSize);
+ m_nSize = vec.m_nSize;
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ m_pArray[i] = (T) vec.m_pArray[i];
+ }
+ return *this;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>& vector<T, Allocator>::operator=(const vector<T, Allocator>& vec)
+{
+ // We'll not observe copy-on-write for now.
+ m_allocator = vec.m_allocator;
+ ensure_capacity(vec.m_nSize);
+ m_nSize = vec.m_nSize;
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ new (m_pArray + i, placement_t()) T(vec.m_pArray[i]);
+ }
+ return *this;
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reference vector<T, Allocator>::operator[](size_type n)
+{
+ return m_pArray[n];
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reference
+ vector<T, Allocator>::operator[](size_type n) const
+{
+ return m_pArray[n];
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::pop_back()
+{
+ m_pArray[m_nSize - 1].~T();
+ --m_nSize;
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::push_back(const T& value)
+{
+ ensure_capacity(m_nSize + 1);
+ new (m_pArray + m_nSize, placement_t()) T(value);
+ ++m_nSize;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator vector<T, Allocator>::rbegin()
+{
+ return reverse_iterator(m_pArray + m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator
+ vector<T, Allocator>::rbegin() const
+{
+ return const_reverse_iterator(m_pArray + m_nSize - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator
+ vector<T, Allocator>::rend()
+{
+ return reverse_iterator(m_pArray - 1);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator
+ vector<T, Allocator>::rend() const
+{
+ return const_reverse_iterator(m_pArray - 1);
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::reserve(size_type n)
+{
+ ensure_capacity(n);
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::resize(
+ size_type sz,
+ const T& c)
+{
+ for (; m_nSize > sz; m_nSize--)
+ {
+ m_pArray[m_nSize - 1].~T();
+ }
+ ensure_capacity(sz);
+ for (; m_nSize < sz; m_nSize++)
+ {
+ new (m_pArray + m_nSize, placement_t()) T(c);
+ }
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type vector<T, Allocator>::size() const
+{
+ return m_nSize;
+}
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::swap(vector<T, Allocator>& vec)
+{
+ jitstd::swap(m_pArray, vec.m_pArray);
+ jitstd::swap(m_nSize, vec.m_nSize);
+    jitstd::swap(m_nCapacity, vec.m_nCapacity);
+ jitstd::swap(m_allocator, vec.m_allocator);
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::construct_helper(size_type size, const T& value)
+{
+ ensure_capacity(size);
+
+ for (size_type i = 0; i < size; ++i)
+ {
+ new (m_pArray + i, placement_t()) T(value);
+ }
+
+ m_nSize = size;
+}
+
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::construct_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag)
+{
+ construct_helper(first, last);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::construct_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ size_type size = iterator_difference(first, last);
+
+ ensure_capacity(size);
+ for (size_type i = 0; i < size; ++i)
+ {
+ new (m_pArray + i, placement_t()) T(*first);
+ first++;
+ }
+
+ m_nSize = size;
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+void vector<T, Allocator>::insert_elements_helper(iterator iter, size_type size, const T& value)
+{
+ assert(size < max_size());
+
+    // If m_pElem is NULL, then m_pArray would be NULL too.
+ size_type pos = iter.m_pElem - m_pArray;
+
+ assert(pos <= m_nSize); // <= could insert at end.
+ assert(pos >= 0);
+
+ ensure_capacity(m_nSize + size);
+
+ for (int src = m_nSize - 1, dst = m_nSize + size - 1; src >= (int) pos; --src, --dst)
+ {
+ m_pArray[dst] = m_pArray[src];
+ }
+
+ for (size_type i = 0; i < size; ++i)
+ {
+ new (m_pArray + pos + i, placement_t()) T(value);
+ }
+
+ m_nSize += size;
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::insert_helper(iterator iter, InputIterator first, InputIterator last, int_not_an_iterator_tag)
+{
+ insert_elements_helper(iter, first, last);
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::insert_helper(iterator iter, InputIterator first, InputIterator last, forward_iterator_tag)
+{
+    // If m_pElem is NULL, then m_pArray would be NULL too.
+ size_type pos = iter.m_pElem - m_pArray;
+
+ assert(pos <= m_nSize); // <= could insert at end.
+ assert(pos >= 0);
+
+ size_type size = iterator_difference(first, last);
+ assert(size < max_size());
+
+ ensure_capacity(m_nSize + size);
+
+ pointer lst = m_pArray + m_nSize + size - 1;
+ for (size_type i = pos; i < m_nSize; ++i)
+ {
+ *lst-- = m_pArray[i];
+ }
+ for (size_type i = 0; i < size; ++i, ++first)
+ {
+ m_pArray[pos + i] = *first;
+ }
+
+ m_nSize += size;
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::assign_helper(InputIterator first, InputIterator last, forward_iterator_tag)
+{
+ size_type size = iterator_difference(first, last);
+
+ ensure_capacity(size);
+ for (size_type i = 0; i < size; ++i)
+ {
+ m_pArray[i] = *first;
+ first++;
+ }
+
+ m_nSize = size;
+}
+
+template <typename T, typename Allocator>
+template <typename InputIterator>
+void vector<T, Allocator>::assign_helper(InputIterator first, InputIterator last, int_not_an_iterator_tag)
+{
+ assign_helper(first, last);
+}
+
+// =======================================================================================
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::ensure_capacity(size_type newCap)
+{
+ if (newCap <= m_nCapacity)
+ {
+ return false;
+ }
+
+ // Double the alloc capacity based on size.
+ size_type allocCap = m_nSize * 2;
+
+ // Is it still not sufficient?
+ if (allocCap < newCap)
+ {
+ allocCap = newCap;
+ }
+
+ // Allocate space.
+ pointer ptr = m_allocator.allocate(allocCap);
+
+ // Copy over.
+ for (size_type i = 0; i < m_nSize; ++i)
+ {
+ new (ptr + i, placement_t()) T(m_pArray[i]);
+ }
+
+ // Deallocate currently allocated space.
+ m_allocator.deallocate(m_pArray, m_nCapacity);
+
+ // Update the pointers and capacity;
+ m_pArray = ptr;
+ m_nCapacity = allocCap;
+ return true;
+}
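+
+// Growth illustration (hypothetical numbers, derived from the logic above): with
+// m_nSize == 4 and m_nCapacity == 4, a push_back triggers ensure_capacity(5); the
+// doubled size 2 * 4 = 8 already covers 5, so 8 elements are allocated. Starting
+// from an empty vector, the doubled size 0 is insufficient, so exactly the
+// requested capacity is allocated.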
+
+} // end of namespace jitstd.
+
+
+
+// Implementation of vector iterators
+
+namespace jitstd
+{
+
+// iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::iterator(const iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator++()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator++(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator--()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator& vector<T, Allocator>::iterator::operator--(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator vector<T, Allocator>::iterator::operator+(difference_type n)
+{
+ return iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::iterator vector<T, Allocator>::iterator::operator-(difference_type n)
+{
+ return iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::iterator::operator-(
+ const typename vector<T, Allocator>::iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::iterator::operator==(const iterator& it)
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::iterator::operator!=(const iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& vector<T, Allocator>::iterator::operator*()
+{
+ return *m_pElem;
+}
+
+template <typename T, typename Allocator>
+T* vector<T, Allocator>::iterator::operator&()
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::iterator::operator T*()
+{
+    return m_pElem;
+}
+
+// const_iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::const_iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::const_iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::const_iterator(const const_iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator++()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator++(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator--()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator& vector<T, Allocator>::const_iterator::operator--(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator vector<T, Allocator>::const_iterator::operator+(difference_type n)
+{
+ return const_iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_iterator vector<T, Allocator>::const_iterator::operator-(difference_type n)
+{
+ return const_iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::const_iterator::operator-(
+ const typename vector<T, Allocator>::const_iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_iterator::operator==(const const_iterator& it) const
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_iterator::operator!=(const const_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& vector<T, Allocator>::const_iterator::operator*() const
+{
+ return *m_pElem;
+}
+
+
+template <typename T, typename Allocator>
+const T* vector<T, Allocator>::const_iterator::operator&() const
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_iterator::operator const T*() const
+{
+    return m_pElem;
+}
+
+
+// reverse_iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::reverse_iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::reverse_iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::reverse_iterator(const reverse_iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator++()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator++(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator--()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator& vector<T, Allocator>::reverse_iterator::operator--(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator vector<T, Allocator>::reverse_iterator::operator+(difference_type n)
+{
+ return reverse_iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::reverse_iterator vector<T, Allocator>::reverse_iterator::operator-(difference_type n)
+{
+ return reverse_iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::reverse_iterator::operator-(
+ const typename vector<T, Allocator>::reverse_iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::reverse_iterator::operator==(const reverse_iterator& it)
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::reverse_iterator::operator!=(const reverse_iterator& it)
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+T& vector<T, Allocator>::reverse_iterator::operator*()
+{
+ return *m_pElem;
+}
+
+template <typename T, typename Allocator>
+T* vector<T, Allocator>::reverse_iterator::operator&()
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::reverse_iterator::operator T*()
+{
+    return m_pElem;
+}
+
+// const_reverse_iterator
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::const_reverse_iterator()
+ : m_pElem(NULL)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::const_reverse_iterator(T* ptr)
+ : m_pElem(ptr)
+{
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::const_reverse_iterator(const const_reverse_iterator& it)
+ : m_pElem(it.m_pElem)
+{
+}
+
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator++()
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator++(int)
+{
+ --m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator--()
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator& vector<T, Allocator>::const_reverse_iterator::operator--(int)
+{
+ ++m_pElem;
+ return *this;
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator vector<T, Allocator>::const_reverse_iterator::operator+(difference_type n)
+{
+ return const_reverse_iterator(m_pElem + n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::const_reverse_iterator vector<T, Allocator>::const_reverse_iterator::operator-(difference_type n)
+{
+ return const_reverse_iterator(m_pElem - n);
+}
+
+template <typename T, typename Allocator>
+typename vector<T, Allocator>::size_type
+ vector<T, Allocator>::const_reverse_iterator::operator-(
+ const typename vector<T, Allocator>::const_reverse_iterator& that)
+{
+ return m_pElem - that.m_pElem;
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_reverse_iterator::operator==(const const_reverse_iterator& it) const
+{
+ return (m_pElem == it.m_pElem);
+}
+
+template <typename T, typename Allocator>
+bool vector<T, Allocator>::const_reverse_iterator::operator!=(const const_reverse_iterator& it) const
+{
+ return !operator==(it);
+}
+
+template <typename T, typename Allocator>
+const T& vector<T, Allocator>::const_reverse_iterator::operator*() const
+{
+ return *m_pElem;
+}
+
+template <typename T, typename Allocator>
+const T* vector<T, Allocator>::const_reverse_iterator::operator&() const
+{
+    return m_pElem;
+}
+
+template <typename T, typename Allocator>
+vector<T, Allocator>::const_reverse_iterator::operator const T*() const
+{
+    return m_pElem;
+}
+
+}
diff --git a/src/jit/jittelemetry.cpp b/src/jit/jittelemetry.cpp
new file mode 100644
index 0000000000..2d5a2102d1
--- /dev/null
+++ b/src/jit/jittelemetry.cpp
@@ -0,0 +1,390 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+// <OWNER>clrjit</OWNER>
+//
+// This class abstracts the telemetry information collected for the JIT.
+//
+// Goals:
+// 1. Telemetry information should be a NO-op when JIT level telemetry is disabled.
+// 2. Data collection should be actionable.
+// 3. Data collection should comply to privacy rules.
+// 4. Data collection cannot impact JIT/OS performance.
+// 5. Data collection volume should be manageable by our remote services.
+//
+// DESIGN CONCERNS:
+//
+// > To collect data, we use the TraceLogging API provided by Windows.
+//
+// The brief workflow suggested is:
+// #include <TraceLoggingProvider.h>
+// TRACELOGGING_DEFINE_PROVIDER( // defines g_hProvider
+// g_hProvider, // Name of the provider variable
+// "MyProvider", // Human-readable name of the provider
+// (0xb3864c38, 0x4273, 0x58c5, 0x54, 0x5b, 0x8b, 0x36, 0x08, 0x34, 0x34, 0x71)); // Provider GUID
+// int main(int argc, char* argv[]) // or DriverEntry for kernel-mode.
+// {
+// TraceLoggingRegister(g_hProvider, NULL, NULL, NULL); // NULLs only needed for C. Please do not include the
+// // NULLs in C++ code.
+// TraceLoggingWrite(g_hProvider,
+// "MyEvent1",
+// TraceLoggingString(argv[0], "arg0"),
+// TraceLoggingInt32(argc));
+// TraceLoggingUnregister(g_hProvider);
+// return 0;
+// }
+//
+// In summary, this involves:
+// 1. Creating a binary/DLL local provider using:
+// TRACELOGGING_DEFINE_PROVIDER(g_hProvider, "ProviderName", providerId, [option])
+// 2. Registering the provider instance
+// TraceLoggingRegister(g_hProvider)
+// 3. Perform TraceLoggingWrite operations to write out data.
+// 4. Unregister the provider instance.
+// TraceLoggingUnregister(g_hProvider)
+//
+// A. Determining where to create the provider instance?
+// 1) We use the same provider name/GUID as the CLR and the CLR creates its own DLL local provider handle.
+// For CLRJIT.dll, the question is, can the same provider name/GUIDs be shared across binaries?
+//
+// Answer:
+// "For TraceLogging providers, it is okay to use the same provider GUID / name
+// in different binaries. Do not share the same provider handle across DLLs.
+// As long as you do not pass an hProvider from one DLL to another, TraceLogging
+// will properly keep track of the events."
+//
+// 2) CoreCLR is linked into the CLR. CLR already creates an instance, so where do we create the JIT's instance?
+// Answer:
+// "Ideally you would have one provider per DLL, but if you're folding distinct sets
+// of functionality into one DLL (like shell32.dll or similar sort of catch-all things)
+// you can have perhaps a few more providers per binary."
+//
+// B. Determining where to register and unregister the provider instance?
+// 1) For CLRJIT.dll we can register the provider instance during jitDllOnProcessAttach.
+// Since one of our goals is to turn telemetry off, we need to be careful about
+// referencing environment variables during the DLL load and unload path.
+// Referencing environment variables through ConfigDWORD uses UtilCode.
+// This roughly translates to InitUtilcode() being called before jitDllOnProcessAttach.
+//
+// For CLRJIT.dll, compStartup is called on jitOnDllProcessAttach().
+// This can be called twice through sxsJitStartup -- so prevent double initialization.
+// UtilCode is init-ed by this time. The same is true for CoreCLR.
+//
+// 2) For CLRJIT.dll and CoreCLR, compShutdown will be called during jitOnDllProcessDetach().
+//
+// C. Determining the data to collect:
+//
+// IMPORTANT: Since telemetry data can be collected at any time after DLL load,
+// make sure you initialize the compiler state variables you access in telemetry
+// data collection. For example, if you are transmitting method names, then
+// make sure info.compMethodHnd is initialized at that point.
+//
+// 1) Tracking noway assert count:
+// After a noway assert is hit, in both min-opts and non-min-opts, we collect
+// info such as the JIT version, method hash being compiled, filename and
+// line number etc.
+//
+// 2) Tracking baseline for the noway asserts:
+// During DLL unload, we report the number of methods that were compiled by
+// the JIT per process both under normal mode and during min-opts. NOTE that
+// this is ON for all processes.
+//
+// 3) For the future, be aware of privacy, performance and actionability of the data.
+//
+
+#include "jitpch.h"
+#include "compiler.h"
+
+#ifdef FEATURE_TRACELOGGING
+#include "TraceLoggingProvider.h"
+#include "MicrosoftTelemetry.h"
+#include "clrtraceloggingcommon.h"
+#include "fxver.h"
+
+// Since telemetry code could be called under a noway_assert, make sure
+// we don't call noway_assert again.
+#undef noway_assert
+
+#define BUILD_STR1(x) #x
+#define BUILD_STR2(x) BUILD_STR1(x)
+#define BUILD_MACHINE BUILD_STR2(__BUILDMACHINE__)
+
+// A DLL local instance of the DotNet provider
+TRACELOGGING_DEFINE_PROVIDER(g_hClrJitProvider,
+ CLRJIT_PROVIDER_NAME,
+ CLRJIT_PROVIDER_ID,
+ TraceLoggingOptionMicrosoftTelemetry());
+
+// Threshold to detect if we are hitting too many bad (noway) methods
+// over good methods per process to prevent logging too much data.
+static const double NOWAY_NOISE_RATIO = 0.6; // Threshold of (bad / total) beyond which we'd stop
+ // logging. We'd restart if the pass rate improves.
+static const unsigned NOWAY_SUFFICIENCY_THRESHOLD = 25; // Count of methods beyond which we'd apply percent
+ // threshold
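+
+// Worked example (hypothetical counts, for illustration only): with 30 noway hits
+// and 10 successfully compiled methods, attempts = 40 and the ratio is 0.75; since
+// 30 > NOWAY_SUFFICIENCY_THRESHOLD (25) and 0.75 > NOWAY_NOISE_RATIO (0.6),
+// NotifyNowayAssert below returns early instead of logging.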
+
+// Initialize Telemetry State
+volatile bool JitTelemetry::s_fProviderRegistered = false;
+volatile UINT32 JitTelemetry::s_uMethodsCompiled = 0;
+volatile UINT32 JitTelemetry::s_uMethodsHitNowayAssert = 0;
+
+// Constructor for telemetry state per compiler instance
+JitTelemetry::JitTelemetry()
+{
+ Initialize(nullptr);
+}
+
+//------------------------------------------------------------------------
+// Initialize: Initialize the object with the compiler instance
+//
+// Description:
+//    The compiler instance may not be fully initialized yet. If you are
+//    tracking object data for telemetry, make sure that data is initialized
+//    by the time the compiler is ready.
+//
+void JitTelemetry::Initialize(Compiler* c)
+{
+ comp = c;
+ m_pszAssemblyName = "";
+ m_pszScopeName = "";
+ m_pszMethodName = "";
+ m_uMethodHash = 0;
+ m_fMethodInfoCached = false;
+}
+
+//------------------------------------------------------------------------
+// IsTelemetryEnabled: Can we perform JIT telemetry
+//
+// Return Value:
+// Returns "true" if COMPlus_JitTelemetry environment flag is
+//    non-zero. Otherwise, returns "false".
+//
+/* static */
+bool JitTelemetry::IsTelemetryEnabled()
+{
+ return JitConfig.JitTelemetry() != 0;
+}
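+
+// Usage note (based on the description above, not an exhaustive statement of the
+// configuration mechanism): a non-zero COMPlus_JitTelemetry environment variable
+// enables collection and a zero value disables it, e.g. "set COMPlus_JitTelemetry=0"
+// before launching the process turns JIT telemetry off.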
+
+//------------------------------------------------------------------------
+// NotifyDllProcessAttach: Notification for DLL load and static initializations
+//
+// Description:
+// Register telemetry provider with the OS.
+//
+// Note:
+//    This method can be called twice in the NGEN scenario.
+//
+void JitTelemetry::NotifyDllProcessAttach()
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ if (!s_fProviderRegistered)
+ {
+ // Register the provider.
+ TraceLoggingRegister(g_hClrJitProvider);
+ s_fProviderRegistered = true;
+ }
+}
+
+//------------------------------------------------------------------------
+// NotifyDllProcessDetach: Notification for DLL unload and teardown
+//
+// Description:
+// Log the methods compiled data if telemetry is enabled and
+// Unregister telemetry provider with the OS.
+//
+void JitTelemetry::NotifyDllProcessDetach()
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ assert(s_fProviderRegistered); // volatile read
+
+ // Unregister the provider.
+ TraceLoggingUnregister(g_hClrJitProvider);
+}
+
+//------------------------------------------------------------------------
+// NotifyEndOfCompilation: Notification for end of current method
+// compilation.
+//
+// Description:
+// Increment static volatile counters for the current compiled method.
+// This is slightly inaccurate due to lack of synchronization around
+//    the counters. The inaccuracy is a tradeoff to keep JITting cost low.
+//
+// Note:
+// 1. Must be called post fully successful compilation of the method.
+//    2. This serves as an effective baseline for how many methods compiled
+// successfully.
+void JitTelemetry::NotifyEndOfCompilation()
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ s_uMethodsCompiled++; // volatile increment
+}
+
+//------------------------------------------------------------------------
+// NotifyNowayAssert: Notification that noway assert handling is under way.
+//
+// Arguments:
+// filename - The JIT source file name's absolute path at the time of
+// building the JIT.
+// line - The line number where the noway assert was hit.
+//
+// Description:
+// If telemetry is enabled, then obtain data to collect from the
+// compiler or the VM and use the tracelogging APIs to write out.
+//
+void JitTelemetry::NotifyNowayAssert(const char* filename, unsigned line)
+{
+ if (!IsTelemetryEnabled())
+ {
+ return;
+ }
+
+ s_uMethodsHitNowayAssert++;
+
+ // Check if our assumption that noways are rare is invalid for this
+    // process. If so, return early rather than logging too much data.
+ unsigned noways = s_uMethodsHitNowayAssert;
+ unsigned attempts = max(1, s_uMethodsCompiled + noways);
+ double ratio = (noways / ((double)attempts));
+ if (noways > NOWAY_SUFFICIENCY_THRESHOLD && ratio > NOWAY_NOISE_RATIO)
+ {
+ return;
+ }
+
+ assert(comp);
+
+ UINT32 nowayIndex = s_uMethodsHitNowayAssert;
+ UINT32 codeSize = 0;
+ INT32 minOpts = -1;
+ const char* lastPhase = "";
+ if (comp != nullptr)
+ {
+ codeSize = comp->info.compILCodeSize;
+ minOpts = comp->opts.IsMinOptsSet() ? comp->opts.MinOpts() : -1;
+ lastPhase = PhaseNames[comp->previousCompletedPhase];
+ }
+
+ CacheCurrentMethodInfo();
+
+ TraceLoggingWrite(g_hClrJitProvider, "CLRJIT.NowayAssert",
+
+ TraceLoggingUInt32(codeSize, "IL_CODE_SIZE"), TraceLoggingInt32(minOpts, "MINOPTS_MODE"),
+ TraceLoggingString(lastPhase, "PREVIOUS_COMPLETED_PHASE"),
+
+ TraceLoggingString(m_pszAssemblyName, "ASSEMBLY_NAME"),
+ TraceLoggingString(m_pszMethodName, "METHOD_NAME"),
+ TraceLoggingString(m_pszScopeName, "METHOD_SCOPE"),
+ TraceLoggingUInt32(m_uMethodHash, "METHOD_HASH"),
+
+ TraceLoggingString(filename, "FILENAME"), TraceLoggingUInt32(line, "LINE"),
+ TraceLoggingUInt32(nowayIndex, "NOWAY_INDEX"),
+
+ TraceLoggingString(TARGET_READABLE_NAME, "ARCH"),
+ TraceLoggingString(VER_FILEVERSION_STR, "VERSION"), TraceLoggingString(BUILD_MACHINE, "BUILD"),
+ TraceLoggingString(VER_COMMENTS_STR, "FLAVOR"),
+
+ TraceLoggingKeyword(MICROSOFT_KEYWORD_TELEMETRY));
+}
+
+//------------------------------------------------------------------------
+// CacheCurrentMethodInfo: Cache the method/assembly/scope name info.
+//
+// Description:
+//    Obtain the information for the method under compilation from the
+//    compiler, if it is not already cached. This includes:
+//
+// Method name, assembly name, scope name, method hash.
+//
+void JitTelemetry::CacheCurrentMethodInfo()
+{
+ if (m_fMethodInfoCached)
+ {
+ return;
+ }
+
+ assert(comp);
+ if (comp != nullptr)
+ {
+ comp->compGetTelemetryDefaults(&m_pszAssemblyName, &m_pszScopeName, &m_pszMethodName, &m_uMethodHash);
+ assert(m_pszAssemblyName);
+ assert(m_pszScopeName);
+ assert(m_pszMethodName);
+ }
+
+ // Set cached to prevent getting this twice.
+ m_fMethodInfoCached = true;
+}
+
+//------------------------------------------------------------------------
+// compGetTelemetryDefaults: Obtain information specific to telemetry
+// from the JIT-interface.
+//
+// Arguments:
+// assemblyName - Pointer to hold assembly name upon return
+// scopeName - Pointer to hold scope name upon return
+// methodName - Pointer to hold method name upon return
+// methodHash - Pointer to hold method hash upon return
+//
+// Description:
+// Obtains from the JIT EE interface the information for the
+// current method under compilation.
+//
+// Warning:
+// The eeGetMethodName call could be expensive for generic
+// methods, so call this method only when there is less impact
+// to throughput.
+//
+void Compiler::compGetTelemetryDefaults(const char** assemblyName,
+ const char** scopeName,
+ const char** methodName,
+ unsigned* methodHash)
+{
+ if (info.compMethodHnd != nullptr)
+ {
+ __try
+ {
+
+ // Expensive calls, call infrequently or in exceptional scenarios.
+ *methodHash = info.compCompHnd->getMethodHash(info.compMethodHnd);
+ *methodName = eeGetMethodName(info.compMethodHnd, scopeName);
+
+ // SuperPMI needs to implement record/replay of these method calls.
+ *assemblyName = info.compCompHnd->getAssemblyName(
+ info.compCompHnd->getModuleAssembly(info.compCompHnd->getClassModule(info.compClassHnd)));
+ }
+ __except (EXCEPTION_EXECUTE_HANDLER)
+ {
+ }
+ }
+
+ // If the JIT interface methods init-ed these values to nullptr,
+ // make sure they are set to empty string.
+ if (*methodName == nullptr)
+ {
+ *methodName = "";
+ }
+ if (*scopeName == nullptr)
+ {
+ *scopeName = "";
+ }
+ if (*assemblyName == nullptr)
+ {
+ *assemblyName = "";
+ }
+}
+
+#endif // FEATURE_TRACELOGGING
diff --git a/src/jit/jittelemetry.h b/src/jit/jittelemetry.h
new file mode 100644
index 0000000000..24a0ce7b5d
--- /dev/null
+++ b/src/jit/jittelemetry.h
@@ -0,0 +1,78 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+// <OWNER>clrjit</OWNER>
+#pragma once
+
+#ifdef FEATURE_TRACELOGGING
+
+class Compiler;
+
+class JitTelemetry
+{
+public:
+ // Notify DLL load.
+ static void NotifyDllProcessAttach();
+
+ // Notify DLL unload.
+ static void NotifyDllProcessDetach();
+
+ // Constructor
+ JitTelemetry();
+
+ // Initialize with compiler instance
+ void Initialize(Compiler* comp);
+
+ // Notification of end of compilation of the current method.
+ void NotifyEndOfCompilation();
+
+ // Notification of noway_assert.
+ void NotifyNowayAssert(const char* filename, unsigned line);
+
+ // Is telemetry enabled through COMPlus_JitTelemetry?
+ static bool IsTelemetryEnabled();
+
+private:
+ // Obtain current method information from VM and cache for
+ // future uses.
+ void CacheCurrentMethodInfo();
+
+ //
+ //--------------------------------------------------------------------------------
+ // The below per process counters are updated without synchronization or
+    // The per-process counters below are updated without synchronization or
+    // thread-safety guarantees, to avoid interfering with JIT throughput.
+    // Accuracy of these counters is traded off for throughput.
+
+ // Methods compiled per DLL unload
+ static volatile UINT32 s_uMethodsCompiled;
+
+ // Methods compiled per DLL unload that hit noway assert (per process)
+ static volatile UINT32 s_uMethodsHitNowayAssert;
+ //--------------------------------------------------------------------------------
+
+ // Has the provider been registered already (per process)
+ static volatile bool s_fProviderRegistered;
+
+ // Cached value of current method hash.
+ unsigned m_uMethodHash;
+
+ // Cached value of current assembly name.
+ const char* m_pszAssemblyName;
+
+ // Cached value of current scope name, i.e., "Program.Foo" in "Program.Foo:Main"
+ const char* m_pszScopeName;
+
+ // Cached value of current method name, i.e., "Main" in "Program.Foo:Main"
+ const char* m_pszMethodName;
+
+ // Have we already cached the method/scope/assembly names?
+ bool m_fMethodInfoCached;
+
+ // Compiler instance.
+ Compiler* comp;
+};
+
+#endif // FEATURE_TRACELOGGING
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
new file mode 100644
index 0000000000..369c96322d
--- /dev/null
+++ b/src/jit/lclvars.cpp
@@ -0,0 +1,6788 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LclVarsInfo XX
+XX XX
+XX The variables to be used by the code generator. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "emit.h"
+
+#include "register_arg_convention.h"
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#if DOUBLE_ALIGN
+/* static */
+unsigned Compiler::s_lvaDoubleAlignedProcsCount = 0;
+#endif
+#endif
+
+/*****************************************************************************/
+
+void Compiler::lvaInit()
+{
+ /* We haven't allocated stack variables yet */
+ lvaRefCountingStarted = false;
+ lvaLocalVarRefCounted = false;
+
+ lvaSortAgain = false; // false: We don't need to call lvaSortOnly()
+ lvaTrackedFixed = false; // false: We can still add new tracked variables
+
+ lvaDoneFrameLayout = NO_FRAME_LAYOUT;
+#if !FEATURE_EH_FUNCLETS
+ lvaShadowSPslotsVar = BAD_VAR_NUM;
+#endif // !FEATURE_EH_FUNCLETS
+ lvaInlinedPInvokeFrameVar = BAD_VAR_NUM;
+ lvaReversePInvokeFrameVar = BAD_VAR_NUM;
+#if FEATURE_FIXED_OUT_ARGS
+ lvaPInvokeFrameRegSaveVar = BAD_VAR_NUM;
+ lvaOutgoingArgSpaceVar = BAD_VAR_NUM;
+#endif // FEATURE_FIXED_OUT_ARGS
+#ifdef _TARGET_ARM_
+ lvaPromotedStructAssemblyScratchVar = BAD_VAR_NUM;
+#endif // _TARGET_ARM_
+ lvaLocAllocSPvar = BAD_VAR_NUM;
+ lvaNewObjArrayArgs = BAD_VAR_NUM;
+ lvaGSSecurityCookie = BAD_VAR_NUM;
+#ifdef _TARGET_X86_
+ lvaVarargsBaseOfStkArgs = BAD_VAR_NUM;
+#endif // _TARGET_X86_
+ lvaVarargsHandleArg = BAD_VAR_NUM;
+ lvaSecurityObject = BAD_VAR_NUM;
+ lvaStubArgumentVar = BAD_VAR_NUM;
+ lvaArg0Var = BAD_VAR_NUM;
+ lvaMonAcquired = BAD_VAR_NUM;
+
+ lvaInlineeReturnSpillTemp = BAD_VAR_NUM;
+
+ gsShadowVarInfo = nullptr;
+#if FEATURE_EH_FUNCLETS
+ lvaPSPSym = BAD_VAR_NUM;
+#endif
+#if FEATURE_SIMD
+ lvaSIMDInitTempVarNum = BAD_VAR_NUM;
+#endif // FEATURE_SIMD
+ lvaCurEpoch = 0;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ lvaFirstStackIncomingArgNum = BAD_VAR_NUM;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+}
+
+/*****************************************************************************/
+
+void Compiler::lvaInitTypeRef()
+{
+
+ /* x86 args look something like this:
+ [this ptr] [hidden return buffer] [declared arguments]* [generic context] [var arg cookie]
+
+ x64 is closer to the native ABI:
+ [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]*
+ (Note: prior to .NET Framework 4.5.1 for Windows 8.1 (but not .NET Framework 4.5.1 "downlevel"),
+ the "hidden return buffer" came before the "this ptr". Now, the "this ptr" comes first. This
+ is different from the C++ order, where the "hidden return buffer" always comes first.)
+
+ ARM and ARM64 are the same as the current x64 convention:
+ [this ptr] [hidden return buffer] [generic context] [var arg cookie] [declared arguments]*
+
+ Key difference:
+ The var arg cookie and generic context are swapped with respect to the user arguments
+ */
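+
+    /* Illustrative example (hypothetical signature, for orientation only):
+       for an x64 instance method "int Foo(int a, long b)" the incoming lvaTable
+       entries are numbered 0: 'this', 1: a, 2: b. If the method instead returned
+       a large struct by value, a hidden return buffer would become entry 1 and
+       the declared arguments would move to entries 2 and 3. */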
+
+ /* Set compArgsCount and compLocalsCount */
+
+ info.compArgsCount = info.compMethodInfo->args.numArgs;
+
+ // Is there a 'this' pointer
+
+ if (!info.compIsStatic)
+ {
+ info.compArgsCount++;
+ }
+ else
+ {
+ info.compThisArg = BAD_VAR_NUM;
+ }
+
+ info.compILargsCount = info.compArgsCount;
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD && (info.compRetNativeType == TYP_STRUCT))
+ {
+ var_types structType = impNormStructType(info.compMethodInfo->args.retTypeClass);
+ info.compRetType = structType;
+ }
+#endif // FEATURE_SIMD
+
+ // Are we returning a struct using a return buffer argument?
+ //
+ const bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+
+ // Possibly change the compRetNativeType from TYP_STRUCT to a "primitive" type
+ // when we are returning a struct by value and it fits in one register
+ //
+ if (!hasRetBuffArg && varTypeIsStruct(info.compRetNativeType))
+ {
+ CORINFO_CLASS_HANDLE retClsHnd = info.compMethodInfo->args.retTypeClass;
+
+ Compiler::structPassingKind howToReturnStruct;
+ var_types returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
+
+ if (howToReturnStruct == SPK_PrimitiveType)
+ {
+ assert(returnType != TYP_UNKNOWN);
+ assert(returnType != TYP_STRUCT);
+
+ info.compRetNativeType = returnType;
+
+ // ToDo: Refactor this common code sequence into its own method as it is used 4+ times
+ if ((returnType == TYP_LONG) && (compLongUsed == false))
+ {
+ compLongUsed = true;
+ }
+ else if (((returnType == TYP_FLOAT) || (returnType == TYP_DOUBLE)) && (compFloatingPointUsed == false))
+ {
+ compFloatingPointUsed = true;
+ }
+ }
+ }
+
+ // Do we have a RetBuffArg?
+
+ if (hasRetBuffArg)
+ {
+ info.compArgsCount++;
+ }
+ else
+ {
+ info.compRetBuffArg = BAD_VAR_NUM;
+ }
+
+ /* There is a 'hidden' cookie pushed last when the
+ calling convention is varargs */
+
+ if (info.compIsVarArgs)
+ {
+ info.compArgsCount++;
+ }
+
+ // Is there an extra parameter used to pass instantiation info to
+ // shared generic methods and shared generic struct instance methods?
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ info.compArgsCount++;
+ }
+ else
+ {
+ info.compTypeCtxtArg = BAD_VAR_NUM;
+ }
+
+ lvaCount = info.compLocalsCount = info.compArgsCount + info.compMethodInfo->locals.numArgs;
+
+ info.compILlocalsCount = info.compILargsCount + info.compMethodInfo->locals.numArgs;
+
+ /* Now allocate the variable descriptor table */
+
+ if (compIsForInlining())
+ {
+ lvaTable = impInlineInfo->InlinerCompiler->lvaTable;
+ lvaCount = impInlineInfo->InlinerCompiler->lvaCount;
+ lvaTableCnt = impInlineInfo->InlinerCompiler->lvaTableCnt;
+
+ // No more stuff needs to be done.
+ return;
+ }
+
+ lvaTableCnt = lvaCount * 2;
+
+ if (lvaTableCnt < 16)
+ {
+ lvaTableCnt = 16;
+ }
+
+ lvaTable = (LclVarDsc*)compGetMemArray(lvaTableCnt, sizeof(*lvaTable), CMK_LvaTable);
+ size_t tableSize = lvaTableCnt * sizeof(*lvaTable);
+ memset(lvaTable, 0, tableSize);
+ for (unsigned i = 0; i < lvaTableCnt; i++)
+ {
+ new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+ //-------------------------------------------------------------------------
+ // Count the arguments and initialize the respective lvaTable[] entries
+ //
+ // First the implicit arguments
+ //-------------------------------------------------------------------------
+
+ InitVarDscInfo varDscInfo;
+ varDscInfo.Init(lvaTable, hasRetBuffArg);
+
+ lvaInitArgs(&varDscInfo);
+
+ //-------------------------------------------------------------------------
+ // Finally the local variables
+ //-------------------------------------------------------------------------
+
+ unsigned varNum = varDscInfo.varNum;
+ LclVarDsc* varDsc = varDscInfo.varDsc;
+ CORINFO_ARG_LIST_HANDLE localsSig = info.compMethodInfo->locals.args;
+
+ for (unsigned i = 0; i < info.compMethodInfo->locals.numArgs;
+ i++, varNum++, varDsc++, localsSig = info.compCompHnd->getArgNext(localsSig))
+ {
+ CORINFO_CLASS_HANDLE typeHnd;
+ CorInfoTypeWithMod corInfoType =
+ info.compCompHnd->getArgType(&info.compMethodInfo->locals, localsSig, &typeHnd);
+ lvaInitVarDsc(varDsc, varNum, strip(corInfoType), typeHnd, localsSig, &info.compMethodInfo->locals);
+
+ varDsc->lvPinned = ((corInfoType & CORINFO_TYPE_MOD_PINNED) != 0);
+ varDsc->lvOnFrame = true; // The final home for this local variable might be our local stack frame
+ }
+
+ if ( // If there already exist unsafe buffers, don't mark more structs as unsafe
+ // as that will cause them to be placed along with the real unsafe buffers,
+ // unnecessarily exposing them to overruns. This can affect GS tests which
+ // intentionally do buffer-overruns.
+ !getNeedsGSSecurityCookie() &&
+ // GS checks require the stack to be re-ordered, which can't be done with EnC
+ !opts.compDbgEnC && compStressCompile(STRESS_UNSAFE_BUFFER_CHECKS, 25))
+ {
+ setNeedsGSSecurityCookie();
+ compGSReorderStackLayout = true;
+
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ if ((lvaTable[i].lvType == TYP_STRUCT) && compStressCompile(STRESS_GENERIC_VARN, 60))
+ {
+ lvaTable[i].lvIsUnsafeBuffer = true;
+ }
+ }
+ }
+
+ if (getNeedsGSSecurityCookie())
+ {
+ // Ensure that there will be at least one stack variable since
+ // we require that the GSCookie does not have a 0 stack offset.
+ unsigned dummy = lvaGrabTempWithImplicitUse(false DEBUGARG("GSCookie dummy"));
+ lvaTable[dummy].lvType = TYP_INT;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ lvaTableDump(INITIAL_FRAME_LAYOUT);
+ }
+#endif
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo)
+{
+ compArgSize = 0;
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Prespill all argument regs on to stack in case of Arm when under profiler.
+ if (compIsProfilerHookNeeded())
+ {
+ codeGen->regSet.rsMaskPreSpillRegArg |= RBM_ARG_REGS;
+ }
+#endif
+
+ //----------------------------------------------------------------------
+
+ /* Is there a "this" pointer ? */
+ lvaInitThisPtr(varDscInfo);
+
+ /* If we have a hidden return-buffer parameter, that comes here */
+ lvaInitRetBuffArg(varDscInfo);
+
+//======================================================================
+
+#if USER_ARGS_COME_LAST
+ //@GENERICS: final instantiation-info argument for shared generic methods
+ // and shared generic struct instance methods
+ lvaInitGenericsCtxt(varDscInfo);
+
+ /* If the method is varargs, process the varargs cookie */
+ lvaInitVarArgsHandle(varDscInfo);
+#endif
+
+ //-------------------------------------------------------------------------
+ // Now walk the function signature for the explicit user arguments
+ //-------------------------------------------------------------------------
+ lvaInitUserArgs(varDscInfo);
+
+#if !USER_ARGS_COME_LAST
+ //@GENERICS: final instantiation-info argument for shared generic methods
+ // and shared generic struct instance methods
+ lvaInitGenericsCtxt(varDscInfo);
+
+ /* If the method is varargs, process the varargs cookie */
+ lvaInitVarArgsHandle(varDscInfo);
+#endif
+
+ //----------------------------------------------------------------------
+
+ // We have set info.compArgsCount in compCompile()
+ noway_assert(varDscInfo->varNum == info.compArgsCount);
+ assert(varDscInfo->intRegArgNum <= MAX_REG_ARG);
+
+ codeGen->intRegState.rsCalleeRegArgCount = varDscInfo->intRegArgNum;
+#if !FEATURE_STACK_FP_X87
+ codeGen->floatRegState.rsCalleeRegArgCount = varDscInfo->floatRegArgNum;
+#endif // !FEATURE_STACK_FP_X87
+
+ // The total argument size must be aligned.
+ noway_assert((compArgSize % sizeof(void*)) == 0);
+
+#ifdef _TARGET_X86_
+    /* We cannot pass more than 2^16 bytes of arguments, as the "ret"
+       instruction can only pop a 16-bit immediate number of bytes. This could
+       be handled, but it would be very difficult for fully interruptible code */
+
+ if (compArgSize != (size_t)(unsigned short)compArgSize)
+ NO_WAY("Too many arguments for the \"ret\" instruction to pop");
+#endif
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
+{
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ if (!info.compIsStatic)
+ {
+ varDsc->lvIsParam = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ varDsc->lvIsPtr = 1;
+
+ lvaArg0Var = info.compThisArg = varDscInfo->varNum;
+ noway_assert(info.compThisArg == 0);
+
+ if (eeIsValueClass(info.compClassHnd))
+ {
+ varDsc->lvType = TYP_BYREF;
+#ifdef FEATURE_SIMD
+ if (featureSIMD)
+ {
+ var_types simdBaseType = TYP_UNKNOWN;
+ var_types type = impNormStructType(info.compClassHnd, nullptr, nullptr, &simdBaseType);
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ assert(varTypeIsSIMD(type));
+ varDsc->lvSIMDType = true;
+ varDsc->lvBaseType = simdBaseType;
+ }
+ }
+#endif // FEATURE_SIMD
+ }
+ else
+ {
+ varDsc->lvType = TYP_REF;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ varDsc->lvVerTypeInfo = verMakeTypeInfo(info.compClassHnd);
+
+ if (varDsc->lvVerTypeInfo.IsValueClass())
+ {
+ varDsc->lvVerTypeInfo.MakeByRef();
+ }
+ }
+ else
+ {
+ varDsc->lvVerTypeInfo = typeInfo();
+ }
+
+ // Mark the 'this' pointer for the method
+ varDsc->lvVerTypeInfo.SetIsThisPtr();
+
+ varDsc->lvIsRegArg = 1;
+ noway_assert(varDscInfo->intRegArgNum == 0);
+
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->allocRegArg(TYP_INT), varDsc->TypeGet());
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'this' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif
+ compArgSize += TARGET_POINTER_SIZE;
+
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
+{
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ bool hasRetBuffArg = impMethodInfo_hasRetBuffArg(info.compMethodInfo);
+
+ // These two should always match
+ noway_assert(hasRetBuffArg == varDscInfo->hasRetBufArg);
+
+ if (hasRetBuffArg)
+ {
+ info.compRetBuffArg = varDscInfo->varNum;
+ varDsc->lvType = TYP_BYREF;
+ varDsc->lvIsParam = 1;
+ varDsc->lvIsRegArg = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+ if (hasFixedRetBuffReg())
+ {
+ varDsc->lvArgReg = theFixedRetBuffReg();
+ }
+ else
+ {
+ unsigned retBuffArgNum = varDscInfo->allocRegArg(TYP_INT);
+ varDsc->lvArgReg = genMapIntRegArgNumToRegNum(retBuffArgNum);
+ }
+
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
+ info.compRetBuffDefStack = 0;
+ if (info.compRetType == TYP_STRUCT)
+ {
+ CORINFO_SIG_INFO sigInfo;
+ info.compCompHnd->getMethodSig(info.compMethodHnd, &sigInfo);
+ assert(JITtype2varType(sigInfo.retType) == info.compRetType); // Else shouldn't have a ret buff.
+
+ info.compRetBuffDefStack =
+ (info.compCompHnd->isStructRequiringStackAllocRetBuf(sigInfo.retTypeClass) == TRUE);
+ if (info.compRetBuffDefStack)
+ {
+                // If we're assured that the ret buff argument points into the caller's stack, we will type it
+                // as "TYP_I_IMPL" (native int/unmanaged pointer) so that it's not tracked as a GC ref.
+ varDsc->lvType = TYP_I_IMPL;
+ }
+ }
+ assert(isValidIntArgReg(varDsc->lvArgReg));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'__retBuf' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif
+
+ /* Update the total argument size, count and varDsc */
+
+ compArgSize += TARGET_POINTER_SIZE;
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
+{
+//-------------------------------------------------------------------------
+// Walk the function signature for the explicit arguments
+//-------------------------------------------------------------------------
+
+#if defined(_TARGET_X86_)
+ // Only (some of) the implicit args are enregistered for varargs
+ varDscInfo->maxIntRegArgNum = info.compIsVarArgs ? varDscInfo->intRegArgNum : MAX_REG_ARG;
+#elif defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+    // On Windows AMD64 the int and float argument registers share a single index, so keep the float
+    // index in sync with the int index. (On System V the float registers are indexed separately.)
+ varDscInfo->floatRegArgNum = varDscInfo->intRegArgNum;
+#endif // _TARGET_*
+
+ CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args;
+
+ const unsigned argSigLen = info.compMethodInfo->args.numArgs;
+
+ regMaskTP doubleAlignMask = RBM_NONE;
+ for (unsigned i = 0; i < argSigLen;
+ i++, varDscInfo->varNum++, varDscInfo->varDsc++, argLst = info.compCompHnd->getArgNext(argLst))
+ {
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ CORINFO_CLASS_HANDLE typeHnd = nullptr;
+
+ CorInfoTypeWithMod corInfoType = info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &typeHnd);
+ varDsc->lvIsParam = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ lvaInitVarDsc(varDsc, varDscInfo->varNum, strip(corInfoType), typeHnd, argLst, &info.compMethodInfo->args);
+
+ // For ARM, ARM64, and AMD64 varargs, all arguments go in integer registers
+ var_types argType = mangleVarArgsType(varDsc->TypeGet());
+ var_types origArgType = argType;
+        // The ARM softfp calling convention should affect only the floating point arguments.
+        // Otherwise we would generate many surplus pre-spills and other memory operations
+        // on the associated locations.
+ bool isSoftFPPreSpill = opts.compUseSoftFP && varTypeIsFloating(varDsc->TypeGet());
+ unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+ unsigned cSlots = argSize / TARGET_POINTER_SIZE; // the total number of slots of this argument
+ bool isHfaArg = false;
+ var_types hfaType = TYP_UNDEF;
+
+ // Methods that use VarArg or SoftFP cannot have HFA arguments
+ if (!info.compIsVarArgs && !opts.compUseSoftFP)
+ {
+ // If the argType is a struct, then check if it is an HFA
+ if (varTypeIsStruct(argType))
+ {
+ hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ isHfaArg = varTypeIsFloating(hfaType);
+ }
+ }
+ if (isHfaArg)
+ {
+ // We have an HFA argument, so from here on out treat the type as a float or double.
+            // The original struct type is still available via origArgType.
+ // We also update the cSlots to be the number of float/double fields in the HFA
+ argType = hfaType;
+ cSlots = varDsc->lvHfaSlots();
+ }
+ // The number of slots that must be enregistered if we are to consider this argument enregistered.
+ // This is normally the same as cSlots, since we normally either enregister the entire object,
+ // or none of it. For structs on ARM, however, we only need to enregister a single slot to consider
+ // it enregistered, as long as we can split the rest onto the stack.
+ unsigned cSlotsToEnregister = cSlots;
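+        // Illustrative example (hypothetical): on ARM, a 12-byte non-HFA struct (cSlots == 3) arriving when
+        // only r3 is still free can still be treated as enregistered, because for such structs
+        // cSlotsToEnregister is later reduced to 1 (see the ARM-specific code below); r3 takes the first
+        // slot and the remaining two slots are split onto the stack.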
+
+#ifdef _TARGET_ARM_
+ // On ARM we pass the first 4 words of integer arguments and non-HFA structs in registers.
+ // But we pre-spill user arguments in varargs methods and structs.
+ //
+ unsigned cAlign;
+ bool preSpill = info.compIsVarArgs || isSoftFPPreSpill;
+
+ switch (origArgType)
+ {
+ case TYP_STRUCT:
+ assert(varDsc->lvSize() == argSize);
+ cAlign = varDsc->lvStructDoubleAlign ? 2 : 1;
+
+ // HFA arguments go on the stack frame. They don't get spilled in the prolog like struct
+ // arguments passed in the integer registers but get homed immediately after the prolog.
+ if (!isHfaArg)
+ {
+                    cSlotsToEnregister = 1; // HFAs must be enregistered wholly or not at all; other structs can be split.
+ preSpill = true;
+ }
+ break;
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+ cAlign = 2;
+ break;
+
+ default:
+ cAlign = 1;
+ break;
+ }
+
+ if (isRegParamType(argType))
+ {
+ compArgSize += varDscInfo->alignReg(argType, cAlign) * REGSIZE_BYTES;
+ }
+
+ if (argType == TYP_STRUCT)
+ {
+ // Are we going to split the struct between registers and stack? We can do that as long as
+ // no floating-point arguments have been put on the stack.
+ //
+ // From the ARM Procedure Call Standard:
+ // Rule C.5: "If the NCRN is less than r4 **and** the NSAA is equal to the SP,"
+ // then split the argument between registers and stack. Implication: if something
+ // has already been spilled to the stack, then anything that would normally be
+ // split between the core registers and the stack will be put on the stack.
+ // Anything that follows will also be on the stack. However, if something from
+ // floating point regs has been spilled to the stack, we can still use r0-r3 until they are full.
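+            //
+            // Illustrative scenario (hypothetical): r0 and r1 are already taken and a 12-byte struct
+            // (3 slots) would normally be split across r2, r3 and one stack slot. If a floating-point
+            // argument has already been placed on the stack, the split is not performed: the remaining
+            // integer argument registers are retired and the struct goes entirely to the stack below.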
+
+ if (varDscInfo->canEnreg(TYP_INT, 1) && // The beginning of the struct can go in a register
+ !varDscInfo->canEnreg(TYP_INT, cSlots) && // The end of the struct can't fit in a register
+ varDscInfo->existAnyFloatStackArgs()) // There's at least one stack-based FP arg already
+ {
+ varDscInfo->setAllRegArgUsed(TYP_INT); // Prevent all future use of integer registers
+ preSpill = false; // This struct won't be prespilled, since it will go on the stack
+ }
+ }
+
+ if (preSpill)
+ {
+ for (unsigned ix = 0; ix < cSlots; ix++)
+ {
+ if (!varDscInfo->canEnreg(TYP_INT, ix + 1))
+ {
+ break;
+ }
+ regMaskTP regMask = genMapArgNumToRegMask(varDscInfo->regArgNum(TYP_INT) + ix, TYP_INT);
+ if (cAlign == 2)
+ {
+ doubleAlignMask |= regMask;
+ }
+ codeGen->regSet.rsMaskPreSpillRegArg |= regMask;
+ }
+ }
+ else
+ {
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+ }
+
+#else // !_TARGET_ARM_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (varTypeIsStruct(argType))
+ {
+ assert(typeHnd != nullptr);
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ unsigned intRegCount = 0;
+ unsigned floatRegCount = 0;
+
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ intRegCount++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ floatRegCount++;
+ }
+ else
+ {
+ assert(false && "Invalid eightbyte classification type.");
+ break;
+ }
+ }
+
+ if (intRegCount != 0 && !varDscInfo->canEnreg(TYP_INT, intRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+
+ if (floatRegCount != 0 && !varDscInfo->canEnreg(TYP_FLOAT, floatRegCount))
+ {
+ structDesc.passedInRegisters = false; // No register to enregister the eightbytes.
+ }
+ }
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The final home for this incoming register might be our local stack frame.
+ // For System V platforms the final home will always be on the local stack frame.
+ varDsc->lvOnFrame = true;
+
+#endif // !_TARGET_ARM_
+
+ bool canPassArgInRegisters = false;
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(argType))
+ {
+ canPassArgInRegisters = structDesc.passedInRegisters;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ canPassArgInRegisters = varDscInfo->canEnreg(argType, cSlotsToEnregister);
+ }
+
+ if (canPassArgInRegisters)
+ {
+ /* Another register argument */
+
+ // Allocate the registers we need. allocRegArg() returns the first argument register number of the set.
+ // For non-HFA structs, we still "try" to enregister the whole thing; it will just max out if splitting
+ // to the stack happens.
+ unsigned firstAllocatedRegArgNum = 0;
+
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif // FEATURE_MULTIREG_ARGS
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ unsigned secondAllocatedRegArgNum = 0;
+ var_types firstEightByteType = TYP_UNDEF;
+ var_types secondEightByteType = TYP_UNDEF;
+
+ if (varTypeIsStruct(argType))
+ {
+ if (structDesc.eightByteCount >= 1)
+ {
+ firstEightByteType = GetEightByteType(structDesc, 0);
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(firstEightByteType, 1);
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ firstAllocatedRegArgNum = varDscInfo->allocRegArg(argType, cSlots);
+ }
+
+ if (isHfaArg)
+ {
+ // We need to save the fact that this HFA is enregistered
+ varDsc->lvSetIsHfa();
+ varDsc->lvSetIsHfaRegArg();
+ varDsc->SetHfaType(hfaType);
+ varDsc->lvIsMultiRegArg = (varDsc->lvHfaSlots() > 1);
+ }
+
+ varDsc->lvIsRegArg = 1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (varTypeIsStruct(argType))
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType);
+
+ // If there is a second eightbyte, get a register for it too and map the arg to the reg number.
+ if (structDesc.eightByteCount >= 2)
+ {
+ secondEightByteType = GetEightByteType(structDesc, 1);
+ secondAllocatedRegArgNum = varDscInfo->allocRegArg(secondEightByteType, 1);
+ }
+
+ if (secondEightByteType != TYP_UNDEF)
+ {
+ varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this);
+ }
+#else // ARM32 or ARM64
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, TYP_I_IMPL);
+#ifdef _TARGET_ARM64_
+ if (cSlots == 2)
+ {
+ varDsc->lvOtherArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_I_IMPL);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherArgReg), this);
+ }
+#endif // _TARGET_ARM64_
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ }
+ else
+#endif // FEATURE_MULTIREG_ARGS
+ {
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, argType);
+ }
+
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+
+#ifdef _TARGET_ARM_
+ if (varDsc->TypeGet() == TYP_LONG)
+ {
+ varDsc->lvOtherReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum + 1, TYP_INT);
+ varDsc->addPrefReg(genRegMask(varDsc->lvOtherReg), this);
+ }
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Arg #%u passed in register(s) ", varDscInfo->varNum);
+ bool isFloat = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // In case of one eightbyte struct the type is already normalized earlier.
+ // The varTypeIsFloating(argType) is good for this case.
+ if (varTypeIsStruct(argType) && (structDesc.eightByteCount >= 1))
+ {
+ isFloat = varTypeIsFloating(firstEightByteType);
+ }
+ else
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ isFloat = varTypeIsFloating(argType);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(argType))
+ {
+ // Print both registers, just to be clear
+ if (firstEightByteType == TYP_UNDEF)
+ {
+ printf("firstEightByte: <not used>");
+ }
+ else
+ {
+ printf("firstEightByte: %s",
+ getRegName(genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType),
+ isFloat));
+ }
+
+ if (secondEightByteType == TYP_UNDEF)
+ {
+ printf(", secondEightByte: <not used>");
+ }
+ else
+ {
+ printf(", secondEightByte: %s",
+ getRegName(genMapRegArgNumToRegNum(secondAllocatedRegArgNum, secondEightByteType),
+ varTypeIsFloating(secondEightByteType)));
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ unsigned regArgNum = genMapRegNumToRegArgNum(varDsc->lvArgReg, argType);
+
+ for (unsigned ix = 0; ix < cSlots; ix++, regArgNum++)
+ {
+ if (ix > 0)
+ {
+ printf(",");
+ }
+
+ if (!isFloat && (regArgNum >= varDscInfo->maxIntRegArgNum)) // a struct has been split between
+ // registers and stack
+ {
+ printf(" stack slots:%d", cSlots - ix);
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ if (isFloat)
+ {
+ // Print register size prefix
+ if (argType == TYP_DOUBLE)
+ {
+ // Print both registers, just to be clear
+ printf("%s/%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat),
+ getRegName(genMapRegArgNumToRegNum(regArgNum + 1, argType), isFloat));
+
+ // doubles take 2 slots
+ assert(ix + 1 < cSlots);
+ ++ix;
+ ++regArgNum;
+ }
+ else
+ {
+ printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
+ }
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ printf("%s", getRegName(genMapRegArgNumToRegNum(regArgNum, argType), isFloat));
+ }
+ }
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ } // end if (canPassArgInRegisters)
+ else
+ {
+#if defined(_TARGET_ARM_)
+
+ varDscInfo->setAllRegArgUsed(argType);
+ if (varTypeIsFloating(argType))
+ {
+ varDscInfo->setAnyFloatStackArgs();
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // If we needed to use the stack in order to pass this argument then
+ // record the fact that we have used up any remaining registers of this 'type'
+            // This prevents any 'backfilling' from occurring on ARM64
+ //
+ varDscInfo->setAllRegArgUsed(argType);
+
+#endif // _TARGET_XXX_
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+        // The arg size is the number of bytes of the argument. For a struct it might not be a multiple of
+        // TARGET_POINTER_SIZE. The stack-allocated space must always be a multiple of TARGET_POINTER_SIZE,
+        // so round it up.
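+        // For example (illustrative): a 12-byte struct on this 64-bit target contributes
+        // roundUp(12, 8) == 16 bytes to compArgSize.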
+ compArgSize += (unsigned)roundUp(argSize, TARGET_POINTER_SIZE);
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ compArgSize += argSize;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (info.compIsVarArgs || isHfaArg || isSoftFPPreSpill)
+ {
+#if defined(_TARGET_X86_)
+ varDsc->lvStkOffs = compArgSize;
+#else // !_TARGET_X86_
+ // TODO-CQ: We shouldn't have to go as far as to declare these
+ // address-exposed -- DoNotEnregister should suffice.
+ lvaSetVarAddrExposed(varDscInfo->varNum);
+#endif // !_TARGET_X86_
+ }
+ } // for each user arg
+
+#ifdef _TARGET_ARM_
+ if (doubleAlignMask != RBM_NONE)
+ {
+ assert(RBM_ARG_REGS == 0xF);
+ assert((doubleAlignMask & RBM_ARG_REGS) == doubleAlignMask);
+ if (doubleAlignMask != RBM_NONE && doubleAlignMask != RBM_ARG_REGS)
+ {
+ // doubleAlignMask can only be 0011 and/or 1100 as 'double aligned types' can
+ // begin at r0 or r2.
+ assert(doubleAlignMask == 0x3 || doubleAlignMask == 0xC /* || 0xF is if'ed out */);
+
+ // Now if doubleAlignMask is 0011 i.e., {r0,r1} and we prespill r2 or r3
+ // but not both, then the stack would be misaligned for r0. So spill both
+ // r2 and r3.
+ //
+ // ; +0 --- caller SP double aligned ----
+ // ; -4 r2 r3
+ // ; -8 r1 r1
+ // ; -c r0 r0 <-- misaligned.
+ // ; callee saved regs
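+            //
+            // Worked example (illustrative): doubleAlignMask == 0x3 ({r0,r1}) with
+            // rsMaskPreSpillRegArg == 0x7 ({r0,r1,r2}): prespilling only r2 would leave the
+            // double in {r0,r1} misaligned, so r3 is added as well:
+            //     (~0x7 & ~0x3) & RBM_ARG_REGS == 0x8 (r3)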
+ if (doubleAlignMask == 0x3 && doubleAlignMask != codeGen->regSet.rsMaskPreSpillRegArg)
+ {
+ codeGen->regSet.rsMaskPreSpillAlign =
+ (~codeGen->regSet.rsMaskPreSpillRegArg & ~doubleAlignMask) & RBM_ARG_REGS;
+ }
+ }
+ }
+#endif // _TARGET_ARM_
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitGenericsCtxt(InitVarDscInfo* varDscInfo)
+{
+ //@GENERICS: final instantiation-info argument for shared generic methods
+ // and shared generic struct instance methods
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ info.compTypeCtxtArg = varDscInfo->varNum;
+
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ varDsc->lvIsParam = 1;
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ varDsc->lvType = TYP_I_IMPL;
+
+ if (varDscInfo->canEnreg(TYP_I_IMPL))
+ {
+ /* Another register argument */
+
+ varDsc->lvIsRegArg = 1;
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(varDscInfo->regArgNum(TYP_INT), varDsc->TypeGet());
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+
+ varDscInfo->intRegArgNum++;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'GenCtxt' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ // For the RyuJIT backend, we need to mark these as being on the stack,
+ // as this is not done elsewhere in the case that canEnreg returns false.
+ varDsc->lvOnFrame = true;
+ }
+#endif // !LEGACY_BACKEND
+
+ compArgSize += TARGET_POINTER_SIZE;
+
+#if defined(_TARGET_X86_)
+ if (info.compIsVarArgs)
+ varDsc->lvStkOffs = compArgSize;
+#endif // _TARGET_X86_
+
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
+{
+ if (info.compIsVarArgs)
+ {
+ lvaVarargsHandleArg = varDscInfo->varNum;
+
+ LclVarDsc* varDsc = varDscInfo->varDsc;
+ varDsc->lvType = TYP_I_IMPL;
+ varDsc->lvIsParam = 1;
+ // Make sure this lives in the stack -- address may be reported to the VM.
+ // TODO-CQ: This should probably be:
+ // lvaSetVarDoNotEnregister(varDscInfo->varNum DEBUGARG(DNER_VMNeedsStackAddr));
+ // But that causes problems, so, for expedience, I switched back to this heavyweight
+ // hammer. But I think it should be possible to switch; it may just work now
+ // that other problems are fixed.
+ lvaSetVarAddrExposed(varDscInfo->varNum);
+
+#if ASSERTION_PROP
+ varDsc->lvSingleDef = 1;
+#endif
+
+ if (varDscInfo->canEnreg(TYP_I_IMPL))
+ {
+ /* Another register argument */
+
+ unsigned varArgHndArgNum = varDscInfo->allocRegArg(TYP_I_IMPL);
+
+ varDsc->lvIsRegArg = 1;
+ varDsc->lvArgReg = genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL);
+#if FEATURE_MULTIREG_ARGS
+ varDsc->lvOtherArgReg = REG_NA;
+#endif
+ varDsc->setPrefReg(varDsc->lvArgReg, this);
+ varDsc->lvOnFrame = true; // The final home for this incoming register might be our local stack frame
+#ifdef _TARGET_ARM_
+ // This has to be spilled right in front of the real arguments and we have
+            // to pre-spill all the argument registers explicitly because we only have
+            // symbols for the declared ones, not any potential variadic ones.
+ for (unsigned ix = varArgHndArgNum; ix < ArrLen(intArgMasks); ix++)
+ {
+ codeGen->regSet.rsMaskPreSpillRegArg |= intArgMasks[ix];
+ }
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("'VarArgHnd' passed in register %s\n", getRegName(varDsc->lvArgReg));
+ }
+#endif // DEBUG
+ }
+#ifndef LEGACY_BACKEND
+ else
+ {
+ // For the RyuJIT backend, we need to mark these as being on the stack,
+ // as this is not done elsewhere in the case that canEnreg returns false.
+ varDsc->lvOnFrame = true;
+ }
+#endif // !LEGACY_BACKEND
+
+ /* Update the total argument size, count and varDsc */
+
+ compArgSize += TARGET_POINTER_SIZE;
+
+ varDscInfo->varNum++;
+ varDscInfo->varDsc++;
+
+#if defined(_TARGET_X86_)
+ varDsc->lvStkOffs = compArgSize;
+
+ // Allocate a temp to point at the beginning of the args
+
+ lvaVarargsBaseOfStkArgs = lvaGrabTemp(false DEBUGARG("Varargs BaseOfStkArgs"));
+ lvaTable[lvaVarargsBaseOfStkArgs].lvType = TYP_I_IMPL;
+
+#endif // _TARGET_X86_
+ }
+}
+
+/*****************************************************************************/
+void Compiler::lvaInitVarDsc(LclVarDsc* varDsc,
+ unsigned varNum,
+ CorInfoType corInfoType,
+ CORINFO_CLASS_HANDLE typeHnd,
+ CORINFO_ARG_LIST_HANDLE varList,
+ CORINFO_SIG_INFO* varSig)
+{
+ noway_assert(varDsc == &lvaTable[varNum]);
+
+ switch (corInfoType)
+ {
+ // Mark types that looks like a pointer for doing shadow-copying of
+ // parameters if we have an unsafe buffer.
+ // Note that this does not handle structs with pointer fields. Instead,
+ // we rely on using the assign-groups/equivalence-groups in
+ // gsFindVulnerableParams() to determine if a buffer-struct contains a
+ // pointer. We could do better by having the EE determine this for us.
+ // Note that we want to keep buffers without pointers at lower memory
+ // addresses than buffers with pointers.
+ case CORINFO_TYPE_PTR:
+ case CORINFO_TYPE_BYREF:
+ case CORINFO_TYPE_CLASS:
+ case CORINFO_TYPE_STRING:
+ case CORINFO_TYPE_VAR:
+ case CORINFO_TYPE_REFANY:
+ varDsc->lvIsPtr = 1;
+ break;
+ default:
+ break;
+ }
+
+ var_types type = JITtype2varType(corInfoType);
+ if (varTypeIsFloating(type))
+ {
+ compFloatingPointUsed = true;
+ }
+
+ if (tiVerificationNeeded)
+ {
+ varDsc->lvVerTypeInfo = verParseArgSigToTypeInfo(varSig, varList);
+ }
+
+ if (tiVerificationNeeded)
+ {
+ if (varDsc->lvIsParam)
+ {
+ // For an incoming ValueType we better be able to have the full type information
+ // so that we can layout the parameter offsets correctly
+
+ if (varTypeIsStruct(type) && varDsc->lvVerTypeInfo.IsDead())
+ {
+ BADCODE("invalid ValueType parameter");
+ }
+
+ // For an incoming reference type we need to verify that the actual type is
+ // a reference type and not a valuetype.
+
+ if (type == TYP_REF &&
+ !(varDsc->lvVerTypeInfo.IsType(TI_REF) || varDsc->lvVerTypeInfo.IsUnboxedGenericTypeVar()))
+ {
+ BADCODE("parameter type mismatch");
+ }
+ }
+
+        // Disallow byrefs to byref-like objects (ArgTypeHandle).
+        // Technically we could get away with just not setting them.
+ if (varDsc->lvVerTypeInfo.IsByRef() && verIsByRefLike(DereferenceByRef(varDsc->lvVerTypeInfo)))
+ {
+ varDsc->lvVerTypeInfo = typeInfo();
+ }
+
+ // we don't want the EE to assert in lvaSetStruct on bad sigs, so change
+ // the JIT type to avoid even trying to call back
+ if (varTypeIsStruct(type) && varDsc->lvVerTypeInfo.IsDead())
+ {
+ type = TYP_VOID;
+ }
+ }
+
+ if (typeHnd)
+ {
+ unsigned cFlags = info.compCompHnd->getClassAttribs(typeHnd);
+
+ // We can get typeHnds for primitive types, these are value types which only contain
+ // a primitive. We will need the typeHnd to distinguish them, so we store it here.
+ if ((cFlags & CORINFO_FLG_VALUECLASS) && !varTypeIsStruct(type))
+ {
+ if (tiVerificationNeeded == false)
+ {
+ // printf("This is a struct that the JIT will treat as a primitive\n");
+ varDsc->lvVerTypeInfo = verMakeTypeInfo(typeHnd);
+ }
+ }
+
+ varDsc->lvOverlappingFields = StructHasOverlappingFields(cFlags);
+ }
+
+ if (varTypeIsGC(type))
+ {
+ varDsc->lvStructGcCount = 1;
+ }
+
+ // Set the lvType (before this point it is TYP_UNDEF).
+ if ((varTypeIsStruct(type)))
+ {
+ lvaSetStruct(varNum, typeHnd, typeHnd != nullptr, !tiVerificationNeeded);
+ }
+ else
+ {
+ varDsc->lvType = type;
+ }
+
+#if OPT_BOOL_OPS
+ if (type == TYP_BOOL)
+ {
+ varDsc->lvIsBoolean = true;
+ }
+#endif
+
+#ifdef DEBUG
+ varDsc->lvStkOffs = BAD_STK_OFFS;
+#endif
+}
+
+/*****************************************************************************
+ * Returns our internal varNum for a given IL variable.
+ * Asserts assume it is called after lvaTable[] has been set up.
+ */
+
+unsigned Compiler::compMapILvarNum(unsigned ILvarNum)
+{
+ noway_assert(ILvarNum < info.compILlocalsCount || ILvarNum > unsigned(ICorDebugInfo::UNKNOWN_ILNUM));
+
+ unsigned varNum;
+
+ if (ILvarNum == (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM)
+ {
+ // The varargs cookie is the last argument in lvaTable[]
+ noway_assert(info.compIsVarArgs);
+
+ varNum = lvaVarargsHandleArg;
+ noway_assert(lvaTable[varNum].lvIsParam);
+ }
+ else if (ILvarNum == (unsigned)ICorDebugInfo::RETBUF_ILNUM)
+ {
+ noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
+ varNum = info.compRetBuffArg;
+ }
+ else if (ILvarNum == (unsigned)ICorDebugInfo::TYPECTXT_ILNUM)
+ {
+ noway_assert(info.compTypeCtxtArg >= 0);
+ varNum = unsigned(info.compTypeCtxtArg);
+ }
+ else if (ILvarNum < info.compILargsCount)
+ {
+ // Parameter
+ varNum = compMapILargNum(ILvarNum);
+ noway_assert(lvaTable[varNum].lvIsParam);
+ }
+ else if (ILvarNum < info.compILlocalsCount)
+ {
+ // Local variable
+ unsigned lclNum = ILvarNum - info.compILargsCount;
+ varNum = info.compArgsCount + lclNum;
+ noway_assert(!lvaTable[varNum].lvIsParam);
+ }
+ else
+ {
+ unreached();
+ }
+
+ noway_assert(varNum < info.compLocalsCount);
+ return varNum;
+}
+
+/*****************************************************************************
+ * Returns the IL variable number given our internal varNum.
+ * Special return values are VARG_ILNUM, RETBUF_ILNUM, TYPECTXT_ILNUM.
+ *
+ * Returns UNKNOWN_ILNUM if it can't be mapped.
+ */
+
+unsigned Compiler::compMap2ILvarNum(unsigned varNum)
+{
+ if (compIsForInlining())
+ {
+ return impInlineInfo->InlinerCompiler->compMap2ILvarNum(varNum);
+ }
+
+ noway_assert(varNum < lvaCount);
+
+ if (varNum == info.compRetBuffArg)
+ {
+ return (unsigned)ICorDebugInfo::RETBUF_ILNUM;
+ }
+
+ // Is this a varargs function?
+ if (info.compIsVarArgs && varNum == lvaVarargsHandleArg)
+ {
+ return (unsigned)ICorDebugInfo::VARARGS_HND_ILNUM;
+ }
+
+ // We create an extra argument for the type context parameter
+ // needed for shared generic code.
+ if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum == (unsigned)info.compTypeCtxtArg)
+ {
+ return (unsigned)ICorDebugInfo::TYPECTXT_ILNUM;
+ }
+
+ // Now mutate varNum to remove extra parameters from the count.
+ if ((info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) && varNum > (unsigned)info.compTypeCtxtArg)
+ {
+ varNum--;
+ }
+
+ if (info.compIsVarArgs && varNum > lvaVarargsHandleArg)
+ {
+ varNum--;
+ }
+
+    /* Is there a hidden argument for the return buffer?
+ Note that this code works because if the RetBuffArg is not present,
+ compRetBuffArg will be BAD_VAR_NUM */
+ if (info.compRetBuffArg != BAD_VAR_NUM && varNum > info.compRetBuffArg)
+ {
+ varNum--;
+ }
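+    // Illustrative example (hypothetical): for an instance method with a hidden return buffer at
+    // lvaTable index 1 ([this][retBuf][arg1][arg2]), an internal varNum of 3 (arg2) maps to IL
+    // variable number 2 after the decrement above.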
+
+ if (varNum >= info.compLocalsCount)
+ {
+ return (unsigned)ICorDebugInfo::UNKNOWN_ILNUM; // Cannot be mapped
+ }
+
+ return varNum;
+}
+
+/*****************************************************************************
+ * Returns true if variable "varNum" may be address-exposed.
+ */
+
+bool Compiler::lvaVarAddrExposed(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ return varDsc->lvAddrExposed;
+}
+
+/*****************************************************************************
+ * Returns true iff variable "varNum" should not be enregistered (for one of several reasons).
+ */
+
+bool Compiler::lvaVarDoNotEnregister(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ return varDsc->lvDoNotEnregister;
+}
+
+/*****************************************************************************
+ * Returns the handle to the class of the local variable varNum
+ */
+
+CORINFO_CLASS_HANDLE Compiler::lvaGetStruct(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ return varDsc->lvVerTypeInfo.GetClassHandleForValueClass();
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by Compiler::lvaCanPromoteStructVar().
+ */
+
+/* static */
+int __cdecl Compiler::lvaFieldOffsetCmp(const void* field1, const void* field2)
+{
+ lvaStructFieldInfo* pFieldInfo1 = (lvaStructFieldInfo*)field1;
+ lvaStructFieldInfo* pFieldInfo2 = (lvaStructFieldInfo*)field2;
+
+ if (pFieldInfo1->fldOffset == pFieldInfo2->fldOffset)
+ {
+ return 0;
+ }
+ else
+ {
+ return (pFieldInfo1->fldOffset > pFieldInfo2->fldOffset) ? +1 : -1;
+ }
+}
+
+/*****************************************************************************
+ * Is this type promotable? */
+
+void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
+ lvaStructPromotionInfo* StructPromotionInfo,
+ bool sortFields)
+{
+ assert(eeIsValueClass(typeHnd));
+
+ if (typeHnd != StructPromotionInfo->typeHnd)
+ {
+ // sizeof(double) represents the size of the largest primitive type that we can struct promote
+ // In the future this may be changing to XMM_REGSIZE_BYTES
+ const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); // must be a compile time constant
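+        // For example (illustrative, assuming MAX_NumOfFieldsInPromotableStruct == 4): MaxOffset is
+        // 4 * sizeof(double) == 32, so any struct of 32 or more bytes is rejected below as too large.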
+
+ assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized
+ assert((BYTE)MAX_NumOfFieldsInPromotableStruct ==
+ MAX_NumOfFieldsInPromotableStruct); // because lvaStructFieldInfo.fieldCnt is byte-sized
+
+ bool requiresScratchVar = false;
+ bool containsHoles = false;
+ bool customLayout = false;
+ bool containsGCpointers = false;
+
+ StructPromotionInfo->typeHnd = typeHnd;
+ StructPromotionInfo->canPromote = false;
+
+ unsigned structSize = info.compCompHnd->getClassSize(typeHnd);
+ if (structSize >= MaxOffset)
+ {
+ return; // struct is too large
+ }
+
+ unsigned fieldCnt = info.compCompHnd->getClassNumInstanceFields(typeHnd);
+ if (fieldCnt == 0 || fieldCnt > MAX_NumOfFieldsInPromotableStruct)
+ {
+ return; // struct must have between 1 and MAX_NumOfFieldsInPromotableStruct fields
+ }
+
+ StructPromotionInfo->fieldCnt = (BYTE)fieldCnt;
+ DWORD typeFlags = info.compCompHnd->getClassAttribs(typeHnd);
+
+ bool treatAsOverlapping = StructHasOverlappingFields(typeFlags);
+
+#if 1 // TODO-Cleanup: Consider removing this entire #if block in the future
+
+ // This method has two callers. The one in Importer.cpp passes sortFields == false
+ // and the other passes sortFields == true.
+        // This is a workaround that leaves the inlining behavior the same as before, while still
+        // performing extra struct promotions when compiling the method.
+ //
+ if (!sortFields) // the condition "!sortFields" really means "we are inlining"
+ {
+ treatAsOverlapping = StructHasCustomLayout(typeFlags);
+ }
+#endif
+
+ if (treatAsOverlapping)
+ {
+ return;
+ }
+
+ // Don't struct promote if we have an CUSTOMLAYOUT flag on an HFA type
+ if (StructHasCustomLayout(typeFlags) && IsHfa(typeHnd))
+ {
+ return;
+ }
+
+#ifdef _TARGET_ARM_
+ // On ARM, we have a requirement on the struct alignment; see below.
+ unsigned structAlignment =
+ roundUp(info.compCompHnd->getClassAlignmentRequirement(typeHnd), TARGET_POINTER_SIZE);
+#endif // _TARGET_ARM_
+
+ bool isHole[MaxOffset]; // isHole[] is initialized to true for every valid offset in the struct and false for
+ // the rest
+ unsigned i; // then as we process the fields we clear the isHole[] values that the field spans.
+ for (i = 0; i < MaxOffset; i++)
+ {
+ isHole[i] = (i < structSize) ? true : false;
+ }
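+        // Illustrative example (hypothetical): for a struct with an explicit size of 8 whose only field is
+        // an int at offset 0, the field loop below clears isHole[0..3], isHole[4..7] stay true, and the
+        // struct is later flagged as containing holes.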
+
+ for (BYTE ordinal = 0; ordinal < fieldCnt; ++ordinal)
+ {
+ lvaStructFieldInfo* pFieldInfo = &StructPromotionInfo->fields[ordinal];
+ pFieldInfo->fldHnd = info.compCompHnd->getFieldInClass(typeHnd, ordinal);
+ unsigned fldOffset = info.compCompHnd->getFieldOffset(pFieldInfo->fldHnd);
+
+ // The fldOffset value should never be larger than our structSize.
+ if (fldOffset >= structSize)
+ {
+ noway_assert(false);
+ return;
+ }
+
+ pFieldInfo->fldOffset = (BYTE)fldOffset;
+ pFieldInfo->fldOrdinal = ordinal;
+ CorInfoType corType = info.compCompHnd->getFieldType(pFieldInfo->fldHnd, &pFieldInfo->fldTypeHnd);
+ var_types varType = JITtype2varType(corType);
+ pFieldInfo->fldType = varType;
+ pFieldInfo->fldSize = genTypeSize(varType);
+
+ if (varTypeIsGC(varType))
+ {
+ containsGCpointers = true;
+ }
+
+ if (pFieldInfo->fldSize == 0)
+ {
+ // Non-primitive struct field. Don't promote.
+ return;
+ }
+
+ if ((pFieldInfo->fldOffset % pFieldInfo->fldSize) != 0)
+ {
+ // The code in Compiler::genPushArgList that reconstitutes
+ // struct values on the stack from promoted fields expects
+ // those fields to be at their natural alignment.
+ return;
+ }
+
+ // The end offset for this field should never be larger than our structSize.
+ noway_assert(fldOffset + pFieldInfo->fldSize <= structSize);
+
+ for (i = 0; i < pFieldInfo->fldSize; i++)
+ {
+ isHole[fldOffset + i] = false;
+ }
+
+#ifdef _TARGET_ARM_
+ // On ARM, for struct types that don't use explicit layout, the alignment of the struct is
+ // at least the max alignment of its fields. We take advantage of this invariant in struct promotion,
+ // so verify it here.
+ if (pFieldInfo->fldSize > structAlignment)
+ {
+                // Don't promote vars whose struct types violate the invariant. (Alignment == size for primitives.)
+ return;
+ }
+ // If we have any small fields we will allocate a single PromotedStructScratch local var for the method.
+ // This is a stack area that we use to assemble the small fields in order to place them in a register
+ // argument.
+ //
+ if (pFieldInfo->fldSize < TARGET_POINTER_SIZE)
+ {
+ requiresScratchVar = true;
+ }
+#endif // _TARGET_ARM_
+ }
+
+ // If we saw any GC pointer fields above then the CORINFO_FLG_CONTAINS_GC_PTR has to be set!
+ noway_assert((containsGCpointers == false) || ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0));
+
+ // If we have "Custom Layout" then we might have an explicit Size attribute
+        // Managed C++ uses this for its structs; such C++ types will not contain GC pointers.
+ //
+ // The current VM implementation also incorrectly sets the CORINFO_FLG_CUSTOMLAYOUT
+ // whenever a managed value class contains any GC pointers.
+ // (See the comment for VMFLAG_NOT_TIGHTLY_PACKED in class.h)
+ //
+ // It is important to struct promote managed value classes that have GC pointers
+ // So we compute the correct value for "CustomLayout" here
+ //
+ if (StructHasCustomLayout(typeFlags) && ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) == 0))
+ {
+ customLayout = true;
+ }
+
+ // Check if this promoted struct contains any holes
+ //
+ for (i = 0; i < structSize; i++)
+ {
+ if (isHole[i])
+ {
+ containsHoles = true;
+ break;
+ }
+ }
+
+ // Cool, this struct is promotable.
+ StructPromotionInfo->canPromote = true;
+ StructPromotionInfo->requiresScratchVar = requiresScratchVar;
+ StructPromotionInfo->containsHoles = containsHoles;
+ StructPromotionInfo->customLayout = customLayout;
+
+ if (sortFields)
+ {
+ // Sort the fields according to the increasing order of the field offset.
+ // This is needed because the fields need to be pushed on stack (when referenced
+ // as a struct) in order.
+ qsort(StructPromotionInfo->fields, StructPromotionInfo->fieldCnt, sizeof(*StructPromotionInfo->fields),
+ lvaFieldOffsetCmp);
+ }
+ }
+ else
+ {
+ // Asking for the same type of struct as the last time.
+        // Nothing needs to be done.
+ // Fall through ...
+ }
+}
+
+/*****************************************************************************
+ * Is this struct-typed local variable promotable? */
+
+void Compiler::lvaCanPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo)
+{
+ noway_assert(lclNum < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ noway_assert(varTypeIsStruct(varDsc));
+ noway_assert(!varDsc->lvPromoted); // Don't ask again :)
+
+#ifdef FEATURE_SIMD
+ // If this lclVar is used in a SIMD intrinsic, then we don't want to struct promote it.
+ // Note, however, that SIMD lclVars that are NOT used in a SIMD intrinsic may be
+ // profitably promoted.
+ if (varDsc->lvIsUsedInSIMDIntrinsic())
+ {
+ StructPromotionInfo->canPromote = false;
+ return;
+ }
+
+#endif
+
+ // TODO-PERF - Allow struct promotion for HFA register arguments
+
+ // Explicitly check for HFA reg args and reject them for promotion here.
+ // Promoting HFA args will fire an assert in lvaAssignFrameOffsets
+ // when the HFA reg arg is struct promoted.
+ //
+ if (varDsc->lvIsHfaRegArg())
+ {
+ StructPromotionInfo->canPromote = false;
+ return;
+ }
+
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ lvaCanPromoteStructType(typeHnd, StructPromotionInfo, true);
+}
+
+/*****************************************************************************
+ * Promote a struct type local */
+
+void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* StructPromotionInfo)
+{
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // We should never see a reg-sized non-field-addressed struct here.
+ noway_assert(!varDsc->lvRegStruct);
+
+ noway_assert(StructPromotionInfo->canPromote);
+ noway_assert(StructPromotionInfo->typeHnd == varDsc->lvVerTypeInfo.GetClassHandle());
+
+ varDsc->lvFieldCnt = StructPromotionInfo->fieldCnt;
+ varDsc->lvFieldLclStart = lvaCount;
+ varDsc->lvPromoted = true;
+ varDsc->lvContainsHoles = StructPromotionInfo->containsHoles;
+ varDsc->lvCustomLayout = StructPromotionInfo->customLayout;
+
+#ifdef DEBUG
+ // Don't change the source to a TYP_BLK either.
+ varDsc->lvKeepType = 1;
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nPromoting struct local V%02u (%s):", lclNum, eeGetClassName(StructPromotionInfo->typeHnd));
+ }
+#endif
+
+ for (unsigned index = 0; index < StructPromotionInfo->fieldCnt; ++index)
+ {
+ lvaStructFieldInfo* pFieldInfo = &StructPromotionInfo->fields[index];
+
+ if (varTypeIsFloating(pFieldInfo->fldType))
+ {
+ lvaTable[lclNum].lvContainsFloatingFields = 1;
+ // Whenever we promote a struct that contains a floating point field
+ // it's possible we transition from a method that originally only had integer
+ // local vars to start having FP. We have to communicate this through this flag
+ // since LSRA later on will use this flag to determine whether or not to track FP register sets.
+ compFloatingPointUsed = true;
+ }
+
+// Now grab the temp for the field local.
+
+#ifdef DEBUG
+ char buf[200];
+ char* bufp = &buf[0];
+
+ sprintf_s(bufp, sizeof(buf), "%s V%02u.%s (fldOffset=0x%x)", "field", lclNum,
+ eeGetFieldName(pFieldInfo->fldHnd), pFieldInfo->fldOffset);
+
+ if (index > 0)
+ {
+ noway_assert(pFieldInfo->fldOffset > (pFieldInfo - 1)->fldOffset);
+ }
+#endif
+
+ unsigned varNum = lvaGrabTemp(false DEBUGARG(bufp)); // Lifetime of field locals might span multiple BBs, so
+ // they are long lifetime temps.
+
+ LclVarDsc* fieldVarDsc = &lvaTable[varNum];
+ fieldVarDsc->lvType = pFieldInfo->fldType;
+ fieldVarDsc->lvExactSize = pFieldInfo->fldSize;
+ fieldVarDsc->lvIsStructField = true;
+ fieldVarDsc->lvFldOffset = pFieldInfo->fldOffset;
+ fieldVarDsc->lvFldOrdinal = pFieldInfo->fldOrdinal;
+ fieldVarDsc->lvParentLcl = lclNum;
+ fieldVarDsc->lvIsParam = varDsc->lvIsParam;
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // Do we have a parameter that can be enregistered?
+ //
+ if (varDsc->lvIsRegArg)
+ {
+ fieldVarDsc->lvIsRegArg = true;
+ fieldVarDsc->lvArgReg = varDsc->lvArgReg;
+ fieldVarDsc->setPrefReg(varDsc->lvArgReg, this); // Set the preferred register
+
+ lvaMarkRefsWeight = BB_UNITY_WEIGHT; // incRefCnts can use this compiler global variable
+ fieldVarDsc->incRefCnts(BB_UNITY_WEIGHT, this); // increment the ref count for prolog initialization
+ }
+#endif
+
+#ifdef DEBUG
+ // This temporary should not be converted to a double in stress mode,
+ // because we introduce assigns to it after the stress conversion
+ fieldVarDsc->lvKeepType = 1;
+#endif
+ }
+}
+
+#if !defined(_TARGET_64BIT_)
+//------------------------------------------------------------------------
+// lvaPromoteLongVars: "Struct promote" all register candidate longs as if they are structs of two ints.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+void Compiler::lvaPromoteLongVars()
+{
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ {
+ return;
+ }
+ // The lvaTable might grow as we grab temps. Make a local copy here.
+ unsigned startLvaCount = lvaCount;
+ for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (!varTypeIsLong(varDsc) || varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegArgOrRet() ||
+ (varDsc->lvRefCnt == 0))
+ {
+ continue;
+ }
+
+ // Will this work ???
+ // We can't have nested promoted structs.
+ if (varDsc->lvIsStructField)
+ {
+ if (lvaGetPromotionType(varDsc->lvParentLcl) != PROMOTION_TYPE_INDEPENDENT)
+ {
+ continue;
+ }
+ varDsc->lvIsStructField = false;
+ varDsc->lvTracked = false;
+ }
+
+ varDsc->lvFieldCnt = 2;
+ varDsc->lvFieldLclStart = lvaCount;
+ varDsc->lvPromoted = true;
+ varDsc->lvContainsHoles = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nPromoting long local V%02u:", lclNum);
+ }
+#endif
+
+ bool isParam = varDsc->lvIsParam;
+
+ for (unsigned index = 0; index < 2; ++index)
+ {
+ // Grab the temp for the field local.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ char buf[200];
+ char* bufp = &buf[0];
+
+ sprintf_s(bufp, sizeof(buf), "%s V%02u.%s (fldOffset=0x%x)", "field", lclNum, index == 0 ? "lo" : "hi",
+ index * 4);
+#endif
+ unsigned varNum = lvaGrabTemp(false DEBUGARG(bufp)); // Lifetime of field locals might span multiple BBs, so
+ // they are long lifetime temps.
+
+ LclVarDsc* fieldVarDsc = &lvaTable[varNum];
+ fieldVarDsc->lvType = TYP_INT;
+ fieldVarDsc->lvExactSize = genTypeSize(TYP_INT);
+ fieldVarDsc->lvIsStructField = true;
+ fieldVarDsc->lvFldOffset = (unsigned char)(index * genTypeSize(TYP_INT));
+ fieldVarDsc->lvFldOrdinal = (unsigned char)index;
+ fieldVarDsc->lvParentLcl = lclNum;
+ fieldVarDsc->lvIsParam = isParam;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaTable after lvaPromoteLongVars\n");
+ lvaTableDump();
+ }
+#endif // DEBUG
+}
+#endif // !_TARGET_64BIT_
+
+/*****************************************************************************
+ * Given a fldOffset in a promoted struct var, return the index of the local
+ * that represents this field.
+ */
+
+unsigned Compiler::lvaGetFieldLocal(LclVarDsc* varDsc, unsigned int fldOffset)
+{
+ noway_assert(varTypeIsStruct(varDsc));
+ noway_assert(varDsc->lvPromoted);
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ noway_assert(lvaTable[i].lvParentLcl == (unsigned)(varDsc - lvaTable));
+ if (lvaTable[i].lvFldOffset == fldOffset)
+ {
+ return i;
+ }
+ }
+
+ // This is the not-found error return path, the caller should check for BAD_VAR_NUM
+ return BAD_VAR_NUM;
+}
+
+/*****************************************************************************
+ *
+ * Set the local var "varNum" as address-exposed.
+ * If this is a promoted struct, label its fields the same way.
+ */
+
+void Compiler::lvaSetVarAddrExposed(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ varDsc->lvAddrExposed = 1;
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ lvaTable[i].lvAddrExposed = 1; // Make field local as address-exposed.
+ lvaSetVarDoNotEnregister(i DEBUGARG(DNER_AddrExposed));
+ }
+ }
+
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_AddrExposed));
+}
+
+/*****************************************************************************
+ *
+ * Record that the local var "varNum" should not be enregistered (for one of several reasons.)
+ */
+
+void Compiler::lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregisterReason reason))
+{
+ noway_assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ varDsc->lvDoNotEnregister = 1;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nLocal V%02u should not be enregistered because: ", varNum);
+ }
+ switch (reason)
+ {
+ case DNER_AddrExposed:
+ JITDUMP("it is address exposed\n");
+ assert(varDsc->lvAddrExposed);
+ break;
+ case DNER_IsStruct:
+ JITDUMP("it is a struct\n");
+ assert(varTypeIsStruct(varDsc));
+ break;
+ case DNER_BlockOp:
+ JITDUMP("written in a block op\n");
+ varDsc->lvLclBlockOpAddr = 1;
+ break;
+ case DNER_LocalField:
+ JITDUMP("was accessed as a local field\n");
+ varDsc->lvLclFieldExpr = 1;
+ break;
+ case DNER_VMNeedsStackAddr:
+ JITDUMP("needs stack addr\n");
+ varDsc->lvVMNeedsStackAddr = 1;
+ break;
+ case DNER_LiveInOutOfHandler:
+ JITDUMP("live in/out of a handler\n");
+ varDsc->lvLiveInOutOfHndlr = 1;
+ break;
+ case DNER_LiveAcrossUnmanagedCall:
+ JITDUMP("live across unmanaged call\n");
+ varDsc->lvLiveAcrossUCall = 1;
+ break;
+#ifdef JIT32_GCENCODER
+ case DNER_PinningRef:
+ JITDUMP("pinning ref\n");
+ assert(varDsc->lvPinned);
+ break;
+#endif
+ default:
+ unreached();
+ break;
+ }
+#endif
+}
+
+// Returns true if this local var is a multireg struct
+bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc)
+{
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandleForValueClass();
+ structPassingKind howToPassStruct;
+
+ var_types type = getArgTypeForStruct(clsHnd, &howToPassStruct, varDsc->lvExactSize);
+
+ if (howToPassStruct == SPK_ByValueAsHfa)
+ {
+            assert(type == TYP_STRUCT);
+ return true;
+ }
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+ if (howToPassStruct == SPK_ByValue)
+ {
+            assert(type == TYP_STRUCT);
+ return true;
+ }
+#endif
+ }
+ return false;
+}
+
+/*****************************************************************************
+ * Set the lvClass for a local variable of a struct type */
+
+void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool unsafeValueClsCheck, bool setTypeInfo)
+{
+ noway_assert(varNum < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ if (setTypeInfo)
+ {
+ varDsc->lvVerTypeInfo = typeInfo(TI_STRUCT, typeHnd);
+ }
+
+ // Set the type and associated info if we haven't already set it.
+ var_types structType = varDsc->lvType;
+ if (varDsc->lvType == TYP_UNDEF)
+ {
+ varDsc->lvType = TYP_STRUCT;
+ }
+ if (varDsc->lvExactSize == 0)
+ {
+ varDsc->lvExactSize = info.compCompHnd->getClassSize(typeHnd);
+
+ size_t lvSize = varDsc->lvSize();
+ assert((lvSize % sizeof(void*)) ==
+ 0); // The struct needs to be a multiple of sizeof(void*) bytes for getClassGClayout() to be valid.
+ varDsc->lvGcLayout = (BYTE*)compGetMemA((lvSize / sizeof(void*)) * sizeof(BYTE), CMK_LvaTable);
+ unsigned numGCVars;
+ var_types simdBaseType = TYP_UNKNOWN;
+ varDsc->lvType = impNormStructType(typeHnd, varDsc->lvGcLayout, &numGCVars, &simdBaseType);
+
+ // We only save the count of GC vars in a struct up to 7.
+ if (numGCVars >= 8)
+ {
+ numGCVars = 7;
+ }
+ varDsc->lvStructGcCount = numGCVars;
+#if FEATURE_SIMD
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ assert(varTypeIsSIMD(varDsc));
+ varDsc->lvSIMDType = true;
+ varDsc->lvBaseType = simdBaseType;
+ }
+#endif // FEATURE_SIMD
+#ifdef FEATURE_HFA
+ // for structs that are small enough, we check and set lvIsHfa and lvHfaTypeIsFloat
+ if (varDsc->lvExactSize <= MAX_PASS_MULTIREG_BYTES)
+ {
+ var_types hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ if (varTypeIsFloating(hfaType))
+ {
+ varDsc->_lvIsHfa = true;
+ varDsc->lvSetHfaTypeIsFloat(hfaType == TYP_FLOAT);
+
+ // hfa variables can never contain GC pointers
+ assert(varDsc->lvStructGcCount == 0);
+ // The size of this struct should be evenly divisible by 4 or 8
+ assert((varDsc->lvExactSize % genTypeSize(hfaType)) == 0);
+ // The number of elements in the HFA should fit into our MAX_ARG_REG_COUNT limit
+ assert((varDsc->lvExactSize / genTypeSize(hfaType)) <= MAX_ARG_REG_COUNT);
+ }
+ }
+#endif // FEATURE_HFA
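+
+ // Worked example (hypothetical HFA of three floats): lvExactSize is 12 and
+ // genTypeSize(hfaType) is 4, so the size divides evenly (12 % 4 == 0) and the element
+ // count (12 / 4 == 3) stays within the MAX_ARG_REG_COUNT limit asserted above.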
+ }
+ else
+ {
+ assert(varDsc->lvExactSize != 0);
+#if FEATURE_SIMD
+ assert(!varTypeIsSIMD(varDsc) || (varDsc->lvBaseType != TYP_UNKNOWN));
+#endif // FEATURE_SIMD
+ }
+
+#ifndef _TARGET_64BIT_
+ bool fDoubleAlignHint = FALSE;
+#ifdef _TARGET_X86_
+ fDoubleAlignHint = TRUE;
+#endif
+
+ if (info.compCompHnd->getClassAlignmentRequirement(typeHnd, fDoubleAlignHint) == 8)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Marking struct in V%02i with double align flag\n", varNum);
+ }
+#endif
+ varDsc->lvStructDoubleAlign = 1;
+ }
+#endif // not _TARGET_64BIT_
+
+ unsigned classAttribs = info.compCompHnd->getClassAttribs(typeHnd);
+
+ varDsc->lvOverlappingFields = StructHasOverlappingFields(classAttribs);
+
+ // Check whether this local is an unsafe value type and requires GS cookie protection.
+ // GS checks require the stack to be re-ordered, which can't be done with EnC.
+ if (unsafeValueClsCheck && (classAttribs & CORINFO_FLG_UNSAFE_VALUECLASS) && !opts.compDbgEnC)
+ {
+ setNeedsGSSecurityCookie();
+ compGSReorderStackLayout = true;
+ varDsc->lvIsUnsafeBuffer = true;
+ }
+}
+
+/*****************************************************************************
+ * Returns the array of BYTEs containing the GC layout information
+ */
+
+BYTE* Compiler::lvaGetGcLayout(unsigned varNum)
+{
+ noway_assert(varTypeIsStruct(lvaTable[varNum].lvType) && (lvaTable[varNum].lvExactSize >= TARGET_POINTER_SIZE));
+
+ return lvaTable[varNum].lvGcLayout;
+}
+
+/*****************************************************************************
+ * Return the number of bytes needed for a local variable
+ */
+
+unsigned Compiler::lvaLclSize(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+
+ var_types varType = lvaTable[varNum].TypeGet();
+
+ switch (varType)
+ {
+ case TYP_STRUCT:
+ case TYP_BLK:
+ return lvaTable[varNum].lvSize();
+
+ case TYP_LCLBLK:
+#if FEATURE_FIXED_OUT_ARGS
+ noway_assert(lvaOutgoingArgSpaceSize >= 0);
+ noway_assert(varNum == lvaOutgoingArgSpaceVar);
+ return lvaOutgoingArgSpaceSize;
+
+#else // FEATURE_FIXED_OUT_ARGS
+ assert(!"Unknown size");
+ NO_WAY("Target doesn't support TYP_LCLBLK");
+
+ // Keep prefast happy
+ __fallthrough;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ default: // This must be a primitive var. Fall out of switch statement
+ break;
+ }
+#ifdef _TARGET_64BIT_
+ // We only need this Quirk for _TARGET_64BIT_
+ if (lvaTable[varNum].lvQuirkToLong)
+ {
+ noway_assert(lvaTable[varNum].lvAddrExposed);
+ return genTypeStSz(TYP_LONG) * sizeof(int); // return 8 (2 * 4)
+ }
+#endif
+ return genTypeStSz(varType) * sizeof(int);
+}
+
+//
+// Return the exact width of local variable "varNum" -- the number of bytes
+// you'd need to copy in order to overwrite the value.
+//
+unsigned Compiler::lvaLclExactSize(unsigned varNum)
+{
+ noway_assert(varNum < lvaCount);
+
+ var_types varType = lvaTable[varNum].TypeGet();
+
+ switch (varType)
+ {
+ case TYP_STRUCT:
+ case TYP_BLK:
+ return lvaTable[varNum].lvExactSize;
+
+ case TYP_LCLBLK:
+#if FEATURE_FIXED_OUT_ARGS
+ noway_assert(lvaOutgoingArgSpaceSize >= 0);
+ noway_assert(varNum == lvaOutgoingArgSpaceVar);
+ return lvaOutgoingArgSpaceSize;
+
+#else // FEATURE_FIXED_OUT_ARGS
+ assert(!"Unknown size");
+ NO_WAY("Target doesn't support TYP_LCLBLK");
+
+ // Keep prefast happy
+ __fallthrough;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ default: // This must be a primitive var. Fall out of switch statement
+ break;
+ }
+
+ return genTypeSize(varType);
+}
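+
+// Worked example (hypothetical 6-byte struct local): lvaLclExactSize returns 6, while
+// lvaLclSize returns the rounded-up allocation size from lvSize(), a multiple of the pointer
+// size (8 on 64-bit targets).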
+
+// getBBWeight -- get the normalized weight of this block
+unsigned BasicBlock::getBBWeight(Compiler* comp)
+{
+ if (this->bbWeight == 0)
+ {
+ return 0;
+ }
+ else
+ {
+ unsigned calledWeight = comp->fgCalledWeight;
+ if (calledWeight == 0)
+ {
+ calledWeight = comp->fgFirstBB->bbWeight;
+ if (calledWeight == 0)
+ {
+ calledWeight = BB_UNITY_WEIGHT;
+ }
+ }
+ if (this->bbWeight < (BB_MAX_WEIGHT / BB_UNITY_WEIGHT))
+ {
+ return max(1, (((this->bbWeight * BB_UNITY_WEIGHT) + (calledWeight / 2)) / calledWeight));
+ }
+ else
+ {
+ return (unsigned)((((double)this->bbWeight * (double)BB_UNITY_WEIGHT) / (double)calledWeight) + 0.5);
+ }
+ }
+}
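+
+// Worked example (assuming BB_UNITY_WEIGHT is 100): a block with bbWeight = 3 in a method
+// whose called weight is 2 normalizes to max(1, (3 * 100 + 2 / 2) / 2) = 150, i.e. the block
+// is treated as 1.5x as hot as a single invocation of the method.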
+
+/*****************************************************************************
+ *
+ * Callback used by the tree walker to call lvaDecRefCnts
+ */
+Compiler::fgWalkResult Compiler::lvaDecRefCntsCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ data->compiler->lvaDecRefCnts(*pTree);
+ return WALK_CONTINUE;
+}
+
+// Decrement the ref counts for all locals contained in the tree and its children.
+void Compiler::lvaRecursiveDecRefCounts(GenTreePtr tree)
+{
+ assert(lvaLocalVarRefCounted);
+
+ // We could just use the recursive walker for all cases but that is a
+ // fairly heavyweight thing to spin up when we're usually just handling a leaf.
+ if (tree->OperIsLeaf())
+ {
+ if (tree->OperIsLocal())
+ {
+ lvaDecRefCnts(tree);
+ }
+ }
+ else
+ {
+ fgWalkTreePre(&tree, Compiler::lvaDecRefCntsCB, (void*)this, true);
+ }
+}
+
+// Increment the ref counts for all locals contained in the tree and its children.
+void Compiler::lvaRecursiveIncRefCounts(GenTreePtr tree)
+{
+ assert(lvaLocalVarRefCounted);
+
+ // We could just use the recursive walker for all cases but that is a
+ // fairly heavyweight thing to spin up when we're usually just handling a leaf.
+ if (tree->OperIsLeaf())
+ {
+ if (tree->OperIsLocal())
+ {
+ lvaIncRefCnts(tree);
+ }
+ }
+ else
+ {
+ fgWalkTreePre(&tree, Compiler::lvaIncRefCntsCB, (void*)this, true);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to the tree walker to decrement the refCnts for
+ * all local variables in an expression
+ */
+void Compiler::lvaDecRefCnts(GenTreePtr tree)
+{
+ assert(compCurBB != nullptr);
+ lvaDecRefCnts(compCurBB, tree);
+}
+
+void Compiler::lvaDecRefCnts(BasicBlock* block, GenTreePtr tree)
+{
+ assert(block != nullptr);
+ assert(tree != nullptr);
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(lvaRefCountingStarted || lvaLocalVarRefCounted);
+
+ if ((tree->gtOper == GT_CALL) && (tree->gtFlags & GTF_CALL_UNMANAGED))
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the special variable descriptor */
+
+ lclNum = info.compLvFrameListRoot;
+
+ noway_assert(lclNum <= lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Decrement the reference counts twice */
+
+ varDsc->decRefCnts(block->getBBWeight(this), this);
+ varDsc->decRefCnts(block->getBBWeight(this), this);
+ }
+ }
+ else
+ {
+ /* This must be a local variable */
+
+ noway_assert(tree->OperIsLocal());
+
+ /* Get the variable descriptor */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ varDsc->decRefCnts(block->getBBWeight(this), this);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Callback used by the tree walker to call lvaIncRefCnts
+ */
+Compiler::fgWalkResult Compiler::lvaIncRefCntsCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ data->compiler->lvaIncRefCnts(*pTree);
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to the tree walker to increment the refCnts for
+ * all local variables in an expression
+ */
+void Compiler::lvaIncRefCnts(GenTreePtr tree)
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(lvaRefCountingStarted || lvaLocalVarRefCounted);
+
+ if ((tree->gtOper == GT_CALL) && (tree->gtFlags & GTF_CALL_UNMANAGED))
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the special variable descriptor */
+
+ lclNum = info.compLvFrameListRoot;
+
+ noway_assert(lclNum <= lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Increment the reference counts twice */
+
+ varDsc->incRefCnts(compCurBB->getBBWeight(this), this);
+ varDsc->incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+ }
+ else
+ {
+ /* This must be a local variable */
+
+ noway_assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_STORE_LCL_VAR ||
+ tree->gtOper == GT_STORE_LCL_FLD);
+
+ /* Get the variable descriptor */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ /* Increment its lvRefCnt and lvRefCntWtd */
+
+ varDsc->incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by Compiler::lvaSortByRefCount()
+ * when generating SMALL_CODE.
+ * Return positive if dsc2 has a higher ref count
+ * Return negative if dsc1 has a higher ref count
+ * Return zero if the ref counts are the same
+ * lvPrefReg is only used to break ties
+ */
+
+/* static */
+int __cdecl Compiler::RefCntCmp(const void* op1, const void* op2)
+{
+ LclVarDsc* dsc1 = *(LclVarDsc**)op1;
+ LclVarDsc* dsc2 = *(LclVarDsc**)op2;
+
+ /* Make sure we preference tracked variables over untracked variables */
+
+ if (dsc1->lvTracked != dsc2->lvTracked)
+ {
+ return (dsc2->lvTracked) ? +1 : -1;
+ }
+
+ unsigned weight1 = dsc1->lvRefCnt;
+ unsigned weight2 = dsc2->lvRefCnt;
+
+#if !FEATURE_FP_REGALLOC
+ /* Force integer candidates to sort above float candidates */
+
+ bool isFloat1 = isFloatRegType(dsc1->lvType);
+ bool isFloat2 = isFloatRegType(dsc2->lvType);
+
+ if (isFloat1 != isFloat2)
+ {
+ if (weight2 && isFloat1)
+ {
+ return +1;
+ }
+ if (weight1 && isFloat2)
+ {
+ return -1;
+ }
+ }
+#endif
+
+ int diff = weight2 - weight1;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* The unweighted ref counts were the same */
+ /* If the weighted ref counts are different then use their difference */
+ diff = dsc2->lvRefCntWtd - dsc1->lvRefCntWtd;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* We have equal ref counts and weighted ref counts */
+
+ /* Break the tie by: */
+ /* Increasing the weight by 2 if we have exactly one bit set in lvPrefReg */
+ /* Increasing the weight by 1 if we have more than one bit set in lvPrefReg */
+ /* Increasing the weight by 0.5 if we are a GC type */
+ /* Increasing the weight by 0.5 if we were enregistered in the previous pass */
+
+ if (weight1)
+ {
+ if (dsc1->lvPrefReg)
+ {
+ if ((dsc1->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc1->lvPrefReg))
+ {
+ weight1 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight1 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+ if (varTypeIsGC(dsc1->TypeGet()))
+ {
+ weight1 += BB_UNITY_WEIGHT / 2;
+ }
+
+ if (dsc1->lvRegister)
+ {
+ weight1 += BB_UNITY_WEIGHT / 2;
+ }
+ }
+
+ if (weight2)
+ {
+ if (dsc2->lvPrefReg)
+ {
+ if ((dsc2->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc2->lvPrefReg))
+ {
+ weight2 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight2 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+ if (varTypeIsGC(dsc2->TypeGet()))
+ {
+ weight2 += BB_UNITY_WEIGHT / 2;
+ }
+
+ if (dsc2->lvRegister)
+ {
+ weight2 += BB_UNITY_WEIGHT / 2;
+ }
+ }
+
+ diff = weight2 - weight1;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* To achieve a Stable Sort we use the LclNum (by way of the pointer address) */
+
+ if (dsc1 < dsc2)
+ {
+ return -1;
+ }
+ if (dsc1 > dsc2)
+ {
+ return +1;
+ }
+
+ return 0;
+}
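+
+// Tie-break sketch (hypothetical values): if two tracked locals tie on both lvRefCnt and
+// lvRefCntWtd, but dsc1 has exactly one bit set in lvPrefReg and is a GC ref while dsc2 has
+// neither, then weight1 gains 2 * BB_UNITY_WEIGHT + BB_UNITY_WEIGHT / 2, the final diff is
+// negative, and dsc1 sorts ahead of dsc2.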
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by Compiler::lvaSortByRefCount()
+ * when not generating SMALL_CODE.
+ * Return positive if dsc2 has a higher weighted ref count
+ * Return negative if dsc1 has a higher weighted ref count
+ * Return zero if the ref counts are the same
+ */
+
+/* static */
+int __cdecl Compiler::WtdRefCntCmp(const void* op1, const void* op2)
+{
+ LclVarDsc* dsc1 = *(LclVarDsc**)op1;
+ LclVarDsc* dsc2 = *(LclVarDsc**)op2;
+
+ /* Make sure we preference tracked variables over untracked variables */
+
+ if (dsc1->lvTracked != dsc2->lvTracked)
+ {
+ return (dsc2->lvTracked) ? +1 : -1;
+ }
+
+ unsigned weight1 = dsc1->lvRefCntWtd;
+ unsigned weight2 = dsc2->lvRefCntWtd;
+
+#if !FEATURE_FP_REGALLOC
+ /* Force integer candidates to sort above float candidates */
+
+ bool isFloat1 = isFloatRegType(dsc1->lvType);
+ bool isFloat2 = isFloatRegType(dsc2->lvType);
+
+ if (isFloat1 != isFloat2)
+ {
+ if (weight2 && isFloat1)
+ {
+ return +1;
+ }
+ if (weight1 && isFloat2)
+ {
+ return -1;
+ }
+ }
+#endif
+
+ /* Increase the weight by 2 if we have exactly one bit set in lvPrefReg */
+ /* Increase the weight by 1 if we have more than one bit set in lvPrefReg */
+
+ if (weight1 && dsc1->lvPrefReg)
+ {
+ if ((dsc1->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc1->lvPrefReg))
+ {
+ weight1 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight1 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+
+ if (weight2 && dsc2->lvPrefReg)
+ {
+ if ((dsc2->lvPrefReg & ~RBM_BYTE_REG_FLAG) && genMaxOneBit((unsigned)dsc2->lvPrefReg))
+ {
+ weight2 += 2 * BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ weight2 += 1 * BB_UNITY_WEIGHT;
+ }
+ }
+
+ if (weight2 > weight1)
+ {
+ return 1;
+ }
+ else if (weight2 < weight1)
+ {
+ return -1;
+ }
+
+ // Otherwise, we have equal weighted ref counts.
+
+ /* If the unweighted ref counts are different then use their difference */
+ int diff = (int)dsc2->lvRefCnt - (int)dsc1->lvRefCnt;
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ /* If one is a GC type and the other is not, the GC type wins */
+ if (varTypeIsGC(dsc1->TypeGet()) != varTypeIsGC(dsc2->TypeGet()))
+ {
+ if (varTypeIsGC(dsc1->TypeGet()))
+ {
+ diff = -1;
+ }
+ else
+ {
+ diff = +1;
+ }
+
+ return diff;
+ }
+
+ /* If one was enregistered in the previous pass then it wins */
+ if (dsc1->lvRegister != dsc2->lvRegister)
+ {
+ if (dsc1->lvRegister)
+ {
+ diff = -1;
+ }
+ else
+ {
+ diff = +1;
+ }
+
+ return diff;
+ }
+
+ /* We have a tie! */
+
+ /* To achieve a Stable Sort we use the LclNum (by way of the pointer address) */
+
+ if (dsc1 < dsc2)
+ {
+ return -1;
+ }
+ if (dsc1 > dsc2)
+ {
+ return +1;
+ }
+
+ return 0;
+}
+
+/*****************************************************************************
+ *
+ * Sort the local variable table by refcount and assign tracking indices.
+ */
+
+void Compiler::lvaSortOnly()
+{
+ /* Now sort the variable table by ref-count */
+
+ qsort(lvaRefSorted, lvaCount, sizeof(*lvaRefSorted), (compCodeOpt() == SMALL_CODE) ? RefCntCmp : WtdRefCntCmp);
+
+ lvaSortAgain = false;
+
+ lvaDumpRefCounts();
+}
+
+void Compiler::lvaDumpRefCounts()
+{
+#ifdef DEBUG
+
+ if (verbose && lvaCount)
+ {
+ printf("refCnt table for '%s':\n", info.compMethodName);
+
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
+ {
+ unsigned refCnt = lvaRefSorted[lclNum]->lvRefCnt;
+ if (refCnt == 0)
+ {
+ break;
+ }
+ unsigned refCntWtd = lvaRefSorted[lclNum]->lvRefCntWtd;
+
+ printf(" ");
+ gtDispLclVar((unsigned)(lvaRefSorted[lclNum] - lvaTable));
+ printf(" [%6s]: refCnt = %4u, refCntWtd = %6s", varTypeName(lvaRefSorted[lclNum]->TypeGet()), refCnt,
+ refCntWtd2str(refCntWtd));
+
+ regMaskSmall pref = lvaRefSorted[lclNum]->lvPrefReg;
+ if (pref)
+ {
+ printf(" pref ");
+ dspRegMask(pref);
+ }
+ printf("\n");
+ }
+
+ printf("\n");
+ }
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Sort the local variable table by refcount and assign tracking indices.
+ */
+
+void Compiler::lvaSortByRefCount()
+{
+ lvaTrackedCount = 0;
+ lvaTrackedCountInSizeTUnits = 0;
+
+ if (lvaCount == 0)
+ {
+ return;
+ }
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ LclVarDsc** refTab;
+
+ /* We'll sort the variables by ref count - allocate the sorted table */
+
+ lvaRefSorted = refTab = new (this, CMK_LvaTable) LclVarDsc*[lvaCount];
+
+ /* Fill in the table used for sorting */
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Append this variable to the table for sorting */
+
+ *refTab++ = varDsc;
+
+ /* If we have JMP, all arguments must have a location
+ * even if we don't use them inside the method */
+
+ if (compJmpOpUsed && varDsc->lvIsParam)
+ {
+ /* ...except when we have varargs and the argument is
+ passed on the stack. In that case, it's important
+ for the ref count to be zero, so that we don't attempt
+ to track them for GC info (which is not possible since we
+ don't know their offset in the stack). See the assert at the
+ end of raMarkStkVars and bug #28949 for more info. */
+
+ if (!raIsVarargsStackArg(lclNum))
+ {
+ varDsc->incRefCnts(1, this);
+ }
+ }
+
+ /* For now assume we'll be able to track all locals */
+
+ varDsc->lvTracked = 1;
+
+ /* If the ref count is zero */
+ if (varDsc->lvRefCnt == 0)
+ {
+ /* Zero ref count, make this untracked */
+ varDsc->lvTracked = 0;
+ varDsc->lvRefCntWtd = 0;
+ }
+
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsLong(varDsc) && varDsc->lvPromoted)
+ {
+ varDsc->lvTracked = 0;
+ }
+#endif // !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+
+ // Variables that are address-exposed, and all struct locals, are never enregistered, or tracked.
+ // (The struct may be promoted, and its field variables enregistered/tracked, or the VM may "normalize"
+ // its type so that it's not seen by the JIT as a struct.)
+ // Pinned variables may not be tracked (a condition of the GCInfo representation)
+ // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
+ // references when using the general GC encoding.
+ if (varDsc->lvAddrExposed)
+ {
+ varDsc->lvTracked = 0;
+ assert(varDsc->lvType != TYP_STRUCT ||
+ varDsc->lvDoNotEnregister); // For structs, should have set this when we set lvAddrExposed.
+ }
+ else if (varTypeIsStruct(varDsc))
+ {
+ // Promoted structs will never be considered for enregistration anyway,
+ // and the DoNotEnregister flag was used to indicate whether promotion was
+ // independent or dependent.
+ if (varDsc->lvPromoted)
+ {
+ varDsc->lvTracked = 0;
+ }
+ else if ((varDsc->lvType == TYP_STRUCT) && !varDsc->lvRegStruct)
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_IsStruct));
+ }
+ }
+ else if (varDsc->lvIsStructField && (lvaGetParentPromotionType(lclNum) != PROMOTION_TYPE_INDEPENDENT))
+ {
+ // SSA must exclude struct fields that are not independently promoted
+ // as dependent fields could be assigned using a CopyBlock
+ // resulting in a single node causing multiple SSA definitions
+ // which isn't currently supported by SSA
+ //
+ // TODO-CQ: Consider using lvLclBlockOpAddr and only marking these LclVars
+ // untracked when a blockOp is used to assign the struct.
+ //
+ varDsc->lvTracked = 0; // so, don't mark as tracked
+ }
+ else if (varDsc->lvPinned)
+ {
+ varDsc->lvTracked = 0;
+#ifdef JIT32_GCENCODER
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_PinningRef));
+#endif
+ }
+
+ // Are we not optimizing, and do we have exception handlers?
+ // If so, mark all args and locals "do not enregister".
+ //
+ if (opts.MinOpts() && compHndBBtabCount > 0)
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LiveInOutOfHandler));
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+ switch (type)
+ {
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+#endif
+ case TYP_INT:
+ case TYP_LONG:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif // FEATURE_SIMD
+ case TYP_STRUCT:
+ break;
+
+ case TYP_UNDEF:
+ case TYP_UNKNOWN:
+ noway_assert(!"lvType not set correctly");
+ varDsc->lvType = TYP_INT;
+
+ __fallthrough;
+
+ default:
+ varDsc->lvTracked = 0;
+ }
+ }
+
+ /* Now sort the variable table by ref-count */
+
+ lvaSortOnly();
+
+ /* Decide which variables will be worth tracking */
+
+ if (lvaCount > lclMAX_TRACKED)
+ {
+ /* Mark all variables past the first 'lclMAX_TRACKED' as untracked */
+
+ for (lclNum = lclMAX_TRACKED; lclNum < lvaCount; lclNum++)
+ {
+ lvaRefSorted[lclNum]->lvTracked = 0;
+ }
+ }
+
+#ifdef DEBUG
+ // Re-Initialize to -1 for safety in debug build.
+ memset(lvaTrackedToVarNum, -1, sizeof(lvaTrackedToVarNum));
+#endif
+
+ /* Assign indices to all the variables we've decided to track */
+
+ for (lclNum = 0; lclNum < min(lvaCount, lclMAX_TRACKED); lclNum++)
+ {
+ varDsc = lvaRefSorted[lclNum];
+ if (varDsc->lvTracked)
+ {
+ noway_assert(varDsc->lvRefCnt > 0);
+
+ /* This variable will be tracked - assign it an index */
+
+ lvaTrackedToVarNum[lvaTrackedCount] = (unsigned)(varDsc - lvaTable); // The type of varDsc and lvaTable
+ // is LclVarDsc. Subtraction will give us
+ // the index.
+ varDsc->lvVarIndex = lvaTrackedCount++;
+ }
+ }
+
+ // We have a new epoch, and also cache the tracked var count in terms of size_t's sufficient to hold that many bits.
+ lvaCurEpoch++;
+ lvaTrackedCountInSizeTUnits = unsigned(roundUp(lvaTrackedCount, sizeof(size_t) * 8)) / unsigned(sizeof(size_t) * 8);
+
+#ifdef DEBUG
+ VarSetOps::AssignNoCopy(this, lvaTrackedVars, VarSetOps::MakeFull(this));
+#endif
+}
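+
+// Worked example: on a 64-bit host, sizeof(size_t) * 8 is 64, so tracking 70 locals gives
+// lvaTrackedCountInSizeTUnits = roundUp(70, 64) / 64 = 128 / 64 = 2 size_t chunks per
+// tracked-variable bit set.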
+
+#if ASSERTION_PROP
+/*****************************************************************************
+ *
+ * This is called by lvaMarkLclRefs to disqualify a variable from being
+ * considered by optAddCopies()
+ */
+void LclVarDsc::lvaDisqualifyVar()
+{
+ this->lvDisqualify = true;
+ this->lvSingleDef = false;
+ this->lvDefStmt = nullptr;
+}
+#endif // ASSERTION_PROP
+
+#ifndef LEGACY_BACKEND
+/**********************************************************************************
+* Get type of a variable when passed as an argument.
+*/
+var_types LclVarDsc::lvaArgType()
+{
+ var_types type = TypeGet();
+
+#ifdef _TARGET_AMD64_
+ if (type == TYP_STRUCT)
+ {
+ switch (lvExactSize)
+ {
+ case 1:
+ type = TYP_BYTE;
+ break;
+ case 2:
+ type = TYP_SHORT;
+ break;
+ case 4:
+ type = TYP_INT;
+ break;
+ case 8:
+ switch (*lvGcLayout)
+ {
+ case TYPE_GC_NONE:
+ type = TYP_I_IMPL;
+ break;
+
+ case TYPE_GC_REF:
+ type = TYP_REF;
+ break;
+
+ case TYPE_GC_BYREF:
+ type = TYP_BYREF;
+ break;
+
+ default:
+ unreached();
+ }
+ break;
+
+ default:
+ type = TYP_BYREF;
+ break;
+ }
+ }
+#elif defined(_TARGET_X86_)
+// Nothing to do; use the type as is.
+#else
+ NYI("lvaArgType");
+#endif //_TARGET_AMD64_
+
+ return type;
+}
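+
+// Illustrative mapping (hypothetical AMD64 arguments): a 4-byte struct is retyped to TYP_INT,
+// an 8-byte struct whose single slot holds an object reference becomes TYP_REF, and a 12-byte
+// struct falls through to the default case and is passed as TYP_BYREF.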
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * This is called by lvaMarkLclRefsCallback() to do variable ref marking
+ */
+
+void Compiler::lvaMarkLclRefs(GenTreePtr tree)
+{
+ /* Is this a call to unmanaged code ? */
+ if (tree->gtOper == GT_CALL && tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the special variable descriptor */
+
+ unsigned lclNum = info.compLvFrameListRoot;
+
+ noway_assert(lclNum <= lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ /* Increment the ref counts twice */
+ varDsc->incRefCnts(lvaMarkRefsWeight, this);
+ varDsc->incRefCnts(lvaMarkRefsWeight, this);
+ }
+ }
+
+ /* Is this an assignment? */
+
+ if (tree->OperKind() & GTK_ASGOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ /* Set target register for RHS local if assignment is of a "small" type */
+
+ if (varTypeIsByte(tree->gtType))
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc = nullptr;
+
+ /* GT_CHS is special it doesn't have a valid op2 */
+ if (tree->gtOper == GT_CHS)
+ {
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = &lvaTable[lclNum];
+ }
+ }
+ else
+ {
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ lclNum = op2->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = &lvaTable[lclNum];
+ }
+ }
+#if CPU_HAS_BYTE_REGS
+ if (varDsc)
+ varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
+#endif
+ }
+
+#if OPT_BOOL_OPS
+
+ /* Is this an assignment to a local variable? */
+
+ if (op1->gtOper == GT_LCL_VAR && op2->gtType != TYP_BOOL)
+ {
+ /* Only simple assignments allowed for booleans */
+
+ if (tree->gtOper != GT_ASG)
+ {
+ goto NOT_BOOL;
+ }
+
+ /* Is the RHS clearly a boolean value? */
+
+ switch (op2->gtOper)
+ {
+ unsigned lclNum;
+
+ case GT_CNS_INT:
+
+ if (op2->gtIntCon.gtIconVal == 0)
+ {
+ break;
+ }
+ if (op2->gtIntCon.gtIconVal == 1)
+ {
+ break;
+ }
+
+ // Not 0 or 1, fall through ....
+ __fallthrough;
+
+ default:
+
+ if (op2->OperIsCompare())
+ {
+ break;
+ }
+
+ NOT_BOOL:
+
+ lclNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ lvaTable[lclNum].lvIsBoolean = false;
+ break;
+ }
+ }
+#endif
+ }
+
+#if FANCY_ARRAY_OPT
+
+ /* Special case: assignment node */
+
+ if (tree->gtOper == GT_ASG)
+ {
+ if (tree->gtType == TYP_INT)
+ {
+ unsigned lclNum1;
+ LclVarDsc* varDsc1;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ if (op1->gtOper != GT_LCL_VAR)
+ return;
+
+ lclNum1 = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum1 < lvaCount);
+ varDsc1 = lvaTable + lclNum1;
+
+ if (varDsc1->lvAssignOne)
+ varDsc1->lvAssignTwo = true;
+ else
+ varDsc1->lvAssignOne = true;
+ }
+
+ return;
+ }
+
+#endif
+
+#ifdef _TARGET_XARCH_
+ /* Special case: integer shift node by a variable amount */
+
+ if (tree->OperIsShiftOrRotate())
+ {
+ if (tree->gtType == TYP_INT)
+ {
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+ lvaTable[lclNum].setPrefReg(REG_ECX, this);
+ }
+ }
+
+ return;
+ }
+#endif
+
+ if ((tree->gtOper != GT_LCL_VAR) && (tree->gtOper != GT_LCL_FLD))
+ {
+ return;
+ }
+
+ /* This must be a local variable reference */
+
+ noway_assert((tree->gtOper == GT_LCL_VAR) || (tree->gtOper == GT_LCL_FLD));
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ /* Increment the reference counts */
+
+ varDsc->incRefCnts(lvaMarkRefsWeight, this);
+
+ if (lvaVarAddrExposed(lclNum))
+ {
+ varDsc->lvIsBoolean = false;
+ }
+
+ if (tree->gtOper == GT_LCL_FLD)
+ {
+#if ASSERTION_PROP
+ // variables that have uses inside a GT_LCL_FLD
+ // cause problems, so we will disqualify them here
+ varDsc->lvaDisqualifyVar();
+#endif // ASSERTION_PROP
+ return;
+ }
+
+#if ASSERTION_PROP
+ /* Exclude the normal entry block */
+ if (fgDomsComputed && (lvaMarkRefsCurBlock->bbNum != 1) && lvaMarkRefsCurBlock->bbIDom != nullptr)
+ {
+ // If any entry block except the normal entry block dominates the block, then mark the local with the
+ // lvVolatileHint flag.
+
+ if (BlockSetOps::MayBeUninit(lvaMarkRefsCurBlock->bbDoms))
+ {
+ // Lazy init (If a block is not dominated by any other block, we'll redo this every time, but it'll be fast)
+ BlockSetOps::AssignNoCopy(this, lvaMarkRefsCurBlock->bbDoms, fgGetDominatorSet(lvaMarkRefsCurBlock));
+ BlockSetOps::RemoveElemD(this, lvaMarkRefsCurBlock->bbDoms, fgFirstBB->bbNum);
+ }
+ assert(fgEnterBlksSetValid);
+ if (!BlockSetOps::IsEmptyIntersection(this, lvaMarkRefsCurBlock->bbDoms, fgEnterBlks))
+ {
+ varDsc->lvVolatileHint = 1;
+ }
+ }
+
+ /* Record if the variable has a single def or not */
+
+ if (!varDsc->lvDisqualify) // If this variable is already disqualified we can skip this
+ {
+ if (tree->gtFlags & GTF_VAR_DEF) // Is this a def of our variable?
+ {
+ /*
+ If we have one of these cases:
+ 1. We have already seen a definition (i.e lvSingleDef is true)
+ 2. or info.compInitMem is true (thus this would be the second definition)
+ 3. or we have an assignment inside QMARK-COLON trees
+ 4. or we have an update form of assignment (i.e. +=, -=, *=)
+ Then we must disqualify this variable for use in optAddCopies()
+
+ Note that all parameters start out with lvSingleDef set to true
+ */
+ if ((varDsc->lvSingleDef == true) || (info.compInitMem == true) || (tree->gtFlags & GTF_COLON_COND) ||
+ (tree->gtFlags & GTF_VAR_USEASG))
+ {
+ varDsc->lvaDisqualifyVar();
+ }
+ else
+ {
+ varDsc->lvSingleDef = true;
+ varDsc->lvDefStmt = lvaMarkRefsCurStmt;
+ }
+ }
+ else // otherwise this is a ref of our variable
+ {
+ if (BlockSetOps::MayBeUninit(varDsc->lvRefBlks))
+ {
+ // Lazy initialization
+ BlockSetOps::AssignNoCopy(this, varDsc->lvRefBlks, BlockSetOps::MakeEmpty(this));
+ }
+ BlockSetOps::AddElemD(this, varDsc->lvRefBlks, lvaMarkRefsCurBlock->bbNum);
+ }
+ }
+#endif // ASSERTION_PROP
+
+ bool allowStructs = false;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On System V the type of the var could be a struct type.
+ allowStructs = varTypeIsStruct(varDsc);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ /* Variables must be used as the same type throughout the method */
+ noway_assert(tiVerificationNeeded || varDsc->lvType == TYP_UNDEF || tree->gtType == TYP_UNKNOWN || allowStructs ||
+ genActualType(varDsc->TypeGet()) == genActualType(tree->gtType) ||
+ (tree->gtType == TYP_BYREF && varDsc->TypeGet() == TYP_I_IMPL) ||
+ (tree->gtType == TYP_I_IMPL && varDsc->TypeGet() == TYP_BYREF) || (tree->gtFlags & GTF_VAR_CAST) ||
+ varTypeIsFloating(varDsc->TypeGet()) && varTypeIsFloating(tree->gtType));
+
+ /* Remember the type of the reference */
+
+ if (tree->gtType == TYP_UNKNOWN || varDsc->lvType == TYP_UNDEF)
+ {
+ varDsc->lvType = tree->gtType;
+ noway_assert(genActualType(varDsc->TypeGet()) == tree->gtType); // no truncation
+ }
+
+#ifdef DEBUG
+ if (tree->gtFlags & GTF_VAR_CAST)
+ {
+ // it should never be bigger than the variable slot
+
+ // Trees don't store the full information about structs
+ // so we can't check them.
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ unsigned treeSize = genTypeSize(tree->TypeGet());
+ unsigned varSize = genTypeSize(varDsc->TypeGet());
+ if (varDsc->TypeGet() == TYP_STRUCT)
+ {
+ varSize = varDsc->lvSize();
+ }
+
+ assert(treeSize <= varSize);
+ }
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkTreePre() to do variable ref marking.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::lvaMarkLclRefsCallback(GenTreePtr* pTree, fgWalkData* data)
+{
+ data->compiler->lvaMarkLclRefs(*pTree);
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Update the local variable reference counts for one basic block
+ */
+
+void Compiler::lvaMarkLocalVars(BasicBlock* block)
+{
+#if ASSERTION_PROP
+ lvaMarkRefsCurBlock = block;
+#endif
+ lvaMarkRefsWeight = block->getBBWeight(this);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*** marking local variables in block BB%02u (weight=%s)\n", block->bbNum,
+ refCntWtd2str(lvaMarkRefsWeight));
+ }
+#endif
+
+ for (GenTreePtr tree = block->FirstNonPhiDef(); tree; tree = tree->gtNext)
+ {
+ noway_assert(tree->gtOper == GT_STMT);
+
+#if ASSERTION_PROP
+ lvaMarkRefsCurStmt = tree;
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ gtDispTree(tree);
+ }
+#endif
+
+ fgWalkTreePre(&tree->gtStmt.gtStmtExpr, Compiler::lvaMarkLclRefsCallback, (void*)this, false);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Create the local variable table and compute local variable reference
+ * counts.
+ */
+
+void Compiler::lvaMarkLocalVars()
+{
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In lvaMarkLocalVars()");
+ }
+#endif
+
+ /* If there is a call to an unmanaged target, we already grabbed a
+ local slot for the current thread control block.
+ */
+
+ if (info.compCallUnmanaged != 0)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(info.compLvFrameListRoot >= info.compLocalsCount && info.compLvFrameListRoot < lvaCount);
+
+ lvaTable[info.compLvFrameListRoot].lvType = TYP_I_IMPL;
+
+ /* Set the refCnt, it is used in the prolog and return block(s) */
+
+ lvaTable[info.compLvFrameListRoot].lvRefCnt = 2;
+ lvaTable[info.compLvFrameListRoot].lvRefCntWtd = 2 * BB_UNITY_WEIGHT;
+ }
+ }
+
+ lvaAllocOutgoingArgSpace();
+
+#if !FEATURE_EH_FUNCLETS
+
+ // Grab space for exception handling
+
+ if (ehNeedsShadowSPslots())
+ {
+ // The first slot is reserved for ICodeManager::FixContext(ppEndRegion)
+ // ie. the offset of the end-of-last-executed-filter
+ unsigned slotsNeeded = 1;
+
+ unsigned handlerNestingLevel = ehMaxHndNestingCount;
+
+ if (opts.compDbgEnC && (handlerNestingLevel < (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL))
+ handlerNestingLevel = (unsigned)MAX_EnC_HANDLER_NESTING_LEVEL;
+
+ slotsNeeded += handlerNestingLevel;
+
+ // For a filter (which can be active at the same time as a catch/finally handler)
+ slotsNeeded++;
+ // For zero-termination of the shadow-Stack-pointer chain
+ slotsNeeded++;
+
+ lvaShadowSPslotsVar = lvaGrabTempWithImplicitUse(false DEBUGARG("lvaShadowSPslotsVar"));
+ LclVarDsc* shadowSPslotsVar = &lvaTable[lvaShadowSPslotsVar];
+ shadowSPslotsVar->lvType = TYP_BLK;
+ shadowSPslotsVar->lvExactSize = (slotsNeeded * TARGET_POINTER_SIZE);
+ }
+
+#endif // !FEATURE_EH_FUNCLETS
+
+#if FEATURE_EH_FUNCLETS
+ if (ehNeedsPSPSym())
+ {
+ lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym"));
+ LclVarDsc* lclPSPSym = &lvaTable[lvaPSPSym];
+ lclPSPSym->lvType = TYP_I_IMPL;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (compLocallocUsed)
+ {
+ lvaLocAllocSPvar = lvaGrabTempWithImplicitUse(false DEBUGARG("LocAllocSPvar"));
+ LclVarDsc* locAllocSPvar = &lvaTable[lvaLocAllocSPvar];
+ locAllocSPvar->lvType = TYP_I_IMPL;
+ }
+
+ BasicBlock* block;
+
+#if defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+#ifndef DEBUG
+ // Assign slot numbers to all variables.
+ // For compiler-generated local variables, slot numbers will be
+ // invalid (out of range of info.compVarScopes).
+
+ // Also have to check if variable was not reallocated to another
+ // slot in which case we have to register the original slot #.
+
+ // We don't need to do this for IL, but this keeps lvSlotNum consistent.
+
+ if (opts.compScopeInfo && (info.compVarScopesCount > 0))
+#endif
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ varDsc->lvSlotNum = lclNum;
+ }
+ }
+
+#endif // defined(DEBUGGING_SUPPORT) || defined(DEBUG)
+
+ /* Mark all local variable references */
+
+ lvaRefCountingStarted = true;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ lvaMarkLocalVars(block);
+ }
+
+ /* For incoming register arguments, if there are references in the body
+ * then we will have to copy them to the final home in the prolog
+ * This counts as an extra reference with a weight of 2
+ */
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (lclNum >= info.compArgsCount)
+ {
+ break; // early exit for loop
+ }
+
+ if ((varDsc->lvIsRegArg) && (varDsc->lvRefCnt > 0))
+ {
+ // Fix 388376 ARM JitStress WP7
+ varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
+ varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
+ }
+ }
+
+#if ASSERTION_PROP
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ // Note: optAddCopies() depends on lvaRefBlks, which is set in lvaMarkLocalVars(BasicBlock*), called above.
+ optAddCopies();
+ }
+#endif
+
+ if (lvaKeepAliveAndReportThis() && lvaTable[0].lvRefCnt == 0)
+ {
+ lvaTable[0].lvRefCnt = 1;
+ // This isn't strictly needed as we will make a copy of the param-type-arg
+ // in the prolog. However, this ensures that the LclVarDsc corresponding to
+ // info.compTypeCtxtArg is valid.
+ }
+ else if (lvaReportParamTypeArg() && lvaTable[info.compTypeCtxtArg].lvRefCnt == 0)
+ {
+ lvaTable[info.compTypeCtxtArg].lvRefCnt = 1;
+ }
+
+ lvaLocalVarRefCounted = true;
+ lvaRefCountingStarted = false;
+
+ lvaSortByRefCount();
+}
+
+void Compiler::lvaAllocOutgoingArgSpace()
+{
+#if FEATURE_FIXED_OUT_ARGS
+
+ // Setup the outgoing argument region, in case we end up using it later
+
+ if (lvaOutgoingArgSpaceVar == BAD_VAR_NUM)
+ {
+ lvaOutgoingArgSpaceVar = lvaGrabTemp(false DEBUGARG("OutgoingArgSpace"));
+
+ lvaTable[lvaOutgoingArgSpaceVar].lvType = TYP_LCLBLK;
+
+ /* Set the refCnts */
+
+ lvaTable[lvaOutgoingArgSpaceVar].lvRefCnt = 1;
+ lvaTable[lvaOutgoingArgSpaceVar].lvRefCntWtd = BB_UNITY_WEIGHT;
+
+ if (lvaOutgoingArgSpaceSize == 0)
+ {
+ if (compUsesThrowHelper || compIsProfilerHookNeeded())
+ {
+ // Need to make sure the MIN_ARG_AREA_FOR_CALL space is added to the frame if:
+ // 1. there are calls to THROW_HELPER methods.
+ // 2. we are generating profiling Enter/Leave/TailCall hooks. This will ensure
+ // that even methods without any calls will have outgoing arg area space allocated.
+ //
+ // An example of these two cases is Windows Amd64, where the ABI requires 4 slots for
+ // the outgoing arg space if the method makes any calls.
+ lvaOutgoingArgSpaceSize = MIN_ARG_AREA_FOR_CALL;
+ }
+ }
+ }
+
+ noway_assert(lvaOutgoingArgSpaceVar >= info.compLocalsCount && lvaOutgoingArgSpaceVar < lvaCount);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+inline void Compiler::lvaIncrementFrameSize(unsigned size)
+{
+ if (size > MAX_FrameSize || compLclFrameSize + size > MAX_FrameSize)
+ {
+ BADCODE("Frame size overflow");
+ }
+
+ compLclFrameSize += size;
+}
+
+/****************************************************************************
+*
+* Return true if absolute offsets of temps are larger than those of vars; in other
+* words, whether we allocated temps before or after vars. The /GS buffer overrun
+* checks want temps to be at lower stack addresses than buffers.
+*/
+bool Compiler::lvaTempsHaveLargerOffsetThanVars()
+{
+#ifdef _TARGET_ARM_
+ // We never want to place the temps with larger offsets for ARM
+ return false;
+#else
+ if (compGSReorderStackLayout)
+ {
+ return codeGen->isFramePointerUsed();
+ }
+ else
+ {
+ return true;
+ }
+#endif
+}
+
+/****************************************************************************
+*
+* Return an upper bound estimate for the size of the compiler spill temps
+*
+*/
+unsigned Compiler::lvaGetMaxSpillTempSize()
+{
+ unsigned result = 0;
+
+#ifndef LEGACY_BACKEND
+ if (lvaDoneFrameLayout >= REGALLOC_FRAME_LAYOUT)
+ {
+ result = tmpSize;
+ }
+ else
+ {
+ result = MAX_SPILL_TEMP_SIZE;
+ }
+#else // LEGACY_BACKEND
+ if (lvaDoneFrameLayout >= FINAL_FRAME_LAYOUT)
+ {
+ result = tmpSize;
+ }
+ else
+ {
+ if (lvaDoneFrameLayout >= REGALLOC_FRAME_LAYOUT)
+ {
+ unsigned maxTmpSize = sizeof(double) + sizeof(int);
+
+ maxTmpSize += (tmpDoubleSpillMax * sizeof(double)) + (tmpIntSpillMax * sizeof(int));
+
+ result = maxTmpSize;
+ }
+ else
+ {
+ result = MAX_SPILL_TEMP_SIZE;
+ }
+#ifdef DEBUG
+ // When StressRegs is >=1, there can be a bunch of spills that are not
+ // predicted by the predictor (see logic in rsPickReg). It is very hard
+ // to teach the predictor about the behavior of rsPickReg for StressRegs >= 1,
+ // so instead let's make MaxTmpSize large enough so that we won't be wrong.
+
+ if (codeGen->regSet.rsStressRegs() >= 1)
+ {
+ result += (REG_TMP_ORDER_COUNT * REGSIZE_BYTES);
+ }
+#endif // DEBUG
+ }
+#endif // LEGACY_BACKEND
+ return result;
+}
+
+// clang-format off
+/*****************************************************************************
+ *
+ * Compute stack frame offsets for arguments, locals and optionally temps.
+ *
+ * The frame is laid out as follows for x86:
+ *
+ * ESP frames
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------| <---- Virtual '0'
+ * | return address |
+ * +=======================+
+ * |Callee saved registers |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------| <---- Ambient ESP
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * | | |
+ * | | Stack grows |
+ * | downward
+ * V
+ *
+ *
+ * EBP frames
+ *
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------| <---- Virtual '0'
+ * | return address |
+ * +=======================+
+ * | incoming EBP |
+ * |-----------------------| <---- EBP
+ * |Callee saved registers |
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | Last-executed-filter |
+ * |-----------------------|
+ * | |
+ * ~ Shadow SPs ~
+ * | |
+ * |-----------------------|
+ * | |
+ * ~ Variables ~
+ * | |
+ * ~-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | localloc |
+ * |-----------------------| <---- Ambient ESP
+ * | Arguments for the |
+ * | next function ~
+ * | |
+ * | | |
+ * | | Stack grows |
+ * | downward
+ * V
+ *
+ *
+ * The frame is laid out as follows for x64:
+ *
+ * RSP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------|
+ * | 4 fixed incoming |
+ * | argument slots |
+ * |-----------------------| <---- Caller's SP & Virtual '0'
+ * | return address |
+ * +=======================+
+ * | Callee saved Int regs |
+ * -------------------------
+ * | Padding | <---- this padding (0 or 8 bytes) is to ensure flt registers are saved at a mem location aligned at 16-bytes
+ * | | so that we can save 128-bit callee saved xmm regs using performant "movaps" instruction instead of "movups"
+ * -------------------------
+ * | Callee saved Flt regs | <----- entire 128-bits of callee saved xmm registers are stored here
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------|
+ * | 4 fixed outgoing |
+ * | argument slots |
+ * |-----------------------| <---- Ambient RSP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * RBP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------|
+ * | 4 fixed incoming |
+ * | argument slots |
+ * |-----------------------| <---- Caller's SP & Virtual '0'
+ * | return address |
+ * +=======================+
+ * | Callee saved Int regs |
+ * -------------------------
+ * | Padding |
+ * -------------------------
+ * | Callee saved Flt regs |
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | |
+ * | |
+ * ~ Variables ~
+ * | |
+ * | |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | |
+ * ~ localloc ~ // not in frames with EH
+ * | |
+ * |-----------------------|
+ * | PSPSym | // only in frames with EH (thus no localloc)
+ * | |
+ * |-----------------------| <---- RBP in localloc frames (max 240 bytes from Initial-SP)
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------|
+ * | 4 fixed outgoing |
+ * | argument slots |
+ * |-----------------------| <---- Ambient RSP (before localloc, this is Initial-SP)
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * The frame is laid out as follows for ARM (this is a general picture; details may differ for different conditions):
+ *
+ * SP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | Pre-spill registers |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * FP / R11 frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | Pre-spill registers |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * |-----------------------|
+ * | PSPSym | // Only for frames with EH, which means FP-based frames
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * ~ possible double align ~
+ * |-----------------------|
+ * | localloc |
+ * |-----------------------|
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * The frame is laid out as follows for ARM64 (this is a general picture; details may differ for different conditions):
+ * TODO-ARM64-NYI: this is preliminary (copied from ARM and modified), and needs to be reviewed.
+ * NOTE: SP must be 16-byte aligned, so there may be alignment slots in the frame.
+ * We will often save and establish a frame pointer to create better ETW stack walks.
+ *
+ * SP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | homed | // this is only needed if register arguments need to be homed, e.g., for varargs
+ * | register arguments |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * | except fp/lr |
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * | Saved LR |
+ * |-----------------------|
+ * | Saved FP | <---- Frame pointer
+ * |-----------------------|
+ * | Stack arguments for |
+ * | the next function |
+ * |-----------------------| <---- SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * FP (R29 / x29) frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | optional homed | // this is only needed if register arguments need to be homed, e.g., for varargs
+ * | register arguments |
+ * |-----------------------| <---- Virtual '0'
+ * |Callee saved registers |
+ * | except fp/lr |
+ * |-----------------------|
+ * | PSPSym | // Only for frames with EH, which requires FP-based frames
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * | Saved LR |
+ * |-----------------------|
+ * | Saved FP | <---- Frame pointer
+ * |-----------------------|
+ * ~ localloc ~
+ * |-----------------------|
+ * | Stack arguments for |
+ * | the next function |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * Doing this all in one pass is 'hard'. So instead we do it in 2 basic passes:
+ * 1. Assign all the offsets relative to the Virtual '0'. Offsets above (the
+ * incoming arguments) are positive. Offsets below (everything else) are
+ * negative. This pass also calculates the total frame size (between Caller's
+ * SP/return address and the Ambient SP).
+ * 2. Figure out where to place the frame pointer, and then adjust the offsets
+ * as needed for the final stack size and whether the offset is frame pointer
+ * relative or stack pointer relative.
+ *
+ */
+// clang-format on
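+
+// Illustrative numbers (hypothetical): pass 1 might give a local the virtual offset -8 (below
+// Virtual '0') and an incoming stack argument +16. Pass 2 then adds a single delta, computed
+// from the final frame size and the frame-pointer decision, to every offset; if that delta is
+// 0x40 for an SP-relative frame, the local's final offset becomes SP+0x38.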
+
+void Compiler::lvaAssignFrameOffsets(FrameLayoutState curState)
+{
+ noway_assert(lvaDoneFrameLayout < curState);
+
+ lvaDoneFrameLayout = curState;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+
+ printf("*************** In lvaAssignFrameOffsets");
+ if (curState == INITIAL_FRAME_LAYOUT)
+ {
+ printf("(INITIAL_FRAME_LAYOUT)");
+ }
+ else if (curState == PRE_REGALLOC_FRAME_LAYOUT)
+ {
+ printf("(PRE_REGALLOC_FRAME_LAYOUT)");
+ }
+ else if (curState == REGALLOC_FRAME_LAYOUT)
+ {
+ printf("(REGALLOC_FRAME_LAYOUT)");
+ }
+ else if (curState == TENTATIVE_FRAME_LAYOUT)
+ {
+ printf("(TENTATIVE_FRAME_LAYOUT)");
+ }
+ else if (curState == FINAL_FRAME_LAYOUT)
+ {
+ printf("(FINAL_FRAME_LAYOUT)");
+ }
+ else
+ {
+ printf("(UNKNOWN)");
+ unreached();
+ }
+ printf("\n");
+ }
+#endif
+
+#if FEATURE_FIXED_OUT_ARGS
+ assert(lvaOutgoingArgSpaceVar != BAD_VAR_NUM);
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ /*-------------------------------------------------------------------------
+ *
+ * First process the arguments.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ lvaAssignVirtualFrameOffsetsToArgs();
+
+ /*-------------------------------------------------------------------------
+ *
+ * Now compute stack offsets for any variables that don't live in registers
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ lvaAssignVirtualFrameOffsetsToLocals();
+
+ lvaAlignFrame();
+
+ /*-------------------------------------------------------------------------
+ *
+ * Now patch the offsets
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ lvaFixVirtualFrameOffsets();
+
+ // Modify the stack offset for fields of promoted structs.
+ lvaAssignFrameOffsetsToPromotedStructs();
+
+ /*-------------------------------------------------------------------------
+ *
+ * Finalize
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ // If it's not the final frame layout, then it's just an estimate. This means
+ // we're allowed to once again write to these variables, even if we've read
+ // from them to make tentative code generation or frame layout decisions.
+ if (curState < FINAL_FRAME_LAYOUT)
+ {
+ codeGen->resetFramePointerUsedWritePhase();
+ }
+}
+
+/*****************************************************************************
+ * lvaFixVirtualFrameOffsets() : Now that everything has a virtual offset,
+ * determine the final value for the frame pointer (if needed) and then
+ * adjust all the offsets appropriately.
+ *
+ * This routine fixes virtual offset to be relative to frame pointer or SP
+ * based on whether varDsc->lvFramePointerBased is true or false respectively.
+ */
+void Compiler::lvaFixVirtualFrameOffsets()
+{
+ LclVarDsc* varDsc;
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
+ if (ehNeedsPSPSym())
+ {
+ // We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space.
+ // Without this code, lvaAlignFrame might have put the padding lower than the PSPSym, which would be between
+ // the PSPSym and the outgoing argument space.
+ assert(lvaPSPSym != BAD_VAR_NUM);
+ varDsc = &lvaTable[lvaPSPSym];
+ assert(varDsc->lvFramePointerBased); // We always access it RBP-relative.
+ assert(!varDsc->lvMustInit); // It is never "must init".
+ varDsc->lvStkOffs = codeGen->genCallerSPtoInitialSPdelta() + lvaLclSize(lvaOutgoingArgSpaceVar);
+ }
+#endif
+
+ // The delta to be added to virtual offset to adjust it relative to frame pointer or SP
+ int delta = 0;
+
+#ifdef _TARGET_XARCH_
+ delta += REGSIZE_BYTES; // pushed PC (return address) for x86/x64
+
+ if (codeGen->doubleAlignOrFramePointerUsed())
+ {
+ delta += REGSIZE_BYTES; // pushed EBP (frame pointer)
+ }
+#endif
+
+ if (!codeGen->isFramePointerUsed())
+ {
+ // pushed registers, return address, and padding
+ delta += codeGen->genTotalFrameSize();
+ }
+#if defined(_TARGET_ARM_)
+ else
+ {
+ // We set FP to be after LR, FP
+ delta += 2 * REGSIZE_BYTES;
+ }
+#elif defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ else
+ {
+ // FP is used.
+ delta += codeGen->genTotalFrameSize() - codeGen->genSPtoFPdelta();
+ }
+#endif //_TARGET_AMD64_
+
+ unsigned lclNum;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ bool doAssignStkOffs = true;
+
+ // Can't be relative to EBP unless we have an EBP
+ noway_assert(!varDsc->lvFramePointerBased || codeGen->doubleAlignOrFramePointerUsed());
+
+ // Is this a non-param promoted struct field?
+ // if so then set doAssignStkOffs to false.
+ //
+ if (varDsc->lvIsStructField && !varDsc->lvIsParam)
+ {
+ LclVarDsc* parentvarDsc = &lvaTable[varDsc->lvParentLcl];
+ lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ doAssignStkOffs = false; // Assigned later in lvaAssignFrameOffsetsToPromotedStructs()
+ }
+ }
+
+ if (!varDsc->lvOnFrame)
+ {
+ if (!varDsc->lvIsParam
+#if !defined(_TARGET_AMD64_)
+ || (varDsc->lvIsRegArg
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ && compIsProfilerHookNeeded() &&
+ !lvaIsPreSpilled(lclNum, codeGen->regSet.rsMaskPreSpillRegs(false)) // We need assign stack offsets
+ // for prespilled arguments
+#endif
+ )
+#endif // !defined(_TARGET_AMD64_)
+ )
+ {
+ doAssignStkOffs = false; // Not on frame or an incoming stack arg
+ }
+ }
+
+ if (doAssignStkOffs)
+ {
+ varDsc->lvStkOffs += delta;
+
+#if DOUBLE_ALIGN
+ if (genDoubleAlign() && !codeGen->isFramePointerUsed())
+ {
+ if (varDsc->lvFramePointerBased)
+ {
+ varDsc->lvStkOffs -= delta;
+
+ // We need to re-adjust the offsets of the parameters so they are EBP
+ // relative rather than stack/frame pointer relative
+
+ varDsc->lvStkOffs += (2 * sizeof(void*)); // return address and pushed EBP
+
+ noway_assert(varDsc->lvStkOffs >= FIRST_ARG_STACK_OFFS);
+ }
+ }
+#endif
+ // On System V environments the stkOffs could be 0 for params passed in registers.
+ assert(codeGen->isFramePointerUsed() ||
+ varDsc->lvStkOffs >= 0); // Only EBP relative references can have negative offsets
+ }
+ }
+
+ assert(tmpAllFree());
+ for (TempDsc* temp = tmpListBeg(); temp != nullptr; temp = tmpListNxt(temp))
+ {
+ temp->tdAdjustTempOffs(delta);
+ }
+
+ lvaCachedGenericContextArgOffs += delta;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ if (lvaOutgoingArgSpaceVar != BAD_VAR_NUM)
+ {
+ varDsc = &lvaTable[lvaOutgoingArgSpaceVar];
+ varDsc->lvStkOffs = 0;
+ varDsc->lvFramePointerBased = false;
+ varDsc->lvMustInit = false;
+ }
+
+#endif // FEATURE_FIXED_OUT_ARGS
+}
+
+#ifdef _TARGET_ARM_
+bool Compiler::lvaIsPreSpilled(unsigned lclNum, regMaskTP preSpillMask)
+{
+ const LclVarDsc& desc = lvaTable[lclNum];
+ return desc.lvIsRegArg && (preSpillMask & genRegMask(desc.lvArgReg));
+}
+#endif // _TARGET_ARM_
+
+#ifndef LEGACY_BACKEND
+/*****************************************************************************
+ * lvaUpdateArgsWithInitialReg() : For each argument variable descriptor, update
+ * its current register with the initial register as assigned by LSRA.
+ */
+void Compiler::lvaUpdateArgsWithInitialReg()
+{
+ if (!compLSRADone)
+ {
+ return;
+ }
+
+ for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
+ {
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ varDsc = lvaTable + fieldVarNum;
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegCandidate())
+ {
+ if (varTypeIsMultiReg(varDsc))
+ {
+ regPairNo initialRegPair = varDsc->lvArgInitRegPair;
+ varDsc->lvRegNum = genRegPairLo(initialRegPair);
+ varDsc->lvOtherReg = genRegPairHi(initialRegPair);
+ }
+ else
+ {
+ varDsc->lvRegNum = varDsc->lvArgInitReg;
+ }
+ }
+ }
+}
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ * lvaAssignVirtualFrameOffsetsToArgs() : Assign virtual stack offsets to the
+ * arguments, and implicit arguments (this ptr, return buffer, generics,
+ * and varargs).
+ */
+void Compiler::lvaAssignVirtualFrameOffsetsToArgs()
+{
+ unsigned lclNum = 0;
+ int argOffs = 0;
+#ifdef UNIX_AMD64_ABI
+ int callerArgOffset = 0;
+#endif // UNIX_AMD64_ABI
+
+ /*
+ Assign stack offsets to arguments (in reverse order of passing).
+
+ This means that if we pass arguments left->right, we start at
+ the end of the list and work backwards, for right->left we start
+ with the first argument and move forward.
+
+ This is all relative to our Virtual '0'
+ */
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ {
+ argOffs = compArgSize;
+ }
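+
+ // For example, on an ARG_ORDER_L2R target argOffs starts at compArgSize and
+ // lvaAssignVirtualFrameOffsetToArg subtracts each argument's size up front; on an
+ // ARG_ORDER_R2L target it starts at 0 and each stack-passed argument's size is added
+ // after its offset has been assigned.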
+
+ /* Update the argOffs to reflect arguments that are passed in registers */
+
+ noway_assert(codeGen->intRegState.rsCalleeRegArgCount <= MAX_REG_ARG);
+ noway_assert(compArgSize >= codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*));
+
+#ifdef _TARGET_X86_
+ argOffs -= codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*);
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Update the arg initial register locations.
+ lvaUpdateArgsWithInitialReg();
+#endif // !LEGACY_BACKEND
+
+ /* Is there a "this" argument? */
+
+ if (!info.compIsStatic)
+ {
+ noway_assert(lclNum == info.compThisArg);
+#ifndef _TARGET_X86_
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+#endif // _TARGET_X86_
+ lclNum++;
+ }
+
+ /* if we have a hidden buffer parameter, that comes here */
+
+ if (info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ noway_assert(lclNum == info.compRetBuffArg);
+ noway_assert(lvaTable[lclNum].lvIsRegArg);
+#ifndef _TARGET_X86_
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+#endif // _TARGET_X86_
+ lclNum++;
+ }
+
+#if USER_ARGS_COME_LAST
+
+ //@GENERICS: extra argument for instantiation info
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+ if (info.compIsVarArgs)
+ {
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+#endif // USER_ARGS_COME_LAST
+
+ CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args;
+ unsigned argSigLen = info.compMethodInfo->args.numArgs;
+
+#ifdef _TARGET_ARM_
+ //
+ // struct_n { int; int; ... n times };
+ //
+ // Consider signature:
+ //
+ // Foo (float a,double b,float c,double d,float e,double f,float g,double h,
+ // float i,double j,float k,double l,struct_3 m) { }
+ //
+ // Basically the signature is: (all float regs full, 1 double, struct_3);
+ //
+ // The double argument occurs before pre spill in the argument iteration and
+ // computes an argOffset of 0, and struct_3's offset becomes 8. This is wrong,
+ // because struct_3 is prespilled and the double occurs after the prespill area.
+ // The correct offsets are double = 16 (aligned stk) and struct_3 = 0..12;
+ // offset 12 will be skipped to double-align the double.
+ //
+ // Another example is (struct_2, all float regs full, double, struct_2);
+ // Here, notice the order is similarly messed up because of 2 pre-spilled
+ // struct_2.
+ //
+ // Succinctly,
+ // ARG_INDEX(i) > ARG_INDEX(j) DOES NOT IMPLY |ARG_OFFSET(i)| > |ARG_OFFSET(j)|
+ //
+ // Therefore, we'll do a two pass offset calculation, one that considers pre-spill
+ // and the next, stack args.
+ //
+
+ unsigned argLcls = 0;
+
+ // Take care of pre spill registers first.
+ regMaskTP preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false);
+ regMaskTP tempMask = RBM_NONE;
+ for (unsigned i = 0, preSpillLclNum = lclNum; i < argSigLen; ++i, ++preSpillLclNum)
+ {
+ if (lvaIsPreSpilled(preSpillLclNum, preSpillMask))
+ {
+ unsigned argSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+ argOffs = lvaAssignVirtualFrameOffsetToArg(preSpillLclNum, argSize, argOffs);
+ argLcls++;
+
+ // Early out if we can. If size is 8 and base reg is 2, then the mask is 0b1100 (r2 and r3)
+ tempMask |= ((((1 << (roundUp(argSize) / REGSIZE_BYTES))) - 1) << lvaTable[preSpillLclNum].lvArgReg);
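+ // For example, an 8-byte argument homed in r2 contributes ((1 << 2) - 1) << 2 == 0b1100,
+ // i.e. the bits for r2 and r3.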
+ if (tempMask == preSpillMask)
+ {
+ // We won't encounter more pre-spilled registers,
+ // so don't bother iterating further.
+ break;
+ }
+ }
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+
+ // Take care of non pre-spilled stack arguments.
+ argLst = info.compMethodInfo->args.args;
+ for (unsigned i = 0, stkLclNum = lclNum; i < argSigLen; ++i, ++stkLclNum)
+ {
+ if (!lvaIsPreSpilled(stkLclNum, preSpillMask))
+ {
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(stkLclNum, eeGetArgSize(argLst, &info.compMethodInfo->args), argOffs);
+ argLcls++;
+ }
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+
+ lclNum += argLcls;
+#else // !_TARGET_ARM_
+ for (unsigned i = 0; i < argSigLen; i++)
+ {
+ unsigned argumentSize = eeGetArgSize(argLst, &info.compMethodInfo->args);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On the stack frame the homed arg always takes a full number of slots
+ // for proper stack alignment. Make sure the real struct size is properly rounded up.
+ argumentSize = (unsigned)roundUp(argumentSize, TARGET_POINTER_SIZE);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ argOffs =
+ lvaAssignVirtualFrameOffsetToArg(lclNum++, argumentSize, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+#endif // !_TARGET_ARM_
+
+#if !USER_ARGS_COME_LAST
+
+ //@GENERICS: extra argument for instantiation info
+ if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE)
+ {
+ noway_assert(lclNum == (unsigned)info.compTypeCtxtArg);
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+ if (info.compIsVarArgs)
+ {
+ argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, sizeof(void*),
+ argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset));
+ }
+
+#endif // USER_ARGS_COME_LAST
+}
+
+#ifdef UNIX_AMD64_ABI
+//
+// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
+// individual argument, and return the offset for the next argument.
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments
+// (if any - the RA might decide to spill (home on the stack) register passed arguments, if rarely used.)
+// The final offset is calculated in the lvaFixVirtualFrameOffsets method. It accounts for FP existence,
+// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for UNIX_AMD64 System V platforms.
+//
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
+ unsigned argSize,
+ int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
+{
+ noway_assert(lclNum < info.compArgsCount);
+ noway_assert(argSize);
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ argOffs -= argSize;
+
+ unsigned fieldVarNum = BAD_VAR_NUM;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ fieldVarNum = varDsc->lvFieldLclStart;
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ lclNum = fieldVarNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+ assert(varDsc->lvIsStructField);
+ }
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg)
+ {
+ // Argument is passed in a register, don't count it
+ // when updating the current offset on the stack.
+
+ if (varDsc->lvOnFrame)
+ {
+ // The offset for args needs to be set only for the stack homed arguments for System V.
+ varDsc->lvStkOffs = argOffs;
+ }
+ else
+ {
+ varDsc->lvStkOffs = 0;
+ }
+ }
+ else
+ {
+ // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack.
+ // This is where they are always homed. So, they can be accessed with positive offset.
+ // On System V platforms, if the RA decides to home a register passed arg on the stack, it creates a stack
+ // location on the callee stack (like any other local var.) In such a case, the register passed, stack homed
+ // arguments are accessed using negative offsets and the stack passed arguments are accessed using positive
+ // offset (from the caller's stack.)
+ // For System V platforms, if there is no frame pointer the caller stack parameter offset should include the
+ // callee allocated space. If a frame register is used, the callee allocated space should not be included when
+ // accessing the caller stack parameters. The last two requirements are met in the lvaFixVirtualFrameOffsets
+ // method, which fixes the offsets based on frame pointer existence, presence of alloca instructions, return
+ // address pushed, etc.
+
+ varDsc->lvStkOffs = *callerArgOffset;
+ // Structs passed on stack could be of size less than TARGET_POINTER_SIZE.
+ // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment.
+ if (argSize > TARGET_POINTER_SIZE)
+ {
+ *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE);
+ }
+ else
+ {
+ *callerArgOffset += TARGET_POINTER_SIZE;
+ }
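+ // For example, a 12-byte struct passed on the caller's stack advances *callerArgOffset
+ // by 16, while a 4-byte int still advances it by a full TARGET_POINTER_SIZE slot.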
+ }
+
+ // For struct promoted parameters we need to set the offsets for both LclVars.
+ //
+ // For a dependent promoted struct we also assign the struct fields stack offset
+ if (varDsc->lvPromotedStruct())
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+ }
+ // For an independent promoted struct field we also assign the parent struct stack offset
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ argOffs += argSize;
+
+ return argOffs;
+}
+
+#else // !UNIX_AMD64_ABI
+
+//
+// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an
+// individual argument, and return the offset for the next argument.
+// Note: This method only calculates the initial offset of the stack passed/spilled arguments
+// (if any - the RA might decide to spill (home on the stack) register passed arguments, if rarely used.)
+// The final offset is calculated in the lvaFixVirtualFrameOffsets method. It accounts for FP existence,
+// ret address slot, stack frame padding, alloca instructions, etc.
+// Note: This is the implementation for all platforms except the UNIX_AMD64 OSs (System V 64 bit).
+int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum,
+ unsigned argSize,
+ int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset))
+{
+ noway_assert(lclNum < info.compArgsCount);
+ noway_assert(argSize);
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_L2R)
+ {
+ argOffs -= argSize;
+ }
+
+ unsigned fieldVarNum = BAD_VAR_NUM;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+ fieldVarNum = varDsc->lvFieldLclStart;
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ lclNum = fieldVarNum;
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+ assert(varDsc->lvIsStructField);
+ }
+ }
+
+ noway_assert(varDsc->lvIsParam);
+
+ if (varDsc->lvIsRegArg)
+ {
+ /* Argument is passed in a register, don't count it
+ * when updating the current offset on the stack */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_ARMARCH_)
+#if DEBUG
+ // TODO: Remove this noway_assert and replace occurrences of sizeof(void *) with argSize
+ // Also investigate why we are incrementing argOffs for X86 as this seems incorrect
+ //
+ noway_assert(argSize == sizeof(void*));
+#endif // DEBUG
+#endif
+
+#if defined(_TARGET_X86_)
+ argOffs += sizeof(void*);
+#elif defined(_TARGET_AMD64_)
+ // Register arguments on AMD64 also take stack space (in the backing store).
+ varDsc->lvStkOffs = argOffs;
+ argOffs += sizeof(void*);
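+ // For example, the first four arguments each have an 8-byte home slot reserved by the
+ // caller, so argOffs advances by a full slot even though the argument arrives in a register.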
+#elif defined(_TARGET_ARM64_)
+// Register arguments on ARM64 only take stack space when they have a frame home.
+#elif defined(_TARGET_ARM_)
+ // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
+ // in the prolog, so we have to fill in lvStkOffs here
+ //
+ regMaskTP regMask = genRegMask(varDsc->lvArgReg);
+ if (codeGen->regSet.rsMaskPreSpillRegArg & regMask)
+ {
+ // Signature: void foo(struct_8, int, struct_4)
+ // ------- CALLER SP -------
+ // r3 struct_4
+ // r2 int - not prespilled, but added for alignment. argOffs should skip this.
+ // r1 struct_8
+ // r0 struct_8
+ // -------------------------
+ // If we added alignment we need to fix argOffs for all registers above alignment.
+ if (codeGen->regSet.rsMaskPreSpillAlign != RBM_NONE)
+ {
+ assert(genCountBits(codeGen->regSet.rsMaskPreSpillAlign) == 1);
+ // Is register beyond the alignment pos?
+ if (regMask > codeGen->regSet.rsMaskPreSpillAlign)
+ {
+ // Increment argOffs just once for the _first_ register after alignment pos
+ // in the prespill mask.
+ if (!BitsBetween(codeGen->regSet.rsMaskPreSpillRegArg, regMask,
+ codeGen->regSet.rsMaskPreSpillAlign))
+ {
+ argOffs += TARGET_POINTER_SIZE;
+ }
+ }
+ }
+
+ switch (varDsc->lvType)
+ {
+ case TYP_STRUCT:
+ if (!varDsc->lvStructDoubleAlign)
+ {
+ break;
+ }
+ __fallthrough;
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+ {
+ //
+ // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8.
+ //
+ // ------- CALLER SP -------
+ // r3
+ // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping.
+ // r1 VACookie -- argOffs = 0
+ // -------------------------
+ //
+ // Consider argOffs as if it accounts for the number of prespilled registers before the current
+ // register. In the above example, for r2, it is r1 that is prespilled, but since r1 is
+ // accounted for by argOffs being 4, there should have been no skipping. Instead, if we didn't
+ // assign r1 to any variable, then argOffs would still be 0 which implies it is not accounting
+ // for r1, equivalently r1 is skipped.
+ //
+ // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register.
+ int prevRegsSize =
+ genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE;
+ if (argOffs < prevRegsSize)
+ {
+ // We must align up the argOffset to a multiple of 8 to account for skipped registers.
+ argOffs = roundUp(argOffs, 2 * TARGET_POINTER_SIZE);
+ }
+ // We should've skipped only a single register.
+ assert(argOffs == prevRegsSize);
+ }
+ break;
+
+ default:
+ // No alignment of argOffs required
+ break;
+ }
+ varDsc->lvStkOffs = argOffs;
+ argOffs += argSize;
+ }
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+ }
+ else
+ {
+#if defined(_TARGET_ARM_)
+ // Dev11 Bug 42817: incorrect codegen for DrawFlatCheckBox causes A/V in WinForms
+ //
+ // Here we have method with a signature (int a1, struct a2, struct a3, int a4, int a5).
+ // Struct parameter 'a2' is 16-bytes with no alignment requirements;
+ // it uses r1,r2,r3 and [OutArg+0] when passed.
+ // Struct parameter 'a3' is 16-bytes that is required to be double aligned;
+ // the caller skips [OutArg+4] and starts the argument at [OutArg+8].
+ // Thus the caller generates the correct code to pass the arguments.
+ // When generating code to receive the arguments we set codeGen->regSet.rsMaskPreSpillRegArg to [r1,r2,r3]
+ // and spill these three registers as the first instruction in the prolog.
+ // Then when we layout the arguments' stack offsets we have an argOffs 0 which
+ // points at the location that we spilled r1 into the stack. For this first
+ // struct we take the lvIsRegArg path above with "codeGen->regSet.rsMaskPreSpillRegArg &" matching.
+ // Next when we calculate the argOffs for the second 16-byte struct we have an argOffs
+ // of 16, which appears to be aligned properly so we don't skip a stack slot.
+ //
+ // To fix this we must recover the actual OutArg offset by subtracting off the
+ // size of the PreSpill register args.
+ // Then we align this offset to a multiple of 8 and add back the size
+ // of the PreSpill register args.
+ //
+ // Dev11 Bug 71767: failure of assert(sizeofPreSpillRegArgs <= argOffs)
+ //
+ // We have a method with 'this' passed in r0, RetBuf arg in r1, VarArgs cookie
+ // in r2. The first user arg is a 144 byte struct with double alignment required,
+ // r3 is skipped, and the struct is passed on the stack. However, 'r3' is added
+ // to the codeGen->regSet.rsMaskPreSpillRegArg mask by the VarArgs cookie code, since we need to
+ // home all the potential varargs arguments in registers, even if we don't have
+ // signature type information for the variadic arguments. However, due to alignment,
+ // we have skipped a register that doesn't have a corresponding symbol. Make up
+ // for that by increasing argOffs here.
+ //
+
+ int sizeofPreSpillRegArgs = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+
+ if (argOffs < sizeofPreSpillRegArgs)
+ {
+ // This can only happen if we skipped the last register spot because the current stk arg
+ // is a struct requiring alignment, or a pre-spill alignment was required because the
+ // first reg arg needed alignment.
+ //
+ // Example 1: First Stk Argument requiring alignment in vararg case (same as above comment.)
+ // Signature (int a0, int a1, int a2, struct {long} a3, ...)
+ //
+ // stk arg a3 --> argOffs here will be 12 (r0-r2) but pre-spill will be 16.
+ // ---- Caller SP ----
+ // r3 --> Stack slot is skipped in this case.
+ // r2 int a2
+ // r1 int a1
+ // r0 int a0
+ //
+ // Example 2: First Reg Argument requiring alignment in no-vararg case.
+ // Signature (struct {long} a0, struct {int} a1, int a2, int a3)
+ //
+ // stk arg --> argOffs here will be 12 {r0-r2} but pre-spill will be 16.
+ // ---- Caller SP ----
+ // r3 int a2 --> pushed (not pre-spilled) for alignment of a0 by lvaInitUserArgs.
+ // r2 struct { int } a1
+ // r0-r1 struct { long } a0
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef PROFILING_SUPPORTED
+ // On Arm under profiler, r0-r3 are always prespilled on stack.
+ // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct
+ // hfa2), in which case hfa1 and hfa2 will be enregistered in co-processor registers and will have an
+ // argument offset less than the size of the prespill area.
+ //
+ // For this reason the following conditions are asserted when not under profiler.
+ if (!compIsProfilerHookNeeded())
+#endif
+ {
+ bool cond = ((info.compIsVarArgs || opts.compUseSoftFP) &&
+ // Does cur stk arg require double alignment?
+ ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) ||
+ (varDsc->lvType == TYP_DOUBLE) || (varDsc->lvType == TYP_LONG))) ||
+ // Did first reg arg require alignment?
+ (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST));
+
+ noway_assert(cond);
+ noway_assert(sizeofPreSpillRegArgs <=
+ argOffs + TARGET_POINTER_SIZE); // at most one register of alignment
+ }
+ argOffs = sizeofPreSpillRegArgs;
+ }
+
+ noway_assert(argOffs >= sizeofPreSpillRegArgs);
+ int argOffsWithoutPreSpillRegArgs = argOffs - sizeofPreSpillRegArgs;
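+
+ // For example, with 16 bytes of prespilled registers and argOffs == 20,
+ // argOffsWithoutPreSpillRegArgs == 4; a double or long below then rounds that up to 8,
+ // yielding a final argOffs of 24.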
+
+ switch (varDsc->lvType)
+ {
+ case TYP_STRUCT:
+ if (!varDsc->lvStructDoubleAlign)
+ break;
+
+ __fallthrough;
+
+ case TYP_DOUBLE:
+ case TYP_LONG:
+ // We must align up the argOffset to a multiple of 8
+ argOffs = roundUp(argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs;
+ break;
+
+ default:
+ // No alignment of argOffs required
+ break;
+ }
+#endif // _TARGET_ARM_
+
+ varDsc->lvStkOffs = argOffs;
+ }
+
+ // For struct promoted parameters we need to set the offsets for both LclVars.
+ //
+ // For a dependent promoted struct we also assign the struct fields stack offset
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_64BIT_)
+ if ((varDsc->TypeGet() == TYP_LONG) && varDsc->lvPromoted)
+ {
+ noway_assert(varDsc->lvFieldCnt == 2);
+ fieldVarNum = varDsc->lvFieldLclStart;
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ lvaTable[fieldVarNum + 1].lvStkOffs = varDsc->lvStkOffs + genTypeSize(TYP_INT);
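+ // For example, a promoted TYP_LONG argument at stack offset 8 places its two field
+ // locals at offsets 8 and 12 respectively.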
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ if (varDsc->lvPromotedStruct())
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ assert(fieldVarNum == varDsc->lvFieldLclStart);
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+ }
+ // For an independent promoted struct field we also assign the parent struct stack offset
+ else if (varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvParentLcl < lvaCount);
+ lvaTable[varDsc->lvParentLcl].lvStkOffs = varDsc->lvStkOffs;
+ }
+
+ if (Target::g_tgtArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg)
+ {
+ argOffs += argSize;
+ }
+
+ return argOffs;
+}
+#endif // !UNIX_AMD64_ABI
+
+/*****************************************************************************
+ * lvaAssignVirtualFrameOffsetsToLocals() : Assign virtual stack offsets to
+ * locals, temps, and anything else. These will all be negative offsets
+ * (stack grows down) relative to the virtual '0'/return address
+ */
+void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
+{
+ int stkOffs = 0;
+ // codeGen->isFramePointerUsed is set in regalloc phase. Initialize it to a guess for pre-regalloc layout.
+ if (lvaDoneFrameLayout <= PRE_REGALLOC_FRAME_LAYOUT)
+ {
+ codeGen->setFramePointerUsed(codeGen->isFramePointerRequired());
+ }
+
+#ifdef _TARGET_XARCH_
+ // On x86/amd64, the return address has already been pushed by the call instruction in the caller.
+ stkOffs -= sizeof(void*); // return address;
+
+ // TODO-AMD64-CQ: for X64 eventually this should be pushed with all the other
+ // calleeregs. When you fix this, you'll also need to fix
+ // the assert at the bottom of this method
+ if (codeGen->doubleAlignOrFramePointerUsed())
+ {
+ stkOffs -= REGSIZE_BYTES;
+ }
+#endif //_TARGET_XARCH_
+
+ int preSpillSize = 0;
+ bool mustDoubleAlign = false;
+
+#ifdef _TARGET_ARM_
+ mustDoubleAlign = true;
+ preSpillSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES;
+#else // !_TARGET_ARM_
+#if DOUBLE_ALIGN
+ if (genDoubleAlign())
+ {
+ mustDoubleAlign = true; // X86 only
+ }
+#endif
+#endif // !_TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+ // If the frame pointer is used, then we'll save FP/LR at the bottom of the stack.
+ // Otherwise, we won't store FP, and we'll store LR at the top, with the other callee-save
+ // registers (if any).
+
+ int initialStkOffs = 0;
+ if (info.compIsVarArgs)
+ {
+ // For varargs we always save all of the integer register arguments
+ // so that they are contiguous with the incoming stack arguments.
+ initialStkOffs = MAX_REG_ARG * REGSIZE_BYTES;
+ stkOffs -= initialStkOffs;
+ }
+
+ if (isFramePointerUsed())
+ {
+ // Subtract off FP and LR.
+ assert(compCalleeRegsPushed >= 2);
+ stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
+ }
+ else
+ {
+ stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
+ }
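+
+ // For example, with a frame pointer in use and 10 callee-saved registers pushed
+ // (including FP and LR), stkOffs moves down by 64 bytes here; the remaining 16 bytes
+ // for FP/LR are reserved near the end of this method.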
+
+#else // !_TARGET_ARM64_
+ stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
+#endif // !_TARGET_ARM64_
+
+ compLclFrameSize = 0;
+
+#ifdef _TARGET_AMD64_
+ // On AMD64, compCalleeRegsPushed includes the float regs (xmm6-xmm15) that
+ // need to be saved, but AMD64 doesn't support push/pop of xmm registers.
+ // Instead we need to allocate space for them on the stack and save them in prolog.
+ // Therefore, we consider xmm registers being saved while computing stack offsets
+ // but space for xmm registers is considered part of compLclFrameSize.
+ // Notes
+ // 1) We need to save the entire 128-bits of xmm register to stack, since amd64
+ // prolog unwind codes allow encoding of an instruction that stores the entire xmm reg
+ // at an offset relative to SP
+ // 2) We adjust frame size so that SP is aligned at 16-bytes after pushing integer registers.
+ // This means while saving the first xmm register to its allocated stack location we might
+ // have to skip 8-bytes. The reason for padding is to use efficient "movaps" to save/restore
+ // xmm registers to/from stack to match Jit64 codegen. Without the aligning on 16-byte
+ // boundary we would have to use movups when offset turns out unaligned. Movaps is more
+ // performant than movups.
+ unsigned calleeFPRegsSavedSize = genCountBits(compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES;
+ if (calleeFPRegsSavedSize > 0 && ((stkOffs % XMM_REGSIZE_BYTES) != 0))
+ {
+ // Take care of alignment
+ int alignPad = (int)AlignmentPad((unsigned)-stkOffs, XMM_REGSIZE_BYTES);
+ stkOffs -= alignPad;
+ lvaIncrementFrameSize(alignPad);
+ }
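+ // For example, if stkOffs were -40 after the integer register pushes, alignPad would be 8,
+ // bringing stkOffs to -48 so that the xmm save area starts 16-byte aligned.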
+
+ stkOffs -= calleeFPRegsSavedSize;
+ lvaIncrementFrameSize(calleeFPRegsSavedSize);
+
+ // Quirk for VS debug-launch scenario to work
+ if (compVSQuirkStackPaddingNeeded > 0)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAdding VS quirk stack padding of %d bytes between save-reg area and locals\n",
+ compVSQuirkStackPaddingNeeded);
+ }
+#endif // DEBUG
+
+ stkOffs -= compVSQuirkStackPaddingNeeded;
+ lvaIncrementFrameSize(compVSQuirkStackPaddingNeeded);
+ }
+#endif //_TARGET_AMD64_
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_)
+ if (ehNeedsPSPSym())
+ {
+ // On ARM/ARM64, if we need a PSPSym, allocate it first, before anything else, including
+ // padding (so we can avoid computing the same padding in the funclet
+ // frame). Note that there is no special padding requirement for the PSPSym.
+ noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
+ assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARMARCH_)
+
+ if (mustDoubleAlign)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // Allocate a pointer sized stack slot, since we may need to double align here
+ // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+
+ // If we have any TYP_LONG, TYP_DOUBLE or double aligned structs
+ // then we need to allocate a second pointer sized stack slot,
+ // since we may need to double align that LclVar when we see it
+ // in the loop below. We will just always do this so that the
+ // offsets that we calculate for the stack frame will always
+ // be greater (or equal) to what they can be in the final layout.
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ else // FINAL_FRAME_LAYOUT
+ {
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+ }
+
+ if (lvaMonAcquired != BAD_VAR_NUM)
+ {
+ // This var must go first, in what is called the 'frame header' for EnC so that it is
+ // preserved when remapping occurs. See vm\eetwain.cpp for detailed comment specifying frame
+ // layout requirements for EnC to work.
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaMonAcquired, lvaLclSize(lvaMonAcquired), stkOffs);
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+#ifdef JIT32_GCENCODER
+ /* This can't work without an explicit frame, so make sure */
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaSecurityObject, TARGET_POINTER_SIZE, stkOffs);
+ }
+
+ if (compLocallocUsed)
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
+#endif
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaLocAllocSPvar, TARGET_POINTER_SIZE, stkOffs);
+ }
+
+ if (lvaReportParamTypeArg())
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ // For CORINFO_CALLCONV_PARAMTYPE (if needed)
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ lvaCachedGenericContextArgOffs = stkOffs;
+ }
+#ifndef JIT32_GCENCODER
+ else if (lvaKeepAliveAndReportThis())
+ {
+ // When "this" is also used as generic context arg.
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ lvaCachedGenericContextArgOffs = stkOffs;
+ }
+#endif
+
+#if !FEATURE_EH_FUNCLETS
+ /* If we need space for slots for shadow SP, reserve it now */
+ if (ehNeedsShadowSPslots())
+ {
+ noway_assert(codeGen->isFramePointerUsed()); // else offsets of locals of frameless methods will be incorrect
+ if (!lvaReportParamTypeArg())
+ {
+#ifndef JIT32_GCENCODER
+ if (!lvaKeepAliveAndReportThis())
+#endif
+ {
+ // In order to keep the gc info encoding smaller, the VM assumes that all methods with EH
+ // have also saved space for a ParamTypeArg, so we need to do that here
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ }
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaShadowSPslotsVar, lvaLclSize(lvaShadowSPslotsVar), stkOffs);
+ }
+#endif // !FEATURE_EH_FUNCLETS
+
+ if (compGSReorderStackLayout)
+ {
+ assert(getNeedsGSSecurityCookie());
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+ }
+
+ /*
+ If we're supposed to track lifetimes of pointer temps, we'll
+ assign frame offsets in the following order:
+
+ non-pointer local variables (also untracked pointer variables)
+ pointer local variables
+ pointer temps
+ non-pointer temps
+ */
+
+ enum Allocation
+ {
+ ALLOC_NON_PTRS = 0x1, // assign offsets to non-ptr
+ ALLOC_PTRS = 0x2, // Second pass, assign offsets to tracked ptrs
+ ALLOC_UNSAFE_BUFFERS = 0x4,
+ ALLOC_UNSAFE_BUFFERS_WITH_PTRS = 0x8
+ };
+ UINT alloc_order[5];
+
+ unsigned int cur = 0;
+
+ if (compGSReorderStackLayout)
+ {
+ noway_assert(getNeedsGSSecurityCookie());
+
+ if (codeGen->isFramePointerUsed())
+ {
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS;
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
+ }
+ }
+
+ bool tempsAllocated = false;
+
+#ifdef _TARGET_ARM_
+ // On ARM, SP based offsets use smaller encoding. Since temps are relatively
+ // rarer than lcl usage, allocate them farther from SP.
+ if (!opts.MinOpts() && !compLocallocUsed)
+#else
+ if (lvaTempsHaveLargerOffsetThanVars() && !codeGen->isFramePointerUsed())
+#endif
+ {
+ // Because we want the temps to have a larger offset than locals
+ // and we're not using a frame pointer, we have to place the temps
+ // above the vars. Otherwise we place them after the vars (at the
+ // bottom of the frame).
+ noway_assert(!tempsAllocated);
+ stkOffs = lvaAllocateTemps(stkOffs, mustDoubleAlign);
+ tempsAllocated = true;
+ }
+
+ alloc_order[cur++] = ALLOC_NON_PTRS;
+
+ if (opts.compDbgEnC)
+ {
+ /* We will use just one pass, and assign offsets to all variables */
+ alloc_order[cur - 1] |= ALLOC_PTRS;
+ noway_assert(compGSReorderStackLayout == false);
+ }
+ else
+ {
+ alloc_order[cur++] = ALLOC_PTRS;
+ }
+
+ if (!codeGen->isFramePointerUsed() && compGSReorderStackLayout)
+ {
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
+ alloc_order[cur++] = ALLOC_UNSAFE_BUFFERS;
+ }
+
+ alloc_order[cur] = 0;
+
+ noway_assert(cur < sizeof(alloc_order) / sizeof(alloc_order[0]));
+
+ // Force first pass to happen
+ UINT assignMore = 0xFFFFFFFF;
+ bool have_LclVarDoubleAlign = false;
+
+ for (cur = 0; alloc_order[cur]; cur++)
+ {
+ if ((assignMore & alloc_order[cur]) == 0)
+ {
+ continue;
+ }
+
+ assignMore = 0;
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore field locals of the promotion type PROMOTION_TYPE_FIELD_DEPENDENT.
+ In other words, we will not calculate the "base" address of the struct local if
+ the promotion type is PROMOTION_TYPE_FIELD_DEPENDENT.
+ */
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ continue;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ // The scratch mem is used for the outgoing arguments, and it must be absolutely last
+ if (lclNum == lvaOutgoingArgSpaceVar)
+ {
+ continue;
+ }
+#endif
+
+ bool allocateOnFrame = varDsc->lvOnFrame;
+
+ if (varDsc->lvRegister && (lvaDoneFrameLayout == REGALLOC_FRAME_LAYOUT) &&
+ ((varDsc->TypeGet() != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)))
+ {
+ allocateOnFrame = false;
+ }
+
+ /* Ignore variables that are not on the stack frame */
+
+ if (!allocateOnFrame)
+ {
+ /* For EnC, all variables have to be allocated space on the
+ stack, even though they may actually be enregistered. This
+ way, the frame layout can be directly inferred from the
+ locals-sig.
+ */
+
+ if (!opts.compDbgEnC)
+ {
+ continue;
+ }
+ else if (lclNum >= info.compLocalsCount)
+ { // ignore temps for EnC
+ continue;
+ }
+ }
+ else if (lvaGSSecurityCookie == lclNum && getNeedsGSSecurityCookie())
+ {
+ continue; // This is allocated outside of this loop.
+ }
+
+ // These need to be located as the very first variables (highest memory address)
+ // and so they have already been assigned an offset
+ if (
+#if FEATURE_EH_FUNCLETS
+ lclNum == lvaPSPSym ||
+#else
+ lclNum == lvaShadowSPslotsVar ||
+#endif // FEATURE_EH_FUNCLETS
+ lclNum == lvaLocAllocSPvar || lclNum == lvaSecurityObject)
+ {
+ assert(varDsc->lvStkOffs != BAD_STK_OFFS);
+ continue;
+ }
+
+ if (lclNum == lvaMonAcquired)
+ {
+ continue;
+ }
+
+ // This should be low on the stack. Hence, it will be assigned later.
+ if (lclNum == lvaStubArgumentVar)
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ continue;
+ }
+
+ // This should be low on the stack. Hence, it will be assigned later.
+ if (lclNum == lvaInlinedPInvokeFrameVar)
+ {
+ noway_assert(codeGen->isFramePointerUsed());
+ continue;
+ }
+
+ if (varDsc->lvIsParam)
+ {
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+
+ // On Windows AMD64 we can use the caller-reserved stack area that is already setup
+ assert(varDsc->lvStkOffs != BAD_STK_OFFS);
+ continue;
+
+#else // !_TARGET_AMD64_
+
+ // A register argument that is not enregistered ends up as
+ // a local variable which will need stack frame space.
+ //
+ if (!varDsc->lvIsRegArg)
+ continue;
+
+#ifdef _TARGET_ARM64_
+ if (info.compIsVarArgs)
+ {
+ // Stack offset to varargs (parameters) should point to home area which will be preallocated.
+ varDsc->lvStkOffs =
+ -initialStkOffs + genMapIntRegNumToRegArgNum(varDsc->GetArgReg()) * REGSIZE_BYTES;
+ continue;
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg
+ // in the prolog, thus they don't need stack frame space.
+ //
+ if ((codeGen->regSet.rsMaskPreSpillRegs(false) & genRegMask(varDsc->lvArgReg)) != 0)
+ {
+ assert(varDsc->lvStkOffs != BAD_STK_OFFS);
+ continue;
+ }
+#endif
+
+#endif // !_TARGET_AMD64_
+ }
+
+ /* Make sure the type is appropriate */
+
+ if (varDsc->lvIsUnsafeBuffer && compGSReorderStackLayout)
+ {
+ if (varDsc->lvIsPtr)
+ {
+ if ((alloc_order[cur] & ALLOC_UNSAFE_BUFFERS_WITH_PTRS) == 0)
+ {
+ assignMore |= ALLOC_UNSAFE_BUFFERS_WITH_PTRS;
+ continue;
+ }
+ }
+ else
+ {
+ if ((alloc_order[cur] & ALLOC_UNSAFE_BUFFERS) == 0)
+ {
+ assignMore |= ALLOC_UNSAFE_BUFFERS;
+ continue;
+ }
+ }
+ }
+ else if (varTypeIsGC(varDsc->TypeGet()) && varDsc->lvTracked)
+ {
+ if ((alloc_order[cur] & ALLOC_PTRS) == 0)
+ {
+ assignMore |= ALLOC_PTRS;
+ continue;
+ }
+ }
+ else
+ {
+ if ((alloc_order[cur] & ALLOC_NON_PTRS) == 0)
+ {
+ assignMore |= ALLOC_NON_PTRS;
+ continue;
+ }
+ }
+
+ /* Need to align the offset? */
+
+ if (mustDoubleAlign && (varDsc->lvType == TYP_DOUBLE // Align doubles for ARM and x86
+#ifdef _TARGET_ARM_
+ || varDsc->lvType == TYP_LONG // Align longs for ARM
+#endif
+#ifndef _TARGET_64BIT_
+ || varDsc->lvStructDoubleAlign // Align when lvStructDoubleAlign is true
+#endif // !_TARGET_64BIT_
+ ))
+ {
+ noway_assert((compLclFrameSize % TARGET_POINTER_SIZE) == 0);
+
+ if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) && !have_LclVarDoubleAlign)
+ {
+ // If this is the first TYP_LONG, TYP_DOUBLE or double aligned struct
+ // that we have seen in this loop, then we allocate a pointer sized
+ // stack slot since we may need to double align this LclVar
+ // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+
+ // Remember that we had to double align a LclVar
+ have_LclVarDoubleAlign = true;
+ }
+
+ // Reserve the stack space for this variable
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lclNum, lvaLclSize(lclNum), stkOffs);
+#ifdef _TARGET_ARM64_
+ // If we have an incoming register argument that has a struct promoted field
+ // then we need to copy the lvStkOff (the stack home) from the reg arg to the field lclvar
+ //
+ if (varDsc->lvIsRegArg && varDsc->lvPromotedStruct())
+ {
+ noway_assert(varDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = varDsc->lvFieldLclStart;
+ lvaTable[fieldVarNum].lvStkOffs = varDsc->lvStkOffs;
+ }
+#endif
+ }
+ }
+
+ if (getNeedsGSSecurityCookie() && !compGSReorderStackLayout)
+ {
+ // LOCALLOC used, but we have no unsafe buffer. Allocate the cookie last, close to the localloc buffer.
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaGSSecurityCookie, lvaLclSize(lvaGSSecurityCookie), stkOffs);
+ }
+
+ if (tempsAllocated == false)
+ {
+ /*-------------------------------------------------------------------------
+ *
+ * Now the temps
+ *
+ *-------------------------------------------------------------------------
+ */
+ stkOffs = lvaAllocateTemps(stkOffs, mustDoubleAlign);
+ }
+
+ /*-------------------------------------------------------------------------
+ *
+ * Now do some final stuff
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ // lvaInlinedPInvokeFrameVar and lvaStubArgumentVar need to be assigned last
+ // Important: The stack walker depends on lvaStubArgumentVar immediately
+ // following lvaInlinedPInvokeFrameVar in the frame.
+
+ if (lvaStubArgumentVar != BAD_VAR_NUM)
+ {
+#ifdef JIT32_GCENCODER
+ noway_assert(codeGen->isFramePointerUsed());
+#endif
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaStubArgumentVar, lvaLclSize(lvaStubArgumentVar), stkOffs);
+ }
+
+ if (lvaInlinedPInvokeFrameVar != BAD_VAR_NUM)
+ {
+ noway_assert(codeGen->isFramePointerUsed());
+ stkOffs =
+ lvaAllocLocalAndSetVirtualOffset(lvaInlinedPInvokeFrameVar, lvaLclSize(lvaInlinedPInvokeFrameVar), stkOffs);
+ }
+
+ if (mustDoubleAlign)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // Allocate a pointer sized stack slot, since we may need to double align here
+ // when lvaDoneFrameLayout == FINAL_FRAME_LAYOUT
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+
+ if (have_LclVarDoubleAlign)
+ {
+ // If we have any TYP_LONG, TYP_DOUBLE or double aligned structs
+ // then we need to allocate a second pointer sized stack slot,
+ // since we may need to double align the last LclVar that we saw
+ // in the loop above. We do this so that the offsets that we
+ // calculate for the stack frame are always greater than they will
+ // be in the final layout.
+ //
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ }
+ else // FINAL_FRAME_LAYOUT
+ {
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
+ if (ehNeedsPSPSym())
+ {
+ // On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument
+ // space. Any padding will be higher on the stack than this
+ // (including the padding added by lvaAlignFrame()).
+ noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer
+ assert(lvaPSPSym != BAD_VAR_NUM); // We should have created the PSPSym variable
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs);
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
+
+#ifdef _TARGET_ARM64_
+ if (isFramePointerUsed())
+ {
+ // Create space for saving FP and LR.
+ stkOffs -= 2 * REGSIZE_BYTES;
+ }
+#endif // _TARGET_ARM64_
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (lvaOutgoingArgSpaceSize > 0)
+ {
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI) // No 4 slots for outgoing params on System V.
+ noway_assert(lvaOutgoingArgSpaceSize >= (4 * sizeof(void*)));
+#endif
+ noway_assert((lvaOutgoingArgSpaceSize % sizeof(void*)) == 0);
+
+ // Give it a value so we can avoid asserts in CHK builds.
+ // Since this will always use an SP relative offset of zero
+ // at the end of lvaFixVirtualFrameOffsets, it will be set to absolute '0'
+
+ stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaOutgoingArgSpaceVar, lvaLclSize(lvaOutgoingArgSpaceVar), stkOffs);
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ // compLclFrameSize equals our negated virtual stack offset minus the pushed registers and return address
+ // and the pushed frame pointer register which for some strange reason isn't part of 'compCalleeRegsPushed'.
+ int pushedCount = compCalleeRegsPushed;
+
+#ifdef _TARGET_ARM64_
+ if (info.compIsVarArgs)
+ {
+ pushedCount += MAX_REG_ARG;
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+ if (codeGen->doubleAlignOrFramePointerUsed())
+ {
+ pushedCount += 1; // pushed EBP (frame pointer)
+ }
+ pushedCount += 1; // pushed PC (return address)
+#endif
+
+ noway_assert(compLclFrameSize == (unsigned)-(stkOffs + (pushedCount * (int)sizeof(void*))));
+}
+
+int Compiler::lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs)
+{
+ noway_assert(lclNum != BAD_VAR_NUM);
+
+#ifdef _TARGET_64BIT_
+ // Before final frame layout, assume the worst case, that every >=8 byte local will need
+ // maximum padding to be aligned. This is because we generate code based on the stack offset
+ // computed during tentative frame layout. These offsets cannot get bigger during final
+ // frame layout, as that would possibly require different code generation (for example,
+ // using a 4-byte offset instead of a 1-byte offset in an instruction). The offsets can get
+ // smaller. It is possible there is different alignment at the point locals are allocated
+ // between tentative and final frame layout which would introduce padding between locals
+ // and thus increase the offset (from the stack pointer) of one of the locals. Hence the
+ // need to assume the worst alignment before final frame layout.
+ // We could probably improve this by sorting all the objects by alignment,
+ // such that all 8 byte objects are together, 4 byte objects are together, etc., which
+ // would require at most one alignment padding per group.
+ //
+ // TYP_SIMD structs locals have alignment preference given by getSIMDTypeAlignment() for
+ // better performance.
+ if ((size >= 8) && ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || ((stkOffs % 8) != 0)
+#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+ || lclVarIsSIMDType(lclNum)
+#endif
+ ))
+ {
+ // Note that stack offsets are negative
+ assert(stkOffs < 0);
+
+ // alignment padding
+ unsigned pad = 0;
+#if defined(FEATURE_SIMD) && ALIGN_SIMD_TYPES
+ if (lclVarIsSIMDType(lclNum) && !lvaIsImplicitByRefLocal(lclNum))
+ {
+ int alignment = getSIMDTypeAlignment(lvaTable[lclNum].lvType);
+
+ if (stkOffs % alignment != 0)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ pad = alignment - 1;
+ // Note that all the objects will probably be misaligned, but we'll fix that in final layout.
+ }
+ else
+ {
+ pad = alignment + (stkOffs % alignment); // +1 to +(alignment-1) bytes
+ }
+ }
+ }
+ else
+#endif // FEATURE_SIMD && ALIGN_SIMD_TYPES
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ pad = 7;
+ // Note that all the objects will probably be misaligned, but we'll fix that in final layout.
+ }
+ else
+ {
+ pad = 8 + (stkOffs % 8); // +1 to +7 bytes
+ }
+ }
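+ // For example, at final layout a local reached with stkOffs == -20 gets pad == 4
+ // (8 + (-20 % 8)), moving it to -24, an 8-byte boundary; before final layout the
+ // worst-case pad of 7 is assumed instead.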
+ // Will the pad ever be anything except 4? Do we put smaller-than-4-sized objects on the stack?
+ lvaIncrementFrameSize(pad);
+ stkOffs -= pad;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Pad ");
+ gtDispLclVar(lclNum, /*pad*/ false);
+ printf(", size=%d, stkOffs=%c0x%x, pad=%d\n", size, stkOffs < 0 ? '-' : '+',
+ stkOffs < 0 ? -stkOffs : stkOffs, pad);
+ }
+#endif
+ }
+#endif // _TARGET_64BIT_
+
+ /* Reserve space on the stack by bumping the frame size */
+
+ lvaIncrementFrameSize(size);
+ stkOffs -= size;
+ lvaTable[lclNum].lvStkOffs = stkOffs;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Assign ");
+ gtDispLclVar(lclNum, /*pad*/ false);
+ printf(", size=%d, stkOffs=%c0x%x\n", size, stkOffs < 0 ? '-' : '+', stkOffs < 0 ? -stkOffs : stkOffs);
+ }
+#endif
+
+ return stkOffs;
+}
+
+#ifdef _TARGET_AMD64_
+/*****************************************************************************
+ * lvaIsCalleeSavedIntRegCountEven() : returns true if the number of integer registers
+ * pushed onto the stack is even, including RBP if it is used as the frame pointer
+ *
+ * Note that this excludes the return address (PC) pushed by the caller. To know whether
+ * the SP offset after pushing the integer registers is aligned, we need to negate
+ * the result of this routine.
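+ * For example, pushing RBP plus five other callee-saved registers gives regsPushed == 6
+ * (even), so SP remains 8 bytes short of 16-byte alignment after the pushes, just as it
+ * was right after the call pushed the return address.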
+ */
+bool Compiler::lvaIsCalleeSavedIntRegCountEven()
+{
+ unsigned regsPushed = compCalleeRegsPushed + (codeGen->isFramePointerUsed() ? 1 : 0);
+ return (regsPushed % (16 / REGSIZE_BYTES)) == 0;
+}
+#endif //_TARGET_AMD64_
+
+/*****************************************************************************
+ * lvaAlignFrame() : After allocating everything on the frame, reserve any
+ * extra space needed to keep the frame aligned
+ */
+void Compiler::lvaAlignFrame()
+{
+#if defined(_TARGET_AMD64_)
+
+ // Leaf frames do not need full alignment, but the unwind info is smaller if we
+ // are at least 8 byte aligned (and we assert as much)
+ if ((compLclFrameSize % 8) != 0)
+ {
+ lvaIncrementFrameSize(8 - (compLclFrameSize % 8));
+ }
+ else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // If we are not doing final layout, we don't know the exact value of compLclFrameSize
+ // and thus do not know how much we will need to add in order to be aligned.
+ // We add 8 so compLclFrameSize is still a multiple of 8.
+ lvaIncrementFrameSize(8);
+ }
+ assert((compLclFrameSize % 8) == 0);
+
+ // Ensure that the stack is always 16-byte aligned by grabbing an unused QWORD
+ // if needed, keeping in mind that SP is off by 8 because of the pushed return address.
+ // And don't forget that compCalleeRegsPushed does *not* include RBP if we are
+ // using it as the frame pointer.
+ //
+ bool regPushedCountAligned = lvaIsCalleeSavedIntRegCountEven();
+ bool lclFrameSizeAligned = (compLclFrameSize % 16) == 0;
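+
+ // For example, an even number of pushed integer registers (counting RBP) together with a
+ // non-zero compLclFrameSize that is a multiple of 16 leaves SP 8 bytes off 16-byte
+ // alignment, so the check below adds one more QWORD of padding.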
+
+ // If this isn't the final frame layout, assume we have to push an extra QWORD
+ // Just so the offsets are true upper limits.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef UNIX_AMD64_ABI
+ // The compNeedToAlignFrame flag indicates whether there is a need to align the frame.
+ // On AMD64-Windows, if there are calls, 4 slots for the outgoing args are allocated, except for
+ // FastTailCall. These slots make the frame size non-zero, so the alignment logic will be called.
+ // On AMD64-Unix, there are no such slots, so it is possible to have calls in a method with a frame size of 0,
+ // in which case the frame alignment logic won't kick in. This flag takes care of the AMD64-Unix case by
+ // remembering that there are calls and making sure the frame alignment logic is executed.
+ bool stackNeedsAlignment = (compLclFrameSize != 0 || opts.compNeedToAlignFrame);
+#else // !UNIX_AMD64_ABI
+ bool stackNeedsAlignment = compLclFrameSize != 0;
+#endif // !UNIX_AMD64_ABI
+ if ((!codeGen->isFramePointerUsed() && (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)) ||
+ (stackNeedsAlignment && (regPushedCountAligned == lclFrameSizeAligned)))
+ {
+ lvaIncrementFrameSize(REGSIZE_BYTES);
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ // The stack on ARM64 must be 16 byte aligned.
+
+ // First, align up to 8.
+ if ((compLclFrameSize % 8) != 0)
+ {
+ lvaIncrementFrameSize(8 - (compLclFrameSize % 8));
+ }
+ else if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // If we are not doing final layout, we don't know the exact value of compLclFrameSize
+ // and thus do not know how much we will need to add in order to be aligned.
+ // We add 8 so compLclFrameSize is still a multiple of 8.
+ lvaIncrementFrameSize(8);
+ }
+ assert((compLclFrameSize % 8) == 0);
+
+ // Ensure that the stack is always 16-byte aligned by grabbing an unused QWORD
+ // if needed.
+ bool regPushedCountAligned = (compCalleeRegsPushed % (16 / REGSIZE_BYTES)) == 0;
+ bool lclFrameSizeAligned = (compLclFrameSize % 16) == 0;
+
+ // If this isn't the final frame layout, assume we have to push an extra QWORD
+ // Just so the offsets are true upper limits.
+ if ((lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) || (regPushedCountAligned != lclFrameSizeAligned))
+ {
+ lvaIncrementFrameSize(REGSIZE_BYTES);
+ }
+
+#elif defined(_TARGET_ARM_)
+
+ // Ensure that stack offsets will be double-aligned by grabbing an unused DWORD if needed.
+ //
+ bool lclFrameSizeAligned = (compLclFrameSize % sizeof(double)) == 0;
+ bool regPushedCountAligned = ((compCalleeRegsPushed + genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true))) %
+ (sizeof(double) / sizeof(void*))) == 0;
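+
+ // For example, 5 callee-saved registers plus 4 prespilled registers is an odd count; if
+ // compLclFrameSize is already a multiple of 8, the two flags disagree and a 4-byte pad
+ // is added below.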
+
+ if (regPushedCountAligned != lclFrameSizeAligned)
+ {
+ lvaIncrementFrameSize(sizeof(void*));
+ }
+
+#elif defined(_TARGET_X86_)
+
+ if (genDoubleAlign())
+ {
+ // Double Frame Alignment for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
+
+ if (compLclFrameSize == 0)
+ {
+ // This can only happen with JitStress=1 or JitDoubleAlign=2
+ lvaIncrementFrameSize(sizeof(void*));
+ }
+ }
+
+#else
+ NYI("TARGET specific lvaAlignFrame");
+#endif // !_TARGET_AMD64_
+}
+
+/*****************************************************************************
+ * lvaAssignFrameOffsetsToPromotedStructs() : Assign offsets to fields
+ * within a promoted struct (worker for lvaAssignFrameOffsets).
+ */
+void Compiler::lvaAssignFrameOffsetsToPromotedStructs()
+{
+ LclVarDsc* varDsc = lvaTable;
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ // For promoted struct fields that are params, we will
+ // assign their offsets in lvaAssignVirtualFrameOffsetToArg().
+ // This is not true for the System V systems since there is no
+ // outgoing args space. Assign the dependently promoted fields properly.
+ //
+ if (varDsc->lvIsStructField
+#ifndef UNIX_AMD64_ABI
+ // For System V platforms there is no outgoing args space.
+ // A register passed struct arg is homed on the stack in a separate local var.
+ // The offset of these structs is already calculated in the lvaAssignVirtualFrameOffsetToArg method.
+ // Make sure the code below is not executed for these structs and the offset is not changed.
+ && !varDsc->lvIsParam
+#endif // UNIX_AMD64_ABI
+ )
+ {
+ LclVarDsc* parentvarDsc = &lvaTable[varDsc->lvParentLcl];
+ lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ // The stack offset for these field locals must have been calculated
+ // by the normal frame offset assignment.
+ continue;
+ }
+ else
+ {
+ noway_assert(promotionType == PROMOTION_TYPE_DEPENDENT);
+ noway_assert(varDsc->lvOnFrame);
+ varDsc->lvStkOffs = parentvarDsc->lvStkOffs + varDsc->lvFldOffset;
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ * lvaAllocateTemps() : Assign virtual offsets to temps (always negative).
+ */
+int Compiler::lvaAllocateTemps(int stkOffs, bool mustDoubleAlign)
+{
+ unsigned spillTempSize = 0;
+
+ if (lvaDoneFrameLayout == FINAL_FRAME_LAYOUT)
+ {
+ int preSpillSize = 0;
+#ifdef _TARGET_ARM_
+ preSpillSize = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * TARGET_POINTER_SIZE;
+#endif
+ bool assignDone;
+ bool assignNptr;
+ bool assignPtrs = true;
+
+ /* Allocate temps */
+
+ if (TRACK_GC_TEMP_LIFETIMES)
+ {
+ /* first pointers, then non-pointers in second pass */
+ assignNptr = false;
+ assignDone = false;
+ }
+ else
+ {
+ /* Pointers and non-pointers together in single pass */
+ assignNptr = true;
+ assignDone = true;
+ }
+
+ assert(tmpAllFree());
+
+ AGAIN2:
+
+ for (TempDsc* temp = tmpListBeg(); temp != nullptr; temp = tmpListNxt(temp))
+ {
+ var_types tempType = temp->tdTempType();
+ unsigned size;
+
+ /* Make sure the type is appropriate */
+
+ if (!assignPtrs && varTypeIsGC(tempType))
+ {
+ continue;
+ }
+ if (!assignNptr && !varTypeIsGC(tempType))
+ {
+ continue;
+ }
+
+ size = temp->tdTempSize();
+
+ /* Figure out and record the stack offset of the temp */
+
+ /* Need to align the offset? */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_64BIT_
+ if (varTypeIsGC(tempType) && ((stkOffs % TARGET_POINTER_SIZE) != 0))
+ {
+ // Calculate 'pad' as the number of bytes to align up 'stkOffs' to be a multiple of TARGET_POINTER_SIZE
+ // In practice this is really just a fancy way of writing 4 (as all stack locations are at least 4-byte
+ // aligned). Note stkOffs is always negative, so (stkOffs % TARGET_POINTER_SIZE) yields a negative
+ // value.
+ //
+ int alignPad = (int)AlignmentPad((unsigned)-stkOffs, TARGET_POINTER_SIZE);
+
+ spillTempSize += alignPad;
+ lvaIncrementFrameSize(alignPad);
+ stkOffs -= alignPad;
+
+ noway_assert((stkOffs % TARGET_POINTER_SIZE) == 0);
+ }
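+ // For example, if a preceding 4-byte temp left stkOffs at -12, a GC temp gets a 4-byte
+ // alignPad first, so after its own 8 bytes are allocated its offset (-24) is pointer aligned.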
+#endif
+
+ if (mustDoubleAlign && (tempType == TYP_DOUBLE)) // Align doubles for x86 and ARM
+ {
+ noway_assert((compLclFrameSize % TARGET_POINTER_SIZE) == 0);
+
+ if (((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) != 0)
+ {
+ spillTempSize += TARGET_POINTER_SIZE;
+ lvaIncrementFrameSize(TARGET_POINTER_SIZE);
+ stkOffs -= TARGET_POINTER_SIZE;
+ }
+ // We should now have a double-aligned (stkOffs+preSpillSize)
+ noway_assert(((stkOffs + preSpillSize) % (2 * TARGET_POINTER_SIZE)) == 0);
+ }
+
+ spillTempSize += size;
+ lvaIncrementFrameSize(size);
+ stkOffs -= size;
+ temp->tdSetTempOffs(stkOffs);
+ }
+#ifdef _TARGET_ARM_
+ // Only the ARM platform requires that we have an accurate estimate for spillTempSize
+ noway_assert(spillTempSize <= lvaGetMaxSpillTempSize());
+#endif
+
+ /* If we've only assigned some temps, go back and do the rest now */
+
+ if (!assignDone)
+ {
+ assignNptr = !assignNptr;
+ assignPtrs = !assignPtrs;
+ assignDone = true;
+
+ goto AGAIN2;
+ }
+ }
+ else // We haven't run codegen, so there are no Spill temps yet!
+ {
+ unsigned size = lvaGetMaxSpillTempSize();
+
+ lvaIncrementFrameSize(size);
+ stkOffs -= size;
+ }
+
+ return stkOffs;
+}
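+
+// Illustrative sketch of the GC-temp alignment step in lvaAllocateTemps above
+// (numbers are hypothetical, assuming TARGET_POINTER_SIZE == 8 on a 64-bit target):
+//
+//     stkOffs == -20  ->  AlignmentPad(20, 8) == 4, so stkOffs becomes -24
+//     stkOffs == -24  ->  already a multiple of 8, so no padding is added
+//
+// The frame then grows by the pad plus the temp's size, and the temp's offset is
+// recorded at the new (more negative) stkOffs.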
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Dump the register a local is in right now.
+ * For non-LSRA, this will be the register it is always in. For LSRA, it's only the current
+ * location, since the location changes and it is updated throughout code generation based on
+ * LSRA register assignments.
+ */
+
+void Compiler::lvaDumpRegLocation(unsigned lclNum)
+{
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ var_types type = varDsc->TypeGet();
+
+#if FEATURE_STACK_FP_X87
+ if (varTypeIsFloating(type))
+ {
+ printf("fpu stack ");
+ }
+ else
+#endif
+ if (isRegPairType(type))
+ {
+ if (!doLSRA())
+ {
+ noway_assert(varDsc->lvRegNum != REG_STK);
+ }
+ if (doLSRA() && varDsc->lvRegNum == REG_STK)
+ {
+ /* Hi-only enregistered long */
+ int offset = varDsc->lvStkOffs;
+ printf("%-3s:[%1s0x%02X]",
+ getRegName(varDsc->lvOtherReg), // hi32
+ (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
+ }
+ else if (varDsc->lvOtherReg != REG_STK)
+ {
+ /* Fully enregistered long */
+ printf("%3s:%-3s ",
+ getRegName(varDsc->lvOtherReg), // hi32
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ else
+ {
+ /* Partially enregistered long */
+ int offset = varDsc->lvStkOffs + 4;
+ printf("[%1s0x%02X]:%-3s", (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset),
+ getRegName(varDsc->lvRegNum)); // lo32
+ }
+ }
+#ifdef _TARGET_ARM_
+ else if (varDsc->TypeGet() == TYP_DOUBLE)
+ {
+ printf("%3s:%-3s ", getRegName(varDsc->lvRegNum), getRegName(varDsc->lvOtherReg));
+ }
+#endif
+ else
+ {
+ printf("%3s ", getRegName(varDsc->lvRegNum));
+ }
+}
+
+/*****************************************************************************
+ *
+ * Dump the frame location assigned to a local.
+ * For non-LSRA, this will only be valid if there is no assigned register.
+ * For LSRA, it's the home location, even though the variable doesn't always live
+ * in its home location.
+ */
+
+void Compiler::lvaDumpFrameLocation(unsigned lclNum)
+{
+ int offset;
+ regNumber baseReg;
+
+#ifdef _TARGET_ARM_
+ offset = lvaFrameAddress(lclNum, compLocallocUsed, &baseReg, 0);
+#else
+ bool EBPbased;
+ offset = lvaFrameAddress(lclNum, &EBPbased);
+ baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
+#endif
+
+ printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
+}
+
+/*****************************************************************************
+ *
+ * dump a single lvaTable entry
+ */
+
+void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth)
+{
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ var_types type = varDsc->TypeGet();
+
+ if (curState == INITIAL_FRAME_LAYOUT)
+ {
+ printf("; ");
+ gtDispLclVar(lclNum);
+
+ printf(" %7s ", varTypeName(type));
+ if (genTypeSize(type) == 0)
+ {
+ printf("(%2d) ", lvaLclSize(lclNum));
+ }
+ }
+ else
+ {
+ if (varDsc->lvRefCnt == 0)
+ {
+ // Print this with a special indicator that the variable is unused. Even though the
+ // variable itself is unused, it might be a struct that is promoted, so seeing it
+ // can be useful when looking at the promoted struct fields. It's also weird to see
+ // missing var numbers if these aren't printed.
+ printf(";* ");
+ }
+ else
+#if FEATURE_FIXED_OUT_ARGS
+ if ((lclNum == lvaOutgoingArgSpaceVar) && (lvaLclSize(lclNum) == 0))
+ {
+ // Similar to above; print this anyway.
+ printf(";# ");
+ }
+ else
+#endif
+ {
+ printf("; ");
+ }
+
+ gtDispLclVar(lclNum);
+
+ printf("[V%02u", lclNum);
+ if (varDsc->lvTracked)
+ {
+ printf(",T%02u]", varDsc->lvVarIndex);
+ }
+ else
+ {
+ printf(" ]");
+ }
+
+ printf(" (%3u,%*s)", varDsc->lvRefCnt, (int)refCntWtdWidth, refCntWtd2str(varDsc->lvRefCntWtd));
+
+ printf(" %7s ", varTypeName(type));
+ if (genTypeSize(type) == 0)
+ {
+ printf("(%2d) ", lvaLclSize(lclNum));
+ }
+ else
+ {
+ printf(" -> ");
+ }
+
+ // The register or stack location field is 11 characters wide.
+ if (varDsc->lvRefCnt == 0)
+ {
+ printf("zero-ref ");
+ }
+ else if (varDsc->lvRegister != 0)
+ {
+ // It's always a register, and always in the same register.
+ lvaDumpRegLocation(lclNum);
+ }
+ else if (varDsc->lvOnFrame == 0)
+ {
+ printf("registers ");
+ }
+ else
+ {
+ // For RyuJIT backend, it might be in a register part of the time, but it will definitely have a stack home
+ // location. Otherwise, it's always on the stack.
+ if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
+ {
+ lvaDumpFrameLocation(lclNum);
+ }
+ }
+ }
+
+ if (varDsc->lvIsHfaRegArg())
+ {
+ if (varDsc->lvHfaTypeIsFloat())
+ {
+ printf(" (enregistered HFA: float) ");
+ }
+ else
+ {
+ printf(" (enregistered HFA: double)");
+ }
+ }
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ printf(" do-not-enreg[");
+ if (varDsc->lvAddrExposed)
+ {
+ printf("X");
+ }
+ if (varTypeIsStruct(varDsc))
+ {
+ printf("S");
+ }
+ if (varDsc->lvVMNeedsStackAddr)
+ {
+ printf("V");
+ }
+ if (varDsc->lvLiveInOutOfHndlr)
+ {
+ printf("H");
+ }
+ if (varDsc->lvLclFieldExpr)
+ {
+ printf("F");
+ }
+ if (varDsc->lvLclBlockOpAddr)
+ {
+ printf("B");
+ }
+ if (varDsc->lvLiveAcrossUCall)
+ {
+ printf("U");
+ }
+ if (varDsc->lvIsMultiRegArg)
+ {
+ printf("A");
+ }
+ if (varDsc->lvIsMultiRegRet)
+ {
+ printf("R");
+ }
+#ifdef JIT32_GCENCODER
+ if (varDsc->lvPinned)
+ printf("P");
+#endif // JIT32_GCENCODER
+ printf("]");
+ }
+
+ if (varDsc->lvIsMultiRegArg)
+ {
+ printf(" multireg-arg");
+ }
+ if (varDsc->lvIsMultiRegRet)
+ {
+ printf(" multireg-ret");
+ }
+ if (varDsc->lvMustInit)
+ {
+ printf(" must-init");
+ }
+ if (varDsc->lvAddrExposed)
+ {
+ printf(" addr-exposed");
+ }
+ if (varDsc->lvHasLdAddrOp)
+ {
+ printf(" ld-addr-op");
+ }
+ if (varDsc->lvVerTypeInfo.IsThisPtr())
+ {
+ printf(" this");
+ }
+ if (varDsc->lvPinned)
+ {
+ printf(" pinned");
+ }
+ if (varDsc->lvRefAssign)
+ {
+ printf(" ref-asgn");
+ }
+ if (varDsc->lvStackByref)
+ {
+ printf(" stack-byref");
+ }
+#ifndef _TARGET_64BIT_
+ if (varDsc->lvStructDoubleAlign)
+ printf(" double-align");
+#endif // !_TARGET_64BIT_
+ if (varDsc->lvOverlappingFields)
+ {
+ printf(" overlapping-fields");
+ }
+
+ if (compGSReorderStackLayout && !varDsc->lvRegister)
+ {
+ if (varDsc->lvIsPtr)
+ {
+ printf(" ptr");
+ }
+ if (varDsc->lvIsUnsafeBuffer)
+ {
+ printf(" unsafe-buffer");
+ }
+ }
+ if (varDsc->lvIsStructField)
+ {
+ LclVarDsc* parentvarDsc = &lvaTable[varDsc->lvParentLcl];
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(parentvarDsc))
+ {
+ bool isLo = (lclNum == parentvarDsc->lvFieldLclStart);
+ printf(" V%02u.%s(offs=0x%02x)", varDsc->lvParentLcl, isLo ? "lo" : "hi", isLo ? 0 : genTypeSize(TYP_INT));
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ CORINFO_CLASS_HANDLE typeHnd = parentvarDsc->lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(typeHnd, varDsc->lvFldOrdinal);
+
+ printf(" V%02u.%s(offs=0x%02x)", varDsc->lvParentLcl, eeGetFieldName(fldHnd), varDsc->lvFldOffset);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(parentvarDsc);
+ // We should never have lvIsStructField set if it is a reg-sized non-field-addressed struct.
+ assert(!varDsc->lvRegStruct);
+ switch (promotionType)
+ {
+ case PROMOTION_TYPE_NONE:
+ printf(" P-NONE");
+ break;
+ case PROMOTION_TYPE_DEPENDENT:
+ printf(" P-DEP");
+ break;
+ case PROMOTION_TYPE_INDEPENDENT:
+ printf(" P-INDEP");
+ break;
+ }
+ }
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * dump the lvaTable
+ */
+
+void Compiler::lvaTableDump(FrameLayoutState curState)
+{
+ if (curState == NO_FRAME_LAYOUT)
+ {
+ curState = lvaDoneFrameLayout;
+ if (curState == NO_FRAME_LAYOUT)
+ {
+ // Still no layout? Could be a bug, but just display the initial layout
+ curState = INITIAL_FRAME_LAYOUT;
+ }
+ }
+
+ if (curState == INITIAL_FRAME_LAYOUT)
+ {
+ printf("; Initial");
+ }
+ else if (curState == PRE_REGALLOC_FRAME_LAYOUT)
+ {
+ printf("; Pre-RegAlloc");
+ }
+ else if (curState == REGALLOC_FRAME_LAYOUT)
+ {
+ printf("; RegAlloc");
+ }
+ else if (curState == TENTATIVE_FRAME_LAYOUT)
+ {
+ printf("; Tentative");
+ }
+ else if (curState == FINAL_FRAME_LAYOUT)
+ {
+ printf("; Final");
+ }
+ else
+ {
+ printf("UNKNOWN FrameLayoutState!");
+ unreached();
+ }
+
+ printf(" local variable assignments\n");
+ printf(";\n");
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ // Figure out some sizes, to help line things up
+
+ size_t refCntWtdWidth = 6; // Use 6 as the minimum width
+
+ if (curState != INITIAL_FRAME_LAYOUT) // don't need this info for INITIAL_FRAME_LAYOUT
+ {
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ size_t width = strlen(refCntWtd2str(varDsc->lvRefCntWtd));
+ if (width > refCntWtdWidth)
+ {
+ refCntWtdWidth = width;
+ }
+ }
+ }
+
+ // Do the actual output
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ lvaDumpEntry(lclNum, curState, refCntWtdWidth);
+ }
+
+ //-------------------------------------------------------------------------
+ // Display the code-gen temps
+
+ assert(tmpAllFree());
+ for (TempDsc* temp = tmpListBeg(); temp != nullptr; temp = tmpListNxt(temp))
+ {
+ printf("; TEMP_%02u %26s%*s%7s -> ", -temp->tdTempNum(), " ", refCntWtdWidth, " ",
+ varTypeName(temp->tdTempType()));
+ int offset = temp->tdTempOffs();
+ printf(" [%2s%1s0x%02X]\n", isFramePointerUsed() ? STR_FPBASE : STR_SPBASE, (offset < 0 ? "-" : "+"),
+ (offset < 0 ? -offset : offset));
+ }
+
+ if (curState >= TENTATIVE_FRAME_LAYOUT)
+ {
+ printf(";\n");
+ printf("; Lcl frame size = %d\n", compLclFrameSize);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Conservatively estimate the layout of the stack frame.
+ *
+ * This function is only used before final frame layout. It conservatively estimates the
+ * number of callee-saved registers that must be saved, then calls lvaAssignFrameOffsets().
+ * To do final frame layout, the callee-saved registers are known precisely, so
+ * lvaAssignFrameOffsets() is called directly.
+ *
+ * Returns the (conservative, that is, overly large) estimated size of the frame,
+ * including the callee-saved registers. This is only used by the emitter during code
+ * generation when estimating the size of the offset of instructions accessing temps,
+ * and only if temps have a larger offset than variables.
+ */
+
+unsigned Compiler::lvaFrameSize(FrameLayoutState curState)
+{
+ assert(curState < FINAL_FRAME_LAYOUT);
+
+ unsigned result;
+
+ /* Layout the stack frame conservatively.
+ Assume all callee-saved registers are spilled to stack */
+
+ compCalleeRegsPushed = CNT_CALLEE_SAVED;
+
+#if defined(_TARGET_ARMARCH_)
+ if (compFloatingPointUsed)
+ compCalleeRegsPushed += CNT_CALLEE_SAVED_FLOAT;
+
+ compCalleeRegsPushed++; // we always push LR. See genPushCalleeSavedRegisters
+#elif defined(_TARGET_AMD64_)
+ if (compFloatingPointUsed)
+ {
+ compCalleeFPRegsSavedMask = RBM_FLT_CALLEE_SAVED;
+ }
+ else
+ {
+ compCalleeFPRegsSavedMask = RBM_NONE;
+ }
+#endif
+
+#if DOUBLE_ALIGN
+ if (genDoubleAlign())
+ {
+ // X86 only - account for extra 4-byte pad that may be created by "and esp, -8" instruction
+ compCalleeRegsPushed++;
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+ // Since FP/EBP is included in the SAVED_REG_MAXSZ we need to
+ // subtract 1 register if codeGen->isFramePointerUsed() is true.
+ if (codeGen->isFramePointerUsed())
+ {
+ compCalleeRegsPushed--;
+ }
+#endif
+
+ lvaAssignFrameOffsets(curState);
+
+ unsigned calleeSavedRegMaxSz = CALLEE_SAVED_REG_MAXSZ;
+#if defined(_TARGET_ARMARCH_)
+ if (compFloatingPointUsed)
+ {
+ calleeSavedRegMaxSz += CALLEE_SAVED_FLOAT_MAXSZ;
+ }
+ calleeSavedRegMaxSz += REGSIZE_BYTES; // we always push LR. See genPushCalleeSavedRegisters
+#endif
+
+ result = compLclFrameSize + calleeSavedRegMaxSz;
+ return result;
+}
+
+//------------------------------------------------------------------------
+// lvaGetSPRelativeOffset: Given a variable, return the offset of that
+// variable in the frame from the stack pointer. This number will be positive,
+// since the stack pointer must be at a lower address than everything on the
+// stack.
+//
+// This can't be called for localloc functions, since the stack pointer
+// varies, and thus there is no fixed offset to a variable from the stack pointer.
+//
+// Arguments:
+// varNum - the variable number
+//
+// Return Value:
+// The offset.
+
+int Compiler::lvaGetSPRelativeOffset(unsigned varNum)
+{
+ assert(!compLocallocUsed);
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+ assert(varNum < lvaCount);
+ const LclVarDsc* varDsc = lvaTable + varNum;
+ assert(varDsc->lvOnFrame);
+ int spRelativeOffset;
+
+ if (varDsc->lvFramePointerBased)
+ {
+ // The stack offset is relative to the frame pointer, so convert it to be
+ // relative to the stack pointer (which makes no sense for localloc functions).
+ spRelativeOffset = varDsc->lvStkOffs + codeGen->genSPtoFPdelta();
+ }
+ else
+ {
+ spRelativeOffset = varDsc->lvStkOffs;
+ }
+
+ assert(spRelativeOffset >= 0);
+ return spRelativeOffset;
+}
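+
+// Illustrative example of the conversion above (purely hypothetical numbers): for
+// a frame-pointer-based local with lvStkOffs == -8 and genSPtoFPdelta() == 32, the
+// SP-relative offset is -8 + 32 == 24, which is non-negative as the assert requires.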
+
+/*****************************************************************************
+ *
+ * Return the caller-SP-relative stack offset of a local/parameter.
+ * Requires the local to be on the stack and frame layout to be complete.
+ */
+
+int Compiler::lvaGetCallerSPRelativeOffset(unsigned varNum)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + varNum;
+ assert(varDsc->lvOnFrame);
+
+ return lvaToCallerSPRelativeOffset(varDsc->lvStkOffs, varDsc->lvFramePointerBased);
+}
+
+int Compiler::lvaToCallerSPRelativeOffset(int offset, bool isFpBased)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+
+ if (isFpBased)
+ {
+ offset += codeGen->genCallerSPtoFPdelta();
+ }
+ else
+ {
+ offset += codeGen->genCallerSPtoInitialSPdelta();
+ }
+
+ return offset;
+}
+
+/*****************************************************************************
+ *
+ * Return the Initial-SP-relative stack offset of a local/parameter.
+ * Requires the local to be on the stack and frame layout to be complete.
+ */
+
+int Compiler::lvaGetInitialSPRelativeOffset(unsigned varNum)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + varNum;
+ assert(varDsc->lvOnFrame);
+
+ return lvaToInitialSPRelativeOffset(varDsc->lvStkOffs, varDsc->lvFramePointerBased);
+}
+
+// Given a local variable offset, and whether that offset is frame-pointer based, return its offset from Initial-SP.
+// This is used, for example, to figure out the offset of the frame pointer from Initial-SP.
+int Compiler::lvaToInitialSPRelativeOffset(unsigned offset, bool isFpBased)
+{
+ assert(lvaDoneFrameLayout == FINAL_FRAME_LAYOUT);
+#ifdef _TARGET_AMD64_
+ if (isFpBased)
+ {
+ // Currently, the frame starts by pushing ebp, ebp points to the saved ebp
+ // (so we have ebp pointer chaining). Add the fixed-size frame size plus the
+ // size of the callee-saved regs (not including ebp itself) to find Initial-SP.
+
+ assert(codeGen->isFramePointerUsed());
+ offset += codeGen->genSPtoFPdelta();
+ }
+ else
+ {
+ // The offset is correct already!
+ }
+#else // !_TARGET_AMD64_
+ NYI("lvaToInitialSPRelativeOffset");
+#endif // !_TARGET_AMD64_
+
+ return offset;
+}
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+/*****************************************************************************
+ * Pick a padding size at "random" for the local.
+ * 0 means that it should not be converted to a GT_LCL_FLD
+ */
+
+static unsigned LCL_FLD_PADDING(unsigned lclNum)
+{
+ // Convert every 2nd variable
+ if (lclNum % 2)
+ {
+ return 0;
+ }
+
+ // Pick a padding size at "random"
+ unsigned size = lclNum % 7;
+
+ return size;
+}
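+
+// Illustrative examples of the padding selection above:
+//     lclNum == 3  -> odd, so padding is 0 and the local is not converted
+//     lclNum == 4  -> 4 % 7 == 4, so the local is converted with 4 bytes of padding
+//     lclNum == 14 -> even, but 14 % 7 == 0, so the local is still not converted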
+
+/*****************************************************************************
+ *
+ * Callback for fgWalkAllTreesPre()
+ * Convert as many GT_LCL_VAR nodes as possible to GT_LCL_FLD nodes.
+ */
+
+/* static */
+/*
+ The stress mode does 2 passes.
+
+ In the first pass we mark the locals to which we CANNOT apply the stress mode.
+ In the second pass we do the appropriate morphing for every local that was not ruled out in the first pass.
+*/
+Compiler::fgWalkResult Compiler::lvaStressLclFldCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr lcl;
+
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ lcl = tree;
+ break;
+
+ case GT_ADDR:
+ if (tree->gtOp.gtOp1->gtOper != GT_LCL_VAR)
+ {
+ return WALK_CONTINUE;
+ }
+ lcl = tree->gtOp.gtOp1;
+ break;
+
+ default:
+ return WALK_CONTINUE;
+ }
+
+ Compiler* pComp = ((lvaStressLclFldArgs*)data->pCallbackData)->m_pCompiler;
+ bool bFirstPass = ((lvaStressLclFldArgs*)data->pCallbackData)->m_bFirstPass;
+ noway_assert(lcl->gtOper == GT_LCL_VAR);
+ unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
+ var_types type = lcl->TypeGet();
+ LclVarDsc* varDsc = &pComp->lvaTable[lclNum];
+
+ if (varDsc->lvNoLclFldStress)
+ {
+ // Already determined we can't do anything for this var
+ return WALK_SKIP_SUBTREES;
+ }
+
+ if (bFirstPass)
+ {
+ // Ignore arguments and temps
+ if (varDsc->lvIsParam || lclNum >= pComp->info.compLocalsCount)
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Fix for lcl_fld stress mode
+ if (varDsc->lvKeepType)
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Can't have GC ptrs in TYP_BLK.
+ if (!varTypeIsArithmetic(type))
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Weed out "small" types like TYP_BYTE as we don't mark the GT_LCL_VAR
+ // node with the accurate small type. If we bash lvaTable[].lvType,
+ // then there will be no indication that it was ever a small type.
+ var_types varType = varDsc->TypeGet();
+ if (varType != TYP_BLK && genTypeSize(varType) != genTypeSize(genActualType(varType)))
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+
+        // Offset some of the local variables by a "random" non-zero amount
+ unsigned padding = LCL_FLD_PADDING(lclNum);
+ if (padding == 0)
+ {
+ varDsc->lvNoLclFldStress = true;
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Do the morphing
+ noway_assert(varDsc->lvType == lcl->gtType || varDsc->lvType == TYP_BLK);
+ var_types varType = varDsc->TypeGet();
+
+ // Calculate padding
+ unsigned padding = LCL_FLD_PADDING(lclNum);
+
+ // Change the variable to a TYP_BLK
+ if (varType != TYP_BLK)
+ {
+ varDsc->lvExactSize = (unsigned)(roundUp(padding + pComp->lvaLclSize(lclNum)));
+ varDsc->lvType = TYP_BLK;
+ pComp->lvaSetVarAddrExposed(lclNum);
+ }
+
+ tree->gtFlags |= GTF_GLOB_REF;
+
+ /* Now morph the tree appropriately */
+ if (oper == GT_LCL_VAR)
+ {
+ /* Change lclVar(lclNum) to lclFld(lclNum,padding) */
+
+ tree->ChangeOper(GT_LCL_FLD);
+ tree->gtLclFld.gtLclOffs = padding;
+ }
+ else
+ {
+ /* Change addr(lclVar) to addr(lclVar)+padding */
+
+ noway_assert(oper == GT_ADDR);
+ GenTreePtr newAddr = new (pComp, GT_NONE) GenTreeOp(*tree->AsOp());
+
+ tree->ChangeOper(GT_ADD);
+ tree->gtOp.gtOp1 = newAddr;
+ tree->gtOp.gtOp2 = pComp->gtNewIconNode(padding);
+
+ lcl->gtType = TYP_BLK;
+ }
+ }
+
+ return WALK_SKIP_SUBTREES;
+}
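+
+// Illustrative sketch of the second-pass morph above (hypothetical local V03 with
+// padding == 4):
+//
+//     lclVar V03           becomes    lclFld V03 [+4]
+//     addr(lclVar V03)     becomes    addr(lclVar V03) + 4
+//
+// with V03 itself widened to a TYP_BLK of its original size plus the padding.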
+
+/*****************************************************************************/
+
+void Compiler::lvaStressLclFld()
+{
+ if (!compStressCompile(STRESS_LCL_FLDS, 5))
+ {
+ return;
+ }
+
+ lvaStressLclFldArgs Args;
+ Args.m_pCompiler = this;
+ Args.m_bFirstPass = true;
+
+ // Do First pass
+ fgWalkAllTreesPre(lvaStressLclFldCB, &Args);
+
+ // Second pass
+ Args.m_bFirstPass = false;
+ fgWalkAllTreesPre(lvaStressLclFldCB, &Args);
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * A little routine that displays a local variable bitset.
+ * 'set' is mask of variables that have to be displayed
+ * 'allVars' is the complete set of interesting variables (blank space is
+ * inserted if its corresponding bit is not in 'set').
+ */
+
+#ifdef DEBUG
+void Compiler::lvaDispVarSet(VARSET_VALARG_TP set)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(allVars, VarSetOps::MakeEmpty(this));
+ lvaDispVarSet(set, allVars);
+}
+
+void Compiler::lvaDispVarSet(VARSET_VALARG_TP set, VARSET_VALARG_TP allVars)
+{
+ printf("{");
+
+ bool needSpace = false;
+
+ for (unsigned index = 0; index < lvaTrackedCount; index++)
+ {
+ if (VarSetOps::IsMember(this, set, index))
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* Look for the matching variable */
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if ((varDsc->lvVarIndex == index) && varDsc->lvTracked)
+ {
+ break;
+ }
+ }
+
+ if (needSpace)
+ {
+ printf(" ");
+ }
+ else
+ {
+ needSpace = true;
+ }
+
+ printf("V%02u", lclNum);
+ }
+ else if (VarSetOps::IsMember(this, allVars, index))
+ {
+ if (needSpace)
+ {
+ printf(" ");
+ }
+ else
+ {
+ needSpace = true;
+ }
+
+ printf(" ");
+ }
+ }
+
+ printf("}");
+}
+
+#endif // DEBUG
diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp
new file mode 100644
index 0000000000..94206def1c
--- /dev/null
+++ b/src/jit/lir.cpp
@@ -0,0 +1,1640 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#include "smallhash.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+LIR::Use::Use() : m_range(nullptr), m_edge(nullptr), m_user(nullptr)
+{
+}
+
+LIR::Use::Use(const Use& other)
+{
+ *this = other;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::Use: Constructs a use <-> def edge given the range that
+// contains the use and the def, the use -> def edge, and
+// the user.
+//
+// Arguments:
+// range - The range that contains the use and the def.
+// edge - The use -> def edge.
+// user - The node that uses the def.
+//
+// Return Value:
+//
+LIR::Use::Use(Range& range, GenTree** edge, GenTree* user) : m_range(&range), m_edge(edge), m_user(user)
+{
+ AssertIsValid();
+}
+
+LIR::Use& LIR::Use::operator=(const Use& other)
+{
+ m_range = other.m_range;
+ m_user = other.m_user;
+ m_edge = other.IsDummyUse() ? &m_user : other.m_edge;
+
+ assert(IsDummyUse() == other.IsDummyUse());
+ return *this;
+}
+
+LIR::Use& LIR::Use::operator=(Use&& other)
+{
+ *this = other;
+ return *this;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::GetDummyUse: Returns a dummy use for a node.
+//
+// This method is provided as a convenience to allow transforms to work
+// uniformly over Use values. It allows the creation of a Use given a node
+// that is not used.
+//
+// Arguments:
+// range - The range that contains the node.
+// node - The node for which to create a dummy use.
+//
+// Return Value:
+//    A dummy use for `node`.
+//
+LIR::Use LIR::Use::GetDummyUse(Range& range, GenTree* node)
+{
+ assert(node != nullptr);
+
+ Use dummyUse;
+ dummyUse.m_range = &range;
+ dummyUse.m_user = node;
+ dummyUse.m_edge = &dummyUse.m_user;
+
+ assert(dummyUse.IsInitialized());
+ return dummyUse;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::IsDummyUse: Indicates whether or not a use is a dummy use.
+//
+// This method must be called before attempting to call the User() method
+// below: for dummy uses, the user is the same node as the def.
+//
+// Return Value: true if this use is a dummy use; false otherwise.
+//
+bool LIR::Use::IsDummyUse() const
+{
+ return m_edge == &m_user;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::Def: Returns the node that produces the def for this use.
+//
+GenTree* LIR::Use::Def() const
+{
+ assert(IsInitialized());
+
+ return *m_edge;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::User: Returns the node that uses the def for this use.
+//
+GenTree* LIR::Use::User() const
+{
+ assert(IsInitialized());
+ assert(!IsDummyUse());
+
+ return m_user;
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::IsInitialized: Returns true if the use is minimally valid; false otherwise.
+//
+bool LIR::Use::IsInitialized() const
+{
+ return (m_range != nullptr) && (m_user != nullptr) && (m_edge != nullptr);
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::AssertIsValid: DEBUG function to assert on many validity conditions.
+//
+void LIR::Use::AssertIsValid() const
+{
+ assert(IsInitialized());
+ assert(m_range->Contains(m_user));
+ assert(Def() != nullptr);
+
+ GenTree** useEdge = nullptr;
+ assert(m_user->TryGetUse(Def(), &useEdge));
+ assert(useEdge == m_edge);
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::ReplaceWith: Changes the use to point to a new value.
+//
+// For example, given the following LIR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// /--* t17 int
+// * jmpTrue void
+//
+// If we wanted to replace the use of t17 with a use of the constant "1", we
+// might do the following (where `opEq` is a `Use` value that represents the
+// use of t17):
+//
+// GenTree* constantOne = compiler->gtNewIconNode(1);
+// range.InsertAfter(opEq.Def(), constantOne);
+// opEq.ReplaceWith(compiler, constantOne);
+//
+// Which would produce something like the following LIR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// t18 = const int 1
+//
+// /--* t18 int
+// * jmpTrue void
+//
+// Eliminating the now-dead compare and its operands using `LIR::Range::Remove`
+// would then give us:
+//
+// t18 = const int 1
+//
+// /--* t18 int
+// * jmpTrue void
+//
+// Arguments:
+// compiler - The Compiler context.
+// replacement - The replacement node.
+//
+void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement)
+{
+ assert(IsInitialized());
+ assert(compiler != nullptr);
+ assert(replacement != nullptr);
+ assert(IsDummyUse() || m_range->Contains(m_user));
+ assert(m_range->Contains(replacement));
+
+ GenTree* replacedNode = *m_edge;
+
+ *m_edge = replacement;
+ if (!IsDummyUse() && m_user->IsCall())
+ {
+ compiler->fgFixupArgTabEntryPtr(m_user, replacedNode, replacement);
+ }
+}
+
+//------------------------------------------------------------------------
+// LIR::Use::ReplaceWithLclVar: Assigns the def for this use to a local
+// var and points the use to a use of that
+// local var. If no local number is provided,
+// creates a new local var.
+//
+// For example, given the following IR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// /--* t17 int
+// * jmpTrue void
+//
+// If we wanted to replace the use of t17 with a use of a new local var
+// that holds the value represented by t17, we might do the following
+// (where `opEq` is a `Use` value that represents the use of t17):
+//
+//    opEq.ReplaceWithLclVar(compiler, block->getBBWeight(compiler));
+//
+// This would produce the following LIR:
+//
+// t15 = lclVar int arg1
+// t16 = lclVar int arg1
+//
+// /--* t15 int
+// +--* t16 int
+// t17 = * == int
+//
+// /--* t17 int
+// * st.lclVar int tmp0
+//
+// t18 = lclVar int tmp0
+//
+// /--* t18 int
+// * jmpTrue void
+//
+// Arguments:
+// compiler - The Compiler context.
+// blockWeight - The weight of the basic block that contains the use.
+// lclNum - The local to use for temporary storage. If BAD_VAR_NUM (the
+// default) is provided, this method will create and use a new
+// local var.
+//
+// Return Value: The number of the local var used for temporary storage.
+//
+unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum)
+{
+ assert(IsInitialized());
+ assert(compiler != nullptr);
+ assert(m_range->Contains(m_user));
+ assert(m_range->Contains(*m_edge));
+
+ GenTree* node = *m_edge;
+
+ if (lclNum == BAD_VAR_NUM)
+ {
+ lclNum = compiler->lvaGrabTemp(true DEBUGARG("ReplaceWithLclVar is creating a new local variable"));
+ }
+
+ // Increment its lvRefCnt and lvRefCntWtd twice, one for the def and one for the use
+ compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler);
+ compiler->lvaTable[lclNum].incRefCnts(blockWeight, compiler);
+
+ GenTreeLclVar* store = compiler->gtNewTempAssign(lclNum, node)->AsLclVar();
+
+ GenTree* load =
+ new (compiler, GT_LCL_VAR) GenTreeLclVar(store->TypeGet(), store->AsLclVarCommon()->GetLclNum(), BAD_IL_OFFSET);
+
+ m_range->InsertAfter(node, store, load);
+
+ ReplaceWith(compiler, load);
+
+ JITDUMP("ReplaceWithLclVar created store :\n");
+ DISPNODE(store);
+
+ return lclNum;
+}
+
+LIR::ReadOnlyRange::ReadOnlyRange() : m_firstNode(nullptr), m_lastNode(nullptr)
+{
+}
+
+LIR::ReadOnlyRange::ReadOnlyRange(ReadOnlyRange&& other) : m_firstNode(other.m_firstNode), m_lastNode(other.m_lastNode)
+{
+#ifdef DEBUG
+ other.m_firstNode = nullptr;
+ other.m_lastNode = nullptr;
+#endif
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::ReadOnlyRange:
+// Creates a `ReadOnlyRange` value given the first and last node in
+// the range.
+//
+// Arguments:
+// firstNode - The first node in the range.
+// lastNode - The last node in the range.
+//
+LIR::ReadOnlyRange::ReadOnlyRange(GenTree* firstNode, GenTree* lastNode) : m_firstNode(firstNode), m_lastNode(lastNode)
+{
+ assert((m_firstNode == nullptr) == (m_lastNode == nullptr));
+ assert((m_firstNode == m_lastNode) || (Contains(m_lastNode)));
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::FirstNode: Returns the first node in the range.
+//
+GenTree* LIR::ReadOnlyRange::FirstNode() const
+{
+ return m_firstNode;
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::LastNode: Returns the last node in the range.
+//
+GenTree* LIR::ReadOnlyRange::LastNode() const
+{
+ return m_lastNode;
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::IsEmpty: Returns true if the range is empty; false
+// otherwise.
+//
+bool LIR::ReadOnlyRange::IsEmpty() const
+{
+ assert((m_firstNode == nullptr) == (m_lastNode == nullptr));
+ return m_firstNode == nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::begin: Returns an iterator positioned at the first
+// node in the range.
+//
+LIR::ReadOnlyRange::Iterator LIR::ReadOnlyRange::begin() const
+{
+ return Iterator(m_firstNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::end: Returns an iterator positioned after the last
+// node in the range.
+//
+LIR::ReadOnlyRange::Iterator LIR::ReadOnlyRange::end() const
+{
+ return Iterator(m_lastNode == nullptr ? nullptr : m_lastNode->gtNext);
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::rbegin: Returns an iterator positioned at the last
+// node in the range.
+//
+LIR::ReadOnlyRange::ReverseIterator LIR::ReadOnlyRange::rbegin() const
+{
+ return ReverseIterator(m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::rend: Returns an iterator positioned before the first
+// node in the range.
+//
+LIR::ReadOnlyRange::ReverseIterator LIR::ReadOnlyRange::rend() const
+{
+ return ReverseIterator(m_firstNode == nullptr ? nullptr : m_firstNode->gtPrev);
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// LIR::ReadOnlyRange::Contains: Indicates whether or not this range
+// contains a given node.
+//
+// Arguments:
+// node - The node to find.
+//
+// Return Value: True if this range contains the given node; false
+// otherwise.
+//
+bool LIR::ReadOnlyRange::Contains(GenTree* node) const
+{
+ assert(node != nullptr);
+
+ // TODO-LIR: derive this from the # of nodes in the function as well as
+ // the debug level. Checking small functions is pretty cheap; checking
+ // large functions is not.
+ if (JitConfig.JitExpensiveDebugCheckLevel() < 2)
+ {
+ return true;
+ }
+
+ for (GenTree* n : *this)
+ {
+ if (n == node)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+#endif
+
+LIR::Range::Range() : ReadOnlyRange()
+{
+}
+
+LIR::Range::Range(Range&& other) : ReadOnlyRange(std::move(other))
+{
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Range: Creates a `Range` value given the first and last
+// node in the range.
+//
+// Arguments:
+// firstNode - The first node in the range.
+// lastNode - The last node in the range.
+//
+LIR::Range::Range(GenTree* firstNode, GenTree* lastNode) : ReadOnlyRange(firstNode, lastNode)
+{
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::LastPhiNode: Returns the last phi node in the range or
+// `nullptr` if no phis exist.
+//
+GenTree* LIR::Range::LastPhiNode() const
+{
+ GenTree* lastPhiNode = nullptr;
+ for (GenTree* node : *this)
+ {
+ if (!node->IsPhiNode())
+ {
+ break;
+ }
+
+ lastPhiNode = node;
+ }
+
+ return lastPhiNode;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FirstNonPhiNode: Returns the first non-phi node in the
+// range or `nullptr` if no non-phi nodes
+// exist.
+//
+GenTree* LIR::Range::FirstNonPhiNode() const
+{
+ for (GenTree* node : *this)
+ {
+ if (!node->IsPhiNode())
+ {
+ return node;
+ }
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FirstNonPhiOrCatchArgNode: Returns the first node after all
+// phi or catch arg nodes in this
+// range.
+//
+GenTree* LIR::Range::FirstNonPhiOrCatchArgNode() const
+{
+ for (GenTree* node : NonPhiNodes())
+ {
+ if (node->OperGet() == GT_CATCH_ARG)
+ {
+ continue;
+ }
+ else if ((node->OperGet() == GT_STORE_LCL_VAR) && (node->gtGetOp1()->OperGet() == GT_CATCH_ARG))
+ {
+ continue;
+ }
+
+ return node;
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::PhiNodes: Returns the range of phi nodes inside this range.
+//
+LIR::ReadOnlyRange LIR::Range::PhiNodes() const
+{
+ GenTree* lastPhiNode = LastPhiNode();
+ if (lastPhiNode == nullptr)
+ {
+ return ReadOnlyRange();
+ }
+
+ return ReadOnlyRange(m_firstNode, lastPhiNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::NonPhiNodes: Returns the range of non-phi nodes inside this
+//                          range.
+//
+LIR::ReadOnlyRange LIR::Range::NonPhiNodes() const
+{
+ GenTree* firstNonPhiNode = FirstNonPhiNode();
+ if (firstNonPhiNode == nullptr)
+ {
+ return ReadOnlyRange();
+ }
+
+ return ReadOnlyRange(firstNonPhiNode, m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts a node before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which `node` will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node - The node to insert. Must not be part of any range.
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node)
+{
+ assert(node != nullptr);
+ assert(node->gtPrev == nullptr);
+ assert(node->gtNext == nullptr);
+
+ FinishInsertBefore(insertionPoint, node, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts 2 nodes before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> node1 <-> node2 <-> insertionPoint
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+ node2->gtPrev = node1;
+
+ FinishInsertBefore(insertionPoint, node1, node2);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts 3 nodes before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range.
+// node3 - The third node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> node1 <-> node2 <-> node3 <-> insertionPoint
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+
+ FinishInsertBefore(insertionPoint, node1, node3);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts 4 nodes before another node in this range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range.
+// node3 - The third node to insert. Must not be part of any range.
+// node4 - The fourth node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> node1 <-> node2 <-> node3 <-> node4 <-> insertionPoint
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+ assert(node4 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+ assert(node4->gtNext == nullptr);
+ assert(node4->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+ node3->gtNext = node4;
+
+ node4->gtPrev = node3;
+
+ FinishInsertBefore(insertionPoint, node1, node4);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FinishInsertBefore: Helper function to finalize InsertBefore processing: link the
+// range to insertionPoint. gtNext/gtPrev links between first and last are already set.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, indicates to insert at the end of the range.
+// first - The first node of the range to insert.
+// last - The last node of the range to insert.
+//
+// Notes:
+// Resulting order:
+// previous insertionPoint->gtPrev <-> first <-> ... <-> last <-> insertionPoint
+//
+void LIR::Range::FinishInsertBefore(GenTree* insertionPoint, GenTree* first, GenTree* last)
+{
+ assert(first != nullptr);
+ assert(last != nullptr);
+ assert(first->gtPrev == nullptr);
+ assert(last->gtNext == nullptr);
+
+ if (insertionPoint == nullptr)
+ {
+ if (m_firstNode == nullptr)
+ {
+ m_firstNode = first;
+ }
+ else
+ {
+ assert(m_lastNode != nullptr);
+ assert(m_lastNode->gtNext == nullptr);
+ m_lastNode->gtNext = first;
+ first->gtPrev = m_lastNode;
+ }
+ m_lastNode = last;
+ }
+ else
+ {
+ assert(Contains(insertionPoint));
+
+ first->gtPrev = insertionPoint->gtPrev;
+ if (first->gtPrev == nullptr)
+ {
+ assert(insertionPoint == m_firstNode);
+ m_firstNode = first;
+ }
+ else
+ {
+ first->gtPrev->gtNext = first;
+ }
+
+ last->gtNext = insertionPoint;
+ insertionPoint->gtPrev = last;
+ }
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts a node after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which `node` will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node - The node to insert. Must not be part of any range.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node)
+{
+ assert(node != nullptr);
+
+ assert(node->gtNext == nullptr);
+ assert(node->gtPrev == nullptr);
+
+ FinishInsertAfter(insertionPoint, node, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts 2 nodes after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range. Inserted after node1.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node1 <-> node2 <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+ node2->gtPrev = node1;
+
+ FinishInsertAfter(insertionPoint, node1, node2);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts 3 nodes after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range. Inserted after node1.
+// node3 - The third node to insert. Must not be part of any range. Inserted after node2.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node1 <-> node2 <-> node3 <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+
+ FinishInsertAfter(insertionPoint, node1, node3);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts 4 nodes after another node in this range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// node1 - The first node to insert. Must not be part of any range.
+// node2 - The second node to insert. Must not be part of any range. Inserted after node1.
+// node3 - The third node to insert. Must not be part of any range. Inserted after node2.
+// node4 - The fourth node to insert. Must not be part of any range. Inserted after node3.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> node1 <-> node2 <-> node3 <-> node4 <-> previous insertionPoint->gtNext
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4)
+{
+ assert(node1 != nullptr);
+ assert(node2 != nullptr);
+ assert(node3 != nullptr);
+ assert(node4 != nullptr);
+
+ assert(node1->gtNext == nullptr);
+ assert(node1->gtPrev == nullptr);
+ assert(node2->gtNext == nullptr);
+ assert(node2->gtPrev == nullptr);
+ assert(node3->gtNext == nullptr);
+ assert(node3->gtPrev == nullptr);
+ assert(node4->gtNext == nullptr);
+ assert(node4->gtPrev == nullptr);
+
+ node1->gtNext = node2;
+
+ node2->gtPrev = node1;
+ node2->gtNext = node3;
+
+ node3->gtPrev = node2;
+ node3->gtNext = node4;
+
+ node4->gtPrev = node3;
+
+ FinishInsertAfter(insertionPoint, node1, node4);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::FinishInsertAfter: Helper function to finalize InsertAfter processing: link the
+// range to insertionPoint. gtNext/gtPrev links between first and last are already set.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// first - The first node of the range to insert.
+// last - The last node of the range to insert.
+//
+// Notes:
+// Resulting order:
+// insertionPoint <-> first <-> ... <-> last <-> previous insertionPoint->gtNext
+//
+void LIR::Range::FinishInsertAfter(GenTree* insertionPoint, GenTree* first, GenTree* last)
+{
+ assert(first != nullptr);
+ assert(last != nullptr);
+ assert(first->gtPrev == nullptr);
+ assert(last->gtNext == nullptr);
+
+ if (insertionPoint == nullptr)
+ {
+ if (m_lastNode == nullptr)
+ {
+ m_lastNode = last;
+ }
+ else
+ {
+ assert(m_firstNode != nullptr);
+ assert(m_firstNode->gtPrev == nullptr);
+ m_firstNode->gtPrev = last;
+ last->gtNext = m_firstNode;
+ }
+ m_firstNode = first;
+ }
+ else
+ {
+ assert(Contains(insertionPoint));
+
+ last->gtNext = insertionPoint->gtNext;
+ if (last->gtNext == nullptr)
+ {
+ assert(insertionPoint == m_lastNode);
+ m_lastNode = last;
+ }
+ else
+ {
+ last->gtNext->gtPrev = last;
+ }
+
+ first->gtPrev = insertionPoint;
+ insertionPoint->gtNext = first;
+ }
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertBefore: Inserts a range before another node in `this` range.
+//
+// Arguments:
+// insertionPoint - The node before which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the end of the range.
+// range - The range to splice in.
+//
+void LIR::Range::InsertBefore(GenTree* insertionPoint, Range&& range)
+{
+ assert(!range.IsEmpty());
+ FinishInsertBefore(insertionPoint, range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAfter: Inserts a range after another node in `this` range.
+//
+// Arguments:
+// insertionPoint - The node after which the nodes will be inserted. If non-null, must be part
+// of this range. If null, insert at the beginning of the range.
+// range - The range to splice in.
+//
+void LIR::Range::InsertAfter(GenTree* insertionPoint, Range&& range)
+{
+ assert(!range.IsEmpty());
+ FinishInsertAfter(insertionPoint, range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtBeginning: Inserts a node at the beginning of this range.
+//
+// Arguments:
+// node - The node to insert. Must not be part of any range.
+//
+void LIR::Range::InsertAtBeginning(GenTree* node)
+{
+ InsertBefore(m_firstNode, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtEnd: Inserts a node at the end of this range.
+//
+// Arguments:
+// node - The node to insert. Must not be part of any range.
+//
+void LIR::Range::InsertAtEnd(GenTree* node)
+{
+ InsertAfter(m_lastNode, node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtBeginning: Inserts a range at the beginning of `this` range.
+//
+// Arguments:
+// range - The range to splice in.
+//
+void LIR::Range::InsertAtBeginning(Range&& range)
+{
+ InsertBefore(m_firstNode, std::move(range));
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::InsertAtEnd: Inserts a range at the end of `this` range.
+//
+// Arguments:
+// range - The range to splice in.
+//
+void LIR::Range::InsertAtEnd(Range&& range)
+{
+ InsertAfter(m_lastNode, std::move(range));
+}
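+
+// Illustrative sketch (hypothetical variables): splicing the nodes of one range
+// onto the end of another, assuming `blockRange` and `otherRange` are LIR::Range
+// values and `otherRange` is non-empty as required by the splicing overloads:
+//
+//     blockRange.InsertAtEnd(std::move(otherRange));
+//
+// After the splice, the moved-from range's nodes belong to `blockRange`.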
+
+//------------------------------------------------------------------------
+// LIR::Range::Remove: Removes a node from this range.
+//
+// Arguments:
+// node - The node to remove. Must be part of this range.
+//
+void LIR::Range::Remove(GenTree* node)
+{
+ assert(node != nullptr);
+ assert(Contains(node));
+
+ GenTree* prev = node->gtPrev;
+ GenTree* next = node->gtNext;
+
+ if (prev != nullptr)
+ {
+ prev->gtNext = next;
+ }
+ else
+ {
+ assert(node == m_firstNode);
+ m_firstNode = next;
+ }
+
+ if (next != nullptr)
+ {
+ next->gtPrev = prev;
+ }
+ else
+ {
+ assert(node == m_lastNode);
+ m_lastNode = prev;
+ }
+
+ node->gtPrev = nullptr;
+ node->gtNext = nullptr;
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Remove: Removes a subrange from this range.
+//
+// Both the start and the end of the subrange must be part of this range.
+//
+// Arguments:
+// firstNode - The first node in the subrange.
+// lastNode - The last node in the subrange.
+//
+// Returns:
+// A mutable range containing the removed nodes.
+//
+LIR::Range LIR::Range::Remove(GenTree* firstNode, GenTree* lastNode)
+{
+ assert(firstNode != nullptr);
+ assert(lastNode != nullptr);
+ assert(Contains(firstNode));
+ assert((firstNode == lastNode) || firstNode->Precedes(lastNode));
+
+ GenTree* prev = firstNode->gtPrev;
+ GenTree* next = lastNode->gtNext;
+
+ if (prev != nullptr)
+ {
+ prev->gtNext = next;
+ }
+ else
+ {
+ assert(firstNode == m_firstNode);
+ m_firstNode = next;
+ }
+
+ if (next != nullptr)
+ {
+ next->gtPrev = prev;
+ }
+ else
+ {
+ assert(lastNode == m_lastNode);
+ m_lastNode = prev;
+ }
+
+ firstNode->gtPrev = nullptr;
+ lastNode->gtNext = nullptr;
+
+ return Range(firstNode, lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Remove: Removes a subrange from this range.
+//
+// Arguments:
+// range - The subrange to remove. Must be part of this range.
+//
+// Returns:
+// A mutable range containing the removed nodes.
+//
+LIR::Range LIR::Range::Remove(ReadOnlyRange&& range)
+{
+ return Remove(range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Delete: Deletes a node from this range.
+//
+// Note that the deleted node must not be used after this function has
+// been called. If the deleted node is part of a block, this function also
+// calls `Compiler::lvaDecRefCnts` as necessary.
+//
+// Arguments:
+// node - The node to delete. Must be part of this range.
+// block - The block that contains the node, if any. May be null.
+// compiler - The compiler context. May be null if block is null.
+//
+void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, GenTree* node)
+{
+ assert(node != nullptr);
+ assert((block == nullptr) == (compiler == nullptr));
+
+ Remove(node);
+
+ if (block != nullptr)
+ {
+ if (((node->OperGet() == GT_CALL) && ((node->gtFlags & GTF_CALL_UNMANAGED) != 0)) ||
+ (node->OperIsLocal() && !node->IsPhiNode()))
+ {
+ compiler->lvaDecRefCnts(block, node);
+ }
+ }
+
+ DEBUG_DESTROY_NODE(node);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Delete: Deletes a subrange from this range.
+//
+// Both the start and the end of the subrange must be part of this range.
+// Note that the deleted nodes must not be used after this function has
+// been called. If the deleted nodes are part of a block, this function
+// also calls `Compiler::lvaDecRefCnts` as necessary.
+//
+// Arguments:
+// firstNode - The first node in the subrange.
+// lastNode - The last node in the subrange.
+// block - The block that contains the subrange, if any. May be null.
+// compiler - The compiler context. May be null if block is null.
+//
+void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, GenTree* firstNode, GenTree* lastNode)
+{
+ assert(firstNode != nullptr);
+ assert(lastNode != nullptr);
+ assert((block == nullptr) == (compiler == nullptr));
+
+ Remove(firstNode, lastNode);
+
+ assert(lastNode->gtNext == nullptr);
+
+ if (block != nullptr)
+ {
+ for (GenTree* node = firstNode; node != nullptr; node = node->gtNext)
+ {
+ if (((node->OperGet() == GT_CALL) && ((node->gtFlags & GTF_CALL_UNMANAGED) != 0)) ||
+ (node->OperIsLocal() && !node->IsPhiNode()))
+ {
+ compiler->lvaDecRefCnts(block, node);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ // We can't do this in the loop above because it causes `IsPhiNode` to return a false negative
+ // for `GT_STORE_LCL_VAR` nodes that participate in phi definitions.
+ for (GenTree* node = firstNode; node != nullptr; node = node->gtNext)
+ {
+ DEBUG_DESTROY_NODE(node);
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::Delete: Deletes a subrange from this range.
+//
+// Both the start and the end of the subrange must be part of this range.
+// Note that the deleted nodes must not be used after this function has
+// been called. If the deleted nodes are part of a block, this function
+// also calls `Compiler::lvaDecRefCnts` as necessary.
+//
+// Arguments:
+// range - The subrange to delete.
+// block - The block that contains the subrange, if any. May be null.
+// compiler - The compiler context. May be null if block is null.
+//
+void LIR::Range::Delete(Compiler* compiler, BasicBlock* block, ReadOnlyRange&& range)
+{
+ Delete(compiler, block, range.m_firstNode, range.m_lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::TryGetUse: Try to find the use for a given node.
+//
+// Arguments:
+// node - The node for which to find the corresponding use.
+// use (out) - The use of the corresponding node, if any. Invalid if
+// this method returns false.
+//
+// Return Value: Returns true if a use was found; false otherwise.
+//
+bool LIR::Range::TryGetUse(GenTree* node, Use* use)
+{
+ assert(node != nullptr);
+ assert(use != nullptr);
+ assert(Contains(node));
+
+ // Don't bother looking for uses of nodes that are not values.
+ // If the node is the last node, we won't find a use (and we would
+ // end up creating an illegal range if we tried).
+ if (node->IsValue() && (node != LastNode()))
+ {
+ for (GenTree* n : ReadOnlyRange(node->gtNext, m_lastNode))
+ {
+ GenTree** edge;
+ if (n->TryGetUse(node, &edge))
+ {
+ *use = Use(*this, edge, n);
+ return true;
+ }
+ }
+ }
+
+ *use = Use();
+ return false;
+}
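+
+// Illustrative sketch (hypothetical variables): locating the use of a node and
+// redirecting it to a new constant, using the TryGetUse/ReplaceWith pattern from
+// this file. `range` is assumed to be the LIR::Range for a block and `node` a
+// value-producing node it contains:
+//
+//     LIR::Use use;
+//     if (range.TryGetUse(node, &use))
+//     {
+//         GenTree* constantOne = compiler->gtNewIconNode(1);
+//         range.InsertAfter(node, constantOne);
+//         use.ReplaceWith(compiler, constantOne);
+//     }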
+
+//------------------------------------------------------------------------
+// LIR::Range::GetMarkedRange: Computes the subrange that includes all nodes
+//                             in the dataflow trees rooted at the currently
+//                             marked set of nodes.
+//
+// This method logically uses the following algorithm to compute the
+// range:
+//
+// worklist = { set }
+// firstNode = start
+// isClosed = true
+//
+// while not worklist.isEmpty:
+// if not worklist.contains(firstNode):
+// isClosed = false
+// else:
+// for operand in firstNode:
+// worklist.add(operand)
+//
+// worklist.remove(firstNode)
+//
+// firstNode = firstNode.previousNode
+//
+// return firstNode
+//
+// Instead of using a set for the worklist, the implementation uses the
+// `LIR::Mark` bit of the `GenTree::LIRFlags` field to track whether or
+// not a node is in the worklist.
+//
+// Note also that this algorithm depends on LIR nodes being SDSU, on SDSU defs
+// and uses occurring in the same block, and on correct dataflow (i.e. defs
+// occurring before uses).
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+//
+// Returns:
+// The computed subrange.
+//
+LIR::ReadOnlyRange LIR::Range::GetMarkedRange(unsigned markCount,
+ GenTree* start,
+ bool* isClosed,
+ unsigned* sideEffects) const
+{
+ assert(markCount != 0);
+ assert(start != nullptr);
+ assert(isClosed != nullptr);
+ assert(sideEffects != nullptr);
+
+ bool sawUnmarkedNode = false;
+ unsigned sideEffectsInRange = 0;
+
+ GenTree* firstNode = start;
+ GenTree* lastNode = nullptr;
+ for (;;)
+ {
+ if ((firstNode->gtLIRFlags & LIR::Flags::Mark) != 0)
+ {
+ if (lastNode == nullptr)
+ {
+ lastNode = firstNode;
+ }
+
+ // Mark the node's operands
+ for (GenTree* operand : firstNode->Operands())
+ {
+ // Do not mark nodes that do not appear in the execution order
+ if (operand->OperGet() == GT_ARGPLACE)
+ {
+ continue;
+ }
+
+ operand->gtLIRFlags |= LIR::Flags::Mark;
+ markCount++;
+ }
+
+ // Unmark the node and update `firstNode`
+ firstNode->gtLIRFlags &= ~LIR::Flags::Mark;
+ markCount--;
+ }
+ else if (lastNode != nullptr)
+ {
+ sawUnmarkedNode = true;
+ }
+
+ if (lastNode != nullptr)
+ {
+ sideEffectsInRange |= (firstNode->gtFlags & GTF_ALL_EFFECT);
+ }
+
+ if (markCount == 0)
+ {
+ break;
+ }
+
+ firstNode = firstNode->gtPrev;
+
+ // This assert will fail if the dataflow that feeds the root node
+ // is incorrect in that it crosses a block boundary or if it involves
+ // a use that occurs before its corresponding def.
+ assert(firstNode != nullptr);
+ }
+
+ assert(lastNode != nullptr);
+
+ *isClosed = !sawUnmarkedNode;
+ *sideEffects = sideEffectsInRange;
+ return ReadOnlyRange(firstNode, lastNode);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::GetTreeRange: Computes the subrange that includes all nodes
+// in the dataflow tree rooted at a particular
+// node.
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+//
+// Returns:
+// The computed subrange.
+LIR::ReadOnlyRange LIR::Range::GetTreeRange(GenTree* root, bool* isClosed) const
+{
+ unsigned unused;
+ return GetTreeRange(root, isClosed, &unused);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::GetTreeRange: Computes the subrange that includes all nodes
+// in the dataflow tree rooted at a particular
+// node.
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+// sideEffects - An output parameter that summarizes the side effects
+// contained in the returned range.
+//
+// Returns:
+// The computed subrange.
+LIR::ReadOnlyRange LIR::Range::GetTreeRange(GenTree* root, bool* isClosed, unsigned* sideEffects) const
+{
+ assert(root != nullptr);
+
+ // Mark the root of the tree
+ const unsigned markCount = 1;
+ root->gtLIRFlags |= LIR::Flags::Mark;
+
+ return GetMarkedRange(markCount, root, isClosed, sideEffects);
+}
+
+//------------------------------------------------------------------------
+// LIR::Range::GetRangeOfOperandTrees: Computes the subrange that includes
+//                                     all nodes in the dataflow trees rooted
+//                                     by the operands to a particular node.
+//
+// Arguments:
+// root - The root of the dataflow tree.
+// isClosed - An output parameter that is set to true if the returned
+// range contains only nodes in the dataflow tree and false
+// otherwise.
+// sideEffects - An output parameter that summarizes the side effects
+// contained in the returned range.
+//
+// Returns:
+// The computed subrange.
+//
+LIR::ReadOnlyRange LIR::Range::GetRangeOfOperandTrees(GenTree* root, bool* isClosed, unsigned* sideEffects) const
+{
+ assert(root != nullptr);
+ assert(isClosed != nullptr);
+ assert(sideEffects != nullptr);
+
+ // Mark the root node's operands
+ unsigned markCount = 0;
+ for (GenTree* operand : root->Operands())
+ {
+ operand->gtLIRFlags |= LIR::Flags::Mark;
+ markCount++;
+ }
+
+ if (markCount == 0)
+ {
+ *isClosed = true;
+ *sideEffects = 0;
+ return ReadOnlyRange();
+ }
+
+ return GetMarkedRange(markCount, root, isClosed, sideEffects);
+}
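+
+// Example usage (illustrative sketch; `blockRange`, `targetRange`, `node`, and
+// `insertionPoint` are placeholder names): `GetTreeRange` composes with the
+// range-based `Remove` and `InsertBefore` overloads to relocate an entire
+// computation when the computed range is closed:
+//
+//     bool isClosed;
+//     LIR::ReadOnlyRange treeRange = blockRange.GetTreeRange(node, &isClosed);
+//     if (isClosed)
+//     {
+//         targetRange.InsertBefore(insertionPoint, blockRange.Remove(std::move(treeRange)));
+//     }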
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// LIR::Range::CheckLIR: Performs a set of correctness checks on the LIR
+// contained in this range.
+//
+// This method checks the following properties:
+// - Defs are singly-used
+// - Uses follow defs
+// - Uses are correctly linked into the block
+// - Nodes that do not produce values are not used
+// - Only LIR nodes are present in the block
+// - If any phi nodes are present in the range, they precede all other
+// nodes
+//
+// The first four properties are verified by walking the range's LIR in execution order,
+// inserting defs into a set as they are visited, and removing them as they are used. The
+// different cases are distinguished only when an error is detected.
+//
+// Arguments:
+// compiler - A compiler context.
+// checkUnusedValues - If true, verify that all unused values are flagged
+// with `LIR::Flags::IsUnusedValue`.
+//
+// Return Value:
+// 'true' if the LIR for the specified range is legal.
+//
+bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const
+{
+ if (IsEmpty())
+ {
+ // Nothing more to check.
+ return true;
+ }
+
+ // Check the gtNext/gtPrev links: (1) ensure there are no circularities, (2) ensure the gtPrev list is
+ // precisely the inverse of the gtNext list.
+ //
+ // To detect circularity, use the "tortoise and hare" 2-pointer algorithm.
+
+ GenTree* slowNode = FirstNode();
+ assert(slowNode != nullptr); // because it's a non-empty range
+ GenTree* fastNode1 = nullptr;
+ GenTree* fastNode2 = slowNode;
+ GenTree* prevSlowNode = nullptr;
+ while (((fastNode1 = fastNode2->gtNext) != nullptr) && ((fastNode2 = fastNode1->gtNext) != nullptr))
+ {
+ if ((slowNode == fastNode1) || (slowNode == fastNode2))
+ {
+ assert(!"gtNext nodes have a circularity!");
+ }
+ assert(slowNode->gtPrev == prevSlowNode);
+ prevSlowNode = slowNode;
+ slowNode = slowNode->gtNext;
+ assert(slowNode != nullptr); // the fastNodes would have gone null first.
+ }
+ // If we get here, the list had no circularities, so either fastNode1 or fastNode2 must be nullptr.
+ assert((fastNode1 == nullptr) || (fastNode2 == nullptr));
+
+ // Need to check the rest of the gtPrev links.
+ while (slowNode != nullptr)
+ {
+ assert(slowNode->gtPrev == prevSlowNode);
+ prevSlowNode = slowNode;
+ slowNode = slowNode->gtNext;
+ }
+
+ SmallHashTable<GenTree*, bool, 32> unusedDefs(compiler);
+
+ bool pastPhis = false;
+ GenTree* prev = nullptr;
+ for (Iterator node = begin(), end = this->end(); node != end; prev = *node, ++node)
+ {
+ // Verify that the node is allowed in LIR.
+ assert(node->IsLIR());
+
+ // TODO: validate catch arg stores
+
+ // Check that all phi nodes (if any) occur at the start of the range.
+ if ((node->OperGet() == GT_PHI_ARG) || (node->OperGet() == GT_PHI) || node->IsPhiDefn())
+ {
+ assert(!pastPhis);
+ }
+ else
+ {
+ pastPhis = true;
+ }
+
+ for (GenTree** useEdge : node->UseEdges())
+ {
+ GenTree* def = *useEdge;
+
+ assert((!checkUnusedValues || ((def->gtLIRFlags & LIR::Flags::IsUnusedValue) == 0)) &&
+ "operands should never be marked as unused values");
+
+ if (def->OperGet() == GT_ARGPLACE)
+ {
+ // ARGPLACE nodes are not represented in the LIR sequence. Ignore them.
+ continue;
+ }
+ else if (!def->IsValue())
+ {
+ // Calls may contain "uses" of nodes that do not produce a value. This is an artifact of
+ // the HIR and should probably be fixed, but doing so is an unknown amount of work.
+ assert(node->OperGet() == GT_CALL);
+ continue;
+ }
+
+ bool v;
+ bool foundDef = unusedDefs.TryRemove(def, &v);
+ if (!foundDef)
+ {
+ // First, scan backwards and look for a preceding use.
+ for (GenTree* prev = *node; prev != nullptr; prev = prev->gtPrev)
+ {
+ // TODO: dump the users and the def
+ GenTree** earlierUseEdge;
+ bool foundEarlierUse = prev->TryGetUse(def, &earlierUseEdge) && earlierUseEdge != useEdge;
+ assert(!foundEarlierUse && "found multiply-used LIR node");
+ }
+
+ // The def did not precede the use. Check to see if it exists in the block at all.
+ for (GenTree* next = node->gtNext; next != nullptr; next = next->gtNext)
+ {
+ // TODO: dump the user and the def
+ assert(next != def && "found def after use");
+ }
+
+ // The def might not be a node that produces a value.
+ assert(def->IsValue() && "found use of a node that does not produce a value");
+
+ // By this point, the only possibility is that the def is not threaded into the LIR sequence.
+ assert(false && "found use of a node that is not in the LIR sequence");
+ }
+ }
+
+ if (node->IsValue())
+ {
+ bool added = unusedDefs.AddOrUpdate(*node, true);
+ assert(added);
+ }
+ }
+
+ assert(prev == m_lastNode);
+
+ // At this point the unusedDefs map should contain only unused values.
+ if (checkUnusedValues)
+ {
+ for (auto kvp : unusedDefs)
+ {
+ GenTree* node = kvp.Key();
+ assert(((node->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0) && "found an unmarked unused value");
+ }
+ }
+
+ return true;
+}
+
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// LIR::AsRange: Returns an LIR view of the given basic block.
+//
+LIR::Range& LIR::AsRange(BasicBlock* block)
+{
+ return *static_cast<Range*>(block);
+}
+
+//------------------------------------------------------------------------
+// LIR::EmptyRange: Constructs and returns an empty range.
+//
+// static
+LIR::Range LIR::EmptyRange()
+{
+ return Range(nullptr, nullptr);
+}
+
+//------------------------------------------------------------------------
+// LIR::SeqTree:
+// Given a newly created, unsequenced HIR tree, set the evaluation
+// order (call gtSetEvalOrder) and sequence the tree (set gtNext/gtPrev
+// pointers by calling fgSetTreeSeq), and return a Range representing
+// the list of nodes. It is expected this will later be spliced into
+// an LIR range.
+//
+// Arguments:
+// compiler - The Compiler context.
+// tree - The tree to sequence.
+//
+// Return Value: The newly constructed range.
+//
+// static
+LIR::Range LIR::SeqTree(Compiler* compiler, GenTree* tree)
+{
+ // TODO-LIR: it would be great to assert that the tree has not already been
+ // threaded into an order, but I'm not sure that will be practical at this
+ // point.
+
+ compiler->gtSetEvalOrder(tree);
+ return Range(compiler->fgSetTreeSeq(tree, nullptr, true), tree);
+}
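+
+// Example usage (illustrative sketch; `block`, `tree`, and `insertionPoint`
+// are placeholder names): a newly built, unsequenced tree is usually
+// sequenced with `SeqTree` and immediately spliced into a block's range:
+//
+//     LIR::Range& blockRange = LIR::AsRange(block);
+//     blockRange.InsertAfter(insertionPoint, LIR::SeqTree(compiler, tree));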
+
+//------------------------------------------------------------------------
+// LIR::InsertBeforeTerminator:
+// Insert an LIR range before the terminating instruction in the given
+// basic block. If the basic block has no terminating instruction (i.e.
+// it has a jump kind that is not `BBJ_RETURN`, `BBJ_COND`, or
+// `BBJ_SWITCH`), the range is inserted at the end of the block.
+//
+// Arguments:
+// block - The block in which to insert the range.
+// range - The range to insert.
+//
+void LIR::InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range)
+{
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ GenTree* insertionPoint = nullptr;
+ if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH) || (block->bbJumpKind == BBJ_RETURN))
+ {
+ insertionPoint = blockRange.LastNode();
+ assert(insertionPoint != nullptr);
+
+#if DEBUG
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ assert(insertionPoint->OperGet() == GT_JTRUE);
+ break;
+
+ case BBJ_SWITCH:
+ assert((insertionPoint->OperGet() == GT_SWITCH) || (insertionPoint->OperGet() == GT_SWITCH_TABLE));
+ break;
+
+ case BBJ_RETURN:
+ assert((insertionPoint->OperGet() == GT_RETURN) ||
+ (insertionPoint->OperGet() == GT_JMP) ||
+ (insertionPoint->OperGet() == GT_CALL));
+ break;
+
+ default:
+ unreached();
+ }
+#endif
+ }
+
+ blockRange.InsertBefore(insertionPoint, std::move(range));
+}
diff --git a/src/jit/lir.h b/src/jit/lir.h
new file mode 100644
index 0000000000..e633303244
--- /dev/null
+++ b/src/jit/lir.h
@@ -0,0 +1,310 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _LIR_H_
+#define _LIR_H_
+
+class Compiler;
+struct GenTree;
+struct BasicBlock;
+
+class LIR final
+{
+public:
+ class Range;
+
+ //------------------------------------------------------------------------
+ // LIR::Flags: Defines the set of flags that may appear in the
+ // GenTree::gtLIRFlags field.
+ class Flags final
+ {
+ // Disallow the creation of values of this type.
+ Flags() = delete;
+
+ public:
+ enum : unsigned char
+ {
+ None = 0x00,
+
+ Mark = 0x01, // An arbitrary "mark" bit that can be used in place of
+ // a more expensive data structure when processing a set
+ // of LIR nodes. See for example `LIR::GetTreeRange`.
+
+ IsUnusedValue = 0x02, // Set on a node if it produces a value that is not
+ // subsequently used. Should never be set on nodes
+ // that return `false` for `GenTree::IsValue`. Note
+ // that this bit should not be assumed to be valid
+ // at all points during compilation: it is currently
+ // only computed during target-dependent lowering.
+ };
+ };
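+
+ // Example (illustrative; `node` is a placeholder): the flags are manipulated
+ // directly on `GenTree::gtLIRFlags`:
+ //
+ //     node->gtLIRFlags |= LIR::Flags::Mark;
+ //     bool marked = (node->gtLIRFlags & LIR::Flags::Mark) != 0;
+ //     node->gtLIRFlags &= ~LIR::Flags::Mark;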
+
+ //------------------------------------------------------------------------
+ // LIR::Use: Represents a use <-> def edge between two nodes in a range
+ // of LIR. Provides utilities to point the use to a different
+ // def. Note that because this type deals in edges between
+ // nodes, it represents the single use of the def.
+ //
+ class Use final
+ {
+ private:
+ Range* m_range;
+ GenTree** m_edge;
+ GenTree* m_user;
+
+ public:
+ Use();
+ Use(const Use& other);
+ Use(Range& range, GenTree** edge, GenTree* user);
+
+ Use& operator=(const Use& other);
+ Use& operator=(Use&& other);
+
+ static Use GetDummyUse(Range& range, GenTree* node);
+
+ GenTree* Def() const;
+ GenTree* User() const;
+
+ bool IsInitialized() const;
+ void AssertIsValid() const;
+ bool IsDummyUse() const;
+
+ void ReplaceWith(Compiler* compiler, GenTree* replacement);
+ unsigned ReplaceWithLclVar(Compiler* compiler, unsigned blockWeight, unsigned lclNum = BAD_VAR_NUM);
+ };
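+
+ // Example usage (illustrative sketch; `blockRange`, `def`, and `block` are
+ // placeholder names): a common pattern is to spill a def to a new local
+ // through its use:
+ //
+ //     LIR::Use use;
+ //     if (blockRange.TryGetUse(def, &use))
+ //     {
+ //         unsigned lclNum = use.ReplaceWithLclVar(compiler, block->getBBWeight(compiler));
+ //     }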
+
+ //------------------------------------------------------------------------
+ // LIR::ReadOnlyRange:
+ //
+ // Represents a contiguous range of LIR nodes that may be a subrange of
+ // a containing range. Provides a small set of utilities for iteration.
+ // Instances of this type are primarily created by and provided to
+ // analysis and utility methods on LIR::Range.
+ //
+ // Although some pains have been taken to help guard against the existence
+ // of invalid subranges, it remains possible to create them. For example,
+ // consider the following:
+ //
+ // // View the block as a range
+ // LIR::Range& blockRange = LIR::AsRange(block);
+ //
+ // // Create a range from the first non-phi node in the block to the
+ // // last node in the block
+ // LIR::ReadOnlyRange nonPhis = blockRange.NonPhiNodes();
+ //
+ // // Remove the last node from the block
+ // blockRange.Remove(blockRange.LastNode());
+ //
+ // After the removal of the last node in the block, the last node of
+ // nonPhis is no longer linked to any of the other nodes in nonPhis. Due
+ // to issues such as the above, some care must be taken in order to
+ // ensure that ranges are not used once they have been invalidated.
+ //
+ class ReadOnlyRange
+ {
+ friend class LIR;
+ friend class Range;
+ friend struct BasicBlock;
+
+ private:
+ GenTree* m_firstNode;
+ GenTree* m_lastNode;
+
+ ReadOnlyRange(GenTree* firstNode, GenTree* lastNode);
+
+ ReadOnlyRange(const ReadOnlyRange& other) = delete;
+ ReadOnlyRange& operator=(const ReadOnlyRange& other) = delete;
+
+ public:
+ class Iterator
+ {
+ friend class ReadOnlyRange;
+
+ GenTree* m_node;
+
+ Iterator(GenTree* begin) : m_node(begin)
+ {
+ }
+
+ public:
+ Iterator() : m_node(nullptr)
+ {
+ }
+
+ inline GenTree* operator*()
+ {
+ return m_node;
+ }
+
+ inline GenTree* operator->()
+ {
+ return m_node;
+ }
+
+ inline bool operator==(const Iterator& other) const
+ {
+ return m_node == other.m_node;
+ }
+
+ inline bool operator!=(const Iterator& other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ inline Iterator& operator++()
+ {
+ m_node = (m_node == nullptr) ? nullptr : m_node->gtNext;
+ return *this;
+ }
+ };
+
+ class ReverseIterator
+ {
+ friend class ReadOnlyRange;
+
+ GenTree* m_node;
+
+ ReverseIterator(GenTree* begin) : m_node(begin)
+ {
+ }
+
+ public:
+ ReverseIterator() : m_node(nullptr)
+ {
+ }
+
+ inline GenTree* operator*()
+ {
+ return m_node;
+ }
+
+ inline GenTree* operator->()
+ {
+ return m_node;
+ }
+
+ inline bool operator==(const ReverseIterator& other) const
+ {
+ return m_node == other.m_node;
+ }
+
+ inline bool operator!=(const ReverseIterator& other) const
+ {
+ return m_node != other.m_node;
+ }
+
+ inline ReverseIterator& operator++()
+ {
+ m_node = (m_node == nullptr) ? nullptr : m_node->gtPrev;
+ return *this;
+ }
+ };
+
+ ReadOnlyRange();
+ ReadOnlyRange(ReadOnlyRange&& other);
+
+ GenTree* FirstNode() const;
+ GenTree* LastNode() const;
+
+ bool IsEmpty() const;
+
+ Iterator begin() const;
+ Iterator end() const;
+
+ ReverseIterator rbegin() const;
+ ReverseIterator rend() const;
+
+#ifdef DEBUG
+ bool Contains(GenTree* node) const;
+#endif
+ };
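+
+ // Example (illustrative; `blockRange` is a placeholder): the iterators above
+ // allow a range to be traversed with a range-based for loop in execution
+ // order:
+ //
+ //     for (GenTree* node : blockRange)
+ //     {
+ //         // visit `node`
+ //     }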
+
+ //------------------------------------------------------------------------
+ // LIR::Range:
+ //
+ // Represents a contiguous range of LIR nodes. Provides a variety of
+ // utilities that modify the LIR contained in the range. Unlike
+ // `ReadOnlyRange`, values of this type may be edited.
+ //
+ // Because it is not a final class, it is possible to slice values of this
+ // type; this is especially dangerous when the Range value is actually of
+ // type `BasicBlock`. As a result, this type is not copyable and it is
+ // not possible to view a `BasicBlock` as anything other than a `Range&`.
+ //
+ class Range : public ReadOnlyRange
+ {
+ friend class LIR;
+ friend struct BasicBlock;
+
+ private:
+ Range(GenTree* firstNode, GenTree* lastNode);
+
+ Range(const Range& other) = delete;
+ Range& operator=(const Range& other) = delete;
+
+ ReadOnlyRange GetMarkedRange(unsigned markCount, GenTree* start, bool* isClosed, unsigned* sideEffects) const;
+
+ void FinishInsertBefore(GenTree* insertionPoint, GenTree* first, GenTree* last);
+ void FinishInsertAfter(GenTree* insertionPoint, GenTree* first, GenTree* last);
+
+ public:
+ Range();
+ Range(Range&& other);
+
+ GenTree* LastPhiNode() const;
+ GenTree* FirstNonPhiNode() const;
+ GenTree* FirstNonPhiOrCatchArgNode() const;
+
+ ReadOnlyRange PhiNodes() const;
+ ReadOnlyRange NonPhiNodes() const;
+
+ void InsertBefore(GenTree* insertionPoint, GenTree* node);
+ void InsertAfter(GenTree* insertionPoint, GenTree* node);
+
+ void InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2);
+ void InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3);
+ void InsertBefore(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4);
+
+ void InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2);
+ void InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3);
+ void InsertAfter(GenTree* insertionPoint, GenTree* node1, GenTree* node2, GenTree* node3, GenTree* node4);
+
+ void InsertBefore(GenTree* insertionPoint, Range&& range);
+ void InsertAfter(GenTree* insertionPoint, Range&& range);
+
+ void InsertAtBeginning(GenTree* node);
+ void InsertAtEnd(GenTree* node);
+
+ void InsertAtBeginning(Range&& range);
+ void InsertAtEnd(Range&& range);
+
+ void Remove(GenTree* node);
+ Range Remove(GenTree* firstNode, GenTree* lastNode);
+ Range Remove(ReadOnlyRange&& range);
+
+ void Delete(Compiler* compiler, BasicBlock* block, GenTree* node);
+ void Delete(Compiler* compiler, BasicBlock* block, GenTree* firstNode, GenTree* lastNode);
+ void Delete(Compiler* compiler, BasicBlock* block, ReadOnlyRange&& range);
+
+ bool TryGetUse(GenTree* node, Use* use);
+
+ ReadOnlyRange GetTreeRange(GenTree* root, bool* isClosed) const;
+ ReadOnlyRange GetTreeRange(GenTree* root, bool* isClosed, unsigned* sideEffects) const;
+ ReadOnlyRange GetRangeOfOperandTrees(GenTree* root, bool* isClosed, unsigned* sideEffects) const;
+
+#ifdef DEBUG
+ bool CheckLIR(Compiler* compiler, bool checkUnusedValues = false) const;
+#endif
+ };
+
+public:
+ static Range& AsRange(BasicBlock* block);
+
+ static Range EmptyRange();
+ static Range SeqTree(Compiler* compiler, GenTree* tree);
+
+ static void InsertBeforeTerminator(BasicBlock* block, LIR::Range&& range);
+};
+
+#endif // _LIR_H_
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
new file mode 100644
index 0000000000..19d326303e
--- /dev/null
+++ b/src/jit/liveness.cpp
@@ -0,0 +1,3133 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// =================================================================================
+// Code that works with liveness and related concepts (interference, debug scope)
+// =================================================================================
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if !defined(_TARGET_64BIT_)
+#include "decomposelongs.h"
+#endif
+
+/*****************************************************************************
+ *
+ * Helper for Compiler::fgPerBlockLocalVarLiveness().
+ * The goal is to compute the USE and DEF sets for a basic block.
+ * However, with the improved data flow analysis (DFA),
+ * we do not mark x as used in x = f(x) when there are no side effects in f(x).
+ * 'asgdLclVar' is set when 'tree' is part of an expression with no side effects
+ * which is assigned to asgdLclVar, i.e. asgdLclVar = (... tree ...)
+ */
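+// For example (illustrative): given a statement of the form x = x + y where
+// the RHS has no side effects, the caller passes the store to x as 'asgdLclVar';
+// the store is then flagged GTF_VAR_USEDEF and, when optimizing, the RHS
+// appearance of x is not added to the block's use-before-def set, while y is
+// recorded as an ordinary use.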
+void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar)
+{
+ bool rhsUSEDEF = false;
+ unsigned lclNum;
+ unsigned lhsLclNum;
+ LclVarDsc* varDsc;
+
+ noway_assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_LCL_FLD ||
+ tree->gtOper == GT_LCL_FLD_ADDR || tree->gtOper == GT_STORE_LCL_VAR ||
+ tree->gtOper == GT_STORE_LCL_FLD);
+
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_STORE_LCL_VAR)
+ {
+ lclNum = tree->gtLclNum;
+ }
+ else
+ {
+ noway_assert(tree->OperIsLocalField());
+ lclNum = tree->gtLclFld.gtLclNum;
+ }
+
+ noway_assert(lclNum < lvaCount);
+ varDsc = lvaTable + lclNum;
+
+ // We should never encounter a reference to a lclVar that has a zero refCnt.
+ if (varDsc->lvRefCnt == 0 && (!varTypeIsPromotable(varDsc) || !varDsc->lvPromoted))
+ {
+ JITDUMP("Found reference to V%02u with zero refCnt.\n", lclNum);
+ assert(!"We should never encounter a reference to a lclVar that has a zero refCnt.");
+ varDsc->lvRefCnt = 1;
+ }
+
+ // NOTE: the analysis done below is neither necessary nor correct for LIR: it depends on
+ // the nodes that precede `asgdLclVar` in execution order to factor into the dataflow for the
+ // value being assigned to the local var, which is not necessarily the case without tree
+ // order. Furthermore, LIR is always traversed in an order that reflects the dataflow for the
+ // block.
+ if (asgdLclVar != nullptr)
+ {
+ assert(!compCurBB->IsLIR());
+
+ /* we have an assignment to a local var : asgdLclVar = ... tree ...
+ * check for x = f(x) case */
+
+ noway_assert(asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR);
+ noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
+
+ lhsLclNum = asgdLclVar->gtLclVarCommon.gtLclNum;
+
+ if ((lhsLclNum == lclNum) && ((tree->gtFlags & GTF_VAR_DEF) == 0) && (tree != asgdLclVar))
+ {
+ /* bingo - we have an x = f(x) case */
+ noway_assert(lvaTable[lhsLclNum].lvType != TYP_STRUCT);
+ asgdLclVar->gtFlags |= GTF_VAR_USEDEF;
+ rhsUSEDEF = true;
+ }
+ }
+
+ /* Is this a tracked variable? */
+
+ if (varDsc->lvTracked)
+ {
+ noway_assert(varDsc->lvVarIndex < lvaTrackedCount);
+
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ {
+ // if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
+ VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
+ }
+ else
+ {
+ // if (!(fgCurDefSet & bitMask))
+ // {
+ // printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
+ // printTreeID(tree);
+ // printf("\n");
+ // }
+
+ /* We have the following scenarios:
+ * 1. "x += something" - in this case x is flagged GTF_VAR_USEASG
+ * 2. "x = ... x ..." - the LHS x is flagged GTF_VAR_USEDEF,
+ * the RHS x has rhsUSEDEF = true
+ * (both set by the code above)
+ *
+ * We should not mark an USE of x in the above cases provided the value "x" is not used
+ * further up in the tree. For example "while (i++)" is required to mark i as used.
+ */
+
+ /* make sure we don't include USEDEF variables in the USE set
+ * The first test is for the LHS, the second (!rhsUSEDEF) is for any var in the RHS */
+
+ if ((tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ {
+ /* Not a special flag - check to see if used to assign to itself */
+
+ if (rhsUSEDEF)
+ {
+ /* assign to itself - do not include it in the USE set */
+ if (!opts.MinOpts() && !opts.compDbgCode)
+ {
+ return;
+ }
+ }
+ }
+
+ /* Fall through for the "good" cases above - add the variable to the USE set */
+
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+
+ // For defs, also add to the (all) def set.
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0)
+ {
+ VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+ else if (varTypeIsStruct(varDsc))
+ {
+ noway_assert(!varDsc->lvTracked);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType != PROMOTION_TYPE_NONE)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(bitMask, VarSetOps::MakeEmpty(this));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ if (lvaTable[i].lvTracked)
+ {
+ noway_assert(lvaTable[i].lvVarIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, bitMask, lvaTable[i].lvVarIndex);
+ }
+ }
+
+ // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set.
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ {
+ VarSetOps::UnionD(this, fgCurDefSet, bitMask);
+ }
+ else if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet))
+ {
+ // Mark as used any struct fields that are not yet defined.
+ VarSetOps::UnionD(this, fgCurUseSet, bitMask);
+ }
+ }
+ }
+}
+
+/*****************************************************************************/
+void Compiler::fgLocalVarLiveness()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgLocalVarLiveness()\n");
+
+#ifndef LEGACY_BACKEND
+ if (compRationalIRForm)
+ {
+ lvaTableDump();
+ }
+#endif // !LEGACY_BACKEND
+ }
+#endif // DEBUG
+
+ // Init liveness data structures.
+ fgLocalVarLivenessInit();
+ assert(lvaSortAgain == false); // Set to false by lvaSortOnly()
+
+ EndPhase(PHASE_LCLVARLIVENESS_INIT);
+
+ // Make sure we haven't noted any partial last uses of promoted structs.
+ GetPromotedStructDeathVars()->RemoveAll();
+
+ // Initialize the per-block var sets.
+ fgInitBlockVarSets();
+
+ fgLocalVarLivenessChanged = false;
+ do
+ {
+ /* Figure out use/def info for all basic blocks */
+ fgPerBlockLocalVarLiveness();
+ EndPhase(PHASE_LCLVARLIVENESS_PERBLOCK);
+
+ /* Live variable analysis. */
+
+ fgStmtRemoved = false;
+ fgInterBlockLocalVarLiveness();
+ } while (fgStmtRemoved && fgLocalVarLivenessChanged);
+
+ // If we removed any dead code we will have set 'lvaSortAgain' via decRefCnts
+ if (lvaSortAgain)
+ {
+ JITDUMP("In fgLocalVarLiveness, setting lvaSortAgain back to false (set during dead-code removal)\n");
+ lvaSortAgain = false; // We don't re-Sort because we just performed LclVar liveness.
+ }
+
+ EndPhase(PHASE_LCLVARLIVENESS_INTERBLOCK);
+}
+
+/*****************************************************************************/
+void Compiler::fgLocalVarLivenessInit()
+{
+ // If necessary, re-sort the variable table by ref-count...before creating any varsets using this sorting.
+ if (lvaSortAgain)
+ {
+ JITDUMP("In fgLocalVarLivenessInit, sorting locals\n");
+ lvaSortByRefCount();
+ assert(lvaSortAgain == false); // Set to false by lvaSortOnly()
+ }
+
+#ifdef LEGACY_BACKEND // RyuJIT backend does not use interference info
+
+ for (unsigned i = 0; i < lclMAX_TRACKED; i++)
+ {
+ VarSetOps::AssignNoCopy(this, lvaVarIntf[i], VarSetOps::MakeEmpty(this));
+ }
+
+ /* If we're not optimizing at all, things are simple */
+ if (opts.MinOpts())
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(allOnes, VarSetOps::MakeFull(this));
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ VarSetOps::Assign(this, lvaVarIntf[i], allOnes);
+ }
+ return;
+ }
+#endif // LEGACY_BACKEND
+
+ // We mark a lcl as must-init in a first pass of local variable
+ // liveness (Liveness1), then assertion prop eliminates the
+ // uninit-use of a variable Vk, asserting it will be init'ed to
+ // null. Then, in a second local-var liveness (Liveness2), the
+ // variable Vk is no longer live on entry to the method, since its
+ // uses have been replaced via constant propagation.
+ //
+ // This leads to a bug: since Vk is no longer live on entry, the
+ // register allocator sees Vk and an argument Vj as having
+ // disjoint lifetimes, and allocates them to the same register.
+ // But Vk is still marked "must-init", and this initialization (of
+ // the register) trashes the value in Vj.
+ //
+ // Therefore, initialize must-init to false for all variables in
+ // each liveness phase.
+ for (unsigned lclNum = 0; lclNum < lvaCount; ++lclNum)
+ {
+ lvaTable[lclNum].lvMustInit = false;
+ }
+}
+
+// Note that for the LEGACY_BACKEND this method is replaced with
+// fgLegacyPerStatementLocalVarLiveness and it lives in codegenlegacy.cpp
+//
+#ifndef LEGACY_BACKEND
+//------------------------------------------------------------------------
+// fgPerNodeLocalVarLiveness:
+// Set fgCurHeapUse and fgCurHeapDef when the global heap is read or updated
+// Call fgMarkUseDef for any Local variables encountered
+//
+// Arguments:
+// tree - The current node.
+// asgdLclVar - Either nullptr or the assignment's left-hand-side GT_LCL_VAR.
+// Used as an argument to fgMarkUseDef(); only valid for HIR blocks.
+//
+void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
+{
+ assert(tree != nullptr);
+ assert(asgdLclVar == nullptr || !compCurBB->IsLIR());
+
+ switch (tree->gtOper)
+ {
+ case GT_QMARK:
+ case GT_COLON:
+ // We never should encounter a GT_QMARK or GT_COLON node
+ noway_assert(!"unexpected GT_QMARK/GT_COLON");
+ break;
+
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_LCL_VAR_ADDR:
+ case GT_LCL_FLD_ADDR:
+ case GT_STORE_LCL_VAR:
+ case GT_STORE_LCL_FLD:
+ fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ break;
+
+ case GT_CLS_VAR:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+ // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to assignment.
+ // Otherwise, we treat it as a use here.
+ if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ {
+ fgCurHeapUse = true;
+ }
+ break;
+
+ case GT_IND:
+ // For Volatile indirection, first mutate the global heap
+ // see comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of the heap
+ // and allows for a CSE of a subsequent non-volatile read.
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ // For any Volatile indirection, we must handle it as a
+ // definition of the global heap
+ fgCurHeapDef = true;
+ }
+
+ // If the GT_IND is the lhs of an assignment, we'll handle it
+ // as a heap def, when we get to assignment.
+ // Otherwise, we treat it as a use here.
+ if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = nullptr;
+ bool dummyIsEntire = false;
+ GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ }
+ else
+ {
+ // Defines a local addr
+ assert(dummyLclVarTree != nullptr);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ }
+ }
+ break;
+
+ // These should have been morphed away to become GT_INDs:
+ case GT_FIELD:
+ case GT_INDEX:
+ unreached();
+ break;
+
+ // We'll assume these are use-then-defs of the heap.
+ case GT_LOCKADD:
+ case GT_XADD:
+ case GT_XCHG:
+ case GT_CMPXCHG:
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ break;
+
+ case GT_MEMORYBARRIER:
+ // Similar to any Volatile indirection, we must handle this as a definition of the global heap
+ fgCurHeapDef = true;
+ break;
+
+ // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+ bool modHeap = true;
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+
+ if (!s_helperCallProperties.MutatesHeap(helpFunc) && !s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ modHeap = false;
+ }
+ }
+ if (modHeap)
+ {
+ if (!fgCurHeapDef)
+ {
+ fgCurHeapUse = true;
+ }
+ fgCurHeapDef = true;
+ fgCurHeapHavoc = true;
+ }
+ }
+
+ // If this is a p/invoke unmanaged call or if this is a tail-call
+ // and we have an unmanaged p/invoke call in the method,
+ // then we're going to run the p/invoke epilog.
+ // So we mark the FrameRoot as used by this instruction.
+ // This ensures that the block->bbVarUse will contain
+ // the FrameRoot local var if is it a tracked variable.
+
+ if ((tree->gtCall.IsUnmanaged() || (tree->gtCall.IsTailCall() && info.compCallUnmanaged)))
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the TCB local and mark it as used */
+
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (varDsc->lvTracked)
+ {
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+ }
+
+ break;
+
+ default:
+
+ // Determine whether it defines a heap location.
+ if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = nullptr;
+ if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ // If it doesn't define a local, then it might update the heap.
+ fgCurHeapDef = true;
+ }
+ }
+ break;
+ }
+}
+
+void Compiler::fgPerStatementLocalVarLiveness(GenTree* startNode, GenTree* asgdLclVar)
+{
+ // The startNode must be the 1st node of the statement.
+ assert(startNode == compCurStmt->gtStmt.gtStmtList);
+
+ // The asgdLclVar node must be either nullptr or a GT_LCL_VAR or GT_STORE_LCL_VAR
+ assert((asgdLclVar == nullptr) || (asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR));
+
+ // We always walk every node in statement list
+ for (GenTreePtr node = startNode; node != nullptr; node = node->gtNext)
+ {
+ fgPerNodeLocalVarLiveness(node, asgdLclVar);
+ }
+}
+
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************/
+void Compiler::fgPerBlockLocalVarLiveness()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgPerBlockLocalVarLiveness()\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* block;
+
+#if CAN_DISABLE_DFA
+
+ /* If we're not optimizing at all, things are simple */
+
+ if (opts.MinOpts())
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveAll, VarSetOps::MakeEmpty(this));
+
+ /* We simply make everything live everywhere */
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, liveAll, varDsc->lvVarIndex);
+ }
+ }
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Strictly speaking, the assignments for the "Def" cases aren't necessary here.
+ // The empty set would do as well. Use means "use-before-def", so as long as that's
+ // "all", this has the right effect.
+ VarSetOps::Assign(this, block->bbVarUse, liveAll);
+ VarSetOps::Assign(this, block->bbVarDef, liveAll);
+ VarSetOps::Assign(this, block->bbLiveIn, liveAll);
+ VarSetOps::Assign(this, block->bbLiveOut, liveAll);
+ block->bbHeapUse = true;
+ block->bbHeapDef = true;
+ block->bbHeapLiveIn = true;
+ block->bbHeapLiveOut = true;
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this));
+ break;
+ default:
+ break;
+ }
+ }
+ return;
+ }
+
+#endif // CAN_DISABLE_DFA
+
+ // Avoid allocations in the long case.
+ VarSetOps::AssignNoCopy(this, fgCurUseSet, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, fgCurDefSet, VarSetOps::MakeEmpty(this));
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+ GenTreePtr asgdLclVar;
+
+ VarSetOps::ClearD(this, fgCurUseSet);
+ VarSetOps::ClearD(this, fgCurDefSet);
+
+ fgCurHeapUse = false;
+ fgCurHeapDef = false;
+ fgCurHeapHavoc = false;
+
+ compCurBB = block;
+
+ if (!block->IsLIR())
+ {
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ compCurStmt = stmt;
+
+ asgdLclVar = nullptr;
+ tree = stmt->gtStmt.gtStmtExpr;
+ noway_assert(tree);
+
+ // The following code checks if we have an assignment expression
+ // which may become a GTF_VAR_USEDEF - x=f(x).
+ // consider if LHS is local var - ignore if RHS contains SIDE_EFFECTS
+
+ if ((tree->gtOper == GT_ASG && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) ||
+ tree->gtOper == GT_STORE_LCL_VAR)
+ {
+ noway_assert(tree->gtOp.gtOp1);
+ GenTreePtr rhsNode;
+ if (tree->gtOper == GT_ASG)
+ {
+ noway_assert(tree->gtOp.gtOp2);
+ asgdLclVar = tree->gtOp.gtOp1;
+ rhsNode = tree->gtOp.gtOp2;
+ }
+ else
+ {
+ asgdLclVar = tree;
+ rhsNode = tree->gtOp.gtOp1;
+ }
+
+ // If this is an assignment to local var with no SIDE EFFECTS,
+ // set asgdLclVar so that genMarkUseDef will flag potential
+ // x=f(x) expressions as GTF_VAR_USEDEF.
+ // Reset the flag before recomputing it - it may have been set before,
+ // but subsequent optimizations could have removed the rhs reference.
+ asgdLclVar->gtFlags &= ~GTF_VAR_USEDEF;
+ if ((rhsNode->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
+ }
+ else
+ {
+ asgdLclVar = nullptr;
+ }
+ }
+
+#ifdef LEGACY_BACKEND
+ tree = fgLegacyPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, NULL, asgdLclVar);
+
+ // We must have walked to the end of this statement.
+ noway_assert(!tree);
+#else // !LEGACY_BACKEND
+ fgPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, asgdLclVar);
+#endif // !LEGACY_BACKEND
+ }
+ }
+ else
+ {
+#ifdef LEGACY_BACKEND
+ unreached();
+#else // !LEGACY_BACKEND
+ // NOTE: the `asgdLclVar` analysis done above is not correct for LIR: it depends
+ // on all of the nodes that precede `asgdLclVar` in execution order to factor into the
+ // dataflow for the value being assigned to the local var, which is not necessarily the
+ // case without tree order. As a result, we simply pass `nullptr` for `asgdLclVar`.
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ fgPerNodeLocalVarLiveness(node, nullptr);
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ /* Get the TCB local and mark it as used */
+
+ if (block->bbJumpKind == BBJ_RETURN && info.compCallUnmanaged)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* varDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (varDsc->lvTracked)
+ {
+ if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ {
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(allVars, VarSetOps::Union(this, fgCurUseSet, fgCurDefSet));
+ printf("BB%02u", block->bbNum);
+ printf(" USE(%d)=", VarSetOps::Count(this, fgCurUseSet));
+ lvaDispVarSet(fgCurUseSet, allVars);
+ if (fgCurHeapUse)
+ {
+ printf(" + HEAP");
+ }
+ printf("\n DEF(%d)=", VarSetOps::Count(this, fgCurDefSet));
+ lvaDispVarSet(fgCurDefSet, allVars);
+ if (fgCurHeapDef)
+ {
+ printf(" + HEAP");
+ }
+ if (fgCurHeapHavoc)
+ {
+ printf("*");
+ }
+ printf("\n\n");
+ }
+#endif // DEBUG
+
+ VarSetOps::Assign(this, block->bbVarUse, fgCurUseSet);
+ VarSetOps::Assign(this, block->bbVarDef, fgCurDefSet);
+ block->bbHeapUse = fgCurHeapUse;
+ block->bbHeapDef = fgCurHeapDef;
+ block->bbHeapHavoc = fgCurHeapHavoc;
+
+ /* also initialize the IN set, just in case we will do multiple DFAs */
+
+ VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
+ block->bbHeapLiveIn = false;
+ }
+}
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+// Helper functions to mark variables live over their entire scope
+
+void Compiler::fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var)
+{
+ assert(var);
+
+ LclVarDsc* lclVarDsc1 = &lvaTable[var->vsdVarNum];
+
+ if (lclVarDsc1->lvTracked)
+ {
+ VarSetOps::AddElemD(this, *inScope, lclVarDsc1->lvVarIndex);
+ }
+}
+
+void Compiler::fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var)
+{
+ assert(var);
+
+ LclVarDsc* lclVarDsc1 = &lvaTable[var->vsdVarNum];
+
+ if (lclVarDsc1->lvTracked)
+ {
+ VarSetOps::RemoveElemD(this, *inScope, lclVarDsc1->lvVarIndex);
+ }
+}
+
+/*****************************************************************************/
+
+void Compiler::fgMarkInScope(BasicBlock* block, VARSET_VALARG_TP inScope)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Scope info: block BB%02u marking in scope: ", block->bbNum);
+ dumpConvertedVarSet(this, inScope);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Record which vars are artificially kept alive for debugging */
+
+ VarSetOps::Assign(this, block->bbScope, inScope);
+
+ /* Being in scope implies a use of the variable. Add the var to bbVarUse
+ so that redoing fgLiveVarAnalysis() will work correctly */
+
+ VarSetOps::UnionD(this, block->bbVarUse, inScope);
+
+ /* Artificially mark all vars in scope as alive */
+
+ VarSetOps::UnionD(this, block->bbLiveIn, inScope);
+ VarSetOps::UnionD(this, block->bbLiveOut, inScope);
+}
+
+void Compiler::fgUnmarkInScope(BasicBlock* block, VARSET_VALARG_TP unmarkScope)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Scope info: block BB%02u UNmarking in scope: ", block->bbNum);
+ dumpConvertedVarSet(this, unmarkScope);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(VarSetOps::IsSubset(this, unmarkScope, block->bbScope));
+
+ VarSetOps::DiffD(this, block->bbScope, unmarkScope);
+ VarSetOps::DiffD(this, block->bbVarUse, unmarkScope);
+ VarSetOps::DiffD(this, block->bbLiveIn, unmarkScope);
+ VarSetOps::DiffD(this, block->bbLiveOut, unmarkScope);
+}
+
+#ifdef DEBUG
+
+void Compiler::fgDispDebugScopes()
+{
+ printf("\nDebug scopes:\n");
+
+ BasicBlock* block;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ printf("BB%02u: ", block->bbNum);
+ dumpConvertedVarSet(this, block->bbScope);
+ printf("\n");
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Mark variables live across their entire scope.
+ */
+
+#if FEATURE_EH_FUNCLETS
+
+void Compiler::fgExtendDbgScopes()
+{
+ compResetScopeLists();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMarking vars alive over their entire scope :\n\n");
+ }
+
+ if (verbose)
+ {
+ compDispScopeLists();
+ }
+#endif // DEBUG
+
+ VARSET_TP VARSET_INIT_NOCOPY(inScope, VarSetOps::MakeEmpty(this));
+
+ // Mark all tracked LocalVars live over their scope - walk the blocks
+ // keeping track of the current life, and assign it to the blocks.
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ // If we get to a funclet, reset the scope lists and start again, since the block
+ // offsets will be out of order compared to the previous block.
+
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ compResetScopeLists();
+ VarSetOps::ClearD(this, inScope);
+ }
+
+ // Process all scopes up to the current offset
+
+ if (block->bbCodeOffs != BAD_IL_OFFSET)
+ {
+ compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife, &Compiler::fgEndScopeLife);
+ }
+
+ // Assign the current set of variables that are in scope to the block variables tracking this.
+
+ fgMarkInScope(block, inScope);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgDispDebugScopes();
+ }
+#endif // DEBUG
+}
+
+#else // !FEATURE_EH_FUNCLETS
+
+void Compiler::fgExtendDbgScopes()
+{
+ compResetScopeLists();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMarking vars alive over their entire scope :\n\n");
+ compDispScopeLists();
+ }
+#endif // DEBUG
+
+ VARSET_TP VARSET_INIT_NOCOPY(inScope, VarSetOps::MakeEmpty(this));
+ compProcessScopesUntil(0, &inScope, &Compiler::fgBeginScopeLife, &Compiler::fgEndScopeLife);
+
+ IL_OFFSET lastEndOffs = 0;
+
+ // Mark all tracked LocalVars live over their scope - walk the blocks
+ // keeping track of the current life, and assign it to the blocks.
+
+ BasicBlock* block;
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ // Find scopes becoming alive. If there is a gap in the instr
+ // sequence, we need to process any scopes on those missing offsets.
+
+ if (block->bbCodeOffs != BAD_IL_OFFSET)
+ {
+ if (lastEndOffs != block->bbCodeOffs)
+ {
+ noway_assert(lastEndOffs < block->bbCodeOffs);
+
+ compProcessScopesUntil(block->bbCodeOffs, &inScope, &Compiler::fgBeginScopeLife,
+ &Compiler::fgEndScopeLife);
+ }
+ else
+ {
+ while (VarScopeDsc* varScope = compGetNextEnterScope(block->bbCodeOffs))
+ {
+ fgBeginScopeLife(&inScope, varScope);
+ }
+ }
+ }
+
+ // Assign the current set of variables that are in scope to the block variables tracking this.
+
+ fgMarkInScope(block, inScope);
+
+ // Find scopes going dead.
+
+ if (block->bbCodeOffsEnd != BAD_IL_OFFSET)
+ {
+ VarScopeDsc* varScope;
+ while ((varScope = compGetNextExitScope(block->bbCodeOffsEnd)) != nullptr)
+ {
+ fgEndScopeLife(&inScope, varScope);
+ }
+
+ lastEndOffs = block->bbCodeOffsEnd;
+ }
+ }
+
+ /* Everything should be out of scope by the end of the method. But if the
+ last BB got removed, then inScope may not be empty. */
+
+ noway_assert(VarSetOps::IsEmpty(this, inScope) || lastEndOffs < info.compILCodeSize);
+}
+
+#endif // !FEATURE_EH_FUNCLETS
+
+/*****************************************************************************
+ *
+ * For debuggable code, we allow redundant assignments to vars
+ * by marking them live over their entire scope.
+ */
+
+void Compiler::fgExtendDbgLifetimes()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgExtendDbgLifetimes()\n");
+ }
+#endif // DEBUG
+
+ noway_assert(opts.compDbgCode && (info.compVarScopesCount > 0));
+
+ /*-------------------------------------------------------------------------
+ * Extend the lifetimes over the entire reported scope of the variable.
+ */
+
+ fgExtendDbgScopes();
+
+/*-------------------------------------------------------------------------
+ * Partly update liveness info so that we handle any funky BBF_INTERNAL
+ * blocks inserted out of sequence.
+ */
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ fgDispBBLiveness();
+ }
+#endif
+
+ fgLiveVarAnalysis(true);
+
+ /* For compDbgCode, we prepend an empty BB which will hold the
+ initializations of variables which are in scope at IL offset 0 (but
+ not initialized by the IL code). Since they will currently be
+ marked as live on entry to fgFirstBB, unmark the liveness so that
+ the following code will know to add the initializations. */
+
+ assert(fgFirstBBisScratch());
+
+ VARSET_TP VARSET_INIT_NOCOPY(trackedArgs, VarSetOps::MakeEmpty(this));
+
+ for (unsigned argNum = 0; argNum < info.compArgsCount; argNum++)
+ {
+ LclVarDsc* argDsc = lvaTable + argNum;
+ if (argDsc->lvPromoted)
+ {
+ lvaPromotionType promotionType = lvaGetPromotionType(argDsc);
+
+ if (promotionType == PROMOTION_TYPE_INDEPENDENT)
+ {
+ noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ argDsc = lvaTable + fieldVarNum;
+ }
+ }
+ noway_assert(argDsc->lvIsParam);
+ if (argDsc->lvTracked)
+ {
+ noway_assert(!VarSetOps::IsMember(this, trackedArgs, argDsc->lvVarIndex)); // Each arg should define a
+ // different bit.
+ VarSetOps::AddElemD(this, trackedArgs, argDsc->lvVarIndex);
+ }
+ }
+
+ // Don't unmark struct locals, either.
+ VARSET_TP VARSET_INIT_NOCOPY(noUnmarkVars, trackedArgs);
+
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &lvaTable[i];
+ if (varTypeIsStruct(varDsc) && varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, noUnmarkVars, varDsc->lvVarIndex);
+ }
+ }
+ fgUnmarkInScope(fgFirstBB, VarSetOps::Diff(this, fgFirstBB->bbScope, noUnmarkVars));
+
+ /*-------------------------------------------------------------------------
+ * As we keep variables artificially alive over their entire scope,
+ * we need to also artificially initialize them if the scope does
+ * not exactly match the real lifetimes, or they will contain
+ * garbage until they are initialized by the IL code.
+ */
+
+ VARSET_TP VARSET_INIT_NOCOPY(initVars, VarSetOps::MakeEmpty(this)); // Vars which are artificially made alive
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ VarSetOps::ClearD(this, initVars);
+
+ switch (block->bbJumpKind)
+ {
+ case BBJ_NONE:
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ VarSetOps::UnionD(this, initVars, block->bbNext->bbScope);
+ break;
+
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ case BBJ_EHFILTERRET:
+ VarSetOps::UnionD(this, initVars, block->bbJumpDest->bbScope);
+ break;
+
+ case BBJ_CALLFINALLY:
+ if (!(block->bbFlags & BBF_RETLESS_CALL))
+ {
+ assert(block->isBBCallAlwaysPair());
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ VarSetOps::UnionD(this, initVars, block->bbNext->bbScope);
+ }
+ VarSetOps::UnionD(this, initVars, block->bbJumpDest->bbScope);
+ break;
+
+ case BBJ_COND:
+ PREFIX_ASSUME(block->bbNext != nullptr);
+ VarSetOps::UnionD(this, initVars, block->bbNext->bbScope);
+ VarSetOps::UnionD(this, initVars, block->bbJumpDest->bbScope);
+ break;
+
+ case BBJ_SWITCH:
+ {
+ BasicBlock** jmpTab;
+ unsigned jmpCnt;
+
+ jmpCnt = block->bbJumpSwt->bbsCount;
+ jmpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ VarSetOps::UnionD(this, initVars, (*jmpTab)->bbScope);
+ } while (++jmpTab, --jmpCnt);
+ }
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_RETURN:
+ break;
+
+ case BBJ_THROW:
+ /* We don't have to do anything as we mark
+ * all vars live on entry to a catch handler as
+ * volatile anyway
+ */
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+
+ /* If the var is already live on entry to the current BB,
+ we would have already initialized it. So ignore bbLiveIn */
+
+ VarSetOps::DiffD(this, initVars, block->bbLiveIn);
+
+ /* Add statements initializing the vars, if there are any to initialize */
+ unsigned blockWeight = block->getBBWeight(this);
+
+ VARSET_ITER_INIT(this, iter, initVars, varIndex);
+ while (iter.NextElem(this, &varIndex))
+ {
+ /* Create initialization tree */
+
+ unsigned varNum = lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ var_types type = varDsc->TypeGet();
+
+ // Don't extend struct lifetimes -- they aren't enregistered, anyway.
+ if (type == TYP_STRUCT)
+ {
+ continue;
+ }
+
+ // If we haven't already done this ...
+ if (!fgLocalVarLivenessDone)
+ {
+ // Create a "zero" node
+ GenTree* zero = gtNewZeroConNode(genActualType(type));
+
+ // Create initialization node
+ if (!block->IsLIR())
+ {
+ GenTree* varNode = gtNewLclvNode(varNum, type);
+ GenTree* initNode = gtNewAssignNode(varNode, zero);
+
+ // Create a statement for the initializer, sequence it, and append it to the current BB.
+ GenTree* initStmt = gtNewStmt(initNode);
+ gtSetStmtInfo(initStmt);
+ fgSetStmtSeq(initStmt);
+ fgInsertStmtNearEnd(block, initStmt);
+ }
+ else
+ {
+ GenTree* store = new (this, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, type, varNum, BAD_IL_OFFSET);
+ store->gtOp.gtOp1 = zero;
+ store->gtFlags |= (GTF_VAR_DEF | GTF_ASG);
+
+ LIR::Range initRange = LIR::EmptyRange();
+ initRange.InsertBefore(nullptr, zero, store);
+
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ DecomposeLongs::DecomposeRange(this, blockWeight, initRange);
+#endif
+
+ // Naively inserting the initializer at the end of the block may add code after the block's
+ // terminator, in which case the inserted code will never be executed (and the IR for the
+ // block will be invalid). Use `LIR::InsertBeforeTerminator` to avoid this problem.
+ LIR::InsertBeforeTerminator(block, std::move(initRange));
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Created zero-init of V%02u in BB%02u\n", varNum, block->bbNum);
+ }
+#endif // DEBUG
+
+ varDsc->incRefCnts(block->getBBWeight(this), this);
+
+ block->bbFlags |= BBF_CHANGED; // indicates that the contents of the block have changed.
+ }
+
+ /* Update liveness information so that redoing fgLiveVarAnalysis()
+ will work correctly if needed */
+
+ VarSetOps::AddElemD(this, block->bbVarDef, varIndex);
+ VarSetOps::AddElemD(this, block->bbLiveOut, varIndex);
+ }
+ }
+
+ // raMarkStkVars() reserves stack space for unused variables (which
+ // need to be initialized). However, arguments don't need to be initialized.
+ // So just ensure that they don't have a zero ref count.
+
+ unsigned lclNum = 0;
+ for (LclVarDsc *varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvRefCnt == 0 && varDsc->lvIsRegArg)
+ {
+ varDsc->lvRefCnt = 1;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBB liveness after fgExtendDbgLifetimes():\n\n");
+ fgDispBBLiveness();
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+VARSET_VALRET_TP Compiler::fgGetHandlerLiveVars(BasicBlock* block)
+{
+ noway_assert(block);
+ noway_assert(ehBlockHasExnFlowDsc(block));
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveVars, VarSetOps::MakeEmpty(this));
+ EHblkDsc* HBtab = ehGetBlockExnFlowDsc(block);
+
+ do
+ {
+ /* Either we enter the filter first or the catch/finally */
+
+ if (HBtab->HasFilter())
+ {
+ VarSetOps::UnionD(this, liveVars, HBtab->ebdFilter->bbLiveIn);
+#if FEATURE_EH_FUNCLETS
+ // The EH subsystem can trigger a stack walk after the filter
+ // has returned, but before invoking the handler, and the only
+ // IP address reported from this method will be the original
+ // faulting instruction, thus everything in the try body
+ // must report as live any variables live-out of the filter
+ // (which is the same as those live-in to the handler)
+ VarSetOps::UnionD(this, liveVars, HBtab->ebdHndBeg->bbLiveIn);
+#endif // FEATURE_EH_FUNCLETS
+ }
+ else
+ {
+ VarSetOps::UnionD(this, liveVars, HBtab->ebdHndBeg->bbLiveIn);
+ }
+
+ /* If we have nested try's edbEnclosing will provide them */
+ noway_assert((HBtab->ebdEnclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX) ||
+ (HBtab->ebdEnclosingTryIndex > ehGetIndex(HBtab)));
+
+ unsigned outerIndex = HBtab->ebdEnclosingTryIndex;
+ if (outerIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+ HBtab = ehGetDsc(outerIndex);
+
+ } while (true);
+
+ return liveVars;
+}
+
+/*****************************************************************************
+ *
+ * This is the classic algorithm for Live Variable Analysis.
+ * If updateInternalOnly==true, only update BBF_INTERNAL blocks.
+ */
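+// Reader's note (added for clarity; an informal restatement, not part of the original
+// comment): the loop below iterates the classic backward dataflow equations to a
+// fixed point,
+//
+//     liveOut(B) = union of liveIn(S) over all successors S of B
+//     liveIn(B)  = bbVarUse(B) | (liveOut(B) & ~bbVarDef(B))
+//
+// with additional terms for JMP blocks, the "this" pointer, and exception handlers.
+// The do/while exits after a single pass when hasPossibleBackEdge remains false,
+// since every block was then processed after all of its successors.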
+
+void Compiler::fgLiveVarAnalysis(bool updateInternalOnly)
+{
+ BasicBlock* block;
+ bool change;
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT_NOCOPY(extraLiveOutFromFinally, VarSetOps::MakeEmpty(this));
+#endif // DEBUG
+ bool keepAliveThis = lvaKeepAliveAndReportThis() && lvaTable[info.compThisArg].lvTracked;
+
+ /* Live Variable Analysis - Backward dataflow */
+
+ bool hasPossibleBackEdge = false;
+
+ do
+ {
+ change = false;
+
+ /* Visit all blocks and compute new data flow values */
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveIn, VarSetOps::MakeEmpty(this));
+ VARSET_TP VARSET_INIT_NOCOPY(liveOut, VarSetOps::MakeEmpty(this));
+
+ bool heapLiveIn = false;
+ bool heapLiveOut = false;
+
+ for (block = fgLastBB; block; block = block->bbPrev)
+ {
+ // Sometimes block numbers are not monotonically increasing, which
+ // would cause us to fail to identify backedges.
+ if (block->bbNext && block->bbNext->bbNum <= block->bbNum)
+ {
+ hasPossibleBackEdge = true;
+ }
+
+ if (updateInternalOnly)
+ {
+ /* Only update BBF_INTERNAL blocks as they may be
+ syntactically out of sequence. */
+
+ noway_assert(opts.compDbgCode && (info.compVarScopesCount > 0));
+
+ if (!(block->bbFlags & BBF_INTERNAL))
+ {
+ continue;
+ }
+ }
+
+ /* Compute the 'liveOut' set */
+
+ VarSetOps::ClearD(this, liveOut);
+ heapLiveOut = false;
+ if (block->endsWithJmpMethod(this))
+ {
+ // A JMP uses all the arguments, so mark them all
+ // as live at the JMP instruction
+ //
+ const LclVarDsc* varDscEndParams = lvaTable + info.compArgsCount;
+ for (LclVarDsc* varDsc = lvaTable; varDsc < varDscEndParams; varDsc++)
+ {
+ noway_assert(!varDsc->lvPromoted);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(this, liveOut, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ // Additionally, union in all the live-in tracked vars of successors.
+ AllSuccessorIter succsEnd = block->GetAllSuccs(this).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(this).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ VarSetOps::UnionD(this, liveOut, succ->bbLiveIn);
+ heapLiveOut = heapLiveOut || (*succs)->bbHeapLiveIn;
+ if (succ->bbNum <= block->bbNum)
+ {
+ hasPossibleBackEdge = true;
+ }
+ }
+
+ /* For lvaKeepAliveAndReportThis methods, "this" has to be kept alive everywhere.
+ Note that a function may end in a throw or an infinite loop (as opposed to a return);
+ "this" has to be alive everywhere even in such methods. */
+
+ if (keepAliveThis)
+ {
+ VarSetOps::AddElemD(this, liveOut, lvaTable[info.compThisArg].lvVarIndex);
+ }
+
+ /* Compute the 'liveIn' set */
+
+ VarSetOps::Assign(this, liveIn, liveOut);
+ VarSetOps::DiffD(this, liveIn, block->bbVarDef);
+ VarSetOps::UnionD(this, liveIn, block->bbVarUse);
+
+ heapLiveIn = (heapLiveOut && !block->bbHeapDef) || block->bbHeapUse;
+
+ /* Can exceptions from this block be handled (in this function)? */
+
+ if (ehBlockHasExnFlowDsc(block))
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(liveVars, fgGetHandlerLiveVars(block));
+
+ VarSetOps::UnionD(this, liveIn, liveVars);
+ VarSetOps::UnionD(this, liveOut, liveVars);
+ }
+
+ /* Has there been any change in either live set? */
+
+ if (!VarSetOps::Equal(this, block->bbLiveIn, liveIn) || !VarSetOps::Equal(this, block->bbLiveOut, liveOut))
+ {
+ if (updateInternalOnly)
+ {
+ // Only "extend" liveness over BBF_INTERNAL blocks
+
+ noway_assert(block->bbFlags & BBF_INTERNAL);
+
+ if (!VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveIn, liveIn), liveIn) ||
+ !VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveOut, liveOut), liveOut))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Scope info: block BB%02u LiveIn+ ", block->bbNum);
+ dumpConvertedVarSet(this, VarSetOps::Diff(this, liveIn, block->bbLiveIn));
+ printf(", LiveOut+ ");
+ dumpConvertedVarSet(this, VarSetOps::Diff(this, liveOut, block->bbLiveOut));
+ printf("\n");
+ }
+#endif // DEBUG
+
+ VarSetOps::UnionD(this, block->bbLiveIn, liveIn);
+ VarSetOps::UnionD(this, block->bbLiveOut, liveOut);
+ change = true;
+ }
+ }
+ else
+ {
+ VarSetOps::Assign(this, block->bbLiveIn, liveIn);
+ VarSetOps::Assign(this, block->bbLiveOut, liveOut);
+ change = true;
+ }
+ }
+
+ if ((block->bbHeapLiveIn == 1) != heapLiveIn || (block->bbHeapLiveOut == 1) != heapLiveOut)
+ {
+ block->bbHeapLiveIn = heapLiveIn;
+ block->bbHeapLiveOut = heapLiveOut;
+ change = true;
+ }
+ }
+ // If there is no way we could have processed a block without having already seen all of
+ // its successors, then there is no need to iterate.
+ if (!hasPossibleBackEdge)
+ {
+ break;
+ }
+ } while (change);
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUG
+
+ if (verbose && !updateInternalOnly)
+ {
+ printf("\nBB liveness after fgLiveVarAnalysis():\n\n");
+ fgDispBBLiveness();
+ }
+
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Mark any variables in varSet1 as interfering with any variables
+ * specified in varSet2.
+ * We ensure that the interference graph is symmetric:
+ * (if T11 interferes with T16, then T16 interferes with T11)
+ * This function returns true if any new interferences were added
+ * and false if no new interferences were added.
+ */
+bool Compiler::fgMarkIntf(VARSET_VALARG_TP varSet1, VARSET_VALARG_TP varSet2)
+{
+#ifdef LEGACY_BACKEND
+ /* If either set has no bits set (or we are not optimizing), take an early out */
+ if (VarSetOps::IsEmpty(this, varSet2) || VarSetOps::IsEmpty(this, varSet1) || opts.MinOpts())
+ {
+ return false;
+ }
+
+ bool addedIntf = false; // This is set to true if we add any new interferences
+
+ VarSetOps::Assign(this, fgMarkIntfUnionVS, varSet1);
+ VarSetOps::UnionD(this, fgMarkIntfUnionVS, varSet2);
+
+ VARSET_ITER_INIT(this, iter, fgMarkIntfUnionVS, refIndex);
+ while (iter.NextElem(this, &refIndex))
+ {
+ // if varSet1 has this bit set then it interferes with varSet2
+ if (VarSetOps::IsMember(this, varSet1, refIndex))
+ {
+ // Calculate the set of new interference to add
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, varSet2, lvaVarIntf[refIndex]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+ addedIntf = true;
+ VarSetOps::UnionD(this, lvaVarIntf[refIndex], newIntf);
+ }
+ }
+
+ // if varSet2 has this bit set then it interferes with varSet1
+ if (VarSetOps::IsMember(this, varSet2, refIndex))
+ {
+ // Calculate the set of new interference to add
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, varSet1, lvaVarIntf[refIndex]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+ addedIntf = true;
+ VarSetOps::UnionD(this, lvaVarIntf[refIndex], newIntf);
+ }
+ }
+ }
+
+ return addedIntf;
+#else
+ return false;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Mark any variables in varSet as interfering with each other.
+ * This is a specialized version of the above for when both arguments are the same set.
+ * We ensure that the interference graph is symmetric:
+ * (if T11 interferes with T16, then T16 interferes with T11)
+ * This function returns true if any new interferences were added
+ * and false if no new interferences were added.
+ */
+
+bool Compiler::fgMarkIntf(VARSET_VALARG_TP varSet)
+{
+#ifdef LEGACY_BACKEND
+ /* No bits set or we are not optimizing, take an early out */
+ if (VarSetOps::IsEmpty(this, varSet) || opts.MinOpts())
+ return false;
+
+ bool addedIntf = false; // This is set to true if we add any new interferences
+
+ VARSET_ITER_INIT(this, iter, varSet, refIndex);
+ while (iter.NextElem(this, &refIndex))
+ {
+ // Calculate the set of new interference to add
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, varSet, lvaVarIntf[refIndex]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+ addedIntf = true;
+ VarSetOps::UnionD(this, lvaVarIntf[refIndex], newIntf);
+ }
+ }
+
+ return addedIntf;
+#else // !LEGACY_BACKEND
+ return false;
+#endif // !LEGACY_BACKEND
+}
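+// Illustrative usage sketch (added for clarity; it mirrors the call sites later in this
+// file and introduces nothing new): interference with everything currently live is
+// typically recorded for a single variable as
+//
+//     VARSET_TP VARSET_INIT_NOCOPY(varBit, VarSetOps::MakeSingleton(this, varIndex));
+//     fgMarkIntf(life, varBit); // 'varIndex' now interferes with every member of 'life'
+//
+// As the #ifdefs above show, both overloads simply return false when LEGACY_BACKEND is
+// not defined.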
+
+/*****************************************************************************
+ * For updating liveset during traversal AFTER fgComputeLife has completed
+ */
+
+VARSET_VALRET_TP Compiler::fgUpdateLiveSet(VARSET_VALARG_TP liveSet, GenTreePtr tree)
+{
+ VARSET_TP VARSET_INIT(this, newLiveSet, liveSet);
+ assert(fgLocalVarLivenessDone == true);
+ GenTreePtr lclVarTree = tree; // After the tests below, "lclVarTree" will be the local variable.
+ if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_FLD || tree->gtOper == GT_REG_VAR ||
+ (lclVarTree = fgIsIndirOfAddrOfLocal(tree)) != nullptr)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(varBits, fgGetVarBits(lclVarTree));
+
+ if (!VarSetOps::IsEmpty(this, varBits))
+ {
+ if (tree->gtFlags & GTF_VAR_DEATH)
+ {
+ // We'd like to be able to assert the following, however if we are walking
+ // through a qmark/colon tree, we may encounter multiple last-use nodes.
+ // assert (VarSetOps::IsSubset(this, varBits, newLiveSet));
+
+ // We maintain the invariant that if the lclVarTree is a promoted struct but the
+ // lookup fails, then all the field vars (i.e., "varBits") are dying.
+ VARSET_TP* deadVarBits = nullptr;
+ if (varTypeIsStruct(lclVarTree) && GetPromotedStructDeathVars()->Lookup(lclVarTree, &deadVarBits))
+ {
+ VarSetOps::DiffD(this, newLiveSet, *deadVarBits);
+ }
+ else
+ {
+ VarSetOps::DiffD(this, newLiveSet, varBits);
+ }
+ }
+ else if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & GTF_VAR_USEASG) == 0)
+ {
+ assert(tree == lclVarTree); // LDOBJ case should only be a use.
+
+ // This shouldn't be in newLiveSet, unless this is debug code, in which
+ // case we keep vars live everywhere, OR it is address-exposed, OR this block
+ // is part of a try block, in which case it may be live at the handler
+ // Could add a check that, if it's in the newLiveSet, that it's also in
+ // fgGetHandlerLiveVars(compCurBB), but seems excessive
+ //
+ assert(VarSetOps::IsEmptyIntersection(this, newLiveSet, varBits) || opts.compDbgCode ||
+ lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed ||
+ (compCurBB != nullptr && ehBlockHasExnFlowDsc(compCurBB)));
+ VarSetOps::UnionD(this, newLiveSet, varBits);
+ }
+ }
+ }
+ return newLiveSet;
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgComputeLifeCall: compute the changes to local var liveness
+// due to a GT_CALL node.
+//
+// Arguments:
+// life - The live set that is being computed.
+// call - The call node in question.
+//
+void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
+{
+ assert(call != nullptr);
+
+ // If this is a tail call and we have any unmanaged p/invoke calls in
+ // the method, then we're going to run the p/invoke epilog,
+ // so we mark the FrameRoot as used by this instruction.
+ // This ensures that the variable is kept alive at the tail call.
+ if (call->IsTailCall() && info.compCallUnmanaged)
+ {
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ /* Get the TCB local and make it live */
+
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (frameVarDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(varBit, VarSetOps::MakeSingleton(this, frameVarDsc->lvVarIndex));
+
+ VarSetOps::AddElemD(this, life, frameVarDsc->lvVarIndex);
+
+ /* Record interference with other live variables */
+
+ fgMarkIntf(life, varBit);
+ }
+ }
+ }
+
+ /* GC refs cannot be enregistered across an unmanaged call */
+
+ // TODO: we should generate the code for saving to/restoring
+ // from the inlined N/Direct frame instead.
+
+ /* Is this call to unmanaged code? */
+ if (call->IsUnmanaged())
+ {
+ /* Get the TCB local and make it live */
+ assert((!opts.ShouldUsePInvokeHelpers()) || (info.compLvFrameListRoot == BAD_VAR_NUM));
+ if (!opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* frameVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (frameVarDsc->lvTracked)
+ {
+ unsigned varIndex = frameVarDsc->lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+
+ // Is the variable already known to be alive?
+ //
+ if (VarSetOps::IsMember(this, life, varIndex))
+ {
+ // Since we may call this multiple times, clear the GTF_CALL_M_FRAME_VAR_DEATH if set.
+ //
+ call->gtCallMoreFlags &= ~GTF_CALL_M_FRAME_VAR_DEATH;
+ }
+ else
+ {
+ // The variable is just coming to life
+ // Since this is a backwards walk of the trees
+ // that makes this change in liveness a 'last-use'
+ //
+ VarSetOps::AddElemD(this, life, varIndex);
+ call->gtCallMoreFlags |= GTF_CALL_M_FRAME_VAR_DEATH;
+ }
+
+ // Record an interference with the other live variables
+ //
+ VARSET_TP VARSET_INIT_NOCOPY(varBit, VarSetOps::MakeSingleton(this, varIndex));
+ fgMarkIntf(life, varBit);
+ }
+ }
+
+ /* Do we have any live variables? */
+
+ if (!VarSetOps::IsEmpty(this, life))
+ {
+ // For each live variable, if it is a GC-ref type, we
+ // mark it DoNotEnregister to prevent it from being enregistered
+ // across the unmanaged call.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ {
+ continue;
+ }
+
+ unsigned varNum = varDsc->lvVarIndex;
+
+ /* Ignore the variable if it's not live here */
+
+ if (!VarSetOps::IsMember(this, life, varDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ // If it is a GC-ref type then mark it DoNotEnregister.
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LiveAcrossUnmanagedCall));
+ }
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::fgComputeLifeLocal: compute the changes to local var liveness
+// due to a use or a def of a local var and
+ // indicates whether the use/def is a dead
+// store.
+//
+// Arguments:
+// life - The live set that is being computed.
+ // keepAliveVars - The current set of variables to keep alive
+// regardless of their actual lifetime.
+// lclVarNode - The node that corresponds to the local var def or
+// use. Only differs from `node` when targeting the
+// legacy backend.
+// node - The actual tree node being processed.
+//
+// Returns:
+// `true` if the local var node corresponds to a dead store; `false`
+// otherwise.
+//
+bool Compiler::fgComputeLifeLocal(VARSET_TP& life, VARSET_TP& keepAliveVars, GenTree* lclVarNode, GenTree* node)
+{
+ unsigned lclNum = lclVarNode->gtLclVarCommon.gtLclNum;
+
+ noway_assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ unsigned varIndex;
+ VARSET_TP varBit;
+
+ // Is this a tracked variable?
+ if (varDsc->lvTracked)
+ {
+ varIndex = varDsc->lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+
+ /* Is this a definition or use? */
+
+ if (lclVarNode->gtFlags & GTF_VAR_DEF)
+ {
+ /*
+ The variable is being defined here. The variable
+ should be marked dead from here until its closest
+ previous use.
+
+ IMPORTANT OBSERVATION:
+
+ For GTF_VAR_USEASG (i.e. x <op>= a) we cannot
+ consider it a "pure" definition because it would
+ kill x (which would be wrong because x is
+ "used" in such a construct) -> see below the case when x is live
+ */
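+ // Illustrative example (added for clarity, restating the observation above): a
+ // compound assignment such as "x += a" marks the 'x' node with both GTF_VAR_DEF
+ // and GTF_VAR_USEASG, so it does not kill the liveness of x here; only a plain
+ // definition such as "x = a" does.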
+
+ if (VarSetOps::IsMember(this, life, varIndex))
+ {
+ /* The variable is live */
+
+ if ((lclVarNode->gtFlags & GTF_VAR_USEASG) == 0)
+ {
+ /* Mark variable as dead from here to its closest use */
+
+ if (!VarSetOps::IsMember(this, keepAliveVars, varIndex))
+ {
+ VarSetOps::RemoveElemD(this, life, varIndex);
+ }
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Def V%02u,T%02u at ", lclNum, varIndex);
+ printTreeID(lclVarNode);
+ printf(" life %s -> %s\n",
+ VarSetOps::ToString(this, VarSetOps::Union(this, life,
+ VarSetOps::MakeSingleton(this, varIndex))),
+ VarSetOps::ToString(this, life));
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ /* Dead assignment to the variable */
+ lclVarNode->gtFlags |= GTF_VAR_DEATH;
+
+ if (!opts.MinOpts())
+ {
+ // keepAliveVars always stay alive
+ noway_assert(!VarSetOps::IsMember(this, keepAliveVars, varIndex));
+
+ /* This is a dead store unless the variable is marked
+ GTF_VAR_USEASG and we are in an interior statement
+ that will be used (e.g. while (i++) or a GT_COMMA) */
+
+ // Do not consider this store dead if the target local variable represents
+ // a promoted struct field of an address exposed local or if the address
+ // of the variable has been exposed. Improved alias analysis could allow
+ // stores to these sorts of variables to be removed at the cost of compile
+ // time.
+ return !varDsc->lvAddrExposed &&
+ !(varDsc->lvIsStructField && lvaTable[varDsc->lvParentLcl].lvAddrExposed);
+ }
+ }
+
+ return false;
+ }
+ else // it is a use
+ {
+ // Is the variable already known to be alive?
+ if (VarSetOps::IsMember(this, life, varIndex))
+ {
+ // Since we may do liveness analysis multiple times, clear the GTF_VAR_DEATH if set.
+ lclVarNode->gtFlags &= ~GTF_VAR_DEATH;
+ return false;
+ }
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("Ref V%02u,T%02u] at ", lclNum, varIndex);
+ printTreeID(node);
+ printf(" life %s -> %s\n", VarSetOps::ToString(this, life),
+ VarSetOps::ToString(this, VarSetOps::Union(this, life, varBit)));
+ }
+#endif // DEBUG
+
+ // The variable is being used, and it is not currently live.
+ // So the variable is just coming to life
+ lclVarNode->gtFlags |= GTF_VAR_DEATH;
+ VarSetOps::AddElemD(this, life, varIndex);
+
+ // Record interference with other live variables
+ fgMarkIntf(life, VarSetOps::MakeSingleton(this, varIndex));
+ }
+ }
+ // Note that promoted implies not tracked (i.e. only the fields are tracked).
+ else if (varTypeIsStruct(varDsc->lvType))
+ {
+ noway_assert(!varDsc->lvTracked);
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType != PROMOTION_TYPE_NONE)
+ {
+ VarSetOps::AssignNoCopy(this, varBit, VarSetOps::MakeEmpty(this));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+#if !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ if (!varTypeIsLong(lvaTable[i].lvType) || !lvaTable[i].lvPromoted)
+#endif // !defined(_TARGET_64BIT_) && !defined(LEGACY_BACKEND)
+ {
+ noway_assert(lvaTable[i].lvIsStructField);
+ }
+ if (lvaTable[i].lvTracked)
+ {
+ varIndex = lvaTable[i].lvVarIndex;
+ noway_assert(varIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, varBit, varIndex);
+ }
+ }
+ if (node->gtFlags & GTF_VAR_DEF)
+ {
+ VarSetOps::DiffD(this, varBit, keepAliveVars);
+ VarSetOps::DiffD(this, life, varBit);
+ return false;
+ }
+ // This is a use.
+
+ // Are the variables already known to be alive?
+ if (VarSetOps::IsSubset(this, varBit, life))
+ {
+ node->gtFlags &= ~GTF_VAR_DEATH; // Since we may now call this multiple times, reset if live.
+ return false;
+ }
+
+ // Some variables are being used, and they are not currently live.
+ // So they are just coming to life, in the backwards traversal; in a forwards
+ // traversal, one or more are dying. Mark this.
+
+ node->gtFlags |= GTF_VAR_DEATH;
+
+ // Are all the variables becoming alive (in the backwards traversal), or just a subset?
+ if (!VarSetOps::IsEmptyIntersection(this, varBit, life))
+ {
+ // Only a subset of the variables becomes live; we must record that subset.
+ // (Lack of an entry for "lclVarNode" will be considered to imply all become dead in the
+ // forward traversal.)
+ VARSET_TP* deadVarSet = new (this, CMK_bitset) VARSET_TP;
+ VarSetOps::AssignNoCopy(this, *deadVarSet, VarSetOps::Diff(this, varBit, life));
+ GetPromotedStructDeathVars()->Set(lclVarNode, deadVarSet);
+ }
+
+ // In any case, all the field vars are now live (in the backwards traversal).
+ VarSetOps::UnionD(this, life, varBit);
+
+ // Record interference with other live variables
+ fgMarkIntf(life, varBit);
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Compute the set of live variables at each node in a given statement
+ * or subtree of a statement moving backward from startNode to endNode
+ */
+
+#ifndef LEGACY_BACKEND
+VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg,
+ GenTreePtr startNode,
+ GenTreePtr endNode,
+ VARSET_VALARG_TP volatileVars,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf))
+{
+ GenTreePtr tree;
+ unsigned lclNum;
+
+ VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
+
+ VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
+#ifdef DEBUGGING_SUPPORT
+ VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); // Don't kill vars in scope
+#endif
+
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+ noway_assert(compCurStmt->gtOper == GT_STMT);
+ noway_assert(endNode || (startNode == compCurStmt->gtStmt.gtStmtExpr));
+
+ // NOTE: Live variable analysis will not work if you try
+ // to use the result of an assignment node directly!
+ for (tree = startNode; tree != endNode; tree = tree->gtPrev)
+ {
+ AGAIN:
+ assert(tree->OperGet() != GT_QMARK);
+
+ if (tree->gtOper == GT_CALL)
+ {
+ fgComputeLifeCall(life, tree->AsCall());
+ }
+ else if (tree->OperIsNonPhiLocal() || tree->OperIsLocalAddr())
+ {
+ bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, tree, tree);
+ if (isDeadStore)
+ {
+ LclVarDsc* varDsc = &lvaTable[tree->gtLclVarCommon.gtLclNum];
+
+ bool doAgain = false;
+ if (fgRemoveDeadStore(&tree, varDsc, life, &doAgain, pStmtInfoDirty DEBUGARG(treeModf)))
+ {
+ assert(!doAgain);
+ break;
+ }
+
+ if (doAgain)
+ {
+ goto AGAIN;
+ }
+ }
+ }
+ }
+
+ // Return the set of live variables out of this statement
+ return life;
+}
+
+VARSET_VALRET_TP Compiler::fgComputeLifeLIR(VARSET_VALARG_TP lifeArg, BasicBlock* block, VARSET_VALARG_TP volatileVars)
+{
+ VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
+
+ VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
+#ifdef DEBUGGING_SUPPORT
+ VarSetOps::UnionD(this, keepAliveVars, block->bbScope); // Don't kill vars in scope
+#endif
+
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ GenTree* firstNonPhiNode = blockRange.FirstNonPhiNode();
+ if (firstNonPhiNode == nullptr)
+ {
+ return life;
+ }
+
+ for (GenTree *node = blockRange.LastNode(), *next = nullptr, *end = firstNonPhiNode->gtPrev; node != end;
+ node = next)
+ {
+ next = node->gtPrev;
+
+ if (node->OperGet() == GT_CALL)
+ {
+ fgComputeLifeCall(life, node->AsCall());
+ }
+ else if (node->OperIsNonPhiLocal() || node->OperIsLocalAddr())
+ {
+ bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, node, node);
+ if (isDeadStore)
+ {
+ fgTryRemoveDeadLIRStore(blockRange, node, &next);
+ }
+ }
+ }
+
+ return life;
+}
+
+#else // LEGACY_BACKEND
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+
+VARSET_VALRET_TP Compiler::fgComputeLife(VARSET_VALARG_TP lifeArg,
+ GenTreePtr startNode,
+ GenTreePtr endNode,
+ VARSET_VALARG_TP volatileVars,
+ bool* pStmtInfoDirty DEBUGARG(bool* treeModf))
+{
+ GenTreePtr tree;
+ unsigned lclNum;
+
+ GenTreePtr gtQMark = NULL; // current GT_QMARK node (walking the trees backwards)
+ GenTreePtr nextColonExit = 0; // gtQMark->gtOp.gtOp2 while walking the 'else' branch.
+ // gtQMark->gtOp.gtOp1 while walking the 'then' branch
+
+ VARSET_TP VARSET_INIT(this, life, lifeArg); // lifeArg is const ref; copy to allow modification.
+
+ // TBD: This used to be an initialization to VARSET_NOT_ACCEPTABLE. Try to figure out what's going on here.
+ VARSET_TP VARSET_INIT_NOCOPY(entryLiveSet, VarSetOps::MakeFull(this)); // liveness when we see gtQMark
+ VARSET_TP VARSET_INIT_NOCOPY(gtColonLiveSet, VarSetOps::MakeFull(this)); // liveness when we see gtColon
+ GenTreePtr gtColon = NULL;
+
+ VARSET_TP VARSET_INIT(this, keepAliveVars, volatileVars);
+#ifdef DEBUGGING_SUPPORT
+ VarSetOps::UnionD(this, keepAliveVars, compCurBB->bbScope); /* Don't kill vars in scope */
+#endif
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, keepAliveVars, life), keepAliveVars));
+ noway_assert(compCurStmt->gtOper == GT_STMT);
+ noway_assert(endNode || (startNode == compCurStmt->gtStmt.gtStmtExpr));
+
+ /* NOTE: Live variable analysis will not work if you try
+ * to use the result of an assignment node directly */
+
+ for (tree = startNode; tree != endNode; tree = tree->gtPrev)
+ {
+ AGAIN:
+ /* For ?: nodes if we're done with the then branch, remember
+ * the liveness */
+ if (gtQMark && (tree == gtColon))
+ {
+ VarSetOps::Assign(this, gtColonLiveSet, life);
+ VarSetOps::Assign(this, gtQMark->gtQmark.gtThenLiveSet, gtColonLiveSet);
+ }
+
+ /* For ?: nodes if we're done with the else branch
+ * then set the correct life as the union of the two branches */
+
+ if (gtQMark && (tree == gtQMark->gtOp.gtOp1))
+ {
+ noway_assert(tree->gtFlags & GTF_RELOP_QMARK);
+ noway_assert(gtQMark->gtOp.gtOp2->gtOper == GT_COLON);
+
+ GenTreePtr thenNode = gtColon->AsColon()->ThenNode();
+ GenTreePtr elseNode = gtColon->AsColon()->ElseNode();
+
+ noway_assert(thenNode && elseNode);
+
+ VarSetOps::Assign(this, gtQMark->gtQmark.gtElseLiveSet, life);
+
+ /* Check if we optimized away the ?: */
+
+ if (elseNode->IsNothingNode())
+ {
+ if (thenNode->IsNothingNode())
+ {
+ /* This can only happen for VOID ?: */
+ noway_assert(gtColon->gtType == TYP_VOID);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u - Removing dead QMark - Colon ...\n", compCurBB->bbNum);
+ gtDispTree(gtQMark);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ /* Remove the '?:' - keep the side effects in the condition */
+
+ noway_assert(tree->OperKind() & GTK_RELOP);
+
+ /* Change the node to a NOP */
+
+ gtQMark->gtBashToNOP();
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+
+ /* Extract and keep the side effects */
+
+ if (tree->gtFlags & GTF_SIDE_EFFECT)
+ {
+ GenTreePtr sideEffList = NULL;
+
+ gtExtractSideEffList(tree, &sideEffList);
+
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list from condition...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+ fgUpdateRefCntForExtract(tree, sideEffList);
+
+ /* The NOP node becomes a GT_COMMA holding the side effect list */
+
+ gtQMark->ChangeOper(GT_COMMA);
+ gtQMark->gtFlags |= sideEffList->gtFlags & GTF_ALL_EFFECT;
+
+ if (sideEffList->gtOper == GT_COMMA)
+ {
+ gtQMark->gtOp.gtOp1 = sideEffList->gtOp.gtOp1;
+ gtQMark->gtOp.gtOp2 = sideEffList->gtOp.gtOp2;
+ }
+ else
+ {
+ gtQMark->gtOp.gtOp1 = sideEffList;
+ gtQMark->gtOp.gtOp2 = gtNewNothingNode();
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving tree ");
+ printTreeID(tree);
+ printf(" in BB%02u as useless\n", compCurBB->bbNum);
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+ fgUpdateRefCntForExtract(tree, NULL);
+ }
+ }
+
+ /* If top node without side effects remove it */
+
+ if ((gtQMark == compCurStmt->gtStmt.gtStmtExpr) && gtQMark->IsNothingNode())
+ {
+ fgRemoveStmt(compCurBB, compCurStmt);
+ break;
+ }
+
+ /* Re-link the nodes for this statement */
+
+ fgSetStmtSeq(compCurStmt);
+
+ /* Continue analysis from this node */
+
+ tree = gtQMark;
+
+ /* As the 'then' and 'else' branches are empty, liveness
+ should not have changed */
+
+ noway_assert(VarSetOps::Equal(this, life, entryLiveSet));
+ goto SKIP_QMARK;
+ }
+ else
+ {
+ // The 'else' branch is empty and the 'then' branch is non-empty
+ // so swap the two branches and reverse the condition. If one is
+ // non-empty, we want it to be the 'else'
+
+ GenTreePtr tmp = thenNode;
+
+ gtColon->AsColon()->ThenNode() = thenNode = elseNode;
+ gtColon->AsColon()->ElseNode() = elseNode = tmp;
+ noway_assert(tree == gtQMark->gtOp.gtOp1);
+ gtReverseCond(tree);
+
+ // Remember to also swap the live sets of the two branches.
+ VARSET_TP VARSET_INIT_NOCOPY(tmpVS, gtQMark->gtQmark.gtElseLiveSet);
+ VarSetOps::AssignNoCopy(this, gtQMark->gtQmark.gtElseLiveSet, gtQMark->gtQmark.gtThenLiveSet);
+ VarSetOps::AssignNoCopy(this, gtQMark->gtQmark.gtThenLiveSet, tmpVS);
+
+ /* Re-link the nodes for this statement */
+
+ fgSetStmtSeq(compCurStmt);
+ }
+ }
+
+ /* Variables in the two branches that are live at the split
+ * must interfere with each other */
+
+ fgMarkIntf(life, gtColonLiveSet);
+
+ /* The live set at the split is the union of the two branches */
+
+ VarSetOps::UnionD(this, life, gtColonLiveSet);
+
+ SKIP_QMARK:
+
+ /* We are out of the parallel branches, the rest is sequential */
+
+ gtQMark = NULL;
+ }
+
+ if (tree->gtOper == GT_CALL)
+ {
+ fgComputeLifeCall(life, tree->AsCall());
+ continue;
+ }
+
+ // Is this a use/def of a local variable?
+ // Generally, the last use information is associated with the lclVar node.
+ // However, for LEGACY_BACKEND, the information must be associated
+ // with the OBJ itself for promoted structs.
+ // In that case, the LDOBJ may require an implementation that might itself allocate registers,
+ // so the variable(s) should stay live until the end of the LDOBJ.
+ // Note that for promoted structs lvTracked is false.
+
+ GenTreePtr lclVarTree = nullptr;
+ if (tree->gtOper == GT_OBJ)
+ {
+ // fgIsIndirOfAddrOfLocal returns nullptr if the tree is
+ // not an indir(addr(local)), in which case we will set lclVarTree
+ // back to the original tree, and not handle it as a use/def.
+ lclVarTree = fgIsIndirOfAddrOfLocal(tree);
+ if ((lclVarTree != nullptr) && lvaTable[lclVarTree->gtLclVarCommon.gtLclNum].lvTracked)
+ {
+ lclVarTree = nullptr;
+ }
+ }
+ if (lclVarTree == nullptr)
+ {
+ lclVarTree = tree;
+ }
+
+ if (lclVarTree->OperIsNonPhiLocal() || lclVarTree->OperIsLocalAddr())
+ {
+ bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, lclVarTree, tree);
+ if (isDeadStore)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
+
+ bool doAgain = false;
+ if (fgRemoveDeadStore(&tree, varDsc, life, &doAgain, pStmtInfoDirty DEBUGARG(treeModf)))
+ {
+ assert(!doAgain);
+ break;
+ }
+
+ if (doAgain)
+ {
+ goto AGAIN;
+ }
+ }
+ }
+ else
+ {
+ if (tree->gtOper == GT_QMARK && tree->gtOp.gtOp1)
+ {
+ /* Special cases - "? :" operators.
+
+ The trees are threaded as shown below with nodes 1 to 11 linked
+ by gtNext. Both GT_<cond>->gtLiveSet and GT_COLON->gtLiveSet are
+ the union of the liveness on entry to thenTree and elseTree.
+
+ +--------------------+
+ | GT_QMARK 11|
+ +----------+---------+
+ |
+ *
+ / \
+ / \
+ / \
+ +---------------------+ +--------------------+
+ | GT_<cond> 3 | | GT_COLON 7 |
+ | w/ GTF_RELOP_QMARK | | w/ GTF_COLON_COND |
+ +----------+----------+ +---------+----------+
+ | |
+ * *
+ / \ / \
+ / \ / \
+ / \ / \
+ 2 1 thenTree 6 elseTree 10
+ x | |
+ / * *
+ +----------------+ / / \ / \
+ |prevExpr->gtNext+------/ / \ / \
+ +----------------+ / \ / \
+ 5 4 9 8
+
+ */
+
+ noway_assert(tree->gtOp.gtOp1->OperKind() & GTK_RELOP);
+ noway_assert(tree->gtOp.gtOp1->gtFlags & GTF_RELOP_QMARK);
+ noway_assert(tree->gtOp.gtOp2->gtOper == GT_COLON);
+
+ if (gtQMark)
+ {
+ /* This is a nested QMARK sequence - we need to use recursion.
+ * Compute the liveness for each node of the COLON branches
+ * The new computation starts from the GT_QMARK node and ends
+ * when the COLON branch of the enclosing QMARK ends */
+
+ noway_assert(nextColonExit &&
+ (nextColonExit == gtQMark->gtOp.gtOp1 || nextColonExit == gtQMark->gtOp.gtOp2));
+
+ VarSetOps::AssignNoCopy(this, life, fgComputeLife(life, tree, nextColonExit, volatileVars,
+ pStmtInfoDirty DEBUGARG(treeModf)));
+
+ /* Continue with exit node (the last node in the enclosing colon branch) */
+
+ tree = nextColonExit;
+ goto AGAIN;
+ }
+ else
+ {
+ gtQMark = tree;
+ VarSetOps::Assign(this, entryLiveSet, life);
+ gtColon = gtQMark->gtOp.gtOp2;
+ nextColonExit = gtColon;
+ }
+ }
+
+ /* If found the GT_COLON, start the new branch with the original life */
+
+ if (gtQMark && tree == gtQMark->gtOp.gtOp2)
+ {
+ /* The node better be a COLON. */
+ noway_assert(tree->gtOper == GT_COLON);
+
+ VarSetOps::Assign(this, life, entryLiveSet);
+ nextColonExit = gtQMark->gtOp.gtOp1;
+ }
+ }
+ }
+
+ /* Return the set of live variables out of this statement */
+
+ return life;
+}
+
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#endif // !LEGACY_BACKEND
+
+bool Compiler::fgTryRemoveDeadLIRStore(LIR::Range& blockRange, GenTree* node, GenTree** next)
+{
+ assert(node != nullptr);
+ assert(next != nullptr);
+
+ assert(node->OperIsLocalStore() || node->OperIsLocalAddr());
+
+ GenTree* store = nullptr;
+ GenTree* value = nullptr;
+ if (node->OperIsLocalStore())
+ {
+ store = node;
+ value = store->gtGetOp1();
+ }
+ else if (node->OperIsLocalAddr())
+ {
+ LIR::Use addrUse;
+ if (!blockRange.TryGetUse(node, &addrUse) || (addrUse.User()->OperGet() != GT_STOREIND))
+ {
+ *next = node->gtPrev;
+ return false;
+ }
+
+ store = addrUse.User();
+ value = store->gtGetOp2();
+ }
+
+ bool isClosed = false;
+ unsigned sideEffects = 0;
+ LIR::ReadOnlyRange operandsRange = blockRange.GetRangeOfOperandTrees(store, &isClosed, &sideEffects);
+ if (!isClosed || ((sideEffects & GTF_SIDE_EFFECT) != 0) ||
+ (((sideEffects & GTF_ORDER_SIDEEFF) != 0) && (value->OperGet() == GT_CATCH_ARG)))
+ {
+ // If the range of the operands contains unrelated code or if it contains any side effects,
+ // do not remove it. Instead, just remove the store.
+
+ *next = node->gtPrev;
+ }
+ else
+ {
+ // Okay, the operands to the store form a contiguous range that has no side effects. Remove the
+ // range containing the operands and decrement the local var ref counts appropriately.
+
+ // Compute the next node to process. Note that we must be careful not to set the next node to
+ // process to a node that we are about to remove.
+ if (node->OperIsLocalStore())
+ {
+ assert(node == store);
+ *next = (operandsRange.LastNode()->gtNext == store) ? operandsRange.FirstNode()->gtPrev : node->gtPrev;
+ }
+ else
+ {
+ assert(operandsRange.Contains(node));
+ *next = operandsRange.FirstNode()->gtPrev;
+ }
+
+ blockRange.Delete(this, compCurBB, std::move(operandsRange));
+ }
+
+ // If the store is marked as a late argument, it is referenced by a call. Instead of removing it,
+ // bash it to a NOP.
+ if ((store->gtFlags & GTF_LATE_ARG) != 0)
+ {
+ if (store->IsLocal())
+ {
+ lvaDecRefCnts(compCurBB, store);
+ }
+
+ store->gtBashToNOP();
+ }
+ else
+ {
+ blockRange.Delete(this, compCurBB, store);
+ }
+
+ return true;
+}
+
+// fgRemoveDeadStore - remove a store to a local which has no exposed uses.
+//
+// pTree - GenTree** to local, including store-form local or local addr (post-rationalize)
+// varDsc - var that is being stored to
+// life - current live tracked vars (maintained as we walk backwards)
+// doAgain - out parameter, true if we should restart the statement
+// pStmtInfoDirty - should defer the cost computation to the point after the reverse walk is completed?
+//
+// Returns: true if we should skip the rest of the statement, false if we should continue
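+//
+// Example (added for clarity; illustrative only): for a statement whose entire tree is
+// "V03 = V01 + V02" and where V03 is not live afterwards, the whole statement is removed.
+// If the RHS has side effects (e.g. a call, or GT_CATCH_ARG under GTF_ORDER_SIDEEFF),
+// only the extracted side-effect list is kept; see the EXTRACT_SIDE_EFFECTS path below.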
+
+bool Compiler::fgRemoveDeadStore(
+ GenTree** pTree, LclVarDsc* varDsc, VARSET_TP life, bool* doAgain, bool* pStmtInfoDirty DEBUGARG(bool* treeModf))
+{
+ assert(!compRationalIRForm);
+
+ // Vars should have already been checked for address exposure by this point.
+ assert(!varDsc->lvIsStructField || !lvaTable[varDsc->lvParentLcl].lvAddrExposed);
+ assert(!varDsc->lvAddrExposed);
+
+ GenTree* asgNode = nullptr;
+ GenTree* rhsNode = nullptr;
+ GenTree* addrNode = nullptr;
+ GenTree* const tree = *pTree;
+
+ GenTree* nextNode = tree->gtNext;
+
+ // First, characterize the lclVarTree and see if we are taking its address.
+ if (tree->OperIsLocalStore())
+ {
+ rhsNode = tree->gtOp.gtOp1;
+ asgNode = tree;
+ }
+ else if (tree->OperIsLocal())
+ {
+ if (nextNode == nullptr)
+ {
+ return false;
+ }
+ if (nextNode->OperGet() == GT_ADDR)
+ {
+ addrNode = nextNode;
+ nextNode = nextNode->gtNext;
+ }
+ }
+ else
+ {
+ assert(tree->OperIsLocalAddr());
+ addrNode = tree;
+ }
+
+ // Next, find the assignment.
+ if (asgNode == nullptr)
+ {
+ if (addrNode == nullptr)
+ {
+ asgNode = nextNode;
+ }
+ else if (asgNode == nullptr)
+ {
+ // This may be followed by GT_IND/assign or GT_STOREIND.
+ if (nextNode == nullptr)
+ {
+ return false;
+ }
+ if (nextNode->OperIsIndir())
+ {
+ // This must be a non-nullcheck form of indir, or it would not be a def.
+ assert(nextNode->OperGet() != GT_NULLCHECK);
+ if (nextNode->OperIsStore())
+ {
+ asgNode = nextNode;
+ if (asgNode->OperIsBlk())
+ {
+ rhsNode = asgNode->AsBlk()->Data();
+ }
+ // TODO-1stClassStructs: There should be an else clause here to handle
+ // the non-block forms of store ops (GT_STORE_LCL_VAR, etc.) for which
+ // rhsNode is op1. (This isn't really a 1stClassStructs item, but the
+ // above was added to catch what used to be dead block ops, and that
+ // made this omission apparent.)
+ }
+ else
+ {
+ asgNode = nextNode->gtNext;
+ }
+ }
+ }
+ }
+
+ if (asgNode == nullptr)
+ {
+ return false;
+ }
+
+ if (asgNode->OperIsAssignment())
+ {
+ rhsNode = asgNode->gtGetOp2();
+ }
+ else if (rhsNode == nullptr)
+ {
+ return false;
+ }
+
+ if (asgNode && (asgNode->gtFlags & GTF_ASG))
+ {
+ noway_assert(rhsNode);
+ noway_assert(tree->gtFlags & GTF_VAR_DEF);
+
+ if (asgNode->gtOper != GT_ASG && asgNode->gtOverflowEx())
+ {
+ // asgNode may be <op_ovf>= (with GTF_OVERFLOW). In that case, we need to keep the <op_ovf>
+
+ // Dead <OpOvf>= assignment. We change it to the right operation (taking out the assignment),
+ // update the flags, update order of statement, as we have changed the order of the operation
+ // and we start computing life again from the op_ovf node (we go backwards). Note that we
+ // don't need to update ref counts because we don't change them, we're only changing the
+ // operation.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nChanging dead <asgop> ovf to <op> ovf...\n");
+ }
+#endif // DEBUG
+
+ switch (asgNode->gtOper)
+ {
+ case GT_ASG_ADD:
+ asgNode->gtOper = GT_ADD;
+ break;
+ case GT_ASG_SUB:
+ asgNode->gtOper = GT_SUB;
+ break;
+ default:
+ // Only add and sub allowed, we don't have ASG_MUL and ASG_DIV for ints, and
+ // floats don't allow OVF forms.
+ noway_assert(!"Unexpected ASG_OP");
+ }
+
+ asgNode->gtFlags &= ~GTF_REVERSE_OPS;
+ if (!((asgNode->gtOp.gtOp1->gtFlags | rhsNode->gtFlags) & GTF_ASG))
+ {
+ asgNode->gtFlags &= ~GTF_ASG;
+ }
+ asgNode->gtOp.gtOp1->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
+
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+
+ // Make sure no previous cousin subtree rooted at a common ancestor has
+ // asked to defer the recomputation of costs.
+ if (!*pStmtInfoDirty)
+ {
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(compCurStmt);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(compCurStmt);
+
+ // Start from the old assign node, as we have changed the order of its operands.
+ // No need to update liveness, as nothing has changed (the target of the asgNode
+ // either goes dead here, in which case the whole expression is now dead, or it
+ // was already live).
+
+ // TODO-Throughput: Redo this so that the graph is modified BEFORE traversing it!
+ // We can determine this case when we first see the asgNode
+
+ *pTree = asgNode;
+
+ *doAgain = true;
+ }
+ return false;
+ }
+
+ // Do not remove if this local variable represents
+ // a promoted struct field of an address exposed local.
+ if (varDsc->lvIsStructField && lvaTable[varDsc->lvParentLcl].lvAddrExposed)
+ {
+ return false;
+ }
+
+ // Do not remove if the address of the variable has been exposed.
+ if (varDsc->lvAddrExposed)
+ {
+ return false;
+ }
+
+ /* Test for interior statement */
+
+ if (asgNode->gtNext == nullptr)
+ {
+ /* This is a "NORMAL" statement with the
+ * assignment node hanging from the GT_STMT node */
+
+ noway_assert(compCurStmt->gtStmt.gtStmtExpr == asgNode);
+ JITDUMP("top level assign\n");
+
+ /* Check for side effects */
+
+ if (rhsNode->gtFlags & GTF_SIDE_EFFECT)
+ {
+ EXTRACT_SIDE_EFFECTS:
+ /* Extract the side effects */
+
+ GenTreePtr sideEffList = nullptr;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u - Dead assignment has side effects...\n", compCurBB->bbNum);
+ gtDispTree(asgNode);
+ printf("\n");
+ }
+#endif // DEBUG
+ if (rhsNode->TypeGet() == TYP_STRUCT)
+ {
+ // This is a block assignment. An indirection of the rhs is not considered to
+ // happen until the assignment, so we will extract the side effects from only
+ // the address.
+ if (rhsNode->OperIsIndir())
+ {
+ assert(rhsNode->OperGet() != GT_NULLCHECK);
+ rhsNode = rhsNode->AsIndir()->Addr();
+ }
+ }
+ gtExtractSideEffList(rhsNode, &sideEffList);
+
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+ fgUpdateRefCntForExtract(asgNode, sideEffList);
+
+ /* Replace the assignment statement with the list of side effects */
+ noway_assert(sideEffList->gtOper != GT_STMT);
+
+ *pTree = compCurStmt->gtStmt.gtStmtExpr = sideEffList;
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ /* Update ordering, costs, FP levels, etc. */
+ gtSetStmtInfo(compCurStmt);
+
+ /* Re-link the nodes for this statement */
+ fgSetStmtSeq(compCurStmt);
+
+ // Since the whole statement gets replaced it is safe to
+ // re-thread and update order. No need to compute costs again.
+ *pStmtInfoDirty = false;
+
+ /* Compute the live set for the new statement */
+ *doAgain = true;
+ return false;
+ }
+ else
+ {
+ /* No side effects, most likely we forgot to reset some flags */
+ fgRemoveStmt(compCurBB, compCurStmt);
+
+ return true;
+ }
+ }
+ else
+ {
+ /* If this is GT_CATCH_ARG saved to a local var don't bother */
+
+ JITDUMP("removing stmt with no side effects\n");
+
+ if (asgNode->gtFlags & GTF_ORDER_SIDEEFF)
+ {
+ if (rhsNode->gtOper == GT_CATCH_ARG)
+ {
+ goto EXTRACT_SIDE_EFFECTS;
+ }
+ }
+
+ /* No side effects - remove the whole statement from the block->bbTreeList */
+
+ fgRemoveStmt(compCurBB, compCurStmt);
+
+ /* Since we removed it, do not process the rest (i.e. the RHS) of the statement;
+ * variables in the RHS will not be marked as live, so we get the benefit of
+ * propagating dead variables up the chain */
+
+ return true;
+ }
+ }
+ else
+ {
+ /* This is an INTERIOR STATEMENT with a dead assignment - remove it */
+
+ noway_assert(!VarSetOps::IsMember(this, life, varDsc->lvVarIndex));
+
+ if (rhsNode->gtFlags & GTF_SIDE_EFFECT)
+ {
+ /* :-( we have side effects */
+
+ GenTreePtr sideEffList = nullptr;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("BB%02u - INTERIOR dead assignment has side effects...\n", compCurBB->bbNum);
+ gtDispTree(asgNode);
+ printf("\n");
+ }
+#endif // DEBUG
+ gtExtractSideEffList(rhsNode, &sideEffList);
+
+ if (!sideEffList)
+ {
+ goto NO_SIDE_EFFECTS;
+ }
+
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Extracted side effects list from condition...\n");
+ gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif // DEBUG
+ if (sideEffList->gtOper == asgNode->gtOper)
+ {
+ fgUpdateRefCntForExtract(asgNode, sideEffList);
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ asgNode->gtOp.gtOp1 = sideEffList->gtOp.gtOp1;
+ asgNode->gtOp.gtOp2 = sideEffList->gtOp.gtOp2;
+ asgNode->gtType = sideEffList->gtType;
+ }
+ else
+ {
+ fgUpdateRefCntForExtract(asgNode, sideEffList);
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ /* Change the node to a GT_COMMA holding the side effect list */
+ asgNode->gtBashToNOP();
+
+ asgNode->ChangeOper(GT_COMMA);
+ asgNode->gtFlags |= sideEffList->gtFlags & GTF_ALL_EFFECT;
+
+ if (sideEffList->gtOper == GT_COMMA)
+ {
+ asgNode->gtOp.gtOp1 = sideEffList->gtOp.gtOp1;
+ asgNode->gtOp.gtOp2 = sideEffList->gtOp.gtOp2;
+ }
+ else
+ {
+ asgNode->gtOp.gtOp1 = sideEffList;
+ asgNode->gtOp.gtOp2 = gtNewNothingNode();
+ }
+ }
+ }
+ else
+ {
+ NO_SIDE_EFFECTS:
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRemoving tree ");
+ printTreeID(asgNode);
+ printf(" in BB%02u as useless\n", compCurBB->bbNum);
+ gtDispTree(asgNode);
+ printf("\n");
+ }
+#endif // DEBUG
+ /* No side effects - Remove the interior statement */
+ fgUpdateRefCntForExtract(asgNode, nullptr);
+
+ /* Change the assignment to a GT_NOP node */
+
+ asgNode->gtBashToNOP();
+
+#ifdef DEBUG
+ *treeModf = true;
+#endif // DEBUG
+ }
+
+ /* Re-link the nodes for this statement - Do not update ordering! */
+
+ // Do not update costs by calling gtSetStmtInfo. fgSetStmtSeq modifies
+ // the tree threading based on the new costs. Removing nodes could
+ // cause a subtree to get evaluated first (earlier second) during the
+ // liveness walk. Instead just set a flag that costs are dirty and
+ // caller has to call gtSetStmtInfo.
+ *pStmtInfoDirty = true;
+
+ fgSetStmtSeq(compCurStmt);
+
+ /* Continue analysis from this node */
+
+ *pTree = asgNode;
+
+ return false;
+ }
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Iterative data flow for live variable info and availability of range
+ * check index expressions.
+ */
+void Compiler::fgInterBlockLocalVarLiveness()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgInterBlockLocalVarLiveness()\n");
+ }
+#endif
+
+ /* This global flag is set whenever we remove a statement */
+
+ fgStmtRemoved = false;
+
+ // keep track if a bbLiveIn changed due to dead store removal
+ fgLocalVarLivenessChanged = false;
+
+ /* Compute the IN and OUT sets for tracked variables */
+
+ fgLiveVarAnalysis();
+
+//-------------------------------------------------------------------------
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* For debuggable code, we mark vars as live over their entire
+ * reported scope, so that they will be visible over the entire scope
+ */
+
+ if (opts.compDbgCode && (info.compVarScopesCount > 0))
+ {
+ fgExtendDbgLifetimes();
+ }
+
+#endif // DEBUGGING_SUPPORT
+
+ /*-------------------------------------------------------------------------
+ * Variables involved in exception-handlers and finally blocks need
+ * to be specially marked
+ */
+ BasicBlock* block;
+
+ VARSET_TP VARSET_INIT_NOCOPY(exceptVars, VarSetOps::MakeEmpty(this)); // vars live on entry to a handler
+ VARSET_TP VARSET_INIT_NOCOPY(finallyVars, VarSetOps::MakeEmpty(this)); // vars live on exit of a 'finally' block
+ VARSET_TP VARSET_INIT_NOCOPY(filterVars, VarSetOps::MakeEmpty(this)); // vars live on exit from a 'filter'
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbCatchTyp != BBCT_NONE)
+ {
+ /* Note the set of variables live on entry to exception handler */
+
+ VarSetOps::UnionD(this, exceptVars, block->bbLiveIn);
+ }
+
+ if (block->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ /* Get the set of live variables on exit from a 'filter' */
+ VarSetOps::UnionD(this, filterVars, block->bbLiveOut);
+ }
+ else if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ /* Get the set of live variables on exit from a 'finally' block */
+
+ VarSetOps::UnionD(this, finallyVars, block->bbLiveOut);
+ }
+#if FEATURE_EH_FUNCLETS
+ // Funclets are called and returned from; as such, we can only count on the frame
+ // pointer being restored, and thus everything live-in or live-out must be on the
+ // stack.
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ VarSetOps::UnionD(this, exceptVars, block->bbLiveIn);
+ }
+ if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) ||
+ (block->bbJumpKind == BBJ_EHCATCHRET))
+ {
+ VarSetOps::UnionD(this, exceptVars, block->bbLiveOut);
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ {
+ continue;
+ }
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ continue;
+ }
+
+ /* Un-init locals may need auto-initialization. Note that the
+ liveness of such locals will bubble to the top (fgFirstBB)
+ in fgInterBlockLocalVarLiveness() */
+
+ if (!varDsc->lvIsParam && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) &&
+ (info.compInitMem || varTypeIsGC(varDsc->TypeGet())))
+ {
+ varDsc->lvMustInit = true;
+ }
+
+ // Mark all variables that are live on entry to an exception handler
+ // or on exit from a filter handler or finally as DoNotEnregister.
+
+ if (VarSetOps::IsMember(this, exceptVars, varDsc->lvVarIndex) ||
+ VarSetOps::IsMember(this, filterVars, varDsc->lvVarIndex))
+ {
+ /* Mark the variable appropriately */
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler));
+ }
+
+ /* Mark all pointer variables live on exit from a 'finally'
+ block as either volatile for non-GC ref types or as
+ 'explicitly initialized' (volatile and must-init) for GC-ref types */
+
+ if (VarSetOps::IsMember(this, finallyVars, varDsc->lvVarIndex))
+ {
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler));
+
+ /* Don't set lvMustInit unless we have a non-arg, GC pointer */
+
+ if (varDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ if (!varTypeIsGC(varDsc->TypeGet()))
+ {
+ continue;
+ }
+
+ /* Mark it */
+ varDsc->lvMustInit = true;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Now fill in liveness info within each basic block - Backward DataFlow
+ */
+
+ // This is used in the liveness computation, as a temporary.
+ VarSetOps::AssignNoCopy(this, fgMarkIntfUnionVS, VarSetOps::MakeEmpty(this));
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ /* Tell everyone what block we're working on */
+
+ compCurBB = block;
+
+ /* Remember those vars live on entry to exception handlers */
+ /* if we are part of a try block */
+
+ VARSET_TP VARSET_INIT_NOCOPY(volatileVars, VarSetOps::MakeEmpty(this));
+
+ if (ehBlockHasExnFlowDsc(block))
+ {
+ VarSetOps::Assign(this, volatileVars, fgGetHandlerLiveVars(block));
+
+ // volatileVars is a subset of exceptVars
+ noway_assert(VarSetOps::IsSubset(this, volatileVars, exceptVars));
+ }
+
+ /* Start with the variables live on exit from the block */
+
+ VARSET_TP VARSET_INIT(this, life, block->bbLiveOut);
+
+ /* Mark any interference we might have at the end of the block */
+
+ fgMarkIntf(life);
+
+ if (!block->IsLIR())
+ {
+ /* Get the first statement in the block */
+
+ GenTreePtr firstStmt = block->FirstNonPhiDef();
+
+ if (!firstStmt)
+ {
+ continue;
+ }
+
+ /* Walk all the statements of the block backwards - Get the LAST stmt */
+
+ GenTreePtr nextStmt = block->bbTreeList->gtPrev;
+
+ do
+ {
+#ifdef DEBUG
+ bool treeModf = false;
+#endif // DEBUG
+ noway_assert(nextStmt);
+ noway_assert(nextStmt->gtOper == GT_STMT);
+
+ compCurStmt = nextStmt;
+ nextStmt = nextStmt->gtPrev;
+
+ /* Compute the liveness for each tree node in the statement */
+ bool stmtInfoDirty = false;
+
+ VarSetOps::AssignNoCopy(this, life, fgComputeLife(life, compCurStmt->gtStmt.gtStmtExpr, nullptr,
+ volatileVars, &stmtInfoDirty DEBUGARG(&treeModf)));
+
+ if (stmtInfoDirty)
+ {
+ gtSetStmtInfo(compCurStmt);
+ fgSetStmtSeq(compCurStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose && treeModf)
+ {
+ printf("\nfgComputeLife modified tree:\n");
+ gtDispTree(compCurStmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ }
+#endif // DEBUG
+ } while (compCurStmt != firstStmt);
+ }
+ else
+ {
+#ifdef LEGACY_BACKEND
+ unreached();
+#else // !LEGACY_BACKEND
+ VarSetOps::AssignNoCopy(this, life, fgComputeLifeLIR(life, block, volatileVars));
+#endif // !LEGACY_BACKEND
+ }
+
+ /* Done with the current block - if we removed any statements, some
+ * variables may have become dead at the beginning of the block
+ * -> have to update bbLiveIn */
+
+ if (!VarSetOps::Equal(this, life, block->bbLiveIn))
+ {
+ /* Some variables have become dead all across the block,
+ so life should be a subset of block->bbLiveIn */
+
+ // We changed the liveIn of the block, which may affect liveOut of others,
+ // which may expose more dead stores.
+ fgLocalVarLivenessChanged = true;
+
+ noway_assert(VarSetOps::Equal(this, VarSetOps::Intersection(this, life, block->bbLiveIn), life));
+
+ /* set the new bbLiveIn */
+
+ VarSetOps::Assign(this, block->bbLiveIn, life);
+
+ /* compute the new bbLiveOut for all the predecessors of this block */
+ }
+
+ noway_assert(compCurBB == block);
+#ifdef DEBUG
+ compCurBB = nullptr;
+#endif
+ }
+
+ fgLocalVarLivenessDone = true;
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************/
+
+void Compiler::fgDispBBLiveness(BasicBlock* block)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(allVars, VarSetOps::Union(this, block->bbLiveIn, block->bbLiveOut));
+ printf("BB%02u", block->bbNum);
+ printf(" IN (%d)=", VarSetOps::Count(this, block->bbLiveIn));
+ lvaDispVarSet(block->bbLiveIn, allVars);
+ if (block->bbHeapLiveIn)
+ {
+ printf(" + HEAP");
+ }
+ printf("\n OUT(%d)=", VarSetOps::Count(this, block->bbLiveOut));
+ lvaDispVarSet(block->bbLiveOut, allVars);
+ if (block->bbHeapLiveOut)
+ {
+ printf(" + HEAP");
+ }
+ printf("\n\n");
+}
+
+void Compiler::fgDispBBLiveness()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ fgDispBBLiveness(block);
+ }
+}
+
+#endif // DEBUG
diff --git a/src/jit/loopcloning.cpp b/src/jit/loopcloning.cpp
new file mode 100644
index 0000000000..8ce015e607
--- /dev/null
+++ b/src/jit/loopcloning.cpp
@@ -0,0 +1,845 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LoopCloning XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert an arrLen operation into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for arrLen or MD Array node as defined by
+// the "type" member
+//
+// Notes:
+// This tree produces a GT_INDEX node; the caller is supposed to morph it appropriately
+// so it can be codegen'ed.
+//
+GenTreePtr LC_Array::ToGenTree(Compiler* comp)
+{
+ // If jagged array
+ if (type == Jagged)
+ {
+ // Create an a[i][j][k].length type node.
+ GenTreePtr arr = comp->gtNewLclvNode(arrIndex->arrLcl, comp->lvaTable[arrIndex->arrLcl].lvType);
+ int rank = GetDimRank();
+ for (int i = 0; i < rank; ++i)
+ {
+ arr = comp->gtNewIndexRef(TYP_REF, arr, comp->gtNewLclvNode(arrIndex->indLcls[i],
+ comp->lvaTable[arrIndex->indLcls[i]].lvType));
+ }
+ // If asked for arrLen, invoke the array length operator.
+ if (oper == ArrLen)
+ {
+ GenTreePtr arrLen = new (comp, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arr, offsetof(CORINFO_Array, length));
+ return arrLen;
+ }
+ else
+ {
+ assert(oper == None);
+ return arr;
+ }
+ }
+ else
+ {
+ // TODO-CQ: Optimize for MD Array.
+ assert(!"Optimize for MD Array");
+ }
+ return nullptr;
+}
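+// Illustrative note (added for clarity; hypothetical example, not code generated here):
+// for a jagged access "a[i][j]" with oper == ArrLen, the loop above produces
+// GT_ARR_LENGTH(GT_INDEX(GT_INDEX(a, i), j)), which the caller is expected to morph
+// before codegen as described in the header comment.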
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert an "identifier" into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for either a constant or a variable or an arrLen operation
+// defined by the "type" member
+//
+GenTreePtr LC_Ident::ToGenTree(Compiler* comp)
+{
+ // Convert to GenTree nodes.
+ switch (type)
+ {
+ case Const:
+#ifdef _TARGET_64BIT_
+ return comp->gtNewLconNode(constant);
+#else
+ return comp->gtNewIconNode((ssize_t)constant);
+#endif
+ case Var:
+ return comp->gtNewLclvNode((unsigned)constant, comp->lvaTable[constant].lvType);
+ case ArrLen:
+ return arrLen.ToGenTree(comp);
+ case Null:
+ return comp->gtNewIconNode(0, TYP_REF);
+ default:
+ assert(!"Could not convert LC_Ident to GenTree");
+ unreached();
+ break;
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert an "expression" into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for the expression (an identifier, or an identifier plus a
+// constant) as defined by the "type" member
+//
+GenTreePtr LC_Expr::ToGenTree(Compiler* comp)
+{
+ // Convert to GenTree nodes.
+ switch (type)
+ {
+ case Ident:
+ return ident.ToGenTree(comp);
+ case IdentPlusConst:
+#ifdef _TARGET_64BIT_
+ return comp->gtNewOperNode(GT_ADD, TYP_LONG, ident.ToGenTree(comp), comp->gtNewLconNode(constant));
+#else
+ return comp->gtNewOperNode(GT_ADD, TYP_INT, ident.ToGenTree(comp), comp->gtNewIconNode((ssize_t)constant));
+#endif
+ default:
+ assert(!"Could not convert LC_Expr to GenTree");
+ unreached();
+ break;
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// ToGenTree - Convert a "condition" into a gentree node.
+//
+// Arguments:
+// comp Compiler instance to allocate trees
+//
+// Return Values:
+// Returns the gen tree representation for the conditional operator on lhs and rhs trees
+//
+GenTreePtr LC_Condition::ToGenTree(Compiler* comp)
+{
+ return comp->gtNewOperNode(oper, TYP_INT, op1.ToGenTree(comp), op2.ToGenTree(comp));
+}
+
+//--------------------------------------------------------------------------------------------------
+// Evaluates - Evaluate a given loop cloning condition if it can be statically evaluated.
+//
+// Arguments:
+// pResult The evaluation result
+//
+// Return Values:
+// Returns true if the condition can be statically evaluated. If the condition's result
+// is statically unknown then return false. In other words, true if "pResult" is valid.
+//
+bool LC_Condition::Evaluates(bool* pResult)
+{
+ switch (oper)
+ {
+ case GT_EQ:
+ case GT_GE:
+ case GT_LE:
+ // If op1 == op2 then equality should result in true.
+ if (op1 == op2)
+ {
+ *pResult = true;
+ return true;
+ }
+ break;
+
+ case GT_GT:
+ case GT_LT:
+ case GT_NE:
+ // If op1 == op2 then inequality should result in false.
+ if (op1 == op2)
+ {
+ *pResult = false;
+ return true;
+ }
+ break;
+
+ default:
+ // for all other 'oper' kinds, we will return false
+ break;
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------
+// Combines - Check whether two conditions would combine to yield a single new condition.
+//
+// Arguments:
+// cond The condition that is checked if it would combine with "*this" condition.
+// newCond The resulting combined condition.
+//
+// Return Values:
+// Returns true if "cond" combines with the "this" condition.
+// "newCond" contains the combines condition.
+//
+// Operation:
+// Check if both conditions are equal. If so, return just one of them.
+// Otherwise, check whether one condition is the reverse relop of the other with its operands
+// swapped (e.g., "x < y" vs. "y > x"). If so, return either of them.
+//
+// Notes:
+// This is not a full-fledged expression optimizer, it is supposed
+// to remove redundant conditions that are generated for optimization
+// opportunities. Anything further should be implemented as needed.
+// For example, for (i = beg; i < end; i += inc) a[i]. Then, the conditions
+// would be: "beg >= 0, end <= a.len, inc > 0"
+bool LC_Condition::Combines(const LC_Condition& cond, LC_Condition* newCond)
+{
+ if (oper == cond.oper && op1 == cond.op1 && op2 == cond.op2)
+ {
+ *newCond = *this;
+ return true;
+ }
+ else if ((oper == GT_LT || oper == GT_LE || oper == GT_GT || oper == GT_GE) &&
+ GenTree::ReverseRelop(oper) == cond.oper && op1 == cond.op2 && op2 == cond.op1)
+ {
+ *newCond = *this;
+ return true;
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------
+// GetLoopOptInfo - Retrieve the loop opt info candidate array.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// Return the optInfo array member. The method doesn't allocate memory.
+//
+ExpandArrayStack<LcOptInfo*>* LoopCloneContext::GetLoopOptInfo(unsigned loopNum)
+{
+ return optInfo[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// CancelLoopOptInfo - Cancel loop cloning optimization for this loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::CancelLoopOptInfo(unsigned loopNum)
+{
+ JITDUMP("Cancelling loop cloning for loop L_%02u\n", loopNum);
+ optInfo[loopNum] = nullptr;
+ if (conditions[loopNum] != nullptr)
+ {
+ conditions[loopNum]->Reset();
+ conditions[loopNum] = nullptr;
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureLoopOptInfo - Retrieve the loop opt info candidate array; if it is not present, allocate
+// memory.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of optimization candidates for the loop.
+//
+ExpandArrayStack<LcOptInfo*>* LoopCloneContext::EnsureLoopOptInfo(unsigned loopNum)
+{
+ if (optInfo[loopNum] == nullptr)
+ {
+ optInfo[loopNum] = new (alloc) ExpandArrayStack<LcOptInfo*>(alloc, 4);
+ }
+ return optInfo[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureConditions - Retrieve the loop cloning conditions candidate array;
+// if it is not present, allocate memory.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of cloning conditions for the loop.
+//
+ExpandArrayStack<LC_Condition>* LoopCloneContext::EnsureConditions(unsigned loopNum)
+{
+ if (conditions[loopNum] == nullptr)
+ {
+ conditions[loopNum] = new (alloc) ExpandArrayStack<LC_Condition>(alloc, 4);
+ }
+ return conditions[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// GetConditions - Get the cloning conditions array for the loop, no allocation.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of cloning conditions for the loop.
+//
+ExpandArrayStack<LC_Condition>* LoopCloneContext::GetConditions(unsigned loopNum)
+{
+ return conditions[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureDerefs - Ensure an array of dereferences is created if it doesn't exist.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// The array of dereferences for the loop.
+//
+ExpandArrayStack<LC_Array>* LoopCloneContext::EnsureDerefs(unsigned loopNum)
+{
+ if (derefs[loopNum] == nullptr)
+ {
+ derefs[loopNum] = new (alloc) ExpandArrayStack<LC_Array>(alloc, 4);
+ }
+ return derefs[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// HasBlockConditions - Check if there are block level conditions for the loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// Return true if there are any block level conditions.
+//
+bool LoopCloneContext::HasBlockConditions(unsigned loopNum)
+{
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ if (levelCond == nullptr)
+ {
+ return false;
+ }
+
+ // Walk through each block to check if any of them has conditions.
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ if ((*levelCond)[i]->Size() > 0)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------
+// GetBlockConditions - Return block level conditions for the loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// Return block conditions.
+//
+ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* LoopCloneContext::GetBlockConditions(unsigned loopNum)
+{
+ assert(HasBlockConditions(loopNum));
+ return blockConditions[loopNum];
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureBlockConditions - Allocate block level conditions for the loop if they do not exist.
+//
+// Arguments:
+// loopNum the loop index.
+// condBlocks the number of block-level conditions for each loop, corresponding to the blocks
+// created.
+//
+// Return Values:
+// Return block conditions.
+//
+ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* LoopCloneContext::EnsureBlockConditions(unsigned loopNum,
+ unsigned condBlocks)
+{
+ if (blockConditions[loopNum] == nullptr)
+ {
+ blockConditions[loopNum] = new (alloc) ExpandArrayStack<ExpandArrayStack<LC_Condition>*>(alloc, condBlocks);
+ }
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ for (unsigned i = 0; i < condBlocks; ++i)
+ {
+ levelCond->Set(i, new (alloc) ExpandArrayStack<LC_Condition>(alloc));
+ }
+ return levelCond;
+}
+
+#ifdef DEBUG
+void LoopCloneContext::PrintBlockConditions(unsigned loopNum)
+{
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ if (levelCond == nullptr || levelCond->Size() == 0)
+ {
+ JITDUMP("No block conditions\n");
+ return;
+ }
+
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ JITDUMP("%d = {", i);
+ for (unsigned j = 0; j < ((*levelCond)[i])->Size(); ++j)
+ {
+ if (j != 0)
+ {
+ JITDUMP(" & ");
+ }
+ (*((*levelCond)[i]))[j].Print();
+ }
+ JITDUMP("}\n");
+ }
+}
+#endif
+
+//--------------------------------------------------------------------------------------------------
+// EvaluateConditions - Evaluate the loop cloning conditions statically, if it can be evaluated.
+//
+// Arguments:
+// loopNum the loop index.
+// pAllTrue all the cloning conditions evaluated to "true" statically.
+// pAnyFalse some cloning condition evaluated to "false" statically.
+// verbose verbose logging required.
+//
+// Return Values:
+// None.
+//
+// Operation:
+// For example, a condition like "V02 >= V02" statically evaluates to true. The caller should detect such
+// conditions and remove them from the "conditions" array.
+//
+// Similarly, conditions like "V02 > V02" will evaluate to "false". In this case the caller has to abort
+// loop cloning optimization for the loop. Note that the assumption for conditions is that they will
+// all be "AND"ed, so statically we know we will never take the fast path.
+//
+// Sometimes we simply can't say statically whether "V02 > V01.length" is true or false.
+// In that case, the "pAllTrue" will be false because this condition doesn't evaluate to "true" and
+// "pAnyFalse" could be false if no other condition statically evaluates to "false".
+void LoopCloneContext::EvaluateConditions(unsigned loopNum, bool* pAllTrue, bool* pAnyFalse DEBUGARG(bool verbose))
+{
+ bool allTrue = true;
+ bool anyFalse = false;
+
+ ExpandArrayStack<LC_Condition>& conds = *conditions[loopNum];
+
+ JITDUMP("Evaluating %d loop cloning conditions for loop %d\n", conds.Size(), loopNum);
+
+ assert(conds.Size() > 0);
+ for (unsigned i = 0; i < conds.Size(); ++i)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Considering condition %d: (", i);
+ conds[i].Print();
+ }
+#endif
+
+ bool res = false;
+ // Check if this condition evaluates to true or false.
+ if (conds[i].Evaluates(&res))
+ {
+ JITDUMP(") evaluates to %d\n", res);
+ if (!res)
+ {
+ anyFalse = true;
+ return;
+ }
+ }
+ else
+ {
+ JITDUMP("), could not be evaluated\n");
+ allTrue = false;
+ }
+ }
+
+ JITDUMP("Evaluation result allTrue = %d, anyFalse = %d\n", allTrue, anyFalse);
+ *pAllTrue = allTrue;
+ *pAnyFalse = anyFalse;
+}
+
+//--------------------------------------------------------------------------------------------------
+// OptimizeConditions - Evaluate the loop cloning conditions statically; if they can be evaluated,
+// optimize the "conditions" array accordingly.
+//
+// Arguments:
+// conds The conditions array to optimize.
+//
+// Return Values:
+// None.
+//
+// Operation:
+// For example, a condition like "V02 >= V02" statically evaluates to true. Remove such conditions
+// from the "conditions" array.
+//
+// Similarly, conditions like "V02 > V02" will evaluate to "false". In this case abort loop cloning
+// optimization for the loop.
+//
+// Sometimes, two conditions combine to yield a single condition; in that case the duplicate
+// condition is removed.
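+//
+// Example:
+//   An illustrative sketch (not taken from the original sources); "i" and "n" stand for arbitrary
+//   local variable numbers. The conditions "i < n" and "n > i" combine, because
+//   GenTree::ReverseRelop(GT_LT) == GT_GT and the operands appear swapped, so only one of the two
+//   is kept:
+//
+//       LC_Condition c1(GT_LT, LC_Expr(LC_Ident(i, LC_Ident::Var)), LC_Expr(LC_Ident(n, LC_Ident::Var)));
+//       LC_Condition c2(GT_GT, LC_Expr(LC_Ident(n, LC_Ident::Var)), LC_Expr(LC_Ident(i, LC_Ident::Var)));
+//       LC_Condition combined;
+//       bool merged = c1.Combines(c2, &combined); // true; "combined" is simply c1
+//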
+void LoopCloneContext::OptimizeConditions(ExpandArrayStack<LC_Condition>& conds)
+{
+ for (unsigned i = 0; i < conds.Size(); ++i)
+ {
+ // Check if the conditions evaluate.
+ bool result = false;
+ if (conds[i].Evaluates(&result))
+ {
+ // If statically known to be true, then remove this condition.
+ if (result)
+ {
+ conds.Remove(i);
+ --i;
+ continue;
+ }
+ else
+ {
+ // Some condition is statically false, then simply indicate
+ // not to clone this loop.
+ CancelLoopOptInfo(i);
+ break;
+ }
+ }
+
+ // Check for all other conditions[j], if it would combine with
+ // conditions[i].
+ for (unsigned j = i + 1; j < conds.Size(); ++j)
+ {
+ LC_Condition newCond;
+ if (conds[i].Combines(conds[j], &newCond))
+ {
+ conds.Remove(j);
+ conds[i] = newCond;
+ i = -1;
+ break;
+ }
+ }
+ }
+#ifdef DEBUG
+ // Make sure we didn't miss some combining.
+ for (unsigned i = 0; i < conds.Size(); ++i)
+ {
+ for (unsigned j = 0; j < conds.Size(); ++j)
+ {
+ LC_Condition newCond;
+ if ((i != j) && conds[i].Combines(conds[j], &newCond))
+ {
+ assert(!"Loop cloning conditions can still be optimized further.");
+ }
+ }
+ }
+#endif
+}
+
+//--------------------------------------------------------------------------------------------------
+// OptimizeBlockConditions - Optimize block level conditions.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Operation:
+// Calls OptimizeConditions helper on block level conditions.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::OptimizeBlockConditions(unsigned loopNum DEBUGARG(bool verbose))
+{
+ if (!HasBlockConditions(loopNum))
+ {
+ return;
+ }
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = blockConditions[loopNum];
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ OptimizeConditions(*((*levelCond)[i]));
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("After optimizing block-level cloning conditions\n\t");
+ PrintConditions(loopNum);
+ printf("\n");
+ }
+#endif
+}
+
+//--------------------------------------------------------------------------------------------------
+// OptimizeConditions - Optimize cloning conditions.
+//
+// Arguments:
+// loopNum the loop index.
+// verbose verbose logging required.
+//
+// Operation:
+// Calls OptimizeConditions helper on cloning conditions.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::OptimizeConditions(unsigned loopNum DEBUGARG(bool verbose))
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before optimizing cloning conditions\n\t");
+ PrintConditions(loopNum);
+ printf("\n");
+ }
+#endif
+ ExpandArrayStack<LC_Condition>& conds = *conditions[loopNum];
+ OptimizeConditions(conds);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("After optimizing cloning conditions\n\t");
+ PrintConditions(loopNum);
+ printf("\n");
+ }
+#endif
+}
+
+#ifdef DEBUG
+//--------------------------------------------------------------------------------------------------
+// PrintConditions - Print loop cloning conditions necessary to clone the loop.
+//
+// Arguments:
+// loopNum the loop index.
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::PrintConditions(unsigned loopNum)
+{
+ if (conditions[loopNum] == nullptr)
+ {
+ JITDUMP("NO conditions");
+ return;
+ }
+ if (conditions[loopNum]->Size() == 0)
+ {
+ JITDUMP("Conditions were optimized away! Will always take cloned path.");
+ }
+ for (unsigned i = 0; i < conditions[loopNum]->Size(); ++i)
+ {
+ if (i != 0)
+ {
+ JITDUMP(" & ");
+ }
+ (*conditions[loopNum])[i].Print();
+ }
+}
+#endif
+
+//--------------------------------------------------------------------------------------------------
+// CondToStmtInBlock - Convert an array of conditions into a JTRUE stmt and add it to
+// the block.
+//
+// Arguments:
+// comp Compiler instance
+// conds Array of conditions to evaluate into a JTRUE stmt
+// block Block to insert the stmt into
+// reverse Reverse conditions if true.
+//
+// Note:
+// The condition that will be generated: jmpTrue(cond1 & cond2 ... == 0)
+//
+// Return Values:
+// None.
+//
+void LoopCloneContext::CondToStmtInBlock(Compiler* comp,
+ ExpandArrayStack<LC_Condition>& conds,
+ BasicBlock* block,
+ bool reverse)
+{
+ noway_assert(conds.Size() > 0);
+
+ // Get the first condition.
+ GenTreePtr cond = conds[0].ToGenTree(comp);
+ for (unsigned i = 1; i < conds.Size(); ++i)
+ {
+ // Append all conditions using AND operator.
+ cond = comp->gtNewOperNode(GT_AND, TYP_INT, cond, conds[i].ToGenTree(comp));
+ }
+
+ // Add "cond == 0" node
+ cond = comp->gtNewOperNode(reverse ? GT_NE : GT_EQ, TYP_INT, cond, comp->gtNewIconNode(0));
+
+ // Add jmpTrue "cond == 0" to slow path.
+ GenTreePtr stmt = comp->fgNewStmtFromTree(comp->gtNewOperNode(GT_JTRUE, TYP_VOID, cond));
+
+ // Add stmt to the block.
+ comp->fgInsertStmtAtEnd(block, stmt);
+
+ // Remorph.
+ comp->fgMorphBlockStmt(block, stmt DEBUGARG("Loop cloning condition"));
+}
+
+//--------------------------------------------------------------------------------------------------
+// Lcl - the current node's local variable.
+//
+// Arguments:
+// None.
+//
+// Operation:
+// If level is 0, then just return the array base. Else return the index variable on dim 'level'
+//
+// Return Values:
+// The local variable in the node's level.
+//
+unsigned LC_Deref::Lcl()
+{
+ unsigned lvl = level;
+ if (lvl == 0)
+ {
+ return array.arrIndex->arrLcl;
+ }
+ lvl--;
+ return array.arrIndex->indLcls[lvl];
+}
+
+//--------------------------------------------------------------------------------------------------
+// HasChildren - Check if there are children to 'this' node.
+//
+// Arguments:
+// None.
+//
+// Return Values:
+// Return true if children are present.
+//
+bool LC_Deref::HasChildren()
+{
+ return children != nullptr && children->Size() > 0;
+}
+
+//--------------------------------------------------------------------------------------------------
+// DeriveLevelConditions - Generate conditions for each level of the tree.
+//
+// Arguments:
+// conds An array of conditions for each level i.e., (level x conditions). This array will
+// contain the conditions for the tree at the end of the method.
+//
+// Operation:
+// level0 yields only (a != null) condition. All other levels yield two conditions:
+// (level < a[...].length && a[...][level] != null)
+//
+// Return Values:
+// None
+//
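+// Example:
+//   An illustrative sketch (not from the original sources), assuming a deref tree with nodes
+//   "a" at level 0, "i" at level 1 and "j" at level 2 (i.e., the access a[i][j]); the conditions
+//   land in "conds" at slots (level * 2 - 1) and (level * 2):
+//
+//       conds[0] : (a != null)
+//       conds[1] : (i < a.length)        conds[2] : (a[i] != null)
+//       conds[3] : (j < a[i].length)     conds[4] : (a[i][j] != null)
+//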
+void LC_Deref::DeriveLevelConditions(ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* conds)
+{
+ if (level == 0)
+ {
+ // For level 0, just push (a != null).
+ (*conds)[level]->Push(
+ LC_Condition(GT_NE, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(LC_Ident::Null))));
+ }
+ else
+ {
+ // Adjust for level0 having just 1 condition and push condition (i < a.len).
+ LC_Array arrLen = array;
+ arrLen.oper = LC_Array::ArrLen;
+ arrLen.dim = level - 1;
+ (*conds)[level * 2 - 1]->Push(
+ LC_Condition(GT_LT, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(arrLen))));
+
+ // Push condition (a[i] != null)
+ LC_Array arrTmp = array;
+ arrTmp.dim = level;
+ (*conds)[level * 2]->Push(LC_Condition(GT_NE, LC_Expr(LC_Ident(arrTmp)), LC_Expr(LC_Ident(LC_Ident::Null))));
+ }
+
+ // Invoke on the children recursively.
+ if (HasChildren())
+ {
+ for (unsigned i = 0; i < children->Size(); ++i)
+ {
+ (*children)[i]->DeriveLevelConditions(conds);
+ }
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// EnsureChildren - Create an array of child nodes if nullptr.
+//
+// Arguments:
+// alloc IAllocator instance
+//
+// Return Values:
+// None
+//
+void LC_Deref::EnsureChildren(IAllocator* alloc)
+{
+ if (children == nullptr)
+ {
+ children = new (alloc) ExpandArrayStack<LC_Deref*>(alloc);
+ }
+}
+
+//--------------------------------------------------------------------------------------------------
+// Find - Find the node representing the local variable in child nodes of the 'this' node.
+//
+// Arguments:
+// lcl the local to find in the children array
+//
+// Return Values:
+// The child node if found or nullptr.
+//
+LC_Deref* LC_Deref::Find(unsigned lcl)
+{
+ return Find(children, lcl);
+}
+
+//--------------------------------------------------------------------------------------------------
+// Find - Find the node representing the local variable in a list of nodes.
+//
+// Arguments:
+// lcl the local to find.
+// children the list of nodes in which to search for the node representing the lcl.
+//
+// Return Values:
+// The node if found or nullptr.
+//
+
+// static
+LC_Deref* LC_Deref::Find(ExpandArrayStack<LC_Deref*>* children, unsigned lcl)
+{
+ if (children == nullptr)
+ {
+ return nullptr;
+ }
+ for (unsigned i = 0; i < children->Size(); ++i)
+ {
+ if ((*children)[i]->Lcl() == lcl)
+ {
+ return (*children)[i];
+ }
+ }
+ return nullptr;
+}
diff --git a/src/jit/loopcloning.h b/src/jit/loopcloning.h
new file mode 100644
index 0000000000..40793afcf1
--- /dev/null
+++ b/src/jit/loopcloning.h
@@ -0,0 +1,667 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LoopCloning XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+ Loop cloning optimizations comprise the following steps:
+ - Loop detection, which is existing logic in the JIT that records
+ loop information with loop flags.
+ - The next step is to identify loop optimization candidates. This is done
+ by optObtainLoopCloningOpts. The loop context variable is updated with
+ all the necessary information (for ex: block, stmt, tree information)
+ to do the optimization later.
+ a) This involves checking if the loop is well-formed with respect to
+ the optimization being performed.
+ b) In array bounds check case, reconstructing the morphed GT_INDEX
+ nodes back to their array representation.
+ i) The array index is stored in the "context" variable with
+ additional block, tree, stmt info.
+ - Once the optimization candidates are identified, we derive cloning conditions.
+ For example, to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
+ following conditions:
+ (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
+ a) Note the short circuit AND for (a != null). These are called block
+ conditions or deref-conditions since these conditions need to be in their
+ own blocks to be able to short-circuit.
+ i) For a doubly nested loop on i, j, we would then have
+ conditions like
+ (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len)
+ all short-circuiting, creating blocks.
+
+ Advantage:
+ All conditions are checked before we enter the fast path, so the fast
+ path is as fast as it can be.
+
+ Disadvantage:
+ Creation of blocks.
+
+ Heuristic:
+ Therefore we will not clone if doing so would create more than 4 blocks.
+
+ b) The other conditions, called cloning conditions, are transformed into LC_Condition
+ structs, which are then optimized.
+ i) Optimization of conditions involves removing redundant condition checks.
+ ii) If some conditions evaluate to true statically, then they are removed.
+ iii) If any condition evaluates to false statically, then loop cloning is
+ aborted for that loop.
+ - Then the block splitting occurs, and the loop cloning conditions are transformed into
+ GenTrees and added to the loop cloning choice block.
+
+ Preconditions
+ - Loop detection should have completed and the loop table should be
+ populated with the loop dscs.
+ - The loops that will be considered are the ones with the LPFLG_ITER
+ marked on them.
+
+ Limitations
+ - For array based optimizations the loop choice condition is checked
+ before the loop body. This implies that the loop initializer statement
+ has not executed at the time of the check. So any loop cloning condition
+ involving the initial value of the loop counter cannot be checked, because the
+ counter has not been assigned yet at the time of the check. Therefore
+ the initial value has to be statically known. This can be fixed with further
+ effort.
+
+ Assumption
+ - The assumption is that the optimization candidates collected during the
+ identification phase will be the ones that will be optimized. In other words,
+ the loop that is present originally will be the fast path. Explicitly, the cloned
+ path will be the slow path and will be unoptimized. This allows us to
+ collect additional information at the same time as identifying the optimization
+ candidates. This later helps us to perform the optimizations during actual cloning.
+ - All loop cloning choice conditions will automatically be "AND"-ed. These are
+ bitwise AND operations.
+ - Perform short circuit AND for (array != null) side effect check
+ before hoisting (limit <= a.length) check.
+ For example, to clone a simple "for (i=0; i<n; ++i) { a[i] }" loop, we need the
+ following conditions:
+ (a != null) && ((n >= 0) & (n <= a.length) & (stride > 0))
+
+*/
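+
+// Illustrative sketch (not part of the original sources): after cloning, the simple loop from the
+// example above conceptually becomes the following, where the original loop serves as the fast path
+// (bounds checks elided) and the clone is the unoptimized slow path:
+//
+//      if ((a != null) && ((n >= 0) & (n <= a.length) & (stride > 0)))
+//      {
+//          for (i = 0; i < n; ++i) { a[i]; }   // fast path: bounds checks removed
+//      }
+//      else
+//      {
+//          for (i = 0; i < n; ++i) { a[i]; }   // slow path: original, fully checked loop
+//      }
+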
+#pragma once
+
+class Compiler;
+
+/**
+ *
+ * Represents an array access and associated bounds checks.
+ * An array access is required to have the array and indices in local variables.
+ * This struct is constructed using a GT_INDEX node that is broken into
+ * its sub trees.
+ *
+ */
+struct ArrIndex
+{
+ unsigned arrLcl; // The array base local num
+ ExpandArrayStack<unsigned> indLcls; // The indices local nums
+ ExpandArrayStack<GenTree*> bndsChks; // The bounds checks nodes along each dimension.
+ unsigned rank; // Rank of the array
+ BasicBlock* useBlock; // Block where the [] occurs
+
+ ArrIndex(IAllocator* alloc) : arrLcl(BAD_VAR_NUM), indLcls(alloc), bndsChks(alloc), rank(0), useBlock(nullptr)
+ {
+ }
+
+#ifdef DEBUG
+ void Print(unsigned dim = -1)
+ {
+ printf("V%02d", arrLcl);
+ for (unsigned i = 0; i < ((dim == -1) ? rank : dim); ++i)
+ {
+ printf("[V%02d]", indLcls.GetRef(i));
+ }
+ }
+#endif
+};
+
+// Forward declarations
+#define LC_OPT(en) struct en##OptInfo;
+#include "loopcloningopts.h"
+
+/**
+ *
+ * LcOptInfo represents the optimization information for loop cloning,
+ * other classes are supposed to derive from this base class.
+ *
+ * Example usage:
+ * LcMdArrayOptInfo is multi-dimensional array optimization for which the
+ * loop can be cloned.
+ * LcArrIndexOptInfo is a jagged array optimization for which the loop
+ * can be cloned.
+ *
+ * So LcOptInfo represents any type of optimization opportunity that
+ * occurs in a loop and the metadata for the optimization is stored in
+ * this class.
+ */
+struct LcOptInfo
+{
+ enum OptType
+ {
+#undef LC_OPT
+#define LC_OPT(en) en,
+#include "loopcloningopts.h"
+ };
+
+ void* optInfo;
+ OptType optType;
+ LcOptInfo(void* optInfo, OptType optType) : optInfo(optInfo), optType(optType)
+ {
+ }
+
+ OptType GetOptType()
+ {
+ return optType;
+ }
+#undef LC_OPT
+#define LC_OPT(en) \
+ en##OptInfo* As##en##OptInfo() \
+ { \
+ assert(optType == en); \
+ return reinterpret_cast<en##OptInfo*>(this); \
+ }
+#include "loopcloningopts.h"
+};
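+
+// Illustrative usage (not from the original sources): code holding a generic LcOptInfo* can
+// dispatch on GetOptType() and then downcast with one of the generated As*OptInfo() helpers.
+// "DumpJaggedRank" is a hypothetical helper used only for this sketch:
+//
+//      unsigned DumpJaggedRank(LcOptInfo* info)
+//      {
+//          if (info->GetOptType() == LcOptInfo::LcJaggedArray)
+//          {
+//              LcJaggedArrayOptInfo* jag = info->AsLcJaggedArrayOptInfo();
+//              return jag->arrIndex.rank;
+//          }
+//          return 0;
+//      }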
+
+/**
+ *
+ * Optimization info for a multi-dimensional array.
+ */
+struct LcMdArrayOptInfo : public LcOptInfo
+{
+ GenTreeArrElem* arrElem; // "arrElem" node of an MD array.
+ unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
+ // For example, a[i,j,k] could be the MD array "arrElem" but if "dim" is 2,
+ // then this node is treated as though it were a[i,j]
+ ArrIndex* index; // "index" cached computation in the form of an ArrIndex representation.
+
+ LcMdArrayOptInfo(GenTreeArrElem* arrElem, unsigned dim)
+ : LcOptInfo(this, LcMdArray), arrElem(arrElem), dim(dim), index(nullptr)
+ {
+ }
+
+ ArrIndex* GetArrIndexForDim(IAllocator* alloc)
+ {
+ if (index == nullptr)
+ {
+ index = new (alloc) ArrIndex(alloc);
+ index->rank = arrElem->gtArrRank;
+ for (unsigned i = 0; i < dim; ++i)
+ {
+ index->indLcls.Push(arrElem->gtArrInds[i]->gtLclVarCommon.gtLclNum);
+ }
+ index->arrLcl = arrElem->gtArrObj->gtLclVarCommon.gtLclNum;
+ }
+ return index;
+ }
+};
+
+/**
+ *
+ * Optimization info for a jagged array.
+ */
+struct LcJaggedArrayOptInfo : public LcOptInfo
+{
+ unsigned dim; // "dim" represents upto what level of the rank this optimization applies to.
+ // For example, a[i][j][k] could be the jagged array but if "dim" is 2,
+ // then this node is treated as though it were a[i][j]
+ ArrIndex arrIndex; // ArrIndex representation of the array.
+ GenTreePtr stmt; // "stmt" where the optimization opportunity occurs.
+
+ LcJaggedArrayOptInfo(ArrIndex& arrIndex, unsigned dim, GenTreePtr stmt)
+ : LcOptInfo(this, LcJaggedArray), dim(dim), arrIndex(arrIndex), stmt(stmt)
+ {
+ }
+};
+
+/**
+ *
+ * Symbolic representation of a.length, or a[i][j].length or a[i,j].length and so on.
+ * OperType decides whether "arrLength" is invoked on the array or if it is just an array.
+ */
+struct LC_Array
+{
+ enum ArrType
+ {
+ Invalid,
+ Jagged,
+ MdArray
+ };
+
+ enum OperType
+ {
+ None,
+ ArrLen,
+ };
+
+ ArrType type; // The type of the array on which to invoke length operator.
+ ArrIndex* arrIndex; // ArrIndex representation of this array.
+
+ OperType oper;
+
+#ifdef DEBUG
+ void Print()
+ {
+ arrIndex->Print(dim);
+ if (oper == ArrLen)
+ {
+ printf(".Length");
+ }
+ }
+#endif
+
+ int dim; // "dim" = which index to invoke arrLen on, if -1 invoke on the whole array
+ // Example 1: a[0][1][2] and dim = 2 implies a[0][1].length
+ // Example 2: a[0][1][2] and dim = -1 implies a[0][1][2].length
+ LC_Array() : type(Invalid), dim(-1)
+ {
+ }
+ LC_Array(ArrType type, ArrIndex* arrIndex, int dim, OperType oper)
+ : type(type), arrIndex(arrIndex), oper(oper), dim(dim)
+ {
+ }
+
+ LC_Array(ArrType type, ArrIndex* arrIndex, OperType oper) : type(type), arrIndex(arrIndex), oper(oper), dim(-1)
+ {
+ }
+
+ // Equality operator
+ bool operator==(const LC_Array& that) const
+ {
+ assert(type != Invalid && that.type != Invalid);
+
+ // Types match and the array base matches.
+ if (type != that.type || arrIndex->arrLcl != that.arrIndex->arrLcl || oper != that.oper)
+ {
+ return false;
+ }
+
+ // If the dim ranks are not matching, quit.
+ int rank1 = GetDimRank();
+ int rank2 = that.GetDimRank();
+ if (rank1 != rank2)
+ {
+ return false;
+ }
+
+ // Check for the indices.
+ for (int i = 0; i < rank1; ++i)
+ {
+ if (arrIndex->indLcls[i] != that.arrIndex->indLcls[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ // The max dim on which length is invoked.
+ int GetDimRank() const
+ {
+ return (dim < 0) ? (int)arrIndex->rank : dim;
+ }
+
+ // Get a tree representation for this symbolic a.length
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of either a constant like 1, 2 or a variable V02, V03 etc. or an "LC_Array" or the null
+ * constant.
+ */
+struct LC_Ident
+{
+ enum IdentType
+ {
+ Invalid,
+ Const,
+ Var,
+ ArrLen,
+ Null,
+ };
+
+ INT64 constant; // The constant value if this node is of type "Const", or the lcl num if "Var"
+ LC_Array arrLen; // The LC_Array if the type is "ArrLen"
+ IdentType type; // The type of this object
+
+ // Equality operator
+ bool operator==(const LC_Ident& that) const
+ {
+ switch (type)
+ {
+ case Const:
+ case Var:
+ return (type == that.type) && constant == that.constant;
+ case ArrLen:
+ return (type == that.type) && (arrLen == that.arrLen);
+ case Null:
+ return (type == that.type);
+ default:
+ assert(!"Unknown LC_Ident type");
+ unreached();
+ }
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ switch (type)
+ {
+ case Const:
+ printf("%I64d", constant);
+ break;
+ case Var:
+ printf("V%02d", constant);
+ break;
+ case ArrLen:
+ arrLen.Print();
+ break;
+ case Null:
+ printf("null");
+ break;
+ default:
+ assert(false);
+ break;
+ }
+ }
+#endif
+
+ LC_Ident() : type(Invalid)
+ {
+ }
+ LC_Ident(INT64 constant, IdentType type) : constant(constant), type(type)
+ {
+ }
+ explicit LC_Ident(IdentType type) : type(type)
+ {
+ }
+ explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen)
+ {
+ }
+
+ // Convert this symbolic representation into a tree node.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of an expr that involves an "LC_Ident" or an "LC_Ident + constant"
+ */
+struct LC_Expr
+{
+ enum ExprType
+ {
+ Invalid,
+ Ident,
+ IdentPlusConst
+ };
+
+ LC_Ident ident;
+ INT64 constant;
+ ExprType type;
+
+ // Equality operator
+ bool operator==(const LC_Expr& that) const
+ {
+ assert(type != Invalid && that.type != Invalid);
+
+ // If the types don't match quit.
+ if (type != that.type)
+ {
+ return false;
+ }
+
+ // If the type involves arithmetic, the constant should match.
+ if (type == IdentPlusConst && constant != that.constant)
+ {
+ return false;
+ }
+
+ // Check if the idents match.
+ return (ident == that.ident);
+ }
+
+#ifdef DEBUG
+ void Print()
+ {
+ if (type == IdentPlusConst)
+ {
+ printf("(%I64d - ", constant);
+ ident.Print();
+ printf(")");
+ }
+ else
+ {
+ ident.Print();
+ }
+ }
+#endif
+
+ LC_Expr() : type(Invalid)
+ {
+ }
+ explicit LC_Expr(const LC_Ident& ident) : ident(ident), type(Ident)
+ {
+ }
+ LC_Expr(const LC_Ident& ident, INT64 constant) : ident(ident), constant(constant), type(IdentPlusConst)
+ {
+ }
+
+ // Convert LC_Expr into a tree node.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ *
+ * Symbolic representation of a conditional operation involving two "LC_Expr":
+ * LC_Expr < LC_Expr, for example: i > 0, i < a.length
+ */
+struct LC_Condition
+{
+ LC_Expr op1;
+ LC_Expr op2;
+ genTreeOps oper;
+
+#ifdef DEBUG
+ void Print()
+ {
+ op1.Print();
+ printf(" %s ", GenTree::NodeName(oper));
+ op2.Print();
+ }
+#endif
+
+ // Check if the condition evaluates statically to true or false, i < i => false, a.length > 0 => true
+ // The result is put in "pResult" parameter and is valid if the method returns "true". Otherwise, the
+ // condition could not be evaluated.
+ bool Evaluates(bool* pResult);
+
+ // Check if two conditions can be combined to yield one condition.
+ bool Combines(const LC_Condition& cond, LC_Condition* newCond);
+
+ LC_Condition()
+ {
+ }
+ LC_Condition(genTreeOps oper, const LC_Expr& op1, const LC_Expr& op2) : op1(op1), op2(op2), oper(oper)
+ {
+ }
+
+ // Convert this conditional operation into a GenTree.
+ GenTreePtr ToGenTree(Compiler* comp);
+};
+
+/**
+ * A deref tree of an array expression.
+ * If a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop, then the tree would be:
+ * a => {
+ * i => {
+ * j => {
+ * k => {}
+ * },
+ * y => {
+ * k => {}
+ * },
+ * }
+ * },
+ * b => {
+ * i => {}
+ * }
+ */
+struct LC_Deref
+{
+ const LC_Array array;
+ ExpandArrayStack<LC_Deref*>* children;
+
+ unsigned level;
+
+ LC_Deref(const LC_Array& array, unsigned level) : array(array), children(nullptr), level(level)
+ {
+ }
+
+ LC_Deref* Find(unsigned lcl);
+
+ unsigned Lcl();
+
+ bool HasChildren();
+ void EnsureChildren(IAllocator* alloc);
+ static LC_Deref* Find(ExpandArrayStack<LC_Deref*>* children, unsigned lcl);
+
+ void DeriveLevelConditions(ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* len);
+#ifdef DEBUG
+ void Print(unsigned indent = 0)
+ {
+ unsigned tab = 4 * indent;
+ printf("%*s%d,%d => {", tab, "", Lcl(), level);
+ if (children != nullptr)
+ {
+ for (unsigned i = 0; i < children->Size(); ++i)
+ {
+ if (i > 0)
+ {
+ printf(",");
+ }
+ printf("\n");
+#ifdef _MSC_VER
+ (*children)[i]->Print(indent + 1);
+#else // _MSC_VER
+ (*((ExpandArray<LC_Deref*>*)children))[i]->Print(indent + 1);
+#endif // _MSC_VER
+ }
+ }
+ printf("\n%*s}", tab, "");
+ }
+#endif
+};
+
+/**
+ *
+ * The "context" represents data that is used for making loop-cloning decisions.
+ * - The data is the collection of optimization opportunities
+ * - and the conditions (LC_Condition) that decide between the fast
+ * path or the slow path.
+ *
+ * BNF for LC_Condition:
+ * LC_Condition : LC_Expr genTreeOps LC_Expr
+ * LC_Expr : LC_Ident | LC_Ident + Constant
+ * LC_Ident : Constant | Var | LC_Array
+ * LC_Array : .
+ * genTreeOps : GT_GE | GT_LE | GT_GT | GT_LT
+ *
+ */
+struct LoopCloneContext
+{
+ IAllocator* alloc; // The allocator
+ ExpandArrayStack<LcOptInfo*>** optInfo; // The array of optimization opportunities found in each loop. (loop x
+ // optimization-opportunities)
+ ExpandArrayStack<LC_Condition>** conditions; // The array of conditions that influence which path to take for each
+ // loop. (loop x cloning-conditions)
+ ExpandArrayStack<LC_Array>** derefs; // The array of dereference conditions found in each loop. (loop x
+ // deref-conditions)
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>** blockConditions; // The array of block levels of conditions for
+ // each loop. (loop x level x conditions)
+
+ LoopCloneContext(unsigned loopCount, IAllocator* alloc) : alloc(alloc)
+ {
+ optInfo = new (alloc) ExpandArrayStack<LcOptInfo*>*[loopCount];
+ conditions = new (alloc) ExpandArrayStack<LC_Condition>*[loopCount];
+ derefs = new (alloc) ExpandArrayStack<LC_Array>*[loopCount];
+ blockConditions = new (alloc) ExpandArrayStack<ExpandArrayStack<LC_Condition>*>*[loopCount];
+ for (unsigned i = 0; i < loopCount; ++i)
+ {
+ optInfo[i] = nullptr;
+ conditions[i] = nullptr;
+ derefs[i] = nullptr;
+ blockConditions[i] = nullptr;
+ }
+ }
+
+ // Evaluate conditions into a JTRUE stmt and put it in the block. Reverse condition if 'reverse' is true.
+ void CondToStmtInBlock(Compiler* comp, ExpandArrayStack<LC_Condition>& conds, BasicBlock* block, bool reverse);
+
+ // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
+ // If NULL this allocates the optInfo[loopNum] array for "loopNum"
+ ExpandArrayStack<LcOptInfo*>* EnsureLoopOptInfo(unsigned loopNum);
+
+ // Get all the optimization information for loop "loopNum"; This information is held in "optInfo" array.
+ // If NULL this does not allocate the optInfo[loopNum] array for "loopNum"
+ ExpandArrayStack<LcOptInfo*>* GetLoopOptInfo(unsigned loopNum);
+
+ // Cancel all optimizations for loop "loopNum" by clearing out the "conditions" member if non-null
+ // and setting the optInfo to "null". If "null", then the user of this class is not supposed to
+ // clone this loop.
+ void CancelLoopOptInfo(unsigned loopNum);
+
+ // Get the conditions that decide which loop to take for "loopNum." If NULL allocate an empty array.
+ ExpandArrayStack<LC_Condition>* EnsureConditions(unsigned loopNum);
+
+ // Get the conditions for loop. No allocation is performed.
+ ExpandArrayStack<LC_Condition>* GetConditions(unsigned loopNum);
+
+ // Ensure that the "deref" conditions array is allocated.
+ ExpandArrayStack<LC_Array>* EnsureDerefs(unsigned loopNum);
+
+ // Get block conditions for each loop, no allocation is performed.
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* GetBlockConditions(unsigned loopNum);
+
+ // Ensure that the block condition is present, if not allocate space.
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* EnsureBlockConditions(unsigned loopNum, unsigned totalBlocks);
+
+ // Print the block conditions for the loop.
+ void PrintBlockConditions(unsigned loopNum);
+
+ // Does the loop have block conditions?
+ bool HasBlockConditions(unsigned loopNum);
+
+ // Evaluate the conditions for "loopNum" and indicate if they are either all true or any of them are false.
+ // "pAllTrue" implies all the conditions are statically known to be true.
+ // "pAnyFalse" implies at least one condition is statically known to be false.
+ // If neither of them is true, then some conditions' evaluations are statically unknown.
+ //
+ // If all conditions yield true, then the caller doesn't need to clone the loop, but it can perform
+ // fast path optimizations.
+ // If any condition yields false, then the caller needs to abort cloning the loop (neither clone it nor
+ // perform fast path optimizations).
+ //
+ // Assumes the conditions involve an AND join operator.
+ void EvaluateConditions(unsigned loopNum, bool* pAllTrue, bool* pAnyFalse DEBUGARG(bool verbose));
+
+private:
+ void OptimizeConditions(ExpandArrayStack<LC_Condition>& conds);
+
+public:
+ // Optimize conditions to remove redundant conditions.
+ void OptimizeConditions(unsigned loopNum DEBUGARG(bool verbose));
+
+ void OptimizeBlockConditions(unsigned loopNum DEBUGARG(bool verbose));
+
+#ifdef DEBUG
+ void PrintConditions(unsigned loopNum);
+#endif
+};
diff --git a/src/jit/loopcloningopts.h b/src/jit/loopcloningopts.h
new file mode 100644
index 0000000000..9048a41a14
--- /dev/null
+++ b/src/jit/loopcloningopts.h
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+/*****************************************************************************/
+
+#ifndef LC_OPT
+#error Define LC_OPT before including this file.
+#endif
+
+// Types of Loop Cloning based optimizations.
+LC_OPT(LcMdArray)
+LC_OPT(LcJaggedArray)
+
+#undef LC_OPT
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
new file mode 100644
index 0000000000..09eb9146ac
--- /dev/null
+++ b/src/jit/lower.cpp
@@ -0,0 +1,4196 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lower XX
+XX XX
+XX Preconditions: XX
+XX XX
+XX Postconditions (for the nodes currently handled): XX
+XX - All operands requiring a register are explicit in the graph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#include "lower.h"
+
+#if !defined(_TARGET_64BIT_)
+#include "decomposelongs.h"
+#endif // !defined(_TARGET_64BIT_)
+
+//------------------------------------------------------------------------
+// MakeSrcContained: Make "childNode" a contained node
+//
+// Arguments:
+// parentNode - is a non-leaf node that can contain its 'childNode'
+// childNode - is an op that will now be contained by its parent.
+//
+// Notes:
+// If 'childNode' has any existing sources, they will now be sources for the parent.
+//
+void Lowering::MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode)
+{
+ assert(!parentNode->OperIsLeaf());
+ int srcCount = childNode->gtLsraInfo.srcCount;
+ assert(srcCount >= 0);
+ m_lsra->clearOperandCounts(childNode);
+ assert(parentNode->gtLsraInfo.srcCount > 0);
+ parentNode->gtLsraInfo.srcCount += srcCount - 1;
+}
+
+//------------------------------------------------------------------------
+// CheckImmedAndMakeContained: Checks if the 'childNode' is a containable immediate
+// and, if so, makes it contained.
+//
+// Arguments:
+// parentNode - is any non-leaf node
+// childNode - is a child op of 'parentNode'
+//
+// Return value:
+// true if we are able to make childNode a contained immediate
+//
+bool Lowering::CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode)
+{
+ assert(!parentNode->OperIsLeaf());
+ // If childNode is a containable immediate
+ if (IsContainableImmed(parentNode, childNode))
+ {
+ // then make it contained within the parentNode
+ MakeSrcContained(parentNode, childNode);
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------
+// IsSafeToContainMem: Checks for conflicts between childNode and parentNode,
+// and returns 'true' iff memory operand childNode can be contained in parentNode.
+//
+// Arguments:
+// parentNode - any non-leaf node
+// childNode - some node that is an input to `parentNode`
+//
+// Return value:
+// true if it is safe to make childNode a contained memory operand.
+//
+bool Lowering::IsSafeToContainMem(GenTree* parentNode, GenTree* childNode)
+{
+ m_scratchSideEffects.Clear();
+ m_scratchSideEffects.AddNode(comp, childNode);
+
+ for (GenTree* node = childNode->gtNext; node != parentNode; node = node->gtNext)
+ {
+ if (m_scratchSideEffects.InterferesWith(comp, node, false))
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+//------------------------------------------------------------------------
+
+// This is the main entry point for Lowering.
+GenTree* Lowering::LowerNode(GenTree* node)
+{
+ assert(node != nullptr);
+ switch (node->gtOper)
+ {
+ case GT_IND:
+ TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+ break;
+
+ case GT_STOREIND:
+ LowerStoreInd(node);
+ break;
+
+ case GT_ADD:
+ return LowerAdd(node);
+
+ case GT_UDIV:
+ case GT_UMOD:
+ LowerUnsignedDivOrMod(node);
+ break;
+
+ case GT_DIV:
+ case GT_MOD:
+ return LowerSignedDivOrMod(node);
+
+ case GT_SWITCH:
+ return LowerSwitch(node);
+
+ case GT_CALL:
+ LowerCall(node);
+ break;
+
+ case GT_JMP:
+ LowerJmpMethod(node);
+ break;
+
+ case GT_RETURN:
+ LowerRet(node);
+ break;
+
+ case GT_CAST:
+ LowerCast(node);
+ break;
+
+ case GT_ARR_ELEM:
+ return LowerArrElem(node);
+
+ case GT_ROL:
+ case GT_ROR:
+ LowerRotate(node);
+ break;
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ LowerBlockStore(node->AsBlk());
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+ // A GT_SIMD node that is required to produce TYP_SIMD12 in fact
+ // produces a TYP_SIMD16 result
+ node->gtType = TYP_SIMD16;
+ }
+ break;
+
+ case GT_LCL_VAR:
+ case GT_STORE_LCL_VAR:
+ if (node->TypeGet() == TYP_SIMD12)
+ {
+#ifdef _TARGET_64BIT_
+ // Assumption 1:
+ // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
+ // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
+ // reading and writing purposes.
+ //
+ // Assumption 2:
+ // The RyuJIT backend makes another implicit assumption: when Vector3 type args are passed in
+ // registers or on the stack, the uppermost 4 bytes will be zero.
+ //
+ // For P/Invoke returns and Reverse P/Invoke argument passing, the native compiler doesn't guarantee
+ // that the upper 4 bytes of a Vector3 type struct are zero initialized, and hence assumption 2 is
+ // invalid.
+ //
+ // RyuJIT x64 Windows: arguments are treated as passed by ref and hence only 12 bytes are
+ // read/written. In case of Vector3 returns, the caller allocates a zero-initialized Vector3 local and
+ // passes it as the retBuf arg, and the callee writes only 12 bytes to retBuf. For this reason,
+ // there is no need to clear the upper 4 bytes of Vector3 type args.
+ //
+ // RyuJIT x64 Unix: arguments are treated as passed by value and read/written as if TYP_SIMD16.
+ // Vector3 return values are returned in two return registers and the caller assembles them into a
+ // single xmm reg. Hence RyuJIT explicitly generates code to clear the upper 4 bytes of Vector3
+ // type args in the prolog and of the Vector3 type return value of a call.
+ node->gtType = TYP_SIMD16;
+#else
+ NYI("Lowering of TYP_SIMD12 locals");
+#endif // _TARGET_64BIT_
+ }
+#endif // FEATURE_SIMD
+ __fallthrough;
+
+ case GT_STORE_LCL_FLD:
+ // TODO-1stClassStructs: Once we remove the requirement that all struct stores
+ // are block stores (GT_STORE_BLK or GT_STORE_OBJ), here is where we would put the local
+ // store under a block store if codegen will require it.
+ if (node->OperIsStore() && (node->TypeGet() == TYP_STRUCT) && (node->gtGetOp1()->OperGet() != GT_PHI))
+ {
+#if FEATURE_MULTIREG_RET
+ GenTree* src = node->gtGetOp1();
+ assert((src->OperGet() == GT_CALL) && src->AsCall()->HasMultiRegRetVal());
+#else // !FEATURE_MULTIREG_RET
+ assert(!"Unexpected struct local store in Lowering");
+#endif // !FEATURE_MULTIREG_RET
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return node->gtNext;
+}
+
+/** -- Switch Lowering --
+ * The main idea of switch lowering is to keep transparency of the register requirements of this node
+ * downstream in LSRA. Given that the switch instruction is inherently a control statement which in the JIT
+ * is represented as a simple tree node, at the time we actually generate code for it we end up
+ * generating instructions that actually modify the flow of execution, which imposes complicated
+ * register requirements and lifetimes.
+ *
+ * So, for the purpose of LSRA, we want to have a more detailed specification of what a switch node actually
+ * means and, more importantly, which registers we need, and when, for each instruction we want to issue,
+ * so that they can be correctly allocated downstream.
+ *
+ * For this purpose, this procedure performs switch lowering in two different ways:
+ *
+ * a) Represent the switch statement as a zero-index jump table construct. This means that for every destination
+ * of the switch, we will store this destination in an array of addresses and the code generator will issue
+ * a data section where this array will live and will emit code that based on the switch index, will indirect and
+ * jump to the destination specified in the jump table.
+ *
+ * For this transformation we introduce a new GT node called GT_SWITCH_TABLE that is a specialization of the switch
+ * node for jump table based switches.
+ * The overall structure of a GT_SWITCH_TABLE is:
+ *
+ * GT_SWITCH_TABLE
+ * |_________ localVar (a temporary local that holds the switch index)
+ * |_________ jumpTable (this is a special node that holds the address of the jump table array)
+ *
+ * Now, the way we morph a GT_SWITCH node into this lowered switch table node form is the following:
+ *
+ * Input: GT_SWITCH (inside a basic block whose Branch Type is BBJ_SWITCH)
+ * |_____ expr (an arbitrarily complex GT_NODE that represents the switch index)
+ *
+ * This gets transformed into the following statements inside a BBJ_COND basic block (the target would be
+ * the default case of the switch in case the conditional is evaluated to true).
+ *
+ * ----- original block, transformed
+ * GT_ASG
+ * |_____ tempLocal (a new temporary local variable used to store the switch index)
+ * |_____ expr (the index expression)
+ *
+ * GT_JTRUE
+ * |_____ GT_COND
+ * |_____ GT_GE
+ * |___ Int_Constant (This constant is the index of the default case
+ * that happens to be the highest index in the jump table).
+ * |___ tempLocal (The local variable where we stored the index expression).
+ *
+ * ----- new basic block
+ * GT_SWITCH_TABLE
+ * |_____ tempLocal
+ * |_____ jumpTable (a new jump table node that now LSRA can allocate registers for explicitly
+ * and LinearCodeGen will be responsible to generate downstream).
+ *
+ * This way there are no implicit temporaries.
+ *
+ * b) For small-sized switches, we will actually morph them into a series of conditionals of the form
+ * if (case falls into the default){ goto jumpTable[size]; // last entry in the jump table is the default case }
+ * (For the default case conditional, we'll be constructing the exact same code as the jump table case one).
+ * else if (case == firstCase){ goto jumpTable[1]; }
+ * else if (case == secondCase) { goto jumptable[2]; } and so on.
+ *
+ * This transformation is of course made in JIT-IR, not downstream to CodeGen level, so this way we no longer
+ * require internal temporaries to maintain the index we're evaluating, and we reuse existing code from
+ * LinearCodeGen to implement this instead of implementing all the control flow constructs using InstrDscs and
+ * InstrGroups downstream.
+ */
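+
+// For example (an illustrative sketch, not from the original sources): for a switch whose jump
+// table has jumpCnt == 5 entries, with jumpTab[4] being the default case, the shared guard built
+// below is the unsigned comparison
+//
+//      if ((unsigned)temp > 3)     // i.e. temp > jumpCnt - 2; also catches "negative" indices
+//          goto jumpTab[4];        // the default case
+//
+// and the remaining four cases are then expanded either as a GT_SWITCH_TABLE or as a chain of
+// compare/branch blocks, depending on minSwitchTabJumpCnt.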
+
+GenTree* Lowering::LowerSwitch(GenTree* node)
+{
+ unsigned jumpCnt;
+ unsigned targetCnt;
+ BasicBlock** jumpTab;
+
+ assert(node->gtOper == GT_SWITCH);
+
+ // The first step is to build the default case conditional construct that is
+ // shared between both kinds of expansion of the switch node.
+
+ // To avoid confusion, we'll alias m_block to originalSwitchBB
+ // that represents the node we're morphing.
+ BasicBlock* originalSwitchBB = m_block;
+ LIR::Range& switchBBRange = LIR::AsRange(originalSwitchBB);
+
+ // jumpCnt is the number of elements in the jump table array.
+ // jumpTab is the actual pointer to the jump table array.
+ // targetCnt is the number of unique targets in the jump table array.
+ jumpCnt = originalSwitchBB->bbJumpSwt->bbsCount;
+ jumpTab = originalSwitchBB->bbJumpSwt->bbsDstTab;
+ targetCnt = originalSwitchBB->NumSucc(comp);
+
+// GT_SWITCH must be a top-level node with no use.
+#ifdef DEBUG
+ {
+ LIR::Use use;
+ assert(!switchBBRange.TryGetUse(node, &use));
+ }
+#endif
+
+ JITDUMP("Lowering switch BB%02u, %d cases\n", originalSwitchBB->bbNum, jumpCnt);
+
+ // Handle a degenerate case: if the switch has only a default case, just convert it
+ // to an unconditional branch. This should only happen in minopts or with debuggable
+ // code.
+ if (targetCnt == 1)
+ {
+ JITDUMP("Lowering switch BB%02u: single target; converting to BBJ_ALWAYS\n", originalSwitchBB->bbNum);
+ noway_assert(comp->opts.MinOpts() || comp->opts.compDbgCode);
+ if (originalSwitchBB->bbNext == jumpTab[0])
+ {
+ originalSwitchBB->bbJumpKind = BBJ_NONE;
+ originalSwitchBB->bbJumpDest = nullptr;
+ }
+ else
+ {
+ originalSwitchBB->bbJumpKind = BBJ_ALWAYS;
+ originalSwitchBB->bbJumpDest = jumpTab[0];
+ }
+ // Remove extra predecessor links if there was more than one case.
+ for (unsigned i = 1; i < jumpCnt; ++i)
+ {
+ (void)comp->fgRemoveRefPred(jumpTab[i], originalSwitchBB);
+ }
+
+ // We have to get rid of the GT_SWITCH node but a child might have side effects so just assign
+ // the result of the child subtree to a temp.
+ GenTree* rhs = node->gtOp.gtOp1;
+
+ unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Lowering is creating a new local variable"));
+ comp->lvaSortAgain = true;
+ comp->lvaTable[lclNum].lvType = rhs->TypeGet();
+ comp->lvaTable[lclNum].lvRefCnt = 1;
+
+ GenTreeLclVar* store =
+ new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, rhs->TypeGet(), lclNum, BAD_IL_OFFSET);
+ store->gtOp1 = rhs;
+ store->gtFlags = (rhs->gtFlags & GTF_COMMON_MASK);
+ store->gtFlags |= GTF_VAR_DEF;
+
+ switchBBRange.InsertAfter(node, store);
+ switchBBRange.Remove(node);
+
+ return store;
+ }
+
+ noway_assert(jumpCnt >= 2);
+
+ // Spill the argument to the switch node into a local so that it can be used later.
+ unsigned blockWeight = originalSwitchBB->getBBWeight(comp);
+
+ LIR::Use use(switchBBRange, &(node->gtOp.gtOp1), node);
+ use.ReplaceWithLclVar(comp, blockWeight);
+
+ // GT_SWITCH(indexExpression) is now two statements:
+ // 1. a statement containing 'asg' (for temp = indexExpression)
+ // 2. and a statement with GT_SWITCH(temp)
+
+ assert(node->gtOper == GT_SWITCH);
+ GenTreePtr temp = node->gtOp.gtOp1;
+ assert(temp->gtOper == GT_LCL_VAR);
+ unsigned tempLclNum = temp->gtLclVarCommon.gtLclNum;
+ LclVarDsc* tempVarDsc = comp->lvaTable + tempLclNum;
+ var_types tempLclType = tempVarDsc->TypeGet();
+
+ BasicBlock* defaultBB = jumpTab[jumpCnt - 1];
+ BasicBlock* followingBB = originalSwitchBB->bbNext;
+
+ /* Is the number of cases right for a test and jump switch? */
+ const bool fFirstCaseFollows = (followingBB == jumpTab[0]);
+ const bool fDefaultFollows = (followingBB == defaultBB);
+
+ unsigned minSwitchTabJumpCnt = 2; // table is better than just 2 cmp/jcc
+
+ // This means really just a single cmp/jcc (aka a simple if/else)
+ if (fFirstCaseFollows || fDefaultFollows)
+ {
+ minSwitchTabJumpCnt++;
+ }
+
+#if defined(_TARGET_ARM_)
+ // On ARM for small switch tables we will
+ // generate a sequence of compare and branch instructions
+ // because the code to load the base of the switch
+ // table is huge and hideous due to the relocation... :(
+ minSwitchTabJumpCnt += 2;
+#endif // _TARGET_ARM_
+
+ // Once we have the temporary variable, we construct the conditional branch for
+ // the default case. As stated above, this conditional is being shared between
+ // both GT_SWITCH lowering code paths.
+ // This condition is of the form: if (temp > jumpTableLength - 2){ goto jumpTable[jumpTableLength - 1]; }
+ GenTreePtr gtDefaultCaseCond = comp->gtNewOperNode(GT_GT, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewIconNode(jumpCnt - 2, TYP_INT));
+
+ // Make sure we perform an unsigned comparison, just in case the switch index in 'temp'
+ // is now less than zero (that would also hit the default case).
+ gtDefaultCaseCond->gtFlags |= GTF_UNSIGNED;
+
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ GenTreePtr gtDefaultCaseJump = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtDefaultCaseCond);
+ gtDefaultCaseJump->gtFlags = node->gtFlags;
+
+ LIR::Range condRange = LIR::SeqTree(comp, gtDefaultCaseJump);
+ switchBBRange.InsertAtEnd(std::move(condRange));
+
+ BasicBlock* afterDefaultCondBlock = comp->fgSplitBlockAfterNode(originalSwitchBB, condRange.LastNode());
+
+ // afterDefaultCondBlock is now the switch, and all the switch targets have it as a predecessor.
+ // originalSwitchBB is now a BBJ_NONE, and there is a predecessor edge in afterDefaultCondBlock
+ // representing the fall-through flow from originalSwitchBB.
+ assert(originalSwitchBB->bbJumpKind == BBJ_NONE);
+ assert(originalSwitchBB->bbNext == afterDefaultCondBlock);
+ assert(afterDefaultCondBlock->bbJumpKind == BBJ_SWITCH);
+ assert(afterDefaultCondBlock->bbJumpSwt->bbsHasDefault);
+ assert(afterDefaultCondBlock->isEmpty()); // Nothing here yet.
+
+ // The GT_SWITCH code is still in originalSwitchBB (it will be removed later).
+
+ // Turn originalSwitchBB into a BBJ_COND.
+ originalSwitchBB->bbJumpKind = BBJ_COND;
+ originalSwitchBB->bbJumpDest = jumpTab[jumpCnt - 1];
+
+ // Fix the pred for the default case: the default block target still has originalSwitchBB
+ // as a predecessor, but the fgSplitBlockAfterNode() call above moved all predecessors to point
+ // to afterDefaultCondBlock.
+ flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[jumpCnt - 1], afterDefaultCondBlock);
+ comp->fgAddRefPred(jumpTab[jumpCnt - 1], originalSwitchBB, oldEdge);
+
+ // If we originally had 2 unique successors, check to see whether there is a unique
+ // non-default case, in which case we can eliminate the switch altogether.
+ // Note that the single unique successor case is handled above.
+ BasicBlock* uniqueSucc = nullptr;
+ if (targetCnt == 2)
+ {
+ uniqueSucc = jumpTab[0];
+ noway_assert(jumpCnt >= 2);
+ for (unsigned i = 1; i < jumpCnt - 1; i++)
+ {
+ if (jumpTab[i] != uniqueSucc)
+ {
+ uniqueSucc = nullptr;
+ break;
+ }
+ }
+ }
+ if (uniqueSucc != nullptr)
+ {
+ // If the unique successor immediately follows this block, we have nothing to do -
+ // it will simply fall-through after we remove the switch, below.
+ // Otherwise, make this a BBJ_ALWAYS.
+ // Now, fixup the predecessor links to uniqueSucc. In the original jumpTab:
+ // jumpTab[jumpCnt - 1] was the default target, which we handled above,
+ // jumpTab[0] is the first target, and we'll leave that predecessor link.
+ // Remove any additional predecessor links to uniqueSucc.
+ for (unsigned i = 1; i < jumpCnt - 1; ++i)
+ {
+ assert(jumpTab[i] == uniqueSucc);
+ (void)comp->fgRemoveRefPred(uniqueSucc, afterDefaultCondBlock);
+ }
+ if (afterDefaultCondBlock->bbNext == uniqueSucc)
+ {
+ afterDefaultCondBlock->bbJumpKind = BBJ_NONE;
+ afterDefaultCondBlock->bbJumpDest = nullptr;
+ }
+ else
+ {
+ afterDefaultCondBlock->bbJumpKind = BBJ_ALWAYS;
+ afterDefaultCondBlock->bbJumpDest = uniqueSucc;
+ }
+ }
+ // If the number of possible destinations is small enough, we proceed to expand the switch
+ // into a series of conditional branches, otherwise we follow the jump table based switch
+ // transformation.
+ else if (jumpCnt < minSwitchTabJumpCnt)
+ {
+ // Lower the switch into a series of compare and branch IR trees.
+ //
+ // In this case we will morph the node in the following way:
+ // 1. Generate a JTRUE statement to evaluate the default case. (This happens above.)
+ // 2. Start splitting the switch basic block into subsequent basic blocks, each of which will contain
+ // a statement that is responsible for performing a comparison of the table index and conditional
+ // branch if equal.
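+ //
+ // As an illustrative sketch (block numbers are made up), a switch on 'temp' with cases {0, 1, 2}
+ // plus a default lowers roughly to:
+ //   BB01: if ((unsigned)temp > 2) goto BBdefault   // originalSwitchBB, now BBJ_COND
+ //   BB02: if (temp == 0) goto BBcase0              // afterDefaultCondBlock
+ //   BB03: if (temp == 1) goto BBcase1
+ //   BB04: goto BBcase2                             // the last case needs no compare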
+
+ JITDUMP("Lowering switch BB%02u: using compare/branch expansion\n", originalSwitchBB->bbNum);
+
+ // We'll use 'afterDefaultCondBlock' for the first conditional. After that, we'll add new
+ // blocks. If we end up not needing it at all (say, if all the non-default cases just fall through),
+ // we'll delete it.
+ bool fUsedAfterDefaultCondBlock = false;
+ BasicBlock* currentBlock = afterDefaultCondBlock;
+ LIR::Range* currentBBRange = &LIR::AsRange(currentBlock);
+
+ // Walk entries 0 through jumpCnt - 2 (all cases except the default). If a case target follows, ignore it and let it fall through.
+ // If no case target follows, the last one doesn't need to be a compare/branch: it can be an
+ // unconditional branch.
+ bool fAnyTargetFollows = false;
+ for (unsigned i = 0; i < jumpCnt - 1; ++i)
+ {
+ assert(currentBlock != nullptr);
+
+ // Remove the switch from the predecessor list of this case target's block.
+ // We'll add the proper new predecessor edge later.
+ flowList* oldEdge = comp->fgRemoveRefPred(jumpTab[i], afterDefaultCondBlock);
+
+ if (jumpTab[i] == followingBB)
+ {
+ // This case label follows the switch; let it fall through.
+ fAnyTargetFollows = true;
+ continue;
+ }
+
+ // We need a block to put in the new compare and/or branch.
+ // If we haven't used the afterDefaultCondBlock yet, then use that.
+ if (fUsedAfterDefaultCondBlock)
+ {
+ BasicBlock* newBlock = comp->fgNewBBafter(BBJ_NONE, currentBlock, true);
+ comp->fgAddRefPred(newBlock, currentBlock); // The fall-through predecessor.
+ currentBlock = newBlock;
+ currentBBRange = &LIR::AsRange(currentBlock);
+ }
+ else
+ {
+ assert(currentBlock == afterDefaultCondBlock);
+ fUsedAfterDefaultCondBlock = true;
+ }
+
+ // We're going to have a branch, either a conditional or unconditional,
+ // to the target. Set the target.
+ currentBlock->bbJumpDest = jumpTab[i];
+
+ // Wire up the predecessor list for the "branch" case.
+ comp->fgAddRefPred(jumpTab[i], currentBlock, oldEdge);
+
+ if (!fAnyTargetFollows && (i == jumpCnt - 2))
+ {
+ // We're processing the last one, and there is no fall through from any case
+ // to the following block, so we can use an unconditional branch to the final
+ // case: there is no need to compare against the case index, since it's
+ // guaranteed to be taken (since the default case was handled first, above).
+
+ currentBlock->bbJumpKind = BBJ_ALWAYS;
+ }
+ else
+ {
+ // Otherwise, it's a conditional branch. Set the branch kind, then add the
+ // condition statement.
+ currentBlock->bbJumpKind = BBJ_COND;
+
+ // Now, build the conditional statement for the current case that is
+ // being evaluated:
+ // GT_JTRUE
+ // |__ GT_COND
+ // |____GT_EQ
+ // |____ (switchIndex) (The temp variable)
+ // |____ (ICon) (The actual case constant)
+ GenTreePtr gtCaseCond =
+ comp->gtNewOperNode(GT_EQ, TYP_INT, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewIconNode(i, TYP_INT));
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
+ LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
+ currentBBRange->InsertAtEnd(std::move(caseRange));
+ }
+ }
+
+ if (fAnyTargetFollows)
+ {
+ // There is a fall-through to the following block. In the loop
+ // above, we deleted all the predecessor edges from the switch.
+ // In this case, we need to add one back.
+ comp->fgAddRefPred(currentBlock->bbNext, currentBlock);
+ }
+
+ if (!fUsedAfterDefaultCondBlock)
+ {
+ // All the cases were fall-through! We don't need this block.
+ // Convert it from BBJ_SWITCH to BBJ_NONE and unset the BBF_DONT_REMOVE flag
+ // so fgRemoveBlock() doesn't complain.
+ JITDUMP("Lowering switch BB%02u: all switch cases were fall-through\n", originalSwitchBB->bbNum);
+ assert(currentBlock == afterDefaultCondBlock);
+ assert(currentBlock->bbJumpKind == BBJ_SWITCH);
+ currentBlock->bbJumpKind = BBJ_NONE;
+ currentBlock->bbFlags &= ~BBF_DONT_REMOVE;
+ comp->fgRemoveBlock(currentBlock, /* unreachable */ false); // It's an empty block.
+ }
+ }
+ else
+ {
+ // Lower the switch into an indirect branch using a jump table:
+ //
+ // 1. Create the constant for the default case
+ // 2. Generate a GT_GT condition to compare against the default case
+ // 3. Generate a GT_JTRUE to jump.
+ // 4. Load the jump table address into a local (presumably the just
+ // created constant for GT_SWITCH).
+ // 5. Create a new node for the lowered switch, this will both generate
+ // the branch table and also will be responsible for the indirect
+ // branch.
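+ //
+ // As a rough sketch (names are illustrative), the switch block ends up containing:
+ //   GT_SWITCH_TABLE
+ //     +-- GT_LCL_VAR temp    (the spilled switch index)
+ //     +-- GT_JMPTABLE        (the jump table node created by gtNewJmpTableNode)
+ // and codegen later emits the table itself plus the indirect branch through it.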
+
+ JITDUMP("Lowering switch BB%02u: using jump table expansion\n", originalSwitchBB->bbNum);
+
+ GenTreePtr gtTableSwitch =
+ comp->gtNewOperNode(GT_SWITCH_TABLE, TYP_VOID, comp->gtNewLclvNode(tempLclNum, tempLclType),
+ comp->gtNewJmpTableNode());
+ /* Increment the lvRefCnt and lvRefCntWtd for temp */
+ tempVarDsc->incRefCnts(blockWeight, comp);
+
+ // this block no longer branches to the default block
+ afterDefaultCondBlock->bbJumpSwt->removeDefault();
+ comp->fgInvalidateSwitchDescMapEntry(afterDefaultCondBlock);
+
+ LIR::Range& afterDefaultCondBBRange = LIR::AsRange(afterDefaultCondBlock);
+ afterDefaultCondBBRange.InsertAtEnd(LIR::SeqTree(comp, gtTableSwitch));
+ }
+
+ GenTree* next = node->gtNext;
+
+ // Get rid of the GT_SWITCH(temp).
+ switchBBRange.Remove(node->gtOp.gtOp1);
+ switchBBRange.Remove(node);
+
+ return next;
+}
+
+// NOTE: this method deliberately does not update the call arg table. It must only
+// be used by NewPutArg and LowerArg; these functions are responsible for updating
+// the call arg table as necessary.
+void Lowering::ReplaceArgWithPutArgOrCopy(GenTree** argSlot, GenTree* putArgOrCopy)
+{
+ assert(argSlot != nullptr);
+ assert(*argSlot != nullptr);
+ assert(putArgOrCopy->OperGet() == GT_PUTARG_REG || putArgOrCopy->OperGet() == GT_PUTARG_STK ||
+ putArgOrCopy->OperGet() == GT_COPY);
+
+ GenTree* arg = *argSlot;
+
+ // Replace the argument with the putarg/copy
+ *argSlot = putArgOrCopy;
+ putArgOrCopy->gtOp.gtOp1 = arg;
+
+ // Insert the putarg/copy into the block
+ BlockRange().InsertAfter(arg, putArgOrCopy);
+}
+
+//------------------------------------------------------------------------
+// NewPutArg: rewrites the tree to put an arg in a register or on the stack.
+//
+// Arguments:
+// call - the call whose arg is being rewritten.
+// arg - the arg being rewritten.
+// info - the ArgTabEntry information for the argument.
+// type - the type of the argument.
+//
+// Return Value:
+// The new tree that was created to put the arg in the right place
+// or the incoming arg if the arg tree was not rewritten.
+//
+// Assumptions:
+// call, arg, and info must be non-null.
+//
+// Notes:
+// For System V systems with native struct passing (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined)
+// this method allocates a single GT_PUTARG_REG for a one-eightbyte struct and a GT_LIST of two GT_PUTARG_REGs
+// for two eightbyte structs.
+//
+// For stack-passed structs the method generates a GT_PUTARG_STK tree. For System V systems with native struct passing
+// (i.e. FEATURE_UNIX_AMD64_STRUCT_PASSING defined) this method also sets the GC pointer count and the pointer
+// layout object, so the codegen of the GT_PUTARG_STK can use this to optimize copying the struct to the stack by value
+// (using block copy primitives for non-GC pointers, and a single TARGET_POINTER_SIZE copy, with GC info recorded, for GC slots).
+//
+GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr info, var_types type)
+{
+ assert(call != nullptr);
+ assert(arg != nullptr);
+ assert(info != nullptr);
+
+ GenTreePtr putArg = nullptr;
+ bool updateArgTable = true;
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(type))
+ {
+ // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it.
+ // Therefore, we don't update the arg table entry.
+ updateArgTable = false;
+ type = TYP_INT;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ bool isOnStack = true;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varTypeIsStruct(type))
+ {
+ isOnStack = !info->structDesc.passedInRegisters;
+ }
+ else
+ {
+ isOnStack = info->regNum == REG_STK;
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ isOnStack = info->regNum == REG_STK;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ if (!isOnStack)
+ {
+#ifdef FEATURE_SIMD
+ // TYP_SIMD8 is passed in an integer register. We need the putArg node to be of the int type.
+ if (type == TYP_SIMD8 && genIsValidIntReg(info->regNum))
+ {
+ type = TYP_LONG;
+ }
+#endif // FEATURE_SIMD
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (info->isStruct)
+ {
+ // The following code makes sure a register passed struct arg is moved to
+ // the register before the call is made.
+ // There are two cases (comments added in the code below.)
+ // 1. The struct is of size one eightbyte:
+ // In this case a new tree is created that is GT_PUTARG_REG
+ // with a op1 the original argument.
+ // 2. The struct is contained in 2 eightbytes:
+ // in this case the arg comes as a GT_LIST of two GT_LCL_FLDs - the two eightbytes of the struct.
+ // The code creates a GT_PUTARG_REG node for each GT_LCL_FLD in the GT_LIST
+ // and splices it in the list with the corresponding original GT_LCL_FLD tree as op1.
+
+ assert(info->structDesc.eightByteCount != 0);
+
+ if (info->structDesc.eightByteCount == 1)
+ {
+ // clang-format off
+ // Case 1 above: Create a GT_PUTARG_REG node with op1 of the original tree.
+ //
+ // Here the IR for this operation:
+ // lowering call :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N----arg0 in rdi + --* lclFld int V02 tmp0[+0](last use)
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // args :
+ // lowering arg : (13, 11)[000070] -- - XG-- - R-- - *storeIndir int
+ //
+ // late :
+ // lowering arg : N009(3, 4)[000054] ------ - N---- * lclFld int V02 tmp0[+0](last use)
+ // new node is : (3, 4)[000071] ------------ * putarg_reg int RV
+ //
+ // after :
+ // N001(3, 2)[000017] ------ - N---- / --* &lclVar byref V00 loc0
+ // N003(6, 5)[000052] * --XG------ - / --* indir int
+ // N004(3, 2)[000046] ------ - N---- + --* &lclVar byref V02 tmp0
+ // (13, 11)[000070] -- - XG-- - R-- - arg0 in out + 00 / --* storeIndir int
+ // N009(3, 4)[000054] ------ - N---- | / --* lclFld int V02 tmp0[+0](last use)
+ // (3, 4)[000071] ------------arg0 in rdi + --* putarg_reg int RV
+ // N011(33, 21)[000018] --CXG------ - *call void Test.Foo.test1
+ //
+ // clang-format on
+
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ else if (info->structDesc.eightByteCount == 2)
+ {
+ // clang-format off
+ // Case 2 above: Convert the LCL_FLDs to PUTARG_REG
+ //
+ // lowering call :
+ // N001(3, 2) [000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2) [000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1) [000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4) [000061] ------ - N----arg0 in rdi + --* lclFld long V03 tmp1[+0]
+ // N010(3, 4) [000063] ------------arg0 in rsi + --* lclFld long V03 tmp1[+8](last use)
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // args :
+ // lowering arg : N007(12, 12)[000059] - A--G---- - L - *copyBlk void
+ //
+ // late :
+ // lowering arg : N012(11, 13)[000065] ------------ * <list> struct
+ //
+ // after :
+ // N001(3, 2)[000025] ------ - N----Source / --* &lclVar byref V01 loc1
+ // N003(3, 2)[000056] ------ - N----Destination + --* &lclVar byref V03 tmp1
+ // N006(1, 1)[000058] ------------ + --* const int 16
+ // N007(12, 12)[000059] - A--G---- - L - arg0 SETUP / --* copyBlk void
+ // N009(3, 4)[000061] ------ - N---- | / --* lclFld long V03 tmp1[+0]
+ // (3, 4)[000072] ------------arg0 in rdi + --* putarg_reg long
+ // N010(3, 4)[000063] ------------ | / --* lclFld long V03 tmp1[+8](last use)
+ // (3, 4)[000073] ------------arg0 in rsi + --* putarg_reg long
+ // N014(40, 31)[000026] --CXG------ - *call void Test.Foo.test2
+ //
+ // clang-format on
+
+ assert(arg->OperGet() == GT_LIST);
+
+ GenTreeArgList* argListPtr = arg->AsArgList();
+ assert(argListPtr->IsAggregate());
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ // Create a new GT_PUTARG_REG node with op1 the original GT_LCL_FLD.
+ GenTreePtr newOper = comp->gtNewOperNode(
+ GT_PUTARG_REG,
+ comp->GetTypeFromClassificationAndSizes(info->structDesc.eightByteClassifications[ctr],
+ info->structDesc.eightByteSizes[ctr]),
+ argListPtr->gtOp.gtOp1);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ }
+
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+ {
+ assert(false &&
+ "Illegal count of eightbytes for the CLR type system"); // No more than 2 eightbytes for the CLR.
+ }
+ }
+ else
+#else // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if FEATURE_MULTIREG_ARGS
+ if ((info->numRegs > 1) && (arg->OperGet() == GT_LIST))
+ {
+ assert(arg->OperGet() == GT_LIST);
+
+ GenTreeArgList* argListPtr = arg->AsArgList();
+ assert(argListPtr->IsAggregate());
+
+ for (unsigned ctr = 0; argListPtr != nullptr; argListPtr = argListPtr->Rest(), ctr++)
+ {
+ GenTreePtr curOp = argListPtr->gtOp.gtOp1;
+ var_types curTyp = curOp->TypeGet();
+
+ // Create a new GT_PUTARG_REG node with op1
+ GenTreePtr newOper = comp->gtNewOperNode(GT_PUTARG_REG, curTyp, curOp);
+
+ // Splice in the new GT_PUTARG_REG node in the GT_LIST
+ ReplaceArgWithPutArgOrCopy(&argListPtr->gtOp.gtOp1, newOper);
+ }
+
+ // Just return arg. The GT_LIST is not replaced.
+ // Nothing more to do.
+ return arg;
+ }
+ else
+#endif // FEATURE_MULTIREG_ARGS
+#endif // not defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ putArg = comp->gtNewOperNode(GT_PUTARG_REG, type, arg);
+ }
+ }
+ else
+ {
+ // Mark this one as tail call arg if it is a fast tail call.
+ // This provides the info to put this argument in the incoming arg area slot
+ // instead of the outgoing arg area slot.
+
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(assert(info->isStruct == varTypeIsStruct(type))); // Make sure state is
+ // correct
+
+#if FEATURE_FASTTAILCALL
+ putArg = new (comp, GT_PUTARG_STK)
+ GenTreePutArgStk(GT_PUTARG_STK, type, arg,
+ info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct),
+ call->IsFastTailCall() DEBUGARG(call));
+#else
+ putArg = new (comp, GT_PUTARG_STK)
+ GenTreePutArgStk(GT_PUTARG_STK, type, arg,
+ info->slotNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->numSlots)
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(info->isStruct) DEBUGARG(call));
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the ArgTabEntry indicates that this arg is a struct
+ // get and store the number of slots that are references.
+ // This is later used by the PUT_ARG_STK codegen for structs to decide whether, and for how many
+ // slots, single eight-byte copies must be done (only for reference slots), so that GC info is emitted.
+ // For non-reference slots, faster/smaller instructions are used -
+ // pair copying using XMM registers or rep mov instructions.
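+ // For example (illustrative): for a struct laid out as { object a; long b; object c } the layout
+ // would be { GC, NonGC, GC } with numRefs == 2, so codegen copies slots 0 and 2 with GC-reporting
+ // moves and is free to use a plain block copy for slot 1.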
+ if (info->isStruct)
+ {
+ unsigned numRefs = 0;
+ BYTE* gcLayout = new (comp, CMK_Codegen) BYTE[info->numSlots];
+ // We use GT_OBJ for non-SIMD struct arguments. However, for
+ // SIMD arguments the GT_OBJ has already been transformed.
+ if (arg->gtOper != GT_OBJ)
+ {
+ assert(varTypeIsSIMD(arg));
+ }
+ else
+ {
+ assert(!varTypeIsSIMD(arg));
+ numRefs = comp->info.compCompHnd->getClassGClayout(arg->gtObj.gtClass, gcLayout);
+ }
+
+ putArg->AsPutArgStk()->setGcPointers(numRefs, gcLayout);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+
+ if (arg->InReg())
+ {
+ putArg->SetInReg();
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (info->isStruct)
+ {
+ if (info->structDesc.passedInRegisters)
+ {
+ putArg->SetInReg();
+ }
+ }
+#endif
+
+ JITDUMP("new node is : ");
+ DISPNODE(putArg);
+ JITDUMP("\n");
+
+ if (arg->gtFlags & GTF_LATE_ARG)
+ {
+ putArg->gtFlags |= GTF_LATE_ARG;
+ }
+ else if (updateArgTable)
+ {
+ info->node = putArg;
+ }
+ return putArg;
+}
+
+//------------------------------------------------------------------------
+// LowerArg: Lower one argument of a call. This entails splicing a "putarg" node between
+// the argument evaluation and the call. This is the point at which the source is
+// consumed and the value transitions from control of the register allocator to the calling
+// convention.
+//
+// Arguments:
+// call - The call node
+// ppArg - Pointer to the call argument pointer. We might replace the call argument by
+// changing *ppArg.
+//
+// Return Value:
+// None.
+//
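+// Notes:
+//    As an illustrative sketch (not actual dump output), for a register-passed argument the LIR goes from
+//        t1 = LCL_VAR V01  ...  CALL foo(t1)
+//    to
+//        t1 = LCL_VAR V01 ; t2 = PUTARG_REG t1  ...  CALL foo(t2)
+//    i.e. the putarg node consumes the evaluated argument and becomes the node the arg table entry points at.
+//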
+void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
+{
+ GenTreePtr arg = *ppArg;
+
+ JITDUMP("lowering arg : ");
+ DISPNODE(arg);
+
+ // No assignments should remain by Lowering.
+ assert(!arg->OperIsAssignment());
+ assert(!arg->OperIsPutArgStk());
+
+ // Assignments/stores at this level are not really placing an argument.
+ // They are setting up temporary locals that will later be placed into
+ // outgoing regs or stack.
+ if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
+ {
+ return;
+ }
+
+ fgArgTabEntryPtr info = comp->gtArgEntryByNode(call, arg);
+ assert(info->node == arg);
+ bool isReg = (info->regNum != REG_STK);
+ var_types type = arg->TypeGet();
+
+ if (varTypeIsSmall(type))
+ {
+ // Normalize 'type'; it represents the item that we will be storing in the outgoing args area.
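+ // (For example, a TYP_UBYTE or TYP_SHORT argument is widened here so the putarg node is typed as a
+ // full TYP_INT.)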
+ type = TYP_INT;
+ }
+
+ GenTreePtr putArg;
+
+ // If we hit this we are probably double-lowering.
+ assert(!arg->OperIsPutArg());
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(type))
+ {
+ if (isReg)
+ {
+ NYI("Lowering of long register argument");
+ }
+
+ // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to
+ // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order.
+ noway_assert(arg->OperGet() == GT_LONG);
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+
+ GenTreePtr putArgLo = NewPutArg(call, argLo, info, type);
+ GenTreePtr putArgHi = NewPutArg(call, argHi, info, type);
+
+ arg->gtOp.gtOp1 = putArgLo;
+ arg->gtOp.gtOp2 = putArgHi;
+
+ BlockRange().InsertBefore(arg, putArgHi, putArgLo);
+
+ // The execution order now looks like this:
+ // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG)
+
+ assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
+ arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first.
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+
+#ifdef _TARGET_ARM64_
+ // For a vararg call, reg args should all be integer.
+ // Insert a copy to move the float value to an integer register.
+ if (call->IsVarargs() && varTypeIsFloating(type))
+ {
+ var_types intType = (type == TYP_DOUBLE) ? TYP_LONG : TYP_INT;
+ GenTreePtr intArg = comp->gtNewOperNode(GT_COPY, intType, arg);
+
+ info->node = intArg;
+ ReplaceArgWithPutArgOrCopy(ppArg, intArg);
+
+ // Update arg/type with new ones.
+ arg = intArg;
+ type = intType;
+ }
+#endif
+
+ putArg = NewPutArg(call, arg, info, type);
+
+ // In the case of a register-passable struct (in one or two registers),
+ // NewPutArg returns a new node (a GT_PUTARG_REG, or a GT_LIST with two GT_PUTARG_REGs).
+ // If an extra node is returned, splice it in the right place in the tree.
+ if (arg != putArg)
+ {
+ ReplaceArgWithPutArgOrCopy(ppArg, putArg);
+ }
+ }
+}
+
+// do lowering steps for each arg of a call
+void Lowering::LowerArgsForCall(GenTreeCall* call)
+{
+ JITDUMP("objp:\n======\n");
+ if (call->gtCallObjp)
+ {
+ LowerArg(call, &call->gtCallObjp);
+ }
+
+ GenTreeArgList* args = call->gtCallArgs;
+
+ JITDUMP("\nargs:\n======\n");
+ for (; args; args = args->Rest())
+ {
+ LowerArg(call, &args->Current());
+ }
+
+ JITDUMP("\nlate:\n======\n");
+ for (args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ LowerArg(call, &args->Current());
+ }
+}
+
+// helper that creates a node representing a relocatable physical address computation
+// (optionally specifying the register to place it in)
+GenTree* Lowering::AddrGen(ssize_t addr, regNumber reg)
+{
+ // this should end up in codegen as : instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, reg, addr)
+ GenTree* result = comp->gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+
+ result->gtRegNum = reg;
+
+ return result;
+}
+
+// variant that takes a void*
+GenTree* Lowering::AddrGen(void* addr, regNumber reg)
+{
+ return AddrGen((ssize_t)addr, reg);
+}
+
+// do lowering steps for a call
+// this includes:
+// - adding the placement nodes (either stack or register variety) for arguments
+// - lowering the expression that calculates the target address
+// - adding nodes for other operations that occur after the call sequence starts and before
+// control transfer occurs (profiling and tail call helpers, pinvoke incantations)
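+// For example (a rough sketch, not an exact dump): a direct call whose target is reached through a
+// single indirection cell ends up with call->gtControlExpr = IND(CNS_INT<handle> cellAddr), and that
+// control expression is sequenced and spliced into the LIR range just before the call node.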
+//
+void Lowering::LowerCall(GenTree* node)
+{
+ GenTreeCall* call = node->AsCall();
+
+ JITDUMP("lowering call (before):\n");
+ DISPTREERANGE(BlockRange(), call);
+ JITDUMP("\n");
+
+ LowerArgsForCall(call);
+
+// RyuJIT arm is not set up for lowered call control
+#ifndef _TARGET_ARM_
+
+ // note that everything generated from this point on runs AFTER the outgoing args are placed
+ GenTree* result = nullptr;
+
+ // for x86, this is where we record ESP for checking later to make sure stack is balanced
+
+ // Check for Delegate.Invoke(). If so, we inline it. We get the
+ // target-object and target-function from the delegate-object, and do
+ // an indirect call.
+ if (call->IsDelegateInvoke())
+ {
+ result = LowerDelegateInvoke(call);
+ }
+ else
+ {
+ // Virtual and interface calls
+ switch (call->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+ result = LowerVirtualStubCall(call);
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ // stub dispatching is off or this is not a virtual call (could be a tailcall)
+ result = LowerVirtualVtableCall(call);
+ break;
+
+ case GTF_CALL_NONVIRT:
+ if (call->IsUnmanaged())
+ {
+ result = LowerNonvirtPinvokeCall(call);
+ }
+ else if (call->gtCallType == CT_INDIRECT)
+ {
+ result = LowerIndirectNonvirtCall(call);
+ }
+ else
+ {
+ result = LowerDirectCall(call);
+ }
+ break;
+
+ default:
+ noway_assert(!"strange call type");
+ break;
+ }
+ }
+
+ if (call->IsTailCallViaHelper())
+ {
+ // Either controlExpr or gtCallAddr must contain real call target.
+ if (result == nullptr)
+ {
+ assert(call->gtCallType == CT_INDIRECT);
+ assert(call->gtCallAddr != nullptr);
+ result = call->gtCallAddr;
+ }
+
+ result = LowerTailCallViaHelper(call, result);
+ }
+ else if (call->IsFastTailCall())
+ {
+ LowerFastTailCall(call);
+ }
+
+ if (result != nullptr)
+ {
+ LIR::Range resultRange = LIR::SeqTree(comp, result);
+
+ JITDUMP("results of lowering call:\n");
+ DISPRANGE(resultRange);
+
+ GenTree* insertionPoint = call;
+ if (!call->IsTailCallViaHelper())
+ {
+ // The controlExpr should go before the gtCallCookie and the gtCallAddr, if they exist
+ //
+ // TODO-LIR: find out what's really required here, as this is currently a tree order
+ // dependency.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ bool isClosed = false;
+ if (call->gtCallCookie != nullptr)
+ {
+#ifdef DEBUG
+ GenTree* firstCallAddrNode = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ assert(call->gtCallCookie->Precedes(firstCallAddrNode));
+#endif // DEBUG
+
+ insertionPoint = BlockRange().GetTreeRange(call->gtCallCookie, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+ else if (call->gtCallAddr != nullptr)
+ {
+ insertionPoint = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+ }
+ }
+
+ BlockRange().InsertBefore(insertionPoint, std::move(resultRange));
+
+ call->gtControlExpr = result;
+ }
+#endif //!_TARGET_ARM_
+
+ if (comp->opts.IsJit64Compat())
+ {
+ CheckVSQuirkStackPaddingNeeded(call);
+ }
+
+ JITDUMP("lowering call (after):\n");
+ DISPTREERANGE(BlockRange(), call);
+ JITDUMP("\n");
+}
+
+// Though the issue described below is fixed in the IntelliTrace dll of VS2015 (a.k.a. Dev14),
+// we still need this quirk for desktop so that older versions of VS (e.g. VS2010/2012)
+// continue to work.
+// This quirk is excluded from other targets that have no back compat burden.
+//
+// Quirk for VS debug-launch scenario to work:
+// See if this is a PInvoke call with exactly one param that is the address of a struct local.
+// In such a case indicate to frame-layout logic to add 16-bytes of padding
+// between save-reg area and locals. This is to protect against the buffer
+// overrun bug in microsoft.intellitrace.11.0.0.dll!ProfilerInterop.InitInterop().
+//
+// A work-around to this bug is to disable IntelliTrace debugging
+// (VS->Tools->Options->IntelliTrace->Enable IntelliTrace - uncheck this option).
+// The reason why this works on Jit64 is that at the point of AV the call stack is
+//
+// GetSystemInfo() Native call
+// IL_Stub generated for PInvoke declaration.
+// ProfilerInterface::InitInterop()
+// ProfilerInterface.Cctor()
+// VM asm worker
+//
+// The cctor body has just the call to InitInterop(). VM asm worker is holding
+// something in rbx that is used immediately after the Cctor call. Jit64 generated
+// InitInterop() method is pushing the registers in the following order
+//
+// rbx
+// rbp
+// rsi
+// rdi
+// r12
+// r13
+// Struct local
+//
+// Due to buffer overrun, rbx doesn't get impacted. Whereas RyuJIT jitted code of
+// the same method is pushing regs in the following order
+//
+// rbp
+// rdi
+// rsi
+// rbx
+// struct local
+//
+// Therefore as a fix, we add padding between save-reg area and locals to
+// make this scenario work against JB.
+//
+// Note: If this quirk gets broken due to other JIT optimizations, we should consider
+// a more tolerant fix. One such fix is to pad the struct.
+void Lowering::CheckVSQuirkStackPaddingNeeded(GenTreeCall* call)
+{
+ assert(comp->opts.IsJit64Compat());
+
+#ifdef _TARGET_AMD64_
+ // Confine this to IL stub calls which aren't marked as unmanaged.
+ if (call->IsPInvoke() && !call->IsUnmanaged())
+ {
+ bool paddingNeeded = false;
+ GenTreePtr firstPutArgReg = nullptr;
+ for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_REG)
+ {
+ if (firstPutArgReg == nullptr)
+ {
+ firstPutArgReg = tmp;
+ GenTreePtr op1 = firstPutArgReg->gtOp.gtOp1;
+
+ if (op1->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
+ // TODO-1stClassStructs: This is here to duplicate previous behavior,
+ // but is not needed because the scenario being quirked did not involve
+ // a SIMD or enregisterable struct.
+ // if(comp->lvaTable[lclNum].TypeGet() == TYP_STRUCT)
+ if (varTypeIsStruct(comp->lvaTable[lclNum].TypeGet()))
+ {
+ // First arg is addr of a struct local.
+ paddingNeeded = true;
+ }
+ else
+ {
+ // Not a struct local.
+ assert(paddingNeeded == false);
+ break;
+ }
+ }
+ else
+ {
+ // First arg is not a local var addr.
+ assert(paddingNeeded == false);
+ break;
+ }
+ }
+ else
+ {
+ // Has more than one arg.
+ paddingNeeded = false;
+ break;
+ }
+ }
+ }
+
+ if (paddingNeeded)
+ {
+ comp->compVSQuirkStackPaddingNeeded = VSQUIRK_STACK_PAD;
+ }
+ }
+#endif // _TARGET_AMD64_
+}
+
+// Inserts profiler hook, GT_PROF_HOOK for a tail call node.
+//
+// We need to insert this after all nested calls, but before all the arguments to this call have been set up.
+// To do this, we look for the first GT_PUTARG_STK or GT_PUTARG_REG, and insert the hook immediately before
+// that. If there are no args, then it should be inserted before the call node.
+//
+// For example:
+// * stmtExpr void (top level) (IL 0x000...0x010)
+// arg0 SETUP | /--* argPlace ref REG NA $c5
+// this in rcx | | /--* argPlace ref REG NA $c1
+// | | | /--* call ref System.Globalization.CultureInfo.get_InvariantCulture $c2
+// arg1 SETUP | | +--* st.lclVar ref V02 tmp1 REG NA $c2
+// | | | /--* lclVar ref V02 tmp1 u : 2 (last use) REG NA $c2
+// arg1 in rdx | | +--* putarg_reg ref REG NA
+// | | | /--* lclVar ref V00 arg0 u : 2 (last use) REG NA $80
+// this in rcx | | +--* putarg_reg ref REG NA
+// | | /--* call nullcheck ref System.String.ToLower $c5
+// | | { * stmtExpr void (embedded)(IL 0x000... ? ? ? )
+// | | { \--* prof_hook void REG NA
+// arg0 in rcx | +--* putarg_reg ref REG NA
+// control expr | +--* const(h) long 0x7ffe8e910e98 ftn REG NA
+// \--* call void System.Runtime.Remoting.Identity.RemoveAppNameOrAppGuidIfNecessary $VN.Void
+//
+// In this case, the GT_PUTARG_REG src is a nested call. We need to put the instructions after that call
+// (as shown). We assume that of all the GT_PUTARG_*, only the first one can have a nested call.
+//
+// Params:
+// call - tail call node
+// insertionPoint - insertion point provided by the caller; if null, the
+// profiler hook is inserted before the args are set up
+// but after all arg side effects are computed.
+void Lowering::InsertProfTailCallHook(GenTreeCall* call, GenTree* insertionPoint)
+{
+ assert(call->IsTailCall());
+ assert(comp->compIsProfilerHookNeeded());
+
+ if (insertionPoint == nullptr)
+ {
+ GenTreePtr tmp = nullptr;
+ for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
+ {
+ tmp = args->Current();
+ assert(tmp->OperGet() != GT_PUTARG_REG); // We don't expect to see these in gtCallArgs
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ // found it
+ insertionPoint = tmp;
+ break;
+ }
+ }
+
+ if (insertionPoint == nullptr)
+ {
+ for (GenTreeArgList* args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ tmp = args->Current();
+ if ((tmp->OperGet() == GT_PUTARG_REG) || (tmp->OperGet() == GT_PUTARG_STK))
+ {
+ // found it
+ insertionPoint = tmp;
+ break;
+ }
+ }
+
+ // If there are no args, insert before the call node
+ if (insertionPoint == nullptr)
+ {
+ insertionPoint = call;
+ }
+ }
+ }
+
+ assert(insertionPoint != nullptr);
+ GenTreePtr profHookNode = new (comp, GT_PROF_HOOK) GenTree(GT_PROF_HOOK, TYP_VOID);
+ BlockRange().InsertBefore(insertionPoint, profHookNode);
+}
+
+// Lower fast tail call implemented as epilog+jmp.
+// Also inserts PInvoke method epilog if required.
+void Lowering::LowerFastTailCall(GenTreeCall* call)
+{
+#if FEATURE_FASTTAILCALL
+ // Tail call restrictions i.e. conditions under which tail prefix is ignored.
+ // Most of these checks are already done by importer or fgMorphTailCall().
+ // This serves as a double sanity check.
+ assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
+ assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
+ assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
+ assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
+ assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+
+ // We expect to see a call that meets the following conditions
+ assert(call->IsFastTailCall());
+
+ // VM cannot use return address hijacking when A() and B() tail call each
+ // other in mutual recursion. Therefore, this block is reachable through
+ // a GC-safe point or the whole method is marked as fully interruptible.
+ //
+ // TODO-Cleanup:
+ // optReachWithoutCall() depends on the fact that loop header blocks
+ // will have a block number > fgLastBB. These loop headers get added
+ // after dominator computation and get skipped by optReachWithoutCall().
+ // The below condition cannot be asserted in lower because fgSimpleLowering()
+ // can add a new basic block for range check failure which becomes
+ // fgLastBB with block number > loop header block number.
+ // assert((comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ // !comp->optReachWithoutCall(comp->fgFirstBB, comp->compCurBB) || comp->genInterruptible);
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns. This is a case of caller method has both PInvokes and tail calls.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
+ }
+
+ // Args for a tail call are set up in the incoming arg area. The gc-ness of the args of the
+ // caller and the callee (which is being tail called) may not match. Therefore, everything
+ // from arg setup until the epilog needs to be non-interruptible by GC. This is
+ // achieved by inserting GT_START_NONGC before the very first GT_PUTARG_STK node of the
+ // call. Note that once a stack arg has been set up, no nested calls may follow it in
+ // execution order to set up other args, because a nested call could overwrite a stack
+ // arg that was set up earlier.
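+ //
+ // As a rough sketch, the resulting ordering within the block is:
+ //   ... <arg side effects> ... [GT_PROF_HOOK] GT_START_NONGC <PUTARG_STK...> <PUTARG_REG...> <tail call>
+ // (the profiler hook, if needed, is inserted before GT_START_NONGC further below); nothing between
+ // GT_START_NONGC and the call may trigger a GC.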
+ GenTreePtr firstPutArgStk = nullptr;
+ GenTreeArgList* args;
+ ArrayStack<GenTree*> putargs(comp);
+
+ for (args = call->gtCallArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ putargs.Push(tmp);
+ }
+ }
+
+ for (args = call->gtCallLateArgs; args; args = args->Rest())
+ {
+ GenTreePtr tmp = args->Current();
+ if (tmp->OperGet() == GT_PUTARG_STK)
+ {
+ putargs.Push(tmp);
+ }
+ }
+
+ if (putargs.Height() > 0)
+ {
+ firstPutArgStk = putargs.Bottom();
+ }
+
+ // If we have a putarg_stk node, also count the number of non-standard args the
+ // call node has. Note that while determining whether a tail call can be fast
+ // tail called, we don't count non-standard args (passed in R10 or R11) since they
+ // don't contribute to outgoing arg space. These non-standard args are not
+ // accounted in caller's arg count but accounted in callee's arg count after
+ // fgMorphArgs(). Therefore, exclude callee's non-standard args while mapping
+ // callee's stack arg num to corresponding caller's stack arg num.
+ unsigned calleeNonStandardArgCount = call->GetNonStandardAddedArgCount(comp);
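+ // For example (illustrative): if the callee has one non-standard added arg (say, one passed in R11),
+ // a PUTARG_STK whose argTabEntry->argNum is 3 maps to caller stack arg number 3 - 1 == 2, and it is
+ // that caller parameter's stack slot which the putarg will overwrite.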
+
+ // Say Caller(a, b, c, d, e) fast tail calls Callee(e, d, c, b, a)
+ // i.e. passes its arguments in reverse to Callee. During call site
+ // setup, after computing argument side effects, stack args are setup
+ // first and reg args next. In the above example, both the Caller's and the
+ // Callee's stack args (e and a respectively) share the same stack slot
+ // and are alive at the same time. Setting up the Callee's
+ // stack arg will overwrite the Caller's stack arg; if there are
+ // further uses of the Caller's stack arg, we have to move
+ // it to a temp before overwriting its slot, and use the temp in place of
+ // the corresponding Caller stack arg.
+ //
+ // For the above example, conceptually this is what is done
+ // tmp = e;
+ // Stack slot of e = a
+ // R9 = b, R8 = c, RDx = d
+ // RCX = tmp
+ //
+ // The below logic is meant to detect cases like this and introduce
+ // temps to set up args correctly for Callee.
+
+ for (int i = 0; i < putargs.Height(); i++)
+ {
+ GenTreePtr putArgStkNode = putargs.Bottom(i);
+
+ assert(putArgStkNode->OperGet() == GT_PUTARG_STK);
+
+ // Get the caller arg num corresponding to this callee arg.
+ // Note that these two args share the same stack slot. Therefore,
+ // if there are further uses of corresponding caller arg, we need
+ // to move it to a temp and use the temp in this call tree.
+ //
+ // Note that Caller is guaranteed to have a param corresponding to
+ // this Callee's arg, since the fast tail call mechanism counts the
+ // stack slots required for both Caller and Callee for passing params
+ // and allows a fast tail call only if the stack slots required by the Caller >=
+ // those required by the Callee.
+ fgArgTabEntryPtr argTabEntry = comp->gtArgEntryByNode(call, putArgStkNode);
+ assert(argTabEntry);
+ unsigned callerArgNum = argTabEntry->argNum - calleeNonStandardArgCount;
+ noway_assert(callerArgNum < comp->info.compArgsCount);
+
+ unsigned callerArgLclNum = callerArgNum;
+ LclVarDsc* callerArgDsc = comp->lvaTable + callerArgLclNum;
+ if (callerArgDsc->lvPromoted)
+ {
+ callerArgLclNum =
+ callerArgDsc->lvFieldLclStart; // update the callerArgNum to the promoted struct field's lclNum
+ callerArgDsc = comp->lvaTable + callerArgLclNum;
+ }
+ noway_assert(callerArgDsc->lvIsParam);
+
+ // Search the execution-order list until we encounter the call node.
+ unsigned tmpLclNum = BAD_VAR_NUM;
+ var_types tmpType = TYP_UNDEF;
+ for (GenTreePtr treeNode = putArgStkNode->gtNext; treeNode != call; treeNode = treeNode->gtNext)
+ {
+ if (treeNode->OperIsLocal() || treeNode->OperIsLocalAddr())
+ {
+ // This should neither be a GT_REG_VAR nor GT_PHI_ARG.
+ assert((treeNode->OperGet() != GT_REG_VAR) && (treeNode->OperGet() != GT_PHI_ARG));
+
+ GenTreeLclVarCommon* lcl = treeNode->AsLclVarCommon();
+ LclVarDsc* lclVar = &comp->lvaTable[lcl->gtLclNum];
+
+ // The fast tail calling criteria permit passing structs of size 1, 2, 4 and 8 as args.
+ // It is possible that callerArgLclNum corresponds to such a struct whose stack slot
+ // is being overwritten by the setup of a stack arg, while there are still further uses of
+ // its fields if the struct is dependently promoted. In this case too
+ // we need to introduce a temp.
+ if ((lcl->gtLclNum == callerArgNum) || (lcl->gtLclNum == callerArgLclNum))
+ {
+ // Create tmp and use it in place of callerArgDsc
+ if (tmpLclNum == BAD_VAR_NUM)
+ {
+ tmpLclNum = comp->lvaGrabTemp(
+ true DEBUGARG("Fast tail call lowering is creating a new local variable"));
+ comp->lvaSortAgain = true;
+ tmpType = genActualType(callerArgDsc->lvaArgType());
+ comp->lvaTable[tmpLclNum].lvType = tmpType;
+ comp->lvaTable[tmpLclNum].lvRefCnt = 1;
+ }
+
+ lcl->SetLclNum(tmpLclNum);
+ lcl->SetOper(GT_LCL_VAR);
+ }
+ }
+ }
+
+ // If we have created a temp, insert an assignment of the caller arg to the temp before
+ // the first putargStkNode, i.e.
+ // tmpLcl = CallerArg
+ if (tmpLclNum != BAD_VAR_NUM)
+ {
+ assert(tmpType != TYP_UNDEF);
+ GenTreeLclVar* local =
+ new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, tmpType, callerArgLclNum, BAD_IL_OFFSET);
+ GenTree* assignExpr = comp->gtNewTempAssign(tmpLclNum, local);
+ BlockRange().InsertBefore(firstPutArgStk, LIR::SeqTree(comp, assignExpr));
+ }
+ }
+
+ // Insert GT_START_NONGC node before the first GT_PUTARG_STK node.
+ // Note that if there are no args to be setup on stack, no need to
+ // insert GT_START_NONGC node.
+ GenTreePtr startNonGCNode = nullptr;
+ if (firstPutArgStk != nullptr)
+ {
+ startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
+ BlockRange().InsertBefore(firstPutArgStk, startNonGCNode);
+
+ // Gc-interruptability in the following case:
+ // foo(a, b, c, d, e) { bar(a, b, c, d, e); }
+ // bar(a, b, c, d, e) { foo(a, b, d, d, e); }
+ //
+ // Since the instruction group starting from the instruction that sets up first
+ // stack arg to the end of the tail call is marked as non-gc interruptible,
+ // this will form a non-interruptible tight loop causing gc-starvation. To fix
+ // this we insert a GT_NO_OP node before GT_START_NONGC, if the method
+ // has a single basic block and is not a GC-safe point. The presence of a single
+ // nop outside non-gc interruptible region will prevent gc starvation.
+ if ((comp->fgBBcount == 1) && !(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT))
+ {
+ assert(comp->fgFirstBB == comp->compCurBB);
+ GenTreePtr noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
+ BlockRange().InsertBefore(startNonGCNode, noOp);
+ }
+ }
+
+ // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be
+ // inserted before the args are setup but after the side effects of args are
+ // computed. That is, GT_PROF_HOOK node needs to be inserted before GT_START_NONGC
+ // node if one exists.
+ if (comp->compIsProfilerHookNeeded())
+ {
+ InsertProfTailCallHook(call, startNonGCNode);
+ }
+
+#else // !FEATURE_FASTTAILCALL
+
+ // The platform chose not to implement the fast tail call mechanism.
+ // In that case we should never reach this method, as
+ // the expectation is that IsTailCallViaHelper() will always
+ // be true on such a platform.
+ unreached();
+#endif
+}
+
+//------------------------------------------------------------------------
+// LowerTailCallViaHelper: lower a call via the tailcall helper. Morph
+// has already inserted tailcall helper special arguments. This function
+// inserts actual data for some placeholders.
+//
+// For AMD64, lower
+// tail.call(void* copyRoutine, void* dummyArg, ...)
+// as
+// Jit_TailCall(void* copyRoutine, void* callTarget, ...)
+//
+// For x86, lower
+// tail.call(<function args>, int numberOfOldStackArgs, int dummyNumberOfNewStackArgs, int flags, void* dummyArg)
+// as
+// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
+// callTarget)
+// Note that the special arguments are on the stack, whereas the function arguments follow the normal convention.
+//
+// Also inserts PInvoke method epilog if required.
+//
+// Arguments:
+// call - The call node
+// callTarget - The real call target. This is used to replace the dummyArg during lowering.
+//
+// Return Value:
+// Returns control expression tree for making a call to helper Jit_TailCall.
+//
+GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget)
+{
+ // Tail call restrictions i.e. conditions under which tail prefix is ignored.
+ // Most of these checks are already done by importer or fgMorphTailCall().
+ // This serves as a double sanity check.
+ assert((comp->info.compFlags & CORINFO_FLG_SYNCH) == 0); // tail calls from synchronized methods
+ assert(!comp->opts.compNeedSecurityCheck); // tail call from methods that need security check
+ assert(!call->IsUnmanaged()); // tail calls to unmanaged methods
+ assert(!comp->compLocallocUsed); // tail call from methods that also do localloc
+ assert(!comp->getNeedsGSSecurityCookie()); // jit64 compat: tail calls from methods that need GS check
+
+ // We expect to see a call that meets the following conditions
+ assert(call->IsTailCallViaHelper());
+ assert(callTarget != nullptr);
+
+ // The TailCall helper call never returns to the caller and is not GC interruptible.
+ // Therefore the block containing the tail call should be a GC safe point to avoid
+ // GC starvation.
+ assert(comp->compCurBB->bbFlags & BBF_GC_SAFE_POINT);
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns. This is a case of caller method has both PInvokes and tail calls.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(call));
+ }
+
+ // Remove gtCallAddr from execution order if present.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ assert(call->gtCallAddr != nullptr);
+
+ bool isClosed;
+ LIR::ReadOnlyRange callAddrRange = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed);
+ assert(isClosed);
+
+ BlockRange().Remove(std::move(callAddrRange));
+ }
+
+ // The callTarget tree needs to be sequenced.
+ LIR::Range callTargetRange = LIR::SeqTree(comp, callTarget);
+
+ fgArgTabEntry* argEntry;
+
+#if defined(_TARGET_AMD64_)
+
+// For AMD64, the first argument is the CopyRoutine and the second argument is a placeholder node.
+
+#ifdef DEBUG
+ argEntry = comp->gtArgEntryByArgNum(call, 0);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* firstArg = argEntry->node->gtOp.gtOp1;
+ assert(firstArg->gtOper == GT_CNS_INT);
+#endif
+
+ // Replace second arg by callTarget.
+ argEntry = comp->gtArgEntryByArgNum(call, 1);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* secondArg = argEntry->node->gtOp.gtOp1;
+
+ BlockRange().InsertAfter(secondArg, std::move(callTargetRange));
+
+ bool isClosed;
+ LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(secondArg, &isClosed);
+ assert(isClosed);
+
+ BlockRange().Remove(std::move(secondArgRange));
+
+ argEntry->node->gtOp.gtOp1 = callTarget;
+
+#elif defined(_TARGET_X86_)
+
+ // Verify the special args are what we expect, and replace the dummy args with real values.
+ // We need to figure out the size of the outgoing stack arguments, not including the special args.
+ // The number of 4-byte words is passed to the helper for the incoming and outgoing argument sizes.
+ // This number is exactly the next slot number in the call's argument info struct.
+ unsigned nNewStkArgsWords = call->fgArgInfo->GetNextSlotNum();
+ assert(nNewStkArgsWords >= 4); // There must be at least the four special stack args.
+ nNewStkArgsWords -= 4;
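+ // For example (illustrative): if GetNextSlotNum() returns 7, the call sets up 7 outgoing stack
+ // slots in total; 4 of them are the special helper args, so nNewStkArgsWords == 3, i.e. 12 bytes
+ // of "real" outgoing stack arguments are reported to the helper.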
+
+ unsigned numArgs = call->fgArgInfo->ArgCount();
+
+ // arg 0 == callTarget.
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 1);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg0 = argEntry->node->gtOp.gtOp1;
+
+ BlockRange().InsertAfter(arg0, std::move(callTargetRange));
+
+ bool isClosed;
+ LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
+ assert(isClosed);
+
+ argEntry->node->gtOp.gtOp1 = callTarget;
+
+ // arg 1 == flags
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 2);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg1 = argEntry->node->gtOp.gtOp1;
+ assert(arg1->gtOper == GT_CNS_INT);
+
+ ssize_t tailCallHelperFlags = 1 | // always restore EDI,ESI,EBX
+ (call->IsVirtualStub() ? 0x2 : 0x0); // Stub dispatch flag
+ arg1->gtIntCon.gtIconVal = tailCallHelperFlags;
+
+ // arg 2 == numberOfNewStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 3);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg2 = argEntry->node->gtOp.gtOp1;
+ assert(arg2->gtOper == GT_CNS_INT);
+
+ arg2->gtIntCon.gtIconVal = nNewStkArgsWords;
+
+#ifdef DEBUG
+ // arg 3 == numberOfOldStackArgsWords
+ argEntry = comp->gtArgEntryByArgNum(call, numArgs - 4);
+ assert(argEntry != nullptr);
+ assert(argEntry->node->gtOper == GT_PUTARG_STK);
+ GenTree* arg3 = argEntry->node->gtOp.gtOp1;
+ assert(arg3->gtOper == GT_CNS_INT);
+#endif // DEBUG
+
+#else
+ NYI("LowerTailCallViaHelper");
+#endif // _TARGET_*
+
+ // Transform this call node into a call to Jit tail call helper.
+ call->gtCallType = CT_HELPER;
+ call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_TAILCALL);
+ call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
+
+ // Lower this as if it were a pure helper call.
+ call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER);
+ GenTree* result = LowerDirectCall(call);
+
+ // Now add back tail call flags for identifying this node as tail call dispatched via helper.
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+
+ // Insert profiler tail call hook if needed.
+ // Since we don't know the insertion point, pass null for second param.
+ if (comp->compIsProfilerHookNeeded())
+ {
+ InsertProfTailCallHook(call, nullptr);
+ }
+
+ return result;
+}
+
+// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
+void Lowering::LowerJmpMethod(GenTree* jmp)
+{
+ assert(jmp->OperGet() == GT_JMP);
+
+ JITDUMP("lowering GT_JMP\n");
+ DISPNODE(jmp);
+ JITDUMP("============");
+
+ // If PInvokes are in-lined, we have to remember to execute PInvoke method epilog anywhere that
+ // a method returns.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(jmp));
+ }
+}
+
+// Lower GT_RETURN node to insert PInvoke method epilog if required.
+void Lowering::LowerRet(GenTree* ret)
+{
+ assert(ret->OperGet() == GT_RETURN);
+
+ JITDUMP("lowering GT_RETURN\n");
+ DISPNODE(ret);
+ JITDUMP("============");
+
+ // Method doing PInvokes has exactly one return block unless it has tail calls.
+ if (comp->info.compCallUnmanaged && (comp->compCurBB == comp->genReturnBB))
+ {
+ InsertPInvokeMethodEpilog(comp->compCurBB DEBUGARG(ret));
+ }
+}
+
+GenTree* Lowering::LowerDirectCall(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_HELPER);
+
+ // We don't support tail calling helper methods,
+ // but we might encounter tail calls dispatched via the JIT tail call helper, which appear as tail calls to a helper.
+ noway_assert(!call->IsTailCall() || call->IsTailCallViaHelper() || call->gtCallType == CT_USER_FUNC);
+
+ // Non-virtual direct/indirect calls: Work out if the address of the
+ // call is known at JIT time. If not, it is either an indirect call
+ // or the address must be accessed via a single or double indirection.
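+ //
+ // Roughly (illustrative):
+ //   IAT_VALUE   -> call addr     (direct; or load the address into a register if it is out of range)
+ //   IAT_PVALUE  -> call [addr]   (one indirection through a cell)
+ //   IAT_PPVALUE -> call [[addr]] (double indirection)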
+
+ void* addr;
+ InfoAccessType accessType;
+ CorInfoHelpFunc helperNum = comp->eeGetHelperNum(call->gtCallMethHnd);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (call->gtEntryPoint.addr != nullptr)
+ {
+ accessType = call->gtEntryPoint.accessType;
+ addr = call->gtEntryPoint.addr;
+ }
+ else
+#endif
+ if (call->gtCallType == CT_HELPER)
+ {
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ // the convention on getHelperFtn seems to be (it's not documented)
+ // that it returns an address or if it returns null, pAddr is set to
+ // another address, which requires an indirection
+ void* pAddr;
+ addr = comp->info.compCompHnd->getHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr != nullptr)
+ {
+ accessType = IAT_VALUE;
+ }
+ else
+ {
+ accessType = IAT_PVALUE;
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if (!call->NeedsNullCheck())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ comp->info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo, aflags);
+
+ accessType = addrInfo.accessType;
+ addr = addrInfo.addr;
+ }
+
+ GenTree* result = nullptr;
+ switch (accessType)
+ {
+ case IAT_VALUE:
+ // Non-virtual direct call to known address
+ if (!IsCallTargetInRange(addr) || call->IsTailCall())
+ {
+ result = AddrGen(addr);
+ }
+ else
+ {
+ // a direct call within range of hardware relative call instruction
+ // stash the address for codegen
+ call->gtDirectCallAddress = addr;
+ }
+ break;
+
+ case IAT_PVALUE:
+ {
+ // Non-virtual direct calls to addresses accessed by
+ // a single indirection.
+ GenTree* cellAddr = AddrGen(addr);
+ GenTree* indir = Ind(cellAddr);
+
+#ifdef FEATURE_READYTORUN_COMPILER
+#ifdef _TARGET_ARM64_
+ // For arm64, we dispatch code the same as VSD, using X11 for the indirection cell address,
+ // which ZapIndirectHelperThunk expects.
+ if (call->IsR2RRelativeIndir())
+ {
+ cellAddr->gtRegNum = REG_R2R_INDIRECT_PARAM;
+ indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+ }
+#endif
+#endif
+ result = indir;
+ break;
+ }
+
+ case IAT_PPVALUE:
+ // Non-virtual direct calls to addresses accessed by
+ // a double indirection.
+ //
+ // Double-indirection. Load the address into a register
+ // and call indirectly through the register
+ noway_assert(helperNum == CORINFO_HELP_UNDEF);
+ result = AddrGen(addr);
+ result = Ind(Ind(result));
+ break;
+
+ default:
+ noway_assert(!"Bad accessType");
+ break;
+ }
+
+ return result;
+}
+
+GenTree* Lowering::LowerDelegateInvoke(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+
+ assert((comp->info.compCompHnd->getMethodAttribs(call->gtCallMethHnd) &
+ (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL)) == (CORINFO_FLG_DELEGATE_INVOKE | CORINFO_FLG_FINAL));
+
+ GenTree* thisArgNode;
+ if (call->IsTailCallViaHelper())
+ {
+#ifdef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
+ const unsigned argNum = 0;
+#else // !_TARGET_X86_
+ // In case of helper dispatched tail calls, "thisptr" will be the third arg.
+ // The first two args are: real call target and addr of args copy routine.
+ const unsigned argNum = 2;
+#endif // !_TARGET_X86_
+
+ fgArgTabEntryPtr thisArgTabEntry = comp->gtArgEntryByArgNum(call, argNum);
+ thisArgNode = thisArgTabEntry->node;
+ }
+ else
+ {
+ thisArgNode = comp->gtGetThisArg(call);
+ }
+
+ assert(thisArgNode->gtOper == GT_PUTARG_REG);
+ GenTree* originalThisExpr = thisArgNode->gtOp.gtOp1;
+
+ // We're going to use the 'this' expression multiple times, so make a local to copy it.
+
+ unsigned lclNum;
+
+#ifdef _TARGET_X86_
+ if (call->IsTailCallViaHelper() && originalThisExpr->IsLocal())
+ {
+ // For ordering purposes for the special tailcall arguments on x86, we forced the
+ // 'this' pointer in this case to a local in Compiler::fgMorphTailCall().
+ // We could possibly use this case to remove copies for all architectures and non-tailcall
+ // calls by creating a new lcl var or lcl field reference, as is done in the
+ // LowerVirtualVtableCall() code.
+ assert(originalThisExpr->OperGet() == GT_LCL_VAR);
+ lclNum = originalThisExpr->AsLclVarCommon()->GetLclNum();
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ unsigned delegateInvokeTmp = comp->lvaGrabTemp(true DEBUGARG("delegate invoke call"));
+
+ LIR::Use thisExprUse(BlockRange(), &thisArgNode->gtOp.gtOp1, thisArgNode);
+ thisExprUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), delegateInvokeTmp);
+
+ originalThisExpr = thisExprUse.Def(); // it's changed; reload it.
+ lclNum = delegateInvokeTmp;
+ }
+
+ // replace original expression feeding into thisPtr with
+ // [originalThis + offsetOfDelegateInstance]
+
+ GenTree* newThisAddr = new (comp, GT_LEA)
+ GenTreeAddrMode(TYP_REF, originalThisExpr, nullptr, 0, comp->eeGetEEInfo()->offsetOfDelegateInstance);
+
+ GenTree* newThis = comp->gtNewOperNode(GT_IND, TYP_REF, newThisAddr);
+
+ BlockRange().InsertAfter(originalThisExpr, newThisAddr, newThis);
+
+ thisArgNode->gtOp.gtOp1 = newThis;
+
+ // the control target is
+ // [originalThis + firstTgtOffs]
+
+ GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(originalThisExpr->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ unsigned targetOffs = comp->eeGetEEInfo()->offsetOfDelegateFirstTarget;
+ GenTree* result = new (comp, GT_LEA) GenTreeAddrMode(TYP_REF, base, nullptr, 0, targetOffs);
+ GenTree* callTarget = Ind(result);
+
+ // don't need to sequence and insert this tree, caller will do it
+
+ return callTarget;
+}
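+
+// Illustrative sketch (rough pseudo-C, not definitive): the rewritten delegate Invoke call
+// looks like the following, where 'd' is the original 'this' (the delegate object) and the
+// offsets come from the EE info queried above:
+//
+//   newThis = *(ref*)((byte*)d + offsetOfDelegateInstance);      // becomes the 'this' arg
+//   target  = *(void**)((byte*)d + offsetOfDelegateFirstTarget); // returned as the call target
+//   target(newThis, ...original args...);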
+
+GenTree* Lowering::LowerIndirectNonvirtCall(GenTreeCall* call)
+{
+#ifdef _TARGET_X86_
+ if (call->gtCallCookie != nullptr)
+ {
+ NYI_X86("Morphing indirect non-virtual call with non-standard args");
+ }
+#endif
+
+ // Indirect cookie calls get transformed by fgMorphArgs into indirect calls with non-standard args.
+ // Hence we should never see this type of call in lowering.
+
+ noway_assert(call->gtCallCookie == nullptr);
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// CreateReturnTrapSeq: Create a tree to perform a "return trap", used in PInvoke
+// epilogs to invoke a GC under a condition. The return trap checks some global
+// location (the runtime tells us where that is and how many indirections to make),
+// then, based on the result, conditionally calls a GC helper. We use a special node
+// for this because at this time (late in the compilation phases), introducing flow
+// is tedious/difficult.
+//
+// This is used for PInvoke inlining.
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::CreateReturnTrapSeq()
+{
+ // The GT_RETURNTRAP node expands to this:
+ // if (g_TrapReturningThreads)
+ // {
+ // RareDisablePreemptiveGC();
+ // }
+
+ // The only thing to do here is build up the expression that evaluates 'g_TrapReturningThreads'.
+
+ void* pAddrOfCaptureThreadGlobal = nullptr;
+ LONG* addrOfCaptureThreadGlobal = comp->info.compCompHnd->getAddrOfCaptureThreadGlobal(&pAddrOfCaptureThreadGlobal);
+
+ GenTree* testTree;
+ if (addrOfCaptureThreadGlobal != nullptr)
+ {
+ testTree = Ind(AddrGen(addrOfCaptureThreadGlobal));
+ }
+ else
+ {
+ testTree = Ind(Ind(AddrGen(pAddrOfCaptureThreadGlobal)));
+ }
+ return comp->gtNewOperNode(GT_RETURNTRAP, TYP_INT, testTree);
+}
+
+//------------------------------------------------------------------------
+// SetGCState: Create a tree that stores the given constant (0 or 1) into the
+// thread's GC state field.
+//
+// This is used for PInvoke inlining.
+//
+// Arguments:
+// state - constant (0 or 1) to store into the thread's GC state field.
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::SetGCState(int state)
+{
+ // Thread.offsetOfGcState = 0/1
+
+ assert(state == 0 || state == 1);
+
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+
+ GenTree* base = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->info.compLvFrameListRoot, -1);
+
+ GenTree* storeGcState = new (comp, GT_STOREIND)
+ GenTreeStoreInd(TYP_BYTE,
+ new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, base, nullptr, 1, pInfo->offsetOfGCState),
+ new (comp, GT_CNS_INT) GenTreeIntCon(TYP_BYTE, state));
+
+ return storeGcState;
+}
+
+//------------------------------------------------------------------------
+// CreateFrameLinkUpdate: Create a tree that either links or unlinks the
+// locally-allocated InlinedCallFrame from the Frame list.
+//
+// This is used for PInvoke inlining.
+//
+// Arguments:
+// action - whether to link (push) or unlink (pop) the Frame
+//
+// Return Value:
+// Code tree to perform the action.
+//
+GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action)
+{
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
+
+ GenTree* TCB = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
+ (IL_OFFSET)-1); // cast to resolve ambiguity.
+
+ // Thread->m_pFrame
+ GenTree* addr = new (comp, GT_LEA) GenTreeAddrMode(TYP_I_IMPL, TCB, nullptr, 1, pInfo->offsetOfThreadFrame);
+
+ GenTree* data = nullptr;
+
+ if (action == PushFrame)
+ {
+ // Thread->m_pFrame = &inlinedCallFrame;
+ data = new (comp, GT_LCL_FLD_ADDR)
+ GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
+ }
+ else
+ {
+ assert(action == PopFrame);
+ // Thread->m_pFrame = inlinedCallFrame.m_pNext;
+
+ data = new (comp, GT_LCL_FLD) GenTreeLclFld(GT_LCL_FLD, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar,
+ pInfo->inlinedCallFrameInfo.offsetOfFrameLink);
+ }
+ GenTree* storeInd = new (comp, GT_STOREIND) GenTreeStoreInd(TYP_I_IMPL, addr, data);
+ return storeInd;
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeMethodProlog: Create the code that runs at the start of
+// every method that has PInvoke calls.
+//
+// Initialize the TCB local and the InlinedCallFrame object. Then link ("push")
+// the InlinedCallFrame object on the Frame chain. The layout of InlinedCallFrame
+// is defined in vm/frames.h. See also vm/jitinterface.cpp for more information.
+// The offsets of these fields is returned by the VM in a call to ICorStaticInfo::getEEInfo().
+//
+// The (current) layout is as follows:
+//
+// 64-bit 32-bit CORINFO_EE_INFO
+// offset offset field name offset when set
+// -----------------------------------------------------------------------------------------
+// +00h +00h GS cookie offsetOfGSCookie
+// +08h +04h vptr for class InlinedCallFrame offsetOfFrameVptr method prolog
+// +10h +08h m_Next offsetOfFrameLink method prolog
+// +18h +0Ch m_Datum offsetOfCallTarget call site
+// +20h n/a m_StubSecretArg not set by JIT
+// +28h +10h m_pCallSiteSP offsetOfCallSiteSP x86: call site, and zeroed in method
+// prolog;
+// non-x86: method prolog (SP remains
+// constant in function, after prolog: no
+// localloc and PInvoke in same function)
+// +30h +14h m_pCallerReturnAddress offsetOfReturnAddress call site
+// +38h +18h m_pCalleeSavedFP offsetOfCalleeSavedFP not set by JIT
+// +1Ch JIT retval spill area (int) before call_gc ???
+// +20h JIT retval spill area (long) before call_gc ???
+// +24h Saved value of EBP method prolog ???
+//
+// Note that in the VM, InlinedCallFrame is a C++ class whose objects have a 'this' pointer that points
+// to the InlinedCallFrame vptr (the 2nd field listed above), and the GS cookie is stored *before*
+// the object. When we link the InlinedCallFrame onto the Frame chain, we must point at this location,
+// and not at the beginning of the InlinedCallFrame local, which is actually the GS cookie.
+//
+// Return Value:
+// none
+//
+void Lowering::InsertPInvokeMethodProlog()
+{
+ noway_assert(comp->info.compCallUnmanaged);
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ return;
+ }
+
+ JITDUMP("======= Inserting PInvoke method prolog\n");
+
+ LIR::Range& firstBlockRange = LIR::AsRange(comp->fgFirstBB);
+
+ const CORINFO_EE_INFO* pInfo = comp->eeGetEEInfo();
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = pInfo->inlinedCallFrameInfo;
+
+ // First arg: &compiler->lvaInlinedPInvokeFrameVar + callFrameInfo.offsetOfFrameVptr
+
+ GenTree* frameAddr = new (comp, GT_LCL_FLD_ADDR)
+ GenTreeLclFld(GT_LCL_FLD_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfFrameVptr);
+
+ // Call runtime helper to fill in our InlinedCallFrame and push it on the Frame list:
+ // TCB = CORINFO_HELP_INIT_PINVOKE_FRAME(&symFrameStart, secretArg);
+ // for x86, don't pass the secretArg.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ GenTreeArgList* argList = comp->gtNewArgList(frameAddr);
+#else // !_TARGET_X86_
+ GenTreeArgList* argList = comp->gtNewArgList(frameAddr, PhysReg(REG_SECRET_STUB_PARAM));
+#endif // !_TARGET_X86_
+
+ GenTree* call = comp->gtNewHelperCallNode(CORINFO_HELP_INIT_PINVOKE_FRAME, TYP_I_IMPL, 0, argList);
+
+ // some sanity checks on the frame list root vardsc
+ LclVarDsc* varDsc = &comp->lvaTable[comp->info.compLvFrameListRoot];
+ noway_assert(!varDsc->lvIsParam);
+ noway_assert(varDsc->lvType == TYP_I_IMPL);
+
+ GenTree* store =
+ new (comp, GT_STORE_LCL_VAR) GenTreeLclVar(GT_STORE_LCL_VAR, TYP_I_IMPL, comp->info.compLvFrameListRoot,
+ (IL_OFFSET)-1); // cast to resolve ambiguity.
+ store->gtOp.gtOp1 = call;
+ store->gtFlags |= GTF_VAR_DEF;
+
+ GenTree* insertionPoint = firstBlockRange.FirstNonPhiOrCatchArgNode();
+
+ comp->fgMorphTree(store);
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, store));
+ DISPTREERANGE(firstBlockRange, store);
+
+#ifndef _TARGET_X86_ // For x86, this step is done at the call site (due to stack pointer not being static in the
+ // function).
+
+ // --------------------------------------------------------
+ // InlinedCallFrame.m_pCallSiteSP = @RSP;
+
+ GenTreeLclFld* storeSP = new (comp, GT_STORE_LCL_FLD)
+ GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
+ storeSP->gtOp1 = PhysReg(REG_SPBASE);
+
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeSP));
+ DISPTREERANGE(firstBlockRange, storeSP);
+
+#endif // !_TARGET_X86_
+
+ // --------------------------------------------------------
+ // InlinedCallFrame.m_pCalleeSavedEBP = @RBP;
+
+ GenTreeLclFld* storeFP =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfCalleeSavedFP);
+ storeFP->gtOp1 = PhysReg(REG_FPBASE);
+
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeFP));
+ DISPTREERANGE(firstBlockRange, storeFP);
+
+ // --------------------------------------------------------
+
+ if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+ {
+ // Push a frame. If we are NOT in an IL stub, this is done right before the call instead.
+ // The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack.
+ GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
+ firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ DISPTREERANGE(firstBlockRange, frameUpd);
+ }
+}
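+
+// Illustrative sketch (not definitive; register names are illustrative): the prolog built
+// above corresponds roughly to the following pseudo-C:
+//
+//   frameListRoot = CORINFO_HELP_INIT_PINVOKE_FRAME(&inlinedCallFrame.vptr /*, secretArg on non-x86 */);
+//   inlinedCallFrame.m_pCallSiteSP    = SP;   // non-x86 only; x86 does this at each call site
+//   inlinedCallFrame.m_pCalleeSavedFP = FP;
+//   Thread->m_pFrame = &inlinedCallFrame;     // IL stubs only; otherwise done per call site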
+
+//------------------------------------------------------------------------
+// InsertPInvokeMethodEpilog: Code that needs to be run when exiting any method
+// that has PInvoke inlines. This needs to be inserted any place you can exit the
+// function: returns, tailcalls and jmps.
+//
+// Arguments:
+// returnBB - basic block from which a method can return
+ // lastExpr - GenTree of the last top-level statement of returnBB (debug-only arg)
+//
+ // Return Value:
+ // None.
+//
+void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePtr lastExpr))
+{
+ assert(returnBB != nullptr);
+ assert(comp->info.compCallUnmanaged);
+
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ return;
+ }
+
+ JITDUMP("======= Inserting PInvoke method epilog\n");
+
+ // A method doing PInvoke calls has exactly one return block unless it has "jmp" or tail calls.
+ assert(((returnBB == comp->genReturnBB) && (returnBB->bbJumpKind == BBJ_RETURN)) ||
+ returnBB->endsWithTailCallOrJmp(comp));
+
+ LIR::Range& returnBlockRange = LIR::AsRange(returnBB);
+
+ GenTree* insertionPoint = returnBlockRange.LastNode();
+ assert(insertionPoint == lastExpr);
+
+ // Note: PInvoke Method Epilog (PME) needs to be inserted just before GT_RETURN, GT_JMP or GT_CALL node in execution
+ // order so that it is guaranteed that there will be no further PInvokes after that point in the method.
+ //
+ // Example1: GT_RETURN(op1) - say execution order is: Op1, GT_RETURN. After inserting PME, execution order would be
+ // Op1, PME, GT_RETURN
+ //
+ // Example2: GT_CALL(arg side effect computing nodes, Stk Args Setup, Reg Args setup). The execution order would be
+ // arg side effect computing nodes, Stk Args setup, Reg Args setup, GT_CALL
+ // After inserting PME execution order would be:
+ // arg side effect computing nodes, Stk Args setup, Reg Args setup, PME, GT_CALL
+ //
+ // Example3: GT_JMP. After inserting PME execution order would be: PME, GT_JMP
+ // That is after PME, args for GT_JMP call will be setup.
+
+ // TODO-Cleanup: setting GCState to 1 seems to be redundant, as InsertPInvokeCallProlog will set it to zero before a
+ // PInvoke call and InsertPInvokeCallEpilog() will set it back to 1 after the PInvoke. Though this is redundant,
+ // it is harmless.
+ // Note that liveness artificially extends the life of the compLvFrameListRoot var if the method being compiled has
+ // PInvokes. Deleting the statement below would cause an assert in lsra.cpp::SetLastUses() since compLvFrameListRoot
+ // would be live-in to a BBJ_RETURN block without any uses. Long term we need to fix liveness for the x64 case to
+ // properly extend the life of the compLvFrameListRoot var.
+ //
+ // Thread.offsetOfGcState = 0/1
+ // That is [tcb + offsetOfGcState] = 1
+ GenTree* storeGCState = SetGCState(1);
+ returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, storeGCState));
+
+ if (comp->opts.eeFlags & CORJIT_FLG_IL_STUB)
+ {
+ // Pop the frame, in non-stubs we do this around each PInvoke call
+ GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
+ returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
+ }
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeCallProlog: Emit the call-site prolog for direct calls to unmanaged code.
+// It does all the necessary call-site setup of the InlinedCallFrame.
+//
+// Arguments:
+// call - the call for which we are inserting the PInvoke prolog.
+//
+// Return Value:
+// None.
+//
+void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
+{
+ JITDUMP("======= Inserting PInvoke call prolog\n");
+
+ GenTree* insertBefore = call;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ bool isClosed;
+ insertBefore = BlockRange().GetTreeRange(call->gtCallAddr, &isClosed).FirstNode();
+ assert(isClosed);
+ }
+
+ const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+#if COR_JIT_EE_VERSION > 460
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ // First argument is the address of the frame variable.
+ GenTree* frameAddr = new (comp, GT_LCL_VAR_ADDR)
+ GenTreeLclVar(GT_LCL_VAR_ADDR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
+
+ // Insert call to CORINFO_HELP_JIT_PINVOKE_BEGIN
+ GenTree* helperCall =
+ comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
+
+ comp->fgMorphTree(helperCall);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, helperCall));
+ return;
+ }
+#endif
+
+ // Emit the following sequence:
+ //
+ // InlinedCallFrame.callTarget = methodHandle // stored in m_Datum
+ // InlinedCallFrame.m_pCallSiteSP = SP // x86 only
+ // InlinedCallFrame.m_pCallerReturnAddress = return address
+ // Thread.gcState = 0
+ // (non-stub) - update top Frame on TCB
+
+ // ----------------------------------------------------------------------------------
+ // Set up InlinedCallFrame.callSiteTarget (which is how the JIT refers to it).
+ // The actual field is InlinedCallFrame.m_Datum which has many different uses and meanings.
+
+ GenTree* src = nullptr;
+
+ if (callType == CT_INDIRECT)
+ {
+ if (comp->info.compPublishStubParam)
+ {
+ src = new (comp, GT_LCL_VAR) GenTreeLclVar(TYP_I_IMPL, comp->lvaStubArgumentVar, BAD_IL_OFFSET);
+ }
+ // else { If we don't have secret parameter, m_Datum will be initialized by VM code }
+ }
+ else
+ {
+ assert(callType == CT_USER_FUNC);
+
+ void* pEmbedMethodHandle = nullptr;
+ CORINFO_METHOD_HANDLE embedMethodHandle =
+ comp->info.compCompHnd->embedMethodHandle(call->gtCallMethHnd, &pEmbedMethodHandle);
+
+ noway_assert((!embedMethodHandle) != (!pEmbedMethodHandle));
+
+ if (embedMethodHandle != nullptr)
+ {
+ // InlinedCallFrame.callSiteTarget = methodHandle
+ src = AddrGen(embedMethodHandle);
+ }
+ else
+ {
+ // InlinedCallFrame.callSiteTarget = *pEmbedMethodHandle
+ src = Ind(AddrGen(pEmbedMethodHandle));
+ }
+ }
+
+ if (src != nullptr)
+ {
+ // Store into InlinedCallFrame.m_Datum, the offset of which is given by offsetOfCallTarget.
+ GenTreeLclFld* store =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfCallTarget);
+ store->gtOp1 = src;
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, store));
+ }
+
+#ifdef _TARGET_X86_
+
+ // ----------------------------------------------------------------------------------
+ // InlinedCallFrame.m_pCallSiteSP = SP
+
+ GenTreeLclFld* storeCallSiteSP = new (comp, GT_STORE_LCL_FLD)
+ GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar, callFrameInfo.offsetOfCallSiteSP);
+
+ storeCallSiteSP->gtOp1 = PhysReg(REG_SPBASE);
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeCallSiteSP));
+
+#endif
+
+ // ----------------------------------------------------------------------------------
+ // InlinedCallFrame.m_pCallerReturnAddress = &label (the address of the instruction immediately following the call)
+
+ GenTreeLclFld* storeLab =
+ new (comp, GT_STORE_LCL_FLD) GenTreeLclFld(GT_STORE_LCL_FLD, TYP_I_IMPL, comp->lvaInlinedPInvokeFrameVar,
+ callFrameInfo.offsetOfReturnAddress);
+
+ // We don't have a real label, and inserting one is hard (even if we made a special node),
+ // so for now we will just 'know' what this means in codegen.
+ GenTreeLabel* labelRef = new (comp, GT_LABEL) GenTreeLabel(nullptr);
+ labelRef->gtType = TYP_I_IMPL;
+ storeLab->gtOp1 = labelRef;
+
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeLab));
+
+ if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ // Set the TCB's frame to be the one we just created.
+ // Note the init routine for the InlinedCallFrame (CORINFO_HELP_INIT_PINVOKE_FRAME)
+ // has prepended it to the linked list to maintain the stack of Frames.
+ //
+ // Stubs do this once per stub, not once per call.
+ GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
+ }
+
+ // IMPORTANT **** This instruction must come last!!! ****
+ // It changes the thread's state to Preemptive mode
+ // ----------------------------------------------------------------------------------
+ // [tcb + offsetOfGcState] = 0
+
+ GenTree* storeGCState = SetGCState(0);
+ BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
+}
+
+//------------------------------------------------------------------------
+// InsertPInvokeCallEpilog: Insert the code that goes after every inlined pinvoke call.
+//
+// Arguments:
+// call - the call for which we are inserting the PInvoke epilog.
+//
+// Return Value:
+// None.
+//
+void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
+{
+ JITDUMP("======= Inserting PInvoke call epilog\n");
+
+#if COR_JIT_EE_VERSION > 460
+ if (comp->opts.ShouldUsePInvokeHelpers())
+ {
+ noway_assert(comp->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM);
+
+ // First argument is the address of the frame variable.
+ GenTree* frameAddr =
+ new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, TYP_BYREF, comp->lvaInlinedPInvokeFrameVar, BAD_IL_OFFSET);
+ frameAddr->gtOper = GT_LCL_VAR_ADDR;
+
+ // Insert call to CORINFO_HELP_JIT_PINVOKE_END
+ GenTree* helperCall =
+ comp->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, 0, comp->gtNewArgList(frameAddr));
+
+ comp->fgMorphTree(helperCall);
+ BlockRange().InsertAfter(call, LIR::SeqTree(comp, helperCall));
+ return;
+ }
+#endif
+
+ // gcstate = 1
+ GenTree* insertionPoint = call->gtNext;
+
+ GenTree* tree = SetGCState(1);
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+
+ tree = CreateReturnTrapSeq();
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+
+ // Pop the frame if necessary
+ if (!(comp->opts.eeFlags & CORJIT_FLG_IL_STUB))
+ {
+ tree = CreateFrameLinkUpdate(PopFrame);
+ BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
+ }
+}
+
+//------------------------------------------------------------------------
+// LowerNonvirtPinvokeCall: Lower a non-virtual / indirect PInvoke call
+//
+// Arguments:
+// call - The call to lower.
+//
+// Return Value:
+// The lowered call tree.
+//
+GenTree* Lowering::LowerNonvirtPinvokeCall(GenTreeCall* call)
+{
+ // PInvoke lowering varies depending on the flags passed in by the EE. By default,
+ // GC transitions are generated inline; if CORJIT_FLG2_USE_PINVOKE_HELPERS is specified,
+ // GC transitions are instead performed using helper calls. Examples of each case are given
+ // below. Note that the data structure that is used to store information about a call frame
+ // containing any P/Invoke calls is initialized in the method prolog (see
+ // InsertPInvokeMethod{Prolog,Epilog} for details).
+ //
+ // Inline transitions:
+ // InlinedCallFrame inlinedCallFrame;
+ //
+ // ...
+ //
+ // // Set up frame information
+ // inlinedCallFrame.callTarget = methodHandle; // stored in m_Datum
+ // inlinedCallFrame.m_pCallSiteSP = SP; // x86 only
+ // inlinedCallFrame.m_pCallerReturnAddress = &label; (the address of the instruction immediately following the
+ // call)
+ // Thread.m_pFrame = &inlinedCallFrame; (non-IL-stub only)
+ //
+ // // Switch the thread's GC mode to preemptive mode
+ // thread->m_fPreemptiveGCDisabled = 0;
+ //
+ // // Call the unmanaged method
+ // target();
+ //
+ // // Switch the thread's GC mode back to cooperative mode
+ // thread->m_fPreemptiveGCDisabled = 1;
+ //
+ // // Rendezvous with a running collection if necessary
+ // if (g_TrapReturningThreads)
+ // RareDisablePreemptiveGC();
+ //
+ // Transitions using helpers:
+ //
+ // OpaqueFrame opaqueFrame;
+ //
+ // ...
+ //
+ // // Call the JIT_PINVOKE_BEGIN helper
+ // JIT_PINVOKE_BEGIN(&opaqueFrame);
+ //
+ // // Call the unmanaged method
+ // target();
+ //
+ // // Call the JIT_PINVOKE_END helper
+ // JIT_PINVOKE_END(&opaqueFrame);
+ //
+ // Note that the JIT_PINVOKE_{BEGIN,END} helpers currently use the default calling convention for the target
+ // platform. They may be changed in the future such that they preserve all register values.
+
+ GenTree* result = nullptr;
+ void* addr = nullptr;
+
+ // assert we have seen one of these
+ noway_assert(comp->info.compCallUnmanaged != 0);
+
+ // All code generated by this function must not contain the randomly-inserted NOPs
+ // that we insert to inhibit JIT spraying in partial trust scenarios.
+ // The PINVOKE_PROLOG op signals this to the code generator/emitter.
+
+ GenTree* prolog = new (comp, GT_NOP) GenTree(GT_PINVOKE_PROLOG, TYP_VOID);
+ BlockRange().InsertBefore(call, prolog);
+
+ InsertPInvokeCallProlog(call);
+
+ if (call->gtCallType != CT_INDIRECT)
+ {
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+ CORINFO_METHOD_HANDLE methHnd = call->gtCallMethHnd;
+
+ CORINFO_CONST_LOOKUP lookup;
+#if COR_JIT_EE_VERSION > 460
+ comp->info.compCompHnd->getAddressOfPInvokeTarget(methHnd, &lookup);
+#else
+ void* pIndirection;
+ lookup.accessType = IAT_PVALUE;
+ lookup.addr = comp->info.compCompHnd->getAddressOfPInvokeFixup(methHnd, &pIndirection);
+ if (lookup.addr == nullptr)
+ {
+ lookup.accessType = IAT_PPVALUE;
+ lookup.addr = pIndirection;
+ }
+#endif
+
+ void* addr = lookup.addr;
+ switch (lookup.accessType)
+ {
+ case IAT_VALUE:
+ if (!IsCallTargetInRange(addr))
+ {
+ result = AddrGen(addr);
+ }
+ else
+ {
+ // a direct call within range of the hardware relative call instruction;
+ // stash the address for codegen
+ call->gtDirectCallAddress = addr;
+#ifdef FEATURE_READYTORUN_COMPILER
+ call->gtEntryPoint.addr = nullptr;
+#endif
+ }
+ break;
+
+ case IAT_PVALUE:
+ result = Ind(AddrGen(addr));
+ break;
+
+ case IAT_PPVALUE:
+ result = Ind(Ind(AddrGen(addr)));
+ break;
+ }
+ }
+
+ InsertPInvokeCallEpilog(call);
+
+ return result;
+}
+
+// Expand the code necessary to calculate the control target.
+// Returns: the expression needed to calculate the control target
+// May insert embedded statements
+GenTree* Lowering::LowerVirtualVtableCall(GenTreeCall* call)
+{
+ noway_assert(call->gtCallType == CT_USER_FUNC);
+
+ // If this is a tail call via helper, thisPtr will be the third argument.
+ int thisPtrArgNum;
+ regNumber thisPtrArgReg;
+
+#ifndef _TARGET_X86_ // x86 tailcall via helper follows normal calling convention, but with extra stack args.
+ if (call->IsTailCallViaHelper())
+ {
+ thisPtrArgNum = 2;
+ thisPtrArgReg = REG_ARG_2;
+ }
+ else
+#endif // !_TARGET_X86_
+ {
+ thisPtrArgNum = 0;
+ thisPtrArgReg = comp->codeGen->genGetThisArgReg(call);
+ }
+
+ // get a reference to the thisPtr being passed
+ fgArgTabEntry* argEntry = comp->gtArgEntryByArgNum(call, thisPtrArgNum);
+ assert(argEntry->regNum == thisPtrArgReg);
+ assert(argEntry->node->gtOper == GT_PUTARG_REG);
+ GenTree* thisPtr = argEntry->node->gtOp.gtOp1;
+
+ // If what we are passing as the thisptr is not already a local, make a new local to place it in
+ // because we will be creating expressions based on it.
+ unsigned lclNum;
+ if (thisPtr->IsLocal())
+ {
+ lclNum = thisPtr->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ // Split off the thisPtr and store to a temporary variable.
+ if (vtableCallTemp == BAD_VAR_NUM)
+ {
+ vtableCallTemp = comp->lvaGrabTemp(true DEBUGARG("virtual vtable call"));
+ }
+
+ LIR::Use thisPtrUse(BlockRange(), &(argEntry->node->gtOp.gtOp1), argEntry->node);
+ thisPtrUse.ReplaceWithLclVar(comp, m_block->getBBWeight(comp), vtableCallTemp);
+
+ lclNum = vtableCallTemp;
+ }
+
+ // We'll introduce another use of this local so increase its ref count.
+ comp->lvaTable[lclNum].incRefCnts(comp->compCurBB->getBBWeight(comp), comp);
+
+ // If the thisPtr is a local field, then construct a local field type node
+ GenTree* local;
+ if (thisPtr->isLclField())
+ {
+ local = new (comp, GT_LCL_FLD)
+ GenTreeLclFld(GT_LCL_FLD, thisPtr->TypeGet(), lclNum, thisPtr->AsLclFld()->gtLclOffs);
+ }
+ else
+ {
+ local = new (comp, GT_LCL_VAR) GenTreeLclVar(GT_LCL_VAR, thisPtr->TypeGet(), lclNum, BAD_IL_OFFSET);
+ }
+
+ // pointer to virtual table = [REG_CALL_THIS + offs]
+ GenTree* result = Ind(Offset(local, VPTR_OFFS));
+
+ // Get hold of the vtable offset (note: this might be expensive)
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+ comp->info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection,
+ &vtabOffsAfterIndirection);
+
+ // Get the appropriate vtable chunk
+ // result = [REG_CALL_IND_SCRATCH + vtabOffsOfIndirection]
+ result = Ind(Offset(result, vtabOffsOfIndirection));
+
+ // Load the function address
+ // result = [reg+vtabOffs]
+ result = Ind(Offset(result, vtabOffsAfterIndirection));
+
+ return result;
+}
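+
+// Illustrative sketch (not definitive): the chain of indirections built above is roughly
+// equivalent to:
+//
+//   vtable = *(void**)(thisPtr + VPTR_OFFS);
+//   chunk  = *(void**)(vtable + vtabOffsOfIndirection);
+//   target = *(void**)(chunk + vtabOffsAfterIndirection);   // returned as the call target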
+
+// Lower stub dispatched virtual calls.
+GenTree* Lowering::LowerVirtualStubCall(GenTreeCall* call)
+{
+ assert((call->gtFlags & GTF_CALL_VIRT_KIND_MASK) == GTF_CALL_VIRT_STUB);
+
+ // An x86 JIT which uses full stub dispatch must generate only
+ // the following stub dispatch calls:
+ //
+ // (1) isCallRelativeIndirect:
+ // call dword ptr [rel32] ; FF 15 ---rel32----
+ // (2) isCallRelative:
+ // call abc ; E8 ---rel32----
+ // (3) isCallRegisterIndirect:
+ // 3-byte nop ;
+ // call dword ptr [eax] ; FF 10
+ //
+ // THIS IS VERY TIGHTLY TIED TO THE PREDICATES IN
+ // vm\i386\cGenCpu.h, esp. isCallRegisterIndirect.
+
+ GenTree* result = nullptr;
+
+#ifdef _TARGET_64BIT_
+ // Non-tail calls: Jump stubs are not taken into account by the VM for mapping an AV into a NullRef
+ // exception. Therefore, the JIT needs to emit an explicit null check. Note that Jit64 also generates
+ // an explicit null check.
+ //
+ // Tail calls: fgMorphTailCall() materializes the null check explicitly, so there is no need to emit
+ // another one here.
+
+ // Non-64-bit: No need to null check the this pointer - the dispatch code will deal with this.
+ // The VM considers exceptions that occur in stubs on 64-bit to be unmanaged exceptions, and it
+ // would be difficult to change this in a way that affects only the right stubs.
+
+ if (!call->IsTailCallViaHelper())
+ {
+ call->gtFlags |= GTF_CALL_NULLCHECK;
+ }
+#endif
+
+ // TODO-Cleanup: Disable emitting random NOPs
+
+ // This is code to set up an indirect call to a stub address computed
+ // via dictionary lookup.
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ NYI_X86("Virtual Stub dispatched call lowering via dictionary lookup");
+
+ // The importer decided we needed a stub call via a computed
+ // stub dispatch address, i.e. an address which came from a dictionary lookup.
+ // - The dictionary lookup produces an indirected address, suitable for call
+ // via "call [REG_VIRTUAL_STUB_PARAM]"
+ //
+ // This combination will only be generated for shared generic code and when
+ // stub dispatch is active.
+
+ // fgMorphArgs will have created trees to pass the address in REG_VIRTUAL_STUB_PARAM.
+ // All we have to do here is add an indirection to generate the actual call target.
+
+ GenTree* ind = Ind(call->gtCallAddr);
+ BlockRange().InsertAfter(call->gtCallAddr, ind);
+ call->gtCallAddr = ind;
+ }
+ else
+ {
+ // Direct stub call.
+ // Get stub addr. This will return NULL if virtual call stubs are not active
+ void* stubAddr = call->gtStubCallStubAddr;
+ noway_assert(stubAddr != nullptr);
+
+ // If not CT_INDIRECT, then it should always be a relative indirect call.
+ // This is ensured by the VM.
+ noway_assert(call->IsVirtualStubRelativeIndir());
+
+ // This is a direct stub call, though the stubAddr itself may still need to be
+ // accessed via an indirection.
+ GenTree* addr = AddrGen(stubAddr);
+
+#ifdef _TARGET_X86_
+ // On x86, for tailcall via helper, the JIT_TailCall helper takes the stubAddr as
+ // the target address, and we set a flag that it's a VSD call. The helper then
+ // handles any necessary indirection.
+ if (call->IsTailCallViaHelper())
+ {
+ result = addr;
+ }
+#endif // _TARGET_X86_
+
+ if (result == nullptr)
+ {
+ GenTree* indir = Ind(addr);
+
+// On x86 we generate this:
+// call dword ptr [rel32] ; FF 15 ---rel32----
+// So we don't use a register.
+#ifndef _TARGET_X86_
+ // on x64 we must materialize the target using specific registers.
+ addr->gtRegNum = REG_VIRTUAL_STUB_PARAM;
+ indir->gtRegNum = REG_JUMP_THUNK_PARAM;
+#endif
+ result = indir;
+ }
+ }
+
+ // TODO-Cleanup: start emitting random NOPS
+ return result;
+}
+
+//------------------------------------------------------------------------
+// AddrModeCleanupHelper: Remove the nodes that are no longer used after an
+// addressing mode is constructed
+//
+// Arguments:
+// addrMode - A pointer to a new GenTreeAddrMode
+// node - The node currently being considered for removal
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'addrMode' and 'node' must be contained in the current block
+//
+void Lowering::AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node)
+{
+ if (node == addrMode->Base() || node == addrMode->Index())
+ {
+ return;
+ }
+
+ // TODO-LIR: change this to use the LIR mark bit and iterate instead of recursing
+ for (GenTree* operand : node->Operands())
+ {
+ AddrModeCleanupHelper(addrMode, operand);
+ }
+
+ BlockRange().Remove(node);
+}
+
+//------------------------------------------------------------------------
+ // Lowering::AreSourcesPossiblyModifiedLocals:
+// Given two nodes which will be used in an addressing mode (base,
+// index), check to see if they are lclVar reads, and if so, walk
+// backwards from the use until both reads have been visited to
+// determine if they are potentially modified in that range.
+//
+// Arguments:
+// addr - the node that uses the base and index nodes
+// base - the base node
+// index - the index node
+//
+ // Returns: true if either the base or the index may be modified between the
+ // point where it is read and addr.
+//
+bool Lowering::AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index)
+{
+ assert(addr != nullptr);
+
+ unsigned markCount = 0;
+
+ SideEffectSet baseSideEffects;
+ if (base != nullptr)
+ {
+ if (base->OperIsLocalRead())
+ {
+ baseSideEffects.AddNode(comp, base);
+ }
+ else
+ {
+ base = nullptr;
+ }
+ }
+
+ SideEffectSet indexSideEffects;
+ if (index != nullptr)
+ {
+ if (index->OperIsLocalRead())
+ {
+ indexSideEffects.AddNode(comp, index);
+ }
+ else
+ {
+ index = nullptr;
+ }
+ }
+
+ for (GenTree* cursor = addr;; cursor = cursor->gtPrev)
+ {
+ assert(cursor != nullptr);
+
+ if (cursor == base)
+ {
+ base = nullptr;
+ }
+
+ if (cursor == index)
+ {
+ index = nullptr;
+ }
+
+ if ((base == nullptr) && (index == nullptr))
+ {
+ return false;
+ }
+
+ m_scratchSideEffects.Clear();
+ m_scratchSideEffects.AddNode(comp, cursor);
+ if ((base != nullptr) && m_scratchSideEffects.InterferesWith(baseSideEffects, false))
+ {
+ return true;
+ }
+
+ if ((index != nullptr) && m_scratchSideEffects.InterferesWith(indexSideEffects, false))
+ {
+ return true;
+ }
+ }
+
+ unreached();
+}
+
+//------------------------------------------------------------------------
+// TryCreateAddrMode: recognize trees which can be implemented using an
+// addressing mode and transform them to a GT_LEA
+//
+// Arguments:
+// use: the use of the address we want to transform
+// isIndir: true if this addressing mode is the child of an indir
+//
+// Returns:
+// The created LEA node or the original address node if an LEA could
+// not be formed.
+//
+GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
+{
+ GenTree* addr = use.Def();
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned scale = 0;
+ unsigned offset = 0;
+ bool rev = false;
+
+ // TODO-1stClassStructs: This logic is here to preserve prior behavior. Note that previously
+ // block ops were not considered for addressing modes, but an add under it may have been.
+ // This should be replaced with logic that more carefully determines when an addressing mode
+ // would be beneficial for a block op.
+ if (isIndir)
+ {
+ GenTree* indir = use.User();
+ if (indir->TypeGet() == TYP_STRUCT)
+ {
+ isIndir = false;
+ }
+ else if (varTypeIsStruct(indir))
+ {
+ // We can have an indirection on the rhs of a block copy (it is the source
+ // object). This is not a "regular" indirection.
+ // (Note that the parent check could be costly.)
+ GenTree* parent = indir->gtGetParent(nullptr);
+ if ((parent != nullptr) && parent->OperIsIndir())
+ {
+ isIndir = false;
+ }
+ else
+ {
+ isIndir = !indir->OperIsBlk();
+ }
+ }
+ }
+
+ // Find out if an addressing mode can be constructed
+ bool doAddrMode =
+ comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &scale, &offset, true /*nogen*/);
+
+ if (scale == 0)
+ {
+ scale = 1;
+ }
+
+ if (!isIndir)
+ {
+ // this is just a reg-const add
+ if (index == nullptr)
+ {
+ return addr;
+ }
+
+ // this is just a reg-reg add
+ if (scale == 1 && offset == 0)
+ {
+ return addr;
+ }
+ }
+
+ // make sure there are no side effects between the defs of the leaves and the use
+ if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
+ {
+ JITDUMP(" No addressing mode\n");
+ return addr;
+ }
+
+ GenTreePtr arrLength = nullptr;
+
+ JITDUMP("Addressing mode:\n");
+ JITDUMP(" Base\n");
+ DISPNODE(base);
+ if (index != nullptr)
+ {
+ JITDUMP(" + Index * %u + %u\n", scale, offset);
+ DISPNODE(index);
+ }
+ else
+ {
+ JITDUMP(" + %u\n", offset);
+ }
+
+ var_types addrModeType = addr->TypeGet();
+ if (addrModeType == TYP_REF)
+ {
+ addrModeType = TYP_BYREF;
+ }
+
+ GenTreeAddrMode* addrMode = new (comp, GT_LEA) GenTreeAddrMode(addrModeType, base, index, scale, offset);
+
+ addrMode->gtRsvdRegs = addr->gtRsvdRegs;
+ addrMode->gtFlags |= (addr->gtFlags & GTF_IND_FLAGS);
+ addrMode->gtFlags &= ~GTF_ALL_EFFECT; // LEAs are side-effect-free.
+
+ JITDUMP("New addressing mode node:\n");
+ DISPNODE(addrMode);
+ JITDUMP("\n");
+
+ // Required to prevent assert failure:
+ // Assertion failed 'op1 && op2' in flowgraph.cpp, Line: 34431
+ // when iterating the operands of a GT_LEA
+ // Test Case: self_host_tests_amd64\jit\jit64\opt\cse\VolatileTest_op_mul.exe
+ // Method: TestCSE:.cctor
+ // The method genCreateAddrMode() above probably should be fixed
+ // to not return rev=true, when index is returned as NULL
+ //
+ if (rev && index == nullptr)
+ {
+ rev = false;
+ }
+
+ if (rev)
+ {
+ addrMode->gtFlags |= GTF_REVERSE_OPS;
+ }
+ else
+ {
+ addrMode->gtFlags &= ~(GTF_REVERSE_OPS);
+ }
+
+ BlockRange().InsertAfter(addr, addrMode);
+
+ // Now we need to remove all the nodes subsumed by the addrMode
+ AddrModeCleanupHelper(addrMode, addr);
+
+ // Replace the original address node with the addrMode.
+ use.ReplaceWith(comp, addrMode);
+
+ return addrMode;
+}
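+
+// For illustration (editorial sketch; the local names are purely illustrative): given an
+// indirection whose address is computed as lcl0 + lcl1 * 4 + 16, the folding above yields
+//
+//   base = lcl0, index = lcl1, scale = 4, offset = 16
+//
+// and the whole address expression is replaced by a single GT_LEA node, with the subsumed
+// ADD/MUL/constant nodes removed from the LIR range by AddrModeCleanupHelper.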
+
+//------------------------------------------------------------------------
+// LowerAdd: turn this add into a GT_LEA if that would be profitable
+//
+// Arguments:
+// node - the node we care about
+//
+// Returns:
+// The next node to lower.
+//
+GenTree* Lowering::LowerAdd(GenTree* node)
+{
+ GenTree* next = node->gtNext;
+
+#ifdef _TARGET_ARMARCH_
+ // For ARM architectures we don't have the LEA instruction
+ // therefore we won't get much benefit from doing this.
+ return next;
+#else // _TARGET_ARMARCH_
+ if (!varTypeIsIntegralOrI(node))
+ {
+ return next;
+ }
+
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(node, &use))
+ {
+ return next;
+ }
+
+ // if this is a child of an indir, let the parent handle it.
+ GenTree* parent = use.User();
+ if (parent->OperIsIndir())
+ {
+ return next;
+ }
+
+ // if there is a chain of adds, only look at the topmost one
+ if (parent->gtOper == GT_ADD)
+ {
+ return next;
+ }
+
+ GenTree* addr = TryCreateAddrMode(std::move(use), false);
+ return addr->gtNext;
+#endif // !_TARGET_ARMARCH_
+}
+
+//------------------------------------------------------------------------
+// LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2
+// divisor into GT_RSZ/GT_AND nodes.
+//
+// Arguments:
+// node - pointer to the GT_UDIV/GT_UMOD node to be lowered
+//
+void Lowering::LowerUnsignedDivOrMod(GenTree* node)
+{
+ assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD));
+
+ GenTree* divisor = node->gtGetOp2();
+
+ if (divisor->IsCnsIntOrI())
+ {
+ size_t divisorValue = static_cast<size_t>(divisor->gtIntCon.IconValue());
+
+ if (isPow2(divisorValue))
+ {
+ genTreeOps newOper;
+
+ if (node->OperGet() == GT_UDIV)
+ {
+ newOper = GT_RSZ;
+ divisorValue = genLog2(divisorValue);
+ }
+ else
+ {
+ newOper = GT_AND;
+ divisorValue -= 1;
+ }
+
+ node->SetOper(newOper);
+ divisor->gtIntCon.SetIconValue(divisorValue);
+ }
+ }
+}
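+
+// For illustration (editorial sketch): assuming an unsigned dividend 'x' and a constant
+// divisor of 8, the transform above is equivalent to:
+//
+//   x / 8  ==>  x >> 3   // GT_UDIV becomes GT_RSZ with shift count genLog2(8) == 3
+//   x % 8  ==>  x & 7    // GT_UMOD becomes GT_AND with mask 8 - 1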
+
+//------------------------------------------------------------------------
+// LowerSignedDivOrMod: transform integer GT_DIV/GT_MOD nodes with a power of 2
+// const divisor into equivalent but faster sequences.
+//
+// Arguments:
+// node - pointer to node we care about
+//
+// Returns:
+// The next node to lower.
+//
+GenTree* Lowering::LowerSignedDivOrMod(GenTreePtr node)
+{
+ assert((node->OperGet() == GT_DIV) || (node->OperGet() == GT_MOD));
+
+ GenTree* next = node->gtNext;
+ GenTree* divMod = node;
+ GenTree* divisor = divMod->gtGetOp2();
+
+ if (!divisor->IsCnsIntOrI())
+ {
+ return next; // no transformations to make
+ }
+
+ const var_types type = divMod->TypeGet();
+ assert((type == TYP_INT) || (type == TYP_LONG));
+
+ GenTree* dividend = divMod->gtGetOp1();
+
+ if (dividend->IsCnsIntOrI())
+ {
+ // We shouldn't see a divmod with constant operands here but if we do then it's likely
+ // because optimizations are disabled or it's a case that's supposed to throw an exception.
+ // Don't optimize this.
+ return next;
+ }
+
+ ssize_t divisorValue = divisor->gtIntCon.IconValue();
+
+ if (divisorValue == -1)
+ {
+ // x / -1 can't be optimized because INT_MIN / -1 is required to throw an exception.
+
+ // x % -1 is always 0 and the IL spec says that the rem instruction "can" throw an exception if x is
+ // the minimum representable integer. However, the C# spec says that an exception "is" thrown in this
+ // case so optimizing this case would break C# code.
+
+ // A runtime check could be used to handle this case but it's probably too rare to matter.
+ return next;
+ }
+
+ bool isDiv = divMod->OperGet() == GT_DIV;
+
+ if (isDiv)
+ {
+ if ((type == TYP_INT && divisorValue == INT_MIN) || (type == TYP_LONG && divisorValue == INT64_MIN))
+ {
+ // If the divisor is the minimum representable integer value then we can use a compare,
+ // the result is 1 iff the dividend equals divisor.
+ divMod->SetOper(GT_EQ);
+ return next;
+ }
+ }
+
+ size_t absDivisorValue =
+ (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue) : static_cast<size_t>(abs(divisorValue));
+
+ if (!isPow2(absDivisorValue))
+ {
+ return next;
+ }
+
+ // We're committed to the conversion now. Go find the use.
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(node, &use))
+ {
+ assert(!"signed DIV/MOD node is unused");
+ return next;
+ }
+
+ // We need to use the dividend node multiple times so its value needs to be
+ // computed once and stored in a temp variable.
+
+ unsigned curBBWeight = comp->compCurBB->getBBWeight(comp);
+
+ LIR::Use opDividend(BlockRange(), &divMod->gtOp.gtOp1, divMod);
+ opDividend.ReplaceWithLclVar(comp, curBBWeight);
+
+ dividend = divMod->gtGetOp1();
+ assert(dividend->OperGet() == GT_LCL_VAR);
+
+ unsigned dividendLclNum = dividend->gtLclVar.gtLclNum;
+
+ GenTree* adjustment = comp->gtNewOperNode(GT_RSH, type, dividend, comp->gtNewIconNode(type == TYP_INT ? 31 : 63));
+
+ if (absDivisorValue == 2)
+ {
+ // If the divisor is +/-2 then we'd end up with a bitwise and between 0/-1 and 1.
+ // We can get the same result by using GT_RSZ instead of GT_RSH.
+ adjustment->SetOper(GT_RSZ);
+ }
+ else
+ {
+ adjustment = comp->gtNewOperNode(GT_AND, type, adjustment, comp->gtNewIconNode(absDivisorValue - 1, type));
+ }
+
+ GenTree* adjustedDividend =
+ comp->gtNewOperNode(GT_ADD, type, adjustment, comp->gtNewLclvNode(dividendLclNum, type));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+
+ GenTree* newDivMod;
+
+ if (isDiv)
+ {
+ // perform the division by right shifting the adjusted dividend
+ divisor->gtIntCon.SetIconValue(genLog2(absDivisorValue));
+
+ newDivMod = comp->gtNewOperNode(GT_RSH, type, adjustedDividend, divisor);
+
+ if (divisorValue < 0)
+ {
+ // negate the result if the divisor is negative
+ newDivMod = comp->gtNewOperNode(GT_NEG, type, newDivMod);
+ }
+ }
+ else
+ {
+ // dividend % divisor = dividend - divisor * (dividend / divisor)
+ // divisor * (dividend / divisor) translates to (dividend >> log2(divisor)) << log2(divisor),
+ // which simply discards the low log2(divisor) bits; that's just dividend & ~(divisor - 1)
+ // (the mask is applied to the adjusted dividend so that negative dividends are handled correctly)
+ divisor->gtIntCon.SetIconValue(~(absDivisorValue - 1));
+
+ newDivMod = comp->gtNewOperNode(GT_SUB, type, comp->gtNewLclvNode(dividendLclNum, type),
+ comp->gtNewOperNode(GT_AND, type, adjustedDividend, divisor));
+
+ comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp);
+ }
+
+ // Remove the divisor and dividend nodes from the linear order,
+ // since we have reused them and will resequence the tree
+ BlockRange().Remove(divisor);
+ BlockRange().Remove(dividend);
+
+ // linearize and insert the new tree before the original divMod node
+ BlockRange().InsertBefore(divMod, LIR::SeqTree(comp, newDivMod));
+ BlockRange().Remove(divMod);
+
+ // replace the original divmod node with the new divmod tree
+ use.ReplaceWith(comp, newDivMod);
+
+ return newDivMod->gtNext;
+}
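+
+// For illustration (editorial sketch): assuming a TYP_INT dividend 'x' and a constant
+// divisor of +8, the tree built above computes the equivalent of:
+//
+//   int adj = (x >> 31) & 7;           // 0 when x >= 0, 7 when x < 0
+//   int q   = (x + adj) >> 3;          // x / 8, truncated toward zero
+//   int r   = x - ((x + adj) & ~7);    // x % 8
+//
+// For a negative divisor (e.g. -8) the quotient is additionally negated via GT_NEG.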
+
+//------------------------------------------------------------------------
+// LowerStoreInd: attempt to transform an indirect store to use an
+// addressing mode
+//
+// Arguments:
+// node - the node we care about
+//
+void Lowering::LowerStoreInd(GenTree* node)
+{
+ assert(node != nullptr);
+ assert(node->OperGet() == GT_STOREIND);
+
+ TryCreateAddrMode(LIR::Use(BlockRange(), &node->gtOp.gtOp1, node), true);
+
+ // Mark all GT_STOREIND nodes to indicate that it is not known
+ // whether it represents a RMW memory op.
+ node->AsStoreInd()->SetRMWStatusDefault();
+}
+
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* src = blkNode->Data();
+ // TODO-1stClassStructs: Don't require this.
+ assert(blkNode->OperIsInitBlkOp() || !src->OperIsLocal());
+ TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
+}
+
+//------------------------------------------------------------------------
+// LowerArrElem: Lower a GT_ARR_ELEM node
+//
+// Arguments:
+// node - the GT_ARR_ELEM node to lower.
+//
+// Return Value:
+// The next node to lower.
+//
+// Assumptions:
+ // 'node' must be a GT_ARR_ELEM node.
+//
+// Notes:
+// This performs the following lowering. We start with a node of the form:
+// /--* <arrObj>
+// +--* <index0>
+// +--* <index1>
+// /--* arrMD&[,]
+//
+// First, we create temps for arrObj if it is not already a lclVar, and for any of the index
+// expressions that have side-effects.
+// We then transform the tree into:
+// <offset is null - no accumulated offset for the first index>
+// /--* <arrObj>
+// +--* <index0>
+// /--* ArrIndex[i, ]
+// +--* <arrObj>
+// /--| arrOffs[i, ]
+// | +--* <arrObj>
+// | +--* <index1>
+// +--* ArrIndex[*,j]
+// +--* <arrObj>
+// /--| arrOffs[*,j]
+// +--* lclVar NewTemp
+// /--* lea (scale = element size, offset = offset of first element)
+//
+// The new stmtExpr may be omitted if the <arrObj> is a lclVar.
+// The new stmtExpr may be embedded if the <arrObj> is not the first tree in linear order for
+// the statement containing the original arrMD.
+// Note that the arrMDOffs is the INDEX of the lea, but is evaluated before the BASE (which is the second
+// reference to NewTemp), because that provides more accurate lifetimes.
+// There may be 1, 2 or 3 dimensions, with 1, 2 or 3 arrMDIdx nodes, respectively.
+//
+GenTree* Lowering::LowerArrElem(GenTree* node)
+{
+ // This will assert if we don't have an ArrElem node
+ GenTreeArrElem* arrElem = node->AsArrElem();
+ const unsigned char rank = arrElem->gtArrElem.gtArrRank;
+ const unsigned blockWeight = m_block->getBBWeight(comp);
+
+ JITDUMP("Lowering ArrElem\n");
+ JITDUMP("============\n");
+ DISPTREERANGE(BlockRange(), arrElem);
+ JITDUMP("\n");
+
+ assert(arrElem->gtArrObj->TypeGet() == TYP_REF);
+
+ // We need to have the array object in a lclVar.
+ if (!arrElem->gtArrObj->IsLocal())
+ {
+ LIR::Use arrObjUse(BlockRange(), &arrElem->gtArrObj, arrElem);
+ arrObjUse.ReplaceWithLclVar(comp, blockWeight);
+ }
+
+ GenTree* arrObjNode = arrElem->gtArrObj;
+ assert(arrObjNode->IsLocal());
+
+ GenTree* insertionPoint = arrElem;
+
+ // The first ArrOffs node will have 0 for the offset of the previous dimension.
+ GenTree* prevArrOffs = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
+ BlockRange().InsertBefore(insertionPoint, prevArrOffs);
+
+ for (unsigned char dim = 0; dim < rank; dim++)
+ {
+ GenTree* indexNode = arrElem->gtArrElem.gtArrInds[dim];
+
+ // Use the original arrObjNode on the 0th ArrIndex node, and clone it for subsequent ones.
+ GenTreePtr idxArrObjNode;
+ if (dim == 0)
+ {
+ idxArrObjNode = arrObjNode;
+ }
+ else
+ {
+ idxArrObjNode = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, idxArrObjNode);
+ }
+
+ // Next comes the GT_ARR_INDEX node.
+ GenTreeArrIndex* arrMDIdx = new (comp, GT_ARR_INDEX)
+ GenTreeArrIndex(TYP_INT, idxArrObjNode, indexNode, dim, rank, arrElem->gtArrElem.gtArrElemType);
+ arrMDIdx->gtFlags |= ((idxArrObjNode->gtFlags | indexNode->gtFlags) & GTF_ALL_EFFECT);
+ BlockRange().InsertBefore(insertionPoint, arrMDIdx);
+
+ GenTree* offsArrObjNode = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, offsArrObjNode);
+
+ GenTreeArrOffs* arrOffs =
+ new (comp, GT_ARR_OFFSET) GenTreeArrOffs(TYP_I_IMPL, prevArrOffs, arrMDIdx, offsArrObjNode, dim, rank,
+ arrElem->gtArrElem.gtArrElemType);
+ arrOffs->gtFlags |= ((prevArrOffs->gtFlags | arrMDIdx->gtFlags | offsArrObjNode->gtFlags) & GTF_ALL_EFFECT);
+ BlockRange().InsertBefore(insertionPoint, arrOffs);
+
+ prevArrOffs = arrOffs;
+ }
+
+ // Generate the LEA and make it reverse evaluation, because we want to evaluate the index expression before the
+ // base.
+ unsigned scale = arrElem->gtArrElem.gtArrElemSize;
+ unsigned offset = comp->eeGetMDArrayDataOffset(arrElem->gtArrElem.gtArrElemType, arrElem->gtArrElem.gtArrRank);
+
+ GenTreePtr leaIndexNode = prevArrOffs;
+ if (!jitIsScaleIndexMul(scale))
+ {
+ // We do the address arithmetic in TYP_I_IMPL, though note that the lower bounds and lengths in memory are
+ // TYP_INT
+ GenTreePtr scaleNode = new (comp, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, scale);
+ GenTreePtr mulNode = new (comp, GT_MUL) GenTreeOp(GT_MUL, TYP_I_IMPL, leaIndexNode, scaleNode);
+ BlockRange().InsertBefore(insertionPoint, scaleNode, mulNode);
+ leaIndexNode = mulNode;
+ scale = 1;
+ }
+
+ GenTreePtr leaBase = comp->gtClone(arrObjNode);
+ BlockRange().InsertBefore(insertionPoint, leaBase);
+
+ GenTreePtr leaNode = new (comp, GT_LEA) GenTreeAddrMode(arrElem->TypeGet(), leaBase, leaIndexNode, scale, offset);
+ leaNode->gtFlags |= GTF_REVERSE_OPS;
+
+ BlockRange().InsertBefore(insertionPoint, leaNode);
+
+ LIR::Use arrElemUse;
+ if (BlockRange().TryGetUse(arrElem, &arrElemUse))
+ {
+ arrElemUse.ReplaceWith(comp, leaNode);
+ }
+
+ BlockRange().Remove(arrElem);
+
+ JITDUMP("Results of lowering ArrElem:\n");
+ DISPTREERANGE(BlockRange(), leaNode);
+ JITDUMP("\n\n");
+
+ return leaNode;
+}
+
+void Lowering::DoPhase()
+{
+#if 0
+ // The code in this #if can be used to debug lowering issues according to
+ // method hash. To use, simply set environment variables lowerhashlo and lowerhashhi
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("lowerhashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ }
+ char* histr = getenv("lowerhashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+ else
+ {
+ printf("Lowering for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+#endif
+#endif
+
+#if !defined(_TARGET_64BIT_)
+ DecomposeLongs decomp(comp); // Initialize the long decomposition class.
+ decomp.PrepareForDecomposition();
+#endif // !defined(_TARGET_64BIT_)
+
+ for (BasicBlock* block = comp->fgFirstBB; block; block = block->bbNext)
+ {
+ /* Make the block publicly available */
+ comp->compCurBB = block;
+
+#if !defined(_TARGET_64BIT_)
+ decomp.DecomposeBlock(block);
+#endif //!_TARGET_64BIT_
+
+ LowerBlock(block);
+ }
+
+ // If we have any PInvoke calls, insert the one-time prolog code. We've already inserted the epilog code in the
+ // appropriate spots. NOTE: there is a minor optimization opportunity here, as we still create p/invoke data
+ // structures and setup/teardown even if we've eliminated all p/invoke calls due to dead code elimination.
+ if (comp->info.compCallUnmanaged)
+ {
+ InsertPInvokeMethodProlog();
+ }
+
+#ifdef DEBUG
+ JITDUMP("Lower has completed modifying nodes, proceeding to initialize LSRA TreeNodeInfo structs...\n");
+ if (VERBOSE)
+ {
+ comp->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // TODO-Throughput: We re-sort local variables to get the goodness of enregistering recently
+ // introduced local variables both by Rationalize and Lower; the downside is that we need to
+ // recompute standard local variable liveness in order to get Linear CodeGen working.
+ // For now we'll take the throughput hit of recomputing local liveness but in the long term
+ // we're striving to use the unified liveness computation (fgLocalVarLiveness) and stop
+ // computing it separately in LSRA.
+ if (comp->lvaCount != 0)
+ {
+ comp->lvaSortAgain = true;
+ }
+ comp->EndPhase(PHASE_LOWERING_DECOMP);
+
+ comp->fgLocalVarLiveness();
+ // local var liveness can delete code, which may create empty blocks
+ if (!comp->opts.MinOpts() && !comp->opts.compDbgCode)
+ {
+ comp->optLoopsMarked = false;
+ bool modified = comp->fgUpdateFlowGraph();
+ if (modified || comp->lvaSortAgain)
+ {
+ JITDUMP("had to run another liveness pass:\n");
+ comp->fgLocalVarLiveness();
+ }
+ }
+#ifdef DEBUG
+ JITDUMP("Liveness pass finished after lowering, IR:\n");
+ JITDUMP("lvasortagain = %d\n", comp->lvaSortAgain);
+ if (VERBOSE)
+ {
+ comp->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // The initialization code for the TreeNodeInfo map was initially part of a single full IR
+ // traversal. It has been split out because the traversal order of fgWalkTreePost does not
+ // necessarily visit nodes in execution order, and also because the Lowering pass can add new
+ // BasicBlocks on the fly, so such a traversal would not be complete.
+ //
+ // Doing a new traversal guarantees we 'see' all newly introduced trees and basic blocks, allowing us
+ // to correctly initialize all the data structures LSRA requires later on.
+ // This code still has issues with the initialization of locals recently introduced by lowering:
+ // any temporary local variable introduced by lowering won't be enregistered, yielding suboptimal CQ.
+ // The reason is that we cannot re-sort the local variables by ref-count and bump the number of
+ // tracked variables right here, because then LSRA would work with mismatching BitSets (i.e. BitSets
+ // with different 'epochs', created before and after variable resorting, that would result in different
+ // numbers of tracked local variables).
+ //
+ // The fix for this is to refactor this code to run JUST BEFORE LSRA and not as part of lowering.
+ // It's also desirable to avoid initializing this code using a non-execution-order traversal.
+ //
+ LsraLocation currentLoc = 1;
+ for (BasicBlock* block = m_lsra->startBlockSequence(); block != nullptr; block = m_lsra->moveToNextBlock())
+ {
+ GenTreePtr stmt;
+
+ // Increment the LsraLocation (currentLoc) at each BasicBlock.
+ // This ensures that the block boundary (RefTypeBB, RefTypeExpUse and RefTypeDummyDef) RefPositions
+ // are in increasing location order.
+ currentLoc += 2;
+
+ m_block = block;
+ for (GenTree* node : BlockRange().NonPhiNodes())
+ {
+ // We increment the number position of each tree node by 2 to simplify the logic
+ // when there's the case of a tree that implicitly does a dual-definition of temps
+ // (the long case). In that case it is easier to already have an idle spot to handle
+ // a dual-def instead of making some messy adjustments if we only incremented the
+ // number position by one.
+#ifdef DEBUG
+ node->gtSeqNum = currentLoc;
+#endif
+
+ node->gtLsraInfo.Initialize(m_lsra, node, currentLoc);
+ node->gtClearReg(comp);
+
+ // Mark the node's operands as used
+ for (GenTree* operand : node->Operands())
+ {
+ operand->gtLIRFlags &= ~LIR::Flags::IsUnusedValue;
+ }
+
+ // If the node produces a value, mark it as unused.
+ if (node->IsValue())
+ {
+ node->gtLIRFlags |= LIR::Flags::IsUnusedValue;
+ }
+
+ currentLoc += 2;
+ }
+
+ for (GenTree* node : BlockRange().NonPhiNodes())
+ {
+ TreeNodeInfoInit(node);
+
+ // Only nodes that produce values should have a non-zero dstCount.
+ assert((node->gtLsraInfo.dstCount == 0) || node->IsValue());
+
+ // If the node produces an unused value, mark it as a local def-use
+ if ((node->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
+ {
+ node->gtLsraInfo.isLocalDefUse = true;
+ node->gtLsraInfo.dstCount = 0;
+ }
+
+#if 0
+ // TODO-CQ: Enable this code after fixing the isContained() logic to not abort for these
+ // top-level nodes that throw away their result.
+ // If this is an interlocked operation that has a non-last-use lclVar as its op2,
+ // make sure we allocate a target register for the interlocked operation.; otherwise we need
+ // not allocate a register
+ else if ((tree->OperGet() == GT_LOCKADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_XADD))
+ {
+ tree->gtLsraInfo.dstCount = 0;
+ if (tree->gtGetOp2()->IsLocal() && (tree->gtFlags & GTF_VAR_DEATH) == 0)
+ tree->gtLsraInfo.isLocalDefUse = true;
+ }
+#endif
+ }
+
+ assert(BlockRange().CheckLIR(comp, true));
+ }
+ DBEXEC(VERBOSE, DumpNodeInfoMap());
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// Lowering::CheckCallArg: check that a call argument is in an expected
+// form after lowering.
+//
+// Arguments:
+// arg - the argument to check.
+//
+void Lowering::CheckCallArg(GenTree* arg)
+{
+ if (arg->OperIsStore() || arg->IsArgPlaceHolderNode() || arg->IsNothingNode() || arg->OperIsCopyBlkOp())
+ {
+ return;
+ }
+
+ switch (arg->OperGet())
+ {
+#if !defined(_TARGET_64BIT_)
+ case GT_LONG:
+ assert(arg->gtGetOp1()->OperIsPutArg());
+ assert(arg->gtGetOp2()->OperIsPutArg());
+ break;
+#endif
+
+ case GT_LIST:
+ {
+ GenTreeArgList* list = arg->AsArgList();
+ assert(list->IsAggregate());
+
+ for (; list != nullptr; list = list->Rest())
+ {
+ assert(list->Current()->OperIsPutArg());
+ }
+ }
+ break;
+
+ default:
+ assert(arg->OperIsPutArg());
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckCall: check that a call is in an expected form after
+// lowering. Currently this amounts to checking its
+// arguments, but could be expanded to verify more
+// properties in the future.
+//
+// Arguments:
+// call - the call to check.
+//
+void Lowering::CheckCall(GenTreeCall* call)
+{
+ if (call->gtCallObjp != nullptr)
+ {
+ CheckCallArg(call->gtCallObjp);
+ }
+
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ CheckCallArg(args->Current());
+ }
+
+ for (GenTreeArgList* args = call->gtCallLateArgs; args != nullptr; args = args->Rest())
+ {
+ CheckCallArg(args->Current());
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckNode: check that an LIR node is in an expected form
+// after lowering.
+//
+// Arguments:
+// node - the node to check.
+//
+void Lowering::CheckNode(GenTree* node)
+{
+ switch (node->OperGet())
+ {
+ case GT_CALL:
+ CheckCall(node->AsCall());
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+#ifdef _TARGET_64BIT_
+ case GT_LCL_VAR:
+ case GT_STORE_LCL_VAR:
+#endif // _TARGET_64BIT_
+ assert(node->TypeGet() != TYP_SIMD12);
+ break;
+#endif
+
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// Lowering::CheckBlock: check that the contents of an LIR block are in an
+// expected form after lowering.
+//
+// Arguments:
+// compiler - the compiler context.
+// block - the block to check.
+//
+bool Lowering::CheckBlock(Compiler* compiler, BasicBlock* block)
+{
+ assert(block->isEmpty() || block->IsLIR());
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (GenTree* node : blockRange)
+ {
+ CheckNode(node);
+ }
+
+ assert(blockRange.CheckLIR(compiler));
+ return true;
+}
+#endif
+
+void Lowering::LowerBlock(BasicBlock* block)
+{
+ assert(block == comp->compCurBB); // compCurBB must already be set.
+ assert(block->isEmpty() || block->IsLIR());
+
+ m_block = block;
+
+ // NOTE: some of the lowering methods insert calls before the node being
+ // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
+ // general, any code that is inserted before the current node should be
+ // "pre-lowered" as they won't be subject to further processing.
+ // Lowering::CheckBlock() runs some extra checks on call arguments in
+ // order to help catch unlowered nodes.
+
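+ // LowerNode processes the current node and returns the next node to visit
+ // (nullptr once the end of the block range is reached).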
+ GenTree* node = BlockRange().FirstNode();
+ while (node != nullptr)
+ {
+ node = LowerNode(node);
+ }
+
+ assert(CheckBlock(comp, block));
+}
+
+/** Verifies whether both of these trees represent the same indirection.
+ * Used by Lower to annotate whether CodeGen can generate an instruction of the
+ * form *addrMode BinOp= expr
+ *
+ * Preconditions: both trees are children of GT_INDs and their underlying children
+ * have the same gtOper.
+ *
+ * This is a first iteration to actually recognize trees that can be code-generated
+ * as a single read-modify-write instruction on AMD64/x86. For now
+ * this method only supports the recognition of simple addressing modes (through GT_LEA)
+ * or local var indirections. Local fields, array access and other more complex nodes are
+ * not yet supported.
+ *
+ * TODO-CQ: Perform tree recognition by using the Value Numbering Package, that way we can recognize
+ * arbitrary complex trees and support much more addressing patterns.
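+ *
+ * Illustrative example (not part of this change): on xarch, recognizing that the
+ * load and the store of "*(p + 8) += x" refer to the same indirection is what lets
+ * codegen emit a single read-modify-write form such as "add [rax+8], ecx".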
+ */
+bool Lowering::IndirsAreEquivalent(GenTreePtr candidate, GenTreePtr storeInd)
+{
+ assert(candidate->OperGet() == GT_IND);
+ assert(storeInd->OperGet() == GT_STOREIND);
+
+ // We should check the size of the indirections. If they are
+ // different, say because of a cast, then we can't call them equivalent. Doing so could cause us
+ // to drop a cast.
+ // Signed-ness difference is okay and expected since a store indirection must always
+ // be signed based on the CIL spec, but a load could be unsigned.
+ if (genTypeSize(candidate->gtType) != genTypeSize(storeInd->gtType))
+ {
+ return false;
+ }
+
+ GenTreePtr pTreeA = candidate->gtGetOp1();
+ GenTreePtr pTreeB = storeInd->gtGetOp1();
+
+ // This method will be called by codegen (as well as during lowering).
+ // After register allocation, the sources may have been spilled and reloaded
+ // to a different register, indicated by an inserted GT_RELOAD node.
+ pTreeA = pTreeA->gtSkipReloadOrCopy();
+ pTreeB = pTreeB->gtSkipReloadOrCopy();
+
+ genTreeOps oper;
+ unsigned kind;
+
+ if (pTreeA->OperGet() != pTreeB->OperGet())
+ {
+ return false;
+ }
+
+ oper = pTreeA->OperGet();
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_CLS_VAR_ADDR:
+ case GT_CNS_INT:
+ return NodesAreEquivalentLeaves(pTreeA, pTreeB);
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* gtAddr1 = pTreeA->AsAddrMode();
+ GenTreeAddrMode* gtAddr2 = pTreeB->AsAddrMode();
+ return NodesAreEquivalentLeaves(gtAddr1->Base(), gtAddr2->Base()) &&
+ NodesAreEquivalentLeaves(gtAddr1->Index(), gtAddr2->Index()) &&
+ gtAddr1->gtScale == gtAddr2->gtScale && gtAddr1->gtOffset == gtAddr2->gtOffset;
+ }
+ default:
+ // We don't handle anything that is not either a constant,
+ // a local var or LEA.
+ return false;
+ }
+}
+
+/** Test whether the two given nodes are the same leaves.
+ * Right now, only constant integers and local variables are supported
+ */
+bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
+{
+ // If both are null they are equivalent; if only one is null they are not.
+ if (tree1 == nullptr && tree2 == nullptr)
+ {
+ return true;
+ }
+
+ if (tree1 == nullptr || tree2 == nullptr)
+ {
+ return false;
+ }
+
+ tree1 = tree1->gtSkipReloadOrCopy();
+ tree2 = tree2->gtSkipReloadOrCopy();
+
+ if (tree1->TypeGet() != tree2->TypeGet())
+ {
+ return false;
+ }
+
+ if (tree1->OperGet() != tree2->OperGet())
+ {
+ return false;
+ }
+
+ if (!tree1->OperIsLeaf() || !tree2->OperIsLeaf())
+ {
+ return false;
+ }
+
+ switch (tree1->OperGet())
+ {
+ case GT_CNS_INT:
+ return tree1->gtIntCon.gtIconVal == tree2->gtIntCon.gtIconVal &&
+ tree1->IsIconHandle() == tree2->IsIconHandle();
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ return tree1->gtLclVarCommon.gtLclNum == tree2->gtLclVarCommon.gtLclNum;
+ case GT_CLS_VAR_ADDR:
+ return tree1->gtClsVar.gtClsVarHnd == tree2->gtClsVar.gtClsVarHnd;
+ default:
+ return false;
+ }
+}
+
+#ifdef _TARGET_64BIT_
+/**
+ * Get common information required to handle a cast instruction
+ *
+ * Right now only supports 64 bit targets. In order to support 32 bit targets the
+ * switch statement needs work.
+ *
+ */
+void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
+{
+ // Initialize castInfo
+ memset(castInfo, 0, sizeof(*castInfo));
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = castOp->TypeGet();
+
+ castInfo->unsignedDest = varTypeIsUnsigned(dstType);
+ castInfo->unsignedSource = varTypeIsUnsigned(srcType);
+
+ // If necessary, force the srcType to unsigned when the GTF_UNSIGNED flag is set.
+ if (!castInfo->unsignedSource && (treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ srcType = genUnsignedType(srcType);
+ castInfo->unsignedSource = true;
+ }
+
+ if (treeNode->gtOverflow() &&
+ (genTypeSize(srcType) >= genTypeSize(dstType) || (srcType == TYP_INT && dstType == TYP_ULONG)))
+ {
+ castInfo->requiresOverflowCheck = true;
+ }
+
+ if (castInfo->requiresOverflowCheck)
+ {
+ ssize_t typeMin = 0;
+ ssize_t typeMax = 0;
+ ssize_t typeMask = 0;
+ bool signCheckOnly = false;
+
+ // Do we need to compare the value, or just check the mask?
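+ // Illustrative note: for small unsigned destinations (e.g. TYP_UBYTE below) only
+ // typeMask is set: any source bit overlapping the mask (0xFFFFFF00 for TYP_UBYTE)
+ // means the value cannot fit. For signed destinations typeMin/typeMax are also set
+ // so the consumer can range-check the value.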
+
+ switch (dstType)
+ {
+ default:
+ assert(!"unreachable: getCastDescription");
+ break;
+
+ case TYP_BYTE:
+ typeMask = ssize_t((int)0xFFFFFF80);
+ typeMin = SCHAR_MIN;
+ typeMax = SCHAR_MAX;
+ break;
+
+ case TYP_UBYTE:
+ typeMask = ssize_t((int)0xFFFFFF00L);
+ break;
+
+ case TYP_SHORT:
+ typeMask = ssize_t((int)0xFFFF8000);
+ typeMin = SHRT_MIN;
+ typeMax = SHRT_MAX;
+ break;
+
+ case TYP_CHAR:
+ typeMask = ssize_t((int)0xFFFF0000L);
+ break;
+
+ case TYP_INT:
+ if (srcType == TYP_UINT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF80000000LL;
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
+ }
+ break;
+
+ case TYP_UINT:
+ if (srcType == TYP_INT)
+ {
+ signCheckOnly = true;
+ }
+ else
+ {
+ typeMask = 0xFFFFFFFF00000000LL;
+ }
+ break;
+
+ case TYP_LONG:
+ signCheckOnly = true;
+ break;
+
+ case TYP_ULONG:
+ signCheckOnly = true;
+ break;
+ }
+
+ if (signCheckOnly)
+ {
+ castInfo->signCheckOnly = true;
+ }
+
+ castInfo->typeMax = typeMax;
+ castInfo->typeMin = typeMin;
+ castInfo->typeMask = typeMask;
+ }
+}
+
+#endif // _TARGET_64BIT_
+
+#ifdef DEBUG
+void Lowering::DumpNodeInfoMap()
+{
+ printf("-----------------------------\n");
+ printf("TREE NODE INFO DUMP\n");
+ printf("-----------------------------\n");
+
+ for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ comp->gtDispTree(node, nullptr, nullptr, true);
+ printf(" +");
+ node->gtLsraInfo.dump(m_lsra);
+ }
+ }
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lower.h b/src/jit/lower.h
new file mode 100644
index 0000000000..620636d8bd
--- /dev/null
+++ b/src/jit/lower.h
@@ -0,0 +1,280 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lower XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _LOWER_H_
+#define _LOWER_H_
+
+#include "compiler.h"
+#include "phase.h"
+#include "lsra.h"
+#include "sideeffects.h"
+
+class Lowering : public Phase
+{
+public:
+ inline Lowering(Compiler* compiler, LinearScanInterface* lsra)
+ : Phase(compiler, "Lowering", PHASE_LOWERING), vtableCallTemp(BAD_VAR_NUM)
+ {
+ m_lsra = (LinearScan*)lsra;
+ assert(m_lsra);
+ }
+ virtual void DoPhase();
+
+ // If requiresOverflowCheck is false, all other values will be unset
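+ // (For example, an overflow-checked cast from TYP_LONG to TYP_INT fills in
+ // requiresOverflowCheck, typeMin/typeMax = INT32_MIN/INT32_MAX and
+ // typeMask = 0xFFFFFFFF80000000; see getCastDescription in lower.cpp.)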
+ struct CastInfo
+ {
+ bool requiresOverflowCheck; // Will the cast require an overflow check
+ bool unsignedSource; // Is the source unsigned
+ bool unsignedDest; // is the dest unsigned
+
+ // All other fields are only meaningful if requiresOverflowCheck is set.
+
+ ssize_t typeMin; // Lowest storable value of the dest type
+ ssize_t typeMax; // Highest storable value of the dest type
+ ssize_t typeMask; // For converting from/to unsigned
+ bool signCheckOnly; // For converting between unsigned/signed int
+ };
+
+#ifdef _TARGET_64BIT_
+ static void getCastDescription(GenTreePtr treeNode, CastInfo* castInfo);
+#endif // _TARGET_64BIT_
+
+private:
+#ifdef DEBUG
+ static void CheckCallArg(GenTree* arg);
+ static void CheckCall(GenTreeCall* call);
+ static void CheckNode(GenTree* node);
+ static bool CheckBlock(Compiler* compiler, BasicBlock* block);
+#endif // DEBUG
+
+ void LowerBlock(BasicBlock* block);
+ GenTree* LowerNode(GenTree* node);
+ void CheckVSQuirkStackPaddingNeeded(GenTreeCall* call);
+
+ // ------------------------------
+ // Call Lowering
+ // ------------------------------
+ void LowerCall(GenTree* call);
+ void LowerJmpMethod(GenTree* jmp);
+ void LowerRet(GenTree* ret);
+ GenTree* LowerDelegateInvoke(GenTreeCall* call);
+ GenTree* LowerIndirectNonvirtCall(GenTreeCall* call);
+ GenTree* LowerDirectCall(GenTreeCall* call);
+ GenTree* LowerNonvirtPinvokeCall(GenTreeCall* call);
+ GenTree* LowerTailCallViaHelper(GenTreeCall* callNode, GenTree* callTarget);
+ void LowerFastTailCall(GenTreeCall* callNode);
+ void InsertProfTailCallHook(GenTreeCall* callNode, GenTree* insertionPoint);
+ GenTree* LowerVirtualVtableCall(GenTreeCall* call);
+ GenTree* LowerVirtualStubCall(GenTreeCall* call);
+ void LowerArgsForCall(GenTreeCall* call);
+ void ReplaceArgWithPutArgOrCopy(GenTreePtr* ppChild, GenTreePtr newNode);
+ GenTree* NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryPtr info, var_types type);
+ void LowerArg(GenTreeCall* call, GenTreePtr* ppTree);
+ void InsertPInvokeCallProlog(GenTreeCall* call);
+ void InsertPInvokeCallEpilog(GenTreeCall* call);
+ void InsertPInvokeMethodProlog();
+ void InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTreePtr lastExpr));
+ GenTree* SetGCState(int cns);
+ GenTree* CreateReturnTrapSeq();
+ enum FrameLinkAction
+ {
+ PushFrame,
+ PopFrame
+ };
+ GenTree* CreateFrameLinkUpdate(FrameLinkAction);
+ GenTree* AddrGen(ssize_t addr, regNumber reg = REG_NA);
+ GenTree* AddrGen(void* addr, regNumber reg = REG_NA);
+
+ GenTree* Ind(GenTree* tree)
+ {
+ return comp->gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ }
+
+ GenTree* PhysReg(regNumber reg, var_types type = TYP_I_IMPL)
+ {
+ return comp->gtNewPhysRegNode(reg, type);
+ }
+
+ GenTree* PhysRegDst(regNumber reg, GenTree* src)
+ {
+ return comp->gtNewPhysRegNode(reg, src);
+ }
+
+ GenTree* ThisReg(GenTreeCall* call)
+ {
+ return PhysReg(comp->codeGen->genGetThisArgReg(call), TYP_REF);
+ }
+
+ GenTree* Offset(GenTree* base, unsigned offset)
+ {
+ var_types resultType = (base->TypeGet() == TYP_REF) ? TYP_BYREF : base->TypeGet();
+ return new (comp, GT_LEA) GenTreeAddrMode(resultType, base, nullptr, 0, offset);
+ }
+
+ // returns true if the tree can use the read-modify-write memory instruction form
+ bool isRMWRegOper(GenTreePtr tree);
+
+ // return true if this call target is within range of a pc-rel call on the machine
+ bool IsCallTargetInRange(void* addr);
+
+ void TreeNodeInfoInit(GenTree* stmt);
+
+#if defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitSimple(GenTree* tree);
+
+ //----------------------------------------------------------------------
+ // SetRegOptional - sets a bit to indicate to LSRA that the register
+ // for a given tree node is optional for codegen purposes. If no
+ // register is allocated to such a tree node, its parent node treats
+ // it as a contained memory operand during codegen.
+ //
+ // Arguments:
+ // tree - GenTree node
+ //
+ // Returns
+ // None
+ void SetRegOptional(GenTree* tree)
+ {
+ tree->gtLsraInfo.regOptional = true;
+ }
+
+ GenTree* PreferredRegOptionalOperand(GenTree* tree);
+
+ // ------------------------------------------------------------------
+ // SetRegOptionalForBinOp - Indicates which operand of a bin-op has an
+ // optional register requirement. The xarch instruction set allows
+ // either op1 or op2 of a binary operation (e.g. add, mul, etc.) to be
+ // a memory operand. This routine tells the register allocator which
+ // of the operands only optionally requires a register. LSRA might not
+ // allocate a register to the RefTypeUse positions of such operands if
+ // that is beneficial, in which case codegen will treat them as memory
+ // operands.
+ //
+ // Arguments:
+ // tree - GenTree of a binary operation.
+ //
+ // Returns
+ // None.
+ //
+ // Note: On xarch at most one of the operands will be marked as
+ // reg optional, even when both operands could be considered register
+ // optional.
+ void SetRegOptionalForBinOp(GenTree* tree)
+ {
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ if (tree->OperIsCommutative() && tree->TypeGet() == op1->TypeGet())
+ {
+ GenTree* preferredOp = PreferredRegOptionalOperand(tree);
+ SetRegOptional(preferredOp);
+ }
+ else if (tree->TypeGet() == op2->TypeGet())
+ {
+ SetRegOptional(op2);
+ }
+ }
+#endif // defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitReturn(GenTree* tree);
+ void TreeNodeInfoInitShiftRotate(GenTree* tree);
+ void TreeNodeInfoInitCall(GenTreeCall* call);
+ void TreeNodeInfoInitStructArg(GenTreePtr structArg);
+ void TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode);
+ void TreeNodeInfoInitLogicalOp(GenTree* tree);
+ void TreeNodeInfoInitModDiv(GenTree* tree);
+ void TreeNodeInfoInitIntrinsic(GenTree* tree);
+#ifdef FEATURE_SIMD
+ void TreeNodeInfoInitSIMD(GenTree* tree);
+#endif // FEATURE_SIMD
+ void TreeNodeInfoInitCast(GenTree* tree);
+#ifdef _TARGET_ARM64_
+ void TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info);
+#endif // _TARGET_ARM64_
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void TreeNodeInfoInitPutArgStk(GenTree* tree);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ void TreeNodeInfoInitLclHeap(GenTree* tree);
+
+ void DumpNodeInfoMap();
+
+ // Per tree node member functions
+ void LowerStoreInd(GenTree* node);
+ GenTree* LowerAdd(GenTree* node);
+ void LowerUnsignedDivOrMod(GenTree* node);
+ GenTree* LowerSignedDivOrMod(GenTree* node);
+ void LowerBlockStore(GenTreeBlk* blkNode);
+
+ GenTree* TryCreateAddrMode(LIR::Use&& use, bool isIndir);
+ void AddrModeCleanupHelper(GenTreeAddrMode* addrMode, GenTree* node);
+
+ GenTree* LowerSwitch(GenTree* node);
+ void LowerCast(GenTree* node);
+
+#if defined(_TARGET_XARCH_)
+ void SetMulOpCounts(GenTreePtr tree);
+#endif // defined(_TARGET_XARCH_)
+
+ void LowerCmp(GenTreePtr tree);
+
+#if !CPU_LOAD_STORE_ARCH
+ bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
+ bool IsBinOpInRMWStoreInd(GenTreePtr tree);
+ bool IsRMWMemOpRootedAtStoreInd(GenTreePtr storeIndTree, GenTreePtr* indirCandidate, GenTreePtr* indirOpSource);
+ bool SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd);
+#endif
+ void LowerStoreLoc(GenTreeLclVarCommon* tree);
+ void SetIndirAddrOpCounts(GenTree* indirTree);
+ void LowerGCWriteBarrier(GenTree* tree);
+ GenTree* LowerArrElem(GenTree* node);
+ void LowerRotate(GenTree* tree);
+
+ // Utility functions
+ void MorphBlkIntoHelperCall(GenTreePtr pTree, GenTreePtr treeStmt);
+
+public:
+ static bool IndirsAreEquivalent(GenTreePtr pTreeA, GenTreePtr pTreeB);
+
+private:
+ static bool NodesAreEquivalentLeaves(GenTreePtr candidate, GenTreePtr storeInd);
+
+ bool AreSourcesPossiblyModifiedLocals(GenTree* addr, GenTree* base, GenTree* index);
+
+ // return true if 'childNode' is an immediate that can be contained
+ // by the 'parentNode' (i.e. folded into an instruction)
+ // for example small enough and non-relocatable
+ bool IsContainableImmed(GenTree* parentNode, GenTree* childNode);
+
+ // Makes 'childNode' contained in the 'parentNode'
+ void MakeSrcContained(GenTreePtr parentNode, GenTreePtr childNode);
+
+ // Checks and makes 'childNode' contained in the 'parentNode'
+ bool CheckImmedAndMakeContained(GenTree* parentNode, GenTree* childNode);
+
+ // Checks for memory conflicts in the instructions between childNode and parentNode, and returns true if childNode
+ // can be contained.
+ bool IsSafeToContainMem(GenTree* parentNode, GenTree* childNode);
+
+ inline LIR::Range& BlockRange() const
+ {
+ return LIR::AsRange(m_block);
+ }
+
+ LinearScan* m_lsra;
+ unsigned vtableCallTemp; // local variable we use as a temp for vtable calls
+ SideEffectSet m_scratchSideEffects; // SideEffectSet used for IsSafeToContainMem and isRMWIndirCandidate
+ BasicBlock* m_block;
+};
+
+#endif // _LOWER_H_
diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp
new file mode 100644
index 0000000000..67cea2ff4e
--- /dev/null
+++ b/src/jit/lowerarm.cpp
@@ -0,0 +1,71 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for ARM XX
+XX XX
+XX This encapsulates all the logic for lowering trees for the ARM XX
+XX architecture. For a more detailed view of what is lowering, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+// The ARM backend is not yet implemented, so the methods here are all NYI.
+// TODO-ARM-NYI: Lowering for ARM.
+#ifdef _TARGET_ARM_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "lsra.h"
+
+/* Lowering of GT_CAST nodes */
+void Lowering::LowerCast(GenTree* tree)
+{
+ NYI_ARM("ARM Lowering for cast");
+}
+
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ NYI_ARM("ARM Lowering for ROL and ROR");
+}
+
+void Lowering::TreeNodeInfoInit(GenTree* stmt)
+{
+ NYI("ARM TreeNodInfoInit");
+}
+
+// returns true if the tree can use the read-modify-write memory instruction form
+bool Lowering::isRMWRegOper(GenTreePtr tree)
+{
+ return false;
+}
+
+bool Lowering::IsCallTargetInRange(void* addr)
+{
+ return comp->codeGen->validImmForBL((ssize_t)addr);
+}
+
+// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
+{
+ NYI_ARM("ARM IsContainableImmed");
+ return false;
+}
+
+#endif // _TARGET_ARM_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
new file mode 100644
index 0000000000..1720c62acb
--- /dev/null
+++ b/src/jit/lowerarm64.cpp
@@ -0,0 +1,2063 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for ARM64 XX
+XX XX
+XX This encapsulates all the logic for lowering trees for the ARM64 XX
+XX architecture. For a more detailed view of what is lowering, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM64_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+
+// There is not much lowering to do when storing a local, but we do some handling
+// of contained immediates and widening of small constant stores.
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+ CheckImmedAndMakeContained(storeLoc, op1);
+
+ // Try to widen the ops if they are going into a local var.
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = op1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
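+ // (For example: storing the constant 0xFF into a signed TYP_BYTE local fails the
+ // (ival & 0x7f) == ival check below, so ival becomes 0xFF | 0xffffff00, i.e. -1
+ // sign-extended for the widened TYP_INT store.)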
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-ARM64-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
+
+/**
+ * Takes care of annotating the register requirements
+ * for every TreeNodeInfo struct that maps to each tree node.
+ * Preconditions:
+ * LSRA has been initialized and there is a TreeNodeInfo node
+ * already allocated and initialized for every tree in the IR.
+ * Postconditions:
+ * Every TreeNodeInfo instance has the right annotations on register
+ * requirements needed by LSRA to build the Interval Table (source,
+ * destination and internal [temp] register counts).
+ * This code was originally factored out of LSRA.
+ */
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ unsigned kind = tree->OperKind();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ RegisterType registerType = TypeGet(tree);
+
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ default:
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ LowerStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_BOX:
+ noway_assert(!"box should not exist here");
+ // The result of 'op1' is also the final result
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_PHYSREGDST:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr firstOperand;
+ GenTreePtr secondOperand;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ firstOperand = tree->gtOp.gtOp2;
+ secondOperand = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ firstOperand = tree->gtOp.gtOp1;
+ secondOperand = tree->gtOp.gtOp2;
+ }
+ if (firstOperand->TypeGet() != TYP_VOID)
+ {
+ firstOperand->gtLsraInfo.isLocalDefUse = true;
+ firstOperand->gtLsraInfo.dstCount = 0;
+ }
+ if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
+ {
+ secondOperand->gtLsraInfo.isLocalDefUse = true;
+ secondOperand->gtLsraInfo.dstCount = 0;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ {
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+
+ if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // Directly encode constant to instructions.
+ }
+ else
+ {
+ // Reserve an int register to load the constant from memory (IF_LARGELDC)
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_QMARK:
+ case GT_COLON:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ unreached();
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+ case GT_NOP:
+ // A GT_NOP is a passthrough if it is void or has a child, but it
+ // must be considered to produce a dummy value if it has a type and
+ // no child.
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_JTRUE:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ l->clearDstCount(tree->gtOp.gtOp1);
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // Check and make op2 contained (if it is a containable immediate)
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_RETURNTRAP:
+ // this just turns into a compare of its child with an int
+ // + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
+ assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
+ break;
+
+ case GT_MUL:
+ if (tree->gtOverflow())
+ {
+ // Need a register different from target reg to check for overflow.
+ info->internalIntCount = 2;
+ }
+ __fallthrough;
+
+ case GT_DIV:
+ case GT_MULHI:
+ case GT_UDIV:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ {
+ // TODO-ARM64-NYI
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics
+ noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
+
+ // Both operand and its result must be of the same floating point type.
+ op1 = tree->gtOp.gtOp1;
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+ }
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ TreeNodeInfoInitSIMD(tree);
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CAST:
+ {
+ // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
+ // register.
+ // see CodeGen::genIntToIntCast()
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double are done using SSE2 instructions
+ // and that allow the source operand to be either a reg or memop. Given the
+ // fact that casts from small int to float/double are done as two-level casts,
+ // the source operand is always guaranteed to be of size 4 or 8 bytes.
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+#ifdef DEBUG
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+ }
+#endif // DEBUG
+ // Some overflow checks need a temp reg
+
+ CastInfo castInfo;
+
+ // Get information about the cast.
+ getCastDescription(tree, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ var_types srcType = castOp->TypeGet();
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+ // If we cannot encode either the max or the min comparison value as
+ // an immediate, then we will need to reserve a temporary register to
+ // hold it.
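+ // (Illustrative note: for an overflow-checked cast to TYP_INT the upper bound
+ // is INT32_MAX = 0x7FFFFFFF, which typically cannot be encoded as an ARM64
+ // compare immediate, so a temp register is reserved to hold it.)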
+
+ bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize);
+ bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize);
+
+ if (!canStoreMaxValue || !canStoreMinValue)
+ {
+ info->internalIntCount = 1;
+ }
+ }
+ }
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+ if (shiftBy->IsCnsIntOrI())
+ {
+ l->clearDstCount(shiftBy);
+ info->srcCount--;
+ }
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ LowerCmp(tree);
+ break;
+
+ case GT_CKFINITE:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ break;
+
+ case GT_CMPXCHG:
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // TODO-ARM64-NYI
+ NYI("CMPXCHG");
+ break;
+
+ case GT_LOCKADD:
+ info->srcCount = 2;
+ info->dstCount = 0;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_ADDR:
+ {
+ // For a GT_ADDR, the child node should not be evaluated into a register
+ GenTreePtr child = tree->gtOp.gtOp1;
+ assert(!l->isCandidateLocalRef(child));
+ l->clearDstCount(child);
+ info->srcCount = 0;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ // These should all be eliminated prior to Lowering.
+ assert(!"Non-store block node in Lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_LCLHEAP:
+ {
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
+ // Here '-' means don't care.
+ //
+ // Size? Init Memory? # temp regs
+ // 0 - 0
+ // const and <=6 ptr words - 0
+ // const and <PageSize No 0
+ // >6 ptr words Yes hasPspSym ? 1 : 0
+ // Non-const Yes hasPspSym ? 1 : 0
+ // Non-const No 2
+ //
+ // PSPSym - If the method has PSPSym increment internalIntCount by 1.
+ //
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
+ {
+ MakeSrcContained(tree, size);
+
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+
+ if (sizeVal == 0)
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // Compute the amount of memory to properly STACK_ALIGN.
+ // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
+ // This should also help in debugging as we can examine the original size specified with
+ // localloc.
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+ size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
+
+ // For small allocations of up to 4 'stp' instructions (i.e. 64 bytes of localloc)
+ //
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
+ {
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ info->internalIntCount = 2;
+ }
+ }
+ else
+ {
+ // Greater than 4 slots and we need to zero-initialize the allocated stack space.
+ // If the method has a PSPSym, we need an internal register to hold regCnt
+ // since the targetReg allocated to the GT_LCLHEAP node could be the same as one
+ // of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ info->internalIntCount = 2;
+ }
+ else
+ {
+ // If the method has a PSPSym, we need an internal register to hold regCnt
+ // since the targetReg allocated to the GT_LCLHEAP node could be the same as one
+ // of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+
+ // If the method has a PSPSym, we would need an additional register to relocate it on the stack.
+ if (hasPspSym)
+ {
+ // Exclude const size 0
+ if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0))
+ info->internalIntCount++;
+ }
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ GenTreeBoundsChk* node = tree->AsBoundsChk();
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ GenTree* intCns = nullptr;
+ GenTree* other = nullptr;
+ if (CheckImmedAndMakeContained(tree, node->gtIndex))
+ {
+ intCns = node->gtIndex;
+ other = node->gtArrLen;
+ }
+ else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
+ {
+ intCns = node->gtArrLen;
+ other = node->gtIndex;
+ }
+ else
+ {
+ other = node->gtIndex;
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ // We need one internal register when generating code for GT_ARR_INDEX; however, the
+ // register allocator may give us the same register it gives us for the 'dst', so as
+ // a workaround we just ask for two internal registers.
+ //
+ info->internalIntCount = 2;
+
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+
+ GenTree* base = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned cns = lea->gtOffset;
+
+ // This LEA is instantiating an address,
+ // so we set up the srcCount and dstCount here.
+ info->srcCount = 0;
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+ if (index != nullptr)
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+
+ // On ARM64 we may need a single internal register
+ // (when both conditions are true then we still only need a single internal register)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
+ {
+ // This offset can't be contained in the add instruction, so we need an internal register
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ LowerGCWriteBarrier(tree);
+ break;
+ }
+ if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
+ {
+ // an integer zero for 'src' can be contained.
+ MakeSrcContained(tree, src);
+ }
+
+ SetIndirAddrOpCounts(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ // null check is an indirection on an addr
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+ case GT_CLS_VAR:
+ info->srcCount = 0;
+ // GT_CLS_VAR, by the time we reach the backend, must always
+ // be a pure use.
+ // It will produce a result of the type of the
+ // node, and use an internal register for the address.
+
+ info->dstCount = 1;
+ assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
+ info->internalIntCount = 1;
+ break;
+ } // end switch (tree->OperGet())
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || tree->IsMultiRegCall());
+}
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if ((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_LCL_FLD))
+ {
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ }
+ else
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ info->srcCount++;
+
+ // In the case of a fast tail call implemented as a jmp, make sure that gtControlExpr
+ // is computed into a register.
+ if (call->IsFastTailCall())
+ {
+ // Fast tail call - make sure that call target is always computed in IP0
+ // so that epilog sequence can generate "br xip0" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
+ }
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+ info->setDstCandidates(l, RBM_FLOATRET);
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+ // First, count reg args
+ bool callHasFloatRegArgs = false;
+
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ TreeNodeInfoInitPutArgStk(argNode, curArgTabEntry);
+ continue;
+ }
+
+ var_types argType = argNode->TypeGet();
+ bool argIsFloat = varTypeIsFloating(argType);
+ callHasFloatRegArgs |= argIsFloat;
+
+ regNumber argReg = curArgTabEntry->regNum;
+ // We will setup argMask to the set of all registers that compose this argument
+ regMaskTP argMask = 0;
+
+ argNode = argNode->gtEffectiveVal();
+
+ // A GT_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_LIST))
+ {
+ GenTreePtr actualArgNode = argNode;
+ unsigned originalSize = 0;
+
+ if (argNode->gtOper == GT_LIST)
+ {
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+
+ // Initailize the first register and the first regmask in our list
+ regNumber targetReg = argReg;
+ regMaskTP targetMask = genRegMask(targetReg);
+ unsigned iterationNum = 0;
+ originalSize = 0;
+
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
+
+ originalSize += REGSIZE_BYTES; // 8 bytes
+
+ // Record the register requirements for the GT_PUTARG_REG node
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // We consume one source for each item in this list
+ info->srcCount++;
+ iterationNum++;
+
+ // Update targetReg and targetMask for the next putarg_reg (if any)
+ targetReg = genRegArgNext(targetReg);
+ targetMask = genRegMask(targetReg);
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ compiler->gtDispTreeRange(BlockRange(), argNode);
+#endif
+ noway_assert(!"Unsupported TYP_STRUCT arg kind");
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
+ regNumber curReg = argReg;
+ regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
+ unsigned remainingSlots = slots;
+
+ while (remainingSlots > 0)
+ {
+ argMask |= genRegMask(curReg);
+ remainingSlots--;
+
+ if (curReg == lastReg)
+ break;
+
+ curReg = genRegArgNext(curReg);
+ }
+
+ // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
+ noway_assert(remainingSlots == 0);
+ argNode->gtLsraInfo.internalIntCount = 0;
+ }
+ else // A scalar argument (not a struct)
+ {
+ // We consume one source
+ info->srcCount++;
+
+ argMask |= genRegMask(argReg);
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+ }
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+
+ // Skip arguments that have been moved to the Late Arg list
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+
+ assert(curArgTabEntry->regNum == REG_STK);
+
+ TreeNodeInfoInitPutArgStk(arg, curArgTabEntry);
+ }
+ else
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ argInfo->dstCount = 0;
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+ // If it is a fast tail call, it is already preferenced to use IP0.
+ // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Arm64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTree* argNode, fgArgTabEntryPtr info)
+{
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // Initialize 'argNode' as not contained, as this is both the default case
+ // and how MakeSrcContained expects to find things setup.
+ //
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+ // Do we have a TYP_STRUCT argument (or a GT_LIST), if so it must be a multireg pass-by-value struct
+ if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_LIST))
+ {
+ // We will use store instructions that each write a register sized value
+
+ if (putArgChild->OperGet() == GT_LIST)
+ {
+ // We consume all of the items in the GT_LIST
+ argNode->gtLsraInfo.srcCount = info->numSlots;
+ }
+ else
+ {
+ // We could use a ldp/stp sequence so we need two internal registers
+ argNode->gtLsraInfo.internalIntCount = 2;
+
+ if (putArgChild->OperGet() == GT_OBJ)
+ {
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+
+ // We will generate all of the code for the GT_PUTARG_STK and it's child node
+ // as one contained operation
+ //
+ MakeSrcContained(argNode, putArgChild);
+ }
+ }
+ else
+ {
+ // We must not have a multi-reg struct
+ assert(info->numSlots == 1);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+// Notes:
+
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size;
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address and initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && (blkNode->AsObj()->gtGcPtrCount == 0))
+ {
+ blkNode->SetOper(GT_STORE_BLK);
+ }
+
+ if (blkNode->OperIsInitBlkOp())
+ {
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr initVal = blkNode->Data();
+
+#if 0
+ // TODO-ARM64-CQ: Currently we generate a helper call for every
+ // initblk we encounter. Later on we should implement loop unrolling
+ // code sequences to improve CQ.
+ // For reference see the code in LowerXArch.cpp.
+ if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT) && initVal->IsCnsIntOrI())
+ {
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8) however a constant of any size
+ // may practically reside on the evaluation stack. So extract
+ // the lower byte out of the initVal constant and replicate
+ // it to a larger constant whose size is sufficient to support
+ // the largest width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+ if (size < REGSIZE_BYTES)
+ {
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+ }
+ else
+ {
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
+ initVal->gtType = TYP_LONG;
+ }
+
+ MakeSrcContained(tree, blockSize);
+
+ // In case we have a buffer >= 16 bytes
+ // we can use SSE2 to do a 128-bit store in a single
+ // instruction.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // Reserve an XMM register to fill it with
+ // a pack of 16 init value constants.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll;
+ }
+ }
+ else
+#endif // 0
+ {
+ // The helper follows the regular AMD64 ABI.
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
+ blkNode->gtLsraInfo.internalIntCount = 1;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
+ sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ }
+ }
+ else
+ {
+ // CopyObj or CopyBlk
+ // Sources are src and dest and size if not constant.
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr source = blkNode->Data();
+ GenTree* srcAddr = nullptr;
+
+ if (source->gtOper == GT_IND)
+ {
+ srcAddr = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddr->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddr->gtLsraInfo.setDstCount(1);
+ srcAddr->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ if (blkNode->OperGet() == GT_STORE_OBJ)
+ {
+ // CopyObj
+
+ GenTreeObj* objNode = blkNode->AsObj();
+ GenTreePtr source = objNode->Data();
+
+ unsigned slots = objNode->gtSlots;
+
+#ifdef DEBUG
+ // CpObj must always have at least one GC-Pointer as a member.
+ assert(objNode->gtGcPtrCount > 0);
+
+ assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+ CORINFO_CLASS_HANDLE clsHnd = objNode->gtClass;
+ size_t classSize = compiler->info.compCompHnd->getClassSize(clsHnd);
+ size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
+
+ // Currently, the EE always rounds up a class data structure so
+ // we are not handling the case of a struct whose size is not a multiple of
+ // the pointer size. This behavior may change in the future, so in order to keep
+ // things correct let's assert it just to be safe. Going forward we should simply
+ // handle this case.
+ assert(classSize == blkSize);
+ assert((blkSize / TARGET_POINTER_SIZE) == slots);
+ assert(objNode->HasGCPtr());
+#endif
+
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ blkNode->gtLsraInfo.internalIntCount = 1;
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+ srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
+ }
+ else
+ {
+ // CopyBlk
+ unsigned size = blkNode->gtBlkSize;
+ GenTreePtr dstAddr = blkNode->Addr();
+ GenTreePtr srcAddr = blkNode->Data();
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
+
+#if 0
+ // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
+ // we should unroll the loop to improve CQ.
+
+ // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
+
+ if (blockSize->IsCnsIntOrI() && blockSize->gtIntCon.gtIconVal <= CPBLK_UNROLL_LIMIT)
+ {
+ assert(!blockSize->IsIconHandle());
+ ssize_t size = blockSize->gtIntCon.gtIconVal;
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffer with sizes <= CPBLK_UNROLL_LIMIT bytes are occurring in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ info->addInternalCandidates(l, l->allRegs(TYP_INT));
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ if (srcAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, srcAddr);
+ }
+
+ if (dstAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, dstAddr);
+ }
+
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+#endif // 0
+ {
+ // If we reach here with a constant size, it means we went beyond
+ // CPBLK_UNROLL_LIMIT bytes; in any case, we should never have
+ // any GC-Pointers in the src struct at this point.
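+ // The copy helper takes its arguments in memcpy-style (dst, src, size) order, so the
+ // operands are pinned to the first three argument registers below.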
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ // The srcAddr goes in arg1.
+ if (srcAddr != nullptr)
+ {
+ srcAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ }
+ else
+ {
+ // This is a local; we'll use a temp register for its address.
+ internalIntCandidates |= RBM_ARG_1;
+ internalIntCount++;
+ }
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ internalIntCandidates |= RBM_ARG_2;
+ internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ assert(!blockSize->IsIconHandle());
+ blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+ if (internalIntCount != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount = internalIntCount;
+ blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
+ }
+ }
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+//
+// Arguments:
+// tree - The GT_SIMD node of interest
+//
+// Return Value:
+// None.
+
+void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+{
+ NYI("TreeNodeInfoInitSIMD");
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* lsra = m_lsra;
+ info->dstCount = 1;
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ GenTree* op2;
+
+ case SIMDIntrinsicInit:
+ {
+ // This sets all fields of a SIMD struct to the given value.
+ // Mark op1 as contained if it is either zero or int constant of all 1's.
+ info->srcCount = 1;
+ GenTree* op1 = tree->gtOp.gtOp1;
+ if (op1->IsIntegralConst(0) || (simdTree->gtSIMDBaseType == TYP_INT && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffff) ||
+ (simdTree->gtSIMDBaseType == TYP_LONG && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffffffffffffLL))
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitN:
+ info->srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+ // Need an internal register to stitch together all the values into a single vector in an XMM reg.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicInitArray:
+ // We have an array and an index, which may be contained.
+ info->srcCount = 2;
+ CheckImmedAndMakeContained(tree, tree->gtGetOp2());
+ break;
+
+ case SIMDIntrinsicDiv:
+ // SSE2 has no instruction support for division on integer vectors
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicAbs:
+ // This gets implemented as bitwise-And operation with a mask
+ // and hence we should never see it here.
+ unreached();
+ break;
+
+ case SIMDIntrinsicSqrt:
+ // SSE2 has no instruction support for sqrt on integer vectors.
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ info->srcCount = 2;
+
+ // SSE2 32-bit integer multiplication requires two temp regs
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
+ {
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ info->srcCount = 2;
+ break;
+
+ // SSE2 doesn't support < and <= directly on int vectors.
+ // Instead we need to use > and >= with swapped operands.
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ info->srcCount = 2;
+ noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
+ break;
+
+ // SIMDIntrinsicGreaterThan is supported only on non-floating point base type vectors.
+ // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
+ // Instead we need to use < and <= with swapped operands.
+ case SIMDIntrinsicGreaterThan:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicGreaterThanOrEqual:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+
+ // a >= b = (a==b) | (a>b)
+ // To hold intermediate result of a==b and a>b we need two distinct
+ // registers. We can use targetReg and one internal reg provided
+ // they are distinct which is not guaranteed. Therefore, we request
+ // two internal registers so that one of the internal registers has
+ // to be different from targetReg.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ // Need two SIMD registers as scratch.
+ // See genSIMDIntrinsicRelOp() for details on code sequence generated and
+ // the need for two scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ // Also need an internal register as scratch. Further, targetReg and the internal reg
+ // must be two distinct regs. This is achieved by requesting two internal registers, so
+ // at least one of them is guaranteed to be different from targetReg.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
+ // the need for scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicGetItem:
+ // This implements get_Item method. The sources are:
+ // - the source SIMD struct
+ // - index (which element to get)
+ // The result is baseType of SIMD struct.
+ info->srcCount = 2;
+
+ op2 = tree->gtGetOp2();
+ // If the index is a constant, mark it as contained.
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ info->srcCount = 1;
+ }
+
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
+ if (!op2->IsCnsIntOrI())
+ {
+ (void)comp->getSIMDInitTempVarNum();
+ }
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType) && !op2->IsIntegralConst(0))
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicCast:
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicGetX:
+ case SIMDIntrinsicGetY:
+ case SIMDIntrinsicGetZ:
+ case SIMDIntrinsicGetW:
+ case SIMDIntrinsicGetOne:
+ case SIMDIntrinsicGetZero:
+ case SIMDIntrinsicGetLength:
+ case SIMDIntrinsicGetAllOnes:
+ assert(!"Get intrinsics should not be seen during Lowering.");
+ unreached();
+
+ default:
+ noway_assert(!"Unimplemented SIMD node type.");
+ unreached();
+ }
+}
+#endif // FEATURE_SIMD
+
+void Lowering::LowerGCWriteBarrier(GenTree* tree)
+{
+ GenTreePtr dst = tree;
+ GenTreePtr addr = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtOp.gtOp2;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ short leaSrcCount = 0;
+ if (lea->Base() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ if (lea->Index() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+#if NOGC_WRITE_BARRIERS
+ // For the NOGC JIT Helper calls
+ //
+ // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ // the 'src' goes into x15 (REG_WRITE_BARRIER)
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+#else
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+#endif // NOGC_WRITE_BARRIERS
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//-----------------------------------------------------------------------------------------
+// Specify register requirements for address expression of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
+//
+void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
+{
+ assert(indirTree->OperIsIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned cns = 0;
+ unsigned mul;
+ bool rev;
+ bool modifiedSources = false;
+
+ if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+ cns = lea->gtOffset;
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register to which the address is
+ // computed
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+
+ // On ARM64 we may need a single internal register
+ // (even when both conditions below are true, we still need only one internal register)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
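+ // (e.g. an access of the form [base, index] with a non-zero immediate offset must first
+ // fold the offset into a temp register, since a load can encode one or the other but not both)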
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
+ {
+ // This offset can't be contained in the ldr/str instruction, so we need an internal register
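+ // (the encodable range is roughly a scaled unsigned 12-bit or an unscaled signed 9-bit offset)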
+ info->internalIntCount = 1;
+ }
+}
+
+void Lowering::LowerCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+}
+
+/* Lower GT_CAST(srcType, DstType) nodes.
+ *
+ * Casts from small int type to float/double are transformed as follows:
+ * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
+ * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
+ * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
+ * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
+ *
+ * SSE2 conversion instructions operate on signed integers. Casts from Uint32/Uint64
+ * are morphed as follows by front-end and hence should not be seen here.
+ * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double)
+ * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float)
+ *
+ *
+ * Similarly casts from float/double to a smaller int type are transformed as follows:
+ * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
+ * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
+ * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16)
+ * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16)
+ *
+ * SSE2 has instructions to convert a float/double value into a signed 32/64-bit
+ * integer. The above transformations help us to leverage those instructions.
+ *
+ * Note that for the overflow conversions we still depend on helper calls and
+ * don't expect to see them here.
+ * i) GT_CAST(float/double, int type with overflow detection)
+ *
+ */
+void Lowering::LowerCast(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+ var_types tmpType = TYP_UNDEF;
+
+ // We should never see the following casts as they are expected to be lowered
+ // appropriately or converted into helper calls by front-end.
+ // srcType = float/double dstType = * and overflow detecting cast
+ // Reason: must be converted to a helper call
+ //
+ if (varTypeIsFloating(srcType))
+ {
+ noway_assert(!tree->gtOverflow());
+ }
+
+ // Case of src is a small type and dst is a floating point type.
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ {
+ // These conversions can never be overflow detecting ones.
+ noway_assert(!tree->gtOverflow());
+ tmpType = TYP_INT;
+ }
+ // case of src is a floating point type and dst is a small type.
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ {
+ tmpType = TYP_INT;
+ }
+
+ if (tmpType != TYP_UNDEF)
+ {
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtOp.gtOp1 = tmp;
+ BlockRange().InsertAfter(op1, tmp);
+ }
+}
+
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert ROL into ROR.
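+ // For example, a 32-bit ROL by 5 becomes a ROR by 27 (32 - 5); for a non-constant amount
+ // we negate it instead, relying on the rotate count being taken modulo the bit size.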
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
+
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp =
+ comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
+}
+
+// returns true if the tree can use the read-modify-write memory instruction form
+bool Lowering::isRMWRegOper(GenTreePtr tree)
+{
+ return false;
+}
+
+bool Lowering::IsCallTargetInRange(void* addr)
+{
+ // TODO-ARM64-CQ: This is a workaround to unblock the JIT from getting calls working.
+ // Currently, we'll be generating calls using blr and manually loading an absolute
+ // call target in a register using a sequence of load immediate instructions.
+ //
+ // As you can expect, this is inefficient and not the recommended way per the
+ // ARM64 ABI manual, but it gets things working for now.
+ // The work to get this right is to implement PC-relative calls; the bl instruction
+ // can only reach targets within +/-128MB, so this will also require some additional
+ // code to get jump thunks working.
+ return true;
+}
+
+// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
+{
+ if (varTypeIsFloating(parentNode->TypeGet()))
+ {
+ // We can contain a floating point 0.0 constant in a compare instruction
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (childNode->IsIntegralConst(0))
+ return true;
+ break;
+ }
+ }
+ else
+ {
+ // Make sure we have an actual immediate
+ if (!childNode->IsCnsIntOrI())
+ return false;
+ if (childNode->IsIconHandle() && comp->opts.compReloc)
+ return false;
+
+ ssize_t immVal = childNode->gtIntCon.gtIconVal;
+ emitAttr attr = emitActualTypeSize(childNode->TypeGet());
+ emitAttr size = EA_SIZE(attr);
+
+ switch (parentNode->OperGet())
+ {
+ default:
+ return false;
+
+ case GT_ADD:
+ case GT_SUB:
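+ // (ARM64 add/sub immediates are 12-bit values, optionally shifted left by 12)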
+ if (emitter::emitIns_valid_imm_for_add(immVal, size))
+ return true;
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ if (emitter::emitIns_valid_imm_for_cmp(immVal, size))
+ return true;
+ break;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ if (emitter::emitIns_valid_imm_for_alu(immVal, size))
+ return true;
+ break;
+
+ case GT_STORE_LCL_VAR:
+ if (immVal == 0)
+ return true;
+ break;
+ }
+ }
+
+ return false;
+}
+
+#endif // _TARGET_ARM64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
new file mode 100644
index 0000000000..6f98eb6661
--- /dev/null
+++ b/src/jit/lowerxarch.cpp
@@ -0,0 +1,4192 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Lowering for AMD64 XX
+XX XX
+XX This encapsulates all the logic for lowering trees for the AMD64 XX
+XX architecture. For a more detailed view of what is lowering, please XX
+XX take a look at Lower.cpp XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_XARCH_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+
+// xarch supports both ROL and ROR instructions so no lowering is required.
+void Lowering::LowerRotate(GenTreePtr tree)
+{
+}
+
+//------------------------------------------------------------------------
+// LowerStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Setting the appropriate candidates for a store of a multi-reg call return value.
+// - Requesting an internal register for SIMD12 stores.
+// - Handling of contained immediates and widening operations of unsigneds.
+
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(storeLoc))
+ {
+ if (op1->IsCnsIntOrI())
+ {
+ // InitBlk
+ MakeSrcContained(storeLoc, op1);
+ }
+ else if (storeLoc->TypeGet() == TYP_SIMD12)
+ {
+ // Need an additional register to extract upper 4 bytes of Vector3.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+
+ // In this case don't mark the operand as contained as we want it to
+ // be evaluated into an xmm register
+ }
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ // If the source is a containable immediate, make it contained, unless it is
+ // an int-size or larger store of zero to memory, because we can generate smaller code
+ // by zeroing a register and then storing it.
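+ // (e.g. 'xor reg, reg' followed by 'mov [mem], reg' avoids encoding a 4-byte immediate
+ // in the store instruction)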
+ if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(storeLoc)))
+ {
+ MakeSrcContained(storeLoc, op1);
+ }
+
+ // Try to widen the ops if they are going into a local var.
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (storeLoc->gtOp1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = storeLoc->gtOp1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable
+ // we extend the size of the store here
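+ // (e.g. storing (sbyte)-1 becomes a 4-byte TYP_INT store of 0xFFFFFFFF once the
+ // constant is sign-extended below)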
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-XArch-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
+
+/**
+ * Takes care of annotating the register requirements
+ * for every TreeNodeInfo struct that maps to each tree node.
+ * Preconditions:
+ * LSRA has been initialized and there is a TreeNodeInfo node
+ * already allocated and initialized for every tree in the IR.
+ * Postconditions:
+ * Every TreeNodeInfo instance has the right annotations on register
+ * requirements needed by LSRA to build the Interval Table (source,
+ * destination and internal [temp] register counts).
+ * This code is refactored originally from LSRA.
+ */
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ default:
+ TreeNodeInfoInitSimple(tree);
+ break;
+
+ case GT_LCL_FLD:
+ info->srcCount = 0;
+ info->dstCount = 1;
+
+#ifdef FEATURE_SIMD
+ // Need an additional register to read upper 4 bytes of Vector3.
+ if (tree->TypeGet() == TYP_SIMD12)
+ {
+ // We need an internal register different from targetReg in which 'tree' produces its result
+ // because both targetReg and internal reg will be in use at the same time. This is achieved
+ // by asking for two internal registers.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+ }
+#endif
+ break;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ LowerStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_BOX:
+ noway_assert(!"box should not exist here");
+ // The result of 'op1' is also the final result
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_PHYSREGDST:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr firstOperand;
+ GenTreePtr secondOperand;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ firstOperand = tree->gtOp.gtOp2;
+ secondOperand = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ firstOperand = tree->gtOp.gtOp1;
+ secondOperand = tree->gtOp.gtOp2;
+ }
+ if (firstOperand->TypeGet() != TYP_VOID)
+ {
+ firstOperand->gtLsraInfo.isLocalDefUse = true;
+ firstOperand->gtLsraInfo.dstCount = 0;
+ }
+ if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
+ {
+ secondOperand->gtLsraInfo.isLocalDefUse = true;
+ secondOperand->gtLsraInfo.dstCount = 0;
+ }
+ }
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+#if !defined(_TARGET_64BIT_)
+
+ case GT_LONG:
+ if ((tree->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
+ {
+ // An unused GT_LONG node needs to consume its sources.
+ info->srcCount = 2;
+ }
+ else
+ {
+ // Passthrough
+ info->srcCount = 0;
+ }
+
+ info->dstCount = 0;
+ break;
+
+#endif // !defined(_TARGET_64BIT_)
+
+ case GT_QMARK:
+ case GT_COLON:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ unreached();
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+ // A GT_NOP is a passthrough if it is void or if it has
+ // a child, but must be considered to produce a dummy value if it
+ // has a type but no child
+ case GT_NOP:
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_JTRUE:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ l->clearDstCount(tree->gtOp.gtOp1);
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif
+ case GT_ADD:
+ case GT_SUB:
+ // SSE2 arithmetic instructions don't support the form "op mem, xmm".
+ // Rather they only support "op xmm, mem/xmm" form.
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ op1 = tree->gtGetOp1();
+ op2 = tree->gtGetOp2();
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(op1->TypeGet() == op2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (tree->OperIsCommutative() &&
+ (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))))
+ {
+ // Though we have GT_ADD(op1=memOp, op2=non-memOp), we try to reorder the operands
+ // as long as it is safe so that the following efficient code sequence is generated:
+ // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
+ // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
+ //
+ // Instead of
+ // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR
+ // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ TreeNodeInfoInitLogicalOp(tree);
+ break;
+
+ case GT_RETURNTRAP:
+ // this just turns into a compare of its child with an int
+ // + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ if (tree->gtOp.gtOp1->isIndir())
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ }
+ info->internalIntCount = 1;
+ info->setInternalCandidates(l, l->allRegs(TYP_INT));
+ break;
+
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+ TreeNodeInfoInitModDiv(tree);
+ break;
+
+ case GT_MUL:
+ case GT_MULHI:
+ SetMulOpCounts(tree);
+ break;
+
+ case GT_INTRINSIC:
+ TreeNodeInfoInitIntrinsic(tree);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ TreeNodeInfoInitSIMD(tree);
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CAST:
+ TreeNodeInfoInitCast(tree);
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // TODO-XArch-CQ:
+ // SSE instruction set doesn't have an instruction to negate a number.
+ // The recommended way is to xor the float/double number with a bitmask.
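+ // (e.g. negating a double amounts to xoring it with the sign-bit mask 0x8000000000000000)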
+ // The only way to xor is using xorps or xorpd both of which operate on
+ // 128-bit operands. To hold the bit-mask we would need another xmm
+ // register or a 16-byte aligned 128-bit data constant. Right now emitter
+ // lacks the support for emitting such constants or instruction with mem
+ // addressing mode referring to a 128-bit operand. For now we use an
+ // internal xmm register to load 32/64-bit bitmask from data section.
+ // Note that by trading additional data section memory (128-bit) we can
+ // save on the need for an internal register and also a memory-to-reg
+ // move.
+ //
+ // Note: another option to avoid internal register requirement is by
+ // lowering as GT_SUB(0, src). This will generate code different from
+ // Jit64 and could possibly result in compat issues (?).
+ if (varTypeIsFloating(tree))
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ TreeNodeInfoInitShiftRotate(tree);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ LowerCmp(tree);
+ break;
+
+ case GT_CKFINITE:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ break;
+
+ case GT_CMPXCHG:
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // comparand is preferenced to RAX.
+ // Remaining two operands can be in any reg other than RAX.
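+ // (the cmpxchg instruction implicitly compares against, and writes back to, EAX/RAX)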
+ tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtLsraInfo.setDstCandidates(l, RBM_RAX);
+ break;
+
+ case GT_LOCKADD:
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_ADDR:
+ {
+ // For a GT_ADDR, the child node should not be evaluated into a register
+ GenTreePtr child = tree->gtOp.gtOp1;
+ assert(!l->isCandidateLocalRef(child));
+ l->clearDstCount(child);
+ info->srcCount = 0;
+ info->dstCount = 1;
+ }
+ break;
+
+#ifdef _TARGET_X86_
+ case GT_OBJ:
+ NYI_X86("GT_OBJ");
+#elif !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ case GT_OBJ:
+#endif
+ case GT_BLK:
+ case GT_DYN_BLK:
+ // These should all be eliminated prior to Lowering.
+ assert(!"Non-store block node in Lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ case GT_PUTARG_STK:
+ TreeNodeInfoInitPutArgStk(tree);
+ break;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_LCLHEAP:
+ TreeNodeInfoInitLclHeap(tree);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ GenTreeBoundsChk* node = tree->AsBoundsChk();
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ GenTreePtr other;
+ if (CheckImmedAndMakeContained(tree, node->gtIndex))
+ {
+ other = node->gtArrLen;
+ }
+ else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
+ {
+ other = node->gtIndex;
+ }
+ else if (node->gtIndex->isMemoryOp())
+ {
+ other = node->gtIndex;
+ }
+ else
+ {
+ other = node->gtArrLen;
+ }
+
+ if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+ {
+ if (other->isMemoryOp())
+ {
+ MakeSrcContained(tree, other);
+ }
+ else
+ {
+ // We can mark 'other' as reg optional, since it is not contained.
+ SetRegOptional(other);
+ }
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ break;
+
+ case GT_LEA:
+ // The LEA usually passes its operands through to the GT_IND, in which case we'll
+ // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
+ // so we set them here.
+ info->srcCount = 0;
+ if (tree->AsAddrMode()->HasBase())
+ {
+ info->srcCount++;
+ }
+ if (tree->AsAddrMode()->HasIndex())
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ LowerGCWriteBarrier(tree);
+ break;
+ }
+
+ // If the source is a containable immediate, make it contained, unless it is
+ // an int-size or larger store of zero to memory, because we can generate smaller code
+ // by zeroing a register and then storing it.
+ if (IsContainableImmed(tree, src) &&
+ (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
+ {
+ MakeSrcContained(tree, src);
+ }
+ else if (!varTypeIsFloating(tree))
+ {
+ // Perform recognition of trees with the following structure:
+ // StoreInd(addr, BinOp(expr, GT_IND(addr)))
+ // to be able to fold this into an instruction of the form
+ // BINOP [addr], register
+ // where register is the actual place where 'expr' is computed.
+ //
+ // SSE2 doesn't support RMW form of instructions.
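+ // For example, *addr = *addr + x can then be emitted as a single 'add [addr], reg'
+ // once x has been evaluated into a register.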
+ if (SetStoreIndOpCountsIfRMWMemOp(tree))
+ {
+ break;
+ }
+ }
+
+ SetIndirAddrOpCounts(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ SetIndirAddrOpCounts(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+#endif
+
+ case GT_CLS_VAR:
+ info->srcCount = 0;
+ // GT_CLS_VAR, by the time we reach the backend, must always
+ // be a pure use.
+ // It will produce a result of the type of the
+ // node, and use an internal register for the address.
+
+ info->dstCount = 1;
+ assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
+ info->internalIntCount = 1;
+ break;
+ } // end switch (tree->OperGet())
+
+ // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
+ // Even then we would like to set isTgtPref on Op1.
+ if (tree->OperIsBinary() && info->srcCount >= 1)
+ {
+ if (isRMWRegOper(tree))
+ {
+ GenTree* op1 = tree->gtOp.gtOp1;
+ GenTree* op2 = tree->gtOp.gtOp2;
+
+ // Commutative opers like add/mul/and/or/xor could reverse the order of
+ // operands if it is safe to do so. In such a case we would like op2 to be
+ // target preferenced instead of op1.
+ if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr)
+ {
+ op1 = op2;
+ op2 = tree->gtOp.gtOp1;
+ }
+
+ // If we have a read-modify-write operation, we want to preference op1 to the target.
+ // If op1 is contained, we don't want to preference it, but it won't
+ // show up as a source in that case, so it will be ignored.
+ op1->gtLsraInfo.isTgtPref = true;
+
+ // Is this a non-commutative operator, or is op2 a contained memory op?
+ // (Note that we can't call IsContained() at this point because it uses exactly the
+ // same information we're currently computing.)
+ // In either case, we need to make op2 remain live until the op is complete, by marking
+ // the source(s) associated with op2 as "delayFree".
+ // Note that if op2 of a binary RMW operator is a memory op, even if the operator
+ // is commutative, codegen cannot reverse them.
+ // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
+ // more work to be done to correctly reverse the operands if they involve memory
+ // operands. Also, we may need to handle more cases than GT_IND, especially once
+ // we've modified the register allocator to not require all nodes to be assigned
+ // a register (e.g. a spilled lclVar can often be referenced directly from memory).
+ // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
+
+ GenTree* delayUseSrc = nullptr;
+ // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
+ // to special case them.
+ if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
+ {
+ delayUseSrc = op1;
+ }
+ else if ((op2 != nullptr) &&
+ (!tree->OperIsCommutative() || (op2->isMemoryOp() && (op2->gtLsraInfo.srcCount == 0))))
+ {
+ delayUseSrc = op2;
+ }
+ if (delayUseSrc != nullptr)
+ {
+ // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree"
+ // on the base & index, if any.
+ // Otherwise, we set it on delayUseSrc itself.
+ if (delayUseSrc->isIndir() && (delayUseSrc->gtLsraInfo.dstCount == 0))
+ {
+ GenTree* base = delayUseSrc->AsIndir()->Base();
+ GenTree* index = delayUseSrc->AsIndir()->Index();
+ if (base != nullptr)
+ {
+ base->gtLsraInfo.isDelayFree = true;
+ }
+ if (index != nullptr)
+ {
+ index->gtLsraInfo.isDelayFree = true;
+ }
+ }
+ else
+ {
+ delayUseSrc->gtLsraInfo.isDelayFree = true;
+ }
+ info->hasDelayFreeSrc = true;
+ }
+ }
+ }
+
+#ifdef _TARGET_X86_
+ // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
+ // if the tree node is a byte type.
+ //
+ // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
+ // Storeind itself will not produce any value and hence dstCount=0. But op2 could be TYP_INT
+ // value. In this case we need to exclude esi/edi from the src candidates of op2.
+ //
+ // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
+ //
+ // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size. Here is an example of generated code:
+ // cmp dl, byte ptr[addr mode]
+ // movzx edx, dl
+ //
+ // Though this looks conservative in theory, in practice we could not think of a case where
+ // the below logic leads to conservative register specification. In future when or if we find
+ // one such case, this logic needs to be fine tuned for that case(s).
+ if (varTypeIsByte(tree) || ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType())) ||
+ (tree->OperIsCompare() && varTypeIsByte(tree->gtGetOp1()) && varTypeIsByte(tree->gtGetOp2())))
+ {
+ regMaskTP regMask;
+ if (info->dstCount > 0)
+ {
+ regMask = info->getDstCandidates(l);
+ assert(regMask != RBM_NONE);
+ info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+
+ if (tree->OperIsSimple() && (info->srcCount > 0))
+ {
+ // No need to set src candidates on a contained child operand.
+ GenTree* op = tree->gtOp.gtOp1;
+ assert(op != nullptr);
+ bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
+ if (!containedNode)
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(l);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+
+ if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
+ {
+ op = tree->gtOp.gtOp2;
+ containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
+ if (!containedNode)
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(l);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+ }
+ }
+ }
+#endif //_TARGET_X86_
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
+// without special handling based on the tree node type.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ unsigned kind = tree->OperKind();
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+#if !defined(_TARGET_64BIT_)
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ info->srcCount = 2;
+ loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
+ hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
+ info->dstCount = 0;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ }
+ else
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+#if defined(_TARGET_64BIT_)
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+#endif // defined(_TARGET_64BIT_)
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ // For shift operations, the shift amount must be
+ // loaded into CL whenever the number of bits
+ // to shift is not a constant.
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+
+ // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off)
+ // We will allow whatever can be encoded - hope you know what you are doing.
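+ // (e.g. a 32-bit shift by 33 executes as a shift by 1, since the hardware masks the
+ // count down to the low 5 bits)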
+ if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||
+ (shiftBy->gtIntConCommon.IconValue() < 0))
+ {
+ source->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
+ shiftBy->gtLsraInfo.setSrcCandidates(l, RBM_RCX);
+ info->setDstCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
+ }
+ else
+ {
+ MakeSrcContained(tree, shiftBy);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ // call can take a Rm op on x64
+ info->srcCount++;
+
+ // In case of fast tail implemented as jmp, make sure that gtControlExpr is
+ // computed into a register.
+ if (!call->IsFastTailCall())
+ {
+ if (ctrlExpr->isIndir())
+ {
+ MakeSrcContained(call, ctrlExpr);
+ }
+ }
+ else
+ {
+ // Fast tail call - make sure that call target is always computed in RAX
+ // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ }
+ }
+
+ // If this is a varargs call, we will clear the internal candidates in case we need
+ // to reserve some integer registers for copying float args.
+ // We have to do this because otherwise the default candidates are allRegs, and adding
+ // the individual specific registers will have no effect.
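+ // (the Windows x64 varargs convention requires floating-point register arguments to be
+ // shadowed in the corresponding integer argument registers)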
+ if (call->IsVarargs())
+ {
+ info->setInternalCandidates(l, RBM_NONE);
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+ // correct argument registers.
+ info->setDstCandidates(l, RBM_PINVOKE_TCB);
+ }
+ else
+#endif // _TARGET_X86_
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+#ifdef _TARGET_X86_
+ // The return value will be on the X87 stack, and we will need to move it.
+ info->setDstCandidates(l, l->allRegs(registerType));
+#else // !_TARGET_X86_
+ info->setDstCandidates(l, RBM_FLOATRET);
+#endif // !_TARGET_X86_
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // number of args to a call =
+ // callRegArgs + (callargs - placeholders, setup, etc)
+ // there is an explicit thisPtr but it is redundant
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+#if FEATURE_VARARG
+ bool callHasFloatRegArgs = false;
+#endif // FEATURE_VARARG
+
+ // First, count reg args
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ DISPNODE(argNode);
+ assert(argNode->gtOper == GT_PUTARG_STK);
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If the node is TYP_STRUCT and it is put on stack with
+ // putarg_stk operation, we consume and produce no registers.
+ // In this case the embedded Obj node should not produce
+ // registers either, since it is contained.
+ // Note that if it is a SIMD type the argument will be in a register.
+ if (argNode->TypeGet() == TYP_STRUCT)
+ {
+ assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
+ argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
+ argNode->gtLsraInfo.srcCount = 0;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ continue;
+ }
+
+ regNumber argReg = REG_NA;
+ regMaskTP argMask = RBM_NONE;
+ short regCount = 0;
+ bool isOnStack = true;
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ isOnStack = false;
+ var_types argType = argNode->TypeGet();
+
+#if FEATURE_VARARG
+ callHasFloatRegArgs |= varTypeIsFloating(argType);
+#endif // FEATURE_VARARG
+
+ argReg = curArgTabEntry->regNum;
+ regCount = 1;
+
+ // Default case is that we consume one source; modify this later (e.g. for
+ // promoted structs)
+ info->srcCount++;
+
+ argMask = genRegMask(argReg);
+ argNode = argNode->gtEffectiveVal();
+ }
+
+ // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
+ // Use the curArgTabEntry's isStruct to determine whether the param is a struct.
+ if (varTypeIsStruct(argNode) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
+ {
+ unsigned originalSize = 0;
+ LclVarDsc* varDsc = nullptr;
+ if (argNode->gtOper == GT_LCL_VAR)
+ {
+ varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ }
+ else if (argNode->gtOper == GT_MKREFANY)
+ {
+ originalSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else if (argNode->gtOper == GT_OBJ)
+ {
+ noway_assert(!"GT_OBJ not supported for amd64");
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ originalSize = genTypeSize(argNode->gtType);
+ }
+ else if (argNode->gtOper == GT_LIST)
+ {
+ originalSize = 0;
+
+ // There could be up to 2 PUTARG_REGs in the list
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ for (; argListPtr; argListPtr = argListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ if (iterationNum == 0)
+ {
+ varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ assert(originalSize != 0);
+ }
+ else
+ {
+ // Need an extra source for every node except the first in the list.
+ info->srcCount++;
+
+ // Get the mask for the second putarg_reg
+ argMask = genRegMask(curArgTabEntry->otherRegNum);
+ }
+
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
+ iterationNum++;
+ }
+
+ assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else
+ {
+ noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
+ unsigned remainingSlots = slots;
+
+ if (!isOnStack)
+ {
+ remainingSlots = slots - 1;
+
+ regNumber reg = (regNumber)(argReg + 1);
+ while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ {
+ argMask |= genRegMask(reg);
+ reg = (regNumber)(reg + 1);
+ remainingSlots--;
+ regCount++;
+ }
+ }
+
+ short internalIntCount = 0;
+ if (remainingSlots > 0)
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // This TYP_STRUCT argument is also passed in the outgoing argument area
+ // We need a register to address the TYP_STRUCT
+ internalIntCount = 1;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // And we may need 2
+ internalIntCount = 2;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ argNode->gtLsraInfo.internalIntCount = internalIntCount;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ else
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
+ }
+
+#if FEATURE_VARARG
+ // In the case of a varargs call, the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the integer and floating point registers.
+ // Since the integer register is not associated with this arg node, we will reserve it as
+ // an internal register so that it is not used during the evaluation of the call node
+ // (e.g. for the target).
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ regNumber targetReg = compiler->getCallArgIntRegister(argReg);
+ info->setInternalIntCount(info->internalIntCount + 1);
+ info->addInternalCandidates(l, genRegMask(targetReg));
+ }
+#endif // FEATURE_VARARG
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+#if !defined(_TARGET_64BIT_)
+ if (arg->TypeGet() == TYP_LONG)
+ {
+ assert(arg->OperGet() == GT_LONG);
+ GenTreePtr loArg = arg->gtGetOp1();
+ GenTreePtr hiArg = arg->gtGetOp2();
+ assert((loArg->OperGet() == GT_PUTARG_STK) && (hiArg->OperGet() == GT_PUTARG_STK));
+ assert((loArg->gtLsraInfo.dstCount == 1) && (hiArg->gtLsraInfo.dstCount == 1));
+ loArg->gtLsraInfo.isLocalDefUse = true;
+ hiArg->gtLsraInfo.isLocalDefUse = true;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ // If the child of GT_PUTARG_STK is a constant, we don't need a register to
+ // move it to memory (stack location).
+ //
+ // On AMD64, we don't want to make 0 contained, because we can generate smaller code
+ // by zeroing a register and then storing it. E.g.:
+ // xor rdx, rdx
+ // mov gword ptr [rsp+28H], rdx
+ // is 2 bytes smaller than:
+ // mov gword ptr [rsp+28H], 0
+ //
+ // On x86, we push stack arguments; we don't use 'mov'. So:
+ // push 0
+ // is 1 byte smaller than:
+ // xor rdx, rdx
+ // push rdx
+
+ argInfo->dstCount = 0;
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ GenTree* op1 = arg->gtOp.gtOp1;
+ if (IsContainableImmed(arg, op1)
+#if defined(_TARGET_AMD64_)
+ && !op1->IsIntegralConst(0)
+#endif // _TARGET_AMD64_
+ )
+ {
+ MakeSrcContained(arg, op1);
+ }
+ }
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+#if FEATURE_VARARG
+ // If it is a fast tail call, it is already preferenced to use RAX.
+ // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Amd64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+#endif // FEATURE_VARARG
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address, initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
+
+ regMaskTP dstAddrRegMask = RBM_NONE;
+ regMaskTP sourceRegMask = RBM_NONE;
+ regMaskTP blkSizeRegMask = RBM_NONE;
+ if (!isInitBlk)
+ {
+ // CopyObj or CopyBlk
+ if ((blkNode->OperGet() == GT_STORE_OBJ) && ((blkNode->AsObj()->gtGcPtrCount == 0) || blkNode->gtBlkOpGcUnsafe))
+ {
+ blkNode->SetOper(GT_STORE_BLK);
+ }
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddrOrFill->gtLsraInfo.setDstCount(1);
+ srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else if (!source->OperIsSIMD())
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ }
+
+ if (isInitBlk)
+ {
+ GenTree* initVal = source;
+ srcAddrOrFill = source;
+ // If we have an InitBlk with constant block size we can optimize several ways:
+ // a) If the size is smaller than a small memory page but larger than INITBLK_UNROLL_LIMIT bytes
+ // we use rep stosb since this reduces the register pressure in LSRA and we have
+ // roughly the same performance as calling the helper.
+ // b) If the size is <= INITBLK_UNROLL_LIMIT bytes and the fill byte is a constant,
+ // we can speed this up by unrolling the loop using SSE2 stores. The reason for
+ // this threshold is that, per our last investigation (Fall 2013), more than 95% of initblks
+ // in our framework assemblies are actually <= INITBLK_UNROLL_LIMIT bytes in size, so this is the
+ // preferred code sequence for the vast majority of cases.
+
+ // This threshold decides between using the helper and letting the JIT inline
+ // a code sequence of its choice.
+ unsigned helperThreshold = max(INITBLK_STOS_LIMIT, INITBLK_UNROLL_LIMIT);
+
+ // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
+ if (size != 0 && size <= helperThreshold)
+ {
+ // Always favor unrolling vs rep stos.
+ if (size <= INITBLK_UNROLL_LIMIT && initVal->IsCnsIntOrI())
+ {
+ // The fill value of an initblk is interpreted to hold a
+ // value of (unsigned int8); however, a constant of any size
+ // may practically reside on the evaluation stack. So we extract
+ // the lower byte of the initVal constant and replicate
+ // it into a larger constant whose size is sufficient to support
+ // the largest-width store of the desired inline expansion.
+
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+#ifdef _TARGET_AMD64_
+ if (size < REGSIZE_BYTES)
+ {
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+ }
+ else
+ {
+ initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
+ initVal->gtType = TYP_LONG;
+ }
+#else // !_TARGET_AMD64_
+ initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
+#endif // !_TARGET_AMD64_
+
+ // In case we have a buffer >= 16 bytes
+ // we can use SSE2 to do a 128-bit store in a single
+ // instruction.
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // Reserve an XMM register to fill it with
+ // a pack of 16 init value constants.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+ {
+ // rep stos has the following register requirements:
+ // a) The memory address has to be in RDI.
+ // b) The fill value has to be in RAX.
+ // c) The buffer size will go in RCX.
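+ //
+ // A rough sketch of the resulting sequence (exact instruction selection is decided in codegen):
+ //   mov rdi, <dstAddr>     ; destination address
+ //   mov rax, <fillValue>   ; fill value
+ //   mov rcx, <byteCount>   ; count in bytes
+ //   rep stosb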
+ dstAddrRegMask = RBM_RDI;
+ srcAddrOrFill = initVal;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+ }
+ else
+ {
+#ifdef _TARGET_AMD64_
+ // The helper follows the regular AMD64 ABI.
+ dstAddrRegMask = RBM_ARG_0;
+ sourceRegMask = RBM_ARG_1;
+ blkSizeRegMask = RBM_ARG_2;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+#else // !_TARGET_AMD64_
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+#endif // !_TARGET_AMD64_
+ }
+ }
+ else if (blkNode->gtOper == GT_STORE_OBJ)
+ {
+ // CopyObj
+
+ GenTreeObj* cpObjNode = blkNode->AsObj();
+
+ unsigned slots = cpObjNode->gtSlots;
+
+#ifdef DEBUG
+ // CpObj must always have at least one GC-Pointer as a member.
+ assert(cpObjNode->gtGcPtrCount > 0);
+
+ assert(dstAddr->gtType == TYP_BYREF || dstAddr->gtType == TYP_I_IMPL);
+
+ CORINFO_CLASS_HANDLE clsHnd = cpObjNode->gtClass;
+ size_t classSize = comp->info.compCompHnd->getClassSize(clsHnd);
+ size_t blkSize = roundUp(classSize, TARGET_POINTER_SIZE);
+
+ // Currently, the EE always rounds up a class data structure, so
+ // we are not handling the case of a struct whose size is not a multiple of the pointer size.
+ // This behavior may change in the future, so in order to keep things correct
+ // let's assert it just to be safe. Going forward we should simply
+ // handle this case.
+ assert(classSize == blkSize);
+ assert((blkSize / TARGET_POINTER_SIZE) == slots);
+ assert(cpObjNode->HasGCPtr());
+#endif
+
+ bool IsRepMovsProfitable = false;
+
+ // If the destination is not on the stack, let's find out if we
+ // can improve code size by using rep movsq instead of generating
+ // sequences of movsq instructions.
+ if (!dstAddr->OperIsLocalAddr())
+ {
+ // Let's inspect the struct/class layout and determine if it's profitable
+ // to use rep movsq for copying non-gc memory instead of using single movsq
+ // instructions for each memory slot.
+ unsigned i = 0;
+ BYTE* gcPtrs = cpObjNode->gtGcPtrs;
+
+ do
+ {
+ unsigned nonGCSlots = 0;
+ // Measure a contiguous non-gc area inside the struct and note the maximum.
+ while (i < slots && gcPtrs[i] == TYPE_GC_NONE)
+ {
+ nonGCSlots++;
+ i++;
+ }
+
+ while (i < slots && gcPtrs[i] != TYPE_GC_NONE)
+ {
+ i++;
+ }
+
+ if (nonGCSlots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ IsRepMovsProfitable = true;
+ break;
+ }
+ } while (i < slots);
+ }
+ else if (slots >= CPOBJ_NONGC_SLOTS_LIMIT)
+ {
+ IsRepMovsProfitable = true;
+ }
+
+ // There are two cases in which we need to materialize the
+ // struct size:
+ // a) When the destination is on the stack we don't need to use the
+ // write barrier; we can simply use rep movsq and get a win in code size.
+ // b) If we determine we have contiguous non-gc regions in the struct where it's profitable
+ // to use rep movsq instead of a sequence of single movsq instructions. According to the
+ // Intel Manual, the sweet spot for small structs is between 4 and 12 slots in size, where
+ // the entire operation takes 20 cycles and encodes in 5 bytes (moving RCX, and calling rep movsq).
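+ //
+ // Illustrative shape for a contiguous run of N non-GC slots (assuming RSI/RDI already
+ // point at the source/destination):
+ //   mov rcx, N
+ //   rep movsq            ; instead of N back-to-back movsq instructions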
+ if (IsRepMovsProfitable)
+ {
+ // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+ else
+ {
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+
+ dstAddrRegMask = RBM_RDI;
+
+ // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
+ // sources.
+ sourceRegMask = RBM_RSI;
+ }
+ else
+ {
+ assert((blkNode->OperGet() == GT_STORE_BLK) || (blkNode->OperGet() == GT_STORE_DYN_BLK));
+ // CopyBlk
+ // In case of a CpBlk with a constant size smaller than CPBLK_MOVS_LIMIT
+ // we can use rep movs to generate code instead of the helper call.
+
+ // This threshold decides between using the helper and letting the JIT inline
+ // a code sequence of its choice.
+ unsigned helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
+
+ // TODO-X86-CQ: Investigate whether a helper call would be beneficial on x86
+ if ((size != 0) && (size <= helperThreshold))
+ {
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if (size <= CPBLK_UNROLL_LIMIT)
+ {
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
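+ // For example (illustrative), the final 1-byte copy on x86 looks roughly like:
+ //   mov al, byte ptr [src + N]
+ //   mov byte ptr [dst + N], al   ; AL/BL/CL/DL are the only byte-addressable candidates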
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size % 2) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ blkNode->gtLsraInfo.setInternalCandidates(l, regMask);
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ if (srcAddrOrFill != nullptr && srcAddrOrFill->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, srcAddrOrFill);
+ }
+
+ if (dstAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, dstAddr);
+ }
+
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
+ }
+ else
+ {
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RSI;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+ }
+#ifdef _TARGET_AMD64_
+ else
+ {
+ // If we have a constant size here, it means we went beyond
+ // CPBLK_MOVS_LIMIT bytes; even so, we should never have any
+ // GC pointers in the src struct in this case.
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ dstAddrRegMask = RBM_ARG_0;
+ sourceRegMask = RBM_ARG_1;
+ blkSizeRegMask = RBM_ARG_2;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
+ }
+#elif defined(_TARGET_X86_)
+ else
+ {
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RSI;
+ blkSizeRegMask = RBM_RCX;
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
+ }
+#endif // _TARGET_X86_
+ assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
+ }
+ if (dstAddrRegMask != RBM_NONE)
+ {
+ dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
+ }
+ if (sourceRegMask != RBM_NONE)
+ {
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, sourceRegMask);
+ }
+ else
+ {
+ // This is a local source; we'll use a temp register for its address.
+ blkNode->gtLsraInfo.addInternalCandidates(l, sourceRegMask);
+ blkNode->gtLsraInfo.internalIntCount++;
+ }
+ }
+ if (blkSizeRegMask != RBM_NONE)
+ {
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.addInternalCandidates(l, blkSizeRegMask);
+ blkNode->gtLsraInfo.internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
+ }
+ }
+}
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ TreeNodeInfoInitSimple(tree);
+ return;
+ }
+
+ GenTreePutArgStk* putArgStkTree = tree->AsPutArgStk();
+
+ GenTreePtr dst = tree;
+ GenTreePtr src = tree->gtOp.gtOp1;
+ GenTreePtr srcAddr = nullptr;
+
+ if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
+ {
+ srcAddr = src->gtOp.gtOp1;
+ }
+ else
+ {
+ assert(varTypeIsSIMD(tree));
+ }
+ info->srcCount = src->gtLsraInfo.dstCount;
+
+ // If this is a stack variable address,
+ // make op1 contained so that there is no unnecessary
+ // copying between registers.
+ // To avoid an assertion, increment the parent's source count;
+ // it is balanced back out below.
+ bool haveLocalAddr = ((srcAddr != nullptr) && (srcAddr->OperIsLocalAddr()));
+ if (haveLocalAddr)
+ {
+ info->srcCount += 1;
+ }
+
+ info->dstCount = 0;
+
+ // In case of a CpBlk we could use a helper call. In case of putarg_stk we
+ // can't do that since the helper call could kill some already set up outgoing args.
+ // TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
+ // The cpyXXXX code is rather complex and this could cause it to be more complex, but
+ // it might be the right thing to do.
+
+ // This threshold decides between using the helper and letting the JIT inline
+ // a code sequence of its choice.
+ ssize_t helperThreshold = max(CPBLK_MOVS_LIMIT, CPBLK_UNROLL_LIMIT);
+ ssize_t size = putArgStkTree->gtNumSlots * TARGET_POINTER_SIZE;
+
+ // TODO-X86-CQ: The helper call either is not supported on x86 or requires more work
+ // (it is not clear which).
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ if (size <= CPBLK_UNROLL_LIMIT && putArgStkTree->gtNumberReferenceSlots == 0)
+ {
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size % 2) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ info->setInternalCandidates(l, regMask);
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ info->internalFloatCount = 1;
+ info->addInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+
+ if (haveLocalAddr)
+ {
+ MakeSrcContained(putArgStkTree, srcAddr);
+ }
+
+ // If src or dst are on stack, we don't have to generate the address into a register
+ // because it's just some constant+SP
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindUnroll;
+ }
+ else
+ {
+ info->internalIntCount += 3;
+ info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
+ if (haveLocalAddr)
+ {
+ MakeSrcContained(putArgStkTree, srcAddr);
+ }
+
+ putArgStkTree->gtPutArgStkKind = GenTreePutArgStk::PutArgStkKindRepInstr;
+ }
+
+ // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
+ MakeSrcContained(putArgStkTree, src);
+
+ // Balance up the inc above.
+ if (haveLocalAddr)
+ {
+ info->srcCount -= 1;
+ }
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
+ // Here '-' means don't care.
+ //
+ // Size?                    Init Memory?   # temp regs
+ // 0                        -              0
+ // const and <=6 reg words  -              0
+ // const and >6 reg words   Yes            0
+ // const and <PageSize      No             0 (amd64); 1 (x86)
+ // const and >=PageSize     No             2
+ // Non-const                Yes            0
+ // Non-const                No             2
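+ //
+ // For example (per the table above; an assumption about a typical 0x1000-byte page):
+ // a localloc of a constant 0x3000 bytes with compInitMem == false falls in the
+ // "const and >=PageSize / No" row, so two internal registers are reserved for the
+ // page-probing loop in genLclHeap().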
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
+ {
+ MakeSrcContained(tree, size);
+
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+
+ if (sizeVal == 0)
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // Compute the amount of memory to properly STACK_ALIGN.
+ // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
+ // This should also help in debugging as we can examine the original size specified with localloc.
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+
+ // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
+ // we will generate 'push 0'.
+ assert((sizeVal % REGSIZE_BYTES) == 0);
+ size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
+ if (cntRegSizedWords <= 6)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
+ {
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
+#ifdef _TARGET_X86_
+ info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
+#else // !_TARGET_X86_
+ info->internalIntCount = 0;
+#endif // !_TARGET_X86_
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ info->internalIntCount = 2;
+ }
+ }
+ else
+ {
+ // >6 and need to zero initialize allocated stack space.
+ info->internalIntCount = 0;
+ }
+ }
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ info->internalIntCount = 2;
+ }
+ else
+ {
+ info->internalIntCount = 0;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitLogicalOp: Set the NodeInfo for GT_AND/GT_OR/GT_XOR,
+// as well as GT_ADD/GT_SUB.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ // We're not marking a constant hanging on the left of the add
+ // as containable, so we assign it to a register; this has a CQ impact.
+ // TODO-XArch-CQ: Detect this case and generate a single instruction
+ // for GT_ADD(Constant, SomeTree).
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ // We can directly encode the second operand if it is either a containable constant or a memory-op.
+ // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
+ // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
+ // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
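+ //
+ // For example (illustrative), containing a matching-type memory op2 lets codegen emit
+ //   add eax, dword ptr [mem]
+ // instead of loading [mem] into a temporary register and then adding it.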
+ bool directlyEncodable = false;
+ bool binOpInRMW = false;
+ GenTreePtr operand = nullptr;
+
+ if (IsContainableImmed(tree, op2))
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else
+ {
+ binOpInRMW = IsBinOpInRMWStoreInd(tree);
+ if (!binOpInRMW)
+ {
+ if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else if (tree->OperIsCommutative())
+ {
+ if (IsContainableImmed(tree, op1) ||
+ (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
+ {
+ // If it is safe, we can reverse the order of operands of commutative operations for efficient
+ // codegen
+ directlyEncodable = true;
+ operand = op1;
+ }
+ }
+ }
+ }
+
+ if (directlyEncodable)
+ {
+ assert(operand != nullptr);
+ MakeSrcContained(tree, operand);
+ }
+ else if (!binOpInRMW)
+ {
+ // If this binary op neither has contained operands, nor is a
+ // Read-Modify-Write (RMW) operation, we can mark its operands
+ // as reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ switch (tree->OperGet())
+ {
+ case GT_MOD:
+ case GT_DIV:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(op1->TypeGet() == op2->TypeGet());
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ // SSE2 allows only op2 to be a memory-op.
+ SetRegOptional(op2);
+ }
+
+ return;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Amd64 Div/Idiv instruction:
+ // Takes the dividend in RDX:RAX and computes
+ // the quotient in RAX and the remainder in RDX.
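+ //
+ // For example, a 32-bit signed divide is roughly:
+ //   mov eax, <dividend>
+ //   cdq                  ; sign-extend EAX into EDX:EAX
+ //   idiv <divisor>       ; quotient -> EAX, remainder -> EDX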
+
+ if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD)
+ {
+ // We are interested in just the remainder.
+ // RAX is used as a trashable register during computation of remainder.
+ info->setDstCandidates(l, RBM_RDX);
+ }
+ else
+ {
+ // We are interested in just the quotient.
+ // RDX gets used as trashable register during computation of quotient
+ info->setDstCandidates(l, RBM_RAX);
+ }
+
+ // If possible, we would like to have op1 in RAX to avoid a register move.
+ op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+
+ // divisor can be an r/m, but the memory indirection must be of the same size as the divide
+ if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet()))
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else
+ {
+ op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+
+ // If there are no containable operands, we can make an operand reg optional.
+ // Div instruction allows only op2 to be a memory op.
+ SetRegOptional(op2);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ // Both operand and its result must be of floating point type.
+ GenTree* op1 = tree->gtGetOp1();
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // Mark the operand as reg optional since codegen can still
+ // generate code if op1 is on stack.
+ SetRegOptional(op1);
+ }
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ // Abs(float x) = x & 0x7fffffff
+ // Abs(double x) = x & 0x7fffffffffffffff
+
+ // In case of Abs we need an internal register to hold mask.
+
+ // TODO-XArch-CQ: avoid using an internal register for the mask.
+ // Andps or andpd both will operate on 128-bit operands.
+ // The data section constant that holds the mask is 64 bits in size.
+ // Therefore, we need both the operand and the mask to be in
+ // an xmm register. When we add support in the emitter to emit 128-bit
+ // data constants and instructions that operate on 128-bit
+ // memory operands, we can avoid the need for an internal register.
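+ //
+ // Illustrative sequence for Abs(double), assuming the mask constant is loaded
+ // into the internal xmm register:
+ //   movsd xmm1, qword ptr [maskConstant]   ; 0x7fffffffffffffff
+ //   andps xmm0, xmm1                       ; clears the sign bit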
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ break;
+
+#ifdef _TARGET_X86_
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Round:
+ NYI_X86("Math intrinsics Cos, Sin and Round");
+ break;
+#endif // _TARGET_X86_
+
+ default:
+ // Right now only Sqrt/Abs are treated as math intrinsics
+ noway_assert(!"Unsupported math intrinsic");
+ unreached();
+ break;
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+//
+// Arguments:
+// tree - The GT_SIMD node of interest
+//
+// Return Value:
+// None.
+
+void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+{
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* lsra = m_lsra;
+ info->dstCount = 1;
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ GenTree* op2;
+
+ case SIMDIntrinsicInit:
+ {
+ info->srcCount = 1;
+ GenTree* op1 = tree->gtOp.gtOp1;
+
+ // This sets all fields of a SIMD struct to the given value.
+ // Mark op1 as contained if it is either zero or an int constant of all 1's,
+ // or a float constant with a 16- or 32-byte simdType (AVX case).
+ //
+ // Should never see small int base type vectors except for zero initialization.
+ assert(!varTypeIsSmallInt(simdTree->gtSIMDBaseType) || op1->IsIntegralConst(0));
+
+ if (op1->IsFPZero() || op1->IsIntegralConst(0) ||
+ (varTypeIsIntegral(simdTree->gtSIMDBaseType) && op1->IsIntegralConst(-1)))
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) &&
+ ((simdTree->gtSIMDSize == 16) || (simdTree->gtSIMDSize == 32)))
+ {
+ // Either op1 is a float or dbl constant or an addr
+ if (op1->IsCnsFltOrDbl() || op1->OperIsLocalAddr())
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitN:
+ {
+ info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+
+ // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicInitArray:
+ // We have an array and an index, which may be contained.
+ info->srcCount = 2;
+ CheckImmedAndMakeContained(tree, tree->gtGetOp2());
+ break;
+
+ case SIMDIntrinsicDiv:
+ // SSE2 has no instruction support for division on integer vectors
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicAbs:
+ // This gets implemented as bitwise-And operation with a mask
+ // and hence should never see it here.
+ unreached();
+ break;
+
+ case SIMDIntrinsicSqrt:
+ // SSE2 has no instruction support for sqrt on integer vectors.
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ info->srcCount = 2;
+
+ // SSE2 32-bit integer multiplication requires two temp regs
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
+ {
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ info->srcCount = 2;
+ break;
+
+ // SSE2 doesn't support < and <= directly on int vectors.
+ // Instead we need to use > and >= with swapped operands.
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ info->srcCount = 2;
+ noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
+ break;
+
+ // SIMDIntrinsicGreaterThan is supported here only on non-floating point base type vectors.
+ // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
+ // Instead we need to use < and <= with swapped operands.
+ case SIMDIntrinsicGreaterThan:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ // Need two SIMD registers as scratch.
+ // See genSIMDIntrinsicRelOp() for details on the code sequence generated and
+ // the need for two scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
+ (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
+ {
+ // For SSE, or AVX with 32-byte vectors, we also need an internal register as scratch.
+ // Further we need the targetReg and internal reg to be distinct registers.
+ // This is achieved by requesting two internal registers; thus one of them
+ // will be different from targetReg.
+ // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
+ // the need for scratch registers.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicGetItem:
+ // This implements get_Item method. The sources are:
+ // - the source SIMD struct
+ // - index (which element to get)
+ // The result is baseType of SIMD struct.
+ info->srcCount = 2;
+ op2 = tree->gtOp.gtOp2;
+
+ // If the index is a constant, mark it as contained.
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ info->srcCount = 1;
+ }
+
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ //
+ // If the index is a constant and base type is a small int we can use pextrw, but on AVX
+ // we will need a temp if we are indexing into the upper half of the AVX register.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
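+ //
+ // For example (illustrative), a constant-index extract of a 16-bit element can be:
+ //   pextrw eax, xmm0, 2    ; pull element 2 into a general purpose register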
+
+ if (!op2->IsCnsIntOrI())
+ {
+ (void)comp->getSIMDInitTempVarNum();
+ }
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
+ {
+ bool needFloatTemp;
+ if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
+ (comp->getSIMDInstructionSet() == InstructionSet_AVX))
+ {
+ int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
+ needFloatTemp = (byteShiftCnt >= 16);
+ }
+ else
+ {
+ needFloatTemp = !op2->IsIntegralConst(0);
+ }
+ if (needFloatTemp)
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ }
+ break;
+
+ case SIMDIntrinsicSetX:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetW:
+ // We need an internal integer register
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+ break;
+
+ case SIMDIntrinsicCast:
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicShuffleSSE2:
+ info->srcCount = 2;
+ // Second operand is an integer constant and marked as contained.
+ op2 = tree->gtOp.gtOp2;
+ noway_assert(op2->IsCnsIntOrI());
+ MakeSrcContained(tree, op2);
+ break;
+
+ case SIMDIntrinsicGetX:
+ case SIMDIntrinsicGetY:
+ case SIMDIntrinsicGetZ:
+ case SIMDIntrinsicGetW:
+ case SIMDIntrinsicGetOne:
+ case SIMDIntrinsicGetZero:
+ case SIMDIntrinsicGetCount:
+ case SIMDIntrinsicGetAllOnes:
+ assert(!"Get intrinsics should not be seen during Lowering.");
+ unreached();
+
+ default:
+ noway_assert(!"Unimplemented SIMD node type.");
+ unreached();
+ }
+}
+#endif // FEATURE_SIMD
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCast: Set the NodeInfo for a GT_CAST.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCast(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
+ // see CodeGen::genIntToIntCast()
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double are done using SSE2 instructions,
+ // which allow the source operand to be either a reg or a memop. Given the
+ // fact that casts from small int to float/double are done as two-level casts,
+ // the source operand is always guaranteed to be of size 4 or 8 bytes.
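+ //
+ // For example (illustrative), a contained int-typed memory source folds into the convert:
+ //   cvtsi2sd xmm0, dword ptr [mem]   ; int32 -> double, r/m source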
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+#ifdef DEBUG
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+#endif // DEBUG
+
+ // U8 -> R8 conversion requires that the operand be in a register.
+ if (castOpType != TYP_ULONG)
+ {
+ if (castOp->isMemoryOp() || castOp->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, castOp);
+ }
+ else
+ {
+ // Mark castOp as reg optional to indicate codegen
+ // can still generate code if it is on stack.
+ SetRegOptional(castOp);
+ }
+ }
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(castOpType))
+ {
+ noway_assert(castOp->OperGet() == GT_LONG);
+ info->srcCount = 2;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ // some overflow checks need a temp reg:
+ // - GT_CAST from INT64/UINT64 to UINT32
+ if (tree->gtOverflow() && (castToType == TYP_UINT))
+ {
+ if (genTypeSize(castOpType) == 8)
+ {
+ info->internalIntCount = 1;
+ }
+ }
+}
+
+void Lowering::LowerGCWriteBarrier(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_STOREIND);
+
+ GenTreeStoreInd* dst = tree->AsStoreInd();
+ GenTreePtr addr = dst->Addr();
+ GenTreePtr src = dst->Data();
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ int leaSrcCount = 0;
+ if (lea->HasBase())
+ {
+ leaSrcCount++;
+ }
+ if (lea->HasIndex())
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+ bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
+
+#if NOGC_WRITE_BARRIERS
+
+#if defined(_TARGET_X86_)
+
+ useOptimizedWriteBarrierHelper = true; // On x86, use the optimized write barriers by default.
+#ifdef DEBUG
+ GCInfo::WriteBarrierForm wbf = comp->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src);
+ if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
+ {
+ useOptimizedWriteBarrierHelper = false;
+ }
+#endif
+
+ if (useOptimizedWriteBarrierHelper)
+ {
+ // Special write barrier:
+ // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
+ // op2 (src) goes into any int register.
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_SRC);
+ }
+
+#else // !defined(_TARGET_X86_)
+#error "NOGC_WRITE_BARRIERS is not supported"
+#endif // !defined(_TARGET_X86_)
+
+#endif // NOGC_WRITE_BARRIERS
+
+ if (!useOptimizedWriteBarrierHelper)
+ {
+ // For the standard JIT Helper calls:
+ // op1 (addr) goes into REG_ARG_0 and
+ // op2 (src) goes into REG_ARG_1
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+ }
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//-----------------------------------------------------------------------------------------
+// Specify register requirements for address expression of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND or GT_STOREIND gentree node
+//
+void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
+{
+ assert(indirTree->isIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned mul, cns;
+ bool rev;
+ bool modifiedSources = false;
+
+#ifdef FEATURE_SIMD
+ // If indirTree is of TYP_SIMD12, don't mark addr as contained
+ // so that it always get computed to a register. This would
+ // mean codegen side logic doesn't need to handle all possible
+ // addr expressions that could be contained.
+ //
+ // TODO-XArch-CQ: handle other addr mode expressions that could be marked
+ // as contained.
+ if (indirTree->TypeGet() == TYP_SIMD12)
+ {
+ // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
+ // To assemble the vector properly we would need an additional
+ // XMM register.
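+ //
+ // For example (illustrative), a TYP_SIMD12 load is performed roughly as:
+ //   movsd xmm0, qword ptr [addr]       ; lower 8 bytes
+ //   movss xmm1, dword ptr [addr + 8]   ; upper 4 bytes
+ // and the two halves are then combined into a single XMM register.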
+ info->internalFloatCount = 1;
+
+ // In case of GT_IND we need an internal register different from targetReg, and
+ // both of the registers are used at the same time. This is achieved by reserving
+ // two internal registers.
+ if (indirTree->OperGet() == GT_IND)
+ {
+ (info->internalFloatCount)++;
+ }
+
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ // These nodes go into an addr mode:
+ // - GT_CLS_VAR_ADDR turns into a constant.
+ // - GT_LCL_VAR_ADDR is a stack addr mode.
+ if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR))
+ {
+ // make this contained, it turns into a constant that goes into an addr mode
+ MakeSrcContained(indirTree, addr);
+ }
+ else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp) &&
+ addr->gtLsraInfo.getDstCandidates(m_lsra) != RBM_VIRTUAL_STUB_PARAM)
+ {
+ // Amd64:
+ // We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address.
+ // (i.e. those VSD calls for which stub addr is known during JIT compilation time). In this case,
+ // VM requires us to pass stub addr in REG_VIRTUAL_STUB_PARAM - see LowerVirtualStubCall(). For
+ // that reason we cannot mark such an addr as contained. Note that this is not an issue for
+ // indirect VSD calls since morphArgs() is explicitly materializing hidden param as a non-standard
+ // argument.
+ //
+ // Workaround:
+ // Note that LowerVirtualStubCall() sets addr->gtRegNum to REG_VIRTUAL_STUB_PARAM and Lowering::doPhase()
+ // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling
+ // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained
+ // (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround
+ // an explicit check is made here.
+ //
+ // On x86, direct VSD is done via a relative branch, and in fact it MUST be contained.
+ MakeSrcContained(indirTree, addr);
+ }
+ else if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register to which the address is
+ // computed
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+}
+
+void Lowering::LowerCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+#ifdef _TARGET_X86_
+ info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
+#endif // _TARGET_X86_
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+#if !defined(_TARGET_64BIT_)
+ // Long compares will consume GT_LONG nodes, each of which produces two results.
+ // Thus for each long operand there will be an additional source.
+ // TODO-X86-CQ: Mark hiOp2 and loOp2 as contained if it is a constant or a memory op.
+ if (varTypeIsLong(op1Type))
+ {
+ info->srcCount++;
+ }
+ if (varTypeIsLong(op2Type))
+ {
+ info->srcCount++;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ // If either op1 or op2 is a floating point value, then we need to use
+ // ucomiss or ucomisd to compare, both of which support the following form:
+ // ucomis[s|d] xmm, xmm/mem. That is, only the second operand can be a memory
+ // op.
+ //
+ // Second operand is a memory Op: Note that depending on comparison operator,
+ // the operands of ucomis[s|d] need to be reversed. Therefore, either op1 or
+ // op2 can be a memory op depending on the comparison operator.
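+ //
+ // For example (illustrative), an ordered 'a < b' is emitted with swapped operands,
+ //   ucomisd xmm(b), a    ; 'a' is the r/m operand here
+ // whereas 'a > b' keeps the original order,
+ //   ucomisd xmm(a), b    ; 'b' is the r/m operand here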
+ if (varTypeIsFloating(op1Type))
+ {
+ // The type of the operands has to be the same and no implicit conversions at this stage.
+ assert(op1Type == op2Type);
+
+ bool reverseOps;
+ if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Unordered comparison case
+ reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
+ }
+ else
+ {
+ reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
+ }
+
+ GenTreePtr otherOp;
+ if (reverseOps)
+ {
+ otherOp = op1;
+ }
+ else
+ {
+ otherOp = op2;
+ }
+
+ assert(otherOp != nullptr);
+ if (otherOp->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, otherOp);
+ }
+ else if (otherOp->isMemoryOp() && ((otherOp == op2) || IsSafeToContainMem(tree, otherOp)))
+ {
+ MakeSrcContained(tree, otherOp);
+ }
+ else
+ {
+ // SSE2 allows only otherOp to be a memory-op. Since otherOp is not
+ // contained, we can mark it reg-optional.
+ SetRegOptional(otherOp);
+ }
+
+ return;
+ }
+
+ // TODO-XArch-CQ: factor out cmp optimization in 'genCondSetFlags' to be used here
+ // or in other backends.
+
+ bool hasShortCast = false;
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ bool op1CanBeContained = (op1Type == op2Type);
+ if (!op1CanBeContained)
+ {
+ if (genTypeSize(op1Type) == genTypeSize(op2Type))
+ {
+ // The constant is of the correct size, but we don't have an exact type match,
+ // so we can still treat the memory op as "contained".
+ op1CanBeContained = true;
+ }
+ }
+
+ // Do we have a short compare against a constant in op2
+ //
+ if (varTypeIsSmall(op1Type))
+ {
+ GenTreeIntCon* con = op2->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+
+ bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
+ bool useTest = isEqualityCompare && (ival == 0);
+
+ if (!useTest)
+ {
+ ssize_t lo = 0; // minimum imm value allowed for cmp reg,imm
+ ssize_t hi = 0; // maximum imm value allowed for cmp reg,imm
+ bool isUnsigned = false;
+
+ switch (op1Type)
+ {
+ case TYP_BOOL:
+ op1Type = TYP_UBYTE;
+ __fallthrough;
+ case TYP_UBYTE:
+ lo = 0;
+ hi = 0x7f;
+ isUnsigned = true;
+ break;
+ case TYP_BYTE:
+ lo = -0x80;
+ hi = 0x7f;
+ break;
+ case TYP_CHAR:
+ lo = 0;
+ hi = 0x7fff;
+ isUnsigned = true;
+ break;
+ case TYP_SHORT:
+ lo = -0x8000;
+ hi = 0x7fff;
+ break;
+ default:
+ unreached();
+ }
+
+ if ((ival >= lo) && (ival <= hi))
+ {
+ // We can perform a small compare with the immediate 'ival'
+ tree->gtFlags |= GTF_RELOP_SMALL;
+ if (isUnsigned && !isEqualityCompare)
+ {
+ tree->gtFlags |= GTF_UNSIGNED;
+ }
+ // We can treat the isMemoryOp as "contained"
+ op1CanBeContained = true;
+ }
+ }
+ }
+
+ if (op1CanBeContained)
+ {
+ if (op1->isMemoryOp())
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ bool op1IsMadeContained = false;
+
+ // When op1 is a GT_AND we can often generate a single "test" instruction
+ // instead of two instructions (an "and" instruction followed by a "cmp"/"test")
+ //
+ // This instruction can only be used for equality or inequality comparisons,
+ // and we must have a compare against zero.
+ //
+ // If we have a positive test for a single bit, we can reverse the condition and
+ // make the compare be against zero.
+ //
+ // Example:
+ //          GT_EQ                            GT_NE
+ //          /  \                             /  \
+ //     GT_AND   GT_CNS (0x100)   ==>>   GT_AND   GT_CNS (0)
+ //     /  \                             /  \
+ // andOp1   GT_CNS (0x100)          andOp1   GT_CNS (0x100)
+ //
+ // We will mark the GT_AND node as contained if the tree is an equality compare with zero.
+ // Additionally, when we do this we also allow for a contained memory operand for "andOp1".
+ //
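+ // For example (illustrative), after the transformation codegen can emit:
+ //   test dword ptr [mem], 0x100
+ //   jne <target>
+ // instead of an 'and' into a register followed by a separate 'cmp'/'test'.
+ //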
+ bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
+
+ if (isEqualityCompare && (op1->OperGet() == GT_AND))
+ {
+ GenTreePtr andOp2 = op1->gtOp.gtOp2;
+ if (IsContainableImmed(op1, andOp2))
+ {
+ ssize_t andOp2CnsVal = andOp2->AsIntConCommon()->IconValue();
+ ssize_t relOp2CnsVal = op2->AsIntConCommon()->IconValue();
+
+ if ((relOp2CnsVal == andOp2CnsVal) && isPow2(andOp2CnsVal))
+ {
+ // We have a single bit test, so now we can change the
+ // tree into the alternative form,
+ // so that we can generate a test instruction.
+
+ // Reverse the equality comparison
+ tree->gtOper = (tree->gtOper == GT_EQ) ? GT_NE : GT_EQ;
+
+ // Change the relOp2CnsVal to zero
+ relOp2CnsVal = 0;
+ op2->AsIntConCommon()->SetIconValue(0);
+ }
+
+ // Now, do we have an equality compare with zero?
+ //
+ if (relOp2CnsVal == 0)
+ {
+ // Note that child nodes must be made contained before parent nodes
+
+ // Check for a memory operand for op1 with the test instruction
+ //
+ GenTreePtr andOp1 = op1->gtOp.gtOp1;
+ if (andOp1->isMemoryOp())
+ {
+ // If the type of the memory op (andOp1) is not the same as the type of the constant
+ // (andOp2), check whether it is safe to mark andOp1 as contained. For example, in
+ // the following case it is not safe to mark andOp1 as contained:
+ // andOp1 is a signed byte and andOp2 is an int constant of value 512.
+ //
+ // If it is safe, we update the type and value of andOp2 to match with andOp1.
+ bool containable = (andOp1->TypeGet() == op1->TypeGet());
+ if (!containable)
+ {
+ ssize_t newIconVal = 0;
+
+ switch (andOp1->TypeGet())
+ {
+ default:
+ break;
+ case TYP_BYTE:
+ newIconVal = (signed char)andOp2CnsVal;
+ containable = FitsIn<signed char>(andOp2CnsVal);
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ newIconVal = andOp2CnsVal & 0xFF;
+ containable = true;
+ break;
+ case TYP_SHORT:
+ newIconVal = (signed short)andOp2CnsVal;
+ containable = FitsIn<signed short>(andOp2CnsVal);
+ break;
+ case TYP_CHAR:
+ newIconVal = andOp2CnsVal & 0xFFFF;
+ containable = true;
+ break;
+ case TYP_INT:
+ newIconVal = (INT32)andOp2CnsVal;
+ containable = FitsIn<INT32>(andOp2CnsVal);
+ break;
+ case TYP_UINT:
+ newIconVal = andOp2CnsVal & 0xFFFFFFFF;
+ containable = true;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case TYP_LONG:
+ newIconVal = (INT64)andOp2CnsVal;
+ containable = true;
+ break;
+ case TYP_ULONG:
+ newIconVal = (UINT64)andOp2CnsVal;
+ containable = true;
+ break;
+#endif //_TARGET_64BIT_
+ }
+
+ if (containable)
+ {
+ andOp2->gtType = andOp1->TypeGet();
+ andOp2->AsIntConCommon()->SetIconValue(newIconVal);
+ }
+ }
+
+ // Mark the 'andOp1' memory operand as contained
+ // Note that for equality comparisons we don't need
+ // to deal with any signed or unsigned issues.
+ if (containable)
+ {
+ MakeSrcContained(op1, andOp1);
+ }
+ }
+ // Mark the 'op1' (the GT_AND) operand as contained
+ MakeSrcContained(tree, op1);
+ op1IsMadeContained = true;
+
+ // During Codegen we will now generate "test andOp1, andOp2CnsVal"
+ }
+ }
+ }
+ else if (op1->OperGet() == GT_CAST)
+ {
+ // If op1 is a cast operation and the cast type is a one-byte-sized unsigned type,
+ // we can use the value in the register directly, instead of doing an extra cast step.
+ var_types dstType = op1->CastToType();
+ bool isUnsignedDst = varTypeIsUnsigned(dstType);
+ emitAttr castSize = EA_ATTR(genTypeSize(dstType));
+ GenTreePtr castOp1 = op1->gtOp.gtOp1;
+ genTreeOps castOp1Oper = castOp1->OperGet();
+ bool safeOper = false;
+
+ // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE.
+ // For example, when 'castOp1Oper' is a GT_RSZ or GT_RSH we are shifting
+ // bits from the left into the lower bits. If we change the type to TYP_UBYTE
+ // we will instead generate a byte-sized shift operation: shr al, 24
+ // For the following ALU operations it is safe to change the gtType to the
+ // smaller type:
+ //
+ if ((castOp1Oper == GT_CNS_INT) || (castOp1Oper == GT_CALL) || // the return value from a Call
+ (castOp1Oper == GT_LCL_VAR) || castOp1->OperIsLogical() || // GT_AND, GT_OR, GT_XOR
+ castOp1->isMemoryOp()) // isIndir() || isLclField();
+ {
+ safeOper = true;
+ }
+
+ if ((castSize == EA_1BYTE) && isUnsignedDst && // Unsigned cast to TYP_UBYTE
+ safeOper && // Must be a safe operation
+ !op1->gtOverflow()) // Must not be an overflow checking cast
+ {
+ // Currently all of the Oper accepted as 'safeOper' are
+ // non-overflow checking operations. If we were to add
+ // an overflow checking operation then this assert needs
+ // to be moved above to guard entry to this block.
+ //
+ assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation
+
+ GenTreePtr removeTreeNode = op1;
+ tree->gtOp.gtOp1 = castOp1;
+ op1 = castOp1;
+ castOp1->gtType = TYP_UBYTE;
+
+ // trim down the value if castOp1 is an int constant since its type changed to UBYTE.
+ if (castOp1Oper == GT_CNS_INT)
+ {
+ castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal;
+ }
+
+ if (op2->isContainedIntOrIImmed())
+ {
+ ssize_t val = (ssize_t)op2->AsIntConCommon()->IconValue();
+ if (val >= 0 && val <= 255)
+ {
+ op2->gtType = TYP_UBYTE;
+ tree->gtFlags |= GTF_UNSIGNED;
+
+ // Right now op1's type is the same as op2's type.
+ // If op1 is a memory op, we should mark it as contained.
+ if (castOp1->isMemoryOp())
+ {
+ MakeSrcContained(tree, op1);
+ op1IsMadeContained = true;
+ }
+ }
+ }
+
+ BlockRange().Remove(removeTreeNode);
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("LowerCmp: Removing a GT_CAST to TYP_UBYTE and changing castOp1->gtType to "
+ "TYP_UBYTE\n");
+ comp->gtDispTreeRange(BlockRange(), tree);
+ }
+#endif
+ }
+ }
+
+ // If not made contained, op1 can be marked as reg-optional.
+ if (!op1IsMadeContained)
+ {
+ SetRegOptional(op1);
+ }
+ }
+ }
+ }
+ else if (op1Type == op2Type)
+ {
+ if (op2->isMemoryOp())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // One of op1 or op2 could be marked as reg optional
+ // to indicate that codegen can still generate code
+ // if one of them is on stack.
+ SetRegOptional(PreferredRegOptionalOperand(tree));
+ }
+
+ if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type))
+ {
+ // Mark the tree as doing unsigned comparison if
+ // both the operands are small and unsigned types.
+ // Otherwise we will end up performing a signed comparison
+ // of two small unsigned values without zero extending them to
+ // TYP_INT size, which is incorrect.
+ tree->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+}
+
+/* Lower GT_CAST(srcType, DstType) nodes.
+ *
+ * Casts from small int type to float/double are transformed as follows:
+ * GT_CAST(byte, float/double) = GT_CAST(GT_CAST(byte, int32), float/double)
+ * GT_CAST(sbyte, float/double) = GT_CAST(GT_CAST(sbyte, int32), float/double)
+ * GT_CAST(int16, float/double) = GT_CAST(GT_CAST(int16, int32), float/double)
+ * GT_CAST(uint16, float/double) = GT_CAST(GT_CAST(uint16, int32), float/double)
+ *
+ * SSE2 conversion instructions operate on signed integers. Casts from uint32/uint64
+ * are morphed as follows by the front end and hence should not be seen here.
+ * GT_CAST(uint32, float/double) = GT_CAST(GT_CAST(uint32, long), float/double)
+ * GT_CAST(uint64, float) = GT_CAST(GT_CAST(uint64, double), float)
+ *
+ *
+ * Similarly casts from float/double to a smaller int type are transformed as follows:
+ * GT_CAST(float/double, byte) = GT_CAST(GT_CAST(float/double, int32), byte)
+ * GT_CAST(float/double, sbyte) = GT_CAST(GT_CAST(float/double, int32), sbyte)
+ * GT_CAST(float/double, int16) = GT_CAST(GT_CAST(float/double, int32), int16)
+ * GT_CAST(float/double, uint16) = GT_CAST(GT_CAST(float/double, int32), uint16)
+ *
+ * SSE2 has instructions to convert a float/double value into a signed 32/64-bit
+ * integer. The above transformations help us to leverage those instructions.
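+ * (For illustration, these transformations let us use cvtsi2ss/cvtsi2sd for the
+ * int -> float/double direction and the truncating cvttss2si/cvttsd2si for the
+ * float/double -> int direction.)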
+ *
+ * Note that for the following conversions we still depend on helper calls and
+ * don't expect to see them here.
+ * i) GT_CAST(float/double, uint64)
+ * ii) GT_CAST(float/double, int type with overflow detection)
+ *
+ * TODO-XArch-CQ: (Low-pri): Jit64 generates in-line code of 8 instructions for (i) above.
+ * There are hardly any occurrences of this conversion operation in platform
+ * assemblies or in CQ perf benchmarks (1 occurrence in mscorlib, microsoft.jscript,
+ * 1 occurrence in Roslyn and no occurrences in system, system.core, system.numerics,
+ * system.windows.forms, scimark, fractals, bio mums). If we ever find evidence that
+ * doing this optimization is a win, we should consider generating in-lined code.
+ */
+void Lowering::LowerCast(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types dstType = tree->CastToType();
+ var_types srcType = op1->TypeGet();
+ var_types tmpType = TYP_UNDEF;
+ bool srcUns = false;
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ // We should never see the following casts as they are expected to be lowered
+ // appropriately or converted into helper calls by the front end.
+ // srcType = float/double dstType = * and overflow detecting cast
+ // Reason: must be converted to a helper call
+ // srcType = float/double, dstType = ulong
+ // Reason: must be converted to a helper call
+ // srcType = uint dstType = float/double
+ // Reason: uint -> float/double = uint -> long -> float/double
+ // srcType = ulong dstType = float
+ // Reason: ulong -> float = ulong -> double -> float
+ if (varTypeIsFloating(srcType))
+ {
+ noway_assert(!tree->gtOverflow());
+ noway_assert(dstType != TYP_ULONG);
+ }
+ else if (srcType == TYP_UINT)
+ {
+ noway_assert(!varTypeIsFloating(dstType));
+ }
+ else if (srcType == TYP_ULONG)
+ {
+ noway_assert(dstType != TYP_FLOAT);
+ }
+
+ // Case where src is a small type and dst is a floating point type.
+ if (varTypeIsSmall(srcType) && varTypeIsFloating(dstType))
+ {
+ // These conversions can never be overflow detecting ones.
+ noway_assert(!tree->gtOverflow());
+ tmpType = TYP_INT;
+ }
+ // Case where src is a floating point type and dst is a small type.
+ else if (varTypeIsFloating(srcType) && varTypeIsSmall(dstType))
+ {
+ tmpType = TYP_INT;
+ }
+
+ if (tmpType != TYP_UNDEF)
+ {
+ GenTreePtr tmp = comp->gtNewCastNode(tmpType, op1, tmpType);
+ tmp->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtOp.gtOp1 = tmp;
+ BlockRange().InsertAfter(op1, tmp);
+ }
+}
+
+//----------------------------------------------------------------------------------------------
+// Lowering::IsRMWIndirCandidate:
+// Returns true if the given operand is a candidate indirection for a read-modify-write
+// operator.
+//
+// Arguments:
+// operand - The operand to consider.
+// storeInd - The indirect store that roots the possible RMW operator.
+//
+bool Lowering::IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd)
+{
+ // If the operand isn't an indirection, it's trivially not a candidate.
+ if (operand->OperGet() != GT_IND)
+ {
+ return false;
+ }
+
+ // If the indirection's source address isn't equivalent to the destination address of the storeIndir, then the
+ // indirection is not a candidate.
+ GenTree* srcAddr = operand->gtGetOp1();
+ GenTree* dstAddr = storeInd->gtGetOp1();
+ if ((srcAddr->OperGet() != dstAddr->OperGet()) || !IndirsAreEquivalent(operand, storeInd))
+ {
+ return false;
+ }
+
+ // If it is not safe to contain the entire tree rooted at the indirection, then the indirection is not a
+ // candidate. Crawl the IR from the node immediately preceding the storeIndir until the last node in the
+ // indirection's tree is visited and check the side effects at each point.
+
+ m_scratchSideEffects.Clear();
+
+ assert((operand->gtLIRFlags & LIR::Flags::Mark) == 0);
+ operand->gtLIRFlags |= LIR::Flags::Mark;
+
+ unsigned markCount = 1;
+ GenTree* node;
+ for (node = storeInd->gtPrev; markCount > 0; node = node->gtPrev)
+ {
+ assert(node != nullptr);
+
+ if ((node->gtLIRFlags & LIR::Flags::Mark) == 0)
+ {
+ m_scratchSideEffects.AddNode(comp, node);
+ }
+ else
+ {
+ node->gtLIRFlags &= ~LIR::Flags::Mark;
+ markCount--;
+
+ if (m_scratchSideEffects.InterferesWith(comp, node, false))
+ {
+ // The indirection's tree contains some node that can't be moved to the storeIndir. The indirection is
+ // not a candidate. Clear any leftover mark bits and return.
+ for (; markCount > 0; node = node->gtPrev)
+ {
+ if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
+ {
+ node->gtLIRFlags &= ~LIR::Flags::Mark;
+ markCount--;
+ }
+ }
+ return false;
+ }
+
+ for (GenTree* nodeOperand : node->Operands())
+ {
+ assert((nodeOperand->gtLIRFlags & LIR::Flags::Mark) == 0);
+ nodeOperand->gtLIRFlags |= LIR::Flags::Mark;
+ markCount++;
+ }
+ }
+ }
+
+ // At this point we've verified that the operand is an indirection, its address is equivalent to the storeIndir's
+ // destination address, and that it and the transitive closure of its operand can be safely contained by the
+ // storeIndir. This indirection is therefore a candidate for an RMW op.
+ return true;
+}
+
+//----------------------------------------------------------------------------------------------
+// Returns true if this tree is the bin-op of a GT_STOREIND of the following form:
+// storeInd(subTreeA, binOp(gtInd(subTreeA), subtreeB)) or
+// storeInd(subTreeA, binOp(subtreeB, gtInd(subTreeA))) in the case of commutative bin-ops
+//
+// The above form for storeInd represents a read-modify-write memory binary operation.
+//
+// Parameters
+// tree - GenTreePtr of binOp
+//
+// Return Value
+// True if 'tree' is part of a RMW memory operation pattern
+//
+bool Lowering::IsBinOpInRMWStoreInd(GenTreePtr tree)
+{
+ // Must be a non floating-point type binary operator since SSE2 doesn't support RMW memory ops
+ assert(!varTypeIsFloating(tree));
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+
+ // Cheap bail out check before more expensive checks are performed.
+ // RMW memory op pattern requires that one of the operands of binOp to be GT_IND.
+ if (tree->gtGetOp1()->OperGet() != GT_IND && tree->gtGetOp2()->OperGet() != GT_IND)
+ {
+ return false;
+ }
+
+ LIR::Use use;
+ if (!BlockRange().TryGetUse(tree, &use) || use.User()->OperGet() != GT_STOREIND || use.User()->gtGetOp2() != tree)
+ {
+ return false;
+ }
+
+ // Since recognizing the RMW memory op pattern is relatively expensive, we
+ // cache the result in the GT_STOREIND node so that we can use it while
+ // lowering GT_STOREIND.
+ GenTreePtr indirCandidate = nullptr;
+ GenTreePtr indirOpSource = nullptr;
+ return IsRMWMemOpRootedAtStoreInd(use.User(), &indirCandidate, &indirOpSource);
+}
+
+//----------------------------------------------------------------------------------------------
+// This method recognizes the case where we have a treeNode with the following structure:
+// storeInd(IndirDst, binOp(gtInd(IndirDst), indirOpSource)) OR
+// storeInd(IndirDst, binOp(indirOpSource, gtInd(IndirDst))) in the case of commutative operations OR
+// storeInd(IndirDst, unaryOp(gtInd(IndirDst))) in the case of unary operations
+//
+// Terminology:
+// indirDst = memory write of an addr mode (i.e. storeind destination)
+// indirSrc = value being written to memory (i.e. storeind source which could either be a binary or unary op)
+// indirCandidate = memory read i.e. a gtInd of an addr mode
+// indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
+//
+// In x86/x64 this storeInd pattern can be effectively encoded in a single instruction of the
+// following form in case of integer operations:
+// binOp [addressing mode], RegIndirOpSource
+// binOp [addressing mode], immediateVal
+// where RegIndirOpSource is the register where indirOpSource was computed.
+//
+// Right now, we recognize a few cases:
+// a) The gtInd child is a lea/lclVar/lclVarAddr/clsVarAddr/constant
+// b) BinOp is either add, sub, xor, or, and, shl, rsh, rsz.
+// c) unaryOp is either not/neg
+//
+// Implementation Note: The following routines need to be in sync for RMW memory op optimization
+// to be correct and functional.
+// IndirsAreEquivalent()
+// NodesAreEquivalentLeaves()
+// Codegen of GT_STOREIND and genCodeForShiftRMW()
+// emitInsRMW()
+//
+// TODO-CQ: Enable support for more complex indirections (if needed) or use the value numbering
+// package to perform more complex tree recognition.
+//
+// TODO-XArch-CQ: Add support for RMW of lcl fields (e.g. lclfield binop= source)
+//
+// Parameters:
+// tree - GT_STOREIND node
+// outIndirCandidate - out param set to indirCandidate as described above
+// outIndirOpSource - out param set to indirOpSource as described above
+//
+// Return value
+// True if there is a RMW memory operation rooted at a GT_STOREIND tree
+// and out params indirCandidate and indirOpSource are set to non-null values.
+// Otherwise, returns false with indirCandidate and indirOpSource set to null.
+// Also updates flags of GT_STOREIND tree with its RMW status.
+//
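+// Illustrative example (hypothetical source "*p |= mask"): the IR pattern
+// t1 = GT_IND(p); t2 = GT_OR(t1, mask); GT_STOREIND(p, t2)
+// is recognized here so that codegen can emit a single "or [p], mask" instruction.
+//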
+bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTreePtr tree, GenTreePtr* outIndirCandidate, GenTreePtr* outIndirOpSource)
+{
+ assert(!varTypeIsFloating(tree));
+ assert(outIndirCandidate != nullptr);
+ assert(outIndirOpSource != nullptr);
+
+ *outIndirCandidate = nullptr;
+ *outIndirOpSource = nullptr;
+
+ // Early out if storeInd is already known to be a non-RMW memory op
+ GenTreeStoreInd* storeInd = tree->AsStoreInd();
+ if (storeInd->IsNonRMWMemoryOp())
+ {
+ return false;
+ }
+
+ GenTreePtr indirDst = storeInd->gtGetOp1();
+ GenTreePtr indirSrc = storeInd->gtGetOp2();
+ genTreeOps oper = indirSrc->OperGet();
+
+ // Early out if it is already known to be a RMW memory op
+ if (storeInd->IsRMWMemoryOp())
+ {
+ if (GenTree::OperIsBinary(oper))
+ {
+ if (storeInd->IsRMWDstOp1())
+ {
+ *outIndirCandidate = indirSrc->gtGetOp1();
+ *outIndirOpSource = indirSrc->gtGetOp2();
+ }
+ else
+ {
+ assert(storeInd->IsRMWDstOp2());
+ *outIndirCandidate = indirSrc->gtGetOp2();
+ *outIndirOpSource = indirSrc->gtGetOp1();
+ }
+ assert(IndirsAreEquivalent(*outIndirCandidate, storeInd));
+ }
+ else
+ {
+ assert(GenTree::OperIsUnary(oper));
+ assert(IndirsAreEquivalent(indirSrc->gtGetOp1(), storeInd));
+ *outIndirCandidate = indirSrc->gtGetOp1();
+ *outIndirOpSource = indirSrc->gtGetOp1();
+ }
+
+ return true;
+ }
+
+ // If we reach here, we do not know the RMW status of the tree rooted at storeInd
+ assert(storeInd->IsRMWStatusUnknown());
+
+ // Early out if indirDst is not one of the supported memory operands.
+ if (indirDst->OperGet() != GT_LEA && indirDst->OperGet() != GT_LCL_VAR && indirDst->OperGet() != GT_LCL_VAR_ADDR &&
+ indirDst->OperGet() != GT_CLS_VAR_ADDR && indirDst->OperGet() != GT_CNS_INT)
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+
+ // We cannot use Read-Modify-Write instruction forms with overflow checking instructions
+ // because we are not allowed to modify the target until after the overflow check.
+ if (indirSrc->gtOverflowEx())
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ // At this point we can match one of two patterns:
+ //
+ // t_ind = indir t_addr_0
+ // ...
+ // t_value = binop t_ind, t_other
+ // ...
+ // storeIndir t_addr_1, t_value
+ //
+ // or
+ //
+ // t_ind = indir t_addr_0
+ // ...
+ // t_value = unop t_ind
+ // ...
+ // storeIndir t_addr_1, t_value
+ //
+ // In all cases, we will eventually make the binop that produces t_value and the entire dataflow tree rooted at
+ // t_ind contained by t_value.
+
+ GenTree* indirCandidate = nullptr;
+ GenTree* indirOpSource = nullptr;
+ RMWStatus status = STOREIND_RMW_STATUS_UNKNOWN;
+ if (GenTree::OperIsBinary(oper))
+ {
+ // Return if binary op is not one of the supported operations for RMW of memory.
+ if (oper != GT_ADD && oper != GT_SUB && oper != GT_AND && oper != GT_OR && oper != GT_XOR &&
+ !GenTree::OperIsShiftOrRotate(oper))
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ if (GenTree::OperIsShiftOrRotate(oper) && varTypeIsSmall(storeInd))
+ {
+ // For ldind, integer values smaller than 4 bytes, a boolean, or a character are converted
+ // to 4 bytes by sign or zero extension as appropriate. If we directly shift the short-typed
+ // data using sar, we will lose the sign or zero extension bits.
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_TYPE);
+ return false;
+ }
+
+ // In the common case, the second operand to the binop will be the indir candidate.
+ GenTreeOp* binOp = indirSrc->AsOp();
+ if (GenTree::OperIsCommutative(oper) && IsRMWIndirCandidate(binOp->gtOp2, storeInd))
+ {
+ indirCandidate = binOp->gtOp2;
+ indirOpSource = binOp->gtOp1;
+ status = STOREIND_RMW_DST_IS_OP2;
+ }
+ else if (IsRMWIndirCandidate(binOp->gtOp1, storeInd))
+ {
+ indirCandidate = binOp->gtOp1;
+ indirOpSource = binOp->gtOp2;
+ status = STOREIND_RMW_DST_IS_OP1;
+ }
+ else
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+ }
+ else if (GenTree::OperIsUnary(oper))
+ {
+ // Nodes other than GT_NOT and GT_NEG are not yet supported.
+ if (oper != GT_NOT && oper != GT_NEG)
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ if (indirSrc->gtGetOp1()->OperGet() != GT_IND)
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+
+ GenTreeUnOp* unOp = indirSrc->AsUnOp();
+ if (IsRMWIndirCandidate(unOp->gtOp1, storeInd))
+ {
+ // src and dest are the same in case of unary ops
+ indirCandidate = unOp->gtOp1;
+ indirOpSource = unOp->gtOp1;
+ status = STOREIND_RMW_DST_IS_OP1;
+ }
+ else
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+ }
+ else
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_OPER);
+ return false;
+ }
+
+ // By this point we've verified that we have a supported operand with a supported address. Now we need to ensure
+ // that we're able to move the destination address for the source indirection forwards.
+ if (!IsSafeToContainMem(storeInd, indirDst))
+ {
+ storeInd->SetRMWStatus(STOREIND_RMW_UNSUPPORTED_ADDR);
+ return false;
+ }
+
+ assert(indirCandidate != nullptr);
+ assert(indirOpSource != nullptr);
+ assert(status != STOREIND_RMW_STATUS_UNKNOWN);
+
+ *outIndirCandidate = indirCandidate;
+ *outIndirOpSource = indirOpSource;
+ storeInd->SetRMWStatus(status);
+ return true;
+}
+
+//--------------------------------------------------------------------------------------------
+// SetStoreIndOpCountsIfRMWMemOp checks to see if there is a RMW memory operation rooted at
+// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so
+// that CodeGen will generate a single instruction of the form:
+//
+// binOp [addressing mode], reg
+//
+// Parameters
+// storeInd - GT_STOREIND node
+//
+// Return value
+// True, if RMW memory op tree pattern is recognized and op counts are set.
+// False otherwise.
+//
+bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd)
+{
+ assert(storeInd->OperGet() == GT_STOREIND);
+
+ // SSE2 doesn't support RMW on float values
+ assert(!varTypeIsFloating(storeInd));
+
+ // Terminology:
+ // indirDst = memory write of an addr mode (i.e. storeind destination)
+ // indirSrc = value being written to memory (i.e. storeind source which could be a binary/unary op)
+ // indirCandidate = memory read i.e. a gtInd of an addr mode
+ // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
+
+ GenTreePtr indirCandidate = nullptr;
+ GenTreePtr indirOpSource = nullptr;
+
+ if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
+ {
+ JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
+ storeInd->AsStoreInd()->GetRMWStatus());
+ DISPTREERANGE(BlockRange(), storeInd);
+ return false;
+ }
+
+ GenTreePtr indirDst = storeInd->gtGetOp1();
+ GenTreePtr indirSrc = storeInd->gtGetOp2();
+ genTreeOps oper = indirSrc->OperGet();
+
+ // At this point we have successfully detected a RMW memory op of one of the following forms
+ // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
+ // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate)) in the case of commutative operations OR
+ // storeInd(indirDst, indirSrc(indirCandidate)) in the case of unary operations
+ //
+ // Here indirSrc = one of the supported binary or unary operation for RMW of memory
+ // indirCandidate = a GT_IND node
+ // indirCandidateChild = operand of GT_IND indirCandidate
+ //
+ // The logic below essentially does the following
+ // set storeInd src count to that of the dst count of indirOpSource
+ // clear operand counts on indirSrc (i.e. marked as contained and storeInd will generate code for it)
+ // clear operand counts on indirCandidate
+ // clear operand counts on indirDst except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
+ // base
+ // Increment src count of storeInd to account for the registers required to form indirDst addr mode
+ // clear operand counts on indirCandidateChild
+
+ TreeNodeInfo* info = &(storeInd->gtLsraInfo);
+ info->dstCount = 0;
+
+ if (GenTree::OperIsBinary(oper))
+ {
+ // On Xarch RMW operations require that the source memory-op be in a register.
+ assert(!indirOpSource->isMemoryOp() || indirOpSource->gtLsraInfo.dstCount == 1);
+ JITDUMP("Lower successfully detected an assignment of the form: *addrMode BinOp= source\n");
+ info->srcCount = indirOpSource->gtLsraInfo.dstCount;
+ }
+ else
+ {
+ assert(GenTree::OperIsUnary(oper));
+ JITDUMP("Lower successfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
+ info->srcCount = 0;
+ }
+ DISPTREERANGE(BlockRange(), storeInd);
+
+ m_lsra->clearOperandCounts(indirSrc);
+ m_lsra->clearOperandCounts(indirCandidate);
+
+ GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
+ if (indirCandidateChild->OperGet() == GT_LEA)
+ {
+ GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
+
+ if (addrMode->HasBase())
+ {
+ assert(addrMode->Base()->OperIsLeaf());
+ m_lsra->clearOperandCounts(addrMode->Base());
+ info->srcCount++;
+ }
+
+ if (addrMode->HasIndex())
+ {
+ assert(addrMode->Index()->OperIsLeaf());
+ m_lsra->clearOperandCounts(addrMode->Index());
+ info->srcCount++;
+ }
+
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else
+ {
+ assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
+ indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
+
+ // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
+ // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
+ // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because the field
+ // address value is known at jit time.
+ if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
+ {
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else
+ {
+ // Need a reg and hence increment src count of storeind
+ info->srcCount += indirCandidateChild->gtLsraInfo.dstCount;
+ }
+ }
+ m_lsra->clearOperandCounts(indirCandidateChild);
+
+ return true;
+}
+
+/**
+ * Takes care of annotating the src and dst register
+ * requirements for a GT_MUL treenode.
+ */
+void Lowering::SetMulOpCounts(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI);
+
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ // Case of float/double mul.
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ assert(tree->OperGet() == GT_MUL);
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1)))
+ {
+ // Since GT_MUL is commutative, we will try to re-order operands if it is safe to
+ // generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ return;
+ }
+
+ bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+ bool requiresOverflowCheck = tree->gtOverflowEx();
+ bool useLeaEncoding = false;
+ GenTreePtr memOp = nullptr;
+
+ bool hasImpliedFirstOperand = false;
+ GenTreeIntConCommon* imm = nullptr;
+ GenTreePtr other = nullptr;
+
+ // There are three forms of x86 multiply:
+ // one-op form: RDX:RAX = RAX * r/m
+ // two-op form: reg *= r/m
+ // three-op form: reg = r/m * imm
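+ // For illustration, these correspond to instructions such as "mul rcx" (one-op form),
+ // "imul rbx, rcx" (two-op form) and "imul rbx, rcx, 9" (three-op form).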
+
+ // This special widening 32x32->64 MUL is not used on x64
+ assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
+
+ // Multiply should never be using small types
+ assert(!varTypeIsSmall(tree->TypeGet()));
+
+ // We do use the widening multiply to implement
+ // the overflow checking for unsigned multiply
+ //
+ if (isUnsignedMultiply && requiresOverflowCheck)
+ {
+ // The only encoding provided is RDX:RAX = RAX * rm
+ //
+ // Here we set RAX as the only destination candidate
+ // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
+ //
+ info->setDstCandidates(m_lsra, RBM_RAX);
+ hasImpliedFirstOperand = true;
+ }
+ else if (tree->gtOper == GT_MULHI)
+ {
+ // have to use the encoding:RDX:RAX = RAX * rm
+ info->setDstCandidates(m_lsra, RBM_RAX);
+ hasImpliedFirstOperand = true;
+ }
+ else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1))
+ {
+ if (IsContainableImmed(tree, op2))
+ {
+ imm = op2->AsIntConCommon();
+ other = op1;
+ }
+ else
+ {
+ imm = op1->AsIntConCommon();
+ other = op2;
+ }
+
+ // CQ: We want to rewrite this into a LEA
+ ssize_t immVal = imm->AsIntConCommon()->IconValue();
+ if (!requiresOverflowCheck && (immVal == 3 || immVal == 5 || immVal == 9))
+ {
+ useLeaEncoding = true;
+ }
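+ // For illustration: a multiply by these immediates can be expressed with a single LEA,
+ // e.g. x * 3 -> lea reg, [reg + reg*2], x * 5 -> lea reg, [reg + reg*4],
+ // x * 9 -> lea reg, [reg + reg*8].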
+
+ MakeSrcContained(tree, imm); // The imm is always contained
+ if (other->isMemoryOp())
+ {
+ memOp = other; // memOp may be contained below
+ }
+ }
+
+ // We allow one operand to be a contained memory operand.
+ // The memory op type must match with the 'tree' type.
+ // This is because during codegen we use 'tree' type to derive EmitTypeSize.
+ // E.g. op1 type = byte, op2 type = byte but GT_MUL tree type is int.
+ //
+ if (memOp == nullptr && op2->isMemoryOp())
+ {
+ memOp = op2;
+ }
+
+ // To generate an LEA we need to force memOp into a register
+ // so don't allow memOp to be 'contained'
+ //
+ if (!useLeaEncoding)
+ {
+ if ((memOp != nullptr) && (memOp->TypeGet() == tree->TypeGet()) && IsSafeToContainMem(tree, memOp))
+ {
+ MakeSrcContained(tree, memOp);
+ }
+ else if (imm != nullptr)
+ {
+ // Has a contained immediate operand.
+ // Only 'other' operand can be marked as reg optional.
+ assert(other != nullptr);
+ SetRegOptional(other);
+ }
+ else if (hasImpliedFirstOperand)
+ {
+ // Only op2 can be marked as reg optional.
+ SetRegOptional(op2);
+ }
+ else
+ {
+ // If there are no containable operands, we can make either of op1 or op2
+ // as reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+// isRMWRegOper: Can this binary tree node be used in a Read-Modify-Write format
+//
+// Arguments:
+// tree - a binary tree node
+//
+// Return Value:
+// Returns true if we can use the read-modify-write instruction form
+//
+// Notes:
+// This is used to determine whether to preference the source to the destination register.
+//
+bool Lowering::isRMWRegOper(GenTreePtr tree)
+{
+ // TODO-XArch-CQ: Make this more accurate.
+ // For now, we assume that most binary operators are of the RMW form.
+ assert(tree->OperIsBinary());
+
+ if (tree->OperIsCompare())
+ {
+ return false;
+ }
+
+ switch (tree->OperGet())
+ {
+ // These Opers either support a three op form (i.e. GT_LEA), or do not read/write their first operand
+ case GT_LEA:
+ case GT_STOREIND:
+ case GT_ARR_INDEX:
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ return false;
+
+ // x86/x64 does support a three op multiply when op2|op1 is a contained immediate
+ case GT_MUL:
+ return (!IsContainableImmed(tree, tree->gtOp.gtOp2) && !IsContainableImmed(tree, tree->gtOp.gtOp1));
+
+ default:
+ return true;
+ }
+}
+
+// anything is in range for AMD64
+bool Lowering::IsCallTargetInRange(void* addr)
+{
+ return true;
+}
+
+// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
+{
+ if (!childNode->IsIntCnsFitsInI32())
+ {
+ return false;
+ }
+
+ // At this point we know that it is an int const that fits within 4 bytes and hence can safely be cast to IntConCommon.
+ // Icons that need relocation should never be marked as contained immed
+ if (childNode->AsIntConCommon()->ImmedValNeedsReloc(comp))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+//-----------------------------------------------------------------------
+// PreferredRegOptionalOperand: returns one of the operands of given
+// binary oper that is to be preferred for marking as reg optional.
+//
+// Since only one of op1 or op2 can be a memory operand on xarch, only
+// one of them has to be marked as reg optional. Since Lower doesn't
+// know a priori which of op1 or op2 is unlikely to get a register, it
+// has to make a guess. This routine encapsulates heuristics that
+// guess whether it is likely to be beneficial to mark op1 or op2 as
+// reg optional.
+//
+//
+// Arguments:
+// tree - a binary-op tree node that is either commutative
+// or a compare oper.
+//
+// Returns:
+// Returns op1 or op2 of tree node that is preferred for
+// marking as reg optional.
+//
+// Note: if the tree oper is neither commutative nor a compare oper
+// then only op2 can be reg optional on xarch and hence no need to
+// call this routine.
+GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
+{
+ assert(GenTree::OperIsBinary(tree->OperGet()));
+ assert(tree->OperIsCommutative() || tree->OperIsCompare());
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+ GenTree* preferredOp = nullptr;
+
+ // This routine uses the following heuristics:
+ //
+ // a) If both are tracked locals, marking the one with lower weighted
+ // ref count as reg-optional would likely be beneficial as it has
+ // higher probability of not getting a register.
+ //
+ // b) op1 = tracked local and op2 = untracked local: LSRA creates two
+ // ref positions for op2: a def and use position. op2's def position
+ // requires a reg and it is allocated a reg by spilling another
+// interval (if required), which could even be op1. For this reason
+ // it is beneficial to mark op1 as reg optional.
+ //
+ // TODO: It is not always mandatory for a def position of an untracked
+ // local to be allocated a register if it is on rhs of an assignment
+ // and its use position is reg-optional and has not been assigned a
+// register. Reg-optional def positions are currently not supported.
+ //
+ // c) op1 = untracked local and op2 = tracked local: marking op1 as
+ // reg optional is beneficial, since its use position is less likely
+ // to get a register.
+ //
+ // d) If both are untracked locals (i.e. treated like tree temps by
+ // LSRA): though either of them could be marked as reg optional,
+ // marking op1 as reg optional is likely to be beneficial because
+ // while allocating op2's def position, there is a possibility of
+// spilling op1's def, in which case op1 is treated as a contained
+// memory operand rather than requiring a reload.
+ //
+// e) If only one of them is a local var, prefer to mark it as
+// reg-optional. This heuristic is based on results obtained
+// from CQ perf benchmarks.
+ //
+ // f) If neither of them are local vars (i.e. tree temps), prefer to
+ // mark op1 as reg optional for the same reason as mentioned in (d) above.
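+// As a concrete (hypothetical) illustration of (e): for GT_ADD(t1, V02) where t1 is a
+// tree temp and V02 is a lclVar, V02 is returned as the preferred reg-optional operand.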
+ if (op1->OperGet() == GT_LCL_VAR && op2->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* v1 = comp->lvaTable + op1->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* v2 = comp->lvaTable + op2->AsLclVarCommon()->GetLclNum();
+
+ if (v1->lvTracked && v2->lvTracked)
+ {
+ // Both are tracked locals. The one with lower weight is less likely
+ // to get a register, and hence it is beneficial to mark it
+ // as reg optional.
+ if (v1->lvRefCntWtd < v2->lvRefCntWtd)
+ {
+ preferredOp = op1;
+ }
+ else
+ {
+ preferredOp = op2;
+ }
+ }
+ else if (v2->lvTracked)
+ {
+ // v1 is an untracked lcl and its use position is less likely to
+ // get a register.
+ preferredOp = op1;
+ }
+ else if (v1->lvTracked)
+ {
+ // v2 is an untracked lcl and its def position always
+ // needs a reg. Hence it is better to mark v1 as
+ // reg optional.
+ preferredOp = op1;
+ }
+ else
+ {
+ preferredOp = op1;
+ }
+ }
+ else if (op1->OperGet() == GT_LCL_VAR)
+ {
+ preferredOp = op1;
+ }
+ else if (op2->OperGet() == GT_LCL_VAR)
+ {
+ preferredOp = op2;
+ }
+ else
+ {
+ // Neither of the operands is a local, so prefer marking the
+ // operand that is evaluated first as reg optional,
+ // since its use position is less likely to get a register.
+ bool reverseOps = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+ preferredOp = reverseOps ? op2 : op1;
+ }
+
+ return preferredOp;
+}
+
+#endif // _TARGET_XARCH_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
new file mode 100644
index 0000000000..317b976e42
--- /dev/null
+++ b/src/jit/lsra.cpp
@@ -0,0 +1,11578 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+
+ Linear Scan Register Allocation
+
+ a.k.a. LSRA
+
+ Preconditions
+ - All register requirements are expressed in the code stream, either as destination
+ registers of tree nodes, or as internal registers. These requirements are
+ expressed in the TreeNodeInfo (gtLsraInfo) on each node, which includes:
+ - The number of register sources and destinations.
+ - The register restrictions (candidates) of the target register, both from itself,
+ as producer of the value (dstCandidates), and from its consuming node (srcCandidates).
+ Note that the srcCandidates field of TreeNodeInfo refers to the destination register
+ (not any of its sources).
+ - The number (internalCount) of registers required, and their register restrictions (internalCandidates).
+ These are neither inputs nor outputs of the node, but used in the sequence of code generated for the tree.
+ "Internal registers" are registers used during the code sequence generated for the node.
+ The register lifetimes must obey the following lifetime model:
+ - First, any internal registers are defined.
+ - Next, any source registers are used (and are then freed if they are last use and are not identified as
+ "delayRegFree").
+ - Next, the internal registers are used (and are then freed).
+ - Next, any registers in the kill set for the instruction are killed.
+ - Next, the destination register(s) are defined (multiple destination registers are only supported on ARM)
+ - Finally, any "delayRegFree" source registers are freed.
+ There are several things to note about this order:
+ - The internal registers will never overlap any use, but they may overlap a destination register.
+ - Internal registers are never live beyond the node.
+ - The "delayRegFree" annotation is used for instructions that are only available in a Read-Modify-Write form.
+ That is, the destination register is one of the sources. In this case, we must not use the same register for
+ the non-RMW operand as for the destination.
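+ (For illustration: in the two-operand x86 form "sub reg1, reg2", reg1 is both a source and
+ the destination, so the non-RMW source in reg2 must not share the destination register.)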
+
+ Overview (doLinearScan):
+ - Walk all blocks, building intervals and RefPositions (buildIntervals)
+ - Traverse the RefPositions, marking last uses (setLastUses)
+ - Note that this is necessary because the execution order doesn't accurately reflect use order.
+ There is a "TODO-Throughput" to eliminate this.
+ - Allocate registers (allocateRegisters)
+ - Annotate nodes with register assignments (resolveRegisters)
+ - Add move nodes as needed to resolve conflicting register
+ assignments across non-adjacent edges. (resolveEdges, called from resolveRegisters)
+
+ Postconditions:
+
+ Tree nodes (GenTree):
+ - GenTree::gtRegNum (and gtRegPair for ARM) is annotated with the register
+ assignment for a node. If the node does not require a register, it is
+ annotated as such (for single registers, gtRegNum = REG_NA; for register
+ pair type, gtRegPair = REG_PAIR_NONE). For a variable definition or interior
+ tree node (an "implicit" definition), this is the register to put the result.
+ For an expression use, this is the place to find the value that has previously
+ been computed.
+ - In most cases, this register must satisfy the constraints specified by the TreeNodeInfo.
+ - In some cases, this is difficult:
+ - If a lclVar node currently lives in some register, it may not be desirable to move it
+ (i.e. its current location may be desirable for future uses, e.g. if it's a callee save register,
+ but needs to be in a specific arg register for a call).
+ - In other cases there may be conflicts on the restrictions placed by the defining node and the node which
+ consumes it
+ - If such a node is constrained to a single fixed register (e.g. an arg register, or a return from a call),
+ then LSRA is free to annotate the node with a different register. The code generator must issue the appropriate
+ move.
+ - However, if such a node is constrained to a set of registers, and its current location does not satisfy that
+ requirement, LSRA must insert a GT_COPY node between the node and its parent. The gtRegNum on the GT_COPY node
+ must satisfy the register requirement of the parent.
+ - GenTree::gtRsvdRegs has a set of registers used for internal temps.
+ - A tree node is marked GTF_SPILL if the tree node must be spilled by the code generator after it has been
+ evaluated.
+ - LSRA currently does not set GTF_SPILLED on such nodes, because it caused problems in the old code generator.
+ In the new backend perhaps this should change (see also the note below under CodeGen).
+ - A tree node is marked GTF_SPILLED if it is a lclVar that must be reloaded prior to use.
+ - The register (gtRegNum) on the node indicates the register to which it must be reloaded.
+ - For lclVar nodes, since the uses and defs are distinct tree nodes, it is always possible to annotate the node
+ with the register to which the variable must be reloaded.
+ - For other nodes, since they represent both the def and use, if the value must be reloaded to a different
+ register, LSRA must insert a GT_RELOAD node in order to specify the register to which it should be reloaded.
+
+ Local variable table (LclVarDsc):
+ - LclVarDsc::lvRegister is set to true if a local variable has the
+ same register assignment for its entire lifetime.
+ - LclVarDsc::lvRegNum / lvOtherReg: these are initialized to their
+ first value at the end of LSRA (it looks like lvOtherReg isn't?
+ This is probably a bug (ARM)). Codegen will set them to their current value
+ as it processes the trees, since a variable can (now) be assigned different
+ registers over its lifetimes.
+
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#include "lsra.h"
+
+#ifdef DEBUG
+const char* LinearScan::resolveTypeName[] = {"Split", "Join", "Critical", "SharedCritical"};
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Small Helper functions XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+//--------------------------------------------------------------
+// lsraAssignRegToTree: Assign the given reg to tree node.
+//
+// Arguments:
+// tree - Gentree node
+// reg - register to be assigned
+// regIdx - register idx, if tree is a multi-reg call node.
+// regIdx will be zero for single-reg result producing tree nodes.
+//
+// Return Value:
+// None
+//
+void lsraAssignRegToTree(GenTreePtr tree, regNumber reg, unsigned regIdx)
+{
+ if (regIdx == 0)
+ {
+ tree->gtRegNum = reg;
+ }
+ else
+ {
+ assert(tree->IsMultiRegCall());
+ GenTreeCall* call = tree->AsCall();
+ call->SetRegNumByIdx(reg, regIdx);
+ }
+}
+
+//-------------------------------------------------------------
+// getWeight: Returns the weight of the RefPosition.
+//
+// Arguments:
+// refPos - ref position
+//
+// Returns:
+// Weight of ref position.
+unsigned LinearScan::getWeight(RefPosition* refPos)
+{
+ unsigned weight;
+ GenTreePtr treeNode = refPos->treeNode;
+
+ if (treeNode != nullptr)
+ {
+ if (isCandidateLocalRef(treeNode))
+ {
+ // Tracked locals: use weighted ref cnt as the weight of the
+ // ref position.
+ GenTreeLclVarCommon* lclCommon = treeNode->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclCommon->gtLclNum]);
+ weight = varDsc->lvRefCntWtd;
+ }
+ else
+ {
+ // Non-candidate local ref or non-lcl tree node.
+ // These are considered to have two references in the basic block:
+ // These are considered to have two references in the basic block:
+ // a def and a use, and hence their weighted ref count is 2 times
+ // the weight of the basic block in which they appear.
+ }
+ }
+ else
+ {
+ // Non-tree node ref positions. These will have a single
+ // reference in the basic block and hence their weighted
+ // refcount is equal to the block weight in which they
+ // appear.
+ weight = this->blockInfo[refPos->bbNum].weight;
+ }
+
+ return weight;
+}
+
+// allRegs represents a set of registers that can
+// be used to allocate the specified type at any point
+// in time (more of a 'bank' of registers).
+regMaskTP LinearScan::allRegs(RegisterType rt)
+{
+ if (rt == TYP_FLOAT)
+ {
+ return availableFloatRegs;
+ }
+ else if (rt == TYP_DOUBLE)
+ {
+ return availableDoubleRegs;
+#ifdef FEATURE_SIMD
+ // TODO-Cleanup: Add an RBM_ALLSIMD
+ }
+ else if (varTypeIsSIMD(rt))
+ {
+ return availableDoubleRegs;
+#endif // FEATURE_SIMD
+ }
+ else
+ {
+ return availableIntRegs;
+ }
+}
+
+//--------------------------------------------------------------------------
+// allMultiRegCallNodeRegs: represents a set of registers that can be used
+// to allocate a multi-reg call node.
+//
+// Arguments:
+// call - Multi-reg call node
+//
+// Return Value:
+// Mask representing the set of available registers for multi-reg call
+// node.
+//
+// Note:
+// Multi-reg call node available regs = Bitwise-OR(allregs(GetReturnRegType(i)))
+// for all i=0..RetRegCount-1.
+regMaskTP LinearScan::allMultiRegCallNodeRegs(GenTreeCall* call)
+{
+ assert(call->HasMultiRegRetVal());
+
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ regMaskTP resultMask = allRegs(retTypeDesc->GetReturnRegType(0));
+
+ unsigned count = retTypeDesc->GetReturnRegCount();
+ for (unsigned i = 1; i < count; ++i)
+ {
+ resultMask |= allRegs(retTypeDesc->GetReturnRegType(i));
+ }
+
+ return resultMask;
+}
+
+//--------------------------------------------------------------------------
+// allRegs: returns the set of registers that can accommodate the type of
+// the given node.
+//
+// Arguments:
+// tree - GenTree node
+//
+// Return Value:
+// Mask representing the set of available registers for given tree
+//
+// Note: In case of multi-reg call node, the full set of registers must be
+// determined by looking at types of individual return register types.
+// In this case, the registers may include registers from different register
+// sets and will not be limited to the actual ABI return registers.
+regMaskTP LinearScan::allRegs(GenTree* tree)
+{
+ regMaskTP resultMask;
+
+ // In case of multi-reg calls, allRegs is defined as
+ // Bitwise-Or(allRegs(GetReturnRegType(i)) for i=0..ReturnRegCount-1
+ if (tree->IsMultiRegCall())
+ {
+ resultMask = allMultiRegCallNodeRegs(tree->AsCall());
+ }
+ else
+ {
+ resultMask = allRegs(tree->TypeGet());
+ }
+
+ return resultMask;
+}
+
+regMaskTP LinearScan::allSIMDRegs()
+{
+ return availableFloatRegs;
+}
+
+//------------------------------------------------------------------------
+// internalFloatRegCandidates: Return the set of registers that are appropriate
+// for use as internal float registers.
+//
+// Return Value:
+// The set of registers (as a regMaskTP).
+//
+// Notes:
+// compFloatingPointUsed is only required to be set if it is possible that we
+// will use floating point callee-save registers.
+// It is unlikely, if an internal register is the only use of floating point,
+// that it will select a callee-save register. But to be safe, we restrict
+// the set of candidates if compFloatingPointUsed is not already set.
+
+regMaskTP LinearScan::internalFloatRegCandidates()
+{
+ if (compiler->compFloatingPointUsed)
+ {
+ return allRegs(TYP_FLOAT);
+ }
+ else
+ {
+ return RBM_FLT_CALLEE_TRASH;
+ }
+}
+
+/*****************************************************************************
+ * Register types
+ *****************************************************************************/
+template <class T>
+RegisterType regType(T type)
+{
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(type))
+ {
+ return FloatRegisterType;
+ }
+#endif // FEATURE_SIMD
+ return varTypeIsFloating(TypeGet(type)) ? FloatRegisterType : IntRegisterType;
+}
+
+bool useFloatReg(var_types type)
+{
+ return (regType(type) == FloatRegisterType);
+}
+
+bool registerTypesEquivalent(RegisterType a, RegisterType b)
+{
+ return varTypeIsIntegralOrI(a) == varTypeIsIntegralOrI(b);
+}
+
+bool isSingleRegister(regMaskTP regMask)
+{
+ return (regMask != RBM_NONE && genMaxOneBit(regMask));
+}
+
+/*****************************************************************************
+ * Inline functions for RegRecord
+ *****************************************************************************/
+
+bool RegRecord::isFree()
+{
+ return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill);
+}
+
+/*****************************************************************************
+ * Inline functions for LinearScan
+ *****************************************************************************/
+RegRecord* LinearScan::getRegisterRecord(regNumber regNum)
+{
+ return &physRegs[regNum];
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// stressLimitRegs: Given a set of registers, expressed as a register mask, reduce
+// them based on the current stress options.
+//
+// Arguments:
+// mask - The current mask of register candidates for a node
+//
+// Return Value:
+// A possibly-modified mask, based on the value of COMPlus_JitStressRegs.
+//
+// Notes:
+// This is the method used to implement the stress options that limit
+// the set of registers considered for allocation.
+
+regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask)
+{
+ if (getStressLimitRegs() != LSRA_LIMIT_NONE)
+ {
+ switch (getStressLimitRegs())
+ {
+ case LSRA_LIMIT_CALLEE:
+ if (!compiler->opts.compDbgEnC && (mask & RBM_CALLEE_SAVED) != RBM_NONE)
+ {
+ mask &= RBM_CALLEE_SAVED;
+ }
+ break;
+ case LSRA_LIMIT_CALLER:
+ if ((mask & RBM_CALLEE_TRASH) != RBM_NONE)
+ {
+ mask &= RBM_CALLEE_TRASH;
+ }
+ break;
+ case LSRA_LIMIT_SMALL_SET:
+ if ((mask & LsraLimitSmallIntSet) != RBM_NONE)
+ {
+ mask &= LsraLimitSmallIntSet;
+ }
+ else if ((mask & LsraLimitSmallFPSet) != RBM_NONE)
+ {
+ mask &= LsraLimitSmallFPSet;
+ }
+ break;
+ default:
+ unreached();
+ }
+ if (refPosition != nullptr && refPosition->isFixedRegRef)
+ {
+ mask |= refPosition->registerAssignment;
+ }
+ }
+ return mask;
+}
+#endif // DEBUG
+
+// TODO-Cleanup: Consider adding an overload that takes a varDsc, and can appropriately
+// set such fields as isStructField
+
+Interval* LinearScan::newInterval(RegisterType theRegisterType)
+{
+ intervals.emplace_back(theRegisterType, allRegs(theRegisterType));
+ Interval* newInt = &intervals.back();
+
+#ifdef DEBUG
+ newInt->intervalIndex = static_cast<unsigned>(intervals.size() - 1);
+#endif // DEBUG
+
+ DBEXEC(VERBOSE, newInt->dump());
+ return newInt;
+}
+
+RefPosition* LinearScan::newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
+{
+ refPositions.emplace_back(curBBNum, nodeLocation, treeNode, refType);
+ RefPosition* newRP = &refPositions.back();
+#ifdef DEBUG
+ newRP->rpNum = static_cast<unsigned>(refPositions.size() - 1);
+#endif // DEBUG
+ return newRP;
+}
+
+//------------------------------------------------------------------------
+// resolveConflictingDefAndUse: Resolve the situation where we have conflicting def and use
+// register requirements on a single-def, single-use interval.
+//
+// Arguments:
+// defRefPosition - The interval definition
+// useRefPosition - The (sole) interval use
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The two RefPositions are for the same interval, which is a tree-temp.
+//
+// Notes:
+// We require some special handling for the case where the use is a "delayRegFree" case of a fixedReg.
+// In that case, if we change the registerAssignment on the useRefPosition, we will lose the fact that,
+// even if we assign a different register (and rely on codegen to do the copy), that fixedReg also needs
+// to remain busy until the Def register has been allocated. In that case, we don't allow Case 1 or Case 4
+// below.
+// Here are the cases we consider (in this order):
+// 1. If The defRefPosition specifies a single register, and there are no conflicting
+// FixedReg uses of it between the def and use, we use that register, and the code generator
+// will insert the copy. Note that it cannot be in use because there is a FixedRegRef for the def.
+// 2. If the useRefPosition specifies a single register, and it is not in use, and there are no
+// conflicting FixedReg uses of it between the def and use, we use that register, and the code generator
+// will insert the copy.
+// 3. If the defRefPosition specifies a single register (but there are conflicts, as determined
+// in 1.), and there are no conflicts with the useRefPosition register (if it's a single register),
+// we set the register requirements on the defRefPosition to the use registers, and the
+// code generator will insert a copy on the def. We can't rely on the code generator to put a copy
+// on the use if it has multiple possible candidates, as it won't know which one has been allocated.
+// 4. If the useRefPosition specifies a single register, and there are no conflicts with the register
+// on the defRefPosition, we leave the register requirements on the defRefPosition as-is, and set
+// the useRefPosition to the def registers, for similar reasons to case #3.
+// 5. If both the defRefPosition and the useRefPosition specify single registers, but both have conflicts,
+// We set the candidates on defRefPosition to be all regs of the appropriate type, and since they are
+// single registers, codegen can insert the copy.
+// 6. Finally, if the RefPositions specify disjoint subsets of the registers (or the use is fixed but
+// has a conflict), we must insert a copy. The copy will be inserted before the use if the
+// use is not fixed (in the fixed case, the code generator will insert the use).
+//
+// TODO-CQ: We get bad register allocation in case #3 in the situation where no register is
+// available for the lifetime. We end up allocating a register that must be spilled, and it probably
+// won't be the register that is actually defined by the target instruction. So, we have to copy it
+// and THEN spill it. In this case, we should be using the def requirement. But we need to change
+// the interface to this method a bit to make that work (e.g. returning a candidate set to use, but
+// leaving the registerAssignment as-is on the def, so that if we find that we need to spill anyway
+// we can use the fixed-reg on the def.)
+//
+
+void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition)
+{
+ assert(!interval->isLocalVar);
+
+ RefPosition* useRefPosition = defRefPosition->nextRefPosition;
+ regMaskTP defRegAssignment = defRefPosition->registerAssignment;
+ regMaskTP useRegAssignment = useRefPosition->registerAssignment;
+ RegRecord* defRegRecord = nullptr;
+ RegRecord* useRegRecord = nullptr;
+ regNumber defReg = REG_NA;
+ regNumber useReg = REG_NA;
+ bool defRegConflict = false;
+ bool useRegConflict = false;
+
+ // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment
+ // on it, or we will fail to ensure that the fixedReg is busy at the time the target
+ // (of the node that uses this interval) is allocated.
+ bool canChangeUseAssignment = !useRefPosition->isFixedRegRef || !useRefPosition->delayRegFree;
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CONFLICT));
+ if (!canChangeUseAssignment)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_FIXED_DELAY_USE));
+ }
+ if (defRefPosition->isFixedRegRef)
+ {
+ defReg = defRefPosition->assignedReg();
+ defRegRecord = getRegisterRecord(defReg);
+ if (canChangeUseAssignment)
+ {
+ RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
+ assert(currFixedRegRefPosition != nullptr &&
+ currFixedRegRefPosition->nodeLocation == defRefPosition->nodeLocation);
+
+ if (currFixedRegRefPosition->nextRefPosition == nullptr ||
+ currFixedRegRefPosition->nextRefPosition->nodeLocation > useRefPosition->getRefEndLocation())
+ {
+ // This is case #1. Use the defRegAssignment
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE1));
+ useRefPosition->registerAssignment = defRegAssignment;
+ return;
+ }
+ else
+ {
+ defRegConflict = true;
+ }
+ }
+ }
+ if (useRefPosition->isFixedRegRef)
+ {
+ useReg = useRefPosition->assignedReg();
+ useRegRecord = getRegisterRecord(useReg);
+ RefPosition* currFixedRegRefPosition = useRegRecord->recentRefPosition;
+
+ // We know that useRefPosition is a fixed use, so the nextRefPosition must not be null.
+ RefPosition* nextFixedRegRefPosition = useRegRecord->getNextRefPosition();
+ assert(nextFixedRegRefPosition != nullptr &&
+ nextFixedRegRefPosition->nodeLocation <= useRefPosition->nodeLocation);
+
+ // First, check to see if there are any conflicting FixedReg references between the def and use.
+ if (nextFixedRegRefPosition->nodeLocation == useRefPosition->nodeLocation)
+ {
+ // OK, no conflicting FixedReg references.
+ // Now, check to see whether it is currently in use.
+ if (useRegRecord->assignedInterval != nullptr)
+ {
+ RefPosition* possiblyConflictingRef = useRegRecord->assignedInterval->recentRefPosition;
+ LsraLocation possiblyConflictingRefLocation = possiblyConflictingRef->getRefEndLocation();
+ if (possiblyConflictingRefLocation >= defRefPosition->nodeLocation)
+ {
+ useRegConflict = true;
+ }
+ }
+ if (!useRegConflict)
+ {
+ // This is case #2. Use the useRegAssignment
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2));
+ defRefPosition->registerAssignment = useRegAssignment;
+ return;
+ }
+ }
+ else
+ {
+ useRegConflict = true;
+ }
+ }
+ if (defRegRecord != nullptr && !useRegConflict)
+ {
+ // This is case #3.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3));
+ defRefPosition->registerAssignment = useRegAssignment;
+ return;
+ }
+ if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment)
+ {
+ // This is case #4.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4));
+ useRefPosition->registerAssignment = defRegAssignment;
+ return;
+ }
+ if (defRegRecord != nullptr && useRegRecord != nullptr)
+ {
+ // This is case #5.
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE5));
+ RegisterType regType = interval->registerType;
+ assert((getRegisterType(interval, defRefPosition) == regType) &&
+ (getRegisterType(interval, useRefPosition) == regType));
+ regMaskTP candidates = allRegs(regType);
+ defRefPosition->registerAssignment = candidates;
+ return;
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6));
+ return;
+}
+
+//------------------------------------------------------------------------
+// conflictingFixedRegReference: Determine whether the current RegRecord has a
+// fixed register use that conflicts with 'refPosition'
+//
+// Arguments:
+// refPosition - The RefPosition of interest
+//
+// Return Value:
+// Returns true iff the given RefPosition is NOT a fixed use of this register,
+// AND either:
+// - there is a RefPosition on this RegRecord at the nodeLocation of the given RefPosition, or
+// - the given RefPosition has a delayRegFree, and there is a RefPosition on this RegRecord at
+// the nodeLocation just past the given RefPosition.
+//
+// Assumptions:
+// 'refPosition' is non-null.
+
+bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition)
+{
+ // Is this a fixed reference of this register? If so, there is no conflict.
+ if (refPosition->isFixedRefOfRegMask(genRegMask(regNum)))
+ {
+ return false;
+ }
+ // Otherwise, check for conflicts.
+ // There is a conflict if:
+ // 1. There is a recent RefPosition on this RegRecord that is at this location,
+ // except in the case where it is a special "putarg" that is associated with this interval, OR
+ // 2. There is an upcoming RefPosition at this location, or at the next location
+ // if refPosition is a delayed use (i.e. must be kept live through the next/def location).
+
+ LsraLocation refLocation = refPosition->nodeLocation;
+ if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill &&
+ recentRefPosition->nodeLocation == refLocation &&
+ (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval()))
+ {
+ return true;
+ }
+ LsraLocation nextPhysRefLocation = getNextRefLocation();
+ if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1)))
+ {
+ return true;
+ }
+ return false;
+}
+
+void LinearScan::applyCalleeSaveHeuristics(RefPosition* rp)
+{
+#ifdef _TARGET_AMD64_
+ if (compiler->opts.compDbgEnC)
+ {
+ // We only use RSI and RDI for EnC code, so we don't want to favor callee-save regs.
+ return;
+ }
+#endif // _TARGET_AMD64_
+
+ Interval* theInterval = rp->getInterval();
+#ifdef DEBUG
+ regMaskTP calleeSaveMask = calleeSaveRegs(getRegisterType(theInterval, rp));
+ if (doReverseCallerCallee())
+ {
+ regMaskTP newAssignment = rp->registerAssignment;
+ newAssignment &= calleeSaveMask;
+ if (newAssignment != RBM_NONE)
+ {
+ rp->registerAssignment = newAssignment;
+ }
+ }
+ else
+#endif // DEBUG
+ {
+ // Set preferences so that this register set will be preferred for earlier refs
+ theInterval->updateRegisterPreferences(rp->registerAssignment);
+ }
+}
+
+void LinearScan::associateRefPosWithInterval(RefPosition* rp)
+{
+ Referenceable* theReferent = rp->referent;
+
+ if (theReferent != nullptr)
+ {
+ // All RefPositions except the dummy ones at the beginning of blocks
+
+ if (rp->isIntervalRef())
+ {
+ Interval* theInterval = rp->getInterval();
+
+ applyCalleeSaveHeuristics(rp);
+
+ // Ensure that we have consistent def/use on SDSU temps.
+ // However, in the case of a non-commutative rmw def, we must avoid over-constraining
+ // the def, so don't propagate a single-register restriction from the consumer to the producer
+
+ if (RefTypeIsUse(rp->refType) && !theInterval->isLocalVar)
+ {
+ RefPosition* prevRefPosition = theInterval->recentRefPosition;
+ assert(prevRefPosition != nullptr && theInterval->firstRefPosition == prevRefPosition);
+ regMaskTP prevAssignment = prevRefPosition->registerAssignment;
+ regMaskTP newAssignment = (prevAssignment & rp->registerAssignment);
+ if (newAssignment != RBM_NONE)
+ {
+ if (!theInterval->hasNonCommutativeRMWDef || !isSingleRegister(newAssignment))
+ {
+ prevRefPosition->registerAssignment = newAssignment;
+ }
+ }
+ else
+ {
+ theInterval->hasConflictingDefUse = true;
+ }
+ }
+ }
+
+ RefPosition* prevRP = theReferent->recentRefPosition;
+ if (prevRP != nullptr)
+ {
+ prevRP->nextRefPosition = rp;
+ }
+ else
+ {
+ theReferent->firstRefPosition = rp;
+ }
+ theReferent->recentRefPosition = rp;
+ theReferent->lastRefPosition = rp;
+ }
+ else
+ {
+ assert((rp->refType == RefTypeBB) || (rp->refType == RefTypeKillGCRefs));
+ }
+}
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+// reg - reg number that identifies RegRecord to be associated
+// with this RefPosition
+// theLocation - LSRA location of RefPosition
+// theRefType - RefPosition type
+// theTreeNode - GenTree node for which this RefPosition is created
+// mask - Set of valid registers for this RefPosition
+//
+// Return Value:
+// a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask)
+{
+ RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+ newRP->setReg(getRegisterRecord(reg));
+ newRP->registerAssignment = mask;
+
+ newRP->setMultiRegIdx(0);
+ newRP->setAllocateIfProfitable(0);
+
+ associateRefPosWithInterval(newRP);
+
+ DBEXEC(VERBOSE, newRP->dump());
+ return newRP;
+}
+
+//---------------------------------------------------------------------------
+// newRefPosition: allocate and initialize a new RefPosition.
+//
+// Arguments:
+// theInterval - interval to which RefPosition is associated with.
+// theLocation - LSRA location of RefPosition
+// theRefType - RefPosition type
+// theTreeNode - GenTree node for which this RefPosition is created
+// mask - Set of valid registers for this RefPosition
+// multiRegIdx - register position if this RefPosition corresponds to a
+// multi-reg call node.
+//
+// Return Value:
+// a new RefPosition
+//
+RefPosition* LinearScan::newRefPosition(Interval* theInterval,
+ LsraLocation theLocation,
+ RefType theRefType,
+ GenTree* theTreeNode,
+ regMaskTP mask,
+ unsigned multiRegIdx /* = 0 */)
+{
+#ifdef DEBUG
+ if (theInterval != nullptr && regType(theInterval->registerType) == FloatRegisterType)
+ {
+        // If we are using floating point registers, we must make sure this flag was set
+        // previously in the compiler, since it determines whether LSRA takes FP register
+        // kill sets into consideration.
+ assert(compiler->compFloatingPointUsed || ((mask & RBM_FLT_CALLEE_SAVED) == 0));
+ }
+#endif // DEBUG
+
+ // If this reference is constrained to a single register (and it's not a dummy
+ // or Kill reftype already), add a RefTypeFixedReg at this location so that its
+ // availability can be more accurately determined
+
+ bool isFixedRegister = isSingleRegister(mask);
+ bool insertFixedRef = false;
+ if (isFixedRegister)
+ {
+ // Insert a RefTypeFixedReg for any normal def or use (not ParamDef or BB)
+ if (theRefType == RefTypeUse || theRefType == RefTypeDef)
+ {
+ insertFixedRef = true;
+ }
+ }
+
+ if (insertFixedRef)
+ {
+ regNumber physicalReg = genRegNumFromMask(mask);
+ RefPosition* pos = newRefPosition(physicalReg, theLocation, RefTypeFixedReg, nullptr, mask);
+ assert(theInterval != nullptr);
+ assert((allRegs(theInterval->registerType) & mask) != 0);
+ }
+
+ RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType);
+
+ newRP->setInterval(theInterval);
+
+ // Spill info
+ newRP->isFixedRegRef = isFixedRegister;
+
+#ifndef _TARGET_AMD64_
+    // We don't need this for AMD64 because the PInvoke method epilog code is explicit
+ // at register allocation time.
+ if (theInterval != nullptr && theInterval->isLocalVar && compiler->info.compCallUnmanaged &&
+ theInterval->varNum == compiler->genReturnLocal)
+ {
+ mask &= ~(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
+ noway_assert(mask != RBM_NONE);
+ }
+#endif // !_TARGET_AMD64_
+ newRP->registerAssignment = mask;
+
+ newRP->setMultiRegIdx(multiRegIdx);
+ newRP->setAllocateIfProfitable(0);
+
+ associateRefPosWithInterval(newRP);
+
+ DBEXEC(VERBOSE, newRP->dump());
+ return newRP;
+}
+
+/*****************************************************************************
+ * Inline functions for Interval
+ *****************************************************************************/
+RefPosition* Referenceable::getNextRefPosition()
+{
+ if (recentRefPosition == nullptr)
+ {
+ return firstRefPosition;
+ }
+ else
+ {
+ return recentRefPosition->nextRefPosition;
+ }
+}
+
+LsraLocation Referenceable::getNextRefLocation()
+{
+ RefPosition* nextRefPosition = getNextRefPosition();
+ if (nextRefPosition == nullptr)
+ {
+ return MaxLocation;
+ }
+ else
+ {
+ return nextRefPosition->nodeLocation;
+ }
+}
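+
+// Illustrative sketch (not part of the allocator proper): the RefPositions attached to a
+// Referenceable form a singly-linked chain built by associateRefPosWithInterval, so the full set
+// of references for a hypothetical 'interval' could be walked as:
+//
+//     for (RefPosition* rp = interval->firstRefPosition; rp != nullptr; rp = rp->nextRefPosition)
+//     {
+//         // rp->nodeLocation is the LSRA location of this reference.
+//     }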
+
+// Iterate through all the registers of the given type
+class RegisterIterator
+{
+ friend class Registers;
+
+public:
+ RegisterIterator(RegisterType type) : regType(type)
+ {
+ if (useFloatReg(regType))
+ {
+ currentRegNum = REG_FP_FIRST;
+ }
+ else
+ {
+ currentRegNum = REG_INT_FIRST;
+ }
+ }
+
+protected:
+ static RegisterIterator Begin(RegisterType regType)
+ {
+ return RegisterIterator(regType);
+ }
+ static RegisterIterator End(RegisterType regType)
+ {
+ RegisterIterator endIter = RegisterIterator(regType);
+        // This assumes only integer and floating point register types;
+        // if we target a processor with additional register types,
+        // this would have to change.
+ if (useFloatReg(regType))
+ {
+ // This just happens to work for both double & float
+ endIter.currentRegNum = REG_NEXT(REG_FP_LAST);
+ }
+ else
+ {
+ endIter.currentRegNum = REG_NEXT(REG_INT_LAST);
+ }
+ return endIter;
+ }
+
+public:
+    void operator++(int dummy) // int dummy is C++ for "this is postfix ++"
+ {
+ currentRegNum = REG_NEXT(currentRegNum);
+#ifdef _TARGET_ARM_
+ if (regType == TYP_DOUBLE)
+ currentRegNum = REG_NEXT(currentRegNum);
+#endif
+ }
+ void operator++() // prefix operator++
+ {
+ currentRegNum = REG_NEXT(currentRegNum);
+#ifdef _TARGET_ARM_
+ if (regType == TYP_DOUBLE)
+ currentRegNum = REG_NEXT(currentRegNum);
+#endif
+ }
+ regNumber operator*()
+ {
+ return currentRegNum;
+ }
+ bool operator!=(const RegisterIterator& other)
+ {
+ return other.currentRegNum != currentRegNum;
+ }
+
+private:
+ regNumber currentRegNum;
+ RegisterType regType;
+};
+
+class Registers
+{
+public:
+ friend class RegisterIterator;
+ RegisterType type;
+ Registers(RegisterType t)
+ {
+ type = t;
+ }
+ RegisterIterator begin()
+ {
+ return RegisterIterator::Begin(type);
+ }
+ RegisterIterator end()
+ {
+ return RegisterIterator::End(type);
+ }
+};
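+
+// Usage sketch (illustrative only; 'regType' stands in for any RegisterType value): since Registers
+// exposes begin()/end() returning RegisterIterator, the registers of a given type can be enumerated
+// with an explicit iterator loop:
+//
+//     Registers registers(regType);
+//     for (RegisterIterator iter = registers.begin(); iter != registers.end(); ++iter)
+//     {
+//         regNumber reg = *iter;
+//         // ... use 'reg' ...
+//     }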
+
+#ifdef DEBUG
+void LinearScan::dumpVarToRegMap(VarToRegMap map)
+{
+ bool anyPrinted = false;
+ for (unsigned varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (map[varIndex] != REG_STK)
+ {
+ printf("V%02u=%s ", varNum, getRegName(map[varIndex]));
+ anyPrinted = true;
+ }
+ }
+ if (!anyPrinted)
+ {
+ printf("none");
+ }
+ printf("\n");
+}
+
+void LinearScan::dumpInVarToRegMap(BasicBlock* block)
+{
+ printf("Var=Reg beg of BB%02u: ", block->bbNum);
+ VarToRegMap map = getInVarToRegMap(block->bbNum);
+ dumpVarToRegMap(map);
+}
+
+void LinearScan::dumpOutVarToRegMap(BasicBlock* block)
+{
+ printf("Var=Reg end of BB%02u: ", block->bbNum);
+ VarToRegMap map = getOutVarToRegMap(block->bbNum);
+ dumpVarToRegMap(map);
+}
+
+#endif // DEBUG
+
+LinearScanInterface* getLinearScanAllocator(Compiler* comp)
+{
+ return new (comp, CMK_LSRA) LinearScan(comp);
+}
+
+//------------------------------------------------------------------------
+// LSRA constructor
+//
+// Arguments:
+// theCompiler
+//
+// Notes:
+// The constructor takes care of initializing the data structures that are used
+// during Lowering, including (in DEBUG) getting the stress environment variables,
+// as they may affect the block ordering.
+
+LinearScan::LinearScan(Compiler* theCompiler)
+ : compiler(theCompiler)
+#if MEASURE_MEM_ALLOC
+ , lsraIAllocator(nullptr)
+#endif // MEASURE_MEM_ALLOC
+ , intervals(LinearScanMemoryAllocatorInterval(theCompiler))
+ , refPositions(LinearScanMemoryAllocatorRefPosition(theCompiler))
+{
+#ifdef DEBUG
+ maxNodeLocation = 0;
+ activeRefPosition = nullptr;
+
+ // Get the value of the environment variable that controls stress for register allocation
+ lsraStressMask = JitConfig.JitStressRegs();
+#if 0
+#ifdef DEBUG
+ if (lsraStressMask != 0)
+ {
+ // The code in this #if can be used to debug JitStressRegs issues according to
+ // method hash. To use, simply set environment variables JitStressRegsHashLo and JitStressRegsHashHi
+ unsigned methHash = compiler->info.compMethodHash();
+ char* lostr = getenv("JitStressRegsHashLo");
+ unsigned methHashLo = 0;
+ bool dump = false;
+ if (lostr != nullptr)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ dump = true;
+ }
+ char* histr = getenv("JitStressRegsHashHi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != nullptr)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ dump = true;
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ lsraStressMask = 0;
+ }
+ else if (dump == true)
+ {
+ printf("JitStressRegs = %x for method %s, hash = 0x%x.\n",
+ lsraStressMask, compiler->info.compFullName, compiler->info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+ }
+#endif // DEBUG
+#endif
+
+ dumpTerse = (JitConfig.JitDumpTerseLsra() != 0);
+
+#endif // DEBUG
+ availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd);
+#if ETW_EBP_FRAMED
+ availableIntRegs &= ~RBM_FPBASE;
+#endif // ETW_EBP_FRAMED
+ availableFloatRegs = RBM_ALLFLOAT;
+ availableDoubleRegs = RBM_ALLDOUBLE;
+
+#ifdef _TARGET_AMD64_
+ if (compiler->opts.compDbgEnC)
+ {
+ // On x64 when the EnC option is set, we always save exactly RBP, RSI and RDI.
+ // RBP is not available to the register allocator, so RSI and RDI are the only
+ // callee-save registers available.
+ availableIntRegs &= ~RBM_CALLEE_SAVED | RBM_RSI | RBM_RDI;
+ availableFloatRegs &= ~RBM_CALLEE_SAVED;
+ availableDoubleRegs &= ~RBM_CALLEE_SAVED;
+ }
+#endif // _TARGET_AMD64_
+ compiler->rpFrameType = FT_NOT_SET;
+ compiler->rpMustCreateEBPCalled = false;
+
+ compiler->codeGen->intRegState.rsIsFloat = false;
+ compiler->codeGen->floatRegState.rsIsFloat = true;
+
+ // Block sequencing (the order in which we schedule).
+ // Note that we don't initialize the bbVisitedSet until we do the first traversal
+ // (currently during Lowering's second phase, where it sets the TreeNodeInfo).
+ // This is so that any blocks that are added during the first phase of Lowering
+ // are accounted for (and we don't have BasicBlockEpoch issues).
+ blockSequencingDone = false;
+ blockSequence = nullptr;
+ blockSequenceWorkList = nullptr;
+ curBBSeqNum = 0;
+ bbSeqCount = 0;
+
+ // Information about each block, including predecessor blocks used for variable locations at block entry.
+ blockInfo = nullptr;
+
+ // Populate the register mask table.
+ // The first two masks in the table are allint/allfloat
+ // The next N are the masks for each single register.
+ // After that are the dynamically added ones.
+ regMaskTable = new (compiler, CMK_LSRA) regMaskTP[numMasks];
+ regMaskTable[ALLINT_IDX] = allRegs(TYP_INT);
+ regMaskTable[ALLFLOAT_IDX] = allRegs(TYP_DOUBLE);
+
+ regNumber reg;
+ for (reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ regMaskTable[FIRST_SINGLE_REG_IDX + reg - REG_FIRST] = (reg == REG_STK) ? RBM_NONE : genRegMask(reg);
+ }
+ nextFreeMask = FIRST_SINGLE_REG_IDX + REG_COUNT;
+ noway_assert(nextFreeMask <= numMasks);
+}
+
+// Return the reg mask corresponding to the given index.
+regMaskTP LinearScan::GetRegMaskForIndex(RegMaskIndex index)
+{
+ assert(index < numMasks);
+ assert(index < nextFreeMask);
+ return regMaskTable[index];
+}
+
+// Given a reg mask, return the index it corresponds to. If it is not a 'well known' reg mask,
+// add it at the end. This method has linear behavior in the worst case, but that is fairly rare.
+// Most methods never use any but the well-known masks, and when they do use more
+// it is only one or two more.
+LinearScan::RegMaskIndex LinearScan::GetIndexForRegMask(regMaskTP mask)
+{
+ RegMaskIndex result;
+ if (isSingleRegister(mask))
+ {
+ result = genRegNumFromMask(mask) + FIRST_SINGLE_REG_IDX;
+ }
+ else if (mask == allRegs(TYP_INT))
+ {
+ result = ALLINT_IDX;
+ }
+ else if (mask == allRegs(TYP_DOUBLE))
+ {
+ result = ALLFLOAT_IDX;
+ }
+ else
+ {
+ for (int i = FIRST_SINGLE_REG_IDX + REG_COUNT; i < nextFreeMask; i++)
+ {
+ if (regMaskTable[i] == mask)
+ {
+ return i;
+ }
+ }
+
+ // We only allocate a fixed number of masks. Since we don't reallocate, we will throw a
+ // noway_assert if we exceed this limit.
+ noway_assert(nextFreeMask < numMasks);
+
+ regMaskTable[nextFreeMask] = mask;
+ result = nextFreeMask;
+ nextFreeMask++;
+ }
+ assert(mask == regMaskTable[result]);
+ return result;
+}
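+
+// Illustrative round-trip, from within LinearScan (an assumption for exposition, not code used by
+// the allocator); on xarch, for example:
+//
+//     RegMaskIndex idx  = GetIndexForRegMask(RBM_RAX | RBM_RCX); // added to the table if not well-known
+//     regMaskTP    mask = GetRegMaskForIndex(idx);               // recovers the same mask
+//
+// Single-register masks and the allint/allfloat masks map to the pre-populated slots, so only
+// genuinely new combinations consume entries in regMaskTable.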
+
+// We've decided that we can't use a register during register allocation (probably FPBASE),
+// but we've already added it to the register masks. Go through the masks and remove it.
+void LinearScan::RemoveRegisterFromMasks(regNumber reg)
+{
+ JITDUMP("Removing register %s from LSRA register masks\n", getRegName(reg));
+
+ regMaskTP mask = ~genRegMask(reg);
+ for (int i = 0; i < nextFreeMask; i++)
+ {
+ regMaskTable[i] &= mask;
+ }
+
+ JITDUMP("After removing register:\n");
+ DBEXEC(VERBOSE, dspRegisterMaskTable());
+}
+
+#ifdef DEBUG
+void LinearScan::dspRegisterMaskTable()
+{
+ printf("LSRA register masks. Total allocated: %d, total used: %d\n", numMasks, nextFreeMask);
+ for (int i = 0; i < nextFreeMask; i++)
+ {
+ printf("%2u: ", i);
+ dspRegMask(regMaskTable[i]);
+ printf("\n");
+ }
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// getNextCandidateFromWorkList: Get the next candidate for block sequencing
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// The next block to be placed in the sequence.
+//
+// Notes:
+// This method currently always returns the next block in the list, and relies on having
+// blocks added to the list only when they are "ready", and on the
+// addToBlockSequenceWorkList() method to insert them in the proper order.
+// However, a block may be in the list and already selected, if it was subsequently
+// encountered as both a flow and layout successor of the most recently selected
+// block.
+
+BasicBlock* LinearScan::getNextCandidateFromWorkList()
+{
+ BasicBlockList* nextWorkList = nullptr;
+ for (BasicBlockList* workList = blockSequenceWorkList; workList != nullptr; workList = nextWorkList)
+ {
+ nextWorkList = workList->next;
+ BasicBlock* candBlock = workList->block;
+ removeFromBlockSequenceWorkList(workList, nullptr);
+ if (!isBlockVisited(candBlock))
+ {
+ return candBlock;
+ }
+ }
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// setBlockSequence: Determine the block order for register allocation.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// None
+//
+// Notes:
+// On return, the blockSequence array contains the blocks, in the order in which they
+// will be allocated.
+// This method clears the bbVisitedSet on LinearScan, and when it returns the set
+//    contains all the bbNums for the blocks.
+// This requires a traversal of the BasicBlocks, and could potentially be
+// combined with the first traversal (currently the one in Lowering that sets the
+// TreeNodeInfo).
+
+void LinearScan::setBlockSequence()
+{
+ // Reset the "visited" flag on each block.
+ compiler->EnsureBasicBlockEpoch();
+ bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
+ BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler));
+ assert(blockSequence == nullptr && bbSeqCount == 0);
+ blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
+ bbNumMaxBeforeResolution = compiler->fgBBNumMax;
+ blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];
+
+ assert(blockSequenceWorkList == nullptr);
+
+ bool addedInternalBlocks = false;
+ verifiedAllBBs = false;
+ BasicBlock* nextBlock;
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = nextBlock)
+ {
+ blockSequence[bbSeqCount] = block;
+ markBlockVisited(block);
+ bbSeqCount++;
+ nextBlock = nullptr;
+
+ // Initialize the blockInfo.
+ // predBBNum will be set later. 0 is never used as a bbNum.
+ blockInfo[block->bbNum].predBBNum = 0;
+ // We check for critical edges below, but initialize to false.
+ blockInfo[block->bbNum].hasCriticalInEdge = false;
+ blockInfo[block->bbNum].hasCriticalOutEdge = false;
+ blockInfo[block->bbNum].weight = block->bbWeight;
+
+ if (block->GetUniquePred(compiler) == nullptr)
+ {
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (predBlock->NumSucc(compiler) > 1)
+ {
+ blockInfo[block->bbNum].hasCriticalInEdge = true;
+ break;
+ }
+ else if (predBlock->bbJumpKind == BBJ_SWITCH)
+ {
+ assert(!"Switch with single successor");
+ }
+ }
+ }
+
+ // Determine which block to schedule next.
+
+ // First, update the NORMAL successors of the current block, adding them to the worklist
+ // according to the desired order. We will handle the EH successors below.
+ bool checkForCriticalOutEdge = (block->NumSucc(compiler) > 1);
+ if (!checkForCriticalOutEdge && block->bbJumpKind == BBJ_SWITCH)
+ {
+ assert(!"Switch with single successor");
+ }
+
+ for (unsigned succIndex = 0; succIndex < block->NumSucc(compiler); succIndex++)
+ {
+ BasicBlock* succ = block->GetSucc(succIndex, compiler);
+ if (checkForCriticalOutEdge && succ->GetUniquePred(compiler) == nullptr)
+ {
+ blockInfo[block->bbNum].hasCriticalOutEdge = true;
+ // We can stop checking now.
+ checkForCriticalOutEdge = false;
+ }
+
+ if (isTraversalLayoutOrder() || isBlockVisited(succ))
+ {
+ continue;
+ }
+
+ // We've now seen a predecessor, so add it to the work list and the "readySet".
+ // It will be inserted in the worklist according to the specified traversal order
+ // (i.e. pred-first or random, since layout order is handled above).
+ if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
+ {
+ addToBlockSequenceWorkList(readySet, succ);
+ BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
+ }
+ }
+
+ // For layout order, simply use bbNext
+ if (isTraversalLayoutOrder())
+ {
+ nextBlock = block->bbNext;
+ continue;
+ }
+
+ while (nextBlock == nullptr)
+ {
+ nextBlock = getNextCandidateFromWorkList();
+
+ // TODO-Throughput: We would like to bypass this traversal if we know we've handled all
+ // the blocks - but fgBBcount does not appear to be updated when blocks are removed.
+ if (nextBlock == nullptr /* && bbSeqCount != compiler->fgBBcount*/ && !verifiedAllBBs)
+ {
+            // If we don't encounter all blocks by traversing the regular successor links, do a full
+ // traversal of all the blocks, and add them in layout order.
+ // This may include:
+ // - internal-only blocks (in the fgAddCodeList) which may not be in the flow graph
+ // (these are not even in the bbNext links).
+ // - blocks that have become unreachable due to optimizations, but that are strongly
+ // connected (these are not removed)
+ // - EH blocks
+
+ for (Compiler::AddCodeDsc* desc = compiler->fgAddCodeList; desc != nullptr; desc = desc->acdNext)
+ {
+ if (!isBlockVisited(block))
+ {
+ addToBlockSequenceWorkList(readySet, block);
+ BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
+ }
+ }
+
+ for (BasicBlock* block = compiler->fgFirstBB; block; block = block->bbNext)
+ {
+ if (!isBlockVisited(block))
+ {
+ addToBlockSequenceWorkList(readySet, block);
+ BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
+ }
+ }
+ verifiedAllBBs = true;
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+ blockSequencingDone = true;
+
+#ifdef DEBUG
+ // Make sure that we've visited all the blocks.
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ assert(isBlockVisited(block));
+ }
+
+ JITDUMP("LSRA Block Sequence: ");
+ int i = 1;
+ for (BasicBlock *block = startBlockSequence(); block != nullptr; ++i, block = moveToNextBlock())
+ {
+ JITDUMP("BB%02u", block->bbNum);
+
+ if (block->isMaxBBWeight())
+ {
+ JITDUMP("(MAX) ");
+ }
+ else
+ {
+ JITDUMP("(%6s) ", refCntWtd2str(block->getBBWeight(compiler)));
+ }
+
+ if (i % 10 == 0)
+ {
+ JITDUMP("\n ");
+ }
+ }
+ JITDUMP("\n\n");
+#endif
+}
+
+//------------------------------------------------------------------------
+// compareBlocksForSequencing: Compare two basic blocks for sequencing order.
+//
+// Arguments:
+// block1 - the first block for comparison
+// block2 - the second block for comparison
+// useBlockWeights - whether to use block weights for comparison
+//
+// Return Value:
+// -1 if block1 is preferred.
+// 0 if the blocks are equivalent.
+// 1 if block2 is preferred.
+//
+// Notes:
+// See addToBlockSequenceWorkList.
+int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights)
+{
+ if (useBlockWeights)
+ {
+ unsigned weight1 = block1->getBBWeight(compiler);
+ unsigned weight2 = block2->getBBWeight(compiler);
+
+ if (weight1 > weight2)
+ {
+ return -1;
+ }
+ else if (weight1 < weight2)
+ {
+ return 1;
+ }
+ }
+
+ // If weights are the same prefer LOWER bbnum
+ if (block1->bbNum < block2->bbNum)
+ {
+ return -1;
+ }
+ else if (block1->bbNum == block2->bbNum)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
+}
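+
+// For example (illustrative): with useBlockWeights, a block with bbWeight 100 sorts ahead of one
+// with bbWeight 2 regardless of bbNum; if the weights are equal (or weights are not used), the block
+// with the lower bbNum is preferred, and 0 is returned only when a block is compared with itself.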
+
+//------------------------------------------------------------------------
+// addToBlockSequenceWorkList: Add a BasicBlock to the work list for sequencing.
+//
+// Arguments:
+// sequencedBlockSet - the set of blocks that are already sequenced
+// block - the new block to be added
+//
+// Return Value:
+// None.
+//
+// Notes:
+// The first block in the list will be the next one to be sequenced, as soon
+// as we encounter a block whose successors have all been sequenced, in pred-first
+// order, or the very next block if we are traversing in random order (once implemented).
+// This method uses a comparison method to determine the order in which to place
+// the blocks in the list. This method queries whether all predecessors of the
+// block are sequenced at the time it is added to the list and if so uses block weights
+// for inserting the block. A block is never inserted ahead of its predecessors.
+// A block at the time of insertion may not have all its predecessors sequenced, in
+// which case it will be sequenced based on its block number. Once a block is inserted,
+// its priority\order will not be changed later once its remaining predecessors are
+// sequenced. This would mean that work list may not be sorted entirely based on
+// block weights alone.
+//
+// Note also that, when random traversal order is implemented, this method
+// should insert the blocks into the list in random order, so that we can always
+// simply select the first block in the list.
+void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block)
+{
+ // The block that is being added is not already sequenced
+ assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));
+
+ // Get predSet of block
+ BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));
+ flowList* pred;
+ for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BlockSetOps::AddElemD(compiler, predSet, pred->flBlock->bbNum);
+ }
+
+    // If the block is rarely run, or all of its preds are already sequenced, use the block's weight for sequencing.
+ bool useBlockWeight = block->isRunRarely() || BlockSetOps::IsSubset(compiler, sequencedBlockSet, predSet);
+
+ BasicBlockList* prevNode = nullptr;
+ BasicBlockList* nextNode = blockSequenceWorkList;
+
+ while (nextNode != nullptr)
+ {
+ int seqResult;
+
+ if (nextNode->block->isRunRarely())
+ {
+ // If the block that is yet to be sequenced is a rarely run block, always use block weights for sequencing
+ seqResult = compareBlocksForSequencing(nextNode->block, block, true);
+ }
+ else if (BlockSetOps::IsMember(compiler, predSet, nextNode->block->bbNum))
+ {
+ // always prefer unsequenced pred blocks
+ seqResult = -1;
+ }
+ else
+ {
+ seqResult = compareBlocksForSequencing(nextNode->block, block, useBlockWeight);
+ }
+
+ if (seqResult > 0)
+ {
+ break;
+ }
+
+ prevNode = nextNode;
+ nextNode = nextNode->next;
+ }
+
+ BasicBlockList* newListNode = new (compiler, CMK_LSRA) BasicBlockList(block, nextNode);
+ if (prevNode == nullptr)
+ {
+ blockSequenceWorkList = newListNode;
+ }
+ else
+ {
+ prevNode->next = newListNode;
+ }
+}
+
+void LinearScan::removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode)
+{
+ if (listNode == blockSequenceWorkList)
+ {
+ assert(prevNode == nullptr);
+ blockSequenceWorkList = listNode->next;
+ }
+ else
+ {
+ assert(prevNode != nullptr && prevNode->next == listNode);
+ prevNode->next = listNode->next;
+ }
+ // TODO-Cleanup: consider merging Compiler::BlockListNode and BasicBlockList
+ // compiler->FreeBlockListNode(listNode);
+}
+
+// Initialize the block order for allocation (called each time a new traversal begins).
+BasicBlock* LinearScan::startBlockSequence()
+{
+ if (!blockSequencingDone)
+ {
+ setBlockSequence();
+ }
+ BasicBlock* curBB = compiler->fgFirstBB;
+ curBBSeqNum = 0;
+ curBBNum = curBB->bbNum;
+ clearVisitedBlocks();
+ assert(blockSequence[0] == compiler->fgFirstBB);
+ markBlockVisited(curBB);
+ return curBB;
+}
+
+//------------------------------------------------------------------------
+// moveToNextBlock: Move to the next block in order for allocation or resolution.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// The next block.
+//
+// Notes:
+// This method is used when the next block is actually going to be handled.
+// It changes curBBNum.
+
+BasicBlock* LinearScan::moveToNextBlock()
+{
+ BasicBlock* nextBlock = getNextBlock();
+ curBBSeqNum++;
+ if (nextBlock != nullptr)
+ {
+ curBBNum = nextBlock->bbNum;
+ }
+ return nextBlock;
+}
+
+//------------------------------------------------------------------------
+// getNextBlock: Get the next block in order for allocation or resolution.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// The next block.
+//
+// Notes:
+// This method does not actually change the current block - it is used simply
+// to determine which block will be next.
+
+BasicBlock* LinearScan::getNextBlock()
+{
+ assert(blockSequencingDone);
+ unsigned int nextBBSeqNum = curBBSeqNum + 1;
+ if (nextBBSeqNum < bbSeqCount)
+ {
+ return blockSequence[nextBBSeqNum];
+ }
+ return nullptr;
+}
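+
+// Typical traversal sketch (mirrors the DEBUG dump loop in setBlockSequence; shown here only as an
+// illustration, not as new allocator logic):
+//
+//     for (BasicBlock* block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+//     {
+//         // allocate or resolve registers for 'block' in the chosen sequence order
+//     }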
+
+//------------------------------------------------------------------------
+// doLinearScan: The main method for register allocation.
+//
+// Arguments:
+// None
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Lowering must have set the NodeInfo (gtLsraInfo) on each node to communicate
+// the register requirements.
+
+void LinearScan::doLinearScan()
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("*************** In doLinearScan\n");
+ printf("Trees before linear scan register allocator (LSRA)\n");
+ compiler->fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ splitBBNumToTargetBBNumMap = nullptr;
+
+ // This is complicated by the fact that physical registers have refs associated
+ // with locations where they are killed (e.g. calls), but we don't want to
+ // count these as being touched.
+
+ compiler->codeGen->regSet.rsClearRegsModified();
+
+ // Figure out if we're going to use an RSP frame or an RBP frame. We need to do this
+ // before building the intervals and ref positions, because those objects will embed
+ // RBP in various register masks (like preferences) if RBP is allowed to be allocated.
+ setFrameType();
+
+ initMaxSpill();
+ buildIntervals();
+ DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS));
+ compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD);
+
+ DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));
+
+ BlockSetOps::ClearD(compiler, bbVisitedSet);
+ initVarRegMaps();
+ allocateRegisters();
+ compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
+ resolveRegisters();
+ compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE);
+
+ DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));
+
+ compiler->compLSRADone = true;
+}
+
+//------------------------------------------------------------------------
+// recordVarLocationsAtStartOfBB: Update live-in LclVarDscs with the appropriate
+// register location at the start of a block, during codegen.
+//
+// Arguments:
+// bb - the block for which code is about to be generated.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// CodeGen will take care of updating the reg masks and the current var liveness,
+// after calling this method.
+// This is because we need to kill off the dead registers before setting the newly live ones.
+
+void LinearScan::recordVarLocationsAtStartOfBB(BasicBlock* bb)
+{
+ JITDUMP("Recording Var Locations at start of BB%02u\n", bb->bbNum);
+ VarToRegMap map = getInVarToRegMap(bb->bbNum);
+ unsigned count = 0;
+
+ VARSET_ITER_INIT(compiler, iter, bb->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = &(compiler->lvaTable[varNum]);
+ regNumber regNum = getVarReg(map, varNum);
+
+ regNumber oldRegNum = varDsc->lvRegNum;
+ regNumber newRegNum = regNum;
+
+ if (oldRegNum != newRegNum)
+ {
+ JITDUMP(" V%02u(%s->%s)", varNum, compiler->compRegVarName(oldRegNum),
+ compiler->compRegVarName(newRegNum));
+ varDsc->lvRegNum = newRegNum;
+ count++;
+ }
+ else if (newRegNum != REG_STK)
+ {
+ JITDUMP(" V%02u(%s)", varNum, compiler->compRegVarName(newRegNum));
+ count++;
+ }
+ }
+
+ if (count == 0)
+ {
+ JITDUMP(" <none>\n");
+ }
+
+ JITDUMP("\n");
+}
+
+void Interval::setLocalNumber(unsigned lclNum, LinearScan* linScan)
+{
+ linScan->localVarIntervals[lclNum] = this;
+
+ assert(linScan->getIntervalForLocalVar(lclNum) == this);
+ this->isLocalVar = true;
+ this->varNum = lclNum;
+}
+
+// identify the candidates which we are not going to enregister due to
+// being used in EH in a way we don't want to deal with
+// this logic cloned from fgInterBlockLocalVarLiveness
+void LinearScan::identifyCandidatesExceptionDataflow()
+{
+ VARSET_TP VARSET_INIT_NOCOPY(exceptVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(filterVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(finallyVars, VarSetOps::MakeEmpty(compiler));
+ BasicBlock* block;
+
+ foreach_block(compiler, block)
+ {
+ if (block->bbCatchTyp != BBCT_NONE)
+ {
+ // live on entry to handler
+ VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
+ }
+
+ if (block->bbJumpKind == BBJ_EHFILTERRET)
+ {
+ // live on exit from filter
+ VarSetOps::UnionD(compiler, filterVars, block->bbLiveOut);
+ }
+ else if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ // live on exit from finally
+ VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut);
+ }
+#if FEATURE_EH_FUNCLETS
+ // Funclets are called and returned from, as such we can only count on the frame
+ // pointer being restored, and thus everything live in or live out must be on the
+ // stack
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ VarSetOps::UnionD(compiler, exceptVars, block->bbLiveIn);
+ }
+ if ((block->bbJumpKind == BBJ_EHFINALLYRET) || (block->bbJumpKind == BBJ_EHFILTERRET) ||
+ (block->bbJumpKind == BBJ_EHCATCHRET))
+ {
+ VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut);
+ }
+#endif // FEATURE_EH_FUNCLETS
+ }
+
+ // slam them all together (there was really no need to use more than 2 bitvectors here)
+ VarSetOps::UnionD(compiler, exceptVars, filterVars);
+ VarSetOps::UnionD(compiler, exceptVars, finallyVars);
+
+ /* Mark all pointer variables live on exit from a 'finally'
+ block as either volatile for non-GC ref types or as
+ 'explicitly initialized' (volatile and must-init) for GC-ref types */
+
+ VARSET_ITER_INIT(compiler, iter, exceptVars, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+
+ compiler->lvaSetVarDoNotEnregister(varNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
+
+ if (varTypeIsGC(varDsc))
+ {
+ if (VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+ }
+}
+
+bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
+{
+ // Check to see if opt settings permit register variables
+ if ((compiler->opts.compFlags & CLFLG_REGVAR) == 0)
+ {
+ return false;
+ }
+
+ // If we have JMP, reg args must be put on the stack
+
+ if (compiler->compJmpOpUsed && varDsc->lvIsRegArg)
+ {
+ return false;
+ }
+
+ if (!varDsc->lvTracked)
+ {
+ return false;
+ }
+
+ // Don't allocate registers for dependently promoted struct fields
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ return false;
+ }
+ return true;
+}
+
+// Identify locals & compiler temps that are register candidates
+// TODO-Cleanup: This was cloned from Compiler::lvaSortByRefCount() in lclvars.cpp in order
+// to avoid perturbation, but should be merged.
+
+void LinearScan::identifyCandidates()
+{
+ if (compiler->lvaCount == 0)
+ {
+ return;
+ }
+
+ if (compiler->compHndBBtabCount > 0)
+ {
+ identifyCandidatesExceptionDataflow();
+ }
+
+ // initialize mapping from local to interval
+ localVarIntervals = new (compiler, CMK_LSRA) Interval*[compiler->lvaCount];
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ // While we build intervals for the candidate lclVars, we will determine the floating point
+ // lclVars, if any, to consider for callee-save register preferencing.
+ // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
+ // and those that meet the second.
+ // The first threshold is used for methods that are heuristically deemed either to have light
+ // fp usage, or other factors that encourage conservative use of callee-save registers, such
+    // as multiple exits (where there might be an early exit that would be excessively penalized by
+ // lots of prolog/epilog saves & restores).
+    // The second threshold is used where there are factors deemed to make it more likely that
+    // fp callee-save registers will be needed, such as loops or many fp vars.
+ // We keep two sets of vars, since we collect some of the information to determine which set to
+ // use as we iterate over the vars.
+ // When we are generating AVX code on non-Unix (FEATURE_PARTIAL_SIMD_CALLEE_SAVE), we maintain an
+ // additional set of LargeVectorType vars, and there is a separate threshold defined for those.
+ // It is assumed that if we encounter these, that we should consider this a "high use" scenario,
+ // so we don't maintain two sets of these vars.
+ // This is defined as thresholdLargeVectorRefCntWtd, as we are likely to use the same mechanism
+ // for vectors on Arm64, though the actual value may differ.
+
+ VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
+ unsigned int floatVarCount = 0;
+ unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
+ unsigned int maybeFPRefCntWtd = 2 * BB_UNITY_WEIGHT;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VarSetOps::AssignNoCopy(compiler, largeVectorVars, VarSetOps::MakeEmpty(compiler));
+ VarSetOps::AssignNoCopy(compiler, largeVectorCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
+ unsigned int largeVectorVarCount = 0;
+ unsigned int thresholdLargeVectorRefCntWtd = 4 * BB_UNITY_WEIGHT;
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ // Assign intervals to all the variables - this makes it easier to map
+ // them back
+ var_types intervalType = (var_types)varDsc->lvType;
+ Interval* newInt = newInterval(intervalType);
+
+ newInt->setLocalNumber(lclNum, this);
+ if (varDsc->lvIsStructField)
+ {
+ newInt->isStructField = true;
+ }
+
+ // Initialize all variables to REG_STK
+ varDsc->lvRegNum = REG_STK;
+#ifndef _TARGET_64BIT_
+ varDsc->lvOtherReg = REG_STK;
+#endif // _TARGET_64BIT_
+
+#if !defined(_TARGET_64BIT_)
+ if (intervalType == TYP_LONG)
+ {
+ // Long variables should not be register candidates.
+ // Lowering will have split any candidate lclVars into lo/hi vars.
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+#endif // !defined(_TARGET_64BIT)
+
+ /* Track all locals that can be enregistered */
+
+ varDsc->lvLRACandidate = 1;
+
+ if (!isRegCandidate(varDsc))
+ {
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ // Start with lvRegister as false - set it true only if the variable gets
+ // the same register assignment throughout
+ varDsc->lvRegister = false;
+
+ /* If the ref count is zero */
+ if (varDsc->lvRefCnt == 0)
+ {
+ /* Zero ref count, make this untracked */
+ varDsc->lvRefCntWtd = 0;
+ varDsc->lvLRACandidate = 0;
+ }
+
+ // Variables that are address-exposed are never enregistered, or tracked.
+ // A struct may be promoted, and a struct that fits in a register may be fully enregistered.
+ // Pinned variables may not be tracked (a condition of the GCInfo representation)
+ // or enregistered, on x86 -- it is believed that we can enregister pinned (more properly, "pinning")
+ // references when using the general GC encoding.
+
+ if (varDsc->lvAddrExposed || !varTypeIsEnregisterableStruct(varDsc))
+ {
+ varDsc->lvLRACandidate = 0;
+#ifdef DEBUG
+ Compiler::DoNotEnregisterReason dner = Compiler::DNER_AddrExposed;
+ if (!varDsc->lvAddrExposed)
+ {
+ dner = Compiler::DNER_IsStruct;
+ }
+#endif // DEBUG
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(dner));
+ }
+ else if (varDsc->lvPinned)
+ {
+ varDsc->lvTracked = 0;
+#ifdef JIT32_GCENCODER
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_PinningRef));
+#endif // JIT32_GCENCODER
+ }
+
+ // Are we not optimizing and we have exception handlers?
+ // if so mark all args and locals as volatile, so that they
+ // won't ever get enregistered.
+ //
+ if (compiler->opts.MinOpts() && compiler->compHndBBtabCount > 0)
+ {
+ compiler->lvaSetVarDoNotEnregister(lclNum DEBUGARG(Compiler::DNER_LiveInOutOfHandler));
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ varDsc->lvLRACandidate = 0;
+ continue;
+ }
+
+ var_types type = genActualType(varDsc->TypeGet());
+
+ switch (type)
+ {
+#if CPU_HAS_FP_SUPPORT
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (compiler->opts.compDbgCode)
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+#endif // CPU_HAS_FP_SUPPORT
+
+ case TYP_INT:
+ case TYP_LONG:
+ case TYP_REF:
+ case TYP_BYREF:
+ break;
+
+#ifdef FEATURE_SIMD
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+ if (varDsc->lvPromoted)
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+ // TODO-1stClassStructs: Move TYP_SIMD8 up with the other SIMD types, after handling the param issue
+ // (passing & returning as TYP_LONG).
+ case TYP_SIMD8:
+#endif // FEATURE_SIMD
+
+ case TYP_STRUCT:
+ {
+ varDsc->lvLRACandidate = 0;
+ }
+ break;
+
+ case TYP_UNDEF:
+ case TYP_UNKNOWN:
+ noway_assert(!"lvType not set correctly");
+ varDsc->lvType = TYP_INT;
+
+ __fallthrough;
+
+ default:
+ varDsc->lvLRACandidate = 0;
+ }
+
+ // we will set this later when we have determined liveness
+ if (varDsc->lvLRACandidate)
+ {
+ varDsc->lvMustInit = false;
+ }
+
+ // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count,
+ // and those that meet the second (see the definitions of thresholdFPRefCntWtd and maybeFPRefCntWtd
+ // above).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Additionally, when we are generating AVX on non-UNIX amd64, we keep a separate set of the LargeVectorType
+ // vars.
+ if (varDsc->lvType == LargeVectorType)
+ {
+ largeVectorVarCount++;
+ VarSetOps::AddElemD(compiler, largeVectorVars, varDsc->lvVarIndex);
+ unsigned refCntWtd = varDsc->lvRefCntWtd;
+ if (refCntWtd >= thresholdLargeVectorRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, largeVectorCalleeSaveCandidateVars, varDsc->lvVarIndex);
+ }
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (regType(newInt->registerType) == FloatRegisterType)
+ {
+ floatVarCount++;
+ unsigned refCntWtd = varDsc->lvRefCntWtd;
+ if (varDsc->lvIsRegArg)
+ {
+ // Don't count the initial reference for register params. In those cases,
+ // using a callee-save causes an extra copy.
+ refCntWtd -= BB_UNITY_WEIGHT;
+ }
+ if (refCntWtd >= thresholdFPRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, fpCalleeSaveCandidateVars, varDsc->lvVarIndex);
+ }
+ else if (refCntWtd >= maybeFPRefCntWtd)
+ {
+ VarSetOps::AddElemD(compiler, fpMaybeCandidateVars, varDsc->lvVarIndex);
+ }
+ }
+ }
+
+ // The factors we consider to determine which set of fp vars to use as candidates for callee save
+    // registers currently include the number of fp vars, whether there are loops, and whether there are
+ // multiple exits. These have been selected somewhat empirically, but there is probably room for
+ // more tuning.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("\nFP callee save candidate vars: ");
+ if (!VarSetOps::IsEmpty(compiler, fpCalleeSaveCandidateVars))
+ {
+ dumpConvertedVarSet(compiler, fpCalleeSaveCandidateVars);
+ printf("\n");
+ }
+ else
+ {
+ printf("None\n\n");
+ }
+ }
+#endif
+
+ JITDUMP("floatVarCount = %d; hasLoops = %d, singleExit = %d\n", floatVarCount, compiler->fgHasLoops,
+ (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr));
+
+ // Determine whether to use the 2nd, more aggressive, threshold for fp callee saves.
+ if (floatVarCount > 6 && compiler->fgHasLoops &&
+ (compiler->fgReturnBlocks == nullptr || compiler->fgReturnBlocks->next == nullptr))
+ {
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Adding additional fp callee save candidates: \n");
+ if (!VarSetOps::IsEmpty(compiler, fpMaybeCandidateVars))
+ {
+ dumpConvertedVarSet(compiler, fpMaybeCandidateVars);
+ printf("\n");
+ }
+ else
+ {
+ printf("None\n\n");
+ }
+ }
+#endif
+ VarSetOps::UnionD(compiler, fpCalleeSaveCandidateVars, fpMaybeCandidateVars);
+ }
+
+#ifdef _TARGET_ARM_
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ // Frame layout is only pre-computed for ARM
+ printf("\nlvaTable after IdentifyCandidates\n");
+ compiler->lvaTableDump();
+ }
+#endif // DEBUG
+#endif // _TARGET_ARM_
+}
+
+// TODO-Throughput: This mapping can surely be more efficiently done
+void LinearScan::initVarRegMaps()
+{
+ assert(compiler->lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked
+ // variables.
+
+ // The compiler memory allocator requires that the allocation be an
+ // even multiple of int-sized objects
+ unsigned int varCount = compiler->lvaTrackedCount;
+ regMapCount = (unsigned int)roundUp(varCount, sizeof(int));
+
+ // Not sure why blocks aren't numbered from zero, but they don't appear to be.
+ // So, if we want to index by bbNum we have to know the maximum value.
+ unsigned int bbCount = compiler->fgBBNumMax + 1;
+
+ inVarToRegMaps = new (compiler, CMK_LSRA) regNumber*[bbCount];
+ outVarToRegMaps = new (compiler, CMK_LSRA) regNumber*[bbCount];
+
+ if (varCount > 0)
+ {
+ // This VarToRegMap is used during the resolution of critical edges.
+ sharedCriticalVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount];
+
+ for (unsigned int i = 0; i < bbCount; i++)
+ {
+ regNumber* inVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount];
+ regNumber* outVarToRegMap = new (compiler, CMK_LSRA) regNumber[regMapCount];
+
+ for (unsigned int j = 0; j < regMapCount; j++)
+ {
+ inVarToRegMap[j] = REG_STK;
+ outVarToRegMap[j] = REG_STK;
+ }
+ inVarToRegMaps[i] = inVarToRegMap;
+ outVarToRegMaps[i] = outVarToRegMap;
+ }
+ }
+ else
+ {
+ sharedCriticalVarToRegMap = nullptr;
+ for (unsigned int i = 0; i < bbCount; i++)
+ {
+ inVarToRegMaps[i] = nullptr;
+ outVarToRegMaps[i] = nullptr;
+ }
+ }
+}
+
+void LinearScan::setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
+{
+ assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
+ inVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = reg;
+}
+
+void LinearScan::setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg)
+{
+ assert(reg < UCHAR_MAX && varNum < compiler->lvaCount);
+ outVarToRegMaps[bbNum][compiler->lvaTable[varNum].lvVarIndex] = reg;
+}
+
+LinearScan::SplitEdgeInfo LinearScan::getSplitEdgeInfo(unsigned int bbNum)
+{
+ SplitEdgeInfo splitEdgeInfo;
+ assert(bbNum <= compiler->fgBBNumMax);
+ assert(bbNum > bbNumMaxBeforeResolution);
+ assert(splitBBNumToTargetBBNumMap != nullptr);
+ splitBBNumToTargetBBNumMap->Lookup(bbNum, &splitEdgeInfo);
+ assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
+ assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
+ return splitEdgeInfo;
+}
+
+VarToRegMap LinearScan::getInVarToRegMap(unsigned int bbNum)
+{
+ assert(bbNum <= compiler->fgBBNumMax);
+ // For the blocks inserted to split critical edges, the inVarToRegMap is
+ // equal to the outVarToRegMap at the "from" block.
+ if (bbNum > bbNumMaxBeforeResolution)
+ {
+ SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
+ unsigned fromBBNum = splitEdgeInfo.fromBBNum;
+ if (fromBBNum == 0)
+ {
+ assert(splitEdgeInfo.toBBNum != 0);
+ return inVarToRegMaps[splitEdgeInfo.toBBNum];
+ }
+ else
+ {
+ return outVarToRegMaps[fromBBNum];
+ }
+ }
+
+ return inVarToRegMaps[bbNum];
+}
+
+VarToRegMap LinearScan::getOutVarToRegMap(unsigned int bbNum)
+{
+ assert(bbNum <= compiler->fgBBNumMax);
+ // For the blocks inserted to split critical edges, the outVarToRegMap is
+ // equal to the inVarToRegMap at the target.
+ if (bbNum > bbNumMaxBeforeResolution)
+ {
+ // If this is an empty block, its in and out maps are both the same.
+ // We identify this case by setting fromBBNum or toBBNum to 0, and using only the other.
+ SplitEdgeInfo splitEdgeInfo = getSplitEdgeInfo(bbNum);
+ unsigned toBBNum = splitEdgeInfo.toBBNum;
+ if (toBBNum == 0)
+ {
+ assert(splitEdgeInfo.fromBBNum != 0);
+ return outVarToRegMaps[splitEdgeInfo.fromBBNum];
+ }
+ else
+ {
+ return inVarToRegMaps[toBBNum];
+ }
+ }
+ return outVarToRegMaps[bbNum];
+}
+
+regNumber LinearScan::getVarReg(VarToRegMap bbVarToRegMap, unsigned int varNum)
+{
+ assert(compiler->lvaTable[varNum].lvTracked);
+ return bbVarToRegMap[compiler->lvaTable[varNum].lvVarIndex];
+}
+
+// Initialize the incoming VarToRegMap to the given map values (generally a predecessor of
+// the block)
+VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap)
+{
+ VarToRegMap inVarToRegMap = inVarToRegMaps[bbNum];
+ memcpy(inVarToRegMap, srcVarToRegMap, (regMapCount * sizeof(regNumber)));
+ return inVarToRegMap;
+}
+
+// find the last node in the tree in execution order
+// TODO-Throughput: this is inefficient!
+GenTree* lastNodeInTree(GenTree* tree)
+{
+    // There is no gtPrev on the top-level tree node, so
+ // apparently the way to walk a tree backwards is to walk
+ // it forward, find the last node, and walk back from there.
+
+ GenTree* last = nullptr;
+ if (tree->OperGet() == GT_STMT)
+ {
+ GenTree* statement = tree;
+
+ foreach_treenode_execution_order(tree, statement)
+ {
+ last = tree;
+ }
+ return last;
+ }
+ else
+ {
+ while (tree)
+ {
+ last = tree;
+ tree = tree->gtNext;
+ }
+ return last;
+ }
+}
+
+// Given a local variable tree node, return the RefType (RefTypeDef or RefTypeUse) for the reference.
+RefType refTypeForLocalRefNode(GenTree* node)
+{
+ assert(node->IsLocal());
+
+ // We don't support updates
+ assert((node->gtFlags & GTF_VAR_USEASG) == 0);
+
+ if (node->gtFlags & GTF_VAR_DEF)
+ {
+ return RefTypeDef;
+ }
+ else
+ {
+ return RefTypeUse;
+ }
+}
+
+// This function sets RefPosition last uses by walking the RefPositions, instead of walking the
+// tree nodes in execution order (as was done in a previous version).
+// This is because the execution order isn't strictly correct, specifically for
+// references to local variables that occur in arg lists.
+//
+// TODO-Throughput: This function should eventually be eliminated, as we should be able to rely on last uses
+// being set by dataflow analysis. It is necessary to do it this way only because the execution
+// order wasn't strictly correct.
+
+void LinearScan::setLastUses(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ JITDUMP("\n\nCALCULATING LAST USES for block %u, liveout=", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ JITDUMP("\n==============================\n");
+ }
+#endif // DEBUG
+
+ unsigned keepAliveVarNum = BAD_VAR_NUM;
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ keepAliveVarNum = compiler->info.compThisArg;
+ assert(compiler->info.compIsStatic == false);
+ }
+
+ // find which uses are lastUses
+
+ // Work backwards starting with live out.
+ // 'temp' is updated to include any exposed use (including those in this
+ // block that we've already seen). When we encounter a use, if it's
+ // not in that set, then it's a last use.
+
+ VARSET_TP VARSET_INIT(compiler, temp, block->bbLiveOut);
+
+ auto currentRefPosition = refPositions.rbegin();
+
+ while (currentRefPosition->refType != RefTypeBB)
+ {
+ // We should never see ParamDefs or ZeroInits within a basic block.
+ assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit);
+ if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar)
+ {
+ unsigned varNum = currentRefPosition->getInterval()->varNum;
+ unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler);
+ // We should always have a tree node for a localVar, except for the "special" RefPositions.
+ GenTreePtr tree = currentRefPosition->treeNode;
+ assert(tree != nullptr || currentRefPosition->refType == RefTypeExpUse ||
+ currentRefPosition->refType == RefTypeDummyDef);
+ if (!VarSetOps::IsMember(compiler, temp, varIndex) && varNum != keepAliveVarNum)
+ {
+ // There was no exposed use, so this is a
+ // "last use" (and we mark it thus even if it's a def)
+
+ if (tree != nullptr)
+ {
+ tree->gtFlags |= GTF_VAR_DEATH;
+ }
+ LsraLocation loc = currentRefPosition->nodeLocation;
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ JITDUMP("last use of V%02u @%u (not marked as last use for LSRA due to extendLifetimes stress "
+ "option)\n",
+ compiler->lvaTrackedToVarNum[varIndex], loc);
+ }
+ else
+#endif // DEBUG
+ {
+ JITDUMP("last use of V%02u @%u\n", compiler->lvaTrackedToVarNum[varIndex], loc);
+ currentRefPosition->lastUse = true;
+ }
+ VarSetOps::AddElemD(compiler, temp, varIndex);
+ }
+ else
+ {
+ currentRefPosition->lastUse = false;
+ if (tree != nullptr)
+ {
+ tree->gtFlags &= ~GTF_VAR_DEATH;
+ }
+ }
+
+ if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef)
+ {
+ VarSetOps::RemoveElemD(compiler, temp, varIndex);
+ }
+ }
+ assert(currentRefPosition != refPositions.rend());
+ ++currentRefPosition;
+ }
+
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT(compiler, temp2, block->bbLiveIn);
+ VarSetOps::DiffD(compiler, temp2, temp);
+ VarSetOps::DiffD(compiler, temp, block->bbLiveIn);
+ bool foundDiff = false;
+
+ {
+ VARSET_ITER_INIT(compiler, iter, temp, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ JITDUMP("BB%02u: V%02u is computed live, but not in LiveIn set.\n", block->bbNum, varNum);
+ foundDiff = true;
+ }
+ }
+ }
+
+ {
+ VARSET_ITER_INIT(compiler, iter, temp2, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ JITDUMP("BB%02u: V%02u is in LiveIn set, but not computed live.\n", block->bbNum, varNum);
+ foundDiff = true;
+ }
+ }
+ }
+
+ assert(!foundDiff);
+#endif // DEBUG
+}
+
+void LinearScan::addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse)
+{
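+ // Walk 'mask' one register at a time, creating a RefPosition for each register that is set.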
+ for (regNumber reg = REG_FIRST; mask; reg = REG_NEXT(reg), mask >>= 1)
+ {
+ if (mask & 1)
+ {
+ // This assumes that these are all "special" RefTypes that
+ // don't need to be recorded on the tree (hence treeNode is nullptr)
+ RefPosition* pos = newRefPosition(reg, currentLoc, refType, nullptr,
+ genRegMask(reg)); // This MUST occupy the physical register (obviously)
+
+ if (isLastUse)
+ {
+ pos->lastUse = true;
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// getKillSetForNode: Return the registers killed by the given tree node.
+//
+// Arguments:
+// tree - the tree for which the kill set is needed.
+//
+// Return Value: a register mask of the registers killed
+//
+regMaskTP LinearScan::getKillSetForNode(GenTree* tree)
+{
+ regMaskTP killMask = RBM_NONE;
+ switch (tree->OperGet())
+ {
+#ifdef _TARGET_XARCH_
+ case GT_MUL:
+ // We use the 128-bit multiply when performing an overflow checking unsigned multiply
+ //
+ if (((tree->gtFlags & GTF_UNSIGNED) != 0) && tree->gtOverflowEx())
+ {
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+
+ case GT_MULHI:
+ killMask = RBM_RAX | RBM_RDX;
+ break;
+
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ // RDX needs to be killed early, because it must not be used as a source register
+ // (unlike most cases, where the kill happens AFTER the uses). So for this kill,
+ // we add the RefPosition at the tree loc (where the uses are located) instead of the
+ // usual kill location which is the same as the defs at tree loc+1.
+ // Note that we don't have to add interference for the live vars, because that
+ // will be done below, and is not sensitive to the precise location.
+ LsraLocation currentLoc = tree->gtLsraInfo.loc;
+ assert(currentLoc != 0);
+ addRefsForPhysRegMask(RBM_RDX, currentLoc, RefTypeKill, true);
+ // Both RAX and RDX are killed by the operation
+ killMask = RBM_RAX | RBM_RDX;
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+ case GT_STORE_OBJ:
+ if (tree->OperIsCopyBlkOp())
+ {
+ assert(tree->AsObj()->gtGcPtrCount != 0);
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_ASSIGN_BYREF);
+ break;
+ }
+ __fallthrough;
+
+ case GT_STORE_BLK:
+ case GT_STORE_DYN_BLK:
+ {
+ GenTreeBlk* blkNode = tree->AsBlk();
+ bool isCopyBlk = varTypeIsStruct(blkNode->Data());
+ switch (blkNode->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindHelper:
+ if (isCopyBlk)
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMCPY);
+ }
+ else
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_MEMSET);
+ }
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GenTreeBlk::BlkOpKindRepInstr:
+ if (isCopyBlk)
+ {
+ // rep movs kills RCX, RDI and RSI
+ killMask = RBM_RCX | RBM_RDI | RBM_RSI;
+ }
+ else
+ {
+ // rep stos kills RCX and RDI.
+ // (Note that the Data() node, if not constant, will be assigned to
+ // RCX, but it's fine that this kills it, as the value is not available
+ // after this node in any case.)
+ killMask = RBM_RDI | RBM_RCX;
+ }
+ break;
+#else
+ case GenTreeBlk::BlkOpKindRepInstr:
+#endif
+ case GenTreeBlk::BlkOpKindUnroll:
+ case GenTreeBlk::BlkOpKindInvalid:
+ // for these 'gtBlkOpKind' kinds, we leave 'killMask' = RBM_NONE
+ break;
+ }
+ }
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ if (tree->gtLsraInfo.isHelperCallWithKills)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ break;
+ case GT_RETURNTRAP:
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC);
+ break;
+ case GT_CALL:
+#ifdef _TARGET_X86_
+ if (compiler->compFloatingPointUsed)
+ {
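+ // Record that a temp of the corresponding floating-point type will be needed for this call.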
+ if (tree->TypeGet() == TYP_DOUBLE)
+ {
+ needDoubleTmpForFPCall = true;
+ }
+ else if (tree->TypeGet() == TYP_FLOAT)
+ {
+ needFloatTmpForFPCall = true;
+ }
+ }
+ if (tree->IsHelperCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ CorInfoHelpFunc helpFunc = compiler->eeGetHelperNum(call->gtCallMethHnd);
+ killMask = compiler->compHelperCallKillSet(helpFunc);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ // if there is no FP used, we can ignore the FP kills
+ if (compiler->compFloatingPointUsed)
+ {
+ killMask = RBM_CALLEE_TRASH;
+ }
+ else
+ {
+ killMask = RBM_INT_CALLEE_TRASH;
+ }
+ }
+ break;
+ case GT_STOREIND:
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ killMask = RBM_CALLEE_TRASH_NOGC;
+#if !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
+ killMask |= (RBM_ARG_0 | RBM_ARG_1);
+#endif // !NOGC_WRITE_BARRIERS && (defined(_TARGET_ARM_) || defined(_TARGET_AMD64_))
+ }
+ break;
+
+#if defined(PROFILING_SUPPORTED) && defined(_TARGET_AMD64_)
+ // If this method requires a profiler ELT hook, then mark these nodes as killing
+ // callee trash registers (excluding RAX and XMM0). The reason for this is that the
+ // profiler callback would trash these registers. See vm\amd64\asmhelpers.asm for
+ // more details.
+ case GT_RETURN:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ break;
+
+ case GT_PROF_HOOK:
+ if (compiler->compIsProfilerHookNeeded())
+ {
+ killMask = compiler->compHelperCallKillSet(CORINFO_HELP_PROF_FCN_TAILCALL);
+ }
+ break;
+#endif // PROFILING_SUPPORTED && _TARGET_AMD64_
+
+ default:
+ // for all other 'tree->OperGet()' kinds, leave 'killMask' = RBM_NONE
+ break;
+ }
+ return killMask;
+}
+
+//------------------------------------------------------------------------
+// buildKillPositionsForNode:
+// Given a tree node, add RefPositions for all the registers that node kills.
+//
+// Arguments:
+// tree - the tree for which kill positions should be generated
+// currentLoc - the location at which the kills should be added
+//
+// Return Value:
+// true - kills were inserted
+// false - no kills were inserted
+//
+// Notes:
+// The return value is needed because if we have any kills, we need to make sure that
+// all defs are located AFTER the kills. On the other hand, if there aren't kills,
+// the multiple defs for a regPair are in different locations.
+// If we generate any kills, we will mark all currentLiveVars as being preferenced
+// to avoid the killed registers. This is somewhat conservative.
+
+bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc)
+{
+ regMaskTP killMask = getKillSetForNode(tree);
+ bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH));
+ if (killMask != RBM_NONE)
+ {
+ // The killMask identifies a set of registers that will be used during codegen.
+ // Mark these as modified here, so when we do final frame layout, we'll know about
+ // all these registers. This is especially important if killMask contains
+ // callee-saved registers, which affect the frame size since we need to save/restore them.
+ // In the case of a copyBlk with GC pointers, we may need to call the
+ // CORINFO_HELP_ASSIGN_BYREF helper, which kills callee-saved RSI and RDI. If
+ // LSRA doesn't assign RSI/RDI, they wouldn't get marked as modified until codegen,
+ // which is too late.
+ compiler->codeGen->regSet.rsSetRegsModified(killMask DEBUGARG(dumpTerse));
+
+ addRefsForPhysRegMask(killMask, currentLoc, RefTypeKill, true);
+
+ // TODO-CQ: It appears to be valuable for both fp and int registers to avoid killing the callee
+ // save regs on infrequently executed paths. However, it results in a large number of asmDiffs,
+ // many of which appear to be regressions (because there is more spill on the infrequently executed path),
+ // but are not really, because the frequent path becomes smaller. Validating these diffs will need
+ // to be done before making this change.
+ // if (!blockSequence[curBBSeqNum]->isRunRarely())
+ {
+
+ VARSET_ITER_INIT(compiler, iter, currentLiveVars, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varDsc->lvType == LargeVectorType)
+ {
+ if (!VarSetOps::IsMember(compiler, largeVectorCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (varTypeIsFloating(varDsc) &&
+ !VarSetOps::IsMember(compiler, fpCalleeSaveCandidateVars, varIndex))
+ {
+ continue;
+ }
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (isCallKill)
+ {
+ interval->preferCalleeSave = true;
+ }
+ regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask);
+
+ if (newPreferences != RBM_NONE)
+ {
+ interval->updateRegisterPreferences(newPreferences);
+ }
+ else
+ {
+ // If there are no callee-saved registers, the call could kill all the registers.
+ // This is a valid state, so in that case assert should not trigger. The RA will spill in order to
+ // free a register later.
+ assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE);
+ }
+ }
+ }
+
+ if (tree->IsCall() && (tree->gtFlags & GTF_CALL_UNMANAGED) != 0)
+ {
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKillGCRefs, tree,
+ (allRegs(TYP_REF) & ~RBM_ARG_REGS));
+ }
+ return true;
+ }
+
+ return false;
+}
+
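+// defineNewInternalTemp: Create a new internal (temporary) interval of the given register type and
+// add a def RefPosition for it at 'currentLoc' with the given register mask.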
+RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree,
+ RegisterType regType,
+ LsraLocation currentLoc,
+ regMaskTP regMask)
+{
+ Interval* current = newInterval(regType);
+ current->isInternal = true;
+ return newRefPosition(current, currentLoc, RefTypeDef, tree, regMask);
+}
+
+int LinearScan::buildInternalRegisterDefsForNode(GenTree* tree,
+ LsraLocation currentLoc,
+ RefPosition* temps[]) // populates
+{
+ int count;
+ int internalIntCount = tree->gtLsraInfo.internalIntCount;
+ regMaskTP internalCands = tree->gtLsraInfo.getInternalCandidates(this);
+
+ // If the number of internal integer registers required is the same as the number of candidate integer registers in
+ // the candidate set, then they must be handled as fixed registers.
+ // (E.g. for the integer registers that floating point arguments must be copied into for a varargs call.)
+ bool fixedRegs = false;
+ regMaskTP internalIntCandidates = (internalCands & allRegs(TYP_INT));
+ if (((int)genCountBits(internalIntCandidates)) == internalIntCount)
+ {
+ fixedRegs = true;
+ }
+
+ for (count = 0; count < internalIntCount; count++)
+ {
+ regMaskTP internalIntCands = (internalCands & allRegs(TYP_INT));
+ if (fixedRegs)
+ {
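+ // Carve off the lowest remaining candidate register and remove it from the candidate set,
+ // so that each internal def gets a distinct fixed register.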
+ internalIntCands = genFindLowestBit(internalIntCands);
+ internalCands &= ~internalIntCands;
+ }
+ temps[count] = defineNewInternalTemp(tree, IntRegisterType, currentLoc, internalIntCands);
+ }
+
+ int internalFloatCount = tree->gtLsraInfo.internalFloatCount;
+ for (int i = 0; i < internalFloatCount; i++)
+ {
+ regMaskTP internalFPCands = (internalCands & internalFloatRegCandidates());
+ temps[count++] = defineNewInternalTemp(tree, FloatRegisterType, currentLoc, internalFPCands);
+ }
+
+ noway_assert(count < MaxInternalRegisters);
+ assert(count == (internalIntCount + internalFloatCount));
+ return count;
+}
+
+void LinearScan::buildInternalRegisterUsesForNode(GenTree* tree,
+ LsraLocation currentLoc,
+ RefPosition* defs[],
+ int total)
+{
+ assert(total < MaxInternalRegisters);
+
+ // defs[] has been populated by buildInternalRegisterDefsForNode
+ // now just add uses to the defs previously added.
+ for (int i = 0; i < total; i++)
+ {
+ RefPosition* prevRefPosition = defs[i];
+ assert(prevRefPosition != nullptr);
+ regMaskTP mask = prevRefPosition->registerAssignment;
+ if (prevRefPosition->isPhysRegRef)
+ {
+ newRefPosition(defs[i]->getReg()->regNum, currentLoc, RefTypeUse, tree, mask);
+ }
+ else
+ {
+ RefPosition* newest = newRefPosition(defs[i]->getInterval(), currentLoc, RefTypeUse, tree, mask);
+ newest->lastUse = true;
+ }
+ }
+}
+
+regMaskTP LinearScan::getUseCandidates(GenTree* useNode)
+{
+ TreeNodeInfo info = useNode->gtLsraInfo;
+ return info.getSrcCandidates(this);
+}
+
+regMaskTP LinearScan::getDefCandidates(GenTree* tree)
+{
+ TreeNodeInfo info = tree->gtLsraInfo;
+ return info.getDstCandidates(this);
+}
+
+RegisterType LinearScan::getDefType(GenTree* tree)
+{
+ return tree->TypeGet();
+}
+
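+// fixedCandidateMask: Return 'candidates' if it contains at most one register (i.e. it is a fixed
+// register requirement); otherwise return RBM_NONE.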
+regMaskTP fixedCandidateMask(var_types type, regMaskTP candidates)
+{
+ if (genMaxOneBit(candidates))
+ {
+ return candidates;
+ }
+ return RBM_NONE;
+}
+
+//------------------------------------------------------------------------
+// LocationInfoListNode: used to store a single `LocationInfo` value for a
+// node during `buildIntervals`.
+//
+// This is the node type for `LocationInfoList` below.
+//
+class LocationInfoListNode final : public LocationInfo
+{
+ friend class LocationInfoList;
+ friend class LocationInfoListNodePool;
+
+ LocationInfoListNode* m_next; // The next node in the list
+
+public:
+ LocationInfoListNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0) : LocationInfo(l, i, t, regIdx)
+ {
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNode::Next: Returns the next node in the list.
+ LocationInfoListNode* Next() const
+ {
+ return m_next;
+ }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoList: used to store a list of `LocationInfo` values for a
+// node during `buildIntervals`.
+//
+// Given an IR node that either directly defines N registers or that is a
+// contained node with uses that define a total of N registers, that node
+// will map to N `LocationInfo` values. These values are stored as a
+// linked list of `LocationInfoListNode` values.
+//
+class LocationInfoList final
+{
+ friend class LocationInfoListNodePool;
+
+ LocationInfoListNode* m_head; // The head of the list
+ LocationInfoListNode* m_tail; // The tail of the list
+
+public:
+ LocationInfoList() : m_head(nullptr), m_tail(nullptr)
+ {
+ }
+
+ LocationInfoList(LocationInfoListNode* node) : m_head(node), m_tail(node)
+ {
+ assert(m_head->m_next == nullptr);
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::IsEmpty: Returns true if the list is empty.
+ //
+ bool IsEmpty() const
+ {
+ return m_head == nullptr;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::Begin: Returns the first node in the list.
+ //
+ LocationInfoListNode* Begin() const
+ {
+ return m_head;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::End: Returns the position after the last node in the
+ // list. The returned value is suitable for use as
+ // a sentinel for iteration.
+ //
+ LocationInfoListNode* End() const
+ {
+ return nullptr;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::Append: Appends a node to the list.
+ //
+ // Arguments:
+ // node - The node to append. Must not be part of an existing list.
+ //
+ void Append(LocationInfoListNode* node)
+ {
+ assert(node->m_next == nullptr);
+
+ if (m_tail == nullptr)
+ {
+ assert(m_head == nullptr);
+ m_head = node;
+ }
+ else
+ {
+ m_tail->m_next = node;
+ }
+
+ m_tail = node;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoList::Append: Appends another list to this list.
+ //
+ // Arguments:
+ // other - The list to append.
+ //
+ void Append(LocationInfoList other)
+ {
+ if (m_tail == nullptr)
+ {
+ assert(m_head == nullptr);
+ m_head = other.m_head;
+ }
+ else
+ {
+ m_tail->m_next = other.m_head;
+ }
+
+ m_tail = other.m_tail;
+ }
+};
+
+//------------------------------------------------------------------------
+// LocationInfoListNodePool: manages a pool of `LocationInfoListNode`
+// values to decrease overall memory usage
+// during `buildIntervals`.
+//
+// `buildIntervals` involves creating a list of location info values per
+// node that either directly produces a set of registers or that is a
+// contained node with register-producing sources. However, these lists
+// are short-lived: they are destroyed once the use of the corresponding
+// node is processed. As such, there is typically only a small number of
+// `LocationInfoListNode` values in use at any given time. Pooling these
+// values avoids otherwise frequent allocations.
+class LocationInfoListNodePool final
+{
+ LocationInfoListNode* m_freeList;
+ Compiler* m_compiler;
+
+public:
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::LocationInfoListNodePool:
+ // Creates a pool of `LocationInfoListNode` values.
+ //
+ // Arguments:
+ // compiler - The compiler context.
+ // preallocate - The number of nodes to preallocate.
+ //
+ LocationInfoListNodePool(Compiler* compiler, unsigned preallocate = 0) : m_compiler(compiler)
+ {
+ if (preallocate > 0)
+ {
+ size_t preallocateSize = sizeof(LocationInfoListNode) * preallocate;
+ auto* preallocatedNodes = reinterpret_cast<LocationInfoListNode*>(compiler->compGetMem(preallocateSize));
+
+ LocationInfoListNode* head = preallocatedNodes;
+ head->m_next = nullptr;
+
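+ // Push each remaining preallocated node onto the front of the free list.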
+ for (unsigned i = 1; i < preallocate; i++)
+ {
+ LocationInfoListNode* node = &preallocatedNodes[i];
+ node->m_next = head;
+ head = node;
+ }
+
+ m_freeList = head;
+ }
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::GetNode: Fetches an unused node from the
+ // pool.
+ //
+ // Arguments:
+ // l - The `LsraLocation` for the `LocationInfo` value.
+ // i - The interval for the `LocationInfo` value.
+ // t - The IR node for the `LocationInfo` value.
+ // regIdx - The register index for the `LocationInfo` value.
+ //
+ // Returns:
+ // A pooled or newly-allocated `LocationInfoListNode`, depending on the
+ // contents of the pool.
+ LocationInfoListNode* GetNode(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+ {
+ LocationInfoListNode* head = m_freeList;
+ if (head == nullptr)
+ {
+ head = reinterpret_cast<LocationInfoListNode*>(m_compiler->compGetMem(sizeof(LocationInfoListNode)));
+ }
+ else
+ {
+ m_freeList = head->m_next;
+ }
+
+ head->loc = l;
+ head->interval = i;
+ head->treeNode = t;
+ head->multiRegIdx = regIdx;
+ head->m_next = nullptr;
+
+ return head;
+ }
+
+ //------------------------------------------------------------------------
+ // LocationInfoListNodePool::ReturnNodes: Returns a list of nodes to the
+ // pool.
+ //
+ // Arguments:
+ // list - The list to return.
+ //
+ void ReturnNodes(LocationInfoList& list)
+ {
+ assert(list.m_head != nullptr);
+ assert(list.m_tail != nullptr);
+
+ LocationInfoListNode* head = m_freeList;
+ list.m_tail->m_next = head;
+ m_freeList = list.m_head;
+ }
+};
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+VARSET_VALRET_TP
+LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::MakeEmpty(compiler));
+ regMaskTP fpCalleeKillSet = RBM_NONE;
+ if (!VarSetOps::IsEmpty(compiler, largeVectorVars))
+ {
+ // We actually need to find any calls that kill the upper-half of the callee-save vector registers.
+ // But we will use as a proxy any node that kills floating point registers.
+ // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.)
+ fpCalleeKillSet = getKillSetForNode(tree);
+ if ((fpCalleeKillSet & RBM_FLT_CALLEE_TRASH) != RBM_NONE)
+ {
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors,
+ VarSetOps::Intersection(compiler, currentLiveVars, largeVectorVars));
+ VARSET_ITER_INIT(compiler, iter, liveLargeVectors, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* varInterval = getIntervalForLocalVar(varNum);
+ Interval* tempInterval = newInterval(LargeVectorType);
+ tempInterval->isInternal = true;
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveDef, tree, RBM_FLT_CALLEE_SAVED);
+ // We are going to save the existing relatedInterval of varInterval on tempInterval, so that we can set
+ // the tempInterval as the relatedInterval of varInterval, so that we can build the corresponding
+ // RefTypeUpperVectorSaveUse RefPosition. We will then restore the relatedInterval onto varInterval,
+ // and set varInterval as the relatedInterval of tempInterval.
+ tempInterval->relatedInterval = varInterval->relatedInterval;
+ varInterval->relatedInterval = tempInterval;
+ }
+ }
+ }
+ return liveLargeVectors;
+}
+
+void LinearScan::buildUpperVectorRestoreRefPositions(GenTree* tree,
+ LsraLocation currentLoc,
+ VARSET_VALARG_TP liveLargeVectors)
+{
+ if (!VarSetOps::IsEmpty(compiler, liveLargeVectors))
+ {
+ VARSET_ITER_INIT(compiler, iter, liveLargeVectors, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* varInterval = getIntervalForLocalVar(varNum);
+ Interval* tempInterval = varInterval->relatedInterval;
+ assert(tempInterval->isInternal == true);
+ RefPosition* pos =
+ newRefPosition(tempInterval, currentLoc, RefTypeUpperVectorSaveUse, tree, RBM_FLT_CALLEE_SAVED);
+ // Restore the relatedInterval onto varInterval, and set varInterval as the relatedInterval
+ // of tempInterval.
+ varInterval->relatedInterval = tempInterval->relatedInterval;
+ tempInterval->relatedInterval = varInterval;
+ }
+ }
+}
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// ComputeOperandDstCount: computes the number of registers defined by a
+// node.
+//
+// For most nodes, this is simple:
+// - Nodes that do not produce values (e.g. stores and other void-typed
+// nodes) and nodes that immediately use the registers they define
+// produce no registers
+// - Nodes that are marked as defining N registers define N registers.
+//
+// For contained nodes, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources.
+//
+// Arguments:
+// operand - The operand for which to compute a register count.
+//
+// Returns:
+// The number of registers defined by `operand`.
+//
+static int ComputeOperandDstCount(GenTree* operand)
+{
+ TreeNodeInfo& operandInfo = operand->gtLsraInfo;
+
+ if (operandInfo.isLocalDefUse)
+ {
+ // Operands that define an unused value do not produce any registers.
+ return 0;
+ }
+ else if (operandInfo.dstCount != 0)
+ {
+ // Operands that have a specified number of destination registers consume all of their operands
+ // and therefore produce exactly that number of registers.
+ return operandInfo.dstCount;
+ }
+ else if (operandInfo.srcCount != 0)
+ {
+ // If an operand has no destination registers but does have source registers, it must be a store
+ // or a compare.
+ assert(operand->OperIsStore() || operand->OperIsBlkOp() || operand->OperIsPutArgStk() ||
+ operand->OperIsCompare());
+ return 0;
+ }
+ else if (!operand->OperIsAggregate() && (operand->OperIsStore() || operand->TypeGet() == TYP_VOID))
+ {
+ // Stores and void-typed operands may be encountered when processing call nodes, which contain
+ // pointers to argument setup stores.
+ return 0;
+ }
+ else
+ {
+ // If an aggregate or non-void-typed operand is not an unused value and does not have source registers,
+ // that argument is contained within its parent and produces `sum(operand_dst_count)` registers.
+ int dstCount = 0;
+ for (GenTree* op : operand->Operands())
+ {
+ dstCount += ComputeOperandDstCount(op);
+ }
+
+ return dstCount;
+ }
+}
+
+//------------------------------------------------------------------------
+// ComputeAvailableSrcCount: computes the number of registers available as
+// sources for a node.
+//
+// This is simply the sum of the number of registers produced by each
+// operand to the node.
+//
+// Arguments:
+// node - The node for which to compute a source count.
+//
+// Returns:
+// The number of registers available as sources for `node`.
+//
+static int ComputeAvailableSrcCount(GenTree* node)
+{
+ int numSources = 0;
+ for (GenTree* operand : node->Operands())
+ {
+ numSources += ComputeOperandDstCount(operand);
+ }
+
+ return numSources;
+}
+#endif
+
+void LinearScan::buildRefPositionsForNode(GenTree* tree,
+ BasicBlock* block,
+ LocationInfoListNodePool& listNodePool,
+ HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
+ LsraLocation currentLoc)
+{
+#ifdef _TARGET_ARM_
+ assert(!isRegPairType(tree->TypeGet()));
+#endif // _TARGET_ARM_
+
+ // The LIR traversal doesn't visit non-aggregate GT_LIST or GT_ARGPLACE nodes
+ assert(tree->OperGet() != GT_ARGPLACE);
+ assert((tree->OperGet() != GT_LIST) || tree->AsArgList()->IsAggregate());
+
+ // These nodes are eliminated by the Rationalizer.
+ if (tree->OperGet() == GT_CLS_VAR)
+ {
+ JITDUMP("Unexpected node %s in LSRA.\n", GenTree::NodeName(tree->OperGet()));
+ assert(!"Unexpected node in LSRA.");
+ }
+
+ // The set of internal temporary registers used by this node are stored in the
+ // gtRsvdRegs register mask. Clear it out.
+ tree->gtRsvdRegs = RBM_NONE;
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ JITDUMP("at start of tree, map contains: { ");
+ bool first = true;
+ for (auto kvp : operandToLocationInfoMap)
+ {
+ GenTree* node = kvp.Key();
+ LocationInfoList defList = kvp.Value();
+
+ JITDUMP("%sN%03u. %s -> (", first ? "" : "; ", node->gtSeqNum, GenTree::NodeName(node->OperGet()));
+ for (LocationInfoListNode *def = defList.Begin(), *end = defList.End(); def != end; def = def->Next())
+ {
+ JITDUMP("%s%d.N%03u", def == defList.Begin() ? "" : ", ", def->loc, def->treeNode->gtSeqNum);
+ }
+ JITDUMP(")");
+
+ first = false;
+ }
+ JITDUMP(" }\n");
+ }
+#endif // DEBUG
+
+ TreeNodeInfo info = tree->gtLsraInfo;
+ assert(info.IsValid(this));
+ int consume = info.srcCount;
+ int produce = info.dstCount;
+
+ assert(((consume == 0) && (produce == 0)) || (ComputeAvailableSrcCount(tree) == consume));
+
+ if (isCandidateLocalRef(tree) && !tree->OperIsLocalStore())
+ {
+ assert(consume == 0);
+
+ // We handle tracked variables differently from non-tracked ones. If it is tracked,
+ // we simply add a use or def of the tracked variable. Otherwise, for a use we need
+ // to actually add the appropriate references for loading or storing the variable.
+ //
+ // It won't actually get used or defined until the appropriate ancestor tree node
+ // is processed, unless this is marked "isLocalDefUse" because it is a stack-based argument
+ // to a call
+
+ Interval* interval = getIntervalForLocalVar(tree->gtLclVarCommon.gtLclNum);
+ regMaskTP candidates = getUseCandidates(tree);
+ regMaskTP fixedAssignment = fixedCandidateMask(tree->TypeGet(), candidates);
+
+ // We have only approximate last-use information at this point. This is because the
+ // execution order doesn't actually reflect the true order in which the localVars
+ // are referenced - but the order of the RefPositions will, so we recompute it after
+ // RefPositions are built.
+ // Use the old value for setting currentLiveVars - note that we do this with the
+ // not-quite-correct setting of lastUse. However, this is OK because
+ // 1) this is only for preferencing, which doesn't require strict correctness, and
+ // 2) the cases where these out-of-order uses occur should not overlap a kill.
+ // TODO-Throughput: clean this up once we have the execution order correct. At that point
+ // we can update currentLiveVars at the same place that we create the RefPosition.
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ VarSetOps::RemoveElemD(compiler, currentLiveVars,
+ compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex);
+ }
+
+ JITDUMP("t%u (i:%u)\n", currentLoc, interval->intervalIndex);
+
+ if (!info.isLocalDefUse)
+ {
+ if (produce != 0)
+ {
+ LocationInfoList list(listNodePool.GetNode(currentLoc, interval, tree));
+ bool added = operandToLocationInfoMap.AddOrUpdate(tree, list);
+ assert(added);
+
+ tree->gtLsraInfo.definesAnyRegisters = true;
+ }
+
+ return;
+ }
+ else
+ {
+ JITDUMP(" Not added to map\n");
+ regMaskTP candidates = getUseCandidates(tree);
+
+ if (fixedAssignment != RBM_NONE)
+ {
+ candidates = fixedAssignment;
+ }
+ RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeUse, tree, candidates);
+ pos->isLocalDefUse = true;
+ bool isLastUse = ((tree->gtFlags & GTF_VAR_DEATH) != 0);
+ pos->lastUse = isLastUse;
+ pos->setAllocateIfProfitable(tree->IsRegOptional());
+ DBEXEC(VERBOSE, pos->dump());
+ return;
+ }
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ lsraDispNode(tree, LSRA_DUMP_REFPOS, (produce != 0));
+ JITDUMP("\n");
+ JITDUMP(" consume=%d produce=%d\n", consume, produce);
+ }
+#endif // DEBUG
+
+ // Handle the case of local variable assignment
+ Interval* varDefInterval = nullptr;
+ RefType defRefType = RefTypeDef;
+
+ GenTree* defNode = tree;
+
+ // noAdd means the node creates a def, but for purposes of map
+ // management we do not add it, because data is not flowing up the
+ // tree but over (as in ASG nodes).
+
+ bool noAdd = info.isLocalDefUse;
+ RefPosition* prevPos = nullptr;
+
+ bool isSpecialPutArg = false;
+
+ assert(!tree->OperIsAssignment());
+ if (tree->OperIsLocalStore())
+ {
+ if (isCandidateLocalRef(tree))
+ {
+ // We always push the tracked lclVar intervals
+ varDefInterval = getIntervalForLocalVar(tree->gtLclVarCommon.gtLclNum);
+ defRefType = refTypeForLocalRefNode(tree);
+ defNode = tree;
+ if (produce == 0)
+ {
+ produce = 1;
+ noAdd = true;
+ }
+
+ assert(consume <= MAX_RET_REG_COUNT);
+ if (consume == 1)
+ {
+ // Get the location info for the register defined by the first operand.
+ LocationInfoList operandDefs;
+ bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs);
+ assert(found);
+
+ // Since we only expect to consume one register, we should only have a single register to
+ // consume.
+ assert(operandDefs.Begin()->Next() == operandDefs.End());
+
+ LocationInfo& operandInfo = *static_cast<LocationInfo*>(operandDefs.Begin());
+
+ Interval* srcInterval = operandInfo.interval;
+ if (srcInterval->relatedInterval == nullptr)
+ {
+ // Preference the source to the dest, unless this is a non-last-use localVar.
+ // Note that the last-use info is not correct, but it is a better approximation than preferencing
+ // the source to the dest, if the source's lifetime extends beyond the dest.
+ if (!srcInterval->isLocalVar || (operandInfo.treeNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+ }
+ else if (!srcInterval->isLocalVar)
+ {
+ // Preference the source to dest, if src is not a local var.
+ srcInterval->assignRelatedInterval(varDefInterval);
+ }
+
+ // We can have a case where the source of the store has a different register type,
+ // e.g. when the store is of a return value temp, and op1 is a Vector2
+ // (TYP_SIMD8). We will need to set the
+ // src candidates accordingly on op1 so that LSRA will generate a copy.
+ // We could do this during Lowering, but at that point we don't know whether
+ // this lclVar will be a register candidate, and if not, we would prefer to leave
+ // the type alone.
+ if (regType(tree->gtGetOp1()->TypeGet()) != regType(tree->TypeGet()))
+ {
+ tree->gtGetOp1()->gtLsraInfo.setSrcCandidates(this, allRegs(tree->TypeGet()));
+ }
+ }
+
+ if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ VarSetOps::AddElemD(compiler, currentLiveVars,
+ compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvVarIndex);
+ }
+ }
+ }
+ else if (noAdd && produce == 0)
+ {
+ // This is the case for dead nodes that occur after
+ // tree rationalization
+ // TODO-Cleanup: Identify and remove these dead nodes prior to register allocation.
+ if (tree->IsMultiRegCall())
+ {
+ // In case of multi-reg call node, produce = number of return registers
+ produce = tree->AsCall()->GetReturnTypeDesc()->GetReturnRegCount();
+ }
+ else
+ {
+ produce = 1;
+ }
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ if (produce)
+ {
+ if (varDefInterval != nullptr)
+ {
+ printf("t%u (i:%u) = op ", currentLoc, varDefInterval->intervalIndex);
+ }
+ else
+ {
+ for (int i = 0; i < produce; i++)
+ {
+ printf("t%u ", currentLoc);
+ }
+ printf("= op ");
+ }
+ }
+ else
+ {
+ printf(" op ");
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ Interval* prefSrcInterval = nullptr;
+
+ // If this is a binary operator that will be encoded with 2 operand fields
+ // (i.e. the target is read-modify-write), preference the dst to op1.
+
+ bool hasDelayFreeSrc = tree->gtLsraInfo.hasDelayFreeSrc;
+ if (tree->OperGet() == GT_PUTARG_REG && isCandidateLocalRef(tree->gtGetOp1()) &&
+ (tree->gtGetOp1()->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ // This is the case for a "pass-through" copy of a lclVar. In the case where it is a non-last-use,
+ // we don't want the def of the copy to kill the lclVar register, if it is assigned the same register
+ // (which is actually what we hope will happen).
+ JITDUMP("Setting putarg_reg as a pass-through of a non-last use lclVar\n");
+
+ // Get the register information for the first operand of the node.
+ LocationInfoList operandDefs;
+ bool found = operandToLocationInfoMap.TryGetValue(*(tree->OperandsBegin()), &operandDefs);
+ assert(found);
+
+ // Preference the destination to the interval of the first register defined by the first operand.
+ Interval* srcInterval = operandDefs.Begin()->interval;
+ assert(srcInterval->isLocalVar);
+ prefSrcInterval = srcInterval;
+ isSpecialPutArg = true;
+ }
+
+ RefPosition* internalRefs[MaxInternalRegisters];
+
+ // make intervals for all the 'internal' register requirements for this node
+ // where internal means additional registers required temporarily
+ int internalCount = buildInternalRegisterDefsForNode(tree, currentLoc, internalRefs);
+
+ // pop all ref'd tree temps
+ GenTreeOperandIterator iterator = tree->OperandsBegin();
+
+ // `operandDefs` holds the list of `LocationInfo` values for the registers defined by the current
+ // operand. `operandDefsIterator` points to the current `LocationInfo` value in `operandDefs`.
+ LocationInfoList operandDefs;
+ LocationInfoListNode* operandDefsIterator = operandDefs.End();
+ for (int useIndex = 0; useIndex < consume; useIndex++)
+ {
+ // If we've consumed all of the registers defined by the current operand, advance to the next
+ // operand that defines any registers.
+ if (operandDefsIterator == operandDefs.End())
+ {
+ // Skip operands that do not define any registers, whether directly or indirectly.
+ GenTree* operand;
+ do
+ {
+ assert(iterator != tree->OperandsEnd());
+ operand = *iterator;
+
+ ++iterator;
+ } while (!operand->gtLsraInfo.definesAnyRegisters);
+
+ // If we have already processed a previous operand, return its `LocationInfo` list to the
+ // pool.
+ if (useIndex > 0)
+ {
+ assert(!operandDefs.IsEmpty());
+ listNodePool.ReturnNodes(operandDefs);
+ }
+
+ // Remove the list of registers defined by the current operand from the map. Note that this
+ // is only correct because tree nodes are singly-used: if this property ever changes (e.g.
+ // if tree nodes are eventually allowed to be multiply-used), then the removal is only
+ // correct at the last use.
+ bool removed = operandToLocationInfoMap.TryRemove(operand, &operandDefs);
+ assert(removed);
+
+ // Move the operand def iterator to the `LocationInfo` for the first register defined by the
+ // current operand.
+ operandDefsIterator = operandDefs.Begin();
+ assert(operandDefsIterator != operandDefs.End());
+ }
+
+ LocationInfo& locInfo = *static_cast<LocationInfo*>(operandDefsIterator);
+ operandDefsIterator = operandDefsIterator->Next();
+
+ JITDUMP("t%u ", locInfo.loc);
+
+ // For interstitial tree temps, a use is always a last use and ends the interval;
+ // this is set by default in newRefPosition.
+ GenTree* useNode = locInfo.treeNode;
+ assert(useNode != nullptr);
+ var_types type = useNode->TypeGet();
+ regMaskTP candidates = getUseCandidates(useNode);
+ Interval* i = locInfo.interval;
+ unsigned multiRegIdx = locInfo.multiRegIdx;
+
+#ifdef FEATURE_SIMD
+ // In case of multi-reg call store to a local, there won't be any mismatch of
+ // use candidates with the type of the tree node.
+ if (tree->OperIsLocalStore() && varDefInterval == nullptr && !useNode->IsMultiRegCall())
+ {
+ // This is a non-candidate store. If this is a SIMD type, the use candidates
+ // may not match the type of the tree node. If that is the case, change the
+ // type of the tree node to match, so that we do the right kind of store.
+ if ((candidates & allRegs(tree->gtType)) == RBM_NONE)
+ {
+ noway_assert((candidates & allRegs(useNode->gtType)) != RBM_NONE);
+ // Currently, the only case where this should happen is for a TYP_LONG
+ // source and a TYP_SIMD8 target.
+ assert((useNode->gtType == TYP_LONG && tree->gtType == TYP_SIMD8) ||
+ (useNode->gtType == TYP_SIMD8 && tree->gtType == TYP_LONG));
+ tree->gtType = useNode->gtType;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ bool delayRegFree = (hasDelayFreeSrc && useNode->gtLsraInfo.isDelayFree);
+ if (useNode->gtLsraInfo.isTgtPref)
+ {
+ prefSrcInterval = i;
+ }
+
+ bool regOptionalAtUse = useNode->IsRegOptional();
+ bool isLastUse = true;
+ if (isCandidateLocalRef(useNode))
+ {
+ isLastUse = ((useNode->gtFlags & GTF_VAR_DEATH) != 0);
+ }
+ else
+ {
+ // For non-localVar uses we record nothing,
+ // as nothing needs to be written back to the tree.
+ useNode = nullptr;
+ }
+
+ regMaskTP fixedAssignment = fixedCandidateMask(type, candidates);
+ if (fixedAssignment != RBM_NONE)
+ {
+ candidates = fixedAssignment;
+ }
+
+ RefPosition* pos;
+ if ((candidates & allRegs(i->registerType)) == 0)
+ {
+ // This should only occur where we've got a type mismatch due to SIMD
+ // pointer-size types that are passed & returned as longs.
+ i->hasConflictingDefUse = true;
+ if (fixedAssignment != RBM_NONE)
+ {
+ // Explicitly insert a FixedRefPosition and fake the candidates, because otherwise newRefPosition
+ // will complain about the types not matching.
+ regNumber physicalReg = genRegNumFromMask(fixedAssignment);
+ RefPosition* pos = newRefPosition(physicalReg, currentLoc, RefTypeFixedReg, nullptr, fixedAssignment);
+ }
+ pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, allRegs(i->registerType), multiRegIdx);
+ pos->registerAssignment = candidates;
+ }
+ else
+ {
+ pos = newRefPosition(i, currentLoc, RefTypeUse, useNode, candidates, multiRegIdx);
+ }
+ if (delayRegFree)
+ {
+ hasDelayFreeSrc = true;
+ pos->delayRegFree = true;
+ }
+
+ if (isLastUse)
+ {
+ pos->lastUse = true;
+ }
+
+ if (regOptionalAtUse)
+ {
+ pos->setAllocateIfProfitable(1);
+ }
+ }
+ JITDUMP("\n");
+
+ if (!operandDefs.IsEmpty())
+ {
+ listNodePool.ReturnNodes(operandDefs);
+ }
+
+ buildInternalRegisterUsesForNode(tree, currentLoc, internalRefs, internalCount);
+
+ RegisterType registerType = getDefType(tree);
+ regMaskTP candidates = getDefCandidates(tree);
+ regMaskTP useCandidates = getUseCandidates(tree);
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Def candidates ");
+ dumpRegMask(candidates);
+ printf(", Use candidates ");
+ dumpRegMask(useCandidates);
+ printf("\n");
+ }
+#endif // DEBUG
+
+#if defined(_TARGET_AMD64_)
+ // A multi-reg call node is the only node that can produce a multi-reg value.
+ assert(produce <= 1 || (tree->IsMultiRegCall() && produce == MAX_RET_REG_COUNT));
+#elif defined(_TARGET_ARM_)
+ assert(!varTypeIsMultiReg(tree->TypeGet()));
+#endif // _TARGET_xxx_
+
+ // Add kill positions before adding def positions
+ buildKillPositionsForNode(tree, currentLoc + 1);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VARSET_TP VARSET_INIT_NOCOPY(liveLargeVectors, VarSetOps::UninitVal());
+ if (RBM_FLT_CALLEE_SAVED != RBM_NONE)
+ {
+ // Build RefPositions for saving any live large vectors.
+ // This must be done after the kills, so that we know which large vectors are still live.
+ VarSetOps::AssignNoCopy(compiler, liveLargeVectors, buildUpperVectorSaveRefPositions(tree, currentLoc));
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ ReturnTypeDesc* retTypeDesc = nullptr;
+ bool isMultiRegCall = tree->IsMultiRegCall();
+ if (isMultiRegCall)
+ {
+ retTypeDesc = tree->AsCall()->GetReturnTypeDesc();
+ assert((int)genCountBits(candidates) == produce);
+ assert(candidates == retTypeDesc->GetABIReturnRegs());
+ }
+
+ // push defs
+ LocationInfoList locationInfoList;
+ LsraLocation defLocation = currentLoc + 1;
+ for (int i = 0; i < produce; i++)
+ {
+ regMaskTP currCandidates = candidates;
+ Interval* interval = varDefInterval;
+
+ // In case of multi-reg call node, registerType is given by
+ // the type of ith position return register.
+ if (isMultiRegCall)
+ {
+ registerType = retTypeDesc->GetReturnRegType((unsigned)i);
+ currCandidates = genRegMask(retTypeDesc->GetABIReturnReg(i));
+ useCandidates = allRegs(registerType);
+ }
+
+ if (interval == nullptr)
+ {
+ // Make a new interval
+ interval = newInterval(registerType);
+ if (hasDelayFreeSrc)
+ {
+ interval->hasNonCommutativeRMWDef = true;
+ }
+ else if (tree->OperIsConst())
+ {
+ assert(!tree->IsReuseRegVal());
+ interval->isConstant = true;
+ }
+
+ if ((currCandidates & useCandidates) != RBM_NONE)
+ {
+ interval->updateRegisterPreferences(currCandidates & useCandidates);
+ }
+
+ if (isSpecialPutArg)
+ {
+ interval->isSpecialPutArg = true;
+ }
+ }
+ else
+ {
+ assert(registerTypesEquivalent(interval->registerType, registerType));
+ }
+
+ if (prefSrcInterval != nullptr)
+ {
+ interval->assignRelatedIntervalIfUnassigned(prefSrcInterval);
+ }
+
+ // for assignments, we want to create a refposition for the def
+ // but not push it
+ if (!noAdd)
+ {
+ locationInfoList.Append(listNodePool.GetNode(defLocation, interval, tree, (unsigned)i));
+ }
+
+ RefPosition* pos = newRefPosition(interval, defLocation, defRefType, defNode, currCandidates, (unsigned)i);
+ if (info.isLocalDefUse)
+ {
+ pos->isLocalDefUse = true;
+ pos->lastUse = true;
+ }
+ DBEXEC(VERBOSE, pos->dump());
+ interval->updateRegisterPreferences(currCandidates);
+ interval->updateRegisterPreferences(useCandidates);
+ }
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ buildUpperVectorRestoreRefPositions(tree, currentLoc, liveLargeVectors);
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ bool isContainedNode =
+ !noAdd && consume == 0 && produce == 0 && (tree->OperIsAggregate() || (tree->TypeGet() != TYP_VOID && !tree->OperIsStore()));
+ if (isContainedNode)
+ {
+ // Contained nodes map to the concatenated lists of their operands.
+ for (GenTree* op : tree->Operands())
+ {
+ if (!op->gtLsraInfo.definesAnyRegisters)
+ {
+ assert(ComputeOperandDstCount(op) == 0);
+ continue;
+ }
+
+ LocationInfoList operandList;
+ bool removed = operandToLocationInfoMap.TryRemove(op, &operandList);
+ assert(removed);
+
+ locationInfoList.Append(operandList);
+ }
+ }
+
+ if (!locationInfoList.IsEmpty())
+ {
+ bool added = operandToLocationInfoMap.AddOrUpdate(tree, locationInfoList);
+ assert(added);
+ tree->gtLsraInfo.definesAnyRegisters = true;
+ }
+}
+
+// make an interval for each physical register
+void LinearScan::buildPhysRegRecords()
+{
+ RegisterType regType = IntRegisterType;
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* curr = &physRegs[reg];
+ curr->init(reg);
+ }
+}
+
+BasicBlock* getNonEmptyBlock(BasicBlock* block)
+{
+ while (block != nullptr && block->bbTreeList == nullptr)
+ {
+ BasicBlock* nextBlock = block->bbNext;
+ // Note that here we use the version of NumSucc that does not take a compiler.
+ // That way this doesn't have to take a compiler, or be an instance method, e.g. of LinearScan.
+ // If we have an empty block, it must have jump type BBJ_NONE or BBJ_ALWAYS, in which
+ // case we don't need the version that takes a compiler.
+ assert(block->NumSucc() == 1 && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_NONE)));
+ // sometimes the first block is empty and ends with an uncond branch
+ // assert( block->GetSucc(0) == nextBlock);
+ block = nextBlock;
+ }
+ assert(block != nullptr && block->bbTreeList != nullptr);
+ return block;
+}
+
+void LinearScan::insertZeroInitRefPositions()
+{
+ // insert defs for this, then a block boundary
+
+ VARSET_ITER_INIT(compiler, iter, compiler->fgFirstBB->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
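+ // A candidate, non-parameter local that is live into the first block needs a zero-init
+ // RefPosition if the method requires initialization (compInitMem) or the local is GC-typed.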
+ if (!varDsc->lvIsParam && isCandidateVar(varDsc) &&
+ (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet())))
+ {
+ GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
+ JITDUMP("V%02u was live in\n", varNum);
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode, allRegs(interval->registerType));
+ varDsc->lvMustInit = true;
+ }
+ }
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+// -----------------------------------------------------------------------
+// Sets the register state for an argument of type STRUCT for System V systems.
+// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp
+// for how state for argument is updated for unix non-structs and Windows AMD64 structs.
+void LinearScan::unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc)
+{
+ assert(varTypeIsStruct(argDsc));
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+
+ if ((argDsc->lvArgReg != REG_STK) && (argDsc->lvArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvArgReg);
+ }
+ }
+
+ if ((argDsc->lvOtherArgReg != REG_STK) && (argDsc->lvOtherArgReg != REG_NA))
+ {
+ if (genRegMask(argDsc->lvOtherArgReg) & (RBM_ALLFLOAT))
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_FLTARG_REGS));
+ floatRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ else
+ {
+ assert(genRegMask(argDsc->lvOtherArgReg) & (RBM_ARG_REGS));
+ intRegState->rsCalleeRegArgMaskLiveIn |= genRegMask(argDsc->lvOtherArgReg);
+ }
+ }
+}
+
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+//------------------------------------------------------------------------
+// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate
+// regState (either compiler->intRegState or compiler->floatRegState),
+// with the lvArgReg on "argDsc"
+//
+// Arguments:
+// argDsc - the argument for which the state is to be updated.
+//
+// Return Value: None
+//
+// Assumptions:
+// The argument is live on entry to the function
+// (or is untracked and therefore assumed live)
+//
+// Notes:
+// This relies on a method in regAlloc.cpp that is shared between LSRA
+// and regAlloc. It is further abstracted here because regState is updated
+// separately for tracked and untracked variables in LSRA.
+//
+void LinearScan::updateRegStateForArg(LclVarDsc* argDsc)
+{
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // For System V AMD64 calls the argDsc can have 2 registers (for structs.)
+ // Handle them here.
+ if (varTypeIsStruct(argDsc))
+ {
+ unixAmd64UpdateRegStateForArg(argDsc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+ // In the case of AMD64 we'll still use the floating point registers
+ // to model the register usage for arguments on vararg calls, so
+ // we will ignore the varargs condition when determining whether we use
+ // XMM registers or not for setting up the call.
+ bool isFloat = (isFloatRegType(argDsc->lvType)
+#ifndef _TARGET_AMD64_
+ && !compiler->info.compIsVarArgs
+#endif
+ );
+
+ if (argDsc->lvIsHfaRegArg())
+ {
+ isFloat = true;
+ }
+
+ if (isFloat)
+ {
+ JITDUMP("Float arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+ compiler->raUpdateRegStateForArg(floatRegState, argDsc);
+ }
+ else
+ {
+ JITDUMP("Int arg V%02u in reg %s\n", (argDsc - compiler->lvaTable), getRegName(argDsc->lvArgReg));
+#if FEATURE_MULTIREG_ARGS
+ if (argDsc->lvOtherArgReg != REG_NA)
+ {
+ JITDUMP("(second half) in reg %s\n", getRegName(argDsc->lvOtherArgReg));
+ }
+#endif // FEATURE_MULTIREG_ARGS
+ compiler->raUpdateRegStateForArg(intRegState, argDsc);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// findPredBlockForLiveIn: Determine which block should be used for the register locations of the live-in variables.
+//
+// Arguments:
+// block - The block for which we're selecting a predecessor.
+// prevBlock - The previous block in allocation order.
+// pPredBlockIsAllocated - A debug-only argument that indicates whether any of the predecessors have been seen
+// in allocation order.
+//
+// Return Value:
+// The selected predecessor.
+//
+// Assumptions:
+// in DEBUG, caller initializes *pPredBlockIsAllocated to false, and it will be set to true if the block
+// returned is in fact a predecessor.
+//
+// Notes:
+// This will select a predecessor based on the heuristics obtained by getLsraBlockBoundaryLocations(), which can be
+// one of:
+// LSRA_BLOCK_BOUNDARY_PRED - Use the register locations of a predecessor block (default)
+// LSRA_BLOCK_BOUNDARY_LAYOUT - Use the register locations of the previous block in layout order.
+// This is the only case where this actually returns a different block.
+// LSRA_BLOCK_BOUNDARY_ROTATE - Rotate the register locations from a predecessor.
+// For this case, the block returned is the same as for LSRA_BLOCK_BOUNDARY_PRED, but
+// the register locations will be "rotated" to stress the resolution and allocation
+// code.
+
+BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block,
+ BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated))
+{
+ BasicBlock* predBlock = nullptr;
+#ifdef DEBUG
+ assert(*pPredBlockIsAllocated == false);
+ if (getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_LAYOUT)
+ {
+ if (prevBlock != nullptr)
+ {
+ predBlock = prevBlock;
+ }
+ }
+ else
+#endif // DEBUG
+ if (block != compiler->fgFirstBB)
+ {
+ predBlock = block->GetUniquePred(compiler);
+ if (predBlock != nullptr)
+ {
+ if (isBlockVisited(predBlock))
+ {
+ if (predBlock->bbJumpKind == BBJ_COND)
+ {
+ // Special handling to improve matching on backedges.
+ BasicBlock* otherBlock = (block == predBlock->bbNext) ? predBlock->bbJumpDest : predBlock->bbNext;
+ noway_assert(otherBlock != nullptr);
+ if (isBlockVisited(otherBlock))
+ {
+ // This is the case when we have a conditional branch where one target has already
+ // been visited. It would be best to use the same incoming regs as that block,
+ // so that we have less likelihood of having to move registers.
+ // For example, in determining the block to use for the starting register locations for
+ // "block" in the following example, we'd like to use the same predecessor for "block"
+ // as for "otherBlock", so that both successors of predBlock have the same locations, reducing
+ // the likelihood of needing a split block on a backedge:
+ //
+ // otherPred
+ // |
+ // otherBlock <-+
+ // . . . |
+ // |
+ // predBlock----+
+ // |
+ // block
+ //
+ for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* otherPred = pred->flBlock;
+ if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum)
+ {
+ predBlock = otherPred;
+ break;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ predBlock = nullptr;
+ }
+ }
+ else
+ {
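+ // There is no unique predecessor; choose the visited predecessor with the highest weight.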
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* candidatePredBlock = pred->flBlock;
+ if (isBlockVisited(candidatePredBlock))
+ {
+ if (predBlock == nullptr || predBlock->bbWeight < candidatePredBlock->bbWeight)
+ {
+ predBlock = candidatePredBlock;
+ INDEBUG(*pPredBlockIsAllocated = true;)
+ }
+ }
+ }
+ }
+ if (predBlock == nullptr)
+ {
+ predBlock = prevBlock;
+ assert(predBlock != nullptr);
+ JITDUMP("\n\nNo allocated predecessor; ");
+ }
+ }
+ return predBlock;
+}
+
+void LinearScan::buildIntervals()
+{
+ BasicBlock* block;
+
+ // start numbering at 1; 0 is the entry
+ LsraLocation currentLoc = 1;
+
+ JITDUMP("\nbuildIntervals ========\n");
+
+ // Now build (empty) records for all of the physical registers
+ buildPhysRegRecords();
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("\n-----------------\n");
+ printf("LIVENESS:\n");
+ printf("-----------------\n");
+ foreach_block(compiler, block)
+ {
+ printf("BB%02u use def in out\n", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveIn);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ identifyCandidates();
+
+ DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_PRE));
+
+ // second part:
+ JITDUMP("\nbuildIntervals second part ========\n");
+ currentLoc = 0;
+
+ // Next, create ParamDef RefPositions for all the tracked parameters,
+ // in order of their varIndex
+
+ LclVarDsc* argDsc;
+ unsigned int lclNum;
+
+ RegState* intRegState = &compiler->codeGen->intRegState;
+ RegState* floatRegState = &compiler->codeGen->floatRegState;
+ intRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+ floatRegState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+
+ for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++)
+ {
+ lclNum = compiler->lvaTrackedToVarNum[varIndex];
+ argDsc = &(compiler->lvaTable[lclNum]);
+
+ if (!argDsc->lvIsParam)
+ {
+ continue;
+ }
+
+ // Only reserve a register if the argument is actually used.
+ // Is it dead on entry? If compJmpOpUsed is true, then the arguments
+ // have to be kept alive, so we have to consider it as live on entry.
+ // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we
+ // won't have done dataflow on it, but it needs to be marked as live-in so
+ // it will get saved in the prolog.
+ if (!compiler->compJmpOpUsed && argDsc->lvRefCnt == 0 && !compiler->opts.compDbgCode)
+ {
+ continue;
+ }
+
+ if (argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+
+ if (isCandidateVar(argDsc))
+ {
+ Interval* interval = getIntervalForLocalVar(lclNum);
+ regMaskTP mask = allRegs(TypeGet(argDsc));
+ if (argDsc->lvIsRegArg)
+ {
+ // Set this interval as currently assigned to that register
+ regNumber inArgReg = argDsc->lvArgReg;
+ assert(inArgReg < REG_COUNT);
+ mask = genRegMask(inArgReg);
+ assignPhysReg(inArgReg, interval);
+ }
+ RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask);
+ }
+ else if (varTypeIsStruct(argDsc->lvType))
+ {
+ for (unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum)
+ {
+ LclVarDsc* fieldVarDsc = &(compiler->lvaTable[fieldVarNum]);
+ if (fieldVarDsc->lvLRACandidate)
+ {
+ Interval* interval = getIntervalForLocalVar(fieldVarNum);
+ RefPosition* pos =
+ newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, allRegs(TypeGet(fieldVarDsc)));
+ }
+ }
+ }
+ else
+ {
+ // We can overwrite the register (i.e. codegen saves it on entry)
+ assert(argDsc->lvRefCnt == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister ||
+ !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode));
+ }
+ }
+
+ // Now set up the reg state for the non-tracked args
+ // (We do this here because we want to generate the ParamDef RefPositions in tracked
+ // order, so that loop doesn't hit the non-tracked args)
+
+ for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++, argDsc++)
+ {
+ argDsc = &(compiler->lvaTable[argNum]);
+
+ if (argDsc->lvPromotedStruct())
+ {
+ noway_assert(argDsc->lvFieldCnt == 1); // We only handle one field here
+
+ unsigned fieldVarNum = argDsc->lvFieldLclStart;
+ argDsc = &(compiler->lvaTable[fieldVarNum]);
+ }
+ noway_assert(argDsc->lvIsParam);
+ if (!argDsc->lvTracked && argDsc->lvIsRegArg)
+ {
+ updateRegStateForArg(argDsc);
+ }
+ }
+
+ // If there is a secret stub param, it is also live in
+ if (compiler->info.compPublishStubParam)
+ {
+ intRegState->rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
+ }
+
+ LocationInfoListNodePool listNodePool(compiler, 8);
+ SmallHashTable<GenTree*, LocationInfoList, 32> operandToLocationInfoMap(compiler);
+
+ BasicBlock* predBlock = nullptr;
+ BasicBlock* prevBlock = nullptr;
+
+ // Initialize currentLiveVars to the empty set. We will set it to the current
+ // live-in at the entry to each block (this will include the incoming args on
+ // the first block).
+ VarSetOps::AssignNoCopy(compiler, currentLiveVars, VarSetOps::MakeEmpty(compiler));
+
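+    // Walk the blocks in the LSRA block sequence, building RefPositions for the nodes in each block.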
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ JITDUMP("\nNEW BLOCK BB%02u\n", block->bbNum);
+
+ bool predBlockIsAllocated = false;
+ predBlock = findPredBlockForLiveIn(block, prevBlock DEBUGARG(&predBlockIsAllocated));
+
+ if (block == compiler->fgFirstBB)
+ {
+ insertZeroInitRefPositions();
+ }
+
+ // Determine if we need any DummyDefs.
+ // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+        // Note that it's possible to have uses of uninitialized variables, in which case even the first
+ // block may require DummyDefs, which we are not currently adding - this means that these variables
+ // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+ // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+ // variables (which may actually be initialized along the dynamically executed paths, but not
+ // on all static paths), we wind up with excessive liveranges for some of these variables.
+ VARSET_TP VARSET_INIT(compiler, newLiveIn, block->bbLiveIn);
+ if (predBlock)
+ {
+ JITDUMP("\n\nSetting incoming variable registers of BB%02u to outVarToRegMap of BB%02u\n", block->bbNum,
+ predBlock->bbNum);
+ assert(predBlock->bbNum <= bbNumMaxBeforeResolution);
+ blockInfo[block->bbNum].predBBNum = predBlock->bbNum;
+ // Compute set difference: newLiveIn = block->bbLiveIn - predBlock->bbLiveOut
+ VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+ }
+ bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
+
+ // Create dummy def RefPositions
+
+ if (needsDummyDefs)
+ {
+ // If we are using locations from a predecessor, we should never require DummyDefs.
+ assert(!predBlockIsAllocated);
+
+ JITDUMP("Creating dummy definitions\n");
+ VARSET_ITER_INIT(compiler, iter, newLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+ // If this is the entry block, don't add any incoming parameters (they're handled with ParamDefs).
+ if (isCandidateVar(varDsc) && (predBlock != nullptr || !varDsc->lvIsParam))
+ {
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr, allRegs(interval->registerType));
+ }
+ }
+ JITDUMP("Finished creating dummy definitions\n\n");
+ }
+
+ // Add a dummy RefPosition to mark the block boundary.
+ // Note that we do this AFTER adding the exposed uses above, because the
+ // register positions for those exposed uses need to be recorded at
+ // this point.
+
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+
+ VarSetOps::Assign(compiler, currentLiveVars, block->bbLiveIn);
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+ for (GenTree* node : blockRange.NonPhiNodes())
+ {
+ assert(node->gtLsraInfo.loc >= currentLoc);
+ assert(((node->gtLIRFlags & LIR::Flags::IsUnusedValue) == 0) || node->gtLsraInfo.isLocalDefUse);
+
+ currentLoc = node->gtLsraInfo.loc;
+ buildRefPositionsForNode(node, block, listNodePool, operandToLocationInfoMap, currentLoc);
+
+#ifdef DEBUG
+ if (currentLoc > maxNodeLocation)
+ {
+ maxNodeLocation = currentLoc;
+ }
+#endif // DEBUG
+ }
+
+ // Increment the LsraLocation at this point, so that the dummy RefPositions
+ // will not have the same LsraLocation as any "real" RefPosition.
+ currentLoc += 2;
+
+ // Note: the visited set is cleared in LinearScan::doLinearScan()
+ markBlockVisited(block);
+
+ // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the
+ // next block, or any unvisited successors.
+ // This will address lclVars that are live on a backedge, as well as those that are kept
+ // live at a GT_JMP.
+ //
+        // Blocks ending with "jmp method" are marked with BBF_HAS_JMP,
+        // and the jmp call is represented by a GT_JMP node, which is a leaf node.
+        // The liveness phase keeps all the arguments of the method live until the end
+        // of the block by adding them to the live-out set of the block containing the GT_JMP.
+ //
+ // The target of a GT_JMP implicitly uses all the current method arguments, however
+ // there are no actual references to them. This can cause LSRA to assert, because
+ // the variables are live but it sees no references. In order to correctly model the
+ // liveness of these arguments, we add dummy exposed uses, in the same manner as for
+ // backward branches. This will happen automatically via expUseSet.
+ //
+ // Note that a block ending with GT_JMP has no successors and hence the variables
+ // for which dummy use ref positions are added are arguments of the method.
+
+ VARSET_TP VARSET_INIT(compiler, expUseSet, block->bbLiveOut);
+ BasicBlock* nextBlock = getNextBlock();
+ if (nextBlock != nullptr)
+ {
+ VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn);
+ }
+ AllSuccessorIter succsEnd = block->GetAllSuccs(compiler).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(compiler).begin();
+ succs != succsEnd && !VarSetOps::IsEmpty(compiler, expUseSet); ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ if (isBlockVisited(succ))
+ {
+ continue;
+ }
+ VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn);
+ }
+
+ if (!VarSetOps::IsEmpty(compiler, expUseSet))
+ {
+ JITDUMP("Exposed uses:");
+ VARSET_ITER_INIT(compiler, iter, expUseSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ LclVarDsc* varDsc = compiler->lvaTable + varNum;
+ if (isCandidateVar(varDsc))
+ {
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ JITDUMP(" V%02u", varNum);
+ }
+ }
+ JITDUMP("\n");
+ }
+
+ // Identify the last uses of each variable, except in the case of MinOpts, where all vars
+ // are kept live everywhere.
+
+ if (!compiler->opts.MinOpts())
+ {
+ setLastUses(block);
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("use: ");
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\ndef: ");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ prevBlock = block;
+ }
+
+ // If we need to KeepAliveAndReportThis, add a dummy exposed use of it at the end
+ if (compiler->lvaKeepAliveAndReportThis())
+ {
+ unsigned keepAliveVarNum = compiler->info.compThisArg;
+ assert(compiler->info.compIsStatic == false);
+ if (isCandidateVar(&compiler->lvaTable[keepAliveVarNum]))
+ {
+ JITDUMP("Adding exposed use of this, for lvaKeepAliveAndReportThis\n");
+ Interval* interval = getIntervalForLocalVar(keepAliveVarNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ }
+ }
+
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ LclVarDsc* varDsc;
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvLRACandidate)
+ {
+ JITDUMP("Adding exposed use of V%02u for LsraExtendLifetimes\n", lclNum);
+ Interval* interval = getIntervalForLocalVar(lclNum);
+ RefPosition* pos =
+ newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType));
+ }
+ }
+ }
+#endif // DEBUG
+
+ // If the last block has successors, create a RefTypeBB to record
+ // what's live
+
+ if (prevBlock->NumSucc(compiler) > 0)
+ {
+ RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeBB, nullptr, RBM_NONE);
+ }
+
+#ifdef DEBUG
+ // Make sure we don't have any blocks that were not visited
+ foreach_block(compiler, block)
+ {
+ assert(isBlockVisited(block));
+ }
+
+ if (VERBOSE)
+ {
+ lsraDumpIntervals("BEFORE VALIDATING INTERVALS");
+ dumpRefPositions("BEFORE VALIDATING INTERVALS");
+ validateIntervals();
+ }
+#endif // DEBUG
+}
+
+#ifdef DEBUG
+void LinearScan::dumpVarRefPositions(const char* title)
+{
+ printf("\nVAR REFPOSITIONS %s\n", title);
+
+ for (unsigned i = 0; i < compiler->lvaCount; i++)
+ {
+ Interval* interval = getIntervalForLocalVar(i);
+ printf("--- V%02u\n", i);
+
+ for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
+ {
+ ref->dump();
+ }
+ }
+
+ printf("\n");
+}
+
+void LinearScan::validateIntervals()
+{
+ for (unsigned i = 0; i < compiler->lvaCount; i++)
+ {
+ Interval* interval = getIntervalForLocalVar(i);
+
+ bool defined = false;
+ printf("-----------------\n");
+ for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition)
+ {
+ ref->dump();
+ RefType refType = ref->refType;
+ if (!defined && RefTypeIsUse(refType))
+ {
+ if (compiler->info.compMethodName != nullptr)
+ {
+ printf("%s: ", compiler->info.compMethodName);
+ }
+ printf("LocalVar V%02u: undefined use at %u\n", i, ref->nodeLocation);
+ }
+ // Note that there can be multiple last uses if they are on disjoint paths,
+ // so we can't really check the lastUse flag
+ if (ref->lastUse)
+ {
+ defined = false;
+ }
+ if (RefTypeIsDef(refType))
+ {
+ defined = true;
+ }
+ }
+ }
+}
+#endif // DEBUG
+
+// Set the default rpFrameType based upon codeGen->isFramePointerRequired()
+// This was lifted from the register predictor
+//
+void LinearScan::setFrameType()
+{
+ FrameType frameType = FT_NOT_SET;
+ if (compiler->codeGen->isFramePointerRequired())
+ {
+ frameType = FT_EBP_FRAME;
+ }
+ else
+ {
+ if (compiler->rpMustCreateEBPCalled == false)
+ {
+#ifdef DEBUG
+ const char* reason;
+#endif // DEBUG
+ compiler->rpMustCreateEBPCalled = true;
+ if (compiler->rpMustCreateEBPFrame(INDEBUG(&reason)))
+ {
+ JITDUMP("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
+ compiler->codeGen->setFrameRequired(true);
+ }
+ }
+
+ if (compiler->codeGen->isFrameRequired())
+ {
+ frameType = FT_EBP_FRAME;
+ }
+ else
+ {
+ frameType = FT_ESP_FRAME;
+ }
+ }
+
+#if DOUBLE_ALIGN
+ // The DOUBLE_ALIGN feature indicates whether the JIT will attempt to double-align the
+ // frame if needed. Note that this feature isn't on for amd64, because the stack is
+ // always double-aligned by default.
+ compiler->codeGen->setDoubleAlign(false);
+
+ // TODO-CQ: Tune this (see regalloc.cpp, in which raCntWtdStkDblStackFP is used to
+ // determine whether to double-align). Note, though that there is at least one test
+ // (jit\opt\Perf\DoubleAlign\Locals.exe) that depends on double-alignment being set
+ // in certain situations.
+ if (!compiler->opts.MinOpts() && !compiler->codeGen->isFramePointerRequired() && compiler->compFloatingPointUsed)
+ {
+ frameType = FT_DOUBLE_ALIGN_FRAME;
+ }
+#endif // DOUBLE_ALIGN
+
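+    // Propagate the chosen frame type to the codegen frame-pointer and double-align settings.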
+ switch (frameType)
+ {
+ case FT_ESP_FRAME:
+ noway_assert(!compiler->codeGen->isFramePointerRequired());
+ noway_assert(!compiler->codeGen->isFrameRequired());
+ compiler->codeGen->setFramePointerUsed(false);
+ break;
+ case FT_EBP_FRAME:
+ compiler->codeGen->setFramePointerUsed(true);
+ break;
+#if DOUBLE_ALIGN
+ case FT_DOUBLE_ALIGN_FRAME:
+ noway_assert(!compiler->codeGen->isFramePointerRequired());
+ compiler->codeGen->setFramePointerUsed(false);
+ compiler->codeGen->setDoubleAlign(true);
+ break;
+#endif // DOUBLE_ALIGN
+ default:
+ noway_assert(!"rpFrameType not set correctly!");
+ break;
+ }
+
+ // If we are using FPBASE as the frame register, we cannot also use it for
+ // a local var. Note that we may have already added it to the register masks,
+    // which are computed in the LinearScan constructor and
+ // used during lowering. Luckily, the TreeNodeInfo only stores an index to
+ // the masks stored in the LinearScan class, so we only need to walk the
+ // unique masks and remove FPBASE.
+ if (frameType == FT_EBP_FRAME)
+ {
+ if ((availableIntRegs & RBM_FPBASE) != 0)
+ {
+ RemoveRegisterFromMasks(REG_FPBASE);
+
+ // We know that we're already in "read mode" for availableIntRegs. However,
+ // we need to remove the FPBASE register, so subsequent users (like callers
+ // to allRegs()) get the right thing. The RemoveRegisterFromMasks() code
+ // fixes up everything that already took a dependency on the value that was
+ // previously read, so this completes the picture.
+ availableIntRegs.OverrideAssign(availableIntRegs & ~RBM_FPBASE);
+ }
+ }
+
+ compiler->rpFrameType = frameType;
+}
+
+// Is the copyReg given by this RefPosition still busy at the
+// given location?
+bool copyRegInUse(RefPosition* ref, LsraLocation loc)
+{
+ assert(ref->copyReg);
+ if (ref->getRefEndLocation() >= loc)
+ {
+ return true;
+ }
+ Interval* interval = ref->getInterval();
+ RefPosition* nextRef = interval->getNextRefPosition();
+ if (nextRef != nullptr && nextRef->treeNode == ref->treeNode && nextRef->getRefEndLocation() >= loc)
+ {
+ return true;
+ }
+ return false;
+}
+
+// Determine whether the register represented by "physRegRecord" is available at least
+// at the "currentLoc", and if so, return the next location at which it is in use in
+// "nextRefLocationPtr"
+//
+bool LinearScan::registerIsAvailable(RegRecord* physRegRecord,
+ LsraLocation currentLoc,
+ LsraLocation* nextRefLocationPtr,
+ RegisterType regType)
+{
+ *nextRefLocationPtr = MaxLocation;
+ LsraLocation nextRefLocation = MaxLocation;
+ regMaskTP regMask = genRegMask(physRegRecord->regNum);
+ if (physRegRecord->isBusyUntilNextKill)
+ {
+ return false;
+ }
+
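+    // Find the location of the next reference to this physical register, if any.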
+ RefPosition* nextPhysReference = physRegRecord->getNextRefPosition();
+ if (nextPhysReference != nullptr)
+ {
+ nextRefLocation = nextPhysReference->nodeLocation;
+ // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--;
+ }
+ else if (!physRegRecord->isCalleeSave)
+ {
+ nextRefLocation = MaxLocation - 1;
+ }
+
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ RefPosition* recentReference = assignedInterval->recentRefPosition;
+
+ // The only case where we have an assignedInterval, but recentReference is null
+ // is where this interval is live at procedure entry (i.e. an arg register), in which
+ // case it's still live and its assigned register is not available
+ // (Note that the ParamDef will be recorded as a recentReference when we encounter
+ // it, but we will be allocating registers, potentially to other incoming parameters,
+ // as we process the ParamDefs.)
+
+ if (recentReference == nullptr)
+ {
+ return false;
+ }
+
+ // Is this a copyReg? It is if the register assignment doesn't match.
+ // (the recentReference may not be a copyReg, because we could have seen another
+ // reference since the copyReg)
+
+ if (!assignedInterval->isAssignedTo(physRegRecord->regNum))
+ {
+ // Don't reassign it if it's still in use
+ if (recentReference->copyReg && copyRegInUse(recentReference, currentLoc))
+ {
+ return false;
+ }
+ }
+ else if (!assignedInterval->isActive && assignedInterval->isConstant)
+ {
+ // Treat this as unassigned, i.e. do nothing.
+ // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method)
+ // to avoid reusing these registers.
+ }
+ // If this interval isn't active, it's available if it isn't referenced
+ // at this location (or the previous location, if the recent RefPosition
+ // is a delayRegFree).
+ else if (!assignedInterval->isActive &&
+ (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc))
+ {
+ // This interval must have a next reference (otherwise it wouldn't be assigned to this register)
+ RefPosition* nextReference = recentReference->nextRefPosition;
+ if (nextReference != nullptr)
+ {
+ if (nextReference->nodeLocation < nextRefLocation)
+ {
+ nextRefLocation = nextReference->nodeLocation;
+ }
+ }
+ else
+ {
+ assert(recentReference->copyReg && recentReference->registerAssignment != regMask);
+ }
+ }
+ else
+ {
+ return false;
+ }
+ }
+ if (nextRefLocation < *nextRefLocationPtr)
+ {
+ *nextRefLocationPtr = nextRefLocation;
+ }
+
+#ifdef _TARGET_ARM_
+ if (regType == TYP_DOUBLE)
+ {
+ // Recurse, but check the other half this time (TYP_FLOAT)
+ if (!registerIsAvailable(getRegisterRecord(REG_NEXT(physRegRecord->regNum)), currentLoc, nextRefLocationPtr,
+ TYP_FLOAT))
+ return false;
+ nextRefLocation = *nextRefLocationPtr;
+ }
+#endif // _TARGET_ARM_
+
+ return (nextRefLocation >= currentLoc);
+}
+
+//------------------------------------------------------------------------
+// getRegisterType: Get the RegisterType to use for the given RefPosition
+//
+// Arguments:
+// currentInterval: The interval for the current allocation
+// refPosition: The RefPosition of the current Interval for which a register is being allocated
+//
+// Return Value:
+// The RegisterType that should be allocated for this RefPosition
+//
+// Notes:
+// This will nearly always be identical to the registerType of the interval, except in the case
+// of SIMD types of 8 bytes (currently only Vector2) when they are passed and returned in integer
+// registers, or copied to a return temp.
+// This method need only be called in situations where we may be dealing with the register requirements
+// of a RefTypeUse RefPosition (i.e. not when we are only looking at the type of an interval, nor when
+// we are interested in the "defining" type of the interval). This is because the situation of interest
+// only happens at the use (where it must be copied to an integer register).
+
+RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* refPosition)
+{
+ assert(refPosition->getInterval() == currentInterval);
+ RegisterType regType = currentInterval->registerType;
+ regMaskTP candidates = refPosition->registerAssignment;
+#if defined(FEATURE_SIMD) && defined(_TARGET_AMD64_)
+ if ((candidates & allRegs(regType)) == RBM_NONE)
+ {
+ assert((regType == TYP_SIMD8) && (refPosition->refType == RefTypeUse) &&
+ ((candidates & allRegs(TYP_INT)) != RBM_NONE));
+ regType = TYP_INT;
+ }
+#else // !(defined(FEATURE_SIMD) && defined(_TARGET_AMD64_))
+ assert((candidates & allRegs(regType)) != RBM_NONE);
+#endif // !(defined(FEATURE_SIMD) && defined(_TARGET_AMD64_))
+ return regType;
+}
+
+//------------------------------------------------------------------------
+// tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition,
+// and takes into account the preferences for the given Interval
+//
+// Arguments:
+// currentInterval: The interval for the current allocation
+// refPosition: The RefPosition of the current Interval for which a register is being allocated
+//
+// Return Value:
+//    The regNumber, if any, allocated to the RefPosition. Returns REG_NA if no free register is found.
+//
+// Notes:
+// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as
+// reg predict does
+
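+// The register allocation order used by tryAllocateFreeReg, for integer and floating-point registers respectively.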
+static const regNumber lsraRegOrder[] = {REG_VAR_ORDER};
+const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder);
+static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT};
+const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt);
+
+regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition)
+{
+ regNumber foundReg = REG_NA;
+
+ RegisterType regType = getRegisterType(currentInterval, refPosition);
+ const regNumber* regOrder;
+ unsigned regOrderSize;
+ if (useFloatReg(regType))
+ {
+ regOrder = lsraRegOrderFlt;
+ regOrderSize = lsraRegOrderFltSize;
+ }
+ else
+ {
+ regOrder = lsraRegOrder;
+ regOrderSize = lsraRegOrderSize;
+ }
+
+ LsraLocation currentLocation = refPosition->nodeLocation;
+ RefPosition* nextRefPos = refPosition->nextRefPosition;
+ LsraLocation nextLocation = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation;
+ regMaskTP candidates = refPosition->registerAssignment;
+ regMaskTP preferences = currentInterval->registerPreferences;
+
+ if (RefTypeIsDef(refPosition->refType))
+ {
+ if (currentInterval->hasConflictingDefUse)
+ {
+ resolveConflictingDefAndUse(currentInterval, refPosition);
+ candidates = refPosition->registerAssignment;
+ }
+ // Otherwise, check for the case of a fixed-reg def of a reg that will be killed before the
+ // use, or interferes at the point of use (which shouldn't happen, but Lower doesn't mark
+ // the contained nodes as interfering).
+ // Note that we may have a ParamDef RefPosition that is marked isFixedRegRef, but which
+ // has had its registerAssignment changed to no longer be a single register.
+ else if (refPosition->isFixedRegRef && nextRefPos != nullptr && RefTypeIsUse(nextRefPos->refType) &&
+ !nextRefPos->isFixedRegRef && genMaxOneBit(refPosition->registerAssignment))
+ {
+ regNumber defReg = refPosition->assignedReg();
+ RegRecord* defRegRecord = getRegisterRecord(defReg);
+
+ RefPosition* currFixedRegRefPosition = defRegRecord->recentRefPosition;
+ assert(currFixedRegRefPosition != nullptr &&
+ currFixedRegRefPosition->nodeLocation == refPosition->nodeLocation);
+
+ // If there is another fixed reference to this register before the use, change the candidates
+ // on this RefPosition to include that of nextRefPos.
+ if (currFixedRegRefPosition->nextRefPosition != nullptr &&
+ currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation())
+ {
+ candidates |= nextRefPos->registerAssignment;
+ if (preferences == refPosition->registerAssignment)
+ {
+ preferences = candidates;
+ }
+ }
+ }
+ }
+
+ preferences &= candidates;
+ if (preferences == RBM_NONE)
+ {
+ preferences = candidates;
+ }
+ regMaskTP relatedPreferences = RBM_NONE;
+
+#ifdef DEBUG
+ candidates = stressLimitRegs(refPosition, candidates);
+#endif
+ bool mustAssignARegister = true;
+ assert(candidates != RBM_NONE);
+
+ // If the related interval has no further references, it is possible that it is a source of the
+ // node that produces this interval. However, we don't want to use the relatedInterval for preferencing
+ // if its next reference is not a new definition (as it either is or will become live).
+ Interval* relatedInterval = currentInterval->relatedInterval;
+ if (relatedInterval != nullptr)
+ {
+ RefPosition* nextRelatedRefPosition = relatedInterval->getNextRefPosition();
+ if (nextRelatedRefPosition != nullptr)
+ {
+ // Don't use the relatedInterval for preferencing if its next reference is not a new definition.
+ if (!RefTypeIsDef(nextRelatedRefPosition->refType))
+ {
+ relatedInterval = nullptr;
+ }
+ // Is the relatedInterval simply a copy to another relatedInterval?
+ else if ((relatedInterval->relatedInterval != nullptr) &&
+ (nextRelatedRefPosition->nextRefPosition != nullptr) &&
+ (nextRelatedRefPosition->nextRefPosition->nextRefPosition == nullptr) &&
+ (nextRelatedRefPosition->nextRefPosition->nodeLocation <
+ relatedInterval->relatedInterval->getNextRefLocation()))
+ {
+ // The current relatedInterval has only two remaining RefPositions, both of which
+ // occur prior to the next RefPosition for its relatedInterval.
+ // It is likely a copy.
+ relatedInterval = relatedInterval->relatedInterval;
+ }
+ }
+ }
+
+ if (relatedInterval != nullptr)
+ {
+ // If the related interval already has an assigned register, then use that
+ // as the related preference. We'll take the related
+ // interval preferences into account in the loop over all the registers.
+
+ if (relatedInterval->assignedReg != nullptr)
+ {
+ relatedPreferences = genRegMask(relatedInterval->assignedReg->regNum);
+ }
+ else
+ {
+ relatedPreferences = relatedInterval->registerPreferences;
+ }
+ }
+
+ bool preferCalleeSave = currentInterval->preferCalleeSave;
+
+ // For floating point, we want to be less aggressive about using callee-save registers.
+ // So in that case, we just need to ensure that the current RefPosition is covered.
+ RefPosition* rangeEndRefPosition;
+ RefPosition* lastRefPosition = currentInterval->lastRefPosition;
+ if (useFloatReg(currentInterval->registerType))
+ {
+ rangeEndRefPosition = refPosition;
+ }
+ else
+ {
+ rangeEndRefPosition = currentInterval->lastRefPosition;
+ // If we have a relatedInterval that is not currently occupying a register,
+ // and whose lifetime begins after this one ends,
+ // we want to try to select a register that will cover its lifetime.
+ if ((relatedInterval != nullptr) && (relatedInterval->assignedReg == nullptr) &&
+ (relatedInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation))
+ {
+ lastRefPosition = relatedInterval->lastRefPosition;
+ preferCalleeSave = relatedInterval->preferCalleeSave;
+ }
+ }
+
+ // If this has a delayed use (due to being used in a rmw position of a
+ // non-commutative operator), its endLocation is delayed until the "def"
+ // position, which is one location past the use (getRefEndLocation() takes care of this).
+ LsraLocation rangeEndLocation = rangeEndRefPosition->getRefEndLocation();
+ LsraLocation lastLocation = lastRefPosition->getRefEndLocation();
+ regNumber prevReg = REG_NA;
+
+ if (currentInterval->assignedReg)
+ {
+ bool useAssignedReg = false;
+ // This was an interval that was previously allocated to the given
+ // physical register, and we should try to allocate it to that register
+ // again, if possible and reasonable.
+ // Use it preemptively (i.e. before checking other available regs)
+ // only if it is preferred and available.
+
+ RegRecord* regRec = currentInterval->assignedReg;
+ prevReg = regRec->regNum;
+ regMaskTP prevRegBit = genRegMask(prevReg);
+
+ // Is it in the preferred set of regs?
+ if ((prevRegBit & preferences) != RBM_NONE)
+ {
+ // Is it currently available?
+ LsraLocation nextPhysRefLoc;
+ if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType))
+ {
+ // If the register is next referenced at this location, only use it if
+ // this has a fixed reg requirement (i.e. this is the reference that caused
+ // the FixedReg ref to be created)
+
+ if (!regRec->conflictingFixedRegReference(refPosition))
+ {
+ useAssignedReg = true;
+ }
+ }
+ }
+ if (useAssignedReg)
+ {
+ regNumber foundReg = prevReg;
+ assignPhysReg(regRec, currentInterval);
+ refPosition->registerAssignment = genRegMask(foundReg);
+ return foundReg;
+ }
+ else
+ {
+ // Don't keep trying to allocate to this register
+ currentInterval->assignedReg = nullptr;
+ }
+ }
+
+ RegRecord* availablePhysRegInterval = nullptr;
+ Interval* intervalToUnassign = nullptr;
+
+ // Each register will receive a score which is the sum of the scoring criteria below.
+ // These were selected on the assumption that they will have an impact on the "goodness"
+ // of a register selection, and have been tuned to a certain extent by observing the impact
+ // of the ordering on asmDiffs. However, there is probably much more room for tuning,
+ // and perhaps additional criteria.
+ //
+ // These are FLAGS (bits) so that we can easily order them and add them together.
+ // If the scores are equal, but one covers more of the current interval's range,
+ // then it wins. Otherwise, the one encountered earlier in the regOrder wins.
+
+ enum RegisterScore
+ {
+ VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register.
+ COVERS = 0x20, // It is in the interval's preference set and it covers the entire lifetime.
+ OWN_PREFERENCE = 0x10, // It is in the preference set of this interval.
+ COVERS_RELATED = 0x08, // It is in the preference set of the related interval and covers the entire lifetime.
+ RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval.
+ CALLER_CALLEE = 0x02, // It is in the right "set" for the interval (caller or callee-save).
+ UNASSIGNED = 0x01, // It is not currently assigned to an inactive interval.
+ };
+
+ int bestScore = 0;
+
+ // Compute the best possible score so we can stop looping early if we find it.
+ // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but
+ // probably not until we've tuned the order of these criteria. At that point,
+ // we'll need to avoid the short-circuit if we've got a stress option to reverse
+ // the selection.
+ int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE;
+ if (relatedPreferences != RBM_NONE)
+ {
+ bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED;
+ }
+
+ LsraLocation bestLocation = MinLocation;
+
+ // In non-debug builds, this will simply get optimized away
+ bool reverseSelect = false;
+#ifdef DEBUG
+ reverseSelect = doReverseSelect();
+#endif // DEBUG
+
+ // An optimization for the common case where there is only one candidate -
+ // avoid looping over all the other registers
+
+ regNumber singleReg = REG_NA;
+
+ if (genMaxOneBit(candidates))
+ {
+ regOrderSize = 1;
+ singleReg = genRegNumFromMask(candidates);
+ regOrder = &singleReg;
+ }
+
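+    // Iterate over the registers in the allocation order, scoring each available candidate.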
+ for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++)
+ {
+ regNumber regNum = regOrder[i];
+ regMaskTP candidateBit = genRegMask(regNum);
+
+ if (!(candidates & candidateBit))
+ {
+ continue;
+ }
+
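+        // Remove this register from the candidate set; the loop can then end early once no candidates remain.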
+ candidates &= ~candidateBit;
+
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ int score = 0;
+ LsraLocation nextPhysRefLocation = MaxLocation;
+
+ // By chance, is this register already holding this interval, as a copyReg or having
+ // been restored as inactive after a kill?
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ availablePhysRegInterval = physRegRecord;
+ intervalToUnassign = nullptr;
+ break;
+ }
+
+ // Find the next RefPosition of the physical register
+ if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType))
+ {
+ continue;
+ }
+
+ // If the register is next referenced at this location, only use it if
+ // this has a fixed reg requirement (i.e. this is the reference that caused
+ // the FixedReg ref to be created)
+
+ if (physRegRecord->conflictingFixedRegReference(refPosition))
+ {
+ continue;
+ }
+
+ // If this is a definition of a constant interval, check to see if its value is already in this register.
+ if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) &&
+ (physRegRecord->assignedInterval != nullptr) && physRegRecord->assignedInterval->isConstant)
+ {
+ noway_assert(refPosition->treeNode != nullptr);
+ GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode;
+ noway_assert(otherTreeNode != nullptr);
+
+ if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet())
+ {
+ switch (otherTreeNode->OperGet())
+ {
+ case GT_CNS_INT:
+ if ((refPosition->treeNode->AsIntCon()->IconValue() ==
+ otherTreeNode->AsIntCon()->IconValue()) &&
+ (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode)))
+ {
+#ifdef _TARGET_64BIT_
+ // If the constant is negative, only reuse registers of the same type.
+ // This is because, on a 64-bit system, we do not sign-extend immediates in registers to
+ // 64-bits unless they are actually longs, as this requires a longer instruction.
+ // This doesn't apply to a 32-bit system, on which long values occupy multiple registers.
+ // (We could sign-extend, but we would have to always sign-extend, because if we reuse more
+ // than once, we won't have access to the instruction that originally defines the constant).
+ if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) ||
+ (refPosition->treeNode->AsIntCon()->IconValue() >= 0))
+#endif // _TARGET_64BIT_
+ {
+ score |= VALUE_AVAILABLE;
+ }
+ }
+ break;
+ case GT_CNS_DBL:
+ {
+ // For floating point constants, the values must be identical, not simply compare
+ // equal. So we compare the bits.
+ if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) &&
+ (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()))
+ {
+ score |= VALUE_AVAILABLE;
+ }
+ break;
+ }
+ default:
+ // for all other 'otherTreeNode->OperGet()' kinds, we leave 'score' unchanged
+ break;
+ }
+ }
+ }
+
+ // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that
+        // we don't mistakenly conclude that it isn't covering the live range.
+ // This doesn't handle the case where earlier RefPositions for this Interval are also
+ // FixedRefs of this regNum, but at least those are only interesting in the case where those
+ // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg.
+ if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum))
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval, regNum));
+ nextPhysRefLocation++;
+ }
+
+ if ((candidateBit & preferences) != RBM_NONE)
+ {
+ score |= OWN_PREFERENCE;
+ if (nextPhysRefLocation > rangeEndLocation)
+ {
+ score |= COVERS;
+ }
+ }
+ if (relatedInterval != nullptr && (candidateBit & relatedPreferences) != RBM_NONE)
+ {
+ score |= RELATED_PREFERENCE;
+ if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation)
+ {
+ score |= COVERS_RELATED;
+ }
+ }
+
+ // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers
+ // with the same score. (Note that we haven't changed the original registerAssignment on the RefPosition).
+ // Overload the RELATED_PREFERENCE value.
+ else if (candidateBit == refPosition->registerAssignment)
+ {
+ score |= RELATED_PREFERENCE;
+ }
+
+ if ((preferCalleeSave && physRegRecord->isCalleeSave) || (!preferCalleeSave && !physRegRecord->isCalleeSave))
+ {
+ score |= CALLER_CALLEE;
+ }
+
+ // The register is considered unassigned if it has no assignedInterval, OR
+ // if its next reference is beyond the range of this interval.
+ if (physRegRecord->assignedInterval == nullptr ||
+ physRegRecord->assignedInterval->getNextRefLocation() > lastLocation)
+ {
+ score |= UNASSIGNED;
+ }
+
+ bool foundBetterCandidate = false;
+
+ if (score > bestScore)
+ {
+ foundBetterCandidate = true;
+ }
+ else if (score == bestScore)
+ {
+ // Prefer a register that covers the range.
+ if (bestLocation <= lastLocation)
+ {
+ if (nextPhysRefLocation > bestLocation)
+ {
+ foundBetterCandidate = true;
+ }
+ }
+ // If both cover the range, prefer a register that is killed sooner (leaving the longer range register
+ // available). If both cover the range and also getting killed at the same location, prefer the one which
+ // is same as previous assignment.
+ else if (nextPhysRefLocation > lastLocation)
+ {
+ if (nextPhysRefLocation < bestLocation)
+ {
+ foundBetterCandidate = true;
+ }
+ else if (nextPhysRefLocation == bestLocation && prevReg == regNum)
+ {
+ foundBetterCandidate = true;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (doReverseSelect() && bestScore != 0)
+ {
+ foundBetterCandidate = !foundBetterCandidate;
+ }
+#endif // DEBUG
+
+ if (foundBetterCandidate)
+ {
+ bestLocation = nextPhysRefLocation;
+ availablePhysRegInterval = physRegRecord;
+ intervalToUnassign = physRegRecord->assignedInterval;
+ bestScore = score;
+ }
+
+ // there is no way we can get a better score so break out
+ if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1)
+ {
+ break;
+ }
+ }
+
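+    // If we found a register, unassign the interval currently occupying it, if any, and assign it to the current interval.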
+ if (availablePhysRegInterval != nullptr)
+ {
+ if (intervalToUnassign != nullptr)
+ {
+ unassignPhysReg(availablePhysRegInterval, intervalToUnassign->recentRefPosition);
+ if (bestScore & VALUE_AVAILABLE)
+ {
+ assert(intervalToUnassign->isConstant);
+ refPosition->treeNode->SetReuseRegVal();
+ refPosition->treeNode->SetInReg();
+ }
+ // If we considered this "unassigned" because this interval's lifetime ends before
+ // the next ref, remember it.
+ else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr)
+ {
+ availablePhysRegInterval->previousInterval = intervalToUnassign;
+ }
+ }
+ else
+ {
+ assert((bestScore & VALUE_AVAILABLE) == 0);
+ }
+ assignPhysReg(availablePhysRegInterval, currentInterval);
+ foundReg = availablePhysRegInterval->regNum;
+ regMaskTP foundRegMask = genRegMask(foundReg);
+ refPosition->registerAssignment = foundRegMask;
+ if (relatedInterval != nullptr)
+ {
+ relatedInterval->updateRegisterPreferences(foundRegMask);
+ }
+ }
+
+ return foundReg;
+}
+
+//------------------------------------------------------------------------
+// allocateBusyReg: Find a busy register that satisfies the requirements for refPosition,
+// and that can be spilled.
+//
+// Arguments:
+// current The interval for the current allocation
+// refPosition The RefPosition of the current Interval for which a register is being allocated
+// allocateIfProfitable If true, a reg may not be allocated if all other ref positions currently
+// occupying registers are more important than the 'refPosition'.
+//
+// Return Value:
+//    The regNumber allocated to the RefPosition.  Returns REG_NA if no free register is found.
+//
+// Note: Currently this routine uses weight and farthest distance of next reference
+// to select a ref position for spilling.
+// a) if allocateIfProfitable = false
+//        The ref position chosen for spilling will be the one with the lowest weight
+//        of all, and if there is more than one ref position with the
+//        same lowest weight, among them it chooses the one with the farthest
+//        distance to its next reference.
+//
+// b) if allocateIfProfitable = true
+//        The ref position chosen for spilling will not only have the lowest weight
+//        of all, but will also have a weight lower than 'refPosition'.  If there is
+//        no such ref position, a reg will not be allocated.
+regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable)
+{
+ regNumber foundReg = REG_NA;
+
+ RegisterType regType = getRegisterType(current, refPosition);
+ regMaskTP candidates = refPosition->registerAssignment;
+ regMaskTP preferences = (current->registerPreferences & candidates);
+ if (preferences == RBM_NONE)
+ {
+ preferences = candidates;
+ }
+ if (candidates == RBM_NONE)
+ {
+ // This assumes only integer and floating point register types
+ // if we target a processor with additional register types,
+ // this would have to change
+ candidates = allRegs(regType);
+ }
+
+#ifdef DEBUG
+ candidates = stressLimitRegs(refPosition, candidates);
+#endif // DEBUG
+
+ // TODO-CQ: Determine whether/how to take preferences into account in addition to
+    //                 preferring the one with the furthest ref position when considering
+ // a candidate to spill
+ RegRecord* farthestRefPhysRegRecord = nullptr;
+ LsraLocation farthestLocation = MinLocation;
+ LsraLocation refLocation = refPosition->nodeLocation;
+ unsigned farthestRefPosWeight;
+ if (allocateIfProfitable)
+ {
+ // If allocating a reg is optional, we will consider those ref positions
+ // whose weight is less than 'refPosition' for spilling.
+ farthestRefPosWeight = getWeight(refPosition);
+ }
+ else
+ {
+ // If allocating a reg is a must, we start off with max weight so
+ // that the first spill candidate will be selected based on
+ // farthest distance alone. Since we start off with farthestLocation
+ // initialized to MinLocation, the first available ref position
+ // will be selected as spill candidate and its weight as the
+ // fathestRefPosWeight.
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
+
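+    // Examine each register of the required type to find the best spill candidate.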
+ for (regNumber regNum : Registers(regType))
+ {
+ regMaskTP candidateBit = genRegMask(regNum);
+ if (!(candidates & candidateBit))
+ {
+ continue;
+ }
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ if (physRegRecord->isBusyUntilNextKill)
+ {
+ continue;
+ }
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ // If there is a fixed reference at the same location (and it's not due to this reference),
+ // don't use it.
+
+ if (physRegRecord->conflictingFixedRegReference(refPosition))
+ {
+ assert(candidates != candidateBit);
+ continue;
+ }
+
+ LsraLocation physRegNextLocation = MaxLocation;
+ if (refPosition->isFixedRefOfRegMask(candidateBit))
+ {
+ // Either there is a fixed reference due to this node, or one associated with a
+ // fixed use fed by a def at this node.
+ // In either case, we must use this register as it's the only candidate
+ // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going
+ // to remain live until the use, we should set the candidates to allRegs(regType)
+ // to avoid a spill - codegen can then insert the copy.
+ assert(candidates == candidateBit);
+ physRegNextLocation = MaxLocation;
+ farthestRefPosWeight = BB_MAX_WEIGHT;
+ }
+ else
+ {
+ physRegNextLocation = physRegRecord->getNextRefLocation();
+
+ // If refPosition requires a fixed register, we should reject all others.
+            // Otherwise, we will still evaluate all physRegs even though their next location is
+            // not better than the farthestLocation found so far.
+ //
+ // TODO: this method should be using an approach similar to tryAllocateFreeReg()
+ // where it uses a regOrder array to avoid iterating over any but the single
+ // fixed candidate.
+ if (refPosition->isFixedRegRef && physRegNextLocation < farthestLocation)
+ {
+ continue;
+ }
+ }
+
+ // If this register is not assigned to an interval, either
+ // - it has a FixedReg reference at the current location that is not this reference, OR
+ // - this is the special case of a fixed loReg, where this interval has a use at the same location
+ // In either case, we cannot use it
+
+ if (assignedInterval == nullptr)
+ {
+ RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition();
+
+#ifndef _TARGET_ARM64_
+ // TODO-Cleanup: Revisit this after Issue #3524 is complete
+ // On ARM64 the nodeLocation is not always == refLocation, Disabling this assert for now.
+ assert(nextPhysRegPosition->nodeLocation == refLocation && candidateBit != candidates);
+#endif
+ continue;
+ }
+
+ RefPosition* recentAssignedRef = assignedInterval->recentRefPosition;
+
+ if (!assignedInterval->isActive)
+ {
+ // The assigned interval has a reference at this location - otherwise, we would have found
+ // this in tryAllocateFreeReg().
+ // Note that we may or may not have actually handled the reference yet, so it could either
+            // be recentAssignedRef, or the next reference.
+ assert(recentAssignedRef != nullptr);
+ if (recentAssignedRef->nodeLocation != refLocation)
+ {
+ if (recentAssignedRef->nodeLocation + 1 == refLocation)
+ {
+ assert(recentAssignedRef->delayRegFree);
+ }
+ else
+ {
+ RefPosition* nextAssignedRef = recentAssignedRef->nextRefPosition;
+ assert(nextAssignedRef != nullptr);
+ assert(nextAssignedRef->nodeLocation == refLocation ||
+ (nextAssignedRef->nodeLocation + 1 == refLocation && nextAssignedRef->delayRegFree));
+ }
+ }
+ continue;
+ }
+
+ // If we have a recentAssignedRef, check that it is going to be OK to spill it
+ //
+        // TODO-Review: Under what conditions would recentAssignedRef be null?
+ unsigned recentAssignedRefWeight = BB_ZERO_WEIGHT;
+ if (recentAssignedRef != nullptr)
+ {
+ if (recentAssignedRef->nodeLocation == refLocation)
+ {
+ // We can't spill a register that's being used at the current location
+ RefPosition* physRegRef = physRegRecord->recentRefPosition;
+ continue;
+ }
+
+ // If the current position has the candidate register marked to be delayed,
+ // check if the previous location is using this register, if that's the case we have to skip
+ // since we can't spill this register.
+ if (recentAssignedRef->delayRegFree && (refLocation == recentAssignedRef->nodeLocation + 1))
+ {
+ continue;
+ }
+
+ // We don't prefer to spill a register if the weight of recentAssignedRef > weight
+ // of the spill candidate found so far. We would consider spilling a greater weight
+            // ref position only if the refPosition being allocated requires a register.
+ recentAssignedRefWeight = getWeight(recentAssignedRef);
+ if (recentAssignedRefWeight > farthestRefPosWeight)
+ {
+ continue;
+ }
+ }
+
+ LsraLocation nextLocation = assignedInterval->getNextRefLocation();
+
+ // We should never spill a register that's occupied by an Interval with its next use at the current location.
+ // Normally this won't occur (unless we actually had more uses in a single node than there are registers),
+ // because we'll always find something with a later nextLocation, but it can happen in stress when
+ // we have LSRA_SELECT_NEAREST.
+ if ((nextLocation == refLocation) && !refPosition->isFixedRegRef)
+ {
+ continue;
+ }
+
+ if (nextLocation > physRegNextLocation)
+ {
+ nextLocation = physRegNextLocation;
+ }
+
+ bool isBetterLocation;
+
+#ifdef DEBUG
+ if (doSelectNearest() && farthestRefPhysRegRecord != nullptr)
+ {
+ isBetterLocation = (nextLocation <= farthestLocation);
+ }
+ else
+#endif
+ // This if-stmt is associated with the above else
+ if (recentAssignedRefWeight < farthestRefPosWeight)
+ {
+ isBetterLocation = true;
+ }
+ else
+ {
+            // This would mean the weight of the spill ref position we found so far is equal
+            // to the weight of the ref position that is being evaluated. In this case
+            // we prefer to spill the ref position whose distance to its next reference is
+ // the farthest.
+ assert(recentAssignedRefWeight == farthestRefPosWeight);
+
+ // If allocateIfProfitable=true, the first spill candidate selected
+ // will be based on weight alone. After we have found a spill
+ // candidate whose weight is less than the 'refPosition', we will
+ // consider farthest distance when there is a tie in weights.
+ // This is to ensure that we don't spill a ref position whose
+ // weight is equal to weight of 'refPosition'.
+ if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr)
+ {
+ isBetterLocation = false;
+ }
+ else
+ {
+ if (nextLocation > farthestLocation)
+ {
+ isBetterLocation = true;
+ }
+ else if (nextLocation == farthestLocation)
+ {
+ // Both weight and distance are equal.
+ // Prefer that ref position which is marked both reload and
+                    // allocate if profitable. These ref positions don't need
+                    // to be spilled as they are already in memory and
+ // codegen considers them as contained memory operands.
+ isBetterLocation = (recentAssignedRef != nullptr) && recentAssignedRef->reload &&
+ recentAssignedRef->AllocateIfProfitable();
+ }
+ else
+ {
+ isBetterLocation = false;
+ }
+ }
+ }
+
+ if (isBetterLocation)
+ {
+ farthestLocation = nextLocation;
+ farthestRefPhysRegRecord = physRegRecord;
+ farthestRefPosWeight = recentAssignedRefWeight;
+ }
+ }
+
+#if DEBUG
+ if (allocateIfProfitable)
+ {
+        // There may not be a spill candidate; if one is found,
+        // its weight must be less than the weight of 'refPosition'.
+ assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition)));
+ }
+ else
+ {
+ // Must have found a spill candidate.
+ assert((farthestRefPhysRegRecord != nullptr) && (farthestLocation > refLocation || refPosition->isFixedRegRef));
+ }
+#endif
+
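+    // If we found a spill candidate, unassign (spill) its current interval and assign the register to 'current'.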
+ if (farthestRefPhysRegRecord != nullptr)
+ {
+ foundReg = farthestRefPhysRegRecord->regNum;
+ unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition);
+ assignPhysReg(farthestRefPhysRegRecord, current);
+ refPosition->registerAssignment = genRegMask(foundReg);
+ }
+ else
+ {
+ foundReg = REG_NA;
+ refPosition->registerAssignment = RBM_NONE;
+ }
+
+ return foundReg;
+}
+
+// Grab a register to use to copy and then immediately use.
+// This is called only for localVar intervals that already have a register
+// assignment that is not compatible with the current RefPosition.
+// This is not like regular assignment, because we don't want to change
+// any preferences or existing register assignments.
+// Prefer a free register that's got the earliest next use.
+// Otherwise, spill something with the farthest next use
+//
+regNumber LinearScan::assignCopyReg(RefPosition* refPosition)
+{
+ Interval* currentInterval = refPosition->getInterval();
+ assert(currentInterval != nullptr);
+ assert(currentInterval->isActive);
+
+ bool foundFreeReg = false;
+ RegRecord* bestPhysReg = nullptr;
+ LsraLocation bestLocation = MinLocation;
+ regMaskTP candidates = refPosition->registerAssignment;
+
+ // Save the relatedInterval, if any, so that it doesn't get modified during allocation.
+ Interval* savedRelatedInterval = currentInterval->relatedInterval;
+ currentInterval->relatedInterval = nullptr;
+
+    // We don't really want to change the default assignment,
+ // so 1) pretend this isn't active, and 2) remember the old reg
+ regNumber oldPhysReg = currentInterval->physReg;
+ RegRecord* oldRegRecord = currentInterval->assignedReg;
+ assert(oldRegRecord->regNum == oldPhysReg);
+ currentInterval->isActive = false;
+
+ regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition);
+ if (allocatedReg == REG_NA)
+ {
+ allocatedReg = allocateBusyReg(currentInterval, refPosition, false);
+ }
+
+ // Now restore the old info
+ currentInterval->relatedInterval = savedRelatedInterval;
+ currentInterval->physReg = oldPhysReg;
+ currentInterval->assignedReg = oldRegRecord;
+ currentInterval->isActive = true;
+
+ refPosition->copyReg = true;
+ return allocatedReg;
+}
+
+// Check if the given RegRecord is already assigned to another interval; if so, unassign it,
+// then set its assignedInterval to 'interval'.
+//
+void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval)
+{
+ if (regRec->assignedInterval != nullptr && regRec->assignedInterval != interval)
+ {
+ // This is allocated to another interval. Either it is inactive, or it was allocated as a
+ // copyReg and is therefore not the "assignedReg" of the other interval. In the latter case,
+ // we simply unassign it - in the former case we need to set the physReg on the interval to
+ // REG_NA to indicate that it is no longer in that register.
+ // The lack of checking for this case resulted in an assert in the retail version of System.dll,
+ // in method SerialStream.GetDcbFlag.
+ // Note that we can't check for the copyReg case, because we may have seen a more recent
+ // RefPosition for the Interval that was NOT a copyReg.
+ if (regRec->assignedInterval->assignedReg == regRec)
+ {
+ assert(regRec->assignedInterval->isActive == false);
+ regRec->assignedInterval->physReg = REG_NA;
+ }
+ unassignPhysReg(regRec->regNum);
+ }
+
+ regRec->assignedInterval = interval;
+}
+
+// Assign the given physical register record to the given interval
+void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval)
+{
+ regMaskTP assignedRegMask = genRegMask(regRec->regNum);
+ compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(dumpTerse));
+
+ checkAndAssignInterval(regRec, interval);
+ interval->assignedReg = regRec;
+
+#ifdef _TARGET_ARM_
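+    // A TYP_DOUBLE interval occupies a pair of float registers on ARM, so also assign the adjacent register.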
+ if ((interval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+
+ checkAndAssignInterval(nextRegRec, interval);
+ }
+#endif // _TARGET_ARM_
+
+ interval->physReg = regRec->regNum;
+ interval->isActive = true;
+ if (interval->isLocalVar)
+ {
+ // Prefer this register for future references
+ interval->updateRegisterPreferences(assignedRegMask);
+ }
+}
+
+//------------------------------------------------------------------------
+// spillInterval: Spill the given Interval between "fromRefPosition" and "toRefPosition"
+//
+// Arguments:
+//    interval        - The Interval to be spilled
+//    fromRefPosition - The RefPosition at which the Interval is to be spilled
+//    toRefPosition   - The RefPosition at which it must be reloaded
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// fromRefPosition and toRefPosition must not be null
+//
+void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition)
+{
+ assert(fromRefPosition != nullptr && toRefPosition != nullptr);
+ assert(fromRefPosition->getInterval() == interval && toRefPosition->getInterval() == interval);
+ assert(fromRefPosition->nextRefPosition == toRefPosition);
+
+ if (!fromRefPosition->lastUse)
+ {
+        // Lcl var def/use ref positions, even if reg-optional, should be marked as
+        // spillAfter if they were not allocated a register.
+ if (!fromRefPosition->RequiresRegister() && !(interval->isLocalVar && fromRefPosition->IsActualRef()))
+ {
+ fromRefPosition->registerAssignment = RBM_NONE;
+ }
+ else
+ {
+ fromRefPosition->spillAfter = true;
+ }
+ }
+ assert(toRefPosition != nullptr);
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_SPILL, interval);
+ }
+#endif // DEBUG
+
+ interval->isActive = false;
+ interval->isSpilled = true;
+
+ // If fromRefPosition occurs before the beginning of this block, mark this as living in the stack
+ // on entry to this block.
+ if (fromRefPosition->nodeLocation <= curBBStartLocation)
+ {
+ // This must be a lclVar interval
+ assert(interval->isLocalVar);
+ setInVarRegForBB(curBBNum, interval->varNum, REG_STK);
+ }
+}
+
+//------------------------------------------------------------------------
+// unassignPhysRegNoSpill: Unassign the given physical register record from
+// an active interval, without spilling.
+//
+// Arguments:
+//    regRec - the RegRecord to be unassigned
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The assignedInterval must not be null, and must be active.
+//
+// Notes:
+// This method is used to unassign a register when an interval needs to be moved to a
+// different register, but not (yet) spilled.
+
+void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr && assignedInterval->isActive);
+ assignedInterval->isActive = false;
+ unassignPhysReg(regRec, nullptr);
+ assignedInterval->isActive = true;
+}
+
+//------------------------------------------------------------------------
+// checkAndClearInterval: Clear the assignedInterval for the given
+// physical register record
+//
+// Arguments:
+//    regRec - the physical RegRecord to be unassigned
+// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
+// or nullptr if we aren't spilling
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// see unassignPhysReg
+//
+void LinearScan::checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr);
+ regNumber thisRegNum = regRec->regNum;
+
+ if (spillRefPosition == nullptr)
+ {
+ // Note that we can't assert for the copyReg case
+ //
+ if (assignedInterval->physReg == thisRegNum)
+ {
+ assert(assignedInterval->isActive == false);
+ }
+ }
+ else
+ {
+ assert(spillRefPosition->getInterval() == assignedInterval);
+ }
+
+ regRec->assignedInterval = nullptr;
+}
+
+//------------------------------------------------------------------------
+// unassignPhysReg: Unassign the given physical register record, and spill the
+// assignedInterval at the given spillRefPosition, if any.
+//
+// Arguments:
+//    regRec - the RegRecord to be unassigned
+// spillRefPosition - The RefPosition at which the assignedInterval is to be spilled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The assignedInterval must not be null.
+// If spillRefPosition is null, the assignedInterval must be inactive, or not currently
+// assigned to this register (e.g. this is a copyReg for that Interval).
+// Otherwise, spillRefPosition must be associated with the assignedInterval.
+//
+void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition)
+{
+ Interval* assignedInterval = regRec->assignedInterval;
+ assert(assignedInterval != nullptr);
+ checkAndClearInterval(regRec, spillRefPosition);
+ regNumber thisRegNum = regRec->regNum;
+
+#ifdef _TARGET_ARM_
+ if ((assignedInterval->registerType == TYP_DOUBLE) && isFloatRegType(regRec->registerType))
+ {
+ regNumber nextRegNum = REG_NEXT(regRec->regNum);
+ RegRecord* nextRegRec = getRegisterRecord(nextRegNum);
+ checkAndClearInterval(nextRegRec, spillRefPosition);
+ }
+#endif // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (VERBOSE && !dumpTerse)
+ {
+ printf("unassigning %s: ", getRegName(regRec->regNum));
+ assignedInterval->dump();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ RefPosition* nextRefPosition = nullptr;
+ if (spillRefPosition != nullptr)
+ {
+ nextRefPosition = spillRefPosition->nextRefPosition;
+ }
+
+ if (assignedInterval->physReg != REG_NA && assignedInterval->physReg != thisRegNum)
+ {
+ // This must have been a temporary copy reg, but we can't assert that because there
+ // may have been intervening RefPositions that were not copyRegs.
+ regRec->assignedInterval = nullptr;
+ return;
+ }
+
+ regNumber victimAssignedReg = assignedInterval->physReg;
+ assignedInterval->physReg = REG_NA;
+
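+ // Spill only if the interval is still active and has further references; otherwise it can
+ // simply be unassigned.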
+ bool spill = assignedInterval->isActive && nextRefPosition != nullptr;
+ if (spill)
+ {
+ // If this is an active interval, it must have a recentRefPosition,
+ // otherwise it would not be active
+ assert(spillRefPosition != nullptr);
+
+#if 0
+ // TODO-CQ: Enable this and insert an explicit GT_COPY (otherwise there's no way to communicate
+ // to codegen that we want the copyReg to be the new home location).
+ // If the last reference was a copyReg, and we're spilling the register
+ // it was copied from, then make the copyReg the new primary location
+ // if possible
+ if (spillRefPosition->copyReg)
+ {
+ regNumber copyFromRegNum = victimAssignedReg;
+ regNumber copyRegNum = genRegNumFromMask(spillRefPosition->registerAssignment);
+ if (copyFromRegNum == thisRegNum &&
+ getRegisterRecord(copyRegNum)->assignedInterval == assignedInterval)
+ {
+ assert(copyRegNum != thisRegNum);
+ assignedInterval->physReg = copyRegNum;
+ assignedInterval->assignedReg = this->getRegisterRecord(copyRegNum);
+ return;
+ }
+ }
+#endif // 0
+#ifdef DEBUG
+ // With JitStressRegs == 0x80 (LSRA_EXTEND_LIFETIMES), we may have a RefPosition
+ // that is not marked lastUse even though the treeNode is a lastUse. In that case
+ // we must not mark it for spill because the register will have been immediately freed
+ // after use. While we could conceivably add special handling for this case in codegen,
+ // it would be messy and undesirably cause the "bleeding" of LSRA stress modes outside
+ // of LSRA.
+ if (extendLifetimes() && assignedInterval->isLocalVar && RefTypeIsUse(spillRefPosition->refType) &&
+ spillRefPosition->treeNode != nullptr && (spillRefPosition->treeNode->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_SPILL_EXTENDED_LIFETIME, assignedInterval);
+ assignedInterval->isActive = false;
+ spill = false;
+ // If the spillRefPosition occurs before the beginning of this block, it will have
+ // been marked as living in this register on entry to this block, but we now need
+ // to mark this as living on the stack.
+ if (spillRefPosition->nodeLocation <= curBBStartLocation)
+ {
+ setInVarRegForBB(curBBNum, assignedInterval->varNum, REG_STK);
+ if (spillRefPosition->nextRefPosition != nullptr)
+ {
+ assignedInterval->isSpilled = true;
+ }
+ }
+ else
+ {
+ // Otherwise, we need to mark spillRefPosition as lastUse, or the interval
+ // will remain active beyond its allocated range during the resolution phase.
+ spillRefPosition->lastUse = true;
+ }
+ }
+ else
+#endif // DEBUG
+ {
+ spillInterval(assignedInterval, spillRefPosition, nextRefPosition);
+ }
+ }
+ // Maintain the association with the interval, if it has more references.
+ // Or, if we "remembered" an interval assigned to this register, restore it.
+ if (nextRefPosition != nullptr)
+ {
+ assignedInterval->assignedReg = regRec;
+ }
+ else if (regRec->previousInterval != nullptr && regRec->previousInterval->assignedReg == regRec &&
+ regRec->previousInterval->getNextRefPosition() != nullptr)
+ {
+ regRec->assignedInterval = regRec->previousInterval;
+ regRec->previousInterval = nullptr;
+#ifdef DEBUG
+ if (spill)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, regRec->assignedInterval,
+ thisRegNum);
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL, regRec->assignedInterval, thisRegNum);
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ regRec->assignedInterval = nullptr;
+ regRec->previousInterval = nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// spillGCRefs: Spill any GC-type intervals that are currently in registers.
+//
+// Arguments:
+// killRefPosition - The RefPosition for the kill
+//
+// Return Value:
+// None.
+//
+void LinearScan::spillGCRefs(RefPosition* killRefPosition)
+{
+ // For each physical register that can hold a GC type,
+ // if it is occupied by an interval of a GC type, spill that interval.
+ regMaskTP candidateRegs = killRefPosition->registerAssignment;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ RegRecord* regRecord = getRegisterRecord(nextReg);
+ Interval* assignedInterval = regRecord->assignedInterval;
+ if (assignedInterval == nullptr || (assignedInterval->isActive == false) ||
+ !varTypeIsGC(assignedInterval->registerType))
+ {
+ continue;
+ }
+ unassignPhysReg(regRecord, assignedInterval->recentRefPosition);
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DONE_KILL_GC_REFS, nullptr, REG_NA, nullptr));
+}
+
+//------------------------------------------------------------------------
+// processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated
+//
+// Arguments:
+// currentBlock - the BasicBlock we have just finished allocating registers for
+//
+// Return Value:
+// None
+//
+// Notes:
+// Calls processBlockEndLocations() to set the outVarToRegMap, then gets the next block,
+// and sets the inVarToRegMap appropriately.
+
+void LinearScan::processBlockEndAllocation(BasicBlock* currentBlock)
+{
+ assert(currentBlock != nullptr);
+ processBlockEndLocations(currentBlock);
+ markBlockVisited(currentBlock);
+
+ // Get the next block to allocate.
+ // When the last block in the method has successors, there will be a final "RefTypeBB" to
+ // ensure that we get the varToRegMap set appropriately, but in that case we don't need
+ // to worry about "nextBlock".
+ BasicBlock* nextBlock = getNextBlock();
+ if (nextBlock != nullptr)
+ {
+ processBlockStartLocations(nextBlock, true);
+ }
+}
+
+//------------------------------------------------------------------------
+// rotateBlockStartLocation: When in the LSRA_BLOCK_BOUNDARY_ROTATE stress mode, attempt to
+// "rotate" the register assignment for a localVar to the next higher
+// register that is available.
+//
+// Arguments:
+// interval - the Interval for the variable whose register is getting rotated
+// targetReg - its register assignment from the predecessor block being used for live-in
+// availableRegs - registers available for use
+//
+// Return Value:
+// The new register to use.
+
+#ifdef DEBUG
+regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs)
+{
+ if (targetReg != REG_STK && getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE)
+ {
+ // If we're rotating the register locations at block boundaries, try to use
+ // the next higher register number of the appropriate register type.
+ regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs;
+ regNumber firstReg = REG_NA;
+ regNumber newReg = REG_NA;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ if (nextReg > targetReg)
+ {
+ newReg = nextReg;
+ break;
+ }
+ else if (firstReg == REG_NA)
+ {
+ firstReg = nextReg;
+ }
+ }
+ if (newReg == REG_NA)
+ {
+ assert(firstReg != REG_NA);
+ newReg = firstReg;
+ }
+ targetReg = newReg;
+ }
+ return targetReg;
+}
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// processBlockStartLocations: Update var locations on entry to 'currentBlock'
+//
+// Arguments:
+// currentBlock - the BasicBlock whose variable locations on entry are being updated
+// allocationPass - true if we are currently allocating registers (versus writing them back)
+//
+// Return Value:
+// None
+//
+// Notes:
+// During the allocation pass, we use the outVarToRegMap of the selected predecessor to
+// determine the lclVar locations for the inVarToRegMap.
+// During the resolution (write-back) pass, we only modify the inVarToRegMap in cases where
+// a lclVar was spilled after the block had been completed.
+void LinearScan::processBlockStartLocations(BasicBlock* currentBlock, bool allocationPass)
+{
+ unsigned predBBNum = blockInfo[currentBlock->bbNum].predBBNum;
+ VarToRegMap predVarToRegMap = getOutVarToRegMap(predBBNum);
+ VarToRegMap inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
+ bool hasCriticalInEdge = blockInfo[currentBlock->bbNum].hasCriticalInEdge;
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveIn, currentBlock->bbLiveIn);
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ VarSetOps::AssignNoCopy(compiler, liveIn, compiler->lvaTrackedVars);
+ }
+ // If we are rotating register assignments at block boundaries, we want to make the
+ // inactive registers available for the rotation.
+ regMaskTP inactiveRegs = RBM_NONE;
+#endif // DEBUG
+ regMaskTP liveRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, liveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ if (!compiler->lvaTable[varNum].lvLRACandidate)
+ {
+ continue;
+ }
+ regNumber targetReg;
+ Interval* interval = getIntervalForLocalVar(varNum);
+ RefPosition* nextRefPosition = interval->getNextRefPosition();
+ assert(nextRefPosition != nullptr);
+
+ if (allocationPass)
+ {
+ targetReg = predVarToRegMap[varIndex];
+ INDEBUG(targetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)));
+ inVarToRegMap[varIndex] = targetReg;
+ }
+ else // !allocationPass (i.e. resolution/write-back pass)
+ {
+ targetReg = inVarToRegMap[varIndex];
+ // There are four cases that we need to consider during the resolution pass:
+ // 1. This variable had a register allocated initially, and it was not spilled in the RefPosition
+ // that feeds this block. In this case, both targetReg and predVarToRegMap[varIndex] will be targetReg.
+ // 2. This variable had not been spilled prior to the end of predBB, but was later spilled, so
+ // predVarToRegMap[varIndex] will be REG_STK, but targetReg is its former allocated value.
+ // In this case, we will normally change it to REG_STK. We will update its "spilled" status when we
+ // encounter it in resolveLocalRef().
+ // 2a. If the next RefPosition is marked as a copyReg, we need to retain the allocated register. This is
+ // because the copyReg RefPosition will not have recorded the "home" register, yet downstream
+ // RefPositions rely on the correct "home" register.
+ // 3. This variable was spilled before we reached the end of predBB. In this case, both targetReg and
+ // predVarToRegMap[varIndex] will be REG_STK, and the next RefPosition will have been marked
+ // as reload during allocation time if necessary (note that by the time we actually reach the next
+ // RefPosition, we may be using a different predecessor, in which it may still be in a register).
+ // 4. This variable was spilled during the allocation of this block, so targetReg is REG_STK
+ // (because we set inVarToRegMap at the time we spilled it), but predVarToRegMap[varIndex]
+ // is not REG_STK. We retain the REG_STK value in the inVarToRegMap.
+ if (targetReg != REG_STK)
+ {
+ if (predVarToRegMap[varIndex] != REG_STK)
+ {
+ // Case #1 above.
+ assert(predVarToRegMap[varIndex] == targetReg ||
+ getLsraBlockBoundaryLocations() == LSRA_BLOCK_BOUNDARY_ROTATE);
+ }
+ else if (!nextRefPosition->copyReg)
+ {
+ // case #2 above.
+ inVarToRegMap[varIndex] = REG_STK;
+ targetReg = REG_STK;
+ }
+ // Else case 2a. - retain targetReg.
+ }
+ // Else case #3 or #4, we retain targetReg and nothing further to do or assert.
+ }
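+ // At this point, targetReg is the location (a register or REG_STK) where the variable should be
+ // live on entry to this block; reconcile the Interval and RegRecord state with that location.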
+ if (interval->physReg == targetReg)
+ {
+ if (interval->isActive)
+ {
+ assert(targetReg != REG_STK);
+ assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg &&
+ interval->assignedReg->assignedInterval == interval);
+ liveRegs |= genRegMask(targetReg);
+ continue;
+ }
+ }
+ else if (interval->physReg != REG_NA)
+ {
+ // This can happen if we are using the locations from a basic block other than the
+ // immediately preceding one - where the variable was in a different location.
+ if (targetReg != REG_STK)
+ {
+ // Unassign it from the register (it will get a new register below).
+ if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
+ {
+ interval->isActive = false;
+ unassignPhysReg(getRegisterRecord(interval->physReg), nullptr);
+ }
+ else
+ {
+ // This interval was live in this register the last time we saw a reference to it,
+ // but has since been displaced.
+ interval->physReg = REG_NA;
+ }
+ }
+ else if (allocationPass)
+ {
+ // Keep the register assignment - if another var has it, it will get unassigned.
+ // Otherwise, resolution will fix it up later, and it will be more
+ // likely to match other assignments this way.
+ interval->isActive = true;
+ liveRegs |= genRegMask(interval->physReg);
+ INDEBUG(inactiveRegs |= genRegMask(interval->physReg));
+ inVarToRegMap[varIndex] = interval->physReg;
+ }
+ else
+ {
+ interval->physReg = REG_NA;
+ }
+ }
+ if (targetReg != REG_STK)
+ {
+ RegRecord* targetRegRecord = getRegisterRecord(targetReg);
+ liveRegs |= genRegMask(targetReg);
+ if (!interval->isActive)
+ {
+ interval->isActive = true;
+ interval->physReg = targetReg;
+ interval->assignedReg = targetRegRecord;
+ }
+ Interval* assignedInterval = targetRegRecord->assignedInterval;
+ if (assignedInterval != interval)
+ {
+ // Is there another interval currently assigned to this register? If so unassign it.
+ if (assignedInterval != nullptr)
+ {
+ if (assignedInterval->assignedReg == targetRegRecord)
+ {
+ // If the interval is active, it will be set to active when we reach its new
+ // register assignment (which we must not yet have done, or it wouldn't still be
+ // assigned to this register).
+ assignedInterval->isActive = false;
+ unassignPhysReg(targetRegRecord, nullptr);
+ if (allocationPass && assignedInterval->isLocalVar &&
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] == targetReg)
+ {
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
+ }
+ else
+ {
+ // This interval is no longer assigned to this register.
+ targetRegRecord->assignedInterval = nullptr;
+ }
+ }
+ assignPhysReg(targetRegRecord, interval);
+ }
+ if (interval->recentRefPosition != nullptr && !interval->recentRefPosition->copyReg &&
+ interval->recentRefPosition->registerAssignment != genRegMask(targetReg))
+ {
+ interval->getNextRefPosition()->outOfOrder = true;
+ }
+ }
+ }
+
+ // Unassign any registers that are no longer live.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if ((liveRegs & genRegMask(reg)) == 0)
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ assert(assignedInterval->isLocalVar || assignedInterval->isConstant);
+ if (!assignedInterval->isConstant && assignedInterval->assignedReg == physRegRecord)
+ {
+ assignedInterval->isActive = false;
+ if (assignedInterval->getNextRefPosition() == nullptr)
+ {
+ unassignPhysReg(physRegRecord, nullptr);
+ }
+ inVarToRegMap[assignedInterval->getVarIndex(compiler)] = REG_STK;
+ }
+ else
+ {
+ // This interval may still be active, but was in another register in an
+ // intervening block.
+ physRegRecord->assignedInterval = nullptr;
+ }
+ }
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_START_BB, nullptr, REG_NA, currentBlock));
+}
+
+//------------------------------------------------------------------------
+// processBlockEndLocations: Record the variables occupying registers after completing the current block.
+//
+// Arguments:
+// currentBlock - the block we have just completed.
+//
+// Return Value:
+// None
+//
+// Notes:
+// This must be called both during the allocation and resolution (write-back) phases.
+// This is because we need to have the outVarToRegMap locations in order to set the locations
+// at successor blocks during allocation time, but if lclVars are spilled after a block has been
+// completed, we need to record the REG_STK location for those variables at resolution time.
+
+void LinearScan::processBlockEndLocations(BasicBlock* currentBlock)
+{
+ assert(currentBlock != nullptr && currentBlock->bbNum == curBBNum);
+ VarToRegMap outVarToRegMap = getOutVarToRegMap(curBBNum);
+
+ VARSET_TP VARSET_INIT_NOCOPY(liveOut, currentBlock->bbLiveOut);
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ VarSetOps::AssignNoCopy(compiler, liveOut, compiler->lvaTrackedVars);
+ }
+#endif // DEBUG
+ regMaskTP liveRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter, liveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (interval->isActive)
+ {
+ assert(interval->physReg != REG_NA && interval->physReg != REG_STK);
+ outVarToRegMap[varIndex] = interval->physReg;
+ }
+ else
+ {
+ outVarToRegMap[varIndex] = REG_STK;
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_END_BB));
+}
+
+#ifdef DEBUG
+void LinearScan::dumpRefPositions(const char* str)
+{
+ printf("------------\n");
+ printf("REFPOSITIONS %s: \n", str);
+ printf("------------\n");
+ for (auto& refPos : refPositions)
+ {
+ refPos.dump();
+ }
+}
+#endif // DEBUG
+
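+//------------------------------------------------------------------------
+// registerIsFree: Determine whether the given physical register is currently free.
+//
+// Arguments:
+// regNum - the register of interest
+// regType - the type for which the register would be used; on ARM, TYP_DOUBLE
+// also requires the second half of the double register pair to be free
+//
+// Return Value:
+// True if the register (and, for ARM doubles, its pair) is free.
+//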
+bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType)
+{
+ RegRecord* physRegRecord = getRegisterRecord(regNum);
+
+ bool isFree = physRegRecord->isFree();
+
+#ifdef _TARGET_ARM_
+ if (isFree && regType == TYP_DOUBLE)
+ {
+ isFree = getRegisterRecord(REG_NEXT(regNum))->isFree();
+ }
+#endif // _TARGET_ARM_
+
+ return isFree;
+}
+
+//------------------------------------------------------------------------
+// LinearScan::freeRegister: Make a register available for use
+//
+// Arguments:
+// physRegRecord - the RegRecord for the register to be freed.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// None.
+// It may be that the RegRecord has already been freed, e.g. due to a kill,
+// in which case this method has no effect.
+//
+// Notes:
+// If there is currently an Interval assigned to this register, and it has
+// more references (i.e. this is a local last-use, but more uses and/or
+// defs remain), it will remain assigned to the physRegRecord. However, since
+// it is marked inactive, the register will be available, albeit less desirable
+// to allocate.
+void LinearScan::freeRegister(RegRecord* physRegRecord)
+{
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+ // It may have already been freed by a "Kill"
+ if (assignedInterval != nullptr)
+ {
+ assignedInterval->isActive = false;
+ // If this is a constant interval that we may encounter again (i.e. its value could be reused),
+ // don't unassign it until we need the register.
+ if (!assignedInterval->isConstant)
+ {
+ RefPosition* nextRefPosition = assignedInterval->getNextRefPosition();
+ // Unassign the register only if there are no more RefPositions, or the next
+ // one is a def. Note that the latter condition doesn't actually ensure that
+ // there aren't subsequent uses that could be reached by a def in the assigned
+ // register, but is merely a heuristic to avoid tying up the register (or using
+ // it when it's non-optimal). A better alternative would be to use SSA, so that
+ // we wouldn't unnecessarily link separate live ranges to the same register.
+ if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))
+ {
+ unassignPhysReg(physRegRecord, nullptr);
+ }
+ }
+ }
+}
+
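+//------------------------------------------------------------------------
+// freeRegisters: Free any registers in 'regsToFree'.
+//
+// Arguments:
+// regsToFree - the mask of registers to free (may be RBM_NONE)
+//
+// Return Value:
+// None.
+//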
+void LinearScan::freeRegisters(regMaskTP regsToFree)
+{
+ if (regsToFree == RBM_NONE)
+ {
+ return;
+ }
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS));
+ while (regsToFree != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(regsToFree);
+ regsToFree &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ freeRegister(getRegisterRecord(nextReg));
+ }
+}
+
+// Actual register allocation, accomplished by iterating over all of the previously
+// constructed RefPositions, in order, and assigning registers to their Intervals.
+// Loosely based on raAssignVars()
+//
+void LinearScan::allocateRegisters()
+{
+ JITDUMP("*************** In LinearScan::allocateRegisters()\n");
+ DBEXEC(VERBOSE, lsraDumpIntervals("before allocateRegisters"));
+
+ // at start, nothing is active except for register args
+ for (auto& interval : intervals)
+ {
+ Interval* currentInterval = &interval;
+ currentInterval->recentRefPosition = nullptr;
+ currentInterval->isActive = false;
+ if (currentInterval->isLocalVar)
+ {
+ LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
+ if (varDsc->lvIsRegArg && currentInterval->firstRefPosition != nullptr)
+ {
+ currentInterval->isActive = true;
+ }
+ }
+ }
+
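+ // Similarly, reset the state of the physical RegRecords.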
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ getRegisterRecord(reg)->recentRefPosition = nullptr;
+ getRegisterRecord(reg)->isActive = false;
+ }
+
+#ifdef DEBUG
+ regNumber lastAllocatedReg = REG_NA;
+ if (VERBOSE)
+ {
+ dumpRefPositions("BEFORE ALLOCATION");
+ dumpVarRefPositions("BEFORE ALLOCATION");
+
+ printf("\n\nAllocating Registers\n"
+ "--------------------\n");
+ if (dumpTerse)
+ {
+ dumpRegRecordHeader();
+ // Now print an empty indent
+ printf(indentFormat, "");
+ }
+ }
+#endif // DEBUG
+
+ BasicBlock* currentBlock = nullptr;
+
+ LsraLocation prevLocation = MinLocation;
+ regMaskTP regsToFree = RBM_NONE;
+ regMaskTP delayRegsToFree = RBM_NONE;
+
+ // This is the most recent RefPosition for which a register was allocated
+ // - currently only used for DEBUG but maintained in non-debug, for clarity of code
+ // (and will be optimized away because in non-debug spillAlways() unconditionally returns false)
+ RefPosition* lastAllocatedRefPosition = nullptr;
+
+ bool handledBlockEnd = false;
+
+ for (auto& refPosition : refPositions)
+ {
+ RefPosition* currentRefPosition = &refPosition;
+
+#ifdef DEBUG
+ // Set the activeRefPosition to null until we're done with any boundary handling.
+ activeRefPosition = nullptr;
+ if (VERBOSE)
+ {
+ if (dumpTerse)
+ {
+ // We're really dumping the RegRecords "after" the previous RefPosition, but it's more convenient
+ // to do this here, since there are a number of "continue"s in this loop.
+ dumpRegRecords();
+ }
+ else
+ {
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+
+ // This is the previousRefPosition of the current Referent, if any
+ RefPosition* previousRefPosition = nullptr;
+
+ Interval* currentInterval = nullptr;
+ Referenceable* currentReferent = nullptr;
+ bool isInternalRef = false;
+ RefType refType = currentRefPosition->refType;
+
+ currentReferent = currentRefPosition->referent;
+
+ if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef &&
+ !lastAllocatedRefPosition->getInterval()->isInternal &&
+ (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar))
+ {
+ assert(lastAllocatedRefPosition->registerAssignment != RBM_NONE);
+ RegRecord* regRecord = lastAllocatedRefPosition->getInterval()->assignedReg;
+ unassignPhysReg(regRecord, lastAllocatedRefPosition);
+ // Now set lastAllocatedRefPosition to null, so that we don't try to spill it again
+ lastAllocatedRefPosition = nullptr;
+ }
+
+ // We wait to free any registers until we've completed all the
+ // uses for the current node.
+ // This avoids reusing registers too soon.
+ // We free before the last true def (after all the uses & internal
+ // registers), and then again at the beginning of the next node.
+ // This is made easier by assigning two LsraLocations per node - one
+ // for all the uses, internal registers & all but the last def, and
+ // another for the final def (if any).
+
+ LsraLocation currentLocation = currentRefPosition->nodeLocation;
+
+ if ((regsToFree | delayRegsToFree) != RBM_NONE)
+ {
+ bool doFreeRegs = false;
+ // Free at a new location, or at a basic block boundary
+ if (currentLocation > prevLocation || refType == RefTypeBB)
+ {
+ doFreeRegs = true;
+ }
+
+ if (doFreeRegs)
+ {
+ freeRegisters(regsToFree);
+ regsToFree = delayRegsToFree;
+ delayRegsToFree = RBM_NONE;
+ }
+ }
+ prevLocation = currentLocation;
+
+ // get previous refposition, then current refpos is the new previous
+ if (currentReferent != nullptr)
+ {
+ previousRefPosition = currentReferent->recentRefPosition;
+ currentReferent->recentRefPosition = currentRefPosition;
+ }
+ else
+ {
+ assert((refType == RefTypeBB) || (refType == RefTypeKillGCRefs));
+ }
+
+ // For the purposes of register resolution, we handle the DummyDefs before
+ // the block boundary - so the RefTypeBB is after all the DummyDefs.
+ // However, for the purposes of allocation, we want to handle the block
+ // boundary first, so that we can free any registers occupied by lclVars
+ // that aren't live in the next block and make them available for the
+ // DummyDefs.
+
+ if (!handledBlockEnd && (refType == RefTypeBB || refType == RefTypeDummyDef))
+ {
+ // Free any delayed regs (now in regsToFree) before processing the block boundary
+ freeRegisters(regsToFree);
+ regsToFree = RBM_NONE;
+ handledBlockEnd = true;
+ curBBStartLocation = currentRefPosition->nodeLocation;
+ if (currentBlock == nullptr)
+ {
+ currentBlock = startBlockSequence();
+ }
+ else
+ {
+ processBlockEndAllocation(currentBlock);
+ currentBlock = moveToNextBlock();
+ }
+#ifdef DEBUG
+ if (VERBOSE && currentBlock != nullptr && !dumpTerse)
+ {
+ currentBlock->dspBlockHeader(compiler);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+#ifdef DEBUG
+ activeRefPosition = currentRefPosition;
+ if (VERBOSE)
+ {
+ if (dumpTerse)
+ {
+ dumpRefPositionShort(currentRefPosition, currentBlock);
+ }
+ else
+ {
+ currentRefPosition->dump();
+ }
+ }
+#endif // DEBUG
+
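+ // The RefTypeBB position itself requires no allocation; the block boundary was handled
+ // above, so just reset the flag for the next block boundary.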
+ if (refType == RefTypeBB)
+ {
+ handledBlockEnd = false;
+ continue;
+ }
+
+ if (refType == RefTypeKillGCRefs)
+ {
+ spillGCRefs(currentRefPosition);
+ continue;
+ }
+
+ // If this is a FixedReg, disassociate any inactive constant interval from this register.
+ // Otherwise, do nothing.
+ if (refType == RefTypeFixedReg)
+ {
+ RegRecord* regRecord = currentRefPosition->getReg();
+ if (regRecord->assignedInterval != nullptr && !regRecord->assignedInterval->isActive &&
+ regRecord->assignedInterval->isConstant)
+ {
+ regRecord->assignedInterval = nullptr;
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg()));
+ continue;
+ }
+
+ // If this is an exposed use, do nothing - this is merely a placeholder to attempt to
+ // ensure that a register is allocated for the full lifetime. The resolution logic
+ // will take care of moving to the appropriate register if needed.
+
+ if (refType == RefTypeExpUse)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE));
+ continue;
+ }
+
+ regNumber assignedRegister = REG_NA;
+
+ if (currentRefPosition->isIntervalRef())
+ {
+ currentInterval = currentRefPosition->getInterval();
+ assignedRegister = currentInterval->physReg;
+#ifdef DEBUG
+ if (VERBOSE && !dumpTerse)
+ {
+ currentInterval->dump();
+ }
+#endif // DEBUG
+
+ // Identify the special cases where we decide up-front not to allocate
+ bool allocate = true;
+ bool didDump = false;
+
+ if (refType == RefTypeParamDef || refType == RefTypeZeroInit)
+ {
+ // For a ParamDef with a weighted refCount less than unity, don't enregister it at entry.
+ // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly
+ // inserting a store.
+ LclVarDsc* varDsc = currentInterval->getLocalVar(compiler);
+ assert(varDsc != nullptr);
+ if (refType == RefTypeParamDef && varDsc->lvRefCntWtd <= BB_UNITY_WEIGHT)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval));
+ didDump = true;
+ allocate = false;
+ }
+ // If it has no actual references, mark it as "lastUse"; since it is not actually part
+ // of any flow it won't have been marked during dataflow. Otherwise, if we allocate a
+ // register we won't unassign it.
+ else if (currentRefPosition->nextRefPosition == nullptr)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval));
+ currentRefPosition->lastUse = true;
+ }
+ }
+#ifdef FEATURE_SIMD
+ else if (refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse)
+ {
+ Interval* lclVarInterval = currentInterval->relatedInterval;
+ if (lclVarInterval->physReg == REG_NA)
+ {
+ allocate = false;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ if (allocate == false)
+ {
+ if (assignedRegister != REG_NA)
+ {
+ unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
+ }
+ else if (!didDump)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ didDump = true;
+ }
+ currentRefPosition->registerAssignment = RBM_NONE;
+ continue;
+ }
+
+ if (currentInterval->isSpecialPutArg)
+ {
+ assert(!currentInterval->isLocalVar);
+ Interval* srcInterval = currentInterval->relatedInterval;
+ assert(srcInterval->isLocalVar);
+ if (refType == RefTypeDef)
+ {
+ assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1);
+ RegRecord* physRegRecord = srcInterval->assignedReg;
+
+ // For a putarg_reg to be special, its next use location has to be the same
+ // as fixed reg's next kill location. Otherwise, if source lcl var's next use
+ // is after the kill of fixed reg but before putarg_reg's next use, fixed reg's
+ // kill would lead to spill of source but not the putarg_reg if it were treated
+ // as special.
+ if (srcInterval->isActive &&
+ genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment &&
+ currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation())
+ {
+ assert(physRegRecord->regNum == srcInterval->physReg);
+
+ // Special putarg_reg acts as a pass-thru since both source lcl var
+ // and putarg_reg have the same register allocated. Physical reg
+ // record of reg continue to point to source lcl var's interval
+ // instead of to putarg_reg's interval. So if a spill of reg
+ // allocated to source lcl var happens, to reallocate to another
+ // tree node, before its use at call node it will lead to spill of
+ // lcl var instead of putarg_reg since physical reg record is pointing
+ // to lcl var's interval. As a result, arg reg would get trashed leading
+ // to bad codegen. The assumption here is that source lcl var of a
+ // special putarg_reg doesn't get spilled and re-allocated prior to
+ // its use at the call node. This is ensured by marking physical reg
+ // record as busy until next kill.
+ physRegRecord->isBusyUntilNextKill = true;
+ }
+ else
+ {
+ currentInterval->isSpecialPutArg = false;
+ }
+ }
+ // If this is still a SpecialPutArg, continue;
+ if (currentInterval->isSpecialPutArg)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval,
+ currentRefPosition->assignedReg()));
+ continue;
+ }
+ }
+
+ if (assignedRegister == REG_NA && RefTypeIsUse(refType))
+ {
+ currentRefPosition->reload = true;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister));
+ }
+ }
+
+ regMaskTP assignedRegBit = RBM_NONE;
+ bool isInRegister = false;
+ if (assignedRegister != REG_NA)
+ {
+ isInRegister = true;
+ assignedRegBit = genRegMask(assignedRegister);
+ if (!currentInterval->isActive)
+ {
+ // If this is a use, it must have started the block on the stack, but the register
+ // was available for use so we kept the association.
+ if (RefTypeIsUse(refType))
+ {
+ assert(inVarToRegMaps[curBBNum][currentInterval->getVarIndex(compiler)] == REG_STK &&
+ previousRefPosition->nodeLocation <= curBBStartLocation);
+ isInRegister = false;
+ }
+ else
+ {
+ currentInterval->isActive = true;
+ }
+ }
+ assert(currentInterval->assignedReg != nullptr &&
+ currentInterval->assignedReg->regNum == assignedRegister &&
+ currentInterval->assignedReg->assignedInterval == currentInterval);
+ }
+
+ // If this is a physical register, we unconditionally assign it to itself!
+ if (currentRefPosition->isPhysRegRef)
+ {
+ RegRecord* currentReg = currentRefPosition->getReg();
+ Interval* assignedInterval = currentReg->assignedInterval;
+
+ if (assignedInterval != nullptr)
+ {
+ unassignPhysReg(currentReg, assignedInterval->recentRefPosition);
+ }
+ currentReg->isActive = true;
+ assignedRegister = currentReg->regNum;
+ assignedRegBit = genRegMask(assignedRegister);
+ if (refType == RefTypeKill)
+ {
+ currentReg->isBusyUntilNextKill = false;
+ }
+ }
+ else if (previousRefPosition != nullptr)
+ {
+ assert(previousRefPosition->nextRefPosition == currentRefPosition);
+ assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment ||
+ currentRefPosition->outOfOrder || previousRefPosition->copyReg ||
+ previousRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef);
+ }
+ else if (assignedRegister != REG_NA)
+ {
+ // Handle the case where this is a preassigned register (i.e. parameter).
+ // We don't want to actually use the preassigned register if it's not
+ // going to cover the lifetime - but we had to preallocate it to ensure
+ // that it remained live.
+ // TODO-CQ: At some point we may want to refine the analysis here, in case
+ // it might be beneficial to keep it in this reg for PART of the lifetime
+ if (currentInterval->isLocalVar)
+ {
+ regMaskTP preferences = currentInterval->registerPreferences;
+ bool keepAssignment = true;
+ bool matchesPreferences = (preferences & genRegMask(assignedRegister)) != RBM_NONE;
+
+ // Will the assigned register cover the lifetime? If not, does it at least
+ // meet the preferences for the next RefPosition?
+ RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg);
+ RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition();
+ if (nextPhysRegRefPos != nullptr &&
+ nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation)
+ {
+ // Check to see if the existing assignment matches the preferences (e.g. callee save registers)
+ // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos
+ // There must be a next RefPosition, because we know that the Interval extends beyond the
+ // nextPhysRegRefPos.
+ RefPosition* nextLclVarRefPos = currentRefPosition->nextRefPosition;
+ assert(nextLclVarRefPos != nullptr);
+ if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextLclVarRefPos->nodeLocation ||
+ physRegRecord->conflictingFixedRegReference(nextLclVarRefPos))
+ {
+ keepAssignment = false;
+ }
+ }
+ else if (refType == RefTypeParamDef && !matchesPreferences)
+ {
+ // Don't use the register, even if available, if it doesn't match the preferences.
+ // Note that this case is only for ParamDefs, for which we haven't yet taken preferences
+ // into account (we've just automatically got the initial location). In other cases,
+ // we would already have put it in a preferenced register, if it was available.
+ // TODO-CQ: Consider expanding this to check availability - that would duplicate
+ // code here, but otherwise we may wind up in this register anyway.
+ keepAssignment = false;
+ }
+
+ if (keepAssignment == false)
+ {
+ currentRefPosition->registerAssignment = allRegs(currentInterval->registerType);
+ unassignPhysRegNoSpill(physRegRecord);
+
+ // If the preferences are currently set to just this register, reset them to allRegs
+ // of the appropriate type (just as we reset the registerAssignment for this
+ // RefPosition above).
+ // Otherwise, simply remove this register from the preferences, if it's there.
+
+ if (currentInterval->registerPreferences == assignedRegBit)
+ {
+ currentInterval->registerPreferences = currentRefPosition->registerAssignment;
+ }
+ else
+ {
+ currentInterval->registerPreferences &= ~assignedRegBit;
+ }
+
+ assignedRegister = REG_NA;
+ assignedRegBit = RBM_NONE;
+ }
+ }
+ }
+
+ if (assignedRegister != REG_NA)
+ {
+ // If there is a conflicting fixed reference, insert a copy.
+ RegRecord* physRegRecord = getRegisterRecord(assignedRegister);
+ if (physRegRecord->conflictingFixedRegReference(currentRefPosition))
+ {
+ // We may have already reassigned the register to the conflicting reference.
+ // If not, we need to unassign this interval.
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ unassignPhysRegNoSpill(physRegRecord);
+ }
+ currentRefPosition->moveReg = true;
+ assignedRegister = REG_NA;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister));
+ }
+ else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0)
+ {
+ currentRefPosition->registerAssignment = assignedRegBit;
+ if (!currentReferent->isActive)
+ {
+ // If we've got an exposed use at the top of a block, the
+ // interval might not have been active. Otherwise if it's a use,
+ // the interval must be active.
+ if (refType == RefTypeDummyDef)
+ {
+ currentReferent->isActive = true;
+ assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval);
+ }
+ else
+ {
+ currentRefPosition->reload = true;
+ }
+ }
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister));
+ }
+ else
+ {
+ // This must be a localVar or a single-reg fixed use or a tree temp with conflicting def & use.
+
+ assert(currentInterval && (currentInterval->isLocalVar || currentRefPosition->isFixedRegRef ||
+ currentInterval->hasConflictingDefUse));
+
+ // It's already in a register, but not one we need.
+ // If it is a fixed use that is not marked "delayRegFree", there is already a FixedReg to ensure that
+ // the needed reg is not otherwise in use, so we can simply ignore it and codegen will do the copy.
+ // The reason we need special handling for the "delayRegFree" case is that we need to mark the
+ // fixed-reg as in-use and delayed (the FixedReg RefPosition doesn't handle the delay requirement).
+ // Otherwise, if this is a pure use localVar or tree temp, we assign a copyReg, but must free both regs
+ // if it is a last use.
+ if (!currentRefPosition->isFixedRegRef || currentRefPosition->delayRegFree)
+ {
+ if (!RefTypeIsDef(currentRefPosition->refType))
+ {
+ regNumber copyReg = assignCopyReg(currentRefPosition);
+ assert(copyReg != REG_NA);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg));
+ lastAllocatedRefPosition = currentRefPosition;
+ if (currentRefPosition->lastUse)
+ {
+ if (currentRefPosition->delayRegFree)
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval,
+ assignedRegister));
+ delayRegsToFree |=
+ (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
+ }
+ else
+ {
+ INDEBUG(
+ dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister));
+ regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment);
+ }
+ }
+ // If this is a tree temp (non-localVar) interval, we will need an explicit move.
+ if (!currentInterval->isLocalVar)
+ {
+ currentRefPosition->moveReg = true;
+ currentRefPosition->copyReg = false;
+ }
+ continue;
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister));
+ regsToFree |= genRegMask(assignedRegister);
+ // We want a new register, but we don't want this to be considered a spill.
+ assignedRegister = REG_NA;
+ if (physRegRecord->assignedInterval == currentInterval)
+ {
+ unassignPhysRegNoSpill(physRegRecord);
+ }
+ }
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, assignedRegister));
+ }
+ }
+ }
+
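+ // If we don't have a register at this point (or relinquished it above), try to allocate one now.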
+ if (assignedRegister == REG_NA)
+ {
+ bool allocateReg = true;
+
+ if (currentRefPosition->AllocateIfProfitable())
+ {
+ // We can avoid allocating a register if it is the last use requiring a reload.
+ if (currentRefPosition->lastUse && currentRefPosition->reload)
+ {
+ allocateReg = false;
+ }
+
+#ifdef DEBUG
+ // Under stress mode, don't attempt to allocate a reg to
+ // reg optional ref position.
+ if (allocateReg && regOptionalNoAlloc())
+ {
+ allocateReg = false;
+ }
+#endif
+ }
+
+ if (allocateReg)
+ {
+ // Try to allocate a register
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ }
+
+ // If no register was found, and if the currentRefPosition must have a register,
+ // then find a register to spill
+ if (assignedRegister == REG_NA)
+ {
+#ifdef FEATURE_SIMD
+ if (refType == RefTypeUpperVectorSaveDef)
+ {
+ // TODO-CQ: Determine whether copying to two integer callee-save registers would be profitable.
+ currentRefPosition->registerAssignment = (allRegs(TYP_FLOAT) & RBM_FLT_CALLEE_TRASH);
+ assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition);
+ // There MUST be caller-save registers available, because they have all just been killed.
+ assert(assignedRegister != REG_NA);
+ // Now, spill it.
+ // (These will look a bit backward in the dump, but it's a pain to dump the alloc before the spill).
+ unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition);
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, currentInterval, assignedRegister));
+ // Now set assignedRegister to REG_NA again so that we don't re-activate it.
+ assignedRegister = REG_NA;
+ }
+ else
+#endif // FEATURE_SIMD
+ if (currentRefPosition->RequiresRegister() || currentRefPosition->AllocateIfProfitable())
+ {
+ if (allocateReg)
+ {
+ assignedRegister = allocateBusyReg(currentInterval, currentRefPosition,
+ currentRefPosition->AllocateIfProfitable());
+ }
+
+ if (assignedRegister != REG_NA)
+ {
+ INDEBUG(
+ dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister));
+ }
+ else
+ {
+ // This can happen only for those ref positions that are to be allocated
+ // only if profitable.
+ noway_assert(currentRefPosition->AllocateIfProfitable());
+
+ currentRefPosition->registerAssignment = RBM_NONE;
+ currentRefPosition->reload = false;
+
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ }
+ }
+ else
+ {
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval));
+ currentRefPosition->registerAssignment = RBM_NONE;
+ currentInterval->isActive = false;
+ }
+ }
+#ifdef DEBUG
+ else
+ {
+ if (VERBOSE)
+ {
+ if (currentInterval->isConstant && (currentRefPosition->treeNode != nullptr) &&
+ currentRefPosition->treeNode->IsReuseRegVal())
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, assignedRegister, currentBlock);
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, assignedRegister, currentBlock);
+ }
+ }
+ }
+#endif // DEBUG
+
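+ // A DummyDef that was given a register establishes the live-in location of this lclVar for the block.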
+ if (refType == RefTypeDummyDef && assignedRegister != REG_NA)
+ {
+ setInVarRegForBB(curBBNum, currentInterval->varNum, assignedRegister);
+ }
+
+ // If we allocated a register, and this is a use of a spilled value,
+ // it should have been marked for reload above.
+ if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister)
+ {
+ assert(currentRefPosition->reload);
+ }
+ }
+
+ // If we allocated a register, record it
+ if (currentInterval != nullptr && assignedRegister != REG_NA)
+ {
+ assignedRegBit = genRegMask(assignedRegister);
+ currentRefPosition->registerAssignment = assignedRegBit;
+ currentInterval->physReg = assignedRegister;
+ regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead
+
+ // If this interval is dead, free the register.
+ // The interval could be dead if this is a user variable, or if the
+ // node is being evaluated for side effects, or a call whose result
+ // is not used, etc.
+ if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr)
+ {
+ assert(currentRefPosition->isIntervalRef());
+
+ if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr)
+ {
+ if (currentRefPosition->delayRegFree)
+ {
+ delayRegsToFree |= assignedRegBit;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED));
+ }
+ else
+ {
+ regsToFree |= assignedRegBit;
+ INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE));
+ }
+ }
+ else
+ {
+ currentInterval->isActive = false;
+ }
+ }
+
+ lastAllocatedRefPosition = currentRefPosition;
+ }
+ }
+
+ // Free registers to clear associated intervals for resolution phase
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (getLsraExtendLifeTimes())
+ {
+ // If we have extended lifetimes, we need to make sure all the registers are freed.
+ for (int regNumIndex = 0; regNumIndex <= REG_FP_LAST; regNumIndex++)
+ {
+ RegRecord& regRecord = physRegs[regNumIndex];
+ Interval* interval = regRecord.assignedInterval;
+ if (interval != nullptr)
+ {
+ interval->isActive = false;
+ unassignPhysReg(&regRecord, nullptr);
+ }
+ }
+ }
+ else
+#endif // DEBUG
+ {
+ freeRegisters(regsToFree | delayRegsToFree);
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ if (dumpTerse)
+ {
+ // Dump the RegRecords after the last RefPosition is handled.
+ dumpRegRecords();
+ printf("\n");
+ }
+
+ dumpRefPositions("AFTER ALLOCATION");
+ dumpVarRefPositions("AFTER ALLOCATION");
+
+ // Dump the intervals that remain active
+ printf("Active intervals at end of allocation:\n");
+
+ // We COULD just reuse the intervalIter from above, but ArrayListIterator doesn't
+ // provide a Reset function (!) - we'll probably replace this so don't bother
+ // adding it
+
+ for (auto& interval : intervals)
+ {
+ if (interval.isActive)
+ {
+ printf("Active ");
+ interval.dump();
+ }
+ }
+
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+// LinearScan::resolveLocalRef
+// Description:
+// Update the graph for a local reference.
+// Also, track the register (if any) that is currently occupied.
+// Arguments:
+// treeNode: The lclVar that's being resolved
+// currentRefPosition: the RefPosition associated with the treeNode
+//
+// Details:
+// This method is called for each local reference, during the resolveRegisters
+// phase of LSRA. It is responsible for keeping the following in sync:
+// - varDsc->lvRegNum (and lvOtherReg) contain the unique register location.
+// If it is not in the same register through its lifetime, it is set to REG_STK.
+// - interval->physReg is set to the assigned register
+// (i.e. at the code location which is currently being handled by resolveRegisters())
+// - interval->isActive is true iff the interval is live and occupying a register
+// - interval->isSpilled is set to true if the interval is EVER spilled
+// - interval->isSplit is set to true if the interval does not occupy the same
+// register throughout the method
+// - RegRecord->assignedInterval points to the interval which currently occupies
+// the register
+// - For each lclVar node:
+// - gtRegNum/gtRegPair is set to the currently allocated register(s)
+// - GTF_REG_VAL is set if it is a use, and is in a register
+// - GTF_SPILLED is set on a use if it must be reloaded prior to use (GTF_REG_VAL
+// must not be set)
+// - GTF_SPILL is set if it must be spilled after use (GTF_REG_VAL may or may not
+// be set)
+//
+// A copyReg is an ugly case where the variable must be in a specific (fixed) register,
+// but it currently resides elsewhere. The register allocator must track the use of the
+// fixed register, but it marks the lclVar node with the register it currently lives in
+// and the code generator does the necessary move.
+//
+// Before beginning, the varDsc for each parameter must be set to its initial location.
+//
+// NICE: Consider tracking whether an Interval is always in the same location (register/stack)
+// in which case it will require no resolution.
+//
+void LinearScan::resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition)
+{
+ assert((block == nullptr) == (treeNode == nullptr));
+
+ // Is this a tracked local? Or just a register allocated for loading
+ // a non-tracked one?
+ Interval* interval = currentRefPosition->getInterval();
+ if (!interval->isLocalVar)
+ {
+ return;
+ }
+ interval->recentRefPosition = currentRefPosition;
+ LclVarDsc* varDsc = interval->getLocalVar(compiler);
+
+ if (currentRefPosition->registerAssignment == RBM_NONE)
+ {
+ assert(!currentRefPosition->RequiresRegister());
+
+ interval->isSpilled = true;
+ varDsc->lvRegNum = REG_STK;
+ if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval)
+ {
+ interval->assignedReg->assignedInterval = nullptr;
+ }
+ interval->assignedReg = nullptr;
+ interval->physReg = REG_NA;
+
+ return;
+ }
+
+ // In most cases, the assigned and home registers will be the same.
+ // The exception is the copyReg case, where we've assigned a register
+ // for a specific purpose, but the lclVar will keep its original (home) register assignment.
+ regNumber assignedReg = currentRefPosition->assignedReg();
+ regNumber homeReg = assignedReg;
+
+ // Undo any previous association with a physical register, UNLESS this
+ // is a copyReg
+ if (!currentRefPosition->copyReg)
+ {
+ regNumber oldAssignedReg = interval->physReg;
+ if (oldAssignedReg != REG_NA && assignedReg != oldAssignedReg)
+ {
+ RegRecord* oldRegRecord = getRegisterRecord(oldAssignedReg);
+ if (oldRegRecord->assignedInterval == interval)
+ {
+ oldRegRecord->assignedInterval = nullptr;
+ }
+ }
+ }
+
+ if (currentRefPosition->refType == RefTypeUse && !currentRefPosition->reload)
+ {
+ // Was this spilled after our predecessor was scheduled?
+ if (interval->physReg == REG_NA)
+ {
+ assert(inVarToRegMaps[curBBNum][varDsc->lvVarIndex] == REG_STK);
+ currentRefPosition->reload = true;
+ }
+ }
+
+ bool reload = currentRefPosition->reload;
+ bool spillAfter = currentRefPosition->spillAfter;
+
+ // In the reload case we simply do not set GTF_REG_VAL, and it gets
+ // referenced from the variable's home location.
+ // This is also true for a pure def which is spilled.
+ if (reload && currentRefPosition->refType != RefTypeDef)
+ {
+ varDsc->lvRegNum = REG_STK;
+ if (!spillAfter)
+ {
+ interval->physReg = assignedReg;
+ }
+
+ // If there is no treeNode, this must be a RefTypeExpUse, in
+ // which case we did the reload already
+ if (treeNode != nullptr)
+ {
+ treeNode->gtFlags |= GTF_SPILLED;
+ if (spillAfter)
+ {
+ if (currentRefPosition->AllocateIfProfitable())
+ {
+ // This is a use of lclVar that is flagged as reg-optional
+ // by lower/codegen and marked for both reload and spillAfter.
+ // In this case we can avoid unnecessary reload and spill
+ // by setting reg on lclVar to REG_STK and reg on tree node
+ // to REG_NA. Codegen will generate the code by considering
+ // it as a contained memory operand.
+ //
+ // Note that varDsc->lvRegNum was already set to REG_STK above.
+ interval->physReg = REG_NA;
+ treeNode->gtRegNum = REG_NA;
+ treeNode->gtFlags &= ~GTF_SPILLED;
+ }
+ else
+ {
+ treeNode->gtFlags |= GTF_SPILL;
+ }
+ }
+ }
+ else
+ {
+ assert(currentRefPosition->refType == RefTypeExpUse);
+ }
+
+ // If we have an undefined use set it as non-reg
+ if (!interval->isSpilled)
+ {
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg && currentRefPosition == interval->firstRefPosition)
+ {
+ // Parameters are the only thing that can be used before defined
+ }
+ else
+ {
+ // if we see a use before def of something else, the zero init flag better not be set.
+ noway_assert(!compiler->info.compInitMem);
+ // if it is not set, then the behavior is undefined but we don't want to crash or assert
+ interval->isSpilled = true;
+ }
+ }
+ }
+ else if (spillAfter && !RefTypeIsUse(currentRefPosition->refType))
+ {
+ // In the case of a pure def, don't bother spilling - just assign it to the
+ // stack. However, we need to remember that it was spilled.
+
+ interval->isSpilled = true;
+ varDsc->lvRegNum = REG_STK;
+ interval->physReg = REG_NA;
+ if (treeNode != nullptr)
+ {
+ treeNode->gtRegNum = REG_NA;
+ }
+ }
+ else
+ {
+ // Not reload and Not pure-def that's spillAfter
+
+ if (currentRefPosition->copyReg || currentRefPosition->moveReg)
+ {
+ // For a copyReg or moveReg, we have two cases:
+ // - In the first case, we have a fixedReg - i.e. a register which the code
+ // generator is constrained to use.
+ // The code generator will generate the appropriate move to meet the requirement.
+ // - In the second case, we were forced to use a different register because of
+ // interference (or JitStressRegs).
+ // In this case, we generate a GT_COPY.
+ // In either case, we annotate the treeNode with the register in which the value
+ // currently lives. For moveReg, the homeReg is the new register (as assigned above).
+ // But for copyReg, the homeReg remains unchanged.
+
+ assert(treeNode != nullptr);
+ treeNode->gtRegNum = interval->physReg;
+
+ if (currentRefPosition->copyReg)
+ {
+ homeReg = interval->physReg;
+ }
+ else
+ {
+ interval->physReg = assignedReg;
+ }
+
+ if (!currentRefPosition->isFixedRegRef || currentRefPosition->moveReg)
+ {
+ // This is the second case, where we need to generate a copy
+ insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(), currentRefPosition);
+ }
+ }
+ else
+ {
+ interval->physReg = assignedReg;
+
+ if (!interval->isSpilled && !interval->isSplit)
+ {
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ // If the register assignments don't match, then this interval is split,
+ // but not spilled (yet).
+ // However, we don't have a single register assignment now
+ if (varDsc->lvRegNum != assignedReg)
+ {
+ interval->isSplit = true;
+ varDsc->lvRegNum = REG_STK;
+ }
+ }
+ else
+ {
+ varDsc->lvRegNum = assignedReg;
+ }
+ }
+ }
+ if (spillAfter)
+ {
+ if (treeNode != nullptr)
+ {
+ treeNode->gtFlags |= GTF_SPILL;
+ }
+ interval->isSpilled = true;
+ interval->physReg = REG_NA;
+ varDsc->lvRegNum = REG_STK;
+ }
+
+ // This value is in a register, UNLESS we already saw this treeNode
+ // and marked it for reload
+ if (treeNode != nullptr && !(treeNode->gtFlags & GTF_SPILLED))
+ {
+ treeNode->gtFlags |= GTF_REG_VAL;
+ }
+ }
+
+ // Update the physRegRecord for the register, so that we know what vars are in
+ // regs at the block boundaries
+ RegRecord* physRegRecord = getRegisterRecord(homeReg);
+ if (spillAfter || currentRefPosition->lastUse)
+ {
+ physRegRecord->assignedInterval = nullptr;
+ interval->assignedReg = nullptr;
+ interval->physReg = REG_NA;
+ interval->isActive = false;
+ }
+ else
+ {
+ interval->isActive = true;
+ physRegRecord->assignedInterval = interval;
+ interval->assignedReg = physRegRecord;
+ }
+}
+
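+//------------------------------------------------------------------------
+// writeRegisters: Set the register(s) on the tree node from the given RefPosition's assignment.
+//
+// Arguments:
+// currentRefPosition - the RefPosition that carries the assigned register and multi-reg index
+// tree - the node whose register(s) are being set
+//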
+void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
+{
+ lsraAssignRegToTree(tree, currentRefPosition->assignedReg(), currentRefPosition->getMultiRegIdx());
+}
+
+//------------------------------------------------------------------------
+// insertCopyOrReload: Insert a copy in the case where a tree node value must be moved
+// to a different register at the point of use (GT_COPY), or it is reloaded to a different register
+// than the one it was spilled from (GT_RELOAD).
+//
+// Arguments:
+// tree - This is the node to copy or reload.
+// Insert copy or reload node between this node and its parent.
+// multiRegIdx - register position of tree node for which copy or reload is needed.
+// refPosition - The RefPosition at which copy or reload will take place.
+//
+// Notes:
+// The GT_COPY or GT_RELOAD will be inserted in the proper spot in execution order where the reload is to occur.
+//
+// For example, for this tree (numbers are execution order, lower is earlier and higher is later):
+//
+// +---------+----------+
+// | GT_ADD (3) |
+// +---------+----------+
+// |
+// / \
+// / \
+// / \
+// +-------------------+ +----------------------+
+// | x (1) | "tree" | y (2) |
+// +-------------------+ +----------------------+
+//
+// generate this tree:
+//
+// +---------+----------+
+// | GT_ADD (4) |
+// +---------+----------+
+// |
+// / \
+// / \
+// / \
+// +-------------------+ +----------------------+
+// | GT_RELOAD (3) | | y (2) |
+// +-------------------+ +----------------------+
+// |
+// +-------------------+
+// | x (1) | "tree"
+// +-------------------+
+//
+// Note in particular that the GT_RELOAD node gets inserted in execution order immediately before the parent of "tree",
+// which seems a bit weird since normally a node's parent (in this case, the parent of "x", GT_RELOAD in the "after"
+// picture) immediately follows all of its children (that is, normally the execution ordering is postorder).
+// The ordering must be this weird "out of normal order" way because the "x" node is being spilled, probably
+// because the expression in the tree represented above by "y" has high register requirements. We don't want
+// to reload immediately, of course. So we put GT_RELOAD where the reload should actually happen.
+//
+// Note that GT_RELOAD is required when we reload to a different register than the one we spilled to. It can also be
+// used if we reload to the same register. Normally, though, in that case we just mark the node with GTF_SPILLED,
+// and the unspilling code automatically reuses the same register, and does the reload when it notices that flag
+// when considering a node's operands.
+//
+void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition)
+{
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ LIR::Use treeUse;
+ bool foundUse = blockRange.TryGetUse(tree, &treeUse);
+ assert(foundUse);
+
+ GenTree* parent = treeUse.User();
+
+ genTreeOps oper;
+ if (refPosition->reload)
+ {
+ oper = GT_RELOAD;
+ }
+ else
+ {
+ oper = GT_COPY;
+ }
+
+ // If the parent is a reload/copy node, then tree must be a multi-reg call node
+ // that has already had one of its registers spilled. This is because a multi-reg
+ // call node is the only node whose RefTypeDef positions get independently
+ // spilled or reloaded. It is possible that one of its RefTypeDef positions got
+ // spilled and the next use of it requires it to be in a different register.
+ //
+ // In this case, set the i'th position register of the reload/copy node to the register
+ // allocated for the copy/reload refPosition. Essentially, a copy/reload node will have
+ // a register for each multi-reg position of its child. If there is a valid register in
+ // the i'th position of a GT_COPY or GT_RELOAD node, then the corresponding result of its
+ // child needs to be copied or reloaded to that register.
+ if (parent->IsCopyOrReload())
+ {
+ noway_assert(parent->OperGet() == oper);
+ noway_assert(tree->IsMultiRegCall());
+ GenTreeCall* call = tree->AsCall();
+ GenTreeCopyOrReload* copyOrReload = parent->AsCopyOrReload();
+ noway_assert(copyOrReload->GetRegNumByIdx(multiRegIdx) == REG_NA);
+ copyOrReload->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
+ }
+ else
+ {
+ // Create the new node, with "tree" as its only child.
+ var_types treeType = tree->TypeGet();
+
+#ifdef FEATURE_SIMD
+ // Check to see whether we need to move to a different register set.
+ // This currently only happens in the case of SIMD vector types that are small enough (pointer size)
+ // that they must be passed & returned in integer registers.
+ // 'treeType' is the type of the register we are moving FROM,
+ // and refPosition->registerAssignment is the mask for the register we are moving TO.
+ // If they don't match, we need to reverse the type for the "move" node.
+
+ if ((allRegs(treeType) & refPosition->registerAssignment) == 0)
+ {
+ treeType = (useFloatReg(treeType)) ? TYP_I_IMPL : TYP_SIMD8;
+ }
+#endif // FEATURE_SIMD
+
+ GenTreeCopyOrReload* newNode = new (compiler, oper) GenTreeCopyOrReload(oper, treeType, tree);
+ assert(refPosition->registerAssignment != RBM_NONE);
+ newNode->SetRegNumByIdx(refPosition->assignedReg(), multiRegIdx);
+ newNode->gtLsraInfo.isLsraAdded = true;
+ newNode->gtLsraInfo.isLocalDefUse = false;
+ if (refPosition->copyReg)
+ {
+ // This is a TEMPORARY copy
+ assert(isCandidateLocalRef(tree));
+ newNode->gtFlags |= GTF_VAR_DEATH;
+ }
+
+ // Insert the copy/reload after the spilled node and replace the use of the original node with a use
+ // of the copy/reload.
+ blockRange.InsertAfter(tree, newNode);
+ treeUse.ReplaceWith(compiler, newNode);
+ }
+}
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+//------------------------------------------------------------------------
+// insertUpperVectorSaveAndReload: Insert code to save and restore the upper half of a vector that lives
+// in a callee-save register at the point of a kill (the upper half is
+// not preserved).
+//
+// Arguments:
+// tree - This is the node around which we will insert the Save & Reload.
+// It will be a call or some node that turns into a call.
+// refPosition - The RefTypeUpperVectorSaveDef RefPosition.
+// block - The BasicBlock containing 'tree'; the save is inserted before 'tree' and the restore after it.
+//
+void LinearScan::insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block)
+{
+ Interval* lclVarInterval = refPosition->getInterval()->relatedInterval;
+ assert(lclVarInterval->isLocalVar == true);
+ LclVarDsc* varDsc = compiler->lvaTable + lclVarInterval->varNum;
+ assert(varDsc->lvType == LargeVectorType);
+ regNumber lclVarReg = lclVarInterval->physReg;
+ if (lclVarReg == REG_NA)
+ {
+ return;
+ }
+
+ assert((genRegMask(lclVarReg) & RBM_FLT_CALLEE_SAVED) != RBM_NONE);
+
+ regNumber spillReg = refPosition->assignedReg();
+ bool spillToMem = refPosition->spillAfter;
+
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ // First, insert the save as an embedded statement before the call.
+
+ GenTreePtr saveLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType);
+ saveLcl->gtLsraInfo.isLsraAdded = true;
+ saveLcl->gtRegNum = lclVarReg;
+ saveLcl->gtFlags |= GTF_REG_VAL;
+ saveLcl->gtLsraInfo.isLocalDefUse = false;
+
+ GenTreeSIMD* simdNode =
+ new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave,
+ varDsc->lvBaseType, genTypeSize(LargeVectorType));
+ simdNode->gtLsraInfo.isLsraAdded = true;
+ simdNode->gtRegNum = spillReg;
+ if (spillToMem)
+ {
+ simdNode->gtFlags |= GTF_SPILL;
+ }
+
+ blockRange.InsertBefore(tree, LIR::SeqTree(compiler, simdNode));
+
+ // Now insert the restore after the call.
+
+ GenTreePtr restoreLcl = compiler->gtNewLclvNode(lclVarInterval->varNum, LargeVectorType);
+ restoreLcl->gtLsraInfo.isLsraAdded = true;
+ restoreLcl->gtRegNum = lclVarReg;
+ restoreLcl->gtFlags |= GTF_REG_VAL;
+ restoreLcl->gtLsraInfo.isLocalDefUse = false;
+
+ simdNode = new (compiler, GT_SIMD)
+ GenTreeSIMD(LargeVectorType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore, varDsc->lvBaseType, 32);
+ simdNode->gtLsraInfo.isLsraAdded = true;
+ simdNode->gtRegNum = spillReg;
+ if (spillToMem)
+ {
+ simdNode->gtFlags |= GTF_SPILLED;
+ }
+
+ blockRange.InsertAfter(tree, LIR::SeqTree(compiler, simdNode));
+}
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+//------------------------------------------------------------------------
+// initMaxSpill: Initializes the LinearScan members used to track the max number
+// of concurrent spills. This is needed so that we can set the
+// fields in Compiler, so that the code generator, in turn can
+// allocate the right number of spill locations.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// This is called before any calls to updateMaxSpill().
+
+void LinearScan::initMaxSpill()
+{
+ needDoubleTmpForFPCall = false;
+ needFloatTmpForFPCall = false;
+ for (int i = 0; i < TYP_COUNT; i++)
+ {
+ maxSpill[i] = 0;
+ currentSpill[i] = 0;
+ }
+}
+
+//------------------------------------------------------------------------
+// recordMaxSpill: Sets the fields in Compiler for the max number of concurrent spills.
+// (See the comment on initMaxSpill.)
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// This is called after updateMaxSpill() has been called for all "real"
+// RefPositions.
+
+void LinearScan::recordMaxSpill()
+{
+ // Note: due to the temp normalization process (see tmpNormalizeType)
+ // only a few types should actually be seen here.
+ JITDUMP("Recording the maximum number of concurrent spills:\n");
+#ifdef _TARGET_X86_
+ var_types returnType = compiler->tmpNormalizeType(compiler->info.compRetType);
+ if (needDoubleTmpForFPCall || (returnType == TYP_DOUBLE))
+ {
+ JITDUMP("Adding a spill temp for moving a double call/return value between xmm reg and x87 stack.\n");
+ maxSpill[TYP_DOUBLE] += 1;
+ }
+ if (needFloatTmpForFPCall || (returnType == TYP_FLOAT))
+ {
+ JITDUMP("Adding a spill temp for moving a float call/return value between xmm reg and x87 stack.\n");
+ maxSpill[TYP_FLOAT] += 1;
+ }
+#endif // _TARGET_X86_
+ for (int i = 0; i < TYP_COUNT; i++)
+ {
+ if (var_types(i) != compiler->tmpNormalizeType(var_types(i)))
+ {
+ // Only normalized types should have anything in the maxSpill array.
+ // We assume here that if type 'i' does not normalize to itself, then
+ // nothing else normalizes to 'i', either.
+ assert(maxSpill[i] == 0);
+ }
+ JITDUMP(" %s: %d\n", varTypeName(var_types(i)), maxSpill[i]);
+ if (maxSpill[i] != 0)
+ {
+ compiler->tmpPreAllocateTemps(var_types(i), maxSpill[i]);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// updateMaxSpill: Update the maximum number of concurrent spills
+//
+// Arguments:
+// refPosition - the current RefPosition being handled
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The RefPosition has an associated interval (getInterval() will
+// otherwise assert).
+//
+// Notes:
+// This is called for each "real" RefPosition during the writeback
+// phase of LSRA. It keeps track of how many concurrently-live
+// spills there are, and the largest number seen so far.
+
+void LinearScan::updateMaxSpill(RefPosition* refPosition)
+{
+ RefType refType = refPosition->refType;
+
+ if (refPosition->spillAfter || refPosition->reload ||
+ (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA))
+ {
+ Interval* interval = refPosition->getInterval();
+ if (!interval->isLocalVar)
+ {
+ // The tmp allocation logic 'normalizes' types to a small number of
+ // types that need distinct stack locations from each other.
+ // Those types are currently gc refs, byrefs, <= 4 byte non-GC items,
+ // 8-byte non-GC items, and 16-byte or 32-byte SIMD vectors.
+ // LSRA is agnostic to those choices but needs
+ // to know what they are here.
+ var_types typ;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if ((refType == RefTypeUpperVectorSaveDef) || (refType == RefTypeUpperVectorSaveUse))
+ {
+ typ = LargeVectorSaveType;
+ }
+ else
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ {
+ GenTreePtr treeNode = refPosition->treeNode;
+ if (treeNode == nullptr)
+ {
+ assert(RefTypeIsUse(refType));
+ treeNode = interval->firstRefPosition->treeNode;
+ }
+ assert(treeNode != nullptr);
+
+ // In case of multi-reg call nodes, we need to use the type
+ // of the return register given by multiRegIdx of the refposition.
+ if (treeNode->IsMultiRegCall())
+ {
+ ReturnTypeDesc* retTypeDesc = treeNode->AsCall()->GetReturnTypeDesc();
+ typ = retTypeDesc->GetReturnRegType(refPosition->getMultiRegIdx());
+ }
+ else
+ {
+ typ = treeNode->TypeGet();
+ }
+ typ = compiler->tmpNormalizeType(typ);
+ }
+
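+ // A def that is spilled (and not reloaded at this position) increases the number of
+ // concurrently-live spill temps of this type; a reload of a previously spilled value decreases it.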
+ if (refPosition->spillAfter && !refPosition->reload)
+ {
+ currentSpill[typ]++;
+ if (currentSpill[typ] > maxSpill[typ])
+ {
+ maxSpill[typ] = currentSpill[typ];
+ }
+ }
+ else if (refPosition->reload)
+ {
+ assert(currentSpill[typ] > 0);
+ currentSpill[typ]--;
+ }
+ else if (refPosition->AllocateIfProfitable() && refPosition->assignedReg() == REG_NA)
+ {
+ // This is a spill temp that is not reloaded into a register because it is
+ // marked allocate-if-profitable and is used directly from its
+ // memory location. To properly account for the max spill of 'typ',
+ // we decrement the spill count.
+ assert(RefTypeIsUse(refType));
+ assert(currentSpill[typ] > 0);
+ currentSpill[typ]--;
+ }
+ JITDUMP(" Max spill for %s is %d\n", varTypeName(typ), maxSpill[typ]);
+ }
+ }
+}
+
+// This is the final phase of register allocation. It writes the register assignments to
+// the tree, and performs resolution across joins and backedges.
+//
+void LinearScan::resolveRegisters()
+{
+ // Iterate over the tree and the RefPositions in lockstep
+ // - annotate the tree with register assignments by setting gtRegNum or gtRegPair (for longs)
+ // on the tree node
+ // - track globally-live var locations
+ // - add resolution points at split/merge/critical points as needed
+
+ // Need to use the same traversal order as the one that assigns the location numbers.
+
+ // Dummy RefPositions have been added at any split, join or critical edge, at the
+ // point where resolution may be required. These are located:
+ // - for a split, at the top of the non-adjacent block
+ // - for a join, at the bottom of the non-adjacent joining block
+ // - for a critical edge, at the top of the target block of each critical
+ // edge.
+ // Note that a target block may have multiple incoming critical or split edges
+ //
+ // These RefPositions record the expected location of the Interval at that point.
+ // At each branch, we identify the location of each liveOut interval, and check
+ // against the RefPositions at the target.
+
+ BasicBlock* block;
+ LsraLocation currentLocation = MinLocation;
+
+ // Clear register assignments - these will be reestablished as lclVar defs (including RefTypeParamDefs)
+ // are encountered.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ Interval* assignedInterval = physRegRecord->assignedInterval;
+ if (assignedInterval != nullptr)
+ {
+ assignedInterval->assignedReg = nullptr;
+ assignedInterval->physReg = REG_NA;
+ }
+ physRegRecord->assignedInterval = nullptr;
+ physRegRecord->recentRefPosition = nullptr;
+ }
+
+ // Clear "recentRefPosition" for lclVar intervals
+ for (unsigned lclNum = 0; lclNum < compiler->lvaCount; lclNum++)
+ {
+ localVarIntervals[lclNum]->recentRefPosition = nullptr;
+ localVarIntervals[lclNum]->isActive = false;
+ }
+
+ // handle incoming arguments and special temps
+ auto currentRefPosition = refPositions.begin();
+
+ VarToRegMap entryVarToRegMap = inVarToRegMaps[compiler->fgFirstBB->bbNum];
+ while (currentRefPosition != refPositions.end() &&
+ (currentRefPosition->refType == RefTypeParamDef || currentRefPosition->refType == RefTypeZeroInit))
+ {
+ Interval* interval = currentRefPosition->getInterval();
+ assert(interval != nullptr && interval->isLocalVar);
+ resolveLocalRef(nullptr, nullptr, currentRefPosition);
+ regNumber reg = REG_STK;
+ int varIndex = interval->getVarIndex(compiler);
+
+ if (!currentRefPosition->spillAfter && currentRefPosition->registerAssignment != RBM_NONE)
+ {
+ reg = currentRefPosition->assignedReg();
+ }
+ else
+ {
+ reg = REG_STK;
+ interval->isActive = false;
+ }
+ entryVarToRegMap[varIndex] = reg;
+ ++currentRefPosition;
+ }
+
+ JITDUMP("------------------------\n");
+ JITDUMP("WRITING BACK ASSIGNMENTS\n");
+ JITDUMP("------------------------\n");
+
+ BasicBlock* insertionBlock = compiler->fgFirstBB;
+ GenTreePtr insertionPoint = LIR::AsRange(insertionBlock).FirstNonPhiNode();
+
+ // write back assignments
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ assert(curBBNum == block->bbNum);
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ block->dspBlockHeader(compiler);
+ currentRefPosition->dump();
+ }
+#endif // DEBUG
+
+ // Record the var locations at the start of this block.
+ // (If it's fgFirstBB, we've already done that above, see entryVarToRegMap)
+
+ curBBStartLocation = currentRefPosition->nodeLocation;
+ if (block != compiler->fgFirstBB)
+ {
+ processBlockStartLocations(block, false);
+ }
+
+ // Handle the DummyDefs, updating the incoming var location.
+ for (; currentRefPosition != refPositions.end() && currentRefPosition->refType == RefTypeDummyDef;
+ ++currentRefPosition)
+ {
+ assert(currentRefPosition->isIntervalRef());
+ // Don't mark dummy defs as reload
+ currentRefPosition->reload = false;
+ resolveLocalRef(nullptr, nullptr, currentRefPosition);
+ regNumber reg;
+ if (currentRefPosition->registerAssignment != RBM_NONE)
+ {
+ reg = currentRefPosition->assignedReg();
+ }
+ else
+ {
+ reg = REG_STK;
+ currentRefPosition->getInterval()->isActive = false;
+ }
+ setInVarRegForBB(curBBNum, currentRefPosition->getInterval()->varNum, reg);
+ }
+
+ // The next RefPosition should be for the block. Move past it.
+ assert(currentRefPosition != refPositions.end());
+ assert(currentRefPosition->refType == RefTypeBB);
+ ++currentRefPosition;
+
+ // Handle the RefPositions for the block
+ for (; currentRefPosition != refPositions.end() && currentRefPosition->refType != RefTypeBB &&
+ currentRefPosition->refType != RefTypeDummyDef;
+ ++currentRefPosition)
+ {
+ currentLocation = currentRefPosition->nodeLocation;
+ JITDUMP("current : ");
+ DBEXEC(VERBOSE, currentRefPosition->dump());
+
+ // Ensure that the spill & copy info is valid.
+ // First, if it's reload, it must not be copyReg or moveReg
+ assert(!currentRefPosition->reload || (!currentRefPosition->copyReg && !currentRefPosition->moveReg));
+ // If it's copyReg it must not be moveReg, and vice-versa
+ assert(!currentRefPosition->copyReg || !currentRefPosition->moveReg);
+
+ switch (currentRefPosition->refType)
+ {
+#ifdef FEATURE_SIMD
+ case RefTypeUpperVectorSaveUse:
+ case RefTypeUpperVectorSaveDef:
+#endif // FEATURE_SIMD
+ case RefTypeUse:
+ case RefTypeDef:
+ // These are the ones we're interested in
+ break;
+ case RefTypeKill:
+ case RefTypeFixedReg:
+ // These require no handling at resolution time
+ assert(currentRefPosition->referent != nullptr);
+ currentRefPosition->referent->recentRefPosition = currentRefPosition;
+ continue;
+ case RefTypeExpUse:
+ // Ignore the ExpUse cases - a RefTypeExpUse would only exist if the
+ // variable is dead at the entry to the next block. So we'll mark
+ // it as in its current location and resolution will take care of any
+ // mismatch.
+ assert(getNextBlock() == nullptr ||
+ !VarSetOps::IsMember(compiler, getNextBlock()->bbLiveIn,
+ currentRefPosition->getInterval()->getVarIndex(compiler)));
+ currentRefPosition->referent->recentRefPosition = currentRefPosition;
+ continue;
+ case RefTypeKillGCRefs:
+ // No action to take at resolution time, and no interval to update recentRefPosition for.
+ continue;
+ case RefTypeDummyDef:
+ case RefTypeParamDef:
+ case RefTypeZeroInit:
+ // Should have handled all of these already
+ default:
+ unreached();
+ break;
+ }
+ updateMaxSpill(currentRefPosition);
+ GenTree* treeNode = currentRefPosition->treeNode;
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ if (currentRefPosition->refType == RefTypeUpperVectorSaveDef)
+ {
+ // The treeNode must be a call, and this must be a RefPosition for a LargeVectorType LocalVar.
+ // If the LocalVar is in a callee-save register, we are going to spill its upper half around the call.
+ // If we have allocated a register to spill it to, we will use that; otherwise, we will spill it
+ // to the stack. We can use as a temp register any non-arg caller-save register.
+ noway_assert(treeNode != nullptr);
+ currentRefPosition->referent->recentRefPosition = currentRefPosition;
+ insertUpperVectorSaveAndReload(treeNode, currentRefPosition, block);
+ }
+ else if (currentRefPosition->refType == RefTypeUpperVectorSaveUse)
+ {
+ continue;
+ }
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ // Most uses won't actually need to be recorded (they're on the def).
+ // In those cases, treeNode will be nullptr.
+ if (treeNode == nullptr)
+ {
+ // This is either a use, a dead def, or a field of a struct
+ Interval* interval = currentRefPosition->getInterval();
+ assert(currentRefPosition->refType == RefTypeUse ||
+ currentRefPosition->registerAssignment == RBM_NONE || interval->isStructField);
+
+ // TODO-Review: Need to handle the case where any of the struct fields
+ // are reloaded/spilled at this use
+ assert(!interval->isStructField ||
+ (currentRefPosition->reload == false && currentRefPosition->spillAfter == false));
+
+ if (interval->isLocalVar && !interval->isStructField)
+ {
+ LclVarDsc* varDsc = interval->getLocalVar(compiler);
+
+ // This must be a dead definition. We need to mark the lclVar
+ // so that it's not considered a candidate for lvRegister, as
+ // this dead def will have to go to the stack.
+ assert(currentRefPosition->refType == RefTypeDef);
+ varDsc->lvRegNum = REG_STK;
+ }
+
+ JITDUMP("No tree node to write back to\n");
+ continue;
+ }
+
+ DBEXEC(VERBOSE, lsraDispNode(treeNode, LSRA_DUMP_REFPOS, true));
+ JITDUMP("\n");
+
+ LsraLocation loc = treeNode->gtLsraInfo.loc;
+ JITDUMP("curr = %u mapped = %u", currentLocation, loc);
+ assert(treeNode->IsLocal() || currentLocation == loc || currentLocation == loc + 1);
+
+ if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal)
+ {
+ JITDUMP(" internal");
+ GenTreePtr indNode = nullptr;
+ if (treeNode->OperIsIndir())
+ {
+ indNode = treeNode;
+ JITDUMP(" allocated at GT_IND");
+ }
+ if (indNode != nullptr)
+ {
+ GenTreePtr addrNode = indNode->gtOp.gtOp1->gtEffectiveVal();
+ if (addrNode->OperGet() != GT_ARR_ELEM)
+ {
+ addrNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
+ JITDUMP(", recorded on addr");
+ }
+ }
+ if (treeNode->OperGet() == GT_ARR_ELEM)
+ {
+ // TODO-Review: See WORKAROUND ALERT in buildRefPositionsForNode()
+ GenTreePtr firstIndexTree = treeNode->gtArrElem.gtArrInds[0]->gtEffectiveVal();
+ assert(firstIndexTree != nullptr);
+ if (firstIndexTree->IsLocal() && (firstIndexTree->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ // Record the LAST internal interval
+ // (Yes, this naively just records each one, but the next will replace it;
+ // I'd fix this if it wasn't just a temporary fix)
+ if (currentRefPosition->refType == RefTypeDef)
+ {
+ JITDUMP(" allocated at GT_ARR_ELEM, recorded on firstIndex V%02u");
+ firstIndexTree->gtRsvdRegs = (regMaskSmall)currentRefPosition->registerAssignment;
+ }
+ }
+ }
+ treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment;
+ }
+ else
+ {
+ writeRegisters(currentRefPosition, treeNode);
+
+ if (treeNode->IsLocal() && currentRefPosition->getInterval()->isLocalVar)
+ {
+ resolveLocalRef(block, treeNode, currentRefPosition);
+ }
+
+ // Mark spill locations on temps
+ // (local vars are handled in resolveLocalRef, above)
+ // Note that the tree node will be changed from GTF_SPILL to GTF_SPILLED
+ // in codegen, taking care of the "reload" case for temps
+ else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr &&
+ currentRefPosition->nextRefPosition->moveReg))
+ {
+ if (treeNode != nullptr && currentRefPosition->isIntervalRef())
+ {
+ if (currentRefPosition->spillAfter)
+ {
+ treeNode->gtFlags |= GTF_SPILL;
+
+ // If this is a constant interval that is reusing a pre-existing value, we actually need
+ // to generate the value at this point in order to spill it.
+ if (treeNode->IsReuseRegVal())
+ {
+ treeNode->ResetReuseRegVal();
+ }
+
+ // In case of multi-reg call node, also set spill flag on the
+ // register specified by multi-reg index of current RefPosition.
+ // Note that the spill flag on treeNode indicates that one or
+ // more of its allocated registers are in that state.
+ if (treeNode->IsMultiRegCall())
+ {
+ GenTreeCall* call = treeNode->AsCall();
+ call->SetRegSpillFlagByIdx(GTF_SPILL, currentRefPosition->getMultiRegIdx());
+ }
+ }
+
+ // If the value is reloaded or moved to a different register, we need to insert
+ // a node to hold the register to which it should be reloaded
+ RefPosition* nextRefPosition = currentRefPosition->nextRefPosition;
+ assert(nextRefPosition != nullptr);
+ if (INDEBUG(alwaysInsertReload() ||)
+ nextRefPosition->assignedReg() != currentRefPosition->assignedReg())
+ {
+ if (nextRefPosition->assignedReg() != REG_NA)
+ {
+ insertCopyOrReload(block, treeNode, currentRefPosition->getMultiRegIdx(),
+ nextRefPosition);
+ }
+ else
+ {
+ assert(nextRefPosition->AllocateIfProfitable());
+
+ // In case of tree temps, if def is spilled and use didn't
+ // get a register, set a flag on tree node to be treated as
+ // contained at the point of its use.
+ if (currentRefPosition->spillAfter && currentRefPosition->refType == RefTypeDef &&
+ nextRefPosition->refType == RefTypeUse)
+ {
+ assert(nextRefPosition->treeNode == nullptr);
+ treeNode->gtFlags |= GTF_NOREG_AT_USE;
+ }
+ }
+ }
+ }
+
+ // We should never have to "spill after" a temp use, since
+ // they're single use
+ else
+ {
+ unreached();
+ }
+ }
+ }
+ JITDUMP("\n");
+ }
+
+ processBlockEndLocations(block);
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("-----------------------\n");
+ printf("RESOLVING BB BOUNDARIES\n");
+ printf("-----------------------\n");
+
+ printf("Prior to Resolution\n");
+ foreach_block(compiler, block)
+ {
+ printf("\nBB%02u use def in out\n", block->bbNum);
+ dumpConvertedVarSet(compiler, block->bbVarUse);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbVarDef);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveIn);
+ printf("\n");
+ dumpConvertedVarSet(compiler, block->bbLiveOut);
+ printf("\n");
+
+ dumpInVarToRegMap(block);
+ dumpOutVarToRegMap(block);
+ }
+
+ printf("\n\n");
+ }
+#endif // DEBUG
+
+ resolveEdges();
+
+ // Verify register assignments on variables
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+ for (lclNum = 0, varDsc = compiler->lvaTable; lclNum < compiler->lvaCount; lclNum++, varDsc++)
+ {
+ if (!isCandidateVar(varDsc))
+ {
+ varDsc->lvRegNum = REG_STK;
+ }
+ else
+ {
+ Interval* interval = getIntervalForLocalVar(lclNum);
+
+ // Determine initial position for parameters
+
+ if (varDsc->lvIsParam)
+ {
+ regMaskTP initialRegMask = interval->firstRefPosition->registerAssignment;
+ regNumber initialReg = (initialRegMask == RBM_NONE || interval->firstRefPosition->spillAfter)
+ ? REG_STK
+ : genRegNumFromMask(initialRegMask);
+ regNumber sourceReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsMultiReg(varDsc))
+ {
+ // TODO-ARM-NYI: Map the hi/lo intervals back to lvRegNum and lvOtherReg (these should NYI before
+ // this)
+ assert(!"Multi-reg types not yet supported");
+ }
+ else
+#endif // _TARGET_ARM_
+ {
+ varDsc->lvArgInitReg = initialReg;
+ JITDUMP(" Set V%02u argument initial register to %s\n", lclNum, getRegName(initialReg));
+ }
+ if (!varDsc->lvIsRegArg)
+ {
+ // stack arg
+ if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ if (sourceReg != initialReg)
+ {
+ // The code generator won't initialize struct
+ // fields, so we have to do that if it's not already
+ // where it belongs.
+ assert(interval->isStructField);
+ JITDUMP(" Move struct field param V%02u from %s to %s\n", lclNum, getRegName(sourceReg),
+ getRegName(initialReg));
+ insertMove(insertionBlock, insertionPoint, lclNum, sourceReg, initialReg);
+ }
+ }
+ }
+ }
+
+ // If lvRegNum is REG_STK, that means that either no register
+ // was assigned, or (more likely) that the same register was not
+ // used for all references. In that case, codegen gets the register
+ // from the tree node.
+ if (varDsc->lvRegNum == REG_STK || interval->isSpilled || interval->isSplit)
+ {
+ // For codegen purposes, we'll set lvRegNum to whatever register
+ // it's currently in as we go.
+ // However, we never mark an interval as lvRegister if it has either been spilled
+ // or split.
+ varDsc->lvRegister = false;
+
+ // Skip any dead defs or exposed uses
+ // (first use exposed will only occur when there is no explicit initialization)
+ RefPosition* firstRefPosition = interval->firstRefPosition;
+ while ((firstRefPosition != nullptr) && (firstRefPosition->refType == RefTypeExpUse))
+ {
+ firstRefPosition = firstRefPosition->nextRefPosition;
+ }
+ if (firstRefPosition == nullptr)
+ {
+ // Dead interval
+ varDsc->lvLRACandidate = false;
+ if (varDsc->lvRefCnt == 0)
+ {
+ varDsc->lvOnFrame = false;
+ }
+ else
+ {
+ // We may encounter cases where a lclVar actually has no references, but
+ // a non-zero refCnt. For safety (in case this is some "hidden" lclVar that we're
+ // not correctly recognizing), we'll mark those as needing a stack location.
+ // TODO-Cleanup: Make this an assert if/when we correct the refCnt
+ // updating.
+ varDsc->lvOnFrame = true;
+ }
+ }
+ else
+ {
+ // If the interval was not spilled, it doesn't need a stack location.
+ if (!interval->isSpilled)
+ {
+ varDsc->lvOnFrame = false;
+ }
+ if (firstRefPosition->registerAssignment == RBM_NONE || firstRefPosition->spillAfter)
+ {
+ // Either this RefPosition is spilled, or it is not a "real" def or use
+ assert(firstRefPosition->spillAfter ||
+ (firstRefPosition->refType != RefTypeDef && firstRefPosition->refType != RefTypeUse));
+ varDsc->lvRegNum = REG_STK;
+ }
+ else
+ {
+ varDsc->lvRegNum = firstRefPosition->assignedReg();
+ }
+ }
+ }
+ else
+ {
+ varDsc->lvRegister = true;
+ varDsc->lvOnFrame = false;
+#ifdef DEBUG
+ regMaskTP registerAssignment = genRegMask(varDsc->lvRegNum);
+ assert(!interval->isSpilled && !interval->isSplit);
+ RefPosition* refPosition = interval->firstRefPosition;
+ assert(refPosition != nullptr);
+
+ while (refPosition != nullptr)
+ {
+ // All RefPositions must match, except for dead definitions,
+ // copyReg/moveReg and RefTypeExpUse positions
+ if (refPosition->registerAssignment != RBM_NONE && !refPosition->copyReg && !refPosition->moveReg &&
+ refPosition->refType != RefTypeExpUse)
+ {
+ assert(refPosition->registerAssignment == registerAssignment);
+ }
+ refPosition = refPosition->nextRefPosition;
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Trees after linear scan register allocator (LSRA)\n");
+ compiler->fgDispBasicBlocks(true);
+ }
+
+ verifyFinalAllocation();
+#endif // DEBUG
+
+ compiler->raMarkStkVars();
+ recordMaxSpill();
+
+ // TODO-CQ: Review this comment and address as needed.
+ // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
+ // so that the gc tracking logic and lvMustInit logic will ignore them.
+ // Extract the code that does this from raAssignVars, and call it here.
+ // PRECONDITIONS: Ensure that lvPromoted is set on promoted structs, if and
+ // only if it is promoted on all paths.
+ // Call might be something like:
+ // compiler->BashUnusedStructLocals();
+}
+
+//------------------------------------------------------------------------
+// insertMove: Insert a move of a lclVar with the given lclNum into the given block.
+//
+// Arguments:
+// block - the BasicBlock into which the move will be inserted.
+// insertionPoint - the instruction before which to insert the move
+// lclNum - the lclNum of the var to be moved
+// fromReg - the register from which the var is moving
+// toReg - the register to which the var is moving
+//
+// Return Value:
+// None.
+//
+// Notes:
+// If insertionPoint is non-NULL, insert before that instruction;
+// otherwise, insert "near" the end (prior to the branch, if any).
+// If fromReg or toReg is REG_STK, then move from/to memory, respectively.
+
+void LinearScan::insertMove(
+ BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber fromReg, regNumber toReg)
+{
+ LclVarDsc* varDsc = compiler->lvaTable + lclNum;
+ // One or both MUST be a register
+ assert(fromReg != REG_STK || toReg != REG_STK);
+ // They must not be the same register.
+ assert(fromReg != toReg);
+
+ // This var can't be marked lvRegister now
+ varDsc->lvRegNum = REG_STK;
+
+ var_types lclTyp = varDsc->TypeGet();
+ if (varDsc->lvNormalizeOnStore())
+ {
+ lclTyp = genActualType(lclTyp);
+ }
+ GenTreePtr src = compiler->gtNewLclvNode(lclNum, lclTyp);
+ src->gtLsraInfo.isLsraAdded = true;
+ GenTreePtr top;
+
+ // If we are moving from STK to reg, mark the lclVar nodes with GTF_SPILLED
+ // Otherwise, if we are moving from reg to stack, mark it as GTF_SPILL
+ // Finally, for a reg-to-reg move, generate a GT_COPY
+
+ top = src;
+ if (fromReg == REG_STK)
+ {
+ src->gtFlags |= GTF_SPILLED;
+ src->gtRegNum = toReg;
+ }
+ else if (toReg == REG_STK)
+ {
+ src->gtFlags |= GTF_SPILL;
+ src->SetInReg();
+ src->gtRegNum = fromReg;
+ }
+ else
+ {
+ top = new (compiler, GT_COPY) GenTreeCopyOrReload(GT_COPY, varDsc->TypeGet(), src);
+ // This is the new home of the lclVar - indicate that by clearing the GTF_VAR_DEATH flag.
+ // Note that if src is itself a lastUse, this will have no effect.
+ top->gtFlags &= ~(GTF_VAR_DEATH);
+ src->gtRegNum = fromReg;
+ src->SetInReg();
+ top->gtRegNum = toReg;
+ src->gtNext = top;
+ top->gtPrev = src;
+ src->gtLsraInfo.isLocalDefUse = false;
+ top->gtLsraInfo.isLsraAdded = true;
+ }
+ top->gtLsraInfo.isLocalDefUse = true;
+
+ LIR::Range treeRange = LIR::SeqTree(compiler, top);
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ if (insertionPoint != nullptr)
+ {
+ blockRange.InsertBefore(insertionPoint, std::move(treeRange));
+ }
+ else
+ {
+ // Put the copy at the bottom
+ // If there's a branch, make an embedded statement that executes just prior to the branch
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(!blockRange.IsEmpty());
+
+ GenTree* branch = blockRange.LastNode();
+ assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+ branch->OperGet() == GT_SWITCH);
+
+ blockRange.InsertBefore(branch, std::move(treeRange));
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+ blockRange.InsertAtEnd(std::move(treeRange));
+ }
+ }
+}
+
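+//------------------------------------------------------------------------
+// insertSwap: Insert a GT_SWAP node that exchanges the registers of two lclVars into the given block.
+//
+// Arguments:
+// block - the BasicBlock into which the swap will be inserted.
+// insertionPoint - the instruction before which to insert the swap
+// lclNum1 - the lclNum of the var currently in reg1
+// reg1 - the register holding lclNum1
+// lclNum2 - the lclNum of the var currently in reg2
+// reg2 - the register holding lclNum2
+//
+// Notes:
+// If insertionPoint is non-NULL, insert before that instruction;
+// otherwise, insert "near" the end of the block (prior to the branch, if any).
+//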
+void LinearScan::insertSwap(
+ BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum1, regNumber reg1, unsigned lclNum2, regNumber reg2)
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ const char* insertionPointString = "top";
+ if (insertionPoint == nullptr)
+ {
+ insertionPointString = "bottom";
+ }
+ printf(" BB%02u %s: swap V%02u in %s with V%02u in %s\n", block->bbNum, insertionPointString, lclNum1,
+ getRegName(reg1), lclNum2, getRegName(reg2));
+ }
+#endif // DEBUG
+
+ LclVarDsc* varDsc1 = compiler->lvaTable + lclNum1;
+ LclVarDsc* varDsc2 = compiler->lvaTable + lclNum2;
+ assert(reg1 != REG_STK && reg1 != REG_NA && reg2 != REG_STK && reg2 != REG_NA);
+
+ GenTreePtr lcl1 = compiler->gtNewLclvNode(lclNum1, varDsc1->TypeGet());
+ lcl1->gtLsraInfo.isLsraAdded = true;
+ lcl1->gtLsraInfo.isLocalDefUse = false;
+ lcl1->SetInReg();
+ lcl1->gtRegNum = reg1;
+
+ GenTreePtr lcl2 = compiler->gtNewLclvNode(lclNum2, varDsc2->TypeGet());
+ lcl2->gtLsraInfo.isLsraAdded = true;
+ lcl2->gtLsraInfo.isLocalDefUse = false;
+ lcl2->SetInReg();
+ lcl2->gtRegNum = reg2;
+
+ GenTreePtr swap = compiler->gtNewOperNode(GT_SWAP, TYP_VOID, lcl1, lcl2);
+ swap->gtLsraInfo.isLsraAdded = true;
+ swap->gtLsraInfo.isLocalDefUse = false;
+ swap->gtRegNum = REG_NA;
+
+ lcl1->gtNext = lcl2;
+ lcl2->gtPrev = lcl1;
+ lcl2->gtNext = swap;
+ swap->gtPrev = lcl2;
+
+ LIR::Range swapRange = LIR::SeqTree(compiler, swap);
+ LIR::Range& blockRange = LIR::AsRange(block);
+
+ if (insertionPoint != nullptr)
+ {
+ blockRange.InsertBefore(insertionPoint, std::move(swapRange));
+ }
+ else
+ {
+ // Put the swap at the bottom
+ // If there's a branch, make an embedded statement that executes just prior to the branch
+ if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(!blockRange.IsEmpty());
+
+ GenTree* branch = blockRange.LastNode();
+ assert(branch->OperGet() == GT_JTRUE || branch->OperGet() == GT_SWITCH_TABLE ||
+ branch->OperGet() == GT_SWITCH);
+
+ blockRange.InsertBefore(branch, std::move(swapRange));
+ }
+ else
+ {
+ assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS);
+ blockRange.InsertAtEnd(std::move(swapRange));
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// getTempRegForResolution: Get a free register to use for resolution code.
+//
+// Arguments:
+// fromBlock - The "from" block on the edge being resolved.
+// toBlock - The "to" block on the edge.
+// type - the type of register required
+//
+// Return Value:
+// Returns a register that is free on the given edge, or REG_NA if none is available.
+//
+// Notes:
+// It is up to the caller to check the return value to determine whether a register is
+// available (i.e. the result is not REG_NA), and to handle the no-register case appropriately.
+// It is also up to the caller to cache the return value, as this is not cheap to compute.
+
+regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type)
+{
+ // TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps
+ // and they would be more space-efficient as well.
+ VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
+ VarToRegMap toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
+
+ regMaskTP freeRegs = allRegs(type);
+#ifdef DEBUG
+ if (getStressLimitRegs() == LSRA_LIMIT_SMALL_SET)
+ {
+ return REG_NA;
+ }
+#endif // DEBUG
+ INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs));
+
+ // We are only interested in the variables that are live-in to the "to" block.
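+ // Remove from the candidate set any register that is the current location of such a variable
+ // on either side of the edge; the temp must not clobber a source or a target register.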
+ VARSET_ITER_INIT(compiler, iter, toBlock->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex) && freeRegs != RBM_NONE)
+ {
+ regNumber fromReg = fromVarToRegMap[varIndex];
+ regNumber toReg = toVarToRegMap[varIndex];
+ assert(fromReg != REG_NA && toReg != REG_NA);
+ if (fromReg != REG_STK)
+ {
+ freeRegs &= ~genRegMask(fromReg);
+ }
+ if (toReg != REG_STK)
+ {
+ freeRegs &= ~genRegMask(toReg);
+ }
+ }
+ if (freeRegs == RBM_NONE)
+ {
+ return REG_NA;
+ }
+ else
+ {
+ regNumber tempReg = genRegNumFromMask(genFindLowestBit(freeRegs));
+ return tempReg;
+ }
+}
+
+//------------------------------------------------------------------------
+// addResolution: Add a resolution move of the given interval
+//
+// Arguments:
+// block - the BasicBlock into which the move will be inserted.
+// insertionPoint - the instruction before which to insert the move
+// interval - the interval of the var to be moved
+// toReg - the register to which the var is moving
+// fromReg - the register from which the var is moving
+//
+// Return Value:
+// None.
+//
+// Notes:
+// For joins, we insert at the bottom (indicated by an insertionPoint
+// of nullptr), while for splits we insert at the top.
+// This is because for joins 'block' is a pred of the join, while for splits it is a succ.
+// For critical edges, this function may be called twice - once to move from
+// the source (fromReg), if any, to the stack, in which case toReg will be
+// REG_STK, and we insert at the bottom (leave insertionPoint as nullptr).
+// The next time, we want to move from the stack to the destination (toReg),
+// in which case fromReg will be REG_STK, and we insert at the top.
+
+void LinearScan::addResolution(
+ BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg)
+{
+#ifdef DEBUG
+ const char* insertionPointString = "top";
+#endif // DEBUG
+ if (insertionPoint == nullptr)
+ {
+#ifdef DEBUG
+ insertionPointString = "bottom";
+#endif // DEBUG
+ }
+
+ JITDUMP(" BB%02u %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum);
+ JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg));
+
+ insertMove(block, insertionPoint, interval->varNum, fromReg, toReg);
+ if (fromReg == REG_STK || toReg == REG_STK)
+ {
+ interval->isSpilled = true;
+ }
+ else
+ {
+ interval->isSplit = true;
+ }
+}
+
+//------------------------------------------------------------------------
+// handleOutgoingCriticalEdges: Performs the necessary resolution on all critical edges that feed out of 'block'
+//
+// Arguments:
+// block - the block with outgoing critical edges.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// For all outgoing critical edges (i.e. any successor of this block which is
+// a join edge), if there are any conflicts, split the edge by adding a new block,
+// and generate the resolution code into that block.
+
+void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(sameResolutionSet, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(sameLivePathsSet, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(singleTargetSet, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(diffResolutionSet, VarSetOps::MakeEmpty(compiler));
+
+ // Get the outVarToRegMap for this block
+ VarToRegMap outVarToRegMap = getOutVarToRegMap(block->bbNum);
+ unsigned succCount = block->NumSucc(compiler);
+ assert(succCount > 1);
+ VarToRegMap firstSuccInVarToRegMap = nullptr;
+ BasicBlock* firstSucc = nullptr;
+
+ // First, determine the live regs at the end of this block so that we know what regs are
+ // available to copy into.
+ regMaskTP liveOutRegs = RBM_NONE;
+ VARSET_ITER_INIT(compiler, iter1, block->bbLiveOut, varIndex1);
+ while (iter1.NextElem(compiler, &varIndex1))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex1];
+ regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+ if (fromReg != REG_STK)
+ {
+ liveOutRegs |= genRegMask(fromReg);
+ }
+ }
+
+ // Next, if this block ends with a switch table, we have to make sure not to copy
+ // into the registers that it uses.
+ regMaskTP switchRegs = RBM_NONE;
+ if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ // At this point, Lowering has transformed any non-switch-table blocks into
+ // cascading ifs.
+ GenTree* switchTable = LIR::AsRange(block).LastNode();
+ assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE);
+
+ switchRegs = switchTable->gtRsvdRegs;
+ GenTree* op1 = switchTable->gtGetOp1();
+ GenTree* op2 = switchTable->gtGetOp2();
+ noway_assert(op1 != nullptr && op2 != nullptr);
+ assert(op1->gtRegNum != REG_NA && op2->gtRegNum != REG_NA);
+ switchRegs |= genRegMask(op1->gtRegNum);
+ switchRegs |= genRegMask(op2->gtRegNum);
+ }
+
+ VarToRegMap sameVarToRegMap = sharedCriticalVarToRegMap;
+ regMaskTP sameWriteRegs = RBM_NONE;
+ regMaskTP diffReadRegs = RBM_NONE;
+
+ // For each var, classify them as:
+ // - in the same register at the end of this block and at each target (no resolution needed)
+ // - in different registers at different targets (resolve separately):
+ // diffResolutionSet
+ // - in the same register at each target at which it's live, but different from the end of
+ // this block. We may be able to resolve these as if it were a "join", but only if they do not
+ // write to any registers that are read by those in the diffResolutionSet:
+ // sameResolutionSet
+
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+ bool isMatch = true;
+ bool isSame = false;
+ bool maybeSingleTarget = false;
+ bool maybeSameLivePaths = false;
+ bool liveOnlyAtSplitEdge = true;
+ regNumber sameToReg = REG_NA;
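+ // Walk the successors: sameToReg ends up as the common target register if this var has the
+ // same home in every successor where it is live, and REG_NA otherwise.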
+ for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
+ {
+ BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
+ if (!VarSetOps::IsMember(compiler, succBlock->bbLiveIn, varIndex))
+ {
+ maybeSameLivePaths = true;
+ continue;
+ }
+ else if (liveOnlyAtSplitEdge)
+ {
+ // Is the var live only at those target blocks which are connected by a split edge to this block
+ liveOnlyAtSplitEdge = ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB));
+ }
+
+ regNumber toReg = getVarReg(getInVarToRegMap(succBlock->bbNum), varNum);
+ if (sameToReg == REG_NA)
+ {
+ sameToReg = toReg;
+ continue;
+ }
+ if (toReg == sameToReg)
+ {
+ continue;
+ }
+ sameToReg = REG_NA;
+ break;
+ }
+
+ // Check for the cases where we can't write to a register.
+ // We only need to check for these cases if sameToReg is an actual register (not REG_STK).
+ if (sameToReg != REG_NA && sameToReg != REG_STK)
+ {
+ // If there's a path on which this var isn't live, it may use the original value in sameToReg.
+ // In this case, sameToReg will be in the liveOutRegs of this block.
+ // Similarly, if sameToReg is in sameWriteRegs, it has already been used (i.e. for a lclVar that's
+ // live only at another target), and we can't copy another lclVar into that reg in this block.
+ regMaskTP sameToRegMask = genRegMask(sameToReg);
+ if (maybeSameLivePaths &&
+ (((sameToRegMask & liveOutRegs) != RBM_NONE) || ((sameToRegMask & sameWriteRegs) != RBM_NONE)))
+ {
+ sameToReg = REG_NA;
+ }
+ // If this register is used by a switch table at the end of the block, we can't do the copy
+ // in this block (since we can't insert it after the switch).
+ if ((sameToRegMask & switchRegs) != RBM_NONE)
+ {
+ sameToReg = REG_NA;
+ }
+
+ // If the var is live only at those blocks connected by a split edge and not live-in at some of the
+ // target blocks, we will resolve it the same way as if it were in diffResolutionSet and resolution
+ // will be deferred to the handling of split edges, which means the copy will only be at those target(s).
+ //
+ // Another way to achieve similar resolution for vars live only at split edges is by removing them
+ // from consideration up-front, but it requires that we traverse those edges anyway to account for
+ // the registers that must not be overwritten.
+ if (liveOnlyAtSplitEdge && maybeSameLivePaths)
+ {
+ sameToReg = REG_NA;
+ }
+ }
+
+ if (sameToReg == REG_NA)
+ {
+ VarSetOps::AddElemD(compiler, diffResolutionSet, varIndex);
+ if (fromReg != REG_STK)
+ {
+ diffReadRegs |= genRegMask(fromReg);
+ }
+ }
+ else if (sameToReg != fromReg)
+ {
+ VarSetOps::AddElemD(compiler, sameResolutionSet, varIndex);
+ sameVarToRegMap[varIndex] = sameToReg;
+ if (sameToReg != REG_STK)
+ {
+ sameWriteRegs |= genRegMask(sameToReg);
+ }
+ }
+ }
+
+ if (!VarSetOps::IsEmpty(compiler, sameResolutionSet))
+ {
+ if ((sameWriteRegs & diffReadRegs) != RBM_NONE)
+ {
+ // We cannot split the "same" and "diff" regs if the "same" set writes registers
+ // that must be read by the "diff" set. (Note that when these are done as a "batch"
+ // we carefully order them to ensure all the input regs are read before they are
+ // overwritten.)
+ VarSetOps::UnionD(compiler, diffResolutionSet, sameResolutionSet);
+ VarSetOps::ClearD(compiler, sameResolutionSet);
+ }
+ else
+ {
+ // For any vars in the sameResolutionSet, we can simply add the move at the end of "block".
+ resolveEdge(block, nullptr, ResolveSharedCritical, sameResolutionSet);
+ }
+ }
+ if (!VarSetOps::IsEmpty(compiler, diffResolutionSet))
+ {
+ for (unsigned succIndex = 0; succIndex < succCount; succIndex++)
+ {
+ BasicBlock* succBlock = block->GetSucc(succIndex, compiler);
+
+ // Any "diffResolutionSet" resolution for a block with no other predecessors will be handled later
+ // as split resolution.
+ if ((succBlock->bbPreds->flNext == nullptr) && (succBlock != compiler->fgFirstBB))
+ {
+ continue;
+ }
+
+ // Now collect the resolution set for just this edge, if any.
+ // Check only the vars in diffResolutionSet that are live-in to this successor.
+ bool needsResolution = false;
+ VarToRegMap succInVarToRegMap = getInVarToRegMap(succBlock->bbNum);
+ VARSET_TP VARSET_INIT_NOCOPY(edgeResolutionSet,
+ VarSetOps::Intersection(compiler, diffResolutionSet, succBlock->bbLiveIn));
+ VARSET_ITER_INIT(compiler, iter, edgeResolutionSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ Interval* interval = getIntervalForLocalVar(varNum);
+ regNumber fromReg = getVarReg(outVarToRegMap, varNum);
+ regNumber toReg = getVarReg(succInVarToRegMap, varNum);
+
+ if (fromReg == toReg)
+ {
+ VarSetOps::RemoveElemD(compiler, edgeResolutionSet, varIndex);
+ }
+ }
+ if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet))
+ {
+ resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet);
+ }
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// resolveEdges: Perform resolution across basic block edges
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Traverse the basic blocks.
+// - If this block has a single predecessor that is not the immediately
+// preceding block, perform any needed 'split' resolution at the beginning of this block
+// - Otherwise if this block has critical incoming edges, handle them.
+// - If this block has a single successor that has multiple predecessors, perform any needed
+// 'join' resolution at the end of this block.
+// Note that a block may have both 'split' or 'critical' incoming edge(s) and 'join' outgoing
+// edges.
+
+void LinearScan::resolveEdges()
+{
+ JITDUMP("RESOLVING EDGES\n");
+
+ BasicBlock *block, *prevBlock = nullptr;
+
+ // Handle all the critical edges first.
+ // We will try to avoid resolution across critical edges in cases where all the critical-edge
+ // targets of a block have the same home. We will then split the edges only for the
+ // remaining mismatches. We visit the out-edges, as that allows us to share the moves that are
+ // common among all the targets.
+
+ foreach_block(compiler, block)
+ {
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ // This is a new block added during resolution - we don't need to visit these now.
+ continue;
+ }
+ if (blockInfo[block->bbNum].hasCriticalOutEdge)
+ {
+ handleOutgoingCriticalEdges(block);
+ }
+ prevBlock = block;
+ }
+
+ prevBlock = nullptr;
+ foreach_block(compiler, block)
+ {
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ // This is a new block added during resolution - we don't need to visit these now.
+ continue;
+ }
+
+ unsigned succCount = block->NumSucc(compiler);
+ flowList* preds = block->bbPreds;
+ BasicBlock* uniquePredBlock = block->GetUniquePred(compiler);
+
+ // First, if this block has a single predecessor,
+ // we may need resolution at the beginning of this block.
+ // This may be true even if it's the block we used for starting locations,
+ // if a variable was spilled.
+ if (!VarSetOps::IsEmpty(compiler, block->bbLiveIn))
+ {
+ if (uniquePredBlock != nullptr)
+ {
+ // We may have split edges during critical edge resolution, and in the process split
+ // a non-critical edge as well.
+ // It is unlikely that we would ever have more than one of these in sequence (indeed,
+ // I don't think it's possible), but there's no need to assume that it can't.
+ while (uniquePredBlock->bbNum > bbNumMaxBeforeResolution)
+ {
+ uniquePredBlock = uniquePredBlock->GetUniquePred(compiler);
+ noway_assert(uniquePredBlock != nullptr);
+ }
+ resolveEdge(uniquePredBlock, block, ResolveSplit, block->bbLiveIn);
+ }
+ }
+
+ // Finally, if this block has a single successor:
+ // - and that has at least one other predecessor (otherwise we will do the resolution at the
+ // top of the successor),
+ // - and that is not the target of a critical edge (otherwise we've already handled it)
+ // we may need resolution at the end of this block.
+
+ if (succCount == 1)
+ {
+ BasicBlock* succBlock = block->GetSucc(0, compiler);
+ if (succBlock->GetUniquePred(compiler) == nullptr)
+ {
+ resolveEdge(block, succBlock, ResolveJoin, succBlock->bbLiveIn);
+ }
+ }
+ }
+
+ // Now, fix up the mapping for any blocks that were added for edge splitting.
+ // See the comment prior to the call to fgSplitEdge() in resolveEdge().
+ // Note that we could fold this loop in with the checking code below, but that
+ // would only improve the debug case, and would clutter up the code somewhat.
+ if (compiler->fgBBNumMax > bbNumMaxBeforeResolution)
+ {
+ foreach_block(compiler, block)
+ {
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ // There may be multiple blocks inserted when we split. But we must always have exactly
+ // one path (i.e. all blocks must be single-successor and single-predecessor),
+ // and only one block along the path may be non-empty.
+ // Note that we may have a newly-inserted block that is empty, but which connects
+ // two non-resolution blocks. This happens when an edge is split that requires it.
+
+ BasicBlock* succBlock = block;
+ do
+ {
+ succBlock = succBlock->GetUniqueSucc();
+ noway_assert(succBlock != nullptr);
+ } while ((succBlock->bbNum > bbNumMaxBeforeResolution) && succBlock->isEmpty());
+
+ BasicBlock* predBlock = block;
+ do
+ {
+ predBlock = predBlock->GetUniquePred(compiler);
+ noway_assert(predBlock != nullptr);
+ } while ((predBlock->bbNum > bbNumMaxBeforeResolution) && predBlock->isEmpty());
+
+ unsigned succBBNum = succBlock->bbNum;
+ unsigned predBBNum = predBlock->bbNum;
+ if (block->isEmpty())
+ {
+ // For the case of the empty block, find the non-resolution block (succ or pred).
+ if (predBBNum > bbNumMaxBeforeResolution)
+ {
+ assert(succBBNum <= bbNumMaxBeforeResolution);
+ predBBNum = 0;
+ }
+ else
+ {
+ succBBNum = 0;
+ }
+ }
+ else
+ {
+ assert((succBBNum <= bbNumMaxBeforeResolution) && (predBBNum <= bbNumMaxBeforeResolution));
+ }
+ SplitEdgeInfo info = {predBBNum, succBBNum};
+ getSplitBBNumToTargetBBNumMap()->Set(block->bbNum, info);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ // Make sure the varToRegMaps match up on all edges.
+ bool foundMismatch = false;
+ foreach_block(compiler, block)
+ {
+ if (block->isEmpty() && block->bbNum > bbNumMaxBeforeResolution)
+ {
+ continue;
+ }
+ VarToRegMap toVarToRegMap = getInVarToRegMap(block->bbNum);
+ for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ VarToRegMap fromVarToRegMap = getOutVarToRegMap(predBlock->bbNum);
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber fromReg = getVarReg(fromVarToRegMap, varNum);
+ regNumber toReg = getVarReg(toVarToRegMap, varNum);
+ if (fromReg != toReg)
+ {
+ Interval* interval = getIntervalForLocalVar(varNum);
+ if (!foundMismatch)
+ {
+ foundMismatch = true;
+ printf("Found mismatched var locations after resolution!\n");
+ }
+ printf(" V%02u: BB%02u to BB%02u: ", varNum, predBlock->bbNum, block->bbNum);
+ printf("%s to %s\n", getRegName(fromReg), getRegName(toReg));
+ }
+ }
+ }
+ }
+ assert(!foundMismatch);
+#endif
+ JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// resolveEdge: Perform the specified type of resolution between two blocks.
+//
+// Arguments:
+// fromBlock - the block from which the edge originates
+// toBlock - the block at which the edge terminates
+// resolveType - the type of resolution to be performed
+// liveSet - the set of tracked lclVar indices which may require resolution
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// The caller must have performed the analysis to determine the type of the edge.
+//
+// Notes:
+// This method emits the correctly ordered moves necessary to place variables in the
+// correct registers across a Split, Join or Critical edge.
+// In order to avoid overwriting register values before they have been moved to their
+// new home (register/stack), it first does the register-to-stack moves (to free those
+// registers), then the register to register moves, ensuring that the target register
+// is free before the move, and then finally the stack to register moves.
+
+void LinearScan::resolveEdge(BasicBlock* fromBlock,
+ BasicBlock* toBlock,
+ ResolveType resolveType,
+ VARSET_VALARG_TP liveSet)
+{
+ VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum);
+ VarToRegMap toVarToRegMap;
+ if (resolveType == ResolveSharedCritical)
+ {
+ toVarToRegMap = sharedCriticalVarToRegMap;
+ }
+ else
+ {
+ toVarToRegMap = getInVarToRegMap(toBlock->bbNum);
+ }
+
+ // The block to which we add the resolution moves depends on the resolveType
+ BasicBlock* block;
+ switch (resolveType)
+ {
+ case ResolveJoin:
+ case ResolveSharedCritical:
+ block = fromBlock;
+ break;
+ case ResolveSplit:
+ block = toBlock;
+ break;
+ case ResolveCritical:
+ // fgSplitEdge may add one or two BasicBlocks. It returns the block that splits
+ // the edge from 'fromBlock' to 'toBlock', but if it inserts that block right after
+ // a block with a fall-through it will have to create another block to handle that edge.
+ // These new blocks can be mapped to existing blocks in order to correctly handle
+ // the calls to recordVarLocationsAtStartOfBB() from codegen. That mapping is handled
+ // in resolveEdges(), after all the edge resolution has been done (by calling this
+ // method for each edge).
+ block = compiler->fgSplitEdge(fromBlock, toBlock);
+ break;
+ default:
+ unreached();
+ break;
+ }
+
+#ifndef _TARGET_XARCH_
+ // We record tempregs for beginning and end of each block.
+ // For amd64/x86 we only need a tempReg for float - we'll use xchg for int.
+ // TODO-Throughput: It would be better to determine the tempRegs on demand, but the code below
+ // modifies the varToRegMaps so we don't have all the correct registers at the time
+ // we need to get the tempReg.
+ regNumber tempRegInt =
+ (resolveType == ResolveSharedCritical) ? REG_NA : getTempRegForResolution(fromBlock, toBlock, TYP_INT);
+#endif // !_TARGET_XARCH_
+ regNumber tempRegFlt = REG_NA;
+ if ((compiler->compFloatingPointUsed) && (resolveType != ResolveSharedCritical))
+ {
+ tempRegFlt = getTempRegForResolution(fromBlock, toBlock, TYP_FLOAT);
+ }
+
+ regMaskTP targetRegsToDo = RBM_NONE;
+ regMaskTP targetRegsReady = RBM_NONE;
+ regMaskTP targetRegsFromStack = RBM_NONE;
+
+ // The following arrays capture the location of the registers as they are moved:
+ // - location[reg] gives the current location of the var that was originally in 'reg'.
+ // (Note that a var may be moved more than once.)
+ // - source[reg] gives the original location of the var that needs to be moved to 'reg'.
+ // For example, if a var is in rax and needs to be moved to rsi, then we would start with:
+ // location[rax] == rax
+ // source[rsi] == rax -- this doesn't change
+ // Then, if for some reason we need to move it temporarily to rbx, we would have:
+ // location[rax] == rbx
+ // Once we have completed the move, we will have:
+ // location[rax] == REG_NA
+ // This indicates that the var originally in rax is now in its target register.
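+ //
+ // A cycle, e.g. exchanging rax and rsi (illustrative registers), starts out with
+ // location[rax] == rax, location[rsi] == rsi, source[rax] == rsi and source[rsi] == rax.
+ // Neither target register starts out free, so the cycle is broken below either with a
+ // swap (on xarch) or by moving one of the values through a temp register or the stack,
+ // updating location[] at each step.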
+
+ regNumberSmall location[REG_COUNT];
+ C_ASSERT(sizeof(char) == sizeof(regNumberSmall)); // for memset to work
+ memset(location, REG_NA, REG_COUNT);
+ regNumberSmall source[REG_COUNT];
+ memset(source, REG_NA, REG_COUNT);
+
+ // sourceIntervals[reg] is the interval whose value starts out in register 'reg'
+ // (i.e. it is keyed by the incoming reg).
+ Interval* sourceIntervals[REG_COUNT] = {nullptr};
+
+ // Intervals for vars that need to be loaded from the stack
+ Interval* stackToRegIntervals[REG_COUNT] = {nullptr};
+
+ // Get the starting insertion point for the "to" resolution
+ GenTreePtr insertionPoint = nullptr;
+ if (resolveType == ResolveSplit || resolveType == ResolveCritical)
+ {
+ insertionPoint = LIR::AsRange(block).FirstNonPhiNode();
+ }
+
+ // First:
+ // - Perform all moves from reg to stack (no ordering needed on these)
+ // - For reg to reg moves, record the current location, associating their
+ // source location with the target register they need to go into
+ // - For stack to reg moves (done last, no ordering needed between them)
+ // record the interval associated with the target reg
+ // TODO-Throughput: We should be looping over the liveIn and liveOut registers, since
+ // that will scale better than iterating over all of the live variables.
+
+ VARSET_ITER_INIT(compiler, iter, liveSet, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ bool isSpilled = false;
+ Interval* interval = getIntervalForLocalVar(varNum);
+ regNumber fromReg = getVarReg(fromVarToRegMap, varNum);
+ regNumber toReg = getVarReg(toVarToRegMap, varNum);
+ if (fromReg == toReg)
+ {
+ continue;
+ }
+
+ // For Critical edges, the location will not change on either side of the edge,
+ // since we'll add a new block to do the move.
+ if (resolveType == ResolveSplit)
+ {
+ toVarToRegMap[varIndex] = fromReg;
+ }
+ else if (resolveType == ResolveJoin || resolveType == ResolveSharedCritical)
+ {
+ fromVarToRegMap[varIndex] = toReg;
+ }
+
+ assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX);
+
+ bool done = false;
+
+ if (fromReg != toReg)
+ {
+ if (fromReg == REG_STK)
+ {
+ stackToRegIntervals[toReg] = interval;
+ targetRegsFromStack |= genRegMask(toReg);
+ }
+ else if (toReg == REG_STK)
+ {
+ // Do the reg to stack moves now
+ addResolution(block, insertionPoint, interval, REG_STK, fromReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ }
+ else
+ {
+ location[fromReg] = (regNumberSmall)fromReg;
+ source[toReg] = (regNumberSmall)fromReg;
+ sourceIntervals[fromReg] = interval;
+ targetRegsToDo |= genRegMask(toReg);
+ }
+ }
+ }
+
+ // REGISTER to REGISTER MOVES
+
+ // First, find all the ones that are ready to move now
+ regMaskTP targetCandidates = targetRegsToDo;
+ while (targetCandidates != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetCandidates);
+ targetCandidates &= ~targetRegMask;
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+ if (location[targetReg] == REG_NA)
+ {
+ targetRegsReady |= targetRegMask;
+ }
+ }
+
+ // Perform reg to reg moves
+ while (targetRegsToDo != RBM_NONE)
+ {
+ while (targetRegsReady != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetRegsReady);
+ targetRegsToDo &= ~targetRegMask;
+ targetRegsReady &= ~targetRegMask;
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+ assert(location[targetReg] != targetReg);
+ regNumber sourceReg = (regNumber)source[targetReg];
+ regNumber fromReg = (regNumber)location[sourceReg];
+ assert(fromReg < UCHAR_MAX && sourceReg < UCHAR_MAX);
+ Interval* interval = sourceIntervals[sourceReg];
+ assert(interval != nullptr);
+ addResolution(block, insertionPoint, interval, targetReg, fromReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ sourceIntervals[sourceReg] = nullptr;
+ location[sourceReg] = REG_NA;
+
+ // Did this move free up a register that is itself the target of another pending move?
+ if (fromReg == sourceReg && source[fromReg] != REG_NA)
+ {
+ regMaskTP fromRegMask = genRegMask(fromReg);
+ targetRegsReady |= fromRegMask;
+ }
+ }
+ if (targetRegsToDo != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetRegsToDo);
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+
+ // Is it already there due to other moves?
+ // If not, move it to the temp reg, OR swap it with another register
+ regNumber sourceReg = (regNumber)source[targetReg];
+ regNumber fromReg = (regNumber)location[sourceReg];
+ if (targetReg == fromReg)
+ {
+ targetRegsToDo &= ~targetRegMask;
+ }
+ else
+ {
+ regNumber tempReg = REG_NA;
+ bool useSwap = false;
+ if (emitter::isFloatReg(targetReg))
+ {
+ tempReg = tempRegFlt;
+ }
+#ifdef _TARGET_XARCH_
+ else
+ {
+ useSwap = true;
+ }
+#else // !_TARGET_XARCH_
+ else
+ {
+ tempReg = tempRegInt;
+ }
+#endif // !_TARGET_XARCH_
+ if (useSwap || tempReg == REG_NA)
+ {
+ // First, we have to figure out the destination register for what's currently in fromReg,
+ // so that we can find its sourceInterval.
+ regNumber otherTargetReg = REG_NA;
+
+ // By chance, is fromReg going where it belongs?
+ if (location[source[fromReg]] == targetReg)
+ {
+ otherTargetReg = fromReg;
+ // If we can swap, we will be done with otherTargetReg as well.
+ // Otherwise, we'll spill it to the stack and reload it later.
+ if (useSwap)
+ {
+ regMaskTP fromRegMask = genRegMask(fromReg);
+ targetRegsToDo &= ~fromRegMask;
+ }
+ }
+ else
+ {
+ // Look at the remaining registers from targetRegsToDo (which we expect to be relatively
+ // small at this point) to find out what's currently in targetReg.
+ regMaskTP mask = targetRegsToDo;
+ while (mask != RBM_NONE && otherTargetReg == REG_NA)
+ {
+ regMaskTP nextRegMask = genFindLowestBit(mask);
+ regNumber nextReg = genRegNumFromMask(nextRegMask);
+ mask &= ~nextRegMask;
+ if (location[source[nextReg]] == targetReg)
+ {
+ otherTargetReg = nextReg;
+ }
+ }
+ }
+ assert(otherTargetReg != REG_NA);
+
+ if (useSwap)
+ {
+ // Generate a "swap" of fromReg and targetReg
+ insertSwap(block, insertionPoint, sourceIntervals[source[otherTargetReg]]->varNum, targetReg,
+ sourceIntervals[sourceReg]->varNum, fromReg);
+ location[sourceReg] = REG_NA;
+ location[source[otherTargetReg]] = (regNumberSmall)fromReg;
+ }
+ else
+ {
+ // Spill "targetReg" to the stack and add its eventual target (otherTargetReg)
+ // to "targetRegsFromStack", which will be handled below.
+ // NOTE: This condition is very rare. Setting COMPlus_JitStressRegs=0x203
+ // has been known to trigger it in JIT SH.
+
+ // First, spill "otherInterval" from targetReg to the stack.
+ Interval* otherInterval = sourceIntervals[source[otherTargetReg]];
+ addResolution(block, insertionPoint, otherInterval, REG_STK, targetReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ location[source[otherTargetReg]] = REG_STK;
+
+ // Now, move the interval that is going to targetReg, and add its "fromReg" to
+ // "targetRegsReady".
+ addResolution(block, insertionPoint, sourceIntervals[sourceReg], targetReg, fromReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ location[sourceReg] = REG_NA;
+ targetRegsReady |= genRegMask(fromReg);
+ }
+ targetRegsToDo &= ~targetRegMask;
+ }
+ else
+ {
+ compiler->codeGen->regSet.rsSetRegsModified(genRegMask(tempReg) DEBUGARG(dumpTerse));
+ assert(sourceIntervals[targetReg] != nullptr);
+ addResolution(block, insertionPoint, sourceIntervals[targetReg], tempReg, targetReg);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ location[targetReg] = (regNumberSmall)tempReg;
+ targetRegsReady |= targetRegMask;
+ }
+ }
+ }
+ }
+
+ // Finally, perform stack to reg moves
+ // All the target regs will be empty at this point
+ while (targetRegsFromStack != RBM_NONE)
+ {
+ regMaskTP targetRegMask = genFindLowestBit(targetRegsFromStack);
+ targetRegsFromStack &= ~targetRegMask;
+ regNumber targetReg = genRegNumFromMask(targetRegMask);
+
+ Interval* interval = stackToRegIntervals[targetReg];
+ assert(interval != nullptr);
+
+ addResolution(block, insertionPoint, interval, targetReg, REG_STK);
+ JITDUMP(" (%s)\n", resolveTypeName[resolveType]);
+ }
+}
+
+void TreeNodeInfo::Initialize(LinearScan* lsra, GenTree* node, LsraLocation location)
+{
+ regMaskTP dstCandidates;
+
+ // If there is a reg indicated on the tree node, use that for dstCandidates.
+ // The exception is a NOP, which sometimes shows up around late args.
+ // TODO-Cleanup: get rid of those NOPs.
+ if (node->gtRegNum == REG_NA || node->gtOper == GT_NOP)
+ {
+ dstCandidates = lsra->allRegs(node->TypeGet());
+ }
+ else
+ {
+ dstCandidates = genRegMask(node->gtRegNum);
+ }
+
+ internalIntCount = 0;
+ internalFloatCount = 0;
+ isLocalDefUse = false;
+ isHelperCallWithKills = false;
+ isLsraAdded = false;
+ definesAnyRegisters = false;
+
+ setDstCandidates(lsra, dstCandidates);
+ srcCandsIndex = dstCandsIndex;
+
+ setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+
+ loc = location;
+#ifdef DEBUG
+ isInitialized = true;
+#endif
+
+ assert(IsValid(lsra));
+}
+
+regMaskTP TreeNodeInfo::getSrcCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(srcCandsIndex);
+}
+
+void TreeNodeInfo::setSrcCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ srcCandsIndex = (unsigned char)i;
+}
+
+regMaskTP TreeNodeInfo::getDstCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(dstCandsIndex);
+}
+
+void TreeNodeInfo::setDstCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ dstCandsIndex = (unsigned char)i;
+}
+
+regMaskTP TreeNodeInfo::getInternalCandidates(LinearScan* lsra)
+{
+ return lsra->GetRegMaskForIndex(internalCandsIndex);
+}
+
+void TreeNodeInfo::setInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(mask);
+ assert(FitsIn<unsigned char>(i));
+ internalCandsIndex = (unsigned char)i;
+}
+
+void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
+{
+ LinearScan::RegMaskIndex i = lsra->GetIndexForRegMask(lsra->GetRegMaskForIndex(internalCandsIndex) | mask);
+ assert(FitsIn<unsigned char>(i));
+ internalCandsIndex = (unsigned char)i;
+}
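+
+// Illustrative usage of the candidate-set accessors above, e.g. from a node's TreeNodeInfo
+// setup (the particular masks are just an example, not taken from any specific node):
+// info.setInternalCandidates(lsra, lsra->allRegs(TYP_INT)); // start from all int regs
+// info.addInternalCandidates(lsra, RBM_BYTE_REGS); // then OR in an extra constraint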
+
+#ifdef DEBUG
+void dumpRegMask(regMaskTP regs)
+{
+ if (regs == RBM_ALLINT)
+ {
+ printf("[allInt]");
+ }
+ else if (regs == (RBM_ALLINT & ~RBM_FPBASE))
+ {
+ printf("[allIntButFP]");
+ }
+ else if (regs == RBM_ALLFLOAT)
+ {
+ printf("[allFloat]");
+ }
+ else if (regs == RBM_ALLDOUBLE)
+ {
+ printf("[allDouble]");
+ }
+ else
+ {
+ dspRegMask(regs);
+ }
+}
+
+static const char* getRefTypeName(RefType refType)
+{
+ switch (refType)
+ {
+#define DEF_REFTYPE(memberName, memberValue, shortName) \
+ case memberName: \
+ return #memberName;
+#include "lsra_reftypes.h"
+#undef DEF_REFTYPE
+ default:
+ return nullptr;
+ }
+}
+
+static const char* getRefTypeShortName(RefType refType)
+{
+ switch (refType)
+ {
+#define DEF_REFTYPE(memberName, memberValue, shortName) \
+ case memberName: \
+ return shortName;
+#include "lsra_reftypes.h"
+#undef DEF_REFTYPE
+ default:
+ return nullptr;
+ }
+}
+
+void RefPosition::dump()
+{
+ printf("<RefPosition #%-3u @%-3u", rpNum, nodeLocation);
+
+ if (nextRefPosition)
+ {
+ printf(" ->#%-3u", nextRefPosition->rpNum);
+ }
+
+ printf(" %s ", getRefTypeName(refType));
+
+ if (this->isPhysRegRef)
+ {
+ this->getReg()->tinyDump();
+ }
+ else if (getInterval())
+ {
+ this->getInterval()->tinyDump();
+ }
+
+ if (this->treeNode)
+ {
+ printf("%s ", treeNode->OpName(treeNode->OperGet()));
+ }
+ printf("BB%02u ", this->bbNum);
+
+ printf("regmask=");
+ dumpRegMask(registerAssignment);
+
+ if (this->lastUse)
+ {
+ printf(" last");
+ }
+ if (this->reload)
+ {
+ printf(" reload");
+ }
+ if (this->spillAfter)
+ {
+ printf(" spillAfter");
+ }
+ if (this->moveReg)
+ {
+ printf(" move");
+ }
+ if (this->copyReg)
+ {
+ printf(" copy");
+ }
+ if (this->isFixedRegRef)
+ {
+ printf(" fixed");
+ }
+ if (this->isLocalDefUse)
+ {
+ printf(" local");
+ }
+ if (this->delayRegFree)
+ {
+ printf(" delay");
+ }
+ if (this->outOfOrder)
+ {
+ printf(" outOfOrder");
+ }
+ printf(">\n");
+}
+
+void RegRecord::dump()
+{
+ tinyDump();
+}
+
+void Interval::dump()
+{
+ printf("Interval %2u:", intervalIndex);
+
+ if (isLocalVar)
+ {
+ printf(" (V%02u)", varNum);
+ }
+ if (isInternal)
+ {
+ printf(" (INTERNAL)");
+ }
+ if (isSpilled)
+ {
+ printf(" (SPILLED)");
+ }
+ if (isSplit)
+ {
+ printf(" (SPLIT)");
+ }
+ if (isStructField)
+ {
+ printf(" (struct)");
+ }
+ if (isSpecialPutArg)
+ {
+ printf(" (specialPutArg)");
+ }
+ if (isConstant)
+ {
+ printf(" (constant)");
+ }
+
+ printf(" RefPositions {");
+ for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr;
+ refPosition = refPosition->nextRefPosition)
+ {
+ printf("#%u@%u", refPosition->rpNum, refPosition->nodeLocation);
+ if (refPosition->nextRefPosition)
+ {
+ printf(" ");
+ }
+ }
+ printf("}");
+
+ // this is not used (yet?)
+ // printf(" SpillOffset %d", this->spillOffset);
+
+ printf(" physReg:%s", getRegName(physReg));
+
+ printf(" Preferences=");
+ dumpRegMask(this->registerPreferences);
+
+ if (relatedInterval)
+ {
+ printf(" RelatedInterval ");
+ relatedInterval->microDump();
+ printf("[%p]", dspPtr(relatedInterval));
+ }
+
+ printf("\n");
+}
+
+// print out very concise representation
+void Interval::tinyDump()
+{
+ printf("<Ivl:%u", intervalIndex);
+ if (isLocalVar)
+ {
+ printf(" V%02u", varNum);
+ }
+ if (isInternal)
+ {
+ printf(" internal");
+ }
+ printf("> ");
+}
+
+// print out extremely concise representation
+void Interval::microDump()
+{
+ char intervalTypeChar = 'I';
+ if (isInternal)
+ {
+ intervalTypeChar = 'T';
+ }
+ else if (isLocalVar)
+ {
+ intervalTypeChar = 'L';
+ }
+
+ printf("<%c%u>", intervalTypeChar, intervalIndex);
+}
+
+void RegRecord::tinyDump()
+{
+ printf("<Reg:%-3s> ", getRegName(regNum));
+}
+
+void TreeNodeInfo::dump(LinearScan* lsra)
+{
+ printf("<TreeNodeInfo @ %2u %d=%d %di %df", loc, dstCount, srcCount, internalIntCount, internalFloatCount);
+ printf(" src=");
+ dumpRegMask(getSrcCandidates(lsra));
+ printf(" int=");
+ dumpRegMask(getInternalCandidates(lsra));
+ printf(" dst=");
+ dumpRegMask(getDstCandidates(lsra));
+ if (isLocalDefUse)
+ {
+ printf(" L");
+ }
+ if (isInitialized)
+ {
+ printf(" I");
+ }
+ if (isHelperCallWithKills)
+ {
+ printf(" H");
+ }
+ if (isLsraAdded)
+ {
+ printf(" A");
+ }
+ if (isDelayFree)
+ {
+ printf(" D");
+ }
+ if (isTgtPref)
+ {
+ printf(" P");
+ }
+ printf(">\n");
+}
+
+void LinearScan::lsraDumpIntervals(const char* msg)
+{
+
+ printf("\nLinear scan intervals %s:\n", msg);
+ for (auto& interval : intervals)
+ {
+ // only dump something if it has references
+ // if (interval->firstRefPosition)
+ interval.dump();
+ }
+
+ printf("\n");
+}
+
+// Dumps a tree node as a destination or source operand, with the style
+// of dump dependent on the mode
+void LinearScan::lsraGetOperandString(GenTreePtr tree,
+ LsraTupleDumpMode mode,
+ char* operandString,
+ unsigned operandStringLength)
+{
+ const char* lastUseChar = "";
+ if ((tree->gtFlags & GTF_VAR_DEATH) != 0)
+ {
+ lastUseChar = "*";
+ }
+ switch (mode)
+ {
+ case LinearScan::LSRA_DUMP_PRE:
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtSeqNum, lastUseChar);
+ break;
+ case LinearScan::LSRA_DUMP_REFPOS:
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "t%d%s", tree->gtSeqNum, lastUseChar);
+ break;
+ case LinearScan::LSRA_DUMP_POST:
+ {
+ Compiler* compiler = JitTls::GetCompiler();
+
+ if (!tree->gtHasReg())
+ {
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "STK%s", lastUseChar);
+ }
+ else
+ {
+ _snprintf_s(operandString, operandStringLength, operandStringLength, "%s%s",
+ getRegName(tree->gtRegNum, useFloatReg(tree->TypeGet())), lastUseChar);
+ }
+ }
+ break;
+ default:
+ printf("ERROR: INVALID TUPLE DUMP MODE\n");
+ break;
+ }
+}
+void LinearScan::lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest)
+{
+ Compiler* compiler = JitTls::GetCompiler();
+ const unsigned operandStringLength = 16;
+ char operandString[operandStringLength];
+ const char* emptyDestOperand = " ";
+ char spillChar = ' ';
+
+ if (mode == LinearScan::LSRA_DUMP_POST)
+ {
+ if ((tree->gtFlags & GTF_SPILL) != 0)
+ {
+ spillChar = 'S';
+ }
+ if (!hasDest && tree->gtHasReg())
+ {
+ // This can be true for the "localDefUse" case - defining a reg, but
+ // pushing it on the stack
+ assert(spillChar == ' ');
+ spillChar = '*';
+ hasDest = true;
+ }
+ }
+ printf("%c N%03u. ", spillChar, tree->gtSeqNum);
+
+ LclVarDsc* varDsc = nullptr;
+ unsigned varNum = UINT_MAX;
+ if (tree->IsLocal())
+ {
+ varNum = tree->gtLclVarCommon.gtLclNum;
+ varDsc = &(compiler->lvaTable[varNum]);
+ if (varDsc->lvLRACandidate)
+ {
+ hasDest = false;
+ }
+ }
+ if (hasDest)
+ {
+ if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
+ {
+ assert(tree->gtHasReg());
+ }
+ lsraGetOperandString(tree, mode, operandString, operandStringLength);
+ printf("%-15s =", operandString);
+ }
+ else
+ {
+ printf("%-15s ", emptyDestOperand);
+ }
+ if (varDsc != nullptr)
+ {
+ if (varDsc->lvLRACandidate)
+ {
+ if (mode == LSRA_DUMP_REFPOS)
+ {
+ printf(" V%02u(L%d)", varNum, getIntervalForLocalVar(varNum)->intervalIndex);
+ }
+ else
+ {
+ lsraGetOperandString(tree, mode, operandString, operandStringLength);
+ printf(" V%02u(%s)", varNum, operandString);
+ if ((mode == LinearScan::LSRA_DUMP_POST) && ((tree->gtFlags & GTF_SPILLED) != 0))
+ {
+ printf("R");
+ }
+ }
+ }
+ else
+ {
+ printf(" V%02u MEM", varNum);
+ }
+ }
+ else if (tree->OperIsAssignment())
+ {
+ assert(!tree->gtHasReg());
+ const char* isRev = "";
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ isRev = "(Rev)";
+ }
+ printf(" asg%s%s ", GenTree::NodeName(tree->OperGet()), isRev);
+ }
+ else
+ {
+ compiler->gtDispNodeName(tree);
+ if ((tree->gtFlags & GTF_REVERSE_OPS) != 0)
+ {
+ printf("(Rev)");
+ }
+ if (tree->OperKind() & GTK_LEAF)
+ {
+ compiler->gtDispLeaf(tree, nullptr);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// DumpOperandDefs: dumps the registers defined by an operand, recursing
+// into contained operands.
+//
+// For most operands this is simple:
+// - Operands that do not produce values (e.g. stores and other void-typed
+// nodes) and operands that immediately use the registers they define
+// contribute nothing to the dump.
+// - Operands that are marked as defining N registers have each of those
+// defs printed.
+//
+// For contained operands, however, things are more complicated: for purposes
+// of bookkeeping, a contained node is treated as producing the transitive
+// closure of the registers produced by its sources, so the defs of its own
+// operands are dumped in its place. (ComputeOperandDstCount, used below,
+// computes that count.)
+//
+// Arguments:
+// operand - The operand whose defs are to be dumped.
+// first - [in, out] true until the first operand for this node has been
+// printed; used to suppress the leading comma.
+// mode - The tuple dump mode.
+// operandString - The buffer used to format each operand.
+// operandStringLength - The size of 'operandString'.
+//
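+// For example (illustrative): if 'operand' is a contained address mode such as a contained
+// GT_LEA, the GT_LEA itself has dstCount == 0, so the defs of its base and index operands
+// are printed in its place.
+//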
+void LinearScan::DumpOperandDefs(
+ GenTree* operand, bool& first, LsraTupleDumpMode mode, char* operandString, const unsigned operandStringLength)
+{
+ assert(operand != nullptr);
+ assert(operandString != nullptr);
+
+ if (ComputeOperandDstCount(operand) == 0)
+ {
+ return;
+ }
+
+ if (operand->gtLsraInfo.dstCount != 0)
+ {
+ // This operand directly produces registers; print it.
+ for (int i = 0; i < operand->gtLsraInfo.dstCount; i++)
+ {
+ if (!first)
+ {
+ printf(",");
+ }
+
+ lsraGetOperandString(operand, mode, operandString, operandStringLength);
+ printf("%s", operandString);
+
+ first = false;
+ }
+ }
+ else
+ {
+ // This is a contained node. Dump the defs produced by its operands.
+ for (GenTree* op : operand->Operands())
+ {
+ DumpOperandDefs(op, first, mode, operandString, operandStringLength);
+ }
+ }
+}
+
+void LinearScan::TupleStyleDump(LsraTupleDumpMode mode)
+{
+ BasicBlock* block;
+ LsraLocation currentLoc = 1; // 0 is the entry
+ const unsigned operandStringLength = 16;
+ char operandString[operandStringLength];
+
+ // currentRefPosition is not used for LSRA_DUMP_PRE
+ // We keep separate iterators for defs, so that we can print them
+ // on the lhs of the dump
+ auto currentRefPosition = refPositions.begin();
+
+ switch (mode)
+ {
+ case LSRA_DUMP_PRE:
+ printf("TUPLE STYLE DUMP BEFORE LSRA\n");
+ break;
+ case LSRA_DUMP_REFPOS:
+ printf("TUPLE STYLE DUMP WITH REF POSITIONS\n");
+ break;
+ case LSRA_DUMP_POST:
+ printf("TUPLE STYLE DUMP WITH REGISTER ASSIGNMENTS\n");
+ break;
+ default:
+ printf("ERROR: INVALID TUPLE DUMP MODE\n");
+ return;
+ }
+
+ if (mode != LSRA_DUMP_PRE)
+ {
+ printf("Incoming Parameters: ");
+ for (; currentRefPosition != refPositions.end() && currentRefPosition->refType != RefTypeBB;
+ ++currentRefPosition)
+ {
+ Interval* interval = currentRefPosition->getInterval();
+ assert(interval != nullptr && interval->isLocalVar);
+ printf(" V%02d", interval->varNum);
+ if (mode == LSRA_DUMP_POST)
+ {
+ regNumber reg;
+ if (currentRefPosition->registerAssignment == RBM_NONE)
+ {
+ reg = REG_STK;
+ }
+ else
+ {
+ reg = currentRefPosition->assignedReg();
+ }
+ LclVarDsc* varDsc = &(compiler->lvaTable[interval->varNum]);
+ printf("(");
+ regNumber assignedReg = varDsc->lvRegNum;
+ regNumber argReg = (varDsc->lvIsRegArg) ? varDsc->lvArgReg : REG_STK;
+
+ assert(reg == assignedReg || varDsc->lvRegister == false);
+ if (reg != argReg)
+ {
+ printf("%s=>", getRegName(argReg, isFloatRegType(interval->registerType)));
+ }
+ printf("%s)", getRegName(reg, isFloatRegType(interval->registerType)));
+ }
+ }
+ printf("\n");
+ }
+
+ for (block = startBlockSequence(); block != nullptr; block = moveToNextBlock())
+ {
+ currentLoc += 2;
+
+ if (mode == LSRA_DUMP_REFPOS)
+ {
+ bool printedBlockHeader = false;
+ // We should find the boundary RefPositions in the order of exposed uses, dummy defs, and the blocks
+ for (; currentRefPosition != refPositions.end() &&
+ (currentRefPosition->refType == RefTypeExpUse || currentRefPosition->refType == RefTypeDummyDef ||
+ (currentRefPosition->refType == RefTypeBB && !printedBlockHeader));
+ ++currentRefPosition)
+ {
+ Interval* interval = nullptr;
+ if (currentRefPosition->isIntervalRef())
+ {
+ interval = currentRefPosition->getInterval();
+ }
+ switch (currentRefPosition->refType)
+ {
+ case RefTypeExpUse:
+ assert(interval != nullptr);
+ assert(interval->isLocalVar);
+ printf(" Exposed use of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
+ break;
+ case RefTypeDummyDef:
+ assert(interval != nullptr);
+ assert(interval->isLocalVar);
+ printf(" Dummy def of V%02u at #%d\n", interval->varNum, currentRefPosition->rpNum);
+ break;
+ case RefTypeBB:
+ block->dspBlockHeader(compiler);
+ printedBlockHeader = true;
+ printf("=====\n");
+ break;
+ default:
+ printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
+ break;
+ }
+ }
+ }
+ else
+ {
+ block->dspBlockHeader(compiler);
+ printf("=====\n");
+ }
+ if (mode == LSRA_DUMP_POST && block != compiler->fgFirstBB && block->bbNum <= bbNumMaxBeforeResolution)
+ {
+ printf("Predecessor for variable locations: BB%02u\n", blockInfo[block->bbNum].predBBNum);
+ dumpInVarToRegMap(block);
+ }
+ if (block->bbNum > bbNumMaxBeforeResolution)
+ {
+ SplitEdgeInfo splitEdgeInfo;
+ splitBBNumToTargetBBNumMap->Lookup(block->bbNum, &splitEdgeInfo);
+ assert(splitEdgeInfo.toBBNum <= bbNumMaxBeforeResolution);
+ assert(splitEdgeInfo.fromBBNum <= bbNumMaxBeforeResolution);
+ printf("New block introduced for resolution from BB%02u to BB%02u\n", splitEdgeInfo.fromBBNum,
+ splitEdgeInfo.toBBNum);
+ }
+
+ for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
+ {
+ GenTree* tree = node;
+
+ genTreeOps oper = tree->OperGet();
+ TreeNodeInfo& info = tree->gtLsraInfo;
+ if (tree->gtLsraInfo.isLsraAdded)
+ {
+ // This must be one of the nodes that we add during LSRA
+
+ if (oper == GT_LCL_VAR)
+ {
+ info.srcCount = 0;
+ info.dstCount = 1;
+ }
+ else if (oper == GT_RELOAD || oper == GT_COPY)
+ {
+ info.srcCount = 1;
+ info.dstCount = 1;
+ }
+#ifdef FEATURE_SIMD
+ else if (oper == GT_SIMD)
+ {
+ if (tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperSave)
+ {
+ info.srcCount = 1;
+ info.dstCount = 1;
+ }
+ else
+ {
+ assert(tree->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
+ info.srcCount = 2;
+ info.dstCount = 0;
+ }
+ }
+#endif // FEATURE_SIMD
+ else
+ {
+ assert(oper == GT_SWAP);
+ info.srcCount = 2;
+ info.dstCount = 0;
+ }
+ info.internalIntCount = 0;
+ info.internalFloatCount = 0;
+ }
+
+ int consume = info.srcCount;
+ int produce = info.dstCount;
+ regMaskTP killMask = RBM_NONE;
+ regMaskTP fixedMask = RBM_NONE;
+
+ lsraDispNode(tree, mode, produce != 0 && mode != LSRA_DUMP_REFPOS);
+
+ if (mode != LSRA_DUMP_REFPOS)
+ {
+ if (consume > 0)
+ {
+ printf("; ");
+
+ bool first = true;
+ for (GenTree* operand : tree->Operands())
+ {
+ DumpOperandDefs(operand, first, mode, operandString, operandStringLength);
+ }
+ }
+ }
+ else
+ {
+ // Print each RefPosition on a new line, but print all the kills for each node
+ // on a single line, and combine the fixed regs with their associated def or use.
+ bool killPrinted = false;
+ RefPosition* lastFixedRegRefPos = nullptr;
+ for (; currentRefPosition != refPositions.end() &&
+ (currentRefPosition->refType == RefTypeUse || currentRefPosition->refType == RefTypeFixedReg ||
+ currentRefPosition->refType == RefTypeKill || currentRefPosition->refType == RefTypeDef) &&
+ (currentRefPosition->nodeLocation == tree->gtSeqNum ||
+ currentRefPosition->nodeLocation == tree->gtSeqNum + 1);
+ ++currentRefPosition)
+ {
+ Interval* interval = nullptr;
+ if (currentRefPosition->isIntervalRef())
+ {
+ interval = currentRefPosition->getInterval();
+ }
+ switch (currentRefPosition->refType)
+ {
+ case RefTypeUse:
+ if (currentRefPosition->isPhysRegRef)
+ {
+ printf("\n Use:R%d(#%d)",
+ currentRefPosition->getReg()->regNum, currentRefPosition->rpNum);
+ }
+ else
+ {
+ assert(interval != nullptr);
+ printf("\n Use:");
+ interval->microDump();
+ printf("(#%d)", currentRefPosition->rpNum);
+ if (currentRefPosition->isFixedRegRef)
+ {
+ assert(genMaxOneBit(currentRefPosition->registerAssignment));
+ assert(lastFixedRegRefPos != nullptr);
+ printf(" Fixed:%s(#%d)", getRegName(currentRefPosition->assignedReg(),
+ isFloatRegType(interval->registerType)),
+ lastFixedRegRefPos->rpNum);
+ lastFixedRegRefPos = nullptr;
+ }
+ if (currentRefPosition->isLocalDefUse)
+ {
+ printf(" LocalDefUse");
+ }
+ if (currentRefPosition->lastUse)
+ {
+ printf(" *");
+ }
+ }
+ break;
+ case RefTypeDef:
+ {
+ // Print each def on a new line
+ assert(interval != nullptr);
+ printf("\n Def:");
+ interval->microDump();
+ printf("(#%d)", currentRefPosition->rpNum);
+ if (currentRefPosition->isFixedRegRef)
+ {
+ assert(genMaxOneBit(currentRefPosition->registerAssignment));
+ printf(" %s", getRegName(currentRefPosition->assignedReg(),
+ isFloatRegType(interval->registerType)));
+ }
+ if (currentRefPosition->isLocalDefUse)
+ {
+ printf(" LocalDefUse");
+ }
+ if (currentRefPosition->lastUse)
+ {
+ printf(" *");
+ }
+ if (interval->relatedInterval != nullptr)
+ {
+ printf(" Pref:");
+ interval->relatedInterval->microDump();
+ }
+ }
+ break;
+ case RefTypeKill:
+ if (!killPrinted)
+ {
+ printf("\n Kill: ");
+ killPrinted = true;
+ }
+ printf("%s ", getRegName(currentRefPosition->assignedReg(),
+ isFloatRegType(currentRefPosition->getReg()->registerType)));
+ break;
+ case RefTypeFixedReg:
+ lastFixedRegRefPos = currentRefPosition;
+ break;
+ default:
+ printf("Unexpected RefPosition type at #%d\n", currentRefPosition->rpNum);
+ break;
+ }
+ }
+ }
+ printf("\n");
+ if (info.internalIntCount != 0 && mode != LSRA_DUMP_REFPOS)
+ {
+ printf("\tinternal (%d):\t", info.internalIntCount);
+ if (mode == LSRA_DUMP_POST)
+ {
+ dumpRegMask(tree->gtRsvdRegs);
+ }
+ else if ((info.getInternalCandidates(this) & allRegs(TYP_INT)) != allRegs(TYP_INT))
+ {
+ dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_INT));
+ }
+ printf("\n");
+ }
+ if (info.internalFloatCount != 0 && mode != LSRA_DUMP_REFPOS)
+ {
+ printf("\tinternal (%d):\t", info.internalFloatCount);
+ if (mode == LSRA_DUMP_POST)
+ {
+ dumpRegMask(tree->gtRsvdRegs);
+ }
+ else if ((info.getInternalCandidates(this) & allRegs(TYP_FLOAT)) != allRegs(TYP_FLOAT))
+ {
+ dumpRegMask(info.getInternalCandidates(this) & allRegs(TYP_FLOAT));
+ }
+ printf("\n");
+ }
+ }
+ if (mode == LSRA_DUMP_POST)
+ {
+ dumpOutVarToRegMap(block);
+ }
+ printf("\n");
+ }
+ printf("\n\n");
+}
+
+void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event,
+ Interval* interval,
+ regNumber reg,
+ BasicBlock* currentBlock)
+{
+ if (!(VERBOSE))
+ {
+ return;
+ }
+ switch (event)
+ {
+ // Conflicting def/use
+ case LSRA_EVENT_DEFUSE_CONFLICT:
+ if (!dumpTerse)
+ {
+ printf(" Def and Use have conflicting register requirements:");
+ }
+ else
+ {
+ printf("DUconflict ");
+ dumpRegRecords();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_FIXED_DELAY_USE:
+ if (!dumpTerse)
+ {
+ printf(" Can't change useAssignment ");
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE1:
+ if (!dumpTerse)
+ {
+ printf(" case #1, use the defRegAssignment\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #1 use defRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE2:
+ if (!dumpTerse)
+ {
+ printf(" case #2, use the useRegAssignment\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #2 use useRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE3:
+ if (!dumpTerse)
+ {
+ printf(" case #3, change the defRegAssignment to the use regs\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #3 use useRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE4:
+ if (!dumpTerse)
+ {
+ printf(" case #4, change the useRegAssignment to the def regs\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #4 use defRegAssignment");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE5:
+ if (!dumpTerse)
+ {
+ printf(" case #5, Conflicting Def and Use single-register requirements require copies - set def to all "
+ "regs of the appropriate type\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #5 set def to all regs");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_DEFUSE_CASE6:
+ if (!dumpTerse)
+ {
+ printf(" case #6, Conflicting Def and Use register requirements require a copy\n");
+ }
+ else
+ {
+ printf(indentFormat, " case #6 need a copy");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+
+ case LSRA_EVENT_SPILL:
+ if (!dumpTerse)
+ {
+ printf("Spilled:\n");
+ interval->dump();
+ }
+ else
+ {
+ assert(interval != nullptr && interval->assignedReg != nullptr);
+ printf("Spill %-4s ", getRegName(interval->assignedReg->regNum));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_SPILL_EXTENDED_LIFETIME:
+ if (!dumpTerse)
+ {
+ printf(" Spilled extended lifetime var V%02u at last use; not marked for actual spill.",
+ interval->intervalIndex);
+ }
+ break;
+
+ // Restoring the previous register
+ case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL:
+ assert(interval != nullptr);
+ if (!dumpTerse)
+ {
+ printf(" Assign register %s to previous interval Ivl:%d after spill\n", getRegName(reg),
+ interval->intervalIndex);
+ }
+ else
+ {
+ // If we spilled, then the dump is already pre-indented, but we need to pre-indent
+ // for the subsequent allocation with a dumpEmptyRefPosition().
+ printf("SRstr %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL:
+ assert(interval != nullptr);
+ if (!dumpTerse)
+ {
+ printf(" Assign register %s to previous interval Ivl:%d\n", getRegName(reg), interval->intervalIndex);
+ }
+ else
+ {
+ if (activeRefPosition == nullptr)
+ {
+ printf(emptyRefPositionFormat, "");
+ }
+ printf("Restr %-4s ", getRegName(reg));
+ dumpRegRecords();
+ if (activeRefPosition != nullptr)
+ {
+ printf(emptyRefPositionFormat, "");
+ }
+ }
+ break;
+
+ // Done with GC Kills
+ case LSRA_EVENT_DONE_KILL_GC_REFS:
+ printf("DoneKillGC ");
+ break;
+
+ // Block boundaries
+ case LSRA_EVENT_START_BB:
+ assert(currentBlock != nullptr);
+ if (!dumpTerse)
+ {
+ printf("\n\n Live Vars(Regs) at start of BB%02u (from pred BB%02u):", currentBlock->bbNum,
+ blockInfo[currentBlock->bbNum].predBBNum);
+ dumpVarToRegMap(inVarToRegMaps[currentBlock->bbNum]);
+ }
+ break;
+ case LSRA_EVENT_END_BB:
+ if (!dumpTerse)
+ {
+ printf("\n\n Live Vars(Regs) after BB%02u:", currentBlock->bbNum);
+ dumpVarToRegMap(outVarToRegMaps[currentBlock->bbNum]);
+ }
+ break;
+
+ case LSRA_EVENT_FREE_REGS:
+ if (!dumpTerse)
+ {
+ printf("Freeing registers:\n");
+ }
+ break;
+
+ // Characteristics of the current RefPosition
+ case LSRA_EVENT_INCREMENT_RANGE_END:
+ if (!dumpTerse)
+ {
+ printf(" Incrementing nextPhysRegLocation for %s\n", getRegName(reg));
+ }
+ // else ???
+ break;
+ case LSRA_EVENT_LAST_USE:
+ if (!dumpTerse)
+ {
+ printf(" Last use, marked to be freed\n");
+ }
+ break;
+ case LSRA_EVENT_LAST_USE_DELAYED:
+ if (!dumpTerse)
+ {
+ printf(" Last use, marked to be freed (delayed)\n");
+ }
+ break;
+ case LSRA_EVENT_NEEDS_NEW_REG:
+ if (!dumpTerse)
+ {
+ printf(" Needs new register; mark %s to be freed\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Free %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+
+ // Allocation decisions
+ case LSRA_EVENT_FIXED_REG:
+ case LSRA_EVENT_EXP_USE:
+ if (!dumpTerse)
+ {
+ printf("No allocation\n");
+ }
+ else
+ {
+ printf("Keep %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_ZERO_REF:
+ assert(interval != nullptr && interval->isLocalVar);
+ if (!dumpTerse)
+ {
+ printf("Marking V%02u as last use there are no actual references\n", interval->varNum);
+ }
+ else
+ {
+ printf("NoRef ");
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_KEPT_ALLOCATION:
+ if (!dumpTerse)
+ {
+ printf("already allocated %4s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Keep %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_COPY_REG:
+ assert(interval != nullptr && interval->recentRefPosition != nullptr);
+ if (!dumpTerse)
+ {
+ printf("allocated %s as copyReg\n\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Copy %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_MOVE_REG:
+ assert(interval != nullptr && interval->recentRefPosition != nullptr);
+ if (!dumpTerse)
+ {
+ printf(" needs a new register; marked as moveReg\n");
+ }
+ else
+ {
+ printf("Move %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_ALLOC_REG:
+ if (!dumpTerse)
+ {
+ printf("allocated %s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Alloc %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_REUSE_REG:
+ if (!dumpTerse)
+ {
+ printf("reused constant in %s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Reuse %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_ALLOC_SPILLED_REG:
+ if (!dumpTerse)
+ {
+ printf("allocated spilled register %s\n", getRegName(reg));
+ }
+ else
+ {
+ printf("Steal %-4s ", getRegName(reg));
+ }
+ break;
+ case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED:
+ assert(interval != nullptr && interval->isLocalVar);
+ if (!dumpTerse)
+ {
+ printf("Not allocating an entry register for V%02u due to low ref count\n", interval->varNum);
+ }
+ else
+ {
+ printf("LoRef ");
+ }
+ break;
+ case LSRA_EVENT_NO_REG_ALLOCATED:
+ if (!dumpTerse)
+ {
+ printf("no register allocated\n");
+ }
+ else
+ {
+ printf("NoReg ");
+ }
+ break;
+ case LSRA_EVENT_RELOAD:
+ if (!dumpTerse)
+ {
+ printf(" Marked for reload\n");
+ }
+ else
+ {
+ printf("ReLod %-4s ", getRegName(reg));
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ }
+ break;
+ case LSRA_EVENT_SPECIAL_PUTARG:
+ if (!dumpTerse)
+ {
+ printf(" Special case of putArg - using lclVar that's in the expected reg\n");
+ }
+ else
+ {
+ printf("PtArg %-4s ", getRegName(reg));
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+//------------------------------------------------------------------------
+// dumpRegRecordHeader: Dump the header for a column-based dump of the register state.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// Reg names fit in 4 characters (minimum width of the columns)
+//
+// Notes:
+// In order to make the table as dense as possible (for ease of reading the dumps),
+// we determine the minimum regColumnWidth width required to represent:
+// regs, by name (e.g. eax or xmm0) - this is fixed at 4 characters.
+// intervals, as Vnn for lclVar intervals, or as I<num> for other intervals.
+// The table is indented by the amount needed for dumpRefPositionShort, which is
+// captured in shortRefPositionDumpWidth.
+//
+void LinearScan::dumpRegRecordHeader()
+{
+ printf("The following table has one or more rows for each RefPosition that is handled during allocation.\n"
+ "The first column provides the basic information about the RefPosition, with its type (e.g. Def,\n"
+ "Use, Fixd) followed by a '*' if it is a last use, and a 'D' if it is delayRegFree, and then the\n"
+ "action taken during allocation (e.g. Alloc a new register, or Keep an existing one).\n"
+ "The subsequent columns show the Interval occupying each register, if any, followed by 'a' if it is\n"
+ "active, and 'i'if it is inactive. Columns are only printed up to the last modifed register, which\n"
+ "may increase during allocation, in which case additional columns will appear. Registers which are\n"
+ "not marked modified have ---- in their column.\n\n");
+
+ // First, determine the width of each register column (which holds a reg name in the
+ // header, and an interval name in each subsequent row).
+ int intervalNumberWidth = (int)log10((double)intervals.size()) + 1;
+ // The regColumnWidth includes the identifying character (I or V) and an 'i' or 'a' (inactive or active)
+ regColumnWidth = intervalNumberWidth + 2;
+ if (regColumnWidth < 4)
+ {
+ regColumnWidth = 4;
+ }
+ sprintf_s(intervalNameFormat, MAX_FORMAT_CHARS, "%%c%%-%dd", regColumnWidth - 2);
+ sprintf_s(regNameFormat, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+
+ // Next, determine the width of the short RefPosition (see dumpRefPositionShort()).
+ // This is in the form:
+ // nnn.#mmm NAME TYPEld
+ // Where:
+ // nnn is the Location, right-justified to the width needed for the highest location.
+ // mmm is the RefPosition rpNum, left-justified to the width needed for the highest rpNum.
+ // NAME is dumped by dumpReferentName(), and is "regColumnWidth".
+ // TYPE is RefTypeNameShort, and is 4 characters
+ // l is either '*' (if a last use) or ' ' (otherwise)
+ // d is either 'D' (if a delayed use) or ' ' (otherwise)
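+ //
+ // For instance (illustrative values only), a last use of local-var interval 5 at
+ // location 12 might be dumped as "12.#34 V5 Use* ", followed by the allocation
+ // action and the per-register columns (e.g. "Alloc rax ...").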
+
+ maxNodeLocation = (maxNodeLocation == 0)
+ ? 1
+ : maxNodeLocation; // corner case of a method with an infinite loop without any gentree nodes
+ assert(maxNodeLocation >= 1);
+ assert(refPositions.size() >= 1);
+ int nodeLocationWidth = (int)log10((double)maxNodeLocation) + 1;
+ int refPositionWidth = (int)log10((double)refPositions.size()) + 1;
+ int refTypeInfoWidth = 4 /*TYPE*/ + 2 /* last-use and delayed */ + 1 /* space */;
+ int locationAndRPNumWidth = nodeLocationWidth + 2 /* .# */ + refPositionWidth + 1 /* space */;
+ int shortRefPositionDumpWidth = locationAndRPNumWidth + regColumnWidth + 1 /* space */ + refTypeInfoWidth;
+ sprintf_s(shortRefPositionFormat, MAX_FORMAT_CHARS, "%%%dd.#%%-%dd ", nodeLocationWidth, refPositionWidth);
+ sprintf_s(emptyRefPositionFormat, MAX_FORMAT_CHARS, "%%-%ds", shortRefPositionDumpWidth);
+
+ // The width of the "allocation info"
+ // - a 5-character allocation decision
+ // - a space
+ // - a 4-character register
+ // - a space
+ int allocationInfoWidth = 5 + 1 + 4 + 1;
+
+ // Next, determine the width of the legend for each row. This includes:
+ // - a short RefPosition dump (shortRefPositionDumpWidth), which includes a space
+ // - the allocation info (allocationInfoWidth), which also includes a space
+
+ regTableIndent = shortRefPositionDumpWidth + allocationInfoWidth;
+
+ // BBnn printed left-justified in the NAME Typeld and allocationInfo space.
+ int bbDumpWidth = regColumnWidth + 1 + refTypeInfoWidth + allocationInfoWidth;
+ int bbNumWidth = (int)log10((double)compiler->fgBBNumMax) + 1;
+ // In the unlikely event that BB numbers overflow the space, we'll simply omit the predBB
+ int predBBNumDumpSpace = regTableIndent - locationAndRPNumWidth - bbNumWidth - 9; // 'BB' + ' PredBB'
+ if (predBBNumDumpSpace < bbNumWidth)
+ {
+ sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd", shortRefPositionDumpWidth - 2);
+ }
+ else
+ {
+ sprintf_s(bbRefPosFormat, MAX_LEGEND_FORMAT_CHARS, "BB%%-%dd PredBB%%-%dd", bbNumWidth, predBBNumDumpSpace);
+ }
+
+ if (compiler->shouldDumpASCIITrees())
+ {
+ columnSeparator = "|";
+ line = "-";
+ leftBox = "+";
+ middleBox = "+";
+ rightBox = "+";
+ }
+ else
+ {
+ columnSeparator = "\xe2\x94\x82";
+ line = "\xe2\x94\x80";
+ leftBox = "\xe2\x94\x9c";
+ middleBox = "\xe2\x94\xbc";
+ rightBox = "\xe2\x94\xa4";
+ }
+ sprintf_s(indentFormat, MAX_FORMAT_CHARS, "%%-%ds", regTableIndent);
+
+ // Now, set up the legend format for the RefPosition info
+ sprintf_s(legendFormat, MAX_LEGEND_FORMAT_CHARS, "%%-%d.%ds%%-%d.%ds%%-%ds%%s", nodeLocationWidth + 1,
+ nodeLocationWidth + 1, refPositionWidth + 2, refPositionWidth + 2, regColumnWidth + 1);
+
+ // Finally, print a "title row" including the legend and the reg names
+ dumpRegRecordTitle();
+}
+
+int LinearScan::getLastUsedRegNumIndex()
+{
+ int lastUsedRegNumIndex = 0;
+ regMaskTP usedRegsMask = compiler->codeGen->regSet.rsGetModifiedRegsMask();
+ int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
+ for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
+ {
+ if ((usedRegsMask & genRegMask((regNumber)regNumIndex)) != 0)
+ {
+ lastUsedRegNumIndex = regNumIndex;
+ }
+ }
+ return lastUsedRegNumIndex;
+}
+
+void LinearScan::dumpRegRecordTitleLines()
+{
+ for (int i = 0; i < regTableIndent; i++)
+ {
+ printf("%s", line);
+ }
+ int lastUsedRegNumIndex = getLastUsedRegNumIndex();
+ for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
+ {
+ printf("%s", middleBox);
+ for (int i = 0; i < regColumnWidth; i++)
+ {
+ printf("%s", line);
+ }
+ }
+ printf("%s\n", rightBox);
+}
+void LinearScan::dumpRegRecordTitle()
+{
+ dumpRegRecordTitleLines();
+
+ // Print out the legend for the RefPosition info
+ printf(legendFormat, "Loc ", "RP# ", "Name ", "Type Action Reg ");
+
+ // Print out the register name column headers
+ char columnFormatArray[MAX_FORMAT_CHARS];
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%s%%-%d.%ds", columnSeparator, regColumnWidth, regColumnWidth);
+ int lastUsedRegNumIndex = getLastUsedRegNumIndex();
+ for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
+ {
+ regNumber regNum = (regNumber)regNumIndex;
+ const char* regName = getRegName(regNum);
+ printf(columnFormatArray, regName);
+ }
+ printf("%s\n", columnSeparator);
+
+ rowCountSinceLastTitle = 0;
+
+ dumpRegRecordTitleLines();
+}
+
+void LinearScan::dumpRegRecords()
+{
+ static char columnFormatArray[18];
+ int lastUsedRegNumIndex = getLastUsedRegNumIndex();
+ regMaskTP usedRegsMask = compiler->codeGen->regSet.rsGetModifiedRegsMask();
+
+ for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++)
+ {
+ printf("%s", columnSeparator);
+ RegRecord& regRecord = physRegs[regNumIndex];
+ Interval* interval = regRecord.assignedInterval;
+ if (interval != nullptr)
+ {
+ dumpIntervalName(interval);
+ char activeChar = interval->isActive ? 'a' : 'i';
+ printf("%c", activeChar);
+ }
+ else if (regRecord.isBusyUntilNextKill)
+ {
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+ printf(columnFormatArray, "Busy");
+ }
+ else if ((usedRegsMask & genRegMask((regNumber)regNumIndex)) == 0)
+ {
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+ printf(columnFormatArray, "----");
+ }
+ else
+ {
+ sprintf_s(columnFormatArray, MAX_FORMAT_CHARS, "%%-%ds", regColumnWidth);
+ printf(columnFormatArray, "");
+ }
+ }
+ printf("%s\n", columnSeparator);
+
+ if (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES)
+ {
+ dumpRegRecordTitle();
+ }
+ rowCountSinceLastTitle++;
+}
+
+void LinearScan::dumpIntervalName(Interval* interval)
+{
+ char intervalChar;
+ if (interval->isLocalVar)
+ {
+ intervalChar = 'V';
+ }
+ else if (interval->isConstant)
+ {
+ intervalChar = 'C';
+ }
+ else
+ {
+ intervalChar = 'I';
+ }
+ printf(intervalNameFormat, intervalChar, interval->intervalIndex);
+}
+
+void LinearScan::dumpEmptyRefPosition()
+{
+ printf(emptyRefPositionFormat, "");
+}
+
+// Note that the size of this dump is computed in dumpRegRecordHeader().
+//
+void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock)
+{
+ BasicBlock* block = currentBlock;
+ if (refPosition->refType == RefTypeBB)
+ {
+ // Always print a title row before a RefTypeBB (except for the first, because we
+ // will already have printed it before the parameters)
+ if ((block != compiler->fgFirstBB) && (block != nullptr))
+ {
+ dumpRegRecordTitle();
+ }
+ }
+ printf(shortRefPositionFormat, refPosition->nodeLocation, refPosition->rpNum);
+ if (refPosition->refType == RefTypeBB)
+ {
+ if (block == nullptr)
+ {
+ printf(regNameFormat, "END");
+ printf(" ");
+ printf(regNameFormat, "");
+ }
+ else
+ {
+ printf(bbRefPosFormat, block->bbNum, block == compiler->fgFirstBB ? 0 : blockInfo[block->bbNum].predBBNum);
+ }
+ }
+ else if (refPosition->isIntervalRef())
+ {
+ Interval* interval = refPosition->getInterval();
+ dumpIntervalName(interval);
+ char lastUseChar = ' ';
+ char delayChar = ' ';
+ if (refPosition->lastUse)
+ {
+ lastUseChar = '*';
+ if (refPosition->delayRegFree)
+ {
+ delayChar = 'D';
+ }
+ }
+ printf(" %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar);
+ }
+ else if (refPosition->isPhysRegRef)
+ {
+ RegRecord* regRecord = refPosition->getReg();
+ printf(regNameFormat, getRegName(regRecord->regNum));
+ printf(" %s ", getRefTypeShortName(refPosition->refType));
+ }
+ else
+ {
+ assert(refPosition->refType == RefTypeKillGCRefs);
+ // There's no interval or reg name associated with this.
+ printf(regNameFormat, " ");
+ printf(" %s ", getRefTypeShortName(refPosition->refType));
+ }
+}
+
+//------------------------------------------------------------------------
+// LinearScan::IsResolutionMove:
+// Returns true if the given node is a move inserted by LSRA
+// resolution.
+//
+// Arguments:
+// node - the node to check.
+//
+bool LinearScan::IsResolutionMove(GenTree* node)
+{
+ if (!node->gtLsraInfo.isLsraAdded)
+ {
+ return false;
+ }
+
+ switch (node->OperGet())
+ {
+ case GT_LCL_VAR:
+ case GT_COPY:
+ return node->gtLsraInfo.isLocalDefUse;
+
+ case GT_SWAP:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+//------------------------------------------------------------------------
+// LinearScan::IsResolutionNode:
+// Returns true if the given node is either a move inserted by LSRA
+// resolution or an operand to such a move.
+//
+// Arguments:
+// containingRange - the range that contains the node to check.
+// node - the node to check.
+//
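+// For example (illustrative): an lsra-added GT_LCL_VAR that is consumed by a GT_SWAP is not
+// itself a resolution move, but walking up to its user (the GT_SWAP) identifies it as a
+// resolution node.
+//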
+bool LinearScan::IsResolutionNode(LIR::Range& containingRange, GenTree* node)
+{
+ for (;;)
+ {
+ if (IsResolutionMove(node))
+ {
+ return true;
+ }
+
+ if (!node->gtLsraInfo.isLsraAdded || (node->OperGet() != GT_LCL_VAR))
+ {
+ return false;
+ }
+
+ LIR::Use use;
+ bool foundUse = containingRange.TryGetUse(node, &use);
+ assert(foundUse);
+
+ node = use.User();
+ }
+}
+
+//------------------------------------------------------------------------
+// verifyFinalAllocation: Traverse the RefPositions and verify various invariants.
+//
+// Arguments:
+// None.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// If verbose is set, this will also dump a table of the final allocations.
+void LinearScan::verifyFinalAllocation()
+{
+ if (VERBOSE)
+ {
+ printf("\nFinal allocation\n");
+ }
+
+ // Clear register assignments.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+
+ for (auto& interval : intervals)
+ {
+ interval.assignedReg = nullptr;
+ interval.physReg = REG_NA;
+ }
+
+ DBEXEC(VERBOSE, dumpRegRecordTitle());
+
+ BasicBlock* currentBlock = nullptr;
+ GenTree* firstBlockEndResolutionNode = nullptr;
+ regMaskTP regsToFree = RBM_NONE;
+ regMaskTP delayRegsToFree = RBM_NONE;
+ LsraLocation currentLocation = MinLocation;
+ for (auto& refPosition : refPositions)
+ {
+ RefPosition* currentRefPosition = &refPosition;
+ Interval* interval = nullptr;
+ RegRecord* regRecord = nullptr;
+ regNumber regNum = REG_NA;
+ if (currentRefPosition->refType == RefTypeBB)
+ {
+ regsToFree |= delayRegsToFree;
+ delayRegsToFree = RBM_NONE;
+ // For BB RefPositions, wait until we dump the "end of block" info before dumping the basic RefPosition
+ // info.
+ }
+ else
+ {
+ // For other RefPosition types, we can dump the basic RefPosition info now.
+ DBEXEC(VERBOSE, dumpRefPositionShort(currentRefPosition, currentBlock));
+
+ if (currentRefPosition->isPhysRegRef)
+ {
+ regRecord = currentRefPosition->getReg();
+ regRecord->recentRefPosition = currentRefPosition;
+ regNum = regRecord->regNum;
+ }
+ else if (currentRefPosition->isIntervalRef())
+ {
+ interval = currentRefPosition->getInterval();
+ interval->recentRefPosition = currentRefPosition;
+ if (currentRefPosition->registerAssignment != RBM_NONE)
+ {
+ if (!genMaxOneBit(currentRefPosition->registerAssignment))
+ {
+ assert(currentRefPosition->refType == RefTypeExpUse ||
+ currentRefPosition->refType == RefTypeDummyDef);
+ }
+ else
+ {
+ regNum = currentRefPosition->assignedReg();
+ regRecord = getRegisterRecord(regNum);
+ }
+ }
+ }
+ }
+
+ LsraLocation newLocation = currentRefPosition->nodeLocation;
+
+ if (newLocation > currentLocation)
+ {
+ // Free Registers.
+ // We could use the freeRegisters() method, but we'd have to carefully manage the active intervals.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ regMaskTP regMask = genRegMask(reg);
+ if ((regsToFree & regMask) != RBM_NONE)
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+ }
+ regsToFree = delayRegsToFree;
+ delayRegsToFree = RBM_NONE;
+ }
+ currentLocation = newLocation;
+
+ switch (currentRefPosition->refType)
+ {
+ case RefTypeBB:
+ {
+ if (currentBlock == nullptr)
+ {
+ currentBlock = startBlockSequence();
+ }
+ else
+ {
+ // Verify the resolution moves at the end of the previous block.
+ for (GenTree* node = firstBlockEndResolutionNode; node != nullptr; node = node->gtNext)
+ {
+ // Only verify nodes that are actually moves; don't bother with the nodes that are
+ // operands to moves.
+ if (IsResolutionMove(node))
+ {
+ verifyResolutionMove(node, currentLocation);
+ }
+ }
+
+ // Validate the locations at the end of the previous block.
+ VarToRegMap outVarToRegMap = outVarToRegMaps[currentBlock->bbNum];
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(outVarToRegMap, varNum);
+ interval = getIntervalForLocalVar(varNum);
+ assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+ interval->isActive = false;
+ }
+
+ // Clear register assignments.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+
+ // Now, record the locations at the beginning of this block.
+ currentBlock = moveToNextBlock();
+ }
+
+ if (currentBlock != nullptr)
+ {
+ VarToRegMap inVarToRegMap = inVarToRegMaps[currentBlock->bbNum];
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(inVarToRegMap, varNum);
+ interval = getIntervalForLocalVar(varNum);
+ interval->physReg = regNum;
+ interval->assignedReg = &(physRegs[regNum]);
+ interval->isActive = true;
+ physRegs[regNum].assignedInterval = interval;
+ }
+
+ if (VERBOSE)
+ {
+ dumpRefPositionShort(currentRefPosition, currentBlock);
+ dumpRegRecords();
+ }
+
+ // Finally, handle the resolution moves, if any, at the beginning of the next block.
+ firstBlockEndResolutionNode = nullptr;
+ bool foundNonResolutionNode = false;
+
+ LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
+ for (GenTree* node : currentBlockRange.NonPhiNodes())
+ {
+ if (IsResolutionNode(currentBlockRange, node))
+ {
+ if (foundNonResolutionNode)
+ {
+ firstBlockEndResolutionNode = node;
+ break;
+ }
+ else if (IsResolutionMove(node))
+ {
+ // Only verify nodes that are actually moves; don't bother with the nodes that are
+ // operands to moves.
+ verifyResolutionMove(node, currentLocation);
+ }
+ }
+ else
+ {
+ foundNonResolutionNode = true;
+ }
+ }
+ }
+ }
+
+ break;
+
+ case RefTypeKill:
+ assert(regRecord != nullptr);
+ assert(regRecord->assignedInterval == nullptr);
+ dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
+ break;
+ case RefTypeFixedReg:
+ assert(regRecord != nullptr);
+ dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
+ break;
+
+ case RefTypeUpperVectorSaveDef:
+ case RefTypeUpperVectorSaveUse:
+ case RefTypeDef:
+ case RefTypeUse:
+ case RefTypeParamDef:
+ case RefTypeZeroInit:
+ assert(interval != nullptr);
+
+ if (interval->isSpecialPutArg)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, interval, regNum);
+ break;
+ }
+ if (currentRefPosition->reload)
+ {
+ interval->isActive = true;
+ assert(regNum != REG_NA);
+ interval->physReg = regNum;
+ interval->assignedReg = regRecord;
+ regRecord->assignedInterval = interval;
+ dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, nullptr, regRecord->regNum, currentBlock);
+ }
+ if (regNum == REG_NA)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, interval);
+ }
+ else if (RefTypeIsDef(currentRefPosition->refType))
+ {
+ interval->isActive = true;
+ if (VERBOSE)
+ {
+ if (interval->isConstant && (currentRefPosition->treeNode != nullptr) &&
+ currentRefPosition->treeNode->IsReuseRegVal())
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_REUSE_REG, nullptr, regRecord->regNum, currentBlock);
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_REG, nullptr, regRecord->regNum, currentBlock);
+ }
+ }
+ }
+ else if (currentRefPosition->copyReg)
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, interval, regRecord->regNum, currentBlock);
+ }
+ else if (currentRefPosition->moveReg)
+ {
+ assert(interval->assignedReg != nullptr);
+ interval->assignedReg->assignedInterval = nullptr;
+ interval->physReg = regNum;
+ interval->assignedReg = regRecord;
+ regRecord->assignedInterval = interval;
+ if (VERBOSE)
+ {
+ printf("Move %-4s ", getRegName(regRecord->regNum));
+ }
+ }
+ else
+ {
+ dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock);
+ }
+ if (currentRefPosition->lastUse || currentRefPosition->spillAfter)
+ {
+ interval->isActive = false;
+ }
+ if (regNum != REG_NA)
+ {
+ if (currentRefPosition->spillAfter)
+ {
+ if (VERBOSE)
+ {
+ dumpRegRecords();
+ dumpEmptyRefPosition();
+ printf("Spill %-4s ", getRegName(regNum));
+ }
+ }
+ else if (currentRefPosition->copyReg)
+ {
+ regRecord->assignedInterval = interval;
+ }
+ else
+ {
+ interval->physReg = regNum;
+ interval->assignedReg = regRecord;
+ regRecord->assignedInterval = interval;
+ }
+ }
+ break;
+ case RefTypeKillGCRefs:
+ // No action to take.
+ // However, we will assert that, at resolution time, no registers contain GC refs.
+ {
+ DBEXEC(VERBOSE, printf(" "));
+ regMaskTP candidateRegs = currentRefPosition->registerAssignment;
+ while (candidateRegs != RBM_NONE)
+ {
+ regMaskTP nextRegBit = genFindLowestBit(candidateRegs);
+ candidateRegs &= ~nextRegBit;
+ regNumber nextReg = genRegNumFromMask(nextRegBit);
+ RegRecord* regRecord = getRegisterRecord(nextReg);
+ Interval* assignedInterval = regRecord->assignedInterval;
+ assert(assignedInterval == nullptr || !varTypeIsGC(assignedInterval->registerType));
+ }
+ }
+ break;
+
+ case RefTypeExpUse:
+ case RefTypeDummyDef:
+ // Do nothing; these will be handled by the RefTypeBB.
+ DBEXEC(VERBOSE, printf(" "));
+ break;
+
+ case RefTypeInvalid:
+                // For these 'currentRefPosition->refType' values, there is no action to take.
+ break;
+ }
+
+ if (currentRefPosition->refType != RefTypeBB)
+ {
+ DBEXEC(VERBOSE, dumpRegRecords());
+ if (interval != nullptr)
+ {
+ if (currentRefPosition->copyReg)
+ {
+ assert(interval->physReg != regNum);
+ regRecord->assignedInterval = nullptr;
+ assert(interval->assignedReg != nullptr);
+ regRecord = interval->assignedReg;
+ }
+ if (currentRefPosition->spillAfter || currentRefPosition->lastUse)
+ {
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+
+                    // regRecord could be null if the RefPosition is to be allocated a
+                    // reg only if profitable.
+ if (regRecord != nullptr)
+ {
+ regRecord->assignedInterval = nullptr;
+ }
+ else
+ {
+ assert(currentRefPosition->AllocateIfProfitable());
+ }
+ }
+ }
+ }
+ }
+
+ // Now, verify the resolution blocks.
+    // Currently these are nearly always at the end of the method, but that may not always be the case.
+ // So, we'll go through all the BBs looking for blocks whose bbNum is greater than bbNumMaxBeforeResolution.
+ for (BasicBlock* currentBlock = compiler->fgFirstBB; currentBlock != nullptr; currentBlock = currentBlock->bbNext)
+ {
+ if (currentBlock->bbNum > bbNumMaxBeforeResolution)
+ {
+ if (VERBOSE)
+ {
+ dumpRegRecordTitle();
+ printf(shortRefPositionFormat, 0, 0);
+ assert(currentBlock->bbPreds != nullptr && currentBlock->bbPreds->flBlock != nullptr);
+ printf(bbRefPosFormat, currentBlock->bbNum, currentBlock->bbPreds->flBlock->bbNum);
+ dumpRegRecords();
+ }
+
+ // Clear register assignments.
+ for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
+ {
+ RegRecord* physRegRecord = getRegisterRecord(reg);
+ physRegRecord->assignedInterval = nullptr;
+ }
+
+ // Set the incoming register assignments
+ VarToRegMap inVarToRegMap = getInVarToRegMap(currentBlock->bbNum);
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveIn, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(inVarToRegMap, varNum);
+ Interval* interval = getIntervalForLocalVar(varNum);
+ interval->physReg = regNum;
+ interval->assignedReg = &(physRegs[regNum]);
+ interval->isActive = true;
+ physRegs[regNum].assignedInterval = interval;
+ }
+
+ // Verify the moves in this block
+ LIR::Range& currentBlockRange = LIR::AsRange(currentBlock);
+ for (GenTree* node : currentBlockRange.NonPhiNodes())
+ {
+ assert(IsResolutionNode(currentBlockRange, node));
+ if (IsResolutionMove(node))
+ {
+ // Only verify nodes that are actually moves; don't bother with the nodes that are
+ // operands to moves.
+ verifyResolutionMove(node, currentLocation);
+ }
+ }
+
+ // Verify the outgoing register assignments
+ {
+ VarToRegMap outVarToRegMap = getOutVarToRegMap(currentBlock->bbNum);
+ VARSET_ITER_INIT(compiler, iter, currentBlock->bbLiveOut, varIndex);
+ while (iter.NextElem(compiler, &varIndex))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+ regNumber regNum = getVarReg(outVarToRegMap, varNum);
+ Interval* interval = getIntervalForLocalVar(varNum);
+ assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK));
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+ interval->isActive = false;
+ }
+ }
+ }
+ }
+
+ DBEXEC(VERBOSE, printf("\n"));
+}
+
+//------------------------------------------------------------------------
+// verifyResolutionMove: Verify a resolution move. Called by verifyFinalAllocation()
+//
+// Arguments:
+// resolutionMove - A GenTree* that must be a resolution move.
+// currentLocation - The LsraLocation of the most recent RefPosition that has been verified.
+//
+// Return Value:
+// None.
+//
+// Notes:
+// If verbose is set, this will also dump the moves into the table of final allocations.
+void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation currentLocation)
+{
+ GenTree* dst = resolutionMove;
+ assert(IsResolutionMove(dst));
+
+ if (dst->OperGet() == GT_SWAP)
+ {
+ GenTreeLclVarCommon* left = dst->gtGetOp1()->AsLclVarCommon();
+ GenTreeLclVarCommon* right = dst->gtGetOp2()->AsLclVarCommon();
+ regNumber leftRegNum = left->gtRegNum;
+ regNumber rightRegNum = right->gtRegNum;
+ Interval* leftInterval = getIntervalForLocalVar(left->gtLclNum);
+ Interval* rightInterval = getIntervalForLocalVar(right->gtLclNum);
+ assert(leftInterval->physReg == leftRegNum && rightInterval->physReg == rightRegNum);
+ leftInterval->physReg = rightRegNum;
+ rightInterval->physReg = leftRegNum;
+ physRegs[rightRegNum].assignedInterval = leftInterval;
+ physRegs[leftRegNum].assignedInterval = rightInterval;
+ if (VERBOSE)
+ {
+ printf(shortRefPositionFormat, currentLocation, 0);
+ dumpIntervalName(leftInterval);
+ printf(" Swap ");
+ printf(" %-4s ", getRegName(rightRegNum));
+ dumpRegRecords();
+ printf(shortRefPositionFormat, currentLocation, 0);
+ dumpIntervalName(rightInterval);
+ printf(" \" ");
+ printf(" %-4s ", getRegName(leftRegNum));
+ dumpRegRecords();
+ }
+ return;
+ }
+ regNumber dstRegNum = dst->gtRegNum;
+ regNumber srcRegNum;
+ GenTreeLclVarCommon* lcl;
+ if (dst->OperGet() == GT_COPY)
+ {
+ lcl = dst->gtGetOp1()->AsLclVarCommon();
+ srcRegNum = lcl->gtRegNum;
+ }
+ else
+ {
+ lcl = dst->AsLclVarCommon();
+ if ((lcl->gtFlags & GTF_SPILLED) != 0)
+ {
+ srcRegNum = REG_STK;
+ }
+ else
+ {
+ assert((lcl->gtFlags & GTF_SPILL) != 0);
+ srcRegNum = dstRegNum;
+ dstRegNum = REG_STK;
+ }
+ }
+ Interval* interval = getIntervalForLocalVar(lcl->gtLclNum);
+ assert(interval->physReg == srcRegNum || (srcRegNum == REG_STK && interval->physReg == REG_NA));
+ if (srcRegNum != REG_STK)
+ {
+ physRegs[srcRegNum].assignedInterval = nullptr;
+ }
+ if (dstRegNum != REG_STK)
+ {
+ interval->physReg = dstRegNum;
+ interval->assignedReg = &(physRegs[dstRegNum]);
+ physRegs[dstRegNum].assignedInterval = interval;
+ interval->isActive = true;
+ }
+ else
+ {
+ interval->physReg = REG_NA;
+ interval->assignedReg = nullptr;
+ interval->isActive = false;
+ }
+ if (VERBOSE)
+ {
+ printf(shortRefPositionFormat, currentLocation, 0);
+ dumpIntervalName(interval);
+ printf(" Move ");
+ printf(" %-4s ", getRegName(dstRegNum));
+ dumpRegRecords();
+ }
+}
+#endif // DEBUG
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.h b/src/jit/lsra.h
new file mode 100644
index 0000000000..a3c41fe1e3
--- /dev/null
+++ b/src/jit/lsra.h
@@ -0,0 +1,1608 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+/*****************************************************************************/
+
+#ifndef _LSRA_H_
+#define _LSRA_H_
+
+#include "arraylist.h"
+#include "smallhash.h"
+#include "nodeinfo.h"
+
+// Minor and forward-reference types
+class Interval;
+class RefPosition;
+class LinearScan;
+class RegRecord;
+
+template <class T>
+class ArrayStack;
+
+// LsraLocation tracks the linearized order of the nodes.
+// Each node is assigned two LsraLocations - one for all the uses and all but the last
+// def, and a second location for the last def (if any)
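+// For example (an illustrative sketch, not numbering taken from this change): a node given
+// the location pair (10, 11) would place its uses and all but its last def at 10, and its
+// last def at 11.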
+
+typedef unsigned int LsraLocation;
+const unsigned int MinLocation = 0;
+const unsigned int MaxLocation = UINT_MAX;
+// max number of registers an operation could require internally (in addition to uses and defs)
+const unsigned int MaxInternalRegisters = 8;
+const unsigned int RegisterTypeCount = 2;
+
+typedef var_types RegisterType;
+#define IntRegisterType TYP_INT
+#define FloatRegisterType TYP_FLOAT
+
+inline regMaskTP calleeSaveRegs(RegisterType rt)
+{
+ return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED;
+}
+
+struct LocationInfo
+{
+ LsraLocation loc;
+
+ // Reg Index in case of multi-reg result producing call node.
+ // Indicates the position of the register that this location refers to.
+    // The max bits needed are based on the max value of MAX_RET_REG_COUNT
+    // across all targets, which happens to be 4 on ARM. Hence the index value
+    // would be 0..MAX_RET_REG_COUNT-1.
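+    // (With MAX_RET_REG_COUNT at most 4, as noted above, the 2-bit field below is wide
+    // enough to hold indices 0..3.)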
+ unsigned multiRegIdx : 2;
+
+ Interval* interval;
+ GenTree* treeNode;
+
+ LocationInfo(LsraLocation l, Interval* i, GenTree* t, unsigned regIdx = 0)
+ : loc(l), multiRegIdx(regIdx), interval(i), treeNode(t)
+ {
+ assert(multiRegIdx == regIdx);
+ }
+
+ // default constructor for data structures
+ LocationInfo()
+ {
+ }
+};
+
+struct LsraBlockInfo
+{
+    BasicBlock::weight_t weight;
+    // bbNum of the predecessor to use for the register location of live-in variables.
+    // 0 for fgFirstBB.
+    unsigned int predBBNum;
+ bool hasCriticalInEdge;
+ bool hasCriticalOutEdge;
+};
+
+// This is sort of a bit mask
+// The low order 2 bits will be 1 for defs, and 2 for uses
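+// (Illustrative reading of that encoding: a member value with bit 0x1 set satisfies
+// RefTypeIsDef() below, and one with bit 0x2 set satisfies RefTypeIsUse(); the actual
+// member values come from lsra_reftypes.h.)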
+enum RefType : unsigned char
+{
+#define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
+#include "lsra_reftypes.h"
+#undef DEF_REFTYPE
+};
+
+// position in a block (for resolution)
+enum BlockStartOrEnd
+{
+ BlockPositionStart = 0,
+ BlockPositionEnd = 1,
+ PositionCount = 2
+};
+
+inline bool RefTypeIsUse(RefType refType)
+{
+ return ((refType & RefTypeUse) == RefTypeUse);
+}
+
+inline bool RefTypeIsDef(RefType refType)
+{
+ return ((refType & RefTypeDef) == RefTypeDef);
+}
+
+typedef regNumber* VarToRegMap;
+
+template <typename ElementType, CompMemKind MemKind>
+class ListElementAllocator
+{
+private:
+ template <typename U, CompMemKind CMK>
+ friend class ListElementAllocator;
+
+ Compiler* m_compiler;
+
+public:
+ ListElementAllocator(Compiler* compiler) : m_compiler(compiler)
+ {
+ }
+
+ template <typename U>
+ ListElementAllocator(const ListElementAllocator<U, MemKind>& other) : m_compiler(other.m_compiler)
+ {
+ }
+
+ ElementType* allocate(size_t count)
+ {
+ return reinterpret_cast<ElementType*>(m_compiler->compGetMem(sizeof(ElementType) * count, MemKind));
+ }
+
+ void deallocate(ElementType* pointer, size_t count)
+ {
+ }
+
+ template <typename U>
+ struct rebind
+ {
+ typedef ListElementAllocator<U, MemKind> allocator;
+ };
+};
+
+typedef ListElementAllocator<Interval, CMK_LSRA_Interval> LinearScanMemoryAllocatorInterval;
+typedef ListElementAllocator<RefPosition, CMK_LSRA_RefPosition> LinearScanMemoryAllocatorRefPosition;
+
+typedef jitstd::list<Interval, LinearScanMemoryAllocatorInterval> IntervalList;
+typedef jitstd::list<RefPosition, LinearScanMemoryAllocatorRefPosition> RefPositionList;
+
+class Referenceable
+{
+public:
+ Referenceable()
+ {
+ firstRefPosition = nullptr;
+ recentRefPosition = nullptr;
+ lastRefPosition = nullptr;
+ isActive = false;
+ }
+
+ // A linked list of RefPositions. These are only traversed in the forward
+ // direction, and are not moved, so they don't need to be doubly linked
+ // (see RefPosition).
+
+ RefPosition* firstRefPosition;
+ RefPosition* recentRefPosition;
+ RefPosition* lastRefPosition;
+
+ bool isActive;
+
+ // Get the position of the next reference which is at or greater than
+    // the current location (relies upon recentRefPosition being updated
+ // during traversal).
+ RefPosition* getNextRefPosition();
+ LsraLocation getNextRefLocation();
+};
+
+class RegRecord : public Referenceable
+{
+public:
+ RegRecord()
+ {
+ assignedInterval = nullptr;
+ previousInterval = nullptr;
+ regNum = REG_NA;
+ isCalleeSave = false;
+ registerType = IntRegisterType;
+ isBusyUntilNextKill = false;
+ }
+
+ void init(regNumber reg)
+ {
+#ifdef _TARGET_ARM64_
+ // The Zero register, or the SP
+ if ((reg == REG_ZR) || (reg == REG_SP))
+ {
+ // IsGeneralRegister returns false for REG_ZR and REG_SP
+ regNum = reg;
+ registerType = IntRegisterType;
+ }
+ else
+#endif
+ if (emitter::isFloatReg(reg))
+ {
+ registerType = FloatRegisterType;
+ }
+ else
+ {
+ // The constructor defaults to IntRegisterType
+ assert(emitter::isGeneralRegister(reg) && registerType == IntRegisterType);
+ }
+ regNum = reg;
+ isCalleeSave = ((RBM_CALLEE_SAVED & genRegMask(reg)) != 0);
+ }
+
+#ifdef DEBUG
+ // print out representation
+ void dump();
+ // concise representation for embedding
+ void tinyDump();
+#endif // DEBUG
+
+ bool isFree();
+
+ // RefPosition * getNextRefPosition();
+ // LsraLocation getNextRefLocation();
+
+ // DATA
+
+ // interval to which this register is currently allocated.
+ // If the interval is inactive (isActive == false) then it is not currently live,
+    // and the register can be unassigned (i.e. setting assignedInterval to nullptr)
+ // without spilling the register.
+ Interval* assignedInterval;
+ // Interval to which this register was previously allocated, and which was unassigned
+ // because it was inactive. This register will be reassigned to this Interval when
+ // assignedInterval becomes inactive.
+ Interval* previousInterval;
+
+ regNumber regNum;
+ bool isCalleeSave;
+ RegisterType registerType;
+ // This register must be considered busy until the next time it is explicitly killed.
+ // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
+ // the problem with the reg becoming free if the last-use is encountered before the call.
+ bool isBusyUntilNextKill;
+
+ bool conflictingFixedRegReference(RefPosition* refPosition);
+};
+
+inline bool leafInRange(GenTree* leaf, int lower, int upper)
+{
+ if (!leaf->IsIntCnsFitsInI32())
+ {
+ return false;
+ }
+ if (leaf->gtIntCon.gtIconVal < lower)
+ {
+ return false;
+ }
+ if (leaf->gtIntCon.gtIconVal > upper)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+inline bool leafInRange(GenTree* leaf, int lower, int upper, int multiple)
+{
+ if (!leafInRange(leaf, lower, upper))
+ {
+ return false;
+ }
+ if (leaf->gtIntCon.gtIconVal % multiple)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+inline bool leafAddInRange(GenTree* leaf, int lower, int upper, int multiple = 1)
+{
+ if (leaf->OperGet() != GT_ADD)
+ {
+ return false;
+ }
+ return leafInRange(leaf->gtOp.gtOp2, lower, upper, multiple);
+}
+
+inline bool isCandidateVar(LclVarDsc* varDsc)
+{
+ return varDsc->lvLRACandidate;
+}
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX LinearScan XX
+XX XX
+XX This is the container for the Linear Scan data structures and methods. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+// OPTION 1: The algorithm as described in "Optimized Interval Splitting in a
+// Linear Scan Register Allocator". It is driven by iterating over the Interval
+// lists. In this case, we need multiple IntervalLists, and Intervals will be
+// moved between them so they must be easily updated.
+
+// OPTION 2: The algorithm is driven by iterating over the RefPositions. In this
+// case, we only need a single IntervalList, and it won't be updated.
+// The RefPosition must refer to its Interval, and we need to be able to traverse
+// to the next RefPosition in code order
+// THIS IS THE OPTION CURRENTLY BEING PURSUED
+
+class LocationInfoList;
+class LocationInfoListNodePool;
+
+class LinearScan : public LinearScanInterface
+{
+ friend class RefPosition;
+ friend class Interval;
+ friend class Lowering;
+ friend class TreeNodeInfo;
+
+public:
+ // This could use further abstraction. From Compiler we need the tree,
+ // the flowgraph and the allocator.
+ LinearScan(Compiler* theCompiler);
+
+ // This is the main driver
+ virtual void doLinearScan();
+
+    // TreeNodeInfo contains three register masks: src candidates, dst candidates, and internal candidates.
+ // Instead of storing actual register masks, however, which are large, we store a small index into a table
+ // of register masks, stored in this class. We create only as many distinct register masks as are needed.
+ // All identical register masks get the same index. The register mask table contains:
+ // 1. A mask containing all eligible integer registers.
+    // 2. A mask containing all eligible floating-point registers.
+    // 3. A mask for each single register.
+ // 4. A mask for each combination of registers, created dynamically as required.
+ //
+ // Currently, the maximum number of masks allowed is a constant defined by 'numMasks'. The register mask
+ // table is never resized. It is also limited by the size of the index, currently an unsigned char.
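+    //
+    // Illustrative sketch of the sharing this provides (not code from this change): two nodes
+    // whose dst candidates are both (RBM_EAX | RBM_ECX) map to the same RegMaskIndex via
+    // GetIndexForRegMask, and GetRegMaskForIndex on that index returns the combined mask.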
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_)
+ static const int numMasks = 128;
+#else
+ static const int numMasks = 64;
+#endif
+
+ regMaskTP* regMaskTable;
+ int nextFreeMask;
+
+ typedef int RegMaskIndex;
+
+ // allint is 0, allfloat is 1, all the single-bit masks start at 2
+ enum KnownRegIndex
+ {
+ ALLINT_IDX = 0,
+ ALLFLOAT_IDX = 1,
+ FIRST_SINGLE_REG_IDX = 2
+ };
+
+ RegMaskIndex GetIndexForRegMask(regMaskTP mask);
+ regMaskTP GetRegMaskForIndex(RegMaskIndex index);
+ void RemoveRegisterFromMasks(regNumber reg);
+
+#ifdef DEBUG
+ void dspRegisterMaskTable();
+#endif // DEBUG
+
+ // Initialize the block traversal for LSRA.
+ // This resets the bbVisitedSet, and on the first invocation sets the blockSequence array,
+ // which determines the order in which blocks will be allocated (currently called during Lowering).
+ BasicBlock* startBlockSequence();
+ // Move to the next block in sequence, updating the current block information.
+ BasicBlock* moveToNextBlock();
+ // Get the next block to be scheduled without changing the current block,
+ // but updating the blockSequence during the first iteration if it is not fully computed.
+ BasicBlock* getNextBlock();
+
+ // This is called during code generation to update the location of variables
+ virtual void recordVarLocationsAtStartOfBB(BasicBlock* bb);
+
+ // This does the dataflow analysis and builds the intervals
+ void buildIntervals();
+
+ // This is where the actual assignment is done
+ void allocateRegisters();
+
+ // This is the resolution phase, where cross-block mismatches are fixed up
+ void resolveRegisters();
+
+ void writeRegisters(RefPosition* currentRefPosition, GenTree* tree);
+
+ // Insert a copy in the case where a tree node value must be moved to a different
+ // register at the point of use, or it is reloaded to a different register
+ // than the one it was spilled from
+ void insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned multiRegIdx, RefPosition* refPosition);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ // Insert code to save and restore the upper half of a vector that lives
+ // in a callee-save register at the point of a call (the upper half is
+ // not preserved).
+ void insertUpperVectorSaveAndReload(GenTreePtr tree, RefPosition* refPosition, BasicBlock* block);
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+ // resolve along one block-block edge
+ enum ResolveType
+ {
+ ResolveSplit,
+ ResolveJoin,
+ ResolveCritical,
+ ResolveSharedCritical,
+ ResolveTypeCount
+ };
+#ifdef DEBUG
+ static const char* resolveTypeName[ResolveTypeCount];
+#endif
+
+ enum WhereToInsert
+ {
+ InsertAtTop,
+ InsertAtBottom
+ };
+
+ void addResolution(
+ BasicBlock* block, GenTreePtr insertionPoint, Interval* interval, regNumber outReg, regNumber inReg);
+
+ void handleOutgoingCriticalEdges(BasicBlock* block);
+
+ void resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet);
+
+ void resolveEdges();
+
+ // Finally, the register assignments are written back to the tree nodes.
+ void recordRegisterAssignments();
+
+ // Keep track of how many temp locations we'll need for spill
+ void initMaxSpill();
+ void updateMaxSpill(RefPosition* refPosition);
+ void recordMaxSpill();
+
+    // maximum number of simultaneous spill locations used, per type
+ unsigned int maxSpill[TYP_COUNT];
+ unsigned int currentSpill[TYP_COUNT];
+ bool needFloatTmpForFPCall;
+ bool needDoubleTmpForFPCall;
+
+#ifdef DEBUG
+private:
+ //------------------------------------------------------------------------
+ // Should we stress lsra?
+ // This uses the same COMPLUS variable as rsStressRegs (COMPlus_JitStressRegs)
+ // However, the possible values and their interpretation are entirely different.
+ //
+ // The mask bits are currently divided into fields in which each non-zero value
+ // is a distinct stress option (e.g. 0x3 is not a combination of 0x1 and 0x2).
+ // However, subject to possible constraints (to be determined), the different
+ // fields can be combined (e.g. 0x7 is a combination of 0x3 and 0x4).
+ // Note that the field values are declared in a public enum, but the actual bits are
+ // only accessed via accessors.
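+    //
+    // For example (illustrative, using the field values defined below): COMPlus_JitStressRegs=0x3
+    // selects LSRA_LIMIT_SMALL_SET, while 0x83 combines it with LSRA_EXTEND_LIFETIMES, since those
+    // options live in different fields of the mask.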
+
+ unsigned lsraStressMask;
+
+ // This controls the registers available for allocation
+ enum LsraStressLimitRegs{LSRA_LIMIT_NONE = 0, LSRA_LIMIT_CALLEE = 0x1, LSRA_LIMIT_CALLER = 0x2,
+ LSRA_LIMIT_SMALL_SET = 0x3, LSRA_LIMIT_MASK = 0x3};
+
+ // When LSRA_LIMIT_SMALL_SET is specified, it is desirable to select a "mixed" set of caller- and callee-save
+ // registers, so as to get different coverage than limiting to callee or caller.
+    // At least for x86 and AMD64, and potentially other architectures that will support SIMD,
+ // we need a minimum of 5 fp regs in order to support the InitN intrinsic for Vector4.
+ // Hence the "SmallFPSet" has 5 elements.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_)
+#ifdef UNIX_AMD64_ABI
+    // On System V, RDI and RSI are not callee-saved. Use R12 and R13 as callee-saved registers.
+ static const regMaskTP LsraLimitSmallIntSet =
+ (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13);
+#else // !UNIX_AMD64_ABI
+    // On Windows AMD64, use RDI and RSI as the callee-saved registers.
+ static const regMaskTP LsraLimitSmallIntSet =
+ (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI);
+#endif // !UNIX_AMD64_ABI
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+#elif defined(_TARGET_ARM_)
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4);
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
+#elif defined(_TARGET_ARM64_)
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
+#elif defined(_TARGET_X86_)
+ static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
+ static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);
+#else
+#error Unsupported or unset target architecture
+#endif // target
+
+ LsraStressLimitRegs getStressLimitRegs()
+ {
+ return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK);
+ }
+ regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask);
+
+ // This controls the heuristics used to select registers
+ // These can be combined.
+ enum LsraSelect{LSRA_SELECT_DEFAULT = 0, LSRA_SELECT_REVERSE_HEURISTICS = 0x04,
+ LSRA_SELECT_REVERSE_CALLER_CALLEE = 0x08, LSRA_SELECT_NEAREST = 0x10, LSRA_SELECT_MASK = 0x1c};
+ LsraSelect getSelectionHeuristics()
+ {
+ return (LsraSelect)(lsraStressMask & LSRA_SELECT_MASK);
+ }
+ bool doReverseSelect()
+ {
+ return ((lsraStressMask & LSRA_SELECT_REVERSE_HEURISTICS) != 0);
+ }
+ bool doReverseCallerCallee()
+ {
+ return ((lsraStressMask & LSRA_SELECT_REVERSE_CALLER_CALLEE) != 0);
+ }
+ bool doSelectNearest()
+ {
+ return ((lsraStressMask & LSRA_SELECT_NEAREST) != 0);
+ }
+
+ // This controls the order in which basic blocks are visited during allocation
+ enum LsraTraversalOrder{LSRA_TRAVERSE_LAYOUT = 0x20, LSRA_TRAVERSE_PRED_FIRST = 0x40,
+ LSRA_TRAVERSE_RANDOM = 0x60, // NYI
+ LSRA_TRAVERSE_DEFAULT = LSRA_TRAVERSE_PRED_FIRST, LSRA_TRAVERSE_MASK = 0x60};
+ LsraTraversalOrder getLsraTraversalOrder()
+ {
+ if ((lsraStressMask & LSRA_TRAVERSE_MASK) == 0)
+ {
+ return LSRA_TRAVERSE_DEFAULT;
+ }
+ return (LsraTraversalOrder)(lsraStressMask & LSRA_TRAVERSE_MASK);
+ }
+ bool isTraversalLayoutOrder()
+ {
+ return getLsraTraversalOrder() == LSRA_TRAVERSE_LAYOUT;
+ }
+ bool isTraversalPredFirstOrder()
+ {
+ return getLsraTraversalOrder() == LSRA_TRAVERSE_PRED_FIRST;
+ }
+
+ // This controls whether lifetimes should be extended to the entire method.
+ // Note that this has no effect under MinOpts
+ enum LsraExtendLifetimes{LSRA_DONT_EXTEND = 0, LSRA_EXTEND_LIFETIMES = 0x80, LSRA_EXTEND_LIFETIMES_MASK = 0x80};
+ LsraExtendLifetimes getLsraExtendLifeTimes()
+ {
+ return (LsraExtendLifetimes)(lsraStressMask & LSRA_EXTEND_LIFETIMES_MASK);
+ }
+ bool extendLifetimes()
+ {
+ return getLsraExtendLifeTimes() == LSRA_EXTEND_LIFETIMES;
+ }
+
+    // This controls whether variable locations should be set to those of the previous block in layout order
+ // (LSRA_BLOCK_BOUNDARY_LAYOUT), or to that of the highest-weight predecessor (LSRA_BLOCK_BOUNDARY_PRED -
+ // the default), or rotated (LSRA_BLOCK_BOUNDARY_ROTATE).
+ enum LsraBlockBoundaryLocations{LSRA_BLOCK_BOUNDARY_PRED = 0, LSRA_BLOCK_BOUNDARY_LAYOUT = 0x100,
+ LSRA_BLOCK_BOUNDARY_ROTATE = 0x200, LSRA_BLOCK_BOUNDARY_MASK = 0x300};
+ LsraBlockBoundaryLocations getLsraBlockBoundaryLocations()
+ {
+ return (LsraBlockBoundaryLocations)(lsraStressMask & LSRA_BLOCK_BOUNDARY_MASK);
+ }
+ regNumber rotateBlockStartLocation(Interval* interval, regNumber targetReg, regMaskTP availableRegs);
+
+ // This controls whether we always insert a GT_RELOAD instruction after a spill
+ // Note that this can be combined with LsraSpillAlways (or not)
+ enum LsraReload{LSRA_NO_RELOAD_IF_SAME = 0, LSRA_ALWAYS_INSERT_RELOAD = 0x400, LSRA_RELOAD_MASK = 0x400};
+ LsraReload getLsraReload()
+ {
+ return (LsraReload)(lsraStressMask & LSRA_RELOAD_MASK);
+ }
+ bool alwaysInsertReload()
+ {
+ return getLsraReload() == LSRA_ALWAYS_INSERT_RELOAD;
+ }
+
+ // This controls whether we spill everywhere
+ enum LsraSpill{LSRA_DONT_SPILL_ALWAYS = 0, LSRA_SPILL_ALWAYS = 0x800, LSRA_SPILL_MASK = 0x800};
+ LsraSpill getLsraSpill()
+ {
+ return (LsraSpill)(lsraStressMask & LSRA_SPILL_MASK);
+ }
+ bool spillAlways()
+ {
+ return getLsraSpill() == LSRA_SPILL_ALWAYS;
+ }
+
+    // This controls whether RefPositions that lower/codegen indicated as reg-optional are
+    // allocated a reg at all.
+ enum LsraRegOptionalControl{LSRA_REG_OPTIONAL_DEFAULT = 0, LSRA_REG_OPTIONAL_NO_ALLOC = 0x1000,
+ LSRA_REG_OPTIONAL_MASK = 0x1000};
+
+ LsraRegOptionalControl getLsraRegOptionalControl()
+ {
+ return (LsraRegOptionalControl)(lsraStressMask & LSRA_REG_OPTIONAL_MASK);
+ }
+
+ bool regOptionalNoAlloc()
+ {
+ return getLsraRegOptionalControl() == LSRA_REG_OPTIONAL_NO_ALLOC;
+ }
+
+ // Dump support
+ void lsraDumpIntervals(const char* msg);
+ void dumpRefPositions(const char* msg);
+ void dumpVarRefPositions(const char* msg);
+
+ static bool IsResolutionMove(GenTree* node);
+ static bool IsResolutionNode(LIR::Range& containingRange, GenTree* node);
+
+ void verifyFinalAllocation();
+ void verifyResolutionMove(GenTree* resolutionNode, LsraLocation currentLocation);
+#else // !DEBUG
+ bool doSelectNearest()
+ {
+ return false;
+ }
+ bool extendLifetimes()
+ {
+ return false;
+ }
+ bool spillAlways()
+ {
+ return false;
+ }
+ // In a retail build we support only the default traversal order
+ bool isTraversalLayoutOrder()
+ {
+ return false;
+ }
+ bool isTraversalPredFirstOrder()
+ {
+ return true;
+ }
+ bool getLsraExtendLifeTimes()
+ {
+ return false;
+ }
+#endif // !DEBUG
+
+public:
+ // Used by Lowering when considering whether to split Longs, as well as by identifyCandidates().
+ bool isRegCandidate(LclVarDsc* varDsc);
+
+private:
+ // Determine which locals are candidates for allocation
+ void identifyCandidates();
+
+ // determine which locals are used in EH constructs we don't want to deal with
+ void identifyCandidatesExceptionDataflow();
+
+ void buildPhysRegRecords();
+
+ void setLastUses(BasicBlock* block);
+
+ void setFrameType();
+
+ // Update allocations at start/end of block
+ void processBlockEndAllocation(BasicBlock* current);
+
+ // Record variable locations at start/end of block
+ void processBlockStartLocations(BasicBlock* current, bool allocationPass);
+ void processBlockEndLocations(BasicBlock* current);
+
+ RefType CheckBlockType(BasicBlock* block, BasicBlock* prevBlock);
+
+ // insert refpositions representing prolog zero-inits which will be added later
+ void insertZeroInitRefPositions();
+
+ void AddMapping(GenTree* node, LsraLocation loc);
+
+ // add physreg refpositions for a tree node, based on calling convention and instruction selection predictions
+ void addRefsForPhysRegMask(regMaskTP mask, LsraLocation currentLoc, RefType refType, bool isLastUse);
+
+ void resolveConflictingDefAndUse(Interval* interval, RefPosition* defRefPosition);
+
+ void buildRefPositionsForNode(GenTree* tree,
+ BasicBlock* block,
+ LocationInfoListNodePool& listNodePool,
+ HashTableBase<GenTree*, LocationInfoList>& operandToLocationInfoMap,
+ LsraLocation loc);
+
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ VARSET_VALRET_TP buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc);
+ void buildUpperVectorRestoreRefPositions(GenTree* tree, LsraLocation currentLoc, VARSET_VALARG_TP liveLargeVectors);
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+    // For AMD64 on System V machines. This method
+    // is called as a replacement for raUpdateRegStateForArg,
+    // which is used on Windows. On System V systems a struct can be
+    // passed partially using registers from the two register files.
+ void unixAmd64UpdateRegStateForArg(LclVarDsc* argDsc);
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // Update reg state for an incoming register argument
+ void updateRegStateForArg(LclVarDsc* argDsc);
+
+ inline void setTreeNodeInfo(GenTree* tree, TreeNodeInfo info)
+ {
+ tree->gtLsraInfo = info;
+ tree->gtClearReg(compiler);
+
+ DBEXEC(VERBOSE, info.dump(this));
+ }
+
+ inline void clearDstCount(GenTree* tree)
+ {
+ tree->gtLsraInfo.dstCount = 0;
+ }
+
+ inline void clearOperandCounts(GenTree* tree)
+ {
+ TreeNodeInfo& info = tree->gtLsraInfo;
+ info.srcCount = 0;
+ info.dstCount = 0;
+ }
+
+ inline bool isLocalDefUse(GenTree* tree)
+ {
+ return tree->gtLsraInfo.isLocalDefUse;
+ }
+
+ inline bool isCandidateLocalRef(GenTree* tree)
+ {
+ if (tree->IsLocal())
+ {
+ unsigned int lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < compiler->lvaCount);
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+
+ return isCandidateVar(varDsc);
+ }
+ return false;
+ }
+
+ static Compiler::fgWalkResult markAddrModeOperandsHelperMD(GenTreePtr tree, void* p);
+
+ // Return the registers killed by the given tree node.
+ regMaskTP getKillSetForNode(GenTree* tree);
+
+ // Given some tree node add refpositions for all the registers this node kills
+ bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc);
+
+ regMaskTP allRegs(RegisterType rt);
+ regMaskTP allRegs(GenTree* tree);
+ regMaskTP allMultiRegCallNodeRegs(GenTreeCall* tree);
+ regMaskTP allSIMDRegs();
+ regMaskTP internalFloatRegCandidates();
+
+ bool registerIsFree(regNumber regNum, RegisterType regType);
+ bool registerIsAvailable(RegRecord* physRegRecord,
+ LsraLocation currentLoc,
+ LsraLocation* nextRefLocationPtr,
+ RegisterType regType);
+ void freeRegister(RegRecord* physRegRecord);
+ void freeRegisters(regMaskTP regsToFree);
+
+ regMaskTP getUseCandidates(GenTree* useNode);
+ regMaskTP getDefCandidates(GenTree* tree);
+ var_types getDefType(GenTree* tree);
+
+ RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, LsraLocation currentLoc, regMaskTP regMask);
+
+ int buildInternalRegisterDefsForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[]);
+
+ void buildInternalRegisterUsesForNode(GenTree* tree, LsraLocation currentLoc, RefPosition* defs[], int total);
+
+ void resolveLocalRef(BasicBlock* block, GenTreePtr treeNode, RefPosition* currentRefPosition);
+
+ void insertMove(BasicBlock* block, GenTreePtr insertionPoint, unsigned lclNum, regNumber inReg, regNumber outReg);
+
+ void insertSwap(BasicBlock* block,
+ GenTreePtr insertionPoint,
+ unsigned lclNum1,
+ regNumber reg1,
+ unsigned lclNum2,
+ regNumber reg2);
+
+public:
+ // TODO-Cleanup: unused?
+ class PhysRegIntervalIterator
+ {
+ public:
+ PhysRegIntervalIterator(LinearScan* theLinearScan)
+ {
+ nextRegNumber = (regNumber)0;
+ linearScan = theLinearScan;
+ }
+ RegRecord* GetNext()
+ {
+ return &linearScan->physRegs[nextRegNumber];
+ }
+
+ private:
+ // This assumes that the physical registers are contiguous, starting
+ // with a register number of 0
+ regNumber nextRegNumber;
+ LinearScan* linearScan;
+ };
+
+private:
+ Interval* newInterval(RegisterType regType);
+
+ Interval* getIntervalForLocalVar(unsigned varNum)
+ {
+ return localVarIntervals[varNum];
+ }
+ RegRecord* getRegisterRecord(regNumber regNum);
+
+ RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType);
+
+ RefPosition* newRefPosition(Interval* theInterval,
+ LsraLocation theLocation,
+ RefType theRefType,
+ GenTree* theTreeNode,
+ regMaskTP mask,
+ unsigned multiRegIdx = 0);
+
+ RefPosition* newRefPosition(
+ regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask);
+
+ void applyCalleeSaveHeuristics(RefPosition* rp);
+
+ void associateRefPosWithInterval(RefPosition* rp);
+
+ void associateRefPosWithRegister(RefPosition* rp);
+
+ unsigned getWeight(RefPosition* refPos);
+
+ /*****************************************************************************
+ * Register management
+ ****************************************************************************/
+ RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
+ regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
+ RegRecord* findBestPhysicalReg(RegisterType regType,
+ LsraLocation endLocation,
+ regMaskTP candidates,
+ regMaskTP preferences);
+ regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
+ regNumber assignCopyReg(RefPosition* refPosition);
+
+ void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
+ void assignPhysReg(RegRecord* regRec, Interval* interval);
+ void assignPhysReg(regNumber reg, Interval* interval)
+ {
+ assignPhysReg(getRegisterRecord(reg), interval);
+ }
+
+ void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
+ void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
+ void unassignPhysRegNoSpill(RegRecord* reg);
+ void unassignPhysReg(regNumber reg)
+ {
+ unassignPhysReg(getRegisterRecord(reg), nullptr);
+ }
+
+ void spillInterval(Interval* interval, RefPosition* fromRefPosition, RefPosition* toRefPosition);
+
+ void spillGCRefs(RefPosition* killRefPosition);
+
+ /*****************************************************************************
+ * For Resolution phase
+ ****************************************************************************/
+ // TODO-Throughput: Consider refactoring this so that we keep a map from regs to vars for better scaling
+ unsigned int regMapCount;
+
+ // When we split edges, we create new blocks, and instead of expanding the VarToRegMaps, we
+ // rely on the property that the "in" map is the same as the "from" block of the edge, and the
+ // "out" map is the same as the "to" block of the edge (by construction).
+ // So, for any block whose bbNum is greater than bbNumMaxBeforeResolution, we use the
+ // splitBBNumToTargetBBNumMap.
+ // TODO-Throughput: We may want to look into the cost/benefit tradeoff of doing this vs. expanding
+ // the arrays.
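+    //
+    // Illustrative example (block numbers are hypothetical): if the edge BB05 -> BB09 is split by
+    // a new BB12 whose bbNum is greater than bbNumMaxBeforeResolution, getSplitEdgeInfo(12) would
+    // return { fromBBNum = 5, toBBNum = 9 }, and BB12's "in" map comes from BB05 (the "from" block)
+    // while its "out" map comes from BB09 (the "to" block).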
+
+ unsigned bbNumMaxBeforeResolution;
+ struct SplitEdgeInfo
+ {
+ unsigned fromBBNum;
+ unsigned toBBNum;
+ };
+ typedef SimplerHashTable<unsigned, SmallPrimitiveKeyFuncs<unsigned>, SplitEdgeInfo, JitSimplerHashBehavior>
+ SplitBBNumToTargetBBNumMap;
+ SplitBBNumToTargetBBNumMap* splitBBNumToTargetBBNumMap;
+ SplitBBNumToTargetBBNumMap* getSplitBBNumToTargetBBNumMap()
+ {
+ if (splitBBNumToTargetBBNumMap == nullptr)
+ {
+ splitBBNumToTargetBBNumMap =
+ new (getAllocator(compiler)) SplitBBNumToTargetBBNumMap(getAllocator(compiler));
+ }
+ return splitBBNumToTargetBBNumMap;
+ }
+ SplitEdgeInfo getSplitEdgeInfo(unsigned int bbNum);
+
+ void initVarRegMaps();
+ void setInVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+ void setOutVarRegForBB(unsigned int bbNum, unsigned int varNum, regNumber reg);
+ VarToRegMap getInVarToRegMap(unsigned int bbNum);
+ VarToRegMap getOutVarToRegMap(unsigned int bbNum);
+ regNumber getVarReg(VarToRegMap map, unsigned int varNum);
+    // Initialize the incoming VarToRegMap to the given map values (generally those of a
+    // predecessor of the block)
+ VarToRegMap setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarToRegMap);
+
+ regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type);
+
+#ifdef DEBUG
+ void dumpVarToRegMap(VarToRegMap map);
+ void dumpInVarToRegMap(BasicBlock* block);
+ void dumpOutVarToRegMap(BasicBlock* block);
+
+ // There are three points at which a tuple-style dump is produced, and each
+ // differs slightly:
+ // - In LSRA_DUMP_PRE, it does a simple dump of each node, with indications of what
+ // tree nodes are consumed.
+ // - In LSRA_DUMP_REFPOS, which is after the intervals are built, but before
+    //   register allocation, each node is dumped, along with all of the RefPositions.
+    //   The Intervals are identified as Lnnn for lclVar intervals, Innn for other
+ // intervals, and Tnnn for internal temps.
+ // - In LSRA_DUMP_POST, which is after register allocation, the registers are
+ // shown.
+
+ enum LsraTupleDumpMode{LSRA_DUMP_PRE, LSRA_DUMP_REFPOS, LSRA_DUMP_POST};
+ void lsraGetOperandString(GenTreePtr tree,
+ LsraTupleDumpMode mode,
+ char* operandString,
+ unsigned operandStringLength);
+ void lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasDest);
+ void DumpOperandDefs(GenTree* operand,
+ bool& first,
+ LsraTupleDumpMode mode,
+ char* operandString,
+ const unsigned operandStringLength);
+ void TupleStyleDump(LsraTupleDumpMode mode);
+
+ bool dumpTerse;
+ LsraLocation maxNodeLocation;
+
+ // Width of various fields - used to create a streamlined dump during allocation that shows the
+ // state of all the registers in columns.
+ int regColumnWidth;
+ int regTableIndent;
+
+ const char* columnSeparator;
+ const char* line;
+ const char* leftBox;
+ const char* middleBox;
+ const char* rightBox;
+
+ static const int MAX_FORMAT_CHARS = 12;
+ char intervalNameFormat[MAX_FORMAT_CHARS];
+ char regNameFormat[MAX_FORMAT_CHARS];
+ char shortRefPositionFormat[MAX_FORMAT_CHARS];
+ char emptyRefPositionFormat[MAX_FORMAT_CHARS];
+ char indentFormat[MAX_FORMAT_CHARS];
+ static const int MAX_LEGEND_FORMAT_CHARS = 25;
+ char bbRefPosFormat[MAX_LEGEND_FORMAT_CHARS];
+ char legendFormat[MAX_LEGEND_FORMAT_CHARS];
+
+ // How many rows have we printed since last printing a "title row"?
+ static const int MAX_ROWS_BETWEEN_TITLES = 50;
+ int rowCountSinceLastTitle;
+
+ void dumpRegRecordHeader();
+ void dumpRegRecordTitle();
+ void dumpRegRecordTitleLines();
+ int getLastUsedRegNumIndex();
+ void dumpRegRecords();
+ // An abbreviated RefPosition dump for printing with column-based register state
+ void dumpRefPositionShort(RefPosition* refPosition, BasicBlock* currentBlock);
+ // Print the number of spaces occupied by a dumpRefPositionShort()
+ void dumpEmptyRefPosition();
+ // A dump of Referent, in exactly regColumnWidth characters
+ void dumpIntervalName(Interval* interval);
+
+ // Events during the allocation phase that cause some dump output, which differs depending
+ // upon whether dumpTerse is set:
+ enum LsraDumpEvent{
+ // Conflicting def/use
+ LSRA_EVENT_DEFUSE_CONFLICT, LSRA_EVENT_DEFUSE_FIXED_DELAY_USE, LSRA_EVENT_DEFUSE_CASE1, LSRA_EVENT_DEFUSE_CASE2,
+ LSRA_EVENT_DEFUSE_CASE3, LSRA_EVENT_DEFUSE_CASE4, LSRA_EVENT_DEFUSE_CASE5, LSRA_EVENT_DEFUSE_CASE6,
+
+ // Spilling
+ LSRA_EVENT_SPILL, LSRA_EVENT_SPILL_EXTENDED_LIFETIME, LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL,
+ LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL, LSRA_EVENT_DONE_KILL_GC_REFS,
+
+ // Block boundaries
+ LSRA_EVENT_START_BB, LSRA_EVENT_END_BB,
+
+ // Miscellaneous
+ LSRA_EVENT_FREE_REGS,
+
+ // Characteristics of the current RefPosition
+ LSRA_EVENT_INCREMENT_RANGE_END, // ???
+ LSRA_EVENT_LAST_USE, LSRA_EVENT_LAST_USE_DELAYED, LSRA_EVENT_NEEDS_NEW_REG,
+
+ // Allocation decisions
+ LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
+ LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
+ LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
+ LSRA_EVENT_REUSE_REG,
+ };
+ void dumpLsraAllocationEvent(LsraDumpEvent event,
+ Interval* interval = nullptr,
+ regNumber reg = REG_NA,
+ BasicBlock* currentBlock = nullptr);
+
+ void dumpBlockHeader(BasicBlock* block);
+
+ void validateIntervals();
+#endif // DEBUG
+
+ Compiler* compiler;
+
+private:
+#if MEASURE_MEM_ALLOC
+ IAllocator* lsraIAllocator;
+#endif
+
+ IAllocator* getAllocator(Compiler* comp)
+ {
+#if MEASURE_MEM_ALLOC
+ if (lsraIAllocator == nullptr)
+ {
+ lsraIAllocator = new (comp, CMK_LSRA) CompAllocator(comp, CMK_LSRA);
+ }
+ return lsraIAllocator;
+#else
+ return comp->getAllocator();
+#endif
+ }
+
+#ifdef DEBUG
+ // This is used for dumping
+ RefPosition* activeRefPosition;
+#endif // DEBUG
+
+ IntervalList intervals;
+
+ RegRecord physRegs[REG_COUNT];
+
+ Interval** localVarIntervals;
+
+ // Set of blocks that have been visited.
+ BlockSet bbVisitedSet;
+ void markBlockVisited(BasicBlock* block)
+ {
+ BlockSetOps::AddElemD(compiler, bbVisitedSet, block->bbNum);
+ }
+ void clearVisitedBlocks()
+ {
+ BlockSetOps::ClearD(compiler, bbVisitedSet);
+ }
+ bool isBlockVisited(BasicBlock* block)
+ {
+ return BlockSetOps::IsMember(compiler, bbVisitedSet, block->bbNum);
+ }
+
+ // A map from bbNum to the block information used during register allocation.
+ LsraBlockInfo* blockInfo;
+ BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated));
+
+ // The order in which the blocks will be allocated.
+    // This is an array of BasicBlock*, in the order in which they should be traversed.
+ BasicBlock** blockSequence;
+ // The verifiedAllBBs flag indicates whether we have verified that all BBs have been
+    // included in the blockSequence above, during setBlockSequence().
+ bool verifiedAllBBs;
+ void setBlockSequence();
+ int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
+ BasicBlockList* blockSequenceWorkList;
+ bool blockSequencingDone;
+ void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block);
+ void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
+ BasicBlock* getNextCandidateFromWorkList();
+
+ // The bbNum of the block being currently allocated or resolved.
+ unsigned int curBBNum;
+ // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated).
+ unsigned int curBBSeqNum;
+ // The number of blocks that we've sequenced.
+ unsigned int bbSeqCount;
+ // The Location of the start of the current block.
+ LsraLocation curBBStartLocation;
+
+ // Ordered list of RefPositions
+ RefPositionList refPositions;
+
+ // Per-block variable location mappings: an array indexed by block number that yields a
+ // pointer to an array of regNumber, one per variable.
+ VarToRegMap* inVarToRegMaps;
+ VarToRegMap* outVarToRegMaps;
+
+ // A temporary VarToRegMap used during the resolution of critical edges.
+ VarToRegMap sharedCriticalVarToRegMap;
+
+ PhasedVar<regMaskTP> availableIntRegs;
+ PhasedVar<regMaskTP> availableFloatRegs;
+ PhasedVar<regMaskTP> availableDoubleRegs;
+
+    // Current set of live tracked vars, used during building of RefPositions to determine whether
+    // to preference them to callee-save registers.
+ VARSET_TP currentLiveVars;
+ // Set of floating point variables to consider for callee-save registers.
+ VARSET_TP fpCalleeSaveCandidateVars;
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+#if defined(_TARGET_AMD64_)
+ static const var_types LargeVectorType = TYP_SIMD32;
+ static const var_types LargeVectorSaveType = TYP_SIMD16;
+#elif defined(_TARGET_ARM64_)
+ static const var_types LargeVectorType = TYP_SIMD16;
+ static const var_types LargeVectorSaveType = TYP_DOUBLE;
+#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+#error("Unknown target architecture for FEATURE_SIMD")
+#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+
+ // Set of large vector (TYP_SIMD32 on AVX) variables.
+ VARSET_TP largeVectorVars;
+ // Set of large vector (TYP_SIMD32 on AVX) variables to consider for callee-save registers.
+ VARSET_TP largeVectorCalleeSaveCandidateVars;
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+};
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Interval XX
+XX XX
+XX This is the fundamental data structure for linear scan register XX
+XX allocation. It represents the live range(s) for a variable or temp. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+class Interval : public Referenceable
+{
+public:
+ Interval(RegisterType registerType, regMaskTP registerPreferences)
+ : registerPreferences(registerPreferences)
+ , relatedInterval(nullptr)
+ , assignedReg(nullptr)
+ , registerType(registerType)
+ , isLocalVar(false)
+ , isSplit(false)
+ , isSpilled(false)
+ , isInternal(false)
+ , isStructField(false)
+ , isPromotedStruct(false)
+ , hasConflictingDefUse(false)
+ , hasNonCommutativeRMWDef(false)
+ , isSpecialPutArg(false)
+ , preferCalleeSave(false)
+ , isConstant(false)
+ , physReg(REG_COUNT)
+#ifdef DEBUG
+ , intervalIndex(0)
+#endif
+ , varNum(0)
+ {
+ }
+
+#ifdef DEBUG
+ // print out representation
+ void dump();
+ // concise representation for embedding
+ void tinyDump();
+ // extremely concise representation
+ void microDump();
+#endif // DEBUG
+
+ void setLocalNumber(unsigned localNum, LinearScan* l);
+
+ // Fixed registers for which this Interval has a preference
+ regMaskTP registerPreferences;
+
+    // The relatedInterval is the interval to which this interval is currently
+    // preferenced (e.g. because they are related by a copy).
+ Interval* relatedInterval;
+
+    // The assignedReg is the RegRecord for the register to which this interval
+ // has been assigned at some point - if the interval is active, this is the
+ // register it currently occupies.
+ RegRecord* assignedReg;
+
+ // DECIDE : put this in a union or do something w/ inheritance?
+    // this is an interval for a physical register, not an allocatable entity
+
+ RegisterType registerType;
+ bool isLocalVar : 1;
+ // Indicates whether this interval has been assigned to different registers
+ bool isSplit : 1;
+ // Indicates whether this interval is ever spilled
+ bool isSpilled : 1;
+ // indicates an interval representing the internal requirements for
+ // generating code for a node (temp registers internal to the node)
+ // Note that this interval may live beyond a node in the GT_ARR_LENREF/GT_IND
+ // case (though never lives beyond a stmt)
+ bool isInternal : 1;
+ // true if this is a LocalVar for a struct field
+ bool isStructField : 1;
+ // true iff this is a GT_LDOBJ for a fully promoted (PROMOTION_TYPE_INDEPENDENT) struct
+ bool isPromotedStruct : 1;
+ // true if this is an SDSU interval for which the def and use have conflicting register
+ // requirements
+ bool hasConflictingDefUse : 1;
+ // true if this interval is defined by a non-commutative 2-operand instruction
+ bool hasNonCommutativeRMWDef : 1;
+
+ // True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
+ // During allocation, this flag will be cleared if the source is not already in the required register.
+    // Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
+ // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
+ bool isSpecialPutArg : 1;
+
+ // True if this interval interferes with a call.
+ bool preferCalleeSave : 1;
+
+ // True if this interval is defined by a constant node that may be reused and/or may be
+ // able to reuse a constant that's already in a register.
+ bool isConstant : 1;
+
+ // The register to which it is currently assigned.
+ regNumber physReg;
+
+#ifdef DEBUG
+ unsigned int intervalIndex;
+#endif // DEBUG
+
+ unsigned int varNum; // This is the "variable number": the index into the lvaTable array
+
+ LclVarDsc* getLocalVar(Compiler* comp)
+ {
+ assert(isLocalVar);
+ return &(comp->lvaTable[this->varNum]);
+ }
+
+ // Get the local tracked variable "index" (lvVarIndex), used in bitmasks.
+ unsigned getVarIndex(Compiler* comp)
+ {
+ LclVarDsc* varDsc = getLocalVar(comp);
+ assert(varDsc->lvTracked); // If this isn't true, we shouldn't be calling this function!
+ return varDsc->lvVarIndex;
+ }
+
+ bool isAssignedTo(regNumber regNum)
+ {
+ // This uses regMasks to handle the case where a double actually occupies two registers
+ // TODO-Throughput: This could/should be done more cheaply.
+ return (physReg != REG_NA && (genRegMask(physReg, registerType) & genRegMask(regNum)) != RBM_NONE);
+ }
+
+ // Assign the related interval.
+ void assignRelatedInterval(Interval* newRelatedInterval)
+ {
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Assigning related ");
+ newRelatedInterval->microDump();
+ printf(" to ");
+ this->microDump();
+ printf("\n");
+ }
+#endif // DEBUG
+ relatedInterval = newRelatedInterval;
+ }
+
+ // Assign the related interval, but only if it isn't already assigned.
+ void assignRelatedIntervalIfUnassigned(Interval* newRelatedInterval)
+ {
+ if (relatedInterval == nullptr)
+ {
+ assignRelatedInterval(newRelatedInterval);
+ }
+ else
+ {
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("Interval ");
+ this->microDump();
+ printf(" already has a related interval\n");
+ }
+#endif // DEBUG
+ }
+ }
+
+ // Update the registerPreferences on the interval.
+ // If there are conflicting requirements on this interval, set the preferences to
+ // the union of them. That way maybe we'll get at least one of them.
+    // An exception is made in the case where either the existing or the new
+    // preferences are all callee-save, in which case we "prefer" the callee-save.
+
+ void updateRegisterPreferences(regMaskTP preferences)
+ {
+ // We require registerPreferences to have been initialized.
+ assert(registerPreferences != RBM_NONE);
+ // It is invalid to update with empty preferences
+ assert(preferences != RBM_NONE);
+
+ regMaskTP commonPreferences = (registerPreferences & preferences);
+ if (commonPreferences != RBM_NONE)
+ {
+ registerPreferences = commonPreferences;
+ return;
+ }
+
+ // There are no preferences in common.
+ // Preferences need to reflect both cases where a var must occupy a specific register,
+ // as well as cases where a var is live when a register is killed.
+ // In the former case, we would like to record all such registers, however we don't
+ // really want to use any registers that will interfere.
+ // To approximate this, we never "or" together multi-reg sets, which are generally kill sets.
+
+ if (!genMaxOneBit(preferences))
+ {
+ // The new preference value is a multi-reg set, so it's probably a kill.
+ // Keep the new value.
+ registerPreferences = preferences;
+ return;
+ }
+
+ if (!genMaxOneBit(registerPreferences))
+ {
+ // The old preference value is a multi-reg set.
+ // Keep the existing preference set, as it probably reflects one or more kills.
+ // It may have been a union of multiple individual registers, but we can't
+ // distinguish that case without extra cost.
+ return;
+ }
+
+ // If we reach here, we have two disjoint single-reg sets.
+ // Keep only the callee-save preferences, if not empty.
+ // Otherwise, take the union of the preferences.
+
+ regMaskTP newPreferences = registerPreferences | preferences;
+
+ if (preferCalleeSave)
+ {
+ regMaskTP calleeSaveMask = (calleeSaveRegs(this->registerType) & (newPreferences));
+ if (calleeSaveMask != RBM_NONE)
+ {
+ newPreferences = calleeSaveMask;
+ }
+ }
+ registerPreferences = newPreferences;
+ }
+};
+
+class RefPosition
+{
+public:
+ RefPosition(unsigned int bbNum, LsraLocation nodeLocation, GenTree* treeNode, RefType refType)
+ : referent(nullptr)
+ , nextRefPosition(nullptr)
+ , treeNode(treeNode)
+ , bbNum(bbNum)
+ , nodeLocation(nodeLocation)
+ , registerAssignment(RBM_NONE)
+ , refType(refType)
+ , multiRegIdx(0)
+ , lastUse(false)
+ , reload(false)
+ , spillAfter(false)
+ , copyReg(false)
+ , moveReg(false)
+ , isPhysRegRef(false)
+ , isFixedRegRef(false)
+ , isLocalDefUse(false)
+ , delayRegFree(false)
+ , outOfOrder(false)
+#ifdef DEBUG
+ , rpNum(0)
+#endif
+ {
+ }
+
+ // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
+ // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
+ // refers to an Interval, then 'isPhysRegRef' is false.
+ //
+ // Q: can 'referent' be NULL?
+
+ Referenceable* referent;
+
+ Interval* getInterval()
+ {
+ assert(!isPhysRegRef);
+ return (Interval*)referent;
+ }
+ void setInterval(Interval* i)
+ {
+ referent = i;
+ isPhysRegRef = false;
+ }
+
+ RegRecord* getReg()
+ {
+ assert(isPhysRegRef);
+ return (RegRecord*)referent;
+ }
+ void setReg(RegRecord* r)
+ {
+ referent = r;
+ isPhysRegRef = true;
+ registerAssignment = genRegMask(r->regNum);
+ }
+
+ // nextRefPosition is the next in code order.
+ // Note that in either case there is no need for these to be doubly linked, as they
+ // are only traversed in the forward direction, and are not moved.
+ RefPosition* nextRefPosition;
+
+ // The remaining fields are common to both options
+ GenTree* treeNode;
+ unsigned int bbNum;
+
+ // Prior to the allocation pass, registerAssignment captures the valid registers
+ // for this RefPosition. An empty set means that any register is valid. A non-empty
+ // set means that it must be one of the given registers (may be the full set if the
+ // only constraint is that it must reside in SOME register)
+ // After the allocation pass, this contains the actual assignment
+ LsraLocation nodeLocation;
+ regMaskTP registerAssignment;
+
+ regNumber assignedReg()
+ {
+ if (registerAssignment == RBM_NONE)
+ {
+ return REG_NA;
+ }
+
+ return genRegNumFromMask(registerAssignment);
+ }
+
+ RefType refType;
+
+ // Returns true if it is a reference on a gentree node.
+ bool IsActualRef()
+ {
+ return (refType == RefTypeDef || refType == RefTypeUse);
+ }
+
+ bool RequiresRegister()
+ {
+ return (IsActualRef()
+#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ || refType == RefTypeUpperVectorSaveDef || refType == RefTypeUpperVectorSaveUse
+#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
+ ) &&
+ !AllocateIfProfitable();
+ }
+
+    // Indicates whether this ref position is to be allocated
+    // a reg only if profitable. Currently these are the
+    // ref positions that lower/codegen has indicated as reg
+    // optional; they are treated as contained memory operands if
+    // no reg is allocated.
+ unsigned allocRegIfProfitable : 1;
+
+ void setAllocateIfProfitable(unsigned val)
+ {
+ allocRegIfProfitable = val;
+ }
+
+    // Returns true if this ref position is to be allocated
+    // a reg only if it is profitable.
+ bool AllocateIfProfitable()
+ {
+ // TODO-CQ: Right now if a ref position is marked as
+ // copyreg or movereg, then it is not treated as
+ // 'allocate if profitable'. This is an implementation
+ // limitation that needs to be addressed.
+ return allocRegIfProfitable && !copyReg && !moveReg;
+ }
+
+ // Used by RefTypeDef/Use positions of a multi-reg call node.
+ // Indicates the position of the register that this ref position refers to.
+    // The max bits needed are based on the max value of MAX_RET_REG_COUNT
+    // across all targets, which happens to be 4 on ARM. Hence the index value
+    // would be 0..MAX_RET_REG_COUNT-1.
+ unsigned multiRegIdx : 2;
+
+ void setMultiRegIdx(unsigned idx)
+ {
+ multiRegIdx = idx;
+ assert(multiRegIdx == idx);
+ }
+
+ unsigned getMultiRegIdx()
+ {
+ return multiRegIdx;
+ }
+
+ // Last Use - this may be true for multiple RefPositions in the same Interval
+ bool lastUse : 1;
+
+ // Spill and Copy info
+ // reload indicates that the value was spilled, and must be reloaded here.
+ // spillAfter indicates that the value is spilled here, so a spill must be added.
+ // copyReg indicates that the value needs to be copied to a specific register,
+ // but that it will also retain its current assigned register.
+ // moveReg indicates that the value needs to be moved to a different register,
+ // and that this will be its new assigned register.
+ // A RefPosition may have any flag individually or the following combinations:
+ // - reload and spillAfter (i.e. it remains in memory), but not in combination with copyReg or moveReg
+ // (reload cannot exist with copyReg or moveReg; it should be reloaded into the appropriate reg)
+ // - spillAfter and copyReg (i.e. it must be copied to a new reg for use, but is then spilled)
+ // - spillAfter and moveReg (i.e. it must be both spilled and moved)
+ // NOTE: a moveReg involves an explicit move, and would usually not be needed for a fixed Reg if it is going
+ // to be spilled, because the code generator will do the move to the fixed register, and doesn't need to
+ // record the new register location as the new "home" location of the lclVar. However, if there is a conflicting
+ // use at the same location (e.g. lclVar V1 is in rdx and needs to be in rcx, but V2 needs to be in rdx), then
+ // we need an explicit move.
+ // - copyReg and moveReg must not exist with each other.
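+ //
+ // For example (illustrative): a use that must be in a particular register while the
+ // Interval remains in its currently assigned register gets copyReg; if instead the
+ // Interval's assigned register itself changes at this point, moveReg is used (never both).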
+
+ bool reload : 1;
+ bool spillAfter : 1;
+ bool copyReg : 1;
+ bool moveReg : 1; // true if this var is moved to a new register
+
+ bool isPhysRegRef : 1; // true if 'referent' points to a RegRecord, false if it points to an Interval
+ bool isFixedRegRef : 1;
+ bool isLocalDefUse : 1;
+
+ // delayRegFree indicates that the register should not be freed right away, but instead wait
+ // until the next Location after it would normally be freed. This is used for the case of
+ // non-commutative binary operators, where op2 must not be assigned the same register as
+ // the target. We do this by not freeing it until after the target has been defined.
+ // Another option would be to actually change the Location of the op2 use until the same
+ // Location as the def, but then it could potentially reuse a register that has been freed
+ // from the other source(s), e.g. if it's a lastUse or spilled.
+ bool delayRegFree : 1;
+
+ // outOfOrder is marked on a (non-def) RefPosition that doesn't follow a definition of the
+ // register currently assigned to the Interval. This happens when we use the assigned
+ // register from a predecessor that is not the most recently allocated BasicBlock.
+ bool outOfOrder : 1;
+
+ LsraLocation getRefEndLocation()
+ {
+ return delayRegFree ? nodeLocation + 1 : nodeLocation;
+ }
+
+#ifdef DEBUG
+ unsigned rpNum; // The unique RefPosition number, equal to its index in the refPositions list. Only used for
+ // debugging dumps.
+#endif // DEBUG
+
+ bool isIntervalRef()
+ {
+ return (!isPhysRegRef && (referent != nullptr));
+ }
+
+ // isTrueDef indicates that the RefPosition is a non-update def of a non-internal
+ // interval
+ bool isTrueDef()
+ {
+ return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal);
+ }
+
+ // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register
+ // specified by the given mask
+ bool isFixedRefOfRegMask(regMaskTP regMask)
+ {
+ assert(genMaxOneBit(regMask));
+ return (registerAssignment == regMask);
+ }
+
+ // isFixedRefOfReg indicates that the RefPosition has a fixed assignment to the given register
+ bool isFixedRefOfReg(regNumber regNum)
+ {
+ return (isFixedRefOfRegMask(genRegMask(regNum)));
+ }
+
+#ifdef DEBUG
+ // operator= copies everything except 'rpNum', which must remain unique
+ RefPosition& operator=(const RefPosition& rp)
+ {
+ unsigned rpNumSave = rpNum;
+ memcpy(this, &rp, sizeof(rp));
+ rpNum = rpNumSave;
+ return *this;
+ }
+
+ void dump();
+#endif // DEBUG
+};
+
+#ifdef DEBUG
+void dumpRegMask(regMaskTP regs);
+#endif // DEBUG
+
+/*****************************************************************************/
+#endif //_LSRA_H_
+/*****************************************************************************/
diff --git a/src/jit/lsra_reftypes.h b/src/jit/lsra_reftypes.h
new file mode 100644
index 0000000000..841b78c881
--- /dev/null
+++ b/src/jit/lsra_reftypes.h
@@ -0,0 +1,23 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+// memberName - enum member name
+// memberValue - enum member value
+// shortName - short name string
+// DEF_REFTYPE(memberName , memberValue , shortName )
+ DEF_REFTYPE(RefTypeInvalid , 0x00 , "Invl" )
+ DEF_REFTYPE(RefTypeDef , 0x01 , "Def " )
+ DEF_REFTYPE(RefTypeUse , 0x02 , "Use " )
+ DEF_REFTYPE(RefTypeKill , 0x04 , "Kill" )
+ DEF_REFTYPE(RefTypeBB , 0x08 , "BB " )
+ DEF_REFTYPE(RefTypeFixedReg , 0x10 , "Fixd" )
+ DEF_REFTYPE(RefTypeExpUse , (0x20 | RefTypeUse), "ExpU" )
+ DEF_REFTYPE(RefTypeParamDef , (0x10 | RefTypeDef), "Parm" )
+ DEF_REFTYPE(RefTypeDummyDef , (0x20 | RefTypeDef), "DDef" )
+ DEF_REFTYPE(RefTypeZeroInit , (0x30 | RefTypeDef), "Zero" )
+ DEF_REFTYPE(RefTypeUpperVectorSaveDef, (0x40 | RefTypeDef), "UVSv" )
+ DEF_REFTYPE(RefTypeUpperVectorSaveUse, (0x40 | RefTypeUse), "UVRs" )
+ DEF_REFTYPE(RefTypeKillGCRefs , 0x80 , "KlGC" )
+// clang-format on
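+//
+// Illustrative sketch (an assumption about typical usage, not part of the original header):
+// a consumer defines DEF_REFTYPE before including this file, for example to build an enum:
+//
+//   #define DEF_REFTYPE(memberName, memberValue, shortName) memberName = memberValue,
+//   enum RefType : unsigned char
+//   {
+//   #include "lsra_reftypes.h"
+//   };
+//   #undef DEF_REFTYPE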
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
new file mode 100644
index 0000000000..00df17baa0
--- /dev/null
+++ b/src/jit/morph.cpp
@@ -0,0 +1,18245 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Morph XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "allocacheck.h" // for alloca
+
+// Convert the given node into a call to the specified helper passing
+// the given argument list.
+//
+// Tries to fold constants and also adds an edge for the overflow exception.
+// Returns the morphed tree.
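+//
+// For example, later in this file a double->int cast that cannot be expanded inline is
+// rewritten via fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper).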
+GenTreePtr Compiler::fgMorphCastIntoHelper(GenTreePtr tree, int helper, GenTreePtr oper)
+{
+ GenTree* result;
+
+ /* If the operand is a constant, we'll try to fold it */
+ if (oper->OperIsConst())
+ {
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
+
+ if (tree != oldTree)
+ {
+ return fgMorphTree(tree);
+ }
+ else if (tree->OperKind() & GTK_CONST)
+ {
+ return fgMorphConst(tree);
+ }
+
+ // assert that oper is unchanged and that it is still a GT_CAST node
+ noway_assert(tree->gtCast.CastOp() == oper);
+ noway_assert(tree->gtOper == GT_CAST);
+ }
+ result = fgMorphIntoHelperCall(tree, helper, gtNewArgList(oper));
+ assert(result == tree);
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Convert the given node into a call to the specified helper passing
+ * the given argument list.
+ */
+
+GenTreePtr Compiler::fgMorphIntoHelperCall(GenTreePtr tree, int helper, GenTreeArgList* args)
+{
+ tree->ChangeOper(GT_CALL);
+
+ tree->gtFlags |= GTF_CALL;
+ if (args)
+ {
+ tree->gtFlags |= (args->gtFlags & GTF_ALL_EFFECT);
+ }
+ tree->gtCall.gtCallType = CT_HELPER;
+ tree->gtCall.gtCallMethHnd = eeFindHelper(helper);
+ tree->gtCall.gtCallArgs = args;
+ tree->gtCall.gtCallObjp = nullptr;
+ tree->gtCall.gtCallLateArgs = nullptr;
+ tree->gtCall.fgArgInfo = nullptr;
+ tree->gtCall.gtRetClsHnd = nullptr;
+ tree->gtCall.gtCallMoreFlags = 0;
+ tree->gtCall.gtInlineCandidateInfo = nullptr;
+ tree->gtCall.gtControlExpr = nullptr;
+
+#ifdef LEGACY_BACKEND
+ tree->gtCall.gtCallRegUsedMask = RBM_NONE;
+#endif // LEGACY_BACKEND
+
+#if DEBUG
+ // Helper calls are never candidates.
+
+ tree->gtCall.gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
+#endif // DEBUG
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ tree->gtCall.gtEntryPoint.addr = nullptr;
+#endif
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (varTypeIsLong(tree))
+ {
+ GenTreeCall* callNode = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = callNode->GetReturnTypeDesc();
+ retTypeDesc->Reset();
+ retTypeDesc->InitializeLongReturnType(this);
+ callNode->ClearOtherRegs();
+ }
+#endif
+
+ /* Perform the morphing */
+
+ tree = fgMorphArgs(tree->AsCall());
+
+ return tree;
+}
+
+/*****************************************************************************
+ *
+ * Determine if a relop must be morphed to a qmark to manifest a boolean value.
+ * This is done when code generation can't create straight-line code to do it.
+ */
+bool Compiler::fgMorphRelopToQmark(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+ return false;
+#else // LEGACY_BACKEND
+ return (genActualType(tree->TypeGet()) == TYP_LONG) || varTypeIsFloating(tree->TypeGet());
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Morph a cast node (we perform some very simple transformations here).
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::fgMorphCast(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_CAST);
+ noway_assert(genTypeSize(TYP_I_IMPL) == sizeof(void*));
+
+ /* The first sub-operand is the thing being cast */
+
+ GenTreePtr oper = tree->gtCast.CastOp();
+ var_types srcType = genActualType(oper->TypeGet());
+ unsigned srcSize;
+
+ var_types dstType = tree->CastToType();
+ unsigned dstSize = genTypeSize(dstType);
+
+ // See if the cast has to be done in two steps. R -> I
+ if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
+ {
+ // Only x86 must go through TYP_DOUBLE to get to all
+ // integral types; everybody else can get straight there,
+ // except when using helpers.
+ if (srcType == TYP_FLOAT
+#if !FEATURE_STACK_FP_X87
+
+#if defined(_TARGET_ARM64_)
+ // Arm64: src = float, dst is overflow conversion.
+ // This goes through helper and hence src needs to be converted to double.
+ && tree->gtOverflow()
+#elif defined(_TARGET_AMD64_)
+ // Amd64: src = float, dst = uint64 or overflow conversion.
+ // This goes through helper and hence src needs to be converted to double.
+ && (tree->gtOverflow() || (dstType == TYP_ULONG))
+#elif defined(_TARGET_ARM_)
+ // Arm: src = float, dst = int64/uint64 or overflow conversion.
+ && (tree->gtOverflow() || varTypeIsLong(dstType))
+#endif
+
+#endif // FEATURE_STACK_FP_X87
+ )
+ {
+ oper = gtNewCastNode(TYP_DOUBLE, oper, TYP_DOUBLE);
+ }
+
+ // do we need to do it in two steps R -> I, I -> smallType?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM64_) || defined(_TARGET_AMD64_)
+ if (dstSize < genTypeSize(TYP_INT))
+ {
+ oper = gtNewCastNodeL(TYP_INT, oper, TYP_INT);
+ oper->gtFlags |= (tree->gtFlags & (GTF_UNSIGNED | GTF_OVERFLOW | GTF_EXCEPT));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+#else
+ if (dstSize < sizeof(void*))
+ {
+ oper = gtNewCastNodeL(TYP_I_IMPL, oper, TYP_I_IMPL);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
+ }
+#endif
+ else
+ {
+ /* Note that if we need to use a helper call then we can not morph oper */
+ if (!tree->gtOverflow())
+ {
+#ifdef _TARGET_ARM64_ // On ARM64 All non-overflow checking conversions can be optimized
+ goto OPTIMIZECAST;
+#else
+ switch (dstType)
+ {
+ case TYP_INT:
+#ifdef _TARGET_X86_ // there is no rounding convert to integer instruction on ARM or x64 so skip this
+ if ((oper->gtOper == GT_INTRINSIC) &&
+ (oper->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round))
+ {
+ /* optimization: conv.i4(round.d(d)) -> round.i(d) */
+ oper->gtType = dstType;
+ return fgMorphTree(oper);
+ }
+ // if SSE2 is not enabled, we need the helper
+ else if (!opts.compCanUseSSE2)
+ {
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ goto OPTIMIZECAST;
+ }
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ case TYP_UINT:
+ goto OPTIMIZECAST;
+#else // _TARGET_ARM_
+ case TYP_UINT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
+#endif // _TARGET_ARM_
+
+#ifdef _TARGET_AMD64_
+ // SSE2 has instructions to convert a float/double directly to a long
+ case TYP_LONG:
+ goto OPTIMIZECAST;
+#else
+ case TYP_LONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
+#endif //_TARGET_AMD64_
+ case TYP_ULONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
+ default:
+ break;
+ }
+#endif // _TARGET_ARM64_
+ }
+ else
+ {
+ switch (dstType)
+ {
+ case TYP_INT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
+ case TYP_UINT:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
+ case TYP_LONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
+ case TYP_ULONG:
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
+ default:
+ break;
+ }
+ }
+ noway_assert(!"Unexpected dstType");
+ }
+ }
+#ifndef _TARGET_64BIT_
+ // The code generation phase (for x86 & ARM32) does not handle casts
+ // directly from [u]long to anything other than [u]int. Insert an
+ // intermediate cast to native int.
+ else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
+ {
+ oper = gtNewCastNode(TYP_I_IMPL, oper, TYP_I_IMPL);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+#endif //!_TARGET_64BIT_
+
+#ifdef _TARGET_ARM_
+ else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && (oper->gtOper == GT_CAST) &&
+ !varTypeIsLong(oper->gtCast.CastOp()))
+ {
+ // optimization: conv.r4(conv.r8(?)) -> conv.r4(d)
+ // except when the ultimate source is a long because there is no long-to-float helper, so it must be 2 step.
+ // This happens semi-frequently because there is no IL 'conv.r4.un'
+ oper->gtType = TYP_FLOAT;
+ oper->CastToType() = TYP_FLOAT;
+ return fgMorphTree(oper);
+ }
+ // converts long/ulong --> float/double casts into helper calls.
+ else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
+ {
+ if (dstType == TYP_FLOAT)
+ {
+ // there is only a double helper, so we
+ // - change the dsttype to double
+ // - insert a cast from double to float
+ // - recurse into the resulting tree
+ tree->CastToType() = TYP_DOUBLE;
+ tree->gtType = TYP_DOUBLE;
+
+ tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
+
+ return fgMorphTree(tree);
+ }
+ if (tree->gtFlags & GTF_UNSIGNED)
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
+ }
+#endif //_TARGET_ARM_
+
+#ifdef _TARGET_AMD64_
+ // Do we have to do two step U4/8 -> R4/8 ?
+ // Codegen supports the following conversion as one-step operation
+ // a) Long -> R4/R8
+ // b) U8 -> R8
+ //
+ // The following conversions are performed as two-step operations using above.
+ // U4 -> R4/8 = U4-> Long -> R4/8
+ // U8 -> R4 = U8 -> R8 -> R4
+ else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
+ {
+ srcType = genUnsignedType(srcType);
+
+ if (srcType == TYP_ULONG)
+ {
+ if (dstType == TYP_FLOAT)
+ {
+ // Codegen can handle U8 -> R8 conversion.
+ // U8 -> R4 = U8 -> R8 -> R4
+ // - change the dsttype to double
+ // - insert a cast from double to float
+ // - recurse into the resulting tree
+ tree->CastToType() = TYP_DOUBLE;
+ tree->gtType = TYP_DOUBLE;
+ tree = gtNewCastNode(TYP_FLOAT, tree, TYP_FLOAT);
+ return fgMorphTree(tree);
+ }
+ }
+ else if (srcType == TYP_UINT)
+ {
+ oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_X86_
+ // Do we have to do two step U4/8 -> R4/8 ?
+ else if ((tree->gtFlags & GTF_UNSIGNED) && varTypeIsFloating(dstType))
+ {
+ srcType = genUnsignedType(srcType);
+
+ if (srcType == TYP_ULONG)
+ {
+ return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
+ }
+ else if (srcType == TYP_UINT)
+ {
+ oper = gtNewCastNode(TYP_LONG, oper, TYP_LONG);
+ oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ }
+ }
+#endif //_TARGET_X86_
+ else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
+ {
+ // We are casting away GC information. We would like to just
+ // change the type to int; however, this gives the emitter fits because
+ // it believes the variable is a GC variable at the beginning of the
+ // instruction group, but it is not turned non-gc by the code generator.
+ // We fix this by copying the GC pointer to a non-gc pointer temp.
+ noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
+
+ // We generate an assignment to an int and then do the cast from an int. With this we avoid
+ // the gc problem and we allow casts to bytes, longs, etc...
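+ //
+ // The resulting shape is roughly GT_COMMA(tmpN = oper, CAST<dstType>(tmpN)), where tmpN
+ // is a TYP_I_IMPL temp (see the gtNewTempAssign and gtNewCastNode calls below).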
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
+ oper->gtType = TYP_I_IMPL;
+ GenTreePtr asg = gtNewTempAssign(lclNum, oper);
+ oper->gtType = srcType;
+
+ // do the real cast
+ GenTreePtr cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), dstType);
+
+ // Generate the comma tree
+ oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), asg, cast);
+
+ return fgMorphTree(oper);
+ }
+
+ // Look for narrowing casts ([u]long -> [u]int) and try to push them
+ // down into the operand before morphing it.
+ //
+ // It doesn't matter if this cast is from ulong or long (i.e. if
+ // GTF_UNSIGNED is set) because the transformation is only applied to
+ // overflow-insensitive narrowing casts, which always silently truncate.
+ //
+ // Note that casts from [u]long to small integer types are handled above.
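+ //
+ // For example, (int)(x + y) with 64-bit x and y can be computed as a 32-bit add of
+ // (int)x and (int)y, since the low 32 bits of the result do not depend on the upper
+ // bits of the operands (see the GT_ADD/GT_MUL/GT_AND/GT_OR/GT_XOR handling below).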
+ if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
+ {
+ // As a special case, look for overflow-sensitive casts of an AND
+ // expression, and see if the second operand is a small constant. Since
+ // the result of an AND is bound by its smaller operand, it may be
+ // possible to prove that the cast won't overflow, which will in turn
+ // allow the cast's operand to be transformed.
+ if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
+ {
+ GenTreePtr andOp2 = oper->gtOp.gtOp2;
+
+ // Special case to the special case: AND with a casted int.
+ if ((andOp2->OperGet() == GT_CAST) && (andOp2->gtCast.CastOp()->OperGet() == GT_CNS_INT))
+ {
+ // gtFoldExprConst will deal with whether the cast is signed or
+ // unsigned, or overflow-sensitive.
+ andOp2 = oper->gtOp.gtOp2 = gtFoldExprConst(andOp2);
+ }
+
+ // Look for a constant less than 2^{32} for a cast to uint, or less
+ // than 2^{31} for a cast to int.
+ int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
+
+ if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->gtIntConCommon.LngValue() >> maxWidth) == 0))
+ {
+ // This cast can't overflow.
+ tree->gtFlags &= ~(GTF_OVERFLOW | GTF_EXCEPT);
+ }
+ }
+
+ // Only apply this transformation during global morph,
+ // when neither the cast node nor the oper node may throw an exception
+ // based on the upper 32 bits.
+ //
+ if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
+ {
+ // For these operations the lower 32 bits of the result only depends
+ // upon the lower 32 bits of the operands
+ //
+ if ((oper->OperGet() == GT_ADD) || (oper->OperGet() == GT_MUL) || (oper->OperGet() == GT_AND) ||
+ (oper->OperGet() == GT_OR) || (oper->OperGet() == GT_XOR))
+ {
+ DEBUG_DESTROY_NODE(tree);
+
+ // Insert narrowing casts for op1 and op2
+ oper->gtOp.gtOp1 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp1, dstType);
+ oper->gtOp.gtOp2 = gtNewCastNode(TYP_INT, oper->gtOp.gtOp2, dstType);
+
+ // Clear the GT_MUL_64RSLT if it is set
+ if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
+ {
+ oper->gtFlags &= ~GTF_MUL_64RSLT;
+ }
+
+ // The operation now produces a 32-bit result.
+ oper->gtType = TYP_INT;
+
+ // Remorph the new tree as the casts that we added may be folded away.
+ return fgMorphTree(oper);
+ }
+ }
+ }
+
+OPTIMIZECAST:
+ noway_assert(tree->gtOper == GT_CAST);
+
+ /* Morph the operand */
+ tree->gtCast.CastOp() = oper = fgMorphTree(oper);
+
+ /* Reset the call flag */
+ tree->gtFlags &= ~GTF_CALL;
+
+ /* unless we have an overflow cast, reset the except flag */
+ if (!tree->gtOverflow())
+ {
+ tree->gtFlags &= ~GTF_EXCEPT;
+ }
+
+ /* Just in case new side effects were introduced */
+ tree->gtFlags |= (oper->gtFlags & GTF_ALL_EFFECT);
+
+ srcType = oper->TypeGet();
+
+ /* if GTF_UNSIGNED is set then force srcType to an unsigned type */
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ srcSize = genTypeSize(srcType);
+
+ if (!gtIsActiveCSE_Candidate(tree)) // tree cannot be a CSE candidate
+ {
+ /* See if we can discard the cast */
+ if (varTypeIsIntegral(srcType) && varTypeIsIntegral(dstType))
+ {
+ if (srcType == dstType)
+ { // Certainly if they are identical it is pointless
+ goto REMOVE_CAST;
+ }
+
+ if (oper->OperGet() == GT_LCL_VAR && varTypeIsSmall(dstType))
+ {
+ unsigned varNum = oper->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ if (varDsc->TypeGet() == dstType && varDsc->lvNormalizeOnStore())
+ {
+ goto REMOVE_CAST;
+ }
+ }
+
+ bool unsignedSrc = varTypeIsUnsigned(srcType);
+ bool unsignedDst = varTypeIsUnsigned(dstType);
+ bool signsDiffer = (unsignedSrc != unsignedDst);
+
+ // For same sized casts with
+ // the same signs or non-overflow cast we discard them as well
+ if (srcSize == dstSize)
+ {
+ /* This should have been handled above */
+ noway_assert(varTypeIsGC(srcType) == varTypeIsGC(dstType));
+
+ if (!signsDiffer)
+ {
+ goto REMOVE_CAST;
+ }
+
+ if (!tree->gtOverflow())
+ {
+ /* For small type casts, when necessary we force
+ the src operand to the dstType and allow the
+ implied load from memory to perform the casting */
+ if (varTypeIsSmall(srcType))
+ {
+ switch (oper->gtOper)
+ {
+ case GT_IND:
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ case GT_ARR_ELEM:
+ oper->gtType = dstType;
+ goto REMOVE_CAST;
+ default:
+ break;
+ }
+ }
+ else
+ {
+ goto REMOVE_CAST;
+ }
+ }
+ }
+
+ if (srcSize < dstSize) // widening cast
+ {
+ // Keep any long casts
+ if (dstSize == sizeof(int))
+ {
+ // Only keep signed to unsigned widening cast with overflow check
+ if (!tree->gtOverflow() || !unsignedDst || unsignedSrc)
+ {
+ goto REMOVE_CAST;
+ }
+ }
+
+ // Casts from signed->unsigned can never overflow while widening
+
+ if (unsignedSrc || !unsignedDst)
+ {
+ tree->gtFlags &= ~GTF_OVERFLOW;
+ }
+ }
+ else
+ {
+ // Try to narrow the operand of the cast and discard the cast
+ // Note: Do not narrow a cast that is marked as a CSE
+ // And do not narrow if the oper is marked as a CSE either
+ //
+ if (!tree->gtOverflow() && !gtIsActiveCSE_Candidate(oper) && (opts.compFlags & CLFLG_TREETRANS) &&
+ optNarrowTree(oper, srcType, dstType, tree->gtVNPair, false))
+ {
+ optNarrowTree(oper, srcType, dstType, tree->gtVNPair, true);
+
+ /* If oper is changed into a cast to TYP_INT, or to a GT_NOP, we may need to discard it */
+ if (oper->gtOper == GT_CAST && oper->CastToType() == genActualType(oper->CastFromType()))
+ {
+ oper = oper->gtCast.CastOp();
+ }
+ goto REMOVE_CAST;
+ }
+ }
+ }
+
+ switch (oper->gtOper)
+ {
+ /* If the operand is a constant, we'll fold it */
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ {
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
+
+ // Did we get a comma throw as a result of gtFoldExprConst?
+ if ((oldTree != tree) && (oldTree->gtOper != GT_COMMA))
+ {
+ noway_assert(fgIsCommaThrow(tree));
+ tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+ else if (tree->gtOper != GT_CAST)
+ {
+ return tree;
+ }
+
+ noway_assert(tree->gtCast.CastOp() == oper); // unchanged
+ }
+ break;
+
+ case GT_CAST:
+ /* Check for two consecutive casts into the same dstType */
+ if (!tree->gtOverflow())
+ {
+ var_types dstType2 = oper->CastToType();
+ if (dstType == dstType2)
+ {
+ goto REMOVE_CAST;
+ }
+ }
+ break;
+
+ /* If op1 is a mod node, mark it with the GTF_MOD_INT_RESULT flag
+ so that the code generator will know not to convert the result
+ of the idiv to a regpair */
+ case GT_MOD:
+ if (dstType == TYP_INT)
+ {
+ tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
+ }
+
+ break;
+ case GT_UMOD:
+ if (dstType == TYP_UINT)
+ {
+ tree->gtOp.gtOp1->gtFlags |= GTF_MOD_INT_RESULT;
+ }
+ break;
+
+ case GT_COMMA:
+ // Check for cast of a GT_COMMA with a throw overflow
+ // Bug 110829: Since this optimization will bash the types
+ // neither oper nor commaOp2 can be CSE candidates
+ if (fgIsCommaThrow(oper) && !gtIsActiveCSE_Candidate(oper)) // oper can not be a CSE candidate
+ {
+ GenTreePtr commaOp2 = oper->gtOp.gtOp2;
+
+ if (!gtIsActiveCSE_Candidate(commaOp2)) // commaOp2 can not be a CSE candidate
+ {
+ // need type of oper to be same as tree
+ if (tree->gtType == TYP_LONG)
+ {
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ oper->gtType = commaOp2->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(tree->gtType))
+ {
+ commaOp2->ChangeOperConst(GT_CNS_DBL);
+ commaOp2->gtDblCon.gtDconVal = 0.0;
+ // Change the types of oper and commaOp2
+ // X87 promotes everything to TYP_DOUBLE
+ // But others are a little more precise
+ const var_types newTyp
+#if FEATURE_X87_DOUBLES
+ = TYP_DOUBLE;
+#else // FEATURE_X87_DOUBLES
+ = tree->gtType;
+#endif // FEATURE_X87_DOUBLES
+ oper->gtType = commaOp2->gtType = newTyp;
+ }
+ else
+ {
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ oper->gtType = commaOp2->gtType = TYP_INT;
+ }
+ }
+
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(commaOp2);
+ }
+
+ /* Return the GT_COMMA node as the new tree */
+ return oper;
+ }
+ break;
+
+ default:
+ break;
+ } /* end switch (oper->gtOper) */
+ }
+
+ if (tree->gtOverflow())
+ {
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+ }
+
+ return tree;
+
+REMOVE_CAST:
+
+ /* Here we've eliminated the cast, so just return its operand */
+ assert(!gtIsActiveCSE_Candidate(tree)); // tree cannot be a CSE candidate
+
+ DEBUG_DESTROY_NODE(tree);
+ return oper;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Perform an unwrap operation on a Proxy object
+ */
+
+GenTreePtr Compiler::fgUnwrapProxy(GenTreePtr objRef)
+{
+ assert(info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef));
+
+ CORINFO_EE_INFO* pInfo = eeGetEEInfo();
+ GenTreePtr addTree;
+
+ // Perform the unwrap:
+ //
+ // This requires two extra indirections.
+ // We mark these indirections as 'invariant' and
+ // the CSE logic will hoist them when appropriate.
+ //
+ // Note that each dereference is a GC pointer
+
+ addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfTransparentProxyRP, TYP_I_IMPL));
+
+ objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
+ objRef->gtFlags |= GTF_IND_INVARIANT;
+
+ addTree = gtNewOperNode(GT_ADD, TYP_I_IMPL, objRef, gtNewIconNode(pInfo->offsetOfRealProxyServer, TYP_I_IMPL));
+
+ objRef = gtNewOperNode(GT_IND, TYP_REF, addTree);
+ objRef->gtFlags |= GTF_IND_INVARIANT;
+
+ // objRef now hold the 'real this' reference (i.e. the unwrapped proxy)
+ return objRef;
+}
+
+/*****************************************************************************
+ *
+ * Morph an argument list; compute the pointer argument count in the process.
+ *
+ * NOTE: This function can be called from any place in the JIT to perform re-morphing
+ * due to graph altering modifications such as copy / constant propagation
+ */
+
+unsigned UpdateGT_LISTFlags(GenTreePtr tree)
+{
+ assert(tree->gtOper == GT_LIST);
+
+ unsigned flags = 0;
+ if (tree->gtOp.gtOp2)
+ {
+ flags |= UpdateGT_LISTFlags(tree->gtOp.gtOp2);
+ }
+
+ flags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= flags;
+
+ return tree->gtFlags;
+}
+
+#ifdef DEBUG
+void fgArgTabEntry::Dump()
+{
+ printf("fgArgTabEntry[arg %u", argNum);
+ if (regNum != REG_STK)
+ {
+ printf(", %s, regs=%u", getRegName(regNum), numRegs);
+ }
+ if (numSlots > 0)
+ {
+ printf(", numSlots=%u, slotNum=%u", numSlots, slotNum);
+ }
+ printf(", align=%u", alignment);
+ if (lateArgInx != (unsigned)-1)
+ {
+ printf(", lateArgInx=%u", lateArgInx);
+ }
+ if (isSplit)
+ {
+ printf(", isSplit");
+ }
+ if (needTmp)
+ {
+ printf(", tmpNum=V%02u", tmpNum);
+ }
+ if (needPlace)
+ {
+ printf(", needPlace");
+ }
+ if (isTmp)
+ {
+ printf(", isTmp");
+ }
+ if (processed)
+ {
+ printf(", processed");
+ }
+ if (isHfaRegArg)
+ {
+ printf(", isHfa");
+ }
+ if (isBackFilled)
+ {
+ printf(", isBackFilled");
+ }
+ if (isNonStandard)
+ {
+ printf(", isNonStandard");
+ }
+ printf("]\n");
+}
+#endif
+
+fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
+{
+ compiler = comp;
+ callTree = call;
+ assert(call->IsCall());
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = 0;
+ argTableSize = numArgs; // the allocated table size
+
+ hasRegArgs = false;
+ hasStackArgs = false;
+ argsComplete = false;
+ argsSorted = false;
+
+ if (argTableSize == 0)
+ {
+ argTable = nullptr;
+ }
+ else
+ {
+ argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
+ }
+}
+
+/*****************************************************************************
+ *
+ * fgArgInfo Copy Constructor
+ *
+ * This method needs to act like a copy constructor for fgArgInfo.
+ * The newCall needs to have its fgArgInfo initialized such that
+ * The newCall needs to have its fgArgInfo initialized such that
+ * it is an exact copy of the oldCall's argument info.
+ * in the argTable contains pointers that must point to the
+ * new arguments and not the old arguments.
+ */
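+// Overview of the approach below: the first loop matches entries by their 'parent' list node
+// and block-copies each fgArgTabEntry; a second pass over gtCallLateArgs then fixes up any
+// 'node' pointers that referred into the old call's late-argument list.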
+fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
+{
+ assert(oldCall->IsCall());
+ assert(newCall->IsCall());
+
+ fgArgInfoPtr oldArgInfo = oldCall->gtCall.fgArgInfo;
+
+ compiler = oldArgInfo->compiler;
+ callTree = newCall;
+ assert(newCall->IsCall());
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = oldArgInfo->stkLevel;
+ argTableSize = oldArgInfo->argTableSize;
+ argsComplete = false;
+ argTable = nullptr;
+ if (argTableSize > 0)
+ {
+ argTable = new (compiler, CMK_fgArgInfoPtrArr) fgArgTabEntryPtr[argTableSize];
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ argTable[inx] = nullptr;
+ }
+ }
+
+ assert(oldArgInfo->argsComplete);
+
+ // We create local, artificial GenTreeArgLists that include the gtCallObjp, if that exists, as the first argument,
+ // so we can iterate over these argument lists more uniformly.
+ // Need to provide a temporary non-null first argument to these constructors: if we use them, we'll replace them.
+ GenTreeArgList* newArgs;
+ GenTreeArgList newArgObjp(newCall, newCall->gtCall.gtCallArgs);
+ GenTreeArgList* oldArgs;
+ GenTreeArgList oldArgObjp(oldCall, oldCall->gtCall.gtCallArgs);
+
+ if (newCall->gtCall.gtCallObjp == nullptr)
+ {
+ assert(oldCall->gtCall.gtCallObjp == nullptr);
+ newArgs = newCall->gtCall.gtCallArgs;
+ oldArgs = oldCall->gtCall.gtCallArgs;
+ }
+ else
+ {
+ assert(oldCall->gtCall.gtCallObjp != nullptr);
+ newArgObjp.Current() = newCall->gtCall.gtCallArgs;
+ newArgs = &newArgObjp;
+ oldArgObjp.Current() = oldCall->gtCall.gtCallObjp;
+ oldArgs = &oldArgObjp;
+ }
+
+ GenTreePtr newCurr;
+ GenTreePtr oldCurr;
+ GenTreeArgList* newParent = nullptr;
+ GenTreeArgList* oldParent = nullptr;
+ fgArgTabEntryPtr* oldArgTable = oldArgInfo->argTable;
+ bool scanRegArgs = false;
+
+ while (newArgs)
+ {
+ /* Get hold of the next argument values for the oldCall and newCall */
+
+ newCurr = newArgs->Current();
+ oldCurr = oldArgs->Current();
+ if (newArgs != &newArgObjp)
+ {
+ newParent = newArgs;
+ oldParent = oldArgs;
+ }
+ else
+ {
+ assert(newParent == nullptr && oldParent == nullptr);
+ }
+ newArgs = newArgs->Rest();
+ oldArgs = oldArgs->Rest();
+
+ fgArgTabEntryPtr oldArgTabEntry = nullptr;
+ fgArgTabEntryPtr newArgTabEntry = nullptr;
+
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ oldArgTabEntry = oldArgTable[inx];
+
+ if (oldArgTabEntry->parent == oldParent)
+ {
+ assert((oldParent == nullptr) == (newParent == nullptr));
+
+ // We have found the matching "parent" field in oldArgTabEntry
+
+ newArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ // First block copy all fields
+ //
+ *newArgTabEntry = *oldArgTabEntry;
+
+ // Then update all GenTreePtr fields in the newArgTabEntry
+ //
+ newArgTabEntry->parent = newParent;
+
+ // The node field is likely to have been updated
+ // to point at a node in the gtCallLateArgs list
+ //
+ if (oldArgTabEntry->node == oldCurr)
+ {
+ // node is not pointing into the gtCallLateArgs list
+ newArgTabEntry->node = newCurr;
+ }
+ else
+ {
+ // node must be pointing into the gtCallLateArgs list
+ //
+ // We will fix this pointer up in the next loop
+ //
+ newArgTabEntry->node = nullptr; // For now we assign a NULL to this field
+
+ scanRegArgs = true;
+ }
+
+ // Now initialize the proper element in the argTable array
+ //
+ argTable[inx] = newArgTabEntry;
+ break;
+ }
+ }
+ // We should have found the matching oldArgTabEntry and created the newArgTabEntry
+ //
+ assert(newArgTabEntry != nullptr);
+ }
+
+ if (scanRegArgs)
+ {
+ newArgs = newCall->gtCall.gtCallLateArgs;
+ oldArgs = oldCall->gtCall.gtCallLateArgs;
+
+ while (newArgs)
+ {
+ /* Get hold of the next argument values for the oldCall and newCall */
+
+ assert(newArgs->IsList());
+
+ newCurr = newArgs->Current();
+ newArgs = newArgs->Rest();
+
+ assert(oldArgs->IsList());
+
+ oldCurr = oldArgs->Current();
+ oldArgs = oldArgs->Rest();
+
+ fgArgTabEntryPtr oldArgTabEntry = nullptr;
+ fgArgTabEntryPtr newArgTabEntry = nullptr;
+
+ for (unsigned inx = 0; inx < argTableSize; inx++)
+ {
+ oldArgTabEntry = oldArgTable[inx];
+
+ if (oldArgTabEntry->node == oldCurr)
+ {
+ // We have found the matching "node" field in oldArgTabEntry
+
+ newArgTabEntry = argTable[inx];
+ assert(newArgTabEntry != nullptr);
+
+ // update the "node" GenTreePtr fields in the newArgTabEntry
+ //
+ assert(newArgTabEntry->node == nullptr); // We previously assigned NULL to this field
+
+ newArgTabEntry->node = newCurr;
+ break;
+ }
+ }
+ }
+ }
+
+ argCount = oldArgInfo->argCount;
+ nextSlotNum = oldArgInfo->nextSlotNum;
+ argsComplete = true;
+ argsSorted = true;
+}
+
+void fgArgInfo::AddArg(fgArgTabEntryPtr curArgTabEntry)
+{
+ assert(argCount < argTableSize);
+ argTable[argCount] = curArgTabEntry;
+ argCount++;
+}
+
+fgArgTabEntryPtr fgArgInfo::AddRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
+
+ hasRegArgs = true;
+ AddArg(curArgTabEntry);
+ return curArgTabEntry;
+}
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+fgArgTabEntryPtr fgArgInfo::AddRegArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ regNumber regNum,
+ unsigned numRegs,
+ unsigned alignment,
+ const bool isStruct,
+ const regNumber otherRegNum,
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr)
+{
+ fgArgTabEntryPtr curArgTabEntry = AddRegArg(argNum, node, parent, regNum, numRegs, alignment);
+ assert(curArgTabEntry != nullptr);
+
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in case of a needed late argument, for example).
+ // This requires the use of an extra flag. At creation time the state is right,
+ // and this assert enforces that.
+ assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
+ curArgTabEntry->otherRegNum = otherRegNum; // Second reg for the struct
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+
+ if (isStruct && structDescPtr != nullptr)
+ {
+ curArgTabEntry->structDesc.CopyFrom(*structDescPtr);
+ }
+
+ return curArgTabEntry;
+}
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
+ GenTreePtr node,
+ GenTreePtr parent,
+ unsigned numSlots,
+ unsigned alignment
+ FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool isStruct))
+{
+ fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
+
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // The node of the ArgTabEntry could change after remorphing - it could be rewritten to a cpyblk or a
+ // PlaceHolder node (in case of a needed late argument, for example).
+ // This requires the use of an extra flag. At creation time the state is right,
+ // and this assert enforces that.
+ assert((varTypeIsStruct(node) && isStruct) || (!varTypeIsStruct(node) && !isStruct));
+ curArgTabEntry->isStruct = isStruct; // is this a struct arg
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->isSplit = false;
+ curArgTabEntry->isTmp = false;
+ curArgTabEntry->needTmp = false;
+ curArgTabEntry->needPlace = false;
+ curArgTabEntry->processed = false;
+ curArgTabEntry->isHfaRegArg = false;
+ curArgTabEntry->isBackFilled = false;
+ curArgTabEntry->isNonStandard = false;
+
+ hasStackArgs = true;
+ AddArg(curArgTabEntry);
+
+ nextSlotNum += numSlots;
+ return curArgTabEntry;
+}
+
+void fgArgInfo::RemorphReset()
+{
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+}
+
+fgArgTabEntry* fgArgInfo::RemorphRegArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, regNumber regNum, unsigned numRegs, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ unsigned regArgInx = 0;
+ unsigned inx;
+
+ for (inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+
+ bool isRegArg;
+ GenTreePtr argx;
+ if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ argx = curArgTabEntry->parent->Current();
+ isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
+ }
+ else
+ {
+ argx = curArgTabEntry->node;
+ isRegArg = true;
+ }
+
+ if (isRegArg)
+ {
+ regArgInx++;
+ }
+ }
+ // if this was a nonstandard arg the table is definitive
+ if (curArgTabEntry->isNonStandard)
+ {
+ regNum = curArgTabEntry->regNum;
+ }
+
+ assert(curArgTabEntry->argNum == argNum);
+ assert(curArgTabEntry->regNum == regNum);
+ assert(curArgTabEntry->alignment == alignment);
+ assert(curArgTabEntry->parent == parent);
+
+ if (curArgTabEntry->node != node)
+ {
+ GenTreePtr argx = nullptr;
+ unsigned regIndex = 0;
+
+ /* process the register argument list */
+ for (GenTreeArgList* list = callTree->gtCall.gtCallLateArgs; list; (regIndex++, list = list->Rest()))
+ {
+ argx = list->Current();
+ assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
+ if (regIndex == regArgInx)
+ {
+ break;
+ }
+ }
+ assert(regIndex == regArgInx);
+ assert(regArgInx == curArgTabEntry->lateArgInx);
+
+ if (curArgTabEntry->node != argx)
+ {
+ curArgTabEntry->node = argx;
+ }
+ }
+ return curArgTabEntry;
+}
+
+void fgArgInfo::RemorphStkArg(
+ unsigned argNum, GenTreePtr node, GenTreePtr parent, unsigned numSlots, unsigned alignment)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ bool isRegArg = false;
+ unsigned regArgInx = 0;
+ GenTreePtr argx;
+ unsigned inx;
+
+ for (inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+
+ if (curArgTabEntry->parent != nullptr)
+ {
+ assert(curArgTabEntry->parent->IsList());
+ argx = curArgTabEntry->parent->Current();
+ isRegArg = (argx->gtFlags & GTF_LATE_ARG) != 0;
+ }
+ else
+ {
+ argx = curArgTabEntry->node;
+ isRegArg = true;
+ }
+
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+
+ if (isRegArg)
+ {
+ regArgInx++;
+ }
+ }
+
+ nextSlotNum = (unsigned)roundUp(nextSlotNum, alignment);
+
+ assert(curArgTabEntry->argNum == argNum);
+ assert(curArgTabEntry->slotNum == nextSlotNum);
+ assert(curArgTabEntry->numSlots == numSlots);
+ assert(curArgTabEntry->alignment == alignment);
+ assert(curArgTabEntry->parent == parent);
+ assert(parent->IsList());
+
+#if FEATURE_FIXED_OUT_ARGS
+ if (curArgTabEntry->node != node)
+ {
+ if (isRegArg)
+ {
+ GenTreePtr argx = nullptr;
+ unsigned regIndex = 0;
+
+ /* process the register argument list */
+ for (GenTreeArgList *list = callTree->gtCall.gtCallLateArgs; list; list = list->Rest(), regIndex++)
+ {
+ argx = list->Current();
+ assert(!argx->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs
+ if (regIndex == regArgInx)
+ {
+ break;
+ }
+ }
+ assert(regIndex == regArgInx);
+ assert(regArgInx == curArgTabEntry->lateArgInx);
+
+ if (curArgTabEntry->node != argx)
+ {
+ curArgTabEntry->node = argx;
+ }
+ }
+ else
+ {
+ assert(parent->Current() == node);
+ curArgTabEntry->node = node;
+ }
+ }
+#else
+ curArgTabEntry->node = node;
+#endif
+
+ nextSlotNum += numSlots;
+}
+
+void fgArgInfo::SplitArg(unsigned argNum, unsigned numRegs, unsigned numSlots)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ assert(argNum < argCount);
+ for (unsigned inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+ }
+
+ assert(numRegs > 0);
+ assert(numSlots > 0);
+
+ curArgTabEntry->isSplit = true;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = numSlots;
+
+ nextSlotNum += numSlots;
+}
+
+void fgArgInfo::EvalToTmp(unsigned argNum, unsigned tmpNum, GenTreePtr newNode)
+{
+ fgArgTabEntryPtr curArgTabEntry = nullptr;
+ assert(argNum < argCount);
+ for (unsigned inx = 0; inx < argCount; inx++)
+ {
+ curArgTabEntry = argTable[inx];
+ if (curArgTabEntry->argNum == argNum)
+ {
+ break;
+ }
+ }
+ assert(curArgTabEntry->parent->Current() == newNode);
+
+ curArgTabEntry->node = newNode;
+ curArgTabEntry->tmpNum = tmpNum;
+ curArgTabEntry->isTmp = true;
+}
+
+void fgArgInfo::ArgsComplete()
+{
+ bool hasStackArgs = false;
+ bool hasStructRegArg = false;
+
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != nullptr);
+ GenTreePtr argx = curArgTabEntry->node;
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ hasStackArgs = true;
+#if !FEATURE_FIXED_OUT_ARGS
+ // On x86 we use push instructions to pass arguments:
+ // The non-register arguments are evaluated and pushed in order
+ // and they are never evaluated into temps
+ //
+ continue;
+#endif
+ }
+ else // we have a register argument, next we look for a struct type.
+ {
+ if (varTypeIsStruct(argx) FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY(|| curArgTabEntry->isStruct))
+ {
+ hasStructRegArg = true;
+ }
+ }
+
+ /* If the argument tree contains an assignment (GTF_ASG) then the argument
+ and every earlier argument (except constants) must be evaluated into temps
+ since there may be other arguments that follow and they may use the value being assigned.
+
+ EXAMPLE: ArgTab is "a, a=5, a"
+ -> when we see the second arg "a=5"
+ we know the first two arguments "a, a=5" have to be evaluated into temps
+
+ For the case of an assignment, we only know that there exist some assignment someplace
+ in the tree. We don't know what is being assigned so we are very conservative here
+ and assume that any local variable could have been assigned.
+ */
+
+ if (argx->gtFlags & GTF_ASG)
+ {
+ // If this is not the only argument, or it's a copyblk, or it already evaluates the expression to
+ // a tmp, then we need a temp in the late arg list.
+ if ((argCount > 1) || argx->OperIsCopyBlkOp()
+#ifdef FEATURE_FIXED_OUT_ARGS
+ || curArgTabEntry->isTmp // I protect this by "FEATURE_FIXED_OUT_ARGS" to preserve the property
+ // that we only have late non-register args when that feature is on.
+#endif // FEATURE_FIXED_OUT_ARGS
+ )
+ {
+ curArgTabEntry->needTmp = true;
+ }
+
+ // For all previous arguments, unless they are a simple constant
+ // we require that they be evaluated into temps
+ for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
+ {
+ fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
+ assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
+
+ assert(prevArgTabEntry->node);
+ if (prevArgTabEntry->node->gtOper != GT_CNS_INT)
+ {
+ prevArgTabEntry->needTmp = true;
+ }
+ }
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+ // Like calls, if this argument has a tree that will do an inline throw
+ // (a call to a jit helper), then we need to treat it like a call (but only
+ // if there are/were any stack args).
+ // This means unnesting, sorting, etc. Technically this is overly
+ // conservative, but I want to avoid as much special-case debug-only code
+ // as possible, so leveraging the GTF_CALL flag is the easiest.
+ if (!(argx->gtFlags & GTF_CALL) && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) &&
+ compiler->opts.compDbgCode &&
+ (compiler->fgWalkTreePre(&argx, Compiler::fgChkThrowCB) == Compiler::WALK_ABORT))
+ {
+ for (unsigned otherInx = 0; otherInx < argCount; otherInx++)
+ {
+ if (otherInx == curInx)
+ {
+ continue;
+ }
+
+ if (argTable[otherInx]->regNum == REG_STK)
+ {
+ argx->gtFlags |= GTF_CALL;
+ break;
+ }
+ }
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ /* If it contains a call (GTF_CALL) then itself and everything before the call
+ with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
+ has to be kept in the right order since we will move the call to the first position)
+
+ For calls we don't have to be quite as conservative as we are with an assignment
+ since the call won't be modifying any non-address taken LclVars.
+ */
+
+ if (argx->gtFlags & GTF_CALL)
+ {
+ if (argCount > 1) // If this is not the only argument
+ {
+ curArgTabEntry->needTmp = true;
+ }
+ else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
+ {
+ // Spill all arguments that are floating point calls
+ curArgTabEntry->needTmp = true;
+ }
+
+ // All previous arguments may need to be evaluated into temps
+ for (unsigned prevInx = 0; prevInx < curInx; prevInx++)
+ {
+ fgArgTabEntryPtr prevArgTabEntry = argTable[prevInx];
+ assert(prevArgTabEntry->argNum < curArgTabEntry->argNum);
+ assert(prevArgTabEntry->node);
+
+ // For all previous arguments, if they have any GTF_ALL_EFFECT
+ // we require that they be evaluated into a temp
+ if ((prevArgTabEntry->node->gtFlags & GTF_ALL_EFFECT) != 0)
+ {
+ prevArgTabEntry->needTmp = true;
+ }
+#if FEATURE_FIXED_OUT_ARGS
+ // Or, if they are stored into the FIXED_OUT_ARG area
+ // we require that they be moved to the gtCallLateArgs
+ // and replaced with a placeholder node
+ else if (prevArgTabEntry->regNum == REG_STK)
+ {
+ prevArgTabEntry->needPlace = true;
+ }
+#endif
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+#if FEATURE_MULTIREG_ARGS
+ // For RyuJIT backend we will expand a Multireg arg into a GT_LIST
+ // with multiple indirections, so here we consider spilling it into a tmp LclVar.
+ //
+ // Note that Arm32 uses the LEGACY_BACKEND and defines FEATURE_MULTIREG_ARGS,
+ // so we skip this for ARM32 until it is ported to use the RyuJIT backend.
+ //
+
+ bool isMultiRegArg = (curArgTabEntry->numRegs > 1);
+
+ if ((argx->TypeGet() == TYP_STRUCT) && (curArgTabEntry->needTmp == false))
+ {
+ if (isMultiRegArg && ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0))
+ {
+ // Spill multireg struct arguments that have Assignments or Calls embedded in them
+ curArgTabEntry->needTmp = true;
+ }
+ else
+ {
+ // We call gtPrepareCost to measure the cost of evaluating this tree
+ compiler->gtPrepareCost(argx);
+
+ if (isMultiRegArg && (argx->gtCostEx > (6 * IND_COST_EX)))
+ {
+ // Spill multireg struct arguments that are expensive to evaluate twice
+ curArgTabEntry->needTmp = true;
+ }
+ else if (argx->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = argx->AsObj();
+ CORINFO_CLASS_HANDLE objClass = argObj->gtClass;
+ unsigned structSize = compiler->info.compCompHnd->getClassSize(objClass);
+ switch (structSize)
+ {
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ // If we have a stack based LclVar we can perform a wider read of 4 or 8 bytes
+ //
+ if (argObj->gtObj.gtOp1->IsVarAddr() == false) // Is the source not a LclVar?
+ {
+ // If we don't have a LclVar we need to read exactly 3, 5, 6 or 7 bytes.
+ // For now we use a GT_CPBLK to copy the exact size into a GT_LCL_VAR temp.
+ //
+ curArgTabEntry->needTmp = true;
+ }
+ break;
+
+ case 11:
+ case 13:
+ case 14:
+ case 15:
+ // Spill any GT_OBJ multireg structs that are difficult to extract
+ //
+ // When we have a GT_OBJ of a struct with the above sizes we would need
+ // to use 3 or 4 load instructions to load the exact size of this struct.
+ // Instead we spill the GT_OBJ into a new GT_LCL_VAR temp and this sequence
+ // will use a GT_CPBLK to copy the exact size into the GT_LCL_VAR temp.
+ // Then we can just load all 16 bytes of the GT_LCL_VAR temp when passing
+ // the argument.
+ //
+ curArgTabEntry->needTmp = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+#endif // LEGACY_BACKEND
+ }
+
+ // We only care because we can't spill structs and qmarks involve a lot of spilling, but
+ // if we don't have qmarks, then it doesn't matter.
+ // So check for Qmarks globally once here, instead of inside the loop.
+ //
+ const bool hasStructRegArgWeCareAbout = (hasStructRegArg && compiler->compQmarkUsed);
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // For Arm/x64 we only care because we can't reorder a register
+ // argument that uses GT_LCLHEAP. This is an optimization to
+ // save a check inside the below loop.
+ //
+ const bool hasStackArgsWeCareAbout = (hasStackArgs && compiler->compLocallocUsed);
+
+#else
+
+ const bool hasStackArgsWeCareAbout = hasStackArgs;
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ // If we have any stack args we have to force the evaluation
+ // of any arguments passed in registers that might throw an exception
+ //
+ // Technically we are only required to handle the following two cases:
+ // a GT_IND with GTF_IND_RNGCHK (only on x86) or
+ // a GT_LCLHEAP node that allocates stuff on the stack
+ //
+ if (hasStackArgsWeCareAbout || hasStructRegArgWeCareAbout)
+ {
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ assert(curArgTabEntry != nullptr);
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // Examine the register args that are currently not marked needTmp
+ //
+ if (!curArgTabEntry->needTmp && (curArgTabEntry->regNum != REG_STK))
+ {
+ if (hasStackArgsWeCareAbout)
+ {
+#if !FEATURE_FIXED_OUT_ARGS
+ // On x86 we previously recorded a stack depth of zero when
+ // morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag
+ // Thus we cannot reorder the argument after any stack-based argument.
+ // (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
+ // check for it explicitly.)
+ //
+ if (argx->gtFlags & GTF_EXCEPT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+#else
+ // For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
+ //
+ if (argx->gtFlags & GTF_EXCEPT)
+ {
+ assert(compiler->compLocallocUsed);
+
+ // Returns WALK_ABORT if a GT_LCLHEAP node is encountered in the argx tree
+ //
+ if (compiler->fgWalkTreePre(&argx, Compiler::fgChkLocAllocCB) == Compiler::WALK_ABORT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+ }
+#endif
+ }
+ if (hasStructRegArgWeCareAbout)
+ {
+ // Returns true if a GT_QMARK node is encountered in the argx tree
+ //
+ if (compiler->fgWalkTreePre(&argx, Compiler::fgChkQmarkCB) == Compiler::WALK_ABORT)
+ {
+ curArgTabEntry->needTmp = true;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ argsComplete = true;
+}
+
+void fgArgInfo::SortArgs()
+{
+ assert(argsComplete == true);
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nSorting the arguments:\n");
+ }
+#endif
+
+ /* Shuffle the arguments around before we build the gtCallLateArgs list.
+ The idea is to move all "simple" arguments like constants and local vars
+ to the end of the table, and move the complex arguments towards the beginning
+ of the table. This will help prevent registers from being spilled by
+ allowing us to evaluate the more complex arguments before the simpler arguments.
+ The argTable ends up looking like:
+ +------------------------------------+ <--- argTable[argCount - 1]
+ | constants |
+ +------------------------------------+
+ | local var / local field |
+ +------------------------------------+
+ | remaining arguments sorted by cost |
+ +------------------------------------+
+ | temps (argTable[].needTmp = true) |
+ +------------------------------------+
+ | args with calls (GTF_CALL) |
+ +------------------------------------+ <--- argTable[0]
+ */
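+
+ // Illustrative example: for a call such as f(g(), x + y, lcl, 42), the sorted table
+ // evaluates the GTF_CALL argument g() first, then x + y (ordered by cost), and leaves
+ // the local var lcl and the constant 42 to be evaluated last.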
+
+ /* Set the beginning and end for the new argument table */
+ unsigned curInx;
+ int regCount = 0;
+ unsigned begTab = 0;
+ unsigned endTab = argCount - 1;
+ unsigned argsRemaining = argCount;
+
+ // First take care of arguments that are constants.
+ // [We use a backward iterator pattern]
+ //
+ curInx = argCount;
+ do
+ {
+ curInx--;
+
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ regCount++;
+ }
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // put constants at the end of the table
+ //
+ if (argx->gtOper == GT_CNS_INT)
+ {
+ noway_assert(curInx <= endTab);
+
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the endTab position by performing a swap
+ //
+ if (curInx != endTab)
+ {
+ argTable[curInx] = argTable[endTab];
+ argTable[endTab] = curArgTabEntry;
+ }
+
+ endTab--;
+ argsRemaining--;
+ }
+ }
+ } while (curInx > 0);
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of arguments that are calls.
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // put calls at the beginning of the table
+ //
+ if (argx->gtFlags & GTF_CALL)
+ {
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the begTab position by performing a swap
+ //
+ if (curInx != begTab)
+ {
+ argTable[curInx] = argTable[begTab];
+ argTable[begTab] = curArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+ }
+ }
+ }
+ }
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of arguments that are temps.
+ // These temps come before the arguments that are
+ // ordinary local vars or local fields
+ // since this will give them a better chance to become
+ // enregistered into their actual argument register.
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ if (curArgTabEntry->needTmp)
+ {
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the begTab position by performing a swap
+ //
+ if (curInx != begTab)
+ {
+ argTable[curInx] = argTable[begTab];
+ argTable[begTab] = curArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+ }
+ }
+ }
+ }
+
+ if (argsRemaining > 0)
+ {
+ // Next take care of local var and local field arguments.
+ // These are moved towards the end of the argument evaluation.
+ // [We use a backward iterator pattern]
+ //
+ curInx = endTab + 1;
+ do
+ {
+ curInx--;
+
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ if ((argx->gtOper == GT_LCL_VAR) || (argx->gtOper == GT_LCL_FLD))
+ {
+ noway_assert(curInx <= endTab);
+
+ curArgTabEntry->processed = true;
+
+ // place curArgTabEntry at the endTab position by performing a swap
+ //
+ if (curInx != endTab)
+ {
+ argTable[curInx] = argTable[endTab];
+ argTable[endTab] = curArgTabEntry;
+ }
+
+ endTab--;
+ argsRemaining--;
+ }
+ }
+ } while (curInx > begTab);
+ }
+
+ // Finally, take care of all the remaining arguments.
+ // Note that we fill in one arg at a time using a while loop.
+ bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
+ while (argsRemaining > 0)
+ {
+ /* Find the most expensive arg remaining and evaluate it next */
+
+ fgArgTabEntryPtr expensiveArgTabEntry = nullptr;
+ unsigned expensiveArg = UINT_MAX;
+ unsigned expensiveArgCost = 0;
+
+ // [We use a forward iterator pattern]
+ //
+ for (curInx = begTab; curInx <= endTab; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ // Skip any already processed args
+ //
+ if (!curArgTabEntry->processed)
+ {
+ GenTreePtr argx = curArgTabEntry->node;
+
+ // We should have already handled these kinds of args
+ assert(argx->gtOper != GT_LCL_VAR);
+ assert(argx->gtOper != GT_LCL_FLD);
+ assert(argx->gtOper != GT_CNS_INT);
+
+ // This arg should either have no persistent side effects or be the last one in our table
+ // assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
+
+ if (argsRemaining == 1)
+ {
+ // This is the last arg to place
+ expensiveArg = curInx;
+ expensiveArgTabEntry = curArgTabEntry;
+ assert(begTab == endTab);
+ break;
+ }
+ else
+ {
+ if (!costsPrepared)
+ {
+ /* We call gtPrepareCost to measure the cost of evaluating this tree */
+ compiler->gtPrepareCost(argx);
+ }
+
+ if (argx->gtCostEx > expensiveArgCost)
+ {
+ // Remember this arg as the most expensive one that we have yet seen
+ expensiveArgCost = argx->gtCostEx;
+ expensiveArg = curInx;
+ expensiveArgTabEntry = curArgTabEntry;
+ }
+ }
+ }
+ }
+
+ noway_assert(expensiveArg != UINT_MAX);
+
+ // put the most expensive arg towards the beginning of the table
+
+ expensiveArgTabEntry->processed = true;
+
+ // place expensiveArgTabEntry at the begTab position by performing a swap
+ //
+ if (expensiveArg != begTab)
+ {
+ argTable[expensiveArg] = argTable[begTab];
+ argTable[begTab] = expensiveArgTabEntry;
+ }
+
+ begTab++;
+ argsRemaining--;
+
+ costsPrepared = true; // Don't re-evaluate the tree costs on subsequent iterations of this loop
+ }
+
+ // The table should now be completely filled and thus begTab should now be adjacent to endTab
+ // and argsRemaining should be zero
+ assert(begTab == (endTab + 1));
+ assert(argsRemaining == 0);
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Finally build the regArgList
+ //
+ callTree->gtCall.regArgList = NULL;
+ callTree->gtCall.regArgListCount = regCount;
+
+ unsigned regInx = 0;
+ for (curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ // Record the argument register in regArgList
+ //
+ callTree->gtCall.regArgList[regInx] = curArgTabEntry->regNum;
+ regInx++;
+ }
+ }
+#endif // !FEATURE_FIXED_OUT_ARGS
+
+ argsSorted = true;
+}
+
+//------------------------------------------------------------------------------
+// fgMakeTmpArgNode : Create the node that references the temp var 'tmpVarNum' and
+//                    that will go into the late argument list. This is needed to
+//                    enforce the ordering of the evaluation of arguments.
+//
+// Arguments:
+//    tmpVarNum         - the var num of the temp that the new node will reference.
+//    passedInRegisters - (FEATURE_UNIX_AMD64_STRUCT_PASSING only) true if the struct
+//                        argument is passed in registers, in which case the temp is
+//                        referenced as a GT_LCL_FLD rather than through its address.
+//
+// Return Value:
+//    the newly created node that references the temp var.
+
+GenTreePtr Compiler::fgMakeTmpArgNode(
+ unsigned tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(const bool passedInRegisters))
+{
+ LclVarDsc* varDsc = &lvaTable[tmpVarNum];
+ assert(varDsc->lvIsTemp);
+ var_types type = varDsc->TypeGet();
+
+ // Create a copy of the temp to go into the late argument list
+ GenTreePtr arg = gtNewLclvNode(tmpVarNum, type);
+ GenTreePtr addrNode = nullptr;
+
+ if (varTypeIsStruct(type))
+ {
+
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ arg->gtFlags |= GTF_DONT_CSE;
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Can this type be passed in a single register?
+ // If so, the following call will return the corresponding primitive type.
+ // Otherwise, it will return TYP_UNKNOWN and we will pass by reference.
+
+ bool passedInRegisters = false;
+ structPassingKind kind;
+ CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ var_types structBaseType = getPrimitiveTypeForStruct(lvaLclExactSize(tmpVarNum), clsHnd);
+
+ if (structBaseType != TYP_UNKNOWN)
+ {
+ passedInRegisters = true;
+ type = structBaseType;
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // If it is passed in registers, don't get the address of the var. Make it a
+ // field instead. It will be loaded into registers with a putarg_reg tree in Lowering.
+ if (passedInRegisters)
+ {
+ arg->ChangeOper(GT_LCL_FLD);
+ arg->gtType = type;
+ }
+ else
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // TODO-Cleanup: Fix this - we should never have an address that is TYP_STRUCT.
+ var_types addrType = type;
+#else
+ var_types addrType = TYP_BYREF;
+#endif
+ arg = gtNewOperNode(GT_ADDR, addrType, arg);
+ addrNode = arg;
+
+#if FEATURE_MULTIREG_ARGS
+#ifdef _TARGET_ARM64_
+ assert(varTypeIsStruct(type));
+ if (lvaIsMultiregStruct(varDsc))
+ {
+ // ToDo-ARM64: Consider using: arg->ChangeOper(GT_LCL_FLD);
+ // as that is how FEATURE_UNIX_AMD64_STRUCT_PASSING works.
+ // We will create a GT_OBJ for the argument below.
+ // This will be passed by value in two registers.
+ assert(addrNode != nullptr);
+
+ // Create an Obj of the temp to use as a call argument.
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), arg);
+
+ // TODO-1stClassStructs: We should not need to set the GTF_DONT_CSE flag here;
+ // this is only to preserve former behavior (though some CSE'ing of struct
+ // values can be pessimizing, so enabling this may require some additional tuning).
+ arg->gtFlags |= GTF_DONT_CSE;
+ }
+#endif // _TARGET_ARM64_
+#endif // FEATURE_MULTIREG_ARGS
+ }
+
+#else // not (_TARGET_AMD64_ or _TARGET_ARM64_)
+
+ // other targets, we pass the struct by value
+ assert(varTypeIsStruct(type));
+
+ addrNode = gtNewOperNode(GT_ADDR, TYP_BYREF, arg);
+
+ // Create a new Obj node of the temp to use as a call argument.
+ // gtNewObjNode will set the GTF_EXCEPT flag if this is not a local stack object.
+ arg = gtNewObjNode(lvaGetStruct(tmpVarNum), addrNode);
+
+#endif // not (_TARGET_AMD64_ or _TARGET_ARM64_)
+
+ } // (varTypeIsStruct(type))
+
+ if (addrNode != nullptr)
+ {
+ assert(addrNode->gtOper == GT_ADDR);
+
+ // This will prevent this LclVar from being optimized away
+ lvaSetVarAddrExposed(tmpVarNum);
+
+ // the child of a GT_ADDR is required to have this flag set
+ addrNode->gtOp.gtOp1->gtFlags |= GTF_DONT_CSE;
+ }
+
+ return arg;
+}
+
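+//------------------------------------------------------------------------------
+// EvalArgsToTemps : Walk the sorted argument table and, for each argument that
+//                   requires it, create the setup node that evaluates the argument
+//                   into a temp (or a placeholder node), leaving that setup node in
+//                   the early argument list and appending the deferred argument to
+//                   gtCallLateArgs.
+//
+// Assumptions:
+//    SortArgs() must have been called first, so that argsSorted is true.
+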
+void fgArgInfo::EvalArgsToTemps()
+{
+ assert(argsSorted == true);
+
+ unsigned regArgInx = 0;
+ // Now go through the argument table and perform the necessary evaluation into temps
+ GenTreeArgList* tmpRegArgNext = nullptr;
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ GenTreePtr argx = curArgTabEntry->node;
+ GenTreePtr setupArg = nullptr;
+ GenTreePtr defArg;
+
+#if !FEATURE_FIXED_OUT_ARGS
+ // Only ever set for FEATURE_FIXED_OUT_ARGS
+ assert(curArgTabEntry->needPlace == false);
+
+ // On x86 and other archs that use push instructions to pass arguments:
+ // Only the register arguments need to be replaced with placeholder nodes.
+ // Stacked arguments are evaluated and pushed (or stored into the stack) in order.
+ //
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+#endif
+
+ if (curArgTabEntry->needTmp)
+ {
+ unsigned tmpVarNum;
+
+ if (curArgTabEntry->isTmp == true)
+ {
+ // Create a copy of the temp to go into the late argument list
+ tmpVarNum = curArgTabEntry->tmpNum;
+ defArg = compiler->fgMakeTmpArgNode(tmpVarNum FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ argTable[curInx]->structDesc.passedInRegisters));
+
+ // mark the original node as a late argument
+ argx->gtFlags |= GTF_LATE_ARG;
+ }
+ else
+ {
+ // Create a temp assignment for the argument
+ // Put the temp in the gtCallLateArgs list
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Argument with 'side effect'...\n");
+ compiler->gtDispTree(argx);
+ }
+#endif
+
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ noway_assert(argx->gtType != TYP_STRUCT);
+#endif
+
+ tmpVarNum = compiler->lvaGrabTemp(true DEBUGARG("argument with side effect"));
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ // For GT_MKREFANY, typically the actual struct copying does
+ // not have any side-effects and can be delayed. So instead
+ // of using a temp for the whole struct, we can just use a temp
+ // for the operand that has a side-effect
+ GenTreePtr operand;
+ if ((argx->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ operand = argx->gtOp.gtOp1;
+
+ // In the early argument evaluation, place an assignment to the temp
+ // from the source operand of the mkrefany
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
+
+ // Replace the operand for the mkrefany with the new temp.
+ argx->gtOp.gtOp1 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
+ }
+ else if ((argx->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ operand = argx->gtOp.gtOp2;
+
+ // In the early argument evaluation, place an assignment to the temp
+ // from the source operand of the mkrefany
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, operand);
+
+ // Replace the operand for the mkrefany with the new temp.
+ argx->gtOp.gtOp2 = compiler->gtNewLclvNode(tmpVarNum, operand->TypeGet());
+ }
+ }
+
+ if (setupArg != nullptr)
+ {
+ // Now keep the mkrefany for the late argument list
+ defArg = argx;
+
+ // Clear the side-effect flags because now both op1 and op2 have no side-effects
+ defArg->gtFlags &= ~GTF_ALL_EFFECT;
+ }
+ else
+ {
+ setupArg = compiler->gtNewTempAssign(tmpVarNum, argx);
+
+ LclVarDsc* varDsc = compiler->lvaTable + tmpVarNum;
+
+#ifndef LEGACY_BACKEND
+ if (compiler->fgOrder == Compiler::FGOrderLinear)
+ {
+ // We'll reference this temporary variable just once
+ // when we perform the function call after
+ // setting up this argument.
+ varDsc->lvRefCnt = 1;
+ }
+#endif // !LEGACY_BACKEND
+
+ var_types lclVarType = genActualType(argx->gtType);
+ var_types scalarType = TYP_UNKNOWN;
+
+ if (setupArg->OperIsCopyBlkOp())
+ {
+ setupArg = compiler->fgMorphCopyBlock(setupArg);
+#ifdef _TARGET_ARM64_
+ // This scalar LclVar widening step is only performed for ARM64
+ //
+ CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum);
+ unsigned structSize = varDsc->lvExactSize;
+
+ scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd);
+#endif // _TARGET_ARM64_
+ }
+
+ // scalarType can be set to a wider type for ARM64: (3 => 4) or (5,6,7 => 8)
+ if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
+ {
+ // Create a GT_LCL_FLD using the wider type to go to the late argument list
+ defArg = compiler->gtNewLclFldNode(tmpVarNum, scalarType, 0);
+ }
+ else
+ {
+ // Create a copy of the temp to go to the late argument list
+ defArg = compiler->gtNewLclvNode(tmpVarNum, lclVarType);
+ }
+
+ curArgTabEntry->isTmp = true;
+ curArgTabEntry->tmpNum = tmpVarNum;
+
+#ifdef _TARGET_ARM_
+ // Previously we might have thought the local was promoted, and thus the 'COPYBLK'
+ // might have left holes in the used registers (see
+ // fgAddSkippedRegsInPromotedStructArg).
+ // Too bad we're not that smart for these intermediate temps...
+ if (isValidIntArgReg(curArgTabEntry->regNum) && (curArgTabEntry->numRegs > 1))
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ regMaskTP allUsedRegs = genRegMask(curArgTabEntry->regNum);
+ for (unsigned i = 1; i < curArgTabEntry->numRegs; i++)
+ {
+ argReg = genRegArgNext(argReg);
+ allUsedRegs |= genRegMask(argReg);
+ }
+#ifdef LEGACY_BACKEND
+ callTree->gtCall.gtCallRegUsedMask |= allUsedRegs;
+#endif // LEGACY_BACKEND
+ }
+#endif // _TARGET_ARM_
+ }
+
+ /* mark the assignment as a late argument */
+ setupArg->gtFlags |= GTF_LATE_ARG;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\n Evaluate to a temp:\n");
+ compiler->gtDispTree(setupArg);
+ }
+#endif
+ }
+ }
+ else // curArgTabEntry->needTmp == false
+ {
+ // On x86 -
+ // Only register args are replaced with placeholder nodes
+ // and the stack based arguments are evaluated and pushed in order.
+ //
+ // On Arm/x64 - When needTmp is false and needPlace is false,
+ // the non-register arguments are evaluated and stored in order.
+ // When needPlace is true we have a nested call that comes after
+ // this argument so we have to replace it in the gtCallArgs list
+ // (the initial argument evaluation list) with a placeholder.
+ //
+ if ((curArgTabEntry->regNum == REG_STK) && (curArgTabEntry->needPlace == false))
+ {
+ continue;
+ }
+
+ /* No temp needed - move the whole node to the gtCallLateArgs list */
+
+ /* The argument is deferred and put in the late argument list */
+
+ defArg = argx;
+
+ // Create a placeholder node to put in its place in gtCallLateArgs.
+
+ // For a struct type we also need to record the class handle of the arg.
+ CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
+
+#if defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ // All structs are either passed (and retyped) as integral types, OR they
+ // are passed by reference.
+ noway_assert(argx->gtType != TYP_STRUCT);
+
+#else // !defined(_TARGET_AMD64_) || defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ if (varTypeIsStruct(defArg))
+ {
+ // Need a temp to walk any GT_COMMA nodes when searching for the clsHnd
+ GenTreePtr defArgTmp = defArg;
+
+ // The GT_OBJ may be a child of a GT_COMMA.
+ while (defArgTmp->gtOper == GT_COMMA)
+ {
+ defArgTmp = defArgTmp->gtOp.gtOp2;
+ }
+ assert(varTypeIsStruct(defArgTmp));
+
+ // We handle two opcodes: GT_MKREFANY and GT_OBJ.
+ if (defArgTmp->gtOper == GT_MKREFANY)
+ {
+ clsHnd = compiler->impGetRefAnyClass();
+ }
+ else if (defArgTmp->gtOper == GT_OBJ)
+ {
+ clsHnd = defArgTmp->AsObj()->gtClass;
+ }
+ else
+ {
+ BADCODE("Unhandled struct argument tree in fgMorphArgs");
+ }
+ }
+
+#endif // !(defined(_TARGET_AMD64_) && !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING))
+
+ setupArg = compiler->gtNewArgPlaceHolderNode(defArg->gtType, clsHnd);
+
+ /* mark the placeholder node as a late argument */
+ setupArg->gtFlags |= GTF_LATE_ARG;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ printf("Deferred stack argument :\n");
+ }
+ else
+ {
+ printf("Deferred argument ('%s'):\n", getRegName(curArgTabEntry->regNum));
+ }
+
+ compiler->gtDispTree(argx);
+ printf("Replaced with placeholder node:\n");
+ compiler->gtDispTree(setupArg);
+ }
+#endif
+ }
+
+ if (setupArg != nullptr)
+ {
+ if (curArgTabEntry->parent)
+ {
+ GenTreePtr parent = curArgTabEntry->parent;
+ /* a normal argument from the list */
+ noway_assert(parent->IsList());
+ noway_assert(parent->gtOp.gtOp1 == argx);
+
+ parent->gtOp.gtOp1 = setupArg;
+ }
+ else
+ {
+ /* must be the gtCallObjp */
+ noway_assert(callTree->gtCall.gtCallObjp == argx);
+
+ callTree->gtCall.gtCallObjp = setupArg;
+ }
+ }
+
+ /* deferred arg goes into the late argument list */
+
+ if (tmpRegArgNext == nullptr)
+ {
+ tmpRegArgNext = compiler->gtNewArgList(defArg);
+ callTree->gtCall.gtCallLateArgs = tmpRegArgNext;
+ }
+ else
+ {
+ noway_assert(tmpRegArgNext->IsList());
+ noway_assert(tmpRegArgNext->Current());
+ tmpRegArgNext->gtOp.gtOp2 = compiler->gtNewArgList(defArg);
+ tmpRegArgNext = tmpRegArgNext->Rest();
+ }
+
+ curArgTabEntry->node = defArg;
+ curArgTabEntry->lateArgInx = regArgInx++;
+ }
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\nShuffled argument table: ");
+ for (unsigned curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ printf("%s ", getRegName(curArgTabEntry->regNum));
+ }
+ }
+ printf("\n");
+ }
+#endif
+}
+
+void fgArgInfo::RecordStkLevel(unsigned stkLvl)
+{
+ assert(!IsUninitialized(stkLvl));
+ this->stkLevel = stkLvl;
+}
+
+unsigned fgArgInfo::RetrieveStkLevel()
+{
+ assert(!IsUninitialized(stkLevel));
+ return stkLevel;
+}
+
+// Return a conservative estimate of the stack size in bytes.
+// It will be used only on the intercepted-for-host code path to copy the arguments.
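+// Illustrative example (MAX_REG_ARG == 4 is just an assumed value here): a call with
+// 6 arguments would be estimated at 2 * REGSIZE_BYTES, since only the arguments
+// beyond the first MAX_REG_ARG are counted.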
+int Compiler::fgEstimateCallStackSize(GenTreeCall* call)
+{
+
+ int numArgs = 0;
+ for (GenTreeArgList* args = call->gtCallArgs; args; args = args->Rest())
+ {
+ numArgs++;
+ }
+
+ int numStkArgs;
+ if (numArgs > MAX_REG_ARG)
+ {
+ numStkArgs = numArgs - MAX_REG_ARG;
+ }
+ else
+ {
+ numStkArgs = 0;
+ }
+
+ return numStkArgs * REGSIZE_BYTES;
+}
+
+//------------------------------------------------------------------------------
+// fgMakeMultiUse : If the node is a local, clone it and increase the ref count;
+//                  otherwise insert a comma form temp
+//
+// Arguments:
+//    pOp  - a pointer to the operand that will be used multiple times; if it is not
+//           a local, *pOp is replaced with a comma expression that evaluates it into
+//           a temp
+//
+// Return Value:
+// A fresh GT_LCL_VAR node referencing the temp which has not been used
+//
+// Assumption:
+// The result tree MUST be added to the tree structure since the ref counts are
+// already incremented.
+
+GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
+{
+ GenTree* tree = *pOp;
+ if (tree->IsLocal())
+ {
+ auto result = gtClone(tree);
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[tree->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+ return result;
+ }
+ else
+ {
+ GenTree* result = fgInsertCommaFormTemp(pOp);
+
+ // At this point, *pOp is GT_COMMA(GT_ASG(V01, *pOp), V01) and result = V01
+ // Therefore, the ref count has to be incremented 3 times for *pOp and result, if result will
+ // be added by the caller.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ lvaTable[result->gtLclVarCommon.gtLclNum].incRefCnts(compCurBB->getBBWeight(this), this);
+ }
+
+ return result;
+ }
+}
+
+//------------------------------------------------------------------------------
+// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
+// and replace *ppTree with comma(asg(newLcl, *ppTree), newLcl)
+//
+// Arguments:
+// ppTree - a pointer to the child node we will be replacing with the comma expression that
+// evaluates ppTree to a temp and returns the result
+//
+// structType - value type handle if the temp created is of TYP_STRUCT.
+//
+// Return Value:
+// A fresh GT_LCL_VAR node referencing the temp which has not been used
+//
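+// Illustrative sketch (V01 is a hypothetical temp number): if *ppTree is 'expr', then
+// after this call *ppTree is GT_COMMA(GT_ASG(V01, expr), V01) and the returned node
+// is a fresh GT_LCL_VAR that references V01.
+//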
+
+GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType /*= nullptr*/)
+{
+ GenTree* subTree = *ppTree;
+
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgInsertCommaFormTemp is creating a new local variable"));
+
+ if (varTypeIsStruct(subTree))
+ {
+ assert(structType != nullptr);
+ lvaSetStruct(lclNum, structType, false);
+ }
+
+ // If subTree->TypeGet() == TYP_STRUCT, gtNewTempAssign() will create a GT_COPYBLK tree.
+ // The type of GT_COPYBLK is TYP_VOID. Therefore, we should use subTree->TypeGet() for
+ // setting the type of the lcl var nodes created below.
+ GenTree* asg = gtNewTempAssign(lclNum, subTree);
+
+ GenTree* load = new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ GenTree* comma = gtNewOperNode(GT_COMMA, subTree->TypeGet(), asg, load);
+
+ *ppTree = comma;
+
+ return new (this, GT_LCL_VAR) GenTreeLclVar(subTree->TypeGet(), lclNum, BAD_IL_OFFSET);
+}
+
+//------------------------------------------------------------------------
+// fgMorphArgs: Walk and transform (morph) the arguments of a call
+//
+// Arguments:
+// callNode - the call for which we are doing the argument morphing
+//
+// Return Value:
+// Like most morph methods, this method returns the morphed node,
+// though in this case there are currently no scenarios where the
+// node itself is re-created.
+//
+// Notes:
+// This method is even less idempotent than most morph methods.
+// That is, it makes changes that should not be redone. It uses the existence
+// of gtCallLateArgs (the late arguments list) to determine if it has
+// already done that work.
+//
+// The first time it is called (i.e. during global morphing), this method
+// computes the "late arguments". This is when it determines which arguments
+// need to be evaluated to temps prior to the main argument setup, and which
+// can be directly evaluated into the argument location. It also creates a
+// second argument list (gtCallLateArgs) that does the final placement of the
+// arguments, e.g. into registers or onto the stack.
+//
+// The "non-late arguments", aka the gtCallArgs, are doing the in-order
+// evaluation of the arguments that might have side-effects, such as embedded
+// assignments, calls or possible throws. In these cases, it and earlier
+// arguments must be evaluated to temps.
+//
+// On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
+// if we have any nested calls, we need to defer the copying of the argument
+// into the fixed argument area until after the call. If the argument did not
+// otherwise need to be computed into a temp, it is moved to gtCallLateArgs and
+// replaced in the "early" arg list (gtCallArgs) with a placeholder node.
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
+{
+ GenTreeCall* call = callNode->AsCall();
+
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ unsigned flagsSummary = 0;
+ unsigned genPtrArgCntSav = fgPtrArgCntCur;
+
+ unsigned argIndex = 0;
+
+ unsigned intArgRegNum = 0;
+ unsigned fltArgRegNum = 0;
+
+#ifdef _TARGET_ARM_
+ regMaskTP argSkippedRegMask = RBM_NONE;
+ regMaskTP fltArgSkippedRegMask = RBM_NONE;
+#endif // _TARGET_ARM_
+
+#if defined(_TARGET_X86_)
+ unsigned maxRegArgs = MAX_REG_ARG; // X86: non-const, must be calculated
+#else
+ const unsigned maxRegArgs = MAX_REG_ARG; // other arch: fixed constant number
+#endif
+
+ unsigned argSlots = 0;
+ unsigned nonRegPassedStructSlots = 0;
+ bool lateArgsComputed = (call->gtCallLateArgs != nullptr);
+ bool callHasRetBuffArg = call->HasRetBufArg();
+
+#ifndef _TARGET_X86_ // i.e. _TARGET_AMD64_ or _TARGET_ARM_
+ bool callIsVararg = call->IsVarargs();
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // If fgMakeOutgoingStructArgCopy is called and copies are generated, hasStackArgCopy is set
+ // to make sure to call EvalArgsToTemps. fgMakeOutgoingStructArgCopy just marks the argument
+ // as needing a temp variable, and EvalArgsToTemps actually creates the temp variable node.
+ bool hasStackArgCopy = false;
+#endif
+
+#ifndef LEGACY_BACKEND
+ // Data structure for keeping track of non-standard args. Non-standard args are those that are not passed
+ // following the normal calling convention or in the normal argument registers. We either mark existing
+ // arguments as non-standard (such as the x8 return buffer register on ARM64), or we manually insert the
+ // non-standard arguments into the argument list, below.
+ class NonStandardArgs
+ {
+ struct NonStandardArg
+ {
+ regNumber reg; // The register to be assigned to this non-standard argument.
+ GenTree* node; // The tree node representing this non-standard argument.
+ // Note that this must be updated if the tree node changes due to morphing!
+ };
+
+ ArrayStack<NonStandardArg> args;
+
+ public:
+ NonStandardArgs(Compiler* compiler) : args(compiler, 3) // We will have at most 3 non-standard arguments
+ {
+ }
+
+ //-----------------------------------------------------------------------------
+ // Add: add a non-standard argument to the table of non-standard arguments
+ //
+ // Arguments:
+ // node - a GenTree node that has a non-standard argument.
+ // reg - the register to assign to this node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Add(GenTree* node, regNumber reg)
+ {
+ NonStandardArg nsa = {reg, node};
+ args.Push(nsa);
+ }
+
+ //-----------------------------------------------------------------------------
+ // Find: Look for a GenTree* in the set of non-standard args.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ //
+ // Return Value:
+ // The index of the non-standard argument (a non-negative, unique, stable number).
+ // If the node is not a non-standard argument, return -1.
+ //
+ int Find(GenTree* node)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ if (node == args.Index(i).node)
+ {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ //-----------------------------------------------------------------------------
+ // FindReg: Look for a GenTree node in the non-standard arguments set. If found,
+ // set the register to use for the node.
+ //
+ // Arguments:
+ // node - a GenTree node to look for
+ // pReg - an OUT argument. *pReg is set to the non-standard register to use if
+ // 'node' is found in the non-standard argument set.
+ //
+ // Return Value:
+ // 'true' if 'node' is a non-standard argument. In this case, *pReg is set to the
+ // register to use.
+ // 'false' otherwise (in this case, *pReg is unmodified).
+ //
+ bool FindReg(GenTree* node, regNumber* pReg)
+ {
+ for (int i = 0; i < args.Height(); i++)
+ {
+ NonStandardArg& nsa = args.IndexRef(i);
+ if (node == nsa.node)
+ {
+ *pReg = nsa.reg;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ //-----------------------------------------------------------------------------
+ // Replace: Replace the non-standard argument node at a given index. This is done when
+ // the original node was replaced via morphing, but we need to continue to assign a
+ // particular non-standard arg to it.
+ //
+ // Arguments:
+ // index - the index of the non-standard arg. It must exist.
+ // node - the new GenTree node.
+ //
+ // Return Value:
+ // None.
+ //
+ void Replace(int index, GenTree* node)
+ {
+ args.IndexRef(index).node = node;
+ }
+
+ } nonStandardArgs(this);
+#endif // !LEGACY_BACKEND
+
+ // Count of args. On first morph, this is counted before we've filled in the arg table.
+ // On remorph, we grab it from the arg table.
+ unsigned numArgs = 0;
+
+ // Process the late arguments (which were determined by a previous caller).
+ // Do this before resetting fgPtrArgCntCur as fgMorphTree(call->gtCallLateArgs)
+ // may need to refer to it.
+ if (lateArgsComputed)
+ {
+ // We need to reMorph the gtCallLateArgs early since that is what triggers
+ // the expression folding and we need to have the final folded gtCallLateArgs
+ // available when we call RemorphRegArg so that we correctly update the fgArgInfo
+ // with the folded tree that represents the final optimized argument nodes.
+ //
+ // However if a range-check needs to be generated for any of these late
+ // arguments we also need to "know" what the stack depth will be when we generate
+ // code to branch to the throw range check failure block as that is part of the
+ // GC information contract for that block.
+ //
+ // Since the late arguments are evaluated last we have pushed all of the
+ // other arguments on the stack before we evaluate these late arguments,
+ // so we record the stack depth on the first morph call when lateArgsComputed
+ // was false (via RecordStkLevel) and then retrieve that value here (via RetrieveStkLevel)
+ //
+ unsigned callStkLevel = call->fgArgInfo->RetrieveStkLevel();
+ fgPtrArgCntCur += callStkLevel;
+ call->gtCallLateArgs = fgMorphTree(call->gtCallLateArgs)->AsArgList();
+ flagsSummary |= call->gtCallLateArgs->gtFlags;
+ fgPtrArgCntCur -= callStkLevel;
+ assert(call->fgArgInfo != nullptr);
+ call->fgArgInfo->RemorphReset();
+
+ numArgs = call->fgArgInfo->ArgCount();
+ }
+ else
+ {
+ // First we need to count the args
+ if (call->gtCallObjp)
+ {
+ numArgs++;
+ }
+ for (args = call->gtCallArgs; (args != nullptr); args = args->gtOp.gtOp2)
+ {
+ numArgs++;
+ }
+
+ // Insert or mark non-standard args. These are either outside the normal calling convention, or
+ // arguments registers that don't follow the normal progression of argument registers in the calling
+ // convention (such as for the ARM64 fixed return buffer argument x8).
+ //
+ // *********** NOTE *************
+ // The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
+ // in the implementation of fast tail call.
+ // *********** END NOTE *********
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper has a custom calling convention. Set the argument registers
+ // correctly here.
+ if (call->IsHelperCall(this, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ GenTreeArgList* args = call->gtCallArgs;
+ GenTree* arg1 = args->Current();
+ assert(arg1 != nullptr);
+ nonStandardArgs.Add(arg1, REG_PINVOKE_FRAME);
+ }
+ // The x86 shift helpers have custom calling conventions and expect the lo part of the long to be in EAX and the
+ // hi part to be in EDX. This sets the argument registers up correctly.
+ else if (call->IsHelperCall(this, CORINFO_HELP_LLSH) || call->IsHelperCall(this, CORINFO_HELP_LRSH) ||
+          call->IsHelperCall(this, CORINFO_HELP_LRSZ))
+ {
+ GenTreeArgList* args = call->gtCallArgs;
+ GenTree* arg1 = args->Current();
+ assert(arg1 != nullptr);
+ nonStandardArgs.Add(arg1, REG_LNGARG_LO);
+
+ args = args->Rest();
+ GenTree* arg2 = args->Current();
+ assert(arg2 != nullptr);
+ nonStandardArgs.Add(arg2, REG_LNGARG_HI);
+ }
+#endif // !defined(LEGACY_BACKEND) && defined(_TARGET_X86_)
+
+#if !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+ // TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
+ // If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
+ // convention for x86/SSE.
+
+ // If we have a Fixed Return Buffer argument register then we setup a non-standard argument for it
+ //
+ if (hasFixedRetBuffReg() && call->HasRetBufArg())
+ {
+ args = call->gtCallArgs;
+ assert(args != nullptr);
+ assert(args->IsList());
+
+ argx = call->gtCallArgs->Current();
+
+ // We don't increment numArgs here, since we already counted this argument above.
+
+ nonStandardArgs.Add(argx, theFixedRetBuffReg());
+ }
+
+ // We are allowed to have a Fixed Return Buffer argument combined
+ // with any of the remaining non-standard arguments
+ //
+ if (call->IsUnmanaged() && !opts.ShouldUsePInvokeHelpers())
+ {
+ assert(!call->gtCallCookie);
+ // Add a conservative estimate of the stack size in a special parameter (r11) at the call site.
+ // It will be used only on the intercepted-for-host code path to copy the arguments.
+
+ GenTree* cns = new (this, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, fgEstimateCallStackSize(call));
+ call->gtCallArgs = gtNewListNode(cns, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(cns, REG_PINVOKE_COOKIE_PARAM);
+ }
+ else if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT) && !call->IsTailCallViaHelper())
+ {
+ // indirect VSD stubs need the base of the indirection cell to be
+ // passed in addition. At this point that is the value in gtCallAddr.
+ // The actual call target will be derived from gtCallAddr in call
+ // lowering.
+
+ // If it is a VSD call getting dispatched via tail call helper,
+ // fgMorphTailCall() would materialize stub addr as an additional
+ // parameter added to the original arg list and hence no need to
+ // add as a non-standard arg.
+
+ GenTree* arg = call->gtCallAddr;
+ if (arg->OperIsLocal())
+ {
+ arg = gtClone(arg, true);
+ }
+ else
+ {
+ call->gtCallAddr = fgInsertCommaFormTemp(&arg);
+ call->gtFlags |= GTF_ASG;
+ }
+ noway_assert(arg != nullptr);
+
+ // And push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_VIRTUAL_STUB_PARAM);
+ }
+ else if (call->gtCallType == CT_INDIRECT && call->gtCallCookie)
+ {
+ assert(!call->IsUnmanaged());
+
+ // put cookie into R11
+ GenTree* arg = call->gtCallCookie;
+ noway_assert(arg != nullptr);
+ call->gtCallCookie = nullptr;
+
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_PINVOKE_COOKIE_PARAM);
+
+ // put destination into R10
+ arg = gtClone(call->gtCallAddr, true);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ numArgs++;
+
+ nonStandardArgs.Add(arg, REG_PINVOKE_TARGET_PARAM);
+
+ // finally change this call to a helper call
+ call->gtCallType = CT_HELPER;
+ call->gtCallMethHnd = eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
+ }
+#endif // !defined(LEGACY_BACKEND) && !defined(_TARGET_X86_)
+
+ // Allocate the fgArgInfo for the call node;
+ //
+ call->fgArgInfo = new (this, CMK_Unknown) fgArgInfo(this, call, numArgs);
+ }
+
+ if (varTypeIsStruct(call))
+ {
+ fgFixupStructReturn(call);
+ }
+
+ /* First we morph the argument subtrees ('this' pointer, arguments, etc.).
+ * During the first call to fgMorphArgs we also record the
+ * information about late arguments we have in 'fgArgInfo'.
+ * This information is used later to construct the gtCallLateArgs */
+
+ /* Process the 'this' argument value, if present */
+
+ argx = call->gtCallObjp;
+
+ if (argx)
+ {
+ argx = fgMorphTree(argx);
+ call->gtCallObjp = argx;
+ flagsSummary |= argx->gtFlags;
+
+ assert(call->gtCallType == CT_USER_FUNC || call->gtCallType == CT_INDIRECT);
+
+ assert(argIndex == 0);
+
+ /* We must fill in or update the argInfo table */
+
+ if (lateArgsComputed)
+ {
+ /* this is a register argument - possibly update it in the table */
+ call->fgArgInfo->RemorphRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1);
+ }
+ else
+ {
+ assert(varTypeIsGC(call->gtCallObjp->gtType) || (call->gtCallObjp->gtType == TYP_I_IMPL));
+
+ /* this is a register argument - put it in the table */
+ call->fgArgInfo->AddRegArg(argIndex, argx, nullptr, genMapIntRegArgNumToRegNum(intArgRegNum), 1, 1
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ ,
+ false, REG_STK, nullptr
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ );
+ }
+ // this can't be a struct.
+ assert(argx->gtType != TYP_STRUCT);
+
+ /* Increment the argument register count and argument index */
+ if (!varTypeIsFloating(argx->gtType) || opts.compUseSoftFP)
+ {
+ intArgRegNum++;
+#ifdef WINDOWS_AMD64_ABI
+ // Whenever we pass an integer register argument
+ // we skip the corresponding floating point register argument
+ fltArgRegNum++;
+#endif // WINDOWS_AMD64_ABI
+ }
+ else
+ {
+ noway_assert(!"the 'this' pointer can not be a floating point type");
+ }
+ argIndex++;
+ argSlots++;
+ }
+
+#ifdef _TARGET_X86_
+ // Compute the maximum number of arguments that can be passed in registers.
+ // For X86 we handle the varargs and unmanaged calling conventions
+
+ if (call->gtFlags & GTF_CALL_POP_ARGS)
+ {
+ noway_assert(intArgRegNum < MAX_REG_ARG);
+ // No more register arguments for varargs (CALL_POP_ARGS)
+ maxRegArgs = intArgRegNum;
+
+ // Add in the ret buff arg
+ if (callHasRetBuffArg)
+ maxRegArgs++;
+ }
+
+ if (call->IsUnmanaged())
+ {
+ noway_assert(intArgRegNum == 0);
+
+ if (call->gtCallMoreFlags & GTF_CALL_M_UNMGD_THISCALL)
+ {
+ noway_assert(call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_I_IMPL ||
+ call->gtCallArgs->gtOp.gtOp1->TypeGet() == TYP_BYREF ||
+ call->gtCallArgs->gtOp.gtOp1->gtOper ==
+ GT_NOP); // the arg was already morphed to a register (fgMorph called twice)
+ maxRegArgs = 1;
+ }
+ else
+ {
+ maxRegArgs = 0;
+ }
+
+ // Add in the ret buff arg
+ if (callHasRetBuffArg)
+ maxRegArgs++;
+ }
+#endif // _TARGET_X86_
+
+ /* Morph the user arguments */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_ARM_)
+
+ // The ARM ABI has a concept of back-filling of floating-point argument registers, according
+ // to the "Procedure Call Standard for the ARM Architecture" document, especially
+ // section 6.1.2.3 "Parameter passing". Back-filling is where floating-point argument N+1 can
+ // appear in a lower-numbered register than floating point argument N. That is, argument
+ // register allocation is not strictly increasing. To support this, we need to keep track of unused
+ // floating-point argument registers that we can back-fill. We only support 4-byte float and
+ // 8-byte double types, and one to four element HFAs composed of these types. With this, we will
+ // only back-fill single registers, since there is no way with these types to create
+ // an alignment hole greater than one register. However, there can be up to 3 back-fill slots
+ // available (with 16 FP argument registers). Consider this code:
+ //
+ // struct HFA { float x, y, z; }; // a three element HFA
+ // void bar(float a1, // passed in f0
+ // double a2, // passed in f2/f3; skip f1 for alignment
+ // HFA a3, // passed in f4/f5/f6
+ // double a4, // passed in f8/f9; skip f7 for alignment. NOTE: it doesn't fit in the f1 back-fill slot
+ // HFA a5, // passed in f10/f11/f12
+ // double a6, // passed in f14/f15; skip f13 for alignment. NOTE: it doesn't fit in the f1 or f7 back-fill
+ // // slots
+ // float a7, // passed in f1 (back-filled)
+ // float a8, // passed in f7 (back-filled)
+ // float a9, // passed in f13 (back-filled)
+ // float a10) // passed on the stack in [OutArg+0]
+ //
+ // Note that if we ever support FP types with larger alignment requirements, then there could
+ // be more than single register back-fills.
+ //
+ // Once we assign a floating-point argument to the stack, all subsequent FP arguments must go on the stack.
+ // See "Procedure Call Standard for the ARM Architecture", section 6.1.2.3, "The back-filling
+ // continues only so long as no VFP CPRC has been allocated to a slot on the stack."
+ // We set anyFloatStackArgs to true when a floating-point argument has been assigned to the stack
+ // and prevent any additional floating-point arguments from going in registers.
+
+ bool anyFloatStackArgs = false;
+
+#endif // _TARGET_ARM_
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ bool hasStructArgument = false; // @TODO-ARM64-UNIX: Remove this bool during a future refactoring
+ bool hasMultiregStructArgs = false;
+ for (args = call->gtCallArgs; args; args = args->gtOp.gtOp2, argIndex++)
+ {
+ GenTreePtr* parentArgx = &args->gtOp.gtOp1;
+
+#if FEATURE_MULTIREG_ARGS
+ if (!hasStructArgument)
+ {
+ hasStructArgument = varTypeIsStruct(args->gtOp.gtOp1);
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+#ifndef LEGACY_BACKEND
+ // Record the index of any nonStandard arg that we may be processing here, as we are
+ // about to call fgMorphTree on it and fgMorphTree may replace it with a new tree.
+ GenTreePtr orig_argx = *parentArgx;
+ int nonStandard_index = nonStandardArgs.Find(orig_argx);
+#endif // !LEGACY_BACKEND
+
+ argx = fgMorphTree(*parentArgx);
+ *parentArgx = argx;
+ flagsSummary |= argx->gtFlags;
+
+ assert(args->IsList());
+ assert(argx == args->Current());
+
+#ifndef LEGACY_BACKEND
+ if ((nonStandard_index != -1) && (argx != orig_argx))
+ {
+ // We need to update the node field for this nonStandard arg here
+ // as it was changed by the call to fgMorphTree
+ nonStandardArgs.Replace(nonStandard_index, argx);
+ }
+#endif // !LEGACY_BACKEND
+
+ /* Change the node to TYP_I_IMPL so we don't report GC info
+ * NOTE: We deferred this from the importer because of the inliner */
+
+ if (argx->IsVarAddr())
+ {
+ argx->gtType = TYP_I_IMPL;
+ }
+
+ bool passUsingFloatRegs;
+ unsigned argAlign = 1;
+ // Setup any HFA information about 'argx'
+ var_types hfaType = GetHfaType(argx);
+ bool isHfaArg = varTypeIsFloating(hfaType);
+ unsigned hfaSlots = 0;
+
+ if (isHfaArg)
+ {
+ hfaSlots = GetHfaCount(argx);
+
+ // If we have an HFA struct, it's possible that a method which originally used
+ // only integer types now starts using FP types. We have to communicate this
+ // through this flag since LSRA later on will use this flag to determine whether
+ // or not to track the FP register set.
+ //
+ compFloatingPointUsed = true;
+ }
+
+ unsigned size = 0;
+ CORINFO_CLASS_HANDLE copyBlkClass = nullptr;
+ bool isRegArg = false;
+
+ fgArgTabEntryPtr argEntry = nullptr;
+
+ if (lateArgsComputed)
+ {
+ argEntry = gtArgEntryByArgNum(call, argIndex);
+ }
+
+#ifdef _TARGET_ARM_
+
+ bool passUsingIntRegs;
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ passUsingIntRegs = isValidIntArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx)) && !opts.compUseSoftFP;
+ passUsingIntRegs = passUsingFloatRegs ? false : (intArgRegNum < MAX_REG_ARG);
+ }
+
+ GenTreePtr curArg = argx;
+ // If late args have already been computed, use the node in the argument table.
+ if (argEntry != NULL && argEntry->isTmp)
+ {
+ curArg = argEntry->node;
+ }
+
+ // We don't use the "size" return value from InferOpSizeAlign().
+ codeGen->InferOpSizeAlign(curArg, &argAlign);
+
+ argAlign = roundUp(argAlign, TARGET_POINTER_SIZE);
+ argAlign /= TARGET_POINTER_SIZE;
+
+ if (argAlign == 2)
+ {
+ if (passUsingFloatRegs)
+ {
+ if (fltArgRegNum % 2 == 1)
+ {
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
+ fltArgRegNum++;
+ }
+ }
+ else if (passUsingIntRegs)
+ {
+ if (intArgRegNum % 2 == 1)
+ {
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ intArgRegNum++;
+ }
+ }
+
+ if (argSlots % 2 == 1)
+ {
+ argSlots++;
+ }
+ }
+
+#elif defined(_TARGET_ARM64_)
+
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = !callIsVararg && (isHfaArg || varTypeIsFloating(argx));
+ }
+
+#elif defined(_TARGET_AMD64_)
+#if defined(UNIX_AMD64_ABI)
+ if (lateArgsComputed)
+ {
+ passUsingFloatRegs = isValidFloatArgReg(argEntry->regNum);
+ }
+ else
+ {
+ passUsingFloatRegs = varTypeIsFloating(argx);
+ }
+#else // WINDOWS_AMD64_ABI
+ passUsingFloatRegs = varTypeIsFloating(argx);
+#endif // !UNIX_AMD64_ABI
+#elif defined(_TARGET_X86_)
+
+ passUsingFloatRegs = false;
+
+#else
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+
+ bool isBackFilled = false;
+ unsigned nextFltArgRegNum = fltArgRegNum; // This is the next floating-point argument register number to use
+ var_types structBaseType = TYP_STRUCT;
+ unsigned structSize = 0;
+
+ bool isStructArg = varTypeIsStruct(argx);
+
+ if (lateArgsComputed)
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Get the struct description for the already completed struct argument.
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, argx);
+ assert(fgEntryPtr != nullptr);
+
+ // As described in a few other places, this can happen when the argx was morphed
+ // into an arg setup node - COPYBLK. The COPYBLK always has a type of void.
+ // In such case the fgArgTabEntry keeps track of whether the original node (before morphing)
+ // was a struct and the struct classification.
+ isStructArg = fgEntryPtr->isStruct;
+
+ if (isStructArg)
+ {
+ structDesc.CopyFrom(fgEntryPtr->structDesc);
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+
+ assert(argEntry != nullptr);
+ if (argEntry->IsBackFilled())
+ {
+ isRegArg = true;
+ size = argEntry->numRegs;
+ nextFltArgRegNum = genMapFloatRegNumToRegArgNum(argEntry->regNum);
+ assert(size == 1);
+ isBackFilled = true;
+ }
+ else if (argEntry->regNum == REG_STK)
+ {
+ isRegArg = false;
+ assert(argEntry->numRegs == 0);
+ size = argEntry->numSlots;
+ }
+ else
+ {
+ isRegArg = true;
+ assert(argEntry->numRegs > 0);
+ size = argEntry->numRegs + argEntry->numSlots;
+ }
+
+ // This size has now been computed
+ assert(size != 0);
+ }
+ else // !lateArgsComputed
+ {
+ //
+ // Figure out the size of the argument. This is either in number of registers, or number of
+ // TARGET_POINTER_SIZE stack slots, or the sum of these if the argument is split between the registers and
+ // the stack.
+ //
+ if (argx->IsArgPlaceHolderNode() || (!isStructArg))
+ {
+#if defined(_TARGET_AMD64_)
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!isStructArg)
+ {
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+ }
+ else
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(argx->gtArgPlace.gtArgPlaceClsHnd, &structDesc);
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
+ }
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ size = 1; // On AMD64, all primitives fit in a single (64-bit) 'slot'
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_ARM64_)
+ if (isStructArg)
+ {
+ if (isHfaArg)
+ {
+ size = GetHfaCount(argx);
+ // HFA structs are passed by value in multiple registers
+ hasMultiregStructArgs = true;
+ }
+ else
+ {
+ // Structs are either passed in 1 or 2 (64-bit) slots
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+
+ if (size == 2)
+ {
+ // Structs that are the size of 2 pointers are passed by value in multiple registers
+ hasMultiregStructArgs = true;
+ }
+ else if (size > 2)
+ {
+ size = 1; // Structs that are larger than 2 pointers (except for HFAs) are passed by
+ // reference (to a copy)
+ }
+ }
+ // Note that there are some additional rules for multireg structs.
+ // (i.e. they cannot be split between registers and the stack)
+ }
+ else
+ {
+ size = 1; // Otherwise, all primitive types fit in a single (64-bit) 'slot'
+ }
+#elif defined(_TARGET_ARM_)
+ if (isStructArg)
+ {
+ size = (unsigned)(roundUp(info.compCompHnd->getClassSize(argx->gtArgPlace.gtArgPlaceClsHnd),
+ TARGET_POINTER_SIZE)) /
+ TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ // The typical case
+ size = genTypeStSz(argx->gtType);
+ }
+#elif defined(_TARGET_X86_)
+ size = genTypeStSz(argx->gtType);
+#else
+#error Unsupported or unset target architecture
+#endif // _TARGET_XXX_
+ }
+#ifdef _TARGET_ARM_
+ else if (isHfaArg)
+ {
+ size = GetHfaCount(argx);
+ }
+#endif // _TARGET_ARM_
+ else // struct type
+ {
+ // We handle two opcodes: GT_MKREFANY and GT_OBJ
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ if (varTypeIsStruct(argx))
+ {
+ isStructArg = true;
+ }
+#ifdef _TARGET_AMD64_
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (varTypeIsStruct(argx))
+ {
+ size = info.compCompHnd->getClassSize(impGetRefAnyClass());
+ unsigned roundupSize = (unsigned)roundUp(size, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(impGetRefAnyClass(), &structDesc);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ size = 1;
+ }
+#else
+ size = 2;
+#endif
+ }
+ else // We must have a GT_OBJ with a struct type, but the GT_OBJ may be a child of a GT_COMMA
+ {
+ GenTreePtr argObj = argx;
+ GenTreePtr* parentOfArgObj = parentArgx;
+
+ assert(args->IsList());
+ assert(argx == args->Current());
+
+ /* The GT_OBJ may be a child of a GT_COMMA */
+ while (argObj->gtOper == GT_COMMA)
+ {
+ parentOfArgObj = &argObj->gtOp.gtOp2;
+ argObj = argObj->gtOp.gtOp2;
+ }
+
+ // TODO-1stClassStructs: An OBJ node should not be required for lclVars.
+ if (argObj->gtOper != GT_OBJ)
+ {
+ BADCODE("illegal argument tree in fgMorphArgs");
+ }
+
+ CORINFO_CLASS_HANDLE objClass = argObj->gtObj.gtClass;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ eeGetSystemVAmd64PassStructInRegisterDescriptor(objClass, &structDesc);
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ unsigned originalSize = info.compCompHnd->getClassSize(objClass);
+ originalSize = (originalSize == 0 ? TARGET_POINTER_SIZE : originalSize);
+ unsigned roundupSize = (unsigned)roundUp(originalSize, TARGET_POINTER_SIZE);
+
+ structSize = originalSize;
+
+ structPassingKind howToPassStruct;
+ structBaseType = getArgTypeForStruct(objClass, &howToPassStruct, originalSize);
+
+#ifdef _TARGET_ARM64_
+ if ((howToPassStruct == SPK_PrimitiveType) && // Passed in a single register
+ !isPow2(originalSize)) // size is 3,5,6 or 7 bytes
+ {
+ if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
+ {
+ // For ARM64, when we pass structs that are 3, 5, 6 or 7 bytes in size,
+ // we can read 4 or 8 bytes from the LclVar to pass this arg
+ originalSize = genTypeSize(structBaseType);
+ }
+ }
+#endif // _TARGET_ARM64_
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // On System V OS-es a struct is never passed by reference.
+ // It is either passed by value on the stack or in registers.
+ bool passStructInRegisters = false;
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ bool passStructByRef = false;
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // The following if-then-else needs to be carefully refactored.
+ // Basically the else portion wants to turn a struct load (a GT_OBJ)
+ // into a GT_IND of the appropriate size.
+ // It can do this with struct sizes that are 1, 2, 4, or 8 bytes.
+ // It can't do this when FEATURE_UNIX_AMD64_STRUCT_PASSING is defined (Why?)
+ // TODO-Cleanup: Remove the #ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING below.
+ // It also can't do this if we have a HFA arg,
+ // unless we have a 1-elem HFA in which case we want to do the optimization.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_X86_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // Check for struct argument with size 1, 2, 4 or 8 bytes
+ // As we can optimize these by turning them into a GT_IND of the correct type
+ //
+ // Check for cases that we cannot optimize:
+ //
+ if ((originalSize > TARGET_POINTER_SIZE) || // it is a struct that is larger than a pointer
+ !isPow2(originalSize) || // it is not a power of two (1, 2, 4 or 8)
+ (isHfaArg && (hfaSlots != 1))) // it is an HFA struct that is not a one element HFA
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Normalize 'size' to the number of pointer sized items
+ // 'size' is the number of register slots that we will use to pass the argument
+ size = roundupSize / TARGET_POINTER_SIZE;
+#if defined(_TARGET_AMD64_)
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ size = 1; // This must be copied to a temp and passed by address
+ passStructByRef = true;
+ copyBlkClass = objClass;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (!structDesc.passedInRegisters)
+ {
+ GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
+ bool needCpyBlk = false;
+ if (lclVar != nullptr)
+ {
+ // If the struct is promoted to registers, it has to be materialized
+ // on the stack. We may want to support promoted structures when
+ // generating code for putarg_stk instead of creating a copy here.
+ LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
+ needCpyBlk = varDsc->lvPromoted;
+ }
+ else
+ {
+ // If simd16 comes from vector<t>, eeGetSystemVAmd64PassStructInRegisterDescriptor
+ // sets structDesc.passedInRegisters to be false.
+ //
+ // GT_ADDR(GT_SIMD) is not a rationalized IR form and is not handled
+ // by the rationalizer. For now we will let the SIMD struct arg be copied to
+ // a local. As part of the cpblk rewrite, the rationalizer will handle GT_ADDR(GT_SIMD).
+ //
+ // +--* obj simd16
+ // | \--* addr byref
+ // | | /--* lclVar simd16 V05 loc4
+ // | \--* simd simd16 int -
+ // | \--* lclVar simd16 V08 tmp1
+ //
+ // TODO-Amd64-Unix: The rationalizer can be updated to handle this pattern,
+ // so that we don't need to generate a copy here.
+ GenTree* addr = argObj->gtOp.gtOp1;
+ if (addr->OperGet() == GT_ADDR)
+ {
+ GenTree* addrChild = addr->gtOp.gtOp1;
+ if (addrChild->OperGet() == GT_SIMD)
+ {
+ needCpyBlk = true;
+ }
+ }
+ }
+ passStructInRegisters = false;
+ if (needCpyBlk)
+ {
+ copyBlkClass = objClass;
+ }
+ else
+ {
+ copyBlkClass = NO_CLASS_HANDLE;
+ }
+ }
+ else
+ {
+ // The objClass is used to materialize the struct on the stack.
+ // For SystemV, the code below generates copies for struct arguments classified
+ // as register arguments.
+ // TODO-Amd64-Unix: We don't always need copies for this case. Struct arguments
+ // can be passed in registers or can be copied directly to the outgoing area.
+ passStructInRegisters = true;
+ copyBlkClass = objClass;
+ }
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+#elif defined(_TARGET_ARM64_)
+ if ((size > 2) && !isHfaArg)
+ {
+ size = 1; // This must be copied to a temp and passed by address
+ passStructByRef = true;
+ copyBlkClass = objClass;
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // If we're passing a promoted struct local var,
+ // we may need to skip some registers due to alignment; record those.
+ GenTreePtr lclVar = fgIsIndirOfAddrOfLocal(argObj);
+ if (lclVar != NULL)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclVar->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvPromoted)
+ {
+ assert(argObj->OperGet() == GT_OBJ);
+ if (lvaGetPromotionType(varDsc) == PROMOTION_TYPE_INDEPENDENT)
+ {
+ fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
+ }
+ }
+ }
+#endif // _TARGET_ARM_
+ }
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // TODO-Amd64-Unix: Since the else part below is disabled for UNIX_AMD64, copies are always
+ // generated for structs of size 1, 2, 4, or 8 bytes.
+ else // We have a struct argument with size 1, 2, 4 or 8 bytes
+ {
+ // change our GT_OBJ into a GT_IND of the correct type.
+ // We've already ensured above that size is a power of 2, and less than or equal to pointer
+ // size.
+
+ assert(howToPassStruct == SPK_PrimitiveType);
+
+ // ToDo: remove this block as getArgTypeForStruct properly handles turning one element HFAs into
+ // primitives
+ if (isHfaArg)
+ {
+ // If we reach here with an HFA arg it has to be a one element HFA
+ assert(hfaSlots == 1);
+ structBaseType = hfaType; // change the indirection type to a floating point type
+ }
+
+ noway_assert(structBaseType != TYP_UNKNOWN);
+
+ argObj->ChangeOper(GT_IND);
+
+ // Now see if we can fold *(&X) into X
+ if (argObj->gtOp.gtOp1->gtOper == GT_ADDR)
+ {
+ GenTreePtr temp = argObj->gtOp.gtOp1->gtOp.gtOp1;
+
+ // Keep the DONT_CSE flag in sync
+ // (as the addr always marks it for its op1)
+ temp->gtFlags &= ~GTF_DONT_CSE;
+ temp->gtFlags |= (argObj->gtFlags & GTF_DONT_CSE);
+ DEBUG_DESTROY_NODE(argObj->gtOp.gtOp1); // GT_ADDR
+ DEBUG_DESTROY_NODE(argObj); // GT_IND
+
+ argObj = temp;
+ *parentOfArgObj = temp;
+
+ // If the OBJ had been the top level node, we've now changed argx.
+ if (parentOfArgObj == parentArgx)
+ {
+ argx = temp;
+ }
+ }
+ if (argObj->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = argObj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvPromoted)
+ {
+ if (varDsc->lvFieldCnt == 1)
+ {
+ // get the first and only promoted field
+ LclVarDsc* fieldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
+ if (genTypeSize(fieldVarDsc->TypeGet()) >= originalSize)
+ {
+ // we will use the first and only promoted field
+ argObj->gtLclVarCommon.SetLclNum(varDsc->lvFieldLclStart);
+
+ if (varTypeCanReg(fieldVarDsc->TypeGet()) &&
+ (genTypeSize(fieldVarDsc->TypeGet()) == originalSize))
+ {
+ // Just use the existing field's type
+ argObj->gtType = fieldVarDsc->TypeGet();
+ }
+ else
+ {
+ // Can't use the existing field's type, so use GT_LCL_FLD to swizzle
+ // to a new type
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ assert(varTypeCanReg(argObj->TypeGet()));
+ assert(copyBlkClass == NO_CLASS_HANDLE);
+ }
+ else
+ {
+ // use GT_LCL_FLD to swizzle the single field struct to a new type
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ }
+ else
+ {
+ // The struct fits into a single register, but it has been promoted into its
+ // constituent fields, and so we have to re-assemble it
+ copyBlkClass = objClass;
+#ifdef _TARGET_ARM_
+ // Alignment constraints may cause us not to use (to "skip") some argument
+ // registers. Add those, if any, to the skipped (int) arg reg mask.
+ fgAddSkippedRegsInPromotedStructArg(varDsc, intArgRegNum, &argSkippedRegMask);
+#endif // _TARGET_ARM_
+ }
+ }
+ else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
+ {
+ // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
+ argObj->ChangeOper(GT_LCL_FLD);
+ argObj->gtType = structBaseType;
+ }
+ }
+ else
+ {
+ // Not a GT_LCL_VAR, so we can just change the type on the node
+ argObj->gtType = structBaseType;
+ }
+ assert(varTypeCanReg(argObj->TypeGet()) ||
+ ((copyBlkClass != NO_CLASS_HANDLE) && varTypeIsIntegral(structBaseType)));
+
+ size = 1;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#endif // not _TARGET_X86_
+ // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
+ if ((structBaseType == TYP_STRUCT) &&
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ !passStructInRegisters
+#else // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ !passStructByRef
+#endif // !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ )
+ {
+ if (isHfaArg && passUsingFloatRegs)
+ {
+ size = GetHfaCount(argx); // GetHfaCount returns number of elements in the HFA
+ }
+ else
+ {
+ // If the valuetype size is not a multiple of sizeof(void*),
+ // we must copyblk to a temp before doing the obj to avoid
+ // the obj reading memory past the end of the valuetype
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ // TODO-X86-CQ: [1091733] Revisit for small structs, we should use push instruction
+ copyBlkClass = objClass;
+ size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
+#else // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
+ if (roundupSize > originalSize)
+ {
+ copyBlkClass = objClass;
+
+ // There are a few special cases where we can omit using a CopyBlk
+ // where we normally would need to use one.
+
+ if (argObj->gtObj.gtOp1->IsVarAddr()) // Is the source a LclVar?
+ {
+ copyBlkClass = NO_CLASS_HANDLE;
+ }
+ }
+
+ size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
+#endif // !defined(_TARGET_X86_) || defined(LEGACY_BACKEND)
+ }
+ }
+ }
+
+#ifndef _TARGET_X86_
+ // TODO-Arm: Does this apply for _TARGET_ARM_, where structs passed by value can be split between
+ // registers and stack?
+ if (size > 1)
+ {
+ hasMultiregStructArgs = true;
+ }
+#endif // !_TARGET_X86_
+ }
+
+ // The 'size' value must now have been set. (The original value of zero is an invalid value.)
+ assert(size != 0);
+
+ //
+ // Figure out if the argument will be passed in a register.
+ //
+
+ if (isRegParamType(genActualType(argx->TypeGet()))
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ && (!isStructArg || structDesc.passedInRegisters)
+#endif
+ )
+ {
+#ifdef _TARGET_ARM_
+ if (passUsingFloatRegs)
+ {
+ // First, see if it can be back-filled
+ if (!anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
+ (fltArgSkippedRegMask != RBM_NONE) && // Is there an available back-fill slot?
+ (size == 1)) // The size to back-fill is one float register
+ {
+ // Back-fill the register.
+ isBackFilled = true;
+ regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
+ fltArgSkippedRegMask &=
+ ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
+ nextFltArgRegNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
+ assert(nextFltArgRegNum < MAX_FLOAT_REG_ARG);
+ }
+
+ // Does the entire float, double, or HFA fit in the FP arg registers?
+ // Check if the last register needed is still in the argument register range.
+ isRegArg = (nextFltArgRegNum + size - 1) < MAX_FLOAT_REG_ARG;
+
+ if (!isRegArg)
+ {
+ anyFloatStackArgs = true;
+ }
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
+#elif defined(_TARGET_ARM64_)
+ if (passUsingFloatRegs)
+ {
+ // Check if the last register needed is still in the fp argument register range.
+ isRegArg = (nextFltArgRegNum + (size - 1)) < MAX_FLOAT_REG_ARG;
+
+ // Do we have a HFA arg that we wanted to pass in registers, but we ran out of FP registers?
+ if (isHfaArg && !isRegArg)
+ {
+ // recompute the 'size' so that it represents the number of stack slots rather than the number of
+ // registers
+ //
+ unsigned roundupSize = (unsigned)roundUp(structSize, TARGET_POINTER_SIZE);
+ size = roundupSize / TARGET_POINTER_SIZE;
+
+ // We also must update fltArgRegNum so that we no longer try to
+ // allocate any new floating point registers for args
+ // This prevents us from backfilling a subsequent arg into d7
+ //
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+ }
+ else
+ {
+ // Check if the last register needed is still in the int argument register range.
+ isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
+
+ // Did we run out of registers when we had a 16-byte struct (size == 2)?
+ // (i.e we only have one register remaining but we needed two registers to pass this arg)
+ // This prevents us from backfilling a subsequent arg into x7
+ //
+ if (!isRegArg && (size > 1))
+ {
+ // We also must update intArgRegNum so that we no longer try to
+ // allocate any new general purpose registers for args
+ //
+ intArgRegNum = maxRegArgs;
+ }
+ }
+#else // not _TARGET_ARM_ or _TARGET_ARM64_
+
+#if defined(UNIX_AMD64_ABI)
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Here a struct can be passed in registers, following the classification of its members and size.
+ // Now make sure there are actually enough registers to do so.
+ if (isStructArg)
+ {
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ structIntRegs++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ structFloatRegs++;
+ }
+ }
+
+ isRegArg = ((nextFltArgRegNum + structFloatRegs) <= MAX_FLOAT_REG_ARG) &&
+ ((intArgRegNum + structIntRegs) <= MAX_REG_ARG);
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ if (passUsingFloatRegs)
+ {
+ isRegArg = nextFltArgRegNum < MAX_FLOAT_REG_ARG;
+ }
+ else
+ {
+ isRegArg = intArgRegNum < MAX_REG_ARG;
+ }
+ }
+#else // !defined(UNIX_AMD64_ABI)
+ isRegArg = (intArgRegNum + (size - 1)) < maxRegArgs;
+#endif // !defined(UNIX_AMD64_ABI)
+#endif // _TARGET_ARM_
+ }
+ else
+ {
+ isRegArg = false;
+ }
+
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ if (call->IsTailCallViaHelper())
+ {
+ // We have already (before calling fgMorphArgs()) appended the 4 special args
+ // required by the x86 tailcall helper. These args are required to go on the
+ // stack. Force them to the stack here.
+ assert(numArgs >= 4);
+ if (argIndex >= numArgs - 4)
+ {
+ isRegArg = false;
+ }
+ }
+#endif // defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+
+ } // end !lateArgsComputed
+
+ //
+ // Now we know if the argument goes in registers or not and how big it is,
+ // whether we had to just compute it or this is a re-morph call and we looked it up.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ // If we ever allocate a floating point argument to the stack, then all
+ // subsequent HFA/float/double arguments go on the stack.
+ if (!isRegArg && passUsingFloatRegs)
+ {
+ for (; fltArgRegNum < MAX_FLOAT_REG_ARG; ++fltArgRegNum)
+ {
+ fltArgSkippedRegMask |= genMapArgNumToRegMask(fltArgRegNum, TYP_FLOAT);
+ }
+ }
+
+ // If we think we're going to split a struct between integer registers and the stack, check to
+ // see if we've already assigned a floating-point arg to the stack.
+ if (isRegArg && // We decided above to use a register for the argument
+ !passUsingFloatRegs && // We're using integer registers
+ (intArgRegNum + size > MAX_REG_ARG) && // We're going to split a struct type onto registers and stack
+ anyFloatStackArgs) // We've already used the stack for a floating-point argument
+ {
+ isRegArg = false; // Change our mind; don't pass this struct partially in registers
+
+ // Skip the rest of the integer argument registers
+ for (; intArgRegNum < MAX_REG_ARG; ++intArgRegNum)
+ {
+ argSkippedRegMask |= genMapArgNumToRegMask(intArgRegNum, TYP_I_IMPL);
+ }
+ }
+
+#endif // _TARGET_ARM_
+
+ if (isRegArg)
+ {
+ regNumber nextRegNum = REG_STK;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ regNumber nextOtherRegNum = REG_STK;
+ unsigned int structFloatRegs = 0;
+ unsigned int structIntRegs = 0;
+
+ if (isStructArg && structDesc.passedInRegisters)
+ {
+ // It is a struct passed in registers. Assign the next available register.
+ assert((structDesc.eightByteCount <= 2) && "Too many eightbytes.");
+ regNumber* nextRegNumPtrs[2] = {&nextRegNum, &nextOtherRegNum};
+ for (unsigned int i = 0; i < structDesc.eightByteCount; i++)
+ {
+ if (structDesc.IsIntegralSlot(i))
+ {
+ *nextRegNumPtrs[i] = genMapIntRegArgNumToRegNum(intArgRegNum + structIntRegs);
+ structIntRegs++;
+ }
+ else if (structDesc.IsSseSlot(i))
+ {
+ *nextRegNumPtrs[i] = genMapFloatRegArgNumToRegNum(nextFltArgRegNum + structFloatRegs);
+ structFloatRegs++;
+ }
+ }
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ // fill in or update the argInfo table
+ nextRegNum = passUsingFloatRegs ? genMapFloatRegArgNumToRegNum(nextFltArgRegNum)
+ : genMapIntRegArgNumToRegNum(intArgRegNum);
+ }
+
+#ifdef _TARGET_AMD64_
+#ifndef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ assert(size == 1);
+#endif
+#endif
+
+ fgArgTabEntryPtr newArgEntry;
+ if (lateArgsComputed)
+ {
+ // This is a register argument - possibly update it in the table
+ newArgEntry = call->fgArgInfo->RemorphRegArg(argIndex, argx, args, nextRegNum, size, argAlign);
+ }
+ else
+ {
+ bool isNonStandard = false;
+
+#ifndef LEGACY_BACKEND
+ // If there are nonstandard args (outside the calling convention), they were inserted above
+ // and noted in a table, so we can recognize them here and build their argInfo.
+ //
+ // They should not affect the placement of any other args or stack space required.
+ // Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
+ isNonStandard = nonStandardArgs.FindReg(argx, &nextRegNum);
+#endif // !LEGACY_BACKEND
+
+ // This is a register argument - put it in the table
+ newArgEntry = call->fgArgInfo->AddRegArg(argIndex, argx, args, nextRegNum, size, argAlign
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ ,
+ isStructArg, nextOtherRegNum, &structDesc
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ );
+
+ newArgEntry->SetIsHfaRegArg(passUsingFloatRegs &&
+ isHfaArg); // Note: on Arm32 an HFA is passed in int regs for varargs
+ newArgEntry->SetIsBackFilled(isBackFilled);
+ newArgEntry->isNonStandard = isNonStandard;
+ }
+
+ if (newArgEntry->isNonStandard)
+ {
+ continue;
+ }
+
+ // Set up the next intArgRegNum and fltArgRegNum values.
+ if (!isBackFilled)
+ {
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (isStructArg)
+ {
+ intArgRegNum += structIntRegs;
+ fltArgRegNum += structFloatRegs;
+ }
+ else
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ {
+ if (passUsingFloatRegs)
+ {
+ fltArgRegNum += size;
+
+#ifdef WINDOWS_AMD64_ABI
+ // Whenever we pass a floating point register argument
+ // we skip the corresponding integer register argument
+ intArgRegNum = min(intArgRegNum + size, MAX_REG_ARG);
+#endif // WINDOWS_AMD64_ABI
+#ifdef _TARGET_ARM_
+ if (fltArgRegNum > MAX_FLOAT_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert(varTypeIsStruct(argx));
+ unsigned numRegsPartial = size - (fltArgRegNum - MAX_FLOAT_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ fltArgRegNum = MAX_FLOAT_REG_ARG;
+ }
+#endif // _TARGET_ARM_
+ }
+ else
+ {
+ if (hasFixedRetBuffReg() && (nextRegNum == theFixedRetBuffReg()))
+ {
+ // we are setting up the fixed return buffer register argument
+ // so don't increment intArgRegNum
+ assert(size == 1);
+ }
+ else
+ {
+ // Increment intArgRegNum by 'size' registers
+ intArgRegNum += size;
+ }
+
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
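+ // Mirror of the float case above: on Windows x64, passing an integer
+ // register argument likewise skips the corresponding floating point register.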
+ fltArgRegNum = min(fltArgRegNum + size, MAX_FLOAT_REG_ARG);
+#endif // _TARGET_AMD64_
+#ifdef _TARGET_ARM_
+ if (intArgRegNum > MAX_REG_ARG)
+ {
+ // This indicates a partial enregistration of a struct type
+ assert((isStructArg) || argx->OperIsCopyBlkOp() ||
+ (argx->gtOper == GT_COMMA && (args->gtFlags & GTF_ASG)));
+ unsigned numRegsPartial = size - (intArgRegNum - MAX_REG_ARG);
+ assert((unsigned char)numRegsPartial == numRegsPartial);
+ call->fgArgInfo->SplitArg(argIndex, numRegsPartial, size - numRegsPartial);
+ intArgRegNum = MAX_REG_ARG;
+ fgPtrArgCntCur += size - numRegsPartial;
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+ }
+ else // We have an argument that is not passed in a register
+ {
+ fgPtrArgCntCur += size;
+
+ // If the register arguments have not been determined then we must fill in the argInfo
+
+ if (lateArgsComputed)
+ {
+ // This is a stack argument - possibly update it in the table
+ call->fgArgInfo->RemorphStkArg(argIndex, argx, args, size, argAlign);
+ }
+ else
+ {
+ // This is a stack argument - put it in the table
+ call->fgArgInfo->AddStkArg(argIndex, argx, args, size,
+ argAlign FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(isStructArg));
+ }
+ }
+
+ if (copyBlkClass != NO_CLASS_HANDLE)
+ {
+ noway_assert(!lateArgsComputed);
+ fgMakeOutgoingStructArgCopy(call, args, argIndex,
+ copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(&structDesc));
+
+ // This can cause a GTF_EXCEPT flag to be set.
+ // TODO-CQ: Fix the cases where this happens. We shouldn't be adding any new flags.
+ // This currently occurs in the case where we are re-morphing the args on x86/RyuJIT, and
+ // there are no register arguments. Then lateArgsComputed is never true, so we keep re-copying
+ // any struct arguments.
+ // i.e. assert(((call->gtFlags & GTF_EXCEPT) != 0) || ((args->Current()->gtFlags & GTF_EXCEPT) == 0)
+ flagsSummary |= (args->Current()->gtFlags & GTF_EXCEPT);
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ hasStackArgCopy = true;
+#endif
+ }
+
+#ifndef LEGACY_BACKEND
+ if (argx->gtOper == GT_MKREFANY)
+ {
+ NYI_X86("MKREFANY");
+
+ // 'Lower' the MKREFANY tree and insert it.
+ noway_assert(!lateArgsComputed);
+
+ // Get a new temp
+ // Here we don't need unsafe value cls check since the addr of temp is used only in mkrefany
+ unsigned tmp = lvaGrabTemp(true DEBUGARG("by-value mkrefany struct argument"));
+ lvaSetStruct(tmp, impGetRefAnyClass(), false);
+
+ // Build the mkrefany as a comma node:
+ // (tmp.ptr=argx),(tmp.type=handle)
+ GenTreeLclFld* destPtrSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, dataPtr));
+ GenTreeLclFld* destTypeSlot = gtNewLclFldNode(tmp, TYP_I_IMPL, offsetof(CORINFO_RefAny, type));
+ destPtrSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyDataField());
+ destPtrSlot->gtFlags |= GTF_VAR_DEF;
+ destTypeSlot->gtFieldSeq = GetFieldSeqStore()->CreateSingleton(GetRefanyTypeField());
+ destTypeSlot->gtFlags |= GTF_VAR_DEF;
+
+ GenTreePtr asgPtrSlot = gtNewAssignNode(destPtrSlot, argx->gtOp.gtOp1);
+ GenTreePtr asgTypeSlot = gtNewAssignNode(destTypeSlot, argx->gtOp.gtOp2);
+ GenTreePtr asg = gtNewOperNode(GT_COMMA, TYP_VOID, asgPtrSlot, asgTypeSlot);
+
+ // Change the expression to "(tmp=val)"
+ args->gtOp.gtOp1 = asg;
+
+ // EvalArgsToTemps will cause tmp to actually get loaded as the argument
+ call->fgArgInfo->EvalToTmp(argIndex, tmp, asg);
+ lvaSetVarAddrExposed(tmp);
+ }
+#endif // !LEGACY_BACKEND
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (isStructArg && !isRegArg)
+ {
+ nonRegPassedStructSlots += size;
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ argSlots += size;
+ }
+ } // end foreach argument loop
+
+ if (!lateArgsComputed)
+ {
+ call->fgArgInfo->ArgsComplete();
+#ifdef LEGACY_BACKEND
+ call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
+#if defined(_TARGET_ARM_)
+ call->gtCallRegUsedMask &= ~argSkippedRegMask;
+#endif
+ if (fltArgRegNum > 0)
+ {
+#if defined(_TARGET_ARM_)
+ call->gtCallRegUsedMask |= genFltAllRegArgMask(fltArgRegNum) & ~fltArgSkippedRegMask;
+#endif
+ }
+#endif // LEGACY_BACKEND
+ }
+
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
+
+ /* Process the function address, if indirect call */
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ call->gtCallAddr = fgMorphTree(call->gtCallAddr);
+ }
+
+ call->fgArgInfo->RecordStkLevel(fgPtrArgCntCur);
+
+ if ((call->gtCallType == CT_INDIRECT) && (call->gtCallCookie != nullptr))
+ {
+ fgPtrArgCntCur++;
+ }
+
+ /* Remember the maximum value we ever see */
+
+ if (fgPtrArgCntMax < fgPtrArgCntCur)
+ {
+ fgPtrArgCntMax = fgPtrArgCntCur;
+ }
+
+ /* The call will pop all the arguments we pushed */
+
+ fgPtrArgCntCur = genPtrArgCntSav;
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // Update the outgoing argument size.
+ // If the call is a fast tail call, it will setup its arguments in incoming arg
+ // area instead of the out-going arg area. Therefore, don't consider fast tail
+ // calls to update lvaOutgoingArgSpaceSize.
+ if (!call->IsFastTailCall())
+ {
+ unsigned preallocatedArgCount = call->fgArgInfo->GetNextSlotNum();
+
+#if defined(UNIX_AMD64_ABI)
+ opts.compNeedToAlignFrame = true; // this is currently required for the UNIX ABI to work correctly
+
+ // ToDo: Remove this re-calculation of preallocatedArgCount and use the value assigned above.
+
+ // First slots go in registers only, no stack needed.
+ // TODO-Amd64-Unix-CQ This calculation is only accurate for integer arguments,
+ // and ignores floating point args (it is overly conservative in that case).
+ preallocatedArgCount = nonRegPassedStructSlots;
+ if (argSlots > MAX_REG_ARG)
+ {
+ preallocatedArgCount += argSlots - MAX_REG_ARG;
+ }
+#endif // UNIX_AMD64_ABI
+
+ // Check if we need to increase the size of our Outgoing Arg Space
+ if (preallocatedArgCount * REGSIZE_BYTES > lvaOutgoingArgSpaceSize)
+ {
+ lvaOutgoingArgSpaceSize = preallocatedArgCount * REGSIZE_BYTES;
+
+ // If a function has localloc, we will need to move the outgoing arg space when the
+ // localloc happens. When we do this, we need to maintain stack alignment. To avoid
+ // leaving alignment-related holes when doing this move, make sure the outgoing
+ // argument space size is a multiple of the stack alignment by aligning up to the next
+ // stack alignment boundary.
+ if (compLocallocUsed)
+ {
+ lvaOutgoingArgSpaceSize = (unsigned)roundUp(lvaOutgoingArgSpaceSize, STACK_ALIGN);
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("argSlots=%d, preallocatedArgCount=%d, nextSlotNum=%d, lvaOutgoingArgSpaceSize=%d\n", argSlots,
+ preallocatedArgCount, call->fgArgInfo->GetNextSlotNum(), lvaOutgoingArgSpaceSize);
+ }
+#endif
+ }
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ /* Update the 'side effect' flags value for the call */
+
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+
+ // If the register arguments have already been determined
+ // or we have no register arguments then we don't need to
+ // call SortArgs() and EvalArgsToTemps()
+ //
+ // For UNIX_AMD64, the condition without hasStackArgCopy cannot catch
+ // all cases of fgMakeOutgoingStructArgCopy() being called. hasStackArgCopy
+ // is added to make sure that EvalArgsToTemps() is called in those cases too.
+ if (!lateArgsComputed && (call->fgArgInfo->HasRegArgs()
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ || hasStackArgCopy
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ ))
+ {
+ // This is the first time that we morph this call AND it has register arguments.
+ // Follow into the code below and do the 'defer or eval to temp' analysis.
+
+ call->fgArgInfo->SortArgs();
+
+ call->fgArgInfo->EvalArgsToTemps();
+
+ // We may have updated the arguments
+ if (call->gtCallArgs)
+ {
+ UpdateGT_LISTFlags(call->gtCallArgs);
+ }
+ }
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ // Rewrite the struct args to be passed by value on stack or in registers.
+ fgMorphSystemVStructArgs(call, hasStructArgument);
+
+#else // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifndef LEGACY_BACKEND
+ // In the future we can migrate UNIX_AMD64 to use this
+ // method instead of fgMorphSystemVStructArgs
+
+ // We only build GT_LISTs for MultiReg structs for the RyuJIT backend
+ if (hasMultiregStructArgs)
+ {
+ fgMorphMultiregStructArgs(call);
+ }
+#endif // LEGACY_BACKEND
+
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ fgArgInfoPtr argInfo = call->fgArgInfo;
+ for (unsigned curInx = 0; curInx < argInfo->ArgCount(); curInx++)
+ {
+ fgArgTabEntryPtr curArgEntry = argInfo->ArgTable()[curInx];
+ curArgEntry->Dump();
+ }
+ }
+#endif
+
+ return call;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+// fgMorphSystemVStructArgs:
+// Rewrite the struct args to be passed by value on stack or in registers.
+//
+// args:
+// call: The call whose arguments need to be morphed.
+// hasStructArgument: Whether this call has struct arguments.
+//
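+// Illustrative sketch only (the local number, field types and offsets below are
+// examples, not taken from a particular test case): for a 16-byte struct local V02
+// classified as one INTEGER and one SSE eightbyte, the argument
+//     LCL_VAR    V02 (TYP_STRUCT)
+// is rewritten into
+//     GT_LIST(LCL_FLD long V02 [+0], GT_LIST(LCL_FLD double V02 [+8], nullptr))
+// so that each eightbyte maps to one argument register. Structs that are not
+// passed in registers keep (or are given) the GT_OBJ/address form instead.
+//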
+void Compiler::fgMorphSystemVStructArgs(GenTreeCall* call, bool hasStructArgument)
+{
+ unsigned flagsSummary = 0;
+ GenTreePtr args;
+ GenTreePtr argx;
+
+ if (hasStructArgument)
+ {
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself;
+ // otherwise it points to the corresponding node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+ GenTreePtr arg = argx;
+ bool argListCreated = false;
+
+ var_types type = arg->TypeGet();
+
+ if (varTypeIsStruct(type))
+ {
+ var_types originalType = type;
+ // If we have already processed the arg...
+ if (arg->OperGet() == GT_LIST && varTypeIsStruct(arg))
+ {
+ continue;
+ }
+
+ // If already OBJ it is set properly already.
+ if (arg->OperGet() == GT_OBJ)
+ {
+ assert(!fgEntryPtr->structDesc.passedInRegisters);
+ continue;
+ }
+
+ assert(arg->OperGet() == GT_LCL_VAR || arg->OperGet() == GT_LCL_FLD ||
+ (arg->OperGet() == GT_ADDR &&
+ (arg->gtOp.gtOp1->OperGet() == GT_LCL_FLD || arg->gtOp.gtOp1->OperGet() == GT_LCL_VAR)));
+
+ GenTreeLclVarCommon* lclCommon =
+ arg->OperGet() == GT_ADDR ? arg->gtOp.gtOp1->AsLclVarCommon() : arg->AsLclVarCommon();
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ if (fgEntryPtr->structDesc.eightByteCount == 1)
+ {
+ // Change the type; the code below will change the LclVar to a LCL_FLD
+ type = GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ }
+ else if (fgEntryPtr->structDesc.eightByteCount == 2)
+ {
+ // Create LCL_FLD for each eightbyte.
+ argListCreated = true;
+
+ // Second eightbyte.
+ GenTreeLclFld* newLclField = new (this, GT_LCL_FLD)
+ GenTreeLclFld(GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc
+ .eightByteClassifications[1],
+ fgEntryPtr->structDesc.eightByteSizes[1]),
+ lclCommon->gtLclNum, fgEntryPtr->structDesc.eightByteOffsets[1]);
+
+ GenTreeArgList* aggregate = gtNewAggregate(newLclField);
+ aggregate->gtType = originalType; // Preserve the type. It is a special case.
+ newLclField->gtFieldSeq = FieldSeqStore::NotAField();
+
+ // First field
+ arg->AsLclFld()->gtFieldSeq = FieldSeqStore::NotAField();
+ arg->gtType =
+ GetTypeFromClassificationAndSizes(fgEntryPtr->structDesc.eightByteClassifications[0],
+ fgEntryPtr->structDesc.eightByteSizes[0]);
+ arg = aggregate->Prepend(this, arg);
+ arg->gtType = type; // Preserve the type. It is a special case.
+ }
+ else
+ {
+ assert(false && "More than two eightbytes detected for CLR."); // No more than two eightbytes
+ // for the CLR.
+ }
+ }
+
+ // If we didn't change the type of the struct, it means
+ // its classification doesn't support being passed directly in a
+ // register, so we need to pass a pointer to the destination
+ // where we copied the struct to.
+ if (!argListCreated)
+ {
+ if (fgEntryPtr->structDesc.passedInRegisters)
+ {
+ arg->gtType = type;
+ }
+ else
+ {
+ // Make sure this is an addr node.
+ if (arg->OperGet() != GT_ADDR && arg->OperGet() != GT_LCL_VAR_ADDR)
+ {
+ arg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, arg);
+ }
+
+ assert(arg->OperGet() == GT_ADDR || arg->OperGet() == GT_LCL_VAR_ADDR);
+
+ // Create an Obj of the temp to use it as a call argument.
+ arg = gtNewObjNode(lvaGetStruct(lclCommon->gtLclNum), arg);
+ }
+ }
+ }
+
+ if (argx != arg)
+ {
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ fgEntryPtr->node = arg;
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
+// call fgMorphMultiregStructArg on each of them.
+//
+// Arguments:
+// call: a GenTreeCall node that has one or more TYP_STRUCT arguments
+//
+// Notes:
+// We only call fgMorphMultiregStructArg for the register-passed TYP_STRUCT arguments.
+// The call to fgMorphMultiregStructArg will mutate the argument into the GT_LIST form
+// which is only used for register arguments.
+// If this method fails to find any TYP_STRUCT arguments it will assert.
+//
+void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
+{
+ GenTreePtr args;
+ GenTreePtr argx;
+ bool foundStructArg = false;
+ unsigned initialFlags = call->gtFlags;
+ unsigned flagsSummary = 0;
+ fgArgInfoPtr allArgInfo = call->fgArgInfo;
+
+ // Currently only ARM64 uses this method to morph the MultiReg struct args;
+ // in the future AMD64_UNIX (and ARM32 for HFAs) will also use this method
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ NYI_ARM("fgMorphMultiregStructArgs");
+#endif
+#ifdef _TARGET_X86_
+ assert(!"Logic error: no MultiregStructArgs for X86");
+#endif
+#ifdef _TARGET_AMD64_
+#if defined(UNIX_AMD64_ABI)
+ NYI_AMD64("fgMorphMultiregStructArgs (UNIX ABI)");
+#else // WINDOWS_AMD64_ABI
+ assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
+#endif // !UNIX_AMD64_ABI
+#endif
+
+ for (args = call->gtCallArgs; args != nullptr; args = args->gtOp.gtOp2)
+ {
+ // For late arguments the arg tree that is overridden is in the gtCallLateArgs list.
+ // For such late args the gtCallArgList contains the setup arg node (evaluating the arg.)
+ // The tree from the gtCallLateArgs list is passed to the callee. The fgArgEntry node contains the mapping
+ // between the nodes in both lists. If the arg is not a late arg, fgArgEntry->node points to itself;
+ // otherwise it points to the corresponding node in the late args list.
+ bool isLateArg = (args->gtOp.gtOp1->gtFlags & GTF_LATE_ARG) != 0;
+ fgArgTabEntryPtr fgEntryPtr = gtArgEntryByNode(call, args->gtOp.gtOp1);
+ assert(fgEntryPtr != nullptr);
+ GenTreePtr argx = fgEntryPtr->node;
+ GenTreePtr lateList = nullptr;
+ GenTreePtr lateNode = nullptr;
+
+ if (isLateArg)
+ {
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->IsList());
+
+ GenTreePtr argNode = list->Current();
+ if (argx == argNode)
+ {
+ lateList = list;
+ lateNode = argNode;
+ break;
+ }
+ }
+ assert(lateList != nullptr && lateNode != nullptr);
+ }
+
+ GenTreePtr arg = argx;
+
+ if (arg->TypeGet() == TYP_STRUCT)
+ {
+ foundStructArg = true;
+
+ arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
+
+ // Did we replace 'argx' with a new tree?
+ if (arg != argx)
+ {
+ fgEntryPtr->node = arg; // Record the new value for the arg in the fgEntryPtr->node
+
+ // link the new arg node into either the late arg list or the gtCallArgs list
+ if (isLateArg)
+ {
+ lateList->gtOp.gtOp1 = arg;
+ }
+ else
+ {
+ args->gtOp.gtOp1 = arg;
+ }
+ }
+ }
+ }
+
+ // We should only call this method when we actually have one or more multireg struct args
+ assert(foundStructArg);
+
+ // Update the flags
+ call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
+}
+
+//-----------------------------------------------------------------------------
+// fgMorphMultiregStructArg: Given a multireg TYP_STRUCT arg from a call argument list
+// Morph the argument into a set of GT_LIST nodes.
+//
+// Arguments:
+// arg - A GenTree node containing a TYP_STRUCT arg that
+// is to be passed in multiple registers
+// fgEntryPtr - the fgArgTabEntry information for the current 'arg'
+//
+// Notes:
+// arg must be a GT_OBJ or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT that is suitable
+// for passing in multiple registers.
+// If arg is a LclVar we check whether it is struct promoted with the right number of fields,
+// and if the fields are at the appropriate offsets we will use the struct promoted fields
+// in the GT_LIST nodes that we create.
+// If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
+// we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
+// this also forces the struct to be stack allocated into the local frame.
+// For the GT_OBJ case we will clone the address expression and generate two (or more)
+// indirections.
+// Currently the implementation only handles ARM64 and will NYI for other architectures.
+//
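+// Illustrative sketch only (a hypothetical 16-byte non-HFA struct local V03 on ARM64):
+//     OBJ(struct<16>, ADDR(LCL_VAR V03))
+// becomes
+//     GT_LIST(LCL_FLD long V03 [+0], GT_LIST(LCL_FLD long V03 [+8], nullptr))
+// while an HFA of four floats would instead become a four-element list of
+// TYP_FLOAT GT_LCL_FLD (or promoted field LCL_VAR) nodes.
+//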
+GenTreePtr Compiler::fgMorphMultiregStructArg(GenTreePtr arg, fgArgTabEntryPtr fgEntryPtr)
+{
+ assert(arg->TypeGet() == TYP_STRUCT);
+
+#ifndef _TARGET_ARM64_
+ NYI("fgMorphMultiregStructArg requires implementation for this target");
+#endif
+
+#if FEATURE_MULTIREG_ARGS
+ // Examine 'arg' and setup argValue objClass and structSize
+ //
+ CORINFO_CLASS_HANDLE objClass = NO_CLASS_HANDLE;
+ GenTreePtr argValue = arg; // normally argValue will be arg, but see right below
+ unsigned structSize = 0;
+
+ if (arg->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = arg->AsObj();
+ objClass = argObj->gtClass;
+ structSize = info.compCompHnd->getClassSize(objClass);
+
+ // If we have a GT_OBJ of a GT_ADDR then we set argValue to the child node of the GT_ADDR
+ //
+ if (argObj->gtOp1->OperGet() == GT_ADDR)
+ {
+ argValue = argObj->gtOp1->gtOp.gtOp1;
+ }
+ }
+ else if (arg->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = arg->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ objClass = lvaGetStruct(varNum);
+ structSize = varDsc->lvExactSize;
+ }
+ noway_assert(objClass != nullptr);
+
+ var_types hfaType = TYP_UNDEF;
+ var_types elemType = TYP_UNDEF;
+ unsigned elemCount = 0;
+ unsigned elemSize = 0;
+ var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
+
+ hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
+ if (varTypeIsFloating(hfaType))
+ {
+ elemType = hfaType;
+ elemSize = genTypeSize(elemType);
+ elemCount = structSize / elemSize;
+ assert(elemSize * elemCount == structSize);
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ type[inx] = elemType;
+ }
+ }
+ else
+ {
+ assert(structSize <= 2 * TARGET_POINTER_SIZE);
+ BYTE gcPtrs[2] = {TYPE_GC_NONE, TYPE_GC_NONE};
+ info.compCompHnd->getClassGClayout(objClass, &gcPtrs[0]);
+ elemCount = 2;
+ type[0] = getJitGCType(gcPtrs[0]);
+ type[1] = getJitGCType(gcPtrs[1]);
+
+ if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
+ {
+ // We can safely widen this to 16 bytes since we are loading from
+ // a GT_LCL_VAR or a GT_LCL_FLD which is properly padded and
+ // lives in the stack frame or will be a promoted field.
+ //
+ elemSize = TARGET_POINTER_SIZE;
+ structSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else // we must have a GT_OBJ
+ {
+ assert(argValue->OperGet() == GT_OBJ);
+
+ // We need to load the struct from an arbitrary address
+ // and we can't read past the end of the structSize
+ // We adjust the second load type here
+ //
+ if (structSize < 2 * TARGET_POINTER_SIZE)
+ {
+ switch (structSize - TARGET_POINTER_SIZE)
+ {
+ case 1:
+ type[1] = TYP_BYTE;
+ break;
+ case 2:
+ type[1] = TYP_SHORT;
+ break;
+ case 4:
+ type[1] = TYP_INT;
+ break;
+ default:
+ noway_assert(!"NYI: odd sized struct in fgMorphMultiregStructArg");
+ break;
+ }
+ }
+ }
+ }
+ // We should still have a TYP_STRUCT
+ assert(argValue->TypeGet() == TYP_STRUCT);
+
+ GenTreeArgList* newArg = nullptr;
+
+ // Are we passing a struct LclVar?
+ //
+ if (argValue->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ // At this point any TYP_STRUCT LclVar must be a 16-byte struct
+ // or an HFA struct, both which are passed by value.
+ //
+ assert((varDsc->lvSize() == 2 * TARGET_POINTER_SIZE) || varDsc->lvIsHfa());
+
+ varDsc->lvIsMultiRegArg = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Multireg struct argument V%02u : ");
+ fgEntryPtr->Dump();
+ }
+#endif // DEBUG
+
+ // This local variable must match the layout of the 'objClass' type exactly
+ if (varDsc->lvIsHfa())
+ {
+ // We have a HFA struct
+ noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
+ noway_assert(elemSize == genTypeSize(elemType));
+ noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
+ noway_assert(elemSize * elemCount == varDsc->lvExactSize);
+
+ for (unsigned inx = 0; (inx < elemCount); inx++)
+ {
+ noway_assert(type[inx] == elemType);
+ }
+ }
+ else
+ {
+ // We must have a 16-byte struct (non-HFA)
+ noway_assert(elemCount == 2);
+
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ CorInfoGCType currentGcLayoutType = (CorInfoGCType)varDsc->lvGcLayout[inx];
+
+ // We setup the type[inx] value above using the GC info from 'objClass'
+ // This GT_LCL_VAR must have the same GC layout info
+ //
+ if (currentGcLayoutType != TYPE_GC_NONE)
+ {
+ noway_assert(type[inx] == getJitGCType((BYTE)currentGcLayoutType));
+ }
+ else
+ {
+ // We may have used a small type when we set up the type[inx] values above
+ // We can safely widen this to TYP_I_IMPL
+ type[inx] = TYP_I_IMPL;
+ }
+ }
+ }
+
+ // Is this LclVar a promoted struct with exactly 2 fields?
+ // TODO-ARM64-CQ: Support struct promoted HFA types here
+ if (varDsc->lvPromoted && (varDsc->lvFieldCnt == 2) && !varDsc->lvIsHfa())
+ {
+ // Do we have two promoted fields that start at offsets 0 and 8?
+ unsigned loVarNum = lvaGetFieldLocal(varDsc, 0);
+ unsigned hiVarNum = lvaGetFieldLocal(varDsc, TARGET_POINTER_SIZE);
+
+ // Did we find the promoted fields at the necessary offsets?
+ if ((loVarNum != BAD_VAR_NUM) && (hiVarNum != BAD_VAR_NUM))
+ {
+ LclVarDsc* loVarDsc = &lvaTable[loVarNum];
+ LclVarDsc* hiVarDsc = &lvaTable[hiVarNum];
+
+ var_types loType = loVarDsc->lvType;
+ var_types hiType = hiVarDsc->lvType;
+
+ if (varTypeIsFloating(loType) || varTypeIsFloating(hiType))
+ {
+ // TODO-LSRA - It currently doesn't support the passing of floating point LCL_VARS in the integer
+ // registers. So for now we will use GT_LCLFLD's to pass this struct (it won't be enregistered)
+ //
+ JITDUMP("Multireg struct V%02u will be passed using GT_LCLFLD because it has float fields.\n",
+ varNum);
+ //
+ // we call lvaSetVarDoNotEnregister and do the proper transformation below.
+ //
+ }
+ else
+ {
+ // We can use the struct promoted field as the two arguments
+
+ GenTreePtr loLclVar = gtNewLclvNode(loVarNum, loType, loVarNum);
+ GenTreePtr hiLclVar = gtNewLclvNode(hiVarNum, hiType, hiVarNum);
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLVAR-LO, LIST(LCLVAR-HI, nullptr))
+ //
+ newArg = gtNewAggregate(hiLclVar)->Prepend(this, loLclVar);
+ }
+ }
+ }
+ else
+ {
+ //
+ // We will create a list of GT_LCL_FLDs nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+ }
+ }
+
+ // If we didn't set newArg to a new List Node tree
+ //
+ if (newArg == nullptr)
+ {
+ if (fgEntryPtr->regNum == REG_STK)
+ {
+ // We leave this stack passed argument alone
+ return arg;
+ }
+
+ // Are we passing a GT_LCL_FLD (or a GT_LCL_VAR that was not struct promoted)?
+ // A GT_LCL_FLD could also contain a 16-byte struct or an HFA struct inside it.
+ //
+ if ((argValue->OperGet() == GT_LCL_FLD) || (argValue->OperGet() == GT_LCL_VAR))
+ {
+ GenTreeLclVarCommon* varNode = argValue->AsLclVarCommon();
+ unsigned varNum = varNode->gtLclNum;
+ assert(varNum < lvaCount);
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ unsigned baseOffset = (argValue->OperGet() == GT_LCL_FLD) ? argValue->gtLclFld.gtLclOffs : 0;
+ unsigned lastOffset = baseOffset + (elemCount * elemSize);
+
+ // The allocated size of our LocalVar must be at least as big as lastOffset
+ assert(varDsc->lvSize() >= lastOffset);
+
+ if (varDsc->lvStructGcCount > 0)
+ {
+ // alignment of the baseOffset is required
+ noway_assert((baseOffset % TARGET_POINTER_SIZE) == 0);
+ noway_assert(elemSize == TARGET_POINTER_SIZE);
+ unsigned baseIndex = baseOffset / TARGET_POINTER_SIZE;
+ const BYTE* gcPtrs = varDsc->lvGcLayout; // Get the GC layout for the local variable
+ for (unsigned inx = 0; (inx < elemCount); inx++)
+ {
+ // The GC information must match what we setup using 'objClass'
+ noway_assert(type[inx] == getJitGCType(gcPtrs[baseIndex + inx]));
+ }
+ }
+ else // this varDsc contains no GC pointers
+ {
+ for (unsigned inx = 0; inx < elemCount; inx++)
+ {
+ // The GC information must match what we setup using 'objClass'
+ noway_assert(!varTypeIsGC(type[inx]));
+ }
+ }
+
+ //
+ // We create a list of GT_LCL_FLDs nodes to pass this struct
+ //
+ lvaSetVarDoNotEnregister(varNum DEBUG_ARG(DNER_LocalField));
+
+ // Start building our list from the last element
+ unsigned offset = lastOffset;
+ unsigned inx = elemCount;
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(ADDR(LCLVAR))
+ // with a LIST(LCLFLD-LO, LIST(LCLFLD-HI, nullptr) ...)
+ //
+ while (inx > 0)
+ {
+ inx--;
+ offset -= elemSize;
+ GenTreePtr nextLclFld = gtNewLclFldNode(varNum, type[inx], offset);
+ if (newArg == nullptr)
+ {
+ newArg = gtNewAggregate(nextLclFld);
+ }
+ else
+ {
+ newArg = newArg->Prepend(this, nextLclFld);
+ }
+ }
+ }
+ // Are we passing a GT_OBJ struct?
+ //
+ else if (argValue->OperGet() == GT_OBJ)
+ {
+ GenTreeObj* argObj = argValue->AsObj();
+ GenTreePtr baseAddr = argObj->gtOp1;
+ var_types addrType = baseAddr->TypeGet();
+
+ // Create a new tree for 'arg'
+ // replace the existing LDOBJ(EXPR)
+ // with a LIST(IND(EXPR), LIST(IND(EXPR+8), nullptr) ...)
+ //
+
+ // Start building our list from the last element
+ unsigned offset = structSize;
+ unsigned inx = elemCount;
+ while (inx > 0)
+ {
+ inx--;
+ elemSize = genTypeSize(type[inx]);
+ offset -= elemSize;
+ GenTreePtr curAddr = baseAddr;
+ if (offset != 0)
+ {
+ GenTreePtr baseAddrDup = gtCloneExpr(baseAddr);
+ noway_assert(baseAddrDup != nullptr);
+ curAddr = gtNewOperNode(GT_ADD, addrType, baseAddrDup, gtNewIconNode(offset, TYP_I_IMPL));
+ }
+ else
+ {
+ curAddr = baseAddr;
+ }
+ GenTreePtr curItem = gtNewOperNode(GT_IND, type[inx], curAddr);
+ if (newArg == nullptr)
+ {
+ newArg = gtNewAggregate(curItem);
+ }
+ else
+ {
+ newArg = newArg->Prepend(this, curItem);
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ // If we reach here we should have set newArg to something
+ if (newArg == nullptr)
+ {
+ gtDispTree(argValue);
+ assert(!"Missing case in fgMorphMultiregStructArg");
+ }
+
+ if (verbose)
+ {
+ printf("fgMorphMultiregStructArg created tree:\n");
+ gtDispTree(newArg);
+ }
+#endif
+
+ arg = newArg; // consider calling fgMorphTree(newArg);
+
+#endif // FEATURE_MULTIREG_ARGS
+
+ return arg;
+}
+
+// Make a copy of a struct variable if necessary, to pass to a callee.
+// The resulting argument tree (the copy, or the original local when no copy is
+// needed) is stored back into the arg list and recorded in the fgArgInfo table.
+void Compiler::fgMakeOutgoingStructArgCopy(
+ GenTreeCall* call,
+ GenTree* args,
+ unsigned argIndex,
+ CORINFO_CLASS_HANDLE copyBlkClass FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(
+ const SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR* const structDescPtr))
+{
+ GenTree* argx = args->Current();
+ noway_assert(argx->gtOper != GT_MKREFANY);
+ // See if we need to insert a copy at all
+ // Case 1: don't need a copy if it is the last use of a local. We can't determine that all of the time
+ // but if there is only one use and no loops, the use must be last.
+ GenTreeLclVarCommon* lcl = nullptr;
+ if (argx->OperIsLocal())
+ {
+ lcl = argx->AsLclVarCommon();
+ }
+ else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal())
+ {
+ lcl = argx->AsObj()->Addr()->AsLclVarCommon();
+ }
+ if (lcl != nullptr)
+ {
+ unsigned varNum = lcl->AsLclVarCommon()->GetLclNum();
+ if (lvaIsImplicitByRefLocal(varNum))
+ {
+ LclVarDsc* varDsc = &lvaTable[varNum];
+ // JIT_TailCall helper has an implicit assumption that all tail call arguments live
+ // on the caller's frame. If an argument lives on the caller's caller's frame, it may get
+ // overwritten if that frame is reused for the tail call. Therefore, we should always copy
+ // struct parameters if they are passed as arguments to a tail call.
+ if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt == 1) && !fgMightHaveLoop())
+ {
+ varDsc->lvRefCnt = 0;
+ args->gtOp.gtOp1 = lcl;
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(call, argx);
+ fp->node = lcl;
+
+ JITDUMP("did not have to make outgoing copy for V%2d", varNum);
+ return;
+ }
+ }
+ }
+
+ if (fgOutgoingArgTemps == nullptr)
+ {
+ fgOutgoingArgTemps = hashBv::Create(this);
+ }
+
+ unsigned tmp = 0;
+ bool found = false;
+
+ // Attempt to find a local we have already used for an outgoing struct and reuse it.
+ // We do not reuse within a statement.
+ if (!opts.MinOpts())
+ {
+ indexType lclNum;
+ FOREACH_HBV_BIT_SET(lclNum, fgOutgoingArgTemps)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (typeInfo::AreEquivalent(varDsc->lvVerTypeInfo, typeInfo(TI_STRUCT, copyBlkClass)) &&
+ !fgCurrentlyInUseArgTemps->testBit(lclNum))
+ {
+ tmp = (unsigned)lclNum;
+ found = true;
+ JITDUMP("reusing outgoing struct arg");
+ break;
+ }
+ }
+ NEXT_HBV_BIT_SET;
+ }
+
+ // Create the CopyBlk tree and insert it.
+ if (!found)
+ {
+ // Get a new temp
+ // Here we don't need an unsafe value cls check, since the addr of this temp is used only in the copyblk.
+ tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
+ lvaSetStruct(tmp, copyBlkClass, false);
+ fgOutgoingArgTemps->setBit(tmp);
+ }
+
+ fgCurrentlyInUseArgTemps->setBit(tmp);
+
+ // TYP_SIMD structs should not be enregistered, since the ABI requires them to be
+ // allocated on the stack and their address to be passed.
+ if (lclVarIsSIMDType(tmp))
+ {
+ lvaSetVarDoNotEnregister(tmp DEBUGARG(DNER_IsStruct));
+ }
+
+ // Create a reference to the temp
+ GenTreePtr dest = gtNewLclvNode(tmp, lvaTable[tmp].lvType);
+ dest->gtFlags |= (GTF_DONT_CSE | GTF_VAR_DEF); // This is a def of the local, "entire" by construction.
+
+ // TODO-Cleanup: This probably shouldn't be done here because arg morphing is done prior
+ // to ref counting of the lclVars.
+ lvaTable[tmp].incRefCnts(compCurBB->getBBWeight(this), this);
+
+ GenTreePtr src;
+ if (argx->gtOper == GT_OBJ)
+ {
+ argx->gtFlags &= ~(GTF_ALL_EFFECT) | (argx->AsBlk()->Addr()->gtFlags & GTF_ALL_EFFECT);
+ }
+ else
+ {
+ argx->gtFlags |= GTF_DONT_CSE;
+ }
+
+ // Copy the valuetype to the temp
+ unsigned size = info.compCompHnd->getClassSize(copyBlkClass);
+ GenTreePtr copyBlk = gtNewBlkOpNode(dest, argx, size, false /* not volatile */, true /* copyBlock */);
+ copyBlk = fgMorphCopyBlock(copyBlk);
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ // Do the copy early, and evaluate the temp later (see EvalArgsToTemps)
+ // On Unix, create LCL_FLDs for structs passed in more than one register. See fgMakeTmpArgNode
+ GenTreePtr arg = copyBlk;
+
+#else // FEATURE_FIXED_OUT_ARGS
+
+ // Structs are always on the stack, and thus never need temps
+ // so we have to put the copy and temp all into one expression
+ GenTreePtr arg = fgMakeTmpArgNode(tmp FEATURE_UNIX_AMD64_STRUCT_PASSING_ONLY_ARG(structDescPtr->passedInRegisters));
+
+ // Change the expression to "(tmp=val),tmp"
+ arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ args->gtOp.gtOp1 = arg;
+ call->fgArgInfo->EvalToTmp(argIndex, tmp, arg);
+
+ return;
+}
+
+#ifdef _TARGET_ARM_
+// See declaration for specification comment.
+void Compiler::fgAddSkippedRegsInPromotedStructArg(LclVarDsc* varDsc,
+ unsigned firstArgRegNum,
+ regMaskTP* pArgSkippedRegMask)
+{
+ assert(varDsc->lvPromoted);
+ // There's no way to do these calculations without breaking abstraction and assuming that
+ // integer register arguments are consecutive ints. They are on ARM.
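+ // Worked example (illustrative): for a promoted struct { int i; double d; }
+ // passed starting in r0, field 'i' ends in r0 and field 'd' starts in r2,
+ // so r1 (the padding register) is added to *pArgSkippedRegMask.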
+
+ // To start, figure out what register contains the last byte of the first argument.
+ LclVarDsc* firstFldVarDsc = &lvaTable[varDsc->lvFieldLclStart];
+ unsigned lastFldRegOfLastByte =
+ (firstFldVarDsc->lvFldOffset + firstFldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
+
+ // Now we're keeping track of the register that the last field ended in; see what registers
+ // subsequent fields start in, and whether any are skipped.
+ // (We assume here the invariant that the fields are sorted in offset order.)
+ for (unsigned fldVarOffset = 1; fldVarOffset < varDsc->lvFieldCnt; fldVarOffset++)
+ {
+ unsigned fldVarNum = varDsc->lvFieldLclStart + fldVarOffset;
+ LclVarDsc* fldVarDsc = &lvaTable[fldVarNum];
+ unsigned fldRegOffset = fldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
+ assert(fldRegOffset >= lastFldRegOfLastByte); // Assuming sorted fields.
+ // This loop should enumerate the offsets of any registers skipped:
+ // start at the first register after the one containing the last byte of the previous
+ // field, and stop just before the first register of the current field.
+ for (unsigned skippedRegOffsets = lastFldRegOfLastByte + 1; skippedRegOffsets < fldRegOffset;
+ skippedRegOffsets++)
+ {
+ // If the register number would not be an arg reg, we're done.
+ if (firstArgRegNum + skippedRegOffsets >= MAX_REG_ARG)
+ return;
+ *pArgSkippedRegMask |= genRegMask(regNumber(firstArgRegNum + skippedRegOffsets));
+ }
+ lastFldRegOfLastByte = (fldVarDsc->lvFldOffset + fldVarDsc->lvExactSize - 1) / TARGET_POINTER_SIZE;
+ }
+}
+
+#endif // _TARGET_ARM_
+
+//****************************************************************************
+// fgFixupStructReturn:
+// The companion to impFixupCallStructReturn. Now that the importer is done,
+// change the gtType to the precomputed native return type.
+// Requires that callNode currently has a struct type.
+//
+void Compiler::fgFixupStructReturn(GenTreePtr callNode)
+{
+ assert(varTypeIsStruct(callNode));
+
+ GenTreeCall* call = callNode->AsCall();
+ bool callHasRetBuffArg = call->HasRetBufArg();
+ bool isHelperCall = call->IsHelperCall();
+
+ // Decide on the proper return type for this call that currently returns a struct
+ //
+ CORINFO_CLASS_HANDLE retClsHnd = call->gtRetClsHnd;
+ Compiler::structPassingKind howToReturnStruct;
+ var_types returnType;
+
+ // There are a couple of Helper Calls that say they return a TYP_STRUCT but they
+ // expect this method to re-type this to a TYP_REF (what is in call->gtReturnType)
+ //
+ // CORINFO_HELP_METHODDESC_TO_STUBRUNTIMEMETHOD
+ // CORINFO_HELP_FIELDDESC_TO_STUBRUNTIMEFIELD
+ // CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE_MAYBENULL
+ //
+ if (isHelperCall)
+ {
+ assert(!callHasRetBuffArg);
+ assert(retClsHnd == NO_CLASS_HANDLE);
+
+ // Now that we are past the importer, re-type this node
+ howToReturnStruct = SPK_PrimitiveType;
+ returnType = (var_types)call->gtReturnType;
+ }
+ else
+ {
+ returnType = getReturnTypeForStruct(retClsHnd, &howToReturnStruct);
+ }
+
+ if (howToReturnStruct == SPK_ByReference)
+ {
+ assert(returnType == TYP_UNKNOWN);
+ assert(callHasRetBuffArg);
+ }
+ else
+ {
+ assert(returnType != TYP_UNKNOWN);
+
+ if (returnType != TYP_STRUCT)
+ {
+ // Widen the primitive type if necessary
+ returnType = genActualType(returnType);
+ }
+ call->gtType = returnType;
+ }
+
+#if FEATURE_MULTIREG_RET
+ // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
+ assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
+#else // !FEATURE_MULTIREG_RET
+ // No more struct returns
+ assert(call->TypeGet() != TYP_STRUCT);
+#endif
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // If it was a struct return, it has been transformed into a call
+ // with a return buffer (that returns TYP_VOID) or into a return
+ // of a primitive/enregisterable type
+ assert(!callHasRetBuffArg || (call->TypeGet() == TYP_VOID));
+#endif
+}
+
+/*****************************************************************************
+ *
+ * A little helper used to rearrange nested commutative operations. The
+ * effect is that nested associative, commutative operations are transformed
+ * into a 'left-deep' tree, i.e. into something like this:
+ *
+ * (((a op b) op c) op d) op...
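+ *
+ * For example (illustrative), with oper == GT_ADD the tree for
+ * "a + (b + (c + d))" is re-shaped in place into "((a + b) + c) + d".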
+ */
+
+#if REARRANGE_ADDS
+
+void Compiler::fgMoveOpsLeft(GenTreePtr tree)
+{
+ GenTreePtr op1;
+ GenTreePtr op2;
+ genTreeOps oper;
+
+ do
+ {
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ oper = tree->OperGet();
+
+ noway_assert(GenTree::OperIsCommutative(oper));
+ noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
+ noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
+ noway_assert(oper == op2->gtOper);
+
+ // Commutativity doesn't hold if overflow checks are needed
+
+ if (tree->gtOverflowEx() || op2->gtOverflowEx())
+ {
+ return;
+ }
+
+ if (gtIsActiveCSE_Candidate(op2))
+ {
+ // If we have marked op2 as a CSE candidate,
+ // we can't perform a commutative reordering
+ // because any value numbers that we computed for op2
+ // will be incorrect after performing a commutative reordering
+ //
+ return;
+ }
+
+ if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
+ {
+ return;
+ }
+
+ // Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
+ if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
+ {
+ return;
+ }
+
+ if ((tree->gtFlags | op2->gtFlags) & GTF_BOOLEAN)
+ {
+ // We could deal with this, but we were always broken and just hit the assert
+ // below regarding flags, which means it's not frequent, so we will just bail out.
+ // See #195514
+ return;
+ }
+
+ noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
+
+ GenTreePtr ad1 = op2->gtOp.gtOp1;
+ GenTreePtr ad2 = op2->gtOp.gtOp2;
+
+ // Compiler::optOptimizeBools() can create a GT_OR of two GC pointers yielding a GT_INT
+ // We cannot reorder such GT_OR trees
+ //
+ if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
+ {
+ break;
+ }
+
+ /* Change "(x op (y op z))" to "(x op y) op z" */
+ /* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
+
+ GenTreePtr new_op1 = op2;
+
+ new_op1->gtOp.gtOp1 = op1;
+ new_op1->gtOp.gtOp2 = ad1;
+
+ /* Change the flags. */
+
+ // Make sure we aren't throwing away any flags
+ noway_assert((new_op1->gtFlags &
+ ~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
+ GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
+ GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
+
+ new_op1->gtFlags =
+ (new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
+ (op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
+
+ /* Retype new_op1 if it has become (or is no longer) a GC ptr. */
+
+ if (varTypeIsGC(op1->TypeGet()))
+ {
+ noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
+ oper == GT_ADD) || // byref(ref + (int+int))
+ (varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
+ oper == GT_OR)); // int(gcref | int(gcref|intval))
+
+ new_op1->gtType = tree->gtType;
+ }
+ else if (varTypeIsGC(ad2->TypeGet()))
+ {
+ // Neither ad1 nor op1 is GC, so new_op1 isn't either
+ noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
+ new_op1->gtType = TYP_I_IMPL;
+ }
+
+ // If new_op1 is a new expression, assign it a new unique value number.
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // We can only keep the old value number on new_op1 if both op1 and ad2
+ // have the same non-NoVN value numbers. Since op is commutative, comparing
+ // only ad2 and op1 is enough.
+ if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
+ (ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
+ (ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
+ {
+ new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
+ }
+ }
+
+ tree->gtOp.gtOp1 = new_op1;
+ tree->gtOp.gtOp2 = ad2;
+
+ /* If 'new_op1' is now the same nested op, process it recursively */
+
+ if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
+ {
+ fgMoveOpsLeft(new_op1);
+ }
+
+ /* If 'ad2' is now the same nested op, process it
+ * Instead of recursion, we set up op1 and op2 for the next loop.
+ */
+
+ op1 = new_op1;
+ op2 = ad2;
+ } while ((op2->gtOper == oper) && !op2->gtOverflowEx());
+
+ return;
+}
+
+#endif
+
+/*****************************************************************************/
+
+void Compiler::fgSetRngChkTarget(GenTreePtr tree, bool delay)
+{
+ GenTreeBoundsChk* bndsChk = nullptr;
+ SpecialCodeKind kind = SCK_RNGCHK_FAIL;
+
+#ifdef FEATURE_SIMD
+ if ((tree->gtOper == GT_ARR_BOUNDS_CHECK) || (tree->gtOper == GT_SIMD_CHK))
+#else // FEATURE_SIMD
+ if (tree->gtOper == GT_ARR_BOUNDS_CHECK)
+#endif // FEATURE_SIMD
+ {
+ bndsChk = tree->AsBoundsChk();
+ kind = tree->gtBoundsChk.gtThrowKind;
+ }
+ else
+ {
+ noway_assert((tree->gtOper == GT_ARR_ELEM) || (tree->gtOper == GT_ARR_INDEX));
+ }
+
+#ifdef _TARGET_X86_
+ unsigned callStkDepth = fgPtrArgCntCur;
+#else
+ // only x86 pushes args
+ const unsigned callStkDepth = 0;
+#endif
+
+ if (opts.MinOpts())
+ {
+ delay = false;
+
+ // we need to initialize this field
+ if (fgGlobalMorph && bndsChk != nullptr)
+ {
+ bndsChk->gtStkDepth = callStkDepth;
+ }
+ }
+
+ if (!opts.compDbgCode)
+ {
+ if (delay || compIsForInlining())
+ {
+ /* We delay this until after loop-oriented range check
+ analysis. For now we merely store the current stack
+ level in the tree node.
+ */
+ if (bndsChk != nullptr)
+ {
+ noway_assert(!bndsChk->gtIndRngFailBB || previousCompletedPhase >= PHASE_OPTIMIZE_LOOPS);
+ bndsChk->gtStkDepth = callStkDepth;
+ }
+ }
+ else
+ {
+ /* Create/find the appropriate "range-fail" label */
+
+ // fgPtrArgCntCur is only valid for global morph or if we walk full stmt.
+ noway_assert((bndsChk != nullptr) || fgGlobalMorph);
+
+ unsigned stkDepth = (bndsChk != nullptr) ? bndsChk->gtStkDepth : callStkDepth;
+
+ BasicBlock* rngErrBlk = fgRngChkTarget(compCurBB, stkDepth, kind);
+
+ /* Add the label to the indirection node */
+
+ if (bndsChk != nullptr)
+ {
+ bndsChk->gtIndRngFailBB = gtNewCodeRef(rngErrBlk);
+ }
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Expand a GT_INDEX node and fully morph the child operands
+ *
+ * The original GT_INDEX node is bashed into the GT_IND node that accesses
+ * the array element. We expand the GT_INDEX node into a larger tree that
+ * evaluates the array base and index. The simplest expansion is a GT_COMMA
+ * with a GT_ARR_BOUNDS_CHECK and a GT_IND with a GTF_INX_RNGCHK flag.
+ * For complex array or index expressions one or more GT_COMMA assignments
+ * are inserted so that we only evaluate the array or index expressions once.
+ *
+ * The fully expanded tree is then morphed. This causes gtFoldExpr to
+ * perform local constant prop and reorder the constants in the tree and
+ * fold them.
+ *
+ * We then parse the resulting array element expression in order to locate
+ * and label the constants and variables that occur in the tree.
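+ *
+ * Illustrative shape only (exact trees vary with array kind, CSE temps and
+ * constant folding):
+ *
+ *    COMMA
+ *     +-- ARR_BOUNDS_CHECK comparing 'index' against the array length
+ *     \-- IND(elemTyp)
+ *          \-- ADD(arrRef, ADD(MUL(index, elemSize), elemOffs))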
+ */
+
+const int MAX_ARR_COMPLEXITY = 4;
+const int MAX_INDEX_COMPLEXITY = 4;
+
+GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_INDEX);
+ GenTreeIndex* asIndex = tree->AsIndex();
+
+ var_types elemTyp = tree->TypeGet();
+ unsigned elemSize = tree->gtIndex.gtIndElemSize;
+ CORINFO_CLASS_HANDLE elemStructType = tree->gtIndex.gtStructElemClass;
+
+ noway_assert(elemTyp != TYP_STRUCT || elemStructType != nullptr);
+
+#ifdef FEATURE_SIMD
+ if (featureSIMD && varTypeIsStruct(elemTyp) && elemSize <= getSIMDVectorRegisterByteLength())
+ {
+ // If this is a SIMD type, this is the point at which we lose the type information,
+ // so we need to set the correct type on the GT_IND.
+ // (We don't care about the base type here, so we only check, but don't retain, the return value).
+ unsigned simdElemSize = 0;
+ if (getBaseTypeAndSizeOfSIMDType(elemStructType, &simdElemSize) != TYP_UNKNOWN)
+ {
+ assert(simdElemSize == elemSize);
+ elemTyp = getSIMDTypeForSize(elemSize);
+ // This is the new type of the node.
+ tree->gtType = elemTyp;
+ // Now set elemStructType to null so that we don't confuse value numbering.
+ elemStructType = nullptr;
+ }
+ }
+#endif // FEATURE_SIMD
+
+ GenTreePtr arrRef = asIndex->Arr();
+ GenTreePtr index = asIndex->Index();
+
+ // Set up the array length's offset into lenOffs
+ // and the first element's offset into elemOffs
+ ssize_t lenOffs;
+ ssize_t elemOffs;
+ if (tree->gtFlags & GTF_INX_STRING_LAYOUT)
+ {
+ lenOffs = offsetof(CORINFO_String, stringLen);
+ elemOffs = offsetof(CORINFO_String, chars);
+ tree->gtFlags &= ~GTF_INX_STRING_LAYOUT; // Clear this flag as it is used for GTF_IND_VOLATILE
+ }
+ else if (tree->gtFlags & GTF_INX_REFARR_LAYOUT)
+ {
+ lenOffs = offsetof(CORINFO_RefArray, length);
+ elemOffs = eeGetEEInfo()->offsetOfObjArrayData;
+ }
+ else // We have a standard array
+ {
+ lenOffs = offsetof(CORINFO_Array, length);
+ elemOffs = offsetof(CORINFO_Array, u1Elems);
+ }
+
+ bool chkd = ((tree->gtFlags & GTF_INX_RNGCHK) != 0); // if false, range checking will be disabled
+ bool nCSE = ((tree->gtFlags & GTF_DONT_CSE) != 0);
+
+ GenTreePtr arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
+ GenTreePtr indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
+ GenTreePtr bndsChk = nullptr;
+
+ // If we're doing range checking, introduce a GT_ARR_BOUNDS_CHECK node for the address.
+ if (chkd)
+ {
+ GenTreePtr arrRef2 = nullptr; // The second copy will be used in array address expression
+ GenTreePtr index2 = nullptr;
+
+ // If the arrRef expression involves an assignment, a call or reads from global memory,
+ // then we *must* allocate a temporary in which to "localize" those values,
+ // to ensure that the same values are used in the bounds check and the actual
+ // dereference.
+ // Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
+ //
+ if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
+ {
+ unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
+ arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
+ arrRef = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
+ arrRef2 = gtNewLclvNode(arrRefTmpNum, arrRef->TypeGet());
+ }
+ else
+ {
+ arrRef2 = gtCloneExpr(arrRef);
+ noway_assert(arrRef2 != nullptr);
+ }
+
+ // If the index expression involves an assignment, a call or reads from global memory,
+ // we *must* allocate a temporary in which to "localize" those values,
+ // to ensure that the same values are used in the bounds check and the actual
+ // dereference.
+ // Also we allocate the temporary when the index is sufficiently complex/expensive.
+ //
+ if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
+ {
+ unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
+ indexDefn = gtNewTempAssign(indexTmpNum, index);
+ index = gtNewLclvNode(indexTmpNum, index->TypeGet());
+ index2 = gtNewLclvNode(indexTmpNum, index->TypeGet());
+ }
+ else
+ {
+ index2 = gtCloneExpr(index);
+ noway_assert(index2 != nullptr);
+ }
+
+ // Next introduce a GT_ARR_BOUNDS_CHECK node
+ var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
+
+#ifdef _TARGET_64BIT_
+ // The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
+ // of a 64-bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
+ // the comparison will have to be widened to 64 bits.
+ if (index->TypeGet() == TYP_I_IMPL)
+ {
+ bndsChkType = TYP_I_IMPL;
+ }
+#endif // _TARGET_64BIT_
+
+ GenTree* arrLen = new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrRef, (int)lenOffs);
+
+ if (bndsChkType != TYP_INT)
+ {
+ arrLen = gtNewCastNode(bndsChkType, arrLen, bndsChkType);
+ }
+
+ GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
+
+ bndsChk = arrBndsChk;
+
+ // Make sure to increment ref-counts if already ref-counted.
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveIncRefCounts(index);
+ lvaRecursiveIncRefCounts(arrRef);
+ }
+
+ // Now we'll switch to using the second copies for arrRef and index
+ // to compute the address expression
+
+ arrRef = arrRef2;
+ index = index2;
+ }
+
+ // Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
+
+ GenTreePtr addr;
+
+#ifdef _TARGET_64BIT_
+ // Widen 'index' on 64-bit targets
+ if (index->TypeGet() != TYP_I_IMPL)
+ {
+ if (index->OperGet() == GT_CNS_INT)
+ {
+ index->gtType = TYP_I_IMPL;
+ }
+ else
+ {
+ index = gtNewCastNode(TYP_I_IMPL, index, TYP_I_IMPL);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ /* Scale the index value if necessary */
+ if (elemSize > 1)
+ {
+ GenTreePtr size = gtNewIconNode(elemSize, TYP_I_IMPL);
+
+ // Fix 392756 WP7 Crossgen
+ //
+ // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
+ // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
+ // Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
+ //
+ size->gtFlags |= GTF_DONT_CSE;
+
+ /* Multiply by the array element size */
+ addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
+ }
+ else
+ {
+ addr = index;
+ }
+
+ /* Add the object ref to the element's offset */
+
+ addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
+
+ /* Add the first element's offset */
+
+ GenTreePtr cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
+
+ addr = gtNewOperNode(GT_ADD, TYP_BYREF, addr, cns);
+
+#if SMALL_TREE_NODES
+ assert(tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE);
+#endif
+
+ // Change the original GT_INDEX node into a GT_IND node
+ tree->SetOper(GT_IND);
+
+ // If the resulting indirection is a floating-point type, notify the compiler
+ // that we'll potentially use floating-point registers at the time of codegen.
+ if (varTypeIsFloating(tree->gtType))
+ {
+ this->compFloatingPointUsed = true;
+ }
+
+ // We've now consumed the GTF_INX_RNGCHK, and the node
+ // is no longer a GT_INDEX node.
+ tree->gtFlags &= ~GTF_INX_RNGCHK;
+
+ tree->gtOp.gtOp1 = addr;
+
+ // This is an array index expression.
+ tree->gtFlags |= GTF_IND_ARR_INDEX;
+
+ /* An indirection will cause a GPF if the address is null */
+ tree->gtFlags |= GTF_EXCEPT;
+
+ if (nCSE)
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+
+ // Store information about it.
+ GetArrayInfoMap()->Set(tree, ArrayInfo(elemTyp, elemSize, (int)elemOffs, elemStructType));
+
+ // Remember this 'indTree' that we just created, as we still need to attach the fieldSeq information to it.
+
+ GenTreePtr indTree = tree;
+
+ // Did we create a bndsChk tree?
+ if (bndsChk)
+ {
+ // Use a GT_COMMA node to prepend the array bound check
+ //
+ tree = gtNewOperNode(GT_COMMA, elemTyp, bndsChk, tree);
+
+ /* Mark the indirection node as needing a range check */
+ fgSetRngChkTarget(bndsChk);
+ }
+
+ if (indexDefn != nullptr)
+ {
+ // Use a GT_COMMA node to prepend the index assignment
+ //
+ tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
+ }
+ if (arrRefDefn != nullptr)
+ {
+ // Use a GT_COMMA node to prepend the arrRef assignment
+ //
+ tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
+ }
+
+ // Currently we morph the tree to perform some folding operations prior
+ // to attaching fieldSeq info and labeling constant array index contributions
+ //
+ fgMorphTree(tree);
+
+ // Ideally we just want to proceed to attaching fieldSeq info and labeling the
+ // constant array index contributions, but the morphing operation may have changed
+ // the 'tree' into something that now unconditionally throws an exception.
+ //
+ // In such a case the gtEffectiveVal could be a new tree, or its gtOper could be modified,
+ // or it could be left unchanged. If it is unchanged then we should not return;
+ // instead we should proceed to attaching fieldSeq info, etc...
+ //
+ GenTreePtr arrElem = tree->gtEffectiveVal();
+
+ if (fgIsCommaThrow(tree))
+ {
+ if ((arrElem != indTree) || // A new tree node may have been created
+ (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
+ {
+ return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
+ }
+ }
+
+ assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
+
+ addr = arrElem->gtOp.gtOp1;
+
+ assert(addr->TypeGet() == TYP_BYREF);
+
+ GenTreePtr cnsOff = nullptr;
+ if (addr->OperGet() == GT_ADD)
+ {
+ if (addr->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ cnsOff = addr->gtOp.gtOp2;
+ addr = addr->gtOp.gtOp1;
+ }
+
+ while ((addr->OperGet() == GT_ADD) || (addr->OperGet() == GT_SUB))
+ {
+ assert(addr->TypeGet() == TYP_BYREF);
+ GenTreePtr index = addr->gtOp.gtOp2;
+
+ // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
+ index->LabelIndex(this);
+
+ addr = addr->gtOp.gtOp1;
+ }
+ assert(addr->TypeGet() == TYP_REF);
+ }
+ else if (addr->OperGet() == GT_CNS_INT)
+ {
+ cnsOff = addr;
+ }
+
+ FieldSeqNode* firstElemFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::FirstElemPseudoField);
+
+ if ((cnsOff != nullptr) && (cnsOff->gtIntCon.gtIconVal == elemOffs))
+ {
+ // Assign it the [#FirstElem] field sequence
+ //
+ cnsOff->gtIntCon.gtFieldSeq = firstElemFseq;
+ }
+ else // We have folded the first element's offset with the index expression
+ {
+ // Build the [#ConstantIndex, #FirstElem] field sequence
+ //
+ FieldSeqNode* constantIndexFseq = GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ FieldSeqNode* fieldSeq = GetFieldSeqStore()->Append(constantIndexFseq, firstElemFseq);
+
+ if (cnsOff == nullptr) // It must have folded into a zero offset
+ {
+ // Record in the general zero-offset map.
+ GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
+ }
+ else
+ {
+ cnsOff->gtIntCon.gtFieldSeq = fieldSeq;
+ }
+ }
+
+ return tree;
+}
+
+#ifdef _TARGET_X86_
+/*****************************************************************************
+ *
+ * Wrap fixed stack arguments of varargs functions so that they are accessed
+ * through the varargs cookie, except for the cookie itself.
+ *
+ * Non-x86 platforms are allowed to access all arguments directly
+ * so we don't need this code.
+ *
+ */
+GenTreePtr Compiler::fgMorphStackArgForVarArgs(unsigned lclNum, var_types varType, unsigned lclOffs)
+{
+ /* For the fixed stack arguments of a varargs function, we need to go
+ through the varargs cookie to access them, except for the
+ cookie itself */
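+
+ // Illustrative sketch (not exact JIT dump output): such an argument access is
+ // rewritten roughly as
+ // IND(varType, SUB(LCL_VAR lvaVarargsBaseOfStkArgs, <offset of the arg>))
+ // i.e. the fixed stack argument is addressed relative to the varargs base local
+ // rather than directly off the frame.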
+
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg)
+ {
+ // Create a node representing the local pointing to the base of the args
+ GenTreePtr ptrArg =
+ gtNewOperNode(GT_SUB, TYP_I_IMPL, gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL),
+ gtNewIconNode(varDsc->lvStkOffs - codeGen->intRegState.rsCalleeRegArgCount * sizeof(void*) +
+ lclOffs));
+
+ // Access the argument through the local
+ GenTreePtr tree = gtNewOperNode(GT_IND, varType, ptrArg);
+ tree->gtFlags |= GTF_IND_TGTANYWHERE;
+
+ if (varDsc->lvAddrExposed)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+
+ return fgMorphTree(tree);
+ }
+
+ return NULL;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform the given GT_LCL_VAR tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphLocalVar(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_LCL_VAR);
+
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ var_types varType = lvaGetRealType(lclNum);
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvAddrExposed)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+
+#ifdef _TARGET_X86_
+ if (info.compIsVarArgs)
+ {
+ GenTreePtr newTree = fgMorphStackArgForVarArgs(lclNum, varType, 0);
+ if (newTree != NULL)
+ return newTree;
+ }
+#endif // _TARGET_X86_
+
+ /* If not during the global morphing phase bail */
+
+ if (!fgGlobalMorph)
+ {
+ return tree;
+ }
+
+ bool varAddr = (tree->gtFlags & GTF_DONT_CSE) != 0;
+
+ noway_assert(!(tree->gtFlags & GTF_VAR_DEF) || varAddr); // GTF_VAR_DEF should always imply varAddr
+
+ if (!varAddr && varTypeIsSmall(varDsc->TypeGet()) && varDsc->lvNormalizeOnLoad())
+ {
+#if LOCAL_ASSERTION_PROP
+ /* Assertion prop can tell us to omit adding a cast here */
+ if (optLocalAssertionProp && optAssertionIsSubrange(tree, varType, apFull) != NO_ASSERTION_INDEX)
+ {
+ return tree;
+ }
+#endif
+ /* Small-typed arguments and aliased locals are normalized on load.
+ Other small-typed locals are normalized on store.
+ Under the debugger, small-typed locals are also normalized on load,
+ since the debugger could write to the variable.
+ If this is one of the former, insert a narrowing cast on the load,
+ i.e. convert: var-short --> cast-short(var-int) */
+
+ tree->gtType = TYP_INT;
+ fgMorphTreeDone(tree);
+ tree = gtNewCastNode(TYP_INT, tree, varType);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+
+ return tree;
+}
+
+/*****************************************************************************
+ Grab a temp for big offset morphing.
+ This method will grab a new temp if no temp of this "type" has been created;
+ otherwise it will return the cached one.
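+ For example, two big-offset field morphs whose object references are both TYP_REF
+ will share the single cached TYP_REF temp.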
+*/
+unsigned Compiler::fgGetBigOffsetMorphingTemp(var_types type)
+{
+ unsigned lclNum = fgBigOffsetMorphingTemps[type];
+
+ if (lclNum == BAD_VAR_NUM)
+ {
+ // We haven't created a temp for this kind of type. Create one now.
+ lclNum = lvaGrabTemp(false DEBUGARG("Big Offset Morphing"));
+ fgBigOffsetMorphingTemps[type] = lclNum;
+ }
+ else
+ {
+ // We better get the right type.
+ noway_assert(lvaTable[lclNum].TypeGet() == type);
+ }
+
+ noway_assert(lclNum != BAD_VAR_NUM);
+ return lclNum;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_FIELD tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
+{
+ assert(tree->gtOper == GT_FIELD);
+
+ noway_assert(tree->gtFlags & GTF_GLOB_REF);
+
+ CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ bool fieldMayOverlap = false;
+ bool objIsLocal = false;
+
+ if (tree->gtField.gtFldMayOverlap)
+ {
+ fieldMayOverlap = true;
+ // Reset the flag because we may reuse the node.
+ tree->gtField.gtFldMayOverlap = false;
+ }
+
+#ifdef FEATURE_SIMD
+ // If this field belongs to a SIMD struct, translate it to a SIMD intrinsic.
+ if (mac == nullptr || mac->m_kind != MACK_Addr)
+ {
+ GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
+ if (newTree != tree)
+ {
+ newTree = fgMorphSmpOp(newTree);
+ return newTree;
+ }
+ }
+ else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD())
+ {
+ // We have a field of a SIMD intrinsic in an address-taken context.
+ // We need to copy the SIMD result to a temp, and take the field of that.
+ GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD());
+ objRef->gtOp.gtOp1 = copy;
+ }
+#endif
+
+ /* Is this an instance data member? */
+
+ if (objRef)
+ {
+ GenTreePtr addr;
+ objIsLocal = objRef->IsLocal();
+
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ NO_WAY("instance field can not be a TLS ref.");
+ }
+
+ /* We'll create the expression "*(objRef + mem_offs)" */
+
+ noway_assert(varTypeIsGC(objRef->TypeGet()) || objRef->TypeGet() == TYP_I_IMPL);
+
+ // An optimization for Contextful classes:
+ // we unwrap the proxy when we have a 'this reference'
+ if (info.compIsContextful && info.compUnwrapContextful && impIsThis(objRef))
+ {
+ objRef = fgUnwrapProxy(objRef);
+ }
+
+ /*
+ Now we have a tree like this:
+
+ +--------------------+
+ | GT_FIELD | tree
+ +----------+---------+
+ |
+ +--------------+-------------+
+ | tree->gtField.gtFldObj |
+ +--------------+-------------+
+
+
+ We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
+
+ +--------------------+
+ | GT_IND/GT_OBJ | tree
+ +---------+----------+
+ |
+ |
+ +---------+----------+
+ | GT_ADD | addr
+ +---------+----------+
+ |
+ / \
+ / \
+ / \
+ +-------------------+ +----------------------+
+ | objRef | | fldOffset |
+ | | | (when fldOffset !=0) |
+ +-------------------+ +----------------------+
+
+
+ or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
+
+
+ +--------------------+
+ | GT_IND/GT_OBJ | tree
+ +----------+---------+
+ |
+ +----------+---------+
+ | GT_COMMA | comma2
+ +----------+---------+
+ |
+ / \
+ / \
+ / \
+ / \
+ +---------+----------+ +---------+----------+
+ comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr
+ +---------+----------+ +---------+----------+
+ | |
+ / \ / \
+ / \ / \
+ / \ / \
+ +-----+-----+ +-----+-----+ +---------+ +-----------+
+ asg | GT_ASG | ind | GT_IND | | tmpLcl | | fldOffset |
+ +-----+-----+ +-----+-----+ +---------+ +-----------+
+ | |
+ / \ |
+ / \ |
+ / \ |
+ +-----+-----+ +-----+-----+ +-----------+
+ | tmpLcl | | objRef | | tmpLcl |
+ +-----------+ +-----------+ +-----------+
+
+
+ */
+
+ var_types objRefType = objRef->TypeGet();
+
+ GenTreePtr comma = nullptr;
+
+ bool addedExplicitNullCheck = false;
+
+ // NULL mac means we encounter the GT_FIELD first. This denotes a dereference of the field,
+ // and thus is equivalent to a MACK_Ind with zero offset.
+ MorphAddrContext defMAC(MACK_Ind);
+ if (mac == nullptr)
+ {
+ mac = &defMAC;
+ }
+
+ // This flag is set to enable the "conservative" style of explicit null-check insertion.
+ // This means that we insert an explicit null check whenever we create byref by adding a
+ // constant offset to a ref, in a MACK_Addr context (meaning that the byref is not immediately
+ // dereferenced). The alternative is "aggressive", which would not insert such checks (for
+ // small offsets); in this plan, we would transfer some null-checking responsibility to
+ // callees of methods taking byref parameters. They would have to add explicit null checks
+ // when creating derived byrefs from argument byrefs by adding constants to argument byrefs, in
+ // contexts where the resulting derived byref is not immediately dereferenced (or if the offset is too
+ // large). To make the "aggressive" scheme work, however, we'd also have to add explicit derived-from-null
+ // checks for byref parameters to "external" methods implemented in C++, and in P/Invoke stubs.
+ // This is left here to point out how to implement it.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#define CONSERVATIVE_NULL_CHECK_BYREF_CREATION 1
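+
+ // Illustrative example: under the conservative scheme, creating a byref to a field
+ // at a non-zero offset in a MACK_Addr context (e.g. passing a byref to obj.f as an
+ // argument) gets an explicit null check on the object before the "obj + fldOffset"
+ // byref is formed, because that byref is not immediately dereferenced.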
+
+ // If the objRef is a GT_ADDR node, it, itself, never requires null checking. The expression
+ // whose address is being taken is either a local or static variable, whose address is necessarily
+ // non-null, or else it is a field dereference, which will do its own bounds checking if necessary.
+ if (objRef->gtOper != GT_ADDR && ((mac->m_kind == MACK_Addr || mac->m_kind == MACK_Ind) &&
+ (!mac->m_allConstantOffsets || fgIsBigOffset(mac->m_totalOffset + fldOffset)
+#if CONSERVATIVE_NULL_CHECK_BYREF_CREATION
+ || (mac->m_kind == MACK_Addr && (mac->m_totalOffset + fldOffset > 0))
+#else
+ || (objRef->gtType == TYP_BYREF && mac->m_kind == MACK_Addr &&
+ (mac->m_totalOffset + fldOffset > 0))
+#endif
+ )))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before explicit null check morphing:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ //
+ // Create the "comma" subtree
+ //
+ GenTreePtr asg = nullptr;
+ GenTreePtr nullchk;
+
+ unsigned lclNum;
+
+ if (objRef->gtOper != GT_LCL_VAR)
+ {
+ lclNum = fgGetBigOffsetMorphingTemp(genActualType(objRef->TypeGet()));
+
+ // Create the "asg" node
+ asg = gtNewTempAssign(lclNum, objRef);
+ }
+ else
+ {
+ lclNum = objRef->gtLclVarCommon.gtLclNum;
+ }
+
+ // Create the "nullchk" node.
+ // Make it TYP_BYTE so we only dereference it for 1 byte.
+ GenTreePtr lclVar = gtNewLclvNode(lclNum, objRefType);
+ nullchk = new (this, GT_NULLCHECK) GenTreeIndir(GT_NULLCHECK, TYP_BYTE, lclVar, nullptr);
+
+ nullchk->gtFlags |= GTF_DONT_CSE; // Don't try to create a CSE for these TYP_BYTE indirections
+
+ // An indirection will cause a GPF if the address is null.
+ nullchk->gtFlags |= GTF_EXCEPT;
+
+ compCurBB->bbFlags |= BBF_HAS_NULLCHECK;
+ optMethodFlags |= OMF_HAS_NULLCHECK;
+
+ if (asg)
+ {
+ // Create the "comma" node.
+ comma = gtNewOperNode(GT_COMMA,
+ TYP_VOID, // We don't want to return anything from this "comma" node.
+ // Set the type to TYP_VOID, so we can select "cmp" instruction
+ // instead of "mov" instruction later on.
+ asg, nullchk);
+ }
+ else
+ {
+ comma = nullchk;
+ }
+
+ addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
+
+ addedExplicitNullCheck = true;
+ }
+ else if (fldOffset == 0)
+ {
+ // Generate the "addr" node.
+ addr = objRef;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ GetZeroOffsetFieldMap()->Set(addr, fieldSeq);
+ }
+ else
+ {
+ addr = objRef;
+ }
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (tree->gtField.gtFieldLookup.addr != nullptr)
+ {
+ GenTreePtr baseOffset = gtNewIconEmbHndNode(tree->gtField.gtFieldLookup.addr, nullptr, GTF_ICON_FIELD_HDL);
+
+ if (tree->gtField.gtFieldLookup.accessType == IAT_PVALUE)
+ baseOffset = gtNewOperNode(GT_IND, TYP_I_IMPL, baseOffset);
+
+ addr =
+ gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr, baseOffset);
+ }
+#endif
+ if (fldOffset != 0)
+ {
+ // Generate the "addr" node.
+ /* Add the member offset to the object's address */
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ addr = gtNewOperNode(GT_ADD, (var_types)(objRefType == TYP_I_IMPL ? TYP_I_IMPL : TYP_BYREF), addr,
+ gtNewIconHandleNode(fldOffset, GTF_ICON_FIELD_OFF, fieldSeq));
+ }
+
+ // Now let's set the "tree" as a GT_IND tree.
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = addr;
+
+ if (fgAddrCouldBeNull(addr))
+ {
+ // This indirection can cause a GPF if the address could be null.
+ tree->gtFlags |= GTF_EXCEPT;
+ }
+
+ if (addedExplicitNullCheck)
+ {
+ //
+ // Create "comma2" node and link it to "tree".
+ //
+ GenTreePtr comma2;
+ comma2 = gtNewOperNode(GT_COMMA,
+ addr->TypeGet(), // The type of "comma2" node is the same as the type of "addr" node.
+ comma, addr);
+ tree->gtOp.gtOp1 = comma2;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (addedExplicitNullCheck)
+ {
+ printf("After adding explicit null check:\n");
+ gtDispTree(tree);
+ }
+ }
+#endif
+ }
+ else /* This is a static data member */
+ {
+ if (tree->gtFlags & GTF_IND_TLS_REF)
+ {
+ // Thread Local Storage static field reference
+ //
+ // Field ref is a TLS 'Thread-Local-Storage' reference
+ //
+ // Build this tree: IND(*) #
+ // |
+ // ADD(I_IMPL)
+ // / \
+ // / CNS(fldOffset)
+ // /
+ // /
+ // /
+ // IND(I_IMPL) == [Base of this DLL's TLS]
+ // |
+ // ADD(I_IMPL)
+ // / \
+ // / CNS(IdValue*4) or MUL
+ // / / \
+ // IND(I_IMPL) / CNS(4)
+ // | /
+ // CNS(TLS_HDL,0x2C) IND
+ // |
+ // CNS(pIdAddr)
+ //
+ // # Denotes the original node
+ //
+ void** pIdAddr = nullptr;
+ unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(symHnd, (void**)&pIdAddr);
+
+ //
+ // If we can access the TLS DLL index ID value directly
+ // then pIdAddr will be NULL and
+ // IdValue will be the actual TLS DLL index ID
+ //
+ GenTreePtr dllRef = nullptr;
+ if (pIdAddr == nullptr)
+ {
+ if (IdValue != 0)
+ {
+ dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
+ }
+ }
+ else
+ {
+ dllRef = gtNewIconHandleNode((size_t)pIdAddr, GTF_ICON_STATIC_HDL);
+ dllRef = gtNewOperNode(GT_IND, TYP_I_IMPL, dllRef);
+ dllRef->gtFlags |= GTF_IND_INVARIANT;
+
+ /* Multiply by 4 */
+
+ dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
+ }
+
+#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
+
+ // Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
+
+ GenTreePtr tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
+
+ tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
+
+ if (dllRef != nullptr)
+ {
+ /* Add the dllRef */
+ tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
+ }
+
+ /* indirect to have tlsRef point at the base of the DLL's Thread Local Storage */
+ tlsRef = gtNewOperNode(GT_IND, TYP_I_IMPL, tlsRef);
+
+ if (fldOffset != 0)
+ {
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ GenTreePtr fldOffsetNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, fldOffset, fieldSeq);
+
+ /* Add the TLS static field offset to the address */
+
+ tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, fldOffsetNode);
+ }
+
+ // Final indirect to get to actual value of TLS static field
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = tlsRef;
+
+ noway_assert(tree->gtFlags & GTF_IND_TLS_REF);
+ }
+ else
+ {
+ // Normal static field reference
+
+ //
+ // If we can access the static's address directly
+ // then pFldAddr will be NULL and
+ // fldAddr will be the actual address of the static field
+ //
+ void** pFldAddr = nullptr;
+ void* fldAddr = info.compCompHnd->getFieldAddress(symHnd, (void**)&pFldAddr);
+
+ if (pFldAddr == nullptr)
+ {
+#ifdef _TARGET_64BIT_
+ if (IMAGE_REL_BASED_REL32 != eeGetRelocTypeHint(fldAddr))
+ {
+ // The address is not directly addressable, so force it into a
+ // constant so that we handle it properly
+
+ GenTreePtr addr = gtNewIconHandleNode((size_t)fldAddr, GTF_ICON_STATIC_HDL);
+ addr->gtType = TYP_I_IMPL;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ addr->gtIntCon.gtFieldSeq = fieldSeq;
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = addr;
+
+ return fgMorphSmpOp(tree);
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+ // Only volatile could be set, and it maps over
+ noway_assert((tree->gtFlags & ~(GTF_FLD_VOLATILE | GTF_COMMON_MASK)) == 0);
+ noway_assert(GTF_FLD_VOLATILE == GTF_IND_VOLATILE);
+ tree->SetOper(GT_CLS_VAR);
+ tree->gtClsVar.gtClsVarHnd = symHnd;
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ tree->gtClsVar.gtFieldSeq = fieldSeq;
+ }
+
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr = gtNewIconHandleNode((size_t)pFldAddr, GTF_ICON_STATIC_HDL);
+
+ // There are two cases here: either the static is RVA-based,
+ // in which case the type of the FIELD node is not a GC type
+ // and the handle to the RVA is a TYP_I_IMPL; or the FIELD node is
+ // a GC type and the handle to it is a TYP_BYREF into the GC heap,
+ // because handles to statics now go into the large object heap
+
+ var_types handleTyp = (var_types)(varTypeIsGC(tree->TypeGet()) ? TYP_BYREF : TYP_I_IMPL);
+ GenTreePtr op1 = gtNewOperNode(GT_IND, handleTyp, addr);
+ op1->gtFlags |= GTF_IND_INVARIANT;
+
+ tree->SetOper(GT_IND);
+ tree->gtOp.gtOp1 = op1;
+ }
+ }
+ }
+ noway_assert(tree->gtOper == GT_IND);
+
+ GenTreePtr res = fgMorphSmpOp(tree);
+
+ // If we have a struct type, this node would previously have been under a GT_ADDR,
+ // and therefore would have been marked GTF_DONT_CSE.
+ // TODO-1stClassStructs: revisit this.
+ if ((res->TypeGet() == TYP_STRUCT) && !objIsLocal)
+ {
+ res->gtFlags |= GTF_DONT_CSE;
+ }
+
+ if (fldOffset == 0 && res->OperGet() == GT_IND)
+ {
+ GenTreePtr addr = res->gtOp.gtOp1;
+ // Since we don't make a constant zero to attach the field sequence to, associate it with the "addr" node.
+ FieldSeqNode* fieldSeq =
+ fieldMayOverlap ? FieldSeqStore::NotAField() : GetFieldSeqStore()->CreateSingleton(symHnd);
+ fgAddFieldSeqForZeroOffset(addr, fieldSeq);
+ }
+
+ return res;
+}
+
+//------------------------------------------------------------------------------
+// fgMorphCallInline: attempt to inline a call
+//
+// Arguments:
+// call - call expression to inline, inline candidate
+// inlineResult - result tracking and reporting
+//
+// Notes:
+// Attempts to inline the call.
+//
+// If successful, callee's IR is inserted in place of the call, and
+// is marked with an InlineContext.
+//
+// If unsuccessful, the transformations done in anticipation of a
+// possible inline are undone, and the candidate flag on the call
+// is cleared.
+
+void Compiler::fgMorphCallInline(GenTreeCall* call, InlineResult* inlineResult)
+{
+ // The call must be a candidate for inlining.
+ assert((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0);
+
+ // Attempt the inline
+ fgMorphCallInlineHelper(call, inlineResult);
+
+ // We should have made up our minds one way or another....
+ assert(inlineResult->IsDecided());
+
+ // If we failed to inline, we have a bit of work to do to cleanup
+ if (inlineResult->IsFailure())
+ {
+
+#ifdef DEBUG
+
+ // Before we do any cleanup, create a failing InlineContext to
+ // capture details of the inlining attempt.
+ m_inlineStrategy->NewFailure(fgMorphStmt, inlineResult);
+
+#endif
+
+ // It was an inline candidate, but we haven't expanded it.
+ if (call->gtCall.gtReturnType != TYP_VOID)
+ {
+ // Detach the GT_CALL tree from the original statement by
+ // replacing it with a "nothing" node. Later the "nothing" node will be removed
+ // and the original GT_CALL tree will be picked up by the GT_RET_EXPR node.
+
+ noway_assert(fgMorphStmt->gtStmt.gtStmtExpr == call);
+ fgMorphStmt->gtStmt.gtStmtExpr = gtNewNothingNode();
+ }
+
+ // Clear the Inline Candidate flag so we can ensure later we tried
+ // inlining all candidates.
+ //
+ call->gtFlags &= ~GTF_CALL_INLINE_CANDIDATE;
+ }
+}
+
+/*****************************************************************************
+ * Helper to attempt to inline a call
+ * Sets success/failure in inline result
+ * If success, modifies current method's IR with inlinee's IR
+ * If failed, undoes any speculative modifications to current method
+ */
+
+void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result)
+{
+ // Don't expect any surprises here.
+ assert(result->IsCandidate());
+
+ if (lvaCount >= MAX_LV_NUM_COUNT_FOR_INLINING)
+ {
+ // For now, attributing this to call site, though it's really
+ // more of a budget issue (lvaCount currently includes all
+ // caller and prospective callee locals). We still might be
+ // able to inline other callees into this caller, or inline
+ // this callee in other callers.
+ result->NoteFatal(InlineObservation::CALLSITE_TOO_MANY_LOCALS);
+ return;
+ }
+
+ if (call->IsVirtual())
+ {
+ result->NoteFatal(InlineObservation::CALLSITE_IS_VIRTUAL);
+ return;
+ }
+
+ // impMarkInlineCandidate() is expected not to mark tail prefixed calls
+ // and recursive tail calls as inline candidates.
+ noway_assert(!call->IsTailPrefixedCall());
+ noway_assert(!call->IsImplicitTailCall() || !gtIsRecursiveCall(call));
+
+ /* If the caller's stack frame is marked, then we can't do any inlining. Period.
+ Although we have checked this in impCanInline, it is possible that later IL instructions
+ might cause compNeedSecurityCheck to be set. Therefore we need to check it here again.
+ */
+
+ if (opts.compNeedSecurityCheck)
+ {
+ result->NoteFatal(InlineObservation::CALLER_NEEDS_SECURITY_CHECK);
+ return;
+ }
+
+ //
+ // Calling inlinee's compiler to inline the method.
+ //
+
+ unsigned startVars = lvaCount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Expanding INLINE_CANDIDATE in statement ");
+ printTreeID(fgMorphStmt);
+ printf(" in BB%02u:\n", compCurBB->bbNum);
+ gtDispTree(fgMorphStmt);
+
+ // printf("startVars=%d.\n", startVars);
+ }
+#endif
+
+ impInlineRoot()->m_inlineStrategy->NoteAttempt(result);
+
+ //
+ // Invoke the compiler to inline the call.
+ //
+
+ fgInvokeInlineeCompiler(call, result);
+
+ if (result->IsFailure())
+ {
+ // Undo some changes made in anticipation of inlining...
+
+ // Zero out the used locals
+ memset(lvaTable + startVars, 0, (lvaCount - startVars) * sizeof(*lvaTable));
+ for (unsigned i = startVars; i < lvaCount; i++)
+ {
+ new (&lvaTable[i], jitstd::placement_t()) LclVarDsc(this); // call the constructor.
+ }
+
+ lvaCount = startVars;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ // printf("Inlining failed. Restore lvaCount to %d.\n", lvaCount);
+ }
+#endif
+
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ // printf("After inlining lvaCount=%d.\n", lvaCount);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Performs checks to see if this tail call can be optimized as epilog+jmp.
+ */
+bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
+{
+#if FEATURE_FASTTAILCALL
+ // Reached here means that return types of caller and callee are tail call compatible.
+ // In case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
+ //
+ // In an implicit tail call case callSig may not be available but it is guaranteed to be available
+ // for explicit tail call cases. The reason callSig may not be available in the implicit tail call case is that
+ // a call node might be marked as an inline candidate and could fail to be inlined, in which case
+ // fgInline() will replace the return value placeholder with the call node using gtCloneExpr(), which
+ // currently does not copy/set callSig.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (callee->IsTailPrefixedCall())
+ {
+ assert(impTailCallRetTypeCompatible(info.compRetNativeType, info.compMethodInfo->args.retTypeClass,
+ (var_types)callee->gtReturnType, callee->callSig->retTypeClass));
+ }
+#endif
+
+ // Note on vararg methods:
+ // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
+ // But we can be sure that the incoming arg area of the vararg caller would be sufficient to hold its
+ // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
+ // out-going area required for the callee is bounded by the caller's fixed argument space.
+ //
+ // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
+
+ // Count of caller args including implicit and hidden (i.e. thisPtr, RetBuf, GenericContext, VarargCookie)
+ unsigned nCallerArgs = info.compArgsCount;
+
+ // Count the callee args including implicit and hidden.
+ // Note that GenericContext and VarargCookie are added by importer while
+ // importing the call to gtCallArgs list along with explicit user args.
+ unsigned nCalleeArgs = 0;
+ if (callee->gtCallObjp) // thisPtr
+ {
+ nCalleeArgs++;
+ }
+
+ if (callee->HasRetBufArg()) // RetBuf
+ {
+ nCalleeArgs++;
+
+ // If callee has RetBuf param, caller too must have it.
+ // Otherwise go the slow route.
+ if (info.compRetBuffArg == BAD_VAR_NUM)
+ {
+ return false;
+ }
+ }
+
+ // Count user args while tracking whether any of them is a multi-byte param
+ // that cannot be passed in a register. Note that we don't need to count
+ // non-standard and secret params passed in registers (e.g. R10, R11) since
+ // these won't contribute to out-going arg size.
+ bool hasMultiByteArgs = false;
+ for (GenTreePtr args = callee->gtCallArgs; (args != nullptr) && !hasMultiByteArgs; args = args->gtOp.gtOp2)
+ {
+ nCalleeArgs++;
+
+ assert(args->IsList());
+ GenTreePtr argx = args->gtOp.gtOp1;
+
+ if (varTypeIsStruct(argx))
+ {
+ // Actual arg may be a child of a GT_COMMA. Skip over comma opers.
+ while (argx->gtOper == GT_COMMA)
+ {
+ argx = argx->gtOp.gtOp2;
+ }
+
+ // Get the size of the struct and see if it is register passable.
+ CORINFO_CLASS_HANDLE objClass = nullptr;
+
+ if (argx->OperGet() == GT_OBJ)
+ {
+ objClass = argx->AsObj()->gtClass;
+ }
+ else if (argx->IsLocal())
+ {
+ objClass = lvaTable[argx->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
+ }
+ if (objClass != nullptr)
+ {
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+
+ unsigned typeSize = 0;
+ hasMultiByteArgs = !VarTypeIsMultiByteAndCanEnreg(argx->TypeGet(), objClass, &typeSize, false);
+
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
+ // On System V/arm64 the arg could be a two-eightbyte struct that is passed in two registers.
+ // Account for the second eightbyte in the nCalleeArgs.
+ // https://github.com/dotnet/coreclr/issues/2666
+ // TODO-CQ-Amd64-Unix/arm64: Structs of size between 9 and 16 bytes are conservatively estimated
+ // as two args, since they need two registers whereas nCallerArgs is
+ // counting such an arg as one. This would mean we will not be optimizing
+ // certain calls though technically possible.
+
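+ // Illustrative example: a 12-byte struct on a 64-bit target gives
+ // typeSize / TARGET_POINTER_SIZE == 1, so one extra slot is added below and the
+ // struct is counted as two callee args in total (together with the nCalleeArgs++ above).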
+ if (typeSize > TARGET_POINTER_SIZE)
+ {
+ unsigned extraArgRegsToAdd = (typeSize / TARGET_POINTER_SIZE);
+ nCalleeArgs += extraArgRegsToAdd;
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING || _TARGET_ARM64_
+
+#else
+ assert(!"Target platform ABI rules regarding passing struct type args in registers");
+ unreached();
+#endif //_TARGET_AMD64_ || _TARGET_ARM64_
+ }
+ else
+ {
+ hasMultiByteArgs = true;
+ }
+ }
+ }
+
+ // Go the slow route, if it has multi-byte params
+ if (hasMultiByteArgs)
+ {
+ return false;
+ }
+
+ // Reaching here means that the callee has only argument types that can be passed in
+ // a register and that, if passed on the stack, occupy exactly one stack slot in the out-going arg area.
+ // If we are passing args on the stack for the callee and it has more stack args than the
+ // caller, then a fast tail call cannot be performed.
+ //
+ // Note that the GC'ness of on stack args need not match since the arg setup area is marked
+ // as non-interruptible for fast tail calls.
+ if ((nCalleeArgs > MAX_REG_ARG) && (nCallerArgs < nCalleeArgs))
+ {
+ return false;
+ }
+
+ return true;
+#else
+ return false;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_CALL tree for tail call code generation.
+ */
+void Compiler::fgMorphTailCall(GenTreeCall* call)
+{
+ JITDUMP("fgMorphTailCall (before):\n");
+ DISPTREE(call);
+
+#if defined(_TARGET_ARM_)
+ // For the helper-assisted tail calls, we need to push all the arguments
+ // into a single list, and then add a few extra at the beginning
+
+ // Check for PInvoke call types that we don't handle in codegen yet.
+ assert(!call->IsUnmanaged());
+ assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == NULL));
+
+ // First move the this pointer (if any) onto the regular arg list
+ GenTreePtr thisPtr = NULL;
+ if (call->gtCallObjp)
+ {
+ GenTreePtr objp = call->gtCallObjp;
+ call->gtCallObjp = NULL;
+
+ if ((call->gtFlags & GTF_CALL_NULLCHECK) || call->IsVirtualVtable())
+ {
+ thisPtr = gtClone(objp, true);
+ var_types vt = objp->TypeGet();
+ if (thisPtr == NULL)
+ {
+ // Too complex, so use a temp
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+ if (!call->IsVirtualVtable())
+ {
+ // Add an indirection to get the nullcheck
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
+ asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
+ }
+ objp = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
+ thisPtr = gtNewLclvNode(lclNum, vt);
+ }
+ else if (!call->IsVirtualVtable())
+ {
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
+ objp = gtNewOperNode(GT_COMMA, vt, ind, objp);
+ thisPtr = gtClone(thisPtr, true);
+ }
+
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+
+ call->gtCallArgs = gtNewListNode(objp, call->gtCallArgs);
+ }
+
+ // Add the extra VSD parameter if needed
+ CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
+ if (call->IsVirtualStub())
+ {
+ flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
+
+ GenTreePtr arg;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ arg = gtClone(call->gtCallAddr, true);
+ noway_assert(arg != NULL);
+ }
+ else
+ {
+ noway_assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
+ ssize_t addr = ssize_t(call->gtStubCallStubAddr);
+ arg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+
+ // Change the call type, so we can add the extra indirection here, rather than in codegen
+ call->gtCallAddr = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+ call->gtStubCallStubAddr = NULL;
+ call->gtCallType = CT_INDIRECT;
+ }
+ // Add the extra indirection to generate the real target
+ call->gtCallAddr = gtNewOperNode(GT_IND, TYP_I_IMPL, call->gtCallAddr);
+ call->gtFlags |= GTF_EXCEPT;
+
+ // And push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+ }
+ else if (call->IsVirtualVtable())
+ {
+ // TODO-ARM-NYI: for x64 handle CORINFO_TAILCALL_THIS_IN_SECRET_REGISTER
+
+ noway_assert(thisPtr != NULL);
+
+ GenTreePtr add = gtNewOperNode(GT_ADD, TYP_I_IMPL, thisPtr, gtNewIconNode(VPTR_OFFS, TYP_I_IMPL));
+ GenTreePtr vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
+ vtbl->gtFlags |= GTF_EXCEPT;
+
+ unsigned vtabOffsOfIndirection;
+ unsigned vtabOffsAfterIndirection;
+ info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection);
+
+ /* Get the appropriate vtable chunk */
+
+ add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
+ vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
+
+ /* Now the appropriate vtable slot */
+
+ add = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtbl, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
+ vtbl = gtNewOperNode(GT_IND, TYP_I_IMPL, add);
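+
+ // The computed call target is therefore (illustrative sketch):
+ // IND(IND(IND(thisPtr + VPTR_OFFS) + vtabOffsOfIndirection) + vtabOffsAfterIndirection)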
+
+ // Switch this to a plain indirect call
+ call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
+ assert(!call->IsVirtual());
+ call->gtCallType = CT_INDIRECT;
+
+ call->gtCallAddr = vtbl;
+ call->gtCallCookie = NULL;
+ call->gtFlags |= GTF_EXCEPT;
+ }
+
+ // Now inject a placeholder for the real call target that codegen
+ // will generate
+ GenTreePtr arg = new (this, GT_NOP) GenTreeOp(GT_NOP, TYP_I_IMPL);
+ codeGen->genMarkTreeInReg(arg, REG_TAILCALL_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // Lastly inject the pointer for the copy routine
+ noway_assert(call->callSig != NULL);
+ void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
+ arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // It is now a varargs tail call
+ call->gtCallMoreFlags = GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL;
+ call->gtFlags &= ~GTF_CALL_POP_ARGS;
+
+#elif defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
+
+ // x86 classic codegen doesn't require any morphing
+
+ // For the helper-assisted tail calls, we need to push all the arguments
+ // into a single list, and then add a few extra at the beginning or end.
+ //
+ // For AMD64, the tailcall helper (JIT_TailCall) is defined as:
+ //
+ // JIT_TailCall(void* copyRoutine, void* callTarget, <function args>)
+ //
+ // We need to add "copyRoutine" and "callTarget" extra params at the beginning.
+ // But callTarget is determined by the Lower phase. Therefore, we add a placeholder arg
+ // for callTarget here which will be replaced later with callTarget in tail call lowering.
+ //
+ // For x86, the tailcall helper is defined as:
+ //
+ // JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
+ // callTarget)
+ //
+ // Note that the special arguments are on the stack, whereas the function arguments follow
+ // the normal convention: there might be register arguments in ECX and EDX. The stack will
+ // look like (highest address at the top):
+ // first normal stack argument
+ // ...
+ // last normal stack argument
+ // numberOfOldStackArgs
+ // numberOfNewStackArgs
+ // flags
+ // callTarget
+ //
+ // Each special arg is 4 bytes.
+ //
+ // 'flags' is a bitmask where:
+ // 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
+ // callee-saved registers for tailcall functions. Note that the helper assumes
+ // that the callee-saved registers live immediately below EBP, and must have been
+ // pushed in this order: EDI, ESI, EBX.
+ // 2 == call target is a virtual stub dispatch.
+ //
+ // The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
+ // on the custom calling convention.
+
+ // Check for PInvoke call types that we don't handle in codegen yet.
+ assert(!call->IsUnmanaged());
+ assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
+
+ // Don't support tail calling helper methods
+ assert(call->gtCallType != CT_HELPER);
+
+ // We come down this route only for tail prefixed calls that cannot be dispatched as
+ // fast tail calls
+ assert(!call->IsImplicitTailCall());
+ assert(!fgCanFastTailCall(call));
+
+ // First move the 'this' pointer (if any) onto the regular arg list. We do this because
+ // we are going to prepend special arguments onto the argument list (for non-x86 platforms),
+ // and thus shift where the 'this' pointer will be passed to a later argument slot. In
+ // addition, for all platforms, we are going to change the call into a helper call. Our code
+ // generation code for handling calls to helpers does not handle 'this' pointers. So, when we
+ // do this transformation, we must explicitly create a null 'this' pointer check, if required,
+ // since special 'this' pointer handling will no longer kick in.
+ //
+ // Some call types, such as virtual vtable calls, require creating a call address expression
+ // that involves the "this" pointer. Lowering will sometimes create an embedded statement
+ // to create a temporary that is assigned to the "this" pointer expression, and then use
+ // that temp to create the call address expression. This temp creation embedded statement
+ // will occur immediately before the "this" pointer argument, and then will be used for both
+ // the "this" pointer argument as well as the call address expression. In the normal ordering,
+ // the embedded statement establishing the "this" pointer temp will execute before both uses
+ // of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
+ // normal call argument list, and insert a placeholder which will hold the call address
+ // expression. For non-x86, things are ok, because the order of execution of these is not
+ // altered. However, for x86, the call address expression is inserted as the *last* argument
+ // in the argument list, *after* the "this" pointer. It will be put on the stack, and be
+ // evaluated first. To ensure we don't end up with out-of-order temp definition and use,
+ // for those cases where call lowering creates an embedded form temp of "this", we will
+ // create a temp here, early, that will later get morphed correctly.
+
+ if (call->gtCallObjp)
+ {
+ GenTreePtr thisPtr = nullptr;
+ GenTreePtr objp = call->gtCallObjp;
+ call->gtCallObjp = nullptr;
+
+#ifdef _TARGET_X86_
+ if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->IsLocal())
+ {
+ // tmp = "this"
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+
+ // COMMA(tmp = "this", tmp)
+ var_types vt = objp->TypeGet();
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ thisPtr = gtNewOperNode(GT_COMMA, vt, asg, tmp);
+
+ objp = thisPtr;
+ }
+#endif // _TARGET_X86_
+
+ if (call->NeedsNullCheck())
+ {
+ // clone "this" if "this" has no side effects.
+ if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
+ {
+ thisPtr = gtClone(objp, true);
+ }
+
+ var_types vt = objp->TypeGet();
+ if (thisPtr == nullptr)
+ {
+ // create a temp if either "this" has side effects or "this" is too complex to clone.
+
+ // tmp = "this"
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
+ GenTreePtr asg = gtNewTempAssign(lclNum, objp);
+
+ // COMMA(tmp = "this", deref(tmp))
+ GenTreePtr tmp = gtNewLclvNode(lclNum, vt);
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, tmp);
+ asg = gtNewOperNode(GT_COMMA, TYP_VOID, asg, ind);
+
+ // COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
+ thisPtr = gtNewOperNode(GT_COMMA, vt, asg, gtNewLclvNode(lclNum, vt));
+ }
+ else
+ {
+ // thisPtr = COMMA(deref("this"), "this")
+ GenTreePtr ind = gtNewOperNode(GT_IND, TYP_INT, thisPtr);
+ thisPtr = gtNewOperNode(GT_COMMA, vt, ind, gtClone(objp, true));
+ }
+
+ call->gtFlags &= ~GTF_CALL_NULLCHECK;
+ }
+ else
+ {
+ thisPtr = objp;
+ }
+
+ // During rationalization tmp="this" and null check will
+ // materialize as embedded stmts in the right execution order.
+ assert(thisPtr != nullptr);
+ call->gtCallArgs = gtNewListNode(thisPtr, call->gtCallArgs);
+ }
+
+#if defined(_TARGET_AMD64_)
+
+ // Add the extra VSD parameter to arg list in case of VSD calls.
+ // Tail call arg copying thunk will move this extra VSD parameter
+ // to R11 before tail calling VSD stub. See CreateTailCallCopyArgsThunk()
+ // in Stublinkerx86.cpp for more details.
+ CorInfoHelperTailCallSpecialHandling flags = CorInfoHelperTailCallSpecialHandling(0);
+ if (call->IsVirtualStub())
+ {
+ GenTreePtr stubAddrArg;
+
+ flags = CORINFO_TAILCALL_STUB_DISPATCH_ARG;
+
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ stubAddrArg = gtClone(call->gtCallAddr, true);
+ noway_assert(stubAddrArg != nullptr);
+ }
+ else
+ {
+ noway_assert((call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT) != 0);
+
+ ssize_t addr = ssize_t(call->gtStubCallStubAddr);
+ stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
+ }
+
+ // Push the stub address onto the list of arguments
+ call->gtCallArgs = gtNewListNode(stubAddrArg, call->gtCallArgs);
+ }
+
+ // Now inject a placeholder for the real call target that Lower phase will generate.
+ GenTreePtr arg = gtNewIconNode(0, TYP_I_IMPL);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+ // Inject the pointer for the copy routine to be used for struct copying
+ noway_assert(call->callSig != nullptr);
+ void* pfnCopyArgs = info.compCompHnd->getTailCallCopyArgsThunk(call->callSig, flags);
+ arg = gtNewIconHandleNode(ssize_t(pfnCopyArgs), GTF_ICON_FTN_ADDR);
+ call->gtCallArgs = gtNewListNode(arg, call->gtCallArgs);
+
+#else // !_TARGET_AMD64_
+
+ // Find the end of the argument list. ppArg will point at the last pointer; setting *ppArg will
+ // append to the list.
+ GenTreeArgList** ppArg = &call->gtCallArgs;
+ for (GenTreeArgList* args = call->gtCallArgs; args != nullptr; args = args->Rest())
+ {
+ ppArg = (GenTreeArgList**)&args->gtOp2;
+ }
+ assert(ppArg != nullptr);
+ assert(*ppArg == nullptr);
+
+ unsigned nOldStkArgsWords =
+ (compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
+ GenTree* arg3 = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg3, nullptr); // numberOfOldStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg2 = gtNewIconNode(9, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg2, nullptr); // numberOfNewStackArgs
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the flags.
+ // The constant will be replaced.
+ GenTree* arg1 = gtNewIconNode(8, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg1, nullptr);
+ ppArg = (GenTreeArgList**)&((*ppArg)->gtOp2);
+
+ // Inject a placeholder for the real call target that the Lowering phase will generate.
+ // The constant will be replaced.
+ GenTree* arg0 = gtNewIconNode(7, TYP_I_IMPL);
+ *ppArg = gtNewListNode(arg0, nullptr);
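+
+ // At this point the x86 argument list ends with (illustrative):
+ // ..., numberOfOldStackArgs, <numberOfNewStackArgs placeholder>,
+ // <flags placeholder>, <callTarget placeholder>
+ // matching the JIT_TailCall layout described above; the Lowering phase replaces
+ // the placeholder constants with the real values.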
+
+#endif // !_TARGET_AMD64_
+
+ // It is now a varargs tail call dispatched via helper.
+ call->gtCallMoreFlags |= GTF_CALL_M_VARARGS | GTF_CALL_M_TAILCALL | GTF_CALL_M_TAILCALL_VIA_HELPER;
+ call->gtFlags &= ~GTF_CALL_POP_ARGS;
+
+#endif // _TARGET_*
+
+ JITDUMP("fgMorphTailCall (after):\n");
+ DISPTREE(call);
+}
+
+//------------------------------------------------------------------------------
+// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
+//
+//
+// Arguments:
+// block - basic block ending with a recursive fast tail call
+// recursiveTailCall - recursive tail call to transform
+//
+// Notes:
+// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
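+//
+// Illustrative sketch: a recursive tail call such as "return Foo(x - 1, 1)" is replaced
+// by assignments of the new argument values (via temps where needed) to the parameter
+// locals, followed by an unconditional jump back to the start of the method; see the
+// commented example inside the method body.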
+
+void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
+{
+ assert(recursiveTailCall->IsTailCallConvertibleToLoop());
+ GenTreePtr last = block->lastStmt();
+ assert(recursiveTailCall == last->gtStmt.gtStmtExpr);
+
+ // Transform recursive tail call into a loop.
+
+ GenTreePtr earlyArgInsertionPoint = last;
+ IL_OFFSETX callILOffset = last->gtStmt.gtStmtILoffsx;
+
+ // Hoist arg setup statement for the 'this' argument.
+ GenTreePtr thisArg = recursiveTailCall->gtCallObjp;
+ if (thisArg && !thisArg->IsNothingNode() && !thisArg->IsArgPlaceHolderNode())
+ {
+ GenTreePtr thisArgStmt = gtNewStmt(thisArg, callILOffset);
+ fgInsertStmtBefore(block, earlyArgInsertionPoint, thisArgStmt);
+ }
+
+ // All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
+ // then the temps need to be assigned to the method parameters. This is done so that the caller
+ // parameters are not re-assigned before call arguments depending on them are evaluated.
+ // tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
+ // where the next temp or parameter assignment should be inserted.
+
+ // In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
+ // while the second call argument (const 1) doesn't.
+ // Basic block before tail recursion elimination:
+ // ***** BB04, stmt 1 (top level)
+ // [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
+ // [000033] --C - G------ - \--* call void RecursiveMethod
+ // [000030] ------------ | / --* const int - 1
+ // [000031] ------------arg0 in rcx + --* +int
+ // [000029] ------------ | \--* lclVar int V00 arg1
+ // [000032] ------------arg1 in rdx \--* const int 1
+ //
+ //
+ // Basic block after tail recursion elimination :
+ // ***** BB04, stmt 1 (top level)
+ // [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000030] ------------ | / --* const int - 1
+ // [000031] ------------ | / --* +int
+ // [000029] ------------ | | \--* lclVar int V00 arg1
+ // [000050] - A---------- \--* = int
+ // [000049] D------N---- \--* lclVar int V02 tmp0
+ //
+ // ***** BB04, stmt 2 (top level)
+ // [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000052] ------------ | / --* lclVar int V02 tmp0
+ // [000054] - A---------- \--* = int
+ // [000053] D------N---- \--* lclVar int V00 arg0
+
+ // ***** BB04, stmt 3 (top level)
+ // [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
+ // [000032] ------------ | / --* const int 1
+ // [000057] - A---------- \--* = int
+ // [000056] D------N---- \--* lclVar int V01 arg1
+
+ GenTreePtr tmpAssignmentInsertionPoint = last;
+ GenTreePtr paramAssignmentInsertionPoint = last;
+
+ // Process early args. They may contain both setup statements for late args and actual args.
+ // Early args don't include 'this' arg. We need to account for that so that the call to gtArgEntryByArgNum
+ // below has the correct second argument.
+ int earlyArgIndex = (thisArg == nullptr) ? 0 : 1;
+ for (GenTreeArgList* earlyArgs = recursiveTailCall->gtCallArgs; earlyArgs != nullptr;
+ (earlyArgIndex++, earlyArgs = earlyArgs->Rest()))
+ {
+ GenTreePtr earlyArg = earlyArgs->Current();
+ if (!earlyArg->IsNothingNode() && !earlyArg->IsArgPlaceHolderNode())
+ {
+ if ((earlyArg->gtFlags & GTF_LATE_ARG) != 0)
+ {
+ // This is a setup node so we need to hoist it.
+ GenTreePtr earlyArgStmt = gtNewStmt(earlyArg, callILOffset);
+ fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
+ }
+ else
+ {
+ // This is an actual argument that needs to be assigned to the corresponding caller parameter.
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByArgNum(recursiveTailCall, earlyArgIndex);
+ GenTreePtr paramAssignStmt =
+ fgAssignRecursiveCallArgToCallerParam(earlyArg, curArgTabEntry, block, callILOffset,
+ tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
+ if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
+ {
+ // All temp assignments will happen before the first param assignment.
+ tmpAssignmentInsertionPoint = paramAssignStmt;
+ }
+ }
+ }
+ }
+
+ // Process late args.
+ int lateArgIndex = 0;
+ for (GenTreeArgList* lateArgs = recursiveTailCall->gtCallLateArgs; lateArgs != nullptr;
+ (lateArgIndex++, lateArgs = lateArgs->Rest()))
+ {
+ // A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
+ GenTreePtr lateArg = lateArgs->Current();
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByLateArgIndex(recursiveTailCall, lateArgIndex);
+ GenTreePtr paramAssignStmt =
+ fgAssignRecursiveCallArgToCallerParam(lateArg, curArgTabEntry, block, callILOffset,
+ tmpAssignmentInsertionPoint, paramAssignmentInsertionPoint);
+
+ if ((tmpAssignmentInsertionPoint == last) && (paramAssignStmt != nullptr))
+ {
+ // All temp assignments will happen before the first param assignment.
+ tmpAssignmentInsertionPoint = paramAssignStmt;
+ }
+ }
+
+ // If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
+ // compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
+ // block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
+ if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
+ {
+ var_types thisType = lvaTable[info.compThisArg].TypeGet();
+ GenTreePtr arg0 = gtNewLclvNode(lvaArg0Var, thisType);
+ GenTreePtr arg0Assignment = gtNewAssignNode(arg0, gtNewLclvNode(info.compThisArg, thisType));
+ GenTreePtr arg0AssignmentStmt = gtNewStmt(arg0Assignment, callILOffset);
+ fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0AssignmentStmt);
+ }
+
+ // Remove the call
+ fgRemoveStmt(block, last);
+
+ // Set the loop edge.
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = fgFirstBBisScratch() ? fgFirstBB->bbNext : fgFirstBB;
+ fgAddRefPred(block->bbJumpDest, block);
+ block->bbFlags &= ~BBF_HAS_JMP;
+}
+
+//------------------------------------------------------------------------------
+// fgAssignRecursiveCallArgToCallerParam : Assign an argument of a recursive call to the corresponding caller parameter.
+//
+// Arguments:
+// arg - argument to assign
+// argTabEntry - argument table entry corresponding to arg
+// block - basic block the call is in
+// callILOffset - IL offset of the call
+// tmpAssignmentInsertionPoint - tree before which temp assignment should be inserted (if necessary)
+// paramAssignmentInsertionPoint - tree before which parameter assignment should be inserted
+//
+// Return Value:
+// parameter assignment statement if one was inserted; nullptr otherwise.
+
+GenTreePtr Compiler::fgAssignRecursiveCallArgToCallerParam(GenTreePtr arg,
+ fgArgTabEntryPtr argTabEntry,
+ BasicBlock* block,
+ IL_OFFSETX callILOffset,
+ GenTreePtr tmpAssignmentInsertionPoint,
+ GenTreePtr paramAssignmentInsertionPoint)
+{
+ // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
+ // some argument trees may reference parameters directly.
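+ // For illustration (hypothetical example): for a recursive tail call 'f(arg1, arg0)', assigning
+ // the parameters directly would clobber arg0 before it is read for the second argument:
+ //     arg0 = arg1; arg1 = arg0;                   // wrong: arg1 gets the new value of arg0
+ // Going through temps first is always safe:
+ //     tmp0 = arg1; tmp1 = arg0; arg0 = tmp0; arg1 = tmp1;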
+
+ GenTreePtr argInTemp = nullptr;
+ unsigned originalArgNum = argTabEntry->argNum;
+ bool needToAssignParameter = true;
+
+ // TODO-CQ: enable calls with struct arguments passed in registers.
+ noway_assert(!varTypeIsStruct(arg->TypeGet()));
+
+ if ((argTabEntry->isTmp) || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
+ {
+ // The argument is already assigned to a temp or is a const.
+ argInTemp = arg;
+ }
+ else if (arg->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = arg->AsLclVar()->gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (!varDsc->lvIsParam)
+ {
+ // The argument is a non-parameter local so it doesn't need to be assigned to a temp.
+ argInTemp = arg;
+ }
+ else if (lclNum == originalArgNum)
+ {
+ // The argument is the same parameter local that we were about to assign so
+ // we can skip the assignment.
+ needToAssignParameter = false;
+ }
+ }
+
+ // TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
+ // any caller parameters. Some common cases are handled above but we may be able to eliminate
+ // more temp assignments.
+
+ GenTreePtr paramAssignStmt = nullptr;
+ if (needToAssignParameter)
+ {
+ if (argInTemp == nullptr)
+ {
+ // The argument is not assigned to a temp. We need to create a new temp and insert an assignment.
+ // TODO: we can avoid a temp assignment if we can prove that the argument tree
+ // doesn't involve any caller parameters.
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
+ GenTreePtr tempSrc = arg;
+ GenTreePtr tempDest = gtNewLclvNode(tmpNum, tempSrc->gtType);
+ GenTreePtr tmpAssignNode = gtNewAssignNode(tempDest, tempSrc);
+ GenTreePtr tmpAssignStmt = gtNewStmt(tmpAssignNode, callILOffset);
+ fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpAssignStmt);
+ argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
+ }
+
+ // Now assign the temp to the parameter.
+ LclVarDsc* paramDsc = lvaTable + originalArgNum;
+ assert(paramDsc->lvIsParam);
+ GenTreePtr paramDest = gtNewLclvNode(originalArgNum, paramDsc->lvType);
+ GenTreePtr paramAssignNode = gtNewAssignNode(paramDest, argInTemp);
+ paramAssignStmt = gtNewStmt(paramAssignNode, callILOffset);
+
+ fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
+ }
+ return paramAssignStmt;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GT_CALL tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphCall(GenTreeCall* call)
+{
+ if (call->CanTailCall())
+ {
+ // It should either be an explicit (i.e. tail prefixed) or an implicit tail call
+ assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
+
+ // It cannot be an inline candidate
+ assert(!call->IsInlineCandidate());
+
+ const char* szFailReason = nullptr;
+ bool hasStructParam = false;
+ if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
+ {
+ szFailReason = "Might turn into an intrinsic";
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+ szFailReason = "Needs security check";
+ }
+ else if (compLocallocUsed)
+ {
+ szFailReason = "Localloc used";
+ }
+#ifdef _TARGET_AMD64_
+ // Needed for Jit64 compat.
+ // In future, enabling tail calls from methods that need GS cookie check
+ // would require codegen side work to emit GS cookie check before a tail
+ // call.
+ else if (getNeedsGSSecurityCookie())
+ {
+ szFailReason = "GS Security cookie check";
+ }
+#endif
+#ifdef DEBUG
+ // DDB 99324: Just disable tailcall under compGcChecks stress mode.
+ else if (opts.compGcChecks)
+ {
+ szFailReason = "GcChecks";
+ }
+#endif
+#if FEATURE_TAILCALL_OPT
+ else
+ {
+ // We are still not sure whether it can be a tail call. When converting
+ // a call to an implicit tail call, we must check that there are no locals with
+ // their address taken. If that is the case, we have to assume that the address
+ // has been leaked and the current stack frame must live until after the final
+ // call.
+
+ // Verify that none of the vars has the lvHasLdAddrOp or lvAddrExposed bit set. Note
+ // that lvHasLdAddrOp is much more conservative. We cannot just base this on
+ // lvAddrExposed alone since it is not guaranteed to be set on all VarDscs
+ // during the morph stage. The reason for also checking lvAddrExposed is that in the case
+ // of vararg methods user args are marked as address exposed but not lvHasLdAddrOp.
+ // The combination of lvHasLdAddrOp and lvAddrExposed, though conservative, guarantees
+ // that we are never incorrect.
+ //
+ // TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
+ // address is taken. Such a flag could be set whenever lvHasLdAddrOp or LvAddrExposed
+ // is set. This avoids the need for iterating through all lcl vars of the current
+ // method. Right now throughout the code base we are not consistently using 'set'
+ // method to set lvHasLdAddrOp and lvAddrExposed flags.
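+ //
+ // Illustrative (hypothetical) example of a rejection: in
+ //     int x = ...; Use(ref x); return Callee();
+ // 'x' is address-exposed, so an implicit tail call to Callee() must be rejected because the
+ // caller's frame (and therefore the address of x) may still be observed while Callee() runs.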
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ bool hasAddrExposedVars = false;
+ bool hasStructPromotedParam = false;
+ bool hasPinnedVars = false;
+
+ for (varNum = 0, varDsc = lvaTable; varNum < lvaCount; varNum++, varDsc++)
+ {
+ // If the method is marked as an explicit tail call we will skip the
+ // following three hazard checks.
+ // We still must check for any struct parameters and set 'hasStructParam'
+ // so that we won't transform the recursive tail call into a loop.
+ //
+ if (call->IsImplicitTailCall())
+ {
+ if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed)
+ {
+ hasAddrExposedVars = true;
+ break;
+ }
+ if (varDsc->lvPromoted && varDsc->lvIsParam)
+ {
+ hasStructPromotedParam = true;
+ break;
+ }
+ if (varDsc->lvPinned)
+ {
+ // A tail call removes the method from the stack, which means the pinning
+ // goes away for the callee. We can't allow that.
+ hasPinnedVars = true;
+ break;
+ }
+ }
+ if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
+ {
+ hasStructParam = true;
+ // This prevents transforming a recursive tail call into a loop
+ // but doesn't prevent tail call optimization so we need to
+ // look at the rest of parameters.
+ continue;
+ }
+ }
+
+ if (hasAddrExposedVars)
+ {
+ szFailReason = "Local address taken";
+ }
+ if (hasStructPromotedParam)
+ {
+ szFailReason = "Has Struct Promoted Param";
+ }
+ if (hasPinnedVars)
+ {
+ szFailReason = "Has Pinned Vars";
+ }
+ }
+#endif // FEATURE_TAILCALL_OPT
+
+ if (varTypeIsStruct(call))
+ {
+ fgFixupStructReturn(call);
+ }
+
+ var_types callType = call->TypeGet();
+
+ // We have to ensure to pass the incoming retValBuf as the
+ // outgoing one. Using a temp will not do as this function will
+ // not regain control to do the copy.
+
+ if (info.compRetBuffArg != BAD_VAR_NUM)
+ {
+ noway_assert(callType == TYP_VOID);
+ GenTreePtr retValBuf = call->gtCallArgs->gtOp.gtOp1;
+ if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->gtLclVarCommon.gtLclNum != info.compRetBuffArg)
+ {
+ szFailReason = "Need to copy return buffer";
+ }
+ }
+
+ // If this is an opportunistic tail call and cannot be dispatched as
+ // fast tail call, go the non-tail call route. This is done for perf
+ // reason.
+ //
+ // Avoid the cost of determining whether can be dispatched as fast tail
+ // call if we already know that tail call cannot be honored for other
+ // reasons.
+ bool canFastTailCall = false;
+ if (szFailReason == nullptr)
+ {
+ canFastTailCall = fgCanFastTailCall(call);
+ if (!canFastTailCall)
+ {
+ // Implicit or opportunistic tail calls are always dispatched via fast tail call
+ // mechanism and never via tail call helper for perf.
+ if (call->IsImplicitTailCall())
+ {
+ szFailReason = "Opportunistic tail call cannot be dispatched as epilog+jmp";
+ }
+#ifndef LEGACY_BACKEND
+ else if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
+ {
+ // If we are here, it means that the call is explicitly ".tail" prefixed and cannot be
+ // dispatched as a fast tail call.
+
+ // Methods with non-standard args will have indirection cell or cookie param passed
+ // in callee trash register (e.g. R11). Tail call helper doesn't preserve it before
+ // tail calling the target method and hence ".tail" prefix on such calls needs to be
+ // ignored.
+ //
+ // Exception to the above rule: although Virtual Stub Dispatch (VSD) calls require
+ // extra stub param (e.g. in R11 on Amd64), they can still be called via tail call helper.
+ // This is done by adding stubAddr as an additional arg before the original list of
+ // args. For more details see fgMorphTailCall() and CreateTailCallCopyArgsThunk()
+ // in Stublinkerx86.cpp.
+ szFailReason = "Method with non-standard args passed in callee trash register cannot be tail "
+ "called via helper";
+ }
+#ifdef _TARGET_ARM64_
+ else
+ {
+ // NYI - TAILCALL_RECURSIVE/TAILCALL_HELPER.
+ // So, bail out if we can't make fast tail call.
+ szFailReason = "Non-qualified fast tail call";
+ }
+#endif
+#endif // LEGACY_BACKEND
+ }
+ }
+
+ // Clear these flags before calling fgMorphCall() to avoid recursion.
+ bool isTailPrefixed = call->IsTailPrefixedCall();
+ call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
+
+#if FEATURE_TAILCALL_OPT
+ call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
+#endif
+
+#ifdef FEATURE_PAL
+ if (!canFastTailCall && szFailReason == nullptr)
+ {
+ szFailReason = "Non fast tail calls disabled for PAL based systems.";
+ }
+#endif // FEATURE_PAL
+
+ if (szFailReason != nullptr)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nRejecting tail call late for call ");
+ printTreeID(call);
+ printf(": %s\n", szFailReason);
+ }
+#endif
+
+ // for non user funcs, we have no handles to report
+ info.compCompHnd->reportTailCallDecision(nullptr,
+ (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
+ isTailPrefixed, TAILCALL_FAIL, szFailReason);
+
+ goto NO_TAIL_CALL;
+ }
+
+#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
+ // We enable shared-ret tail call optimization for recursive calls even if
+ // FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
+ if (gtIsRecursiveCall(call))
+#endif
+ {
+ // Many tailcalls will have call and ret in the same block, and thus be BBJ_RETURN,
+ // but if the call falls through to a ret, and we are doing a tailcall, change it here.
+ if (compCurBB->bbJumpKind != BBJ_RETURN)
+ {
+ compCurBB->bbJumpKind = BBJ_RETURN;
+ }
+ }
+
+ // Set this flag before calling fgMorphCall() to prevent inlining this call.
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
+
+ bool fastTailCallToLoop = false;
+#if FEATURE_TAILCALL_OPT
+ // TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
+ // or return type is a struct that can be passed in a register.
+ //
+ // TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
+ // hidden generic context param or through keep alive thisptr), then while transforming a recursive
+ // call to such a method requires that the generic context stored on stack slot be updated. Right now,
+ // fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
+ // a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
+ // generic type parameters of both caller and callee generic method are the same.
+ if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
+ !lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
+ {
+ call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
+ fastTailCallToLoop = true;
+ }
+#endif
+
+ // Do some target-specific transformations (before we process the args, etc.)
+ // This is needed only for tail prefixed calls that cannot be dispatched as
+ // fast calls.
+ if (!canFastTailCall)
+ {
+ fgMorphTailCall(call);
+ }
+
+ // Implementation note: If we optimize the tailcall to do a direct jump
+ // to the target function (after stomping on the return address, etc),
+ // without using CORINFO_HELP_TAILCALL, we have to make certain that
+ // we don't starve the hijacking logic (by stomping on the hijacked
+ // return address etc).
+
+ // At this point, we are committed to do the tailcall.
+ compTailCallUsed = true;
+
+ CorInfoTailCall tailCallResult;
+
+ if (fastTailCallToLoop)
+ {
+ tailCallResult = TAILCALL_RECURSIVE;
+ }
+ else if (canFastTailCall)
+ {
+ tailCallResult = TAILCALL_OPTIMIZED;
+ }
+ else
+ {
+ tailCallResult = TAILCALL_HELPER;
+ }
+
+ // for non user funcs, we have no handles to report
+ info.compCompHnd->reportTailCallDecision(nullptr,
+ (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
+ isTailPrefixed, tailCallResult, nullptr);
+
+ // As we will actually call CORINFO_HELP_TAILCALL, set the callTyp to TYP_VOID
+ // to avoid doing any extra work for the return value.
+ call->gtType = TYP_VOID;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nGTF_CALL_M_TAILCALL bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ if (fastTailCallToLoop)
+ {
+ printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
+ printTreeID(call);
+ printf("\n");
+ }
+ }
+#endif
+
+ GenTreePtr stmtExpr = fgMorphStmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ // Tail call needs to be in one of the following IR forms
+ // Either a call stmt or
+ // GT_RETURN(GT_CALL(..)) or
+ // var = call
+ noway_assert((stmtExpr->gtOper == GT_CALL && stmtExpr == call) ||
+ (stmtExpr->gtOper == GT_RETURN &&
+ (stmtExpr->gtOp.gtOp1 == call || stmtExpr->gtOp.gtOp1->gtOp.gtOp1 == call)) ||
+ (stmtExpr->gtOper == GT_ASG && stmtExpr->gtOp.gtOp2 == call));
+#endif
+
+ // For void calls, we would have created a GT_CALL in the stmt list.
+ // For non-void calls, we would have created a GT_RETURN(GT_CAST(GT_CALL)).
+ // For calls returning structs, we would have a void call, followed by a void return.
+ // For debuggable code, it would be an assignment of the call to a temp.
+ // We want to get rid of any of these extra trees, and just leave
+ // the call.
+ GenTreePtr nextMorphStmt = fgMorphStmt->gtNext;
+
+#ifdef _TARGET_AMD64_
+ // Legacy Jit64 Compat:
+ // There could be any number of GT_NOPs between tail call and GT_RETURN.
+ // That is tail call pattern could be one of the following:
+ // 1) tail.call, nop*, ret
+ // 2) tail.call, nop*, pop, nop*, ret
+ // 3) var=tail.call, nop*, ret(var)
+ // 4) var=tail.call, nop*, pop, ret
+ //
+ // See impIsTailCallILPattern() for details on tail call IL patterns
+ // that are supported.
+ if ((stmtExpr->gtOper == GT_CALL) || (stmtExpr->gtOper == GT_ASG))
+ {
+ // First delete all GT_NOPs after the call
+ GenTreePtr morphStmtToRemove = nullptr;
+ while (nextMorphStmt != nullptr)
+ {
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ if (!nextStmtExpr->IsNothingNode())
+ {
+ break;
+ }
+
+ morphStmtToRemove = nextMorphStmt;
+ nextMorphStmt = morphStmtToRemove->gtNext;
+ fgRemoveStmt(compCurBB, morphStmtToRemove);
+ }
+
+ // Check to see if there is a pop.
+ // Since tail call is honored, we can get rid of the stmt corresponding to pop.
+ if (nextMorphStmt != nullptr && nextMorphStmt->gtStmt.gtStmtExpr->gtOper != GT_RETURN)
+ {
+ // Note that pop opcode may or may not result in a new stmt (for details see
+ // impImportBlockCode()). Hence, it is not possible to assert about the IR
+ // form generated by pop but pop tree must be side-effect free so that we can
+ // delete it safely.
+ GenTreePtr popStmt = nextMorphStmt;
+ nextMorphStmt = nextMorphStmt->gtNext;
+
+ noway_assert((popStmt->gtStmt.gtStmtExpr->gtFlags & GTF_ALL_EFFECT) == 0);
+ fgRemoveStmt(compCurBB, popStmt);
+ }
+
+ // Next delete any GT_NOP nodes after pop
+ while (nextMorphStmt != nullptr)
+ {
+ GenTreePtr nextStmtExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ if (!nextStmtExpr->IsNothingNode())
+ {
+ break;
+ }
+
+ morphStmtToRemove = nextMorphStmt;
+ nextMorphStmt = morphStmtToRemove->gtNext;
+ fgRemoveStmt(compCurBB, morphStmtToRemove);
+ }
+ }
+#endif // _TARGET_AMD64_
+
+ // Delete GT_RETURN if any
+ if (nextMorphStmt != nullptr)
+ {
+ GenTreePtr retExpr = nextMorphStmt->gtStmt.gtStmtExpr;
+ noway_assert(retExpr->gtOper == GT_RETURN);
+
+ // If var=call, then the next stmt must be a GT_RETURN(TYP_VOID) or GT_RETURN(var).
+ // This can occur if impSpillStackEnsure() has introduced an assignment to a temp.
+ if (stmtExpr->gtOper == GT_ASG && info.compRetType != TYP_VOID)
+ {
+ noway_assert(stmtExpr->gtGetOp1()->OperIsLocal());
+ noway_assert(stmtExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum ==
+ retExpr->gtGetOp1()->AsLclVarCommon()->gtLclNum);
+ }
+
+ fgRemoveStmt(compCurBB, nextMorphStmt);
+ }
+
+ fgMorphStmt->gtStmt.gtStmtExpr = call;
+
+ // Tail call via helper: The VM can't use return address hijacking if we're
+ // not going to return and the helper doesn't have enough info to safely poll,
+ // so we poll before the tail call, if the block isn't already safe. Since
+ // tail call via helper is a slow mechanism it doesn't matter whether we emit a
+ // GC poll. This is done to be in parity with Jit64. Also this avoids GC info
+ // size increase if almost all methods are expected to be tail calls (e.g. F#).
+ //
+ // Note that we can avoid emitting a GC poll if we know that the current BB is
+ // dominated by a GC-SafePoint block. But we don't have dominator info at this
+ // point. One option is to just add a placeholder node for the GC poll (e.g. GT_GCPOLL)
+ // here and remove it in lowering if the block is dominated by a GC-SafePoint. For
+ // now it is not clear whether optimizing slow tail calls is worth the effort. As a
+ // low-cost check, we check whether the first and current basic blocks are
+ // GC-SafePoints.
+ //
+ // Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead, fgSetBlockOrder()
+ // is going to mark the method as fully interruptible if the block containing this tail
+ // call is reachable without executing any call.
+ if (canFastTailCall || (fgFirstBB->bbFlags & BBF_GC_SAFE_POINT) || (compCurBB->bbFlags & BBF_GC_SAFE_POINT) ||
+ !fgCreateGCPoll(GCPOLL_INLINE, compCurBB))
+ {
+ // We didn't insert a poll block, so we need to morph the call now
+ // (Normally it will get morphed when we get to the split poll block)
+ GenTreePtr temp = fgMorphCall(call);
+ noway_assert(temp == call);
+ }
+
+ // Tail call via helper: we just call CORINFO_HELP_TAILCALL, and it jumps to
+ // the target. So we don't need an epilog - just like CORINFO_HELP_THROW.
+ //
+ // Fast tail call: in case of fast tail calls, we need a jmp epilog and
+ // hence mark it as BBJ_RETURN with BBF_JMP flag set.
+ noway_assert(compCurBB->bbJumpKind == BBJ_RETURN);
+
+ if (canFastTailCall)
+ {
+ compCurBB->bbFlags |= BBF_HAS_JMP;
+ }
+ else
+ {
+ compCurBB->bbJumpKind = BBJ_THROW;
+ }
+
+ // For non-void calls, we return a place holder which will be
+ // used by the parent GT_RETURN node of this call.
+
+ GenTree* result = call;
+ if (callType != TYP_VOID && info.compRetType != TYP_VOID)
+ {
+#ifdef FEATURE_HFA
+ // Return a dummy node, as the return is already removed.
+ if (callType == TYP_STRUCT)
+ {
+ // This is a HFA, use float 0.
+ callType = TYP_FLOAT;
+ }
+#elif defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ // Return a dummy node, as the return is already removed.
+ if (varTypeIsStruct(callType))
+ {
+ // This is a register-returned struct. Return a 0.
+ // The actual return registers are hacked in lower and the register allocator.
+ callType = TYP_INT;
+ }
+#endif
+#ifdef FEATURE_SIMD
+ // Return a dummy node, as the return is already removed.
+ if (varTypeIsSIMD(callType))
+ {
+ callType = TYP_DOUBLE;
+ }
+#endif
+ result = gtNewZeroConNode(genActualType(callType));
+ result = fgMorphTree(result);
+ }
+
+ return result;
+ }
+
+NO_TAIL_CALL:
+
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0 &&
+ (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
+#ifdef FEATURE_READYTORUN_COMPILER
+ || call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
+#endif
+ ) &&
+ (call == fgMorphStmt->gtStmt.gtStmtExpr))
+ {
+ // This is call to CORINFO_HELP_VIRTUAL_FUNC_PTR with ignored result.
+ // Transform it into a null check.
+
+ GenTreePtr thisPtr = call->gtCallArgs->gtOp.gtOp1;
+
+ GenTreePtr nullCheck = gtNewOperNode(GT_IND, TYP_I_IMPL, thisPtr);
+ nullCheck->gtFlags |= GTF_EXCEPT;
+
+ return fgMorphTree(nullCheck);
+ }
+
+ noway_assert(call->gtOper == GT_CALL);
+
+ //
+ // Only count calls once (only in the global morph phase)
+ //
+ if (fgGlobalMorph)
+ {
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ optCallCount++;
+ optIndirectCallCount++;
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ optCallCount++;
+ if (call->IsVirtual())
+ {
+ optIndirectCallCount++;
+ }
+ }
+ }
+
+ // Couldn't inline - remember that this BB contains method calls
+
+ // If this is a 'regular' call, mark the basic block as
+ // having a call (for computing full interruptibility).
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_AMD64_
+ // Amd64 note: If this is a fast tail call then don't count it as a call
+ // since we don't insert GC-polls but instead make the method fully GC
+ // interruptible.
+ if (!call->IsFastTailCall())
+#endif
+ {
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+ else if (call->gtCallType == CT_USER_FUNC)
+ {
+ if ((call->gtCallMoreFlags & GTF_CALL_M_NOGCCHECK) == 0)
+ {
+ compCurBB->bbFlags |= BBF_GC_SAFE_POINT;
+ }
+ }
+ // otherwise we have a CT_HELPER
+ }
+
+ // Morph Type.op_Equality and Type.op_Inequality
+ // We need to do this before the arguments are morphed
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
+ {
+ CorInfoIntrinsics methodID = info.compCompHnd->getIntrinsicID(call->gtCallMethHnd);
+
+ genTreeOps simpleOp = GT_CALL;
+ if (methodID == CORINFO_INTRINSIC_TypeEQ)
+ {
+ simpleOp = GT_EQ;
+ }
+ else if (methodID == CORINFO_INTRINSIC_TypeNEQ)
+ {
+ simpleOp = GT_NE;
+ }
+
+ if (simpleOp == GT_EQ || simpleOp == GT_NE)
+ {
+ noway_assert(call->TypeGet() == TYP_INT);
+
+ // Check for GetClassFromHandle(handle) and obj.GetType(), both of which will only return RuntimeType
+ // objects. Then if either operand is one of these two calls we can simplify op_Equality/op_Inequality to
+ // GT_EQ/GT_NE: One important invariant that should never change is that type equivalency is always
+ // equivalent to object identity equality for runtime type objects in reflection. This is also reflected
+ // in RuntimeTypeHandle::TypeEquals. If this invariant were ever broken, we would need to remove the
+ // optimization below.
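+ //
+ // For illustration: a managed expression such as 'obj.GetType() == typeof(string)' reaches here
+ // as a call to Type.op_Equality; the code below replaces that call with a GT_EQ of the two
+ // RuntimeType object references, which fgMorphSmpOp can then optimize further (see the two
+ // patterns listed below).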
+
+ GenTreePtr op1 = call->gtCallArgs->gtOp.gtOp1;
+ GenTreePtr op2 = call->gtCallArgs->gtOp.gtOp2->gtOp.gtOp1;
+
+ if (gtCanOptimizeTypeEquality(op1) || gtCanOptimizeTypeEquality(op2))
+ {
+ GenTreePtr compare = gtNewOperNode(simpleOp, TYP_INT, op1, op2);
+
+ // fgMorphSmpOp will further optimize the following patterns:
+ // 1. typeof(...) == typeof(...)
+ // 2. typeof(...) == obj.GetType()
+ return fgMorphTree(compare);
+ }
+ }
+ }
+
+ // Make sure that return buffers containing GC pointers that aren't too large are pointers into the stack.
+ GenTreePtr origDest = nullptr; // Will only become non-null if we do the transformation (and thus require
+ // copy-back).
+ unsigned retValTmpNum = BAD_VAR_NUM;
+ CORINFO_CLASS_HANDLE structHnd = nullptr;
+ if (call->HasRetBufArg() &&
+ call->gtCallLateArgs == nullptr) // Don't do this if we're re-morphing (which will make late args non-null).
+ {
+ // We're enforcing the invariant that return buffer pointers (at least for
+ // struct return types containing GC pointers) are never pointers into the heap.
+ // The large majority of cases are address of local variables, which are OK.
+ // Otherwise, allocate a local of the given struct type, pass its address,
+ // then assign from that into the proper destination. (We don't need to do this
+ // if we're passing the caller's ret buff arg to the callee, since the caller's caller
+ // will maintain the same invariant.)
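+ //
+ // Sketch of the transformation (illustrative): for a call whose return buffer may point into the
+ // GC heap, we effectively rewrite
+ //     Callee(dest, ...)                    // dest is the original ret buff arg
+ // as
+ //     Callee(&retValTmp, ...)              // call writes into a new stack-allocated temp
+ //     copyBlk(dest, &retValTmp)            // copy-back inserted below via gtNewCpObjNode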
+
+ GenTreePtr dest = call->gtCallArgs->gtOp.gtOp1;
+ assert(dest->OperGet() != GT_ARGPLACE); // If it was, we'd be in a remorph, which we've already excluded above.
+ if (dest->gtType == TYP_BYREF && !(dest->OperGet() == GT_ADDR && dest->gtOp.gtOp1->OperGet() == GT_LCL_VAR))
+ {
+ // We'll exempt helper calls from this, assuming that the helper implementation
+ // follows the old convention, and does whatever barrier is required.
+ if (call->gtCallType != CT_HELPER)
+ {
+ structHnd = call->gtRetClsHnd;
+ if (info.compCompHnd->isStructRequiringStackAllocRetBuf(structHnd) &&
+ !((dest->OperGet() == GT_LCL_VAR || dest->OperGet() == GT_REG_VAR) &&
+ dest->gtLclVar.gtLclNum == info.compRetBuffArg))
+ {
+ origDest = dest;
+
+ retValTmpNum = lvaGrabTemp(true DEBUGARG("substitute local for ret buff arg"));
+ lvaSetStruct(retValTmpNum, structHnd, true);
+ dest = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
+ }
+ }
+ }
+
+ call->gtCallArgs->gtOp.gtOp1 = dest;
+ }
+
+ /* Process the "normal" argument list */
+ call = fgMorphArgs(call);
+ noway_assert(call->gtOper == GT_CALL);
+
+ // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
+ // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
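+ // For illustration: 'array[i] = null' is imported as a call to the CORINFO_HELP_ARRADDR_ST helper;
+ // since a null store can never fail the array covariance check, the helper call can be replaced
+ // by an ordinary array store (which still keeps the range check).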
+ if ((call->gtCallType == CT_HELPER) && (call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
+ {
+ GenTreePtr value = gtArgEntryByArgNum(call, 2)->node;
+
+ if (value->IsIntegralConst(0))
+ {
+ assert(value->OperGet() == GT_CNS_INT);
+ GenTreePtr arr = gtArgEntryByArgNum(call, 0)->node;
+ GenTreePtr index = gtArgEntryByArgNum(call, 1)->node;
+
+ arr = gtClone(arr, true);
+ if (arr != nullptr)
+ {
+ index = gtClone(index, true);
+ if (index != nullptr)
+ {
+ value = gtClone(value);
+ noway_assert(value != nullptr);
+
+ GenTreePtr nullCheckedArr = impCheckForNullPointer(arr);
+ GenTreePtr arrIndexNode = gtNewIndexRef(TYP_REF, nullCheckedArr, index);
+ GenTreePtr arrStore = gtNewAssignNode(arrIndexNode, value);
+ arrStore->gtFlags |= GTF_ASG;
+
+ return fgMorphTree(arrStore);
+ }
+ }
+ }
+ }
+
+ // Optimize get_ManagedThreadId(get_CurrentThread)
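+ // i.e. fold 'Thread.CurrentThread.ManagedThreadId' into a single call to
+ // CORINFO_HELP_GETCURRENTMANAGEDTHREADID instead of two intrinsic calls.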
+ if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
+ info.compCompHnd->getIntrinsicID(call->gtCallMethHnd) == CORINFO_INTRINSIC_GetManagedThreadId)
+ {
+ noway_assert(origDest == nullptr);
+ noway_assert(call->gtCallLateArgs->gtOp.gtOp1 != nullptr);
+
+ GenTreePtr innerCall = call->gtCallLateArgs->gtOp.gtOp1;
+
+ if (innerCall->gtOper == GT_CALL && (innerCall->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) &&
+ info.compCompHnd->getIntrinsicID(innerCall->gtCall.gtCallMethHnd) ==
+ CORINFO_INTRINSIC_GetCurrentManagedThread)
+ {
+ // substitute expression with call to helper
+ GenTreePtr newCall = gtNewHelperCallNode(CORINFO_HELP_GETCURRENTMANAGEDTHREADID, TYP_INT, 0);
+ JITDUMP("get_ManagedThreadId(get_CurrentThread) folding performed\n");
+ return fgMorphTree(newCall);
+ }
+ }
+
+ if (origDest != nullptr)
+ {
+ GenTreePtr retValVarAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, gtNewLclvNode(retValTmpNum, TYP_STRUCT));
+ // If the origDest expression was an assignment to a variable, it might be to an otherwise-unused
+ // var, which would allow the whole assignment to be optimized away to a NOP. So in that case, make the
+ // origDest into a comma that uses the var. Note that the var doesn't have to be a temp for this to
+ // be correct.
+ if (origDest->OperGet() == GT_ASG)
+ {
+ if (origDest->gtOp.gtOp1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreePtr var = origDest->gtOp.gtOp1;
+ origDest = gtNewOperNode(GT_COMMA, var->TypeGet(), origDest,
+ gtNewLclvNode(var->gtLclVar.gtLclNum, var->TypeGet()));
+ }
+ }
+ GenTreePtr copyBlk = gtNewCpObjNode(origDest, retValVarAddr, structHnd, false);
+ copyBlk = fgMorphTree(copyBlk);
+ GenTree* result = gtNewOperNode(GT_COMMA, TYP_VOID, call, copyBlk);
+#ifdef DEBUG
+ result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ return result;
+ }
+
+ if (call->IsNoReturn())
+ {
+ //
+ // If we know that the call does not return then we can set fgRemoveRestOfBlock
+ // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
+ // As a result the compiler won't need to preserve live registers across the call.
+ //
+ // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
+ // Besides, the tail call code is part of the epilog and converting the block to
+ // BBJ_THROW would result in the tail call being dropped as the epilog is generated
+ // only for BBJ_RETURN blocks.
+ //
+ // Currently this doesn't work for non-void callees. Some of the code that handles
+ // fgRemoveRestOfBlock expects the tree to have GTF_EXCEPT flag set but call nodes
+ // do not have this flag by default. We could add the flag here but the proper solution
+ // would be to replace the return expression with a local var node during inlining
+ // so the rest of the call tree stays in a separate statement. That statement can then
+ // be removed by fgRemoveRestOfBlock without needing to add GTF_EXCEPT anywhere.
+ //
+
+ if (!call->IsTailCall() && call->TypeGet() == TYP_VOID)
+ {
+ fgRemoveRestOfBlock = true;
+ }
+ }
+
+ return call;
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GTK_CONST tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphConst(GenTreePtr tree)
+{
+ noway_assert(tree->OperKind() & GTK_CONST);
+
+ /* Clear any exception flags or other unnecessary flags
+ * that may have been set before folding this node to a constant */
+
+ tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
+
+ if (tree->OperGet() != GT_CNS_STR)
+ {
+ return tree;
+ }
+
+ // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
+ // guarantee slow performance for that block. Instead cache the return value
+ // of CORINFO_HELP_STRCNS and go to cache first giving reasonable perf.
+
+ if (compCurBB->bbJumpKind == BBJ_THROW)
+ {
+ CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->gtStrCon.gtScpHnd);
+ if (helper != CORINFO_HELP_UNDEF)
+ {
+ // For un-important blocks, we want to construct the string lazily
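+ // (For illustration: a string literal that is only used inside a BBJ_THROW block, e.g. the
+ // message of a 'throw new ArgumentException("...")', is resolved at run time from its token
+ // via the lazy helper instead of being constructed eagerly at jit time.)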
+
+ GenTreeArgList* args;
+ if (helper == CORINFO_HELP_STRCNS_CURRENT_MODULE)
+ {
+ args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT));
+ }
+ else
+ {
+ args = gtNewArgList(gtNewIconNode(RidFromToken(tree->gtStrCon.gtSconCPX), TYP_INT),
+ gtNewIconEmbScpHndNode(tree->gtStrCon.gtScpHnd));
+ }
+
+ tree = gtNewHelperCallNode(helper, TYP_REF, 0, args);
+ return fgMorphTree(tree);
+ }
+ }
+
+ assert(tree->gtStrCon.gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->gtStrCon.gtScpHnd));
+
+ LPVOID pValue;
+ InfoAccessType iat =
+ info.compCompHnd->constructStringLiteral(tree->gtStrCon.gtScpHnd, tree->gtStrCon.gtSconCPX, &pValue);
+
+ tree = gtNewStringLiteralNode(iat, pValue);
+
+ return fgMorphTree(tree);
+}
+
+/*****************************************************************************
+ *
+ * Transform the given GTK_LEAF tree for code generation.
+ */
+
+GenTreePtr Compiler::fgMorphLeaf(GenTreePtr tree)
+{
+ noway_assert(tree->OperKind() & GTK_LEAF);
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ return fgMorphLocalVar(tree);
+ }
+#ifdef _TARGET_X86_
+ else if (tree->gtOper == GT_LCL_FLD)
+ {
+ if (info.compIsVarArgs)
+ {
+ GenTreePtr newTree =
+ fgMorphStackArgForVarArgs(tree->gtLclFld.gtLclNum, tree->gtType, tree->gtLclFld.gtLclOffs);
+ if (newTree != NULL)
+ return newTree;
+ }
+ }
+#endif // _TARGET_X86_
+ else if (tree->gtOper == GT_FTN_ADDR)
+ {
+ CORINFO_CONST_LOOKUP addrInfo;
+
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (tree->gtFptrVal.gtEntryPoint.addr != nullptr)
+ {
+ addrInfo = tree->gtFptrVal.gtEntryPoint;
+ }
+ else
+#endif
+ {
+ info.compCompHnd->getFunctionFixedEntryPoint(tree->gtFptrVal.gtFptrMethod, &addrInfo);
+ }
+
+ // Refer to gtNewIconHandleNode() as the template for constructing a constant handle
+ //
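+ // For illustration: IAT_VALUE keeps the address as an immediate (wrapped in a GT_NOP below to
+ // block constant folding), IAT_PVALUE adds one indirection (IND(cns)), and IAT_PPVALUE adds
+ // two (IND(IND(cns))), matching the switch below.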
+ tree->SetOper(GT_CNS_INT);
+ tree->gtIntConCommon.SetIconValue(ssize_t(addrInfo.handle));
+ tree->gtFlags |= GTF_ICON_FTN_ADDR;
+
+ switch (addrInfo.accessType)
+ {
+ case IAT_PPVALUE:
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ tree->gtFlags |= GTF_IND_INVARIANT;
+
+ __fallthrough;
+
+ case IAT_PVALUE:
+ tree = gtNewOperNode(GT_IND, TYP_I_IMPL, tree);
+ break;
+
+ case IAT_VALUE:
+ tree = gtNewOperNode(GT_NOP, tree->TypeGet(), tree); // prevents constant folding
+ break;
+
+ default:
+ noway_assert(!"Unknown addrInfo.accessType");
+ }
+
+ return fgMorphTree(tree);
+ }
+
+ return tree;
+}
+
+void Compiler::fgAssignSetVarDef(GenTreePtr tree)
+{
+ GenTreeLclVarCommon* lclVarCmnTree;
+ bool isEntire = false;
+ if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
+ {
+ if (isEntire)
+ {
+ lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
+ }
+ else
+ {
+ // We consider partial definitions to be modeled as uses followed by definitions.
+ // This captures the idea that preceding defs are not necessarily made redundant
+ // by this definition.
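+ // For example (illustrative): a store to one 4-byte field of an 8-byte local overwrites
+ // only part of it, so an earlier store to the other field is still live.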
+ lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// fgMorphOneAsgBlockOp: Attempt to replace a block assignment with a scalar assignment
+//
+// Arguments:
+// tree - The block assignment to be possibly morphed
+//
+// Return Value:
+// The modified tree if successful, nullptr otherwise.
+//
+// Assumptions:
+// 'tree' must be a block assignment.
+//
+// Notes:
+// If successful, this method always returns the incoming tree, modifying only
+// its arguments.
+
+GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
+{
+ // This must be a block assignment.
+ noway_assert(tree->OperIsBlkOp());
+ var_types asgType = tree->TypeGet();
+
+ GenTreePtr asg = tree;
+ GenTreePtr dest = asg->gtGetOp1();
+ GenTreePtr src = asg->gtGetOp2();
+ unsigned destVarNum = BAD_VAR_NUM;
+ LclVarDsc* destVarDsc = nullptr;
+ GenTreePtr lclVarTree = nullptr;
+ bool isCopyBlock = asg->OperIsCopyBlkOp();
+ bool isInitBlock = !isCopyBlock;
+
+ unsigned size;
+ CORINFO_CLASS_HANDLE clsHnd = NO_CLASS_HANDLE;
+#ifdef FEATURE_SIMD
+ // The importer introduces cpblk nodes with src = GT_ADDR(GT_SIMD).
+ // The SIMD type in question could be Vector2f, which is 8 bytes in size.
+ // The check below is to make sure that we don't turn that copyblk
+ // into an assignment, since the rationalizer logic will transform the
+ // copyblk appropriately. Otherwise, the transformation made in this
+ // routine would bypass the rationalizer logic and we might end up with a
+ // GT_ADDR(GT_SIMD) node post-rationalization, leading to a noway assert
+ // in codegen.
+ // TODO-1stClassStructs: This is here to preserve old behavior.
+ // It should be eliminated.
+ if (src->OperGet() == GT_SIMD)
+ {
+ return nullptr;
+ }
+#endif
+
+ if (dest->gtEffectiveVal()->OperIsBlk())
+ {
+ GenTreeBlk* lhsBlk = dest->gtEffectiveVal()->AsBlk();
+ size = lhsBlk->Size();
+ if (impIsAddressInLocal(lhsBlk->Addr(), &lclVarTree))
+ {
+ destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
+ destVarDsc = &(lvaTable[destVarNum]);
+ }
+ if (lhsBlk->OperGet() == GT_OBJ)
+ {
+ clsHnd = lhsBlk->AsObj()->gtClass;
+ }
+ }
+ else
+ {
+ noway_assert(dest->OperIsLocal());
+ lclVarTree = dest;
+ destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
+ destVarDsc = &(lvaTable[destVarNum]);
+ if (isCopyBlock)
+ {
+ clsHnd = destVarDsc->lvVerTypeInfo.GetClassHandle();
+ size = info.compCompHnd->getClassSize(clsHnd);
+ }
+ else
+ {
+ size = destVarDsc->lvExactSize;
+ }
+ }
+
+ //
+ // See if we can do a simple transformation:
+ //
+ // GT_ASG <TYP_size>
+ // / \
+ // GT_IND GT_IND or CNS_INT
+ // | |
+ // [dest] [src]
+ //
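+ //
+ // For illustration (assuming a 64-bit target): copying an 8-byte struct whose single field is an
+ // object reference becomes one TYP_REF assignment (the GC layout query below picks the type),
+ // while an 8-byte struct of two ints becomes one TYP_I_IMPL assignment.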
+
+ if (size == REGSIZE_BYTES)
+ {
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ // A register-sized cpblk can be treated as an integer assignment.
+ asgType = TYP_I_IMPL;
+ }
+ else
+ {
+ BYTE gcPtr;
+ info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+ asgType = getJitGCType(gcPtr);
+ }
+ }
+ else
+ {
+ switch (size)
+ {
+ case 1:
+ asgType = TYP_BYTE;
+ break;
+ case 2:
+ asgType = TYP_SHORT;
+ break;
+
+#ifdef _TARGET_64BIT_
+ case 4:
+ asgType = TYP_INT;
+ break;
+#endif // _TARGET_64BIT_
+ }
+ }
+
+ // TODO-1stClassStructs: Change this to asgType != TYP_STRUCT.
+ if (!varTypeIsStruct(asgType))
+ {
+ // For initBlk, a non-constant source is not going to allow us to fiddle
+ // with the bits to create a single assignment.
+ noway_assert(size <= REGSIZE_BYTES);
+
+ if (isInitBlock && (src->gtOper != GT_CNS_INT))
+ {
+ return nullptr;
+ }
+
+ if (destVarDsc != nullptr)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about dest
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destVarNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // A previous incarnation of this code also required the local not to be
+ // address-exposed (i.e. address-taken). That seems orthogonal to the decision of whether
+ // to do field-wise assignments: being address-exposed will cause it to be
+ // "dependently" promoted, so it will be in the right memory location. One possible
+ // further reason for avoiding field-wise stores is that the struct might have alignment-induced
+ // holes, whose contents could be meaningful in unsafe code. If we decide that's a valid
+ // concern, then we could compromise, and say that being address-exposed plus having fields that
+ // do not completely cover the memory of the struct prevents field-wise assignments. The same
+ // situation exists for the "src" decision.
+ if (varTypeIsStruct(lclVarTree) && (destVarDsc->lvPromoted || destVarDsc->lvIsSIMDType()))
+ {
+ // Let fgMorphInitBlock handle it. (Since we'll need to do field-var-wise assignments.)
+ return nullptr;
+ }
+ else if (!varTypeIsFloating(lclVarTree->TypeGet()) && (size == genTypeSize(destVarDsc)))
+ {
+ // Use the dest local var directly, as well as its type.
+ dest = lclVarTree;
+ asgType = destVarDsc->lvType;
+
+ // If the block operation had been a write to a local var of a small int type,
+ // of the exact size of the small int type, and the var is NormalizeOnStore,
+ // we would have labeled it GTF_VAR_USEASG, because the block operation wouldn't
+ // have done that normalization. If we're now making it into an assignment,
+ // the NormalizeOnStore will work, and it can be a full def.
+ if (destVarDsc->lvNormalizeOnStore())
+ {
+ dest->gtFlags &= (~GTF_VAR_USEASG);
+ }
+ }
+ else
+ {
+ // Could be a non-promoted struct, or a floating point type local, or
+ // an int subject to a partial write. Don't enregister.
+ lvaSetVarDoNotEnregister(destVarNum DEBUGARG(DNER_LocalField));
+
+ // Mark the local var tree as a definition point of the local.
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (size < destVarDsc->lvExactSize)
+ { // If it's not a full-width assignment....
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
+
+ if (dest == lclVarTree)
+ {
+ dest = gtNewOperNode(GT_IND, asgType, gtNewOperNode(GT_ADDR, TYP_BYREF, dest));
+ }
+ }
+ }
+
+ // Check to ensure we don't have a reducible *(& ... )
+ if (dest->OperIsIndir() && dest->AsIndir()->Addr()->OperGet() == GT_ADDR)
+ {
+ GenTreePtr addrOp = dest->AsIndir()->Addr()->gtGetOp1();
+ // Ignore reinterpret casts between int/gc
+ if ((addrOp->TypeGet() == asgType) || (varTypeIsIntegralOrI(addrOp) && (genTypeSize(asgType) == size)))
+ {
+ dest = addrOp;
+ asgType = addrOp->TypeGet();
+ }
+ }
+
+ if (dest->gtEffectiveVal()->OperIsIndir())
+ {
+ // If we have no information about the destination, we have to assume it could
+ // live anywhere (not just in the GC heap).
+ // Mark the GT_IND node so that we use the correct write barrier helper in case
+ // the field is a GC ref.
+
+ if (!fgIsIndirOfAddrOfLocal(dest))
+ {
+ dest->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ tree->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ }
+ }
+
+ LclVarDsc* srcVarDsc = nullptr;
+ if (isCopyBlock)
+ {
+ if (src->OperGet() == GT_LCL_VAR)
+ {
+ lclVarTree = src;
+ srcVarDsc = &(lvaTable[src->AsLclVarCommon()->gtLclNum]);
+ }
+ else if (src->OperIsIndir() && impIsAddressInLocal(src->gtOp.gtOp1, &lclVarTree))
+ {
+ srcVarDsc = &(lvaTable[lclVarTree->AsLclVarCommon()->gtLclNum]);
+ }
+ if (srcVarDsc != nullptr)
+ {
+ if (varTypeIsStruct(lclVarTree) && (srcVarDsc->lvPromoted || srcVarDsc->lvIsSIMDType()))
+ {
+ // Let fgMorphCopyBlock handle it.
+ return nullptr;
+ }
+ else if (!varTypeIsFloating(lclVarTree->TypeGet()) &&
+ size == genTypeSize(genActualType(lclVarTree->TypeGet())))
+ {
+ // Use the src local var directly.
+ src = lclVarTree;
+ }
+ else
+ {
+#ifndef LEGACY_BACKEND
+
+ // The source argument of the copyblk can potentially
+ // be accessed only through indir(addr(lclVar))
+ // or indir(lclVarAddr) in rational form and liveness
+ // won't account for these uses. Therefore,
+ // we have to mark this local as address exposed so
+ // we don't delete it as a dead store later on.
+ unsigned lclVarNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ lvaTable[lclVarNum].lvAddrExposed = true;
+ lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+
+#else // LEGACY_BACKEND
+ lvaSetVarDoNotEnregister(lclVarTree->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
+#endif // LEGACY_BACKEND
+ GenTree* srcAddr;
+ if (src == lclVarTree)
+ {
+ srcAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, src);
+ src = gtNewOperNode(GT_IND, asgType, srcAddr);
+ }
+ else
+ {
+ assert(src->OperIsIndir());
+ }
+ }
+ }
+ // If we have no information about the src, we have to assume it could
+ // live anywhere (not just in the GC heap).
+ // Mark the GT_IND node so that we use the correct write barrier helper in case
+ // the field is a GC ref.
+
+ if (!fgIsIndirOfAddrOfLocal(src))
+ {
+ src->gtFlags |= (GTF_EXCEPT | GTF_GLOB_REF | GTF_IND_TGTANYWHERE);
+ }
+ }
+ else
+ {
+// InitBlk
+#if FEATURE_SIMD
+ if (varTypeIsSIMD(asgType))
+ {
+ assert(!isCopyBlock); // Else we would have returned the tree above.
+ noway_assert(src->IsIntegralConst(0));
+ noway_assert(destVarDsc != nullptr);
+
+ src = new (this, GT_SIMD) GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->lvBaseType, size);
+ tree->gtOp.gtOp2 = src;
+ return tree;
+ }
+ else
+#endif
+ if (src->IsCnsIntOrI())
+ {
+ // This will mutate the integer constant, in place, to be the correct
+ // value for the type we are using in the assignment.
+ src->AsIntCon()->FixupInitBlkValue(asgType);
+ }
+ }
+
+ // Ensure that the dest is setup appropriately.
+ if (dest->gtEffectiveVal()->OperIsIndir())
+ {
+ dest = fgMorphBlockOperand(dest, asgType, size, true /*isDest*/);
+ }
+
+ // Ensure that the rhs is setup appropriately.
+ if (isCopyBlock)
+ {
+ src = fgMorphBlockOperand(src, asgType, size, false /*isDest*/);
+ }
+
+ // Set the lhs and rhs on the assignment.
+ if (dest != tree->gtOp.gtOp1)
+ {
+ asg->gtOp.gtOp1 = dest;
+ }
+ if (src != asg->gtOp.gtOp2)
+ {
+ asg->gtOp.gtOp2 = src;
+ }
+
+ asg->ChangeType(asgType);
+ dest->gtFlags |= GTF_DONT_CSE;
+ asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
+ // Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
+ asg->gtFlags &= ~GTF_REVERSE_OPS;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("fgMorphOneAsgBlock (after):\n");
+ gtDispTree(tree);
+ }
+#endif
+ return tree;
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// fgMorphInitBlock: Perform the Morphing of a GT_INITBLK node
+//
+// Arguments:
+// tree - a tree node with a gtOper of GT_INITBLK
+// the child nodes for tree have already been Morphed
+//
+// Return Value:
+// We can return the original GT_INITBLK unmodified (least desirable, but always correct).
+// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
+// If we have performed struct promotion of the Dest() then we will try to
+// perform a field by field assignment for each of the promoted struct fields.
+//
+// Notes:
+// If we leave it as a GT_INITBLK we will call lvaSetVarDoNotEnregister() with a reason of DNER_BlockOp.
+// If the Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
+// cannot use a field by field assignment and must leave the original GT_INITBLK unmodified.
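+//
+// For example (illustrative): initializing a promoted local 'struct S { int a; int b; }' with
+// zero is turned into the field assignments 's.a = 0; s.b = 0', whereas a struct with a custom
+// layout that contains holes keeps the block form and the local is marked DoNotEnregister
+// (DNER_BlockOp).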
+
+GenTreePtr Compiler::fgMorphInitBlock(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ASG && varTypeIsStruct(tree));
+#ifdef DEBUG
+ bool morphed = false;
+#endif // DEBUG
+
+ GenTree* asg = tree;
+ GenTree* src = tree->gtGetOp2();
+ GenTree* origDest = tree->gtGetOp1();
+
+ GenTree* dest = fgMorphBlkNode(origDest, true);
+ if (dest != origDest)
+ {
+ tree->gtOp.gtOp1 = dest;
+ }
+ tree->gtType = dest->TypeGet();
+ JITDUMP("\nfgMorphInitBlock:");
+
+ GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
+ if (oneAsgTree)
+ {
+ JITDUMP(" using oneAsgTree.\n");
+ tree = oneAsgTree;
+ }
+ else
+ {
+ GenTree* destAddr = nullptr;
+ GenTree* initVal = src;
+ GenTree* blockSize = nullptr;
+ unsigned blockWidth = 0;
+ FieldSeqNode* destFldSeq = nullptr;
+ LclVarDsc* destLclVar = nullptr;
+ bool destDoFldAsg = false;
+ unsigned destLclNum = BAD_VAR_NUM;
+ bool blockWidthIsConst = false;
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ if (dest->IsLocal())
+ {
+ lclVarTree = dest->AsLclVarCommon();
+ }
+ else
+ {
+ if (dest->OperIsBlk())
+ {
+ destAddr = dest->AsBlk()->Addr();
+ blockWidth = dest->AsBlk()->gtBlkSize;
+ }
+ else
+ {
+ assert((dest->gtOper == GT_IND) && (dest->TypeGet() != TYP_STRUCT));
+ destAddr = dest->gtGetOp1();
+ blockWidth = genTypeSize(dest->TypeGet());
+ }
+ }
+ if (lclVarTree != nullptr)
+ {
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ blockWidth = varTypeIsStruct(destLclVar) ? destLclVar->lvExactSize : genTypeSize(destLclVar);
+ blockWidthIsConst = true;
+ }
+ else
+ {
+ if (dest->gtOper == GT_DYN_BLK)
+ {
+ // The size must be an integer type
+ blockSize = dest->AsBlk()->gtDynBlk.gtDynamicSize;
+ assert(varTypeIsIntegral(blockSize->gtType));
+ }
+ else
+ {
+ assert(blockWidth != 0);
+ blockWidthIsConst = true;
+ }
+
+ if ((destAddr != nullptr) && destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
+ {
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ }
+ }
+ if (destLclNum != BAD_VAR_NUM)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about destLclNum (and its field locals)
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destLclNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (destLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(destLclVar));
+ noway_assert(!opts.MinOpts());
+ if (destLclVar->lvAddrExposed && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest is address exposed");
+ }
+ else
+ {
+ if (blockWidth == destLclVar->lvExactSize)
+ {
+ JITDUMP(" (destDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ destDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched size");
+ }
+ }
+ }
+ }
+
+ // Can we use field by field assignment for the dest?
+ if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest contains holes");
+ destDoFldAsg = false;
+ }
+
+ JITDUMP(destDoFldAsg ? " using field by field initialization.\n" : " this requires an InitBlock.\n");
+
+ // If we're doing an InitBlock and we've transformed the dest to a non-Blk
+ // we need to change it back.
+ if (!destDoFldAsg && !dest->OperIsBlk())
+ {
+ noway_assert(blockWidth != 0);
+ tree->gtOp.gtOp1 = origDest;
+ tree->gtType = origDest->gtType;
+ }
+
+ if (!destDoFldAsg && (destLclVar != nullptr))
+ {
+ // If destLclVar is not a reg-sized non-field-addressed struct, set it as DoNotEnregister.
+ if (!destLclVar->lvRegStruct)
+ {
+ // Mark it as DoNotEnregister.
+ lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ // Mark the dest struct as DoNotEnreg
+ // when they are LclVar structs and we are using a CopyBlock
+ // or the struct is not promoted
+ //
+ if (!destDoFldAsg)
+ {
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = true;
+#endif
+ if (!dest->OperIsBlk())
+ {
+ GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(dest);
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
+ }
+ else
+ {
+ GenTree* newDest = gtNewObjNode(clsHnd, destAddr);
+ if (newDest->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(newDest->AsObj());
+ }
+ dest = newDest;
+ }
+ tree->gtOp.gtOp1 = dest;
+ }
+ }
+ else
+ {
+ // The initVal must be a constant of TYP_INT
+ noway_assert(initVal->OperGet() == GT_CNS_INT);
+ noway_assert(genActualType(initVal->gtType) == TYP_INT);
+
+ // The dest must be of a struct type.
+ noway_assert(varTypeIsStruct(destLclVar));
+
+ //
+ // Now, convert InitBlock to individual assignments
+ //
+
+ tree = nullptr;
+ INDEBUG(morphed = true);
+
+ GenTreePtr dest;
+ GenTreePtr srcCopy;
+ unsigned fieldLclNum;
+ unsigned fieldCnt = destLclVar->lvFieldCnt;
+
+ for (unsigned i = 0; i < fieldCnt; ++i)
+ {
+ fieldLclNum = destLclVar->lvFieldLclStart + i;
+ dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+
+ noway_assert(lclVarTree->gtOper == GT_LCL_VAR);
+ // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
+ dest->gtFlags |= (lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG));
+
+ srcCopy = gtCloneExpr(initVal);
+ noway_assert(srcCopy != nullptr);
+
+ // need type of oper to be same as tree
+ if (dest->gtType == TYP_LONG)
+ {
+ srcCopy->ChangeOperConst(GT_CNS_NATIVELONG);
+ // copy and extend the value
+ srcCopy->gtIntConCommon.SetLngValue(initVal->gtIntConCommon.IconValue());
+ /* Change the types of srcCopy to TYP_LONG */
+ srcCopy->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(dest->gtType))
+ {
+ srcCopy->ChangeOperConst(GT_CNS_DBL);
+ // setup the bit pattern
+ memset(&srcCopy->gtDblCon.gtDconVal, (int)initVal->gtIntCon.gtIconVal,
+ sizeof(srcCopy->gtDblCon.gtDconVal));
+ /* Change the types of srcCopy to TYP_DOUBLE */
+ srcCopy->gtType = TYP_DOUBLE;
+ }
+ else
+ {
+ noway_assert(srcCopy->gtOper == GT_CNS_INT);
+ noway_assert(srcCopy->TypeGet() == TYP_INT);
+ // setup the bit pattern
+ memset(&srcCopy->gtIntCon.gtIconVal, (int)initVal->gtIntCon.gtIconVal,
+ sizeof(srcCopy->gtIntCon.gtIconVal));
+ }
+
+ srcCopy->gtType = dest->TypeGet();
+
+ asg = gtNewAssignNode(dest, srcCopy);
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ optAssertionGen(asg);
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (tree)
+ {
+ tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
+ }
+ else
+ {
+ tree = asg;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (morphed)
+ {
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+
+ if (verbose)
+ {
+ printf("fgMorphInitBlock (after):\n");
+ gtDispTree(tree);
+ }
+ }
+#endif
+
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlkToInd: Change a blk node into a GT_IND of the specified type
+//
+// Arguments:
+// tree - the node to be modified.
+// type - the type of indirection to change it to.
+//
+// Return Value:
+// Returns the node, modified in place.
+//
+// Notes:
+// This doesn't really warrant a separate method, but is here to abstract
+// the fact that these nodes can be modified in-place.
+
+GenTreePtr Compiler::fgMorphBlkToInd(GenTreeBlk* tree, var_types type)
+{
+ tree->SetOper(GT_IND);
+ tree->gtType = type;
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphGetStructAddr: Gets the address of a struct object
+//
+// Arguments:
+// pTree - the parent's pointer to the struct object node
+// clsHnd - the class handle for the struct type
+// isRValue - true if this is a source (not dest)
+//
+// Return Value:
+// Returns the address of the struct value, possibly modifying the existing tree to
+// sink the address below any comma nodes (this is to canonicalize for value numbering).
+// If this is a source, it will morph it to an GT_IND before taking its address,
+// since it may not be remorphed (and we don't want blk nodes as rvalues).
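+//
+// For example (illustrative): given 'COMMA(sideEffect, OBJ(addr))' as a source, this returns the
+// comma retyped to TYP_BYREF with the OBJ replaced by 'addr', i.e. 'COMMA(sideEffect, addr)', so
+// that value numbering sees a consistent comma-shaped address expression.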
+
+GenTreePtr Compiler::fgMorphGetStructAddr(GenTreePtr* pTree, CORINFO_CLASS_HANDLE clsHnd, bool isRValue)
+{
+ GenTree* addr;
+ GenTree* tree = *pTree;
+ // If this is an indirection, we can return its op1, unless it's a GTF_IND_ARR_INDEX, in which case we
+ // need to hang onto that for the purposes of value numbering.
+ if (tree->OperIsIndir())
+ {
+ if ((tree->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ addr = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ if (isRValue && tree->OperIsBlk())
+ {
+ tree->ChangeOper(GT_IND);
+ }
+ addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ }
+ }
+ else if (tree->gtOper == GT_COMMA)
+ {
+ // If this is a comma, we're going to "sink" the GT_ADDR below it.
+ (void)fgMorphGetStructAddr(&(tree->gtOp.gtOp2), clsHnd, isRValue);
+ tree->gtType = TYP_BYREF;
+ addr = tree;
+ }
+ else
+ {
+ switch (tree->gtOper)
+ {
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_INDEX:
+ case GT_FIELD:
+ case GT_ARR_ELEM:
+ addr = gtNewOperNode(GT_ADDR, TYP_BYREF, tree);
+ break;
+ default:
+ {
+ // TODO: Consider using lvaGrabTemp and gtNewTempAssign instead, since we're
+ // not going to use "temp"
+ GenTree* temp = fgInsertCommaFormTemp(pTree, clsHnd);
+ addr = fgMorphGetStructAddr(pTree, clsHnd, isRValue);
+ break;
+ }
+ }
+ }
+ *pTree = addr;
+ return addr;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlkNode: Morph a block node preparatory to morphing a block assignment
+//
+// Arguments:
+// tree - The struct type node
+// isDest - True if this is the destination of the assignment
+//
+// Return Value:
+// Returns the possibly-morphed node. The caller is responsible for updating
+// the parent of this node.
+
+GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
+{
+ if (tree->gtOper == GT_COMMA)
+ {
+ GenTree* effectiveVal = tree->gtEffectiveVal();
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+#ifdef DEBUG
+ addr->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ // In order to CSE and value number array index expressions and bounds checks,
+ // the commas in which they are contained need to match.
+ // The pattern is that the COMMA should be the address expression.
+ // Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
+ // TODO-1stClassStructs: Consider whether this can be improved.
+ // Also consider whether some of this can be included in gtNewBlockVal (though note
+ // that doing so may cause us to query the type system before we otherwise would).
+ GenTree* lastComma = nullptr;
+ for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
+ {
+ next->gtType = TYP_BYREF;
+ lastComma = next;
+ }
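+ // Splice the new ADDR in as the last comma's op2 so that the comma chain now yields the address.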
+ if (lastComma != nullptr)
+ {
+ noway_assert(lastComma->gtGetOp2() == effectiveVal);
+ lastComma->gtOp.gtOp2 = addr;
+ addr = tree;
+ }
+ var_types structType = effectiveVal->TypeGet();
+ if (structType == TYP_STRUCT)
+ {
+ CORINFO_CLASS_HANDLE structHnd = gtGetStructHandleIfPresent(effectiveVal);
+ if (structHnd == NO_CLASS_HANDLE)
+ {
+ tree = gtNewOperNode(GT_IND, effectiveVal->TypeGet(), addr);
+ }
+ else
+ {
+ tree = gtNewObjNode(structHnd, addr);
+ if (tree->OperGet() == GT_OBJ)
+ {
+ gtSetObjGcInfo(tree->AsObj());
+ }
+ }
+ }
+ else
+ {
+ tree = new (this, GT_BLK) GenTreeBlk(GT_BLK, structType, addr, genTypeSize(structType));
+ }
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+
+ if (!tree->OperIsBlk())
+ {
+ return tree;
+ }
+ GenTreeBlk* blkNode = tree->AsBlk();
+ if (blkNode->OperGet() == GT_DYN_BLK)
+ {
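+ // A GT_DYN_BLK whose size operand is a constant can be converted into an ordinary GT_BLK of that size.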
+ if (blkNode->AsDynBlk()->gtDynamicSize->IsCnsIntOrI())
+ {
+ unsigned size = (unsigned)blkNode->AsDynBlk()->gtDynamicSize->AsIntConCommon()->IconValue();
+ blkNode->AsDynBlk()->gtDynamicSize = nullptr;
+ blkNode->ChangeOper(GT_BLK);
+ blkNode->gtBlkSize = size;
+ }
+ else
+ {
+ return tree;
+ }
+ }
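+ // If a non-struct block accesses a local of a different size, or reads a non-struct local as its
+ // source, the local must remain addressable on the stack, so don't enregister it.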
+ if ((blkNode->TypeGet() != TYP_STRUCT) && (blkNode->Addr()->OperGet() == GT_ADDR) &&
+ (blkNode->Addr()->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ GenTreeLclVarCommon* lclVarNode = blkNode->Addr()->gtGetOp1()->AsLclVarCommon();
+ if ((genTypeSize(blkNode) != genTypeSize(lclVarNode)) || (!isDest && !varTypeIsStruct(lclVarNode)))
+ {
+ lvaSetVarDoNotEnregister(lclVarNode->gtLclNum DEBUG_ARG(DNER_VMNeedsStackAddr));
+ }
+ }
+
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphBlockOperand: Canonicalize an operand of a block assignment
+//
+// Arguments:
+// tree - The block operand
+// asgType - The type of the assignment
+// blockWidth - The size of the block
+// isDest - true iff this is the destination of the assignment
+//
+// Return Value:
+// Returns the morphed block operand
+//
+// Notes:
+// This does the following:
+// - Ensures that a struct operand is a block node.
+// - Ensures that any COMMAs are above ADDR nodes.
+// Although 'tree' WAS an operand of a block assignment, the assignment
+// may have been retyped to be a scalar assignment.
+
+GenTree* Compiler::fgMorphBlockOperand(GenTree* tree, var_types asgType, unsigned blockWidth, bool isDest)
+{
+ GenTree* effectiveVal = tree->gtEffectiveVal();
+
+ // TODO-1stClassStructs: We would like to transform non-TYP_STRUCT nodes to
+ // either plain lclVars or GT_INDs. However, for now we want to preserve most
+ // of the block nodes until the Rationalizer.
+
+ if (!varTypeIsStruct(asgType))
+ {
+ if (effectiveVal->OperIsIndir())
+ {
+ GenTree* addr = effectiveVal->AsIndir()->Addr();
+ if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->TypeGet() == asgType))
+ {
+ effectiveVal = addr->gtGetOp1();
+ }
+ else if (effectiveVal->OperIsBlk())
+ {
+ effectiveVal = fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
+ else
+ {
+ effectiveVal->gtType = asgType;
+ }
+ }
+ else if (effectiveVal->TypeGet() != asgType)
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ effectiveVal = gtNewOperNode(GT_IND, asgType, addr);
+ }
+ }
+ else
+ {
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(asgType))
+ {
+ if (effectiveVal->OperIsIndir())
+ {
+ GenTree* addr = effectiveVal->AsIndir()->Addr();
+ if (!isDest && (addr->OperGet() == GT_ADDR))
+ {
+ if ((addr->gtGetOp1()->gtOper == GT_SIMD) || (addr->gtGetOp1()->OperGet() == GT_LCL_VAR))
+ {
+ effectiveVal = addr->gtGetOp1();
+ }
+ }
+ else if (isDest && !effectiveVal->OperIsBlk())
+ {
+ effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
+ }
+ }
+ else if (!effectiveVal->OperIsSIMD() && (!effectiveVal->IsLocal() || isDest) && !effectiveVal->OperIsBlk())
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ effectiveVal = new (this, GT_BLK) GenTreeBlk(GT_BLK, asgType, addr, blockWidth);
+ }
+ }
+ else
+#endif // FEATURE_SIMD
+ if (!effectiveVal->OperIsBlk())
+ {
+ GenTree* addr = gtNewOperNode(GT_ADDR, TYP_BYREF, effectiveVal);
+ CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleIfPresent(effectiveVal);
+ GenTree* newTree;
+ if (clsHnd == NO_CLASS_HANDLE)
+ {
+ newTree = new (this, GT_BLK) GenTreeBlk(GT_BLK, TYP_STRUCT, addr, blockWidth);
+ }
+ else
+ {
+ newTree = gtNewObjNode(clsHnd, addr);
+ if (isDest && (newTree->OperGet() == GT_OBJ))
+ {
+ gtSetObjGcInfo(newTree->AsObj());
+ }
+ if (effectiveVal->IsLocal() && ((effectiveVal->gtFlags & GTF_GLOB_EFFECT) == 0))
+ {
+ // This is not necessarily a global reference, though gtNewObjNode always assumes it is.
+ // TODO-1stClassStructs: This check should be done in the GenTreeObj constructor,
+ // where it currently sets GTF_GLOB_EFFECT unconditionally, but it is handled
+ // separately now to avoid excess diffs.
+ newTree->gtFlags &= ~(GTF_GLOB_EFFECT);
+ }
+ }
+ effectiveVal = newTree;
+ }
+ }
+ if (!isDest && effectiveVal->OperIsBlk())
+ {
+ (void)fgMorphBlkToInd(effectiveVal->AsBlk(), asgType);
+ }
+ tree = effectiveVal;
+ return tree;
+}
+
+//------------------------------------------------------------------------
+// fgMorphCopyBlock: Perform the Morphing of block copy
+//
+// Arguments:
+// tree - a block copy (i.e. an assignment with a block op on the lhs).
+//
+// Return Value:
+// We can return the original block copy unmodified (least desirable, but always correct).
+// We can return a single assignment, when fgMorphOneAsgBlockOp transforms it (most desirable).
+// If we have performed struct promotion of the Source() or the Dest() then we will try to
+// perform a field by field assignment for each of the promoted struct fields.
+//
+// Assumptions:
+// The child nodes for tree have already been Morphed.
+//
+// Notes:
+// If we leave it as a block copy we will call lvaSetVarDoNotEnregister() on both Source() and Dest().
+// When performing a field by field assignment we can have one of Source() or Dest() treated as a blob of bytes
+// and in such cases we will call lvaSetVarDoNotEnregister() on the one treated as a blob of bytes.
+// If the Source() or Dest() is a struct that has a "CustomLayout" and "ContainsHoles" then we
+// cannot use a field by field assignment and must leave the original block copy unmodified.
+
+GenTreePtr Compiler::fgMorphCopyBlock(GenTreePtr tree)
+{
+ noway_assert(tree->OperIsCopyBlkOp());
+
+ JITDUMP("\nfgMorphCopyBlock:");
+
+ bool isLateArg = (tree->gtFlags & GTF_LATE_ARG) != 0;
+
+ GenTree* asg = tree;
+ GenTree* rhs = asg->gtGetOp2();
+ GenTree* dest = asg->gtGetOp1();
+
+#if FEATURE_MULTIREG_RET
+ // If this is a multi-reg return, we will not do any morphing of this node.
+ if (rhs->IsMultiRegCall())
+ {
+ assert(dest->OperGet() == GT_LCL_VAR);
+ JITDUMP(" not morphing a multireg call return\n");
+ return tree;
+ }
+#endif // FEATURE_MULTIREG_RET
+
+ // If we have an array index on the lhs, we need to create an obj node.
+
+ dest = fgMorphBlkNode(dest, true);
+ if (dest != asg->gtGetOp1())
+ {
+ asg->gtOp.gtOp1 = dest;
+ if (dest->IsLocal())
+ {
+ dest->gtFlags |= GTF_VAR_DEF;
+ }
+ }
+ asg->gtType = dest->TypeGet();
+ rhs = fgMorphBlkNode(rhs, false);
+
+ asg->gtOp.gtOp2 = rhs;
+
+ GenTreePtr oldTree = tree;
+ GenTreePtr oneAsgTree = fgMorphOneAsgBlockOp(tree);
+
+ if (oneAsgTree)
+ {
+ JITDUMP(" using oneAsgTree.\n");
+ tree = oneAsgTree;
+ }
+ else
+ {
+ unsigned blockWidth;
+ bool blockWidthIsConst = false;
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ GenTreeLclVarCommon* srcLclVarTree = nullptr;
+ unsigned destLclNum = BAD_VAR_NUM;
+ LclVarDsc* destLclVar = nullptr;
+ FieldSeqNode* destFldSeq = nullptr;
+ bool destDoFldAsg = false;
+ GenTreePtr destAddr = nullptr;
+ GenTreePtr srcAddr = nullptr;
+ bool destOnStack = false;
+ bool hasGCPtrs = false;
+
+ JITDUMP("block assignment to morph:\n");
+ DISPTREE(asg);
+
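+ // Characterize the destination: determine its size, the local variable (if any) it refers to,
+ // and whether it is known to be on the stack.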
+ if (dest->IsLocal())
+ {
+ blockWidthIsConst = true;
+ destOnStack = true;
+ if (dest->gtOper == GT_LCL_VAR)
+ {
+ lclVarTree = dest->AsLclVarCommon();
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ if (destLclVar->lvType == TYP_STRUCT)
+ {
+ // It would be nice if lvExactSize always corresponded to the size of the struct,
+ // but it doesn't always for the temps that the importer creates when it spills side
+ // effects.
+ // TODO-Cleanup: Determine when this happens, and whether it can be changed.
+ blockWidth = info.compCompHnd->getClassSize(destLclVar->lvVerTypeInfo.GetClassHandle());
+ }
+ else
+ {
+ blockWidth = genTypeSize(destLclVar->lvType);
+ }
+ hasGCPtrs = destLclVar->lvStructGcCount != 0;
+ }
+ else
+ {
+ assert(dest->TypeGet() != TYP_STRUCT);
+ assert(dest->gtOper == GT_LCL_FLD);
+ blockWidth = genTypeSize(dest->TypeGet());
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+ }
+ else
+ {
+ GenTree* effectiveDest = dest->gtEffectiveVal();
+ if (effectiveDest->OperGet() == GT_IND)
+ {
+ assert(dest->TypeGet() != TYP_STRUCT);
+ blockWidth = genTypeSize(effectiveDest->TypeGet());
+ blockWidthIsConst = true;
+ if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
+ {
+ destAddr = dest->gtGetOp1();
+ }
+ }
+ else
+ {
+ assert(effectiveDest->OperIsBlk());
+ GenTreeBlk* blk = effectiveDest->AsBlk();
+
+ blockWidth = blk->gtBlkSize;
+ blockWidthIsConst = (blk->gtOper != GT_DYN_BLK);
+ if ((dest == effectiveDest) && ((dest->gtFlags & GTF_IND_ARR_INDEX) == 0))
+ {
+ destAddr = blk->Addr();
+ }
+ }
+ if (destAddr != nullptr)
+ {
+ noway_assert(destAddr->TypeGet() == TYP_BYREF || destAddr->TypeGet() == TYP_I_IMPL);
+ if (destAddr->IsLocalAddrExpr(this, &lclVarTree, &destFldSeq))
+ {
+ destOnStack = true;
+ destLclNum = lclVarTree->gtLclNum;
+ destLclVar = &lvaTable[destLclNum];
+ }
+ }
+ }
+
+ if (destLclVar != nullptr)
+ {
+#if LOCAL_ASSERTION_PROP
+ // Kill everything about destLclNum (and its field locals)
+ if (optLocalAssertionProp)
+ {
+ if (optAssertionCount > 0)
+ {
+ fgKillDependentAssertions(destLclNum DEBUGARG(tree));
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (destLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(destLclVar));
+ noway_assert(!opts.MinOpts());
+
+ if (blockWidth == destLclVar->lvExactSize)
+ {
+ JITDUMP(" (destDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ destDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched dest size");
+ }
+ }
+ }
+
+ FieldSeqNode* srcFldSeq = nullptr;
+ unsigned srcLclNum = BAD_VAR_NUM;
+ LclVarDsc* srcLclVar = nullptr;
+ bool srcDoFldAsg = false;
+
+ if (rhs->IsLocal())
+ {
+ srcLclVarTree = rhs->AsLclVarCommon();
+ srcLclNum = srcLclVarTree->gtLclNum;
+ if (rhs->OperGet() == GT_LCL_FLD)
+ {
+ srcFldSeq = rhs->AsLclFld()->gtFieldSeq;
+ }
+ }
+ else if (rhs->OperIsIndir())
+ {
+ if (rhs->gtOp.gtOp1->IsLocalAddrExpr(this, &srcLclVarTree, &srcFldSeq))
+ {
+ srcLclNum = srcLclVarTree->gtLclNum;
+ }
+ else
+ {
+ srcAddr = rhs->gtOp.gtOp1;
+ }
+ }
+
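+ // If the source is a promoted struct local whose size matches the block exactly, it is a candidate
+ // for a field by field assignment.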
+ if (srcLclNum != BAD_VAR_NUM)
+ {
+ srcLclVar = &lvaTable[srcLclNum];
+
+ if (srcLclVar->lvPromoted && blockWidthIsConst)
+ {
+ noway_assert(varTypeIsStruct(srcLclVar));
+ noway_assert(!opts.MinOpts());
+
+ if (blockWidth == srcLclVar->lvExactSize)
+ {
+ JITDUMP(" (srcDoFldAsg=true)");
+ // We may decide later that a copyblk is required when this struct has holes
+ srcDoFldAsg = true;
+ }
+ else
+ {
+ JITDUMP(" with mismatched src size");
+ }
+ }
+ }
+
+ // Check to see if we are required to do a copy block because the struct contains holes
+ // and either the src or dest is externally visible
+ //
+ bool requiresCopyBlock = false;
+ bool srcSingleLclVarAsg = false;
+
+ // If either src or dest is a reg-sized non-field-addressed struct, keep the copyBlock.
+ if ((destLclVar != nullptr && destLclVar->lvRegStruct) || (srcLclVar != nullptr && srcLclVar->lvRegStruct))
+ {
+ requiresCopyBlock = true;
+ }
+
+ // Can we use field by field assignment for the dest?
+ if (destDoFldAsg && destLclVar->lvCustomLayout && destLclVar->lvContainsHoles)
+ {
+ JITDUMP(" dest contains custom layout and contains holes");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ // Can we use field by field assignment for the src?
+ if (srcDoFldAsg && srcLclVar->lvCustomLayout && srcLclVar->lvContainsHoles)
+ {
+ JITDUMP(" src contains custom layout and contains holes");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ if (dest->OperGet() == GT_OBJ && dest->AsBlk()->gtBlkOpGcUnsafe)
+ {
+ requiresCopyBlock = true;
+ }
+
+ // Can't use field by field assignment if the src is a call.
+ if (rhs->OperGet() == GT_CALL)
+ {
+ JITDUMP(" src is a call");
+ // C++ style CopyBlock with holes
+ requiresCopyBlock = true;
+ }
+
+ // If we passed the above checks, then we will check these two
+ if (!requiresCopyBlock)
+ {
+ // Are both dest and src promoted structs?
+ if (destDoFldAsg && srcDoFldAsg)
+ {
+ // Both structs should be of the same type, if not we will use a copy block
+ if (lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() !=
+ lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle())
+ {
+ requiresCopyBlock = true; // Mismatched types, leave as a CopyBlock
+ JITDUMP(" with mismatched types");
+ }
+ }
+ // Are neither dest or src promoted structs?
+ else if (!destDoFldAsg && !srcDoFldAsg)
+ {
+ requiresCopyBlock = true; // Leave as a CopyBlock
+ JITDUMP(" with no promoted structs");
+ }
+ else if (destDoFldAsg)
+ {
+ // Match the following kinds of trees:
+ // fgMorphTree BB01, stmt 9 (before)
+ // [000052] ------------ const int 8
+ // [000053] -A--G------- copyBlk void
+ // [000051] ------------ addr byref
+ // [000050] ------------ lclVar long V07 loc5
+ // [000054] --------R--- <list> void
+ // [000049] ------------ addr byref
+ // [000048] ------------ lclVar struct(P) V06 loc4
+ // long V06.h (offs=0x00) -> V17 tmp9
+ // Yields this transformation
+ // fgMorphCopyBlock (after):
+ // [000050] ------------ lclVar long V07 loc5
+ // [000085] -A---------- = long
+ // [000083] D------N---- lclVar long V17 tmp9
+ //
+ if (blockWidthIsConst && (destLclVar->lvFieldCnt == 1) && (srcLclVar != nullptr) &&
+ (blockWidth == genTypeSize(srcLclVar->TypeGet())))
+ {
+ // Reject the following tree:
+ // - seen on x86chk jit\jit64\hfa\main\hfa_sf3E_r.exe
+ //
+ // fgMorphTree BB01, stmt 6 (before)
+ // [000038] ------------- const int 4
+ // [000039] -A--G-------- copyBlk void
+ // [000037] ------------- addr byref
+ // [000036] ------------- lclVar int V05 loc3
+ // [000040] --------R---- <list> void
+ // [000035] ------------- addr byref
+ // [000034] ------------- lclVar struct(P) V04 loc2
+ // float V04.f1 (offs=0x00) -> V13 tmp6
+ // As this would transform into
+ // float V13 = int V05
+ //
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart;
+ var_types destType = lvaTable[fieldLclNum].TypeGet();
+ if (srcLclVar->TypeGet() == destType)
+ {
+ srcSingleLclVarAsg = true;
+ }
+ }
+ }
+ }
+
+ // If we require a copy block then set both of the field assign bools to false
+ if (requiresCopyBlock)
+ {
+ // If a copy block is required then we won't do field by field assignments
+ destDoFldAsg = false;
+ srcDoFldAsg = false;
+ }
+
+ JITDUMP(requiresCopyBlock ? " this requires a CopyBlock.\n" : " using field by field assignments.\n");
+
+ // Mark the dest/src structs as DoNotEnreg
+ // when they are not reg-sized non-field-addressed structs and we are using a CopyBlock
+ // or the struct is not promoted
+ //
+ if (!destDoFldAsg && (destLclVar != nullptr))
+ {
+ if (!destLclVar->lvRegStruct)
+ {
+ // Mark it as DoNotEnregister.
+ lvaSetVarDoNotEnregister(destLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ if (!srcDoFldAsg && (srcLclVar != nullptr) && !srcSingleLclVarAsg)
+ {
+ if (!srcLclVar->lvRegStruct)
+ {
+ lvaSetVarDoNotEnregister(srcLclNum DEBUGARG(DNER_BlockOp));
+ }
+ }
+
+ if (requiresCopyBlock)
+ {
+#if CPU_USES_BLOCK_MOVE
+ compBlkOpUsed = true;
+#endif
+ var_types asgType = dest->TypeGet();
+ dest = fgMorphBlockOperand(dest, asgType, blockWidth, true /*isDest*/);
+ asg->gtOp.gtOp1 = dest;
+ hasGCPtrs = ((dest->OperGet() == GT_OBJ) && (dest->AsObj()->gtGcPtrCount != 0));
+
+#ifdef CPBLK_UNROLL_LIMIT
+ // Note that the unrolling of CopyBlk is only implemented on some platforms.
+ // Currently that includes x64 and Arm64 but not x86 or Arm32.
+
+ // If we have a CopyObj with a dest on the stack
+ // we will convert it into a GC Unsafe CopyBlk that is non-interruptible
+ // when its size is small enough to be completely unrolled (i.e. between [16..64] bytes)
+ //
+ if (hasGCPtrs && destOnStack && blockWidthIsConst && (blockWidth >= (2 * TARGET_POINTER_SIZE)) &&
+ (blockWidth <= CPBLK_UNROLL_LIMIT))
+ {
+ if (dest->OperGet() == GT_OBJ)
+ {
+ dest->SetOper(GT_BLK);
+ dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
+ }
+ else
+ {
+ assert(dest->OperIsLocal());
+ GenTree* destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, dest->TypeGet(), destAddr, blockWidth);
+ dest->AsBlk()->gtBlkOpGcUnsafe = true; // Mark as a GC unsafe copy block
+ tree->gtOp.gtOp1 = dest;
+ }
+ }
+#endif
+ // Eliminate the "OBJ or BLK" node on the rhs.
+ rhs = fgMorphBlockOperand(rhs, asgType, blockWidth, false /*!isDest*/);
+ asg->gtOp.gtOp2 = rhs;
+
+#ifdef LEGACY_BACKEND
+ if (!rhs->OperIsIndir())
+ {
+ noway_assert(rhs->gtOper == GT_LCL_VAR);
+ GenTree* rhsAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, rhs);
+ rhs = gtNewOperNode(GT_IND, TYP_STRUCT, rhsAddr);
+ }
+#endif // LEGACY_BACKEND
+ // Formerly, liveness did not consider copyblk arguments of simple types as being
+ // a use or def, so these variables were marked as address-exposed.
+ // TODO-1stClassStructs: This should no longer be needed.
+ if (srcLclNum != BAD_VAR_NUM && !varTypeIsStruct(srcLclVar))
+ {
+ JITDUMP("Non-struct copyBlk src V%02d is addr exposed\n", srcLclNum);
+ lvaTable[srcLclNum].lvAddrExposed = true;
+ }
+
+ if (destLclNum != BAD_VAR_NUM && !varTypeIsStruct(destLclVar))
+ {
+ JITDUMP("Non-struct copyBlk dest V%02d is addr exposed\n", destLclNum);
+ lvaTable[destLclNum].lvAddrExposed = true;
+ }
+
+ goto _Done;
+ }
+
+ //
+ // Otherwise we convert this CopyBlock into individual field by field assignments
+ //
+ tree = nullptr;
+
+ GenTreePtr src;
+ GenTreePtr addrSpill = nullptr;
+ unsigned addrSpillTemp = BAD_VAR_NUM;
+ bool addrSpillIsStackDest = false; // true if 'addrSpill' represents the address in our local stack frame
+
+ unsigned fieldCnt = DUMMY_INIT(0);
+
+ if (destDoFldAsg && srcDoFldAsg)
+ {
+ // To do fieldwise assignments for both sides, they'd better be the same struct type!
+ // All of these conditions were checked above...
+ assert(destLclNum != BAD_VAR_NUM && srcLclNum != BAD_VAR_NUM);
+ assert(lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle() ==
+ lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle());
+ assert(destLclVar != nullptr && srcLclVar != nullptr && destLclVar->lvFieldCnt == srcLclVar->lvFieldCnt);
+
+ fieldCnt = destLclVar->lvFieldCnt;
+ goto _AssignFields; // No need to spill the address to the temp. Go ahead to morph it into field
+ // assignments.
+ }
+ else if (destDoFldAsg)
+ {
+ fieldCnt = destLclVar->lvFieldCnt;
+ rhs = fgMorphBlockOperand(rhs, TYP_STRUCT, blockWidth, false /*isDest*/);
+ if (srcAddr == nullptr)
+ {
+ srcAddr = fgMorphGetStructAddr(&rhs, destLclVar->lvVerTypeInfo.GetClassHandle(), true /* rValue */);
+ }
+ }
+ else
+ {
+ assert(srcDoFldAsg);
+ fieldCnt = srcLclVar->lvFieldCnt;
+ dest = fgMorphBlockOperand(dest, TYP_STRUCT, blockWidth, true /*isDest*/);
+ if (dest->OperIsBlk())
+ {
+ (void)fgMorphBlkToInd(dest->AsBlk(), TYP_STRUCT);
+ }
+ destAddr = gtNewOperNode(GT_ADDR, TYP_BYREF, dest);
+ }
+
+ if (destDoFldAsg)
+ {
+ noway_assert(!srcDoFldAsg);
+ if (gtClone(srcAddr))
+ {
+ // srcAddr is simple expression. No need to spill.
+ noway_assert((srcAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
+ }
+ else
+ {
+ // srcAddr is complex expression. Clone and spill it (unless the destination is
+ // a struct local that only has one field, in which case we'd only use the
+ // address value once...)
+ if (destLclVar->lvFieldCnt > 1)
+ {
+ addrSpill = gtCloneExpr(srcAddr); // addrSpill represents the 'srcAddr'
+ noway_assert(addrSpill != nullptr);
+ }
+ }
+ }
+
+ if (srcDoFldAsg)
+ {
+ noway_assert(!destDoFldAsg);
+
+ // If we're doing field-wise stores, to an address within a local, and we copy
+ // the address into "addrSpill", do *not* declare the original local var node in the
+ // field address as GTF_VAR_DEF and GTF_VAR_USEASG; we will declare each of the
+ // field-wise assignments as an "indirect" assignment to the local.
+ // ("lclVarTree" is a subtree of "destAddr"; make sure we remove the flags before
+ // we clone it.)
+ if (lclVarTree != nullptr)
+ {
+ lclVarTree->gtFlags &= ~(GTF_VAR_DEF | GTF_VAR_USEASG);
+ }
+
+ if (gtClone(destAddr))
+ {
+ // destAddr is simple expression. No need to spill
+ noway_assert((destAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
+ }
+ else
+ {
+ // destAddr is complex expression. Clone and spill it (unless
+ // the source is a struct local that only has one field, in which case we'd only
+ // use the address value once...)
+ if (srcLclVar->lvFieldCnt > 1)
+ {
+ addrSpill = gtCloneExpr(destAddr); // addrSpill represents the 'destAddr'
+ noway_assert(addrSpill != nullptr);
+ }
+
+ // TODO-CQ: this should be based on a more general
+ // "BaseAddress" method, that handles fields of structs, before or after
+ // morphing.
+ if (addrSpill != nullptr && addrSpill->OperGet() == GT_ADDR)
+ {
+ if (addrSpill->gtOp.gtOp1->IsLocal())
+ {
+ // We will *not* consider this to define the local, but rather have each individual field assign
+ // be a definition.
+ addrSpill->gtOp.gtOp1->gtFlags &= ~(GTF_LIVENESS_MASK);
+ assert(lvaGetPromotionType(addrSpill->gtOp.gtOp1->gtLclVarCommon.gtLclNum) !=
+ PROMOTION_TYPE_INDEPENDENT);
+ addrSpillIsStackDest = true; // addrSpill represents the address of LclVar[varNum] in our
+ // local stack frame
+ }
+ }
+ }
+ }
+
+ if (addrSpill != nullptr)
+ {
+ // Spill the (complex) address to a BYREF temp.
+ // Note, at most one address may need to be spilled.
+ addrSpillTemp = lvaGrabTemp(true DEBUGARG("BlockOp address local"));
+
+ lvaTable[addrSpillTemp].lvType = TYP_BYREF;
+
+ if (addrSpillIsStackDest)
+ {
+ lvaTable[addrSpillTemp].lvStackByref = true;
+ }
+
+ tree = gtNewAssignNode(gtNewLclvNode(addrSpillTemp, TYP_BYREF), addrSpill);
+
+#ifndef LEGACY_BACKEND
+ // If we are assigning the address of a LclVar here
+ // liveness does not account for this kind of address taken use.
+ //
+ // We have to mark this local as address exposed so
+ // that we don't delete the definition for this LclVar
+ // as a dead store later on.
+ //
+ if (addrSpill->OperGet() == GT_ADDR)
+ {
+ GenTreePtr addrOp = addrSpill->gtOp.gtOp1;
+ if (addrOp->IsLocal())
+ {
+ unsigned lclVarNum = addrOp->gtLclVarCommon.gtLclNum;
+ lvaTable[lclVarNum].lvAddrExposed = true;
+ lvaSetVarDoNotEnregister(lclVarNum DEBUGARG(DNER_AddrExposed));
+ }
+ }
+#endif // !LEGACY_BACKEND
+ }
+
+ _AssignFields:
+
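+ // Build one scalar assignment per promoted field; the individual assignments are chained together
+ // with GT_COMMA nodes.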
+ for (unsigned i = 0; i < fieldCnt; ++i)
+ {
+ FieldSeqNode* curFieldSeq = nullptr;
+ if (destDoFldAsg)
+ {
+ noway_assert(destLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
+ dest = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+ // If it had been labeled a "USEASG", assignments to the individual promoted fields are not.
+ if (destAddr != nullptr)
+ {
+ noway_assert(destAddr->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+ dest->gtFlags |= destAddr->gtOp.gtOp1->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
+ }
+ else
+ {
+ noway_assert(lclVarTree != nullptr);
+ dest->gtFlags |= lclVarTree->gtFlags & ~(GTF_NODE_MASK | GTF_VAR_USEASG);
+ }
+ // Don't CSE the lhs of an assignment.
+ dest->gtFlags |= GTF_DONT_CSE;
+ }
+ else
+ {
+ noway_assert(srcDoFldAsg);
+ noway_assert(srcLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
+
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ dest = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
+ {
+ dest = gtCloneExpr(destAddr);
+ noway_assert(dest != nullptr);
+
+ // Is the address of a local?
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ bool* pIsEntire = (blockWidthIsConst ? &isEntire : nullptr);
+ if (dest->DefinesLocalAddr(this, blockWidth, &lclVarTree, pIsEntire))
+ {
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ if (!isEntire)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_USEASG;
+ }
+ }
+ }
+
+ GenTreePtr fieldOffsetNode = gtNewIconNode(lvaTable[fieldLclNum].lvFldOffset, TYP_I_IMPL);
+ // Have to set the field sequence -- which means we need the field handle.
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[srcLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+ fieldOffsetNode->gtIntCon.gtFieldSeq = curFieldSeq;
+
+ dest = gtNewOperNode(GT_ADD, TYP_BYREF, dest, fieldOffsetNode);
+
+ dest = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), dest);
+
+ // !!! The destination could be on stack. !!!
+ // This flag will let us choose the correct write barrier.
+ dest->gtFlags |= GTF_IND_TGTANYWHERE;
+ }
+
+ if (srcDoFldAsg)
+ {
+ noway_assert(srcLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[srcLclNum].lvFieldLclStart + i;
+ src = gtNewLclvNode(fieldLclNum, lvaTable[fieldLclNum].TypeGet());
+
+ noway_assert(srcLclVarTree != nullptr);
+ src->gtFlags |= srcLclVarTree->gtFlags & ~GTF_NODE_MASK;
+ // TODO-1stClassStructs: These should not need to be marked GTF_DONT_CSE,
+ // but they are when they are under a GT_ADDR.
+ src->gtFlags |= GTF_DONT_CSE;
+ }
+ else
+ {
+ noway_assert(destDoFldAsg);
+ noway_assert(destLclNum != BAD_VAR_NUM);
+ unsigned fieldLclNum = lvaTable[destLclNum].lvFieldLclStart + i;
+
+ if (srcSingleLclVarAsg)
+ {
+ noway_assert(fieldCnt == 1);
+ noway_assert(srcLclVar != nullptr);
+ noway_assert(addrSpill == nullptr);
+
+ src = gtNewLclvNode(srcLclNum, srcLclVar->TypeGet());
+ }
+ else
+ {
+ if (addrSpill)
+ {
+ assert(addrSpillTemp != BAD_VAR_NUM);
+ src = gtNewLclvNode(addrSpillTemp, TYP_BYREF);
+ }
+ else
+ {
+ src = gtCloneExpr(srcAddr);
+ noway_assert(src != nullptr);
+ }
+
+ CORINFO_CLASS_HANDLE classHnd = lvaTable[destLclNum].lvVerTypeInfo.GetClassHandle();
+ CORINFO_FIELD_HANDLE fieldHnd =
+ info.compCompHnd->getFieldInClass(classHnd, lvaTable[fieldLclNum].lvFldOrdinal);
+ curFieldSeq = GetFieldSeqStore()->CreateSingleton(fieldHnd);
+
+ src = gtNewOperNode(GT_ADD, TYP_BYREF, src,
+ new (this, GT_CNS_INT)
+ GenTreeIntCon(TYP_I_IMPL, lvaTable[fieldLclNum].lvFldOffset, curFieldSeq));
+
+ src = gtNewOperNode(GT_IND, lvaTable[fieldLclNum].TypeGet(), src);
+ }
+ }
+
+ noway_assert(dest->TypeGet() == src->TypeGet());
+
+ asg = gtNewAssignNode(dest, src);
+
+ // If we spilled the address, and we didn't do individual field assignments to promoted fields,
+ // and it was of a local, record the assignment as an indirect update of a local.
+ if (addrSpill && !destDoFldAsg && destLclNum != BAD_VAR_NUM)
+ {
+ curFieldSeq = GetFieldSeqStore()->Append(destFldSeq, curFieldSeq);
+ bool isEntire = (genTypeSize(var_types(lvaTable[destLclNum].lvType)) == genTypeSize(dest->TypeGet()));
+ IndirectAssignmentAnnotation* pIndirAnnot =
+ new (this, CMK_Unknown) IndirectAssignmentAnnotation(destLclNum, curFieldSeq, isEntire);
+ GetIndirAssignMap()->Set(asg, pIndirAnnot);
+ }
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ optAssertionGen(asg);
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ if (tree)
+ {
+ tree = gtNewOperNode(GT_COMMA, TYP_VOID, tree, asg);
+ }
+ else
+ {
+ tree = asg;
+ }
+ }
+ }
+
+ if (isLateArg)
+ {
+ tree->gtFlags |= GTF_LATE_ARG;
+ }
+
+#ifdef DEBUG
+ if (tree != oldTree)
+ {
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+ }
+
+ if (verbose)
+ {
+ printf("\nfgMorphCopyBlock (after):\n");
+ gtDispTree(tree);
+ }
+#endif
+
+_Done:
+ return tree;
+}
+
+// Insert conversions and normalize the tree to make it amenable to register-based
+// FP architectures.
+GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
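+ // For floating-point arithmetic nodes, cast both operands to the node's type; for floating-point
+ // comparisons with mixed operand types, widen the TYP_FLOAT operand to TYP_DOUBLE.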
+ if (tree->OperIsArithmetic() && varTypeIsFloating(tree))
+ {
+ if (op1->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet());
+ }
+ if (op2->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet());
+ }
+ }
+ else if (tree->OperIsCompare() && varTypeIsFloating(op1) && op1->TypeGet() != op2->TypeGet())
+ {
+ // both had better be floating, just one bigger than other
+ assert(varTypeIsFloating(op2));
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE);
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE);
+ }
+ }
+
+ return tree;
+}
+
+GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
+{
+ GenTree* op1 = compare->gtOp.gtOp1;
+ GenTree* op2 = compare->gtOp.gtOp2;
+ GenTree* opCns;
+ GenTreeCall* opCall;
+
+ // recognize this pattern:
+ //
+ // stmtExpr void (IL 0x000... ???)
+ // return int
+ // const ref null
+ // == int
+ // call help ref HELPER.CORINFO_HELP_BOX_NULLABLE
+ // const(h) long 0x7fed96836c8 class
+ // addr byref
+ // ld.lclVar struct V00 arg0
+ //
+ //
+ // which comes from this code (reported by customer as being slow) :
+ //
+ // private static bool IsNull<T>(T arg)
+ // {
+ // return arg==null;
+ // }
+ //
+
+ if (op1->IsCnsIntOrI() && op2->IsHelperCall())
+ {
+ opCns = op1;
+ opCall = op2->AsCall();
+ }
+ else if (op1->IsHelperCall() && op2->IsCnsIntOrI())
+ {
+ opCns = op2;
+ opCall = op1->AsCall();
+ }
+ else
+ {
+ return compare;
+ }
+
+ if (!opCns->IsIntegralConst(0))
+ {
+ return compare;
+ }
+
+ if (eeGetHelperNum(opCall->gtCallMethHnd) != CORINFO_HELP_BOX_NULLABLE)
+ {
+ return compare;
+ }
+
+ // replace the box with an access of the nullable 'hasValue' field which is at the zero offset
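+ // The address of the Nullable<T> value is the second argument to the helper call, so a TYP_BOOL
+ // indirection off that address reads 'hasValue' directly.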
+ GenTree* newOp = gtNewOperNode(GT_IND, TYP_BOOL, opCall->gtCall.gtCallArgs->gtOp.gtOp2->gtOp.gtOp1);
+
+ if (opCall == op1)
+ {
+ compare->gtOp.gtOp1 = newOp;
+ }
+ else
+ {
+ compare->gtOp.gtOp2 = newOp;
+ }
+
+ return compare;
+}
+
+#ifdef FEATURE_SIMD
+
+//--------------------------------------------------------------------------------------
+// fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed.
+//
+// Arguments:
+// simdNode - The GenTreeSIMD node to be copied
+//
+// Return Value:
+// A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar.
+//
+GenTree* Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode)
+{
+ // Copy the result of the SIMD intrinsic into a temp.
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access"));
+
+ CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE;
+ // We only have fields of the fixed float vectors.
+ noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT);
+ switch (simdNode->gtSIMDSize)
+ {
+ case 8:
+ simdHandle = SIMDVector2Handle;
+ break;
+ case 12:
+ simdHandle = SIMDVector3Handle;
+ break;
+ case 16:
+ simdHandle = SIMDVector4Handle;
+ break;
+ default:
+ noway_assert(!"field of unexpected SIMD type");
+ break;
+ }
+ assert(simdHandle != NO_CLASS_HANDLE);
+
+ lvaSetStruct(lclNum, simdHandle, false, true);
+ lvaTable[lclNum].lvFieldAccessed = true;
+
+ GenTree* asg = gtNewTempAssign(lclNum, simdNode);
+ GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET);
+
+ GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode);
+ return comma;
+}
+
+//--------------------------------------------------------------------------------------------------------------
+// getSIMDStructFromField:
+// Checks whether the field belongs to a SIMD struct. If it does, return the GenTreePtr for
+// the struct node, along with its base type, field index and SIMD size; if it does not, return nullptr.
+// Normally, if the tree node comes from a SIMD lclVar that is not used in any SIMD intrinsic, we
+// return nullptr, since in that case the SIMD struct should be treated as a regular struct.
+// However, if the caller wants the SIMD struct node regardless, it can set ignoreUsedInSIMDIntrinsic
+// to true; the lvIsUsedInSIMDIntrinsic check is then skipped, and the SIMD struct node is returned
+// whenever the struct is a SIMD struct.
+//
+// Arguments:
+// tree - GenTreePtr. This node will be checked to see whether it is a field that belongs to a SIMD
+// struct used for a SIMD intrinsic.
+// pBaseTypeOut - var_types pointer; if the tree node is the one we want, *pBaseTypeOut is set
+// to the SIMD lclVar's base type.
+// indexOut - unsigned pointer; if the tree is used for a SIMD intrinsic, *indexOut is set
+// to the index number of this field.
+// simdSizeOut - unsigned pointer; if the tree is used for a SIMD intrinsic, *simdSizeOut is set
+// to the size of the SIMD struct that this tree belongs to.
+// ignoreUsedInSIMDIntrinsic - bool. If this is set to true, then this function will ignore
+// the UsedInSIMDIntrinsic check.
+//
+// return value:
+// The GenTreePtr of the SIMD lclVar that the tree belongs to. If the tree is not a field of a SIMD
+// struct used in a SIMD intrinsic, nullptr is returned.
+//
+
+GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
+ var_types* pBaseTypeOut,
+ unsigned* indexOut,
+ unsigned* simdSizeOut,
+ bool ignoreUsedInSIMDIntrinsic /*false*/)
+{
+ GenTreePtr ret = nullptr;
+ if (tree->OperGet() == GT_FIELD)
+ {
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ if (objRef != nullptr)
+ {
+ GenTreePtr obj = nullptr;
+ if (objRef->gtOper == GT_ADDR)
+ {
+ obj = objRef->gtOp.gtOp1;
+ }
+ else if (ignoreUsedInSIMDIntrinsic)
+ {
+ obj = objRef;
+ }
+ else
+ {
+ return nullptr;
+ }
+
+ if (isSIMDTypeLocal(obj))
+ {
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+ if (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic)
+ {
+ *simdSizeOut = varDsc->lvExactSize;
+ *pBaseTypeOut = getBaseTypeOfSIMDLocal(obj);
+ ret = obj;
+ }
+ }
+ else if (obj->OperGet() == GT_SIMD)
+ {
+ ret = obj;
+ GenTreeSIMD* simdNode = obj->AsSIMD();
+ *simdSizeOut = simdNode->gtSIMDSize;
+ *pBaseTypeOut = simdNode->gtSIMDBaseType;
+ }
+ }
+ }
+ if (ret != nullptr)
+ {
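+ // The field's element index is its byte offset divided by the size of the SIMD base (element) type.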
+ unsigned BaseTypeSize = genTypeSize(*pBaseTypeOut);
+ *indexOut = tree->gtField.gtFldOffset / BaseTypeSize;
+ }
+ return ret;
+}
+
+/*****************************************************************************
+* If a read operation tries to access a SIMD struct field, then transform this
+* operation into the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
+* Otherwise, return the old tree.
+* Argument:
+* tree - GenTreePtr. If this pointer points to a SIMD struct field that is used for a SIMD
+* intrinsic, we will morph it into the SIMD intrinsic SIMDIntrinsicGetItem.
+* Return:
+* A GenTreePtr which points to the new tree. If the tree is not related to a SIMD
+* intrinsic, the original tree is returned.
+*/
+
+GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
+{
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
+ if (simdStructNode != nullptr)
+ {
+
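+ // Replace the field read with a SIMDIntrinsicGetItem of the containing SIMD struct at the field's index.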
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+ GenTree* op2 = gtNewIconNode(index);
+ tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+ return tree;
+}
+
+/*****************************************************************************
+* Transform an assignment to a SIMD struct field into the corresponding SIMD set intrinsic
+* (SIMDIntrinsicSetX/Y/Z/W), and return a new tree. If it is not such an assignment,
+* then return the old tree.
+* Argument:
+* tree - GenTreePtr. If this pointer points to an assignment to a SIMD struct field that is
+* used for a SIMD intrinsic, we will morph it into the corresponding SIMD set intrinsic.
+* Return:
+* A GenTreePtr which points to the new tree. If the tree is not related to a SIMD
+* intrinsic, the original tree is returned.
+*/
+
+GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_ASG);
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdOp1Struct = getSIMDStructFromField(op1, &baseType, &index, &simdSize);
+ if (simdOp1Struct != nullptr)
+ {
+ // Generate the simd set intrinsic
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+
+ SIMDIntrinsicID simdIntrinsicID = SIMDIntrinsicInvalid;
+ switch (index)
+ {
+ case 0:
+ simdIntrinsicID = SIMDIntrinsicSetX;
+ break;
+ case 1:
+ simdIntrinsicID = SIMDIntrinsicSetY;
+ break;
+ case 2:
+ simdIntrinsicID = SIMDIntrinsicSetZ;
+ break;
+ case 3:
+ simdIntrinsicID = SIMDIntrinsicSetW;
+ break;
+ default:
+ noway_assert(!"There is no set intrinsic for index bigger than 3");
+ }
+
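+ // Rewrite 'simd.field = value' as 'simd = SIMDIntrinsicSetN(simd, value)': clone the struct node to
+ // serve as the assignment destination, and make the set intrinsic the new source (op2).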
+ GenTreePtr target = gtClone(simdOp1Struct);
+ assert(target != nullptr);
+ GenTreePtr simdTree = gtNewSIMDNode(target->gtType, simdOp1Struct, op2, simdIntrinsicID, baseType, simdSize);
+ tree->gtOp.gtOp1 = target;
+ tree->gtOp.gtOp2 = simdTree;
+#ifdef DEBUG
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
+
+ return tree;
+}
+
+#endif
+/*****************************************************************************
+ *
+ * Transform the given GTK_SMPOP tree for code generation.
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
+{
+ // This extra scope is a workaround for a gcc bug: the inline destructor for ALLOCA_CHECK
+ // confuses the control flow, and gcc thinks that the function never returns.
+ {
+ ALLOCA_CHECK();
+ noway_assert(tree->OperKind() & GTK_SMPOP);
+
+ /* The steps in this function are :
+ o Perform required preorder processing
+ o Process the first, then second operand, if any
+ o Perform required postorder morphing
+ o Perform optional postorder morphing if optimizing
+ */
+
+ bool isQmarkColon = false;
+
+#if LOCAL_ASSERTION_PROP
+ AssertionIndex origAssertionCount = DUMMY_INIT(0);
+ AssertionDsc* origAssertionTab = DUMMY_INIT(NULL);
+
+ AssertionIndex thenAssertionCount = DUMMY_INIT(0);
+ AssertionDsc* thenAssertionTab = DUMMY_INIT(NULL);
+#endif
+
+ if (fgGlobalMorph)
+ {
+#if !FEATURE_STACK_FP_X87
+ tree = fgMorphForRegisterFP(tree);
+#endif
+ }
+
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /*-------------------------------------------------------------------------
+ * First do any PRE-ORDER processing
+ */
+
+ switch (oper)
+ {
+ // Some arithmetic operators need to use a helper call to the EE
+ int helper;
+
+ case GT_ASG:
+ tree = fgDoNormalizeOnStore(tree);
+ /* fgDoNormalizeOnStore can change op2 */
+ noway_assert(op1 == tree->gtOp.gtOp1);
+ op2 = tree->gtOp.gtOp2;
+ // TODO-1stClassStructs: this is here to match previous behavior, but results in some
+ // unnecessary pessimization in the handling of addresses in fgMorphCopyBlock().
+ if (tree->OperIsBlkOp())
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ if (tree->OperIsCopyBlkOp() &&
+ (op2->IsLocal() || (op2->OperIsIndir() && (op2->AsIndir()->Addr()->OperGet() == GT_ADDR))))
+ {
+ op2->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ {
+ // We should check whether op2 is being assigned to a SIMD struct field; if it is,
+ // we should translate the tree into a SIMD set intrinsic.
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0);
+ GenTreePtr newTree = fgMorphFieldAssignToSIMDIntrinsicSet(tree);
+ typ = tree->TypeGet();
+ op1 = tree->gtGetOp1();
+ op2 = tree->gtGetOp2();
+#ifdef DEBUG
+ assert((tree == newTree) && (tree->OperGet() == oper));
+ if ((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) != 0)
+ {
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+ }
+#endif // DEBUG
+ }
+#endif
+
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ case GT_CHS:
+
+ // We can't CSE the LHS of an assignment. Only r-values can be CSEed.
+ // Previously, the "lhs" (addr) of a block op was CSE'd. So, to duplicate the former
+ // behavior, allow CSE'ing if it is a struct type (or a TYP_REF transformed from a struct type)
+ // TODO-1stClassStructs: improve this.
+ if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+ break;
+
+ case GT_ADDR:
+
+ /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
+ op1->gtFlags |= GTF_DONT_CSE;
+ break;
+
+ case GT_QMARK:
+ case GT_JTRUE:
+
+ noway_assert(op1);
+
+ if (op1->OperKind() & GTK_RELOP)
+ {
+ noway_assert((oper == GT_JTRUE) || (op1->gtFlags & GTF_RELOP_QMARK));
+ /* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
+ not need to materialize the result as a 0 or 1. */
+
+ /* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
+ op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
+
+ // Request that the codegen for op1 sets the condition flags
+ // when it generates the code for op1.
+ //
+ // Codegen for op1 must set the condition flags if
+ // this method returns true.
+ //
+ op1->gtRequestSetFlags();
+ }
+ else
+ {
+ GenTreePtr effOp1 = op1->gtEffectiveVal();
+ noway_assert((effOp1->gtOper == GT_CNS_INT) &&
+ (effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
+ }
+ break;
+
+ case GT_COLON:
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+#endif
+ isQmarkColon = true;
+ }
+ break;
+
+ case GT_INDEX:
+ return fgMorphArrayIndex(tree);
+
+ case GT_CAST:
+ return fgMorphCast(tree);
+
+ case GT_MUL:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ /* For (long)int1 * (long)int2, we don't actually do the
+ casts, and just multiply the 32 bit values, which will
+ give us the 64 bit result in edx:eax */
+
+ noway_assert(op2);
+ if ((op1->gtOper == GT_CAST && op2->gtOper == GT_CAST &&
+ genActualType(op1->CastFromType()) == TYP_INT &&
+ genActualType(op2->CastFromType()) == TYP_INT) &&
+ !op1->gtOverflow() && !op2->gtOverflow())
+ {
+ // The casts have to be of the same signedness.
+ if ((op1->gtFlags & GTF_UNSIGNED) != (op2->gtFlags & GTF_UNSIGNED))
+ {
+ // We see if we can force an int constant to change its signedness
+ GenTreePtr constOp;
+ if (op1->gtCast.CastOp()->gtOper == GT_CNS_INT)
+ constOp = op1;
+ else if (op2->gtCast.CastOp()->gtOper == GT_CNS_INT)
+ constOp = op2;
+ else
+ goto NO_MUL_64RSLT;
+
+ if (((unsigned)(constOp->gtCast.CastOp()->gtIntCon.gtIconVal) < (unsigned)(0x80000000)))
+ constOp->gtFlags ^= GTF_UNSIGNED;
+ else
+ goto NO_MUL_64RSLT;
+ }
+
+ // The only combination that can overflow
+ if (tree->gtOverflow() && (tree->gtFlags & GTF_UNSIGNED) && !(op1->gtFlags & GTF_UNSIGNED))
+ goto NO_MUL_64RSLT;
+
+ /* Remaining combinations can never overflow during long mul. */
+
+ tree->gtFlags &= ~GTF_OVERFLOW;
+
+ /* Do unsigned mul only if the casts were unsigned */
+
+ tree->gtFlags &= ~GTF_UNSIGNED;
+ tree->gtFlags |= op1->gtFlags & GTF_UNSIGNED;
+
+ /* Since we are committing to GTF_MUL_64RSLT, we don't want
+ the casts to be folded away. So morph the castees directly */
+
+ op1->gtOp.gtOp1 = fgMorphTree(op1->gtOp.gtOp1);
+ op2->gtOp.gtOp1 = fgMorphTree(op2->gtOp.gtOp1);
+
+ // Propagate side effect flags up the tree
+ op1->gtFlags &= ~GTF_ALL_EFFECT;
+ op1->gtFlags |= (op1->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+ op2->gtFlags &= ~GTF_ALL_EFFECT;
+ op2->gtFlags |= (op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ // If the GT_MUL can be altogether folded away, we should do that.
+
+ if ((op1->gtCast.CastOp()->OperKind() & op2->gtCast.CastOp()->OperKind() & GTK_CONST) &&
+ opts.OptEnabled(CLFLG_CONSTANTFOLD))
+ {
+ tree->gtOp.gtOp1 = op1 = gtFoldExprConst(op1);
+ tree->gtOp.gtOp2 = op2 = gtFoldExprConst(op2);
+ noway_assert(op1->OperKind() & op2->OperKind() & GTK_CONST);
+ tree = gtFoldExprConst(tree);
+ noway_assert(tree->OperIsConst());
+ return tree;
+ }
+
+ tree->gtFlags |= GTF_MUL_64RSLT;
+
+ // If op1 and op2 are unsigned casts, we need to do an unsigned mult
+ tree->gtFlags |= (op1->gtFlags & GTF_UNSIGNED);
+
+ // Insert GT_NOP nodes for the cast operands so that they do not get folded
+ // And propagate the new flags. We don't want to CSE the casts because
+ // codegen expects GTF_MUL_64RSLT muls to have a certain layout.
+
+ if (op1->gtCast.CastOp()->OperGet() != GT_NOP)
+ {
+ op1->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op1->gtCast.CastOp());
+ op1->gtFlags &= ~GTF_ALL_EFFECT;
+ op1->gtFlags |= (op1->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+
+ if (op2->gtCast.CastOp()->OperGet() != GT_NOP)
+ {
+ op2->gtOp.gtOp1 = gtNewOperNode(GT_NOP, TYP_INT, op2->gtCast.CastOp());
+ op2->gtFlags &= ~GTF_ALL_EFFECT;
+ op2->gtFlags |= (op2->gtCast.CastOp()->gtFlags & GTF_ALL_EFFECT);
+ op2->gtFlags |= GTF_DONT_CSE;
+ }
+
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= ((op1->gtFlags | op2->gtFlags) & GTF_ALL_EFFECT);
+
+ goto DONE_MORPHING_CHILDREN;
+ }
+ else if ((tree->gtFlags & GTF_MUL_64RSLT) == 0)
+ {
+ NO_MUL_64RSLT:
+ if (tree->gtOverflow())
+ helper = (tree->gtFlags & GTF_UNSIGNED) ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
+ else
+ helper = CORINFO_HELP_LMUL;
+
+ goto USE_HELPER_FOR_ARITH;
+ }
+ else
+ {
+ /* We are seeing this node again. We have decided to use
+ GTF_MUL_64RSLT, so leave it alone. */
+
+ assert(tree->gtIsValid64RsltMul());
+ }
+ }
+#endif // !_TARGET_64BIT_
+ break;
+
+ case GT_DIV:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = CORINFO_HELP_LDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT && !fgIsSignedDivOptimizable(op2))
+ {
+ helper = CORINFO_HELP_DIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#endif
+#endif // !_TARGET_64BIT_
+
+#ifndef LEGACY_BACKEND
+ if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
+ {
+ op2 = gtFoldExprConst(op2);
+ }
+
+ if (fgShouldUseMagicNumberDivide(tree->AsOp()))
+ {
+ tree = fgMorphDivByConst(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+#endif // !LEGACY_BACKEND
+ break;
+
+ case GT_UDIV:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = CORINFO_HELP_ULDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT && !fgIsUnsignedDivOptimizable(op2))
+ {
+ helper = CORINFO_HELP_UDIV;
+ goto USE_HELPER_FOR_ARITH;
+ }
+#endif
+#endif // !_TARGET_64BIT_
+ break;
+
+ case GT_MOD:
+
+ if (varTypeIsFloating(typ))
+ {
+ helper = CORINFO_HELP_DBLREM;
+ noway_assert(op2);
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ if (op2->TypeGet() == TYP_FLOAT)
+ {
+ helper = CORINFO_HELP_FLTREM;
+ }
+ else
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
+ }
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ tree->gtOp.gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
+ }
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+ // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
+ // A similar optimization for signed mod will not work for a negative perfectly divisible
+ // HI-word. To make it correct, we would need to divide without the sign and then flip the
+ // result sign after mod. This requires 18 opcodes + flow, making it not worth inlining.
+ goto ASSIGN_HELPER_FOR_MOD;
+
+ case GT_UMOD:
+
+#ifdef _TARGET_ARMARCH_
+//
+// Note for _TARGET_ARMARCH_ we don't have a remainder instruction, so we don't do this optimization
+//
+#else // _TARGET_XARCH
+ /* If this is an unsigned long mod with op2 which is a cast to long from a
+ constant int, then don't morph to a call to the helper. This can be done
+ faster inline using idiv.
+ */
+
+ noway_assert(op2);
+ if ((typ == TYP_LONG) && opts.OptEnabled(CLFLG_CONSTANTFOLD) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op1->gtFlags & GTF_UNSIGNED)) &&
+ ((tree->gtFlags & GTF_UNSIGNED) == (op2->gtFlags & GTF_UNSIGNED)))
+ {
+ if (op2->gtOper == GT_CAST && op2->gtCast.CastOp()->gtOper == GT_CNS_INT &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal >= 2 &&
+ op2->gtCast.CastOp()->gtIntCon.gtIconVal <= 0x3fffffff &&
+ (tree->gtFlags & GTF_UNSIGNED) == (op2->gtCast.CastOp()->gtFlags & GTF_UNSIGNED))
+ {
+ tree->gtOp.gtOp2 = op2 = fgMorphCast(op2);
+ noway_assert(op2->gtOper == GT_CNS_NATIVELONG);
+ }
+
+ if (op2->gtOper == GT_CNS_NATIVELONG && op2->gtIntConCommon.LngValue() >= 2 &&
+ op2->gtIntConCommon.LngValue() <= 0x3fffffff)
+ {
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
+ noway_assert(op1->TypeGet() == TYP_LONG);
+
+ // Update flags for op1 morph
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+
+ tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT); // Only update with op1 as op2 is a constant
+
+ // If op1 is a constant, then do constant folding of the division operator
+ if (op1->gtOper == GT_CNS_NATIVELONG)
+ {
+ tree = gtFoldExpr(tree);
+ }
+ return tree;
+ }
+ }
+#endif // _TARGET_XARCH
+
+ ASSIGN_HELPER_FOR_MOD:
+
+ // For "val % 1", return 0 if op1 doesn't have any side effects
+ // and we are not in the CSE phase; in the CSE phase we cannot discard 'tree'
+ // because it may contain CSE expressions that we haven't yet examined.
+ //
+ if (((op1->gtFlags & GTF_SIDE_EFFECT) == 0) && !optValnumCSE_phase)
+ {
+ if (op2->IsIntegralConst(1))
+ {
+ GenTreePtr zeroNode = gtNewZeroConNode(typ);
+#ifdef DEBUG
+ zeroNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ DEBUG_DESTROY_NODE(tree);
+ return zeroNode;
+ }
+ }
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+
+#if USE_HELPERS_FOR_INT_DIV
+ if (typ == TYP_INT)
+ {
+ if (oper == GT_UMOD && !fgIsUnsignedModOptimizable(op2))
+ {
+ helper = CORINFO_HELP_UMOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+ else if (oper == GT_MOD && !fgIsSignedModOptimizable(op2))
+ {
+ helper = CORINFO_HELP_MOD;
+ goto USE_HELPER_FOR_ARITH;
+ }
+ }
+#endif
+#endif // !_TARGET_64BIT_
+
+#ifndef LEGACY_BACKEND
+ if (op2->gtOper == GT_CAST && op2->gtOp.gtOp1->IsCnsIntOrI())
+ {
+ op2 = gtFoldExprConst(op2);
+ }
+
+#ifdef _TARGET_ARM64_
+
+ // For ARM64 we don't have a remainder instruction,
+ // The architecture manual suggests the following transformation to
+ // generate code for such operator:
+ //
+ // a % b = a - (a / b) * b;
+ //
+ tree = fgMorphModToSubMulDiv(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+
+#else // !_TARGET_ARM64_
+
+ if (oper != GT_UMOD && fgShouldUseMagicNumberDivide(tree->AsOp()))
+ {
+ tree = fgMorphModByConst(tree->AsOp());
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+#endif //_TARGET_ARM64_
+#endif // !LEGACY_BACKEND
+ break;
+
+ USE_HELPER_FOR_ARITH:
+ {
+ /* We have to morph these arithmetic operations into helper calls
+ before morphing the arguments (preorder), else the arguments
+ won't get correct values of fgPtrArgCntCur.
+ However, try to fold the tree first in case we end up with a
+ simple node which won't need a helper call at all */
+
+ noway_assert(tree->OperIsBinary());
+
+ GenTreePtr oldTree = tree;
+
+ tree = gtFoldExpr(tree);
+
+ // Were we able to fold it ?
+ // Note that gtFoldExpr may return a non-leaf even if successful
+ // e.g. for something like "expr / 1" - see also bug #290853
+ if (tree->OperIsLeaf() || (oldTree != tree))
+ {
+ return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
+ }
+
+ // Did we fold it into a comma node with throw?
+ if (tree->gtOper == GT_COMMA)
+ {
+ noway_assert(fgIsCommaThrow(tree));
+ return fgMorphTree(tree);
+ }
+ }
+ return fgMorphIntoHelperCall(tree, helper, gtNewArgList(op1, op2));
+
+ case GT_RETURN:
+ // normalize small integer return values
+ if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (op1 != nullptr) &&
+ (op1->TypeGet() != TYP_VOID) && fgCastNeeded(op1, info.compRetType))
+ {
+ // Small-typed return values are normalized by the callee
+ op1 = gtNewCastNode(TYP_INT, op1, info.compRetType);
+
+ // Propagate GTF_COLON_COND
+ op1->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
+
+ tree->gtOp.gtOp1 = fgMorphCast(op1);
+
+ // Propagate side effect flags
+ tree->gtFlags &= ~GTF_ALL_EFFECT;
+ tree->gtFlags |= (tree->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
+
+ return tree;
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+
+ // Check for typeof(...) == obj.GetType()
+ // Also check for typeof(...) == typeof(...)
+ // IMPORTANT NOTE: this optimization relies on a one-to-one mapping between
+ // type handles and instances of System.Type
+ // If this invariant is ever broken, the optimization will need updating
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ if (op1->gtOper == GT_CALL && op2->gtOper == GT_CALL &&
+ ((op1->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
+ (op1->gtCall.gtCallType == CT_HELPER)) &&
+ ((op2->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) ||
+ (op2->gtCall.gtCallType == CT_HELPER)))
+#else
+ if ((((op1->gtOper == GT_INTRINSIC) &&
+ (op1->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op1->gtOper == GT_CALL) && (op1->gtCall.gtCallType == CT_HELPER))) &&
+ (((op2->gtOper == GT_INTRINSIC) &&
+ (op2->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType)) ||
+ ((op2->gtOper == GT_CALL) && (op2->gtCall.gtCallType == CT_HELPER))))
+#endif
+ {
+ GenTreePtr pGetClassFromHandle;
+ GenTreePtr pGetType;
+
+#ifdef LEGACY_BACKEND
+ bool bOp1ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op1);
+ bool bOp2ClassFromHandle = gtIsTypeHandleToRuntimeTypeHelper(op2);
+#else
+ bool bOp1ClassFromHandle = op1->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op1) : false;
+ bool bOp2ClassFromHandle = op2->gtOper == GT_CALL ? gtIsTypeHandleToRuntimeTypeHelper(op2) : false;
+#endif
+
+ // Optimize typeof(...) == typeof(...)
+ // Typically this occurs in generic code that attempts a type switch
+ // e.g. typeof(T) == typeof(int)
+
+ if (bOp1ClassFromHandle && bOp2ClassFromHandle)
+ {
+ GenTreePtr classFromHandleArg1 = tree->gtOp.gtOp1->gtCall.gtCallArgs->gtOp.gtOp1;
+ GenTreePtr classFromHandleArg2 = tree->gtOp.gtOp2->gtCall.gtCallArgs->gtOp.gtOp1;
+
+ GenTreePtr compare = gtNewOperNode(oper, TYP_INT, classFromHandleArg1, classFromHandleArg2);
+
+ compare->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ // Morph and return
+ return fgMorphTree(compare);
+ }
+ else if (bOp1ClassFromHandle || bOp2ClassFromHandle)
+ {
+ //
+ // Now check for GetClassFromHandle(handle) == obj.GetType()
+ //
+
+ if (bOp1ClassFromHandle)
+ {
+ pGetClassFromHandle = tree->gtOp.gtOp1;
+ pGetType = op2;
+ }
+ else
+ {
+ pGetClassFromHandle = tree->gtOp.gtOp2;
+ pGetType = op1;
+ }
+
+ GenTreePtr pGetClassFromHandleArgument = pGetClassFromHandle->gtCall.gtCallArgs->gtOp.gtOp1;
+ GenTreePtr pConstLiteral = pGetClassFromHandleArgument;
+
+ // Unwrap GT_NOP node used to prevent constant folding
+ if (pConstLiteral->gtOper == GT_NOP && pConstLiteral->gtType == TYP_I_IMPL)
+ {
+ pConstLiteral = pConstLiteral->gtOp.gtOp1;
+ }
+
+ // In the ngen case, we have to go through an indirection to get the right handle.
+ if (pConstLiteral->gtOper == GT_IND)
+ {
+ pConstLiteral = pConstLiteral->gtOp.gtOp1;
+ }
+#ifdef LEGACY_BACKEND
+
+ if (pGetType->gtCall.gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC &&
+ info.compCompHnd->getIntrinsicID(pGetType->gtCall.gtCallMethHnd) ==
+ CORINFO_INTRINSIC_Object_GetType &&
+#else
+ if ((pGetType->gtOper == GT_INTRINSIC) &&
+ (pGetType->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Object_GetType) &&
+#endif
+ pConstLiteral->gtOper == GT_CNS_INT && pConstLiteral->gtType == TYP_I_IMPL)
+ {
+ CORINFO_CLASS_HANDLE clsHnd =
+ CORINFO_CLASS_HANDLE(pConstLiteral->gtIntCon.gtCompileTimeHandle);
+
+ if (info.compCompHnd->canInlineTypeCheckWithObjectVTable(clsHnd))
+ {
+ // Method Table tree
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef LEGACY_BACKEND
+ GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtCall.gtCallObjp);
+#else
+ GenTreePtr objMT = gtNewOperNode(GT_IND, TYP_I_IMPL, pGetType->gtUnOp.gtOp1);
+#endif
+ objMT->gtFlags |= GTF_EXCEPT; // Null ref exception if object is null
+ compCurBB->bbFlags |= BBF_HAS_VTABREF;
+ optMethodFlags |= OMF_HAS_VTABLEREF;
+
+ // Method table constant
+ GenTreePtr cnsMT = pGetClassFromHandleArgument;
+
+ GenTreePtr compare = gtNewOperNode(oper, TYP_INT, objMT, cnsMT);
+
+ compare->gtFlags |=
+ tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ // Morph and return
+ return fgMorphTree(compare);
+ }
+ }
+ }
+ }
+ fgMorphRecognizeBoxNullable(tree);
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ break;
+
+#ifdef _TARGET_ARM_
+ case GT_INTRINSIC:
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round)
+ {
+ switch (tree->TypeGet())
+ {
+ case TYP_DOUBLE:
+ return fgMorphIntoHelperCall(tree, CORINFO_HELP_DBLROUND, gtNewArgList(op1));
+ case TYP_FLOAT:
+ return fgMorphIntoHelperCall(tree, CORINFO_HELP_FLTROUND, gtNewArgList(op1));
+ default:
+ unreached();
+ }
+ }
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+#if !CPU_HAS_FP_SUPPORT
+ tree = fgMorphToEmulatedFP(tree);
+#endif
+
+ /* Could this operator throw an exception? */
+ if (fgGlobalMorph && tree->OperMayThrow())
+ {
+ if (((tree->OperGet() != GT_IND) && !tree->OperIsBlk()) || fgAddrCouldBeNull(tree->gtOp.gtOp1))
+ {
+ /* Mark the tree node as potentially throwing an exception */
+ tree->gtFlags |= GTF_EXCEPT;
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Process the first operand, if any
+ */
+
+ if (op1)
+ {
+
+#if LOCAL_ASSERTION_PROP
+ // If we are entering the "then" part of a Qmark-Colon we must
+ // save the state of the current copy assignment table
+ // so that we can restore this state when entering the "else" part
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ if (optAssertionCount)
+ {
+ noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
+ unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
+ origAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
+ origAssertionCount = optAssertionCount;
+ memcpy(origAssertionTab, optAssertionTabPrivate, tabSize);
+ }
+ else
+ {
+ origAssertionCount = 0;
+ origAssertionTab = nullptr;
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // We might need a new MorphAddressContext context. (These are used to convey
+ // parent context about how addresses being calculated will be used; see the
+ // specification comment for MorphAddrContext for full details.)
+ // Assume it's an Ind context to start.
+ MorphAddrContext subIndMac1(MACK_Ind);
+ MorphAddrContext* subMac1 = mac;
+ if (subMac1 == nullptr || subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_CopyBlock)
+ {
+ switch (tree->gtOper)
+ {
+ case GT_ADDR:
+ if (subMac1 == nullptr)
+ {
+ subMac1 = &subIndMac1;
+ subMac1->m_kind = MACK_Addr;
+ }
+ break;
+ case GT_COMMA:
+ // In a comma, the incoming context only applies to the rightmost arg of the
+ // comma list. The left arg (op1) gets a fresh context.
+ subMac1 = nullptr;
+ break;
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ subMac1 = &subIndMac1;
+ }
+ break;
+ case GT_OBJ:
+ case GT_BLK:
+ case GT_DYN_BLK:
+ case GT_IND:
+ subMac1 = &subIndMac1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ // For additions, if we're in an IND context keep track of whether
+ // all offsets added to the address are constant, and their sum.
+ if (tree->gtOper == GT_ADD && subMac1 != nullptr)
+ {
+ assert(subMac1->m_kind == MACK_Ind || subMac1->m_kind == MACK_Addr); // Can't be a CopyBlock.
+ GenTreePtr otherOp = tree->gtOp.gtOp2;
+ // Is the other operand a constant?
+ if (otherOp->IsCnsIntOrI())
+ {
+ ClrSafeInt<size_t> totalOffset(subMac1->m_totalOffset);
+ totalOffset += otherOp->gtIntConCommon.IconValue();
+ if (totalOffset.IsOverflow())
+ {
+ // We will consider an offset so large as to overflow as "not a constant" --
+ // we will do a null check.
+ subMac1->m_allConstantOffsets = false;
+ }
+ else
+ {
+ subMac1->m_totalOffset += otherOp->gtIntConCommon.IconValue();
+ }
+ }
+ else
+ {
+ subMac1->m_allConstantOffsets = false;
+ }
+ }
+
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1, subMac1);
+
+#if LOCAL_ASSERTION_PROP
+ // If we are exiting the "then" part of a Qmark-Colon we must
+ // save the state of the current copy assignment table
+ // so that we can merge this state with the "else" part exit
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ if (optAssertionCount)
+ {
+ noway_assert(optAssertionCount <= optMaxAssertionCount); // else ALLOCA() is a bad idea
+ unsigned tabSize = optAssertionCount * sizeof(AssertionDsc);
+ thenAssertionTab = (AssertionDsc*)ALLOCA(tabSize);
+ thenAssertionCount = optAssertionCount;
+ memcpy(thenAssertionTab, optAssertionTabPrivate, tabSize);
+ }
+ else
+ {
+ thenAssertionCount = 0;
+ thenAssertionTab = nullptr;
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ /* Morphing along with folding and inlining may have changed the
+ * side effect flags, so we have to reset them
+ *
+ * NOTE: Don't reset the exception flags on nodes that may throw */
+
+ noway_assert(tree->gtOper != GT_CALL);
+
+ if ((tree->gtOper != GT_INTRINSIC) || !IsIntrinsicImplementedByUserCall(tree->gtIntrinsic.gtIntrinsicId))
+ {
+ tree->gtFlags &= ~GTF_CALL;
+ }
+
+ if (!tree->OperMayThrow())
+ {
+ tree->gtFlags &= ~GTF_EXCEPT;
+ }
+
+ /* Propagate the new flags */
+ tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
+
+ // &aliasedVar doesn't need GTF_GLOB_REF, though aliasedVar does
+ // Similarly for clsVar
+ if (oper == GT_ADDR && (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_CLS_VAR))
+ {
+ tree->gtFlags &= ~GTF_GLOB_REF;
+ }
+ } // if (op1)
+
+ /*-------------------------------------------------------------------------
+ * Process the second operand, if any
+ */
+
+ if (op2)
+ {
+
+#if LOCAL_ASSERTION_PROP
+ // If we are entering the "else" part of a Qmark-Colon we must
+ // reset the state of the current copy assignment table
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ optAssertionReset(0);
+ if (origAssertionCount)
+ {
+ size_t tabSize = origAssertionCount * sizeof(AssertionDsc);
+ memcpy(optAssertionTabPrivate, origAssertionTab, tabSize);
+ optAssertionReset(origAssertionCount);
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+
+ // We might need a new MorphAddressContext context to use in evaluating op2.
+ // (These are used to convey parent context about how addresses being calculated
+ // will be used; see the specification comment for MorphAddrContext for full details.)
+ // Assume it's an Ind context to start.
+ MorphAddrContext subIndMac2(MACK_Ind);
+ switch (tree->gtOper)
+ {
+ case GT_ADD:
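+ // Mirror the op1 handling: if we're in an IND context, note whether the offset
+ // contributed by the other operand is a constant.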
+ if (mac != nullptr && mac->m_kind == MACK_Ind)
+ {
+ GenTreePtr otherOp = tree->gtOp.gtOp1;
+ // Is the other operand a constant?
+ if (otherOp->IsCnsIntOrI())
+ {
+ mac->m_totalOffset += otherOp->gtIntConCommon.IconValue();
+ }
+ else
+ {
+ mac->m_allConstantOffsets = false;
+ }
+ }
+ break;
+ case GT_ASG:
+ if (tree->OperIsBlkOp())
+ {
+ mac = &subIndMac2;
+ }
+ break;
+ default:
+ break;
+ }
+ tree->gtOp.gtOp2 = op2 = fgMorphTree(op2, mac);
+
+ /* Propagate the side effect flags from op2 */
+
+ tree->gtFlags |= (op2->gtFlags & GTF_ALL_EFFECT);
+
+#if LOCAL_ASSERTION_PROP
+ // If we are exiting the "else" part of a Qmark-Colon we must
+ // merge the state of the current copy assignment table with
+ // that of the exit of the "then" part.
+ if (isQmarkColon)
+ {
+ noway_assert(optLocalAssertionProp);
+ // If either exit table has zero entries then
+ // the merged table also has zero entries
+ if (optAssertionCount == 0 || thenAssertionCount == 0)
+ {
+ optAssertionReset(0);
+ }
+ else
+ {
+ size_t tabSize = optAssertionCount * sizeof(AssertionDsc);
+ if ((optAssertionCount != thenAssertionCount) ||
+ (memcmp(thenAssertionTab, optAssertionTabPrivate, tabSize) != 0))
+ {
+ // Yes they are different so we have to find the merged set
+ // Iterate over the copy asgn table removing any entries
+ // that do not have an exact match in the thenAssertionTab
+ AssertionIndex index = 1;
+ while (index <= optAssertionCount)
+ {
+ AssertionDsc* curAssertion = optGetAssertion(index);
+
+ for (unsigned j = 0; j < thenAssertionCount; j++)
+ {
+ AssertionDsc* thenAssertion = &thenAssertionTab[j];
+
+ // Do the left sides match?
+ if ((curAssertion->op1.lcl.lclNum == thenAssertion->op1.lcl.lclNum) &&
+ (curAssertion->assertionKind == thenAssertion->assertionKind))
+ {
+ // Do the right sides match?
+ if ((curAssertion->op2.kind == thenAssertion->op2.kind) &&
+ (curAssertion->op2.lconVal == thenAssertion->op2.lconVal))
+ {
+ goto KEEP;
+ }
+ else
+ {
+ goto REMOVE;
+ }
+ }
+ }
+ //
+ // If we fall out of the loop above, we didn't find a matching entry
+ // in the thenAssertionTab, so the assertion must have been killed on
+ // that path and we remove it here.
+ //
+ REMOVE:
+ // The assertion at 'index' is to be removed
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("The QMARK-COLON ");
+ printTreeID(tree);
+ printf(" removes assertion candidate #%d\n", index);
+ }
+#endif
+ optAssertionRemove(index);
+ continue;
+ KEEP:
+ // The assertion at 'index' is to be kept
+ index++;
+ }
+ }
+ }
+ }
+#endif // LOCAL_ASSERTION_PROP
+ } // if (op2)
+
+ DONE_MORPHING_CHILDREN:
+
+/*-------------------------------------------------------------------------
+ * Now do POST-ORDER processing
+ */
+
+#if FEATURE_FIXED_OUT_ARGS && !defined(_TARGET_64BIT_)
+ // Variable shifts of a long end up being helper calls, so mark the tree as such. This
+ // is potentially too conservative, since they'll get treated as having side effects.
+ // It is important to mark them as calls so if they are part of an argument list,
+ // they will get sorted and processed properly (for example, it is important to handle
+ // all nested calls before putting struct arguments in the argument registers). We
+ // could mark the trees just before argument processing, but it would require a full
+ // tree walk of the argument tree, so we just do it here, instead, even though we'll
+ // mark non-argument trees (that will still get converted to calls, anyway).
+ if (GenTree::OperIsShift(oper) && (tree->TypeGet() == TYP_LONG) && (op2->OperGet() != GT_CNS_INT))
+ {
+ tree->gtFlags |= GTF_CALL;
+ }
+#endif // FEATURE_FIXED_OUT_ARGS && !_TARGET_64BIT_
+
+ if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) &&
+ (op2 && !varTypeIsGC(op2->TypeGet())))
+ {
+ // The tree is really not GC but was marked as such. Now that the
+ // children have been unmarked, unmark the tree too.
+
+ // Remember that GT_COMMA inherits its type only from op2
+ if (tree->gtOper == GT_COMMA)
+ {
+ tree->gtType = genActualType(op2->TypeGet());
+ }
+ else
+ {
+ tree->gtType = genActualType(op1->TypeGet());
+ }
+ }
+
+ GenTreePtr oldTree = tree;
+
+ GenTreePtr qmarkOp1 = nullptr;
+ GenTreePtr qmarkOp2 = nullptr;
+
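+ // If this is a QMARK with a COLON, remember the colon's operands so we can tell
+ // below whether gtFoldExpr returned one of them.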
+ if ((tree->OperGet() == GT_QMARK) && (tree->gtOp.gtOp2->OperGet() == GT_COLON))
+ {
+ qmarkOp1 = oldTree->gtOp.gtOp2->gtOp.gtOp1;
+ qmarkOp2 = oldTree->gtOp.gtOp2->gtOp.gtOp2;
+ }
+
+ // Try to fold it; maybe we get lucky.
+ tree = gtFoldExpr(tree);
+
+ if (oldTree != tree)
+ {
+ /* if gtFoldExpr returned op1 or op2 then we are done */
+ if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
+ {
+ return tree;
+ }
+
+ /* If we created a comma-throw tree then we need to morph op1 */
+ if (fgIsCommaThrow(tree))
+ {
+ tree->gtOp.gtOp1 = fgMorphTree(tree->gtOp.gtOp1);
+ fgMorphTreeDone(tree);
+ return tree;
+ }
+
+ return tree;
+ }
+ else if (tree->OperKind() & GTK_CONST)
+ {
+ return tree;
+ }
+
+ /* gtFoldExpr could have used setOper to change the oper */
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+
+ /* gtFoldExpr could have changed op1 and op2 */
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ // Do we have an integer compare operation?
+ //
+ if (tree->OperIsCompare() && varTypeIsIntegralOrI(tree->TypeGet()))
+ {
+ // Are we comparing against zero?
+ //
+ if (op2->IsIntegralConst(0))
+ {
+ // Request that the codegen for op1 sets the condition flags
+ // when it generates the code for op1.
+ //
+ // Codegen for op1 must set the condition flags if
+ // this method returns true.
+ //
+ op1->gtRequestSetFlags();
+ }
+ }
+ /*-------------------------------------------------------------------------
+ * Perform the required oper-specific postorder morphing
+ */
+
+ GenTreePtr temp;
+ GenTreePtr cns1, cns2;
+ GenTreePtr thenNode;
+ GenTreePtr elseNode;
+ size_t ival1, ival2;
+ GenTreePtr lclVarTree;
+ GenTreeLclVarCommon* lclVarCmnTree;
+ FieldSeqNode* fieldSeq = nullptr;
+
+ switch (oper)
+ {
+ case GT_ASG:
+
+ lclVarTree = fgIsIndirOfAddrOfLocal(op1);
+ if (lclVarTree != nullptr)
+ {
+ lclVarTree->gtFlags |= GTF_VAR_DEF;
+ }
+
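+ // If the target of the assignment is a constant (e.g. a constant address),
+ // wrap it in a GT_IND so the store is performed through an indirection.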
+ if (op1->gtEffectiveVal()->OperIsConst())
+ {
+ op1 = gtNewOperNode(GT_IND, tree->TypeGet(), op1);
+ tree->gtOp.gtOp1 = op1;
+ }
+
+ /* If we are storing a small type, we might be able to omit a cast */
+ if ((op1->gtOper == GT_IND) && varTypeIsSmall(op1->TypeGet()))
+ {
+ if (!gtIsActiveCSE_Candidate(op2) && (op2->gtOper == GT_CAST) && !op2->gtOverflow())
+ {
+ var_types castType = op2->CastToType();
+
+ // If we are performing a narrowing cast and
+ // castType is larger or the same as op1's type
+ // then we can discard the cast.
+
+ if (varTypeIsSmall(castType) && (castType >= op1->TypeGet()))
+ {
+ tree->gtOp.gtOp2 = op2 = op2->gtCast.CastOp();
+ }
+ }
+ else if (op2->OperIsCompare() && varTypeIsByte(op1->TypeGet()))
+ {
+ /* We don't need to zero extend the setcc instruction */
+ op2->gtType = TYP_BYTE;
+ }
+ }
+ // If we introduced a CSE we may need to undo the optimization above
+ // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
+ // When we introduce the CSE we remove the GT_IND and substitute a GT_LCL_VAR in its place.
+ else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
+ {
+ unsigned varNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ /* We again need to zero extend the setcc instruction */
+ op2->gtType = varDsc->TypeGet();
+ }
+ fgAssignSetVarDef(tree);
+
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_MOD:
+ case GT_ASG_UDIV:
+ case GT_ASG_UMOD:
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+
+ /* We can't CSE the LHS of an assignment */
+ /* We also must set this in the pre-morphing phase, otherwise assertionProp doesn't see it */
+ if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
+ {
+ op1->gtFlags |= GTF_DONT_CSE;
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+
+ /* Make sure we're allowed to do this */
+
+ if (optValnumCSE_phase)
+ {
+ // It is not safe to reorder/delete CSE's
+ break;
+ }
+
+ cns2 = op2;
+
+ /* Check for "(expr +/- icon1) ==/!= (non-zero-icon2)" */
+
+ if (cns2->gtOper == GT_CNS_INT && cns2->gtIntCon.gtIconVal != 0)
+ {
+ op1 = tree->gtOp.gtOp1;
+
+ /* Since this can occur repeatedly we use a while loop */
+
+ while ((op1->gtOper == GT_ADD || op1->gtOper == GT_SUB) &&
+ (op1->gtOp.gtOp2->gtOper == GT_CNS_INT) && (op1->gtType == TYP_INT) &&
+ (op1->gtOverflow() == false))
+ {
+ /* Got it; change "x+icon1==icon2" to "x==icon2-icon1" */
+
+ ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ ival2 = cns2->gtIntCon.gtIconVal;
+
+ if (op1->gtOper == GT_ADD)
+ {
+ ival2 -= ival1;
+ }
+ else
+ {
+ ival2 += ival1;
+ }
+ cns2->gtIntCon.gtIconVal = ival2;
+
+#ifdef _TARGET_64BIT_
+ // we need to properly re-sign-extend or truncate as needed.
+ cns2->AsIntCon()->TruncateOrSignExtend32();
+#endif // _TARGET_64BIT_
+
+ op1 = tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
+ }
+ }
+
+ //
+ // Here we look for the following tree
+ //
+ //       EQ/NE
+ //       /   \
+ //     op1   CNS 0/1
+ //
+ ival2 = INT_MAX; // The value of INT_MAX for ival2 just means that the constant value is not 0 or 1
+
+ // cast to unsigned allows test for both 0 and 1
+ if ((cns2->gtOper == GT_CNS_INT) && (((size_t)cns2->gtIntConCommon.IconValue()) <= 1U))
+ {
+ ival2 = (size_t)cns2->gtIntConCommon.IconValue();
+ }
+ else // cast to UINT64 allows test for both 0 and 1
+ if ((cns2->gtOper == GT_CNS_LNG) && (((UINT64)cns2->gtIntConCommon.LngValue()) <= 1ULL))
+ {
+ ival2 = (size_t)cns2->gtIntConCommon.LngValue();
+ }
+
+ if (ival2 != INT_MAX)
+ {
+ // If we don't have a comma and relop, we can't do this optimization
+ //
+ if ((op1->gtOper == GT_COMMA) && (op1->gtOp.gtOp2->OperIsCompare()))
+ {
+ // Here we look for the following transformation
+ //
+ //           EQ/NE                    Possible REVERSE(RELOP)
+ //           /   \                          /      \
+ //       COMMA   CNS 0/1     ->         COMMA     relop_op2
+ //       /   \                          /    \
+ //      x   RELOP                      x    relop_op1
+ //          /   \
+ //   relop_op1  relop_op2
+ //
+ //
+ //
+ GenTreePtr comma = op1;
+ GenTreePtr relop = comma->gtOp.gtOp2;
+
+ GenTreePtr relop_op1 = relop->gtOp.gtOp1;
+
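+ // Reverse the relop when the EQ/NE tests for it being false, i.e. (relop == 0) or (relop != 1).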
+ bool reverse = ((ival2 == 0) == (oper == GT_EQ));
+
+ if (reverse)
+ {
+ gtReverseCond(relop);
+ }
+
+ relop->gtOp.gtOp1 = comma;
+ comma->gtOp.gtOp2 = relop_op1;
+
+ // Comma now has fewer nodes underneath it, so we need to regenerate its flags
+ comma->gtFlags &= ~GTF_ALL_EFFECT;
+ comma->gtFlags |= (comma->gtOp.gtOp1->gtFlags) & GTF_ALL_EFFECT;
+ comma->gtFlags |= (comma->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
+
+ noway_assert((relop->gtFlags & GTF_RELOP_JMP_USED) == 0);
+ noway_assert((relop->gtFlags & GTF_REVERSE_OPS) == 0);
+ relop->gtFlags |=
+ tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE | GTF_ALL_EFFECT);
+
+ return relop;
+ }
+
+ if (op1->gtOper == GT_COMMA)
+ {
+ // Here we look for the following tree
+ // and when the LCL_VAR is a temp we can fold the tree:
+ //
+ //           EQ/NE                       EQ/NE
+ //           /   \                       /   \
+ //       COMMA   CNS 0/1     ->      RELOP   CNS 0/1
+ //       /   \                       /   \
+ //     ASG   LCL_VAR
+ //     /  \
+ //  LCL_VAR  RELOP
+ //           /   \
+ //
+
+ GenTreePtr asg = op1->gtOp.gtOp1;
+ GenTreePtr lcl = op1->gtOp.gtOp2;
+
+ /* Make sure that the left side of the comma is the assignment of the LCL_VAR */
+ if (asg->gtOper != GT_ASG)
+ {
+ goto SKIP;
+ }
+
+ /* The right side of the comma must be a LCL_VAR temp */
+ if (lcl->gtOper != GT_LCL_VAR)
+ {
+ goto SKIP;
+ }
+
+ unsigned lclNum = lcl->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ /* If the LCL_VAR is not a temp then bail, a temp has a single def */
+ if (!lvaTable[lclNum].lvIsTemp)
+ {
+ goto SKIP;
+ }
+
+#if FEATURE_ANYCSE
+ /* If the LCL_VAR is a CSE temp then bail, it could have multiple defs/uses */
+ // Fix 383856 X86/ARM ILGEN
+ if (lclNumIsCSE(lclNum))
+ {
+ goto SKIP;
+ }
+#endif
+
+ /* We also must be assigning the result of a RELOP */
+ if (asg->gtOp.gtOp1->gtOper != GT_LCL_VAR)
+ {
+ goto SKIP;
+ }
+
+ /* Both of the LCL_VARs must match */
+ if (asg->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lclNum)
+ {
+ goto SKIP;
+ }
+
+ /* If right side of asg is not a RELOP then skip */
+ if (!asg->gtOp.gtOp2->OperIsCompare())
+ {
+ goto SKIP;
+ }
+
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ /* Set op1 to the right side of asg, (i.e. the RELOP) */
+ op1 = asg->gtOp.gtOp2;
+
+ DEBUG_DESTROY_NODE(asg->gtOp.gtOp1);
+ DEBUG_DESTROY_NODE(lcl);
+
+ /* This local variable should never be used again */
+ // <BUGNUM>
+ // VSW 184221: Set RefCnt to zero to indicate that this local var
+ // is not used any more. (Keep the lvType as is.)
+ // Otherwise lvOnFrame will be set to true in Compiler::raMarkStkVars
+ // And then emitter::emitEndCodeGen will assert in the following line:
+ // noway_assert( dsc->lvTracked);
+ // </BUGNUM>
+ noway_assert(varDsc->lvRefCnt == 0 || // lvRefCnt may not have been set yet.
+ varDsc->lvRefCnt == 2 // Or, we assume this tmp should only be used here,
+ // and it only shows up twice.
+ );
+ lvaTable[lclNum].lvRefCnt = 0;
+ lvaTable[lclNum].lvaResetSortAgainFlag(this);
+ }
+
+ if (op1->OperIsCompare())
+ {
+ // Here we look for the following tree
+ //
+ //           EQ/NE            ->        RELOP/!RELOP
+ //           /   \                        /    \
+ //       RELOP   CNS 0/1
+ //       /   \
+ //
+ // Note that we will remove/destroy the EQ/NE node and move
+ // the RELOP up into its location.
+
+ /* Here we reverse the RELOP if necessary */
+
+ bool reverse = ((ival2 == 0) == (oper == GT_EQ));
+
+ if (reverse)
+ {
+ gtReverseCond(op1);
+ }
+
+ /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
+ op1->gtType = tree->gtType;
+
+ noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
+ op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ //
+ // Now we check for a compare with the result of an '&' operator
+ //
+ // Here we look for the following transformation:
+ //
+ //            EQ/NE                      EQ/NE
+ //            /   \                      /   \
+ //          AND   CNS 0/1    ->        AND   CNS 0
+ //         /   \                      /   \
+ //    RSZ/RSH   CNS 1                x    CNS (1 << y)
+ //      /  \
+ //     x   CNS_INT +y
+
+ if (op1->gtOper == GT_AND)
+ {
+ GenTreePtr andOp = op1;
+ GenTreePtr rshiftOp = andOp->gtOp.gtOp1;
+
+ if ((rshiftOp->gtOper != GT_RSZ) && (rshiftOp->gtOper != GT_RSH))
+ {
+ goto SKIP;
+ }
+
+ if (!rshiftOp->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ goto SKIP;
+ }
+
+ ssize_t shiftAmount = rshiftOp->gtOp.gtOp2->gtIntCon.gtIconVal;
+
+ if (shiftAmount < 0)
+ {
+ goto SKIP;
+ }
+
+ if (!andOp->gtOp.gtOp2->IsIntegralConst(1))
+ {
+ goto SKIP;
+ }
+
+ if (andOp->gtType == TYP_INT)
+ {
+ if (shiftAmount > 31)
+ {
+ goto SKIP;
+ }
+
+ UINT32 newAndOperand = ((UINT32)1) << shiftAmount;
+
+ andOp->gtOp.gtOp2->gtIntCon.gtIconVal = newAndOperand;
+
+ // Reverse the cond if necessary
+ if (ival2 == 1)
+ {
+ gtReverseCond(tree);
+ cns2->gtIntCon.gtIconVal = 0;
+ oper = tree->gtOper;
+ }
+ }
+ else if (andOp->gtType == TYP_LONG)
+ {
+ if (shiftAmount > 63)
+ {
+ goto SKIP;
+ }
+
+ UINT64 newAndOperand = ((UINT64)1) << shiftAmount;
+
+ andOp->gtOp.gtOp2->gtIntConCommon.SetLngValue(newAndOperand);
+
+ // Reverse the cond if necessary
+ if (ival2 == 1)
+ {
+ gtReverseCond(tree);
+ cns2->gtIntConCommon.SetLngValue(0);
+ oper = tree->gtOper;
+ }
+ }
+
+ andOp->gtOp.gtOp1 = rshiftOp->gtOp.gtOp1;
+
+ DEBUG_DESTROY_NODE(rshiftOp->gtOp.gtOp2);
+ DEBUG_DESTROY_NODE(rshiftOp);
+ }
+ } // END if (ival2 != INT_MAX)
+
+ SKIP:
+ /* Now check for compares with small constant longs that can be cast to int */
+
+ if (!cns2->OperIsConst())
+ {
+ goto COMPARE;
+ }
+
+ if (cns2->TypeGet() != TYP_LONG)
+ {
+ goto COMPARE;
+ }
+
+ /* Is the constant 31 bits or smaller? */
+
+ if ((cns2->gtIntConCommon.LngValue() >> 31) != 0)
+ {
+ goto COMPARE;
+ }
+
+ /* Is the first comparand mask operation of type long ? */
+
+ if (op1->gtOper != GT_AND)
+ {
+ /* Another interesting case: cast from int */
+
+ if (op1->gtOper == GT_CAST && op1->CastFromType() == TYP_INT &&
+ !gtIsActiveCSE_Candidate(op1) && // op1 cannot be a CSE candidate
+ !op1->gtOverflow()) // cannot be an overflow checking cast
+ {
+ /* Simply make this into an integer comparison */
+
+ tree->gtOp.gtOp1 = op1->gtCast.CastOp();
+ tree->gtOp.gtOp2 = gtNewIconNode((int)cns2->gtIntConCommon.LngValue(), TYP_INT);
+ }
+
+ goto COMPARE;
+ }
+
+ noway_assert(op1->TypeGet() == TYP_LONG && op1->OperGet() == GT_AND);
+
+ /* Is the result of the mask effectively an INT ? */
+
+ GenTreePtr andMask;
+ andMask = op1->gtOp.gtOp2;
+ if (andMask->gtOper != GT_CNS_NATIVELONG)
+ {
+ goto COMPARE;
+ }
+ if ((andMask->gtIntConCommon.LngValue() >> 32) != 0)
+ {
+ goto COMPARE;
+ }
+
+ /* Now we know that we can cast gtOp.gtOp1 of AND to int */
+
+ op1->gtOp.gtOp1 = gtNewCastNode(TYP_INT, op1->gtOp.gtOp1, TYP_INT);
+
+ /* now replace the mask node (gtOp.gtOp2 of AND node) */
+
+ noway_assert(andMask == op1->gtOp.gtOp2);
+
+ ival1 = (int)andMask->gtIntConCommon.LngValue();
+ andMask->SetOper(GT_CNS_INT);
+ andMask->gtType = TYP_INT;
+ andMask->gtIntCon.gtIconVal = ival1;
+
+ /* now change the type of the AND node */
+
+ op1->gtType = TYP_INT;
+
+ /* finally we replace the comparand */
+
+ ival2 = (int)cns2->gtIntConCommon.LngValue();
+ cns2->SetOper(GT_CNS_INT);
+ cns2->gtType = TYP_INT;
+
+ noway_assert(cns2 == op2);
+ cns2->gtIntCon.gtIconVal = ival2;
+
+ goto COMPARE;
+
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+
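+ // For signed compares against the constants 1 or -1 we can tighten the relop and
+ // compare against 0 instead (e.g. "expr >= 1" becomes "expr > 0").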
+ if ((tree->gtFlags & GTF_UNSIGNED) == 0)
+ {
+ if (op2->gtOper == GT_CNS_INT)
+ {
+ cns2 = op2;
+ /* Check for "expr relop 1" */
+ if (cns2->IsIntegralConst(1))
+ {
+ /* Check for "expr >= 1" */
+ if (oper == GT_GE)
+ {
+ /* Change to "expr > 0" */
+ oper = GT_GT;
+ goto SET_OPER;
+ }
+ /* Check for "expr < 1" */
+ else if (oper == GT_LT)
+ {
+ /* Change to "expr <= 0" */
+ oper = GT_LE;
+ goto SET_OPER;
+ }
+ }
+ /* Check for "expr relop -1" */
+ else if (cns2->IsIntegralConst(-1) && ((oper == GT_LE) || (oper == GT_GT)))
+ {
+ /* Check for "expr <= -1" */
+ if (oper == GT_LE)
+ {
+ /* Change to "expr < 0" */
+ oper = GT_LT;
+ goto SET_OPER;
+ }
+ /* Check for "expr > -1" */
+ else if (oper == GT_GT)
+ {
+ /* Change to "expr >= 0" */
+ oper = GT_GE;
+
+ SET_OPER:
+ // If we get here we should be changing 'oper'
+ assert(tree->OperGet() != oper);
+
+ // Keep the old ValueNumber for 'tree' as the new expr
+ // will still compute the same value as before
+ tree->SetOper(oper, GenTree::PRESERVE_VN);
+ cns2->gtIntCon.gtIconVal = 0;
+
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // Update the ValueNumber for 'cns2', as we just changed it to 0
+ fgValueNumberTreeConst(cns2);
+ }
+
+ op2 = tree->gtOp.gtOp2 = gtFoldExpr(op2);
+ }
+ }
+ }
+ }
+
+ COMPARE:
+
+ noway_assert(tree->OperKind() & GTK_RELOP);
+
+ /* Check if the result of the comparison is used for a jump.
+ * If not then only the int (i.e. 32 bit) case is handled in
+ * the code generator through the (x86) "set" instructions.
+ * For the rest of the cases, the simplest way is to
+ * "simulate" the comparison with ?:
+ *
+ * On ARM, we previously used the IT instruction, but the IT instructions
+ * have mostly been declared obsolete and off-limits, so all cases on ARM
+ * get converted to ?: */
+
+ if (!(tree->gtFlags & GTF_RELOP_JMP_USED) && fgMorphRelopToQmark(op1))
+ {
+ /* We convert it to "(CMP_TRUE) ? (1):(0)" */
+
+ op1 = tree;
+ op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
+ op1->gtRequestSetFlags();
+
+ op2 = new (this, GT_COLON) GenTreeColon(TYP_INT, gtNewIconNode(1), gtNewIconNode(0));
+ op2 = fgMorphTree(op2);
+
+ tree = gtNewQmarkNode(TYP_INT, op1, op2);
+
+ fgMorphTreeDone(tree);
+
+ return tree;
+ }
+ break;
+
+ case GT_QMARK:
+
+ /* If op1 is a comma throw node then we won't be keeping op2 */
+ if (fgIsCommaThrow(op1))
+ {
+ break;
+ }
+
+ /* Get hold of the two branches */
+
+ noway_assert(op2->OperGet() == GT_COLON);
+ elseNode = op2->AsColon()->ElseNode();
+ thenNode = op2->AsColon()->ThenNode();
+
+ /* Try to hoist assignments out of qmark colon constructs.
+ i.e. replace (cond?(x=a):(x=b)) with (x=(cond?a:b)). */
+
+ if (tree->TypeGet() == TYP_VOID && thenNode->OperGet() == GT_ASG && elseNode->OperGet() == GT_ASG &&
+ thenNode->TypeGet() != TYP_LONG && GenTree::Compare(thenNode->gtOp.gtOp1, elseNode->gtOp.gtOp1) &&
+ thenNode->gtOp.gtOp2->TypeGet() == elseNode->gtOp.gtOp2->TypeGet())
+ {
+ noway_assert(thenNode->TypeGet() == elseNode->TypeGet());
+
+ GenTreePtr asg = thenNode;
+ GenTreePtr colon = op2;
+ colon->gtOp.gtOp1 = thenNode->gtOp.gtOp2;
+ colon->gtOp.gtOp2 = elseNode->gtOp.gtOp2;
+ tree->gtType = colon->gtType = asg->gtOp.gtOp2->gtType;
+ asg->gtOp.gtOp2 = tree;
+
+ // Asg will have all the flags that the QMARK had
+ asg->gtFlags |= (tree->gtFlags & GTF_ALL_EFFECT);
+
+ // The colon node won't have the flags that x had.
+ colon->gtFlags &= ~GTF_ALL_EFFECT;
+ colon->gtFlags |= (colon->gtOp.gtOp1->gtFlags | colon->gtOp.gtOp2->gtFlags) & GTF_ALL_EFFECT;
+
+ DEBUG_DESTROY_NODE(elseNode->gtOp.gtOp1);
+ DEBUG_DESTROY_NODE(elseNode);
+
+ return asg;
+ }
+
+ /* If the 'else' branch is empty swap the two branches and reverse the condition */
+
+ if (elseNode->IsNothingNode())
+ {
+ /* This can only happen for VOID ?: */
+ noway_assert(op2->gtType == TYP_VOID);
+
+ /* If the thenNode and elseNode are both nop nodes then optimize away the QMARK */
+ if (thenNode->IsNothingNode())
+ {
+ // We may be able to throw away op1 (unless it has side-effects)
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ /* Just return a Nop node */
+ return thenNode;
+ }
+ else
+ {
+ /* Just return the relop, but clear the special flags. Note
+ that we can't do that for longs and floats (see code under
+ COMPARE label above) */
+
+ if (!fgMorphRelopToQmark(op1->gtOp.gtOp1))
+ {
+ op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
+ return op1;
+ }
+ }
+ }
+ else
+ {
+ GenTreePtr tmp = elseNode;
+
+ op2->AsColon()->ElseNode() = elseNode = thenNode;
+ op2->AsColon()->ThenNode() = thenNode = tmp;
+ gtReverseCond(op1);
+ }
+ }
+
+#if !defined(_TARGET_ARM_)
+ // If we have (cond)?0:1, then we just return "cond" for TYP_INTs
+ //
+ // Don't do this optimization for ARM: we always require assignment
+ // to boolean to remain ?:, since we don't have any way to generate
+ // this with straight-line code, like x86 does using setcc (at least
+ // after the IT instruction is deprecated).
+
+ if (genActualType(op1->gtOp.gtOp1->gtType) == TYP_INT && genActualType(typ) == TYP_INT &&
+ thenNode->gtOper == GT_CNS_INT && elseNode->gtOper == GT_CNS_INT)
+ {
+ ival1 = thenNode->gtIntCon.gtIconVal;
+ ival2 = elseNode->gtIntCon.gtIconVal;
+
+ // Is one constant 0 and the other 1?
+ if ((ival1 | ival2) == 1 && (ival1 & ival2) == 0)
+ {
+ // If the constants are {1, 0}, reverse the condition
+ if (ival1 == 1)
+ {
+ gtReverseCond(op1);
+ }
+
+ // Unmark GTF_RELOP_JMP_USED on the condition node so it knows that it
+ // needs to materialize the result as a 0 or 1.
+ noway_assert(op1->gtFlags & (GTF_RELOP_QMARK | GTF_RELOP_JMP_USED));
+ op1->gtFlags &= ~(GTF_RELOP_QMARK | GTF_RELOP_JMP_USED);
+
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op2);
+
+ return op1;
+ }
+ }
+#endif // !_TARGET_ARM_
+
+ break; // end case GT_QMARK
+
+ case GT_MUL:
+
+#ifndef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ // This must be GTF_MUL_64RSLT
+ assert(tree->gtIsValid64RsltMul());
+ return tree;
+ }
+#endif // _TARGET_64BIT_
+ goto CM_OVF_OP;
+
+ case GT_SUB:
+
+ if (tree->gtOverflow())
+ {
+ goto CM_OVF_OP;
+ }
+
+ /* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
+
+ noway_assert(op2);
+ if (op2->IsCnsIntOrI())
+ {
+ /* Negate the constant and change the node to be "+" */
+
+ op2->gtIntConCommon.SetIconValue(-op2->gtIntConCommon.IconValue());
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
+
+ /* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
+
+ noway_assert(op1);
+ if (op1->IsCnsIntOrI())
+ {
+ noway_assert(varTypeIsIntOrI(tree));
+
+ tree->gtOp.gtOp2 = op2 =
+ gtNewOperNode(GT_NEG, tree->gtType, op2); // The type of the new GT_NEG node should be the same
+ // as the type of the tree, i.e. tree->gtType.
+ fgMorphTreeDone(op2);
+
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+ goto CM_ADD_OP;
+ }
+
+ /* No match - exit */
+
+ break;
+
+#ifdef _TARGET_ARM64_
+ case GT_DIV:
+ if (!varTypeIsFloating(tree->gtType))
+ {
+ // Codegen for this instruction needs to be able to throw two exceptions:
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
+ }
+ break;
+ case GT_UDIV:
+ // Codegen for this instruction needs to be able to throw one exception:
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_DIV_BY_ZERO, fgPtrArgCntCur);
+ break;
+#endif
+
+ case GT_ADD:
+
+ CM_OVF_OP:
+ if (tree->gtOverflow())
+ {
+ tree->gtRequestSetFlags();
+
+ // Add the exception-throwing basic block to jump to on overflow
+
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_OVERFLOW, fgPtrArgCntCur);
+
+ // We can't do any commutative morphing for overflow instructions
+
+ break;
+ }
+
+ CM_ADD_OP:
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ /* Commute any non-REF constants to the right */
+
+ noway_assert(op1);
+ if (op1->OperIsConst() && (op1->gtType != TYP_REF))
+ {
+ // TODO-Review: We used to assert here that
+ // noway_assert(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD));
+ // With modifications to AddrTaken==>AddrExposed, we did more assertion propagation,
+ // and would sometimes hit this assertion. This may indicate a missed "remorph".
+ // Task is to re-enable this assertion and investigate.
+
+ /* Swap the operands */
+ tree->gtOp.gtOp1 = op2;
+ tree->gtOp.gtOp2 = op1;
+
+ op1 = op2;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ /* See if we can fold GT_ADD nodes. */
+
+ if (oper == GT_ADD)
+ {
+ /* Fold "((x+icon1)+(y+icon2)) to ((x+y)+(icon1+icon2))" */
+
+ if (op1->gtOper == GT_ADD && op2->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op2) &&
+ op1->gtOp.gtOp2->gtOper == GT_CNS_INT && op2->gtOp.gtOp2->gtOper == GT_CNS_INT &&
+ !op1->gtOverflow() && !op2->gtOverflow())
+ {
+ cns1 = op1->gtOp.gtOp2;
+ cns2 = op2->gtOp.gtOp2;
+ cns1->gtIntCon.gtIconVal += cns2->gtIntCon.gtIconVal;
+#ifdef _TARGET_64BIT_
+ if (cns1->TypeGet() == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after adding two int constants above
+ cns1->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ tree->gtOp.gtOp2 = cns1;
+ DEBUG_DESTROY_NODE(cns2);
+
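+ // Move y (op2's non-constant operand) under op1 so that op1 becomes (x+y);
+ // the combined constant is already installed as the tree's new op2.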
+ op1->gtOp.gtOp2 = op2->gtOp.gtOp1;
+ op1->gtFlags |= (op1->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT);
+ DEBUG_DESTROY_NODE(op2);
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ if (op2->IsCnsIntOrI() && varTypeIsIntegralOrI(typ))
+ {
+ /* Fold "((x+icon1)+icon2) to (x+(icon1+icon2))" */
+
+ if (op1->gtOper == GT_ADD && !gtIsActiveCSE_Candidate(op1) && op1->gtOp.gtOp2->IsCnsIntOrI() &&
+ !op1->gtOverflow() && op1->gtOp.gtOp2->OperGet() == op2->OperGet())
+ {
+ cns1 = op1->gtOp.gtOp2;
+ op2->gtIntConCommon.SetIconValue(cns1->gtIntConCommon.IconValue() +
+ op2->gtIntConCommon.IconValue());
+#ifdef _TARGET_64BIT_
+ if (op2->TypeGet() == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after adding two int constants above
+ op2->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ if (cns1->OperGet() == GT_CNS_INT)
+ {
+ op2->gtIntCon.gtFieldSeq =
+ GetFieldSeqStore()->Append(cns1->gtIntCon.gtFieldSeq, op2->gtIntCon.gtFieldSeq);
+ }
+ DEBUG_DESTROY_NODE(cns1);
+
+ tree->gtOp.gtOp1 = op1->gtOp.gtOp1;
+ DEBUG_DESTROY_NODE(op1);
+ op1 = tree->gtOp.gtOp1;
+ }
+
+ // Fold (x + 0).
+
+ if ((op2->gtIntConCommon.IconValue() == 0) && !gtIsActiveCSE_Candidate(tree))
+ {
+
+ // If this addition is adding an offset to a null pointer,
+ // avoid the work and yield the null pointer immediately.
+ // Dereferencing the pointer in either case will have the
+ // same effect.
+
+ if (!gtIsActiveCSE_Candidate(op1) && varTypeIsGC(op2->TypeGet()))
+ {
+ op2->gtType = tree->gtType;
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+ return op2;
+ }
+
+ // Remove the addition iff it won't change the tree type
+ // to TYP_REF.
+
+ if (!gtIsActiveCSE_Candidate(op2) &&
+ ((op1->TypeGet() == tree->TypeGet()) || (op1->TypeGet() != TYP_REF)))
+ {
+ if (fgGlobalMorph && (op2->OperGet() == GT_CNS_INT) &&
+ (op2->gtIntCon.gtFieldSeq != nullptr) &&
+ (op2->gtIntCon.gtFieldSeq != FieldSeqStore::NotAField()))
+ {
+ fgAddFieldSeqForZeroOffset(op1, op2->gtIntCon.gtFieldSeq);
+ }
+
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+
+ return op1;
+ }
+ }
+ }
+ }
+ /* See if we can fold GT_MUL by const nodes */
+ else if (oper == GT_MUL && op2->IsCnsIntOrI() && !optValnumCSE_phase)
+ {
+#ifndef _TARGET_64BIT_
+ noway_assert(typ <= TYP_UINT);
+#endif // _TARGET_64BIT_
+ noway_assert(!tree->gtOverflow());
+
+ ssize_t mult = op2->gtIntConCommon.IconValue();
+ bool op2IsConstIndex = op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq();
+
+ assert(!op2IsConstIndex || op2->AsIntCon()->gtFieldSeq->m_next == nullptr);
+
+ if (mult == 0)
+ {
+ // We may be able to throw away op1 (unless it has side-effects)
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
+ {
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+ return op2; // Just return the "0" node
+ }
+
+ // We need to keep op1 for the side-effects. Hang it off
+ // a GT_COMMA node
+
+ tree->ChangeOper(GT_COMMA);
+ return tree;
+ }
+
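+ // Take the absolute value of the multiplier and isolate its lowest set bit; if the
+ // multiplier has only a single bit set it is a power of two and the multiply can become a shift.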
+ size_t abs_mult = (mult >= 0) ? mult : -mult;
+ size_t lowestBit = genFindLowestBit(abs_mult);
+ bool changeToShift = false;
+
+ // is it a power of two? (positive or negative)
+ if (abs_mult == lowestBit)
+ {
+ // if negative, negate (min-int does not need negation)
+ if (mult < 0 && mult != SSIZE_T_MIN)
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
+ fgMorphTreeDone(op1);
+ }
+
+ // If "op2" is a constant array index, the other multiplicand must be a constant.
+ // Transfer the annotation to the other one.
+ if (op2->OperGet() == GT_CNS_INT && op2->gtIntCon.gtFieldSeq != nullptr &&
+ op2->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(op2->gtIntCon.gtFieldSeq->m_next == nullptr);
+ GenTreePtr otherOp = op1;
+ if (otherOp->OperGet() == GT_NEG)
+ {
+ otherOp = otherOp->gtOp.gtOp1;
+ }
+ assert(otherOp->OperGet() == GT_CNS_INT);
+ assert(otherOp->gtIntCon.gtFieldSeq == FieldSeqStore::NotAField());
+ otherOp->gtIntCon.gtFieldSeq = op2->gtIntCon.gtFieldSeq;
+ }
+
+ if (abs_mult == 1)
+ {
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ /* Change the multiplication into a shift by log2(val) bits */
+ op2->gtIntConCommon.SetIconValue(genLog2(abs_mult));
+ changeToShift = true;
+ }
+#if LEA_AVAILABLE
+ else if ((lowestBit > 1) && jitIsScaleIndexMul(lowestBit) && optAvoidIntMult())
+ {
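+ // Split abs_mult into factor * 2^shift; when factor is 3, 5 or 9 the multiply below
+ // is rewritten as a smaller multiply by that factor followed by a left shift.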
+ int shift = genLog2(lowestBit);
+ ssize_t factor = abs_mult >> shift;
+
+ if (factor == 3 || factor == 5 || factor == 9)
+ {
+ // if negative, negate (min-int does not need negation)
+ if (mult < 0 && mult != SSIZE_T_MIN)
+ {
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_NEG, op1->gtType, op1);
+ fgMorphTreeDone(op1);
+ }
+
+ GenTreePtr factorIcon = gtNewIconNode(factor, TYP_I_IMPL);
+ if (op2IsConstIndex)
+ {
+ factorIcon->AsIntCon()->gtFieldSeq =
+ GetFieldSeqStore()->CreateSingleton(FieldSeqStore::ConstantIndexPseudoField);
+ }
+
+ // change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
+ tree->gtOp.gtOp1 = op1 = gtNewOperNode(GT_MUL, tree->gtType, op1, factorIcon);
+ fgMorphTreeDone(op1);
+
+ op2->gtIntConCommon.SetIconValue(shift);
+ changeToShift = true;
+ }
+ }
+#endif // LEA_AVAILABLE
+ if (changeToShift)
+ {
+ // vnStore is null before the ValueNumber phase has run
+ if (vnStore != nullptr)
+ {
+ // Update the ValueNumber for 'op2', as we just changed the constant
+ fgValueNumberTreeConst(op2);
+ }
+ oper = GT_LSH;
+ // Keep the old ValueNumber for 'tree' as the new expr
+ // will still compute the same value as before
+ tree->ChangeOper(oper, GenTree::PRESERVE_VN);
+
+ goto DONE_MORPHING_CHILDREN;
+ }
+ }
+ else if (fgOperIsBitwiseRotationRoot(oper))
+ {
+ tree = fgRecognizeAndMorphBitwiseRotation(tree);
+
+ // fgRecognizeAndMorphBitwiseRotation may return a new tree
+ oper = tree->OperGet();
+ typ = tree->TypeGet();
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ }
+
+ break;
+
+ case GT_CHS:
+ case GT_NOT:
+ case GT_NEG:
+
+ /* Any constant cases should have been folded earlier */
+ noway_assert(!op1->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD) || optValnumCSE_phase);
+ break;
+
+ case GT_CKFINITE:
+
+ noway_assert(varTypeIsFloating(op1->TypeGet()));
+
+ fgAddCodeRef(compCurBB, bbThrowIndex(compCurBB), SCK_ARITH_EXCPN, fgPtrArgCntCur);
+ break;
+
+ case GT_OBJ:
+ // If we have GT_OBJ(GT_ADDR(X)) and X has GTF_GLOB_REF, we must set GTF_GLOB_REF on
+ // the GT_OBJ. Note that the GTF_GLOB_REF will have been cleared on ADDR(X) where X
+ // is a local or clsVar, even if it has been address-exposed.
+ if (op1->OperGet() == GT_ADDR)
+ {
+ tree->gtFlags |= (op1->gtGetOp1()->gtFlags & GTF_GLOB_REF);
+ }
+ break;
+
+ case GT_IND:
+
+ // Can not remove a GT_IND if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ bool foldAndReturnTemp;
+ foldAndReturnTemp = false;
+ temp = nullptr;
+ ival1 = 0;
+
+ /* Try to Fold *(&X) into X */
+ if (op1->gtOper == GT_ADDR)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1))
+ {
+ break;
+ }
+
+ temp = op1->gtOp.gtOp1; // X
+
+ // In the test below, if they're both TYP_STRUCT, this of course does *not* mean that
+ // they are the *same* struct type. In fact, they almost certainly aren't. If the
+ // address has an associated field sequence, that identifies this case; go through
+ // the "lcl_fld" path rather than this one.
+ FieldSeqNode* addrFieldSeq = nullptr; // This is an unused out parameter below.
+ if (typ == temp->TypeGet() && !GetZeroOffsetFieldMap()->Lookup(op1, &addrFieldSeq))
+ {
+ foldAndReturnTemp = true;
+ }
+ else if (temp->OperIsLocal())
+ {
+ unsigned lclNum = temp->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ // We will try to optimize when we have a promoted struct with a zero lvFldOffset
+ if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ // We will try to optimize when we have a single field struct that is being struct promoted
+ if (varDsc->lvFieldCnt == 1)
+ {
+ unsigned lclNumFld = varDsc->lvFieldLclStart;
+ // just grab the promoted field
+ LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
+
+ // Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
+ // is zero
+ if (fieldVarDsc->TypeGet() == tree->TypeGet() && (fieldVarDsc->lvFldOffset == 0))
+ {
+ // We can just use the existing promoted field LclNum
+ temp->gtLclVarCommon.SetLclNum(lclNumFld);
+ temp->gtType = fieldVarDsc->TypeGet();
+
+ foldAndReturnTemp = true;
+ }
+ }
+ }
+ // If the type of the IND (typ) is a "small int", and the type of the local has the
+ // same width, then we can reduce to just the local variable -- it will be
+ // correctly normalized, and signed/unsigned differences won't matter.
+ //
+ // The below transformation cannot be applied if the local var needs to be normalized on load.
+ else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
+ !lvaTable[lclNum].lvNormalizeOnLoad())
+ {
+ tree->gtType = temp->gtType;
+ foldAndReturnTemp = true;
+ }
+ else
+ {
+ // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
+ // nullptr)
+ assert(fieldSeq == nullptr);
+ bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
+ assert(b || fieldSeq == nullptr);
+
+ if ((fieldSeq != nullptr) && (temp->OperGet() == GT_LCL_FLD))
+ {
+ // Append the field sequence, change the type.
+ temp->AsLclFld()->gtFieldSeq =
+ GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
+ temp->gtType = tree->TypeGet();
+
+ foldAndReturnTemp = true;
+ }
+ }
+ // Otherwise we will fold this into a GT_LCL_FLD below
+ // where we check (temp != nullptr)
+ }
+ else // !temp->OperIsLocal()
+ {
+ // We don't try to fold away the GT_IND/GT_ADDR for this case
+ temp = nullptr;
+ }
+ }
+ else if (op1->OperGet() == GT_ADD)
+ {
+ /* Try to change *(&lcl + cns) into lcl[cns] to prevent materialization of &lcl */
+
+ if (op1->gtOp.gtOp1->OperGet() == GT_ADDR && op1->gtOp.gtOp2->OperGet() == GT_CNS_INT &&
+ (!(opts.MinOpts() || opts.compDbgCode)))
+ {
+ // No overflow arithmetic with pointers
+ noway_assert(!op1->gtOverflow());
+
+ temp = op1->gtOp.gtOp1->gtOp.gtOp1;
+ if (!temp->OperIsLocal())
+ {
+ temp = nullptr;
+ break;
+ }
+
+ // Can not remove the GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1->gtOp.gtOp1))
+ {
+ break;
+ }
+
+ ival1 = op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ fieldSeq = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
+
+ // Does the address have an associated zero-offset field sequence?
+ FieldSeqNode* addrFieldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(op1->gtOp.gtOp1, &addrFieldSeq))
+ {
+ fieldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fieldSeq);
+ }
+
+ if (ival1 == 0 && typ == temp->TypeGet() && temp->TypeGet() != TYP_STRUCT)
+ {
+ noway_assert(!varTypeIsGC(temp->TypeGet()));
+ foldAndReturnTemp = true;
+ }
+ else
+ {
+ // The emitter can't handle large offsets
+ if (ival1 != (unsigned short)ival1)
+ {
+ break;
+ }
+
+ // The emitter can get confused by invalid offsets
+ if (ival1 >= Compiler::lvaLclSize(temp->gtLclVarCommon.gtLclNum))
+ {
+ break;
+ }
+
+#ifdef _TARGET_ARM_
+ // Check for a LclVar TYP_STRUCT with misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ if ((ival1 % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ tree->gtFlags |= GTF_IND_UNALIGNED;
+ break;
+ }
+ }
+#endif
+ }
+ // Now we can fold this into a GT_LCL_FLD below
+ // where we check (temp != nullptr)
+ }
+ }
+
+#ifdef DEBUG
+ // If we have decided to fold, then temp cannot be nullptr
+ if (foldAndReturnTemp)
+ {
+ assert(temp != nullptr);
+ }
+#endif
+
+ if (temp != nullptr)
+ {
+ noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
+
+ // If we haven't already decided to fold this expression
+ //
+ if (!foldAndReturnTemp)
+ {
+ noway_assert(temp->OperIsLocal());
+ LclVarDsc* varDsc = &(lvaTable[temp->AsLclVarCommon()->gtLclNum]);
+ // Make sure we don't separately promote the fields of this struct.
+ if (varDsc->lvRegStruct)
+ {
+ // We can enregister, but can't promote.
+ varDsc->lvPromoted = false;
+ }
+ else
+ {
+ lvaSetVarDoNotEnregister(temp->gtLclVarCommon.gtLclNum DEBUGARG(DNER_LocalField));
+ }
+
+ // We will turn a GT_LCL_VAR into a GT_LCL_FLD with a gtLclOffs of 'ival1',
+ // or if we already have a GT_LCL_FLD we will adjust its gtLclOffs by adding 'ival1'.
+ // Then we change the type of the GT_LCL_FLD to match the original GT_IND type.
+ //
+ if (temp->OperGet() == GT_LCL_FLD)
+ {
+ temp->AsLclFld()->gtLclOffs += (unsigned short)ival1;
+ temp->AsLclFld()->gtFieldSeq =
+ GetFieldSeqStore()->Append(temp->AsLclFld()->gtFieldSeq, fieldSeq);
+ }
+ else
+ {
+ temp->ChangeOper(GT_LCL_FLD); // Note that this makes the gtFieldSeq "NotAField"...
+ temp->AsLclFld()->gtLclOffs = (unsigned short)ival1;
+ if (fieldSeq != nullptr)
+ { // If it does represent a field, note that.
+ temp->AsLclFld()->gtFieldSeq = fieldSeq;
+ }
+ }
+ temp->gtType = tree->gtType;
+ foldAndReturnTemp = true;
+ }
+
+ assert(foldAndReturnTemp == true);
+
+ // Keep the DONT_CSE flag in sync
+ // (i.e. keep the original value of this flag from tree)
+ // as it can be set for 'temp' because a GT_ADDR always marks it for its op1
+ //
+ temp->gtFlags &= ~GTF_DONT_CSE;
+ temp->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
+
+ noway_assert(op1->gtOper == GT_ADD || op1->gtOper == GT_ADDR);
+ noway_assert(temp->gtType == tree->gtType);
+
+ if (op1->OperGet() == GT_ADD)
+ {
+ DEBUG_DESTROY_NODE(op1->gtOp.gtOp1); // GT_ADDR
+ DEBUG_DESTROY_NODE(op1->gtOp.gtOp2); // GT_CNS_INT
+ }
+ DEBUG_DESTROY_NODE(op1); // GT_ADD or GT_ADDR
+ DEBUG_DESTROY_NODE(tree); // GT_IND
+
+ return temp;
+ }
+
+ // Only do this optimization when we are in the global optimizer. Doing this after value numbering
+ // could result in an invalid value number for the newly generated GT_IND node.
+ if ((op1->OperGet() == GT_COMMA) && fgGlobalMorph)
+ {
+ // Perform the transform IND(COMMA(x, ..., z)) == COMMA(x, ..., IND(z)).
+ // TBD: this transformation is currently necessary for correctness -- it might
+ // be good to analyze the failures that result if we don't do this, and fix them
+ // in other ways. Ideally, this should be optional.
+ GenTreePtr commaNode = op1;
+ unsigned treeFlags = tree->gtFlags;
+ commaNode->gtType = typ;
+ commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
+ // dangerous, clear the GTF_REVERSE_OPS at
+ // least.
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
+ {
+ commaNode = commaNode->gtOp.gtOp2;
+ commaNode->gtType = typ;
+ commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
+ // dangerous, clear the GTF_REVERSE_OPS at
+ // least.
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ }
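+ // If the original GT_IND carried an array-index annotation, move that annotation
+ // to the new GT_IND created below.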
+ bool wasArrIndex = (tree->gtFlags & GTF_IND_ARR_INDEX) != 0;
+ ArrayInfo arrInfo;
+ if (wasArrIndex)
+ {
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+ GetArrayInfoMap()->Remove(tree);
+ }
+ tree = op1;
+ op1 = gtNewOperNode(GT_IND, typ, commaNode->gtOp.gtOp2);
+ op1->gtFlags = treeFlags;
+ if (wasArrIndex)
+ {
+ GetArrayInfoMap()->Set(op1, arrInfo);
+ }
+#ifdef DEBUG
+ op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ commaNode->gtOp.gtOp2 = op1;
+ return tree;
+ }
+
+ break;
+
+ case GT_ADDR:
+
+ // Can not remove op1 if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(op1))
+ {
+ break;
+ }
+
+ if (op1->OperGet() == GT_IND)
+ {
+ if ((op1->gtFlags & GTF_IND_ARR_INDEX) == 0)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ // Perform the transform ADDR(IND(...)) == (...).
+ GenTreePtr addr = op1->gtOp.gtOp1;
+
+ noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
+
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+
+ return addr;
+ }
+ }
+ else if (op1->OperGet() == GT_OBJ)
+ {
+ // Can not remove a GT_ADDR if it is currently a CSE candidate.
+ if (gtIsActiveCSE_Candidate(tree))
+ {
+ break;
+ }
+
+ // Perform the transform ADDR(OBJ(...)) == (...).
+ GenTreePtr addr = op1->AsObj()->Addr();
+
+ noway_assert(varTypeIsGC(addr->gtType) || addr->gtType == TYP_I_IMPL);
+
+ DEBUG_DESTROY_NODE(op1);
+ DEBUG_DESTROY_NODE(tree);
+
+ return addr;
+ }
+ else if (op1->gtOper == GT_CAST)
+ {
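+ // ADDR(CAST(lclVar or clsVar)): the cast does not change the address, so take the
+ // address of the variable directly.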
+ GenTreePtr casting = op1->gtCast.CastOp();
+ if (casting->gtOper == GT_LCL_VAR || casting->gtOper == GT_CLS_VAR)
+ {
+ DEBUG_DESTROY_NODE(op1);
+ tree->gtOp.gtOp1 = op1 = casting;
+ }
+ }
+ else if ((op1->gtOper == GT_COMMA) && !optValnumCSE_phase)
+ {
+ // Perform the transform ADDR(COMMA(x, ..., z)) == COMMA(x, ..., ADDR(z)).
+ // (Be sure to mark "z" as an l-value...)
+ GenTreePtr commaNode = op1;
+ while (commaNode->gtOp.gtOp2->gtOper == GT_COMMA)
+ {
+ commaNode = commaNode->gtOp.gtOp2;
+ }
+ // The top-level addr might be annotated with a zeroOffset field.
+ FieldSeqNode* zeroFieldSeq = nullptr;
+ bool isZeroOffset = GetZeroOffsetFieldMap()->Lookup(tree, &zeroFieldSeq);
+ tree = op1;
+ commaNode->gtOp.gtOp2->gtFlags |= GTF_DONT_CSE;
+
+ // If the node we're about to put under a GT_ADDR is an indirection, it
+ // doesn't need to be materialized, since we only want the addressing mode. Because
+ // of this, this GT_IND is not a faulting indirection and we don't have to extract it
+ // as a side effect.
+ GenTree* commaOp2 = commaNode->gtOp.gtOp2;
+ if (commaOp2->OperIsBlk())
+ {
+ commaOp2 = fgMorphBlkToInd(commaOp2->AsBlk(), commaOp2->TypeGet());
+ }
+ if (commaOp2->gtOper == GT_IND)
+ {
+ commaOp2->gtFlags |= GTF_IND_NONFAULTING;
+ }
+
+ op1 = gtNewOperNode(GT_ADDR, TYP_BYREF, commaOp2);
+
+ if (isZeroOffset)
+ {
+ // Transfer the annotation to the new GT_ADDR node.
+ GetZeroOffsetFieldMap()->Set(op1, zeroFieldSeq);
+ }
+ commaNode->gtOp.gtOp2 = op1;
+ // Originally, I gave all the comma nodes type "byref". But the ADDR(IND(x)) == x transform
+ // might give op1 a type different from byref (like, say, native int). So now go back and give
+ // all the comma nodes the type of op1.
+ // TODO: the comma flag update below is conservative and can be improved.
+ // For example, if we made the ADDR(IND(x)) == x transformation, we may be able to
+ // get rid of some of the IND flags on the COMMA nodes (e.g., GTF_GLOB_REF).
+ commaNode = tree;
+ while (commaNode->gtOper == GT_COMMA)
+ {
+ commaNode->gtType = op1->gtType;
+ commaNode->gtFlags |= op1->gtFlags;
+#ifdef DEBUG
+ commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ commaNode = commaNode->gtOp.gtOp2;
+ }
+
+ return tree;
+ }
+
+ /* op1 of a GT_ADDR is an l-value. Only r-values can be CSEed */
+ op1->gtFlags |= GTF_DONT_CSE;
+ break;
+
+ case GT_COLON:
+ if (fgGlobalMorph)
+ {
+ /* Mark the nodes that are conditionally executed */
+ fgWalkTreePre(&tree, gtMarkColonCond);
+ }
+ /* Since we're doing this postorder we clear this if it got set by a child */
+ fgRemoveRestOfBlock = false;
+ break;
+
+ case GT_COMMA:
+
+ /* Special case: trees that don't produce a value */
+ if ((op2->OperKind() & GTK_ASGOP) || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) ||
+ fgIsThrow(op2))
+ {
+ typ = tree->gtType = TYP_VOID;
+ }
+
+ // If we are in the Valuenum CSE phase then don't morph away anything as these
+ // nodes may have CSE defs/uses in them.
+ //
+ if (!optValnumCSE_phase)
+ {
+ // Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
+ // is all we need.
+
+ GenTreePtr op1SideEffects = nullptr;
+ // The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
+ // hoisted expressions in loops.
+ gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
+ if (op1SideEffects)
+ {
+ // Replace the left hand side with the side effect list.
+ tree->gtOp.gtOp1 = op1SideEffects;
+ tree->gtFlags |= (op1SideEffects->gtFlags & GTF_ALL_EFFECT);
+ }
+ else
+ {
+ /* The left operand is worthless, throw it away */
+ if (lvaLocalVarRefCounted)
+ {
+ lvaRecursiveDecRefCounts(op1);
+ }
+ op2->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op1);
+ return op2;
+ }
+
+ /* If the right operand is just a void nop node, throw it away */
+ if (op2->IsNothingNode() && op1->gtType == TYP_VOID)
+ {
+ op1->gtFlags |= (tree->gtFlags & (GTF_DONT_CSE | GTF_LATE_ARG));
+ DEBUG_DESTROY_NODE(tree);
+ DEBUG_DESTROY_NODE(op2);
+ return op1;
+ }
+ }
+
+ break;
+
+ case GT_JTRUE:
+
+ /* Special case if fgRemoveRestOfBlock is set to true */
+ if (fgRemoveRestOfBlock)
+ {
+ if (fgIsCommaThrow(op1, true))
+ {
+ GenTreePtr throwNode = op1->gtOp.gtOp1;
+ noway_assert(throwNode->gtType == TYP_VOID);
+
+ return throwNode;
+ }
+
+ noway_assert(op1->OperKind() & GTK_RELOP);
+ noway_assert(op1->gtFlags & GTF_EXCEPT);
+
+ // We need to keep op1 for the side-effects. Hang it off
+ // a GT_COMMA node
+
+ tree->ChangeOper(GT_COMMA);
+ tree->gtOp.gtOp2 = op2 = gtNewNothingNode();
+
+ // Additionally, since we're eliminating the JTRUE,
+ // codegen won't like it if op1 is a RELOP of longs, floats or doubles.
+ // So we change it into a GT_COMMA as well.
+ op1->ChangeOper(GT_COMMA);
+ op1->gtType = op1->gtOp.gtOp1->gtType;
+
+ return tree;
+ }
+
+ default:
+ break;
+ }
+
+ noway_assert(oper == tree->gtOper);
+
+ // If we are in the Valuenum CSE phase then don't morph away anything as these
+ // nodes may have CSE defs/uses in them.
+ //
+ if (!optValnumCSE_phase && (oper != GT_ASG) && (oper != GT_COLON) && !tree->IsList())
+ {
+ /* Check for op1 as a GT_COMMA with an unconditional throw node */
+ if (op1 && fgIsCommaThrow(op1, true))
+ {
+ if ((op1->gtFlags & GTF_COLON_COND) == 0)
+ {
+ /* We can safely throw out the rest of the statements */
+ fgRemoveRestOfBlock = true;
+ }
+
+ GenTreePtr throwNode = op1->gtOp.gtOp1;
+ noway_assert(throwNode->gtType == TYP_VOID);
+
+ if (oper == GT_COMMA)
+ {
+ /* Both tree and op1 are GT_COMMA nodes */
+ /* Change the tree's op1 to the throw node: op1->gtOp.gtOp1 */
+ tree->gtOp.gtOp1 = throwNode;
+ return tree;
+ }
+ else if (oper != GT_NOP)
+ {
+ if (genActualType(typ) == genActualType(op1->gtType))
+ {
+ /* The types match so, return the comma throw node as the new tree */
+ return op1;
+ }
+ else
+ {
+ if (typ == TYP_VOID)
+ {
+ // Return the throw node
+ return throwNode;
+ }
+ else
+ {
+ GenTreePtr commaOp2 = op1->gtOp.gtOp2;
+
+ // need type of oper to be same as tree
+ if (typ == TYP_LONG)
+ {
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ op1->gtType = commaOp2->gtType = TYP_LONG;
+ }
+ else if (varTypeIsFloating(typ))
+ {
+ commaOp2->ChangeOperConst(GT_CNS_DBL);
+ commaOp2->gtDblCon.gtDconVal = 0.0;
+ /* Change the types of oper and commaOp2 to TYP_DOUBLE */
+ op1->gtType = commaOp2->gtType = TYP_DOUBLE;
+ }
+ else
+ {
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntConCommon.SetIconValue(0);
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ op1->gtType = commaOp2->gtType = TYP_INT;
+ }
+
+ /* Return the GT_COMMA node as the new tree */
+ return op1;
+ }
+ }
+ }
+ }
+
+ /* Check for op2 as a GT_COMMA with an unconditional throw */
+
+ if (op2 && fgIsCommaThrow(op2, true))
+ {
+ if ((op2->gtFlags & GTF_COLON_COND) == 0)
+ {
+ /* We can safely throw out the rest of the statements */
+ fgRemoveRestOfBlock = true;
+ }
+
+ // If op1 has no side-effects
+ if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ // If tree is an asg node
+ if (tree->OperIsAssignment())
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ if (tree->OperGet() == GT_ARR_BOUNDS_CHECK)
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ // If tree is a comma node
+ if (tree->OperGet() == GT_COMMA)
+ {
+ /* Return the throw node as the new tree */
+ return op2->gtOp.gtOp1;
+ }
+
+ /* for the shift nodes the type of op2 can differ from the tree type */
+ if ((typ == TYP_LONG) && (genActualType(op2->gtType) == TYP_INT))
+ {
+ noway_assert(GenTree::OperIsShiftOrRotate(oper));
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_NATIVELONG);
+ commaOp2->gtIntConCommon.SetLngValue(0);
+
+ /* Change the types of oper and commaOp2 to TYP_LONG */
+ op2->gtType = commaOp2->gtType = TYP_LONG;
+ }
+
+ if ((genActualType(typ) == TYP_INT) &&
+ (genActualType(op2->gtType) == TYP_LONG || varTypeIsFloating(op2->TypeGet())))
+ {
+ // An example case is comparison (say GT_GT) of two longs or floating point values.
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_INT */
+ op2->gtType = commaOp2->gtType = TYP_INT;
+ }
+
+ if ((typ == TYP_BYREF) && (genActualType(op2->gtType) == TYP_I_IMPL))
+ {
+ noway_assert(tree->OperGet() == GT_ADD);
+
+ GenTreePtr commaOp2 = op2->gtOp.gtOp2;
+
+ commaOp2->ChangeOperConst(GT_CNS_INT);
+ commaOp2->gtIntCon.gtIconVal = 0;
+ /* Change the types of oper and commaOp2 to TYP_BYREF */
+ op2->gtType = commaOp2->gtType = TYP_BYREF;
+ }
+
+ /* types should now match */
+ noway_assert((genActualType(typ) == genActualType(op2->gtType)));
+
+ /* Return the GT_COMMA node as the new tree */
+ return op2;
+ }
+ }
+ }
+
+ /*-------------------------------------------------------------------------
+ * Optional morphing is done if tree transformations are permitted
+ */
+
+ if ((opts.compFlags & CLFLG_TREETRANS) == 0)
+ {
+ return tree;
+ }
+
+ tree = fgMorphSmpOpOptional(tree->AsOp());
+
+ } // extra scope for gcc workaround
+ return tree;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
+{
+ genTreeOps oper = tree->gtOper;
+ GenTree* op1 = tree->gtOp1;
+ GenTree* op2 = tree->gtOp2;
+ var_types typ = tree->TypeGet();
+
+ if (GenTree::OperIsCommutative(oper))
+ {
+ /* Swap the operands so that the more expensive one is 'op1' */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tree->gtOp1 = op2;
+ tree->gtOp2 = op1;
+
+ op2 = op1;
+ op1 = tree->gtOp1;
+
+ tree->gtFlags &= ~GTF_REVERSE_OPS;
+ }
+
+ if (oper == op2->gtOper)
+ {
+ /* Reorder nested operators at the same precedence level to be
+ left-recursive. For example, change "(a+(b+c))" to the
+ equivalent expression "((a+b)+c)".
+ */
+
+ /* Things are handled differently for floating-point operators */
+
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ fgMoveOpsLeft(tree);
+ op1 = tree->gtOp1;
+ op2 = tree->gtOp2;
+ }
+ }
+ }
+
+#if REARRANGE_ADDS
+
+ /* Change "((x+icon)+y)" to "((x+y)+icon)"
+ Don't reorder floating-point operations */
+
+ if ((oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
+ varTypeIsIntegralOrI(typ))
+ {
+ GenTreePtr ad2 = op1->gtOp.gtOp2;
+
+ if (op2->OperIsConst() == 0 && ad2->OperIsConst() != 0)
+ {
+ // This takes
+ // + (tree)
+ // / \
+ // / \
+ // / \
+ // + (op1) op2
+ // / \
+ // \
+ // ad2
+ //
+ // And it swaps ad2 and op2. If (op2) is varTypeIsGC, then this implies that (tree) is
+ // varTypeIsGC. If (op1) is not, then when we swap (ad2) and (op2), then we have a TYP_INT node
+ // (op1) with a child that is varTypeIsGC. If we encounter that situation, make (op1) the same
+ // type as (tree).
+ //
+ // Also, if (ad2) is varTypeIsGC then (tree) must also be (since op1 is), so no fixing is
+ // necessary
+
+ if (varTypeIsGC(op2->TypeGet()))
+ {
+ noway_assert(varTypeIsGC(typ));
+ op1->gtType = typ;
+ }
+ tree->gtOp2 = ad2;
+
+ op1->gtOp.gtOp2 = op2;
+ op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
+
+ op2 = tree->gtOp2;
+ }
+ }
+
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Perform optional oper-specific postorder morphing
+ */
+
+ switch (oper)
+ {
+ genTreeOps cmop;
+ bool dstIsSafeLclVar;
+
+ case GT_ASG:
+ /* We'll convert "a = a <op> x" into "a <op>= x" */
+ /* and also "a = x <op> a" into "a <op>= x" for communative ops */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !LONG_ASG_OPS
+ if (typ == TYP_LONG)
+ {
+ break;
+ }
+#endif
+
+ if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
+ {
+ if (tree->OperIsCopyBlkOp())
+ {
+ return fgMorphCopyBlock(tree);
+ }
+ else
+ {
+ return fgMorphInitBlock(tree);
+ }
+ }
+
+ /* Make sure we're allowed to do this */
+
+ if (optValnumCSE_phase)
+ {
+ // It is not safe to reorder/delete CSE's
+ break;
+ }
+
+ /* Are we assigning to a GT_LCL_VAR ? */
+
+ dstIsSafeLclVar = (op1->gtOper == GT_LCL_VAR);
+
+ /* If we have a GT_LCL_VAR, then is the address taken? */
+ if (dstIsSafeLclVar)
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ noway_assert(lclNum < lvaCount);
+
+ /* Is the address taken? */
+ if (varDsc->lvAddrExposed)
+ {
+ dstIsSafeLclVar = false;
+ }
+ else if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+ }
+
+ if (!dstIsSafeLclVar)
+ {
+ if (op2->gtFlags & GTF_ASG)
+ {
+ break;
+ }
+
+ if ((op2->gtFlags & GTF_CALL) && (op1->gtFlags & GTF_ALL_EFFECT))
+ {
+ break;
+ }
+ }
+
+ /* Special case: a cast that can be thrown away */
+
+ if (op1->gtOper == GT_IND && op2->gtOper == GT_CAST && !op2->gtOverflow())
+ {
+ var_types srct;
+ var_types cast;
+ var_types dstt;
+
+ srct = op2->gtCast.CastOp()->TypeGet();
+ cast = (var_types)op2->CastToType();
+ dstt = op1->TypeGet();
+
+ /* Make sure these are all ints and precision is not lost */
+
+ if (cast >= dstt && dstt <= TYP_INT && srct <= TYP_INT)
+ {
+ op2 = tree->gtOp2 = op2->gtCast.CastOp();
+ }
+ }
+
+ /* Make sure we have the operator range right */
+
+ noway_assert(GT_SUB == GT_ADD + 1);
+ noway_assert(GT_MUL == GT_ADD + 2);
+ noway_assert(GT_DIV == GT_ADD + 3);
+ noway_assert(GT_MOD == GT_ADD + 4);
+ noway_assert(GT_UDIV == GT_ADD + 5);
+ noway_assert(GT_UMOD == GT_ADD + 6);
+
+ noway_assert(GT_OR == GT_ADD + 7);
+ noway_assert(GT_XOR == GT_ADD + 8);
+ noway_assert(GT_AND == GT_ADD + 9);
+
+ noway_assert(GT_LSH == GT_ADD + 10);
+ noway_assert(GT_RSH == GT_ADD + 11);
+ noway_assert(GT_RSZ == GT_ADD + 12);
+
+ /* Check for a suitable operator on the RHS */
+
+ cmop = op2->OperGet();
+
+ switch (cmop)
+ {
+ case GT_NEG:
+ // GT_CHS only supported for integer types
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ goto ASG_OP;
+
+ case GT_MUL:
+ // GT_ASG_MUL only supported for floating point types
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (op2->gtOverflow())
+ {
+ /* Disable folding into "<op>=" if the result can be
+ visible to anyone, as <op> may throw an exception and
+ the assignment should not proceed.
+ We are safe with an assignment to a local variable.
+ */
+ if (ehBlockHasExnFlowDsc(compCurBB))
+ {
+ break;
+ }
+ if (!dstIsSafeLclVar)
+ {
+ break;
+ }
+ }
+#ifndef _TARGET_AMD64_
+ // This is hard for byte-operations as we need to make
+ // sure both operands are in RBM_BYTE_REGS.
+ if (varTypeIsByte(op2->TypeGet()))
+ break;
+#endif // _TARGET_AMD64_
+ goto ASG_OP;
+
+ case GT_DIV:
+ case GT_UDIV:
+ // GT_ASG_DIV only supported for floating point types
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ break;
+ }
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+
+#if LONG_ASG_OPS
+
+ if (typ == TYP_LONG)
+ break;
+#endif
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+#if LONG_ASG_OPS
+
+ /* TODO: allow non-const long assignment operators */
+
+ if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG)
+ break;
+#endif
+
+ ASG_OP:
+ {
+ bool bReverse = false;
+ bool bAsgOpFoldable = fgShouldCreateAssignOp(tree, &bReverse);
+ if (bAsgOpFoldable)
+ {
+ if (bReverse)
+ {
+ // We will transform this from "a = x <op> a" to "a <op>= x"
+ // so we can now destroy the duplicate "a"
+ DEBUG_DESTROY_NODE(op2->gtOp.gtOp2);
+ op2->gtOp.gtOp2 = op2->gtOp.gtOp1;
+ }
+
+ /* Special case: "x |= -1" and "x &= 0" */
+ if (((cmop == GT_AND) && op2->gtOp.gtOp2->IsIntegralConst(0)) ||
+ ((cmop == GT_OR) && op2->gtOp.gtOp2->IsIntegralConst(-1)))
+ {
+ /* Simply change to an assignment */
+ tree->gtOp2 = op2->gtOp.gtOp2;
+ break;
+ }
+
+ if (cmop == GT_NEG)
+ {
+ /* This is "x = -x;", use the flipsign operator */
+
+ tree->ChangeOper(GT_CHS);
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ op1->gtFlags |= GTF_VAR_USEASG;
+ }
+
+ tree->gtOp2 = gtNewIconNode(0, op1->TypeGet());
+
+ break;
+ }
+
+ if (cmop == GT_RSH && varTypeIsSmall(op1->TypeGet()) && varTypeIsUnsigned(op1->TypeGet()))
+ {
+ // Changing from x = x op y to x op= y when x is a small integer type
+ // makes the op size smaller (originally the op size was 32 bits, after
+ // sign or zero extension of x, and there is an implicit truncation in the
+ // assignment).
+ // This is ok in most cases because the upper bits were
+ // lost when assigning the op result to a small type var,
+ // but it may not be ok for the right shift operation where the higher bits
+ // could be shifted into the lower bits and preserved.
+ // Signed right shift of signed x still works (i.e. (sbyte)((int)(sbyte)x >>signed y) ==
+ // (sbyte)x >>signed y), as does unsigned right shift ((ubyte)((int)(ubyte)x >>unsigned y) ==
+ // (ubyte)x >>unsigned y), but signed right shift of an unsigned small type may give the
+ // wrong result:
+ // e.g. (ubyte)((int)(ubyte)0xf0 >>signed 4) == 0x0f,
+ // but (ubyte)0xf0 >>signed 4 == 0xff which is incorrect.
+ // The result becomes correct if we use >>unsigned instead of >>signed.
+ noway_assert(op1->TypeGet() == op2->gtOp.gtOp1->TypeGet());
+ cmop = GT_RSZ;
+ }
+
+ /* Replace with an assignment operator */
+ noway_assert(GT_ADD - GT_ADD == GT_ASG_ADD - GT_ASG_ADD);
+ noway_assert(GT_SUB - GT_ADD == GT_ASG_SUB - GT_ASG_ADD);
+ noway_assert(GT_OR - GT_ADD == GT_ASG_OR - GT_ASG_ADD);
+ noway_assert(GT_XOR - GT_ADD == GT_ASG_XOR - GT_ASG_ADD);
+ noway_assert(GT_AND - GT_ADD == GT_ASG_AND - GT_ASG_ADD);
+ noway_assert(GT_LSH - GT_ADD == GT_ASG_LSH - GT_ASG_ADD);
+ noway_assert(GT_RSH - GT_ADD == GT_ASG_RSH - GT_ASG_ADD);
+ noway_assert(GT_RSZ - GT_ADD == GT_ASG_RSZ - GT_ASG_ADD);
+
+ tree->SetOper((genTreeOps)(cmop - GT_ADD + GT_ASG_ADD));
+ tree->gtOp2 = op2->gtOp.gtOp2;
+
+ /* Propagate GTF_OVERFLOW */
+
+ if (op2->gtOverflowEx())
+ {
+ tree->gtType = op2->gtType;
+ tree->gtFlags |= (op2->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT | GTF_UNSIGNED));
+ }
+
+#if FEATURE_SET_FLAGS
+
+ /* Propagate GTF_SET_FLAGS */
+ if (op2->gtSetFlags())
+ {
+ tree->gtRequestSetFlags();
+ }
+
+#endif // FEATURE_SET_FLAGS
+
+ DEBUG_DESTROY_NODE(op2);
+ op2 = tree->gtOp2;
+
+ /* The target is used as well as being defined */
+ if (op1->OperIsLocal())
+ {
+ op1->gtFlags |= GTF_VAR_USEASG;
+ }
+
+#if CPU_HAS_FP_SUPPORT
+ /* Check for the special case "x += y * x;" */
+
+ // The transformation below produces a GT_ASG_MUL, which is only supported
+ // for floating point types, and it only applies to += and -=
+ if (cmop != GT_ADD && cmop != GT_SUB)
+ {
+ break;
+ }
+
+ if (op2->gtOper == GT_MUL && varTypeIsFloating(tree->TypeGet()))
+ {
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /* Change "x += x * y" into "x *= (y + 1)" */
+
+ op2 = op2->gtOp.gtOp2;
+ }
+ else if (GenTree::Compare(op1, op2->gtOp.gtOp2))
+ {
+ /* Change "x += y * x" into "x *= (y + 1)" */
+
+ op2 = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ break;
+ }
+
+ op1 = gtNewDconNode(1.0);
+
+ /* Now make the "*=" node */
+
+ if (cmop == GT_ADD)
+ {
+ /* Change "x += x * y" into "x *= (y + 1)" */
+
+ tree->gtOp2 = op2 = gtNewOperNode(GT_ADD, tree->TypeGet(), op2, op1);
+ }
+ else
+ {
+ /* Change "x -= x * y" into "x *= (1 - y)" */
+
+ noway_assert(cmop == GT_SUB);
+ tree->gtOp2 = op2 = gtNewOperNode(GT_SUB, tree->TypeGet(), op1, op2);
+ }
+ tree->ChangeOper(GT_ASG_MUL);
+ }
+#endif // CPU_HAS_FP_SUPPORT
+ }
+ }
+
+ break;
+
+ case GT_NOT:
+
+ /* Is the destination identical to the first RHS sub-operand? */
+
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /* This is "x = ~x" which is the same as "x ^= -1"
+ * Transform the node into a GT_ASG_XOR */
+
+ noway_assert(genActualType(typ) == TYP_INT || genActualType(typ) == TYP_LONG);
+
+ op2->gtOp.gtOp2 = (genActualType(typ) == TYP_INT) ? gtNewIconNode(-1) : gtNewLconNode(-1);
+
+ cmop = GT_XOR;
+ goto ASG_OP;
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ break;
+
+ case GT_MUL:
+
+ /* Check for the case "(val + icon) * icon" */
+
+ if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
+ {
+ GenTreePtr add = op1->gtOp.gtOp2;
+
+ if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
+ {
+ if (tree->gtOverflow() || op1->gtOverflow())
+ {
+ break;
+ }
+
+ ssize_t imul = op2->gtIntCon.gtIconVal;
+ ssize_t iadd = add->gtIntCon.gtIconVal;
+
+ /* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
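+ /* Illustrative instance of this rewrite: (x + 3) * 5 becomes (x * 5) + 15 */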
+
+ oper = GT_ADD;
+ tree->ChangeOper(oper);
+
+ op2->gtIntCon.gtIconVal = iadd * imul;
+
+ op1->ChangeOper(GT_MUL);
+
+ add->gtIntCon.gtIconVal = imul;
+#ifdef _TARGET_64BIT_
+ if (add->gtType == TYP_INT)
+ {
+ // we need to properly re-sign-extend or truncate after multiplying two int constants above
+ add->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+ }
+ }
+
+ break;
+
+ case GT_DIV:
+
+ /* For "val / 1", just return "val" */
+
+ if (op2->IsIntegralConst(1))
+ {
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+
+ break;
+
+ case GT_LSH:
+
+ /* Check for the case "(val + icon) << icon" */
+
+ if (op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
+ {
+ GenTreePtr cns = op1->gtOp.gtOp2;
+
+ if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
+ {
+ ssize_t ishf = op2->gtIntConCommon.IconValue();
+ ssize_t iadd = cns->gtIntConCommon.IconValue();
+
+ // printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
+
+ /* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
+
+ tree->ChangeOper(GT_ADD);
+ ssize_t result = iadd << ishf;
+ op2->gtIntConCommon.SetIconValue(result);
+#ifdef _TARGET_64BIT_
+ if (op1->gtType == TYP_INT)
+ {
+ op2->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif // _TARGET_64BIT_
+
+ // we are reusing the shift amount node here, but the type we want is that of the shift result
+ op2->gtType = op1->gtType;
+
+ if (cns->gtOper == GT_CNS_INT && cns->gtIntCon.gtFieldSeq != nullptr &&
+ cns->gtIntCon.gtFieldSeq->IsConstantIndexFieldSeq())
+ {
+ assert(cns->gtIntCon.gtFieldSeq->m_next == nullptr);
+ op2->gtIntCon.gtFieldSeq = cns->gtIntCon.gtFieldSeq;
+ }
+
+ op1->ChangeOper(GT_LSH);
+
+ cns->gtIntConCommon.SetIconValue(ishf);
+ }
+ }
+
+ break;
+
+ case GT_XOR:
+
+ if (!optValnumCSE_phase)
+ {
+ /* "x ^ -1" is "~x" */
+
+ if (op2->IsIntegralConst(-1))
+ {
+ tree->ChangeOper(GT_NOT);
+ tree->gtOp2 = nullptr;
+ DEBUG_DESTROY_NODE(op2);
+ }
+ else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
+ {
+ /* "binaryVal ^ 1" is "!binaryVal" */
+ gtReverseCond(op1);
+ DEBUG_DESTROY_NODE(op2);
+ DEBUG_DESTROY_NODE(tree);
+ return op1;
+ }
+ }
+
+ break;
+
+ default:
+ break;
+ }
+ return tree;
+}
+
+ // Code to generate a magic number and shift amount for the magic number division
+ // optimization. This code comes from UTC, which notes that it was taken from
+ // _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58.
+ // The paper it is based on is "Division by invariant integers using multiplication"
+ // by Torbjorn Granlund and Peter L. Montgomery, PLDI '94.
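+ //
+ // Illustrative sketch (not part of the algorithm itself): for a signed 32-bit divide by 3
+ // this routine yields magic == 0x55555556 and shift == 0, so the caller can compute
+ // q = MULHI(n, magic) and then add q's sign bit, giving n / 3 for both positive and
+ // negative n (e.g. n == 7 -> 2, n == -7 -> -2).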
+
+template <typename T>
+T GetSignedMagicNumberForDivide(T denom, int* shift /*out*/)
+{
+ // static SMAG smag;
+ const int bits = sizeof(T) * 8;
+ const int bits_minus_1 = bits - 1;
+
+ typedef typename jitstd::make_unsigned<T>::type UT;
+
+ const UT two_nminus1 = UT(1) << bits_minus_1;
+
+ int p;
+ UT absDenom;
+ UT absNc;
+ UT delta;
+ UT q1;
+ UT r1;
+ UT r2;
+ UT q2;
+ UT t;
+ T result_magic;
+ int result_shift;
+ int iters = 0;
+
+ absDenom = abs(denom);
+ t = two_nminus1 + ((unsigned int)denom >> 31);
+ absNc = t - 1 - (t % absDenom); // absolute value of nc
+ p = bits_minus_1; // initialize p
+ q1 = two_nminus1 / absNc; // initialize q1 = 2^p / abs(nc)
+ r1 = two_nminus1 - (q1 * absNc); // initialize r1 = rem(2^p, abs(nc))
+ q2 = two_nminus1 / absDenom; // initialize q2 = 2^p / abs(denom)
+ r2 = two_nminus1 - (q2 * absDenom); // initialize r2 = rem(2^p, abs(denom))
+
+ do
+ {
+ iters++;
+ p++;
+ q1 *= 2; // update q1 = 2^p / abs(nc)
+ r1 *= 2; // update r1 = rem(2^p, abs(nc))
+
+ if (r1 >= absNc)
+ { // must be unsigned comparison
+ q1++;
+ r1 -= absNc;
+ }
+
+ q2 *= 2; // update q2 = 2^p / abs(denom)
+ r2 *= 2; // update r2 = rem(2^p, abs(denom))
+
+ if (r2 >= absDenom)
+ { // must be unsigned comparison
+ q2++;
+ r2 -= absDenom;
+ }
+
+ delta = absDenom - r2;
+ } while (q1 < delta || (q1 == delta && r1 == 0));
+
+ result_magic = q2 + 1; // resulting magic number
+ if (denom < 0)
+ {
+ result_magic = -result_magic;
+ }
+ *shift = p - bits; // resulting shift
+
+ return result_magic;
+}
+
+bool Compiler::fgShouldUseMagicNumberDivide(GenTreeOp* tree)
+{
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: We don't have a 'mulHi' implementation yet for ARM64
+ return false;
+#else
+
+ // During the optOptimizeValnumCSEs phase we can call fgMorph and when we do,
+ // if this method returns true we will introduce a new LclVar and
+ // a couple of new GenTree nodes, including an assignment to the new LclVar.
+ // None of these new GenTree nodes will have valid ValueNumbers.
+ // That is an invalid state for a GenTree node during the optOptimizeValnumCSEs phase.
+ //
+ // Also during optAssertionProp when extracting side effects we can assert
+ // during gtBuildCommaList if we have one tree that has Value Numbers
+ // and another one that does not.
+ //
+ if (!fgGlobalMorph)
+ {
+ // We only perform the Magic Number Divide optimization during
+ // the initial global morph phase
+ return false;
+ }
+
+ if (tree->gtFlags & GTF_OVERFLOW)
+ {
+ return false;
+ }
+
+ if (tree->gtOp2->gtOper != GT_CNS_INT && tree->gtOp2->gtOper != GT_CNS_LNG)
+ {
+ return false;
+ }
+
+ ssize_t cons = tree->gtOp2->gtIntConCommon.IconValue();
+
+ if (cons == 0 || cons == -1 || cons == 1)
+ {
+ return false;
+ }
+
+ // codegen will expand these
+ if (cons == SSIZE_T_MIN || isPow2(abs(cons)))
+ {
+ return false;
+ }
+
+ // someone else will fold this away, so don't make it complicated for them
+ if (tree->gtOp1->IsCnsIntOrI())
+ {
+ return false;
+ }
+
+ // There is no technical barrier to handling unsigned division; however, it is
+ // quite rare and would require more work to support and test
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ return false;
+ }
+
+ return true;
+#endif
+}
+
+// transform x%c -> x-((x/c)*c)
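+ // Illustrative arithmetic (assuming truncating division): 17 % 5 becomes
+ // 17 - ((17 / 5) * 5) == 17 - 15 == 2; the resulting division can then be
+ // strength-reduced separately.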
+
+GenTree* Compiler::fgMorphModByConst(GenTreeOp* tree)
+{
+ assert(fgShouldUseMagicNumberDivide(tree));
+
+ var_types type = tree->gtType;
+
+ GenTree* cns = tree->gtOp2;
+
+ GenTree* numerator = fgMakeMultiUse(&tree->gtOp1);
+
+ tree->SetOper(GT_DIV);
+
+ GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(cns));
+
+ GenTree* sub = gtNewOperNode(GT_SUB, type, numerator, mul);
+
+#ifdef DEBUG
+ sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return sub;
+}
+
+ // For ARM64 we don't have a remainder instruction; the architecture manual suggests
+ // the following transformation to generate code for such an operator:
+ //
+ // a % b = a - (a / b) * b;
+ //
+ // This method will produce the above expression if 'a' and 'b' are leaf nodes;
+ // otherwise, if either of them is not a leaf, it will spill its value into a
+ // temporary variable. An example:
+ // (x * 2 - 1) % (y + 1) -> t1 - (t2 * ( comma(t1 = x * 2 - 1, t1) / comma(t2 = y + 1, t2) ) )
+//
+GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
+{
+#ifndef _TARGET_ARM64_
+ assert(!"This should only be called for ARM64");
+#endif
+
+ if (tree->OperGet() == GT_MOD)
+ {
+ tree->SetOper(GT_DIV);
+ }
+ else if (tree->OperGet() == GT_UMOD)
+ {
+ tree->SetOper(GT_UDIV);
+ }
+ else
+ {
+ noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
+ }
+
+ var_types type = tree->gtType;
+ GenTree* denominator = tree->gtOp2;
+ GenTree* numerator = tree->gtOp1;
+
+ if (!numerator->OperIsLeaf())
+ {
+ numerator = fgMakeMultiUse(&tree->gtOp1);
+ }
+
+ if (!denominator->OperIsLeaf())
+ {
+ denominator = fgMakeMultiUse(&tree->gtOp2);
+ }
+
+ GenTree* mul = gtNewOperNode(GT_MUL, type, tree, gtCloneExpr(denominator));
+ GenTree* sub = gtNewOperNode(GT_SUB, type, gtCloneExpr(numerator), mul);
+
+#ifdef DEBUG
+ sub->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return sub;
+}
+
+ // Turn a division by a constant into a multiplication by a constant plus some adjustments;
+ // see the comments on GetSignedMagicNumberForDivide for the source of this algorithm.
+ // Returns the transformed tree.
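+ //
+ // Illustrative shape of the result (a sketch, assuming signed x / d with magic M and shift s):
+ //   q  = MULHI(x, M)                 (plus or minus x when sign(M) != sign(d))
+ //   q  = q >> s                      (arithmetic shift, omitted when s == 0)
+ //   q += (unsigned)q >> (bits - 1)   (add one for negative quotients)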
+
+GenTree* Compiler::fgMorphDivByConst(GenTreeOp* tree)
+{
+ assert(fgShouldUseMagicNumberDivide(tree));
+
+ JITDUMP("doing magic number divide optimization\n");
+
+ int64_t denominator = tree->gtOp2->gtIntConCommon.IconValue();
+ int64_t magic;
+ int shift;
+ var_types type = tree->gtType;
+
+ if (tree->gtType == TYP_INT)
+ {
+ magic = GetSignedMagicNumberForDivide<int32_t>((int32_t)denominator, &shift);
+ }
+ else
+ {
+ magic = GetSignedMagicNumberForDivide<int64_t>((int64_t)denominator, &shift);
+ }
+
+ GenTree* numerator = nullptr;
+
+ // If signs of the denominator and magic number don't match,
+ // we will need to use the numerator again.
+ if (signum(denominator) != signum(magic))
+ {
+ numerator = fgMakeMultiUse(&tree->gtOp1);
+ tree->gtFlags |= GTF_ASG;
+ }
+
+ if (type == TYP_LONG)
+ {
+ tree->gtOp2->gtIntConCommon.SetLngValue(magic);
+ }
+ else
+ {
+ tree->gtOp2->gtIntConCommon.SetIconValue((ssize_t)magic);
+ }
+
+ tree->SetOper(GT_MULHI);
+
+ GenTree* t = tree;
+ GenTree* mulresult = tree;
+
+ JITDUMP("Multiply Result:\n");
+ DISPTREE(mulresult);
+
+ GenTree* adjusted = mulresult;
+
+ if (denominator > 0 && magic < 0)
+ {
+ // add the numerator back in
+ adjusted = gtNewOperNode(GT_ADD, type, mulresult, numerator);
+ }
+ else if (denominator < 0 && magic > 0)
+ {
+ // subtract the numerator off
+ adjusted = gtNewOperNode(GT_SUB, type, mulresult, numerator);
+ }
+ else
+ {
+ adjusted = mulresult;
+ }
+
+ GenTree* result1 = adjusted;
+ if (shift != 0)
+ {
+ result1 = gtNewOperNode(GT_RSH, type, adjusted, gtNewIconNode(shift, TYP_INT));
+ }
+
+ GenTree* secondClone = fgMakeMultiUse(&result1);
+
+ GenTree* result2 = gtNewOperNode(GT_RSZ, type, secondClone, gtNewIconNode(genTypeSize(type) * 8 - 1, type));
+
+ GenTree* result = gtNewOperNode(GT_ADD, type, result1, result2);
+ JITDUMP("Final Magic Number divide:\n");
+ DISPTREE(result);
+
+#ifdef DEBUG
+ result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
+//
+// Arguments:
+// oper - Operation to check
+//
+// Return Value:
+// True if the operation can be a root of a bitwise rotation tree; false otherwise.
+
+bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
+{
+ return (oper == GT_OR) || (oper == GT_XOR);
+}
+
+//------------------------------------------------------------------------------
+// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
+// an equivalent GT_ROL or GT_ROR tree; otherwise, return the original tree.
+//
+// Arguments:
+// tree - tree to check for a rotation pattern
+//
+// Return Value:
+// An equivalent GT_ROL or GT_ROR tree if a pattern is found; original tree otherwise.
+//
+// Assumption:
+// The input is a GT_OR or a GT_XOR tree.
+
+GenTreePtr Compiler::fgRecognizeAndMorphBitwiseRotation(GenTreePtr tree)
+{
+#ifndef LEGACY_BACKEND
+ //
+ // Check for a rotation pattern, e.g.,
+ //
+ // OR ROL
+ // / \ / \
+ // LSH RSZ -> x y
+ // / \ / \
+ // x AND x AND
+ // / \ / \
+ // y 31 ADD 31
+ // / \
+ // NEG 32
+ // |
+ // y
+ // The patterns recognized:
+ // (x << (y & M)) op (x >>> ((-y + N) & M))
+ // (x >>> ((-y + N) & M)) op (x << (y & M))
+ //
+ // (x << y) op (x >>> (-y + N))
+ // (x >>> (-y + N)) op (x << y)
+ //
+ // (x >>> (y & M)) op (x << ((-y + N) & M))
+ // (x << ((-y + N) & M)) op (x >>> (y & M))
+ //
+ // (x >>> y) op (x << (-y + N))
+ // (x << (-y + N)) op (x >>> y)
+ //
+ // (x << c1) op (x >>> c2)
+ // (x >>> c1) op (x << c2)
+ //
+ // where
+ // c1 and c2 are const
+ // c1 + c2 == bitsize(x)
+ // N == bitsize(x)
+ // M is const
+ // M & (N - 1) == N - 1
+ // op is either | or ^
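+ //
+ // Illustrative instance: with a 32-bit x, (x << (y & 31)) | (x >>> ((32 - y) & 31))
+ // matches the first pattern with N == 32 and M == 31, and is rewritten into a single
+ // rotate-left of x by y.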
+
+ if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
+ {
+ // We can't do anything if the tree has assignments, calls, or volatile
+ // reads. Note that we allow GTF_EXCEPT side effect since any exceptions
+ // thrown by the original tree will be thrown by the transformed tree as well.
+ return tree;
+ }
+
+ genTreeOps oper = tree->OperGet();
+ assert(fgOperIsBitwiseRotationRoot(oper));
+
+ // Check if we have an LSH on one side of the OR and an RSZ on the other side.
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr leftShiftTree = nullptr;
+ GenTreePtr rightShiftTree = nullptr;
+ if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
+ {
+ leftShiftTree = op1;
+ rightShiftTree = op2;
+ }
+ else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
+ {
+ leftShiftTree = op2;
+ rightShiftTree = op1;
+ }
+ else
+ {
+ return tree;
+ }
+
+ // Check if the trees representing the value to shift are identical.
+ // We already checked that there are no side effects above.
+ if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
+ {
+ GenTreePtr rotatedValue = leftShiftTree->gtGetOp1();
+ var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
+ ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
+ noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
+ GenTreePtr leftShiftIndex = leftShiftTree->gtGetOp2();
+ GenTreePtr rightShiftIndex = rightShiftTree->gtGetOp2();
+
+ // The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
+ // shouldn't be masked for the transformation to be valid. If additional
+ // higher bits are not masked, the transformation is still valid since the result
+ // of MSIL shift instructions is unspecified if the shift amount is greater or equal
+ // than the width of the value being shifted.
+ ssize_t minimalMask = rotatedValueBitSize - 1;
+ ssize_t leftShiftMask = -1;
+ ssize_t rightShiftMask = -1;
+
+ if ((leftShiftIndex->OperGet() == GT_AND))
+ {
+ if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ leftShiftMask = leftShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ leftShiftIndex = leftShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if ((rightShiftIndex->OperGet() == GT_AND))
+ {
+ if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
+ {
+ rightShiftMask = rightShiftIndex->gtGetOp2()->gtIntCon.gtIconVal;
+ rightShiftIndex = rightShiftIndex->gtGetOp1();
+ }
+ else
+ {
+ return tree;
+ }
+ }
+
+ if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
+ {
+ // The shift index is overmasked, e.g., we have
+ // something like (x << (y & 15)) or
+ // (x >> ((32 - y) & 15)) with 32-bit x.
+ // The transformation is not valid.
+ return tree;
+ }
+
+ GenTreePtr shiftIndexWithAdd = nullptr;
+ GenTreePtr shiftIndexWithoutAdd = nullptr;
+ genTreeOps rotateOp = GT_NONE;
+ GenTreePtr rotateIndex = nullptr;
+
+ if (leftShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = leftShiftIndex;
+ shiftIndexWithoutAdd = rightShiftIndex;
+ rotateOp = GT_ROR;
+ }
+ else if (rightShiftIndex->OperGet() == GT_ADD)
+ {
+ shiftIndexWithAdd = rightShiftIndex;
+ shiftIndexWithoutAdd = leftShiftIndex;
+ rotateOp = GT_ROL;
+ }
+
+ if (shiftIndexWithAdd != nullptr)
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
+ {
+ if (shiftIndexWithAdd->gtGetOp2()->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
+ {
+ if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
+ {
+ // We found one of these patterns:
+ // (x << (y & M)) | (x >>> ((-y + N) & M))
+ // (x << y) | (x >>> (-y + N))
+ // (x >>> (y & M)) | (x << ((-y + N) & M))
+ // (x >>> y) | (x << (-y + N))
+ // where N == bitsize(x), M is const, and
+ // M & (N - 1) == N - 1
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
+ {
+ // TODO: we need to handle variable-sized long shifts specially on x86.
+ // GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
+ // to add helpers for GT_ROL and GT_ROR.
+ NYI("Rotation of a long value by variable amount");
+ }
+#endif
+
+ rotateIndex = shiftIndexWithoutAdd;
+ }
+ }
+ }
+ }
+ }
+ else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
+ {
+ if (leftShiftIndex->gtIntCon.gtIconVal + rightShiftIndex->gtIntCon.gtIconVal == rotatedValueBitSize)
+ {
+ // We found this pattern:
+ // (x << c1) | (x >>> c2)
+ // where c1 and c2 are const and c1 + c2 == bitsize(x)
+ rotateOp = GT_ROL;
+ rotateIndex = leftShiftIndex;
+ }
+ }
+
+ if (rotateIndex != nullptr)
+ {
+ noway_assert(GenTree::OperIsRotate(rotateOp));
+
+ unsigned inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
+
+ // We can use the same tree only during global morph; reusing the tree in a later morph
+ // may invalidate value numbers.
+ if (fgGlobalMorph)
+ {
+ tree->gtOp.gtOp1 = rotatedValue;
+ tree->gtOp.gtOp2 = rotateIndex;
+ tree->ChangeOper(rotateOp);
+ noway_assert(inputTreeEffects == ((rotatedValue->gtFlags | rotateIndex->gtFlags) & GTF_ALL_EFFECT));
+ }
+ else
+ {
+ tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
+ noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
+ }
+
+ return tree;
+ }
+ }
+#endif // LEGACY_BACKEND
+ return tree;
+}
+
+#if !CPU_HAS_FP_SUPPORT
+GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
+{
+
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ /*
+ We have to use helper calls for all FP operations:
+
+ FP operators that operate on FP values
+ casts to and from FP
+ comparisons of FP values
+ */
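+
+ /* For example (illustrative): a TYP_FLOAT GT_ADD is rewritten below into a call to the
+ R4 add helper with both operands passed as arguments; the TYP_DOUBLE case simply bumps
+ the helper index, relying on each R8 helper immediately following its R4 counterpart. */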
+
+ if (varTypeIsFloating(typ) || (op1 && varTypeIsFloating(op1->TypeGet())))
+ {
+ int helper;
+ GenTreePtr args;
+ size_t argc = genTypeStSz(typ);
+
+ /* Not all FP operations need helper calls */
+
+ switch (oper)
+ {
+ case GT_ASG:
+ case GT_IND:
+ case GT_LIST:
+ case GT_ADDR:
+ case GT_COMMA:
+ return tree;
+ }
+
+#ifdef DEBUG
+
+ /* If the result isn't FP, it better be a compare or cast */
+
+ if (!(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST))
+ gtDispTree(tree);
+
+ noway_assert(varTypeIsFloating(typ) || tree->OperIsCompare() || oper == GT_CAST);
+#endif
+
+ /* Keep track of how many arguments we're passing */
+
+ fgPtrArgCntCur += argc;
+
+ /* Is this a binary operator? */
+
+ if (op2)
+ {
+ /* Add the second operand to the argument count */
+
+ fgPtrArgCntCur += argc;
+ argc *= 2;
+
+ /* What kind of an operator do we have? */
+
+ switch (oper)
+ {
+ case GT_ADD:
+ helper = CPX_R4_ADD;
+ break;
+ case GT_SUB:
+ helper = CPX_R4_SUB;
+ break;
+ case GT_MUL:
+ helper = CPX_R4_MUL;
+ break;
+ case GT_DIV:
+ helper = CPX_R4_DIV;
+ break;
+ // case GT_MOD: helper = CPX_R4_REM; break;
+
+ case GT_EQ:
+ helper = CPX_R4_EQ;
+ break;
+ case GT_NE:
+ helper = CPX_R4_NE;
+ break;
+ case GT_LT:
+ helper = CPX_R4_LT;
+ break;
+ case GT_LE:
+ helper = CPX_R4_LE;
+ break;
+ case GT_GE:
+ helper = CPX_R4_GE;
+ break;
+ case GT_GT:
+ helper = CPX_R4_GT;
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected FP binary op");
+ break;
+ }
+
+ args = gtNewArgList(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
+ }
+ else
+ {
+ switch (oper)
+ {
+ case GT_RETURN:
+ return tree;
+
+ case GT_CAST:
+ noway_assert(!"FP cast");
+
+ case GT_NEG:
+ helper = CPX_R4_NEG;
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected FP unary op");
+ break;
+ }
+
+ args = gtNewArgList(tree->gtOp.gtOp1);
+ }
+
+ /* If we have double result/operands, modify the helper */
+
+ if (typ == TYP_DOUBLE)
+ {
+ noway_assert(CPX_R4_NEG + 1 == CPX_R8_NEG);
+ noway_assert(CPX_R4_ADD + 1 == CPX_R8_ADD);
+ noway_assert(CPX_R4_SUB + 1 == CPX_R8_SUB);
+ noway_assert(CPX_R4_MUL + 1 == CPX_R8_MUL);
+ noway_assert(CPX_R4_DIV + 1 == CPX_R8_DIV);
+
+ helper++;
+ }
+ else
+ {
+ noway_assert(tree->OperIsCompare());
+
+ noway_assert(CPX_R4_EQ + 1 == CPX_R8_EQ);
+ noway_assert(CPX_R4_NE + 1 == CPX_R8_NE);
+ noway_assert(CPX_R4_LT + 1 == CPX_R8_LT);
+ noway_assert(CPX_R4_LE + 1 == CPX_R8_LE);
+ noway_assert(CPX_R4_GE + 1 == CPX_R8_GE);
+ noway_assert(CPX_R4_GT + 1 == CPX_R8_GT);
+ }
+
+ tree = fgMorphIntoHelperCall(tree, helper, args);
+
+ if (fgPtrArgCntMax < fgPtrArgCntCur)
+ fgPtrArgCntMax = fgPtrArgCntCur;
+
+ fgPtrArgCntCur -= argc;
+ return tree;
+
+ case GT_RETURN:
+
+ if (op1)
+ {
+
+ if (compCurBB == genReturnBB)
+ {
+ /* This is the 'exitCrit' call at the exit label */
+
+ noway_assert(op1->gtType == TYP_VOID);
+ noway_assert(op2 == 0);
+
+ tree->gtOp.gtOp1 = op1 = fgMorphTree(op1);
+
+ return tree;
+ }
+
+ /* This is a (real) return value -- check its type */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (genActualType(op1->TypeGet()) != genActualType(info.compRetType))
+ {
+ bool allowMismatch = false;
+
+ // Allow TYP_BYREF to be returned as TYP_I_IMPL and vice versa
+ if ((info.compRetType == TYP_BYREF && genActualType(op1->TypeGet()) == TYP_I_IMPL) ||
+ (op1->TypeGet() == TYP_BYREF && genActualType(info.compRetType) == TYP_I_IMPL))
+ allowMismatch = true;
+
+ if (varTypeIsFloating(info.compRetType) && varTypeIsFloating(op1->TypeGet()))
+ allowMismatch = true;
+
+ if (!allowMismatch)
+ NO_WAY("Return type mismatch");
+ }
+#endif
+ }
+ break;
+ }
+ return tree;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform the given tree for code generation and return an equivalent tree.
+ */
+
+GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
+{
+ noway_assert(tree);
+ noway_assert(tree->gtOper != GT_STMT);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
+ {
+ noway_assert(!"JitBreakMorphTree hit");
+ }
+ }
+#endif
+
+#ifdef DEBUG
+ int thisMorphNum = 0;
+ if (verbose && treesBeforeAfterMorph)
+ {
+ thisMorphNum = morphNum++;
+ printf("\nfgMorphTree (before %d):\n", thisMorphNum);
+ gtDispTree(tree);
+ }
+#endif
+
+/*-------------------------------------------------------------------------
+ * fgMorphTree() can potentially replace a tree with another, and the
+ * caller has to store the return value correctly.
+ * Turn this on to always make a copy of "tree" here to shake out
+ * hidden/unupdated references.
+ */
+
+#ifdef DEBUG
+
+ if (compStressCompile(STRESS_GENERIC_CHECK, 0))
+ {
+ GenTreePtr copy;
+
+#ifdef SMALL_TREE_NODES
+ if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
+ {
+ copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
+ }
+ else
+#endif
+ {
+ copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
+ }
+
+ copy->CopyFrom(tree, this);
+
+#if defined(LATE_DISASM)
+ // GT_CNS_INT is considered small, so CopyFrom() won't copy all fields
+ if ((tree->gtOper == GT_CNS_INT) && tree->IsIconHandle())
+ {
+ copy->gtIntCon.gtIconHdl.gtIconHdl1 = tree->gtIntCon.gtIconHdl.gtIconHdl1;
+ copy->gtIntCon.gtIconHdl.gtIconHdl2 = tree->gtIntCon.gtIconHdl.gtIconHdl2;
+ }
+#endif
+
+ DEBUG_DESTROY_NODE(tree);
+ tree = copy;
+ }
+#endif // DEBUG
+
+ if (fgGlobalMorph)
+ {
+ /* Ensure that we haven't morphed this node already */
+ assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
+
+#if LOCAL_ASSERTION_PROP
+ /* Before morphing the tree, we try to propagate any active assertions */
+ if (optLocalAssertionProp)
+ {
+ /* Do we have any active assertions? */
+
+ if (optAssertionCount > 0)
+ {
+ GenTreePtr newTree = tree;
+ while (newTree != nullptr)
+ {
+ tree = newTree;
+ /* newTree is non-Null if we propagated an assertion */
+ newTree = optAssertionProp(apFull, tree, nullptr);
+ }
+ noway_assert(tree != nullptr);
+ }
+ }
+ PREFAST_ASSUME(tree != nullptr);
+#endif
+ }
+
+ /* Save the original un-morphed tree for fgMorphTreeDone */
+
+ GenTreePtr oldTree = tree;
+
+ /* Figure out what kind of a node we have */
+
+ unsigned kind = tree->OperKind();
+
+ /* Is this a constant node? */
+
+ if (kind & GTK_CONST)
+ {
+ tree = fgMorphConst(tree);
+ goto DONE;
+ }
+
+ /* Is this a leaf node? */
+
+ if (kind & GTK_LEAF)
+ {
+ tree = fgMorphLeaf(tree);
+ goto DONE;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ tree = fgMorphSmpOp(tree, mac);
+ goto DONE;
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (tree->OperGet())
+ {
+ case GT_FIELD:
+ tree = fgMorphField(tree, mac);
+ break;
+
+ case GT_CALL:
+ tree = fgMorphCall(tree->AsCall());
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ fgSetRngChkTarget(tree);
+
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
+ bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
+ // If the index is a comma(throw, x), just return that.
+ if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
+ {
+ tree = bndsChk->gtIndex;
+ }
+
+ // Propagate effects flags upwards
+ bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
+ bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
+
+ // Otherwise, we don't change the tree.
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ tree->gtArrElem.gtArrObj = fgMorphTree(tree->gtArrElem.gtArrObj);
+ tree->gtFlags |= tree->gtArrElem.gtArrObj->gtFlags & GTF_ALL_EFFECT;
+
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ tree->gtArrElem.gtArrInds[dim] = fgMorphTree(tree->gtArrElem.gtArrInds[dim]);
+ tree->gtFlags |= tree->gtArrElem.gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
+ }
+ if (fgGlobalMorph)
+ {
+ fgSetRngChkTarget(tree, false);
+ }
+ break;
+
+ case GT_ARR_OFFSET:
+ tree->gtArrOffs.gtOffset = fgMorphTree(tree->gtArrOffs.gtOffset);
+ tree->gtFlags |= tree->gtArrOffs.gtOffset->gtFlags & GTF_ALL_EFFECT;
+ tree->gtArrOffs.gtIndex = fgMorphTree(tree->gtArrOffs.gtIndex);
+ tree->gtFlags |= tree->gtArrOffs.gtIndex->gtFlags & GTF_ALL_EFFECT;
+ tree->gtArrOffs.gtArrObj = fgMorphTree(tree->gtArrOffs.gtArrObj);
+ tree->gtFlags |= tree->gtArrOffs.gtArrObj->gtFlags & GTF_ALL_EFFECT;
+ if (fgGlobalMorph)
+ {
+ fgSetRngChkTarget(tree, false);
+ }
+ break;
+
+ case GT_CMPXCHG:
+ tree->gtCmpXchg.gtOpLocation = fgMorphTree(tree->gtCmpXchg.gtOpLocation);
+ tree->gtCmpXchg.gtOpValue = fgMorphTree(tree->gtCmpXchg.gtOpValue);
+ tree->gtCmpXchg.gtOpComparand = fgMorphTree(tree->gtCmpXchg.gtOpComparand);
+ break;
+
+ case GT_STORE_DYN_BLK:
+ tree->gtDynBlk.Data() = fgMorphTree(tree->gtDynBlk.Data());
+ __fallthrough;
+ case GT_DYN_BLK:
+ tree->gtDynBlk.Addr() = fgMorphTree(tree->gtDynBlk.Addr());
+ tree->gtDynBlk.gtDynamicSize = fgMorphTree(tree->gtDynBlk.gtDynamicSize);
+ break;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected operator");
+ }
+DONE:
+
+ fgMorphTreeDone(tree, oldTree DEBUGARG(thisMorphNum));
+
+ return tree;
+}
+
+#if LOCAL_ASSERTION_PROP
+/*****************************************************************************
+ *
+ * Kill all dependent assertions with regard to lclNum.
+ *
+ */
+
+void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTreePtr tree))
+{
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varDsc->lvPromoted)
+ {
+ noway_assert(varTypeIsStruct(varDsc));
+
+ // Kill the field locals.
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ {
+ fgKillDependentAssertions(i DEBUGARG(tree));
+ }
+
+ // Fall through to kill the struct local itself.
+ }
+
+ /* All dependent assertions are killed here */
+
+ ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
+
+ if (killed)
+ {
+ AssertionIndex index = optAssertionCount;
+ while (killed && (index > 0))
+ {
+ if (BitVecOps::IsMember(apTraits, killed, index - 1))
+ {
+#ifdef DEBUG
+ AssertionDsc* curAssertion = optGetAssertion(index);
+ noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
+ ((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
+ if (verbose)
+ {
+ printf("\nThe assignment ");
+ printTreeID(tree);
+ printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
+ optPrintAssertion(curAssertion);
+ }
+#endif
+ // Remove this bit from the killed mask
+ BitVecOps::RemoveElemD(apTraits, killed, index - 1);
+
+ optAssertionRemove(index);
+ }
+
+ index--;
+ }
+
+ // killed mask should now be zero
+ noway_assert(BitVecOps::IsEmpty(apTraits, killed));
+ }
+}
+#endif // LOCAL_ASSERTION_PROP
+
+/*****************************************************************************
+ *
+ * This function is called to complete the morphing of a tree node.
+ * It should only be called once for each node.
+ * If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
+ * to enforce the invariant that each node is only morphed once.
+ * If LOCAL_ASSERTION_PROP is enabled the result tree may be replaced
+ * by an equivalent tree.
+ *
+ */
+
+void Compiler::fgMorphTreeDone(GenTreePtr tree,
+ GenTreePtr oldTree /* == NULL */
+ DEBUGARG(int morphNum))
+{
+#ifdef DEBUG
+ if (verbose && treesBeforeAfterMorph)
+ {
+ printf("\nfgMorphTree (after %d):\n", morphNum);
+ gtDispTree(tree);
+ printf(""); // in our logic this causes a flush
+ }
+#endif
+
+ if (!fgGlobalMorph)
+ {
+ return;
+ }
+
+ if ((oldTree != nullptr) && (oldTree != tree))
+ {
+ /* Ensure that we have morphed this node */
+ assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
+
+#ifdef DEBUG
+ TransferTestDataToNode(oldTree, tree);
+#endif
+ }
+ else
+ {
+ // Ensure that we haven't morphed this node already
+ assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
+ }
+
+ if (tree->OperKind() & GTK_CONST)
+ {
+ goto DONE;
+ }
+
+#if LOCAL_ASSERTION_PROP
+
+ if (!optLocalAssertionProp)
+ {
+ goto DONE;
+ }
+
+ /* Do we have any active assertions? */
+
+ if (optAssertionCount > 0)
+ {
+ /* Is this an assignment to a local variable */
+
+ if ((tree->OperKind() & GTK_ASGOP) &&
+ (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR || tree->gtOp.gtOp1->gtOper == GT_LCL_FLD))
+ {
+ unsigned op1LclNum = tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ noway_assert(op1LclNum < lvaCount);
+ fgKillDependentAssertions(op1LclNum DEBUGARG(tree));
+ }
+ }
+
+ /* If this tree makes a new assertion - make it available */
+ optAssertionGen(tree);
+
+#endif // LOCAL_ASSERTION_PROP
+
+DONE:;
+
+#ifdef DEBUG
+ /* Mark this node as being morphed */
+ tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Check and fold blocks of type BBJ_COND and BBJ_SWITCH on constants
+ * Returns true if we modified the flow graph
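+ *
+ * For example (illustrative): a BBJ_COND block ending in JTRUE(1) becomes a BBJ_ALWAYS
+ * to its jump target, and the not-taken edge to bbNext is removed.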
+ */
+
+bool Compiler::fgFoldConditional(BasicBlock* block)
+{
+ bool result = false;
+
+ // We don't want to make any code unreachable
+ if (opts.compDbgCode || opts.MinOpts())
+ {
+ return false;
+ }
+
+ if (block->bbJumpKind == BBJ_COND)
+ {
+ noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
+
+ GenTreePtr stmt = block->bbTreeList->gtPrev;
+
+ noway_assert(stmt->gtNext == nullptr);
+
+ if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
+ {
+ noway_assert(fgRemoveRestOfBlock);
+
+ /* Unconditional throw - transform the basic block into a BBJ_THROW */
+ fgConvertBBToThrowBB(block);
+
+ /* Remove 'block' from the predecessor list of 'block->bbNext' */
+ fgRemoveRefPred(block->bbNext, block);
+
+ /* Remove 'block' from the predecessor list of 'block->bbJumpDest' */
+ fgRemoveRefPred(block->bbJumpDest, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
+ }
+#endif
+ goto DONE_COND;
+ }
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ /* Did we fold the conditional */
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond;
+ cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ if (cond->OperKind() & GTK_CONST)
+ {
+ /* Yupee - we folded the conditional!
+ * Remove the conditional statement */
+
+ noway_assert(cond->gtOper == GT_CNS_INT);
+ noway_assert((block->bbNext->countOfInEdges() > 0) && (block->bbJumpDest->countOfInEdges() > 0));
+
+ /* remove the statement from bbTreelist - No need to update
+ * the reference counts since there are no lcl vars */
+ fgRemoveStmt(block, stmt);
+
+ // block is a BBJ_COND that we are folding the conditional for
+ // bTaken is the path that will always be taken from block
+ // bNotTaken is the path that will never be taken from block
+ //
+ BasicBlock* bTaken;
+ BasicBlock* bNotTaken;
+
+ if (cond->gtIntCon.gtIconVal != 0)
+ {
+ /* JTRUE 1 - transform the basic block into a BBJ_ALWAYS */
+ block->bbJumpKind = BBJ_ALWAYS;
+ bTaken = block->bbJumpDest;
+ bNotTaken = block->bbNext;
+ }
+ else
+ {
+ /* Unmark the loop if we are removing a backwards branch. */
+ /* The dest block must also be marked as a loop head, and */
+ /* we must be able to reach the backedge block */
+ if ((block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) &&
+ fgReachable(block->bbJumpDest, block))
+ {
+ optUnmarkLoopBlocks(block->bbJumpDest, block);
+ }
+
+ /* JTRUE 0 - transform the basic block into a BBJ_NONE */
+ block->bbJumpKind = BBJ_NONE;
+ noway_assert(!(block->bbFlags & BBF_NEEDS_GCPOLL));
+ bTaken = block->bbNext;
+ bNotTaken = block->bbJumpDest;
+ }
+
+ if (fgHaveValidEdgeWeights)
+ {
+ // We are removing an edge from block to bNotTaken
+ // and we have already computed the edge weights, so
+ // we will try to adjust some of the weights
+ //
+ flowList* edgeTaken = fgGetPredForBlock(bTaken, block);
+ BasicBlock* bUpdated = nullptr; // non-NULL if we updated the weight of an internal block
+
+ // We examine the taken edge (block -> bTaken)
+ // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
+ // else if bTaken has valid profile weight and block does not we try to adjust block's weight
+ // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
+ //
+ if (block->bbFlags & BBF_PROF_WEIGHT)
+ {
+ // The edge weights for (block -> bTaken) are 100% of block's weight
+ edgeTaken->flEdgeWeightMin = block->bbWeight;
+ edgeTaken->flEdgeWeightMax = block->bbWeight;
+
+ if ((bTaken->bbFlags & BBF_PROF_WEIGHT) == 0)
+ {
+ if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
+ {
+ // Update the weight of bTaken
+ bTaken->inheritWeight(block);
+ bUpdated = bTaken;
+ }
+ }
+ }
+ else if (bTaken->bbFlags & BBF_PROF_WEIGHT)
+ {
+ if (bTaken->countOfInEdges() == 1)
+ {
+ // There is only one in edge to bTaken
+ edgeTaken->flEdgeWeightMin = bTaken->bbWeight;
+ edgeTaken->flEdgeWeightMax = bTaken->bbWeight;
+
+ // Update the weight of block
+ block->inheritWeight(bTaken);
+ bUpdated = block;
+ }
+ }
+
+ if (bUpdated != nullptr)
+ {
+ flowList* edge;
+ // Now fix the weights of the edges out of 'bUpdated'
+ switch (bUpdated->bbJumpKind)
+ {
+ case BBJ_NONE:
+ edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ break;
+ case BBJ_COND:
+ edge = fgGetPredForBlock(bUpdated->bbNext, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ __fallthrough;
+ case BBJ_ALWAYS:
+ edge = fgGetPredForBlock(bUpdated->bbJumpDest, bUpdated);
+ edge->flEdgeWeightMax = bUpdated->bbWeight;
+ break;
+ default:
+ // We don't handle BBJ_SWITCH
+ break;
+ }
+ }
+ }
+
+ /* modify the flow graph */
+
+ /* Remove 'block' from the predecessor list of 'bNotTaken' */
+ fgRemoveRefPred(bNotTaken, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a %s", block->bbNum,
+ block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf(" to BB%02u", block->bbJumpDest->bbNum);
+ }
+ printf("\n");
+ }
+#endif
+
+ /* if the block was a loop condition we may have to modify
+ * the loop table */
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+ /* Some loops may have been already removed by
+ * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ /* We are only interested in the loop bottom */
+
+ if (optLoopTable[loopNum].lpBottom == block)
+ {
+ if (cond->gtIntCon.gtIconVal == 0)
+ {
+ /* This was a bogus loop (condition always false)
+ * Remove the loop from the table */
+
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Removing loop L%02u (from BB%02u to BB%02u)\n\n", loopNum,
+ optLoopTable[loopNum].lpFirst->bbNum, optLoopTable[loopNum].lpBottom->bbNum);
+ }
+#endif
+ }
+ }
+ }
+ DONE_COND:
+ result = true;
+ }
+ }
+ else if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ noway_assert(block->bbTreeList && block->bbTreeList->gtPrev);
+
+ GenTreePtr stmt = block->bbTreeList->gtPrev;
+
+ noway_assert(stmt->gtNext == nullptr);
+
+ if (stmt->gtStmt.gtStmtExpr->gtOper == GT_CALL)
+ {
+ noway_assert(fgRemoveRestOfBlock);
+
+ /* Unconditional throw - transform the basic block into a BBJ_THROW */
+ fgConvertBBToThrowBB(block);
+
+ /* update the flow graph */
+
+ unsigned jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
+ {
+ BasicBlock* curJump = *jumpTab;
+
+ /* Remove 'block' from the predecessor list of 'curJump' */
+ fgRemoveRefPred(curJump, block);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a BBJ_THROW\n", block->bbNum);
+ }
+#endif
+ goto DONE_SWITCH;
+ }
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOper == GT_SWITCH);
+
+ /* Did we fold the conditional */
+
+ noway_assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond;
+ cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ if (cond->OperKind() & GTK_CONST)
+ {
+ /* Yupee - we folded the conditional!
+ * Remove the conditional statement */
+
+ noway_assert(cond->gtOper == GT_CNS_INT);
+
+ /* remove the statement from bbTreelist - No need to update
+ * the reference counts since there are no lcl vars */
+ fgRemoveStmt(block, stmt);
+
+ /* modify the flow graph */
+
+ /* Find the actual jump target */
+ unsigned switchVal;
+ switchVal = (unsigned)cond->gtIntCon.gtIconVal;
+ unsigned jumpCnt;
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+ bool foundVal;
+ foundVal = false;
+
+ for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
+ {
+ BasicBlock* curJump = *jumpTab;
+
+ assert(curJump->countOfInEdges() > 0);
+
+ // If val matches switchVal or we are at the last entry and
+ // we never found the switch value then set the new jump dest
+
+ if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
+ {
+ if (curJump != block->bbNext)
+ {
+ /* transform the basic block into a BBJ_ALWAYS */
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = curJump;
+
+ // if we are jumping backwards, make sure we have a GC Poll.
+ if (curJump->bbNum > block->bbNum)
+ {
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ }
+ else
+ {
+ /* transform the basic block into a BBJ_NONE */
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ }
+ foundVal = true;
+ }
+ else
+ {
+ /* Remove 'block' from the predecessor list of 'curJump' */
+ fgRemoveRefPred(curJump, block);
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nConditional folded at BB%02u\n", block->bbNum);
+ printf("BB%02u becomes a %s", block->bbNum,
+ block->bbJumpKind == BBJ_ALWAYS ? "BBJ_ALWAYS" : "BBJ_NONE");
+ if (block->bbJumpKind == BBJ_ALWAYS)
+ {
+ printf(" to BB%02u", block->bbJumpDest->bbNum);
+ }
+ printf("\n");
+ }
+#endif
+ DONE_SWITCH:
+ result = true;
+ }
+ }
+ return result;
+}
+
+//*****************************************************************************
+//
+// Morphs a single statement in a block.
+// Can be called anytime, unlike fgMorphStmts() which should only be called once.
+//
+// Returns true if 'stmt' was removed from the block.
+// Returns false if 'stmt' is still in the block (even if other statements were removed).
+//
+
+bool Compiler::fgMorphBlockStmt(BasicBlock* block, GenTreePtr stmt DEBUGARG(const char* msg))
+{
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ compCurBB = block;
+ compCurStmt = stmt;
+
+ GenTreePtr morph = fgMorphTree(stmt->gtStmt.gtStmtExpr);
+
+ // Bug 1106830 - During the CSE phase we can't just remove
+ // morph->gtOp.gtOp2 as it could contain CSE expressions.
+ // This leads to a noway_assert in OptCSE.cpp when
+ // searching for the removed CSE ref. (using gtFindLink)
+ //
+ if (!optValnumCSE_phase)
+ {
+ /* Check for morph as a GT_COMMA with an unconditional throw */
+ if (fgIsCommaThrow(morph, true))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Folding a top-level fgIsCommaThrow stmt\n");
+ printf("Removing op2 as unreachable:\n");
+ gtDispTree(morph->gtOp.gtOp2);
+ printf("\n");
+ }
+#endif
+ /* Use the call as the new stmt */
+ morph = morph->gtOp.gtOp1;
+ noway_assert(morph->gtOper == GT_CALL);
+ }
+
+ /* We can get a throw as a statement root */
+ if (fgIsThrow(morph))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("We have a top-level fgIsThrow stmt\n");
+ printf("Removing the rest of block as unreachable:\n");
+ }
+#endif
+ noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
+ fgRemoveRestOfBlock = true;
+ }
+ }
+
+ stmt->gtStmt.gtStmtExpr = morph;
+
+ /* Can the entire tree be removed ? */
+
+ bool removedStmt = fgCheckRemoveStmt(block, stmt);
+
+ /* Or this is the last statement of a conditional branch that was just folded */
+
+ if ((!removedStmt) && (stmt->gtNext == nullptr) && !fgRemoveRestOfBlock)
+ {
+ if (fgFoldConditional(block))
+ {
+ if (block->bbJumpKind != BBJ_THROW)
+ {
+ removedStmt = true;
+ }
+ }
+ }
+
+ if (!removedStmt)
+ {
+ /* Have to re-do the evaluation order since for example
+ * some later code does not expect constants as op1 */
+ gtSetStmtInfo(stmt);
+
+ /* Have to re-link the nodes for this statement */
+ fgSetStmtSeq(stmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
+ gtDispTree(morph);
+ printf("\n");
+ }
+#endif
+
+ if (fgRemoveRestOfBlock)
+ {
+ /* Remove the rest of the stmts in the block */
+
+ while (stmt->gtNext)
+ {
+ stmt = stmt->gtNext;
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ fgRemoveStmt(block, stmt);
+ }
+
+ // The rest of the block has been removed
+ // and we will always throw an exception
+
+ // Update successors of block
+ fgRemoveBlockAsPred(block);
+
+ // For compDbgCode, we prepend an empty BB as the firstBB; it is BBJ_NONE.
+ // We should not convert it to a ThrowBB.
+ if ((block != fgFirstBB) || ((fgFirstBB->bbFlags & BBF_INTERNAL) == 0))
+ {
+ // Convert block to a throw bb
+ fgConvertBBToThrowBB(block);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n%s Block BB%02u becomes a throw block.\n", msg, block->bbNum);
+ }
+#endif
+ fgRemoveRestOfBlock = false;
+ }
+
+ return removedStmt;
+}
+
+/*****************************************************************************
+ *
+ * Morph the statements of the given block.
+ * This function should be called just once for a block. Use fgMorphBlockStmt()
+ * for reentrant calls.
+ */
+
+void Compiler::fgMorphStmts(BasicBlock* block, bool* mult, bool* lnot, bool* loadw)
+{
+ fgRemoveRestOfBlock = false;
+
+ noway_assert(fgExpandInline == false);
+
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
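+ // Out-parameters describing patterns seen in this block: 'mult' is set when two or
+ // more +=/-= statements appear in a row, 'loadw' when an "x = a[i] & icon" assignment
+ // is followed by "x |= a[i] << 8"; 'lnot' is only initialized here.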
+ *mult = *lnot = *loadw = false;
+
+ fgCurrentlyInUseArgTemps = hashBv::Create(this);
+
+ GenTreePtr stmt, prev;
+ for (stmt = block->bbTreeList, prev = nullptr; stmt; prev = stmt->gtStmt.gtStmtExpr, stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ if (fgRemoveRestOfBlock)
+ {
+ fgRemoveStmt(block, stmt);
+ continue;
+ }
+#ifdef FEATURE_SIMD
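+ // Try to combine contiguous assignments of SIMD vector fields into a single
+ // SIMD assignment (see fgMorphCombineSIMDFieldAssignments).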
+ if (!opts.MinOpts() && stmt->gtStmt.gtStmtExpr->TypeGet() == TYP_FLOAT &&
+ stmt->gtStmt.gtStmtExpr->OperGet() == GT_ASG)
+ {
+ fgMorphCombineSIMDFieldAssignments(block, stmt);
+ }
+#endif
+
+ fgMorphStmt = stmt;
+ compCurStmt = stmt;
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+#ifdef DEBUG
+ compCurStmtNum++;
+ if (stmt == block->bbTreeList)
+ {
+ block->bbStmtNum = compCurStmtNum; // Set the block->bbStmtNum
+ }
+
+ unsigned oldHash = verbose ? gtHashValue(tree) : DUMMY_INIT(~0);
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree BB%02u, stmt %d (before)\n", block->bbNum, compCurStmtNum);
+ gtDispTree(tree);
+ }
+#endif
+
+ /* Morph this statement tree */
+
+ GenTreePtr morph = fgMorphTree(tree);
+
+ // mark any outgoing arg temps as free so we can reuse them in the next statement.
+
+ fgCurrentlyInUseArgTemps->ZeroAll();
+
+ // Has fgMorphStmt been sneakily changed?
+
+ if (stmt->gtStmt.gtStmtExpr != tree)
+ {
+ /* This must be a tail call. Ignore 'morph' and carry on with
+ the tail-call node */
+
+ morph = stmt->gtStmt.gtStmtExpr;
+ noway_assert(compTailCallUsed);
+ noway_assert((morph->gtOper == GT_CALL) && morph->AsCall()->IsTailCall());
+ noway_assert(stmt->gtNext == nullptr);
+
+ GenTreeCall* call = morph->AsCall();
+ // Could either be
+ // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
+ noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
+ (compCurBB->bbFlags & BBF_HAS_JMP)));
+ }
+ else if (block != compCurBB)
+ {
+ /* This must be a tail call that caused a GCPoll to get
+ injected. We haven't actually morphed the call yet
+ but the flag still got set; clear it here... */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
+#endif
+
+ noway_assert(compTailCallUsed);
+ noway_assert((tree->gtOper == GT_CALL) && tree->AsCall()->IsTailCall());
+ noway_assert(stmt->gtNext == nullptr);
+
+ GenTreeCall* call = morph->AsCall();
+
+ // Could either be
+ // - a tail call dispatched via helper in which case block will be ending with BBJ_THROW or
+ // - a fast call made as jmp in which case block will be ending with BBJ_RETURN and marked as containing
+ // a jmp.
+ noway_assert((call->IsTailCallViaHelper() && (compCurBB->bbJumpKind == BBJ_THROW)) ||
+ (call->IsFastTailCall() && (compCurBB->bbJumpKind == BBJ_RETURN) &&
+ (compCurBB->bbFlags & BBF_HAS_JMP)));
+ }
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_CLONE_EXPR, 30))
+ {
+ // Clone all the trees to stress gtCloneExpr()
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree (stressClone from):\n");
+ gtDispTree(morph);
+ }
+
+ morph = gtCloneExpr(morph);
+ noway_assert(morph);
+
+ if (verbose)
+ {
+ printf("\nfgMorphTree (stressClone to):\n");
+ gtDispTree(morph);
+ }
+ }
+
+ /* If the hash value changed, we modified the tree during morphing */
+ if (verbose)
+ {
+ unsigned newHash = gtHashValue(morph);
+ if (newHash != oldHash)
+ {
+ printf("\nfgMorphTree BB%02u, stmt %d (after)\n", block->bbNum, compCurStmtNum);
+ gtDispTree(morph);
+ }
+ }
+#endif
+
+ /* Check for morph as a GT_COMMA with an unconditional throw */
+ if (!gtIsActiveCSE_Candidate(morph) && fgIsCommaThrow(morph, true))
+ {
+ /* Use the call as the new stmt */
+ morph = morph->gtOp.gtOp1;
+ noway_assert(morph->gtOper == GT_CALL);
+ noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
+
+ fgRemoveRestOfBlock = true;
+ }
+
+ stmt->gtStmt.gtStmtExpr = tree = morph;
+
+ noway_assert(fgPtrArgCntCur == 0);
+
+ if (fgRemoveRestOfBlock)
+ {
+ continue;
+ }
+
+ /* Has the statement been optimized away */
+
+ if (fgCheckRemoveStmt(block, stmt))
+ {
+ continue;
+ }
+
+ /* Check if this block ends with a conditional branch that can be folded */
+
+ if (fgFoldConditional(block))
+ {
+ continue;
+ }
+
+ if (ehBlockHasExnFlowDsc(block))
+ {
+ continue;
+ }
+
+#if OPT_MULT_ADDSUB
+
+ /* Note whether we have two or more +=/-= operators in a row */
+
+ if (tree->gtOper == GT_ASG_ADD || tree->gtOper == GT_ASG_SUB)
+ {
+ if (prev && prev->gtOper == tree->gtOper)
+ {
+ *mult = true;
+ }
+ }
+
+#endif
+
+ /* Note "x = a[i] & icon" followed by "x |= a[i] << 8" */
+
+ if (tree->gtOper == GT_ASG_OR && prev && prev->gtOper == GT_ASG)
+ {
+ *loadw = true;
+ }
+ }
+
+ if (fgRemoveRestOfBlock)
+ {
+ if ((block->bbJumpKind == BBJ_COND) || (block->bbJumpKind == BBJ_SWITCH))
+ {
+ GenTreePtr first = block->bbTreeList;
+ noway_assert(first);
+ GenTreePtr last = first->gtPrev;
+ noway_assert(last && last->gtNext == nullptr);
+ GenTreePtr lastStmt = last->gtStmt.gtStmtExpr;
+
+ if (((block->bbJumpKind == BBJ_COND) && (lastStmt->gtOper == GT_JTRUE)) ||
+ ((block->bbJumpKind == BBJ_SWITCH) && (lastStmt->gtOper == GT_SWITCH)))
+ {
+ GenTreePtr op1 = lastStmt->gtOp.gtOp1;
+
+ if (op1->OperKind() & GTK_RELOP)
+ {
+ /* Unmark the comparison node with GTF_RELOP_JMP_USED */
+ op1->gtFlags &= ~GTF_RELOP_JMP_USED;
+ }
+
+ last->gtStmt.gtStmtExpr = fgMorphTree(op1);
+ }
+ }
+
+ /* Mark block as a BBJ_THROW block */
+ fgConvertBBToThrowBB(block);
+ }
+
+ noway_assert(fgExpandInline == false);
+
+#if FEATURE_FASTTAILCALL
+ GenTreePtr recursiveTailCall = nullptr;
+ if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
+ {
+ fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
+ }
+#endif
+
+#ifdef DEBUG
+ compCurBB = (BasicBlock*)INVALID_POINTER_VALUE;
+#endif
+
+ // Reset this back so that it doesn't leak out impacting other blocks
+ fgRemoveRestOfBlock = false;
+}
+
+/*****************************************************************************
+ *
+ * Morph the blocks of the method.
+ * Returns true if the basic block list is modified.
+ * This function should be called just once.
+ */
+
+void Compiler::fgMorphBlocks()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMorphBlocks()\n");
+ }
+#endif
+
+ /* Since fgMorphTree can be called after various optimizations to re-arrange
+ * the nodes, we need a global flag to signal whether we are in the one-pass
+ * global morphing */
+
+ fgGlobalMorph = true;
+
+#if LOCAL_ASSERTION_PROP
+ //
+ // Local assertion prop is enabled if we are optimizing
+ //
+ optLocalAssertionProp = (!opts.compDbgCode && !opts.MinOpts());
+
+ if (optLocalAssertionProp)
+ {
+ //
+ // Initialize for local assertion prop
+ //
+ optAssertionInit(true);
+ }
+#elif ASSERTION_PROP
+ //
+ // If LOCAL_ASSERTION_PROP is not set
+ // and we have global assertion prop
+ // then local assertion prop is always off
+ //
+ optLocalAssertionProp = false;
+
+#endif
+
+ /*-------------------------------------------------------------------------
+ * Process all basic blocks in the function
+ */
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block);
+
+#ifdef DEBUG
+ compCurStmtNum = 0;
+#endif
+
+ do
+ {
+#if OPT_MULT_ADDSUB
+ bool mult = false;
+#endif
+
+#if OPT_BOOL_OPS
+ bool lnot = false;
+#endif
+
+ bool loadw = false;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMorphing BB%02u of '%s'\n", block->bbNum, info.compFullName);
+ }
+#endif
+
+#if LOCAL_ASSERTION_PROP
+ if (optLocalAssertionProp)
+ {
+ //
+ // Clear out any currently recorded assertion candidates
+ // before processing each basic block;
+ // also, we must handle QMARK-COLON specially.
+ //
+ optAssertionReset(0);
+ }
+#endif
+
+ /* Process all statement trees in the basic block */
+
+ GenTreePtr tree;
+
+ fgMorphStmts(block, &mult, &lnot, &loadw);
+
+#if OPT_MULT_ADDSUB
+
+ if (mult && (opts.compFlags & CLFLG_TREETRANS) && !opts.compDbgCode && !opts.MinOpts())
+ {
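+ // Look for consecutive "lcl += icon" / "lcl -= icon" statements on the same
+ // local and fold each run into a single increment or decrement.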
+ for (tree = block->bbTreeList; tree; tree = tree->gtNext)
+ {
+ noway_assert(tree->gtOper == GT_STMT);
+ GenTreePtr last = tree->gtStmt.gtStmtExpr;
+
+ if (last->gtOper == GT_ASG_ADD || last->gtOper == GT_ASG_SUB)
+ {
+ GenTreePtr temp;
+ GenTreePtr next;
+
+ GenTreePtr dst1 = last->gtOp.gtOp1;
+ GenTreePtr src1 = last->gtOp.gtOp2;
+
+ if (!last->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (dst1->gtOper != GT_LCL_VAR)
+ {
+ goto NOT_CAFFE;
+ }
+ if (!src1->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ for (;;)
+ {
+ GenTreePtr dst2;
+ GenTreePtr src2;
+
+ /* Look at the next statement */
+
+ temp = tree->gtNext;
+ if (!temp)
+ {
+ goto NOT_CAFFE;
+ }
+
+ noway_assert(temp->gtOper == GT_STMT);
+ next = temp->gtStmt.gtStmtExpr;
+
+ if (next->gtOper != last->gtOper)
+ {
+ goto NOT_CAFFE;
+ }
+ if (next->gtType != last->gtType)
+ {
+ goto NOT_CAFFE;
+ }
+
+ dst2 = next->gtOp.gtOp1;
+ src2 = next->gtOp.gtOp2;
+
+ if (dst2->gtOper != GT_LCL_VAR)
+ {
+ goto NOT_CAFFE;
+ }
+ if (dst2->gtLclVarCommon.gtLclNum != dst1->gtLclVarCommon.gtLclNum)
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (!src2->IsCnsIntOrI())
+ {
+ goto NOT_CAFFE;
+ }
+
+ if (last->gtOverflow() != next->gtOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+
+ const ssize_t i1 = src1->gtIntCon.gtIconVal;
+ const ssize_t i2 = src2->gtIntCon.gtIconVal;
+ const ssize_t itemp = i1 + i2;
+
+ /* if the operators are checking for overflow, check for overflow of the operands */
+
+ if (next->gtOverflow())
+ {
+ if (next->TypeGet() == TYP_LONG)
+ {
+ if (next->gtFlags & GTF_UNSIGNED)
+ {
+ ClrSafeInt<UINT64> si1(i1);
+ if ((si1 + ClrSafeInt<UINT64>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ else
+ {
+ ClrSafeInt<INT64> si1(i1);
+ if ((si1 + ClrSafeInt<INT64>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ }
+ else if (next->gtFlags & GTF_UNSIGNED)
+ {
+ ClrSafeInt<UINT32> si1(i1);
+ if ((si1 + ClrSafeInt<UINT32>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ else
+ {
+ ClrSafeInt<INT32> si1(i1);
+ if ((si1 + ClrSafeInt<INT32>(i2)).IsOverflow())
+ {
+ goto NOT_CAFFE;
+ }
+ }
+ }
+
+ /* Fold the two increments/decrements into one */
+
+ src1->gtIntCon.gtIconVal = itemp;
+#ifdef _TARGET_64BIT_
+ if (src1->gtType == TYP_INT)
+ {
+ src1->AsIntCon()->TruncateOrSignExtend32();
+ }
+#endif //_TARGET_64BIT_
+
+ /* Remove the second statement completely */
+
+ noway_assert(tree->gtNext == temp);
+ noway_assert(temp->gtPrev == tree);
+
+ if (temp->gtNext)
+ {
+ noway_assert(temp->gtNext->gtPrev == temp);
+
+ temp->gtNext->gtPrev = tree;
+ tree->gtNext = temp->gtNext;
+ }
+ else
+ {
+ tree->gtNext = nullptr;
+
+ noway_assert(block->bbTreeList->gtPrev == temp);
+
+ block->bbTreeList->gtPrev = tree;
+ }
+ }
+ }
+
+ NOT_CAFFE:;
+ }
+ }
+
+#endif
+
+ /* Are we using a single return block? */
+
+ if (block->bbJumpKind == BBJ_RETURN)
+ {
+ if ((genReturnBB != nullptr) && (genReturnBB != block) && ((block->bbFlags & BBF_HAS_JMP) == 0))
+ {
+ /* We'll jump to the genReturnBB */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if !defined(_TARGET_X86_)
+ if (info.compFlags & CORINFO_FLG_SYNCH)
+ {
+ fgConvertSyncReturnToLeave(block);
+ }
+ else
+#endif // !_TARGET_X86_
+ {
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = genReturnBB;
+ fgReturnCount--;
+ }
+
+ // Note 1: A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
+ // For example, a method returning void could have an empty block with jump kind BBJ_RETURN.
+ // Such blocks do materialize as part of inlining.
+ //
+ // Note 2: A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
+ // It could end with a tail call, a rejected tail call, a monitor.exit, or a GT_INTRINSIC.
+ // For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
+ // is BAD_VAR_NUM.
+ //
+ // TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
+
+ GenTreePtr last = (block->bbTreeList != nullptr) ? block->bbTreeList->gtPrev : nullptr;
+ GenTreePtr ret = (last != nullptr) ? last->gtStmt.gtStmtExpr : nullptr;
+
+ // replace the GT_RETURN node to be a GT_ASG that stores the return value into genReturnLocal.
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ // Method must be returning a value other than TYP_VOID.
+ noway_assert(compMethodHasRetVal());
+
+ // This block must be ending with a GT_RETURN
+ noway_assert(last != nullptr);
+ noway_assert(last->gtOper == GT_STMT);
+ noway_assert(last->gtNext == nullptr);
+ noway_assert(ret != nullptr);
+
+ // GT_RETURN must have non-null operand as the method is returning the value assigned to
+ // genReturnLocal
+ noway_assert(ret->OperGet() == GT_RETURN);
+ noway_assert(ret->gtGetOp1() != nullptr);
+ noway_assert(ret->gtGetOp2() == nullptr);
+
+ GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
+
+ last->gtStmt.gtStmtExpr = (tree->OperIsCopyBlkOp()) ? fgMorphCopyBlock(tree) : tree;
+
+ // make sure that copy-prop ignores this assignment.
+ last->gtStmt.gtStmtExpr->gtFlags |= GTF_DONT_CSE;
+ }
+ else if (ret != nullptr && ret->OperGet() == GT_RETURN)
+ {
+ // This block ends with a GT_RETURN
+ noway_assert(last != nullptr);
+ noway_assert(last->gtOper == GT_STMT);
+ noway_assert(last->gtNext == nullptr);
+
+ // Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
+ noway_assert(ret->TypeGet() == TYP_VOID);
+ noway_assert(ret->gtGetOp1() == nullptr);
+ noway_assert(ret->gtGetOp2() == nullptr);
+
+ fgRemoveStmt(block, last);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("morph BB%02u to point at onereturn. New block is\n", block->bbNum);
+ fgTableDispBasicBlock(block);
+ }
+#endif
+ }
+ }
+
+ block = block->bbNext;
+ } while (block);
+
+ /* We are done with the global morphing phase */
+
+ fgGlobalMorph = false;
+
+#ifdef DEBUG
+ if (verboseTrees)
+ {
+ fgDispBasicBlocks(true);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Make some decisions about the kind of code to generate.
+ */
+
+void Compiler::fgSetOptions()
+{
+#ifdef DEBUG
+ /* Should we force fully interruptible code ? */
+ if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
+ {
+ noway_assert(!codeGen->isGCTypeFixed());
+ genInterruptible = true;
+ }
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgCode)
+ {
+ assert(!codeGen->isGCTypeFixed());
+ genInterruptible = true; // debugging is easier this way ...
+ }
+#endif
+
+ /* Assume we won't need an explicit stack frame if this is allowed */
+
+ // CORINFO_HELP_TAILCALL won't work with localloc because of the restoring of
+ // the callee-saved registers.
+ noway_assert(!compTailCallUsed || !compLocallocUsed);
+
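+ // localloc adjusts the stack pointer at run time, so the frame must be
+ // addressed through a frame pointer.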
+ if (compLocallocUsed)
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+#ifdef _TARGET_X86_
+
+ if (compTailCallUsed)
+ codeGen->setFramePointerRequired(true);
+
+#endif // _TARGET_X86_
+
+ if (!opts.genFPopt)
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+ // Assert that the EH table has been initialized by now. Note that
+ // compHndBBtabAllocCount never decreases; it is a high-water mark
+ // of table allocation. In contrast, compHndBBtabCount does shrink
+ // if we delete a dead EH region, and if it shrinks to zero, the
+ // table pointer compHndBBtab is unreliable.
+ assert(compHndBBtabAllocCount >= info.compXcptnsCount);
+
+#ifdef _TARGET_X86_
+
+ // Note: this case, and the !X86 case below, should both use the
+ // !X86 path. This would require a few more changes for X86 to use
+ // compHndBBtabCount (the current number of EH clauses) instead of
+ // info.compXcptnsCount (the number of EH clauses in IL), such as
+ // in ehNeedsShadowSPslots(). This is because sometimes the IL has
+ // an EH clause that we delete as statically dead code before we
+ // get here, leaving no EH clauses, and thus no requirement
+ // to use a frame pointer because of EH. But until all the code uses
+ // the same test, leave info.compXcptnsCount here.
+ if (info.compXcptnsCount > 0)
+ codeGen->setFramePointerRequiredEH(true);
+
+#else // !_TARGET_X86_
+
+ if (compHndBBtabCount > 0)
+ {
+ codeGen->setFramePointerRequiredEH(true);
+ }
+
+#endif // _TARGET_X86_
+
+ // fgPtrArgCntMax records the maximum number of pushed arguments.
+ // Depending on this value, we may need to use an EBP frame or
+ // be only partially interruptible.
+ //
+
+ if (!compCanEncodePtrArgCntMax())
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for fully interruptible encoding, marking method as partially "
+ "interruptible\n");
+ }
+#endif
+ genInterruptible = false;
+ }
+ if (fgPtrArgCntMax >= sizeof(unsigned))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Too many pushed arguments for an ESP based encoding, forcing an EBP frame\n");
+ }
+#endif
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (info.compCallUnmanaged)
+ {
+ codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
+ }
+
+ if (info.compPublishStubParam)
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (opts.compNeedSecurityCheck)
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+
+#ifndef JIT32_GCENCODER
+
+ // The decoder only reports objects in frames with exceptions if the frame
+ // is fully interruptible.
+ // Even if there is no catch or other way to resume execution in this frame,
+ // the VM requires the security object to remain alive until later, so
+ // frames with security objects must be fully interruptible.
+ genInterruptible = true;
+
+#endif // JIT32_GCENCODER
+ }
+
+ if (compIsProfilerHookNeeded())
+ {
+ codeGen->setFramePointerRequired(true);
+ }
+
+ if (info.compIsVarArgs)
+ {
+ // Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ if (lvaReportParamTypeArg())
+ {
+ codeGen->setFramePointerRequiredGCInfo(true);
+ }
+
+ // printf("method will %s be fully interruptible\n", genInterruptible ? " " : "not");
+}
+
+/*****************************************************************************/
+
+GenTreePtr Compiler::fgInitThisClass()
+{
+ noway_assert(!compIsForInlining());
+
+ CORINFO_LOOKUP_KIND kind = info.compCompHnd->getLocationOfThisType(info.compMethodHnd);
+
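+ // If no runtime lookup is needed, the exact class is known statically and we can
+ // use the shared class-constructor helper directly; otherwise the class must be
+ // resolved from the generic context (the 'this' object, a class param, or a method param).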
+ if (!kind.needsRuntimeLookup)
+ {
+ return fgGetSharedCCtor(info.compClassHnd);
+ }
+ else
+ {
+ // Collectible types require that, for shared generic code, if we use the generic context parameter,
+ // we report it. (This is a conservative approach; we could detect some cases, particularly when the
+ // context parameter is 'this', where we don't need the eager reporting logic.)
+ lvaGenericsContextUsed = true;
+
+ switch (kind.runtimeLookupKind)
+ {
+ case CORINFO_LOOKUP_THISOBJ:
+ // This code takes a this pointer, but we need to pass the static method desc to get the right point in
+ // the hierarchy.
+ {
+ GenTreePtr vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
+ // Vtable pointer of this object
+ vtTree = gtNewOperNode(GT_IND, TYP_I_IMPL, vtTree);
+ vtTree->gtFlags |= GTF_EXCEPT; // Null-pointer exception
+ GenTreePtr methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
+
+ return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
+ gtNewArgList(vtTree, methodHnd));
+ }
+
+ case CORINFO_LOOKUP_CLASSPARAM:
+ {
+ GenTreePtr vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, 0, gtNewArgList(vtTree));
+ }
+
+ case CORINFO_LOOKUP_METHODPARAM:
+ {
+ GenTreePtr methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
+ return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, 0,
+ gtNewArgList(gtNewIconNode(0), methHndTree));
+ }
+ }
+ }
+
+ noway_assert(!"Unknown LOOKUP_KIND");
+ UNREACHABLE();
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Tree walk callback to make sure no GT_QMARK nodes are present in the tree,
+ * except for the allowed ? 1 : 0; pattern.
+ */
+Compiler::fgWalkResult Compiler::fgAssertNoQmark(GenTreePtr* tree, fgWalkData* data)
+{
+ if ((*tree)->OperGet() == GT_QMARK)
+ {
+ fgCheckQmarkAllowedForm(*tree);
+ }
+ return WALK_CONTINUE;
+}
+
+void Compiler::fgCheckQmarkAllowedForm(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_QMARK);
+#ifndef LEGACY_BACKEND
+ assert(!"Qmarks beyond morph disallowed.");
+#else // LEGACY_BACKEND
+ GenTreePtr colon = tree->gtOp.gtOp2;
+
+ assert(colon->gtOp.gtOp1->IsIntegralConst(0));
+ assert(colon->gtOp.gtOp2->IsIntegralConst(1));
+#endif // LEGACY_BACKEND
+}
+
+/*****************************************************************************
+ *
+ * Verify that the importer has created GT_QMARK nodes in a way we can
+ * process them. The following is allowed:
+ *
+ * 1. A top level qmark. Top level qmark is of the form:
+ * a) (bool) ? (void) : (void) OR
+ * b) V0N = (bool) ? (type) : (type)
+ *
+ * 2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
+ * of either op1 of colon or op2 of colon but not a child of any other
+ * operator.
+ */
+void Compiler::fgPreExpandQmarkChecks(GenTreePtr expr)
+{
+ GenTreePtr topQmark = fgGetTopLevelQmark(expr);
+
+ // If the top level Qmark is null, then scan the tree to make sure
+ // there are no qmarks within it.
+ if (topQmark == nullptr)
+ {
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
+ }
+ else
+ {
+ // We could probably expand the cond node also, but we don't think the extra effort is necessary,
+ // so just assert that the cond node of a top level qmark doesn't contain further top level qmarks.
+ fgWalkTreePre(&topQmark->gtOp.gtOp1, Compiler::fgAssertNoQmark, nullptr);
+
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp1);
+ fgPreExpandQmarkChecks(topQmark->gtOp.gtOp2->gtOp.gtOp2);
+ }
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * Get the top level GT_QMARK node in a given "expr"; return NULL if no such
+ * node is present. If the top level GT_QMARK node is assigned to a
+ * GT_LCL_VAR, then return the lcl node in ppDst.
+ *
+ */
+GenTreePtr Compiler::fgGetTopLevelQmark(GenTreePtr expr, GenTreePtr* ppDst /* = NULL */)
+{
+ if (ppDst != nullptr)
+ {
+ *ppDst = nullptr;
+ }
+
+ GenTreePtr topQmark = nullptr;
+ if (expr->gtOper == GT_QMARK)
+ {
+ topQmark = expr;
+ }
+ else if (expr->gtOper == GT_ASG && expr->gtOp.gtOp2->gtOper == GT_QMARK && expr->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ topQmark = expr->gtOp.gtOp2;
+ if (ppDst != nullptr)
+ {
+ *ppDst = expr->gtOp.gtOp1;
+ }
+ }
+ return topQmark;
+}
+
+/*********************************************************************************
+ *
+ * For a castclass helper call,
+ * the importer creates the following tree:
+ * tmp = (op1 == null) ? op1 : ((*op1 == (cse = op2, cse)) ? op1 : helper());
+ *
+ * This method splits the qmark expression created by the importer into the
+ * following blocks: (block, asg, cond1, cond2, helper, remainder)
+ * Notice that op1 is the result for both the conditions. So we coalesce these
+ * assignments into a single block instead of two blocks, resulting in a nested diamond.
+ *
+ * +---------->-----------+
+ * | | |
+ * ^ ^ v
+ * | | |
+ * block-->asg-->cond1--+-->cond2--+-->helper--+-->remainder
+ *
+ * We expect to achieve the following codegen:
+ * mov rsi, rdx tmp = op1 // asgBlock
+ * test rsi, rsi goto skip if tmp == null ? // cond1Block
+ * je SKIP
+ * mov rcx, 0x76543210 cns = op2 // cond2Block
+ * cmp qword ptr [rsi], rcx goto skip if *tmp == op2
+ * je SKIP
+ * call CORINFO_HELP_CHKCASTCLASS_SPECIAL tmp = helper(cns, tmp) // helperBlock
+ * mov rsi, rax
+ * SKIP: // remainderBlock
+ * tmp has the result.
+ *
+ */
+void Compiler::fgExpandQmarkForCastInstOf(BasicBlock* block, GenTreePtr stmt)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ noway_assert(dst != nullptr);
+
+ assert(qmark->gtFlags & GTF_QMARK_CAST_INSTOF);
+
+ // Get cond, true, false exprs for the qmark.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ // Get cond, true, false exprs for the nested qmark.
+ GenTreePtr nestedQmark = falseExpr;
+ GenTreePtr cond2Expr;
+ GenTreePtr true2Expr;
+ GenTreePtr false2Expr;
+
+ if (nestedQmark->gtOper == GT_QMARK)
+ {
+ cond2Expr = nestedQmark->gtGetOp1();
+ true2Expr = nestedQmark->gtGetOp2()->AsColon()->ThenNode();
+ false2Expr = nestedQmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(cond2Expr->gtFlags & GTF_RELOP_QMARK);
+ cond2Expr->gtFlags &= ~GTF_RELOP_QMARK;
+ }
+ else
+ {
+ // This is a rare case that arises when we are doing minopts and encounter isinst of null.
+ // gtFoldExpr was still able to optimize away part of the tree (but not all of it).
+ // That means it does not match our pattern.
+
+ // Rather than write code to handle this case, just fake up some nodes to make it match the common
+ // case. Synthesize a comparison that is always true, and for the result-on-true, use the
+ // entire subtree we expected to be the nested question op.
+
+ cond2Expr = gtNewOperNode(GT_EQ, TYP_INT, gtNewIconNode(0, TYP_I_IMPL), gtNewIconNode(0, TYP_I_IMPL));
+ true2Expr = nestedQmark;
+ false2Expr = gtNewIconNode(0, TYP_I_IMPL);
+ }
+ assert(false2Expr->OperGet() == trueExpr->OperGet());
+
+ // Clear flags as they are now going to be part of JTRUE.
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ // Create the chain of blocks. See method header comment.
+ // The order of blocks after this is the following:
+ // block ... asgBlock ... cond1Block ... cond2Block ... helperBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ BasicBlock* helperBlock = fgNewBBafter(BBJ_NONE, block, true);
+ BasicBlock* cond2Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* cond1Block = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* asgBlock = fgNewBBafter(BBJ_NONE, block, true);
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ helperBlock->bbFlags &= ~BBF_INTERNAL;
+ cond2Block->bbFlags &= ~BBF_INTERNAL;
+ cond1Block->bbFlags &= ~BBF_INTERNAL;
+ asgBlock->bbFlags &= ~BBF_INTERNAL;
+ helperBlock->bbFlags |= BBF_IMPORTED;
+ cond2Block->bbFlags |= BBF_IMPORTED;
+ cond1Block->bbFlags |= BBF_IMPORTED;
+ asgBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ // Chain the flow correctly.
+ fgAddRefPred(asgBlock, block);
+ fgAddRefPred(cond1Block, asgBlock);
+ fgAddRefPred(cond2Block, cond1Block);
+ fgAddRefPred(helperBlock, cond2Block);
+ fgAddRefPred(remainderBlock, helperBlock);
+ fgAddRefPred(remainderBlock, cond1Block);
+ fgAddRefPred(remainderBlock, cond2Block);
+
+ cond1Block->bbJumpDest = remainderBlock;
+ cond2Block->bbJumpDest = remainderBlock;
+
+ // Set the weights; some are guesses.
+ asgBlock->inheritWeight(block);
+ cond1Block->inheritWeight(block);
+ cond2Block->inheritWeightPercentage(cond1Block, 50);
+ helperBlock->inheritWeightPercentage(cond2Block, 50);
+
+ // Append cond1 as JTRUE to cond1Block
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condExpr);
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond1Block, jmpStmt);
+
+ // Append cond2 as JTRUE to cond2Block
+ jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, cond2Expr);
+ jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(cond2Block, jmpStmt);
+
+ // AsgBlock should get tmp = op1 assignment.
+ trueExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), trueExpr);
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(asgBlock, trueStmt);
+
+ // Since we are adding the helper call on the JTRUE false path, reverse cond2 and add the helper.
+ gtReverseCond(cond2Expr);
+ GenTreePtr helperExpr = gtNewTempAssign(dst->AsLclVarCommon()->GetLclNum(), true2Expr);
+ GenTreePtr helperStmt = fgNewStmtFromTree(helperExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(helperBlock, helperStmt);
+
+ // Finally, remove the original statement that contained the nested qmark.
+ fgRemoveStmt(block, stmt);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding CastInstOf qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand a statement with a top level qmark node. There are three cases, based
+ * on whether the qmark has both "true" and "false" arms, or just one of them.
+ *
+ * S0;
+ * C ? T : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * bbj_always
+ * +---->------+
+ * false | |
+ * S0 -->-- ~C -->-- T F -->-- S1
+ * | |
+ * +--->--------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? T : NOP;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- ~C -->-- T -->-- S1
+ * | |
+ * +-->-------------+
+ * bbj_cond(true)
+ *
+ * -----------------------------------------
+ *
+ * S0;
+ * C ? NOP : F;
+ * S1;
+ *
+ * Generates ===>
+ *
+ * false
+ * S0 -->-- C -->-- F -->-- S1
+ * | |
+ * +-->------------+
+ * bbj_cond(true)
+ *
+ * If the qmark assigns to a variable, then create tmps for "then"
+ * and "else" results and assign the temp to the variable as a writeback step.
+ */
+void Compiler::fgExpandQmarkStmt(BasicBlock* block, GenTreePtr stmt)
+{
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+
+ // Retrieve the Qmark node to be expanded.
+ GenTreePtr dst = nullptr;
+ GenTreePtr qmark = fgGetTopLevelQmark(expr, &dst);
+ if (qmark == nullptr)
+ {
+ return;
+ }
+
+ if (qmark->gtFlags & GTF_QMARK_CAST_INSTOF)
+ {
+ fgExpandQmarkForCastInstOf(block, stmt);
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (before)\n", block->bbNum);
+ fgDispBasicBlocks(block, block, true);
+ }
+#endif // DEBUG
+
+ // Retrieve the operands.
+ GenTreePtr condExpr = qmark->gtGetOp1();
+ GenTreePtr trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
+ GenTreePtr falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
+
+ assert(condExpr->gtFlags & GTF_RELOP_QMARK);
+ condExpr->gtFlags &= ~GTF_RELOP_QMARK;
+
+ assert(!varTypeIsFloating(condExpr->TypeGet()));
+
+ bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
+ bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
+ assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
+
+ // Create remainder, cond and "else" blocks. After this, the blocks are in this order:
+ // block ... condBlock ... elseBlock ... remainderBlock
+ //
+ // We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
+ // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently do this only
+ // for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
+ // remainderBlock will still be GC safe.
+ unsigned propagateFlags = block->bbFlags & BBF_GC_SAFE_POINT;
+ BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
+ fgRemoveRefPred(remainderBlock, block); // We're going to put more blocks between block and remainderBlock.
+
+ BasicBlock* condBlock = fgNewBBafter(BBJ_COND, block, true);
+ BasicBlock* elseBlock = fgNewBBafter(BBJ_NONE, condBlock, true);
+
+ // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
+ // If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ condBlock->bbFlags &= ~BBF_INTERNAL;
+ elseBlock->bbFlags &= ~BBF_INTERNAL;
+ condBlock->bbFlags |= BBF_IMPORTED;
+ elseBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+
+ condBlock->inheritWeight(block);
+
+ fgAddRefPred(condBlock, block);
+ fgAddRefPred(elseBlock, condBlock);
+ fgAddRefPred(remainderBlock, elseBlock);
+
+ BasicBlock* thenBlock = nullptr;
+ if (hasTrueExpr && hasFalseExpr)
+ {
+ // bbj_always
+ // +---->------+
+ // false | |
+ // S0 -->-- ~C -->-- T F -->-- S1
+ // | |
+ // +--->--------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = elseBlock;
+
+ thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
+ thenBlock->bbJumpDest = remainderBlock;
+ if ((block->bbFlags & BBF_INTERNAL) == 0)
+ {
+ thenBlock->bbFlags &= ~BBF_INTERNAL;
+ thenBlock->bbFlags |= BBF_IMPORTED;
+ }
+
+ elseBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
+
+ fgAddRefPred(thenBlock, condBlock);
+ fgAddRefPred(remainderBlock, thenBlock);
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasTrueExpr)
+ {
+ // false
+ // S0 -->-- ~C -->-- T -->-- S1
+ // | |
+ // +-->-------------+
+ // bbj_cond(true)
+ //
+ gtReverseCond(condExpr);
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+ // Since we have no false expr, use the one we'd already created.
+ thenBlock = elseBlock;
+ elseBlock = nullptr;
+
+ thenBlock->inheritWeightPercentage(condBlock, 50);
+ }
+ else if (hasFalseExpr)
+ {
+ // false
+ // S0 -->-- C -->-- F -->-- S1
+ // | |
+ // +-->------------+
+ // bbj_cond(true)
+ //
+ condBlock->bbJumpDest = remainderBlock;
+ fgAddRefPred(remainderBlock, condBlock);
+
+ elseBlock->inheritWeightPercentage(condBlock, 50);
+ }
+
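+ // Append the (possibly reversed) condition as a JTRUE at the end of condBlock.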
+ GenTreePtr jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
+ GenTreePtr jmpStmt = fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(condBlock, jmpStmt);
+
+ // Remove the original qmark statement.
+ fgRemoveStmt(block, stmt);
+
+ // Since this is a top level qmark, either it has a dst, in which case we assign
+ // the true and false expressions into that local, or it has no dst and we don't
+ // bother assigning.
+ unsigned lclNum = BAD_VAR_NUM;
+ if (dst != nullptr)
+ {
+ assert(dst->gtOper == GT_LCL_VAR);
+ lclNum = dst->gtLclVar.gtLclNum;
+ }
+ else
+ {
+ assert(qmark->TypeGet() == TYP_VOID);
+ }
+
+ if (hasTrueExpr)
+ {
+ if (dst != nullptr)
+ {
+ trueExpr = gtNewTempAssign(lclNum, trueExpr);
+ }
+ GenTreePtr trueStmt = fgNewStmtFromTree(trueExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(thenBlock, trueStmt);
+ }
+
+ // Assign the falseExpr into the dst or tmp, insert in elseBlock
+ if (hasFalseExpr)
+ {
+ if (dst != nullptr)
+ {
+ falseExpr = gtNewTempAssign(lclNum, falseExpr);
+ }
+ GenTreePtr falseStmt = fgNewStmtFromTree(falseExpr, stmt->gtStmt.gtStmtILoffsx);
+ fgInsertStmtAtEnd(elseBlock, falseStmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nExpanding top-level qmark in BB%02u (after)\n", block->bbNum);
+ fgDispBasicBlocks(block, remainderBlock, true);
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Expand GT_QMARK nodes from the flow graph into basic blocks.
+ *
+ */
+
+void Compiler::fgExpandQmarkNodes()
+{
+ if (compQmarkUsed)
+ {
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+#ifdef DEBUG
+ fgPreExpandQmarkChecks(expr);
+#endif
+ fgExpandQmarkStmt(block, stmt);
+ }
+ }
+#ifdef DEBUG
+ fgPostExpandQmarkChecks();
+#endif
+ }
+ compQmarkRationalized = true;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Make sure we don't have any more GT_QMARK nodes.
+ *
+ */
+void Compiler::fgPostExpandQmarkChecks()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ fgWalkTreePre(&expr, Compiler::fgAssertNoQmark, nullptr);
+ }
+ }
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Transform all basic blocks for codegen.
+ */
+
+void Compiler::fgMorph()
+{
+ noway_assert(!compIsForInlining()); // Inlinee's compiler should never reach here.
+
+ fgOutgoingArgTemps = nullptr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgMorph()\n");
+ }
+ if (verboseTrees)
+ {
+ fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ // Insert call to class constructor as the first basic block if
+ // we were asked to do so.
+ if (info.compCompHnd->initClass(nullptr /* field */, info.compMethodHnd /* method */,
+ impTokenLookupContextHandle /* context */) &
+ CORINFO_INITCLASS_USE_HELPER)
+ {
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtBeg(fgFirstBB, fgInitThisClass());
+ }
+
+#ifdef DEBUG
+ if (opts.compGcChecks)
+ {
+ for (unsigned i = 0; i < info.compArgsCount; i++)
+ {
+ if (lvaTable[i].TypeGet() == TYP_REF)
+ {
+ // confirm that the argument is a GC pointer (for debugging (GC stress))
+ GenTreePtr op = gtNewLclvNode(i, TYP_REF);
+ GenTreeArgList* args = gtNewArgList(op);
+ op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, 0, args);
+
+ fgEnsureFirstBBisScratch();
+ fgInsertStmtAtEnd(fgFirstBB, op);
+ }
+ }
+ }
+
+ if (opts.compStackCheckOnRet)
+ {
+ lvaReturnEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnEspCheck"));
+ lvaTable[lvaReturnEspCheck].lvType = TYP_INT;
+ }
+
+ if (opts.compStackCheckOnCall)
+ {
+ lvaCallEspCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallEspCheck"));
+ lvaTable[lvaCallEspCheck].lvType = TYP_INT;
+ }
+#endif // DEBUG
+
+ /* Filter out unimported BBs */
+
+ fgRemoveEmptyBlocks();
+
+ /* Add any internal blocks/trees we may need */
+
+ fgAddInternal();
+
+#if OPT_BOOL_OPS
+ fgMultipleNots = false;
+#endif
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* Inline */
+ fgInline();
+#if 0
+ JITDUMP("trees after inlining\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+ RecordStateAtEndOfInlining(); // Record "start" values for post-inlining cycles and elapsed time.
+
+#ifdef DEBUG
+ /* Inliner could add basic blocks. Check that the flowgraph data is up-to-date */
+ fgDebugCheckBBlist(false, false);
+#endif // DEBUG
+
+ /* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
+ fgMarkImplicitByRefArgs();
+
+ /* Promote struct locals if necessary */
+ fgPromoteStructs();
+
+ /* Now it is time to figure out which locals are address-taken. */
+ fgMarkAddressExposedLocals();
+
+#ifdef DEBUG
+ /* Now that address-taken locals are marked, we can safely apply stress. */
+ lvaStressLclFld();
+ fgStress64RsltMul();
+#endif // DEBUG
+
+ /* Morph the trees in all the blocks of the method */
+
+ fgMorphBlocks();
+
+#if 0
+ JITDUMP("trees after fgMorphBlocks\n");
+ DBEXEC(VERBOSE, fgDispBasicBlocks(true));
+#endif
+
+ /* Decide the kind of code we want to generate */
+
+ fgSetOptions();
+
+ fgExpandQmarkNodes();
+
+#ifdef DEBUG
+ compCurBB = nullptr;
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Promoting struct locals
+ */
+void Compiler::fgPromoteStructs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In fgPromoteStructs()\n");
+ }
+#endif // DEBUG
+
+ if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
+ {
+ return;
+ }
+
+ if (fgNoStructPromotion)
+ {
+ return;
+ }
+
+#if 0
+ // The code in this #if has been useful in debugging struct promotion issues, by
+ // allowing selective enablement of the struct promotion optimization according to
+ // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("structpromohashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ }
+ char* histr = getenv("structpromohashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ {
+ return;
+ }
+ else
+ {
+ printf("Promoting structs for method %s, hash = 0x%x.\n",
+ info.compFullName, info.compMethodHash());
+ printf(""); // in our logic this causes a flush
+ }
+#endif // DEBUG
+#endif // 0
+
+ if (info.compIsVarArgs)
+ {
+ return;
+ }
+
+ if (getNeedsGSSecurityCookie())
+ {
+ return;
+ }
+
+ // The lvaTable might grow as we grab temps. Make a local copy here.
+
+ unsigned startLvaCount = lvaCount;
+
+ //
+ // Loop through the original lvaTable. Looking for struct locals to be promoted.
+ //
+
+ lvaStructPromotionInfo structPromotionInfo;
+ bool tooManyLocals = false;
+
+ for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
+ {
+ // Whether this var got promoted
+ bool promotedVar = false;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+#ifdef FEATURE_SIMD
+ if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
+ {
+ // If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
+ // its fields. Instead, we will attempt to enregister the entire struct.
+ varDsc->lvRegStruct = true;
+ }
+ else
+#endif // FEATURE_SIMD
+ // Don't promote if we have reached the tracking limit.
+ if (lvaHaveManyLocals())
+ {
+ // Print the message first time when we detected this condition
+ if (!tooManyLocals)
+ {
+ JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
+ }
+ tooManyLocals = true;
+ }
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsMultiRegArg)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegArg.\n", lclNum);
+ }
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsMultiRegRet)
+ {
+ JITDUMP("Skipping V%02u: marked lvIsMultiRegRet.\n", lclNum);
+ }
+ else if (varTypeIsStruct(varDsc))
+ {
+ lvaCanPromoteStructVar(lclNum, &structPromotionInfo);
+ bool canPromote = structPromotionInfo.canPromote;
+
+ // We start off with shouldPromote same as canPromote.
+ // Based on further profitability checks done below, shouldPromote
+ // could be set to false.
+ bool shouldPromote = canPromote;
+
+ if (canPromote)
+ {
+
+ // We *can* promote; *should* we promote?
+ // We should only do so if promotion has potential savings. One source of savings
+ // is if a field of the struct is accessed, since this access will be turned into
+ // an access of the corresponding promoted field variable. Even if there are no
+ // field accesses, but only block-level operations on the whole struct, if the struct
+ // has only one or two fields, then doing those block operations field-wise is probably faster
+ // than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
+ // So if no fields are accessed independently, and there are three or more fields,
+ // then do not promote.
+ if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
+ structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
+ shouldPromote = false;
+ }
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // TODO-PERF - Only do this when the LclVar is used in an argument context
+ // TODO-ARM64 - HFA support should also eliminate the need for this.
+ // TODO-LSRA - Currently doesn't support the passing of floating point LCL_VARS in the integer registers
+ //
+ // For now we don't promote structs with a single float field.
+ // Promoting one can cause us to shuffle it back and forth between the int and
+ // the float regs when it is used as an argument, which is very expensive for XARCH.
+ //
+ else if ((structPromotionInfo.fieldCnt == 1) &&
+ varTypeIsFloating(structPromotionInfo.fields[0].fldType))
+ {
+ JITDUMP("Not promoting promotable struct local V%02u: #fields = %d because it is a struct with "
+ "single float field.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ shouldPromote = false;
+ }
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+
+#if !FEATURE_MULTIREG_STRUCT_PROMOTE
+#if defined(_TARGET_ARM64_)
+ //
+ // For now we don't promote structs that are passed in registers.
+ //
+ else if (lvaIsMultiregStruct(varDsc))
+ {
+ JITDUMP("Not promoting promotable multireg struct local V%02u (size==%d): ", lclNum,
+ lvaLclExactSize(lclNum));
+ shouldPromote = false;
+ }
+#endif // _TARGET_ARM64_
+#endif // !FEATURE_MULTIREG_STRUCT_PROMOTE
+ else if (varDsc->lvIsParam)
+ {
+#if FEATURE_MULTIREG_STRUCT_PROMOTE
+ if (lvaIsMultiregStruct(
+ varDsc) && // Is this a variable holding a value that is passed in multiple registers?
+ (structPromotionInfo.fieldCnt != 2)) // and does it have other than exactly two fields?
+ {
+ JITDUMP(
+ "Not promoting multireg struct local V%02u, because lvIsParam is true and #fields != 2\n",
+ lclNum);
+ shouldPromote = false;
+ }
+ else
+#endif // FEATURE_MULTIREG_STRUCT_PROMOTE
+
+ // TODO-PERF - Implement struct promotion for incoming multireg structs
+ // Currently it hits assert(lvFieldCnt==1) in lclvar.cpp line 4417
+
+ if (structPromotionInfo.fieldCnt != 1)
+ {
+ JITDUMP("Not promoting promotable struct local V%02u, because lvIsParam is true and #fields = "
+ "%d.\n",
+ lclNum, structPromotionInfo.fieldCnt);
+ shouldPromote = false;
+ }
+ }
+
+ //
+ // If the lvRefCnt is zero and we have a promoted struct parameter, we can end up with an extra store of
+ // the incoming register into the stack frame slot.
+ // In that case, we would like to avoid promotion.
+ // However, we haven't yet computed the lvRefCnt values, so we can't do that.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if 0
+ // Often-useful debugging code: if you've narrowed down a struct-promotion problem to a single
+ // method, this allows you to select a subset of the vars to promote (by 1-based ordinal number).
+ static int structPromoVarNum = 0;
+ structPromoVarNum++;
+ if (atoi(getenv("structpromovarnumlo")) <= structPromoVarNum && structPromoVarNum <= atoi(getenv("structpromovarnumhi")))
+#endif // 0
+
+ if (shouldPromote)
+ {
+ assert(canPromote);
+
+ // Promote this struct local var.
+ lvaPromoteStructVar(lclNum, &structPromotionInfo);
+ promotedVar = true;
+
+#ifdef _TARGET_ARM_
+ if (structPromotionInfo.requiresScratchVar)
+ {
+ // Ensure that the scratch variable is allocated, in case we
+ // pass a promoted struct as an argument.
+ if (lvaPromotedStructAssemblyScratchVar == BAD_VAR_NUM)
+ {
+ lvaPromotedStructAssemblyScratchVar =
+ lvaGrabTempWithImplicitUse(false DEBUGARG("promoted struct assembly scratch var."));
+ lvaTable[lvaPromotedStructAssemblyScratchVar].lvType = TYP_I_IMPL;
+ }
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+
+#ifdef FEATURE_SIMD
+ if (!promotedVar && varDsc->lvSIMDType && !varDsc->lvFieldAccessed)
+ {
+ // Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
+ // we will treat it as a reg struct.
+ varDsc->lvRegStruct = true;
+ }
+#endif // FEATURE_SIMD
+ }
+}
+
+Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
+{
+ noway_assert(tree->OperGet() == GT_FIELD);
+ noway_assert(tree->gtFlags & GTF_GLOB_REF);
+
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+
+ /* Is this an instance data member? */
+
+ if (objRef)
+ {
+ if (objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+
+ if (obj->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varTypeIsStruct(obj))
+ {
+ if (varDsc->lvPromoted)
+ {
+ // Promoted struct
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+
+ tree->SetOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
+ tree->gtType = lvaTable[fieldLclIndex].TypeGet();
+ tree->gtFlags &= GTF_NODE_MASK;
+ tree->gtFlags &= ~GTF_GLOB_REF;
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the field in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Normed struct
+ // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
+ // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
+ // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
+ // there is one extremely rare case where that won't be true. An enum type is a special value type
+ // that contains exactly one element of a primitive integer type (which, for CLS programs, is named
+ // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
+ // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
+ // ldfld. For example:
+ //
+ // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
+ // {
+ // .field public specialname rtspecialname int16 value__
+ // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
+ // }
+ // .method public hidebysig static void Main() cil managed
+ // {
+ // .locals init (valuetype mynamespace.e_t V_0)
+ // ...
+ // ldloca.s V_0
+ // ldflda int16 mynamespace.e_t::value__
+ // ...
+ // }
+ //
+ // Normally, compilers will not generate the ldflda, since it is superfluous.
+ //
+ // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
+ // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
+ // mismatch like this, don't do this morphing. The local var may end up getting marked as
+ // address taken, and the appropriate SHORT load will be done from memory in that case.
+
+ if (tree->TypeGet() == obj->TypeGet())
+ {
+ tree->ChangeOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(lclNum);
+ tree->gtFlags &= GTF_NODE_MASK;
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the field in normed struct with the local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ }
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+Compiler::fgWalkResult Compiler::fgMorphLocalField(GenTreePtr tree, fgWalkData* fgWalkPre)
+{
+ noway_assert(tree->OperGet() == GT_LCL_FLD);
+
+ unsigned lclNum = tree->gtLclFld.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (varTypeIsStruct(varDsc) && (varDsc->lvPromoted))
+ {
+ // Promoted struct
+ unsigned fldOffset = tree->gtLclFld.gtLclOffs;
+ unsigned fieldLclIndex = 0;
+ LclVarDsc* fldVarDsc = nullptr;
+
+ if (fldOffset != BAD_VAR_NUM)
+ {
+ fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+ fldVarDsc = &lvaTable[fieldLclIndex];
+ }
+
+ if (fldOffset != BAD_VAR_NUM && genTypeSize(fldVarDsc->TypeGet()) == genTypeSize(tree->gtType)
+#ifdef _TARGET_X86_
+ && varTypeIsFloating(fldVarDsc->TypeGet()) == varTypeIsFloating(tree->gtType)
+#endif
+ )
+ {
+ // There is an existing sub-field we can use
+ tree->gtLclFld.SetLclNum(fieldLclIndex);
+
+ // We need to keep the types 'compatible'; if we can, we switch back to a GT_LCL_VAR.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_ARM_
+ assert(varTypeIsIntegralOrI(tree->TypeGet()) || varTypeIsFloating(tree->TypeGet()));
+#else
+ assert(varTypeIsIntegralOrI(tree->TypeGet()));
+#endif
+ if (varTypeCanReg(fldVarDsc->TypeGet()))
+ {
+ // If the type is integer-ish, then we can use it as-is
+ tree->ChangeOper(GT_LCL_VAR);
+ assert(tree->gtLclVarCommon.gtLclNum == fieldLclIndex);
+ tree->gtType = fldVarDsc->TypeGet();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing the GT_LCL_FLD in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+ else
+ {
+ // There is no existing field that has all the parts that we need
+ // So we must ensure that the struct lives in memory.
+ lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
+
+#ifdef DEBUG
+ // We can't convert this local to a float because its address
+ // really is taken.
+ varDsc->lvKeepType = 1;
+#endif // DEBUG
+ }
+
+ return WALK_SKIP_SUBTREES;
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Mark irregular parameters. For x64 this is 3, 5, 6, 7, >8 byte structs that are passed by reference.
+ * For ARM64, this is structs larger than 16 bytes that are not HFAs; these are passed by reference.
+ */
+void Compiler::fgMarkImplicitByRefArgs()
+{
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMarkImplicitByRefs()\n");
+ }
+#endif // DEBUG
+
+ for (unsigned lclNum = 0; lclNum < lvaCount; lclNum++)
+ {
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ assert(!varDsc->lvPromoted); // Called in the wrong order?
+
+ if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
+ {
+ size_t size;
+
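+ // lvSize() rounds small structs up to a full stack slot, so for structs no larger
+ // than a register ask the VM for the exact size; the by-reference test below
+ // depends on the exact struct size.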
+ if (varDsc->lvSize() > REGSIZE_BYTES)
+ {
+ size = varDsc->lvSize();
+ }
+ else
+ {
+ CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
+ size = info.compCompHnd->getClassSize(typeHnd);
+ }
+
+#if !defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+#if defined(_TARGET_AMD64_)
+ if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
+#elif defined(_TARGET_ARM64_)
+ if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc))
+#endif
+ {
+ // Previously, lvIsParam and lvIsTemp were never both set on the same local,
+ // so lvIsTemp is now used to indicate that this is one of the weird implicit
+ // by-ref locals.
+ // The address taken cleanup will look for references to locals marked like
+ // this, and transform them appropriately.
+ varDsc->lvIsTemp = 1;
+
+ // Also marking them as BYREF will hide them from struct promotion.
+ varDsc->lvType = TYP_BYREF;
+ varDsc->lvRefCnt = 0;
+
+ // Since this previously was a TYP_STRUCT and we have changed it to a TYP_BYREF
+ // make sure that the following flag is not set as these will force SSA to
+ // exclude tracking/enregistering these LclVars. (see fgExcludeFromSsa)
+ //
+ varDsc->lvOverlappingFields = 0; // This flag could have been set, clear it.
+
+#ifdef DEBUG
+ // This should not be converted to a double in stress mode,
+ // because it is really a pointer
+ varDsc->lvKeepType = 1;
+
+ if (verbose)
+ {
+ printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
+ }
+#endif // DEBUG
+ }
+#endif // !FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ }
+
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+}
+
+/*****************************************************************************
+ *
+ * Morph irregular parameters: for x64 and ARM64 this means turning them into byrefs
+ * and adding extra indirections.
+ */
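+//
+// For example (illustrative), for an implicit by-ref struct parameter 'x' the code below rewrites:
+//     GT_ADDR(GT_LCL_VAR<struct> x)   into   GT_LCL_VAR<byref> x
+//     GT_LCL_VAR<struct> x            into   GT_OBJ<struct>(GT_LCL_VAR<byref> x)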
+bool Compiler::fgMorphImplicitByRefArgs(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+#if !defined(_TARGET_AMD64_) && !defined(_TARGET_ARM64_)
+
+ return false;
+
+#else // _TARGET_AMD64_ || _TARGET_ARM64_
+
+ GenTree* tree = *pTree;
+ assert((tree->gtOper == GT_LCL_VAR) || ((tree->gtOper == GT_ADDR) && (tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)));
+
+ bool isAddr = (tree->gtOper == GT_ADDR);
+ GenTreePtr lclVarTree = isAddr ? tree->gtOp.gtOp1 : tree;
+ unsigned lclNum = lclVarTree->gtLclVarCommon.gtLclNum;
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+
+ if (!lvaIsImplicitByRefLocal(lclNum))
+ {
+ // We only need to transform the 'marked' implicit by-ref parameters
+ return false;
+ }
+
+ // The SIMD transformation to coalesce contiguous references to SIMD vector fields will
+ // re-invoke the traversal to mark address-taken locals.
+ // So, we may encounter a tree that has already been transformed to TYP_BYREF.
+ // If we do, leave it as-is.
+ if (!varTypeIsStruct(lclVarTree))
+ {
+ assert(lclVarTree->TypeGet() == TYP_BYREF);
+ return false;
+ }
+
+ // We are overloading the lvRefCnt field here because real ref counts have not been set.
+ lclVarDsc->lvRefCnt++;
+
+ // This is no longer a def of the lclVar, even if it WAS a def of the struct.
+ lclVarTree->gtFlags &= ~(GTF_LIVENESS_MASK);
+
+ if (isAddr)
+ {
+ // change &X into just plain X
+ tree->CopyFrom(lclVarTree, this);
+ tree->gtType = TYP_BYREF;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing address of implicit by ref struct parameter with byref:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // Change X into OBJ(X)
+ var_types structType = tree->gtType;
+ tree->gtType = TYP_BYREF;
+ tree = gtNewObjNode(lclVarDsc->lvVerTypeInfo.GetClassHandle(), tree);
+ if (structType == TYP_STRUCT)
+ {
+ gtSetObjGcInfo(tree->AsObj());
+ }
+
+ // TODO-CQ: If the VM ever stops violating the ABI and passing heap references
+ // we could remove TGTANYWHERE
+ tree->gtFlags = ((tree->gtFlags & GTF_COMMON_MASK) | GTF_IND_TGTANYWHERE);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Replacing value of implicit by ref struct parameter with indir of parameter:\n");
+ gtDispTree(tree, nullptr, nullptr, true);
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ }
+
+ *pTree = tree;
+ return true;
+
+#endif // _TARGET_AMD64_ || _TARGET_ARM64_
+}
+
+// An "AddrExposedContext" expresses the calling context in which an address expression occurs.
+enum AddrExposedContext
+{
+ AXC_None, // None of the below seen yet.
+ AXC_Ind, // The address being computed is to be dereferenced.
+ AXC_Addr, // We're computing a raw address (not dereferenced, at least not immediately).
+ AXC_IndWide, // A block operation dereferenced the address, covering more bytes than the storage
+ // location the address refers to -- if the address refers to a field of a struct local,
+ // we need to consider the entire local address taken (not just the field).
+ AXC_AddrWide, // The address being computed will be dereferenced by a block operation that operates
+ // on more bytes than the width of the storage location addressed. If this is a
+ // field of a promoted struct local, declare the entire struct local address-taken.
+ AXC_InitBlk, // A GT_INITBLK is the immediate parent. The first argument is in an IND context.
+ AXC_CopyBlk, // A GT_COPYBLK is the immediate parent. The first argument is in a GT_LIST, whose
+ // args should be evaluated in an IND context.
+ AXC_IndAdd, // A GT_ADD is the immediate parent, and it was evaluated in an IND context.
+ // If one arg is a constant int, evaluate the other in an IND context. Otherwise, none.
+};
+
+typedef ArrayStack<AddrExposedContext> AXCStack;
+
+// We use the pre- and post-callbacks to simulate passing an argument down the recursion, via a stack.
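+//
+// For example (illustrative): while walking ASG(IND(ADDR(LCL_VAR)), ...) the pre-callback pushes exactly
+// one context per node visited and the post-callback pops it, so Top() always holds the context that the
+// node's parent established for its children.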
+Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPostCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+ AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
+ (void)axcStack->Pop();
+ return WALK_CONTINUE;
+}
+
+Compiler::fgWalkResult Compiler::fgMarkAddrTakenLocalsPreCB(GenTreePtr* pTree, fgWalkData* fgWalkPre)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = fgWalkPre->compiler;
+ AXCStack* axcStack = reinterpret_cast<AXCStack*>(fgWalkPre->pCallbackData);
+ AddrExposedContext axc = axcStack->Top();
+
+ // In some situations, we have to figure out what the effective context is in which to
+ // evaluate the current tree, depending on which argument position it is in its parent.
+
+ switch (axc)
+ {
+
+ case AXC_IndAdd:
+ {
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ assert(parent->OperGet() == GT_ADD);
+ // Is one of the args a constant representing a field offset,
+ // and is this the other? If so, Ind context.
+ if (parent->gtOp.gtOp1->IsCnsIntOrI() && parent->gtOp.gtOp2 == tree)
+ {
+ axc = AXC_Ind;
+ }
+ else if (parent->gtOp.gtOp2->IsCnsIntOrI() && parent->gtOp.gtOp1 == tree)
+ {
+ axc = AXC_Ind;
+ }
+ else
+ {
+ axc = AXC_None;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Now recurse properly for the tree.
+ switch (tree->gtOper)
+ {
+ case GT_IND:
+ if (axc != AXC_Addr)
+ {
+ axcStack->Push(AXC_Ind);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+
+ case GT_BLK:
+ case GT_OBJ:
+ if (axc == AXC_Addr)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (tree->TypeGet() == TYP_STRUCT)
+ {
+ // The block operation will dereference its argument(s) -- usually. If the size of the initblk
+ // or copyblk exceeds the size of a storage location whose address is used as one of the
+ // arguments, then we have to consider that storage location (indeed, its underlying containing
+ // location) to be address taken. So get the width of the initblk or copyblk.
+
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ GenTreeBlk* blk = tree->AsBlk();
+ unsigned width = blk->gtBlkSize;
+ noway_assert(width != 0);
+ axc = AXC_Ind;
+ GenTree* addr = blk->Addr();
+ if (addr->OperGet() == GT_ADDR)
+ {
+ if (parent->gtOper == GT_ASG)
+ {
+ if ((tree == parent->gtOp.gtOp1) &&
+ ((width == 0) || !comp->fgFitsInOrNotLoc(addr->gtGetOp1(), width)))
+ {
+ axc = AXC_IndWide;
+ }
+ }
+ else
+ {
+ assert(parent->gtOper == GT_CALL);
+ }
+ }
+ axcStack->Push(axc);
+ }
+ else
+ {
+ // This is like a regular GT_IND.
+ axcStack->Push(AXC_Ind);
+ }
+ return WALK_CONTINUE;
+
+ case GT_DYN_BLK:
+ // Assume maximal width.
+ axcStack->Push(AXC_IndWide);
+ return WALK_CONTINUE;
+
+ case GT_LIST:
+ if (axc == AXC_InitBlk || axc == AXC_CopyBlk)
+ {
+ axcStack->Push(axc);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+
+ case GT_INDEX:
+ // Taking the address of an array element never takes the address of a local.
+ axcStack->Push(AXC_None);
+ return WALK_CONTINUE;
+
+ case GT_ADDR:
+ // If we have ADDR(lcl), and "lcl" is an implicit byref parameter, fgMorphImplicitByRefArgs will
+ // convert to just "lcl". This is never an address-context use, since the local is already a
+ // byref after this transformation.
+ if (tree->gtOp.gtOp1->OperGet() == GT_LCL_VAR && comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
+ {
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+ // In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the
+ // second case.
+ return WALK_SKIP_SUBTREES;
+ }
+#ifdef FEATURE_SIMD
+ if (tree->gtOp.gtOp1->OperGet() == GT_SIMD)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else
+#endif // FEATURE_SIMD
+ if (axc == AXC_Ind)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (axc == AXC_IndWide)
+ {
+ axcStack->Push(AXC_AddrWide);
+ }
+ else
+ {
+ assert(axc == AXC_None);
+ axcStack->Push(AXC_Addr);
+ }
+ return WALK_CONTINUE;
+
+ case GT_FIELD:
+ // First, handle a couple of special cases: field of promoted struct local, field
+ // of "normed" struct.
+ if (comp->fgMorphStructField(tree, fgWalkPre) == WALK_SKIP_SUBTREES)
+ {
+ // It (may have) replaced the field with a local var or local field. If we're in an addr context,
+ // label it addr-taken.
+ if (tree->OperIsLocal() && (axc == AXC_Addr || axc == AXC_AddrWide))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+ }
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+ return WALK_SKIP_SUBTREES;
+ }
+ else
+ {
+ // GT_FIELD is an implicit deref.
+ if (axc == AXC_Addr)
+ {
+ axcStack->Push(AXC_None);
+ }
+ else if (axc == AXC_AddrWide)
+ {
+ axcStack->Push(AXC_IndWide);
+ }
+ else
+ {
+ axcStack->Push(AXC_Ind);
+ }
+ return WALK_CONTINUE;
+ }
+
+ case GT_LCL_FLD:
+ {
+ assert(axc != AXC_Addr);
+ // fgMorphLocalField recognizes certain forms and does all the work; in that case it returns
+ // WALK_SKIP_SUBTREES, otherwise WALK_CONTINUE. We return the same result here.
+ fgWalkResult res = comp->fgMorphLocalField(tree, fgWalkPre);
+ if (res == WALK_SKIP_SUBTREES && tree->OperGet() == GT_LCL_VAR && (axc == AXC_Addr || axc == AXC_AddrWide))
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+ }
+ // We must push something; if res is WALK_SKIP_SUBTREES it doesn't matter
+ // what, as long as the post callback has something to pop. If we're going
+ // to analyze children, the LCL_FLD creates an Ind context, so use that.
+ axcStack->Push(AXC_Ind);
+ return res;
+ }
+
+ case GT_LCL_VAR:
+ // On some architectures, some arguments are passed implicitly by reference.
+ // Modify the trees to reflect that, if this local is one of those.
+ if (comp->fgMorphImplicitByRefArgs(pTree, fgWalkPre))
+ {
+ // We can't be in an address context; the ADDR(lcl), where lcl is an implicit byref param, was
+ // handled earlier. (And we can't have added anything to this address, since it was implicit.)
+ assert(axc != AXC_Addr);
+ }
+ else
+ {
+ if (axc == AXC_Addr || axc == AXC_AddrWide)
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ comp->lvaSetVarAddrExposed(lclNum);
+ if (axc == AXC_AddrWide)
+ {
+ LclVarDsc* varDsc = &comp->lvaTable[lclNum];
+ if (varDsc->lvIsStructField)
+ {
+ comp->lvaSetVarAddrExposed(varDsc->lvParentLcl);
+ }
+ }
+
+ // We may need to quirk the storage size for this LCL_VAR:
+ // some PInvoke signatures incorrectly specify a ByRef to an INT32
+ // when they actually write a SIZE_T or INT64.
+ if (axc == AXC_Addr)
+ {
+ comp->gtCheckQuirkAddrExposedLclVar(tree, fgWalkPre->parentStack);
+ }
+ }
+ }
+ // Push something to keep the PostCB, which will pop it, happy.
+ axcStack->Push(AXC_None);
+ // In the first case, tree may no longer be a leaf, but we're done with it; it is a leaf in the second case.
+ return WALK_SKIP_SUBTREES;
+
+ case GT_ADD:
+ assert(axc != AXC_Addr);
+ // See below about treating pointer operations as wider indirection.
+ if (tree->gtOp.gtOp1->gtType == TYP_BYREF || tree->gtOp.gtOp2->gtType == TYP_BYREF)
+ {
+ axcStack->Push(AXC_IndWide);
+ }
+ else if (axc == AXC_Ind)
+ {
+ // Let the children know that the parent was a GT_ADD, to be evaluated in an IND context.
+ // If it's an add of a constant and an address, and the constant represents a field,
+ // then we'll evaluate the address argument in an Ind context; otherwise, the None context.
+ axcStack->Push(AXC_IndAdd);
+ }
+ else
+ {
+ axcStack->Push(axc);
+ }
+ return WALK_CONTINUE;
+
+ // !!! Treat Pointer Operations as Wider Indirection
+ //
+ // If we are performing pointer operations, make sure we treat that as equivalent to a wider
+ // indirection. This is because the pointers could be pointing to the address of struct fields
+ // and could be used to perform operations on the whole struct or passed to another method.
+ //
+ // When visiting a node in this pre-order walk, we do not know if we would in the future
+ // encounter a GT_ADDR of a GT_FIELD below.
+ //
+ // Note: GT_ADDR of a GT_FIELD is always a TYP_BYREF.
+ // So let us be conservative and treat TYP_BYREF operations as AXC_IndWide and propagate a
+ // wider indirection context down the expr tree.
+ //
+ // For example, in unsafe code:
+ //
+ // IL_000e 12 00 ldloca.s 0x0
+ // IL_0010 7c 02 00 00 04 ldflda 0x4000002
+ // IL_0015 12 00 ldloca.s 0x0
+ // IL_0017 7c 01 00 00 04 ldflda 0x4000001
+ // IL_001c 59 sub
+ //
+ // When visiting the GT_SUB node, if the type of either of GT_SUB's operands is BYREF, then
+ // consider GT_SUB to be equivalent to an AXC_IndWide.
+ //
+ // Similarly for pointer comparisons and pointer escaping as integers through conversions, treat
+ // them as AXC_IndWide.
+ //
+
+ // BINOP
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ case GT_UDIV:
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+ // UNOP
+ case GT_CAST:
+ if ((tree->gtOp.gtOp1->gtType == TYP_BYREF) ||
+ (tree->OperIsBinary() && (tree->gtOp.gtOp2->gtType == TYP_BYREF)))
+ {
+ axcStack->Push(AXC_IndWide);
+ return WALK_CONTINUE;
+ }
+ __fallthrough;
+
+ default:
+ // To be safe/conservative: pass Addr through, but not Ind -- otherwise, revert to "None". We must
+ // handle the "Ind" propagation explicitly above.
+ if (axc == AXC_Addr || axc == AXC_AddrWide)
+ {
+ axcStack->Push(axc);
+ }
+ else
+ {
+ axcStack->Push(AXC_None);
+ }
+ return WALK_CONTINUE;
+ }
+}
+
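+// fgFitsInOrNotLoc: Returns true if an access of 'width' bytes is known to fit within the storage
+// location denoted by 'tree' (a non-struct value, a struct local, a field, or an array element);
+// returns false otherwise, including when we cannot tell.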
+bool Compiler::fgFitsInOrNotLoc(GenTreePtr tree, unsigned width)
+{
+ if (tree->TypeGet() != TYP_STRUCT)
+ {
+ return width <= genTypeSize(tree->TypeGet());
+ }
+ else if (tree->OperGet() == GT_LCL_VAR)
+ {
+ assert(tree->TypeGet() == TYP_STRUCT);
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ return width <= lvaTable[lclNum].lvExactSize;
+ }
+ else if (tree->OperGet() == GT_FIELD)
+ {
+ CORINFO_CLASS_HANDLE fldClass = info.compCompHnd->getFieldClass(tree->gtField.gtFldHnd);
+ return width <= info.compCompHnd->getClassSize(fldClass);
+ }
+ else if (tree->OperGet() == GT_INDEX)
+ {
+ return width <= tree->gtIndex.gtIndElemSize;
+ }
+ else
+ {
+ return false;
+ }
+}
+
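+// fgAddFieldSeqForZeroOffset: Record that 'fieldSeq' applies at offset zero from the address computed
+// by 'op1'. Where the address already carries a field sequence (a GT_LCL_FLD under a GT_ADDR, a constant
+// operand of a GT_ADD, or a bare GT_CNS_INT), the new sequence is appended to it; otherwise the pair is
+// recorded in the general zero-offset field map.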
+void Compiler::fgAddFieldSeqForZeroOffset(GenTreePtr op1, FieldSeqNode* fieldSeq)
+{
+ assert(op1->TypeGet() == TYP_BYREF || op1->TypeGet() == TYP_I_IMPL || op1->TypeGet() == TYP_REF);
+
+ switch (op1->OperGet())
+ {
+ case GT_ADDR:
+ if (op1->gtOp.gtOp1->OperGet() == GT_LCL_FLD)
+ {
+ GenTreeLclFld* lclFld = op1->gtOp.gtOp1->AsLclFld();
+ lclFld->gtFieldSeq = GetFieldSeqStore()->Append(lclFld->gtFieldSeq, fieldSeq);
+ }
+ break;
+
+ case GT_ADD:
+ if (op1->gtOp.gtOp1->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* op1Fs = op1->gtOp.gtOp1->gtIntCon.gtFieldSeq;
+ if (op1Fs != nullptr)
+ {
+ op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
+ op1->gtOp.gtOp1->gtIntCon.gtFieldSeq = op1Fs;
+ }
+ }
+ else if (op1->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* op2Fs = op1->gtOp.gtOp2->gtIntCon.gtFieldSeq;
+ if (op2Fs != nullptr)
+ {
+ op2Fs = GetFieldSeqStore()->Append(op2Fs, fieldSeq);
+ op1->gtOp.gtOp2->gtIntCon.gtFieldSeq = op2Fs;
+ }
+ }
+ break;
+
+ case GT_CNS_INT:
+ {
+ FieldSeqNode* op1Fs = op1->gtIntCon.gtFieldSeq;
+ if (op1Fs != nullptr)
+ {
+ op1Fs = GetFieldSeqStore()->Append(op1Fs, fieldSeq);
+ op1->gtIntCon.gtFieldSeq = op1Fs;
+ }
+ }
+ break;
+
+ default:
+ // Record in the general zero-offset map.
+ GetZeroOffsetFieldMap()->Set(op1, fieldSeq);
+ break;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Mark address-taken locals.
+ */
+
+void Compiler::fgMarkAddressExposedLocals()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In fgMarkAddressExposedLocals()\n");
+ }
+#endif // DEBUG
+
+ BasicBlock* block = fgFirstBB;
+ noway_assert(block);
+
+ do
+ {
+ /* Make the current basic block address available globally */
+
+ compCurBB = block;
+
+ GenTreePtr stmt;
+
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ // Walk each node of the statement, marking address-taken locals via fgMarkAddrTakenLocalsPreCB/PostCB.
+ AXCStack stk(this);
+ stk.Push(AXC_None); // We start in neither an addr or ind context.
+ fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
+ }
+
+ block = block->bbNext;
+
+ } while (block);
+}
+
+// fgNodesMayInterfere:
+// return true if moving the nodes relative to each other can change the result of a computation
+//
+// args:
+// write: a node which writes (a store to a local or an indirection)
+// read: a node which reads
+//
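+// For example (illustrative): an indirect store may interfere with another indirection or with a read of
+// an address-exposed local, and a store to local V01 interferes with a read of V01, but a store to V01
+// does not interfere with a read of a different, non-address-exposed local V02.
+//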
+
+bool Compiler::fgNodesMayInterfere(GenTree* write, GenTree* read)
+{
+ LclVarDsc* srcVar = nullptr;
+
+ bool readIsIndir = read->OperIsIndir() || read->OperIsImplicitIndir();
+ bool writeIsIndir = write->OperIsIndir() || write->OperIsImplicitIndir();
+
+ if (read->OperIsLocal())
+ {
+ srcVar = &lvaTable[read->gtLclVarCommon.gtLclNum];
+ }
+
+ if (writeIsIndir)
+ {
+ if (srcVar && srcVar->lvAddrExposed)
+ {
+ return true;
+ }
+ else if (readIsIndir)
+ {
+ return true;
+ }
+ return false;
+ }
+ else if (write->OperIsLocal())
+ {
+ LclVarDsc* dstVar = &lvaTable[write->gtLclVarCommon.gtLclNum];
+ if (readIsIndir)
+ {
+ return dstVar->lvAddrExposed;
+ }
+ else if (read->OperIsLocal())
+ {
+ if (read->gtLclVarCommon.gtLclNum == write->gtLclVarCommon.gtLclNum)
+ {
+ return true;
+ }
+ return false;
+ }
+ else
+ {
+ return false;
+ }
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/** This predicate decides whether we will fold a tree with the structure:
+ * x = x <op> y where x could be any arbitrary expression into
+ * x <op>= y.
+ *
+ * This modification is only performed when the target architecture supports
+ * complex addressing modes. In the case of ARM for example, this transformation
+ * yields no benefit.
+ *
+ * If this function decides we can fold into an assignment operator, it also reports (via *bReverse)
+ * whether the operands must be reversed first: when the operator is commutative and we saw
+ * x = y <op> x, we still fold to x <op>= y, but fgMorph needs to swap the operands to do so.
+ */
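+// For example (an illustrative sketch, legacy backend only): "x = x + y" returns true with
+// *bReverse == false, while "x = y + x" with a commutative operator returns true with *bReverse == true;
+// either way the caller can then fold the assignment into "x += y".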
+bool Compiler::fgShouldCreateAssignOp(GenTreePtr tree, bool* bReverse)
+{
+#if CPU_LOAD_STORE_ARCH
+ /* In the case of a load/store architecture, there's no gain by doing any of this, we bail. */
+ return false;
+#elif !defined(LEGACY_BACKEND)
+ return false;
+#else // defined(LEGACY_BACKEND)
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+ genTreeOps cmop = op2->OperGet();
+
+ /* Is the destination identical to the first RHS sub-operand? */
+ if (GenTree::Compare(op1, op2->gtOp.gtOp1))
+ {
+ /*
+ Do not transform the following tree
+
+ [0024CFA4] ----------- const int 1
+ [0024CFDC] ----G------ | int
+ [0024CF5C] ----------- lclVar ubyte V01 tmp0
+ [0024D05C] -A--G------ = ubyte
+ [0024D014] D------N--- lclVar ubyte V01 tmp0
+
+ to
+
+ [0024CFA4] ----------- const int 1
+ [0024D05C] -A--G------ |= ubyte
+ [0024D014] U------N--- lclVar ubyte V01 tmp0
+
+ , when V01 is a struct field local.
+ */
+
+ if (op1->gtOper == GT_LCL_VAR && varTypeIsSmall(op1->TypeGet()) && op1->TypeGet() != op2->gtOp.gtOp2->TypeGet())
+ {
+ unsigned lclNum = op1->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ if (varDsc->lvIsStructField)
+ {
+ return false;
+ }
+ }
+
+ *bReverse = false;
+ return true;
+ }
+ else if (GenTree::OperIsCommutative(cmop))
+ {
+ /* For commutative ops only, check for "a = x <op> a" */
+
+ /* Should we be doing this at all? */
+ if ((opts.compFlags & CLFLG_TREETRANS) == 0)
+ {
+ return false;
+ }
+
+ /* Can we swap the operands to cmop ... */
+ if ((op2->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT) && (op2->gtOp.gtOp2->gtFlags & GTF_ALL_EFFECT))
+ {
+ // Both operands have side effects -- we cannot swap them, so bail.
+ return false;
+ }
+
+ /* Is the destination identical to the second RHS sub-operand? */
+ if (GenTree::Compare(op1, op2->gtOp.gtOp2))
+ {
+ *bReverse = true;
+ return true;
+ }
+ }
+ return false;
+#endif // defined(LEGACY_BACKEND)
+}
+
+// Static variables.
+Compiler::MorphAddrContext Compiler::s_CopyBlockMAC(Compiler::MACK_CopyBlock);
+
+#ifdef FEATURE_SIMD
+
+//-----------------------------------------------------------------------------------
+// fgMorphCombineSIMDFieldAssignments:
+// If the RHS of the input stmt is a read of field X of a SIMD vector, then this function
+// keeps reading the next few stmts, based on the vector size (2, 3, or 4 elements).
+// If the LHSs of those stmts are contiguous and the RHSs are also contiguous,
+// then we replace those statements with a single copyblk.
+//
+// Arguments:
+// block - BasicBlock*. block which stmt belongs to
+// stmt - GenTreeStmt*. the stmt node we want to check
+//
+// Return Value:
+// true if this function successfully combined the stmts; otherwise false.
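+//
+// For example (illustrative), for a Vector3 'v' (12 bytes of float) the sequence
+// d.x = v.X; d.y = v.Y; d.z = v.Z;
+// can be replaced by a single 12-byte block copy from 'v' to 'd'.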
+
+bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, GenTreePtr stmt)
+{
+
+ noway_assert(stmt->gtOper == GT_STMT);
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ assert(tree->OperGet() == GT_ASG);
+
+ GenTreePtr originalLHS = tree->gtOp.gtOp1;
+ GenTreePtr prevLHS = tree->gtOp.gtOp1;
+ GenTreePtr prevRHS = tree->gtOp.gtOp2;
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr simdStructNode = getSIMDStructFromField(prevRHS, &baseType, &index, &simdSize, true);
+
+ if (simdStructNode == nullptr || index != 0 || baseType != TYP_FLOAT)
+ {
+ // if the RHS is not from a SIMD vector field X, then there is no need to check further.
+ return false;
+ }
+
+ var_types simdType = getSIMDTypeForSize(simdSize);
+ int assignmentsCount = simdSize / genTypeSize(baseType) - 1;
+ int remainingAssignments = assignmentsCount;
+ GenTreePtr curStmt = stmt->gtNext;
+ GenTreePtr lastStmt = stmt;
+
+ while (curStmt != nullptr && remainingAssignments > 0)
+ {
+ GenTreePtr exp = curStmt->gtStmt.gtStmtExpr;
+ if (exp->OperGet() != GT_ASG)
+ {
+ break;
+ }
+ GenTreePtr curLHS = exp->gtGetOp1();
+ GenTreePtr curRHS = exp->gtGetOp2();
+
+ if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
+ {
+ break;
+ }
+
+ remainingAssignments--;
+ prevLHS = curLHS;
+ prevRHS = curRHS;
+
+ lastStmt = curStmt;
+ curStmt = curStmt->gtNext;
+ }
+
+ if (remainingAssignments > 0)
+ {
+ // If there are assignments remaining, then the statements are not assigning to
+ // contiguous memory locations from the same vector.
+ return false;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
+ printf("From BB%02u, stmt", block->bbNum);
+ printTreeID(stmt);
+ printf(" to stmt");
+ printTreeID(lastStmt);
+ printf("\n");
+ }
+#endif
+
+ for (int i = 0; i < assignmentsCount; i++)
+ {
+ fgRemoveStmt(block, stmt->gtNext);
+ }
+
+ GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
+ if (simdStructNode->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(simdStructNode);
+ }
+ GenTree* copyBlkAddr = copyBlkDst;
+ if (copyBlkAddr->gtOper == GT_LEA)
+ {
+ copyBlkAddr = copyBlkAddr->AsAddrMode()->Base();
+ }
+ GenTreeLclVarCommon* localDst = nullptr;
+ if (copyBlkAddr->IsLocalAddrExpr(this, &localDst, nullptr))
+ {
+ setLclRelatedToSIMDIntrinsic(localDst);
+ }
+
+ GenTree* simdStructAddr;
+ if (simdStructNode->TypeGet() == TYP_BYREF)
+ {
+ assert(simdStructNode->OperIsLocal());
+ assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->gtLclNum));
+ simdStructNode = gtNewOperNode(GT_IND, simdType, simdStructNode);
+ }
+ else
+ {
+ assert(varTypeIsSIMD(simdStructNode));
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nBB%02u stmt", block->bbNum);
+ printTreeID(stmt);
+ printf("(before)\n");
+ gtDispTree(stmt);
+ }
+#endif
+
+ // TODO-1stClassStructs: we should be able to simply use a GT_IND here.
+ GenTree* blkNode = gtNewBlockVal(copyBlkDst, simdSize);
+ blkNode->gtType = simdType;
+ tree = gtNewBlkOpNode(blkNode, simdStructNode, simdSize,
+ false, // not volatile
+ true); // copyBlock
+
+ stmt->gtStmt.gtStmtExpr = tree;
+
+ // Since we generated a new address node which didn't exist before,
+ // we should expose this address manually here.
+ AXCStack stk(this);
+ stk.Push(AXC_None);
+ fgWalkTree(&stmt->gtStmt.gtStmtExpr, fgMarkAddrTakenLocalsPreCB, fgMarkAddrTakenLocalsPostCB, &stk);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nReplaced BB%02u stmt", block->bbNum);
+ printTreeID(stmt);
+ printf("(after)\n");
+ gtDispTree(stmt);
+ }
+#endif
+ return true;
+}
+
+#endif // FEATURE_SIMD
diff --git a/src/jit/nodeinfo.h b/src/jit/nodeinfo.h
new file mode 100644
index 0000000000..a73033a91f
--- /dev/null
+++ b/src/jit/nodeinfo.h
@@ -0,0 +1,161 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _NODEINFO_H_
+#define _NODEINFO_H_
+
+struct GenTree;
+
+class LinearScan;
+typedef unsigned int LsraLocation;
+
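+// TreeNodeInfo records, for a single GenTree node, the register requirements that LSRA consumes when
+// building RefPositions: how many source and destination registers the node uses and defines, how many
+// internal int/float temporaries it needs, and (via the candidate accessors below) which register sets
+// each of those may come from.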
+class TreeNodeInfo
+{
+public:
+ TreeNodeInfo()
+ {
+ loc = 0;
+ _dstCount = 0;
+ _srcCount = 0;
+ _internalIntCount = 0;
+ _internalFloatCount = 0;
+
+ srcCandsIndex = 0;
+ dstCandsIndex = 0;
+ internalCandsIndex = 0;
+ isLocalDefUse = false;
+ isHelperCallWithKills = false;
+ isLsraAdded = false;
+ isDelayFree = false;
+ hasDelayFreeSrc = false;
+ isTgtPref = false;
+ regOptional = false;
+ definesAnyRegisters = false;
+#ifdef DEBUG
+ isInitialized = false;
+#endif
+ }
+
+ // dst
+ __declspec(property(put = setDstCount, get = getDstCount)) int dstCount;
+ void setDstCount(int count)
+ {
+ assert(count <= MAX_RET_REG_COUNT);
+ _dstCount = (char)count;
+ }
+ int getDstCount()
+ {
+ return _dstCount;
+ }
+
+ // src
+ __declspec(property(put = setSrcCount, get = getSrcCount)) int srcCount;
+ void setSrcCount(int count)
+ {
+ _srcCount = (char)count;
+ assert(_srcCount == count);
+ }
+ int getSrcCount()
+ {
+ return _srcCount;
+ }
+
+ // internalInt
+ __declspec(property(put = setInternalIntCount, get = getInternalIntCount)) int internalIntCount;
+ void setInternalIntCount(int count)
+ {
+ _internalIntCount = (char)count;
+ assert(_internalIntCount == count);
+ }
+ int getInternalIntCount()
+ {
+ return _internalIntCount;
+ }
+
+ // internalFloat
+ __declspec(property(put = setInternalFloatCount, get = getInternalFloatCount)) int internalFloatCount;
+ void setInternalFloatCount(int count)
+ {
+ _internalFloatCount = (char)count;
+ assert(_internalFloatCount == count);
+ }
+ int getInternalFloatCount()
+ {
+ return _internalFloatCount;
+ }
+
+ // SrcCandidates are constraints of the consuming (parent) operation applied to this node
+ // (i.e. what registers it is constrained to consume).
+ regMaskTP getSrcCandidates(LinearScan* lsra);
+ void setSrcCandidates(LinearScan* lsra, regMaskTP mask);
+ // DstCandidates are constraints of this node (i.e. what registers it is constrained to produce).
+ regMaskTP getDstCandidates(LinearScan* lsra);
+ void setDstCandidates(LinearScan* lsra, regMaskTP mask);
+ // InternalCandidates are constraints of the registers used as temps in the evaluation of this node.
+ regMaskTP getInternalCandidates(LinearScan* lsra);
+ void setInternalCandidates(LinearScan* lsra, regMaskTP mask);
+ void addInternalCandidates(LinearScan* lsra, regMaskTP mask);
+
+ LsraLocation loc;
+
+private:
+ unsigned char _dstCount;
+ unsigned char _srcCount;
+ unsigned char _internalIntCount;
+ unsigned char _internalFloatCount;
+
+public:
+ unsigned char srcCandsIndex;
+ unsigned char dstCandsIndex;
+ unsigned char internalCandsIndex;
+
+ // isLocalDefUse identifies trees that produce a value that is not consumed elsewhere.
+ // Examples include stack arguments to a call (they are immediately stored), lhs of comma
+ // nodes, or top-level nodes that are non-void.
+ unsigned char isLocalDefUse : 1;
+ // isHelperCallWithKills is set when this is a helper call that kills more than just its in/out regs.
+ unsigned char isHelperCallWithKills : 1;
+ // Is this node added by LSRA, e.g. as a resolution or copy/reload move.
+ unsigned char isLsraAdded : 1;
+ // isDelayFree is set when the register defined by this node will interfere with the destination
+ // of the consuming node, and therefore it must not be freed immediately after use.
+ unsigned char isDelayFree : 1;
+ // hasDelayFreeSrc is set when this node has sources that are marked "isDelayFree". This is because
+ // we may eventually "contain" this node, in which case we don't want its children (which have
+ // already been marked "isDelayFree") to be handled that way when allocating.
+ unsigned char hasDelayFreeSrc : 1;
+ // isTgtPref is set to true when we have an RMW op, where we would like the result to be allocated
+ // in the same register as op1.
+ unsigned char isTgtPref : 1;
+ // Whether a spilled second src can be treated as a contained operand
+ unsigned char regOptional : 1;
+ // Whether or not a node defines any registers, whether directly (for nodes where dstCount is non-zero)
+ // or indirectly (for contained nodes, which propagate the transitive closure of the registers
+ // defined by their inputs). Used during buildRefPositionsForNode in order to avoid unnecessary work.
+ unsigned char definesAnyRegisters : 1;
+
+#ifdef DEBUG
+ // isInitialized is set when the tree node is handled.
+ unsigned char isInitialized : 1;
+#endif
+
+public:
+ // Initializes the TreeNodeInfo value with the given values.
+ void Initialize(LinearScan* lsra, GenTree* node, LsraLocation location);
+
+#ifdef DEBUG
+ void dump(LinearScan* lsra);
+
+ // This method checks to see whether the information has been initialized,
+ // and is in a consistent state
+ bool IsValid(LinearScan* lsra)
+ {
+ return (isInitialized &&
+ ((getSrcCandidates(lsra) | getInternalCandidates(lsra) | getDstCandidates(lsra)) &
+ ~(RBM_ALLFLOAT | RBM_ALLINT)) == 0);
+ }
+#endif // DEBUG
+};
+
+#endif // _NODEINFO_H_
diff --git a/src/jit/objectalloc.cpp b/src/jit/objectalloc.cpp
new file mode 100644
index 0000000000..2e19f4378d
--- /dev/null
+++ b/src/jit/objectalloc.cpp
@@ -0,0 +1,207 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ObjectAllocator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+//===============================================================================
+
+//------------------------------------------------------------------------
+// DoPhase: Run analysis (if object stack allocation is enabled) and then
+// morph each GT_ALLOCOBJ node either into an allocation helper
+// call or stack allocation.
+// Notes:
+// Runs only if Compiler::optMethodFlags has flag OMF_HAS_NEWOBJ set.
+void ObjectAllocator::DoPhase()
+{
+ if ((comp->optMethodFlags & OMF_HAS_NEWOBJ) == 0)
+ {
+ return;
+ }
+
+ if (IsObjectStackAllocationEnabled())
+ {
+ DoAnalysis();
+ }
+
+ MorphAllocObjNodes();
+}
+
+//------------------------------------------------------------------------
+// DoAnalysis: Walk over basic blocks of the method and detect all local
+// variables that can be allocated on the stack.
+//
+// Assumptions:
+// Must be run after the dominators have been computed (we need this
+// information to detect loops).
+void ObjectAllocator::DoAnalysis()
+{
+ assert(m_IsObjectStackAllocationEnabled);
+ assert(comp->fgDomsComputed);
+ // TODO-ObjectStackAllocation
+ NYI("DoAnalysis");
+}
+
+//------------------------------------------------------------------------
+// MorphAllocObjNodes: Morph each GT_ALLOCOBJ node either into an
+// allocation helper call or stack allocation.
+//
+// Notes:
+// Runs only over the blocks having bbFlags BBF_HAS_NEWOBJ set.
+void ObjectAllocator::MorphAllocObjNodes()
+{
+ BasicBlock* block;
+
+ foreach_block(comp, block)
+ {
+ const bool basicBlockHasNewObj = (block->bbFlags & BBF_HAS_NEWOBJ) == BBF_HAS_NEWOBJ;
+#ifndef DEBUG
+ if (!basicBlockHasNewObj)
+ {
+ continue;
+ }
+#endif // DEBUG
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr stmtExpr = stmt->gtStmtExpr;
+ GenTreePtr op2 = nullptr;
+
+ bool canonicalAllocObjFound = false;
+
+ if (stmtExpr->OperGet() == GT_ASG && stmtExpr->TypeGet() == TYP_REF)
+ {
+ op2 = stmtExpr->gtGetOp2();
+
+ if (op2->OperGet() == GT_ALLOCOBJ)
+ {
+ canonicalAllocObjFound = true;
+ }
+ }
+
+ if (canonicalAllocObjFound)
+ {
+ assert(basicBlockHasNewObj);
+ //------------------------------------------------------------------------
+ // We expect the following expression tree at this point
+ // * GT_STMT void (top level)
+ // | /--* GT_ALLOCOBJ ref
+ // \--* GT_ASG ref
+ // \--* GT_LCL_VAR ref
+ //------------------------------------------------------------------------
+
+ GenTreePtr op1 = stmtExpr->gtGetOp1();
+
+ assert(op1->OperGet() == GT_LCL_VAR);
+ assert(op1->TypeGet() == TYP_REF);
+ assert(op2 != nullptr);
+ assert(op2->OperGet() == GT_ALLOCOBJ);
+
+ GenTreeAllocObj* asAllocObj = op2->AsAllocObj();
+ unsigned int lclNum = op1->AsLclVar()->GetLclNum();
+
+ if (IsObjectStackAllocationEnabled() && CanAllocateLclVarOnStack(lclNum))
+ {
+ op2 = MorphAllocObjNodeIntoStackAlloc(asAllocObj, block, stmt);
+ }
+ else
+ {
+ op2 = MorphAllocObjNodeIntoHelperCall(asAllocObj);
+ }
+
+ // Propagate flags of op2 to its parent.
+ stmtExpr->gtOp.gtOp2 = op2;
+ stmtExpr->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
+ }
+#ifdef DEBUG
+ else
+ {
+ // We assume that GT_ALLOCOBJ nodes are always present in the
+ // canonical form.
+ comp->fgWalkTreePre(&stmt->gtStmtExpr, AssertWhenAllocObjFoundVisitor);
+ }
+#endif // DEBUG
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// MorphAllocObjNodeIntoHelperCall: Morph a GT_ALLOCOBJ node into an
+// allocation helper call.
+//
+// Arguments:
+// allocObj - GT_ALLOCOBJ that will be replaced by helper call.
+//
+// Return Value:
+// Address of helper call node (can be the same as allocObj).
+//
+// Notes:
+// Must update parents flags after this.
+GenTreePtr ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj)
+{
+ assert(allocObj != nullptr);
+
+ GenTreePtr op1 = allocObj->gtGetOp1();
+
+ GenTreePtr helperCall = comp->fgMorphIntoHelperCall(allocObj, allocObj->gtNewHelper, comp->gtNewArgList(op1));
+
+ return helperCall;
+}
+
+//------------------------------------------------------------------------
+// MorphAllocObjNodeIntoStackAlloc: Morph a GT_ALLOCOBJ node into stack
+// allocation.
+// Arguments:
+// allocObj - GT_ALLOCOBJ that will be replaced by helper call.
+// block - a basic block where allocObj is
+// stmt - a statement where allocObj is
+//
+// Return Value:
+// Address of tree doing stack allocation (can be the same as allocObj).
+//
+// Notes:
+// Must update parents flags after this.
+// This function can insert additional statements before stmt.
+GenTreePtr ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj,
+ BasicBlock* block,
+ GenTreeStmt* stmt)
+{
+ assert(allocObj != nullptr);
+ assert(m_AnalysisDone);
+
+ // TODO-StackAllocation
+ NYI("MorphAllocObjIntoStackAlloc");
+
+ return allocObj;
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// AssertWhenAllocObjFoundVisitor: Look for a GT_ALLOCOBJ node and assert
+// if one is found.
+Compiler::fgWalkResult ObjectAllocator::AssertWhenAllocObjFoundVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ assert(tree != nullptr);
+ assert(tree->OperGet() != GT_ALLOCOBJ);
+
+ return Compiler::fgWalkResult::WALK_CONTINUE;
+}
+
+#endif // DEBUG
+
+//===============================================================================
diff --git a/src/jit/objectalloc.h b/src/jit/objectalloc.h
new file mode 100644
index 0000000000..bea6744024
--- /dev/null
+++ b/src/jit/objectalloc.h
@@ -0,0 +1,82 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ObjectAllocator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef OBJECTALLOC_H
+#define OBJECTALLOC_H
+/*****************************************************************************/
+
+//===============================================================================
+#include "phase.h"
+
+class ObjectAllocator final : public Phase
+{
+ //===============================================================================
+ // Data members
+ bool m_IsObjectStackAllocationEnabled;
+ bool m_AnalysisDone;
+ //===============================================================================
+ // Methods
+public:
+ ObjectAllocator(Compiler* comp);
+ bool IsObjectStackAllocationEnabled() const;
+ void EnableObjectStackAllocation();
+
+protected:
+ virtual void DoPhase() override;
+
+private:
+ bool CanAllocateLclVarOnStack(unsigned int lclNum) const;
+ void DoAnalysis();
+ void MorphAllocObjNodes();
+ GenTreePtr MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj);
+ GenTreePtr MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* allocObj, BasicBlock* block, GenTreeStmt* stmt);
+#ifdef DEBUG
+ static Compiler::fgWalkResult AssertWhenAllocObjFoundVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data);
+#endif // DEBUG
+};
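+
+// Typical usage (an illustrative sketch, assuming the standard Phase::Run driver; the exact call site
+// lives in the compiler's phase driver):
+//
+//     ObjectAllocator objectAllocator(compiler);
+//     if (enableObjectStackAllocation)
+//     {
+//         objectAllocator.EnableObjectStackAllocation();
+//     }
+//     objectAllocator.Run(); // runs the phase, which invokes DoPhase()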
+
+//===============================================================================
+
+inline ObjectAllocator::ObjectAllocator(Compiler* comp)
+ : Phase(comp, "Allocate Objects", PHASE_ALLOCATE_OBJECTS)
+ , m_IsObjectStackAllocationEnabled(false)
+ , m_AnalysisDone(false)
+{
+}
+
+inline bool ObjectAllocator::IsObjectStackAllocationEnabled() const
+{
+ return m_IsObjectStackAllocationEnabled;
+}
+
+inline void ObjectAllocator::EnableObjectStackAllocation()
+{
+ m_IsObjectStackAllocationEnabled = true;
+}
+
+//------------------------------------------------------------------------
+// CanAllocateLclVarOnStack: Returns true iff the local variable cannot
+// escape from the method and can therefore
+// be allocated on the stack.
+inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNum) const
+{
+ assert(m_AnalysisDone);
+ // TODO-ObjectStackAllocation
+ NYI("CanAllocateLclVarOnStack");
+ return false;
+}
+
+//===============================================================================
+
+#endif // OBJECTALLOC_H
diff --git a/src/jit/opcode.h b/src/jit/opcode.h
new file mode 100644
index 0000000000..87741e97d9
--- /dev/null
+++ b/src/jit/opcode.h
@@ -0,0 +1,29 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX opcodes.h XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************/
+#ifndef _OPCODE_H_
+#define _OPCODE_H_
+
+#include "openum.h"
+
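+// Per-IL-opcode tables (see openum.h for the opcode enumeration).
+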
+extern const signed char opcodeSizes[];
+
+#if defined(DEBUG)
+extern const char* const opcodeNames[];
+extern const BYTE opcodeArgKinds[];
+#endif
+
+/*****************************************************************************/
+#endif // _OPCODE_H_
+/*****************************************************************************/
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
new file mode 100644
index 0000000000..d23b4cd198
--- /dev/null
+++ b/src/jit/optcse.cpp
@@ -0,0 +1,2582 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX OptCSE XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+/*****************************************************************************/
+#if FEATURE_ANYCSE
+/*****************************************************************************/
+
+/* static */
+const size_t Compiler::s_optCSEhashSize = EXPSET_SZ * 2;
+
+/*****************************************************************************
+ *
+ * We've found all the candidates, build the index for easy access.
+ */
+
+void Compiler::optCSEstop()
+{
+ if (optCSECandidateCount == 0)
+ {
+ return;
+ }
+
+ CSEdsc* dsc;
+ CSEdsc** ptr;
+ unsigned cnt;
+
+ optCSEtab = new (this, CMK_CSE) CSEdsc*[optCSECandidateCount]();
+
+ for (cnt = s_optCSEhashSize, ptr = optCSEhash; cnt; cnt--, ptr++)
+ {
+ for (dsc = *ptr; dsc; dsc = dsc->csdNextInBucket)
+ {
+ if (dsc->csdIndex)
+ {
+ noway_assert((unsigned)dsc->csdIndex <= optCSECandidateCount);
+ if (optCSEtab[dsc->csdIndex - 1] == nullptr)
+ {
+ optCSEtab[dsc->csdIndex - 1] = dsc;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ for (cnt = 0; cnt < optCSECandidateCount; cnt++)
+ {
+ noway_assert(optCSEtab[cnt] != nullptr);
+ }
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Return the descriptor for the CSE with the given index.
+ */
+
+inline Compiler::CSEdsc* Compiler::optCSEfindDsc(unsigned index)
+{
+ noway_assert(index);
+ noway_assert(index <= optCSECandidateCount);
+ noway_assert(optCSEtab[index - 1]);
+
+ return optCSEtab[index - 1];
+}
+
+/*****************************************************************************
+ *
+ * For a previously marked CSE, decrement the use counts and unmark it
+ */
+
+void Compiler::optUnmarkCSE(GenTreePtr tree)
+{
+ if (!IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ // This tree is not a CSE candidate, so there is nothing
+ // to do.
+ return;
+ }
+
+ unsigned CSEnum = GET_CSE_INDEX(tree->gtCSEnum);
+ CSEdsc* desc;
+
+ // make sure it's been initialized
+ noway_assert(optCSEweight <= BB_MAX_WEIGHT);
+
+ /* Is this a CSE use? */
+ if (IS_CSE_USE(tree->gtCSEnum))
+ {
+ desc = optCSEfindDsc(CSEnum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Unmark CSE use #%02d at ", CSEnum);
+ printTreeID(tree);
+ printf(": %3d -> %3d\n", desc->csdUseCount, desc->csdUseCount - 1);
+ }
+#endif
+
+ /* Reduce the nested CSE's 'use' count */
+
+ noway_assert(desc->csdUseCount > 0);
+
+ if (desc->csdUseCount > 0)
+ {
+ desc->csdUseCount -= 1;
+
+ if (desc->csdUseWtCnt < optCSEweight)
+ {
+ desc->csdUseWtCnt = 0;
+ }
+ else
+ {
+ desc->csdUseWtCnt -= optCSEweight;
+ }
+ }
+ }
+ else
+ {
+ desc = optCSEfindDsc(CSEnum);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Unmark CSE def #%02d at ", CSEnum);
+ printTreeID(tree);
+ printf(": %3d -> %3d\n", desc->csdDefCount, desc->csdDefCount - 1);
+ }
+#endif
+
+ /* Reduce the nested CSE's 'def' count */
+
+ noway_assert(desc->csdDefCount > 0);
+
+ if (desc->csdDefCount > 0)
+ {
+ desc->csdDefCount -= 1;
+
+ if (desc->csdDefWtCnt < optCSEweight)
+ {
+ desc->csdDefWtCnt = 0;
+ }
+ else
+ {
+ desc->csdDefWtCnt -= optCSEweight;
+ }
+ }
+ }
+
+ tree->gtCSEnum = NO_CSE;
+}
+
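+// optHasNonCSEChild: Tree-walk helper used by optPropagateNonCSE. Returns WALK_ABORT as soon as it finds
+// a direct child of the root (the root itself is passed in pCallbackData and skipped) that is marked
+// GTF_DONT_CSE, so the caller knows the root should be marked GTF_DONT_CSE as well. GT_CNS_INT children
+// are exempt (see the comment below).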
+Compiler::fgWalkResult Compiler::optHasNonCSEChild(GenTreePtr* pTree, fgWalkData* data)
+{
+ if (*pTree == data->pCallbackData)
+ {
+ return WALK_CONTINUE;
+ }
+
+ if ((*pTree)->gtFlags & GTF_DONT_CSE)
+ {
+
+ // Fix 392756 WP7 Crossgen
+ // Don't propagate the GTF_DONT_CSE flag up from a GT_CNS_INT
+ //
+ // During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
+ // is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
+ // Hence to prevent the constant from becoming a CSE we have marked it as NO_CSE, but this
+ // should not prevent trees above the constant from becoming CSEs.
+ //
+ if ((*pTree)->gtOper == GT_CNS_INT)
+ {
+ return WALK_SKIP_SUBTREES;
+ }
+
+ return WALK_ABORT;
+ }
+
+ return WALK_SKIP_SUBTREES;
+}
+
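+// optPropagateNonCSE: Tree-walk callback that marks calls (other than shared static helpers) as
+// GTF_DONT_CSE and propagates GTF_DONT_CSE from a node's children up to the node itself.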
+Compiler::fgWalkResult Compiler::optPropagateNonCSE(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTree* tree = *pTree;
+ Compiler* comp = data->compiler;
+
+ /* Calls get DONT_CSE implicitly */
+ if (tree->OperGet() == GT_CALL)
+ {
+ if (!IsSharedStaticHelper(tree))
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+
+ if ((tree->gtFlags & GTF_DONT_CSE) == 0)
+ {
+ /* Propagate the DONT_CSE flag from child to parent */
+ if (comp->fgWalkTreePre(&tree, optHasNonCSEChild, tree) == WALK_ABORT)
+ {
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to Compiler::fgWalkAllTreesPre() to unmark nested CSE's.
+ */
+
+/* static */
+Compiler::fgWalkResult Compiler::optUnmarkCSEs(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+ GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
+
+ // We may have a non-NULL side effect list that is being kept
+ //
+ if (keepList)
+ {
+ GenTreePtr keptTree = keepList;
+ while (keptTree->OperGet() == GT_COMMA)
+ {
+ assert(keptTree->OperKind() & GTK_SMPOP);
+ GenTreePtr op1 = keptTree->gtOp.gtOp1;
+ GenTreePtr op2 = keptTree->gtGetOp2();
+
+ // For the GT_COMMA case, op1 is part of the original CSE tree
+ // that is being kept because it contains some side-effect
+ //
+ if (tree == op1)
+ {
+ // This tree and all of its sub trees are being kept
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // For the GT_COMMA case, op2 is the remaining side-effects of the original CSE tree,
+ // which can again be another GT_COMMA or the final side-effect part
+ //
+ keptTree = op2;
+ }
+ if (tree == keptTree)
+ {
+ // This tree and all of its sub trees are being kept
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ // This node is being removed from the graph of GenTreePtr
+ // Call optUnmarkCSE and decrement the LclVar ref counts.
+ comp->optUnmarkCSE(tree);
+ assert(!IS_CSE_INDEX(tree->gtCSEnum));
+
+ /* Look for any local variable references */
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* This variable ref is going away, decrease its ref counts */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < comp->lvaCount);
+ varDsc = comp->lvaTable + lclNum;
+
+ // make sure it's been initialized
+ assert(comp->optCSEweight <= BB_MAX_WEIGHT);
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ varDsc->decRefCnts(comp->optCSEweight, comp);
+ }
+
+ return WALK_CONTINUE;
+}
+
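+// optCSE_MaskHelper: Helper passed to fgWalkTreePre by optCSE_GetMaskData. For each node that is a CSE
+// def or use, it sets the corresponding bit in the CSE_defMask or CSE_useMask of the optCSE_MaskData
+// passed as callback data.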
+Compiler::fgWalkResult Compiler::optCSE_MaskHelper(GenTreePtr* pTree, fgWalkData* walkData)
+{
+ GenTree* tree = *pTree;
+ Compiler* comp = walkData->compiler;
+ optCSE_MaskData* pUserData = (optCSE_MaskData*)(walkData->pCallbackData);
+
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ unsigned cseIndex = GET_CSE_INDEX(tree->gtCSEnum);
+ EXPSET_TP cseBit = genCSEnum2bit(cseIndex);
+ if (IS_CSE_DEF(tree->gtCSEnum))
+ {
+ pUserData->CSE_defMask |= cseBit;
+ }
+ else
+ {
+ pUserData->CSE_useMask |= cseBit;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+// This function walks all the nodes of a given tree
+// and returns the mask of CSE defs and uses for the tree
+//
+void Compiler::optCSE_GetMaskData(GenTreePtr tree, optCSE_MaskData* pMaskData)
+{
+ pMaskData->CSE_defMask = 0;
+ pMaskData->CSE_useMask = 0;
+ fgWalkTreePre(&tree, optCSE_MaskHelper, (void*)pMaskData);
+}
+
+//------------------------------------------------------------------------
+// optCSE_canSwap: Determine if the execution order of two nodes can be swapped.
+//
+// Arguments:
+// op1 - The first node
+// op2 - The second node
+//
+// Return Value:
+// Return true iff it safe to swap the execution order of 'op1' and 'op2',
+// considering only the locations of the CSE defs and uses.
+//
+// Assumptions:
+// 'op1' currently occurs before 'op2' in the execution order.
+//
+bool Compiler::optCSE_canSwap(GenTree* op1, GenTree* op2)
+{
+ // op1 and op2 must be non-null.
+ assert(op1 != nullptr);
+ assert(op2 != nullptr);
+
+ bool canSwap = true; // the default result unless proven otherwise.
+
+ optCSE_MaskData op1MaskData;
+ optCSE_MaskData op2MaskData;
+
+ optCSE_GetMaskData(op1, &op1MaskData);
+ optCSE_GetMaskData(op2, &op2MaskData);
+
+ // We cannot swap if op1 contains a CSE def that is used by op2
+ if ((op1MaskData.CSE_defMask & op2MaskData.CSE_useMask) != 0)
+ {
+ canSwap = false;
+ }
+ else
+ {
+ // We also cannot swap if op2 contains a CSE def that is used by op1.
+ if ((op2MaskData.CSE_defMask & op1MaskData.CSE_useMask) != 0)
+ {
+ canSwap = false;
+ }
+ }
+
+ return canSwap;
+}
+
+//------------------------------------------------------------------------
+// optCSE_canSwap: Determine if the execution order of a node's operands can be swapped.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// Return true iff it safe to swap the execution order of the operands of 'tree',
+// considering only the locations of the CSE defs and uses.
+//
+bool Compiler::optCSE_canSwap(GenTreePtr tree)
+{
+ // We must have a binary treenode with non-null op1 and op2
+ assert((tree->OperKind() & GTK_SMPOP) != 0);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ return optCSE_canSwap(op1, op2);
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
+ * when (CodeOptKind() != Compiler::SMALL_CODE)
+ */
+
+/* static */
+int __cdecl Compiler::optCSEcostCmpEx(const void* op1, const void* op2)
+{
+ CSEdsc* dsc1 = *(CSEdsc**)op1;
+ CSEdsc* dsc2 = *(CSEdsc**)op2;
+
+ GenTreePtr exp1 = dsc1->csdTree;
+ GenTreePtr exp2 = dsc2->csdTree;
+
+ int diff;
+
+ diff = (int)(exp2->gtCostEx - exp1->gtCostEx);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // Sort the higher Use Counts toward the top
+ diff = (int)(dsc2->csdUseWtCnt - dsc1->csdUseWtCnt);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // With the same use count, Sort the lower Def Counts toward the top
+ diff = (int)(dsc1->csdDefWtCnt - dsc2->csdDefWtCnt);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // In order to ensure that we have a stable sort, we break ties using the csdIndex
+ return (int)(dsc1->csdIndex - dsc2->csdIndex);
+}
+
+/*****************************************************************************
+ *
+ * Compare function passed to qsort() by CSE_Heuristic::SortCandidates
+ * when (CodeOptKind() == Compiler::SMALL_CODE)
+ */
+
+/* static */
+int __cdecl Compiler::optCSEcostCmpSz(const void* op1, const void* op2)
+{
+ CSEdsc* dsc1 = *(CSEdsc**)op1;
+ CSEdsc* dsc2 = *(CSEdsc**)op2;
+
+ GenTreePtr exp1 = dsc1->csdTree;
+ GenTreePtr exp2 = dsc2->csdTree;
+
+ int diff;
+
+ diff = (int)(exp2->gtCostSz - exp1->gtCostSz);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // Sort the higher Use Counts toward the top
+ diff = (int)(dsc2->csdUseCount - dsc1->csdUseCount);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // With the same use count, Sort the lower Def Counts toward the top
+ diff = (int)(dsc1->csdDefCount - dsc2->csdDefCount);
+
+ if (diff != 0)
+ {
+ return diff;
+ }
+
+ // In order to ensure that we have a stable sort, we break ties using the csdIndex
+ return (int)(dsc1->csdIndex - dsc2->csdIndex);
+}
+
+/*****************************************************************************/
+#if FEATURE_VALNUM_CSE
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Initialize the Value Number CSE tracking logic.
+ */
+
+void Compiler::optValnumCSE_Init()
+{
+#ifdef DEBUG
+ optCSEtab = nullptr;
+#endif
+
+ /* Allocate and clear the hash bucket table */
+
+ optCSEhash = new (this, CMK_CSE) CSEdsc*[s_optCSEhashSize]();
+
+ optCSECandidateCount = 0;
+ optDoCSE = false; // Stays false until we find duplicate CSE tree
+}
+
+/*****************************************************************************
+ *
+ * Assign an index to the given expression (adding it to the lookup table,
+ * if necessary). Returns the index or 0 if the expression can not be a CSE.
+ */
+
+unsigned Compiler::optValnumCSE_Index(GenTreePtr tree, GenTreePtr stmt)
+{
+ unsigned key;
+ unsigned hash;
+ unsigned hval;
+ CSEdsc* hashDsc;
+
+ ValueNum vnlib = tree->GetVN(VNK_Liberal);
+
+ /* Compute the hash value for the expression */
+
+ key = (unsigned)vnlib;
+
+ hash = key;
+ hash *= (unsigned)(s_optCSEhashSize + 1);
+ hash >>= 7;
+
+ hval = hash % s_optCSEhashSize;
+
+ /* Look for a matching index in the hash table */
+
+ bool newCSE = false;
+
+ for (hashDsc = optCSEhash[hval]; hashDsc; hashDsc = hashDsc->csdNextInBucket)
+ {
+ if (hashDsc->csdHashValue == key)
+ {
+ treeStmtLstPtr newElem;
+
+ /* Have we started the list of matching nodes? */
+
+ if (hashDsc->csdTreeList == nullptr)
+ {
+ // Create the new element based upon the matching hashDsc element.
+
+ newElem = new (this, CMK_TreeStatementList) treeStmtLst;
+
+ newElem->tslTree = hashDsc->csdTree;
+ newElem->tslStmt = hashDsc->csdStmt;
+ newElem->tslBlock = hashDsc->csdBlock;
+ newElem->tslNext = nullptr;
+
+ /* Start the list with the first CSE candidate recorded */
+
+ hashDsc->csdTreeList = newElem;
+ hashDsc->csdTreeLast = newElem;
+ }
+
+ noway_assert(hashDsc->csdTreeList);
+
+ /* Append this expression to the end of the list */
+
+ newElem = new (this, CMK_TreeStatementList) treeStmtLst;
+
+ newElem->tslTree = tree;
+ newElem->tslStmt = stmt;
+ newElem->tslBlock = compCurBB;
+ newElem->tslNext = nullptr;
+
+ hashDsc->csdTreeLast->tslNext = newElem;
+ hashDsc->csdTreeLast = newElem;
+
+ optDoCSE = true; // Found a duplicate CSE tree
+
+ /* Have we assigned a CSE index? */
+ if (hashDsc->csdIndex == 0)
+ {
+ newCSE = true;
+ break;
+ }
+#if 0
+ // Use this to see if this Value Number base CSE is also a lexical CSE
+ bool treeMatch = GenTree::Compare(hashDsc->csdTree, tree, true);
+#endif
+
+ assert(FitsIn<signed char>(hashDsc->csdIndex));
+ tree->gtCSEnum = ((signed char)hashDsc->csdIndex);
+ return hashDsc->csdIndex;
+ }
+ }
+
+ if (!newCSE)
+ {
+ /* Not found, create a new entry (unless we have too many already) */
+
+ if (optCSECandidateCount < MAX_CSE_CNT)
+ {
+ hashDsc = new (this, CMK_CSE) CSEdsc;
+
+ hashDsc->csdHashValue = key;
+ hashDsc->csdIndex = 0;
+ hashDsc->csdLiveAcrossCall = 0;
+ hashDsc->csdDefCount = 0;
+ hashDsc->csdUseCount = 0;
+ hashDsc->csdDefWtCnt = 0;
+ hashDsc->csdUseWtCnt = 0;
+
+ hashDsc->csdTree = tree;
+ hashDsc->csdStmt = stmt;
+ hashDsc->csdBlock = compCurBB;
+ hashDsc->csdTreeList = nullptr;
+
+ /* Append the entry to the hash bucket */
+
+ hashDsc->csdNextInBucket = optCSEhash[hval];
+ optCSEhash[hval] = hashDsc;
+ }
+ return 0;
+ }
+ else // newCSE is true
+ {
+ /* We get here only after finding a matching CSE */
+
+ /* Create a new CSE (unless we have the maximum already) */
+
+ if (optCSECandidateCount == MAX_CSE_CNT)
+ {
+ return 0;
+ }
+
+ C_ASSERT((signed char)MAX_CSE_CNT == MAX_CSE_CNT);
+
+ unsigned CSEindex = ++optCSECandidateCount;
+ EXPSET_TP CSEmask = genCSEnum2bit(CSEindex);
+
+ /* Record the new CSE index in the hashDsc */
+ hashDsc->csdIndex = CSEindex;
+
+ /* Update the gtCSEnum field in the original tree */
+ noway_assert(hashDsc->csdTreeList->tslTree->gtCSEnum == 0);
+ assert(FitsIn<signed char>(CSEindex));
+
+ hashDsc->csdTreeList->tslTree->gtCSEnum = ((signed char)CSEindex);
+ noway_assert(((unsigned)hashDsc->csdTreeList->tslTree->gtCSEnum) == CSEindex);
+
+ tree->gtCSEnum = ((signed char)CSEindex);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCSE candidate #%02u, vn=", CSEindex);
+ vnPrint(vnlib, 0);
+ printf(" cseMask=%s in BB%02u, [cost=%2u, size=%2u]: \n", genES2str(genCSEnum2bit(CSEindex)),
+ compCurBB->bbNum, tree->gtCostEx, tree->gtCostSz);
+ gtDispTree(tree);
+ }
+#endif // DEBUG
+
+ return CSEindex;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Locate CSE candidates and assign indices to them.
+ * Returns 0 if no CSE candidates were found.
+ * Also initializes the bbCseIn, bbCseOut and bbCseGen sets for all blocks.
+ */
+
+unsigned Compiler::optValnumCSE_Locate()
+{
+ // Locate CSE candidates and assign them indices
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Make the block publicly available */
+
+ compCurBB = block;
+
+ /* Ensure that the BBF_VISITED and BBF_MARKED flags are clear */
+ /* Everyone who uses these flags is required to clear them afterwards */
+ noway_assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
+
+ /* Walk the statement trees in this basic block */
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We walk the tree in the forwards direction (bottom up) */
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (!optIsCSEcandidate(tree))
+ {
+ continue;
+ }
+
+ ValueNum vnlib = tree->GetVN(VNK_Liberal);
+
+ if (ValueNumStore::isReservedVN(vnlib))
+ {
+ continue;
+ }
+
+ // Don't CSE constant values, instead let the Value Number
+ // based Assertion Prop phase handle them.
+ //
+ if (vnStore->IsVNConstant(vnlib))
+ {
+ continue;
+ }
+
+ /* Assign an index to this expression */
+
+ unsigned CSEindex = optValnumCSE_Index(tree, stmt);
+
+ if (CSEindex != 0)
+ {
+ noway_assert(((unsigned)tree->gtCSEnum) == CSEindex);
+ }
+ }
+ }
+ }
+
+ /* We're done if there were no interesting expressions */
+
+ if (!optDoCSE)
+ {
+ return 0;
+ }
+
+ /* We're finished building the expression lookup table */
+
+ optCSEstop();
+
+ return 1;
+}
+
+/*****************************************************************************
+ *
+ * Compute each block's bbCseGen.
+ * This is the bitset that represents the CSEs that are generated within the block.
+ */
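+
+/* A small example (hypothetical CSE numbering, for illustration only): if the
+ * candidates numbered #1 and #3 each have an occurrence in BB02, then after this
+ * phase BB02's bbCseGen has at least the bits genCSEnum2bit(1) and
+ * genCSEnum2bit(3) set, regardless of where the other occurrences of those
+ * CSEs live.
+ */
+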
+void Compiler::optValnumCSE_InitDataFlow()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Initialize the block's bbCseIn set */
+
+ bool init_to_zero = false;
+
+ if (block == fgFirstBB)
+ {
+ /* Clear bbCseIn for the entry block */
+ init_to_zero = true;
+ }
+#if !CSE_INTO_HANDLERS
+ else
+ {
+ if (bbIsHandlerBeg(block))
+ {
+ /* Clear everything on entry to filters or handlers */
+ init_to_zero = true;
+ }
+ }
+#endif
+ if (init_to_zero)
+ {
+ /* Initialize to {ZERO} prior to dataflow */
+
+ block->bbCseIn = 0;
+ }
+ else
+ {
+ /* Initialize to {ALL} prior to dataflow */
+
+ block->bbCseIn = EXPSET_ALL;
+ }
+ block->bbCseOut = EXPSET_ALL;
+
+ /* Initialize to {ZERO} prior to locating the CSE candidates */
+ block->bbCseGen = 0;
+ }
+
+ // We walk the set of CSE candidates and set the bit corresponding to the CSEindex
+ // in the block's bbCseGen bitset
+ //
+ for (unsigned cnt = 0; cnt < optCSECandidateCount; cnt++)
+ {
+ CSEdsc* dsc = optCSEtab[cnt];
+ unsigned CSEindex = dsc->csdIndex;
+ treeStmtLstPtr lst = dsc->csdTreeList;
+ noway_assert(lst);
+
+ while (lst != nullptr)
+ {
+ BasicBlock* block = lst->tslBlock;
+ block->bbCseGen |= genCSEnum2bit(CSEindex);
+ lst = lst->tslNext;
+ }
+ }
+
+#ifdef DEBUG
+ // Dump out the bbCseGen information that we just created
+ //
+ if (verbose)
+ {
+ bool headerPrinted = false;
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbCseGen != 0)
+ {
+ if (!headerPrinted)
+ {
+ printf("\nBlocks that generate CSE def/uses\n");
+ headerPrinted = true;
+ }
+ printf("BB%02u", block->bbNum);
+ printf(" cseGen = %s\n", genES2str(block->bbCseGen));
+ }
+ }
+ }
+
+ fgDebugCheckLinks();
+
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * CSE Dataflow, so that all helper methods for dataflow are in a single place
+ *
+ */
+class CSE_DataFlow
+{
+private:
+ EXPSET_TP m_preMergeOut;
+
+ Compiler* m_pCompiler;
+
+public:
+ CSE_DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
+ {
+ }
+
+ Compiler* getCompiler()
+ {
+ return m_pCompiler;
+ }
+
+ // At the start of the merge function of the dataflow equations, initialize the premerge state (to detect changes).
+ void StartMerge(BasicBlock* block)
+ {
+ m_preMergeOut = block->bbCseOut;
+ }
+
+ // During merge, perform the actual merging of the predecessor's (since this is a forward analysis) dataflow flags.
+ void Merge(BasicBlock* block, BasicBlock* predBlock, flowList* preds)
+ {
+ block->bbCseIn &= predBlock->bbCseOut;
+ }
+
+ // At the end of the merge, store the results of the dataflow equations in a postmerge state.
+ bool EndMerge(BasicBlock* block)
+ {
+ EXPSET_TP mergeOut = block->bbCseOut & (block->bbCseIn | block->bbCseGen);
+ block->bbCseOut = mergeOut;
+ return (mergeOut != m_preMergeOut);
+ }
+};
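+
+/* For reference, the StartMerge/Merge/EndMerge callbacks above implement the
+ * usual available-expressions equations, specialized to the no-kill-set case
+ * (a sketch of the fixed point being computed, not additional source):
+ *
+ *    bbCseIn(B)  =  intersection over all predecessors P of B of bbCseOut(P)
+ *    bbCseOut(B) =  bbCseOut(B) & (bbCseIn(B) | bbCseGen(B))
+ *
+ * iterated until EndMerge reports no change in any block's bbCseOut.
+ */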
+
+/*****************************************************************************
+ *
+ * Perform a DataFlow forward analysis using the block CSE bitsets:
+ * Inputs:
+ * bbCseGen - Exact CSEs that become available within the block
+ * bbCseIn - Maximal estimate of CSEs that are/could be available at input to the block
+ * bbCseOut - Maximal estimate of CSEs that are/could be available at exit to the block
+ *
+ * Outputs:
+ * bbCseIn - Computed CSEs that are available at input to the block
+ * bbCseOut - Computed CSEs that are available at exit to the block
+ */
+
+void Compiler::optValnumCSE_DataFlow()
+{
+ CSE_DataFlow cse(this);
+
+ // Modified dataflow algorithm for available expressions.
+ DataFlow cse_flow(this);
+
+ cse_flow.ForwardAnalysis(cse);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter performing DataFlow for ValnumCSE's\n");
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ printf("BB%02u", block->bbNum);
+ printf(" cseIn = %s", genES2str(block->bbCseIn));
+ printf(" cseOut = %s", genES2str(block->bbCseOut));
+ printf("\n");
+ }
+
+ printf("\n");
+ }
+#endif // DEBUG
+}
+
+/*****************************************************************************
+ *
+ * Using the information computed by CSE_DataFlow determine for each
+ * CSE whether the CSE is a definition (if the CSE was not available)
+ * or if the CSE is a use (if the CSE was previously made available)
+ * The implementation iterates over all blocks, setting 'available_cses'
+ * to the CSEs that are available at input to the block.
+ * When a CSE expression is encountered it is classified either
+ * as a definition (if the CSE is not in the 'available_cses' set) or
+ * as a use (if the CSE is in the 'available_cses' set). If the CSE
+ * is a definition then it is added to the 'available_cses' set.
+ * With Value Number based CSEs we do not need kill sets.
+ */
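+
+/* Worked example (hypothetical block contents, for illustration only): suppose
+ * CSE #2 is not in bbCseIn for a block and the block contains two occurrences
+ * of CSE #2. The first occurrence is classified as a def (and #2 is added to
+ * 'available_cses'), so the second occurrence is classified as a use. If #2
+ * had been in bbCseIn, both occurrences would have been uses.
+ */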
+
+void Compiler::optValnumCSE_Availablity()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Labeling the CSEs with Use/Def information\n");
+ }
+#endif
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ GenTreePtr tree;
+
+ /* Make the block publicly available */
+
+ compCurBB = block;
+
+ EXPSET_TP available_cses = block->bbCseIn;
+
+ optCSEweight = block->getBBWeight(this);
+
+ /* Walk the statement trees in this basic block */
+
+ for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We walk the tree in the forwards direction (bottom up) */
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ EXPSET_TP mask = genCSEnum2bit(tree->gtCSEnum);
+ CSEdsc* desc = optCSEfindDsc(tree->gtCSEnum);
+ unsigned stmw = block->getBBWeight(this);
+
+ /* Is this expression available here? */
+
+ if (available_cses & mask)
+ {
+ /* This is a CSE use */
+
+ desc->csdUseCount += 1;
+ desc->csdUseWtCnt += stmw;
+ }
+ else
+ {
+ if (tree->gtFlags & GTF_COLON_COND)
+ {
+ // We can't create CSE definitions inside QMARK-COLON trees
+ tree->gtCSEnum = NO_CSE;
+ continue;
+ }
+
+ /* This is a CSE def */
+
+ desc->csdDefCount += 1;
+ desc->csdDefWtCnt += stmw;
+
+ /* Mark the node as a CSE definition */
+
+ tree->gtCSEnum = TO_CSE_DEF(tree->gtCSEnum);
+
+ /* This CSE will be available after this def */
+
+ available_cses |= mask;
+ }
+#ifdef DEBUG
+ if (verbose && IS_CSE_INDEX(tree->gtCSEnum))
+ {
+ printf("BB%02u ", block->bbNum);
+ printTreeID(tree);
+ printf(" %s of CSE #%02u [weight=%s]\n", IS_CSE_USE(tree->gtCSEnum) ? "Use" : "Def",
+ GET_CSE_INDEX(tree->gtCSEnum), refCntWtd2str(stmw));
+ }
+#endif
+ }
+ }
+ }
+ }
+}
+
+// The following class handles the CSE heuristics.
+// We use a complex set of heuristic rules
+// to determine whether it is likely to be profitable to perform a given CSE.
+//
+class CSE_Heuristic
+{
+ Compiler* m_pCompiler;
+ unsigned m_addCSEcount;
+
+ unsigned aggressiveRefCnt;
+ unsigned moderateRefCnt;
+ unsigned enregCount; // count of the number of enregisterable variables
+ bool largeFrame;
+ bool hugeFrame;
+ Compiler::codeOptimize codeOptKind;
+ Compiler::CSEdsc** sortTab;
+ size_t sortSiz;
+#ifdef DEBUG
+ CLRRandom m_cseRNG;
+ unsigned m_bias;
+#endif
+
+public:
+ CSE_Heuristic(Compiler* pCompiler) : m_pCompiler(pCompiler)
+ {
+ codeOptKind = m_pCompiler->compCodeOpt();
+ }
+
+ Compiler::codeOptimize CodeOptKind()
+ {
+ return codeOptKind;
+ }
+
+ // Perform the initialization step for our CSE heuristics:
+ // determine the various cutoff values to use for
+ // the aggressive, moderate and conservative CSE promotions,
+ // count the number of enregisterable variables, and
+ // determine if the method has a large or huge stack frame.
+ //
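+ // Worked example with made-up register counts (illustration only): on a
+ // hypothetical target with CNT_CALLEE_ENREG=8 and CNT_CALLEE_TRASH=7,
+ // regAvailEstimate starts at (8*3)+(7*2)+1 = 39, the aggressive cutoff is
+ // taken from the first sorted LclVar once enregCount exceeds 8*3/2 = 12,
+ // and the moderate cutoff once enregCount exceeds (8*3)+(7*2) = 38.
+ //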
+ void Initialize()
+ {
+ m_addCSEcount = 0; /* Count of the number of LclVars for CSEs that we added */
+
+ // Record the weighted ref count of the last "for sure" callee saved LclVar
+ aggressiveRefCnt = 0;
+ moderateRefCnt = 0;
+ enregCount = 0;
+ largeFrame = false;
+ hugeFrame = false;
+ sortTab = nullptr;
+ sortSiz = 0;
+
+#ifdef _TARGET_XARCH_
+ if (m_pCompiler->compLongUsed)
+ {
+ enregCount++;
+ }
+#endif
+
+ unsigned frameSize = 0;
+ unsigned regAvailEstimate = ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2) + 1);
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = m_pCompiler->lvaTable; lclNum < m_pCompiler->lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvRefCnt == 0)
+ {
+ continue;
+ }
+
+ bool onStack = (regAvailEstimate == 0); // true when it is likely that this LclVar will have a stack home
+
+ // Some LclVars always have stack homes
+ if ((varDsc->lvDoNotEnregister) || (varDsc->lvType == TYP_LCLBLK))
+ {
+ onStack = true;
+ }
+
+#ifdef _TARGET_X86_
+ // Treat floating point and 64 bit integers as always on the stack
+ if (varTypeIsFloating(varDsc->TypeGet()) || varTypeIsLong(varDsc->TypeGet()))
+ onStack = true;
+#endif
+
+ if (onStack)
+ {
+ frameSize += m_pCompiler->lvaLclSize(lclNum);
+ }
+ else
+ {
+ // For the purposes of estimating the frameSize we
+ // will consider this LclVar as being enregistered.
+ // Now we reduce the remaining regAvailEstimate by
+ // an appropriate amount.
+ if (varDsc->lvRefCnt <= 2)
+ {
+ // a single use single def LclVar only uses 1
+ regAvailEstimate -= 1;
+ }
+ else
+ {
+ // a LclVar with multiple uses and defs uses 2
+ if (regAvailEstimate >= 2)
+ {
+ regAvailEstimate -= 2;
+ }
+ else
+ {
+ // Don't try to subtract when regAvailEstimate is 1
+ regAvailEstimate = 0;
+ }
+ }
+ }
+#ifdef _TARGET_XARCH_
+ if (frameSize > 0x080)
+ {
+ // We likely have a large stack frame.
+ // Thus we might need to use large displacements when loading or storing
+ // to CSE LclVars that are not enregistered
+ largeFrame = true;
+ break; // early out, we don't need to keep increasing frameSize
+ }
+#else // _TARGET_ARM_
+ if (frameSize > 0x0400)
+ {
+ largeFrame = true;
+ }
+ if (frameSize > 0x10000)
+ {
+ hugeFrame = true;
+ break;
+ }
+#endif
+ }
+
+ unsigned sortNum = 0;
+ while (sortNum < m_pCompiler->lvaTrackedCount)
+ {
+ LclVarDsc* varDsc = m_pCompiler->lvaRefSorted[sortNum++];
+ var_types varTyp = varDsc->TypeGet();
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ continue;
+ }
+
+ if (!varTypeIsFloating(varTyp))
+ {
+ // TODO-1stClassStructs: Remove this; it is here to duplicate previous behavior.
+ // Note that this makes genTypeStSz return 1.
+ if (varTypeIsStruct(varTyp))
+ {
+ varTyp = TYP_STRUCT;
+ }
+ enregCount += genTypeStSz(varTyp);
+ }
+
+ if ((aggressiveRefCnt == 0) && (enregCount > (CNT_CALLEE_ENREG * 3 / 2)))
+ {
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ aggressiveRefCnt = varDsc->lvRefCnt + BB_UNITY_WEIGHT;
+ }
+ else
+ {
+ aggressiveRefCnt = varDsc->lvRefCntWtd + BB_UNITY_WEIGHT;
+ }
+ }
+ if ((moderateRefCnt == 0) && (enregCount > ((CNT_CALLEE_ENREG * 3) + (CNT_CALLEE_TRASH * 2))))
+ {
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ moderateRefCnt = varDsc->lvRefCnt;
+ }
+ else
+ {
+ moderateRefCnt = varDsc->lvRefCntWtd;
+ }
+ }
+ }
+ unsigned mult = 3;
+ // use smaller value for mult when enregCount is in [0..4]
+ if (enregCount <= 4)
+ {
+ mult = (enregCount <= 2) ? 1 : 2;
+ }
+
+ aggressiveRefCnt = max(BB_UNITY_WEIGHT * mult, aggressiveRefCnt);
+ moderateRefCnt = max((BB_UNITY_WEIGHT * mult) / 2, moderateRefCnt);
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\n");
+ printf("Aggressive CSE Promotion cutoff is %u\n", aggressiveRefCnt);
+ printf("Moderate CSE Promotion cutoff is %u\n", moderateRefCnt);
+ printf("Framesize estimate is 0x%04X\n", frameSize);
+ printf("We have a %s frame\n", hugeFrame ? "huge" : (largeFrame ? "large" : "small"));
+ }
+#endif
+ }
+
+ void SortCandidates()
+ {
+ /* Create an expression table sorted by decreasing cost */
+ sortTab = new (m_pCompiler, CMK_CSE) Compiler::CSEdsc*[m_pCompiler->optCSECandidateCount];
+
+ sortSiz = m_pCompiler->optCSECandidateCount * sizeof(*sortTab);
+ memcpy(sortTab, m_pCompiler->optCSEtab, sortSiz);
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpSz);
+ }
+ else
+ {
+ qsort(sortTab, m_pCompiler->optCSECandidateCount, sizeof(*sortTab), m_pCompiler->optCSEcostCmpEx);
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nSorted CSE candidates:\n");
+ /* Print out the CSE candidates */
+ for (unsigned cnt = 0; cnt < m_pCompiler->optCSECandidateCount; cnt++)
+ {
+ Compiler::CSEdsc* dsc = sortTab[cnt];
+ GenTreePtr expr = dsc->csdTree;
+
+ unsigned def;
+ unsigned use;
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ def = dsc->csdDefCount; // def count
+ use = dsc->csdUseCount; // use count (excluding the implicit uses at defs)
+ }
+ else
+ {
+ def = dsc->csdDefWtCnt; // weighted def count
+ use = dsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+ }
+
+ printf("CSE #%02u,cseMask=%s,useCnt=%d: [def=%3u, use=%3u", dsc->csdIndex,
+ genES2str(genCSEnum2bit(dsc->csdIndex)), dsc->csdUseCount, def, use);
+ printf("] :: ");
+ m_pCompiler->gtDispTree(expr, nullptr, nullptr, true);
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+
+ // The following class nested within CSE_Heuristic encapsulates the information
+ // about the current CSE candidate that is under consideration
+ //
+ // TODO-Cleanup: This is still very much based upon the old Lexical CSE implementation
+ // and needs to be reworked for the Value Number based implementation
+ //
+ class CSE_Candidate
+ {
+ CSE_Heuristic* m_context;
+ Compiler::CSEdsc* m_CseDsc;
+
+ unsigned m_cseIndex;
+
+ unsigned m_defCount;
+ unsigned m_useCount;
+
+ unsigned m_Cost;
+ unsigned m_Size;
+
+ public:
+ CSE_Candidate(CSE_Heuristic* context, Compiler::CSEdsc* cseDsc) : m_context(context), m_CseDsc(cseDsc)
+ {
+ m_cseIndex = m_CseDsc->csdIndex;
+ }
+
+ Compiler::CSEdsc* CseDsc()
+ {
+ return m_CseDsc;
+ }
+ unsigned CseIndex()
+ {
+ return m_cseIndex;
+ }
+ unsigned DefCount()
+ {
+ return m_defCount;
+ }
+ unsigned UseCount()
+ {
+ return m_useCount;
+ }
+ // TODO-CQ: With ValNum CSE's the Expr and its cost can vary.
+ GenTreePtr Expr()
+ {
+ return m_CseDsc->csdTree;
+ }
+ unsigned Cost()
+ {
+ return m_Cost;
+ }
+ unsigned Size()
+ {
+ return m_Size;
+ }
+
+ bool LiveAcrossCall()
+ {
+ return (m_CseDsc->csdLiveAcrossCall != 0);
+ }
+
+ void InitializeCounts()
+ {
+ if (m_context->CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ m_Cost = Expr()->gtCostSz; // the estimated code size
+ m_Size = Expr()->gtCostSz; // always the gtCostSz
+ m_defCount = m_CseDsc->csdDefCount; // def count
+ m_useCount = m_CseDsc->csdUseCount; // use count (excluding the implicit uses at defs)
+ }
+ else
+ {
+ m_Cost = Expr()->gtCostEx; // the estimated execution cost
+ m_Size = Expr()->gtCostSz; // always the gtCostSz
+ m_defCount = m_CseDsc->csdDefWtCnt; // weighted def count
+ m_useCount = m_CseDsc->csdUseWtCnt; // weighted use count (excluding the implicit uses at defs)
+ }
+ }
+ };
+
+#ifdef DEBUG
+ //------------------------------------------------------------------------
+ // optConfigBiasedCSE:
+ // Stress mode to shuffle the decision to CSE or not using environment
+ // variable COMPlus_JitStressBiasedCSE (= 0 to 100%). When the bias value
+ // is not specified but COMPlus_JitStress is ON, generate a random bias.
+ //
+ // Return Value:
+ // 0 -- This method is indifferent about this CSE (no bias specified and no stress)
+ // 1 -- This CSE must be performed to maintain specified/generated bias.
+ // -1 -- This CSE mustn't be performed to maintain specified/generated bias.
+ //
+ // Operation:
+ // A debug stress only method that returns "1" with probability (P)
+ // defined by:
+ //
+ // P = (COMPlus_JitStressBiasedCSE / 100) (or)
+ // P = (random(100) / 100) when COMPlus_JitStress is specified and
+ // COMPlus_JitStressBiasedCSE is unspecified.
+ //
+ // When specified, the bias is reinterpreted as a decimal number between 0
+ // and 100.
+ // When bias is not specified, a bias is randomly generated if COMPlus_JitStress
+ // is non-zero.
+ //
+ // Callers are supposed to call this method for each CSE promotion decision,
+ // ignore the result if the return value is 0, and honor a 1 by performing
+ // the CSE and a -1 by skipping it, to maintain the specified/generated bias.
+ //
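+ // Example (hypothetical setting, for illustration only): with an effective
+ // bias of 30, 'gen' falls below the bias roughly 30% of the time, so about
+ // 30% of candidates are forced to be CSEs (+1) and the rest are forced to be
+ // skipped (-1); a bias of 0 never forces a CSE and a bias of 100 always does.
+ //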
+ int optConfigBiasedCSE()
+ {
+ // Seed the PRNG, if never done before.
+ if (!m_cseRNG.IsInitialized())
+ {
+ m_cseRNG.Init(m_pCompiler->info.compMethodHash());
+ m_bias = m_cseRNG.Next(100);
+ }
+
+ // Obtain the bias value and reinterpret as decimal.
+ unsigned bias = ReinterpretHexAsDecimal(JitConfig.JitStressBiasedCSE());
+
+ // Invalid value, check if JitStress is ON.
+ if (bias > 100)
+ {
+ if (!m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, MAX_STRESS_WEIGHT))
+ {
+ // JitStress is OFF for CSE, nothing to do.
+ return 0;
+ }
+ bias = m_bias;
+ JITDUMP("JitStressBiasedCSE is OFF, but JitStress is ON: generated bias=%d.\n", bias);
+ }
+
+ // Generate a number in the range [0, 99] and if the generated
+ // number is smaller than the bias, then perform the CSE.
+ unsigned gen = m_cseRNG.Next(100);
+ int ret = (gen < bias) ? 1 : -1;
+
+ if (m_pCompiler->verbose)
+ {
+ if (ret < 0)
+ {
+ printf("No CSE because gen=%d >= bias=%d\n", gen, bias);
+ }
+ else
+ {
+ printf("Promoting CSE because gen=%d < bias=%d\n", gen, bias);
+ }
+ }
+
+ // Indicate whether to perform CSE or not.
+ return ret;
+ }
+#endif
+
+ // Given a CSE candidate, decide whether it passes or fails the profitability heuristic.
+ // Returns true if we believe that it is profitable to promote this candidate to a CSE.
+ //
+ bool PromotionCheck(CSE_Candidate* candidate)
+ {
+ bool result = false;
+
+#ifdef DEBUG
+ int stressResult = optConfigBiasedCSE();
+ if (stressResult != 0)
+ {
+ // Stress is enabled. Check whether to perform CSE or not.
+ return (stressResult > 0);
+ }
+
+ if (m_pCompiler->optConfigDisableCSE2())
+ {
+ return false; // skip this CSE
+ }
+#endif
+
+ /*
+ Our calculation is based on the following cost estimate formula
+
+ Existing costs are:
+
+ (def + use) * cost
+
+ If we introduce a CSE temp at each definition and
+ replace each use with the CSE temp then our cost is:
+
+ (def * (cost + cse-def-cost)) + (use * cse-use-cost)
+
+ We must estimate the values to use for cse-def-cost and cse-use-cost
+
+ If we are able to enregister the CSE then the cse-use-cost is one
+ and cse-def-cost is either zero or one. Zero in the case where
+ we needed to evaluate the def into a register and we can use that
+ register as the CSE temp as well.
+
+ If we are unable to enregister the CSE then the cse-use-cost is IND_COST
+ and the cse-def-cost is also IND_COST.
+
+ If we want to be conservative we use IND_COST as the value
+ for both cse-def-cost and cse-use-cost and then we never introduce
+ a CSE that could pessimize the execution time of the method.
+
+ If we want to be more moderate we use (IND_COST_EX + 1) / 2 as the
+ values for both cse-def-cost and cse-use-cost.
+
+ If we want to be aggressive we use 1 as the values for both
+ cse-def-cost and cse-use-cost.
+
+ If we believe that the CSE is very valuable in terms of weighted ref counts
+ such that it would always be enregistered by the register allocator we choose
+ the aggressive use def costs.
+
+ If we believe that the CSE is somewhat valuable in terms of weighted ref counts
+ such that it could likely be enregistered by the register allocator we choose
+ the moderate use def costs.
+
+ Otherwise we choose the conservative use def costs.
+
+ */
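+
+ /*
+ Worked example with made-up numbers (illustration only): a candidate with
+ DefCount()=2, UseCount()=6 and Cost()=4, evaluated with the moderate
+ "never live at call" costs below (cse_def_cost=2, cse_use_cost=1), gives
+
+ no_cse_cost = 6 * 4 = 24
+ yes_cse_cost = (2 * 2) + (6 * 1) = 10
+
+ so, ignoring the extra_yes_cost/extra_no_cost adjustments, 10 <= 24 and the
+ candidate passes the promotion check.
+ */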
+
+ unsigned cse_def_cost;
+ unsigned cse_use_cost;
+
+ unsigned no_cse_cost = 0;
+ unsigned yes_cse_cost = 0;
+ unsigned extra_yes_cost = 0;
+ unsigned extra_no_cost = 0;
+
+ // The 'cseRefCnt' is the RefCnt that we will have if we promote this CSE into a new LclVar
+ // Each CSE Def will contain two Refs and each CSE Use will have one Ref of this new LclVar
+ unsigned cseRefCnt = (candidate->DefCount() * 2) + candidate->UseCount();
+
+ if (CodeOptKind() == Compiler::SMALL_CODE)
+ {
+ if (cseRefCnt >= aggressiveRefCnt)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ }
+#endif
+ cse_def_cost = 1;
+ cse_use_cost = 1;
+
+ if (candidate->LiveAcrossCall() != 0)
+ {
+ if (largeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ if (hugeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+ }
+ else if (largeFrame)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (large frame)\n");
+ }
+#endif
+#ifdef _TARGET_XARCH_
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 6; // mov [EBP-0x00001FC],reg
+ cse_use_cost = 5; // [EBP-0x00001FC]
+#else // _TARGET_ARM_
+ if (hugeFrame)
+ {
+ cse_def_cost = 12; // movw/movt r10 and str reg,[sp+r10]
+ cse_use_cost = 12;
+ }
+ else
+ {
+ cse_def_cost = 8; // movw r10 and str reg,[sp+r10]
+ cse_use_cost = 8;
+ }
+#endif
+ }
+ else // small frame
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Codesize CSE Promotion (small frame)\n");
+ }
+#endif
+#ifdef _TARGET_XARCH_
+ /* The following formula is good choice when optimizing CSE for SMALL_CODE */
+ cse_def_cost = 3; // mov [EBP-1C],reg
+ cse_use_cost = 2; // [EBP-1C]
+#else // _TARGET_ARM_
+ cse_def_cost = 2; // str reg,[sp+0x9c]
+ cse_use_cost = 2; // ldr reg,[sp+0x9c]
+#endif
+ }
+ }
+ else // not SMALL_CODE ...
+ {
+ if (cseRefCnt >= aggressiveRefCnt)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Aggressive CSE Promotion (%u >= %u)\n", cseRefCnt, aggressiveRefCnt);
+ }
+#endif
+ cse_def_cost = 1;
+ cse_use_cost = 1;
+ }
+ else if (cseRefCnt >= moderateRefCnt)
+ {
+
+ if (candidate->LiveAcrossCall() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Moderate CSE Promotion (CSE never live at call) (%u >= %u)\n", cseRefCnt,
+ moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 2;
+ cse_use_cost = 1;
+ }
+ else // candidate is live across call
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Moderate CSE Promotion (%u >= %u)\n", cseRefCnt, moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 2;
+ cse_use_cost = 2;
+ extra_yes_cost = BB_UNITY_WEIGHT * 2; // Extra cost in case we have to spill/restore a caller
+ // saved register
+ }
+ }
+ else // Conservative CSE promotion
+ {
+ if (candidate->LiveAcrossCall() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Conservative CSE Promotion (CSE never live at call) (%u < %u)\n", cseRefCnt,
+ moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 2;
+ cse_use_cost = 2;
+ }
+ else // candidate is live across call
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Conservative CSE Promotion (%u < %u)\n", cseRefCnt, moderateRefCnt);
+ }
+#endif
+ cse_def_cost = 3;
+ cse_use_cost = 3;
+ extra_yes_cost = BB_UNITY_WEIGHT * 4; // Extra cost in case we have to spill/restore a caller
+ // saved register
+ }
+
+ // If we have maxed out lvaTrackedCount then this CSE may end up as an untracked variable
+ if (m_pCompiler->lvaTrackedCount == lclMAX_TRACKED)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+
+ if (largeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ if (hugeFrame)
+ {
+ cse_def_cost++;
+ cse_use_cost++;
+ }
+ }
+
+ // estimate the cost from lost codesize reduction if we do not perform the CSE
+ if (candidate->Size() > cse_use_cost)
+ {
+ Compiler::CSEdsc* dsc = candidate->CseDsc(); // We need to retrieve the actual use count, not the
+ // weighted count
+ extra_no_cost = candidate->Size() - cse_use_cost;
+ extra_no_cost = extra_no_cost * dsc->csdUseCount * 2;
+ }
+
+ /* no_cse_cost is the cost estimate when we decide not to make a CSE */
+ /* yes_cse_cost is the cost estimate when we decide to make a CSE */
+
+ no_cse_cost = candidate->UseCount() * candidate->Cost();
+ yes_cse_cost = (candidate->DefCount() * cse_def_cost) + (candidate->UseCount() * cse_use_cost);
+
+#if CPU_LONG_USES_REGPAIR
+ if (candidate->Expr()->TypeGet() == TYP_LONG)
+ {
+ yes_cse_cost *= 2;
+ }
+#endif
+ no_cse_cost += extra_no_cost;
+ yes_cse_cost += extra_yes_cost;
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("cseRefCnt=%d, aggressiveRefCnt=%d, moderateRefCnt=%d\n", cseRefCnt, aggressiveRefCnt,
+ moderateRefCnt);
+ printf("defCnt=%d, useCnt=%d, cost=%d, size=%d\n", candidate->DefCount(), candidate->UseCount(),
+ candidate->Cost(), candidate->Size());
+ printf("def_cost=%d, use_cost=%d, extra_no_cost=%d, extra_yes_cost=%d\n", cse_def_cost, cse_use_cost,
+ extra_no_cost, extra_yes_cost);
+
+ printf("CSE cost savings check (%u >= %u) %s\n", no_cse_cost, yes_cse_cost,
+ (no_cse_cost >= yes_cse_cost) ? "passes" : "fails");
+ }
+#endif
+
+ // Should we make this candidate into a CSE?
+ // Is the yes cost less than or equal to the no cost?
+ //
+ if (yes_cse_cost <= no_cse_cost)
+ {
+ result = true; // Yes make this a CSE
+ }
+ else
+ {
+ /* In stress mode we will make some extra CSEs */
+ if (no_cse_cost > 0)
+ {
+ int percentage = (no_cse_cost * 100) / yes_cse_cost;
+
+ if (m_pCompiler->compStressCompile(Compiler::STRESS_MAKE_CSE, percentage))
+ {
+ result = true; // Yes make this a CSE
+ }
+ }
+ }
+
+ return result;
+ }
+
+ // PerformCSE() takes a successful candidate and performs the appropriate replacements:
+ //
+ // It will replace all of the CSE defs with assignments to a new "cse0" LclVar
+ // and will replace all of the CSE uses with reads of the "cse0" LclVar
+ //
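+ // An illustrative before/after sketch (hypothetical IR, not from these
+ // sources): for a candidate "a + b" with one def and one use,
+ //
+ // before: x = (a + b) * c; y = (a + b) - d;
+ // after: x = (cse0 = a + b) * c; y = cse0 - d;
+ //
+ // where the def site becomes GT_COMMA(GT_ASG(cse0, a + b), cse0) and each use
+ // site becomes a plain GT_LCL_VAR read of the new "cse0" temp.
+ //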
+ void PerformCSE(CSE_Candidate* successfulCandidate)
+ {
+ unsigned cseRefCnt = (successfulCandidate->DefCount() * 2) + successfulCandidate->UseCount();
+
+ if (successfulCandidate->LiveAcrossCall() != 0)
+ {
+ // As we introduce new LclVars for these CSEs we slightly
+ // increase the cutoffs for aggressive and moderate CSE's
+ //
+ int incr = BB_UNITY_WEIGHT;
+
+#if CPU_LONG_USES_REGPAIR
+ if (successfulCandidate->Expr()->TypeGet() == TYP_LONG)
+ incr *= 2;
+#endif
+
+ if (cseRefCnt > aggressiveRefCnt)
+ {
+ aggressiveRefCnt += incr;
+ }
+
+ if (cseRefCnt > moderateRefCnt)
+ {
+ moderateRefCnt += (incr / 2);
+ }
+ }
+
+ /* Introduce a new temp for the CSE */
+
+ // we will create a long lifetime temp for the new cse LclVar
+ unsigned cseLclVarNum = m_pCompiler->lvaGrabTemp(false DEBUGARG("ValNumCSE"));
+ var_types cseLclVarTyp = genActualType(successfulCandidate->Expr()->TypeGet());
+ if (varTypeIsStruct(cseLclVarTyp))
+ {
+ m_pCompiler->lvaSetStruct(cseLclVarNum, m_pCompiler->gtGetStructHandle(successfulCandidate->Expr()), false);
+ }
+ m_pCompiler->lvaTable[cseLclVarNum].lvType = cseLclVarTyp;
+ m_pCompiler->lvaTable[cseLclVarNum].lvIsCSE = true;
+
+ m_addCSEcount++; // Record that we created a new LclVar for use as a CSE temp
+ m_pCompiler->optCSEcount++;
+
+ /* Walk all references to this CSE, adding an assignment
+ to the CSE temp to all defs and changing all refs to
+ a simple use of the CSE temp.
+
+ We also unmark nested CSE's for all uses.
+ */
+
+ Compiler::treeStmtLstPtr lst;
+ lst = successfulCandidate->CseDsc()->csdTreeList;
+ noway_assert(lst);
+
+#define QQQ_CHECK_CSE_VNS 0
+#if QQQ_CHECK_CSE_VNS
+ assert(lst != NULL);
+ ValueNum firstVN = lst->tslTree->gtVN;
+ lst = lst->tslNext;
+ bool allSame = true;
+ while (lst != NULL)
+ {
+ if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
+ {
+ if (lst->tslTree->gtVN != firstVN)
+ {
+ allSame = false;
+ break;
+ }
+ }
+ lst = lst->tslNext;
+ }
+ if (!allSame)
+ {
+ lst = dsc->csdTreeList;
+ GenTreePtr firstTree = lst->tslTree;
+ printf("In %s, CSE (oper = %s, type = %s) has differing VNs: ", info.compFullName,
+ GenTree::NodeName(firstTree->OperGet()), varTypeName(firstTree->TypeGet()));
+ while (lst != NULL)
+ {
+ if (IS_CSE_INDEX(lst->tslTree->gtCSEnum))
+ {
+ printf("0x%x(%s,%d) ", lst->tslTree, IS_CSE_USE(lst->tslTree->gtCSEnum) ? "u" : "d",
+ lst->tslTree->gtVN);
+ }
+ lst = lst->tslNext;
+ }
+ printf("\n");
+ }
+ lst = dsc->csdTreeList;
+#endif
+
+ do
+ {
+ /* Process the next node in the list */
+ GenTreePtr exp = lst->tslTree;
+ GenTreePtr stm = lst->tslStmt;
+ noway_assert(stm->gtOper == GT_STMT);
+ BasicBlock* blk = lst->tslBlock;
+
+ /* Advance to the next node in the list */
+ lst = lst->tslNext;
+
+ // Assert if we used DEBUG_DESTROY_NODE on this CSE exp
+ assert(exp->gtOper != GT_COUNT);
+
+ /* Ignore the node if it's not been marked as a CSE */
+ if (!IS_CSE_INDEX(exp->gtCSEnum))
+ {
+ continue;
+ }
+
+ /* Make sure we update the weighted ref count correctly */
+ m_pCompiler->optCSEweight = blk->getBBWeight(m_pCompiler);
+
+ /* Figure out the actual type of the value */
+ var_types expTyp = genActualType(exp->TypeGet());
+ noway_assert(expTyp == cseLclVarTyp);
+
+ // This will contain the replacement tree for exp
+ // It will either be the CSE def or CSE ref
+ //
+ GenTreePtr cse = nullptr;
+ bool isDef;
+ FieldSeqNode* fldSeq = nullptr;
+ bool hasZeroMapAnnotation = m_pCompiler->GetZeroOffsetFieldMap()->Lookup(exp, &fldSeq);
+
+ if (IS_CSE_USE(exp->gtCSEnum))
+ {
+ /* This is a use of the CSE */
+ isDef = false;
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nCSE #%02u use at ", exp->gtCSEnum);
+ Compiler::printTreeID(exp);
+ printf(" replaced in BB%02u with temp use.\n", blk->bbNum);
+ }
+#endif // DEBUG
+
+ /* check for and collect any SIDE_EFFECTS */
+ GenTreePtr sideEffList = nullptr;
+
+ if (exp->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS)
+ {
+ // Extract any side effects from exp
+ //
+ m_pCompiler->gtExtractSideEffList(exp, &sideEffList, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE);
+ }
+
+ // We will replace the CSE ref with a new tree
+ // this is typically just a simple use of the new CSE LclVar
+ //
+ cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
+ cse->gtVNPair = exp->gtVNPair; // assign the proper Value Numbers
+#ifdef DEBUG
+ cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF;
+#endif // DEBUG
+
+ // If we have side effects then we need to create a GT_COMMA tree instead
+ //
+ if (sideEffList)
+ {
+ noway_assert(sideEffList->gtFlags & GTF_SIDE_EFFECT);
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nThe CSE has side effects! Extracting side effects...\n");
+ m_pCompiler->gtDispTree(sideEffList);
+ printf("\n");
+ }
+#endif
+
+ GenTreePtr cseVal = cse;
+ GenTreePtr curSideEff = sideEffList;
+ ValueNumStore* vnStore = m_pCompiler->vnStore;
+ ValueNumPair exceptions_vnp = ValueNumStore::VNPForEmptyExcSet();
+
+ while ((curSideEff->OperGet() == GT_COMMA) || (curSideEff->OperGet() == GT_ASG))
+ {
+ GenTreePtr op1 = curSideEff->gtOp.gtOp1;
+ GenTreePtr op2 = curSideEff->gtOp.gtOp2;
+
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(op1->gtVNPair, &op1vnp, &op1Xvnp);
+
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op1Xvnp);
+ curSideEff = op2;
+ }
+
+ // We may have inserted a narrowing cast during a previous remorph
+ // and it will not have a value number.
+ if ((curSideEff->OperGet() == GT_CAST) && !curSideEff->gtVNPair.BothDefined())
+ {
+ // The inserted cast will have no exceptional effects
+ assert(curSideEff->gtOverflow() == false);
+ // Process the exception effects from the cast's operand.
+ curSideEff = curSideEff->gtOp.gtOp1;
+ }
+
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(curSideEff->gtVNPair, &op2vnp, &op2Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(cseVal->gtVNPair, &op2vnp, &op2Xvnp);
+ exceptions_vnp = vnStore->VNPExcSetUnion(exceptions_vnp, op2Xvnp);
+
+ /* Create a comma node with the sideEffList as op1 */
+ cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, sideEffList, cseVal);
+ cse->gtVNPair = vnStore->VNPWithExc(op2vnp, exceptions_vnp);
+ }
+
+ exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
+
+ /* Unmark any nested CSE's in the sub-operands */
+
+ // But we do need to communicate the side effect list to optUnmarkCSEs
+ // as any part of the 'exp' tree that is in the sideEffList is preserved
+ // and is not deleted and does not have its ref counts decremented
+ //
+ m_pCompiler->optValnumCSE_UnmarkCSEs(exp, sideEffList);
+ }
+ else
+ {
+ /* This is a def of the CSE */
+ isDef = true;
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nCSE #%02u def at ", GET_CSE_INDEX(exp->gtCSEnum));
+ Compiler::printTreeID(exp);
+ printf(" replaced in BB%02u with def of V%02u\n", blk->bbNum, cseLclVarNum);
+ }
+#endif // DEBUG
+
+ exp->gtCSEnum = NO_CSE; // clear the gtCSEnum field
+
+ GenTreePtr val = exp;
+
+ /* Create an assignment of the value to the temp */
+ GenTreePtr asg = m_pCompiler->gtNewTempAssign(cseLclVarNum, val);
+
+ // assign the proper Value Numbers
+ asg->gtVNPair.SetBoth(ValueNumStore::VNForVoid()); // The GT_ASG node itself is $VN.Void
+ asg->gtOp.gtOp1->gtVNPair = val->gtVNPair; // The dest op is the same as 'val'
+
+ noway_assert(asg->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+ noway_assert(asg->gtOp.gtOp2 == val);
+
+ /* Create a reference to the CSE temp */
+ GenTreePtr ref = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
+ ref->gtVNPair = val->gtVNPair; // The new 'ref' is the same as 'val'
+
+ // If it has a zero-offset field seq, copy annotation to the ref
+ if (hasZeroMapAnnotation)
+ {
+ m_pCompiler->GetZeroOffsetFieldMap()->Set(ref, fldSeq);
+ }
+
+ /* Create a comma node for the CSE assignment */
+ cse = m_pCompiler->gtNewOperNode(GT_COMMA, expTyp, asg, ref);
+ cse->gtVNPair = ref->gtVNPair; // The comma's value is the same as 'val'
+ // as the assignment to the CSE LclVar
+ // cannot add any new exceptions
+ }
+
+ // Increment ref count for the CSE ref
+ m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
+
+ if (isDef)
+ {
+ // Also increment ref count for the CSE assignment
+ m_pCompiler->lvaTable[cseLclVarNum].incRefCnts(blk->getBBWeight(m_pCompiler), m_pCompiler);
+ }
+
+ // Walk the statement 'stm' and find the pointer
+ // in the tree that points to 'exp'
+ //
+ GenTreePtr* link = m_pCompiler->gtFindLink(stm, exp);
+
+#ifdef DEBUG
+ if (link == nullptr)
+ {
+ printf("\ngtFindLink failed: stm=");
+ Compiler::printTreeID(stm);
+ printf(", exp=");
+ Compiler::printTreeID(exp);
+ printf("\n");
+ printf("stm =");
+ m_pCompiler->gtDispTree(stm);
+ printf("\n");
+ printf("exp =");
+ m_pCompiler->gtDispTree(exp);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ noway_assert(link);
+
+ // Mutate this link, thus replacing the old exp with the new cse representation
+ //
+ *link = cse;
+
+ // If it has a zero-offset field seq, copy annotation.
+ if (hasZeroMapAnnotation)
+ {
+ m_pCompiler->GetZeroOffsetFieldMap()->Set(cse, fldSeq);
+ }
+
+ assert(m_pCompiler->fgRemoveRestOfBlock == false);
+
+ /* re-morph the statement */
+ m_pCompiler->fgMorphBlockStmt(blk, stm DEBUGARG("optValnumCSE"));
+
+ } while (lst != nullptr);
+ }
+
+ // Consider each of the CSE candidates and if the CSE passes
+ // the PromotionCheck then transform the CSE by calling PerformCSE
+ //
+ void ConsiderCandidates()
+ {
+ /* Consider each CSE candidate, in order of decreasing cost */
+ unsigned cnt = m_pCompiler->optCSECandidateCount;
+ Compiler::CSEdsc** ptr = sortTab;
+ for (; (cnt > 0); cnt--, ptr++)
+ {
+ Compiler::CSEdsc* dsc = *ptr;
+ CSE_Candidate candidate(this, dsc);
+
+ candidate.InitializeCounts();
+
+ if (candidate.UseCount() == 0)
+ {
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("Skipped CSE #%02u because use count is 0\n", candidate.CseIndex());
+ }
+#endif
+ continue;
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("\nConsidering CSE #%02u [def=%2u, use=%2u, cost=%2u] CSE Expression:\n", candidate.CseIndex(),
+ candidate.DefCount(), candidate.UseCount(), candidate.Cost());
+ m_pCompiler->gtDispTree(candidate.Expr());
+ printf("\n");
+ }
+#endif
+
+ if ((dsc->csdDefCount <= 0) || (dsc->csdUseCount == 0))
+ {
+ // If we reach this point, then the CSE def was incorrectly marked or the
+ // block with this use is unreachable. So skip and go to the next CSE.
+ // Without the "continue", we'd generate bad code in retail.
+ // Commented out a noway_assert(false) here due to bug: 3290124.
+ // The problem is that if there is a sub-graph that is not reachable from the
+ // entry point, the propagated CSE flags would be incorrect for it.
+ continue;
+ }
+
+ bool doCSE = PromotionCheck(&candidate);
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ if (doCSE)
+ {
+ printf("\nPromoting CSE:\n");
+ }
+ else
+ {
+ printf("Did Not promote this CSE\n");
+ }
+ }
+#endif // DEBUG
+
+ if (doCSE)
+ {
+ PerformCSE(&candidate);
+ }
+ }
+ }
+
+ // Perform the necessary cleanup after our CSE heuristics have run
+ //
+ void Cleanup()
+ {
+ if (m_addCSEcount > 0)
+ {
+ /* We've added new local variables to the lvaTable so note that we need to recreate the sorted table */
+ m_pCompiler->lvaSortAgain = true;
+ }
+ }
+};
+
+/*****************************************************************************
+ *
+ * Routine for performing the Value Number based CSE using our heuristics
+ */
+
+void Compiler::optValnumCSE_Heuristic()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n************ Trees at start of optValnumCSE_Heuristic()\n");
+ fgDumpTrees(fgFirstBB, nullptr);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ CSE_Heuristic cse_heuristic(this);
+
+ cse_heuristic.Initialize();
+ cse_heuristic.SortCandidates();
+ cse_heuristic.ConsiderCandidates();
+ cse_heuristic.Cleanup();
+}
+
+/*****************************************************************************
+ *
+ * Routine to unmark any CSEs contained within a tree
+ * - optionally a 'keepList' can be provided to specify a list of trees that will be kept
+ *
+ */
+
+void Compiler::optValnumCSE_UnmarkCSEs(GenTreePtr deadTree, GenTreePtr keepList)
+{
+ assert(optValnumCSE_phase);
+
+ // We need to communicate the 'keepList' to optUnmarkCSEs
+ // as any part of the 'deadTree' tree that is in the keepList is preserved
+ // and is not deleted and does not have its ref counts decremented
+ // We communicate this value using the walkData.pCallbackData field
+ //
+
+ fgWalkTreePre(&deadTree, optUnmarkCSEs, (void*)keepList);
+}
+
+/*****************************************************************************
+ *
+ * Perform common sub-expression elimination.
+ */
+
+void Compiler::optOptimizeValnumCSEs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optOptimizeValnumCSEs()\n");
+ }
+
+ if (optConfigDisableCSE())
+ {
+ return; // Disabled by JitNoCSE
+ }
+#endif
+
+ optValnumCSE_phase = true;
+
+ /* Initialize the expression tracking logic */
+
+ optValnumCSE_Init();
+
+ /* Locate interesting expressions and assign indices to them */
+
+ if (optValnumCSE_Locate() > 0)
+ {
+ optCSECandidateTotal += optCSECandidateCount;
+
+ optValnumCSE_InitDataFlow();
+
+ optValnumCSE_DataFlow();
+
+ optValnumCSE_Availablity();
+
+ optValnumCSE_Heuristic();
+ }
+
+ optValnumCSE_phase = false;
+}
+
+#endif // FEATURE_VALNUM_CSE
+
+/*****************************************************************************
+ *
+ * The following determines whether the given expression is a worthy CSE
+ * candidate.
+ */
+bool Compiler::optIsCSEcandidate(GenTreePtr tree)
+{
+ /* No good if the expression contains side effects or if it was marked as DONT CSE */
+
+ if (tree->gtFlags & (GTF_ASG | GTF_DONT_CSE))
+ {
+ return false;
+ }
+
+ /* The only reason a TYP_STRUCT tree might occur is as an argument to
+ GT_ADDR. It will never actually be materialized, so ignore them.
+ Also ignore TYP_VOIDs. */
+
+ var_types type = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+
+ // TODO-1stClassStructs: Enable CSE for struct types (depends on either transforming
+ // to use regular assignments, or handling copyObj).
+ if (varTypeIsStruct(type) || type == TYP_VOID)
+ {
+ return false;
+ }
+
+#ifdef _TARGET_X86_
+ if (type == TYP_FLOAT)
+ {
+ // TODO-X86-CQ: Revisit this
+ // Don't CSE a TYP_FLOAT on x86 as we currently can only enregister doubles
+ return false;
+ }
+#else
+ if (oper == GT_CNS_DBL)
+ {
+ // TODO-CQ: Revisit this
+ // Don't try to CSE a GT_CNS_DBL as they can represent both float and doubles
+ return false;
+ }
+#endif
+
+ unsigned cost;
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ cost = tree->gtCostSz;
+ }
+ else
+ {
+ cost = tree->gtCostEx;
+ }
+
+ /* Don't bother if the potential savings are very low */
+ if (cost < MIN_CSE_COST)
+ {
+ return false;
+ }
+
+#if !CSE_CONSTS
+ /* Don't bother with constants */
+ if (tree->OperKind() & GTK_CONST)
+ return false;
+#endif
+
+ /* Check for some special cases */
+
+ switch (oper)
+ {
+ case GT_CALL:
+ // If we have a simple helper call with no other persistent side-effects
+ // then we allow this tree to be a CSE candidate
+ //
+ if (gtTreeHasSideEffects(tree, GTF_PERSISTENT_SIDE_EFFECTS_IN_CSE) == false)
+ {
+ return true;
+ }
+ else
+ {
+ // Calls generally cannot be CSE-ed
+ return false;
+ }
+
+ case GT_IND:
+ // TODO-CQ: Review this...
+ /* We try to cse GT_ARR_ELEM nodes instead of GT_IND(GT_ARR_ELEM).
+ Doing the first allows cse to also kick in for code like
+ "GT_IND(GT_ARR_ELEM) = GT_IND(GT_ARR_ELEM) + xyz", whereas doing
+ the second would not allow it */
+
+ return (tree->gtOp.gtOp1->gtOper != GT_ARR_ELEM);
+
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+ case GT_CNS_DBL:
+ case GT_CNS_STR:
+ return true; // We reach here only when CSE_CONSTS is enabled
+
+ case GT_ARR_ELEM:
+ case GT_ARR_LENGTH:
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ return true;
+
+ case GT_LCL_VAR:
+ return false; // Can't CSE a volatile LCL_VAR
+
+ case GT_NEG:
+ case GT_NOT:
+ case GT_CAST:
+ return true; // CSE these Unary Operators
+
+ case GT_SUB:
+ case GT_DIV:
+ case GT_MOD:
+ case GT_UDIV:
+ case GT_UMOD:
+ case GT_OR:
+ case GT_AND:
+ case GT_XOR:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ return true; // CSE these Binary Operators
+
+ case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
+ case GT_MUL:
+ case GT_LSH:
+ if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
+ {
+ return false;
+ }
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ return true; // Also CSE these Comparison Operators
+
+ case GT_INTRINSIC:
+ return true; // Intrinsics
+
+ case GT_COMMA:
+ return true; // Allow GT_COMMA nodes to be CSE-ed.
+
+ case GT_COLON:
+ case GT_QMARK:
+ case GT_NOP:
+ case GT_RETURN:
+ return false; // Currently the only special nodes that we hit
+ // that we know that we don't want to CSE
+
+ default:
+ break; // Any new nodes that we might add later...
+ }
+
+ return false;
+}
+
+#ifdef DEBUG
+//
+// A Debug only method that allows you to control whether the CSE logic is enabled for this method.
+//
+// If this method returns false then the CSE phase should be performed.
+// If the method returns true then the CSE phase should be skipped.
+//
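+// Worked example (hypothetical JitNoCSE value, for illustration only): with
+// COMPlus_JitNoCSE=0xF002001 the 0xF000000 marker selects the mask mode,
+// bitsOne is 0x001 and bitsZero is 0x002, so CSE is disabled exactly for
+// methods whose (methodCount & 0xFFF) has bit 0 set and bit 1 clear,
+// e.g. method numbers 1, 5, 9, ...
+//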
+bool Compiler::optConfigDisableCSE()
+{
+ // Next check if COMPlus_JitNoCSE is set and applies to this method
+ //
+ unsigned jitNoCSE = JitConfig.JitNoCSE();
+
+ if (jitNoCSE > 0)
+ {
+ unsigned methodCount = Compiler::jitTotalMethodCompiled;
+ if ((jitNoCSE & 0xF000000) == 0xF000000)
+ {
+ unsigned methodCountMask = methodCount & 0xFFF;
+ unsigned bitsZero = (jitNoCSE >> 12) & 0xFFF;
+ unsigned bitsOne = (jitNoCSE >> 0) & 0xFFF;
+
+ if (((methodCountMask & bitsOne) == bitsOne) && ((~methodCountMask & bitsZero) == bitsZero))
+ {
+ if (verbose)
+ {
+ printf(" Disabled by JitNoCSE methodCountMask\n");
+ }
+
+ return true; // The CSE phase for this method is disabled
+ }
+ }
+ else if (jitNoCSE <= (methodCount + 1))
+ {
+ if (verbose)
+ {
+ printf(" Disabled by JitNoCSE > methodCount\n");
+ }
+
+ return true; // The CSE phase for this method is disabled
+ }
+ }
+
+ return false;
+}
+
+//
+// A Debug only method that allows you to control whether the CSE logic is enabled for
+// a particular CSE in a method
+//
+// If this method returns false then the CSE should be performed.
+// If the method returns true then the CSE should be skipped.
+//
+bool Compiler::optConfigDisableCSE2()
+{
+ static unsigned totalCSEcount = 0;
+
+ unsigned jitNoCSE2 = JitConfig.JitNoCSE2();
+
+ totalCSEcount++;
+
+ if (jitNoCSE2 > 0)
+ {
+ if ((jitNoCSE2 & 0xF000000) == 0xF000000)
+ {
+ unsigned totalCSEMask = totalCSEcount & 0xFFF;
+ unsigned bitsZero = (jitNoCSE2 >> 12) & 0xFFF;
+ unsigned bitsOne = (jitNoCSE2 >> 0) & 0xFFF;
+
+ if (((totalCSEMask & bitsOne) == bitsOne) && ((~totalCSEMask & bitsZero) == bitsZero))
+ {
+ if (verbose)
+ {
+ printf(" Disabled by jitNoCSE2 Ones/Zeros mask\n");
+ }
+ return true;
+ }
+ }
+ else if ((jitNoCSE2 & 0xF000000) == 0xE000000)
+ {
+ unsigned totalCSEMask = totalCSEcount & 0xFFF;
+ unsigned disableMask = jitNoCSE2 & 0xFFF;
+
+ disableMask >>= (totalCSEMask % 12);
+
+ if (disableMask & 1)
+ {
+ if (verbose)
+ {
+ printf(" Disabled by jitNoCSE2 rotating disable mask\n");
+ }
+ return true;
+ }
+ }
+ else if (jitNoCSE2 <= totalCSEcount)
+ {
+ if (verbose)
+ {
+ printf(" Disabled by jitNoCSE2 > totalCSEcount\n");
+ }
+ return true;
+ }
+ }
+ return false;
+}
+#endif
+
+void Compiler::optOptimizeCSEs()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optOptimizeCSEs()\n");
+ printf("Blocks/Trees at start of optOptimizeCSE phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ optCSECandidateCount = 0;
+ optCSEstart = lvaCount;
+
+#if FEATURE_VALNUM_CSE
+ INDEBUG(optEnsureClearCSEInfo());
+ optOptimizeValnumCSEs();
+ EndPhase(PHASE_OPTIMIZE_VALNUM_CSES);
+#endif // FEATURE_VALNUM_CSE
+}
+
+/*****************************************************************************
+ *
+ * Cleanup after CSE to allow us to run more than once.
+ */
+
+void Compiler::optCleanupCSEs()
+{
+ // We must clear the BBF_VISITED and BBF_MARKED flags
+ //
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ unsigned blkFlags = block->bbFlags;
+
+ // And clear all the "visited" bits on the block
+ //
+ block->bbFlags &= ~(BBF_VISITED | BBF_MARKED);
+
+ /* Walk the statement trees in this basic block */
+
+ GenTreePtr stmt;
+
+ // Initialize 'stmt' to the first non-Phi statement
+ stmt = block->FirstNonPhiDef();
+
+ for (; stmt; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ /* We must clear the gtCSEnum field */
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
+ {
+ tree->gtCSEnum = NO_CSE;
+ }
+ }
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Ensure that all the CSE information in the IR is initialized the way we expect it,
+ * before running a CSE phase. This is basically an assert that optCleanupCSEs() is not needed.
+ */
+
+void Compiler::optEnsureClearCSEInfo()
+{
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ assert((block->bbFlags & (BBF_VISITED | BBF_MARKED)) == 0);
+
+ /* Walk the statement trees in this basic block */
+
+ GenTreePtr stmt;
+
+ // Initialize 'stmt' to the first non-Phi statement
+ stmt = block->FirstNonPhiDef();
+
+ for (; stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtExpr; tree; tree = tree->gtPrev)
+ {
+ assert(tree->gtCSEnum == NO_CSE);
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************/
+#endif // FEATURE_ANYCSE
+/*****************************************************************************/
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
new file mode 100644
index 0000000000..0fbdb27770
--- /dev/null
+++ b/src/jit/optimizer.cpp
@@ -0,0 +1,8540 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Optimizer XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#pragma warning(disable : 4701)
+#endif
+
+/*****************************************************************************/
+
+#if COUNT_RANGECHECKS
+/* static */
+unsigned Compiler::optRangeChkRmv = 0;
+/* static */
+unsigned Compiler::optRangeChkAll = 0;
+#endif
+
+/*****************************************************************************/
+
+void Compiler::optInit()
+{
+ optLoopsMarked = false;
+ fgHasLoops = false;
+
+ /* Initialize the # of tracked loops to 0 */
+ optLoopCount = 0;
+ /* Keep track of the number of calls and indirect calls made by this method */
+ optCallCount = 0;
+ optIndirectCallCount = 0;
+ optNativeCallCount = 0;
+ optAssertionCount = 0;
+ optAssertionDep = nullptr;
+#if FEATURE_ANYCSE
+ optCSECandidateTotal = 0;
+ optCSEstart = UINT_MAX;
+ optCSEcount = 0;
+#endif // FEATURE_ANYCSE
+}
+
+DataFlow::DataFlow(Compiler* pCompiler) : m_pCompiler(pCompiler)
+{
+}
+
+/*****************************************************************************
+ *
+ */
+
+void Compiler::optSetBlockWeights()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ assert(fgDomsComputed);
+
+#ifdef DEBUG
+ bool changed = false;
+#endif
+
+ bool firstBBdomsRets = true;
+
+ BasicBlock* block;
+
+ for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
+ {
+ /* Blocks that can't be reached via the first block are rarely executed */
+ if (!fgReachable(fgFirstBB, block))
+ {
+ block->bbSetRunRarely();
+ }
+
+ if (block->bbWeight != BB_ZERO_WEIGHT)
+ {
+ // Calculate our bbWeight:
+ //
+ // o BB_UNITY_WEIGHT if we dominate all BBJ_RETURN blocks
+ // o otherwise BB_UNITY_WEIGHT / 2
+ //
+ bool domsRets = true; // Assume that we will dominate
+
+ for (BasicBlockList* retBlocks = fgReturnBlocks; retBlocks != nullptr; retBlocks = retBlocks->next)
+ {
+ if (!fgDominate(block, retBlocks->block))
+ {
+ domsRets = false;
+ break;
+ }
+ }
+
+ if (block == fgFirstBB)
+ {
+ firstBBdomsRets = domsRets;
+ }
+
+ // If we are not using profile weight then we lower the weight
+ // of blocks that do not dominate a return block
+ //
+ if (firstBBdomsRets && (fgIsUsingProfileWeights() == false) && (domsRets == false))
+ {
+#if DEBUG
+ changed = true;
+#endif
+ block->modifyBBWeight(block->bbWeight / 2);
+ noway_assert(block->bbWeight);
+ }
+ }
+ }
+
+#if DEBUG
+ if (changed && verbose)
+ {
+ printf("\nAfter optSetBlockWeights:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Marks the blocks between 'begBlk' and 'endBlk' as part of a loop.
+ */
+
+void Compiler::optMarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk, bool excludeEndBlk)
+{
+ /* Calculate the 'loopWeight':
+ this is the amount by which to increase each block in the loop.
+ Our heuristic is that loops are weighted eight times more
+ than straight line code.
+ Thus we increase each block by 7 times the weight of
+ the loop header block;
+ if the loops are all properly formed this gives us:
+ (assuming that BB_LOOP_WEIGHT is 8)
+
+ 1 -- non loop basic block
+ 8 -- single loop nesting
+ 64 -- double loop nesting
+ 512 -- triple loop nesting
+
+ */
+
+ noway_assert(begBlk->bbNum <= endBlk->bbNum);
+ noway_assert(begBlk->isLoopHead());
+ noway_assert(fgReachable(begBlk, endBlk));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nMarking loop L%02u", begBlk->bbLoopNum);
+ }
+#endif
+
+ noway_assert(!opts.MinOpts());
+
+ /* Build list of backedges for block begBlk */
+ flowList* backedgeList = nullptr;
+
+ for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ /* Is this a backedge? */
+ if (pred->flBlock->bbNum >= begBlk->bbNum)
+ {
+ flowList* flow = new (this, CMK_FlowList) flowList();
+
+#if MEASURE_BLOCK_SIZE
+ genFlowNodeCnt += 1;
+ genFlowNodeSize += sizeof(flowList);
+#endif // MEASURE_BLOCK_SIZE
+
+ flow->flNext = backedgeList;
+ flow->flBlock = pred->flBlock;
+ backedgeList = flow;
+ }
+ }
+
+ /* At least one backedge must have been found (the one from endBlk) */
+ noway_assert(backedgeList);
+
+ BasicBlock* curBlk = begBlk;
+
+ while (true)
+ {
+ noway_assert(curBlk);
+
+ // For curBlk to be part of a loop that starts at begBlk
+ // curBlk must be reachable from begBlk and (since this is a loop)
+ // likewise begBlk must be reachable from curBlk.
+ //
+
+ if (fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
+ {
+ /* If this block reaches any of the backedge blocks we set reachable */
+ /* If this block dominates any of the backedge blocks we set dominates */
+ bool reachable = false;
+ bool dominates = false;
+
+ for (flowList* tmp = backedgeList; tmp != nullptr; tmp = tmp->flNext)
+ {
+ BasicBlock* backedge = tmp->flBlock;
+
+ if (!curBlk->isRunRarely())
+ {
+ reachable |= fgReachable(curBlk, backedge);
+ dominates |= fgDominate(curBlk, backedge);
+
+ if (dominates && reachable)
+ {
+ break;
+ }
+ }
+ }
+
+ if (reachable)
+ {
+ noway_assert(curBlk->bbWeight > BB_ZERO_WEIGHT);
+
+ unsigned weight;
+
+ if ((curBlk->bbFlags & BBF_PROF_WEIGHT) != 0)
+ {
+ // We have real profile weights, so we aren't going to change this block's weight
+ weight = curBlk->bbWeight;
+ }
+ else
+ {
+ if (dominates)
+ {
+ weight = curBlk->bbWeight * BB_LOOP_WEIGHT;
+ }
+ else
+ {
+ weight = curBlk->bbWeight * (BB_LOOP_WEIGHT / 2);
+ }
+
+ //
+ // The multiplication may have caused us to overflow
+ //
+ if (weight < curBlk->bbWeight)
+ {
+ // The multiplication caused us to overflow
+ weight = BB_MAX_WEIGHT;
+ }
+ //
+ // Set the new weight
+ //
+ curBlk->modifyBBWeight(weight);
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
+ }
+#endif
+ }
+ }
+
+ /* Stop if we've reached the last block in the loop */
+
+ if (curBlk == endBlk)
+ {
+ break;
+ }
+
+ curBlk = curBlk->bbNext;
+
+ /* If we are excluding the endBlk then stop if we've reached endBlk */
+
+ if (excludeEndBlk && (curBlk == endBlk))
+ {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Unmark the blocks between 'begBlk' and 'endBlk' as part of a loop.
+ */
+
+void Compiler::optUnmarkLoopBlocks(BasicBlock* begBlk, BasicBlock* endBlk)
+{
+ /* A set of blocks that were previously marked as a loop are now
+ to be unmarked, since we have decided that for some reason this
+ loop no longer exists.
+ Basically we are just resetting the blocks' bbWeight to their
+ previous values.
+ */
+
+ noway_assert(begBlk->bbNum <= endBlk->bbNum);
+ noway_assert(begBlk->isLoopHead());
+
+ noway_assert(!opts.MinOpts());
+
+ BasicBlock* curBlk;
+ unsigned backEdgeCount = 0;
+
+ for (flowList* pred = begBlk->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ curBlk = pred->flBlock;
+
+ /* is this a backward edge? (from curBlk to begBlk) */
+
+ if (begBlk->bbNum > curBlk->bbNum)
+ {
+ continue;
+ }
+
+ /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
+
+ if ((curBlk->bbJumpKind != BBJ_COND) && (curBlk->bbJumpKind != BBJ_ALWAYS))
+ {
+ continue;
+ }
+
+ backEdgeCount++;
+ }
+
+ /* Only unmark the loop blocks if we have exactly one loop back edge */
+ if (backEdgeCount != 1)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (backEdgeCount > 0)
+ {
+ printf("\nNot removing loop L%02u, due to an additional back edge", begBlk->bbLoopNum);
+ }
+ else if (backEdgeCount == 0)
+ {
+ printf("\nNot removing loop L%02u, due to no back edge", begBlk->bbLoopNum);
+ }
+ }
+#endif
+ return;
+ }
+ noway_assert(backEdgeCount == 1);
+ noway_assert(fgReachable(begBlk, endBlk));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUnmarking loop L%02u", begBlk->bbLoopNum);
+ }
+#endif
+
+ curBlk = begBlk;
+ while (true)
+ {
+ noway_assert(curBlk);
+
+ // For curBlk to be part of a loop that starts at begBlk
+ // curBlk must be reachable from begBlk and (since this is a loop)
+ // likewise begBlk must be reachable from curBlk.
+ //
+ if (!curBlk->isRunRarely() && fgReachable(curBlk, begBlk) && fgReachable(begBlk, curBlk))
+ {
+ unsigned weight = curBlk->bbWeight;
+
+ // Don't unmark blocks that are set to BB_MAX_WEIGHT
+ // Don't unmark blocks when we are using profile weights
+ //
+ if (!curBlk->isMaxBBWeight() && ((curBlk->bbFlags & BBF_PROF_WEIGHT) == 0))
+ {
+ if (!fgDominate(curBlk, endBlk))
+ {
+ weight *= 2;
+ }
+ else
+ {
+ /* Merging of blocks can disturb the Dominates
+ information (see RAID #46649) */
+ if (weight < BB_LOOP_WEIGHT)
+ {
+ weight *= 2;
+ }
+ }
+
+ // We can overflow here so check for it
+ if (weight < curBlk->bbWeight)
+ {
+ weight = BB_MAX_WEIGHT;
+ }
+
+ assert(weight >= BB_LOOP_WEIGHT);
+
+ curBlk->modifyBBWeight(weight / BB_LOOP_WEIGHT);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n BB%02u(wt=%s)", curBlk->bbNum, refCntWtd2str(curBlk->getBBWeight(this)));
+ }
+#endif
+ }
+ /* Stop if we've reached the last block in the loop */
+
+ if (curBlk == endBlk)
+ {
+ break;
+ }
+
+ curBlk = curBlk->bbNext;
+
+ /* Stop if we go past the last block in the loop, as it may have been deleted */
+ if (curBlk->bbNum > endBlk->bbNum)
+ {
+ break;
+ }
+ }
+}
+
+/*****************************************************************************************************
+ *
+ * Function called to update the loop table and bbWeight before removing a block
+ */
+
+void Compiler::optUpdateLoopsBeforeRemoveBlock(BasicBlock* block, bool skipUnmarkLoop)
+{
+ if (!optLoopsMarked)
+ {
+ return;
+ }
+
+ noway_assert(!opts.MinOpts());
+
+ bool removeLoop = false;
+
+ /* If an unreachable block was part of a loop entry or bottom then the loop is unreachable */
+ /* Special case: the block was the head of a loop - or pointing to a loop entry */
+
+ for (unsigned loopNum = 0; loopNum < optLoopCount; loopNum++)
+ {
+ /* Some loops may have been already removed by
+ * loop unrolling or conditional folding */
+
+ if (optLoopTable[loopNum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (block == optLoopTable[loopNum].lpEntry || block == optLoopTable[loopNum].lpBottom)
+ {
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+ continue;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUpdateLoopsBeforeRemoveBlock Before: ");
+ optPrintLoopInfo(loopNum);
+ }
+#endif
+
+ /* If the loop is still in the table
+ * any block in the loop must be reachable !!! */
+
+ noway_assert(optLoopTable[loopNum].lpEntry != block);
+ noway_assert(optLoopTable[loopNum].lpBottom != block);
+
+ if (optLoopTable[loopNum].lpExit == block)
+ {
+ optLoopTable[loopNum].lpExit = nullptr;
+ optLoopTable[loopNum].lpFlags &= ~LPFLG_ONE_EXIT;
+ }
+
+ /* If this points to the actual entry in the loop
+ * then the whole loop may become unreachable */
+
+ switch (block->bbJumpKind)
+ {
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ if (block->bbNext == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ break;
+ }
+ if (block->bbJumpKind == BBJ_NONE)
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ noway_assert(block->bbJumpDest);
+ if (block->bbJumpDest == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ jumpCnt = block->bbJumpSwt->bbsCount;
+ jumpTab = block->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = true;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+
+ if (removeLoop)
+ {
+ /* Check if the entry has other predecessors outside the loop
+ * TODO: Replace this when predecessors are available */
+
+ BasicBlock* auxBlock;
+ for (auxBlock = fgFirstBB; auxBlock; auxBlock = auxBlock->bbNext)
+ {
+ /* Ignore blocks in the loop */
+
+ if (auxBlock->bbNum > optLoopTable[loopNum].lpHead->bbNum &&
+ auxBlock->bbNum <= optLoopTable[loopNum].lpBottom->bbNum)
+ {
+ continue;
+ }
+
+ switch (auxBlock->bbJumpKind)
+ {
+ unsigned jumpCnt;
+ BasicBlock** jumpTab;
+
+ case BBJ_NONE:
+ case BBJ_COND:
+ if (auxBlock->bbNext == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ break;
+ }
+ if (auxBlock->bbJumpKind == BBJ_NONE)
+ {
+ break;
+ }
+
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ noway_assert(auxBlock->bbJumpDest);
+ if (auxBlock->bbJumpDest == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ jumpCnt = auxBlock->bbJumpSwt->bbsCount;
+ jumpTab = auxBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ if ((*jumpTab) == optLoopTable[loopNum].lpEntry)
+ {
+ removeLoop = false;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ if (removeLoop)
+ {
+ optLoopTable[loopNum].lpFlags |= LPFLG_REMOVED;
+ }
+ }
+ else if (optLoopTable[loopNum].lpHead == block)
+ {
+ /* The loop has a new head - Just update the loop table */
+ optLoopTable[loopNum].lpHead = block->bbPrev;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUpdateLoopsBeforeRemoveBlock After: ");
+ optPrintLoopInfo(loopNum);
+ }
+#endif
+ }
+
+ if ((skipUnmarkLoop == false) && ((block->bbJumpKind == BBJ_ALWAYS) || (block->bbJumpKind == BBJ_COND)) &&
+ (block->bbJumpDest->isLoopHead()) && (block->bbJumpDest->bbNum <= block->bbNum) && fgDomsComputed &&
+ (fgCurBBEpochSize == fgDomBBcount + 1) && fgReachable(block->bbJumpDest, block))
+ {
+ optUnmarkLoopBlocks(block->bbJumpDest, block);
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Given the beginBlock of the loop, return the index of this loop
+ * to the loop table.
+ */
+
+unsigned Compiler::optFindLoopNumberFromBeginBlock(BasicBlock* begBlk)
+{
+ unsigned lnum = 0;
+
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpHead->bbNext == begBlk)
+ {
+ // Found the loop.
+ return lnum;
+ }
+ }
+
+ noway_assert(!"Loop number not found.");
+
+ return optLoopCount;
+}
+
+/*****************************************************************************
+ *
+ * Print loop info in a uniform way.
+ */
+
+void Compiler::optPrintLoopInfo(unsigned loopInd,
+ BasicBlock* lpHead,
+ BasicBlock* lpFirst,
+ BasicBlock* lpTop,
+ BasicBlock* lpEntry,
+ BasicBlock* lpBottom,
+ unsigned char lpExitCnt,
+ BasicBlock* lpExit,
+ unsigned parentLoop)
+{
+ noway_assert(lpHead);
+
+ //
+ // NOTE: we take "loopInd" as an argument instead of using the one
+ // stored in begBlk->bbLoopNum because sometimes begBlk->bbLoopNum
+ // has not been set correctly, for example in optRecordLoop().
+ // However, in most cases, loops should have been recorded.
+ // Therefore the correct way is to call the Compiler::optPrintLoopInfo(unsigned lnum)
+ // version of this method.
+ //
+ printf("L%02u, from BB%02u", loopInd, lpFirst->bbNum);
+ if (lpTop != lpFirst)
+ {
+ printf(" (loop top is BB%02u)", lpTop->bbNum);
+ }
+
+ printf(" to BB%02u (Head=BB%02u, Entry=BB%02u, ExitCnt=%d", lpBottom->bbNum, lpHead->bbNum, lpEntry->bbNum,
+ lpExitCnt);
+
+ if (lpExitCnt == 1)
+ {
+ printf(" at BB%02u", lpExit->bbNum);
+ }
+
+ if (parentLoop != BasicBlock::NOT_IN_LOOP)
+ {
+ printf(", parent loop = L%02u", parentLoop);
+ }
+ printf(")");
+}
+
+/*****************************************************************************
+ *
+ * Print loop information given the index of the loop in the loop table.
+ */
+
+void Compiler::optPrintLoopInfo(unsigned lnum)
+{
+ noway_assert(lnum < optLoopCount);
+
+ LoopDsc* ldsc = &optLoopTable[lnum]; // lnum is the INDEX to the loop table.
+
+ optPrintLoopInfo(lnum, ldsc->lpHead, ldsc->lpFirst, ldsc->lpTop, ldsc->lpEntry, ldsc->lpBottom, ldsc->lpExitCnt,
+ ldsc->lpExit, ldsc->lpParent);
+}
+
+#endif
+
+//------------------------------------------------------------------------
+// optPopulateInitInfo: Populate loop init info in the loop table.
+//
+// Arguments:
+// init - the tree that is supposed to initialize the loop iterator.
+// iterVar - loop iteration variable.
+//
+// Return Value:
+// "false" if the loop table could not be populated with the loop iterVar init info.
+//
+// Operation:
+// The 'init' tree is checked to see whether its lhs is a local (matching "iterVar")
+// and its rhs is either a constant or a local.
+//
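+// Example (illustrative):
+// For a loop written as "for (int i = 0; ...)" the init tree is ASG(lclVar 'i', const 0),
+// so LPFLG_CONST_INIT is set and lpConstInit records 0. For "for (int i = n; ...)" the
+// rhs is a local, so LPFLG_VAR_INIT is set and lpVarInit records the local number of 'n'.
+//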
+bool Compiler::optPopulateInitInfo(unsigned loopInd, GenTreePtr init, unsigned iterVar)
+{
+ // Operator should be =
+ if (init->gtOper != GT_ASG)
+ {
+ return false;
+ }
+
+ GenTreePtr lhs = init->gtOp.gtOp1;
+ GenTreePtr rhs = init->gtOp.gtOp2;
+ // LHS has to be local and should equal iterVar.
+ if (lhs->gtOper != GT_LCL_VAR || lhs->gtLclVarCommon.gtLclNum != iterVar)
+ {
+ return false;
+ }
+
+ // RHS can be constant or local var.
+ // TODO-CQ: CLONE: Add arr length for descending loops.
+ if (rhs->gtOper == GT_CNS_INT && rhs->TypeGet() == TYP_INT)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST_INIT;
+ optLoopTable[loopInd].lpConstInit = (int)rhs->gtIntCon.gtIconVal;
+ }
+ else if (rhs->gtOper == GT_LCL_VAR)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_VAR_INIT;
+ optLoopTable[loopInd].lpVarInit = rhs->gtLclVarCommon.gtLclNum;
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optCheckIterInLoopTest: Check if iter var is used in loop test.
+//
+// Arguments:
+// test "jtrue" tree or an asg of the loop iter termination condition
+// from/to blocks (beg, end) which are part of the loop.
+// iterVar loop iteration variable.
+// loopInd loop index.
+//
+// Operation:
+// The test tree is parsed to check if "iterVar" matches the lhs of the condition
+// and the rhs limit is extracted from the "test" tree. The limit information is
+// added to the loop table.
+//
+// Return Value:
+// "false" if the loop table could not be populated with the loop test info or
+// if the test condition doesn't involve iterVar.
+//
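+//
+// Example (illustrative):
+// For a test such as "i < 10" the limit is a constant, so LPFLG_CONST_LIMIT is set;
+// for "i < n", where 'n' is a local not assigned in the loop, LPFLG_VAR_LIMIT is set;
+// and for "i < a.Length" LPFLG_ARRLEN_LIMIT is set. In all cases lpTestTree records
+// the comparison node.
+//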
+bool Compiler::optCheckIterInLoopTest(
+ unsigned loopInd, GenTreePtr test, BasicBlock* from, BasicBlock* to, unsigned iterVar)
+{
+ // Obtain the relop from the "test" tree.
+ GenTreePtr relop;
+ if (test->gtOper == GT_JTRUE)
+ {
+ relop = test->gtGetOp1();
+ }
+ else
+ {
+ assert(test->gtOper == GT_ASG);
+ relop = test->gtGetOp2();
+ }
+
+ noway_assert(relop->OperKind() & GTK_RELOP);
+
+ GenTreePtr opr1 = relop->gtOp.gtOp1;
+ GenTreePtr opr2 = relop->gtOp.gtOp2;
+
+ GenTreePtr iterOp;
+ GenTreePtr limitOp;
+
+ // Make sure op1 or op2 is the iterVar.
+ if (opr1->gtOper == GT_LCL_VAR && opr1->gtLclVarCommon.gtLclNum == iterVar)
+ {
+ iterOp = opr1;
+ limitOp = opr2;
+ }
+ else if (opr2->gtOper == GT_LCL_VAR && opr2->gtLclVarCommon.gtLclNum == iterVar)
+ {
+ iterOp = opr2;
+ limitOp = opr1;
+ }
+ else
+ {
+ return false;
+ }
+
+ if (iterOp->gtType != TYP_INT)
+ {
+ return false;
+ }
+
+ // Mark the iterator node.
+ iterOp->gtFlags |= GTF_VAR_ITERATOR;
+
+ // Check what type of limit we have - constant, variable or arr-len.
+ if (limitOp->gtOper == GT_CNS_INT)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST_LIMIT;
+ }
+ else if (limitOp->gtOper == GT_LCL_VAR && !optIsVarAssigned(from, to, nullptr, limitOp->gtLclVarCommon.gtLclNum))
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_VAR_LIMIT;
+ }
+ else if (limitOp->gtOper == GT_ARR_LENGTH)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_ARRLEN_LIMIT;
+ }
+ else
+ {
+ return false;
+ }
+ // Save the type of the comparison between the iterator and the limit.
+ optLoopTable[loopInd].lpTestTree = relop;
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optIsLoopIncrTree: Check if the incr tree is of the form "v += 1" or "v = v + 1"
+//
+// Arguments:
+// incr The incr tree to be checked; it may be an oper-equal (+=, -=, ...)
+// node or a "v = v + 1" type ASG node.
+//
+// Operation:
+// The incr tree is parsed to check that it updates a local variable using one of
+// the supported operators (add, sub, mul, lsh, rsh) and that the increment amount
+// is a constant int.
+//
+// Return Value:
+// iterVar local num if the iterVar is found, otherwise BAD_VAR_NUM.
+//
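+// Example (illustrative):
+// "i += 2" or "i = i + 2" returns the local number of 'i', since the update operator
+// (GT_ADD) is supported and the increment is a constant int; "i += j" returns
+// BAD_VAR_NUM because the increment is not a constant.
+//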
+unsigned Compiler::optIsLoopIncrTree(GenTreePtr incr)
+{
+ GenTree* incrVal;
+ genTreeOps updateOper;
+ unsigned iterVar = incr->IsLclVarUpdateTree(&incrVal, &updateOper);
+ if (iterVar != BAD_VAR_NUM)
+ {
+ // We have v = v op y type asg node.
+ switch (updateOper)
+ {
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_RSH:
+ case GT_LSH:
+ break;
+ default:
+ return BAD_VAR_NUM;
+ }
+
+ // Increment should be by a const int.
+ // TODO-CQ: CLONE: allow variable increments.
+ if ((incrVal->gtOper != GT_CNS_INT) || (incrVal->TypeGet() != TYP_INT))
+ {
+ return BAD_VAR_NUM;
+ }
+ }
+
+ return iterVar;
+}
+
+//----------------------------------------------------------------------------------
+// optComputeIterInfo: Check that the tree is a loop increment of a lcl that is not otherwise assigned in the loop.
+//
+// Arguments:
+// from, to - are blocks (beg, end) which are part of the loop.
+// incr - tree that increments the loop iterator. v+=1 or v=v+1.
+// pIterVar - see return value.
+//
+// Return Value:
+// Returns true if iterVar "v" can be returned in "pIterVar", otherwise returns
+// false.
+//
+// Operation:
+// Check if the "incr" tree is a "v=v+1 or v+=1" type tree and make sure it is not
+// assigned in the loop.
+//
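+// Example (illustrative):
+// For "for (i = 0; i < n; i++) { ... }" the incr tree "i++" yields 'i' as the iterator;
+// if the loop body also assigned 'i' (say "i = x"), optIsVarAssigned would detect the
+// extra assignment and we would return false.
+//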
+bool Compiler::optComputeIterInfo(GenTreePtr incr, BasicBlock* from, BasicBlock* to, unsigned* pIterVar)
+{
+
+ unsigned iterVar = optIsLoopIncrTree(incr);
+ if (iterVar == BAD_VAR_NUM)
+ {
+ return false;
+ }
+ if (optIsVarAssigned(from, to, incr, iterVar))
+ {
+ JITDUMP("iterVar is assigned in loop\n");
+ return false;
+ }
+
+ *pIterVar = iterVar;
+ return true;
+}
+
+//----------------------------------------------------------------------------------
+// optIsLoopTestEvalIntoTemp:
+// Pattern match whether the test tree is computed into a tmp
+// and that "tmp" is used as the jump condition for loop termination.
+//
+// Arguments:
+// testStmt - is the JTRUE statement that is of the form: jmpTrue (Vtmp != 0)
+// where Vtmp contains the actual loop test result.
+// newStmt - contains the statement that is the actual test stmt involving
+// the loop iterator.
+//
+// Return Value:
+// Returns true if a new test tree can be obtained.
+//
+// Operation:
+// Check whether the current stmt is a jtrue with (Vtmp != 0) as its condition.
+// If so, return the statement defining Vtmp (whose rhs is the actual compare) as the "test" node.
+//
+// Note:
+// This method just retrieves what it thinks is the "test" node;
+// the callers are expected to verify that "iterVar" is used in the test.
+//
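+// Example (illustrative):
+// tmp = (i < n); // prev stmt: the compare evaluated into Vtmp
+// jmpTrue(tmp != 0); // testStmt: the loop termination jump
+// Here the statement defining 'tmp' is returned as the new "test" node.
+//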
+bool Compiler::optIsLoopTestEvalIntoTemp(GenTreePtr testStmt, GenTreePtr* newTest)
+{
+ GenTreePtr test = testStmt->gtStmt.gtStmtExpr;
+
+ if (test->gtOper != GT_JTRUE)
+ {
+ return false;
+ }
+
+ GenTreePtr relop = test->gtGetOp1();
+ noway_assert(relop->OperIsCompare());
+
+ GenTreePtr opr1 = relop->gtOp.gtOp1;
+ GenTreePtr opr2 = relop->gtOp.gtOp2;
+
+ // Make sure we have jtrue (vtmp != 0)
+ if ((relop->OperGet() == GT_NE) && (opr1->OperGet() == GT_LCL_VAR) && (opr2->OperGet() == GT_CNS_INT) &&
+ opr2->IsIntegralConst(0))
+ {
+ // Get the previous statement to get the def (rhs) of Vtmp to see
+ // if the "test" is evaluated into Vtmp.
+ GenTreePtr prevStmt = testStmt->gtPrev;
+ if (prevStmt == nullptr)
+ {
+ return false;
+ }
+
+ GenTreePtr tree = prevStmt->gtStmt.gtStmtExpr;
+ if (tree->OperGet() == GT_ASG)
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+
+ // Return as the new test node.
+ if (lhs->gtOper == GT_LCL_VAR && lhs->AsLclVarCommon()->GetLclNum() == opr1->AsLclVarCommon()->GetLclNum())
+ {
+ if (rhs->OperIsCompare())
+ {
+ *newTest = prevStmt;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//----------------------------------------------------------------------------------
+// optExtractInitTestIncr:
+// Extract the "init", "test" and "incr" nodes of the loop.
+//
+// Arguments:
+// head - Loop head block
+// bottom - Loop bottom block
+// top - Loop top block
+// ppInit - The init stmt of the loop if found.
+// ppTest - The test stmt of the loop if found.
+// ppIncr - The incr stmt of the loop if found.
+//
+// Return Value:
+// The results are put in "ppInit", "ppTest" and "ppIncr" if the method
+// returns true. Returns false if the information can't be extracted.
+//
+// Operation:
+// Check if the last stmt in the loop "bottom" is the loop test; if so, the
+// "test" stmt has been found. Then try to find the "incr" stmt by checking the
+// stmt previous to "test". If it is not found there, the loop could be of the
+// form shown below.
+//
+// +-------<-----------------<-----------+
+// | |
+// v |
+// BBinit(head) -> BBcond(top) -> BBLoopBody(bottom) ---^
+//
+// Check if the "incr" tree is present in the loop "top" node as the last stmt.
+// Also check if the "test" tree is assigned to a tmp node and the tmp is used
+// in the jtrue condition.
+//
+// Note:
+// This method just retrieves what it thinks is the "test" node;
+// the callers are expected to verify that "iterVar" is used in the test.
+//
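+// Example (illustrative):
+// head: ...; i = 0; // "init" is the last stmt of the pre-header
+// bottom: ...; i = i + 1; jmpTrue(i < n); // "incr" and "test" are the last two stmts
+//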
+bool Compiler::optExtractInitTestIncr(
+ BasicBlock* head, BasicBlock* bottom, BasicBlock* top, GenTreePtr* ppInit, GenTreePtr* ppTest, GenTreePtr* ppIncr)
+{
+ assert(ppInit != nullptr);
+ assert(ppTest != nullptr);
+ assert(ppIncr != nullptr);
+
+ // Check if last two statements in the loop body are the increment of the iterator
+ // and the loop termination test.
+ noway_assert(bottom->bbTreeList != nullptr);
+ GenTreePtr test = bottom->bbTreeList->gtPrev;
+ noway_assert(test != nullptr && test->gtNext == nullptr);
+
+ GenTreePtr newTest;
+ if (optIsLoopTestEvalIntoTemp(test, &newTest))
+ {
+ test = newTest;
+ }
+
+ // Check if we have the incr tree before the test tree, if we don't,
+ // check if incr is part of the loop "top".
+ GenTreePtr incr = test->gtPrev;
+ if (incr == nullptr || optIsLoopIncrTree(incr->gtStmt.gtStmtExpr) == BAD_VAR_NUM)
+ {
+ if (top == nullptr || top->bbTreeList == nullptr || top->bbTreeList->gtPrev == nullptr)
+ {
+ return false;
+ }
+
+ // Check if the last stmt of the loop "top" block is the incr tree.
+ GenTreePtr topLast = top->bbTreeList->gtPrev;
+ if (optIsLoopIncrTree(topLast->gtStmt.gtStmtExpr) != BAD_VAR_NUM)
+ {
+ incr = topLast;
+ }
+ else
+ {
+ return false;
+ }
+ }
+
+ assert(test != incr);
+
+ // Find the last statement in the loop pre-header which we expect to be the initialization of
+ // the loop iterator.
+ GenTreePtr phdr = head->bbTreeList;
+ if (phdr == nullptr)
+ {
+ return false;
+ }
+
+ GenTreePtr init = phdr->gtPrev;
+ noway_assert(init != nullptr && (init->gtNext == nullptr));
+
+ // If it is a duplicated loop condition, skip it.
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ // Must be a duplicated loop condition.
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+ init = init->gtPrev;
+ noway_assert(init != nullptr);
+ }
+
+ noway_assert(init->gtOper == GT_STMT);
+ noway_assert(test->gtOper == GT_STMT);
+ noway_assert(incr->gtOper == GT_STMT);
+
+ *ppInit = init->gtStmt.gtStmtExpr;
+ *ppTest = test->gtStmt.gtStmtExpr;
+ *ppIncr = incr->gtStmt.gtStmtExpr;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Record the loop in the loop table.
+ */
+
+void Compiler::optRecordLoop(BasicBlock* head,
+ BasicBlock* first,
+ BasicBlock* top,
+ BasicBlock* entry,
+ BasicBlock* bottom,
+ BasicBlock* exit,
+ unsigned char exitCnt)
+{
+ // Record this loop in the table, if there's room.
+
+ assert(optLoopCount <= MAX_LOOP_NUM);
+ if (optLoopCount == MAX_LOOP_NUM)
+ {
+#if COUNT_LOOPS
+ loopOverflowThisMethod = true;
+#endif
+ return;
+ }
+
+ // Assumed preconditions on the loop we're adding.
+ assert(first->bbNum <= top->bbNum);
+ assert(top->bbNum <= entry->bbNum);
+ assert(entry->bbNum <= bottom->bbNum);
+ assert(head->bbNum < top->bbNum || head->bbNum > bottom->bbNum);
+
+ // If the new loop contains any existing ones, add it in the right place.
+ unsigned char loopInd = optLoopCount;
+ for (unsigned char prevPlus1 = optLoopCount; prevPlus1 > 0; prevPlus1--)
+ {
+ unsigned char prev = prevPlus1 - 1;
+ if (optLoopTable[prev].lpContainedBy(first, bottom))
+ {
+ loopInd = prev;
+ }
+ }
+ // Move up any loops if necessary.
+ for (unsigned j = optLoopCount; j > loopInd; j--)
+ {
+ optLoopTable[j] = optLoopTable[j - 1];
+ }
+
+#ifdef DEBUG
+ for (unsigned i = loopInd + 1; i < optLoopCount; i++)
+ {
+ // The loop is well-formed.
+ assert(optLoopTable[i].lpWellFormed());
+ // Check for disjoint.
+ if (optLoopTable[i].lpDisjoint(first, bottom))
+ {
+ continue;
+ }
+ // Otherwise, assert complete containment (of optLoopTable[i] in new loop).
+ assert(optLoopTable[i].lpContainedBy(first, bottom));
+ }
+#endif // DEBUG
+
+ optLoopTable[loopInd].lpHead = head;
+ optLoopTable[loopInd].lpFirst = first;
+ optLoopTable[loopInd].lpTop = top;
+ optLoopTable[loopInd].lpBottom = bottom;
+ optLoopTable[loopInd].lpEntry = entry;
+ optLoopTable[loopInd].lpExit = exit;
+ optLoopTable[loopInd].lpExitCnt = exitCnt;
+
+ optLoopTable[loopInd].lpParent = BasicBlock::NOT_IN_LOOP;
+ optLoopTable[loopInd].lpChild = BasicBlock::NOT_IN_LOOP;
+ optLoopTable[loopInd].lpSibling = BasicBlock::NOT_IN_LOOP;
+
+ optLoopTable[loopInd].lpFlags = 0;
+
+ // We haven't yet recorded any side effects.
+ optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
+ optLoopTable[loopInd].lpFieldsModified = nullptr;
+ optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
+
+ // If DO-WHILE loop mark it as such.
+ if (head->bbNext == entry)
+ {
+ optLoopTable[loopInd].lpFlags |= LPFLG_DO_WHILE;
+ }
+
+ // If single exit loop mark it as such.
+ if (exitCnt == 1)
+ {
+ noway_assert(exit);
+ optLoopTable[loopInd].lpFlags |= LPFLG_ONE_EXIT;
+ }
+
+ //
+ // Try to find loops that have an iterator (i.e. for-like loops) "for (init; test; incr){ ... }"
+ // We have the following restrictions:
+ // 1. The loop condition must be a simple one i.e. only one JTRUE node
+ // 2. There must be a loop iterator (a local var) that is
+ // incremented (decremented or lsh, rsh, mul) with a constant value
+ // 3. The iterator is incremented exactly once
+ // 4. The loop condition must use the iterator.
+ //
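+ // For example (illustrative), a loop such as "for (int i = 0; i < 10; i++) { ... }"
+ // satisfies these restrictions: 'i' is the iterator, it is incremented exactly once
+ // by the constant 1, and the single JTRUE condition "i < 10" uses it.
+ //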
+ if (bottom->bbJumpKind == BBJ_COND)
+ {
+ GenTreePtr init;
+ GenTreePtr test;
+ GenTreePtr incr;
+ if (!optExtractInitTestIncr(head, bottom, top, &init, &test, &incr))
+ {
+ goto DONE_LOOP;
+ }
+
+ unsigned iterVar = BAD_VAR_NUM;
+ if (!optComputeIterInfo(incr, head->bbNext, bottom, &iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // Make sure the "iterVar" initialization is never skipped,
+ // i.e. HEAD dominates the ENTRY.
+ if (!fgDominate(head, entry))
+ {
+ goto DONE_LOOP;
+ }
+
+ if (!optPopulateInitInfo(loopInd, init, iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // Check that the iterator is used in the loop condition.
+ if (!optCheckIterInLoopTest(loopInd, test, head->bbNext, bottom, iterVar))
+ {
+ goto DONE_LOOP;
+ }
+
+ // We know the loop has an iterator at this point ->flag it as LPFLG_ITER
+ // Record the iterator, the pointer to the test node
+ // and the initial value of the iterator (constant or local var)
+ optLoopTable[loopInd].lpFlags |= LPFLG_ITER;
+
+ // Record iterator.
+ optLoopTable[loopInd].lpIterTree = incr;
+
+#if COUNT_LOOPS
+ // Save the initial value of the iterator - can be lclVar or constant
+ // Flag the loop accordingly.
+
+ iterLoopCount++;
+#endif
+
+#if COUNT_LOOPS
+ simpleTestLoopCount++;
+#endif
+
+ // Check if a constant iteration loop.
+ if ((optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT) && (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT))
+ {
+ // This is a constant loop.
+ optLoopTable[loopInd].lpFlags |= LPFLG_CONST;
+#if COUNT_LOOPS
+ constIterLoopCount++;
+#endif
+ }
+
+#ifdef DEBUG
+ if (verbose && 0)
+ {
+ printf("\nConstant loop initializer:\n");
+ gtDispTree(init);
+
+ printf("\nConstant loop body:\n");
+
+ BasicBlock* block = head;
+ do
+ {
+ block = block->bbNext;
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ if (stmt->gtStmt.gtStmtExpr == incr)
+ {
+ break;
+ }
+ printf("\n");
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ }
+ } while (block != bottom);
+ }
+#endif // DEBUG
+ }
+
+DONE_LOOP:
+ DBEXEC(verbose, optPrintLoopRecording(loopInd));
+ optLoopCount++;
+}
+
+#ifdef DEBUG
+//------------------------------------------------------------------------
+// optPrintLoopRecording: Print a recording of the loop.
+//
+// Arguments:
+// loopInd - loop index.
+//
+void Compiler::optPrintLoopRecording(unsigned loopInd)
+{
+ printf("Recorded loop %s", (loopInd != optLoopCount ? "(extended) " : ""));
+ optPrintLoopInfo(optLoopCount, // Not necessarily the loop index, but the number of loops that have been added.
+ optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpFirst, optLoopTable[loopInd].lpTop,
+ optLoopTable[loopInd].lpEntry, optLoopTable[loopInd].lpBottom, optLoopTable[loopInd].lpExitCnt,
+ optLoopTable[loopInd].lpExit);
+
+ // If an iterator loop print the iterator and the initialization.
+ if (optLoopTable[loopInd].lpFlags & LPFLG_ITER)
+ {
+ printf(" [over V%02u", optLoopTable[loopInd].lpIterVar());
+ printf(" (");
+ printf(GenTree::NodeName(optLoopTable[loopInd].lpIterOper()));
+ printf(" ");
+ printf("%d )", optLoopTable[loopInd].lpIterConst());
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_INIT)
+ {
+ printf(" from %d", optLoopTable[loopInd].lpConstInit);
+ }
+ if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_INIT)
+ {
+ printf(" from V%02u", optLoopTable[loopInd].lpVarInit);
+ }
+
+ // If it is a simple test condition, print the operator and the limits
+ printf(GenTree::NodeName(optLoopTable[loopInd].lpTestOper()));
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_CONST_LIMIT)
+ {
+ printf("%d ", optLoopTable[loopInd].lpConstLimit());
+ }
+
+ if (optLoopTable[loopInd].lpFlags & LPFLG_VAR_LIMIT)
+ {
+ printf("V%02u ", optLoopTable[loopInd].lpVarLimit());
+ }
+
+ printf("]");
+ }
+
+ printf("\n");
+}
+
+void Compiler::optCheckPreds()
+{
+ BasicBlock* block;
+ BasicBlock* blockPred;
+ flowList* pred;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ for (pred = block->bbPreds; pred; pred = pred->flNext)
+ {
+ // make sure this pred is part of the BB list
+ for (blockPred = fgFirstBB; blockPred; blockPred = blockPred->bbNext)
+ {
+ if (blockPred == pred->flBlock)
+ {
+ break;
+ }
+ }
+ noway_assert(blockPred);
+ switch (blockPred->bbJumpKind)
+ {
+ case BBJ_COND:
+ if (blockPred->bbJumpDest == block)
+ {
+ break;
+ }
+ __fallthrough;
+ case BBJ_NONE:
+ noway_assert(blockPred->bbNext == block);
+ break;
+ case BBJ_EHFILTERRET:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(blockPred->bbJumpDest == block);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*****************************************************************************
+ * Find the natural loops, using dominators. Note that the test for
+ * a loop is slightly different from the standard one, because we have
+ * not done a depth first reordering of the basic blocks.
+ */
+
+void Compiler::optFindNaturalLoops()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optFindNaturalLoops()\n");
+ }
+#endif // DEBUG
+
+ flowList* pred;
+ flowList* predTop;
+ flowList* predEntry;
+
+ noway_assert(fgDomsComputed);
+ assert(fgHasLoops);
+
+#if COUNT_LOOPS
+ hasMethodLoops = false;
+ loopsThisMethod = 0;
+ loopOverflowThisMethod = false;
+#endif
+
+ /* We will use the following terminology:
+ * HEAD - the basic block that flows into the loop ENTRY block (Currently MUST be lexically before entry).
+ Not part of the loop itself.
+ * FIRST - the lexically first basic block (in bbNext order) within this loop. (May be part of a nested loop,
+ * but not the outer loop. ???)
+ * TOP - the target of the backward edge from BOTTOM. In most cases FIRST and TOP are the same.
+ * BOTTOM - the lexically last block in the loop (i.e. the block from which we jump to the top)
+ * EXIT - the loop exit or the block right after the bottom
+ * ENTRY - the entry in the loop (not necessarily the TOP), but there must be only one entry
+ *
+ * We (currently) require the body of a loop to be a contiguous (in bbNext order) sequence of basic blocks.
+
+ |
+ v
+ head
+ |
+ | top/beg <--+
+ | | |
+ | ... |
+ | | |
+ | v |
+ +---> entry |
+ | |
+ ... |
+ | |
+ v |
+ +-- exit/tail |
+ | | |
+ | ... |
+ | | |
+ | v |
+ | bottom ---+
+ |
+ +------+
+ |
+ v
+
+ */
+
+ BasicBlock* head;
+ BasicBlock* top;
+ BasicBlock* bottom;
+ BasicBlock* entry;
+ BasicBlock* exit;
+ unsigned char exitCount;
+
+ for (head = fgFirstBB; head->bbNext; head = head->bbNext)
+ {
+ top = head->bbNext;
+ exit = nullptr;
+ exitCount = 0;
+
+ // Blocks that are rarely run have a zero bbWeight and should
+ // never be optimized here
+
+ if (top->bbWeight == BB_ZERO_WEIGHT)
+ {
+ continue;
+ }
+
+ for (pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ /* Is this a loop candidate? - We look for "back edges", i.e. an edge from BOTTOM
+ * to TOP (note that this is an abuse of notation since this is not necessarily a back edge
+ * as the definition says, but merely an indication that we have a loop there).
+ * Thus, we have to be very careful and after entry discovery check that it is indeed
+ * the only place we enter the loop (especially for non-reducible flow graphs).
+ */
+
+ bottom = pred->flBlock;
+ exitCount = 0;
+
+ if (top->bbNum <= bottom->bbNum) // is this a backward edge? (from BOTTOM to TOP)
+ {
+ if ((bottom->bbJumpKind == BBJ_EHFINALLYRET) || (bottom->bbJumpKind == BBJ_EHFILTERRET) ||
+ (bottom->bbJumpKind == BBJ_EHCATCHRET) || (bottom->bbJumpKind == BBJ_CALLFINALLY) ||
+ (bottom->bbJumpKind == BBJ_SWITCH))
+ {
+ /* BBJ_EHFINALLYRET, BBJ_EHFILTERRET, BBJ_EHCATCHRET, and BBJ_CALLFINALLY can never form a loop.
+ * BBJ_SWITCH that has a backward jump appears only for labeled break. */
+ goto NO_LOOP;
+ }
+
+ BasicBlock* loopBlock;
+
+ /* The presence of a "back edge" is an indication that a loop might be present here
+ *
+ * LOOP:
+ * 1. A collection of STRONGLY CONNECTED nodes i.e. there is a path from any
+ * node in the loop to any other node in the loop (wholly within the loop)
+ * 2. The loop has a unique ENTRY, i.e. there is only one way to reach a node
+ * in the loop from outside the loop, and that is through the ENTRY
+ */
+
+ /* Let's find the loop ENTRY */
+
+ if (head->bbJumpKind == BBJ_ALWAYS)
+ {
+ if (head->bbJumpDest->bbNum <= bottom->bbNum && head->bbJumpDest->bbNum >= top->bbNum)
+ {
+ /* OK - we enter somewhere within the loop */
+ entry = head->bbJumpDest;
+
+ /* some useful asserts
+ * Cannot enter at the top - should have been caught by redundant jumps */
+
+ assert((entry != top) || (head->bbFlags & BBF_KEEP_BBJ_ALWAYS));
+ }
+ else
+ {
+ /* special case - don't consider now */
+ // assert (!"Loop entered in weird way!");
+ goto NO_LOOP;
+ }
+ }
+ // Can we fall through into the loop?
+ else if (head->bbJumpKind == BBJ_NONE || head->bbJumpKind == BBJ_COND)
+ {
+ /* The ENTRY is at the TOP (a do-while loop) */
+ entry = top;
+ }
+ else
+ {
+ goto NO_LOOP; // head does not flow into the loop; bail for now
+ }
+
+ // Now we find the "first" block -- the earliest block reachable within the loop.
+ // This is usually the same as "top", but can differ in rare cases where "top" is
+ // the entry block of a nested loop, and that nested loop branches backwards to
+ // a block before "top". We find this by searching for such backwards branches
+ // in the loop known so far.
+ BasicBlock* first = top;
+ BasicBlock* newFirst;
+ bool blocksToSearch = true;
+ BasicBlock* validatedAfter = bottom->bbNext;
+ while (blocksToSearch)
+ {
+ blocksToSearch = false;
+ newFirst = nullptr;
+ for (loopBlock = first; loopBlock != validatedAfter; loopBlock = loopBlock->bbNext)
+ {
+ unsigned nSucc = loopBlock->NumSucc();
+ for (unsigned j = 0; j < nSucc; j++)
+ {
+ BasicBlock* succ = loopBlock->GetSucc(j);
+ if ((newFirst == nullptr && succ->bbNum < first->bbNum) ||
+ (newFirst != nullptr && succ->bbNum < newFirst->bbNum))
+ {
+ newFirst = succ;
+ }
+ }
+ }
+ if (newFirst != nullptr)
+ {
+ validatedAfter = first;
+ first = newFirst;
+ blocksToSearch = true;
+ }
+ }
+
+ // Is "head" still before "first"? If not, we don't have a valid loop...
+ if (head->bbNum >= first->bbNum)
+ {
+ JITDUMP(
+ "Extending loop [BB%02u..BB%02u] 'first' to BB%02u captures head BB%02u. Rejecting loop.\n",
+ top->bbNum, bottom->bbNum, first->bbNum, head->bbNum);
+ goto NO_LOOP;
+ }
+
+ /* Make sure ENTRY dominates all blocks in the loop
+ * This is necessary to ensure condition 2. above
+ * At the same time check if the loop has a single exit
+ * point - those loops are easier to optimize */
+
+ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
+ {
+ if (!fgDominate(entry, loopBlock))
+ {
+ goto NO_LOOP;
+ }
+
+ if (loopBlock == bottom)
+ {
+ if (bottom->bbJumpKind != BBJ_ALWAYS)
+ {
+ /* there is an exit at the bottom */
+
+ noway_assert(bottom->bbJumpDest == top);
+ exit = bottom;
+ exitCount++;
+ continue;
+ }
+ }
+
+ BasicBlock* exitPoint;
+
+ switch (loopBlock->bbJumpKind)
+ {
+ case BBJ_COND:
+ case BBJ_CALLFINALLY:
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ assert(loopBlock->bbJumpDest);
+ exitPoint = loopBlock->bbJumpDest;
+
+ if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
+ {
+ /* exit from a block other than BOTTOM */
+ exit = loopBlock;
+ exitCount++;
+ }
+ break;
+
+ case BBJ_NONE:
+ break;
+
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ /* The "try" associated with this "finally" must be in the
+ * same loop, so the finally block will return control inside the loop */
+ break;
+
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ /* those are exits from the loop */
+ exit = loopBlock;
+ exitCount++;
+ break;
+
+ case BBJ_SWITCH:
+
+ unsigned jumpCnt;
+ jumpCnt = loopBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = loopBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ noway_assert(*jumpTab);
+ exitPoint = *jumpTab;
+
+ if (exitPoint->bbNum < top->bbNum || exitPoint->bbNum > bottom->bbNum)
+ {
+ exit = loopBlock;
+ exitCount++;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ /* Make sure we can iterate the loop (i.e. there is a way back to ENTRY)
+ * This is to ensure condition 1. above which prevents marking fake loops
+ *
+ * Below is an example:
+ * for (....)
+ * {
+ * ...
+ * computations
+ * ...
+ * break;
+ * }
+ * The example above is not a loop since we bail after the first iteration
+ *
+ * The condition we have to check for is
+ * 1. ENTRY must have at least one predecessor inside the loop. Since we know that that block is
+ * reachable, it can only be reached through ENTRY, therefore we have a way back to ENTRY
+ *
+ * 2. If we have a GOTO (BBJ_ALWAYS) outside of the loop and that block dominates the
+ * loop bottom then we cannot iterate
+ *
+ * NOTE that this doesn't entirely satisfy condition 1. since "break" statements are not
+ * part of the loop nodes (as per definition they are loop exits executed only once),
+ * but we have no choice but to include them because we consider all blocks within TOP-BOTTOM */
+
+ for (loopBlock = top; loopBlock != bottom; loopBlock = loopBlock->bbNext)
+ {
+ switch (loopBlock->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ if (fgDominate(loopBlock, bottom))
+ {
+ goto NO_LOOP;
+ }
+ default:
+ break;
+ }
+ }
+
+ bool canIterateLoop = false;
+
+ for (predEntry = entry->bbPreds; predEntry; predEntry = predEntry->flNext)
+ {
+ if (predEntry->flBlock->bbNum >= top->bbNum && predEntry->flBlock->bbNum <= bottom->bbNum)
+ {
+ canIterateLoop = true;
+ break;
+ }
+ else if (predEntry->flBlock != head)
+ {
+ // The entry block has multiple predecessors outside the loop; the 'head'
+ // block isn't the only one. We only support a single 'head', so bail.
+ goto NO_LOOP;
+ }
+ }
+
+ if (!canIterateLoop)
+ {
+ goto NO_LOOP;
+ }
+
+ /* Double check - make sure that all loop blocks except ENTRY
+ * have no predecessors outside the loop - this ensures only one loop entry and prevents
+ * us from considering non-loops due to incorrectly assuming that we had a back edge
+ *
+ * OBSERVATION:
+ * Loops of the form "while (a || b)" will be treated as 2 nested loops (with the same header)
+ */
+
+ for (loopBlock = top; loopBlock != bottom->bbNext; loopBlock = loopBlock->bbNext)
+ {
+ if (loopBlock == entry)
+ {
+ continue;
+ }
+
+ for (predTop = loopBlock->bbPreds; predTop != nullptr; predTop = predTop->flNext)
+ {
+ if (predTop->flBlock->bbNum < top->bbNum || predTop->flBlock->bbNum > bottom->bbNum)
+ {
+ // noway_assert(!"Found loop with multiple entries");
+ goto NO_LOOP;
+ }
+ }
+ }
+
+ // Disqualify loops where the first block of the loop is less nested in EH than
+ // the bottom block. That is, we don't want to handle loops where the back edge
+ // goes from within an EH region to a first block that is outside that same EH
+ // region. Note that we *do* handle loops where the first block is the *first*
+ // block of a more nested EH region (since it is legal to branch to the first
+ // block of an immediately more nested EH region). So, for example, disqualify
+ // this:
+ //
+ // BB02
+ // ...
+ // try {
+ // ...
+ // BB10 BBJ_COND => BB02
+ // ...
+ // }
+ //
+ // Here, BB10 is more nested than BB02.
+
+ if (bottom->hasTryIndex() && !bbInTryRegions(bottom->getTryIndex(), first))
+ {
+ JITDUMP("Loop 'first' BB%02u is in an outer EH region compared to loop 'bottom' BB%02u. Rejecting "
+ "loop.\n",
+ first->bbNum, bottom->bbNum);
+ goto NO_LOOP;
+ }
+
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+ // Disqualify loops where the first block of the loop is a finally target.
+ // The main problem is when multiple loops share a 'first' block that is a finally
+ // target and we canonicalize the loops by adding a new loop head. In that case, we
+ // need to update the blocks so the finally target bit is moved to the newly created
+ // block, and removed from the old 'first' block. This is 'hard', so at this point
+ // in the RyuJIT codebase (when we don't expect to keep the "old" ARM32 code generator
+ // long-term), it's easier to disallow the loop than to update the flow graph to
+ // support this case.
+
+ if ((first->bbFlags & BBF_FINALLY_TARGET) != 0)
+ {
+ JITDUMP("Loop 'first' BB%02u is a finally target. Rejecting loop.\n", first->bbNum);
+ goto NO_LOOP;
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ /* At this point we have a loop - record it in the loop table
+ * If we found only one exit, record it in the table too
+ * (otherwise an exit = 0 in the loop table means multiple exits) */
+
+ assert(pred);
+ if (exitCount != 1)
+ {
+ exit = nullptr;
+ }
+ optRecordLoop(head, first, top, entry, bottom, exit, exitCount);
+
+#if COUNT_LOOPS
+ if (!hasMethodLoops)
+ {
+ /* mark the method as containing natural loops */
+ totalLoopMethods++;
+ hasMethodLoops = true;
+ }
+
+ /* increment total number of loops found */
+ totalLoopCount++;
+ loopsThisMethod++;
+
+ /* keep track of the number of exits */
+ loopExitCountTable.record(static_cast<unsigned>(exitCount));
+#endif // COUNT_LOOPS
+ }
+
+ /* current predecessor not good for a loop - continue with another one, if any */
+ NO_LOOP:;
+ }
+ }
+
+#if COUNT_LOOPS
+ loopCountTable.record(loopsThisMethod);
+ if (maxLoopsPerMethod < loopsThisMethod)
+ {
+ maxLoopsPerMethod = loopsThisMethod;
+ }
+ if (loopOverflowThisMethod)
+ {
+ totalLoopOverflows++;
+ }
+#endif // COUNT_LOOPS
+
+ // Now the loop indices are stable. We can figure out parent/child relationships
+ // (using table indices to name loops), and label blocks.
+ for (unsigned char loopInd = 1; loopInd < optLoopCount; loopInd++)
+ {
+ for (unsigned char possibleParent = loopInd; possibleParent > 0;)
+ {
+ possibleParent--;
+ if (optLoopTable[possibleParent].lpContains(optLoopTable[loopInd]))
+ {
+ optLoopTable[loopInd].lpParent = possibleParent;
+ optLoopTable[loopInd].lpSibling = optLoopTable[possibleParent].lpChild;
+ optLoopTable[possibleParent].lpChild = loopInd;
+ break;
+ }
+ }
+ }
+
+ // Now label the blocks with the innermost loop to which they belong. Since parents
+ // precede children in the table, doing the labeling for each loop in order will achieve
+ // this -- the innermost loop labeling will be done last.
+ for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ BasicBlock* first = optLoopTable[loopInd].lpFirst;
+ BasicBlock* bottom = optLoopTable[loopInd].lpBottom;
+ for (BasicBlock* blk = first; blk != nullptr; blk = blk->bbNext)
+ {
+ blk->bbNatLoopNum = loopInd;
+ if (blk == bottom)
+ {
+ break;
+ }
+ assert(blk->bbNext != nullptr); // We should never reach nullptr.
+ }
+ }
+
+ // Make sure that loops are canonical: that every loop has a unique "top", by creating an empty "nop"
+ // one, if necessary, for loops containing others that share a "top."
+ bool mod = false;
+ for (unsigned char loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ // Traverse the outermost loops as entries into the loop nest; so skip non-outermost.
+ if (optLoopTable[loopInd].lpParent != BasicBlock::NOT_IN_LOOP)
+ {
+ continue;
+ }
+
+ // Otherwise...
+ if (optCanonicalizeLoopNest(loopInd))
+ {
+ mod = true;
+ }
+ }
+ if (mod)
+ {
+ fgUpdateChangedFlowGraph();
+ }
+
+#ifdef DEBUG
+ if (verbose && optLoopCount > 0)
+ {
+ printf("\nFinal natural loop table:\n");
+ for (unsigned loopInd = 0; loopInd < optLoopCount; loopInd++)
+ {
+ optPrintLoopInfo(loopInd);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+}
+
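+//------------------------------------------------------------------------
+// optRedirectBlock: Replace the jump target(s) of "blk" using "redirectMap".
+//
+// Any jump successor of "blk" that appears as a key in "redirectMap" is replaced by the
+// corresponding mapped block. For BBJ_SWITCH blocks, the cached switch descriptor map
+// entry is invalidated if any target changed. Note that this does not update the
+// predecessor lists.
+//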
+void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
+{
+ BasicBlock* newJumpDest = nullptr;
+ switch (blk->bbJumpKind)
+ {
+ case BBJ_THROW:
+ case BBJ_RETURN:
+ case BBJ_NONE:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHCATCHRET:
+ // These have no jump destination to update.
+ break;
+
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ // All of these have a single jump destination to update.
+ if (redirectMap->Lookup(blk->bbJumpDest, &newJumpDest))
+ {
+ blk->bbJumpDest = newJumpDest;
+ }
+ break;
+
+ case BBJ_SWITCH:
+ {
+ bool redirected = false;
+ for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
+ {
+ if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
+ {
+ blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
+ redirected = true;
+ }
+ }
+ // If any redirections happened, invalidate the switch table map for the switch.
+ if (redirected)
+ {
+ GetSwitchDescMap()->Remove(blk);
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+// TODO-Cleanup: This should be a static member of the BasicBlock class.
+void Compiler::optCopyBlkDest(BasicBlock* from, BasicBlock* to)
+{
+ assert(from->bbJumpKind == to->bbJumpKind); // Precondition.
+
+ // copy the jump destination(s) from "from" to "to".
+ switch (to->bbJumpKind)
+ {
+ case BBJ_ALWAYS:
+ case BBJ_LEAVE:
+ case BBJ_CALLFINALLY:
+ case BBJ_COND:
+ // All of these have a single jump destination to update.
+ to->bbJumpDest = from->bbJumpDest;
+ break;
+
+ case BBJ_SWITCH:
+ {
+ to->bbJumpSwt = new (this, CMK_BasicBlock) BBswtDesc();
+ to->bbJumpSwt->bbsCount = from->bbJumpSwt->bbsCount;
+ to->bbJumpSwt->bbsDstTab = new (this, CMK_BasicBlock) BasicBlock*[from->bbJumpSwt->bbsCount];
+
+ for (unsigned i = 0; i < from->bbJumpSwt->bbsCount; i++)
+ {
+ to->bbJumpSwt->bbsDstTab[i] = from->bbJumpSwt->bbsDstTab[i];
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+// Canonicalize the loop nest rooted at parent loop 'loopInd'.
+// Returns 'true' if the flow graph is modified.
+bool Compiler::optCanonicalizeLoopNest(unsigned char loopInd)
+{
+ bool modified = false;
+
+ // Is the top of the current loop not in any nested loop?
+ if (optLoopTable[loopInd].lpTop->bbNatLoopNum != loopInd)
+ {
+ if (optCanonicalizeLoop(loopInd))
+ {
+ modified = true;
+ }
+ }
+
+ for (unsigned char child = optLoopTable[loopInd].lpChild; child != BasicBlock::NOT_IN_LOOP;
+ child = optLoopTable[child].lpSibling)
+ {
+ if (optCanonicalizeLoopNest(child))
+ {
+ modified = true;
+ }
+ }
+
+ return modified;
+}
+
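+// optCanonicalizeLoop: Ensure that loop "loopInd" has a unique "top" block, i.e. one whose
+// innermost natural loop number is "loopInd" itself. If the current top is shared with a
+// nested loop, a new empty block is inserted before the loop's lpFirst block and becomes
+// the new lpTop/lpFirst. Returns true if the flow graph was modified.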
+bool Compiler::optCanonicalizeLoop(unsigned char loopInd)
+{
+ // Is the top uniquely part of the current loop?
+ BasicBlock* t = optLoopTable[loopInd].lpTop;
+
+ if (t->bbNatLoopNum == loopInd)
+ {
+ return false;
+ }
+
+ JITDUMP("in optCanonicalizeLoop: L%02u has top BB%02u (bottom BB%02u) with natural loop number L%02u: need to "
+ "canonicalize\n",
+ loopInd, t->bbNum, optLoopTable[loopInd].lpBottom->bbNum, t->bbNatLoopNum);
+
+ // Otherwise, the top of this loop is also part of a nested loop.
+ //
+ // Insert a new unique top for this loop. We must be careful to put this new
+ // block in the correct EH region. Note that f->bbPrev might be in a different
+ // EH region. For example:
+ //
+ // try {
+ // ...
+ // BB07
+ // }
+ // BB08 // "first"
+ //
+ // In this case, first->bbPrev is BB07, which is in a different 'try' region.
+ // On the other hand, the first block of multiple loops might be the first
+ // block of a 'try' region that is completely contained in the multiple loops.
+ // for example:
+ //
+ // BB08 try { }
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB08
+ //
+ // Here, we have two loops, both with BB08 as the "first" block. Block BB08
+ // is a single-block "try" region. Neither loop "bottom" block is in the same
+ // "try" region as BB08. This is legal because you can jump to the first block
+ // of a try region. With EH normalization, no two "try" regions will share
+ // this block. In this case, we need to insert a new block for the outer loop
+ // in the same EH region as the branch from the "bottom":
+ //
+ // BB30 BBJ_NONE
+ // BB08 try { }
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB30
+ //
+ // Another possibility is that the "first" block of the loop nest can be the first block
+ // of a "try" region that also has other predecessors than those in the loop, or even in
+ // the "try" region (since blocks can target the first block of a "try" region). For example:
+ //
+ // BB08 try {
+ // ...
+ // BB10 BBJ_ALWAYS => BB08
+ // ...
+ // BB12 BBJ_ALWAYS => BB08
+ // BB13 }
+ // ...
+ // BB20 BBJ_ALWAYS => BB08
+ // ...
+ // BB25 BBJ_ALWAYS => BB08
+ //
+ // Here, BB08 has 4 flow graph predecessors: BB10, BB12, BB20, BB25. These are all potential loop
+ // bottoms, for four possible nested loops. However, we require all the loop bottoms to be in the
+ // same EH region. For loops BB08..BB10 and BB08..BB12, we need to add a new "top" block within
+ // the try region, immediately before BB08. The bottom of the loop BB08..BB10 loop will target the
+ // old BB08, and the bottom of the BB08..BB12 loop will target the new loop header. The other branches
+ // (BB20, BB25) must target the new loop header, both for correctness, and to avoid the illegal
+ // situation of branching to a non-first block of a 'try' region.
+ //
+ // We can also have a loop nest where the "first" block is outside of a "try" region
+ // and the back edges are inside a "try" region, for example:
+ //
+ // BB02 // "first"
+ // ...
+ // BB09 try { BBJ_COND => BB02
+ // ...
+ // BB15 BBJ_COND => BB02
+ // ...
+ // BB21 } // end of "try"
+ //
+ // In this case, both loop back edges were formed by "leave" instructions that were
+ // imported into branches that were later made conditional. In this case, we don't
+ // want to copy the EH region of the back edge, since that would create a block
+ // outside of and disjoint with the "try" region of the back edge. However, to
+ // simplify things, we disqualify this type of loop, so we should never see this here.
+
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ BasicBlock* f = optLoopTable[loopInd].lpFirst;
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+
+ // The loop must be entirely contained within a single handler region.
+ assert(BasicBlock::sameHndRegion(f, b));
+
+ // If the bottom block is in the same "try" region, then we extend the EH
+ // region. Otherwise, we add the new block outside the "try" region.
+ bool extendRegion = BasicBlock::sameTryRegion(f, b);
+ BasicBlock* newT = fgNewBBbefore(BBJ_NONE, f, extendRegion);
+ if (!extendRegion)
+ {
+ // We need to set the EH region manually. Set it to be the same
+ // as the bottom block.
+ newT->copyEHRegion(b);
+ }
+
+ BlockSetOps::Assign(this, newT->bbReach, t->bbReach);
+
+ // Redirect the "bottom" of the current loop to "newT".
+ BlockToBlockMap* blockMap = new (getAllocatorLoopHoist()) BlockToBlockMap(getAllocatorLoopHoist());
+ blockMap->Set(t, newT);
+ optRedirectBlock(b, blockMap);
+
+ // Redirect non-loop preds of "t" to also go to "newT". Inner loops that also branch to "t" should continue
+ // to do so. However, there may be other predecessors from outside the loop nest that need to be updated
+ // to point to "newT". This normally wouldn't happen, since they too would be part of the loop nest. However,
+ // they might have been prevented from participating in the loop nest due to different EH nesting, or some
+ // other reason.
+ //
+ // Note that optRedirectBlock doesn't update the predecessors list. So, if the same 't' block is processed
+ // multiple times while canonicalizing multiple loop nests, we'll attempt to redirect a predecessor multiple times.
+ // This is ok, because after the first redirection, the topPredBlock branch target will no longer match the source
+ // edge of the blockMap, so nothing will happen.
+ for (flowList* topPred = t->bbPreds; topPred != nullptr; topPred = topPred->flNext)
+ {
+ BasicBlock* topPredBlock = topPred->flBlock;
+
+ // Skip if topPredBlock is in the loop.
+ // Note that this uses block number to detect membership in the loop. We are adding blocks during
+ // canonicalization, and those block numbers will be new, and larger than previous blocks. However, we work
+ // outside-in, so we shouldn't encounter the new blocks at the loop boundaries, or in the predecessor lists.
+ if (t->bbNum <= topPredBlock->bbNum && topPredBlock->bbNum <= b->bbNum)
+ {
+ JITDUMP("in optCanonicalizeLoop: 'top' predecessor BB%02u is in the range of L%02u (BB%02u..BB%02u); not "
+ "redirecting its bottom edge\n",
+ topPredBlock->bbNum, loopInd, t->bbNum, b->bbNum);
+ continue;
+ }
+
+ JITDUMP("in optCanonicalizeLoop: redirect top predecessor BB%02u to BB%02u\n", topPredBlock->bbNum,
+ newT->bbNum);
+ optRedirectBlock(topPredBlock, blockMap);
+ }
+
+ assert(newT->bbNext == f);
+ if (f != t)
+ {
+ newT->bbJumpKind = BBJ_ALWAYS;
+ newT->bbJumpDest = t;
+ newT->bbTreeList = nullptr;
+ fgInsertStmtAtEnd(newT, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
+ }
+
+ // If it had been a do-while loop (top == entry), update entry, as well.
+ BasicBlock* origE = optLoopTable[loopInd].lpEntry;
+ if (optLoopTable[loopInd].lpTop == origE)
+ {
+ optLoopTable[loopInd].lpEntry = newT;
+ }
+ optLoopTable[loopInd].lpTop = newT;
+ optLoopTable[loopInd].lpFirst = newT;
+
+ newT->bbNatLoopNum = loopInd;
+
+ JITDUMP("in optCanonicalizeLoop: made new block BB%02u [%p] the new unique top of loop %d.\n", newT->bbNum,
+ dspPtr(newT), loopInd);
+
+ // Make sure the head block still goes to the entry...
+ if (h->bbJumpKind == BBJ_NONE && h->bbNext != optLoopTable[loopInd].lpEntry)
+ {
+ h->bbJumpKind = BBJ_ALWAYS;
+ h->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ }
+ else if (h->bbJumpKind == BBJ_COND && h->bbNext == newT && newT != optLoopTable[loopInd].lpEntry)
+ {
+ BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
+ optLoopTable[loopInd].lpHead = h2;
+ h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ h2->bbTreeList = nullptr;
+ fgInsertStmtAtEnd(h2, fgNewStmtFromTree(gtNewOperNode(GT_NOP, TYP_VOID, nullptr)));
+ }
+
+ // If any loops nested in "loopInd" have the same head and entry as "loopInd",
+ // it must be the case that they were do-while's (since "h" fell through to the entry).
+ // The new node "newT" becomes the head of such loops.
+ for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
+ childLoop = optLoopTable[childLoop].lpSibling)
+ {
+ if (optLoopTable[childLoop].lpEntry == origE && optLoopTable[childLoop].lpHead == h &&
+ newT->bbJumpKind == BBJ_NONE && newT->bbNext == origE)
+ {
+ optUpdateLoopHead(childLoop, h, newT);
+ }
+ }
+ return true;
+}
+
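+// optLoopContains: Returns true if loop "l2" is the same as, or is (transitively) nested
+// within, loop "l1". Returns false if "l2" is BasicBlock::NOT_IN_LOOP.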
+bool Compiler::optLoopContains(unsigned l1, unsigned l2)
+{
+ assert(l1 != BasicBlock::NOT_IN_LOOP);
+ if (l1 == l2)
+ {
+ return true;
+ }
+ else if (l2 == BasicBlock::NOT_IN_LOOP)
+ {
+ return false;
+ }
+ else
+ {
+ return optLoopContains(l1, optLoopTable[l2].lpParent);
+ }
+}
+
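+// optUpdateLoopHead: Change the recorded head of loop "loopInd" from "from" to "to", and
+// do the same for any child loops that shared the old head.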
+void Compiler::optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to)
+{
+ assert(optLoopTable[loopInd].lpHead == from);
+ optLoopTable[loopInd].lpHead = to;
+ for (unsigned char childLoop = optLoopTable[loopInd].lpChild; childLoop != BasicBlock::NOT_IN_LOOP;
+ childLoop = optLoopTable[childLoop].lpSibling)
+ {
+ if (optLoopTable[childLoop].lpHead == from)
+ {
+ optUpdateLoopHead(childLoop, from, to);
+ }
+ }
+}
+
+/*****************************************************************************
+ * If the "i += const" will cause an overflow exception for the small types.
+ */
+
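+// For example (illustrative): a TYP_BYTE iterator that exits the loop at 130 exceeds
+// SCHAR_MAX (127), so the increment would have wrapped in the small type and we report
+// an overflow.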
+bool jitIterSmallOverflow(int iterAtExit, var_types incrType)
+{
+ int type_MAX;
+
+ switch (incrType)
+ {
+ case TYP_BYTE:
+ type_MAX = SCHAR_MAX;
+ break;
+ case TYP_UBYTE:
+ type_MAX = UCHAR_MAX;
+ break;
+ case TYP_SHORT:
+ type_MAX = SHRT_MAX;
+ break;
+ case TYP_CHAR:
+ type_MAX = USHRT_MAX;
+ break;
+
+ case TYP_UINT: // Detected by checking for 32bit ....
+ case TYP_INT:
+ return false; // ... overflow same as done for TYP_INT
+
+ default:
+ NO_WAY("Bad type");
+ }
+
+ if (iterAtExit > type_MAX)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ * If the "i -= const" will cause an underflow exception for the small types
+ */
+
+bool jitIterSmallUnderflow(int iterAtExit, var_types decrType)
+{
+ int type_MIN;
+
+ switch (decrType)
+ {
+ case TYP_BYTE:
+ type_MIN = SCHAR_MIN;
+ break;
+ case TYP_SHORT:
+ type_MIN = SHRT_MIN;
+ break;
+ case TYP_UBYTE:
+ type_MIN = 0;
+ break;
+ case TYP_CHAR:
+ type_MIN = 0;
+ break;
+
+ case TYP_UINT: // Detected by checking for 32bit ....
+ case TYP_INT:
+ return false; // ... underflow same as done for TYP_INT
+
+ default:
+ NO_WAY("Bad type");
+ }
+
+ if (iterAtExit < type_MIN)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper for unroll loops - Computes the number of repetitions
+ * in a constant loop. If it cannot prove the number is constant returns false
+ */
+
+bool Compiler::optComputeLoopRep(int constInit,
+ int constLimit,
+ int iterInc,
+ genTreeOps iterOper,
+ var_types iterOperType,
+ genTreeOps testOper,
+ bool unsTest,
+ bool dupCond,
+ unsigned* iterCount)
+{
+ noway_assert(genActualType(iterOperType) == TYP_INT);
+
+ __int64 constInitX;
+ __int64 constLimitX;
+
+ unsigned loopCount;
+ int iterSign;
+
+ // Using this, we can just do a signed comparison with other 32 bit values.
+ if (unsTest)
+ {
+ constLimitX = (unsigned int)constLimit;
+ }
+ else
+ {
+ constLimitX = (signed int)constLimit;
+ }
+
+ switch (iterOperType)
+ {
+// For small types, the iteration operator narrows these values, so narrow the initial value and increment here too
+
+#define INIT_ITER_BY_TYPE(type) \
+ constInitX = (type)constInit; \
+ iterInc = (type)iterInc;
+
+ case TYP_BYTE:
+ INIT_ITER_BY_TYPE(signed char);
+ break;
+ case TYP_UBYTE:
+ INIT_ITER_BY_TYPE(unsigned char);
+ break;
+ case TYP_SHORT:
+ INIT_ITER_BY_TYPE(signed short);
+ break;
+ case TYP_CHAR:
+ INIT_ITER_BY_TYPE(unsigned short);
+ break;
+
+ // For the big types, 32 bit arithmetic is performed
+
+ case TYP_INT:
+ case TYP_UINT:
+ if (unsTest)
+ {
+ constInitX = (unsigned int)constInit;
+ }
+ else
+ {
+ constInitX = (signed int)constInit;
+ }
+ break;
+
+ default:
+ noway_assert(!"Bad type");
+ NO_WAY("Bad type");
+ }
+
+ /* If iterInc is zero we have an infinite loop */
+ if (iterInc == 0)
+ {
+ return false;
+ }
+
+ /* Set iterSign to +1 for positive iterInc and -1 for negative iterInc */
+ iterSign = (iterInc > 0) ? +1 : -1;
+
+ /* Initialize loopCount to zero */
+ loopCount = 0;
+
+ // If dupCond is true then the loop head contains a test which skips
+ // this loop if constInit does not pass the loop test.
+ // Such a loop can execute zero times.
+ // If dupCond is false then we have a true do-while loop, which
+ // always executes the loop body once before performing the loop test.
+ if (!dupCond)
+ {
+ loopCount += 1;
+ constInitX += iterInc;
+ }
+
+ // bail if count is based on wrap-around math
+ if (iterInc > 0)
+ {
+ if (constLimitX < constInitX)
+ {
+ return false;
+ }
+ }
+ else if (constLimitX > constInitX)
+ {
+ return false;
+ }
+
+ /* Compute the number of repetitions */
+
+ switch (testOper)
+ {
+ __int64 iterAtExitX;
+
+ case GT_EQ:
+ /* something like "for (i=init; i == lim; i++)" doesn't make any sense */
+ return false;
+
+ case GT_NE:
+ /* "for (i=init; i != lim; i+=const)" - this is tricky since it may
+ * have a constant number of iterations or loop forever.
+ * We have to compute (lim-init) mod iterInc to see if it is zero.
+ * If the mod is not zero then the limit test will be missed and a wrap will occur,
+ * which is probably not what the end user wanted, but it is legal.
+ */
+
+ if (iterInc > 0)
+ {
+ /* Stepping by one, i.e. Mod with 1 is always zero */
+ if (iterInc != 1)
+ {
+ if (((constLimitX - constInitX) % iterInc) != 0)
+ {
+ return false;
+ }
+ }
+ }
+ else
+ {
+ noway_assert(iterInc < 0);
+ /* Stepping by -1, i.e. Mod with 1 is always zero */
+ if (iterInc != -1)
+ {
+ if (((constInitX - constLimitX) % (-iterInc)) != 0)
+ {
+ return false;
+ }
+ }
+ }
+
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX != constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX < constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_LT:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX < constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX < constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_LE:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX <= constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if iteration incr will cause overflow for small types
+ if (jitIterSmallOverflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit overflow. Bad for TYP_(U)INT
+ if (iterAtExitX <= constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_GT:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX > constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX - iterSign) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if small types will underflow
+ if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
+ if (iterAtExitX > constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ case GT_GE:
+ switch (iterOper)
+ {
+ case GT_ASG_SUB:
+ case GT_SUB:
+ iterInc = -iterInc;
+ __fallthrough;
+
+ case GT_ASG_ADD:
+ case GT_ADD:
+ if (constInitX >= constLimitX)
+ {
+ loopCount += (unsigned)((constLimitX - constInitX) / iterInc) + 1;
+ }
+
+ iterAtExitX = (int)(constInitX + iterInc * (int)loopCount);
+
+ if (unsTest)
+ {
+ iterAtExitX = (unsigned)iterAtExitX;
+ }
+
+ // Check if small types will underflow
+ if (jitIterSmallUnderflow((int)iterAtExitX, iterOperType))
+ {
+ return false;
+ }
+
+ // iterator with 32bit underflow. Bad for TYP_INT and unsigneds
+ if (iterAtExitX >= constLimitX)
+ {
+ return false;
+ }
+
+ *iterCount = loopCount;
+ return true;
+
+ case GT_ASG_MUL:
+ case GT_MUL:
+ case GT_ASG_DIV:
+ case GT_DIV:
+ case GT_ASG_RSH:
+ case GT_RSH:
+ case GT_ASG_LSH:
+ case GT_LSH:
+ case GT_ASG_UDIV:
+ case GT_UDIV:
+ return false;
+
+ default:
+ noway_assert(!"Unknown operator for loop iterator");
+ return false;
+ }
+
+ default:
+ noway_assert(!"Unknown operator for loop condition");
+ }
+
+ return false;
+}
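+
+// As a worked example with a hypothetical loop "for (i = 0; i < 10; i += 3)" and a duplicated
+// zero-trip test (dupCond == true): the GT_LT / GT_ASG_ADD case above computes
+// loopCount = (10 - 0 - 1) / 3 + 1 = 4 and iterAtExitX = 0 + 3 * 4 = 12; since 12 does not
+// overflow TYP_INT and is not below the limit, *iterCount is set to 4 (i takes 0, 3, 6, 9).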
+
+/*****************************************************************************
+ *
+ * Look for loop unrolling candidates and unroll them
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+void Compiler::optUnrollLoops()
+{
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ return;
+ }
+
+ if (optLoopCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (JitConfig.JitNoUnroll())
+ {
+ return;
+ }
+#endif
+
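+ // Loop unrolling is currently skipped whenever loop cloning is enabled
+ // (see optCanCloneLoops); the two optimizations are not performed together.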
+ if (optCanCloneLoops())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optUnrollLoops()\n");
+ }
+#endif
+ /* Look for loop unrolling candidates */
+
+ /* Double loop: after unrolling an inner loop we set 'change' to true and
+ * then go back over all of the loop candidates to try to unroll the next
+ * outer loop. Once a pass unrolls no loops, 'change' stays false and we
+ * are done.
+ */
+ for (;;)
+ {
+ bool change = false;
+
+ for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ BasicBlock* block;
+ BasicBlock* head;
+ BasicBlock* bottom;
+
+ GenTree* loop;
+ GenTree* test;
+ GenTree* incr;
+ GenTree* phdr;
+ GenTree* init;
+
+ bool dupCond;
+ int lval;
+ int lbeg; // initial value for iterator
+ int llim; // limit value for iterator
+ unsigned lvar; // iterator lclVar #
+ int iterInc; // value to increment the iterator
+ genTreeOps iterOper; // type of iterator increment (i.e. ASG_ADD, ASG_SUB, etc.)
+ var_types iterOperType; // type result of the oper (for overflow instrs)
+ genTreeOps testOper; // type of loop test (i.e. GT_LE, GT_GE, etc.)
+ bool unsTest; // Is the comparison u/int
+
+ unsigned totalIter; // total number of iterations in the constant loop
+ unsigned loopCostSz; // Cost is size of one iteration
+ unsigned loopFlags; // actual lpFlags
+ unsigned requiredFlags; // required lpFlags
+
+ GenTree* loopList; // new stmt list of the unrolled loop
+ GenTree* loopLast;
+
+ static const int ITER_LIMIT[COUNT_OPT_CODE + 1] = {
+ 10, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 20, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(ITER_LIMIT[SMALL_CODE] == 0);
+ noway_assert(ITER_LIMIT[COUNT_OPT_CODE] == 0);
+
+ unsigned iterLimit = (unsigned)ITER_LIMIT[compCodeOpt()];
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ iterLimit *= 10;
+ }
+#endif
+
+ static const int UNROLL_LIMIT_SZ[COUNT_OPT_CODE + 1] = {
+ 30, // BLENDED_CODE
+ 0, // SMALL_CODE
+ 60, // FAST_CODE
+ 0 // COUNT_OPT_CODE
+ };
+
+ noway_assert(UNROLL_LIMIT_SZ[SMALL_CODE] == 0);
+ noway_assert(UNROLL_LIMIT_SZ[COUNT_OPT_CODE] == 0);
+
+ int unrollLimitSz = (unsigned)UNROLL_LIMIT_SZ[compCodeOpt()];
+
+#ifdef DEBUG
+ if (compStressCompile(STRESS_UNROLL_LOOPS, 50))
+ {
+ unrollLimitSz *= 10;
+ }
+#endif
+
+ loopFlags = optLoopTable[lnum].lpFlags;
+ requiredFlags = LPFLG_DO_WHILE | LPFLG_ONE_EXIT | LPFLG_CONST;
+
+ /* Ignore the loop if we don't have a do-while with a single exit
+ that has a constant number of iterations */
+
+ if ((loopFlags & requiredFlags) != requiredFlags)
+ {
+ continue;
+ }
+
+ /* ignore if removed or marked as not unrollable */
+
+ if (optLoopTable[lnum].lpFlags & (LPFLG_DONT_UNROLL | LPFLG_REMOVED))
+ {
+ continue;
+ }
+
+ head = optLoopTable[lnum].lpHead;
+ noway_assert(head);
+ bottom = optLoopTable[lnum].lpBottom;
+ noway_assert(bottom);
+
+ /* The single exit must be at the bottom of the loop */
+ noway_assert(optLoopTable[lnum].lpExit);
+ if (optLoopTable[lnum].lpExit != bottom)
+ {
+ continue;
+ }
+
+ /* Unrolling loops with jumps in them is not worth the headache
+ * Later we might consider unrolling loops after un-switching */
+
+ block = head;
+ do
+ {
+ block = block->bbNext;
+ noway_assert(block);
+
+ if (block->bbJumpKind != BBJ_NONE)
+ {
+ if (block != bottom)
+ {
+ goto DONE_LOOP;
+ }
+ }
+ } while (block != bottom);
+
+ /* Get the loop data:
+ - initial constant
+ - limit constant
+ - iterator
+ - iterator increment
+ - increment operation type (i.e. ASG_ADD, ASG_SUB, etc...)
+ - loop test type (i.e. GT_GE, GT_LT, etc...)
+ */
+
+ lbeg = optLoopTable[lnum].lpConstInit;
+ llim = optLoopTable[lnum].lpConstLimit();
+ testOper = optLoopTable[lnum].lpTestOper();
+
+ lvar = optLoopTable[lnum].lpIterVar();
+ iterInc = optLoopTable[lnum].lpIterConst();
+ iterOper = optLoopTable[lnum].lpIterOper();
+
+ iterOperType = optLoopTable[lnum].lpIterOperType();
+ unsTest = (optLoopTable[lnum].lpTestTree->gtFlags & GTF_UNSIGNED) != 0;
+
+ if (lvaTable[lvar].lvAddrExposed)
+ { // If the loop iteration variable is address-exposed then bail
+ continue;
+ }
+ if (lvaTable[lvar].lvIsStructField)
+ { // If the loop iteration variable is a promoted field from a struct then
+ // bail
+ continue;
+ }
+
+ /* Locate the pre-header and initialization and increment/test statements */
+
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ loop = bottom->bbTreeList;
+ noway_assert(loop);
+
+ init = head->lastStmt();
+ noway_assert(init && (init->gtNext == nullptr));
+ test = bottom->lastStmt();
+ noway_assert(test && (test->gtNext == nullptr));
+ incr = test->gtPrev;
+ noway_assert(incr);
+
+ if (init->gtFlags & GTF_STMT_CMPADD)
+ {
+ /* Must be a duplicated loop condition */
+ noway_assert(init->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ dupCond = true;
+ init = init->gtPrev;
+ noway_assert(init);
+ }
+ else
+ {
+ dupCond = false;
+ }
+
+ /* Find the number of iterations - the function returns false if not a constant number */
+
+ if (!optComputeLoopRep(lbeg, llim, iterInc, iterOper, iterOperType, testOper, unsTest, dupCond, &totalIter))
+ {
+ continue;
+ }
+
+ /* Forget it if there are too many repetitions or not a constant loop */
+
+ if (totalIter > iterLimit)
+ {
+ continue;
+ }
+
+ noway_assert(init->gtOper == GT_STMT);
+ init = init->gtStmt.gtStmtExpr;
+ noway_assert(test->gtOper == GT_STMT);
+ test = test->gtStmt.gtStmtExpr;
+ noway_assert(incr->gtOper == GT_STMT);
+ incr = incr->gtStmt.gtStmtExpr;
+
+ // Don't unroll loops we don't understand.
+ if (incr->gtOper == GT_ASG)
+ {
+ continue;
+ }
+
+ /* Make sure everything looks ok */
+ if ((init->gtOper != GT_ASG) || (init->gtOp.gtOp1->gtOper != GT_LCL_VAR) ||
+ (init->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) || (init->gtOp.gtOp2->gtOper != GT_CNS_INT) ||
+ (init->gtOp.gtOp2->gtIntCon.gtIconVal != lbeg) ||
+
+ !((incr->gtOper == GT_ASG_ADD) || (incr->gtOper == GT_ASG_SUB)) ||
+ (incr->gtOp.gtOp1->gtOper != GT_LCL_VAR) || (incr->gtOp.gtOp1->gtLclVarCommon.gtLclNum != lvar) ||
+ (incr->gtOp.gtOp2->gtOper != GT_CNS_INT) || (incr->gtOp.gtOp2->gtIntCon.gtIconVal != iterInc) ||
+
+ (test->gtOper != GT_JTRUE))
+ {
+ noway_assert(!"Bad precondition in Compiler::optUnrollLoops()");
+ continue;
+ }
+
+ /* heuristic - Estimated cost in code size of the unrolled loop */
+
+ loopCostSz = 0;
+
+ block = head;
+
+ do
+ {
+ block = block->bbNext;
+
+ /* Visit all the statements in the block */
+
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Get the expression and stop if end reached */
+
+ GenTreePtr expr = stmt->gtStmtExpr;
+ if (expr == incr)
+ {
+ break;
+ }
+
+ /* Calculate gtCostSz */
+ gtSetStmtInfo(stmt);
+
+ /* Update loopCostSz */
+ loopCostSz += stmt->gtCostSz;
+ }
+ } while (block != bottom);
+
+ /* Compute the estimated increase in code size for the unrolled loop */
+
+ unsigned int fixedLoopCostSz;
+ fixedLoopCostSz = 8;
+
+ int unrollCostSz;
+ unrollCostSz = (loopCostSz * totalIter) - (loopCostSz + fixedLoopCostSz);
+
+ /* Don't unroll if too much code duplication would result. */
+
+ if (unrollCostSz > unrollLimitSz)
+ {
+ /* prevent this loop from being revisited */
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
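+
+ // As a rough worked example with hypothetical numbers (absent stress modes): a 4-iteration loop
+ // whose body costs 10 gives unrollCostSz = (10 * 4) - (10 + 8) = 22, within the BLENDED_CODE
+ // limit of 30, so unrolling proceeds; a body cost of 15 gives (15 * 4) - (15 + 8) = 37, which
+ // exceeds 30, and the loop is flagged LPFLG_DONT_UNROLL instead.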
+
+ /* Looks like a good idea to unroll this loop, let's do it! */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nUnrolling loop BB%02u", head->bbNext->bbNum);
+ if (head->bbNext->bbNum != bottom->bbNum)
+ {
+ printf("..BB%02u", bottom->bbNum);
+ }
+ printf(" over V%02u from %u to %u", lvar, lbeg, llim);
+ printf(" unrollCostSz = %d\n", unrollCostSz);
+ printf("\n");
+ }
+#endif
+
+ /* Create the unrolled loop statement list */
+
+ loopList = loopLast = nullptr;
+
+ for (lval = lbeg; totalIter; totalIter--)
+ {
+ block = head;
+
+ do
+ {
+ GenTreeStmt* stmt;
+ GenTree* expr;
+
+ block = block->bbNext;
+ noway_assert(block);
+
+ /* Visit all the statements in the block */
+
+ for (stmt = block->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ /* Stop if we've reached the end of the loop */
+
+ if (stmt->gtStmtExpr == incr)
+ {
+ break;
+ }
+
+ /* Clone/substitute the expression */
+
+ expr = gtCloneExpr(stmt, 0, lvar, lval);
+
+ // gtCloneExpr doesn't handle everything; bail out if cloning failed
+
+ if (!expr)
+ {
+ optLoopTable[lnum].lpFlags |= LPFLG_DONT_UNROLL;
+ goto DONE_LOOP;
+ }
+
+ /* Append the expression to our list */
+
+ if (loopList)
+ {
+ loopLast->gtNext = expr;
+ }
+ else
+ {
+ loopList = expr;
+ }
+
+ expr->gtPrev = loopLast;
+ loopLast = expr;
+ }
+ } while (block != bottom);
+
+ /* update the new value for the unrolled iterator */
+
+ switch (iterOper)
+ {
+ case GT_ASG_ADD:
+ lval += iterInc;
+ break;
+
+ case GT_ASG_SUB:
+ lval -= iterInc;
+ break;
+
+ case GT_ASG_RSH:
+ case GT_ASG_LSH:
+ noway_assert(!"Unrolling not implemented for this loop iterator");
+ goto DONE_LOOP;
+
+ default:
+ noway_assert(!"Unknown operator for constant loop iterator");
+ goto DONE_LOOP;
+ }
+ }
+
+ /* Finish the linked list */
+
+ if (loopList)
+ {
+ loopList->gtPrev = loopLast;
+ loopLast->gtNext = nullptr;
+ }
+
+ /* Replace the body with the unrolled one */
+
+ block = head;
+
+ do
+ {
+ block = block->bbNext;
+ noway_assert(block);
+ block->bbTreeList = nullptr;
+ block->bbJumpKind = BBJ_NONE;
+ block->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ } while (block != bottom);
+
+ bottom->bbJumpKind = BBJ_NONE;
+ bottom->bbTreeList = loopList;
+ bottom->bbFlags &= ~BBF_NEEDS_GCPOLL;
+ bottom->modifyBBWeight(bottom->bbWeight / BB_LOOP_WEIGHT);
+
+ bool dummy;
+
+ fgMorphStmts(bottom, &dummy, &dummy, &dummy);
+
+ /* Update bbRefs and bbPreds */
+ /* Here head->bbNext is bottom !!! - Replace it */
+
+ fgRemoveRefPred(head->bbNext, bottom);
+
+ /* Now change the initialization statement in the HEAD to "lvar = lval;"
+ * (the last value of the iterator in the loop)
+ * and drop the jump condition since the unrolled loop will always execute */
+
+ init->gtOp.gtOp2->gtIntCon.gtIconVal = lval;
+
+ /* if the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block) */
+
+ if (head->bbJumpKind == BBJ_COND)
+ {
+ phdr = head->bbTreeList;
+ noway_assert(phdr);
+ test = phdr->gtPrev;
+
+ noway_assert(test && (test->gtNext == nullptr));
+ noway_assert(test->gtOper == GT_STMT);
+ noway_assert(test->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ init = test->gtPrev;
+ noway_assert(init && (init->gtNext == test));
+ noway_assert(init->gtOper == GT_STMT);
+
+ init->gtNext = nullptr;
+ phdr->gtPrev = init;
+ head->bbJumpKind = BBJ_NONE;
+ head->bbFlags &= ~BBF_NEEDS_GCPOLL;
+
+ /* Update bbRefs and bbPreds */
+
+ fgRemoveRefPred(head->bbJumpDest, head);
+ }
+ else
+ {
+ /* the loop must execute */
+ noway_assert(head->bbJumpKind == BBJ_NONE);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Whole unrolled loop:\n");
+
+ GenTreePtr s = loopList;
+
+ while (s)
+ {
+ noway_assert(s->gtOper == GT_STMT);
+ gtDispTree(s);
+ s = s->gtNext;
+ }
+ printf("\n");
+
+ gtDispTree(init);
+ printf("\n");
+ }
+#endif
+
+ /* Remember that something has changed */
+
+ change = true;
+
+ /* Make sure to update loop table */
+
+ /* Use the LPFLG_REMOVED flag and update the bbLoopMask accordingly
+ * (also make head and bottom NULL - to hit an assert or GPF) */
+
+ optLoopTable[lnum].lpFlags |= LPFLG_REMOVED;
+ optLoopTable[lnum].lpHead = optLoopTable[lnum].lpBottom = nullptr;
+
+ DONE_LOOP:;
+ }
+
+ if (!change)
+ {
+ break;
+ }
+ }
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Return true if there is a code path from 'topBB' to 'botBB' that will
+ * not execute a method call.
+ */
+
+bool Compiler::optReachWithoutCall(BasicBlock* topBB, BasicBlock* botBB)
+{
+ // TODO-Cleanup: Currently BBF_GC_SAFE_POINT is not set for helper calls,
+ // as some helper calls are neither interruptible nor hijackable.
+ // When we can determine this, then we can set BBF_GC_SAFE_POINT for
+ // those helpers too.
+
+ noway_assert(topBB->bbNum <= botBB->bbNum);
+
+ // We can always check topBB and botBB for any gc safe points and early out
+
+ if ((topBB->bbFlags | botBB->bbFlags) & BBF_GC_SAFE_POINT)
+ {
+ return false;
+ }
+
+ // Otherwise we will need to rely upon the dominator sets
+
+ if (!fgDomsComputed)
+ {
+ // return a conservative answer of true when we don't have the dominator sets
+ return true;
+ }
+
+ BasicBlock* curBB = topBB;
+ for (;;)
+ {
+ noway_assert(curBB);
+
+ // If we added a loop pre-header block then we will
+ // have a bbNum greater than fgLastBB, and we won't have
+ // any dominator information about this block, so skip it.
+ //
+ if (curBB->bbNum <= fgLastBB->bbNum)
+ {
+ noway_assert(curBB->bbNum <= botBB->bbNum);
+
+ // Does this block contain a gc safe point?
+
+ if (curBB->bbFlags & BBF_GC_SAFE_POINT)
+ {
+ // Will this block always execute on the way to botBB ?
+ //
+ // Since we are checking every block in [topBB .. botBB] and we are using
+ // a lexical definition of a loop
+ // (all that we know is that botBB has a back-edge to topBB),
+ // while walking blocks in this range we may encounter some blocks
+ // that are not really part of the loop, and so we need to perform
+ // some additional checks:
+ //
+ // We will check that the current 'curBB' is reachable from 'topBB'
+ // and that it dominates the block containing the back-edge 'botBB'
+ // When both of these are true then we know that the gcsafe point in 'curBB'
+ // will be encountered in the loop and we can return false
+ //
+ if (fgDominate(curBB, botBB) && fgReachable(topBB, curBB))
+ {
+ return false;
+ }
+ }
+ else
+ {
+ // If we've reached the destination block, then we're done
+
+ if (curBB == botBB)
+ {
+ break;
+ }
+ }
+ }
+
+ curBB = curBB->bbNext;
+ }
+
+ // If we didn't find any blocks that contained a gc safe point and
+ // also met the fgDominate and fgReachable criteria then we must return true
+ //
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Find the loop termination test at the bottom of the loop
+ */
+
+static GenTreePtr optFindLoopTermTest(BasicBlock* bottom)
+{
+ GenTreePtr testt = bottom->bbTreeList;
+
+ assert(testt && testt->gtOper == GT_STMT);
+
+ GenTreePtr result = testt->gtPrev;
+
+#ifdef DEBUG
+ while (testt->gtNext)
+ {
+ testt = testt->gtNext;
+ }
+
+ assert(testt == result);
+#endif
+
+ return result;
+}
+
+/*****************************************************************************
+ * Optimize "jmp C; do{} C:while(cond);" loops to "if (cond) { do {} while (cond); }"
+ */
+
+void Compiler::fgOptWhileLoop(BasicBlock* block)
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+ noway_assert(compCodeOpt() != SMALL_CODE);
+
+ /*
+ Optimize while loops into do { } while loop
+ Our loop hoisting logic requires do { } while loops.
+ Specifically, we're looking for the following case:
+
+ ...
+ jmp test
+ loop:
+ ...
+ ...
+ test:
+ cond
+ jtrue loop
+
+ If we find this, and the condition is simple enough, we change
+ the loop to the following:
+
+ ...
+ cond
+ jfalse done
+ // else fall-through
+ loop:
+ ...
+ ...
+ test:
+ cond
+ jtrue loop
+ done:
+
+ */
+
+ /* Does the BB end with an unconditional jump? */
+
+ if (block->bbJumpKind != BBJ_ALWAYS || (block->bbFlags & BBF_KEEP_BBJ_ALWAYS))
+ { // It can't be one of the ones we use for our exception magic
+ return;
+ }
+
+ // It has to be a forward jump
+ // TODO-CQ: Check if we can also optimize the backwards jump as well.
+ //
+ if (fgIsForwardBranch(block) == false)
+ {
+ return;
+ }
+
+ // Get hold of the jump target
+ BasicBlock* bTest = block->bbJumpDest;
+
+ // Does the block consist of 'jtrue(cond) block' ?
+ if (bTest->bbJumpKind != BBJ_COND)
+ {
+ return;
+ }
+
+ // bTest must be a backwards jump to block->bbNext
+ if (bTest->bbJumpDest != block->bbNext)
+ {
+ return;
+ }
+
+ // Since test is a BBJ_COND it will have a bbNext
+ noway_assert(bTest->bbNext);
+
+ // 'block' must be in the same try region as the condition, since we're going to insert
+ // a duplicated condition in 'block', and the condition might include exception throwing code.
+ if (!BasicBlock::sameTryRegion(block, bTest))
+ {
+ return;
+ }
+
+ // We're going to change 'block' to branch to bTest->bbNext, so that also better be in the
+ // same try region (or no try region) to avoid generating illegal flow.
+ BasicBlock* bTestNext = bTest->bbNext;
+ if (bTestNext->hasTryIndex() && !BasicBlock::sameTryRegion(block, bTestNext))
+ {
+ return;
+ }
+
+ GenTreePtr condStmt = optFindLoopTermTest(bTest);
+
+ // bTest must contain only a jtrue, with no other stmts; we will only clone
+ // the conditional, so any other statements would not get cloned.
+ // TODO-CQ: consider cloning the whole bTest block and inserting it after block.
+ //
+ if (bTest->bbTreeList != condStmt)
+ {
+ return;
+ }
+
+ /* Get to the condition node from the statement tree */
+
+ noway_assert(condStmt->gtOper == GT_STMT);
+
+ GenTreePtr condTree = condStmt->gtStmt.gtStmtExpr;
+ noway_assert(condTree->gtOper == GT_JTRUE);
+
+ condTree = condTree->gtOp.gtOp1;
+
+ // The condTree has to be a RelOp comparison
+ // TODO-CQ: Check if we can also optimize the backwards jump as well.
+ //
+ if (condTree->OperIsCompare() == false)
+ {
+ return;
+ }
+
+ /* We call gtPrepareCost to measure the cost of duplicating this tree */
+
+ gtPrepareCost(condTree);
+ unsigned estDupCostSz = condTree->gtCostSz;
+
+ double loopIterations = (double)BB_LOOP_WEIGHT;
+
+ bool allProfileWeightsAreValid = false;
+ BasicBlock::weight_t weightBlock = block->bbWeight;
+ BasicBlock::weight_t weightTest = bTest->bbWeight;
+ BasicBlock::weight_t weightNext = block->bbNext->bbWeight;
+
+ // If we have profile data then we calculate the number of times
+ // the loop will iterate into loopIterations
+ if (fgIsUsingProfileWeights())
+ {
+ // Only rely upon the profile weight when all three of these blocks
+ // have good profile weights
+ if ((block->bbFlags & BBF_PROF_WEIGHT) && (bTest->bbFlags & BBF_PROF_WEIGHT) &&
+ (block->bbNext->bbFlags & BBF_PROF_WEIGHT))
+ {
+ allProfileWeightsAreValid = true;
+
+ // If this while loop never iterates then don't bother transforming
+ if (weightNext == 0)
+ {
+ return;
+ }
+
+ // with (weightNext > 0) we should also have (weightTest >= weightBlock)
+ // if the profile weights are all valid.
+ //
+ // weightNext is the number of times this loop iterates
+ // weightBlock is the number of times that we enter the while loop
+ // loopIterations is the average number of times that this loop iterates
+ //
+ if (weightTest >= weightBlock)
+ {
+ loopIterations = (double)block->bbNext->bbWeight / (double)block->bbWeight;
+ }
+ }
+ }
+
+ unsigned maxDupCostSz = 32;
+
+ // optFastCodeOrBlendedLoop(bTest->bbWeight) does not work here as we have not
+ // set loop weights yet
+ if ((compCodeOpt() == FAST_CODE) || compStressCompile(STRESS_DO_WHILE_LOOPS, 30))
+ {
+ maxDupCostSz *= 4;
+ }
+
+ // If this loop iterates a lot then raise the maxDupCost
+ if (loopIterations >= 12.0)
+ {
+ maxDupCostSz *= 2;
+ }
+ if (loopIterations >= 96.0)
+ {
+ maxDupCostSz *= 2;
+ }
+
+ // If the loop condition has a shared static helper, we really want this loop converted
+ // as not converting the loop will disable loop hoisting, meaning the shared helper will
+ // be executed on every loop iteration.
+ int countOfHelpers = 0;
+ fgWalkTreePre(&condTree, CountSharedStaticHelper, &countOfHelpers);
+
+ if (countOfHelpers > 0 && compCodeOpt() != SMALL_CODE)
+ {
+ maxDupCostSz += 24 * min(countOfHelpers, (int)(loopIterations + 1.5));
+ }
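+
+ // For illustration with hypothetical numbers: under BLENDED_CODE, with no stress modes, a
+ // profiled loopIterations of 16 and no shared static helpers, maxDupCostSz becomes 32 * 2 = 64,
+ // so a condition whose gtCostSz is 40 is duplicated while one costing 70 is not.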
+
+ // If the compare has too high cost then we don't want to dup
+
+ bool costIsTooHigh = (estDupCostSz > maxDupCostSz);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i,"
+ "\n loopIterations = %7.3f, countOfHelpers = %d, validProfileWeights = %s\n",
+ condTree->gtTreeID, costIsTooHigh ? "not done" : "performed", estDupCostSz,
+ costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, countOfHelpers,
+ allProfileWeightsAreValid ? "true" : "false");
+ }
+#endif
+
+ if (costIsTooHigh)
+ {
+ return;
+ }
+
+ /* Looks good - duplicate the condition test */
+
+ condTree->gtFlags |= GTF_RELOP_ZTT;
+
+ condTree = gtCloneExpr(condTree);
+ gtReverseCond(condTree);
+
+ // Make sure clone expr copied the flag
+ assert(condTree->gtFlags & GTF_RELOP_ZTT);
+
+ condTree = gtNewOperNode(GT_JTRUE, TYP_VOID, condTree);
+
+ /* Create a statement entry out of the condition and
+ append the condition test at the end of 'block' */
+
+ GenTreePtr copyOfCondStmt = fgInsertStmtAtEnd(block, condTree);
+
+ copyOfCondStmt->gtFlags |= GTF_STMT_CMPADD;
+
+#ifdef DEBUGGING_SUPPORT
+ if (opts.compDbgInfo)
+ {
+ copyOfCondStmt->gtStmt.gtStmtILoffsx = condStmt->gtStmt.gtStmtILoffsx;
+ }
+#endif
+
+ // Flag the block that received the copy as potentially having an array/vtable
+ // reference if the block copied from did; this is a conservative guess.
+ if (auto copyFlags = bTest->bbFlags & (BBF_HAS_VTABREF | BBF_HAS_IDX_LEN))
+ {
+ block->bbFlags |= copyFlags;
+ }
+
+ // If we have profile data for all blocks and we know that we are cloning the
+ // bTest block into block and thus changing the control flow from block so
+ // that it no longer goes directly to bTest anymore, we have to adjust the
+ // weight of bTest by subtracting out the weight of block.
+ //
+ if (allProfileWeightsAreValid)
+ {
+ //
+ // Some additional sanity checks before adjusting the weight of bTest
+ //
+ if ((weightNext > 0) && (weightTest >= weightBlock) && (weightTest != BB_MAX_WEIGHT))
+ {
+ // Get the two edge that flow out of bTest
+ flowList* edgeToNext = fgGetPredForBlock(bTest->bbNext, bTest);
+ flowList* edgeToJump = fgGetPredForBlock(bTest->bbJumpDest, bTest);
+
+ // Calculate the new weight for block bTest
+
+ BasicBlock::weight_t newWeightTest =
+ (weightTest > weightBlock) ? (weightTest - weightBlock) : BB_ZERO_WEIGHT;
+ bTest->bbWeight = newWeightTest;
+
+ if (newWeightTest == BB_ZERO_WEIGHT)
+ {
+ bTest->bbFlags |= BBF_RUN_RARELY;
+ // All out edge weights are set to zero
+ edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToNext->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMax = BB_ZERO_WEIGHT;
+ }
+ else
+ {
+ // Update our edge weights
+ edgeToNext->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToNext->flEdgeWeightMax = min(edgeToNext->flEdgeWeightMax, newWeightTest);
+ edgeToJump->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edgeToJump->flEdgeWeightMax = min(edgeToJump->flEdgeWeightMax, newWeightTest);
+ }
+ }
+ }
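+
+ // Numerically, with hypothetical weights: if 'block' enters the loop 100 times (weightBlock = 100)
+ // and bTest originally ran 500 times (weightTest = 500), bTest's weight is lowered to 400, the
+ // minimum weights of its two outgoing edges are zeroed, and their maximum weights are capped at 400.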
+
+ /* Change the block to end with a conditional jump */
+
+ block->bbJumpKind = BBJ_COND;
+ block->bbJumpDest = bTest->bbNext;
+
+ /* Mark the jump dest block as being a jump target */
+ block->bbJumpDest->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ /* Update bbRefs and bbPreds for 'block->bbNext' 'bTest' and 'bTest->bbNext' */
+
+ fgAddRefPred(block->bbNext, block);
+
+ fgRemoveRefPred(bTest, block);
+ fgAddRefPred(bTest->bbNext, block);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nDuplicating loop condition in BB%02u for loop (BB%02u - BB%02u)", block->bbNum, block->bbNext->bbNum,
+ bTest->bbNum);
+ printf("\nEstimated code size expansion is %d\n ", estDupCostSz);
+
+ gtDispTree(copyOfCondStmt);
+ }
+
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Optimize the BasicBlock layout of the method
+ */
+
+void Compiler::optOptimizeLayout()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeLayout()\n");
+ fgDispHandlerTab();
+ }
+
+ /* Check that the flowgraph data (bbNum, bbRefs, bbPreds) is up-to-date */
+ fgDebugCheckBBlist();
+#endif
+
+ noway_assert(fgModified == false);
+
+ for (BasicBlock* block = fgFirstBB; block; block = block->bbNext)
+ {
+ /* Make sure the appropriate fields are initialized */
+
+ if (block->bbWeight == BB_ZERO_WEIGHT)
+ {
+ /* Zero weighted block can't have a LOOP_HEAD flag */
+ noway_assert(block->isLoopHead() == false);
+ continue;
+ }
+
+ assert(block->bbLoopNum == 0);
+
+ if (compCodeOpt() != SMALL_CODE)
+ {
+ /* Optimize "while(cond){}" loops to "if (cond) { do {} while (cond); }" */
+
+ fgOptWhileLoop(block);
+ }
+ }
+
+ if (fgModified)
+ {
+ // Recompute the edge weights if we have modified the flow graph in fgOptWhileLoop
+ fgComputeEdgeWeights();
+ }
+
+ fgUpdateFlowGraph(true);
+ fgReorderBlocks();
+ fgUpdateFlowGraph();
+}
+
+/*****************************************************************************
+ *
+ * Perform loop inversion, find and classify natural loops
+ */
+
+void Compiler::optOptimizeLoops()
+{
+ noway_assert(!opts.MinOpts() && !opts.compDbgCode);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeLoops()\n");
+ }
+#endif
+
+ optSetBlockWeights();
+
+ /* Were there any loops in the flow graph? */
+
+ if (fgHasLoops)
+ {
+ /* now that we have dominator information we can find loops */
+
+ optFindNaturalLoops();
+
+ unsigned loopNum = 0;
+
+ /* Iterate over the flow graph, marking all loops */
+
+ /* We will use the following terminology:
+ * top - the first basic block in the loop (i.e. the head of the backward edge)
+ * bottom - the last block in the loop (i.e. the block from which we jump to the top)
+ * lastBottom - used when we have multiple back-edges to the same top
+ */
+
+ flowList* pred;
+
+ BasicBlock* top;
+
+ for (top = fgFirstBB; top; top = top->bbNext)
+ {
+ BasicBlock* foundBottom = nullptr;
+
+ for (pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ /* Is this a loop candidate? - We look for "back edges" */
+
+ BasicBlock* bottom = pred->flBlock;
+
+ /* is this a backward edge? (from BOTTOM to TOP) */
+
+ if (top->bbNum > bottom->bbNum)
+ {
+ continue;
+ }
+
+ /* 'top' also must have the BBF_LOOP_HEAD flag set */
+
+ if (top->isLoopHead() == false)
+ {
+ continue;
+ }
+
+ /* We only consider back-edges that are BBJ_COND or BBJ_ALWAYS for loops */
+
+ if ((bottom->bbJumpKind != BBJ_COND) && (bottom->bbJumpKind != BBJ_ALWAYS))
+ {
+ continue;
+ }
+
+ /* the top block must be able to reach the bottom block */
+ if (!fgReachable(top, bottom))
+ {
+ continue;
+ }
+
+ /* Found a new loop, record the longest backedge in foundBottom */
+
+ if ((foundBottom == nullptr) || (bottom->bbNum > foundBottom->bbNum))
+ {
+ foundBottom = bottom;
+ }
+ }
+
+ if (foundBottom)
+ {
+ loopNum++;
+#ifdef DEBUG
+ /* Mark the loop header as such */
+ assert(FitsIn<unsigned char>(loopNum));
+ top->bbLoopNum = (unsigned char)loopNum;
+#endif
+
+ /* Mark all blocks between 'top' and 'bottom' */
+
+ optMarkLoopBlocks(top, foundBottom, false);
+ }
+
+ // We track at most 255 loops
+ if (loopNum == 255)
+ {
+#if COUNT_LOOPS
+ totalUnnatLoopOverflows++;
+#endif
+ break;
+ }
+ }
+
+#if COUNT_LOOPS
+ totalUnnatLoopCount += loopNum;
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (loopNum > 0)
+ {
+ printf("\nFound a total of %d loops.", loopNum);
+ printf("\nAfter loop weight marking:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+ }
+#endif
+ optLoopsMarked = true;
+ }
+}
+
+//------------------------------------------------------------------------
+// optDeriveLoopCloningConditions: Derive loop cloning conditions.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning info is kept.
+//
+// Return Value:
+// "false" if conditions cannot be obtained. "true" otherwise.
+// The cloning conditions are updated in the "conditions"[loopNum] field
+// of the "context" parameter.
+//
+// Operation:
+// Inspect the loop cloning optimization candidates and populate the conditions necessary
+// for each optimization candidate. Checks that the loop stride is "> 0" when the loop
+// condition is "less than". If the initializer is a "var" init, then the condition
+// "var >= 0" is added; if the limit is a variable, then "var >= 0" and "var <= a.len"
+// are added to "context". These conditions are checked in the pre-header block
+// and the cloning choice is made.
+//
+// Assumption:
+// Callers should assume AND operation is used i.e., if all conditions are
+// true, then take the fast path.
+//
+bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext* context)
+{
+ JITDUMP("------------------------------------------------------------\n");
+ JITDUMP("Deriving cloning conditions for L%02u\n", loopNum);
+
+ LoopDsc* loop = &optLoopTable[loopNum];
+ ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
+
+ if (loop->lpTestOper() == GT_LT)
+ {
+ // Stride conditions
+ if (loop->lpIterConst() <= 0)
+ {
+ JITDUMP("> Stride %d is invalid\n", loop->lpIterConst());
+ return false;
+ }
+
+ // Init conditions
+ if (loop->lpFlags & LPFLG_CONST_INIT)
+ {
+ // Only allowing const init at this time.
+ if (loop->lpConstInit < 0)
+ {
+ JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
+ return false;
+ }
+ }
+ else if (loop->lpFlags & LPFLG_VAR_INIT)
+ {
+ // limitVar >= 0
+ LC_Condition geZero(GT_GE, LC_Expr(LC_Ident(loop->lpVarInit, LC_Ident::Var)),
+ LC_Expr(LC_Ident(0, LC_Ident::Const)));
+ context->EnsureConditions(loopNum)->Push(geZero);
+ }
+ else
+ {
+ JITDUMP("> Not variable init\n");
+ return false;
+ }
+
+ // Limit Conditions
+ LC_Ident ident;
+ if (loop->lpFlags & LPFLG_CONST_LIMIT)
+ {
+ int limit = loop->lpConstLimit();
+ if (limit < 0)
+ {
+ JITDUMP("> limit %d is invalid\n", limit);
+ return false;
+ }
+ ident = LC_Ident(limit, LC_Ident::Const);
+ }
+ else if (loop->lpFlags & LPFLG_VAR_LIMIT)
+ {
+ unsigned limitLcl = loop->lpVarLimit();
+ ident = LC_Ident(limitLcl, LC_Ident::Var);
+
+ LC_Condition geZero(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0, LC_Ident::Const)));
+
+ context->EnsureConditions(loopNum)->Push(geZero);
+ }
+ else if (loop->lpFlags & LPFLG_ARRLEN_LIMIT)
+ {
+ ArrIndex* index = new (getAllocator()) ArrIndex(getAllocator());
+ if (!loop->lpArrLenLimit(this, index))
+ {
+ JITDUMP("> ArrLen not matching\n");
+ return false;
+ }
+ ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
+
+ // Ensure that this array must be dereference-able, before executing the actual condition.
+ LC_Array array(LC_Array::Jagged, index, LC_Array::None);
+ context->EnsureDerefs(loopNum)->Push(array);
+ }
+ else
+ {
+ JITDUMP("> Undetected limit\n");
+ return false;
+ }
+
+ for (unsigned i = 0; i < optInfos->Size(); ++i)
+ {
+ LcOptInfo* optInfo = optInfos->GetRef(i);
+ switch (optInfo->GetOptType())
+ {
+ case LcOptInfo::LcJaggedArray:
+ {
+ // limit <= arrLen
+ LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
+ LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen);
+ LC_Ident arrLenIdent = LC_Ident(arrLen);
+
+ LC_Condition cond(GT_LE, LC_Expr(ident), LC_Expr(arrLenIdent));
+ context->EnsureConditions(loopNum)->Push(cond);
+
+ // Ensure that this array must be dereference-able, before executing the actual condition.
+ LC_Array array(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::None);
+ context->EnsureDerefs(loopNum)->Push(array);
+ }
+ break;
+ case LcOptInfo::LcMdArray:
+ {
+ // limit <= mdArrLen
+ LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo();
+ LC_Condition cond(GT_LE, LC_Expr(ident),
+ LC_Expr(LC_Ident(LC_Array(LC_Array::MdArray,
+ mdArrInfo->GetArrIndexForDim(getAllocator()),
+ mdArrInfo->dim, LC_Array::None))));
+ context->EnsureConditions(loopNum)->Push(cond);
+ }
+ break;
+
+ default:
+ JITDUMP("Unknown opt\n");
+ return false;
+ }
+ }
+ JITDUMP("Conditions: (");
+ DBEXEC(verbose, context->PrintConditions(loopNum));
+ JITDUMP(")\n");
+ return true;
+ }
+ return false;
+}
+
+//------------------------------------------------------------------------------------
+// optComputeDerefConditions: Derive loop cloning conditions for dereferencing arrays.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning info is kept.
+//
+// Return Value:
+// "false" if conditions cannot be obtained. "true" otherwise.
+// The deref conditions are updated in the "derefConditions"[loopNum] field
+// of the "context" parameter.
+//
+// Definition of Deref Conditions:
+// To be able to check for the loop cloning condition that (limitVar <= a.len)
+// we should first be able to dereference "a". i.e., "a" is non-null.
+//
+// Example:
+//
+// for (i in 0..n)
+// for (j in 0..n)
+// for (k in 0..n) // Inner most loop is being cloned. Cloning needs to check if
+// // (n <= a[i][j].len) and other safety conditions to take the fast path
+// a[i][j][k] = 0;
+//
+// Now, we want to deref a[i][j] to invoke length operator on it to perform the cloning fast path check.
+// This involves deref of (a), (a[i]), (a[i][j]), therefore, the following should first
+// be true to do the deref.
+//
+// (a != null) && (i < a.len) && (a[i] != null) && (j < a[i].len) && (a[i][j] != null) --> (1)
+//
+// Note the short circuiting AND. Implication: these conditions should be performed in separate
+// blocks each of which will branch to slow path if the condition evaluates to false.
+//
+// Now, imagine a situation where we have
+// a[x][y][k] = 20 and a[i][j][k] = 0
+// also in the inner most loop where x, y are parameters, then our conditions will have
+// to include
+// (x < a.len) &&
+// (y < a[x].len)
+// in addition to the above conditions (1) to get rid of bounds check on index 'k'
+//
+// But these conditions can be checked together with conditions
+// (i < a.len) without a need for a separate block. In summary, the conditions will be:
+//
+// (a != null) &&
+// ((i < a.len) & (x < a.len)) && <-- Note the bitwise AND here.
+// (a[i] != null & a[x] != null) && <-- Note the bitwise AND here.
+// (j < a[i].len & y < a[x].len) && <-- Note the bitwise AND here.
+// (a[i][j] != null & a[x][y] != null) <-- Note the bitwise AND here.
+//
+// This naturally yields a tree style pattern, where the nodes of the tree are
+// the array and indices respectively.
+//
+// Example:
+// a => {
+// i => {
+// j => {
+// k => {}
+// }
+// },
+// x => {
+// y => {
+// k => {}
+// }
+// }
+// }
+//
+// Notice that the variables in the same levels can have their conditions combined in the
+// same block with a bitwise AND. Whereas, the conditions in consecutive levels will be
+// combined with a short-circuiting AND (i.e., different basic blocks).
+//
+// Operation:
+// Construct a tree of array indices and the array which will generate the optimal
+// conditions for loop cloning.
+//
+// a[i][j][k], b[i] and a[i][y][k] are the occurrences in the loop. Then, the tree should be:
+//
+// a => {
+// i => {
+// j => {
+// k => {}
+// },
+// y => {
+// k => {}
+// },
+// }
+// },
+// b => {
+// i => {}
+// }
+// In this method, we will construct such a tree by descending depth first into the array
+// index operation and forming a tree structure as we encounter the array or the index variables.
+//
+// This tree structure will then be used to generate conditions like below:
+// (a != null) & (b != null) && // from the first level of the tree.
+//
+// (i < a.len) & (i < b.len) && // from the second level of the tree. Levels can be combined.
+// (a[i] != null) & (b[i] != null) && // from the second level of the tree.
+//
+// (j < a[i].len) & (y < a[i].len) && // from the third level.
+// (a[i][j] != null) & (a[i][y] != null) && // from the third level.
+//
+// and so on.
+//
+//
+bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* context)
+{
+ ExpandArrayStack<LC_Deref*> nodes(getAllocator());
+ int maxRank = -1;
+
+ // Get the dereference-able arrays.
+ ExpandArrayStack<LC_Array>* deref = context->EnsureDerefs(loopNum);
+
+ // For each array in the dereference list, construct a tree,
+ // where the nodes are array and index variables and an edge 'u-v'
+ // exists if a node 'v' indexes node 'u' directly as in u[v] or an edge
+ // 'u-v-w' transitively if u[v][w] occurs.
+ for (unsigned i = 0; i < deref->Size(); ++i)
+ {
+ LC_Array& array = (*deref)[i];
+
+ // First populate the array base variable.
+ LC_Deref* node = LC_Deref::Find(&nodes, array.arrIndex->arrLcl);
+ if (node == nullptr)
+ {
+ node = new (getAllocator()) LC_Deref(array, 0 /*level*/);
+ nodes.Push(node);
+ }
+
+ // For each dimension (level) for the array, populate the tree with the variable
+ // from that dimension.
+ unsigned rank = (unsigned)array.GetDimRank();
+ for (unsigned i = 0; i < rank; ++i)
+ {
+ node->EnsureChildren(getAllocator());
+ LC_Deref* tmp = node->Find(array.arrIndex->indLcls[i]);
+ if (tmp == nullptr)
+ {
+ tmp = new (getAllocator()) LC_Deref(array, node->level + 1);
+ node->children->Push(tmp);
+ }
+
+ // Descend one level down.
+ node = tmp;
+ }
+
+ // Keep the maxRank of all array dereferences.
+ maxRank = max((int)rank, maxRank);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ for (unsigned i = 0; i < nodes.Size(); ++i)
+ {
+ if (i != 0)
+ {
+ printf(",");
+ }
+ nodes[i]->Print();
+ printf("\n");
+ }
+ }
+#endif
+
+ if (maxRank == -1)
+ {
+ return false;
+ }
+
+ // First level will always yield the null-check, since it is made of the array base variables.
+ // All other levels (dimensions) will yield two conditions ex: (i < a.length && a[i] != null)
+ // So add 1 after rank * 2.
+ unsigned condBlocks = (unsigned)maxRank * 2 + 1;
+
+ // Heuristic to avoid creating too many blocks.
+ if (condBlocks > 4)
+ {
+ return false;
+ }
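+
+ // For example, if maxRank is 2 the formula above gives condBlocks = 2 * 2 + 1 = 5, which this
+ // heuristic rejects; a maxRank of 1 gives 3 condition blocks and is allowed.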
+
+ // Derive conditions into an 'array of level x array of conditions' i.e., levelCond[levels][conds]
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->EnsureBlockConditions(loopNum, condBlocks);
+ for (unsigned i = 0; i < nodes.Size(); ++i)
+ {
+ nodes[i]->DeriveLevelConditions(levelCond);
+ }
+
+ DBEXEC(verbose, context->PrintBlockConditions(loopNum));
+ return true;
+}
+
+#ifdef DEBUG
+//----------------------------------------------------------------------------
+// optDebugLogLoopCloning: Insert a call to jithelper that prints a message.
+//
+// Arguments:
+// block - the block in which the helper call needs to be inserted.
+// insertBefore - the tree before which the helper call will be inserted.
+//
+void Compiler::optDebugLogLoopCloning(BasicBlock* block, GenTreePtr insertBefore)
+{
+ if (JitConfig.JitDebugLogLoopCloning() == 0)
+ {
+ return;
+ }
+ GenTreePtr logCall = gtNewHelperCallNode(CORINFO_HELP_DEBUG_LOG_LOOP_CLONING, TYP_VOID);
+ GenTreePtr stmt = fgNewStmtFromTree(logCall);
+ fgInsertStmtBefore(block, insertBefore, stmt);
+ fgMorphBlockStmt(block, stmt DEBUGARG("Debug log loop cloning"));
+}
+#endif
+
+//------------------------------------------------------------------------
+// optPerformStaticOptimizations: Perform the optimizations for the optimization
+// candidates gathered during the cloning phase.
+//
+// Arguments:
+// loopNum - the current loop index for which the optimizations are performed.
+// context - data structure where all loop cloning info is kept.
+// dynamicPath - If true, the optimization is performed in the fast path among the
+// cloned loops. If false, it means this is the only path (i.e.,
+// there is no slow path.)
+//
+// Operation:
+// Perform the optimizations on the fast path i.e., the path in which the
+// optimization candidates were collected at the time of identifying them.
+// The candidates store all the information necessary (the tree/stmt/block
+// they are from) to perform the optimization.
+//
+// Assumption:
+// The unoptimized path is either already cloned when this method is called or
+// there is no unoptimized path (got eliminated statically.) So this method
+// performs the optimizations assuming that the path in which the candidates
+// were collected is the fast path in which the optimizations will be performed.
+//
+void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* context DEBUGARG(bool dynamicPath))
+{
+ ExpandArrayStack<LcOptInfo*>* optInfos = context->GetLoopOptInfo(loopNum);
+ for (unsigned i = 0; i < optInfos->Size(); ++i)
+ {
+ LcOptInfo* optInfo = optInfos->GetRef(i);
+ switch (optInfo->GetOptType())
+ {
+ case LcOptInfo::LcJaggedArray:
+ {
+ LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo();
+ compCurBB = arrIndexInfo->arrIndex.useBlock;
+ optRemoveRangeCheck(arrIndexInfo->arrIndex.bndsChks[arrIndexInfo->dim], arrIndexInfo->stmt, true,
+ GTF_ASG, true);
+ DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt));
+ }
+ break;
+ case LcOptInfo::LcMdArray:
+ // TODO-CQ: CLONE: Implement.
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+//----------------------------------------------------------------------------
+// optCanCloneLoops: Use the environment flag to determine whether loop
+// cloning is allowed to be performed.
+//
+// Return Value:
+// Returns true in retail builds, and in debug builds unless the
+// COMPlus_JitCloneLoops flag is set to 0.
+//
+bool Compiler::optCanCloneLoops()
+{
+ // Enabled for retail builds now.
+ unsigned cloneLoopsFlag = 1;
+#ifdef DEBUG
+ cloneLoopsFlag = JitConfig.JitCloneLoops();
+#endif
+ return (cloneLoopsFlag != 0);
+}
+
+//----------------------------------------------------------------------------
+// optIsLoopClonable: Determine whether this loop can be cloned.
+//
+// Arguments:
+// loopInd - loop index which needs to be checked if it can be cloned.
+//
+// Return Value:
+// Returns true if the loop can be cloned. If it returns false,
+// it prints a message in debug builds explaining why the loop can't be cloned.
+//
+bool Compiler::optIsLoopClonable(unsigned loopInd)
+{
+ // First, for now, make sure the loop doesn't have any embedded exception handling -- I don't want to tackle
+ // inserting new EH regions in the exception table yet.
+ BasicBlock* stopAt = optLoopTable[loopInd].lpBottom->bbNext;
+ unsigned loopRetCount = 0;
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != stopAt; blk = blk->bbNext)
+ {
+ if (blk->bbJumpKind == BBJ_RETURN)
+ {
+ loopRetCount++;
+ }
+ if (bbIsTryBeg(blk))
+ {
+ JITDUMP("Loop cloning: rejecting loop %d in %s, because it has a try begin.\n", loopInd, info.compFullName);
+ return false;
+ }
+ }
+
+ // Is the entry block a handler or filter start? If so, then if we cloned, we could create a jump
+ // into the middle of a handler (to go to the cloned copy.) Reject.
+ if (bbIsHandlerBeg(optLoopTable[loopInd].lpEntry))
+ {
+ JITDUMP("Loop cloning: rejecting loop because entry block is a handler start.\n");
+ return false;
+ }
+
+ // If the head and entry are in different EH regions, reject.
+ if (!BasicBlock::sameEHRegion(optLoopTable[loopInd].lpHead, optLoopTable[loopInd].lpEntry))
+ {
+ JITDUMP("Loop cloning: rejecting loop because head and entry blocks are in different EH regions.\n");
+ return false;
+ }
+
+ // Is the first block after the last block of the loop a handler or filter start?
+ // Usually, we create a dummy block after the original loop, to skip over the loop clone
+ // and go to where the original loop did. That raises problems when we don't actually go to
+ // that block; this is one of those cases. This could be fixed fairly easily; for example,
+ // we could add a dummy nop block after the (cloned) loop bottom, in the same handler scope as the
+ // loop. This is just a corner to cut to get this working faster.
+ BasicBlock* bbAfterLoop = optLoopTable[loopInd].lpBottom->bbNext;
+ if (bbAfterLoop != nullptr && bbIsHandlerBeg(bbAfterLoop))
+ {
+ JITDUMP("Loop cloning: rejecting loop because next block after bottom is a handler start.\n");
+ return false;
+ }
+
+ // We've previously made a decision whether to have separate return epilogs, or branch to one.
+ // There's a GCInfo limitation in the x86 case, so that there can be no more than 4 separate epilogs.
+ // (I thought this was x86-specific, but it's not #ifdef'd for x86 only. On other architectures, the decision
+ // should be made as a heuristic tradeoff; perhaps we're just choosing to live with 4 as the limit.)
+ if (fgReturnCount + loopRetCount > 4)
+ {
+ JITDUMP("Loop cloning: rejecting loop because it has %d returns; if added to previously-existing %d returns, "
+ "would exceed the limit of 4.\n",
+ loopRetCount, fgReturnCount);
+ return false;
+ }
+
+ // Otherwise, we're going to add those return blocks.
+ fgReturnCount += loopRetCount;
+
+ return true;
+}
+
+/*****************************************************************************
+ *
+ * Identify loop cloning opportunities, derive loop cloning conditions,
+ * perform loop cloning, use the derived conditions to choose which
+ * path to take.
+ */
+void Compiler::optCloneLoops()
+{
+ JITDUMP("\n*************** In optCloneLoops()\n");
+ if (optLoopCount == 0 || !optCanCloneLoops())
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Blocks/Trees at start of phase\n");
+ fgDispBasicBlocks(true);
+ }
+#endif
+
+ LoopCloneContext context(optLoopCount, getAllocator());
+
+ // Obtain array optimization candidates in the context.
+ optObtainLoopCloningOpts(&context);
+
+ // For each loop, derive cloning conditions for the optimization candidates.
+ for (unsigned i = 0; i < optLoopCount; ++i)
+ {
+ ExpandArrayStack<LcOptInfo*>* optInfos = context.GetLoopOptInfo(i);
+ if (optInfos == nullptr)
+ {
+ continue;
+ }
+
+ if (!optDeriveLoopCloningConditions(i, &context) || !optComputeDerefConditions(i, &context))
+ {
+ JITDUMP("> Conditions could not be obtained\n");
+ context.CancelLoopOptInfo(i);
+ }
+ else
+ {
+ bool allTrue = false;
+ bool anyFalse = false;
+ context.EvaluateConditions(i, &allTrue, &anyFalse DEBUGARG(verbose));
+ if (anyFalse)
+ {
+ context.CancelLoopOptInfo(i);
+ }
+ if (allTrue)
+ {
+ // Perform static optimizations on the fast path since we always
+ // have to take the cloned path.
+ optPerformStaticOptimizations(i, &context DEBUGARG(false));
+
+ // No need to clone.
+ context.CancelLoopOptInfo(i);
+ }
+ }
+ }
+
+#if 0
+ // The code in this #if has been useful in debugging loop cloning issues, by
+ // enabling selective enablement of the loop cloning optimization according to
+ // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("loopclonehashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("loopclonehashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+#endif
+#endif
+
+ for (unsigned i = 0; i < optLoopCount; ++i)
+ {
+ if (context.GetLoopOptInfo(i) != nullptr)
+ {
+ optLoopsCloned++;
+ context.OptimizeConditions(i DEBUGARG(verbose));
+ context.OptimizeBlockConditions(i DEBUGARG(verbose));
+ optCloneLoop(i, &context);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nAfter loop cloning:\n");
+ fgDispBasicBlocks(/*dumpTrees*/ true);
+ }
+#endif
+}
+
+void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
+{
+ assert(loopInd < optLoopCount);
+
+ JITDUMP("\nCloning loop %d: [h: %d, f: %d, t: %d, e: %d, b: %d].\n", loopInd, optLoopTable[loopInd].lpHead->bbNum,
+ optLoopTable[loopInd].lpFirst->bbNum, optLoopTable[loopInd].lpTop->bbNum,
+ optLoopTable[loopInd].lpEntry->bbNum, optLoopTable[loopInd].lpBottom->bbNum);
+
+ // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks).
+ unsigned depth = optLoopDepth(loopInd);
+ unsigned ambientWeight = 1;
+ for (unsigned j = 0; j < depth; j++)
+ {
+ unsigned lastWeight = ambientWeight;
+ ambientWeight *= BB_LOOP_WEIGHT;
+ // If the multiplication overflowed, stick at max.
+ // (Strictly speaking, a multiplication could overflow and still have a result
+ // that is >= lastWeight...but if so, the original weight must be pretty large,
+ // and it got bigger, so that's OK.)
+ if (ambientWeight < lastWeight)
+ {
+ ambientWeight = BB_MAX_WEIGHT;
+ break;
+ }
+ }
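+
+ // For instance, a loop at nesting depth 2, assuming BB_LOOP_WEIGHT is 8, yields
+ // ambientWeight = 1 * 8 * 8 = 64 (provided the overflow clamp above is not hit).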
+
+ // If we're in a non-natural loop, the ambient weight might be higher than we computed above.
+ // Be safe by taking the max with the head block's weight.
+ ambientWeight = max(ambientWeight, optLoopTable[loopInd].lpHead->bbWeight);
+
+ // This is the containing loop, if any -- to label any blocks we create that are outside
+ // the loop being cloned.
+ unsigned char ambientLoop = optLoopTable[loopInd].lpParent;
+
+ // First, make sure that the loop has a unique header block, creating an empty one if necessary.
+ optEnsureUniqueHead(loopInd, ambientWeight);
+
+ // We're going to make
+
+ // H --> E
+ // F
+ // T
+ // E
+ // B ?-> T
+ // X
+ //
+ // become
+ //
+ // H ?-> E2
+ // H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
+ // F
+ // T
+ // E
+ // B ?-> T
+ // X2--> X
+ // F2
+ // T2
+ // E2
+ // B2 ?-> T2
+ // X
+
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
+ {
+ // Make a new block to be the unique entry to the loop.
+ assert(h->bbJumpKind == BBJ_COND && h->bbNext == optLoopTable[loopInd].lpEntry);
+ BasicBlock* newH = fgNewBBafter(BBJ_NONE, h,
+ /*extendRegion*/ true);
+ newH->bbWeight = (newH->isRunRarely() ? 0 : ambientWeight);
+ BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ newH->bbNatLoopNum = ambientLoop;
+ h = newH;
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h);
+ }
+
+ // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
+ // "newPred" will be the predecessor of the blocks of the cloned loop.
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+ BasicBlock* newPred = b;
+ if (b->bbJumpKind != BBJ_ALWAYS)
+ {
+ BasicBlock* x = b->bbNext;
+ if (x != nullptr)
+ {
+ BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
+ x2->bbWeight = (x2->isRunRarely() ? 0 : ambientWeight);
+
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ x2->bbNatLoopNum = ambientLoop;
+
+ x2->bbJumpDest = x;
+ BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
+ newPred = x2;
+ }
+ }
+
+    // Now we'll make "h2", after "h", to go to "e" -- unless the loop is a do-while,
+    // in which case "h" already falls through to "e" (e == t == f).
+ BasicBlock* h2 = nullptr;
+ if (optLoopTable[loopInd].lpHead->bbNext != optLoopTable[loopInd].lpEntry)
+ {
+        h2 = fgNewBBafter(BBJ_ALWAYS, optLoopTable[loopInd].lpHead,
+                          /*extendRegion*/ true);
+ h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
+
+ // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
+ h2->bbNatLoopNum = ambientLoop;
+
+ h2->bbJumpDest = optLoopTable[loopInd].lpEntry;
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
+ }
+
+ // Now we'll clone the blocks of the loop body.
+ BasicBlock* newFirst = nullptr;
+ BasicBlock* newBot = nullptr;
+
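+    // Map each original loop block to its clone, so that intra-loop branches can be redirected below.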
+ BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
+ blk = blk->bbNext)
+ {
+ BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred,
+ /*extendRegion*/ true);
+
+ BasicBlock::CloneBlockState(this, newBlk, blk);
+ // TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
+ // the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
+ // loop, if one exists -- the parent of the loop we're cloning.
+ newBlk->bbNatLoopNum = optLoopTable[loopInd].lpParent;
+
+ if (newFirst == nullptr)
+ {
+ newFirst = newBlk;
+ }
+ newBot = newBlk; // Continually overwrite to make sure we get the last one.
+ newPred = newBlk;
+ blockMap->Set(blk, newBlk);
+ }
+
+ // Perform the static optimizations on the fast path.
+ optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
+
+ // Now go through the new blocks, remapping their jump targets within the loop.
+ for (BasicBlock* blk = optLoopTable[loopInd].lpFirst; blk != optLoopTable[loopInd].lpBottom->bbNext;
+ blk = blk->bbNext)
+ {
+
+ BasicBlock* newblk = nullptr;
+ bool b = blockMap->Lookup(blk, &newblk);
+ assert(b && newblk != nullptr);
+
+ assert(blk->bbJumpKind == newblk->bbJumpKind);
+
+ // First copy the jump destination(s) from "blk".
+ optCopyBlkDest(blk, newblk);
+
+ // Now redirect the new block according to "blockMap".
+ optRedirectBlock(newblk, blockMap);
+ }
+
+ assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == optLoopTable[loopInd].lpEntry)) ||
+ (h->bbJumpKind == BBJ_ALWAYS));
+
+ // If all the conditions are true, go to E2.
+ BasicBlock* e2 = nullptr;
+ bool foundIt = blockMap->Lookup(optLoopTable[loopInd].lpEntry, &e2);
+
+ h->bbJumpKind = BBJ_COND;
+
+ // We will create the following structure
+ //
+ // cond0 (in h) -?> cond1
+ // slow --> e2 (slow) always
+ // !cond1 -?> slow
+ // !cond2 -?> slow
+ // ...
+ // !condn -?> slow
+ // h2/entry (fast)
+ //
+    // We should always have block conditions; at a minimum, the array should be deref-able
+ assert(context->HasBlockConditions(loopInd));
+
+ // Create a unique header for the slow path.
+ BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
+ slowHead->bbWeight = (h->isRunRarely() ? 0 : ambientWeight);
+ slowHead->bbNatLoopNum = ambientLoop;
+ slowHead->bbJumpDest = e2;
+
+ BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
+ condLast->bbJumpDest = slowHead;
+
+    // If h2 is present it is already the head; otherwise, make 'condLast' the new loop head in place of 'h'.
+ if (h2 == nullptr)
+ {
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, condLast);
+ }
+ assert(foundIt && e2 != nullptr);
+
+ fgUpdateChangedFlowGraph();
+}
+
+//--------------------------------------------------------------------------------------------------
+// optInsertLoopChoiceConditions - Insert the loop conditions for a loop between loop head and entry
+//
+// Arguments:
+// context loop cloning context variable
+// loopNum the loop index
+// head loop head for "loopNum"
+// slowHead the slow path loop head
+//
+// Return Values:
+// None.
+//
+// Operation:
+// Create the following structure.
+//
+//  Note below that cond0 is inverted in the head block, i.e., if it is true we jump to cond1. This is
+//  because condn cannot jtrue to the loop head h2; the jump to h2 has to come from a direct pred block.
+//
+// cond0 (in h) -?> cond1
+// slowHead --> e2 (slowHead) always
+// !cond1 -?> slowHead
+// !cond2 -?> slowHead
+// ...
+// !condn -?> slowHead
+// h2/entry (fast)
+//
+// Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
+//
+BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
+ unsigned loopNum,
+ BasicBlock* head,
+ BasicBlock* slowHead)
+{
+ JITDUMP("Inserting loop cloning conditions\n");
+ assert(context->HasBlockConditions(loopNum));
+
+ BasicBlock* curCond = head;
+ ExpandArrayStack<ExpandArrayStack<LC_Condition>*>* levelCond = context->GetBlockConditions(loopNum);
+ for (unsigned i = 0; i < levelCond->Size(); ++i)
+ {
+ bool isHeaderBlock = (curCond == head);
+
+ // Flip the condition if header block.
+ context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
+
+ // Create each condition block ensuring wiring between them.
+ BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
+ curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
+ curCond = tmp;
+
+ curCond->inheritWeight(head);
+ curCond->bbNatLoopNum = head->bbNatLoopNum;
+ JITDUMP("Created new block %02d for new level\n", curCond->bbNum);
+ }
+
+ // Finally insert cloning conditions after all deref conditions have been inserted.
+ context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
+ return curCond;
+}
+
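+// optEnsureUniqueHead: Ensure that loop "loopInd" has a unique head block that dominates its entry.
+// If it does not, create an empty header block with weight "ambientWeight" and redirect the entry's
+// out-of-loop predecessors to it.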
+void Compiler::optEnsureUniqueHead(unsigned loopInd, unsigned ambientWeight)
+{
+ BasicBlock* h = optLoopTable[loopInd].lpHead;
+ BasicBlock* t = optLoopTable[loopInd].lpTop;
+ BasicBlock* e = optLoopTable[loopInd].lpEntry;
+ BasicBlock* b = optLoopTable[loopInd].lpBottom;
+
+ // If "h" dominates the entry block, then it is the unique header.
+ if (fgDominate(h, e))
+ {
+ return;
+ }
+
+ // Otherwise, create a new empty header block, make it the pred of the entry block,
+ // and redirect the preds of the entry block to go to this.
+
+ BasicBlock* beforeTop = t->bbPrev;
+ // Make sure that the new block is in the same region as the loop.
+ // (We will only create loops that are entirely within a region.)
+ BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, beforeTop, true);
+ // This is in the containing loop.
+ h2->bbNatLoopNum = optLoopTable[loopInd].lpParent;
+ h2->bbWeight = (h2->isRunRarely() ? 0 : ambientWeight);
+
+ // We don't care where it was put; splice it between beforeTop and top.
+ if (beforeTop->bbNext != h2)
+ {
+ h2->bbPrev->setNext(h2->bbNext); // Splice h2 out.
+ beforeTop->setNext(h2); // Splice h2 in, between beforeTop and t.
+ h2->setNext(t);
+ }
+
+ if (h2->bbNext != e)
+ {
+ h2->bbJumpKind = BBJ_ALWAYS;
+ h2->bbJumpDest = e;
+ }
+ BlockSetOps::Assign(this, h2->bbReach, e->bbReach);
+
+ // Redirect paths from preds of "e" to go to "h2" instead of "e".
+ BlockToBlockMap* blockMap = new (getAllocator()) BlockToBlockMap(getAllocator());
+ blockMap->Set(e, h2);
+
+ for (flowList* predEntry = e->bbPreds; predEntry; predEntry = predEntry->flNext)
+ {
+ BasicBlock* predBlock = predEntry->flBlock;
+
+ // Skip if predBlock is in the loop.
+ if (t->bbNum <= predBlock->bbNum && predBlock->bbNum <= b->bbNum)
+ {
+ continue;
+ }
+ optRedirectBlock(predBlock, blockMap);
+ }
+
+ optUpdateLoopHead(loopInd, optLoopTable[loopInd].lpHead, h2);
+}
+
+/*****************************************************************************
+ *
+ * Determine the kind of interference for the call.
+ */
+
+/* static */ inline Compiler::callInterf Compiler::optCallInterf(GenTreePtr call)
+{
+ assert(call->gtOper == GT_CALL);
+
+ // if not a helper, kills everything
+ if (call->gtCall.gtCallType != CT_HELPER)
+ {
+ return CALLINT_ALL;
+ }
+
+ // setfield and array address store kill all indirections
+ switch (eeGetHelperNum(call->gtCall.gtCallMethHnd))
+ {
+ case CORINFO_HELP_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_CHECKED_ASSIGN_REF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_ASSIGN_BYREF: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_SETFIELDOBJ:
+ case CORINFO_HELP_ARRADDR_ST:
+
+ return CALLINT_REF_INDIRS;
+
+ case CORINFO_HELP_SETFIELDFLOAT:
+ case CORINFO_HELP_SETFIELDDOUBLE:
+ case CORINFO_HELP_SETFIELD8:
+ case CORINFO_HELP_SETFIELD16:
+ case CORINFO_HELP_SETFIELD32:
+ case CORINFO_HELP_SETFIELD64:
+
+ return CALLINT_SCL_INDIRS;
+
+ case CORINFO_HELP_ASSIGN_STRUCT: // Not strictly needed as we don't use this in Jit32
+ case CORINFO_HELP_MEMSET: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_MEMCPY: // Not strictly needed as we don't make a GT_CALL with this
+ case CORINFO_HELP_SETFIELDSTRUCT:
+
+ return CALLINT_ALL_INDIRS;
+
+ default:
+ break;
+ }
+
+ // other helpers kill nothing
+ return CALLINT_NONE;
+}
+
+/*****************************************************************************
+ *
+ * See if the given tree can be computed in the given precision (which must
+ * be smaller than the type of the tree for this to make sense). If 'doit'
+ * is false, we merely check to see whether narrowing is possible; if we
+ * get called with 'doit' being true, we actually perform the narrowing.
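+ * For example, a TYP_LONG AND whose constant mask fits in the target type can typically be
+ * recomputed as a TYP_INT operation.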
+ */
+
+bool Compiler::optNarrowTree(GenTreePtr tree, var_types srct, var_types dstt, ValueNumPair vnpNarrow, bool doit)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ noway_assert(tree);
+ noway_assert(genActualType(tree->gtType) == genActualType(srct));
+
+ /* Assume we're only handling integer types */
+ noway_assert(varTypeIsIntegral(srct));
+ noway_assert(varTypeIsIntegral(dstt));
+
+ unsigned srcSize = genTypeSize(srct);
+ unsigned dstSize = genTypeSize(dstt);
+
+ /* dstt must be smaller than srct to narrow */
+ if (dstSize >= srcSize)
+ {
+ return false;
+ }
+
+ /* Figure out what kind of a node we have */
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (kind & GTK_ASGOP)
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+
+ ValueNumPair NoVNPair = ValueNumPair();
+
+ if (kind & GTK_LEAF)
+ {
+ switch (oper)
+ {
+ /* Constants can usually be narrowed by changing their value */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ __int64 lval;
+ __int64 lmask;
+
+ case GT_CNS_LNG:
+ lval = tree->gtIntConCommon.LngValue();
+ lmask = 0;
+
+ switch (dstt)
+ {
+ case TYP_BYTE:
+ lmask = 0x0000007F;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ lmask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ lmask = 0x00007FFF;
+ break;
+ case TYP_CHAR:
+ lmask = 0x0000FFFF;
+ break;
+ case TYP_INT:
+ lmask = 0x7FFFFFFF;
+ break;
+ case TYP_UINT:
+ lmask = 0xFFFFFFFF;
+ break;
+
+ default:
+ return false;
+ }
+
+ if ((lval & lmask) != lval)
+ return false;
+
+ if (doit)
+ {
+ tree->ChangeOperConst(GT_CNS_INT);
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = (int)lval;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+
+ return true;
+#endif
+
+ case GT_CNS_INT:
+
+ ssize_t ival;
+ ival = tree->gtIntCon.gtIconVal;
+ ssize_t imask;
+ imask = 0;
+
+ switch (dstt)
+ {
+ case TYP_BYTE:
+ imask = 0x0000007F;
+ break;
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ imask = 0x000000FF;
+ break;
+ case TYP_SHORT:
+ imask = 0x00007FFF;
+ break;
+ case TYP_CHAR:
+ imask = 0x0000FFFF;
+ break;
+#ifdef _TARGET_64BIT_
+ case TYP_INT:
+ imask = 0x7FFFFFFF;
+ break;
+ case TYP_UINT:
+ imask = 0xFFFFFFFF;
+ break;
+#endif // _TARGET_64BIT_
+ default:
+ return false;
+ }
+
+ if ((ival & imask) != ival)
+ {
+ return false;
+ }
+
+#ifdef _TARGET_64BIT_
+ if (doit)
+ {
+ tree->gtType = TYP_INT;
+ tree->gtIntCon.gtIconVal = (int)ival;
+ if (vnStore != nullptr)
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+#endif // _TARGET_64BIT_
+
+ return true;
+
+ /* Operands that are in memory can usually be narrowed
+ simply by changing their gtType */
+
+ case GT_LCL_VAR:
+ /* We only allow narrowing long -> int for a GT_LCL_VAR */
+ if (dstSize == sizeof(int))
+ {
+ goto NARROW_IND;
+ }
+ break;
+
+ case GT_CLS_VAR:
+ case GT_LCL_FLD:
+ goto NARROW_IND;
+ default:
+ break;
+ }
+
+ noway_assert(doit == false);
+ return false;
+ }
+
+ if (kind & (GTK_BINOP | GTK_UNOP))
+ {
+ GenTreePtr op1;
+ op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2;
+ op2 = tree->gtOp.gtOp2;
+
+ switch (tree->gtOper)
+ {
+ case GT_AND:
+ noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
+
+                // Is op2 a small constant that can be narrowed into dstt?
+ // if so the result of the GT_AND will also fit into 'dstt' and can be narrowed
+ if ((op2->gtOper == GT_CNS_INT) && optNarrowTree(op2, srct, dstt, NoVNPair, false))
+ {
+ // We will change the type of the tree and narrow op2
+ //
+ if (doit)
+ {
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+
+ optNarrowTree(op2, srct, dstt, NoVNPair, true);
+ // We may also need to cast away the upper bits of op1
+ if (srcSize == 8)
+ {
+ assert(tree->gtType == TYP_INT);
+ op1 = gtNewCastNode(TYP_INT, op1, TYP_INT);
+#ifdef DEBUG
+ op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
+#endif
+ tree->gtOp.gtOp1 = op1;
+ }
+ }
+ return true;
+ }
+
+ goto COMMON_BINOP;
+
+ case GT_ADD:
+ case GT_MUL:
+
+ if (tree->gtOverflow() || varTypeIsSmall(dstt))
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+ __fallthrough;
+
+ case GT_OR:
+ case GT_XOR:
+ COMMON_BINOP:
+ noway_assert(genActualType(tree->gtType) == genActualType(op1->gtType));
+ noway_assert(genActualType(tree->gtType) == genActualType(op2->gtType));
+
+ if (gtIsActiveCSE_Candidate(op1) || gtIsActiveCSE_Candidate(op2) ||
+ !optNarrowTree(op1, srct, dstt, NoVNPair, doit) || !optNarrowTree(op2, srct, dstt, NoVNPair, doit))
+ {
+ noway_assert(doit == false);
+ return false;
+ }
+
+ /* Simply change the type of the tree */
+
+ if (doit)
+ {
+ if (tree->gtOper == GT_MUL && (tree->gtFlags & GTF_MUL_64RSLT))
+ {
+ tree->gtFlags &= ~GTF_MUL_64RSLT;
+ }
+
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+
+ return true;
+
+ case GT_IND:
+
+ NARROW_IND:
+ /* Simply change the type of the tree */
+
+ if (doit && (dstSize <= genTypeSize(tree->gtType)))
+ {
+ tree->gtType = genSignedType(dstt);
+ tree->SetVNs(vnpNarrow);
+
+ /* Make sure we don't mess up the variable type */
+ if ((oper == GT_LCL_VAR) || (oper == GT_LCL_FLD))
+ {
+ tree->gtFlags |= GTF_VAR_CAST;
+ }
+ }
+
+ return true;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GT:
+ case GT_GE:
+
+ /* These can always be narrowed since they only represent 0 or 1 */
+ return true;
+
+ case GT_CAST:
+ {
+ var_types cast = tree->CastToType();
+ var_types oprt = op1->TypeGet();
+ unsigned oprSize = genTypeSize(oprt);
+
+ if (cast != srct)
+ {
+ return false;
+ }
+
+ if (varTypeIsIntegralOrI(dstt) != varTypeIsIntegralOrI(oprt))
+ {
+ return false;
+ }
+
+ if (tree->gtOverflow())
+ {
+ return false;
+ }
+
+ /* Is this a cast from the type we're narrowing to or a smaller one? */
+
+ if (oprSize <= dstSize)
+ {
+ /* Bash the target type of the cast */
+
+ if (doit)
+ {
+ dstt = genSignedType(dstt);
+
+ if (oprSize == dstSize)
+ {
+ // Same size: change the CAST into a NOP
+ tree->ChangeOper(GT_NOP);
+ tree->gtType = dstt;
+ tree->gtOp.gtOp2 = nullptr;
+ tree->gtVNPair = op1->gtVNPair; // Set to op1's ValueNumber
+ }
+ else
+ {
+ // oprSize is smaller
+ assert(oprSize < dstSize);
+
+ // Change the CastToType in the GT_CAST node
+ tree->CastToType() = dstt;
+
+ // The result type of a GT_CAST is never a small type.
+                        // Use genActualType to widen dstt when it is a small type.
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+ }
+
+ return true;
+ }
+ }
+ return false;
+
+ case GT_COMMA:
+ if (!gtIsActiveCSE_Candidate(op2) && optNarrowTree(op2, srct, dstt, vnpNarrow, doit))
+ {
+ /* Simply change the type of the tree */
+
+ if (doit)
+ {
+ tree->gtType = genActualType(dstt);
+ tree->SetVNs(vnpNarrow);
+ }
+ return true;
+ }
+ return false;
+
+ default:
+ noway_assert(doit == false);
+ return false;
+ }
+ }
+
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * The following logic figures out whether the given variable is assigned
+ * somewhere in a list of basic blocks (or in an entire loop).
+ */
+
+Compiler::fgWalkResult Compiler::optIsVarAssgCB(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+
+ if (tree->OperKind() & GTK_ASGOP)
+ {
+ GenTreePtr dest = tree->gtOp.gtOp1;
+ genTreeOps destOper = dest->OperGet();
+
+ isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
+ assert(desc && desc->ivaSelf == desc);
+
+ if (destOper == GT_LCL_VAR)
+ {
+ unsigned tvar = dest->gtLclVarCommon.gtLclNum;
+ if (tvar < lclMAX_ALLSET_TRACKED)
+ {
+ AllVarSetOps::AddElemD(data->compiler, desc->ivaMaskVal, tvar);
+ }
+ else
+ {
+ desc->ivaMaskIncomplete = true;
+ }
+
+ if (tvar == desc->ivaVar)
+ {
+ if (tree != desc->ivaSkip)
+ {
+ return WALK_ABORT;
+ }
+ }
+ }
+ else if (destOper == GT_LCL_FLD)
+ {
+ /* We can't track every field of every var. Moreover, indirections
+ may access different parts of the var as different (but
+ overlapping) fields. So just treat them as indirect accesses */
+
+ // unsigned lclNum = dest->gtLclFld.gtLclNum;
+ // noway_assert(lvaTable[lclNum].lvAddrTaken);
+
+ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
+ }
+ else if (destOper == GT_CLS_VAR)
+ {
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | VR_GLB_VAR);
+ }
+ else if (destOper == GT_IND)
+ {
+ /* Set the proper indirection bits */
+
+ varRefKinds refs = varTypeIsGC(tree->TypeGet()) ? VR_IND_REF : VR_IND_SCL;
+ desc->ivaMaskInd = varRefKinds(desc->ivaMaskInd | refs);
+ }
+ }
+ else if (tree->gtOper == GT_CALL)
+ {
+ isVarAssgDsc* desc = (isVarAssgDsc*)data->pCallbackData;
+ assert(desc && desc->ivaSelf == desc);
+
+ desc->ivaMaskCall = optCallInterf(tree);
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************/
+
+bool Compiler::optIsVarAssigned(BasicBlock* beg, BasicBlock* end, GenTreePtr skip, unsigned var)
+{
+ bool result;
+ isVarAssgDsc desc;
+
+ desc.ivaSkip = skip;
+#ifdef DEBUG
+ desc.ivaSelf = &desc;
+#endif
+ desc.ivaVar = var;
+ desc.ivaMaskCall = CALLINT_NONE;
+ AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
+
+ for (;;)
+ {
+ noway_assert(beg);
+
+ for (GenTreeStmt* stmt = beg->firstStmt(); stmt; stmt = stmt->gtNextStmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+ if (fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc))
+ {
+ result = true;
+ goto DONE;
+ }
+ }
+
+ if (beg == end)
+ {
+ break;
+ }
+
+ beg = beg->bbNext;
+ }
+
+ result = false;
+
+DONE:
+
+ return result;
+}
+
+/*****************************************************************************/
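+// optIsSetAssgLoop: Return 1 if loop "lnum" may assign any of the variables in "vars", or may perform
+// stores of the indirection kinds in "inds" (directly or via calls); return 0 otherwise.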
+int Compiler::optIsSetAssgLoop(unsigned lnum, ALLVARSET_VALARG_TP vars, varRefKinds inds)
+{
+ LoopDsc* loop;
+
+ /* Get hold of the loop descriptor */
+
+ noway_assert(lnum < optLoopCount);
+ loop = optLoopTable + lnum;
+
+ /* Do we already know what variables are assigned within this loop? */
+
+ if (!(loop->lpFlags & LPFLG_ASGVARS_YES))
+ {
+ isVarAssgDsc desc;
+
+ BasicBlock* beg;
+ BasicBlock* end;
+
+ /* Prepare the descriptor used by the tree walker call-back */
+
+ desc.ivaVar = (unsigned)-1;
+ desc.ivaSkip = nullptr;
+#ifdef DEBUG
+ desc.ivaSelf = &desc;
+#endif
+ AllVarSetOps::AssignNoCopy(this, desc.ivaMaskVal, AllVarSetOps::MakeEmpty(this));
+ desc.ivaMaskInd = VR_NONE;
+ desc.ivaMaskCall = CALLINT_NONE;
+ desc.ivaMaskIncomplete = false;
+
+ /* Now walk all the statements of the loop */
+
+ beg = loop->lpHead->bbNext;
+ end = loop->lpBottom;
+
+ for (/**/; /**/; beg = beg->bbNext)
+ {
+ noway_assert(beg);
+
+ for (GenTreeStmt* stmt = beg->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+ fgWalkTreePre(&stmt->gtStmtExpr, optIsVarAssgCB, &desc);
+
+ if (desc.ivaMaskIncomplete)
+ {
+ loop->lpFlags |= LPFLG_ASGVARS_INC;
+ }
+ }
+
+ if (beg == end)
+ {
+ break;
+ }
+ }
+
+ AllVarSetOps::Assign(this, loop->lpAsgVars, desc.ivaMaskVal);
+ loop->lpAsgInds = desc.ivaMaskInd;
+ loop->lpAsgCall = desc.ivaMaskCall;
+
+ /* Now we know what variables are assigned in the loop */
+
+ loop->lpFlags |= LPFLG_ASGVARS_YES;
+ }
+
+ /* Now we can finally test the caller's mask against the loop's */
+ if (!AllVarSetOps::IsEmptyIntersection(this, loop->lpAsgVars, vars) || (loop->lpAsgInds & inds))
+ {
+ return 1;
+ }
+
+ switch (loop->lpAsgCall)
+ {
+ case CALLINT_ALL:
+
+            /* Can't hoist if the call might have a side effect on an indirection. */
+
+ if (loop->lpAsgInds != VR_NONE)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_REF_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on a ref indirection. */
+
+ if (loop->lpAsgInds & VR_IND_REF)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_SCL_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on a non-ref indirection. */
+
+ if (loop->lpAsgInds & VR_IND_SCL)
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_ALL_INDIRS:
+
+            /* Can't hoist if the call might have a side effect on any indirection. */
+
+ if (loop->lpAsgInds & (VR_IND_REF | VR_IND_SCL))
+ {
+ return 1;
+ }
+
+ break;
+
+ case CALLINT_NONE:
+
+ /* Other helpers kill nothing */
+
+ break;
+
+ default:
+ noway_assert(!"Unexpected lpAsgCall value");
+ }
+
+ return 0;
+}
+
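+// optPerformHoistExpr: Hoist a copy of "origExpr", marked as a CSE candidate, into the pre-header
+// of loop "lnum", creating the pre-header if it does not already exist.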
+void Compiler::optPerformHoistExpr(GenTreePtr origExpr, unsigned lnum)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nHoisting a copy of ");
+ printTreeID(origExpr);
+ printf(" into PreHeader for loop L%02u <BB%02u..BB%02u>:\n", lnum, optLoopTable[lnum].lpFirst->bbNum,
+ optLoopTable[lnum].lpBottom->bbNum);
+ gtDispTree(origExpr);
+ printf("\n");
+ }
+#endif
+
+ // This loop has to be in a form that is approved for hoisting.
+ assert(optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE);
+
+ // Create a copy of the expression and mark it for CSE's.
+ GenTreePtr hoistExpr = gtCloneExpr(origExpr, GTF_MAKE_CSE);
+
+ // At this point we should have a cloned expression, marked with the GTF_MAKE_CSE flag
+ assert(hoistExpr != origExpr);
+ assert(hoistExpr->gtFlags & GTF_MAKE_CSE);
+
+ GenTreePtr hoist = hoistExpr;
+ // The value of the expression isn't used (unless it's an assignment).
+ if (hoistExpr->OperGet() != GT_ASG)
+ {
+ hoist = gtUnusedValNode(hoistExpr);
+ }
+
+ /* Put the statement in the preheader */
+
+ fgCreateLoopPreHeader(lnum);
+
+ BasicBlock* preHead = optLoopTable[lnum].lpHead;
+ assert(preHead->bbJumpKind == BBJ_NONE);
+
+    // fgMorphTree and lvaRecursiveIncRefCounts require that compCurBB be the block that contains
+ // (or in this case, will contain) the expression.
+ compCurBB = preHead;
+
+ // Increment the ref counts of any local vars appearing in "hoist".
+ // Note that we need to do this before fgMorphTree() as fgMorph() could constant
+ // fold away some of the lcl vars referenced by "hoist".
+ lvaRecursiveIncRefCounts(hoist);
+
+ hoist = fgMorphTree(hoist);
+
+ GenTreePtr hoistStmt = gtNewStmt(hoist);
+ hoistStmt->gtFlags |= GTF_STMT_CMPADD;
+
+ /* simply append the statement at the end of the preHead's list */
+
+ GenTreePtr treeList = preHead->bbTreeList;
+
+ if (treeList)
+ {
+ /* append after last statement */
+
+ GenTreePtr last = treeList->gtPrev;
+ assert(last->gtNext == nullptr);
+
+ last->gtNext = hoistStmt;
+ hoistStmt->gtPrev = last;
+ treeList->gtPrev = hoistStmt;
+ }
+ else
+ {
+ /* Empty pre-header - store the single statement in the block */
+
+ preHead->bbTreeList = hoistStmt;
+ hoistStmt->gtPrev = hoistStmt;
+ }
+
+ hoistStmt->gtNext = nullptr;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("This hoisted copy placed in PreHeader (BB%02u):\n", preHead->bbNum);
+ gtDispTree(hoist);
+ }
+#endif
+
+ if (fgStmtListThreaded)
+ {
+ gtSetStmtInfo(hoistStmt);
+ fgSetStmtSeq(hoistStmt);
+ }
+
+#ifdef DEBUG
+ if (m_nodeTestData != nullptr)
+ {
+
+ // What is the depth of the loop "lnum"?
+ ssize_t depth = 0;
+ unsigned lnumIter = lnum;
+ while (optLoopTable[lnumIter].lpParent != BasicBlock::NOT_IN_LOOP)
+ {
+ depth++;
+ lnumIter = optLoopTable[lnumIter].lpParent;
+ }
+
+ NodeToTestDataMap* testData = GetNodeTestData();
+
+ TestLabelAndNum tlAndN;
+ if (testData->Lookup(origExpr, &tlAndN) && tlAndN.m_tl == TL_LoopHoist)
+ {
+ if (tlAndN.m_num == -1)
+ {
+ printf("Node ");
+ printTreeID(origExpr);
+ printf(" was declared 'do not hoist', but is being hoisted.\n");
+ assert(false);
+ }
+ else if (tlAndN.m_num != depth)
+ {
+ printf("Node ");
+ printTreeID(origExpr);
+ printf(" was declared as hoistable from loop at nesting depth %d; actually hoisted from loop at depth "
+ "%d.\n",
+ tlAndN.m_num, depth);
+ assert(false);
+ }
+ else
+ {
+ // We've correctly hoisted this, so remove the annotation. Later, we'll check for any remaining "must
+ // hoist" annotations.
+ testData->Remove(origExpr);
+ // Now we insert an annotation to make sure that "hoistExpr" is actually CSE'd.
+ tlAndN.m_tl = TL_CSE_Def;
+ tlAndN.m_num = m_loopHoistCSEClass++;
+ testData->Set(hoistExpr, tlAndN);
+ }
+ }
+ }
+#endif
+
+#if LOOP_HOIST_STATS
+ if (!m_curLoopHasHoistedExpression)
+ {
+ m_loopsWithHoistedExpressions++;
+ m_curLoopHasHoistedExpression = true;
+ }
+ m_totalHoistedExpressions++;
+#endif // LOOP_HOIST_STATS
+}
+
+void Compiler::optHoistLoopCode()
+{
+ // If we don't have any loops in the method then take an early out now.
+ if (optLoopCount == 0)
+ {
+ return;
+ }
+
+#ifdef DEBUG
+ unsigned jitNoHoist = JitConfig.JitNoHoist();
+ if (jitNoHoist > 0)
+ {
+ return;
+ }
+#endif
+
+#if 0
+    // The code in this #if has been useful in debugging loop hoisting issues, by
+    // allowing selective enablement of the loop hoisting optimization according to
+    // method hash.
+#ifdef DEBUG
+ unsigned methHash = info.compMethodHash();
+ char* lostr = getenv("loophoisthashlo");
+ unsigned methHashLo = 0;
+ if (lostr != NULL)
+ {
+ sscanf_s(lostr, "%x", &methHashLo);
+ // methHashLo = (unsigned(atoi(lostr)) << 2); // So we don't have to use negative numbers.
+ }
+ char* histr = getenv("loophoisthashhi");
+ unsigned methHashHi = UINT32_MAX;
+ if (histr != NULL)
+ {
+ sscanf_s(histr, "%x", &methHashHi);
+ // methHashHi = (unsigned(atoi(histr)) << 2); // So we don't have to use negative numbers.
+ }
+ if (methHash < methHashLo || methHash > methHashHi)
+ return;
+ printf("Doing loop hoisting in %s (0x%x).\n", info.compFullName, methHash);
+#endif // DEBUG
+#endif // 0 -- debugging loop cloning issues
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** In optHoistLoopCode()\n");
+ printf("Blocks/Trees before phase\n");
+ fgDispBasicBlocks(true);
+ printf("");
+ }
+#endif
+
+ // Consider all the loop nests, in outer-to-inner order (thus hoisting expressions outside the largest loop in which
+ // they are invariant.)
+ LoopHoistContext hoistCtxt(this);
+ for (unsigned lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
+ {
+ optHoistLoopNest(lnum, &hoistCtxt);
+ }
+ }
+
+#if DEBUG
+ if (fgModified)
+ {
+ if (verbose)
+ {
+ printf("Blocks/Trees after optHoistLoopCode() modified flowgraph\n");
+ fgDispBasicBlocks(true);
+ printf("");
+ }
+
+ // Make sure that the predecessor lists are accurate
+ fgDebugCheckBBlist();
+ }
+#endif
+
+#ifdef DEBUG
+ // Test Data stuff..
+ // If we have no test data, early out.
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+ NodeToTestDataMap* testData = GetNodeTestData();
+ for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
+ {
+ TestLabelAndNum tlAndN;
+ GenTreePtr node = ki.Get();
+ bool b = testData->Lookup(node, &tlAndN);
+ assert(b);
+ if (tlAndN.m_tl != TL_LoopHoist)
+ {
+ continue;
+ }
+ // Otherwise, it is a loop hoist annotation.
+ assert(tlAndN.m_num < 100); // >= 100 indicates nested static field address, should already have been moved.
+ if (tlAndN.m_num >= 0)
+ {
+ printf("Node ");
+ printTreeID(node);
+ printf(" was declared 'must hoist', but has not been hoisted.\n");
+ assert(false);
+ }
+ }
+#endif // DEBUG
+}
+
+void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ // Do this loop, then recursively do all nested loops.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if LOOP_HOIST_STATS
+ // Record stats
+ m_curLoopHasHoistedExpression = false;
+ m_loopsConsidered++;
+#endif // LOOP_HOIST_STATS
+
+ optHoistThisLoop(lnum, hoistCtxt);
+
+ VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop();
+
+ if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
+ {
+ // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops.
+ // TODO-Cleanup: we should have a set abstraction for loops.
+ if (hoistedInCurLoop != nullptr)
+ {
+ for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
+ {
+#ifdef DEBUG
+ bool b;
+ assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b));
+#endif
+ hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true);
+ }
+ }
+
+ for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
+ child = optLoopTable[child].lpSibling)
+ {
+ optHoistLoopNest(child, hoistCtxt);
+ }
+
+ // Now remove them.
+ // TODO-Cleanup: we should have a set abstraction for loops.
+ if (hoistedInCurLoop != nullptr)
+ {
+ for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys)
+ {
+ // Note that we asserted when we added these that they hadn't been members, so removing is appropriate.
+ hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get());
+ }
+ }
+ }
+}
+
+void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ /* If loop was removed continue */
+
+ if (pLoopDsc->lpFlags & LPFLG_REMOVED)
+ {
+ return;
+ }
+
+ /* Get the head and tail of the loop */
+
+ BasicBlock* head = pLoopDsc->lpHead;
+ BasicBlock* tail = pLoopDsc->lpBottom;
+ BasicBlock* lbeg = pLoopDsc->lpEntry;
+ BasicBlock* block;
+
+ // We must have a do-while loop
+ if ((pLoopDsc->lpFlags & LPFLG_DO_WHILE) == 0)
+ {
+ return;
+ }
+
+ // The loop-head must dominate the loop-entry.
+ // TODO-CQ: Couldn't we make this true if it's not?
+ if (!fgDominate(head, lbeg))
+ {
+ return;
+ }
+
+ // if lbeg is the start of a new try block then we won't be able to hoist
+ if (!BasicBlock::sameTryRegion(head, lbeg))
+ {
+ return;
+ }
+
+ // We don't bother hoisting when inside of a catch block
+ if ((lbeg->bbCatchTyp != BBCT_NONE) && (lbeg->bbCatchTyp != BBCT_FINALLY))
+ {
+ return;
+ }
+
+ pLoopDsc->lpFlags |= LPFLG_HOISTABLE;
+
+ unsigned begn = lbeg->bbNum;
+ unsigned endn = tail->bbNum;
+
+ // Ensure the per-loop sets/tables are empty.
+ hoistCtxt->m_curLoopVnInvariantCache.RemoveAll();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("optHoistLoopCode for loop L%02u <BB%02u..BB%02u>:\n", lnum, begn, endn);
+ printf(" Loop body %s a call\n", pLoopDsc->lpContainsCall ? "contains" : "does not contain");
+ }
+#endif
+
+ VARSET_TP VARSET_INIT_NOCOPY(loopVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, pLoopDsc->lpVarUseDef));
+
+ pLoopDsc->lpVarInOutCount = VarSetOps::Count(this, pLoopDsc->lpVarInOut);
+ pLoopDsc->lpLoopVarCount = VarSetOps::Count(this, loopVars);
+ pLoopDsc->lpHoistedExprCount = 0;
+
+#ifndef _TARGET_64BIT_
+ unsigned longVarsCount = VarSetOps::Count(this, lvaLongVars);
+
+ if (longVarsCount > 0)
+ {
+ // Since 64-bit variables take up two registers on 32-bit targets, we increase
+ // the Counts such that each TYP_LONG variable counts twice.
+ //
+ VARSET_TP VARSET_INIT_NOCOPY(loopLongVars, VarSetOps::Intersection(this, loopVars, lvaLongVars));
+ VARSET_TP VARSET_INIT_NOCOPY(inOutLongVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaLongVars));
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n LONGVARS(%d)=", VarSetOps::Count(this, lvaLongVars));
+ lvaDispVarSet(lvaLongVars);
+ }
+#endif
+ pLoopDsc->lpLoopVarCount += VarSetOps::Count(this, loopLongVars);
+ pLoopDsc->lpVarInOutCount += VarSetOps::Count(this, inOutLongVars);
+ }
+#endif // !_TARGET_64BIT_
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n USEDEF (%d)=", VarSetOps::Count(this, pLoopDsc->lpVarUseDef));
+ lvaDispVarSet(pLoopDsc->lpVarUseDef);
+
+ printf("\n INOUT (%d)=", pLoopDsc->lpVarInOutCount);
+ lvaDispVarSet(pLoopDsc->lpVarInOut);
+
+ printf("\n LOOPVARS(%d)=", pLoopDsc->lpLoopVarCount);
+ lvaDispVarSet(loopVars);
+ printf("\n");
+ }
+#endif
+
+ unsigned floatVarsCount = VarSetOps::Count(this, lvaFloatVars);
+
+ if (floatVarsCount > 0)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(loopFPVars, VarSetOps::Intersection(this, loopVars, lvaFloatVars));
+ VARSET_TP VARSET_INIT_NOCOPY(inOutFPVars, VarSetOps::Intersection(this, pLoopDsc->lpVarInOut, lvaFloatVars));
+
+ pLoopDsc->lpLoopVarFPCount = VarSetOps::Count(this, loopFPVars);
+ pLoopDsc->lpVarInOutFPCount = VarSetOps::Count(this, inOutFPVars);
+ pLoopDsc->lpHoistedFPExprCount = 0;
+
+ pLoopDsc->lpLoopVarCount -= pLoopDsc->lpLoopVarFPCount;
+ pLoopDsc->lpVarInOutCount -= pLoopDsc->lpVarInOutFPCount;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" INOUT-FP(%d)=", pLoopDsc->lpVarInOutFPCount);
+ lvaDispVarSet(inOutFPVars);
+
+ printf("\n LOOPV-FP(%d)=", pLoopDsc->lpLoopVarFPCount);
+ lvaDispVarSet(loopFPVars);
+ }
+#endif
+ }
+ else // (floatVarsCount == 0)
+ {
+ pLoopDsc->lpLoopVarFPCount = 0;
+ pLoopDsc->lpVarInOutFPCount = 0;
+ pLoopDsc->lpHoistedFPExprCount = 0;
+ }
+
+ // Find the set of definitely-executed blocks.
+ // Ideally, the definitely-executed blocks are the ones that post-dominate the entry block.
+ // Until we have post-dominators, we'll special-case for single-exit blocks.
+ ExpandArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());
+ if (pLoopDsc->lpFlags & LPFLG_ONE_EXIT)
+ {
+ assert(pLoopDsc->lpExit != nullptr);
+ BasicBlock* cur = pLoopDsc->lpExit;
+ // Push dominators, until we reach "entry" or exit the loop.
+ while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
+ {
+ defExec.Push(cur);
+ cur = cur->bbIDom;
+ }
+ // If we didn't reach the entry block, give up and *just* push the entry block.
+ if (cur != pLoopDsc->lpEntry)
+ {
+ defExec.Reset();
+ }
+ defExec.Push(pLoopDsc->lpEntry);
+ }
+ else // More than one exit
+ {
+ // We'll assume that only the entry block is definitely executed.
+ // We could in the future do better.
+ defExec.Push(pLoopDsc->lpEntry);
+ }
+
+ while (defExec.Size() > 0)
+ {
+ // Consider in reverse order: dominator before dominatee.
+ BasicBlock* blk = defExec.Pop();
+ optHoistLoopExprsForBlock(blk, lnum, hoistCtxt);
+ }
+}
+
+// Hoist any expressions in "blk" that are invariant in loop "lnum" outside of "blk" and into a PreHead for loop "lnum".
+void Compiler::optHoistLoopExprsForBlock(BasicBlock* blk, unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+ bool firstBlockAndBeforeSideEffect = (blk == pLoopDsc->lpEntry);
+ unsigned blkWeight = blk->getBBWeight(this);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" optHoistLoopExprsForBlock BB%02u (weight=%6s) of loop L%02u <BB%02u..BB%02u>, firstBlock is %s\n",
+ blk->bbNum, refCntWtd2str(blkWeight), lnum, pLoopDsc->lpFirst->bbNum, pLoopDsc->lpBottom->bbNum,
+ firstBlockAndBeforeSideEffect ? "true" : "false");
+ if (blkWeight < (BB_UNITY_WEIGHT / 10))
+ {
+ printf(" block weight is too small to perform hoisting.\n");
+ }
+ }
+#endif
+
+ if (blkWeight < (BB_UNITY_WEIGHT / 10))
+ {
+ // Block weight is too small to perform hoisting.
+ return;
+ }
+
+ for (GenTreeStmt* stmt = blk->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr stmtTree = stmt->gtStmtExpr;
+ bool hoistable;
+ (void)optHoistLoopExprsForTree(stmtTree, lnum, hoistCtxt, &firstBlockAndBeforeSideEffect, &hoistable);
+ if (hoistable)
+ {
+ // we will try to hoist the top-level stmtTree
+ optHoistCandidate(stmtTree, lnum, hoistCtxt);
+ }
+ }
+}
+
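+// optIsProfitableToHoistableTree: Estimate whether hoisting "tree" out of loop "lnum" is likely to
+// be profitable, based on the registers expected to be available across the loop and the tree's
+// evaluation cost.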
+bool Compiler::optIsProfitableToHoistableTree(GenTreePtr tree, unsigned lnum)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ bool loopContainsCall = pLoopDsc->lpContainsCall;
+
+ int availRegCount;
+ int hoistedExprCount;
+ int loopVarCount;
+ int varInOutCount;
+
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ hoistedExprCount = pLoopDsc->lpHoistedFPExprCount;
+ loopVarCount = pLoopDsc->lpLoopVarFPCount;
+ varInOutCount = pLoopDsc->lpVarInOutFPCount;
+
+ availRegCount = CNT_CALLEE_SAVED_FLOAT;
+ if (!loopContainsCall)
+ {
+ availRegCount += CNT_CALLEE_TRASH_FLOAT - 1;
+ }
+#ifdef _TARGET_ARM_
+ // For ARM each double takes two FP registers
+ // For now on ARM we won't track singles/doubles
+ // and instead just assume that we always have doubles.
+ //
+ availRegCount /= 2;
+#endif
+ }
+ else
+ {
+ hoistedExprCount = pLoopDsc->lpHoistedExprCount;
+ loopVarCount = pLoopDsc->lpLoopVarCount;
+ varInOutCount = pLoopDsc->lpVarInOutCount;
+
+ availRegCount = CNT_CALLEE_SAVED - 1;
+ if (!loopContainsCall)
+ {
+ availRegCount += CNT_CALLEE_TRASH - 1;
+ }
+#ifndef _TARGET_64BIT_
+ // For our 32-bit targets Long types take two registers.
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ availRegCount = (availRegCount + 1) / 2;
+ }
+#endif
+ }
+
+ // decrement the availRegCount by the count of expression that we have already hoisted.
+ availRegCount -= hoistedExprCount;
+
+ // the variables that are read/written inside the loop should
+ // always be a subset of the InOut variables for the loop
+ assert(loopVarCount <= varInOutCount);
+
+ // When loopVarCount >= availRegCount we believe that all of the
+ // available registers will get used to hold LclVars inside the loop.
+ // This pessimistically assumes that each loopVar has a conflicting
+ // lifetime with every other loopVar.
+    // For this case we will hoist the expression only if it is profitable
+ // to place it in a stack home location (gtCostEx >= 2*IND_COST_EX)
+ // as we believe it will be placed in the stack or one of the other
+ // loopVars will be spilled into the stack
+ //
+ if (loopVarCount >= availRegCount)
+ {
+ // Don't hoist expressions that are not heavy: tree->gtCostEx < (2*IND_COST_EX)
+ if (tree->gtCostEx < (2 * IND_COST_EX))
+ {
+ return false;
+ }
+ }
+
+    // When varInOutCount < availRegCount we know that there are
+ // some available register(s) when we enter the loop body.
+ // When varInOutCount == availRegCount there often will be a register
+ // available when we enter the loop body, since a loop often defines a
+ // LclVar on exit or there is often at least one LclVar that is worth
+ // spilling to the stack to make way for this hoisted expression.
+    // So we are willing to hoist an expression with gtCostEx == MIN_CSE_COST
+ //
+ if (varInOutCount > availRegCount)
+ {
+ // Don't hoist expressions that barely meet CSE cost requirements: tree->gtCostEx == MIN_CSE_COST
+ if (tree->gtCostEx <= MIN_CSE_COST + 1)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+//
+// This function returns true if 'tree' is a loop invariant expression.
+// It also sets '*pHoistable' to true if 'tree' can be hoisted into a loop PreHeader block
+//
+bool Compiler::optHoistLoopExprsForTree(
+ GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt, bool* pFirstBlockAndBeforeSideEffect, bool* pHoistable)
+{
+ // First do the children.
+ // We must keep track of whether each child node was hoistable or not
+ //
+ unsigned nChildren = tree->NumChildren();
+ bool childrenHoistable[GenTree::MAX_CHILDREN];
+
+ // Initialize the array elements for childrenHoistable[] to false
+ for (unsigned i = 0; i < nChildren; i++)
+ {
+ childrenHoistable[i] = false;
+ }
+
+ bool treeIsInvariant = true;
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (!optHoistLoopExprsForTree(tree->GetChild(childNum), lnum, hoistCtxt, pFirstBlockAndBeforeSideEffect,
+ &childrenHoistable[childNum]))
+ {
+ treeIsInvariant = false;
+ }
+ }
+
+ // If all the children of "tree" are hoistable, then "tree" itself can be hoisted
+ //
+ bool treeIsHoistable = treeIsInvariant;
+
+ // But we must see if anything else prevents "tree" from being hoisted.
+ //
+ if (treeIsInvariant)
+ {
+ // Tree must be a suitable CSE candidate for us to be able to hoist it.
+ treeIsHoistable = optIsCSEcandidate(tree);
+
+ // If it's a call, it must be a helper call, and be pure.
+ // Further, if it may run a cctor, it must be labeled as "Hoistable"
+ // (meaning it won't run a cctor because the class is not precise-init).
+ if (treeIsHoistable && tree->OperGet() == GT_CALL)
+ {
+ GenTreeCall* call = tree->AsCall();
+ if (call->gtCallType != CT_HELPER)
+ {
+ treeIsHoistable = false;
+ }
+ else
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (!s_helperCallProperties.IsPure(helpFunc))
+ {
+ treeIsHoistable = false;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc) && (call->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ treeIsHoistable = false;
+ }
+ }
+ }
+
+ if (treeIsHoistable)
+ {
+ if (!(*pFirstBlockAndBeforeSideEffect))
+ {
+ // For now, we give up on an expression that might raise an exception if it is after the
+ // first possible global side effect (and we assume we're after that if we're not in the first block).
+ // TODO-CQ: this is when we might do loop cloning.
+ //
+ if ((tree->gtFlags & GTF_EXCEPT) != 0)
+ {
+ treeIsHoistable = false;
+ }
+ }
+ // Currently we must give up on reads from static variables (even if we are in the first block).
+ //
+ if (tree->OperGet() == GT_CLS_VAR)
+ {
+ // TODO-CQ: test that fails if we hoist GT_CLS_VAR: JIT\Directed\Languages\ComponentPascal\pi_r.exe
+ // method Main
+ treeIsHoistable = false;
+ }
+ }
+
+ // Is the value of the whole tree loop invariant?
+ treeIsInvariant =
+ optVNIsLoopInvariant(tree->gtVNPair.GetLiberal(), lnum, &hoistCtxt->m_curLoopVnInvariantCache);
+
+        // If the whole tree is not invariant, it cannot be hoisted.
+ if (!treeIsInvariant)
+ {
+ treeIsHoistable = false;
+ }
+ }
+
+ // Check if we need to set '*pFirstBlockAndBeforeSideEffect' to false.
+ // If we encounter a tree with a call in it
+ // or if we see an assignment to global we set it to false.
+ //
+ // If we are already set to false then we can skip these checks
+ //
+ if (*pFirstBlockAndBeforeSideEffect)
+ {
+ // For this purpose, we only care about memory side effects. We assume that expressions will
+ // be hoisted so that they are evaluated in the same order as they would have been in the loop,
+ // and therefore throw exceptions in the same order. (So we don't use GTF_GLOBALLY_VISIBLE_SIDE_EFFECTS
+ // here, since that includes exceptions.)
+ if (tree->gtFlags & GTF_CALL)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ else if (tree->OperIsAssignment())
+ {
+ // If the LHS of the assignment has a global reference, then assume it's a global side effect.
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ if (lhs->gtFlags & GTF_GLOB_REF)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
+ else if (tree->OperIsCopyBlkOp())
+ {
+ GenTreePtr args = tree->gtOp.gtOp1;
+ assert(args->OperGet() == GT_LIST);
+ if (args->gtOp.gtOp1->gtFlags & GTF_GLOB_REF)
+ {
+ *pFirstBlockAndBeforeSideEffect = false;
+ }
+ }
+ }
+
+ // If this 'tree' is hoistable then we return and the caller will
+ // decide to hoist it as part of larger hoistable expression.
+ //
+ if (!treeIsHoistable)
+ {
+ // We are not hoistable so we will now hoist any hoistable children.
+ //
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (childrenHoistable[childNum])
+ {
+                // We can't hoist the LHS of an assignment; it isn't a real use.
+ if (childNum == 0 && (tree->OperIsAssignment()))
+ {
+ continue;
+ }
+
+ GenTreePtr child = tree->GetChild(childNum);
+
+ // We try to hoist this 'child' tree
+ optHoistCandidate(child, lnum, hoistCtxt);
+ }
+ }
+ }
+
+ *pHoistable = treeIsHoistable;
+ return treeIsInvariant;
+}
+
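+// optHoistCandidate: Hoist "tree" out of loop "lnum" if the loop is hoistable, the tree is valid at
+// the loop head and profitable to hoist, and its value number has not already been hoisted in this
+// loop or an enclosing one; update the hoist counts and the set of hoisted value numbers.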
+void Compiler::optHoistCandidate(GenTreePtr tree, unsigned lnum, LoopHoistContext* hoistCtxt)
+{
+ if (lnum == BasicBlock::NOT_IN_LOOP)
+ {
+ // The hoisted expression isn't valid at any loop head so don't hoist this expression.
+ return;
+ }
+
+ // The outer loop also must be suitable for hoisting...
+ if ((optLoopTable[lnum].lpFlags & LPFLG_HOISTABLE) == 0)
+ {
+ return;
+ }
+
+    // If the hoisted expression isn't valid at this loop head then don't hoist it
+ if (!optTreeIsValidAtLoopHead(tree, lnum))
+ {
+ return;
+ }
+
+    // It must pass the hoisting profitability tests for this loop level
+ if (!optIsProfitableToHoistableTree(tree, lnum))
+ {
+ return;
+ }
+
+ bool b;
+ if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal(), &b))
+ {
+ // already hoisted in a parent loop, so don't hoist this expression.
+ return;
+ }
+
+ if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal(), &b))
+ {
+ // already hoisted this expression in the current loop, so don't hoist this expression.
+ return;
+ }
+
+ // Expression can be hoisted
+ optPerformHoistExpr(tree, lnum);
+
+ // Increment lpHoistedExprCount or lpHoistedFPExprCount
+ if (!varTypeIsFloating(tree->TypeGet()))
+ {
+ optLoopTable[lnum].lpHoistedExprCount++;
+#ifndef _TARGET_64BIT_
+ // For our 32-bit targets Long types take two registers.
+ if (varTypeIsLong(tree->TypeGet()))
+ {
+ optLoopTable[lnum].lpHoistedExprCount++;
+ }
+#endif
+ }
+ else // Floating point expr hoisted
+ {
+ optLoopTable[lnum].lpHoistedFPExprCount++;
+ }
+
+ // Record the hoisted expression in hoistCtxt
+ hoistCtxt->GetHoistedInCurLoop(this)->Set(tree->gtVNPair.GetLiberal(), true);
+}
+
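+// optVNIsLoopInvariant: Return true if the value number "vn" is invariant with respect to loop
+// "lnum". Results are memoized in "loopVnInvariantCache".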
+bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loopVnInvariantCache)
+{
+    // If it is not a valid VN, it is not loop-invariant.
+ if (vn == ValueNumStore::NoVN)
+ {
+ return false;
+ }
+
+ // We'll always short-circuit constants.
+ if (vnStore->IsVNConstant(vn) || vn == vnStore->VNForVoid())
+ {
+ return true;
+ }
+
+ // If we've done this query previously, don't repeat.
+ bool previousRes = false;
+ if (loopVnInvariantCache->Lookup(vn, &previousRes))
+ {
+ return previousRes;
+ }
+
+ bool res = true;
+ VNFuncApp funcApp;
+ if (vnStore->GetVNFunc(vn, &funcApp))
+ {
+ if (funcApp.m_func == VNF_PhiDef)
+ {
+ // First, make sure it's a "proper" phi -- the definition is a Phi application.
+ VNFuncApp phiDefValFuncApp;
+ if (!vnStore->GetVNFunc(funcApp.m_args[2], &phiDefValFuncApp) || phiDefValFuncApp.m_func != VNF_Phi)
+ {
+ // It's not *really* a definition, rather a pass-through of some other VN.
+ // (This could occur, say if both sides of an if-then-else diamond made the
+ // same assignment to a variable.)
+ res = optVNIsLoopInvariant(funcApp.m_args[2], lnum, loopVnInvariantCache);
+ }
+ else
+ {
+                // Is the definition within the loop? If so, it is not loop-invariant.
+ unsigned lclNum = funcApp.m_args[0];
+ unsigned ssaNum = funcApp.m_args[1];
+ LclSsaVarDsc* ssaDef = lvaTable[lclNum].GetPerSsaData(ssaNum);
+ res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
+ }
+ }
+ else if (funcApp.m_func == VNF_PhiHeapDef)
+ {
+ BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
+ res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
+ }
+ else
+ {
+ for (unsigned i = 0; i < funcApp.m_arity; i++)
+ {
+ // TODO-CQ: We need to either make sure that *all* VN functions
+ // always take VN args, or else have a list of arg positions to exempt, as implicitly
+ // constant.
+ if (!optVNIsLoopInvariant(funcApp.m_args[i], lnum, loopVnInvariantCache))
+ {
+ res = false;
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ // Non-function "new, unique" VN's may be annotated with the loop nest where
+ // their definition occurs.
+ BasicBlock::loopNumber vnLoopNum = vnStore->LoopOfVN(vn);
+
+ if (vnLoopNum == MAX_LOOP_NUM)
+ {
+ res = false;
+ }
+ else
+ {
+ res = !optLoopContains(lnum, vnLoopNum);
+ }
+ }
+
+ loopVnInvariantCache->Set(vn, res);
+ return res;
+}
+
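+// optTreeIsValidAtLoopHead: Return true if "tree" could be evaluated at the head of loop "lnum":
+// it is a constant, a local with an SSA-tracked lifetime whose definition is outside the loop, or
+// a non-leaf node all of whose children are themselves valid at the loop head.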
+bool Compiler::optTreeIsValidAtLoopHead(GenTreePtr tree, unsigned lnum)
+{
+ if (tree->OperIsLocal())
+ {
+ GenTreeLclVarCommon* lclVar = tree->AsLclVarCommon();
+ unsigned lclNum = lclVar->gtLclNum;
+
+        // The lclVar must have an SSA-tracked lifetime
+ if (fgExcludeFromSsa(lclNum))
+ {
+ return false;
+ }
+
+        // If the loop does not contain the SSA def we can hoist it.
+ if (!optLoopTable[lnum].lpContains(lvaTable[lclNum].GetPerSsaData(lclVar->GetSsaNum())->m_defLoc.m_blk))
+ {
+ return true;
+ }
+ }
+ else if (tree->OperIsConst())
+ {
+ return true;
+ }
+    else // Otherwise, the tree is valid only if every one of its child nodes is valid at this loop's head.
+ {
+ unsigned nChildren = tree->NumChildren();
+ for (unsigned childNum = 0; childNum < nChildren; childNum++)
+ {
+ if (!optTreeIsValidAtLoopHead(tree->GetChild(childNum), lnum))
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Creates a pre-header block for the given loop - a preheader is a BBJ_NONE
+ * header. The pre-header will replace the current lpHead in the loop table.
+ * The loop has to be a do-while loop. Thus, all blocks dominated by lpHead
+ * will also be dominated by the loop-top, lpHead->bbNext.
+ *
+ */
+
+void Compiler::fgCreateLoopPreHeader(unsigned lnum)
+{
+ LoopDsc* pLoopDsc = &optLoopTable[lnum];
+
+ /* This loop has to be a "do-while" loop */
+
+ assert(pLoopDsc->lpFlags & LPFLG_DO_WHILE);
+
+ /* Have we already created a loop-preheader block? */
+
+ if (pLoopDsc->lpFlags & LPFLG_HAS_PREHEAD)
+ {
+ return;
+ }
+
+ BasicBlock* head = pLoopDsc->lpHead;
+ BasicBlock* top = pLoopDsc->lpTop;
+ BasicBlock* entry = pLoopDsc->lpEntry;
+
+ // if 'entry' and 'head' are in different try regions then we won't be able to hoist
+ if (!BasicBlock::sameTryRegion(head, entry))
+ {
+ return;
+ }
+
+ // Ensure that lpHead always dominates lpEntry
+
+ noway_assert(fgDominate(head, entry));
+
+ /* Get hold of the first block of the loop body */
+
+ assert(top == entry);
+
+ /* Allocate a new basic block */
+
+ BasicBlock* preHead = bbNewBasicBlock(BBJ_NONE);
+ preHead->bbFlags |= BBF_INTERNAL | BBF_LOOP_PREHEADER;
+
+ // Must set IL code offset
+ preHead->bbCodeOffs = top->bbCodeOffs;
+
+    // Set the default value of the preHead weight in case we don't have
+    // valid profile data. Since this block's weight is just an estimate,
+    // we clear any BBF_PROF_WEIGHT flag that we may have picked up from head.
+ //
+ preHead->inheritWeight(head);
+ preHead->bbFlags &= ~BBF_PROF_WEIGHT;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCreated PreHeader (BB%02u) for loop L%02u (BB%02u - BB%02u), with weight = %s\n", preHead->bbNum,
+ lnum, top->bbNum, pLoopDsc->lpBottom->bbNum, refCntWtd2str(preHead->getBBWeight(this)));
+ }
+#endif
+
+ // The preheader block is part of the containing loop (if any).
+ preHead->bbNatLoopNum = pLoopDsc->lpParent;
+
+ if (fgIsUsingProfileWeights() && (head->bbJumpKind == BBJ_COND))
+ {
+ if ((head->bbWeight == 0) || (head->bbNext->bbWeight == 0))
+ {
+ preHead->bbWeight = 0;
+ preHead->bbFlags |= BBF_RUN_RARELY;
+ }
+ else
+ {
+ bool allValidProfileWeights = ((head->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((head->bbJumpDest->bbFlags & BBF_PROF_WEIGHT) != 0) &&
+ ((head->bbNext->bbFlags & BBF_PROF_WEIGHT) != 0);
+
+ if (allValidProfileWeights)
+ {
+ double loopEnteredCount;
+ double loopSkippedCount;
+
+ if (fgHaveValidEdgeWeights)
+ {
+ flowList* edgeToNext = fgGetPredForBlock(head->bbNext, head);
+ flowList* edgeToJump = fgGetPredForBlock(head->bbJumpDest, head);
+ noway_assert(edgeToNext != nullptr);
+ noway_assert(edgeToJump != nullptr);
+
+ loopEnteredCount =
+ ((double)edgeToNext->flEdgeWeightMin + (double)edgeToNext->flEdgeWeightMax) / 2.0;
+ loopSkippedCount =
+ ((double)edgeToJump->flEdgeWeightMin + (double)edgeToJump->flEdgeWeightMax) / 2.0;
+ }
+ else
+ {
+ loopEnteredCount = (double)head->bbNext->bbWeight;
+ loopSkippedCount = (double)head->bbJumpDest->bbWeight;
+ }
+
+ double loopTakenRatio = loopEnteredCount / (loopEnteredCount + loopSkippedCount);
+
+ // Calculate a good approximation of the preHead's block weight
+ unsigned preHeadWeight = (unsigned)(((double)head->bbWeight * loopTakenRatio) + 0.5);
+ preHead->setBBWeight(max(preHeadWeight, 1));
+ noway_assert(!preHead->isRunRarely());
+ }
+ }
+ }
+
+ // Link in the preHead block.
+ fgInsertBBbefore(top, preHead);
+
+ // Ideally we would re-run SSA and VN if we optimized by doing loop hoisting.
+ // However, that is too expensive at this point. Instead, we update the phi
+ // node block references, if we created pre-header block due to hoisting.
+ // This is sufficient because any definition participating in SSA that flowed
+ // into the phi via the loop header block will now flow through the preheader
+ // block from the header block.
+
+ for (GenTreePtr stmt = top->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ if (tree->OperGet() != GT_ASG)
+ {
+ break;
+ }
+ GenTreePtr op2 = tree->gtGetOp2();
+ if (op2->OperGet() != GT_PHI)
+ {
+ break;
+ }
+ GenTreeArgList* args = op2->gtGetOp1()->AsArgList();
+ while (args != nullptr)
+ {
+ GenTreePhiArg* phiArg = args->Current()->AsPhiArg();
+ if (phiArg->gtPredBB == head)
+ {
+ phiArg->gtPredBB = preHead;
+ }
+ args = args->Rest();
+ }
+ }
+
+ // The handler can't begin at the top of the loop. If it did, it would be incorrect
+ // to set the handler index on the pre header without updating the exception table.
+ noway_assert(!top->hasHndIndex() || fgFirstBlockOfHandler(top) != top);
+
+ // Update the EH table to make the hoisted block part of the loop's EH block.
+ fgExtendEHRegionBefore(top);
+
+ // TODO-CQ: set dominators for this block, to allow loop optimizations requiring them
+ // (e.g: hoisting expression in a loop with the same 'head' as this one)
+
+ /* Update the loop entry */
+
+ pLoopDsc->lpHead = preHead;
+ pLoopDsc->lpFlags |= LPFLG_HAS_PREHEAD;
+
+    /* The new block becomes the 'head' of the loop - update bbRefs and bbPreds.
+       All predecessors of 'top' (which is the entry to the loop)
+       now have to jump to 'preHead', unless they are dominated by 'head' */
+
+ preHead->bbRefs = 0;
+ fgAddRefPred(preHead, head);
+ bool checkNestedLoops = false;
+
+ for (flowList* pred = top->bbPreds; pred; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+
+ if (fgDominate(top, predBlock))
+ {
+ // note: if 'top' dominates predBlock, 'head' dominates predBlock too
+ // (we know that 'head' dominates 'top'), but using 'top' instead of
+ // 'head' in the test allows us to not enter here if 'predBlock == head'
+
+ if (predBlock != pLoopDsc->lpBottom)
+ {
+ noway_assert(predBlock != head);
+ checkNestedLoops = true;
+ }
+ continue;
+ }
+
+ switch (predBlock->bbJumpKind)
+ {
+ case BBJ_NONE:
+ noway_assert(predBlock == head);
+ break;
+
+ case BBJ_COND:
+ if (predBlock == head)
+ {
+ noway_assert(predBlock->bbJumpDest != top);
+ break;
+ }
+ __fallthrough;
+
+ case BBJ_ALWAYS:
+ case BBJ_EHCATCHRET:
+ noway_assert(predBlock->bbJumpDest == top);
+ predBlock->bbJumpDest = preHead;
+ preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ if (predBlock == head)
+ {
+ // This is essentially the same as the case where predBlock is a BBJ_NONE. We may not be
+ // able to make this a BBJ_NONE if it's an internal block (for example, a leave).
+ // Just break; the pred list is fixed up after the switch.
+ }
+ else
+ {
+ fgRemoveRefPred(top, predBlock);
+ fgAddRefPred(preHead, predBlock);
+ }
+ break;
+
+ case BBJ_SWITCH:
+ unsigned jumpCnt;
+ jumpCnt = predBlock->bbJumpSwt->bbsCount;
+ BasicBlock** jumpTab;
+ jumpTab = predBlock->bbJumpSwt->bbsDstTab;
+
+ do
+ {
+ assert(*jumpTab);
+ if ((*jumpTab) == top)
+ {
+ (*jumpTab) = preHead;
+
+ fgRemoveRefPred(top, predBlock);
+ fgAddRefPred(preHead, predBlock);
+ preHead->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+ }
+ } while (++jumpTab, --jumpCnt);
+ break;
+
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+ }
+
+ noway_assert(!fgGetPredForBlock(top, preHead));
+ fgRemoveRefPred(top, head);
+ fgAddRefPred(top, preHead);
+
+ /*
+ If we found at least one back-edge in the flowgraph pointing to the top/entry of the loop
+ (other than the back-edge of the loop we are considering) then we likely have nested
+ do-while loops with the same entry block and inserting the preheader block changes the head
+ of all the nested loops. Now we will update this piece of information in the loop table, and
+ mark all nested loops as having a preheader (the preheader block can be shared among all nested
+ do-while loops with the same entry block).
+ */
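+ // For example, if two nested do-while loops both enter at 'top' and both recorded 'head' as their
+ // lpHead, both loop table entries end up pointing at 'preHead' and are marked LPFLG_HAS_PREHEAD.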
+ if (checkNestedLoops)
+ {
+ for (unsigned l = 0; l < optLoopCount; l++)
+ {
+ if (optLoopTable[l].lpHead == head)
+ {
+ noway_assert(l != lnum); // pLoopDsc->lpHead was already changed from 'head' to 'preHead'
+ noway_assert(optLoopTable[l].lpEntry == top);
+ optUpdateLoopHead(l, optLoopTable[l].lpHead, preHead);
+ optLoopTable[l].lpFlags |= LPFLG_HAS_PREHEAD;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Same PreHeader (BB%02u) can be used for loop L%02u (BB%02u - BB%02u)\n\n", preHead->bbNum,
+ l, top->bbNum, optLoopTable[l].lpBottom->bbNum);
+ }
+#endif
+ }
+ }
+ }
+}
+
+bool Compiler::optBlockIsLoopEntry(BasicBlock* blk, unsigned* pLnum)
+{
+ unsigned lnum = blk->bbNatLoopNum;
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ if (optLoopTable[lnum].lpEntry == blk)
+ {
+ *pLnum = lnum;
+ return true;
+ }
+ lnum = optLoopTable[lnum].lpParent;
+ }
+ return false;
+}
+
+void Compiler::optComputeLoopSideEffects()
+{
+ unsigned lnum;
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarInOut, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optLoopTable[lnum].lpVarUseDef, VarSetOps::MakeEmpty(this));
+ optLoopTable[lnum].lpContainsCall = false;
+ }
+
+ for (lnum = 0; lnum < optLoopCount; lnum++)
+ {
+ if (optLoopTable[lnum].lpFlags & LPFLG_REMOVED)
+ {
+ continue;
+ }
+
+ if (optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP)
+ { // Is outermost...
+ optComputeLoopNestSideEffects(lnum);
+ }
+ }
+
+ VarSetOps::AssignNoCopy(this, lvaFloatVars, VarSetOps::MakeEmpty(this));
+#ifndef _TARGET_64BIT_
+ VarSetOps::AssignNoCopy(this, lvaLongVars, VarSetOps::MakeEmpty(this));
+#endif
+
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &lvaTable[i];
+ if (varDsc->lvTracked)
+ {
+ if (varTypeIsFloating(varDsc->lvType))
+ {
+ VarSetOps::AddElemD(this, lvaFloatVars, varDsc->lvVarIndex);
+ }
+#ifndef _TARGET_64BIT_
+ else if (varTypeIsLong(varDsc->lvType))
+ {
+ VarSetOps::AddElemD(this, lvaLongVars, varDsc->lvVarIndex);
+ }
+#endif
+ }
+ }
+}
+
+void Compiler::optComputeLoopNestSideEffects(unsigned lnum)
+{
+ assert(optLoopTable[lnum].lpParent == BasicBlock::NOT_IN_LOOP); // Requires: lnum is outermost.
+ BasicBlock* botNext = optLoopTable[lnum].lpBottom->bbNext;
+ for (BasicBlock* bbInLoop = optLoopTable[lnum].lpFirst; bbInLoop != botNext; bbInLoop = bbInLoop->bbNext)
+ {
+ optComputeLoopSideEffectsOfBlock(bbInLoop);
+ }
+}
+
+void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
+{
+ unsigned mostNestedLoop = blk->bbNatLoopNum;
+ assert(mostNestedLoop != BasicBlock::NOT_IN_LOOP);
+
+ AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
+
+ bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
+
+ // Now iterate over the remaining statements, and their trees.
+ for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
+ {
+ for (GenTreePtr tree = stmts->gtStmt.gtStmtList; (tree != nullptr); tree = tree->gtNext)
+ {
+ genTreeOps oper = tree->OperGet();
+
+ // Even after we set heapHavoc we still may want to know if a loop contains calls
+ if (heapHavoc)
+ {
+ if (oper == GT_CALL)
+ {
+ // Record that this loop contains a call
+ AddContainsCallAllContainingLoops(mostNestedLoop);
+ }
+
+ // If we just set lpContainsCall or it was previously set
+ if (optLoopTable[mostNestedLoop].lpContainsCall)
+ {
+ // We can exit early once both heapHavoc and lpContainsCall are set to true.
+ break;
+ }
+
+ // We are just looking for GT_CALL nodes after heapHavoc was set.
+ continue;
+ }
+
+ // otherwise heapHavoc is not set
+ assert(!heapHavoc);
+
+ // This body is a distillation of the heap-side effect code of value numbering.
+ // We also do a very limited analysis of byref PtrTo values, to cover some cases
+ // that the compiler creates.
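+ // For example, a store through a byref local whose value number is VNF_PtrToArrElem(clsHnd, ...)
+ // is recorded only as a modification of that array element type, rather than as arbitrary heap havoc.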
+
+ if (GenTree::OperIsAssignment(oper))
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+
+ if (lhs->OperGet() == GT_IND)
+ {
+ GenTreePtr arg = lhs->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ FieldSeqNode* fldSeqArrElem = nullptr;
+
+ if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+ {
+ heapHavoc = true;
+ continue;
+ }
+
+ ArrayInfo arrInfo;
+
+ if (arg->TypeGet() == TYP_BYREF && arg->OperGet() == GT_LCL_VAR)
+ {
+ // If it's a local byref for which we recorded a value number, use that...
+ GenTreeLclVar* argLcl = arg->AsLclVar();
+ if (!fgExcludeFromSsa(argLcl->GetLclNum()))
+ {
+ ValueNum argVN =
+ lvaTable[argLcl->GetLclNum()].GetPerSsaData(argLcl->GetSsaNum())->m_vnPair.GetLiberal();
+ VNFuncApp funcApp;
+ if (argVN != ValueNumStore::NoVN && vnStore->GetVNFunc(argVN, &funcApp) &&
+ funcApp.m_func == VNF_PtrToArrElem)
+ {
+ assert(vnStore->IsVNHandle(funcApp.m_args[0]));
+ CORINFO_CLASS_HANDLE elemType =
+ CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
+ AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
+ // Don't set heapHavoc below.
+ continue;
+ }
+ }
+ // Otherwise...
+ heapHavoc = true;
+ }
+ // Is the LHS an array index expression?
+ else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
+ {
+ // We actually ignore "fldSeq" -- any modification to an S[], at any
+ // field of "S", will lose all information about the array type.
+ CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
+ }
+ else
+ {
+ // We are only interested in IsFieldAddr()'s fldSeq out parameter.
+ //
+ GenTreePtr obj = nullptr; // unused
+ GenTreePtr staticOffset = nullptr; // unused
+ FieldSeqNode* fldSeq = nullptr;
+
+ if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
+ (fldSeq != FieldSeqStore::NotAField()))
+ {
+ // Get the first (object) field from field seq. Heap[field] will yield the "field map".
+ assert(fldSeq != nullptr);
+ if (fldSeq->IsFirstElemFieldSeq())
+ {
+ fldSeq = fldSeq->m_next;
+ assert(fldSeq != nullptr);
+ }
+
+ AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
+ }
+ else
+ {
+ heapHavoc = true;
+ }
+ }
+ }
+ else if (lhs->OperIsBlk())
+ {
+ GenTreeLclVarCommon* lclVarTree;
+ bool isEntire;
+ if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
+ {
+ // For now, assume arbitrary side effects on the heap...
+ heapHavoc = true;
+ }
+ }
+ else if (lhs->OperGet() == GT_CLS_VAR)
+ {
+ AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
+ }
+ // Otherwise, must be local lhs form. I should assert that.
+ else if (lhs->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVar* lhsLcl = lhs->AsLclVar();
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+ ValueNum rhsVN = rhs->gtVNPair.GetLiberal();
+ // If we gave the RHS a value number, propagate it.
+ if (rhsVN != ValueNumStore::NoVN)
+ {
+ rhsVN = vnStore->VNNormVal(rhsVN);
+ if (!fgExcludeFromSsa(lhsLcl->GetLclNum()))
+ {
+ lvaTable[lhsLcl->GetLclNum()]
+ .GetPerSsaData(lhsLcl->GetSsaNum())
+ ->m_vnPair.SetLiberal(rhsVN);
+ }
+ }
+ }
+ }
+ else // not GenTree::OperIsAssignment(oper)
+ {
+ switch (oper)
+ {
+ case GT_COMMA:
+ tree->gtVNPair = tree->gtOp.gtOp2->gtVNPair;
+ break;
+
+ case GT_ADDR:
+ // Is it an addr of an array index expression?
+ {
+ GenTreePtr addrArg = tree->gtOp.gtOp1;
+ if (addrArg->OperGet() == GT_IND)
+ {
+ // Is the addressed node an array index expression?
+ if (addrArg->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(addrArg, &arrInfo);
+ assert(b);
+ CORINFO_CLASS_HANDLE elemType =
+ EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem,
+ vnStore->VNForHandle(ssize_t(elemType), GTF_ICON_CLASS_HDL),
+ // The rest are dummy arguments.
+ vnStore->VNForNull(), vnStore->VNForNull(),
+ vnStore->VNForNull()));
+ }
+ }
+ }
+ break;
+
+ case GT_LOCKADD: // Binop
+ case GT_XADD: // Binop
+ case GT_XCHG: // Binop
+ case GT_CMPXCHG: // Specialop
+ {
+ heapHavoc = true;
+ }
+ break;
+
+ case GT_CALL:
+ {
+ GenTreeCall* call = tree->AsCall();
+
+ // Record that this loop contains a call
+ AddContainsCallAllContainingLoops(mostNestedLoop);
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ if (s_helperCallProperties.MutatesHeap(helpFunc))
+ {
+ heapHavoc = true;
+ }
+ else if (s_helperCallProperties.MayRunCctor(helpFunc))
+ {
+ // If the call is labeled as "Hoistable", then we've checked the
+ // class that would be constructed, and it is not precise-init, so
+ // the cctor will not be run by this call. Otherwise, it might be,
+ // and might have arbitrary side effects.
+ if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ heapHavoc = true;
+ }
+ }
+ }
+ else
+ {
+ heapHavoc = true;
+ }
+ break;
+ }
+
+ default:
+ // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
+ break;
+ }
+ }
+ }
+ }
+
+ if (heapHavoc)
+ {
+ // Record that all loops containing this block have heap havoc effects.
+ unsigned lnum = mostNestedLoop;
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].lpLoopHasHeapHavoc = true;
+ lnum = optLoopTable[lnum].lpParent;
+ }
+ }
+}
+
+// Marks the lpContainsCall information for "lnum" and any parent loops.
+void Compiler::AddContainsCallAllContainingLoops(unsigned lnum)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].lpContainsCall = true;
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds the variable liveness information for 'blk' to 'this' LoopDsc
+void Compiler::LoopDsc::AddVariableLiveness(Compiler* comp, BasicBlock* blk)
+{
+ VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveIn);
+ VarSetOps::UnionD(comp, this->lpVarInOut, blk->bbLiveOut);
+
+ VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarUse);
+ VarSetOps::UnionD(comp, this->lpVarUseDef, blk->bbVarDef);
+}
+
+// Adds the variable liveness information for 'blk' to "lnum" and any parent loops.
+void Compiler::AddVariableLivenessAllContainingLoops(unsigned lnum, BasicBlock* blk)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddVariableLiveness(this, blk);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds "fldHnd" to the set of modified fields of "lnum" and any parent loops.
+void Compiler::AddModifiedFieldAllContainingLoops(unsigned lnum, CORINFO_FIELD_HANDLE fldHnd)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddModifiedField(this, fldHnd);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+// Adds "elemType" to the set of modified array element types of "lnum" and any parent loops.
+void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLASS_HANDLE elemClsHnd)
+{
+ assert(0 <= lnum && lnum < optLoopCount);
+ while (lnum != BasicBlock::NOT_IN_LOOP)
+ {
+ optLoopTable[lnum].AddModifiedElemType(this, elemClsHnd);
+ lnum = optLoopTable[lnum].lpParent;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Helper passed to fgWalkTreePre() (via optRemoveTree) to decrement the LclVar usage counts.
+ * The 'keepList' is either a single tree or a list of trees that are formed by
+ * one or more GT_COMMA nodes. It is the kept side-effects as returned by the
+ * gtExtractSideEffList method.
+ */
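+// A keepList holding three side effects has the shape COMMA(eff1, COMMA(eff2, eff3)); any visited
+// tree that matches eff1, eff2, or eff3 is treated as kept, and its subtree is skipped by the walk below.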
+
+/* static */
+Compiler::fgWalkResult Compiler::optRemoveTreeVisitor(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ Compiler* comp = data->compiler;
+ GenTreePtr keepList = (GenTreePtr)(data->pCallbackData);
+
+ // We may have a non-NULL side effect list that is being kept
+ //
+ if (keepList)
+ {
+ GenTreePtr keptTree = keepList;
+ while (keptTree->OperGet() == GT_COMMA)
+ {
+ assert(keptTree->OperKind() & GTK_SMPOP);
+ GenTreePtr op1 = keptTree->gtOp.gtOp1;
+ GenTreePtr op2 = keptTree->gtGetOp2();
+
+ // For the GT_COMMA case the op1 is part of the original CSE tree
+ // that is being kept because it contains some side-effect
+ //
+ if (tree == op1)
+ {
+ // This tree and all of its sub trees are being kept.
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // For the GT_COMMA case the op2 holds the remaining side-effects of the original CSE tree
+ // which can again be another GT_COMMA or the final side-effect part
+ //
+ keptTree = op2;
+ }
+ if (tree == keptTree)
+ {
+ // This tree and all of its sub trees are being kept.
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+
+ // This node is being removed from the graph of GenTreePtr
+
+ // Look for any local variable references
+
+ if (tree->gtOper == GT_LCL_VAR && comp->lvaLocalVarRefCounted)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ /* This variable ref is going away, decrease its ref counts */
+
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclNum < comp->lvaCount);
+ varDsc = comp->lvaTable + lclNum;
+
+ // make sure it's been initialized
+ assert(comp->compCurBB != nullptr);
+ assert(comp->compCurBB->bbWeight <= BB_MAX_WEIGHT);
+
+ /* Decrement its lvRefCnt and lvRefCntWtd */
+
+ // Use getBBWeight to determine the proper block weight.
+ // This impacts the block weights when we have IBC data.
+ varDsc->decRefCnts(comp->compCurBB->getBBWeight(comp), comp);
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*****************************************************************************
+ *
+ * Routine called to decrement the LclVar ref counts when removing a tree
+ * during range check removal.
+ * This method will decrement the ref counts for any LclVars used below 'deadTree',
+ * unless the node is found in the 'keepList' (which holds the saved side effects).
+ * The keepList is communicated using the walkData.pCallbackData field.
+ * Also, compCurBB must be set to the BasicBlock that contains 'deadTree',
+ * as we need to fetch the block weight when decrementing the ref counts.
+ */
+
+void Compiler::optRemoveTree(GenTreePtr deadTree, GenTreePtr keepList)
+{
+ // We communicate this value using the walkData.pCallbackData field
+ //
+ fgWalkTreePre(&deadTree, optRemoveTreeVisitor, (void*)keepList);
+}
+
+/*****************************************************************************
+ *
+ * Given an array index node, mark it as not needing a range check.
+ */
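+// The incoming "tree" has the shape COMMA(GT_ARR_BOUNDS_CHECK, <array access>); the bounds check
+// operand is replaced by either the extracted side effects or a NOP, e.g. COMMA(NOP, IND(addr)).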
+
+void Compiler::optRemoveRangeCheck(
+ GenTreePtr tree, GenTreePtr stmt, bool updateCSEcounts, unsigned sideEffFlags, bool forceRemove)
+{
+ GenTreePtr add1;
+ GenTreePtr* addp;
+
+ GenTreePtr nop1;
+ GenTreePtr* nopp;
+
+ GenTreePtr icon;
+ GenTreePtr mult;
+
+ GenTreePtr base;
+
+ ssize_t ival;
+
+#if !REARRANGE_ADDS
+ noway_assert(!"can't remove range checks without REARRANGE_ADDS right now");
+#endif
+
+ noway_assert(stmt->gtOper == GT_STMT);
+ noway_assert(tree->gtOper == GT_COMMA);
+ noway_assert(tree->gtOp.gtOp1->gtOper == GT_ARR_BOUNDS_CHECK);
+ noway_assert(forceRemove || optIsRangeCheckRemovable(tree->gtOp.gtOp1));
+
+ GenTreeBoundsChk* bndsChk = tree->gtOp.gtOp1->AsBoundsChk();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Before optRemoveRangeCheck:\n");
+ gtDispTree(tree);
+ }
+#endif
+
+ GenTreePtr sideEffList = nullptr;
+ if (sideEffFlags)
+ {
+ gtExtractSideEffList(tree->gtOp.gtOp1, &sideEffList, sideEffFlags);
+ }
+
+ // Decrement the ref counts for any LclVars that are being deleted
+ //
+ optRemoveTree(tree->gtOp.gtOp1, sideEffList);
+
+ // Just replace the bndsChk with a NOP as an operand to the GT_COMMA, if there are no side effects.
+ tree->gtOp.gtOp1 = (sideEffList != nullptr) ? sideEffList : gtNewNothingNode();
+
+ // TODO-CQ: We should also remove the GT_COMMA, but in any case we can no longer CSE the GT_COMMA.
+ tree->gtFlags |= GTF_DONT_CSE;
+
+ /* Recalculate the gtCostSz, etc... */
+ gtSetStmtInfo(stmt);
+
+ /* Re-thread the nodes if necessary */
+ if (fgStmtListThreaded)
+ {
+ fgSetStmtSeq(stmt);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("After optRemoveRangeCheck:\n");
+ gtDispTree(tree);
+ }
+#endif
+}
+
+/*****************************************************************************
+ * Return the scale in an array reference, given a pointer to the
+ * multiplication node.
+ */
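+// For example, "i << 2" yields scale 4 with index "i", and "(i * 5) << 2" (a "* 20" that morph
+// split into "* 5 * 4") yields scale 20 with index "i".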
+
+ssize_t Compiler::optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk))
+{
+ assert(mul);
+ assert(mul->gtOper == GT_MUL || mul->gtOper == GT_LSH);
+ assert(mul->gtOp.gtOp2->IsCnsIntOrI());
+
+ ssize_t scale = mul->gtOp.gtOp2->gtIntConCommon.IconValue();
+
+ if (mul->gtOper == GT_LSH)
+ {
+ scale = ((ssize_t)1) << scale;
+ }
+
+ GenTreePtr index = mul->gtOp.gtOp1;
+
+ if (index->gtOper == GT_MUL && index->gtOp.gtOp2->IsCnsIntOrI())
+ {
+ // case of two cascading multiplications for constant int (e.g. * 20 morphed to * 5 * 4):
+ // When index->gtOper is GT_MUL and index->gtOp.gtOp2->gtOper is GT_CNS_INT (i.e. * 5),
+ // we can bump up the scale from 4 to 5*4, and then change index to index->gtOp.gtOp1.
+ // Otherwise, we cannot optimize it. We will simply keep the original scale and index.
+ scale *= index->gtOp.gtOp2->gtIntConCommon.IconValue();
+ index = index->gtOp.gtOp1;
+ }
+
+ assert(!bRngChk || index->gtOper != GT_COMMA);
+
+ if (pIndex)
+ {
+ *pIndex = index;
+ }
+
+ return scale;
+}
+
+/*****************************************************************************
+ * Find the last assignment to the local variable in the block. Return
+ * RHS or NULL. If any local variable in the RHS has been killed in
+ * intervening code, return NULL. If the variable being searched for is killed
+ * in the intervening code, return NULL.
+ *
+ */
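+// For example, if the last assignment to V03 in the block is "V03 = V07 + 1", the search returns
+// "V07 + 1", provided V07 is not reassigned later in the block and the RHS contains no indirections.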
+
+GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
+ GenTreePtr local,
+ VARSET_TP* pKilledInOut,
+ bool* pLhsRhsKilledAfterInit)
+{
+ assert(pKilledInOut);
+ assert(pLhsRhsKilledAfterInit);
+
+ *pLhsRhsKilledAfterInit = false;
+
+ unsigned LclNum = local->gtLclVarCommon.gtLclNum;
+
+ GenTreePtr list = block->bbTreeList;
+ if (list == nullptr)
+ {
+ return nullptr;
+ }
+
+ GenTreePtr rhs = nullptr;
+ GenTreePtr stmt = list;
+ do
+ {
+ stmt = stmt->gtPrev;
+ if (stmt == nullptr)
+ {
+ break;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ // If we encounter an assignment to a local variable,
+ if ((tree->OperKind() & GTK_ASGOP) && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ // And the assigned variable equals the input local,
+ if (tree->gtOp.gtOp1->gtLclVarCommon.gtLclNum == LclNum)
+ {
+ // If the assignment is '=' and it is not a conditional, then return rhs.
+ if (tree->gtOper == GT_ASG && !(tree->gtFlags & GTF_COLON_COND))
+ {
+ rhs = tree->gtOp.gtOp2;
+ }
+ // If the assignment is 'op=' or a conditional equal, then the search ends here,
+ // as we found a kill to the input local.
+ else
+ {
+ *pLhsRhsKilledAfterInit = true;
+ assert(rhs == nullptr);
+ }
+ break;
+ }
+ else
+ {
+ LclVarDsc* varDsc = optIsTrackedLocal(tree->gtOp.gtOp1);
+ if (varDsc == nullptr)
+ {
+ return nullptr;
+ }
+ VarSetOps::AddElemD(this, *pKilledInOut, varDsc->lvVarIndex);
+ }
+ }
+ } while (stmt != list);
+
+ if (rhs == nullptr)
+ {
+ return nullptr;
+ }
+
+ // If any local in the RHS is killed in intervening code, or RHS has an indirection, return NULL.
+ varRefKinds rhsRefs = VR_NONE;
+ VARSET_TP VARSET_INIT_NOCOPY(rhsLocals, VarSetOps::UninitVal());
+ bool b = lvaLclVarRefs(rhs, nullptr, &rhsRefs, &rhsLocals);
+ if (!b || !VarSetOps::IsEmptyIntersection(this, rhsLocals, *pKilledInOut) || (rhsRefs != VR_NONE))
+ {
+ // If RHS has been indirectly referenced, consider it a write and a kill.
+ *pLhsRhsKilledAfterInit = true;
+ return nullptr;
+ }
+
+ return rhs;
+}
+
+/*****************************************************************************
+ *
+ * Return true if "op1" is guaranteed to be less than or equal to "op2".
+ */
+
+#if FANCY_ARRAY_OPT
+
+bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
+{
+ if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
+ {
+ add1 += op1->gtIntCon.gtIconVal;
+ add2 += op2->gtIntCon.gtIconVal;
+ }
+ else
+ {
+ /* Check for +/- constant on either operand */
+
+ if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op1 = op1->gtOp.gtOp1;
+ }
+
+ if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
+ {
+ add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ op2 = op2->gtOp.gtOp1;
+ }
+
+ /* We only allow local variable references */
+
+ if (op1->gtOper != GT_LCL_VAR)
+ return false;
+ if (op2->gtOper != GT_LCL_VAR)
+ return false;
+ if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
+ return false;
+
+ /* NOTE: Caller ensures that this variable has only one def */
+
+ // printf("limit [%d]:\n", add1); gtDispTree(op1);
+ // printf("size [%d]:\n", add2); gtDispTree(op2);
+ // printf("\n");
+ }
+
+ return (bool)(add1 <= add2);
+}
+
+#endif
+
+//------------------------------------------------------------------------------
+// optObtainLoopCloningOpts: Identify optimization candidates and update
+// the "context" for array optimizations.
+//
+// Arguments:
+// context - data structure where all loop cloning info is kept. The
+// optInfo fields of the context are updated with the
+// identified optimization candidates.
+//
+void Compiler::optObtainLoopCloningOpts(LoopCloneContext* context)
+{
+ for (unsigned i = 0; i < optLoopCount; i++)
+ {
+ JITDUMP("Considering loop %d to clone for optimizations.\n", i);
+ if (optIsLoopClonable(i))
+ {
+ if (!(optLoopTable[i].lpFlags & LPFLG_REMOVED))
+ {
+ optIdentifyLoopOptInfo(i, context);
+ }
+ }
+ JITDUMP("------------------------------------------------------------\n");
+ }
+ JITDUMP("\n");
+}
+
+//------------------------------------------------------------------------
+// optIdentifyLoopOptInfo: Identify loop optimization candidates and also
+// check if the loop is suitable for the optimizations performed.
+//
+// Arguments:
+// loopNum - the current loop index for which conditions are derived.
+// context - data structure where all loop cloning candidates will be
+// updated.
+//
+// Return Value:
+// Returns false if the loop is not suitable for the optimizations; in that case the context
+// must not contain any optimization candidate for the loop.
+// Otherwise returns true.
+//
+// Operation:
+// Check if the loop is well formed for this optimization and identify the
+// optimization candidates and update the "context" parameter with all the
+// contextual information necessary to perform the optimization later.
+//
+bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* context)
+{
+ noway_assert(loopNum < optLoopCount);
+
+ LoopDsc* pLoop = &optLoopTable[loopNum];
+
+ if (!(pLoop->lpFlags & LPFLG_ITER))
+ {
+ JITDUMP("> No iter flag on loop %d.\n", loopNum);
+ return false;
+ }
+
+ unsigned ivLclNum = pLoop->lpIterVar();
+ if (lvaVarAddrExposed(ivLclNum))
+ {
+ JITDUMP("> Rejected V%02u as iter var because it is address-exposed.\n", ivLclNum);
+ return false;
+ }
+
+ BasicBlock* head = pLoop->lpHead;
+ BasicBlock* end = pLoop->lpBottom;
+ BasicBlock* beg = head->bbNext;
+
+ if (end->bbJumpKind != BBJ_COND)
+ {
+ JITDUMP("> Couldn't find termination test.\n");
+ return false;
+ }
+
+ if (end->bbJumpDest != beg)
+ {
+ JITDUMP("> Branch at loop 'end' not looping to 'begin'.\n");
+ return false;
+ }
+
+ // TODO-CQ: CLONE: Mark increasing or decreasing loops.
+ if ((pLoop->lpIterOper() != GT_ASG_ADD && pLoop->lpIterOper() != GT_ADD) || (pLoop->lpIterConst() != 1))
+ {
+ JITDUMP("> Loop iteration operator not matching\n");
+ return false;
+ }
+
+ if ((pLoop->lpFlags & LPFLG_CONST_LIMIT) == 0 && (pLoop->lpFlags & LPFLG_VAR_LIMIT) == 0 &&
+ (pLoop->lpFlags & LPFLG_ARRLEN_LIMIT) == 0)
+ {
+ JITDUMP("> Loop limit is not a constant, a variable, or an array length\n");
+ return false;
+ }
+
+ if (!(((pLoop->lpTestOper() == GT_LT || pLoop->lpTestOper() == GT_LE) &&
+ (pLoop->lpIterOper() == GT_ADD || pLoop->lpIterOper() == GT_ASG_ADD)) ||
+ ((pLoop->lpTestOper() == GT_GT || pLoop->lpTestOper() == GT_GE) &&
+ (pLoop->lpIterOper() == GT_SUB || pLoop->lpIterOper() == GT_ASG_SUB))))
+ {
+ JITDUMP("> Loop test (%s) doesn't agree with the direction (%s) of the loop.\n",
+ GenTree::NodeName(pLoop->lpTestOper()), GenTree::NodeName(pLoop->lpIterOper()));
+ return false;
+ }
+
+ if (!(pLoop->lpTestTree->OperKind() & GTK_RELOP) || !(pLoop->lpTestTree->gtFlags & GTF_RELOP_ZTT))
+ {
+ JITDUMP("> Loop inversion NOT present, loop test [%06u] may not protect entry from head.\n",
+ pLoop->lpTestTree->gtTreeID);
+ return false;
+ }
+
+#ifdef DEBUG
+ GenTreePtr op1 = pLoop->lpIterator();
+ noway_assert((op1->gtOper == GT_LCL_VAR) && (op1->gtLclVarCommon.gtLclNum == ivLclNum));
+#endif
+
+ JITDUMP("Checking blocks BB%02d..BB%02d for optimization candidates\n", beg->bbNum,
+ end->bbNext ? end->bbNext->bbNum : 0);
+
+ LoopCloneVisitorInfo info(context, loopNum, nullptr);
+ for (BasicBlock* block = beg; block != end->bbNext; block = block->bbNext)
+ {
+ compCurBB = block;
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ info.stmt = stmt;
+ fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, optCanOptimizeByLoopCloningVisitor, &info, false, false);
+ }
+ }
+
+ return true;
+}
+
+//---------------------------------------------------------------------------------------------------------------
+// optExtractArrIndex: Try to extract the array index from "tree".
+//
+// Arguments:
+// tree the tree to be checked if it is the array [] operation.
+// result the extracted GT_INDEX information is updated in result.
+// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
+//
+// Return Value:
+// Returns true if the array index can be extracted; otherwise, returns false. See the assumption about
+// what will be extracted. The "result" variable's rank parameter is advanced for every
+// dimension of [] encountered.
+//
+// Operation:
+// Given a "tree" extract the GT_INDEX node in "result" as ArrIndex. In FlowGraph morph
+// we have converted a GT_INDEX tree into a scaled index base offset expression. We need
+// to reconstruct this to be able to know if this is an array access.
+//
+// Assumption:
+// The method extracts only if the array base and indices are GT_LCL_VAR.
+//
+// TODO-CQ: CLONE: After morph make sure this method extracts values before morph.
+//
+// [000000001AF828D8] ---XG------- indir int
+// [000000001AF872C8] ------------ const long 16 Fseq[#FirstElem]
+// [000000001AF87340] ------------ + byref
+// [000000001AF87160] -------N---- const long 2
+// [000000001AF871D8] ------------ << long
+// [000000001AF870C0] ------------ cast long <- int
+// [000000001AF86F30] i----------- lclVar int V04 loc0
+// [000000001AF87250] ------------ + byref
+// [000000001AF86EB8] ------------ lclVar ref V01 arg1
+// [000000001AF87468] ---XG------- comma int
+// [000000001AF87020] ---X-------- arrBndsChk void
+// [000000001AF86FA8] ---X-------- arrLen int
+// [000000001AF827E8] ------------ lclVar ref V01 arg1
+// [000000001AF82860] ------------ lclVar int V04 loc0
+// [000000001AF829F0] -A-XG------- = int
+// [000000001AF82978] D------N---- lclVar int V06 tmp0
+//
+bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
+{
+ if (tree->gtOper != GT_COMMA)
+ {
+ return false;
+ }
+ GenTreePtr before = tree->gtGetOp1();
+ if (before->gtOper != GT_ARR_BOUNDS_CHECK)
+ {
+ return false;
+ }
+ GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
+ if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+ if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
+ {
+ return false;
+ }
+ unsigned arrLcl = arrBndsChk->gtArrLen->gtGetOp1()->gtLclVarCommon.gtLclNum;
+ if (lhsNum != BAD_VAR_NUM && arrLcl != lhsNum)
+ {
+ return false;
+ }
+
+ unsigned indLcl = arrBndsChk->gtIndex->gtLclVarCommon.gtLclNum;
+
+ GenTreePtr after = tree->gtGetOp2();
+
+ if (after->gtOper != GT_IND)
+ {
+ return false;
+ }
+ // It used to be the case that arrBndsChks for struct types would fail the previous check because
+ // after->gtOper was an address (for a block op). In order to avoid asmDiffs we will for now
+ // return false if the type of 'after' is a struct type. (This was causing us to clone loops
+ // that we were not previously cloning.)
+ // TODO-1stClassStructs: Remove this check to enable optimization of array bounds checks for struct
+ // types.
+ if (varTypeIsStruct(after))
+ {
+ return false;
+ }
+
+ GenTreePtr sibo = after->gtGetOp1();
+ if (sibo->gtOper != GT_ADD)
+ {
+ return false;
+ }
+ GenTreePtr sib = sibo->gtGetOp1();
+ GenTreePtr ofs = sibo->gtGetOp2();
+ if (ofs->gtOper != GT_CNS_INT)
+ {
+ return false;
+ }
+ if (sib->gtOper != GT_ADD)
+ {
+ return false;
+ }
+ GenTreePtr si = sib->gtGetOp2();
+ GenTreePtr base = sib->gtGetOp1();
+ if (si->gtOper != GT_LSH)
+ {
+ return false;
+ }
+ if (base->OperGet() != GT_LCL_VAR || base->gtLclVarCommon.gtLclNum != arrLcl)
+ {
+ return false;
+ }
+ GenTreePtr scale = si->gtGetOp2();
+ GenTreePtr index = si->gtGetOp1();
+ if (scale->gtOper != GT_CNS_INT)
+ {
+ return false;
+ }
+#ifdef _TARGET_AMD64_
+ if (index->gtOper != GT_CAST)
+ {
+ return false;
+ }
+ GenTreePtr indexVar = index->gtGetOp1();
+#else
+ GenTreePtr indexVar = index;
+#endif
+ if (indexVar->gtOper != GT_LCL_VAR || indexVar->gtLclVarCommon.gtLclNum != indLcl)
+ {
+ return false;
+ }
+ if (lhsNum == BAD_VAR_NUM)
+ {
+ result->arrLcl = arrLcl;
+ }
+ result->indLcls.Push(indLcl);
+ result->bndsChks.Push(tree);
+ result->useBlock = compCurBB;
+ result->rank++;
+
+ return true;
+}
+
+//---------------------------------------------------------------------------------------------------------------
+// optReconstructArrIndex: Reconstruct array index.
+//
+// Arguments:
+// tree the tree to be checked if it is an array [][][] operation.
+// result the extracted GT_INDEX information.
+// lhsNum for the root level (function is recursive) callers should be BAD_VAR_NUM.
+//
+// Return Value:
+// Returns true if the array index can be extracted; otherwise, returns false. The "rank" field in
+// "result" contains the array access depth. The "indLcls" fields contain the indices.
+//
+// Operation:
+// Recursively look for a list of array indices. In the example below, we encounter,
+// V03 = ((V05 = V00[V01]), (V05[V02])) which corresponds to access of V00[V01][V02]
+// The return value would then be:
+// ArrIndex result { arrLcl: V00, indLcls: [V01, V02], rank: 2 }
+//
+// V00[V01][V02] would be morphed as:
+//
+// [000000001B366848] ---XG------- indir int
+// [000000001B36BC50] ------------ V05 + (V02 << 2) + 16
+// [000000001B36C200] ---XG------- comma int
+// [000000001B36BDB8] ---X-------- arrBndsChk(V05, V02)
+// [000000001B36C278] -A-XG------- comma int
+// [000000001B366730] R--XG------- indir ref
+// [000000001B36C2F0] ------------ V00 + (V01 << 3) + 24
+// [000000001B36C818] ---XG------- comma ref
+// [000000001B36C458] ---X-------- arrBndsChk(V00, V01)
+// [000000001B36BB60] -A-XG------- = ref
+// [000000001B36BAE8] D------N---- lclVar ref V05 tmp2
+// [000000001B36A668] -A-XG------- = int
+// [000000001B36A5F0] D------N---- lclVar int V03 tmp0
+//
+// Assumption:
+// The method extracts only if the array base and indices are GT_LCL_VAR.
+//
+bool Compiler::optReconstructArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lhsNum)
+{
+ // If we can extract "tree" (which is a top level comma) return.
+ if (optExtractArrIndex(tree, result, lhsNum))
+ {
+ return true;
+ }
+ // We have a comma (check if array base expr is computed in "before"), descend further.
+ else if (tree->OperGet() == GT_COMMA)
+ {
+ GenTreePtr before = tree->gtGetOp1();
+ // "before" should evaluate an array base for the "after" indexing.
+ if (before->OperGet() != GT_ASG)
+ {
+ return false;
+ }
+ GenTreePtr lhs = before->gtGetOp1();
+ GenTreePtr rhs = before->gtGetOp2();
+
+ // "rhs" should contain a GT_INDEX
+ if (!lhs->IsLocal() || !optReconstructArrIndex(rhs, result, lhsNum))
+ {
+ return false;
+ }
+ unsigned lhsNum = lhs->gtLclVarCommon.gtLclNum;
+ GenTreePtr after = tree->gtGetOp2();
+ // Pass the "lhsNum", so we can verify if indeed it is used as the array base.
+ return optExtractArrIndex(after, result, lhsNum);
+ }
+ return false;
+}
+
+/* static */
+Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ return data->compiler->optCanOptimizeByLoopCloning(*pTree, (LoopCloneVisitorInfo*)data->pCallbackData);
+}
+
+//-------------------------------------------------------------------------
+// optIsStackLocalInvariant: Is stack local invariant in loop.
+//
+// Arguments:
+// loopNum The loop in which the variable is tested for invariance.
+// lclNum The local that is tested for invariance in the loop.
+//
+// Return Value:
+// Returns true if the variable is loop invariant in loopNum.
+//
+bool Compiler::optIsStackLocalInvariant(unsigned loopNum, unsigned lclNum)
+{
+ if (lvaVarAddrExposed(lclNum))
+ {
+ return false;
+ }
+ if (optIsVarAssgLoop(loopNum, lclNum))
+ {
+ return false;
+ }
+ return true;
+}
+
+//----------------------------------------------------------------------------------------------
+// optCanOptimizeByLoopCloning: Check if the tree can be optimized by loop cloning and if so,
+// identify as potential candidate and update the loop context.
+//
+// Arguments:
+// tree The tree encountered during the tree walk.
+// info Supplies information about the current block or stmt in which the tree is.
+// Also supplies the "context" pointer for updating with loop cloning
+// candidates. Also supplies loopNum.
+//
+// Operation:
+// If array index can be reconstructed, check if the iter var of the loop matches the
+// array index var in some dim. Also ensure other index vars before the identified
+// dim are loop invariant.
+//
+// Return Value:
+// Skip sub trees if the optimization candidate is identified or else continue walking
+//
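+// For example, for "a[j][i]" inside a loop whose iter var is 'i', the access qualifies on
+// dimension 1 provided 'a' and 'j' are loop invariant; the candidate is then recorded in the
+// context as a LcJaggedArrayOptInfo for that statement.
+//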
+Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTreePtr tree, LoopCloneVisitorInfo* info)
+{
+ ArrIndex arrIndex(getAllocator());
+
+ // Check if array index can be optimized.
+ if (optReconstructArrIndex(tree, &arrIndex, BAD_VAR_NUM))
+ {
+ assert(tree->gtOper == GT_COMMA);
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Found ArrIndex at tree ");
+ printTreeID(tree);
+ printf(" which is equivalent to: ");
+ arrIndex.Print();
+ JITDUMP("\n");
+ }
+#endif
+ if (!optIsStackLocalInvariant(info->loopNum, arrIndex.arrLcl))
+ {
+ return WALK_SKIP_SUBTREES;
+ }
+
+ // Walk the dimensions and see if iterVar of the loop is used as index.
+ for (unsigned dim = 0; dim < arrIndex.rank; ++dim)
+ {
+ // Is index variable also used as the loop iter var.
+ if (arrIndex.indLcls[dim] == optLoopTable[info->loopNum].lpIterVar())
+ {
+ // Check the previous indices are all loop invariant.
+ for (unsigned dim2 = 0; dim2 < dim; ++dim2)
+ {
+ if (optIsVarAssgLoop(info->loopNum, arrIndex.indLcls[dim2]))
+ {
+ JITDUMP("V%02d is assigned in loop\n", arrIndex.indLcls[dim2]);
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("Loop %d can be cloned for ArrIndex ", info->loopNum);
+ arrIndex.Print();
+ JITDUMP(" on dim %d\n", dim);
+ }
+#endif
+ // Update the loop context.
+ info->context->EnsureLoopOptInfo(info->loopNum)
+ ->Push(new (this, CMK_LoopOpt) LcJaggedArrayOptInfo(arrIndex, dim, info->stmt));
+ }
+ else
+ {
+ JITDUMP("Induction V%02d is not used as index on dim %d\n", optLoopTable[info->loopNum].lpIterVar(),
+ dim);
+ }
+ }
+ return WALK_SKIP_SUBTREES;
+ }
+ else if (tree->gtOper == GT_ARR_ELEM)
+ {
+ // TODO-CQ: CLONE: Implement.
+ return WALK_SKIP_SUBTREES;
+ }
+ return WALK_CONTINUE;
+}
+
+struct optRangeCheckDsc
+{
+ Compiler* pCompiler;
+ bool bValidIndex;
+};
+/*
+ Walk to make sure that only locals and constants are contained in the index
+ for a range check
+*/
+Compiler::fgWalkResult Compiler::optValidRangeCheckIndex(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ optRangeCheckDsc* pData = (optRangeCheckDsc*)data->pCallbackData;
+
+ if (tree->gtOper == GT_IND || tree->gtOper == GT_CLS_VAR || tree->gtOper == GT_FIELD || tree->gtOper == GT_LCL_FLD)
+ {
+ pData->bValidIndex = false;
+ return WALK_ABORT;
+ }
+
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ if (pData->pCompiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ pData->bValidIndex = false;
+ return WALK_ABORT;
+ }
+ }
+
+ return WALK_CONTINUE;
+}
+
+/*
+ Returns true if a range check can legally be removed. For the moment it checks
+ that the array is a local that is not address-exposed (so it is not subject to race
+ conditions) and that the index involves only constants and non-address-exposed locals.
+*/
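+// For example, the check for "arr[i]" can be removed when 'arr' and 'i' are non-address-exposed
+// locals, but not when the array comes from a field (e.g. "this.arr[i]") or the index is loaded
+// from memory.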
+bool Compiler::optIsRangeCheckRemovable(GenTreePtr tree)
+{
+ noway_assert(tree->gtOper == GT_ARR_BOUNDS_CHECK);
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ GenTreePtr pArray = bndsChk->GetArray();
+ if (pArray == nullptr && !bndsChk->gtArrLen->IsCnsIntOrI())
+ {
+ return false;
+ }
+ GenTreePtr pIndex = bndsChk->gtIndex;
+
+ // The length must be a constant (the pArray == NULL case) or the array reference must be a local.
+ // Otherwise we can be targeted by malicious race-conditions.
+ if (pArray != nullptr)
+ {
+ if (pArray->gtOper != GT_LCL_VAR)
+ {
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check if the array isn't referenced with a local\n");
+ gtDispTree(pArray);
+ }
+#endif
+ return false;
+ }
+ else
+ {
+ noway_assert(pArray->gtType == TYP_REF);
+ noway_assert(pArray->gtLclVarCommon.gtLclNum < lvaCount);
+
+ if (lvaTable[pArray->gtLclVarCommon.gtLclNum].lvAddrExposed)
+ {
+ // If the array address has been taken, don't do the optimization
+ // (this restriction can be lowered a bit, but I don't think it's worth it)
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check if the array has its address taken\n");
+ gtDispTree(pArray);
+ }
+#endif
+ return false;
+ }
+ }
+ }
+
+ optRangeCheckDsc Data;
+ Data.pCompiler = this;
+ Data.bValidIndex = true;
+
+ fgWalkTreePre(&pIndex, optValidRangeCheckIndex, &Data);
+
+ if (!Data.bValidIndex)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Can't remove range check with this index");
+ gtDispTree(pIndex);
+ }
+#endif
+
+ return false;
+ }
+
+ return true;
+}
+
+/******************************************************************************
+ *
+ * Replace x==null with (x|x)==0 if x is a GC-type.
+ * This will stress code-gen and the emitter to make sure they support such trees.
+ */
+
+#ifdef DEBUG
+
+void Compiler::optOptimizeBoolsGcStress(BasicBlock* condBlock)
+{
+ if (!compStressCompile(STRESS_OPT_BOOLS_GC, 20))
+ {
+ return;
+ }
+
+ noway_assert(condBlock->bbJumpKind == BBJ_COND);
+ GenTreePtr condStmt = condBlock->bbTreeList->gtPrev->gtStmt.gtStmtExpr;
+
+ noway_assert(condStmt->gtOper == GT_JTRUE);
+
+ bool isBool;
+ GenTreePtr relop;
+
+ GenTreePtr comparand = optIsBoolCond(condStmt, &relop, &isBool);
+
+ if (comparand == nullptr || !varTypeIsGC(comparand->TypeGet()))
+ {
+ return;
+ }
+
+ if (comparand->gtFlags & (GTF_ASG | GTF_CALL | GTF_ORDER_SIDEEFF))
+ {
+ return;
+ }
+
+ GenTreePtr comparandClone = gtCloneExpr(comparand);
+
+ // Bump up the ref-counts of any variables in 'comparandClone'
+ compCurBB = condBlock;
+ fgWalkTreePre(&comparandClone, Compiler::lvaIncRefCntsCB, (void*)this, true);
+
+ noway_assert(relop->gtOp.gtOp1 == comparand);
+ genTreeOps oper = compStressCompile(STRESS_OPT_BOOLS_GC, 50) ? GT_OR : GT_AND;
+ relop->gtOp.gtOp1 = gtNewOperNode(oper, TYP_I_IMPL, comparand, comparandClone);
+
+ // The comparand type has already been checked, and we have a const int, so there is no harm
+ // in morphing it into a TYP_I_IMPL.
+ noway_assert(relop->gtOp.gtOp2->gtOper == GT_CNS_INT);
+ relop->gtOp.gtOp2->gtType = TYP_I_IMPL;
+}
+
+#endif
+
+/******************************************************************************
+ * Function used by folding of boolean conditionals
+ * Given a GT_JTRUE node, checks that it is a boolean comparison of the form
+ * "if (boolVal ==/!= 0/1)". This is translated into a GT_EQ/GT_NE node with "op1"
+ * being a boolean lclVar and "op2" the const 0/1.
+ * On success, the comparand (i.e. boolVal) is returned; otherwise NULL.
+ * compPtr returns the compare node (i.e. the GT_EQ or GT_NE node).
+ * boolPtr returns whether the comparand is a boolean value (must be 0 or 1).
+ * When the returned *boolPtr is true and the comparison was against a 1 (i.e. true)
+ * value, we morph the tree by reversing the GT_EQ/GT_NE and changing the 1 to 0.
+ */
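+// For example, given "JTRUE(EQ(V02, 1))" where V02 is a boolean local, the compare is rewritten to
+// "NE(V02, 0)", *compPtr points at that node, *boolPtr is set to true, and V02 is returned.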
+
+GenTree* Compiler::optIsBoolCond(GenTree* condBranch, GenTree** compPtr, bool* boolPtr)
+{
+ bool isBool = false;
+
+ noway_assert(condBranch->gtOper == GT_JTRUE);
+ GenTree* cond = condBranch->gtOp.gtOp1;
+
+ /* The condition must be "!= 0" or "== 0" */
+
+ if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
+ {
+ return nullptr;
+ }
+
+ /* Return the compare node to the caller */
+
+ *compPtr = cond;
+
+ /* Get hold of the comparands */
+
+ GenTree* opr1 = cond->gtOp.gtOp1;
+ GenTree* opr2 = cond->gtOp.gtOp2;
+
+ if (opr2->gtOper != GT_CNS_INT)
+ {
+ return nullptr;
+ }
+
+ if (!opr2->IsIntegralConst(0) && !opr2->IsIntegralConst(1))
+ {
+ return nullptr;
+ }
+
+ ssize_t ival2 = opr2->gtIntCon.gtIconVal;
+
+ /* Is the value a boolean?
+ * We can either have a boolean expression (marked GTF_BOOLEAN) or
+ * a local variable that is marked as being boolean (lvIsBoolean) */
+
+ if (opr1->gtFlags & GTF_BOOLEAN)
+ {
+ isBool = true;
+ }
+ else if ((opr1->gtOper == GT_CNS_INT) && (opr1->IsIntegralConst(0) || opr1->IsIntegralConst(1)))
+ {
+ isBool = true;
+ }
+ else if (opr1->gtOper == GT_LCL_VAR)
+ {
+ /* is it a boolean local variable */
+
+ unsigned lclNum = opr1->gtLclVarCommon.gtLclNum;
+ noway_assert(lclNum < lvaCount);
+
+ if (lvaTable[lclNum].lvIsBoolean)
+ {
+ isBool = true;
+ }
+ }
+
+ /* Was our comparison against the constant 1 (i.e. true) */
+ if (ival2 == 1)
+ {
+ // If this is a boolean expression tree we can reverse the relop
+ // and change the true to false.
+ if (isBool)
+ {
+ gtReverseCond(cond);
+ opr2->gtIntCon.gtIconVal = 0;
+ }
+ else
+ {
+ return nullptr;
+ }
+ }
+
+ *boolPtr = isBool;
+ return opr1;
+}
+
+void Compiler::optOptimizeBools()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In optOptimizeBools()\n");
+ if (verboseTrees)
+ {
+ printf("Blocks/Trees before phase\n");
+ fgDispBasicBlocks(true);
+ }
+ }
+#endif
+ bool change;
+
+ do
+ {
+ change = false;
+
+ for (BasicBlock* b1 = fgFirstBB; b1; b1 = b1->bbNext)
+ {
+ /* We're only interested in conditional jumps here */
+
+ if (b1->bbJumpKind != BBJ_COND)
+ {
+ continue;
+ }
+
+ /* If there is no next block, we're done */
+
+ BasicBlock* b2 = b1->bbNext;
+ if (!b2)
+ {
+ break;
+ }
+
+ /* The next block must not be marked as BBF_DONT_REMOVE */
+ if (b2->bbFlags & BBF_DONT_REMOVE)
+ {
+ continue;
+ }
+
+ /* The next block also needs to be a condition */
+
+ if (b2->bbJumpKind != BBJ_COND)
+ {
+#ifdef DEBUG
+ optOptimizeBoolsGcStress(b1);
+#endif
+ continue;
+ }
+
+ bool sameTarget; // Do b1 and b2 have the same bbJumpDest?
+
+ if (b1->bbJumpDest == b2->bbJumpDest)
+ {
+ /* Given the following sequence of blocks :
+ B1: brtrue(t1, BX)
+ B2: brtrue(t2, BX)
+ B3:
+ we will try to fold it to :
+ B1: brtrue(t1|t2, BX)
+ B3:
+ */
+
+ sameTarget = true;
+ }
+ else if (b1->bbJumpDest == b2->bbNext) /*b1->bbJumpDest->bbNum == n1+2*/
+ {
+ /* Given the following sequence of blocks :
+ B1: brtrue(t1, B3)
+ B2: brtrue(t2, BX)
+ B3:
+ we will try to fold it to :
+ B1: brtrue((!t1)&&t2, B3)
+ B3:
+ */
+
+ sameTarget = false;
+ }
+ else
+ {
+ continue;
+ }
+
+ /* The second block must contain a single statement */
+
+ GenTreePtr s2 = b2->bbTreeList;
+ if (s2->gtPrev != s2)
+ {
+ continue;
+ }
+
+ noway_assert(s2->gtOper == GT_STMT);
+ GenTreePtr t2 = s2->gtStmt.gtStmtExpr;
+ noway_assert(t2->gtOper == GT_JTRUE);
+
+ /* Find the condition for the first block */
+
+ GenTreePtr s1 = b1->bbTreeList->gtPrev;
+
+ noway_assert(s1->gtOper == GT_STMT);
+ GenTreePtr t1 = s1->gtStmt.gtStmtExpr;
+ noway_assert(t1->gtOper == GT_JTRUE);
+
+ if (b2->countOfInEdges() > 1)
+ {
+ continue;
+ }
+
+ /* Find the branch conditions of b1 and b2 */
+
+ bool bool1, bool2;
+
+ GenTreePtr c1 = optIsBoolCond(t1, &t1, &bool1);
+ if (!c1)
+ {
+ continue;
+ }
+
+ GenTreePtr c2 = optIsBoolCond(t2, &t2, &bool2);
+ if (!c2)
+ {
+ continue;
+ }
+
+ noway_assert((t1->gtOper == GT_EQ || t1->gtOper == GT_NE) && t1->gtOp.gtOp1 == c1);
+ noway_assert((t2->gtOper == GT_EQ || t2->gtOper == GT_NE) && t2->gtOp.gtOp1 == c2);
+
+ // Leave out floats where the bit-representation is more complicated
+ // - there are two representations for 0.
+ //
+ if (varTypeIsFloating(c1->TypeGet()) || varTypeIsFloating(c2->TypeGet()))
+ {
+ continue;
+ }
+
+ // Make sure the types involved are of the same sizes
+ if (genTypeSize(c1->TypeGet()) != genTypeSize(c2->TypeGet()))
+ {
+ continue;
+ }
+ if (genTypeSize(t1->TypeGet()) != genTypeSize(t2->TypeGet()))
+ {
+ continue;
+ }
+#ifdef _TARGET_ARMARCH_
+ // Skip the small operand which we cannot encode.
+ if (varTypeIsSmall(c1->TypeGet()))
+ continue;
+#endif
+ /* The second condition must not contain side effects */
+
+ if (c2->gtFlags & GTF_GLOB_EFFECT)
+ {
+ continue;
+ }
+
+ /* The second condition must not be too expensive */
+
+ gtPrepareCost(c2);
+
+ if (c2->gtCostEx > 12)
+ {
+ continue;
+ }
+
+ genTreeOps foldOp;
+ genTreeOps cmpOp;
+ var_types foldType = c1->TypeGet();
+ if (varTypeIsGC(foldType))
+ {
+ foldType = TYP_I_IMPL;
+ }
+
+ if (sameTarget)
+ {
+ /* Both conditions must be the same */
+
+ if (t1->gtOper != t2->gtOper)
+ {
+ continue;
+ }
+
+ if (t1->gtOper == GT_EQ)
+ {
+ /* t1:c1==0 t2:c2==0 ==> Branch to BX if either value is 0
+ So we will branch to BX if (c1&c2)==0 */
+
+ foldOp = GT_AND;
+ cmpOp = GT_EQ;
+ }
+ else
+ {
+ /* t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
+ So we will branch to BX if (c1|c2)!=0 */
+
+ foldOp = GT_OR;
+ cmpOp = GT_NE;
+ }
+ }
+ else
+ {
+ /* The b1 condition must be the reverse of the b2 condition */
+
+ if (t1->gtOper == t2->gtOper)
+ {
+ continue;
+ }
+
+ if (t1->gtOper == GT_EQ)
+ {
+ /* t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
+ So we will branch to BX if (c1&c2)!=0 */
+
+ foldOp = GT_AND;
+ cmpOp = GT_NE;
+ }
+ else
+ {
+ /* t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
+ So we will branch to BX if (c1|c2)==0 */
+
+ foldOp = GT_OR;
+ cmpOp = GT_EQ;
+ }
+ }
+
+ // Anding requires both values to be 0 or 1
+
+ if ((foldOp == GT_AND) && (!bool1 || !bool2))
+ {
+ continue;
+ }
+
+ //
+ // Now update the trees
+ //
+ GenTreePtr cmpOp1 = gtNewOperNode(foldOp, foldType, c1, c2);
+ if (bool1 && bool2)
+ {
+ /* When we 'OR'/'AND' two booleans, the result is boolean as well */
+ cmpOp1->gtFlags |= GTF_BOOLEAN;
+ }
+
+ t1->SetOper(cmpOp);
+ t1->gtOp.gtOp1 = cmpOp1;
+ t1->gtOp.gtOp2->gtType = foldType; // Could have been varTypeIsGC()
+
+#if FEATURE_SET_FLAGS
+ // For comparisons against zero we will have the GTF_SET_FLAGS set
+ // and this can cause an assert to fire in fgMoveOpsLeft(GenTreePtr tree)
+ // during the CSE phase.
+ //
+ // So make sure to clear any GTF_SET_FLAGS bit on these operations
+ // as they are no longer feeding directly into a comparison against zero.
+
+ // Make sure that the GTF_SET_FLAGS bit is cleared.
+ // Fix 388436 ARM JitStress WP7
+ c1->gtFlags &= ~GTF_SET_FLAGS;
+ c2->gtFlags &= ~GTF_SET_FLAGS;
+
+ // The new top level node that we just created does feed directly into
+ // a comparison against zero, so set the GTF_SET_FLAGS bit so that
+ // we generate an instruction that sets the flags, which allows us
+ // to omit the cmp with zero instruction.
+
+ // Request that the codegen for cmpOp1 sets the condition flags
+ // when it generates the code for cmpOp1.
+ //
+ cmpOp1->gtRequestSetFlags();
+#endif
+
+ flowList* edge1 = fgGetPredForBlock(b1->bbJumpDest, b1);
+ flowList* edge2;
+
+ /* Modify the target of the conditional jump and update bbRefs and bbPreds */
+
+ if (sameTarget)
+ {
+ edge2 = fgGetPredForBlock(b2->bbJumpDest, b2);
+ }
+ else
+ {
+ edge2 = fgGetPredForBlock(b2->bbNext, b2);
+
+ fgRemoveRefPred(b1->bbJumpDest, b1);
+
+ b1->bbJumpDest = b2->bbJumpDest;
+
+ fgAddRefPred(b2->bbJumpDest, b1);
+ }
+
+ noway_assert(edge1 != nullptr);
+ noway_assert(edge2 != nullptr);
+
+ BasicBlock::weight_t edgeSumMin = edge1->flEdgeWeightMin + edge2->flEdgeWeightMin;
+ BasicBlock::weight_t edgeSumMax = edge1->flEdgeWeightMax + edge2->flEdgeWeightMax;
+ if ((edgeSumMax >= edge1->flEdgeWeightMax) && (edgeSumMax >= edge2->flEdgeWeightMax))
+ {
+ edge1->flEdgeWeightMin = edgeSumMin;
+ edge1->flEdgeWeightMax = edgeSumMax;
+ }
+ else
+ {
+ edge1->flEdgeWeightMin = BB_ZERO_WEIGHT;
+ edge1->flEdgeWeightMax = BB_MAX_WEIGHT;
+ }
+
+ /* Get rid of the second block (which is a BBJ_COND) */
+
+ noway_assert(b1->bbJumpKind == BBJ_COND);
+ noway_assert(b2->bbJumpKind == BBJ_COND);
+ noway_assert(b1->bbJumpDest == b2->bbJumpDest);
+ noway_assert(b1->bbNext == b2);
+ noway_assert(b2->bbNext);
+
+ fgUnlinkBlock(b2);
+ b2->bbFlags |= BBF_REMOVED;
+
+ // If b2 was the last block of a try or handler, update the EH table.
+
+ ehUpdateForDeletedBlock(b2);
+
+ /* Update bbRefs and bbPreds */
+
+ /* Replace pred 'b2' for 'b2->bbNext' with 'b1'
+ * Remove pred 'b2' for 'b2->bbJumpDest' */
+
+ fgReplacePred(b2->bbNext, b2, b1);
+
+ fgRemoveRefPred(b2->bbJumpDest, b2);
+
+ /* Update the block numbers and try again */
+
+ change = true;
+ /*
+ do
+ {
+ b2->bbNum = ++n1;
+ b2 = b2->bbNext;
+ }
+ while (b2);
+ */
+
+ // Update loop table
+ fgUpdateLoopsAfterCompacting(b1, b2);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Folded %sboolean conditions of BB%02u and BB%02u to :\n", c2->OperIsLeaf() ? "" : "non-leaf ",
+ b1->bbNum, b2->bbNum);
+ gtDispTree(s1);
+ printf("\n");
+ }
+#endif
+ }
+ } while (change);
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+}
diff --git a/src/jit/phase.h b/src/jit/phase.h
new file mode 100644
index 0000000000..d8e2940089
--- /dev/null
+++ b/src/jit/phase.h
@@ -0,0 +1,77 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _PHASE_H_
+#define _PHASE_H_
+
+class Phase
+{
+public:
+ virtual void Run();
+
+protected:
+ Phase(Compiler* _comp, const char* _name, Phases _phase = PHASE_NUMBER_OF) : comp(_comp), name(_name), phase(_phase)
+ {
+ }
+
+ virtual void PrePhase();
+ virtual void DoPhase() = 0;
+ virtual void PostPhase();
+
+ Compiler* comp;
+ const char* name;
+ Phases phase;
+};
+
+inline void Phase::Run()
+{
+ PrePhase();
+ DoPhase();
+ PostPhase();
+}
+
+inline void Phase::PrePhase()
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("*************** In %s\n", name);
+ printf("Trees before %s\n", name);
+ comp->fgDispBasicBlocks(true);
+ }
+
+ if (comp->expensiveDebugCheckLevel >= 2)
+ {
+ // If everyone used the Phase class, this would duplicate the PostPhase() from the previous phase.
+ // But, not everyone does, so go ahead and do the check here, too.
+ comp->fgDebugCheckBBlist();
+ comp->fgDebugCheckLinks();
+ }
+#endif // DEBUG
+}
+
+inline void Phase::PostPhase()
+{
+#ifdef DEBUG
+ if (VERBOSE)
+ {
+ printf("*************** Exiting %s\n", name);
+ printf("Trees after %s\n", name);
+ comp->fgDispBasicBlocks(true);
+ }
+#endif // DEBUG
+
+ if (phase != PHASE_NUMBER_OF)
+ {
+ comp->EndPhase(phase);
+ }
+
+#ifdef DEBUG
+ comp->fgDebugCheckBBlist();
+ comp->fgDebugCheckLinks();
+#endif // DEBUG
+}
+
+#endif /* End of _PHASE_H_ */
diff --git a/src/jit/protojit/.gitmirror b/src/jit/protojit/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/protojit/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/protojit/CMakeLists.txt b/src/jit/protojit/CMakeLists.txt
new file mode 100644
index 0000000000..e3cc769ba0
--- /dev/null
+++ b/src/jit/protojit/CMakeLists.txt
@@ -0,0 +1,51 @@
+project(protojit)
+
+add_definitions(-DALT_JIT)
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+add_library_clr(protojit
+ SHARED
+ ${SHARED_LIB_SOURCES}
+)
+
+add_dependencies(protojit jit_exports)
+
+set_property(TARGET protojit APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET protojit APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ msvcrt.lib
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(protojit
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(protojit)
diff --git a/src/jit/protojit/SOURCES b/src/jit/protojit/SOURCES
new file mode 100644
index 0000000000..5f46bf8aad
--- /dev/null
+++ b/src/jit/protojit/SOURCES
@@ -0,0 +1,10 @@
+
+#
+# DO NOT EDIT THIS FILE!!! Modify the project file in this directory
+# This file merely allows the MSBuild project file in this directory to be integrated with Build.Exe
+#
+TARGETTYPE=NOTARGET
+CLR_TARGETTYPE=DLL
+MSBuildProjectFile=protojit.nativeproj
+SOURCES=
+
diff --git a/src/jit/protojit/makefile b/src/jit/protojit/makefile
new file mode 100644
index 0000000000..84abb1cb0d
--- /dev/null
+++ b/src/jit/protojit/makefile
@@ -0,0 +1,7 @@
+
+#
+# DO NOT EDIT THIS FILE!!! Modify the project file in this directory
+# This file merely allows the MSBuild project file in this directory to be integrated with Build.Exe
+#
+!INCLUDE $(NTMAKEENV)\msbuild.def
+ \ No newline at end of file
diff --git a/src/jit/protojit/protojit.def b/src/jit/protojit/protojit.def
new file mode 100644
index 0000000000..1603af74ca
--- /dev/null
+++ b/src/jit/protojit/protojit.def
@@ -0,0 +1,7 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+EXPORTS
+ getJit
+ jitStartup
+ sxsJitStartup
diff --git a/src/jit/protojit/protojit.nativeproj b/src/jit/protojit/protojit.nativeproj
new file mode 100644
index 0000000000..3de0f0aeed
--- /dev/null
+++ b/src/jit/protojit/protojit.nativeproj
@@ -0,0 +1,88 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003" ToolsVersion="dogfood">
+
+ <!--
+ PROTO JIT: The purpose of this module is to provide an isolated environment to develop
+ the RyuJIT backend without interfering with the development of the frontend. The
+ idea is to fork codegen and registerfp; that way we leave the PUCLR backend intact so
+ it can still be consumed by the RyuJIT frontend, separately maintaining the code stability
+ of the PUCLR codegen.cpp logic.
+
+ This module is meant to be a 'development' JIT, i.e. try to use the generated code by this JIT
+ and in case something goes wrong, fallback to the default JIT.
+ -->
+
+ <!-- Import the CLR's settings -->
+
+ <Import Project="$(_NTDRIVE)$(_NTROOT)\ndp\clr\clr.props" />
+
+ <PropertyGroup>
+
+ <!-- Set the output -->
+
+ <OutputName>protojit</OutputName>
+ <StaticLinkJit>$(FeatureMergeJitAndEngine)</StaticLinkJit>
+ <FeatureMergeJitAndEngine>false</FeatureMergeJitAndEngine>
+ <TargetType>DYNLINK</TargetType>
+ <FileToMarkForSigning>$(BinariesDirectory)\protojit.dll</FileToMarkForSigning>
+ <BuildCoreBinaries>false</BuildCoreBinaries>
+ <BuildSysBinaries>false</BuildSysBinaries>
+
+ <!-- Motherhood & apple pie here -->
+
+ <DllEntryPoint>_DllMainCRTStartup</DllEntryPoint>
+ <LinkSubsystem>windows</LinkSubsystem>
+ <LibCLib Condition="'$(StaticLinkJit)'!='true'">$(ClrCrtLib)</LibCLib>
+
+ <!-- JIT specific baloney -->
+
+ <LinkModuleDefinitionFile>$(OutputName).def</LinkModuleDefinitionFile>
+
+ <ClDefines>$(ClDefines);ALT_JIT</ClDefines>
+ <ClDefines Condition="'$(BuildArchitecture)' == 'amd64'">$(ClDefines);FEATURE_SIMD;FEATURE_AVX_SUPPORT</ClDefines>
+
+ <Win32DllLibs>$(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib</Win32DllLibs>
+ <Win32DllLibs>$(Win32DllLibs);$(ClrLibPath)\utilcode.lib</Win32DllLibs>
+
+ <!-- Profile-guided optimization -->
+
+ <PogoOptimize>false</PogoOptimize>
+ <PogoInstrument>false</PogoInstrument>
+ <PogoUpdate>false</PogoUpdate>
+
+    <!-- Do we want to build with msvcdis disassembly capability? This should be enabled for DEBUG and disabled otherwise.
+         However, for debugging purposes such as generating assembly diffs between CHK and RET JITs, it can be useful
+         to enable it temporarily in non-DEBUG builds by forcing the EnableLateDisasm property to 'true'.
+ -->
+ <EnableLateDisasm Condition="'$(DebugBuild)' == 'true' and '$(BuildArchitecture)' != 'arm' and '$(BuildForCoreSystem)' != 'true'">true</EnableLateDisasm>
+ <!--
+ <EnableLateDisasm Condition="'$(BuildArchitecture)' != 'arm' and '$(BuildForCoreSystem)' != 'true'">true</EnableLateDisasm>
+ -->
+ <ClDefines Condition="'$(EnableLateDisasm)' == 'true'">$(ClDefines);LATE_DISASM=1</ClDefines>
+ <LinkDelayLoad Condition="'$(EnableLateDisasm)' == 'true'">$(LinkDelayLoad);msvcdis$(VC_NONCRT_ProdVerX).dll</LinkDelayLoad>
+ <UseDelayimpLib Condition="'$(EnableLateDisasm)' == 'true' and '$(FeatureMergeJitAndEngine)'!='true'">true</UseDelayimpLib>
+
+ </PropertyGroup>
+
+ <!-- Leaf Project Items -->
+
+ <ItemGroup>
+ <ProjectReference Include="$(ClrSrcDirectory)utilcode\dyncrt\dyncrt.nativeproj" />
+ <TargetLib Include="$(SdkLibPath)\mscoree.lib" />
+ <TargetLib Condition="'$(BuildArchitecture)'!='i386'" Include="$(ClrLibPath)\gcinfo.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcinfo\lib\gcinfo.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(UseDelayimpLib)' == 'true'" Include="$(ClrLibPath)\delayimp.lib">
+ <ProjectReference>$(ClrSrcDirectory)delayimp\delayimp.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(ClrLibPath)\gcdump.lib">
+ <ProjectReference>$(ClrSrcDirectory)gcdump\lib\gcdump.nativeproj</ProjectReference>
+ </TargetLib>
+ <TargetLib Condition="'$(DebugBuild)' == 'true'" Include="$(SdkLibPath)\ole32.lib" />
+ <TargetLib Condition="'$(EnableLateDisasm)' == 'true'" Include="$(VCToolsLibPath)\msvcdis.lib" />
+ <RCResourceFile Include="..\native.rc" />
+ </ItemGroup>
+
+ <Import Project="..\jit.settings.targets" />
+
+</Project>
diff --git a/src/jit/rangecheck.cpp b/src/jit/rangecheck.cpp
new file mode 100644
index 0000000000..ae0c792f11
--- /dev/null
+++ b/src/jit/rangecheck.cpp
@@ -0,0 +1,1388 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+
+#include "jitpch.h"
+#include "rangecheck.h"
+
+// Max stack depth (path length) in walking the UD chain.
+static const int MAX_SEARCH_DEPTH = 100;
+
+// Max nodes to visit in the UD chain for the current method being compiled.
+static const int MAX_VISIT_BUDGET = 8192;
+
+// RangeCheck constructor.
+RangeCheck::RangeCheck(Compiler* pCompiler)
+ : m_pOverflowMap(nullptr)
+ , m_pRangeMap(nullptr)
+ , m_fMappedDefs(false)
+ , m_pDefTable(nullptr)
+ , m_pCompiler(pCompiler)
+ , m_nVisitBudget(MAX_VISIT_BUDGET)
+{
+}
+
+bool RangeCheck::IsOverBudget()
+{
+ return (m_nVisitBudget <= 0);
+}
+
+// Get the range map in which computed ranges are cached.
+RangeCheck::RangeMap* RangeCheck::GetRangeMap()
+{
+ if (m_pRangeMap == nullptr)
+ {
+ m_pRangeMap = new (m_pCompiler->getAllocator()) RangeMap(m_pCompiler->getAllocator());
+ }
+ return m_pRangeMap;
+}
+
+// Get the overflow map in which computed overflows are cached.
+RangeCheck::OverflowMap* RangeCheck::GetOverflowMap()
+{
+ if (m_pOverflowMap == nullptr)
+ {
+ m_pOverflowMap = new (m_pCompiler->getAllocator()) OverflowMap(m_pCompiler->getAllocator());
+ }
+ return m_pOverflowMap;
+}
+
+// Given the VN of an array length, return the array's size if the array was allocated by
+// a "new" with a known constant size (otherwise a non-positive value).
+int RangeCheck::GetArrLength(ValueNum vn)
+{
+ ValueNum arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(vn);
+ return m_pCompiler->vnStore->GetNewArrSize(arrRefVN);
+}
+
+// Check if the computed range is within bounds.
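+// For illustration, a small worked example of the checks below (values hypothetical):
+// for a range <0, a.len - 1> checked against an upper bound tree whose VN is a.len, the
+// upper limit is a.len + (-1) on the same array (ucns = -1 < 0) and the lower limit is the
+// constant 0 >= 0, so BetweenBounds returns true and the bounds check can be removed.
+// A range <0, a.len> (ucns = 0) is rejected, since the index a.len itself is out of bounds.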
+bool RangeCheck::BetweenBounds(Range& range, int lower, GenTreePtr upper)
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("%s BetweenBounds <%d, ", range.ToString(m_pCompiler->getAllocatorDebugOnly()), lower);
+ Compiler::printTreeID(upper);
+ printf(">\n");
+ }
+#endif // DEBUG
+
+ // Get the VN for the upper limit.
+ ValueNum uLimitVN = upper->gtVNPair.GetConservative();
+
+#ifdef DEBUG
+ JITDUMP("VN%04X upper bound is: ", uLimitVN);
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->vnStore->vnDump(m_pCompiler, uLimitVN);
+ }
+ JITDUMP("\n");
+#endif
+
+ ValueNum arrRefVN = ValueNumStore::NoVN;
+ int arrSize = 0;
+
+ if (m_pCompiler->vnStore->IsVNConstant(uLimitVN))
+ {
+ ssize_t constVal = -1;
+ unsigned iconFlags = 0;
+
+ if (m_pCompiler->optIsTreeKnownIntValue(true, upper, &constVal, &iconFlags))
+ {
+ arrSize = (int)constVal;
+ }
+ }
+ else if (m_pCompiler->vnStore->IsVNArrLen(uLimitVN))
+ {
+ // Get the array reference from the length.
+ arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(uLimitVN);
+ // Check if array size can be obtained.
+ arrSize = m_pCompiler->vnStore->GetNewArrSize(arrRefVN);
+ }
+ else
+ {
+ // If the upper limit is not length, then bail.
+ return false;
+ }
+
+#ifdef DEBUG
+ JITDUMP("Array ref VN");
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->vnStore->vnDump(m_pCompiler, arrRefVN);
+ }
+ JITDUMP("\n");
+#endif
+
+ JITDUMP("Array size is: %d\n", arrSize);
+
+ // Upper limit: a.len + ucns (upper limit constant).
+ if (range.UpperLimit().IsBinOpArray())
+ {
+ if (range.UpperLimit().vn != arrRefVN)
+ {
+ return false;
+ }
+
+ int ucns = range.UpperLimit().GetConstant();
+
+ // Upper limit: a.Len + [0..n]
+ if (ucns >= 0)
+ {
+ return false;
+ }
+
+        // If the lower limit is a.len, return false.
+ if (range.LowerLimit().IsArray())
+ {
+ return false;
+ }
+
+ // Since upper limit is bounded by the array, return true if lower bound is good.
+ if (range.LowerLimit().IsConstant() && range.LowerLimit().GetConstant() >= 0)
+ {
+ return true;
+ }
+
+ // Check if we have the array size allocated by new.
+ if (arrSize <= 0)
+ {
+ return false;
+ }
+
+ // At this point,
+ // upper limit = a.len + ucns. ucns < 0
+ // lower limit = a.len + lcns.
+ if (range.LowerLimit().IsBinOpArray())
+ {
+ int lcns = range.LowerLimit().GetConstant();
+ if (lcns >= 0 || -lcns > arrSize)
+ {
+ return false;
+ }
+ return (range.LowerLimit().vn == arrRefVN && lcns <= ucns);
+ }
+ }
+ // If upper limit is constant
+ else if (range.UpperLimit().IsConstant())
+ {
+ if (arrSize <= 0)
+ {
+ return false;
+ }
+ int ucns = range.UpperLimit().GetConstant();
+ if (ucns >= arrSize)
+ {
+ return false;
+ }
+ if (range.LowerLimit().IsConstant())
+ {
+ int lcns = range.LowerLimit().GetConstant();
+ // Make sure lcns < ucns which is already less than arrSize.
+ return (lcns >= 0 && lcns <= ucns);
+ }
+ if (range.LowerLimit().IsBinOpArray())
+ {
+ int lcns = range.LowerLimit().GetConstant();
+ // a.len + lcns, make sure we don't subtract too much from a.len.
+ if (lcns >= 0 || -lcns > arrSize)
+ {
+ return false;
+ }
+ // Make sure a.len + lcns <= ucns.
+ return (range.LowerLimit().vn == arrRefVN && (arrSize + lcns) <= ucns);
+ }
+ }
+
+ return false;
+}
+
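+// Try to eliminate the bounds check rooted at "treeParent" (a GT_COMMA whose op1 is a
+// GT_ARR_BOUNDS_CHECK): first handle a constant index against a known array size, then
+// compute the index's range symbolically, prove the computation does not overflow, widen
+// the range if the induction is monotonic, and finally remove the check if the range is
+// proven to be within bounds.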
+void RangeCheck::OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreePtr treeParent)
+{
+ // Check if we are dealing with a bounds check node.
+ if (treeParent->OperGet() != GT_COMMA)
+ {
+ return;
+ }
+
+ // If we are not looking at array bounds check, bail.
+ GenTreePtr tree = treeParent->gtOp.gtOp1;
+ if (tree->gtOper != GT_ARR_BOUNDS_CHECK)
+ {
+ return;
+ }
+
+ GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
+ m_pCurBndsChk = bndsChk;
+ GenTreePtr treeIndex = bndsChk->gtIndex;
+
+    // Take care of a constant index first, e.g., a[2].
+ ValueNum idxVn = treeIndex->gtVNPair.GetConservative();
+ ValueNum arrLenVn = bndsChk->gtArrLen->gtVNPair.GetConservative();
+ int arrSize = 0;
+
+ if (m_pCompiler->vnStore->IsVNConstant(arrLenVn))
+ {
+ ssize_t constVal = -1;
+ unsigned iconFlags = 0;
+
+ if (m_pCompiler->optIsTreeKnownIntValue(true, bndsChk->gtArrLen, &constVal, &iconFlags))
+ {
+ arrSize = (int)constVal;
+ }
+ }
+ else
+ {
+ arrSize = GetArrLength(arrLenVn);
+ }
+
+ JITDUMP("ArrSize for lengthVN:%03X = %d\n", arrLenVn, arrSize);
+ if (m_pCompiler->vnStore->IsVNConstant(idxVn) && arrSize > 0)
+ {
+ ssize_t idxVal = -1;
+ unsigned iconFlags = 0;
+ if (!m_pCompiler->optIsTreeKnownIntValue(true, treeIndex, &idxVal, &iconFlags))
+ {
+ return;
+ }
+
+ JITDUMP("[RangeCheck::OptimizeRangeCheck] Is index %d in <0, arrLenVn VN%X sz:%d>.\n", idxVal, arrLenVn,
+ arrSize);
+ if (arrSize > 0 && idxVal < arrSize && idxVal >= 0)
+ {
+ JITDUMP("Removing range check\n");
+ m_pCompiler->optRemoveRangeCheck(treeParent, stmt, true, GTF_ASG, true /* force remove */);
+ return;
+ }
+ }
+
+ GetRangeMap()->RemoveAll();
+ GetOverflowMap()->RemoveAll();
+
+ // Get the range for this index.
+ SearchPath* path = new (m_pCompiler->getAllocator()) SearchPath(m_pCompiler->getAllocator());
+
+ Range range = GetRange(block, stmt, treeIndex, path, false DEBUGARG(0));
+
+    // If the upper or lower limit is found to be unknown (top), or it was found to be
+    // unknown because we were over budget or the search was too deep, then return early.
+ if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
+ {
+        // Note: if the stack went too deep in the GetRange call, it would also go too
+        // deep in the DoesOverflow call, so return early.
+ return;
+ }
+
+ if (DoesOverflow(block, stmt, treeIndex, path))
+ {
+ JITDUMP("Method determined to overflow.\n");
+ return;
+ }
+
+ JITDUMP("Range value %s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ path->RemoveAll();
+ Widen(block, stmt, treeIndex, path, &range);
+
+ // If upper or lower limit is unknown, then return.
+ if (range.UpperLimit().IsUnknown() || range.LowerLimit().IsUnknown())
+ {
+ return;
+ }
+
+    // Check whether the range lies between the lower and upper bound values.
+ if (BetweenBounds(range, 0, bndsChk->gtArrLen))
+ {
+ JITDUMP("[RangeCheck::OptimizeRangeCheck] Between bounds\n");
+ m_pCompiler->optRemoveRangeCheck(treeParent, stmt, true, GTF_ASG, true /* force remove */);
+ }
+ return;
+}
+
+void RangeCheck::Widen(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, SearchPath* path, Range* pRange)
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("[RangeCheck::Widen] BB%02d, \n", block->bbNum);
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ Range& range = *pRange;
+
+ // Try to deduce the lower bound, if it is not known already.
+ if (range.LowerLimit().IsDependent() || range.LowerLimit().IsUnknown())
+ {
+ // To determine the lower bound, ask if the loop increases monotonically.
+ bool increasing = IsMonotonicallyIncreasing(tree, path);
+ JITDUMP("IsMonotonicallyIncreasing %d", increasing);
+ if (increasing)
+ {
+ GetRangeMap()->RemoveAll();
+ *pRange = GetRange(block, stmt, tree, path, true DEBUGARG(0));
+ }
+ }
+}
+
+bool RangeCheck::IsBinOpMonotonicallyIncreasing(GenTreePtr op1, GenTreePtr op2, genTreeOps oper, SearchPath* path)
+{
+ JITDUMP("[RangeCheck::IsBinOpMonotonicallyIncreasing] %p, %p\n", dspPtr(op1), dspPtr(op2));
+ // Check if we have a var + const.
+ if (op2->OperGet() == GT_LCL_VAR)
+ {
+ jitstd::swap(op1, op2);
+ }
+ if (op1->OperGet() != GT_LCL_VAR)
+ {
+ JITDUMP("Not monotonic because op1 is not lclVar.\n");
+ return false;
+ }
+ switch (op2->OperGet())
+ {
+ case GT_LCL_VAR:
+ return IsMonotonicallyIncreasing(op1, path) && IsMonotonicallyIncreasing(op2, path);
+
+ case GT_CNS_INT:
+ return oper == GT_ADD && op2->AsIntConCommon()->IconValue() >= 0 && IsMonotonicallyIncreasing(op1, path);
+
+ default:
+ JITDUMP("Not monotonic because expression is not recognized.\n");
+ return false;
+ }
+}
+
+bool RangeCheck::IsMonotonicallyIncreasing(GenTreePtr expr, SearchPath* path)
+{
+ JITDUMP("[RangeCheck::IsMonotonicallyIncreasing] %p\n", dspPtr(expr));
+ if (path->Lookup(expr))
+ {
+ return true;
+ }
+
+ // Add hashtable entry for expr.
+ path->Set(expr, nullptr);
+
+ // Remove hashtable entry for expr when we exit the present scope.
+ auto code = [&] { path->Remove(expr); };
+ jitstd::utility::scoped_code<decltype(code)> finally(code);
+
+ // If the rhs expr is constant, then it is not part of the dependency
+ // loop which has to increase monotonically.
+ ValueNum vn = expr->gtVNPair.GetConservative();
+ if (path->GetCount() > MAX_SEARCH_DEPTH)
+ {
+ return false;
+ }
+ else if (m_pCompiler->vnStore->IsVNConstant(vn))
+ {
+ return true;
+ }
+ // If the rhs expr is local, then try to find the def of the local.
+ else if (expr->IsLocal())
+ {
+ Location* loc = GetDef(expr);
+ if (loc == nullptr)
+ {
+ return false;
+ }
+ GenTreePtr asg = loc->parent;
+ assert(asg->OperKind() & GTK_ASGOP);
+ switch (asg->OperGet())
+ {
+ case GT_ASG:
+ return IsMonotonicallyIncreasing(asg->gtGetOp2(), path);
+
+ case GT_ASG_ADD:
+ return IsBinOpMonotonicallyIncreasing(asg->gtGetOp1(), asg->gtGetOp2(), GT_ADD, path);
+
+ default:
+ // All other 'asg->OperGet()' kinds, return false
+ break;
+ }
+ JITDUMP("Unknown local definition type\n");
+ return false;
+ }
+ else if (expr->OperGet() == GT_ADD)
+ {
+ return IsBinOpMonotonicallyIncreasing(expr->gtGetOp1(), expr->gtGetOp2(), GT_ADD, path);
+ }
+ else if (expr->OperGet() == GT_PHI)
+ {
+ for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ // If the arg is already in the path, skip.
+ if (path->Lookup(args->Current()))
+ {
+ continue;
+ }
+ if (!IsMonotonicallyIncreasing(args->Current(), path))
+ {
+ JITDUMP("Phi argument not monotonic\n");
+ return false;
+ }
+ }
+ return true;
+ }
+ JITDUMP("Unknown tree type\n");
+ return false;
+}
+
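+// Produce a key that uniquely identifies an SSA definition by packing the local number
+// into the upper 32 bits and the SSA number into the lower 32 bits.
+// For illustration: lclNum = 3, ssaNum = 2 yields 0x0000000300000002.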
+UINT64 RangeCheck::HashCode(unsigned lclNum, unsigned ssaNum)
+{
+ assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
+ return UINT64(lclNum) << 32 | ssaNum;
+}
+
+// Get the def location of a given variable.
+RangeCheck::Location* RangeCheck::GetDef(unsigned lclNum, unsigned ssaNum)
+{
+ Location* loc = nullptr;
+ if (ssaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return nullptr;
+ }
+ if (!m_fMappedDefs)
+ {
+ MapMethodDefs();
+ }
+ // No defs.
+ if (m_pDefTable == nullptr)
+ {
+ return nullptr;
+ }
+ m_pDefTable->Lookup(HashCode(lclNum, ssaNum), &loc);
+ return loc;
+}
+
+RangeCheck::Location* RangeCheck::GetDef(GenTreePtr tree)
+{
+ assert(tree->IsLocal());
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = tree->AsLclVarCommon()->GetSsaNum();
+ return GetDef(lclNum, ssaNum);
+}
+
+// Add the def location to the hash table.
+void RangeCheck::SetDef(UINT64 hash, Location* loc)
+{
+ if (m_pDefTable == nullptr)
+ {
+ m_pDefTable = new (m_pCompiler->getAllocator()) VarToLocMap(m_pCompiler->getAllocator());
+ }
+#ifdef DEBUG
+ Location* loc2;
+ if (m_pDefTable->Lookup(hash, &loc2))
+ {
+ JITDUMP("Already have BB%02d, %08X, %08X for hash => %0I64X", loc2->block->bbNum, dspPtr(loc2->stmt),
+ dspPtr(loc2->tree), hash);
+ assert(false);
+ }
+#endif
+ m_pDefTable->Set(hash, loc);
+}
+
+// Merge assertions on the edge flowing into the block about a variable.
+void RangeCheck::MergeEdgeAssertions(GenTreePtr tree, const ASSERT_VALARG_TP assertions, Range* pRange)
+{
+ if (BitVecOps::IsEmpty(m_pCompiler->apTraits, assertions))
+ {
+ return;
+ }
+
+ GenTreeLclVarCommon* lcl = (GenTreeLclVarCommon*)tree;
+ if (lcl->gtSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+    // Walk through the "assertions" to check if they apply.
+ BitVecOps::Iter iter(m_pCompiler->apTraits, assertions);
+ unsigned index = 0;
+ while (iter.NextElem(m_pCompiler->apTraits, &index))
+ {
+ index++;
+
+ Compiler::AssertionDsc* curAssertion = m_pCompiler->optGetAssertion((Compiler::AssertionIndex)index);
+
+        // Skip if the current assertion is not an array length bound or a constant bound.
+ if (!curAssertion->IsArrLenArithBound() && !curAssertion->IsArrLenBound() && !curAssertion->IsConstantBound())
+ {
+ continue;
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index);
+ }
+#endif
+
+ assert(m_pCompiler->vnStore->IsVNArrLenArithBound(curAssertion->op1.vn) ||
+ m_pCompiler->vnStore->IsVNArrLenBound(curAssertion->op1.vn) ||
+ m_pCompiler->vnStore->IsVNConstantBound(curAssertion->op1.vn));
+
+ Limit limit(Limit::keUndef);
+ genTreeOps cmpOper = GT_NONE;
+
+ // Current assertion is of the form (i < a.len - cns) != 0
+ if (curAssertion->IsArrLenArithBound())
+ {
+ ValueNumStore::ArrLenArithBoundInfo info;
+
+ // Get i, a.len, cns and < as "info."
+ m_pCompiler->vnStore->GetArrLenArithBoundInfo(curAssertion->op1.vn, &info);
+
+ if (m_pCompiler->lvaTable[lcl->gtLclNum].GetPerSsaData(lcl->gtSsaNum)->m_vnPair.GetConservative() !=
+ info.cmpOp)
+ {
+ continue;
+ }
+
+ switch (info.arrOper)
+ {
+ case GT_SUB:
+ case GT_ADD:
+ {
+ // If the operand that operates on the array is not constant, then done.
+ if (!m_pCompiler->vnStore->IsVNConstant(info.arrOp) ||
+ m_pCompiler->vnStore->TypeOfVN(info.arrOp) != TYP_INT)
+ {
+ break;
+ }
+ int cons = m_pCompiler->vnStore->ConstantValue<int>(info.arrOp);
+ limit = Limit(Limit::keBinOpArray, info.vnArray, info.arrOper == GT_SUB ? -cons : cons);
+ }
+ }
+
+ cmpOper = (genTreeOps)info.cmpOper;
+ }
+ // Current assertion is of the form (i < a.len) != 0
+ else if (curAssertion->IsArrLenBound())
+ {
+ ValueNumStore::ArrLenArithBoundInfo info;
+
+ // Get the info as "i", "<" and "a.len"
+ m_pCompiler->vnStore->GetArrLenBoundInfo(curAssertion->op1.vn, &info);
+
+ ValueNum lclVn =
+ m_pCompiler->lvaTable[lcl->gtLclNum].GetPerSsaData(lcl->gtSsaNum)->m_vnPair.GetConservative();
+ // If we don't have the same variable we are comparing against, bail.
+ if (lclVn != info.cmpOp)
+ {
+ continue;
+ }
+ limit.type = Limit::keArray;
+ limit.vn = info.vnArray;
+ cmpOper = (genTreeOps)info.cmpOper;
+ }
+ // Current assertion is of the form (i < 100) != 0
+ else if (curAssertion->IsConstantBound())
+ {
+ ValueNumStore::ConstantBoundInfo info;
+
+ // Get the info as "i", "<" and "100"
+ m_pCompiler->vnStore->GetConstantBoundInfo(curAssertion->op1.vn, &info);
+
+ ValueNum lclVn =
+ m_pCompiler->lvaTable[lcl->gtLclNum].GetPerSsaData(lcl->gtSsaNum)->m_vnPair.GetConservative();
+
+ // If we don't have the same variable we are comparing against, bail.
+ if (lclVn != info.cmpOpVN)
+ {
+ continue;
+ }
+
+ limit = Limit(Limit::keConstant, ValueNumStore::NoVN, info.constVal);
+ cmpOper = (genTreeOps)info.cmpOper;
+ }
+ else
+ {
+ noway_assert(false);
+ }
+
+ if (limit.IsUndef())
+ {
+ continue;
+ }
+
+ // Make sure the assertion is of the form != 0 or == 0.
+ if (curAssertion->op2.vn != m_pCompiler->vnStore->VNZeroForType(TYP_INT))
+ {
+ continue;
+ }
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ m_pCompiler->optPrintAssertion(curAssertion, (Compiler::AssertionIndex)index);
+ }
+#endif
+
+ noway_assert(limit.IsBinOpArray() || limit.IsArray() || limit.IsConstant());
+
+ ValueNum arrLenVN = m_pCurBndsChk->gtArrLen->gtVNPair.GetConservative();
+ ValueNum arrRefVN = ValueNumStore::NoVN;
+
+ if (m_pCompiler->vnStore->IsVNArrLen(arrLenVN))
+ {
+ // Get the array reference from the length.
+ arrRefVN = m_pCompiler->vnStore->GetArrForLenVn(arrLenVN);
+ }
+
+ // During assertion prop we add assertions of the form:
+ //
+ // (i < a.Length) == 0
+ // (i < a.Length) != 0
+ // (i < 100) == 0
+ // (i < 100) != 0
+ //
+ // At this point, we have detected that op1.vn is (i < a.Length) or (i < a.Length + cns) or
+ // (i < 100) and the op2.vn is 0.
+ //
+        // Now, let us check if we are == 0 (i.e., the op1 assertion is false) or != 0 (the
+        // op1 assertion is true).
+ //
+ // If we have an assertion of the form == 0 (i.e., equals false), then reverse relop.
+ // The relop has to be reversed because we have: (i < a.Length) is false which is the same
+ // as (i >= a.Length).
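+        //
+        // Worked example (illustrative): from the assertion "(i < a.Length) != 0" on this
+        // edge we get cmpOper = GT_LT and limit = a.Length; the inclusive-bound adjustment
+        // below turns that into an upper limit of a.Length - 1 for i. From "(i > 5) != 0"
+        // we get cmpOper = GT_GT and the adjustment yields a lower limit of 6 for i.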
+ if (curAssertion->assertionKind == Compiler::OAK_EQUAL)
+ {
+ cmpOper = GenTree::ReverseRelop(cmpOper);
+ }
+
+ // Bounds are inclusive, so add -1 for upper bound when "<". But make sure we won't overflow.
+ if (cmpOper == GT_LT && !limit.AddConstant(-1))
+ {
+ continue;
+ }
+ // Bounds are inclusive, so add +1 for lower bound when ">". But make sure we won't overflow.
+ if (cmpOper == GT_GT && !limit.AddConstant(1))
+ {
+ continue;
+ }
+
+        // If the current upper limit is a constant and the incoming limit is not about our
+        // array's length, it cannot tighten the current bound, so skip.
+ if (pRange->uLimit.IsConstant() && limit.vn != arrRefVN)
+ {
+ continue;
+ }
+
+ // Check if the incoming limit from assertions tightens the existing upper limit.
+ if ((pRange->uLimit.IsArray() || pRange->uLimit.IsBinOpArray()) && pRange->uLimit.vn == arrRefVN)
+ {
+ // We have checked the current range's (pRange's) upper limit is either of the form:
+ // a.Length
+ // a.Length + cns
+ // and a == the bndsChkCandidate's arrRef
+ //
+ // We want to check if the incoming limit tightens the bound, and for that the
+ // we need to make sure that incoming limit is also on a.Length or a.Length + cns
+ // and not b.Length or some c.Length.
+
+ if (limit.vn != arrRefVN)
+ {
+ JITDUMP("Array ref did not match cur=$%x, assert=$%x\n", arrRefVN, limit.vn);
+ continue;
+ }
+
+ int curCns = (pRange->uLimit.IsBinOpArray()) ? pRange->uLimit.cns : 0;
+ int limCns = (limit.IsBinOpArray()) ? limit.cns : 0;
+
+ // Incoming limit doesn't tighten the existing upper limit.
+ if (limCns >= curCns)
+ {
+ JITDUMP("Bound limit %d doesn't tighten current bound %d\n", limCns, curCns);
+ continue;
+ }
+ }
+ else
+ {
+ // Current range's upper bound is not "a.Length or a.Length + cns" and the
+ // incoming limit is not on the same arrRef as the bounds check candidate.
+            // So we could skip this assertion. But when the upper limit is Dependent or
+            // Unknown, the incoming assertion can still tighten the upper bound to a saner
+            // value, so do not skip the assertion.
+ }
+
+ // cmpOp (loop index i) cmpOper a.len +/- cns
+ switch (cmpOper)
+ {
+ case GT_LT:
+ pRange->uLimit = limit;
+ break;
+
+ case GT_GT:
+ pRange->lLimit = limit;
+ break;
+
+ case GT_GE:
+ pRange->lLimit = limit;
+ break;
+
+ case GT_LE:
+ pRange->uLimit = limit;
+ break;
+
+ default:
+ // All other 'cmpOper' kinds leave lLimit/uLimit unchanged
+ break;
+ }
+ JITDUMP("The range after edge merging:");
+ JITDUMP(pRange->ToString(m_pCompiler->getAllocatorDebugOnly()));
+ JITDUMP("\n");
+ }
+}
+
+// Merge assertions from the pred edges of the block, i.e., check for any assertions about "op's" value numbers for phi
+// arguments. If "op" is not a phi argument, check if we have assertions about local variables.
+void RangeCheck::MergeAssertion(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr op, SearchPath* path, Range* pRange DEBUGARG(int indent))
+{
+ JITDUMP("Merging assertions from pred edges of BB%02d for op(%p) $%03x\n", block->bbNum, dspPtr(op),
+ op->gtVNPair.GetConservative());
+ ASSERT_TP assertions = BitVecOps::UninitVal();
+
+ // If we have a phi arg, we can get to the block from it and use its assertion out.
+ if (op->gtOper == GT_PHI_ARG)
+ {
+ GenTreePhiArg* arg = (GenTreePhiArg*)op;
+ BasicBlock* pred = arg->gtPredBB;
+ if (pred->bbFallsThrough() && pred->bbNext == block)
+ {
+ assertions = pred->bbAssertionOut;
+ JITDUMP("Merge assertions from pred BB%02d edge: %s\n", pred->bbNum,
+ BitVecOps::ToString(m_pCompiler->apTraits, assertions));
+ }
+ else if ((pred->bbJumpKind == BBJ_COND || pred->bbJumpKind == BBJ_ALWAYS) && pred->bbJumpDest == block)
+ {
+ if (m_pCompiler->bbJtrueAssertionOut != nullptr)
+ {
+ assertions = m_pCompiler->bbJtrueAssertionOut[pred->bbNum];
+ JITDUMP("Merge assertions from pred BB%02d JTrue edge: %s\n", pred->bbNum,
+ BitVecOps::ToString(m_pCompiler->apTraits, assertions));
+ }
+ }
+ }
+ // Get assertions from bbAssertionIn.
+ else if (op->IsLocal())
+ {
+ assertions = block->bbAssertionIn;
+ }
+
+ if (!BitVecOps::MayBeUninit(assertions))
+ {
+ // Perform the merge step to fine tune the range value.
+ MergeEdgeAssertions(op, assertions, pRange);
+ }
+}
+
+// Compute the range for a binary operation.
+Range RangeCheck::ComputeRangeForBinOp(BasicBlock* block,
+ GenTreePtr stmt,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ genTreeOps oper,
+ SearchPath* path,
+ bool monotonic DEBUGARG(int indent))
+{
+ Range* op1RangeCached = nullptr;
+ Range op1Range = Limit(Limit::keUndef);
+ bool inPath1 = path->Lookup(op1);
+ // Check if the range value is already cached.
+ if (!GetRangeMap()->Lookup(op1, &op1RangeCached))
+ {
+ // If we already have the op in the path, then, just rely on assertions, else
+ // find the range.
+ if (!inPath1)
+ {
+ op1Range = GetRange(block, stmt, op1, path, monotonic DEBUGARG(indent));
+ }
+ else
+ {
+ op1Range = Range(Limit(Limit::keDependent));
+ }
+ MergeAssertion(block, stmt, op1, path, &op1Range DEBUGARG(indent + 1));
+ }
+ else
+ {
+ op1Range = *op1RangeCached;
+ }
+
+ Range* op2RangeCached;
+ Range op2Range = Limit(Limit::keUndef);
+ bool inPath2 = path->Lookup(op2);
+ // Check if the range value is already cached.
+ if (!GetRangeMap()->Lookup(op2, &op2RangeCached))
+ {
+ // If we already have the op in the path, then, just rely on assertions, else
+ // find the range.
+ if (!inPath2)
+ {
+ op2Range = GetRange(block, stmt, op2, path, monotonic DEBUGARG(indent));
+ }
+ else
+ {
+ op2Range = Range(Limit(Limit::keDependent));
+ }
+ MergeAssertion(block, stmt, op2, path, &op2Range DEBUGARG(indent + 1));
+ }
+ else
+ {
+ op2Range = *op2RangeCached;
+ }
+
+ assert(oper == GT_ADD); // For now just GT_ADD.
+ Range r = RangeOps::Add(op1Range, op2Range);
+ JITDUMP("BinOp add ranges %s %s = %s\n", op1Range.ToString(m_pCompiler->getAllocatorDebugOnly()),
+ op2Range.ToString(m_pCompiler->getAllocatorDebugOnly()), r.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ return r;
+}
+
+// Compute the range for a local var definition.
+Range RangeCheck::ComputeRangeForLocalDef(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent))
+{
+ // Get the program location of the def.
+ Location* loc = GetDef(expr);
+
+ // If we can't reach the def, then return unknown range.
+ if (loc == nullptr)
+ {
+ return Range(Limit(Limit::keUnknown));
+ }
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ JITDUMP("----------------------------------------------------\n");
+ m_pCompiler->gtDispTree(loc->stmt);
+ JITDUMP("----------------------------------------------------\n");
+ }
+#endif
+ GenTreePtr asg = loc->parent;
+ assert(asg->OperKind() & GTK_ASGOP);
+ switch (asg->OperGet())
+ {
+ // If the operator of the definition is assignment, then compute the range of the rhs.
+ case GT_ASG:
+ {
+ Range range = GetRange(loc->block, loc->stmt, asg->gtGetOp2(), path, monotonic DEBUGARG(indent));
+ JITDUMP("Merge assertions from BB%02d:%s for assignment about %p\n", block->bbNum,
+ BitVecOps::ToString(m_pCompiler->apTraits, block->bbAssertionIn), dspPtr(asg->gtGetOp1()));
+ MergeEdgeAssertions(asg->gtGetOp1(), block->bbAssertionIn, &range);
+ JITDUMP("done merging\n");
+ return range;
+ }
+
+ case GT_ASG_ADD:
+ // If the operator of the definition is +=, then compute the range of the operands of +.
+ // Note that gtGetOp1 will return op1 to be the lhs; in the formulation of ssa, we have
+ // a side table for defs and the lhs of a += is considered to be a use for SSA numbering.
+ return ComputeRangeForBinOp(loc->block, loc->stmt, asg->gtGetOp1(), asg->gtGetOp2(), GT_ADD, path,
+ monotonic DEBUGARG(indent));
+
+ default:
+ // All other 'asg->OperGet()' kinds, return Limit::keUnknown
+ break;
+ }
+ return Range(Limit(Limit::keUnknown));
+}
+
+// https://msdn.microsoft.com/en-us/windows/apps/hh285054.aspx
+// CLR throws IDS_EE_ARRAY_DIMENSIONS_EXCEEDED if array length is > INT_MAX.
+// new byte[INT_MAX]; still throws OutOfMemoryException on my system with 32 GB RAM.
+// I believe practical limits are still smaller than this number.
+#define ARRLEN_MAX (0x7FFFFFFF)
+
+// Get the limit's maximum possible value, treating array length to be ARRLEN_MAX.
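+// For illustration: a limit of "a.len - 3" with an unknown allocation size maxes out at
+// ARRLEN_MAX - 3, while a limit of "a.len + 3" for an array known to be new int[10] maxes
+// out at 13. A constant limit is its own maximum.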
+bool RangeCheck::GetLimitMax(Limit& limit, int* pMax)
+{
+ int& max1 = *pMax;
+ switch (limit.type)
+ {
+ case Limit::keConstant:
+ max1 = limit.GetConstant();
+ break;
+
+ case Limit::keBinOpArray:
+ {
+ int tmp = GetArrLength(limit.vn);
+ if (tmp <= 0)
+ {
+ tmp = ARRLEN_MAX;
+ }
+ if (IntAddOverflows(tmp, limit.GetConstant()))
+ {
+ return false;
+ }
+ max1 = tmp + limit.GetConstant();
+ }
+ break;
+
+ case Limit::keArray:
+ {
+ int tmp = GetArrLength(limit.vn);
+ if (tmp <= 0)
+ {
+ tmp = ARRLEN_MAX;
+ }
+ max1 = tmp;
+ }
+ break;
+
+ case Limit::keSsaVar:
+ case Limit::keBinOp:
+ if (m_pCompiler->vnStore->IsVNConstant(limit.vn) && m_pCompiler->vnStore->TypeOfVN(limit.vn) == TYP_INT)
+ {
+ max1 = m_pCompiler->vnStore->ConstantValue<int>(limit.vn);
+ }
+ else
+ {
+ return false;
+ }
+ if (limit.type == Limit::keBinOp)
+ {
+ if (IntAddOverflows(max1, limit.GetConstant()))
+ {
+ return false;
+ }
+ max1 += limit.GetConstant();
+ }
+ break;
+
+ default:
+ return false;
+ }
+ return true;
+}
+
+// Check if the arithmetic overflows.
+bool RangeCheck::AddOverflows(Limit& limit1, Limit& limit2)
+{
+ int max1;
+ if (!GetLimitMax(limit1, &max1))
+ {
+ return true;
+ }
+
+ int max2;
+ if (!GetLimitMax(limit2, &max2))
+ {
+ return true;
+ }
+
+ return IntAddOverflows(max1, max2);
+}
+
+// Does the bin operation overflow.
+bool RangeCheck::DoesBinOpOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr op1, GenTreePtr op2, SearchPath* path)
+{
+ if (!path->Lookup(op1) && DoesOverflow(block, stmt, op1, path))
+ {
+ return true;
+ }
+
+ if (!path->Lookup(op2) && DoesOverflow(block, stmt, op2, path))
+ {
+ return true;
+ }
+
+ // Get the cached ranges of op1
+ Range* op1Range = nullptr;
+ if (!GetRangeMap()->Lookup(op1, &op1Range))
+ {
+ return true;
+ }
+ // Get the cached ranges of op2
+ Range* op2Range = nullptr;
+ if (!GetRangeMap()->Lookup(op2, &op2Range))
+ {
+ return true;
+ }
+
+ // If dependent, check if we can use some assertions.
+ if (op1Range->UpperLimit().IsDependent())
+ {
+ MergeAssertion(block, stmt, op1, path, op1Range DEBUGARG(0));
+ }
+
+ // If dependent, check if we can use some assertions.
+ if (op2Range->UpperLimit().IsDependent())
+ {
+ MergeAssertion(block, stmt, op2, path, op2Range DEBUGARG(0));
+ }
+
+ JITDUMP("Checking bin op overflow %s %s\n", op1Range->ToString(m_pCompiler->getAllocatorDebugOnly()),
+ op2Range->ToString(m_pCompiler->getAllocatorDebugOnly()));
+
+ if (!AddOverflows(op1Range->UpperLimit(), op2Range->UpperLimit()))
+ {
+ return false;
+ }
+ return true;
+}
+
+// Check if the rhs of the variable's definition involves arithmetic that overflows.
+bool RangeCheck::DoesVarDefOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ // Get the definition.
+ Location* loc = GetDef(expr);
+ if (loc == nullptr)
+ {
+ return true;
+ }
+ // Get the parent node which is an asg.
+ GenTreePtr asg = loc->parent;
+ assert(asg->OperKind() & GTK_ASGOP);
+ switch (asg->OperGet())
+ {
+ case GT_ASG:
+ return DoesOverflow(loc->block, loc->stmt, asg->gtGetOp2(), path);
+
+ case GT_ASG_ADD:
+        // For GT_ASG_ADD, op2 is a use; op1 is also a use since we keep a side table for defs in the useasg case.
+ return DoesBinOpOverflow(loc->block, loc->stmt, asg->gtGetOp1(), asg->gtGetOp2(), path);
+
+ default:
+ // All other 'asg->OperGet()' kinds, conservatively return true
+ break;
+ }
+ return true;
+}
+
+bool RangeCheck::DoesPhiOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ GenTreePtr arg = args->Current();
+ if (path->Lookup(arg))
+ {
+ continue;
+ }
+ if (DoesOverflow(block, stmt, args->Current(), path))
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool RangeCheck::DoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ bool overflows = false;
+ if (!GetOverflowMap()->Lookup(expr, &overflows))
+ {
+ overflows = ComputeDoesOverflow(block, stmt, expr, path);
+ }
+ return overflows;
+}
+
+bool RangeCheck::ComputeDoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path)
+{
+ JITDUMP("Does overflow %p?\n", dspPtr(expr));
+ path->Set(expr, block);
+
+ bool overflows = true;
+
+ // Remove hashtable entry for expr when we exit the present scope.
+ Range range = Limit(Limit::keUndef);
+ ValueNum vn = expr->gtVNPair.GetConservative();
+ if (path->GetCount() > MAX_SEARCH_DEPTH)
+ {
+ overflows = true;
+ }
+ // If the definition chain resolves to a constant, it doesn't overflow.
+ else if (m_pCompiler->vnStore->IsVNConstant(vn))
+ {
+ overflows = false;
+ }
+ // Check if the var def has rhs involving arithmetic that overflows.
+ else if (expr->IsLocal())
+ {
+ overflows = DoesVarDefOverflow(block, stmt, expr, path);
+ }
+ // Check if add overflows.
+ else if (expr->OperGet() == GT_ADD)
+ {
+ overflows = DoesBinOpOverflow(block, stmt, expr->gtGetOp1(), expr->gtGetOp2(), path);
+ }
+ // Walk through phi arguments to check if phi arguments involve arithmetic that overflows.
+ else if (expr->OperGet() == GT_PHI)
+ {
+ overflows = DoesPhiOverflow(block, stmt, expr, path);
+ }
+ GetOverflowMap()->Set(expr, overflows);
+ path->Remove(expr);
+ return overflows;
+}
+
+struct Node
+{
+ Range range;
+ Node* next;
+ Node() : range(Limit(Limit::keUndef)), next(nullptr)
+ {
+ }
+};
+
+// Compute the range recursively by asking for the range of each variable in the dependency chain.
+// eg.: c = a + b; ask range of "a" and "b" and add the results.
+// If the result cannot be determined, i.e., the dependency chain does not terminate in a value
+// but continues to loop (which happens with phi nodes), we end the looping by calling the
+// value "dependent" (dep).
+// If the loop is proven to be "monotonic", then make liberal decisions while merging phi nodes.
+// eg.: merge((0, dep), (dep, dep)) = (0, dep)
+Range RangeCheck::ComputeRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent))
+{
+ bool newlyAdded = !path->Set(expr, block);
+ Range range = Limit(Limit::keUndef);
+
+ ValueNum vn = expr->gtVNPair.GetConservative();
+ // If newly added in the current search path, then reduce the budget.
+ if (newlyAdded)
+ {
+ // Assert that we are not re-entrant for a node which has been
+ // visited and resolved before and not currently on the search path.
+ noway_assert(!GetRangeMap()->Lookup(expr));
+ m_nVisitBudget--;
+ }
+ // Prevent quadratic behavior.
+ if (IsOverBudget())
+ {
+ // Set to unknown, since an Unknown range resolution, will stop further
+ // searches. This is because anything that merges with Unknown will
+ // yield Unknown. Unknown is lattice top.
+ range = Range(Limit(Limit::keUnknown));
+ JITDUMP("GetRange not tractable within max node visit budget.\n");
+ }
+ // Prevent unbounded recursion.
+ else if (path->GetCount() > MAX_SEARCH_DEPTH)
+ {
+ // Unknown is lattice top, anything that merges with Unknown will yield Unknown.
+ range = Range(Limit(Limit::keUnknown));
+ JITDUMP("GetRange not tractable within max stack depth.\n");
+ }
+ // TODO-CQ: The current implementation is reliant on integer storage types
+ // for constants. It could use INT64. Still, representing ULONG constants
+ // might require preserving the var_type whether it is a un/signed 64-bit.
+ // JIT64 doesn't do anything for "long" either. No asm diffs.
+ else if (expr->TypeGet() == TYP_LONG || expr->TypeGet() == TYP_ULONG)
+ {
+ range = Range(Limit(Limit::keUnknown));
+ JITDUMP("GetRange long or ulong, setting to unknown value.\n");
+ }
+ // If VN is constant return range as constant.
+ else if (m_pCompiler->vnStore->IsVNConstant(vn))
+ {
+ range = (m_pCompiler->vnStore->TypeOfVN(vn) == TYP_INT)
+ ? Range(Limit(Limit::keConstant, m_pCompiler->vnStore->ConstantValue<int>(vn)))
+ : Limit(Limit::keUnknown);
+ }
+ // If local, find the definition from the def map and evaluate the range for rhs.
+ else if (expr->IsLocal())
+ {
+ range = ComputeRangeForLocalDef(block, stmt, expr, path, monotonic DEBUGARG(indent + 1));
+ MergeAssertion(block, stmt, expr, path, &range DEBUGARG(indent + 1));
+ }
+ // If add, then compute the range for the operands and add them.
+ else if (expr->OperGet() == GT_ADD)
+ {
+ range = ComputeRangeForBinOp(block, stmt, expr->gtGetOp1(), expr->gtGetOp2(), GT_ADD, path,
+ monotonic DEBUGARG(indent + 1));
+ }
+ // If phi, then compute the range for arguments, calling the result "dependent" when looping begins.
+ else if (expr->OperGet() == GT_PHI)
+ {
+ Node* cur = nullptr;
+ Node* head = nullptr;
+ for (GenTreeArgList* args = expr->gtOp.gtOp1->AsArgList(); args != nullptr; args = args->Rest())
+ {
+ // Collect the range for each phi argument in a linked list.
+ Node* node = new (m_pCompiler->getAllocator()) Node();
+ if (cur != nullptr)
+ {
+ cur->next = node;
+ cur = cur->next;
+ }
+ else
+ {
+ head = node;
+ cur = head;
+ }
+ if (path->Lookup(args->Current()))
+ {
+ JITDUMP("PhiArg %p is already being computed\n", dspPtr(args->Current()));
+ cur->range = Range(Limit(Limit::keDependent));
+ MergeAssertion(block, stmt, args->Current(), path, &cur->range DEBUGARG(indent + 1));
+ continue;
+ }
+ cur->range = GetRange(block, stmt, args->Current(), path, monotonic DEBUGARG(indent + 1));
+ MergeAssertion(block, stmt, args->Current(), path, &cur->range DEBUGARG(indent + 1));
+ }
+ // Walk the linked list and merge the ranges.
+ for (cur = head; cur; cur = cur->next)
+ {
+ assert(!cur->range.LowerLimit().IsUndef());
+ assert(!cur->range.UpperLimit().IsUndef());
+ JITDUMP("Merging ranges %s %s:", range.ToString(m_pCompiler->getAllocatorDebugOnly()),
+ cur->range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ range = RangeOps::Merge(range, cur->range, monotonic);
+ JITDUMP("%s\n", range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ }
+ }
+ else
+ {
+ // The expression is not recognized, so the result is unknown.
+ range = Range(Limit(Limit::keUnknown));
+ }
+
+ GetRangeMap()->Set(expr, new (m_pCompiler->getAllocator()) Range(range));
+ path->Remove(expr);
+ return range;
+}
+
+#ifdef DEBUG
+void Indent(int indent)
+{
+ for (int i = 0; i < indent; ++i)
+ {
+ JITDUMP(" ");
+ }
+}
+#endif
+
+// Get the range, if it is already computed, use the cached range value, else compute it.
+Range RangeCheck::GetRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent))
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ Indent(indent);
+ JITDUMP("[RangeCheck::GetRange] BB%02d", block->bbNum);
+ m_pCompiler->gtDispTree(expr);
+ Indent(indent);
+        JITDUMP("{\n");
+ }
+#endif
+
+ Range* pRange = nullptr;
+ Range range = GetRangeMap()->Lookup(expr, &pRange) ? *pRange : ComputeRange(block, stmt, expr, path,
+ monotonic DEBUGARG(indent));
+
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ Indent(indent);
+ JITDUMP(" %s Range (%08X) => %s\n", (pRange == nullptr) ? "Computed" : "Cached", dspPtr(expr),
+ range.ToString(m_pCompiler->getAllocatorDebugOnly()));
+ Indent(indent);
+        JITDUMP("}\n");
+ }
+#endif
+ return range;
+}
+
+// If this is a tree local definition add its location to the def map.
+void RangeCheck::MapStmtDefs(const Location& loc)
+{
+ GenTreePtr tree = loc.tree;
+ if (!tree->IsLocal())
+ {
+ return;
+ }
+
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ unsigned ssaNum = tree->AsLclVarCommon()->GetSsaNum();
+ if (ssaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ return;
+ }
+
+ // If useasg then get the correct ssaNum to add to the map.
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ unsigned ssaNum = m_pCompiler->GetSsaNumForLocalVarDef(tree);
+ if (ssaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+            // Only record the def when the parent is an assignment; this avoids ind(addr) use-asgs.
+ if (loc.parent->OperKind() & GTK_ASGOP)
+ {
+ SetDef(HashCode(lclNum, ssaNum), new (m_pCompiler->getAllocator()) Location(loc));
+ }
+ }
+ }
+ // If def get the location and store it against the variable's ssaNum.
+ else if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ if (loc.parent->OperGet() == GT_ASG)
+ {
+ SetDef(HashCode(lclNum, ssaNum), new (m_pCompiler->getAllocator()) Location(loc));
+ }
+ }
+}
+
+struct MapMethodDefsData
+{
+ RangeCheck* rc;
+ BasicBlock* block;
+ GenTreePtr stmt;
+
+ MapMethodDefsData(RangeCheck* rc, BasicBlock* block, GenTreePtr stmt) : rc(rc), block(block), stmt(stmt)
+ {
+ }
+};
+
+Compiler::fgWalkResult MapMethodDefsVisitor(GenTreePtr* ptr, Compiler::fgWalkData* data)
+{
+ MapMethodDefsData* rcd = ((MapMethodDefsData*)data->pCallbackData);
+ rcd->rc->MapStmtDefs(RangeCheck::Location(rcd->block, rcd->stmt, *ptr, data->parent));
+ return Compiler::WALK_CONTINUE;
+}
+
+void RangeCheck::MapMethodDefs()
+{
+ // First, gather where all definitions occur in the program and store it in a map.
+ for (BasicBlock* block = m_pCompiler->fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ MapMethodDefsData data(this, block, stmt);
+ m_pCompiler->fgWalkTreePre(&stmt->gtStmt.gtStmtExpr, MapMethodDefsVisitor, &data, false, true);
+ }
+ }
+ m_fMappedDefs = true;
+}
+
+// Entry point to range check optimizations.
+void RangeCheck::OptimizeRangeChecks()
+{
+ if (m_pCompiler->fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ JITDUMP("*************** In OptimizeRangeChecks()\n");
+ JITDUMP("Blocks/trees before phase\n");
+ m_pCompiler->fgDispBasicBlocks(true);
+ }
+#endif
+
+ // Walk through trees looking for arrBndsChk node and check if it can be optimized.
+ for (BasicBlock* block = m_pCompiler->fgFirstBB; block; block = block->bbNext)
+ {
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ if (IsOverBudget())
+ {
+ return;
+ }
+ OptimizeRangeCheck(block, stmt, tree);
+ }
+ }
+ }
+}
diff --git a/src/jit/rangecheck.h b/src/jit/rangecheck.h
new file mode 100644
index 0000000000..b00bfb8a67
--- /dev/null
+++ b/src/jit/rangecheck.h
@@ -0,0 +1,603 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+//
+// We take the following approach to range check analysis:
+//
+// Consider the following loop:
+// for (int i = 0; i < a.len; ++i) {
+// a[i] = 0;
+// }
+//
+// This would be represented as:
+// i_0 = 0; BB0
+// / ______ a[i_1] = 0; BB2
+// / / i_2 = i_1 + 1;
+// / / ^
+// i_1 = phi(i_0, i_2); BB1 |
+// i_1 < a.len -------------------+
+//
+// BB0 -> BB1
+// BB1 -> (i_1 < a.len) ? BB2 : BB3
+// BB2 -> BB1
+// BB3 -> return
+//
+// **Step 1. Walk the statements in the method checking if there is a bounds check.
+// If there is a bounds check, ask the range of the index variable.
+// In the above example i_1's range.
+//
+// **Step 2. Follow the defs and the dependency chain:
+// i_1 is a local, so go to its definition which is i_1 = phi(i_0, i_2).
+//
+// Since rhs is a phi, we ask the range for i_0 and i_2 in the hopes of merging
+// the resulting ranges for i_1.
+//
+// The range of i_0 follows immediately when going to its definition.
+// Ask for the range of i_2, which leads to i_1 + 1.
+// Ask for the range of i_1 and notice that we are looping; call the range of i_1
+// "dependent" and stop following the loop. The range of "1" is just <1, 1>.
+//
+// Now we have exhausted all the variables for which the range can be determined.
+// The others are either "unknown" or "dependent."
+//
+// For a phi argument we also merge assertions from its pred block's edges; otherwise we use
+// the block's assertionIn. This gives us a.len as an upper bound for i_1.
+//
+// **Step 3. Check if an overflow occurs in the dependency chain (loop.)
+// In the above case, we want to make sure there is no overflow in the definitions
+// involving i_1 and i_2. Merge assertions from the block's edges whenever possible.
+//
+// **Step 4. Check if the dependency chain is monotonic.
+//
+// **Step 5. If monotonic is true, then perform a widening step, where we assume that the
+// SSA variables that are "dependent" get their values from the definitions in the
+// dependency loop, and that their initial values come from the definitions that are not in
+// the dependency loop; in this case i_0's value, which is 0.
+//
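+//
+// For the loop above, this plays out roughly as follows (an illustration): i_1's range first
+// computes as <dep, a.len - 1>, the lower bound being "dependent" because it comes back
+// through the i_2 = i_1 + 1 cycle. Since the cycle only adds a non-negative constant, the
+// chain is monotonically increasing, and widening replaces the dependent lower bound with
+// the initial value i_0 = 0, giving <0, a.len - 1>, which BetweenBounds can prove in bounds.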
+
+#pragma once
+#include "compiler.h"
+#include "expandarray.h"
+
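+// Conservative signed-add overflow check used by the limit arithmetic below.
+// For illustration: IntAddOverflows(INT_MAX, 1) and IntAddOverflows(INT_MIN, -1) are true,
+// while a mixed-sign addition such as IntAddOverflows(INT_MAX, -1) is false.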
+static bool IntAddOverflows(int max1, int max2)
+{
+ if (max1 > 0 && max2 > 0 && INT_MAX - max1 < max2)
+ {
+ return true;
+ }
+ if (max1 < 0 && max2 < 0 && max1 < INT_MIN - max2)
+ {
+ return true;
+ }
+ return false;
+}
+
+// BNF for range and limit structures
+// Range -> Limit, Limit | Dependent | None | Unknown
+// Limit -> Symbol | BinOp | int
+// BinOp -> Symbol + int
+// SsaVar -> lclNum, ssaNum
+// Symbol -> SsaVar | ArrLen
+// ArrLen -> SsaVar
+// SsaVar -> vn
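+//
+// For illustration, a few encodings under this scheme (values hypothetical):
+//   the constant 5           -> keConstant,   cns = 5
+//   a.len                    -> keArray,      vn  = VN of the array reference
+//   a.len - 3                -> keBinOpArray, vn  = VN of the array reference, cns = -3
+//   an SSA variable's value  -> keSsaVar,     vn  = that variable's VN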
+struct Limit
+{
+ enum LimitType
+ {
+ keUndef, // The limit is yet to be computed.
+ keBinOp,
+ keBinOpArray,
+ keSsaVar,
+ keArray,
+ keConstant,
+ keDependent, // The limit is dependent on some other value.
+ keUnknown, // The limit could not be determined.
+ };
+
+ Limit() : type(keUndef)
+ {
+ }
+
+ Limit(LimitType type) : type(type)
+ {
+ }
+
+ Limit(LimitType type, int cns) : cns(cns), type(type)
+ {
+ assert(type == keConstant);
+ }
+
+ Limit(LimitType type, ValueNum vn, int cns) : cns(cns), vn(vn), type(type)
+ {
+        assert(type == keBinOpArray || type == keBinOp);
+ }
+
+ bool IsUndef()
+ {
+ return type == keUndef;
+ }
+ bool IsDependent()
+ {
+ return type == keDependent;
+ }
+ bool IsUnknown()
+ {
+ return type == keUnknown;
+ }
+ bool IsConstant()
+ {
+ return type == keConstant;
+ }
+ int GetConstant()
+ {
+ return cns;
+ }
+ bool IsArray()
+ {
+ return type == keArray;
+ }
+ bool IsSsaVar()
+ {
+ return type == keSsaVar;
+ }
+ bool IsBinOpArray()
+ {
+ return type == keBinOpArray;
+ }
+ bool IsBinOp()
+ {
+ return type == keBinOp;
+ }
+ bool AddConstant(int i)
+ {
+ switch (type)
+ {
+ case keDependent:
+ return true;
+ case keBinOp:
+ case keBinOpArray:
+ if (IntAddOverflows(cns, i))
+ {
+ return false;
+ }
+ cns += i;
+ return true;
+
+ case keSsaVar:
+ type = keBinOp;
+ cns = i;
+ return true;
+
+ case keArray:
+ type = keBinOpArray;
+ cns = i;
+ return true;
+
+ case keConstant:
+ if (IntAddOverflows(cns, i))
+ {
+ return false;
+ }
+ cns += i;
+ return true;
+
+ case keUndef:
+ case keUnknown:
+ // For these values of 'type', conservatively return false
+ break;
+ }
+
+ return false;
+ }
+
+ bool Equals(Limit& l)
+ {
+ switch (type)
+ {
+ case keUndef:
+ case keUnknown:
+ case keDependent:
+ return l.type == type;
+
+ case keBinOp:
+ case keBinOpArray:
+ return l.type == type && l.vn == vn && l.cns == cns;
+
+ case keSsaVar:
+ case keArray:
+ return l.type == type && l.vn == vn;
+
+ case keConstant:
+ return l.type == type && l.cns == cns;
+ }
+ return false;
+ }
+#ifdef DEBUG
+ const char* ToString(IAllocator* alloc)
+ {
+ unsigned size = 64;
+ char* buf = (char*)alloc->Alloc(size);
+ switch (type)
+ {
+ case keUndef:
+ return "Undef";
+
+ case keUnknown:
+ return "Unknown";
+
+ case keDependent:
+ return "Dependent";
+
+ case keBinOp:
+ case keBinOpArray:
+ sprintf_s(buf, size, "VN%04X + %d", vn, cns);
+ return buf;
+
+ case keSsaVar:
+ sprintf_s(buf, size, "VN%04X", vn);
+ return buf;
+
+ case keArray:
+ sprintf_s(buf, size, "VN%04X", vn);
+ return buf;
+
+ case keConstant:
+ sprintf_s(buf, size, "%d", cns);
+ return buf;
+ }
+ unreached();
+ }
+#endif
+ int cns;
+ ValueNum vn;
+ LimitType type;
+};
+
+// Range struct contains upper and lower limit.
+struct Range
+{
+ Limit uLimit;
+ Limit lLimit;
+
+ Range(const Limit& limit) : uLimit(limit), lLimit(limit)
+ {
+ }
+
+ Range(const Limit& lLimit, const Limit& uLimit) : uLimit(uLimit), lLimit(lLimit)
+ {
+ }
+
+ Limit& UpperLimit()
+ {
+ return uLimit;
+ }
+
+ Limit& LowerLimit()
+ {
+ return lLimit;
+ }
+
+#ifdef DEBUG
+ char* ToString(IAllocator* alloc)
+ {
+ size_t size = 64;
+ char* buf = (char*)alloc->Alloc(size);
+ sprintf_s(buf, size, "<%s, %s>", lLimit.ToString(alloc), uLimit.ToString(alloc));
+ return buf;
+ }
+#endif
+};
+
+// Helpers for operations performed on ranges
+struct RangeOps
+{
+    // Given a constant limit "l1", return a copy of "l2" with l1's constant added to it,
+    // or an Unknown limit if the addition would overflow.
+ static Limit AddConstantLimit(Limit& l1, Limit& l2)
+ {
+ assert(l1.IsConstant());
+ Limit l = l2;
+ if (l.AddConstant(l1.GetConstant()))
+ {
+ return l;
+ }
+ else
+ {
+ return Limit(Limit::keUnknown);
+ }
+ }
+
+ // Given two ranges "r1" and "r2", perform an add operation on the
+ // ranges.
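+    // For illustration: Add(<0, a.len - 1>, <1, 1>) yields <1, a.len>, and adding a
+    // constant to a Dependent limit leaves that side Dependent.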
+ static Range Add(Range& r1, Range& r2)
+ {
+ Limit& r1lo = r1.LowerLimit();
+ Limit& r1hi = r1.UpperLimit();
+ Limit& r2lo = r2.LowerLimit();
+ Limit& r2hi = r2.UpperLimit();
+
+ Range result = Limit(Limit::keUnknown);
+
+ // Check lo ranges if they are dependent and not unknown.
+ if ((r1lo.IsDependent() && !r1lo.IsUnknown()) || (r2lo.IsDependent() && !r2lo.IsUnknown()))
+ {
+ result.lLimit = Limit(Limit::keDependent);
+ }
+ // Check hi ranges if they are dependent and not unknown.
+ if ((r1hi.IsDependent() && !r1hi.IsUnknown()) || (r2hi.IsDependent() && !r2hi.IsUnknown()))
+ {
+ result.uLimit = Limit(Limit::keDependent);
+ }
+
+ if (r1lo.IsConstant())
+ {
+ result.lLimit = AddConstantLimit(r1lo, r2lo);
+ }
+ if (r2lo.IsConstant())
+ {
+ result.lLimit = AddConstantLimit(r2lo, r1lo);
+ }
+ if (r1hi.IsConstant())
+ {
+ result.uLimit = AddConstantLimit(r1hi, r2hi);
+ }
+ if (r2hi.IsConstant())
+ {
+ result.uLimit = AddConstantLimit(r2hi, r1hi);
+ }
+ return result;
+ }
+
+ // Given two ranges "r1" and "r2", do a Phi merge. If "monotonic" is true,
+ // then ignore the dependent variables.
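+    // For illustration: Merge(<0, dep>, <dep, dep>) is <dep, dep> normally, but with
+    // "monotonic" the dependent lower limit is ignored and the result is <0, dep>.
+    // Merge(<0, 5>, <2, a.len + 1>) keeps the smaller constant lower bound (0) and ends up
+    // with an Unknown upper bound, since the widening rule (a constant k merged with
+    // a.len + n when n >= k >= 0) does not apply here.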
+ static Range Merge(Range& r1, Range& r2, bool monotonic)
+ {
+ Limit& r1lo = r1.LowerLimit();
+ Limit& r1hi = r1.UpperLimit();
+ Limit& r2lo = r2.LowerLimit();
+ Limit& r2hi = r2.UpperLimit();
+
+ // Take care of lo part.
+ Range result = Limit(Limit::keUnknown);
+ if (r1lo.IsUnknown() || r2lo.IsUnknown())
+ {
+ result.lLimit = Limit(Limit::keUnknown);
+ }
+ // Uninitialized, just copy.
+ else if (r1lo.IsUndef())
+ {
+ result.lLimit = r2lo;
+ }
+ else if (r1lo.IsDependent() || r2lo.IsDependent())
+ {
+ if (monotonic)
+ {
+ result.lLimit = r1lo.IsDependent() ? r2lo : r1lo;
+ }
+ else
+ {
+ result.lLimit = Limit(Limit::keDependent);
+ }
+ }
+
+ // Take care of hi part.
+ if (r1hi.IsUnknown() || r2hi.IsUnknown())
+ {
+ result.uLimit = Limit(Limit::keUnknown);
+ }
+ else if (r1hi.IsUndef())
+ {
+ result.uLimit = r2hi;
+ }
+ else if (r1hi.IsDependent() || r2hi.IsDependent())
+ {
+ if (monotonic)
+ {
+ result.uLimit = r1hi.IsDependent() ? r2hi : r1hi;
+ }
+ else
+ {
+ result.uLimit = Limit(Limit::keDependent);
+ }
+ }
+
+ if (r1lo.IsConstant() && r2lo.IsConstant())
+ {
+ result.lLimit = Limit(Limit::keConstant, min(r1lo.GetConstant(), r2lo.GetConstant()));
+ }
+ if (r1hi.IsConstant() && r2hi.IsConstant())
+ {
+ result.uLimit = Limit(Limit::keConstant, max(r1hi.GetConstant(), r2hi.GetConstant()));
+ }
+ if (r2hi.Equals(r1hi))
+ {
+ result.uLimit = r2hi;
+ }
+ if (r2lo.Equals(r1lo))
+ {
+ result.lLimit = r1lo;
+ }
+ // Widen Upper Limit => Max(k, (a.len + n)) yields (a.len + n),
+ // This is correct if k >= 0 and n >= k, since a.len always >= 0
+ // (a.len + n) could overflow, but the result (a.len + n) also
+ // preserves the overflow.
+ if (r1hi.IsConstant() && r1hi.GetConstant() >= 0 && r2hi.IsBinOpArray() &&
+ r2hi.GetConstant() >= r1hi.GetConstant())
+ {
+ result.uLimit = r2hi;
+ }
+ if (r2hi.IsConstant() && r2hi.GetConstant() >= 0 && r1hi.IsBinOpArray() &&
+ r1hi.GetConstant() >= r2hi.GetConstant())
+ {
+ result.uLimit = r1hi;
+ }
+ if (r1hi.IsBinOpArray() && r2hi.IsBinOpArray() && r1hi.vn == r2hi.vn)
+ {
+ result.uLimit = r1hi;
+ // Widen the upper bound if the other constant is greater.
+ if (r2hi.GetConstant() > r1hi.GetConstant())
+ {
+ result.uLimit = r2hi;
+ }
+ }
+ return result;
+ }
+};
+
+class RangeCheck
+{
+public:
+ // Constructor
+ RangeCheck(Compiler* pCompiler);
+
+ // Location information is used to map where the defs occur in the method.
+ struct Location
+ {
+ BasicBlock* block;
+ GenTreePtr stmt;
+ GenTreePtr tree;
+ GenTreePtr parent;
+ Location(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, GenTreePtr parent)
+ : block(block), stmt(stmt), tree(tree), parent(parent)
+ {
+ }
+
+ private:
+ Location();
+ };
+
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, bool, JitSimplerHashBehavior> OverflowMap;
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, Range*, JitSimplerHashBehavior> RangeMap;
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, BasicBlock*, JitSimplerHashBehavior> SearchPath;
+ typedef SimplerHashTable<INT64, LargePrimitiveKeyFuncs<INT64>, Location*, JitSimplerHashBehavior> VarToLocMap;
+ typedef SimplerHashTable<INT64, LargePrimitiveKeyFuncs<INT64>, ExpandArrayStack<Location*>*, JitSimplerHashBehavior>
+ VarToLocArrayMap;
+
+ // Generate a hashcode unique for this ssa var.
+ UINT64 HashCode(unsigned lclNum, unsigned ssaNum);
+
+ // Add a location of the definition of ssa var to the location map.
+ // Requires "hash" to be computed using HashCode.
+ // Requires "location" to be the local definition.
+ void SetDef(UINT64 hash, Location* loc);
+
+ // Given a tree node that is a local, return the Location defining the local.
+ Location* GetDef(GenTreePtr tree);
+ Location* GetDef(unsigned lclNum, unsigned ssaNum);
+
+ int GetArrLength(ValueNum vn);
+
+ // Check whether the computed range is within lower and upper bounds. This function
+ // assumes that the lower range is resolved and upper range is symbolic as in an
+ // increasing loop.
+ // TODO-CQ: This is not general enough.
+ bool BetweenBounds(Range& range, int lower, GenTreePtr upper);
+
+    // Given a tree location, check if the tree is a local def and add its location to the map.
+ void MapStmtDefs(const Location& loc);
+
+    // Given the CFG, find the defs and add their locations to the map.
+ void MapMethodDefs();
+
+    // Entry point to optimize range checks in the method. Assumes value numbering
+ // and assertion prop phases are completed.
+ void OptimizeRangeChecks();
+
+ // Given a "tree" node, check if it contains array bounds check node and
+ // optimize to remove it, if possible. Requires "stmt" and "block" that
+ // contain the tree.
+ void OptimizeRangeCheck(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree);
+
+ // Given the index expression try to find its range.
+ // The range of a variable depends on its rhs which in turn depends on its constituent variables.
+ // The "path" is the path taken in the search for the rhs' range and its constituents' range.
+ // If "monotonic" is true, the calculations are made more liberally assuming initial values
+ // at phi definitions.
+ Range GetRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent));
+
+ // Given the local variable, first find the definition of the local and find the range of the rhs.
+ // Helper for GetRange.
+ Range ComputeRangeForLocalDef(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent));
+
+ // Compute the range, rather than retrieve a cached value. Helper for GetRange.
+ Range ComputeRange(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path, bool monotonic DEBUGARG(int indent));
+
+ // Compute the range for the op1 and op2 for the given binary operator.
+ Range ComputeRangeForBinOp(BasicBlock* block,
+ GenTreePtr stmt,
+ GenTreePtr op1,
+ GenTreePtr op2,
+ genTreeOps oper,
+ SearchPath* path,
+ bool monotonic DEBUGARG(int indent));
+
+ // Merge assertions from AssertionProp's flags, for the corresponding "phiArg."
+ // Requires "pRange" to contain range that is computed partially.
+ void MergeAssertion(
+ BasicBlock* block, GenTreePtr stmt, GenTreePtr phiArg, SearchPath* path, Range* pRange DEBUGARG(int indent));
+
+ // Inspect the "assertions" and extract assertions about the given "phiArg" and
+ // refine the "pRange" value.
+ void MergeEdgeAssertions(GenTreePtr phiArg, const ASSERT_VALARG_TP assertions, Range* pRange);
+
+ // Compute the maximum possible value of the given "limit" (for example, ARRLEN_MAX for an
+ // array length). Return "false" if such a value cannot be determined.
+ bool GetLimitMax(Limit& limit, int* pMax);
+
+ // Does the addition of the two limits overflow?
+ bool AddOverflows(Limit& limit1, Limit& limit2);
+
+ // Does the binary operation between the operands overflow? Check recursively.
+ bool DoesBinOpOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr op1, GenTreePtr op2, SearchPath* path);
+
+ // Do the phi operands involve an assignment that could overflow?
+ bool DoesPhiOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path);
+
+ // Find the def of the "expr" local and recurse on its arguments to check whether any of them
+ // involve a calculation that overflows.
+ bool DoesVarDefOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path);
+
+ bool ComputeDoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr expr, SearchPath* path);
+
+ // Does the current "expr", which is a use, involve a definition that overflows?
+ bool DoesOverflow(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, SearchPath* path);
+
+ // Widen the range by first checking if the induction variable is monotonic. Requires "pRange"
+ // to be partially computed.
+ void Widen(BasicBlock* block, GenTreePtr stmt, GenTreePtr tree, SearchPath* path, Range* pRange);
+
+ // Is the binary operation monotonically increasing the value?
+ bool IsBinOpMonotonicallyIncreasing(GenTreePtr op1, GenTreePtr op2, genTreeOps oper, SearchPath* path);
+
+ // Given an "expr" trace its rhs and their definitions to check if all the assignments
+ // are monotonically increasing.
+ bool IsMonotonicallyIncreasing(GenTreePtr tree, SearchPath* path);
+
+ // We allocate a budget to avoid walking long UD chains. When traversing each link in the UD
+ // chain, we decrement the budget. When the budget hits 0, then no more range check optimization
+ // will be applied for the currently compiled method.
+ bool IsOverBudget();
+
+private:
+ GenTreeBoundsChk* m_pCurBndsChk;
+
+ // Get the cached overflow values.
+ OverflowMap* GetOverflowMap();
+ OverflowMap* m_pOverflowMap;
+
+ // Get the cached range values.
+ RangeMap* GetRangeMap();
+ RangeMap* m_pRangeMap;
+
+ bool m_fMappedDefs;
+ VarToLocMap* m_pDefTable;
+ Compiler* m_pCompiler;
+
+ // The number of nodes for which range is computed throughout the current method.
+ // When this limit is zero, we have exhausted all the budget to walk the ud-chain.
+ int m_nVisitBudget;
+};
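The definition table above (VarToLocMap) is keyed by a single INT64 that HashCode derives from an SSA variable's (lclNum, ssaNum) pair. The diff does not show the HashCode implementation, so the packing below is only an assumption; it is a minimal, self-contained sketch of one way such a key could be formed and decoded.

#include <cstdint>
#include <cstdio>

// Hypothetical packing of (lclNum, ssaNum) into one 64-bit key; the real
// HashCode in RangeCheck may use a different scheme.
static uint64_t PackSsaKey(unsigned lclNum, unsigned ssaNum)
{
    return (static_cast<uint64_t>(lclNum) << 32) | ssaNum;
}

static void UnpackSsaKey(uint64_t key, unsigned* lclNum, unsigned* ssaNum)
{
    *lclNum = static_cast<unsigned>(key >> 32);
    *ssaNum = static_cast<unsigned>(key & 0xFFFFFFFFu);
}

int main()
{
    uint64_t key = PackSsaKey(42, 7);
    unsigned lcl;
    unsigned ssa;
    UnpackSsaKey(key, &lcl, &ssa);
    std::printf("key=0x%llx lcl=%u ssa=%u\n", static_cast<unsigned long long>(key), lcl, ssa);
    return 0;
}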
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
new file mode 100644
index 0000000000..03e0c9a27e
--- /dev/null
+++ b/src/jit/rationalize.cpp
@@ -0,0 +1,1056 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+// state carried over the tree walk, to be used in making
+// a splitting decision.
+struct SplitData
+{
+ GenTree* root; // root stmt of tree being processed
+ BasicBlock* block;
+ Rationalizer* thisPhase;
+};
+
+//------------------------------------------------------------------------------
+// isNodeCallArg - given a context (stack of parent nodes), determine if the TOS is an arg to a call
+//------------------------------------------------------------------------------
+
+GenTree* isNodeCallArg(ArrayStack<GenTree*>* parentStack)
+{
+ for (int i = 1; // 0 is current node, so start at 1
+ i < parentStack->Height(); i++)
+ {
+ GenTree* node = parentStack->Index(i);
+ switch (node->OperGet())
+ {
+ case GT_LIST:
+ case GT_ARGPLACE:
+ break;
+ case GT_NOP:
+ // Currently there's an issue when the rationalizer performs
+ // the fixup of a call argument: when we remove a NOP that
+ // fgMorph inserted as the parent of a call, the tree stack in
+ // the walk is no longer consistent with the node that was just
+ // deleted, so the workaround is to go one level deeper.
+ // TODO-Cleanup: This has to be fixed in a proper way: make the rationalizer
+ // correctly modify the evaluation stack when removing treenodes.
+ if (node->gtOp.gtOp1->gtOper == GT_CALL)
+ {
+ return node->gtOp.gtOp1;
+ }
+ break;
+ case GT_CALL:
+ return node;
+ default:
+ return nullptr;
+ }
+ }
+ return nullptr;
+}
+
+// return op that is the store equivalent of the given load opcode
+genTreeOps storeForm(genTreeOps loadForm)
+{
+ switch (loadForm)
+ {
+ case GT_LCL_VAR:
+ return GT_STORE_LCL_VAR;
+ case GT_LCL_FLD:
+ return GT_STORE_LCL_FLD;
+ case GT_REG_VAR:
+ noway_assert(!"reg vars only supported in classic backend\n");
+ unreached();
+ default:
+ noway_assert(!"not a data load opcode\n");
+ unreached();
+ }
+}
+
+// return op that is the addr equivalent of the given load opcode
+genTreeOps addrForm(genTreeOps loadForm)
+{
+ switch (loadForm)
+ {
+ case GT_LCL_VAR:
+ return GT_LCL_VAR_ADDR;
+ case GT_LCL_FLD:
+ return GT_LCL_FLD_ADDR;
+ default:
+ noway_assert(!"not a data load opcode\n");
+ unreached();
+ }
+}
+
+// return op that is the load equivalent of the given addr opcode
+genTreeOps loadForm(genTreeOps addrForm)
+{
+ switch (addrForm)
+ {
+ case GT_LCL_VAR_ADDR:
+ return GT_LCL_VAR;
+ case GT_LCL_FLD_ADDR:
+ return GT_LCL_FLD;
+ default:
+ noway_assert(!"not a local address opcode\n");
+ unreached();
+ }
+}
+
+// copy the flags determined by mask from src to dst
+void copyFlags(GenTree* dst, GenTree* src, unsigned mask)
+{
+ dst->gtFlags &= ~mask;
+ dst->gtFlags |= (src->gtFlags & mask);
+}
+
+// call args have other pointers to them which must be fixed up if
+// they are replaced
+void Compiler::fgFixupIfCallArg(ArrayStack<GenTree*>* parentStack, GenTree* oldChild, GenTree* newChild)
+{
+ GenTree* parentCall = isNodeCallArg(parentStack);
+ if (!parentCall)
+ {
+ return;
+ }
+
+ // we have replaced an arg, so update pointers in argtable
+ fgFixupArgTabEntryPtr(parentCall, oldChild, newChild);
+}
+
+//------------------------------------------------------------------------
+// fgFixupArgTabEntryPtr: Fixup the fgArgTabEntryPtr of parentCall after
+// replacing oldArg with newArg
+//
+// Arguments:
+// parentCall - a pointer to the parent call node
+// oldArg - the original argument node
+// newArg - the replacement argument node
+//
+
+void Compiler::fgFixupArgTabEntryPtr(GenTreePtr parentCall, GenTreePtr oldArg, GenTreePtr newArg)
+{
+ assert(parentCall != nullptr);
+ assert(oldArg != nullptr);
+ assert(newArg != nullptr);
+
+ JITDUMP("parent call was :\n");
+ DISPNODE(parentCall);
+
+ JITDUMP("old child was :\n");
+ DISPNODE(oldArg);
+
+ if (oldArg->gtFlags & GTF_LATE_ARG)
+ {
+ newArg->gtFlags |= GTF_LATE_ARG;
+ }
+ else
+ {
+ fgArgTabEntryPtr fp = Compiler::gtArgEntryByNode(parentCall, oldArg);
+ assert(fp->node == oldArg);
+ fp->node = newArg;
+ }
+}
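fgFixupArgTabEntryPtr keeps the call's argument table consistent when an argument node is replaced: the entry that pointed at the old node is retargeted to the new one. Below is a stripped-down sketch of the same pattern; Node, ArgEntry, and CallInfo are toy stand-ins, not CoreCLR types.

#include <cassert>
#include <vector>

struct Node { int id; };         // toy stand-in for GenTree
struct ArgEntry { Node* node; }; // toy stand-in for fgArgTabEntry

struct CallInfo
{
    std::vector<ArgEntry> argTable;

    // Retarget the table entry that points at oldArg so that it points at
    // newArg, mirroring what fgFixupArgTabEntryPtr does for a call's args.
    void FixupArg(Node* oldArg, Node* newArg)
    {
        for (ArgEntry& entry : argTable)
        {
            if (entry.node == oldArg)
            {
                entry.node = newArg;
                return;
            }
        }
        assert(!"old argument not found in the arg table");
    }
};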
+
+// Rewrite a SIMD indirection as GT_IND(GT_LEA(obj.op1)), or as a simple
+// lclVar if possible.
+//
+// Arguments:
+// use - A use reference for a block node
+// keepBlk - True if this should remain a block node if it is not a lclVar
+//
+// Return Value:
+// None.
+//
+// TODO-1stClassStructs: These should be eliminated earlier, once we can handle
+// lclVars in all the places that used to have GT_OBJ.
+//
+void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk)
+{
+#ifdef FEATURE_SIMD
+ // No lowering is needed for non-SIMD nodes, so early out if featureSIMD is not enabled.
+ if (!comp->featureSIMD)
+ {
+ return;
+ }
+
+ GenTree* tree = use.Def();
+ if (!tree->OperIsIndir())
+ {
+ return;
+ }
+ var_types simdType = tree->TypeGet();
+
+ if (!varTypeIsSIMD(simdType))
+ {
+ return;
+ }
+
+ // If the operand is a GT_ADDR(GT_LCL_VAR) and the LclVar is known to be of simdType,
+ // replace the obj with a GT_LCL_VAR.
+ GenTree* addr = tree->AsIndir()->Addr();
+ if (addr->OperIsLocalAddr() && comp->isAddrOfSIMDType(addr))
+ {
+ BlockRange().Remove(tree);
+
+ addr->SetOper(loadForm(addr->OperGet()));
+ addr->gtType = simdType;
+ use.ReplaceWith(comp, addr);
+ }
+ else if (!keepBlk)
+ {
+ tree->SetOper(GT_IND);
+ tree->gtType = simdType;
+ }
+#endif // FEATURE_SIMD
+}
+
+// RewriteNodeAsCall : Replace the given tree node by a GT_CALL.
+//
+// Arguments:
+ // use - A pointer to the use edge of the tree node to be replaced
+ // data - A pointer to tree walk data providing the context
+// callHnd - The method handle of the call to be generated
+// entryPoint - The method entrypoint of the call to be generated
+// args - The argument list of the call to be generated
+//
+// Return Value:
+// None.
+//
+
+void Rationalizer::RewriteNodeAsCall(GenTree** use,
+ Compiler::fgWalkData* data,
+ CORINFO_METHOD_HANDLE callHnd,
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP entryPoint,
+#endif
+ GenTreeArgList* args)
+{
+ GenTreePtr tree = *use;
+ Compiler* comp = data->compiler;
+ SplitData* tmpState = (SplitData*)data->pCallbackData;
+ GenTreePtr root = tmpState->root;
+ GenTreePtr treeFirstNode = comp->fgGetFirstNode(tree);
+ GenTreePtr treeLastNode = tree;
+ GenTreePtr treePrevNode = treeFirstNode->gtPrev;
+ GenTreePtr treeNextNode = treeLastNode->gtNext;
+
+ // Create the call node
+ GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType, args);
+ call = comp->fgMorphArgs(call);
+#ifdef FEATURE_READYTORUN_COMPILER
+ call->gtCall.setEntryPoint(entryPoint);
+#endif
+
+ // Replace "tree" with "call"
+ *use = call;
+
+ // Rebuild the evaluation order.
+ comp->gtSetStmtInfo(root);
+
+ // Rebuild the execution order.
+ comp->fgSetTreeSeq(call, treePrevNode);
+
+ // Restore linear-order Prev and Next for "call".
+ if (treePrevNode)
+ {
+ treeFirstNode = comp->fgGetFirstNode(call);
+ treeFirstNode->gtPrev = treePrevNode;
+ treePrevNode->gtNext = treeFirstNode;
+ }
+ else
+ {
+ // Update the linear order start of "root" if treeFirstNode
+ // appears to have replaced the original first node.
+ assert(treeFirstNode == root->gtStmt.gtStmtList);
+ root->gtStmt.gtStmtList = comp->fgGetFirstNode(call);
+ }
+
+ if (treeNextNode)
+ {
+ treeLastNode = call;
+ treeLastNode->gtNext = treeNextNode;
+ treeNextNode->gtPrev = treeLastNode;
+ }
+
+ comp->fgFixupIfCallArg(data->parentStack, tree, call);
+
+ // Propagate flags of "call" to its parents.
+ // 0 is current node, so start at 1
+ for (int i = 1; i < data->parentStack->Height(); i++)
+ {
+ GenTree* node = data->parentStack->Index(i);
+ node->gtFlags |= GTF_CALL;
+ node->gtFlags |= call->gtFlags & GTF_ALL_EFFECT;
+ }
+
+ // Since "tree" is replaced with "call", pop "tree" node (i.e the current node)
+ // and replace it with "call" on parent stack.
+ assert(data->parentStack->Top() == tree);
+ (void)data->parentStack->Pop();
+ data->parentStack->Push(call);
+}
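Much of RewriteNodeAsCall is bookkeeping to splice the new call's node sequence into the linear execution order, patching gtPrev/gtNext at both seams. The sketch below shows that splice on a toy doubly linked list; Node and SpliceSegment are illustrative names, not JIT APIs.

struct Node
{
    Node* prev = nullptr;
    Node* next = nullptr;
};

// Replace the contiguous segment [oldFirst, oldLast] with [newFirst, newLast],
// patching the links on both sides, much like RewriteNodeAsCall re-links the
// call's nodes into the statement's execution order.
void SpliceSegment(Node* oldFirst, Node* oldLast, Node* newFirst, Node* newLast)
{
    Node* before = oldFirst->prev;
    Node* after  = oldLast->next;

    newFirst->prev = before;
    if (before != nullptr)
    {
        before->next = newFirst;
    }

    newLast->next = after;
    if (after != nullptr)
    {
        after->prev = newLast;
    }
}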
+
+// RewriteIntrinsicAsUserCall : Rewrite an intrinsic operator as a GT_CALL to the original method.
+//
+// Arguments:
+ // use - A pointer to the use edge of the intrinsic node
+ // data - A pointer to tree walk data providing the context
+//
+// Return Value:
+// None.
+//
+ // Some intrinsics, such as Sqrt, are rewritten back to calls here, and some are not.
+ // The ones that are not rewritten here must be handled in Codegen.
+ // Conceptually, Lowering is the right place to do this rewrite. Keeping it in rationalization is
+ // mainly for throughput reasons.
+
+void Rationalizer::RewriteIntrinsicAsUserCall(GenTree** use, Compiler::fgWalkData* data)
+{
+ GenTreeIntrinsic* intrinsic = (*use)->AsIntrinsic();
+ Compiler* comp = data->compiler;
+
+ GenTreeArgList* args;
+ if (intrinsic->gtOp.gtOp2 == nullptr)
+ {
+ args = comp->gtNewArgList(intrinsic->gtGetOp1());
+ }
+ else
+ {
+ args = comp->gtNewArgList(intrinsic->gtGetOp1(), intrinsic->gtGetOp2());
+ }
+
+ RewriteNodeAsCall(use, data, intrinsic->gtMethodHandle,
+#ifdef FEATURE_READYTORUN_COMPILER
+ intrinsic->gtEntryPoint,
+#endif
+ args);
+}
+
+// FixupIfSIMDLocal: Fixup the type of a lclVar tree, as needed, if it is a SIMD type vector.
+//
+// Arguments:
+ // node - the GenTreeLclVarCommon tree to be fixed up.
+//
+// Return Value:
+// None.
+//
+// TODO-1stClassStructs: This is now only here to preserve existing behavior. It is actually not
+// desirable to change the lclFld nodes back to TYP_SIMD (it will cause them to be loaded
+// into a vector register, and then moved to an int register).
+
+void Rationalizer::FixupIfSIMDLocal(GenTreeLclVarCommon* node)
+{
+#ifdef FEATURE_SIMD
+ if (!comp->featureSIMD)
+ {
+ return;
+ }
+
+ LclVarDsc* varDsc = &(comp->lvaTable[node->gtLclNum]);
+
+ // Don't mark a byref of a SIMD vector as a SIMD type.
+ // Note that even though a struct arg may be marked as lvIsSIMD=true,
+ // the tree node representing such an arg should not be
+ // marked as a SIMD type, since it is a byref of a SIMD type.
+ if (!varTypeIsSIMD(varDsc))
+ {
+ return;
+ }
+ switch (node->OperGet())
+ {
+ default:
+ // Nothing to do for most tree nodes.
+ break;
+
+ case GT_LCL_FLD:
+ // We may see a lclFld used for pointer-sized structs that have been morphed, in which
+ // case we can change it to GT_LCL_VAR.
+ // However, we may also see a lclFld with FieldSeqStore::NotAField() for structs that can't
+ // be analyzed, e.g. those with overlapping fields such as the IL implementation of Vector<T>.
+ if ((node->AsLclFld()->gtFieldSeq == FieldSeqStore::NotAField()) && (node->AsLclFld()->gtLclOffs == 0) &&
+ (node->gtType == TYP_I_IMPL) && (varDsc->lvExactSize == TARGET_POINTER_SIZE))
+ {
+ node->SetOper(GT_LCL_VAR);
+ node->gtFlags &= ~(GTF_VAR_USEASG);
+ }
+ else
+ {
+ // If we access a field of a SIMD lclVar via GT_LCL_FLD, it cannot have been
+ // independently promoted.
+ assert(comp->lvaGetPromotionType(varDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT);
+ return;
+ }
+ break;
+ case GT_STORE_LCL_FLD:
+ assert(node->gtType == TYP_I_IMPL);
+ node->SetOper(GT_STORE_LCL_VAR);
+ node->gtFlags &= ~(GTF_VAR_USEASG);
+ break;
+ }
+ unsigned simdSize = (unsigned int)roundUp(varDsc->lvExactSize, TARGET_POINTER_SIZE);
+ node->gtType = comp->getSIMDTypeForSize(simdSize);
+#endif // FEATURE_SIMD
+}
+
+#ifdef DEBUG
+
+void Rationalizer::ValidateStatement(GenTree* tree, BasicBlock* block)
+{
+ assert(tree->gtOper == GT_STMT);
+ DBEXEC(TRUE, JitTls::GetCompiler()->fgDebugCheckNodeLinks(block, tree));
+}
+
+// sanity checks that apply to all kinds of IR
+void Rationalizer::SanityCheck()
+{
+ // TODO: assert(!IsLIR());
+ BasicBlock* block;
+ foreach_block(comp, block)
+ {
+ for (GenTree* statement = block->bbTreeList; statement != nullptr; statement = statement->gtNext)
+ {
+ ValidateStatement(statement, block);
+
+ for (GenTree* tree = statement->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ // QMARK nodes should have been removed before this phase.
+ assert(tree->OperGet() != GT_QMARK);
+
+ if (tree->OperGet() == GT_ASG)
+ {
+ if (tree->gtGetOp1()->OperGet() == GT_LCL_VAR)
+ {
+ assert(tree->gtGetOp1()->gtFlags & GTF_VAR_DEF);
+ }
+ else if (tree->gtGetOp2()->OperGet() == GT_LCL_VAR)
+ {
+ assert(!(tree->gtGetOp2()->gtFlags & GTF_VAR_DEF));
+ }
+ }
+ }
+ }
+ }
+}
+
+void Rationalizer::SanityCheckRational()
+{
+ // TODO-Cleanup : check that the tree is rational here
+ // then do normal checks
+ SanityCheck();
+}
+
+#endif // DEBUG
+
+static void RewriteAssignmentIntoStoreLclCore(GenTreeOp* assignment,
+ GenTree* location,
+ GenTree* value,
+ genTreeOps locationOp)
+{
+ assert(assignment != nullptr);
+ assert(assignment->OperGet() == GT_ASG);
+ assert(location != nullptr);
+ assert(value != nullptr);
+
+ genTreeOps storeOp = storeForm(locationOp);
+
+#ifdef DEBUG
+ JITDUMP("rewriting asg(%s, X) to %s(X)\n", GenTree::NodeName(locationOp), GenTree::NodeName(storeOp));
+#endif // DEBUG
+
+ assignment->SetOper(storeOp);
+ GenTreeLclVarCommon* store = assignment->AsLclVarCommon();
+
+ GenTreeLclVarCommon* var = location->AsLclVarCommon();
+ store->SetLclNum(var->gtLclNum);
+ store->SetSsaNum(var->gtSsaNum);
+
+ if (locationOp == GT_LCL_FLD)
+ {
+ store->gtLclFld.gtLclOffs = var->gtLclFld.gtLclOffs;
+ store->gtLclFld.gtFieldSeq = var->gtLclFld.gtFieldSeq;
+ }
+
+ copyFlags(store, var, GTF_LIVENESS_MASK);
+ store->gtFlags &= ~GTF_REVERSE_OPS;
+
+ store->gtType = var->TypeGet();
+ store->gtOp1 = value;
+
+ DISPNODE(store);
+ JITDUMP("\n");
+}
+
+void Rationalizer::RewriteAssignmentIntoStoreLcl(GenTreeOp* assignment)
+{
+ assert(assignment != nullptr);
+ assert(assignment->OperGet() == GT_ASG);
+
+ GenTree* location = assignment->gtGetOp1();
+ GenTree* value = assignment->gtGetOp2();
+
+ RewriteAssignmentIntoStoreLclCore(assignment, location, value, location->OperGet());
+}
+
+void Rationalizer::RewriteAssignment(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTreeOp* assignment = use.Def()->AsOp();
+ assert(assignment->OperGet() == GT_ASG);
+
+ GenTree* location = assignment->gtGetOp1();
+ GenTree* value = assignment->gtGetOp2();
+
+ genTreeOps locationOp = location->OperGet();
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(location) && assignment->OperIsInitBlkOp())
+ {
+ if (location->OperGet() == GT_LCL_VAR)
+ {
+ var_types simdType = location->TypeGet();
+ GenTree* initVal = assignment->gtOp.gtOp2;
+ var_types baseType = comp->getBaseTypeOfSIMDLocal(location);
+ if (baseType != TYP_UNKNOWN)
+ {
+ GenTreeSIMD* simdTree = new (comp, GT_SIMD)
+ GenTreeSIMD(simdType, initVal, SIMDIntrinsicInit, baseType, genTypeSize(simdType));
+ assignment->gtOp.gtOp2 = simdTree;
+ value = simdTree;
+ initVal->gtNext = simdTree;
+ simdTree->gtPrev = initVal;
+
+ simdTree->gtNext = location;
+ location->gtPrev = simdTree;
+ }
+ }
+ else
+ {
+ assert(location->OperIsBlk());
+ }
+ }
+#endif // FEATURE_SIMD
+
+ switch (locationOp)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_REG_VAR:
+ case GT_PHI_ARG:
+ RewriteAssignmentIntoStoreLclCore(assignment, location, value, locationOp);
+ BlockRange().Remove(location);
+ break;
+
+ case GT_IND:
+ {
+ GenTreeStoreInd* store =
+ new (comp, GT_STOREIND) GenTreeStoreInd(location->TypeGet(), location->gtGetOp1(), value);
+
+ copyFlags(store, assignment, GTF_ALL_EFFECT);
+ copyFlags(store, location, GTF_IND_FLAGS);
+
+ if (assignment->IsReverseOp())
+ {
+ store->gtFlags |= GTF_REVERSE_OPS;
+ }
+
+ // TODO: JIT dump
+
+ // Remove the GT_IND node and replace the assignment node with the store
+ BlockRange().Remove(location);
+ BlockRange().InsertBefore(assignment, store);
+ use.ReplaceWith(comp, store);
+ BlockRange().Remove(assignment);
+ }
+ break;
+
+ case GT_CLS_VAR:
+ {
+ location->SetOper(GT_CLS_VAR_ADDR);
+ location->gtType = TYP_BYREF;
+
+ assignment->SetOper(GT_STOREIND);
+
+ // TODO: JIT dump
+ }
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_DYN_BLK:
+ {
+ assert(varTypeIsStruct(location));
+ GenTreeBlk* storeBlk = location->AsBlk();
+ genTreeOps storeOper;
+ switch (location->gtOper)
+ {
+ case GT_BLK:
+ storeOper = GT_STORE_BLK;
+ break;
+ case GT_OBJ:
+ storeOper = GT_STORE_OBJ;
+ break;
+ case GT_DYN_BLK:
+ storeOper = GT_STORE_DYN_BLK;
+ break;
+ default:
+ unreached();
+ }
+ JITDUMP("Rewriting GT_ASG(%s(X), Y) to %s(X,Y):\n", GenTree::NodeName(location->gtOper),
+ GenTree::NodeName(storeOper));
+ storeBlk->gtOper = storeOper;
+ storeBlk->gtFlags &= ~GTF_DONT_CSE;
+ storeBlk->gtFlags |= (assignment->gtFlags & (GTF_ALL_EFFECT | GTF_REVERSE_OPS | GTF_BLK_VOLATILE |
+ GTF_BLK_UNALIGNED | GTF_BLK_INIT | GTF_DONT_CSE));
+ storeBlk->gtBlk.Data() = value;
+
+ // Replace the assignment node with the store
+ use.ReplaceWith(comp, storeBlk);
+ BlockRange().Remove(assignment);
+ DISPTREERANGE(BlockRange(), use.Def());
+ JITDUMP("\n");
+ }
+ break;
+
+ default:
+ unreached();
+ break;
+ }
+}
+
+void Rationalizer::RewriteAddress(LIR::Use& use)
+{
+ assert(use.IsInitialized());
+
+ GenTreeUnOp* address = use.Def()->AsUnOp();
+ assert(address->OperGet() == GT_ADDR);
+
+ GenTree* location = address->gtGetOp1();
+ genTreeOps locationOp = location->OperGet();
+
+ if (location->IsLocal())
+ {
+// We are changing the child from GT_LCL_VAR TO GT_LCL_VAR_ADDR.
+// Therefore gtType of the child needs to be changed to a TYP_BYREF
+#ifdef DEBUG
+ if (locationOp == GT_LCL_VAR)
+ {
+ JITDUMP("Rewriting GT_ADDR(GT_LCL_VAR) to GT_LCL_VAR_ADDR:\n");
+ }
+ else
+ {
+ assert(locationOp == GT_LCL_FLD);
+ JITDUMP("Rewriting GT_ADDR(GT_LCL_FLD) to GT_LCL_FLD_ADDR:\n");
+ }
+#endif // DEBUG
+
+ location->SetOper(addrForm(locationOp));
+ location->gtType = TYP_BYREF;
+ copyFlags(location, address, GTF_ALL_EFFECT);
+
+ use.ReplaceWith(comp, location);
+ BlockRange().Remove(address);
+ }
+ else if (locationOp == GT_CLS_VAR)
+ {
+ location->SetOper(GT_CLS_VAR_ADDR);
+ location->gtType = TYP_BYREF;
+ copyFlags(location, address, GTF_ALL_EFFECT);
+
+ use.ReplaceWith(comp, location);
+ BlockRange().Remove(address);
+
+ JITDUMP("Rewriting GT_ADDR(GT_CLS_VAR) to GT_CLS_VAR_ADDR:\n");
+ }
+ else if (location->OperIsIndir())
+ {
+ use.ReplaceWith(comp, location->gtGetOp1());
+ BlockRange().Remove(location);
+ BlockRange().Remove(address);
+
+ JITDUMP("Rewriting GT_ADDR(GT_IND(X)) to X:\n");
+ }
+
+ DISPTREERANGE(BlockRange(), use.Def());
+ JITDUMP("\n");
+}
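RewriteAddress collapses three address shapes: GT_ADDR of a local becomes the corresponding *_ADDR form, GT_ADDR(GT_CLS_VAR) becomes GT_CLS_VAR_ADDR, and GT_ADDR(GT_IND(x)) cancels to x. The following is a compact sketch of the same rewrite rules over a toy operator enum; every name in it is illustrative, not a JIT type.

#include <cassert>

enum ToyOper
{
    LclVar,
    LclFld,
    ClsVar,
    LclVarAddr,
    LclFldAddr,
    ClsVarAddr,
    Ind
};

struct ToyNode
{
    ToyOper  oper;
    ToyNode* op1; // child, when there is one
};

// Given the child of an ADDR node, return the node that replaces the whole
// ADDR(child) expression, mirroring the cases in Rationalizer::RewriteAddress.
ToyNode* RewriteAddr(ToyNode* child)
{
    switch (child->oper)
    {
        case LclVar: child->oper = LclVarAddr; return child; // ADDR(LCL_VAR) -> LCL_VAR_ADDR
        case LclFld: child->oper = LclFldAddr; return child; // ADDR(LCL_FLD) -> LCL_FLD_ADDR
        case ClsVar: child->oper = ClsVarAddr; return child; // ADDR(CLS_VAR) -> CLS_VAR_ADDR
        case Ind:    return child->op1;                      // ADDR(IND(x))  -> x
        default:     assert(!"unexpected ADDR operand");     return child;
    }
}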
+
+Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parentStack)
+{
+ assert(useEdge != nullptr);
+
+ GenTree* node = *useEdge;
+ assert(node != nullptr);
+
+#ifdef DEBUG
+ const bool isLateArg = (node->gtFlags & GTF_LATE_ARG) != 0;
+#endif
+
+ // First, remove any preceding GT_LIST nodes, which are not otherwise visited by the tree walk.
+ //
+ // NOTE: GT_LIST nodes that are used as aggregates, by block ops, and by phi nodes will in fact be visited.
+ for (GenTree* prev = node->gtPrev;
+ prev != nullptr && prev->OperGet() == GT_LIST && !(prev->AsArgList()->IsAggregate());
+ prev = node->gtPrev)
+ {
+ BlockRange().Remove(prev);
+ }
+
+ // In addition, remove the current node if it is a GT_LIST node that is not an aggregate.
+ if (node->OperGet() == GT_LIST)
+ {
+ GenTreeArgList* list = node->AsArgList();
+ if (!list->IsAggregate())
+ {
+ BlockRange().Remove(list);
+ }
+ return Compiler::WALK_CONTINUE;
+ }
+
+ LIR::Use use;
+ if (parentStack.Height() < 2)
+ {
+ use = LIR::Use::GetDummyUse(BlockRange(), *useEdge);
+ }
+ else
+ {
+ use = LIR::Use(BlockRange(), useEdge, parentStack.Index(1));
+ }
+
+ assert(node == use.Def());
+ switch (node->OperGet())
+ {
+ case GT_ASG:
+ RewriteAssignment(use);
+ break;
+
+ case GT_BOX:
+ // GT_BOX at this level just passes through so get rid of it
+ use.ReplaceWith(comp, node->gtGetOp1());
+ BlockRange().Remove(node);
+ break;
+
+ case GT_ADDR:
+ RewriteAddress(use);
+ break;
+
+ case GT_NOP:
+ // fgMorph sometimes inserts NOP nodes between defs and uses
+ // supposedly 'to prevent constant folding'. In this case, remove the
+ // NOP.
+ if (node->gtGetOp1() != nullptr)
+ {
+ use.ReplaceWith(comp, node->gtGetOp1());
+ BlockRange().Remove(node);
+ }
+ break;
+
+ case GT_COMMA:
+ {
+ GenTree* op1 = node->gtGetOp1();
+ if ((op1->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ // The LHS has no side effects. Remove it.
+ bool isClosed = false;
+ unsigned sideEffects = 0;
+ LIR::ReadOnlyRange lhsRange = BlockRange().GetTreeRange(op1, &isClosed, &sideEffects);
+
+ // None of the transforms performed herein violate tree order, so these
+ // should always be true.
+ assert(isClosed);
+ assert((sideEffects & GTF_ALL_EFFECT) == 0);
+
+ BlockRange().Delete(comp, m_block, std::move(lhsRange));
+ }
+
+ GenTree* replacement = node->gtGetOp2();
+ if (!use.IsDummyUse())
+ {
+ use.ReplaceWith(comp, replacement);
+ }
+ else
+ {
+ // This is a top-level comma. If the RHS has no side effects we can remove
+ // it as well.
+ if ((replacement->gtFlags & GTF_ALL_EFFECT) == 0)
+ {
+ bool isClosed = false;
+ unsigned sideEffects = 0;
+ LIR::ReadOnlyRange rhsRange = BlockRange().GetTreeRange(replacement, &isClosed, &sideEffects);
+
+ // None of the transforms performed herein violate tree order, so these
+ // should always be true.
+ assert(isClosed);
+ assert((sideEffects & GTF_ALL_EFFECT) == 0);
+
+ BlockRange().Delete(comp, m_block, std::move(rhsRange));
+ }
+ }
+
+ BlockRange().Remove(node);
+ }
+ break;
+
+ case GT_ARGPLACE:
+ // Remove argplace and list nodes from the execution order.
+ //
+ // TODO: remove phi args and phi nodes as well?
+ BlockRange().Remove(node);
+ break;
+
+#ifdef _TARGET_XARCH_
+ case GT_CLS_VAR:
+ {
+ // Class vars that are the target of an assignment will get rewritten into
+ // GT_STOREIND(GT_CLS_VAR_ADDR, val) by RewriteAssignment. This check is
+ // not strictly necessary--the GT_IND(GT_CLS_VAR_ADDR) pattern that would
+ // otherwise be generated would also be picked up by RewriteAssignment--but
+ // skipping the rewrite here saves an allocation and a bit of extra work.
+ const bool isLHSOfAssignment = (use.User()->OperGet() == GT_ASG) && (use.User()->gtGetOp1() == node);
+ if (!isLHSOfAssignment)
+ {
+ GenTree* ind = comp->gtNewOperNode(GT_IND, node->TypeGet(), node);
+
+ node->SetOper(GT_CLS_VAR_ADDR);
+ node->gtType = TYP_BYREF;
+
+ BlockRange().InsertAfter(node, ind);
+ use.ReplaceWith(comp, ind);
+
+ // TODO: JIT dump
+ }
+ }
+ break;
+#endif // _TARGET_XARCH_
+
+ case GT_INTRINSIC:
+ // Non-target intrinsics should have already been rewritten back into user calls.
+ assert(Compiler::IsTargetIntrinsic(node->gtIntrinsic.gtIntrinsicId));
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_BLK:
+ case GT_OBJ:
+ {
+ // TODO-1stClassStructs: These should have been transformed to GT_INDs, but in order
+ // to preserve existing behavior, we will keep this as a block node if this is the
+ // lhs of a block assignment, and either:
+ // - It is a "generic" TYP_STRUCT assignment, OR
+ // - It is an initblk, OR
+ // - Neither the lhs or rhs are known to be of SIMD type.
+
+ GenTree* parent = use.User();
+ bool keepBlk = false;
+ if ((parent->OperGet() == GT_ASG) && (node == parent->gtGetOp1()))
+ {
+ if ((node->TypeGet() == TYP_STRUCT) || parent->OperIsInitBlkOp())
+ {
+ keepBlk = true;
+ }
+ else if (!comp->isAddrOfSIMDType(node->AsBlk()->Addr()))
+ {
+ GenTree* dataSrc = parent->gtGetOp2();
+ if (!dataSrc->IsLocal() && (dataSrc->OperGet() != GT_SIMD))
+ {
+ noway_assert(dataSrc->OperIsIndir());
+ keepBlk = !comp->isAddrOfSIMDType(dataSrc->AsIndir()->Addr());
+ }
+ }
+ }
+ RewriteSIMDOperand(use, keepBlk);
+ }
+ break;
+
+ case GT_LCL_FLD:
+ case GT_STORE_LCL_FLD:
+ // TODO-1stClassStructs: Eliminate this.
+ FixupIfSIMDLocal(node->AsLclVarCommon());
+ break;
+
+ case GT_SIMD:
+ {
+ noway_assert(comp->featureSIMD);
+ GenTreeSIMD* simdNode = node->AsSIMD();
+ unsigned simdSize = simdNode->gtSIMDSize;
+ var_types simdType = comp->getSIMDTypeForSize(simdSize);
+
+ // TODO-1stClassStructs: This should be handled more generally for enregistered or promoted
+ // structs that are passed or returned in a different register type than their enregistered
+ // type(s).
+ if (simdNode->gtType == TYP_I_IMPL && simdNode->gtSIMDSize == TARGET_POINTER_SIZE)
+ {
+ // This happens when it is consumed by a GT_RET_EXPR.
+ // It can only be a Vector2f or Vector2i.
+ assert(genTypeSize(simdNode->gtSIMDBaseType) == 4);
+ simdNode->gtType = TYP_SIMD8;
+ }
+ // Certain SIMD trees require rationalizing.
+ if (simdNode->gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicInitArray)
+ {
+ // Rewrite this as an explicit load.
+ JITDUMP("Rewriting GT_SIMD array init as an explicit load:\n");
+ unsigned int baseTypeSize = genTypeSize(simdNode->gtSIMDBaseType);
+ GenTree* address = new (comp, GT_LEA) GenTreeAddrMode(TYP_BYREF, simdNode->gtOp1, simdNode->gtOp2,
+ baseTypeSize, offsetof(CORINFO_Array, u1Elems));
+ GenTree* ind = comp->gtNewOperNode(GT_IND, simdType, address);
+
+ BlockRange().InsertBefore(simdNode, address, ind);
+ use.ReplaceWith(comp, ind);
+ BlockRange().Remove(simdNode);
+
+ DISPTREERANGE(BlockRange(), use.Def());
+ JITDUMP("\n");
+ }
+ else
+ {
+ // This code depends on the fact that NONE of the SIMD intrinsics take vector operands
+ // of a different width. If that assumption changes, we will EITHER have to make these type
+ // transformations during importation, and plumb the types all the way through the JIT,
+ // OR add a lot of special handling here.
+ GenTree* op1 = simdNode->gtGetOp1();
+ if (op1 != nullptr && op1->gtType == TYP_STRUCT)
+ {
+ op1->gtType = simdType;
+ }
+
+ GenTree* op2 = simdNode->gtGetOp2();
+ if (op2 != nullptr && op2->gtType == TYP_STRUCT)
+ {
+ op2->gtType = simdType;
+ }
+ }
+ }
+ break;
+#endif // FEATURE_SIMD
+
+ default:
+ break;
+ }
+
+ // Do some extra processing on top-level nodes to remove unused local reads.
+ if (use.IsDummyUse() && node->OperIsLocalRead())
+ {
+ assert((node->gtFlags & GTF_ALL_EFFECT) == 0);
+
+ comp->lvaDecRefCnts(node);
+ BlockRange().Remove(node);
+ }
+
+ assert(isLateArg == ((node->gtFlags & GTF_LATE_ARG) != 0));
+
+ return Compiler::WALK_CONTINUE;
+}
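Both the dead GT_COMMA operand removal and the unused local read removal above are guarded by the same test: the candidate subtree must carry no effect flags before its range is deleted from the LIR. Below is a minimal sketch of that guard and the unlink step, using toy flag and node types rather than GTF_* and GenTree.

#include <cstdint>

struct Node
{
    uint32_t flags = 0;    // toy stand-in for gtFlags
    Node*    prev  = nullptr;
    Node*    next  = nullptr;
};

const uint32_t FLAG_ALL_EFFECT = 0x7; // toy stand-in for GTF_ALL_EFFECT

// Unlink the contiguous range [first, last] from the linear order, but only
// when no node in it carries an effect flag -- the same guard RewriteNode
// applies before deleting a dead GT_COMMA operand or an unused local read.
bool TryRemoveRange(Node* first, Node* last)
{
    for (Node* n = first;; n = n->next)
    {
        if ((n->flags & FLAG_ALL_EFFECT) != 0)
        {
            return false; // the range has side effects; keep it
        }
        if (n == last)
        {
            break;
        }
    }

    Node* before = first->prev;
    Node* after  = last->next;
    if (before != nullptr)
    {
        before->next = after;
    }
    if (after != nullptr)
    {
        after->prev = before;
    }
    return true;
}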
+
+void Rationalizer::DoPhase()
+{
+ DBEXEC(TRUE, SanityCheck());
+
+ comp->compCurBB = nullptr;
+ comp->fgOrder = Compiler::FGOrderLinear;
+
+ BasicBlock* firstBlock = comp->fgFirstBB;
+
+ for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ comp->compCurBB = block;
+ m_block = block;
+
+ // Establish the first and last nodes for the block. This is necessary in order for the LIR
+ // utilities that hang off the BasicBlock type to work correctly.
+ GenTreeStmt* firstStatement = block->firstStmt();
+ if (firstStatement == nullptr)
+ {
+ // No statements in this block; skip it.
+ block->MakeLIR(nullptr, nullptr);
+ continue;
+ }
+
+ GenTreeStmt* lastStatement = block->lastStmt();
+
+ // Rewrite intrinsics that are not supported by the target back into user calls.
+ // This needs to be done before the transition to LIR because it relies on the use
+ // of fgMorphArgs, which is designed to operate on HIR. Once this is done for a
+ // particular statement, link that statement's nodes into the current basic block.
+ //
+ // This walk also clears the GTF_VAR_USEDEF bit on locals, which is not necessary
+ // in the backend.
+ GenTree* lastNodeInPreviousStatement = nullptr;
+ for (GenTreeStmt* statement = firstStatement; statement != nullptr; statement = statement->getNextStmt())
+ {
+ assert(statement->gtStmtList != nullptr);
+ assert(statement->gtStmtList->gtPrev == nullptr);
+ assert(statement->gtStmtExpr != nullptr);
+ assert(statement->gtStmtExpr->gtNext == nullptr);
+
+ SplitData splitData;
+ splitData.root = statement;
+ splitData.block = block;
+ splitData.thisPhase = this;
+
+ comp->fgWalkTreePost(&statement->gtStmtExpr,
+ [](GenTree** use, Compiler::fgWalkData* walkData) -> Compiler::fgWalkResult {
+ GenTree* node = *use;
+ if (node->OperGet() == GT_INTRINSIC &&
+ Compiler::IsIntrinsicImplementedByUserCall(node->gtIntrinsic.gtIntrinsicId))
+ {
+ RewriteIntrinsicAsUserCall(use, walkData);
+ }
+ else if (node->OperIsLocal())
+ {
+ node->gtFlags &= ~GTF_VAR_USEDEF;
+ }
+
+ return Compiler::WALK_CONTINUE;
+ },
+ &splitData, true);
+
+ GenTree* firstNodeInStatement = statement->gtStmtList;
+ if (lastNodeInPreviousStatement != nullptr)
+ {
+ lastNodeInPreviousStatement->gtNext = firstNodeInStatement;
+ }
+
+ firstNodeInStatement->gtPrev = lastNodeInPreviousStatement;
+ lastNodeInPreviousStatement = statement->gtStmtExpr;
+ }
+
+ block->MakeLIR(firstStatement->gtStmtList, lastStatement->gtStmtExpr);
+
+ // Rewrite HIR nodes into LIR nodes.
+ for (GenTreeStmt *statement = firstStatement, *nextStatement; statement != nullptr; statement = nextStatement)
+ {
+ nextStatement = statement->getNextStmt();
+
+ // If this statement has correct offset information, change it into an IL offset
+ // node and insert it into the LIR.
+ if (statement->gtStmtILoffsx != BAD_IL_OFFSET)
+ {
+ assert(!statement->IsPhiDefnStmt());
+ statement->SetOper(GT_IL_OFFSET);
+ statement->gtNext = nullptr;
+ statement->gtPrev = nullptr;
+
+ BlockRange().InsertBefore(statement->gtStmtList, statement);
+ }
+
+ m_statement = statement;
+ comp->fgWalkTreePost(&statement->gtStmtExpr,
+ [](GenTree** use, Compiler::fgWalkData* walkData) -> Compiler::fgWalkResult {
+ return reinterpret_cast<Rationalizer*>(walkData->pCallbackData)
+ ->RewriteNode(use, *walkData->parentStack);
+ },
+ this, true);
+ }
+
+ assert(BlockRange().CheckLIR(comp));
+ }
+
+ comp->compRationalIRForm = true;
+}
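Before the per-node rewrites run, DoPhase threads the per-statement node lists into a single block-wide sequence by connecting each statement's last node to the next statement's first node. The sketch below mirrors just that threading step; Stmt and ThreadStatements are toy stand-ins, and the code assumes a non-empty statement list.

#include <utility>
#include <vector>

struct Node
{
    Node* prev = nullptr;
    Node* next = nullptr;
};

struct Stmt
{
    Node* firstNode; // like gtStmtList
    Node* lastNode;  // like gtStmtExpr
};

// Thread each statement's node list into one block-wide list and return its
// first and last nodes, mirroring the linking loop in DoPhase.
std::pair<Node*, Node*> ThreadStatements(const std::vector<Stmt>& stmts)
{
    Node* prevLast = nullptr;
    for (const Stmt& stmt : stmts)
    {
        stmt.firstNode->prev = prevLast;
        if (prevLast != nullptr)
        {
            prevLast->next = stmt.firstNode;
        }
        prevLast = stmt.lastNode;
    }
    return std::make_pair(stmts.front().firstNode, stmts.back().lastNode);
}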
diff --git a/src/jit/rationalize.h b/src/jit/rationalize.h
new file mode 100644
index 0000000000..9b15fe4871
--- /dev/null
+++ b/src/jit/rationalize.h
@@ -0,0 +1,67 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//===============================================================================
+#include "phase.h"
+
+class Rationalizer : public Phase
+{
+private:
+ BasicBlock* m_block;
+ GenTreeStmt* m_statement;
+
+public:
+ Rationalizer(Compiler* comp);
+
+#ifdef DEBUG
+ static void ValidateStatement(GenTree* tree, BasicBlock* block);
+
+ // general purpose sanity checking of de facto standard GenTree
+ void SanityCheck();
+
+ // sanity checking of rationalized IR
+ void SanityCheckRational();
+
+#endif // DEBUG
+
+ virtual void DoPhase() override;
+
+ static void RewriteAssignmentIntoStoreLcl(GenTreeOp* assignment);
+ static void MorphAsgIntoStoreObj(Compiler::fgWalkData* data, GenTreeStmt* stmt, GenTree** ppTree);
+
+private:
+ inline LIR::Range& BlockRange() const
+ {
+ return LIR::AsRange(m_block);
+ }
+
+ // SIMD related
+ void RewriteSIMDOperand(LIR::Use& use, bool keepBlk);
+ void FixupIfSIMDLocal(GenTreeLclVarCommon* node);
+
+ // Intrinsic related transformations
+ static void RewriteNodeAsCall(GenTreePtr* ppTree,
+ Compiler::fgWalkData* data,
+ CORINFO_METHOD_HANDLE callHnd,
+#ifdef FEATURE_READYTORUN_COMPILER
+ CORINFO_CONST_LOOKUP entryPoint,
+#endif
+ GenTreeArgList* args);
+
+ static void RewriteIntrinsicAsUserCall(GenTreePtr* ppTree, Compiler::fgWalkData* data);
+
+ // Other transformations
+ void RewriteAssignment(LIR::Use& use);
+ void RewriteAddress(LIR::Use& use);
+
+ // Root visitor
+ Compiler::fgWalkResult RewriteNode(GenTree** useEdge, ArrayStack<GenTree*>& parents);
+};
+
+inline Rationalizer::Rationalizer(Compiler* _comp) : Phase(_comp, "IR Rationalize", PHASE_RATIONALIZE)
+{
+#ifdef DEBUG
+ comp->compNumStatementLinksTraversed = 0;
+#endif
+}
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
new file mode 100644
index 0000000000..9dd7299906
--- /dev/null
+++ b/src/jit/regalloc.cpp
@@ -0,0 +1,6841 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX RegAlloc XX
+XX XX
+XX Does the register allocation and puts the remaining lclVars on the stack XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+#include "regalloc.h"
+
+#if FEATURE_FP_REGALLOC
+Compiler::enumConfigRegisterFP Compiler::raConfigRegisterFP()
+{
+ DWORD val = JitConfig.JitRegisterFP();
+
+ return (enumConfigRegisterFP)(val & 0x3);
+}
+#endif // FEATURE_FP_REGALLOC
+
+regMaskTP Compiler::raConfigRestrictMaskFP()
+{
+ regMaskTP result = RBM_NONE;
+
+#if FEATURE_FP_REGALLOC
+ switch (raConfigRegisterFP())
+ {
+ case CONFIG_REGISTER_FP_NONE:
+ result = RBM_NONE;
+ break;
+ case CONFIG_REGISTER_FP_CALLEE_TRASH:
+ result = RBM_FLT_CALLEE_TRASH;
+ break;
+ case CONFIG_REGISTER_FP_CALLEE_SAVED:
+ result = RBM_FLT_CALLEE_SAVED;
+ break;
+ case CONFIG_REGISTER_FP_FULL:
+ result = RBM_ALLFLOAT;
+ break;
+ }
+#endif
+
+ return result;
+}
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+#if DOUBLE_ALIGN
+DWORD Compiler::getCanDoubleAlign()
+{
+#ifdef DEBUG
+ if (compStressCompile(STRESS_DBL_ALN, 20))
+ return MUST_DOUBLE_ALIGN;
+
+ return JitConfig.JitDoubleAlign();
+#else
+ return DEFAULT_DOUBLE_ALIGN;
+#endif
+}
+#endif // DOUBLE_ALIGN
+
+void Compiler::raInit()
+{
+#if FEATURE_STACK_FP_X87
+ /* We have not assigned any FP variables to registers yet */
+
+ VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::UninitVal());
+#endif
+ codeGen->intRegState.rsIsFloat = false;
+ codeGen->floatRegState.rsIsFloat = true;
+
+ rpReverseEBPenreg = false;
+ rpAsgVarNum = -1;
+ rpPassesMax = 6;
+ rpPassesPessimize = rpPassesMax - 3;
+ if (opts.compDbgCode)
+ {
+ rpPassesMax++;
+ }
+ rpStkPredict = (unsigned)-1;
+ rpFrameType = FT_NOT_SET;
+ rpLostEnreg = false;
+ rpMustCreateEBPCalled = false;
+ rpRegAllocDone = false;
+ rpMaskPInvokeEpilogIntf = RBM_NONE;
+
+ rpPredictMap[PREDICT_NONE] = RBM_NONE;
+ rpPredictMap[PREDICT_ADDR] = RBM_NONE;
+
+#if FEATURE_FP_REGALLOC
+ rpPredictMap[PREDICT_REG] = RBM_ALLINT | RBM_ALLFLOAT;
+ rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT | RBM_ALLFLOAT;
+#else
+ rpPredictMap[PREDICT_REG] = RBM_ALLINT;
+ rpPredictMap[PREDICT_SCRATCH_REG] = RBM_ALLINT;
+#endif
+
+#define REGDEF(name, rnum, mask, sname) rpPredictMap[PREDICT_REG_##name] = RBM_##name;
+#include "register.h"
+
+#if defined(_TARGET_ARM_)
+
+ rpPredictMap[PREDICT_PAIR_R0R1] = RBM_R0 | RBM_R1;
+ rpPredictMap[PREDICT_PAIR_R2R3] = RBM_R2 | RBM_R3;
+ rpPredictMap[PREDICT_REG_SP] = RBM_ILLEGAL;
+
+#elif defined(_TARGET_AMD64_)
+
+ rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
+ rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
+ rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
+
+#elif defined(_TARGET_X86_)
+
+ rpPredictMap[PREDICT_NOT_REG_EAX] = RBM_ALLINT & ~RBM_EAX;
+ rpPredictMap[PREDICT_NOT_REG_ECX] = RBM_ALLINT & ~RBM_ECX;
+ rpPredictMap[PREDICT_REG_ESP] = RBM_ILLEGAL;
+ rpPredictMap[PREDICT_PAIR_EAXEDX] = RBM_EAX | RBM_EDX;
+ rpPredictMap[PREDICT_PAIR_ECXEBX] = RBM_ECX | RBM_EBX;
+
+#endif
+
+ rpBestRecordedPrediction = NULL;
+}
+
+/*****************************************************************************
+ *
+ * The following table(s) determines the order in which registers are considered
+ * for variables to live in
+ */
+
+const regNumber* Compiler::raGetRegVarOrder(var_types regType, unsigned* wbVarOrderSize)
+{
+#if FEATURE_FP_REGALLOC
+ if (varTypeIsFloating(regType))
+ {
+ static const regNumber raRegVarOrderFlt[] = {REG_VAR_ORDER_FLT};
+ const unsigned raRegVarOrderFltSize = sizeof(raRegVarOrderFlt) / sizeof(raRegVarOrderFlt[0]);
+
+ if (wbVarOrderSize != NULL)
+ *wbVarOrderSize = raRegVarOrderFltSize;
+
+ return &raRegVarOrderFlt[0];
+ }
+ else
+#endif
+ {
+ static const regNumber raRegVarOrder[] = {REG_VAR_ORDER};
+ const unsigned raRegVarOrderSize = sizeof(raRegVarOrder) / sizeof(raRegVarOrder[0]);
+
+ if (wbVarOrderSize != NULL)
+ *wbVarOrderSize = raRegVarOrderSize;
+
+ return &raRegVarOrder[0];
+ }
+}
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ *
+ * Dump out the variable interference graph
+ *
+ */
+
+void Compiler::raDumpVarIntf()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ printf("Var. interference graph for %s\n", info.compFullName);
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ printf(" V%02u,T%02u and ", lclNum, varIndex);
+
+ unsigned refIndex;
+
+ for (refIndex = 0; refIndex < lvaTrackedCount; refIndex++)
+ {
+ if (VarSetOps::IsMember(this, lvaVarIntf[varIndex], refIndex))
+ printf("T%02u ", refIndex);
+ else
+ printf(" ");
+ }
+
+ printf("\n");
+ }
+
+ printf("\n");
+}
+
+/*****************************************************************************
+ *
+ * Dump out the register interference graph
+ *
+ */
+void Compiler::raDumpRegIntf()
+{
+ printf("Reg. interference graph for %s\n", info.compFullName);
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ unsigned varNum;
+
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ varNum = varDsc->lvVarIndex;
+
+ printf(" V%02u,T%02u and ", lclNum, varNum);
+
+ if (varDsc->IsFloatRegType())
+ {
+#if !FEATURE_STACK_FP_X87
+ for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
+ {
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
+ printf("%3s ", getRegName(regNum, true));
+ else
+ printf(" ");
+ }
+#endif
+ }
+ else
+ {
+ for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
+ {
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varNum))
+ printf("%3s ", getRegName(regNum));
+ else
+ printf(" ");
+ }
+ }
+
+ printf("\n");
+ }
+
+ printf("\n");
+}
+#endif // DEBUG
+
+/*****************************************************************************
+ *
+ * We'll adjust the ref counts based on interference
+ *
+ */
+
+void Compiler::raAdjustVarIntf()
+{
+ // This method was not correct and has been disabled.
+ return;
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+/* Determine register mask for a call/return from type.
+ */
+
+inline regMaskTP Compiler::genReturnRegForTree(GenTreePtr tree)
+{
+ var_types type = tree->TypeGet();
+
+ if (type == TYP_STRUCT && IsHfa(tree))
+ {
+ int retSlots = GetHfaCount(tree);
+ return ((1 << retSlots) - 1) << REG_FLOATRET;
+ }
+
+ const static regMaskTP returnMap[TYP_COUNT] = {
+ RBM_ILLEGAL, // TYP_UNDEF,
+ RBM_NONE, // TYP_VOID,
+ RBM_INTRET, // TYP_BOOL,
+ RBM_INTRET, // TYP_CHAR,
+ RBM_INTRET, // TYP_BYTE,
+ RBM_INTRET, // TYP_UBYTE,
+ RBM_INTRET, // TYP_SHORT,
+ RBM_INTRET, // TYP_USHORT,
+ RBM_INTRET, // TYP_INT,
+ RBM_INTRET, // TYP_UINT,
+ RBM_LNGRET, // TYP_LONG,
+ RBM_LNGRET, // TYP_ULONG,
+ RBM_FLOATRET, // TYP_FLOAT,
+ RBM_DOUBLERET, // TYP_DOUBLE,
+ RBM_INTRET, // TYP_REF,
+ RBM_INTRET, // TYP_BYREF,
+ RBM_INTRET, // TYP_ARRAY,
+ RBM_ILLEGAL, // TYP_STRUCT,
+ RBM_ILLEGAL, // TYP_BLK,
+ RBM_ILLEGAL, // TYP_LCLBLK,
+ RBM_ILLEGAL, // TYP_PTR,
+ RBM_ILLEGAL, // TYP_FNC,
+ RBM_ILLEGAL, // TYP_UNKNOWN,
+ };
+
+ assert((unsigned)type < sizeof(returnMap) / sizeof(returnMap[0]));
+ assert(returnMap[TYP_LONG] == RBM_LNGRET);
+ assert(returnMap[TYP_DOUBLE] == RBM_DOUBLERET);
+ assert(returnMap[TYP_REF] == RBM_INTRET);
+ assert(returnMap[TYP_STRUCT] == RBM_ILLEGAL);
+
+ regMaskTP result = returnMap[type];
+ assert(result != RBM_ILLEGAL);
+ return result;
+}
+
+/*****************************************************************************/
+
+/****************************************************************************/
+
+#ifdef DEBUG
+
+static void dispLifeSet(Compiler* comp, VARSET_VALARG_TP mask, VARSET_VALARG_TP life)
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = comp->lvaTable; lclNum < comp->lvaCount; lclNum++, varDsc++)
+ {
+ if (!varDsc->lvTracked)
+ continue;
+
+ if (!VarSetOps::IsMember(comp, mask, varDsc->lvVarIndex))
+ continue;
+
+ if (VarSetOps::IsMember(comp, life, varDsc->lvVarIndex))
+ printf("V%02u ", lclNum);
+ }
+}
+
+#endif
+
+/*****************************************************************************/
+#ifdef DEBUG
+/*****************************************************************************
+ *
+ * Debugging helpers - display variables liveness info.
+ */
+
+void dispFPvarsInBBlist(BasicBlock* beg, BasicBlock* end, VARSET_TP mask, Compiler* comp)
+{
+ do
+ {
+ printf("BB%02u: ", beg->bbNum);
+
+ printf(" in = [ ");
+ dispLifeSet(comp, mask, beg->bbLiveIn);
+ printf("] ,");
+
+ printf(" out = [ ");
+ dispLifeSet(comp, mask, beg->bbLiveOut);
+ printf("]");
+
+ if (beg->bbFlags & BBF_VISITED)
+ printf(" inner=%u", beg->bbFPinVars);
+
+ printf("\n");
+
+ beg = beg->bbNext;
+ if (!beg)
+ return;
+ } while (beg != end);
+}
+
+#if FEATURE_STACK_FP_X87
+void Compiler::raDispFPlifeInfo()
+{
+ BasicBlock* block;
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+
+ printf("BB%02u: in = [ ", block->bbNum);
+ dispLifeSet(this, optAllFloatVars, block->bbLiveIn);
+ printf("]\n\n");
+
+ VARSET_TP VARSET_INIT(this, life, block->bbLiveIn);
+ for (stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree;
+
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ for (tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ VarSetOps::AssignNoCopy(this, life, fgUpdateLiveSet(life, tree));
+
+ dispLifeSet(this, optAllFloatVars, life);
+ printf(" ");
+ gtDispTree(tree, 0, NULL, true);
+ }
+
+ printf("\n");
+ }
+
+ printf("BB%02u: out = [ ", block->bbNum);
+ dispLifeSet(this, optAllFloatVars, block->bbLiveOut);
+ printf("]\n\n");
+ }
+}
+#endif // FEATURE_STACK_FP_X87
+/*****************************************************************************/
+#endif // DEBUG
+/*****************************************************************************/
+
+/*****************************************************************************/
+
+void Compiler::raSetRegVarOrder(
+ var_types regType, regNumber* customVarOrder, unsigned* customVarOrderSize, regMaskTP prefReg, regMaskTP avoidReg)
+{
+ unsigned normalVarOrderSize;
+ const regNumber* normalVarOrder = raGetRegVarOrder(regType, &normalVarOrderSize);
+ unsigned index;
+ unsigned listIndex = 0;
+ regMaskTP usedReg = avoidReg;
+
+ noway_assert(*customVarOrderSize >= normalVarOrderSize);
+
+ if (prefReg)
+ {
+ /* First place the preferred registers at the start of customVarOrder */
+
+ regMaskTP regBit;
+ regNumber regNum;
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNum = normalVarOrder[index];
+ regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ if (prefReg & regBit)
+ {
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ prefReg -= regBit;
+ if (prefReg == 0)
+ break;
+ }
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* Then if byteable registers are preferred place them */
+
+ if (prefReg & RBM_BYTE_REG_FLAG)
+ {
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNum = normalVarOrder[index];
+ regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ if (RBM_BYTE_REGS & regBit)
+ {
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ }
+ }
+ }
+
+#endif // CPU_HAS_BYTE_REGS
+ }
+
+ /* Now place all the non-preferred registers */
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNumber regNum = normalVarOrder[index];
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (usedReg & regBit)
+ continue;
+
+ usedReg |= regBit;
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ }
+
+ if (avoidReg)
+ {
+ /* Now place the "avoid" registers */
+
+ for (index = 0; index < normalVarOrderSize; index++)
+ {
+ regNumber regNum = normalVarOrder[index];
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (avoidReg & regBit)
+ {
+ noway_assert(listIndex < normalVarOrderSize);
+ customVarOrder[listIndex++] = regNum;
+ avoidReg -= regBit;
+ if (avoidReg == 0)
+ break;
+ }
+ }
+ }
+
+ *customVarOrderSize = listIndex;
+ noway_assert(listIndex == normalVarOrderSize);
+}
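raSetRegVarOrder emits every register exactly once, in three tiers: the preferred registers first, the remaining non-avoided registers in the default order, and the avoided registers last. Below is a simplified sketch of that ordering over plain 64-bit masks; it is a toy helper, not the JIT's regMaskTP machinery, and it assumes register indices are below 64.

#include <cstdint>
#include <vector>

// Build a custom allocation order from a default register order: preferred
// registers first, then the rest, then the avoided registers, each register
// appearing exactly once -- the same shape as raSetRegVarOrder.
std::vector<int> BuildVarOrder(const std::vector<int>& defaultOrder, uint64_t prefMask, uint64_t avoidMask)
{
    std::vector<int> order;
    uint64_t         used = avoidMask; // hold the avoided registers back until the end

    auto take = [&](uint64_t wantMask) {
        for (int reg : defaultOrder)
        {
            uint64_t bit = 1ull << reg;
            if (((used & bit) != 0) || ((wantMask & bit) == 0))
            {
                continue;
            }
            used |= bit;
            order.push_back(reg);
        }
    };

    take(prefMask);              // 1. preferred registers
    take(~avoidMask);            // 2. everything else that is not avoided
    for (int reg : defaultOrder) // 3. finally the avoided registers
    {
        if ((avoidMask & (1ull << reg)) != 0)
        {
            order.push_back(reg);
        }
    }
    return order;
}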
+
+/*****************************************************************************
+ *
+ * Setup the raAvoidArgRegMask and rsCalleeRegArgMaskLiveIn
+ */
+
+void Compiler::raSetupArgMasks(RegState* regState)
+{
+ /* Determine the registers holding incoming register arguments */
+ /* and set up raAvoidArgRegMask to the set of registers that we */
+ /* may want to avoid when enregistering the locals. */
+
+ regState->rsCalleeRegArgMaskLiveIn = RBM_NONE;
+ raAvoidArgRegMask = RBM_NONE;
+
+ LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
+
+ for (LclVarDsc* argDsc = lvaTable; argDsc < argsEnd; argDsc++)
+ {
+ noway_assert(argDsc->lvIsParam);
+
+ // Is it a register argument ?
+ if (!argDsc->lvIsRegArg)
+ continue;
+
+ // only process args that apply to the current register file
+ if ((argDsc->IsFloatRegType() && !info.compIsVarArgs && !opts.compUseSoftFP) != regState->rsIsFloat)
+ {
+ continue;
+ }
+
+ // Is it dead on entry?
+ // In certain cases such as when compJmpOpUsed is true,
+ // or when we have a generic type context arg that we must report
+ // then the arguments have to be kept alive throughout the prolog.
+ // So we have to consider it as live on entry.
+ //
+ bool keepArgAlive = compJmpOpUsed;
+ if ((unsigned(info.compTypeCtxtArg) != BAD_VAR_NUM) && lvaReportParamTypeArg() &&
+ ((lvaTable + info.compTypeCtxtArg) == argDsc))
+ {
+ keepArgAlive = true;
+ }
+
+ if (!keepArgAlive && argDsc->lvTracked && !VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, argDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ // The code to set the regState for each arg is outlined for shared use
+ // by linear scan
+ regNumber inArgReg = raUpdateRegStateForArg(regState, argDsc);
+
+ // Do we need to try to avoid this incoming arg register?
+
+ // If it's not tracked, don't do the stuff below.
+ if (!argDsc->lvTracked)
+ continue;
+
+ // If the incoming arg is used after a call it is live across
+ // a call and will have to be allocated to a callee saved
+ // register anyway (a very common case).
+ //
+ // In this case it is pointless to ask the higher ref count
+ // locals to avoid using the incoming arg register.
+
+ unsigned argVarIndex = argDsc->lvVarIndex;
+
+ /* Do the incoming register and the arg variable interfere? */
+
+ if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg], argVarIndex))
+ {
+ // No they do not interfere,
+ // so we add inArgReg to raAvoidArgRegMask
+
+ raAvoidArgRegMask |= genRegMask(inArgReg);
+ }
+#ifdef _TARGET_ARM_
+ if (argDsc->lvType == TYP_DOUBLE)
+ {
+ // Avoid the double register argument pair for register allocation.
+ if (!VarSetOps::IsMember(this, raLclRegIntf[inArgReg + 1], argVarIndex))
+ {
+ raAvoidArgRegMask |= genRegMask(static_cast<regNumber>(inArgReg + 1));
+ }
+ }
+#endif
+ }
+}
+
+#endif // LEGACY_BACKEND
+
+// The code to set the regState for each arg is outlined for shared use
+ // by linear scan. (It is not shared for the System V AMD64 platform.)
+regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc)
+{
+ regNumber inArgReg = argDsc->lvArgReg;
+ regMaskTP inArgMask = genRegMask(inArgReg);
+
+ if (regState->rsIsFloat)
+ {
+ noway_assert(inArgMask & RBM_FLTARG_REGS);
+ }
+ else // regState is for the integer registers
+ {
+ // This might be the fixed return buffer register argument (on ARM64)
+ // We check and allow inArgReg to be theFixedRetBuffReg
+ if (hasFixedRetBuffReg() && (inArgReg == theFixedRetBuffReg()))
+ {
+ // We should have a TYP_BYREF or TYP_I_IMPL arg and not a TYP_STRUCT arg
+ noway_assert(argDsc->lvType == TYP_BYREF || argDsc->lvType == TYP_I_IMPL);
+ // We should have recorded the variable number for the return buffer arg
+ noway_assert(info.compRetBuffArg != BAD_VAR_NUM);
+ }
+ else // we have a regular arg
+ {
+ noway_assert(inArgMask & RBM_ARG_REGS);
+ }
+ }
+
+ regState->rsCalleeRegArgMaskLiveIn |= inArgMask;
+
+#ifdef _TARGET_ARM_
+ if (argDsc->lvType == TYP_DOUBLE)
+ {
+ if (info.compIsVarArgs || opts.compUseSoftFP)
+ {
+ assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
+ assert(!regState->rsIsFloat);
+ }
+ else
+ {
+ assert(regState->rsIsFloat);
+ assert(emitter::isDoubleReg(inArgReg));
+ }
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
+ }
+ else if (argDsc->lvType == TYP_LONG)
+ {
+ assert((inArgReg == REG_R0) || (inArgReg == REG_R2));
+ assert(!regState->rsIsFloat);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1));
+ }
+#endif // _TARGET_ARM_
+
+#if FEATURE_MULTIREG_ARGS
+ if (argDsc->lvType == TYP_STRUCT)
+ {
+ if (argDsc->lvIsHfaRegArg())
+ {
+ assert(regState->rsIsFloat);
+ unsigned cSlots = GetHfaCount(argDsc->lvVerTypeInfo.GetClassHandleForValueClass());
+ for (unsigned i = 1; i < cSlots; i++)
+ {
+ assert(inArgReg + i <= LAST_FP_ARGREG);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast<regNumber>(inArgReg + i));
+ }
+ }
+ else
+ {
+ unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE;
+ for (unsigned i = 1; i < cSlots; i++)
+ {
+ regNumber nextArgReg = (regNumber)(inArgReg + i);
+ if (nextArgReg > REG_ARG_LAST)
+ {
+ break;
+ }
+ assert(regState->rsIsFloat == false);
+ regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg);
+ }
+ }
+ }
+#endif // FEATURE_MULTIREG_ARGS
+
+ return inArgReg;
+}
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+/*****************************************************************************
+ *
+ * Assign variables to live in registers, etc.
+ */
+
+void Compiler::raAssignVars()
+{
+#ifdef DEBUG
+ if (verbose)
+ printf("*************** In raAssignVars()\n");
+#endif
+ /* We need to keep track of which registers we ever touch */
+
+ codeGen->regSet.rsClearRegsModified();
+
+#if FEATURE_STACK_FP_X87
+ // FP register allocation
+ raEnregisterVarsStackFP();
+ raGenerateFPRefCounts();
+#endif
+
+ /* Predict registers used by code generation */
+ rpPredictRegUse(); // New reg predictor/allocator
+
+ // Change all unused promoted non-argument struct locals to a non-GC type (in this case TYP_INT)
+ // so that the gc tracking logic and lvMustInit logic will ignore them.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvType != TYP_STRUCT)
+ continue;
+
+ if (!varDsc->lvPromoted)
+ continue;
+
+ if (varDsc->lvIsParam)
+ continue;
+
+ if (varDsc->lvRefCnt > 0)
+ continue;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Mark unused struct local V%02u\n", lclNum);
+ }
+
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
+
+ if (promotionType == PROMOTION_TYPE_DEPENDENT)
+ {
+ // This should only happen when all its field locals are unused as well.
+
+ for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+ varNum++)
+ {
+ noway_assert(lvaTable[varNum].lvRefCnt == 0);
+ }
+ }
+ else
+ {
+ noway_assert(promotionType == PROMOTION_TYPE_INDEPENDENT);
+ }
+
+ varDsc->lvUnusedStruct = 1;
+#endif
+
+ // Change such struct locals to ints
+
+ varDsc->lvType = TYP_INT; // Bash to a non-gc type.
+ noway_assert(!varDsc->lvTracked);
+ noway_assert(!varDsc->lvRegister);
+ varDsc->lvOnFrame = false; // Force it not to be onstack.
+ varDsc->lvMustInit = false; // Force not to init it.
+ varDsc->lvStkOffs = 0; // Set it to anything other than BAD_STK_OFFS to make genSetScopeInfo() happy
+ }
+}
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+/*****************************************************************************
+ *
+ * Given a regNumber return the correct predictReg enum value
+ */
+
+inline static rpPredictReg rpGetPredictForReg(regNumber reg)
+{
+ return (rpPredictReg)(((int)reg) + ((int)PREDICT_REG_FIRST));
+}
+
+/*****************************************************************************
+ *
+ * Given a varIndex return the correct predictReg enum value
+ */
+
+inline static rpPredictReg rpGetPredictForVarIndex(unsigned varIndex)
+{
+ return (rpPredictReg)(varIndex + ((int)PREDICT_REG_VAR_T00));
+}
+
+/*****************************************************************************
+ *
+ * Given a rpPredictReg return the correct varNumber value
+ */
+
+inline static unsigned rpGetVarIndexForPredict(rpPredictReg predict)
+{
+ return (unsigned)predict - (unsigned)PREDICT_REG_VAR_T00;
+}
+
+/*****************************************************************************
+ *
+ * Given a rpPredictReg return true if it specifies a Txx register
+ */
+
+inline static bool rpHasVarIndexForPredict(rpPredictReg predict)
+{
+ if ((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX))
+ return true;
+ else
+ return false;
+}
+
+/*****************************************************************************
+ *
+ * Given a regmask return the correct predictReg enum value
+ */
+
+static rpPredictReg rpGetPredictForMask(regMaskTP regmask)
+{
+ rpPredictReg result = PREDICT_NONE;
+ if (regmask != 0) /* Proceed only if regmask has at least one bit set */
+ {
+ if (((regmask - 1) & regmask) == 0) /* Check if regmask has one bit set */
+ {
+ DWORD reg = 0;
+ assert(FitsIn<DWORD>(regmask));
+ BitScanForward(&reg, (DWORD)regmask);
+ return rpGetPredictForReg((regNumber)reg);
+ }
+
+#if defined(_TARGET_ARM_)
+ /* It has multiple bits set */
+ else if (regmask == (RBM_R0 | RBM_R1))
+ {
+ result = PREDICT_PAIR_R0R1;
+ }
+ else if (regmask == (RBM_R2 | RBM_R3))
+ {
+ result = PREDICT_PAIR_R2R3;
+ }
+#elif defined(_TARGET_X86_)
+ /* It has multiple bits set */
+ else if (regmask == (RBM_EAX | RBM_EDX))
+ {
+ result = PREDICT_PAIR_EAXEDX;
+ }
+ else if (regmask == (RBM_ECX | RBM_EBX))
+ {
+ result = PREDICT_PAIR_ECXEBX;
+ }
+#endif
+ else /* It doesn't match anything */
+ {
+ result = PREDICT_NONE;
+ assert(!"unreachable");
+ NO_WAY("bad regpair");
+ }
+ }
+ return result;
+}
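+// Illustrative mappings (x86):
+//    rpGetPredictForMask(RBM_ECX)           returns PREDICT_REG_ECX      (single bit set)
+//    rpGetPredictForMask(RBM_EAX | RBM_EDX) returns PREDICT_PAIR_EAXEDX  (long register pair)
+// Any other multi-bit mask trips the "bad regpair" assert above.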
+
+/*****************************************************************************
+ *
+ * Record a variable to register(s) interference
+ */
+
+bool Compiler::rpRecordRegIntf(regMaskTP regMask, VARSET_VALARG_TP life DEBUGARG(const char* msg))
+
+{
+ bool addedIntf = false;
+
+ if (regMask != 0)
+ {
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum))
+ {
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (regMask & regBit)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(newIntf, VarSetOps::Diff(this, life, raLclRegIntf[regNum]));
+ if (!VarSetOps::IsEmpty(this, newIntf))
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ VARSET_ITER_INIT(this, newIntfIter, newIntf, varNum);
+ while (newIntfIter.NextElem(this, &varNum))
+ {
+ unsigned lclNum = lvaTrackedToVarNum[varNum];
+ LclVarDsc* varDsc = &lvaTable[varNum];
+#if FEATURE_FP_REGALLOC
+ // Only print the useful interferences
+ // i.e. floating point LclVar interference with floating point registers
+ // or integer LclVar interference with general purpose registers
+ if (varTypeIsFloating(varDsc->TypeGet()) == genIsValidFloatReg(regNum))
+#endif
+ {
+ printf("Record interference between V%02u,T%02u and %s -- %s\n", lclNum, varNum,
+ getRegName(regNum), msg);
+ }
+ }
+ }
+#endif
+ addedIntf = true;
+ VarSetOps::UnionD(this, raLclRegIntf[regNum], newIntf);
+ }
+
+ regMask -= regBit;
+ if (regMask == 0)
+ break;
+ }
+ }
+ }
+ return addedIntf;
+}
+
+/*****************************************************************************
+ *
+ * Record a new variable to variable(s) interference
+ */
+
+bool Compiler::rpRecordVarIntf(unsigned varNum, VARSET_VALARG_TP intfVar DEBUGARG(const char* msg))
+{
+ noway_assert((varNum >= 0) && (varNum < lvaTrackedCount));
+ noway_assert(!VarSetOps::IsEmpty(this, intfVar));
+
+ VARSET_TP VARSET_INIT_NOCOPY(oneVar, VarSetOps::MakeEmpty(this));
+ VarSetOps::AddElemD(this, oneVar, varNum);
+
+ bool newIntf = fgMarkIntf(intfVar, oneVar);
+
+ if (newIntf)
+ rpAddedVarIntf = true;
+
+#ifdef DEBUG
+ if (verbose && newIntf)
+ {
+ for (unsigned oneNum = 0; oneNum < lvaTrackedCount; oneNum++)
+ {
+ if (VarSetOps::IsMember(this, intfVar, oneNum))
+ {
+ unsigned lclNum = lvaTrackedToVarNum[varNum];
+ unsigned lclOne = lvaTrackedToVarNum[oneNum];
+ printf("Record interference between V%02u,T%02u and V%02u,T%02u -- %s\n", lclNum, varNum, lclOne,
+ oneNum, msg);
+ }
+ }
+ }
+#endif
+
+ return newIntf;
+}
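+// rpRecordRegIntf and rpRecordVarIntf are the two halves of interference tracking:
+// register-to-variable edges land in raLclRegIntf[], while variable-to-variable edges go
+// through fgMarkIntf. Both return true when something new was added (rpRecordVarIntf also
+// sets rpAddedVarIntf), so callers can tell when the interference graph has grown.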
+
+/*****************************************************************************
+ *
+ * Determine preferred register mask for a given predictReg value
+ */
+
+inline regMaskTP Compiler::rpPredictRegMask(rpPredictReg predictReg, var_types type)
+{
+ if (rpHasVarIndexForPredict(predictReg))
+ predictReg = PREDICT_REG;
+
+ noway_assert((unsigned)predictReg < sizeof(rpPredictMap) / sizeof(rpPredictMap[0]));
+ noway_assert(rpPredictMap[predictReg] != RBM_ILLEGAL);
+
+ regMaskTP regAvailForType = rpPredictMap[predictReg];
+ if (varTypeIsFloating(type))
+ {
+ regAvailForType &= RBM_ALLFLOAT;
+ }
+ else
+ {
+ regAvailForType &= RBM_ALLINT;
+ }
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ if ((predictReg >= PREDICT_REG_F0) && (predictReg <= PREDICT_REG_F31))
+ {
+ // Fix 388433 ARM JitStress WP7
+ if ((regAvailForType & RBM_DBL_REGS) != 0)
+ {
+ regAvailForType |= (regAvailForType << 1);
+ }
+ else
+ {
+ regAvailForType = RBM_NONE;
+ }
+ }
+ }
+#endif
+ return regAvailForType;
+}
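+// Note: in the ARM TYP_DOUBLE case above, the candidate mask is widened so that each set
+// bit in RBM_DBL_REGS also gets the next register's bit set, since a TYP_DOUBLE occupies
+// two consecutive float registers.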
+
+/*****************************************************************************
+ *
+ * Predict register choice for a type.
+ *
+ * Adds the predicted registers to rsModifiedRegsMask.
+ */
+regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, regMaskTP lockedRegs)
+{
+ regMaskTP preferReg = rpPredictRegMask(predictReg, type);
+ regNumber regNum;
+ regMaskTP regBits;
+
+ // Add any reserved register to the lockedRegs
+ lockedRegs |= codeGen->regSet.rsMaskResvd;
+
+ /* Clear out the lockedRegs from preferReg */
+ preferReg &= ~lockedRegs;
+
+ if (rpAsgVarNum != -1)
+ {
+ noway_assert((rpAsgVarNum >= 0) && (rpAsgVarNum < (int)lclMAX_TRACKED));
+
+ /* Don't pick the register used by rpAsgVarNum either */
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[rpAsgVarNum];
+ noway_assert(tgtVar->lvRegNum != REG_STK);
+
+ preferReg &= ~genRegMask(tgtVar->lvRegNum);
+ }
+
+ switch (type)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_SHORT:
+ case TYP_CHAR:
+ case TYP_INT:
+ case TYP_UINT:
+ case TYP_REF:
+ case TYP_BYREF:
+#ifdef _TARGET_AMD64_
+ case TYP_LONG:
+#endif // _TARGET_AMD64_
+
+ // expand preferReg to all non-locked registers if no bits set
+ preferReg = codeGen->regSet.rsUseIfZero(preferReg & RBM_ALLINT, RBM_ALLINT & ~lockedRegs);
+
+ if (preferReg == 0) // no bits set?
+ {
+ // Add one predefined spill choice register if no bits set.
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_SPILL_CHOICE;
+ rpPredictSpillCnt++;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp\n");
+#endif
+ }
+
+ if (preferReg != 0)
+ {
+ /* Iterate the registers in the order specified by rpRegTmpOrder */
+
+ for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
+ {
+ regNum = rpRegTmpOrder[index];
+ regBits = genRegMask(regNum);
+
+ if ((preferReg & regBits) == regBits)
+ {
+ goto RET;
+ }
+ }
+ }
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+
+#ifndef _TARGET_AMD64_
+ case TYP_LONG:
+
+ if ((preferReg == 0) || // no bits set?
+ ((preferReg & (preferReg - 1)) == 0)) // or only one bit set?
+ {
+ // expand preferReg to all non-locked registers
+ preferReg = RBM_ALLINT & ~lockedRegs;
+ }
+
+ if (preferReg == 0) // no bits set?
+ {
+ // Add EAX:EDX to the registers
+ // (The jit will introduce two spill temps)
+ preferReg = RBM_PAIR_TMP;
+ rpPredictSpillCnt += 2;
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict two spill temps\n");
+#endif
+ }
+ else if ((preferReg & (preferReg - 1)) == 0) // only one bit set?
+ {
+ if ((preferReg & RBM_PAIR_TMP_LO) == 0)
+ {
+ // Add EAX to the registers
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_PAIR_TMP_LO;
+ }
+ else
+ {
+ // Add EDX to the registers
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_PAIR_TMP_HI;
+ }
+ rpPredictSpillCnt++;
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp\n");
+#endif
+ }
+
+ regPairNo regPair;
+ regPair = codeGen->regSet.rsFindRegPairNo(preferReg);
+ if (regPair != REG_PAIR_NONE)
+ {
+ regBits = genRegPairMask(regPair);
+ goto RET;
+ }
+
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+#endif // _TARGET_AMD64_
+
+#ifdef _TARGET_ARM_
+ case TYP_STRUCT:
+#endif
+
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+
+#if FEATURE_FP_REGALLOC
+ regMaskTP restrictMask;
+ restrictMask = (raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
+ assert((restrictMask & RBM_SPILL_CHOICE_FLT) == RBM_SPILL_CHOICE_FLT);
+
+ // expand preferReg to all available non-locked registers if no bits set
+ preferReg = codeGen->regSet.rsUseIfZero(preferReg & restrictMask, restrictMask & ~lockedRegs);
+ regMaskTP preferDouble;
+ preferDouble = preferReg & (preferReg >> 1);
+
+ if ((preferReg == 0) // no bits set?
+#ifdef _TARGET_ARM_
+ || ((type == TYP_DOUBLE) &&
+ ((preferReg & (preferReg >> 1)) == 0)) // or two consecutive bits set for TYP_DOUBLE
+#endif
+ )
+ {
+ // Add one predefined spill choice register if no bits set.
+ // (The jit will introduce one spill temp)
+ preferReg |= RBM_SPILL_CHOICE_FLT;
+ rpPredictSpillCnt++;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Predict one spill temp (float)\n");
+#endif
+ }
+
+ assert(preferReg != 0);
+
+ /* Iterate the registers in the order specified by raRegFltTmpOrder */
+
+ for (unsigned index = 0; index < REG_FLT_TMP_ORDER_COUNT; index++)
+ {
+ regNum = raRegFltTmpOrder[index];
+ regBits = genRegMask(regNum);
+
+ if (varTypeIsFloating(type))
+ {
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ if ((regBits & RBM_DBL_REGS) == 0)
+ {
+ continue; // We must restrict the set to the double registers
+ }
+ else
+ {
+ // TYP_DOUBLE uses two consecutive registers
+ regBits |= genRegMask(REG_NEXT(regNum));
+ }
+ }
+#endif
+ // See if COMPlus_JitRegisterFP is restricting this FP register
+ //
+ if ((restrictMask & regBits) != regBits)
+ continue;
+ }
+
+ if ((preferReg & regBits) == regBits)
+ {
+ goto RET;
+ }
+ }
+ /* Otherwise we have allocated all registers, so do nothing */
+ break;
+
+#else // !FEATURE_FP_REGALLOC
+
+ return RBM_NONE;
+
+#endif
+
+ default:
+ noway_assert(!"unexpected type in reg use prediction");
+ }
+
+ /* Abnormal return */
+ noway_assert(!"Ran out of registers in rpPredictRegPick");
+ return RBM_NONE;
+
+RET:
+ /*
+ * If during the first prediction we need to allocate
+ * one of the registers that we used for coloring locals
+ * then flag this by setting rpPredictAssignAgain.
+ * We will have to go back and repredict the registers
+ */
+ if ((rpPasses == 0) && ((rpPredictAssignMask & regBits) == regBits))
+ rpPredictAssignAgain = true;
+
+ // Add a register interference to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars) || !VarSetOps::IsEmpty(this, rpUseInPlace))
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(lastUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::Assign(this, lastUse, rpLastUseVars);
+ VARSET_TP VARSET_INIT_NOCOPY(inPlaceUse, VarSetOps::MakeEmpty(this));
+ VarSetOps::Assign(this, inPlaceUse, rpUseInPlace);
+ // While we still have any lastUse or inPlaceUse bits
+ VARSET_TP VARSET_INIT_NOCOPY(useUnion, VarSetOps::Union(this, lastUse, inPlaceUse));
+
+ VARSET_TP VARSET_INIT_NOCOPY(varAsSet, VarSetOps::MakeEmpty(this));
+ VARSET_ITER_INIT(this, iter, useUnion, varNum);
+ while (iter.NextElem(this, &varNum))
+ {
+ // We'll need this for one of the calls...
+ VarSetOps::ClearD(this, varAsSet);
+ VarSetOps::AddElemD(this, varAsSet, varNum);
+
+ // Is this variable in the lastUse set?
+ if (VarSetOps::IsMember(this, lastUse, varNum))
+ {
+ // Record a register to variable interference
+ rpRecordRegIntf(regBits, varAsSet DEBUGARG("last use RegPick"));
+ }
+
+ // Is this variable in the inPlaceUse set?
+ if (VarSetOps::IsMember(this, inPlaceUse, varNum))
+ {
+ // Record a register to variable interference
+ rpRecordRegIntf(regBits, varAsSet DEBUGARG("used in place RegPick"));
+ }
+ }
+ }
+ codeGen->regSet.rsSetRegsModified(regBits);
+
+ return regBits;
+}
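+// In short: rpPredictRegPick hands back a mask for one register (a register pair for
+// TYP_LONG on 32-bit targets, or two consecutive float registers for an ARM TYP_DOUBLE),
+// and bumps rpPredictSpillCnt, predicting a spill temp, whenever every candidate register
+// is locked and it must fall back to the predefined spill-choice register.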
+
+/*****************************************************************************
+ *
+ * Predict integer register use for generating an address mode for a tree,
+ * by setting tree->gtUsedRegs to all registers used by this tree and its
+ * children.
+ * tree - is the child of a GT_IND node
+ * type - the type of the GT_IND node (floating point/integer)
+ * lockedRegs - are the registers which are currently held by
+ * a previously evaluated node.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars
+ * lenCSE - is non-NULL only when we have a lenCSE expression
+ *
+ * Return the scratch registers to be held by this tree. (one or two registers
+ * to form an address expression)
+ */
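+// The address forms recognized here look roughly like (target permitting):
+//      [op1 + cns]    [op1 + op2 + cns]    [op1 + op2<<sh + cns]
+// where the shift 'sh' is limited to 1..3 on x86/x64 and 1..30 on ARM.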
+
+regMaskTP Compiler::rpPredictAddressMode(
+ GenTreePtr tree, var_types type, regMaskTP lockedRegs, regMaskTP rsvdRegs, GenTreePtr lenCSE)
+{
+ GenTreePtr op1;
+ GenTreePtr op2;
+ GenTreePtr opTemp;
+ genTreeOps oper = tree->OperGet();
+ regMaskTP op1Mask;
+ regMaskTP op2Mask;
+ regMaskTP regMask;
+ ssize_t sh;
+ ssize_t cns = 0;
+ bool rev;
+ bool hasTwoAddConst = false;
+ bool restoreLastUseVars = false;
+ VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::MakeEmpty(this));
+
+ /* do we need to save and restore the rpLastUseVars set ? */
+ if ((rsvdRegs & RBM_LASTUSE) && (lenCSE == NULL))
+ {
+ restoreLastUseVars = true;
+ VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
+ }
+ rsvdRegs &= ~RBM_LASTUSE;
+
+ /* if not an add, then just force it to a register */
+
+ if (oper != GT_ADD)
+ {
+ if (oper == GT_ARR_ELEM)
+ {
+ regMask = rpPredictTreeRegUse(tree, PREDICT_NONE, lockedRegs, rsvdRegs);
+ goto DONE;
+ }
+ else
+ {
+ goto NO_ADDR_EXPR;
+ }
+ }
+
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+ rev = ((tree->gtFlags & GTF_REVERSE_OPS) != 0);
+
+ /* look for (x + y) + icon address mode */
+
+ if (op2->OperGet() == GT_CNS_INT)
+ {
+ cns = op2->gtIntCon.gtIconVal;
+
+ /* if not an add, then just force op1 into a register */
+ if (op1->OperGet() != GT_ADD)
+ goto ONE_ADDR_EXPR;
+
+ hasTwoAddConst = true;
+
+ /* Record the 'rev' flag, reverse evaluation order */
+ rev = ((op1->gtFlags & GTF_REVERSE_OPS) != 0);
+
+ op2 = op1->gtOp.gtOp2;
+ op1 = op1->gtOp.gtOp1; // Overwrite op1 last!!
+ }
+
+ /* Check for CNS_INT or LSH of CNS_INT in op2 slot */
+
+ sh = 0;
+ if (op2->OperGet() == GT_LSH)
+ {
+ if (op2->gtOp.gtOp2->OperGet() == GT_CNS_INT)
+ {
+ sh = op2->gtOp.gtOp2->gtIntCon.gtIconVal;
+ opTemp = op2->gtOp.gtOp1;
+ }
+ else
+ {
+ opTemp = NULL;
+ }
+ }
+ else
+ {
+ opTemp = op2;
+ }
+
+ if (opTemp != NULL)
+ {
+ if (opTemp->OperGet() == GT_NOP)
+ {
+ opTemp = opTemp->gtOp.gtOp1;
+ }
+
+ // Is this a const operand?
+ if (opTemp->OperGet() == GT_CNS_INT)
+ {
+ // Compute the new cns value that Codegen will end up using
+ cns += (opTemp->gtIntCon.gtIconVal << sh);
+
+ goto ONE_ADDR_EXPR;
+ }
+ }
+
+ /* Check for LSH in op1 slot */
+
+ if (op1->OperGet() != GT_LSH)
+ goto TWO_ADDR_EXPR;
+
+ opTemp = op1->gtOp.gtOp2;
+
+ if (opTemp->OperGet() != GT_CNS_INT)
+ goto TWO_ADDR_EXPR;
+
+ sh = opTemp->gtIntCon.gtIconVal;
+
+ /* Check for LSH of 0, special case */
+ if (sh == 0)
+ goto TWO_ADDR_EXPR;
+
+#if defined(_TARGET_XARCH_)
+
+ /* Check for LSH of 1 2 or 3 */
+ if (sh > 3)
+ goto TWO_ADDR_EXPR;
+
+#elif defined(_TARGET_ARM_)
+
+ /* Check for LSH of 1 to 30 */
+ if (sh > 30)
+ goto TWO_ADDR_EXPR;
+
+#else
+
+ goto TWO_ADDR_EXPR;
+
+#endif
+
+ /* Matched a leftShift by 'sh' subtree, move op1 down */
+ op1 = op1->gtOp.gtOp1;
+
+TWO_ADDR_EXPR:
+
+ /* Now we have to evaluate op1 and op2 into registers */
+
+ /* Evaluate op1 and op2 in the correct order */
+ if (rev)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | op2Mask, rsvdRegs);
+ }
+ else
+ {
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | op1Mask, rsvdRegs);
+ }
+
+ /* If op1 and op2 must be spilled and reloaded then
+ * op1 and op2 might be reloaded into the same register
+ * This can only happen when all the registers are lockedRegs
+ */
+ if ((op1Mask == op2Mask) && (op1Mask != 0))
+ {
+ /* We'll need to grab a different register for op2 */
+ op2Mask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1Mask);
+ }
+
+#ifdef _TARGET_ARM_
+ // On the ARM we need a scratch register to evaluate the shifted operand for trees that have this form
+ // [op2 + op1<<sh + cns]
+ // when op1 is an enregistered variable, thus the op1Mask is RBM_NONE
+ //
+ if (hasTwoAddConst && (sh != 0) && (op1Mask == RBM_NONE))
+ {
+ op1Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+
+ //
+ // On the ARM we will need at least one scratch register for trees that have this form:
+ // [op1 + op2 + cns] or [op1 + op2<<sh + cns]
+ // or for a float/double or long when we have both op1 and op2
+ // or when we have a 'cns' that is too large for the ld/st instruction
+ //
+ if (hasTwoAddConst || varTypeIsFloating(type) || (type == TYP_LONG) || !codeGen->validDispForLdSt(cns, type))
+ {
+ op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+
+ //
+ // If we create a CSE that immediately dies then we may need to add an additional register interference
+ // so we don't color the CSE into R3
+ //
+ if (!rev && (op1Mask != RBM_NONE) && (op2->OperGet() == GT_COMMA))
+ {
+ opTemp = op2->gtOp.gtOp2;
+ if (opTemp->OperGet() == GT_LCL_VAR)
+ {
+ unsigned varNum = opTemp->gtLclVar.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[varNum];
+
+ if (varDsc->lvTracked && !VarSetOps::IsMember(this, compCurLife, varDsc->lvVarIndex))
+ {
+ rpRecordRegIntf(RBM_TMP_0,
+ VarSetOps::MakeSingleton(this, varDsc->lvVarIndex) DEBUGARG("dead CSE (gt_ind)"));
+ }
+ }
+ }
+#endif
+
+ regMask = (op1Mask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto DONE;
+
+ONE_ADDR_EXPR:
+
+ /* now we have to evaluate op1 into a register */
+
+ op1Mask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+ op2Mask = RBM_NONE;
+
+#ifdef _TARGET_ARM_
+ //
+ // On the ARM we will need another scratch register when we have a 'cns' that is too large for the ld/st
+ // instruction
+ //
+ if (!codeGen->validDispForLdSt(cns, type))
+ {
+ op2Mask |= rpPredictRegPick(TYP_INT, PREDICT_REG, (lockedRegs | op1Mask | op2Mask));
+ }
+#endif
+
+ regMask = (op1Mask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto DONE;
+
+NO_ADDR_EXPR:
+
+#if !CPU_LOAD_STORE_ARCH
+ if (oper == GT_CNS_INT)
+ {
+ /* Indirect of a constant does not require a register */
+ regMask = RBM_NONE;
+ }
+ else
+#endif
+ {
+ /* now we have to evaluate tree into a register */
+ regMask = rpPredictTreeRegUse(tree, PREDICT_REG, lockedRegs, rsvdRegs);
+ }
+
+DONE:
+ regMaskTP regUse = tree->gtUsedRegs;
+
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+ // Add interference between the current set of live variables and
+ // the set of temporary registers needed to evaluate the subtree
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use (gt_ind)"));
+ }
+ }
+
+ /* Do we need to restore the oldLastUseVars value? */
+ if (restoreLastUseVars)
+ {
+ /*
+ * If we used a GT_ASG targeted register then we need to add
+ * a variable interference between any new last use variables
+ * and the GT_ASG targeted register
+ */
+ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
+ {
+ rpRecordVarIntf(rpAsgVarNum,
+ VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars) DEBUGARG("asgn conflict (gt_ind)"));
+ }
+ VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
+ }
+
+ return regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given local as the target of a GC ref assignment; with NOGC write
+ * barriers, also record an interference so the local never lands in the write
+ * barrier register (EDX).
+ */
+
+void Compiler::rpPredictRefAssign(unsigned lclNum)
+{
+ LclVarDsc* varDsc = lvaTable + lclNum;
+
+ varDsc->lvRefAssign = 1;
+
+#if NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and REG WRITE BARRIER -- ref assign\n", lclNum,
+ varDsc->lvVarIndex);
+ }
+#endif
+
+ /* Make sure that write barrier pointer variables never land in EDX */
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
+#endif // NOGC_WRITE_BARRIERS
+}
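+// rpPredictRefAssign is invoked from the write-barrier assignment handling further below,
+// once for each enregisterable local feeding the target address (and for the stored value
+// when it is a local), so that with NOGC write barriers those locals are kept out of the
+// write barrier register.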
+
+/*****************************************************************************
+ *
+ * Predict the internal temp physical register usage for a block assignment tree,
+ * by setting tree->gtUsedRegs.
+ * Records the internal temp physical register usage for this tree.
+ * Returns a mask of interfering registers for this tree.
+ *
+ * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
+ * to the set of scratch registers needed when evaluating the tree.
+ * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
+ * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
+ * predict additional internal temp physical registers to spill into.
+ *
+ * tree - is the GT_ASG block assignment node
+ * predictReg - what type of register does the tree need
+ * lockedRegs - are the registers which are currently held by a previously evaluated node.
+ * Don't modify lockedRegs as it is used at the end to compute a spill mask.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars.
+ */
+regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree,
+ rpPredictReg predictReg,
+ regMaskTP lockedRegs,
+ regMaskTP rsvdRegs)
+{
+ regMaskTP regMask = RBM_NONE;
+ regMaskTP interferingRegs = RBM_NONE;
+
+ bool hasGCpointer = false;
+ bool dstIsOnStack = false;
+ bool useMemHelper = false;
+ bool useBarriers = false;
+ GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
+ GenTreePtr dstAddr = dst->Addr();
+ GenTreePtr srcAddrOrFill = tree->gtGetOp2();
+
+ size_t blkSize = dst->gtBlkSize;
+
+ hasGCpointer = (dst->HasGCPtr());
+
+ bool isCopyBlk = tree->OperIsCopyBlkOp();
+ bool isCopyObj = isCopyBlk && hasGCpointer;
+ bool isInitBlk = tree->OperIsInitBlkOp();
+
+ if (isCopyBlk)
+ {
+ assert(srcAddrOrFill->OperIsIndir());
+ srcAddrOrFill = srcAddrOrFill->AsIndir()->Addr();
+ }
+ else
+ {
+ // For initBlk, we don't need to worry about the GC pointers.
+ hasGCpointer = false;
+ }
+
+ if (blkSize != 0)
+ {
+ if (isCopyObj)
+ {
+ dstIsOnStack = (dstAddr->gtOper == GT_ADDR && (dstAddr->gtFlags & GTF_ADDR_ONSTACK));
+ }
+
+ if (isInitBlk)
+ {
+ if (srcAddrOrFill->OperGet() != GT_CNS_INT)
+ {
+ useMemHelper = true;
+ }
+ }
+ }
+ else
+ {
+ useMemHelper = true;
+ }
+
+ if (hasGCpointer && !dstIsOnStack)
+ {
+ useBarriers = true;
+ }
+
+#ifdef _TARGET_ARM_
+ //
+ // On ARM, for COPYBLK & INITBLK we have special treatment for constant lengths.
+ //
+ if (!useMemHelper && !useBarriers)
+ {
+ bool useLoop = false;
+ unsigned fullStoreCount = blkSize / TARGET_POINTER_SIZE;
+
+ // A mask to use to force the predictor to choose low registers (to reduce code size)
+ regMaskTP avoidReg = (RBM_R12 | RBM_LR);
+
+ // Allow the src and dst to be used in place, unless we use a loop, in which
+ // case we will need scratch registers as we will be writing to them.
+ rpPredictReg srcAndDstPredict = PREDICT_REG;
+
+ // Will we be using a loop to implement this INITBLK/COPYBLK?
+ if ((isCopyBlk && (fullStoreCount >= 8)) || (isInitBlk && (fullStoreCount >= 16)))
+ {
+ useLoop = true;
+ avoidReg = RBM_NONE;
+ srcAndDstPredict = PREDICT_SCRATCH_REG;
+ }
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs,
+ dstAddr->gtRsvdRegs | avoidReg | RBM_LASTUSE);
+ regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs | regMask, avoidReg);
+ }
+ else
+ {
+ regMask |= rpPredictTreeRegUse(dstAddr, srcAndDstPredict, lockedRegs,
+ srcAddrOrFill->gtRsvdRegs | avoidReg | RBM_LASTUSE);
+ regMask |= rpPredictTreeRegUse(srcAddrOrFill, srcAndDstPredict, lockedRegs | regMask, avoidReg);
+ }
+
+ // We need at least one scratch register for a copyBlk
+ if (isCopyBlk)
+ {
+ // Pick a low register to reduce the code size
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
+ }
+
+ if (useLoop)
+ {
+ if (isCopyBlk)
+ {
+ // We need a second temp register for a copyBlk (our code gen is load two/store two)
+ // Pick another low register to reduce the code size
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | avoidReg);
+ }
+
+ // We need a loop index register
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+ }
+
+ tree->gtUsedRegs = dstAddr->gtUsedRegs | srcAddrOrFill->gtUsedRegs | (regMaskSmall)regMask;
+
+ return interferingRegs;
+ }
+#endif
+ // What order should the Dest, Val/Src, and Size be calculated
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+#if defined(_TARGET_XARCH_)
+ fgOrderBlockOps(tree, RBM_EDI, (isInitBlk) ? RBM_EAX : RBM_ESI, RBM_ECX, opsPtr, regsPtr);
+
+ // We're going to use these, might as well make them available now
+
+ codeGen->regSet.rsSetRegsModified(RBM_EDI | RBM_ECX);
+ if (isCopyBlk)
+ codeGen->regSet.rsSetRegsModified(RBM_ESI);
+
+#elif defined(_TARGET_ARM_)
+
+ if (useMemHelper)
+ {
+ // For all other cases that involve non-constants, we just call memcpy/memset
+ // JIT helpers
+ fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, RBM_ARG_2, opsPtr, regsPtr);
+ interferingRegs |= RBM_CALLEE_TRASH;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH for memcpy/memset\n");
+#endif
+ }
+ else // useBarriers
+ {
+ assert(useBarriers);
+ assert(isCopyBlk);
+
+ fgOrderBlockOps(tree, RBM_ARG_0, RBM_ARG_1, REG_TMP_1, opsPtr, regsPtr);
+
+ // For this case Codegen will call the CORINFO_HELP_ASSIGN_BYREF helper
+ interferingRegs |= RBM_CALLEE_TRASH_NOGC;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH_NOGC for Byref WriteBarrier\n");
+#endif
+ }
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for INITBLK/COPYBLK"
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+ regMaskTP opsPtr2RsvdRegs = opsPtr[2] == nullptr ? RBM_NONE : opsPtr[2]->gtRsvdRegs;
+ regMask |= rpPredictTreeRegUse(opsPtr[0], rpGetPredictForMask(regsPtr[0]), lockedRegs,
+ opsPtr[1]->gtRsvdRegs | opsPtr2RsvdRegs | RBM_LASTUSE);
+ regMask |= regsPtr[0];
+ opsPtr[0]->gtUsedRegs |= regsPtr[0];
+ rpRecordRegIntf(regsPtr[0], compCurLife DEBUGARG("movsd dest"));
+
+ regMask |= rpPredictTreeRegUse(opsPtr[1], rpGetPredictForMask(regsPtr[1]), lockedRegs | regMask,
+ opsPtr2RsvdRegs | RBM_LASTUSE);
+ regMask |= regsPtr[1];
+ opsPtr[1]->gtUsedRegs |= regsPtr[1];
+ rpRecordRegIntf(regsPtr[1], compCurLife DEBUGARG("movsd src"));
+
+ regMaskSmall opsPtr2UsedRegs = (regMaskSmall)regsPtr[2];
+ if (opsPtr[2] == nullptr)
+ {
+ // If we have no "size" node, we will predict that regsPtr[2] will be used for the size.
+ // Note that it is quite possible that no register is required, but this preserves
+ // former behavior.
+ regMask |= rpPredictRegPick(TYP_INT, rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask);
+ rpRecordRegIntf(regsPtr[2], compCurLife DEBUGARG("tmp use"));
+ }
+ else
+ {
+ regMask |= rpPredictTreeRegUse(opsPtr[2], rpGetPredictForMask(regsPtr[2]), lockedRegs | regMask, RBM_NONE);
+ opsPtr[2]->gtUsedRegs |= opsPtr2UsedRegs;
+ }
+ regMask |= opsPtr2UsedRegs;
+
+ tree->gtUsedRegs = opsPtr[0]->gtUsedRegs | opsPtr[1]->gtUsedRegs | opsPtr2UsedRegs | (regMaskSmall)regMask;
+ return interferingRegs;
+}
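+// To summarize the block-op prediction: small constant-size ARM copies/inits are predicted
+// as inline load/store sequences (with an optional loop and scratch registers), while the
+// remaining cases target the fixed registers chosen by fgOrderBlockOps: EDI/ESI/ECX style
+// string instructions on x86, or the argument registers of the memcpy/memset/byref-assign
+// helpers on ARM.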
+
+/*****************************************************************************
+ *
+ * Predict the internal temp physical register usage for a tree by setting tree->gtUsedRegs.
+ * Returns a regMask with the internal temp physical register usage for this tree.
+ *
+ * Each of the switch labels in this function updates regMask and assigns tree->gtUsedRegs
+ * to the set of scratch registers needed when evaluating the tree.
+ * Generally tree->gtUsedRegs and the return value retMask are the same, except when the
+ * parameter "lockedRegs" conflicts with the computed tree->gtUsedRegs, in which case we
+ * predict additional internal temp physical registers to spill into.
+ *
+ * tree - is the tree node whose register usage is being predicted
+ * predictReg - what type of register does the tree need
+ * lockedRegs - are the registers which are currently held by a previously evaluated node.
+ * Don't modify lockedRegs as it is used at the end to compute a spill mask.
+ * rsvdRegs - registers which should not be allocated because they will
+ * be needed to evaluate a node in the future
+ * - Also, if rsvdRegs has the RBM_LASTUSE bit set then
+ * the rpLastUseVars set should be saved and restored
+ * so that we don't add any new variables to rpLastUseVars.
+ */
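+//
+// The general pattern repeated throughout this routine: decide on a predict class for the
+// node, recurse on its operands with the appropriate lockedRegs/rsvdRegs masks, record any
+// register or variable interferences, then set tree->gtUsedRegs before returning.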
+
+#pragma warning(disable : 4701)
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
+ rpPredictReg predictReg,
+ regMaskTP lockedRegs,
+ regMaskTP rsvdRegs)
+{
+ regMaskTP regMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op2Mask;
+ regMaskTP tmpMask;
+ rpPredictReg op1PredictReg;
+ rpPredictReg op2PredictReg;
+ LclVarDsc* varDsc = NULL;
+ VARSET_TP VARSET_INIT_NOCOPY(oldLastUseVars, VarSetOps::UninitVal());
+
+ VARSET_TP VARSET_INIT_NOCOPY(varBits, VarSetOps::UninitVal());
+ VARSET_TP VARSET_INIT_NOCOPY(lastUseVarBits, VarSetOps::MakeEmpty(this));
+
+ bool restoreLastUseVars = false;
+ regMaskTP interferingRegs = RBM_NONE;
+
+#ifdef DEBUG
+ // if (verbose) printf("rpPredictTreeRegUse() [%08x]\n", tree);
+ noway_assert(tree);
+ noway_assert(((RBM_ILLEGAL & RBM_ALLINT) == 0));
+ noway_assert(RBM_ILLEGAL);
+ noway_assert((lockedRegs & RBM_ILLEGAL) == 0);
+ /* impossible values, to make sure that we set them */
+ tree->gtUsedRegs = RBM_ILLEGAL;
+#endif
+
+ /* Figure out what kind of a node we have */
+
+ genTreeOps oper = tree->OperGet();
+ var_types type = tree->TypeGet();
+ unsigned kind = tree->OperKind();
+
+ // In the comma case, we care about whether this is "effectively" ADDR(IND(...))
+ genTreeOps effectiveOper = tree->gtEffectiveVal()->OperGet();
+ if ((predictReg == PREDICT_ADDR) && (effectiveOper != GT_IND))
+ predictReg = PREDICT_NONE;
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ // The only place where predictReg is set to a var is in the PURE
+ // assignment case where varIndex is the var being assigned to.
+ // We need to check whether the variable is used between here and
+ // its redefinition.
+ unsigned varIndex = rpGetVarIndexForPredict(predictReg);
+ unsigned lclNum = lvaTrackedToVarNum[varIndex];
+ bool found = false;
+ for (GenTreePtr nextTree = tree->gtNext; nextTree != NULL && !found; nextTree = nextTree->gtNext)
+ {
+ if (nextTree->gtOper == GT_LCL_VAR && nextTree->gtLclVarCommon.gtLclNum == lclNum)
+ {
+ // Is this the pure assignment?
+ if ((nextTree->gtFlags & GTF_VAR_DEF) == 0)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ found = true;
+ break;
+ }
+ }
+ assert(found);
+ }
+
+ if (rsvdRegs & RBM_LASTUSE)
+ {
+ restoreLastUseVars = true;
+ VarSetOps::Assign(this, oldLastUseVars, rpLastUseVars);
+ rsvdRegs &= ~RBM_LASTUSE;
+ }
+
+ /* Is this a constant or leaf node? */
+
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ bool lastUse = false;
+ regMaskTP enregMask = RBM_NONE;
+
+ switch (oper)
+ {
+#ifdef _TARGET_ARM_
+ case GT_CNS_DBL:
+ // Codegen for floating point constants on the ARM is currently
+ // movw/movt rT1, <lo32 bits>
+ // movw/movt rT2, <hi32 bits>
+ // vmov.i2d dT0, rT1,rT2
+ //
+ // For TYP_FLOAT one integer register is required
+ //
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ if (type == TYP_DOUBLE)
+ {
+ // For TYP_DOUBLE a second integer register is required
+ //
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+
+ // We also need a floating point register that we keep
+ //
+ if (predictReg == PREDICT_NONE)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = regMask | tmpMask;
+ goto RETURN_CHECK;
+#endif
+
+ case GT_CNS_INT:
+ case GT_CNS_LNG:
+
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (type == TYP_LONG)
+ {
+ assert(oper == GT_CNS_LNG);
+
+ if (tgtVar->lvOtherReg == REG_STK)
+ {
+ // We do need one register for a partially enregistered long
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ else
+ {
+#if !CPU_LOAD_STORE_ARCH
+ /* If the constant is a handle then it will need to have a relocation
+ applied to it. It will need to be loaded into a register.
+ But never throw away an existing hint.
+ */
+ if (opts.compReloc && tree->IsCnsIntOrI() && tree->IsIconHandle())
+#endif
+ {
+ if (predictReg == PREDICT_NONE)
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ break;
+
+ case GT_NO_OP:
+ break;
+
+ case GT_CLS_VAR:
+ if ((predictReg == PREDICT_NONE) && (genActualType(type) == TYP_INT) &&
+ (genTypeSize(type) < sizeof(int)))
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#ifdef _TARGET_ARM_
+ // Unaligned loads/stores for floating point values must first be loaded into integer register(s)
+ //
+ if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
+ {
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+ // We need a temp register in some cases of loads/stores to a class var
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#endif
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (type == TYP_LONG)
+ {
+ if (tgtVar->lvOtherReg == REG_STK)
+ {
+ // We do need one register for a partially enregistered long
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ break;
+
+ case GT_LCL_FLD:
+#ifdef _TARGET_ARM_
+ // Check for a misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(type))
+ {
+ if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ // These integer register(s) immediately die
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | rsvdRegs | tmpMask);
+ }
+ }
+#endif
+ __fallthrough;
+
+ case GT_LCL_VAR:
+ case GT_REG_VAR:
+
+ varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
+
+ VarSetOps::Assign(this, varBits, fgGetVarBits(tree));
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree, &lastUseVarBits);
+ lastUse = !VarSetOps::IsEmpty(this, lastUseVarBits);
+
+#if FEATURE_STACK_FP_X87
+ // If it's a floating point var, there's nothing to do
+ if (varTypeIsFloating(type))
+ {
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+ goto RETURN_CHECK;
+ }
+#endif
+
+ // If the variable is already a register variable, no need to go further.
+ if (oper == GT_REG_VAR)
+ break;
+
+ /* Apply the type of predictReg to the LCL_VAR */
+
+ if (predictReg == PREDICT_REG)
+ {
+ PREDICT_REG_COMMON:
+ if (varDsc->lvRegNum == REG_STK)
+ break;
+
+ goto GRAB_COUNT;
+ }
+ else if (predictReg == PREDICT_SCRATCH_REG)
+ {
+ noway_assert(predictReg == PREDICT_SCRATCH_REG);
+
+ /* Is this the last use of a local var? */
+ if (lastUse)
+ {
+ if (VarSetOps::IsEmptyIntersection(this, rpUseInPlace, lastUseVarBits))
+ goto PREDICT_REG_COMMON;
+ }
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Get the tracked local variable that has an lvVarIndex of tgtIndex1 */
+ {
+ unsigned tgtIndex1 = rpGetVarIndexForPredict(predictReg);
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex1];
+ VarSetOps::MakeSingleton(this, tgtIndex1);
+
+ noway_assert(tgtVar->lvVarIndex == tgtIndex1);
+ noway_assert(tgtVar->lvRegNum != REG_STK); /* Must have been enregistered */
+#ifndef _TARGET_AMD64_
+ // On amd64 we have the occasional spec-allowed implicit conversion from TYP_I_IMPL to TYP_INT
+ // so this assert is meaningless
+ noway_assert((type != TYP_LONG) || (tgtVar->TypeGet() == TYP_LONG));
+#endif // !_TARGET_AMD64_
+
+ if (varDsc->lvTracked)
+ {
+ unsigned srcIndex;
+ srcIndex = varDsc->lvVarIndex;
+
+ // If this variable has its last use here then we will prefer
+ // to color it to the same register as tgtVar.
+ if (lastUse)
+ {
+ /*
+ * Add an entry in the lvaVarPref graph to indicate
+ * that it would be worthwhile to color these two variables
+ * into the same physical register.
+ * This will help us avoid having an extra copy instruction
+ */
+ VarSetOps::AddElemD(this, lvaVarPref[srcIndex], tgtIndex1);
+ VarSetOps::AddElemD(this, lvaVarPref[tgtIndex1], srcIndex);
+ }
+
+ // Add a variable interference from srcIndex to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ rpRecordVarIntf(srcIndex, rpLastUseVars DEBUGARG("src reg conflict"));
+ }
+ }
+ rpAsgVarNum = tgtIndex1;
+
+ /* We will rely on the target enregistered variable from the GT_ASG */
+ varDsc = tgtVar;
+ }
+ GRAB_COUNT:
+ unsigned grabCount;
+ grabCount = 0;
+
+ if (genIsValidFloatReg(varDsc->lvRegNum))
+ {
+ enregMask = genRegMaskFloat(varDsc->lvRegNum, varDsc->TypeGet());
+ }
+ else
+ {
+ enregMask = genRegMask(varDsc->lvRegNum);
+ }
+
+#ifdef _TARGET_ARM_
+ if ((type == TYP_DOUBLE) && (varDsc->TypeGet() == TYP_FLOAT))
+ {
+ // We need to compute the intermediate value using a TYP_DOUBLE
+ // but we are storing the result in a TYP_FLOAT enregistered variable
+ //
+ grabCount++;
+ }
+ else
+#endif
+ {
+ /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
+ if (enregMask & (rsvdRegs | lockedRegs))
+ {
+ grabCount++;
+ }
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ if (varDsc->lvOtherReg != REG_STK)
+ {
+ tmpMask = genRegMask(varDsc->lvOtherReg);
+ enregMask |= tmpMask;
+
+ /* We can't trust a prediction of rsvdRegs or lockedRegs sets */
+ if (tmpMask & (rsvdRegs | lockedRegs))
+ grabCount++;
+ }
+ else // lvOtherReg == REG_STK
+ {
+ grabCount++;
+ }
+ }
+#endif // _TARGET_64BIT_
+ }
+
+ varDsc->lvDependReg = true;
+
+ if (grabCount == 0)
+ {
+ /* Does not need a register */
+ predictReg = PREDICT_NONE;
+ // noway_assert(!VarSetOps::IsEmpty(this, varBits));
+ VarSetOps::UnionD(this, rpUseInPlace, varBits);
+ }
+ else // (grabCount > 0)
+ {
+#ifndef _TARGET_64BIT_
+ /* If the type is TYP_LONG and we only need one register, change the type to TYP_INT */
+ if ((type == TYP_LONG) && (grabCount == 1))
+ {
+ /* We will need to pick one register */
+ type = TYP_INT;
+ // noway_assert(!VarSetOps::IsEmpty(this, varBits));
+ VarSetOps::UnionD(this, rpUseInPlace, varBits);
+ }
+ noway_assert((type == TYP_DOUBLE) ||
+ (grabCount == (genTypeSize(genActualType(type)) / REGSIZE_BYTES)));
+#else // !_TARGET_64BIT_
+ noway_assert(grabCount == 1);
+#endif // !_TARGET_64BIT_
+ }
+ }
+ else if (type == TYP_STRUCT)
+ {
+#ifdef _TARGET_ARM_
+ // TODO-ARM-Bug?: Passing structs in registers on ARM hits an assert here when
+ // predictReg is PREDICT_REG_R0 to PREDICT_REG_R3
+ // As a workaround we just bash it to PREDICT_NONE here
+ //
+ if (predictReg != PREDICT_NONE)
+ predictReg = PREDICT_NONE;
+#endif
+ // Currently predictReg is saying that we will not need any scratch registers
+ noway_assert(predictReg == PREDICT_NONE);
+
+ /* We may need to sign or zero extend a small type when pushing a struct */
+ if (varDsc->lvPromoted && !varDsc->lvAddrExposed)
+ {
+ for (unsigned varNum = varDsc->lvFieldLclStart;
+ varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; varNum++)
+ {
+ LclVarDsc* fldVar = lvaTable + varNum;
+
+ if (fldVar->lvStackAligned())
+ {
+ // When we are stack aligned, Codegen will just use
+ // a push instruction and thus doesn't need any register,
+ // since we can push either a register or a stack frame location
+ continue;
+ }
+
+ if (varTypeIsByte(fldVar->TypeGet()))
+ {
+ // We will need to reserve one byteable register,
+ //
+ type = TYP_BYTE;
+ predictReg = PREDICT_SCRATCH_REG;
+#if CPU_HAS_BYTE_REGS
+ // It is best to enregister this fldVar in a byteable register
+ //
+ fldVar->addPrefReg(RBM_BYTE_REG_FLAG, this);
+#endif
+ }
+ else if (varTypeIsShort(fldVar->TypeGet()))
+ {
+ bool isEnregistered = fldVar->lvTracked && (fldVar->lvRegNum != REG_STK);
+ // If fldVar is not enregistered then we will need a scratch register
+ //
+ if (!isEnregistered)
+ {
+ // We will need either an int register or a byte register
+ // If we are not requesting a byte register we will request an int register
+ //
+ if (type != TYP_BYTE)
+ type = TYP_INT;
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ regMaskTP preferReg = rpPredictRegMask(predictReg, type);
+ if (preferReg != 0)
+ {
+ if ((genTypeStSz(type) == 1) || (genCountBits(preferReg) <= genTypeStSz(type)))
+ {
+ varDsc->addPrefReg(preferReg, this);
+ }
+ }
+ }
+ break; /* end of case GT_LCL_VAR */
+
+ case GT_JMP:
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark the registers required to emit a tailcall profiler callback
+ if (compIsProfilerHookNeeded())
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_JMP_USED;
+ }
+#endif
+ goto RETURN_CHECK;
+
+ default:
+ break;
+ } /* end of switch (oper) */
+
+ /* If we don't need to evaluate to register, regmask is the empty set */
+ /* Otherwise we grab a temp for the local variable */
+
+ if (predictReg == PREDICT_NONE)
+ regMask = RBM_NONE;
+ else
+ {
+ regMask = rpPredictRegPick(type, predictReg, lockedRegs | rsvdRegs | enregMask);
+
+ if ((oper == GT_LCL_VAR) && (tree->TypeGet() == TYP_STRUCT))
+ {
+ /* We need to sign or zero extend a small type when pushing a struct */
+ noway_assert((type == TYP_INT) || (type == TYP_BYTE));
+
+ varDsc = lvaTable + tree->gtLclVarCommon.gtLclNum;
+ noway_assert(varDsc->lvPromoted && !varDsc->lvAddrExposed);
+
+ for (unsigned varNum = varDsc->lvFieldLclStart; varNum < varDsc->lvFieldLclStart + varDsc->lvFieldCnt;
+ varNum++)
+ {
+ LclVarDsc* fldVar = lvaTable + varNum;
+ if (fldVar->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(fldBit, VarSetOps::MakeSingleton(this, fldVar->lvVarIndex));
+ rpRecordRegIntf(regMask, fldBit DEBUGARG(
+ "need scratch register when pushing a small field of a struct"));
+ }
+ }
+ }
+ }
+
+ /* Update the set of lastUse variables that we encountered so far */
+ if (lastUse)
+ {
+ VarSetOps::UnionD(this, rpLastUseVars, lastUseVarBits);
+ VARSET_TP VARSET_INIT(this, varAsSet, lastUseVarBits);
+
+ /*
+ * Add interference from any previously locked temps into this last use variable.
+ */
+ if (lockedRegs)
+ {
+ rpRecordRegIntf(lockedRegs, varAsSet DEBUGARG("last use Predict lockedRegs"));
+ }
+ /*
+ * Add interference from any reserved temps into this last use variable.
+ */
+ if (rsvdRegs)
+ {
+ rpRecordRegIntf(rsvdRegs, varAsSet DEBUGARG("last use Predict rsvdRegs"));
+ }
+ /*
+ * For partially enregistered longs add an interference with the
+ * register returned by rpPredictRegPick
+ */
+ if ((type == TYP_INT) && (tree->TypeGet() == TYP_LONG))
+ {
+ rpRecordRegIntf(regMask, varAsSet DEBUGARG("last use with partial enreg"));
+ }
+ }
+
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+
+ /* Is it a 'simple' unary/binary operator? */
+
+ if (kind & GTK_SMPOP)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ GenTreePtr opsPtr[3];
+ regMaskTP regsPtr[3];
+
+ VARSET_TP VARSET_INIT_NOCOPY(startAsgUseInPlaceVars, VarSetOps::UninitVal());
+
+ switch (oper)
+ {
+ case GT_ASG:
+
+ if (tree->OperIsBlkOp())
+ {
+ interferingRegs |= rpPredictBlkAsgRegUse(tree, predictReg, lockedRegs, rsvdRegs);
+ regMask = 0;
+ goto RETURN_CHECK;
+ }
+ /* Is the value being assigned into a LCL_VAR? */
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+
+ /* Are we assigning a LCL_VAR the result of a call? */
+ if (op2->gtOper == GT_CALL)
+ {
+ /* Set a preferred register for the LCL_VAR */
+ if (isRegPairType(varDsc->TypeGet()))
+ varDsc->addPrefReg(RBM_LNGRET, this);
+ else if (!varTypeIsFloating(varDsc->TypeGet()))
+ varDsc->addPrefReg(RBM_INTRET, this);
+#ifdef _TARGET_AMD64_
+ else
+ varDsc->addPrefReg(RBM_FLOATRET, this);
+#endif
+ /*
+ * When assigning the result of a call we don't
+ * bother trying to target the right side of the
+ * assignment, since we have a fixed calling convention.
+ */
+ }
+ else if (varDsc->lvTracked)
+ {
+ // We interfere with uses in place
+ if (!VarSetOps::IsEmpty(this, rpUseInPlace))
+ {
+ rpRecordVarIntf(varDsc->lvVarIndex, rpUseInPlace DEBUGARG("Assign UseInPlace conflict"));
+ }
+
+ // Did we predict that this local will be fully enregistered?
+ // and the assignment type is the same as the expression type?
+ // and it is dead on the right side of the assignment?
+ // and we currently have no other rpAsgVarNum active?
+ //
+ if ((varDsc->lvRegNum != REG_STK) && ((type != TYP_LONG) || (varDsc->lvOtherReg != REG_STK)) &&
+ (type == op2->TypeGet()) && (op1->gtFlags & GTF_VAR_DEF) && (rpAsgVarNum == -1))
+ {
+ //
+ // Yes, we should try to target the right side (op2) of this
+ // assignment into the (enregistered) tracked variable.
+ //
+
+ op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
+ op2PredictReg = rpGetPredictForVarIndex(varDsc->lvVarIndex);
+
+ // Remember that this is a new use in place
+
+ // We've added "new UseInPlace"; remove from the global set.
+ VarSetOps::RemoveElemD(this, rpUseInPlace, varDsc->lvVarIndex);
+
+ // Note that later when we walk down to the leaf node for op2
+ // if we decide to actually use the register for the 'varDsc'
+ // to enregister the operand, then we will set rpAsgVarNum to
+ // varDsc->lvVarIndex, by extracting this value using
+ // rpGetVarIndexForPredict()
+ //
+ // Also we reset rpAsgVarNum back to -1 after we have finished
+ // predicting the current GT_ASG node
+ //
+ goto ASG_COMMON;
+ }
+ }
+ }
+ __fallthrough;
+
+ case GT_CHS:
+
+ case GT_ASG_OR:
+ case GT_ASG_XOR:
+ case GT_ASG_AND:
+ case GT_ASG_SUB:
+ case GT_ASG_ADD:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ case GT_ASG_UDIV:
+
+ /* We can't use "reg <op>= addr" for TYP_LONG or if op2 is a short type */
+ if ((type != TYP_LONG) && !varTypeIsSmall(op2->gtType))
+ {
+ /* Is the value being assigned into an enregistered LCL_VAR? */
+ /* For debug code we only allow a simple op2 to be assigned */
+ if ((op1->gtOper == GT_LCL_VAR) && (!opts.compDbgCode || rpCanAsgOperWithoutReg(op2, false)))
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+ /* Did we predict that this local will be enregistered? */
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ /* Yes, we can use "reg <op>= addr" */
+
+ op1PredictReg = PREDICT_NONE; /* really PREDICT_REG, but we've already done the check */
+ op2PredictReg = PREDICT_NONE;
+
+ goto ASG_COMMON;
+ }
+ }
+ }
+
+#if CPU_LOAD_STORE_ARCH
+ if (oper != GT_ASG)
+ {
+ op1PredictReg = PREDICT_REG;
+ op2PredictReg = PREDICT_REG;
+ }
+ else
+#endif
+ {
+ /*
+ * Otherwise, initialize the normal forcing of operands:
+ * "addr <op>= reg"
+ */
+ op1PredictReg = PREDICT_ADDR;
+ op2PredictReg = PREDICT_REG;
+ }
+
+ ASG_COMMON:
+
+#if !CPU_LOAD_STORE_ARCH
+ if (op2PredictReg != PREDICT_NONE)
+ {
+ /* Is the value being assigned a simple one? */
+ if (rpCanAsgOperWithoutReg(op2, false))
+ op2PredictReg = PREDICT_NONE;
+ }
+#endif
+
+ bool simpleAssignment;
+ simpleAssignment = false;
+
+ if ((oper == GT_ASG) && (op1->gtOper == GT_LCL_VAR))
+ {
+ // Add a variable interference from the assign target
+ // to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ varDsc = lvaTable + op1->gtLclVarCommon.gtLclNum;
+
+ if (varDsc->lvTracked)
+ {
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ rpRecordVarIntf(varIndex, rpLastUseVars DEBUGARG("Assign conflict"));
+ }
+ }
+
+ /* Record whether this tree is a simple assignment to a local */
+
+ simpleAssignment = ((type != TYP_LONG) || !opts.compDbgCode);
+ }
+
+ bool requireByteReg;
+ requireByteReg = false;
+
+#if CPU_HAS_BYTE_REGS
+ /* Byte-assignments need the byte registers, unless op1 is an enregistered local */
+
+ if (varTypeIsByte(type) &&
+ ((op1->gtOper != GT_LCL_VAR) || (lvaTable[op1->gtLclVarCommon.gtLclNum].lvRegNum == REG_STK)))
+
+ {
+ // Byte-assignments typically need a byte register
+ requireByteReg = true;
+
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+
+ // Did we predict that this local will be enregistered?
+ if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK) && (oper != GT_CHS))
+ {
+ // We don't require a byte register when op1 is an enregistered local
+ requireByteReg = false;
+ }
+
+ // Is op1 part of an Assign-Op or is the RHS a simple memory indirection?
+ if ((oper != GT_ASG) || (op2->gtOper == GT_IND) || (op2->gtOper == GT_CLS_VAR))
+ {
+ // We should try to put op1 in a byte register
+ varDsc->addPrefReg(RBM_BYTE_REG_FLAG, this);
+ }
+ }
+ }
+#endif
+
+ VarSetOps::Assign(this, startAsgUseInPlaceVars, rpUseInPlace);
+
+ bool isWriteBarrierAsgNode;
+ isWriteBarrierAsgNode = codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree);
+#ifdef DEBUG
+ GCInfo::WriteBarrierForm wbf;
+ if (isWriteBarrierAsgNode)
+ wbf = codeGen->gcInfo.gcIsWriteBarrierCandidate(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ else
+ wbf = GCInfo::WBF_NoBarrier;
+#endif // DEBUG
+
+ regMaskTP wbaLockedRegs;
+ wbaLockedRegs = lockedRegs;
+ if (isWriteBarrierAsgNode)
+ {
+#if defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+#endif // DEBUG
+ wbaLockedRegs |= RBM_WRITE_BARRIER;
+ op1->gtRsvdRegs |= RBM_WRITE_BARRIER; // This will steer op2 away from REG_WRITE_BARRIER
+ assert(REG_WRITE_BARRIER == REG_EDX);
+ op1PredictReg = PREDICT_REG_EDX;
+#ifdef DEBUG
+ }
+ else
+#endif // DEBUG
+#endif // defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !(defined(_TARGET_X86_) && NOGC_WRITE_BARRIERS)
+ {
+#ifdef _TARGET_X86_
+ op1PredictReg = PREDICT_REG_ECX;
+ op2PredictReg = PREDICT_REG_EDX;
+#elif defined(_TARGET_ARM_)
+ op1PredictReg = PREDICT_REG_R0;
+ op2PredictReg = PREDICT_REG_R1;
+
+ // This is my best guess as to what the previous code meant by checking "gtRngChkLen() == NULL".
+ if ((op1->OperGet() == GT_IND) && (op1->gtOp.gtOp1->OperGet() != GT_ARR_BOUNDS_CHECK))
+ {
+ op1 = op1->gtOp.gtOp1;
+ }
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for WriteBarrierAsg"
+#endif
+ }
+#endif
+ }
+
+ /* Are we supposed to evaluate RHS first? */
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+
+#if CPU_HAS_BYTE_REGS
+ // Should we ensure that op2 gets evaluated into a byte register?
+ if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs)
+ //
+ op2Mask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | RBM_NON_BYTE_REGS));
+ op2->gtUsedRegs |= op2Mask;
+
+ // No longer a simple assignment because we're using extra registers and might
+ // have interference between op1 and op2. See DevDiv #136681
+ simpleAssignment = false;
+ }
+#endif
+ /*
+ * For a simple assignment we don't want the op2Mask to be
+ * marked as interfering with the LCL_VAR, since it is likely
+ * that we will want to enregister the LCL_VAR in exactly
+ * the register that is used to compute op2
+ */
+ tmpMask = lockedRegs;
+
+ if (!simpleAssignment)
+ tmpMask |= op2Mask;
+
+ regMask = rpPredictTreeRegUse(op1, op1PredictReg, tmpMask, RBM_NONE);
+
+ // Did we relax the register prediction for op1 and op2 above ?
+ // - because we are depending upon op1 being enregistered
+ //
+ if ((op1PredictReg == PREDICT_NONE) &&
+ ((op2PredictReg == PREDICT_NONE) || rpHasVarIndexForPredict(op2PredictReg)))
+ {
+ /* We must be assigning into an enregistered LCL_VAR */
+ noway_assert(op1->gtOper == GT_LCL_VAR);
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+ noway_assert(varDsc->lvRegNum != REG_STK);
+
+ /* We need to set lvDependReg, in case we lose the enregistration of op1 */
+ varDsc->lvDependReg = true;
+ }
+ }
+ else
+ {
+ // For the case of simpleAssignments op2 should always be evaluated first
+ noway_assert(!simpleAssignment);
+
+ regMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ if (isWriteBarrierAsgNode)
+ {
+ wbaLockedRegs |= op1->gtUsedRegs;
+ }
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, wbaLockedRegs | regMask, RBM_NONE);
+
+#if CPU_HAS_BYTE_REGS
+ // Should we ensure that op2 gets evaluated into a byte register?
+ if (requireByteReg && ((op2Mask & RBM_BYTE_REGS) == 0))
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
+ //
+ op2Mask |=
+ rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
+ op2->gtUsedRegs |= op2Mask;
+ }
+#endif
+ }
+
+ if (rpHasVarIndexForPredict(op2PredictReg))
+ {
+ rpAsgVarNum = -1;
+ }
+
+ if (isWriteBarrierAsgNode)
+ {
+#if NOGC_WRITE_BARRIERS
+#ifdef DEBUG
+ if (wbf != GCInfo::WBF_NoBarrier_CheckNotHeapInDebug)
+ {
+#endif // DEBUG
+
+ /* Steer computation away from REG_WRITE_BARRIER as the pointer is
+ passed to the write-barrier call in REG_WRITE_BARRIER */
+
+ regMask = op2Mask;
+
+ if (op1->gtOper == GT_IND)
+ {
+ GenTreePtr rv1, rv2;
+ unsigned mul, cns;
+ bool rev;
+
+ /* Special handling of indirect assigns for write barrier */
+
+ bool yes = codeGen->genCreateAddrMode(op1->gtOp.gtOp1, -1, true, RBM_NONE, &rev, &rv1, &rv2,
+ &mul, &cns);
+
+ /* Check address mode for enregisterable locals */
+
+ if (yes)
+ {
+ if (rv1 != NULL && rv1->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(rv1->gtLclVarCommon.gtLclNum);
+ }
+ if (rv2 != NULL && rv2->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(rv2->gtLclVarCommon.gtLclNum);
+ }
+ }
+ }
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ rpPredictRefAssign(op2->gtLclVarCommon.gtLclNum);
+ }
+
+ // Add a register interference for REG_WRITE_BARRIER to each of the last use variables
+ if (!VarSetOps::IsEmpty(this, rpLastUseVars))
+ {
+ rpRecordRegIntf(RBM_WRITE_BARRIER,
+ rpLastUseVars DEBUGARG("WriteBarrier and rpLastUseVars conflict"));
+ }
+ tree->gtUsedRegs |= RBM_WRITE_BARRIER;
+#ifdef DEBUG
+ }
+ else
+#endif // DEBUG
+#endif // NOGC_WRITE_BARRIERS
+
+#if defined(DEBUG) || !NOGC_WRITE_BARRIERS
+ {
+#ifdef _TARGET_ARM_
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH_NOGC for NoGC WriteBarrierAsg\n");
+#endif
+ //
+ // For the ARM target we have an optimized JIT Helper
+ // that only trashes a subset of the callee saved registers
+ //
+
+ // NOTE: Adding it to the gtUsedRegs will cause the interference to
+ // be added appropriately
+
+ // the RBM_CALLEE_TRASH_NOGC set is killed. We will record this in interferingRegs
+ // instead of gtUsedRegs, because the latter will be modified later, but we need
+ // to remember to add the interference.
+
+ interferingRegs |= RBM_CALLEE_TRASH_NOGC;
+
+ op1->gtUsedRegs |= RBM_R0;
+ op2->gtUsedRegs |= RBM_R1;
+#else // _TARGET_ARM_
+
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with RBM_CALLEE_TRASH for NoGC WriteBarrierAsg\n");
+#endif
+ // We have to call a normal JIT helper to perform the Write Barrier Assignment
+ // It will trash the callee saved registers
+
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+#endif // _TARGET_ARM_
+ }
+#endif // defined(DEBUG) || !NOGC_WRITE_BARRIERS
+ }
+
+ if (simpleAssignment)
+ {
+ /*
+ * Consider a simple assignment to a local:
+ *
+ * lcl = expr;
+ *
+ * Since the "=" node is visited after the variable
+ * is marked live (assuming it's live after the
+ * assignment), we don't want to use the register
+ * use mask of the "=" node but rather that of the
+ * variable itself.
+ */
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ }
+ else
+ {
+ tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startAsgUseInPlaceVars);
+ goto RETURN_CHECK;
+
+ case GT_ASG_LSH:
+ case GT_ASG_RSH:
+ case GT_ASG_RSZ:
+ /* assigning shift operators */
+
+ noway_assert(type != TYP_LONG);
+
+#if CPU_LOAD_STORE_ARCH
+ predictReg = PREDICT_ADDR;
+#else
+ predictReg = PREDICT_NONE;
+#endif
+
+ /* shift count is handled same as ordinary shift */
+ goto HANDLE_SHIFT_COUNT;
+
+ case GT_ADDR:
+ regMask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, RBM_LASTUSE);
+
+ if ((regMask == RBM_NONE) && (predictReg >= PREDICT_REG))
+ {
+ // We need a scratch register for the LEA instruction
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
+ }
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+
+ case GT_CAST:
+
+ /* Cannot cast to VOID */
+ noway_assert(type != TYP_VOID);
+
+ /* cast to long is special */
+ if (type == TYP_LONG && op1->gtType <= TYP_INT)
+ {
+ noway_assert(tree->gtCast.gtCastType == TYP_LONG || tree->gtCast.gtCastType == TYP_ULONG);
+#if CPU_LONG_USES_REGPAIR
+ rpPredictReg predictRegHi = PREDICT_SCRATCH_REG;
+
+ if (rpHasVarIndexForPredict(predictReg))
+ {
+ unsigned tgtIndex = rpGetVarIndexForPredict(predictReg);
+ rpAsgVarNum = tgtIndex;
+
+ // We don't need any register as we plan on writing to the rpAsgVarNum register
+ predictReg = PREDICT_NONE;
+
+ LclVarDsc* tgtVar = lvaTable + lvaTrackedToVarNum[tgtIndex];
+ tgtVar->lvDependReg = true;
+
+ if (tgtVar->lvOtherReg != REG_STK)
+ {
+ predictRegHi = PREDICT_NONE;
+ }
+ }
+ else
+#endif
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#ifdef _TARGET_ARM_
+ // If we are widening an int into a long using a targeted register pair we
+ // should retarget so that the low part gets loaded into the appropriate register
+ else if (predictReg == PREDICT_PAIR_R0R1)
+ {
+ predictReg = PREDICT_REG_R0;
+ predictRegHi = PREDICT_REG_R1;
+ }
+ else if (predictReg == PREDICT_PAIR_R2R3)
+ {
+ predictReg = PREDICT_REG_R2;
+ predictRegHi = PREDICT_REG_R3;
+ }
+#endif
+#ifdef _TARGET_X86_
+ // If we are widening an int into a long using a targeted register pair we
+ // should retarget so that the low part gets loaded into the appropriate register
+ else if (predictReg == PREDICT_PAIR_EAXEDX)
+ {
+ predictReg = PREDICT_REG_EAX;
+ predictRegHi = PREDICT_REG_EDX;
+ }
+ else if (predictReg == PREDICT_PAIR_ECXEBX)
+ {
+ predictReg = PREDICT_REG_ECX;
+ predictRegHi = PREDICT_REG_EBX;
+ }
+#endif
+
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+#if CPU_LONG_USES_REGPAIR
+ if (predictRegHi != PREDICT_NONE)
+ {
+ // Now get one more reg for the upper part
+ regMask |= rpPredictRegPick(TYP_INT, predictRegHi, lockedRegs | rsvdRegs | regMask);
+ }
+#endif
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+
+ /* cast from long is special - it frees a register */
+ if (type <= TYP_INT // nice. this presumably is intended to mean "signed int and shorter types"
+ && op1->gtType == TYP_LONG)
+ {
+ if ((predictReg == PREDICT_NONE) || rpHasVarIndexForPredict(predictReg))
+ predictReg = PREDICT_REG;
+
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ // If we have 2 or more regs, free one of them
+ if (!genMaxOneBit(regMask))
+ {
+ /* Clear the 2nd lowest bit in regMask */
+ /* First set tmpMask to the lowest bit in regMask */
+ tmpMask = genFindLowestBit(regMask);
+ /* Next find the second lowest bit in regMask */
+ tmpMask = genFindLowestBit(regMask & ~tmpMask);
+ /* Clear this bit from regmask */
+ regMask &= ~tmpMask;
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ /* cast from signed-byte is special - it uses byteable registers */
+ if (type == TYP_INT)
+ {
+ var_types smallType;
+
+ if (genTypeSize(tree->gtCast.CastOp()->TypeGet()) < genTypeSize(tree->gtCast.gtCastType))
+ smallType = tree->gtCast.CastOp()->TypeGet();
+ else
+ smallType = tree->gtCast.gtCastType;
+
+ if (smallType == TYP_BYTE)
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ if ((regMask & RBM_BYTE_REGS) == 0)
+ regMask = rpPredictRegPick(type, PREDICT_SCRATCH_REG, RBM_NON_BYTE_REGS);
+
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ }
+#endif
+
+#if FEATURE_STACK_FP_X87
+ /* cast to float/double is special */
+ if (varTypeIsFloating(type))
+ {
+ switch (op1->TypeGet())
+ {
+ /* uses fild, so the operand doesn't need to be loaded into a reg */
+ case TYP_INT:
+ case TYP_LONG:
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ regMask = 0;
+ goto RETURN_CHECK;
+ default:
+ break;
+ }
+ }
+
+ /* Casting from floating type to integral type is special */
+ if (!varTypeIsFloating(type) && varTypeIsFloating(op1->TypeGet()))
+ {
+ if (opts.compCanUseSSE2)
+ {
+ // predict for SSE2 based casting
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ // Get one more int reg to hold cast result
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ }
+#endif
+
+#if FEATURE_FP_REGALLOC
+ // Are we casting from int to float or from float to int
+ // Fix 388428 ARM JitStress WP7
+ if (varTypeIsFloating(type) != varTypeIsFloating(op1->TypeGet()))
+ {
+ // op1 needs to go into a register
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ // We also need a fp scratch register for the convert operation
+ regMask |= rpPredictRegPick((genTypeStSz(type) == 1) ? TYP_FLOAT : TYP_DOUBLE,
+ PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
+ }
+#endif
+ // We also need a register to hold the result
+ regMask |= rpPredictRegPick(type, PREDICT_SCRATCH_REG, regMask | lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif
+
+ /* otherwise must load op1 into a register */
+ goto GENERIC_UNARY;
+
+ case GT_INTRINSIC:
+
+#ifdef _TARGET_XARCH_
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round && tree->TypeGet() == TYP_INT)
+ {
+ // This is a special case to handle the following
+ // optimization: conv.i4(round.d(d)) -> round.i(d)
+ // if flowgraph 3186
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | rsvdRegs);
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif
+ __fallthrough;
+
+ case GT_NEG:
+#ifdef _TARGET_ARM_
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ // On ARM this consumes an extra register for the '0' value
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMaskTP op1Mask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+
+ regMask = rpPredictRegPick(TYP_INT, predictReg, lockedRegs | op1Mask | rsvdRegs);
+
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_ARM_
+
+ __fallthrough;
+
+ case GT_NOT:
+ // these unary operators will write new values
+ // and thus will need a scratch register
+ GENERIC_UNARY:
+ /* generic unary operators */
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ __fallthrough;
+
+ case GT_NOP:
+ // these unary operators do not write new values
+ // and thus won't need a scratch register
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if OPT_BOOL_OPS
+ if (!op1)
+ {
+ tree->gtUsedRegs = 0;
+ regMask = 0;
+ goto RETURN_CHECK;
+ }
+#endif
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_IND:
+ case GT_NULLCHECK: // At this point, nullcheck is just like an IND...
+ {
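+ // intoReg tracks whether the indirection's value will actually be loaded into a register,
+ // or whether we only need to form its address (e.g. for PREDICT_ADDR).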
+ bool intoReg = true;
+ VARSET_TP VARSET_INIT(this, startIndUseInPlaceVars, rpUseInPlace);
+
+ if (fgIsIndirOfAddrOfLocal(tree) != NULL)
+ {
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree);
+ }
+
+ if (predictReg == PREDICT_ADDR)
+ {
+ intoReg = false;
+ }
+ else if (predictReg == PREDICT_NONE)
+ {
+ if (type != TYP_LONG)
+ {
+ intoReg = false;
+ }
+ else
+ {
+ predictReg = PREDICT_REG;
+ }
+ }
+
+ /* forcing to register? */
+ if (intoReg && (type != TYP_LONG))
+ {
+ rsvdRegs |= RBM_LASTUSE;
+ }
+
+ GenTreePtr lenCSE;
+ lenCSE = NULL;
+
+ /* check for address mode */
+ regMask = rpPredictAddressMode(op1, type, lockedRegs, rsvdRegs, lenCSE);
+ tmpMask = RBM_NONE;
+
+#if CPU_LOAD_STORE_ARCH
+ // We may need a scratch register for loading a long
+ if (type == TYP_LONG)
+ {
+ /* This scratch register immediately dies */
+ tmpMask = rpPredictRegPick(TYP_BYREF, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
+ }
+#endif // CPU_LOAD_STORE_ARCH
+
+#ifdef _TARGET_ARM_
+ // For unaligned loads/stores of floating point values, the data must first be loaded into integer register(s)
+ //
+ if ((tree->gtFlags & GTF_IND_UNALIGNED) && varTypeIsFloating(type))
+ {
+ /* These integer register(s) immediately die */
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs);
+ // Two integer registers are required for a TYP_DOUBLE
+ if (type == TYP_DOUBLE)
+ tmpMask |=
+ rpPredictRegPick(TYP_INT, PREDICT_REG, op1->gtUsedRegs | lockedRegs | rsvdRegs | tmpMask);
+ }
+#endif
+
+ /* forcing to register? */
+ if (intoReg)
+ {
+ regMaskTP lockedMask = lockedRegs | rsvdRegs;
+ tmpMask |= regMask;
+
+ // We will compute a new regMask that holds the register(s)
+ // that we will load the indirection into.
+ //
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ // We need to use multiple load instructions here:
+ // For the first register we can not choose
+ // any registers that are being used in place or
+ // any register in the current regMask
+ //
+ regMask = rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
+
+ // For the second register we can choose a register that was
+ // used in place or any register in the old now overwritten regMask
+ // but not the same register that we picked above in 'regMask'
+ //
+ VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
+ regMask |= rpPredictRegPick(TYP_INT, predictReg, regMask | lockedMask);
+ }
+ else
+#endif
+ {
+ // We will use one load instruction here:
+ // The load target register can be a register that was used in place
+ // or one of the registers from the original regMask.
+ //
+ VarSetOps::Assign(this, rpUseInPlace, startIndUseInPlaceVars);
+ regMask = rpPredictRegPick(type, predictReg, lockedMask);
+ }
+ }
+ else if (predictReg != PREDICT_ADDR)
+ {
+ /* Unless the caller specified PREDICT_ADDR */
+ /* we don't return the temp registers used */
+ /* to form the address */
+ regMask = RBM_NONE;
+ }
+ }
+
+ tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
+
+ goto RETURN_CHECK;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+
+#ifdef _TARGET_X86_
+ /* Floating point comparison uses EAX for flags */
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ regMask = RBM_EAX;
+ }
+ else
+#endif
+ if (!(tree->gtFlags & GTF_RELOP_JMP_USED))
+ {
+ // Some comparisons are converted to ?:
+ noway_assert(!fgMorphRelopToQmark(op1));
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ // The set instructions need a byte register
+ regMask = rpPredictRegPick(TYP_BYTE, predictReg, lockedRegs | rsvdRegs);
+ }
+ else
+ {
+ regMask = RBM_NONE;
+#ifdef _TARGET_XARCH_
+ tmpMask = RBM_NONE;
+ // Optimize the compare with a constant cases for xarch
+ if (op1->gtOper == GT_CNS_INT)
+ {
+ if (op2->gtOper == GT_CNS_INT)
+ tmpMask =
+ rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_NONE, lockedRegs | tmpMask, RBM_LASTUSE);
+ tree->gtUsedRegs = op2->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+ else if (op2->gtOper == GT_CNS_INT)
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, rsvdRegs);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+ else if (op2->gtOper == GT_CNS_LNG)
+ {
+ regMaskTP op1Mask = rpPredictTreeRegUse(op1, PREDICT_ADDR, lockedRegs, rsvdRegs);
+#ifdef _TARGET_X86_
+ // We also need one extra register to read values from
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | op1Mask | rsvdRegs);
+#endif // _TARGET_X86_
+ tree->gtUsedRegs = (regMaskSmall)tmpMask | op1->gtUsedRegs;
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_XARCH_
+ }
+
+ unsigned op1TypeSize;
+ unsigned op2TypeSize;
+
+ op1TypeSize = genTypeSize(op1->TypeGet());
+ op2TypeSize = genTypeSize(op2->TypeGet());
+
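+ // By default both compare operands are evaluated into registers; the target-specific
+ // code below relaxes this where a memory operand can be used directly.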
+ op1PredictReg = PREDICT_REG;
+ op2PredictReg = PREDICT_REG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+#ifdef _TARGET_XARCH_
+ if (op1TypeSize == sizeof(int))
+ op1PredictReg = PREDICT_NONE;
+#endif
+
+ tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+#ifdef _TARGET_XARCH_
+ // For full DWORD compares we can have
+ //
+ // op1 is an address mode and op2 is a register
+ // or
+ // op1 is a register and op2 is an address mode
+ //
+ if ((op2TypeSize == sizeof(int)) && (op1TypeSize == op2TypeSize))
+ {
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVar.gtLclNum;
+ varDsc = lvaTable + lclNum;
+ /* Did we predict that this local will be enregistered? */
+ if (varDsc->lvTracked && (varDsc->lvRegNum != REG_STK))
+ {
+ op1PredictReg = PREDICT_ADDR;
+ }
+ }
+ }
+ // Codegen will generate cmp reg,[mem] for 4 or 8-byte types, but not for 1 or 2 byte types
+ if ((op1PredictReg != PREDICT_ADDR) && (op2TypeSize >= sizeof(int)))
+ op2PredictReg = PREDICT_ADDR;
+#endif // _TARGET_XARCH_
+
+ tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+#ifdef _TARGET_ARM_
+ if ((op2->gtOper != GT_CNS_INT) || !codeGen->validImmForAlu(op2->gtIntCon.gtIconVal))
+#endif
+ {
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ }
+
+#ifdef _TARGET_XARCH_
+ // In some cases in genCondSetFlags(), we need to use a temporary register (via rsPickReg())
+ // to generate a sign/zero extension before doing a compare. Save a register for this purpose
+ // if one of the registers is small and the types aren't equal.
+
+ if (regMask == RBM_NONE)
+ {
+ rpPredictReg op1xPredictReg, op2xPredictReg;
+ GenTreePtr op1x, op2x;
+ if (tree->gtFlags & GTF_REVERSE_OPS) // TODO: do we really need to handle this case?
+ {
+ op1xPredictReg = op2PredictReg;
+ op2xPredictReg = op1PredictReg;
+ op1x = op2;
+ op2x = op1;
+ }
+ else
+ {
+ op1xPredictReg = op1PredictReg;
+ op2xPredictReg = op2PredictReg;
+ op1x = op1;
+ op2x = op2;
+ }
+ if ((op1xPredictReg < PREDICT_REG) && // op1 doesn't get a register (probably an indir)
+ (op2xPredictReg >= PREDICT_REG) && // op2 gets a register
+ varTypeIsSmall(op1x->TypeGet())) // op1 is smaller than an int
+ {
+ bool needTmp = false;
+
+ // If op1x is a byte, and op2x is not a byteable register, we'll need a temp.
+ // We could predict a byteable register for op2x, but what if we don't get it?
+ // So, be conservative and always ask for a temp. There are a couple small CQ losses as a
+ // result.
+ if (varTypeIsByte(op1x->TypeGet()))
+ {
+ needTmp = true;
+ }
+ else
+ {
+ if (op2x->gtOper == GT_LCL_VAR) // this will be a GT_REG_VAR during code generation
+ {
+ if (genActualType(op1x->TypeGet()) != lvaGetActualType(op2x->gtLclVar.gtLclNum))
+ needTmp = true;
+ }
+ else
+ {
+ if (op1x->TypeGet() != op2x->TypeGet())
+ needTmp = true;
+ }
+ }
+ if (needTmp)
+ {
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+ }
+ }
+ }
+#endif // _TARGET_XARCH_
+
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_MUL:
+
+#ifndef _TARGET_AMD64_
+ if (type == TYP_LONG)
+ {
+ assert(tree->gtIsValid64RsltMul());
+
+ /* Strip out the cast nodes */
+
+ noway_assert(op1->gtOper == GT_CAST && op2->gtOper == GT_CAST);
+ op1 = op1->gtCast.CastOp();
+ op2 = op2->gtCast.CastOp();
+#else
+ if (false)
+ {
+#endif // !_TARGET_AMD64_
+ USE_MULT_EAX:
+
+#if defined(_TARGET_X86_)
+ // This will be done by a 64-bit imul "imul eax, reg"
+ // (i.e. EDX:EAX = EAX * reg)
+
+ /* Are we supposed to evaluate op2 first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP_LO, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP_LO, RBM_LASTUSE);
+ }
+
+ /* set gtUsedRegs to EAX, EDX and the registers needed by op1 and op2 */
+
+ tree->gtUsedRegs = RBM_PAIR_TMP | op1->gtUsedRegs | op2->gtUsedRegs;
+
+ /* set regMask to the set of held registers */
+
+ regMask = RBM_PAIR_TMP_LO;
+
+ if (type == TYP_LONG)
+ regMask |= RBM_PAIR_TMP_HI;
+
+#elif defined(_TARGET_ARM_)
+ // This will be done by a 4 operand multiply
+
+ // Are we supposed to evaluate op2 first?
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs, RBM_LASTUSE);
+ }
+
+ // set regMask to the set of held registers,
+ // the two scratch registers we need to compute the mul result
+
+ regMask = rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+
+ // set gtUsedRegs to regMask and the registers needed by op1 and op2
+
+ tree->gtUsedRegs = regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit imul"
+#endif
+
+ goto RETURN_CHECK;
+ }
+ else
+ {
+ /* We use imulEAX for most unsigned multiply operations */
+ if (tree->gtOverflow())
+ {
+ if ((tree->gtFlags & GTF_UNSIGNED) || varTypeIsSmall(tree->TypeGet()))
+ {
+ goto USE_MULT_EAX;
+ }
+ }
+ }
+
+ __fallthrough;
+
+ case GT_OR:
+ case GT_XOR:
+ case GT_AND:
+
+ case GT_SUB:
+ case GT_ADD:
+ tree->gtUsedRegs = 0;
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ GENERIC_BINARY:
+
+ noway_assert(op2);
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op1PredictReg = PREDICT_REG;
+#if !CPU_LOAD_STORE_ARCH
+ if (genTypeSize(op1->gtType) >= sizeof(int))
+ op1PredictReg = PREDICT_NONE;
+#endif
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ else
+ {
+ op2PredictReg = PREDICT_REG;
+#if !CPU_LOAD_STORE_ARCH
+ if (genTypeSize(op2->gtType) >= sizeof(int))
+ op2PredictReg = PREDICT_NONE;
+#endif
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+#ifdef _TARGET_ARM_
+ // For most ALU operations we can generate a single instruction that encodes
+ // a small immediate integer constant value. (except for multiply)
+ //
+ if ((op2->gtOper == GT_CNS_INT) && (oper != GT_MUL))
+ {
+ ssize_t ival = op2->gtIntCon.gtIconVal;
+ if (codeGen->validImmForAlu(ival))
+ {
+ op2PredictReg = PREDICT_NONE;
+ }
+ else if (codeGen->validImmForAdd(ival, INS_FLAGS_DONT_CARE) &&
+ ((oper == GT_ADD) || (oper == GT_SUB)))
+ {
+ op2PredictReg = PREDICT_NONE;
+ }
+ }
+ if (op2PredictReg == PREDICT_NONE)
+ {
+ op2->gtUsedRegs = RBM_NONE;
+ }
+ else
+#endif
+ {
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ }
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+#if CPU_HAS_BYTE_REGS
+ /* We have special register requirements for byte operations */
+
+ if (varTypeIsByte(tree->TypeGet()))
+ {
+ /* For 8 bit arithmetic, one operand has to be in a
+ byte-addressable register, and the other has to be
+ in a byte-addressable reg or in memory. Assume it's in a reg */
+
+ regMaskTP regByteMask = 0;
+ regMaskTP op1ByteMask = op1->gtUsedRegs;
+
+ if (!(op1->gtUsedRegs & RBM_BYTE_REGS))
+ {
+ // Pick a Byte register to use for op1
+ regByteMask = rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs);
+ op1ByteMask = regByteMask;
+ }
+
+ if (!(op2->gtUsedRegs & RBM_BYTE_REGS))
+ {
+ // Pick a Byte register to use for op2, avoiding the one used by op1
+ regByteMask |= rpPredictRegPick(TYP_BYTE, PREDICT_REG, lockedRegs | rsvdRegs | op1ByteMask);
+ }
+
+ if (regByteMask)
+ {
+ tree->gtUsedRegs |= regByteMask;
+ regMask = regByteMask;
+ }
+ }
+#endif
+ goto RETURN_CHECK;
+
+ case GT_DIV:
+ case GT_MOD:
+
+ case GT_UDIV:
+ case GT_UMOD:
+
+ /* non-integer division handled in generic way */
+ if (!varTypeIsIntegral(type))
+ {
+ tree->gtUsedRegs = 0;
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+ goto GENERIC_BINARY;
+ }
+
+#ifndef _TARGET_64BIT_
+
+ if (type == TYP_LONG && (oper == GT_MOD || oper == GT_UMOD))
+ {
+ /* Special case: a mod with an int op2 is done inline using idiv or div
+ to avoid a costly call to the helper */
+
+ noway_assert((op2->gtOper == GT_CNS_LNG) &&
+ (op2->gtLngCon.gtLconVal == int(op2->gtLngCon.gtLconVal)));
+
+#if defined(_TARGET_X86_) || defined(_TARGET_ARM_)
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tmpMask = rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | RBM_PAIR_TMP,
+ rsvdRegs | op1->gtRsvdRegs);
+ tmpMask |= rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs | tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+ tmpMask = rpPredictTreeRegUse(op1, PREDICT_PAIR_TMP, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ tmpMask |=
+ rpPredictTreeRegUse(op2, PREDICT_REG, lockedRegs | tmpMask | RBM_PAIR_TMP, RBM_LASTUSE);
+ }
+ regMask = RBM_PAIR_TMP;
+#else // !_TARGET_X86_ && !_TARGET_ARM_
+#error "Non-ARM or x86 _TARGET_ in RegPredict for 64-bit MOD"
+#endif // !_TARGET_X86_ && !_TARGET_ARM_
+
+ tree->gtUsedRegs =
+ (regMaskSmall)(regMask | op1->gtUsedRegs | op2->gtUsedRegs |
+ rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, regMask | tmpMask));
+
+ goto RETURN_CHECK;
+ }
+#endif // _TARGET_64BIT_
+
+ /* no divide immediate, so force an integer constant that is not
+ * a power of two into a register
+ */
+
+ if (op2->OperKind() & GTK_CONST)
+ {
+ ssize_t ival = op2->gtIntConCommon.IconValue();
+
+ /* Is the divisor a power of 2 ? */
+
+ if (ival > 0 && genMaxOneBit(size_t(ival)))
+ {
+ goto GENERIC_UNARY;
+ }
+ else
+ op2PredictReg = PREDICT_SCRATCH_REG;
+ }
+ else
+ {
+ /* A divisor that is not an integer constant must also be enregistered */
+ op2PredictReg = PREDICT_REG;
+ }
+
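+ // trashedMask is the set of registers trashed by the divide itself, while op1ExcludeMask and
+ // op2ExcludeMask are the registers each operand must avoid; all three are set per target below.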
+ regMaskTP trashedMask;
+ trashedMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op1ExcludeMask;
+ op1ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
+ regMaskTP op2ExcludeMask;
+ op2ExcludeMask = DUMMY_INIT(RBM_ILLEGAL);
+
+#ifdef _TARGET_XARCH_
+ /* Consider the case "a / b" - we'll need to trash EDX (via "CDQ") before
+ * we can safely allow the "b" value to die. Unfortunately, if we simply
+ * mark the node "b" as using EDX, this will not work if "b" is a register
+ * variable that dies with this particular reference. Thus, if we want to
+ * avoid this situation (where we would have to spill the variable from
+ * EDX to someplace else), we need to explicitly mark the interference
+ * of the variable at this point.
+ */
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ unsigned lclNum = op2->gtLclVarCommon.gtLclNum;
+ varDsc = lvaTable + lclNum;
+ if (varDsc->lvTracked)
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and EAX -- int divide\n", lclNum,
+ varDsc->lvVarIndex);
+ if (!VarSetOps::IsMember(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex))
+ printf("Record interference between V%02u,T%02u and EDX -- int divide\n", lclNum,
+ varDsc->lvVarIndex);
+ }
+#endif
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EAX], varDsc->lvVarIndex);
+ VarSetOps::AddElemD(this, raLclRegIntf[REG_EDX], varDsc->lvVarIndex);
+ }
+ }
+
+ /* set the held register based on opcode */
+ if (oper == GT_DIV || oper == GT_UDIV)
+ regMask = RBM_EAX;
+ else
+ regMask = RBM_EDX;
+ trashedMask = (RBM_EAX | RBM_EDX);
+ op1ExcludeMask = 0;
+ op2ExcludeMask = (RBM_EAX | RBM_EDX);
+
+#endif // _TARGET_XARCH_
+
+#ifdef _TARGET_ARM_
+ trashedMask = RBM_NONE;
+ op1ExcludeMask = RBM_NONE;
+ op2ExcludeMask = RBM_NONE;
+#endif
+
+ /* set the lvPref reg if possible */
+ GenTreePtr dest;
+ /*
+ * Walking the gtNext link twice from here should get us back
+ * to our parent node, if this is a simple assignment tree.
+ */
+ dest = tree->gtNext;
+ if (dest && (dest->gtOper == GT_LCL_VAR) && dest->gtNext && (dest->gtNext->OperKind() & GTK_ASGOP) &&
+ dest->gtNext->gtOp.gtOp2 == tree)
+ {
+ varDsc = lvaTable + dest->gtLclVarCommon.gtLclNum;
+ varDsc->addPrefReg(regMask, this);
+ }
+#ifdef _TARGET_XARCH_
+ op1PredictReg = PREDICT_REG_EDX; /* Normally target op1 into EDX */
+#else
+ op1PredictReg = PREDICT_SCRATCH_REG;
+#endif
+
+ /* are we supposed to evaluate op2 first? */
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ tmpMask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | op2ExcludeMask,
+ rsvdRegs | op1->gtRsvdRegs);
+ rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | tmpMask | op1ExcludeMask, RBM_LASTUSE);
+ }
+ else
+ {
+ tmpMask = rpPredictTreeRegUse(op1, op1PredictReg, lockedRegs | op1ExcludeMask,
+ rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, op2PredictReg, tmpMask | lockedRegs | op2ExcludeMask, RBM_LASTUSE);
+ }
+#ifdef _TARGET_ARM_
+ regMask = tmpMask;
+#endif
+ /* grab EAX, EDX for this tree node */
+ tree->gtUsedRegs = (regMaskSmall)trashedMask | op1->gtUsedRegs | op2->gtUsedRegs;
+
+ goto RETURN_CHECK;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+#ifndef _TARGET_64BIT_
+ if (type == TYP_LONG)
+ {
+ if (op2->IsCnsIntOrI())
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ // no register used by op2
+ op2->gtUsedRegs = 0;
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ }
+ else
+ {
+ // since RBM_LNGARG_0 and RBM_SHIFT_LNG are hardwired we can't have them in the locked registers
+ tmpMask = lockedRegs;
+ tmpMask &= ~RBM_LNGARG_0;
+ tmpMask &= ~RBM_SHIFT_LNG;
+
+ // op2 goes to RBM_SHIFT, op1 to the RBM_LNGARG_0 pair
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_NONE);
+ tmpMask |= RBM_SHIFT_LNG;
+ // Ensure that the RBM_SHIFT_LNG register interferes with op2's compCurLife
+ // Fix 383843 X86/ARM ILGEN
+ rpRecordRegIntf(RBM_SHIFT_LNG, compCurLife DEBUGARG("SHIFT_LNG arg setup"));
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_PAIR_LNGARG_0, tmpMask, RBM_NONE);
+ tmpMask |= RBM_LNGARG_0;
+ // Ensure that the RBM_LNGARG_0 registers interfere with op1's compCurLife
+ // Fix 383839 ARM ILGEN
+ rpRecordRegIntf(RBM_LNGARG_0, compCurLife DEBUGARG("LNGARG_0 arg setup"));
+ rpPredictTreeRegUse(op2, PREDICT_REG_SHIFT_LNG, tmpMask, RBM_LASTUSE);
+ }
+ regMask = RBM_LNGRET; // function return registers
+ op1->gtUsedRegs |= RBM_LNGARG_0;
+ op2->gtUsedRegs |= RBM_SHIFT_LNG;
+
+ tree->gtUsedRegs = op1->gtUsedRegs | op2->gtUsedRegs;
+
+ // We are using a helper function to do shift:
+ //
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+ }
+ }
+ else
+#endif // _TARGET_64BIT_
+ {
+#ifdef _TARGET_XARCH_
+ if (!op2->IsCnsIntOrI())
+ predictReg = PREDICT_NOT_REG_ECX;
+#endif
+
+ HANDLE_SHIFT_COUNT:
+ // Note that this code is also used by assigning shift operators (i.e. GT_ASG_LSH)
+
+ regMaskTP tmpRsvdRegs;
+
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rsvdRegs = RBM_LASTUSE;
+ tmpRsvdRegs = RBM_NONE;
+ }
+ else
+ {
+ regMask = RBM_NONE;
+ // Special case op1 of a constant
+ if (op1->IsCnsIntOrI())
+ tmpRsvdRegs = RBM_LASTUSE; // Allow a last use to occur in op2; See
+ // System.Xml.Schema.BitSet:Get(int):bool
+ else
+ tmpRsvdRegs = op1->gtRsvdRegs;
+ }
+
+ op2Mask = RBM_NONE;
+ if (!op2->IsCnsIntOrI())
+ {
+ if ((REG_SHIFT != REG_NA) && ((RBM_SHIFT & tmpRsvdRegs) == 0))
+ {
+ op2PredictReg = PREDICT_REG_SHIFT;
+ }
+ else
+ {
+ op2PredictReg = PREDICT_REG;
+ }
+
+ /* evaluate shift count into a register, likely the PREDICT_REG_SHIFT register */
+ op2Mask = rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMask, tmpRsvdRegs);
+
+ // If our target arch has a REG_SHIFT register then
+ // we set the PrefReg when we have a LclVar for op2, and
+ // we add an interference with REG_SHIFT for any other LclVars alive at op2
+ if (REG_SHIFT != REG_NA)
+ {
+ VARSET_TP VARSET_INIT(this, liveSet, compCurLife);
+
+ while (op2->gtOper == GT_COMMA)
+ {
+ op2 = op2->gtOp.gtOp2;
+ }
+
+ if (op2->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
+ varDsc->setPrefReg(REG_SHIFT, this);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::RemoveElemD(this, liveSet, varDsc->lvVarIndex);
+ }
+ }
+
+ // Ensure that we have a register interference with the LclVar in tree's LiveSet,
+ // excluding the LclVar that was used for the shift amount as it is read-only
+ // and can be kept alive through the shift operation
+ //
+ rpRecordRegIntf(RBM_SHIFT, liveSet DEBUGARG("Variable Shift Register"));
+ // In case op2Mask doesn't contain the required shift register,
+ // we will or it in now.
+ op2Mask |= RBM_SHIFT;
+ }
+ }
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ assert(regMask == RBM_NONE);
+ regMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs | op2Mask, rsvdRegs | RBM_LASTUSE);
+ }
+
+#if CPU_HAS_BYTE_REGS
+ if (varTypeIsByte(type))
+ {
+ // Fix 383789 X86 ILGEN
+ // Fix 383813 X86 ILGEN
+ // Fix 383828 X86 ILGEN
+ if (op1->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + op1->gtLclVar.gtLclNum;
+ if (varDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(op1VarBit,
+ VarSetOps::MakeSingleton(this, varDsc->lvVarIndex));
+
+ // Ensure that we don't assign a Non-Byteable register for op1's LCL_VAR
+ rpRecordRegIntf(RBM_NON_BYTE_REGS, op1VarBit DEBUGARG("Non Byte Register"));
+ }
+ }
+ if ((regMask & RBM_BYTE_REGS) == 0)
+ {
+ // We need to grab a byte-able register, (i.e. EAX, EDX, ECX, EBX)
+ // and we can't select one that is already reserved (i.e. lockedRegs or regMask)
+ //
+ regMask |=
+ rpPredictRegPick(type, PREDICT_SCRATCH_REG, (lockedRegs | regMask | RBM_NON_BYTE_REGS));
+ }
+ }
+#endif
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
+ }
+
+ goto RETURN_CHECK;
+
+ case GT_COMMA:
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_REG;
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Don't propagate the tgt reg use in a GT_COMMA */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs | regMask, RBM_LASTUSE);
+ }
+ else
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+
+ /* CodeGen will enregister the op2 side of a GT_COMMA */
+ if (predictReg == PREDICT_NONE)
+ {
+ predictReg = PREDICT_REG;
+ }
+ else if (rpHasVarIndexForPredict(predictReg))
+ {
+ /* Don't propagate the tgt reg use in a GT_COMMA */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+
+ regMask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ }
+ // tree should only accumulate the used registers from the op2 side of the GT_COMMA
+ //
+ tree->gtUsedRegs = op2->gtUsedRegs;
+ if ((op2->gtOper == GT_LCL_VAR) && (rsvdRegs != 0))
+ {
+ LclVarDsc* op2VarDsc = lvaTable + op2->gtLclVarCommon.gtLclNum;
+
+ if (op2VarDsc->lvTracked)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(op2VarBit, VarSetOps::MakeSingleton(this, op2VarDsc->lvVarIndex));
+ rpRecordRegIntf(rsvdRegs, op2VarBit DEBUGARG("comma use"));
+ }
+ }
+ goto RETURN_CHECK;
+
+ case GT_QMARK:
+ {
+ noway_assert(op1 != NULL && op2 != NULL);
+
+ /*
+ * If the gtUsedRegs conflicts with lockedRegs
+ * then we are going to have to spill some registers
+ * into the non-trashed register set to keep them alive
+ */
+ unsigned spillCnt;
+ spillCnt = 0;
+ regMaskTP spillRegs;
+ spillRegs = lockedRegs & tree->gtUsedRegs;
+
+ while (spillRegs)
+ {
+ /* Find the next register that needs to be spilled */
+ tmpMask = genFindLowestBit(spillRegs);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ /* In Codegen it will typically introduce a spill temp here */
+ /* rather than relocating the register to a non trashed reg */
+ rpPredictSpillCnt++;
+ spillCnt++;
+
+ /* Remove it from the spillRegs and lockedRegs*/
+ spillRegs &= ~tmpMask;
+ lockedRegs &= ~tmpMask;
+ }
+ {
+ VARSET_TP VARSET_INIT(this, startQmarkCondUseInPlaceVars, rpUseInPlace);
+
+ /* Evaluate the <cond> subtree */
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+
+ noway_assert(op2->gtOper == GT_COLON);
+ if (rpHasVarIndexForPredict(predictReg) && ((op2->gtFlags & (GTF_ASG | GTF_CALL)) != 0))
+ {
+ // Don't try to target the register specified in predictReg when we have complex subtrees
+ //
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+ GenTreePtr elseTree = op2->AsColon()->ElseNode();
+ GenTreePtr thenTree = op2->AsColon()->ThenNode();
+
+ noway_assert(thenTree != NULL && elseTree != NULL);
+
+ // Update compCurLife to only those vars live on the <then> subtree
+
+ VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtThenLiveSet);
+
+ if (type == TYP_VOID)
+ {
+ /* Evaluate the <then> subtree */
+ rpPredictTreeRegUse(thenTree, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ regMask = RBM_NONE;
+ predictReg = PREDICT_NONE;
+ }
+ else
+ {
+ // A mask to use to force the predictor to choose low registers (to reduce code size)
+ regMaskTP avoidRegs = RBM_NONE;
+#ifdef _TARGET_ARM_
+ avoidRegs = (RBM_R12 | RBM_LR);
+#endif
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ /* Evaluate the <then> subtree */
+ regMask =
+ rpPredictTreeRegUse(thenTree, predictReg, lockedRegs, rsvdRegs | avoidRegs | RBM_LASTUSE);
+
+ if (regMask)
+ {
+ rpPredictReg op1PredictReg = rpGetPredictForMask(regMask);
+ if (op1PredictReg != PREDICT_NONE)
+ predictReg = op1PredictReg;
+ }
+ }
+
+ VarSetOps::Assign(this, rpUseInPlace, startQmarkCondUseInPlaceVars);
+
+ /* Evaluate the <else> subtree */
+ // First record the post-then liveness, and reset the current liveness to the else
+ // branch liveness.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ VARSET_TP VARSET_INIT(this, postThenLive, compCurLife);
+#endif
+
+ VarSetOps::Assign(this, compCurLife, tree->gtQmark.gtElseLiveSet);
+
+ rpPredictTreeRegUse(elseTree, predictReg, lockedRegs, rsvdRegs | RBM_LASTUSE);
+ tree->gtUsedRegs |= thenTree->gtUsedRegs | elseTree->gtUsedRegs;
+
+ // The then and the else are "virtual basic blocks" that form a control-flow diamond.
+ // They each have only one successor, which they share. Their live-out sets must equal the
+ // live-in set of this virtual successor block, and thus must be the same. We can assert
+ // that equality here.
+ assert(VarSetOps::Equal(this, compCurLife, postThenLive));
+
+ if (spillCnt > 0)
+ {
+ regMaskTP reloadMask = RBM_NONE;
+
+ while (spillCnt)
+ {
+ regMaskTP reloadReg;
+
+ /* Get an extra register to hold it */
+ reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ reloadMask |= reloadReg;
+
+ spillCnt--;
+ }
+
+ /* update the gtUsedRegs mask */
+ tree->gtUsedRegs |= reloadMask;
+ }
+ }
+
+ goto RETURN_CHECK;
+ }
+ case GT_RETURN:
+ tree->gtUsedRegs = RBM_NONE;
+ regMask = RBM_NONE;
+
+ /* Is there a return value? */
+ if (op1 != NULL)
+ {
+#if FEATURE_FP_REGALLOC
+ if (varTypeIsFloating(type))
+ {
+ predictReg = PREDICT_FLTRET;
+ if (type == TYP_FLOAT)
+ regMask = RBM_FLOATRET;
+ else
+ regMask = RBM_DOUBLERET;
+ }
+ else
+#endif
+ if (isRegPairType(type))
+ {
+ predictReg = PREDICT_LNGRET;
+ regMask = RBM_LNGRET;
+ }
+ else
+ {
+ predictReg = PREDICT_INTRET;
+ regMask = RBM_INTRET;
+ }
+ if (info.compCallUnmanaged)
+ {
+ lockedRegs |= (RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME);
+ }
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, RBM_LASTUSE);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ }
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // On ARM, when running under the profiler, emitting the Leave callback requires RBM_PROFILER_RETURN_USED.
+ // We could optimize the registers based on int/long or no return value, but to
+ // keep it simple we mark the entire RBM_PROFILER_RETURN_USED set as used regs here.
+ if (compIsProfilerHookNeeded())
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_RET_USED;
+ }
+
+#endif
+ goto RETURN_CHECK;
+
+ case GT_RETFILT:
+ if (op1 != NULL)
+ {
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ regMask = genReturnRegForTree(tree);
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ tree->gtUsedRegs = 0;
+ regMask = 0;
+
+ goto RETURN_CHECK;
+
+ case GT_JTRUE:
+ /* This must be a test of a relational operator */
+
+ noway_assert(op1->OperIsCompare());
+
+ /* Only condition code set by this operation */
+
+ rpPredictTreeRegUse(op1, PREDICT_NONE, lockedRegs, RBM_NONE);
+
+ tree->gtUsedRegs = op1->gtUsedRegs;
+ regMask = 0;
+
+ goto RETURN_CHECK;
+
+ case GT_SWITCH:
+ noway_assert(type <= TYP_INT);
+ noway_assert(compCurBB->bbJumpKind == BBJ_SWITCH);
+#ifdef _TARGET_ARM_
+ {
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
+ unsigned jumpCnt = compCurBB->bbJumpSwt->bbsCount;
+ if (jumpCnt > 2)
+ {
+ // Table based switch requires an extra register for the table base
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs | regMask;
+ }
+#else // !_TARGET_ARM_
+ rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, RBM_NONE);
+ tree->gtUsedRegs = op1->gtUsedRegs;
+#endif // _TARGET_ARM_
+ regMask = 0;
+ goto RETURN_CHECK;
+
+ case GT_CKFINITE:
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs);
+ // Need a reg to load exponent into
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs);
+ tree->gtUsedRegs = (regMaskSmall)regMask | op1->gtUsedRegs;
+ goto RETURN_CHECK;
+
+ case GT_LCLHEAP:
+ regMask = rpPredictTreeRegUse(op1, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs);
+ op2Mask = 0;
+
+#ifdef _TARGET_ARM_
+ if (info.compInitMem)
+ {
+ // We zero out two registers in the ARM codegen path
+ op2Mask |=
+ rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | rsvdRegs | regMask | op2Mask);
+ }
+#endif
+
+ op1->gtUsedRegs |= (regMaskSmall)regMask;
+ tree->gtUsedRegs = op1->gtUsedRegs | (regMaskSmall)op2Mask;
+
+ // The result will be put in the reg we picked for the size
+ // regMask = <already set as we want it to be>
+
+ goto RETURN_CHECK;
+
+ case GT_OBJ:
+ {
+#ifdef _TARGET_ARM_
+ if (predictReg <= PREDICT_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ regMaskTP avoidRegs = (RBM_R12 | RBM_LR); // A mask to use to force the predictor to choose low
+ // registers (to reduce code size)
+ regMask = RBM_NONE;
+ tmpMask = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | avoidRegs);
+#endif
+
+ if (fgIsIndirOfAddrOfLocal(tree) != NULL)
+ {
+ compUpdateLifeVar</*ForCodeGen*/ false>(tree);
+ }
+
+#ifdef _TARGET_ARM_
+ unsigned objSize = info.compCompHnd->getClassSize(tree->gtObj.gtClass);
+ regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
+ // If it has one bit set, and that's an arg reg...
+ if (preferReg != RBM_NONE && genMaxOneBit(preferReg) && ((preferReg & RBM_ARG_REGS) != 0))
+ {
+ // We are passing the 'obj' in the argument registers
+ //
+ regNumber rn = genRegNumFromMask(preferReg);
+
+ // Add the registers used to pass the 'obj' to regMask.
+ for (unsigned i = 0; i < objSize / 4; i++)
+ {
+ if (rn == MAX_REG_ARG)
+ break;
+ // Otherwise...
+ regMask |= genRegMask(rn);
+ rn = genRegArgNext(rn);
+ }
+ }
+ else
+ {
+ // We are passing the 'obj' in the outgoing arg space
+ // We will need one register to load it into unless the 'obj' size is 4 or less.
+ //
+ if (objSize > 4)
+ {
+ regMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | tmpMask | avoidRegs);
+ }
+ }
+ tree->gtUsedRegs = (regMaskSmall)(regMask | tmpMask);
+ goto RETURN_CHECK;
+#else // !_TARGET_ARM
+ goto GENERIC_UNARY;
+#endif // _TARGET_ARM_
+ }
+
+ case GT_MKREFANY:
+ {
+#ifdef _TARGET_ARM_
+ regMaskTP preferReg = rpPredictRegMask(predictReg, TYP_I_IMPL);
+ regMask = RBM_NONE;
+ if ((((preferReg - 1) & preferReg) == 0) && ((preferReg & RBM_ARG_REGS) != 0))
+ {
+ // A MKREFANY takes up two registers.
+ regNumber rn = genRegNumFromMask(preferReg);
+ regMask = RBM_NONE;
+ if (rn < MAX_REG_ARG)
+ {
+ regMask |= genRegMask(rn);
+ rn = genRegArgNext(rn);
+ if (rn < MAX_REG_ARG)
+ regMask |= genRegMask(rn);
+ }
+ }
+ if (regMask != RBM_NONE)
+ {
+ // Condensation of GENERIC_BINARY path.
+ assert((tree->gtFlags & GTF_REVERSE_OPS) == 0);
+ op2PredictReg = PREDICT_REG;
+ regMaskTP regMaskOp1 = rpPredictTreeRegUse(op1, predictReg, lockedRegs, rsvdRegs | op2->gtRsvdRegs);
+ rpPredictTreeRegUse(op2, op2PredictReg, lockedRegs | regMaskOp1, RBM_LASTUSE);
+ regMask |= op1->gtUsedRegs | op2->gtUsedRegs;
+ tree->gtUsedRegs = (regMaskSmall)regMask;
+ goto RETURN_CHECK;
+ }
+ tree->gtUsedRegs = op1->gtUsedRegs;
+#endif // _TARGET_ARM_
+ goto GENERIC_BINARY;
+ }
+
+ case GT_BOX:
+ goto GENERIC_UNARY;
+
+ case GT_LOCKADD:
+ goto GENERIC_BINARY;
+
+ case GT_XADD:
+ case GT_XCHG:
+ // Ensure we can write to op2. op2 will hold the output.
+ if (predictReg < PREDICT_SCRATCH_REG)
+ predictReg = PREDICT_SCRATCH_REG;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ op2Mask = rpPredictTreeRegUse(op2, predictReg, lockedRegs, rsvdRegs);
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs | op2Mask);
+ }
+ else
+ {
+ regMask = rpPredictTreeRegUse(op1, PREDICT_REG, lockedRegs, rsvdRegs);
+ op2Mask = rpPredictTreeRegUse(op2, PREDICT_SCRATCH_REG, lockedRegs, rsvdRegs | regMask);
+ }
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask);
+ goto RETURN_CHECK;
+
+ case GT_ARR_LENGTH:
+ goto GENERIC_UNARY;
+
+ default:
+#ifdef DEBUG
+ gtDispTree(tree);
+#endif
+ noway_assert(!"unexpected simple operator in reg use prediction");
+ break;
+ }
+ }
+
+ /* See what kind of a special operator we have here */
+
+ switch (oper)
+ {
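+ // Locals shared by the special-operator cases below (primarily GT_CALL).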
+ GenTreePtr args;
+ GenTreeArgList* list;
+ regMaskTP keepMask;
+ unsigned regArgsNum;
+ int regIndex;
+ regMaskTP regArgMask;
+ regMaskTP curArgMask;
+
+ case GT_CALL:
+
+ {
+
+ /* initialize so we can just or in various bits */
+ tree->gtUsedRegs = RBM_NONE;
+
+#if GTF_CALL_REG_SAVE
+ /*
+ * Unless the GTF_CALL_REG_SAVE flag is set,
+ * we can't preserve the RBM_CALLEE_TRASH registers.
+ * (likewise we can't preserve the return registers)
+ * So we remove them from the lockedRegs set and
+ * record any of them in the keepMask
+ */
+
+ if (tree->gtFlags & GTF_CALL_REG_SAVE)
+ {
+ regMaskTP trashMask = genReturnRegForTree(tree);
+
+ keepMask = lockedRegs & trashMask;
+ lockedRegs &= ~trashMask;
+ }
+ else
+#endif
+ {
+ keepMask = lockedRegs & RBM_CALLEE_TRASH;
+ lockedRegs &= ~RBM_CALLEE_TRASH;
+ }
+
+ regArgsNum = 0;
+ regIndex = 0;
+
+ /* Is there an object pointer? */
+ if (tree->gtCall.gtCallObjp)
+ {
+ /* Evaluate the instance pointer first */
+
+ args = tree->gtCall.gtCallObjp;
+
+ /* the objPtr always goes to an integer register (through temp or directly) */
+ noway_assert(regArgsNum == 0);
+ regArgsNum++;
+
+ /* Must be passed in a register */
+
+ noway_assert(args->gtFlags & GTF_LATE_ARG);
+
+ /* Must be either a deferred reg arg node or a GT_ASG node */
+
+ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
+ args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
+
+ if (!args->IsArgPlaceHolderNode())
+ {
+ rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ }
+ }
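+ // Remember the current rpUseInPlace set so it can be restored after each argument is predicted.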
+ VARSET_TP VARSET_INIT_NOCOPY(startArgUseInPlaceVars, VarSetOps::UninitVal());
+ VarSetOps::Assign(this, startArgUseInPlaceVars, rpUseInPlace);
+
+ /* process argument list */
+ for (list = tree->gtCall.gtCallArgs; list; list = list->Rest())
+ {
+ args = list->Current();
+
+ if (args->gtFlags & GTF_LATE_ARG)
+ {
+ /* Must be either a Placeholder/NOP node or a GT_ASG node */
+
+ noway_assert(args->IsArgPlaceHolderNode() || args->IsNothingNode() || (args->gtOper == GT_ASG) ||
+ args->OperIsCopyBlkOp() || (args->gtOper == GT_COMMA));
+
+ if (!args->IsArgPlaceHolderNode())
+ {
+ rpPredictTreeRegUse(args, PREDICT_NONE, lockedRegs, RBM_LASTUSE);
+ }
+
+ regArgsNum++;
+ }
+ else
+ {
+#ifdef FEATURE_FIXED_OUT_ARGS
+ // We'll store this argument into the outgoing argument area
+ // It needs to be in a register to be stored.
+ //
+ predictReg = PREDICT_REG;
+
+#else // !FEATURE_FIXED_OUT_ARGS
+ // We'll generate a push for this argument
+ //
+ predictReg = PREDICT_NONE;
+ if (varTypeIsSmall(args->TypeGet()))
+ {
+ /* We may need to sign or zero extend a small type using a register */
+ predictReg = PREDICT_SCRATCH_REG;
+ }
+#endif
+
+ rpPredictTreeRegUse(args, predictReg, lockedRegs, RBM_LASTUSE);
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
+ tree->gtUsedRegs |= args->gtUsedRegs;
+ }
+
+ /* Is there a late argument list */
+
+ regIndex = 0;
+ regArgMask = RBM_NONE; // Set of argument registers that have already been setup.
+ args = NULL;
+
+ /* process the late argument list */
+ for (list = tree->gtCall.gtCallLateArgs; list; regIndex++)
+ {
+ // If the current argument being copied is a promoted struct local, set this pointer to its description.
+ LclVarDsc* promotedStructLocal = NULL;
+
+ curArgMask = RBM_NONE; // Set of argument registers that are going to be setup by this arg
+ tmpMask = RBM_NONE; // Set of additional temp registers that are need only to setup the current arg
+
+ assert(list->IsList());
+
+ args = list->Current();
+ list = list->Rest();
+
+ assert(!args->IsArgPlaceHolderNode()); // No placeholder nodes are in gtCallLateArgs.
+
+ fgArgTabEntryPtr curArgTabEntry = gtArgEntryByNode(tree, args);
+ assert(curArgTabEntry);
+
+ regNumber regNum = curArgTabEntry->regNum; // first register used to pass this argument
+ unsigned numSlots =
+ curArgTabEntry->numSlots; // number of outgoing arg stack slots used by this argument
+
+ rpPredictReg argPredictReg;
+ regMaskTP avoidReg = RBM_NONE;
+
+ if (regNum != REG_STK)
+ {
+ argPredictReg = rpGetPredictForReg(regNum);
+ curArgMask |= genRegMask(regNum);
+ }
+ else
+ {
+ assert(numSlots > 0);
+ argPredictReg = PREDICT_NONE;
+#ifdef _TARGET_ARM_
+ // Force the predictor to choose a low register when regNum is REG_STK to reduce code bloat
+ avoidReg = (RBM_R12 | RBM_LR);
+#endif
+ }
+
+#ifdef _TARGET_ARM_
+ // For TYP_LONG or TYP_DOUBLE register arguments we need to add the second argument register
+ //
+ if ((regNum != REG_STK) && ((args->TypeGet() == TYP_LONG) || (args->TypeGet() == TYP_DOUBLE)))
+ {
+ // 64-bit longs and doubles require 2 consecutive argument registers
+ curArgMask |= genRegMask(REG_NEXT(regNum));
+ }
+ else if (args->TypeGet() == TYP_STRUCT)
+ {
+ GenTreePtr argx = args;
+ GenTreePtr lclVarTree = NULL;
+
+ /* The GT_OBJ may be a child of a GT_COMMA */
+ while (argx->gtOper == GT_COMMA)
+ {
+ argx = argx->gtOp.gtOp2;
+ }
+ unsigned originalSize = 0;
+
+ if (argx->gtOper == GT_OBJ)
+ {
+ originalSize = info.compCompHnd->getClassSize(argx->gtObj.gtClass);
+
+ // Is it the address of a promoted struct local?
+ if (argx->gtObj.gtOp1->gtOper == GT_ADDR && argx->gtObj.gtOp1->gtOp.gtOp1->gtOper == GT_LCL_VAR)
+ {
+ lclVarTree = argx->gtObj.gtOp1->gtOp.gtOp1;
+ LclVarDsc* varDsc = &lvaTable[lclVarTree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvPromoted)
+ promotedStructLocal = varDsc;
+ }
+ }
+ else if (argx->gtOper == GT_LCL_VAR)
+ {
+ varDsc = lvaTable + argx->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+
+ // Is it a promoted struct local?
+ if (varDsc->lvPromoted)
+ promotedStructLocal = varDsc;
+ }
+ else if (argx->gtOper == GT_MKREFANY)
+ {
+ originalSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else
+ {
+ noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
+ }
+
+ // We only pass arguments differently if it is a struct local that is "independently" promoted, which
+ // allows the field locals to be independently enregistered.
+ if (promotedStructLocal != NULL)
+ {
+ if (lvaGetPromotionType(promotedStructLocal) != PROMOTION_TYPE_INDEPENDENT)
+ promotedStructLocal = NULL;
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
+
+ // Are we passing a TYP_STRUCT in multiple integer registers?
+ // If so, set up curArgMask to reflect this.
+ // Also slots is updated to reflect the number of outgoing arg slots that we will write
+ if (regNum != REG_STK)
+ {
+ regNumber regLast = (curArgTabEntry->isHfaRegArg) ? LAST_FP_ARGREG : REG_ARG_LAST;
+ assert(genIsValidReg(regNum));
+ regNumber nextReg = REG_NEXT(regNum);
+ slots--;
+ while (slots > 0 && nextReg <= regLast)
+ {
+ curArgMask |= genRegMask(nextReg);
+ nextReg = REG_NEXT(nextReg);
+ slots--;
+ }
+ }
+
+ if ((promotedStructLocal != NULL) && (curArgMask != RBM_NONE))
+ {
+ // All or a portion of this struct will be placed in the argument registers indicated by
+ // "curArgMask". We build in knowledge of the order in which the code is generated here, so
+ // that the second arg to be evaluated interferes with the reg for the first, the third with
+ // the regs for the first and second, etc. But since we always place the stack slots before
+ // placing the register slots we do not add interferences for any part of the struct that gets
+ // passed on the stack.
+
+ argPredictReg =
+ PREDICT_NONE; // We will target the individual fields into registers but not the whole struct
+ regMaskTP prevArgMask = RBM_NONE;
+ for (unsigned i = 0; i < promotedStructLocal->lvFieldCnt; i++)
+ {
+ LclVarDsc* fieldVarDsc = &lvaTable[promotedStructLocal->lvFieldLclStart + i];
+ if (fieldVarDsc->lvTracked)
+ {
+ assert(lclVarTree != NULL);
+ if (prevArgMask != RBM_NONE)
+ {
+ rpRecordRegIntf(prevArgMask, VarSetOps::MakeSingleton(this, fieldVarDsc->lvVarIndex)
+ DEBUGARG("fieldVar/argReg"));
+ }
+ }
+ // Now see how many registers this uses up.
+ unsigned firstRegOffset = fieldVarDsc->lvFldOffset / TARGET_POINTER_SIZE;
+ unsigned nextAfterLastRegOffset =
+ (fieldVarDsc->lvFldOffset + fieldVarDsc->lvExactSize + TARGET_POINTER_SIZE - 1) /
+ TARGET_POINTER_SIZE;
+ unsigned nextAfterLastArgRegOffset =
+ min(nextAfterLastRegOffset,
+ genIsValidIntReg(regNum) ? REG_NEXT(REG_ARG_LAST) : REG_NEXT(LAST_FP_ARGREG));
+
+ for (unsigned regOffset = firstRegOffset; regOffset < nextAfterLastArgRegOffset;
+ regOffset++)
+ {
+ prevArgMask |= genRegMask(regNumber(regNum + regOffset));
+ }
+
+ if (nextAfterLastRegOffset > nextAfterLastArgRegOffset)
+ {
+ break;
+ }
+
+ if ((fieldVarDsc->lvFldOffset % TARGET_POINTER_SIZE) == 0)
+ {
+ // Add the argument register used here as a preferred register for this fieldVarDsc
+ //
+ regNumber firstRegUsed = regNumber(regNum + firstRegOffset);
+ fieldVarDsc->setPrefReg(firstRegUsed, this);
+ }
+ }
+ compUpdateLifeVar</*ForCodeGen*/ false>(argx);
+ }
+
+ // If slots is greater than zero then part or all of this TYP_STRUCT
+ // argument is passed in the outgoing argument area. (except HFA arg)
+ //
+ if ((slots > 0) && !curArgTabEntry->isHfaRegArg)
+ {
+ // We will need a register to address the TYP_STRUCT
+ // Note that we can use an argument register in curArgMask as in
+ // codegen we pass the stack portion of the argument before we
+ // setup the register part.
+ //
+
+ // Force the predictor to choose a LOW_REG here to reduce code bloat
+ avoidReg = (RBM_R12 | RBM_LR);
+
+ assert(tmpMask == RBM_NONE);
+ tmpMask = rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | avoidReg);
+
+ // If slots > 1 then we will need a second register to perform the load/store into the outgoing
+ // arg area
+ if (slots > 1)
+ {
+ tmpMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG,
+ lockedRegs | regArgMask | tmpMask | avoidReg);
+ }
+ }
+ } // (args->TypeGet() == TYP_STRUCT)
+#endif // _TARGET_ARM_
+
+ // If we have a promotedStructLocal we don't need to call rpPredictTreeRegUse(args, ...
+ // as we have already calculated the correct tmpMask and curArgMask values and
+ // by calling rpPredictTreeRegUse we would just add unnecessary register interferences.
+ //
+ if (promotedStructLocal == NULL)
+ {
+ /* Target the appropriate argument register */
+ tmpMask |= rpPredictTreeRegUse(args, argPredictReg, lockedRegs | regArgMask, RBM_LASTUSE);
+ }
+
+ // We mark OBJ(ADDR(LOCAL)) with GTF_VAR_DEATH since the local is required to live
+ // for the duration of the OBJ.
+ if (args->OperGet() == GT_OBJ && (args->gtFlags & GTF_VAR_DEATH))
+ {
+ GenTreePtr lclVarTree = fgIsIndirOfAddrOfLocal(args);
+ assert(lclVarTree != NULL); // Or else would not be marked with GTF_VAR_DEATH.
+ compUpdateLifeVar</*ForCodeGen*/ false>(lclVarTree);
+ }
+
+ regArgMask |= curArgMask;
+ args->gtUsedRegs |= (tmpMask | regArgMask);
+ tree->gtUsedRegs |= args->gtUsedRegs;
+ tree->gtCall.gtCallLateArgs->gtUsedRegs |= args->gtUsedRegs;
+
+ if (args->gtUsedRegs != RBM_NONE)
+ {
+ // Add register interference with the set of registers used or in use when we evaluated
+ // the current arg, with whatever is alive after the current arg
+ //
+ rpRecordRegIntf(args->gtUsedRegs, compCurLife DEBUGARG("register arg setup"));
+ }
+ VarSetOps::Assign(this, rpUseInPlace, startArgUseInPlaceVars);
+ }
+ assert(list == NULL);
+
+ regMaskTP callAddrMask;
+ callAddrMask = RBM_NONE;
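+ // On a load/store architecture the call target must be loaded into a register;
+ // otherwise it can remain a memory operand.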
+#if CPU_LOAD_STORE_ARCH
+ predictReg = PREDICT_SCRATCH_REG;
+#else
+ predictReg = PREDICT_NONE;
+#endif
+
+ switch (tree->gtFlags & GTF_CALL_VIRT_KIND_MASK)
+ {
+ case GTF_CALL_VIRT_STUB:
+
+ // We only want to record an interference between the virtual stub
+ // param reg and anything that's live AFTER the call, but we've not
+ // yet processed the indirect target. So add RBM_VIRTUAL_STUB_PARAM
+ // to interferingRegs.
+ interferingRegs |= RBM_VIRTUAL_STUB_PARAM;
+#ifdef DEBUG
+ if (verbose)
+ printf("Adding interference with Virtual Stub Param\n");
+#endif
+ codeGen->regSet.rsSetRegsModified(RBM_VIRTUAL_STUB_PARAM);
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+ predictReg = PREDICT_REG_VIRTUAL_STUB_PARAM;
+ }
+ break;
+
+ case GTF_CALL_VIRT_VTABLE:
+ predictReg = PREDICT_SCRATCH_REG;
+ break;
+
+ case GTF_CALL_NONVIRT:
+ predictReg = PREDICT_SCRATCH_REG;
+ break;
+ }
+
+ if (tree->gtCall.gtCallType == CT_INDIRECT)
+ {
+#if defined(_TARGET_ARM_) || defined(_TARGET_AMD64_)
+ if (tree->gtCall.gtCallCookie)
+ {
+ codeGen->regSet.rsSetRegsModified(RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ callAddrMask |= rpPredictTreeRegUse(tree->gtCall.gtCallCookie, PREDICT_REG_PINVOKE_COOKIE_PARAM,
+ lockedRegs | regArgMask, RBM_LASTUSE);
+
+ // Just in case we predict some other registers, force interference with our two special
+ // parameters: PINVOKE_COOKIE_PARAM & PINVOKE_TARGET_PARAM
+ callAddrMask |= (RBM_PINVOKE_COOKIE_PARAM | RBM_PINVOKE_TARGET_PARAM);
+
+ predictReg = PREDICT_REG_PINVOKE_TARGET_PARAM;
+ }
+#endif
+ callAddrMask |=
+ rpPredictTreeRegUse(tree->gtCall.gtCallAddr, predictReg, lockedRegs | regArgMask, RBM_LASTUSE);
+ }
+ else if (predictReg != PREDICT_NONE)
+ {
+ callAddrMask |= rpPredictRegPick(TYP_I_IMPL, predictReg, lockedRegs | regArgMask);
+ }
+
+ if (tree->gtFlags & GTF_CALL_UNMANAGED)
+ {
+ // Need a register for tcbReg
+ callAddrMask |=
+ rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
+#if CPU_LOAD_STORE_ARCH
+ // Need an extra register for tmpReg
+ callAddrMask |=
+ rpPredictRegPick(TYP_I_IMPL, PREDICT_SCRATCH_REG, lockedRegs | regArgMask | callAddrMask);
+#endif
+ }
+
+ tree->gtUsedRegs |= callAddrMask;
+
+ /* After the call, restore the original value of lockedRegs */
+ lockedRegs |= keepMask;
+
+ /* set the return register */
+ regMask = genReturnRegForTree(tree);
+
+ if (regMask & rsvdRegs)
+ {
+ // We will need to relocate the return register value
+ regMaskTP intRegMask = (regMask & RBM_ALLINT);
+#if FEATURE_FP_REGALLOC
+ regMaskTP floatRegMask = (regMask & RBM_ALLFLOAT);
+#endif
+ regMask = RBM_NONE;
+
+ if (intRegMask)
+ {
+ if (intRegMask == RBM_INTRET)
+ {
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else if (intRegMask == RBM_LNGRET)
+ {
+ regMask |= rpPredictRegPick(TYP_LONG, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else
+ {
+ noway_assert(!"unexpected return regMask");
+ }
+ }
+
+#if FEATURE_FP_REGALLOC
+ if (floatRegMask)
+ {
+ if (floatRegMask == RBM_FLOATRET)
+ {
+ regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else if (floatRegMask == RBM_DOUBLERET)
+ {
+ regMask |= rpPredictRegPick(TYP_DOUBLE, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ else // HFA return case
+ {
+ for (unsigned f = 0; f < genCountBits(floatRegMask); f++)
+ {
+ regMask |= rpPredictRegPick(TYP_FLOAT, PREDICT_SCRATCH_REG, rsvdRegs | regMask);
+ }
+ }
+ }
+#endif
+ }
+
+ /* the return registers (if any) are killed */
+ tree->gtUsedRegs |= regMask;
+
+#if GTF_CALL_REG_SAVE
+ if (!(tree->gtFlags & GTF_CALL_REG_SAVE))
+#endif
+ {
+ /* the RBM_CALLEE_TRASH set are killed (i.e. EAX,ECX,EDX) */
+ tree->gtUsedRegs |= RBM_CALLEE_TRASH;
+ }
+ }
+
+#if defined(_TARGET_ARM_) && defined(PROFILING_SUPPORTED)
+ // Mark required registers for emitting tailcall profiler callback as used
+ if (compIsProfilerHookNeeded() && tree->gtCall.IsTailCall() && (tree->gtCall.gtCallType == CT_USER_FUNC))
+ {
+ tree->gtUsedRegs |= RBM_PROFILER_TAIL_USED;
+ }
+#endif
+ break;
+
+ case GT_ARR_ELEM:
+
+ // Figure out which registers can't be touched
+ unsigned dim;
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ rsvdRegs |= tree->gtArrElem.gtArrInds[dim]->gtRsvdRegs;
+
+ regMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrObj, PREDICT_REG, lockedRegs, rsvdRegs);
+
+ regMaskTP dimsMask;
+ dimsMask = 0;
+
+#if CPU_LOAD_STORE_ARCH
+ // We need a register to load the bounds of the MD array
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask);
+#endif
+
+ for (dim = 0; dim < tree->gtArrElem.gtArrRank; dim++)
+ {
+ /* We need scratch registers to compute index-lower_bound.
+ Also, gtArrInds[0]'s register will be used as the second
+ addressability register (besides gtArrObj's) */
+
+ regMaskTP dimMask = rpPredictTreeRegUse(tree->gtArrElem.gtArrInds[dim], PREDICT_SCRATCH_REG,
+ lockedRegs | regMask | dimsMask, rsvdRegs);
+ if (dim == 0)
+ regMask |= dimMask;
+
+ dimsMask |= dimMask;
+ }
+#ifdef _TARGET_XARCH_
+            // INS_imul doesn't have an immediate constant.
+ if (!jitIsScaleIndexMul(tree->gtArrElem.gtArrElemSize))
+ regMask |= rpPredictRegPick(TYP_INT, PREDICT_SCRATCH_REG, lockedRegs | regMask | dimsMask);
+#endif
+ tree->gtUsedRegs = (regMaskSmall)(regMask | dimsMask);
+ break;
+
+ case GT_CMPXCHG:
+ {
+#ifdef _TARGET_XARCH_
+ rsvdRegs |= RBM_EAX;
+#endif
+ if (tree->gtCmpXchg.gtOpLocation->OperGet() == GT_LCL_VAR)
+ {
+ regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_REG, lockedRegs, rsvdRegs);
+ }
+ else
+ {
+ regMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpLocation, PREDICT_ADDR, lockedRegs, rsvdRegs);
+ }
+ op2Mask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpValue, PREDICT_REG, lockedRegs, rsvdRegs | regMask);
+
+#ifdef _TARGET_XARCH_
+ rsvdRegs &= ~RBM_EAX;
+ tmpMask = rpPredictTreeRegUse(tree->gtCmpXchg.gtOpComparand, PREDICT_REG_EAX, lockedRegs,
+ rsvdRegs | regMask | op2Mask);
+ tree->gtUsedRegs = (regMaskSmall)(RBM_EAX | regMask | op2Mask | tmpMask);
+ predictReg = PREDICT_REG_EAX; // When this is done the result is always in EAX.
+#else
+ tmpMask = 0;
+ tree->gtUsedRegs = (regMaskSmall)(regMask | op2Mask | tmpMask);
+#endif
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+ {
+ regMaskTP opArrLenRsvd = rsvdRegs | tree->gtBoundsChk.gtIndex->gtRsvdRegs;
+ regMask = rpPredictTreeRegUse(tree->gtBoundsChk.gtArrLen, PREDICT_REG, lockedRegs, opArrLenRsvd);
+ rpPredictTreeRegUse(tree->gtBoundsChk.gtIndex, PREDICT_REG, lockedRegs | regMask, RBM_LASTUSE);
+
+ tree->gtUsedRegs =
+ (regMaskSmall)regMask | tree->gtBoundsChk.gtArrLen->gtUsedRegs | tree->gtBoundsChk.gtIndex->gtUsedRegs;
+ }
+ break;
+
+ default:
+ NO_WAY("unexpected special operator in reg use prediction");
+ break;
+ }
+
+RETURN_CHECK:
+
+#ifdef DEBUG
+ /* make sure we set them to something reasonable */
+ if (tree->gtUsedRegs & RBM_ILLEGAL)
+ noway_assert(!"used regs not set properly in reg use prediction");
+
+ if (regMask & RBM_ILLEGAL)
+        noway_assert(!"return value not set properly in reg use prediction");
+
+#endif
+
+    /*
+     * If gtUsedRegs conflicts with lockedRegs
+     * then we are going to have to spill some registers
+     * into the non-trashed register set to keep them alive
+     */
+ regMaskTP spillMask;
+ spillMask = tree->gtUsedRegs & lockedRegs;
+
+ if (spillMask)
+ {
+ while (spillMask)
+ {
+ /* Find the next register that needs to be spilled */
+ tmpMask = genFindLowestBit(spillMask);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict spill of %s before: ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ if ((tmpMask & regMask) == 0)
+ {
+ printf("Predict reload of %s after : ", getRegName(genRegNumFromMask(tmpMask)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+ }
+#endif
+ /* In Codegen it will typically introduce a spill temp here */
+ /* rather than relocating the register to a non trashed reg */
+ rpPredictSpillCnt++;
+
+ /* Remove it from the spillMask */
+ spillMask &= ~tmpMask;
+ }
+ }
+
+    /*
+     * If the return registers in regMask conflict with the lockedRegs,
+     * then we allocate extra registers for the reload of the conflicting
+     * registers.
+     *
+     * Set spillMask to the set of locked registers that have to be reloaded here.
+     * reloadMask is set to the extra registers that are used to reload
+     * the spilled lockedRegs.
+     */
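+    // A small illustrative example (register names are hypothetical): if the call returns
+    // its value in a register that is also locked, say regMask = {EDX} and lockedRegs
+    // contains EDX, then spillMask = {EDX}. We remove EDX from regMask, pick one extra
+    // register via rpPredictRegPick, say ECX, add it to reloadMask, and regMask becomes
+    // {ECX}, so the reloaded value no longer conflicts with the locked register.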
+
+ noway_assert(regMask != DUMMY_INIT(RBM_ILLEGAL));
+ spillMask = lockedRegs & regMask;
+
+ if (spillMask)
+ {
+ /* Remove the spillMask from regMask */
+ regMask &= ~spillMask;
+
+ regMaskTP reloadMask = RBM_NONE;
+ while (spillMask)
+ {
+ /* Get an extra register to hold it */
+ regMaskTP reloadReg = rpPredictRegPick(TYP_INT, PREDICT_REG, lockedRegs | regMask | reloadMask);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Predict reload into %s after : ", getRegName(genRegNumFromMask(reloadReg)));
+ gtDispTree(tree, 0, NULL, true);
+ }
+#endif
+ reloadMask |= reloadReg;
+
+ /* Remove it from the spillMask */
+ spillMask &= ~genFindLowestBit(spillMask);
+ }
+
+ /* Update regMask to use the reloadMask */
+ regMask |= reloadMask;
+
+ /* update the gtUsedRegs mask */
+ tree->gtUsedRegs |= (regMaskSmall)regMask;
+ }
+
+ regMaskTP regUse = tree->gtUsedRegs;
+ regUse |= interferingRegs;
+
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+        // Add interference between the current set of live variables and
+        // the set of temporary registers needed to evaluate the subtree
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, compCurLife DEBUGARG("tmp use"));
+ }
+ }
+
+ if (rpAsgVarNum != -1)
+ {
+ // Add interference between the registers used (if any)
+ // and the assignment target variable
+ if (regUse)
+ {
+ rpRecordRegIntf(regUse, VarSetOps::MakeSingleton(this, rpAsgVarNum) DEBUGARG("tgt var tmp use"));
+ }
+
+        // Add a variable interference from rpAsgVarNum (i.e. the enregistered left hand
+        // side of the assignment passed here using PREDICT_REG_VAR_Txx)
+        // to the set of currently live variables. This new interference will prevent us
+        // from using the register value used here to enregister a different live variable.
+        //
+ if (!VarSetOps::IsEmpty(this, compCurLife))
+ {
+ rpRecordVarIntf(rpAsgVarNum, compCurLife DEBUGARG("asg tgt live conflict"));
+ }
+ }
+
+    /* Do we need to restore the oldLastUseVars value? */
+ if (restoreLastUseVars)
+ {
+ /* If we used a GT_ASG targeted register then we need to add
+ * a variable interference between any new last use variables
+ * and the GT_ASG targeted register
+ */
+ if (!VarSetOps::Equal(this, rpLastUseVars, oldLastUseVars) && rpAsgVarNum != -1)
+ {
+ rpRecordVarIntf(rpAsgVarNum, VarSetOps::Diff(this, rpLastUseVars, oldLastUseVars)
+ DEBUGARG("asgn tgt last use conflict"));
+ }
+ VarSetOps::Assign(this, rpLastUseVars, oldLastUseVars);
+ }
+
+ return regMask;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+#endif // LEGACY_BACKEND
+
+/****************************************************************************/
+/* Returns true when we must create an EBP frame.
+   This is used to force most managed methods to have EBP-based frames,
+   which allows the ETW kernel stackwalker to walk the stacks of managed code;
+   this allows the kernel to perform lightweight profiling.
+ */
+bool Compiler::rpMustCreateEBPFrame(INDEBUG(const char** wbReason))
+{
+ bool result = false;
+#ifdef DEBUG
+ const char* reason = nullptr;
+#endif
+
+#if ETW_EBP_FRAMED
+ if (!result && (opts.MinOpts() || opts.compDbgCode))
+ {
+ INDEBUG(reason = "Debug Code");
+ result = true;
+ }
+ if (!result && (info.compMethodInfo->ILCodeSize > DEFAULT_MAX_INLINE_SIZE))
+ {
+ INDEBUG(reason = "IL Code Size");
+ result = true;
+ }
+ if (!result && (fgBBcount > 3))
+ {
+ INDEBUG(reason = "BasicBlock Count");
+ result = true;
+ }
+ if (!result && fgHasLoops)
+ {
+ INDEBUG(reason = "Method has Loops");
+ result = true;
+ }
+ if (!result && (optCallCount >= 2))
+ {
+ INDEBUG(reason = "Call Count");
+ result = true;
+ }
+ if (!result && (optIndirectCallCount >= 1))
+ {
+ INDEBUG(reason = "Indirect Call");
+ result = true;
+ }
+#endif // ETW_EBP_FRAMED
+
+    // The VM always wants to identify the containing frame of an InlinedCallFrame
+    // via the frame register, never the stack register, so we need a frame.
+ if (!result && (optNativeCallCount != 0))
+ {
+ INDEBUG(reason = "Uses PInvoke");
+ result = true;
+ }
+
+#ifdef _TARGET_ARM64_
+ // TODO-ARM64-NYI: This is temporary: force a frame pointer-based frame until genFnProlog can handle non-frame
+ // pointer frames.
+ if (!result)
+ {
+ INDEBUG(reason = "Temporary ARM64 force frame pointer");
+ result = true;
+ }
+#endif // _TARGET_ARM64_
+
+#ifdef DEBUG
+ if ((result == true) && (wbReason != nullptr))
+ {
+ *wbReason = reason;
+ }
+#endif
+
+ return result;
+}
+
+#ifdef LEGACY_BACKEND // We don't use any of the old register allocator functions when LSRA is used instead.
+
+/*****************************************************************************
+ *
+ *  Predict which variables will be assigned to registers.
+ *  This is x86-specific and only predicts the integer registers; it
+ *  must be conservative: any register that is predicted to be enregistered
+ *  must end up being enregistered.
+ *
+ *  rpPredictTreeRegUse takes advantage of the LCL_VARs that are
+ *  predicted to be enregistered to minimize calls to rpPredictRegPick.
+ *
+ */
+
+#ifdef _PREFAST_
+#pragma warning(push)
+#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
+#endif
+regMaskTP Compiler::rpPredictAssignRegVars(regMaskTP regAvail)
+{
+ unsigned regInx;
+
+ if (rpPasses <= rpPassesPessimize)
+ {
+ // Assume that we won't have to reverse EBP enregistration
+ rpReverseEBPenreg = false;
+
+ // Set the default rpFrameType based upon codeGen->isFramePointerRequired()
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ rpFrameType = FT_EBP_FRAME;
+ else
+ rpFrameType = FT_ESP_FRAME;
+ }
+
+#if !ETW_EBP_FRAMED
+ // If we are using FPBASE as the frame register, we cannot also use it for
+ // a local var
+ if (rpFrameType == FT_EBP_FRAME)
+ {
+ regAvail &= ~RBM_FPBASE;
+ }
+#endif // !ETW_EBP_FRAMED
+
+ rpStkPredict = 0;
+ rpPredictAssignMask = regAvail;
+
+ raSetupArgMasks(&codeGen->intRegState);
+#if !FEATURE_STACK_FP_X87
+ raSetupArgMasks(&codeGen->floatRegState);
+#endif
+
+ // If there is a secret stub param, it is also live in
+ if (info.compPublishStubParam)
+ {
+ codeGen->intRegState.rsCalleeRegArgMaskLiveIn |= RBM_SECRET_STUB_PARAM;
+ }
+
+ if (regAvail == RBM_NONE)
+ {
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+#if FEATURE_STACK_FP_X87
+ if (!varDsc->IsFloatRegType())
+#endif
+ {
+ varDsc->lvRegNum = REG_STK;
+ if (isRegPairType(varDsc->lvType))
+ varDsc->lvOtherReg = REG_STK;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nCompiler::rpPredictAssignRegVars pass #%d", rpPasses);
+ printf("\n Available registers = ");
+ dspRegMask(regAvail);
+ printf("\n");
+ }
+#endif
+
+ if (regAvail == RBM_NONE)
+ {
+ return RBM_NONE;
+ }
+
+ /* We cannot change the lvVarIndexes at this point, so we */
+    /* can only re-order the existing set of tracked variables, */
+    /* which will change the order in which we select the */
+ /* locals for enregistering. */
+
+ assert(lvaTrackedFixed); // We should have already set this to prevent us from adding any new tracked variables.
+
+ // Should not be set unless optimizing
+ noway_assert((lvaSortAgain == false) || (opts.MinOpts() == false));
+
+ if (lvaSortAgain)
+ lvaSortOnly();
+
+#ifdef DEBUG
+ fgDebugCheckBBlist();
+#endif
+
+ /* Initialize the weighted count of variables that could have */
+ /* been enregistered but weren't */
+ unsigned refCntStk = 0; // sum of ref counts for all stack based variables
+ unsigned refCntEBP = 0; // sum of ref counts for EBP enregistered variables
+ unsigned refCntWtdEBP = 0; // sum of wtd ref counts for EBP enregistered variables
+#if DOUBLE_ALIGN
+ unsigned refCntStkParam; // sum of ref counts for all stack based parameters
+ unsigned refCntWtdStkDbl; // sum of wtd ref counts for stack based doubles
+
+#if FEATURE_STACK_FP_X87
+ refCntStkParam = raCntStkParamDblStackFP;
+ refCntWtdStkDbl = raCntWtdStkDblStackFP;
+ refCntStk = raCntStkStackFP;
+#else
+ refCntStkParam = 0;
+ refCntWtdStkDbl = 0;
+ refCntStk = 0;
+#endif // FEATURE_STACK_FP_X87
+
+#endif // DOUBLE_ALIGN
+
+    /* Set of registers used to enregister variables in the prediction */
+ regMaskTP regUsed = RBM_NONE;
+
+ /*-------------------------------------------------------------------------
+ *
+ * Predict/Assign the enregistered locals in ref-count order
+ *
+ */
+
+ VARSET_TP VARSET_INIT_NOCOPY(unprocessedVars, VarSetOps::MakeFull(this));
+
+ unsigned FPRegVarLiveInCnt;
+ FPRegVarLiveInCnt = 0; // How many enregistered doubles are live on entry to the method
+
+ LclVarDsc* varDsc;
+ for (unsigned sortNum = 0; sortNum < lvaCount; sortNum++)
+ {
+ bool notWorthy = false;
+
+ unsigned varIndex;
+ bool isDouble;
+ regMaskTP regAvailForType;
+ var_types regType;
+ regMaskTP avoidReg;
+ unsigned customVarOrderSize;
+ regNumber customVarOrder[MAX_VAR_ORDER_SIZE];
+ bool firstHalf;
+ regNumber saveOtherReg;
+
+ varDsc = lvaRefSorted[sortNum];
+
+#if FEATURE_STACK_FP_X87
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+#ifdef DEBUG
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Field local of a PROMOTION_TYPE_DEPENDENT struct should not
+ // be en-registered.
+ noway_assert(!varDsc->lvRegister);
+ }
+#endif
+ continue;
+ }
+#endif
+
+ /* Check the set of invariant things that would prevent enregistration */
+
+ /* Ignore the variable if it's not tracked */
+ if (!varDsc->lvTracked)
+ goto CANT_REG;
+
+ /* Get hold of the index and the interference mask for the variable */
+ varIndex = varDsc->lvVarIndex;
+
+ // Remove 'varIndex' from unprocessedVars
+ VarSetOps::RemoveElemD(this, unprocessedVars, varIndex);
+
+ // Skip the variable if it's marked as DoNotEnregister.
+
+ if (varDsc->lvDoNotEnregister)
+ goto CANT_REG;
+
+ /* TODO: For now if we have JMP all register args go to stack
+ * TODO: Later consider extending the life of the argument or make a copy of it */
+
+ if (compJmpOpUsed && varDsc->lvIsRegArg)
+ goto CANT_REG;
+
+ /* Skip the variable if the ref count is zero */
+
+ if (varDsc->lvRefCnt == 0)
+ goto CANT_REG;
+
+ /* Ignore field of PROMOTION_TYPE_DEPENDENT type of promoted struct */
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ goto CANT_REG;
+ }
+
+ /* Is the unweighted ref count too low to be interesting? */
+
+ if (!varDsc->lvIsStructField && // We do encourage enregistering field locals.
+ (varDsc->lvRefCnt <= 1))
+ {
+ /* Sometimes it's useful to enregister a variable with only one use */
+ /* arguments referenced in loops are one example */
+
+ if (varDsc->lvIsParam && varDsc->lvRefCntWtd > BB_UNITY_WEIGHT)
+ goto OK_TO_ENREGISTER;
+
+ /* If the variable has a preferred register set it may be useful to put it there */
+ if (varDsc->lvPrefReg && varDsc->lvIsRegArg)
+ goto OK_TO_ENREGISTER;
+
+ /* Keep going; the table is sorted by "weighted" ref count */
+ goto CANT_REG;
+ }
+
+ OK_TO_ENREGISTER:
+
+ if (varTypeIsFloating(varDsc->TypeGet()))
+ {
+ regType = varDsc->TypeGet();
+ regAvailForType = regAvail & RBM_ALLFLOAT;
+ }
+ else
+ {
+ regType = TYP_INT;
+ regAvailForType = regAvail & RBM_ALLINT;
+ }
+
+#ifdef _TARGET_ARM_
+ isDouble = (varDsc->TypeGet() == TYP_DOUBLE);
+
+ if (isDouble)
+ {
+ regAvailForType &= RBM_DBL_REGS; // We must restrict the set to the double registers
+ }
+#endif
+
+ /* If we don't have any registers available then skip the enregistration attempt */
+ if (regAvailForType == RBM_NONE)
+ goto NO_REG;
+
+ // On the pessimize passes don't even try to enregister LONGS
+ if (isRegPairType(varDsc->lvType))
+ {
+ if (rpPasses > rpPassesPessimize)
+ goto NO_REG;
+ else if (rpLostEnreg && (rpPasses == rpPassesPessimize))
+ goto NO_REG;
+ }
+
+ // Set of registers to avoid when performing register allocation
+ avoidReg = RBM_NONE;
+
+ if (!varDsc->lvIsRegArg)
+ {
+            /* For local variables,
+             *  avoid the incoming argument registers,
+             *  but only those that this variable conflicts with */
+
+ if (raAvoidArgRegMask != 0)
+ {
+ LclVarDsc* argDsc;
+ LclVarDsc* argsEnd = lvaTable + info.compArgsCount;
+
+ for (argDsc = lvaTable; argDsc < argsEnd; argDsc++)
+ {
+ if (!argDsc->lvIsRegArg)
+ continue;
+
+ bool isFloat = argDsc->IsFloatRegType();
+ regNumber inArgReg = argDsc->lvArgReg;
+ regMaskTP inArgBit = genRegMask(inArgReg);
+
+ // Is this inArgReg in the raAvoidArgRegMask set?
+
+ if (!(raAvoidArgRegMask & inArgBit))
+ continue;
+
+ noway_assert(argDsc->lvIsParam);
+ noway_assert(inArgBit & (isFloat ? RBM_FLTARG_REGS : RBM_ARG_REGS));
+
+ unsigned locVarIndex = varDsc->lvVarIndex;
+ unsigned argVarIndex = argDsc->lvVarIndex;
+
+ /* Does this variable interfere with the arg variable ? */
+ if (VarSetOps::IsMember(this, lvaVarIntf[locVarIndex], argVarIndex))
+ {
+ noway_assert(VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
+ /* Yes, so try to avoid the incoming arg reg */
+ avoidReg |= inArgBit;
+ }
+ else
+ {
+ noway_assert(!VarSetOps::IsMember(this, lvaVarIntf[argVarIndex], locVarIndex));
+ }
+ }
+ }
+ }
+
+ // Now we will try to predict which register the variable
+ // could be enregistered in
+
+ customVarOrderSize = MAX_VAR_ORDER_SIZE;
+
+ raSetRegVarOrder(regType, customVarOrder, &customVarOrderSize, varDsc->lvPrefReg, avoidReg);
+
+ firstHalf = false;
+ saveOtherReg = DUMMY_INIT(REG_NA);
+
+ for (regInx = 0; regInx < customVarOrderSize; regInx++)
+ {
+ regNumber regNum = customVarOrder[regInx];
+ regMaskTP regBits = genRegMask(regNum);
+
+ /* Skip this register if it isn't available */
+ if ((regAvailForType & regBits) == 0)
+ continue;
+
+ /* Skip this register if it interferes with the variable */
+
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNum], varIndex))
+ continue;
+
+ if (varTypeIsFloating(regType))
+ {
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ regNumber regNext = REG_NEXT(regNum);
+ regBits |= genRegMask(regNext);
+
+ /* Skip if regNext interferes with the variable */
+ if (VarSetOps::IsMember(this, raLclRegIntf[regNext], varIndex))
+ continue;
+ }
+#endif
+ }
+
+ bool firstUseOfReg = ((regBits & (regUsed | codeGen->regSet.rsGetModifiedRegsMask())) == 0);
+ bool lessThanTwoRefWtd = (varDsc->lvRefCntWtd < (2 * BB_UNITY_WEIGHT));
+ bool calleeSavedReg = ((regBits & RBM_CALLEE_SAVED) != 0);
+
+            /* Skip this register if the weighted ref count is less than two
+               and we are considering an unused callee-saved register */
+
+ if (lessThanTwoRefWtd && // less than two references (weighted)
+ firstUseOfReg && // first use of this register
+ calleeSavedReg) // callee saved register
+ {
+ unsigned int totalRefCntWtd = varDsc->lvRefCntWtd;
+
+                // psc is an abbreviation for possibleSameColor
+ VARSET_TP VARSET_INIT_NOCOPY(pscVarSet, VarSetOps::Diff(this, unprocessedVars, lvaVarIntf[varIndex]));
+
+ VARSET_ITER_INIT(this, pscIndexIter, pscVarSet, pscIndex);
+ while (pscIndexIter.NextElem(this, &pscIndex))
+ {
+ LclVarDsc* pscVar = lvaTable + lvaTrackedToVarNum[pscIndex];
+ totalRefCntWtd += pscVar->lvRefCntWtd;
+ if (totalRefCntWtd > (2 * BB_UNITY_WEIGHT))
+ break;
+ }
+
+ if (totalRefCntWtd <= (2 * BB_UNITY_WEIGHT))
+ {
+ notWorthy = true;
+ continue; // not worth spilling a callee saved register
+ }
+                // Otherwise we will spill this callee-saved register,
+                // because its uses, when combined with the uses of
+                // other yet-to-be-processed candidates, exceed our threshold.
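+                // Hypothetical example: if this candidate's lvRefCntWtd is 1.5 * BB_UNITY_WEIGHT
+                // and the unprocessed candidates that could share this register contribute another
+                // BB_UNITY_WEIGHT, totalRefCntWtd reaches 2.5 * BB_UNITY_WEIGHT. That exceeds the
+                // 2 * BB_UNITY_WEIGHT threshold, so dirtying the callee-saved register is
+                // considered worthwhile.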
+ }
+
+ /* Looks good - mark the variable as living in the register */
+
+ if (isRegPairType(varDsc->lvType))
+ {
+ if (firstHalf == false)
+ {
+ /* Enregister the first half of the long */
+ varDsc->lvRegNum = regNum;
+ saveOtherReg = varDsc->lvOtherReg;
+ varDsc->lvOtherReg = REG_STK;
+ firstHalf = true;
+ }
+ else
+ {
+ /* Ensure 'well-formed' register pairs */
+ /* (those returned by gen[Pick|Grab]RegPair) */
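+                        /* For example (hypothetical picks): if the first half was placed in EDX
+                           and this iteration picks ECX, then since ECX < EDX we store ECX in
+                           lvRegNum and move EDX to lvOtherReg, so lvRegNum always holds the
+                           lower-numbered register of the pair. */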
+
+ if (regNum < varDsc->lvRegNum)
+ {
+ varDsc->lvOtherReg = varDsc->lvRegNum;
+ varDsc->lvRegNum = regNum;
+ }
+ else
+ {
+ varDsc->lvOtherReg = regNum;
+ }
+ firstHalf = false;
+ }
+ }
+ else
+ {
+ varDsc->lvRegNum = regNum;
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ varDsc->lvOtherReg = REG_NEXT(regNum);
+ }
+#endif
+ }
+
+ if (regNum == REG_FPBASE)
+ {
+ refCntEBP += varDsc->lvRefCnt;
+ refCntWtdEBP += varDsc->lvRefCntWtd;
+#if DOUBLE_ALIGN
+ if (varDsc->lvIsParam)
+ {
+ refCntStkParam += varDsc->lvRefCnt;
+ }
+#endif
+ }
+
+ /* Record this register in the regUsed set */
+ regUsed |= regBits;
+
+ /* The register is now ineligible for all interfering variables */
+
+ VarSetOps::UnionD(this, raLclRegIntf[regNum], lvaVarIntf[varIndex]);
+
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ regNumber secondHalf = REG_NEXT(regNum);
+ VARSET_ITER_INIT(this, iter, lvaVarIntf[varIndex], intfIndex);
+ while (iter.NextElem(this, &intfIndex))
+ {
+ VarSetOps::AddElemD(this, raLclRegIntf[secondHalf], intfIndex);
+ }
+ }
+#endif
+
+ /* If a register argument, remove its incoming register
+ * from the "avoid" list */
+
+ if (varDsc->lvIsRegArg)
+ {
+ raAvoidArgRegMask &= ~genRegMask(varDsc->lvArgReg);
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ raAvoidArgRegMask &= ~genRegMask(REG_NEXT(varDsc->lvArgReg));
+ }
+#endif
+ }
+
+ /* A variable of TYP_LONG can take two registers */
+ if (firstHalf)
+ continue;
+
+ // Since we have successfully enregistered this variable it is
+ // now time to move on and consider the next variable
+ goto ENREG_VAR;
+ }
+
+ if (firstHalf)
+ {
+ noway_assert(isRegPairType(varDsc->lvType));
+
+ /* This TYP_LONG is partially enregistered */
+
+ noway_assert(saveOtherReg != DUMMY_INIT(REG_NA));
+
+ if (varDsc->lvDependReg && (saveOtherReg != REG_STK))
+ {
+ rpLostEnreg = true;
+ }
+
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+ goto ENREG_VAR;
+ }
+
+ NO_REG:;
+ if (varDsc->lvDependReg)
+ {
+ rpLostEnreg = true;
+ }
+
+ if (!notWorthy)
+ {
+ /* Weighted count of variables that could have been enregistered but weren't */
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+
+ if (isRegPairType(varDsc->lvType) && (varDsc->lvOtherReg == REG_STK))
+ raAddToStkPredict(varDsc->lvRefCntWtd);
+ }
+
+ CANT_REG:;
+ varDsc->lvRegister = false;
+
+ varDsc->lvRegNum = REG_STK;
+ if (isRegPairType(varDsc->lvType))
+ varDsc->lvOtherReg = REG_STK;
+
+ /* unweighted count of variables that were not enregistered */
+
+ refCntStk += varDsc->lvRefCnt;
+
+#if DOUBLE_ALIGN
+ if (varDsc->lvIsParam)
+ {
+ refCntStkParam += varDsc->lvRefCnt;
+ }
+ else
+ {
+            /* Is it a stack-based double? */
+            /* Note that double params are excluded since they cannot be double-aligned */
+ if (varDsc->lvType == TYP_DOUBLE)
+ {
+ refCntWtdStkDbl += varDsc->lvRefCntWtd;
+ }
+ }
+#endif
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar((unsigned)(varDsc - lvaTable));
+ if (varDsc->lvTracked)
+ printf("T%02u", varDsc->lvVarIndex);
+ else
+ printf(" ");
+ printf(" (refcnt=%2u,refwtd=%s) not enregistered", varDsc->lvRefCnt, refCntWtd2str(varDsc->lvRefCntWtd));
+ if (varDsc->lvDoNotEnregister)
+ printf(", do-not-enregister");
+ printf("\n");
+ }
+#endif
+ continue;
+
+ ENREG_VAR:;
+
+ varDsc->lvRegister = true;
+
+ // Record the fact that we enregistered a stack arg when tail call is used.
+ if (compJmpOpUsed && !varDsc->lvIsRegArg)
+ {
+ rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvRegNum);
+ if (isRegPairType(varDsc->lvType))
+ {
+ rpMaskPInvokeEpilogIntf |= genRegMask(varDsc->lvOtherReg);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar((unsigned)(varDsc - lvaTable));
+ printf("T%02u (refcnt=%2u,refwtd=%s) predicted to be assigned to ", varIndex, varDsc->lvRefCnt,
+ refCntWtd2str(varDsc->lvRefCntWtd));
+ varDsc->PrintVarReg();
+#ifdef _TARGET_ARM_
+ if (isDouble)
+ {
+ printf(":%s", getRegName(varDsc->lvOtherReg));
+ }
+#endif
+ printf("\n");
+ }
+#endif
+ }
+
+#if ETW_EBP_FRAMED
+ noway_assert(refCntEBP == 0);
+#endif
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (refCntStk > 0)
+ printf("; refCntStk = %u\n", refCntStk);
+ if (refCntEBP > 0)
+ printf("; refCntEBP = %u\n", refCntEBP);
+ if (refCntWtdEBP > 0)
+ printf("; refCntWtdEBP = %u\n", refCntWtdEBP);
+#if DOUBLE_ALIGN
+ if (refCntStkParam > 0)
+ printf("; refCntStkParam = %u\n", refCntStkParam);
+ if (refCntWtdStkDbl > 0)
+ printf("; refCntWtdStkDbl = %u\n", refCntWtdStkDbl);
+#endif
+ }
+#endif
+
+ /* Determine how the EBP register should be used */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if DOUBLE_ALIGN
+
+ if (!codeGen->isFramePointerRequired())
+ {
+ noway_assert(getCanDoubleAlign() < COUNT_DOUBLE_ALIGN);
+
+ /*
+ First let us decide if we should use EBP to create a
+ double-aligned frame, instead of enregistering variables
+ */
+
+ if (getCanDoubleAlign() == MUST_DOUBLE_ALIGN)
+ {
+ rpFrameType = FT_DOUBLE_ALIGN_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+
+ if (getCanDoubleAlign() == CAN_DOUBLE_ALIGN && (refCntWtdStkDbl > 0))
+ {
+ /* OK, there may be some benefit to double-aligning the frame */
+ /* But let us compare the benefits vs. the costs of this */
+
+ /*
+               One cost to consider is the benefit of smaller code
+               when using EBP as a frame pointer register.
+
+               Each stack variable reference is an extra byte of code
+               if we use a double-aligned frame. Parameters are
+               accessed via EBP for a double-aligned frame, so they
+               don't use an extra byte of code.
+
+               We pay one byte of code for each refCntStk and we pay
+               one byte or more for each refCntEBP, but we save one
+               byte for each refCntStkParam.
+
+               Our savings are the elimination of a possible misaligned
+               access and a possible DCU split when an access crosses
+               a cache-line boundary.
+
+               We use the loop-weighted value of
+               refCntWtdStkDbl * misaligned_weight (0, 4, 16)
+               to represent this savings.
+ */
+
+ // We also pay 7 extra bytes for the MOV EBP,ESP,
+ // LEA ESP,[EBP-0x10] and the AND ESP,-8 to double align ESP
+ const unsigned DBL_ALIGN_SETUP_SIZE = 7;
+
+ unsigned bytesUsed = refCntStk + refCntEBP - refCntStkParam + DBL_ALIGN_SETUP_SIZE;
+ unsigned misaligned_weight = 4;
+
+ if (compCodeOpt() == SMALL_CODE)
+ misaligned_weight = 0;
+
+ if (compCodeOpt() == FAST_CODE)
+ misaligned_weight *= 4;
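+
+            // Worked example with hypothetical counts: refCntStk = 20, refCntEBP = 2 and
+            // refCntStkParam = 5 give bytesUsed = 20 + 2 - 5 + 7 = 24. With blended code
+            // (misaligned_weight = 4) and refCntWtdStkDbl = 4 * BB_UNITY_WEIGHT, the savings
+            // term is (4 * BB_UNITY_WEIGHT * 4) / BB_UNITY_WEIGHT = 16, so 24 > 16 and we
+            // would predict not to double-align the frame.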
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("; Double alignment:\n");
+                printf("; Bytes that could be saved by not using EBP frame: %i\n", bytesUsed);
+ printf("; Sum of weighted ref counts for EBP enregistered variables: %i\n", refCntWtdEBP);
+ printf("; Sum of weighted ref counts for weighted stack based doubles: %i\n", refCntWtdStkDbl);
+ }
+#endif
+
+ if (bytesUsed > ((refCntWtdStkDbl * misaligned_weight) / BB_UNITY_WEIGHT))
+ {
+ /* It's probably better to use EBP as a frame pointer */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting not to double-align ESP to save %d bytes of code.\n", bytesUsed);
+#endif
+ goto NO_DOUBLE_ALIGN;
+ }
+
+ /*
+               Another cost to consider is the benefit of using EBP to enregister
+               one or more integer variables.
+
+               We pay one extra memory reference for each refCntWtdEBP.
+
+               Our savings are the elimination of a possible misaligned
+               access and a possible DCU split when an access crosses
+               a cache-line boundary.
+
+ */
+
+ // <BUGNUM>
+ // VSW 346717: On P4 2 Proc XEON's, SciMark.FFT degrades if SciMark.FFT.transform_internal is
+ // not double aligned.
+ // Here are the numbers that make this not double-aligned.
+ // refCntWtdStkDbl = 0x164
+ // refCntWtdEBP = 0x1a4
+ // We think we do need to change the heuristic to be in favor of double-align.
+ // </BUGNUM>
+
+ if (refCntWtdEBP > refCntWtdStkDbl * 2)
+ {
+ /* It's probably better to use EBP to enregister integer variables */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting not to double-align ESP to allow EBP to be used to enregister variables\n");
+#endif
+ goto NO_DOUBLE_ALIGN;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ printf("; Predicting to create a double-aligned frame\n");
+#endif
+ /*
+ OK we passed all of the benefit tests
+ so we'll predict a double aligned frame
+ */
+
+ rpFrameType = FT_DOUBLE_ALIGN_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+
+NO_DOUBLE_ALIGN:
+#endif // DOUBLE_ALIGN
+
+ if (!codeGen->isFramePointerRequired() && !codeGen->isFrameRequired())
+ {
+#ifdef _TARGET_XARCH_
+// clang-format off
+ /* If we are using EBP to enregister variables then
+ will we actually save bytes by setting up an EBP frame?
+
+ Each stack reference is an extra byte of code if we use
+ an ESP frame.
+
+ Here we measure the savings that we get by using EBP to
+ enregister variables vs. the cost in code size that we
+ pay when using an ESP based frame.
+
+ We pay one byte of code for each refCntStk
+ but we save one byte (or more) for each refCntEBP.
+
+ Our savings are the elimination of a stack memory read/write.
+ We use the loop weighted value of
+ refCntWtdEBP * mem_access_weight (0, 3, 6)
+ to represent this savings.
+ */
+
+ // We also pay 5 extra bytes for the MOV EBP,ESP and LEA ESP,[EBP-0x10]
+ // to set up an EBP frame in the prolog and epilog
+ #define EBP_FRAME_SETUP_SIZE 5
+ // clang-format on
+
+ if (refCntStk > (refCntEBP + EBP_FRAME_SETUP_SIZE))
+ {
+ unsigned bytesSaved = refCntStk - (refCntEBP + EBP_FRAME_SETUP_SIZE);
+ unsigned mem_access_weight = 3;
+
+ if (compCodeOpt() == SMALL_CODE)
+ mem_access_weight = 0;
+ else if (compCodeOpt() == FAST_CODE)
+ mem_access_weight *= 2;
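+
+            // Worked example with hypothetical counts: refCntStk = 30 and refCntEBP = 5 give
+            // bytesSaved = 30 - (5 + 5) = 20. With blended code (mem_access_weight = 3) and
+            // refCntWtdEBP = 4 * BB_UNITY_WEIGHT, the savings term is 12, so 20 > 12 and we
+            // would predict an EBP frame rather than using EBP to enregister variables.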
+
+ if (bytesSaved > ((refCntWtdEBP * mem_access_weight) / BB_UNITY_WEIGHT))
+ {
+                /* It's probably not a good idea to use EBP in our predictions */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef DEBUG
+ if (verbose && (refCntEBP > 0))
+ printf("; Predicting that it's not worth using EBP to enregister variables\n");
+#endif
+ rpFrameType = FT_EBP_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+#endif // _TARGET_XARCH_
+
+ if ((rpFrameType == FT_NOT_SET) || (rpFrameType == FT_ESP_FRAME))
+ {
+#ifdef DEBUG
+ const char* reason;
+#endif
+ if (rpMustCreateEBPCalled == false)
+ {
+ rpMustCreateEBPCalled = true;
+ if (rpMustCreateEBPFrame(INDEBUG(&reason)))
+ {
+#ifdef DEBUG
+ if (verbose)
+ printf("; Decided to create an EBP based frame for ETW stackwalking (%s)\n", reason);
+#endif
+ codeGen->setFrameRequired(true);
+
+ rpFrameType = FT_EBP_FRAME;
+ goto REVERSE_EBP_ENREG;
+ }
+ }
+ }
+ }
+
+ goto EXIT;
+
+REVERSE_EBP_ENREG:
+
+ noway_assert(rpFrameType != FT_ESP_FRAME);
+
+ rpReverseEBPenreg = true;
+
+#if !ETW_EBP_FRAMED
+ if (refCntEBP > 0)
+ {
+ noway_assert(regUsed & RBM_FPBASE);
+
+ regUsed &= ~RBM_FPBASE;
+
+ /* variables that were enregistered in EBP become stack based variables */
+ raAddToStkPredict(refCntWtdEBP);
+
+ unsigned lclNum;
+
+ /* We're going to have to undo some predicted enregistered variables */
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Is this a register variable? */
+ if (varDsc->lvRegNum != REG_STK)
+ {
+ if (isRegPairType(varDsc->lvType))
+ {
+ /* Only one can be EBP */
+ if (varDsc->lvRegNum == REG_FPBASE || varDsc->lvOtherReg == REG_FPBASE)
+ {
+ if (varDsc->lvRegNum == REG_FPBASE)
+ varDsc->lvRegNum = varDsc->lvOtherReg;
+
+ varDsc->lvOtherReg = REG_STK;
+
+ if (varDsc->lvRegNum == REG_STK)
+ varDsc->lvRegister = false;
+
+ if (varDsc->lvDependReg)
+ rpLostEnreg = true;
+#ifdef DEBUG
+ if (verbose)
+ goto DUMP_MSG;
+#endif
+ }
+ }
+ else
+ {
+ if ((varDsc->lvRegNum == REG_FPBASE) && (!varDsc->IsFloatRegType()))
+ {
+ varDsc->lvRegNum = REG_STK;
+
+ varDsc->lvRegister = false;
+
+ if (varDsc->lvDependReg)
+ rpLostEnreg = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ DUMP_MSG:
+                            printf("; reversing enregistration of V%02u,T%02u (refcnt=%2u,refwtd=%4u%s)\n", lclNum,
+ varDsc->lvVarIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2,
+ (varDsc->lvRefCntWtd & 1) ? ".5" : "");
+ }
+#endif
+ }
+ }
+ }
+ }
+ }
+#endif // ETW_EBP_FRAMED
+
+EXIT:;
+
+ unsigned lclNum;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+        /* Clear the lvDependReg flag for the next iteration of the predictor */
+ varDsc->lvDependReg = false;
+
+ // If we set rpLostEnreg and this is the first pessimize pass
+ // then reverse the enreg of all TYP_LONG
+ if (rpLostEnreg && isRegPairType(varDsc->lvType) && (rpPasses == rpPassesPessimize))
+ {
+ varDsc->lvRegNum = REG_STK;
+ varDsc->lvOtherReg = REG_STK;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose && raNewBlocks)
+ {
+ printf("\nAdded FP register killing blocks:\n");
+ fgDispBasicBlocks();
+ printf("\n");
+ }
+#endif
+ noway_assert(rpFrameType != FT_NOT_SET);
+
+ /* return the set of registers used to enregister variables */
+ return regUsed;
+}
+#ifdef _PREFAST_
+#pragma warning(pop)
+#endif
+
+/*****************************************************************************
+ *
+ * Predict register use for every tree in the function. Note that we do this
+ * at different times (not to mention in a totally different way) for x86 vs
+ * RISC targets.
+ */
+void Compiler::rpPredictRegUse()
+{
+#ifdef DEBUG
+ if (verbose)
+ raDumpVarIntf();
+#endif
+
+ // We might want to adjust the ref counts based on interference
+ raAdjustVarIntf();
+
+ regMaskTP allAcceptableRegs = RBM_ALLINT;
+
+#if FEATURE_FP_REGALLOC
+ allAcceptableRegs |= raConfigRestrictMaskFP();
+#endif
+
+ allAcceptableRegs &= ~codeGen->regSet.rsMaskResvd; // Remove any register reserved for special purposes
+
+ /* For debuggable code, genJumpToThrowHlpBlk() generates an inline call
+ to acdHelper(). This is done implicitly, without creating a GT_CALL
+       node. Hence, this interference is handled implicitly by
+ restricting the registers used for enregistering variables */
+
+ if (opts.compDbgCode)
+ {
+ allAcceptableRegs &= RBM_CALLEE_SAVED;
+ }
+
+ /* Compute the initial regmask to use for the first pass */
+ regMaskTP regAvail = RBM_CALLEE_SAVED & allAcceptableRegs;
+ regMaskTP regUsed;
+
+#if CPU_USES_BLOCK_MOVE
+ /* If we might need to generate a rep mov instruction */
+ /* remove ESI and EDI */
+ if (compBlkOpUsed)
+ regAvail &= ~(RBM_ESI | RBM_EDI);
+#endif
+
+#ifdef _TARGET_X86_
+    /* If we are using longs then we remove ESI to allow */
+    /* ESI:EBX to be saved across a call */
+ if (compLongUsed)
+ regAvail &= ~(RBM_ESI);
+#endif
+
+#ifdef _TARGET_ARM_
+ // For the first register allocation pass we don't want to color using r4
+ // as we want to allow it to be used to color the internal temps instead
+ // when r0,r1,r2,r3 are all in use.
+ //
+ regAvail &= ~(RBM_R4);
+#endif
+
+#if ETW_EBP_FRAMED
+    // We never have EBP available when ETW_EBP_FRAMED is defined
+ regAvail &= ~RBM_FPBASE;
+#else
+ /* If a frame pointer is required then we remove EBP */
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ regAvail &= ~RBM_FPBASE;
+#endif
+
+#ifdef DEBUG
+ BOOL fJitNoRegLoc = JitConfig.JitNoRegLoc();
+ if (fJitNoRegLoc)
+ regAvail = RBM_NONE;
+#endif
+
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ regAvail = RBM_NONE;
+
+#if FEATURE_STACK_FP_X87
+ VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
+
+ // Calculate the set of all tracked FP/non-FP variables
+ // into optAllFloatVars and optAllNonFPvars
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ unsigned varNum = varDsc->lvVarIndex;
+
+ /* add to the set of all tracked FP/non-FP variables */
+
+ if (varDsc->IsFloatRegType())
+ VarSetOps::AddElemD(this, optAllFloatVars, varNum);
+ else
+ VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
+ }
+#endif
+
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, raLclRegIntf[i], VarSetOps::MakeEmpty(this));
+ }
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ VarSetOps::AssignNoCopy(this, lvaVarPref[i], VarSetOps::MakeEmpty(this));
+ }
+
+ raNewBlocks = false;
+ rpPredictAssignAgain = false;
+ rpPasses = 0;
+
+ bool mustPredict = true;
+ unsigned stmtNum = 0;
+ unsigned oldStkPredict = DUMMY_INIT(~0);
+ VARSET_TP oldLclRegIntf[REG_COUNT];
+
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, oldLclRegIntf[i], VarSetOps::MakeEmpty(this));
+ }
+
+ while (true)
+ {
+ /* Assign registers to variables using the variable/register interference
+ graph (raLclRegIntf[]) calculated in the previous pass */
+ regUsed = rpPredictAssignRegVars(regAvail);
+
+ mustPredict |= rpLostEnreg;
+
+#ifdef _TARGET_ARM_
+
+ // See if we previously reserved REG_R10 and try to make it available if we have a small frame now
+ //
+ if ((rpPasses == 0) && (codeGen->regSet.rsMaskResvd & RBM_OPT_RSVD))
+ {
+ if (compRsvdRegCheck(REGALLOC_FRAME_LAYOUT))
+ {
+ // We must keep reserving R10 in this case
+ codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
+ }
+ else
+ {
+ // We can release our reservation on R10 and use it to color registers
+ //
+ codeGen->regSet.rsMaskResvd &= ~RBM_OPT_RSVD;
+ allAcceptableRegs |= RBM_OPT_RSVD;
+ }
+ }
+#endif
+
+ /* Is our new prediction good enough?? */
+ if (!mustPredict)
+ {
+            /* For small methods (12 stmts or fewer), we add an     */
+            /* extra pass if we are predicting the use of some      */
+ /* of the caller saved registers. */
+ /* This fixes RAID perf bug 43440 VB Ackerman function */
+
+ if ((rpPasses == 1) && (stmtNum <= 12) && (regUsed & RBM_CALLEE_SAVED))
+ {
+ goto EXTRA_PASS;
+ }
+
+ /* If every variable was fully enregistered then we're done */
+ if (rpStkPredict == 0)
+ goto ALL_DONE;
+
+ // This was a successful prediction. Record it, in case it turns out to be the best one.
+ rpRecordPrediction();
+
+ if (rpPasses > 1)
+ {
+ noway_assert(oldStkPredict != (unsigned)DUMMY_INIT(~0));
+
+ // Be careful about overflow
+ unsigned highStkPredict = (rpStkPredict * 2 < rpStkPredict) ? ULONG_MAX : rpStkPredict * 2;
+ if (oldStkPredict < highStkPredict)
+ goto ALL_DONE;
+
+ if (rpStkPredict < rpPasses * 8)
+ goto ALL_DONE;
+
+ if (rpPasses >= (rpPassesMax - 1))
+ goto ALL_DONE;
+ }
+
+ EXTRA_PASS:
+ /* We will do another pass */;
+ }
+
+#ifdef DEBUG
+ if (JitConfig.JitAssertOnMaxRAPasses())
+ {
+ noway_assert(rpPasses < rpPassesMax &&
+                         "This may not be a bug, but the dev team should look and see what is happening");
+ }
+#endif
+
+        // The "64" here had been "VARSET_SZ". It is unclear why this number was connected with
+        // the (max) size of a VARSET. That constant has been eliminated, so the value is left here
+        // as a literal. We hope that we're phasing out this code anyway, and this leaves the
+        // behavior the way that it was.
+ if (rpPasses > (rpPassesMax - rpPassesPessimize) + 64)
+ {
+ NO_WAY("we seem to be stuck in an infinite loop. breaking out");
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (rpPasses > 0)
+ {
+ if (rpLostEnreg)
+ printf("\n; Another pass due to rpLostEnreg");
+ if (rpAddedVarIntf)
+ printf("\n; Another pass due to rpAddedVarIntf");
+ if ((rpPasses == 1) && rpPredictAssignAgain)
+ printf("\n; Another pass due to rpPredictAssignAgain");
+ }
+ printf("\n; Register predicting pass# %d\n", rpPasses + 1);
+ }
+#endif
+
+ /* Zero the variable/register interference graph */
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::ClearD(this, raLclRegIntf[i]);
+ }
+
+ // if there are PInvoke calls and compLvFrameListRoot is enregistered,
+ // it must not be in a register trashed by the callee
+ if (info.compCallUnmanaged != 0)
+ {
+ assert(!opts.ShouldUsePInvokeHelpers());
+ noway_assert(info.compLvFrameListRoot < lvaCount);
+
+ LclVarDsc* pinvokeVarDsc = &lvaTable[info.compLvFrameListRoot];
+
+ if (pinvokeVarDsc->lvTracked)
+ {
+ rpRecordRegIntf(RBM_CALLEE_TRASH, VarSetOps::MakeSingleton(this, pinvokeVarDsc->lvVarIndex)
+ DEBUGARG("compLvFrameListRoot"));
+
+                // We would prefer to have this enregistered in the PINVOKE_TCB register
+ pinvokeVarDsc->addPrefReg(RBM_PINVOKE_TCB, this);
+ }
+
+ // If we're using a single return block, the p/invoke epilog code trashes ESI and EDI (in the
+ // worst case). Make sure that the return value compiler temp that we create for the single
+ // return block knows about this interference.
+ if (genReturnLocal != BAD_VAR_NUM)
+ {
+ noway_assert(genReturnBB);
+ LclVarDsc* localTmp = &lvaTable[genReturnLocal];
+ if (localTmp->lvTracked)
+ {
+ rpRecordRegIntf(RBM_PINVOKE_TCB | RBM_PINVOKE_FRAME,
+ VarSetOps::MakeSingleton(this, localTmp->lvVarIndex) DEBUGARG("genReturnLocal"));
+ }
+ }
+ }
+
+#ifdef _TARGET_ARM_
+ if (compFloatingPointUsed)
+ {
+ bool hasMustInitFloat = false;
+
+            // If we have any must-init floating-point LclVars then we will add register interferences
+            // for the arguments with RBM_SCRATCH.
+            // This is so that if we need to reset the initReg to REG_SCRATCH in Compiler::genFnProlog()
+            // we won't home the arguments into REG_SCRATCH.
+
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvMustInit && varTypeIsFloating(varDsc->TypeGet()))
+ {
+ hasMustInitFloat = true;
+ break;
+ }
+ }
+
+ if (hasMustInitFloat)
+ {
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+                    // If it is an incoming argument that is tracked and not floating-point
+ if (varDsc->lvIsParam && varDsc->lvTracked && !varTypeIsFloating(varDsc->TypeGet()))
+ {
+ rpRecordRegIntf(RBM_SCRATCH, VarSetOps::MakeSingleton(this, varDsc->lvVarIndex)
+ DEBUGARG("arg home with must-init fp"));
+ }
+ }
+ }
+ }
+#endif
+
+ stmtNum = 0;
+ rpAddedVarIntf = false;
+ rpLostEnreg = false;
+
+ /* Walk the basic blocks and predict reg use for each tree */
+
+ for (BasicBlock* block = fgFirstBB; block != NULL; block = block->bbNext)
+ {
+ GenTreePtr stmt;
+ compCurBB = block;
+ compCurLifeTree = NULL;
+ VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
+
+ compCurBB = block;
+
+ for (stmt = block->FirstNonPhiDef(); stmt != NULL; stmt = stmt->gtNext)
+ {
+ noway_assert(stmt->gtOper == GT_STMT);
+
+ rpPredictSpillCnt = 0;
+ VarSetOps::AssignNoCopy(this, rpLastUseVars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, rpUseInPlace, VarSetOps::MakeEmpty(this));
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ stmtNum++;
+#ifdef DEBUG
+                if (verbose)
+ {
+ printf("\nRegister predicting BB%02u, stmt %d\n", block->bbNum, stmtNum);
+ gtDispTree(tree);
+ printf("\n");
+ }
+#endif
+ rpPredictTreeRegUse(tree, PREDICT_NONE, RBM_NONE, RBM_NONE);
+
+ noway_assert(rpAsgVarNum == -1);
+
+ if (rpPredictSpillCnt > tmpIntSpillMax)
+ tmpIntSpillMax = rpPredictSpillCnt;
+ }
+ }
+ rpPasses++;
+
+ /* Decide whether we need to set mustPredict */
+ mustPredict = false;
+
+ if (rpAddedVarIntf)
+ {
+ mustPredict = true;
+#ifdef DEBUG
+ if (verbose)
+ raDumpVarIntf();
+#endif
+ }
+
+ if (rpPasses == 1)
+ {
+ if ((opts.compFlags & CLFLG_REGVAR) == 0)
+ goto ALL_DONE;
+
+ if (rpPredictAssignAgain)
+ mustPredict = true;
+#ifdef DEBUG
+ if (fJitNoRegLoc)
+ goto ALL_DONE;
+#endif
+ }
+
+ /* Calculate the new value to use for regAvail */
+
+ regAvail = allAcceptableRegs;
+
+ /* If a frame pointer is required then we remove EBP */
+ if (codeGen->isFramePointerRequired() || codeGen->isFrameRequired())
+ regAvail &= ~RBM_FPBASE;
+
+#if ETW_EBP_FRAMED
+        // We never have EBP available when ETW_EBP_FRAMED is defined
+ regAvail &= ~RBM_FPBASE;
+#endif
+
+ // If we have done n-passes then we must continue to pessimize the
+ // interference graph by or-ing the interferences from the previous pass
+
+ if (rpPasses > rpPassesPessimize)
+ {
+ for (unsigned regInx = 0; regInx < REG_COUNT; regInx++)
+ VarSetOps::UnionD(this, raLclRegIntf[regInx], oldLclRegIntf[regInx]);
+
+ /* If we reverse an EBP enregistration then keep it that way */
+ if (rpReverseEBPenreg)
+ regAvail &= ~RBM_FPBASE;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ raDumpRegIntf();
+#endif
+
+ /* Save the old variable/register interference graph */
+ for (unsigned i = 0; i < REG_COUNT; i++)
+ {
+ VarSetOps::Assign(this, oldLclRegIntf[i], raLclRegIntf[i]);
+ }
+ oldStkPredict = rpStkPredict;
+ } // end of while (true)
+
+ALL_DONE:;
+
+ // If we recorded a better feasible allocation than we ended up with, go back to using it.
+ rpUseRecordedPredictionIfBetter();
+
+#if DOUBLE_ALIGN
+ codeGen->setDoubleAlign(false);
+#endif
+
+ switch (rpFrameType)
+ {
+ default:
+ noway_assert(!"rpFrameType not set correctly!");
+ break;
+ case FT_ESP_FRAME:
+ noway_assert(!codeGen->isFramePointerRequired());
+ noway_assert(!codeGen->isFrameRequired());
+ codeGen->setFramePointerUsed(false);
+ break;
+ case FT_EBP_FRAME:
+ noway_assert((regUsed & RBM_FPBASE) == 0);
+ codeGen->setFramePointerUsed(true);
+ break;
+#if DOUBLE_ALIGN
+ case FT_DOUBLE_ALIGN_FRAME:
+ noway_assert((regUsed & RBM_FPBASE) == 0);
+ noway_assert(!codeGen->isFramePointerRequired());
+ codeGen->setFramePointerUsed(false);
+ codeGen->setDoubleAlign(true);
+ break;
+#endif
+ }
+
+ /* Record the set of registers that we need */
+ codeGen->regSet.rsClearRegsModified();
+ if (regUsed != RBM_NONE)
+ {
+ codeGen->regSet.rsSetRegsModified(regUsed);
+ }
+
+ /* We need genFullPtrRegMap if :
+ * The method is fully interruptible, or
+ * We are generating an EBP-less frame (for stack-pointer deltas)
+ */
+
+ genFullPtrRegMap = (genInterruptible || !codeGen->isFramePointerUsed());
+
+ raMarkStkVars();
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("# rpPasses was %u for %s\n", rpPasses, info.compFullName);
+ printf(" rpStkPredict was %u\n", rpStkPredict);
+ }
+#endif
+ rpRegAllocDone = true;
+}
+
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Mark all variables as to whether they live on the stack frame
+ * (part or whole), and if so what the base is (FP or SP).
+ */
+
+void Compiler::raMarkStkVars()
+{
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+        // For RyuJIT, lvOnFrame is set by LSRA, except in the zero-ref case, which is handled below.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ varDsc->lvOnFrame = false;
+#endif // LEGACY_BACKEND
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ noway_assert(!varDsc->lvRegister);
+ goto ON_STK;
+ }
+
+ /* Fully enregistered variables don't need any frame space */
+
+ if (varDsc->lvRegister)
+ {
+ if (!isRegPairType(varDsc->TypeGet()))
+ {
+ goto NOT_STK;
+ }
+
+ /* For "large" variables make sure both halves are enregistered */
+
+ if (varDsc->lvRegNum != REG_STK && varDsc->lvOtherReg != REG_STK)
+ {
+ goto NOT_STK;
+ }
+ }
+ /* Unused variables typically don't get any frame space */
+ else if (varDsc->lvRefCnt == 0)
+ {
+ bool needSlot = false;
+
+ bool stkFixedArgInVarArgs =
+ info.compIsVarArgs && varDsc->lvIsParam && !varDsc->lvIsRegArg && lclNum != lvaVarargsHandleArg;
+
+ // If its address has been exposed, ignore lvRefCnt. However, exclude
+ // fixed arguments in varargs method as lvOnFrame shouldn't be set
+ // for them as we don't want to explicitly report them to GC.
+
+ if (!stkFixedArgInVarArgs)
+ {
+ needSlot |= varDsc->lvAddrExposed;
+ }
+
+#if FEATURE_FIXED_OUT_ARGS
+
+ /* Is this the dummy variable representing GT_LCLBLK ? */
+ needSlot |= (lclNum == lvaOutgoingArgSpaceVar);
+
+#endif // FEATURE_FIXED_OUT_ARGS
+
+#ifdef DEBUGGING_SUPPORT
+
+#ifdef DEBUG
+ /* For debugging, note that we have to reserve space even for
+ unused variables if they are ever in scope. However, this is not
+ an issue as fgExtendDbgLifetimes() adds an initialization and
+ variables in scope will not have a zero ref-cnt.
+ */
+ if (opts.compDbgCode && !varDsc->lvIsParam && varDsc->lvTracked)
+ {
+ for (unsigned scopeNum = 0; scopeNum < info.compVarScopesCount; scopeNum++)
+ {
+ noway_assert(info.compVarScopes[scopeNum].vsdVarNum != lclNum);
+ }
+ }
+#endif
+ /*
+ For Debug Code, we have to reserve space even if the variable is never
+ in scope. We will also need to initialize it if it is a GC var.
+              So we set lvMustInit and artificially bump up the ref-cnt.
+ */
+
+ if (opts.compDbgCode && !stkFixedArgInVarArgs && lclNum < info.compLocalsCount)
+ {
+ needSlot |= true;
+
+ if (lvaTypeIsGC(lclNum))
+ {
+ varDsc->lvRefCnt = 1;
+ }
+
+ if (!varDsc->lvIsParam)
+ {
+ varDsc->lvMustInit = true;
+ }
+ }
+#endif // DEBUGGING_SUPPORT
+
+#ifndef LEGACY_BACKEND
+ varDsc->lvOnFrame = needSlot;
+#endif // !LEGACY_BACKEND
+ if (!needSlot)
+ {
+ /* Clear the lvMustInit flag in case it is set */
+ varDsc->lvMustInit = false;
+
+ goto NOT_STK;
+ }
+ }
+
+#ifndef LEGACY_BACKEND
+ if (!varDsc->lvOnFrame)
+ {
+ goto NOT_STK;
+ }
+#endif // !LEGACY_BACKEND
+
+ ON_STK:
+ /* The variable (or part of it) lives on the stack frame */
+
+ noway_assert((varDsc->lvType != TYP_UNDEF) && (varDsc->lvType != TYP_VOID) && (varDsc->lvType != TYP_UNKNOWN));
+#if FEATURE_FIXED_OUT_ARGS
+ noway_assert((lclNum == lvaOutgoingArgSpaceVar) || lvaLclSize(lclNum) != 0);
+#else // FEATURE_FIXED_OUT_ARGS
+ noway_assert(lvaLclSize(lclNum) != 0);
+#endif // FEATURE_FIXED_OUT_ARGS
+
+ varDsc->lvOnFrame = true; // Our prediction is that the final home for this local variable will be in the
+ // stack frame
+
+ NOT_STK:;
+ varDsc->lvFramePointerBased = codeGen->isFramePointerUsed();
+
+#if DOUBLE_ALIGN
+
+ if (codeGen->doDoubleAlign())
+ {
+ noway_assert(codeGen->isFramePointerUsed() == false);
+
+ /* All arguments are off of EBP with double-aligned frames */
+
+ if (varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ {
+ varDsc->lvFramePointerBased = true;
+ }
+ }
+
+#endif
+
+ /* Some basic checks */
+
+ // It must be in a register, on frame, or have zero references.
+
+ noway_assert(varDsc->lvIsInReg() || varDsc->lvOnFrame || varDsc->lvRefCnt == 0);
+
+#ifndef LEGACY_BACKEND
+ // We can't have both lvRegister and lvOnFrame for RyuJIT
+ noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame);
+#else // LEGACY_BACKEND
+
+ /* If both lvRegister and lvOnFrame are set, it must be partially enregistered */
+ noway_assert(!varDsc->lvRegister || !varDsc->lvOnFrame ||
+ (varDsc->lvType == TYP_LONG && varDsc->lvOtherReg == REG_STK));
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+
+ // For varargs functions, there should be no direct references to
+ // parameter variables except for 'this' (because these were morphed
+            // in the importer), the 'arglist' parameter (which is not a GC
+            // pointer), and the return buffer argument (if we are returning a
+            // struct).
+            // This is important because we don't want to try to report them
+            // to the GC, as the frame offsets in these local variables would
+ // not be correct.
+
+ if (varDsc->lvIsParam && raIsVarargsStackArg(lclNum))
+ {
+ if (!varDsc->lvPromoted && !varDsc->lvIsStructField)
+ {
+ noway_assert(varDsc->lvRefCnt == 0 && !varDsc->lvRegister && !varDsc->lvOnFrame);
+ }
+ }
+#endif
+ }
+}
+
+#ifdef LEGACY_BACKEND
+void Compiler::rpRecordPrediction()
+{
+ if (rpBestRecordedPrediction == NULL || rpStkPredict < rpBestRecordedStkPredict)
+ {
+ if (rpBestRecordedPrediction == NULL)
+ {
+ rpBestRecordedPrediction =
+ reinterpret_cast<VarRegPrediction*>(compGetMemArrayA(lvaCount, sizeof(VarRegPrediction)));
+ }
+ for (unsigned k = 0; k < lvaCount; k++)
+ {
+ rpBestRecordedPrediction[k].m_isEnregistered = lvaTable[k].lvRegister;
+ rpBestRecordedPrediction[k].m_regNum = (regNumberSmall)lvaTable[k].GetRegNum();
+ rpBestRecordedPrediction[k].m_otherReg = (regNumberSmall)lvaTable[k].GetOtherReg();
+ }
+ rpBestRecordedStkPredict = rpStkPredict;
+ JITDUMP("Recorded a feasible reg prediction with weighted stack use count %d.\n", rpBestRecordedStkPredict);
+ }
+}
+
+void Compiler::rpUseRecordedPredictionIfBetter()
+{
+ JITDUMP("rpStkPredict is %d; previous feasible reg prediction is %d.\n", rpStkPredict,
+ rpBestRecordedPrediction != NULL ? rpBestRecordedStkPredict : 0);
+ if (rpBestRecordedPrediction != NULL && rpStkPredict > rpBestRecordedStkPredict)
+ {
+ JITDUMP("Reverting to a previously-recorded feasible reg prediction with weighted stack use count %d.\n",
+ rpBestRecordedStkPredict);
+
+ for (unsigned k = 0; k < lvaCount; k++)
+ {
+ lvaTable[k].lvRegister = rpBestRecordedPrediction[k].m_isEnregistered;
+ lvaTable[k].SetRegNum(static_cast<regNumber>(rpBestRecordedPrediction[k].m_regNum));
+ lvaTable[k].SetOtherReg(static_cast<regNumber>(rpBestRecordedPrediction[k].m_otherReg));
+ }
+ }
+}
+#endif // LEGACY_BACKEND
diff --git a/src/jit/regalloc.h b/src/jit/regalloc.h
new file mode 100644
index 0000000000..7e2d7c7eb1
--- /dev/null
+++ b/src/jit/regalloc.h
@@ -0,0 +1,111 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef REGALLOC_H_
+#define REGALLOC_H_
+
+// Some things that are used by both LSRA and regpredict allocators.
+
+enum FrameType
+{
+ FT_NOT_SET,
+ FT_ESP_FRAME,
+ FT_EBP_FRAME,
+#if DOUBLE_ALIGN
+ FT_DOUBLE_ALIGN_FRAME,
+#endif
+};
+
+#ifdef LEGACY_BACKEND
+
+#include "varset.h"
+
+/*****************************************************************************/
+/*****************************************************************************/
+
+// This enumeration specifies register restrictions for the predictor
+enum rpPredictReg
+{
+ PREDICT_NONE, // any subtree
+ PREDICT_ADDR, // subtree is left side of an assignment
+ PREDICT_REG, // subtree must be any register
+ PREDICT_SCRATCH_REG, // subtree must be any writable register
+
+#if defined(_TARGET_ARM_)
+ PREDICT_PAIR_R0R1, // subtree will write R0 and R1
+ PREDICT_PAIR_R2R3, // subtree will write R2 and R3
+
+#elif defined(_TARGET_AMD64_)
+
+ PREDICT_NOT_REG_EAX, // subtree must be any writable register, except EAX
+ PREDICT_NOT_REG_ECX, // subtree must be any writable register, except ECX
+
+#elif defined(_TARGET_X86_)
+
+ PREDICT_NOT_REG_EAX, // subtree must be any writable register, except EAX
+ PREDICT_NOT_REG_ECX, // subtree must be any writable register, except ECX
+
+ PREDICT_PAIR_EAXEDX, // subtree will write EAX and EDX
+ PREDICT_PAIR_ECXEBX, // subtree will write ECX and EBX
+
+#else
+#error "Unknown Target!"
+#endif // _TARGET_
+
+#define REGDEF(name, rnum, mask, sname) PREDICT_REG_##name,
+#include "register.h"
+
+    // The following are used whenever we have an ASG node into a LCL_VAR that
+    // we predict to be enregistered. These values indicate that we can expect
+    // to use the register that is being assigned into as the temporary to
+    // compute the right side of the ASG node.
+
+    PREDICT_REG_VAR_T00, // write the register used by tracked variable 00
+ PREDICT_REG_VAR_MAX = PREDICT_REG_VAR_T00 + lclMAX_TRACKED - 1,
+
+ PREDICT_COUNT = PREDICT_REG_VAR_T00,
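+
+    // For illustration only (a sketch, not an enumerator): the predictor maps a tracked
+    // variable index to one of these values with arithmetic along the lines of
+    //     rpPredictReg predict = (rpPredictReg)(PREDICT_REG_VAR_T00 + varIndex);
+    // and recovers the index again with (predict - PREDICT_REG_VAR_T00).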
+
+#define REGDEF(name, rnum, mask, sname)
+#define REGALIAS(alias, realname) PREDICT_REG_##alias = PREDICT_REG_##realname,
+#include "register.h"
+
+#if defined(_TARGET_ARM_)
+
+ PREDICT_REG_FIRST = PREDICT_REG_R0,
+ PREDICT_INTRET = PREDICT_REG_R0,
+ PREDICT_LNGRET = PREDICT_PAIR_R0R1,
+ PREDICT_FLTRET = PREDICT_REG_F0,
+
+#elif defined(_TARGET_AMD64_)
+
+ PREDICT_REG_FIRST = PREDICT_REG_RAX,
+ PREDICT_INTRET = PREDICT_REG_EAX,
+ PREDICT_LNGRET = PREDICT_REG_RAX,
+
+#elif defined(_TARGET_X86_)
+
+ PREDICT_REG_FIRST = PREDICT_REG_EAX,
+ PREDICT_INTRET = PREDICT_REG_EAX,
+ PREDICT_LNGRET = PREDICT_PAIR_EAXEDX,
+
+#else
+#error "Unknown _TARGET_"
+#endif // _TARGET_
+
+};
+#if DOUBLE_ALIGN
+enum CanDoubleAlign
+{
+ CANT_DOUBLE_ALIGN,
+ CAN_DOUBLE_ALIGN,
+ MUST_DOUBLE_ALIGN,
+ COUNT_DOUBLE_ALIGN,
+
+ DEFAULT_DOUBLE_ALIGN = CAN_DOUBLE_ALIGN
+};
+#endif
+
+#endif // LEGACY_BACKEND
+
+#endif // REGALLOC_H_
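A note on the PREDICT_REG_VAR_T00 .. PREDICT_REG_VAR_MAX block above: it encodes a tracked local's variable index directly into the prediction value. A hedged sketch of the two directions of that mapping, with helper names invented for illustration (the real predictor may spell them differently):

#ifdef LEGACY_BACKEND
// Sketch only: the PREDICT_REG_VAR_* values are PREDICT_REG_VAR_T00 plus a
// tracked-variable index, so the mapping in both directions is simple arithmetic.
inline rpPredictReg predictRegForTrackedVar(unsigned varIndex)
{
    assert(varIndex < lclMAX_TRACKED);
    return static_cast<rpPredictReg>(PREDICT_REG_VAR_T00 + varIndex);
}

inline unsigned trackedVarForPredictReg(rpPredictReg predict)
{
    assert((predict >= PREDICT_REG_VAR_T00) && (predict <= PREDICT_REG_VAR_MAX));
    return static_cast<unsigned>(predict - PREDICT_REG_VAR_T00);
}
#endif // LEGACY_BACKEND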
diff --git a/src/jit/register.h b/src/jit/register.h
new file mode 100644
index 0000000000..9e351037fd
--- /dev/null
+++ b/src/jit/register.h
@@ -0,0 +1,124 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+#ifndef REGALIAS
+#define REGALIAS(alias, realname)
+#endif
+
+#if defined(_TARGET_XARCH_)
+
+#if defined(_TARGET_X86_)
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(EAX, 0, 0x01, "eax" )
+REGDEF(ECX, 1, 0x02, "ecx" )
+REGDEF(EDX, 2, 0x04, "edx" )
+REGDEF(EBX, 3, 0x08, "ebx" )
+REGDEF(ESP, 4, 0x10, "esp" )
+REGDEF(EBP, 5, 0x20, "ebp" )
+REGDEF(ESI, 6, 0x40, "esi" )
+REGDEF(EDI, 7, 0x80, "edi" )
+REGALIAS(RAX, EAX)
+REGALIAS(RCX, ECX)
+REGALIAS(RDX, EDX)
+REGALIAS(RBX, EBX)
+REGALIAS(RSP, ESP)
+REGALIAS(RBP, EBP)
+REGALIAS(RSI, ESI)
+REGALIAS(RDI, EDI)
+
+#else // !defined(_TARGET_X86_)
+
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(RAX, 0, 0x0001, "rax" )
+REGDEF(RCX, 1, 0x0002, "rcx" )
+REGDEF(RDX, 2, 0x0004, "rdx" )
+REGDEF(RBX, 3, 0x0008, "rbx" )
+REGDEF(RSP, 4, 0x0010, "rsp" )
+REGDEF(RBP, 5, 0x0020, "rbp" )
+REGDEF(RSI, 6, 0x0040, "rsi" )
+REGDEF(RDI, 7, 0x0080, "rdi" )
+REGDEF(R8, 8, 0x0100, "r8" )
+REGDEF(R9, 9, 0x0200, "r9" )
+REGDEF(R10, 10, 0x0400, "r10" )
+REGDEF(R11, 11, 0x0800, "r11" )
+REGDEF(R12, 12, 0x1000, "r12" )
+REGDEF(R13, 13, 0x2000, "r13" )
+REGDEF(R14, 14, 0x4000, "r14" )
+REGDEF(R15, 15, 0x8000, "r15" )
+
+REGALIAS(EAX, RAX)
+REGALIAS(ECX, RCX)
+REGALIAS(EDX, RDX)
+REGALIAS(EBX, RBX)
+REGALIAS(ESP, RSP)
+REGALIAS(EBP, RBP)
+REGALIAS(ESI, RSI)
+REGALIAS(EDI, RDI)
+
+#endif // !defined(_TARGET_X86_)
+
+#ifdef LEGACY_BACKEND
+
+REGDEF(STK, 8, 0x00, "STK" )
+
+#else // !LEGACY_BACKEND
+
+#ifdef _TARGET_AMD64_
+#define XMMBASE 16
+#define XMMMASK(x) (__int64(1) << (x+XMMBASE))
+#else // !_TARGET_AMD64_
+#define XMMBASE 8
+#define XMMMASK(x) (__int32(1) << (x+XMMBASE))
+#endif // !_TARGET_AMD64_
+
+REGDEF(XMM0, 0+XMMBASE, XMMMASK(0), "mm0" )
+REGDEF(XMM1, 1+XMMBASE, XMMMASK(1), "mm1" )
+REGDEF(XMM2, 2+XMMBASE, XMMMASK(2), "mm2" )
+REGDEF(XMM3, 3+XMMBASE, XMMMASK(3), "mm3" )
+REGDEF(XMM4, 4+XMMBASE, XMMMASK(4), "mm4" )
+REGDEF(XMM5, 5+XMMBASE, XMMMASK(5), "mm5" )
+REGDEF(XMM6, 6+XMMBASE, XMMMASK(6), "mm6" )
+REGDEF(XMM7, 7+XMMBASE, XMMMASK(7), "mm7" )
+
+#ifdef _TARGET_X86_
+REGDEF(STK, 8+XMMBASE, 0x0000, "STK" )
+#else // !_TARGET_X86_
+REGDEF(XMM8, 8+XMMBASE, XMMMASK(8), "mm8" )
+REGDEF(XMM9, 9+XMMBASE, XMMMASK(9), "mm9" )
+REGDEF(XMM10, 10+XMMBASE, XMMMASK(10), "mm10" )
+REGDEF(XMM11, 11+XMMBASE, XMMMASK(11), "mm11" )
+REGDEF(XMM12, 12+XMMBASE, XMMMASK(12), "mm12" )
+REGDEF(XMM13, 13+XMMBASE, XMMMASK(13), "mm13" )
+REGDEF(XMM14, 14+XMMBASE, XMMMASK(14), "mm14" )
+REGDEF(XMM15, 15+XMMBASE, XMMMASK(15), "mm15" )
+REGDEF(STK, 16+XMMBASE, 0x0000, "STK" )
+#endif // !_TARGET_X86_
+
+#endif // !LEGACY_BACKEND
+
+#elif defined(_TARGET_ARM_)
+ #include "registerarm.h"
+
+#elif defined(_TARGET_ARM64_)
+ #include "registerarm64.h"
+
+#else
+ #error Unsupported or unset target architecture
+#endif // target type
+/*****************************************************************************/
+#undef REGDEF
+#undef REGALIAS
+#undef XMMMASK
+/*****************************************************************************/
+
+// clang-format on
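register.h above is a classic X-macro header: it refuses to compile unless the includer has defined REGDEF, expands one entry per register (plus optional REGALIAS entries), and #undef's the macros on its way out. A hedged sketch of a consumer follows; the enum name and prefix are invented, and it assumes the target macros (_TARGET_X86_ and friends) are already set by the build. The JIT uses this same pattern to build its register enum and register-name tables, each consumer supplying a different REGDEF body.

// Illustrative consumer of register.h (not the JIT's actual definitions).
enum IllustrativeRegNum
{
#define REGDEF(name, rnum, mask, ...) ILLUS_REG_##name = (rnum),
#define REGALIAS(alias, realname) ILLUS_REG_##alias = ILLUS_REG_##realname,
#include "register.h" // the header #undef's REGDEF and REGALIAS itself
    ILLUS_REG_COUNT
};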
diff --git a/src/jit/register_arg_convention.cpp b/src/jit/register_arg_convention.cpp
new file mode 100644
index 0000000000..4678cdec41
--- /dev/null
+++ b/src/jit/register_arg_convention.cpp
@@ -0,0 +1,123 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "register_arg_convention.h"
+
+unsigned InitVarDscInfo::allocRegArg(var_types type, unsigned numRegs /* = 1 */)
+{
+ assert(numRegs > 0);
+
+ unsigned resultArgNum = regArgNum(type);
+ bool isBackFilled = false;
+
+#ifdef _TARGET_ARM_
+ // Check for back-filling
+ if (varTypeIsFloating(type) && // We only back-fill the float registers
+ !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
+ (numRegs == 1) && // Is there a possibility we could back-fill?
+ (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot?
+ {
+ // We will never back-fill something greater than a single register
+ // (TYP_FLOAT, or TYP_STRUCT HFA with a single float). This is because
+ // we don't have any types that require > 2 register alignment, so we
+ // can't create a > 1 register alignment hole to back-fill.
+
+ // Back-fill the register
+ regMaskTP backFillBitMask = genFindLowestBit(fltArgSkippedRegMask);
+ fltArgSkippedRegMask &= ~backFillBitMask; // Remove the back-filled register(s) from the skipped mask
+ resultArgNum = genMapFloatRegNumToRegArgNum(genRegNumFromMask(backFillBitMask));
+ assert(resultArgNum < MAX_FLOAT_REG_ARG);
+ isBackFilled = true;
+ }
+#endif // _TARGET_ARM_
+
+ if (!isBackFilled)
+ {
+#if defined(_TARGET_AMD64_) && !defined(UNIX_AMD64_ABI)
+ // For System V the reg type counters should be independent.
+ nextReg(TYP_INT, numRegs);
+ nextReg(TYP_FLOAT, numRegs);
+#else
+ // We didn't back-fill a register (on ARM), so skip the number of registers that we allocated.
+ nextReg(type, numRegs);
+#endif
+ }
+
+ return resultArgNum;
+}
+
+bool InitVarDscInfo::enoughAvailRegs(var_types type, unsigned numRegs /* = 1 */)
+{
+ assert(numRegs > 0);
+
+ unsigned backFillCount = 0;
+
+#ifdef _TARGET_ARM_
+ // Check for back-filling
+ if (varTypeIsFloating(type) && // We only back-fill the float registers
+ !anyFloatStackArgs && // Is it legal to back-fill? (We haven't put any FP args on the stack yet)
+ (numRegs == 1) && // Is there a possibility we could back-fill?
+ (fltArgSkippedRegMask != RBM_NONE)) // Is there an available back-fill slot?
+ {
+ backFillCount = 1;
+ }
+#endif // _TARGET_ARM_
+
+ return regArgNum(type) + numRegs - backFillCount <= maxRegArgNum(type);
+}
+
+unsigned InitVarDscInfo::alignReg(var_types type, unsigned requiredRegAlignment)
+{
+ NYI_ARM64("alignReg");
+
+ assert(requiredRegAlignment > 0);
+ if (requiredRegAlignment == 1)
+ {
+ return 0; // Everything is always "1" aligned
+ }
+
+ assert(requiredRegAlignment == 2); // we don't expect anything else right now
+
+ int alignMask = regArgNum(type) & (requiredRegAlignment - 1);
+ if (alignMask == 0)
+ {
+ return 0; // We're already aligned
+ }
+
+ unsigned cAlignSkipped = requiredRegAlignment - alignMask;
+ assert(cAlignSkipped == 1); // Alignment is currently only 1 or 2, so misalignment can only be 1.
+
+#ifdef _TARGET_ARM_
+ if (varTypeIsFloating(type))
+ {
+ fltArgSkippedRegMask |= genMapFloatRegArgNumToRegMask(floatRegArgNum);
+ }
+#endif // _TARGET_ARM_
+
+ assert(regArgNum(type) + cAlignSkipped <= maxRegArgNum(type)); // if equal, then we aligned the last slot, and the
+ // arg can't be enregistered
+ regArgNum(type) += cAlignSkipped;
+
+ return cAlignSkipped;
+}
+
+bool InitVarDscInfo::canEnreg(var_types type, unsigned numRegs /* = 1 */)
+{
+ if (!isRegParamType(type))
+ {
+ return false;
+ }
+
+ if (!enoughAvailRegs(type, numRegs))
+ {
+ return false;
+ }
+
+ return true;
+}
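The ARM back-filling in allocRegArg above leans on genFindLowestBit to claim the oldest alignment hole recorded in fltArgSkippedRegMask. The same bookkeeping as a self-contained sketch with plain integers; lowestBit stands in for genFindLowestBit, and the JIT's regMaskTP type and float-register mapping helpers are deliberately left out.

#include <cassert>
#include <cstdint>

// Isolate the least significant set bit (the trick genFindLowestBit is built on).
static uint64_t lowestBit(uint64_t mask)
{
    return mask & (0 - mask);
}

// Claim one previously skipped single-float slot and return its argument index,
// mirroring how allocRegArg back-fills instead of advancing the float counter.
static unsigned backFillOneFloatSlot(uint64_t& skippedMask)
{
    assert(skippedMask != 0);
    uint64_t slot = lowestBit(skippedMask);
    skippedMask &= ~slot; // the hole is no longer available, like fltArgSkippedRegMask above

    unsigned argNum = 0;
    while ((slot >>= 1) != 0)
    {
        argNum++;
    }
    return argNum;
}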
diff --git a/src/jit/register_arg_convention.h b/src/jit/register_arg_convention.h
new file mode 100644
index 0000000000..5073732a3e
--- /dev/null
+++ b/src/jit/register_arg_convention.h
@@ -0,0 +1,111 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef __register_arg_convention__
+#define __register_arg_convention__
+
+class LclVarDsc;
+
+struct InitVarDscInfo
+{
+ LclVarDsc* varDsc;
+ unsigned varNum;
+
+ unsigned intRegArgNum;
+ unsigned floatRegArgNum;
+ unsigned maxIntRegArgNum;
+ unsigned maxFloatRegArgNum;
+
+ bool hasRetBufArg;
+
+#ifdef _TARGET_ARM_
+ // Support back-filling of FP parameters. This is similar to code in gtMorphArgs() that
+ // handles arguments.
+ regMaskTP fltArgSkippedRegMask;
+ bool anyFloatStackArgs;
+#endif // _TARGET_ARM_
+
+public:
+ // set to initial values
+ void Init(LclVarDsc* lvaTable, bool _hasRetBufArg)
+ {
+ hasRetBufArg = _hasRetBufArg;
+ varDsc = &lvaTable[0]; // the first argument LclVar 0
+ varNum = 0; // the first argument varNum 0
+ intRegArgNum = 0;
+ floatRegArgNum = 0;
+ maxIntRegArgNum = MAX_REG_ARG;
+ maxFloatRegArgNum = MAX_FLOAT_REG_ARG;
+
+#ifdef _TARGET_ARM_
+ fltArgSkippedRegMask = RBM_NONE;
+ anyFloatStackArgs = false;
+#endif // _TARGET_ARM_
+ }
+
+ // return ref to current register arg for this type
+ unsigned& regArgNum(var_types type)
+ {
+ return varTypeIsFloating(type) ? floatRegArgNum : intRegArgNum;
+ }
+
+ // Allocate a set of contiguous argument registers. "type" is either an integer
+ // type, indicating to use the integer registers, or a floating-point type, indicating
+ // to use the floating-point registers. The actual type (TYP_FLOAT vs. TYP_DOUBLE) is
+ // ignored. "numRegs" is the number of registers to allocate. Thus, on ARM, to allocate
+ // a double-precision floating-point register, you need to pass numRegs=2. For an HFA,
+ // pass the number of slots/registers needed.
+ // This routine handles floating-point register back-filling on ARM.
+ // Returns the first argument register of the allocated set.
+ unsigned allocRegArg(var_types type, unsigned numRegs = 1);
+
+ // We are aligning the register to an ABI-required boundary, such as putting
+ // double-precision floats in even-numbered registers, by skipping one register.
+ // "requiredRegAlignment" is the amount to align to: 1 for no alignment (everything
+ // is 1-aligned), 2 for "double" alignment.
+ // Returns the number of registers skipped.
+ unsigned alignReg(var_types type, unsigned requiredRegAlignment);
+
+ // Return true if it is an enregisterable type and there is room.
+ // Note that for "type", we only care if it is float or not. In particular,
+ // "numRegs" must be "2" to allocate an ARM double-precision floating-point register.
+ bool canEnreg(var_types type, unsigned numRegs = 1);
+
+ // Set the fact that we have used up all remaining registers of 'type'
+ //
+ void setAllRegArgUsed(var_types type)
+ {
+ regArgNum(type) = maxRegArgNum(type);
+ }
+
+#ifdef _TARGET_ARM_
+
+ void setAnyFloatStackArgs()
+ {
+ anyFloatStackArgs = true;
+ }
+
+ bool existAnyFloatStackArgs()
+ {
+ return anyFloatStackArgs;
+ }
+
+#endif // _TARGET_ARM_
+
+private:
+ // return max register arg for this type
+ unsigned maxRegArgNum(var_types type)
+ {
+ return varTypeIsFloating(type) ? maxFloatRegArgNum : maxIntRegArgNum;
+ }
+
+ bool enoughAvailRegs(var_types type, unsigned numRegs = 1);
+
+ void nextReg(var_types type, unsigned numRegs = 1)
+ {
+ regArgNum(type) = min(regArgNum(type) + numRegs, maxRegArgNum(type));
+ }
+};
+
+#endif // __register_arg_convention__
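The comments on canEnreg, alignReg, and allocRegArg above describe the intended calling pattern. Here is a hedged usage sketch for a single double parameter on ARM; the function, its caller, and the bookkeeping around it are invented for the example (in the JIT, the real driver is the local-variable/parameter setup code).

// Sketch only: laying out one TYP_DOUBLE parameter on ARM with InitVarDscInfo.
void exampleLayoutDoubleArg(InitVarDscInfo& varDscInfo)
{
    const var_types type    = TYP_DOUBLE;
    const unsigned  numRegs = 2; // a double occupies two float argument registers

    if (varDscInfo.canEnreg(type, numRegs))
    {
        // Doubles must start at an even float register; any slot skipped here is
        // remembered (fltArgSkippedRegMask) so a later single float can back-fill it.
        varDscInfo.alignReg(type, 2);

        unsigned firstRegArgNum = varDscInfo.allocRegArg(type, numRegs);
        // ... record firstRegArgNum in this parameter's LclVarDsc ...
        (void)firstRegArgNum;
    }
    else
    {
        // Out of float registers: the parameter goes to the stack, and noting that
        // fact disables back-filling for any later float arguments.
#ifdef _TARGET_ARM_
        varDscInfo.setAnyFloatStackArgs();
#endif
    }
}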
diff --git a/src/jit/registerarm.h b/src/jit/registerarm.h
new file mode 100644
index 0000000000..38b82c26f2
--- /dev/null
+++ b/src/jit/registerarm.h
@@ -0,0 +1,86 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+#ifndef REGALIAS
+#define REGALIAS(alias, realname)
+#endif
+
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(R0, 0, 0x0001, "r0" )
+REGDEF(R1, 1, 0x0002, "r1" )
+REGDEF(R2, 2, 0x0004, "r2" )
+REGDEF(R3, 3, 0x0008, "r3" )
+REGDEF(R4, 4, 0x0010, "r4" )
+REGDEF(R5, 5, 0x0020, "r5" )
+REGDEF(R6, 6, 0x0040, "r6" )
+REGDEF(R7, 7, 0x0080, "r7" )
+REGDEF(R8, 8, 0x0100, "r8" )
+REGDEF(R9, 9, 0x0200, "r9" )
+REGDEF(R10, 10, 0x0400, "r10" )
+REGDEF(R11, 11, 0x0800, "r11" )
+REGDEF(R12, 12, 0x1000, "r12" )
+REGDEF(SP, 13, 0x2000, "sp" )
+REGDEF(LR, 14, 0x4000, "lr" )
+REGDEF(PC, 15, 0x8000, "pc" )
+
+#define FPBASE 16
+#define VFPMASK(x) (((__int64)1) << (x+FPBASE))
+
+REGDEF(F0, 0+FPBASE, VFPMASK(0), "f0")
+REGDEF(F1, 1+FPBASE, VFPMASK(1), "f1")
+REGDEF(F2, 2+FPBASE, VFPMASK(2), "f2")
+REGDEF(F3, 3+FPBASE, VFPMASK(3), "f3")
+REGDEF(F4, 4+FPBASE, VFPMASK(4), "f4")
+REGDEF(F5, 5+FPBASE, VFPMASK(5), "f5")
+REGDEF(F6, 6+FPBASE, VFPMASK(6), "f6")
+REGDEF(F7, 7+FPBASE, VFPMASK(7), "f7")
+REGDEF(F8, 8+FPBASE, VFPMASK(8), "f8")
+REGDEF(F9, 9+FPBASE, VFPMASK(9), "f9")
+REGDEF(F10, 10+FPBASE, VFPMASK(10), "f10")
+REGDEF(F11, 11+FPBASE, VFPMASK(11), "f11")
+REGDEF(F12, 12+FPBASE, VFPMASK(12), "f12")
+REGDEF(F13, 13+FPBASE, VFPMASK(13), "f13")
+REGDEF(F14, 14+FPBASE, VFPMASK(14), "f14")
+REGDEF(F15, 15+FPBASE, VFPMASK(15), "f15")
+REGDEF(F16, 16+FPBASE, VFPMASK(16), "f16")
+REGDEF(F17, 17+FPBASE, VFPMASK(17), "f17")
+REGDEF(F18, 18+FPBASE, VFPMASK(18), "f18")
+REGDEF(F19, 19+FPBASE, VFPMASK(19), "f19")
+REGDEF(F20, 20+FPBASE, VFPMASK(20), "f20")
+REGDEF(F21, 21+FPBASE, VFPMASK(21), "f21")
+REGDEF(F22, 22+FPBASE, VFPMASK(22), "f22")
+REGDEF(F23, 23+FPBASE, VFPMASK(23), "f23")
+REGDEF(F24, 24+FPBASE, VFPMASK(24), "f24")
+REGDEF(F25, 25+FPBASE, VFPMASK(25), "f25")
+REGDEF(F26, 26+FPBASE, VFPMASK(26), "f26")
+REGDEF(F27, 27+FPBASE, VFPMASK(27), "f27")
+REGDEF(F28, 28+FPBASE, VFPMASK(28), "f28")
+REGDEF(F29, 29+FPBASE, VFPMASK(29), "f29")
+REGDEF(F30, 30+FPBASE, VFPMASK(30), "f30")
+REGDEF(F31, 31+FPBASE, VFPMASK(31), "f31")
+
+
+// Allow us to call R11/FP, SP, LR and PC by their register number names
+REGALIAS(FP, R11)
+REGALIAS(R13, SP)
+REGALIAS(R14, LR)
+REGALIAS(R15, PC)
+
+// This must be last!
+REGDEF(STK, 32+FPBASE, 0x0000, "STK")
+
+/*****************************************************************************/
+#undef REGDEF
+#undef REGALIAS
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/registerarm64.h b/src/jit/registerarm64.h
new file mode 100644
index 0000000000..f53197259c
--- /dev/null
+++ b/src/jit/registerarm64.h
@@ -0,0 +1,114 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+#ifndef REGALIAS
+#define REGALIAS(alias, realname)
+#endif
+
+#define RMASK(x) (1ULL << (x))
+
+/*
+REGDEF(name, rnum, mask, xname, wname) */
+REGDEF(R0, 0, 0x0001, "x0" , "w0" )
+REGDEF(R1, 1, 0x0002, "x1" , "w1" )
+REGDEF(R2, 2, 0x0004, "x2" , "w2" )
+REGDEF(R3, 3, 0x0008, "x3" , "w3" )
+REGDEF(R4, 4, 0x0010, "x4" , "w4" )
+REGDEF(R5, 5, 0x0020, "x5" , "w5" )
+REGDEF(R6, 6, 0x0040, "x6" , "w6" )
+REGDEF(R7, 7, 0x0080, "x7" , "w7" )
+REGDEF(R8, 8, 0x0100, "x8" , "w8" )
+REGDEF(R9, 9, 0x0200, "x9" , "w9" )
+REGDEF(R10, 10, 0x0400, "x10", "w10" )
+REGDEF(R11, 11, 0x0800, "x11", "w11" )
+REGDEF(R12, 12, 0x1000, "x12", "w12" )
+REGDEF(R13, 13, 0x2000, "x13", "w13" )
+REGDEF(R14, 14, 0x4000, "x14", "w14" )
+REGDEF(R15, 15, 0x8000, "x15", "w15" )
+REGDEF(IP0, 16, 0x10000, "xip0","wip0" )
+REGDEF(IP1, 17, 0x20000, "xip1","wip1" )
+REGDEF(PR, 18, 0x40000, "xpr", "wpr" )
+REGDEF(R19, 19, 0x80000, "x19", "w19" )
+REGDEF(R20, 20, 0x100000, "x20", "w20" )
+REGDEF(R21, 21, 0x200000, "x21", "w21" )
+REGDEF(R22, 22, 0x400000, "x22", "w22" )
+REGDEF(R23, 23, 0x800000, "x23", "w23" )
+REGDEF(R24, 24, 0x1000000, "x24", "w24" )
+REGDEF(R25, 25, 0x2000000, "x25", "w25" )
+REGDEF(R26, 26, 0x4000000, "x26", "w26" )
+REGDEF(R27, 27, 0x8000000, "x27", "w27" )
+REGDEF(R28, 28, 0x10000000, "x28", "w28" )
+REGDEF(FP, 29, 0x20000000, "fp" , "w29" )
+REGDEF(LR, 30, 0x40000000, "lr" , "w30" )
+REGDEF(ZR, 31, 0x80000000, "xzr", "wzr" )
+
+// Allow us to call IP0,IP1,PR,FP,LR by their register number names
+REGALIAS(R16, IP0)
+REGALIAS(R17, IP1)
+REGALIAS(R18, PR)
+REGALIAS(R29, FP)
+REGALIAS(R30, LR)
+
+#define VBASE 32
+#define VMASK(x) (1ULL << (VBASE+(x)))
+
+/*
+REGDEF(name, rnum, mask, xname, wname) */
+REGDEF(V0, 0+VBASE, VMASK(0), "d0", "s0")
+REGDEF(V1, 1+VBASE, VMASK(1), "d1", "s1")
+REGDEF(V2, 2+VBASE, VMASK(2), "d2", "s2")
+REGDEF(V3, 3+VBASE, VMASK(3), "d3", "s3")
+REGDEF(V4, 4+VBASE, VMASK(4), "d4", "s4")
+REGDEF(V5, 5+VBASE, VMASK(5), "d5", "s5")
+REGDEF(V6, 6+VBASE, VMASK(6), "d6", "s6")
+REGDEF(V7, 7+VBASE, VMASK(7), "d7", "s7")
+REGDEF(V8, 8+VBASE, VMASK(8), "d8", "s8")
+REGDEF(V9, 9+VBASE, VMASK(9), "d9", "s9")
+REGDEF(V10, 10+VBASE, VMASK(10), "d10", "s10")
+REGDEF(V11, 11+VBASE, VMASK(11), "d11", "s11")
+REGDEF(V12, 12+VBASE, VMASK(12), "d12", "s12")
+REGDEF(V13, 13+VBASE, VMASK(13), "d13", "s13")
+REGDEF(V14, 14+VBASE, VMASK(14), "d14", "s14")
+REGDEF(V15, 15+VBASE, VMASK(15), "d15", "s15")
+REGDEF(V16, 16+VBASE, VMASK(16), "d16", "s16")
+REGDEF(V17, 17+VBASE, VMASK(17), "d17", "s17")
+REGDEF(V18, 18+VBASE, VMASK(18), "d18", "s18")
+REGDEF(V19, 19+VBASE, VMASK(19), "d19", "s19")
+REGDEF(V20, 20+VBASE, VMASK(20), "d20", "s20")
+REGDEF(V21, 21+VBASE, VMASK(21), "d21", "s21")
+REGDEF(V22, 22+VBASE, VMASK(22), "d22", "s22")
+REGDEF(V23, 23+VBASE, VMASK(23), "d23", "s23")
+REGDEF(V24, 24+VBASE, VMASK(24), "d24", "s24")
+REGDEF(V25, 25+VBASE, VMASK(25), "d25", "s25")
+REGDEF(V26, 26+VBASE, VMASK(26), "d26", "s26")
+REGDEF(V27, 27+VBASE, VMASK(27), "d27", "s27")
+REGDEF(V28, 28+VBASE, VMASK(28), "d28", "s28")
+REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29")
+REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30")
+REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31")
+
+// The registers with values 64 (NBASE) and above are not real register numbers
+#define NBASE 64
+
+REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?")
+// This must be last!
+REGDEF(STK, 1+NBASE, 0x0000, "STK", "STK")
+
+/*****************************************************************************/
+#undef RMASK
+#undef VMASK
+#undef VBASE
+#undef NBASE
+#undef REGDEF
+#undef REGALIAS
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/registerfp.cpp b/src/jit/registerfp.cpp
new file mode 100644
index 0000000000..997c223ed4
--- /dev/null
+++ b/src/jit/registerfp.cpp
@@ -0,0 +1,1522 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef LEGACY_BACKEND // This file is NOT used for the RyuJIT backend that uses the linear scan register allocator.
+
+#include "compiler.h"
+#include "emit.h"
+#include "codegen.h"
+
+#ifndef _TARGET_ARM_
+#error "Non-ARM target for registerfp.cpp"
+#endif // !_TARGET_ARM_
+
+// get the next argument register which is aligned to 'alignment' # of bytes
+regNumber alignFloatArgReg(regNumber argReg, int alignment)
+{
+ assert(isValidFloatArgReg(argReg));
+
+ int regsize_alignment = alignment /= REGSIZE_BYTES;
+ if (genMapFloatRegNumToRegArgNum(argReg) % regsize_alignment)
+ argReg = genRegArgNext(argReg);
+
+ // Technically the above should be a 'while' loop, so assert that
+ // we never needed to increment more than once.
+ assert(!(genMapFloatRegNumToRegArgNum(argReg) % regsize_alignment));
+
+ return argReg;
+}
+
+// Instruction list
+// N=normal, R=reverse, P=pop
+
+void CodeGen::genFloatConst(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ assert(tree->gtOper == GT_CNS_DBL);
+ var_types type = tree->gtType;
+ double constValue = tree->gtDblCon.gtDconVal;
+ size_t* cv = (size_t*)&constValue;
+
+ regNumber dst = regSet.PickRegFloat(type, pref);
+
+ if (type == TYP_FLOAT)
+ {
+ regNumber reg = regSet.rsPickReg();
+
+ float f = forceCastToFloat(constValue);
+ genSetRegToIcon(reg, *((int*)(&f)));
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, dst, reg);
+ }
+ else
+ {
+ assert(type == TYP_DOUBLE);
+ regNumber reg1 = regSet.rsPickReg();
+ regNumber reg2 = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg1));
+
+ genSetRegToIcon(reg1, cv[0]);
+ regSet.rsLockReg(genRegMask(reg1));
+ genSetRegToIcon(reg2, cv[1]);
+ regSet.rsUnlockReg(genRegMask(reg1));
+
+ getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, dst, reg1, reg2);
+ }
+ genMarkTreeInReg(tree, dst);
+
+ return;
+}
+
+void CodeGen::genFloatMath(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ assert(tree->OperGet() == GT_INTRINSIC);
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get tree into a register
+ genCodeForTreeFloat(op1, pref);
+
+ instruction ins;
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ ins = INS_invalid;
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ ins = INS_invalid;
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ ins = INS_vsqrt;
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ ins = INS_vabs;
+ break;
+ case CORINFO_INTRINSIC_Round:
+ {
+ regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
+ genMarkTreeInReg(tree, reg);
+ // convert it to a long and back
+ inst_RV_RV(ins_FloatConv(TYP_LONG, tree->TypeGet()), reg, op1->gtRegNum, tree->TypeGet());
+ inst_RV_RV(ins_FloatConv(tree->TypeGet(), TYP_LONG), reg, reg);
+ genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
+ return;
+ }
+ break;
+ default:
+ unreached();
+ }
+
+ if (ins != INS_invalid)
+ {
+ regNumber reg = regSet.PickRegFloat(tree->TypeGet(), pref);
+ genMarkTreeInReg(tree, reg);
+ inst_RV_RV(ins, reg, op1->gtRegNum, tree->TypeGet());
+ // mark register that holds tree
+ genCodeForTreeFloat_DONE(tree, reg);
+ }
+ else
+ {
+ unreached();
+ // If unreached is removed, mark register that holds tree
+ // genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
+ }
+
+ return;
+}
+
+void CodeGen::genFloatSimple(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ assert(tree->OperKind() & GTK_SMPOP);
+ var_types type = tree->TypeGet();
+
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
+ if (pref == NULL)
+ {
+ pref = &defaultPref;
+ }
+
+ switch (tree->OperGet())
+ {
+ // Assignment
+ case GT_ASG:
+ {
+ genFloatAssign(tree);
+ break;
+ }
+
+ // Arithmetic binops
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ {
+ genFloatArith(tree, pref);
+ break;
+ }
+
+ case GT_NEG:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get the tree into a register
+ genCodeForTreeFloat(op1, pref);
+
+ // change the sign
+ regNumber reg = regSet.PickRegFloat(type, pref);
+ genMarkTreeInReg(tree, reg);
+ inst_RV_RV(ins_MathOp(tree->OperGet(), type), reg, op1->gtRegNum, type);
+
+ // mark register that holds tree
+ genCodeForTreeFloat_DONE(tree, reg);
+ return;
+ }
+
+ case GT_IND:
+ {
+ regMaskTP addrReg;
+
+ // Make sure the address value is 'addressable'
+ addrReg = genMakeAddressable(tree, 0, RegSet::FREE_REG);
+
+ // Load the value onto the FP stack
+ regNumber reg = regSet.PickRegFloat(type, pref);
+ genLoadFloat(tree, reg);
+
+ genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
+
+ genCodeForTreeFloat_DONE(tree, reg);
+
+ break;
+ }
+ case GT_CAST:
+ {
+ genCodeForTreeCastFloat(tree, pref);
+ break;
+ }
+
+ // Asg-Arithmetic ops
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ {
+ genFloatAsgArith(tree);
+ break;
+ }
+ case GT_INTRINSIC:
+ genFloatMath(tree, pref);
+ break;
+
+ case GT_RETURN:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ assert(op1);
+
+ pref->best = (type == TYP_DOUBLE) ? RBM_DOUBLERET : RBM_FLOATRET;
+
+ // Compute the result
+ genCodeForTreeFloat(op1, pref);
+
+ inst_RV_TT(ins_FloatConv(tree->TypeGet(), op1->TypeGet()), REG_FLOATRET, op1);
+ if (compiler->info.compIsVarArgs || compiler->opts.compUseSoftFP)
+ {
+ if (tree->TypeGet() == TYP_FLOAT)
+ {
+ inst_RV_RV(INS_vmov_f2i, REG_INTRET, REG_FLOATRET, TYP_FLOAT, EA_4BYTE);
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_DOUBLE);
+ inst_RV_RV_RV(INS_vmov_d2i, REG_INTRET, REG_NEXT(REG_INTRET), REG_FLOATRET, EA_8BYTE);
+ }
+ }
+ break;
+ }
+ case GT_ARGPLACE:
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ genCodeForTreeFloat(op2, pref);
+
+ regSet.SetUsedRegFloat(op2, true);
+ genEvalSideEffects(op1);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ genEvalSideEffects(op1);
+ genCodeForTreeFloat(op2, pref);
+ }
+
+ genCodeForTreeFloat_DONE(tree, op2->gtRegNum);
+ break;
+ }
+
+ case GT_CKFINITE:
+ genFloatCheckFinite(tree, pref);
+ break;
+
+ default:
+ NYI("Unhandled register FP codegen");
+ }
+}
+
+// generate code for ckfinite tree/instruction
+void CodeGen::genFloatCheckFinite(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ TempDsc* temp;
+ int offs;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // Offset of the DWord containing the exponent
+ offs = (op1->gtType == TYP_FLOAT) ? 0 : sizeof(int);
+
+ // get tree into a register
+ genCodeForTreeFloat(op1, pref);
+
+ regNumber reg = regSet.rsPickReg();
+
+ int expMask;
+ if (op1->gtType == TYP_FLOAT)
+ {
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, reg, op1->gtRegNum);
+ expMask = 0x7F800000;
+ }
+ else // double
+ {
+ assert(op1->gtType == TYP_DOUBLE);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, reg,
+ REG_NEXT(op1->gtRegNum)); // the high 32 bits of the double register
+ expMask = 0x7FF00000;
+ }
+ regTracker.rsTrackRegTrash(reg);
+
+ // Check if the exponent is all ones
+ inst_RV_IV(INS_and, reg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, reg, expMask, EA_4BYTE);
+
+ // If exponent was all 1's, we need to throw ArithExcep
+ emitJumpKind jmpEqual = genJumpKindForOper(GT_EQ, CK_SIGNED);
+ genJumpToThrowHlpBlk(jmpEqual, SCK_ARITH_EXCPN);
+
+ genCodeForTreeFloat_DONE(tree, op1->gtRegNum);
+}
+
+void CodeGen::genFloatAssign(GenTree* tree)
+{
+ var_types type = tree->TypeGet();
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ regMaskTP needRegOp1 = RBM_ALLINT;
+ regMaskTP addrReg = RBM_NONE;
+ bool volat = false; // Is this a volatile store
+ bool unaligned = false; // Is this an unaligned store
+ regNumber op2reg = REG_NA;
+
+#ifdef DEBUGGING_SUPPORT
+ unsigned lclVarNum = compiler->lvaCount;
+ unsigned lclILoffs = DUMMY_INIT(0);
+#endif
+
+ noway_assert(tree->OperGet() == GT_ASG);
+
+ // Is the target a floating-point local variable?
+ // possibly even an enregistered floating-point local variable?
+ //
+ switch (op1->gtOper)
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ case GT_LCL_FLD:
+ // Check for a misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(op1->TypeGet()))
+ {
+ if ((op1->gtLclFld.gtLclOffs % emitTypeSize(op1->TypeGet())) != 0)
+ {
+ unaligned = true;
+ }
+ }
+ break;
+
+ case GT_LCL_VAR:
+ varNum = op1->gtLclVarCommon.gtLclNum;
+ noway_assert(varNum < compiler->lvaCount);
+ varDsc = compiler->lvaTable + varNum;
+
+#ifdef DEBUGGING_SUPPORT
+ // For non-debuggable code, every definition of a lcl-var has
+ // to be checked to see if we need to open a new scope for it.
+ // Remember the local var info to call siCheckVarScope
+ // AFTER code generation of the assignment.
+ //
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
+ {
+ lclVarNum = varNum;
+ lclILoffs = op1->gtLclVar.gtLclILoffs;
+ }
+#endif
+
+ // Dead Store assert (with min opts we may have dead stores)
+ //
+ noway_assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1->gtFlags & GTF_VAR_DEATH));
+
+ // Does this variable live in a register?
+ //
+ if (genMarkLclVar(op1))
+ {
+ noway_assert(!compiler->opts.compDbgCode); // We don't enregister any floats with debug codegen
+
+ // Get hold of the target register
+ //
+ regNumber op1Reg = op1->gtRegVar.gtRegNum;
+
+ // the variable being assigned should be dead in op2
+ assert(!varDsc->lvTracked ||
+ !VarSetOps::IsMember(compiler, genUpdateLiveSetForward(op2), varDsc->lvVarIndex));
+
+ // Setup register preferencing, so that we try to target the op1 enregistered variable
+ //
+ regMaskTP bestMask = genRegMask(op1Reg);
+ if (type == TYP_DOUBLE)
+ {
+ assert((bestMask & RBM_DBL_REGS) != 0);
+ bestMask |= genRegMask(REG_NEXT(op1Reg));
+ }
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestMask);
+
+ // Evaluate op2 into a floating point register
+ //
+ genCodeForTreeFloat(op2, &pref);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ // Make sure the value ends up in the right place ...
+ // For example if op2 is a call that returns a result
+ // in REG_F0, we will need to do a move instruction here
+ //
+ if ((op2->gtRegNum != op1Reg) || (op2->TypeGet() != type))
+ {
+ regMaskTP spillRegs = regSet.rsMaskUsed & genRegMaskFloat(op1Reg, op1->TypeGet());
+ if (spillRegs != 0)
+ regSet.rsSpillRegs(spillRegs);
+
+ assert(type == op1->TypeGet());
+
+ inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op1Reg, op2->gtRegNum, type);
+ }
+ genUpdateLife(op1);
+ goto DONE_ASG;
+ }
+ break;
+
+ case GT_CLS_VAR:
+ case GT_IND:
+ // Check for a volatile/unaligned store
+ //
+ assert((op1->OperGet() == GT_CLS_VAR) ||
+ (op1->OperGet() == GT_IND)); // Required for GTF_IND_VOLATILE flag to be valid
+ if (op1->gtFlags & GTF_IND_VOLATILE)
+ volat = true;
+ if (op1->gtFlags & GTF_IND_UNALIGNED)
+ unaligned = true;
+ break;
+
+ default:
+ break;
+ }
+
+ // Is the value being assigned an enregistered floating-point local variable?
+ //
+ switch (op2->gtOper)
+ {
+ case GT_LCL_VAR:
+
+ if (!genMarkLclVar(op2))
+ break;
+
+ __fallthrough;
+
+ case GT_REG_VAR:
+
+ // We must honor the order of evaluation in case op1 reassigns our op2 register
+ //
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ break;
+
+ // Is there an implicit conversion that we have to insert?
+ // Handle this case with the normal cases below.
+ //
+ if (type != op2->TypeGet())
+ break;
+
+ // Make the target addressable
+ //
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ noway_assert(op2->IsRegVar());
+
+ op2reg = op2->gtRegVar.gtRegNum;
+ genUpdateLife(op2);
+
+ goto CHK_VOLAT_UNALIGN;
+ default:
+ break;
+ }
+
+ // Is the op2 (RHS) more complex than op1 (LHS)?
+ //
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Generate op2 (RHS) into a floating point register
+ //
+ genCodeForTreeFloat(op2, &pref);
+ regSet.SetUsedRegFloat(op2, true);
+
+ // Make the target addressable
+ //
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ needRegOp1 = regSet.rsNarrowHint(needRegOp1, ~op2->gtRsvdRegs);
+
+ // Make the target addressable
+ //
+ addrReg = genMakeAddressable(op1, needRegOp1, RegSet::KEEP_REG, true);
+
+ // Generate the RHS into any floating point register
+ genCodeForTreeFloat(op2);
+ }
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ op2reg = op2->gtRegNum;
+
+ // Is there an implicit conversion that we have to insert?
+ //
+ if (type != op2->TypeGet())
+ {
+ regMaskTP bestMask = genRegMask(op2reg);
+ if (type == TYP_DOUBLE)
+ {
+ if (bestMask & RBM_DBL_REGS)
+ {
+ bestMask |= genRegMask(REG_NEXT(op2reg));
+ }
+ else
+ {
+ bestMask |= genRegMask(REG_PREV(op2reg));
+ }
+ }
+ RegSet::RegisterPreference op2Pref(RBM_ALLFLOAT, bestMask);
+ op2reg = regSet.PickRegFloat(type, &op2Pref);
+
+ inst_RV_RV(ins_FloatConv(type, op2->TypeGet()), op2reg, op2->gtRegNum, type);
+ }
+
+ // Make sure the LHS is still addressable
+ //
+ addrReg = genKeepAddressable(op1, addrReg);
+
+CHK_VOLAT_UNALIGN:
+
+ regSet.rsLockUsedReg(addrReg); // Must prevent unaligned regSet.rsGrabReg from choosing an addrReg
+
+ if (volat)
+ {
+ // Emit a memory barrier instruction before the store
+ instGen_MemoryBarrier();
+ }
+ if (unaligned)
+ {
+ var_types storeType = op1->TypeGet();
+ assert(storeType == TYP_DOUBLE || storeType == TYP_FLOAT);
+
+ // Unaligned Floating-Point Stores must be done using the integer register(s)
+ regNumber intRegLo = regSet.rsGrabReg(RBM_ALLINT);
+ regNumber intRegHi = REG_NA;
+ regMaskTP tmpLockMask = genRegMask(intRegLo);
+
+ if (storeType == TYP_DOUBLE)
+ {
+ intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
+ tmpLockMask |= genRegMask(intRegHi);
+ }
+
+ // move the FP register over to the integer register(s)
+ //
+ if (storeType == TYP_DOUBLE)
+ {
+ getEmitter()->emitIns_R_R_R(INS_vmov_d2i, EA_8BYTE, intRegLo, intRegHi, op2reg);
+ regTracker.rsTrackRegTrash(intRegHi);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, intRegLo, op2reg);
+ }
+ regTracker.rsTrackRegTrash(intRegLo);
+
+ regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs
+ op1->gtType = TYP_INT; // Temporarily change the type to TYP_INT
+
+ inst_TT_RV(ins_Store(TYP_INT), op1, intRegLo);
+
+ if (storeType == TYP_DOUBLE)
+ {
+ inst_TT_RV(ins_Store(TYP_INT), op1, intRegHi, 4);
+ }
+
+ op1->gtType = storeType; // Change the type back to the floating point type
+ regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs
+ }
+ else
+ {
+ // Move the value into the target
+ //
+ inst_TT_RV(ins_Store(op1->TypeGet()), op1, op2reg);
+ }
+
+ // Free up anything that was tied up by the LHS
+ //
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(op1, addrReg, RegSet::KEEP_REG);
+
+DONE_ASG:
+
+ genUpdateLife(tree);
+
+#ifdef DEBUGGING_SUPPORT
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+ if (lclVarNum < compiler->lvaCount)
+ siCheckVarScope(lclVarNum, lclILoffs);
+#endif
+}
+
+void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
+{
+ genTreeOps oper;
+ unsigned kind;
+
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+
+ // What kind of node do we have?
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (kind & GTK_CONST)
+ {
+ genFloatConst(tree, pref);
+ }
+ else if (kind & GTK_LEAF)
+ {
+ genFloatLeaf(tree, pref);
+ }
+ else if (kind & GTK_SMPOP)
+ {
+ genFloatSimple(tree, pref);
+ }
+ else
+ {
+ assert(oper == GT_CALL);
+ genCodeForCall(tree, true);
+ }
+}
+
+void CodeGen::genFloatLeaf(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ regNumber reg = REG_NA;
+
+ switch (tree->OperGet())
+ {
+ case GT_LCL_VAR:
+ // Does the variable live in a register?
+ //
+ if (!genMarkLclVar(tree))
+ goto MEM_LEAF;
+ __fallthrough;
+
+ case GT_REG_VAR:
+ noway_assert(tree->gtFlags & GTF_REG_VAL);
+ reg = tree->gtRegVar.gtRegNum;
+ break;
+
+ case GT_LCL_FLD:
+ // We only use GT_LCL_FLD for lvAddrTaken vars, so we don't have
+ // to worry about it being enregistered.
+ noway_assert(compiler->lvaTable[tree->gtLclFld.gtLclNum].lvRegister == 0);
+ __fallthrough;
+
+ case GT_CLS_VAR:
+
+ MEM_LEAF:
+ reg = regSet.PickRegFloat(tree->TypeGet(), pref);
+ genLoadFloat(tree, reg);
+ break;
+
+ default:
+ DISPTREE(tree);
+ assert(!"unexpected leaf");
+ }
+
+ genCodeForTreeFloat_DONE(tree, reg);
+ return;
+}
+
+void CodeGen::genLoadFloat(GenTreePtr tree, regNumber reg)
+{
+ if (tree->IsRegVar())
+ {
+ // If it has been spilled, unspill it.
+ LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvSpilled)
+ {
+ UnspillFloat(varDsc);
+ }
+
+ inst_RV_RV(ins_FloatCopy(tree->TypeGet()), reg, tree->gtRegNum, tree->TypeGet());
+ }
+ else
+ {
+ bool unalignedLoad = false;
+ switch (tree->OperGet())
+ {
+ case GT_IND:
+ case GT_CLS_VAR:
+ if (tree->gtFlags & GTF_IND_UNALIGNED)
+ unalignedLoad = true;
+ break;
+ case GT_LCL_FLD:
+ // Check for a misalignment on a Floating Point field
+ //
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ if ((tree->gtLclFld.gtLclOffs % emitTypeSize(tree->TypeGet())) != 0)
+ {
+ unalignedLoad = true;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (unalignedLoad)
+ {
+ // Make the target addressable
+ //
+ regMaskTP addrReg = genMakeAddressable(tree, 0, RegSet::KEEP_REG, true);
+ regSet.rsLockUsedReg(addrReg); // Must prevent regSet.rsGrabReg from choosing an addrReg
+
+ var_types loadType = tree->TypeGet();
+ assert(loadType == TYP_DOUBLE || loadType == TYP_FLOAT);
+
+ // Unaligned Floating-Point Loads must be loaded into integer register(s)
+ // and then moved over to the Floating-Point register
+ regNumber intRegLo = regSet.rsGrabReg(RBM_ALLINT);
+ regNumber intRegHi = REG_NA;
+ regMaskTP tmpLockMask = genRegMask(intRegLo);
+
+ if (loadType == TYP_DOUBLE)
+ {
+ intRegHi = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(intRegLo));
+ tmpLockMask |= genRegMask(intRegHi);
+ }
+
+ regSet.rsLockReg(tmpLockMask); // Temporarily lock the intRegs
+ tree->gtType = TYP_INT; // Temporarily change the type to TYP_INT
+
+ inst_RV_TT(ins_Load(TYP_INT), intRegLo, tree);
+ regTracker.rsTrackRegTrash(intRegLo);
+
+ if (loadType == TYP_DOUBLE)
+ {
+ inst_RV_TT(ins_Load(TYP_INT), intRegHi, tree, 4);
+ regTracker.rsTrackRegTrash(intRegHi);
+ }
+
+ tree->gtType = loadType; // Change the type back to the floating point type
+ regSet.rsUnlockReg(tmpLockMask); // Unlock the intRegs
+
+ // move the integer register(s) over to the FP register
+ //
+ if (loadType == TYP_DOUBLE)
+ getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, reg, intRegLo, intRegHi);
+ else
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, reg, intRegLo);
+
+ // Free up anything that was tied up by genMakeAddressable
+ //
+ regSet.rsUnlockUsedReg(addrReg);
+ genDoneAddressable(tree, addrReg, RegSet::KEEP_REG);
+ }
+ else
+ {
+ inst_RV_TT(ins_FloatLoad(tree->TypeGet()), reg, tree);
+ }
+ if (((tree->OperGet() == GT_CLS_VAR) || (tree->OperGet() == GT_IND)) && (tree->gtFlags & GTF_IND_VOLATILE))
+ {
+ // Emit a memory barrier instruction after the load
+ instGen_MemoryBarrier();
+ }
+ }
+}
+
+void CodeGen::genCodeForTreeFloat_DONE(GenTreePtr tree, regNumber reg)
+{
+ return genCodeForTree_DONE(tree, reg);
+}
+
+void CodeGen::genFloatAsgArith(GenTreePtr tree)
+{
+ // See Flowgraph.cpp, line 13750.
+ // ARM VFP has plenty of registers, 3-operand instructions, and no addressing modes,
+ // so the assignment-arithmetic ops are pointless here.
+ noway_assert(!"Not Reachable for _TARGET_ARM_");
+}
+
+regNumber CodeGen::genAssignArithFloat(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
+{
+ regNumber result;
+
+ // dst should be a regvar or memory
+
+ if (dst->IsRegVar())
+ {
+ regNumber reg = dst->gtRegNum;
+
+ if (src->IsRegVar())
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), reg, src->gtRegNum, dst->gtType);
+ }
+ else
+ {
+ inst_RV_TT(ins_MathOp(oper, dst->gtType), reg, src, 0, EmitSize(dst));
+ }
+ result = reg;
+ }
+ else // dst in memory
+ {
+ // since this is an asgop the ACTUAL destination is memory
+ // but it is also one of the sources and SSE ops do not allow mem dests
+ // so we have loaded it into a reg, and that is what dstreg represents
+ assert(dstreg != REG_NA);
+
+ if ((src->InReg()))
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), dstreg, src->gtRegNum, dst->gtType);
+ }
+ else
+ {
+ // mem mem operation
+ inst_RV_TT(ins_MathOp(oper, dst->gtType), dstreg, src, 0, EmitSize(dst));
+ }
+
+ dst->gtFlags &= ~GTF_REG_VAL; // ???
+
+ inst_TT_RV(ins_FloatStore(dst->gtType), dst, dstreg, 0, EmitSize(dst));
+
+ result = REG_NA;
+ }
+
+ return result;
+}
+
+void CodeGen::genFloatArith(GenTreePtr tree, RegSet::RegisterPreference* tgtPref)
+{
+ var_types type = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+ GenTreePtr op1 = tree->gtGetOp1();
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ regNumber tgtReg;
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ VARSET_TP varBit;
+
+ assert(oper == GT_ADD || oper == GT_SUB || oper == GT_MUL || oper == GT_DIV);
+
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
+ if (tgtPref == NULL)
+ {
+ tgtPref = &defaultPref;
+ }
+
+ // Is the op2 (RHS) more complex than op1 (LHS)?
+ //
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op1->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Evaluate op2 into a floating point register
+ //
+ genCodeForTreeFloat(op2, &pref);
+ regSet.SetUsedRegFloat(op2, true);
+
+ // Evaluate op1 into any floating point register
+ //
+ genCodeForTreeFloat(op1);
+ regSet.SetUsedRegFloat(op1, true);
+
+ regNumber op1Reg = op1->gtRegNum;
+ regMaskTP op1Mask = genRegMaskFloat(op1Reg, type);
+
+ // Fix 388445 ARM JitStress WP7
+ regSet.rsLockUsedReg(op1Mask);
+ genRecoverReg(op2, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+ regSet.rsUnlockUsedReg(op1Mask);
+
+ regSet.SetUsedRegFloat(op1, false);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op2->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Evaluate op1 into a floating point register
+ //
+ genCodeForTreeFloat(op1, &pref);
+ regSet.SetUsedRegFloat(op1, true);
+
+ // Evaluate op2 into any floating point register
+ //
+ genCodeForTreeFloat(op2);
+ regSet.SetUsedRegFloat(op2, true);
+
+ regNumber op2Reg = op2->gtRegNum;
+ regMaskTP op2Mask = genRegMaskFloat(op2Reg, type);
+
+ // Fix 388445 ARM JitStress WP7
+ regSet.rsLockUsedReg(op2Mask);
+ genRecoverReg(op1, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ regSet.rsUnlockUsedReg(op2Mask);
+
+ regSet.SetUsedRegFloat(op2, false);
+ regSet.SetUsedRegFloat(op1, false);
+ }
+
+ tgtReg = regSet.PickRegFloat(type, tgtPref, true);
+
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+ noway_assert(op2->gtFlags & GTF_REG_VAL);
+
+ inst_RV_RV_RV(ins_MathOp(oper, type), tgtReg, op1->gtRegNum, op2->gtRegNum, emitActualTypeSize(type));
+
+ genCodeForTreeFloat_DONE(tree, tgtReg);
+}
+
+regNumber CodeGen::genArithmFloat(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse)
+{
+ regNumber result = REG_NA;
+
+ assert(dstreg != REG_NA);
+
+ if (bReverse)
+ {
+ GenTree* temp = src;
+ regNumber tempreg = srcreg;
+ src = dst;
+ srcreg = dstreg;
+ dst = temp;
+ dstreg = tempreg;
+ }
+
+ if (srcreg == REG_NA)
+ {
+ if (src->IsRegVar())
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), dst->gtRegNum, src->gtRegNum, dst->gtType);
+ }
+ else
+ {
+ inst_RV_TT(ins_MathOp(oper, dst->gtType), dst->gtRegNum, src);
+ }
+ }
+ else
+ {
+ inst_RV_RV(ins_MathOp(oper, dst->gtType), dstreg, srcreg, dst->gtType);
+ }
+
+ result = dstreg;
+
+ assert(result != REG_NA);
+ return result;
+}
+
+void CodeGen::genKeepAddressableFloat(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr)
+{
+ regMaskTP regMaskInt, regMaskFlt;
+
+ regMaskInt = *regMaskIntPtr;
+ regMaskFlt = *regMaskFltPtr;
+
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+ case GT_REG_VAR:
+ // If register has been spilled, unspill it
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(&compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]);
+ }
+ break;
+
+ case GT_CNS_DBL:
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+ break;
+
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_CLS_VAR:
+ break;
+
+ case GT_IND:
+ if (regMaskFlt == RBM_NONE)
+ {
+ *regMaskIntPtr = genKeepAddressable(tree, regMaskInt, 0);
+ *regMaskFltPtr = 0;
+ return;
+ }
+ __fallthrough;
+
+ default:
+ *regMaskIntPtr = 0;
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+ break;
+ }
+}
+
+void CodeGen::genComputeAddressableFloat(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg,
+ regMaskTP needReg,
+ RegSet::KeepReg keepReg,
+ bool freeOnly /* = false */)
+{
+ noway_assert(genStillAddressable(tree));
+ noway_assert(varTypeIsFloating(tree->TypeGet()));
+
+ genDoneAddressableFloat(tree, addrRegInt, addrRegFlt, keptReg);
+
+ regNumber reg;
+ if (tree->gtFlags & GTF_REG_VAL)
+ {
+ reg = tree->gtRegNum;
+ if (freeOnly && !(genRegMaskFloat(reg, tree->TypeGet()) & regSet.RegFreeFloat()))
+ {
+ goto LOAD_REG;
+ }
+ }
+ else
+ {
+ LOAD_REG:
+ RegSet::RegisterPreference pref(needReg, RBM_NONE);
+ reg = regSet.PickRegFloat(tree->TypeGet(), &pref);
+ genLoadFloat(tree, reg);
+ }
+
+ genMarkTreeInReg(tree, reg);
+
+ if (keepReg == RegSet::KEEP_REG)
+ {
+ regSet.SetUsedRegFloat(tree, true);
+ }
+}
+
+void CodeGen::genDoneAddressableFloat(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg)
+{
+ assert(!(addrRegInt && addrRegFlt));
+
+ if (addrRegInt)
+ {
+ return genDoneAddressable(tree, addrRegInt, keptReg);
+ }
+ else if (addrRegFlt)
+ {
+ if (keptReg == RegSet::KEEP_REG)
+ {
+ for (regNumber r = REG_FP_FIRST; r != REG_NA; r = regNextOfType(r, tree->TypeGet()))
+ {
+ regMaskTP mask = genRegMaskFloat(r, tree->TypeGet());
+ // some masks take up more than one bit
+ if ((mask & addrRegFlt) == mask)
+ {
+ regSet.SetUsedRegFloat(tree, false);
+ }
+ }
+ }
+ }
+}
+
+GenTreePtr CodeGen::genMakeAddressableFloat(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles)
+{
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+
+ case GT_LCL_VAR:
+ genMarkLclVar(tree);
+ __fallthrough;
+
+ case GT_REG_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_IND:
+ // Try to make the address directly addressable
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, RBM_ALLFLOAT, RegSet::KEEP_REG, regMaskIntPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr = tree;
+ tree = tree->gtOp.gtOp1;
+ genCodeForTree(tree, 0);
+ regSet.rsMarkRegUsed(tree, addr);
+
+ *regMaskIntPtr = genRegMask(tree->gtRegNum);
+ return addr;
+ }
+
+ // fall through
+
+ default:
+ genCodeForTreeFloat(tree);
+ regSet.SetUsedRegFloat(tree, true);
+
+ // update mask
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+
+ return tree;
+ break;
+ }
+}
+
+void CodeGen::genCodeForTreeCastFloat(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types from = op1->gtType;
+ var_types to = tree->gtType;
+
+ if (varTypeIsFloating(from))
+ genCodeForTreeCastFromFloat(tree, pref);
+ else
+ genCodeForTreeCastToFloat(tree, pref);
+}
+
+void CodeGen::genCodeForTreeCastFromFloat(GenTree* tree, RegSet::RegisterPreference* pref)
+{
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ var_types from = op1->gtType;
+ var_types final = tree->gtType;
+ var_types intermediate = tree->CastToType();
+
+ regNumber srcReg;
+ regNumber dstReg;
+
+ assert(varTypeIsFloating(from));
+
+ // Evaluate op1 into a floating point register
+ //
+ if (varTypeIsFloating(final))
+ {
+ genCodeForTreeFloat(op1, pref);
+ }
+ else
+ {
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, RBM_NONE);
+ genCodeForTreeFloat(op1, &defaultPref);
+ }
+
+ srcReg = op1->gtRegNum;
+
+ if (varTypeIsFloating(final))
+ {
+ // float => double or
+ // double => float
+
+ dstReg = regSet.PickRegFloat(final, pref);
+
+ instruction ins = ins_FloatConv(final, from);
+ if (!isMoveIns(ins) || (srcReg != dstReg))
+ {
+ inst_RV_RV(ins, dstReg, srcReg, from);
+ }
+ }
+ else
+ {
+ // float => int or
+ // double => int
+
+ dstReg = regSet.rsPickReg(pref->ok, pref->best);
+
+ RegSet::RegisterPreference defaultPref(RBM_ALLFLOAT, genRegMask(srcReg));
+ regNumber intermediateReg = regSet.PickRegFloat(TYP_FLOAT, &defaultPref);
+
+ if ((intermediate == TYP_UINT) && (final == TYP_INT))
+ {
+ // Perform the conversion using the FP unit
+ inst_RV_RV(ins_FloatConv(TYP_UINT, from), intermediateReg, srcReg, from);
+
+ // Prevent the call to genIntegerCast
+ final = TYP_UINT;
+ }
+ else
+ {
+ // Perform the conversion using the FP unit
+ inst_RV_RV(ins_FloatConv(TYP_INT, from), intermediateReg, srcReg, from);
+ }
+
+ // the integer result is now in the FP register, move it to the integer ones
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, EA_4BYTE, dstReg, intermediateReg);
+
+ regTracker.rsTrackRegTrash(dstReg);
+
+ // handle things like int <- short <- double
+ if (final != intermediate)
+ {
+ // lie about the register so integer cast logic will finish the job
+ op1->gtRegNum = dstReg;
+ genIntegerCast(tree, pref->ok, pref->best);
+ }
+ }
+
+ genUpdateLife(op1);
+ genCodeForTree_DONE(tree, dstReg);
+}
+
+void CodeGen::genCodeForTreeCastToFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
+{
+ regNumber srcReg;
+ regNumber dstReg;
+ regNumber vmovReg;
+
+ regMaskTP addrReg;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ op1 = genCodeForCommaTree(op1); // Trim off any comma expressions.
+ var_types from = op1->gtType;
+ var_types to = tree->gtType;
+
+ switch (from)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ // load it into a register
+ genCodeForTree(op1, 0);
+
+ __fallthrough;
+
+ case TYP_BYREF:
+ from = TYP_INT;
+
+ __fallthrough;
+
+ case TYP_INT:
+ {
+ if (op1->gtOper == GT_LCL_FLD)
+ {
+ genComputeReg(op1, 0, RegSet::ANY_REG, RegSet::FREE_REG);
+ addrReg = 0;
+ }
+ else
+ {
+ addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG);
+ }
+
+ // Grab register for the cast
+ dstReg = regSet.PickRegFloat(to, pref);
+
+ // A float type that is the same size as the int we are coming from
+ var_types vmovType = TYP_FLOAT;
+ regNumber vmovReg = regSet.PickRegFloat(vmovType);
+
+ if (tree->gtFlags & GTF_UNSIGNED)
+ from = TYP_UINT;
+
+ // Is the value a constant, or now sitting in a register?
+ if (op1->InReg() || op1->IsCnsIntOrI())
+ {
+ if (op1->IsCnsIntOrI())
+ {
+ srcReg = genGetRegSetToIcon(op1->AsIntConCommon()->IconValue(), RBM_NONE, op1->TypeGet());
+ }
+ else
+ {
+ srcReg = op1->gtRegNum;
+ }
+
+ // move the integer register value over to the FP register
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, vmovReg, srcReg);
+ // now perform the conversion to the proper floating point representation
+ inst_RV_RV(ins_FloatConv(to, from), dstReg, vmovReg, to);
+ }
+ else
+ {
+ // Load the value from its address
+ inst_RV_TT(ins_FloatLoad(vmovType), vmovReg, op1);
+ inst_RV_RV(ins_FloatConv(to, from), dstReg, vmovReg, to);
+ }
+
+ if (addrReg)
+ {
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ }
+ genMarkTreeInReg(tree, dstReg);
+
+ break;
+ }
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ {
+ // This is a cast from float to double or double to float
+
+ genCodeForTreeFloat(op1, pref);
+
+ // Grab register for the cast
+ dstReg = regSet.PickRegFloat(to, pref);
+
+ if ((from != to) || (dstReg != op1->gtRegNum))
+ {
+ inst_RV_RV(ins_FloatConv(to, from), dstReg, op1->gtRegNum, to);
+ }
+
+ // Assign reg to tree
+ genMarkTreeInReg(tree, dstReg);
+
+ break;
+ }
+ default:
+ {
+ assert(!"unsupported cast");
+ break;
+ }
+ }
+}
+
+void CodeGen::genRoundFloatExpression(GenTreePtr op, var_types type)
+{
+ // Do nothing with memory resident opcodes - these are the right precision
+ if (type == TYP_UNDEF)
+ type = op->TypeGet();
+
+ switch (op->gtOper)
+ {
+ case GT_LCL_VAR:
+ genMarkLclVar(op);
+ __fallthrough;
+
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ case GT_CNS_DBL:
+ case GT_IND:
+ if (type == op->TypeGet())
+ return;
+
+ default:
+ break;
+ }
+}
+
+#ifdef DEBUG
+
+regMaskTP CodeGenInterface::genStressLockedMaskFloat()
+{
+ return 0;
+}
+
+#endif // DEBUG
+
+/*********************************************************************
+ * Preserve used callee trashed registers across calls.
+ *
+ */
+void CodeGen::SpillForCallRegisterFP(regMaskTP noSpillMask)
+{
+ regMaskTP regBit = 1;
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (!(regBit & noSpillMask) && (regBit & RBM_FLT_CALLEE_TRASH) && regSet.rsUsedTree[regNum])
+ {
+ SpillFloat(regNum, true);
+ }
+ }
+}
+
+/*********************************************************************
+ *
+ * Spill the used floating point register or the enregistered var.
+ * If spilling for a call, then record so, so we can unspill the
+ * ones that were spilled for the call.
+ *
+ */
+void CodeGenInterface::SpillFloat(regNumber reg, bool bIsCall /* = false */)
+{
+ regSet.rsSpillReg(reg);
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc)
+{
+ // Do actual unspill
+ regNumber reg;
+ if (spillDsc->bEnregisteredVariable)
+ {
+ NYI("unspill enreg var");
+ reg = regSet.PickRegFloat();
+ }
+ else
+ {
+ UnspillFloatMachineDep(spillDsc, false);
+ }
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg)
+{
+ assert(!spillDsc->bEnregisteredVariable);
+
+ assert(spillDsc->spillTree->gtFlags & GTF_SPILLED);
+
+ spillDsc->spillTree->gtFlags &= ~GTF_SPILLED;
+
+ var_types type = spillDsc->spillTree->TypeGet();
+ regNumber reg;
+ if (useSameReg)
+ {
+ // Give register preference as the same register that the tree was originally using.
+ reg = spillDsc->spillTree->gtRegNum;
+
+ regMaskTP maskPref = genRegMask(reg);
+ if (type == TYP_DOUBLE)
+ {
+ assert((maskPref & RBM_DBL_REGS) != 0);
+ maskPref |= genRegMask(REG_NEXT(reg));
+ }
+
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, maskPref);
+ reg = regSet.PickRegFloat(type, &pref);
+ }
+ else
+ {
+ reg = regSet.PickRegFloat();
+ }
+
+ // load from spilled spot
+ compiler->codeGen->reloadFloatReg(type, spillDsc->spillTemp, reg);
+
+ compiler->codeGen->genMarkTreeInReg(spillDsc->spillTree, reg);
+ regSet.SetUsedRegFloat(spillDsc->spillTree, true);
+}
+
+//
+instruction genFloatJumpInstr(genTreeOps cmp, bool isUnordered)
+{
+ switch (cmp)
+ {
+ case GT_EQ:
+ return INS_beq;
+ case GT_NE:
+ return INS_bne;
+ case GT_LT:
+ return isUnordered ? INS_blt : INS_blo;
+ case GT_LE:
+ return isUnordered ? INS_ble : INS_bls;
+ case GT_GE:
+ return isUnordered ? INS_bpl : INS_bge;
+ case GT_GT:
+ return isUnordered ? INS_bhi : INS_bgt;
+ default:
+ unreached();
+ }
+}
+
+void CodeGen::genCondJumpFloat(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ assert(jumpTrue && jumpFalse);
+ assert(!(cond->gtFlags & GTF_REVERSE_OPS)); // Done in genCondJump()
+ assert(varTypeIsFloating(cond->gtOp.gtOp1->gtType));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+ bool isUnordered = cond->gtFlags & GTF_RELOP_NAN_UN ? true : false;
+
+ regMaskTP bestRegs = regSet.rsNarrowHint(RBM_ALLFLOAT, ~op2->gtRsvdRegs);
+ RegSet::RegisterPreference pref(RBM_ALLFLOAT, bestRegs);
+
+ // Prepare operands.
+ genCodeForTreeFloat(op1, &pref);
+ regSet.SetUsedRegFloat(op1, true);
+
+ genCodeForTreeFloat(op2);
+ regSet.SetUsedRegFloat(op2, true);
+
+ genRecoverReg(op1, RBM_ALLFLOAT, RegSet::KEEP_REG);
+ noway_assert(op1->gtFlags & GTF_REG_VAL);
+
+ // cmp here
+ getEmitter()->emitIns_R_R(INS_vcmp, EmitSize(op1), op1->gtRegNum, op2->gtRegNum);
+
+ // vmrs with register 0xf (APSR_nzcv) transfers the FP status flags to the ARM condition flags
+ getEmitter()->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15);
+
+ regSet.SetUsedRegFloat(op2, false);
+ regSet.SetUsedRegFloat(op1, false);
+
+ getEmitter()->emitIns_J(genFloatJumpInstr(cmp, isUnordered), jumpTrue);
+}
+
+#endif // LEGACY_BACKEND
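genFloatAssign and genLoadFloat above handle unaligned floating-point accesses by bouncing the value through integer registers and word-sized integer loads/stores, which tolerate the unaligned address. The same idea as a standalone sketch, assuming a little-endian target; memcpy stands in for the vmov and integer store instructions.

#include <cstdint>
#include <cstring>

// Store a double to a possibly unaligned address with two 32-bit word stores,
// mirroring the vmov_d2i + two ins_Store(TYP_INT) sequence in genFloatAssign.
static void storeDoubleUnaligned(unsigned char* dst, double value)
{
    uint32_t lo;
    uint32_t hi;
    std::memcpy(&lo, reinterpret_cast<const unsigned char*>(&value), 4);     // plays the role of intRegLo
    std::memcpy(&hi, reinterpret_cast<const unsigned char*>(&value) + 4, 4); // plays the role of intRegHi
    std::memcpy(dst, &lo, 4);     // first word store
    std::memcpy(dst + 4, &hi, 4); // second word store at offset 4
}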
diff --git a/src/jit/registerfp.h b/src/jit/registerfp.h
new file mode 100644
index 0000000000..4c3ecb6050
--- /dev/null
+++ b/src/jit/registerfp.h
@@ -0,0 +1,26 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+/*****************************************************************************/
+/* The following is x86 specific */
+/*****************************************************************************/
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(FPV0, 0, 0x01, "FPV0")
+REGDEF(FPV1, 1, 0x02, "FPV1")
+REGDEF(FPV2, 2, 0x04, "FPV2")
+REGDEF(FPV3, 3, 0x08, "FPV3")
+REGDEF(FPV4, 4, 0x10, "FPV4")
+REGDEF(FPV5, 5, 0x20, "FPV5")
+REGDEF(FPV6, 6, 0x40, "FPV6")
+REGDEF(FPV7, 7, 0x80, "FPV7")
+
+/*****************************************************************************/
+#undef REGDEF
+/*****************************************************************************/
diff --git a/src/jit/registerxmm.h b/src/jit/registerxmm.h
new file mode 100644
index 0000000000..4c34261ba8
--- /dev/null
+++ b/src/jit/registerxmm.h
@@ -0,0 +1,48 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// clang-format off
+/*****************************************************************************/
+/*****************************************************************************/
+#ifndef REGDEF
+#error Must define REGDEF macro before including this file
+#endif
+
+#ifndef LEGACY_BACKEND
+#error This file is only used for the LEGACY_BACKEND build.
+#endif
+
+#if defined(_TARGET_XARCH_)
+
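+// XMMMASK(i) yields the mask bit for register XMM(i-1): XMMMASK(1) is bit 0 for xmm0, so XMMn maps to bit n.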
+#define XMMMASK(x) (unsigned(1) << (x-1))
+
+/*
+REGDEF(name, rnum, mask, sname) */
+REGDEF(XMM0, 0, XMMMASK(1), "xmm0" )
+REGDEF(XMM1, 1, XMMMASK(2), "xmm1" )
+REGDEF(XMM2, 2, XMMMASK(3), "xmm2" )
+REGDEF(XMM3, 3, XMMMASK(4), "xmm3" )
+REGDEF(XMM4, 4, XMMMASK(5), "xmm4" )
+REGDEF(XMM5, 5, XMMMASK(6), "xmm5" )
+REGDEF(XMM6, 6, XMMMASK(7), "xmm6" )
+REGDEF(XMM7, 7, XMMMASK(8), "xmm7" )
+
+#ifdef _TARGET_AMD64_
+REGDEF(XMM8, 8, XMMMASK(9), "xmm8" )
+REGDEF(XMM9, 9, XMMMASK(10), "xmm9" )
+REGDEF(XMM10, 10, XMMMASK(11), "xmm10" )
+REGDEF(XMM11, 11, XMMMASK(12), "xmm11" )
+REGDEF(XMM12, 12, XMMMASK(13), "xmm12" )
+REGDEF(XMM13, 13, XMMMASK(14), "xmm13" )
+REGDEF(XMM14, 14, XMMMASK(15), "xmm14" )
+REGDEF(XMM15, 15, XMMMASK(16), "xmm15" )
+#endif
+
+#endif // _TARGET_*
+
+/*****************************************************************************/
+#undef REGDEF
+/*****************************************************************************/
+
+// clang-format on
diff --git a/src/jit/reglist.h b/src/jit/reglist.h
new file mode 100644
index 0000000000..7b706110a8
--- /dev/null
+++ b/src/jit/reglist.h
@@ -0,0 +1,18 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef REGLIST_H
+#define REGLIST_H
+
+#include "target.h"
+#include "tinyarray.h"
+
+// The "regList" type is a small set of registerse
+#ifdef _TARGET_X86_
+typedef TinyArray<unsigned short, regNumber, REGNUM_BITS> regList;
+#else
+// The regList is unused for all other targets.
+#endif // _TARGET_*
+
+#endif // REGLIST_H
diff --git a/src/jit/regpair.h b/src/jit/regpair.h
new file mode 100644
index 0000000000..cfc109b882
--- /dev/null
+++ b/src/jit/regpair.h
@@ -0,0 +1,357 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#ifndef PAIRBEG
+#define PAIRBEG(reg)
+#endif
+
+#ifndef PAIRDEF
+#define PAIRDEF(r1, r2)
+#endif
+
+#ifndef PAIRSTK
+#define PAIRSTK(r1, r2) PAIRDEF(r1, r2)
+#endif
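+// PAIRBEG(r)      starts the group of pairs whose low half is register 'r'.
+// PAIRDEF(r1, r2) defines the pair with low half 'r1' and high half 'r2'.
+// PAIRSTK(r1, r2) defines a pair with one half on the stack (STK); it defaults to PAIRDEF.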
+
+#if defined(_TARGET_X86_)
+/*****************************************************************************/
+/* The following is for x86 */
+/*****************************************************************************/
+
+// rlo rhi
+
+PAIRBEG(EAX)
+PAIRDEF(EAX, ECX)
+PAIRDEF(EAX, EDX)
+PAIRDEF(EAX, EBX)
+PAIRDEF(EAX, EBP)
+PAIRDEF(EAX, ESI)
+PAIRDEF(EAX, EDI)
+PAIRSTK(EAX, STK)
+
+PAIRBEG(ECX)
+PAIRDEF(ECX, EAX)
+PAIRDEF(ECX, EDX)
+PAIRDEF(ECX, EBX)
+PAIRDEF(ECX, EBP)
+PAIRDEF(ECX, ESI)
+PAIRDEF(ECX, EDI)
+PAIRSTK(ECX, STK)
+
+PAIRBEG(EDX)
+PAIRDEF(EDX, EAX)
+PAIRDEF(EDX, ECX)
+PAIRDEF(EDX, EBX)
+PAIRDEF(EDX, EBP)
+PAIRDEF(EDX, ESI)
+PAIRDEF(EDX, EDI)
+PAIRSTK(EDX, STK)
+
+PAIRBEG(EBX)
+PAIRDEF(EBX, EAX)
+PAIRDEF(EBX, EDX)
+PAIRDEF(EBX, ECX)
+PAIRDEF(EBX, EBP)
+PAIRDEF(EBX, ESI)
+PAIRDEF(EBX, EDI)
+PAIRSTK(EBX, STK)
+
+PAIRBEG(EBP)
+PAIRDEF(EBP, EAX)
+PAIRDEF(EBP, EDX)
+PAIRDEF(EBP, ECX)
+PAIRDEF(EBP, EBX)
+PAIRDEF(EBP, ESI)
+PAIRDEF(EBP, EDI)
+PAIRSTK(EBP, STK)
+
+PAIRBEG(ESI)
+PAIRDEF(ESI, EAX)
+PAIRDEF(ESI, EDX)
+PAIRDEF(ESI, ECX)
+PAIRDEF(ESI, EBX)
+PAIRDEF(ESI, EBP)
+PAIRDEF(ESI, EDI)
+PAIRSTK(ESI, STK)
+
+PAIRBEG(EDI)
+PAIRDEF(EDI, EAX)
+PAIRDEF(EDI, EDX)
+PAIRDEF(EDI, ECX)
+PAIRDEF(EDI, EBX)
+PAIRDEF(EDI, EBP)
+PAIRDEF(EDI, ESI)
+PAIRSTK(EDI, STK)
+
+PAIRBEG(STK)
+PAIRSTK(STK, EAX)
+PAIRSTK(STK, EDX)
+PAIRSTK(STK, ECX)
+PAIRSTK(STK, EBX)
+PAIRSTK(STK, EBP)
+PAIRSTK(STK, ESI)
+PAIRSTK(STK, EDI)
+
+#endif
+
+/*****************************************************************************/
+
+#ifdef _TARGET_ARM_
+/*****************************************************************************/
+/* The following is for ARM */
+/*****************************************************************************/
+
+// rlo rhi
+
+PAIRBEG(R0)
+PAIRDEF(R0, R1)
+PAIRDEF(R0, R2)
+PAIRDEF(R0, R3)
+PAIRDEF(R0, R4)
+PAIRDEF(R0, R5)
+PAIRDEF(R0, R6)
+PAIRDEF(R0, R7)
+PAIRDEF(R0, R8)
+PAIRDEF(R0, R9)
+PAIRDEF(R0, R10)
+PAIRDEF(R0, R11)
+PAIRDEF(R0, R12)
+PAIRDEF(R0, LR)
+PAIRSTK(R0, STK)
+
+PAIRBEG(R1)
+PAIRDEF(R1, R0)
+PAIRDEF(R1, R2)
+PAIRDEF(R1, R3)
+PAIRDEF(R1, R4)
+PAIRDEF(R1, R5)
+PAIRDEF(R1, R6)
+PAIRDEF(R1, R7)
+PAIRDEF(R1, R8)
+PAIRDEF(R1, R9)
+PAIRDEF(R1, R10)
+PAIRDEF(R1, R11)
+PAIRDEF(R1, R12)
+PAIRDEF(R1, LR)
+PAIRSTK(R1, STK)
+
+PAIRBEG(R2)
+PAIRDEF(R2, R0)
+PAIRDEF(R2, R1)
+PAIRDEF(R2, R3)
+PAIRDEF(R2, R4)
+PAIRDEF(R2, R5)
+PAIRDEF(R2, R6)
+PAIRDEF(R2, R7)
+PAIRDEF(R2, R8)
+PAIRDEF(R2, R9)
+PAIRDEF(R2, R10)
+PAIRDEF(R2, R11)
+PAIRDEF(R2, R12)
+PAIRDEF(R2, LR)
+PAIRSTK(R2, STK)
+
+PAIRBEG(R3)
+PAIRDEF(R3, R0)
+PAIRDEF(R3, R1)
+PAIRDEF(R3, R2)
+PAIRDEF(R3, R4)
+PAIRDEF(R3, R5)
+PAIRDEF(R3, R6)
+PAIRDEF(R3, R7)
+PAIRDEF(R3, R8)
+PAIRDEF(R3, R9)
+PAIRDEF(R3, R10)
+PAIRDEF(R3, R11)
+PAIRDEF(R3, R12)
+PAIRDEF(R3, LR)
+PAIRSTK(R3, STK)
+
+PAIRBEG(R4)
+PAIRDEF(R4, R0)
+PAIRDEF(R4, R1)
+PAIRDEF(R4, R2)
+PAIRDEF(R4, R3)
+PAIRDEF(R4, R5)
+PAIRDEF(R4, R6)
+PAIRDEF(R4, R7)
+PAIRDEF(R4, R8)
+PAIRDEF(R4, R9)
+PAIRDEF(R4, R10)
+PAIRDEF(R4, R11)
+PAIRDEF(R4, R12)
+PAIRDEF(R4, LR)
+PAIRSTK(R4, STK)
+
+PAIRBEG(R5)
+PAIRDEF(R5, R0)
+PAIRDEF(R5, R1)
+PAIRDEF(R5, R2)
+PAIRDEF(R5, R3)
+PAIRDEF(R5, R4)
+PAIRDEF(R5, R6)
+PAIRDEF(R5, R7)
+PAIRDEF(R5, R8)
+PAIRDEF(R5, R9)
+PAIRDEF(R5, R10)
+PAIRDEF(R5, R11)
+PAIRDEF(R5, R12)
+PAIRDEF(R5, LR)
+PAIRSTK(R5, STK)
+
+PAIRBEG(R6)
+PAIRDEF(R6, R0)
+PAIRDEF(R6, R1)
+PAIRDEF(R6, R2)
+PAIRDEF(R6, R3)
+PAIRDEF(R6, R4)
+PAIRDEF(R6, R5)
+PAIRDEF(R6, R7)
+PAIRDEF(R6, R8)
+PAIRDEF(R6, R9)
+PAIRDEF(R6, R10)
+PAIRDEF(R6, R11)
+PAIRDEF(R6, R12)
+PAIRDEF(R6, LR)
+PAIRSTK(R6, STK)
+
+PAIRBEG(R7)
+PAIRDEF(R7, R0)
+PAIRDEF(R7, R1)
+PAIRDEF(R7, R2)
+PAIRDEF(R7, R3)
+PAIRDEF(R7, R4)
+PAIRDEF(R7, R5)
+PAIRDEF(R7, R6)
+PAIRDEF(R7, R8)
+PAIRDEF(R7, R9)
+PAIRDEF(R7, R10)
+PAIRDEF(R7, R11)
+PAIRDEF(R7, R12)
+PAIRDEF(R7, LR)
+PAIRSTK(R7, STK)
+
+PAIRBEG(R8)
+PAIRDEF(R8, R0)
+PAIRDEF(R8, R1)
+PAIRDEF(R8, R2)
+PAIRDEF(R8, R3)
+PAIRDEF(R8, R4)
+PAIRDEF(R8, R5)
+PAIRDEF(R8, R6)
+PAIRDEF(R8, R7)
+PAIRDEF(R8, R9)
+PAIRDEF(R8, R10)
+PAIRDEF(R8, R11)
+PAIRDEF(R8, R12)
+PAIRDEF(R8, LR)
+PAIRSTK(R8, STK)
+
+PAIRBEG(R9)
+PAIRDEF(R9, R0)
+PAIRDEF(R9, R1)
+PAIRDEF(R9, R2)
+PAIRDEF(R9, R3)
+PAIRDEF(R9, R4)
+PAIRDEF(R9, R5)
+PAIRDEF(R9, R6)
+PAIRDEF(R9, R7)
+PAIRDEF(R9, R8)
+PAIRDEF(R9, R10)
+PAIRDEF(R9, R11)
+PAIRDEF(R9, R12)
+PAIRDEF(R9, LR)
+PAIRSTK(R9, STK)
+
+PAIRBEG(R10)
+PAIRDEF(R10, R0)
+PAIRDEF(R10, R1)
+PAIRDEF(R10, R2)
+PAIRDEF(R10, R3)
+PAIRDEF(R10, R4)
+PAIRDEF(R10, R5)
+PAIRDEF(R10, R6)
+PAIRDEF(R10, R7)
+PAIRDEF(R10, R8)
+PAIRDEF(R10, R9)
+PAIRDEF(R10, R11)
+PAIRDEF(R10, R12)
+PAIRDEF(R10, LR)
+PAIRSTK(R10, STK)
+
+PAIRBEG(R11)
+PAIRDEF(R11, R0)
+PAIRDEF(R11, R1)
+PAIRDEF(R11, R2)
+PAIRDEF(R11, R3)
+PAIRDEF(R11, R4)
+PAIRDEF(R11, R5)
+PAIRDEF(R11, R6)
+PAIRDEF(R11, R7)
+PAIRDEF(R11, R8)
+PAIRDEF(R11, R9)
+PAIRDEF(R11, R10)
+PAIRDEF(R11, R12)
+PAIRDEF(R11, LR)
+PAIRSTK(R11, STK)
+
+PAIRBEG(R12)
+PAIRDEF(R12, R0)
+PAIRDEF(R12, R1)
+PAIRDEF(R12, R2)
+PAIRDEF(R12, R3)
+PAIRDEF(R12, R4)
+PAIRDEF(R12, R5)
+PAIRDEF(R12, R6)
+PAIRDEF(R12, R7)
+PAIRDEF(R12, R8)
+PAIRDEF(R12, R9)
+PAIRDEF(R12, R10)
+PAIRDEF(R12, R11)
+PAIRDEF(R12, LR)
+PAIRSTK(R12, STK)
+
+PAIRBEG(LR)
+PAIRDEF(LR, R0)
+PAIRDEF(LR, R1)
+PAIRDEF(LR, R2)
+PAIRDEF(LR, R3)
+PAIRDEF(LR, R4)
+PAIRDEF(LR, R5)
+PAIRDEF(LR, R6)
+PAIRDEF(LR, R7)
+PAIRDEF(LR, R8)
+PAIRDEF(LR, R9)
+PAIRDEF(LR, R10)
+PAIRDEF(LR, R11)
+PAIRDEF(LR, R12)
+PAIRSTK(LR, STK)
+
+PAIRBEG(STK)
+PAIRSTK(STK, R0)
+PAIRSTK(STK, R1)
+PAIRSTK(STK, R2)
+PAIRSTK(STK, R3)
+PAIRSTK(STK, R4)
+PAIRSTK(STK, R5)
+PAIRSTK(STK, R6)
+PAIRSTK(STK, R7)
+PAIRSTK(STK, R8)
+PAIRSTK(STK, R9)
+PAIRSTK(STK, R10)
+PAIRSTK(STK, R11)
+PAIRSTK(STK, R12)
+PAIRSTK(STK, LR)
+
+#endif
+
+/*****************************************************************************/
+
+#undef PAIRBEG
+#undef PAIRDEF
+#undef PAIRSTK
+
+/*****************************************************************************/
diff --git a/src/jit/regset.cpp b/src/jit/regset.cpp
new file mode 100644
index 0000000000..2980f96813
--- /dev/null
+++ b/src/jit/regset.cpp
@@ -0,0 +1,3777 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX RegSet XX
+XX XX
+XX Represents the register set, and their states during code generation XX
+XX Can select an unused register, keeps track of the contents of the XX
+XX registers, and can spill registers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "emit.h"
+
+/*****************************************************************************/
+
+#ifdef _TARGET_ARM64_
+const regMaskSmall regMasks[] = {
+#define REGDEF(name, rnum, mask, xname, wname) mask,
+#include "register.h"
+};
+#else // !_TARGET_ARM64_
+const regMaskSmall regMasks[] = {
+#define REGDEF(name, rnum, mask, sname) mask,
+#include "register.h"
+};
+#endif
+
+#ifdef _TARGET_X86_
+const regMaskSmall regFPMasks[] = {
+#define REGDEF(name, rnum, mask, sname) mask,
+#include "registerfp.h"
+};
+#endif // _TARGET_X86_
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX RegSet XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void RegSet::rsClearRegsModified()
+{
+#ifndef LEGACY_BACKEND
+ assert(m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT);
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("Clearing modified regs.\n");
+ }
+ rsModifiedRegsMaskInitialized = true;
+#endif // DEBUG
+
+ rsModifiedRegsMask = RBM_NONE;
+}
+
+void RegSet::rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump))
+{
+ assert(mask != RBM_NONE);
+ assert(rsModifiedRegsMaskInitialized);
+
+#ifndef LEGACY_BACKEND
+ // We can't update the modified registers set after final frame layout (that is, during code
+ // generation and after). Ignore prolog and epilog generation: they call register tracking to
+ // modify rbp, for example, even in functions that use rbp as a frame pointer. Make sure normal
+ // code generation isn't actually adding to set of modified registers.
+ // Frame layout is only affected by callee-saved registers, so only ensure that callee-saved
+ // registers aren't modified after final frame layout.
+ assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog ||
+ m_rsCompiler->compGeneratingEpilog ||
+ (((rsModifiedRegsMask | mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED)));
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose && !suppressDump)
+ {
+ if (rsModifiedRegsMask != (rsModifiedRegsMask | mask))
+ {
+ printf("Marking regs modified: ");
+ dspRegMask(mask);
+ printf(" (");
+ dspRegMask(rsModifiedRegsMask);
+ printf(" => ");
+ dspRegMask(rsModifiedRegsMask | mask);
+ printf(")\n");
+ }
+ }
+#endif // DEBUG
+
+ rsModifiedRegsMask |= mask;
+}
+
+void RegSet::rsRemoveRegsModified(regMaskTP mask)
+{
+ assert(mask != RBM_NONE);
+ assert(rsModifiedRegsMaskInitialized);
+
+#ifndef LEGACY_BACKEND
+ // See comment in rsSetRegsModified().
+ assert((m_rsCompiler->lvaDoneFrameLayout < Compiler::FINAL_FRAME_LAYOUT) || m_rsCompiler->compGeneratingProlog ||
+ m_rsCompiler->compGeneratingEpilog ||
+ (((rsModifiedRegsMask & ~mask) & RBM_CALLEE_SAVED) == (rsModifiedRegsMask & RBM_CALLEE_SAVED)));
+#endif // !LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("Removing modified regs: ");
+ dspRegMask(mask);
+ if (rsModifiedRegsMask == (rsModifiedRegsMask & ~mask))
+ {
+ printf(" (unchanged)");
+ }
+ else
+ {
+ printf(" (");
+ dspRegMask(rsModifiedRegsMask);
+ printf(" => ");
+ dspRegMask(rsModifiedRegsMask & ~mask);
+ printf(")");
+ }
+ printf("\n");
+ }
+#endif // DEBUG
+
+ rsModifiedRegsMask &= ~mask;
+}
+
+void RegSet::SetMaskVars(regMaskTP newMaskVars)
+{
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tLive regs: ");
+ if (_rsMaskVars == newMaskVars)
+ {
+ printf("(unchanged) ");
+ }
+ else
+ {
+ printRegMaskInt(_rsMaskVars);
+ m_rsCompiler->getEmitter()->emitDispRegSet(_rsMaskVars);
+ printf(" => ");
+ }
+ printRegMaskInt(newMaskVars);
+ m_rsCompiler->getEmitter()->emitDispRegSet(newMaskVars);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ _rsMaskVars = newMaskVars;
+}
+
+#ifdef DEBUG
+
+RegSet::rsStressRegsType RegSet::rsStressRegs()
+{
+#ifndef LEGACY_BACKEND
+ return RS_STRESS_NONE;
+#else // LEGACY_BACKEND
+ rsStressRegsType val = (rsStressRegsType)JitConfig.JitStressRegs();
+ if (val == RS_STRESS_NONE && m_rsCompiler->compStressCompile(Compiler::STRESS_REGS, 15))
+ val = RS_PICK_BAD_REG;
+ return val;
+#endif // LEGACY_BACKEND
+}
+#endif // DEBUG
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ * Includes 'includeHint' if 'regs' is empty
+ */
+
+regMaskTP RegSet::rsUseIfZero(regMaskTP regs, regMaskTP includeHint)
+{
+ return regs ? regs : includeHint;
+}
+
+/*****************************************************************************
+ * Excludes 'excludeHint' if it results in a non-empty mask
+ */
+
+regMaskTP RegSet::rsExcludeHint(regMaskTP regs, regMaskTP excludeHint)
+{
+ regMaskTP OKmask = regs & ~excludeHint;
+ return OKmask ? OKmask : regs;
+}
+
+/*****************************************************************************
+ * Narrows choice by 'narrowHint' if it results in a non-empty mask
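+ * For example, rsNarrowHint(rsRegMaskFree(), hint) prefers the hinted registers but
+ * falls back to the full free set when none of the hinted registers are available.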
+ */
+
+regMaskTP RegSet::rsNarrowHint(regMaskTP regs, regMaskTP narrowHint)
+{
+ regMaskTP narrowed = regs & narrowHint;
+ return narrowed ? narrowed : regs;
+}
+
+/*****************************************************************************
+ * Excludes 'exclude' from regs if non-zero, or from RBM_ALLINT
+ */
+
+regMaskTP RegSet::rsMustExclude(regMaskTP regs, regMaskTP exclude)
+{
+ // Try to exclude from current set
+ regMaskTP OKmask = regs & ~exclude;
+
+ // If the current set won't work, exclude from RBM_ALLINT
+ if (OKmask == RBM_NONE)
+ OKmask = (RBM_ALLINT & ~exclude);
+
+ assert(OKmask);
+
+ return OKmask;
+}
+
+/*****************************************************************************
+ *
+ * The following returns a mask that yields all free registers.
+ */
+
+// inline
+regMaskTP RegSet::rsRegMaskFree()
+{
+ /* Any register that is locked must also be marked as 'used' */
+
+ assert((rsMaskUsed & rsMaskLock) == rsMaskLock);
+
+ /* Any register that isn't used and doesn't hold a variable is free */
+
+ return RBM_ALLINT & ~(rsMaskUsed | rsMaskVars | rsMaskResvd);
+}
+
+/*****************************************************************************
+ *
+ * The following returns a mask of registers that may be grabbed.
+ */
+
+// inline
+regMaskTP RegSet::rsRegMaskCanGrab()
+{
+ /* Any register that is locked must also be marked as 'used' */
+
+ assert((rsMaskUsed & rsMaskLock) == rsMaskLock);
+
+ /* Any register that isn't locked and doesn't hold a var can be grabbed */
+
+ regMaskTP result = (RBM_ALLINT & ~(rsMaskLock | rsMaskVars));
+
+#ifdef _TARGET_ARM_
+
+ // On the ARM when we pass structs in registers we set the rsUsedTree[]
+ // to be the full TYP_STRUCT tree, which doesn't allow us to spill/unspill
+ // these argument registers. To fix JitStress issues that can occur
+ // when rsPickReg tries to spill one of these registers we just remove them
+ // from the set of registers that we can grab
+ //
+ regMaskTP structArgMask = RBM_NONE;
+ // Load all the variable arguments in registers back to their registers.
+ for (regNumber reg = REG_ARG_FIRST; reg <= REG_ARG_LAST; reg = REG_NEXT(reg))
+ {
+ GenTreePtr regHolds = rsUsedTree[reg];
+ if ((regHolds != NULL) && (regHolds->TypeGet() == TYP_STRUCT))
+ {
+ structArgMask |= genRegMask(reg);
+ }
+ }
+ result &= ~structArgMask;
+#endif
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Pick a free register. It is guaranteed that a register is available.
+ * Note that rsPickReg() can spill a register, whereas rsPickFreeReg() will not.
+ */
+
+// inline
+regNumber RegSet::rsPickFreeReg(regMaskTP regMaskHint)
+{
+ regMaskTP freeRegs = rsRegMaskFree();
+ assert(freeRegs != RBM_NONE);
+
+ regMaskTP regs = rsNarrowHint(freeRegs, regMaskHint);
+
+ return rsGrabReg(regs);
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as used and locked.
+ */
+
+// inline
+void RegSet::rsLockReg(regMaskTP regMask)
+{
+ /* Must not be already marked as either used or locked */
+
+ assert((rsMaskUsed & regMask) == 0);
+ rsMaskUsed |= regMask;
+ assert((rsMaskLock & regMask) == 0);
+ rsMaskLock |= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark an already used set of registers as locked.
+ */
+
+// inline
+void RegSet::rsLockUsedReg(regMaskTP regMask)
+{
+ /* Must not be already marked as locked. Must be already marked as used. */
+
+ assert((rsMaskLock & regMask) == 0);
+ assert((rsMaskUsed & regMask) == regMask);
+
+ rsMaskLock |= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as no longer used/locked.
+ */
+
+// inline
+void RegSet::rsUnlockReg(regMaskTP regMask)
+{
+ /* Must be currently marked as both used and locked */
+
+ assert((rsMaskUsed & regMask) == regMask);
+ rsMaskUsed -= regMask;
+ assert((rsMaskLock & regMask) == regMask);
+ rsMaskLock -= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as no longer locked.
+ */
+
+// inline
+void RegSet::rsUnlockUsedReg(regMaskTP regMask)
+{
+ /* Must be currently marked as both used and locked */
+
+ assert((rsMaskUsed & regMask) == regMask);
+ assert((rsMaskLock & regMask) == regMask);
+ rsMaskLock -= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as used and locked. It may already have
+ * been marked as used.
+ */
+
+// inline
+void RegSet::rsLockReg(regMaskTP regMask, regMaskTP* usedMask)
+{
+ /* Is it already marked as used? */
+
+ regMaskTP used = (rsMaskUsed & regMask);
+ regMaskTP unused = (regMask & ~used);
+
+ if (used)
+ rsLockUsedReg(used);
+
+ if (unused)
+ rsLockReg(unused);
+
+ *usedMask = used;
+}
+
+/*****************************************************************************
+ *
+ * Mark the given set of registers as no longer
+ */
+
+// inline
+void RegSet::rsUnlockReg(regMaskTP regMask, regMaskTP usedMask)
+{
+ regMaskTP unused = (regMask & ~usedMask);
+
+ if (usedMask)
+ rsUnlockUsedReg(usedMask);
+
+ if (unused)
+ rsUnlockReg(unused);
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Assume all registers contain garbage (called at start of codegen and when
+ * we encounter a code label).
+ */
+
+// inline
+void RegTracker::rsTrackRegClr()
+{
+ assert(RV_TRASH == 0);
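+ // RV_TRASH is zero, so clearing the array marks every register's value as unknown.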
+ memset(rsRegValues, 0, sizeof(rsRegValues));
+}
+
+/*****************************************************************************
+ *
+ * Trash the rsRegValues associated with a register
+ */
+
+// inline
+void RegTracker::rsTrackRegTrash(regNumber reg)
+{
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+ /* Record the new value for the register */
+
+ rsRegValues[reg].rvdKind = RV_TRASH;
+}
+
+/*****************************************************************************
+ *
+ * calls rsTrackRegTrash on the set of registers in regmask
+ */
+
+// inline
+void RegTracker::rsTrackRegMaskTrash(regMaskTP regMask)
+{
+ regMaskTP regBit = 1;
+
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit > regMask)
+ {
+ break;
+ }
+
+ if (regBit & regMask)
+ {
+ rsTrackRegTrash(regNum);
+ }
+ }
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegIntCns(regNumber reg, ssize_t val)
+{
+ assert(genIsValidIntReg(reg));
+
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+ /* Record the new value for the register */
+
+ rsRegValues[reg].rvdKind = RV_INT_CNS;
+ rsRegValues[reg].rvdIntCnsVal = val;
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegLclVarLng(regNumber reg, unsigned var, bool low)
+{
+ assert(genIsValidIntReg(reg));
+
+ if (compiler->lvaTable[var].lvAddrExposed)
+ {
+ return;
+ }
+
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+ /* Record the new value for the register */
+
+ rsRegValues[reg].rvdKind = (low ? RV_LCL_VAR_LNG_LO : RV_LCL_VAR_LNG_HI);
+ rsRegValues[reg].rvdLclVarNum = var;
+}
+
+/*****************************************************************************/
+
+// inline
+bool RegTracker::rsTrackIsLclVarLng(regValKind rvKind)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return false;
+ }
+
+ if (rvKind == RV_LCL_VAR_LNG_LO || rvKind == RV_LCL_VAR_LNG_HI)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegClsVar(regNumber reg, GenTreePtr clsVar)
+{
+ rsTrackRegTrash(reg);
+}
+
+/*****************************************************************************/
+
+// inline
+void RegTracker::rsTrackRegAssign(GenTree* op1, GenTree* op2)
+{
+ /* Constant/bitvalue has precedence over local */
+ switch (rsRegValues[op2->gtRegNum].rvdKind)
+ {
+ case RV_INT_CNS:
+ break;
+
+ default:
+
+ /* Mark RHS register as containing the value */
+
+ switch (op1->gtOper)
+ {
+ case GT_LCL_VAR:
+ rsTrackRegLclVar(op2->gtRegNum, op1->gtLclVarCommon.gtLclNum);
+ break;
+ case GT_CLS_VAR:
+ rsTrackRegClsVar(op2->gtRegNum, op1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Given a regmask, find the best regPairNo that can be formed
+ * or return REG_PAIR_NONE if no register pair can be formed
+ */
+
+regPairNo RegSet::rsFindRegPairNo(regMaskTP regAllowedMask)
+{
+ regPairNo regPair;
+
+ // Remove any special purpose registers such as SP, EBP, etc...
+ regMaskTP specialUseMask = (rsMaskResvd | RBM_SPBASE);
+#if ETW_EBP_FRAMED
+ specialUseMask |= RBM_FPBASE;
+#else
+ if (m_rsCompiler->codeGen->isFramePointerUsed())
+ specialUseMask |= RBM_FPBASE;
+#endif
+
+ regAllowedMask &= ~specialUseMask;
+
+ /* Check if regAllowedMask has zero or one bits set */
+ if ((regAllowedMask & (regAllowedMask - 1)) == 0)
+ {
+ /* If so we won't be able to find a reg pair */
+ return REG_PAIR_NONE;
+ }
+
+#ifdef _TARGET_X86_
+ if (regAllowedMask & RBM_EAX)
+ {
+ /* EAX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EDX)
+ {
+ regPair = REG_PAIR_EAXEDX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ECX)
+ {
+ regPair = REG_PAIR_EAXECX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBX)
+ {
+ regPair = REG_PAIR_EAXEBX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_EAXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_EAXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EAXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_ECX)
+ {
+ /* ECX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EDX)
+ {
+ regPair = REG_PAIR_ECXEDX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBX)
+ {
+ regPair = REG_PAIR_ECXEBX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_ECXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_ECXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_ECXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_EDX)
+ {
+ /* EDX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EBX)
+ {
+ regPair = REG_PAIR_EDXEBX;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_EDXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_EDXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EDXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_EBX)
+ {
+ /* EBX is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_ESI)
+ {
+ regPair = REG_PAIR_EBXESI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_EBXEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EBXEBP;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_ESI)
+ {
+ /* ESI is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EDI)
+ {
+ regPair = REG_PAIR_ESIEDI;
+ goto RET;
+ }
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EBPESI;
+ goto RET;
+ }
+ }
+
+ if (regAllowedMask & RBM_EDI)
+ {
+ /* EDI is available, see if we can pair it with another reg */
+
+ if (regAllowedMask & RBM_EBP)
+ {
+ regPair = REG_PAIR_EBPEDI;
+ goto RET;
+ }
+ }
+#endif
+
+#ifdef _TARGET_ARM_
+ // ARM is symmetric, so don't bother to prefer some pairs to others
+ //
+ // Iterate the registers in the order specified by rpRegTmpOrder/raRegTmpOrder
+
+ for (unsigned index1 = 0; index1 < REG_TMP_ORDER_COUNT; index1++)
+ {
+ regNumber reg1;
+ if (m_rsCompiler->rpRegAllocDone)
+ reg1 = raRegTmpOrder[index1];
+ else
+ reg1 = rpRegTmpOrder[index1];
+
+ regMaskTP reg1Mask = genRegMask(reg1);
+
+ if ((regAllowedMask & reg1Mask) == 0)
+ continue;
+
+ for (unsigned index2 = index1 + 1; index2 < REG_TMP_ORDER_COUNT; index2++)
+ {
+ regNumber reg2;
+ if (m_rsCompiler->rpRegAllocDone)
+ reg2 = raRegTmpOrder[index2];
+ else
+ reg2 = rpRegTmpOrder[index2];
+
+ regMaskTP reg2Mask = genRegMask(reg2);
+
+ if ((regAllowedMask & reg2Mask) == 0)
+ continue;
+
+ regMaskTP pairMask = genRegMask(reg1) | genRegMask(reg2);
+
+ // if reg1 is larger than reg2 then swap the registers
+ if (reg1 > reg2)
+ {
+ regNumber regT = reg1;
+ reg1 = reg2;
+ reg2 = regT;
+ }
+
+ regPair = gen2regs2pair(reg1, reg2);
+ return regPair;
+ }
+ }
+#endif
+
+ assert(!"Unreachable code");
+ regPair = REG_PAIR_NONE;
+
+#ifdef _TARGET_X86_
+RET:
+#endif
+
+ return regPair;
+}
+
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************/
+
+RegSet::RegSet(Compiler* compiler, GCInfo& gcInfo) : m_rsCompiler(compiler), m_rsGCInfo(gcInfo)
+{
+ /* Initialize the spill logic */
+
+ rsSpillInit();
+
+ /* Initialize the argument register count */
+ // TODO-Cleanup: Consider moving intRegState and floatRegState to RegSet. They used
+ // to be initialized here, but are now initialized in the CodeGen constructor.
+ // intRegState.rsCurRegArgNum = 0;
+ // floatRegState.rsCurRegArgNum = 0;
+
+ rsMaskResvd = RBM_NONE;
+
+#ifdef LEGACY_BACKEND
+ rsMaskMult = RBM_NONE;
+ rsMaskUsed = RBM_NONE;
+ rsMaskLock = RBM_NONE;
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_ARMARCH_
+ rsMaskCalleeSaved = RBM_NONE;
+#endif // _TARGET_ARMARCH_
+
+#ifdef _TARGET_ARM_
+ rsMaskPreSpillRegArg = RBM_NONE;
+ rsMaskPreSpillAlign = RBM_NONE;
+#endif
+
+#ifdef DEBUG
+ rsModifiedRegsMaskInitialized = false;
+#endif // DEBUG
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Marks the register that holds the given operand value as 'used'. If 'addr'
+ * is non-zero, the register is part of a complex address mode that needs to
+ * be marked if the register is ever spilled.
+ */
+
+void RegSet::rsMarkRegUsed(GenTreePtr tree, GenTreePtr addr)
+{
+ var_types type;
+ regNumber regNum;
+ regMaskTP regMask;
+
+ /* The value must be sitting in a register */
+
+ assert(tree);
+ assert(tree->gtFlags & GTF_REG_VAL);
+
+ type = tree->TypeGet();
+ regNum = tree->gtRegNum;
+
+ if (isFloatRegType(type))
+ regMask = genRegMaskFloat(regNum, type);
+ else
+ regMask = genRegMask(regNum);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds ", m_rsCompiler->compRegVarName(regNum));
+ Compiler::printTreeID(tree);
+ if (addr != NULL)
+ {
+ printf("/");
+ Compiler::printTreeID(addr);
+ }
+ else if (tree->gtOper == GT_CNS_INT)
+ {
+ if (tree->IsIconHandle())
+ printf(" / Handle(0x%08p)", dspPtr(tree->gtIntCon.gtIconVal));
+ else
+ printf(" / Constant(0x%X)", tree->gtIntCon.gtIconVal);
+ }
+ printf("]\n");
+ }
+#endif // DEBUG
+
+ /* Remember whether the register holds a pointer */
+
+ m_rsGCInfo.gcMarkRegPtrVal(regNum, type);
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+
+ /* Is the register used by two different values simultaneously? */
+
+ if (regMask & rsMaskUsed)
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regNum, type);
+ }
+
+ /* Set the register's bit in the 'used' bitset */
+
+ rsMaskUsed |= regMask;
+
+ /* Remember what values are in what registers, in case we have to spill */
+ assert(regNum != REG_SPBASE);
+ assert(rsUsedTree[regNum] == NULL);
+ rsUsedTree[regNum] = tree;
+ assert(rsUsedAddr[regNum] == NULL);
+ rsUsedAddr[regNum] = addr;
+}
+
+void RegSet::rsMarkArgRegUsedByPromotedFieldArg(GenTreePtr promotedStructArg, regNumber regNum, bool isGCRef)
+{
+ regMaskTP regMask;
+
+ /* The value must be sitting in a register */
+
+ assert(promotedStructArg);
+ assert(promotedStructArg->TypeGet() == TYP_STRUCT);
+
+ assert(regNum < MAX_REG_ARG);
+ regMask = genRegMask(regNum);
+ assert((regMask & RBM_ARG_REGS) != RBM_NONE);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds ", m_rsCompiler->compRegVarName(regNum));
+ Compiler::printTreeID(promotedStructArg);
+ if (promotedStructArg->gtOper == GT_CNS_INT)
+ {
+ if (promotedStructArg->IsIconHandle())
+ printf(" / Handle(0x%08p)", dspPtr(promotedStructArg->gtIntCon.gtIconVal));
+ else
+ printf(" / Constant(0x%X)", promotedStructArg->gtIntCon.gtIconVal);
+ }
+ printf("]\n");
+ }
+#endif
+
+ /* Remember whether the register holds a pointer */
+
+ m_rsGCInfo.gcMarkRegPtrVal(regNum, (isGCRef ? TYP_REF : TYP_INT));
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+
+ /* Is the register used by two different values simultaneously? */
+
+ if (regMask & rsMaskUsed)
+ {
+ /* Save the preceding use information */
+
+ assert(isValidIntArgReg(regNum)); // We are expecting only integer argument registers here
+ rsRecMultiReg(regNum, TYP_I_IMPL);
+ }
+
+ /* Set the register's bit in the 'used' bitset */
+
+ rsMaskUsed |= regMask;
+
+ /* Remember what values are in what registers, in case we have to spill */
+ assert(regNum != REG_SPBASE);
+ assert(rsUsedTree[regNum] == 0);
+ rsUsedTree[regNum] = promotedStructArg;
+}
+
+/*****************************************************************************
+ *
+ * Marks the register pair that holds the given operand value as 'used'.
+ */
+
+void RegSet::rsMarkRegPairUsed(GenTreePtr tree)
+{
+ regNumber regLo;
+ regNumber regHi;
+ regPairNo regPair;
+ regMaskTP regMask;
+
+ /* The value must be sitting in a register */
+
+ assert(tree);
+#if CPU_HAS_FP_SUPPORT
+ assert(tree->gtType == TYP_LONG);
+#else
+ assert(tree->gtType == TYP_LONG || tree->gtType == TYP_DOUBLE);
+#endif
+ assert(tree->gtFlags & GTF_REG_VAL);
+
+ regPair = tree->gtRegPair;
+ regMask = genRegPairMask(regPair);
+
+ regLo = genRegPairLo(regPair);
+ regHi = genRegPairHi(regPair);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds \n", m_rsCompiler->compRegVarName(regLo));
+ Compiler::printTreeID(tree);
+ printf("/lo32\n");
+ printf("\t\t\t\t\t\t\tThe register %s currently holds \n", m_rsCompiler->compRegVarName(regHi));
+ Compiler::printTreeID(tree);
+ printf("/hi32\n");
+ }
+#endif
+
+ /* Neither register obviously holds a pointer value */
+
+ m_rsGCInfo.gcMarkRegSetNpt(regMask);
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+
+ /* Are the registers used by two different values simultaneously? */
+
+ if (rsMaskUsed & genRegMask(regLo))
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regLo, TYP_INT);
+ }
+
+ if (rsMaskUsed & genRegMask(regHi))
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regHi, TYP_INT);
+ }
+
+ /* Can't mark a register pair more than once as used */
+
+ // assert((regMask & rsMaskUsed) == 0);
+
+ /* Mark the registers as 'used' */
+
+ rsMaskUsed |= regMask;
+
+ /* Remember what values are in what registers, in case we have to spill */
+
+ if (regLo != REG_STK)
+ {
+ assert(rsUsedTree[regLo] == 0);
+ assert(regLo != REG_SPBASE);
+ rsUsedTree[regLo] = tree;
+ }
+
+ if (regHi != REG_STK)
+ {
+ assert(rsUsedTree[regHi] == 0);
+ assert(regHi != REG_SPBASE);
+ rsUsedTree[regHi] = tree;
+ }
+}
+
+/*****************************************************************************
+ *
+ * Returns true if the given tree is currently held in reg.
+ * Note that reg may be used by multiple trees, in which case we have
+ * to search rsMultiDesc[reg].
+ */
+
+bool RegSet::rsIsTreeInReg(regNumber reg, GenTreePtr tree)
+{
+ /* First do the trivial check */
+
+ if (rsUsedTree[reg] == tree)
+ return true;
+
+ /* If the register is used by multiple trees, we have to search the list
+ in rsMultiDesc[reg] */
+
+ if (genRegMask(reg) & rsMaskMult)
+ {
+ SpillDsc* multiDesc = rsMultiDesc[reg];
+ assert(multiDesc);
+
+ for (/**/; multiDesc; multiDesc = multiDesc->spillNext)
+ {
+ if (multiDesc->spillTree == tree)
+ return true;
+
+ assert((!multiDesc->spillNext) == (!multiDesc->spillMoreMultis));
+ }
+ }
+
+ /* Not found. It must be spilled */
+
+ return false;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Finds the SpillDsc corresponding to 'tree' assuming it was spilled from 'reg'.
+ */
+
+RegSet::SpillDsc* RegSet::rsGetSpillInfo(GenTreePtr tree,
+ regNumber reg,
+ SpillDsc** pPrevDsc
+#ifdef LEGACY_BACKEND
+ ,
+ SpillDsc** pMultiDsc
+#endif // LEGACY_BACKEND
+ )
+{
+ /* Normally, trees are unspilled in the order of being spilled due to
+ the post-order walking of trees during code-gen. However, this will
+ not be true for something like a GT_ARR_ELEM node */
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef LEGACY_BACKEND
+ SpillDsc* multi = rsSpillDesc[reg];
+#endif // LEGACY_BACKEND
+
+ SpillDsc* prev;
+ SpillDsc* dsc;
+ for (prev = nullptr, dsc = rsSpillDesc[reg]; dsc != nullptr; prev = dsc, dsc = dsc->spillNext)
+ {
+#ifdef LEGACY_BACKEND
+ if (prev && !prev->spillMoreMultis)
+ multi = dsc;
+#endif // LEGACY_BACKEND
+
+ if (dsc->spillTree == tree)
+ {
+ break;
+ }
+ }
+
+ if (pPrevDsc)
+ {
+ *pPrevDsc = prev;
+ }
+#ifdef LEGACY_BACKEND
+ if (pMultiDsc)
+ *pMultiDsc = multi;
+#endif // LEGACY_BACKEND
+
+ return dsc;
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Mark the register set given by the register mask as not used.
+ */
+
+void RegSet::rsMarkRegFree(regMaskTP regMask)
+{
+ /* Are we freeing any multi-use registers? */
+
+ if (regMask & rsMaskMult)
+ {
+ rsMultRegFree(regMask);
+ return;
+ }
+
+ m_rsGCInfo.gcMarkRegSetNpt(regMask);
+
+ regMaskTP regBit = 1;
+
+ for (regNumber regNum = REG_FIRST; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit > regMask)
+ break;
+
+ if (regBit & regMask)
+ {
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s no longer holds ", m_rsCompiler->compRegVarName(regNum));
+ Compiler::printTreeID(rsUsedTree[regNum]);
+ Compiler::printTreeID(rsUsedAddr[regNum]);
+ printf("\n");
+ }
+#endif
+ GenTreePtr usedTree = rsUsedTree[regNum];
+ assert(usedTree != NULL);
+ rsUsedTree[regNum] = NULL;
+ rsUsedAddr[regNum] = NULL;
+#ifdef _TARGET_ARM_
+ if (usedTree->TypeGet() == TYP_DOUBLE)
+ {
+ regNum = REG_NEXT(regNum);
+ regBit <<= 1;
+
+ assert(regBit & regMask);
+ assert(rsUsedTree[regNum] == NULL);
+ assert(rsUsedAddr[regNum] == NULL);
+ }
+#endif
+ }
+ }
+
+ /* Remove the register set from the 'used' set */
+
+ assert((regMask & rsMaskUsed) == regMask);
+ rsMaskUsed -= regMask;
+
+ /* No locked register may ever be marked as free */
+
+ assert((rsMaskLock & rsRegMaskFree()) == 0);
+}
+
+/*****************************************************************************
+ *
+ * Free the register from the given tree. If the register holds other tree,
+ * it will still be marked as used, else it will be completely free.
+ */
+
+void RegSet::rsMarkRegFree(regNumber reg, GenTreePtr tree)
+{
+ assert(rsIsTreeInReg(reg, tree));
+ regMaskTP regMask = genRegMask(reg);
+
+ /* If the register is not multi-used, it's easy. Just do the default work */
+
+ if (!(regMask & rsMaskMult))
+ {
+ rsMarkRegFree(regMask);
+ return;
+ }
+
+ /* The register is multi-used. We just have to free it from the given tree but
+ leave the other trees that use the register as they are. The register may
+ no longer be multi-used once it is freed from the given tree */
+
+ /* Is the tree in rsUsedTree[] or in rsMultiDesc[]?
+ If it is in rsUsedTree[], update rsUsedTree[] */
+
+ if (rsUsedTree[reg] == tree)
+ {
+ rsRmvMultiReg(reg);
+ return;
+ }
+
+ /* The tree is in rsMultiDesc[] instead of in rsUsedTree[]. Find the desc
+ corresponding to the tree and just remove it from there */
+
+ for (SpillDsc *multiDesc = rsMultiDesc[reg], *prevDesc = NULL; multiDesc;
+ prevDesc = multiDesc, multiDesc = multiDesc->spillNext)
+ {
+ /* If we find the descriptor with the tree we are looking for,
+ discard it */
+
+ if (multiDesc->spillTree != tree)
+ continue;
+
+ if (prevDesc == NULL)
+ {
+ /* The very first desc in rsMultiDesc[] matched. If there are
+ no further descs, then the register is no longer multi-used */
+
+ if (!multiDesc->spillMoreMultis)
+ rsMaskMult -= regMask;
+
+ rsMultiDesc[reg] = multiDesc->spillNext;
+ }
+ else
+ {
+ /* There is at least one other desc before the match, so the
+ register is still multi-used. However, we may have to
+ update spillMoreMultis for the previous desc. */
+
+ if (!multiDesc->spillMoreMultis)
+ prevDesc->spillMoreMultis = false;
+
+ prevDesc->spillNext = multiDesc->spillNext;
+ }
+
+ SpillDsc::freeDsc(this, multiDesc);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use dec for ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(tree);
+ printf(" - now ");
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult);
+ }
+#endif
+
+ return;
+ }
+
+ assert(!"Didn't find the spilled tree in rsMultiDesc[]");
+}
+
+/*****************************************************************************
+ *
+ * Mark the register set given by the register mask as not used; there may
+ * be some 'multiple-use' registers in the set.
+ */
+
+void RegSet::rsMultRegFree(regMaskTP regMask)
+{
+ /* Free any multiple-use registers first */
+ regMaskTP nonMultMask = regMask & ~rsMaskMult;
+ regMaskTP myMultMask = regMask & rsMaskMult;
+
+ if (myMultMask)
+ {
+ regNumber regNum;
+ regMaskTP regBit;
+
+ for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit > myMultMask)
+ break;
+
+ if (regBit & myMultMask)
+ {
+ /* Free the multi-use register 'regNum' */
+ var_types type = rsRmvMultiReg(regNum);
+#ifdef _TARGET_ARM_
+ if (genIsValidFloatReg(regNum) && (type == TYP_DOUBLE))
+ {
+ // On ARM32, We skip the second register for a TYP_DOUBLE
+ regNum = REG_NEXT(regNum);
+ regBit <<= 1;
+ }
+#endif // _TARGET_ARM_
+ }
+ }
+ }
+
+ /* If there are any single-use registers, free them */
+
+ if (nonMultMask)
+ rsMarkRegFree(nonMultMask);
+}
+
+/*****************************************************************************
+ *
+ * Returns the number of registers that are currently free which appear in needReg.
+ */
+
+unsigned RegSet::rsFreeNeededRegCount(regMaskTP needReg)
+{
+ regMaskTP regNeededFree = rsRegMaskFree() & needReg;
+ unsigned cntFree = 0;
+
+ /* While some registers are free ... */
+
+ while (regNeededFree)
+ {
+ /* Remove the next register bit and bump the count */
+
+ regNeededFree -= genFindLowestBit(regNeededFree);
+ cntFree += 1;
+ }
+
+ return cntFree;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Record the fact that the given register now contains the given local
+ * variable. Pointers are handled specially since reusing the register
+ * will extend the lifetime of a pointer register which is not a register
+ * variable.
+ */
+
+void RegTracker::rsTrackRegLclVar(regNumber reg, unsigned var)
+{
+ LclVarDsc* varDsc = &compiler->lvaTable[var];
+ assert(reg != REG_STK);
+#if CPU_HAS_FP_SUPPORT
+ assert(varTypeIsFloating(varDsc->TypeGet()) == false);
+#endif
+ // Kill the register before doing anything in case we take a
+ // shortcut out of here
+ rsRegValues[reg].rvdKind = RV_TRASH;
+
+ if (compiler->lvaTable[var].lvAddrExposed)
+ {
+ return;
+ }
+
+ /* Keep track of which registers we ever touch */
+
+ regSet->rsSetRegsModified(genRegMask(reg));
+
+#if REDUNDANT_LOAD
+
+ /* Is the variable a pointer? */
+
+ if (varTypeIsGC(varDsc->TypeGet()))
+ {
+ /* Don't track pointer register vars */
+
+ if (varDsc->lvRegister)
+ {
+ return;
+ }
+
+ /* Don't track when fully interruptible */
+
+ if (compiler->genInterruptible)
+ {
+ return;
+ }
+ }
+ else if (varDsc->lvNormalizeOnLoad())
+ {
+ return;
+ }
+
+#endif
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s now holds V%02u\n", compiler->compRegVarName(reg), var);
+ }
+#endif
+
+ /* Record the new value for the register. ptr var needed for
+ * lifetime extension
+ */
+
+ rsRegValues[reg].rvdKind = RV_LCL_VAR;
+
+ // If this is a cast of a 64 bit int, then we must have the low 32 bits.
+ if (genActualType(varDsc->TypeGet()) == TYP_LONG)
+ {
+ rsRegValues[reg].rvdKind = RV_LCL_VAR_LNG_LO;
+ }
+
+ rsRegValues[reg].rvdLclVarNum = var;
+}
+
+/*****************************************************************************/
+
+void RegTracker::rsTrackRegSwap(regNumber reg1, regNumber reg2)
+{
+ RegValDsc tmp;
+
+ tmp = rsRegValues[reg1];
+ rsRegValues[reg1] = rsRegValues[reg2];
+ rsRegValues[reg2] = tmp;
+}
+
+void RegTracker::rsTrackRegCopy(regNumber reg1, regNumber reg2)
+{
+ /* Keep track of which registers we ever touch */
+
+ assert(reg1 < REG_COUNT);
+ assert(reg2 < REG_COUNT);
+
+ regSet->rsSetRegsModified(genRegMask(reg1));
+
+ rsRegValues[reg1] = rsRegValues[reg2];
+}
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ * One of the operands of this complex address mode has been spilled
+ */
+
+void rsAddrSpillOper(GenTreePtr addr)
+{
+ if (addr)
+ {
+ assert(addr->gtOper == GT_IND || addr->gtOper == GT_ARR_ELEM || addr->gtOper == GT_LEA ||
+ addr->gtOper == GT_CMPXCHG);
+
+ // GTF_SPILLED_OP2 says "both operands have been spilled"
+ assert((addr->gtFlags & GTF_SPILLED_OP2) == 0);
+
+ if ((addr->gtFlags & GTF_SPILLED_OPER) == 0)
+ addr->gtFlags |= GTF_SPILLED_OPER;
+ else
+ addr->gtFlags |= GTF_SPILLED_OP2;
+ }
+}
+
+void rsAddrUnspillOper(GenTreePtr addr)
+{
+ if (addr)
+ {
+ assert(addr->gtOper == GT_IND || addr->gtOper == GT_ARR_ELEM || addr->gtOper == GT_LEA ||
+ addr->gtOper == GT_CMPXCHG);
+
+ assert((addr->gtFlags & GTF_SPILLED_OPER) != 0);
+
+ // Both operands spilled?
+ if ((addr->gtFlags & GTF_SPILLED_OP2) != 0)
+ addr->gtFlags &= ~GTF_SPILLED_OP2;
+ else
+ addr->gtFlags &= ~GTF_SPILLED_OPER;
+ }
+}
+
+void RegSet::rsSpillRegIfUsed(regNumber reg)
+{
+ if (rsMaskUsed & genRegMask(reg))
+ {
+ rsSpillReg(reg);
+ }
+}
+
+#endif // LEGACY_BACKEND
+
+//------------------------------------------------------------
+// rsSpillTree: Spill the tree held in 'reg'.
+//
+// Arguments:
+// reg - Register of tree node that is to be spilled
+// tree - GenTree node that is being spilled
+// regIdx - Register index identifying the specific result
+// register of a multi-reg call node. For single-reg
+// producing tree nodes its value is zero.
+//
+// Return Value:
+// None.
+//
+// Assumption:
+// RyuJIT backend specific: in case of multi-reg call nodes, GTF_SPILL
+// flag associated with the reg that is being spilled is cleared. The
+// caller of this method is expected to clear GTF_SPILL flag on call
+// node after all of its registers marked for spilling are spilled.
+//
+void RegSet::rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx /* =0 */)
+{
+ assert(tree != nullptr);
+
+ GenTreeCall* call = nullptr;
+ var_types treeType;
+
+#ifndef LEGACY_BACKEND
+ if (tree->IsMultiRegCall())
+ {
+ call = tree->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ treeType = retTypeDesc->GetReturnRegType(regIdx);
+ }
+ else
+#endif
+ {
+ treeType = tree->TypeGet();
+ }
+
+ var_types tempType = Compiler::tmpNormalizeType(treeType);
+ regMaskTP mask;
+ bool floatSpill = false;
+
+ if (isFloatRegType(treeType))
+ {
+ floatSpill = true;
+ mask = genRegMaskFloat(reg, treeType);
+ }
+ else
+ {
+ mask = genRegMask(reg);
+ }
+
+ rsNeededSpillReg = true;
+
+#ifdef LEGACY_BACKEND
+ // The register we're spilling must be used but not locked
+ // or an enregistered variable.
+
+ assert((mask & rsMaskUsed) == mask);
+ assert((mask & rsMaskLock) == 0);
+ assert((mask & rsMaskVars) == 0);
+#endif // LEGACY_BACKEND
+
+#ifndef LEGACY_BACKEND
+ // We should only be spilling nodes marked for spill,
+ // vars should be handled elsewhere, and to prevent
+ // spilling twice clear GTF_SPILL flag on tree node.
+ //
+ // In case of multi-reg call nodes only the spill flag
+ // associated with the reg is cleared. Spill flag on
+ // call node should be cleared by the caller of this method.
+ assert(tree->gtOper != GT_REG_VAR);
+ assert((tree->gtFlags & GTF_SPILL) != 0);
+
+ unsigned regFlags = 0;
+ if (call != nullptr)
+ {
+ regFlags = call->GetRegSpillFlagByIdx(regIdx);
+ assert((regFlags & GTF_SPILL) != 0);
+ regFlags &= ~GTF_SPILL;
+ }
+ else
+ {
+ assert(!varTypeIsMultiReg(tree));
+ tree->gtFlags &= ~GTF_SPILL;
+ }
+#endif // !LEGACY_BACKEND
+
+#if CPU_LONG_USES_REGPAIR
+ // Are we spilling a part of a register pair?
+ if (treeType == TYP_LONG)
+ {
+ tempType = TYP_I_IMPL;
+ assert(genRegPairLo(tree->gtRegPair) == reg || genRegPairHi(tree->gtRegPair) == reg);
+ }
+ else
+ {
+ assert(tree->gtFlags & GTF_REG_VAL);
+ assert(tree->gtRegNum == reg);
+ }
+#else
+ assert(tree->InReg());
+ assert(tree->gtRegNum == reg || (call != nullptr && call->GetRegNumByIdx(regIdx) == reg));
+#endif // CPU_LONG_USES_REGPAIR
+
+ // Allocate a spill descriptor to record this spill
+ SpillDsc* spill = SpillDsc::alloc(m_rsCompiler, this, tempType);
+
+ // Grab a temp to store the spilled value
+ TempDsc* temp = m_rsCompiler->tmpGetTemp(tempType);
+ spill->spillTemp = temp;
+ tempType = temp->tdTempType();
+
+ // Remember what it is we have spilled
+ spill->spillTree = tree;
+#ifdef LEGACY_BACKEND
+ spill->spillAddr = rsUsedAddr[reg];
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s spilled with ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(spill->spillTree);
+#ifdef LEGACY_BACKEND
+ printf("/");
+ Compiler::printTreeID(spill->spillAddr);
+#endif // LEGACY_BACKEND
+ }
+#endif
+
+#ifdef LEGACY_BACKEND
+ // Is the register part of a complex address mode?
+ rsAddrSpillOper(rsUsedAddr[reg]);
+#endif // LEGACY_BACKEND
+
+ // 'lastDsc' is 'spill' for simple cases, and will point to the last
+ // multi-use descriptor if 'reg' is being multi-used
+ SpillDsc* lastDsc = spill;
+
+#ifdef LEGACY_BACKEND
+ if ((rsMaskMult & mask) == 0)
+ {
+ spill->spillMoreMultis = false;
+ }
+ else
+ {
+ // The register is being multi-used and will have entries in
+ // rsMultiDesc[reg]. Spill all of them (ie. move them to
+ // rsSpillDesc[reg]).
+ // When we unspill the reg, they will all be moved back to
+ // rsMultiDesc[].
+
+ spill->spillMoreMultis = true;
+
+ SpillDsc* nextDsc = rsMultiDesc[reg];
+
+ do
+ {
+ assert(nextDsc != nullptr);
+
+ // Is this multi-use part of a complex address mode?
+ rsAddrSpillOper(nextDsc->spillAddr);
+
+ // Mark the tree node as having been spilled
+ rsMarkSpill(nextDsc->spillTree, reg);
+
+ // lastDsc points to the last of the multi-spill descrs for 'reg'
+ nextDsc->spillTemp = temp;
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf(", ");
+ Compiler::printTreeID(nextDsc->spillTree);
+ printf("/");
+ Compiler::printTreeID(nextDsc->spillAddr);
+ }
+#endif
+
+ lastDsc->spillNext = nextDsc;
+ lastDsc = nextDsc;
+
+ nextDsc = nextDsc->spillNext;
+ } while (lastDsc->spillMoreMultis);
+
+ rsMultiDesc[reg] = nextDsc;
+
+ // 'reg' is no longer considered to be multi-used. We will set this
+ // mask again when this value gets unspilled
+ rsMaskMult &= ~mask;
+ }
+#endif // LEGACY_BACKEND
+
+ // Insert the spill descriptor(s) in the list
+ lastDsc->spillNext = rsSpillDesc[reg];
+ rsSpillDesc[reg] = spill;
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\n");
+ }
+#endif
+
+ // Generate the code to spill the register
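+ // Float/double values are stored using the tree's own type; other values use the normalized temp type.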
+ var_types storeType = floatSpill ? treeType : tempType;
+
+ m_rsCompiler->codeGen->spillReg(storeType, temp, reg);
+
+ // Mark the tree node as having been spilled
+ rsMarkSpill(tree, reg);
+
+#ifdef LEGACY_BACKEND
+ // The register is now free
+ rsMarkRegFree(mask);
+#else
+ // In case of multi-reg call node also mark the specific
+ // result reg as spilled.
+ if (call != nullptr)
+ {
+ regFlags |= GTF_SPILLED;
+ call->SetRegSpillFlagByIdx(regFlags, regIdx);
+ }
+#endif //! LEGACY_BACKEND
+}
+
+#if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+/*****************************************************************************
+*
+* Spill the top of the FP x87 stack.
+*/
+void RegSet::rsSpillFPStack(GenTreePtr tree)
+{
+ SpillDsc* spill;
+ TempDsc* temp;
+ var_types treeType = tree->TypeGet();
+
+ assert(tree->OperGet() == GT_CALL);
+ spill = SpillDsc::alloc(m_rsCompiler, this, treeType);
+
+ /* Grab a temp to store the spilled value */
+
+ spill->spillTemp = temp = m_rsCompiler->tmpGetTemp(treeType);
+
+ /* Remember what it is we have spilled */
+
+ spill->spillTree = tree;
+ SpillDsc* lastDsc = spill;
+
+ regNumber reg = tree->gtRegNum;
+ lastDsc->spillNext = rsSpillDesc[reg];
+ rsSpillDesc[reg] = spill;
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ printf("\n");
+#endif
+ // m_rsCompiler->codeGen->inst_FS_ST(INS_fstp, emitActualTypeSize(treeType), temp, 0);
+ m_rsCompiler->codeGen->getEmitter()->emitIns_S(INS_fstp, emitActualTypeSize(treeType), temp->tdTempNum(), 0);
+
+ /* Mark the tree node as having been spilled */
+
+ rsMarkSpill(tree, reg);
+}
+#endif // defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Spill the given register (which we assume to be currently marked as used).
+ */
+
+void RegSet::rsSpillReg(regNumber reg)
+{
+ /* We must know the value in the register that we are spilling */
+ GenTreePtr tree = rsUsedTree[reg];
+
+#ifdef _TARGET_ARM_
+ if (tree == NULL && genIsValidFloatReg(reg) && !genIsValidDoubleReg(reg))
+ {
+ reg = REG_PREV(reg);
+ assert(rsUsedTree[reg]);
+ assert(rsUsedTree[reg]->TypeGet() == TYP_DOUBLE);
+ tree = rsUsedTree[reg];
+ }
+#endif
+
+ rsSpillTree(reg, tree);
+
+ /* The register no longer holds its original value */
+
+ rsUsedTree[reg] = NULL;
+}
+
+/*****************************************************************************
+ *
+ * Spill all registers in 'regMask' that are currently marked as used.
+ */
+
+void RegSet::rsSpillRegs(regMaskTP regMask)
+{
+ /* The registers we're spilling must not be locked,
+ or enregistered variables */
+
+ assert((regMask & rsMaskLock) == 0);
+ assert((regMask & rsMaskVars) == 0);
+
+ /* Only spill what's currently marked as used */
+
+ regMask &= rsMaskUsed;
+ assert(regMask);
+
+ regNumber regNum;
+ regMaskTP regBit;
+
+ for (regNum = REG_FIRST, regBit = 1; regNum < REG_COUNT; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regMask & regBit)
+ {
+ rsSpillReg(regNum);
+
+ regMask &= rsMaskUsed;
+
+ if (!regMask)
+ break;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * The following table determines the order in which registers are considered
+ * for internal tree temps to live in
+ */
+
+extern const regNumber raRegTmpOrder[] = {REG_TMP_ORDER};
+extern const regNumber rpRegTmpOrder[] = {REG_PREDICT_ORDER};
+#if FEATURE_FP_REGALLOC
+extern const regNumber raRegFltTmpOrder[] = {REG_FLT_TMP_ORDER};
+#endif
+
+/*****************************************************************************
+ *
+ * Choose a register from the given set in the preferred order (see above);
+ * if no registers are in the set return REG_STK.
+ */
+
+regNumber RegSet::rsPickRegInTmpOrder(regMaskTP regMask)
+{
+ if (regMask == RBM_NONE)
+ return REG_STK;
+
+ bool firstPass = true;
+ regMaskTP avoidMask =
+ ~rsGetModifiedRegsMask() & RBM_CALLEE_SAVED; // We want to avoid using any new callee saved register
+
+ while (true)
+ {
+ /* Iterate the registers in the order specified by raRegTmpOrder */
+
+ for (unsigned index = 0; index < REG_TMP_ORDER_COUNT; index++)
+ {
+ regNumber candidateReg = raRegTmpOrder[index];
+ regMaskTP candidateMask = genRegMask(candidateReg);
+
+ // For a FP base frame, don't use FP register.
+ if (m_rsCompiler->codeGen->isFramePointerUsed() && (candidateMask == RBM_FPBASE))
+ continue;
+
+ // For the first pass avoid selecting a never used register when there are other registers available
+ if (firstPass && ((candidateMask & avoidMask) != 0))
+ continue;
+
+ if (regMask & candidateMask)
+ return candidateReg;
+ }
+
+ if (firstPass == true)
+ firstPass = false; // OK, now we are willing to select a never used register
+ else
+ break;
+ }
+
+ return REG_STK;
+}
+
+/*****************************************************************************
+ * Choose a register from the 'regMask' set and return it. If no registers in
+ * the set are currently free, one of them will be spilled (even if other
+ * registers - not in the set - are currently free).
+ *
+ * If you don't require a register from a particular set, you should use rsPickReg() instead.
+ *
+ * rsModifiedRegsMask is modified to include the returned register.
+ */
+
+regNumber RegSet::rsGrabReg(regMaskTP regMask)
+{
+ regMaskTP OKmask;
+ regNumber regNum;
+ regMaskTP regBit;
+
+ assert(regMask);
+ regMask &= ~rsMaskLock;
+ assert(regMask);
+
+ /* See if one of the desired registers happens to be free */
+
+ OKmask = regMask & rsRegMaskFree();
+
+ regNum = rsPickRegInTmpOrder(OKmask);
+ if (REG_STK != regNum)
+ {
+ goto RET;
+ }
+
+ /* We'll have to spill one of the registers in 'regMask' */
+
+ OKmask = regMask & rsRegMaskCanGrab();
+ assert(OKmask);
+
+ for (regNum = REG_FIRST, regBit = 1; (regBit & OKmask) == 0; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regNum >= REG_COUNT)
+ {
+ assert(!"no register to grab!");
+ NO_WAY("Could not grab a register, Predictor should have prevented this!");
+ }
+ }
+
+ /* This will be the victim -- spill it */
+ rsSpillReg(regNum);
+
+ /* Make sure we did find a register to spill */
+ assert(genIsValidReg(regNum));
+
+RET:
+ /* Keep track of which registers we ever touch */
+ rsSetRegsModified(genRegMask(regNum));
+ return regNum;
+}
+
+/*****************************************************************************
+ * Find a register to use and return it, spilling if necessary.
+ *
+ * Look for a register in the following order: First, try and find a free register
+ * in 'regBest' (if 'regBest' is RBM_NONE, skip this step). Second, try to find a
+ * free register in 'regMask' (if 'regMask' is RBM_NONE, skip this step). Note that
+ * 'regBest' doesn't need to be a subset of 'regMask'. Third, find any free
+ * register. Fourth, spill a register. The register to spill will be in 'regMask',
+ * if 'regMask' is not RBM_NONE.
+ *
+ * Note that 'regMask' and 'regBest' are purely recommendations, and can be ignored;
+ * the caller can't expect that the returned register will be in those sets. In
+ * particular, under register stress, we specifically will pick registers not in
+ * these sets to ensure that callers don't require a register from those sets
+ * (and to ensure callers can handle the spilling that might ensue).
+ *
+ * Calling rsPickReg() with the default arguments (which sets 'regMask' and 'regBest' to RBM_NONE)
+ * is equivalent to calling rsGrabReg(rsRegMaskFree()).
+ *
+ * rsModifiedRegsMask is modified to include the returned register.
+ */
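+// Illustrative call (hypothetical caller, masks assumed):
+//     regNumber reg = rsPickReg(RBM_ALLINT, RBM_EAX);
+// prefers EAX, falls back to any free integer register, and may still return a
+// register outside both masks (after a spill) under register stress.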
+
+regNumber RegSet::rsPickReg(regMaskTP regMask, regMaskTP regBest)
+{
+ regNumber regNum;
+ regMaskTP spillMask;
+ regMaskTP canGrabMask;
+
+#ifdef DEBUG
+ if (rsStressRegs() >= 1)
+ {
+ /* 'regMask' is purely a recommendation, and callers should be
+ able to handle the case where it is not satisfied.
+ The logic here tries to return ~regMask to check that all callers
+ are prepared to handle such a case */
+
+ regMaskTP badRegs = rsMaskMult & rsRegMaskCanGrab();
+
+ badRegs = rsUseIfZero(badRegs, rsMaskUsed & rsRegMaskCanGrab());
+ badRegs = rsUseIfZero(badRegs, rsRegMaskCanGrab());
+ badRegs = rsExcludeHint(badRegs, regMask);
+
+ assert(badRegs != RBM_NONE);
+
+ return rsGrabReg(badRegs);
+ }
+
+#endif
+
+ regMaskTP freeMask = rsRegMaskFree();
+
+AGAIN:
+
+ /* By default we'd prefer to accept all available registers */
+
+ regMaskTP OKmask = freeMask;
+
+ // OKmask = rsNarrowHint(OKmask, rsUselessRegs());
+
+ /* Is there a 'best' register set? */
+
+ if (regBest)
+ {
+ OKmask &= regBest;
+ if (OKmask)
+ goto TRY_REG;
+ else
+ goto TRY_ALL;
+ }
+
+ /* Was a register set recommended by the caller? */
+
+ if (regMask)
+ {
+ OKmask &= regMask;
+ if (!OKmask)
+ goto TRY_ALL;
+ }
+
+TRY_REG:
+
+ /* Iterate the registers in the order specified by raRegTmpOrder */
+
+ regNum = rsPickRegInTmpOrder(OKmask);
+ if (REG_STK != regNum)
+ {
+ goto RET;
+ }
+
+TRY_ALL:
+
+ /* Were we considering 'regBest' ? */
+
+ if (regBest)
+ {
+ /* 'regBest' is no good -- ignore it and try 'regMask' instead */
+
+ regBest = RBM_NONE;
+ goto AGAIN;
+ }
+
+ /* Now let's consider all available registers */
+
+ /* Were we limited in our consideration? */
+
+ if (!regMask)
+ {
+ /* We need to spill one of the free registers */
+
+ spillMask = freeMask;
+ }
+ else
+ {
+ /* Did we not consider all free registers? */
+
+ if ((regMask & freeMask) != freeMask)
+ {
+ /* The recommended regset didn't work, so try all available regs */
+
+ regNum = rsPickRegInTmpOrder(freeMask);
+ if (REG_STK != regNum)
+ goto RET;
+ }
+
+ /* If we're going to spill, might as well go for the right one */
+
+ spillMask = regMask;
+ }
+
+ /* Make sure we can spill some register. */
+
+ canGrabMask = rsRegMaskCanGrab();
+ if ((spillMask & canGrabMask) == 0)
+ spillMask = canGrabMask;
+
+ assert(spillMask);
+
+ /* We have no choice but to spill one of the regs */
+
+ return rsGrabReg(spillMask);
+
+RET:
+
+ rsSetRegsModified(genRegMask(regNum));
+ return regNum;
+}
+
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Get the temp that was spilled from the given register (and free its
+ * spill descriptor while we're at it). Returns the temp (i.e. local var)
+ */
+
+TempDsc* RegSet::rsGetSpillTempWord(regNumber reg, SpillDsc* dsc, SpillDsc* prevDsc)
+{
+ assert((prevDsc == nullptr) || (prevDsc->spillNext == dsc));
+
+#ifdef LEGACY_BACKEND
+ /* Is dsc the last of a set of multi-used values */
+
+ if (prevDsc && prevDsc->spillMoreMultis && !dsc->spillMoreMultis)
+ prevDsc->spillMoreMultis = false;
+#endif // LEGACY_BACKEND
+
+ /* Remove this spill entry from the register's list */
+
+ (prevDsc ? prevDsc->spillNext : rsSpillDesc[reg]) = dsc->spillNext;
+
+ /* Remember which temp the value is in */
+
+ TempDsc* temp = dsc->spillTemp;
+
+ SpillDsc::freeDsc(this, dsc);
+
+ /* return the temp variable */
+
+ return temp;
+}
+
+#ifdef LEGACY_BACKEND
+/*****************************************************************************
+ *
+ * Reload the value that was spilled from the given register (and free its
+ * spill descriptor while we're at it). Returns the new register (which will
+ * be a member of 'needReg' if that value is non-zero).
+ *
+ * 'willKeepNewReg' indicates if the caller intends to mark newReg as used.
+ * If not, then we can't unspill the other multi-use descriptors (if any).
+ * Instead, we will just hold on to the temp and unspill them
+ * again as needed.
+ */
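+// Overall flow: look up the spill descriptor for (tree, oldReg), free it and recover
+// the spill temp, pick a new home (preferring 'oldReg' when it is allowed and free),
+// reload the value from the temp, and, for a kept multi-use value, move the remaining
+// multi-use descriptors from 'oldReg' over to the new register.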
+
+regNumber RegSet::rsUnspillOneReg(GenTreePtr tree, regNumber oldReg, KeepReg willKeepNewReg, regMaskTP needReg)
+{
+ /* Was oldReg multi-used when it was spilled? */
+
+ SpillDsc *prevDsc, *multiDsc;
+ SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg, &prevDsc, &multiDsc);
+ noway_assert((spillDsc != NULL) && (multiDsc != NULL));
+
+ bool multiUsed = multiDsc->spillMoreMultis;
+
+    /* We will use multiDsc to walk the rest of the spill list (if it's
+       multiUsed). Since the rsGetSpillTempWord() call below removes spillDsc
+       from that list, we have to take care of the case where multiDsc == spillDsc
+       by advancing multiDsc to spillDsc->spillNext */
+ if (multiUsed && multiDsc == spillDsc)
+ {
+ assert(spillDsc->spillNext);
+ multiDsc = spillDsc->spillNext;
+ }
+
+ /* Get the temp and free the spill-descriptor */
+
+ TempDsc* temp = rsGetSpillTempWord(oldReg, spillDsc, prevDsc);
+
+ // Pick a new home for the value:
+ // This must be a register matching the 'needReg' mask, if it is non-zero.
+    // Additionally, if 'oldReg' is in 'needReg' and it is free we will select oldReg.
+ // Also note that the rsGrabReg() call below may cause the chosen register to be spilled.
+ //
+ regMaskTP prefMask;
+ regMaskTP freeMask;
+ regNumber newReg;
+ var_types regType;
+ var_types loadType;
+
+ bool floatUnspill = false;
+
+#if FEATURE_FP_REGALLOC
+ floatUnspill = genIsValidFloatReg(oldReg);
+#endif
+
+ if (floatUnspill)
+ {
+ if (temp->tdTempType() == TYP_DOUBLE)
+ regType = TYP_DOUBLE;
+ else
+ regType = TYP_FLOAT;
+ loadType = regType;
+ prefMask = genRegMaskFloat(oldReg, regType);
+ freeMask = RegFreeFloat();
+ }
+ else
+ {
+ regType = TYP_I_IMPL;
+ loadType = temp->tdTempType();
+ prefMask = genRegMask(oldReg);
+ freeMask = rsRegMaskFree();
+ }
+
+ if ((((prefMask & needReg) != 0) || (needReg == 0)) && ((prefMask & freeMask) != 0))
+ {
+ needReg = prefMask;
+ }
+
+ if (floatUnspill)
+ {
+ RegisterPreference pref(RBM_ALLFLOAT, needReg);
+ newReg = PickRegFloat(regType, &pref, true);
+ }
+ else
+ {
+ newReg = rsGrabReg(rsUseIfZero(needReg, RBM_ALLINT));
+ }
+
+ m_rsCompiler->codeGen->trashReg(newReg);
+
+ /* Reload the value from the saved location into the new register */
+
+ m_rsCompiler->codeGen->reloadReg(loadType, temp, newReg);
+
+ if (multiUsed && (willKeepNewReg == KEEP_REG))
+ {
+ /* We will unspill all the other multi-use trees if the register
+ is going to be marked as used. If it is not going to be marked
+ as used, we will have a problem if the new register gets spilled
+ again.
+ */
+
+ /* We don't do the extra unspilling for complex address modes,
+ since someone up the call chain may have a different idea about
+ what registers are used to form the complex address mode (the
+ addrReg return value from genMakeAddressable).
+
+ Also, it is not safe to unspill all the multi-uses with a TYP_LONG.
+
+ Finally, it is not safe to unspill into a different register, because
+ the caller of genMakeAddressable caches the addrReg return value
+ (register mask), but when unspilling into a different register it's
+ not possible to inform the caller that addrReg is now different.
+ See bug #89946 for an example of this. There is an assert for this
+ in rsMarkRegFree via genDoneAddressable.
+ */
+
+ for (SpillDsc* dsc = multiDsc; /**/; dsc = dsc->spillNext)
+ {
+ if ((oldReg != newReg) || (dsc->spillAddr != NULL) || (dsc->spillTree->gtType == TYP_LONG))
+ {
+ return newReg;
+ }
+
+ if (!dsc->spillMoreMultis)
+ {
+ /* All the remaining multi-uses are fine. We will now
+ unspill them all */
+ break;
+ }
+ }
+
+ bool bFound = false;
+ SpillDsc* pDsc;
+ SpillDsc** ppPrev;
+
+ for (pDsc = rsSpillDesc[oldReg], ppPrev = &rsSpillDesc[oldReg];; pDsc = pDsc->spillNext)
+ {
+ if (pDsc == multiDsc)
+ {
+ // We've found the sequence we were searching for
+ bFound = true;
+ }
+
+ if (bFound)
+ {
+ rsAddrUnspillOper(pDsc->spillAddr);
+
+ // Mark the tree node as having been unspilled into newReg
+ rsMarkUnspill(pDsc->spillTree, newReg);
+ }
+
+ if (!pDsc->spillMoreMultis)
+ {
+ if (bFound)
+ {
+ // End of sequence
+
+                // Splice the unspilled sequence out of the old register's list
+ *ppPrev = pDsc->spillNext;
+
+ // Exit walk
+ break;
+ }
+ else
+ {
+ ppPrev = &(pDsc->spillNext);
+ }
+ }
+ }
+
+ /* pDsc points to the last multi-used descriptor from the spill-list
+ for the current value (pDsc->spillMoreMultis == false) */
+
+ pDsc->spillNext = rsMultiDesc[newReg];
+ rsMultiDesc[newReg] = multiDsc;
+
+ if (floatUnspill)
+ rsMaskMult |= genRegMaskFloat(newReg, regType);
+ else
+ rsMaskMult |= genRegMask(newReg);
+ }
+
+ /* Free the temp, it's no longer used */
+
+ m_rsCompiler->tmpRlsTemp(temp);
+
+ return newReg;
+}
+#endif // LEGACY_BACKEND
+
+//---------------------------------------------------------------------
+// rsUnspillInPlace: The given tree operand has been spilled; just mark
+// it as unspilled so that we can use it as a "normal" local.
+//
+// Arguments:
+// tree - GenTree that needs to be marked as unspilled.
+// oldReg - reg of tree that was spilled.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 1. It is the responsibility of the caller to free the spill temp.
+// 2. RyuJIT backend specific: In case of multi-reg call node
+// GTF_SPILLED flag associated with reg is cleared. It is the
+// responsibility of caller to clear GTF_SPILLED flag on call node
+// itself after ensuring there are no outstanding regs in GTF_SPILLED
+// state.
+//
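+// Typical use (illustrative): the caller reloads the value from the returned TempDsc
+// and then releases the temp via Compiler::tmpRlsTemp(), per assumption 1 above.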
+TempDsc* RegSet::rsUnspillInPlace(GenTreePtr tree, regNumber oldReg, unsigned regIdx /* =0 */)
+{
+ assert(!isRegPairType(tree->gtType));
+
+ // Get the tree's SpillDsc
+ SpillDsc* prevDsc;
+ SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg, &prevDsc);
+ PREFIX_ASSUME(spillDsc != nullptr);
+
+ // Get the temp
+ TempDsc* temp = rsGetSpillTempWord(oldReg, spillDsc, prevDsc);
+
+ // The value is now unspilled
+ if (tree->IsMultiRegCall())
+ {
+ GenTreeCall* call = tree->AsCall();
+ unsigned flags = call->GetRegSpillFlagByIdx(regIdx);
+ flags &= ~GTF_SPILLED;
+ call->SetRegSpillFlagByIdx(flags, regIdx);
+ }
+ else
+ {
+ tree->gtFlags &= ~GTF_SPILLED;
+ }
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tTree-Node marked unspilled from ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ return temp;
+}
+
+#ifdef LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * The given tree operand has been spilled; reload it into a register that
+ * is in 'needReg' (if 'needReg' is RBM_NONE, any register will do). If 'keepReg'
+ * is set to KEEP_REG, we'll mark the new register as used.
+ */
+
+void RegSet::rsUnspillReg(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg)
+{
+ assert(!isRegPairType(tree->gtType)); // use rsUnspillRegPair()
+ regNumber oldReg = tree->gtRegNum;
+
+ /* Get the SpillDsc for the tree */
+
+ SpillDsc* spillDsc = rsGetSpillInfo(tree, oldReg);
+ PREFIX_ASSUME(spillDsc != NULL);
+
+ /* Before spillDsc is stomped on by rsUnspillOneReg(), note whether
+ * the reg was part of an address mode
+ */
+
+ GenTreePtr unspillAddr = spillDsc->spillAddr;
+
+ /* Pick a new home for the value */
+
+ regNumber newReg = rsUnspillOneReg(tree, oldReg, keepReg, needReg);
+
+ /* Mark the tree node as having been unspilled into newReg */
+
+ rsMarkUnspill(tree, newReg);
+
+ // If this reg was part of a complex address mode, need to clear this flag which
+ // tells address mode building that a component has been spilled
+
+ rsAddrUnspillOper(unspillAddr);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s unspilled from ", m_rsCompiler->compRegVarName(newReg));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ /* Mark the new value as used, if the caller desires so */
+
+ if (keepReg == KEEP_REG)
+ rsMarkRegUsed(tree, unspillAddr);
+}
+#endif // LEGACY_BACKEND
+
+void RegSet::rsMarkSpill(GenTreePtr tree, regNumber reg)
+{
+ tree->gtFlags &= ~GTF_REG_VAL;
+ tree->gtFlags |= GTF_SPILLED;
+}
+
+#ifdef LEGACY_BACKEND
+
+void RegSet::rsMarkUnspill(GenTreePtr tree, regNumber reg)
+{
+#ifndef _TARGET_AMD64_
+ assert(tree->gtType != TYP_LONG);
+#endif // _TARGET_AMD64_
+
+ tree->gtFlags |= GTF_REG_VAL;
+ tree->gtFlags &= ~GTF_SPILLED;
+ tree->gtRegNum = reg;
+}
+
+/*****************************************************************************
+ *
+ * Choose a register pair from the given set (note: only registers in the
+ * given set will be considered).
+ */
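+// Strategy: prefer a pair of free acceptable registers; if exactly one acceptable
+// register is free, grab it and spill a second one (locking the first so it cannot be
+// picked as the victim); if none are free, spill two. The lower-numbered register is
+// passed as the low half of the resulting pair.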
+
+regPairNo RegSet::rsGrabRegPair(regMaskTP regMask)
+{
+ regPairNo regPair;
+ regMaskTP OKmask;
+ regNumber reg1;
+ regNumber reg2;
+
+ assert(regMask);
+ regMask &= ~rsMaskLock;
+ assert(regMask);
+
+ /* We'd prefer to choose a free register pair if possible */
+
+ OKmask = regMask & rsRegMaskFree();
+
+ /* Any takers in the recommended/free set? */
+
+ regPair = rsFindRegPairNo(OKmask);
+
+ if (regPair != REG_PAIR_NONE)
+ {
+ // The normal early exit
+
+ /* Keep track of which registers we ever touch */
+ rsSetRegsModified(genRegPairMask(regPair));
+
+ return regPair;
+ }
+
+ /* We have no choice but to spill one or two used regs */
+
+ if (OKmask)
+ {
+ /* One (and only one) register is free and acceptable - grab it */
+
+ assert(genMaxOneBit(OKmask));
+
+ for (reg1 = REG_INT_FIRST; reg1 <= REG_INT_LAST; reg1 = REG_NEXT(reg1))
+ {
+ if (OKmask & genRegMask(reg1))
+ break;
+ }
+ assert(OKmask & genRegMask(reg1));
+ }
+ else
+ {
+ /* No register is free and acceptable - we'll have to spill two */
+
+ reg1 = rsGrabReg(regMask);
+ }
+
+ /* Temporarily lock the first register so it doesn't go away */
+
+ rsLockReg(genRegMask(reg1));
+
+ /* Now grab another register */
+
+ reg2 = rsGrabReg(regMask);
+
+ /* We can unlock the first register now */
+
+ rsUnlockReg(genRegMask(reg1));
+
+ /* Convert the two register numbers into a pair */
+
+ if (reg1 < reg2)
+ regPair = gen2regs2pair(reg1, reg2);
+ else
+ regPair = gen2regs2pair(reg2, reg1);
+
+ return regPair;
+}
+
+/*****************************************************************************
+ *
+ * Choose a register pair from the given set (if non-zero) or from the set of
+ * currently available registers (if 'regMask' is zero).
+ */
+
+regPairNo RegSet::rsPickRegPair(regMaskTP regMask)
+{
+ regMaskTP OKmask;
+ regPairNo regPair;
+
+ int repeat = 0;
+
+ /* By default we'd prefer to accept all available registers */
+
+ OKmask = rsRegMaskFree();
+
+ if (regMask)
+ {
+ /* A register set was recommended by the caller */
+
+ OKmask &= regMask;
+ }
+
+AGAIN:
+
+ regPair = rsFindRegPairNo(OKmask);
+
+ if (regPair != REG_PAIR_NONE)
+ {
+ return regPair; // Normal early exit
+ }
+
+ regMaskTP freeMask;
+ regMaskTP spillMask;
+
+ /* Now let's consider all available registers */
+
+ freeMask = rsRegMaskFree();
+
+ /* Were we limited in our consideration? */
+
+ if (!regMask)
+ {
+ /* We need to spill two of the free registers */
+
+ spillMask = freeMask;
+ }
+ else
+ {
+ /* Did we not consider all free registers? */
+
+ if ((regMask & freeMask) != freeMask && repeat == 0)
+ {
+ /* The recommended regset didn't work, so try all available regs */
+
+ OKmask = freeMask;
+ repeat++;
+ goto AGAIN;
+ }
+
+ /* If we're going to spill, might as well go for the right one */
+
+ spillMask = regMask;
+ }
+
+ /* Make sure that we have at least two bits set */
+
+ if (genMaxOneBit(spillMask & rsRegMaskCanGrab()))
+ spillMask = rsRegMaskCanGrab();
+
+ assert(!genMaxOneBit(spillMask));
+
+    /* We have no choice but to spill one or two of the regs */
+
+ return rsGrabRegPair(spillMask);
+}
+
+/*****************************************************************************
+ *
+ * The given tree operand has been spilled; reload it into a register pair
+ * that is in 'needReg' (if 'needReg' is RBM_NONE, any register pair will do). If
+ * 'keepReg' is KEEP_REG, we'll mark the new register pair as used. It is
+ * assumed that the current register pair has been marked as used (modulo
+ * any spillage, of course).
+ */
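+// Each half is handled independently: a half that is still in its register is simply
+// freed, while a spilled half is reloaded via rsUnspillOneReg(), with the other half
+// temporarily locked so it cannot be chosen as the spill victim.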
+
+void RegSet::rsUnspillRegPair(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg)
+{
+ assert(isRegPairType(tree->gtType));
+
+ regPairNo regPair = tree->gtRegPair;
+ regNumber regLo = genRegPairLo(regPair);
+ regNumber regHi = genRegPairHi(regPair);
+
+ /* Has the register holding the lower half been spilled? */
+
+ if (!rsIsTreeInReg(regLo, tree))
+ {
+ /* Is the upper half already in the right place? */
+
+ if (rsIsTreeInReg(regHi, tree))
+ {
+ /* Temporarily lock the high part */
+
+ rsLockUsedReg(genRegMask(regHi));
+
+ /* Pick a new home for the lower half */
+
+ regLo = rsUnspillOneReg(tree, regLo, keepReg, needReg);
+
+ /* We can unlock the high part now */
+
+ rsUnlockUsedReg(genRegMask(regHi));
+ }
+ else
+ {
+ /* Pick a new home for the lower half */
+
+ regLo = rsUnspillOneReg(tree, regLo, keepReg, needReg);
+ }
+ }
+ else
+ {
+ /* Free the register holding the lower half */
+
+ rsMarkRegFree(genRegMask(regLo));
+ }
+
+ if (regHi != REG_STK)
+ {
+ /* Has the register holding the upper half been spilled? */
+
+ if (!rsIsTreeInReg(regHi, tree))
+ {
+ regMaskTP regLoUsed;
+
+            /* Temporarily lock the low part so it doesn't get spilled */
+
+ rsLockReg(genRegMask(regLo), &regLoUsed);
+
+ /* Pick a new home for the upper half */
+
+ regHi = rsUnspillOneReg(tree, regHi, keepReg, needReg);
+
+ /* We can unlock the low register now */
+
+ rsUnlockReg(genRegMask(regLo), regLoUsed);
+ }
+ else
+ {
+ /* Free the register holding the upper half */
+
+ rsMarkRegFree(genRegMask(regHi));
+ }
+ }
+
+ /* The value is now residing in the new register */
+
+ tree->gtFlags |= GTF_REG_VAL;
+ tree->gtFlags &= ~GTF_SPILLED;
+ tree->gtRegPair = gen2regs2pair(regLo, regHi);
+
+ /* Mark the new value as used, if the caller desires so */
+
+ if (keepReg == KEEP_REG)
+ rsMarkRegPairUsed(tree);
+}
+
+/*****************************************************************************
+ *
+ * The given register is being used by multiple trees (all of which represent
+ * the same logical value). This happens mainly because of REDUNDANT_LOAD;
+ * we don't want to really spill the register, as it actually holds the
+ * value we want, but the multiple trees may be part of different
+ * addressing modes.
+ * Save the previous 'use' info so that, when we return, the register will
+ * appear unused.
+ */
+
+void RegSet::rsRecMultiReg(regNumber reg, var_types type)
+{
+ SpillDsc* spill;
+ regMaskTP regMask;
+
+ if (genIsValidFloatReg(reg) && isFloatRegType(type))
+ regMask = genRegMaskFloat(reg, type);
+ else
+ regMask = genRegMask(reg);
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use inc for ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult | regMask);
+ }
+#endif
+
+ /* The register is supposed to be already used */
+
+ assert(regMask & rsMaskUsed);
+
+ assert(rsUsedTree[reg]);
+
+ /* Allocate/reuse a spill descriptor */
+
+ spill = SpillDsc::alloc(m_rsCompiler, this, rsUsedTree[reg]->TypeGet());
+
+ /* Record the current 'use' info in the spill descriptor */
+
+ spill->spillTree = rsUsedTree[reg];
+ rsUsedTree[reg] = 0;
+ spill->spillAddr = rsUsedAddr[reg];
+ rsUsedAddr[reg] = 0;
+
+ /* Remember whether the register is already 'multi-use' */
+
+ spill->spillMoreMultis = ((rsMaskMult & regMask) != 0);
+
+ /* Insert the new multi-use record in the list for the register */
+
+ spill->spillNext = rsMultiDesc[reg];
+ rsMultiDesc[reg] = spill;
+
+ /* This register is now 'multi-use' */
+
+ rsMaskMult |= regMask;
+}
+
+/*****************************************************************************
+ *
+ * Free the given register, which is known to have multiple uses.
+ */
+
+var_types RegSet::rsRmvMultiReg(regNumber reg)
+{
+ SpillDsc* dsc;
+
+ assert(rsMaskMult & genRegMask(reg));
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use dec for ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult);
+ }
+#endif
+
+ /* Get hold of the spill descriptor for the register */
+
+ dsc = rsMultiDesc[reg];
+ assert(dsc);
+ rsMultiDesc[reg] = dsc->spillNext;
+
+ /* Copy the previous 'use' info from the descriptor */
+
+ assert(reg != REG_SPBASE);
+ rsUsedTree[reg] = dsc->spillTree;
+ rsUsedAddr[reg] = dsc->spillAddr;
+
+ if (!(dsc->spillTree->gtFlags & GTF_SPILLED))
+ m_rsGCInfo.gcMarkRegPtrVal(reg, dsc->spillTree->TypeGet());
+
+ var_types type = dsc->spillTree->TypeGet();
+ regMaskTP regMask;
+
+ if (genIsValidFloatReg(reg) && isFloatRegType(type))
+ regMask = genRegMaskFloat(reg, type);
+ else
+ regMask = genRegMask(reg);
+
+ /* Is only one use of the register left? */
+
+ if (!dsc->spillMoreMultis)
+ {
+ rsMaskMult -= regMask;
+ }
+
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tRegister %s multi-use dec - now ", m_rsCompiler->compRegVarName(reg));
+ Compiler::printTreeID(rsUsedTree[reg]);
+ printf(" multMask=" REG_MASK_ALL_FMT "\n", rsMaskMult);
+ }
+#endif
+
+ SpillDsc::freeDsc(this, dsc);
+ return type;
+}
+#endif // LEGACY_BACKEND
+
+/*****************************************************************************/
+#if REDUNDANT_LOAD
+/*****************************************************************************
+ *
+ * Search for a register which contains the given constant value.
+ * Returns the register if one is found, or REG_NA otherwise.
+ * If the closeDelta argument is non-NULL then look for a
+ * register that has a close constant value. For ARM, find
+ * the closest register value, independent of constant delta.
+ * For non-ARM, only consider values that are within -128..+127.
+ * If one is found, *closeDelta is set to the difference that needs
+ * to be added to the register returned. On x86/amd64, an lea instruction
+ * is used to set the target register using the register that
+ * contains the close integer constant.
+ */
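+// Example (illustrative values): if val is 0x1005 and some register already holds the
+// constant 0x1000, the non-ARM path reports that register with *closeDelta == 5, and
+// the caller can then form the value with a single lea off that register.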
+
+regNumber RegTracker::rsIconIsInReg(ssize_t val, ssize_t* closeDelta /* = NULL */)
+{
+ regNumber closeReg = REG_NA;
+
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return REG_NA;
+ }
+
+ for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_INT_CNS)
+ {
+ ssize_t regCnsVal = rsRegValues[reg].rvdIntCnsVal;
+ if (regCnsVal == val)
+ {
+ if (closeDelta)
+ {
+ *closeDelta = 0;
+ }
+ return reg;
+ }
+ if (closeDelta)
+ {
+#ifdef _TARGET_ARM_
+ // Find the smallest delta; the caller checks the size
+ // TODO-CQ: find the smallest delta from a low register?
+                // That is, is it better to return a high register with a small
+                // constant delta, or a low register with a larger offset? It's better
+                // to have a low register with an offset within the low-register range,
+                // or a high register otherwise...
+
+ ssize_t regCnsDelta = val - regCnsVal;
+ if ((closeReg == REG_NA) || (unsigned_abs(regCnsDelta) < unsigned_abs(*closeDelta)))
+ {
+ closeReg = reg;
+ *closeDelta = regCnsDelta;
+ }
+#else
+ if (closeReg == REG_NA)
+ {
+ ssize_t regCnsDelta = val - regCnsVal;
+ /* Does delta fit inside a byte [-128..127] */
+ if (regCnsDelta == (signed char)regCnsDelta)
+ {
+ closeReg = reg;
+ *closeDelta = (int)regCnsDelta;
+ }
+ }
+#endif
+ }
+ }
+ }
+
+ /* There was not an exact match */
+
+ return closeReg; /* will always be REG_NA when closeDelta is NULL */
+}
+
+/*****************************************************************************
+ *
+ * Assume all non-integer registers contain garbage (this is called when
+ * we encounter a code label that isn't jumped to by any block; we need to
+ * clear pointer values out of the table lest the GC pointer tables get
+ * out of date).
+ */
+
+void RegTracker::rsTrackRegClrPtr()
+{
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ /* Preserve constant values */
+
+ if (rsRegValues[reg].rvdKind == RV_INT_CNS)
+ {
+ /* Make sure we don't preserve NULL (it's a pointer) */
+
+ if (rsRegValues[reg].rvdIntCnsVal != NULL)
+ {
+ continue;
+ }
+ }
+
+ /* Preserve variables known to not be pointers */
+
+ if (rsRegValues[reg].rvdKind == RV_LCL_VAR)
+ {
+ if (!varTypeIsGC(compiler->lvaTable[rsRegValues[reg].rvdLclVarNum].TypeGet()))
+ {
+ continue;
+ }
+ }
+
+ rsRegValues[reg].rvdKind = RV_TRASH;
+ }
+}
+
+/*****************************************************************************
+ *
+ * This routine trashes the registers that hold stack GCRef/ByRef variables. (VSW: 561129)
+ * It should be called at each gc-safe point.
+ *
+ * It returns a mask of the registers that used to contain tracked stack variables that
+ * were trashed.
+ *
+ */
+
+regMaskTP RegTracker::rsTrashRegsForGCInterruptability()
+{
+ regMaskTP result = RBM_NONE;
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &compiler->lvaTable[rsRegValues[reg].rvdLclVarNum];
+
+ if (!varTypeIsGC(varDsc->TypeGet()))
+ {
+ continue;
+ }
+
+            // Only stack locals get tracked.
+ assert(!varDsc->lvRegister);
+
+ rsRegValues[reg].rvdKind = RV_TRASH;
+
+ result |= genRegMask(reg);
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * Search for a register which contains the given local var.
+ * Returns the register if one is found, or REG_NA otherwise.
+ * Returns REG_NA for register variables, because otherwise their lifetimes
+ * can get bungled with respect to pointer tracking.
+ */
+
+regNumber RegTracker::rsLclIsInReg(unsigned var)
+{
+ assert(var < compiler->lvaCount);
+
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return REG_NA;
+ }
+
+ /* return false if register var so genMarkLclVar can do its job */
+
+ if (compiler->lvaTable[var].lvRegister)
+ {
+ return REG_NA;
+ }
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdLclVarNum == var && rsRegValues[reg].rvdKind == RV_LCL_VAR)
+ {
+ return reg;
+ }
+ }
+
+ return REG_NA;
+}
+
+/*****************************************************************************/
+
+regPairNo RegTracker::rsLclIsInRegPair(unsigned var)
+{
+ assert(var < compiler->lvaCount);
+
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return REG_PAIR_NONE;
+ }
+
+ regValKind rvKind = RV_TRASH;
+ regNumber regNo = DUMMY_INIT(REG_NA);
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rvKind != rsRegValues[reg].rvdKind && rsTrackIsLclVarLng(rsRegValues[reg].rvdKind) &&
+ rsRegValues[reg].rvdLclVarNum == var)
+ {
+ /* first occurrence of this variable ? */
+
+ if (rvKind == RV_TRASH)
+ {
+ regNo = reg;
+ rvKind = rsRegValues[reg].rvdKind;
+ }
+ else if (rvKind == RV_LCL_VAR_LNG_HI)
+ {
+ /* We found the lower half of the long */
+
+ return gen2regs2pair(reg, regNo);
+ }
+ else
+ {
+ /* We found the upper half of the long */
+
+ assert(rvKind == RV_LCL_VAR_LNG_LO);
+ return gen2regs2pair(regNo, reg);
+ }
+ }
+ }
+
+ return REG_PAIR_NONE;
+}
+
+/*****************************************************************************/
+
+void RegTracker::rsTrashLclLong(unsigned var)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsTrackIsLclVarLng(rsRegValues[reg].rvdKind) && rsRegValues[reg].rvdLclVarNum == var)
+ {
+ rsRegValues[reg].rvdKind = RV_TRASH;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * A local's value has changed; mark all regs that contained it as trash.
+ */
+
+void RegTracker::rsTrashLcl(unsigned var)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_LCL_VAR && rsRegValues[reg].rvdLclVarNum == var)
+ {
+ rsRegValues[reg].rvdKind = RV_TRASH;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * A little helper to trash the given set of registers.
+ * Usually used after a call has been generated.
+ */
+
+void RegTracker::rsTrashRegSet(regMaskTP regMask)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return;
+ }
+ regMaskTP regBit = 1;
+ for (regNumber regNum = REG_FIRST; regMask != 0; regNum = REG_NEXT(regNum), regBit <<= 1)
+ {
+ if (regBit & regMask)
+ {
+ rsTrackRegTrash(regNum);
+ regMask -= regBit;
+ }
+ }
+}
+
+/*****************************************************************************
+ *
+ * Return a mask of registers that hold no useful value.
+ */
+
+regMaskTP RegTracker::rsUselessRegs()
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return RBM_ALLINT;
+ }
+
+ regMaskTP mask = RBM_NONE;
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ if (rsRegValues[reg].rvdKind == RV_TRASH)
+ {
+ mask |= genRegMask(reg);
+ }
+ }
+
+ return mask;
+}
+
+/*****************************************************************************/
+#endif // REDUNDANT_LOAD
+/*****************************************************************************/
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX TempsInfo XX
+XX XX
+XX The temporary lclVars allocated by the compiler for code generation XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void Compiler::tmpInit()
+{
+#ifdef LEGACY_BACKEND
+ tmpDoubleSpillMax = 0;
+ tmpIntSpillMax = 0;
+#endif // LEGACY_BACKEND
+
+ tmpCount = 0;
+ tmpSize = 0;
+#ifdef DEBUG
+ tmpGetCount = 0;
+#endif
+
+ memset(tmpFree, 0, sizeof(tmpFree));
+ memset(tmpUsed, 0, sizeof(tmpUsed));
+}
+
+/* static */
+var_types Compiler::tmpNormalizeType(var_types type)
+{
+#ifndef LEGACY_BACKEND
+
+ type = genActualType(type);
+
+#else // LEGACY_BACKEND
+ if (!varTypeIsGC(type))
+ {
+ switch (genTypeStSz(type))
+ {
+ case 1:
+ type = TYP_INT; // Maps all 4-byte non-GC types to TYP_INT temps
+ break;
+ case 2:
+ type = TYP_DOUBLE; // Maps all 8-byte types to TYP_DOUBLE temps
+ break;
+ default:
+ assert(!"unexpected type");
+ }
+ }
+#endif // LEGACY_BACKEND
+
+ return type;
+}
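+// For example: under the legacy backend an 8-byte TYP_LONG temp is normalized to
+// TYP_DOUBLE and a 4-byte non-GC type to TYP_INT, while GC types (TYP_REF/TYP_BYREF)
+// are left alone so the GC can track them; the RyuJIT path just widens small int
+// types via genActualType() (e.g. TYP_SHORT becomes TYP_INT).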
+
+/*****************************************************************************
+ *
+ * Allocate a temp of the given size (and type, if tracking pointers for
+ * the garbage collector).
+ */
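+// Temps live on per-size free lists (tmpFree[tmpSlot(size)]); we first try to reuse a
+// free temp with the exact type. Only the legacy backend creates a brand-new temp
+// here; the RyuJIT path asserts instead, because tmpPreAllocateTemps() must already
+// have provided enough temps of each type.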
+
+TempDsc* Compiler::tmpGetTemp(var_types type)
+{
+ type = tmpNormalizeType(type);
+ unsigned size = genTypeSize(type);
+
+ // If TYP_STRUCT ever gets in here we do bad things (tmpSlot returns -1)
+ noway_assert(size >= sizeof(int));
+
+ /* Find the slot to search for a free temp of the right size */
+
+ unsigned slot = tmpSlot(size);
+
+ /* Look for a temp with a matching type */
+
+ TempDsc** last = &tmpFree[slot];
+ TempDsc* temp;
+
+ for (temp = *last; temp; last = &temp->tdNext, temp = *last)
+ {
+ /* Does the type match? */
+
+ if (temp->tdTempType() == type)
+ {
+ /* We have a match -- remove it from the free list */
+
+ *last = temp->tdNext;
+ break;
+ }
+ }
+
+#ifdef DEBUG
+ /* Do we need to allocate a new temp */
+ bool isNewTemp = false;
+#endif // DEBUG
+
+#ifndef LEGACY_BACKEND
+
+ noway_assert(temp != nullptr);
+
+#else // LEGACY_BACKEND
+
+ if (temp == nullptr)
+ {
+#ifdef DEBUG
+ isNewTemp = true;
+#endif // DEBUG
+ tmpCount++;
+ tmpSize += (unsigned)size;
+
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ // Adjust tmpSize in case it needs alignment
+ tmpSize += TARGET_POINTER_SIZE;
+ }
+#endif // _TARGET_ARM_
+
+ genEmitter->emitTmpSizeChanged(tmpSize);
+
+ temp = new (this, CMK_Unknown) TempDsc(-((int)tmpCount), size, type);
+ }
+
+#endif // LEGACY_BACKEND
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("%s temp #%u, slot %u, size = %u\n", isNewTemp ? "created" : "reused", -temp->tdTempNum(), slot,
+ temp->tdTempSize());
+ }
+ tmpGetCount++;
+#endif // DEBUG
+
+ temp->tdNext = tmpUsed[slot];
+ tmpUsed[slot] = temp;
+
+ return temp;
+}
+
+#ifndef LEGACY_BACKEND
+
+/*****************************************************************************
+ * Preallocate 'count' temps of type 'type'. This type must be a normalized
+ * type (by the definition of tmpNormalizeType()).
+ *
+ * This is used at the end of LSRA, which knows precisely the maximum concurrent
+ * number of each type of spill temp needed, before code generation. Code generation
+ * then uses these preallocated temps. If code generation ever asks for more than
+ * has been preallocated, it is a fatal error.
+ */
+
+void Compiler::tmpPreAllocateTemps(var_types type, unsigned count)
+{
+ assert(type == tmpNormalizeType(type));
+ unsigned size = genTypeSize(type);
+
+ // If TYP_STRUCT ever gets in here we do bad things (tmpSlot returns -1)
+ noway_assert(size >= sizeof(int));
+
+ // Find the slot to search for a free temp of the right size.
+ // Note that slots are shared by types of the identical size (e.g., TYP_REF and TYP_LONG on AMD64),
+ // so we can't assert that the slot is empty when we get here.
+
+ unsigned slot = tmpSlot(size);
+
+ for (unsigned i = 0; i < count; i++)
+ {
+ tmpCount++;
+ tmpSize += size;
+
+ TempDsc* temp = new (this, CMK_Unknown) TempDsc(-((int)tmpCount), size, type);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("pre-allocated temp #%u, slot %u, size = %u\n", -temp->tdTempNum(), slot, temp->tdTempSize());
+ }
+#endif // DEBUG
+
+ // Add it to the front of the appropriate slot list.
+ temp->tdNext = tmpFree[slot];
+ tmpFree[slot] = temp;
+ }
+}
+
+#endif // !LEGACY_BACKEND
+
+/*****************************************************************************
+ *
+ * Release the given temp.
+ */
+
+void Compiler::tmpRlsTemp(TempDsc* temp)
+{
+ assert(temp != nullptr);
+
+ unsigned slot;
+
+ /* Add the temp to the 'free' list */
+
+ slot = tmpSlot(temp->tdTempSize());
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("release temp #%u, slot %u, size = %u\n", -temp->tdTempNum(), slot, temp->tdTempSize());
+ }
+ assert(tmpGetCount);
+ tmpGetCount--;
+#endif
+
+ // Remove it from the 'used' list.
+
+ TempDsc** last = &tmpUsed[slot];
+ TempDsc* t;
+ for (t = *last; t != nullptr; last = &t->tdNext, t = *last)
+ {
+ if (t == temp)
+ {
+ /* Found it! -- remove it from the 'used' list */
+
+ *last = t->tdNext;
+ break;
+ }
+ }
+ assert(t != nullptr); // We better have found it!
+
+ // Add it to the free list.
+
+ temp->tdNext = tmpFree[slot];
+ tmpFree[slot] = temp;
+}
+
+/*****************************************************************************
+ * Given a temp number, find the corresponding temp.
+ *
+ * When looking for temps on the "free" list, this can only be used after code generation. (This is
+ * simply because we have an assert to that effect in tmpListBeg(); we could relax that, or hoist
+ * the assert to the appropriate callers.)
+ *
+ * When looking for temps on the "used" list, this can be used any time.
+ */
+TempDsc* Compiler::tmpFindNum(int tnum, TEMP_USAGE_TYPE usageType /* = TEMP_USAGE_FREE */) const
+{
+ assert(tnum < 0); // temp numbers are negative
+
+ for (TempDsc* temp = tmpListBeg(usageType); temp != nullptr; temp = tmpListNxt(temp, usageType))
+ {
+ if (temp->tdTempNum() == tnum)
+ {
+ return temp;
+ }
+ }
+
+ return nullptr;
+}
+
+/*****************************************************************************
+ *
+ * A helper function to begin iterating over all the temps (see tmpListNxt()).
+ */
+
+TempDsc* Compiler::tmpListBeg(TEMP_USAGE_TYPE usageType /* = TEMP_USAGE_FREE */) const
+{
+ TempDsc* const* tmpLists;
+ if (usageType == TEMP_USAGE_FREE)
+ {
+ tmpLists = tmpFree;
+ }
+ else
+ {
+ tmpLists = tmpUsed;
+ }
+
+ // Return the first temp in the slot for the smallest size
+ unsigned slot = 0;
+ while (slot < (TEMP_SLOT_COUNT - 1) && tmpLists[slot] == nullptr)
+ {
+ slot++;
+ }
+ TempDsc* temp = tmpLists[slot];
+
+ return temp;
+}
+
+/*****************************************************************************
+ * Used with tmpListBeg() to iterate over the list of temps.
+ */
+
+TempDsc* Compiler::tmpListNxt(TempDsc* curTemp, TEMP_USAGE_TYPE usageType /* = TEMP_USAGE_FREE */) const
+{
+ assert(curTemp != nullptr);
+
+ TempDsc* temp = curTemp->tdNext;
+ if (temp == nullptr)
+ {
+ unsigned size = curTemp->tdTempSize();
+
+ // If there are no more temps in the list, check if there are more
+ // slots (for bigger sized temps) to walk.
+
+ TempDsc* const* tmpLists;
+ if (usageType == TEMP_USAGE_FREE)
+ {
+ tmpLists = tmpFree;
+ }
+ else
+ {
+ tmpLists = tmpUsed;
+ }
+
+ while (size < TEMP_MAX_SIZE && temp == nullptr)
+ {
+ size += sizeof(int);
+ unsigned slot = tmpSlot(size);
+ temp = tmpLists[slot];
+ }
+
+ assert((temp == nullptr) || (temp->tdTempSize() == size));
+ }
+
+ return temp;
+}
+
+#ifdef DEBUG
+/*****************************************************************************
+ * Return 'true' if all allocated temps are free (not in use).
+ */
+bool Compiler::tmpAllFree() const
+{
+ // The 'tmpGetCount' should equal the number of things in the 'tmpUsed' lists. This is a convenient place
+ // to assert that.
+ unsigned usedCount = 0;
+ for (TempDsc* temp = tmpListBeg(TEMP_USAGE_USED); temp != nullptr; temp = tmpListNxt(temp, TEMP_USAGE_USED))
+ {
+ ++usedCount;
+ }
+ assert(usedCount == tmpGetCount);
+
+ if (tmpGetCount != 0)
+ {
+ return false;
+ }
+
+ for (unsigned i = 0; i < sizeof(tmpUsed) / sizeof(tmpUsed[0]); i++)
+ {
+ if (tmpUsed[i] != nullptr)
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#endif // DEBUG
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Register-related utility functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+ *
+ * Returns true when regPair is a combination of two distinct, real registers
+ * (i.e. neither half is REG_STK, REG_L_STK, or an out-of-range pseudo register).
+ * In debug builds it also asserts that regPair is within the valid pair range.
+ */
+
+bool genIsProperRegPair(regPairNo regPair)
+{
+ regNumber rlo = genRegPairLo(regPair);
+ regNumber rhi = genRegPairHi(regPair);
+
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ if (rlo == rhi)
+ {
+ return false;
+ }
+
+ if (rlo == REG_L_STK || rhi == REG_L_STK)
+ {
+ return false;
+ }
+
+ if (rlo >= REG_COUNT || rhi >= REG_COUNT)
+ {
+ return false;
+ }
+
+ return (rlo != REG_STK && rhi != REG_STK);
+}
+
+/*****************************************************************************
+ *
+ * Given an argument register, returns the next argument register.
+ *
+ * Note that this method will return a non-argument register
+ * when given REG_ARG_LAST.
+ *
+ */
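+// Illustrative example (Windows x64): genRegArgNext(REG_ARG_0) yields REG_ARG_1 via
+// REG_NEXT, while the special case below maps REG_ARG_1 directly to REG_ARG_2 because
+// the registers between them in the regNumber enumeration are not argument registers.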
+
+regNumber genRegArgNext(regNumber argReg)
+{
+ regNumber result = REG_NA;
+
+ if (isValidFloatArgReg(argReg))
+ {
+ // We can iterate the floating point argument registers by using +1
+ result = REG_NEXT(argReg);
+ }
+ else
+ {
+ assert(isValidIntArgReg(argReg));
+
+#ifdef _TARGET_AMD64_
+#ifdef UNIX_AMD64_ABI
+        // Unix AMD64 ABI:
+ // REG_EDI, REG_ESI, REG_ECX, REG_EDX, REG_R8, REG_R9
+ //
+ if (argReg == REG_ARG_1) // REG_ESI
+ {
+ result = REG_ARG_2; // REG_ECX
+ }
+ else if (argReg == REG_ARG_3) // REG_EDX
+ {
+ result = REG_ARG_4; // REG_R8
+ }
+#else // Windows ABI
+ // Windows X64 ABI:
+ // REG_ECX, REG_EDX, REG_R8, REG_R9
+ //
+ if (argReg == REG_ARG_1) // REG_EDX
+ {
+ result = REG_ARG_2; // REG_R8
+ }
+#endif // UNIX or Windows ABI
+#endif // _TARGET_AMD64_
+
+        // If we didn't set 'result' to a valid register above,
+ // then we will just iterate 'argReg' using REG_NEXT
+ //
+ if (result == REG_NA)
+ {
+ // Otherwise we just iterate the argument registers by using REG_NEXT
+ result = REG_NEXT(argReg);
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ *
+ * The following table determines the order in which callee-saved registers
+ * are encoded in GC information at call sites (perhaps among other things).
+ * In any case, they establish a mapping from ordinal callee-save reg "indices" to
+ * register numbers and corresponding bitmaps.
+ */
+
+const regNumber raRegCalleeSaveOrder[] = {REG_CALLEE_SAVED_ORDER};
+const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALLEE_SAVED_ORDER};
+
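+// genRegMaskFromCalleeSavedMask converts an index-based mask (bit i set means "the
+// i-th register in the callee-saved order above") into an ordinary register bitmap.
+// For example (illustrative), if raRegCalleeSaveOrder[0] is REG_EBX, a calleeSaveMask
+// of 0x1 yields RBM_EBX.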
+regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask)
+{
+ regMaskSmall res = 0;
+ for (int i = 0; i < CNT_CALLEE_SAVED; i++)
+ {
+ if ((calleeSaveMask & ((regMaskTP)1 << i)) != 0)
+ {
+ res |= raRbmCalleeSaveOrder[i];
+ }
+ }
+ return res;
+}
+
+/*****************************************************************************
+ *
+ * Initializes the spill code. Should be called once per function compiled.
+ */
+
+// inline
+void RegSet::rsSpillInit()
+{
+ /* Clear out the spill and multi-use tables */
+
+ memset(rsSpillDesc, 0, sizeof(rsSpillDesc));
+
+#ifdef LEGACY_BACKEND
+ memset(rsUsedTree, 0, sizeof(rsUsedTree));
+ memset(rsUsedAddr, 0, sizeof(rsUsedAddr));
+ memset(rsMultiDesc, 0, sizeof(rsMultiDesc));
+ rsSpillFloat = nullptr;
+#endif // LEGACY_BACKEND
+
+ rsNeededSpillReg = false;
+
+ /* We don't have any descriptors allocated */
+
+ rsSpillFree = nullptr;
+}
+
+/*****************************************************************************
+ *
+ * Shuts down the spill code. Should be called once per function compiled.
+ */
+
+// inline
+void RegSet::rsSpillDone()
+{
+ rsSpillChk();
+}
+
+/*****************************************************************************
+ *
+ * Begin tracking spills - should be called each time before a pass is made
+ * over a function body.
+ */
+
+// inline
+void RegSet::rsSpillBeg()
+{
+ rsSpillChk();
+}
+
+/*****************************************************************************
+ *
+ * Finish tracking spills - should be called each time after a pass is made
+ * over a function body.
+ */
+
+// inline
+void RegSet::rsSpillEnd()
+{
+ rsSpillChk();
+}
+
+//****************************************************************************
+// Create a new SpillDsc or get one off the free list
+//
+
+// inline
+RegSet::SpillDsc* RegSet::SpillDsc::alloc(Compiler* pComp, RegSet* regSet, var_types type)
+{
+ RegSet::SpillDsc* spill;
+ RegSet::SpillDsc** pSpill;
+
+ pSpill = &(regSet->rsSpillFree);
+
+ // Allocate spill structure
+ if (*pSpill)
+ {
+ spill = *pSpill;
+ *pSpill = spill->spillNext;
+ }
+ else
+ {
+ spill = (RegSet::SpillDsc*)pComp->compGetMem(sizeof(SpillDsc));
+ }
+ return spill;
+}
+
+//****************************************************************************
+// Free a SpillDsc and return it to the rsSpillFree list
+//
+
+// inline
+void RegSet::SpillDsc::freeDsc(RegSet* regSet, RegSet::SpillDsc* spillDsc)
+{
+ spillDsc->spillNext = regSet->rsSpillFree;
+ regSet->rsSpillFree = spillDsc;
+}
+
+/*****************************************************************************
+ *
+ * Make sure no spills are currently active - used for debugging of the code
+ * generator.
+ */
+
+#ifdef DEBUG
+
+// inline
+void RegSet::rsSpillChk()
+{
+ // All grabbed temps should have been released
+ assert(m_rsCompiler->tmpGetCount == 0);
+
+ for (regNumber reg = REG_FIRST; reg < REG_COUNT; reg = REG_NEXT(reg))
+ {
+ assert(rsSpillDesc[reg] == nullptr);
+
+#ifdef LEGACY_BACKEND
+ assert(rsUsedTree[reg] == NULL);
+ assert(rsMultiDesc[reg] == NULL);
+#endif // LEGACY_BACKEND
+ }
+}
+
+#else
+
+// inline
+void RegSet::rsSpillChk()
+{
+}
+
+#endif
+
+/*****************************************************************************/
+#if REDUNDANT_LOAD
+
+// inline
+bool RegTracker::rsIconIsInReg(ssize_t val, regNumber reg)
+{
+ if (compiler->opts.MinOpts() || compiler->opts.compDbgCode)
+ {
+ return false;
+ }
+
+ if (rsRegValues[reg].rvdKind == RV_INT_CNS && rsRegValues[reg].rvdIntCnsVal == val)
+ {
+ return true;
+ }
+ return false;
+}
+
+#endif // REDUNDANT_LOAD
+/*****************************************************************************/
diff --git a/src/jit/regset.h b/src/jit/regset.h
new file mode 100644
index 0000000000..cdfbb1502a
--- /dev/null
+++ b/src/jit/regset.h
@@ -0,0 +1,460 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#ifndef _REGSET_H
+#define _REGSET_H
+#include "vartype.h"
+#include "target.h"
+
+class LclVarDsc;
+class TempDsc;
+typedef struct GenTree* GenTreePtr;
+class Compiler;
+class CodeGen;
+class GCInfo;
+
+/*
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX RegSet XX
+XX XX
+XX Represents the register set, and their states during code generation XX
+XX Can select an unused register, keeps track of the contents of the XX
+XX registers, and can spill registers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/*****************************************************************************
+*
+* Keep track of the current state of each register. This is intended to be
+* used for things like register reload suppression, but for now the only
+* thing it does is note which registers we use in each method.
+*/
+
+enum regValKind
+{
+ RV_TRASH, // random unclassified garbage
+ RV_INT_CNS, // integer constant
+ RV_LCL_VAR, // local variable value
+ RV_LCL_VAR_LNG_LO, // lower half of long local variable
+ RV_LCL_VAR_LNG_HI,
+};
+
+/*****************************************************************************/
+
+class RegSet
+{
+ friend class CodeGen;
+ friend class CodeGenInterface;
+
+private:
+ Compiler* m_rsCompiler;
+ GCInfo& m_rsGCInfo;
+
+public:
+ RegSet(Compiler* compiler, GCInfo& gcInfo);
+
+#ifdef _TARGET_ARM_
+ regMaskTP rsMaskPreSpillRegs(bool includeAlignment)
+ {
+ return includeAlignment ? (rsMaskPreSpillRegArg | rsMaskPreSpillAlign) : rsMaskPreSpillRegArg;
+ }
+#endif // _TARGET_ARM_
+
+private:
+ // The same descriptor is also used for 'multi-use' register tracking, BTW.
+ struct SpillDsc
+ {
+ SpillDsc* spillNext; // next spilled value of same reg
+
+ union {
+ GenTreePtr spillTree; // the value that was spilled
+#ifdef LEGACY_BACKEND
+ LclVarDsc* spillVarDsc; // variable if it's an enregistered variable
+#endif // LEGACY_BACKEND
+ };
+
+ TempDsc* spillTemp; // the temp holding the spilled value
+
+#ifdef LEGACY_BACKEND
+ GenTreePtr spillAddr; // owning complex address mode or nullptr
+
+ union {
+ bool spillMoreMultis;
+ bool bEnregisteredVariable; // For FP. Indicates that what was spilled was
+ // an enregistered variable
+ };
+#endif // LEGACY_BACKEND
+
+ static SpillDsc* alloc(Compiler* pComp, RegSet* regSet, var_types type);
+ static void freeDsc(RegSet* regSet, SpillDsc* spillDsc);
+ };
+
+#ifdef LEGACY_BACKEND
+public:
+ regMaskTP rsUseIfZero(regMaskTP regs, regMaskTP includeHint);
+#endif // LEGACY_BACKEND
+
+//-------------------------------------------------------------------------
+//
+// Track the status of the registers
+//
+#ifdef LEGACY_BACKEND
+public: // TODO-Cleanup: Should be private, but Compiler uses it
+ GenTreePtr rsUsedTree[REG_COUNT]; // trees currently sitting in the registers
+private:
+ GenTreePtr rsUsedAddr[REG_COUNT]; // addr for which rsUsedTree[reg] is a part of the addressing mode
+ SpillDsc* rsMultiDesc[REG_COUNT]; // keeps track of 'multiple-use' registers.
+#endif // LEGACY_BACKEND
+
+private:
+ bool rsNeededSpillReg; // true if this method needed to spill any registers
+ regMaskTP rsModifiedRegsMask; // mask of the registers modified by the current function.
+
+#ifdef DEBUG
+ bool rsModifiedRegsMaskInitialized; // Has rsModifiedRegsMask been initialized? Guards against illegal use.
+#endif // DEBUG
+
+public:
+ regMaskTP rsGetModifiedRegsMask() const
+ {
+ assert(rsModifiedRegsMaskInitialized);
+ return rsModifiedRegsMask;
+ }
+
+ void rsClearRegsModified();
+
+ void rsSetRegsModified(regMaskTP mask DEBUGARG(bool suppressDump = false));
+
+ void rsRemoveRegsModified(regMaskTP mask);
+
+ bool rsRegsModified(regMaskTP mask) const
+ {
+ assert(rsModifiedRegsMaskInitialized);
+ return (rsModifiedRegsMask & mask) != 0;
+ }
+
+public: // TODO-Cleanup: Should be private, but GCInfo uses them
+#ifdef LEGACY_BACKEND
+ regMaskTP rsMaskUsed; // currently 'used' registers mask
+#endif // LEGACY_BACKEND
+
+ __declspec(property(get = GetMaskVars, put = SetMaskVars)) regMaskTP rsMaskVars; // mask of registers currently
+ // allocated to variables
+
+ regMaskTP GetMaskVars() const // 'get' property function for rsMaskVars property
+ {
+ return _rsMaskVars;
+ }
+
+ void SetMaskVars(regMaskTP newMaskVars); // 'put' property function for rsMaskVars property
+
+ void AddMaskVars(regMaskTP addMaskVars) // union 'addMaskVars' with the rsMaskVars set
+ {
+ SetMaskVars(_rsMaskVars | addMaskVars);
+ }
+
+ void RemoveMaskVars(regMaskTP removeMaskVars) // remove 'removeMaskVars' from the rsMaskVars set (like bitset DiffD)
+ {
+ SetMaskVars(_rsMaskVars & ~removeMaskVars);
+ }
+
+ void ClearMaskVars() // Like SetMaskVars(RBM_NONE), but without any debug output.
+ {
+ _rsMaskVars = RBM_NONE;
+ }
+
+private:
+ regMaskTP _rsMaskVars; // backing store for rsMaskVars property
+
+#ifdef LEGACY_BACKEND
+ regMaskTP rsMaskLock; // currently 'locked' registers mask
+ regMaskTP rsMaskMult; // currently 'multiply used' registers mask
+#endif // LEGACY_BACKEND
+
+#ifdef _TARGET_ARMARCH_
+ regMaskTP rsMaskCalleeSaved; // mask of the registers pushed/popped in the prolog/epilog
+#endif // _TARGET_ARMARCH_
+
+public: // TODO-Cleanup: Should be private, but Compiler uses it
+ regMaskTP rsMaskResvd; // mask of the registers that are reserved for special purposes (typically empty)
+
+public: // The PreSpill masks are used in LclVars.cpp
+#ifdef _TARGET_ARM_
+ regMaskTP rsMaskPreSpillAlign; // Mask of alignment padding added to prespill to keep double aligned args
+ // at aligned stack addresses.
+ regMaskTP rsMaskPreSpillRegArg; // mask of incoming registers that are spilled at the start of the prolog
+ // This includes registers used to pass a struct (or part of a struct)
+ // and all enregistered user arguments in a varargs call
+#endif // _TARGET_ARM_
+
+#ifdef LEGACY_BACKEND
+
+private:
+ // These getters/setters are ifdef here so that the accesses to these values in sharedfloat.cpp are redirected
+ // to the appropriate value.
+ // With FEATURE_STACK_FP_X87 (x86 FP codegen) we have separate register mask that just handle FP registers.
+ // For all other platforms (and eventually on x86) we use unified register masks that handle both kinds.
+ //
+ regMaskTP rsGetMaskUsed(); // Getter for rsMaskUsed or rsMaskUsedFloat
+ regMaskTP rsGetMaskVars(); // Getter for rsMaskVars or rsMaskRegVarFloat
+ regMaskTP rsGetMaskLock(); // Getter for rsMaskLock or rsMaskLockedFloat
+ regMaskTP rsGetMaskMult(); // Getter for rsMaskMult or 0
+
+ void rsSetMaskUsed(regMaskTP maskUsed); // Setter for rsMaskUsed or rsMaskUsedFloat
+ void rsSetMaskVars(regMaskTP maskVars); // Setter for rsMaskVars or rsMaskRegVarFloat
+ void rsSetMaskLock(regMaskTP maskLock); // Setter for rsMaskLock or rsMaskLockedFloat
+
+ void rsSetUsedTree(regNumber regNum, GenTreePtr tree); // Setter for rsUsedTree[]/genUsedRegsFloat[]
+ void rsFreeUsedTree(regNumber regNum, GenTreePtr tree); // Free for rsUsedTree[]/genUsedRegsFloat[]
+
+public:
+ regPairNo rsFindRegPairNo(regMaskTP regMask);
+
+private:
+ bool rsIsTreeInReg(regNumber reg, GenTreePtr tree);
+
+ regMaskTP rsExcludeHint(regMaskTP regs, regMaskTP excludeHint);
+ regMaskTP rsNarrowHint(regMaskTP regs, regMaskTP narrowHint);
+ regMaskTP rsMustExclude(regMaskTP regs, regMaskTP exclude);
+ regMaskTP rsRegMaskFree();
+ regMaskTP rsRegMaskCanGrab();
+
+ void rsMarkRegUsed(GenTreePtr tree, GenTreePtr addr = 0);
+ // A special case of "rsMarkRegUsed": the register used is an argument register, used to hold part of
+    // the given argument node "promotedStructArg". (The name suggests that we're likely to use this
+    // for a register holding a promoted struct argument, but the implementation doesn't depend on that.) The
+ // "isGCRef" argument indicates whether the register contains a GC reference.
+ void rsMarkArgRegUsedByPromotedFieldArg(GenTreePtr promotedStructArg, regNumber regNum, bool isGCRef);
+
+ void rsMarkRegPairUsed(GenTreePtr tree);
+
+ void rsMarkRegFree(regMaskTP regMask);
+ void rsMarkRegFree(regNumber reg, GenTreePtr tree);
+ void rsMultRegFree(regMaskTP regMask);
+ unsigned rsFreeNeededRegCount(regMaskTP needReg);
+
+ void rsLockReg(regMaskTP regMask);
+ void rsUnlockReg(regMaskTP regMask);
+ void rsLockUsedReg(regMaskTP regMask);
+ void rsUnlockUsedReg(regMaskTP regMask);
+ void rsLockReg(regMaskTP regMask, regMaskTP* usedMask);
+ void rsUnlockReg(regMaskTP regMask, regMaskTP usedMask);
+
+ regMaskTP rsRegExclMask(regMaskTP regMask, regMaskTP rmvMask);
+
+ regNumber rsPickRegInTmpOrder(regMaskTP regMask);
+
+public: // used by emitter (!)
+ regNumber rsGrabReg(regMaskTP regMask);
+
+private:
+ regNumber rsPickReg(regMaskTP regMask = RBM_NONE, regMaskTP regBest = RBM_NONE);
+
+public: // used by emitter (!)
+ regNumber rsPickFreeReg(regMaskTP regMaskHint = RBM_ALLINT);
+
+private:
+ regPairNo rsGrabRegPair(regMaskTP regMask);
+ regPairNo rsPickRegPair(regMaskTP regMask);
+
+ class RegisterPreference
+ {
+ public:
+ regMaskTP ok;
+ regMaskTP best;
+ RegisterPreference(regMaskTP _ok, regMaskTP _best)
+ {
+ ok = _ok;
+ best = _best;
+ }
+ };
+ regNumber PickRegFloat(GenTreePtr tree,
+ var_types type = TYP_DOUBLE,
+ RegisterPreference* pref = NULL,
+ bool bUsed = true);
+ regNumber PickRegFloat(var_types type = TYP_DOUBLE, RegisterPreference* pref = NULL, bool bUsed = true);
+ regNumber PickRegFloatOtherThan(GenTreePtr tree, var_types type, regNumber reg);
+ regNumber PickRegFloatOtherThan(var_types type, regNumber reg);
+
+ regMaskTP RegFreeFloat();
+
+ void SetUsedRegFloat(GenTreePtr tree, bool bValue);
+ void SetLockedRegFloat(GenTreePtr tree, bool bValue);
+ bool IsLockedRegFloat(GenTreePtr tree);
+
+ var_types rsRmvMultiReg(regNumber reg);
+ void rsRecMultiReg(regNumber reg, var_types type);
+#endif // LEGACY_BACKEND
+
+public:
+#ifdef DEBUG
+ /*****************************************************************************
+     * Should we stress the register tracking logic?
+ * This is set via COMPlus_JitStressRegs.
+ * The following values are ordered, such that any value greater than RS_xx
+ * implies RS_xx.
+ * LSRA defines a different set of values, but uses the same COMPlus_JitStressRegs
+ * value, with the same notion of relative ordering.
+ * 1 = rsPickReg() picks 'bad' registers.
+ * 2 = codegen spills at safe points. This is still flaky
+ */
+ enum rsStressRegsType
+ {
+ RS_STRESS_NONE = 0,
+ RS_PICK_BAD_REG = 01,
+ RS_SPILL_SAFE = 02,
+ };
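+    // For example (illustrative): because the values above are ordered,
+    // COMPlus_JitStressRegs=2 (RS_SPILL_SAFE) also implies RS_PICK_BAD_REG.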
+ rsStressRegsType rsStressRegs();
+#endif // DEBUG
+
+private:
+ //-------------------------------------------------------------------------
+ //
+ // The following tables keep track of spilled register values.
+ //
+
+ // When a register gets spilled, the old information is stored here
+ SpillDsc* rsSpillDesc[REG_COUNT];
+ SpillDsc* rsSpillFree; // list of unused spill descriptors
+
+#ifdef LEGACY_BACKEND
+ SpillDsc* rsSpillFloat;
+#endif // LEGACY_BACKEND
+
+ void rsSpillChk();
+ void rsSpillInit();
+ void rsSpillDone();
+ void rsSpillBeg();
+ void rsSpillEnd();
+
+ void rsSpillTree(regNumber reg, GenTreePtr tree, unsigned regIdx = 0);
+
+#if defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+ void rsSpillFPStack(GenTreePtr tree);
+#endif // defined(_TARGET_X86_) && !FEATURE_STACK_FP_X87
+
+#ifdef LEGACY_BACKEND
+ void rsSpillReg(regNumber reg);
+ void rsSpillRegIfUsed(regNumber reg);
+ void rsSpillRegs(regMaskTP regMask);
+#endif // LEGACY_BACKEND
+
+ SpillDsc* rsGetSpillInfo(GenTreePtr tree,
+ regNumber reg,
+ SpillDsc** pPrevDsc = nullptr
+#ifdef LEGACY_BACKEND
+ ,
+ SpillDsc** pMultiDsc = NULL
+#endif // LEGACY_BACKEND
+ );
+
+ TempDsc* rsGetSpillTempWord(regNumber oldReg, SpillDsc* dsc, SpillDsc* prevDsc);
+
+#ifdef LEGACY_BACKEND
+ enum ExactReg
+ {
+ ANY_REG,
+ EXACT_REG
+ };
+ enum KeepReg
+ {
+ FREE_REG,
+ KEEP_REG
+ };
+
+ regNumber rsUnspillOneReg(GenTreePtr tree, regNumber oldReg, KeepReg willKeepNewReg, regMaskTP needReg);
+#endif // LEGACY_BACKEND
+
+ TempDsc* rsUnspillInPlace(GenTreePtr tree, regNumber oldReg, unsigned regIdx = 0);
+
+#ifdef LEGACY_BACKEND
+ void rsUnspillReg(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg);
+
+ void rsUnspillRegPair(GenTreePtr tree, regMaskTP needReg, KeepReg keepReg);
+#endif // LEGACY_BACKEND
+
+ void rsMarkSpill(GenTreePtr tree, regNumber reg);
+
+#ifdef LEGACY_BACKEND
+ void rsMarkUnspill(GenTreePtr tree, regNumber reg);
+#endif // LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+ regMaskTP rsMaskUsedFloat;
+ regMaskTP rsMaskRegVarFloat;
+ regMaskTP rsMaskLockedFloat;
+ GenTreePtr genUsedRegsFloat[REG_FPCOUNT];
+ LclVarDsc* genRegVarsFloat[REG_FPCOUNT];
+#endif // FEATURE_STACK_FP_X87
+};
+
+//-------------------------------------------------------------------------
+//
+// These are used to track the contents of the registers during
+// code generation.
+//
+// Only integer registers are tracked.
+//
+
+struct RegValDsc
+{
+ regValKind rvdKind;
+ union {
+ ssize_t rvdIntCnsVal; // for rvdKind == RV_INT_CNS
+ unsigned rvdLclVarNum; // for rvdKind == RV_LCL_VAR, RV_LCL_VAR_LNG_LO, RV_LCL_VAR_LNG_HI
+ };
+};
+
+class RegTracker
+{
+ Compiler* compiler;
+ RegSet* regSet;
+ RegValDsc rsRegValues[REG_COUNT];
+
+public:
+ void rsTrackInit(Compiler* comp, RegSet* rs)
+ {
+ compiler = comp;
+ regSet = rs;
+ rsTrackRegClr();
+ }
+
+ void rsTrackRegClr();
+ void rsTrackRegClrPtr();
+ void rsTrackRegTrash(regNumber reg);
+ void rsTrackRegMaskTrash(regMaskTP regMask);
+ regMaskTP rsTrashRegsForGCInterruptability();
+ void rsTrackRegIntCns(regNumber reg, ssize_t val);
+ void rsTrackRegLclVar(regNumber reg, unsigned var);
+ void rsTrackRegLclVarLng(regNumber reg, unsigned var, bool low);
+ bool rsTrackIsLclVarLng(regValKind rvKind);
+ void rsTrackRegClsVar(regNumber reg, GenTreePtr clsVar);
+ void rsTrackRegCopy(regNumber reg1, regNumber reg2);
+ void rsTrackRegSwap(regNumber reg1, regNumber reg2);
+ void rsTrackRegAssign(GenTree* op1, GenTree* op2);
+
+ regNumber rsIconIsInReg(ssize_t val, ssize_t* closeDelta = nullptr);
+ bool rsIconIsInReg(ssize_t val, regNumber reg);
+ regNumber rsLclIsInReg(unsigned var);
+ regPairNo rsLclIsInRegPair(unsigned var);
+
+//---------------------- Load suppression ---------------------------------
+
+#if REDUNDANT_LOAD
+
+ void rsTrashLclLong(unsigned var);
+ void rsTrashLcl(unsigned var);
+ void rsTrashRegSet(regMaskTP regMask);
+
+ regMaskTP rsUselessRegs();
+
+#endif // REDUNDANT_LOAD
+};
+#endif // _REGSET_H
diff --git a/src/jit/scopeinfo.cpp b/src/jit/scopeinfo.cpp
new file mode 100644
index 0000000000..f2a7902317
--- /dev/null
+++ b/src/jit/scopeinfo.cpp
@@ -0,0 +1,1271 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ScopeInfo XX
+XX XX
+XX Classes to gather the Scope information from the local variable info. XX
+XX Translates the given LocalVarTab from IL instruction offsets into XX
+XX native code offsets. XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+/******************************************************************************
+ * Debuggable code
+ *
+ * We break up blocks at the start and end IL ranges of the local variables.
+ * This is because IL offsets do not correspond exactly to native offsets
+ * except at block boundaries. No basic-blocks are deleted (not even
+ * unreachable), so there will not be any missing address-ranges, though the
+ * blocks themselves may not be ordered. (Also, internal blocks may be added).
+ * o At the start of each basic block, siBeginBlock() checks if any variables
+ * are coming in scope, and adds an open scope to siOpenScopeList if needed.
+ * o At the end of each basic block, siEndBlock() checks if any variables
+ * are going out of scope and moves the open scope from siOpenScopeLast
+ * to siScopeList.
+ *
+ * Optimized code
+ *
+ * We cannot break up the blocks as this will produce different code under
+ * the debugger. Instead we make a best effort.
+ * o At the start of each basic block, siBeginBlock() adds open scopes
+ * corresponding to block->bbLiveIn to siOpenScopeList. Also siUpdate()
+ * is called to close scopes for variables which are not live anymore.
+ * o siEndBlock() closes scopes for any variables which go out of range
+ * before bbCodeOffsEnd.
+ * o siCloseAllOpenScopes() closes any open scopes after all the blocks.
+ *   This should only be needed if some basic blocks are deleted/out of order,
+ * etc.
+ * Also,
+ * o At every assignment to a variable, siCheckVarScope() adds an open scope
+ * for the variable being assigned to.
+ * o genChangeLife() calls siUpdate() which closes scopes for variables which
+ * are not live anymore.
+ *
+ ******************************************************************************
+ */
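+
+/*****************************************************************************
+ * Rough sketch of how codegen is expected to drive the hooks described above
+ * (illustrative only; the actual call sites live elsewhere in codegen):
+ *
+ *     siInit();
+ *     for each BasicBlock:
+ *         siBeginBlock(block);   // open scopes for variables coming in scope
+ *         ... generate code; siCheckVarScope()/siUpdate() as lives change ...
+ *         siEndBlock(block);     // close scopes that end in this block
+ *     siCloseAllOpenScopes();    // close anything still open at the end
+ */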
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "emit.h"
+#include "codegen.h"
+
+/*****************************************************************************/
+#ifdef DEBUGGING_SUPPORT
+/*****************************************************************************/
+
+bool Compiler::siVarLoc::vlIsInReg(regNumber reg)
+{
+ switch (vlType)
+ {
+ case VLT_REG:
+ return (vlReg.vlrReg == reg);
+ case VLT_REG_REG:
+ return ((vlRegReg.vlrrReg1 == reg) || (vlRegReg.vlrrReg2 == reg));
+ case VLT_REG_STK:
+ return (vlRegStk.vlrsReg == reg);
+ case VLT_STK_REG:
+ return (vlStkReg.vlsrReg == reg);
+
+ case VLT_STK:
+ case VLT_STK2:
+ case VLT_FPSTK:
+ return false;
+
+ default:
+ assert(!"Bad locType");
+ return false;
+ }
+}
+
+bool Compiler::siVarLoc::vlIsOnStk(regNumber reg, signed offset)
+{
+ regNumber actualReg;
+
+ switch (vlType)
+ {
+
+ case VLT_REG_STK:
+ actualReg = vlRegStk.vlrsStk.vlrssBaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && (vlRegStk.vlrsStk.vlrssOffset == offset));
+ case VLT_STK_REG:
+ actualReg = vlStkReg.vlsrStk.vlsrsBaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && (vlStkReg.vlsrStk.vlsrsOffset == offset));
+ case VLT_STK:
+ actualReg = vlStk.vlsBaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && (vlStk.vlsOffset == offset));
+ case VLT_STK2:
+ actualReg = vlStk2.vls2BaseReg;
+ if ((int)actualReg == (int)ICorDebugInfo::REGNUM_AMBIENT_SP)
+ {
+ actualReg = REG_SPBASE;
+ }
+ return ((actualReg == reg) && ((vlStk2.vls2Offset == offset) || (vlStk2.vls2Offset == (offset - 4))));
+
+ case VLT_REG:
+ case VLT_REG_FP:
+ case VLT_REG_REG:
+ case VLT_FPSTK:
+ return false;
+
+ default:
+ assert(!"Bad locType");
+ return false;
+ }
+}
+
+/*============================================================================
+ *
+ * Implementation for ScopeInfo
+ *
+ *
+ * Whenever a variable comes into scope, add it to the list.
+ * When a varDsc goes dead, end its previous scope entry, and make a new one
+ * which is unavailable.
+ * When a varDsc goes live, end its previous un-available entry (if any) and
+ * set its new entry as available.
+ *
+ *============================================================================
+ */
+
+/*****************************************************************************
+ * siNewScope
+ *
+ * Creates a new scope and adds it to the Open scope list.
+ */
+
+CodeGen::siScope* CodeGen::siNewScope(unsigned LVnum, unsigned varNum)
+{
+ bool tracked = compiler->lvaTable[varNum].lvTracked;
+ unsigned varIndex = compiler->lvaTable[varNum].lvVarIndex;
+
+ if (tracked)
+ {
+ siEndTrackedScope(varIndex);
+ }
+
+ siScope* newScope = (siScope*)compiler->compGetMem(sizeof(*newScope), CMK_SiScope);
+
+ newScope->scStartLoc.CaptureLocation(getEmitter());
+ assert(newScope->scStartLoc.Valid());
+
+ newScope->scEndLoc.Init();
+
+ newScope->scLVnum = LVnum;
+ newScope->scVarNum = varNum;
+ newScope->scNext = nullptr;
+ newScope->scStackLevel = genStackLevel; // used only by stack vars
+
+ siOpenScopeLast->scNext = newScope;
+ newScope->scPrev = siOpenScopeLast;
+ siOpenScopeLast = newScope;
+
+ if (tracked)
+ {
+ siLatestTrackedScopes[varIndex] = newScope;
+ }
+
+ return newScope;
+}
+
+/*****************************************************************************
+ * siRemoveFromOpenScopeList
+ *
+ * Removes a scope from the open-scope list and puts it into the done-scope list
+ */
+
+void CodeGen::siRemoveFromOpenScopeList(CodeGen::siScope* scope)
+{
+ assert(scope);
+ assert(scope->scEndLoc.Valid());
+
+ // Remove from open-scope list
+
+ scope->scPrev->scNext = scope->scNext;
+ if (scope->scNext)
+ {
+ scope->scNext->scPrev = scope->scPrev;
+ }
+ else
+ {
+ siOpenScopeLast = scope->scPrev;
+ }
+
+ // Add to the finished scope list. (Try to) filter out scopes of length 0.
+
+ if (scope->scStartLoc != scope->scEndLoc)
+ {
+ siScopeLast->scNext = scope;
+ siScopeLast = scope;
+ siScopeCnt++;
+ }
+}
+
+/*----------------------------------------------------------------------------
+ * These functions end scopes given different types of parameters
+ *----------------------------------------------------------------------------
+ */
+
+/*****************************************************************************
+ * For tracked vars, we don't need to search for the scope in the list as we
+ * have a pointer to the open scopes of all tracked variables.
+ */
+
+void CodeGen::siEndTrackedScope(unsigned varIndex)
+{
+ siScope* scope = siLatestTrackedScopes[varIndex];
+ if (!scope)
+ {
+ return;
+ }
+
+ scope->scEndLoc.CaptureLocation(getEmitter());
+ assert(scope->scEndLoc.Valid());
+
+ siRemoveFromOpenScopeList(scope);
+
+ siLatestTrackedScopes[varIndex] = nullptr;
+}
+
+/*****************************************************************************
+ * If we don't know that the variable is tracked, this function handles both
+ * cases.
+ */
+
+void CodeGen::siEndScope(unsigned varNum)
+{
+ for (siScope* scope = siOpenScopeList.scNext; scope; scope = scope->scNext)
+ {
+ if (scope->scVarNum == varNum)
+ {
+ siEndScope(scope);
+ return;
+ }
+ }
+
+ // At this point, we probably have a bad LocalVarTab
+
+ if (compiler->opts.compDbgCode)
+ {
+        // The LocalVarTab must be inconsistent if we reached this point, so
+        // pretend that we don't have any scope info.
+ assert(!siVerifyLocalVarTab());
+
+ compiler->opts.compScopeInfo = false;
+ }
+}
+
+/*****************************************************************************
+ * If we have a handle to the siScope structure, we handle ending this scope
+ * differently than if we just had a variable number. This saves us searching
+ * the open-scope list again.
+ */
+
+void CodeGen::siEndScope(siScope* scope)
+{
+ scope->scEndLoc.CaptureLocation(getEmitter());
+ assert(scope->scEndLoc.Valid());
+
+ siRemoveFromOpenScopeList(scope);
+
+ LclVarDsc& lclVarDsc1 = compiler->lvaTable[scope->scVarNum];
+ if (lclVarDsc1.lvTracked)
+ {
+ siLatestTrackedScopes[lclVarDsc1.lvVarIndex] = nullptr;
+ }
+}
+
+/*****************************************************************************
+ * siVerifyLocalVarTab
+ *
+ * Checks the LocalVarTab for consistency. The VM may not have properly
+ * verified the LocalVariableTable.
+ */
+
+#ifdef DEBUG
+
+bool CodeGen::siVerifyLocalVarTab()
+{
+ // No entries with overlapping lives should have the same slot.
+
+ for (unsigned i = 0; i < compiler->info.compVarScopesCount; i++)
+ {
+ for (unsigned j = i + 1; j < compiler->info.compVarScopesCount; j++)
+ {
+ unsigned slot1 = compiler->info.compVarScopes[i].vsdVarNum;
+ unsigned beg1 = compiler->info.compVarScopes[i].vsdLifeBeg;
+ unsigned end1 = compiler->info.compVarScopes[i].vsdLifeEnd;
+
+ unsigned slot2 = compiler->info.compVarScopes[j].vsdVarNum;
+ unsigned beg2 = compiler->info.compVarScopes[j].vsdLifeBeg;
+ unsigned end2 = compiler->info.compVarScopes[j].vsdLifeEnd;
+
+ if (slot1 == slot2 && (end1 > beg2 && beg1 < end2))
+ {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
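+
+// For example (hypothetical entries): two entries that share slot 3 with IL
+// lives [002..010) and [006..012) satisfy (end1 > beg2 && beg1 < end2), so
+// siVerifyLocalVarTab() would return false for such a table.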
+
+#endif
+
+/*============================================================================
+ * INTERFACE (public) Functions for ScopeInfo
+ *============================================================================
+ */
+
+void CodeGen::siInit()
+{
+#ifdef _TARGET_X86_
+ assert((unsigned)ICorDebugInfo::REGNUM_EAX == REG_EAX);
+ assert((unsigned)ICorDebugInfo::REGNUM_ECX == REG_ECX);
+ assert((unsigned)ICorDebugInfo::REGNUM_EDX == REG_EDX);
+ assert((unsigned)ICorDebugInfo::REGNUM_EBX == REG_EBX);
+ assert((unsigned)ICorDebugInfo::REGNUM_ESP == REG_ESP);
+ assert((unsigned)ICorDebugInfo::REGNUM_EBP == REG_EBP);
+ assert((unsigned)ICorDebugInfo::REGNUM_ESI == REG_ESI);
+ assert((unsigned)ICorDebugInfo::REGNUM_EDI == REG_EDI);
+#endif
+
+ assert((unsigned)ICorDebugInfo::VLT_REG == Compiler::VLT_REG);
+ assert((unsigned)ICorDebugInfo::VLT_STK == Compiler::VLT_STK);
+ assert((unsigned)ICorDebugInfo::VLT_REG_REG == Compiler::VLT_REG_REG);
+ assert((unsigned)ICorDebugInfo::VLT_REG_STK == Compiler::VLT_REG_STK);
+ assert((unsigned)ICorDebugInfo::VLT_STK_REG == Compiler::VLT_STK_REG);
+ assert((unsigned)ICorDebugInfo::VLT_STK2 == Compiler::VLT_STK2);
+ assert((unsigned)ICorDebugInfo::VLT_FPSTK == Compiler::VLT_FPSTK);
+ assert((unsigned)ICorDebugInfo::VLT_FIXED_VA == Compiler::VLT_FIXED_VA);
+ assert((unsigned)ICorDebugInfo::VLT_COUNT == Compiler::VLT_COUNT);
+ assert((unsigned)ICorDebugInfo::VLT_INVALID == Compiler::VLT_INVALID);
+
+ /* ICorDebugInfo::VarLoc and siVarLoc should overlap exactly as we cast
+ * one to the other in eeSetLVinfo()
+ * Below is a "required but not sufficient" condition
+ */
+
+ assert(sizeof(ICorDebugInfo::VarLoc) == sizeof(Compiler::siVarLoc));
+
+ assert(compiler->opts.compScopeInfo);
+
+ siOpenScopeList.scNext = nullptr;
+ siOpenScopeLast = &siOpenScopeList;
+ siScopeLast = &siScopeList;
+
+ siScopeCnt = 0;
+
+ VarSetOps::AssignNoCopy(compiler, siLastLife, VarSetOps::MakeEmpty(compiler));
+ siLastEndOffs = 0;
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ siInFuncletRegion = false;
+#endif // FEATURE_EH_FUNCLETS
+
+ for (unsigned i = 0; i < lclMAX_TRACKED; i++)
+ {
+ siLatestTrackedScopes[i] = nullptr;
+ }
+
+ compiler->compResetScopeLists();
+}
+
+/*****************************************************************************
+ * siBeginBlock
+ *
+ * Called at the beginning of code-gen for a block. Checks if any scopes
+ * need to be opened.
+ */
+
+void CodeGen::siBeginBlock(BasicBlock* block)
+{
+ assert(block != nullptr);
+
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+
+ if (block->bbFlags & BBF_FUNCLET_BEG)
+ {
+ // For now, don't report any scopes in funclets. JIT64 doesn't.
+ siInFuncletRegion = true;
+
+ JITDUMP("Scope info: found beginning of funclet region at block BB%02u; ignoring following blocks\n",
+ block->bbNum);
+
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nScope info: begin block BB%02u, IL range ", block->bbNum);
+ block->dspBlockILRange();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ unsigned beginOffs = block->bbCodeOffs;
+
+ if (beginOffs == BAD_IL_OFFSET)
+ {
+ JITDUMP("Scope info: ignoring block beginning\n");
+ return;
+ }
+
+ if (!compiler->opts.compDbgCode)
+ {
+ /* For non-debuggable code */
+
+ // End scope of variables which are not live for this block
+
+ siUpdate();
+
+ // Check that vars which are live on entry have an open scope
+
+ VARSET_ITER_INIT(compiler, iter, block->bbLiveIn, i);
+ while (iter.NextElem(compiler, &i))
+ {
+ unsigned varNum = compiler->lvaTrackedToVarNum[i];
+ // lvRefCnt may go down to 0 after liveness-analysis.
+ // So we need to check if this tracked variable is actually used.
+ if (!compiler->lvaTable[varNum].lvIsInReg() && !compiler->lvaTable[varNum].lvOnFrame)
+ {
+ assert(compiler->lvaTable[varNum].lvRefCnt == 0);
+ continue;
+ }
+
+ siCheckVarScope(varNum, beginOffs);
+ }
+ }
+ else
+ {
+ // For debuggable code, scopes can begin only on block boundaries.
+ // Check if there are any scopes on the current block's start boundary.
+
+ VarScopeDsc* varScope;
+
+#if FEATURE_EH_FUNCLETS
+
+ // If we find a spot where the code offset isn't what we expect, because
+ // there is a gap, it might be because we've moved the funclets out of
+ // line. Catch up with the enter and exit scopes of the current block.
+ // Ignore the enter/exit scope changes of the missing scopes, which for
+ // funclets must be matched.
+
+ if (siLastEndOffs != beginOffs)
+ {
+ assert(beginOffs > 0);
+ assert(siLastEndOffs < beginOffs);
+
+ JITDUMP("Scope info: found offset hole. lastOffs=%u, currOffs=%u\n", siLastEndOffs, beginOffs);
+
+ // Skip enter scopes
+ while ((varScope = compiler->compGetNextEnterScope(beginOffs - 1, true)) != nullptr)
+ {
+ /* do nothing */
+ JITDUMP("Scope info: skipping enter scope, LVnum=%u\n", varScope->vsdLVnum);
+ }
+
+ // Skip exit scopes
+ while ((varScope = compiler->compGetNextExitScope(beginOffs - 1, true)) != nullptr)
+ {
+ /* do nothing */
+ JITDUMP("Scope info: skipping exit scope, LVnum=%u\n", varScope->vsdLVnum);
+ }
+ }
+
+#else // FEATURE_EH_FUNCLETS
+
+ if (siLastEndOffs != beginOffs)
+ {
+ assert(siLastEndOffs < beginOffs);
+ return;
+ }
+
+#endif // FEATURE_EH_FUNCLETS
+
+ while ((varScope = compiler->compGetNextEnterScope(beginOffs)) != nullptr)
+ {
+ // brace-matching editor workaround for following line: (
+ JITDUMP("Scope info: opening scope, LVnum=%u [%03X..%03X)\n", varScope->vsdLVnum, varScope->vsdLifeBeg,
+ varScope->vsdLifeEnd);
+
+ siNewScope(varScope->vsdLVnum, varScope->vsdVarNum);
+
+#ifdef DEBUG
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varScope->vsdVarNum];
+ if (VERBOSE)
+ {
+ printf("Scope info: >> new scope, VarNum=%u, tracked? %s, VarIndex=%u, bbLiveIn=%s ",
+ varScope->vsdVarNum, lclVarDsc1->lvTracked ? "yes" : "no", lclVarDsc1->lvVarIndex,
+ VarSetOps::ToString(compiler, block->bbLiveIn));
+ dumpConvertedVarSet(compiler, block->bbLiveIn);
+ printf("\n");
+ }
+ assert(!lclVarDsc1->lvTracked || VarSetOps::IsMember(compiler, block->bbLiveIn, lclVarDsc1->lvVarIndex));
+#endif // DEBUG
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ siDispOpenScopes();
+ }
+#endif
+}
+
+/*****************************************************************************
+ * siEndBlock
+ *
+ * Called at the end of code-gen for a block. Any closing scopes are marked
+ * as such. Note that if we are collecting LocalVar info, scopes can
+ * only begin or end at block boundaries for debuggable code.
+ */
+
+void CodeGen::siEndBlock(BasicBlock* block)
+{
+ assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nScope info: end block BB%02u, IL range ", block->bbNum);
+ block->dspBlockILRange();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ unsigned endOffs = block->bbCodeOffsEnd;
+
+ if (endOffs == BAD_IL_OFFSET)
+ {
+ JITDUMP("Scope info: ignoring block end\n");
+ return;
+ }
+
+ // If non-debuggable code, find all scopes which end over this block
+ // and close them. For debuggable code, scopes will only end on block
+ // boundaries.
+
+ VarScopeDsc* varScope;
+ while ((varScope = compiler->compGetNextExitScope(endOffs, !compiler->opts.compDbgCode)) != nullptr)
+ {
+ // brace-matching editor workaround for following line: (
+ JITDUMP("Scope info: ending scope, LVnum=%u [%03X..%03X)\n", varScope->vsdLVnum, varScope->vsdLifeBeg,
+ varScope->vsdLifeEnd);
+
+ unsigned varNum = varScope->vsdVarNum;
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varNum];
+
+ assert(lclVarDsc1);
+
+ if (lclVarDsc1->lvTracked)
+ {
+ siEndTrackedScope(lclVarDsc1->lvVarIndex);
+ }
+ else
+ {
+ siEndScope(varNum);
+ }
+ }
+
+ siLastEndOffs = endOffs;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ siDispOpenScopes();
+ }
+#endif
+}
+
+/*****************************************************************************
+ * siUpdate
+ *
+ * Called at the start of basic blocks, and during code-gen of a block,
+ * for non-debuggable code, whenever the life of any tracked variable changes
+ * and the appropriate code has been generated. For debuggable code, variables are
+ * live over their entire scope, and so they go live or dead only on
+ * block boundaries.
+ */
+void CodeGen::siUpdate()
+{
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+ if (compiler->opts.compDbgCode)
+ {
+ return;
+ }
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ VARSET_TP VARSET_INIT_NOCOPY(killed, VarSetOps::Diff(compiler, siLastLife, compiler->compCurLife));
+ assert(VarSetOps::IsSubset(compiler, killed, compiler->lvaTrackedVars));
+
+ VARSET_ITER_INIT(compiler, iter, killed, i);
+ while (iter.NextElem(compiler, &i))
+ {
+#ifdef DEBUG
+ unsigned lclNum = compiler->lvaTrackedToVarNum[i];
+ LclVarDsc* lclVar = &compiler->lvaTable[lclNum];
+ assert(lclVar->lvTracked);
+#endif
+
+ siScope* scope = siLatestTrackedScopes[i];
+ siEndTrackedScope(i);
+ }
+
+ VarSetOps::Assign(compiler, siLastLife, compiler->compCurLife);
+}
+
+/*****************************************************************************
+ * In optimized code, we may not have access to gtLclVar.gtLclILoffs.
+ * So there may be ambiguity as to which entry in compiler->info.compVarScopes
+ * to use. We search the entire table and find the entry whose life
+ * begins closest to the given offset.
+ */
+
+/*****************************************************************************
+ * siCheckVarScope
+ *
+ * For non-debuggable code, whenever we come across a GenTree which is an
+ * assignment to a local variable, this function is called to check if the
+ * variable has an open scope. Also, check if it has the correct LVnum.
+ */
+
+void CodeGen::siCheckVarScope(unsigned varNum, IL_OFFSET offs)
+{
+ assert(compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0));
+
+#if FEATURE_EH_FUNCLETS
+ if (siInFuncletRegion)
+ {
+ return;
+ }
+#endif // FEATURE_EH_FUNCLETS
+
+ if (offs == BAD_IL_OFFSET)
+ {
+ return;
+ }
+
+ siScope* scope;
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varNum];
+
+ // If there is an open scope corresponding to varNum, find it
+
+ if (lclVarDsc1->lvTracked)
+ {
+ scope = siLatestTrackedScopes[lclVarDsc1->lvVarIndex];
+ }
+ else
+ {
+ for (scope = siOpenScopeList.scNext; scope; scope = scope->scNext)
+ {
+ if (scope->scVarNum == varNum)
+ {
+ break;
+ }
+ }
+ }
+
+ // Look up the compiler->info.compVarScopes[] to find the local var info for (varNum->lvSlotNum, offs)
+ VarScopeDsc* varScope = compiler->compFindLocalVar(varNum, offs);
+ if (varScope == nullptr)
+ {
+ return;
+ }
+
+ // If the currently open scope does not have the correct LVnum, close it
+ // and create a new scope with this new LVnum
+
+ if (scope)
+ {
+ if (scope->scLVnum != varScope->vsdLVnum)
+ {
+ siEndScope(scope);
+ siNewScope(varScope->vsdLVnum, varScope->vsdVarNum);
+ }
+ }
+ else
+ {
+ siNewScope(varScope->vsdLVnum, varScope->vsdVarNum);
+ }
+}
+
+/*****************************************************************************
+ * siCloseAllOpenScopes
+ *
+ * For unreachable code, or optimized code with blocks reordered, there may be
+ * scopes left open at the end. Simply close them.
+ */
+
+void CodeGen::siCloseAllOpenScopes()
+{
+ assert(siOpenScopeList.scNext);
+
+ while (siOpenScopeList.scNext)
+ {
+ siEndScope(siOpenScopeList.scNext);
+ }
+}
+
+/*****************************************************************************
+ * siDispOpenScopes
+ *
+ * Displays all the vars on the open-scope list
+ */
+
+#ifdef DEBUG
+
+void CodeGen::siDispOpenScopes()
+{
+ assert(compiler->opts.compScopeInfo && (compiler->info.compVarScopesCount > 0));
+
+ printf("Scope info: open scopes =\n");
+
+ if (siOpenScopeList.scNext == nullptr)
+ {
+ printf(" <none>\n");
+ }
+ else
+ {
+ for (siScope* scope = siOpenScopeList.scNext; scope != nullptr; scope = scope->scNext)
+ {
+ VarScopeDsc* localVars = compiler->info.compVarScopes;
+
+ for (unsigned i = 0; i < compiler->info.compVarScopesCount; i++, localVars++)
+ {
+ if (localVars->vsdLVnum == scope->scLVnum)
+ {
+ const char* name = compiler->VarNameToStr(localVars->vsdName);
+ // brace-matching editor workaround for following line: (
+ printf(" %u (%s) [%03X..%03X)\n", localVars->vsdLVnum, name == nullptr ? "UNKNOWN" : name,
+ localVars->vsdLifeBeg, localVars->vsdLifeEnd);
+ break;
+ }
+ }
+ }
+ }
+}
+
+#endif // DEBUG
+
+/*============================================================================
+ *
+ * Implementation for PrologScopeInfo
+ *
+ *============================================================================
+ */
+
+/*****************************************************************************
+ * psiNewPrologScope
+ *
+ * Creates a new scope and adds it to the Open scope list.
+ */
+
+CodeGen::psiScope* CodeGen::psiNewPrologScope(unsigned LVnum, unsigned slotNum)
+{
+ psiScope* newScope = (psiScope*)compiler->compGetMem(sizeof(*newScope), CMK_SiScope);
+
+ newScope->scStartLoc.CaptureLocation(getEmitter());
+ assert(newScope->scStartLoc.Valid());
+
+ newScope->scEndLoc.Init();
+
+ newScope->scLVnum = LVnum;
+ newScope->scSlotNum = slotNum;
+
+ newScope->scNext = nullptr;
+ psiOpenScopeLast->scNext = newScope;
+ newScope->scPrev = psiOpenScopeLast;
+ psiOpenScopeLast = newScope;
+
+ return newScope;
+}
+
+/*****************************************************************************
+ * psiEndPrologScope
+ *
+ * Remove the scope from the Open-scope list and add it to the finished-scopes
+ * list if its length is non-zero
+ */
+
+void CodeGen::psiEndPrologScope(psiScope* scope)
+{
+ scope->scEndLoc.CaptureLocation(getEmitter());
+ assert(scope->scEndLoc.Valid());
+
+ // Remove from open-scope list
+ scope->scPrev->scNext = scope->scNext;
+ if (scope->scNext)
+ {
+ scope->scNext->scPrev = scope->scPrev;
+ }
+ else
+ {
+ psiOpenScopeLast = scope->scPrev;
+ }
+
+ // Add to the finished scope list.
+ // If the length is zero, it means that the prolog is empty. In that case,
+ // CodeGen::genSetScopeInfo will report the liveness of all arguments
+ // as spanning the first instruction in the method, so that they can
+ // at least be inspected on entry to the method.
+ if (scope->scStartLoc != scope->scEndLoc || scope->scStartLoc.IsOffsetZero())
+ {
+ psiScopeLast->scNext = scope;
+ psiScopeLast = scope;
+ psiScopeCnt++;
+ }
+}
+
+/*============================================================================
+ * INTERFACE (protected) Functions for PrologScopeInfo
+ *============================================================================
+ */
+
+//------------------------------------------------------------------------
+// psSetScopeOffset: Set the offset of the newScope to the offset of the lclVar
+//
+// Arguments:
+// 'newScope' the new scope object whose offset is to be set to the lclVarDsc offset.
+//    'lclVarDsc'  the LclVarDsc of the variable whose stack offset is used to compute the scope offset.
+//
+//
+void CodeGen::psSetScopeOffset(psiScope* newScope, LclVarDsc* lclVarDsc)
+{
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = REG_SPBASE;
+
+#ifdef _TARGET_AMD64_
+ // scOffset = offset from caller SP - REGSIZE_BYTES
+ // TODO-Cleanup - scOffset needs to be understood. For now just matching with the existing definition.
+ newScope->u2.scOffset =
+ compiler->lvaToCallerSPRelativeOffset(lclVarDsc->lvStkOffs, lclVarDsc->lvFramePointerBased) + REGSIZE_BYTES;
+#else // !_TARGET_AMD64_
+ if (doubleAlignOrFramePointerUsed())
+ {
+ // REGSIZE_BYTES - for the pushed value of EBP
+ newScope->u2.scOffset = lclVarDsc->lvStkOffs - REGSIZE_BYTES;
+ }
+ else
+ {
+ newScope->u2.scOffset = lclVarDsc->lvStkOffs - genTotalFrameSize();
+ }
+#endif // !_TARGET_AMD64_
+}
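+
+// For example (illustrative numbers): on x86 with REGSIZE_BYTES == 4 and an
+// EBP frame, a stack parameter at lvStkOffs == 8 would be reported with
+// u2.scOffset == 4 (lvStkOffs - REGSIZE_BYTES, per the pushed-EBP adjustment above).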
+
+/*============================================================================
+* INTERFACE (public) Functions for PrologScopeInfo
+*============================================================================
+*/
+
+/*****************************************************************************
+ * psiBegProlog
+ *
+ * Initializes the PrologScopeInfo, and creates open scopes for all the
+ * parameters of the method.
+ */
+
+void CodeGen::psiBegProlog()
+{
+ assert(compiler->compGeneratingProlog);
+
+ VarScopeDsc* varScope;
+
+ psiOpenScopeList.scNext = nullptr;
+ psiOpenScopeLast = &psiOpenScopeList;
+ psiScopeLast = &psiScopeList;
+ psiScopeCnt = 0;
+
+ compiler->compResetScopeLists();
+
+ while ((varScope = compiler->compGetNextEnterScope(0)) != nullptr)
+ {
+ LclVarDsc* lclVarDsc1 = &compiler->lvaTable[varScope->vsdVarNum];
+
+ if (!lclVarDsc1->lvIsParam)
+ {
+ continue;
+ }
+
+ psiScope* newScope = psiNewPrologScope(varScope->vsdLVnum, varScope->vsdVarNum);
+
+ if (lclVarDsc1->lvIsRegArg)
+ {
+ bool isStructHandled = false;
+#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc;
+ if (varTypeIsStruct(lclVarDsc1))
+ {
+ CORINFO_CLASS_HANDLE typeHnd = lclVarDsc1->lvVerTypeInfo.GetClassHandle();
+ assert(typeHnd != nullptr);
+ compiler->eeGetSystemVAmd64PassStructInRegisterDescriptor(typeHnd, &structDesc);
+ if (structDesc.passedInRegisters)
+ {
+ regNumber regNum = REG_NA;
+ regNumber otherRegNum = REG_NA;
+ for (unsigned nCnt = 0; nCnt < structDesc.eightByteCount; nCnt++)
+ {
+ unsigned len = structDesc.eightByteSizes[nCnt];
+ var_types regType = TYP_UNDEF;
+
+ if (nCnt == 0)
+ {
+ regNum = lclVarDsc1->lvArgReg;
+ }
+ else if (nCnt == 1)
+ {
+ otherRegNum = lclVarDsc1->lvOtherArgReg;
+ }
+ else
+ {
+ assert(false && "Invalid eightbyte number.");
+ }
+
+ regType = compiler->GetEightByteType(structDesc, nCnt);
+#ifdef DEBUG
+ regType = compiler->mangleVarArgsType(regType);
+ assert(genMapRegNumToRegArgNum((nCnt == 0 ? regNum : otherRegNum), regType) != (unsigned)-1);
+#endif // DEBUG
+ }
+
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)regNum;
+ newScope->u1.scOtherReg = (regNumberSmall)otherRegNum;
+ }
+ else
+ {
+ // Stack passed argument. Get the offset from the caller's frame.
+ psSetScopeOffset(newScope, lclVarDsc1);
+ }
+
+ isStructHandled = true;
+ }
+#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
+ if (!isStructHandled)
+ {
+#ifdef DEBUG
+ var_types regType = compiler->mangleVarArgsType(lclVarDsc1->TypeGet());
+ if (lclVarDsc1->lvIsHfaRegArg())
+ {
+ regType = lclVarDsc1->GetHfaType();
+ }
+ assert(genMapRegNumToRegArgNum(lclVarDsc1->lvArgReg, regType) != (unsigned)-1);
+#endif // DEBUG
+
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = (regNumberSmall)lclVarDsc1->lvArgReg;
+ }
+ }
+ else
+ {
+ psSetScopeOffset(newScope, lclVarDsc1);
+ }
+ }
+}
+
+/*****************************************************************************
+ Enable this macro to get accurate prolog information for every instruction
+ in the prolog. However, this is overkill as nobody steps through the
+ disassembly of the prolog. Even if they do, they will not expect rich debug info.
+
+ We still report all the arguments at the very start of the method so that
+ the user can see the arguments at the very start of the method (offset=0).
+
+ Disabling this decreased the size of the debug maps in mscorlib by 10% (01/2003).
+ */
+
+#if 0
+#define ACCURATE_PROLOG_DEBUG_INFO
+#endif
+
+/*****************************************************************************
+ * psiAdjustStackLevel
+ *
+ * When ESP changes, all scopes relative to ESP have to be updated.
+ */
+
+void CodeGen::psiAdjustStackLevel(unsigned size)
+{
+#ifdef DEBUGGING_SUPPORT
+ if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
+ {
+ return;
+ }
+
+ assert(compiler->compGeneratingProlog);
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scRegister)
+ {
+ assert(compiler->lvaTable[scope->scSlotNum].lvIsRegArg);
+ continue;
+ }
+ assert(scope->u2.scBaseReg == REG_SPBASE);
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = REG_SPBASE;
+ newScope->u2.scOffset = scope->u2.scOffset + size;
+
+ psiEndPrologScope(scope);
+ }
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * psiMoveESPtoEBP
+ *
+ * For EBP-frames, the parameters are accessed via ESP on entry to the function,
+ * but via EBP right after a "mov ebp,esp" instruction
+ */
+
+void CodeGen::psiMoveESPtoEBP()
+{
+#ifdef DEBUGGING_SUPPORT
+ if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
+ {
+ return;
+ }
+
+ assert(compiler->compGeneratingProlog);
+ assert(doubleAlignOrFramePointerUsed());
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scRegister)
+ {
+ assert(compiler->lvaTable[scope->scSlotNum].lvIsRegArg);
+ continue;
+ }
+ assert(scope->u2.scBaseReg == REG_SPBASE);
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = REG_FPBASE;
+ newScope->u2.scOffset = scope->u2.scOffset;
+
+ psiEndPrologScope(scope);
+ }
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * psiMoveToReg
+ *
+ * Called when a parameter is loaded into its assigned register from the stack,
+ * or when parameters are moved around due to a circular dependency.
+ * If reg != REG_NA, the parameter is being moved through the given temp
+ * register, else it is being moved into its assigned register.
+ */
+
+void CodeGen::psiMoveToReg(unsigned varNum, regNumber reg, regNumber otherReg)
+{
+#ifdef DEBUGGING_SUPPORT
+ assert(compiler->compGeneratingProlog);
+
+ if (!compiler->opts.compScopeInfo)
+ {
+ return;
+ }
+
+ if (compiler->info.compVarScopesCount == 0)
+ {
+ return;
+ }
+
+ assert((int)varNum >= 0); // It's not a spill temp number.
+ assert(compiler->lvaTable[varNum].lvIsInReg());
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+    /* If reg != REG_NA, the parameter is part of a circular dependency, and is
+ * being moved through temp register "reg".
+ * If reg==REG_NA, it is being moved to its assigned register.
+ */
+ if (reg == REG_NA)
+ {
+ // Grab the assigned registers.
+
+ reg = compiler->lvaTable[varNum].lvRegNum;
+ otherReg = compiler->lvaTable[varNum].lvOtherReg;
+ }
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scSlotNum != compiler->lvaTable[varNum].lvSlotNum)
+ continue;
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = true;
+ newScope->u1.scRegNum = reg;
+ newScope->u1.scOtherReg = otherReg;
+
+ psiEndPrologScope(scope);
+ return;
+ }
+
+    // This may happen if a parameter does not have an entry in the LocalVarTab,
+    // but assert() just in case it is because of something else.
+    assert(varNum == compiler->info.compRetBuffArg ||
+           !"Parameter scope not found (Assert doesn't always indicate an error)");
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * CodeGen::psiMoveToStack
+ *
+ * An incoming register argument is being moved to its final home on the stack
+ * (i.e. all adjustments to {F/S}PBASE have been made).
+ */
+
+void CodeGen::psiMoveToStack(unsigned varNum)
+{
+#ifdef DEBUGGING_SUPPORT
+ if (!compiler->opts.compScopeInfo || (compiler->info.compVarScopesCount == 0))
+ {
+ return;
+ }
+
+ assert(compiler->compGeneratingProlog);
+ assert(compiler->lvaTable[varNum].lvIsRegArg);
+ assert(!compiler->lvaTable[varNum].lvRegister);
+
+#ifdef ACCURATE_PROLOG_DEBUG_INFO
+
+ psiScope* scope;
+
+ // walk the list backwards
+ // Works as psiEndPrologScope does not change scPrev
+ for (scope = psiOpenScopeLast; scope != &psiOpenScopeList; scope = scope->scPrev)
+ {
+ if (scope->scSlotNum != compiler->lvaTable[varNum].lvSlotNum)
+ continue;
+
+        /* The param must currently be sitting in the register in which it
+           was passed */
+ assert(scope->scRegister);
+ assert(scope->u1.scRegNum == compiler->lvaTable[varNum].lvArgReg);
+
+ psiScope* newScope = psiNewPrologScope(scope->scLVnum, scope->scSlotNum);
+ newScope->scRegister = false;
+ newScope->u2.scBaseReg = (compiler->lvaTable[varNum].lvFramePointerBased) ? REG_FPBASE : REG_SPBASE;
+ newScope->u2.scOffset = compiler->lvaTable[varNum].lvStkOffs;
+
+ psiEndPrologScope(scope);
+ return;
+ }
+
+    // This may happen if a parameter does not have an entry in the LocalVarTab,
+    // but assert() just in case it is because of something else.
+    assert(varNum == compiler->info.compRetBuffArg ||
+           !"Parameter scope not found (Assert doesn't always indicate an error)");
+
+#endif // ACCURATE_PROLOG_DEBUG_INFO
+#endif // DEBUGGING_SUPPORT
+}
+
+/*****************************************************************************
+ * psiEndProlog
+ */
+
+void CodeGen::psiEndProlog()
+{
+ assert(compiler->compGeneratingProlog);
+ psiScope* scope;
+
+ for (scope = psiOpenScopeList.scNext; scope; scope = psiOpenScopeList.scNext)
+ {
+ psiEndPrologScope(scope);
+ }
+}
+
+/*****************************************************************************/
+#endif // DEBUGGING_SUPPORT
+/*****************************************************************************/
diff --git a/src/jit/sharedfloat.cpp b/src/jit/sharedfloat.cpp
new file mode 100644
index 0000000000..0dbbac4862
--- /dev/null
+++ b/src/jit/sharedfloat.cpp
@@ -0,0 +1,498 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// NOTE: The code in this file is only used for LEGACY_BACKEND compiles.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "compiler.h"
+#include "emit.h"
+#include "codegen.h"
+
+#ifdef LEGACY_BACKEND
+
+#if FEATURE_STACK_FP_X87
+regMaskTP RegSet::rsGetMaskUsed()
+{
+ return rsMaskUsedFloat;
+}
+regMaskTP RegSet::rsGetMaskVars()
+{
+ return rsMaskRegVarFloat;
+}
+regMaskTP RegSet::rsGetMaskLock()
+{
+ return rsMaskLockedFloat;
+}
+regMaskTP RegSet::rsGetMaskMult()
+{
+ return 0;
+}
+
+void RegSet::rsSetMaskUsed(regMaskTP maskUsed)
+{
+ rsMaskUsedFloat = maskUsed;
+}
+void RegSet::rsSetMaskVars(regMaskTP maskVars)
+{
+ rsMaskRegVarFloat = maskVars;
+}
+void RegSet::rsSetMaskLock(regMaskTP maskLock)
+{
+ rsMaskLockedFloat = maskLock;
+}
+
+void RegSet::rsSetUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(genUsedRegsFloat[regNum] == 0);
+ genUsedRegsFloat[regNum] = tree;
+}
+void RegSet::rsFreeUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(genUsedRegsFloat[regNum] == tree);
+ genUsedRegsFloat[regNum] = 0;
+}
+
+#else // !FEATURE_STACK_FP_X87
+regMaskTP RegSet::rsGetMaskUsed()
+{
+ return rsMaskUsed;
+}
+regMaskTP RegSet::rsGetMaskVars()
+{
+ return rsMaskVars;
+}
+regMaskTP RegSet::rsGetMaskLock()
+{
+ return rsMaskLock;
+}
+regMaskTP RegSet::rsGetMaskMult()
+{
+ return rsMaskMult;
+}
+
+void RegSet::rsSetMaskUsed(regMaskTP maskUsed)
+{
+ rsMaskUsed = maskUsed;
+}
+void RegSet::rsSetMaskVars(regMaskTP maskVars)
+{
+ rsMaskVars = maskVars;
+}
+void RegSet::rsSetMaskLock(regMaskTP maskLock)
+{
+ rsMaskLock = maskLock;
+}
+
+void RegSet::rsSetUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(rsUsedTree[regNum] == 0);
+ rsUsedTree[regNum] = tree;
+}
+void RegSet::rsFreeUsedTree(regNumber regNum, GenTreePtr tree)
+{
+ assert(rsUsedTree[regNum] == tree);
+ rsUsedTree[regNum] = 0;
+}
+#endif // !FEATURE_STACK_FP_X87
+
+// Float stress mode: locks out registers to create high register pressure.
+// This implies setting interferences in the register allocator and pushing regs in
+// the prolog and popping them before a ret.
+#ifdef DEBUG
+int CodeGenInterface::genStressFloat()
+{
+ return compiler->compStressCompile(Compiler::STRESS_FLATFP, 40) ? 1 : JitConfig.JitStressFP();
+}
+#endif
+
+regMaskTP RegSet::RegFreeFloat()
+{
+ regMaskTP mask = RBM_ALLFLOAT;
+#if FEATURE_FP_REGALLOC
+ mask &= m_rsCompiler->raConfigRestrictMaskFP();
+#endif
+
+ mask &= ~rsGetMaskUsed();
+ mask &= ~rsGetMaskLock();
+ mask &= ~rsGetMaskVars();
+
+#ifdef DEBUG
+ if (m_rsCompiler->codeGen->genStressFloat())
+ {
+ mask &= ~(m_rsCompiler->codeGen->genStressLockedMaskFloat());
+ }
+#endif
+ return mask;
+}
+
+#ifdef _TARGET_ARM_
+// The order in which registers are picked.
+// Go in reverse order to minimize the chance of spilling across calls.
+static const regNumber pickOrder[] = {REG_F15, REG_F14, REG_F13, REG_F12, REG_F11, REG_F10, REG_F9, REG_F8,
+ REG_F7, REG_F6, REG_F5, REG_F4, REG_F3, REG_F2, REG_F1, REG_F0,
+
+ REG_F16, REG_F17, REG_F18, REG_F19, REG_F20, REG_F21, REG_F22, REG_F23,
+ REG_F24, REG_F25, REG_F26, REG_F27, REG_F28, REG_F29, REG_F30, REG_F31};
+
+#elif _TARGET_AMD64_
+// The order in which registers are picked.
+static const regNumber pickOrder[] = {REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5,
+ REG_XMM6, REG_XMM7, REG_XMM8, REG_XMM9, REG_XMM10, REG_XMM11,
+ REG_XMM12, REG_XMM13, REG_XMM14, REG_XMM15};
+
+#elif _TARGET_X86_
+// The order in which registers are picked.
+static const regNumber pickOrder[] = {REG_FPV0, REG_FPV1, REG_FPV2, REG_FPV3, REG_FPV4, REG_FPV5, REG_FPV6, REG_FPV7};
+#endif
+
+// picks a reg other than the one specified
+regNumber RegSet::PickRegFloatOtherThan(GenTreePtr tree, var_types type, regNumber reg)
+{
+ return PickRegFloatOtherThan(type, reg);
+}
+
+regNumber RegSet::PickRegFloatOtherThan(var_types type, regNumber reg)
+{
+ RegisterPreference pref(RBM_ALLFLOAT ^ genRegMask(reg), 0);
+ return PickRegFloat(type, &pref);
+}
+
+regNumber RegSet::PickRegFloat(GenTreePtr tree, var_types type, RegisterPreference* pref, bool bUsed)
+{
+ return PickRegFloat(type, pref, bUsed);
+}
+
+regNumber RegSet::PickRegFloat(var_types type, RegisterPreference* pref, bool bUsed)
+{
+ regMaskTP wantedMask;
+ bool tryBest = true;
+ bool tryOk = true;
+ bool bSpill = false;
+ regNumber reg = REG_NA;
+
+ while (tryOk)
+ {
+ if (pref)
+ {
+ if (tryBest)
+ {
+ wantedMask = pref->best;
+ tryBest = false;
+ }
+ else
+ {
+ assert(tryOk);
+ wantedMask = pref->ok;
+ tryOk = false;
+ }
+ }
+ else // pref is NULL
+ {
+ wantedMask = RBM_ALLFLOAT;
+ tryBest = false;
+ tryOk = false;
+ }
+
+ // better not have asked for a non-fp register
+ assert((wantedMask & ~RBM_ALLFLOAT) == 0);
+
+ regMaskTP availMask = RegFreeFloat();
+ regMaskTP OKmask = availMask & wantedMask;
+
+ if (OKmask == 0)
+ {
+ if (tryOk)
+ {
+ // the pref->best mask doesn't work so try the pref->ok mask next
+ continue;
+ }
+
+ if (bUsed)
+ {
+ // Allow used registers to be picked
+ OKmask |= rsGetMaskUsed() & ~rsGetMaskLock();
+ bSpill = true;
+ }
+ }
+#if FEATURE_FP_REGALLOC
+ regMaskTP restrictMask = (m_rsCompiler->raConfigRestrictMaskFP() | RBM_FLT_CALLEE_TRASH);
+#endif
+
+ for (unsigned i = 0; i < ArrLen(pickOrder); i++)
+ {
+ regNumber r = pickOrder[i];
+ if (!floatRegCanHoldType(r, type))
+ continue;
+
+ regMaskTP mask = genRegMaskFloat(r, type);
+
+#if FEATURE_FP_REGALLOC
+ if ((mask & restrictMask) != mask)
+ continue;
+#endif
+ if ((OKmask & mask) == mask)
+ {
+ reg = r;
+ goto RET;
+ }
+ }
+
+ if (tryOk)
+ {
+ // We couldn't find a register using tryBest
+ continue;
+ }
+
+ assert(!"Unable to find a free FP virtual register");
+ NO_WAY("FP register allocator was too optimistic!");
+ }
+RET:
+ if (bSpill)
+ {
+ m_rsCompiler->codeGen->SpillFloat(reg);
+ }
+
+#if FEATURE_FP_REGALLOC
+ rsSetRegsModified(genRegMaskFloat(reg, type));
+#endif
+
+ return reg;
+}
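+
+// Usage sketch (illustrative; assumes this RegSet is reachable as `regSet`,
+// as it is from CodeGen):
+//
+//     // Prefer a callee-trash float register, but accept any free one.
+//     RegSet::RegisterPreference pref(RBM_ALLFLOAT, RBM_FLT_CALLEE_TRASH);
+//     regNumber reg = regSet.PickRegFloat(TYP_DOUBLE, &pref);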
+
+void RegSet::SetUsedRegFloat(GenTreePtr tree, bool bValue)
+{
+ /* The value must be sitting in a register */
+ assert(tree);
+ assert(tree->gtFlags & GTF_REG_VAL);
+
+ var_types type = tree->TypeGet();
+#ifdef _TARGET_ARM_
+ if (type == TYP_STRUCT)
+ {
+ assert(m_rsCompiler->IsHfa(tree));
+ type = TYP_FLOAT;
+ }
+#endif
+ regNumber regNum = tree->gtRegNum;
+ regMaskTP regMask = genRegMaskFloat(regNum, type);
+
+ if (bValue)
+ {
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s currently holds ", getRegNameFloat(regNum, type));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ // Mark as used
+ assert((rsGetMaskLock() & regMask) == 0);
+
+#if FEATURE_STACK_FP_X87
+ assert((rsGetMaskUsed() & regMask) == 0);
+#else
+ /* Is the register used by two different values simultaneously? */
+
+ if (regMask & rsGetMaskUsed())
+ {
+ /* Save the preceding use information */
+
+ rsRecMultiReg(regNum, type);
+ }
+#endif
+ /* Set the register's bit in the 'used' bitset */
+
+ rsSetMaskUsed((rsGetMaskUsed() | regMask));
+
+ // Assign slot
+ rsSetUsedTree(regNum, tree);
+ }
+ else
+ {
+#ifdef DEBUG
+ if (m_rsCompiler->verbose)
+ {
+ printf("\t\t\t\t\t\t\tThe register %s no longer holds ", getRegNameFloat(regNum, type));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif
+
+ // Mark as free
+ assert((rsGetMaskUsed() & regMask) == regMask);
+
+        // Are we freeing a multi-use register?
+
+ if (regMask & rsGetMaskMult())
+ {
+ // Free any multi-use registers
+ rsMultRegFree(regMask);
+ return;
+ }
+
+ rsSetMaskUsed((rsGetMaskUsed() & ~regMask));
+
+ // Free slot
+ rsFreeUsedTree(regNum, tree);
+ }
+}
+
+void RegSet::SetLockedRegFloat(GenTree* tree, bool bValue)
+{
+ regNumber reg = tree->gtRegNum;
+ var_types type = tree->TypeGet();
+ assert(varTypeIsFloating(type));
+ regMaskTP regMask = genRegMaskFloat(reg, tree->TypeGet());
+
+ if (bValue)
+ {
+ JITDUMP("locking register %s\n", getRegNameFloat(reg, type));
+
+ assert((rsGetMaskUsed() & regMask) == regMask);
+ assert((rsGetMaskLock() & regMask) == 0);
+
+ rsSetMaskLock((rsGetMaskLock() | regMask));
+ }
+ else
+ {
+ JITDUMP("unlocking register %s\n", getRegNameFloat(reg, type));
+
+ assert((rsGetMaskUsed() & regMask) == regMask);
+ assert((rsGetMaskLock() & regMask) == regMask);
+
+ rsSetMaskLock((rsGetMaskLock() & ~regMask));
+ }
+}
+
+bool RegSet::IsLockedRegFloat(GenTreePtr tree)
+{
+ /* The value must be sitting in a register */
+ assert(tree);
+ assert(tree->gtFlags & GTF_REG_VAL);
+ assert(varTypeIsFloating(tree->TypeGet()));
+
+ regMaskTP regMask = genRegMaskFloat(tree->gtRegNum, tree->TypeGet());
+ return (rsGetMaskLock() & regMask) == regMask;
+}
+
+void CodeGen::UnspillFloat(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("UnspillFloat() for tree ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ RegSet::SpillDsc* cur = regSet.rsSpillFloat;
+ assert(cur);
+
+ while (cur->spillTree != tree)
+ cur = cur->spillNext;
+
+ UnspillFloat(cur);
+}
+
+void CodeGen::UnspillFloat(LclVarDsc* varDsc)
+{
+ JITDUMP("UnspillFloat() for var [%08p]\n", dspPtr(varDsc));
+
+ RegSet::SpillDsc* cur = regSet.rsSpillFloat;
+ assert(cur);
+
+ while (cur->spillVarDsc != varDsc)
+ cur = cur->spillNext;
+
+ UnspillFloat(cur);
+}
+
+void CodeGen::RemoveSpillDsc(RegSet::SpillDsc* spillDsc)
+{
+ RegSet::SpillDsc* cur;
+ RegSet::SpillDsc** prev;
+
+ for (cur = regSet.rsSpillFloat, prev = &regSet.rsSpillFloat; cur != spillDsc;
+ prev = &cur->spillNext, cur = cur->spillNext)
+ ; // EMPTY LOOP
+
+ assert(cur);
+
+ // Remove node from list
+ *prev = cur->spillNext;
+}
+
+void CodeGen::UnspillFloat(RegSet::SpillDsc* spillDsc)
+{
+ JITDUMP("UnspillFloat() for SpillDsc [%08p]\n", dspPtr(spillDsc));
+
+ RemoveSpillDsc(spillDsc);
+ UnspillFloatMachineDep(spillDsc);
+
+    // Release the spill temp before recycling the descriptor so that we don't
+    // read spillDsc->spillTemp after freeDsc() has returned it to the free list.
+    compiler->tmpRlsTemp(spillDsc->spillTemp);
+    RegSet::SpillDsc::freeDsc(&regSet, spillDsc);
+}
+
+#if FEATURE_STACK_FP_X87
+
+Compiler::fgWalkResult CodeGen::genRegVarDiesInSubTreeWorker(GenTreePtr* pTree, Compiler::fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ genRegVarDiesInSubTreeData* pData = (genRegVarDiesInSubTreeData*)data->pCallbackData;
+
+    // If this node is a death of the register variable we are looking for, record it and stop the walk.
+ if (tree->IsRegVar() && tree->IsRegVarDeath() && tree->gtRegVar.gtRegNum == pData->reg)
+ {
+ pData->result = true;
+ return Compiler::WALK_ABORT;
+ }
+
+ return Compiler::WALK_CONTINUE;
+}
+
+bool CodeGen::genRegVarDiesInSubTree(GenTreePtr tree, regNumber reg)
+{
+ genRegVarDiesInSubTreeData Data;
+ Data.reg = reg;
+ Data.result = false;
+
+ compiler->fgWalkTreePre(&tree, genRegVarDiesInSubTreeWorker, (void*)&Data);
+
+ return Data.result;
+}
+
+#endif // FEATURE_STACK_FP_X87
+
+/*****************************************************************************
+ *
+ * Force floating point expression results to memory, to get rid of the extra
+ * 80-bit "temp-real" precision.
+ * Assumes the tree operand has been computed to the top of the stack.
+ * If type != TYP_UNDEF, that is the desired precision, else it is op->gtType.
+ */
+
+void CodeGen::genRoundFpExpression(GenTreePtr op, var_types type)
+{
+#if FEATURE_STACK_FP_X87
+ return genRoundFpExpressionStackFP(op, type);
+#else
+ return genRoundFloatExpression(op, type);
+#endif
+}
+
+void CodeGen::genCodeForTreeFloat(GenTreePtr tree, regMaskTP needReg, regMaskTP bestReg)
+{
+ RegSet::RegisterPreference pref(needReg, bestReg);
+ genCodeForTreeFloat(tree, &pref);
+}
+
+#endif // LEGACY_BACKEND
diff --git a/src/jit/sideeffects.cpp b/src/jit/sideeffects.cpp
new file mode 100644
index 0000000000..dbfa27cfae
--- /dev/null
+++ b/src/jit/sideeffects.cpp
@@ -0,0 +1,549 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "sideeffects.h"
+
+LclVarSet::LclVarSet() : m_bitVector(nullptr), m_hasAnyLcl(false), m_hasBitVector(false)
+{
+}
+
+//------------------------------------------------------------------------
+// LclVarSet::Add:
+// Adds the given lclNum to the LclVarSet.
+//
+// Arguments:
+// compiler - The compiler context
+// lclNum - The lclNum to add.
+//
+void LclVarSet::Add(Compiler* compiler, unsigned lclNum)
+{
+ if (!m_hasAnyLcl)
+ {
+ m_lclNum = lclNum;
+ m_hasAnyLcl = true;
+ }
+ else
+ {
+ if (!m_hasBitVector)
+ {
+ unsigned singleLclNum = m_lclNum;
+ m_bitVector = hashBv::Create(compiler);
+ m_bitVector->setBit(singleLclNum);
+ m_hasBitVector = true;
+ }
+
+ m_bitVector->setBit(lclNum);
+ }
+}
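+
+// Minimal usage sketch (hypothetical lclNums; assumes a valid Compiler* `comp`):
+//
+//     LclVarSet set;
+//     set.Add(comp, 2);   // stored as a single lclNum
+//     set.Add(comp, 7);   // a second Add switches to the hashBv bit vector
+//     assert(set.Contains(7) && !set.Contains(3));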
+
+//------------------------------------------------------------------------
+// LclVarSet::Intersects:
+// Returns true if this LclVarSet intersects with the given LclVarSet.
+//
+// Arguments:
+// other - The other lclVarSet.
+//
+bool LclVarSet::Intersects(const LclVarSet& other) const
+{
+ // If neither set has ever contained anything, the sets do not intersect.
+ if (!m_hasAnyLcl || !other.m_hasAnyLcl)
+ {
+ return false;
+ }
+
+ // If this set is not represented by a bit vector, see if the single lclNum is contained in the other set.
+ if (!m_hasBitVector)
+ {
+ if (!other.m_hasBitVector)
+ {
+ return m_lclNum == other.m_lclNum;
+ }
+
+ return other.m_bitVector->testBit(m_lclNum);
+ }
+
+ // If this set is represented by a bit vector but the other set is not, see if the single lclNum in the other
+ // set is contained in this set.
+ if (!other.m_hasBitVector)
+ {
+ return m_bitVector->testBit(other.m_lclNum);
+ }
+
+ // Both sets are represented by bit vectors. Check to see if they intersect.
+ return m_bitVector->Intersects(other.m_bitVector);
+}
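+
+// For example (hypothetical sets): a set holding only lclNum 2 intersects a
+// bit-vector set containing {2, 7}; the single lclNum is simply tested against
+// the other set's bit vector via testBit().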
+
+//------------------------------------------------------------------------
+// LclVarSet::Contains:
+// Returns true if this LclVarSet contains the given lclNum.
+//
+// Arguments:
+// lclNum - The lclNum in question.
+//
+bool LclVarSet::Contains(unsigned lclNum) const
+{
+ // If this set has never contained anything, it does not contain the lclNum.
+ if (!m_hasAnyLcl)
+ {
+ return false;
+ }
+
+ // If this set is not represented by a bit vector, see if its single lclNum is the same as the given lclNum.
+ if (!m_hasBitVector)
+ {
+ return m_lclNum == lclNum;
+ }
+
+ // This set is represented by a bit vector. See if the bit vector contains the given lclNum.
+ return m_bitVector->testBit(lclNum);
+}
+
+//------------------------------------------------------------------------
+// LclVarSet::Clear:
+// Clears the contents of this LclVarSet.
+//
+void LclVarSet::Clear()
+{
+ if (m_hasBitVector)
+ {
+ assert(m_hasAnyLcl);
+ m_bitVector->ZeroAll();
+ }
+ else if (m_hasAnyLcl)
+ {
+ m_hasAnyLcl = false;
+ }
+}
+
+AliasSet::AliasSet()
+ : m_lclVarReads(), m_lclVarWrites(), m_readsAddressableLocation(false), m_writesAddressableLocation(false)
+{
+}
+
+//------------------------------------------------------------------------
+// AliasSet::NodeInfo::NodeInfo:
+// Computes the alias info for a given node. Note that this does not
+// include the set of lclVar accesses for a node unless the node is
+// itself a lclVar access (e.g. a GT_LCL_VAR, GT_STORE_LCL_VAR, etc.).
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node in question.
+//
+AliasSet::NodeInfo::NodeInfo(Compiler* compiler, GenTree* node)
+ : m_compiler(compiler), m_node(node), m_flags(0), m_lclNum(0)
+{
+ if (node->IsCall())
+ {
+ // Calls are treated as reads and writes of addressable locations unless they are known to be pure.
+ if (node->AsCall()->IsPure(compiler))
+ {
+ m_flags = ALIAS_NONE;
+ return;
+ }
+
+ m_flags = ALIAS_READS_ADDRESSABLE_LOCATION | ALIAS_WRITES_ADDRESSABLE_LOCATION;
+ return;
+ }
+ else if (node->OperIsAtomicOp())
+ {
+ // Atomic operations both read and write addressable locations.
+ m_flags = ALIAS_READS_ADDRESSABLE_LOCATION | ALIAS_WRITES_ADDRESSABLE_LOCATION;
+ return;
+ }
+
+ // Is the operation a write? If so, set `node` to the location that is being written to.
+ bool isWrite = false;
+ if (node->OperIsAssignment())
+ {
+ isWrite = true;
+ node = node->gtGetOp1();
+ }
+ else if (node->OperIsStore() || node->OperIsAtomicOp())
+ {
+ isWrite = true;
+ }
+
+ // `node` is the location being accessed. Determine whether or not it is a memory or local variable access, and if
+ // it is the latter, get the number of the lclVar.
+ bool isMemoryAccess = false;
+ bool isLclVarAccess = false;
+ unsigned lclNum = 0;
+ if (node->OperIsIndir())
+ {
+ // If the indirection targets a lclVar, we can be more precise with regards to aliasing by treating the
+ // indirection as a lclVar access.
+ GenTree* address = node->AsIndir()->Addr();
+ if (address->OperIsLocalAddr())
+ {
+ isLclVarAccess = true;
+ lclNum = address->AsLclVarCommon()->GetLclNum();
+ }
+ else
+ {
+ isMemoryAccess = true;
+ }
+ }
+ else if (node->OperIsImplicitIndir())
+ {
+ isMemoryAccess = true;
+ }
+ else if (node->OperIsLocal())
+ {
+ isLclVarAccess = true;
+ lclNum = node->AsLclVarCommon()->GetLclNum();
+ }
+ else
+ {
+ // This is neither a memory nor a local var access.
+ m_flags = ALIAS_NONE;
+ return;
+ }
+
+ assert(isMemoryAccess || isLclVarAccess);
+
+ // Now that we've determined whether or not this access is a read or a write and whether the accessed location is
+ // memory or a lclVar, determine whether or not the location is addressable and update the alias set.
+ const bool isAddressableLocation = isMemoryAccess || compiler->lvaTable[lclNum].lvAddrExposed;
+
+ if (!isWrite)
+ {
+ if (isAddressableLocation)
+ {
+ m_flags |= ALIAS_READS_ADDRESSABLE_LOCATION;
+ }
+
+ if (isLclVarAccess)
+ {
+ m_flags |= ALIAS_READS_LCL_VAR;
+ m_lclNum = lclNum;
+ }
+ }
+ else
+ {
+ if (isAddressableLocation)
+ {
+ m_flags |= ALIAS_WRITES_ADDRESSABLE_LOCATION;
+ }
+
+ if (isLclVarAccess)
+ {
+ m_flags |= ALIAS_WRITES_LCL_VAR;
+ m_lclNum = lclNum;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// AliasSet::AddNode:
+// Adds the given node's accesses to this AliasSet.
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node to add to the set.
+//
+void AliasSet::AddNode(Compiler* compiler, GenTree* node)
+{
+ // First, add all lclVar uses associated with the node to the set. This is necessary because the lclVar reads occur
+ // at the position of the user, not at the position of the GenTreeLclVar node.
+ for (GenTree* operand : node->Operands())
+ {
+ if (operand->OperIsLocalRead())
+ {
+ const unsigned lclNum = operand->AsLclVarCommon()->GetLclNum();
+ if (compiler->lvaTable[lclNum].lvAddrExposed)
+ {
+ m_readsAddressableLocation = true;
+ }
+
+ m_lclVarReads.Add(compiler, lclNum);
+ }
+ }
+
+ NodeInfo nodeInfo(compiler, node);
+ if (nodeInfo.ReadsAddressableLocation())
+ {
+ m_readsAddressableLocation = true;
+ }
+ if (nodeInfo.WritesAddressableLocation())
+ {
+ m_writesAddressableLocation = true;
+ }
+ if (nodeInfo.IsLclVarRead())
+ {
+ m_lclVarReads.Add(compiler, nodeInfo.LclNum());
+ }
+ if (nodeInfo.IsLclVarWrite())
+ {
+ m_lclVarWrites.Add(compiler, nodeInfo.LclNum());
+ }
+}
+
+//------------------------------------------------------------------------
+// AliasSet::InterferesWith:
+// Returns true if the reads and writes in this alias set interfere
+// with the given alias set.
+//
+// Two alias sets interfere under any of the following conditions:
+// - Both sets write to any addressable location (e.g. the heap,
+// address-exposed locals)
+// - One set reads any addressable location and the other set writes
+// any addressable location
+// - Both sets write to the same lclVar
+// - One set writes to a lclVar that is read by the other set
+//
+// Arguments:
+// other - The other alias set.
+//
+bool AliasSet::InterferesWith(const AliasSet& other) const
+{
+ // If both sets write any addressable location, the sets interfere.
+ if (m_writesAddressableLocation && other.m_writesAddressableLocation)
+ {
+ return true;
+ }
+
+ // If one set writes any addressable location and the other reads any addressable location, the sets interfere.
+ if ((m_readsAddressableLocation && other.m_writesAddressableLocation) ||
+ (m_writesAddressableLocation && other.m_readsAddressableLocation))
+ {
+ return true;
+ }
+
+ // If the set of lclVars written by this alias set intersects with the set of lclVars accessed by the other alias
+ // set, the alias sets interfere.
+ if (m_lclVarWrites.Intersects(other.m_lclVarReads) || m_lclVarWrites.Intersects(other.m_lclVarWrites))
+ {
+ return true;
+ }
+
+ // If the set of lclVars read by this alias set intersects with the set of lclVars written by the other alias set,
+ // the alias sets interfere. Otherwise, the alias sets do not interfere.
+ return m_lclVarReads.Intersects(other.m_lclVarWrites);
+}
+
+//------------------------------------------------------------------------
+// AliasSet::InterferesWith:
+// Returns true if the reads and writes in this alias set interfere
+// with those for the given node.
+//
+// An alias set interferes with a given node iff it interferes with the
+// alias set for that node.
+//
+// Arguments:
+// other - The info for the node in question.
+//
+bool AliasSet::InterferesWith(const NodeInfo& other) const
+{
+ // First check whether or not this set interferes with the lclVar uses associated with the given node.
+ if (m_writesAddressableLocation || !m_lclVarWrites.IsEmpty())
+ {
+ Compiler* compiler = other.TheCompiler();
+ for (GenTree* operand : other.Node()->Operands())
+ {
+ if (operand->OperIsLocalRead())
+ {
+ // If this set writes any addressable location and the node uses an address-exposed lclVar,
+ // the set interferes with the node.
+ const unsigned lclNum = operand->AsLclVarCommon()->GetLclNum();
+ if (compiler->lvaTable[lclNum].lvAddrExposed && m_writesAddressableLocation)
+ {
+ return true;
+ }
+
+ // If this set writes to a lclVar used by the node, the set interferes with the node.
+ if (m_lclVarWrites.Contains(lclNum))
+ {
+ return true;
+ }
+ }
+ }
+ }
+
+ // If the node and the set both write to any addressable location, they interfere.
+ if (m_writesAddressableLocation && other.WritesAddressableLocation())
+ {
+ return true;
+ }
+
+ // If the node or the set writes any addressable location and the other reads any addressable location,
+ // they interfere.
+ if ((m_readsAddressableLocation && other.WritesAddressableLocation()) ||
+ (m_writesAddressableLocation && other.ReadsAddressableLocation()))
+ {
+ return true;
+ }
+
+ // If the set writes a local var accessed by the node, they interfere.
+ if ((other.IsLclVarRead() || other.IsLclVarWrite()) && m_lclVarWrites.Contains(other.LclNum()))
+ {
+ return true;
+ }
+
+ // If the set reads a local var written by the node, they interfere.
+ return other.IsLclVarWrite() && m_lclVarReads.Contains(other.LclNum());
+}
+
+//------------------------------------------------------------------------
+// AliasSet::Clear:
+// Clears the current alias set.
+//
+void AliasSet::Clear()
+{
+ m_readsAddressableLocation = false;
+ m_writesAddressableLocation = false;
+
+ m_lclVarReads.Clear();
+ m_lclVarWrites.Clear();
+}
+
+SideEffectSet::SideEffectSet() : m_sideEffectFlags(0), m_aliasSet()
+{
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::SideEffectSet:
+// Constructs a side effect set initialized using the given node.
+// Equivalent to the following:
+//
+// SideEffectSet sideEffectSet;
+// sideEffectSet.AddNode(compiler, node);
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node to use for initialization.
+//
+SideEffectSet::SideEffectSet(Compiler* compiler, GenTree* node) : m_sideEffectFlags(0), m_aliasSet()
+{
+ AddNode(compiler, node);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::AddNode:
+// Adds the given node's accesses to this SideEffectSet.
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node to add to the set.
+//
+void SideEffectSet::AddNode(Compiler* compiler, GenTree* node)
+{
+ m_sideEffectFlags |= (node->gtFlags & GTF_ALL_EFFECT);
+ m_aliasSet.AddNode(compiler, node);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::InterferesWith:
+// Returns true if the side effects in this set interfere with the
+// given side effect flags and alias information.
+//
+// Two side effect sets interfere under any of the following
+// conditions:
+// - If the analysis is strict, and:
+// - Either set contains a compiler barrier, or
+// - Both sets produce an exception
+// - Whether or not the analysis is strict:
+// - One set produces an exception and the other set contains a
+// write
+// - One set's reads and writes interfere with the other set's
+// reads and writes
+//
+// Arguments:
+// otherSideEffectFlags - The side effect flags for the other side
+// effect set.
+// otherAliasInfo - The alias information for the other side effect
+// set.
+// strict - True if the analysis should be strict as described above.
+//
+template <typename TOtherAliasInfo>
+bool SideEffectSet::InterferesWith(unsigned otherSideEffectFlags,
+ const TOtherAliasInfo& otherAliasInfo,
+ bool strict) const
+{
+ const bool thisProducesException = (m_sideEffectFlags & GTF_EXCEPT) != 0;
+ const bool otherProducesException = (otherSideEffectFlags & GTF_EXCEPT) != 0;
+
+ if (strict)
+ {
+ // If either set contains a compiler barrier, the sets interfere.
+ if (((m_sideEffectFlags | otherSideEffectFlags) & GTF_ORDER_SIDEEFF) != 0)
+ {
+ return true;
+ }
+
+ // If both sets produce an exception, the sets interfere.
+ if (thisProducesException && otherProducesException)
+ {
+ return true;
+ }
+ }
+
+ // If one set produces an exception and the other set writes to any location, the sets interfere.
+ if ((thisProducesException && otherAliasInfo.WritesAnyLocation()) ||
+ (otherProducesException && m_aliasSet.WritesAnyLocation()))
+ {
+ return true;
+ }
+
+ // At this point, the only interference between the sets will arise from their alias sets.
+ return m_aliasSet.InterferesWith(otherAliasInfo);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::InterferesWith:
+// Returns true if the side effects in this set interfere with the side
+// effects in the given side effect set.
+//
+// Two side effect sets interfere under any of the following
+// conditions:
+// - If the analysis is strict, and:
+// - Either set contains a compiler barrier, or
+// - Both sets produce an exception
+// - Whether or not the analysis is strict:
+// - One set produces an exception and the other set contains a
+// write
+// - One set's reads and writes interfere with the other set's
+// reads and writes
+//
+// Arguments:
+// other - The other side effect set.
+// strict - True if the analysis should be strict as described above.
+//
+bool SideEffectSet::InterferesWith(const SideEffectSet& other, bool strict) const
+{
+ return InterferesWith(other.m_sideEffectFlags, other.m_aliasSet, strict);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::InterferesWith:
+// Returns true if the side effects in this set interfere with the side
+// effects for the given node.
+//
+// A side effect set interferes with a given node iff it interferes
+// with the side effect set of the node.
+//
+// Arguments:
+// compiler - The compiler context.
+// node - The node in question.
+// strict - True if the analysis should be strict as described above.
+//
+bool SideEffectSet::InterferesWith(Compiler* compiler, GenTree* node, bool strict) const
+{
+ return InterferesWith((node->gtFlags & GTF_ALL_EFFECT), AliasSet::NodeInfo(compiler, node), strict);
+}
+
+//------------------------------------------------------------------------
+// SideEffectSet::Clear:
+// Clears the current side effect set.
+//
+void SideEffectSet::Clear()
+{
+ m_sideEffectFlags = 0;
+ m_aliasSet.Clear();
+}
diff --git a/src/jit/sideeffects.h b/src/jit/sideeffects.h
new file mode 100644
index 0000000000..33fac16f05
--- /dev/null
+++ b/src/jit/sideeffects.h
@@ -0,0 +1,158 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SIDEEFFECTS_H_
+#define _SIDEEFFECTS_H_
+
+//------------------------------------------------------------------------
+// LclVarSet:
+// Represents a set of lclVars. Optimized for the case that the set
+// never holds more than a single element. This type is used internally
+// by `AliasSet` to track the sets of lclVars that are read and
+// written for a given alias set.
+//
+class LclVarSet final
+{
+ union {
+ hashBv* m_bitVector;
+ unsigned m_lclNum;
+ };
+
+ bool m_hasAnyLcl;
+ bool m_hasBitVector;
+
+public:
+ LclVarSet();
+
+ inline bool IsEmpty() const
+ {
+ return !m_hasAnyLcl || (m_hasBitVector && !m_bitVector->anySet());
+ }
+
+ void Add(Compiler* compiler, unsigned lclNum);
+ bool Intersects(const LclVarSet& other) const;
+ bool Contains(unsigned lclNum) const;
+ void Clear();
+};
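+
+// For illustration (informal, not part of the interface contract): a LclVarSet that has only ever
+// seen `Add(compiler, 2)` tracks the single lclNum inline in `m_lclNum`; a later
+// `Add(compiler, 3)` switches the representation to a bit vector containing both lclNums, and all
+// subsequent queries go through that bit vector.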
+
+//------------------------------------------------------------------------
+// AliasSet:
+// Represents a set of reads and writes for the purposes of alias
+// analysis. This type partitions storage into two categories:
+// lclVars and addressable locations. The definition of the former is
+// intuitive. The latter is the union of the set of address-exposed
+// lclVars with the set of all other memory locations. Any memory
+// access is assumed to alias any other memory access.
+//
+class AliasSet final
+{
+ LclVarSet m_lclVarReads;
+ LclVarSet m_lclVarWrites;
+
+ bool m_readsAddressableLocation;
+ bool m_writesAddressableLocation;
+
+public:
+ //------------------------------------------------------------------------
+ // AliasSet::NodeInfo:
+ // Represents basic alias information for a single IR node.
+ //
+ class NodeInfo final
+ {
+ enum : unsigned
+ {
+ ALIAS_NONE = 0x0,
+ ALIAS_READS_ADDRESSABLE_LOCATION = 0x1,
+ ALIAS_WRITES_ADDRESSABLE_LOCATION = 0x2,
+ ALIAS_READS_LCL_VAR = 0x4,
+ ALIAS_WRITES_LCL_VAR = 0x8
+ };
+
+ Compiler* m_compiler;
+ GenTree* m_node;
+ unsigned m_flags;
+ unsigned m_lclNum;
+
+ public:
+ NodeInfo(Compiler* compiler, GenTree* node);
+
+ inline Compiler* TheCompiler() const
+ {
+ return m_compiler;
+ }
+
+ inline GenTree* Node() const
+ {
+ return m_node;
+ }
+
+ inline bool ReadsAddressableLocation() const
+ {
+ return (m_flags & ALIAS_READS_ADDRESSABLE_LOCATION) != 0;
+ }
+
+ inline bool WritesAddressableLocation() const
+ {
+ return (m_flags & ALIAS_WRITES_ADDRESSABLE_LOCATION) != 0;
+ }
+
+ inline bool IsLclVarRead() const
+ {
+ return (m_flags & ALIAS_READS_LCL_VAR) != 0;
+ }
+
+ inline bool IsLclVarWrite() const
+ {
+ return (m_flags & ALIAS_WRITES_LCL_VAR) != 0;
+ }
+
+ inline unsigned LclNum() const
+ {
+ assert(IsLclVarRead() || IsLclVarWrite());
+ return m_lclNum;
+ }
+
+ inline bool WritesAnyLocation() const
+ {
+ return (m_flags & (ALIAS_WRITES_ADDRESSABLE_LOCATION | ALIAS_WRITES_LCL_VAR)) != 0;
+ }
+ };
+
+ AliasSet();
+
+ inline bool WritesAnyLocation() const
+ {
+ return m_writesAddressableLocation || !m_lclVarWrites.IsEmpty();
+ }
+
+ void AddNode(Compiler* compiler, GenTree* node);
+ bool InterferesWith(const AliasSet& other) const;
+ bool InterferesWith(const NodeInfo& node) const;
+ void Clear();
+};
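+
+// For illustration (informal): a store through an indirection whose address is not a local
+// address (a write to an addressable location) interferes with a read of an address-exposed
+// lclVar, since both touch addressable storage; by itself it does not interfere with a read of a
+// non-address-exposed lclVar that it does not also write.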
+
+//------------------------------------------------------------------------
+// SideEffectSet:
+// Represents a set of side effects for the purposes of analyzing code
+// motion.
+//
+class SideEffectSet final
+{
+ unsigned m_sideEffectFlags; // A mask of GTF_* flags that represents exceptional and barrier side effects.
+ AliasSet m_aliasSet; // An AliasSet that represents read and write side effects.
+
+ template <typename TOtherAliasInfo>
+ bool InterferesWith(unsigned otherSideEffectFlags, const TOtherAliasInfo& otherAliasInfo, bool strict) const;
+
+public:
+ SideEffectSet();
+ SideEffectSet(Compiler* compiler, GenTree* node);
+
+ void AddNode(Compiler* compiler, GenTree* node);
+ bool InterferesWith(const SideEffectSet& other, bool strict) const;
+ bool InterferesWith(Compiler* compiler, GenTree* node, bool strict) const;
+ void Clear();
+};
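+
+// A hypothetical usage sketch (illustrative only, not part of this change): a code-motion pass
+// that wants to reorder `node` with the nodes in the range [rangeStart, node) could accumulate
+// the side effects of the intervening nodes and query for interference:
+//
+//     SideEffectSet blockedEffects;
+//     for (GenTree* cur = rangeStart; cur != node; cur = cur->gtNext)
+//     {
+//         blockedEffects.AddNode(compiler, cur);
+//     }
+//
+//     if (!blockedEffects.InterferesWith(compiler, node, true))
+//     {
+//         // Under strict analysis it is safe to reorder `node` with the accumulated nodes.
+//     }
+//
+// Here `rangeStart` and the use of `gtNext` to walk nodes in linear order are assumptions made
+// for the sake of the example.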
+
+#endif // _SIDEEFFECTS_H_
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
new file mode 100644
index 0000000000..1f0c867b55
--- /dev/null
+++ b/src/jit/simd.cpp
@@ -0,0 +1,2556 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// SIMD Support
+//
+// IMPORTANT NOTES AND CAVEATS:
+//
+// This implementation is preliminary, and may change dramatically.
+//
+// New JIT types, TYP_SIMDxx, are introduced, and the SIMD intrinsics are created as GT_SIMD nodes.
+// Nodes of SIMD types will be typed as TYP_SIMD* (e.g. TYP_SIMD8, TYP_SIMD16, etc.).
+//
+// Note that currently the "reference implementation" is the same as the runtime dll. As such, it is currently
+// providing implementations for those methods not currently supported by the JIT as intrinsics.
+//
+// These are currently recognized using string compares, in order to provide an implementation in the JIT
+// without taking a dependency on the VM.
+// Furthermore, in the CTP, in order to limit the impact of doing these string compares
+// against assembly names, we only look for the SIMDVector assembly if we are compiling a class constructor. This
+// makes it somewhat more "pay for play" but is a significant usability compromise.
+// This has been addressed for RTM by doing the assembly recognition in the VM.
+// --------------------------------------------------------------------------------------
+
+#include "jitpch.h"
+#include "simd.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef FEATURE_SIMD
+
+// Intrinsic Id to intrinsic info map
+const SIMDIntrinsicInfo simdIntrinsicInfoArray[] = {
+#define SIMD_INTRINSIC(mname, inst, id, name, retType, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, \
+ t10) \
+ {SIMDIntrinsic##id, mname, inst, retType, argCount, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10},
+#include "simdintrinsiclist.h"
+};
+
+//------------------------------------------------------------------------
+// getSIMDVectorLength: Get the length (number of elements of base type) of
+// SIMD Vector given its size and base (element) type.
+//
+// Arguments:
+// simdSize - size of the SIMD vector
+// baseType - type of the elements of the SIMD vector
+//
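+// For example (illustrative): a 16-byte vector with TYP_FLOAT elements has 16 / 4 = 4
+// elements, while a 16-byte vector with TYP_DOUBLE elements has 16 / 8 = 2.
+//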
+// static
+int Compiler::getSIMDVectorLength(unsigned simdSize, var_types baseType)
+{
+ return simdSize / genTypeSize(baseType);
+}
+
+//------------------------------------------------------------------------
+// Get the length (number of elements of base type) of SIMD Vector given by typeHnd.
+//
+// Arguments:
+// typeHnd - type handle of the SIMD vector
+//
+int Compiler::getSIMDVectorLength(CORINFO_CLASS_HANDLE typeHnd)
+{
+ unsigned sizeBytes = 0;
+ var_types baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, &sizeBytes);
+ return getSIMDVectorLength(sizeBytes, baseType);
+}
+
+//------------------------------------------------------------------------
+// Get the preferred alignment of SIMD vector type for better performance.
+//
+// Arguments:
+// simdType - The SIMD vector type whose preferred alignment is requested
+//
+int Compiler::getSIMDTypeAlignment(var_types simdType)
+{
+#ifdef _TARGET_AMD64_
+ // Fixed length vectors have the following alignment preference
+ // Vector2/3 = 8 byte alignment
+ // Vector4 = 16-byte alignment
+ unsigned size = genTypeSize(simdType);
+
+ // preferred alignment for SSE2 128-bit vectors is 16-bytes
+ if (size == 8)
+ {
+ return 8;
+ }
+
+ // As per Intel manual, AVX vectors preferred alignment is 32-bytes but on Amd64
+ // RSP/EBP is aligned at 16-bytes, therefore to align SIMD types at 32-bytes we need even
+ // RSP/EBP to be 32-byte aligned. It is not clear whether additional stack space used in
+ // aligning stack is worth the benefit and for now will use 16-byte alignment for AVX
+ // 256-bit vectors with unaligned load/stores to/from memory.
+ return 16;
+#else
+ assert(!"getSIMDTypeAlignment() unimplemented on target arch");
+ unreached();
+#endif
+}
+
+//----------------------------------------------------------------------------------
+// Return the base type and size of SIMD vector type given its type handle.
+//
+// Arguments:
+// typeHnd - The handle of the type we're interested in.
+// sizeBytes - out param
+//
+// Return Value:
+// base type of SIMD vector.
+// sizeBytes if non-null is set to size in bytes.
+//
+// TODO-Throughput: current implementation parses class name to find base type. Change
+// this when we implement SIMD intrinsic identification for the final
+// product.
+//
+var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */)
+{
+ assert(featureSIMD);
+ if (typeHnd == nullptr)
+ {
+ return TYP_UNKNOWN;
+ }
+
+ // fast path search using cached type handles of important types
+ var_types simdBaseType = TYP_UNKNOWN;
+ unsigned size = 0;
+
+ // Early return if it is not a SIMD module.
+ if (!isSIMDClass(typeHnd))
+ {
+ return TYP_UNKNOWN;
+ }
+
+ // The most likely to be used type handles are looked up first followed by
+ // less likely to be used type handles
+ if (typeHnd == SIMDFloatHandle)
+ {
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Known type SIMD Vector<Float>\n");
+ }
+ else if (typeHnd == SIMDIntHandle)
+ {
+ simdBaseType = TYP_INT;
+ JITDUMP(" Known type SIMD Vector<Int>\n");
+ }
+ else if (typeHnd == SIMDVector2Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector2\n");
+ }
+ else if (typeHnd == SIMDVector3Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Known type Vector3\n");
+ }
+ else if (typeHnd == SIMDVector4Handle)
+ {
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Known type Vector4\n");
+ }
+ else if (typeHnd == SIMDVectorHandle)
+ {
+ JITDUMP(" Known type Vector\n");
+ }
+ else if (typeHnd == SIMDUShortHandle)
+ {
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Known type SIMD Vector<ushort>\n");
+ }
+ else if (typeHnd == SIMDUByteHandle)
+ {
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Known type SIMD Vector<ubyte>\n");
+ }
+ else if (typeHnd == SIMDDoubleHandle)
+ {
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Known type SIMD Vector<Double>\n");
+ }
+ else if (typeHnd == SIMDLongHandle)
+ {
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Known type SIMD Vector<Long>\n");
+ }
+ else if (typeHnd == SIMDShortHandle)
+ {
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Known type SIMD Vector<short>\n");
+ }
+ else if (typeHnd == SIMDByteHandle)
+ {
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Known type SIMD Vector<byte>\n");
+ }
+ else if (typeHnd == SIMDUIntHandle)
+ {
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Known type SIMD Vector<uint>\n");
+ }
+ else if (typeHnd == SIMDULongHandle)
+ {
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Known type SIMD Vector<ulong>\n");
+ }
+
+ // slow path search
+ if (simdBaseType == TYP_UNKNOWN)
+ {
+ // Doesn't match with any of the cached type handles.
+ // Obtain base type by parsing fully qualified class name.
+ //
+ // TODO-Throughput: implement product shipping solution to query base type.
+ WCHAR className[256] = {0};
+ WCHAR* pbuf = &className[0];
+ int len = sizeof(className) / sizeof(className[0]);
+ info.compCompHnd->appendClassName(&pbuf, &len, typeHnd, TRUE, FALSE, FALSE);
+ noway_assert(pbuf < &className[256]);
+ JITDUMP("SIMD Candidate Type %S\n", className);
+
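+ // The offsets used below follow from the prefix lengths: "System.Numerics." is 16 characters
+ // and "Vector`1[" adds another 9, so for Vector<T> the element type name starts at index 25.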
+ if (wcsncmp(className, W("System.Numerics."), 16) == 0)
+ {
+ if (wcsncmp(&(className[16]), W("Vector`1["), 9) == 0)
+ {
+ if (wcsncmp(&(className[25]), W("System.Single"), 13) == 0)
+ {
+ SIMDFloatHandle = typeHnd;
+ simdBaseType = TYP_FLOAT;
+ JITDUMP(" Found type SIMD Vector<Float>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int32"), 12) == 0)
+ {
+ SIMDIntHandle = typeHnd;
+ simdBaseType = TYP_INT;
+ JITDUMP(" Found type SIMD Vector<Int>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt16"), 13) == 0)
+ {
+ SIMDUShortHandle = typeHnd;
+ simdBaseType = TYP_CHAR;
+ JITDUMP(" Found type SIMD Vector<ushort>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Byte"), 11) == 0)
+ {
+ SIMDUByteHandle = typeHnd;
+ simdBaseType = TYP_UBYTE;
+ JITDUMP(" Found type SIMD Vector<ubyte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Double"), 13) == 0)
+ {
+ SIMDDoubleHandle = typeHnd;
+ simdBaseType = TYP_DOUBLE;
+ JITDUMP(" Found type SIMD Vector<Double>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int64"), 12) == 0)
+ {
+ SIMDLongHandle = typeHnd;
+ simdBaseType = TYP_LONG;
+ JITDUMP(" Found type SIMD Vector<Long>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.Int16"), 12) == 0)
+ {
+ SIMDShortHandle = typeHnd;
+ simdBaseType = TYP_SHORT;
+ JITDUMP(" Found type SIMD Vector<short>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.SByte"), 12) == 0)
+ {
+ SIMDByteHandle = typeHnd;
+ simdBaseType = TYP_BYTE;
+ JITDUMP(" Found type SIMD Vector<byte>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt32"), 13) == 0)
+ {
+ SIMDUIntHandle = typeHnd;
+ simdBaseType = TYP_UINT;
+ JITDUMP(" Found type SIMD Vector<uint>\n");
+ }
+ else if (wcsncmp(&(className[25]), W("System.UInt64"), 13) == 0)
+ {
+ SIMDULongHandle = typeHnd;
+ simdBaseType = TYP_ULONG;
+ JITDUMP(" Found type SIMD Vector<ulong>\n");
+ }
+ else
+ {
+ JITDUMP(" Unknown SIMD Vector<T>\n");
+ }
+ }
+ else if (wcsncmp(&(className[16]), W("Vector2"), 8) == 0)
+ {
+ SIMDVector2Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 2 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector2\n");
+ }
+ else if (wcsncmp(&(className[16]), W("Vector3"), 8) == 0)
+ {
+ SIMDVector3Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 3 * genTypeSize(TYP_FLOAT);
+ assert(size == info.compCompHnd->getClassSize(typeHnd));
+ JITDUMP(" Found Vector3\n");
+ }
+ else if (wcsncmp(&(className[16]), W("Vector4"), 8) == 0)
+ {
+ SIMDVector4Handle = typeHnd;
+
+ simdBaseType = TYP_FLOAT;
+ size = 4 * genTypeSize(TYP_FLOAT);
+ assert(size == roundUp(info.compCompHnd->getClassSize(typeHnd), TARGET_POINTER_SIZE));
+ JITDUMP(" Found Vector4\n");
+ }
+ else if (wcsncmp(&(className[16]), W("Vector"), 6) == 0)
+ {
+ SIMDVectorHandle = typeHnd;
+ JITDUMP(" Found type Vector\n");
+ }
+ else
+ {
+ JITDUMP(" Unknown SIMD Type\n");
+ }
+ }
+ }
+
+ if (simdBaseType != TYP_UNKNOWN && sizeBytes != nullptr)
+ {
+ // If not a fixed size vector then its size is same as SIMD vector
+ // register length in bytes
+ if (size == 0)
+ {
+ size = getSIMDVectorRegisterByteLength();
+ }
+
+ *sizeBytes = size;
+ }
+
+ return simdBaseType;
+}
+
+//--------------------------------------------------------------------------------------
+// getSIMDIntrinsicInfo: get SIMD intrinsic info given the method handle.
+//
+// Arguments:
+// inOutTypeHnd - The handle of the type on which the method is invoked. This is an in-out param.
+// methodHnd - The handle of the method we're interested in.
+// sig - method signature info
+// isNewObj - whether this call represents a newobj constructor call
+// argCount - argument count - out param
+// baseType - base type of the intrinsic - out param
+// sizeBytes - size of SIMD vector type on which the method is invoked - out param
+//
+// Return Value:
+// SIMDIntrinsicInfo struct initialized corresponding to methodHnd.
+// Sets SIMDIntrinsicInfo.id to SIMDIntrinsicInvalid if methodHnd doesn't correspond
+// to any SIMD intrinsic. Also, sets the out params inOutTypeHnd, argCount, baseType and
+// sizeBytes.
+//
+// Note that the VectorMath class doesn't have a base type and the first argument of the method
+// determines the SIMD vector type on which the intrinsic is invoked. In such a case, inOutTypeHnd
+// is modified by this routine.
+//
+// TODO-Throughput: The current implementation is based on method name string parsing.
+// Although we now have type identification from the VM, the parsing of intrinsic names
+// could be made more efficient.
+//
+const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* inOutTypeHnd,
+ CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_SIG_INFO* sig,
+ bool isNewObj,
+ unsigned* argCount,
+ var_types* baseType,
+ unsigned* sizeBytes)
+{
+ assert(featureSIMD);
+ assert(baseType != nullptr);
+ assert(sizeBytes != nullptr);
+
+ // get baseType and size of the type
+ CORINFO_CLASS_HANDLE typeHnd = *inOutTypeHnd;
+ *baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, sizeBytes);
+
+ bool isHWAcceleratedIntrinsic = false;
+ if (typeHnd == SIMDVectorHandle)
+ {
+ // All of the supported intrinsics on this static class take a first argument that's a vector,
+ // which determines the baseType.
+ // The exception is the IsHardwareAccelerated property, which is handled as a special case.
+ assert(*baseType == TYP_UNKNOWN);
+ if (sig->numArgs == 0)
+ {
+ const SIMDIntrinsicInfo* hwAccelIntrinsicInfo = &(simdIntrinsicInfoArray[SIMDIntrinsicHWAccel]);
+ if ((strcmp(eeGetMethodName(methodHnd, nullptr), hwAccelIntrinsicInfo->methodName) == 0) &&
+ JITtype2varType(sig->retType) == hwAccelIntrinsicInfo->retType)
+ {
+ // Sanity check
+ assert(hwAccelIntrinsicInfo->argCount == 0 && hwAccelIntrinsicInfo->isInstMethod == false);
+ return hwAccelIntrinsicInfo;
+ }
+ return nullptr;
+ }
+ else
+ {
+ typeHnd = info.compCompHnd->getArgClass(sig, sig->args);
+ *inOutTypeHnd = typeHnd;
+ *baseType = getBaseTypeAndSizeOfSIMDType(typeHnd, sizeBytes);
+ }
+ }
+
+ if (*baseType == TYP_UNKNOWN)
+ {
+ JITDUMP("NOT a SIMD Intrinsic: unsupported baseType\n");
+ return nullptr;
+ }
+
+ // account for implicit "this" arg
+ *argCount = sig->numArgs;
+ if (sig->hasThis())
+ {
+ *argCount += 1;
+ }
+
+ // Get the Intrinsic Id by parsing method name.
+ //
+ // TODO-Throughput: replace sequential search by binary search by arranging entries
+ // sorted by method name.
+ SIMDIntrinsicID intrinsicId = SIMDIntrinsicInvalid;
+ const char* methodName = eeGetMethodName(methodHnd, nullptr);
+ for (int i = SIMDIntrinsicNone + 1; i < SIMDIntrinsicInvalid; ++i)
+ {
+ if (strcmp(methodName, simdIntrinsicInfoArray[i].methodName) == 0)
+ {
+ // Found an entry for the method; further check whether it is one of
+ // the supported base types.
+ bool found = false;
+ for (int j = 0; j < SIMD_INTRINSIC_MAX_BASETYPE_COUNT; ++j)
+ {
+ // Convention: if there are fewer base types supported than MAX_BASETYPE_COUNT,
+ // the end of the list is marked by TYP_UNDEF.
+ if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == TYP_UNDEF)
+ {
+ break;
+ }
+
+ if (simdIntrinsicInfoArray[i].supportedBaseTypes[j] == *baseType)
+ {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ continue;
+ }
+
+ // Now, check the arguments.
+ unsigned int fixedArgCnt = simdIntrinsicInfoArray[i].argCount;
+ unsigned int expectedArgCnt = fixedArgCnt;
+
+ // First handle SIMDIntrinsicInitN, where the arg count depends on the type.
+ // The listed arg types include the vector and the first two init values, which is the expected number
+ // for Vector2. For other cases, we'll check their types here.
+ if (*argCount > expectedArgCnt)
+ {
+ if (i == SIMDIntrinsicInitN)
+ {
+ if (*argCount == 3 && typeHnd == SIMDVector2Handle)
+ {
+ expectedArgCnt = 3;
+ }
+ else if (*argCount == 4 && typeHnd == SIMDVector3Handle)
+ {
+ expectedArgCnt = 4;
+ }
+ else if (*argCount == 5 && typeHnd == SIMDVector4Handle)
+ {
+ expectedArgCnt = 5;
+ }
+ }
+ else if (i == SIMDIntrinsicInitFixed)
+ {
+ if (*argCount == 4 && typeHnd == SIMDVector4Handle)
+ {
+ expectedArgCnt = 4;
+ }
+ }
+ }
+ if (*argCount != expectedArgCnt)
+ {
+ continue;
+ }
+
+ // Validate that the types of the individual args are what is expected.
+ // If any of the types don't match, don't treat the method as an intrinsic.
+ // This will make an older JIT with SIMD capabilities resilient to
+ // breaking changes to the SIMD managed API.
+ //
+ // Note that args are popped off the IL type stack in right-to-left order,
+ // whereas they are listed in method signatures in left-to-right order.
+
+ int stackIndex = (expectedArgCnt - 1);
+
+ // Track the arguments from the signature - we currently only use this to distinguish
+ // integral and pointer types, both of which will be TYP_I_IMPL on the importer stack.
+ CORINFO_ARG_LIST_HANDLE argLst = sig->args;
+
+ CORINFO_CLASS_HANDLE argClass;
+ for (unsigned int argIndex = 0; found == true && argIndex < expectedArgCnt; argIndex++)
+ {
+ bool isThisPtr = ((argIndex == 0) && sig->hasThis());
+
+ // In case of "newobj SIMDVector<T>(T val)", thisPtr won't be present on type stack.
+ // We don't check anything in that case.
+ if (!isThisPtr || !isNewObj)
+ {
+ GenTreePtr arg = impStackTop(stackIndex).val;
+
+ var_types expectedArgType;
+ if (argIndex < fixedArgCnt)
+ {
+ // Convention:
+ // - intrinsicInfo.argType[i] == TYP_UNDEF - intrinsic doesn't have a valid arg at position i
+ // - intrinsicInfo.argType[i] == TYP_UNKNOWN - arg type should be same as basetype
+ // Note that we pop the args off in reverse order.
+ expectedArgType = simdIntrinsicInfoArray[i].argType[argIndex];
+ assert(expectedArgType != TYP_UNDEF);
+ if (expectedArgType == TYP_UNKNOWN)
+ {
+ // The type of the argument will be genActualType(*baseType).
+ expectedArgType = genActualType(*baseType);
+ }
+ }
+ else
+ {
+ expectedArgType = *baseType;
+ }
+
+ var_types argType = arg->TypeGet();
+ if (!isThisPtr && argType == TYP_I_IMPL)
+ {
+ // The reference implementation has a constructor that takes a pointer.
+ // We don't want to recognize that one. This requires us to look at the CorInfoType
+ // in order to distinguish a signature with a pointer argument from one with an
+ // integer argument of pointer size, both of which will be TYP_I_IMPL on the stack.
+ // TODO-Review: This seems quite fragile. We should consider beefing up the checking
+ // here.
+ CorInfoType corType = strip(info.compCompHnd->getArgType(sig, argLst, &argClass));
+ if (corType == CORINFO_TYPE_PTR)
+ {
+ found = false;
+ }
+ }
+
+ if (varTypeIsSIMD(argType))
+ {
+ argType = TYP_STRUCT;
+ }
+ if (argType != expectedArgType)
+ {
+ found = false;
+ }
+ }
+ if (argIndex != 0 || !sig->hasThis())
+ {
+ argLst = info.compCompHnd->getArgNext(argLst);
+ }
+ stackIndex--;
+ }
+
+ // Cross check return type and static vs. instance is what we are expecting.
+ // If not, don't consider it as an intrinsic.
+ // Note that a ret type of TYP_UNKNOWN means that it is not known a priori and must be the same as baseType
+ if (found)
+ {
+ var_types expectedRetType = simdIntrinsicInfoArray[i].retType;
+ if (expectedRetType == TYP_UNKNOWN)
+ {
+ // JIT maps uint/ulong type vars to TYP_INT/TYP_LONG.
+ expectedRetType =
+ (*baseType == TYP_UINT || *baseType == TYP_ULONG) ? genActualType(*baseType) : *baseType;
+ }
+
+ if (JITtype2varType(sig->retType) != expectedRetType ||
+ sig->hasThis() != simdIntrinsicInfoArray[i].isInstMethod)
+ {
+ found = false;
+ }
+ }
+
+ if (found)
+ {
+ intrinsicId = (SIMDIntrinsicID)i;
+ break;
+ }
+ }
+ }
+
+ if (intrinsicId != SIMDIntrinsicInvalid)
+ {
+ JITDUMP("Method %s maps to SIMD intrinsic %s\n", methodName, simdIntrinsicNames[intrinsicId]);
+ return &simdIntrinsicInfoArray[intrinsicId];
+ }
+ else
+ {
+ JITDUMP("Method %s is NOT a SIMD intrinsic\n", methodName);
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// impSIMDPopStack: Pops and returns a GenTree node from the importer's type stack.
+// Normalizes TYP_STRUCT values in the case of GT_CALL, GT_RET_EXPR and arg nodes.
+//
+// Arguments:
+// type - the type of value that the caller expects to be popped off the stack.
+// expectAddr - if true indicates we are expecting type stack entry to be a TYP_BYREF.
+//
+// Notes:
+// If the popped value is a struct, and the expected type is a simd type, it will be set
+// to that type, otherwise it will assert if the type being popped is not the expected type.
+
+GenTreePtr Compiler::impSIMDPopStack(var_types type, bool expectAddr)
+{
+ StackEntry se = impPopStack();
+ typeInfo ti = se.seTypeInfo;
+ GenTreePtr tree = se.val;
+
+ // If expectAddr is true, then what we have on the stack is an address, and we need
+ // the SIMD type struct that it points to.
+ if (expectAddr)
+ {
+ assert(tree->TypeGet() == TYP_BYREF);
+ if (tree->OperGet() == GT_ADDR)
+ {
+ tree = tree->gtGetOp1();
+ }
+ else
+ {
+ tree = gtNewOperNode(GT_IND, type, tree);
+ }
+ }
+
+ bool isParam = false;
+
+ // If we have a ldobj of a SIMD local we need to transform it.
+ if (tree->OperGet() == GT_OBJ)
+ {
+ GenTree* addr = tree->gtOp.gtOp1;
+ if ((addr->OperGet() == GT_ADDR) && isSIMDTypeLocal(addr->gtOp.gtOp1))
+ {
+ tree = addr->gtOp.gtOp1;
+ }
+ }
+
+ if (tree->OperGet() == GT_LCL_VAR)
+ {
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ isParam = lclVarDsc->lvIsParam;
+ }
+
+ // normalize TYP_STRUCT value
+ if (varTypeIsStruct(tree) && ((tree->OperGet() == GT_RET_EXPR) || (tree->OperGet() == GT_CALL) || isParam))
+ {
+ assert(ti.IsType(TI_STRUCT));
+ CORINFO_CLASS_HANDLE structType = ti.GetClassHandleForValueClass();
+ tree = impNormStructVal(tree, structType, (unsigned)CHECK_SPILL_ALL);
+ }
+
+ // Now set the type of the tree to the specialized SIMD struct type, if applicable.
+ if (genActualType(tree->gtType) != genActualType(type))
+ {
+ assert(tree->gtType == TYP_STRUCT);
+ tree->gtType = type;
+ }
+ else if (tree->gtType == TYP_BYREF)
+ {
+ assert(tree->IsLocal() || ((tree->gtOper == GT_ADDR) && varTypeIsSIMD(tree->gtGetOp1())));
+ }
+
+ return tree;
+}
+
+// impSIMDGetFixed: Create a GT_SIMD tree for a Get property of SIMD vector with a fixed index.
+//
+// Arguments:
+// simdType - The specific SIMD type of the vector (e.g. TYP_SIMD8, TYP_SIMD16).
+// baseType - The base (element) type of the SIMD vector.
+// simdSize - The total size in bytes of the SIMD vector.
+// index - The index of the field to get.
+//
+// Return Value:
+// Returns a GT_SIMD node with the SIMDIntrinsicGetItem intrinsic id.
+//
+GenTreeSIMD* Compiler::impSIMDGetFixed(var_types simdType, var_types baseType, unsigned simdSize, int index)
+{
+ assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
+
+ // op1 is a SIMD source.
+ GenTree* op1 = impSIMDPopStack(simdType, true);
+
+ GenTree* op2 = gtNewIconNode(index);
+ GenTreeSIMD* simdTree = gtNewSIMDNode(baseType, op1, op2, SIMDIntrinsicGetItem, baseType, simdSize);
+ return simdTree;
+}
+
+#ifdef _TARGET_AMD64_
+// impSIMDLongRelOpEqual: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain == comparison result.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies the in-out params pOp1 and pOp2, and returns the intrinsic ID to be applied to the modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDLongRelOpEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // There is no direct SSE2 support for comparing TYP_LONG vectors.
+ // These have to be implemented in terms of TYP_INT vector comparison operations.
+ //
+ // Equality(v1, v2):
+ // tmp = (v1 == v2) i.e. compare for equality as if v1 and v2 are vector<int>
+ // result = BitwiseAnd(tmp, Shuffle(tmp, (2, 3, 1, 0)))
+ // Shuffle is meant to swap the comparison results of low-32-bits and high 32-bits of respective long elements.
+
+ // Compare vector<long> as if they were vector<int> and assign the result to a temp
+ GenTree* compResult = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, TYP_INT, size);
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("SIMD Long =="));
+ lvaSetStruct(lclNum, typeHnd, false);
+ GenTree* tmp = gtNewLclvNode(lclNum, simdType);
+ GenTree* asg = gtNewTempAssign(lclNum, compResult);
+
+ // op1 = GT_COMMA(tmp=compResult, tmp)
+ // op2 = Shuffle(tmp, 0xB1)
+ // IntrinsicId = BitwiseAnd
+ *pOp1 = gtNewOperNode(GT_COMMA, simdType, asg, tmp);
+ *pOp2 = gtNewSIMDNode(simdType, gtNewLclvNode(lclNum, simdType), gtNewIconNode(SHUFFLE_ZWYX, TYP_INT),
+ SIMDIntrinsicShuffleSSE2, TYP_INT, size);
+ return SIMDIntrinsicBitwiseAnd;
+}
+
+// impSIMDLongRelOpGreaterThan: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain > comparison result.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThan(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // GreaterThan(v1, v2) where v1 and v2 are vector long.
+ // Let us consider the case of single long element comparison.
+ // say L1 = (x1, y1) and L2 = (x2, y2) where x1, y1, x2, and y2 are 32-bit integers that comprise the longs L1 and
+ // L2.
+ //
+ // GreaterThan(L1, L2) can be expressed in terms of > relationship between 32-bit integers that comprise L1 and L2
+ // as
+ // = (x1, y1) > (x2, y2)
+ // = (x1 > x2) || [(x1 == x2) && (y1 > y2)] - eq (1)
+ //
+ // t = (v1 > v2) 32-bit signed comparison
+ // u = (v1 == v2) 32-bit sized element equality
+ // v = (v1 > v2) 32-bit unsigned comparison
+ //
+ // z = shuffle(t, (3, 3, 1, 1)) - This corresponds to (x1 > x2) in eq(1) above
+ // t1 = Shuffle(v, (2, 2, 0, 0)) - This corresponds to (y1 > y2) in eq(1) above
+ // u1 = Shuffle(u, (3, 3, 1, 1)) - This corresponds to (x1 == x2) in eq(1) above
+ // w = And(t1, u1) - This corresponds to [(x1 == x2) && (y1 > y2)] in eq(1) above
+ // Result = BitwiseOr(z, w)
+
+ // Since op1 and op2 gets used multiple times, make sure side effects are computed.
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+ GenTree* dupDupOp1 = nullptr;
+ GenTree* dupDupOp2 = nullptr;
+
+ if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd);
+ dupDupOp1 = gtNewLclvNode(dupOp1->AsLclVarCommon()->GetLclNum(), simdType);
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(*pOp1);
+ dupDupOp1 = gtCloneExpr(*pOp1);
+ }
+
+ if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd);
+ dupDupOp2 = gtNewLclvNode(dupOp2->AsLclVarCommon()->GetLclNum(), simdType);
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(*pOp2);
+ dupDupOp2 = gtCloneExpr(*pOp2);
+ }
+
+ assert(dupDupOp1 != nullptr && dupDupOp2 != nullptr);
+ assert(dupOp1 != nullptr && dupOp2 != nullptr);
+ assert(*pOp1 != nullptr && *pOp2 != nullptr);
+
+ // v1GreaterThanv2Signed - signed 32-bit comparison
+ GenTree* v1GreaterThanv2Signed = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicGreaterThan, TYP_INT, size);
+
+ // v1Equalsv2 - 32-bit equality
+ GenTree* v1Equalsv2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicEqual, TYP_INT, size);
+
+ // v1GreaterThanv2Unsigned - unsigned 32-bit comparison
+ var_types tempBaseType = TYP_UINT;
+ SIMDIntrinsicID sid = impSIMDRelOp(SIMDIntrinsicGreaterThan, typeHnd, size, &tempBaseType, &dupDupOp1, &dupDupOp2);
+ GenTree* v1GreaterThanv2Unsigned = gtNewSIMDNode(simdType, dupDupOp1, dupDupOp2, sid, tempBaseType, size);
+
+ GenTree* z = gtNewSIMDNode(simdType, v1GreaterThanv2Signed, gtNewIconNode(SHUFFLE_WWYY, TYP_INT),
+ SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size);
+ GenTree* t1 = gtNewSIMDNode(simdType, v1GreaterThanv2Unsigned, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT),
+ SIMDIntrinsicShuffleSSE2, TYP_FLOAT, size);
+ GenTree* u1 = gtNewSIMDNode(simdType, v1Equalsv2, gtNewIconNode(SHUFFLE_WWYY, TYP_INT), SIMDIntrinsicShuffleSSE2,
+ TYP_FLOAT, size);
+ GenTree* w = gtNewSIMDNode(simdType, u1, t1, SIMDIntrinsicBitwiseAnd, TYP_INT, size);
+
+ *pOp1 = z;
+ *pOp2 = w;
+ return SIMDIntrinsicBitwiseOr;
+}
+
+// impSIMDLongRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain >= comparison result.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDLongRelOpGreaterThanOrEqual(CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // expand this to (a == b) | (a > b)
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+
+ if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd);
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(*pOp1);
+ }
+
+ if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd);
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(*pOp2);
+ }
+
+ assert(dupOp1 != nullptr && dupOp2 != nullptr);
+ assert(*pOp1 != nullptr && *pOp2 != nullptr);
+
+ // (a==b)
+ SIMDIntrinsicID id = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2);
+ *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, id, TYP_LONG, size);
+
+ // (a > b)
+ id = impSIMDLongRelOpGreaterThan(typeHnd, size, &dupOp1, &dupOp2);
+ *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, id, TYP_LONG, size);
+
+ return SIMDIntrinsicBitwiseOr;
+}
+
+// impSIMDIntegralRelOpGreaterThanOrEqual: transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain >= comparison result in case of integer base type vectors
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// baseType - base type of SIMD vector
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2 and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDIntegralRelOpGreaterThanOrEqual(
+ CORINFO_CLASS_HANDLE typeHnd, unsigned size, var_types baseType, GenTree** pOp1, GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ // This routine should be used only for integer base type vectors
+ assert(varTypeIsIntegral(baseType));
+ if ((getSIMDInstructionSet() == InstructionSet_SSE2) && ((baseType == TYP_LONG) || baseType == TYP_UBYTE))
+ {
+ return impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2);
+ }
+
+ // expand this to (a == b) | (a > b)
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+
+ if (((*pOp1)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp1 = fgInsertCommaFormTemp(pOp1, typeHnd);
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(*pOp1);
+ }
+
+ if (((*pOp2)->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ dupOp2 = fgInsertCommaFormTemp(pOp2, typeHnd);
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(*pOp2);
+ }
+
+ assert(dupOp1 != nullptr && dupOp2 != nullptr);
+ assert(*pOp1 != nullptr && *pOp2 != nullptr);
+
+ // (a==b)
+ *pOp1 = gtNewSIMDNode(simdType, *pOp1, *pOp2, SIMDIntrinsicEqual, baseType, size);
+
+ // (a > b)
+ *pOp2 = gtNewSIMDNode(simdType, dupOp1, dupOp2, SIMDIntrinsicGreaterThan, baseType, size);
+
+ return SIMDIntrinsicBitwiseOr;
+}
+#endif //_TARGET_AMD64_
+
+// impSIMDRelOp: Transforms operands and returns the SIMD intrinsic to be applied on
+// transformed operands to obtain the given relop result.
+//
+// Arguments:
+// relOpIntrinsicId - Relational operator SIMD intrinsic
+// typeHnd - type handle of SIMD vector
+// size - SIMD vector size
+// inOutBaseType - base type of SIMD vector
+// pOp1 - in-out parameter; first operand
+// pOp2 - in-out parameter; second operand
+//
+// Return Value:
+// Modifies in-out params pOp1, pOp2, inOutBaseType and returns intrinsic ID to be applied to modified operands
+//
+SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ unsigned size,
+ var_types* inOutBaseType,
+ GenTree** pOp1,
+ GenTree** pOp2)
+{
+ var_types simdType = (*pOp1)->TypeGet();
+ assert(varTypeIsSIMD(simdType) && ((*pOp2)->TypeGet() == simdType));
+
+ assert(isRelOpSIMDIntrinsic(relOpIntrinsicId));
+
+#ifdef _TARGET_AMD64_
+ SIMDIntrinsicID intrinsicID = relOpIntrinsicId;
+ var_types baseType = *inOutBaseType;
+
+ if (varTypeIsFloating(baseType))
+ {
+ // SSE2/AVX doesn't support > and >= on vector float/double.
+ // Therefore, we need to use < and <= with swapped operands
+ if (relOpIntrinsicId == SIMDIntrinsicGreaterThan || relOpIntrinsicId == SIMDIntrinsicGreaterThanOrEqual)
+ {
+ GenTree* tmp = *pOp1;
+ *pOp1 = *pOp2;
+ *pOp2 = tmp;
+
+ intrinsicID =
+ (relOpIntrinsicId == SIMDIntrinsicGreaterThan) ? SIMDIntrinsicLessThan : SIMDIntrinsicLessThanOrEqual;
+ }
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ // SSE/AVX doesn't support < and <= on integer base type vectors.
+ // Therefore, we need to use > and >= with swapped operands.
+ if (intrinsicID == SIMDIntrinsicLessThan || intrinsicID == SIMDIntrinsicLessThanOrEqual)
+ {
+ GenTree* tmp = *pOp1;
+ *pOp1 = *pOp2;
+ *pOp2 = tmp;
+
+ intrinsicID = (relOpIntrinsicId == SIMDIntrinsicLessThan) ? SIMDIntrinsicGreaterThan
+ : SIMDIntrinsicGreaterThanOrEqual;
+ }
+
+ if ((getSIMDInstructionSet() == InstructionSet_SSE2) && baseType == TYP_LONG)
+ {
+ // There is no direct SSE2 support for comparing TYP_LONG vectors.
+ // These have to be implemented in terms of TYP_INT vector comparison operations.
+ if (intrinsicID == SIMDIntrinsicEqual)
+ {
+ intrinsicID = impSIMDLongRelOpEqual(typeHnd, size, pOp1, pOp2);
+ }
+ else if (intrinsicID == SIMDIntrinsicGreaterThan)
+ {
+ intrinsicID = impSIMDLongRelOpGreaterThan(typeHnd, size, pOp1, pOp2);
+ }
+ else if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual)
+ {
+ intrinsicID = impSIMDLongRelOpGreaterThanOrEqual(typeHnd, size, pOp1, pOp2);
+ }
+ else
+ {
+ unreached();
+ }
+ }
+ // SSE2 and AVX have direct support for signed comparisons of int32, int16 and int8 types
+ else if (!varTypeIsUnsigned(baseType))
+ {
+ if (intrinsicID == SIMDIntrinsicGreaterThanOrEqual)
+ {
+ intrinsicID = impSIMDIntegralRelOpGreaterThanOrEqual(typeHnd, size, baseType, pOp1, pOp2);
+ }
+ }
+ else // unsigned
+ {
+ // Vector<byte>, Vector<ushort>, Vector<uint> and Vector<ulong>:
+ // SSE2 supports > for signed comparison. Therefore, to use it for
+ // comparing unsigned numbers, we subtract a constant from both the
+ // operands such that the result fits within the corresponding signed
+ // type. The resulting signed numbers are compared using SSE2 signed
+ // comparison.
+ //
+ // Vector<byte>: constant to be subtracted is 2^7
+ // Vector<ushort>: constant to be subtracted is 2^15
+ // Vector<uint>: constant to be subtracted is 2^31
+ // Vector<ulong>: constant to be subtracted is 2^63
+ //
+ // We need to treat op1 and op2 as signed for comparison purposes after
+ // the transformation.
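+ //
+ // For example (unsigned bytes, illustrative): the unsigned comparison 200 > 100 becomes the
+ // signed comparison (200 - 128) > (100 - 128), i.e. 72 > -28, which preserves the ordering.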
+ ssize_t constVal = 0;
+ switch (baseType)
+ {
+ case TYP_UBYTE:
+ constVal = 0x80808080;
+ *inOutBaseType = TYP_BYTE;
+ break;
+ case TYP_CHAR:
+ constVal = 0x80008000;
+ *inOutBaseType = TYP_SHORT;
+ break;
+ case TYP_UINT:
+ constVal = 0x80000000;
+ *inOutBaseType = TYP_INT;
+ break;
+ case TYP_ULONG:
+ constVal = 0x8000000000000000LL;
+ *inOutBaseType = TYP_LONG;
+ break;
+ default:
+ unreached();
+ break;
+ }
+ assert(constVal != 0);
+
+ // This transformation is not required for equality.
+ if (intrinsicID != SIMDIntrinsicEqual)
+ {
+ // For constructing const vector use either long or int base type.
+ var_types tempBaseType = (baseType == TYP_ULONG) ? TYP_LONG : TYP_INT;
+ GenTree* initVal = gtNewIconNode(constVal);
+ initVal->gtType = tempBaseType;
+ GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, tempBaseType, size);
+
+ // Assign constVector to a temp, since we intend to use it more than once
+ // TODO-CQ: We have quite a few such constant vectors constructed during
+ // the importation of SIMD intrinsics. Make sure that we have a single
+ // temp per distinct constant per method.
+ GenTree* tmp = fgInsertCommaFormTemp(&constVector, typeHnd);
+
+ // op1 = op1 - constVector
+ // op2 = op2 - constVector
+ *pOp1 = gtNewSIMDNode(simdType, *pOp1, constVector, SIMDIntrinsicSub, baseType, size);
+ *pOp2 = gtNewSIMDNode(simdType, *pOp2, tmp, SIMDIntrinsicSub, baseType, size);
+ }
+
+ return impSIMDRelOp(intrinsicID, typeHnd, size, inOutBaseType, pOp1, pOp2);
+ }
+ }
+
+ return intrinsicID;
+#else
+ assert(!"impSIMDRelOp() unimplemented on target arch");
+ unreached();
+#endif //_TARGET_AMD64_
+}
+
+// impSIMDSelect: Creates a GT_SIMD tree for a Select operation.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// baseType - base type of SIMD vector
+// size - SIMD vector size
+// op1 - first operand = Condition vector vc
+// op2 - second operand = va
+// op3 - third operand = vb
+//
+// Return Value:
+// Returns GT_SIMD tree that computes Select(vc, va, vb)
+//
+GenTreePtr Compiler::impSIMDSelect(
+ CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned size, GenTree* op1, GenTree* op2, GenTree* op3)
+{
+ assert(varTypeIsSIMD(op1));
+ var_types simdType = op1->TypeGet();
+ assert(op2->TypeGet() == simdType);
+ assert(op3->TypeGet() == simdType);
+
+ // Select(BitVector vc, va, vb) = (va & vc) | (vb & !vc)
+ // Select(op1, op2, op3) = (op2 & op1) | (op3 & !op1)
+ // = SIMDIntrinsicBitwiseOr(SIMDIntrinsicBitwiseAnd(op2, op1),
+ // SIMDIntrinsicBitwiseAndNot(op3, op1))
+ //
+ // If Op1 has side effect, create an assignment to a temp
+ GenTree* tmp = op1;
+ GenTree* asg = nullptr;
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ unsigned lclNum = lvaGrabTemp(true DEBUGARG("SIMD Select"));
+ lvaSetStruct(lclNum, typeHnd, false);
+ tmp = gtNewLclvNode(lclNum, op1->TypeGet());
+ asg = gtNewTempAssign(lclNum, op1);
+ }
+
+ GenTree* andExpr = gtNewSIMDNode(simdType, op2, tmp, SIMDIntrinsicBitwiseAnd, baseType, size);
+ GenTree* dupOp1 = gtCloneExpr(tmp);
+ assert(dupOp1 != nullptr);
+ GenTree* andNotExpr = gtNewSIMDNode(simdType, dupOp1, op3, SIMDIntrinsicBitwiseAndNot, baseType, size);
+ GenTree* simdTree = gtNewSIMDNode(simdType, andExpr, andNotExpr, SIMDIntrinsicBitwiseOr, baseType, size);
+
+ // If asg not null, create a GT_COMMA tree.
+ if (asg != nullptr)
+ {
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), asg, simdTree);
+ }
+
+ return simdTree;
+}
+
+// impSIMDMinMax: Creates a GT_SIMD tree for a Min/Max operation.
+//
+// Arguments:
+// intrinsicId - SIMD intrinsic Id, either Min or Max
+// typeHnd - type handle of SIMD vector
+// baseType - base type of SIMD vector
+// size - SIMD vector size
+// op1 - first operand = va
+// op2 - second operand = vb
+//
+// Return Value:
+// Returns a GT_SIMD tree that computes Min/Max(va, vb)
+//
+GenTreePtr Compiler::impSIMDMinMax(SIMDIntrinsicID intrinsicId,
+ CORINFO_CLASS_HANDLE typeHnd,
+ var_types baseType,
+ unsigned size,
+ GenTree* op1,
+ GenTree* op2)
+{
+ assert(intrinsicId == SIMDIntrinsicMin || intrinsicId == SIMDIntrinsicMax);
+ assert(varTypeIsSIMD(op1));
+ var_types simdType = op1->TypeGet();
+ assert(op2->TypeGet() == simdType);
+
+#ifdef _TARGET_AMD64_
+ // SSE2 has direct support for float/double/signed word/unsigned byte.
+ // For other integer types we compute min/max as follows
+ //
+ // int32/uint32/int64/uint64:
+ // compResult = (op1 < op2) in case of Min
+ // (op1 > op2) in case of Max
+ // Min/Max(op1, op2) = Select(compResult, op1, op2)
+ //
+ // unsigned word:
+ // op1 = op1 - 2^15 ; to make it fit within a signed word
+ // op2 = op2 - 2^15 ; to make it fit within a signed word
+ // result = SSE2 signed word Min/Max(op1, op2)
+ // result = result + 2^15 ; readjust it back
+ //
+ // signed byte:
+ // op1 = op1 + 2^7 ; to make it unsigned
+ // op2 = op2 + 2^7 ; to make it unsigned
+ // result = SSE2 unsigned byte Min/Max(op1, op2)
+ // result = result - 2^7 ; readjust it back
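+ //
+ // For example (unsigned word Min, illustrative): Min(40000, 10) becomes
+ // Min(40000 - 2^15, 10 - 2^15) = Min(7232, -32758) = -32758 as signed words,
+ // and -32758 + 2^15 = 10, which is the expected unsigned result.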
+
+ GenTree* simdTree = nullptr;
+
+ if (varTypeIsFloating(baseType) || baseType == TYP_SHORT || baseType == TYP_UBYTE)
+ {
+ // SSE2 has direct support
+ simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, baseType, size);
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_BYTE)
+ {
+ int constVal;
+ SIMDIntrinsicID operIntrinsic;
+ SIMDIntrinsicID adjustIntrinsic;
+ var_types minMaxOperBaseType;
+ if (baseType == TYP_CHAR)
+ {
+ constVal = 0x80008000;
+ operIntrinsic = SIMDIntrinsicSub;
+ adjustIntrinsic = SIMDIntrinsicAdd;
+ minMaxOperBaseType = TYP_SHORT;
+ }
+ else
+ {
+ assert(baseType == TYP_BYTE);
+ constVal = 0x80808080;
+ operIntrinsic = SIMDIntrinsicAdd;
+ adjustIntrinsic = SIMDIntrinsicSub;
+ minMaxOperBaseType = TYP_UBYTE;
+ }
+
+ GenTree* initVal = gtNewIconNode(constVal);
+ GenTree* constVector = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size);
+
+ // Assign constVector to a temp, since we intend to use it more than once
+ // TODO-CQ: We have quite a few such constant vectors constructed during
+ // the importation of SIMD intrinsics. Make sure that we have a single
+ // temp per distinct constant per method.
+ GenTree* tmp = fgInsertCommaFormTemp(&constVector, typeHnd);
+
+        // Adjust op1 and op2 by constVector (subtract for unsigned short, add for signed byte):
+        // op1 = op1 (-/+) constVector
+        // op2 = op2 (-/+) constVector
+ op1 = gtNewSIMDNode(simdType, op1, constVector, operIntrinsic, baseType, size);
+ op2 = gtNewSIMDNode(simdType, op2, tmp, operIntrinsic, baseType, size);
+
+ // compute min/max of op1 and op2 considering them as if minMaxOperBaseType
+ simdTree = gtNewSIMDNode(simdType, op1, op2, intrinsicId, minMaxOperBaseType, size);
+
+ // re-adjust the value by adding or subtracting constVector
+ tmp = gtNewLclvNode(tmp->AsLclVarCommon()->GetLclNum(), tmp->TypeGet());
+ simdTree = gtNewSIMDNode(simdType, simdTree, tmp, adjustIntrinsic, baseType, size);
+ }
+ else
+ {
+ GenTree* dupOp1 = nullptr;
+ GenTree* dupOp2 = nullptr;
+ GenTree* op1Assign = nullptr;
+ GenTree* op2Assign = nullptr;
+ unsigned op1LclNum;
+ unsigned op2LclNum;
+
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op1LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max"));
+ dupOp1 = gtNewLclvNode(op1LclNum, op1->TypeGet());
+ lvaSetStruct(op1LclNum, typeHnd, false);
+ op1Assign = gtNewTempAssign(op1LclNum, op1);
+ op1 = gtNewLclvNode(op1LclNum, op1->TypeGet());
+ }
+ else
+ {
+ dupOp1 = gtCloneExpr(op1);
+ }
+
+ if ((op2->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op2LclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max"));
+ dupOp2 = gtNewLclvNode(op2LclNum, op2->TypeGet());
+ lvaSetStruct(op2LclNum, typeHnd, false);
+ op2Assign = gtNewTempAssign(op2LclNum, op2);
+ op2 = gtNewLclvNode(op2LclNum, op2->TypeGet());
+ }
+ else
+ {
+ dupOp2 = gtCloneExpr(op2);
+ }
+
+ SIMDIntrinsicID relOpIntrinsic =
+ (intrinsicId == SIMDIntrinsicMin) ? SIMDIntrinsicLessThan : SIMDIntrinsicGreaterThan;
+ var_types relOpBaseType = baseType;
+
+ // compResult = op1 relOp op2
+ // simdTree = Select(compResult, op1, op2);
+ assert(dupOp1 != nullptr);
+ assert(dupOp2 != nullptr);
+ relOpIntrinsic = impSIMDRelOp(relOpIntrinsic, typeHnd, size, &relOpBaseType, &dupOp1, &dupOp2);
+ GenTree* compResult = gtNewSIMDNode(simdType, dupOp1, dupOp2, relOpIntrinsic, relOpBaseType, size);
+ unsigned compResultLclNum = lvaGrabTemp(true DEBUGARG("SIMD Min/Max"));
+ lvaSetStruct(compResultLclNum, typeHnd, false);
+ GenTree* compResultAssign = gtNewTempAssign(compResultLclNum, compResult);
+ compResult = gtNewLclvNode(compResultLclNum, compResult->TypeGet());
+ simdTree = impSIMDSelect(typeHnd, baseType, size, compResult, op1, op2);
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), compResultAssign, simdTree);
+
+ // Now create comma trees if we have created assignments of op1/op2 to temps
+ if (op2Assign != nullptr)
+ {
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op2Assign, simdTree);
+ }
+
+ if (op1Assign != nullptr)
+ {
+ simdTree = gtNewOperNode(GT_COMMA, simdTree->TypeGet(), op1Assign, simdTree);
+ }
+ }
+
+ assert(simdTree != nullptr);
+ return simdTree;
+#else
+ assert(!"impSIMDMinMax() unimplemented on target arch");
+ unreached();
+#endif //_TARGET_AMD64_
+}
+
+//------------------------------------------------------------------------
+// getOp1ForConstructor: Get the op1 for a constructor call.
+//
+// Arguments:
+// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case)
+//    newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object.
+// clsHnd - The handle of the class of the method.
+//
+// Return Value:
+// The tree node representing the object to be initialized with the constructor.
+//
+// Notes:
+// This method handles the differences between the CEE_NEWOBJ and constructor cases.
+//
+GenTreePtr Compiler::getOp1ForConstructor(OPCODE opcode, GenTreePtr newobjThis, CORINFO_CLASS_HANDLE clsHnd)
+{
+ GenTree* op1;
+ if (opcode == CEE_NEWOBJ)
+ {
+ op1 = newobjThis;
+ assert(newobjThis->gtOper == GT_ADDR && newobjThis->gtOp.gtOp1->gtOper == GT_LCL_VAR);
+
+ // push newobj result on type stack
+ unsigned tmp = op1->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ impPushOnStack(gtNewLclvNode(tmp, lvaGetRealType(tmp)), verMakeTypeInfo(clsHnd).NormaliseForStack());
+ }
+ else
+ {
+ op1 = impSIMDPopStack(TYP_BYREF);
+ }
+ assert(op1->TypeGet() == TYP_BYREF);
+ return op1;
+}
+
+//-------------------------------------------------------------------
+// Set the flag that indicates that the lclVar referenced by this tree
+// is used in a SIMD intrinsic.
+// Arguments:
+// tree - GenTreePtr
+
+void Compiler::setLclRelatedToSIMDIntrinsic(GenTreePtr tree)
+{
+ assert(tree->OperIsLocal());
+ unsigned lclNum = tree->AsLclVarCommon()->GetLclNum();
+ LclVarDsc* lclVarDsc = &lvaTable[lclNum];
+ lclVarDsc->lvUsedInSIMDIntrinsic = true;
+}
+
+//-------------------------------------------------------------
+// Check whether two field nodes reference the same memory location.
+// Notice that this check is just based on pattern matching.
+// Arguments:
+// op1 - GenTreePtr.
+// op2 - GenTreePtr.
+// Return Value:
+//    Return true if op1's parent node and op2's parent node refer to the same location; otherwise, return false.
+
+bool areFieldsParentsLocatedSame(GenTreePtr op1, GenTreePtr op2)
+{
+ assert(op1->OperGet() == GT_FIELD);
+ assert(op2->OperGet() == GT_FIELD);
+
+ GenTreePtr op1ObjRef = op1->gtField.gtFldObj;
+ GenTreePtr op2ObjRef = op2->gtField.gtFldObj;
+ while (op1ObjRef != nullptr && op2ObjRef != nullptr)
+ {
+
+ if (op1ObjRef->OperGet() != op2ObjRef->OperGet())
+ {
+ break;
+ }
+ else if (op1ObjRef->OperGet() == GT_ADDR)
+ {
+ op1ObjRef = op1ObjRef->gtOp.gtOp1;
+ op2ObjRef = op2ObjRef->gtOp.gtOp1;
+ }
+
+ if (op1ObjRef->OperIsLocal() && op2ObjRef->OperIsLocal() &&
+ op1ObjRef->AsLclVarCommon()->GetLclNum() == op2ObjRef->AsLclVarCommon()->GetLclNum())
+ {
+ return true;
+ }
+ else if (op1ObjRef->OperGet() == GT_FIELD && op2ObjRef->OperGet() == GT_FIELD &&
+ op1ObjRef->gtField.gtFldHnd == op2ObjRef->gtField.gtFldHnd)
+ {
+ op1ObjRef = op1ObjRef->gtField.gtFldObj;
+ op2ObjRef = op2ObjRef->gtField.gtFldObj;
+ continue;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ return false;
+}
+
+//----------------------------------------------------------------------
+// Check whether two fields are contiguous.
+// Arguments:
+// first - GenTreePtr. The Type of the node should be TYP_FLOAT
+// second - GenTreePtr. The Type of the node should be TYP_FLOAT
+// Return Value:
+//    If the first field is located before the second field, and they are located contiguously,
+//    then return true. Otherwise, return false.
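+//    e.g. (illustrative): for struct S { float x; float y; }, s.x followed by s.y is contiguous,
+//    while s.y followed by s.x is not.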
+
+bool Compiler::areFieldsContiguous(GenTreePtr first, GenTreePtr second)
+{
+ assert(first->OperGet() == GT_FIELD);
+ assert(second->OperGet() == GT_FIELD);
+ assert(first->gtType == TYP_FLOAT);
+ assert(second->gtType == TYP_FLOAT);
+
+ var_types firstFieldType = first->gtType;
+ var_types secondFieldType = second->gtType;
+
+ unsigned firstFieldEndOffset = first->gtField.gtFldOffset + genTypeSize(firstFieldType);
+ unsigned secondFieldOffset = second->gtField.gtFldOffset;
+ if (firstFieldEndOffset == secondFieldOffset && firstFieldType == secondFieldType &&
+ areFieldsParentsLocatedSame(first, second))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+//-------------------------------------------------------------------------------
+// Check whether two array element nodes are located contiguously or not.
+// Arguments:
+// op1 - GenTreePtr.
+// op2 - GenTreePtr.
+// Return Value:
+// if the array element op1 is located before array element op2, and they are contiguous,
+// then return true. Otherwise, return false.
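+//      e.g. (illustrative): arr[3] and arr[4] (constant indices into the same array) are contiguous;
+//      arr[3] and arr[5] are not.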
+// TODO-CQ:
+//      Right now this can only check array elements with a constant index. In the future,
+//      we should consider allowing this function to check the index using an expression.
+
+bool Compiler::areArrayElementsContiguous(GenTreePtr op1, GenTreePtr op2)
+{
+ noway_assert(op1->gtOper == GT_INDEX);
+ noway_assert(op2->gtOper == GT_INDEX);
+ GenTreeIndex* op1Index = op1->AsIndex();
+ GenTreeIndex* op2Index = op2->AsIndex();
+
+ GenTreePtr op1ArrayRef = op1Index->Arr();
+ GenTreePtr op2ArrayRef = op2Index->Arr();
+ assert(op1ArrayRef->TypeGet() == TYP_REF);
+ assert(op2ArrayRef->TypeGet() == TYP_REF);
+
+ GenTreePtr op1IndexNode = op1Index->Index();
+ GenTreePtr op2IndexNode = op2Index->Index();
+ if ((op1IndexNode->OperGet() == GT_CNS_INT && op2IndexNode->OperGet() == GT_CNS_INT) &&
+ op1IndexNode->gtIntCon.gtIconVal + 1 == op2IndexNode->gtIntCon.gtIconVal)
+ {
+ if (op1ArrayRef->OperGet() == GT_FIELD && op2ArrayRef->OperGet() == GT_FIELD &&
+ areFieldsParentsLocatedSame(op1ArrayRef, op2ArrayRef))
+ {
+ return true;
+ }
+ else if (op1ArrayRef->OperIsLocal() && op2ArrayRef->OperIsLocal() &&
+ op1ArrayRef->AsLclVarCommon()->GetLclNum() == op2ArrayRef->AsLclVarCommon()->GetLclNum())
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+//-------------------------------------------------------------------------------
+// Check whether two argument nodes are contiguous or not.
+// Arguments:
+// op1 - GenTreePtr.
+// op2 - GenTreePtr.
+// Return Value:
+// if the argument node op1 is located before argument node op2, and they are located contiguously,
+// then return true. Otherwise, return false.
+// TODO-CQ:
+//      Right now this can only handle fields and array elements. In the future we should add more cases.
+//
+
+bool Compiler::areArgumentsContiguous(GenTreePtr op1, GenTreePtr op2)
+{
+ if (op1->OperGet() == GT_INDEX && op2->OperGet() == GT_INDEX)
+ {
+ return areArrayElementsContiguous(op1, op2);
+ }
+ else if (op1->OperGet() == GT_FIELD && op2->OperGet() == GT_FIELD)
+ {
+ return areFieldsContiguous(op1, op2);
+ }
+ return false;
+}
+
+//--------------------------------------------------------------------------------------------------------
+// createAddressNodeForSIMDInit: Generate the address node (GT_LEA) used to initialize a Vector2, Vector3 or
+// Vector4 from the first argument's address.
+//
+// Arguments:
+//      tree - GenTreePtr. The tree node whose address is used for the indirection.
+//      simdSize - unsigned. The SIMD vector size, used to derive the element count for the array bounds check.
+//
+// Return value:
+// return the address node.
+//
+// TODO-CQ:
+//      1. Currently only GT_FIELD and GT_INDEX are supported, because those are the only nodes for which we can
+//      verify whether they are located contiguously. In the future we should support more cases.
+//      2. Though it happens to just work, front-end phases are not aware of the GT_LEA node. Therefore, convert
+//      these to use GT_ADDR.
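+//
+// e.g. (illustrative): for Vector3 constructor arguments a[c], a[c+1], a[c+2] with constant c, the tree for a[c]
+// produces a GT_LEA of &a[c], with a bounds check on index c+2 attached to the array reference via a GT_COMMA.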
+GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simdSize)
+{
+ assert(tree->OperGet() == GT_FIELD || tree->OperGet() == GT_INDEX);
+ GenTreePtr byrefNode = nullptr;
+ GenTreePtr startIndex = nullptr;
+ unsigned offset = 0;
+ var_types baseType = tree->gtType;
+
+ if (tree->OperGet() == GT_FIELD)
+ {
+ GenTreePtr objRef = tree->gtField.gtFldObj;
+ if (objRef != nullptr && objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+
+            // If the field comes directly from a struct local, we should set that struct's
+            // lvUsedInSIMDIntrinsic to true, so that the struct won't be promoted.
+            // e.g. for s.x, where x is a field and s is a struct, we set s's lvUsedInSIMDIntrinsic to true
+            // so that s won't be promoted.
+            // Notice that for a case like s1.s2.x, where s1 and s2 are structs and x is a field, it is still
+            // possible for s1 to be promoted, and then for s2 to be promoted. The reason is that if we didn't
+            // allow s1 to be promoted, we would hurt the other optimizations which depend on s1's struct promotion.
+            // TODO-CQ:
+            //  In the future, we should optimize this case so that if there is a nested field like s1.s2.x and
+            //  s1.s2.x's address is used for initializing the vector, then s1 can be promoted but s2 can't.
+ if (varTypeIsSIMD(obj) && obj->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(obj);
+ }
+ }
+
+ byrefNode = gtCloneExpr(tree->gtField.gtFldObj);
+ assert(byrefNode != nullptr);
+ offset = tree->gtField.gtFldOffset;
+ }
+ else if (tree->OperGet() == GT_INDEX)
+ {
+
+ GenTreePtr index = tree->AsIndex()->Index();
+ assert(index->OperGet() == GT_CNS_INT);
+
+ GenTreePtr checkIndexExpr = nullptr;
+ unsigned indexVal = (unsigned)(index->gtIntCon.gtIconVal);
+ offset = indexVal * genTypeSize(tree->TypeGet());
+ GenTreePtr arrayRef = tree->AsIndex()->Arr();
+
+        // Generate the bounds check.
+        // The index used for the bounds check should be the maximum index we will access, which is
+        // (first argument's index) + (number of array arguments we have) - 1
+ // = indexVal + arrayElementsCount - 1
+ unsigned arrayElementsCount = simdSize / genTypeSize(baseType);
+ checkIndexExpr = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, indexVal + arrayElementsCount - 1);
+ GenTreeArrLen* arrLen =
+ new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrayRef, (int)offsetof(CORINFO_Array, length));
+ GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, checkIndexExpr, SCK_RNGCHK_FAIL);
+
+ offset += offsetof(CORINFO_Array, u1Elems);
+ byrefNode = gtNewOperNode(GT_COMMA, arrayRef->TypeGet(), arrBndsChk, gtCloneExpr(arrayRef));
+ }
+ else
+ {
+ unreached();
+ }
+ GenTreePtr address =
+ new (this, GT_LEA) GenTreeAddrMode(TYP_BYREF, byrefNode, startIndex, genTypeSize(tree->TypeGet()), offset);
+ return address;
+}
+
+//-------------------------------------------------------------------------------
+// impMarkContiguousSIMDFieldAssignments: Try to identify if there are contiguous
+// assignments from SIMD field to memory. If there are, then mark the related
+// lclvar so that it won't be promoted.
+//
+// Arguments:
+// stmt - GenTreePtr. Input statement node.
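+//
+// e.g. (illustrative): the pattern being matched is a sequence of float assignments such as
+//      d.x = v.X; d.y = v.Y; d.z = v.Z; d.w = v.W;
+//      where v is a SIMD-typed local; once the full sequence is seen, v is marked as lvUsedInSIMDIntrinsic.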
+
+void Compiler::impMarkContiguousSIMDFieldAssignments(GenTreePtr stmt)
+{
+ if (!featureSIMD || opts.MinOpts())
+ {
+ return;
+ }
+ GenTreePtr expr = stmt->gtStmt.gtStmtExpr;
+ if (expr->OperGet() == GT_ASG && expr->TypeGet() == TYP_FLOAT)
+ {
+ GenTreePtr curDst = expr->gtOp.gtOp1;
+ GenTreePtr curSrc = expr->gtOp.gtOp2;
+ unsigned index = 0;
+ var_types baseType = TYP_UNKNOWN;
+ unsigned simdSize = 0;
+ GenTreePtr srcSimdStructNode = getSIMDStructFromField(curSrc, &baseType, &index, &simdSize, true);
+ if (srcSimdStructNode == nullptr || baseType != TYP_FLOAT)
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+ }
+ else if (index == 0 && isSIMDTypeLocal(srcSimdStructNode))
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = stmt;
+ }
+ else if (fgPreviousCandidateSIMDFieldAsgStmt != nullptr)
+ {
+ assert(index > 0);
+ GenTreePtr prevAsgExpr = fgPreviousCandidateSIMDFieldAsgStmt->gtStmt.gtStmtExpr;
+ GenTreePtr prevDst = prevAsgExpr->gtOp.gtOp1;
+ GenTreePtr prevSrc = prevAsgExpr->gtOp.gtOp2;
+ if (!areArgumentsContiguous(prevDst, curDst) || !areArgumentsContiguous(prevSrc, curSrc))
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+ }
+ else
+ {
+ if (index == (simdSize / genTypeSize(baseType) - 1))
+ {
+ // Successfully found the pattern, mark the lclvar as UsedInSIMDIntrinsic
+ if (srcSimdStructNode->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(srcSimdStructNode);
+ }
+
+ if (curDst->OperGet() == GT_FIELD)
+ {
+ GenTreePtr objRef = curDst->gtField.gtFldObj;
+ if (objRef != nullptr && objRef->gtOper == GT_ADDR)
+ {
+ GenTreePtr obj = objRef->gtOp.gtOp1;
+ if (varTypeIsStruct(obj) && obj->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(obj);
+ }
+ }
+ }
+ }
+ else
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = stmt;
+ }
+ }
+ }
+ }
+ else
+ {
+ fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
+ }
+}
+
+//------------------------------------------------------------------------
+// impSIMDIntrinsic: Check method to see if it is a SIMD method
+//
+// Arguments:
+// opcode - the opcode being handled (needed to identify the CEE_NEWOBJ case)
+//    newobjThis - For CEE_NEWOBJ, this is the temp grabbed for the allocated uninitialized object.
+// clsHnd - The handle of the class of the method.
+// method - The handle of the method.
+// sig - The call signature for the method.
+// memberRef - The memberRef token for the method reference.
+//
+// Return Value:
+// If clsHnd is a known SIMD type, and 'method' is one of the methods that are
+// implemented as an intrinsic in the JIT, then return the tree that implements
+// it.
+//
+GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
+ GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
+ CORINFO_METHOD_HANDLE methodHnd,
+ CORINFO_SIG_INFO* sig,
+ int memberRef)
+{
+ assert(featureSIMD);
+
+ if (!isSIMDClass(clsHnd))
+ {
+ return nullptr;
+ }
+
+ // Get base type and intrinsic Id
+ var_types baseType = TYP_UNKNOWN;
+ unsigned size = 0;
+ unsigned argCount = 0;
+ const SIMDIntrinsicInfo* intrinsicInfo =
+ getSIMDIntrinsicInfo(&clsHnd, methodHnd, sig, (opcode == CEE_NEWOBJ), &argCount, &baseType, &size);
+ if (intrinsicInfo == nullptr || intrinsicInfo->id == SIMDIntrinsicInvalid)
+ {
+ return nullptr;
+ }
+
+ SIMDIntrinsicID simdIntrinsicID = intrinsicInfo->id;
+ var_types simdType;
+ if (baseType != TYP_UNKNOWN)
+ {
+ simdType = getSIMDTypeForSize(size);
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicHWAccel);
+ simdType = TYP_UNKNOWN;
+ }
+ bool instMethod = intrinsicInfo->isInstMethod;
+ var_types callType = JITtype2varType(sig->retType);
+ if (callType == TYP_STRUCT)
+ {
+        // Note that here we are assuming that, if the call returns a struct, it is the same size as the
+ // struct on which the method is declared. This is currently true for all methods on Vector types,
+ // but if this ever changes, we will need to determine the callType from the signature.
+ assert(info.compCompHnd->getClassSize(sig->retTypeClass) == genTypeSize(simdType));
+ callType = simdType;
+ }
+
+ GenTree* simdTree = nullptr;
+ GenTree* op1 = nullptr;
+ GenTree* op2 = nullptr;
+ GenTree* op3 = nullptr;
+ GenTree* retVal = nullptr;
+ GenTree* copyBlkDst = nullptr;
+ bool doCopyBlk = false;
+
+ switch (simdIntrinsicID)
+ {
+ case SIMDIntrinsicGetCount:
+ {
+ int length = getSIMDVectorLength(clsHnd);
+ GenTreeIntCon* intConstTree = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, length);
+ retVal = intConstTree;
+ }
+ break;
+
+ case SIMDIntrinsicGetZero:
+ {
+ baseType = genActualType(baseType);
+ GenTree* initVal = gtNewZeroConNode(baseType);
+ initVal->gtType = baseType;
+ simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicGetOne:
+ {
+ GenTree* initVal;
+ if (varTypeIsSmallInt(baseType))
+ {
+ unsigned baseSize = genTypeSize(baseType);
+ int val;
+ if (baseSize == 1)
+ {
+ val = 0x01010101;
+ }
+ else
+ {
+ val = 0x00010001;
+ }
+ initVal = gtNewIconNode(val);
+ }
+ else
+ {
+ initVal = gtNewOneConNode(baseType);
+ }
+
+ baseType = genActualType(baseType);
+ initVal->gtType = baseType;
+ simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicGetAllOnes:
+ {
+ // Equivalent to (Vector<T>) new Vector<int>(0xffffffff);
+ GenTree* initVal = gtNewIconNode(0xffffffff, TYP_INT);
+ simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, TYP_INT, size);
+ if (baseType != TYP_INT)
+ {
+ // cast it to required baseType if different from TYP_INT
+ simdTree = gtNewSIMDNode(simdType, simdTree, nullptr, SIMDIntrinsicCast, baseType, size);
+ }
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicInit:
+ case SIMDIntrinsicInitN:
+ {
+ // SIMDIntrinsicInit:
+ // op2 - the initializer value
+ // op1 - byref of vector
+ //
+ // SIMDIntrinsicInitN
+ // op2 - list of initializer values stitched into a list
+ // op1 - byref of vector
+ bool initFromFirstArgIndir = false;
+ if (simdIntrinsicID == SIMDIntrinsicInit)
+ {
+ op2 = impSIMDPopStack(baseType);
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicInitN);
+ assert(baseType == TYP_FLOAT);
+
+ unsigned initCount = argCount - 1;
+ unsigned elementCount = getSIMDVectorLength(size, baseType);
+ noway_assert(initCount == elementCount);
+ GenTree* nextArg = op2;
+
+ // Build a GT_LIST with the N values.
+ // We must maintain left-to-right order of the args, but we will pop
+ // them off in reverse order (the Nth arg was pushed onto the stack last).
+
+ GenTree* list = nullptr;
+ GenTreePtr firstArg = nullptr;
+ GenTreePtr prevArg = nullptr;
+ int offset = 0;
+ bool areArgsContiguous = true;
+ for (unsigned i = 0; i < initCount; i++)
+ {
+ GenTree* nextArg = impSIMDPopStack(baseType);
+ if (areArgsContiguous)
+ {
+ GenTreePtr curArg = nextArg;
+ firstArg = curArg;
+
+ if (prevArg != nullptr)
+ {
+ // Recall that we are popping the args off the stack in reverse order.
+ areArgsContiguous = areArgumentsContiguous(curArg, prevArg);
+ }
+ prevArg = curArg;
+ }
+
+ list = new (this, GT_LIST) GenTreeOp(GT_LIST, baseType, nextArg, list);
+ }
+
+ if (areArgsContiguous && baseType == TYP_FLOAT)
+ {
+                    // Since Vector2, Vector3 and Vector4's arguments are all of type float,
+                    // we initialize the vector from the first argument's address, but only when
+                    // the baseType is TYP_FLOAT and the arguments are located contiguously in memory.
+ initFromFirstArgIndir = true;
+ GenTreePtr op2Address = createAddressNodeForSIMDInit(firstArg, size);
+ var_types simdType = getSIMDTypeForSize(size);
+ op2 = gtNewOperNode(GT_IND, simdType, op2Address);
+ }
+ else
+ {
+ op2 = list;
+ }
+ }
+
+ op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd);
+
+ assert(op1->TypeGet() == TYP_BYREF);
+ assert(genActualType(op2->TypeGet()) == genActualType(baseType) || initFromFirstArgIndir);
+
+#if AVX_WITHOUT_AVX2
+ // NOTE: This #define, AVX_WITHOUT_AVX2, is never defined. This code is kept here
+ // in case we decide to implement AVX support (32 byte vectors) with AVX only.
+ // On AVX (as opposed to AVX2), broadcast is supported only for float and double,
+ // and requires taking a mem address of the value.
+ // If not a constant, take the addr of op2.
+ if (simdIntrinsicID == SIMDIntrinsicInit && canUseAVX())
+ {
+ if (!op2->OperIsConst())
+ {
+ // It is better to assign op2 to a temp and take the addr of temp
+ // rather than taking address of op2 since the latter would make op2
+ // address-taken and ineligible for register allocation.
+ //
+ // op2 = GT_COMMA(tmp=op2, GT_ADDR(tmp))
+ unsigned tmpNum = lvaGrabTemp(true DEBUGARG("Val addr for vector Init"));
+ GenTreePtr asg = gtNewTempAssign(tmpNum, op2);
+ GenTreePtr tmp = gtNewLclvNode(tmpNum, op2->TypeGet());
+ tmp = gtNewOperNode(GT_ADDR, TYP_BYREF, tmp);
+ op2 = gtNewOperNode(GT_COMMA, TYP_BYREF, asg, tmp);
+ }
+ }
+#endif
+ // For integral base types of size less than TYP_INT, expand the initializer
+ // to fill size of TYP_INT bytes.
+ if (varTypeIsSmallInt(baseType))
+ {
+ // This case should occur only for Init intrinsic.
+ assert(simdIntrinsicID == SIMDIntrinsicInit);
+
+ unsigned baseSize = genTypeSize(baseType);
+ int multiplier;
+ if (baseSize == 1)
+ {
+ multiplier = 0x01010101;
+ }
+ else
+ {
+ assert(baseSize == 2);
+ multiplier = 0x00010001;
+ }
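+
+            // For example (illustrative): a byte initializer 0x05 becomes
+            // (0x05 & 0xFF) * 0x01010101 = 0x05050505, i.e. the value replicated into every byte of an int.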
+
+ GenTree* t1 = nullptr;
+ if (baseType == TYP_BYTE)
+ {
+ // What we have is a signed byte initializer,
+ // which when loaded to a reg will get sign extended to TYP_INT.
+                    // But what we need is the initializer zero-extended (rather than sign-extended) to 32 bits.
+ t1 = gtNewOperNode(GT_AND, TYP_INT, op2, gtNewIconNode(0xff, TYP_INT));
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ // What we have is a signed short initializer,
+ // which when loaded to a reg will get sign extended to TYP_INT.
+                    // But what we need is the initializer zero-extended (rather than sign-extended) to 32 bits.
+ t1 = gtNewOperNode(GT_AND, TYP_INT, op2, gtNewIconNode(0xffff, TYP_INT));
+ }
+ else
+ {
+ assert(baseType == TYP_UBYTE || baseType == TYP_CHAR);
+ t1 = gtNewCastNode(TYP_INT, op2, TYP_INT);
+ }
+
+ assert(t1 != nullptr);
+ GenTree* t2 = gtNewIconNode(multiplier, TYP_INT);
+ op2 = gtNewOperNode(GT_MUL, TYP_INT, t1, t2);
+
+ // Construct a vector of TYP_INT with the new initializer and cast it back to vector of baseType
+ simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, TYP_INT, size);
+ simdTree = gtNewSIMDNode(simdType, simdTree, nullptr, SIMDIntrinsicCast, baseType, size);
+ }
+ else
+ {
+
+ if (initFromFirstArgIndir)
+ {
+ simdTree = op2;
+ if (op1->gtOp.gtOp1->OperIsLocal())
+ {
+                        // Mark the dst struct's lclvar as used in a SIMD intrinsic,
+                        // so that this dst struct won't be promoted.
+ setLclRelatedToSIMDIntrinsic(op1->gtOp.gtOp1);
+ }
+ }
+ else
+ {
+ simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, baseType, size);
+ }
+ }
+
+ copyBlkDst = op1;
+ doCopyBlk = true;
+ }
+ break;
+
+ case SIMDIntrinsicInitArray:
+ case SIMDIntrinsicInitArrayX:
+ case SIMDIntrinsicCopyToArray:
+ case SIMDIntrinsicCopyToArrayX:
+ {
+ // op3 - index into array in case of SIMDIntrinsicCopyToArrayX and SIMDIntrinsicInitArrayX
+ // op2 - array itself
+ // op1 - byref to vector struct
+
+ unsigned int vectorLength = getSIMDVectorLength(size, baseType);
+            // (This constructor takes only zero-based arrays.)
+ // We will add one or two bounds checks:
+ // 1. If we have an index, we must do a check on that first.
+ // We can't combine it with the index + vectorLength check because
+ // a. It might be negative, and b. It may need to raise a different exception
+ // (captured as SCK_ARG_RNG_EXCPN for CopyTo and SCK_RNGCHK_FAIL for Init).
+ // 2. We need to generate a check (SCK_ARG_EXCPN for CopyTo and SCK_RNGCHK_FAIL for Init)
+ // for the last array element we will access.
+ // We'll either check against (vectorLength - 1) or (index + vectorLength - 1).
+
+ GenTree* checkIndexExpr = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength - 1);
+
+ // Get the index into the array. If it has been provided, it will be on the
+ // top of the stack. Otherwise, it is null.
+ if (argCount == 3)
+ {
+ op3 = impSIMDPopStack(TYP_INT);
+ if (op3->IsIntegralConst(0))
+ {
+ op3 = nullptr;
+ }
+ }
+ else
+ {
+ // TODO-CQ: Here, or elsewhere, check for the pattern where op2 is a newly constructed array, and
+ // change this to the InitN form.
+ // op3 = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, 0);
+ op3 = nullptr;
+ }
+
+ // Clone the array for use in the bounds check.
+ op2 = impSIMDPopStack(TYP_REF);
+ assert(op2->TypeGet() == TYP_REF);
+ GenTree* arrayRefForArgChk = op2;
+ GenTree* argRngChk = nullptr;
+ GenTree* asg = nullptr;
+ if ((arrayRefForArgChk->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op2 = fgInsertCommaFormTemp(&arrayRefForArgChk);
+ }
+ else
+ {
+ op2 = gtCloneExpr(arrayRefForArgChk);
+ }
+ assert(op2 != nullptr);
+
+ if (op3 != nullptr)
+ {
+ SpecialCodeKind op3CheckKind;
+ if (simdIntrinsicID == SIMDIntrinsicInitArrayX)
+ {
+ op3CheckKind = SCK_RNGCHK_FAIL;
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicCopyToArrayX);
+ op3CheckKind = SCK_ARG_RNG_EXCPN;
+ }
+ // We need to use the original expression on this, which is the first check.
+ GenTree* arrayRefForArgRngChk = arrayRefForArgChk;
+ // Then we clone the clone we just made for the next check.
+ arrayRefForArgChk = gtCloneExpr(op2);
+ // We know we MUST have had a cloneable expression.
+ assert(arrayRefForArgChk != nullptr);
+ GenTree* index = op3;
+ if ((index->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op3 = fgInsertCommaFormTemp(&index);
+ }
+ else
+ {
+ op3 = gtCloneExpr(index);
+ }
+
+ GenTreeArrLen* arrLen = new (this, GT_ARR_LENGTH)
+ GenTreeArrLen(TYP_INT, arrayRefForArgRngChk, (int)offsetof(CORINFO_Array, length));
+ argRngChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, op3CheckKind);
+ // Now, clone op3 to create another node for the argChk
+ GenTree* index2 = gtCloneExpr(op3);
+                assert(index2 != nullptr);
+ checkIndexExpr = gtNewOperNode(GT_ADD, TYP_INT, index2, checkIndexExpr);
+ }
+
+ // Insert a bounds check for index + offset - 1.
+ // This must be a "normal" array.
+ SpecialCodeKind op2CheckKind;
+ if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX)
+ {
+ op2CheckKind = SCK_RNGCHK_FAIL;
+ }
+ else
+ {
+ op2CheckKind = SCK_ARG_EXCPN;
+ }
+ GenTreeArrLen* arrLen = new (this, GT_ARR_LENGTH)
+ GenTreeArrLen(TYP_INT, arrayRefForArgChk, (int)offsetof(CORINFO_Array, length));
+ GenTreeBoundsChk* argChk = new (this, GT_ARR_BOUNDS_CHECK)
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, checkIndexExpr, op2CheckKind);
+
+ // Create a GT_COMMA tree for the bounds check(s).
+ op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argChk, op2);
+ if (argRngChk != nullptr)
+ {
+ op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argRngChk, op2);
+ }
+
+ if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX)
+ {
+ op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd);
+ simdTree = gtNewSIMDNode(simdType, op2, op3, SIMDIntrinsicInitArray, baseType, size);
+ copyBlkDst = op1;
+ doCopyBlk = true;
+ }
+ else
+ {
+ assert(simdIntrinsicID == SIMDIntrinsicCopyToArray || simdIntrinsicID == SIMDIntrinsicCopyToArrayX);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ assert(op1->TypeGet() == simdType);
+
+ // copy vector (op1) to array (op2) starting at index (op3)
+ simdTree = op1;
+
+ // TODO-Cleanup: Though it happens to just work fine front-end phases are not aware of GT_LEA node.
+ // Therefore, convert these to use GT_ADDR .
+ copyBlkDst = new (this, GT_LEA)
+ GenTreeAddrMode(TYP_BYREF, op2, op3, genTypeSize(baseType), offsetof(CORINFO_Array, u1Elems));
+ doCopyBlk = true;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitFixed:
+ {
+ // We are initializing a fixed-length vector VLarge with a smaller fixed-length vector VSmall, plus 1 or 2
+ // additional floats.
+ // op4 (optional) - float value for VLarge.W, if VLarge is Vector4, and VSmall is Vector2
+ // op3 - float value for VLarge.Z or VLarge.W
+ // op2 - VSmall
+ // op1 - byref of VLarge
+ assert(baseType == TYP_FLOAT);
+ unsigned elementByteCount = 4;
+
+ GenTree* op4 = nullptr;
+ if (argCount == 4)
+ {
+ op4 = impSIMDPopStack(TYP_FLOAT);
+ assert(op4->TypeGet() == TYP_FLOAT);
+ }
+ op3 = impSIMDPopStack(TYP_FLOAT);
+ assert(op3->TypeGet() == TYP_FLOAT);
+ // The input vector will either be TYP_SIMD8 or TYP_SIMD12.
+ var_types smallSIMDType = TYP_SIMD8;
+ if ((op4 == nullptr) && (simdType == TYP_SIMD16))
+ {
+ smallSIMDType = TYP_SIMD12;
+ }
+ op2 = impSIMDPopStack(smallSIMDType);
+ op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd);
+
+ // We are going to redefine the operands so that:
+ // - op3 is the value that's going into the Z position, or null if it's a Vector4 constructor with a single
+ // operand, and
+ // - op4 is the W position value, or null if this is a Vector3 constructor.
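+            // e.g. (illustrative): for new Vector4(Vector3 v, float w), argCount == 3 and size == 16,
+            // so the single float argument ends up in op4 (the W position).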
+ if (size == 16 && argCount == 3)
+ {
+ op4 = op3;
+ op3 = nullptr;
+ }
+
+ simdTree = op2;
+ if (op3 != nullptr)
+ {
+ simdTree = gtNewSIMDNode(simdType, simdTree, op3, SIMDIntrinsicSetZ, baseType, size);
+ }
+ if (op4 != nullptr)
+ {
+ simdTree = gtNewSIMDNode(simdType, simdTree, op4, SIMDIntrinsicSetW, baseType, size);
+ }
+
+ copyBlkDst = op1;
+ doCopyBlk = true;
+ }
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicInstEquals:
+ {
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ assert(op1->TypeGet() == simdType);
+ assert(op2->TypeGet() == simdType);
+
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpEquality, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicOpInEquality:
+ {
+ // op1 is the first operand
+ // op2 is the second operand
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, SIMDIntrinsicOpInEquality, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ case SIMDIntrinsicGreaterThan:
+ case SIMDIntrinsicGreaterThanOrEqual:
+ {
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ SIMDIntrinsicID intrinsicID = impSIMDRelOp(simdIntrinsicID, clsHnd, size, &baseType, &op1, &op2);
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, intrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicDiv:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ {
+#if defined(_TARGET_AMD64_) && defined(DEBUG)
+            // Check for the cases where we don't support the intrinsic.
+            // This check should be done before we make modifications to the type stack.
+            // Note that this is more of a double safety check for robustness since
+            // we expect getSIMDIntrinsicInfo() to have filtered out intrinsics on
+            // unsupported base types. If getSIMDIntrinsicInfo() doesn't filter due
+            // to some bug, the assert in chk/dbg builds will fire.
+ if (!varTypeIsFloating(baseType))
+ {
+ if (simdIntrinsicID == SIMDIntrinsicMul)
+ {
+ if ((baseType != TYP_INT) && (baseType != TYP_SHORT))
+ {
+ // TODO-CQ: implement mul on these integer vectors.
+ // Note that SSE2 has no direct support for these vectors.
+ assert(!"Mul not supported on long/ulong/uint/small int vectors\n");
+ return nullptr;
+ }
+ }
+
+ // common to all integer type vectors
+ if (simdIntrinsicID == SIMDIntrinsicDiv)
+ {
+ // SSE2 doesn't support div on non-floating point vectors.
+ assert(!"Div not supported on integer type vectors\n");
+ return nullptr;
+ }
+ }
+#endif //_TARGET_AMD64_ && DEBUG
+
+ // op1 is the first operand; if instance method, op1 is "this" arg
+ // op2 is the second operand
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ simdTree = gtNewSIMDNode(simdType, op1, op2, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicSelect:
+ {
+ // op3 is a SIMD variable that is the second source
+ // op2 is a SIMD variable that is the first source
+ // op1 is a SIMD variable which is the bit mask.
+ op3 = impSIMDPopStack(simdType);
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType);
+
+ retVal = impSIMDSelect(clsHnd, baseType, size, op1, op2, op3);
+ }
+ break;
+
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ {
+ // op1 is the first operand; if instance method, op1 is "this" arg
+ // op2 is the second operand
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ retVal = impSIMDMinMax(simdIntrinsicID, clsHnd, baseType, size, op1, op2);
+ }
+ break;
+
+ case SIMDIntrinsicGetItem:
+ {
+ // op1 is a SIMD variable that is "this" arg
+ // op2 is an index of TYP_INT
+ op2 = impSIMDPopStack(TYP_INT);
+ op1 = impSIMDPopStack(simdType, instMethod);
+ unsigned int vectorLength = getSIMDVectorLength(size, baseType);
+ if (!op2->IsCnsIntOrI() || op2->AsIntCon()->gtIconVal >= vectorLength)
+ {
+ // We need to bounds-check the length of the vector.
+ // For that purpose, we need to clone the index expression.
+ GenTree* index = op2;
+ if ((index->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ op2 = fgInsertCommaFormTemp(&index);
+ }
+ else
+ {
+ op2 = gtCloneExpr(index);
+ }
+
+ GenTree* lengthNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength);
+ GenTreeBoundsChk* simdChk =
+ new (this, GT_SIMD_CHK) GenTreeBoundsChk(GT_SIMD_CHK, TYP_VOID, lengthNode, index, SCK_RNGCHK_FAIL);
+
+ // Create a GT_COMMA tree for the bounds check.
+ op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), simdChk, op2);
+ }
+
+ assert(op1->TypeGet() == simdType);
+ assert(op2->TypeGet() == TYP_INT);
+
+ simdTree = gtNewSIMDNode(genActualType(callType), op1, op2, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ {
+#if defined(_TARGET_AMD64_) && defined(DEBUG)
+ // Right now dot product is supported only for float vectors.
+ // See SIMDIntrinsicList.h for supported base types for this intrinsic.
+ if (!varTypeIsFloating(baseType))
+ {
+ assert(!"Dot product on integer type vectors not supported");
+ return nullptr;
+ }
+#endif //_TARGET_AMD64_ && DEBUG
+
+ // op1 is a SIMD variable that is the first source and also "this" arg.
+ // op2 is a SIMD variable which is the second source.
+ op2 = impSIMDPopStack(simdType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ simdTree = gtNewSIMDNode(baseType, op1, op2, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicSqrt:
+ {
+#if defined(_TARGET_AMD64_) && defined(DEBUG)
+ // SSE/AVX doesn't support sqrt on integer type vectors and hence
+ // should never be seen as an intrinsic here. See SIMDIntrinsicList.h
+ // for supported base types for this intrinsic.
+ if (!varTypeIsFloating(baseType))
+ {
+ assert(!"Sqrt not supported on integer vectors\n");
+ return nullptr;
+ }
+#endif // _TARGET_AMD64_ && DEBUG
+
+ op1 = impSIMDPopStack(simdType);
+
+ retVal = gtNewSIMDNode(genActualType(callType), op1, nullptr, simdIntrinsicID, baseType, size);
+ }
+ break;
+
+ case SIMDIntrinsicAbs:
+ {
+ op1 = impSIMDPopStack(simdType);
+
+#ifdef _TARGET_AMD64_
+ if (varTypeIsFloating(baseType))
+ {
+ // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
+                // Abs(vd) = vd & new SIMDVector<double>(0x7fffffffffffffff);
+ GenTree* bitMask = nullptr;
+ if (baseType == TYP_FLOAT)
+ {
+ float f;
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x7fffffff;
+ bitMask = gtNewDconNode(f);
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ double d;
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x7fffffffffffffffLL;
+ bitMask = gtNewDconNode(d);
+ }
+
+ assert(bitMask != nullptr);
+ bitMask->gtType = baseType;
+ GenTree* bitMaskVector = gtNewSIMDNode(simdType, bitMask, SIMDIntrinsicInit, baseType, size);
+ retVal = gtNewSIMDNode(simdType, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size);
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_UBYTE || baseType == TYP_UINT || baseType == TYP_ULONG)
+ {
+ // Abs is a no-op on unsigned integer type vectors
+ retVal = op1;
+ }
+ else
+ {
+ // SSE/AVX doesn't support abs on signed integer vectors and hence
+ // should never be seen as an intrinsic here. See SIMDIntrinsicList.h
+ // for supported base types for this intrinsic.
+ unreached();
+ }
+
+#else //!_TARGET_AMD64_
+ assert(!"Abs intrinsic on non-Amd64 target not implemented");
+ unreached();
+#endif //!_TARGET_AMD64_
+ }
+ break;
+
+ case SIMDIntrinsicGetW:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 3);
+ break;
+
+ case SIMDIntrinsicGetZ:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 2);
+ break;
+
+ case SIMDIntrinsicGetY:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 1);
+ break;
+
+ case SIMDIntrinsicGetX:
+ retVal = impSIMDGetFixed(simdType, baseType, size, 0);
+ break;
+
+ case SIMDIntrinsicSetW:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetX:
+ {
+ // op2 is the value to be set at indexTemp position
+ // op1 is SIMD vector that is going to be modified, which is a byref
+
+ // If op1 has a side-effect, then don't make it an intrinsic.
+            // It would be inefficient to read the entire vector into an xmm reg,
+            // modify it, and write the entire xmm reg back.
+ //
+ // TODO-CQ: revisit this later.
+ op1 = impStackTop(1).val;
+ if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0)
+ {
+ return nullptr;
+ }
+
+ op2 = impSIMDPopStack(baseType);
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ GenTree* src = gtCloneExpr(op1);
+ assert(src != nullptr);
+ simdTree = gtNewSIMDNode(simdType, src, op2, simdIntrinsicID, baseType, size);
+
+ copyBlkDst = gtNewOperNode(GT_ADDR, TYP_BYREF, op1);
+ doCopyBlk = true;
+ }
+ break;
+
+ // Unary operators that take and return a Vector.
+ case SIMDIntrinsicCast:
+ {
+ op1 = impSIMDPopStack(simdType, instMethod);
+
+ simdTree = gtNewSIMDNode(simdType, op1, nullptr, simdIntrinsicID, baseType, size);
+ retVal = simdTree;
+ }
+ break;
+
+ case SIMDIntrinsicHWAccel:
+ {
+ GenTreeIntCon* intConstTree = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, 1);
+ retVal = intConstTree;
+ }
+ break;
+
+ default:
+ assert(!"Unimplemented SIMD Intrinsic");
+ return nullptr;
+ }
+
+#ifdef _TARGET_AMD64_
+ // Amd64: also indicate that we use floating point registers.
+ // The need for setting this here is that a method may not have SIMD
+ // type lclvars, but might be exercising SIMD intrinsics on fields of
+ // SIMD type.
+ //
+ // e.g. public Vector<float> ComplexVecFloat::sqabs() { return this.r * this.r + this.i * this.i; }
+ compFloatingPointUsed = true;
+#endif
+
+ // At this point, we have a tree that we are going to store into a destination.
+ // TODO-1stClassStructs: This should be a simple store or assignment, and should not require
+ // GTF_ALL_EFFECT for the dest. This is currently emulating the previous behavior of
+ // block ops.
+ if (doCopyBlk)
+ {
+ GenTree* dest = new (this, GT_BLK) GenTreeBlk(GT_BLK, simdType, copyBlkDst, getSIMDTypeSizeInBytes(clsHnd));
+ dest->gtFlags |= GTF_GLOB_REF;
+ retVal = gtNewBlkOpNode(dest, simdTree, getSIMDTypeSizeInBytes(clsHnd),
+ false, // not volatile
+ true); // copyBlock
+ retVal->gtFlags |= ((simdTree->gtFlags | copyBlkDst->gtFlags) & GTF_ALL_EFFECT);
+ }
+
+ return retVal;
+}
+
+#endif // FEATURE_SIMD
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/simd.h b/src/jit/simd.h
new file mode 100644
index 0000000000..c68899e412
--- /dev/null
+++ b/src/jit/simd.h
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SIMD_H_
+#define _SIMD_H_
+
+#ifdef FEATURE_SIMD
+
+#ifdef DEBUG
+extern const char* const simdIntrinsicNames[];
+#endif
+
+enum SIMDIntrinsicID
+{
+#define SIMD_INTRINSIC(m, i, id, n, r, ac, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) SIMDIntrinsic##id,
+#include "simdintrinsiclist.h"
+};
+
+// Static info about a SIMD intrinsic
+struct SIMDIntrinsicInfo
+{
+ SIMDIntrinsicID id;
+ const char* methodName;
+ bool isInstMethod;
+ var_types retType;
+ unsigned char argCount;
+ var_types argType[SIMD_INTRINSIC_MAX_MODELED_PARAM_COUNT];
+ var_types supportedBaseTypes[SIMD_INTRINSIC_MAX_BASETYPE_COUNT];
+};
+
+#ifdef _TARGET_AMD64_
+// SSE2 Shuffle control byte to shuffle vector <W, Z, Y, X>
+// These correspond to shuffle immediate byte in shufps SSE2 instruction.
+#define SHUFFLE_XXXX 0x00
+#define SHUFFLE_ZWYX 0xB1
+#define SHUFFLE_WWYY 0xF5
+#define SHUFFLE_ZZXX 0xA0
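+// For reference (illustrative reading of the shufps immediate): destination lane i takes the source element
+// selected by immediate bits [2i+1:2i]; e.g. SHUFFLE_ZWYX (0xB1 = 10 11 00 01) picks source elements
+// 2, 3, 0, 1 for destination lanes 3, 2, 1, 0 respectively.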
+#endif
+
+#endif // FEATURE_SIMD
+
+#endif //_SIMD_H_
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
new file mode 100644
index 0000000000..702f967aad
--- /dev/null
+++ b/src/jit/simdcodegenxarch.cpp
@@ -0,0 +1,2143 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Amd64 SIMD Code Generator XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef _TARGET_AMD64_
+#include "emit.h"
+#include "codegen.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "gcinfo.h"
+#include "gcinfoencoder.h"
+
+#ifdef FEATURE_SIMD
+
+// Instruction immediates
+
+// Insertps:
+// - bits 6 and 7 of the immediate indicate which source item to select (0..3)
+// - bits 4 and 5 of the immediate indicate which target item to insert into (0..3)
+// - bits 0 to 3 of the immediate form a mask indicating which target items to zero
+#define INSERTPS_SOURCE_SELECT(i) (i << 6)
+#define INSERTPS_TARGET_SELECT(i) (i << 4)
+#define INSERTPS_ZERO(i) (1 << i)
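+// For example (illustrative): INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3)
+// yields 0x0E, i.e. copy the selected source element into target slot 0 and zero target slots 1 through 3
+// (this is the immediate built in genSIMDScalarMove for the SMT_ZeroInitUpper case below).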
+
+// getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic
+//
+// Arguments:
+// intrinsicId - SIMD intrinsic Id
+// baseType - Base type of the SIMD vector
+// immed - Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
+//
+//
+// Return Value:
+// Instruction (op) to be used, and immed is set if instruction requires an immediate operand.
+//
+instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/)
+{
+ // Minimal required instruction set is SSE2.
+ assert(compiler->canUseSSE2());
+
+ instruction result = INS_invalid;
+ switch (intrinsicId)
+ {
+ case SIMDIntrinsicInit:
+ if (compiler->canUseAVX())
+ {
+ // AVX supports broadcast instructions to populate YMM reg with a single float/double value from memory.
+                // AVX2 supports broadcast instructions to populate a YMM reg with a single value from memory or an xmm reg.
+ // If we decide to use AVX2 only, we can remove this assert.
+ if ((compiler->opts.eeFlags & CORJIT_FLG_USE_AVX2) == 0)
+ {
+ assert(baseType == TYP_FLOAT || baseType == TYP_DOUBLE);
+ }
+ switch (baseType)
+ {
+ case TYP_FLOAT:
+ result = INS_vbroadcastss;
+ break;
+ case TYP_DOUBLE:
+ result = INS_vbroadcastsd;
+ break;
+ case TYP_ULONG:
+ __fallthrough;
+ case TYP_LONG:
+ result = INS_vpbroadcastq;
+ break;
+ case TYP_UINT:
+ __fallthrough;
+ case TYP_INT:
+ result = INS_vpbroadcastd;
+ break;
+ case TYP_CHAR:
+ __fallthrough;
+ case TYP_SHORT:
+ result = INS_vpbroadcastw;
+ break;
+ case TYP_UBYTE:
+ __fallthrough;
+ case TYP_BYTE:
+ result = INS_vpbroadcastb;
+ break;
+ default:
+ unreached();
+ }
+ break;
+ }
+ // For SSE, SIMDIntrinsicInit uses the same instruction as the SIMDIntrinsicShuffleSSE2 intrinsic.
+ __fallthrough;
+ case SIMDIntrinsicShuffleSSE2:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_shufps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_shufpd;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_pshufd;
+ }
+ else if (baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+                // We don't have a separate SSE2 instruction and will
+ // use the instruction meant for doubles since it is
+ // of the same size as a long.
+ result = INS_shufpd;
+ }
+ break;
+
+ case SIMDIntrinsicSqrt:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_sqrtps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_sqrtpd;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicAdd:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_addps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_addpd;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_paddd;
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_SHORT)
+ {
+ result = INS_paddw;
+ }
+ else if (baseType == TYP_UBYTE || baseType == TYP_BYTE)
+ {
+ result = INS_paddb;
+ }
+ else if (baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ result = INS_paddq;
+ }
+ break;
+
+ case SIMDIntrinsicSub:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_subps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_subpd;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_psubd;
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_SHORT)
+ {
+ result = INS_psubw;
+ }
+ else if (baseType == TYP_UBYTE || baseType == TYP_BYTE)
+ {
+ result = INS_psubb;
+ }
+ else if (baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ result = INS_psubq;
+ }
+ break;
+
+ case SIMDIntrinsicMul:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_mulps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_mulpd;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pmullw;
+ }
+ else if (compiler->canUseAVX())
+ {
+ if (baseType == TYP_INT)
+ {
+ result = INS_pmulld;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicDiv:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_divps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_divpd;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicMin:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_minps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_minpd;
+ }
+ else if (baseType == TYP_UBYTE)
+ {
+ result = INS_pminub;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pminsw;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicMax:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_maxps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_maxpd;
+ }
+ else if (baseType == TYP_UBYTE)
+ {
+ result = INS_pmaxub;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pmaxsw;
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_cmpps;
+ assert(ival != nullptr);
+ *ival = 0;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_cmppd;
+ assert(ival != nullptr);
+ *ival = 0;
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT)
+ {
+ result = INS_pcmpeqd;
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_SHORT)
+ {
+ result = INS_pcmpeqw;
+ }
+ else if (baseType == TYP_UBYTE || baseType == TYP_BYTE)
+ {
+ result = INS_pcmpeqb;
+ }
+ else if (compiler->canUseAVX() && (baseType == TYP_ULONG || baseType == TYP_LONG))
+ {
+ result = INS_pcmpeqq;
+ }
+ break;
+
+ case SIMDIntrinsicLessThan:
+ // Packed integers use > with swapped operands
+ assert(baseType != TYP_INT);
+
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_cmpps;
+ assert(ival != nullptr);
+ *ival = 1;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_cmppd;
+ assert(ival != nullptr);
+ *ival = 1;
+ }
+ break;
+
+ case SIMDIntrinsicLessThanOrEqual:
+            // Packed integers use (a == b) || (b > a) in place of a <= b.
+ assert(baseType != TYP_INT);
+
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_cmpps;
+ assert(ival != nullptr);
+ *ival = 2;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_cmppd;
+ assert(ival != nullptr);
+ *ival = 2;
+ }
+ break;
+
+ case SIMDIntrinsicGreaterThan:
+ // Packed float/double use < with swapped operands
+ assert(!varTypeIsFloating(baseType));
+
+ // SSE2 supports only signed >
+ if (baseType == TYP_INT)
+ {
+ result = INS_pcmpgtd;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pcmpgtw;
+ }
+ else if (baseType == TYP_BYTE)
+ {
+ result = INS_pcmpgtb;
+ }
+ else if (compiler->canUseAVX() && (baseType == TYP_LONG))
+ {
+ result = INS_pcmpgtq;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseAnd:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_andps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_andpd;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_pand;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseAndNot:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_andnps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_andnpd;
+ }
+ else if (baseType == TYP_INT)
+ {
+ result = INS_pandn;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_pandn;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseOr:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_orps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_orpd;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_por;
+ }
+ break;
+
+ case SIMDIntrinsicBitwiseXor:
+ if (baseType == TYP_FLOAT)
+ {
+ result = INS_xorps;
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ result = INS_xorpd;
+ }
+ else if (varTypeIsIntegral(baseType))
+ {
+ result = INS_pxor;
+ }
+ break;
+
+ case SIMDIntrinsicCast:
+ result = INS_movaps;
+ break;
+
+ case SIMDIntrinsicShiftLeftInternal:
+ // base type doesn't matter since the entire vector is shifted left
+ result = INS_pslldq;
+ break;
+
+ case SIMDIntrinsicShiftRightInternal:
+ // base type doesn't matter since the entire vector is shifted right
+ result = INS_psrldq;
+ break;
+
+ case SIMDIntrinsicUpperSave:
+ result = INS_vextractf128;
+ break;
+
+ case SIMDIntrinsicUpperRestore:
+ result = INS_insertps;
+ break;
+
+ default:
+ assert(!"Unsupported SIMD intrinsic");
+ unreached();
+ }
+
+ noway_assert(result != INS_invalid);
+ return result;
+}
+
+// genSIMDScalarMove: Generate code to move a value of type "type" from src mm reg
+// to target mm reg, zeroing out the upper bits if and only if specified.
+//
+// Arguments:
+// type the type of value to be moved
+// targetReg the target reg
+// srcReg the src reg
+// moveType action to be performed on target upper bits
+//
+// Return Value:
+// None
+//
+// Notes:
+// This is currently only supported for floating point types.
+//
+void CodeGen::genSIMDScalarMove(var_types type, regNumber targetReg, regNumber srcReg, SIMDScalarMoveType moveType)
+{
+ var_types targetType = compiler->getSIMDVectorType();
+ assert(varTypeIsFloating(type));
+#ifdef FEATURE_AVX_SUPPORT
+ if (compiler->getSIMDInstructionSet() == InstructionSet_AVX)
+ {
+ switch (moveType)
+ {
+ case SMT_PreserveUpper:
+ if (srcReg != targetReg)
+ {
+ instruction ins = ins_Store(type);
+ if (getEmitter()->IsThreeOperandMoveAVXInstruction(ins))
+ {
+                        // In general, when we use a three-operand move instruction, we want to merge the src with
+ // itself. This is an exception in that we actually want the "merge" behavior, so we must
+ // specify it with all 3 operands.
+ inst_RV_RV_RV(ins, targetReg, targetReg, srcReg, emitTypeSize(targetType));
+ }
+ else
+ {
+ inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ }
+ break;
+
+ case SMT_ZeroInitUpper:
+ {
+ // insertps is a 128-bit only instruction, and clears the upper 128 bits, which is what we want.
+                // The insertpsImm selects which fields of the lower 128 bits are copied and which are zeroed,
+                // so we choose to zero all but the lowest element.
+ unsigned int insertpsImm =
+ (INSERTPS_TARGET_SELECT(0) | INSERTPS_ZERO(1) | INSERTPS_ZERO(2) | INSERTPS_ZERO(3));
+ inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, srcReg, insertpsImm);
+ break;
+ }
+
+ case SMT_ZeroInitUpper_SrcHasUpperZeros:
+ if (srcReg != targetReg)
+ {
+ instruction ins = ins_Copy(type);
+ assert(!getEmitter()->IsThreeOperandMoveAVXInstruction(ins));
+ inst_RV_RV(ins, targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else
+#endif // FEATURE_AVX_SUPPORT
+ {
+ // SSE
+
+ switch (moveType)
+ {
+ case SMT_PreserveUpper:
+ if (srcReg != targetReg)
+ {
+ inst_RV_RV(ins_Store(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
+
+ case SMT_ZeroInitUpper:
+ if (srcReg == targetReg)
+ {
+                    // There is no guarantee that the upper bits of op1Reg are zero.
+                    // We zero them by shifting left logically by 12 bytes and then right logically by 12 bytes.
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, type);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, type);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, srcReg, 12);
+ }
+ else
+ {
+ genSIMDZero(targetType, TYP_FLOAT, targetReg);
+ inst_RV_RV(ins_Store(type), targetReg, srcReg);
+ }
+ break;
+
+ case SMT_ZeroInitUpper_SrcHasUpperZeros:
+ if (srcReg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(type), targetReg, srcReg, targetType, emitTypeSize(targetType));
+ }
+ break;
+
+ default:
+ unreached();
+ }
+ }
+}
+
+void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber targetReg)
+{
+ // pxor reg, reg
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseXor, baseType);
+ inst_RV_RV(ins, targetReg, targetReg, targetType, emitActualTypeSize(targetType));
+}
+
+//------------------------------------------------------------------------
+// genSIMDIntrinsicInit: Generate code for SIMD Intrinsic Initialize.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+ unsigned size = simdNode->gtSIMDSize;
+
+ // Should never see small int base type vectors except for zero initialization.
+ noway_assert(!varTypeIsSmallInt(baseType) || op1->IsIntegralConst(0));
+
+ instruction ins = INS_invalid;
+ if (op1->isContained())
+ {
+ if (op1->IsIntegralConst(0) || op1->IsFPZero())
+ {
+ genSIMDZero(targetType, baseType, targetReg);
+ }
+ else if (varTypeIsIntegral(baseType) && op1->IsIntegralConst(-1))
+ {
+ // case of initializing elements of vector with all 1's
+ // generate pcmpeqd reg, reg
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, TYP_INT);
+ inst_RV_RV(ins, targetReg, targetReg, targetType, emitActualTypeSize(targetType));
+ }
+#ifdef FEATURE_AVX_SUPPORT
+ else
+ {
+ assert(iset == InstructionSet_AVX);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicInit, baseType);
+ if (op1->IsCnsFltOrDbl())
+ {
+ getEmitter()->emitInsBinary(ins, emitTypeSize(targetType), simdNode, op1);
+ }
+ else if (op1->OperIsLocalAddr())
+ {
+ unsigned offset = (op1->OperGet() == GT_LCL_FLD_ADDR) ? op1->gtLclFld.gtLclOffs : 0;
+ getEmitter()->emitIns_R_S(ins, emitTypeSize(targetType), targetReg, op1->gtLclVarCommon.gtLclNum,
+ offset);
+ }
+ else
+ {
+ unreached();
+ }
+ }
+#endif // FEATURE_AVX_SUPPORT
+ }
+ else if (iset == InstructionSet_AVX && ((size == 32) || (size == 16)))
+ {
+ regNumber srcReg = genConsumeReg(op1);
+ if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
+ assert(ins != INS_invalid);
+ inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType));
+ srcReg = targetReg;
+ }
+
+ ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+ getEmitter()->emitIns_R_R(ins, emitActualTypeSize(targetType), targetReg, srcReg);
+ }
+ else
+ {
+ // If we reach here, op1 is not contained and we are using SSE or it is a SubRegisterSIMDType.
+ // In either case we are going to use the SSE2 shuffle instruction.
+
+ regNumber op1Reg = genConsumeReg(op1);
+ unsigned shuffleControl = 0;
+
+ if (compiler->isSubRegisterSIMDType(simdNode))
+ {
+ assert(baseType == TYP_FLOAT);
+
+ // We cannot assume that the upper bits of op1Reg or targetReg are zero.
+ // Therefore we need to explicitly zero out upper bits. This is
+ // essential for the shuffle operation performed below.
+ //
+ // If op1 is a float/double constant, we would have loaded it from
+ // data section using movss/sd. Similarly if op1 is a memory op we
+ // would have loaded it using movss/sd. Movss/sd when loading a xmm reg
+ // from memory would zero-out upper bits. In these cases we can
+ // avoid explicitly zeroing out targetReg when targetReg and op1Reg are the same, or do it
+ // more efficiently when they are not.
+ SIMDScalarMoveType moveType =
+ op1->IsCnsFltOrDbl() || op1->isMemoryOp() ? SMT_ZeroInitUpper_SrcHasUpperZeros : SMT_ZeroInitUpper;
+
+ genSIMDScalarMove(TYP_FLOAT, targetReg, op1Reg, moveType);
+
+ if (size == 8)
+ {
+ shuffleControl = 0x50;
+ }
+ else if (size == 12)
+ {
+ shuffleControl = 0x40;
+ }
+ else
+ {
+ noway_assert(!"Unexpected size for SIMD type");
+ }
+ }
+ else // Vector<T>
+ {
+ if (op1Reg != targetReg)
+ {
+ if (varTypeIsFloating(baseType))
+ {
+ ins = ins_Copy(targetType);
+ }
+ else if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
+ {
+ ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
+ }
+
+ assert(ins != INS_invalid);
+ inst_RV_RV(ins, targetReg, op1Reg, baseType, emitTypeSize(baseType));
+ }
+ }
+
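+ // For reference (a sketch of the shufps/pshufd control byte, which selects two bits per
+ // destination element from low to high): 0x00 broadcasts element 0 to all four lanes (the
+ // Vector<T> case); with the upper lanes of targetReg already zeroed, 0x50 produces
+ // (x, x, 0, 0) for Vector2 and 0x40 produces (x, x, x, 0) for Vector3.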
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShuffleSSE2, baseType);
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, targetReg, shuffleControl);
+ }
+
+ genProduceReg(simdNode);
+}
+
+//-------------------------------------------------------------------------------------------
+// genSIMDIntrinsicInitN: Generate code for SIMD Intrinsic Initialize for the form that takes
+// a number of arguments equal to the length of the Vector.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN);
+
+ // Right now this intrinsic is supported only on TYP_FLOAT vectors
+ var_types baseType = simdNode->gtSIMDBaseType;
+ noway_assert(baseType == TYP_FLOAT);
+
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ var_types targetType = simdNode->TypeGet();
+
+ // Note that we cannot use targetReg until we have consumed all of the source operands.
+ // Therefore, we need an internal register to stitch the values together into a single
+ // vector in an XMM reg.
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ regNumber vectorReg = genRegNumFromMask(simdNode->gtRsvdRegs);
+
+ // Zero out vectorReg if we are constructing a vector whose size is not equal to targetType vector size.
+ // For example in case of Vector4f we don't need to zero when using SSE2.
+ if (compiler->isSubRegisterSIMDType(simdNode))
+ {
+ genSIMDZero(targetType, baseType, vectorReg);
+ }
+
+ unsigned int baseTypeSize = genTypeSize(baseType);
+ instruction insLeftShift = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
+
+ // We will first consume the list items in execution (left to right) order,
+ // and record the registers.
+ regNumber operandRegs[SIMD_INTRINSIC_MAX_PARAM_COUNT];
+ unsigned initCount = 0;
+ for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2())
+ {
+ assert(list->OperGet() == GT_LIST);
+ GenTree* listItem = list->gtGetOp1();
+ assert(listItem->TypeGet() == baseType);
+ assert(!listItem->isContained());
+ regNumber operandReg = genConsumeReg(listItem);
+ operandRegs[initCount] = operandReg;
+ initCount++;
+ }
+
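+ // Illustrative walk-through for Vector3(x, y, z), assuming operandRegs = { x, y, z } in
+ // execution order: iteration 0 moves z into element 0 of vectorReg; iteration 1 shifts
+ // vectorReg left by 4 bytes and moves y into element 0; iteration 2 shifts left again and
+ // moves x into element 0, leaving vectorReg = (x, y, z, 0), with the top element zero
+ // because vectorReg was zeroed above for the sub-register case.
+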
+ unsigned int offset = 0;
+ for (unsigned i = 0; i < initCount; i++)
+ {
+ // We will now construct the vector from the list items in reverse order.
+ // This allows us to efficiently stitch together a vector as follows:
+ // vectorReg = (vectorReg << offset)
+ // VectorReg[0] = listItemReg
+ // Use genSIMDScalarMove with SMT_PreserveUpper in order to ensure that the upper
+ // bits of vectorReg are not modified.
+
+ regNumber operandReg = operandRegs[initCount - i - 1];
+ if (offset != 0)
+ {
+ getEmitter()->emitIns_R_I(insLeftShift, EA_16BYTE, vectorReg, baseTypeSize);
+ }
+ genSIMDScalarMove(baseType, vectorReg, operandReg, SMT_PreserveUpper);
+
+ offset += baseTypeSize;
+ }
+
+ noway_assert(offset == simdNode->gtSIMDSize);
+
+ // Load the initialized value.
+ if (targetReg != vectorReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, vectorReg, targetType, emitActualTypeSize(targetType));
+ }
+ genProduceReg(simdNode);
+}
+
+//----------------------------------------------------------------------------------
+// genSIMDIntrinsicUnOp: Generate code for SIMD Intrinsic unary operations like sqrt.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+
+ regNumber op1Reg = genConsumeReg(op1);
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+ if (simdNode->gtSIMDIntrinsicID != SIMDIntrinsicCast || targetReg != op1Reg)
+ {
+ inst_RV_RV(ins, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+ genProduceReg(simdNode);
+}
+
+//--------------------------------------------------------------------------------
+// genSIMDIntrinsicBinOp: Generate code for SIMD Intrinsic binary operations
+// add, sub, mul, bit-wise And, AndNot and Or.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAdd || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAndNot ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseXor || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMin ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMax);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+ regNumber otherReg = op2Reg;
+
+ // Vector<Int>.Mul:
+ // SSE2 doesn't have an instruction to perform this operation directly
+ // whereas SSE4.1 does (pmulld). This is special cased and computed
+ // as follows.
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicMul && baseType == TYP_INT && iset == InstructionSet_SSE2)
+ {
+ // We need a temporary register that is NOT the same as the target,
+ // and we MAY need another.
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+
+ regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ // The register allocator guarantees the following conditions:
+ // - the only registers that may be the same among op1Reg, op2Reg, tmpReg
+ // and tmpReg2 are op1Reg and op2Reg.
+ // Let's be extra-careful and assert that now.
+ assert((op1Reg != tmpReg) && (op1Reg != tmpReg2) && (op2Reg != tmpReg) && (op2Reg != tmpReg2) &&
+ (tmpReg != tmpReg2));
+
+ // We will start by setting things up so that:
+ // - We have op1 in op1Reg and targetReg, and they are different registers.
+ // - We have op2 in op2Reg and tmpReg
+ // - Either we will leave the input registers (the original op1Reg and op2Reg) unmodified,
+ // OR they are the targetReg that will be produced.
+ // (Note that in the code we generate below op1Reg and op2Reg are never written.)
+ // We will copy things as necessary to ensure that this is the case.
+ // Note that we can swap op1 and op2, since multiplication is commutative.
+ // We will not modify the values in op1Reg and op2Reg.
+ // (Though note that if either op1 or op2 is the same as targetReg, we will make
+ // a copy and use that copy as the input register. In that case we WILL modify
+ // the original value in the register, but will wind up with the result in targetReg
+ // in the end, as expected.)
+
+ // First, we need a tmpReg that is NOT the same as targetReg.
+ // Note that if tmpReg is the same as targetReg, we can use tmpReg2 instead; in that case
+ // tmpReg2 will not be needed again below (see the op1Reg == targetReg case).
+ if (tmpReg == targetReg)
+ {
+ tmpReg = tmpReg2;
+ }
+
+ if (op2Reg == targetReg)
+ {
+ // We will swap the operands.
+ // Since the code below only deals with registers, this now becomes the case where
+ // op1Reg == targetReg.
+ op2Reg = op1Reg;
+ op1Reg = targetReg;
+ }
+ if (op1Reg == targetReg)
+ {
+ // Copy op1, and make tmpReg2 the new op1Reg.
+ // Note that those regs can't be the same, as we asserted above.
+ // Also, we know that tmpReg2 hasn't been used, because we couldn't have hit
+ // the "tmpReg == targetReg" case.
+ inst_RV_RV(INS_movaps, tmpReg2, op1Reg, targetType, emitActualTypeSize(targetType));
+ op1Reg = tmpReg2;
+ inst_RV_RV(INS_movaps, tmpReg, op2Reg, targetType, emitActualTypeSize(targetType));
+ // However, we have one more case to worry about: what if op2Reg is also targetReg
+ // (i.e. we have the same operand as op1 and op2)?
+ // In that case we will set op2Reg to the same register as op1Reg.
+ if (op2Reg == targetReg)
+ {
+ op2Reg = tmpReg2;
+ }
+ }
+ else
+ {
+ // Copy op1 to targetReg and op2 to tmpReg.
+ inst_RV_RV(INS_movaps, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ inst_RV_RV(INS_movaps, tmpReg, op2Reg, targetType, emitActualTypeSize(targetType));
+ }
+ // Let's assert that things are as we expect.
+ // - We have op1 in op1Reg and targetReg, and they are different registers.
+ assert(op1Reg != targetReg);
+ // - We have op2 in op2Reg and tmpReg, and they are different registers.
+ assert(op2Reg != tmpReg);
+ // - Either we are going to leave op1's reg unmodified, or it is the targetReg.
+ assert((op1->gtRegNum == op1Reg) || (op1->gtRegNum == op2Reg) || (op1->gtRegNum == targetReg));
+ // - Similarly, we are going to leave op2's reg unmodified, or it is the targetReg.
+ assert((op2->gtRegNum == op1Reg) || (op2->gtRegNum == op2Reg) || (op2->gtRegNum == targetReg));
+
+ // Now we can generate the code.
+
+ // targetReg = op1 >> 4-bytes (op1 is already in targetReg)
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), targetReg, 4);
+
+ // tmpReg = op2 >> 4-bytes (op2 is already in tmpReg)
+ getEmitter()->emitIns_R_I(INS_psrldq, emitActualTypeSize(targetType), tmpReg, 4);
+
+ // tmp = unsigned double word multiply of targetReg and tmpReg. Essentially
+ // tmpReg[63:0] = op1[1] * op2[1]
+ // tmpReg[127:64] = op1[3] * op2[3]
+ inst_RV_RV(INS_pmuludq, tmpReg, targetReg, targetType, emitActualTypeSize(targetType));
+
+ // Extract first and third double word results from tmpReg
+ // tmpReg = shuffle(0,0,2,0) of tmpReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), tmpReg, tmpReg, 0x08);
+
+ // targetReg[63:0] = op1[0] * op2[0]
+ // targetReg[127:64] = op1[2] * op2[2]
+ inst_RV_RV(INS_movaps, targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ inst_RV_RV(INS_pmuludq, targetReg, op2Reg, targetType, emitActualTypeSize(targetType));
+
+ // Extract first and third double word results from targetReg
+ // targetReg = shuffle(0,0,2,0) of targetReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(targetType), targetReg, targetReg, 0x08);
+
+ // pack the results into a single vector
+ inst_RV_RV(INS_punpckldq, targetReg, tmpReg, targetType, emitActualTypeSize(targetType));
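+ // Net effect: targetReg now holds, element by element, the low 32 bits of op1[i] * op2[i]
+ // (listed from lowest to highest element), which is the expected wrap-around Vector<int> multiply.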
+ }
+ else
+ {
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+
+ // Currently AVX doesn't support integer.
+ // if the ins is INS_cvtsi2ss or INS_cvtsi2sd, we won't use AVX.
+ if (op1Reg != targetReg && compiler->canUseAVX() && !(ins == INS_cvtsi2ss || ins == INS_cvtsi2sd) &&
+ getEmitter()->IsThreeOperandAVXInstruction(ins))
+ {
+ inst_RV_RV_RV(ins, targetReg, op1Reg, op2Reg, emitActualTypeSize(targetType));
+ }
+ else
+ {
+ if (op2Reg == targetReg)
+ {
+ otherReg = op1Reg;
+ }
+ else if (op1Reg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ inst_RV_RV(ins, targetReg, otherReg, targetType, emitActualTypeSize(targetType));
+ }
+ }
+
+ // Vector2/3 div: since the top-most elements will be zero, we end up
+ // performing 0/0, which is a NaN. Therefore, post division we need to set the
+ // top-most elements to zero. This is achieved by a left logical shift followed
+ // by a right logical shift of targetReg.
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDiv && (simdNode->gtSIMDSize < 16))
+ {
+ // These are 16 byte operations, so we subtract from 16 bytes, not the vector register length.
+ unsigned shiftCount = 16 - simdNode->gtSIMDSize;
+ assert(shiftCount != 0);
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, targetReg, shiftCount);
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, targetReg, shiftCount);
+ }
+
+ genProduceReg(simdNode);
+}
+
+//--------------------------------------------------------------------------------
+// genSIMDIntrinsicRelOp: Generate code for a SIMD Intrinsic relational operator
+// <, <=, >, >= and ==
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode)
+{
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ InstructionSet iset = compiler->getSIMDInstructionSet();
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+ regNumber otherReg = op2Reg;
+
+ switch (simdNode->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicGreaterThan:
+ {
+ // SSE2: vector<(u)long> relation op should be implemented in terms of TYP_INT comparison operations
+ assert(((iset == InstructionSet_AVX) || (baseType != TYP_LONG)) && (baseType != TYP_ULONG));
+
+ // Greater-than: Floating point vectors use "<" with swapped operands
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGreaterThan)
+ {
+ assert(!varTypeIsFloating(baseType));
+ }
+
+ unsigned ival = 0;
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
+
+ // targetReg = op1reg > op2reg
+ // Therefore, we can optimize if op1Reg == targetReg
+ otherReg = op2Reg;
+ if (op1Reg != targetReg)
+ {
+ if (op2Reg == targetReg)
+ {
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual);
+ otherReg = op1Reg;
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+ }
+
+ if (varTypeIsFloating(baseType))
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, otherReg, ival);
+ }
+ else
+ {
+ inst_RV_RV(ins, targetReg, otherReg, targetType, emitActualTypeSize(targetType));
+ }
+ }
+ break;
+
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ {
+ // Int vectors use ">" and ">=" with swapped operands
+ assert(varTypeIsFloating(baseType));
+
+ // Get the instruction opcode for compare operation
+ unsigned ival;
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
+
+ // targetReg = op1reg RelOp op2reg
+ // Therefore, we can optimize if op1Reg == targetReg
+ if (op1Reg != targetReg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, op2Reg, ival);
+ }
+ break;
+
+ // (In)Equality that produces bool result instead of a bit vector
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ {
+ assert(genIsValidIntReg(targetReg));
+
+ // We need two additional XMM registers as scratch
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+
+ regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+ var_types simdType = op1->TypeGet();
+ // TODO-1stClassStructs: Temporary to minimize asmDiffs
+ if (simdType == TYP_DOUBLE)
+ {
+ simdType = TYP_SIMD8;
+ }
+
+ // Here we should consider TYP_SIMD12 operands as if they were TYP_SIMD16
+ // since both the operands will be in XMM registers.
+ if (simdType == TYP_SIMD12)
+ {
+ simdType = TYP_SIMD16;
+ }
+
+ // tmpReg1 = (op1Reg == op2Reg)
+ // Call this value of tmpReg1 as 'compResult' for further reference below.
+ regNumber otherReg = op2Reg;
+ if (tmpReg1 != op2Reg)
+ {
+ if (tmpReg1 != op1Reg)
+ {
+ inst_RV_RV(ins_Copy(simdType), tmpReg1, op1Reg, simdType, emitActualTypeSize(simdType));
+ }
+ }
+ else
+ {
+ otherReg = op1Reg;
+ }
+
+ // For all integer types we can use TYP_INT comparison.
+ unsigned ival = 0;
+ instruction ins =
+ getOpForSIMDIntrinsic(SIMDIntrinsicEqual, varTypeIsFloating(baseType) ? baseType : TYP_INT, &ival);
+
+ if (varTypeIsFloating(baseType))
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdType), tmpReg1, otherReg, ival);
+ }
+ else
+ {
+ inst_RV_RV(ins, tmpReg1, otherReg, simdType, emitActualTypeSize(simdType));
+ }
+
+ // If we have 32 bytes, start by anding the two 16-byte halves to get a 16-byte result.
+ if (compiler->canUseAVX() && (simdType == TYP_SIMD32))
+ {
+ // Reduce tmpReg1 from 256 bits to 128 bits by bitwise-ANDing the lower and upper 128 bits
+ //
+ // Generated code sequence
+ // - vextractf128 tmpReg2, tmpReg1, 0x01
+ // tmpReg2[128..255] <- 0
+ // tmpReg2[0..127] <- tmpReg1[128..255]
+ // - vandps tmpReg1, tmpReg2
+ // This zeroes out the upper portion of tmpReg1, and the lower portion of tmpReg1 becomes
+ // the AND of the upper and lower 128-bit comparison results.
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg2, tmpReg1, 0x01);
+ inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType));
+ }
+ // Next, if we have more than 8 bytes, AND the two 8-byte halves to get an 8-byte result.
+ if (simdType != TYP_SIMD8)
+ {
+ // tmpReg2 = Shuffle(tmpReg1, (1,0,3,2))
+ // Note: vpshufd is a 128-bit only instruction. Therefore, explicitly pass EA_16BYTE
+ getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x4E);
+
+ // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2)
+ //
+ // Note that what we have computed is as follows at this point:
+ // tmpReg1[0] = compResult[0] & compResult[2]
+ // tmpReg1[1] = compResult[1] & compResult[3]
+ inst_RV_RV(INS_andps, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType));
+ }
+ // At this point, we have either reduced the result to 8 bytes: tmpReg1[0] and tmpReg1[1],
+ // OR we have a Vector2 (TYP_SIMD8) in tmpReg1, which has only those two fields.
+
+ // tmpReg2 = Shuffle(tmpReg1, (0,0,0,1))
+ // tmpReg2[0] = compResult[1] & compResult[3]
+ getEmitter()->emitIns_R_R_I(INS_pshufd, EA_16BYTE, tmpReg2, tmpReg1, 0x1);
+
+ // tmpReg1 = BitwiseAnd(tmpReg1, tmpReg2)
+ // That is tmpReg1[0] = compResult[0] & compResult[1] & compResult[2] & compResult[3]
+ inst_RV_RV(INS_pand, tmpReg1, tmpReg2, simdType, emitActualTypeSize(simdType)); // ??? INS_andps??
+
+ // targetReg = lower 32-bits of tmpReg1 = compResult[0] & compResult[1] & compResult[2] & compResult[3]
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
+ inst_RV_RV(INS_mov_xmm2i, tmpReg1, targetReg, TYP_INT);
+
+ // Since we need to compute a bool result, targetReg needs to be set to 1 on true and zero on false.
+ // Equality:
+ // cmp targetReg, 0xFFFFFFFF
+ // sete targetReg
+ // movzx targetReg, targetReg
+ //
+ // InEquality:
+ // cmp targetReg, 0xFFFFFFFF
+ // setne targetReg
+ // movzx targetReg, targetReg
+ //
+ getEmitter()->emitIns_R_I(INS_cmp, EA_4BYTE, targetReg, 0xFFFFFFFF);
+ inst_RV((simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality) ? INS_sete : INS_setne, targetReg, TYP_INT,
+ EA_1BYTE);
+ assert(simdNode->TypeGet() == TYP_INT);
+ // Set the higher bytes to 0
+ inst_RV_RV(ins_Move_Extend(TYP_UBYTE, true), targetReg, targetReg, TYP_UBYTE, emitTypeSize(TYP_UBYTE));
+ }
+ break;
+
+ default:
+ noway_assert(!"Unimplemented SIMD relational operation.");
+ unreached();
+ }
+
+ genProduceReg(simdNode);
+}
+
+//--------------------------------------------------------------------------------
+// genSIMDIntrinsicDotProduct: Generate code for SIMD Intrinsic Dot Product.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicDotProduct);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ var_types simdType = op1->TypeGet();
+ // TODO-1stClassStructs: Temporary to minimize asmDiffs
+ if (simdType == TYP_DOUBLE)
+ {
+ simdType = TYP_SIMD8;
+ }
+ var_types simdEvalType = (simdType == TYP_SIMD12) ? TYP_SIMD16 : simdType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ // DotProduct is only supported on floating point types.
+ var_types targetType = simdNode->TypeGet();
+ assert(targetType == baseType);
+ assert(varTypeIsFloating(baseType));
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+
+ regNumber tmpReg = REG_NA;
+ // For SSE, or AVX with 32-byte vectors, we need an additional Xmm register as scratch.
+ // However, it must be distinct from targetReg, so we request two from the register allocator.
+ // Note that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
+ if ((compiler->getSIMDInstructionSet() == InstructionSet_SSE2) || (simdEvalType == TYP_SIMD32))
+ {
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 2);
+
+ regMaskTP tmpRegsMask = simdNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+
+ // Choose any register different from targetReg as tmpReg
+ if (tmpReg1 != targetReg)
+ {
+ tmpReg = tmpReg1;
+ }
+ else
+ {
+ assert(targetReg != tmpReg2);
+ tmpReg = tmpReg2;
+ }
+ assert(tmpReg != REG_NA);
+ assert(tmpReg != targetReg);
+ }
+
+ if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg
+ if (op1Reg == targetReg)
+ {
+ // Best case
+ // nothing to do, we have registers in the right place
+ }
+ else if (op2Reg == targetReg)
+ {
+ op2Reg = op1Reg;
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType));
+ }
+
+ // DotProduct(v1, v2)
+ // Here v0 = targetReg, v1 = op1Reg, v2 = op2Reg and tmp = tmpReg
+ if (baseType == TYP_FLOAT)
+ {
+ // v0 = v1 * v2
+ // tmp = v0 // v0 = (3, 2, 1, 0) - each element is given by its
+ // // position
+ // tmp = shuffle(tmp, tmp, Shuffle(2,3,0,1)) // tmp = (2, 3, 0, 1)
+ // v0 = v0 + tmp // v0 = (3+2, 2+3, 1+0, 0+1)
+ // tmp = v0
+ // tmp = shuffle(tmp, tmp, Shuffle(0,1,2,3)) // tmp = (0+1, 1+0, 2+3, 3+2)
+ // v0 = v0 + tmp // v0 = (0+1+2+3, 0+1+2+3, 0+1+2+3, 0+1+2+3)
+ // // Essentially a horizontal addition of all elements.
+ // // We could achieve the same using the SSE3 instruction
+ // // HADDPS.
+ //
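+ // Worked example: for v1 = (1, 2, 3, 4) and v2 = (5, 6, 7, 8) the element-wise products are
+ // 5, 12, 21, 32; the first shuffle+add pairs them up to give 17, 17, 53, 53, and the second
+ // gives 70 in every element, which is indeed the dot product.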
+ inst_RV_RV(INS_mulps, targetReg, op2Reg);
+ inst_RV_RV(INS_movaps, tmpReg, targetReg);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0xb1);
+ inst_RV_RV(INS_addps, targetReg, tmpReg);
+ inst_RV_RV(INS_movaps, tmpReg, targetReg);
+ inst_RV_RV_IV(INS_shufps, EA_16BYTE, tmpReg, tmpReg, 0x1b);
+ inst_RV_RV(INS_addps, targetReg, tmpReg);
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ // v0 = v1 * v2
+ // tmp = v0 // v0 = (1, 0) - each element is given by its position
+ // tmp = shuffle(tmp, tmp, Shuffle(0,1)) // tmp = (0, 1)
+ // v0 = v0 + tmp // v0 = (1+0, 0+1)
+ inst_RV_RV(INS_mulpd, targetReg, op2Reg);
+ inst_RV_RV(INS_movaps, tmpReg, targetReg);
+ inst_RV_RV_IV(INS_shufpd, EA_16BYTE, tmpReg, tmpReg, 0x01);
+ inst_RV_RV(INS_addpd, targetReg, tmpReg);
+ }
+ else
+ {
+ unreached();
+ }
+ }
+ else
+ {
+ // We avoid reg move if either op1Reg == targetReg or op2Reg == targetReg.
+ // Note that this is a duplicate of the code above for SSE, but in the AVX case we can eventually
+ // use the 3-op form, so that we can avoid these copies.
+ // TODO-CQ: Add inst_RV_RV_RV_IV().
+ if (op1Reg == targetReg)
+ {
+ // Best case
+ // nothing to do, we have registers in the right place
+ }
+ else if (op2Reg == targetReg)
+ {
+ op2Reg = op1Reg;
+ }
+ else
+ {
+ inst_RV_RV(ins_Copy(simdType), targetReg, op1Reg, simdEvalType, emitActualTypeSize(simdType));
+ }
+
+ emitAttr emitSize = emitActualTypeSize(simdEvalType);
+ if (baseType == TYP_FLOAT)
+ {
+ // dpps computes the dot product of the upper & lower halves of the 32-byte register.
+ // Notice that if this is a TYP_SIMD16 or smaller on AVX, then we don't need a tmpReg.
+ inst_RV_RV_IV(INS_dpps, emitSize, targetReg, op2Reg, 0xf1);
+ // If this is TYP_SIMD32, we need to combine the lower & upper results.
+ if (simdEvalType == TYP_SIMD32)
+ {
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01);
+ inst_RV_RV(INS_addps, targetReg, tmpReg, targetType, emitTypeSize(targetType));
+ }
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ // On AVX, we have no 16-byte vectors of double. Note that, if we did, we could use
+ // dppd directly.
+ assert(simdType == TYP_SIMD32);
+
+ // targetReg = targetReg * op2Reg
+ // targetReg = vhaddpd(targetReg, targetReg) ; horizontal sum of lower & upper halves
+ // tmpReg = vextractf128(targetReg, 1) ; Moves the upper sum into tmpReg
+ // targetReg = targetReg + tmpReg
+ inst_RV_RV(INS_mulpd, targetReg, op2Reg, simdEvalType, emitActualTypeSize(simdType));
+ inst_RV_RV(INS_haddpd, targetReg, targetReg, simdEvalType, emitActualTypeSize(simdType));
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, tmpReg, targetReg, 0x01);
+ inst_RV_RV(INS_addpd, targetReg, tmpReg, targetType, emitTypeSize(targetType));
+ }
+ else
+ {
+ unreached();
+ }
+ }
+
+ genProduceReg(simdNode);
+}
+
+//------------------------------------------------------------------------------------
+// genSIMDIntrinsicGetItem: Generate code for SIMD Intrinsic get element at index i.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types simdType = op1->TypeGet();
+ assert(varTypeIsSIMD(simdType));
+
+ // op1 of TYP_SIMD12 should be considered as TYP_SIMD16,
+ // since it is in XMM register.
+ if (simdType == TYP_SIMD12)
+ {
+ simdType = TYP_SIMD16;
+ }
+
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ assert(targetType == genActualType(baseType));
+
+ // GetItem has 2 operands:
+ // - the source of SIMD type (op1)
+ // - the index of the value to be returned.
+ genConsumeOperands(simdNode);
+ regNumber srcReg = op1->gtRegNum;
+
+ // SSE2 doesn't have an instruction to implement this intrinsic if the index is not a constant.
+ // For the non-constant case, we will use the SIMD temp location to store the vector, and
+ // then load the desired element.
+ // The range check will already have been performed, so at this point we know we have an index
+ // within the bounds of the vector.
+ if (!op2->IsCnsIntOrI())
+ {
+ unsigned simdInitTempVarNum = compiler->lvaSIMDInitTempVarNum;
+ noway_assert(simdInitTempVarNum != BAD_VAR_NUM);
+ bool isEBPbased;
+ unsigned offs = compiler->lvaFrameAddress(simdInitTempVarNum, &isEBPbased);
+ regNumber indexReg = op2->gtRegNum;
+
+ // Store the vector to the temp location.
+ getEmitter()->emitIns_S_R(ins_Store(simdType, compiler->isSIMDTypeLocalAligned(simdInitTempVarNum)),
+ emitTypeSize(simdType), srcReg, simdInitTempVarNum, 0);
+
+ // Now, load the desired element.
+ getEmitter()->emitIns_R_ARX(ins_Move_Extend(baseType, false), // Load
+ emitTypeSize(baseType), // Of the vector baseType
+ targetReg, // To targetReg
+ (isEBPbased) ? REG_EBP : REG_ESP, // Stack-based
+ indexReg, // Indexed
+ genTypeSize(baseType), // by the size of the baseType
+ offs);
+ genProduceReg(simdNode);
+ return;
+ }
+
+ noway_assert(op2->isContained());
+ unsigned int index = (unsigned int)op2->gtIntCon.gtIconVal;
+ unsigned int byteShiftCnt = index * genTypeSize(baseType);
+
+ // In general we shouldn't have an index greater than or equal to the length of the vector.
+ // However, if we have an out-of-range access, under minOpts it will not be optimized
+ // away. The code will throw before we reach this point, but we still need to generate
+ // code. In that case, we will simply mask off the upper bits.
+ if (byteShiftCnt >= compiler->getSIMDVectorRegisterByteLength())
+ {
+ byteShiftCnt &= (compiler->getSIMDVectorRegisterByteLength() - 1);
+ index = byteShiftCnt / genTypeSize(baseType);
+ }
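+ // For example, with a 16-byte vector of floats an (unreachable at runtime) index of 5 gives
+ // byteShiftCnt = 20, which is masked down to 4, i.e. index 1.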
+
+ regNumber tmpReg = REG_NA;
+ if (simdNode->gtRsvdRegs != RBM_NONE)
+ {
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs);
+ }
+ else
+ {
+ assert((byteShiftCnt == 0) || varTypeIsFloating(baseType) ||
+ (varTypeIsSmallInt(baseType) && (byteShiftCnt < 16)));
+ }
+
+ if (byteShiftCnt >= 16)
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ byteShiftCnt -= 16;
+ regNumber newSrcReg;
+ if (varTypeIsFloating(baseType))
+ {
+ newSrcReg = targetReg;
+ }
+ else
+ {
+ // Integer types
+ assert(tmpReg != REG_NA);
+ newSrcReg = tmpReg;
+ }
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, newSrcReg, srcReg, 0x01);
+
+ srcReg = newSrcReg;
+ }
+
+ // Generate the following sequence:
+ // 1) baseType is floating point
+ // movaps targetReg, srcReg
+ // psrldq targetReg, byteShiftCnt <-- not generated if accessing zero'th element
+ //
+ // 2) baseType is not floating point
+ // movaps tmpReg, srcReg <-- not generated if accessing zero'th element
+ // OR if tmpReg == srcReg
+ // psrldq tmpReg, byteShiftCnt <-- not generated if accessing zero'th element
+ // mov_xmm2i targetReg, tmpReg
+ if (varTypeIsFloating(baseType))
+ {
+ if (targetReg != srcReg)
+ {
+ inst_RV_RV(ins_Copy(simdType), targetReg, srcReg, simdType, emitActualTypeSize(simdType));
+ }
+
+ if (byteShiftCnt != 0)
+ {
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), targetReg, byteShiftCnt);
+ }
+ }
+ else
+ {
+ if (varTypeIsSmallInt(baseType))
+ {
+ // Note that pextrw extracts a 16-bit value by index and zero-extends it to 32 bits.
+ // For Vector<short> we also need to sign-extend the extracted 16-bit value in targetReg.
+ // For Vector<byte>, index/2 gives the index of the 16-bit word to extract; shift right
+ // by 8 bits if the index is odd. For Vector<sbyte> we also sign-extend targetReg.
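+ // Worked example: to read element 5 of a Vector<byte>, pextrw with index 5/2 = 2 extracts
+ // bytes 4..5 into the low 16 bits of targetReg; since 5 is odd, a right shift by 8 then
+ // leaves byte 5 in the low byte.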
+
+ unsigned baseSize = genTypeSize(baseType);
+ if (baseSize == 1)
+ {
+ index /= 2;
+ }
+ // We actually want index % 8 for the AVX case (for SSE it will never be > 8).
+ // Note that this doesn't matter functionally, because the instruction uses just the
+ // low 3 bits of index, but it's better to use the right value.
+ if (index > 8)
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ index -= 8;
+ }
+
+ getEmitter()->emitIns_R_R_I(INS_pextrw, emitTypeSize(TYP_INT), targetReg, srcReg, index);
+
+ bool ZeroOrSignExtnReqd = true;
+ if (baseSize == 1)
+ {
+ if ((op2->gtIntCon.gtIconVal % 2) == 1)
+ {
+ // If we are extracting a byte-sized element and the index is odd, right shift the extracted word by 8 bits.
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, targetReg, 8);
+
+ // Since Pextrw zero extends to 32-bits, we need sign extension in case of TYP_BYTE
+ ZeroOrSignExtnReqd = (baseType == TYP_BYTE);
+ }
+ // else - we just need to zero/sign extend the byte since pextrw extracted 16-bits
+ }
+ else
+ {
+ // Since Pextrw zero extends to 32-bits, we need sign extension in case of TYP_SHORT
+ assert(baseSize == 2);
+ ZeroOrSignExtnReqd = (baseType == TYP_SHORT);
+ }
+
+ if (ZeroOrSignExtnReqd)
+ {
+ // Zero/sign extend the byte/short to 32-bits
+ inst_RV_RV(ins_Move_Extend(baseType, false), targetReg, targetReg, baseType, emitTypeSize(baseType));
+ }
+ }
+ else
+ {
+ // We need a temp xmm register if the baseType is not floating point and
+ // accessing non-zero'th element.
+ instruction ins;
+
+ if (byteShiftCnt != 0)
+ {
+ assert(tmpReg != REG_NA);
+
+ if (tmpReg != srcReg)
+ {
+ inst_RV_RV(ins_Copy(simdType), tmpReg, srcReg, simdType, emitActualTypeSize(simdType));
+ }
+
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), tmpReg, byteShiftCnt);
+ }
+ else
+ {
+ tmpReg = srcReg;
+ }
+
+ assert(tmpReg != REG_NA);
+ ins = ins_CopyFloatToInt(TYP_FLOAT, baseType);
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
+ inst_RV_RV(ins, tmpReg, targetReg, baseType);
+ }
+ }
+
+ genProduceReg(simdNode);
+}
+
+//------------------------------------------------------------------------------------
+// genSIMDIntrinsicSetItem: Generate code for SIMD Intrinsic set element at index i.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// TODO-CQ: Use SIMDIntrinsicShuffleSSE2 for the SSE2 case.
+//
+void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
+{
+ // Determine index based on intrinsic ID
+ int index = -1;
+ switch (simdNode->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicSetX:
+ index = 0;
+ break;
+ case SIMDIntrinsicSetY:
+ index = 1;
+ break;
+ case SIMDIntrinsicSetZ:
+ index = 2;
+ break;
+ case SIMDIntrinsicSetW:
+ index = 3;
+ break;
+
+ default:
+ unreached();
+ }
+ assert(index != -1);
+
+ // op1 is the SIMD vector
+ // op2 is the value to be set
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+
+ var_types baseType = simdNode->gtSIMDBaseType;
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+ var_types targetType = simdNode->TypeGet();
+ assert(varTypeIsSIMD(targetType));
+
+ // the following assert must hold.
+ // supported only on vector2f/3f/4f right now
+ noway_assert(baseType == TYP_FLOAT);
+ assert(op2->TypeGet() == baseType);
+ assert(simdNode->gtSIMDSize >= ((index + 1) * genTypeSize(baseType)));
+
+ genConsumeOperands(simdNode);
+ regNumber op1Reg = op1->gtRegNum;
+ regNumber op2Reg = op2->gtRegNum;
+
+ // TODO-CQ: For AVX we don't need to do a copy because it supports 3 operands plus immediate.
+ if (targetReg != op1Reg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ // Right now this intrinsic is supported only for float base type vectors.
+ // If in the future we need to support other base type vectors, the
+ // logic below needs modification.
+ noway_assert(baseType == TYP_FLOAT);
+
+ if (compiler->getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ // We need one additional int register as scratch
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs);
+ assert(genIsValidIntReg(tmpReg));
+
+ // Move the value from xmm reg to an int reg
+ instruction ins = ins_CopyFloatToInt(TYP_FLOAT, TYP_INT);
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
+ inst_RV_RV(ins, op2Reg, tmpReg, baseType);
+
+ // First insert the lower 16-bits of tmpReg in targetReg at 2*index position
+ // since every float has two 16-bit words.
+ getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2 * index);
+
+ // Logical right shift tmpReg by 16-bits and insert in targetReg at 2*index + 1 position
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_4BYTE, tmpReg, 16);
+ getEmitter()->emitIns_R_R_I(INS_pinsrw, emitTypeSize(TYP_INT), targetReg, tmpReg, 2 * index + 1);
+ }
+ else
+ {
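+ // Assuming the usual INSERTPS_* encodings, this immediate copies source element 0 (the
+ // scalar in op2Reg) into element 'index' of targetReg with no zero mask, so the other
+ // elements of targetReg are preserved.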
+ unsigned int insertpsImm = (INSERTPS_SOURCE_SELECT(0) | INSERTPS_TARGET_SELECT(index));
+ inst_RV_RV_IV(INS_insertps, EA_16BYTE, targetReg, op2Reg, insertpsImm);
+ }
+
+ genProduceReg(simdNode);
+}
+
+//------------------------------------------------------------------------
+// genSIMDIntrinsicShuffleSSE2: Generate code for SIMD Intrinsic shuffle.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicShuffleSSE2);
+ noway_assert(compiler->getSIMDInstructionSet() == InstructionSet_SSE2);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ assert(op2->isContained());
+ assert(op2->IsCnsIntOrI());
+ int shuffleControl = (int)op2->AsIntConCommon()->IconValue();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ var_types targetType = simdNode->TypeGet();
+ regNumber targetReg = simdNode->gtRegNum;
+ assert(targetReg != REG_NA);
+
+ regNumber op1Reg = genConsumeReg(op1);
+ if (targetReg != op1Reg)
+ {
+ inst_RV_RV(ins_Copy(targetType), targetReg, op1Reg, targetType, emitActualTypeSize(targetType));
+ }
+
+ instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType);
+ getEmitter()->emitIns_R_R_I(ins, emitTypeSize(baseType), targetReg, targetReg, shuffleControl);
+ genProduceReg(simdNode);
+}
+
+//-----------------------------------------------------------------------------
+// genStoreIndTypeSIMD12: store indirect a TYP_SIMD12 (i.e. Vector3) to memory.
+// Since Vector3 is not a hardware supported write size, it is performed
+// as two writes: an 8-byte write followed by a 4-byte write.
+//
+// Arguments:
+// treeNode - tree node that is attempting to store indirect
+//
+//
+// Return Value:
+// None.
+//
+void CodeGen::genStoreIndTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_STOREIND);
+
+ GenTree* addr = treeNode->gtOp.gtOp1;
+ GenTree* data = treeNode->gtOp.gtOp2;
+
+ // addr and data should not be contained.
+ assert(!data->isContained());
+ assert(!addr->isContained());
+
+#ifdef DEBUG
+ // Should not require a write barrier
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
+ assert(writeBarrierForm == GCInfo::WBF_NoBarrier);
+#endif
+
+ // Need an additional Xmm register to extract the upper 4 bytes from data.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // 8-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, data->gtRegNum, addr->gtRegNum, 0);
+
+ // Extract upper 4-bytes from data
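+ // (pshufd control 0x02 selects element 2 of data - the Vector3 Z component - into element 0
+ // of tmpReg, so a plain 4-byte store of tmpReg writes the upper 4 bytes.)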
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, data->gtRegNum, 0x02);
+
+ // 4-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, addr->gtRegNum, 8);
+}
+
+//-----------------------------------------------------------------------------
+// genLoadIndTypeSIMD12: load indirect a TYP_SIMD12 (i.e. Vector3) value.
+// Since Vector3 is not a hardware supported read size, it is performed
+// as two loads: an 8-byte load followed by a 4-byte load.
+//
+// Arguments:
+// treeNode - tree node of GT_IND
+//
+//
+// Return Value:
+// None.
+//
+void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_IND);
+
+ regNumber targetReg = treeNode->gtRegNum;
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+
+ // Need an additional Xmm register, different from targetReg, to read the upper 4 bytes.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 2);
+
+ regNumber tmpReg = REG_NA;
+ regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+
+ // Choose any register different from targetReg as tmpReg
+ if (tmpReg1 != targetReg)
+ {
+ tmpReg = tmpReg1;
+ }
+ else
+ {
+ assert(targetReg != tmpReg2);
+ tmpReg = tmpReg2;
+ }
+ assert(tmpReg != REG_NA);
+ assert(tmpReg != targetReg);
+
+ // Load upper 4 bytes in tmpReg
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_FLOAT), EA_4BYTE, tmpReg, operandReg, 8);
+
+ // Load lower 8 bytes in targetReg
+ getEmitter()->emitIns_R_AR(ins_Load(TYP_DOUBLE), EA_8BYTE, targetReg, operandReg, 0);
+
+ // combine upper 4 bytes and lower 8 bytes in targetReg
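+ // (shufps control 0x44 selects targetReg elements 0,1 into the low half and tmpReg elements
+ // 0,1 into the high half; since the movss load zeroed the upper bits of tmpReg, the result
+ // is (x, y, z, 0).)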
+ getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44);
+
+ genProduceReg(treeNode);
+}
+
+//-----------------------------------------------------------------------------
+// genStoreLclFldTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
+// Since Vector3 is not a hardware supported write size, it is performed
+// as two stores: an 8-byte store followed by a 4-byte store.
+//
+// Arguments:
+// treeNode - tree node that is attempting to store TYP_SIMD12 field
+//
+// Return Value:
+// None.
+//
+void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_STORE_LCL_FLD);
+
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained());
+ regNumber operandReg = genConsumeReg(op1);
+
+ // Need an additional Xmm register to extract the upper 4 bytes from data.
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ // store lower 8 bytes
+ getEmitter()->emitIns_S_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, varNum, offs);
+
+ // Extract upper 4-bytes from operandReg
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+
+ // Store upper 4 bytes
+ getEmitter()->emitIns_S_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, varNum, offs + 8);
+}
+
+//-----------------------------------------------------------------------------
+// genLoadLclFldTypeSIMD12: load a TYP_SIMD12 (i.e. Vector3) type field.
+// Since Vector3 is not a hardware supported read size, it is performed
+// as two reads: an 8-byte read followed by a 4-byte read.
+//
+// Arguments:
+// treeNode - tree node that is attempting to load TYP_SIMD12 field
+//
+// Return Value:
+// None.
+//
+void CodeGen::genLoadLclFldTypeSIMD12(GenTree* treeNode)
+{
+ assert(treeNode->OperGet() == GT_LCL_FLD);
+
+ regNumber targetReg = treeNode->gtRegNum;
+ unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
+ assert(varNum < compiler->lvaCount);
+
+ // Need an additional Xmm register to read the upper 4 bytes
+ assert(treeNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(treeNode->gtRsvdRegs) == 2);
+
+ regNumber tmpReg = REG_NA;
+ regMaskTP tmpRegsMask = treeNode->gtRsvdRegs;
+ regMaskTP tmpReg1Mask = genFindLowestBit(tmpRegsMask);
+ tmpRegsMask &= ~tmpReg1Mask;
+ regNumber tmpReg1 = genRegNumFromMask(tmpReg1Mask);
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegsMask);
+
+ // Choose any register different from targetReg as tmpReg
+ if (tmpReg1 != targetReg)
+ {
+ tmpReg = tmpReg1;
+ }
+ else
+ {
+ assert(targetReg != tmpReg2);
+ tmpReg = tmpReg2;
+ }
+ assert(tmpReg != REG_NA);
+ assert(tmpReg != targetReg);
+
+ // Read upper 4 bytes to tmpReg
+ getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_FLOAT, false), EA_4BYTE, tmpReg, varNum, offs + 8);
+
+ // Read lower 8 bytes to targetReg
+ getEmitter()->emitIns_R_S(ins_Move_Extend(TYP_DOUBLE, false), EA_8BYTE, targetReg, varNum, offs);
+
+ // combine upper 4 bytes and lower 8 bytes in targetReg
+ getEmitter()->emitIns_R_R_I(INS_shufps, emitActualTypeSize(TYP_SIMD16), targetReg, tmpReg, 0x44);
+
+ genProduceReg(treeNode);
+}
+
+//-----------------------------------------------------------------------------
+// genSIMDIntrinsicUpperSave: save the upper half of a TYP_SIMD32 vector to
+// the given register, if any, or to memory.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// The upper half of all AVX registers is volatile, even the callee-save registers.
+// When a 32-byte SIMD value is live across a call, the register allocator will use this intrinsic
+// to cause the upper half to be saved. It will first attempt to find another, unused, callee-save
+// register. If such a register cannot be found, it will save it to an available caller-save register.
+// In that case, this node will be marked GTF_SPILL, which will cause genProduceReg to save the 16 byte
+// value to the stack. (Note that if there are no caller-save registers available, the entire 32 byte
+// value will be spilled to the stack.)
+//
+void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32);
+ regNumber targetReg = simdNode->gtRegNum;
+ regNumber op1Reg = genConsumeReg(op1);
+ assert(op1Reg != REG_NA);
+ assert(targetReg != REG_NA);
+ getEmitter()->emitIns_R_R_I(INS_vextractf128, EA_32BYTE, targetReg, op1Reg, 0x01);
+
+ genProduceReg(simdNode);
+}
+
+//-----------------------------------------------------------------------------
+// genSIMDIntrinsicUpperRestore: Restore the upper half of a TYP_SIMD32 vector to
+// the given register, if any, or to memory.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// For consistency with genSIMDIntrinsicUpperSave, and to ensure that lclVar nodes always
+// have their home register, this node has its targetReg on the lclVar child, and its source
+// on the simdNode.
+// Regarding spill, please see the note above on genSIMDIntrinsicUpperSave. If we have spilled
+// an upper-half to a caller save register, this node will be marked GTF_SPILLED. However, unlike
+// most spill scenarios, the saved tree will be different from the restored tree, but the spill
+// restore logic, which is triggered by the call to genConsumeReg, requires us to provide the
+// spilled tree (saveNode) in order to perform the reload. We can easily find that tree,
+// as it is in the spill descriptor for the register from which it was saved.
+//
+void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode)
+{
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore);
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32);
+ regNumber srcReg = simdNode->gtRegNum;
+ regNumber lclVarReg = genConsumeReg(op1);
+ unsigned varNum = op1->AsLclVarCommon()->gtLclNum;
+ assert(lclVarReg != REG_NA);
+ assert(srcReg != REG_NA);
+ if (simdNode->gtFlags & GTF_SPILLED)
+ {
+ GenTree* saveNode = regSet.rsSpillDesc[srcReg]->spillTree;
+ noway_assert(saveNode != nullptr && (saveNode->gtRegNum == srcReg));
+ genConsumeReg(saveNode);
+ }
+ getEmitter()->emitIns_R_R_I(INS_vinsertf128, EA_32BYTE, lclVarReg, srcReg, 0x01);
+}
+
+//------------------------------------------------------------------------
+// genSIMDIntrinsic: Generate code for a SIMD Intrinsic. This is the main
+// routine which in turn calls the appropriate genSIMDIntrinsicXXX() routine.
+//
+// Arguments:
+// simdNode - The GT_SIMD node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Currently, we only recognize SIMDVector<float> and SIMDVector<int>, and
+// a limited set of methods.
+//
+void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
+{
+ // NYI for unsupported base types
+ if (simdNode->gtSIMDBaseType != TYP_INT && simdNode->gtSIMDBaseType != TYP_LONG &&
+ simdNode->gtSIMDBaseType != TYP_FLOAT && simdNode->gtSIMDBaseType != TYP_DOUBLE &&
+ simdNode->gtSIMDBaseType != TYP_CHAR && simdNode->gtSIMDBaseType != TYP_UBYTE &&
+ simdNode->gtSIMDBaseType != TYP_SHORT && simdNode->gtSIMDBaseType != TYP_BYTE &&
+ simdNode->gtSIMDBaseType != TYP_UINT && simdNode->gtSIMDBaseType != TYP_ULONG)
+ {
+ noway_assert(!"SIMD intrinsic with unsupported base type.");
+ }
+
+ switch (simdNode->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicInit:
+ genSIMDIntrinsicInit(simdNode);
+ break;
+
+ case SIMDIntrinsicInitN:
+ genSIMDIntrinsicInitN(simdNode);
+ break;
+
+ case SIMDIntrinsicSqrt:
+ case SIMDIntrinsicCast:
+ genSIMDIntrinsicUnOp(simdNode);
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicDiv:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ genSIMDIntrinsicBinOp(simdNode);
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ case SIMDIntrinsicEqual:
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicGreaterThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ case SIMDIntrinsicGreaterThanOrEqual:
+ genSIMDIntrinsicRelOp(simdNode);
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ genSIMDIntrinsicDotProduct(simdNode);
+ break;
+
+ case SIMDIntrinsicGetItem:
+ genSIMDIntrinsicGetItem(simdNode);
+ break;
+
+ case SIMDIntrinsicShuffleSSE2:
+ genSIMDIntrinsicShuffleSSE2(simdNode);
+ break;
+
+ case SIMDIntrinsicSetX:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetW:
+ genSIMDIntrinsicSetItem(simdNode);
+ break;
+
+ case SIMDIntrinsicUpperSave:
+ genSIMDIntrinsicUpperSave(simdNode);
+ break;
+ case SIMDIntrinsicUpperRestore:
+ genSIMDIntrinsicUpperRestore(simdNode);
+ break;
+
+ default:
+ noway_assert(!"Unimplemented SIMD intrinsic.");
+ unreached();
+ }
+}
+
+#endif // FEATURE_SIMD
+#endif //_TARGET_AMD64_
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h
new file mode 100644
index 0000000000..a44fb9d0a1
--- /dev/null
+++ b/src/jit/simdintrinsiclist.h
@@ -0,0 +1,145 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef SIMD_INTRINSIC
+#error Define SIMD_INTRINSIC before including this file
+#endif
+/*****************************************************************************/
+
+// clang-format off
+#ifdef FEATURE_SIMD
+
+ /*
+ Notes:
+ a) TYP_UNKNOWN means the 'baseType' of the SIMD vector, which is not known a priori
+ b) Each method maps to a unique intrinsic Id
+ c) To facilitate argument types to be used as an array initializer, args are listed within "{}" braces.
+ d) Since comma is used as the actual param separator in a macro, TYP_UNDEF entries are added to keep the param count constant.
+ e) TODO-Cleanup: when we plumb TYP_SIMD through front-end, replace TYP_STRUCT with TYP_SIMD.
+ */
+
+#ifdef _TARGET_AMD64_
+
+// Max number of parameters that we model in the table for SIMD intrinsic methods.
+#define SIMD_INTRINSIC_MAX_MODELED_PARAM_COUNT 3
+
+// Actual maximum number of parameters for any SIMD intrinsic method.
+// Constructors that take either N values, or a smaller Vector plus additional element values,
+// actually have more arguments than the "modeled" count.
+#define SIMD_INTRINSIC_MAX_PARAM_COUNT 5
+
+// Max number of base types supported by an intrinsic
+#define SIMD_INTRINSIC_MAX_BASETYPE_COUNT 10
+
+/***************************************************************************************************************************************************************************************************************************
+ Method Name, Is Instance Intrinsic Id, Display Name, return type, Arg count, Individual argument types SSE2 supported
+ Method (including implicit "this") base types
+ ***************************************************************************************************************************************************************************************************************************/
+SIMD_INTRINSIC(nullptr, false, None, "None", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+SIMD_INTRINSIC("get_Count", false, GetCount, "count", TYP_INT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_One", false, GetOne, "one", TYP_STRUCT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_Zero", false, GetZero, "zero", TYP_STRUCT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_AllOnes", false, GetAllOnes, "allOnes", TYP_STRUCT, 0, {TYP_VOID, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// .ctor call or newobj - there are four forms.
+// This form takes the object plus a value of the base (element) type:
+SIMD_INTRINSIC(".ctor", true, Init, "init", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+// This form takes the object plus an array of the base (element) type:
+SIMD_INTRINSIC(".ctor", true, InitArray, "initArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+// This form takes the object, an array of the base (element) type, and an index into the array:
+SIMD_INTRINSIC(".ctor", true, InitArrayX, "initArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+// This form takes the object and N values of the base (element) type. The actual number of arguments depends upon the Vector size, which must be a fixed-size vector type such as Vector2f/3f/4f.
+// Right now this intrinsic is supported only on fixed float vectors, and hence the supported base type list contains only TYP_FLOAT.
+// This is currently the intrinsic that has the largest maximum number of operands - if we add new fixed vector types
+// with more than 4 elements, the above SIMD_INTRINSIC_MAX_PARAM_COUNT will have to change.
+SIMD_INTRINSIC(".ctor", true, InitN, "initN", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+// This form takes the object, a smaller fixed vector, and one or two additional arguments of the base type, e.g. Vector3 V = new Vector3(V2, x); where V2 is a Vector2, and x is a float.
+SIMD_INTRINSIC(".ctor", true, InitFixed, "initFixed", TYP_VOID, 3, {TYP_BYREF, TYP_STRUCT, TYP_UNKNOWN}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Copy vector to an array
+SIMD_INTRINSIC("CopyTo", true, CopyToArray, "CopyToArray", TYP_VOID, 2, {TYP_BYREF, TYP_REF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("CopyTo", true, CopyToArrayX, "CopyToArray", TYP_VOID, 3, {TYP_BYREF, TYP_REF, TYP_INT }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Get operations
+SIMD_INTRINSIC("get_Item", true, GetItem, "get[i]", TYP_UNKNOWN, 2, {TYP_BYREF, TYP_INT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("get_X", true, GetX, "getX", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("get_Y", true, GetY, "getY", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("get_Z", true, GetZ, "getZ", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("get_W", true, GetW, "getW", TYP_UNKNOWN, 1, {TYP_BYREF, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Set operations
+SIMD_INTRINSIC("set_X", true, SetX, "setX", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("set_Y", true, SetY, "setY", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("set_Z", true, SetZ, "setZ", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("set_W", true, SetW, "setW", TYP_VOID, 2, {TYP_BYREF, TYP_UNKNOWN, TYP_UNDEF}, {TYP_FLOAT, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Object.Equals()
+SIMD_INTRINSIC("Equals", true, InstEquals, "equals", TYP_BOOL, 2, {TYP_BYREF, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Operator == and !=
+SIMD_INTRINSIC("op_Equality", false, OpEquality, "==", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_Inequality", false, OpInEquality, "!=", TYP_BOOL, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Arithmetic Operations
+SIMD_INTRINSIC("op_Addition", false, Add, "+", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_Subtraction", false, Sub, "-", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_Multiply", false, Mul, "*", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_SHORT,TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Abs is recognized as an intrinsic for floating-point and unsigned base types; SquareRoot only for float or double vectors
+SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Min and Max methods are recognized as intrinsics for the supported base types listed below
+SIMD_INTRINSIC("Min", false, Min, "min", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("Max", false, Max, "max", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Vector Relational operators
+SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("LessThan", false, LessThan, "lt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("LessThanOrEqual", false, LessThanOrEqual, "le", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("GreaterThan", false, GreaterThan, "gt", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("GreaterThanOrEqual", false, GreaterThanOrEqual, "ge", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Bitwise operations
+SIMD_INTRINSIC("op_BitwiseAnd", false, BitwiseAnd, "&", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("BitwiseAndNot", false, BitwiseAndNot, "&~", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_BitwiseOr", false, BitwiseOr, "|", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("op_ExclusiveOr", false, BitwiseXor, "^", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Dot Product
+SIMD_INTRINSIC("Dot", false, DotProduct, "Dot", TYP_UNKNOWN, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Select
+SIMD_INTRINSIC("ConditionalSelect", false, Select, "Select", TYP_STRUCT, 3, {TYP_STRUCT, TYP_STRUCT, TYP_STRUCT}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Cast
+SIMD_INTRINSIC("op_Explicit", false, Cast, "Cast", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+
+// Miscellaneous
+SIMD_INTRINSIC("get_IsHardwareAccelerated", false, HWAccel, "HWAccel", TYP_BOOL, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Shuffle and Shift operations - these are internal intrinsics as there is no corresponding managed method.
+// To prevent these from being accidentally recognized as intrinsics, all of the arg types and supported base types are made TYP_UNDEF
+SIMD_INTRINSIC("ShuffleSSE2", false, ShuffleSSE2, "ShuffleSSE2", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Internal, logical shift operations that shift the entire vector register instead of individual elements of the vector.
+SIMD_INTRINSIC("ShiftLeftInternal", false, ShiftLeftInternal, "<< Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("ShiftRightInternal", false, ShiftRightInternal, ">> Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+// Internal intrinsics for saving & restoring the upper half of a vector register
+SIMD_INTRINSIC("UpperSave", false, UpperSave, "UpperSave Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("UpperRestore", false, UpperRestore, "UpperRestore Internal", TYP_STRUCT, 2, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
+SIMD_INTRINSIC(nullptr, false, Invalid, "Invalid", TYP_UNDEF, 0, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF}, {TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+#undef SIMD_INTRINSIC
+
+#else //_TARGET_AMD64_
+#error SIMD intrinsics not defined for target arch
+#endif //!_TARGET_AMD64_
+
+#endif //FEATURE_SIMD
+// clang-format on
diff --git a/src/jit/sm.cpp b/src/jit/sm.cpp
new file mode 100644
index 0000000000..859b238ec8
--- /dev/null
+++ b/src/jit/sm.cpp
@@ -0,0 +1,190 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX State machine used in the JIT XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "smcommon.cpp"
+
+//
+// The array to map from EE opcodes (i.e. CEE_ ) to state machine opcodes (i.e. SM_ )
+//
+const SM_OPCODE smOpcodeMap[] = {
+#define OPCODEMAP(eename, eestring, smname) smname,
+#include "smopcodemap.def"
+#undef OPCODEMAP
+};
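+
+// For example, an entry in smopcodemap.def of the (assumed) form
+//     OPCODEMAP(CEE_LDARG_0, "ldarg.0", SM_LDARG_0)
+// contributes a single SM_LDARG_0 element to this array, so smOpcodeMap can be
+// indexed directly by the CEE_ opcode value, as MapToSMOpcode does below.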
+
+// TODO: Consider how to make this method inlinable, given that it refers to smOpcodeMap.
+/* static */ SM_OPCODE CodeSeqSM::MapToSMOpcode(OPCODE opcode)
+{
+ assert(opcode < CEE_COUNT);
+
+ SM_OPCODE smOpcode = smOpcodeMap[opcode];
+ assert(smOpcode < SM_COUNT);
+ return smOpcode;
+}
+
+void CodeSeqSM::Start(Compiler* comp)
+{
+ pComp = comp;
+ States = gp_SMStates;
+ JumpTableCells = gp_SMJumpTableCells;
+ StateWeights = gp_StateWeights;
+ NativeSize = 0;
+
+ Reset();
+}
+
+void CodeSeqSM::Reset()
+{
+ curState = SM_STATE_ID_START;
+
+#ifdef DEBUG
+ // Reset the state occurrence counts
+ memset(StateMatchedCounts, 0, sizeof(StateMatchedCounts));
+#endif
+}
+
+void CodeSeqSM::End()
+{
+ if (States[curState].term)
+ {
+ TermStateMatch(curState DEBUGARG(pComp->verbose));
+ }
+}
+
+void CodeSeqSM::Run(SM_OPCODE opcode DEBUGARG(int level))
+{
+ SM_STATE_ID nextState;
+ SM_STATE_ID rollbackState;
+
+ SM_OPCODE opcodesToRevisit[MAX_CODE_SEQUENCE_LENGTH];
+
+ assert(level <= MAX_CODE_SEQUENCE_LENGTH);
+
+_Next:
+ nextState = GetDestState(curState, opcode);
+
+ if (nextState != 0)
+ {
+ // This is easy: just go to the next state.
+ curState = nextState;
+ return;
+ }
+
+ assert(curState != SM_STATE_ID_START);
+
+ if (States[curState].term)
+ {
+ TermStateMatch(curState DEBUGARG(pComp->verbose));
+ curState = SM_STATE_ID_START;
+ goto _Next;
+ }
+
+ // This is hard. We need to roll back to the longest-matched terminal state and restart from there.
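+ //
+ // Worked example (using the state names from the generated tables in smdata.cpp):
+ // if the current state is the non-terminal [ldarg.0 -> ldarg.1] and the incoming
+ // opcode has no outgoing edge from it, the longest terminal prefix is [ldarg.0].
+ // We record a match for [ldarg.0] and then revisit the remaining opcodes - ldarg.1
+ // followed by the incoming opcode - starting over from SM_STATE_ID_START.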
+
+ rollbackState = States[curState].longestTermState;
+ TermStateMatch(rollbackState DEBUGARG(pComp->verbose));
+
+ assert(States[curState].length > States[rollbackState].length);
+
+ unsigned numOfOpcodesToRevisit = States[curState].length - States[rollbackState].length + 1;
+ assert(numOfOpcodesToRevisit > 1 &&
+ numOfOpcodesToRevisit <= MAX_CODE_SEQUENCE_LENGTH); // So it can fit in the local array opcodesToRevisit[]
+
+ SM_OPCODE* p = opcodesToRevisit + (numOfOpcodesToRevisit - 1);
+
+ *p = opcode;
+
+ // Fill in the local array:
+ for (unsigned i = 0; i < numOfOpcodesToRevisit - 1; ++i)
+ {
+ *(--p) = States[curState].opc;
+ curState = States[curState].prevState;
+ }
+
+ assert(curState == rollbackState);
+
+ // Now revisit these opcodes, starting from SM_STATE_ID_START.
+ curState = SM_STATE_ID_START;
+ for (p = opcodesToRevisit; p < opcodesToRevisit + numOfOpcodesToRevisit; ++p)
+ {
+ Run(*p DEBUGARG(level + 1));
+ }
+}
+
+SM_STATE_ID CodeSeqSM::GetDestState(SM_STATE_ID srcState, SM_OPCODE opcode)
+{
+ assert(opcode < SM_COUNT);
+
+ JumpTableCell* pThisJumpTable = (JumpTableCell*)(((PBYTE)JumpTableCells) + States[srcState].jumpTableByteOffset);
+
+ JumpTableCell* cell = pThisJumpTable + opcode;
+
+ if (cell->srcState != srcState)
+ {
+ assert(cell->srcState == 0 ||
+ cell->srcState != srcState); // Either way means there is no outgoing edge from srcState.
+ return 0;
+ }
+ else
+ {
+ return cell->destState;
+ }
+}
+
+#ifdef DEBUG
+
+const char* CodeSeqSM::StateDesc(SM_STATE_ID stateID)
+{
+ static char s_StateDesc[500];
+ static SM_OPCODE s_StateDescOpcodes[MAX_CODE_SEQUENCE_LENGTH];
+
+ if (stateID == 0)
+ {
+ return "invalid";
+ }
+ if (stateID == SM_STATE_ID_START)
+ {
+ return "start";
+ }
+ unsigned i = 0;
+
+ SM_STATE_ID b = stateID;
+
+ while (States[b].prevState != 0)
+ {
+ s_StateDescOpcodes[i] = States[b].opc;
+ b = States[b].prevState;
+ ++i;
+ }
+
+ assert(i == States[stateID].length && i > 0);
+
+ *s_StateDesc = 0;
+
+ while (--i > 0)
+ {
+ strcat(s_StateDesc, smOpcodeNames[s_StateDescOpcodes[i]]);
+ strcat(s_StateDesc, " -> ");
+ }
+
+ strcat(s_StateDesc, smOpcodeNames[s_StateDescOpcodes[0]]);
+
+ return s_StateDesc;
+}
+
+#endif // DEBUG
diff --git a/src/jit/sm.h b/src/jit/sm.h
new file mode 100644
index 0000000000..33d65092bb
--- /dev/null
+++ b/src/jit/sm.h
@@ -0,0 +1,75 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// State machine header used ONLY in the JIT.
+//
+
+#ifndef __sm_h__
+#define __sm_h__
+
+#include "smcommon.h"
+
+extern const SMState* gp_SMStates;
+extern const JumpTableCell* gp_SMJumpTableCells;
+extern const short* gp_StateWeights;
+
+class CodeSeqSM // Represents a particular run of the state machine.
+                // For example, it maintains the array of counts for the terminated states.
+                // These counts must be kept per method for them to be correct in a
+                // multithreaded environment.
+{
+public:
+ Compiler* pComp;
+
+ const SMState* States;
+ const JumpTableCell* JumpTableCells;
+ const short* StateWeights; // Weight for each state. Including non-terminate states.
+
+ SM_STATE_ID curState;
+
+ int NativeSize; // This is a signed integer!
+
+ void Start(Compiler* comp);
+ void Reset();
+ void End();
+ void Run(SM_OPCODE opcode DEBUGARG(int level));
+
+ SM_STATE_ID GetDestState(SM_STATE_ID srcState, SM_OPCODE opcode);
+
+ // Matched a termination state
+ inline void TermStateMatch(SM_STATE_ID stateID DEBUGARG(bool verbose))
+ {
+ assert(States[stateID].term);
+ assert(StateMatchedCounts[stateID] < _UI16_MAX);
+#ifdef DEBUG
+ ++StateMatchedCounts[stateID];
+#ifndef SMGEN_COMPILE
+ if (verbose)
+ {
+ printf("weight=%3d : state %3d [ %s ]\n", StateWeights[stateID], stateID, StateDesc(stateID));
+ }
+#endif // SMGEN_COMPILE
+#endif // DEBUG
+
+ NativeSize += StateWeights[stateID];
+ }
+
+ // Given an SM opcode, retrieve the weight for its single-opcode state.
+ // For example, the ID of the single-opcode state SM_NOSHOW is 2.
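+ // Likewise, SM_LDARG_0 has opcode value 1, so its single-opcode state ID is
+ // 1 + SM_STATE_ID_START + 1 == 3 (state 3 [ldarg.0] in the generated smdata.cpp).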
+ inline short GetWeightForOpcode(SM_OPCODE opcode)
+ {
+ SM_STATE_ID stateID = ((SM_STATE_ID)opcode) + SM_STATE_ID_START + 1;
+ return StateWeights[stateID];
+ }
+
+#ifdef DEBUG
+ WORD StateMatchedCounts[NUM_SM_STATES];
+ const char* StateDesc(SM_STATE_ID stateID);
+#endif
+
+ static SM_OPCODE MapToSMOpcode(OPCODE opcode);
+};
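+
+/* Illustrative usage sketch (hypothetical driver; "comp" and the source of IL opcodes
+   are placeholders - the real caller lives elsewhere in the JIT):
+
+      CodeSeqSM sm;
+      sm.Start(comp);                     // bind to a Compiler instance and reset
+      for (OPCODE opcode : <IL opcodes of the method>)
+      {
+          sm.Run(CodeSeqSM::MapToSMOpcode(opcode) DEBUGARG(0));
+      }
+      sm.End();                           // flush the final (possibly terminal) state
+      int estimate = sm.NativeSize;       // accumulated weight of the matched states
+*/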
+
+#endif /* __sm_h__ */
diff --git a/src/jit/smallhash.h b/src/jit/smallhash.h
new file mode 100644
index 0000000000..71ea4a6269
--- /dev/null
+++ b/src/jit/smallhash.h
@@ -0,0 +1,592 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef _SMALLHASHTABLE_H_
+#define _SMALLHASHTABLE_H_
+
+//------------------------------------------------------------------------
+// HashTableInfo: a concept that provides equality and hashing methods for
+// a particular key type. Used by HashTableBase and its
+// subclasses.
+template <typename TKey>
+struct HashTableInfo
+{
+ // static bool Equals(const TKey& x, const TKey& y);
+ // static unsigned GetHashCode(const TKey& key);
+};
+
+//------------------------------------------------------------------------
+// HashTableInfo<TKey*>: specialized version of HashTableInfo for pointer-
+// typed keys.
+template <typename TKey>
+struct HashTableInfo<TKey*>
+{
+ static bool Equals(const TKey* x, const TKey* y)
+ {
+ return x == y;
+ }
+
+ static unsigned GetHashCode(const TKey* key)
+ {
+ // Shift off bits that are not likely to be significant
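+ // (e.g. for a type with 8-byte alignment, the low 3 bits of a well-aligned
+ // pointer are always zero and so carry no information)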
+ size_t keyval = reinterpret_cast<size_t>(key) >> ConstLog2<__alignof(TKey)>::value;
+
+ // Truncate and return the result
+ return static_cast<unsigned>(keyval);
+ }
+};
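+
+//------------------------------------------------------------------------
+// Illustrative sketch (hypothetical; not a type the JIT defines): a conforming
+// info type for plain unsigned keys could look like the following and be passed
+// as the TKeyInfo parameter of the tables below.
+//
+//    struct UnsignedKeyInfo
+//    {
+//        static bool Equals(unsigned x, unsigned y)
+//        {
+//            return x == y;
+//        }
+//
+//        static unsigned GetHashCode(unsigned key)
+//        {
+//            return key * 2654435761u; // simple multiplicative hash to spread the bits
+//        }
+//    };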
+
+//------------------------------------------------------------------------
+// HashTableBase: base type for HashTable and SmallHashTable. This class
+// provides the vast majority of the implementation. The
+// subclasses differ in the storage they use at the time of
+// construction: HashTable allocates the initial bucket
+// array on the heap; SmallHashTable contains a small inline
+// array.
+//
+// This implementation is based on the ideas presented in Herlihy, Shavit,
+// and Tzafrir '08 (http://mcg.cs.tau.ac.il/papers/disc2008-hopscotch.pdf),
+// though it does not currently implement the hopscotch algorithm.
+//
+// The approach taken is intended to perform well in both space and speed.
+// This approach is a hybrid of separate chaining and open addressing with
+// linear probing: collisions are resolved using a bucket chain, but that
+// chain is stored in the bucket array itself.
+//
+// Resolving collisions using a bucket chain avoids the primary clustering
+// issue common in linearly-probed open addressed hash tables, while using
+// buckets as chain nodes avoids the allocation traffic typical of chained
+// tables. Applying the hopscotch algorithm in the aforementioned paper
+// could further improve performance by optimizing access patterns for
+// better cache usage.
+//
+// Template parameters:
+// TKey - The type of the table's keys.
+// TValue - The type of the table's values.
+// TKeyInfo - A type that conforms to the HashTableInfo<TKey> concept.
+template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
+class HashTableBase
+{
+ friend class KeyValuePair;
+ friend class Iterator;
+
+ enum : unsigned
+ {
+ InitialNumBuckets = 8
+ };
+
+protected:
+ //------------------------------------------------------------------------
+ // HashTableBase::Bucket: provides storage for the key-value pairs that
+ // make up the contents of the table.
+ //
+ // The "home" bucket for a particular key is the bucket indexed by the
+ // key's hash code modulo the size of the bucket array (the "home index").
+ //
+ // The home bucket is always considered to be part of the chain that it
+ // roots, even if it is also part of the chain rooted at a different
+ // bucket. `m_firstOffset` indicates the offset of the first non-home
+ // bucket in the home bucket's chain. If the `m_firstOffset` of a bucket
+ // is 0, the chain rooted at that bucket is empty.
+ //
+ // The index of the next bucket in a chain is calculated by adding the
+ // value in `m_nextOffset` to the index of the current bucket. If
+ // `m_nextOffset` is 0, the current bucket is the end of its chain. Each
+ // bucket in a chain must be occupied (i.e. `m_isFull` will be true).
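+ //
+ // Hypothetical example (indices chosen for illustration): with 8 buckets, suppose
+ // keys A and B both hash to home index 2. A occupies bucket 2, and B was placed in
+ // bucket 5 (the first free bucket it probed). Bucket 2 then has m_firstOffset == 3
+ // ((5 - 2) & 7) and bucket 5 has m_nextOffset == 0, marking the end of the chain.
+ // A lookup for B probes bucket 2, fails to match there, follows m_firstOffset to
+ // bucket 5, and matches.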
+ struct Bucket
+ {
+ bool m_isFull; // True if the bucket is occupied; false otherwise.
+
+ unsigned m_firstOffset; // The offset to the first node in the chain for this bucket index.
+ unsigned m_nextOffset; // The offset to the next node in the chain for this bucket index.
+
+ unsigned m_hash; // The hash code for the element stored in this bucket.
+ TKey m_key; // The key for the element stored in this bucket.
+ TValue m_value; // The value for the element stored in this bucket.
+ };
+
+private:
+ Compiler* m_compiler; // The compiler context to use for allocations.
+ Bucket* m_buckets; // The bucket array.
+ unsigned m_numBuckets; // The number of buckets in the bucket array.
+ unsigned m_numFullBuckets; // The number of occupied buckets.
+
+ //------------------------------------------------------------------------
+ // HashTableBase::Insert: inserts a key-value pair into a bucket array.
+ //
+ // Arguments:
+ // buckets - The bucket array in which to insert the key-value pair.
+ // numBuckets - The number of buckets in the bucket array.
+ // hash - The hash code of the key to insert.
+ // key - The key to insert.
+ // value - The value to insert.
+ //
+ // Returns:
+ // True if the key-value pair was successfully inserted; false
+ // otherwise.
+ static bool Insert(Bucket* buckets, unsigned numBuckets, unsigned hash, const TKey& key, const TValue& value)
+ {
+ const unsigned mask = numBuckets - 1;
+ unsigned homeIndex = hash & mask;
+
+ Bucket* home = &buckets[homeIndex];
+ if (!home->m_isFull)
+ {
+ // The home bucket is empty; use it.
+ //
+ // Note that `m_firstOffset` does not need to be updated: whether or not it is non-zero,
+ // it is already correct, since we're inserting at the head of the list. `m_nextOffset`
+ // must be 0, however, since this node should not be part of a list.
+ assert(home->m_nextOffset == 0);
+
+ home->m_isFull = true;
+ home->m_hash = hash;
+ home->m_key = key;
+ home->m_value = value;
+ return true;
+ }
+
+ // If the home bucket is full, probe to find the next empty bucket.
+ unsigned precedingIndexInChain = homeIndex;
+ unsigned nextIndexInChain = (homeIndex + home->m_firstOffset) & mask;
+ for (unsigned j = 1; j < numBuckets; j++)
+ {
+ unsigned bucketIndex = (homeIndex + j) & mask;
+ Bucket* bucket = &buckets[bucketIndex];
+ if (bucketIndex == nextIndexInChain)
+ {
+ assert(bucket->m_isFull);
+ precedingIndexInChain = bucketIndex;
+ nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
+ }
+ else if (!bucket->m_isFull)
+ {
+ bucket->m_isFull = true;
+ if (precedingIndexInChain == nextIndexInChain)
+ {
+ bucket->m_nextOffset = 0;
+ }
+ else
+ {
+ assert(((nextIndexInChain - bucketIndex) & mask) > 0);
+ bucket->m_nextOffset = (nextIndexInChain - bucketIndex) & mask;
+ }
+
+ unsigned offset = (bucketIndex - precedingIndexInChain) & mask;
+ assert(offset != 0);
+
+ if (precedingIndexInChain == homeIndex)
+ {
+ buckets[precedingIndexInChain].m_firstOffset = offset;
+ }
+ else
+ {
+ buckets[precedingIndexInChain].m_nextOffset = offset;
+ }
+
+ bucket->m_hash = hash;
+ bucket->m_key = key;
+ bucket->m_value = value;
+ return true;
+ }
+ }
+
+ // No more free buckets.
+ return false;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::TryGetBucket: attempts to get the bucket that holds a
+ // particular key.
+ //
+ // Arguments:
+ // hash - The hash code of the key to find.
+ // key - The key to find.
+ // precedingIndex - An output parameter that will hold the index of the
+ // preceding bucket in the chain for the key. May be
+ // equal to `bucketIndex` if the key is stored in its
+ // home bucket.
+ // bucketIndex - An output parameter that will hold the index of the
+ // bucket that stores the key.
+ //
+ // Returns:
+ // True if the key was successfully found; false otherwise.
+ bool TryGetBucket(unsigned hash, const TKey& key, unsigned* precedingIndex, unsigned* bucketIndex) const
+ {
+ if (m_numBuckets == 0)
+ {
+ return false;
+ }
+
+ const unsigned mask = m_numBuckets - 1;
+ unsigned index = hash & mask;
+
+ Bucket* bucket = &m_buckets[index];
+ if (bucket->m_isFull && bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
+ {
+ *precedingIndex = index;
+ *bucketIndex = index;
+ return true;
+ }
+
+ for (unsigned offset = bucket->m_firstOffset; offset != 0; offset = bucket->m_nextOffset)
+ {
+ unsigned precedingIndexInChain = index;
+
+ index = (index + offset) & mask;
+ bucket = &m_buckets[index];
+
+ assert(bucket->m_isFull);
+ if (bucket->m_hash == hash && TKeyInfo::Equals(bucket->m_key, key))
+ {
+ *precedingIndex = precedingIndexInChain;
+ *bucketIndex = index;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::Resize: allocates a new bucket array twice the size of
+ // the current array and copies the key-value pairs
+ // from the current bucket array into the new array.
+ void Resize()
+ {
+ Bucket* currentBuckets = m_buckets;
+
+ unsigned newNumBuckets = m_numBuckets == 0 ? InitialNumBuckets : m_numBuckets * 2;
+ size_t allocSize = sizeof(Bucket) * newNumBuckets;
+ assert((sizeof(Bucket) * m_numBuckets) < allocSize);
+
+ auto* newBuckets = reinterpret_cast<Bucket*>(m_compiler->compGetMem(allocSize));
+ memset(newBuckets, 0, allocSize);
+
+ for (unsigned currentIndex = 0; currentIndex < m_numBuckets; currentIndex++)
+ {
+ Bucket* currentBucket = &currentBuckets[currentIndex];
+ if (!currentBucket->m_isFull)
+ {
+ continue;
+ }
+
+ bool inserted =
+ Insert(newBuckets, newNumBuckets, currentBucket->m_hash, currentBucket->m_key, currentBucket->m_value);
+ (assert(inserted), (void)inserted);
+ }
+
+ m_numBuckets = newNumBuckets;
+ m_buckets = newBuckets;
+ }
+
+protected:
+ HashTableBase(Compiler* compiler, Bucket* buckets, unsigned numBuckets)
+ : m_compiler(compiler), m_buckets(buckets), m_numBuckets(numBuckets), m_numFullBuckets(0)
+ {
+ assert(compiler != nullptr);
+
+ if (numBuckets > 0)
+ {
+ assert((numBuckets & (numBuckets - 1)) == 0); // Size must be a power of 2
+ assert(m_buckets != nullptr);
+
+ memset(m_buckets, 0, sizeof(Bucket) * numBuckets);
+ }
+ }
+
+public:
+#ifdef DEBUG
+ class Iterator;
+
+ class KeyValuePair final
+ {
+ friend class HashTableBase<TKey, TValue, TKeyInfo>::Iterator;
+
+ Bucket* m_bucket;
+
+ KeyValuePair(Bucket* bucket) : m_bucket(bucket)
+ {
+ assert(m_bucket != nullptr);
+ }
+
+ public:
+ KeyValuePair() : m_bucket(nullptr)
+ {
+ }
+
+ inline TKey& Key()
+ {
+ return m_bucket->m_key;
+ }
+
+ inline TValue& Value()
+ {
+ return m_bucket->m_value;
+ }
+ };
+
+ // NOTE: HashTableBase only provides iterators in debug builds because the order in which
+ // the iterator type produces values is undefined (e.g. it is not related to the order in
+ // which key-value pairs were inserted).
+ class Iterator final
+ {
+ friend class HashTableBase<TKey, TValue, TKeyInfo>;
+
+ Bucket* m_buckets;
+ unsigned m_numBuckets;
+ unsigned m_index;
+
+ Iterator(Bucket* buckets, unsigned numBuckets, unsigned index)
+ : m_buckets(buckets), m_numBuckets(numBuckets), m_index(index)
+ {
+ assert((buckets != nullptr) || (numBuckets == 0));
+ assert(index <= numBuckets);
+
+ // Advance to the first occupied bucket
+ while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull)
+ {
+ m_index++;
+ }
+ }
+
+ public:
+ Iterator() : m_buckets(nullptr), m_numBuckets(0), m_index(0)
+ {
+ }
+
+ KeyValuePair operator*() const
+ {
+ if (m_index >= m_numBuckets)
+ {
+ return KeyValuePair();
+ }
+
+ Bucket* bucket = &m_buckets[m_index];
+ assert(bucket->m_isFull);
+ return KeyValuePair(bucket);
+ }
+
+ KeyValuePair operator->() const
+ {
+ return this->operator*();
+ }
+
+ bool operator==(const Iterator& other) const
+ {
+ return (m_buckets == other.m_buckets) && (m_index == other.m_index);
+ }
+
+ bool operator!=(const Iterator& other) const
+ {
+ return (m_buckets != other.m_buckets) || (m_index != other.m_index);
+ }
+
+ Iterator& operator++()
+ {
+ do
+ {
+ m_index++;
+ } while (m_index != m_numBuckets && !m_buckets[m_index].m_isFull);
+
+ return *this;
+ }
+ };
+
+ Iterator begin() const
+ {
+ return Iterator(m_buckets, m_numBuckets, 0);
+ }
+
+ Iterator end() const
+ {
+ return Iterator(m_buckets, m_numBuckets, m_numBuckets);
+ }
+#endif // DEBUG
+
+ unsigned Count() const
+ {
+ return m_numFullBuckets;
+ }
+
+ void Clear()
+ {
+ if (m_numBuckets > 0)
+ {
+ memset(m_buckets, 0, sizeof(Bucket) * m_numBuckets);
+ m_numFullBuckets = 0;
+ }
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::AddOrUpdate: adds a key-value pair to the hash table if
+ // the key does not already exist in the
+ // table, or updates the value if the key
+ // already exists.
+ //
+ // Arguments:
+ // key - The key for which to add or update a value.
+ // value - The value.
+ //
+ // Returns:
+ // True if the value was added; false if it was updated.
+ bool AddOrUpdate(const TKey& key, const TValue& value)
+ {
+ unsigned hash = TKeyInfo::GetHashCode(key);
+
+ unsigned unused, index;
+ if (TryGetBucket(hash, key, &unused, &index))
+ {
+ m_buckets[index].m_value = value;
+ return false;
+ }
+
+ // If the load factor is 0.8 or greater, resize the table before inserting.
+ if ((m_numFullBuckets * 5) >= (m_numBuckets * 4))
+ {
+ Resize();
+ }
+
+ bool inserted = Insert(m_buckets, m_numBuckets, hash, key, value);
+ (assert(inserted), (void)inserted);
+
+ m_numFullBuckets++;
+
+ return true;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::TryRemove: removes a key from the hash table and returns
+ // its value if the key exists in the table.
+ //
+ // Arguments:
+ // key - The key to remove from the table.
+ // value - An output parameter that will hold the value for the removed
+ // key.
+ //
+ // Returns:
+ // True if the key was removed from the table; false otherwise.
+ bool TryRemove(const TKey& key, TValue* value)
+ {
+ unsigned hash = TKeyInfo::GetHashCode(key);
+
+ unsigned precedingIndexInChain, bucketIndex;
+ if (!TryGetBucket(hash, key, &precedingIndexInChain, &bucketIndex))
+ {
+ return false;
+ }
+
+ Bucket* bucket = &m_buckets[bucketIndex];
+
+ if (precedingIndexInChain != bucketIndex)
+ {
+ const unsigned mask = m_numBuckets - 1;
+ unsigned homeIndex = hash & mask;
+
+ unsigned nextOffset;
+ if (bucket->m_nextOffset == 0)
+ {
+ nextOffset = 0;
+ }
+ else
+ {
+ unsigned nextIndexInChain = (bucketIndex + bucket->m_nextOffset) & mask;
+ nextOffset = (nextIndexInChain - precedingIndexInChain) & mask;
+ }
+
+ if (precedingIndexInChain == homeIndex)
+ {
+ m_buckets[precedingIndexInChain].m_firstOffset = nextOffset;
+ }
+ else
+ {
+ m_buckets[precedingIndexInChain].m_nextOffset = nextOffset;
+ }
+ }
+
+ bucket->m_isFull = false;
+ bucket->m_nextOffset = 0;
+
+ m_numFullBuckets--;
+
+ *value = bucket->m_value;
+ return true;
+ }
+
+ //------------------------------------------------------------------------
+ // HashTableBase::TryGetValue: retrieves the value for a key if the key
+ // exists in the table.
+ //
+ // Arguments:
+ // key - The key to find in the table.
+ // value - An output parameter that will hold the value for the key.
+ //
+ // Returns:
+ // True if the key was found in the table; false otherwise.
+ bool TryGetValue(const TKey& key, TValue* value) const
+ {
+ unsigned unused, index;
+ if (!TryGetBucket(TKeyInfo::GetHashCode(key), key, &unused, &index))
+ {
+ return false;
+ }
+
+ *value = m_buckets[index].m_value;
+ return true;
+ }
+};
+
+//------------------------------------------------------------------------
+// HashTable: a simple subclass of `HashTableBase` that always uses heap
+// storage for its bucket array.
+template <typename TKey, typename TValue, typename TKeyInfo = HashTableInfo<TKey>>
+class HashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
+{
+ typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
+
+ static unsigned RoundUp(unsigned initialSize)
+ {
+ return 1 << genLog2(initialSize);
+ }
+
+public:
+ HashTable(Compiler* compiler) : TBase(compiler, nullptr, 0)
+ {
+ }
+
+ HashTable(Compiler* compiler, unsigned initialSize)
+ : TBase(compiler,
+ reinterpret_cast<typename TBase::Bucket*>(
+ compiler->compGetMem(RoundUp(initialSize) * sizeof(typename TBase::Bucket))),
+ RoundUp(initialSize))
+ {
+ }
+};
+
+//------------------------------------------------------------------------
+// SmallHashTable: an alternative to `HashTable` that stores the initial
+// bucket array inline. Most useful for situations where
+// the number of key-value pairs that will be stored in
+// the map at any given time falls below a certain
+// threshold. Switches to heap storage once the initial
+// inline storage is exhausted.
+template <typename TKey, typename TValue, unsigned NumInlineBuckets = 8, typename TKeyInfo = HashTableInfo<TKey>>
+class SmallHashTable final : public HashTableBase<TKey, TValue, TKeyInfo>
+{
+ typedef HashTableBase<TKey, TValue, TKeyInfo> TBase;
+
+ enum : unsigned
+ {
+ RoundedNumInlineBuckets = 1 << ConstLog2<NumInlineBuckets>::value
+ };
+
+ typename TBase::Bucket m_inlineBuckets[RoundedNumInlineBuckets];
+
+public:
+ SmallHashTable(Compiler* compiler) : TBase(compiler, m_inlineBuckets, RoundedNumInlineBuckets)
+ {
+ }
+};
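+
+// Illustrative usage sketch (hypothetical variable names; any key type with a
+// HashTableInfo specialization, such as a pointer type, works the same way):
+//
+//    SmallHashTable<GenTree*, unsigned, 8> useCounts(compiler);
+//
+//    bool added = useCounts.AddOrUpdate(node, 1);  // true: a new entry was inserted
+//    added      = useCounts.AddOrUpdate(node, 2);  // false: the existing value was updated
+//
+//    unsigned count;
+//    if (useCounts.TryGetValue(node, &count))
+//    {
+//        assert(count == 2);
+//    }
+//
+//    if (useCounts.TryRemove(node, &count))
+//    {
+//        assert(useCounts.Count() == 0);
+//    }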
+
+#endif // _SMALLHASHTABLE_H_
diff --git a/src/jit/smcommon.cpp b/src/jit/smcommon.cpp
new file mode 100644
index 0000000000..d17e21b874
--- /dev/null
+++ b/src/jit/smcommon.cpp
@@ -0,0 +1,166 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if defined(DEBUG) || defined(SMGEN_COMPILE)
+
+//
+// The array of state-machine-opcode names
+//
+const char* const smOpcodeNames[] = {
+#define SMOPDEF(smname, string) string,
+#include "smopcode.def"
+#undef SMOPDEF
+};
+
+//
+// The code sequences the state machine will look for.
+//
+
+const SM_OPCODE s_CodeSeqs[][MAX_CODE_SEQUENCE_LENGTH] = {
+
+#define SMOPDEF(smname, string) {smname, CODE_SEQUENCE_END},
+// ==== Single opcode states ====
+#include "smopcode.def"
+#undef SMOPDEF
+
+ // ==== Legal prefixed opcode sequences ====
+ {SM_CONSTRAINED, SM_CALLVIRT, CODE_SEQUENCE_END},
+
+ // ==== Interesting patterns ====
+
+ // Fetching of object field
+ {SM_LDARG_0, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_1, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_2, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_3, SM_LDFLD, CODE_SEQUENCE_END},
+
+ // Fetching of struct field
+ {SM_LDARGA_S, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDLOCA_S, SM_LDFLD, CODE_SEQUENCE_END},
+
+ // Fetching of struct field from a normed struct
+ {SM_LDARGA_S_NORMED, SM_LDFLD, CODE_SEQUENCE_END},
+ {SM_LDLOCA_S_NORMED, SM_LDFLD, CODE_SEQUENCE_END},
+
+ // stloc/ldloc --> dup
+ {SM_STLOC_0, SM_LDLOC_0, CODE_SEQUENCE_END},
+ {SM_STLOC_1, SM_LDLOC_1, CODE_SEQUENCE_END},
+ {SM_STLOC_2, SM_LDLOC_2, CODE_SEQUENCE_END},
+ {SM_STLOC_3, SM_LDLOC_3, CODE_SEQUENCE_END},
+
+ // FPU operations
+ {SM_LDC_R4, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDC_R4, SM_SUB, CODE_SEQUENCE_END},
+ {SM_LDC_R4, SM_MUL, CODE_SEQUENCE_END},
+ {SM_LDC_R4, SM_DIV, CODE_SEQUENCE_END},
+
+ {SM_LDC_R8, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDC_R8, SM_SUB, CODE_SEQUENCE_END},
+ {SM_LDC_R8, SM_MUL, CODE_SEQUENCE_END},
+ {SM_LDC_R8, SM_DIV, CODE_SEQUENCE_END},
+
+ {SM_CONV_R4, SM_ADD, CODE_SEQUENCE_END},
+ {SM_CONV_R4, SM_SUB, CODE_SEQUENCE_END},
+ {SM_CONV_R4, SM_MUL, CODE_SEQUENCE_END},
+ {SM_CONV_R4, SM_DIV, CODE_SEQUENCE_END},
+
+ // {SM_CONV_R8, SM_ADD, CODE_SEQUENCE_END}, // Removed since it collides with ldelem.r8 in
+ // Math.InternalRound
+ // {SM_CONV_R8, SM_SUB, CODE_SEQUENCE_END}, // Just remove the SM_SUB as well.
+ {SM_CONV_R8, SM_MUL, CODE_SEQUENCE_END},
+ {SM_CONV_R8, SM_DIV, CODE_SEQUENCE_END},
+
+ /* Constant init constructor:
+ L_0006: ldarg.0
+ L_0007: ldc.r8 0
+ L_0010: stfld float64 raytracer.Vec::x
+ */
+
+ {SM_LDARG_0, SM_LDC_I4_0, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDC_R4, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDC_R8, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Copy constructor:
+ L_0006: ldarg.0
+ L_0007: ldarg.1
+ L_0008: ldfld float64 raytracer.Vec::x
+ L_000d: stfld float64 raytracer.Vec::x
+ */
+
+ {SM_LDARG_0, SM_LDARG_1, SM_LDFLD, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Field setter:
+
+ [DebuggerNonUserCode]
+ private void CtorClosed(object target, IntPtr methodPtr)
+ {
+ if (target == null)
+ {
+ this.ThrowNullThisInDelegateToInstance();
+ }
+ base._target = target;
+ base._methodPtr = methodPtr;
+ }
+
+
+ .method private hidebysig instance void CtorClosed(object target, native int methodPtr) cil managed
+ {
+ .custom instance void System.Diagnostics.DebuggerNonUserCodeAttribute::.ctor()
+ .maxstack 8
+ L_0000: ldarg.1
+ L_0001: brtrue.s L_0009
+ L_0003: ldarg.0
+ L_0004: call instance void System.MulticastDelegate::ThrowNullThisInDelegateToInstance()
+
+ L_0009: ldarg.0
+ L_000a: ldarg.1
+ L_000b: stfld object System.Delegate::_target
+
+ L_0010: ldarg.0
+ L_0011: ldarg.2
+ L_0012: stfld native int System.Delegate::_methodPtr
+
+ L_0017: ret
+ }
+ */
+
+ {SM_LDARG_0, SM_LDARG_1, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDARG_2, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDARG_3, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Scale operator:
+
+ L_0000: ldarg.0
+ L_0001: dup
+ L_0002: ldfld float64 raytracer.Vec::x
+ L_0007: ldarg.1
+ L_0008: mul
+ L_0009: stfld float64 raytracer.Vec::x
+ */
+
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_ADD, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_SUB, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_MUL, SM_STFLD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_DUP, SM_LDFLD, SM_LDARG_1, SM_DIV, SM_STFLD, CODE_SEQUENCE_END},
+
+ /* Add operator
+ L_0000: ldarg.0
+ L_0001: ldfld float64 raytracer.Vec::x
+ L_0006: ldarg.1
+ L_0007: ldfld float64 raytracer.Vec::x
+ L_000c: add
+ */
+
+ {SM_LDARG_0, SM_LDFLD, SM_LDARG_1, SM_LDFLD, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDARG_0, SM_LDFLD, SM_LDARG_1, SM_LDFLD, SM_SUB, CODE_SEQUENCE_END},
+ // No need for mul and div since they have no mathematical meaning here.
+
+ {SM_LDARGA_S, SM_LDFLD, SM_LDARGA_S, SM_LDFLD, SM_ADD, CODE_SEQUENCE_END},
+ {SM_LDARGA_S, SM_LDFLD, SM_LDARGA_S, SM_LDFLD, SM_SUB, CODE_SEQUENCE_END},
+ // No need for mul and div since they have no mathematical meaning here.
+
+ // The end:
+ {CODE_SEQUENCE_END}};
+
+#endif // defined(DEBUG) || defined(SMGEN_COMPILE)
diff --git a/src/jit/smcommon.h b/src/jit/smcommon.h
new file mode 100644
index 0000000000..0c33e05a7b
--- /dev/null
+++ b/src/jit/smcommon.h
@@ -0,0 +1,50 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// Common headers used both in smgen.exe and the JIT.
+//
+
+#ifndef __sm_common_h__
+#define __sm_common_h__
+
+#include "smopenum.h"
+
+#define NUM_SM_STATES 250
+
+typedef BYTE SM_STATE_ID;
+
+static_assert_no_msg(sizeof(SM_STATE_ID) == 1); // To conserve memory, we don't want to have more than 256 states.
+
+#define SM_STATE_ID_START 1
+
+static_assert_no_msg(SM_STATE_ID_START == 1); // Make sure nobody changes it. We rely on this to map the SM_OPCODE
+ // to single-opcode states. For example, in GetWeightForOpcode().
+
+struct JumpTableCell
+{
+ SM_STATE_ID srcState;
+ SM_STATE_ID destState;
+};
+
+struct SMState
+{
+ bool term; // does this state terminate a code sequence?
+ BYTE length; // the length of currently matched opcodes
+ SM_STATE_ID longestTermState; // the ID of the longest matched terminate state
+
+ SM_STATE_ID prevState; // previous state
+ SM_OPCODE opc; // opcode that leads from the previous state to current state
+
+ unsigned short jumpTableByteOffset;
+};
+
+//
+// Code sequences
+//
+
+#define MAX_CODE_SEQUENCE_LENGTH 7
+#define CODE_SEQUENCE_END ((SM_OPCODE)(SM_COUNT + 1))
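+
+// For example, the two-opcode sequence "ldarg.0; ldfld" is encoded in the s_CodeSeqs
+// table (see smcommon.cpp) as {SM_LDARG_0, SM_LDFLD, CODE_SEQUENCE_END}.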
+
+#endif /* __sm_common_h__ */
diff --git a/src/jit/smdata.cpp b/src/jit/smdata.cpp
new file mode 100644
index 0000000000..9fe00d4984
--- /dev/null
+++ b/src/jit/smdata.cpp
@@ -0,0 +1,705 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//
+// Automatically generated code. DO NOT MODIFY!
+// To regenerate this file, run "smgen.exe > SMData.cpp"
+//
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+#include "jitpch.h"
+//
+// States in the state machine
+//
+// clang-format off
+const SMState g_SMStates[] =
+{
+ // {term, len, lng, prev, SMOpcode and SMOpcodeName , offsets } // state ID and name
+ { 0, 0, 0, 0, (SM_OPCODE) 0 /* noshow */, 0 }, // state 0 [invalid]
+ { 0, 0, 0, 0, (SM_OPCODE) 0 /* noshow */, 0 }, // state 1 [start]
+ { 1, 1, 0, 1, (SM_OPCODE) 0 /* noshow */, 0 }, // state 2 [noshow]
+ { 1, 1, 0, 1, (SM_OPCODE) 1 /* ldarg.0 */, 372 }, // state 3 [ldarg.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 2 /* ldarg.1 */, 168 }, // state 4 [ldarg.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 3 /* ldarg.2 */, 170 }, // state 5 [ldarg.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 4 /* ldarg.3 */, 172 }, // state 6 [ldarg.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 5 /* ldloc.0 */, 0 }, // state 7 [ldloc.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 6 /* ldloc.1 */, 0 }, // state 8 [ldloc.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 7 /* ldloc.2 */, 0 }, // state 9 [ldloc.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 8 /* ldloc.3 */, 0 }, // state 10 [ldloc.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 9 /* stloc.0 */, 378 }, // state 11 [stloc.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 10 /* stloc.1 */, 378 }, // state 12 [stloc.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 11 /* stloc.2 */, 378 }, // state 13 [stloc.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 12 /* stloc.3 */, 378 }, // state 14 [stloc.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 13 /* ldarg.s */, 0 }, // state 15 [ldarg.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 14 /* ldarga.s */, 182 }, // state 16 [ldarga.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 15 /* starg.s */, 0 }, // state 17 [starg.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 16 /* ldloc.s */, 0 }, // state 18 [ldloc.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 17 /* ldloca.s */, 184 }, // state 19 [ldloca.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 18 /* stloc.s */, 0 }, // state 20 [stloc.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 19 /* ldnull */, 0 }, // state 21 [ldnull]
+ { 1, 1, 0, 1, (SM_OPCODE) 20 /* ldc.i4.m1 */, 0 }, // state 22 [ldc.i4.m1]
+ { 1, 1, 0, 1, (SM_OPCODE) 21 /* ldc.i4.0 */, 0 }, // state 23 [ldc.i4.0]
+ { 1, 1, 0, 1, (SM_OPCODE) 22 /* ldc.i4.1 */, 0 }, // state 24 [ldc.i4.1]
+ { 1, 1, 0, 1, (SM_OPCODE) 23 /* ldc.i4.2 */, 0 }, // state 25 [ldc.i4.2]
+ { 1, 1, 0, 1, (SM_OPCODE) 24 /* ldc.i4.3 */, 0 }, // state 26 [ldc.i4.3]
+ { 1, 1, 0, 1, (SM_OPCODE) 25 /* ldc.i4.4 */, 0 }, // state 27 [ldc.i4.4]
+ { 1, 1, 0, 1, (SM_OPCODE) 26 /* ldc.i4.5 */, 0 }, // state 28 [ldc.i4.5]
+ { 1, 1, 0, 1, (SM_OPCODE) 27 /* ldc.i4.6 */, 0 }, // state 29 [ldc.i4.6]
+ { 1, 1, 0, 1, (SM_OPCODE) 28 /* ldc.i4.7 */, 0 }, // state 30 [ldc.i4.7]
+ { 1, 1, 0, 1, (SM_OPCODE) 29 /* ldc.i4.8 */, 0 }, // state 31 [ldc.i4.8]
+ { 1, 1, 0, 1, (SM_OPCODE) 30 /* ldc.i4.s */, 0 }, // state 32 [ldc.i4.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 31 /* ldc.i4 */, 0 }, // state 33 [ldc.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 32 /* ldc.i8 */, 0 }, // state 34 [ldc.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 33 /* ldc.r4 */, 252 }, // state 35 [ldc.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 34 /* ldc.r8 */, 268 }, // state 36 [ldc.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 35 /* unused */, 0 }, // state 37 [unused]
+ { 1, 1, 0, 1, (SM_OPCODE) 36 /* dup */, 0 }, // state 38 [dup]
+ { 1, 1, 0, 1, (SM_OPCODE) 37 /* pop */, 0 }, // state 39 [pop]
+ { 1, 1, 0, 1, (SM_OPCODE) 38 /* call */, 0 }, // state 40 [call]
+ { 1, 1, 0, 1, (SM_OPCODE) 39 /* calli */, 0 }, // state 41 [calli]
+ { 1, 1, 0, 1, (SM_OPCODE) 40 /* ret */, 0 }, // state 42 [ret]
+ { 1, 1, 0, 1, (SM_OPCODE) 41 /* br.s */, 0 }, // state 43 [br.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 42 /* brfalse.s */, 0 }, // state 44 [brfalse.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 43 /* brtrue.s */, 0 }, // state 45 [brtrue.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 44 /* beq.s */, 0 }, // state 46 [beq.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 45 /* bge.s */, 0 }, // state 47 [bge.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 46 /* bgt.s */, 0 }, // state 48 [bgt.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 47 /* ble.s */, 0 }, // state 49 [ble.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 48 /* blt.s */, 0 }, // state 50 [blt.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 49 /* bne.un.s */, 0 }, // state 51 [bne.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 50 /* bge.un.s */, 0 }, // state 52 [bge.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 51 /* bgt.un.s */, 0 }, // state 53 [bgt.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 52 /* ble.un.s */, 0 }, // state 54 [ble.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 53 /* blt.un.s */, 0 }, // state 55 [blt.un.s]
+ { 1, 1, 0, 1, (SM_OPCODE) 54 /* long.branch */, 0 }, // state 56 [long.branch]
+ { 1, 1, 0, 1, (SM_OPCODE) 55 /* switch */, 0 }, // state 57 [switch]
+ { 1, 1, 0, 1, (SM_OPCODE) 56 /* ldind.i1 */, 0 }, // state 58 [ldind.i1]
+ { 1, 1, 0, 1, (SM_OPCODE) 57 /* ldind.u1 */, 0 }, // state 59 [ldind.u1]
+ { 1, 1, 0, 1, (SM_OPCODE) 58 /* ldind.i2 */, 0 }, // state 60 [ldind.i2]
+ { 1, 1, 0, 1, (SM_OPCODE) 59 /* ldind.u2 */, 0 }, // state 61 [ldind.u2]
+ { 1, 1, 0, 1, (SM_OPCODE) 60 /* ldind.i4 */, 0 }, // state 62 [ldind.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 61 /* ldind.u4 */, 0 }, // state 63 [ldind.u4]
+ { 1, 1, 0, 1, (SM_OPCODE) 62 /* ldind.i8 */, 0 }, // state 64 [ldind.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 63 /* ldind.i */, 0 }, // state 65 [ldind.i]
+ { 1, 1, 0, 1, (SM_OPCODE) 64 /* ldind.r4 */, 0 }, // state 66 [ldind.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 65 /* ldind.r8 */, 0 }, // state 67 [ldind.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 66 /* ldind.ref */, 0 }, // state 68 [ldind.ref]
+ { 1, 1, 0, 1, (SM_OPCODE) 67 /* stind.ref */, 0 }, // state 69 [stind.ref]
+ { 1, 1, 0, 1, (SM_OPCODE) 68 /* stind.i1 */, 0 }, // state 70 [stind.i1]
+ { 1, 1, 0, 1, (SM_OPCODE) 69 /* stind.i2 */, 0 }, // state 71 [stind.i2]
+ { 1, 1, 0, 1, (SM_OPCODE) 70 /* stind.i4 */, 0 }, // state 72 [stind.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 71 /* stind.i8 */, 0 }, // state 73 [stind.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 72 /* stind.r4 */, 0 }, // state 74 [stind.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 73 /* stind.r8 */, 0 }, // state 75 [stind.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 74 /* add */, 0 }, // state 76 [add]
+ { 1, 1, 0, 1, (SM_OPCODE) 75 /* sub */, 0 }, // state 77 [sub]
+ { 1, 1, 0, 1, (SM_OPCODE) 76 /* mul */, 0 }, // state 78 [mul]
+ { 1, 1, 0, 1, (SM_OPCODE) 77 /* div */, 0 }, // state 79 [div]
+ { 1, 1, 0, 1, (SM_OPCODE) 78 /* div.un */, 0 }, // state 80 [div.un]
+ { 1, 1, 0, 1, (SM_OPCODE) 79 /* rem */, 0 }, // state 81 [rem]
+ { 1, 1, 0, 1, (SM_OPCODE) 80 /* rem.un */, 0 }, // state 82 [rem.un]
+ { 1, 1, 0, 1, (SM_OPCODE) 81 /* and */, 0 }, // state 83 [and]
+ { 1, 1, 0, 1, (SM_OPCODE) 82 /* or */, 0 }, // state 84 [or]
+ { 1, 1, 0, 1, (SM_OPCODE) 83 /* xor */, 0 }, // state 85 [xor]
+ { 1, 1, 0, 1, (SM_OPCODE) 84 /* shl */, 0 }, // state 86 [shl]
+ { 1, 1, 0, 1, (SM_OPCODE) 85 /* shr */, 0 }, // state 87 [shr]
+ { 1, 1, 0, 1, (SM_OPCODE) 86 /* shr.un */, 0 }, // state 88 [shr.un]
+ { 1, 1, 0, 1, (SM_OPCODE) 87 /* neg */, 0 }, // state 89 [neg]
+ { 1, 1, 0, 1, (SM_OPCODE) 88 /* not */, 0 }, // state 90 [not]
+ { 1, 1, 0, 1, (SM_OPCODE) 89 /* conv.i1 */, 0 }, // state 91 [conv.i1]
+ { 1, 1, 0, 1, (SM_OPCODE) 90 /* conv.i2 */, 0 }, // state 92 [conv.i2]
+ { 1, 1, 0, 1, (SM_OPCODE) 91 /* conv.i4 */, 0 }, // state 93 [conv.i4]
+ { 1, 1, 0, 1, (SM_OPCODE) 92 /* conv.i8 */, 0 }, // state 94 [conv.i8]
+ { 1, 1, 0, 1, (SM_OPCODE) 93 /* conv.r4 */, 276 }, // state 95 [conv.r4]
+ { 1, 1, 0, 1, (SM_OPCODE) 94 /* conv.r8 */, 256 }, // state 96 [conv.r8]
+ { 1, 1, 0, 1, (SM_OPCODE) 95 /* conv.u4 */, 0 }, // state 97 [conv.u4]
+ { 1, 1, 0, 1, (SM_OPCODE) 96 /* conv.u8 */, 0 }, // state 98 [conv.u8]
+ { 1, 1, 0, 1, (SM_OPCODE) 97 /* callvirt */, 0 }, // state 99 [callvirt]
+ { 1, 1, 0, 1, (SM_OPCODE) 98 /* cpobj */, 0 }, // state 100 [cpobj]
+ { 1, 1, 0, 1, (SM_OPCODE) 99 /* ldobj */, 0 }, // state 101 [ldobj]
+ { 1, 1, 0, 1, (SM_OPCODE)100 /* ldstr */, 0 }, // state 102 [ldstr]
+ { 1, 1, 0, 1, (SM_OPCODE)101 /* newobj */, 0 }, // state 103 [newobj]
+ { 1, 1, 0, 1, (SM_OPCODE)102 /* castclass */, 0 }, // state 104 [castclass]
+ { 1, 1, 0, 1, (SM_OPCODE)103 /* isinst */, 0 }, // state 105 [isinst]
+ { 1, 1, 0, 1, (SM_OPCODE)104 /* conv.r.un */, 0 }, // state 106 [conv.r.un]
+ { 1, 1, 0, 1, (SM_OPCODE)105 /* unbox */, 0 }, // state 107 [unbox]
+ { 1, 1, 0, 1, (SM_OPCODE)106 /* throw */, 0 }, // state 108 [throw]
+ { 1, 1, 0, 1, (SM_OPCODE)107 /* ldfld */, 0 }, // state 109 [ldfld]
+ { 1, 1, 0, 1, (SM_OPCODE)108 /* ldflda */, 0 }, // state 110 [ldflda]
+ { 1, 1, 0, 1, (SM_OPCODE)109 /* stfld */, 0 }, // state 111 [stfld]
+ { 1, 1, 0, 1, (SM_OPCODE)110 /* ldsfld */, 0 }, // state 112 [ldsfld]
+ { 1, 1, 0, 1, (SM_OPCODE)111 /* ldsflda */, 0 }, // state 113 [ldsflda]
+ { 1, 1, 0, 1, (SM_OPCODE)112 /* stsfld */, 0 }, // state 114 [stsfld]
+ { 1, 1, 0, 1, (SM_OPCODE)113 /* stobj */, 0 }, // state 115 [stobj]
+ { 1, 1, 0, 1, (SM_OPCODE)114 /* ovf.notype.un */, 0 }, // state 116 [ovf.notype.un]
+ { 1, 1, 0, 1, (SM_OPCODE)115 /* box */, 0 }, // state 117 [box]
+ { 1, 1, 0, 1, (SM_OPCODE)116 /* newarr */, 0 }, // state 118 [newarr]
+ { 1, 1, 0, 1, (SM_OPCODE)117 /* ldlen */, 0 }, // state 119 [ldlen]
+ { 1, 1, 0, 1, (SM_OPCODE)118 /* ldelema */, 0 }, // state 120 [ldelema]
+ { 1, 1, 0, 1, (SM_OPCODE)119 /* ldelem.i1 */, 0 }, // state 121 [ldelem.i1]
+ { 1, 1, 0, 1, (SM_OPCODE)120 /* ldelem.u1 */, 0 }, // state 122 [ldelem.u1]
+ { 1, 1, 0, 1, (SM_OPCODE)121 /* ldelem.i2 */, 0 }, // state 123 [ldelem.i2]
+ { 1, 1, 0, 1, (SM_OPCODE)122 /* ldelem.u2 */, 0 }, // state 124 [ldelem.u2]
+ { 1, 1, 0, 1, (SM_OPCODE)123 /* ldelem.i4 */, 0 }, // state 125 [ldelem.i4]
+ { 1, 1, 0, 1, (SM_OPCODE)124 /* ldelem.u4 */, 0 }, // state 126 [ldelem.u4]
+ { 1, 1, 0, 1, (SM_OPCODE)125 /* ldelem.i8 */, 0 }, // state 127 [ldelem.i8]
+ { 1, 1, 0, 1, (SM_OPCODE)126 /* ldelem.i */, 0 }, // state 128 [ldelem.i]
+ { 1, 1, 0, 1, (SM_OPCODE)127 /* ldelem.r4 */, 0 }, // state 129 [ldelem.r4]
+ { 1, 1, 0, 1, (SM_OPCODE)128 /* ldelem.r8 */, 0 }, // state 130 [ldelem.r8]
+ { 1, 1, 0, 1, (SM_OPCODE)129 /* ldelem.ref */, 0 }, // state 131 [ldelem.ref]
+ { 1, 1, 0, 1, (SM_OPCODE)130 /* stelem.i */, 0 }, // state 132 [stelem.i]
+ { 1, 1, 0, 1, (SM_OPCODE)131 /* stelem.i1 */, 0 }, // state 133 [stelem.i1]
+ { 1, 1, 0, 1, (SM_OPCODE)132 /* stelem.i2 */, 0 }, // state 134 [stelem.i2]
+ { 1, 1, 0, 1, (SM_OPCODE)133 /* stelem.i4 */, 0 }, // state 135 [stelem.i4]
+ { 1, 1, 0, 1, (SM_OPCODE)134 /* stelem.i8 */, 0 }, // state 136 [stelem.i8]
+ { 1, 1, 0, 1, (SM_OPCODE)135 /* stelem.r4 */, 0 }, // state 137 [stelem.r4]
+ { 1, 1, 0, 1, (SM_OPCODE)136 /* stelem.r8 */, 0 }, // state 138 [stelem.r8]
+ { 1, 1, 0, 1, (SM_OPCODE)137 /* stelem.ref */, 0 }, // state 139 [stelem.ref]
+ { 1, 1, 0, 1, (SM_OPCODE)138 /* ldelem */, 0 }, // state 140 [ldelem]
+ { 1, 1, 0, 1, (SM_OPCODE)139 /* stelem */, 0 }, // state 141 [stelem]
+ { 1, 1, 0, 1, (SM_OPCODE)140 /* unbox.any */, 0 }, // state 142 [unbox.any]
+ { 1, 1, 0, 1, (SM_OPCODE)141 /* conv.ovf.i1 */, 0 }, // state 143 [conv.ovf.i1]
+ { 1, 1, 0, 1, (SM_OPCODE)142 /* conv.ovf.u1 */, 0 }, // state 144 [conv.ovf.u1]
+ { 1, 1, 0, 1, (SM_OPCODE)143 /* conv.ovf.i2 */, 0 }, // state 145 [conv.ovf.i2]
+ { 1, 1, 0, 1, (SM_OPCODE)144 /* conv.ovf.u2 */, 0 }, // state 146 [conv.ovf.u2]
+ { 1, 1, 0, 1, (SM_OPCODE)145 /* conv.ovf.i4 */, 0 }, // state 147 [conv.ovf.i4]
+ { 1, 1, 0, 1, (SM_OPCODE)146 /* conv.ovf.u4 */, 0 }, // state 148 [conv.ovf.u4]
+ { 1, 1, 0, 1, (SM_OPCODE)147 /* conv.ovf.i8 */, 0 }, // state 149 [conv.ovf.i8]
+ { 1, 1, 0, 1, (SM_OPCODE)148 /* conv.ovf.u8 */, 0 }, // state 150 [conv.ovf.u8]
+ { 1, 1, 0, 1, (SM_OPCODE)149 /* refanyval */, 0 }, // state 151 [refanyval]
+ { 1, 1, 0, 1, (SM_OPCODE)150 /* ckfinite */, 0 }, // state 152 [ckfinite]
+ { 1, 1, 0, 1, (SM_OPCODE)151 /* mkrefany */, 0 }, // state 153 [mkrefany]
+ { 1, 1, 0, 1, (SM_OPCODE)152 /* ldtoken */, 0 }, // state 154 [ldtoken]
+ { 1, 1, 0, 1, (SM_OPCODE)153 /* conv.u2 */, 0 }, // state 155 [conv.u2]
+ { 1, 1, 0, 1, (SM_OPCODE)154 /* conv.u1 */, 0 }, // state 156 [conv.u1]
+ { 1, 1, 0, 1, (SM_OPCODE)155 /* conv.i */, 0 }, // state 157 [conv.i]
+ { 1, 1, 0, 1, (SM_OPCODE)156 /* conv.ovf.i */, 0 }, // state 158 [conv.ovf.i]
+ { 1, 1, 0, 1, (SM_OPCODE)157 /* conv.ovf.u */, 0 }, // state 159 [conv.ovf.u]
+ { 1, 1, 0, 1, (SM_OPCODE)158 /* add.ovf */, 0 }, // state 160 [add.ovf]
+ { 1, 1, 0, 1, (SM_OPCODE)159 /* mul.ovf */, 0 }, // state 161 [mul.ovf]
+ { 1, 1, 0, 1, (SM_OPCODE)160 /* sub.ovf */, 0 }, // state 162 [sub.ovf]
+ { 1, 1, 0, 1, (SM_OPCODE)161 /* leave.s */, 0 }, // state 163 [leave.s]
+ { 1, 1, 0, 1, (SM_OPCODE)162 /* stind.i */, 0 }, // state 164 [stind.i]
+ { 1, 1, 0, 1, (SM_OPCODE)163 /* conv.u */, 0 }, // state 165 [conv.u]
+ { 1, 1, 0, 1, (SM_OPCODE)164 /* prefix.n */, 0 }, // state 166 [prefix.n]
+ { 1, 1, 0, 1, (SM_OPCODE)165 /* arglist */, 0 }, // state 167 [arglist]
+ { 1, 1, 0, 1, (SM_OPCODE)166 /* ceq */, 0 }, // state 168 [ceq]
+ { 1, 1, 0, 1, (SM_OPCODE)167 /* cgt */, 0 }, // state 169 [cgt]
+ { 1, 1, 0, 1, (SM_OPCODE)168 /* cgt.un */, 0 }, // state 170 [cgt.un]
+ { 1, 1, 0, 1, (SM_OPCODE)169 /* clt */, 0 }, // state 171 [clt]
+ { 1, 1, 0, 1, (SM_OPCODE)170 /* clt.un */, 0 }, // state 172 [clt.un]
+ { 1, 1, 0, 1, (SM_OPCODE)171 /* ldftn */, 0 }, // state 173 [ldftn]
+ { 1, 1, 0, 1, (SM_OPCODE)172 /* ldvirtftn */, 0 }, // state 174 [ldvirtftn]
+ { 1, 1, 0, 1, (SM_OPCODE)173 /* long.loc.arg */, 0 }, // state 175 [long.loc.arg]
+ { 1, 1, 0, 1, (SM_OPCODE)174 /* localloc */, 0 }, // state 176 [localloc]
+ { 1, 1, 0, 1, (SM_OPCODE)175 /* unaligned */, 0 }, // state 177 [unaligned]
+ { 1, 1, 0, 1, (SM_OPCODE)176 /* volatile */, 0 }, // state 178 [volatile]
+ { 1, 1, 0, 1, (SM_OPCODE)177 /* tailcall */, 0 }, // state 179 [tailcall]
+ { 1, 1, 0, 1, (SM_OPCODE)178 /* initobj */, 0 }, // state 180 [initobj]
+ { 1, 1, 0, 1, (SM_OPCODE)179 /* constrained */, 218 }, // state 181 [constrained]
+ { 1, 1, 0, 1, (SM_OPCODE)180 /* cpblk */, 0 }, // state 182 [cpblk]
+ { 1, 1, 0, 1, (SM_OPCODE)181 /* initblk */, 0 }, // state 183 [initblk]
+ { 1, 1, 0, 1, (SM_OPCODE)182 /* rethrow */, 0 }, // state 184 [rethrow]
+ { 1, 1, 0, 1, (SM_OPCODE)183 /* sizeof */, 0 }, // state 185 [sizeof]
+ { 1, 1, 0, 1, (SM_OPCODE)184 /* refanytype */, 0 }, // state 186 [refanytype]
+ { 1, 1, 0, 1, (SM_OPCODE)185 /* readonly */, 0 }, // state 187 [readonly]
+ { 1, 1, 0, 1, (SM_OPCODE)186 /* ldarga.s.normed */, 218 }, // state 188 [ldarga.s.normed]
+ { 1, 1, 0, 1, (SM_OPCODE)187 /* ldloca.s.normed */, 220 }, // state 189 [ldloca.s.normed]
+ { 1, 2, 181, 181, (SM_OPCODE) 97 /* callvirt */, 0 }, // state 190 [constrained -> callvirt]
+ { 1, 2, 3, 3, (SM_OPCODE)107 /* ldfld */, 432 }, // state 191 [ldarg.0 -> ldfld]
+ { 1, 2, 4, 4, (SM_OPCODE)107 /* ldfld */, 0 }, // state 192 [ldarg.1 -> ldfld]
+ { 1, 2, 5, 5, (SM_OPCODE)107 /* ldfld */, 0 }, // state 193 [ldarg.2 -> ldfld]
+ { 1, 2, 6, 6, (SM_OPCODE)107 /* ldfld */, 0 }, // state 194 [ldarg.3 -> ldfld]
+ { 1, 2, 16, 16, (SM_OPCODE)107 /* ldfld */, 414 }, // state 195 [ldarga.s -> ldfld]
+ { 1, 2, 19, 19, (SM_OPCODE)107 /* ldfld */, 0 }, // state 196 [ldloca.s -> ldfld]
+ { 1, 2, 188, 188, (SM_OPCODE)107 /* ldfld */, 0 }, // state 197 [ldarga.s.normed -> ldfld]
+ { 1, 2, 189, 189, (SM_OPCODE)107 /* ldfld */, 0 }, // state 198 [ldloca.s.normed -> ldfld]
+ { 1, 2, 11, 11, (SM_OPCODE) 5 /* ldloc.0 */, 0 }, // state 199 [stloc.0 -> ldloc.0]
+ { 1, 2, 12, 12, (SM_OPCODE) 6 /* ldloc.1 */, 0 }, // state 200 [stloc.1 -> ldloc.1]
+ { 1, 2, 13, 13, (SM_OPCODE) 7 /* ldloc.2 */, 0 }, // state 201 [stloc.2 -> ldloc.2]
+ { 1, 2, 14, 14, (SM_OPCODE) 8 /* ldloc.3 */, 0 }, // state 202 [stloc.3 -> ldloc.3]
+ { 1, 2, 35, 35, (SM_OPCODE) 74 /* add */, 0 }, // state 203 [ldc.r4 -> add]
+ { 1, 2, 35, 35, (SM_OPCODE) 75 /* sub */, 0 }, // state 204 [ldc.r4 -> sub]
+ { 1, 2, 35, 35, (SM_OPCODE) 76 /* mul */, 0 }, // state 205 [ldc.r4 -> mul]
+ { 1, 2, 35, 35, (SM_OPCODE) 77 /* div */, 0 }, // state 206 [ldc.r4 -> div]
+ { 1, 2, 36, 36, (SM_OPCODE) 74 /* add */, 0 }, // state 207 [ldc.r8 -> add]
+ { 1, 2, 36, 36, (SM_OPCODE) 75 /* sub */, 0 }, // state 208 [ldc.r8 -> sub]
+ { 1, 2, 36, 36, (SM_OPCODE) 76 /* mul */, 0 }, // state 209 [ldc.r8 -> mul]
+ { 1, 2, 36, 36, (SM_OPCODE) 77 /* div */, 0 }, // state 210 [ldc.r8 -> div]
+ { 1, 2, 95, 95, (SM_OPCODE) 74 /* add */, 0 }, // state 211 [conv.r4 -> add]
+ { 1, 2, 95, 95, (SM_OPCODE) 75 /* sub */, 0 }, // state 212 [conv.r4 -> sub]
+ { 1, 2, 95, 95, (SM_OPCODE) 76 /* mul */, 0 }, // state 213 [conv.r4 -> mul]
+ { 1, 2, 95, 95, (SM_OPCODE) 77 /* div */, 0 }, // state 214 [conv.r4 -> div]
+ { 1, 2, 96, 96, (SM_OPCODE) 76 /* mul */, 0 }, // state 215 [conv.r8 -> mul]
+ { 1, 2, 96, 96, (SM_OPCODE) 77 /* div */, 0 }, // state 216 [conv.r8 -> div]
+ { 0, 2, 3, 3, (SM_OPCODE) 21 /* ldc.i4.0 */, 228 }, // state 217 [ldarg.0 -> ldc.i4.0]
+ { 1, 3, 3, 217, (SM_OPCODE)109 /* stfld */, 0 }, // state 218 [ldarg.0 -> ldc.i4.0 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 33 /* ldc.r4 */, 230 }, // state 219 [ldarg.0 -> ldc.r4]
+ { 1, 3, 3, 219, (SM_OPCODE)109 /* stfld */, 0 }, // state 220 [ldarg.0 -> ldc.r4 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 34 /* ldc.r8 */, 232 }, // state 221 [ldarg.0 -> ldc.r8]
+ { 1, 3, 3, 221, (SM_OPCODE)109 /* stfld */, 0 }, // state 222 [ldarg.0 -> ldc.r8 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 2 /* ldarg.1 */, 238 }, // state 223 [ldarg.0 -> ldarg.1]
+ { 0, 3, 3, 223, (SM_OPCODE)107 /* ldfld */, 236 }, // state 224 [ldarg.0 -> ldarg.1 -> ldfld]
+ { 1, 4, 3, 224, (SM_OPCODE)109 /* stfld */, 0 }, // state 225 [ldarg.0 -> ldarg.1 -> ldfld -> stfld]
+ { 1, 3, 3, 223, (SM_OPCODE)109 /* stfld */, 0 }, // state 226 [ldarg.0 -> ldarg.1 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 3 /* ldarg.2 */, 240 }, // state 227 [ldarg.0 -> ldarg.2]
+ { 1, 3, 3, 227, (SM_OPCODE)109 /* stfld */, 0 }, // state 228 [ldarg.0 -> ldarg.2 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 4 /* ldarg.3 */, 242 }, // state 229 [ldarg.0 -> ldarg.3]
+ { 1, 3, 3, 229, (SM_OPCODE)109 /* stfld */, 0 }, // state 230 [ldarg.0 -> ldarg.3 -> stfld]
+ { 0, 2, 3, 3, (SM_OPCODE) 36 /* dup */, 248 }, // state 231 [ldarg.0 -> dup]
+ { 0, 3, 3, 231, (SM_OPCODE)107 /* ldfld */, 460 }, // state 232 [ldarg.0 -> dup -> ldfld]
+ { 0, 4, 3, 232, (SM_OPCODE) 2 /* ldarg.1 */, 318 }, // state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1]
+ { 0, 5, 3, 233, (SM_OPCODE) 74 /* add */, 256 }, // state 234 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add]
+ { 1, 6, 3, 234, (SM_OPCODE)109 /* stfld */, 0 }, // state 235 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add -> stfld]
+ { 0, 5, 3, 233, (SM_OPCODE) 75 /* sub */, 258 }, // state 236 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub]
+ { 1, 6, 3, 236, (SM_OPCODE)109 /* stfld */, 0 }, // state 237 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub -> stfld]
+ { 0, 5, 3, 233, (SM_OPCODE) 76 /* mul */, 260 }, // state 238 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul]
+ { 1, 6, 3, 238, (SM_OPCODE)109 /* stfld */, 0 }, // state 239 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul -> stfld]
+ { 0, 5, 3, 233, (SM_OPCODE) 77 /* div */, 262 }, // state 240 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div]
+ { 1, 6, 3, 240, (SM_OPCODE)109 /* stfld */, 0 }, // state 241 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div -> stfld]
+ { 0, 3, 191, 191, (SM_OPCODE) 2 /* ldarg.1 */, 268 }, // state 242 [ldarg.0 -> ldfld -> ldarg.1]
+ { 0, 4, 191, 242, (SM_OPCODE)107 /* ldfld */, 336 }, // state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld]
+ { 1, 5, 191, 243, (SM_OPCODE) 74 /* add */, 0 }, // state 244 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> add]
+ { 1, 5, 191, 243, (SM_OPCODE) 75 /* sub */, 0 }, // state 245 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> sub]
+ { 0, 3, 195, 195, (SM_OPCODE) 14 /* ldarga.s */, 274 }, // state 246 [ldarga.s -> ldfld -> ldarga.s]
+ { 0, 4, 195, 246, (SM_OPCODE)107 /* ldfld */, 342 }, // state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld]
+ { 1, 5, 195, 247, (SM_OPCODE) 74 /* add */, 0 }, // state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
+ { 1, 5, 195, 247, (SM_OPCODE) 75 /* sub */, 0 }, // state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
+};
+// clang-format on
+
+static_assert_no_msg(NUM_SM_STATES == sizeof(g_SMStates) / sizeof(g_SMStates[0]));
+
+const SMState* gp_SMStates = g_SMStates;
+
+//
+// JumpTableCells in the state machine
+//
+// clang-format off
+const JumpTableCell g_SMJumpTableCells[] =
+{
+ // {src, dest }
+ { 1, 2 }, // cell# 0 : state 1 [start] --(0 noshow)--> state 2 [noshow]
+ { 1, 3 }, // cell# 1 : state 1 [start] --(1 ldarg.0)--> state 3 [ldarg.0]
+ { 1, 4 }, // cell# 2 : state 1 [start] --(2 ldarg.1)--> state 4 [ldarg.1]
+ { 1, 5 }, // cell# 3 : state 1 [start] --(3 ldarg.2)--> state 5 [ldarg.2]
+ { 1, 6 }, // cell# 4 : state 1 [start] --(4 ldarg.3)--> state 6 [ldarg.3]
+ { 1, 7 }, // cell# 5 : state 1 [start] --(5 ldloc.0)--> state 7 [ldloc.0]
+ { 1, 8 }, // cell# 6 : state 1 [start] --(6 ldloc.1)--> state 8 [ldloc.1]
+ { 1, 9 }, // cell# 7 : state 1 [start] --(7 ldloc.2)--> state 9 [ldloc.2]
+ { 1, 10 }, // cell# 8 : state 1 [start] --(8 ldloc.3)--> state 10 [ldloc.3]
+ { 1, 11 }, // cell# 9 : state 1 [start] --(9 stloc.0)--> state 11 [stloc.0]
+ { 1, 12 }, // cell# 10 : state 1 [start] --(10 stloc.1)--> state 12 [stloc.1]
+ { 1, 13 }, // cell# 11 : state 1 [start] --(11 stloc.2)--> state 13 [stloc.2]
+ { 1, 14 }, // cell# 12 : state 1 [start] --(12 stloc.3)--> state 14 [stloc.3]
+ { 1, 15 }, // cell# 13 : state 1 [start] --(13 ldarg.s)--> state 15 [ldarg.s]
+ { 1, 16 }, // cell# 14 : state 1 [start] --(14 ldarga.s)--> state 16 [ldarga.s]
+ { 1, 17 }, // cell# 15 : state 1 [start] --(15 starg.s)--> state 17 [starg.s]
+ { 1, 18 }, // cell# 16 : state 1 [start] --(16 ldloc.s)--> state 18 [ldloc.s]
+ { 1, 19 }, // cell# 17 : state 1 [start] --(17 ldloca.s)--> state 19 [ldloca.s]
+ { 1, 20 }, // cell# 18 : state 1 [start] --(18 stloc.s)--> state 20 [stloc.s]
+ { 1, 21 }, // cell# 19 : state 1 [start] --(19 ldnull)--> state 21 [ldnull]
+ { 1, 22 }, // cell# 20 : state 1 [start] --(20 ldc.i4.m1)--> state 22 [ldc.i4.m1]
+ { 1, 23 }, // cell# 21 : state 1 [start] --(21 ldc.i4.0)--> state 23 [ldc.i4.0]
+ { 1, 24 }, // cell# 22 : state 1 [start] --(22 ldc.i4.1)--> state 24 [ldc.i4.1]
+ { 1, 25 }, // cell# 23 : state 1 [start] --(23 ldc.i4.2)--> state 25 [ldc.i4.2]
+ { 1, 26 }, // cell# 24 : state 1 [start] --(24 ldc.i4.3)--> state 26 [ldc.i4.3]
+ { 1, 27 }, // cell# 25 : state 1 [start] --(25 ldc.i4.4)--> state 27 [ldc.i4.4]
+ { 1, 28 }, // cell# 26 : state 1 [start] --(26 ldc.i4.5)--> state 28 [ldc.i4.5]
+ { 1, 29 }, // cell# 27 : state 1 [start] --(27 ldc.i4.6)--> state 29 [ldc.i4.6]
+ { 1, 30 }, // cell# 28 : state 1 [start] --(28 ldc.i4.7)--> state 30 [ldc.i4.7]
+ { 1, 31 }, // cell# 29 : state 1 [start] --(29 ldc.i4.8)--> state 31 [ldc.i4.8]
+ { 1, 32 }, // cell# 30 : state 1 [start] --(30 ldc.i4.s)--> state 32 [ldc.i4.s]
+ { 1, 33 }, // cell# 31 : state 1 [start] --(31 ldc.i4)--> state 33 [ldc.i4]
+ { 1, 34 }, // cell# 32 : state 1 [start] --(32 ldc.i8)--> state 34 [ldc.i8]
+ { 1, 35 }, // cell# 33 : state 1 [start] --(33 ldc.r4)--> state 35 [ldc.r4]
+ { 1, 36 }, // cell# 34 : state 1 [start] --(34 ldc.r8)--> state 36 [ldc.r8]
+ { 1, 37 }, // cell# 35 : state 1 [start] --(35 unused)--> state 37 [unused]
+ { 1, 38 }, // cell# 36 : state 1 [start] --(36 dup)--> state 38 [dup]
+ { 1, 39 }, // cell# 37 : state 1 [start] --(37 pop)--> state 39 [pop]
+ { 1, 40 }, // cell# 38 : state 1 [start] --(38 call)--> state 40 [call]
+ { 1, 41 }, // cell# 39 : state 1 [start] --(39 calli)--> state 41 [calli]
+ { 1, 42 }, // cell# 40 : state 1 [start] --(40 ret)--> state 42 [ret]
+ { 1, 43 }, // cell# 41 : state 1 [start] --(41 br.s)--> state 43 [br.s]
+ { 1, 44 }, // cell# 42 : state 1 [start] --(42 brfalse.s)--> state 44 [brfalse.s]
+ { 1, 45 }, // cell# 43 : state 1 [start] --(43 brtrue.s)--> state 45 [brtrue.s]
+ { 1, 46 }, // cell# 44 : state 1 [start] --(44 beq.s)--> state 46 [beq.s]
+ { 1, 47 }, // cell# 45 : state 1 [start] --(45 bge.s)--> state 47 [bge.s]
+ { 1, 48 }, // cell# 46 : state 1 [start] --(46 bgt.s)--> state 48 [bgt.s]
+ { 1, 49 }, // cell# 47 : state 1 [start] --(47 ble.s)--> state 49 [ble.s]
+ { 1, 50 }, // cell# 48 : state 1 [start] --(48 blt.s)--> state 50 [blt.s]
+ { 1, 51 }, // cell# 49 : state 1 [start] --(49 bne.un.s)--> state 51 [bne.un.s]
+ { 1, 52 }, // cell# 50 : state 1 [start] --(50 bge.un.s)--> state 52 [bge.un.s]
+ { 1, 53 }, // cell# 51 : state 1 [start] --(51 bgt.un.s)--> state 53 [bgt.un.s]
+ { 1, 54 }, // cell# 52 : state 1 [start] --(52 ble.un.s)--> state 54 [ble.un.s]
+ { 1, 55 }, // cell# 53 : state 1 [start] --(53 blt.un.s)--> state 55 [blt.un.s]
+ { 1, 56 }, // cell# 54 : state 1 [start] --(54 long.branch)--> state 56 [long.branch]
+ { 1, 57 }, // cell# 55 : state 1 [start] --(55 switch)--> state 57 [switch]
+ { 1, 58 }, // cell# 56 : state 1 [start] --(56 ldind.i1)--> state 58 [ldind.i1]
+ { 1, 59 }, // cell# 57 : state 1 [start] --(57 ldind.u1)--> state 59 [ldind.u1]
+ { 1, 60 }, // cell# 58 : state 1 [start] --(58 ldind.i2)--> state 60 [ldind.i2]
+ { 1, 61 }, // cell# 59 : state 1 [start] --(59 ldind.u2)--> state 61 [ldind.u2]
+ { 1, 62 }, // cell# 60 : state 1 [start] --(60 ldind.i4)--> state 62 [ldind.i4]
+ { 1, 63 }, // cell# 61 : state 1 [start] --(61 ldind.u4)--> state 63 [ldind.u4]
+ { 1, 64 }, // cell# 62 : state 1 [start] --(62 ldind.i8)--> state 64 [ldind.i8]
+ { 1, 65 }, // cell# 63 : state 1 [start] --(63 ldind.i)--> state 65 [ldind.i]
+ { 1, 66 }, // cell# 64 : state 1 [start] --(64 ldind.r4)--> state 66 [ldind.r4]
+ { 1, 67 }, // cell# 65 : state 1 [start] --(65 ldind.r8)--> state 67 [ldind.r8]
+ { 1, 68 }, // cell# 66 : state 1 [start] --(66 ldind.ref)--> state 68 [ldind.ref]
+ { 1, 69 }, // cell# 67 : state 1 [start] --(67 stind.ref)--> state 69 [stind.ref]
+ { 1, 70 }, // cell# 68 : state 1 [start] --(68 stind.i1)--> state 70 [stind.i1]
+ { 1, 71 }, // cell# 69 : state 1 [start] --(69 stind.i2)--> state 71 [stind.i2]
+ { 1, 72 }, // cell# 70 : state 1 [start] --(70 stind.i4)--> state 72 [stind.i4]
+ { 1, 73 }, // cell# 71 : state 1 [start] --(71 stind.i8)--> state 73 [stind.i8]
+ { 1, 74 }, // cell# 72 : state 1 [start] --(72 stind.r4)--> state 74 [stind.r4]
+ { 1, 75 }, // cell# 73 : state 1 [start] --(73 stind.r8)--> state 75 [stind.r8]
+ { 1, 76 }, // cell# 74 : state 1 [start] --(74 add)--> state 76 [add]
+ { 1, 77 }, // cell# 75 : state 1 [start] --(75 sub)--> state 77 [sub]
+ { 1, 78 }, // cell# 76 : state 1 [start] --(76 mul)--> state 78 [mul]
+ { 1, 79 }, // cell# 77 : state 1 [start] --(77 div)--> state 79 [div]
+ { 1, 80 }, // cell# 78 : state 1 [start] --(78 div.un)--> state 80 [div.un]
+ { 1, 81 }, // cell# 79 : state 1 [start] --(79 rem)--> state 81 [rem]
+ { 1, 82 }, // cell# 80 : state 1 [start] --(80 rem.un)--> state 82 [rem.un]
+ { 1, 83 }, // cell# 81 : state 1 [start] --(81 and)--> state 83 [and]
+ { 1, 84 }, // cell# 82 : state 1 [start] --(82 or)--> state 84 [or]
+ { 1, 85 }, // cell# 83 : state 1 [start] --(83 xor)--> state 85 [xor]
+ { 1, 86 }, // cell# 84 : state 1 [start] --(84 shl)--> state 86 [shl]
+ { 1, 87 }, // cell# 85 : state 1 [start] --(85 shr)--> state 87 [shr]
+ { 1, 88 }, // cell# 86 : state 1 [start] --(86 shr.un)--> state 88 [shr.un]
+ { 1, 89 }, // cell# 87 : state 1 [start] --(87 neg)--> state 89 [neg]
+ { 1, 90 }, // cell# 88 : state 1 [start] --(88 not)--> state 90 [not]
+ { 1, 91 }, // cell# 89 : state 1 [start] --(89 conv.i1)--> state 91 [conv.i1]
+ { 1, 92 }, // cell# 90 : state 1 [start] --(90 conv.i2)--> state 92 [conv.i2]
+ { 1, 93 }, // cell# 91 : state 1 [start] --(91 conv.i4)--> state 93 [conv.i4]
+ { 1, 94 }, // cell# 92 : state 1 [start] --(92 conv.i8)--> state 94 [conv.i8]
+ { 1, 95 }, // cell# 93 : state 1 [start] --(93 conv.r4)--> state 95 [conv.r4]
+ { 1, 96 }, // cell# 94 : state 1 [start] --(94 conv.r8)--> state 96 [conv.r8]
+ { 1, 97 }, // cell# 95 : state 1 [start] --(95 conv.u4)--> state 97 [conv.u4]
+ { 1, 98 }, // cell# 96 : state 1 [start] --(96 conv.u8)--> state 98 [conv.u8]
+ { 1, 99 }, // cell# 97 : state 1 [start] --(97 callvirt)--> state 99 [callvirt]
+ { 1, 100 }, // cell# 98 : state 1 [start] --(98 cpobj)--> state 100 [cpobj]
+ { 1, 101 }, // cell# 99 : state 1 [start] --(99 ldobj)--> state 101 [ldobj]
+ { 1, 102 }, // cell# 100 : state 1 [start] --(100 ldstr)--> state 102 [ldstr]
+ { 1, 103 }, // cell# 101 : state 1 [start] --(101 newobj)--> state 103 [newobj]
+ { 1, 104 }, // cell# 102 : state 1 [start] --(102 castclass)--> state 104 [castclass]
+ { 1, 105 }, // cell# 103 : state 1 [start] --(103 isinst)--> state 105 [isinst]
+ { 1, 106 }, // cell# 104 : state 1 [start] --(104 conv.r.un)--> state 106 [conv.r.un]
+ { 1, 107 }, // cell# 105 : state 1 [start] --(105 unbox)--> state 107 [unbox]
+ { 1, 108 }, // cell# 106 : state 1 [start] --(106 throw)--> state 108 [throw]
+ { 1, 109 }, // cell# 107 : state 1 [start] --(107 ldfld)--> state 109 [ldfld]
+ { 1, 110 }, // cell# 108 : state 1 [start] --(108 ldflda)--> state 110 [ldflda]
+ { 1, 111 }, // cell# 109 : state 1 [start] --(109 stfld)--> state 111 [stfld]
+ { 1, 112 }, // cell# 110 : state 1 [start] --(110 ldsfld)--> state 112 [ldsfld]
+ { 1, 113 }, // cell# 111 : state 1 [start] --(111 ldsflda)--> state 113 [ldsflda]
+ { 1, 114 }, // cell# 112 : state 1 [start] --(112 stsfld)--> state 114 [stsfld]
+ { 1, 115 }, // cell# 113 : state 1 [start] --(113 stobj)--> state 115 [stobj]
+ { 1, 116 }, // cell# 114 : state 1 [start] --(114 ovf.notype.un)--> state 116 [ovf.notype.un]
+ { 1, 117 }, // cell# 115 : state 1 [start] --(115 box)--> state 117 [box]
+ { 1, 118 }, // cell# 116 : state 1 [start] --(116 newarr)--> state 118 [newarr]
+ { 1, 119 }, // cell# 117 : state 1 [start] --(117 ldlen)--> state 119 [ldlen]
+ { 1, 120 }, // cell# 118 : state 1 [start] --(118 ldelema)--> state 120 [ldelema]
+ { 1, 121 }, // cell# 119 : state 1 [start] --(119 ldelem.i1)--> state 121 [ldelem.i1]
+ { 1, 122 }, // cell# 120 : state 1 [start] --(120 ldelem.u1)--> state 122 [ldelem.u1]
+ { 1, 123 }, // cell# 121 : state 1 [start] --(121 ldelem.i2)--> state 123 [ldelem.i2]
+ { 1, 124 }, // cell# 122 : state 1 [start] --(122 ldelem.u2)--> state 124 [ldelem.u2]
+ { 1, 125 }, // cell# 123 : state 1 [start] --(123 ldelem.i4)--> state 125 [ldelem.i4]
+ { 1, 126 }, // cell# 124 : state 1 [start] --(124 ldelem.u4)--> state 126 [ldelem.u4]
+ { 1, 127 }, // cell# 125 : state 1 [start] --(125 ldelem.i8)--> state 127 [ldelem.i8]
+ { 1, 128 }, // cell# 126 : state 1 [start] --(126 ldelem.i)--> state 128 [ldelem.i]
+ { 1, 129 }, // cell# 127 : state 1 [start] --(127 ldelem.r4)--> state 129 [ldelem.r4]
+ { 1, 130 }, // cell# 128 : state 1 [start] --(128 ldelem.r8)--> state 130 [ldelem.r8]
+ { 1, 131 }, // cell# 129 : state 1 [start] --(129 ldelem.ref)--> state 131 [ldelem.ref]
+ { 1, 132 }, // cell# 130 : state 1 [start] --(130 stelem.i)--> state 132 [stelem.i]
+ { 1, 133 }, // cell# 131 : state 1 [start] --(131 stelem.i1)--> state 133 [stelem.i1]
+ { 1, 134 }, // cell# 132 : state 1 [start] --(132 stelem.i2)--> state 134 [stelem.i2]
+ { 1, 135 }, // cell# 133 : state 1 [start] --(133 stelem.i4)--> state 135 [stelem.i4]
+ { 1, 136 }, // cell# 134 : state 1 [start] --(134 stelem.i8)--> state 136 [stelem.i8]
+ { 1, 137 }, // cell# 135 : state 1 [start] --(135 stelem.r4)--> state 137 [stelem.r4]
+ { 1, 138 }, // cell# 136 : state 1 [start] --(136 stelem.r8)--> state 138 [stelem.r8]
+ { 1, 139 }, // cell# 137 : state 1 [start] --(137 stelem.ref)--> state 139 [stelem.ref]
+ { 1, 140 }, // cell# 138 : state 1 [start] --(138 ldelem)--> state 140 [ldelem]
+ { 1, 141 }, // cell# 139 : state 1 [start] --(139 stelem)--> state 141 [stelem]
+ { 1, 142 }, // cell# 140 : state 1 [start] --(140 unbox.any)--> state 142 [unbox.any]
+ { 1, 143 }, // cell# 141 : state 1 [start] --(141 conv.ovf.i1)--> state 143 [conv.ovf.i1]
+ { 1, 144 }, // cell# 142 : state 1 [start] --(142 conv.ovf.u1)--> state 144 [conv.ovf.u1]
+ { 1, 145 }, // cell# 143 : state 1 [start] --(143 conv.ovf.i2)--> state 145 [conv.ovf.i2]
+ { 1, 146 }, // cell# 144 : state 1 [start] --(144 conv.ovf.u2)--> state 146 [conv.ovf.u2]
+ { 1, 147 }, // cell# 145 : state 1 [start] --(145 conv.ovf.i4)--> state 147 [conv.ovf.i4]
+ { 1, 148 }, // cell# 146 : state 1 [start] --(146 conv.ovf.u4)--> state 148 [conv.ovf.u4]
+ { 1, 149 }, // cell# 147 : state 1 [start] --(147 conv.ovf.i8)--> state 149 [conv.ovf.i8]
+ { 1, 150 }, // cell# 148 : state 1 [start] --(148 conv.ovf.u8)--> state 150 [conv.ovf.u8]
+ { 1, 151 }, // cell# 149 : state 1 [start] --(149 refanyval)--> state 151 [refanyval]
+ { 1, 152 }, // cell# 150 : state 1 [start] --(150 ckfinite)--> state 152 [ckfinite]
+ { 1, 153 }, // cell# 151 : state 1 [start] --(151 mkrefany)--> state 153 [mkrefany]
+ { 1, 154 }, // cell# 152 : state 1 [start] --(152 ldtoken)--> state 154 [ldtoken]
+ { 1, 155 }, // cell# 153 : state 1 [start] --(153 conv.u2)--> state 155 [conv.u2]
+ { 1, 156 }, // cell# 154 : state 1 [start] --(154 conv.u1)--> state 156 [conv.u1]
+ { 1, 157 }, // cell# 155 : state 1 [start] --(155 conv.i)--> state 157 [conv.i]
+ { 1, 158 }, // cell# 156 : state 1 [start] --(156 conv.ovf.i)--> state 158 [conv.ovf.i]
+ { 1, 159 }, // cell# 157 : state 1 [start] --(157 conv.ovf.u)--> state 159 [conv.ovf.u]
+ { 1, 160 }, // cell# 158 : state 1 [start] --(158 add.ovf)--> state 160 [add.ovf]
+ { 1, 161 }, // cell# 159 : state 1 [start] --(159 mul.ovf)--> state 161 [mul.ovf]
+ { 1, 162 }, // cell# 160 : state 1 [start] --(160 sub.ovf)--> state 162 [sub.ovf]
+ { 1, 163 }, // cell# 161 : state 1 [start] --(161 leave.s)--> state 163 [leave.s]
+ { 1, 164 }, // cell# 162 : state 1 [start] --(162 stind.i)--> state 164 [stind.i]
+ { 1, 165 }, // cell# 163 : state 1 [start] --(163 conv.u)--> state 165 [conv.u]
+ { 1, 166 }, // cell# 164 : state 1 [start] --(164 prefix.n)--> state 166 [prefix.n]
+ { 1, 167 }, // cell# 165 : state 1 [start] --(165 arglist)--> state 167 [arglist]
+ { 1, 168 }, // cell# 166 : state 1 [start] --(166 ceq)--> state 168 [ceq]
+ { 1, 169 }, // cell# 167 : state 1 [start] --(167 cgt)--> state 169 [cgt]
+ { 1, 170 }, // cell# 168 : state 1 [start] --(168 cgt.un)--> state 170 [cgt.un]
+ { 1, 171 }, // cell# 169 : state 1 [start] --(169 clt)--> state 171 [clt]
+ { 1, 172 }, // cell# 170 : state 1 [start] --(170 clt.un)--> state 172 [clt.un]
+ { 1, 173 }, // cell# 171 : state 1 [start] --(171 ldftn)--> state 173 [ldftn]
+ { 1, 174 }, // cell# 172 : state 1 [start] --(172 ldvirtftn)--> state 174 [ldvirtftn]
+ { 1, 175 }, // cell# 173 : state 1 [start] --(173 long.loc.arg)--> state 175 [long.loc.arg]
+ { 1, 176 }, // cell# 174 : state 1 [start] --(174 localloc)--> state 176 [localloc]
+ { 1, 177 }, // cell# 175 : state 1 [start] --(175 unaligned)--> state 177 [unaligned]
+ { 1, 178 }, // cell# 176 : state 1 [start] --(176 volatile)--> state 178 [volatile]
+ { 1, 179 }, // cell# 177 : state 1 [start] --(177 tailcall)--> state 179 [tailcall]
+ { 1, 180 }, // cell# 178 : state 1 [start] --(178 initobj)--> state 180 [initobj]
+ { 1, 181 }, // cell# 179 : state 1 [start] --(179 constrained)--> state 181 [constrained]
+ { 1, 182 }, // cell# 180 : state 1 [start] --(180 cpblk)--> state 182 [cpblk]
+ { 1, 183 }, // cell# 181 : state 1 [start] --(181 initblk)--> state 183 [initblk]
+ { 1, 184 }, // cell# 182 : state 1 [start] --(182 rethrow)--> state 184 [rethrow]
+ { 1, 185 }, // cell# 183 : state 1 [start] --(183 sizeof)--> state 185 [sizeof]
+ { 1, 186 }, // cell# 184 : state 1 [start] --(184 refanytype)--> state 186 [refanytype]
+ { 1, 187 }, // cell# 185 : state 1 [start] --(185 readonly)--> state 187 [readonly]
+ { 1, 188 }, // cell# 186 : state 1 [start] --(186 ldarga.s.normed)--> state 188 [ldarga.s.normed]
+ { 1, 189 }, // cell# 187 : state 1 [start] --(187 ldloca.s.normed)--> state 189 [ldloca.s.normed]
+ { 3, 223 }, // cell# 188 : state 3 [ldarg.0] --(2 ldarg.1)--> state 223 [ldarg.0 -> ldarg.1]
+ { 3, 227 }, // cell# 189 : state 3 [ldarg.0] --(3 ldarg.2)--> state 227 [ldarg.0 -> ldarg.2]
+ { 3, 229 }, // cell# 190 : state 3 [ldarg.0] --(4 ldarg.3)--> state 229 [ldarg.0 -> ldarg.3]
+ { 4, 192 }, // cell# 191 : state 4 [ldarg.1] --(107 ldfld)--> state 192 [ldarg.1 -> ldfld]
+ { 5, 193 }, // cell# 192 : state 5 [ldarg.2] --(107 ldfld)--> state 193 [ldarg.2 -> ldfld]
+ { 6, 194 }, // cell# 193 : state 6 [ldarg.3] --(107 ldfld)--> state 194 [ldarg.3 -> ldfld]
+ { 11, 199 }, // cell# 194 : state 11 [stloc.0] --(5 ldloc.0)--> state 199 [stloc.0 -> ldloc.0]
+ { 12, 200 }, // cell# 195 : state 12 [stloc.1] --(6 ldloc.1)--> state 200 [stloc.1 -> ldloc.1]
+ { 13, 201 }, // cell# 196 : state 13 [stloc.2] --(7 ldloc.2)--> state 201 [stloc.2 -> ldloc.2]
+ { 14, 202 }, // cell# 197 : state 14 [stloc.3] --(8 ldloc.3)--> state 202 [stloc.3 -> ldloc.3]
+ { 16, 195 }, // cell# 198 : state 16 [ldarga.s] --(107 ldfld)--> state 195 [ldarga.s -> ldfld]
+ { 19, 196 }, // cell# 199 : state 19 [ldloca.s] --(107 ldfld)--> state 196 [ldloca.s -> ldfld]
+ { 35, 203 }, // cell# 200 : state 35 [ldc.r4] --(74 add)--> state 203 [ldc.r4 -> add]
+ { 35, 204 }, // cell# 201 : state 35 [ldc.r4] --(75 sub)--> state 204 [ldc.r4 -> sub]
+ { 35, 205 }, // cell# 202 : state 35 [ldc.r4] --(76 mul)--> state 205 [ldc.r4 -> mul]
+ { 35, 206 }, // cell# 203 : state 35 [ldc.r4] --(77 div)--> state 206 [ldc.r4 -> div]
+ { 96, 215 }, // cell# 204 : state 96 [conv.r8] --(76 mul)--> state 215 [conv.r8 -> mul]
+ { 96, 216 }, // cell# 205 : state 96 [conv.r8] --(77 div)--> state 216 [conv.r8 -> div]
+ {181, 190 }, // cell# 206 : state 181 [constrained] --(97 callvirt)--> state 190 [constrained -> callvirt]
+ { 3, 217 }, // cell# 207 : state 3 [ldarg.0] --(21 ldc.i4.0)--> state 217 [ldarg.0 -> ldc.i4.0]
+ { 36, 207 }, // cell# 208 : state 36 [ldc.r8] --(74 add)--> state 207 [ldc.r8 -> add]
+ { 36, 208 }, // cell# 209 : state 36 [ldc.r8] --(75 sub)--> state 208 [ldc.r8 -> sub]
+ { 36, 209 }, // cell# 210 : state 36 [ldc.r8] --(76 mul)--> state 209 [ldc.r8 -> mul]
+ { 36, 210 }, // cell# 211 : state 36 [ldc.r8] --(77 div)--> state 210 [ldc.r8 -> div]
+ { 95, 211 }, // cell# 212 : state 95 [conv.r4] --(74 add)--> state 211 [conv.r4 -> add]
+ { 95, 212 }, // cell# 213 : state 95 [conv.r4] --(75 sub)--> state 212 [conv.r4 -> sub]
+ { 95, 213 }, // cell# 214 : state 95 [conv.r4] --(76 mul)--> state 213 [conv.r4 -> mul]
+ { 95, 214 }, // cell# 215 : state 95 [conv.r4] --(77 div)--> state 214 [conv.r4 -> div]
+ {188, 197 }, // cell# 216 : state 188 [ldarga.s.normed] --(107 ldfld)--> state 197 [ldarga.s.normed -> ldfld]
+ {189, 198 }, // cell# 217 : state 189 [ldloca.s.normed] --(107 ldfld)--> state 198 [ldloca.s.normed -> ldfld]
+ {191, 242 }, // cell# 218 : state 191 [ldarg.0 -> ldfld] --(2 ldarg.1)--> state 242 [ldarg.0 -> ldfld -> ldarg.1]
+ { 3, 219 }, // cell# 219 : state 3 [ldarg.0] --(33 ldc.r4)--> state 219 [ldarg.0 -> ldc.r4]
+ { 3, 221 }, // cell# 220 : state 3 [ldarg.0] --(34 ldc.r8)--> state 221 [ldarg.0 -> ldc.r8]
+ {195, 246 }, // cell# 221 : state 195 [ldarga.s -> ldfld] --(14 ldarga.s)--> state 246 [ldarga.s -> ldfld -> ldarga.s]
+ { 3, 231 }, // cell# 222 : state 3 [ldarg.0] --(36 dup)--> state 231 [ldarg.0 -> dup]
+ {217, 218 }, // cell# 223 : state 217 [ldarg.0 -> ldc.i4.0] --(109 stfld)--> state 218 [ldarg.0 -> ldc.i4.0 -> stfld]
+ {219, 220 }, // cell# 224 : state 219 [ldarg.0 -> ldc.r4] --(109 stfld)--> state 220 [ldarg.0 -> ldc.r4 -> stfld]
+ {221, 222 }, // cell# 225 : state 221 [ldarg.0 -> ldc.r8] --(109 stfld)--> state 222 [ldarg.0 -> ldc.r8 -> stfld]
+ {223, 224 }, // cell# 226 : state 223 [ldarg.0 -> ldarg.1] --(107 ldfld)--> state 224 [ldarg.0 -> ldarg.1 -> ldfld]
+ {224, 225 }, // cell# 227 : state 224 [ldarg.0 -> ldarg.1 -> ldfld] --(109 stfld)--> state 225 [ldarg.0 -> ldarg.1 -> ldfld -> stfld]
+ {223, 226 }, // cell# 228 : state 223 [ldarg.0 -> ldarg.1] --(109 stfld)--> state 226 [ldarg.0 -> ldarg.1 -> stfld]
+ {227, 228 }, // cell# 229 : state 227 [ldarg.0 -> ldarg.2] --(109 stfld)--> state 228 [ldarg.0 -> ldarg.2 -> stfld]
+ {229, 230 }, // cell# 230 : state 229 [ldarg.0 -> ldarg.3] --(109 stfld)--> state 230 [ldarg.0 -> ldarg.3 -> stfld]
+ {231, 232 }, // cell# 231 : state 231 [ldarg.0 -> dup] --(107 ldfld)--> state 232 [ldarg.0 -> dup -> ldfld]
+ {232, 233 }, // cell# 232 : state 232 [ldarg.0 -> dup -> ldfld] --(2 ldarg.1)--> state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1]
+ {233, 234 }, // cell# 233 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(74 add)--> state 234 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add]
+ {233, 236 }, // cell# 234 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(75 sub)--> state 236 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub]
+ {233, 238 }, // cell# 235 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(76 mul)--> state 238 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul]
+ {233, 240 }, // cell# 236 : state 233 [ldarg.0 -> dup -> ldfld -> ldarg.1] --(77 div)--> state 240 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div]
+ {234, 235 }, // cell# 237 : state 234 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add] --(109 stfld)--> state 235 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add -> stfld]
+ {236, 237 }, // cell# 238 : state 236 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub] --(109 stfld)--> state 237 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub -> stfld]
+ {238, 239 }, // cell# 239 : state 238 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul] --(109 stfld)--> state 239 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul -> stfld]
+ {240, 241 }, // cell# 240 : state 240 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div] --(109 stfld)--> state 241 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div -> stfld]
+ {242, 243 }, // cell# 241 : state 242 [ldarg.0 -> ldfld -> ldarg.1] --(107 ldfld)--> state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld]
+ {243, 244 }, // cell# 242 : state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld] --(74 add)--> state 244 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> add]
+ {243, 245 }, // cell# 243 : state 243 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld] --(75 sub)--> state 245 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> sub]
+ {246, 247 }, // cell# 244 : state 246 [ldarga.s -> ldfld -> ldarga.s] --(107 ldfld)--> state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld]
+ {247, 248 }, // cell# 245 : state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld] --(74 add)--> state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
+ {247, 249 }, // cell# 246 : state 247 [ldarga.s -> ldfld -> ldarga.s -> ldfld] --(75 sub)--> state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
+ { 0, 0 }, // cell# 247
+ { 0, 0 }, // cell# 248
+ { 0, 0 }, // cell# 249
+ { 0, 0 }, // cell# 250
+ { 0, 0 }, // cell# 251
+ { 0, 0 }, // cell# 252
+ { 0, 0 }, // cell# 253
+ { 0, 0 }, // cell# 254
+ { 0, 0 }, // cell# 255
+ { 0, 0 }, // cell# 256
+ { 0, 0 }, // cell# 257
+ { 0, 0 }, // cell# 258
+ { 0, 0 }, // cell# 259
+ { 0, 0 }, // cell# 260
+ { 0, 0 }, // cell# 261
+ { 0, 0 }, // cell# 262
+ { 0, 0 }, // cell# 263
+ { 0, 0 }, // cell# 264
+ { 0, 0 }, // cell# 265
+ { 0, 0 }, // cell# 266
+ { 0, 0 }, // cell# 267
+ { 0, 0 }, // cell# 268
+ { 0, 0 }, // cell# 269
+ { 0, 0 }, // cell# 270
+ { 0, 0 }, // cell# 271
+ { 0, 0 }, // cell# 272
+ { 0, 0 }, // cell# 273
+ { 0, 0 }, // cell# 274
+ { 0, 0 }, // cell# 275
+ { 0, 0 }, // cell# 276
+ { 0, 0 }, // cell# 277
+ { 0, 0 }, // cell# 278
+ { 0, 0 }, // cell# 279
+ { 0, 0 }, // cell# 280
+ { 0, 0 }, // cell# 281
+ { 0, 0 }, // cell# 282
+ { 0, 0 }, // cell# 283
+ { 0, 0 }, // cell# 284
+ { 0, 0 }, // cell# 285
+ { 0, 0 }, // cell# 286
+ { 0, 0 }, // cell# 287
+ { 0, 0 }, // cell# 288
+ { 0, 0 }, // cell# 289
+ { 0, 0 }, // cell# 290
+ { 0, 0 }, // cell# 291
+ { 0, 0 }, // cell# 292
+ { 3, 191 }, // cell# 293 : state 3 [ldarg.0] --(107 ldfld)--> state 191 [ldarg.0 -> ldfld]
+ { 0, 0 }, // cell# 294
+ { 0, 0 }, // cell# 295
+ { 0, 0 }, // cell# 296
+ { 0, 0 }, // cell# 297
+ { 0, 0 }, // cell# 298
+ { 0, 0 }, // cell# 299
+ { 0, 0 }, // cell# 300
+ { 0, 0 }, // cell# 301
+ { 0, 0 }, // cell# 302
+ { 0, 0 }, // cell# 303
+ { 0, 0 }, // cell# 304
+ { 0, 0 }, // cell# 305
+ { 0, 0 }, // cell# 306
+ { 0, 0 }, // cell# 307
+ { 0, 0 }, // cell# 308
+ { 0, 0 }, // cell# 309
+ { 0, 0 }, // cell# 310
+ { 0, 0 }, // cell# 311
+ { 0, 0 }, // cell# 312
+ { 0, 0 }, // cell# 313
+ { 0, 0 }, // cell# 314
+ { 0, 0 }, // cell# 315
+ { 0, 0 }, // cell# 316
+ { 0, 0 }, // cell# 317
+ { 0, 0 }, // cell# 318
+ { 0, 0 }, // cell# 319
+ { 0, 0 }, // cell# 320
+ { 0, 0 }, // cell# 321
+ { 0, 0 }, // cell# 322
+ { 0, 0 }, // cell# 323
+ { 0, 0 }, // cell# 324
+ { 0, 0 }, // cell# 325
+ { 0, 0 }, // cell# 326
+ { 0, 0 }, // cell# 327
+ { 0, 0 }, // cell# 328
+ { 0, 0 }, // cell# 329
+ { 0, 0 }, // cell# 330
+ { 0, 0 }, // cell# 331
+ { 0, 0 }, // cell# 332
+ { 0, 0 }, // cell# 333
+ { 0, 0 }, // cell# 334
+ { 0, 0 }, // cell# 335
+ { 0, 0 }, // cell# 336
+ { 0, 0 }, // cell# 337
+ { 0, 0 }, // cell# 338
+ { 0, 0 }, // cell# 339
+ { 0, 0 }, // cell# 340
+ { 0, 0 }, // cell# 341
+ { 0, 0 }, // cell# 342
+ { 0, 0 }, // cell# 343
+ { 0, 0 }, // cell# 344
+ { 0, 0 }, // cell# 345
+ { 0, 0 }, // cell# 346
+ { 0, 0 }, // cell# 347
+ { 0, 0 }, // cell# 348
+ { 0, 0 }, // cell# 349
+ { 0, 0 }, // cell# 350
+ { 0, 0 }, // cell# 351
+ { 0, 0 }, // cell# 352
+ { 0, 0 }, // cell# 353
+ { 0, 0 }, // cell# 354
+ { 0, 0 }, // cell# 355
+ { 0, 0 }, // cell# 356
+ { 0, 0 }, // cell# 357
+ { 0, 0 }, // cell# 358
+ { 0, 0 }, // cell# 359
+ { 0, 0 }, // cell# 360
+ { 0, 0 }, // cell# 361
+ { 0, 0 }, // cell# 362
+ { 0, 0 }, // cell# 363
+ { 0, 0 }, // cell# 364
+ { 0, 0 }, // cell# 365
+ { 0, 0 }, // cell# 366
+ { 0, 0 }, // cell# 367
+ { 0, 0 }, // cell# 368
+ { 0, 0 }, // cell# 369
+ { 0, 0 }, // cell# 370
+ { 0, 0 }, // cell# 371
+ { 0, 0 }, // cell# 372
+ { 0, 0 }, // cell# 373
+ { 0, 0 }, // cell# 374
+ { 0, 0 }, // cell# 375
+ { 0, 0 }, // cell# 376
+ { 0, 0 }, // cell# 377
+ { 0, 0 }, // cell# 378
+ { 0, 0 }, // cell# 379
+ { 0, 0 }, // cell# 380
+ { 0, 0 }, // cell# 381
+ { 0, 0 }, // cell# 382
+ { 0, 0 }, // cell# 383
+ { 0, 0 }, // cell# 384
+ { 0, 0 }, // cell# 385
+ { 0, 0 }, // cell# 386
+ { 0, 0 }, // cell# 387
+ { 0, 0 }, // cell# 388
+ { 0, 0 }, // cell# 389
+ { 0, 0 }, // cell# 390
+ { 0, 0 }, // cell# 391
+ { 0, 0 }, // cell# 392
+ { 0, 0 }, // cell# 393
+ { 0, 0 }, // cell# 394
+ { 0, 0 }, // cell# 395
+ { 0, 0 }, // cell# 396
+ { 0, 0 }, // cell# 397
+ { 0, 0 }, // cell# 398
+ { 0, 0 }, // cell# 399
+ { 0, 0 }, // cell# 400
+ { 0, 0 }, // cell# 401
+ { 0, 0 }, // cell# 402
+ { 0, 0 }, // cell# 403
+ { 0, 0 }, // cell# 404
+ { 0, 0 }, // cell# 405
+ { 0, 0 }, // cell# 406
+ { 0, 0 }, // cell# 407
+ { 0, 0 }, // cell# 408
+ { 0, 0 }, // cell# 409
+ { 0, 0 }, // cell# 410
+ { 0, 0 }, // cell# 411
+ { 0, 0 }, // cell# 412
+ { 0, 0 }, // cell# 413
+ { 0, 0 }, // cell# 414
+ { 0, 0 }, // cell# 415
+ { 0, 0 }, // cell# 416
+ { 0, 0 }, // cell# 417
+};
+// clang-format on
+
+const JumpTableCell* gp_SMJumpTableCells = g_SMJumpTableCells;
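Reading the two generated tables together: each state row ends with an offset into the jump-table cell array, and a cell encodes a real transition only when its source field matches the state being probed (which is why the unreferenced cells are left as { 0, 0 }). A minimal lookup sketch follows; it assumes the SMState field is named jumpTableByteOffset and the cell fields srcState/destState — the actual struct definitions live in the JIT's sm headers elsewhere in this change, so treat the names as illustrative.

    // Hedged sketch of a state-machine transition lookup; field names are assumed.
    static unsigned GetDestStateSketch(const SMState*       states,   // e.g. gp_SMStates
                                       const JumpTableCell* cells,    // e.g. gp_SMJumpTableCells
                                       unsigned             srcState, // current state id
                                       SM_OPCODE            opcode)   // next SM opcode seen
    {
        // Each state owns a slice of the cell array, addressed by a byte offset.
        const JumpTableCell* jumpTable =
            (const JumpTableCell*)((const unsigned char*)cells + states[srcState].jumpTableByteOffset);
        const JumpTableCell* cell = jumpTable + opcode;

        // The cell is valid only if it was generated for this source state;
        // otherwise there is no longer sequence to match and the caller restarts.
        return (cell->srcState == srcState) ? cell->destState : 0;
    }

For example, state 181 [constrained] carries offset 218, and probing it with opcode 97 (callvirt) lands on cell# 206 {181, 190}, i.e. state 190 [constrained -> callvirt], matching the comments above.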
diff --git a/src/jit/smopcode.def b/src/jit/smopcode.def
new file mode 100644
index 0000000000..aa918601c2
--- /dev/null
+++ b/src/jit/smopcode.def
@@ -0,0 +1,205 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*******************************************************************************************
+ ** **
+ ** Auto-generated file. Do NOT modify! **
+ ** **
+ ** smopcode.def - Opcodes used in the state machine in the JIT.                           **
+ ** **
+ ** To generate this file, run "makeSmOpcodeDef.pl > smopcode.def" **
+ ** **
+ *******************************************************************************************/
+
+//
+// SM opcode name SM opcode string
+// -------------------------------------------------------------------------------------------
+SMOPDEF(SM_NOSHOW, "noshow") // 0
+SMOPDEF(SM_LDARG_0, "ldarg.0") // 1
+SMOPDEF(SM_LDARG_1, "ldarg.1") // 2
+SMOPDEF(SM_LDARG_2, "ldarg.2") // 3
+SMOPDEF(SM_LDARG_3, "ldarg.3") // 4
+SMOPDEF(SM_LDLOC_0, "ldloc.0") // 5
+SMOPDEF(SM_LDLOC_1, "ldloc.1") // 6
+SMOPDEF(SM_LDLOC_2, "ldloc.2") // 7
+SMOPDEF(SM_LDLOC_3, "ldloc.3") // 8
+SMOPDEF(SM_STLOC_0, "stloc.0") // 9
+SMOPDEF(SM_STLOC_1, "stloc.1") // 10
+SMOPDEF(SM_STLOC_2, "stloc.2") // 11
+SMOPDEF(SM_STLOC_3, "stloc.3") // 12
+SMOPDEF(SM_LDARG_S, "ldarg.s") // 13
+SMOPDEF(SM_LDARGA_S, "ldarga.s") // 14
+SMOPDEF(SM_STARG_S, "starg.s") // 15
+SMOPDEF(SM_LDLOC_S, "ldloc.s") // 16
+SMOPDEF(SM_LDLOCA_S, "ldloca.s") // 17
+SMOPDEF(SM_STLOC_S, "stloc.s") // 18
+SMOPDEF(SM_LDNULL, "ldnull") // 19
+SMOPDEF(SM_LDC_I4_M1, "ldc.i4.m1") // 20
+SMOPDEF(SM_LDC_I4_0, "ldc.i4.0") // 21
+SMOPDEF(SM_LDC_I4_1, "ldc.i4.1") // 22
+SMOPDEF(SM_LDC_I4_2, "ldc.i4.2") // 23
+SMOPDEF(SM_LDC_I4_3, "ldc.i4.3") // 24
+SMOPDEF(SM_LDC_I4_4, "ldc.i4.4") // 25
+SMOPDEF(SM_LDC_I4_5, "ldc.i4.5") // 26
+SMOPDEF(SM_LDC_I4_6, "ldc.i4.6") // 27
+SMOPDEF(SM_LDC_I4_7, "ldc.i4.7") // 28
+SMOPDEF(SM_LDC_I4_8, "ldc.i4.8") // 29
+SMOPDEF(SM_LDC_I4_S, "ldc.i4.s") // 30
+SMOPDEF(SM_LDC_I4, "ldc.i4") // 31
+SMOPDEF(SM_LDC_I8, "ldc.i8") // 32
+SMOPDEF(SM_LDC_R4, "ldc.r4") // 33
+SMOPDEF(SM_LDC_R8, "ldc.r8") // 34
+SMOPDEF(SM_UNUSED, "unused") // 35
+SMOPDEF(SM_DUP, "dup") // 36
+SMOPDEF(SM_POP, "pop") // 37
+SMOPDEF(SM_CALL, "call") // 38
+SMOPDEF(SM_CALLI, "calli") // 39
+SMOPDEF(SM_RET, "ret") // 40
+SMOPDEF(SM_BR_S, "br.s") // 41
+SMOPDEF(SM_BRFALSE_S, "brfalse.s") // 42
+SMOPDEF(SM_BRTRUE_S, "brtrue.s") // 43
+SMOPDEF(SM_BEQ_S, "beq.s") // 44
+SMOPDEF(SM_BGE_S, "bge.s") // 45
+SMOPDEF(SM_BGT_S, "bgt.s") // 46
+SMOPDEF(SM_BLE_S, "ble.s") // 47
+SMOPDEF(SM_BLT_S, "blt.s") // 48
+SMOPDEF(SM_BNE_UN_S, "bne.un.s") // 49
+SMOPDEF(SM_BGE_UN_S, "bge.un.s") // 50
+SMOPDEF(SM_BGT_UN_S, "bgt.un.s") // 51
+SMOPDEF(SM_BLE_UN_S, "ble.un.s") // 52
+SMOPDEF(SM_BLT_UN_S, "blt.un.s") // 53
+SMOPDEF(SM_LONG_BRANCH, "long.branch") // 54
+SMOPDEF(SM_SWITCH, "switch") // 55
+SMOPDEF(SM_LDIND_I1, "ldind.i1") // 56
+SMOPDEF(SM_LDIND_U1, "ldind.u1") // 57
+SMOPDEF(SM_LDIND_I2, "ldind.i2") // 58
+SMOPDEF(SM_LDIND_U2, "ldind.u2") // 59
+SMOPDEF(SM_LDIND_I4, "ldind.i4") // 60
+SMOPDEF(SM_LDIND_U4, "ldind.u4") // 61
+SMOPDEF(SM_LDIND_I8, "ldind.i8") // 62
+SMOPDEF(SM_LDIND_I, "ldind.i") // 63
+SMOPDEF(SM_LDIND_R4, "ldind.r4") // 64
+SMOPDEF(SM_LDIND_R8, "ldind.r8") // 65
+SMOPDEF(SM_LDIND_REF, "ldind.ref") // 66
+SMOPDEF(SM_STIND_REF, "stind.ref") // 67
+SMOPDEF(SM_STIND_I1, "stind.i1") // 68
+SMOPDEF(SM_STIND_I2, "stind.i2") // 69
+SMOPDEF(SM_STIND_I4, "stind.i4") // 70
+SMOPDEF(SM_STIND_I8, "stind.i8") // 71
+SMOPDEF(SM_STIND_R4, "stind.r4") // 72
+SMOPDEF(SM_STIND_R8, "stind.r8") // 73
+SMOPDEF(SM_ADD, "add") // 74
+SMOPDEF(SM_SUB, "sub") // 75
+SMOPDEF(SM_MUL, "mul") // 76
+SMOPDEF(SM_DIV, "div") // 77
+SMOPDEF(SM_DIV_UN, "div.un") // 78
+SMOPDEF(SM_REM, "rem") // 79
+SMOPDEF(SM_REM_UN, "rem.un") // 80
+SMOPDEF(SM_AND, "and") // 81
+SMOPDEF(SM_OR, "or") // 82
+SMOPDEF(SM_XOR, "xor") // 83
+SMOPDEF(SM_SHL, "shl") // 84
+SMOPDEF(SM_SHR, "shr") // 85
+SMOPDEF(SM_SHR_UN, "shr.un") // 86
+SMOPDEF(SM_NEG, "neg") // 87
+SMOPDEF(SM_NOT, "not") // 88
+SMOPDEF(SM_CONV_I1, "conv.i1") // 89
+SMOPDEF(SM_CONV_I2, "conv.i2") // 90
+SMOPDEF(SM_CONV_I4, "conv.i4") // 91
+SMOPDEF(SM_CONV_I8, "conv.i8") // 92
+SMOPDEF(SM_CONV_R4, "conv.r4") // 93
+SMOPDEF(SM_CONV_R8, "conv.r8") // 94
+SMOPDEF(SM_CONV_U4, "conv.u4") // 95
+SMOPDEF(SM_CONV_U8, "conv.u8") // 96
+SMOPDEF(SM_CALLVIRT, "callvirt") // 97
+SMOPDEF(SM_CPOBJ, "cpobj") // 98
+SMOPDEF(SM_LDOBJ, "ldobj") // 99
+SMOPDEF(SM_LDSTR, "ldstr") // 100
+SMOPDEF(SM_NEWOBJ, "newobj") // 101
+SMOPDEF(SM_CASTCLASS, "castclass") // 102
+SMOPDEF(SM_ISINST, "isinst") // 103
+SMOPDEF(SM_CONV_R_UN, "conv.r.un") // 104
+SMOPDEF(SM_UNBOX, "unbox") // 105
+SMOPDEF(SM_THROW, "throw") // 106
+SMOPDEF(SM_LDFLD, "ldfld") // 107
+SMOPDEF(SM_LDFLDA, "ldflda") // 108
+SMOPDEF(SM_STFLD, "stfld") // 109
+SMOPDEF(SM_LDSFLD, "ldsfld") // 110
+SMOPDEF(SM_LDSFLDA, "ldsflda") // 111
+SMOPDEF(SM_STSFLD, "stsfld") // 112
+SMOPDEF(SM_STOBJ, "stobj") // 113
+SMOPDEF(SM_OVF_NOTYPE_UN, "ovf.notype.un") // 114
+SMOPDEF(SM_BOX, "box") // 115
+SMOPDEF(SM_NEWARR, "newarr") // 116
+SMOPDEF(SM_LDLEN, "ldlen") // 117
+SMOPDEF(SM_LDELEMA, "ldelema") // 118
+SMOPDEF(SM_LDELEM_I1, "ldelem.i1") // 119
+SMOPDEF(SM_LDELEM_U1, "ldelem.u1") // 120
+SMOPDEF(SM_LDELEM_I2, "ldelem.i2") // 121
+SMOPDEF(SM_LDELEM_U2, "ldelem.u2") // 122
+SMOPDEF(SM_LDELEM_I4, "ldelem.i4") // 123
+SMOPDEF(SM_LDELEM_U4, "ldelem.u4") // 124
+SMOPDEF(SM_LDELEM_I8, "ldelem.i8") // 125
+SMOPDEF(SM_LDELEM_I, "ldelem.i") // 126
+SMOPDEF(SM_LDELEM_R4, "ldelem.r4") // 127
+SMOPDEF(SM_LDELEM_R8, "ldelem.r8") // 128
+SMOPDEF(SM_LDELEM_REF, "ldelem.ref") // 129
+SMOPDEF(SM_STELEM_I, "stelem.i") // 130
+SMOPDEF(SM_STELEM_I1, "stelem.i1") // 131
+SMOPDEF(SM_STELEM_I2, "stelem.i2") // 132
+SMOPDEF(SM_STELEM_I4, "stelem.i4") // 133
+SMOPDEF(SM_STELEM_I8, "stelem.i8") // 134
+SMOPDEF(SM_STELEM_R4, "stelem.r4") // 135
+SMOPDEF(SM_STELEM_R8, "stelem.r8") // 136
+SMOPDEF(SM_STELEM_REF, "stelem.ref") // 137
+SMOPDEF(SM_LDELEM, "ldelem") // 138
+SMOPDEF(SM_STELEM, "stelem") // 139
+SMOPDEF(SM_UNBOX_ANY, "unbox.any") // 140
+SMOPDEF(SM_CONV_OVF_I1, "conv.ovf.i1") // 141
+SMOPDEF(SM_CONV_OVF_U1, "conv.ovf.u1") // 142
+SMOPDEF(SM_CONV_OVF_I2, "conv.ovf.i2") // 143
+SMOPDEF(SM_CONV_OVF_U2, "conv.ovf.u2") // 144
+SMOPDEF(SM_CONV_OVF_I4, "conv.ovf.i4") // 145
+SMOPDEF(SM_CONV_OVF_U4, "conv.ovf.u4") // 146
+SMOPDEF(SM_CONV_OVF_I8, "conv.ovf.i8") // 147
+SMOPDEF(SM_CONV_OVF_U8, "conv.ovf.u8") // 148
+SMOPDEF(SM_REFANYVAL, "refanyval") // 149
+SMOPDEF(SM_CKFINITE, "ckfinite") // 150
+SMOPDEF(SM_MKREFANY, "mkrefany") // 151
+SMOPDEF(SM_LDTOKEN, "ldtoken") // 152
+SMOPDEF(SM_CONV_U2, "conv.u2") // 153
+SMOPDEF(SM_CONV_U1, "conv.u1") // 154
+SMOPDEF(SM_CONV_I, "conv.i") // 155
+SMOPDEF(SM_CONV_OVF_I, "conv.ovf.i") // 156
+SMOPDEF(SM_CONV_OVF_U, "conv.ovf.u") // 157
+SMOPDEF(SM_ADD_OVF, "add.ovf") // 158
+SMOPDEF(SM_MUL_OVF, "mul.ovf") // 159
+SMOPDEF(SM_SUB_OVF, "sub.ovf") // 160
+SMOPDEF(SM_LEAVE_S, "leave.s") // 161
+SMOPDEF(SM_STIND_I, "stind.i") // 162
+SMOPDEF(SM_CONV_U, "conv.u") // 163
+SMOPDEF(SM_PREFIX_N, "prefix.n") // 164
+SMOPDEF(SM_ARGLIST, "arglist") // 165
+SMOPDEF(SM_CEQ, "ceq") // 166
+SMOPDEF(SM_CGT, "cgt") // 167
+SMOPDEF(SM_CGT_UN, "cgt.un") // 168
+SMOPDEF(SM_CLT, "clt") // 169
+SMOPDEF(SM_CLT_UN, "clt.un") // 170
+SMOPDEF(SM_LDFTN, "ldftn") // 171
+SMOPDEF(SM_LDVIRTFTN, "ldvirtftn") // 172
+SMOPDEF(SM_LONG_LOC_ARG, "long.loc.arg") // 173
+SMOPDEF(SM_LOCALLOC, "localloc") // 174
+SMOPDEF(SM_UNALIGNED, "unaligned") // 175
+SMOPDEF(SM_VOLATILE, "volatile") // 176
+SMOPDEF(SM_TAILCALL, "tailcall") // 177
+SMOPDEF(SM_INITOBJ, "initobj") // 178
+SMOPDEF(SM_CONSTRAINED, "constrained") // 179
+SMOPDEF(SM_CPBLK, "cpblk") // 180
+SMOPDEF(SM_INITBLK, "initblk") // 181
+SMOPDEF(SM_RETHROW, "rethrow") // 182
+SMOPDEF(SM_SIZEOF, "sizeof") // 183
+SMOPDEF(SM_REFANYTYPE, "refanytype") // 184
+SMOPDEF(SM_READONLY, "readonly") // 185
+SMOPDEF(SM_LDARGA_S_NORMED, "ldarga.s.normed") // 186
+SMOPDEF(SM_LDLOCA_S_NORMED, "ldloca.s.normed") // 187
diff --git a/src/jit/smopcodemap.def b/src/jit/smopcodemap.def
new file mode 100644
index 0000000000..7b2f71fe6f
--- /dev/null
+++ b/src/jit/smopcodemap.def
@@ -0,0 +1,323 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*******************************************************************************************
+ ** **
+ ** smopcodemap.def - Mapping from opcodes in the EE to opcodes in the JIT state machine.  **
+ ** **
+ *******************************************************************************************/
+
+//
+// EE opcode name EE opcode string State machine opcode name
+// -------------------------------------------------------------------------------------------------------------------------------------------------------
+OPCODEMAP(CEE_NOP, "nop", SM_NOSHOW)
+OPCODEMAP(CEE_BREAK, "break", SM_NOSHOW)
+OPCODEMAP(CEE_LDARG_0, "ldarg.0", SM_LDARG_0)
+OPCODEMAP(CEE_LDARG_1, "ldarg.1", SM_LDARG_1)
+OPCODEMAP(CEE_LDARG_2, "ldarg.2", SM_LDARG_2)
+OPCODEMAP(CEE_LDARG_3, "ldarg.3", SM_LDARG_3)
+OPCODEMAP(CEE_LDLOC_0, "ldloc.0", SM_LDLOC_0)
+OPCODEMAP(CEE_LDLOC_1, "ldloc.1", SM_LDLOC_1)
+OPCODEMAP(CEE_LDLOC_2, "ldloc.2", SM_LDLOC_2)
+OPCODEMAP(CEE_LDLOC_3, "ldloc.3", SM_LDLOC_3)
+OPCODEMAP(CEE_STLOC_0, "stloc.0", SM_STLOC_0)
+OPCODEMAP(CEE_STLOC_1, "stloc.1", SM_STLOC_1)
+OPCODEMAP(CEE_STLOC_2, "stloc.2", SM_STLOC_2)
+OPCODEMAP(CEE_STLOC_3, "stloc.3", SM_STLOC_3)
+OPCODEMAP(CEE_LDARG_S, "ldarg.s", SM_LDARG_S)
+OPCODEMAP(CEE_LDARGA_S, "ldarga.s", SM_LDARGA_S)
+OPCODEMAP(CEE_STARG_S, "starg.s", SM_STARG_S)
+OPCODEMAP(CEE_LDLOC_S, "ldloc.s", SM_LDLOC_S)
+OPCODEMAP(CEE_LDLOCA_S, "ldloca.s", SM_LDLOCA_S)
+OPCODEMAP(CEE_STLOC_S, "stloc.s", SM_STLOC_S)
+OPCODEMAP(CEE_LDNULL, "ldnull", SM_LDNULL)
+OPCODEMAP(CEE_LDC_I4_M1, "ldc.i4.m1", SM_LDC_I4_M1)
+OPCODEMAP(CEE_LDC_I4_0, "ldc.i4.0", SM_LDC_I4_0)
+OPCODEMAP(CEE_LDC_I4_1, "ldc.i4.1", SM_LDC_I4_1)
+OPCODEMAP(CEE_LDC_I4_2, "ldc.i4.2", SM_LDC_I4_2)
+OPCODEMAP(CEE_LDC_I4_3, "ldc.i4.3", SM_LDC_I4_3)
+OPCODEMAP(CEE_LDC_I4_4, "ldc.i4.4", SM_LDC_I4_4)
+OPCODEMAP(CEE_LDC_I4_5, "ldc.i4.5", SM_LDC_I4_5)
+OPCODEMAP(CEE_LDC_I4_6, "ldc.i4.6", SM_LDC_I4_6)
+OPCODEMAP(CEE_LDC_I4_7, "ldc.i4.7", SM_LDC_I4_7)
+OPCODEMAP(CEE_LDC_I4_8, "ldc.i4.8", SM_LDC_I4_8)
+OPCODEMAP(CEE_LDC_I4_S, "ldc.i4.s", SM_LDC_I4_S)
+OPCODEMAP(CEE_LDC_I4, "ldc.i4", SM_LDC_I4)
+OPCODEMAP(CEE_LDC_I8, "ldc.i8", SM_LDC_I8)
+OPCODEMAP(CEE_LDC_R4, "ldc.r4", SM_LDC_R4)
+OPCODEMAP(CEE_LDC_R8, "ldc.r8", SM_LDC_R8)
+OPCODEMAP(CEE_UNUSED49, "unused", SM_UNUSED)
+OPCODEMAP(CEE_DUP, "dup", SM_DUP)
+OPCODEMAP(CEE_POP, "pop", SM_POP)
+OPCODEMAP(CEE_JMP, "jmp", SM_NOSHOW)
+OPCODEMAP(CEE_CALL, "call", SM_CALL)
+OPCODEMAP(CEE_CALLI, "calli", SM_CALLI)
+OPCODEMAP(CEE_RET, "ret", SM_RET)
+OPCODEMAP(CEE_BR_S, "br.s", SM_BR_S)
+OPCODEMAP(CEE_BRFALSE_S, "brfalse.s", SM_BRFALSE_S)
+OPCODEMAP(CEE_BRTRUE_S, "brtrue.s", SM_BRTRUE_S)
+OPCODEMAP(CEE_BEQ_S, "beq.s", SM_BEQ_S)
+OPCODEMAP(CEE_BGE_S, "bge.s", SM_BGE_S)
+OPCODEMAP(CEE_BGT_S, "bgt.s", SM_BGT_S)
+OPCODEMAP(CEE_BLE_S, "ble.s", SM_BLE_S)
+OPCODEMAP(CEE_BLT_S, "blt.s", SM_BLT_S)
+OPCODEMAP(CEE_BNE_UN_S, "bne.un.s", SM_BNE_UN_S)
+OPCODEMAP(CEE_BGE_UN_S, "bge.un.s", SM_BGE_UN_S)
+OPCODEMAP(CEE_BGT_UN_S, "bgt.un.s", SM_BGT_UN_S)
+OPCODEMAP(CEE_BLE_UN_S, "ble.un.s", SM_BLE_UN_S)
+OPCODEMAP(CEE_BLT_UN_S, "blt.un.s", SM_BLT_UN_S)
+OPCODEMAP(CEE_BR, "br", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BRFALSE, "brfalse", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BRTRUE, "brtrue", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BEQ, "beq", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGE, "bge", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGT, "bgt", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLE, "ble", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLT, "blt", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BNE_UN, "bne.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGE_UN, "bge.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BGT_UN, "bgt.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLE_UN, "ble.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_BLT_UN, "blt.un", SM_LONG_BRANCH)
+OPCODEMAP(CEE_SWITCH, "switch", SM_SWITCH)
+OPCODEMAP(CEE_LDIND_I1, "ldind.i1", SM_LDIND_I1)
+OPCODEMAP(CEE_LDIND_U1, "ldind.u1", SM_LDIND_U1)
+OPCODEMAP(CEE_LDIND_I2, "ldind.i2", SM_LDIND_I2)
+OPCODEMAP(CEE_LDIND_U2, "ldind.u2", SM_LDIND_U2)
+OPCODEMAP(CEE_LDIND_I4, "ldind.i4", SM_LDIND_I4)
+OPCODEMAP(CEE_LDIND_U4, "ldind.u4", SM_LDIND_U4)
+OPCODEMAP(CEE_LDIND_I8, "ldind.i8", SM_LDIND_I8)
+OPCODEMAP(CEE_LDIND_I, "ldind.i", SM_LDIND_I)
+OPCODEMAP(CEE_LDIND_R4, "ldind.r4", SM_LDIND_R4)
+OPCODEMAP(CEE_LDIND_R8, "ldind.r8", SM_LDIND_R8)
+OPCODEMAP(CEE_LDIND_REF, "ldind.ref", SM_LDIND_REF)
+OPCODEMAP(CEE_STIND_REF, "stind.ref", SM_STIND_REF)
+OPCODEMAP(CEE_STIND_I1, "stind.i1", SM_STIND_I1)
+OPCODEMAP(CEE_STIND_I2, "stind.i2", SM_STIND_I2)
+OPCODEMAP(CEE_STIND_I4, "stind.i4", SM_STIND_I4)
+OPCODEMAP(CEE_STIND_I8, "stind.i8", SM_STIND_I8)
+OPCODEMAP(CEE_STIND_R4, "stind.r4", SM_STIND_R4)
+OPCODEMAP(CEE_STIND_R8, "stind.r8", SM_STIND_R8)
+OPCODEMAP(CEE_ADD, "add", SM_ADD)
+OPCODEMAP(CEE_SUB, "sub", SM_SUB)
+OPCODEMAP(CEE_MUL, "mul", SM_MUL)
+OPCODEMAP(CEE_DIV, "div", SM_DIV)
+OPCODEMAP(CEE_DIV_UN, "div.un", SM_DIV_UN)
+OPCODEMAP(CEE_REM, "rem", SM_REM)
+OPCODEMAP(CEE_REM_UN, "rem.un", SM_REM_UN)
+OPCODEMAP(CEE_AND, "and", SM_AND)
+OPCODEMAP(CEE_OR, "or", SM_OR)
+OPCODEMAP(CEE_XOR, "xor", SM_XOR)
+OPCODEMAP(CEE_SHL, "shl", SM_SHL)
+OPCODEMAP(CEE_SHR, "shr", SM_SHR)
+OPCODEMAP(CEE_SHR_UN, "shr.un", SM_SHR_UN)
+OPCODEMAP(CEE_NEG, "neg", SM_NEG)
+OPCODEMAP(CEE_NOT, "not", SM_NOT)
+OPCODEMAP(CEE_CONV_I1, "conv.i1", SM_CONV_I1)
+OPCODEMAP(CEE_CONV_I2, "conv.i2", SM_CONV_I2)
+OPCODEMAP(CEE_CONV_I4, "conv.i4", SM_CONV_I4)
+OPCODEMAP(CEE_CONV_I8, "conv.i8", SM_CONV_I8)
+OPCODEMAP(CEE_CONV_R4, "conv.r4", SM_CONV_R4)
+OPCODEMAP(CEE_CONV_R8, "conv.r8", SM_CONV_R8)
+OPCODEMAP(CEE_CONV_U4, "conv.u4", SM_CONV_U4)
+OPCODEMAP(CEE_CONV_U8, "conv.u8", SM_CONV_U8)
+OPCODEMAP(CEE_CALLVIRT, "callvirt", SM_CALLVIRT)
+OPCODEMAP(CEE_CPOBJ, "cpobj", SM_CPOBJ)
+OPCODEMAP(CEE_LDOBJ, "ldobj", SM_LDOBJ)
+OPCODEMAP(CEE_LDSTR, "ldstr", SM_LDSTR)
+OPCODEMAP(CEE_NEWOBJ, "newobj", SM_NEWOBJ)
+OPCODEMAP(CEE_CASTCLASS, "castclass", SM_CASTCLASS)
+OPCODEMAP(CEE_ISINST, "isinst", SM_ISINST)
+OPCODEMAP(CEE_CONV_R_UN, "conv.r.un", SM_CONV_R_UN)
+OPCODEMAP(CEE_UNUSED58, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED1, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNBOX, "unbox", SM_UNBOX)
+OPCODEMAP(CEE_THROW, "throw", SM_THROW)
+OPCODEMAP(CEE_LDFLD, "ldfld", SM_LDFLD)
+OPCODEMAP(CEE_LDFLDA, "ldflda", SM_LDFLDA)
+OPCODEMAP(CEE_STFLD, "stfld", SM_STFLD)
+OPCODEMAP(CEE_LDSFLD, "ldsfld", SM_LDSFLD)
+OPCODEMAP(CEE_LDSFLDA, "ldsflda", SM_LDSFLDA)
+OPCODEMAP(CEE_STSFLD, "stsfld", SM_STSFLD)
+OPCODEMAP(CEE_STOBJ, "stobj", SM_STOBJ)
+OPCODEMAP(CEE_CONV_OVF_I1_UN, "conv.ovf.i1.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I2_UN, "conv.ovf.i2.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I4_UN, "conv.ovf.i4.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I8_UN, "conv.ovf.i8.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U1_UN, "conv.ovf.u1.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U2_UN, "conv.ovf.u2.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U4_UN, "conv.ovf.u4.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U8_UN, "conv.ovf.u8.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_I_UN, "conv.ovf.i.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_CONV_OVF_U_UN, "conv.ovf.u.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_BOX, "box", SM_BOX)
+OPCODEMAP(CEE_NEWARR, "newarr", SM_NEWARR)
+OPCODEMAP(CEE_LDLEN, "ldlen", SM_LDLEN)
+OPCODEMAP(CEE_LDELEMA, "ldelema", SM_LDELEMA)
+OPCODEMAP(CEE_LDELEM_I1, "ldelem.i1", SM_LDELEM_I1)
+OPCODEMAP(CEE_LDELEM_U1, "ldelem.u1", SM_LDELEM_U1)
+OPCODEMAP(CEE_LDELEM_I2, "ldelem.i2", SM_LDELEM_I2)
+OPCODEMAP(CEE_LDELEM_U2, "ldelem.u2", SM_LDELEM_U2)
+OPCODEMAP(CEE_LDELEM_I4, "ldelem.i4", SM_LDELEM_I4)
+OPCODEMAP(CEE_LDELEM_U4, "ldelem.u4", SM_LDELEM_U4)
+OPCODEMAP(CEE_LDELEM_I8, "ldelem.i8", SM_LDELEM_I8)
+OPCODEMAP(CEE_LDELEM_I, "ldelem.i", SM_LDELEM_I)
+OPCODEMAP(CEE_LDELEM_R4, "ldelem.r4", SM_LDELEM_R4)
+OPCODEMAP(CEE_LDELEM_R8, "ldelem.r8", SM_LDELEM_R8)
+OPCODEMAP(CEE_LDELEM_REF, "ldelem.ref", SM_LDELEM_REF)
+OPCODEMAP(CEE_STELEM_I, "stelem.i", SM_STELEM_I)
+OPCODEMAP(CEE_STELEM_I1, "stelem.i1", SM_STELEM_I1)
+OPCODEMAP(CEE_STELEM_I2, "stelem.i2", SM_STELEM_I2)
+OPCODEMAP(CEE_STELEM_I4, "stelem.i4", SM_STELEM_I4)
+OPCODEMAP(CEE_STELEM_I8, "stelem.i8", SM_STELEM_I8)
+OPCODEMAP(CEE_STELEM_R4, "stelem.r4", SM_STELEM_R4)
+OPCODEMAP(CEE_STELEM_R8, "stelem.r8", SM_STELEM_R8)
+OPCODEMAP(CEE_STELEM_REF, "stelem.ref", SM_STELEM_REF)
+OPCODEMAP(CEE_LDELEM, "ldelem", SM_LDELEM)
+OPCODEMAP(CEE_STELEM, "stelem", SM_STELEM)
+OPCODEMAP(CEE_UNBOX_ANY, "unbox.any", SM_UNBOX_ANY)
+OPCODEMAP(CEE_UNUSED5, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED6, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED7, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED8, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED9, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED10, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED11, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED12, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED13, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED14, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED15, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED16, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED17, "unused", SM_UNUSED)
+OPCODEMAP(CEE_CONV_OVF_I1, "conv.ovf.i1", SM_CONV_OVF_I1)
+OPCODEMAP(CEE_CONV_OVF_U1, "conv.ovf.u1", SM_CONV_OVF_U1)
+OPCODEMAP(CEE_CONV_OVF_I2, "conv.ovf.i2", SM_CONV_OVF_I2)
+OPCODEMAP(CEE_CONV_OVF_U2, "conv.ovf.u2", SM_CONV_OVF_U2)
+OPCODEMAP(CEE_CONV_OVF_I4, "conv.ovf.i4", SM_CONV_OVF_I4)
+OPCODEMAP(CEE_CONV_OVF_U4, "conv.ovf.u4", SM_CONV_OVF_U4)
+OPCODEMAP(CEE_CONV_OVF_I8, "conv.ovf.i8", SM_CONV_OVF_I8)
+OPCODEMAP(CEE_CONV_OVF_U8, "conv.ovf.u8", SM_CONV_OVF_U8)
+OPCODEMAP(CEE_UNUSED50, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED18, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED19, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED20, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED21, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED22, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED23, "unused", SM_UNUSED)
+OPCODEMAP(CEE_REFANYVAL, "refanyval", SM_REFANYVAL)
+OPCODEMAP(CEE_CKFINITE, "ckfinite", SM_CKFINITE)
+OPCODEMAP(CEE_UNUSED24, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED25, "unused", SM_UNUSED)
+OPCODEMAP(CEE_MKREFANY, "mkrefany", SM_MKREFANY)
+OPCODEMAP(CEE_UNUSED59, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED60, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED61, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED62, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED63, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED64, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED65, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED66, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED67, "unused", SM_UNUSED)
+OPCODEMAP(CEE_LDTOKEN, "ldtoken", SM_LDTOKEN)
+OPCODEMAP(CEE_CONV_U2, "conv.u2", SM_CONV_U2)
+OPCODEMAP(CEE_CONV_U1, "conv.u1", SM_CONV_U1)
+OPCODEMAP(CEE_CONV_I, "conv.i", SM_CONV_I)
+OPCODEMAP(CEE_CONV_OVF_I, "conv.ovf.i", SM_CONV_OVF_I)
+OPCODEMAP(CEE_CONV_OVF_U, "conv.ovf.u", SM_CONV_OVF_U)
+OPCODEMAP(CEE_ADD_OVF, "add.ovf", SM_ADD_OVF)
+OPCODEMAP(CEE_ADD_OVF_UN, "add.ovf.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_MUL_OVF, "mul.ovf", SM_MUL_OVF)
+OPCODEMAP(CEE_MUL_OVF_UN, "mul.ovf.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_SUB_OVF, "sub.ovf", SM_SUB_OVF)
+OPCODEMAP(CEE_SUB_OVF_UN, "sub.ovf.un", SM_OVF_NOTYPE_UN)
+OPCODEMAP(CEE_ENDFINALLY, "endfinally", SM_NOSHOW)
+OPCODEMAP(CEE_LEAVE, "leave", SM_NOSHOW)
+OPCODEMAP(CEE_LEAVE_S, "leave.s", SM_LEAVE_S)
+OPCODEMAP(CEE_STIND_I, "stind.i", SM_STIND_I)
+OPCODEMAP(CEE_CONV_U, "conv.u", SM_CONV_U)
+OPCODEMAP(CEE_UNUSED26, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED27, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED28, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED29, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED30, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED31, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED32, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED33, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED34, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED35, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED36, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED37, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED38, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED39, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED40, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED41, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED42, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED43, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED44, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED45, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED46, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED47, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED48, "unused", SM_UNUSED)
+OPCODEMAP(CEE_PREFIX7, "prefix7", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX6, "prefix6", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX5, "prefix5", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX4, "prefix4", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX3, "prefix3", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX2, "prefix2", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIX1, "prefix1", SM_PREFIX_N)
+OPCODEMAP(CEE_PREFIXREF, "prefixref", SM_PREFIX_N)
+
+OPCODEMAP(CEE_ARGLIST, "arglist", SM_ARGLIST)
+OPCODEMAP(CEE_CEQ, "ceq", SM_CEQ)
+OPCODEMAP(CEE_CGT, "cgt", SM_CGT)
+OPCODEMAP(CEE_CGT_UN, "cgt.un", SM_CGT_UN)
+OPCODEMAP(CEE_CLT, "clt", SM_CLT)
+OPCODEMAP(CEE_CLT_UN, "clt.un", SM_CLT_UN)
+OPCODEMAP(CEE_LDFTN, "ldftn", SM_LDFTN)
+OPCODEMAP(CEE_LDVIRTFTN, "ldvirtftn", SM_LDVIRTFTN)
+OPCODEMAP(CEE_UNUSED56, "unused", SM_UNUSED)
+OPCODEMAP(CEE_LDARG, "ldarg", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LDARGA, "ldarga", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_STARG, "starg", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LDLOC, "ldloc", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LDLOCA, "ldloca", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_STLOC, "stloc", SM_LONG_LOC_ARG)
+OPCODEMAP(CEE_LOCALLOC, "localloc", SM_LOCALLOC)
+OPCODEMAP(CEE_UNUSED57, "unused", SM_UNUSED)
+OPCODEMAP(CEE_ENDFILTER, "endfilter", SM_NOSHOW)
+OPCODEMAP(CEE_UNALIGNED, "unaligned.", SM_UNALIGNED)
+OPCODEMAP(CEE_VOLATILE, "volatile.", SM_VOLATILE)
+OPCODEMAP(CEE_TAILCALL, "tail.", SM_TAILCALL)
+OPCODEMAP(CEE_INITOBJ, "initobj", SM_INITOBJ)
+OPCODEMAP(CEE_CONSTRAINED, "constrained.", SM_CONSTRAINED)
+OPCODEMAP(CEE_CPBLK, "cpblk", SM_CPBLK)
+OPCODEMAP(CEE_INITBLK, "initblk", SM_INITBLK)
+OPCODEMAP(CEE_UNUSED69, "unused", SM_UNUSED)
+OPCODEMAP(CEE_RETHROW, "rethrow", SM_RETHROW)
+OPCODEMAP(CEE_UNUSED51, "unused", SM_UNUSED)
+OPCODEMAP(CEE_SIZEOF, "sizeof", SM_SIZEOF)
+OPCODEMAP(CEE_REFANYTYPE, "refanytype", SM_REFANYTYPE)
+OPCODEMAP(CEE_READONLY, "readonly.", SM_READONLY)
+OPCODEMAP(CEE_UNUSED53, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED54, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED55, "unused", SM_UNUSED)
+OPCODEMAP(CEE_UNUSED70, "unused", SM_UNUSED)
+
+// These are not real opcodes, but they are handy internally in the EE
+
+OPCODEMAP(CEE_ILLEGAL, "illegal", SM_UNUSED)
+OPCODEMAP(CEE_MACRO_END, "endmac", SM_UNUSED)
+OPCODEMAP(CEE_CODE_LABEL, "codelabel", SM_UNUSED)
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//
+// New opcodes added just for the state machine.
+//
+// Do NOT uncomment or delete the following lines.
+// They are there so that makeSmOpcodeDef.pl can automatically generate the
+// smopcode.def that contains these new SM_ opcodes.
+//
+// OPCODEMAP(CEE_DUMMY, "ldarga.s(normed)", SM_LDARGA_S_NORMED)
+// OPCODEMAP(CEE_DUMMY, "ldloca.s(normed)", SM_LDLOCA_S_NORMED)
+
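To make the mapping above concrete, here is one typical way an X-macro .def like this gets consumed. This is a hedged sketch, not the actual sm.cpp code from this change; it assumes the OPCODEMAP entries appear in CEE opcode numeric order (which the interleaved unused placeholders suggest), and the array name s_CeeToSmOpcode is made up for illustration.

    #include "smopenum.h" // for SM_OPCODE (added later in this diff)

    // Expand every OPCODEMAP(eename, eestring, smname) row into just its SM opcode,
    // producing a table indexed by the CEE opcode value.
    static const SM_OPCODE s_CeeToSmOpcode[] = {
    #define OPCODEMAP(eename, eestring, smname) smname,
    #include "smopcodemap.def"
    #undef OPCODEMAP
    };

    // Usage sketch: SM_OPCODE smOp = s_CeeToSmOpcode[CEE_LDARG_0]; // yields SM_LDARG_0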
diff --git a/src/jit/smopenum.h b/src/jit/smopenum.h
new file mode 100644
index 0000000000..978bbc2c3b
--- /dev/null
+++ b/src/jit/smopenum.h
@@ -0,0 +1,17 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef __smopenum_h__
+#define __smopenum_h__
+
+typedef enum smopcode_t {
+#define SMOPDEF(smname, string) smname,
+#include "smopcode.def"
+#undef SMOPDEF
+
+ SM_COUNT, /* number of state machine opcodes */
+
+} SM_OPCODE;
+
+#endif /* __smopenum_h__ */
diff --git a/src/jit/smweights.cpp b/src/jit/smweights.cpp
new file mode 100644
index 0000000000..f93d739b61
--- /dev/null
+++ b/src/jit/smweights.cpp
@@ -0,0 +1,274 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+//
+// Automatically generated code. DO NOT MODIFY!
+// To generate this file, do
+// "WeightsArrayGen.pl matrix.txt results.txt > SMWeights.cpp"
+//
+// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+#include "jitpch.h"
+
+#define DEFAULT_WEIGHT_VALUE 65 // This is the average of all the weights.
+
+#define NA 9999
+
+const short g_StateWeights[] = {
+ NA, // state 0
+ NA, // state 1
+ DEFAULT_WEIGHT_VALUE, // state 2 [noshow]
+ 10, // state 3 [ldarg.0]
+ 16, // state 4 [ldarg.1]
+ 35, // state 5 [ldarg.2]
+ 28, // state 6 [ldarg.3]
+ 12, // state 7 [ldloc.0]
+ 9, // state 8 [ldloc.1]
+ 22, // state 9 [ldloc.2]
+ 24, // state 10 [ldloc.3]
+ 6, // state 11 [stloc.0]
+ 34, // state 12 [stloc.1]
+ 4, // state 13 [stloc.2]
+ 49, // state 14 [stloc.3]
+ 32, // state 15 [ldarg.s]
+ 77, // state 16 [ldarga.s]
+ 21, // state 17 [starg.s]
+ 32, // state 18 [ldloc.s]
+ 61, // state 19 [ldloca.s]
+ -45, // state 20 [stloc.s]
+ 7, // state 21 [ldnull]
+ 22, // state 22 [ldc.i4.m1]
+ 15, // state 23 [ldc.i4.0]
+ 28, // state 24 [ldc.i4.1]
+ 34, // state 25 [ldc.i4.2]
+ -6, // state 26 [ldc.i4.3]
+ 20, // state 27 [ldc.i4.4]
+ 4, // state 28 [ldc.i4.5]
+ 10, // state 29 [ldc.i4.6]
+ 56, // state 30 [ldc.i4.7]
+ 42, // state 31 [ldc.i4.8]
+ 41, // state 32 [ldc.i4.s]
+ 38, // state 33 [ldc.i4]
+ 160, // state 34 [ldc.i8]
+ 33, // state 35 [ldc.r4]
+ 113, // state 36 [ldc.r8]
+ DEFAULT_WEIGHT_VALUE, // state 37 [unused]
+ 11, // state 38 [dup]
+ -24, // state 39 [pop]
+ 79, // state 40 [call]
+ DEFAULT_WEIGHT_VALUE, // state 41 [calli]
+ 19, // state 42 [ret]
+ 44, // state 43 [br.s]
+ 27, // state 44 [brfalse.s]
+ 25, // state 45 [brtrue.s]
+ 6, // state 46 [beq.s]
+ 20, // state 47 [bge.s]
+ 33, // state 48 [bgt.s]
+ 53, // state 49 [ble.s]
+ 28, // state 50 [blt.s]
+ 12, // state 51 [bne.un.s]
+ 85, // state 52 [bge.un.s]
+ -52, // state 53 [bgt.un.s]
+ 147, // state 54 [ble.un.s]
+ -63, // state 55 [blt.un.s]
+ DEFAULT_WEIGHT_VALUE, // state 56 [long.branch]
+ 116, // state 57 [switch]
+ -19, // state 58 [ldind.i1]
+ 17, // state 59 [ldind.u1]
+ -18, // state 60 [ldind.i2]
+ 10, // state 61 [ldind.u2]
+ -11, // state 62 [ldind.i4]
+ -33, // state 63 [ldind.u4]
+ 41, // state 64 [ldind.i8]
+ -110, // state 65 [ldind.i]
+ 31, // state 66 [ldind.r4]
+ 45, // state 67 [ldind.r8]
+ 1, // state 68 [ldind.ref]
+ 60, // state 69 [stind.ref]
+ 36, // state 70 [stind.i1]
+ 40, // state 71 [stind.i2]
+ 11, // state 72 [stind.i4]
+ 84, // state 73 [stind.i8]
+ 50, // state 74 [stind.r4]
+ 73, // state 75 [stind.r8]
+ -12, // state 76 [add]
+ -15, // state 77 [sub]
+ -9, // state 78 [mul]
+ 35, // state 79 [div]
+ 89, // state 80 [div.un]
+ 89, // state 81 [rem]
+ 82, // state 82 [rem.un]
+ -5, // state 83 [and]
+ -7, // state 84 [or]
+ 35, // state 85 [xor]
+ 0, // state 86 [shl]
+ 17, // state 87 [shr]
+ 27, // state 88 [shr.un]
+ 58, // state 89 [neg]
+ 19, // state 90 [not]
+ 78, // state 91 [conv.i1]
+ 54, // state 92 [conv.i2]
+ 2, // state 93 [conv.i4]
+ 99, // state 94 [conv.i8]
+ 273, // state 95 [conv.r4]
+ 197, // state 96 [conv.r8]
+ 45, // state 97 [conv.u4]
+ 55, // state 98 [conv.u8]
+ 83, // state 99 [callvirt]
+ DEFAULT_WEIGHT_VALUE, // state 100 [cpobj]
+ 29, // state 101 [ldobj]
+ 66, // state 102 [ldstr]
+ 227, // state 103 [newobj]
+ 261, // state 104 [castclass]
+ 166, // state 105 [isinst]
+ 209, // state 106 [conv.r.un]
+ DEFAULT_WEIGHT_VALUE, // state 107 [unbox]
+ 210, // state 108 [throw]
+ 18, // state 109 [ldfld]
+ 17, // state 110 [ldflda]
+ 31, // state 111 [stfld]
+ 159, // state 112 [ldsfld]
+ 177, // state 113 [ldsflda]
+ 125, // state 114 [stsfld]
+ 36, // state 115 [stobj]
+ 148, // state 116 [ovf.notype.un]
+ 247, // state 117 [box]
+ 152, // state 118 [newarr]
+ 7, // state 119 [ldlen]
+ 145, // state 120 [ldelema]
+ 103, // state 121 [ldelem.i1]
+ 91, // state 122 [ldelem.u1]
+ 267, // state 123 [ldelem.i2]
+ 148, // state 124 [ldelem.u2]
+ 92, // state 125 [ldelem.i4]
+ 213, // state 126 [ldelem.u4]
+ 223, // state 127 [ldelem.i8]
+ DEFAULT_WEIGHT_VALUE, // state 128 [ldelem.i]
+ DEFAULT_WEIGHT_VALUE, // state 129 [ldelem.r4]
+ 549, // state 130 [ldelem.r8]
+ 81, // state 131 [ldelem.ref]
+ DEFAULT_WEIGHT_VALUE, // state 132 [stelem.i]
+ 14, // state 133 [stelem.i1]
+ 23, // state 134 [stelem.i2]
+ 66, // state 135 [stelem.i4]
+ 254, // state 136 [stelem.i8]
+ DEFAULT_WEIGHT_VALUE, // state 137 [stelem.r4]
+ DEFAULT_WEIGHT_VALUE, // state 138 [stelem.r8]
+ 94, // state 139 [stelem.ref]
+ DEFAULT_WEIGHT_VALUE, // state 140 [ldelem]
+ DEFAULT_WEIGHT_VALUE, // state 141 [stelem]
+ 274, // state 142 [unbox.any]
+ DEFAULT_WEIGHT_VALUE, // state 143 [conv.ovf.i1]
+ DEFAULT_WEIGHT_VALUE, // state 144 [conv.ovf.u1]
+ DEFAULT_WEIGHT_VALUE, // state 145 [conv.ovf.i2]
+ DEFAULT_WEIGHT_VALUE, // state 146 [conv.ovf.u2]
+ 242, // state 147 [conv.ovf.i4]
+ DEFAULT_WEIGHT_VALUE, // state 148 [conv.ovf.u4]
+ 293, // state 149 [conv.ovf.i8]
+ 293, // state 150 [conv.ovf.u8]
+ DEFAULT_WEIGHT_VALUE, // state 151 [refanyval]
+ DEFAULT_WEIGHT_VALUE, // state 152 [ckfinite]
+ -17, // state 153 [mkrefany]
+ 32, // state 154 [ldtoken]
+ 25, // state 155 [conv.u2]
+ 50, // state 156 [conv.u1]
+ -0, // state 157 [conv.i]
+ 178, // state 158 [conv.ovf.i]
+ DEFAULT_WEIGHT_VALUE, // state 159 [conv.ovf.u]
+ DEFAULT_WEIGHT_VALUE, // state 160 [add.ovf]
+ DEFAULT_WEIGHT_VALUE, // state 161 [mul.ovf]
+ DEFAULT_WEIGHT_VALUE, // state 162 [sub.ovf]
+ -17, // state 163 [leave.s]
+ 182, // state 164 [stind.i]
+ -36, // state 165 [conv.u]
+ DEFAULT_WEIGHT_VALUE, // state 166 [prefix.n]
+ 120, // state 167 [arglist]
+ 20, // state 168 [ceq]
+ -1, // state 169 [cgt]
+ 47, // state 170 [cgt.un]
+ 26, // state 171 [clt]
+ 85, // state 172 [clt.un]
+ 102, // state 173 [ldftn]
+ 234, // state 174 [ldvirtftn]
+ DEFAULT_WEIGHT_VALUE, // state 175 [long.loc.arg]
+ 347, // state 176 [localloc]
+ DEFAULT_WEIGHT_VALUE, // state 177 [unaligned]
+ -44, // state 178 [volatile]
+ DEFAULT_WEIGHT_VALUE, // state 179 [tailcall]
+ 55, // state 180 [initobj]
+ DEFAULT_WEIGHT_VALUE, // state 181 [constrained]
+ DEFAULT_WEIGHT_VALUE, // state 182 [cpblk]
+ DEFAULT_WEIGHT_VALUE, // state 183 [initblk]
+ DEFAULT_WEIGHT_VALUE, // state 184 [rethrow]
+ 38, // state 185 [sizeof]
+ -68, // state 186 [refanytype]
+ DEFAULT_WEIGHT_VALUE, // state 187 [readonly]
+ 55, // state 188 [ldarga.s.normed]
+ 35, // state 189 [ldloca.s.normed]
+ 161, // state 190 [constrained -> callvirt]
+ 31, // state 191 [ldarg.0 -> ldfld]
+ 29, // state 192 [ldarg.1 -> ldfld]
+ 22, // state 193 [ldarg.2 -> ldfld]
+ 321, // state 194 [ldarg.3 -> ldfld]
+ 46, // state 195 [ldarga.s -> ldfld]
+ 8, // state 196 [ldloca.s -> ldfld]
+ 19, // state 197 [ldarga.s.normed -> ldfld]
+ -35, // state 198 [ldloca.s.normed -> ldfld]
+ 20, // state 199 [stloc.0 -> ldloc.0]
+ -7, // state 200 [stloc.1 -> ldloc.1]
+ -10, // state 201 [stloc.2 -> ldloc.2]
+ -4, // state 202 [stloc.3 -> ldloc.3]
+ DEFAULT_WEIGHT_VALUE, // state 203 [ldc.r4 -> add]
+ DEFAULT_WEIGHT_VALUE, // state 204 [ldc.r4 -> sub]
+ DEFAULT_WEIGHT_VALUE, // state 205 [ldc.r4 -> mul]
+ DEFAULT_WEIGHT_VALUE, // state 206 [ldc.r4 -> div]
+ 52, // state 207 [ldc.r8 -> add]
+ DEFAULT_WEIGHT_VALUE, // state 208 [ldc.r8 -> sub]
+ -169, // state 209 [ldc.r8 -> mul]
+ -17, // state 210 [ldc.r8 -> div]
+ DEFAULT_WEIGHT_VALUE, // state 211 [conv.r4 -> add]
+ DEFAULT_WEIGHT_VALUE, // state 212 [conv.r4 -> sub]
+ DEFAULT_WEIGHT_VALUE, // state 213 [conv.r4 -> mul]
+ DEFAULT_WEIGHT_VALUE, // state 214 [conv.r4 -> div]
+ 358, // state 215 [conv.r8 -> mul]
+ DEFAULT_WEIGHT_VALUE, // state 216 [conv.r8 -> div]
+ NA, // state 217
+ 32, // state 218 [ldarg.0 -> ldc.i4.0 -> stfld]
+ NA, // state 219
+ DEFAULT_WEIGHT_VALUE, // state 220 [ldarg.0 -> ldc.r4 -> stfld]
+ NA, // state 221
+ 38, // state 222 [ldarg.0 -> ldc.r8 -> stfld]
+ NA, // state 223
+ NA, // state 224
+ 64, // state 225 [ldarg.0 -> ldarg.1 -> ldfld -> stfld]
+ 69, // state 226 [ldarg.0 -> ldarg.1 -> stfld]
+ NA, // state 227
+ 98, // state 228 [ldarg.0 -> ldarg.2 -> stfld]
+ NA, // state 229
+ 97, // state 230 [ldarg.0 -> ldarg.3 -> stfld]
+ NA, // state 231
+ NA, // state 232
+ NA, // state 233
+ NA, // state 234
+ 34, // state 235 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> add -> stfld]
+ NA, // state 236
+ -10, // state 237 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> sub -> stfld]
+ NA, // state 238
+ DEFAULT_WEIGHT_VALUE, // state 239 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> mul -> stfld]
+ NA, // state 240
+ DEFAULT_WEIGHT_VALUE, // state 241 [ldarg.0 -> dup -> ldfld -> ldarg.1 -> div -> stfld]
+ NA, // state 242
+ NA, // state 243
+ DEFAULT_WEIGHT_VALUE, // state 244 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> add]
+ DEFAULT_WEIGHT_VALUE, // state 245 [ldarg.0 -> ldfld -> ldarg.1 -> ldfld -> sub]
+ NA, // state 246
+ NA, // state 247
+ DEFAULT_WEIGHT_VALUE, // state 248 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> add]
+ DEFAULT_WEIGHT_VALUE, // state 249 [ldarga.s -> ldfld -> ldarga.s -> ldfld -> sub]
+};
+
+static_assert_no_msg(NUM_SM_STATES == sizeof(g_StateWeights) / sizeof(g_StateWeights[0]));
+
+const short* gp_StateWeights = g_StateWeights;
diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp
new file mode 100644
index 0000000000..2da6902464
--- /dev/null
+++ b/src/jit/ssabuilder.cpp
@@ -0,0 +1,1903 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#include "ssaconfig.h"
+#include "ssarenamestate.h"
+#include "ssabuilder.h"
+
+namespace
+{
+/**
+ * Visits basic blocks in depth-first order and arranges them in the order of
+ * their DFS finish time.
+ *
+ * @param block The fgFirstBB or entry block.
+ * @param comp A pointer to the compiler.
+ * @param visited In/out array of size at least fgMaxBBNum, initialized to false.
+ * @param count Out pointer for the count of all nodes reachable by DFS.
+ * @param postOrder Out pointer to an array of size at least fgMaxBBNum that receives the blocks in post order.
+ */
+static void TopologicalSortHelper(BasicBlock* block, Compiler* comp, bool* visited, int* count, BasicBlock** postOrder)
+{
+ visited[block->bbNum] = true;
+
+ ArrayStack<BasicBlock*> blocks(comp);
+ ArrayStack<AllSuccessorIter> iterators(comp);
+ ArrayStack<AllSuccessorIter> ends(comp);
+
+    // Three stacks are used here, and all should have the same height:
+    // the first holds the blocks,
+    // the second holds the iterator that tracks which successor of the block we are looking at,
+    // and the third holds the corresponding end-marker iterator.
+ blocks.Push(block);
+ iterators.Push(block->GetAllSuccs(comp).begin());
+ ends.Push(block->GetAllSuccs(comp).end());
+
+ while (blocks.Height() > 0)
+ {
+ block = blocks.Top();
+
+#ifdef DEBUG
+ if (comp->verboseSsa)
+ {
+ printf("[SsaBuilder::TopologicalSortHelper] Visiting BB%02u: ", block->bbNum);
+ printf("[");
+ unsigned numSucc = block->NumSucc(comp);
+ for (unsigned i = 0; i < numSucc; ++i)
+ {
+ printf("BB%02u, ", block->GetSucc(i, comp)->bbNum);
+ }
+ EHSuccessorIter end = block->GetEHSuccs(comp).end();
+ for (EHSuccessorIter ehsi = block->GetEHSuccs(comp).begin(); ehsi != end; ++ehsi)
+ {
+ printf("[EH]BB%02u, ", (*ehsi)->bbNum);
+ }
+ printf("]\n");
+ }
+#endif
+
+ if (iterators.TopRef() != ends.TopRef())
+ {
+            // If the block on TOS still has unvisited successors, visit them.
+ AllSuccessorIter& iter = iterators.TopRef();
+ BasicBlock* succ = *iter;
+ ++iter;
+ // push the child
+
+ if (!visited[succ->bbNum])
+ {
+ blocks.Push(succ);
+ iterators.Push(succ->GetAllSuccs(comp).begin());
+ ends.Push(succ->GetAllSuccs(comp).end());
+ visited[succ->bbNum] = true;
+ }
+ }
+ else
+ {
+ // all successors have been visited
+ blocks.Pop();
+ iterators.Pop();
+ ends.Pop();
+
+            postOrder[*count] = block;
+            block->bbPostOrderNum = *count;
+
+            DBG_SSA_JITDUMP("postOrder[%d] = [%p] and BB%02u\n", *count, dspPtr(block), block->bbNum);
+
+            *count += 1;
+ }
+ }
+}
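+
+// Illustrative sketch (the range-for over GetAllSuccs is assumed for brevity): the three parallel
+// stacks above simulate the recursive DFS
+//
+//     void Dfs(BasicBlock* b)
+//     {
+//         visited[b->bbNum] = true;
+//         for (BasicBlock* succ : b->GetAllSuccs(comp))
+//         {
+//             if (!visited[succ->bbNum])
+//             {
+//                 Dfs(succ);
+//             }
+//         }
+//         postOrder[(*count)++] = b; // record b at its DFS finish time
+//     }
+//
+// without recursing on the native stack. For the diamond A -> B, A -> C, B -> D, C -> D, one
+// possible result is postOrder = { D, B, C, A }; the entry block always lands last.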
+
+/**
+ * Method that finds a common IDom parent, much like least common ancestor.
+ *
+ * @param finger1 A basic block that might share IDom ancestor with finger2.
+ * @param finger2 A basic block that might share IDom ancestor with finger1.
+ *
+ * @see "A simple, fast dominance algorithm" by Keith D. Cooper, Timothy J. Harvey, Ken Kennedy.
+ *
+ * @return A basic block whose IDom is the dominator for finger1 and finger2,
+ * or else NULL. This may be called while immediate dominators are being
+ * computed, and if the input values are members of the same loop (each reachable from the other),
+ * then one may not yet have its immediate dominator computed when we are attempting
+ *         to find the immediate dominator of the other. So a NULL return value means that
+ *         the two inputs are in a cycle, not that they don't have a common dominator ancestor.
+ */
+static inline BasicBlock* IntersectDom(BasicBlock* finger1, BasicBlock* finger2)
+{
+ while (finger1 != finger2)
+ {
+ if (finger1 == nullptr || finger2 == nullptr)
+ {
+ return nullptr;
+ }
+ while (finger1 != nullptr && finger1->bbPostOrderNum < finger2->bbPostOrderNum)
+ {
+ finger1 = finger1->bbIDom;
+ }
+ if (finger1 == nullptr)
+ {
+ return nullptr;
+ }
+ while (finger2 != nullptr && finger2->bbPostOrderNum < finger1->bbPostOrderNum)
+ {
+ finger2 = finger2->bbIDom;
+ }
+ }
+ return finger1;
+}
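+
+// Illustrative sketch (block names and postorder numbers assumed): take the flow graph
+// A -> B, B -> C, B -> D, whose dominator tree is A -> B -> {C, D}, with bbPostOrderNum values
+// D=0, C=1, B=2, A=3 (the entry block always has the highest number). IntersectDom(C, D)
+// repeatedly walks the finger with the smaller postorder number up its bbIDom chain:
+//
+//     finger1 = C(1), finger2 = D(0)  -->  finger2 = D->bbIDom = B(2)
+//     finger1 = C(1), finger2 = B(2)  -->  finger1 = C->bbIDom = B(2)
+//     finger1 == finger2 == B         -->  return B, the nearest common dominator of C and D
+//
+// A nullptr return only occurs while IDoms are still being computed, when a bbIDom chain runs
+// out before the fingers meet (the cycle case described in the comment above).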
+
+} // end of anonymous namespace.
+
+// =================================================================================
+// SSA
+// =================================================================================
+
+void Compiler::fgSsaBuild()
+{
+ IAllocator* pIAllocator = new (this, CMK_SSA) CompAllocator(this, CMK_SSA);
+
+ // If this is not the first invocation, reset data structures for SSA.
+ if (fgSsaPassesCompleted > 0)
+ {
+ fgResetForSsa();
+ }
+
+ SsaBuilder builder(this, pIAllocator);
+ builder.Build();
+ fgSsaPassesCompleted++;
+#ifdef DEBUG
+ JitTestCheckSSA();
+#endif // DEBUG
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JITDUMP("\nAfter fgSsaBuild:\n");
+ fgDispBasicBlocks(/*dumpTrees*/ true);
+ }
+#endif // DEBUG
+}
+
+void Compiler::fgResetForSsa()
+{
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ lvaTable[i].lvPerSsaData.Reset();
+ }
+ for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
+ {
+ // Eliminate phis.
+ blk->bbHeapSsaPhiFunc = nullptr;
+ if (blk->bbTreeList != nullptr)
+ {
+ GenTreePtr last = blk->bbTreeList->gtPrev;
+ blk->bbTreeList = blk->FirstNonPhiDef();
+ if (blk->bbTreeList != nullptr)
+ {
+ blk->bbTreeList->gtPrev = last;
+ }
+ }
+ }
+}
+
+/**
+ * Constructor for the SSA builder.
+ *
+ * @param pCompiler Current compiler instance.
+ *
+ * @remarks Initializes the class and member pointers/objects that use constructors.
+ */
+SsaBuilder::SsaBuilder(Compiler* pCompiler, IAllocator* pIAllocator)
+ : m_pCompiler(pCompiler)
+ , m_allocator(pIAllocator)
+
+#ifdef SSA_FEATURE_DOMARR
+ , m_pDomPreOrder(NULL)
+ , m_pDomPostOrder(NULL)
+#endif
+#ifdef SSA_FEATURE_USEDEF
+ , m_uses(jitstd::allocator<void>(pIAllocator))
+ , m_defs(jitstd::allocator<void>(pIAllocator))
+#endif
+{
+}
+
+/**
+ * Topologically sort the graph and return the number of nodes visited.
+ *
+ * @param postOrder The array in which the arranged basic blocks have to be returned.
+ * @param count The size of the postOrder array.
+ *
+ * @return The number of nodes visited while performing DFS on the graph.
+ */
+int SsaBuilder::TopologicalSort(BasicBlock** postOrder, int count)
+{
+ // Allocate and initialize visited flags.
+ bool* visited = (bool*)alloca(count * sizeof(bool));
+ memset(visited, 0, count * sizeof(bool));
+
+ // Display basic blocks.
+ DBEXEC(VERBOSE, m_pCompiler->fgDispBasicBlocks());
+ DBEXEC(VERBOSE, m_pCompiler->fgDispHandlerTab());
+
+ // Call the recursive helper.
+ int postIndex = 0;
+ TopologicalSortHelper(m_pCompiler->fgFirstBB, m_pCompiler, visited, &postIndex, postOrder);
+
+ // In the absence of EH (because catch/finally have no preds), this should be valid.
+ // assert(postIndex == (count - 1));
+
+ return postIndex;
+}
+
+/**
+ * Computes the immediate dominator IDom for each block iteratively.
+ *
+ * @param postOrder The array of basic blocks arranged in postOrder.
+ * @param count The size of valid elements in the postOrder array.
+ *
+ * @see "A simple, fast dominance algorithm" by Cooper, Harvey, and Kennedy.
+ */
+void SsaBuilder::ComputeImmediateDom(BasicBlock** postOrder, int count)
+{
+ JITDUMP("[SsaBuilder::ComputeImmediateDom]\n");
+
+ // TODO-Cleanup: We currently have two dominance computations happening. We should unify them; for
+ // now, at least forget the results of the first.
+ for (BasicBlock* blk = m_pCompiler->fgFirstBB; blk != nullptr; blk = blk->bbNext)
+ {
+ blk->bbIDom = nullptr;
+ }
+
+ // Add entry point to processed as its IDom is NULL.
+ BitVecTraits traits(m_pCompiler->fgBBNumMax + 1, m_pCompiler);
+ BitVec BITVEC_INIT_NOCOPY(processed, BitVecOps::MakeEmpty(&traits));
+
+ BitVecOps::AddElemD(&traits, processed, m_pCompiler->fgFirstBB->bbNum);
+ assert(postOrder[count - 1] == m_pCompiler->fgFirstBB);
+
+ bool changed = true;
+ while (changed)
+ {
+ changed = false;
+
+ // In reverse post order, except for the entry block (count - 1 is entry BB).
+ for (int i = count - 2; i >= 0; --i)
+ {
+ BasicBlock* block = postOrder[i];
+
+ DBG_SSA_JITDUMP("Visiting in reverse post order: BB%02u.\n", block->bbNum);
+
+ // Find the first processed predecessor block.
+ BasicBlock* predBlock = nullptr;
+ for (flowList* pred = m_pCompiler->BlockPredsWithEH(block); pred; pred = pred->flNext)
+ {
+ if (BitVecOps::IsMember(&traits, processed, pred->flBlock->bbNum))
+ {
+ predBlock = pred->flBlock;
+ break;
+ }
+ }
+
+            // There could be just a single basic block, so check whether there were any preds.
+ if (predBlock != nullptr)
+ {
+ DBG_SSA_JITDUMP("Pred block is BB%02u.\n", predBlock->bbNum);
+ }
+
+ // Intersect DOM, if computed, for all predecessors.
+ BasicBlock* bbIDom = predBlock;
+ for (flowList* pred = m_pCompiler->BlockPredsWithEH(block); pred; pred = pred->flNext)
+ {
+ if (predBlock != pred->flBlock)
+ {
+ BasicBlock* domAncestor = IntersectDom(pred->flBlock, bbIDom);
+ // The result may be NULL if "block" and "pred->flBlock" are part of a
+ // cycle -- neither is guaranteed ordered wrt the other in reverse postorder,
+ // so we may be computing the IDom of "block" before the IDom of "pred->flBlock" has
+ // been computed. But that's OK -- if they're in a cycle, they share the same immediate
+ // dominator, so the contribution of "pred->flBlock" is not necessary to compute
+ // the result.
+ if (domAncestor != nullptr)
+ {
+ bbIDom = domAncestor;
+ }
+ }
+ }
+
+ // Did we change the bbIDom value? If so, we go around the outer loop again.
+ if (block->bbIDom != bbIDom)
+ {
+ changed = true;
+
+ // IDom has changed, update it.
+ DBG_SSA_JITDUMP("bbIDom of BB%02u becomes BB%02u.\n", block->bbNum, bbIDom ? bbIDom->bbNum : 0);
+ block->bbIDom = bbIDom;
+ }
+
+ // Mark the current block as processed.
+ BitVecOps::AddElemD(&traits, processed, block->bbNum);
+
+ DBG_SSA_JITDUMP("Marking block BB%02u as processed.\n", block->bbNum);
+ }
+ }
+}
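+
+// Illustrative sketch (block names assumed): for the flow graph
+//
+//     A -> B,  B -> C,  C -> B (back edge),  C -> D
+//
+// the reverse post order is A, B, C, D. On the first pass, B's predecessor C has no bbIDom yet,
+// so IntersectDom(C, A) returns nullptr and B keeps bbIDom = A; C then gets bbIDom = B, and D
+// gets bbIDom = C. The second pass revisits every block with all of its predecessors processed,
+// finds nothing to update, leaves 'changed' false, and the loop terminates. This is the
+// fixed-point iteration described in the Cooper-Harvey-Kennedy paper cited above.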
+
+#ifdef SSA_FEATURE_DOMARR
+/**
+ * Walk the DOM tree and compute pre and post-order arrangement of the tree.
+ *
+ * @param curBlock The current block being operated on at some recursive level.
+ * @param domTree The DOM tree as a map (block -> set of child blocks.)
+ * @param preIndex The initial index given to the first block visited in pre order.
+ * @param postIndex The initial index given to the first block visited in post order.
+ *
+ * @remarks This would help us answer queries such as "a dom b?" in constant time.
+ *          For example, if a dominates b, then Pre[a] < Pre[b] but Post[a] > Post[b].
+ */
+void SsaBuilder::DomTreeWalk(BasicBlock* curBlock, BlkToBlkSetMap* domTree, int* preIndex, int* postIndex)
+{
+ JITDUMP("[SsaBuilder::DomTreeWalk] block [%p], BB%02u:\n", dspPtr(curBlock), curBlock->bbNum);
+
+ // Store the order number at the block number in the pre order list.
+ m_pDomPreOrder[curBlock->bbNum] = *preIndex;
+ ++(*preIndex);
+
+ BlkSet* pBlkSet;
+ if (domTree->Lookup(curBlock, &pBlkSet))
+ {
+ for (BlkSet::KeyIterator ki = pBlkSet->Begin(); !ki.Equal(pBlkSet->End()); ++ki)
+ {
+ if (curBlock != ki.Get())
+ {
+ DomTreeWalk(ki.Get(), domTree, preIndex, postIndex);
+ }
+ }
+ }
+
+ // Store the order number at the block number in the post order list.
+ m_pDomPostOrder[curBlock->bbNum] = *postIndex;
+ ++(*postIndex);
+}
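+
+// Illustrative sketch (the helper name is assumed): with the two arrays filled in by DomTreeWalk,
+// an "a dominates b?" query reduces to two integer comparisons:
+//
+//     bool DominatesViaDomArr(BasicBlock* a, BasicBlock* b)
+//     {
+//         // a is an ancestor of b in the DOM tree iff the walk enters a no later than b
+//         // and leaves a no earlier than b; using <= / >= makes the test reflexive.
+//         return (m_pDomPreOrder[a->bbNum] <= m_pDomPreOrder[b->bbNum]) &&
+//                (m_pDomPostOrder[a->bbNum] >= m_pDomPostOrder[b->bbNum]);
+//     }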
+#endif
+
+/**
+ * Using IDom of each basic block, add a mapping from block->IDom -> block.
+ * @param pCompiler Compiler instance
+ * @param block The basic block that will become the child node of its IDom.
+ * @param domTree The output domTree which will hold the mapping "block->bbIDom" -> "block"
+ *
+ */
+/* static */
+void SsaBuilder::ConstructDomTreeForBlock(Compiler* pCompiler, BasicBlock* block, BlkToBlkSetMap* domTree)
+{
+ BasicBlock* bbIDom = block->bbIDom;
+
+ // bbIDom for (only) fgFirstBB will be NULL.
+ if (bbIDom == nullptr)
+ {
+ return;
+ }
+
+ // If the bbIDom map key doesn't exist, create one.
+ BlkSet* pBlkSet;
+ if (!domTree->Lookup(bbIDom, &pBlkSet))
+ {
+ pBlkSet = new (pCompiler->getAllocator()) BlkSet(pCompiler->getAllocator());
+ domTree->Set(bbIDom, pBlkSet);
+ }
+
+ DBG_SSA_JITDUMP("Inserting BB%02u as dom child of BB%02u.\n", block->bbNum, bbIDom->bbNum);
+ // Insert the block into the block's set.
+ pBlkSet->Set(block, true);
+}
+
+/**
+ * Using IDom of each basic block, compute the whole tree. If a block "b" has IDom "i",
+ * then, block "b" is dominated by "i". The mapping then is i -> { ..., b, ... }, in
+ * other words, "domTree" is a tree represented by nodes mapped to their children.
+ *
+ * @param pCompiler Compiler instance
+ * @param domTree The output domTree which will hold the mapping "block->bbIDom" -> "block"
+ *
+ */
+/* static */
+void SsaBuilder::ComputeDominators(Compiler* pCompiler, BlkToBlkSetMap* domTree)
+{
+ JITDUMP("*************** In SsaBuilder::ComputeDominators(Compiler*, ...)\n");
+
+ // Construct the DOM tree from bbIDom
+ for (BasicBlock* block = pCompiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ ConstructDomTreeForBlock(pCompiler, block, domTree);
+ }
+
+ DBEXEC(pCompiler->verboseSsa, DisplayDominators(domTree));
+}
+
+/**
+ * Compute the DOM tree into a map(block -> set of blocks) adjacency representation.
+ *
+ * Using IDom of each basic block, compute the whole tree. If a block "b" has IDom "i",
+ * then, block "b" is dominated by "i". The mapping then is i -> { ..., b, ... }
+ *
+ * @param postOrder The array of basic blocks arranged in postOrder.
+ * @param count The size of valid elements in the postOrder array.
+ * @param domTree A map of (block -> set of blocks) tree representation that is empty.
+ *
+ */
+void SsaBuilder::ComputeDominators(BasicBlock** postOrder, int count, BlkToBlkSetMap* domTree)
+{
+ JITDUMP("*************** In SsaBuilder::ComputeDominators(BasicBlock** postOrder, int count, ...)\n");
+
+ // Construct the DOM tree from bbIDom
+ for (int i = 0; i < count; ++i)
+ {
+ ConstructDomTreeForBlock(m_pCompiler, postOrder[i], domTree);
+ }
+
+ DBEXEC(m_pCompiler->verboseSsa, DisplayDominators(domTree));
+
+#ifdef SSA_FEATURE_DOMARR
+ // Allocate space for constant time computation of (a DOM b?) query.
+ unsigned bbArrSize = m_pCompiler->fgBBNumMax + 1; // We will use 1-based bbNums as indices into these arrays, so
+ // add 1.
+ m_pDomPreOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
+ m_pDomPostOrder = jitstd::utility::allocate<int>(m_allocator, bbArrSize);
+
+ // Initial counters.
+ int preIndex = 0;
+ int postIndex = 0;
+
+ // Populate the pre and post order of the tree.
+ DomTreeWalk(m_pCompiler->fgFirstBB, domTree, &preIndex, &postIndex);
+#endif
+}
+
+#ifdef DEBUG
+
+/**
+ * Display the DOM tree.
+ *
+ * @param domTree A map of (block -> set of blocks) tree representation.
+ */
+/* static */
+void SsaBuilder::DisplayDominators(BlkToBlkSetMap* domTree)
+{
+ printf("After computing dominator tree: \n");
+ for (BlkToBlkSetMap::KeyIterator nodes = domTree->Begin(); !nodes.Equal(domTree->End()); ++nodes)
+ {
+ printf("BB%02u := {", nodes.Get()->bbNum);
+
+ BlkSet* pBlkSet = nodes.GetValue();
+ for (BlkSet::KeyIterator ki = pBlkSet->Begin(); !ki.Equal(pBlkSet->End()); ++ki)
+ {
+ if (!ki.Equal(pBlkSet->Begin()))
+ {
+ printf(",");
+ }
+ printf("BB%02u", ki.Get()->bbNum);
+ }
+ printf("}\n");
+ }
+}
+
+#endif // DEBUG
+
+// (Spec comment at declaration.)
+// See "A simple, fast dominance algorithm", by Cooper, Harvey, and Kennedy.
+// First we compute the dominance frontier for each block, then we convert these to iterated
+// dominance frontiers by a closure operation.
+BlkToBlkSetMap* SsaBuilder::ComputeIteratedDominanceFrontier(BasicBlock** postOrder, int count)
+{
+ BlkToBlkSetMap* frontier = new (m_pCompiler->getAllocator()) BlkToBlkSetMap(m_pCompiler->getAllocator());
+
+ DBG_SSA_JITDUMP("Computing IDF: First computing DF.\n");
+
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+
+ DBG_SSA_JITDUMP("Considering block BB%02u.\n", block->bbNum);
+
+ // Recall that B3 is in the dom frontier of B1 if there exists a B2
+ // such that B1 dom B2, !(B1 dom B3), and B3 is an immediate successor
+ // of B2. (Note that B1 might be the same block as B2.)
+ // In that definition, we're considering "block" to be B3, and trying
+ // to find B1's. To do so, first we consider the predecessors of "block",
+ // searching for candidate B2's -- "block" is obviously an immediate successor
+        // of its immediate predecessors. If there are zero or one preds, then either
+        // there is no pred at all, or the single pred dominates "block", so no suitable B2 exists.
+
+ flowList* blockPreds = m_pCompiler->BlockPredsWithEH(block);
+
+        // If the block has zero or one predecessors, skip it.
+ if (blockPreds == nullptr || blockPreds->flNext == nullptr)
+ {
+ DBG_SSA_JITDUMP(" Has %d preds; skipping.\n", blockPreds == nullptr ? 0 : 1);
+ continue;
+ }
+
+ // Otherwise, there are > 1 preds. Each is a candidate B2 in the definition --
+ // *unless* it dominates "block"/B3.
+
+ for (flowList* pred = blockPreds; pred; pred = pred->flNext)
+ {
+ DBG_SSA_JITDUMP(" Considering predecessor BB%02u.\n", pred->flBlock->bbNum);
+
+ // If we've found a B2, then consider the possible B1's. We start with
+ // B2, since a block dominates itself, then traverse upwards in the dominator
+ // tree, stopping when we reach the root, or the immediate dominator of "block"/B3.
+ // (Note that we are guaranteed to encounter this immediate dominator of "block"/B3:
+ // a predecessor must be dominated by B3's immediate dominator.)
+ // Along this way, make "block"/B3 part of the dom frontier of the B1.
+ // When we reach this immediate dominator, the definition no longer applies, since this
+ // potential B1 *does* dominate "block"/B3, so we stop.
+ for (BasicBlock* b1 = pred->flBlock; (b1 != nullptr) && (b1 != block->bbIDom); // !root && !loop
+ b1 = b1->bbIDom)
+ {
+ DBG_SSA_JITDUMP(" Adding BB%02u to dom frontier of pred dom BB%02u.\n", block->bbNum, b1->bbNum);
+ BlkSet* pBlkSet;
+ if (!frontier->Lookup(b1, &pBlkSet))
+ {
+ pBlkSet = new (m_pCompiler->getAllocator()) BlkSet(m_pCompiler->getAllocator());
+ frontier->Set(b1, pBlkSet);
+ }
+ pBlkSet->Set(block, true);
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verboseSsa)
+ {
+ printf("\nComputed DF:\n");
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+ printf("Block BB%02u := {", block->bbNum);
+
+ bool first = true;
+ BlkSet* blkDf;
+ if (frontier->Lookup(block, &blkDf))
+ {
+ for (BlkSet::KeyIterator blkDfIter = blkDf->Begin(); !blkDfIter.Equal(blkDf->End()); blkDfIter++)
+ {
+ if (!first)
+ {
+ printf(",");
+ }
+ printf("BB%02u", blkDfIter.Get()->bbNum);
+ first = false;
+ }
+ }
+ printf("}\n");
+ }
+ }
+#endif
+
+ // Now do the closure operation to make the dominance frontier into an IDF.
+ // There's probably a better way to do this...
+ BlkToBlkSetMap* idf = new (m_pCompiler->getAllocator()) BlkToBlkSetMap(m_pCompiler->getAllocator());
+ for (BlkToBlkSetMap::KeyIterator kiFrontBlks = frontier->Begin(); !kiFrontBlks.Equal(frontier->End());
+ kiFrontBlks++)
+ {
+ // Create IDF(b)
+ BlkSet* blkIdf = new (m_pCompiler->getAllocator()) BlkSet(m_pCompiler->getAllocator());
+ idf->Set(kiFrontBlks.Get(), blkIdf);
+
+ // Keep track of what got newly added to the IDF, so we can go after their DFs.
+ BlkSet* delta = new (m_pCompiler->getAllocator()) BlkSet(m_pCompiler->getAllocator());
+ delta->Set(kiFrontBlks.Get(), true);
+
+ // Now transitively add DF+(delta) to IDF(b), each step gathering new "delta."
+ while (delta->GetCount() > 0)
+ {
+ // Extract a block x to be worked on.
+ BlkSet::KeyIterator ki = delta->Begin();
+ BasicBlock* curBlk = ki.Get();
+ // TODO-Cleanup: Remove(ki) doesn't work correctly in SimplerHash.
+ delta->Remove(curBlk);
+
+ // Get DF(x).
+ BlkSet* blkDf;
+ if (frontier->Lookup(curBlk, &blkDf))
+ {
+ // Add DF(x) to IDF(b) and update "delta" i.e., new additions to IDF(b).
+ for (BlkSet::KeyIterator ki = blkDf->Begin(); !ki.Equal(blkDf->End()); ki++)
+ {
+ if (!blkIdf->Lookup(ki.Get()))
+ {
+ delta->Set(ki.Get(), true);
+ blkIdf->Set(ki.Get(), true);
+ }
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (m_pCompiler->verboseSsa)
+ {
+ printf("\nComputed IDF:\n");
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+ printf("Block BB%02u := {", block->bbNum);
+
+ bool first = true;
+ BlkSet* blkIdf;
+ if (idf->Lookup(block, &blkIdf))
+ {
+ for (BlkSet::KeyIterator ki = blkIdf->Begin(); !ki.Equal(blkIdf->End()); ki++)
+ {
+ if (!first)
+ {
+ printf(",");
+ }
+ printf("BB%02u", ki.Get()->bbNum);
+ first = false;
+ }
+ }
+ printf("}\n");
+ }
+ }
+#endif
+
+ return idf;
+}
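+
+// Illustrative sketch (block names assumed): for the diamond
+//
+//     A -> B,  A -> C,  B -> D,  C -> D        (bbIDom(B) = bbIDom(C) = bbIDom(D) = A)
+//
+// only D has more than one predecessor. Walking up from pred B stops at D's IDom (A) after one
+// step, so DF(B) = {D}; likewise DF(C) = {D}, while DF(A) and DF(D) stay empty. The closure loop
+// adds nothing here, so IDF == DF. If a back edge D -> B were added, DF(D) would become {B}, and
+// the "delta" worklist above would pull B transitively into IDF(B), IDF(C), and IDF(D), even
+// though B is not in DF(C) itself.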
+
+/**
+ * Returns the GT_PHI node if the variable already has a phi node in the given block.
+ *
+ * @param block The block for which the existence of a phi node needs to be checked.
+ * @param lclNum The lclNum for which the occurrence of a phi node needs to be checked.
+ *
+ * @return If there is a phi node for the lclNum, returns the GT_PHI tree, else NULL.
+ */
+static GenTree* GetPhiNode(BasicBlock* block, unsigned lclNum)
+{
+ // Walk the statements for phi nodes.
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ // A prefix of the statements of the block are phi definition nodes. If we complete processing
+ // that prefix, exit.
+ if (!stmt->IsPhiDefnStmt())
+ {
+ break;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+ GenTreePtr phiLhs = tree->gtOp.gtOp1;
+ assert(phiLhs->OperGet() == GT_LCL_VAR);
+ if (phiLhs->gtLclVarCommon.gtLclNum == lclNum)
+ {
+ return tree->gtOp.gtOp2;
+ }
+ }
+ return nullptr;
+}
+
+/**
+ * Inserts phi functions at DF(b) for variables v that are live after the phi
+ * insertion point, i.e., v in live-in(l) for blocks l in DF(b).
+ *
+ * To do so, the function computes liveness, dominance frontier and inserts a phi node,
+ * if we have var v in def(b) and live-in(l) and l is in DF(b).
+ *
+ * @param postOrder The array of basic blocks arranged in postOrder.
+ * @param count The size of valid elements in the postOrder array.
+ */
+void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count)
+{
+ JITDUMP("*************** In SsaBuilder::InsertPhiFunctions()\n");
+
+ // Compute liveness on the graph.
+ m_pCompiler->fgLocalVarLiveness();
+ EndPhase(PHASE_BUILD_SSA_LIVENESS);
+
+ // Compute dominance frontier.
+ BlkToBlkSetMap* frontier = ComputeIteratedDominanceFrontier(postOrder, count);
+ EndPhase(PHASE_BUILD_SSA_IDF);
+
+ JITDUMP("Inserting phi functions:\n");
+
+ for (int i = 0; i < count; ++i)
+ {
+ BasicBlock* block = postOrder[i];
+ DBG_SSA_JITDUMP("Considering dominance frontier of block BB%02u:\n", block->bbNum);
+
+ // If the block's dominance frontier is empty, go on to the next block.
+ BlkSet* blkIdf;
+ if (!frontier->Lookup(block, &blkIdf))
+ {
+ continue;
+ }
+
+ // For each local var number "lclNum" that "block" assigns to...
+ VARSET_ITER_INIT(m_pCompiler, defVars, block->bbVarDef, varIndex);
+ while (defVars.NextElem(m_pCompiler, &varIndex))
+ {
+ unsigned lclNum = m_pCompiler->lvaTrackedToVarNum[varIndex];
+ DBG_SSA_JITDUMP(" Considering local var V%02u:\n", lclNum);
+
+ if (m_pCompiler->fgExcludeFromSsa(lclNum))
+ {
+ DBG_SSA_JITDUMP(" Skipping because it is excluded.\n");
+ continue;
+ }
+
+ // For each block "bbInDomFront" that is in the dominance frontier of "block"...
+ for (BlkSet::KeyIterator iterBlk = blkIdf->Begin(); !iterBlk.Equal(blkIdf->End()); ++iterBlk)
+ {
+ BasicBlock* bbInDomFront = iterBlk.Get();
+ DBG_SSA_JITDUMP(" Considering BB%02u in dom frontier of BB%02u:\n", bbInDomFront->bbNum,
+ block->bbNum);
+
+ // Check if variable "lclNum" is live in block "*iterBlk".
+ if (!VarSetOps::IsMember(m_pCompiler, bbInDomFront->bbLiveIn, varIndex))
+ {
+ continue;
+ }
+
+ // Check if we've already inserted a phi node.
+ if (GetPhiNode(bbInDomFront, lclNum) == nullptr)
+ {
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
+ JITDUMP("Inserting phi definition for V%02u at start of BB%02u.\n", lclNum, bbInDomFront->bbNum);
+
+ GenTreePtr phiLhs = m_pCompiler->gtNewLclvNode(lclNum, m_pCompiler->lvaTable[lclNum].TypeGet());
+
+                    // Create 'phiRhs' as a GT_PHI node for 'lclNum'; it will eventually hold a GT_LIST of
+                    // GT_PHI_ARG nodes. However, we have yet to construct that list, so for now the gtOp1
+                    // of 'phiRhs' is nullptr. It will be replaced with a GT_LIST of GT_PHI_ARG nodes in
+                    // SsaBuilder::AssignPhiNodeRhsVariables() and in SsaBuilder::AddDefToHandlerPhis().
+
+ GenTreePtr phiRhs =
+ m_pCompiler->gtNewOperNode(GT_PHI, m_pCompiler->lvaTable[lclNum].TypeGet(), nullptr);
+
+ GenTreePtr phiAsg = m_pCompiler->gtNewAssignNode(phiLhs, phiRhs);
+
+ GenTreePtr stmt = m_pCompiler->fgInsertStmtAtBeg(bbInDomFront, phiAsg);
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+ }
+ }
+ }
+
+ // Now make a similar phi definition if the block defines Heap.
+ if (block->bbHeapDef)
+ {
+ // For each block "bbInDomFront" that is in the dominance frontier of "block".
+ for (BlkSet::KeyIterator iterBlk = blkIdf->Begin(); !iterBlk.Equal(blkIdf->End()); ++iterBlk)
+ {
+ BasicBlock* bbInDomFront = iterBlk.Get();
+ DBG_SSA_JITDUMP(" Considering BB%02u in dom frontier of BB%02u for Heap phis:\n",
+ bbInDomFront->bbNum, block->bbNum);
+
+ // Check if Heap is live into block "*iterBlk".
+ if (!bbInDomFront->bbHeapLiveIn)
+ {
+ continue;
+ }
+
+ // Check if we've already inserted a phi node.
+ if (bbInDomFront->bbHeapSsaPhiFunc == nullptr)
+ {
+ // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
+ // j. So insert a phi node at l.
+ JITDUMP("Inserting phi definition for Heap at start of BB%02u.\n", bbInDomFront->bbNum);
+ bbInDomFront->bbHeapSsaPhiFunc = BasicBlock::EmptyHeapPhiDef;
+ }
+ }
+ }
+ }
+ EndPhase(PHASE_BUILD_SSA_INSERT_PHIS);
+}
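+
+// Illustrative sketch (local and block names assumed): continuing the diamond example, if V01 is
+// assigned in B and in C and is live into D, then D is in the iterated dominance frontier of both
+// defining blocks and V01 is in D's bbLiveIn set, so the loop above inserts a single statement
+//
+//     V01 = phi(...)        // GT_ASG(GT_LCL_VAR V01, GT_PHI)
+//
+// at the start of D. The GT_PHI argument list is left empty (gtOp1 == nullptr) here and is filled
+// in later by AssignPhiNodeRhsVariables / AddDefToHandlerPhis, as noted above.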
+
+#ifdef SSA_FEATURE_USEDEF
+/**
+ * Record a use point of a variable.
+ *
+ * The use point is just the tree that is a local variable use.
+ *
+ * @param tree Tree node where an SSA variable is used.
+ *
+ * @remarks The result is in the m_uses map :: [lclNum, ssaNum] -> tree.
+ */
+void SsaBuilder::AddUsePoint(GenTree* tree)
+{
+ assert(tree->IsLocal());
+ SsaVarName key(tree->gtLclVarCommon.gtLclNum, tree->gtLclVarCommon.gtSsaNum);
+ VarToUses::iterator iter = m_uses.find(key);
+ if (iter == m_uses.end())
+ {
+ iter = m_uses.insert(key, VarToUses::mapped_type(m_uses.get_allocator()));
+ }
+ (*iter).second.push_back(tree);
+}
+#endif // !SSA_FEATURE_USEDEF
+
+/**
+ * Record a def point of a variable.
+ *
+ * The def point is just the tree that is a local variable def.
+ *
+ * @param tree Tree node where an SSA variable is def'ed.
+ *
+ * @remarks The result is in the m_defs map :: [lclNum, ssaNum] -> tree.
+ */
+void SsaBuilder::AddDefPoint(GenTree* tree, BasicBlock* blk)
+{
+ Compiler::IndirectAssignmentAnnotation* pIndirAnnot;
+ // In the case of an "indirect assignment", where the LHS is IND of a byref to the local actually being assigned,
+ // we make the ASG tree the def point.
+ assert(tree->IsLocal() || IsIndirectAssign(tree, &pIndirAnnot));
+ unsigned lclNum;
+ unsigned defSsaNum;
+ if (tree->IsLocal())
+ {
+ lclNum = tree->gtLclVarCommon.gtLclNum;
+ defSsaNum = m_pCompiler->GetSsaNumForLocalVarDef(tree);
+ }
+ else
+ {
+ bool b = m_pCompiler->GetIndirAssignMap()->Lookup(tree, &pIndirAnnot);
+ assert(b);
+ lclNum = pIndirAnnot->m_lclNum;
+ defSsaNum = pIndirAnnot->m_defSsaNum;
+ }
+#ifdef DEBUG
+ // Record that there's a new SSA def.
+ m_pCompiler->lvaTable[lclNum].lvNumSsaNames++;
+#endif
+ // Record where the defn happens.
+ LclSsaVarDsc* ssaDef = m_pCompiler->lvaTable[lclNum].GetPerSsaData(defSsaNum);
+ ssaDef->m_defLoc.m_blk = blk;
+ ssaDef->m_defLoc.m_tree = tree;
+
+#ifdef SSA_FEATURE_USEDEF
+ SsaVarName key(lclNum, defSsaNum);
+ VarToDef::iterator iter = m_defs.find(key);
+ if (iter == m_defs.end())
+ {
+ iter = m_defs.insert(key, tree);
+ return;
+ }
+ // There can only be a single definition for an SSA var.
+ unreached();
+#endif
+}
+
+bool SsaBuilder::IsIndirectAssign(GenTreePtr tree, Compiler::IndirectAssignmentAnnotation** ppIndirAssign)
+{
+ return tree->OperGet() == GT_ASG && m_pCompiler->m_indirAssignMap != nullptr &&
+ m_pCompiler->GetIndirAssignMap()->Lookup(tree, ppIndirAssign);
+}
+
+/**
+ * Rename the local variable tree node.
+ *
+ * If the given tree node is a local variable, then for a def give a new count, if use,
+ * then give the count in the top of stack, i.e., current count (used for last def.)
+ *
+ * @param tree Tree node where an SSA variable is used or def'ed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ * @remarks This method has to maintain parity with TreePopStacks corresponding to pushes
+ * it makes for defs.
+ */
+void SsaBuilder::TreeRenameVariables(GenTree* tree, BasicBlock* block, SsaRenameState* pRenameState, bool isPhiDefn)
+{
+ // This is perhaps temporary -- maybe should be done elsewhere. Label GT_INDs on LHS of assignments, so we
+ // can skip these during (at least) value numbering.
+ if (tree->OperIsAssignment())
+ {
+ GenTreePtr lhs = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
+ GenTreePtr trueLhs = lhs->gtEffectiveVal(/*commaOnly*/ true);
+ if (trueLhs->OperIsIndir())
+ {
+ trueLhs->gtFlags |= GTF_IND_ASG_LHS;
+ }
+ else if (trueLhs->OperGet() == GT_CLS_VAR)
+ {
+ trueLhs->gtFlags |= GTF_CLS_VAR_ASG_LHS;
+ }
+ }
+
+ // Figure out if "tree" may make a new heap state (if we care for this block).
+ if (!block->bbHeapHavoc)
+ {
+ if (tree->OperIsAssignment() || tree->OperIsBlkOp())
+ {
+ if (m_pCompiler->ehBlockHasExnFlowDsc(block))
+ {
+ GenTreeLclVarCommon* lclVarNode;
+ if (!tree->DefinesLocal(m_pCompiler, &lclVarNode))
+ {
+ // It *may* define the heap in a non-havoc way. Make a new SSA # -- associate with this node.
+ unsigned count = pRenameState->CountForHeapDef();
+ pRenameState->PushHeap(block, count);
+ m_pCompiler->GetHeapSsaMap()->Set(tree, count);
+#ifdef DEBUG
+ if (JitTls::GetCompiler()->verboseSsa)
+ {
+ printf("Node ");
+ Compiler::printTreeID(tree);
+ printf(" (in try block) may define heap; ssa # = %d.\n", count);
+ }
+#endif // DEBUG
+
+ // Now add this SSA # to all phis of the reachable catch blocks.
+ AddHeapDefToHandlerPhis(block, count);
+ }
+ }
+ }
+ }
+
+ Compiler::IndirectAssignmentAnnotation* pIndirAssign = nullptr;
+ if (!tree->IsLocal() && !IsIndirectAssign(tree, &pIndirAssign))
+ {
+ return;
+ }
+
+ if (pIndirAssign != nullptr)
+ {
+ unsigned lclNum = pIndirAssign->m_lclNum;
+ // Is this a variable we exclude from SSA?
+ if (m_pCompiler->fgExcludeFromSsa(lclNum))
+ {
+ pIndirAssign->m_defSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ return;
+ }
+ // Otherwise...
+ if (!pIndirAssign->m_isEntire)
+ {
+ pIndirAssign->m_useSsaNum = pRenameState->CountForUse(lclNum);
+ }
+ unsigned count = pRenameState->CountForDef(lclNum);
+ pIndirAssign->m_defSsaNum = count;
+ pRenameState->Push(block, lclNum, count);
+ AddDefPoint(tree, block);
+ }
+ else
+ {
+ unsigned lclNum = tree->gtLclVarCommon.gtLclNum;
+ // Is this a variable we exclude from SSA?
+ if (m_pCompiler->fgExcludeFromSsa(lclNum))
+ {
+ tree->gtLclVarCommon.SetSsaNum(SsaConfig::RESERVED_SSA_NUM);
+ return;
+ }
+
+ if (tree->gtFlags & GTF_VAR_DEF)
+ {
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+                // This is the "x" in something like "x op= y"; it is both a use (first), then a def.
+                // The use's SSA name is recorded in "x" itself; the new SSA name created by the def
+                // is recorded in the map reserved for that purpose (GetOpAsgnVarDefSsaNums, below).
+ unsigned count = pRenameState->CountForUse(lclNum);
+ tree->gtLclVarCommon.SetSsaNum(count);
+#ifdef SSA_FEATURE_USEDEF
+ AddUsePoint(tree);
+#endif
+ }
+
+ // Give a count and increment.
+ unsigned count = pRenameState->CountForDef(lclNum);
+ if (tree->gtFlags & GTF_VAR_USEASG)
+ {
+ m_pCompiler->GetOpAsgnVarDefSsaNums()->Set(tree, count);
+ }
+ else
+ {
+ tree->gtLclVarCommon.SetSsaNum(count);
+ }
+ pRenameState->Push(block, lclNum, count);
+ AddDefPoint(tree, block);
+
+ // If necessary, add "lclNum/count" to the arg list of a phi def in any
+ // handlers for try blocks that "block" is within. (But only do this for "real" definitions,
+ // not phi definitions.)
+ if (!isPhiDefn)
+ {
+ AddDefToHandlerPhis(block, lclNum, count);
+ }
+ }
+ else if (!isPhiDefn) // Phi args already have ssa numbers.
+ {
+ // This case is obviated by the short-term "early-out" above...but it's in the right direction.
+ // Is it a promoted struct local?
+ if (m_pCompiler->lvaTable[lclNum].lvPromoted)
+ {
+ assert(tree->TypeGet() == TYP_STRUCT);
+ LclVarDsc* varDsc = &m_pCompiler->lvaTable[lclNum];
+ // If has only a single field var, treat this as a use of that field var.
+ // Otherwise, we don't give SSA names to uses of promoted struct vars.
+ if (varDsc->lvFieldCnt == 1)
+ {
+ lclNum = varDsc->lvFieldLclStart;
+ }
+ else
+ {
+ tree->gtLclVarCommon.SetSsaNum(SsaConfig::RESERVED_SSA_NUM);
+ return;
+ }
+ }
+ // Give the count as top of stack.
+ unsigned count = pRenameState->CountForUse(lclNum);
+ tree->gtLclVarCommon.SetSsaNum(count);
+#ifdef SSA_FEATURE_USEDEF
+ AddUsePoint(tree);
+#endif
+ }
+ }
+}
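+
+// Illustrative sketch (SSA numbers assumed): for a block containing
+//
+//     x = ...;      // def:  CountForDef(x) hands out a fresh number, say 2, and it is pushed
+//     y = x + 1;    // use:  CountForUse(x) reads the top of x's rename stack, i.e. 2
+//     x += y;       // GTF_VAR_DEF | GTF_VAR_USEASG: the node keeps the use number (2), while
+//                   // the new def number (3) is recorded via GetOpAsgnVarDefSsaNums()
+//
+// every def pushes its number onto the per-variable stack through pRenameState->Push, and
+// BlockPopStacks later pops those entries when the dominator-tree walk leaves the block.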
+
+void SsaBuilder::AddDefToHandlerPhis(BasicBlock* block, unsigned lclNum, unsigned count)
+{
+ assert(m_pCompiler->lvaTable[lclNum].lvTracked); // Precondition.
+ unsigned lclIndex = m_pCompiler->lvaTable[lclNum].lvVarIndex;
+
+ EHblkDsc* tryBlk = m_pCompiler->ehGetBlockExnFlowDsc(block);
+ if (tryBlk != nullptr)
+ {
+ DBG_SSA_JITDUMP(
+ "Definition of local V%02u/d:%d in block BB%02u has exn handler; adding as phi arg to handlers.\n", lclNum,
+ count, block->bbNum);
+ while (true)
+ {
+ BasicBlock* handler = tryBlk->ExFlowBlock();
+
+ // Is "lclNum" live on entry to the handler?
+ if (VarSetOps::IsMember(m_pCompiler, handler->bbLiveIn, lclIndex))
+ {
+#ifdef DEBUG
+ bool phiFound = false;
+#endif
+                // A prefix of the block's statements will be SSA definitions. Search those for "lclNum".
+ for (GenTreePtr stmt = handler->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ // If the tree is not an SSA def, break out of the loop: we're done.
+ if (!stmt->IsPhiDefnStmt())
+ {
+ break;
+ }
+
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+ assert(tree->IsPhiDefn());
+
+ if (tree->gtOp.gtOp1->gtLclVar.gtLclNum == lclNum)
+ {
+ // It's the definition for the right local. Add "count" to the RHS.
+ GenTreePtr phi = tree->gtOp.gtOp2;
+ GenTreeArgList* args = nullptr;
+ if (phi->gtOp.gtOp1 != nullptr)
+ {
+ args = phi->gtOp.gtOp1->AsArgList();
+ }
+#ifdef DEBUG
+ // Make sure it isn't already present: we should only add each definition once.
+ for (GenTreeArgList* curArgs = args; curArgs != nullptr; curArgs = curArgs->Rest())
+ {
+ GenTreePhiArg* phiArg = curArgs->Current()->AsPhiArg();
+ assert(phiArg->gtSsaNum != count);
+ }
+#endif
+ var_types typ = m_pCompiler->lvaTable[lclNum].TypeGet();
+ GenTreePhiArg* newPhiArg =
+ new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(typ, lclNum, count, block);
+
+ phi->gtOp.gtOp1 = new (m_pCompiler, GT_LIST) GenTreeArgList(newPhiArg, args);
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+#ifdef DEBUG
+ phiFound = true;
+#endif
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for V%02u to phi defn in handler block BB%02u.\n", count,
+ lclNum, handler->bbNum);
+ break;
+ }
+ }
+ assert(phiFound);
+ }
+
+ unsigned nextTryIndex = tryBlk->ebdEnclosingTryIndex;
+ if (nextTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+
+ tryBlk = m_pCompiler->ehGetDsc(nextTryIndex);
+ }
+ }
+}
+
+void SsaBuilder::AddHeapDefToHandlerPhis(BasicBlock* block, unsigned count)
+{
+ if (m_pCompiler->ehBlockHasExnFlowDsc(block))
+ {
+ // Don't do anything for a compiler-inserted BBJ_ALWAYS that is a "leave helper".
+ if (block->bbJumpKind == BBJ_ALWAYS && (block->bbFlags & BBF_INTERNAL) && (block->bbPrev->isBBCallAlwaysPair()))
+ {
+ return;
+ }
+
+ // Otherwise...
+ DBG_SSA_JITDUMP("Definition of Heap/d:%d in block BB%02u has exn handler; adding as phi arg to handlers.\n",
+ count, block->bbNum);
+ EHblkDsc* tryBlk = m_pCompiler->ehGetBlockExnFlowDsc(block);
+ while (true)
+ {
+ BasicBlock* handler = tryBlk->ExFlowBlock();
+
+ // Is Heap live on entry to the handler?
+ if (handler->bbHeapLiveIn)
+ {
+ assert(handler->bbHeapSsaPhiFunc != nullptr);
+
+ // Add "count" to the phi args of Heap.
+ if (handler->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
+ {
+ handler->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(count);
+ }
+ else
+ {
+#ifdef DEBUG
+ BasicBlock::HeapPhiArg* curArg = handler->bbHeapSsaPhiFunc;
+ while (curArg != nullptr)
+ {
+ assert(curArg->GetSsaNum() != count);
+ curArg = curArg->m_nextArg;
+ }
+#endif // DEBUG
+ handler->bbHeapSsaPhiFunc =
+ new (m_pCompiler) BasicBlock::HeapPhiArg(count, handler->bbHeapSsaPhiFunc);
+ }
+
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for Heap to phi defn in handler block BB%02u.\n", count,
+ handler->bbNum);
+ }
+ unsigned tryInd = tryBlk->ebdEnclosingTryIndex;
+ if (tryInd == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+ tryBlk = m_pCompiler->ehGetDsc(tryInd);
+ }
+ }
+}
+
+/**
+ * Walk the block's tree in the evaluation order and give var definitions and uses their
+ * SSA names.
+ *
+ * @param block Block for which SSA variables have to be renamed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ */
+void SsaBuilder::BlockRenameVariables(BasicBlock* block, SsaRenameState* pRenameState)
+{
+ // Walk the statements of the block and rename the tree variables.
+
+ // First handle the incoming Heap state.
+
+    // Is there a phi definition for Heap at the start of this block?
+ if (block->bbHeapSsaPhiFunc != nullptr)
+ {
+ unsigned count = pRenameState->CountForHeapDef();
+ pRenameState->PushHeap(block, count);
+
+ DBG_SSA_JITDUMP("Ssa # for Heap phi on entry to BB%02u is %d.\n", block->bbNum, count);
+ }
+
+ // Record the "in" Ssa # for Heap.
+ block->bbHeapSsaNumIn = pRenameState->CountForHeapUse();
+
+ // We need to iterate over phi definitions, to give them SSA names, but we need
+ // to know which are which, so we don't add phi definitions to handler phi arg lists.
+ // Statements are phi defns until they aren't.
+ bool isPhiDefn = true;
+ GenTreePtr firstNonPhi = block->FirstNonPhiDef();
+ for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ if (stmt == firstNonPhi)
+ {
+ isPhiDefn = false;
+ }
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ TreeRenameVariables(tree, block, pRenameState, isPhiDefn);
+ }
+ }
+
+ // Now handle the final heap state.
+
+ // If the block defines Heap, allocate an SSA variable for the final heap state in the block.
+ // (This may be redundant with the last SSA var explicitly created, but there's no harm in that.)
+ if (block->bbHeapDef)
+ {
+ unsigned count = pRenameState->CountForHeapDef();
+ pRenameState->PushHeap(block, count);
+ AddHeapDefToHandlerPhis(block, count);
+ }
+
+    // Record the "out" Ssa # for Heap.
+ block->bbHeapSsaNumOut = pRenameState->CountForHeapUse();
+
+ DBG_SSA_JITDUMP("Ssa # for Heap on entry to BB%02u is %d; on exit is %d.\n", block->bbNum, block->bbHeapSsaNumIn,
+ block->bbHeapSsaNumOut);
+}
+
+/**
+ * Walk through the phi nodes of a given block and assign rhs variables to them.
+ *
+ * Also renumber the rhs variables from top of the stack.
+ *
+ * @param block Block for which phi nodes have to be assigned their rhs arguments.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ */
+void SsaBuilder::AssignPhiNodeRhsVariables(BasicBlock* block, SsaRenameState* pRenameState)
+{
+ BasicBlock::AllSuccs allSuccs = block->GetAllSuccs(m_pCompiler);
+ AllSuccessorIter allSuccsEnd = allSuccs.end();
+ for (AllSuccessorIter allSuccsIter = allSuccs.begin(); allSuccsIter != allSuccsEnd; ++allSuccsIter)
+ {
+ BasicBlock* succ = (*allSuccsIter);
+ // Walk the statements for phi nodes.
+ for (GenTreePtr stmt = succ->bbTreeList; stmt != nullptr && stmt->IsPhiDefnStmt(); stmt = stmt->gtNext)
+ {
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+ assert(tree->IsPhiDefn());
+
+ // Get the phi node from GT_ASG.
+ GenTreePtr phiNode = tree->gtOp.gtOp2;
+ assert(phiNode->gtOp.gtOp1 == nullptr || phiNode->gtOp.gtOp1->OperGet() == GT_LIST);
+
+ unsigned lclNum = tree->gtOp.gtOp1->gtLclVar.gtLclNum;
+ unsigned ssaNum = pRenameState->CountForUse(lclNum);
+ // Search the arglist for an existing definition for ssaNum.
+            // (Can we assert that it's the head of the list? This should only happen when we add
+ // during renaming for a definition that occurs within a try, and then that's the last
+ // value of the var within that basic block.)
+ GenTreeArgList* argList = (phiNode->gtOp.gtOp1 == nullptr ? nullptr : phiNode->gtOp.gtOp1->AsArgList());
+ bool found = false;
+ while (argList != nullptr)
+ {
+ if (argList->Current()->AsLclVarCommon()->GetSsaNum() == ssaNum)
+ {
+ found = true;
+ break;
+ }
+ argList = argList->Rest();
+ }
+ if (!found)
+ {
+ GenTreePtr newPhiArg =
+ new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(tree->gtOp.gtOp1->TypeGet(), lclNum, ssaNum, block);
+ argList = (phiNode->gtOp.gtOp1 == nullptr ? nullptr : phiNode->gtOp.gtOp1->AsArgList());
+ phiNode->gtOp.gtOp1 = new (m_pCompiler, GT_LIST) GenTreeArgList(newPhiArg, argList);
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for V%02u from BB%02u in BB%02u.\n", ssaNum, lclNum, block->bbNum,
+ succ->bbNum);
+ }
+
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+ }
+
+ // Now handle Heap.
+ if (succ->bbHeapSsaPhiFunc != nullptr)
+ {
+ if (succ->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
+ {
+ succ->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block);
+ }
+ else
+ {
+ BasicBlock::HeapPhiArg* curArg = succ->bbHeapSsaPhiFunc;
+ bool found = false;
+                // This is a quadratic algorithm. We might need to consider switching to a hash table
+ // representation for the arguments of a phi node, to make this linear.
+ while (curArg != nullptr)
+ {
+ if (curArg->m_predBB == block)
+ {
+ found = true;
+ break;
+ }
+ curArg = curArg->m_nextArg;
+ }
+ if (!found)
+ {
+ succ->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block, succ->bbHeapSsaPhiFunc);
+ }
+ }
+ DBG_SSA_JITDUMP(" Added phi arg for Heap from BB%02u in BB%02u.\n", block->bbNum, succ->bbNum);
+ }
+
+ // If "succ" is the first block of a try block (and "block" is not also in that try block)
+ // then we must look at the vars that have phi defs in the corresponding handler;
+ // the current SSA name for such vars must be included as an argument to that phi.
+ if (m_pCompiler->bbIsTryBeg(succ))
+ {
+ assert(succ->hasTryIndex());
+ unsigned tryInd = succ->getTryIndex();
+
+ while (tryInd != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ // Check if the predecessor "block" is within the same try block.
+ if (block->hasTryIndex())
+ {
+ for (unsigned blockTryInd = block->getTryIndex(); blockTryInd != EHblkDsc::NO_ENCLOSING_INDEX;
+ blockTryInd = m_pCompiler->ehGetEnclosingTryIndex(blockTryInd))
+ {
+ if (blockTryInd == tryInd)
+ {
+ // It is; don't execute the loop below.
+ tryInd = EHblkDsc::NO_ENCLOSING_INDEX;
+ break;
+ }
+ }
+
+ // The loop just above found that the predecessor "block" is within the same
+ // try block as "succ." So we don't need to process this try, or any
+ // further outer try blocks here, since they would also contain both "succ"
+ // and "block".
+ if (tryInd == EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ break;
+ }
+ }
+
+ EHblkDsc* succTry = m_pCompiler->ehGetDsc(tryInd);
+ // This is necessarily true on the first iteration, but not
+ // necessarily on the second and subsequent.
+ if (succTry->ebdTryBeg != succ)
+ {
+ break;
+ }
+
+ // succ is the first block of this try. Look at phi defs in the handler.
+ // For a filter, we consider the filter to be the "real" handler.
+ BasicBlock* handlerStart = succTry->ExFlowBlock();
+
+ for (GenTreePtr stmt = handlerStart->bbTreeList; stmt; stmt = stmt->gtNext)
+ {
+ GenTreePtr tree = stmt->gtStmt.gtStmtExpr;
+
+ // Check if the first n of the statements are phi nodes. If not, exit.
+ if (tree->OperGet() != GT_ASG || tree->gtOp.gtOp2 == nullptr ||
+ tree->gtOp.gtOp2->OperGet() != GT_PHI)
+ {
+ break;
+ }
+
+ // Get the phi node from GT_ASG.
+ GenTreePtr lclVar = tree->gtOp.gtOp1;
+ unsigned lclNum = lclVar->gtLclVar.gtLclNum;
+
+                    // If the variable is live-out of "block", and is therefore live on entry to the
+                    // try-block-start "succ", then we make sure the current SSA name for the var is
+                    // one of the args of the phi node. If not, go on.
+ LclVarDsc* lclVarDsc = &m_pCompiler->lvaTable[lclNum];
+ if (!lclVarDsc->lvTracked ||
+ !VarSetOps::IsMember(m_pCompiler, block->bbLiveOut, lclVarDsc->lvVarIndex))
+ {
+ continue;
+ }
+
+ GenTreePtr phiNode = tree->gtOp.gtOp2;
+ assert(phiNode->gtOp.gtOp1 == nullptr || phiNode->gtOp.gtOp1->OperGet() == GT_LIST);
+ GenTreeArgList* argList = reinterpret_cast<GenTreeArgList*>(phiNode->gtOp.gtOp1);
+
+ // What is the current SSAName from the predecessor for this local?
+ unsigned ssaNum = pRenameState->CountForUse(lclNum);
+
+ // See if this ssaNum is already an arg to the phi.
+ bool alreadyArg = false;
+ for (GenTreeArgList* curArgs = argList; curArgs != nullptr; curArgs = curArgs->Rest())
+ {
+ if (curArgs->Current()->gtPhiArg.gtSsaNum == ssaNum)
+ {
+ alreadyArg = true;
+ break;
+ }
+ }
+ if (!alreadyArg)
+ {
+ // Add the new argument.
+ GenTreePtr newPhiArg =
+ new (m_pCompiler, GT_PHI_ARG) GenTreePhiArg(lclVar->TypeGet(), lclNum, ssaNum, block);
+ phiNode->gtOp.gtOp1 = new (m_pCompiler, GT_LIST) GenTreeArgList(newPhiArg, argList);
+
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for V%02u from BB%02u in BB%02u.\n", ssaNum, lclNum,
+ block->bbNum, handlerStart->bbNum);
+
+ m_pCompiler->gtSetStmtInfo(stmt);
+ m_pCompiler->fgSetStmtSeq(stmt);
+ }
+ }
+
+ // Now handle Heap.
+ if (handlerStart->bbHeapSsaPhiFunc != nullptr)
+ {
+ if (handlerStart->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
+ {
+ handlerStart->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block);
+ }
+ else
+ {
+#ifdef DEBUG
+ BasicBlock::HeapPhiArg* curArg = handlerStart->bbHeapSsaPhiFunc;
+ while (curArg != nullptr)
+ {
+ assert(curArg->m_predBB != block);
+ curArg = curArg->m_nextArg;
+ }
+#endif // DEBUG
+ handlerStart->bbHeapSsaPhiFunc =
+ new (m_pCompiler) BasicBlock::HeapPhiArg(block, handlerStart->bbHeapSsaPhiFunc);
+ }
+ DBG_SSA_JITDUMP(" Added phi arg for Heap from BB%02u in BB%02u.\n", block->bbNum,
+ handlerStart->bbNum);
+ }
+
+ tryInd = succTry->ebdEnclosingTryIndex;
+ }
+ }
+ }
+}
+
+/**
+ * Reclaim (pop) the rename-stack entries that were pushed for definitions in this block.
+ *
+ * @param block Block for which SSA variables have to be renamed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ */
+void SsaBuilder::BlockPopStacks(BasicBlock* block, SsaRenameState* pRenameState)
+{
+ // Pop the names given to the non-phi nodes.
+ pRenameState->PopBlockStacks(block);
+
+ // And for Heap.
+ pRenameState->PopBlockHeapStack(block);
+}
+
+/**
+ * Perform variable renaming.
+ *
+ * Walks the blocks and renames all var defs with ssa numbers and all uses with the
+ * current count that is in the top of the stack. Assigns phi node rhs variables
+ * (i.e., the arguments to the phi.) Then, calls the function recursively on child
+ * nodes in the DOM tree to continue the renaming process.
+ *
+ * @param block Block for which SSA variables have to be renamed.
+ * @param pRenameState The incremental rename information stored during renaming process.
+ *
+ * @remarks At the end of the method, m_uses and m_defs should be populated linking the
+ * uses and defs.
+ *
+ * @see Briggs, Cooper, Harvey and Simpson "Practical Improvements to the Construction
+ * and Destruction of Static Single Assignment Form."
+ */
+
+void SsaBuilder::RenameVariables(BlkToBlkSetMap* domTree, SsaRenameState* pRenameState)
+{
+ JITDUMP("*************** In SsaBuilder::RenameVariables()\n");
+
+ // The first thing we do is treat parameters and must-init variables as if they have a
+ // virtual definition before entry -- they start out with the initial SSA name (SsaConfig::FIRST_SSA_NUM).
+ for (unsigned i = 0; i < m_pCompiler->lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &m_pCompiler->lvaTable[i];
+
+#ifdef DEBUG
+ varDsc->lvNumSsaNames = SsaConfig::UNINIT_SSA_NUM; // Start off fresh...
+#endif
+
+ if (varDsc->lvIsParam || m_pCompiler->info.compInitMem || varDsc->lvMustInit ||
+ (varDsc->lvTracked &&
+ VarSetOps::IsMember(m_pCompiler, m_pCompiler->fgFirstBB->bbLiveIn, varDsc->lvVarIndex)))
+ {
+ unsigned count = pRenameState->CountForDef(i);
+
+ // In ValueNum we'd assume un-inited variables get FIRST_SSA_NUM.
+ assert(count == SsaConfig::FIRST_SSA_NUM);
+#ifdef DEBUG
+ varDsc->lvNumSsaNames++;
+#endif
+ pRenameState->Push(nullptr, i, count);
+ }
+ }
+ // In ValueNum we'd assume un-inited heap gets FIRST_SSA_NUM.
+ // The heap is a parameter. Use FIRST_SSA_NUM as first SSA name.
+ unsigned initHeapCount = pRenameState->CountForHeapDef();
+ assert(initHeapCount == SsaConfig::FIRST_SSA_NUM);
+ pRenameState->PushHeap(m_pCompiler->fgFirstBB, initHeapCount);
+
+ // Initialize the heap ssa numbers for unreachable blocks. ValueNum expects
+ // heap ssa numbers to have some initial value.
+ for (BasicBlock* block = m_pCompiler->fgFirstBB; block; block = block->bbNext)
+ {
+ if (block->bbIDom == nullptr)
+ {
+ block->bbHeapSsaNumIn = initHeapCount;
+ block->bbHeapSsaNumOut = initHeapCount;
+ }
+ }
+
+ struct BlockWork
+ {
+ BasicBlock* m_blk;
+ bool m_processed; // Whether this block has already been processed: its vars renamed and its
+ // children processed. If so, it awaits only BlockPopStacks.
+ BlockWork(BasicBlock* blk, bool processed = false) : m_blk(blk), m_processed(processed)
+ {
+ }
+ };
+ typedef jitstd::vector<BlockWork> BlockWorkStack;
+ BlockWorkStack* blocksToDo =
+ new (jitstd::utility::allocate<BlockWorkStack>(m_allocator), jitstd::placement_t()) BlockWorkStack(m_allocator);
+
+ blocksToDo->push_back(BlockWork(m_pCompiler->fgFirstBB)); // Probably have to include other roots of dom tree.
+
+ while (blocksToDo->size() != 0)
+ {
+ BlockWork blockWrk = blocksToDo->back();
+ blocksToDo->pop_back();
+ BasicBlock* block = blockWrk.m_blk;
+
+ DBG_SSA_JITDUMP("[SsaBuilder::RenameVariables](BB%02u, processed = %d)\n", block->bbNum, blockWrk.m_processed);
+
+ if (!blockWrk.m_processed)
+ {
+ // Push the block back on the stack with "m_processed" true, to record the fact that when its children have
+ // been (recursively) processed, we still need to call BlockPopStacks on it.
+ blocksToDo->push_back(BlockWork(block, true));
+
+ // Walk the block, giving counts to DEFs and the top-of-stack count to USEs.
+ BlockRenameVariables(block, pRenameState);
+
+ // Assign arguments to the phi node of successors, corresponding to the block's index.
+ AssignPhiNodeRhsVariables(block, pRenameState);
+
+ // Recurse with the block's DOM children.
+ BlkSet* pBlkSet;
+ if (domTree->Lookup(block, &pBlkSet))
+ {
+ for (BlkSet::KeyIterator child = pBlkSet->Begin(); !child.Equal(pBlkSet->End()); ++child)
+ {
+ DBG_SSA_JITDUMP("[SsaBuilder::RenameVariables](pushing dom child BB%02u)\n", child.Get()->bbNum);
+ blocksToDo->push_back(BlockWork(child.Get()));
+ }
+ }
+ }
+ else
+ {
+ // Done, pop all the stack count, if there is one for this block.
+ BlockPopStacks(block, pRenameState);
+ DBG_SSA_JITDUMP("[SsaBuilder::RenameVariables] done with BB%02u\n", block->bbNum);
+ }
+ }
+
+ // Remember the number of Heap SSA names.
+ m_pCompiler->lvHeapNumSsaNames = pRenameState->HeapCount();
+}
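The BlockWork worklist above replaces a recursive pre/post-order walk of the dominator tree with an explicit stack: each block is pushed once as "not processed" (rename its defs and uses, push SSA names) and once as "processed" (pop the names once all of its dominator-tree children are done). A minimal standalone sketch of that two-phase pattern, independent of the JIT's types (the tree shape below is made up for illustration):

#include <cstdio>
#include <vector>

struct Work
{
    int  node;
    bool processed;
};

int main()
{
    // children[n] = dominator-tree children of n; node 0 is the root.
    std::vector<std::vector<int>> children = {{1, 2}, {3}, {}, {}};

    std::vector<Work> stack;
    stack.push_back({0, false});
    while (!stack.empty())
    {
        Work w = stack.back();
        stack.pop_back();
        if (!w.processed)
        {
            printf("pre  %d\n", w.node);     // rename defs/uses and push SSA names here
            stack.push_back({w.node, true}); // revisit after all children are done
            for (int c : children[w.node])
                stack.push_back({c, false});
        }
        else
        {
            printf("post %d\n", w.node);     // pop the SSA names pushed for this node
        }
    }
    return 0;
}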
+
+#ifdef DEBUG
+/**
+ * Print the blocks, the phi nodes get printed as well.
+ * @example:
+ * After SSA BB02:
+ * [0027CC0C] ----------- stmtExpr void (IL 0x019...0x01B)
+ * N001 ( 1, 1) [0027CB70] ----------- const int 23
+ * N003 ( 3, 3) [0027CBD8] -A------R-- = int
+ * N002 ( 1, 1) [0027CBA4] D------N--- lclVar int V01 arg1 d:5
+ *
+ * After SSA BB04:
+ * [0027D530] ----------- stmtExpr void (IL ???... ???)
+ * N002 ( 0, 0) [0027D4C8] ----------- phi int
+ * [0027D8CC] ----------- lclVar int V01 arg1 u:5
+ * [0027D844] ----------- lclVar int V01 arg1 u:4
+ * N004 ( 2, 2) [0027D4FC] -A------R-- = int
+ * N003 ( 1, 1) [0027D460] D------N--- lclVar int V01 arg1 d:3
+ */
+void SsaBuilder::Print(BasicBlock** postOrder, int count)
+{
+ for (int i = count - 1; i >= 0; --i)
+ {
+ printf("After SSA BB%02u:\n", postOrder[i]->bbNum);
+ m_pCompiler->gtDispTreeList(postOrder[i]->bbTreeList);
+ }
+}
+#endif // DEBUG
+
+/**
+ * Build SSA form.
+ *
+ * Sorts the graph topologically.
+ * - Collects the blocks in the postOrder array.
+ *
+ * Identifies each block's immediate dominator.
+ * - Computes this in bbIDom of each BasicBlock.
+ *
+ * Computes DOM tree relation.
+ * - Computes domTree as block -> set of blocks.
+ * - Computes pre/post order traversal of the DOM tree.
+ *
+ * Inserts phi nodes.
+ * - Computes dominance frontier as block -> set of blocks.
+ * - Allocates block use/def/livein/liveout and computes it.
+ * - Inserts phi nodes with only rhs at the beginning of the blocks.
+ *
+ * Renames variables.
+ * - Walks blocks in evaluation order and gives uses and defs names.
+ * - Gives empty phi nodes their rhs arguments as they become known while renaming.
+ *
+ * @see "A simple, fast dominance algorithm" by Keith D. Cooper, Timothy J. Harvey, Ken Kennedy.
+ * @see Briggs, Cooper, Harvey and Simpson "Practical Improvements to the Construction
+ * and Destruction of Static Single Assignment Form."
+ */
+void SsaBuilder::Build()
+{
+#ifdef DEBUG
+ if (m_pCompiler->verbose)
+ {
+ printf("*************** In SsaBuilder::Build()\n");
+ }
+#endif
+
+ // Ensure that there's a first block outside a try, so that the dominator tree has a unique root.
+ SetupBBRoot();
+
+ // Add 1 so that block number and array index stay the same.
+ int blockCount = m_pCompiler->fgBBNumMax + 1;
+
+ JITDUMP("[SsaBuilder] Max block count is %d.\n", blockCount);
+
+ // Allocate the postOrder array for the graph.
+ BasicBlock** postOrder = (BasicBlock**)alloca(blockCount * sizeof(BasicBlock*));
+
+ // Topologically sort the graph.
+ int count = TopologicalSort(postOrder, blockCount);
+ JITDUMP("[SsaBuilder] Topologically sorted the graph.\n");
+ EndPhase(PHASE_BUILD_SSA_TOPOSORT);
+
+ // Compute IDom(b).
+ ComputeImmediateDom(postOrder, count);
+
+ // Compute the dominator tree.
+ BlkToBlkSetMap* domTree = new (m_pCompiler->getAllocator()) BlkToBlkSetMap(m_pCompiler->getAllocator());
+ ComputeDominators(postOrder, count, domTree);
+ EndPhase(PHASE_BUILD_SSA_DOMS);
+
+ // Insert phi functions.
+ InsertPhiFunctions(postOrder, count);
+
+ // Rename local variables and collect UD information for each ssa var.
+ SsaRenameState* pRenameState = new (jitstd::utility::allocate<SsaRenameState>(m_allocator), jitstd::placement_t())
+ SsaRenameState(m_allocator, m_pCompiler->lvaCount);
+ RenameVariables(domTree, pRenameState);
+ EndPhase(PHASE_BUILD_SSA_RENAME);
+
+#ifdef DEBUG
+ // At this point we are in SSA form. Print the SSA form.
+ if (m_pCompiler->verboseSsa)
+ {
+ Print(postOrder, count);
+ }
+#endif
+}
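A minimal sketch of how the builder is meant to be driven; the wrapper name below is hypothetical and not the compiler's actual entry point, it only shows the intended call sequence given a Compiler instance and an allocator:

// Hypothetical driver (illustration only): construct the builder and run it once;
// afterwards each renamed GT_LCL_VAR carries a gtSsaNum.
void BuildSsaSketch(Compiler* comp, IAllocator* alloc)
{
    SsaBuilder builder(comp, alloc); // constructor declared in ssabuilder.h below
    builder.Build();                 // toposort, doms, phi insertion, renaming
}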
+
+void SsaBuilder::SetupBBRoot()
+{
+ // Allocate a bbroot, if necessary.
+ // We need a unique block to be the root of the dominator tree.
+ // This can be violated if the first block is in a try, or if it is the first block of
+ // a loop (which would necessarily be an infinite loop) -- i.e., it has a predecessor.
+
+ // If neither condition holds, no reason to make a new block.
+ if (!m_pCompiler->fgFirstBB->hasTryIndex() && m_pCompiler->fgFirstBB->bbPreds == nullptr)
+ {
+ return;
+ }
+
+ BasicBlock* bbRoot = m_pCompiler->bbNewBasicBlock(BBJ_NONE);
+ bbRoot->bbFlags |= BBF_INTERNAL;
+
+ // May need to fix up preds list, so remember the old first block.
+ BasicBlock* oldFirst = m_pCompiler->fgFirstBB;
+
+ // Copy the liveness information from the first basic block.
+ if (m_pCompiler->fgLocalVarLivenessDone)
+ {
+ VarSetOps::Assign(m_pCompiler, bbRoot->bbLiveIn, oldFirst->bbLiveIn);
+ VarSetOps::Assign(m_pCompiler, bbRoot->bbLiveOut, oldFirst->bbLiveIn);
+ }
+
+ // Copy the bbWeight. (This is technically wrong, if the first block is a loop head, but
+ // it shouldn't matter...)
+ bbRoot->inheritWeight(oldFirst);
+
+ // There's an artificial incoming reference count for the first BB. We're about to make it no longer
+ // the first BB, so decrement that.
+ assert(oldFirst->bbRefs > 0);
+ oldFirst->bbRefs--;
+
+ m_pCompiler->fgInsertBBbefore(m_pCompiler->fgFirstBB, bbRoot);
+
+ assert(m_pCompiler->fgFirstBB == bbRoot);
+ if (m_pCompiler->fgComputePredsDone)
+ {
+ m_pCompiler->fgAddRefPred(oldFirst, bbRoot);
+ }
+}
+
+#ifdef DEBUG
+// This method asserts that SSA name constraints specified are satisfied.
+void Compiler::JitTestCheckSSA()
+{
+ struct SSAName
+ {
+ unsigned m_lvNum;
+ unsigned m_ssaNum;
+
+ static unsigned GetHashCode(SSAName ssaNm)
+ {
+ return ssaNm.m_lvNum << 16 | ssaNm.m_ssaNum;
+ }
+
+ static bool Equals(SSAName ssaNm1, SSAName ssaNm2)
+ {
+ return ssaNm1.m_lvNum == ssaNm2.m_lvNum && ssaNm1.m_ssaNum == ssaNm2.m_ssaNum;
+ }
+ };
+
+ typedef SimplerHashTable<ssize_t, SmallPrimitiveKeyFuncs<ssize_t>, SSAName, JitSimplerHashBehavior>
+ LabelToSSANameMap;
+ typedef SimplerHashTable<SSAName, SSAName, ssize_t, JitSimplerHashBehavior> SSANameToLabelMap;
+
+ // If we have no test data, early out.
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+
+ NodeToTestDataMap* testData = GetNodeTestData();
+
+ // First we have to know which nodes in the tree are reachable.
+ NodeToIntMap* reachable = FindReachableNodesInNodeTestData();
+
+ LabelToSSANameMap* labelToSSA = new (getAllocatorDebugOnly()) LabelToSSANameMap(getAllocatorDebugOnly());
+ SSANameToLabelMap* ssaToLabel = new (getAllocatorDebugOnly()) SSANameToLabelMap(getAllocatorDebugOnly());
+
+ if (verbose)
+ {
+ printf("\nJit Testing: SSA names.\n");
+ }
+ for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
+ {
+ TestLabelAndNum tlAndN;
+ GenTreePtr node = ki.Get();
+ bool b = testData->Lookup(node, &tlAndN);
+ assert(b);
+ if (tlAndN.m_tl == TL_SsaName)
+ {
+ if (node->OperGet() != GT_LCL_VAR)
+ {
+ printf("SSAName constraint put on non-lcl-var expression ");
+ printTreeID(node);
+ printf(" (of type %s).\n", varTypeName(node->TypeGet()));
+ unreached();
+ }
+ GenTreeLclVarCommon* lcl = node->AsLclVarCommon();
+
+ int dummy;
+ if (!reachable->Lookup(lcl, &dummy))
+ {
+ printf("Node ");
+ printTreeID(lcl);
+ printf(" had a test constraint declared, but has become unreachable at the time the constraint is "
+ "tested.\n"
+ "(This is probably as a result of some optimization -- \n"
+ "you may need to modify the test case to defeat this opt.)\n");
+ unreached();
+ }
+
+ if (verbose)
+ {
+ printf(" Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> -- SSA name class %d.\n", lcl->gtLclNum, lcl->gtSsaNum, tlAndN.m_num);
+ }
+ SSAName ssaNm;
+ if (labelToSSA->Lookup(tlAndN.m_num, &ssaNm))
+ {
+ if (verbose)
+ {
+ printf(" Already in hash tables.\n");
+ }
+ // The mapping(s) must be one-to-one: if the label has a mapping, then the ssaNm must, as well.
+ ssize_t num2;
+ bool b = ssaToLabel->Lookup(ssaNm, &num2);
+ // And the mappings must be the same.
+ if (tlAndN.m_num != num2)
+ {
+ printf("Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> was declared in SSA name class %d,\n", lcl->gtLclNum, lcl->gtSsaNum,
+ tlAndN.m_num);
+ printf(
+ "but this SSA name <%d,%d> has already been associated with a different SSA name class: %d.\n",
+ ssaNm.m_lvNum, ssaNm.m_ssaNum, num2);
+ unreached();
+ }
+ // And the current node must be of the specified SSA family.
+ if (!(lcl->gtLclNum == ssaNm.m_lvNum && lcl->gtSsaNum == ssaNm.m_ssaNum))
+ {
+ printf("Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> was declared in SSA name class %d,\n", lcl->gtLclNum, lcl->gtSsaNum,
+ tlAndN.m_num);
+ printf("but that name class was previously bound to a different SSA name: <%d,%d>.\n",
+ ssaNm.m_lvNum, ssaNm.m_ssaNum);
+ unreached();
+ }
+ }
+ else
+ {
+ ssaNm.m_lvNum = lcl->gtLclNum;
+ ssaNm.m_ssaNum = lcl->gtSsaNum;
+ ssize_t num;
+ // The mapping(s) must be one-to-one: if the label has no mapping, then the ssaNm may not, either.
+ if (ssaToLabel->Lookup(ssaNm, &num))
+ {
+ printf("Node: ");
+ printTreeID(lcl);
+ printf(", SSA name = <%d, %d> was declared in SSA name class %d,\n", lcl->gtLclNum, lcl->gtSsaNum,
+ tlAndN.m_num);
+ printf("but this SSA name has already been associated with a different name class: %d.\n", num);
+ unreached();
+ }
+ // Add to both mappings.
+ labelToSSA->Set(tlAndN.m_num, ssaNm);
+ ssaToLabel->Set(ssaNm, tlAndN.m_num);
+ if (verbose)
+ {
+ printf(" added to hash tables.\n");
+ }
+ }
+ }
+ }
+}
+#endif // DEBUG
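The checker above keeps two hash tables in sync so that test labels and <lclNum, ssaNum> pairs remain in one-to-one correspondence. A standalone sketch of that invariant using standard containers (not JIT code):

#include <cassert>
#include <map>
#include <utility>

typedef std::pair<unsigned, unsigned> SsaName; // <lclNum, ssaNum>

struct Bijection
{
    std::map<int, SsaName> labelToSsa;
    std::map<SsaName, int> ssaToLabel;

    // Returns false if adding (label, name) would break the one-to-one property.
    bool Record(int label, SsaName name)
    {
        auto itL = labelToSsa.find(label);
        auto itS = ssaToLabel.find(name);
        if (itL == labelToSsa.end() && itS == ssaToLabel.end())
        {
            labelToSsa[label] = name; // first time either side is seen: bind them
            ssaToLabel[name]  = label;
            return true;
        }
        // Otherwise both sides must already exist and agree.
        return itL != labelToSsa.end() && itS != ssaToLabel.end() &&
               itL->second == name && itS->second == label;
    }
};

int main()
{
    Bijection b;
    assert(b.Record(3, {1, 5}));  // SSA name class 3 bound to <V01, ssa 5>
    assert(b.Record(3, {1, 5}));  // the same pair again is fine
    assert(!b.Record(3, {1, 4})); // a class may not rebind to a different SSA name
    assert(!b.Record(7, {1, 5})); // nor may an SSA name bind to a second class
    return 0;
}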
diff --git a/src/jit/ssabuilder.h b/src/jit/ssabuilder.h
new file mode 100644
index 0000000000..2fff06573e
--- /dev/null
+++ b/src/jit/ssabuilder.h
@@ -0,0 +1,212 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+#pragma warning(disable : 4503) // 'identifier' : decorated name length exceeded, name was truncated
+
+#undef SSA_FEATURE_USEDEF
+#undef SSA_FEATURE_DOMARR
+
+#include "compiler.h"
+
+struct SsaRenameState;
+
+typedef int LclVarNum;
+
+// Pair of a local var name eg: V01 and Ssa number; eg: V01_01
+typedef jitstd::pair<LclVarNum, int> SsaVarName;
+
+class SsaBuilder
+{
+private:
+ struct SsaVarNameHasher
+ {
+ /**
+ * Hash functor used in maps to hash a given key.
+ *
+ * @params key SsaVarName which is a pair of lclNum and ssaNum which defines a variable.
+ * @return Hash value corresponding to a key.
+ */
+ size_t operator()(const SsaVarName& key) const
+ {
+ return jitstd::hash<__int64>()((((__int64)key.first) << sizeof(int)) | key.second);
+ }
+ };
+
+ // Used to maintain a map of a given SSA numbering to its use or def.
+ typedef jitstd::unordered_map<SsaVarName, jitstd::vector<GenTree*>, SsaVarNameHasher> VarToUses;
+ typedef jitstd::unordered_map<SsaVarName, GenTree*, SsaVarNameHasher> VarToDef;
+
+ inline void EndPhase(Phases phase)
+ {
+ m_pCompiler->EndPhase(phase);
+ }
+
+public:
+ // Constructor
+ SsaBuilder(Compiler* pCompiler, IAllocator* pIAllocator);
+
+ // Requires stmt nodes to be already sequenced in evaluation order. Analyzes the graph
+ // for introduction of phi-nodes as GT_PHI tree nodes at the beginning of each block.
+ // Each GT_LCL_VAR is given its ssa number through its gtSsaNum field in the node.
+ // Each GT_PHI node will have gtOp1 set to lhs of the phi node and the gtOp2 to be a
+ // GT_LIST of GT_PHI_ARG. Each use or def is denoted by the corresponding GT_LCL_VAR
+ // tree. For example, to get all uses of a particular variable fully defined by its
+ // lclNum and ssaNum, one would use m_uses and look up all the uses. Similarly, the single
+ // def of an SSA variable can be looked up using the m_defs member.
+ void Build();
+
+ // Requires "bbIDom" of each block to be computed. Requires "domTree" to be allocated
+ // and can be updated, e.g., by adding a mapping from a block to its dominated children.
+ // Using IDom of each basic block, compute the whole domTree. If a block "b" has IDom "i",
+ // then, block "b" is dominated by "i". The mapping then is i -> { ..., b, ... }, in
+ // other words, "domTree" is a tree represented by nodes mapped to their children.
+ static void ComputeDominators(Compiler* pCompiler, BlkToBlkSetMap* domTree);
+
+private:
+ // Ensures that the basic block graph has a root for the dominator graph, by ensuring
+ // that there is a first block that is not in a try region (adding an empty block for that purpose
+ // if necessary). Eventually should move to Compiler.
+ void SetupBBRoot();
+
+ // Requires "postOrder" to be an array of size "count". Requires "count" to at least
+ // be the size of the flow graph. Sorts the current compiler's flow-graph and places
+ // the blocks in post order (i.e., a node's children first) in the array. Returns the
+ // number of nodes visited while sorting the graph. In other words, valid entries in
+ // the output array.
+ int TopologicalSort(BasicBlock** postOrder, int count);
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted
+ // order. Requires count to be the valid entries in the "postOrder" array. Computes
+ // each block's immediate dominator and records it in the BasicBlock in bbIDom.
+ void ComputeImmediateDom(BasicBlock** postOrder, int count);
+
+#ifdef SSA_FEATURE_DOMARR
+ // Requires "curBlock" to be the first basic block at the first step of the recursion.
+ // Requires "domTree" to be a adjacency list (actually, a set of blocks with a set of blocks
+ // as children.) Requires "preIndex" and "postIndex" to be initialized to 0 at entry into recursion.
+ // Computes arrays "m_pDomPreOrder" and "m_pDomPostOrder" of block indices such that the blocks of a
+ // "domTree" are in pre and postorder respectively.
+ void DomTreeWalk(BasicBlock* curBlock, const BlkToBlkSetMap& domTree, int* preIndex, int* postIndex);
+#endif
+
+ // Requires all blocks to have computed "bbIDom." Requires "domTree" to be a preallocated BlkToBlkSetMap.
+ // Helper to compute "domTree" from the pre-computed bbIDom of the basic blocks.
+ static void ConstructDomTreeForBlock(Compiler* pCompiler, BasicBlock* block, BlkToBlkSetMap* domTree);
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
+ // count to be the valid entries in the "postOrder" array. Computes "domTree" as a adjacency list
+ // like object, i.e., a set of blocks with a set of blocks as children defining the DOM relation.
+ void ComputeDominators(BasicBlock** postOrder, int count, BlkToBlkSetMap* domTree);
+
+#ifdef DEBUG
+ // Display the dominator tree.
+ static void DisplayDominators(BlkToBlkSetMap* domTree);
+#endif // DEBUG
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
+ // count to be the valid entries in the "postOrder" array. Returns a mapping from blocks to their
+ // iterated dominance frontiers. (Recall that the dominance frontier of a block B is the set of blocks
+ // B3 such that there exists some B2 s.t. B3 is a successor of B2, and B dominates B2. Note that this dominance
+ // need not be strict -- B2 and B may be the same node. The iterated dominance frontier is formed by a closure
+ // operation: the IDF of B is the smallest set that includes B's dominance frontier, and also includes the dominance
+ // frontier of all elements of the set.) A worked example on a small diamond CFG follows this class declaration.
+ BlkToBlkSetMap* ComputeIteratedDominanceFrontier(BasicBlock** postOrder, int count);
+
+ // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires
+ // count to be the valid entries in the "postOrder" array. Inserts GT_PHI nodes at the beginning
+ // of basic blocks that require them like so:
+ // GT_ASG(GT_LCL_VAR, GT_PHI(GT_PHI_ARG(GT_LCL_VAR, Block*), GT_LIST(GT_PHI_ARG(GT_LCL_VAR, Block*), NULL)));
+ void InsertPhiFunctions(BasicBlock** postOrder, int count);
+
+ // Requires "domTree" to be the dominator tree relation defined by a DOM b.
+ // Requires "pRenameState" to have counts and stacks at their initial state.
+ // Assigns gtSsaNames to all variables.
+ void RenameVariables(BlkToBlkSetMap* domTree, SsaRenameState* pRenameState);
+
+ // Requires "block" to be any basic block participating in variable renaming, and has at least a
+ // definition that pushed a ssa number into the rename stack for a variable. Requires "pRenameState"
+ // to have variable stacks that have counts pushed into them for the block while assigning def
+ // numbers. Pops the stack for any local variable that has an entry for block on top.
+ void BlockPopStacks(BasicBlock* block, SsaRenameState* pRenameState);
+
+ // Requires "block" to be non-NULL; and is searched for defs and uses to assign ssa numbers.
+ // Requires "pRenameState" to be non-NULL and be currently used for variables renaming.
+ void BlockRenameVariables(BasicBlock* block, SsaRenameState* pRenameState);
+
+ // Requires "tree" (assumed to be a statement in "block") to be searched for defs and uses to assign ssa numbers.
+ // Requires "pRenameState" to be non-NULL and be currently used for variables renaming. Assumes that "isPhiDefn"
+ // implies that any definition occurring within "tree" is a phi definition.
+ void TreeRenameVariables(GenTree* tree, BasicBlock* block, SsaRenameState* pRenameState, bool isPhiDefn);
+
+ // Assumes that "block" contains a definition for local var "lclNum", with SSA number "count".
+ // IF "block" is within one or more try blocks,
+ // and the local variable is live at the start of the corresponding handlers,
+ // add this SSA number "count" to the argument list of the phi for the variable in the start
+ // block of those handlers.
+ void AddDefToHandlerPhis(BasicBlock* block, unsigned lclNum, unsigned count);
+
+ // Same as above, for "Heap".
+ void AddHeapDefToHandlerPhis(BasicBlock* block, unsigned count);
+
+ // Requires "block" to be non-NULL. Requires "pRenameState" to be non-NULL and be currently used
+ // for variables renaming. Assigns the rhs arguments to the phi, i.e., block's phi node arguments.
+ void AssignPhiNodeRhsVariables(BasicBlock* block, SsaRenameState* pRenameState);
+
+ // Requires "tree" to be a local variable node. Maintains a map of <lclNum, ssaNum> -> tree
+ // information in m_defs.
+ void AddDefPoint(GenTree* tree, BasicBlock* blk);
+#ifdef SSA_FEATURE_USEDEF
+ // Requires "tree" to be a local variable node. Maintains a map of <lclNum, ssaNum> -> tree
+ // information in m_uses.
+ void AddUsePoint(GenTree* tree);
+#endif
+
+ // Returns true, and sets "*ppIndirAssign", if "tree" has been recorded as an indirect assignment.
+ // (If the tree is an assignment, it's a definition only if it's labeled as an indirect definition, where
+ // we took the address of the local elsewhere in the extended tree.)
+ bool IsIndirectAssign(GenTreePtr tree, Compiler::IndirectAssignmentAnnotation** ppIndirAssign);
+
+#ifdef DEBUG
+ void Print(BasicBlock** postOrder, int count);
+#endif
+
+private:
+#ifdef SSA_FEATURE_USEDEF
+ // Use Def information after SSA. To query the uses and def of a given ssa var,
+ // probe these data structures.
+ // Do not move these outside of this class, use accessors/interface methods.
+ VarToUses m_uses;
+ VarToDef m_defs;
+#endif
+
+#ifdef SSA_FEATURE_DOMARR
+ // To answer queries of type a DOM b.
+ // Do not move these outside of this class, use accessors/interface methods.
+ int* m_pDomPreOrder;
+ int* m_pDomPostOrder;
+#endif
+
+ Compiler* m_pCompiler;
+
+ // Used to allocate space for jitstd data structures.
+ jitstd::allocator<void> m_allocator;
+};
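For the dominance-frontier comment above, a standalone sketch of the textbook computation it refers to (Cooper-Harvey-Kennedy style, not the JIT's implementation), run on a hard-coded diamond CFG B0->B1, B0->B2, B1->B3, B2->B3 with idom(B1) = idom(B2) = idom(B3) = B0. A variable assigned in B1 or B2 therefore needs a phi at the join block B3; the iterated dominance frontier just closes this set under repetition:

#include <cstdio>
#include <set>
#include <vector>

int main()
{
    const int numBlocks = 4;
    std::vector<std::vector<int>> preds = {{}, {0}, {0}, {1, 2}};
    std::vector<int> idom = {0, 0, 0, 0}; // idom[0] is conventionally itself

    std::vector<std::set<int>> df(numBlocks);
    for (int b = 0; b < numBlocks; b++)
    {
        if (preds[b].size() < 2)
            continue; // only join points contribute to dominance frontiers
        for (int p : preds[b])
        {
            // Walk up the dominator tree from each predecessor until we reach idom(b);
            // every block visited on the way has b in its dominance frontier.
            for (int runner = p; runner != idom[b]; runner = idom[runner])
                df[runner].insert(b);
        }
    }

    for (int b = 0; b < numBlocks; b++)
        for (int d : df[b])
            printf("DF(B%d) contains B%d\n", b, d); // prints: DF(B1) and DF(B2) contain B3
    return 0;
}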
diff --git a/src/jit/ssaconfig.h b/src/jit/ssaconfig.h
new file mode 100644
index 0000000000..6e81ad9fd6
--- /dev/null
+++ b/src/jit/ssaconfig.h
@@ -0,0 +1,49 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#ifdef DEBUG
+#define DBG_SSA_JITDUMP(...) \
+ if (JitTls::GetCompiler()->verboseSsa) \
+ JitDump(__VA_ARGS__)
+#else
+#define DBG_SSA_JITDUMP(...)
+#endif
+
+// DBG_SSA_JITDUMP prints only in DEBUG builds, and only when JitTls::GetCompiler()->verboseSsa is set.
+
+namespace SsaConfig
+{
+// FIRST ssa num is given to the first definition of a variable which can either be:
+// 1. A regular definition in the program.
+// 2. Or initialization by compInitMem.
+static const int FIRST_SSA_NUM = 2;
+
+// UNINIT ssa num is given to variables whose definitions were never encountered:
+// 1. Not by SsaBuilder, and
+// 2. Not by initialization using compInitMem.
+static const int UNINIT_SSA_NUM = 1;
+
+// Sentinel value to indicate variable not touched by SSA.
+static const int RESERVED_SSA_NUM = 0;
+
+} // end of namespace SsaConfig
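A small illustration of how these constants partition SSA numbers; the helper names below are hypothetical and not part of the JIT:

// Hypothetical helpers (illustration only).
inline bool WasRenamedBySsa(int ssaNum)
{
    return ssaNum >= SsaConfig::FIRST_SSA_NUM; // a def was seen (or compInitMem applied)
}
inline bool IsUseBeforeAnyDef(int ssaNum)
{
    return ssaNum == SsaConfig::UNINIT_SSA_NUM; // no def was ever encountered
}
inline bool IsUntouchedBySsa(int ssaNum)
{
    return ssaNum == SsaConfig::RESERVED_SSA_NUM; // variable not renamed by SSA at all
}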
diff --git a/src/jit/ssarenamestate.cpp b/src/jit/ssarenamestate.cpp
new file mode 100644
index 0000000000..a1e05f192f
--- /dev/null
+++ b/src/jit/ssarenamestate.cpp
@@ -0,0 +1,244 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#include "ssaconfig.h"
+#include "ssarenamestate.h"
+
+/**
+ * Constructor - initializes the stacks and counters (lclVar -> stack/counter) maps.
+ *
+ * @params alloc The allocator class used to allocate jitstd data.
+ */
+SsaRenameState::SsaRenameState(const jitstd::allocator<int>& alloc, unsigned lvaCount)
+ : counts(nullptr)
+ , stacks(nullptr)
+ , definedLocs(alloc)
+ , heapStack(alloc)
+ , heapCount(0)
+ , lvaCount(lvaCount)
+ , m_alloc(alloc)
+{
+}
+
+/**
+ * Allocates memory to hold SSA variable def counts,
+ * if not allocated already.
+ *
+ */
+void SsaRenameState::EnsureCounts()
+{
+ if (counts == nullptr)
+ {
+ counts = jitstd::utility::allocate<unsigned>(m_alloc, lvaCount);
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ counts[i] = SsaConfig::FIRST_SSA_NUM;
+ }
+ }
+}
+
+/**
+ * Allocates memory for holding pointers to lcl's stacks,
+ * if not allocated already.
+ *
+ */
+void SsaRenameState::EnsureStacks()
+{
+ if (stacks == nullptr)
+ {
+ stacks = jitstd::utility::allocate<Stack*>(m_alloc, lvaCount);
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ stacks[i] = nullptr;
+ }
+ }
+}
+
+/**
+ * Returns an SSA count number for a local variable and post-increments it.
+ *
+ * If there is no counter for the local yet, it is initialized with the default value;
+ * the count is then returned and post-incremented, so the next def gets a new count.
+ *
+ * @params lclNum The local variable def for which a count has to be returned.
+ * @return the variable name for the current definition.
+ *
+ */
+unsigned SsaRenameState::CountForDef(unsigned lclNum)
+{
+ EnsureCounts();
+ unsigned count = counts[lclNum];
+ counts[lclNum]++;
+ DBG_SSA_JITDUMP("Incrementing counter = %d by 1 for V%02u.\n", count, lclNum);
+ return count;
+}
+
+/**
+ * Returns the SSA count number for a local variable from the top of its stack.
+ *
+ * @params lclNum The local variable def for which a count has to be returned.
+ * @return the current variable name for the "use".
+ *
+ * @remarks If the stack is empty, then we have a use before a def. To handle this
+ * special case, we need to initialize the count with 'default+1', so the
+ * next definition will always use 'default+1' but return 'default' for
+ * all uses until a definition.
+ *
+ */
+unsigned SsaRenameState::CountForUse(unsigned lclNum)
+{
+ EnsureStacks();
+ DBG_SSA_JITDUMP("[SsaRenameState::CountForUse] V%02u\n", lclNum);
+
+ Stack* stack = stacks[lclNum];
+ if (stack == nullptr || stack->empty())
+ {
+ return SsaConfig::UNINIT_SSA_NUM;
+ }
+ return stack->back().m_count;
+}
+
+/**
+ * Pushes a count value on the variable stack.
+ *
+ * @params lclNum The local variable def whose stack the count needs to be pushed onto.
+ * @params count The current count value that needs to be pushed on to the stack.
+ *
+ * @remarks Usually called when renaming a "def."
+ * Create stack lazily when needed for the first time.
+ */
+void SsaRenameState::Push(BasicBlock* bb, unsigned lclNum, unsigned count)
+{
+ EnsureStacks();
+
+ // We'll use BB00 here to indicate the "block before any real blocks..."
+ DBG_SSA_JITDUMP("[SsaRenameState::Push] BB%02u, V%02u, count = %d\n", bb != nullptr ? bb->bbNum : 0, lclNum, count);
+
+ Stack* stack = stacks[lclNum];
+
+ if (stack == nullptr)
+ {
+ DBG_SSA_JITDUMP("\tCreating a new stack\n");
+ stack = stacks[lclNum] = new (jitstd::utility::allocate<Stack>(m_alloc), jitstd::placement_t()) Stack(m_alloc);
+ }
+
+ if (stack->empty() || stack->back().m_bb != bb)
+ {
+ stack->push_back(SsaRenameStateForBlock(bb, count));
+ // Remember that we've pushed a def for this loc (so we don't have
+ // to traverse *all* the locs to do the necessary pops later).
+ definedLocs.push_back(SsaRenameStateLocDef(bb, lclNum));
+ }
+ else
+ {
+ stack->back().m_count = count;
+ }
+
+#ifdef DEBUG
+ if (JitTls::GetCompiler()->verboseSsa)
+ {
+ printf("\tContents of the stack: [");
+ for (Stack::iterator iter2 = stack->begin(); iter2 != stack->end(); iter2++)
+ {
+ printf("<BB%02u, %d>", ((*iter2).m_bb != nullptr ? (*iter2).m_bb->bbNum : 0), (*iter2).m_count);
+ }
+ printf("]\n");
+
+ DumpStacks();
+ }
+#endif
+}
+
+void SsaRenameState::PopBlockStacks(BasicBlock* block)
+{
+ DBG_SSA_JITDUMP("[SsaRenameState::PopBlockStacks] BB%02u\n", block->bbNum);
+ // Iterate over the stacks for all the variables, popping those that have an entry
+ // for "block" on top.
+ while (!definedLocs.empty() && definedLocs.back().m_bb == block)
+ {
+ unsigned lclNum = definedLocs.back().m_lclNum;
+ assert(stacks != nullptr); // Cannot be empty because definedLocs is not empty.
+ Stack* stack = stacks[lclNum];
+ assert(stack != nullptr);
+ assert(stack->back().m_bb == block);
+ stack->pop_back();
+ definedLocs.pop_back();
+ }
+#ifdef DEBUG
+ // It should now be the case that no stack in stacks has an entry for "block" on top --
+ // the loop above popped them all.
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ if (stacks != nullptr && stacks[i] != nullptr && !stacks[i]->empty())
+ {
+ assert(stacks[i]->back().m_bb != block);
+ }
+ }
+ if (JitTls::GetCompiler()->verboseSsa)
+ {
+ DumpStacks();
+ }
+#endif // DEBUG
+}
+
+void SsaRenameState::PopBlockHeapStack(BasicBlock* block)
+{
+ while (heapStack.size() > 0 && heapStack.back().m_bb == block)
+ {
+ heapStack.pop_back();
+ }
+}
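A standalone model of the bookkeeping above, written with standard containers instead of the JIT's jitstd types: each local has a stack of <block, ssaNum> entries, and definedLocs records which locals were pushed in a block so that PopBlockStacks visits only those rather than scanning all lvaCount stacks. The constants 2 and 1 stand in for SsaConfig::FIRST_SSA_NUM and UNINIT_SSA_NUM:

#include <cassert>
#include <utility>
#include <vector>

struct MiniRenameState
{
    std::vector<std::vector<std::pair<int, unsigned>>> stacks; // per-local <blockNum, ssaNum>
    std::vector<std::pair<int, unsigned>> definedLocs;         // <blockNum, lclNum>
    std::vector<unsigned> counts;

    explicit MiniRenameState(unsigned lvaCount) : stacks(lvaCount), counts(lvaCount, 2) {}

    unsigned CountForDef(unsigned lcl) { return counts[lcl]++; } // post-increment, starts at 2

    unsigned CountForUse(unsigned lcl)
    {
        return stacks[lcl].empty() ? 1u /* "uninit" */ : stacks[lcl].back().second;
    }

    void Push(int block, unsigned lcl, unsigned ssaNum)
    {
        if (stacks[lcl].empty() || stacks[lcl].back().first != block)
        {
            stacks[lcl].push_back({block, ssaNum});
            definedLocs.push_back({block, lcl}); // only new <block, lcl> pairs are recorded
        }
        else
        {
            stacks[lcl].back().second = ssaNum; // redefinition in the same block
        }
    }

    void PopBlockStacks(int block)
    {
        while (!definedLocs.empty() && definedLocs.back().first == block)
        {
            stacks[definedLocs.back().second].pop_back();
            definedLocs.pop_back();
        }
    }
};

int main()
{
    MiniRenameState rs(2);
    rs.Push(/*block*/ 1, /*lcl*/ 0, rs.CountForDef(0)); // V00 defined in BB01 -> ssa 2
    rs.Push(1, 0, rs.CountForDef(0));                   // redefined in BB01    -> ssa 3
    assert(rs.CountForUse(0) == 3);                     // later uses in BB01 see the latest def
    assert(rs.CountForUse(1) == 1);                     // V01 never defined: "uninit"
    rs.PopBlockStacks(1);                               // leaving BB01 pops only V00's entry
    assert(rs.CountForUse(0) == 1);
    return 0;
}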
+
+#ifdef DEBUG
+/**
+ * Print the stack data for each variable in a loop.
+ */
+void SsaRenameState::DumpStacks()
+{
+ printf("Dumping stacks:\n-------------------------------\n");
+ if (lvaCount == 0)
+ {
+ printf("None\n");
+ }
+ else
+ {
+ EnsureStacks();
+ for (unsigned i = 0; i < lvaCount; ++i)
+ {
+ Stack* stack = stacks[i];
+ printf("V%02u:\t", i);
+ if (stack != nullptr)
+ {
+ for (Stack::iterator iter2 = stack->begin(); iter2 != stack->end(); ++iter2)
+ {
+ if (iter2 != stack->begin())
+ {
+ printf(", ");
+ }
+ printf("<BB%02u, %2d>", ((*iter2).m_bb != nullptr ? (*iter2).m_bb->bbNum : 0), (*iter2).m_count);
+ }
+ }
+ printf("\n");
+ }
+ }
+}
+#endif // DEBUG
diff --git a/src/jit/ssarenamestate.h b/src/jit/ssarenamestate.h
new file mode 100644
index 0000000000..1db36c5b37
--- /dev/null
+++ b/src/jit/ssarenamestate.h
@@ -0,0 +1,129 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// ==++==
+//
+
+//
+
+//
+// ==--==
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX SSA XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#pragma once
+
+#include "jitstd.h"
+
+struct SsaRenameStateForBlock
+{
+ BasicBlock* m_bb;
+ unsigned m_count;
+
+ SsaRenameStateForBlock(BasicBlock* bb, unsigned count) : m_bb(bb), m_count(count)
+ {
+ }
+ SsaRenameStateForBlock() : m_bb(nullptr), m_count(0)
+ {
+ }
+};
+
+// A record indicating that local "m_loc" was defined in block "m_bb".
+struct SsaRenameStateLocDef
+{
+ BasicBlock* m_bb;
+ unsigned m_lclNum;
+
+ SsaRenameStateLocDef(BasicBlock* bb, unsigned lclNum) : m_bb(bb), m_lclNum(lclNum)
+ {
+ }
+};
+
+struct SsaRenameState
+{
+ typedef jitstd::list<SsaRenameStateForBlock> Stack;
+ typedef Stack** Stacks;
+ typedef unsigned* Counts;
+ typedef jitstd::list<SsaRenameStateLocDef> DefStack;
+
+ SsaRenameState(const jitstd::allocator<int>& allocator, unsigned lvaCount);
+
+ void EnsureCounts();
+ void EnsureStacks();
+
+ // Requires "lclNum" to be a variable number for which a new count corresponding to a
+ // definition is desired. The method post-increments the counter for the "lclNum."
+ unsigned CountForDef(unsigned lclNum);
+
+ // Requires "lclNum" to be a variable number for which an ssa number at the top of the
+ // stack is required i.e., for variable "uses."
+ unsigned CountForUse(unsigned lclNum);
+
+ // Requires "lclNum" to be a variable number, and requires "count" to represent
+ // an ssa number that needs to be pushed onto the stack corresponding to the lclNum.
+ void Push(BasicBlock* bb, unsigned lclNum, unsigned count);
+
+ // Pop all stacks that have an entry for "bb" on top.
+ void PopBlockStacks(BasicBlock* bb);
+
+ // Similar functions for the special implicit "Heap" variable.
+ unsigned CountForHeapDef()
+ {
+ if (heapCount == 0)
+ {
+ heapCount = SsaConfig::FIRST_SSA_NUM;
+ }
+ unsigned res = heapCount;
+ heapCount++;
+ return res;
+ }
+ unsigned CountForHeapUse()
+ {
+ return heapStack.back().m_count;
+ }
+
+ void PushHeap(BasicBlock* bb, unsigned count)
+ {
+ heapStack.push_back(SsaRenameStateForBlock(bb, count));
+ }
+
+ void PopBlockHeapStack(BasicBlock* bb);
+
+ unsigned HeapCount()
+ {
+ return heapCount;
+ }
+
+#ifdef DEBUG
+ // Debug interface
+ void DumpStacks();
+#endif
+
+private:
+ // Map of lclNum -> count.
+ Counts counts;
+
+ // Map of lclNum -> SsaRenameStateForBlock.
+ Stacks stacks;
+
+ // This list represents the set of locals defined in the current block.
+ DefStack definedLocs;
+
+ // Same state for the special implicit Heap variable.
+ Stack heapStack;
+ unsigned heapCount;
+
+ // Number of stacks/counts to allocate.
+ unsigned lvaCount;
+
+ // Allocator to allocate stacks.
+ jitstd::allocator<void> m_alloc;
+};
diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp
new file mode 100644
index 0000000000..f975822740
--- /dev/null
+++ b/src/jit/stackfp.cpp
@@ -0,0 +1,4494 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef LEGACY_BACKEND // This file is NOT used for the RyuJIT backend that uses the linear scan register allocator.
+
+#ifdef _TARGET_AMD64_
+#error AMD64 must be !LEGACY_BACKEND
+#endif
+
+#include "compiler.h"
+#include "emit.h"
+#include "codegen.h"
+
+// Instruction list
+// N=normal, R=reverse, P=pop
+#if FEATURE_STACK_FP_X87
+const static instruction FPmathNN[] = {INS_fadd, INS_fsub, INS_fmul, INS_fdiv};
+const static instruction FPmathNP[] = {INS_faddp, INS_fsubp, INS_fmulp, INS_fdivp};
+const static instruction FPmathRN[] = {INS_fadd, INS_fsubr, INS_fmul, INS_fdivr};
+const static instruction FPmathRP[] = {INS_faddp, INS_fsubrp, INS_fmulp, INS_fdivrp};
+
+FlatFPStateX87* CodeGenInterface::FlatFPAllocFPState(FlatFPStateX87* pInitFrom)
+{
+ FlatFPStateX87* pNewState;
+
+ pNewState = new (compiler, CMK_FlatFPStateX87) FlatFPStateX87;
+ pNewState->Init(pInitFrom);
+
+ return pNewState;
+}
+
+bool CodeGen::FlatFPSameRegisters(FlatFPStateX87* pState, regMaskTP mask)
+{
+ int i;
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (pState->Mapped(i))
+ {
+ regMaskTP regmask = genRegMaskFloat((regNumber)i);
+ if ((mask & regmask) == 0)
+ {
+ return false;
+ }
+
+ mask &= ~regmask;
+ }
+ }
+
+ return mask ? false : true;
+}
+
+bool FlatFPStateX87::Mapped(unsigned uEntry)
+{
+ return m_uVirtualMap[uEntry] != (unsigned)FP_VRNOTMAPPED;
+}
+
+void FlatFPStateX87::Unmap(unsigned uEntry)
+{
+ assert(Mapped(uEntry));
+ m_uVirtualMap[uEntry] = (unsigned)FP_VRNOTMAPPED;
+}
+
+bool FlatFPStateX87::AreEqual(FlatFPStateX87* pA, FlatFPStateX87* pB)
+{
+ unsigned i;
+
+ assert(pA->IsConsistent());
+ assert(pB->IsConsistent());
+
+ if (pA->m_uStackSize != pB->m_uStackSize)
+ {
+ return false;
+ }
+
+ for (i = 0; i < pA->m_uStackSize; i++)
+ {
+ if (pA->m_uStack[i] != pB->m_uStack[i])
+ {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#ifdef DEBUG
+bool FlatFPStateX87::IsValidEntry(unsigned uEntry)
+{
+ return (Mapped(uEntry) && (m_uVirtualMap[uEntry] >= 0 && m_uVirtualMap[uEntry] < m_uStackSize)) || !Mapped(uEntry);
+}
+
+bool FlatFPStateX87::IsConsistent()
+{
+ unsigned i;
+
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ if (!IsValidEntry(i))
+ {
+ if (m_bIgnoreConsistencyChecks)
+ {
+ return true;
+ }
+ else
+ {
+ assert(!"Virtual register is marked as mapped but out of the stack range");
+ return false;
+ }
+ }
+ }
+
+ for (i = 0; i < m_uStackSize; i++)
+ {
+ if (m_uVirtualMap[m_uStack[i]] != i)
+ {
+ if (m_bIgnoreConsistencyChecks)
+ {
+ return true;
+ }
+ else
+ {
+ assert(!"Register File and stack layout don't match!");
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
+void FlatFPStateX87::Dump()
+{
+ unsigned i;
+
+ assert(IsConsistent());
+
+ if (m_uStackSize > 0)
+ {
+ printf("Virtual stack state: ");
+ for (i = 0; i < m_uStackSize; i++)
+ {
+ printf("ST(%i): FPV%i | ", StackToST(i), m_uStack[i]);
+ }
+ printf("\n");
+ }
+}
+
+void FlatFPStateX87::UpdateMappingFromStack()
+{
+ memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));
+
+ unsigned i;
+
+ for (i = 0; i < m_uStackSize; i++)
+ {
+ m_uVirtualMap[m_uStack[i]] = i;
+ }
+}
+
+#endif
+
+unsigned FlatFPStateX87::StackToST(unsigned uEntry)
+{
+ assert(IsValidEntry(uEntry));
+ return m_uStackSize - 1 - uEntry;
+}
+
+unsigned FlatFPStateX87::VirtualToST(unsigned uEntry)
+{
+ assert(Mapped(uEntry));
+
+ return StackToST(m_uVirtualMap[uEntry]);
+}
+
+unsigned FlatFPStateX87::STToVirtual(unsigned uST)
+{
+ assert(uST < m_uStackSize);
+
+ return m_uStack[m_uStackSize - 1 - uST];
+}
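A tiny standalone check (not JIT code) of the slot numbering used by StackToST and STToVirtual above: slot 0 is the bottom of the flat virtual stack, and the most recently pushed slot corresponds to ST(0):

#include <cassert>

int main()
{
    const unsigned stack[]   = {2, 0, 5}; // FPV2 pushed first, FPV5 pushed last
    const unsigned stackSize = 3;
    auto stackToST   = [&](unsigned slot) { return stackSize - 1 - slot; };
    auto stToVirtual = [&](unsigned st) { return stack[stackSize - 1 - st]; };

    assert(stackToST(2) == 0);   // FPV5, pushed last, is ST(0), the x87 top of stack
    assert(stackToST(0) == 2);   // FPV2, pushed first, is ST(2)
    assert(stToVirtual(0) == 5); // ST(0) currently holds FPV5
    assert(stToVirtual(2) == 2); // ST(2) holds FPV2
    return 0;
}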
+
+void FlatFPStateX87::Init(FlatFPStateX87* pFrom)
+{
+ if (pFrom)
+ {
+ memcpy(this, pFrom, sizeof(*this));
+ }
+ else
+ {
+ memset(m_uVirtualMap, -1, sizeof(m_uVirtualMap));
+
+#ifdef DEBUG
+ memset(m_uStack, -1, sizeof(m_uStack));
+#endif
+ m_uStackSize = 0;
+ }
+
+#ifdef DEBUG
+ m_bIgnoreConsistencyChecks = false;
+#endif
+}
+
+void FlatFPStateX87::Associate(unsigned uEntry, unsigned uStack)
+{
+ assert(uStack < m_uStackSize);
+
+ m_uStack[uStack] = uEntry;
+ m_uVirtualMap[uEntry] = uStack;
+}
+
+unsigned FlatFPStateX87::TopIndex()
+{
+ return m_uStackSize - 1;
+}
+
+unsigned FlatFPStateX87::TopVirtual()
+{
+ assert(m_uStackSize > 0);
+ return m_uStack[m_uStackSize - 1];
+}
+
+void FlatFPStateX87::Rename(unsigned uVirtualTo, unsigned uVirtualFrom)
+{
+ assert(!Mapped(uVirtualTo));
+
+ unsigned uSlot = m_uVirtualMap[uVirtualFrom];
+
+ Unmap(uVirtualFrom);
+ Associate(uVirtualTo, uSlot);
+}
+
+void FlatFPStateX87::Push(unsigned uEntry)
+{
+ assert(m_uStackSize <= FP_PHYSICREGISTERS);
+ assert(!Mapped(uEntry));
+
+ m_uStackSize++;
+ Associate(uEntry, TopIndex());
+
+ assert(IsConsistent());
+}
+
+unsigned FlatFPStateX87::Pop()
+{
+ assert(m_uStackSize != 0);
+
+ unsigned uVirtual = m_uStack[--m_uStackSize];
+
+#ifdef DEBUG
+ m_uStack[m_uStackSize] = (unsigned)-1;
+#endif
+
+ Unmap(uVirtual);
+
+ return uVirtual;
+}
+
+bool FlatFPStateX87::IsEmpty()
+{
+ return m_uStackSize == 0;
+}
+
+void CodeGen::genCodeForTransitionStackFP(FlatFPStateX87* pSrc, FlatFPStateX87* pDst)
+{
+ FlatFPStateX87 fpState;
+ FlatFPStateX87* pTmp;
+ int i;
+
+ // Make a temp copy
+ memcpy(&fpState, pSrc, sizeof(FlatFPStateX87));
+ pTmp = &fpState;
+
+ // Make sure everything seems consistent.
+ assert(pSrc->m_uStackSize >= pDst->m_uStackSize);
+#ifdef DEBUG
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ if (!pTmp->Mapped(i) && pDst->Mapped(i))
+ {
+ assert(!"Dst stack state can't have a virtual register live if Src target has it dead");
+ }
+ }
+#endif
+
+ // First we need to get rid of the stuff that's dead in pDst
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ if (pTmp->Mapped(i) && !pDst->Mapped(i))
+ {
+ // We have to get rid of this one
+ JITDUMP("Removing virtual register V%i from stack\n", i);
+
+ // Don't need this virtual register any more
+ FlatFPX87_Unload(pTmp, i);
+ }
+ }
+
+ assert(pTmp->m_uStackSize == pDst->m_uStackSize);
+
+ // Extract cycles
+ int iProcessed = 0;
+
+ // We start with the top of the stack so that we can
+ // easily recognize the cycle that contains it
+ for (i = pTmp->m_uStackSize - 1; i >= 0; i--)
+ {
+ // Have we processed this stack element yet?
+ if (((1 << i) & iProcessed) == 0)
+ {
+ // Extract cycle
+ int iCycle[FP_VIRTUALREGISTERS];
+ int iCycleLength = 0;
+ int iCurrent = i;
+ int iTOS = pTmp->m_uStackSize - 1;
+
+ do
+ {
+ // Mark current stack element as processed
+ iProcessed |= (1 << iCurrent);
+
+ // Update cycle
+ iCycle[iCycleLength++] = iCurrent;
+
+ // Next element in cycle
+ iCurrent = pDst->m_uVirtualMap[pTmp->m_uStack[iCurrent]];
+
+ } while ((iProcessed & (1 << iCurrent)) == 0);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Cycle: (");
+ for (int l = 0; l < iCycleLength; l++)
+ {
+ printf("%i", pTmp->StackToST(iCycle[l]));
+ if (l + 1 < iCycleLength)
+ printf(", ");
+ }
+ printf(")\n");
+ }
+#endif
+
+ // Extract cycle
+ if (iCycleLength == 1)
+ {
+ // Stack element in the same place. Nothing to do
+ }
+ else
+ {
+ if (iCycle[0] == iTOS)
+ {
+ // Cycle includes stack element 0
+ int j;
+
+ for (j = 1; j < iCycleLength; j++)
+ {
+ FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
+ }
+ }
+ else
+ {
+ // Cycle doesn't include the top-of-stack element (ST(0))
+ int j;
+
+ for (j = 0; j < iCycleLength; j++)
+ {
+ FlatFPX87_SwapStack(pTmp, iCycle[j], iTOS);
+ }
+
+ FlatFPX87_SwapStack(pTmp, iCycle[0], iTOS);
+ }
+ }
+ }
+ }
+
+ assert(FlatFPStateX87::AreEqual(pTmp, pDst));
+}
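The transition above realizes an arbitrary re-ordering of the FP stack by decomposing the required permutation into cycles and resolving each cycle with swaps against the top of the stack, which is the only swap x87's fxch provides. A standalone model of that scheme (virtual register numbers are assumed to be 0..n-1 so they can index a plain array; this is an illustration, not the JIT's code):

#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

// Swap the value in "slot" with the value in the top slot (models fxch ST(i)).
static void SwapWithTop(std::vector<int>& stack, int slot)
{
    int top = (int)stack.size() - 1;
    std::swap(stack[slot], stack[top]);
    printf("fxch ST(%d)\n", top - slot); // distance from the top, as on x87
}

// "cur" holds virtual register numbers by stack slot (last element is the top).
// "want" is the desired final layout; it must be a permutation of "cur".
static void TransitionByCycles(std::vector<int>& cur, const std::vector<int>& want)
{
    int n   = (int)cur.size();
    int tos = n - 1;
    std::vector<int> wantPos(n); // wantPos[vreg] = slot the vreg must end up in
    for (int slot = 0; slot < n; slot++)
        wantPos[want[slot]] = slot;

    unsigned processed = 0;
    for (int i = tos; i >= 0; i--)
    {
        if (processed & (1u << i))
            continue;

        // Extract the cycle containing slot i.
        std::vector<int> cycle;
        int slot = i;
        do
        {
            processed |= (1u << slot);
            cycle.push_back(slot);
            slot = wantPos[cur[slot]]; // where the value in "slot" needs to go
        } while ((processed & (1u << slot)) == 0);

        if (cycle.size() == 1)
            continue; // already in place

        if (cycle[0] == tos)
        {
            for (size_t j = 1; j < cycle.size(); j++)
                SwapWithTop(cur, cycle[j]);
        }
        else
        {
            for (size_t j = 0; j < cycle.size(); j++)
                SwapWithTop(cur, cycle[j]);
            SwapWithTop(cur, cycle[0]); // restore the (already placed) top value
        }
    }
    assert(cur == want);
}

int main()
{
    std::vector<int> cur  = {0, 1, 2, 3}; // FPV0 at the bottom, FPV3 on top
    std::vector<int> want = {2, 0, 1, 3}; // FPV3 stays put; FPV0..2 rotate
    TransitionByCycles(cur, want);
    return 0;
}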
+
+void CodeGen::genCodeForTransitionFromMask(FlatFPStateX87* pSrc, regMaskTP mask, bool bEmitCode)
+{
+ unsigned i;
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (pSrc->Mapped(i))
+ {
+ if ((mask & genRegMaskFloat((regNumber)i)) == 0)
+ {
+ FlatFPX87_Unload(pSrc, i, bEmitCode);
+ }
+ }
+ else
+ {
+ assert((mask & genRegMaskFloat((regNumber)i)) == 0 &&
+ "A register marked as incoming live in the target block isnt live in the current block");
+ }
+ }
+}
+
+void CodeGen::genCodeForPrologStackFP()
+{
+ assert(compiler->compGeneratingProlog);
+ assert(compiler->fgFirstBB);
+
+ FlatFPStateX87* pState = compiler->fgFirstBB->bbFPStateX87;
+
+ if (pState && pState->m_uStackSize)
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(liveEnregIn, VarSetOps::Intersection(compiler, compiler->fgFirstBB->bbLiveIn,
+ compiler->optAllFPregVars));
+ unsigned i;
+
+#ifdef DEBUG
+ unsigned uLoads = 0;
+#endif
+
+ assert(pState->m_uStackSize <= FP_VIRTUALREGISTERS);
+ for (i = 0; i < pState->m_uStackSize; i++)
+ {
+ // Get the virtual register that matches
+ unsigned iVirtual = pState->STToVirtual(pState->m_uStackSize - i - 1);
+
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->IsFloatRegType() && varDsc->lvRegister && varDsc->lvRegNum == iVirtual)
+ {
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ // Is this variable live on entry?
+ if (VarSetOps::IsMember(compiler, liveEnregIn, varIndex))
+ {
+ if (varDsc->lvIsParam)
+ {
+ getEmitter()->emitIns_S(INS_fld, EmitSize(varDsc->TypeGet()), varNum, 0);
+ }
+ else
+ {
+ // uninitialized regvar
+ getEmitter()->emitIns(INS_fldz);
+ }
+
+#ifdef DEBUG
+ uLoads++;
+#endif
+ break;
+ }
+ }
+ }
+
+ assert(varNum != compiler->lvaCount); // We have to find the matching var!!!!
+ }
+
+ assert(uLoads == VarSetOps::Count(compiler, liveEnregIn));
+ }
+}
+
+void CodeGen::genCodeForEndBlockTransitionStackFP(BasicBlock* block)
+{
+ switch (block->bbJumpKind)
+ {
+ case BBJ_EHFINALLYRET:
+ case BBJ_EHFILTERRET:
+ case BBJ_EHCATCHRET:
+ // Nothing to do
+ assert(compCurFPState.m_uStackSize == 0);
+ break;
+ case BBJ_THROW:
+ break;
+ case BBJ_RETURN:
+ // Nothing to do
+ assert((varTypeIsFloating(compiler->info.compRetType) && compCurFPState.m_uStackSize == 1) ||
+ compCurFPState.m_uStackSize == 0);
+ break;
+ case BBJ_COND:
+ case BBJ_NONE:
+ genCodeForBBTransitionStackFP(block->bbNext);
+ break;
+ case BBJ_ALWAYS:
+ genCodeForBBTransitionStackFP(block->bbJumpDest);
+ break;
+ case BBJ_LEAVE:
+ assert(!"BBJ_LEAVE blocks shouldn't get here");
+ break;
+ case BBJ_CALLFINALLY:
+ assert(compCurFPState.IsEmpty() && "we don't enregister variables live on entry to finallys");
+ genCodeForBBTransitionStackFP(block->bbJumpDest);
+ break;
+ case BBJ_SWITCH:
+ // Nothing to do here
+ break;
+ default:
+ noway_assert(!"Unexpected bbJumpKind");
+ break;
+ }
+}
+
+regMaskTP CodeGen::genRegMaskFromLivenessStackFP(VARSET_VALARG_TP varset)
+{
+ unsigned varNum;
+ LclVarDsc* varDsc;
+ regMaskTP result = 0;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->IsFloatRegType() && varDsc->lvRegister)
+ {
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ /* Is this variable live on entry? */
+
+ if (VarSetOps::IsMember(compiler, varset, varIndex))
+ {
+ // We should only call this function when doing a transition
+ // to a block which doesn't have state yet. All incoming live enregistered variables
+ // should already have been initialized.
+ assert(varDsc->lvRegNum != REG_FPNONE);
+
+ result |= genRegMaskFloat(varDsc->lvRegNum);
+ }
+ }
+ }
+
+ return result;
+}
+
+void CodeGen::genCodeForBBTransitionStackFP(BasicBlock* pDst)
+{
+ assert(compCurFPState.IsConsistent());
+ if (pDst->bbFPStateX87)
+ {
+ // Target block has an associated state. generate transition
+ genCodeForTransitionStackFP(&compCurFPState, pDst->bbFPStateX87);
+ }
+ else
+ {
+ // Target block hasn't got an associated state. As it can only possibly
+ // have a subset of the current state, we'll take advantage of this and
+ // generate the optimal transition
+
+ // Copy current state
+ pDst->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);
+
+ regMaskTP liveRegIn =
+ genRegMaskFromLivenessStackFP(VarSetOps::Intersection(compiler, pDst->bbLiveIn, compiler->optAllFPregVars));
+
+ // Match to live vars
+ genCodeForTransitionFromMask(pDst->bbFPStateX87, liveRegIn);
+ }
+}
+
+void CodeGen::SpillTempsStackFP(regMaskTP canSpillMask)
+{
+
+ unsigned i;
+ regMaskTP spillMask = 0;
+ regNumber reg;
+
+ // First pass we determine which registers we spill
+ for (i = 0; i < compCurFPState.m_uStackSize; i++)
+ {
+ reg = (regNumber)compCurFPState.m_uStack[i];
+ regMaskTP regMask = genRegMaskFloat(reg);
+ if ((regMask & canSpillMask) && (regMask & regSet.rsMaskRegVarFloat) == 0)
+ {
+ spillMask |= regMask;
+ }
+ }
+
+ // Second pass we do the actual spills
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if ((genRegMaskFloat((regNumber)i) & spillMask))
+ {
+ JITDUMP("spilling temp in register %s\n", regVarNameStackFP((regNumber)i));
+ SpillFloat((regNumber)i, true);
+ }
+ }
+}
+
+// Spills all the fp stack. We need this to spill
+// across calls
+void CodeGen::SpillForCallStackFP()
+{
+ unsigned i;
+ unsigned uSize = compCurFPState.m_uStackSize;
+
+ for (i = 0; i < uSize; i++)
+ {
+ SpillFloat((regNumber)compCurFPState.m_uStack[compCurFPState.TopIndex()], true);
+ }
+}
+
+void CodeGenInterface::SpillFloat(regNumber reg, bool bIsCall)
+{
+#ifdef DEBUG
+ regMaskTP mask = genRegMaskFloat(reg);
+
+ // We can allow spilling regvars, but we don't need it at the moment, and we're
+ // missing code in setupopforflatfp, so assert.
+ assert(bIsCall || (mask & (regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat)) == 0);
+#endif
+
+ JITDUMP("SpillFloat spilling register %s\n", regVarNameStackFP(reg));
+
+ // We take the virtual register to the top of the stack
+ FlatFPX87_MoveToTOS(&compCurFPState, reg);
+
+ // Allocate spill structure
+ RegSet::SpillDsc* spill = RegSet::SpillDsc::alloc(compiler, &regSet, TYP_FLOAT);
+
+ // Fill out spill structure
+ var_types type;
+ if (regSet.genUsedRegsFloat[reg])
+ {
+ JITDUMP("will spill tree [%08p]\n", dspPtr(regSet.genUsedRegsFloat[reg]));
+ // register used for temp stack
+ spill->spillTree = regSet.genUsedRegsFloat[reg];
+ spill->bEnregisteredVariable = false;
+
+ regSet.genUsedRegsFloat[reg]->gtFlags |= GTF_SPILLED;
+
+ type = genActualType(regSet.genUsedRegsFloat[reg]->TypeGet());
+
+ // Clear used flag
+ regSet.SetUsedRegFloat(regSet.genUsedRegsFloat[reg], false);
+ }
+ else
+ {
+ JITDUMP("will spill varDsc [%08p]\n", dspPtr(regSet.genRegVarsFloat[reg]));
+
+ // enregistered variable
+ spill->spillVarDsc = regSet.genRegVarsFloat[reg];
+ assert(spill->spillVarDsc);
+
+ spill->bEnregisteredVariable = true;
+
+ // Mark as spilled
+ spill->spillVarDsc->lvSpilled = true;
+ type = genActualType(regSet.genRegVarsFloat[reg]->TypeGet());
+
+ // Clear register flag
+ SetRegVarFloat(reg, type, 0);
+ }
+
+ // Add to spill list
+ spill->spillNext = regSet.rsSpillFloat;
+ regSet.rsSpillFloat = spill;
+
+ // Obtain space
+ TempDsc* temp = spill->spillTemp = compiler->tmpGetTemp(type);
+ emitAttr size = EmitSize(type);
+
+ getEmitter()->emitIns_S(INS_fstp, size, temp->tdTempNum(), 0);
+ compCurFPState.Pop();
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc, bool useSameReg)
+{
+ NYI(!"Need not be implemented for x86.");
+}
+
+void CodeGen::UnspillFloatMachineDep(RegSet::SpillDsc* spillDsc)
+{
+ // Do actual unspill
+ if (spillDsc->bEnregisteredVariable)
+ {
+ assert(spillDsc->spillVarDsc->lvSpilled);
+
+ // Do the logic as it was a regvar birth
+ genRegVarBirthStackFP(spillDsc->spillVarDsc);
+
+ // Mark as not spilled any more
+ spillDsc->spillVarDsc->lvSpilled = false;
+
+ // Update stack layout.
+ compCurFPState.Push(spillDsc->spillVarDsc->lvRegNum);
+ }
+ else
+ {
+ assert(spillDsc->spillTree->gtFlags & GTF_SPILLED);
+
+ spillDsc->spillTree->gtFlags &= ~GTF_SPILLED;
+
+ regNumber reg = regSet.PickRegFloat();
+ genMarkTreeInReg(spillDsc->spillTree, reg);
+ regSet.SetUsedRegFloat(spillDsc->spillTree, true);
+
+ compCurFPState.Push(reg);
+ }
+
+ // load from spilled spot
+ emitAttr size = EmitSize(spillDsc->spillTemp->tdTempType());
+ getEmitter()->emitIns_S(INS_fld, size, spillDsc->spillTemp->tdTempNum(), 0);
+}
+
+// unspills any reg var that we have in the spill list. We need this
+// because we can't have any spilled vars across basic blocks
+void CodeGen::UnspillRegVarsStackFp()
+{
+ RegSet::SpillDsc* cur;
+ RegSet::SpillDsc* next;
+
+ for (cur = regSet.rsSpillFloat; cur; cur = next)
+ {
+ next = cur->spillNext;
+
+ if (cur->bEnregisteredVariable)
+ {
+ UnspillFloat(cur);
+ }
+ }
+}
+
+#ifdef DEBUG
+const char* regNamesFP[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "registerfp.h"
+};
+
+// static
+const char* CodeGenInterface::regVarNameStackFP(regNumber reg)
+{
+ return regNamesFP[reg];
+}
+
+bool CodeGen::ConsistentAfterStatementStackFP()
+{
+ if (!compCurFPState.IsConsistent())
+ {
+ return false;
+ }
+
+ if (regSet.rsMaskUsedFloat != 0)
+ {
+ assert(!"FP register marked as used after statement");
+ return false;
+ }
+ if (regSet.rsMaskLockedFloat != 0)
+ {
+ assert(!"FP register marked as locked after statement");
+ return false;
+ }
+ if (genCountBits(regSet.rsMaskRegVarFloat) > compCurFPState.m_uStackSize)
+ {
+ assert(!"number of FP regvars in regSet.rsMaskRegVarFloat doesnt match current FP state");
+ return false;
+ }
+
+ return true;
+}
+
+#endif
+
+int CodeGen::genNumberTemps()
+{
+ return compCurFPState.m_uStackSize - genCountBits(regSet.rsMaskRegVarFloat);
+}
+
+void CodeGen::genDiscardStackFP(GenTreePtr tree)
+{
+ assert(tree->InReg());
+ assert(varTypeIsFloating(tree));
+
+ FlatFPX87_Unload(&compCurFPState, tree->gtRegNum, true);
+}
+
+void CodeGen::genRegRenameWithMasks(regNumber dstReg, regNumber srcReg)
+{
+ regMaskTP dstregmask = genRegMaskFloat(dstReg);
+ regMaskTP srcregmask = genRegMaskFloat(srcReg);
+
+ // rename use register
+ compCurFPState.Rename(dstReg, srcReg);
+
+ regSet.rsMaskUsedFloat &= ~srcregmask;
+ regSet.rsMaskUsedFloat |= dstregmask;
+
+ if (srcregmask & regSet.rsMaskLockedFloat)
+ {
+ assert((dstregmask & regSet.rsMaskLockedFloat) == 0);
+ // We will set the new one as locked
+ regSet.rsMaskLockedFloat &= ~srcregmask;
+ regSet.rsMaskLockedFloat |= dstregmask;
+ }
+
+ // Update the used tree
+ assert(!regSet.genUsedRegsFloat[dstReg]);
+ regSet.genUsedRegsFloat[dstReg] = regSet.genUsedRegsFloat[srcReg];
+ regSet.genUsedRegsFloat[dstReg]->gtRegNum = dstReg;
+ regSet.genUsedRegsFloat[srcReg] = NULL;
+}
+
+void CodeGen::genRegVarBirthStackFP(LclVarDsc* varDsc)
+{
+ // Mark the virtual register we're assigning to this local;
+ regNumber reg = varDsc->lvRegNum;
+
+#ifdef DEBUG
+ regMaskTP regmask = genRegMaskFloat(reg);
+#endif
+
+ assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
+ if (regSet.genUsedRegsFloat[reg])
+ {
+
+ // Register was marked as used... will have to rename it so we can put the
+ // regvar where it belongs.
+ JITDUMP("Renaming used register %s\n", regVarNameStackFP(reg));
+
+ regNumber newreg;
+
+ newreg = regSet.PickRegFloat();
+
+#ifdef DEBUG
+ regMaskTP newregmask = genRegMaskFloat(newreg);
+#endif
+
+ // Update used mask
+ assert((regSet.rsMaskUsedFloat & regmask) && (regSet.rsMaskUsedFloat & newregmask) == 0);
+
+ genRegRenameWithMasks(newreg, reg);
+ }
+
+ // Mark the reg as holding a regvar
+ varDsc->lvSpilled = false;
+ SetRegVarFloat(reg, varDsc->TypeGet(), varDsc);
+}
+
+void CodeGen::genRegVarBirthStackFP(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("variable V%i is going live in ", tree->gtLclVarCommon.gtLclNum);
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Update register in local var
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+
+ genRegVarBirthStackFP(varDsc);
+ assert(tree->gtRegNum == tree->gtRegVar.gtRegNum && tree->gtRegNum == varDsc->lvRegNum);
+}
+
+void CodeGen::genRegVarDeathStackFP(LclVarDsc* varDsc)
+{
+ regNumber reg = varDsc->lvRegNum;
+
+ assert(varDsc->lvTracked && varDsc->lvRegister && reg != REG_FPNONE);
+ SetRegVarFloat(reg, varDsc->TypeGet(), 0);
+}
+
+void CodeGen::genRegVarDeathStackFP(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("register %s is going dead in ", regVarNameStackFP(tree->gtRegVar.gtRegNum));
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ LclVarDsc* varDsc = compiler->lvaTable + tree->gtLclVarCommon.gtLclNum;
+ genRegVarDeathStackFP(varDsc);
+}
+
+void CodeGen::genLoadStackFP(GenTreePtr tree, regNumber reg)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genLoadStackFP");
+ Compiler::printTreeID(tree);
+ printf(" %s\n", regVarNameStackFP(reg));
+ }
+#endif // DEBUG
+
+ if (tree->IsRegVar())
+ {
+ // If it has been spilled, unspill it.
+ LclVarDsc* varDsc = &compiler->lvaTable[tree->gtLclVarCommon.gtLclNum];
+ if (varDsc->lvSpilled)
+ {
+ UnspillFloat(varDsc);
+ }
+
+ // if it's dying, just rename the register, else load it normally
+ if (tree->IsRegVarDeath())
+ {
+ genRegVarDeathStackFP(tree);
+ compCurFPState.Rename(reg, tree->gtRegVar.gtRegNum);
+ }
+ else
+ {
+ assert(tree->gtRegNum == tree->gtRegVar.gtRegNum);
+ inst_FN(INS_fld, compCurFPState.VirtualToST(tree->gtRegVar.gtRegNum));
+ FlatFPX87_PushVirtual(&compCurFPState, reg);
+ }
+ }
+ else
+ {
+ FlatFPX87_PushVirtual(&compCurFPState, reg);
+ inst_FS_TT(INS_fld, tree);
+ }
+}
+
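+// Moves an FP value from src to dst. A memory destination is handled with fst/fstp from
+// the top of the stack; a memory or constant source is loaded with fld; for reg-to-regvar
+// moves we rename the source slot when the source is disposable (a temp or a dying regvar)
+// and load a copy otherwise.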
+void CodeGen::genMovStackFP(GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
+{
+ if (dstreg == REG_FPNONE && !dst->IsRegVar())
+ {
+ regNumber reg;
+
+ // reg to mem path
+ if (srcreg == REG_FPNONE)
+ {
+ assert(src->IsRegVar());
+ reg = src->gtRegNum;
+ }
+ else
+ {
+ reg = srcreg;
+ }
+
+ // Move src to the top of the stack
+ FlatFPX87_MoveToTOS(&compCurFPState, reg);
+
+ if (srcreg != REG_FPNONE || (src->IsRegVar() && src->IsRegVarDeath()))
+ {
+ // Emit instruction
+ inst_FS_TT(INS_fstp, dst);
+
+ // Update stack
+ compCurFPState.Pop();
+ }
+ else
+ {
+ inst_FS_TT(INS_fst, dst);
+ }
+ }
+ else
+ {
+ if (dstreg == REG_FPNONE)
+ {
+ assert(dst->IsRegVar());
+ dstreg = dst->gtRegNum;
+ }
+
+ if (srcreg == REG_FPNONE && !src->IsRegVar())
+ {
+ // mem to reg
+ assert(dst->IsRegVar() && dst->IsRegVarBirth());
+
+ FlatFPX87_PushVirtual(&compCurFPState, dstreg);
+ FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
+
+ if (src->gtOper == GT_CNS_DBL)
+ {
+ genConstantLoadStackFP(src);
+ }
+ else
+ {
+ inst_FS_TT(INS_fld, src);
+ }
+ }
+ else
+ {
+ // disposable reg to reg, use renaming
+ assert(dst->IsRegVar() && dst->IsRegVarBirth());
+ assert(src->IsRegVar() || (src->InReg()));
+ assert(src->gtRegNum != REG_FPNONE);
+
+ if ((src->InReg()) || (src->IsRegVar() && src->IsRegVarDeath()))
+ {
+ // src is disposable and dst is a regvar, so we'll rename src to dst
+
+ // SetupOp should have masked out the regvar
+ assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
+ !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
+
+ // get slot that holds the value
+ unsigned uStack = compCurFPState.m_uVirtualMap[src->gtRegNum];
+
+ // unlink the slot that holds the value
+ compCurFPState.Unmap(src->gtRegNum);
+
+ regNumber tgtreg = dst->gtRegVar.gtRegNum;
+
+ compCurFPState.IgnoreConsistencyChecks(true);
+
+ if (regSet.genUsedRegsFloat[tgtreg])
+ {
+ // tgtreg is in use, so move its contents to srcreg. We do the renaming here
+ // because srcreg won't be marked as used; otherwise srcreg would be a candidate
+ // for moving, which is something we don't want.
+ genRegRenameWithMasks(src->gtRegNum, tgtreg);
+ }
+
+ compCurFPState.IgnoreConsistencyChecks(false);
+
+ // Birth of FP var
+ genRegVarBirthStackFP(dst);
+
+ // Associate target reg with source physical register
+ compCurFPState.Associate(tgtreg, uStack);
+ }
+ else
+ {
+ if (src->IsRegVar())
+ {
+ // A regvar that isn't dying, copied to a regvar
+ assert(!src->IsRegVarDeath());
+
+ // Birth of FP var
+ genRegVarBirthStackFP(dst);
+
+ // Load register
+ inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegVar.gtRegNum));
+
+ // update our logic stack
+ FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
+ }
+ else
+ {
+ // memory to regvar
+
+ // Birth of FP var
+ genRegVarBirthStackFP(dst);
+
+ // load into stack
+ inst_FS_TT(INS_fld, src);
+
+ // update our logic stack
+ FlatFPX87_PushVirtual(&compCurFPState, dst->gtRegVar.gtRegNum);
+ }
+ }
+ }
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_DONE(GenTreePtr tree, regNumber reg)
+{
+ return genCodeForTree_DONE(tree, reg);
+}
+
+// Does the setup of the FP stack on entry to block
+void CodeGen::genSetupStateStackFP(BasicBlock* block)
+{
+ bool bGenerate = !block->bbFPStateX87;
+ if (bGenerate)
+ {
+ // Allocate FP state
+ block->bbFPStateX87 = FlatFPAllocFPState();
+ block->bbFPStateX87->Init();
+ }
+
+ // Update liveset and lock enregistered live vars on entry
+ VARSET_TP VARSET_INIT_NOCOPY(liveSet,
+ VarSetOps::Intersection(compiler, block->bbLiveIn, compiler->optAllFPregVars));
+
+ if (!VarSetOps::IsEmpty(compiler, liveSet))
+ {
+ unsigned varNum;
+ LclVarDsc* varDsc;
+
+ for (varNum = 0, varDsc = compiler->lvaTable; varNum < compiler->lvaCount; varNum++, varDsc++)
+ {
+ if (varDsc->IsFloatRegType() && varDsc->lvRegister)
+ {
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ // Is this variable live on entry?
+ if (VarSetOps::IsMember(compiler, liveSet, varIndex))
+ {
+ JITDUMP("genSetupStateStackFP(): enregistered variable V%i is live on entry to block\n", varNum);
+
+ assert(varDsc->lvTracked);
+ assert(varDsc->lvRegNum != REG_FPNONE);
+
+ genRegVarBirthStackFP(varDsc);
+
+ if (bGenerate)
+ {
+ // If we're generating layout, update it.
+ block->bbFPStateX87->Push(varDsc->lvRegNum);
+ }
+ }
+ }
+ }
+ }
+
+ compCurFPState.Init(block->bbFPStateX87);
+
+ assert(block->bbFPStateX87->IsConsistent());
+}
+
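+// Pushes a floating point call argument onto the x86 stack. Float constants are pushed as
+// immediates; doubles and other memory operands go through fld/fstp into space reserved at
+// [ESP] (or are pushed a DWORD at a time); values already on the FP stack are popped
+// directly into the outgoing argument slot.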
+regMaskTP CodeGen::genPushArgumentStackFP(GenTreePtr args)
+{
+ regMaskTP addrReg = 0;
+ unsigned opsz = genTypeSize(genActualType(args->TypeGet()));
+
+ switch (args->gtOper)
+ {
+ GenTreePtr temp;
+ GenTreePtr fval;
+ size_t flopsz;
+
+ case GT_CNS_DBL:
+ {
+ float f = 0.0;
+ int* addr = NULL;
+ if (args->TypeGet() == TYP_FLOAT)
+ {
+ f = (float)args->gtDblCon.gtDconVal;
+ // *(long*) (&f) used instead of *addr because of of strict
+ // pointer aliasing optimization. According to the ISO C/C++
+ // standard, an optimizer can assume two pointers of
+ // non-compatible types do not point to the same memory.
+ inst_IV(INS_push, *((int*)(&f)));
+ genSinglePush();
+ addrReg = 0;
+ }
+ else
+ {
+ addr = (int*)&args->gtDblCon.gtDconVal;
+
+ // Store-forwarding fix for Pentium 4 and Centrino
+ // (even for down-level CPUs, as we don't care about their perf any more)
+ fval = genMakeConst(&args->gtDblCon.gtDconVal, args->gtType, args, true);
+ inst_FS_TT(INS_fld, fval);
+ flopsz = (size_t)8;
+ inst_RV_IV(INS_sub, REG_ESP, flopsz, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
+ genSinglePush();
+ genSinglePush();
+
+ addrReg = 0;
+ }
+
+ break;
+ }
+
+ case GT_CAST:
+ {
+ // Is the value a cast from double?
+ if ((args->gtOper == GT_CAST) && (args->CastFromType() == TYP_DOUBLE))
+ {
+ /* Load the value onto the FP stack */
+
+ genCodeForTreeFlt(args->gtCast.CastOp(), false);
+
+ /* Go push the value as a float/double */
+ args = args->gtCast.CastOp();
+
+ addrReg = 0;
+ goto PUSH_FLT;
+ }
+ // Fall through to default case....
+ }
+ default:
+ {
+ temp = genMakeAddrOrFPstk(args, &addrReg, false);
+ if (temp)
+ {
+ unsigned offs;
+
+ // We have the address of the float operand, push its bytes
+ offs = opsz;
+ assert(offs % sizeof(int) == 0);
+
+ if (offs == 4)
+ {
+ assert(args->gtType == temp->gtType);
+ do
+ {
+ offs -= sizeof(int);
+ inst_TT(INS_push, temp, offs);
+ genSinglePush();
+ } while (offs);
+ }
+ else
+ {
+ // Store-forwarding fix for Pentium 4 and Centrino
+ inst_FS_TT(INS_fld, temp);
+ flopsz = (size_t)offs;
+ inst_RV_IV(INS_sub, REG_ESP, (size_t)flopsz, EA_PTRSIZE);
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(flopsz), REG_NA, REG_ESP, 0);
+ genSinglePush();
+ genSinglePush();
+ }
+ }
+ else
+ {
+ // The argument is on the FP stack -- pop it into [ESP-4/8]
+
+ PUSH_FLT:
+
+ inst_RV_IV(INS_sub, REG_ESP, opsz, EA_PTRSIZE);
+
+ genSinglePush();
+ if (opsz == 2 * sizeof(unsigned))
+ genSinglePush();
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, args->gtRegNum);
+
+ // Pop it off to stack
+ compCurFPState.Pop();
+ getEmitter()->emitIns_AR_R(INS_fstp, EA_ATTR(opsz), REG_NA, REG_ESP, 0);
+ }
+
+ gcInfo.gcMarkRegSetNpt(addrReg);
+ break;
+ }
+ }
+
+ return addrReg;
+}
+
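+// Rounds the FP value held by 'op' to 'type' precision by spilling it to a temp of that type
+// and reloading it. Memory-resident operands already have the right precision and are left
+// untouched.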
+void CodeGen::genRoundFpExpressionStackFP(GenTreePtr op, var_types type)
+{
+ // Do nothing with memory resident opcodes - these are the right precision
+ // (even if genMakeAddrOrFPstk loads them to the FP stack)
+ if (type == TYP_UNDEF)
+ type = op->TypeGet();
+
+ switch (op->gtOper)
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ case GT_CNS_DBL:
+ case GT_IND:
+ case GT_LEA:
+ if (type == op->TypeGet())
+ return;
+ default:
+ break;
+ }
+
+ assert(op->gtRegNum != REG_FPNONE);
+
+ // Take register to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op->gtRegNum);
+
+ // Allocate a temp for the expression
+ TempDsc* temp = compiler->tmpGetTemp(type);
+
+ // Store the FP value into the temp
+ inst_FS_ST(INS_fstp, EmitSize(type), temp, 0);
+
+ // Load the value back onto the FP stack
+ inst_FS_ST(INS_fld, EmitSize(type), temp, 0);
+
+ // We no longer need the temp
+ compiler->tmpRlsTemp(temp);
+}
+
+void CodeGen::genCodeForTreeStackFP_Const(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Const() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+#ifdef DEBUG
+ if (tree->OperGet() != GT_CNS_DBL)
+ {
+ compiler->gtDispTree(tree);
+ assert(!"bogus float const");
+ }
+#endif
+ // Pick register
+ regNumber reg = regSet.PickRegFloat();
+
+ // Load constant
+ genConstantLoadStackFP(tree);
+
+ // Push register to virtual stack
+ FlatFPX87_PushVirtual(&compCurFPState, reg);
+
+ // Update tree
+ genCodeForTreeStackFP_DONE(tree, reg);
+}
+
+void CodeGen::genCodeForTreeStackFP_Leaf(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Leaf() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ switch (tree->OperGet())
+ {
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ {
+ assert(!compiler->lvaTable[tree->gtLclVarCommon.gtLclNum].lvRegister);
+
+ // Pick register
+ regNumber reg = regSet.PickRegFloat();
+
+ // Load it
+ genLoadStackFP(tree, reg);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ case GT_REG_VAR:
+ {
+ regNumber reg = regSet.PickRegFloat();
+
+ genLoadStackFP(tree, reg);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ case GT_CLS_VAR:
+ {
+ // Pick register
+ regNumber reg = regSet.PickRegFloat();
+
+ // Load it
+ genLoadStackFP(tree, reg);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ default:
+#ifdef DEBUG
+ compiler->gtDispTree(tree);
+#endif
+ assert(!"unexpected leaf");
+ }
+
+ genUpdateLife(tree);
+}
+
+void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Asg() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ emitAttr size;
+ unsigned offs;
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ assert(tree->OperGet() == GT_ASG);
+
+ if (!op1->IsRegVar() && (op2->gtOper == GT_CAST) && (op1->gtType == op2->gtType) &&
+ varTypeIsFloating(op2->gtCast.CastOp()))
+ {
+ /* We can discard the cast */
+ op2 = op2->gtCast.CastOp();
+ }
+
+ size = EmitSize(op1);
+ offs = 0;
+
+ // If lhs is a comma expression, evaluate the non-last parts, make op1 be the remainder.
+ // (But can't do this if the assignment is reversed...)
+ if ((tree->gtFlags & GTF_REVERSE_OPS) == 0)
+ {
+ op1 = genCodeForCommaTree(op1);
+ }
+
+ GenTreePtr op1NonCom = op1->gtEffectiveVal();
+ if (op1NonCom->gtOper == GT_LCL_VAR)
+ {
+#ifdef DEBUG
+ LclVarDsc* varDsc = &compiler->lvaTable[op1NonCom->gtLclVarCommon.gtLclNum];
+ // No dead stores
+ assert(!varDsc->lvTracked || compiler->opts.MinOpts() || !(op1NonCom->gtFlags & GTF_VAR_DEATH));
+#endif
+
+#ifdef DEBUGGING_SUPPORT
+
+ /* For non-debuggable code, every definition of a lcl-var has
+ * to be checked to see if we need to open a new scope for it.
+ */
+
+ if (compiler->opts.compScopeInfo && !compiler->opts.compDbgCode && (compiler->info.compVarScopesCount > 0))
+ {
+ siCheckVarScope(op1NonCom->gtLclVarCommon.gtLclNum, op1NonCom->gtLclVar.gtLclILoffs);
+ }
+#endif
+ }
+
+ assert(op2);
+ switch (op2->gtOper)
+ {
+ case GT_CNS_DBL:
+
+ assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
+
+ regMaskTP addrRegInt;
+ addrRegInt = 0;
+ regMaskTP addrRegFlt;
+ addrRegFlt = 0;
+
+ // op2 is already "evaluated," so it doesn't matter if they're reversed or not...
+ op1 = genCodeForCommaTree(op1);
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+
+ // We want to 'cast' the constant to op1's type
+ double constantValue;
+ constantValue = op2->gtDblCon.gtDconVal;
+ if (op1->gtType == TYP_FLOAT)
+ {
+ float temp = forceCastToFloat(constantValue);
+ constantValue = (double)temp;
+ }
+
+ GenTreePtr constantTree;
+ constantTree = compiler->gtNewDconNode(constantValue);
+ if (genConstantLoadStackFP(constantTree, true))
+ {
+ if (op1->IsRegVar())
+ {
+ // regvar birth
+ genRegVarBirthStackFP(op1);
+
+ // Update
+ compCurFPState.Push(op1->gtRegNum);
+ }
+ else
+ {
+ // store in target
+ inst_FS_TT(INS_fstp, op1);
+ }
+ }
+ else
+ {
+ // Standard constant
+ if (op1->IsRegVar())
+ {
+ // Load constant to fp stack.
+
+ GenTreePtr cnsaddr;
+
+ // Create slot for constant
+ if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
+ {
+ // We're going to use that double as a float, so recompute addr
+ float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
+ cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
+ }
+ else
+ {
+ cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
+ }
+
+ // Load into stack
+ inst_FS_TT(INS_fld, cnsaddr);
+
+ // regvar birth
+ genRegVarBirthStackFP(op1);
+
+ // Update
+ compCurFPState.Push(op1->gtRegNum);
+ }
+ else
+ {
+ if (size == 4)
+ {
+
+ float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
+ int* addr = (int*)&f;
+
+ do
+ {
+ inst_TT_IV(INS_mov, op1, *addr++, offs);
+ offs += sizeof(int);
+ } while (offs < size);
+ }
+ else
+ {
+ // Store-forwarding fix for Pentium 4 and Centrino, and also
+ // fld for doubles that can be represented as floats, saving
+ // 4 bytes of load
+ GenTreePtr cnsaddr;
+
+ // Create slot for constant
+ if (op1->gtType == TYP_FLOAT || StackFPIsSameAsFloat(op2->gtDblCon.gtDconVal))
+ {
+ // We're going to use that double as a float, so recompute addr
+ float f = forceCastToFloat(op2->gtDblCon.gtDconVal);
+ cnsaddr = genMakeConst(&f, TYP_FLOAT, tree, true);
+ }
+ else
+ {
+ assert(tree->gtType == TYP_DOUBLE);
+ cnsaddr = genMakeConst(&op2->gtDblCon.gtDconVal, TYP_DOUBLE, tree, true);
+ }
+
+ inst_FS_TT(INS_fld, cnsaddr);
+ inst_FS_TT(INS_fstp, op1);
+ }
+ }
+ }
+
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ genUpdateLife(op1);
+ return;
+
+ default:
+ break;
+ }
+
+ // Not one of the easy optimizations. Proceed normally
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ /* Evaluate the RHS onto the FP stack.
+ We don't need to round it as we will be doing a spill for
+ the assignment anyway (unless op1 is a GT_REG_VAR). */
+
+ genSetupForOpStackFP(op1, op2, true, true, false, true);
+
+ // Do the move
+ genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
+ }
+ else
+ {
+ // Have to evaluate left side before
+
+ // This should never happen
+ assert(!op1->IsRegVar());
+
+ genSetupForOpStackFP(op1, op2, false, true, false, true);
+
+ // Do the actual move
+ genMovStackFP(op1, REG_FPNONE, op2, (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
+ }
+}
+
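+// Prepares the two operands of a binary FP operation: one side is made addressable and the
+// other is evaluated onto the FP stack (locked while the other side is kept addressable),
+// except that read-only regvars that are not dying are left in place to avoid a useless load.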
+void CodeGen::genSetupForOpStackFP(
+ GenTreePtr& op1, GenTreePtr& op2, bool bReverse, bool bMakeOp1Addressable, bool bOp1ReadOnly, bool bOp2ReadOnly)
+{
+ if (bMakeOp1Addressable)
+ {
+ if (bReverse)
+ {
+ genSetupForOpStackFP(op2, op1, false, false, bOp2ReadOnly, bOp1ReadOnly);
+ }
+ else
+ {
+ regMaskTP addrRegInt = 0;
+ regMaskTP addrRegFlt = 0;
+
+ op1 = genCodeForCommaTree(op1);
+
+ // Evaluate RHS on FP stack
+ if (bOp2ReadOnly && op2->IsRegVar() && !op2->IsRegVarDeath())
+ {
+ // read only and not dying, so just make addressable
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+ genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+ genUpdateLife(op2);
+ }
+ else
+ {
+ // Make target addressable
+ op1 = genMakeAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+
+ op2 = genCodeForCommaTree(op2);
+
+ genCodeForTreeFloat(op2);
+
+ regSet.SetUsedRegFloat(op2, true);
+ regSet.SetLockedRegFloat(op2, true);
+
+ // Make sure target is still addressable
+ genKeepAddressableStackFP(op1, &addrRegInt, &addrRegFlt);
+
+ regSet.SetLockedRegFloat(op2, false);
+ regSet.SetUsedRegFloat(op2, false);
+ }
+
+ /* Free up anything that was tied up by the target address */
+ genDoneAddressableStackFP(op1, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ }
+ }
+ else
+ {
+ assert(!bReverse ||
+ !"Can't do this. if op2 is a reg var and dies in op1, we have a serious problem. For the "
+ "moment, handle this in the caller");
+
+ regMaskTP addrRegInt = 0;
+ regMaskTP addrRegFlt = 0;
+
+ op1 = genCodeForCommaTree(op1);
+
+ if (bOp1ReadOnly && op1->IsRegVar() && !op1->IsRegVarDeath() &&
+ !genRegVarDiesInSubTree(op2, op1->gtRegVar.gtRegNum)) // regvar can't die in op2 either
+ {
+ // First update liveness for op1, since we're "evaluating" it here
+ genUpdateLife(op1);
+
+ op2 = genCodeForCommaTree(op2);
+
+ // Read-only and not dying, so we don't have to do anything.
+ op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+ genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+ }
+ else
+ {
+ genCodeForTreeFloat(op1);
+
+ regSet.SetUsedRegFloat(op1, true);
+
+ op2 = genCodeForCommaTree(op2);
+
+ op2 = genMakeAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+
+ // Restore op1 if necessary
+ if (op1->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(op1);
+ }
+
+ // Lock op1
+ regSet.SetLockedRegFloat(op1, true);
+
+ genKeepAddressableStackFP(op2, &addrRegInt, &addrRegFlt);
+
+ // unlock op1
+ regSet.SetLockedRegFloat(op1, false);
+
+ // mark as free
+ regSet.SetUsedRegFloat(op1, false);
+ }
+
+ genDoneAddressableStackFP(op2, addrRegInt, addrRegFlt, RegSet::KEEP_REG);
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_Arithm(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Arithm() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(tree->OperGet() == GT_ADD || tree->OperGet() == GT_SUB || tree->OperGet() == GT_MUL ||
+ tree->OperGet() == GT_DIV);
+
+ // We handle the reverse here instead of leaving setupop to do it. Consider this case:
+ //
+ // + with reverse
+ // op1 regvar
+ //
+ // If the regvar dies in op1, we would need a load of the regvar instead of a noop. So we handle this
+ // here and tell genArithmStackFP to do the reverse operation.
+ bool bReverse;
+
+ GenTreePtr op1, op2;
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ bReverse = true;
+ op1 = tree->gtGetOp2();
+ op2 = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ bReverse = false;
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+ }
+
+ regNumber result;
+
+ // Fast paths
+ genTreeOps oper = tree->OperGet();
+ if (op1->IsRegVar() && op2->IsRegVar() && !op1->IsRegVarDeath() && op2->IsRegVarDeath())
+ {
+ // In this fastpath, we will save a load by doing the operation directly on the op2
+ // register, as it's dying.
+
+ // Mark op2 as dead
+ genRegVarDeathStackFP(op2);
+
+ // Do operation
+ result = genArithmStackFP(oper, op2, op2->gtRegVar.gtRegNum, op1, REG_FPNONE, !bReverse);
+
+ genUpdateLife(op1);
+ genUpdateLife(op2);
+ }
+ else if (!op1->IsRegVar() && // We don't do this for regvars, as we'll need a scratch reg
+ ((tree->gtFlags & GTF_SIDE_EFFECT) == 0) && // No side effects
+ GenTree::Compare(op1, op2)) // op1 and op2 are the same
+ {
+ // op1 is the same thing as op2. Ideal for CSEs that weren't optimized
+ // due to their low cost.
+
+ // First we need to update lifetimes from op1
+ VarSetOps::AssignNoCopy(compiler, compiler->compCurLife, genUpdateLiveSetForward(op1));
+ compiler->compCurLifeTree = op1;
+
+ genCodeForTreeFloat(op2);
+
+ result = genArithmStackFP(oper, op2, op2->gtRegNum, op2, op2->gtRegNum, bReverse);
+ }
+ else
+ {
+ genSetupForOpStackFP(op1, op2, false, false, false, true);
+
+ result = genArithmStackFP(oper, op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
+ (op2->InReg()) ? op2->gtRegNum : REG_FPNONE, bReverse);
+ }
+
+ genCodeForTreeStackFP_DONE(tree, result);
+}
+
+regNumber CodeGen::genArithmStackFP(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg, bool bReverse)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genArithmStackFP() dst: ");
+ Compiler::printTreeID(dst);
+ printf(" src: ");
+ Compiler::printTreeID(src);
+ printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
+ srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
+ }
+#endif // DEBUG
+
+ // Select the instructions depending on oper and bReverse
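+ // Naming convention for the instruction variables: the first letter is N for the natural
+ // operand order or R for the reversed one (fsubr/fdivr); the second letter is N for the
+ // non-popping form or P for the popping form (faddp, fsubrp, ...).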
+
+ instruction ins_NN;
+ instruction ins_RN;
+ instruction ins_RP;
+ instruction ins_NP;
+
+ switch (oper)
+ {
+ default:
+ assert(!"Unexpected oper");
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+
+ /* Make sure the instruction tables look correctly ordered */
+ assert(FPmathNN[GT_ADD - GT_ADD] == INS_fadd);
+ assert(FPmathNN[GT_SUB - GT_ADD] == INS_fsub);
+ assert(FPmathNN[GT_MUL - GT_ADD] == INS_fmul);
+ assert(FPmathNN[GT_DIV - GT_ADD] == INS_fdiv);
+
+ assert(FPmathNP[GT_ADD - GT_ADD] == INS_faddp);
+ assert(FPmathNP[GT_SUB - GT_ADD] == INS_fsubp);
+ assert(FPmathNP[GT_MUL - GT_ADD] == INS_fmulp);
+ assert(FPmathNP[GT_DIV - GT_ADD] == INS_fdivp);
+
+ assert(FPmathRN[GT_ADD - GT_ADD] == INS_fadd);
+ assert(FPmathRN[GT_SUB - GT_ADD] == INS_fsubr);
+ assert(FPmathRN[GT_MUL - GT_ADD] == INS_fmul);
+ assert(FPmathRN[GT_DIV - GT_ADD] == INS_fdivr);
+
+ assert(FPmathRP[GT_ADD - GT_ADD] == INS_faddp);
+ assert(FPmathRP[GT_SUB - GT_ADD] == INS_fsubrp);
+ assert(FPmathRP[GT_MUL - GT_ADD] == INS_fmulp);
+ assert(FPmathRP[GT_DIV - GT_ADD] == INS_fdivrp);
+
+ if (bReverse)
+ {
+ ins_NN = FPmathRN[oper - GT_ADD];
+ ins_NP = FPmathRP[oper - GT_ADD];
+ ins_RN = FPmathNN[oper - GT_ADD];
+ ins_RP = FPmathNP[oper - GT_ADD];
+ }
+ else
+ {
+ ins_NN = FPmathNN[oper - GT_ADD];
+ ins_NP = FPmathNP[oper - GT_ADD];
+ ins_RN = FPmathRN[oper - GT_ADD];
+ ins_RP = FPmathRP[oper - GT_ADD];
+ }
+ }
+
+ regNumber result = REG_FPNONE;
+
+ if (dstreg != REG_FPNONE)
+ {
+ if (srcreg == REG_FPNONE)
+ {
+ if (src->IsRegVar())
+ {
+ if (src->IsRegVarDeath())
+ {
+ if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
+ {
+ // Do operation and store in srcreg
+ inst_FS(ins_RP, compCurFPState.VirtualToST(src->gtRegNum));
+
+ // kill current dst and rename src as dst.
+ FlatFPX87_Kill(&compCurFPState, dstreg);
+ compCurFPState.Rename(dstreg, src->gtRegNum);
+ }
+ else
+ {
+ // Take src to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // do reverse and pop operation
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
+ }
+
+ assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
+ !(genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat));
+ }
+ else
+ {
+ if (compCurFPState.TopVirtual() == (unsigned)src->gtRegNum)
+ {
+ inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
+ }
+ else
+ {
+ FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
+ inst_FN(ins_NN, compCurFPState.VirtualToST(src->gtRegNum));
+ }
+ }
+ }
+ else
+ {
+ // do operation with memory and store in dest
+ FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
+ inst_FS_TT(ins_NN, src);
+ }
+ }
+ else
+ {
+ if (dstreg == srcreg)
+ {
+ FlatFPX87_MoveToTOS(&compCurFPState, dstreg);
+ inst_FN(ins_NN, compCurFPState.VirtualToST(dstreg));
+ }
+ else
+ {
+ if (compCurFPState.TopVirtual() == (unsigned)dst->gtRegNum)
+ {
+ // Do operation and store in srcreg
+ inst_FS(ins_RP, compCurFPState.VirtualToST(srcreg));
+
+ // kill current dst and rename src as dst.
+ FlatFPX87_Kill(&compCurFPState, dstreg);
+ compCurFPState.Rename(dstreg, srcreg);
+ }
+ else
+ {
+ FlatFPX87_MoveToTOS(&compCurFPState, srcreg);
+
+ // do reverse and pop operation
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dstreg));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, srcreg);
+ }
+ }
+ }
+
+ result = dstreg;
+ }
+ else
+ {
+ assert(!"if we get here it means we didnt load op1 into a temp. Investigate why");
+ }
+
+ assert(result != REG_FPNONE);
+ return result;
+}
+
+void CodeGen::genCodeForTreeStackFP_AsgArithm(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_AsgArithm() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(tree->OperGet() == GT_ASG_ADD || tree->OperGet() == GT_ASG_SUB || tree->OperGet() == GT_ASG_MUL ||
+ tree->OperGet() == GT_ASG_DIV);
+
+ GenTreePtr op1, op2;
+
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtGetOp2();
+
+ genSetupForOpStackFP(op1, op2, (tree->gtFlags & GTF_REVERSE_OPS) ? true : false, true, false, true);
+
+ regNumber result = genAsgArithmStackFP(tree->OperGet(), op1, (op1->InReg()) ? op1->gtRegNum : REG_FPNONE, op2,
+ (op2->InReg()) ? op2->gtRegNum : REG_FPNONE);
+
+ genCodeForTreeStackFP_DONE(tree, result);
+}
+
+regNumber CodeGen::genAsgArithmStackFP(
+ genTreeOps oper, GenTreePtr dst, regNumber dstreg, GenTreePtr src, regNumber srcreg)
+{
+ regNumber result = REG_FPNONE;
+
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genAsgArithmStackFP() dst: ");
+ Compiler::printTreeID(dst);
+ printf(" src: ");
+ Compiler::printTreeID(src);
+ printf(" dstreg: %s srcreg: %s\n", dstreg == REG_FPNONE ? "NONE" : regVarNameStackFP(dstreg),
+ srcreg == REG_FPNONE ? "NONE" : regVarNameStackFP(srcreg));
+ }
+#endif // DEBUG
+
+ instruction ins_NN;
+ instruction ins_RN;
+ instruction ins_RP;
+ instruction ins_NP;
+
+ switch (oper)
+ {
+ default:
+ assert(!"Unexpected oper");
+ break;
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+
+ assert(FPmathRN[GT_ASG_ADD - GT_ASG_ADD] == INS_fadd);
+ assert(FPmathRN[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubr);
+ assert(FPmathRN[GT_ASG_MUL - GT_ASG_ADD] == INS_fmul);
+ assert(FPmathRN[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivr);
+
+ assert(FPmathRP[GT_ASG_ADD - GT_ASG_ADD] == INS_faddp);
+ assert(FPmathRP[GT_ASG_SUB - GT_ASG_ADD] == INS_fsubrp);
+ assert(FPmathRP[GT_ASG_MUL - GT_ASG_ADD] == INS_fmulp);
+ assert(FPmathRP[GT_ASG_DIV - GT_ASG_ADD] == INS_fdivrp);
+
+ ins_NN = FPmathNN[oper - GT_ASG_ADD];
+ ins_NP = FPmathNP[oper - GT_ASG_ADD];
+
+ ins_RN = FPmathRN[oper - GT_ASG_ADD];
+ ins_RP = FPmathRP[oper - GT_ASG_ADD];
+
+ if (dstreg != REG_FPNONE)
+ {
+ assert(!"dst should be a regvar or memory");
+ }
+ else
+ {
+ if (dst->IsRegVar())
+ {
+ if (src->IsRegVar())
+ {
+ if (src->IsRegVarDeath())
+ {
+ // Take src to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // Do op
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
+
+ // SetupOp should mark the regvar as dead
+ assert((genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
+ }
+ else
+ {
+ assert(src->gtRegNum == src->gtRegVar.gtRegNum &&
+ "We shoudnt be loading regvar src on the stack as src is readonly");
+
+ // Take src to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // Do op
+ inst_FS(ins_RN, compCurFPState.VirtualToST(dst->gtRegNum));
+ }
+ }
+ else
+ {
+ if (srcreg == REG_FPNONE)
+ {
+ // take enregistered variable to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, dst->gtRegNum);
+
+ // Do operation with mem
+ inst_FS_TT(ins_NN, src);
+ }
+ else
+ {
+ // take enregistered variable to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, src->gtRegNum);
+
+ // do op
+ inst_FS(ins_NP, compCurFPState.VirtualToST(dst->gtRegNum));
+
+ // Kill the register
+ FlatFPX87_Kill(&compCurFPState, src->gtRegNum);
+ }
+ }
+ }
+ else
+ {
+ // To memory
+ if ((src->IsRegVar()) && !src->IsRegVarDeath())
+ {
+ // We set src as read only, but as dst is in memory, we will need
+ // an extra physical register (which we should have, as we have a
+ // spare one for transitions).
+ //
+ // There used to be an assertion: assert(src->gtRegNum == src->gtRegVar.gtRegNum, ...)
+ // here, but there's actually no reason to assume that. AFAICT, for FP vars under stack FP,
+ // src->gtRegVar.gtRegNum is the allocated stack pseudo-register, but src->gtRegNum is the
+ // FP stack position into which that is loaded to represent a particular use of the variable.
+ inst_FN(INS_fld, compCurFPState.VirtualToST(src->gtRegNum));
+
+ // Do operation with mem
+ inst_FS_TT(ins_RN, dst);
+
+ // store back
+ inst_FS_TT(INS_fstp, dst);
+ }
+ else
+ {
+ // put src in top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, srcreg);
+
+ // Do operation with mem
+ inst_FS_TT(ins_RN, dst);
+
+ // store back
+ inst_FS_TT(INS_fstp, dst);
+
+ // SetupOp should have marked the regvar as dead in that case
+ assert(!src->IsRegVar() || !src->IsRegVarDeath() ||
+ (genRegMaskFloat(src->gtRegVar.gtRegNum) & regSet.rsMaskRegVarFloat) == 0);
+
+ FlatFPX87_Kill(&compCurFPState, srcreg);
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+void CodeGen::genCodeForTreeStackFP_SmpOp(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_SmpOp() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ assert(tree->OperKind() & GTK_SMPOP);
+
+ switch (tree->OperGet())
+ {
+ // Assignment
+ case GT_ASG:
+ {
+ genCodeForTreeStackFP_Asg(tree);
+ break;
+ }
+
+ // Arithmetic binops
+ case GT_ADD:
+ case GT_SUB:
+ case GT_MUL:
+ case GT_DIV:
+ {
+ genCodeForTreeStackFP_Arithm(tree);
+ break;
+ }
+
+ // Asg-Arithmetic ops
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ case GT_ASG_MUL:
+ case GT_ASG_DIV:
+ {
+ genCodeForTreeStackFP_AsgArithm(tree);
+ break;
+ }
+
+ case GT_IND:
+ case GT_LEA:
+ {
+ regMaskTP addrReg;
+
+ // Make sure the address value is 'addressable'
+ addrReg = genMakeAddressable(tree, 0, RegSet::FREE_REG);
+
+ // Load the value onto the FP stack
+ regNumber reg = regSet.PickRegFloat();
+ genLoadStackFP(tree, reg);
+
+ genDoneAddressable(tree, addrReg, RegSet::FREE_REG);
+
+ genCodeForTreeStackFP_DONE(tree, reg);
+
+ break;
+ }
+
+ case GT_RETURN:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ assert(op1);
+
+ // Compute the result onto the FP stack
+ if (op1->gtType == TYP_FLOAT)
+ {
+#if ROUND_FLOAT
+ bool roundOp1 = false;
+
+ switch (getRoundFloatLevel())
+ {
+ case ROUND_NEVER:
+ /* No rounding at all */
+ break;
+
+ case ROUND_CMP_CONST:
+ break;
+
+ case ROUND_CMP:
+ /* Round all comparands and return values*/
+ roundOp1 = true;
+ break;
+
+ case ROUND_ALWAYS:
+ /* Round everything */
+ roundOp1 = true;
+ break;
+
+ default:
+ assert(!"Unsupported Round Level");
+ break;
+ }
+#endif
+ genCodeForTreeFlt(op1);
+ }
+ else
+ {
+ assert(op1->gtType == TYP_DOUBLE);
+ genCodeForTreeFloat(op1);
+
+#if ROUND_FLOAT
+ if ((op1->gtOper == GT_CAST) && (op1->CastFromType() == TYP_LONG))
+ genRoundFpExpressionStackFP(op1);
+#endif
+ }
+
+ // kill enregistered variables
+ compCurFPState.Pop();
+ assert(compCurFPState.m_uStackSize == 0);
+ break;
+ }
+
+ case GT_COMMA:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ genCodeForTreeFloat(op2);
+
+ regSet.SetUsedRegFloat(op2, true);
+
+ genEvalSideEffects(op1);
+
+ if (op2->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(op2);
+ }
+
+ regSet.SetUsedRegFloat(op2, false);
+ }
+ else
+ {
+ genEvalSideEffects(op1);
+ genCodeForTreeFloat(op2);
+ }
+
+ genCodeForTreeStackFP_DONE(tree, op2->gtRegNum);
+ break;
+ }
+ case GT_CAST:
+ {
+ genCodeForTreeStackFP_Cast(tree);
+ break;
+ }
+
+ case GT_NEG:
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get the tree into a register
+ genCodeForTreeFloat(op1);
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+
+ // change the sign
+ instGen(INS_fchs);
+
+ // mark register that holds tree
+ genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
+ return;
+ }
+ case GT_INTRINSIC:
+ {
+ assert(Compiler::IsMathIntrinsic(tree));
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // get tree into a register
+ genCodeForTreeFloat(op1);
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+
+ static const instruction mathIns[] = {
+ INS_fsin, INS_fcos, INS_fsqrt, INS_fabs, INS_frndint,
+ };
+
+ assert(mathIns[CORINFO_INTRINSIC_Sin] == INS_fsin);
+ assert(mathIns[CORINFO_INTRINSIC_Cos] == INS_fcos);
+ assert(mathIns[CORINFO_INTRINSIC_Sqrt] == INS_fsqrt);
+ assert(mathIns[CORINFO_INTRINSIC_Abs] == INS_fabs);
+ assert(mathIns[CORINFO_INTRINSIC_Round] == INS_frndint);
+ assert((unsigned)(tree->gtIntrinsic.gtIntrinsicId) < sizeof(mathIns) / sizeof(mathIns[0]));
+ instGen(mathIns[tree->gtIntrinsic.gtIntrinsicId]);
+
+ // mark register that holds tree
+ genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
+
+ return;
+ }
+ case GT_CKFINITE:
+ {
+ TempDsc* temp;
+ int offs;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // Offset of the DWord containing the exponent
+ offs = (op1->gtType == TYP_FLOAT) ? 0 : sizeof(int);
+
+ // get tree into a register
+ genCodeForTreeFloat(op1);
+
+ // Take reg to top of stack
+ FlatFPX87_MoveToTOS(&compCurFPState, op1->gtRegNum);
+
+ temp = compiler->tmpGetTemp(op1->TypeGet());
+ emitAttr size = EmitSize(op1);
+
+ // Store the value from the FP stack into the temp
+ getEmitter()->emitIns_S(INS_fst, size, temp->tdTempNum(), 0);
+
+ regNumber reg = regSet.rsPickReg();
+
+ // Load the DWord containing the exponent into a general reg.
+ inst_RV_ST(INS_mov, reg, temp, offs, op1->TypeGet(), EA_4BYTE);
+ compiler->tmpRlsTemp(temp);
+
+ // 'reg' now contains the DWord containing the exponent
+ regTracker.rsTrackRegTrash(reg);
+
+ // Mask of exponent with all 1's - appropriate for given type
+
+ int expMask;
+ expMask = (op1->gtType == TYP_FLOAT) ? 0x7F800000 // TYP_FLOAT
+ : 0x7FF00000; // TYP_DOUBLE
+
+ // Check if the exponent is all 1's
+
+ inst_RV_IV(INS_and, reg, expMask, EA_4BYTE);
+ inst_RV_IV(INS_cmp, reg, expMask, EA_4BYTE);
+
+ // If exponent was all 1's, we need to throw ArithExcep
+ genJumpToThrowHlpBlk(EJ_je, SCK_ARITH_EXCPN);
+
+ genUpdateLife(tree);
+
+ genCodeForTreeStackFP_DONE(tree, op1->gtRegNum);
+ break;
+ }
+ default:
+ NYI("opertype");
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_Cast(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Cast() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+#if ROUND_FLOAT
+ bool roundResult = true;
+#endif
+
+ regMaskTP addrReg;
+ TempDsc* temp;
+ emitAttr size;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ // If op1 is a comma expression, evaluate the non-last parts, make op1 be the rest.
+ op1 = genCodeForCommaTree(op1);
+
+ switch (op1->gtType)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ {
+
+ // Operand too small for 'fild', load it into a register
+ genCodeForTree(op1, 0);
+
+#if ROUND_FLOAT
+ // no need to round, can't overflow float or dbl
+ roundResult = false;
+#endif
+
+ // fall through
+ }
+ case TYP_INT:
+ case TYP_BYREF:
+ case TYP_LONG:
+ {
+ // Can't 'fild' a constant, it has to be loaded from memory
+ switch (op1->gtOper)
+ {
+ case GT_CNS_INT:
+ op1 = genMakeConst(&op1->gtIntCon.gtIconVal, TYP_INT, tree, false);
+ break;
+
+ case GT_CNS_LNG:
+ // Our encoder requires fild on m64int to be 64-bit aligned.
+ op1 = genMakeConst(&op1->gtLngCon.gtLconVal, TYP_LONG, tree, true);
+ break;
+ default:
+ break;
+ }
+
+ addrReg = genMakeAddressable(op1, 0, RegSet::FREE_REG);
+
+ // Grab register for the cast
+ regNumber reg = regSet.PickRegFloat();
+ genMarkTreeInReg(tree, reg);
+ compCurFPState.Push(reg);
+
+ // Is the value now sitting in a register?
+ if (op1->InReg())
+ {
+ // We'll have to store the value into the stack
+ size = EA_ATTR(roundUp(genTypeSize(op1->gtType)));
+ temp = compiler->tmpGetTemp(op1->TypeGet());
+
+ // Move the value into the temp
+ if (op1->gtType == TYP_LONG)
+ {
+ regPairNo regPair = op1->gtRegPair;
+
+ // This code is pretty ugly, but straightforward
+
+ if (genRegPairLo(regPair) == REG_STK)
+ {
+ regNumber rg1 = genRegPairHi(regPair);
+
+ assert(rg1 != REG_STK);
+
+ /* Move enregistered half to temp */
+
+ inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);
+
+ /* Move lower half to temp via "high register" */
+
+ inst_RV_TT(INS_mov, rg1, op1, 0);
+ inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);
+
+ /* Reload transfer register */
+
+ inst_RV_ST(INS_mov, rg1, temp, 4, TYP_LONG);
+ }
+ else if (genRegPairHi(regPair) == REG_STK)
+ {
+ regNumber rg1 = genRegPairLo(regPair);
+
+ assert(rg1 != REG_STK);
+
+ /* Move enregistered half to temp */
+
+ inst_ST_RV(INS_mov, temp, 0, rg1, TYP_LONG);
+
+ /* Move high half to temp via "low register" */
+
+ inst_RV_TT(INS_mov, rg1, op1, 4);
+ inst_ST_RV(INS_mov, temp, 4, rg1, TYP_LONG);
+
+ /* Reload transfer register */
+
+ inst_RV_ST(INS_mov, rg1, temp, 0, TYP_LONG);
+ }
+ else
+ {
+ /* Move the value into the temp */
+
+ inst_ST_RV(INS_mov, temp, 0, genRegPairLo(regPair), TYP_LONG);
+ inst_ST_RV(INS_mov, temp, 4, genRegPairHi(regPair), TYP_LONG);
+ }
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ /* Load the long from the temp */
+
+ inst_FS_ST(INS_fildl, size, temp, 0);
+ }
+ else
+ {
+ /* Move the value into the temp */
+
+ inst_ST_RV(INS_mov, temp, 0, op1->gtRegNum, TYP_INT);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+
+ /* Load the integer from the temp */
+
+ inst_FS_ST(INS_fild, size, temp, 0);
+ }
+
+ // We no longer need the temp
+ compiler->tmpRlsTemp(temp);
+ }
+ else
+ {
+ // Load the value from its address
+ if (op1->gtType == TYP_LONG)
+ inst_TT(INS_fildl, op1);
+ else
+ inst_TT(INS_fild, op1);
+
+ genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
+ }
+
+#if ROUND_FLOAT
+ /* integer to fp conversions can overflow. roundResult
+ * is cleared above in cases where it can't
+ */
+ if (roundResult &&
+ ((tree->gtType == TYP_FLOAT) || ((tree->gtType == TYP_DOUBLE) && (op1->gtType == TYP_LONG))))
+ genRoundFpExpression(tree);
+#endif
+
+ break;
+ }
+ case TYP_FLOAT:
+ {
+ // This is a cast from float to double.
+ // Note that conv.r(r4/r8) and conv.r8(r4/r8) are indistinguishable
+ // as we will generate GT_CAST-TYP_DOUBLE for both. This would
+ // cause us to truncate precision in either case. However,
+ // conv.r was needless in the first place, and should have
+ // been removed.
+ genCodeForTreeFloat(op1); // Truncate its precision
+
+ if (op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
+ op1->gtOper == GT_IND || op1->gtOper == GT_LEA)
+ {
+ // We take advantage here of the fact that we know that our
+ // codegen will have just loaded this from memory, and that
+ // therefore, no cast is really needed.
+ // Ideally we wouldn't do this optimization here, but in
+ // morphing. However, we need to do this after regalloc, as
+ // this optimization doesn't apply if what we're loading is a
+ // regvar.
+ }
+ else
+ {
+ genRoundFpExpressionStackFP(op1, tree->TypeGet());
+ }
+
+ // Assign reg to tree
+ genMarkTreeInReg(tree, op1->gtRegNum);
+
+ break;
+ }
+ case TYP_DOUBLE:
+ {
+ // This is a cast from double to float or double
+ // Load the value, store as destType, load back
+ genCodeForTreeFlt(op1);
+
+ if ((op1->gtOper == GT_LCL_VAR || op1->gtOper == GT_LCL_FLD || op1->gtOper == GT_CLS_VAR ||
+ op1->gtOper == GT_IND || op1->gtOper == GT_LEA) &&
+ tree->TypeGet() == TYP_DOUBLE)
+ {
+ // We take advantage here of the fact that we know that our
+ // codegen will have just loaded this from memory, and that
+ // therefore, no cast is really needed.
+ // Ideally we wouldn't do this optimization here, but in
+ // morphing. However, we need to do this after regalloc, as
+ // this optimization doesn't apply if what we're loading is a
+ // regvar.
+ }
+ else
+ {
+ genRoundFpExpressionStackFP(op1, tree->TypeGet());
+ }
+
+ // Assign reg to tree
+ genMarkTreeInReg(tree, op1->gtRegNum);
+
+ break;
+ }
+ default:
+ {
+ assert(!"unsupported cast");
+ break;
+ }
+ }
+}
+
+void CodeGen::genCodeForTreeStackFP_Special(GenTreePtr tree)
+{
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("genCodeForTreeStackFP_Special() ");
+ Compiler::printTreeID(tree);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ switch (tree->OperGet())
+ {
+ case GT_CALL:
+ {
+ genCodeForCall(tree, true);
+ break;
+ }
+ default:
+ NYI("genCodeForTreeStackFP_Special");
+ break;
+ }
+}
+
+void CodeGen::genCodeForTreeFloat(GenTreePtr tree, RegSet::RegisterPreference* pref)
+{
+ // TestTransitions();
+ genTreeOps oper;
+ unsigned kind;
+
+ assert(tree);
+ assert(tree->gtOper != GT_STMT);
+ assert(varTypeIsFloating(tree));
+
+ // What kind of node do we have?
+ oper = tree->OperGet();
+ kind = tree->OperKind();
+
+ if (kind & GTK_CONST)
+ {
+ genCodeForTreeStackFP_Const(tree);
+ }
+ else if (kind & GTK_LEAF)
+ {
+ genCodeForTreeStackFP_Leaf(tree);
+ }
+ else if (kind & GTK_SMPOP)
+ {
+ genCodeForTreeStackFP_SmpOp(tree);
+ }
+ else
+ {
+ genCodeForTreeStackFP_Special(tree);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ JitDumpFPState();
+ }
+ assert(compCurFPState.IsConsistent());
+#endif
+}
+
+bool CodeGen::genCompInsStackFP(GenTreePtr tos, GenTreePtr other)
+{
+ // assume gensetupop done
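+ // Two code paths: when fcomi/fcomip are available (genUse_fcomip()) the compare sets EFLAGS
+ // directly; otherwise we use fcom/fcomp and transfer the FPU status word into EFLAGS with the
+ // fnstsw/sahf pair at the end of this function. Returns true if the operands ended up being
+ // compared in reverse order.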
+
+ bool bUseFcomip = genUse_fcomip();
+ bool bReverse = false;
+
+ // Take op1 to top of the stack
+ FlatFPX87_MoveToTOS(&compCurFPState, tos->gtRegNum);
+
+ // We pop top of stack if it's not a live regvar
+ bool bPopTos = !(tos->IsRegVar() && !tos->IsRegVarDeath()) || (tos->InReg());
+ bool bPopOther = !(other->IsRegVar() && !other->IsRegVarDeath()) || (other->InReg());
+
+ assert(tos->IsRegVar() || (tos->InReg()));
+
+ if (!(other->IsRegVar() || (other->InReg())))
+ {
+ // op2 in memory
+ assert(bPopOther);
+
+ if (bUseFcomip)
+ {
+ // We should have space for a load
+ assert(compCurFPState.m_uStackSize < FP_PHYSICREGISTERS);
+
+ // load from mem, now the comparison will be the other way around
+ inst_FS_TT(INS_fld, other);
+ inst_FN(INS_fcomip, 1);
+
+ // pop if we've been asked to do so
+ if (bPopTos)
+ {
+ inst_FS(INS_fstp, 0);
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+
+ bReverse = true;
+ }
+ else
+ {
+ // compare directly with memory
+ if (bPopTos)
+ {
+ inst_FS_TT(INS_fcomp, other);
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+ else
+ {
+ inst_FS_TT(INS_fcom, other);
+ }
+ }
+ }
+ else
+ {
+ if (bUseFcomip)
+ {
+ if (bPopTos)
+ {
+ inst_FN(INS_fcomip, compCurFPState.VirtualToST(other->gtRegNum));
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+ else
+ {
+ inst_FN(INS_fcomi, compCurFPState.VirtualToST(other->gtRegNum));
+ }
+
+ if (bPopOther)
+ {
+ FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
+ }
+ }
+ else
+ {
+ if (bPopTos)
+ {
+ inst_FN(INS_fcomp, compCurFPState.VirtualToST(other->gtRegNum));
+ FlatFPX87_Kill(&compCurFPState, tos->gtRegNum);
+ }
+ else
+ {
+ inst_FN(INS_fcom, compCurFPState.VirtualToST(other->gtRegNum));
+ }
+
+ if (bPopOther)
+ {
+ FlatFPX87_Unload(&compCurFPState, other->gtRegNum);
+ }
+ }
+ }
+
+ if (!bUseFcomip)
+ {
+ // Oops, we have to put the result of the compare in EFLAGS
+
+ // Grab EAX for the result of the fnstsw
+ regSet.rsGrabReg(RBM_EAX);
+
+ // Generate the 'fnstsw' and test its result
+ inst_RV(INS_fnstsw, REG_EAX, TYP_INT);
+ regTracker.rsTrackRegTrash(REG_EAX);
+ instGen(INS_sahf);
+ }
+
+ return bReverse;
+}
+
+void CodeGen::genCondJumpFltStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse, bool bDoTransition)
+{
+ assert(jumpTrue && jumpFalse);
+ assert(!(cond->gtFlags & GTF_REVERSE_OPS)); // Done in genCondJump()
+ assert(varTypeIsFloating(cond->gtOp.gtOp1));
+
+ GenTreePtr op1 = cond->gtOp.gtOp1;
+ GenTreePtr op2 = cond->gtOp.gtOp2;
+ genTreeOps cmp = cond->OperGet();
+
+ // Prepare operands.
+ genSetupForOpStackFP(op1, op2, false, false, true, false);
+
+ GenTreePtr tos;
+ GenTreePtr other;
+ bool bReverseCmp = false;
+
+ if ((op2->IsRegVar() || (op2->InReg())) && // op2 is in a reg
+ (compCurFPState.TopVirtual() == (unsigned)op2->gtRegNum && // Is it already at the top of the stack?
+ (!op2->IsRegVar() || op2->IsRegVarDeath()))) // are we going to pop it off?
+ {
+ tos = op2;
+ other = op1;
+ bReverseCmp = true;
+ }
+ else
+ {
+ tos = op1;
+ other = op2;
+ bReverseCmp = false;
+ }
+
+ if (genCompInsStackFP(tos, other))
+ {
+ bReverseCmp = !bReverseCmp;
+ }
+
+ // do .un comparison
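+ // An unordered x87 compare (i.e. a NaN operand) sets the parity flag, so EJ_jpe takes the
+ // NaN branch.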
+ if (cond->gtFlags & GTF_RELOP_NAN_UN)
+ {
+ // Generate the first jump (NaN check)
+ genCondJmpInsStackFP(EJ_jpe, jumpTrue, NULL, bDoTransition);
+ }
+ else
+ {
+ jumpFalse->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL;
+
+ // Generate the first jump (NaN check)
+ genCondJmpInsStackFP(EJ_jpe, jumpFalse, NULL, bDoTransition);
+ }
+
+ /* Generate the second jump (comparison) */
+ const static BYTE dblCmpTstJmp2[] = {
+ EJ_je, // GT_EQ
+ EJ_jne, // GT_NE
+ EJ_jb, // GT_LT
+ EJ_jbe, // GT_LE
+ EJ_jae, // GT_GE
+ EJ_ja, // GT_GT
+ };
+
+ // Swap comp order if necessary
+ if (bReverseCmp)
+ {
+ cmp = GenTree::SwapRelop(cmp);
+ }
+
+ genCondJmpInsStackFP((emitJumpKind)dblCmpTstJmp2[cmp - GT_EQ], jumpTrue, jumpFalse, bDoTransition);
+}
+
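+// Returns the block that a jump from pFrom to pTarget should be redirected through when the
+// FP stack described by pState has to be matched to pTarget's layout. Fast path: pTarget
+// itself when the states already match or the stack is empty; otherwise a new BBJ_ALWAYS
+// block carrying pState is created and placed in the appropriate (hot or cold) region.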
+BasicBlock* CodeGen::genTransitionBlockStackFP(FlatFPStateX87* pState, BasicBlock* pFrom, BasicBlock* pTarget)
+{
+ // Fast paths where a transition block is not necessary
+ if (pTarget->bbFPStateX87 && FlatFPStateX87::AreEqual(pState, pTarget->bbFPStateX87) || pState->IsEmpty())
+ {
+ return pTarget;
+ }
+
+ // We shouldn't have any handlers if we're generating transition blocks, as we don't know
+ // how to recover them
+ assert(compiler->compMayHaveTransitionBlocks);
+ assert(compiler->compHndBBtabCount == 0);
+
+#ifdef DEBUG
+ compiler->fgSafeBasicBlockCreation = true;
+#endif
+
+ // Create a temp block
+ BasicBlock* pBlock = compiler->bbNewBasicBlock(BBJ_ALWAYS);
+
+#ifdef DEBUG
+ compiler->fgSafeBasicBlockCreation = false;
+#endif
+
+ VarSetOps::Assign(compiler, pBlock->bbLiveIn, pFrom->bbLiveOut);
+ VarSetOps::Assign(compiler, pBlock->bbLiveOut, pFrom->bbLiveOut);
+
+ pBlock->bbJumpDest = pTarget;
+ pBlock->bbFlags |= BBF_JMP_TARGET;
+ //
+ // If either pFrom or pTarget are cold blocks then
+ // the transition block also must be cold
+ //
+ pBlock->bbFlags |= (pFrom->bbFlags & BBF_COLD);
+ pBlock->bbFlags |= (pTarget->bbFlags & BBF_COLD);
+
+ // The FP state for the block is the same as the current one
+ pBlock->bbFPStateX87 = FlatFPAllocFPState(pState);
+
+ if ((pBlock->bbFlags & BBF_COLD) || (compiler->fgFirstColdBlock == NULL))
+ {
+ //
+ // If this block is cold or if all blocks are hot
+ // then we just insert it at the end of the method.
+ //
+ compiler->fgMoveBlocksAfter(pBlock, pBlock, compiler->fgLastBBInMainFunction());
+ }
+ else
+ {
+ //
+ // This block is hot so we need to insert it in the hot region
+ // of the method.
+ //
+ BasicBlock* lastHotBlock = compiler->fgFirstColdBlock->bbPrev;
+ noway_assert(lastHotBlock != nullptr);
+
+ if (lastHotBlock->bbFallsThrough())
+ NO_WAY("Bad fgFirstColdBlock in genTransitionBlockStackFP()");
+
+ //
+ // Insert pBlock between lastHotBlock and fgFirstColdBlock
+ //
+ compiler->fgInsertBBafter(lastHotBlock, pBlock);
+ }
+
+ return pBlock;
+}
+
+void CodeGen::genCondJumpLngStackFP(GenTreePtr cond, BasicBlock* jumpTrue, BasicBlock* jumpFalse)
+{
+ // For the moment, and so we don't have to deal with the number of special cases
+ // we have, we insert a dummy block for jumpTrue (if necessary) that will do the
+ // transition for us. For the jumpFalse case, we play a trick: for the false case,
+ // a long conditional has a fallthrough (least significant DWORD check is false) and
+ // also has a jump to the fallthrough (bbNext) if the most significant DWORD check
+ // fails. However, we do want to make an FP transition if we're in the latter case,
+ // so what we do is create a label and make jumpFalse go there. This label is defined
+ // before doing the FP transition logic at the end of the block, so now both exit paths
+ // for the false condition will go through the transition and then fall through to bbNext.
+ assert(jumpFalse == compiler->compCurBB->bbNext);
+
+ BasicBlock* pTransition = genCreateTempLabel();
+
+ genCondJumpLng(cond, jumpTrue, pTransition, true);
+
+ genDefineTempLabel(pTransition);
+}
+
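+// Brings the FP regvar state in line with 'liveset' when moving between qmark arms: generates
+// the stack transition for the registers that stay live, kills all currently live FP regvars,
+// and re-births the ones that are live in the new set.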
+void CodeGen::genQMarkRegVarTransition(GenTreePtr nextNode, VARSET_VALARG_TP liveset)
+{
+ // Kill any vars that may die in the transition
+ VARSET_TP VARSET_INIT_NOCOPY(newLiveSet, VarSetOps::Intersection(compiler, liveset, compiler->optAllFPregVars));
+
+ regMaskTP liveRegIn = genRegMaskFromLivenessStackFP(newLiveSet);
+ genCodeForTransitionFromMask(&compCurFPState, liveRegIn);
+
+ unsigned i;
+
+ // Kill all regvars
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if ((genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat))
+ {
+
+ genRegVarDeathStackFP(regSet.genRegVarsFloat[i]);
+ }
+ }
+
+ // Born necessary regvars
+ for (i = 0; i < compiler->lvaTrackedCount; i++)
+ {
+ unsigned lclVar = compiler->lvaTrackedToVarNum[i];
+ LclVarDsc* varDsc = compiler->lvaTable + lclVar;
+
+ assert(varDsc->lvTracked);
+
+ if (varDsc->lvRegister && VarSetOps::IsMember(compiler, newLiveSet, i))
+ {
+ genRegVarBirthStackFP(varDsc);
+ }
+ }
+}
+
+void CodeGen::genQMarkBeforeElseStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode)
+{
+ assert(regSet.rsMaskLockedFloat == 0);
+
+ // Save current state at colon
+ pState->stackState.Init(&compCurFPState);
+
+ // Kill any vars that may die in the transition to then
+ genQMarkRegVarTransition(nextNode, varsetCond);
+}
+
+void CodeGen::genQMarkAfterElseBlockStackFP(QmarkStateStackFP* pState, VARSET_VALARG_TP varsetCond, GenTreePtr nextNode)
+{
+ assert(regSet.rsMaskLockedFloat == 0);
+
+ FlatFPStateX87 tempSwap;
+
+ // Save the current state. Now tempSwap will store the target state for the else block
+ tempSwap.Init(&compCurFPState);
+
+ compCurFPState.Init(&pState->stackState);
+
+ pState->stackState.Init(&tempSwap);
+
+ // Did any regvars die in the then block that are live on entry to the else block?
+ unsigned i;
+ for (i = 0; i < compiler->lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(compiler, varsetCond, i) && VarSetOps::IsMember(compiler, compiler->optAllFPregVars, i))
+ {
+ // This variable should be live
+ unsigned lclnum = compiler->lvaTrackedToVarNum[i];
+ LclVarDsc* varDsc = compiler->lvaTable + lclnum;
+
+ if (regSet.genRegVarsFloat[varDsc->lvRegNum] != varDsc)
+ {
+ JITDUMP("genQMarkAfterThenBlockStackFP(): Fixing up regvar that was modified in then\n");
+ if (regSet.genRegVarsFloat[varDsc->lvRegNum])
+ {
+ genRegVarDeathStackFP(regSet.genRegVarsFloat[varDsc->lvRegNum]);
+ }
+
+ genRegVarBirthStackFP(varDsc);
+ }
+ }
+ }
+
+ // Kill any vars that may die in the transition
+ genQMarkRegVarTransition(nextNode, varsetCond);
+}
+
+void CodeGen::genQMarkAfterThenBlockStackFP(QmarkStateStackFP* pState)
+{
+ JITDUMP("genQMarkAfterThenBlockStackFP()\n");
+ assert(regSet.rsMaskLockedFloat == 0);
+
+ // Generate transition to the previous one set by the then block
+ genCodeForTransitionStackFP(&compCurFPState, &pState->stackState);
+
+ // Update state
+ compCurFPState.Init(&pState->stackState);
+}
+
+void CodeGenInterface::SetRegVarFloat(regNumber reg, var_types type, LclVarDsc* varDsc)
+{
+ regMaskTP mask = genRegMaskFloat(reg, type);
+
+ if (varDsc)
+ {
+ JITDUMP("marking register %s as a regvar\n", getRegNameFloat(reg, type));
+
+ assert(mask && ((regSet.rsMaskLockedFloat | regSet.rsMaskRegVarFloat | regSet.rsMaskUsedFloat) & mask) == 0);
+
+ regSet.rsMaskRegVarFloat |= mask;
+ }
+ else
+ {
+ JITDUMP("unmarking register %s as a regvar\n", getRegNameFloat(reg, type));
+
+ assert(mask && (regSet.rsMaskRegVarFloat & mask));
+
+ regSet.rsMaskRegVarFloat &= ~mask;
+ }
+
+ // Update lookup table
+ regSet.genRegVarsFloat[reg] = varDsc;
+}
+
+// Generates a conditional jump. It will do the appropriate stack matching for jumpTrue.
+// We don't use jumpFalse anywhere; the integer codebase assumes that it will be bbNext, and that is
+// taken care of at the end of the bb code generation.
+void CodeGen::genCondJmpInsStackFP(emitJumpKind jumpKind,
+ BasicBlock* jumpTrue,
+ BasicBlock* jumpFalse,
+ bool bDoTransition)
+{
+ // Assert the condition above.
+ assert(!jumpFalse || jumpFalse == compiler->compCurBB->bbNext || !bDoTransition);
+
+ // Do the fp stack matching.
+ if (bDoTransition && !jumpTrue->bbFPStateX87 &&
+ FlatFPSameRegisters(&compCurFPState, genRegMaskFromLivenessStackFP(jumpTrue->bbLiveIn)))
+ {
+ // Target block doesn't have state yet, but has the same registers, so
+ // we allocate the block and generate the normal jump
+ genCodeForBBTransitionStackFP(jumpTrue);
+ inst_JMP(jumpKind, jumpTrue);
+ }
+ else if (!bDoTransition || compCurFPState.IsEmpty() || // If it's empty, target has to be empty too.
+ (jumpTrue->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTrue->bbFPStateX87)))
+ {
+ // Nothing to do here. Proceed normally and generate the jump
+ inst_JMP(jumpKind, jumpTrue);
+
+ if (jumpFalse && jumpFalse != compiler->compCurBB->bbNext)
+ {
+ inst_JMP(EJ_jmp, jumpFalse);
+ }
+ }
+ else
+ {
+ // Temporary workaround for stack matching:
+ // do a forward conditional jump, generate the transition and jump to the target.
+ // The price is an additional jump instruction, but both jumps will be correctly
+ // predicted by the processor in the loop case.
+ BasicBlock* endLabel = NULL;
+
+ endLabel = genCreateTempLabel();
+
+ inst_JMP(emitter::emitReverseJumpKind(jumpKind), endLabel);
+
+ genCodeForBBTransitionStackFP(jumpTrue);
+
+ inst_JMP(EJ_jmp, jumpTrue);
+
+ genDefineTempLabel(endLabel);
+ }
+}
+
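+// Switch codegen when the FP stack is not empty: each target either inherits the current FP
+// state, already matches it, or is redirected through a (possibly shared) transition block,
+// after which the normal genTableSwitch path is used.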
+void CodeGen::genTableSwitchStackFP(regNumber reg, unsigned jumpCnt, BasicBlock** jumpTab)
+{
+ // Only come here when we have to do something special for the FPU stack!
+ //
+ assert(!compCurFPState.IsEmpty());
+ VARSET_TP VARSET_INIT_NOCOPY(liveInFP, VarSetOps::MakeEmpty(compiler));
+ VARSET_TP VARSET_INIT_NOCOPY(liveOutFP, VarSetOps::MakeEmpty(compiler));
+ for (unsigned i = 0; i < jumpCnt; i++)
+ {
+ VarSetOps::Assign(compiler, liveInFP, jumpTab[i]->bbLiveIn);
+ VarSetOps::IntersectionD(compiler, liveInFP, compiler->optAllFPregVars);
+ VarSetOps::Assign(compiler, liveOutFP, compiler->compCurBB->bbLiveOut);
+ VarSetOps::IntersectionD(compiler, liveOutFP, compiler->optAllFPregVars);
+
+ if (!jumpTab[i]->bbFPStateX87 && VarSetOps::Equal(compiler, liveInFP, liveOutFP))
+ {
+ // Doesn't have state yet and the regvar set is the same, so just copy the state and don't change the jump
+ jumpTab[i]->bbFPStateX87 = FlatFPAllocFPState(&compCurFPState);
+ }
+ else if (jumpTab[i]->bbFPStateX87 && FlatFPStateX87::AreEqual(&compCurFPState, jumpTab[i]->bbFPStateX87))
+ {
+ // Same state, don't change the jump
+ }
+ else
+ {
+ // We have to do a transition. First check if we can reuse another one
+ unsigned j;
+ for (j = 0; j < i; j++)
+ {
+ // Has to be already forwarded. If not, it can't be targeting the same block
+ if (jumpTab[j]->bbFlags & BBF_FORWARD_SWITCH)
+ {
+ if (jumpTab[i] == jumpTab[j]->bbJumpDest)
+ {
+ // yipee, we can reuse this transition block
+ jumpTab[i] = jumpTab[j];
+ break;
+ }
+ }
+ }
+
+ if (j == i)
+ {
+ // We will have to create a new transition block
+ jumpTab[i] = genTransitionBlockStackFP(&compCurFPState, compiler->compCurBB, jumpTab[i]);
+
+ jumpTab[i]->bbFlags |= BBF_FORWARD_SWITCH;
+ }
+ }
+ }
+
+ // Clear flag
+ for (unsigned i = 0; i < jumpCnt; i++)
+ {
+ jumpTab[i]->bbFlags &= ~BBF_FORWARD_SWITCH;
+ }
+
+ // everything's fixed now, so go down the normal path
+ return genTableSwitch(reg, jumpCnt, jumpTab);
+}
+
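+// Loads the double constant in 'tree' onto the FP stack. +0.0 and 1.0 use fldz/fld1; any other
+// value is loaded with fld from a read-only data slot (using a float slot when the value fits
+// in one). With bOnlyNoMemAccess, returns false instead of touching memory.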
+bool CodeGen::genConstantLoadStackFP(GenTreePtr tree, bool bOnlyNoMemAccess)
+{
+ assert(tree->gtOper == GT_CNS_DBL);
+
+ bool bFastConstant = false;
+ instruction ins_ConstantNN = INS_fldz; // keep compiler happy
+
+ // Both positive 0 and 1 are representable in float and double; beware if we add other constants.
+ switch (*((__int64*)&(tree->gtDblCon.gtDconVal)))
+ {
+ case 0:
+ // CAREFUL here! -0 is different from +0; a -0 shouldn't issue a fldz.
+ ins_ConstantNN = INS_fldz;
+ bFastConstant = true;
+ break;
+ case I64(0x3ff0000000000000):
+ ins_ConstantNN = INS_fld1;
+ bFastConstant = true;
+ }
+
+ if (bFastConstant == false && bOnlyNoMemAccess)
+ {
+ // Caller asked only to generate instructions if it didn't involve memory accesses
+ return false;
+ }
+
+ if (bFastConstant)
+ {
+ assert(compCurFPState.m_uStackSize <= FP_PHYSICREGISTERS);
+ instGen(ins_ConstantNN);
+ }
+ else
+ {
+ GenTreePtr addr;
+ if (tree->gtType == TYP_FLOAT || StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal))
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ addr = genMakeConst(&f, TYP_FLOAT, tree, false);
+ }
+ else
+ {
+ addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+ }
+
+ inst_FS_TT(INS_fld, addr);
+ }
+
+ return true;
+}
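+
+// Note (added for clarity): the fast-constant switch above compares the raw IEEE-754 bit
+// pattern of the double. 0x0000000000000000 is +0.0 (handled by fldz), and
+// 0x3ff0000000000000 is 1.0 (sign 0, biased exponent 0x3ff, zero mantissa), which is what
+// fld1 produces. -0.0 has bit pattern 0x8000000000000000 and so correctly falls through to
+// the memory-load path.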
+
+// Function called at the end of every statement. For the stack-based x87 model, its job is to
+// remove any remaining temps from the stack.
+void CodeGen::genEndOfStatement()
+{
+ unsigned i;
+
+#ifdef DEBUG
+ // Sanity check
+ unsigned uTemps = 0;
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (compCurFPState.Mapped(i) && // register is mapped
+ (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
+ {
+ uTemps++;
+ }
+ }
+ assert(uTemps <= 1);
+#endif
+
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (compCurFPState.Mapped(i) && // register is mapped
+ (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat) == 0) // but not enregistered
+ {
+ // remove register from stacks
+ FlatFPX87_Unload(&compCurFPState, i);
+ }
+ }
+
+ assert(ConsistentAfterStatementStackFP());
+}
+
+bool CodeGen::StackFPIsSameAsFloat(double d)
+{
+ if (forceCastToFloat(d) == d)
+ {
+ JITDUMP("StackFPIsSameAsFloat is true for value %lf\n", d);
+ return true;
+ }
+ else
+ {
+ JITDUMP("StackFPIsSameAsFloat is false for value %lf\n", d);
+ }
+
+ return false;
+}
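+
+// Example (added for clarity): 0.5 survives the double -> float -> double round trip exactly,
+// so StackFPIsSameAsFloat(0.5) returns true and the constant can be stored as a float.
+// 0.1 does not (the nearest float differs from the double value), so it returns false.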
+
+GenTreePtr CodeGen::genMakeAddressableStackFP(GenTreePtr tree,
+ regMaskTP* regMaskIntPtr,
+ regMaskTP* regMaskFltPtr,
+ bool bCollapseConstantDoubles)
+{
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+ case GT_CNS_DBL:
+ if (tree->gtDblCon.gtDconVal == 0.0 || tree->gtDblCon.gtDconVal == 1.0)
+ {
+ // For constants like 0 or 1 don't waste memory
+ genCodeForTree(tree, 0);
+ regSet.SetUsedRegFloat(tree, true);
+
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr;
+ if (tree->gtType == TYP_FLOAT ||
+ (bCollapseConstantDoubles && StackFPIsSameAsFloat(tree->gtDblCon.gtDconVal)))
+ {
+ float f = forceCastToFloat(tree->gtDblCon.gtDconVal);
+ addr = genMakeConst(&f, TYP_FLOAT, tree, true);
+ }
+ else
+ {
+ addr = genMakeConst(&tree->gtDblCon.gtDconVal, tree->gtType, tree, true);
+ }
+#ifdef DEBUG
+ if (compiler->verbose)
+ {
+ printf("Generated new constant in tree ");
+ Compiler::printTreeID(addr);
+ printf(" with value %lf\n", tree->gtDblCon.gtDconVal);
+ }
+#endif // DEBUG
+ tree->CopyFrom(addr, compiler);
+ return tree;
+ }
+ break;
+ case GT_REG_VAR:
+ // We take care of this in genKeepAddressableStackFP
+ return tree;
+ case GT_LCL_VAR:
+ case GT_LCL_FLD:
+ case GT_CLS_VAR:
+ return tree;
+
+ case GT_LEA:
+ if (!genMakeIndAddrMode(tree, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
+ {
+ assert(false);
+ }
+ genUpdateLife(tree);
+ return tree;
+
+ case GT_IND:
+ // Try to make the address directly addressable
+
+ if (genMakeIndAddrMode(tree->gtOp.gtOp1, tree, false, 0, RegSet::KEEP_REG, regMaskIntPtr, false))
+ {
+ genUpdateLife(tree);
+ return tree;
+ }
+ else
+ {
+ GenTreePtr addr = tree;
+ tree = tree->gtOp.gtOp1;
+
+ genCodeForTree(tree, 0);
+ regSet.rsMarkRegUsed(tree, addr);
+
+ *regMaskIntPtr = genRegMask(tree->gtRegNum);
+ return addr;
+ }
+
+ // not reached; both paths above return
+
+ default:
+ genCodeForTreeFloat(tree);
+ regSet.SetUsedRegFloat(tree, true);
+
+ // update mask
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+
+ return tree;
+ break;
+ }
+}
+
+void CodeGen::genKeepAddressableStackFP(GenTreePtr tree, regMaskTP* regMaskIntPtr, regMaskTP* regMaskFltPtr)
+{
+ regMaskTP regMaskInt, regMaskFlt;
+
+ regMaskInt = *regMaskIntPtr;
+ regMaskFlt = *regMaskFltPtr;
+
+ *regMaskIntPtr = *regMaskFltPtr = 0;
+
+ switch (tree->OperGet())
+ {
+ case GT_REG_VAR:
+ // If register has been spilled, unspill it
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(&compiler->lvaTable[tree->gtLclVarCommon.gtLclNum]);
+ }
+
+ // If regvar is dying, take it out of the regvar mask
+ if (tree->IsRegVarDeath())
+ {
+ genRegVarDeathStackFP(tree);
+ }
+ genUpdateLife(tree);
+
+ return;
+ case GT_CNS_DBL:
+ {
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+
+ return;
+ }
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_CLS_VAR:
+ genUpdateLife(tree);
+ return;
+ case GT_IND:
+ case GT_LEA:
+ if (regMaskFlt)
+ {
+ // fall through
+ }
+ else
+ {
+ *regMaskIntPtr = genKeepAddressable(tree, regMaskInt, 0);
+ *regMaskFltPtr = 0;
+ return;
+ }
+ default:
+
+ *regMaskIntPtr = 0;
+ if (tree->gtFlags & GTF_SPILLED)
+ {
+ UnspillFloat(tree);
+ }
+ *regMaskFltPtr = genRegMaskFloat(tree->gtRegNum);
+ return;
+ }
+}
+
+void CodeGen::genDoneAddressableStackFP(GenTreePtr tree,
+ regMaskTP addrRegInt,
+ regMaskTP addrRegFlt,
+ RegSet::KeepReg keptReg)
+{
+ assert(!(addrRegInt && addrRegFlt));
+
+ if (addrRegInt)
+ {
+ return genDoneAddressable(tree, addrRegInt, keptReg);
+ }
+ else if (addrRegFlt)
+ {
+ if (keptReg == RegSet::KEEP_REG)
+ {
+ for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (genRegMaskFloat((regNumber)i) & addrRegFlt)
+ {
+ regSet.SetUsedRegFloat(tree, false);
+ }
+ }
+ }
+ }
+}
+
+void CodeGen::FlatFPX87_Kill(FlatFPStateX87* pState, unsigned uVirtual)
+{
+ JITDUMP("Killing %s\n", regVarNameStackFP((regNumber)uVirtual));
+
+ assert(pState->TopVirtual() == uVirtual);
+ pState->Pop();
+}
+
+void CodeGen::FlatFPX87_PushVirtual(FlatFPStateX87* pState, unsigned uRegister, bool bEmitCode)
+{
+ JITDUMP("Pushing %s to stack\n", regVarNameStackFP((regNumber)uRegister));
+
+ pState->Push(uRegister);
+}
+
+unsigned CodeGen::FlatFPX87_Pop(FlatFPStateX87* pState, bool bEmitCode)
+{
+ assert(pState->m_uStackSize > 0);
+
+ // Update state
+ unsigned uVirtual = pState->Pop();
+
+ // Emit instruction
+ if (bEmitCode)
+ {
+ inst_FS(INS_fstp, 0);
+ }
+
+ return (uVirtual);
+}
+
+unsigned CodeGen::FlatFPX87_Top(FlatFPStateX87* pState, bool bEmitCode)
+{
+ return pState->TopVirtual();
+}
+
+void CodeGen::FlatFPX87_Unload(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
+{
+ if (uVirtual != pState->TopVirtual())
+ {
+ // We will do an fstp to the right place
+
+ // Update state
+ unsigned uStack = pState->m_uVirtualMap[uVirtual];
+ unsigned uPhysic = pState->StackToST(uStack);
+
+ pState->Unmap(uVirtual);
+ pState->Associate(pState->TopVirtual(), uStack);
+ pState->m_uStackSize--;
+
+#ifdef DEBUG
+
+ pState->m_uStack[pState->m_uStackSize] = (unsigned)-1;
+#endif
+
+ // Emit instruction
+ if (bEmitCode)
+ {
+ inst_FS(INS_fstp, uPhysic);
+ }
+ }
+ else
+ {
+ // Emit fstp
+ FlatFPX87_Pop(pState, bEmitCode);
+ }
+
+ assert(pState->IsConsistent());
+}
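+
+// Note (added for clarity): unloading a non-top virtual register relies on the x87 semantics
+// of fstp ST(i): ST(0) is copied into ST(i) and the stack is popped. The slot that held the
+// victim therefore ends up holding the old top-of-stack value, which is why the code above
+// re-associates TopVirtual() with the victim's stack slot before shrinking the stack size.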
+
+void CodeGenInterface::FlatFPX87_MoveToTOS(FlatFPStateX87* pState, unsigned uVirtual, bool bEmitCode)
+{
+ assert(!IsUninitialized(uVirtual));
+
+ JITDUMP("Moving %s to top of stack\n", regVarNameStackFP((regNumber)uVirtual));
+
+ if (uVirtual != pState->TopVirtual())
+ {
+ FlatFPX87_SwapStack(pState, pState->m_uVirtualMap[uVirtual], pState->TopIndex(), bEmitCode);
+ }
+ else
+ {
+ JITDUMP("%s already on the top of stack\n", regVarNameStackFP((regNumber)uVirtual));
+ }
+
+ assert(pState->IsConsistent());
+}
+
+void CodeGenInterface::FlatFPX87_SwapStack(FlatFPStateX87* pState, unsigned i, unsigned j, bool bEmitCode)
+{
+ assert(i != j);
+ assert(i < pState->m_uStackSize);
+ assert(j < pState->m_uStackSize);
+
+ JITDUMP("Exchanging ST(%i) and ST(%i)\n", pState->StackToST(i), pState->StackToST(j));
+
+ // issue actual swaps
+ int iPhysic = pState->StackToST(i);
+ int jPhysic = pState->StackToST(j);
+
+ if (bEmitCode)
+ {
+ if (iPhysic == 0 || jPhysic == 0)
+ {
+ inst_FN(INS_fxch, iPhysic ? iPhysic : jPhysic);
+ }
+ else
+ {
+ inst_FN(INS_fxch, iPhysic);
+ inst_FN(INS_fxch, jPhysic);
+ inst_FN(INS_fxch, iPhysic);
+ }
+ }
+
+ // Update State
+
+ // Swap Register file
+ pState->m_uVirtualMap[pState->m_uStack[i]] = j;
+ pState->m_uVirtualMap[pState->m_uStack[j]] = i;
+
+ // Swap stack
+ int temp;
+ temp = pState->m_uStack[i];
+ pState->m_uStack[i] = pState->m_uStack[j];
+ pState->m_uStack[j] = temp;
+
+ assert(pState->IsConsistent());
+}
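+
+// Example (added for clarity): fxch can only exchange ST(0) with ST(i), so swapping two
+// entries when neither is at the top takes three exchanges. For instance, to swap ST(2)
+// and ST(3):
+//
+//      fxch  ST(2)     ; ST(0) <-> ST(2)
+//      fxch  ST(3)     ; ST(0) <-> ST(3)
+//      fxch  ST(2)     ; ST(0) <-> ST(2), restoring the original top
+//
+// which is exactly the iPhysic/jPhysic/iPhysic sequence emitted above.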
+
+#ifdef DEBUG
+
+void CodeGen::JitDumpFPState()
+{
+ int i;
+
+ if ((regSet.rsMaskUsedFloat != 0) || (regSet.rsMaskRegVarFloat != 0))
+ {
+ printf("FPSTATE\n");
+ printf("Used virtual registers: ");
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (genRegMaskFloat((regNumber)i) & regSet.rsMaskUsedFloat)
+ {
+ printf("FPV%i ", i);
+ }
+ }
+ printf("\n");
+
+ printf("virtual registers holding reg vars: ");
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (genRegMaskFloat((regNumber)i) & regSet.rsMaskRegVarFloat)
+ {
+ printf("FPV%i ", i);
+ }
+ }
+ printf("\n");
+ }
+ compCurFPState.Dump();
+}
+#endif
+
+//
+//
+// Register allocation
+//
+struct ChangeToRegVarCallback
+{
+ unsigned lclnum;
+ regNumber reg;
+};
+
+void Compiler::raInitStackFP()
+{
+ // Reset local/reg interference
+ for (int i = 0; i < REG_FPCOUNT; i++)
+ {
+ VarSetOps::AssignNoCopy(this, raLclRegIntfFloat[i], VarSetOps::MakeEmpty(this));
+ }
+
+ VarSetOps::AssignNoCopy(this, optAllFPregVars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optAllNonFPvars, VarSetOps::MakeEmpty(this));
+ VarSetOps::AssignNoCopy(this, optAllFloatVars, VarSetOps::MakeEmpty(this));
+
+ raCntStkStackFP = 0;
+ raCntWtdStkDblStackFP = 0;
+ raCntStkParamDblStackFP = 0;
+
+ VarSetOps::AssignNoCopy(this, raMaskDontEnregFloat, VarSetOps::MakeEmpty(this));
+
+ // Calculate the set of all tracked FP/non-FP variables
+ // into compiler->optAllFloatVars and compiler->optAllNonFPvars
+ unsigned lclNum;
+ LclVarDsc* varDsc;
+
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ /* Ignore the variable if it's not tracked */
+
+ if (!varDsc->lvTracked)
+ continue;
+
+ /* Get hold of the index and the interference mask for the variable */
+
+ unsigned varNum = varDsc->lvVarIndex;
+
+ /* add to the set of all tracked FP/non-FP variables */
+
+ if (varDsc->IsFloatRegType())
+ VarSetOps::AddElemD(this, optAllFloatVars, varNum);
+ else
+ VarSetOps::AddElemD(this, optAllNonFPvars, varNum);
+ }
+}
+
+#ifdef DEBUG
+void Compiler::raDumpVariableRegIntfFloat()
+{
+ unsigned i;
+ unsigned j;
+
+ for (i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (!VarSetOps::IsEmpty(this, raLclRegIntfFloat[i]))
+ {
+ JITDUMP("FPV%u interferes with ", i);
+ for (j = 0; j < lvaTrackedCount; j++)
+ {
+ assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[i], optAllFloatVars)));
+
+ if (VarSetOps::IsMember(this, raLclRegIntfFloat[i], j))
+ {
+ JITDUMP("T%02u/V%02u, ", j, lvaTrackedToVarNum[j]);
+ }
+ }
+ JITDUMP("\n");
+ }
+ }
+}
+#endif
+
+// Returns the regnum for the variable passed as a parameter, taking into account
+// the fpvar-to-register interference mask. If we can't find anything, we
+// return REG_FPNONE.
+regNumber Compiler::raRegForVarStackFP(unsigned varTrackedIndex)
+{
+ for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (!VarSetOps::IsMember(this, raLclRegIntfFloat[i], varTrackedIndex))
+ {
+ return (regNumber)i;
+ }
+ }
+
+ return REG_FPNONE;
+}
+
+void Compiler::raAddPayloadStackFP(VARSET_VALARG_TP maskArg, unsigned weight)
+{
+ VARSET_TP VARSET_INIT_NOCOPY(mask, VarSetOps::Intersection(this, maskArg, optAllFloatVars));
+ if (VarSetOps::IsEmpty(this, mask))
+ {
+ return;
+ }
+
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, mask, i))
+ {
+ raPayloadStackFP[i] += weight;
+ }
+ }
+}
+
+bool Compiler::raVarIsGreaterValueStackFP(LclVarDsc* lv1, LclVarDsc* lv2)
+{
+ assert(lv1->lvTracked);
+ assert(lv2->lvTracked);
+
+ bool bSmall = (compCodeOpt() == SMALL_CODE);
+
+ double weight1 = double(bSmall ? lv1->lvRefCnt : lv1->lvRefCntWtd) - double(raPayloadStackFP[lv1->lvVarIndex]) -
+ double(raHeightsStackFP[lv1->lvVarIndex][FP_VIRTUALREGISTERS]);
+
+ double weight2 = double(bSmall ? lv2->lvRefCnt : lv2->lvRefCntWtd) - double(raPayloadStackFP[lv2->lvVarIndex]) -
+ double(raHeightsStackFP[lv2->lvVarIndex][FP_VIRTUALREGISTERS]);
+
+ double diff = weight1 - weight2;
+
+ if (diff)
+ {
+ return diff > 0 ? true : false;
+ }
+ else
+ {
+ return int(lv1->lvRefCnt - lv2->lvRefCnt) ? true : false;
+ }
+}
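+
+// Worked example (hypothetical numbers, added for clarity): under FASTCODE, a var with
+// lvRefCntWtd = 20, payload = 4 and overflow height = 6 gets weight 20 - 4 - 6 = 10, while
+// one with lvRefCntWtd = 12 and no payload or overflow gets weight 12, so the second var is
+// considered the more valuable enregistration candidate.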
+
+#ifdef DEBUG
+// Dumps only the interesting vars (the ones that are not enregistered yet).
+void Compiler::raDumpHeightsStackFP()
+{
+ unsigned i;
+ unsigned j;
+
+ JITDUMP("raDumpHeightsStackFP():\n");
+ JITDUMP("--------------------------------------------------------\n");
+ JITDUMP("Weighted Height Table Dump\n ");
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ JITDUMP(" %i ", i + 1);
+ }
+
+ JITDUMP("OVF\n");
+
+ for (i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
+ {
+ JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
+
+ for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
+ {
+ JITDUMP("%5u ", raHeightsStackFP[i][j]);
+ }
+ JITDUMP("\n");
+ }
+ }
+
+ JITDUMP("\nNonweighted Height Table Dump\n ");
+ for (i = 0; i < FP_VIRTUALREGISTERS; i++)
+ {
+ JITDUMP(" %i ", i + 1);
+ }
+
+ JITDUMP("OVF\n");
+
+ for (i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, optAllFloatVars, i) && !VarSetOps::IsMember(this, optAllFPregVars, i))
+ {
+ JITDUMP("V%02u/T%02u: ", lvaTrackedToVarNum[i], i);
+
+ for (j = 0; j <= FP_VIRTUALREGISTERS; j++)
+ {
+ JITDUMP("%5u ", raHeightsNonWeightedStackFP[i][j]);
+ }
+ JITDUMP("\n");
+ }
+ }
+ JITDUMP("--------------------------------------------------------\n");
+}
+#endif
+
+// Increases heights for the tracked variables given in mask. We call this
+// function when we enregister a variable; doing so causes the heights to
+// shift one place to the right.
+void Compiler::raUpdateHeightsForVarsStackFP(VARSET_VALARG_TP mask)
+{
+ assert(VarSetOps::IsSubset(this, mask, optAllFloatVars));
+
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, mask, i))
+ {
+ for (unsigned j = FP_VIRTUALREGISTERS; j > 0; j--)
+ {
+ raHeightsStackFP[i][j] = raHeightsStackFP[i][j - 1];
+
+#ifdef DEBUG
+ raHeightsNonWeightedStackFP[i][j] = raHeightsNonWeightedStackFP[i][j - 1];
+#endif
+ }
+
+ raHeightsStackFP[i][0] = 0;
+#ifdef DEBUG
+ raHeightsNonWeightedStackFP[i][0] = 0;
+#endif
+ }
+ }
+
+#ifdef DEBUG
+ raDumpHeightsStackFP();
+#endif
+}
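+
+// Example (added for clarity): a height row [h0, h1, ..., h(N-1), ovf] (with
+// N == FP_VIRTUALREGISTERS) becomes [0, h0, h1, ..., h(N-2), h(N-1)] after the shift:
+// every height moves one slot to the right because one more slot of the FP stack is now
+// permanently occupied by the new regvar; the old overflow value is discarded and the old
+// deepest height becomes the new overflow entry.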
+
+// This is the prepass we do to adjust refcounts across calls and
+// create the height structure.
+void Compiler::raEnregisterVarsPrePassStackFP()
+{
+ BasicBlock* block;
+
+ assert(!VarSetOps::IsEmpty(this, optAllFloatVars));
+
+ // Initialization of the height table
+ memset(raHeightsStackFP, 0, sizeof(raHeightsStackFP));
+
+ // Initialization of the payload table
+ memset(raPayloadStackFP, 0, sizeof(raPayloadStackFP));
+
+#ifdef DEBUG
+ memset(raHeightsNonWeightedStackFP, 0, sizeof(raHeightsStackFP));
+#endif
+
+ // We will have a quick table with the pointers to the interesting varDscs
+ // so that we don't have to scan for them for each tree.
+ unsigned FPVars[lclMAX_TRACKED];
+ unsigned numFPVars = 0;
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, optAllFloatVars, i))
+ {
+ FPVars[numFPVars++] = i;
+ }
+ }
+
+ assert(numFPVars == VarSetOps::Count(this, optAllFloatVars));
+
+ // Things we check here:
+ //
+ // We subtract 2 for each FP variable that's live across a call, as we will
+ // have 2 memory accesses to spill and unspill around it.
+ //
+ //
+ //
+ VARSET_TP VARSET_INIT_NOCOPY(blockLiveOutFloats, VarSetOps::MakeEmpty(this));
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+ /*
+ This opt fails in the case of a variable that has its entire lifetime contained in the 'then' of
+ a qmark. The use mask for the whole qmark won't contain that variable, as the variable's value comes
+ from a def in the else, and the def can't be set for the qmark if the else side of
+ the qmark doesn't do a def.
+
+ See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
+ 'smart' again in the future
+
+
+ if (((block->bbVarUse |
+ block->bbVarDef |
+ block->bbLiveIn ) & optAllFloatVars) == 0)
+ {
+ // Fast way out
+ continue;
+ }
+ */
+ VarSetOps::Assign(this, blockLiveOutFloats, block->bbLiveOut);
+ VarSetOps::IntersectionD(this, blockLiveOutFloats, optAllFloatVars);
+ if (!VarSetOps::IsEmpty(this, blockLiveOutFloats))
+ {
+ // See comment in compiler.h above declaration of compMayHaveTransitionBlocks
+ // to understand the reason for this limitation of FP optimizer.
+ switch (block->bbJumpKind)
+ {
+ case BBJ_COND:
+ {
+ GenTreePtr stmt;
+ stmt = block->bbTreeList->gtPrev;
+ assert(stmt->gtNext == NULL && stmt->gtStmt.gtStmtExpr->gtOper == GT_JTRUE);
+
+ assert(stmt->gtStmt.gtStmtExpr->gtOp.gtOp1);
+ GenTreePtr cond = stmt->gtStmt.gtStmtExpr->gtOp.gtOp1;
+
+ assert(cond->OperIsCompare());
+
+ if (cond->gtOp.gtOp1->TypeGet() == TYP_LONG)
+ {
+ if (compHndBBtabCount > 0)
+ {
+ // If we have any handlers we won't enregister whatever is live out of this block
+ JITDUMP("PERF Warning: Taking out FP candidates due to transition blocks + exception "
+ "handlers.\n");
+ VarSetOps::UnionD(this, raMaskDontEnregFloat,
+ VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
+ }
+ else
+ {
+ // long conditional jumps can generate transition blocks
+ compMayHaveTransitionBlocks = true;
+ }
+ }
+
+ break;
+ }
+ case BBJ_SWITCH:
+ {
+ if (compHndBBtabCount > 0)
+ {
+ // If we have any handlers we won't enregister whatever is live out of this block
+ JITDUMP(
+ "PERF Warning: Taking out FP candidates due to transition blocks + exception handlers.\n");
+ VarSetOps::UnionD(this, raMaskDontEnregFloat,
+ VarSetOps::Intersection(this, block->bbLiveOut, optAllFloatVars));
+ }
+ else
+ {
+ // fp vars are live out of the switch, so we may have transition blocks
+ compMayHaveTransitionBlocks = true;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
+ VARSET_TP VARSET_INIT(this, liveSet, block->bbLiveIn);
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ unsigned prevHeight = stmt->gtStmt.gtStmtList->gtFPlvl;
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ VarSetOps::AssignNoCopy(this, liveSet, fgUpdateLiveSet(liveSet, tree));
+ switch (tree->gtOper)
+ {
+ case GT_CALL:
+ raAddPayloadStackFP(liveSet, block->getBBWeight(this) * 2);
+ break;
+ case GT_CAST:
+ // For a cast from a long local var to double, decrement the ref count of the long
+ // to avoid a store-forwarding stall
+ if (tree->gtType == TYP_DOUBLE)
+ {
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ if (op1->gtOper == GT_LCL_VAR && op1->gtType == TYP_LONG)
+ {
+ unsigned int lclNum = op1->gtLclVarCommon.gtLclNum;
+ assert(lclNum < lvaCount);
+ LclVarDsc* varDsc = lvaTable + lclNum;
+ unsigned int weightedRefCnt = varDsc->lvRefCntWtd;
+ unsigned int refCntDecrement = 2 * block->getBBWeight(this);
+ if (refCntDecrement > weightedRefCnt)
+ {
+ varDsc->lvRefCntWtd = 0;
+ }
+ else
+ {
+ varDsc->lvRefCntWtd = weightedRefCnt - refCntDecrement;
+ }
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ // Update heights
+ unsigned height = tree->gtFPlvl;
+
+ if (height != prevHeight)
+ {
+ if (height > prevHeight && height < FP_VIRTUALREGISTERS)
+ {
+ for (unsigned i = 0; i < numFPVars; i++)
+ {
+ if (VarSetOps::IsMember(this, liveSet, FPVars[i]))
+ {
+ // The -1 is because we don't care about stack height 0,
+ // and we use offset FP_VIRTUALREGISTERS to hold the count
+ // when we overflow. We multiply by 2 because that
+ // is the number of memory accesses we will do for each spill
+ // (even if we operate directly on the spill)
+ if (compCodeOpt() == SMALL_CODE)
+ {
+ raHeightsStackFP[FPVars[i]][height - 1] += 2;
+ }
+ else
+ {
+ raHeightsStackFP[FPVars[i]][height - 1] += 2 * block->getBBWeight(this);
+ }
+
+#ifdef DEBUG
+ raHeightsNonWeightedStackFP[FPVars[i]][height - 1]++;
+#endif
+ }
+ }
+ }
+
+ prevHeight = height;
+ }
+ }
+ }
+ }
+ compCurBB = NULL;
+
+ if (compJmpOpUsed)
+ {
+ // Disable enregistering of FP vars for methods with a jmp op. We really have no
+ // coverage here.
+ // The problem with FP enreg vars is that the returning block is marked as having
+ // all variables live on exit. This works for integer vars, but for FP vars we must
+ // do the work to unload them. This is fairly straightforward to do, but I'm worried
+ // about the coverage, so I'll take the conservative approach of disabling FP enregistering;
+ // we will fix it if there is demand.
+ JITDUMP("PERF Warning: Disabling FP enregistering due to JMP op!!!!!!!.\n");
+ VarSetOps::UnionD(this, raMaskDontEnregFloat, optAllFloatVars);
+ }
+
+#ifdef DEBUG
+ raDumpHeightsStackFP();
+#endif
+}
+
+void Compiler::raSetRegLclBirthDeath(GenTreePtr tree, VARSET_VALARG_TP lastlife, bool fromLDOBJ)
+{
+ assert(tree->gtOper == GT_LCL_VAR);
+
+ unsigned lclnum = tree->gtLclVarCommon.gtLclNum;
+ assert(lclnum < lvaCount);
+
+ LclVarDsc* varDsc = lvaTable + lclnum;
+
+ if (!varDsc->lvTracked)
+ {
+ // Not tracked, can't be one of the enreg fp vars
+ return;
+ }
+
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ if (!VarSetOps::IsMember(this, optAllFPregVars, varIndex))
+ {
+ // Not one of the enreg fp vars
+ return;
+ }
+
+ assert(varDsc->lvRegNum != REG_FPNONE);
+ assert(!VarSetOps::IsMember(this, raMaskDontEnregFloat, varIndex));
+
+ unsigned livenessFlags = (tree->gtFlags & GTF_LIVENESS_MASK);
+ tree->ChangeOper(GT_REG_VAR);
+ tree->gtFlags |= livenessFlags;
+ tree->gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.gtRegNum = varDsc->lvRegNum;
+ tree->gtRegVar.SetLclNum(lclnum);
+
+ // A liveset can change in a lclvar even if the lclvar itself is not
+ // changing its life. This can happen for lclvars inside qmarks,
+ // where lclvars die across the colon edge.
+ // SO, either
+ // it is marked GTF_VAR_DEATH (already set by fgComputeLife)
+ // OR it is already live
+ // OR it is becoming live
+ //
+ if ((tree->gtFlags & GTF_VAR_DEATH) == 0)
+ {
+ if ((tree->gtFlags & GTF_VAR_DEF) != 0)
+
+ {
+ tree->gtFlags |= GTF_REG_BIRTH;
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ gtDispTree(tree);
+#endif
+}
+
+// In this pass we set the regvars and set the birth and death flags. We do it
+// for all enregistered variables at once.
+void Compiler::raEnregisterVarsPostPassStackFP()
+{
+ if (VarSetOps::IsEmpty(this, optAllFPregVars))
+ {
+ // Nothing to fix up, so bail out early.
+ return;
+ }
+
+ BasicBlock* block;
+
+ JITDUMP("raEnregisterVarsPostPassStackFP:\n");
+
+ for (block = fgFirstBB; block; block = block->bbNext)
+ {
+ compCurBB = block;
+
+ /*
+ This opt fails in the case of a variable that has its entire lifetime contained in the 'then' of
+ a qmark. The use mask for the whole qmark won't contain that variable, as the variable's value comes
+ from a def in the else, and the def can't be set for the qmark if the else side of
+ the qmark doesn't do a def.
+
+ See VSW# 354454 for more info. Leaving the comment and code here just in case we try to be
+ 'smart' again in the future
+
+
+
+ if (((block->bbVarUse |
+ block->bbVarDef |
+ block->bbLiveIn ) & optAllFPregVars) == 0)
+ {
+ // Fast way out
+ continue;
+ }
+ */
+
+ VARSET_TP VARSET_INIT(this, lastlife, block->bbLiveIn);
+ for (GenTreePtr stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ {
+ assert(stmt->gtOper == GT_STMT);
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree;
+ VarSetOps::AssignNoCopy(this, lastlife, fgUpdateLiveSet(lastlife, tree)), tree = tree->gtNext)
+ {
+ if (tree->gtOper == GT_LCL_VAR)
+ {
+ raSetRegLclBirthDeath(tree, lastlife, false);
+ }
+ }
+ }
+ assert(VarSetOps::Equal(this, lastlife, block->bbLiveOut));
+ }
+ compCurBB = NULL;
+}
+
+void Compiler::raGenerateFPRefCounts()
+{
+ // Update ref counts to stack
+ assert(raCntWtdStkDblStackFP == 0);
+ assert(raCntStkParamDblStackFP == 0);
+ assert(raCntStkStackFP == 0);
+
+ LclVarDsc* varDsc;
+ unsigned lclNum;
+ for (lclNum = 0, varDsc = lvaTable; lclNum < lvaCount; lclNum++, varDsc++)
+ {
+ if (varDsc->lvType == TYP_DOUBLE ||
+ varDsc->lvStructDoubleAlign) // Account for structs (a bit overaggressive here; we should
+ // account for field accesses, but this should be a reasonable
+ // heuristic).
+ {
+ if (varDsc->lvRegister)
+ {
+ assert(varDsc->lvTracked);
+ }
+ else
+ {
+ // Increment tmp access
+ raCntStkStackFP += varDsc->lvRefCnt;
+
+ if (varDsc->lvIsParam)
+ {
+ // Why is this not weighted?
+ raCntStkParamDblStackFP += varDsc->lvRefCnt;
+ }
+ else
+ {
+ raCntWtdStkDblStackFP += varDsc->lvRefCntWtd;
+ }
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if ((raCntWtdStkDblStackFP > 0) || (raCntStkParamDblStackFP > 0))
+ {
+ JITDUMP("StackFP double stack weighted ref count: %u ; param ref count: %u\n", raCntWtdStkDblStackFP,
+ raCntStkParamDblStackFP);
+ }
+#endif
+}
+
+void Compiler::raEnregisterVarsStackFP()
+{
+ const int FPENREGTHRESHOLD = 1;
+ const unsigned int FPENREGTHRESHOLD_WEIGHTED = FPENREGTHRESHOLD;
+
+ // Do init
+ raInitStackFP();
+
+ if (opts.compDbgCode || opts.MinOpts())
+ {
+ // no enregistering for these options.
+ return;
+ }
+
+ if (VarSetOps::IsEmpty(this, optAllFloatVars))
+ {
+ // No floating point vars. bail out
+ return;
+ }
+
+ // Do additional pass updating weights and generating height table
+ raEnregisterVarsPrePassStackFP();
+
+ // Vars are ordered by weight
+ LclVarDsc* varDsc;
+
+ // Set an interference with V0 and V1, which we reserve as temp registers.
+ // We really need only one temp, but we take the easy way of reserving two; otherwise
+ // we would need to teach codegen how to operate with spilled variables.
+ VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV0], optAllFloatVars);
+ VarSetOps::Assign(this, raLclRegIntfFloat[REG_FPV1], optAllFloatVars);
+
+#ifdef DEBUG
+ if (codeGen->genStressFloat())
+ {
+ // Lock out registers for stress.
+ regMaskTP locked = codeGen->genStressLockedMaskFloat();
+ for (unsigned i = REG_FPV0; i < REG_FPCOUNT; i++)
+ {
+ if (locked & genRegMaskFloat((regNumber)i))
+ {
+ VarSetOps::Assign(this, raLclRegIntfFloat[i], optAllFloatVars);
+ }
+ }
+ }
+#endif
+
+ // Build the interesting FP var table
+ LclVarDsc* fpLclFPVars[lclMAX_TRACKED];
+ unsigned numFPVars = 0;
+ for (unsigned i = 0; i < lvaTrackedCount; i++)
+ {
+ if (VarSetOps::IsMember(this, raMaskDontEnregFloat, i))
+ {
+ JITDUMP("Won't enregister V%02i (T%02i) because it's marked as dont enregister\n", lvaTrackedToVarNum[i],
+ i);
+ continue;
+ }
+
+ if (VarSetOps::IsMember(this, optAllFloatVars, i))
+ {
+ varDsc = lvaTable + lvaTrackedToVarNum[i];
+
+ assert(varDsc->lvTracked);
+
+ if (varDsc->lvDoNotEnregister)
+ {
+ JITDUMP("Won't enregister V%02i (T%02i) because it's marked as DoNotEnregister\n",
+ lvaTrackedToVarNum[i], i);
+ continue;
+ }
+#if !FEATURE_X87_DOUBLES
+ if (varDsc->TypeGet() == TYP_FLOAT)
+ {
+ JITDUMP("Won't enregister V%02i (T%02i) because it's a TYP_FLOAT and we have disabled "
+ "FEATURE_X87_DOUBLES\n",
+ lvaTrackedToVarNum[i], i);
+ continue;
+ }
+#endif
+
+ fpLclFPVars[numFPVars++] = lvaTable + lvaTrackedToVarNum[i];
+ }
+ }
+
+ unsigned maxRegVars = 0; // Max num of regvars at one time
+
+ for (unsigned sortNum = 0; sortNum < numFPVars; sortNum++)
+ {
+#ifdef DEBUG
+ {
+ JITDUMP("\n");
+ JITDUMP("FP regvar candidates:\n");
+
+ for (unsigned i = sortNum; i < numFPVars; i++)
+ {
+ varDsc = fpLclFPVars[i];
+ unsigned lclNum = varDsc - lvaTable;
+ unsigned varIndex;
+ varIndex = varDsc->lvVarIndex;
+
+ JITDUMP("V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n", lclNum, varIndex,
+ varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
+ raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
+ }
+ JITDUMP("\n");
+ }
+#endif
+
+ unsigned min = sortNum;
+
+ // Find the one that will save us most
+ for (unsigned i = sortNum + 1; i < numFPVars; i++)
+ {
+ if (raVarIsGreaterValueStackFP(fpLclFPVars[i], fpLclFPVars[sortNum]))
+ {
+ min = i;
+ }
+ }
+
+ // Put it at the top of the array
+ LclVarDsc* temp;
+ temp = fpLclFPVars[min];
+ fpLclFPVars[min] = fpLclFPVars[sortNum];
+ fpLclFPVars[sortNum] = temp;
+
+ varDsc = fpLclFPVars[sortNum];
+
+#ifdef DEBUG
+ unsigned lclNum = varDsc - lvaTable;
+#endif
+ unsigned varIndex = varDsc->lvVarIndex;
+
+ assert(VarSetOps::IsMember(this, optAllFloatVars, varIndex));
+
+ JITDUMP("Candidate for enregistering: V%02u/T%02u RefCount: %u Weight: %u ; Payload: %u ; Overflow: %u\n",
+ lclNum, varIndex, varDsc->lvRefCnt, varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
+ raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]);
+
+ bool bMeetsThreshold = true;
+
+ if (varDsc->lvRefCnt < FPENREGTHRESHOLD || varDsc->lvRefCntWtd < FPENREGTHRESHOLD_WEIGHTED)
+ {
+ bMeetsThreshold = false;
+ }
+
+ // We don't want to enregister arguments with only one use, as they will be
+ // loaded in the prolog. Just don't enregister them and load them lazily.
+ if (varDsc->lvIsParam &&
+ (varDsc->lvRefCnt <= FPENREGTHRESHOLD || varDsc->lvRefCntWtd <= FPENREGTHRESHOLD_WEIGHTED))
+ {
+ bMeetsThreshold = false;
+ }
+
+ if (!bMeetsThreshold
+#ifdef DEBUG
+ && codeGen->genStressFloat() != 1
+#endif
+ )
+ {
+ // Doesn't meet bar, do next
+ JITDUMP("V%02u/T%02u doesn't meet the threshold. Won't enregister\n", lclNum, varIndex);
+ continue;
+ }
+
+ // We don't want to have problems with overflow (we now have 2 unsigned counters
+ // that can possibly go to their limits), so we just promote to double here.
+ // diff
+ double balance =
+ double(varDsc->lvRefCntWtd) -
+ double(raPayloadStackFP[varIndex]) - // Additional costs of enregistering variable
+ double(raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS]) - // Spilling costs of enregistering variable
+ double(FPENREGTHRESHOLD_WEIGHTED);
+
+ JITDUMP("balance = %d - %d - %d - %d\n", varDsc->lvRefCntWtd, raPayloadStackFP[varIndex],
+ raHeightsStackFP[varIndex][FP_VIRTUALREGISTERS], FPENREGTHRESHOLD_WEIGHTED);
+
+ if (balance < 0.0
+#ifdef DEBUG
+ && codeGen->genStressFloat() != 1
+#endif
+ )
+ {
+ // Doesn't meet bar, do next
+ JITDUMP("V%02u/T%02u doesn't meet the threshold. Won't enregister\n", lclNum, varIndex);
+ continue;
+ }
+
+ regNumber reg = raRegForVarStackFP(varDsc->lvVarIndex);
+ if (reg == REG_FPNONE)
+ {
+ // Didn't make it (interferes with other regvars), do next
+ JITDUMP("V%02u/T%02u interferes with other enreg vars. Won't enregister\n", lclNum, varIndex);
+
+ continue;
+ }
+
+ if (lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ // Do not enregister if this is a floating field in a struct local of
+ // promotion type PROMOTION_TYPE_DEPENDENT.
+ continue;
+ }
+
+ // Yipee, we will enregister var.
+ varDsc->lvRegister = true;
+ varDsc->lvRegNum = reg;
+ VarSetOps::AddElemD(this, optAllFPregVars, varIndex);
+
+#ifdef DEBUG
+ raDumpVariableRegIntfFloat();
+
+ if (verbose)
+ {
+ printf("; ");
+ gtDispLclVar(lclNum);
+ printf("V%02u/T%02u (refcnt=%2u,refwtd=%4u%s) enregistered in %s\n", varIndex, varDsc->lvVarIndex,
+ varDsc->lvRefCnt, varDsc->lvRefCntWtd / 2, (varDsc->lvRefCntWtd & 1) ? ".5" : "",
+ CodeGen::regVarNameStackFP(varDsc->lvRegNum));
+ }
+
+ JITDUMP("\n");
+#endif
+
+ // Create interferences with other variables.
+ assert(VarSetOps::IsEmpty(this, VarSetOps::Diff(this, raLclRegIntfFloat[(int)reg], optAllFloatVars)));
+ VARSET_TP VARSET_INIT_NOCOPY(intfFloats, VarSetOps::Intersection(this, lvaVarIntf[varIndex], optAllFloatVars));
+
+ VarSetOps::UnionD(this, raLclRegIntfFloat[reg], intfFloats);
+
+ // Update height tables for variables that interfere with this one.
+ raUpdateHeightsForVarsStackFP(intfFloats);
+
+ // Update max number of reg vars at once.
+ maxRegVars = min(REG_FPCOUNT, max(maxRegVars, VarSetOps::Count(this, intfFloats)));
+ }
+
+ assert(VarSetOps::IsSubset(this, optAllFPregVars, optAllFloatVars));
+ assert(VarSetOps::IsEmpty(this, VarSetOps::Intersection(this, optAllFPregVars, raMaskDontEnregFloat)));
+
+ // This is a bit conservative, as they may not all go through a call.
+ // If we have to, we can fix this.
+ tmpDoubleSpillMax += maxRegVars;
+
+ // Do the pass marking trees as regvars
+ raEnregisterVarsPostPassStackFP();
+
+#ifdef DEBUG
+ {
+ JITDUMP("FP enregistration summary\n");
+
+ unsigned i;
+ for (i = 0; i < numFPVars; i++)
+ {
+ varDsc = fpLclFPVars[i];
+
+ if (varDsc->lvRegister)
+ {
+ unsigned lclNum = varDsc - lvaTable;
+ unsigned varIndex;
+ varIndex = varDsc->lvVarIndex;
+
+ JITDUMP("Enregistered V%02u/T%02u in FPV%i RefCount: %u Weight: %u \n", lclNum, varIndex,
+ varDsc->lvRegNum, varDsc->lvRefCnt, varDsc->lvRefCntWtd);
+ }
+ }
+ JITDUMP("End of FP enregistration summary\n\n");
+ }
+#endif
+}
+
+#ifdef DEBUG
+
+regMaskTP CodeGenInterface::genStressLockedMaskFloat()
+{
+ assert(genStressFloat());
+
+ // Don't use REG_FPV0 or REG_FPV1, they're reserved
+ if (genStressFloat() == 1)
+ {
+ return genRegMaskFloat(REG_FPV4) | genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) |
+ genRegMaskFloat(REG_FPV7);
+ }
+ else
+ {
+ return genRegMaskFloat(REG_FPV2) | genRegMaskFloat(REG_FPV3) | genRegMaskFloat(REG_FPV4) |
+ genRegMaskFloat(REG_FPV5) | genRegMaskFloat(REG_FPV6) | genRegMaskFloat(REG_FPV7);
+ }
+}
+
+#endif
+
+#endif // FEATURE_STACK_FP_X87
+
+#endif // LEGACY_BACKEND
diff --git a/src/jit/standalone/.gitmirror b/src/jit/standalone/.gitmirror
new file mode 100644
index 0000000000..f507630f94
--- /dev/null
+++ b/src/jit/standalone/.gitmirror
@@ -0,0 +1 @@
+Only contents of this folder, excluding subfolders, will be mirrored by the Git-TFS Mirror. \ No newline at end of file
diff --git a/src/jit/standalone/CMakeLists.txt b/src/jit/standalone/CMakeLists.txt
new file mode 100644
index 0000000000..2e6317098e
--- /dev/null
+++ b/src/jit/standalone/CMakeLists.txt
@@ -0,0 +1,58 @@
+project(ryujit)
+add_definitions(-DFEATURE_NO_HOST)
+add_definitions(-DSELF_NO_HOST)
+add_definitions(-DFEATURE_READYTORUN_COMPILER)
+remove_definitions(-DFEATURE_MERGE_JIT_AND_ENGINE)
+
+if(CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_ARM)
+ add_definitions(-DLEGACY_BACKEND)
+endif()
+
+add_library_clr(${JIT_BASE_NAME}
+ SHARED
+ ${SHARED_LIB_SOURCES}
+)
+
+add_dependencies(${JIT_BASE_NAME} jit_exports)
+
+set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_FLAGS ${JIT_EXPORTS_LINKER_OPTION})
+set_property(TARGET ${JIT_BASE_NAME} APPEND_STRING PROPERTY LINK_DEPENDS ${JIT_EXPORTS_FILE})
+
+set(RYUJIT_LINK_LIBRARIES
+ utilcodestaticnohost
+ gcinfo
+)
+
+if(CLR_CMAKE_PLATFORM_UNIX)
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ mscorrc_debug
+ coreclrpal
+ palrt
+ )
+else()
+ list(APPEND RYUJIT_LINK_LIBRARIES
+ ${STATIC_MT_CRT_LIB}
+ ${STATIC_MT_VCRT_LIB}
+ kernel32.lib
+ advapi32.lib
+ ole32.lib
+ oleaut32.lib
+ uuid.lib
+ user32.lib
+ version.lib
+ shlwapi.lib
+ bcrypt.lib
+ crypt32.lib
+ RuntimeObject.lib
+ )
+endif(CLR_CMAKE_PLATFORM_UNIX)
+
+target_link_libraries(${JIT_BASE_NAME}
+ ${RYUJIT_LINK_LIBRARIES}
+)
+
+# add the install targets
+install_clr(${JIT_BASE_NAME})
+
+# Enable profile guided optimization
+add_pgo(${JIT_BASE_NAME})
diff --git a/src/jit/target.h b/src/jit/target.h
new file mode 100644
index 0000000000..fa0b18af3e
--- /dev/null
+++ b/src/jit/target.h
@@ -0,0 +1,2320 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _TARGET_H_
+#define _TARGET_H_
+
+// Inform includers that we're in a context in which a target has been set.
+#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) || defined(_TARGET_ARM_)
+#define _TARGET_SET_
+#endif
+
+// If the UNIX_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined.
+#if defined(UNIX_AMD64_ABI)
+#if !defined(_TARGET_AMD64_)
+#error When UNIX_AMD64_ABI is defined you must define _TARGET_AMD64_ as well.
+#endif
+#endif
+
+#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 0
+#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+#define FEATURE_VARARG 1
+#endif // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
+
+/*****************************************************************************/
+// The following are human readable names for the target architectures
+#if defined(_TARGET_X86_)
+#define TARGET_READABLE_NAME "X86"
+#elif defined(_TARGET_AMD64_)
+#define TARGET_READABLE_NAME "AMD64"
+#elif defined(_TARGET_ARM_)
+#define TARGET_READABLE_NAME "ARM"
+#elif defined(_TARGET_ARM64_)
+#define TARGET_READABLE_NAME "ARM64"
+#else
+#error Unsupported or unset target architecture
+#endif
+
+/*****************************************************************************/
+// The following are intended to capture only those #defines that cannot be replaced
+// with static const members of Target
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+#define REGMASK_BITS 8 // number of bits used to represent register mask
+#elif defined(_TARGET_XARCH_)
+#define REGMASK_BITS 32
+
+#elif defined(_TARGET_ARM_)
+#define REGMASK_BITS 64
+
+#elif defined(_TARGET_ARM64_)
+#define REGMASK_BITS 64
+
+#else
+#error Unsupported or unset target architecture
+#endif
+
+/*****************************************************************************/
+
+#if defined(_TARGET_ARM_)
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+{
+ RBM_NONE = 0,
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+
+#elif defined(_TARGET_ARM64_)
+
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, xname, wname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+{
+ RBM_NONE = 0,
+#define REGDEF(name, rnum, mask, xname, wname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned __int64)
+
+#elif defined(_TARGET_AMD64_)
+
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+{
+ RBM_NONE = 0,
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+
+#elif defined(_TARGET_X86_)
+
+#ifndef LEGACY_BACKEND
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT, ACTUAL_REG_COUNT = REG_COUNT - 1 // everything but REG_STK (only real regs)
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+{
+ RBM_NONE = 0,
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+#else // LEGACY_BACKEND
+DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+{
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#define REGALIAS(alias, realname) REG_##alias = REG_##realname,
+#include "register.h"
+
+ REG_COUNT, REG_NA = REG_COUNT,
+ ACTUAL_REG_COUNT = REG_COUNT - 1, // everything but REG_STK (only real regs)
+
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#include "registerfp.h"
+
+ REG_FPCOUNT, REG_FPNONE = REG_FPCOUNT,
+
+#define REGDEF(name, rnum, mask, sname) REG_##name = rnum,
+#include "registerxmm.h"
+
+ REG_XMMCOUNT
+}
+END_DECLARE_TYPED_ENUM(_regNumber_enum, unsigned)
+
+DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+{
+ RBM_NONE = 0,
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#define REGALIAS(alias, realname) RBM_##alias = RBM_##realname,
+#include "register.h"
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#include "registerfp.h"
+
+#define REGDEF(name, rnum, mask, sname) RBM_##name = mask,
+#include "registerxmm.h"
+}
+END_DECLARE_TYPED_ENUM(_regMask_enum, unsigned)
+
+#endif // LEGACY_BACKEND
+#else
+#error Unsupported target architecture
+#endif
+
+/* The following are used to hold 'long' (64-bit integer) operands */
+
+/*
+ The following yield the number of bits and the mask of a register
+ number in a register pair.
+ */
+
+#ifdef _TARGET_ARM_
+#define REG_PAIR_NBITS 6
+#else
+#define REG_PAIR_NBITS 4
+#endif
+#define REG_PAIR_NMASK ((1 << REG_PAIR_NBITS) - 1)
+
+#ifdef DEBUG
+// Under DEBUG, we want to make sure that code doesn't accidentally confuse a reg pair value
+// with a simple register number. Thus, we offset the reg pair numbers so they are distinct
+// from all register numbers. Note that this increases the minimum size of a regPairNoSmall
+// type due to the additional bits used for this offset.
+#define REG_PAIR_FIRST (7 << REG_PAIR_NBITS)
+#define REG_PAIR_NBITS_DEBUG \
+ (REG_PAIR_NBITS + \
+ 3) // extra bits needed by the debug shifting (3 instead of 0 because we shift "7", not "1", above).
+C_ASSERT(REG_COUNT < REG_PAIR_FIRST); // make sure the register numbers (including REG_NA, ignoring fp/xmm regs on
+ // x86/x64) are distinct from the pair numbers
+#else
+#define REG_PAIR_FIRST 0
+#endif
+
+DECLARE_TYPED_ENUM(_regPairNo_enum, unsigned)
+{
+#define PAIRDEF(rlo, rhi) REG_PAIR_##rlo##rhi = REG_##rlo + (REG_##rhi << REG_PAIR_NBITS) + REG_PAIR_FIRST,
+#include "regpair.h"
+
+ REG_PAIR_LAST = (REG_COUNT - 1) + ((REG_COUNT - 1) << REG_PAIR_NBITS) + REG_PAIR_FIRST,
+
+ REG_PAIR_NONE = REG_PAIR_LAST + 1
+}
+END_DECLARE_TYPED_ENUM(_regPairNo_enum, unsigned)
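+
+// Worked example (added for clarity; assumes the usual x86 numbering from register.h,
+// i.e. REG_EAX == 0 and REG_EDX == 2): in a non-DEBUG x86 build REG_PAIR_NBITS is 4 and
+// REG_PAIR_FIRST is 0, so REG_PAIR_EAXEDX == REG_EAX + (REG_EDX << 4) == 0x20. The low
+// register is recovered with (pair & REG_PAIR_NMASK) and the high one with
+// (pair >> REG_PAIR_NBITS); in DEBUG builds the REG_PAIR_FIRST offset has to be removed first.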
+
+enum regPairMask
+{
+#define PAIRDEF(rlo, rhi) RBM_PAIR_##rlo##rhi = (RBM_##rlo | RBM_##rhi),
+#include "regpair.h"
+};
+
+/*****************************************************************************/
+
+// TODO-Cleanup: The types defined below are mildly confusing: why are there both?
+// regMaskSmall is large enough to represent the entire set of registers.
+// If regMaskSmall is smaller than a "natural" integer type, regMaskTP is wider, based
+// on a belief by the original authors of the JIT that in some situations it is more
+// efficient to have the wider representation. This belief should be tested, and if it
+// is false, then we should coalesce these two types into one (the Small width, probably).
+// In any case, we believe that it is OK to freely cast between these types; no information will
+// be lost.
+
+#ifdef _TARGET_ARMARCH_
+typedef unsigned __int64 regMaskTP;
+#else
+typedef unsigned regMaskTP;
+#endif
+
+#if REGMASK_BITS == 8
+typedef unsigned char regMaskSmall;
+#define REG_MASK_INT_FMT "%02X"
+#define REG_MASK_ALL_FMT "%02X"
+#elif REGMASK_BITS == 16
+typedef unsigned short regMaskSmall;
+#define REG_MASK_INT_FMT "%04X"
+#define REG_MASK_ALL_FMT "%04X"
+#elif REGMASK_BITS == 32
+typedef unsigned regMaskSmall;
+#define REG_MASK_INT_FMT "%08X"
+#define REG_MASK_ALL_FMT "%08X"
+#else
+typedef unsigned __int64 regMaskSmall;
+#define REG_MASK_INT_FMT "%04llX"
+#define REG_MASK_ALL_FMT "%016llX"
+#endif
+
+typedef _regNumber_enum regNumber;
+typedef _regPairNo_enum regPairNo;
+
+// LSRA currently converts freely between regNumber and regPairNo, so make sure they are the same size.
+C_ASSERT(sizeof(regPairNo) == sizeof(regNumber));
+
+typedef unsigned char regNumberSmall;
+
+#ifdef DEBUG
+
+// Under DEBUG, we shift the reg pair numbers to be independent of the regNumber range,
+// so we need additional bits. See the definition of REG_PAIR_FIRST for details.
+
+#if ((2 * REG_PAIR_NBITS) + REG_PAIR_NBITS_DEBUG) <= 16
+C_ASSERT(((2 * REG_PAIR_NBITS) + REG_PAIR_NBITS_DEBUG) > 8); // assert that nobody fits in 8 bits
+typedef unsigned short regPairNoSmall; // x86/x64: need 15 bits
+#else
+C_ASSERT(((2 * REG_PAIR_NBITS) + REG_PAIR_NBITS_DEBUG) <= 32);
+typedef unsigned regPairNoSmall; // arm: need 21 bits
+#endif
+
+#else // DEBUG
+
+#if (2 * REG_PAIR_NBITS) <= 8
+typedef unsigned char regPairNoSmall; // x86/x64: need 8 bits
+#else
+C_ASSERT((2 * REG_PAIR_NBITS) <= 16); // assert that nobody needs more than 16 bits
+typedef unsigned short regPairNoSmall; // arm: need 12 bits
+#endif
+
+#endif // DEBUG
+
+/*****************************************************************************/
+
+#define LEA_AVAILABLE 1
+#define SCALED_ADDR_MODES 1
+
+/*****************************************************************************/
+
+#ifdef DEBUG
+#define DSP_SRC_OPER_LEFT 0
+#define DSP_SRC_OPER_RIGHT 1
+#define DSP_DST_OPER_LEFT 1
+#define DSP_DST_OPER_RIGHT 0
+#endif
+
+/*****************************************************************************/
+
+// The pseudorandom nop insertion is not necessary for current CoreCLR scenarios
+// #if defined(FEATURE_CORECLR) && !defined(_TARGET_ARM_)
+// #define PSEUDORANDOM_NOP_INSERTION
+// #endif
+
+/*****************************************************************************/
+
+// clang-format off
+#if defined(_TARGET_X86_)
+
+ #define CPU_LOAD_STORE_ARCH 0
+
+#ifdef LEGACY_BACKEND
+ #define CPU_LONG_USES_REGPAIR 1
+#else
+ #define CPU_LONG_USES_REGPAIR 0 // RyuJIT x86 doesn't use the regPairNo field to record register pairs for long
+ // type tree nodes, and instead either decomposes them (for non-atomic operations)
+ // or stores multiple regNumber values for operations such as calls where the
+ // register definitions are effectively "atomic".
+#endif // LEGACY_BACKEND
+
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 1 // round intermed float expression results
+ #define CPU_HAS_BYTE_REGS 1
+ #define CPU_USES_BLOCK_MOVE 1
+
+#ifndef LEGACY_BACKEND
+ // TODO-CQ: Fine tune the following xxBlk threshold values:
+
+#define CPBLK_MOVS_LIMIT 16 // When generating code for CpBlk, this is the buffer size
+ // threshold to stop generating rep movs and switch to the helper call.
+ // NOTE: Using rep movs is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+
+ #define CPBLK_UNROLL_LIMIT 64 // Upper bound to let the code generator to loop unroll CpBlk.
+ #define INITBLK_STOS_LIMIT 64 // When generating code for InitBlk, this is the buffer size
+ // threshold to stop generating rep stos and switch to the helper call.
+ // NOTE: Using rep stos is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+ #define INITBLK_UNROLL_LIMIT 128 // Upper bound to let the code generator to loop unroll InitBlk.
+ #define CPOBJ_NONGC_SLOTS_LIMIT 4 // For CpObj code generation, this is the threshold of the number
+ // of contiguous non-gc slots that triggers generating rep movsq instead of
+ // sequences of movsq instructions
+ // The way we're currently disabling rep movs/stos is by setting a limit less than
+ // their unrolling counterparts. When Lower decides which one to use, it
+ // always asks for the unrolling limit first, so you can say the JIT 'favors' unrolling.
+ // Setting the limit to something lower than that makes Lower never consider it.
+
+#endif // !LEGACY_BACKEND
+
+
+ #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
+ #define FEATURE_FIXED_OUT_ARGS 0 // X86 uses push instructions to pass args
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when
+ // the flags need to be set
+#ifdef LEGACY_BACKEND
+ #define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 0 // Support for returning a single value in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments
+ #define MAX_RET_MULTIREG_BYTES 0 // No multireg return values
+#else
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments (note this seems wrong as MAX_ARG_REG_COUNT is 2)
+ #define MAX_RET_MULTIREG_BYTES 8 // Maximum size of a struct that could be returned in more than one register
+#endif
+
+ #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass an argument.
+ #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value.
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the
+ // RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using
+ // ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 0
+ #define EMIT_TRACK_STACK_DEPTH 1
+ #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this
+ // target
+ #define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter,
+ // filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 0
+ #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region,
+ // protected by "cloned finally" clauses.
+#ifndef LEGACY_BACKEND
+ #define FEATURE_STACK_FP_X87 0
+#else // LEGACY_BACKEND
+ #define FEATURE_STACK_FP_X87 1 // Use flat register file model
+#endif // LEGACY_BACKEND
+ #define FEATURE_X87_DOUBLES 0 // FP tree temps always use x87 doubles (when 1) or can be double or float
+ // (when 0).
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based
+ // frames for most methods
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+#ifndef LEGACY_BACKEND
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_EAX
+ #define REG_INT_FIRST REG_EAX
+ #define REG_INT_LAST REG_EDI
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ #define REG_FP_FIRST REG_XMM0
+ #define REG_FP_LAST REG_XMM7
+ #define FIRST_FP_ARGREG REG_XMM0
+ #define LAST_FP_ARGREG REG_XMM3
+ #define REG_FLTARG_0 REG_XMM0
+ #define REG_FLTARG_1 REG_XMM1
+ #define REG_FLTARG_2 REG_XMM2
+ #define REG_FLTARG_3 REG_XMM3
+
+ #define RBM_FLTARG_0 RBM_XMM0
+ #define RBM_FLTARG_1 RBM_XMM1
+ #define RBM_FLTARG_2 RBM_XMM2
+ #define RBM_FLTARG_3 RBM_XMM3
+
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3)
+
+ #define RBM_ALLFLOAT (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM3 | RBM_XMM4 | RBM_XMM5 | RBM_XMM6 | RBM_XMM7)
+ #define RBM_ALLDOUBLE RBM_ALLFLOAT
+
+ // TODO-CQ: Currently we are following the x86 ABI for SSE2 registers.
+ // This should be reconsidered.
+ #define RBM_FLT_CALLEE_SAVED RBM_NONE
+ #define RBM_FLT_CALLEE_TRASH RBM_ALLFLOAT
+ #define REG_VAR_ORDER_FLT REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7
+
+ #define REG_FLT_CALLEE_SAVED_FIRST REG_XMM6
+ #define REG_FLT_CALLEE_SAVED_LAST REG_XMM7
+
+ #define XMM_REGSIZE_BYTES 16 // XMM register size in bytes
+ #define YMM_REGSIZE_BYTES 32 // YMM register size in bytes
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 6 // number used in a tiny instrdesc (same)
+
+#else // LEGACY_BACKEND
+ #define FEATURE_FP_REGALLOC 0 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+
+ #define FP_STK_SIZE 8
+ #define RBM_ALLFLOAT (RBM_FPV0 | RBM_FPV1 | RBM_FPV2 | RBM_FPV3 | RBM_FPV4 | RBM_FPV5 | RBM_FPV6)
+ #define REG_FP_FIRST REG_FPV0
+ #define REG_FP_LAST REG_FPV7
+ #define FIRST_FP_ARGREG REG_NA
+ #define LAST_FP_ARGREG REG_NA
+
+
+ #define REGNUM_BITS 3 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 3
+ #define REGMASK_BITS 8 // number of bits in a REGNUM_MASK
+
+ #define RBM_FLTARG_REGS 0
+ #define RBM_FLT_CALLEE_SAVED 0
+ #define RBM_FLT_CALLEE_TRASH 0
+
+#endif // LEGACY_BACKEND
+
+ #define REGSIZE_BYTES 4 // number of bytes in one register
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define CODE_ALIGN 1 // code alignment requirement
+ #define STACK_ALIGN 4 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
+ #define STACK_ALIGN_SHIFT_ALL 2 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+
+ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI)
+ #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX)
+
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+
+ #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX
+ #define MAX_VAR_ORDER_SIZE 6
+ #define REG_TMP_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI
+ #define RBM_TMP_ORDER RBM_EAX,RBM_EDX,RBM_ECX,RBM_EBX,RBM_ESI,RBM_EDI
+ #define REG_TMP_ORDER_COUNT 6
+
+ #define REG_PREDICT_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI
+
+ // The order here is fixed: it must agree with an order assumed in eetwain...
+ #define REG_CALLEE_SAVED_ORDER REG_EDI,REG_ESI,REG_EBX,REG_EBP
+ #define RBM_CALLEE_SAVED_ORDER RBM_EDI,RBM_ESI,RBM_EBX,RBM_EBP
+
+ #define CNT_CALLEE_SAVED (4)
+ #define CNT_CALLEE_TRASH (3)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1)
+
+ #define CNT_CALLEE_SAVED_FLOAT (0)
+ #define CNT_CALLEE_TRASH_FLOAT (6)
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED*REGSIZE_BYTES) // EBX,ESI,EDI,EBP
+
+ // We reuse the ESP register as an illegal value in the register predictor
+ #define RBM_ILLEGAL RBM_ESP
+ // We reuse the ESP register as a flag for last use handling in the register predictor
+ #define RBM_LASTUSE RBM_ESP
+ // We're using the encoding for ESP to indicate a half-long on the frame
+ #define REG_L_STK REG_ESP
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_EAX
+ #define RBM_TMP_0 RBM_EAX
+
+ // This is the second register in REG_TMP_ORDER
+ #define REG_TMP_1 REG_EDX
+ #define RBM_TMP_1 RBM_EDX
+
+ #define REG_PAIR_TMP REG_PAIR_EAXEDX
+ #define REG_PAIR_TMP_REVERSE REG_PAIR_EDXEAX
+ #define RBM_PAIR_TMP (RBM_EAX|RBM_EDX)
+ #define REG_PAIR_TMP_LO REG_EAX
+ #define RBM_PAIR_TMP_LO RBM_EAX
+ #define REG_PAIR_TMP_HI REG_EDX
+ #define RBM_PAIR_TMP_HI RBM_EDX
+ #define PREDICT_PAIR_TMP PREDICT_PAIR_EAXEDX
+ #define PREDICT_PAIR_TMP_LO PREDICT_REG_EAX
+
+ // Used when calling the 64-bit Variable shift helper
+ #define REG_LNGARG_0 REG_PAIR_EAXEDX
+ #define RBM_LNGARG_0 (RBM_EAX|RBM_EDX)
+ #define PREDICT_PAIR_LNGARG_0 PREDICT_PAIR_EAXEDX
+
+ #define REG_LNGARG_LO REG_EAX
+ #define RBM_LNGARG_LO RBM_EAX
+ #define REG_LNGARG_HI REG_EDX
+ #define RBM_LNGARG_HI RBM_EDX
+ // register to hold shift amount
+ #define REG_SHIFT REG_ECX
+ #define RBM_SHIFT RBM_ECX
+ #define PREDICT_REG_SHIFT PREDICT_REG_ECX
+
+ // register to hold shift amount when shifting 64-bit values
+ #define REG_SHIFT_LNG REG_ECX
+ #define RBM_SHIFT_LNG RBM_ECX
+ #define PREDICT_REG_SHIFT_LNG PREDICT_REG_ECX
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_EAX
+ #define RBM_SCRATCH RBM_EAX
+
+ // Where is the exception object on entry to the handler block?
+ #define REG_EXCEPTION_OBJECT REG_EAX
+ #define RBM_EXCEPTION_OBJECT RBM_EAX
+
+ // Only used on ARM for GTF_CALL_M_VIRTSTUB_REL_INDIRECT
+ #define REG_JUMP_THUNK_PARAM REG_EAX
+ #define RBM_JUMP_THUNK_PARAM RBM_EAX
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER REG_EDX
+ #define RBM_WRITE_BARRIER RBM_EDX
+
+ // We don't allow using ebp as a source register. Maybe we should only prevent this for ETW_EBP_FRAMED (but that is always set right now).
+ #define RBM_WRITE_BARRIER_SRC (RBM_EAX|RBM_ECX|RBM_EBX|RBM_ESI|RBM_EDI)
+
+ #define RBM_CALLEE_TRASH_NOGC RBM_EDX
+#endif // NOGC_WRITE_BARRIERS
+
+ // IL stub's secret parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_EAX
+ #define RBM_SECRET_STUB_PARAM RBM_EAX
+
+ // VSD extra parameter
+ #define REG_VIRTUAL_STUB_PARAM REG_EAX
+ #define RBM_VIRTUAL_STUB_PARAM RBM_EAX
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_EAX
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_EDI // EDI is p/invoke "Frame" pointer argument to CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define RBM_PINVOKE_FRAME RBM_EDI
+ #define REG_PINVOKE_TCB REG_ESI // ESI is set to Thread Control Block (TCB) on return from
+ // CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define RBM_PINVOKE_TCB RBM_ESI
+ #define REG_PINVOKE_SCRATCH REG_EAX // EAX is trashed by CORINFO_HELP_INIT_PINVOKE_FRAME helper
+ #define RBM_PINVOKE_SCRATCH RBM_EAX
+
+#ifdef LEGACY_BACKEND
+ #define REG_SPILL_CHOICE REG_EAX
+ #define RBM_SPILL_CHOICE RBM_EAX
+#endif // LEGACY_BACKEND
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_EAX
+ #define REG_INT_FIRST REG_EAX
+ #define REG_INT_LAST REG_EDI
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
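+
+ // For example (illustrative only), all integer registers can be enumerated with:
+ //
+ //   for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
+ //   {
+ //       // ... inspect 'reg' ...
+ //   }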
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_EAX
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_EAX
+ #define RBM_INTRET RBM_EAX
+ #define REG_LNGRET REG_PAIR_EAXEDX
+ #define RBM_LNGRET (RBM_EDX|RBM_EAX)
+ #define REG_LNGRET_LO REG_EAX
+ #define RBM_LNGRET_LO RBM_EAX
+ #define REG_LNGRET_HI REG_EDX
+ #define RBM_LNGRET_HI RBM_EDX
+
+ #define REG_FLOATRET REG_NA
+ #define RBM_FLOATRET RBM_NONE
+ #define RBM_DOUBLERET RBM_NONE
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper. On x86, this helper has a custom calling
+ // convention that takes EDI as argument (but doesn't trash it), trashes EAX, and returns ESI.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH (RBM_PINVOKE_SCRATCH | RBM_PINVOKE_TCB)
+
+ #define REG_FPBASE REG_EBP
+ #define RBM_FPBASE RBM_EBP
+ #define STR_FPBASE "ebp"
+ #define REG_SPBASE REG_ESP
+ #define RBM_SPBASE RBM_ESP
+ #define STR_SPBASE "esp"
+
+ #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved EBP and return address
+
+ #define MAX_REG_ARG 2
+ #define MAX_FLOAT_REG_ARG 0
+ #define REG_ARG_FIRST REG_ECX
+ #define REG_ARG_LAST REG_EDX
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_ECX
+ #define REG_ARG_1 REG_EDX
+
+ SELECTANY const regNumber intArgRegs [] = {REG_ECX, REG_EDX};
+ SELECTANY const regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX};
+#if !FEATURE_STACK_FP_X87
+ SELECTANY const regNumber fltArgRegs [] = {REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3};
+ SELECTANY const regMaskTP fltArgMasks[] = {RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3};
+#endif // FEATURE_STACK_FP_X87
+
+ #define RBM_ARG_0 RBM_ECX
+ #define RBM_ARG_1 RBM_EDX
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1)
+
+ // What sort of reloc do we use for [disp32] address mode
+ #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_HIGHLOW
+
+ // What sort of reloc do we use for 'moffset' address mode (for 'mov eax, moffset' or 'mov moffset, eax')
+ #define IMAGE_REL_BASED_MOFFSET IMAGE_REL_BASED_HIGHLOW
+
+ // Pointer-sized string move instructions
+ #define INS_movsp INS_movsd
+ #define INS_r_movsp INS_r_movsd
+ #define INS_stosp INS_stosd
+ #define INS_r_stosp INS_r_stosd
+
+#elif defined(_TARGET_AMD64_)
+ // TODO-AMD64-CQ: Fine tune the following xxBlk threshold values:
+
+ #define CPU_LOAD_STORE_ARCH 0
+ #define CPU_LONG_USES_REGPAIR 0
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 0 // Do not round intermediate float expression results
+ #define CPU_HAS_BYTE_REGS 0
+ #define CPU_USES_BLOCK_MOVE 1
+
+ #define CPBLK_MOVS_LIMIT 16 // When generating code for CpBlk, this is the buffer size
+ // threshold to stop generating rep movs and switch to the helper call.
+ // NOTE: Using rep movs is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+
+ #define CPBLK_UNROLL_LIMIT 64 // Upper bound below which the code generator will unroll CpBlk.
+ #define INITBLK_STOS_LIMIT 64 // When generating code for InitBlk, this is the buffer size
+ // threshold to stop generating rep stos and switch to the helper call.
+ // NOTE: Using rep stos is currently disabled since we found it has bad performance
+ // on pre-Ivy Bridge hardware.
+ #define INITBLK_UNROLL_LIMIT 128 // Upper bound below which the code generator will unroll InitBlk.
+ #define CPOBJ_NONGC_SLOTS_LIMIT 4 // For CpObj code generation, this is the threshold of the number
+ // of contiguous non-GC slots that triggers generating rep movsq instead of
+ // a sequence of movsq instructions
+
+ // The way we currently disable rep movs/stos is by setting their limits below the
+ // corresponding unrolling limits. When Lower decides which form to generate, it checks the
+ // unrolling limit first, so the JIT 'favors' unrolling; setting the rep movs/stos limit
+ // below the unrolling limit means Lower never considers it.
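+
+ // A minimal sketch of the size-based choice described above (assumed shape only, not the
+ // actual Lower code), using the CpBlk values defined here:
+ //
+ //   if (size <= CPBLK_UNROLL_LIMIT)        // 64: unroll the copy
+ //       emit an unrolled sequence of moves;
+ //   else if (size <= CPBLK_MOVS_LIMIT)     // 16: never taken while this limit is below
+ //       emit rep movs;                     //     CPBLK_UNROLL_LIMIT, which disables rep movs
+ //   else
+ //       call the CpBlk/memcpy helper;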
+
+
+#ifdef FEATURE_SIMD
+ #define ALIGN_SIMD_TYPES 1 // whether SIMD type locals are to be aligned
+#if defined(UNIX_AMD64_ABI) || !defined(FEATURE_AVX_SUPPORT)
+ #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 0 // Whether SIMD registers are partially saved at calls
+#else // !UNIX_AMD64_ABI && !FEATURE_AVX_SUPPORT
+ #define FEATURE_PARTIAL_SIMD_CALLEE_SAVE 1 // Whether SIMD registers are partially saved at calls
+#endif // !UNIX_AMD64_ABI
+#endif
+ #define FEATURE_WRITE_BARRIER 1 // Generate the WriteBarrier calls for GC (currently not the x86-style register-customized barriers)
+ #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+#ifdef UNIX_AMD64_ABI
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register
+ #define FEATURE_STRUCT_CLASSIFIER 1 // Uses a classifier function to determine if structs are passed/returned in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is two SIMD16s)
+ #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is two SIMD16s)
+ #define MAX_ARG_REG_COUNT 2 // Maximum registers used to pass a single argument in multiple registers.
+ #define MAX_RET_REG_COUNT 2 // Maximum registers used to return a value.
+#else // !UNIX_AMD64_ABI
+ #define WINDOWS_AMD64_ABI // Uses the Windows ABI for AMD64
+ #define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 0 // Support for returning a single value in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments
+ #define MAX_RET_MULTIREG_BYTES 0 // No multireg return values
+ #define MAX_ARG_REG_COUNT 1 // Maximum registers used to pass a single argument (no arguments are passed using multiple registers)
+ #define MAX_RET_REG_COUNT 1 // Maximum registers used to return a value.
+#endif // !UNIX_AMD64_ABI
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 0 // We do NOT have specialized WriteBarrier JIT helpers that avoid trashing the RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 1
+ #define EMIT_TRACK_STACK_DEPTH 1
+ #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
+ #define FEATURE_EH 1 // Set to 0 (to aid platform bring-up) to eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 1
+ #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_STACK_FP_X87 0
+#ifdef UNIX_AMD64_ABI
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use EBP as a scratch register and must create EBP based frames for most methods
+#else // !UNIX_AMD64_ABI
+ #define ETW_EBP_FRAMED 0 // if 1 we cannot use EBP as a scratch register and must create EBP based frames for most methods
+#endif // !UNIX_AMD64_ABI
+ #define FEATURE_FP_REGALLOC 0 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+ #define RBM_ALLFLOAT (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM3 | RBM_XMM4 | RBM_XMM5 | RBM_XMM6 | RBM_XMM7 | RBM_XMM8 | RBM_XMM9 | RBM_XMM10 | RBM_XMM11 | RBM_XMM12 | RBM_XMM13 | RBM_XMM14 | RBM_XMM15)
+ #define RBM_ALLDOUBLE RBM_ALLFLOAT
+ #define REG_FP_FIRST REG_XMM0
+ #define REG_FP_LAST REG_XMM15
+ #define FIRST_FP_ARGREG REG_XMM0
+
+#ifdef UNIX_AMD64_ABI
+ #define LAST_FP_ARGREG REG_XMM7
+#else // !UNIX_AMD64_ABI
+ #define LAST_FP_ARGREG REG_XMM3
+#endif // !UNIX_AMD64_ABI
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 6 // number used in a tiny instrdesc (same)
+ #define REGMASK_BITS 32 // number of bits in a REGNUM_MASK
+ #define REGSIZE_BYTES 8 // number of bytes in one register
+ #define XMM_REGSIZE_BYTES 16 // XMM register size in bytes
+ #define YMM_REGSIZE_BYTES 32 // YMM register size in bytes
+
+ #define CODE_ALIGN 1 // code alignment requirement
+ #define STACK_ALIGN 16 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in pointer sized words
+ #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+
+#if ETW_EBP_FRAMED
+ #define RBM_ETW_FRAMED_EBP RBM_NONE
+ #define RBM_ETW_FRAMED_EBP_LIST
+ #define REG_ETW_FRAMED_EBP_LIST
+ #define REG_ETW_FRAMED_EBP_COUNT 0
+#else // !ETW_EBP_FRAMED
+ #define RBM_ETW_FRAMED_EBP RBM_EBP
+ #define RBM_ETW_FRAMED_EBP_LIST RBM_EBP,
+ #define REG_ETW_FRAMED_EBP_LIST REG_EBP,
+ #define REG_ETW_FRAMED_EBP_COUNT 1
+#endif // !ETW_EBP_FRAMED
+
+#ifdef UNIX_AMD64_ABI
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15)
+ #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_RDI|RBM_RSI|RBM_EDX|RBM_ECX|RBM_R8|RBM_R9|RBM_R10|RBM_R11)
+ #define RBM_FLT_CALLEE_SAVED (0)
+ #define RBM_FLT_CALLEE_TRASH (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5|RBM_XMM6|RBM_XMM7| \
+ RBM_XMM8|RBM_XMM9|RBM_XMM10|RBM_XMM11|RBM_XMM12|RBM_XMM13|RBM_XMM14|RBM_XMM15)
+#else // !UNIX_AMD64_ABI
+#define MIN_ARG_AREA_FOR_CALL (4 * REGSIZE_BYTES) // Minimum required outgoing argument space for a call.
+
+ #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI|RBM_ETW_FRAMED_EBP|RBM_R12|RBM_R13|RBM_R14|RBM_R15)
+ #define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX|RBM_R8|RBM_R9|RBM_R10|RBM_R11)
+ #define RBM_FLT_CALLEE_SAVED (RBM_XMM6|RBM_XMM7|RBM_XMM8|RBM_XMM9|RBM_XMM10|RBM_XMM11|RBM_XMM12|RBM_XMM13|RBM_XMM14|RBM_XMM15)
+ #define RBM_FLT_CALLEE_TRASH (RBM_XMM0|RBM_XMM1|RBM_XMM2|RBM_XMM3|RBM_XMM4|RBM_XMM5)
+#endif // !UNIX_AMD64_ABI
+
+ #define REG_FLT_CALLEE_SAVED_FIRST REG_XMM6
+ #define REG_FLT_CALLEE_SAVED_LAST REG_XMM15
+
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+
+ #define RBM_CALLEE_TRASH_NOGC RBM_CALLEE_TRASH
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+
+#if 0
+#define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX,REG_ESI,REG_EDI,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+#else
+ // TEMPORARY ORDER TO AVOID CALLEE-SAVES
+ // TODO-CQ: Review this and set appropriately
+#ifdef UNIX_AMD64_ABI
+ #define REG_VAR_ORDER REG_EAX,REG_EDI,REG_ESI, \
+ REG_EDX,REG_ECX,REG_R8,REG_R9, \
+ REG_R10,REG_R11,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R14,REG_R15,REG_R12,REG_R13
+#else // !UNIX_AMD64_ABI
+ #define REG_VAR_ORDER REG_EAX,REG_EDX,REG_ECX, \
+ REG_R8,REG_R9,REG_R10,REG_R11, \
+ REG_ESI,REG_EDI,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R14,REG_R15,REG_R12,REG_R13
+#endif // !UNIX_AMD64_ABI
+#endif
+
+ #define REG_VAR_ORDER_FLT REG_XMM0,REG_XMM1,REG_XMM2,REG_XMM3,REG_XMM4,REG_XMM5,REG_XMM6,REG_XMM7,REG_XMM8,REG_XMM9,REG_XMM10,REG_XMM11,REG_XMM12,REG_XMM13,REG_XMM14,REG_XMM15
+
+#ifdef UNIX_AMD64_ABI
+ #define REG_TMP_ORDER REG_EAX,REG_EDI,REG_ESI,REG_EDX,REG_ECX,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+#else // !UNIX_AMD64_ABI
+ #define MAX_VAR_ORDER_SIZE (14 + REG_ETW_FRAMED_EBP_COUNT)
+ #define REG_TMP_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+#endif // !UNIX_AMD64_ABI
+
+#ifdef UNIX_AMD64_ABI
+ #define REG_PREDICT_ORDER REG_EAX,REG_EDI,REG_ESI,REG_EDX,REG_ECX,REG_EBX,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+ #define CNT_CALLEE_SAVED (5 + REG_ETW_FRAMED_EBP_COUNT)
+ #define CNT_CALLEE_TRASH (9)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED)
+
+ #define CNT_CALLEE_SAVED_FLOAT (0)
+ #define CNT_CALLEE_TRASH_FLOAT (16)
+
+ #define REG_CALLEE_SAVED_ORDER REG_EBX,REG_ETW_FRAMED_EBP_LIST REG_R12,REG_R13,REG_R14,REG_R15
+ #define RBM_CALLEE_SAVED_ORDER RBM_EBX,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15
+#else // !UNIX_AMD64_ABI
+ #define REG_TMP_ORDER_COUNT (14 + REG_ETW_FRAMED_EBP_COUNT)
+ #define REG_PREDICT_ORDER REG_EAX,REG_EDX,REG_ECX,REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST \
+ REG_R8,REG_R9,REG_R10,REG_R11,REG_R14,REG_R15,REG_R12,REG_R13
+ #define CNT_CALLEE_SAVED (7 + REG_ETW_FRAMED_EBP_COUNT)
+ #define CNT_CALLEE_TRASH (7)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED)
+
+ #define CNT_CALLEE_SAVED_FLOAT (10)
+ #define CNT_CALLEE_TRASH_FLOAT (6)
+
+ #define REG_CALLEE_SAVED_ORDER REG_EBX,REG_ESI,REG_EDI,REG_ETW_FRAMED_EBP_LIST REG_R12,REG_R13,REG_R14,REG_R15
+ #define RBM_CALLEE_SAVED_ORDER RBM_EBX,RBM_ESI,RBM_EDI,RBM_ETW_FRAMED_EBP_LIST RBM_R12,RBM_R13,RBM_R14,RBM_R15
+#endif // !UNIX_AMD64_ABI
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED*REGSIZE_BYTES)
+ #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT*16)
+
+ // We reuse the ESP register as an illegal value in the register predictor
+ #define RBM_ILLEGAL RBM_ESP
+ // We reuse the ESP register as a flag for last use handling in the register predictor
+ #define RBM_LASTUSE RBM_ESP
+ // We're using the encoding for ESP to indicate a half-long on the frame
+ #define REG_L_STK REG_ESP
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_EAX
+ #define RBM_TMP_0 RBM_EAX
+
+ // This is the second register in REG_TMP_ORDER
+#ifdef UNIX_AMD64_ABI
+ #define REG_TMP_1 REG_EDI
+ #define RBM_TMP_1 RBM_EDI
+#else // !UNIX_AMD64_ABI
+ #define REG_TMP_1 REG_EDX
+ #define RBM_TMP_1 RBM_EDX
+#endif // !UNIX_AMD64_ABI
+ #define REG_PAIR_TMP REG_PAIR_EAXEDX
+ #define RBM_PAIR_TMP (RBM_EAX|RBM_EDX)
+ #define REG_PAIR_TMP_LO REG_EAX
+ #define RBM_PAIR_TMP_LO RBM_EAX
+ #define REG_PAIR_TMP_HI REG_EDX
+ #define RBM_PAIR_TMP_HI RBM_EDX
+ #define PREDICT_PAIR_TMP PREDICT_PAIR_RAXRDX
+ #define PREDICT_PAIR_TMP_LO PREDICT_REG_EAX
+
+ // register to hold shift amount
+ #define REG_SHIFT REG_ECX
+ #define RBM_SHIFT RBM_ECX
+ #define PREDICT_REG_SHIFT PREDICT_REG_ECX
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_EAX
+ #define RBM_SCRATCH RBM_EAX
+
+// Where is the exception object on entry to the handler block?
+#ifdef UNIX_AMD64_ABI
+ #define REG_EXCEPTION_OBJECT REG_ESI
+ #define RBM_EXCEPTION_OBJECT RBM_ESI
+#else // !UNIX_AMD64_ABI
+ #define REG_EXCEPTION_OBJECT REG_EDX
+ #define RBM_EXCEPTION_OBJECT RBM_EDX
+#endif // !UNIX_AMD64_ABI
+
+ #define REG_JUMP_THUNK_PARAM REG_EAX
+ #define RBM_JUMP_THUNK_PARAM RBM_EAX
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER REG_EDX
+ #define RBM_WRITE_BARRIER RBM_EDX
+#endif
+
+ // Register to be used for emitting helper calls whose call target is an indir of an
+ // absolute memory address in case of Rel32 overflow i.e. a data address could not be
+ // encoded as PC-relative 32-bit offset.
+ //
+ // Notes:
+ // 1) RAX is a callee-trash register that is not used for passing parameters, and
+ //    using it also results in a smaller instruction encoding.
+ // 2) The profiler Leave callback requires the return value to be preserved
+ //    in some form. We can use a custom calling convention for the Leave callback;
+ //    for example, the return value could be preserved in RCX so that it is
+ //    available to the profiler.
+ #define REG_DEFAULT_HELPER_CALL_TARGET REG_RAX
+
+ // GenericPInvokeCalliHelper VASigCookie Parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_R11
+ #define RBM_PINVOKE_COOKIE_PARAM RBM_R11
+ #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R11
+
+ // GenericPInvokeCalliHelper unmanaged target Parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_R10
+ #define RBM_PINVOKE_TARGET_PARAM RBM_R10
+ #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R10
+
+ // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_R10
+ #define RBM_SECRET_STUB_PARAM RBM_R10
+
+ // VSD extra parameter (slot address)
+ #define REG_VIRTUAL_STUB_PARAM REG_R11
+ #define RBM_VIRTUAL_STUB_PARAM RBM_R11
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R11
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_EDI
+ #define RBM_PINVOKE_FRAME RBM_EDI
+ #define REG_PINVOKE_TCB REG_EAX
+ #define RBM_PINVOKE_TCB RBM_EAX
+ #define REG_PINVOKE_SCRATCH REG_EAX
+ #define RBM_PINVOKE_SCRATCH RBM_EAX
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_EAX
+ #define REG_INT_FIRST REG_EAX
+ #define REG_INT_LAST REG_R15
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_RDX
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_EAX
+ #define RBM_INTRET RBM_EAX
+
+ #define REG_LNGRET REG_EAX
+ #define RBM_LNGRET RBM_EAX
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ #define REG_INTRET_1 REG_RDX
+ #define RBM_INTRET_1 RBM_RDX
+
+ #define REG_LNGRET_1 REG_RDX
+ #define RBM_LNGRET_1 RBM_RDX
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+
+ #define REG_FLOATRET REG_XMM0
+ #define RBM_FLOATRET RBM_XMM0
+ #define REG_DOUBLERET REG_XMM0
+ #define RBM_DOUBLERET RBM_XMM0
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+#define REG_FLOATRET_1 REG_XMM1
+#define RBM_FLOATRET_1 RBM_XMM1
+
+#define REG_DOUBLERET_1 REG_XMM1
+#define RBM_DOUBLERET_1 RBM_XMM1
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+
+ #define REG_FPBASE REG_EBP
+ #define RBM_FPBASE RBM_EBP
+ #define STR_FPBASE "rbp"
+ #define REG_SPBASE REG_ESP
+ #define RBM_SPBASE RBM_ESP
+ #define STR_SPBASE "rsp"
+
+ #define FIRST_ARG_STACK_OFFS (REGSIZE_BYTES) // return address
+
+#ifdef UNIX_AMD64_ABI
+ #define MAX_REG_ARG 6
+ #define MAX_FLOAT_REG_ARG 8
+ #define REG_ARG_FIRST REG_EDI
+ #define REG_ARG_LAST REG_R9
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_EDI
+ #define REG_ARG_1 REG_ESI
+ #define REG_ARG_2 REG_EDX
+ #define REG_ARG_3 REG_ECX
+ #define REG_ARG_4 REG_R8
+ #define REG_ARG_5 REG_R9
+
+ SELECTANY const regNumber intArgRegs[] = { REG_EDI, REG_ESI, REG_EDX, REG_ECX, REG_R8, REG_R9 };
+ SELECTANY const regMaskTP intArgMasks[] = { RBM_RDI, RBM_RSI, RBM_EDX, RBM_ECX, RBM_R8, RBM_R9 };
+ SELECTANY const regNumber fltArgRegs[] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3, REG_XMM4, REG_XMM5, REG_XMM6, REG_XMM7 };
+ SELECTANY const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3, RBM_XMM4, RBM_XMM5, RBM_XMM6, RBM_XMM7 };
+
+ #define RBM_ARG_0 RBM_RDI
+ #define RBM_ARG_1 RBM_RSI
+ #define RBM_ARG_2 RBM_EDX
+ #define RBM_ARG_3 RBM_ECX
+ #define RBM_ARG_4 RBM_R8
+ #define RBM_ARG_5 RBM_R9
+#else // !UNIX_AMD64_ABI
+ #define MAX_REG_ARG 4
+ #define MAX_FLOAT_REG_ARG 4
+ #define REG_ARG_FIRST REG_ECX
+ #define REG_ARG_LAST REG_R9
+ #define INIT_ARG_STACK_SLOT 4 // 4 outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_ECX
+ #define REG_ARG_1 REG_EDX
+ #define REG_ARG_2 REG_R8
+ #define REG_ARG_3 REG_R9
+
+ SELECTANY const regNumber intArgRegs[] = { REG_ECX, REG_EDX, REG_R8, REG_R9 };
+ SELECTANY const regMaskTP intArgMasks[] = { RBM_ECX, RBM_EDX, RBM_R8, RBM_R9 };
+ SELECTANY const regNumber fltArgRegs[] = { REG_XMM0, REG_XMM1, REG_XMM2, REG_XMM3 };
+ SELECTANY const regMaskTP fltArgMasks[] = { RBM_XMM0, RBM_XMM1, RBM_XMM2, RBM_XMM3 };
+
+ #define RBM_ARG_0 RBM_ECX
+ #define RBM_ARG_1 RBM_EDX
+ #define RBM_ARG_2 RBM_R8
+ #define RBM_ARG_3 RBM_R9
+#endif // !UNIX_AMD64_ABI
+
+ #define REG_FLTARG_0 REG_XMM0
+ #define REG_FLTARG_1 REG_XMM1
+ #define REG_FLTARG_2 REG_XMM2
+ #define REG_FLTARG_3 REG_XMM3
+
+ #define RBM_FLTARG_0 RBM_XMM0
+ #define RBM_FLTARG_1 RBM_XMM1
+ #define RBM_FLTARG_2 RBM_XMM2
+ #define RBM_FLTARG_3 RBM_XMM3
+
+#ifdef UNIX_AMD64_ABI
+ #define REG_FLTARG_4 REG_XMM4
+ #define REG_FLTARG_5 REG_XMM5
+ #define REG_FLTARG_6 REG_XMM6
+ #define REG_FLTARG_7 REG_XMM7
+
+ #define RBM_FLTARG_4 RBM_XMM4
+ #define RBM_FLTARG_5 RBM_XMM5
+ #define RBM_FLTARG_6 RBM_XMM6
+ #define RBM_FLTARG_7 RBM_XMM7
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5)
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7)
+#else // !UNIX_AMD64_ABI
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3)
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3)
+#endif // !UNIX_AMD64_ABI
+
+ // The registers trashed by profiler enter/leave/tailcall hook
+ // See vm\amd64\asmhelpers.asm for more details.
+ #define RBM_PROFILER_ENTER_TRASH RBM_CALLEE_TRASH
+ #define RBM_PROFILER_LEAVE_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper.
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // See vm\amd64\unixasmhelpers.S for more details.
+ //
+ // On Unix a struct of size >=9 and <=16 bytes in size is returned in two return registers.
+ // The return registers could be any two from the set { RAX, RDX, XMM0, XMM1 }.
+ // STOP_FOR_GC helper preserves all the 4 possible return registers.
+ #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET | RBM_FLOATRET_1 | RBM_INTRET_1))
+#else
+ // See vm\amd64\asmhelpers.asm for more details.
+ #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+#endif
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
+
+ // What sort of reloc do we use for [disp32] address mode
+ #define IMAGE_REL_BASED_DISP32 IMAGE_REL_BASED_REL32
+
+ // What sort of reloc do we use for 'moffset' address mode (for 'mov eax, moffset' or 'mov moffset, eax')
+ #define IMAGE_REL_BASED_MOFFSET IMAGE_REL_BASED_DIR64
+
+ // Pointer-sized string move instructions
+ #define INS_movsp INS_movsq
+ #define INS_r_movsp INS_r_movsq
+ #define INS_stosp INS_stosq
+ #define INS_r_stosp INS_r_stosq
+
+#elif defined(_TARGET_ARM_)
+
+ // TODO-ARM-CQ: Use shift for division by power of 2
+ // TODO-ARM-CQ: Check for sdiv/udiv at runtime and generate it if available
+ #define USE_HELPERS_FOR_INT_DIV 1 // BeagleBoard (ARMv7A) doesn't support SDIV/UDIV
+ #define CPU_LOAD_STORE_ARCH 1
+ #define CPU_LONG_USES_REGPAIR 1
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 0 // Do not round intermediate float expression results
+ #define CPU_HAS_BYTE_REGS 0
+ #define CPU_USES_BLOCK_MOVE 0
+ #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
+ #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 0 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register (including HFA support)
+ #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register (including passing HFAs)
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register (including HFA returns)
+ #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine if structs are passed/returned in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles)
+ #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
+ #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
+ #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value.
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 0 // We do NOT have specialized WriteBarrier JIT helpers that avoid trashing the RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 1
+ #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really
+ // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
+ #define TARGET_POINTER_SIZE 4 // equal to sizeof(void*) and the managed pointer size in bytes for this target
+ #define FEATURE_EH 1 // Set to 0 (to aid platform bring-up) to eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 1
+ #define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_STACK_FP_X87 0
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
+ #define FEATURE_FP_REGALLOC 1 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+ #define REG_FP_FIRST REG_F0
+ #define REG_FP_LAST REG_F31
+ #define FIRST_FP_ARGREG REG_F0
+ #define LAST_FP_ARGREG REG_F15
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 4 // number of bits we will use for a tiny instr desc (may not use float)
+ #define REGMASK_BITS 64 // number of bits in a REGNUM_MASK
+ #define REGSIZE_BYTES 4 // number of bytes in one register
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define CODE_ALIGN 2 // code alignment requirement
+ #define STACK_ALIGN 8 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
+
+ #define RBM_INT_CALLEE_SAVED (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10)
+ #define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R12|RBM_LR)
+ #define RBM_FLT_CALLEE_SAVED (RBM_F16|RBM_F17|RBM_F18|RBM_F19|RBM_F20|RBM_F21|RBM_F22|RBM_F23|RBM_F24|RBM_F25|RBM_F26|RBM_F27|RBM_F28|RBM_F29|RBM_F30|RBM_F31)
+ #define RBM_FLT_CALLEE_TRASH (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15)
+
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+ #define RBM_CALLEE_TRASH_NOGC (RBM_R2|RBM_R3|RBM_LR)
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+ #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
+ #define RBM_ALLDOUBLE (RBM_F0|RBM_F2|RBM_F4|RBM_F6|RBM_F8|RBM_F10|RBM_F12|RBM_F14|RBM_F16|RBM_F18|RBM_F20|RBM_F22|RBM_F24|RBM_F26|RBM_F28|RBM_F30)
+
+ #define REG_VAR_ORDER REG_R3,REG_R2,REG_R1,REG_R0,REG_R4,REG_LR,REG_R12,\
+ REG_R5,REG_R6,REG_R7,REG_R8,REG_R9,REG_R10
+
+ #define REG_VAR_ORDER_FLT REG_F8, REG_F9, REG_F10, REG_F11, \
+ REG_F12, REG_F13, REG_F14, REG_F15, \
+ REG_F6, REG_F7, REG_F4, REG_F5, \
+ REG_F2, REG_F3, REG_F0, REG_F1, \
+ REG_F16, REG_F17, REG_F18, REG_F19, \
+ REG_F20, REG_F21, REG_F22, REG_F23, \
+ REG_F24, REG_F25, REG_F26, REG_F27, \
+ REG_F28, REG_F29, REG_F30, REG_F31,
+
+ #define MAX_VAR_ORDER_SIZE 32
+
+ #define REG_TMP_ORDER REG_R3,REG_R2,REG_R1,REG_R0, REG_R4,REG_R5,REG_R6,REG_R7,\
+ REG_LR,REG_R12, REG_R8,REG_R9,REG_R10
+ #define REG_TMP_ORDER_COUNT 13
+
+ #define REG_FLT_TMP_ORDER REG_F14, REG_F15, REG_F12, REG_F13, \
+ REG_F10, REG_F11, REG_F8, REG_F9, \
+ REG_F6, REG_F7, REG_F4, REG_F5, \
+ REG_F2, REG_F3, REG_F0, REG_F1, \
+ REG_F16, REG_F17, REG_F18, REG_F19, \
+ REG_F20, REG_F21, REG_F22, REG_F23, \
+ REG_F24, REG_F25, REG_F26, REG_F27, \
+ REG_F28, REG_F29, REG_F30, REG_F31,
+
+ #define REG_FLT_TMP_ORDER_COUNT 32
+
+ #define REG_PREDICT_ORDER REG_LR,REG_R12,REG_R3,REG_R2,REG_R1,REG_R0, \
+ REG_R7,REG_R6,REG_R5,REG_R4,REG_R8,REG_R9,REG_R10
+
+ #define RBM_LOW_REGS (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7)
+ #define RBM_HIGH_REGS (RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_SP|RBM_LR|RBM_PC)
+
+ #define REG_CALLEE_SAVED_ORDER REG_R4,REG_R5,REG_R6,REG_R7,REG_R8,REG_R9,REG_R10,REG_R11
+ #define RBM_CALLEE_SAVED_ORDER RBM_R4,RBM_R5,RBM_R6,RBM_R7,RBM_R8,RBM_R9,RBM_R10,RBM_R11
+
+ #define CNT_CALLEE_SAVED (8)
+ #define CNT_CALLEE_TRASH (6)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1)
+
+ #define CNT_CALLEE_SAVED_FLOAT (16)
+ #define CNT_CALLEE_TRASH_FLOAT (16)
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED*REGSIZE_BYTES)
+ #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT*sizeof(float))
+
+ // We reuse the SP register as an illegal value in the register predictor
+ #define RBM_ILLEGAL RBM_SP
+ // We reuse the SP register as a flag for last use handling in the register predictor
+ #define RBM_LASTUSE RBM_SP
+ // We're using the encoding for SP to indicate a half-long on the frame
+ #define REG_L_STK REG_SP
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_R3
+ #define RBM_TMP_0 RBM_R3
+
+ // This is the second register in REG_TMP_ORDER
+ #define REG_TMP_1 REG_R2
+ #define RBM_TMP_1 RBM_R2
+
+ // This is the first register pair in REG_TMP_ORDER
+ #define REG_PAIR_TMP REG_PAIR_R2R3
+ #define REG_PAIR_TMP_REVERSE REG_PAIR_R3R2
+ #define RBM_PAIR_TMP (RBM_R2|RBM_R3)
+ #define REG_PAIR_TMP_LO REG_R2
+ #define RBM_PAIR_TMP_LO RBM_R2
+ #define REG_PAIR_TMP_HI REG_R3
+ #define RBM_PAIR_TMP_HI RBM_R3
+ #define PREDICT_PAIR_TMP PREDICT_PAIR_R2R3
+ #define PREDICT_PAIR_TMP_LO PREDICT_REG_R2
+
+ // Used when calling the 64-bit Variable shift helper
+ #define REG_LNGARG_0 REG_PAIR_R0R1
+ #define RBM_LNGARG_0 (RBM_R0|RBM_R1)
+ #define PREDICT_PAIR_LNGARG_0 PREDICT_PAIR_R0R1
+
+ // register to hold shift amount; no special register is required on the ARM
+ #define REG_SHIFT REG_NA
+ #define RBM_SHIFT RBM_ALLINT
+ #define PREDICT_REG_SHIFT PREDICT_REG
+
+ // register to hold shift amount when shifting 64-bit values (this uses a helper call)
+ #define REG_SHIFT_LNG REG_R2 // REG_ARG_2
+ #define RBM_SHIFT_LNG RBM_R2 // RBM_ARG_2
+ #define PREDICT_REG_SHIFT_LNG PREDICT_REG_R2
+
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_LR
+ #define RBM_SCRATCH RBM_LR
+
+ // This is a general register that can be optionally reserved for other purposes during codegen
+ #define REG_OPT_RSVD REG_R10
+ #define RBM_OPT_RSVD RBM_R10
+
+ // We reserve R9 to store SP on entry for stack unwinding when localloc is used
+ #define REG_SAVED_LOCALLOC_SP REG_R9
+ #define RBM_SAVED_LOCALLOC_SP RBM_R9
+
+ // Where is the exception object on entry to the handler block?
+ #define REG_EXCEPTION_OBJECT REG_R0
+ #define RBM_EXCEPTION_OBJECT RBM_R0
+
+ #define REG_JUMP_THUNK_PARAM REG_R12
+ #define RBM_JUMP_THUNK_PARAM RBM_R12
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER REG_R1
+ #define RBM_WRITE_BARRIER RBM_R1
+#endif
+
+ // GenericPInvokeCalliHelper VASigCookie Parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_R4
+ #define RBM_PINVOKE_COOKIE_PARAM RBM_R4
+ #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R4
+
+ // GenericPInvokeCalliHelper unmanaged target Parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_R12
+ #define RBM_PINVOKE_TARGET_PARAM RBM_R12
+ #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R12
+
+ // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_R12
+ #define RBM_SECRET_STUB_PARAM RBM_R12
+
+ // VSD extra parameter (slot address)
+ #define REG_VIRTUAL_STUB_PARAM REG_R4
+ #define RBM_VIRTUAL_STUB_PARAM RBM_R4
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R4
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_R4
+ #define RBM_PINVOKE_FRAME RBM_R4
+ #define REG_PINVOKE_TCB REG_R5
+ #define RBM_PINVOKE_TCB RBM_R5
+ #define REG_PINVOKE_SCRATCH REG_R6
+ #define RBM_PINVOKE_SCRATCH RBM_R6
+
+#ifdef LEGACY_BACKEND
+ #define REG_SPILL_CHOICE REG_LR
+ #define RBM_SPILL_CHOICE RBM_LR
+ #define REG_SPILL_CHOICE_FLT REG_F14
+ #define RBM_SPILL_CHOICE_FLT (RBM_F14|RBM_F15)
+#endif // LEGACY_BACKEND
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_R0
+ #define REG_INT_FIRST REG_R0
+ #define REG_INT_LAST REG_LR
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_R1
+
+ // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks
+ #define REG_PROFILER_ENTER_ARG REG_R0
+ #define RBM_PROFILER_ENTER_ARG RBM_R0
+ #define REG_PROFILER_RET_SCRATCH REG_R2
+ #define RBM_PROFILER_RET_SCRATCH RBM_R2
+ #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2)
+ #define REG_PROFILER_JMP_ARG REG_R0
+ #define RBM_PROFILER_JMP_USED RBM_R0
+ #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR)
+
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_R0
+ #define RBM_INTRET RBM_R0
+ #define REG_LNGRET REG_PAIR_R0R1
+ #define RBM_LNGRET (RBM_R1|RBM_R0)
+
+ #define REG_FLOATRET REG_F0
+ #define RBM_FLOATRET RBM_F0
+ #define RBM_DOUBLERET (RBM_F0|RBM_F1)
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ // See vm\arm\asmhelpers.asm for more details.
+ #define RBM_STOP_FOR_GC_TRASH (RBM_CALLEE_TRASH & ~(RBM_FLOATRET | RBM_INTRET))
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
+
+ #define REG_FPBASE REG_R11
+ #define RBM_FPBASE RBM_R11
+ #define STR_FPBASE "r11"
+ #define REG_SPBASE REG_SP
+ #define RBM_SPBASE RBM_SP
+ #define STR_SPBASE "sp"
+
+ #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address
+
+ #define MAX_REG_ARG 4
+ #define MAX_FLOAT_REG_ARG 16
+ #define MAX_HFA_RET_SLOTS 8
+
+ #define REG_ARG_FIRST REG_R0
+ #define REG_ARG_LAST REG_R3
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_R0
+ #define REG_ARG_1 REG_R1
+ #define REG_ARG_2 REG_R2
+ #define REG_ARG_3 REG_R3
+
+ SELECTANY const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3};
+ SELECTANY const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3};
+
+ #define RBM_ARG_0 RBM_R0
+ #define RBM_ARG_1 RBM_R1
+ #define RBM_ARG_2 RBM_R2
+ #define RBM_ARG_3 RBM_R3
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3)
+ #define RBM_FLTARG_REGS (RBM_F0|RBM_F1|RBM_F2|RBM_F3|RBM_F4|RBM_F5|RBM_F6|RBM_F7|RBM_F8|RBM_F9|RBM_F10|RBM_F11|RBM_F12|RBM_F13|RBM_F14|RBM_F15)
+ #define RBM_DBL_REGS RBM_ALLDOUBLE
+
+ SELECTANY const regNumber fltArgRegs [] = {REG_F0, REG_F1, REG_F2, REG_F3, REG_F4, REG_F5, REG_F6, REG_F7, REG_F8, REG_F9, REG_F10, REG_F11, REG_F12, REG_F13, REG_F14, REG_F15 };
+ SELECTANY const regMaskTP fltArgMasks[] = {RBM_F0, RBM_F1, RBM_F2, RBM_F3, RBM_F4, RBM_F5, RBM_F6, RBM_F7, RBM_F8, RBM_F9, RBM_F10, RBM_F11, RBM_F12, RBM_F13, RBM_F14, RBM_F15 };
+
+ #define LBL_DIST_SMALL_MAX_NEG (0)
+ #define LBL_DIST_SMALL_MAX_POS (+1020)
+ #define LBL_DIST_MED_MAX_NEG (-4095)
+ #define LBL_DIST_MED_MAX_POS (+4096)
+
+ #define JMP_DIST_SMALL_MAX_NEG (-2048)
+ #define JMP_DIST_SMALL_MAX_POS (+2046)
+
+ #define JCC_DIST_SMALL_MAX_NEG (-256)
+ #define JCC_DIST_SMALL_MAX_POS (+254)
+
+ #define JCC_DIST_MEDIUM_MAX_NEG (-1048576)
+ #define JCC_DIST_MEDIUM_MAX_POS (+1048574)
+
+ #define LBL_SIZE_SMALL (2)
+
+ #define JMP_SIZE_SMALL (2)
+ #define JMP_SIZE_LARGE (4)
+
+ #define JCC_SIZE_SMALL (2)
+ #define JCC_SIZE_MEDIUM (4)
+ #define JCC_SIZE_LARGE (6)
+
+#elif defined(_TARGET_ARM64_)
+
+ #define CPU_LOAD_STORE_ARCH 1
+ #define CPU_LONG_USES_REGPAIR 0
+ #define CPU_HAS_FP_SUPPORT 1
+ #define ROUND_FLOAT 0 // Do not round intermediate float expression results
+ #define CPU_HAS_BYTE_REGS 0
+ #define CPU_USES_BLOCK_MOVE 0
+
+ #define CPBLK_UNROLL_LIMIT 64 // Upper bound below which the code generator will unroll CpBlk.
+ #define INITBLK_UNROLL_LIMIT 64 // Upper bound below which the code generator will unroll InitBlk.
+
+ #define FEATURE_WRITE_BARRIER 1 // Generate the proper WriteBarrier calls for GC
+ #define FEATURE_FIXED_OUT_ARGS 1 // Preallocate the outgoing arg area in the prolog
+ #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers
+ #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers
+ #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp
+ #define FEATURE_TAILCALL_OPT 0 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
+ #define FEATURE_SET_FLAGS 1 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+ #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register
+ #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register
+ #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register
+ #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine if structs are passed/returned in more than one register
+ #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (max is 4 doubles using an HFA)
+ #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
+ #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
+ #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value.
+
+#ifdef FEATURE_USE_ASM_GC_WRITE_BARRIERS
+ #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers
+#else
+ #define NOGC_WRITE_BARRIERS 0 // Do not modify this -- modify the definition above. (If we're not using ASM barriers we definitely don't have NOGC barriers).
+#endif
+ #define USER_ARGS_COME_LAST 1
+ #define EMIT_TRACK_STACK_DEPTH 1 // This is something of a workaround. For both ARM and AMD64, the frame size is fixed, so we don't really
+ // need to track stack depth, but this is currently necessary to get GC information reported at call sites.
+ #define TARGET_POINTER_SIZE 8 // equal to sizeof(void*) and the managed pointer size in bytes for this target
+ #define FEATURE_EH 1 // Set to 0 (to aid platform bring-up) to eliminate exceptional EH clauses (catch, filter, filter-handler, fault) and directly execute 'finally' clauses.
+ #define FEATURE_EH_FUNCLETS 1
+ #define FEATURE_EH_CALLFINALLY_THUNKS 1 // Generate call-to-finally code in "thunks" in the enclosing EH region, protected by "cloned finally" clauses.
+ #define FEATURE_STACK_FP_X87 0
+ #define ETW_EBP_FRAMED 1 // if 1 we cannot use REG_FP as a scratch register and must setup the frame pointer for most methods
+ #define FEATURE_FP_REGALLOC 0 // Enabled if RegAlloc is used to enregister Floating Point LclVars
+ #define CSE_CONSTS 1 // Enable if we want to CSE constants
+
+ #define REG_FP_FIRST REG_V0
+ #define REG_FP_LAST REG_V31
+ #define FIRST_FP_ARGREG REG_V0
+ #define LAST_FP_ARGREG REG_V15
+
+ #define REGNUM_BITS 6 // number of bits in a REG_*
+ #define TINY_REGNUM_BITS 5 // number of bits we will use for a tiny instr desc (may not use float)
+ #define REGMASK_BITS 64 // number of bits in a REGNUM_MASK
+ #define REGSIZE_BYTES 8 // number of bytes in one general purpose register
+ #define FP_REGSIZE_BYTES 16 // number of bytes in one FP/SIMD register
+ #define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers
+
+ #define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
+
+ #define CODE_ALIGN 4 // code alignment requirement
+ #define STACK_ALIGN 16 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
+
+ #define RBM_INT_CALLEE_SAVED (RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28)
+ #define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_IP0|RBM_IP1|RBM_LR)
+ #define RBM_FLT_CALLEE_SAVED (RBM_V8|RBM_V9|RBM_V10|RBM_V11|RBM_V12|RBM_V13|RBM_V14|RBM_V15)
+ #define RBM_FLT_CALLEE_TRASH (RBM_V0|RBM_V1|RBM_V2|RBM_V3|RBM_V4|RBM_V5|RBM_V6|RBM_V7|RBM_V16|RBM_V17|RBM_V18|RBM_V19|RBM_V20|RBM_V21|RBM_V22|RBM_V23|RBM_V24|RBM_V25|RBM_V26|RBM_V27|RBM_V28|RBM_V29|RBM_V30|RBM_V31)
+
+ #define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
+ #define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
+ #define RBM_CALLEE_TRASH_NOGC (RBM_R12|RBM_R13|RBM_R14|RBM_R15)
+ #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12
+
+ #define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
+ #define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
+ #define RBM_ALLDOUBLE RBM_ALLFLOAT
+
+ #define REG_VAR_ORDER REG_R9,REG_R10,REG_R11,REG_R12,REG_R13,REG_R14,REG_R15,\
+ REG_R8,REG_R7,REG_R6,REG_R5,REG_R4,REG_R3,REG_R2,REG_R1,REG_R0,\
+ REG_R19,REG_R20,REG_R21,REG_R22,REG_R23,REG_R24,REG_R25,REG_R26,REG_R27,REG_R28,\
+
+ #define REG_VAR_ORDER_FLT REG_V16, REG_V17, REG_V18, REG_V19, \
+ REG_V20, REG_V21, REG_V22, REG_V23, \
+ REG_V24, REG_V25, REG_V26, REG_V27, \
+ REG_V28, REG_V29, REG_V30, REG_V31, \
+ REG_V7, REG_V6, REG_V5, REG_V4, \
+ REG_V8, REG_V9, REG_V10, REG_V11, \
+ REG_V12, REG_V13, REG_V14, REG_V15, \
+ REG_V3, REG_V2, REG_V1, REG_V0
+
+ #define REG_CALLEE_SAVED_ORDER REG_R19,REG_R20,REG_R21,REG_R22,REG_R23,REG_R24,REG_R25,REG_R26,REG_R27,REG_R28
+ #define RBM_CALLEE_SAVED_ORDER RBM_R19,RBM_R20,RBM_R21,RBM_R22,RBM_R23,RBM_R24,RBM_R25,RBM_R26,RBM_R27,RBM_R28
+
+ #define CNT_CALLEE_SAVED (11)
+ #define CNT_CALLEE_TRASH (17)
+ #define CNT_CALLEE_ENREG (CNT_CALLEE_SAVED-1)
+
+ #define CNT_CALLEE_SAVED_FLOAT (8)
+ #define CNT_CALLEE_TRASH_FLOAT (24)
+
+ #define CALLEE_SAVED_REG_MAXSZ (CNT_CALLEE_SAVED * REGSIZE_BYTES)
+ #define CALLEE_SAVED_FLOAT_MAXSZ (CNT_CALLEE_SAVED_FLOAT * FPSAVE_REGSIZE_BYTES)
+
+ // TODO-ARM64-Cleanup: Remove this
+ #define REG_L_STK REG_ZR
+
+ // This is the first register in REG_TMP_ORDER
+ #define REG_TMP_0 REG_R9
+ #define RBM_TMP_0 RBM_R9
+
+ // This is the second register in REG_TMP_ORDER
+ #define REG_TMP_1 REG_R10
+ #define RBM_TMP_1 RBM_R10
+
+ // register to hold shift amount; no special register is required on ARM64.
+ #define REG_SHIFT REG_NA
+ #define RBM_SHIFT RBM_ALLINT
+ #define PREDICT_REG_SHIFT PREDICT_REG
+
+ // This is a general scratch register that does not conflict with the argument registers
+ #define REG_SCRATCH REG_R9
+ #define RBM_SCRATCH RBM_R9
+
+ // This is a general register that can be optionally reserved for other purposes during codegen
+ #define REG_OPT_RSVD REG_IP1
+ #define RBM_OPT_RSVD RBM_IP1
+
+ // Where is the exception object on entry to the handler block?
+ #define REG_EXCEPTION_OBJECT REG_R0
+ #define RBM_EXCEPTION_OBJECT RBM_R0
+
+ #define REG_JUMP_THUNK_PARAM REG_R12
+ #define RBM_JUMP_THUNK_PARAM RBM_R12
+
+#if NOGC_WRITE_BARRIERS
+ #define REG_WRITE_BARRIER_SRC_BYREF REG_R13
+ #define RBM_WRITE_BARRIER_SRC_BYREF RBM_R13
+
+ #define REG_WRITE_BARRIER_DST_BYREF REG_R14
+ #define RBM_WRITE_BARRIER_DST_BYREF RBM_R14
+
+ #define REG_WRITE_BARRIER REG_R15
+ #define RBM_WRITE_BARRIER RBM_R15
+#endif
+
+ // GenericPInvokeCalliHelper VASigCookie Parameter
+ #define REG_PINVOKE_COOKIE_PARAM REG_R15
+ #define RBM_PINVOKE_COOKIE_PARAM RBM_R15
+ #define PREDICT_REG_PINVOKE_COOKIE_PARAM PREDICT_REG_R15
+
+ // GenericPInvokeCalliHelper unmanaged target Parameter
+ #define REG_PINVOKE_TARGET_PARAM REG_R14
+ #define RBM_PINVOKE_TARGET_PARAM RBM_R14
+ #define PREDICT_REG_PINVOKE_TARGET_PARAM PREDICT_REG_R14
+
+ // IL stub's secret MethodDesc parameter (CORJIT_FLG_PUBLISH_SECRET_PARAM)
+ #define REG_SECRET_STUB_PARAM REG_R12
+ #define RBM_SECRET_STUB_PARAM RBM_R12
+
+ // VSD extra parameter (slot address)
+ #define REG_VIRTUAL_STUB_PARAM REG_R11
+ #define RBM_VIRTUAL_STUB_PARAM RBM_R11
+ #define PREDICT_REG_VIRTUAL_STUB_PARAM PREDICT_REG_R11
+
+ // R2R indirect call. Use the same registers as VSD
+ #define REG_R2R_INDIRECT_PARAM REG_R11
+ #define RBM_R2R_INDIRECT_PARAM RBM_R11
+ #define PREDICT_REG_RER_INDIRECT_PARAM PREDICT_REG_R11
+
+ // Registers used by PInvoke frame setup
+ #define REG_PINVOKE_FRAME REG_R9
+ #define RBM_PINVOKE_FRAME RBM_R9
+ #define REG_PINVOKE_TCB REG_R10
+ #define RBM_PINVOKE_TCB RBM_R10
+ #define REG_PINVOKE_SCRATCH REG_R10
+ #define RBM_PINVOKE_SCRATCH RBM_R10
+
+ // The following defines are useful for iterating a regNumber
+ #define REG_FIRST REG_R0
+ #define REG_INT_FIRST REG_R0
+ #define REG_INT_LAST REG_ZR
+ #define REG_INT_COUNT (REG_INT_LAST - REG_INT_FIRST + 1)
+ #define REG_NEXT(reg) ((regNumber)((unsigned)(reg) + 1))
+ #define REG_PREV(reg) ((regNumber)((unsigned)(reg) - 1))
+
+ // genCodeForCall() moves the target address of the tailcall into this register, before pushing it on the stack
+ #define REG_TAILCALL_ADDR REG_R9
+
+ // The following registers are used in emitting Enter/Leave/Tailcall profiler callbacks
+ #define REG_PROFILER_ENTER_ARG REG_R0
+ #define RBM_PROFILER_ENTER_ARG RBM_R0
+ #define REG_PROFILER_RET_SCRATCH REG_R2
+ #define RBM_PROFILER_RET_SCRATCH RBM_R2
+ #define RBM_PROFILER_RET_USED (RBM_R0 | RBM_R1 | RBM_R2)
+ #define REG_PROFILER_JMP_ARG REG_R0
+ #define RBM_PROFILER_JMP_USED RBM_R0
+ #define RBM_PROFILER_TAIL_USED (RBM_R0 | RBM_R12 | RBM_LR)
+
+
+ // Which registers are int and long values returned in?
+ #define REG_INTRET REG_R0
+ #define RBM_INTRET RBM_R0
+ #define REG_LNGRET REG_R0
+ #define RBM_LNGRET RBM_R0
+ // second return register for 16-byte structs
+ #define REG_INTRET_1 REG_R1
+ #define RBM_INTRET_1 RBM_R1
+
+ #define REG_FLOATRET REG_V0
+ #define RBM_FLOATRET RBM_V0
+ #define RBM_DOUBLERET RBM_V0
+
+ // The registers trashed by the CORINFO_HELP_STOP_FOR_GC helper
+ #define RBM_STOP_FOR_GC_TRASH RBM_CALLEE_TRASH
+
+ // The registers trashed by the CORINFO_HELP_INIT_PINVOKE_FRAME helper.
+ #define RBM_INIT_PINVOKE_FRAME_TRASH RBM_CALLEE_TRASH
+
+ #define REG_FPBASE REG_FP
+ #define RBM_FPBASE RBM_FP
+ #define STR_FPBASE "fp"
+ #define REG_SPBASE REG_SP
+ #define RBM_SPBASE RBM_ZR // reuse the RBM for REG_ZR
+ #define STR_SPBASE "sp"
+
+ #define FIRST_ARG_STACK_OFFS (2*REGSIZE_BYTES) // Caller's saved FP and return address
+
+ // On ARM64 the calling convention defines REG_R8 (x8) as an additional argument register.
+ // It isn't allocated for the normal user arguments, so it isn't counted by MAX_REG_ARG.
+ // Whether we use this register to pass the RetBuff is controlled by the function hasFixedRetBuffReg();
+ // it is considered to be the next integer argnum, which is 8.
+ //
+ #define REG_ARG_RET_BUFF REG_R8
+ #define RBM_ARG_RET_BUFF RBM_R8
+ #define RET_BUFF_ARGNUM 8
+
+ #define MAX_REG_ARG 8
+ #define MAX_FLOAT_REG_ARG 8
+
+ #define REG_ARG_FIRST REG_R0
+ #define REG_ARG_LAST REG_R7
+ #define REG_ARG_FP_FIRST REG_V0
+ #define REG_ARG_FP_LAST REG_V7
+ #define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
+
+ #define REG_ARG_0 REG_R0
+ #define REG_ARG_1 REG_R1
+ #define REG_ARG_2 REG_R2
+ #define REG_ARG_3 REG_R3
+ #define REG_ARG_4 REG_R4
+ #define REG_ARG_5 REG_R5
+ #define REG_ARG_6 REG_R6
+ #define REG_ARG_7 REG_R7
+
+ SELECTANY const regNumber intArgRegs [] = {REG_R0, REG_R1, REG_R2, REG_R3, REG_R4, REG_R5, REG_R6, REG_R7};
+ SELECTANY const regMaskTP intArgMasks[] = {RBM_R0, RBM_R1, RBM_R2, RBM_R3, RBM_R4, RBM_R5, RBM_R6, RBM_R7};
+
+ #define RBM_ARG_0 RBM_R0
+ #define RBM_ARG_1 RBM_R1
+ #define RBM_ARG_2 RBM_R2
+ #define RBM_ARG_3 RBM_R3
+ #define RBM_ARG_4 RBM_R4
+ #define RBM_ARG_5 RBM_R5
+ #define RBM_ARG_6 RBM_R6
+ #define RBM_ARG_7 RBM_R7
+
+ #define REG_FLTARG_0 REG_V0
+ #define REG_FLTARG_1 REG_V1
+ #define REG_FLTARG_2 REG_V2
+ #define REG_FLTARG_3 REG_V3
+ #define REG_FLTARG_4 REG_V4
+ #define REG_FLTARG_5 REG_V5
+ #define REG_FLTARG_6 REG_V6
+ #define REG_FLTARG_7 REG_V7
+
+ #define RBM_FLTARG_0 RBM_V0
+ #define RBM_FLTARG_1 RBM_V1
+ #define RBM_FLTARG_2 RBM_V2
+ #define RBM_FLTARG_3 RBM_V3
+ #define RBM_FLTARG_4 RBM_V4
+ #define RBM_FLTARG_5 RBM_V5
+ #define RBM_FLTARG_6 RBM_V6
+ #define RBM_FLTARG_7 RBM_V7
+
+ #define RBM_ARG_REGS (RBM_ARG_0|RBM_ARG_1|RBM_ARG_2|RBM_ARG_3|RBM_ARG_4|RBM_ARG_5|RBM_ARG_6|RBM_ARG_7)
+ #define RBM_FLTARG_REGS (RBM_FLTARG_0|RBM_FLTARG_1|RBM_FLTARG_2|RBM_FLTARG_3|RBM_FLTARG_4|RBM_FLTARG_5|RBM_FLTARG_6|RBM_FLTARG_7)
+
+ SELECTANY const regNumber fltArgRegs [] = {REG_V0, REG_V1, REG_V2, REG_V3, REG_V4, REG_V5, REG_V6, REG_V7 };
+ SELECTANY const regMaskTP fltArgMasks[] = {RBM_V0, RBM_V1, RBM_V2, RBM_V3, RBM_V4, RBM_V5, RBM_V6, RBM_V7 };
+
+ #define LBL_DIST_SMALL_MAX_NEG (-1048576)
+ #define LBL_DIST_SMALL_MAX_POS (+1048575)
+
+ #define LBL_SIZE_SMALL (4)
+ #define LBL_SIZE_LARGE (8)
+
+ #define JCC_DIST_SMALL_MAX_NEG (-1048576)
+ #define JCC_DIST_SMALL_MAX_POS (+1048575)
+
+ #define JCC_SIZE_SMALL (4)
+ #define JCC_SIZE_LARGE (8)
+
+ #define LDC_DIST_SMALL_MAX_NEG (-1048576)
+ #define LDC_DIST_SMALL_MAX_POS (+1048575)
+
+ #define LDC_SIZE_SMALL (4)
+ #define LDC_SIZE_LARGE (8)
+
+ #define JMP_SIZE_SMALL (4)
+
+#else
+ #error Unsupported or unset target architecture
+#endif
+
+#ifdef _TARGET_XARCH_
+
+ #define JMP_DIST_SMALL_MAX_NEG (-128)
+ #define JMP_DIST_SMALL_MAX_POS (+127)
+
+ #define JCC_DIST_SMALL_MAX_NEG (-128)
+ #define JCC_DIST_SMALL_MAX_POS (+127)
+
+ #define JMP_SIZE_SMALL (2)
+ #define JMP_SIZE_LARGE (5)
+
+ #define JCC_SIZE_SMALL (2)
+ #define JCC_SIZE_LARGE (6)
+
+ #define PUSH_INST_SIZE (5)
+ #define CALL_INST_SIZE (5)
+
+#endif // _TARGET_XARCH_
+
+C_ASSERT(REG_FIRST == 0);
+C_ASSERT(REG_INT_FIRST < REG_INT_LAST);
+C_ASSERT(REG_FP_FIRST < REG_FP_LAST);
+
+// Opportunistic tail call feature converts non-tail prefixed calls into
+// tail calls where possible. It requires fast tail calling mechanism for
+// performance. Otherwise, we are better off not converting non-tail prefixed
+// calls into tail calls.
+C_ASSERT((FEATURE_TAILCALL_OPT == 0) || (FEATURE_FASTTAILCALL == 1));
+
+/*****************************************************************************/
+
+#define BITS_PER_BYTE 8
+#define REGNUM_MASK ((1 << REGNUM_BITS) - 1) // an n-bit mask used to encode multiple REGNUMs into an unsigned int
+#define RBM_ALL(type) (varTypeIsFloating(type) ? RBM_ALLFLOAT : RBM_ALLINT)
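+
+// Illustrative example only: REGNUM_MASK allows several regNumbers to be packed into one
+// unsigned value, REGNUM_BITS bits apiece, e.g. (regLo/regHi are hypothetical names):
+//
+//   unsigned  packed = ((unsigned)regHi << REGNUM_BITS) | ((unsigned)regLo & REGNUM_MASK);
+//   regNumber lo     = (regNumber)(packed & REGNUM_MASK);
+//   regNumber hi     = (regNumber)((packed >> REGNUM_BITS) & REGNUM_MASK);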
+
+/*****************************************************************************/
+
+#if CPU_HAS_BYTE_REGS
+ #define RBM_BYTE_REGS (RBM_EAX|RBM_ECX|RBM_EDX|RBM_EBX)
+ #define RBM_NON_BYTE_REGS (RBM_ESI|RBM_EDI)
+ // We reuse the ESP register as a flag for byteable registers in lvPrefReg
+ #define RBM_BYTE_REG_FLAG RBM_ESP
+#else
+ #define RBM_BYTE_REGS RBM_ALLINT
+ #define RBM_NON_BYTE_REGS RBM_NONE
+ #define RBM_BYTE_REG_FLAG RBM_NONE
+#endif
+// clang-format on
+
+/*****************************************************************************/
+class Target
+{
+public:
+ static const char* g_tgtCPUName;
+ static const char* g_tgtPlatformName;
+
+ enum ArgOrder
+ {
+ ARG_ORDER_R2L,
+ ARG_ORDER_L2R
+ };
+ static const enum ArgOrder g_tgtArgOrder;
+
+#if NOGC_WRITE_BARRIERS
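+ // exclude_WriteBarrierReg: returns 'mask' with the fixed NOGC write barrier register
+ // removed; if that would leave no registers, falls back to all integer registers
+ // other than the write barrier register.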
+ static regMaskTP exclude_WriteBarrierReg(regMaskTP mask)
+ {
+ unsigned result = (mask & ~RBM_WRITE_BARRIER);
+ if (result)
+ return result;
+ else
+ return RBM_ALLINT & ~RBM_WRITE_BARRIER;
+ }
+#endif // NOGC_WRITE_BARRIERS
+};
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+const char* getRegName(unsigned reg, bool isFloat = false); // this is for gcencode.cpp and disasm.cpp that don't use
+ // the regNumber type
+const char* getRegName(regNumber reg, bool isFloat = false);
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#ifdef DEBUG
+const char* getRegNameFloat(regNumber reg, var_types type);
+extern void dspRegMask(regMaskTP regMask, size_t minSiz = 0);
+#endif
+
+#if CPU_HAS_BYTE_REGS
+inline BOOL isByteReg(regNumber reg)
+{
+ return (reg <= REG_EBX);
+}
+#else
+inline BOOL isByteReg(regNumber reg)
+{
+ return true;
+}
+#endif
+
+#ifdef LEGACY_BACKEND
+extern const regNumber raRegTmpOrder[REG_TMP_ORDER_COUNT];
+extern const regNumber rpRegTmpOrder[REG_TMP_ORDER_COUNT];
+#if FEATURE_FP_REGALLOC
+extern const regNumber raRegFltTmpOrder[REG_FLT_TMP_ORDER_COUNT];
+#endif
+#endif // LEGACY_BACKEND
+
+inline regMaskTP genRegMask(regNumber reg);
+inline regMaskTP genRegMaskFloat(regNumber reg, var_types type = TYP_DOUBLE);
+
+/*****************************************************************************
+ * Return true if the register number is valid
+ */
+inline bool genIsValidReg(regNumber reg)
+{
+ /* It's safest to perform an unsigned comparison in case reg is negative */
+ return ((unsigned)reg < (unsigned)REG_COUNT);
+}
+
+/*****************************************************************************
+ * Return true if the register is a valid integer register
+ */
+inline bool genIsValidIntReg(regNumber reg)
+{
+ return reg >= REG_INT_FIRST && reg <= REG_INT_LAST;
+}
+
+/*****************************************************************************
+ * Return true if the register is a valid floating point register
+ */
+inline bool genIsValidFloatReg(regNumber reg)
+{
+ return reg >= REG_FP_FIRST && reg <= REG_FP_LAST;
+}
+
+#if defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+
+/*****************************************************************************
+ * Return true if the register is a valid floating point double register
+ */
+inline bool genIsValidDoubleReg(regNumber reg)
+{
+ return genIsValidFloatReg(reg) && (((reg - REG_FP_FIRST) & 0x1) == 0);
+}
+
+#endif // defined(LEGACY_BACKEND) && defined(_TARGET_ARM_)
+
+//-------------------------------------------------------------------------------------------
+// hasFixedRetBuffReg:
+// Returns true if our target architecture uses a fixed return buffer register
+//
+inline bool hasFixedRetBuffReg()
+{
+#ifdef _TARGET_ARM64_
+ return true;
+#else
+ return false;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// theFixedRetBuffReg:
+// Returns the regNumber to use for the fixed return buffer
+//
+inline regNumber theFixedRetBuffReg()
+{
+ assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
+#ifdef _TARGET_ARM64_
+ return REG_ARG_RET_BUFF;
+#else
+ return REG_NA;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// theFixedRetBuffMask:
+//     Returns the register mask (regMaskTP) to use for the fixed return buffer
+//
+inline regMaskTP theFixedRetBuffMask()
+{
+ assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
+#ifdef _TARGET_ARM64_
+ return RBM_ARG_RET_BUFF;
+#else
+ return 0;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// theFixedRetBuffArgNum:
+// Returns the argNum to use for the fixed return buffer
+//
+inline unsigned theFixedRetBuffArgNum()
+{
+ assert(hasFixedRetBuffReg()); // This predicate should be checked before calling this method
+#ifdef _TARGET_ARM64_
+ return RET_BUFF_ARGNUM;
+#else
+ return BAD_VAR_NUM;
+#endif
+}
+
+//-------------------------------------------------------------------------------------------
+// fullIntArgRegMask:
+// Returns the full mask of all possible integer registers
+// Note this includes the fixed return buffer register on Arm64
+//
+inline regMaskTP fullIntArgRegMask()
+{
+ if (hasFixedRetBuffReg())
+ {
+ return RBM_ARG_REGS | theFixedRetBuffMask();
+ }
+ else
+ {
+ return RBM_ARG_REGS;
+ }
+}
+
+//-------------------------------------------------------------------------------------------
+// isValidIntArgReg:
+// Returns true if the register is a valid integer argument register
+// Note this method also returns true on Arm64 when 'reg' is the RetBuff register
+//
+inline bool isValidIntArgReg(regNumber reg)
+{
+ return (genRegMask(reg) & fullIntArgRegMask()) != 0;
+}
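+// Illustrative sketch of how the helpers above interact (assumes an ARM64-like target where
+// REG_ARG_RET_BUFF names the fixed return buffer register, x8):
+//
+//   if (hasFixedRetBuffReg())
+//   {
+//       // x8 is not in RBM_ARG_REGS, but fullIntArgRegMask() folds it in,
+//       // so isValidIntArgReg() accepts it as an integer argument register.
+//       assert(isValidIntArgReg(theFixedRetBuffReg()));
+//   }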
+
+//-------------------------------------------------------------------------------------------
+// genRegArgNext:
+// Given a register that is an integer or floating point argument register
+// returns the next argument register
+//
+regNumber genRegArgNext(regNumber argReg);
+
+//-------------------------------------------------------------------------------------------
+// isValidFloatArgReg:
+// Returns true if the register is a valid floating-point argument register
+//
+inline bool isValidFloatArgReg(regNumber reg)
+{
+ if (reg == REG_NA)
+ {
+ return false;
+ }
+ else
+ {
+ return (reg >= FIRST_FP_ARGREG) && (reg <= LAST_FP_ARGREG);
+ }
+}
+
+/*****************************************************************************
+ *
+ * Can the register hold the argument type?
+ */
+
+#ifdef _TARGET_ARM_
+inline bool floatRegCanHoldType(regNumber reg, var_types type)
+{
+ assert(genIsValidFloatReg(reg));
+ if (type == TYP_DOUBLE)
+ {
+ return ((reg - REG_F0) % 2) == 0;
+ }
+ else
+ {
+ // Can be TYP_STRUCT for HFA. It's not clear that's correct; what about
+ // HFA of double? We wouldn't be asserting the right alignment, and
+ // callers like genRegMaskFloat() wouldn't be generating the right mask.
+
+ assert((type == TYP_FLOAT) || (type == TYP_STRUCT));
+ return true;
+ }
+}
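+// Illustrative expectations for the ARM version above (a sketch; REG_F0/REG_F1 are the usual
+// single-precision register names):
+//
+//   assert(floatRegCanHoldType(REG_F0, TYP_DOUBLE));  // even S register: starts a D register pair
+//   assert(!floatRegCanHoldType(REG_F1, TYP_DOUBLE)); // odd S register: only the upper half of D0
+//   assert(floatRegCanHoldType(REG_F1, TYP_FLOAT));   // any S register can hold a float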
+#else
+// AMD64: xmm registers can hold any float type
+// x86: FP stack can hold any float type
+// ARM64: Floating-point/SIMD registers can hold any type.
+inline bool floatRegCanHoldType(regNumber reg, var_types type)
+{
+ return true;
+}
+#endif
+
+/*****************************************************************************
+ *
+ * Map a register number to a register mask.
+ */
+
+extern const regMaskSmall regMasks[REG_COUNT];
+
+inline regMaskTP genRegMask(regNumber reg)
+{
+ assert((unsigned)reg < ArrLen(regMasks));
+#ifdef _TARGET_AMD64_
+    // A shift is faster than an L1 hit on modern x86
+    // (L1 latency on Sandy Bridge is 4 cycles for [base] and 5 for [base + index*c]).
+    // The reason this is AMD64-only is that the x86 backend will try to get reg masks for REG_STK
+    // and the result needs to be zero.
+ regMaskTP result = 1 << reg;
+ assert(result == regMasks[reg]);
+ return result;
+#else
+ return regMasks[reg];
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Map a register number to a floating-point register mask.
+ */
+
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+extern const regMaskSmall regFPMasks[REG_FPCOUNT];
+#endif // defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+
+inline regMaskTP genRegMaskFloat(regNumber reg, var_types type /* = TYP_DOUBLE */)
+{
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+ assert(reg >= REG_FPV0 && reg < REG_FPCOUNT);
+ assert((unsigned)reg < ArrLen(regFPMasks));
+ return regFPMasks[reg];
+#elif defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_) || defined(_TARGET_X86_)
+ assert(genIsValidFloatReg(reg));
+ assert((unsigned)reg < ArrLen(regMasks));
+ return regMasks[reg];
+#elif defined _TARGET_ARM_
+ assert(floatRegCanHoldType(reg, type));
+ assert(reg >= REG_F0 && reg <= REG_F31);
+
+ if (type == TYP_DOUBLE)
+ {
+ return regMasks[reg] | regMasks[reg + 1];
+ }
+ else
+ {
+ return regMasks[reg];
+ }
+#else
+#error Unsupported or unset target architecture
+#endif
+}
+
+//------------------------------------------------------------------------
+// genRegMask: Given a register, and its type, generate the appropriate regMask
+//
+// Arguments:
+// regNum - the register of interest
+// type - the type of regNum (i.e. the type it is being used as)
+//
+// Return Value:
+// This will usually return the same value as genRegMask(regNum), but
+// on architectures where multiple registers are used for certain types
+// (e.g. TYP_DOUBLE on ARM), it will return a regMask that includes
+// all the registers.
+// Registers that are used in pairs, but separately named (e.g. TYP_LONG
+// on ARM) will return just the regMask for the given register.
+//
+// Assumptions:
+// For registers that are used in pairs, the caller will be handling
+// each member of the pair separately.
+//
+inline regMaskTP genRegMask(regNumber regNum, var_types type)
+{
+#ifndef _TARGET_ARM_
+ return genRegMask(regNum);
+#else
+ regMaskTP regMask = RBM_NONE;
+
+ if (varTypeIsFloating(type))
+ {
+ regMask = genRegMaskFloat(regNum, type);
+ }
+ else
+ {
+ regMask = genRegMask(regNum);
+ }
+ return regMask;
+#endif
+}
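+// Illustrative expectations for the typed genRegMask above (a sketch; ARM register and mask
+// names such as REG_F2/RBM_F2/RBM_F3 and REG_R0/RBM_R0 are assumed):
+//
+//   assert(genRegMask(REG_F2, TYP_DOUBLE) == (RBM_F2 | RBM_F3)); // a double covers two S registers
+//   assert(genRegMask(REG_R0, TYP_LONG) == RBM_R0);              // long pairs are masked per register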
+
+/*****************************************************************************
+ *
+ * These arrays list the callee-saved register numbers (and bitmaps, respectively) for
+ * the current architecture.
+ */
+extern const regNumber raRegCalleeSaveOrder[CNT_CALLEE_SAVED];
+extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALLEE_SAVED];
+
+// This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask.
+regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short);
+
+/*****************************************************************************
+ *
+ * Returns the register that holds the low 32 bits of the long value given
+ * by the register pair 'regPair'.
+ */
+inline regNumber genRegPairLo(regPairNo regPair)
+{
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ return (regNumber)((regPair - REG_PAIR_FIRST) & REG_PAIR_NMASK);
+}
+
+/*****************************************************************************
+ *
+ * Returns the register that holds the high 32 bits of the long value given
+ * by the register pair 'regPair'.
+ */
+inline regNumber genRegPairHi(regPairNo regPair)
+{
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ return (regNumber)(((regPair - REG_PAIR_FIRST) >> REG_PAIR_NBITS) & REG_PAIR_NMASK);
+}
+
+/*****************************************************************************
+ *
+ * Returns whether regPair is a combination of two "real" registers
+ * or whether it contains a pseudo register.
+ *
+ * In debug it also asserts that reg1 and reg2 are not the same.
+ */
+bool genIsProperRegPair(regPairNo regPair);
+
+/*****************************************************************************
+ *
+ * Returns the register pair number that corresponds to the given two regs.
+ */
+inline regPairNo gen2regs2pair(regNumber regLo, regNumber regHi)
+{
+ assert(regLo != regHi || regLo == REG_STK);
+ assert(genIsValidReg(regLo) && genIsValidReg(regHi));
+ assert(regLo != REG_L_STK && regHi != REG_L_STK);
+
+ regPairNo regPair = (regPairNo)(regLo + (regHi << REG_PAIR_NBITS) + REG_PAIR_FIRST);
+
+ assert(regLo == genRegPairLo(regPair));
+ assert(regHi == genRegPairHi(regPair));
+
+ return regPair;
+}
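+// Illustrative round trip through the pair encoding above (a sketch; legacy x86 register
+// names REG_EAX/REG_EDX are assumed):
+//
+//   regPairNo pair = gen2regs2pair(REG_EAX, REG_EDX); // low half in EAX, high half in EDX
+//   assert(genRegPairLo(pair) == REG_EAX);            // recovered from the low REG_PAIR_NBITS bits
+//   assert(genRegPairHi(pair) == REG_EDX);            // recovered from the next REG_PAIR_NBITS bits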
+
+/*****************************************************************************/
+inline regMaskTP genRegPairMask(regPairNo regPair)
+{
+ assert(regPair >= REG_PAIR_FIRST && regPair <= REG_PAIR_LAST);
+
+ return genRegMask(genRegPairLo(regPair)) | genRegMask(genRegPairHi(regPair));
+}
+
+/*****************************************************************************
+ *
+ * Assumes that "reg" is of the given "type". Return the next unused reg number after "reg"
+ * of this type, else REG_NA if there are no more.
+ */
+
+inline regNumber regNextOfType(regNumber reg, var_types type)
+{
+ regNumber regReturn;
+
+#ifdef _TARGET_ARM_
+ if (type == TYP_DOUBLE)
+ {
+ // Skip odd FP registers for double-precision types
+ assert(floatRegCanHoldType(reg, type));
+ regReturn = regNumber(reg + 2);
+ }
+ else
+ {
+ regReturn = REG_NEXT(reg);
+ }
+#else // _TARGET_ARM_
+ regReturn = REG_NEXT(reg);
+#endif
+
+ if (varTypeIsFloating(type))
+ {
+ if (regReturn > REG_FP_LAST)
+ {
+ regReturn = REG_NA;
+ }
+ }
+ else
+ {
+ if (regReturn > REG_INT_LAST)
+ {
+ regReturn = REG_NA;
+ }
+ }
+
+ return regReturn;
+}
+
+/*****************************************************************************
+ *
+ * Type checks
+ */
+
+inline bool isRegPairType(int /* s/b "var_types" */ type)
+{
+#ifdef _TARGET_64BIT_
+ return false;
+#elif CPU_HAS_FP_SUPPORT
+ return type == TYP_LONG;
+#else
+ return type == TYP_LONG || type == TYP_DOUBLE;
+#endif
+}
+
+inline bool isFloatRegType(int /* s/b "var_types" */ type)
+{
+#if CPU_HAS_FP_SUPPORT
+ return type == TYP_DOUBLE || type == TYP_FLOAT;
+#else
+ return false;
+#endif
+}
+
+// If the WINDOWS_AMD64_ABI is defined make sure that _TARGET_AMD64_ is also defined.
+#if defined(WINDOWS_AMD64_ABI)
+#if !defined(_TARGET_AMD64_)
+#error When WINDOWS_AMD64_ABI is defined you must define _TARGET_AMD64_ as well.
+#endif
+#endif
+
+/*****************************************************************************/
+// Some sanity checks on some of the register masks
+// Stack pointer is never part of RBM_ALLINT
+C_ASSERT((RBM_ALLINT & RBM_SPBASE) == RBM_NONE);
+C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_SPBASE) == RBM_NONE);
+
+#if ETW_EBP_FRAMED
+// Frame pointer isn't either if we're supporting ETW frame chaining
+C_ASSERT((RBM_ALLINT & RBM_FPBASE) == RBM_NONE);
+C_ASSERT((RBM_INT_CALLEE_SAVED & RBM_FPBASE) == RBM_NONE);
+#endif
+/*****************************************************************************/
+
+/*****************************************************************************/
+#endif // _TARGET_H_
+/*****************************************************************************/
diff --git a/src/jit/targetamd64.cpp b/src/jit/targetamd64.cpp
new file mode 100644
index 0000000000..0cb302ae34
--- /dev/null
+++ b/src/jit/targetamd64.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_AMD64_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "x64";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L;
+
+#endif // _TARGET_AMD64_
diff --git a/src/jit/targetarm.cpp b/src/jit/targetarm.cpp
new file mode 100644
index 0000000000..f0ea5ca534
--- /dev/null
+++ b/src/jit/targetarm.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "arm";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L;
+
+#endif // _TARGET_ARM_
diff --git a/src/jit/targetarm64.cpp b/src/jit/targetarm64.cpp
new file mode 100644
index 0000000000..2acbe1a050
--- /dev/null
+++ b/src/jit/targetarm64.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "arm64";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_R2L;
+
+#endif // _TARGET_ARM64_
diff --git a/src/jit/targetx86.cpp b/src/jit/targetx86.cpp
new file mode 100644
index 0000000000..500f4e0651
--- /dev/null
+++ b/src/jit/targetx86.cpp
@@ -0,0 +1,19 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_X86_)
+
+#include "target.h"
+
+const char* Target::g_tgtCPUName = "x86";
+const Target::ArgOrder Target::g_tgtArgOrder = ARG_ORDER_L2R;
+
+#endif // _TARGET_X86_
diff --git a/src/jit/tinyarray.h b/src/jit/tinyarray.h
new file mode 100644
index 0000000000..17d7e044b2
--- /dev/null
+++ b/src/jit/tinyarray.h
@@ -0,0 +1,79 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#ifndef TINYARRAY_H
+#define TINYARRAY_H
+
+/*****************************************************************************/
+
+// This is an array packed into some kind of integral data type.
+// storageType is the integral type into which the array is packed,
+// itemType is the type of the array elements, and
+// bits_per_element is the size of each element in bits.
+// (An illustrative usage sketch follows the class definition.)
+template <class storageType, class itemType, int bits_per_element>
+class TinyArray
+{
+public:
+    // operator[] returns a 'ref' (usually a ref to the element type).
+    // This presents a problem if you want to implement something like a
+    // bit vector via this packed array, because you cannot make a ref to
+    // the element type.
+    // The trick is to define something that acts like a ref (TinyArrayRef in this case),
+    // which for our purposes means you can assign between it and our chosen
+    // element type.
+ class TinyArrayRef
+ {
+ public:
+ // this is really the getter for the array.
+ operator itemType()
+ {
+ storageType mask = ((1 << bits_per_element) - 1);
+ int shift = bits_per_element * index;
+
+ itemType result = (itemType)((*data >> shift) & mask);
+ return result;
+ }
+
+ void operator=(const itemType b)
+ {
+ storageType mask = ((1 << bits_per_element) - 1);
+ assert(itemType(b & mask) == b);
+
+ mask <<= bits_per_element * index;
+
+ *data &= ~mask;
+ *data |= b << (bits_per_element * index);
+ }
+ friend class TinyArray;
+
+ protected:
+ TinyArrayRef(storageType* d, int idx) : data(d), index(idx)
+ {
+ }
+
+ storageType* data;
+ int index;
+ };
+
+ storageType data;
+
+ void clear()
+ {
+ data = 0;
+ }
+
+ TinyArrayRef operator[](unsigned int n)
+ {
+        assert((n + 1) * bits_per_element <= sizeof(storageType) * 8); // the packed elements must fit in 'data'
+ return TinyArrayRef(&data, n);
+ }
+ // only use this for clearing it
+ void operator=(void* rhs)
+ {
+ assert(rhs == NULL);
+ data = 0;
+ }
+};
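+// Illustrative usage of TinyArray (a hypothetical sketch; this instantiation exists only to
+// show how the packing works):
+//
+//   TinyArray<unsigned, unsigned, 4> nibbles; // eight 4-bit elements packed into one 32-bit unsigned
+//   nibbles.clear();
+//   nibbles[0] = 0x3;                         // stored in bits [3:0] of 'data'
+//   nibbles[2] = 0x9;                         // stored in bits [11:8] of 'data'
+//   unsigned v = nibbles[2];                  // reads back 9 via TinyArrayRef's conversion operator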
+
+#endif // TINYARRAY_H
diff --git a/src/jit/titypes.h b/src/jit/titypes.h
new file mode 100644
index 0000000000..a659320709
--- /dev/null
+++ b/src/jit/titypes.h
@@ -0,0 +1,15 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+DEF_TI(TI_ERROR, "<ERROR>")
+DEF_TI(TI_REF, "Ref")
+DEF_TI(TI_STRUCT, "Struct")
+DEF_TI(TI_METHOD, "Method")
+DEF_TI(TI_BYTE, "Byte")
+DEF_TI(TI_SHORT, "Short")
+DEF_TI(TI_INT, "Int")
+DEF_TI(TI_LONG, "Long")
+DEF_TI(TI_FLOAT, "Float")
+DEF_TI(TI_DOUBLE, "Double")
+DEF_TI(TI_NULL, "Null")
diff --git a/src/jit/typeinfo.cpp b/src/jit/typeinfo.cpp
new file mode 100644
index 0000000000..51429cca38
--- /dev/null
+++ b/src/jit/typeinfo.cpp
@@ -0,0 +1,405 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX typeInfo XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "_typeinfo.h"
+
+BOOL Compiler::tiCompatibleWith(const typeInfo& child, const typeInfo& parent, bool normalisedForStack) const
+{
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf("\n");
+ printf(TI_DUMP_PADDING);
+ printf("Verifying compatibility against types: ");
+ child.Dump();
+ printf(" and ");
+ parent.Dump();
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ BOOL compatible = typeInfo::tiCompatibleWith(info.compCompHnd, child, parent, normalisedForStack);
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf(compatible ? " [YES]" : " [NO]");
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ return compatible;
+}
+
+BOOL Compiler::tiMergeCompatibleWith(const typeInfo& child, const typeInfo& parent, bool normalisedForStack) const
+{
+ return typeInfo::tiMergeCompatibleWith(info.compCompHnd, child, parent, normalisedForStack);
+}
+
+BOOL Compiler::tiMergeToCommonParent(typeInfo* pDest, const typeInfo* pSrc, bool* changed) const
+{
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf("\n");
+ printf(TI_DUMP_PADDING);
+ printf("Attempting to merge types: ");
+ pDest->Dump();
+ printf(" and ");
+ pSrc->Dump();
+ printf("\n");
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ BOOL mergeable = typeInfo::tiMergeToCommonParent(info.compCompHnd, pDest, pSrc, changed);
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+ if (VERBOSE && tiVerificationNeeded)
+ {
+ printf(TI_DUMP_PADDING);
+ printf((mergeable == TRUE) ? "Merge successful" : "Couldn't merge types");
+ if (*changed)
+ {
+ assert(mergeable);
+ printf(", destination type changed to: ");
+ pDest->Dump();
+ }
+ printf("\n");
+ }
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
+
+ return mergeable;
+}
+
+static BOOL tiCompatibleWithByRef(COMP_HANDLE CompHnd, const typeInfo& child, const typeInfo& parent)
+{
+ assert(parent.IsByRef());
+
+ if (!child.IsByRef())
+ {
+ return FALSE;
+ }
+
+ if (child.IsReadonlyByRef() && !parent.IsReadonlyByRef())
+ {
+ return FALSE;
+ }
+
+ // Byrefs are compatible if the underlying types are equivalent
+ typeInfo childTarget = ::DereferenceByRef(child);
+ typeInfo parentTarget = ::DereferenceByRef(parent);
+
+ if (typeInfo::AreEquivalent(childTarget, parentTarget))
+ {
+ return TRUE;
+ }
+
+ // Make sure that both types have a valid m_cls
+ if ((childTarget.IsType(TI_REF) || childTarget.IsType(TI_STRUCT)) &&
+ (parentTarget.IsType(TI_REF) || parentTarget.IsType(TI_STRUCT)))
+ {
+ return CompHnd->areTypesEquivalent(childTarget.GetClassHandle(), parentTarget.GetClassHandle());
+ }
+
+ return FALSE;
+}
+
+/*****************************************************************************
+ * Verify child is compatible with the template parent. Basically, that
+ * child is a "subclass" of parent - it can be substituted for parent
+ * anywhere. Note that if parent contains fancy flags, such as "uninitialized",
+ * "is this ptr", or "has byref local/field" info, then child must also
+ * contain those flags, otherwise FALSE will be returned!
+ *
+ * Rules for determining compatibility:
+ *
+ * If parent is a primitive type or value class, then child must be the
+ * same primitive type or value class. The exception is that the built in
+ * value classes System/Boolean etc. are treated as synonyms for
+ * TI_BYTE etc.
+ *
+ * If parent is a byref of a primitive type or value class, then child
+ * must be a byref of the same (rules same as above case).
+ *
+ * Byrefs are compatible only with byrefs.
+ *
+ * If parent is an object, child must be a subclass of it, implement it
+ * (if it is an interface), or be null.
+ *
+ * If parent is an array, child must be the same or subclassed array.
+ *
+ * If parent is a null objref, only null is compatible with it.
+ *
+ * If the "uninitialized", "by ref local/field", "this pointer" or other flags
+ * are different, the items are incompatible.
+ *
+ * parent CANNOT be an undefined (dead) item.
+ *
+ */
+
+BOOL typeInfo::tiCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack)
+{
+ assert(child.IsDead() || !normalisedForStack || typeInfo::AreEquivalent(::NormaliseForStack(child), child));
+ assert(parent.IsDead() || !normalisedForStack || typeInfo::AreEquivalent(::NormaliseForStack(parent), parent));
+
+ if (typeInfo::AreEquivalent(child, parent))
+ {
+ return TRUE;
+ }
+
+ if (parent.IsUnboxedGenericTypeVar() || child.IsUnboxedGenericTypeVar())
+ {
+ return (FALSE); // need to have had child == parent
+ }
+ else if (parent.IsType(TI_REF))
+ {
+ // An uninitialized objRef is not compatible to initialized.
+ if (child.IsUninitialisedObjRef() && !parent.IsUninitialisedObjRef())
+ {
+ return FALSE;
+ }
+
+ if (child.IsNullObjRef())
+ { // NULL can be any reference type
+ return TRUE;
+ }
+ if (!child.IsType(TI_REF))
+ {
+ return FALSE;
+ }
+
+ return CompHnd->canCast(child.m_cls, parent.m_cls);
+ }
+ else if (parent.IsType(TI_METHOD))
+ {
+ if (!child.IsType(TI_METHOD))
+ {
+ return FALSE;
+ }
+
+ // Right now we don't bother merging method handles
+ return FALSE;
+ }
+ else if (parent.IsType(TI_STRUCT))
+ {
+ if (!child.IsType(TI_STRUCT))
+ {
+ return FALSE;
+ }
+
+ // Structures are compatible if they are equivalent
+ return CompHnd->areTypesEquivalent(child.m_cls, parent.m_cls);
+ }
+ else if (parent.IsByRef())
+ {
+ return tiCompatibleWithByRef(CompHnd, child, parent);
+ }
+#ifdef _TARGET_64BIT_
+ // On 64-bit targets we have precise representation for native int, so these rules
+ // represent the fact that the ECMA spec permits the implicit conversion
+ // between an int32 and a native int.
+ else if (parent.IsType(TI_INT) && typeInfo::AreEquivalent(nativeInt(), child))
+ {
+ return TRUE;
+ }
+ else if (typeInfo::AreEquivalent(nativeInt(), parent) && child.IsType(TI_INT))
+ {
+ return TRUE;
+ }
+#endif // _TARGET_64BIT_
+ return FALSE;
+}
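+// Illustrative expectations for the 64-bit native int rule above (a sketch; 'compHnd' stands for
+// any valid COMP_HANDLE, and typeInfo(TI_INT) is assumed to use the ti_types constructor from
+// _typeinfo.h):
+//
+//   assert(typeInfo::tiCompatibleWith(compHnd, typeInfo(TI_INT), typeInfo::nativeInt(), true));
+//   assert(typeInfo::tiCompatibleWith(compHnd, typeInfo::nativeInt(), typeInfo(TI_INT), true));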
+
+BOOL typeInfo::tiMergeCompatibleWith(COMP_HANDLE CompHnd,
+ const typeInfo& child,
+ const typeInfo& parent,
+ bool normalisedForStack)
+{
+ if (!child.IsPermanentHomeByRef() && parent.IsPermanentHomeByRef())
+ {
+ return FALSE;
+ }
+
+ return typeInfo::tiCompatibleWith(CompHnd, child, parent, normalisedForStack);
+}
+
+/*****************************************************************************
+ * Merge pDest and pSrc to find some commonality (e.g. a common parent).
+ * Copy the result to pDest, marking it dead if no commonality can be found.
+ *
+ * null ^ null -> null
+ * Object ^ null -> Object
+ * [I4 ^ null -> [I4
+ * InputStream ^ OutputStream -> Stream
+ * InputStream ^ NULL -> InputStream
+ * [I4 ^ Object -> Object
+ * [I4 ^ [Object -> Array
+ * [I4 ^ [R8 -> Array
+ * [Foo ^ I4 -> DEAD
+ * [Foo ^ [I1 -> Array
+ * [InputStream ^ [OutputStream -> Array
+ * DEAD ^ X -> DEAD
+ * [Intfc ^ [OutputStream -> Array
+ * Intf ^ [OutputStream -> Object
+ * [[InStream ^ [[OutStream -> Array
+ * [[InStream ^ [OutStream -> Array
+ * [[Foo ^ [Object -> Array
+ *
+ * Importantly:
+ * [I1 ^ [U1 -> either [I1 or [U1
+ * etc.
+ *
+ * Also, System/Int32 and I4 merge -> I4, etc.
+ *
+ * Returns FALSE if the merge was completely incompatible (i.e. the item became
+ * dead).
+ *
+ */
+
+BOOL typeInfo::tiMergeToCommonParent(COMP_HANDLE CompHnd, typeInfo* pDest, const typeInfo* pSrc, bool* changed)
+{
+ assert(pSrc->IsDead() || typeInfo::AreEquivalent(::NormaliseForStack(*pSrc), *pSrc));
+ assert(pDest->IsDead() || typeInfo::AreEquivalent(::NormaliseForStack(*pDest), *pDest));
+
+ // Merge the auxiliary information like "this" pointer tracking, etc...
+
+ // Remember the pre-state, so we can tell if it changed.
+ *changed = false;
+ DWORD destFlagsBefore = pDest->m_flags;
+
+ // This bit is only set if both pDest and pSrc have it set
+ pDest->m_flags &= (pSrc->m_flags | ~TI_FLAG_THIS_PTR);
+
+ // This bit is set if either pDest or pSrc have it set
+ pDest->m_flags |= (pSrc->m_flags & TI_FLAG_UNINIT_OBJREF);
+
+ // This bit is set if either pDest or pSrc have it set
+ pDest->m_flags |= (pSrc->m_flags & TI_FLAG_BYREF_READONLY);
+
+ // If the byref wasn't permanent home in both sides, then merge won't have the bit set
+ pDest->m_flags &= (pSrc->m_flags | ~TI_FLAG_BYREF_PERMANENT_HOME);
+
+ if (pDest->m_flags != destFlagsBefore)
+ {
+ *changed = true;
+ }
+
+ // OK the main event. Merge the main types
+ if (typeInfo::AreEquivalent(*pDest, *pSrc))
+ {
+ return (TRUE);
+ }
+
+ if (pDest->IsUnboxedGenericTypeVar() || pSrc->IsUnboxedGenericTypeVar())
+ {
+ // Should have had *pDest == *pSrc
+ goto FAIL;
+ }
+ if (pDest->IsType(TI_REF))
+ {
+ if (pSrc->IsType(TI_NULL))
+ { // NULL can be any reference type
+ return TRUE;
+ }
+ if (!pSrc->IsType(TI_REF))
+ {
+ goto FAIL;
+ }
+
+        // Ask the EE to find the common parent. This always succeeds since System.Object always works.
+ CORINFO_CLASS_HANDLE pDestClsBefore = pDest->m_cls;
+ pDest->m_cls = CompHnd->mergeClasses(pDest->GetClassHandle(), pSrc->GetClassHandle());
+ if (pDestClsBefore != pDest->m_cls)
+ {
+ *changed = true;
+ }
+ return TRUE;
+ }
+ else if (pDest->IsType(TI_NULL))
+ {
+ if (pSrc->IsType(TI_REF)) // NULL can be any reference type
+ {
+ *pDest = *pSrc;
+ *changed = true;
+ return TRUE;
+ }
+ goto FAIL;
+ }
+ else if (pDest->IsType(TI_STRUCT))
+ {
+ if (pSrc->IsType(TI_STRUCT) && CompHnd->areTypesEquivalent(pDest->GetClassHandle(), pSrc->GetClassHandle()))
+ {
+ return TRUE;
+ }
+ goto FAIL;
+ }
+ else if (pDest->IsByRef())
+ {
+ return tiCompatibleWithByRef(CompHnd, *pSrc, *pDest);
+ }
+#ifdef _TARGET_64BIT_
+ // On 64-bit targets we have precise representation for native int, so these rules
+ // represent the fact that the ECMA spec permits the implicit conversion
+ // between an int32 and a native int.
+ else if (typeInfo::AreEquivalent(*pDest, typeInfo::nativeInt()) && pSrc->IsType(TI_INT))
+ {
+ return TRUE;
+ }
+ else if (typeInfo::AreEquivalent(*pSrc, typeInfo::nativeInt()) && pDest->IsType(TI_INT))
+ {
+ *pDest = *pSrc;
+ *changed = true;
+ return TRUE;
+ }
+#endif // _TARGET_64BIT_
+
+FAIL:
+ *pDest = typeInfo();
+ return FALSE;
+}
+
+#ifdef DEBUG
+#if VERBOSE_VERIFY
+// Utility method to have a detailed dump of a TypeInfo object
+void typeInfo::Dump() const
+{
+ char flagsStr[8];
+
+ flagsStr[0] = ((m_flags & TI_FLAG_UNINIT_OBJREF) != 0) ? 'U' : '-';
+ flagsStr[1] = ((m_flags & TI_FLAG_BYREF) != 0) ? 'B' : '-';
+ flagsStr[2] = ((m_flags & TI_FLAG_BYREF_READONLY) != 0) ? 'R' : '-';
+ flagsStr[3] = ((m_flags & TI_FLAG_NATIVE_INT) != 0) ? 'N' : '-';
+ flagsStr[4] = ((m_flags & TI_FLAG_THIS_PTR) != 0) ? 'T' : '-';
+ flagsStr[5] = ((m_flags & TI_FLAG_BYREF_PERMANENT_HOME) != 0) ? 'P' : '-';
+ flagsStr[6] = ((m_flags & TI_FLAG_GENERIC_TYPE_VAR) != 0) ? 'G' : '-';
+ flagsStr[7] = '\0';
+
+ printf("[%s(%X) {%s}]", tiType2Str(m_bits.type), m_cls, flagsStr);
+}
+#endif // VERBOSE_VERIFY
+#endif // DEBUG
diff --git a/src/jit/typelist.h b/src/jit/typelist.h
new file mode 100644
index 0000000000..ed5884359d
--- /dev/null
+++ b/src/jit/typelist.h
@@ -0,0 +1,81 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#define GCS EA_GCREF
+#define BRS EA_BYREF
+#define PS EA_PTRSIZE
+#define PST (sizeof(void*) / sizeof(int))
+
+#ifdef _TARGET_64BIT_
+#define VTF_I32 0
+#define VTF_I64 VTF_I
+#else
+#define VTF_I32 VTF_I
+#define VTF_I64 0
+#endif
+
+/* tn - TYP_name
+ nm - name string
+ jitType - The jit compresses types that are 'equivalent', this is the jit type genActualType()
+ verType - Used for type checking
+ sz - size in bytes (genTypeSize(t))
+ sze - size in bytes for the emitter (GC types are encoded) (emitTypeSize(t))
+ asze- size in bytes for the emitter (GC types are encoded) (emitActualTypeSize(t))
+ st - stack slots (slots are sizeof(int) bytes) (genTypeStSzs())
+ al - alignment
+ tf - flags
+ howUsed - If a variable is used (referenced) as the type
+
+DEF_TP(tn ,nm , jitType, verType, sz,sze,asze, st,al, tf, howUsed )
+*/
+
+// clang-format off
+DEF_TP(UNDEF ,"<UNDEF>" , TYP_UNDEF, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+DEF_TP(VOID ,"void" , TYP_VOID, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+
+DEF_TP(BOOL ,"bool" , TYP_INT, TI_BYTE, 1, 1, 4, 1, 1, VTF_INT|VTF_UNS,TYPE_REF_INT)
+DEF_TP(BYTE ,"byte" , TYP_INT, TI_BYTE, 1, 1, 4, 1, 1, VTF_INT, TYPE_REF_INT)
+DEF_TP(UBYTE ,"ubyte" , TYP_INT, TI_BYTE, 1, 1, 4, 1, 1, VTF_INT|VTF_UNS,TYPE_REF_INT)
+
+DEF_TP(CHAR ,"char" , TYP_INT, TI_SHORT, 2, 2, 4, 1, 2, VTF_INT|VTF_UNS,TYPE_REF_INT)
+DEF_TP(SHORT ,"short" , TYP_INT, TI_SHORT, 2, 2, 4, 1, 2, VTF_INT, TYPE_REF_INT)
+DEF_TP(USHORT ,"ushort" , TYP_INT, TI_SHORT, 2, 2, 4, 1, 2, VTF_INT|VTF_UNS,TYPE_REF_INT)
+
+DEF_TP(INT ,"int" , TYP_INT, TI_INT, 4, 4, 4, 1, 4, VTF_INT|VTF_I32, TYPE_REF_INT)
+DEF_TP(UINT ,"uint" , TYP_INT, TI_INT, 4, 4, 4, 1, 4, VTF_INT|VTF_UNS|VTF_I32,TYPE_REF_INT) // Only used in GT_CAST nodes
+
+DEF_TP(LONG ,"long" , TYP_LONG, TI_LONG, 8, PS, PS, 2, 8, VTF_INT|VTF_I64, TYPE_REF_LNG)
+DEF_TP(ULONG ,"ulong" , TYP_LONG, TI_LONG, 8, PS, PS, 2, 8, VTF_INT|VTF_UNS|VTF_I64,TYPE_REF_LNG) // Only used in GT_CAST nodes
+
+DEF_TP(FLOAT ,"float" , TYP_FLOAT, TI_FLOAT, 4, 4, 4, 1, 4, VTF_FLT, TYPE_REF_FLT)
+DEF_TP(DOUBLE ,"double" , TYP_DOUBLE, TI_DOUBLE,8, 8, 8, 2, 8, VTF_FLT, TYPE_REF_DBL)
+
+DEF_TP(REF ,"ref" , TYP_REF, TI_REF, PS,GCS,GCS, PST,PS, VTF_ANY|VTF_GCR|VTF_I,TYPE_REF_PTR)
+DEF_TP(BYREF ,"byref" , TYP_BYREF, TI_ERROR,PS,BRS,BRS, PST,PS, VTF_ANY|VTF_BYR|VTF_I,TYPE_REF_BYR)
+DEF_TP(ARRAY ,"array" , TYP_REF, TI_REF, PS,GCS,GCS, PST,PS, VTF_ANY|VTF_GCR|VTF_I,TYPE_REF_PTR)
+DEF_TP(STRUCT ,"struct" , TYP_STRUCT, TI_STRUCT,0, 0, 0, 1, 4, VTF_S, TYPE_REF_STC)
+
+DEF_TP(BLK ,"blk" , TYP_BLK, TI_ERROR, 0, 0, 0, 1, 4, VTF_ANY, 0 ) // blob of memory
+DEF_TP(LCLBLK ,"lclBlk" , TYP_LCLBLK, TI_ERROR, 0, 0, 0, 1, 4, VTF_ANY, 0 ) // preallocated memory for locspace
+
+DEF_TP(PTR ,"pointer" , TYP_PTR, TI_ERROR,PS, PS, PS, PST,PS, VTF_ANY|VTF_I, TYPE_REF_PTR) // (not currently used)
+DEF_TP(FNC ,"function", TYP_FNC, TI_ERROR, 0, PS, PS, 0, 0, VTF_ANY|VTF_I, 0 )
+
+#ifdef FEATURE_SIMD
+// Amd64: The size and alignment of a SIMD vector vary at JIT time based on whether the target arch supports AVX or SSE2.
+DEF_TP(SIMD8 ,"simd8" , TYP_SIMD8, TI_STRUCT, 8, 8, 8, 2, 8, VTF_S, TYPE_REF_STC)
+DEF_TP(SIMD12 ,"simd12" , TYP_SIMD12, TI_STRUCT,12,16, 16, 4,16, VTF_S, TYPE_REF_STC)
+DEF_TP(SIMD16 ,"simd16" , TYP_SIMD16, TI_STRUCT,16,16, 16, 4,16, VTF_S, TYPE_REF_STC)
+DEF_TP(SIMD32 ,"simd32" , TYP_SIMD32, TI_STRUCT,32,32, 32, 8,16, VTF_S, TYPE_REF_STC)
+#endif // FEATURE_SIMD
+
+DEF_TP(UNKNOWN ,"unknown" ,TYP_UNKNOWN, TI_ERROR, 0, 0, 0, 0, 0, VTF_ANY, 0 )
+// clang-format on
+
+#undef GCS
+#undef BRS
+#undef PS
+#undef PST
+#undef VTF_I32
+#undef VTF_I64
diff --git a/src/jit/unwind.cpp b/src/jit/unwind.cpp
new file mode 100644
index 0000000000..4568fed75a
--- /dev/null
+++ b/src/jit/unwind.cpp
@@ -0,0 +1,171 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if FEATURE_EH_FUNCLETS
+
+//------------------------------------------------------------------------
+// Compiler::unwindGetFuncLocations: Get the start/end emitter locations for this
+// function or funclet. If 'getHotSectionData' is true, get the start/end locations
+// for the hot section. Otherwise, get the data for the cold section.
+//
+// Note that we grab these locations before the prolog and epilogs are generated, so the
+// locations must remain correct after the prolog and epilogs are generated.
+//
+// For the prolog, instructions are put in the special, preallocated, prolog instruction group.
+// We don't want to expose the emitPrologIG unnecessarily (locations are actually pointers to
+// emitter instruction groups). Since we know the offset of the start of the function/funclet,
+// where the prolog is, will be zero, we use a nullptr start location to indicate that.
+//
+// There is no instruction group beyond the end of the function, so there is no
+// location to indicate that. Once again, use nullptr for that.
+//
+// Intermediate locations point at the first instruction group of a funclet, which is a
+// placeholder IG. These are converted to real IGs, not deleted and replaced, so the location
+// remains valid.
+//
+// Arguments:
+// func - main function or funclet to get locations for.
+// getHotSectionData - 'true' to get the hot section data, 'false' to get the cold section data.
+// ppStartLoc - OUT parameter. Set to the start emitter location.
+//    ppEndLoc           - OUT parameter. Set to the end emitter location (the location immediately after
+// the range; the 'end' location is not inclusive).
+//
+// Notes:
+// A start location of nullptr means the beginning of the code.
+// An end location of nullptr means the end of the code.
+//
+void Compiler::unwindGetFuncLocations(FuncInfoDsc* func,
+ bool getHotSectionData,
+ /* OUT */ emitLocation** ppStartLoc,
+ /* OUT */ emitLocation** ppEndLoc)
+{
+ if (func->funKind == FUNC_ROOT)
+ {
+ // Since all funclets are pulled out of line, the main code size is everything
+ // up to the first handler. If the function is hot/cold split, we need to get the
+ // appropriate sub-range.
+
+ if (getHotSectionData)
+ {
+ *ppStartLoc = nullptr; // nullptr emit location means the beginning of the code. This is to handle the first
+ // fragment prolog.
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ // The hot section only goes up to the cold section
+ assert(fgFirstFuncletBB == nullptr);
+
+ *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
+ }
+ else
+ {
+ if (fgFirstFuncletBB != nullptr)
+ {
+ *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstFuncletBB));
+ }
+ else
+ {
+ *ppEndLoc = nullptr; // nullptr end location means the end of the code
+ }
+ }
+ }
+ else
+ {
+ assert(fgFirstFuncletBB == nullptr); // TODO-CQ: support hot/cold splitting in functions with EH
+ assert(fgFirstColdBlock != nullptr); // There better be a cold section!
+
+ *ppStartLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
+ *ppEndLoc = nullptr; // nullptr end location means the end of the code
+ }
+ }
+ else
+ {
+ assert(getHotSectionData); // TODO-CQ: support funclets in cold section
+
+ EHblkDsc* HBtab = ehGetDsc(func->funEHIndex);
+
+ if (func->funKind == FUNC_FILTER)
+ {
+ assert(HBtab->HasFilter());
+ *ppStartLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdFilter));
+ *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdHndBeg));
+ }
+ else
+ {
+ assert(func->funKind == FUNC_HANDLER);
+ *ppStartLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdHndBeg));
+ *ppEndLoc = (HBtab->ebdHndLast->bbNext == nullptr)
+ ? nullptr
+ : new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(HBtab->ebdHndLast->bbNext));
+ }
+ }
+}
+
+#endif // FEATURE_EH_FUNCLETS
+
+#if defined(_TARGET_AMD64_)
+
+// See unwindAmd64.cpp
+
+#elif defined(_TARGET_ARM64_)
+
+// See unwindArm64.cpp
+
+#elif defined(_TARGET_ARM_)
+
+// See unwindArm.cpp
+
+#elif defined(_TARGET_X86_)
+
+// Stub routines that do nothing
+void Compiler::unwindBegProlog()
+{
+}
+void Compiler::unwindEndProlog()
+{
+}
+void Compiler::unwindBegEpilog()
+{
+}
+void Compiler::unwindEndEpilog()
+{
+}
+void Compiler::unwindReserve()
+{
+}
+void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
+{
+}
+void Compiler::unwindPush(regNumber reg)
+{
+}
+void Compiler::unwindAllocStack(unsigned size)
+{
+}
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+}
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+}
+
+#else // _TARGET_*
+
+#error Unsupported or unset target architecture
+
+#endif // _TARGET_*
diff --git a/src/jit/unwind.h b/src/jit/unwind.h
new file mode 100644
index 0000000000..27d23b1b54
--- /dev/null
+++ b/src/jit/unwind.h
@@ -0,0 +1,852 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef _TARGET_ARMARCH_
+
+// Windows no longer imposes a maximum prolog size. However, we still have an
+// assert here just to inform us if we increase the size of the prolog
+// accidentally, as there is still a slight performance advantage in the
+// OS unwinder to having as few unwind codes as possible.
+// You can increase this "max" number if necessary.
+
+#if defined(_TARGET_ARM_)
+const unsigned MAX_PROLOG_SIZE_BYTES = 40;
+const unsigned MAX_EPILOG_SIZE_BYTES = 40;
+#define UWC_END 0xFF // "end" unwind code
+#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 19)
+#define UW_MAX_CODE_WORDS_COUNT 15 // Max number that can be encoded in the "Code Words" field of the .pdata record
+#define UW_MAX_EPILOG_START_INDEX 0xFFU // Max number that can be encoded in the "Epilog Start Index" field
+ // of the .pdata record
+#elif defined(_TARGET_ARM64_)
+const unsigned MAX_PROLOG_SIZE_BYTES = 100;
+const unsigned MAX_EPILOG_SIZE_BYTES = 100;
+#define UWC_END 0xE4 // "end" unwind code
+#define UWC_END_C 0xE5 // "end_c" unwind code
+#define UW_MAX_FRAGMENT_SIZE_BYTES (1U << 20)
+#define UW_MAX_CODE_WORDS_COUNT 31
+#define UW_MAX_EPILOG_START_INDEX 0x3FFU
+#endif // _TARGET_ARM64_
+
+#define UW_MAX_EPILOG_COUNT 31 // Max number that can be encoded in the "Epilog count" field
+ // of the .pdata record
+#define UW_MAX_EXTENDED_CODE_WORDS_COUNT 0xFFU // Max number that can be encoded in the "Extended Code Words"
+ // field of the .pdata record
+#define UW_MAX_EXTENDED_EPILOG_COUNT 0xFFFFU // Max number that can be encoded in the "Extended Epilog Count"
+ // field of the .pdata record
+#define UW_MAX_EPILOG_START_OFFSET 0x3FFFFU // Max number that can be encoded in the "Epilog Start Offset"
+ // field of the .pdata record
+
+//
+// Forward declaration of class defined in emit.h
+//
+
+class emitLocation;
+
+//
+// Forward declarations of classes defined in this file
+//
+
+class UnwindCodesBase;
+class UnwindPrologCodes;
+class UnwindEpilogCodes;
+class UnwindEpilogInfo;
+class UnwindFragmentInfo;
+class UnwindInfo;
+
+// UnwindBase: A base class shared by the unwind classes that require
+// a Compiler* for memory allocation.
+
+class UnwindBase
+{
+protected:
+ UnwindBase(Compiler* comp) : uwiComp(comp)
+ {
+ }
+
+ UnwindBase()
+ {
+ }
+ ~UnwindBase()
+ {
+ }
+
+// TODO: How do we get the ability to access uwiComp without error on Clang?
+#if defined(DEBUG) && !defined(__GNUC__)
+
+ template <typename T>
+ T dspPtr(T p)
+ {
+ return uwiComp->dspPtr(p);
+ }
+
+ template <typename T>
+ T dspOffset(T o)
+ {
+ return uwiComp->dspOffset(o);
+ }
+
+ static char* dspBool(bool b)
+ {
+ return (b) ? "true" : "false";
+ }
+
+#endif // DEBUG
+
+ //
+ // Data
+ //
+
+ Compiler* uwiComp;
+};
+
+// UnwindCodesBase: A base class shared by the classes used to represent the prolog
+// and epilog unwind codes.
+
+class UnwindCodesBase
+{
+public:
+ // Add a single unwind code.
+
+ virtual void AddCode(BYTE b1) = 0;
+ virtual void AddCode(BYTE b1, BYTE b2) = 0;
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3) = 0;
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4) = 0;
+
+ // Get access to the unwind codes
+
+ virtual BYTE* GetCodes() = 0;
+
+ bool IsEndCode(BYTE b)
+ {
+#if defined(_TARGET_ARM_)
+ return b >= 0xFD;
+#elif defined(_TARGET_ARM64_)
+ return (b == UWC_END); // TODO-ARM64-Bug?: what about the "end_c" code?
+#endif // _TARGET_ARM64_
+ }
+
+#ifdef DEBUG
+
+ unsigned GetCodeSizeFromUnwindCodes(bool isProlog);
+
+#endif // DEBUG
+};
+
+// UnwindPrologCodes: represents the unwind codes for a prolog sequence.
+// Prolog unwind codes arrive in reverse order from how they will be emitted.
+// Store them as a stack, storing from the end of an array towards the beginning.
+// This class is also re-used as the final location of the consolidated unwind
+// information for a function, including unwind info header, the prolog codes,
+// and any epilog codes.
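+//
+// Illustrative sketch of the stack behavior (hypothetical code bytes 0x01, 0x02, 0x03):
+//
+//   UnwindPrologCodes upc(comp); // the constructor pre-pushes four UWC_END bytes at the end of upcMem
+//   upc.AddCode(0x01);           // arrives first, will be emitted last
+//   upc.AddCode(0x02);
+//   upc.AddCode(0x03);           // arrives last, will be emitted first
+//   // GetCodes() now points at 0x03, 0x02, 0x01, UWC_END, ... and Size() returns 4
+//   // (three codes plus exactly one counted "end" code).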
+
+class UnwindPrologCodes : public UnwindBase, public UnwindCodesBase
+{
+ // UPC_LOCAL_COUNT is the amount of memory local to this class. For ARM mscorlib.dll, the maximum size is 34.
+ // Here is a histogram of other interesting sizes:
+ // <=16 79%
+ // <=24 96%
+ // <=32 99%
+ // From this data, we choose to use 24.
+
+ static const int UPC_LOCAL_COUNT = 24;
+
+public:
+ UnwindPrologCodes(Compiler* comp)
+ : UnwindBase(comp)
+ , upcMem(upcMemLocal)
+ , upcMemSize(UPC_LOCAL_COUNT)
+ , upcCodeSlot(UPC_LOCAL_COUNT)
+ , upcHeaderSlot(-1)
+ , upcEpilogSlot(-1)
+ {
+ // Assume we've got a normal end code.
+ // Push four so we can generate an array that is a multiple of 4 bytes in size with the
+ // end codes (and padding) already in place. One is the end code for the prolog codes,
+ // three are end-of-array alignment padding.
+ PushByte(UWC_END);
+ PushByte(UWC_END);
+ PushByte(UWC_END);
+ PushByte(UWC_END);
+ }
+
+ //
+ // Implementation of UnwindCodesBase
+ //
+
+ virtual void AddCode(BYTE b1)
+ {
+ PushByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2)
+ {
+ PushByte(b2);
+ PushByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ PushByte(b3);
+ PushByte(b2);
+ PushByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ PushByte(b4);
+ PushByte(b3);
+ PushByte(b2);
+ PushByte(b1);
+ }
+
+ // Return a pointer to the first unwind code byte
+ virtual BYTE* GetCodes()
+ {
+ assert(upcCodeSlot < upcMemSize); // There better be at least one code!
+ return &upcMem[upcCodeSlot];
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+
+ BYTE GetByte(int index)
+ {
+ assert(upcCodeSlot <= index && index < upcMemSize);
+ return upcMem[index];
+ }
+
+ // Push a single byte on the unwind code stack
+ void PushByte(BYTE b)
+ {
+ if (upcCodeSlot == 0)
+ {
+ // We've run out of space! Reallocate, and copy everything to a new array.
+ EnsureSize(upcMemSize + 1);
+ }
+
+ --upcCodeSlot;
+ noway_assert(0 <= upcCodeSlot && upcCodeSlot < upcMemSize);
+
+ upcMem[upcCodeSlot] = b;
+ }
+
+ // Return the size of the unwind codes, in bytes. The size is the exact size, not an aligned size.
+ // The size includes exactly one "end" code.
+ int Size()
+ {
+ // -3 because we put 4 "end" codes at the end in the constructor, and we shouldn't count that here
+        // -3 because the constructor pushes 4 "end" codes, but only one of them should be counted here
+ }
+
+ void SetFinalSize(int headerBytes, int epilogBytes);
+
+ void AddHeaderWord(DWORD d);
+
+ void GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize);
+
+ // AppendEpilog: copy the epilog bytes to the next epilog bytes slot
+ void AppendEpilog(UnwindEpilogInfo* pEpi);
+
+ // Match the prolog codes to a set of epilog codes
+ int Match(UnwindEpilogInfo* pEpi);
+
+ // Copy the prolog codes from another prolog
+ void CopyFrom(UnwindPrologCodes* pCopyFrom);
+
+ UnwindPrologCodes()
+ {
+ }
+ ~UnwindPrologCodes()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ void EnsureSize(int requiredSize);
+
+ // No copy constructor or operator=
+ UnwindPrologCodes(const UnwindPrologCodes& info);
+ UnwindPrologCodes& operator=(const UnwindPrologCodes&);
+
+ //
+ // Data
+ //
+
+ // To store the unwind codes, we first use a local array that should satisfy almost all cases.
+ // If there are more unwind codes, we dynamically allocate memory.
+ BYTE upcMemLocal[UPC_LOCAL_COUNT];
+ BYTE* upcMem;
+
+ // upcMemSize is the number of bytes in upcMem. This is equal to UPC_LOCAL_COUNT unless
+ // we've dynamically allocated memory to store the codes.
+ int upcMemSize;
+
+ // upcCodeSlot points to the last unwind code added to the array. The array is filled in from
+ // the end, so it starts pointing one beyond the array end.
+ int upcCodeSlot;
+
+    // upcHeaderSlot points to the last header byte prepended to the array. Header bytes are
+ // filled in from the beginning, and only after SetFinalSize() is called.
+ int upcHeaderSlot;
+
+ // upcEpilogSlot points to the next epilog location to fill
+ int upcEpilogSlot;
+
+ // upcUnwindBlockSlot is only set after SetFinalSize() is called. It is the index of the first
+ // byte of the final unwind data, namely the first byte of the header.
+ int upcUnwindBlockSlot;
+};
+
+// UnwindEpilogCodes: represents the unwind codes for a single epilog sequence.
+// Epilog unwind codes arrive in the order they will be emitted. Store them as an array,
+// adding new ones to the end of the array.
+
+class UnwindEpilogCodes : public UnwindBase, public UnwindCodesBase
+{
+ // UEC_LOCAL_COUNT is the amount of memory local to this class. For ARM mscorlib.dll, the maximum size is 6,
+ // while 89% of epilogs fit in 4. So, set it to 4 to maintain array alignment and hit most cases.
+ static const int UEC_LOCAL_COUNT = 4;
+
+public:
+ UnwindEpilogCodes(Compiler* comp)
+ : UnwindBase(comp), uecMem(uecMemLocal), uecMemSize(UEC_LOCAL_COUNT), uecCodeSlot(-1), uecFinalized(false)
+ {
+ }
+
+ //
+ // Implementation of UnwindCodesBase
+ //
+
+ virtual void AddCode(BYTE b1)
+ {
+ AppendByte(b1);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2)
+ {
+ AppendByte(b1);
+ AppendByte(b2);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ AppendByte(b1);
+ AppendByte(b2);
+ AppendByte(b3);
+ }
+
+ virtual void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ AppendByte(b1);
+ AppendByte(b2);
+ AppendByte(b3);
+ AppendByte(b4);
+ }
+
+ // Return a pointer to the first unwind code byte
+ virtual BYTE* GetCodes()
+ {
+ assert(uecFinalized);
+
+ // Codes start at the beginning
+ return uecMem;
+ }
+
+ ///////////////////////////////////////////////////////////////////////////
+
+ BYTE GetByte(int index)
+ {
+ assert(0 <= index && index <= uecCodeSlot);
+ return uecMem[index];
+ }
+
+ // Add a single byte on the unwind code array
+ void AppendByte(BYTE b)
+ {
+ if (uecCodeSlot == uecMemSize - 1)
+ {
+ // We've run out of space! Reallocate, and copy everything to a new array.
+ EnsureSize(uecMemSize + 1);
+ }
+
+ ++uecCodeSlot;
+ noway_assert(0 <= uecCodeSlot && uecCodeSlot < uecMemSize);
+
+ uecMem[uecCodeSlot] = b;
+ }
+
+ // Return the size of the unwind codes, in bytes. The size is the exact size, not an aligned size.
+ int Size()
+ {
+ if (uecFinalized)
+ {
+ // Add one because uecCodeSlot is 0-based
+ return uecCodeSlot + 1;
+ }
+ else
+ {
+ // Add one because uecCodeSlot is 0-based, and one for an "end" code that isn't stored (yet).
+ return uecCodeSlot + 2;
+ }
+ }
+
+ void FinalizeCodes()
+ {
+ assert(!uecFinalized);
+ noway_assert(0 <= uecCodeSlot && uecCodeSlot < uecMemSize); // There better be at least one code!
+ BYTE lastCode = uecMem[uecCodeSlot];
+ if (!IsEndCode(lastCode)) // If the last code is an end code, we don't need to append one.
+ {
+ AppendByte(UWC_END); // Add a default "end" code to the end of the array of unwind codes
+ }
+ uecFinalized = true; // With the "end" code in place, now we're done
+
+#ifdef DEBUG
+ unsigned codeSize = GetCodeSizeFromUnwindCodes(false);
+ assert(codeSize <= MAX_EPILOG_SIZE_BYTES);
+#endif // DEBUG
+ }
+
+ UnwindEpilogCodes()
+ {
+ }
+ ~UnwindEpilogCodes()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ void EnsureSize(int requiredSize);
+
+ // No destructor, copy constructor or operator=
+ UnwindEpilogCodes(const UnwindEpilogCodes& info);
+ UnwindEpilogCodes& operator=(const UnwindEpilogCodes&);
+
+ //
+ // Data
+ //
+
+ // To store the unwind codes, we first use a local array that should satisfy almost all cases.
+ // If there are more unwind codes, we dynamically allocate memory.
+ BYTE uecMemLocal[UEC_LOCAL_COUNT];
+ BYTE* uecMem;
+
+ // uecMemSize is the number of bytes/slots in uecMem. This is equal to UEC_LOCAL_COUNT unless
+ // we've dynamically allocated memory to store the codes.
+ int uecMemSize;
+
+ // uecCodeSlot points to the last unwind code added to the array. The array is filled in from
+ // the beginning, so it starts at -1.
+ int uecCodeSlot;
+
+ // Is the unwind information finalized? Finalized info has an end code appended.
+ bool uecFinalized;
+};
+
+// UnwindEpilogInfo: represents the unwind information for a single epilog sequence. Epilogs for a
+// single function/funclet are in a linked list.
+
+class UnwindEpilogInfo : public UnwindBase
+{
+ friend class UnwindFragmentInfo;
+
+ static const unsigned EPI_ILLEGAL_OFFSET = 0xFFFFFFFF;
+
+public:
+ UnwindEpilogInfo(Compiler* comp)
+ : UnwindBase(comp)
+ , epiNext(NULL)
+ , epiEmitLocation(NULL)
+ , epiCodes(comp)
+ , epiStartOffset(EPI_ILLEGAL_OFFSET)
+ , epiMatches(false)
+ , epiStartIndex(-1)
+ {
+ }
+
+ void CaptureEmitLocation();
+
+ void FinalizeOffset();
+
+ void FinalizeCodes()
+ {
+ epiCodes.FinalizeCodes();
+ }
+
+ UNATIVE_OFFSET GetStartOffset()
+ {
+ assert(epiStartOffset != EPI_ILLEGAL_OFFSET);
+ return epiStartOffset;
+ }
+
+ int GetStartIndex()
+ {
+ assert(epiStartIndex != -1);
+ return epiStartIndex; // The final "Epilog Start Index" of this epilog's unwind codes
+ }
+
+ void SetStartIndex(int index)
+ {
+ assert(epiStartIndex == -1);
+ epiStartIndex = (int)index;
+ }
+
+ void SetMatches()
+ {
+ epiMatches = true;
+ }
+
+ bool Matches()
+ {
+ return epiMatches;
+ }
+
+ // Size of epilog unwind codes in bytes
+ int Size()
+ {
+ return epiCodes.Size();
+ }
+
+ // Return a pointer to the first unwind code byte
+ BYTE* GetCodes()
+ {
+ return epiCodes.GetCodes();
+ }
+
+ // Match the codes to a set of epilog codes
+ int Match(UnwindEpilogInfo* pEpi);
+
+ UnwindEpilogInfo()
+ {
+ }
+ ~UnwindEpilogInfo()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ // No copy constructor or operator=
+ UnwindEpilogInfo(const UnwindEpilogInfo& info);
+ UnwindEpilogInfo& operator=(const UnwindEpilogInfo&);
+
+ //
+ // Data
+ //
+
+ UnwindEpilogInfo* epiNext;
+ emitLocation* epiEmitLocation; // The emitter location of the beginning of the epilog
+ UnwindEpilogCodes epiCodes;
+ UNATIVE_OFFSET epiStartOffset; // Actual offset of the epilog, in bytes, from the start of the function. Set in
+ // FinalizeOffset().
+ bool epiMatches; // Do the epilog unwind codes match some other set of codes? If so, we don't copy these to the
+ // final set; we just point to another set.
+ int epiStartIndex; // The final "Epilog Start Index" of this epilog's unwind codes
+};
+
+// UnwindFragmentInfo: represents all the unwind information for a single fragment of a function or funclet.
+// A fragment is a section with a code size less than the maximum unwind code size: either 512K bytes, or
+// that specified by COMPlus_JitSplitFunctionSize. In most cases, there will be exactly one fragment.
+
+class UnwindFragmentInfo : public UnwindBase
+{
+ friend class UnwindInfo;
+
+ static const unsigned UFI_ILLEGAL_OFFSET = 0xFFFFFFFF;
+
+public:
+ UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog);
+
+ void FinalizeOffset();
+
+ UNATIVE_OFFSET GetStartOffset()
+ {
+ assert(ufiStartOffset != UFI_ILLEGAL_OFFSET);
+ return ufiStartOffset;
+ }
+
+ // Add an unwind code. It could be for a prolog, or for the current epilog.
+ // A single unwind code can be from 1 to 4 bytes.
+
+ void AddCode(BYTE b1)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1);
+ }
+
+ void AddCode(BYTE b1, BYTE b2)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1, b2);
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1, b2, b3);
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+ ufiCurCodes->AddCode(b1, b2, b3, b4);
+ }
+
+ unsigned EpilogCount()
+ {
+ unsigned count = 0;
+ for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ ++count;
+ }
+ return count;
+ }
+
+ void AddEpilog();
+
+ void MergeCodes();
+
+ void CopyPrologCodes(UnwindFragmentInfo* pCopyFrom);
+
+ void SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom);
+
+ bool IsAtFragmentEnd(UnwindEpilogInfo* pEpi);
+
+ // Return the full, final size of unwind block. This will be used to allocate memory for
+ // the unwind block. This is called before the code offsets are finalized.
+ // Size is in bytes.
+ ULONG Size()
+ {
+ assert(ufiSize != 0);
+ return ufiSize;
+ }
+
+ void Finalize(UNATIVE_OFFSET functionLength);
+
+ // GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes
+ void GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize)
+ {
+ ufiPrologCodes.GetFinalInfo(ppUnwindBlock, pUnwindBlockSize);
+ }
+
+ void Reserve(BOOL isFunclet, bool isHotCode);
+
+ void Allocate(
+ CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode);
+
+ UnwindFragmentInfo()
+ {
+ }
+ ~UnwindFragmentInfo()
+ {
+ }
+
+#ifdef DEBUG
+ void Dump(int indent = 0);
+#endif // DEBUG
+
+private:
+ // No copy constructor or operator=
+ UnwindFragmentInfo(const UnwindFragmentInfo& info);
+ UnwindFragmentInfo& operator=(const UnwindFragmentInfo&);
+
+ //
+ // Data
+ //
+
+ UnwindFragmentInfo* ufiNext; // The next fragment
+ emitLocation* ufiEmitLoc; // Emitter location for start of fragment
+ bool ufiHasPhantomProlog; // Are the prolog codes for a phantom prolog, or a real prolog?
+ // (For a phantom prolog, this code fragment represents a fragment in
+ // the sense of the unwind info spec; something without a real prolog.)
+ UnwindPrologCodes ufiPrologCodes; // The unwind codes for the prolog
+ UnwindEpilogInfo ufiEpilogFirst; // In-line the first epilog to avoid separate memory allocation, since
+ // almost all functions will have at least one epilog. It is pointed
+ // to by ufiEpilogList when the first epilog is added.
+ UnwindEpilogInfo* ufiEpilogList; // The head of the epilog list
+ UnwindEpilogInfo* ufiEpilogLast; // The last entry in the epilog list (the last epilog added)
+ UnwindCodesBase* ufiCurCodes; // Pointer to current unwind codes, either prolog or epilog
+
+ // Some data computed when merging the unwind codes, and used when finalizing the
+ // unwind block for emission.
+ unsigned ufiSize; // The size of the unwind data for this fragment, in bytes
+ bool ufiSetEBit;
+ bool ufiNeedExtendedCodeWordsEpilogCount;
+ unsigned ufiCodeWords;
+ unsigned ufiEpilogScopes;
+ UNATIVE_OFFSET ufiStartOffset;
+
+#ifdef DEBUG
+
+ unsigned ufiNum;
+
+ // Are we processing the prolog? The prolog must come first, followed by a (possibly empty)
+ // set of epilogs, for this function/funclet.
+ bool ufiInProlog;
+
+ static const unsigned UFI_INITIALIZED_PATTERN = 0x0FACADE0; // Something unlikely to be the fill pattern for
+ // uninitialized memory
+ unsigned ufiInitialized;
+
+#endif // DEBUG
+};
+
+// UnwindInfo: represents all the unwind information for a single function or funclet
+
+class UnwindInfo : public UnwindBase
+{
+public:
+ void InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc);
+
+ void HotColdSplitCodes(UnwindInfo* puwi);
+
+ // The following act on all the fragments that make up the unwind info for this function or funclet.
+
+ void Split();
+
+ static void EmitSplitCallback(void* context, emitLocation* emitLoc);
+
+ void Reserve(BOOL isFunclet, bool isHotCode);
+
+ void Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode);
+
+ // The following act on the current fragment (the one pointed to by 'uwiFragmentLast').
+
+ // Add an unwind code. It could be for a prolog, or for the current epilog.
+ // A single unwind code can be from 1 to 4 bytes.
+
+ void AddCode(BYTE b1)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1);
+ CaptureLocation();
+ }
+
+ void AddCode(BYTE b1, BYTE b2)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1, b2);
+ CaptureLocation();
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1, b2, b3);
+ CaptureLocation();
+ }
+
+ void AddCode(BYTE b1, BYTE b2, BYTE b3, BYTE b4)
+ {
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ INDEBUG(CheckOpsize(b1));
+
+ uwiFragmentLast->AddCode(b1, b2, b3, b4);
+ CaptureLocation();
+ }
+
+ void AddEpilog();
+
+ emitLocation* GetCurrentEmitterLocation()
+ {
+ return uwiCurLoc;
+ }
+
+#if defined(_TARGET_ARM_)
+ unsigned GetInstructionSize();
+#endif // defined(_TARGET_ARM_)
+
+ void CaptureLocation();
+
+ UnwindInfo()
+ {
+ }
+ ~UnwindInfo()
+ {
+ }
+
+#ifdef DEBUG
+
+#if defined(_TARGET_ARM_)
+ // Given the first byte of the unwind code, check that its opsize matches
+ // the last instruction added in the emitter.
+ void CheckOpsize(BYTE b1);
+#elif defined(_TARGET_ARM64_)
+ void CheckOpsize(BYTE b1)
+ {
+ } // nothing to do; all instructions are 4 bytes
+#endif // defined(_TARGET_ARM64_)
+
+ void Dump(bool isHotCode, int indent = 0);
+
+ bool uwiAddingNOP;
+
+#endif // DEBUG
+
+private:
+ void AddFragment(emitLocation* emitLoc);
+
+ // No copy constructor or operator=
+ UnwindInfo(const UnwindInfo& info);
+ UnwindInfo& operator=(const UnwindInfo&);
+
+ //
+ // Data
+ //
+
+ UnwindFragmentInfo uwiFragmentFirst; // The first fragment is directly here, so it doesn't need to be separately
+ // allocated.
+ UnwindFragmentInfo* uwiFragmentLast; // The last entry in the fragment list (the last fragment added)
+ emitLocation* uwiEndLoc; // End emitter location of this function/funclet (NULL == end of all code)
+ emitLocation* uwiCurLoc; // The current emitter location (updated after an unwind code is added), used for NOP
+ // padding, and asserts.
+
+#ifdef DEBUG
+
+ static const unsigned UWI_INITIALIZED_PATTERN = 0x0FACADE1; // Something unlikely to be the fill pattern for
+ // uninitialized memory
+ unsigned uwiInitialized;
+
+#endif // DEBUG
+};
+
+#ifdef DEBUG
+
+// Forward declaration
+void DumpUnwindInfo(Compiler* comp,
+ bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const BYTE* const pHeader,
+ ULONG unwindBlockSize);
+
+#endif // DEBUG
+
+#endif // _TARGET_ARMARCH_
diff --git a/src/jit/unwindamd64.cpp b/src/jit/unwindamd64.cpp
new file mode 100644
index 0000000000..89abdff2b3
--- /dev/null
+++ b/src/jit/unwindamd64.cpp
@@ -0,0 +1,1056 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_AMD64_)
+#ifdef UNIX_AMD64_ABI
+int Compiler::mapRegNumToDwarfReg(regNumber reg)
+{
+ int dwarfReg = DWARF_REG_ILLEGAL;
+
+ switch (reg)
+ {
+ case REG_RAX:
+ dwarfReg = 0;
+ break;
+ case REG_RCX:
+ dwarfReg = 2;
+ break;
+ case REG_RDX:
+ dwarfReg = 1;
+ break;
+ case REG_RBX:
+ dwarfReg = 3;
+ break;
+ case REG_RSP:
+ dwarfReg = 7;
+ break;
+ case REG_RBP:
+ dwarfReg = 6;
+ break;
+ case REG_RSI:
+ dwarfReg = 4;
+ break;
+ case REG_RDI:
+ dwarfReg = 5;
+ break;
+ case REG_R8:
+ dwarfReg = 8;
+ break;
+ case REG_R9:
+ dwarfReg = 9;
+ break;
+ case REG_R10:
+ dwarfReg = 10;
+ break;
+ case REG_R11:
+ dwarfReg = 11;
+ break;
+ case REG_R12:
+ dwarfReg = 12;
+ break;
+ case REG_R13:
+ dwarfReg = 13;
+ break;
+ case REG_R14:
+ dwarfReg = 14;
+ break;
+ case REG_R15:
+ dwarfReg = 15;
+ break;
+ case REG_XMM0:
+ dwarfReg = 17;
+ break;
+ case REG_XMM1:
+ dwarfReg = 18;
+ break;
+ case REG_XMM2:
+ dwarfReg = 19;
+ break;
+ case REG_XMM3:
+ dwarfReg = 20;
+ break;
+ case REG_XMM4:
+ dwarfReg = 21;
+ break;
+ case REG_XMM5:
+ dwarfReg = 22;
+ break;
+ case REG_XMM6:
+ dwarfReg = 23;
+ break;
+ case REG_XMM7:
+ dwarfReg = 24;
+ break;
+ case REG_XMM8:
+ dwarfReg = 25;
+ break;
+ case REG_XMM9:
+ dwarfReg = 26;
+ break;
+ case REG_XMM10:
+ dwarfReg = 27;
+ break;
+ case REG_XMM11:
+ dwarfReg = 28;
+ break;
+ case REG_XMM12:
+ dwarfReg = 29;
+ break;
+ case REG_XMM13:
+ dwarfReg = 30;
+ break;
+ case REG_XMM14:
+ dwarfReg = 31;
+ break;
+ case REG_XMM15:
+ dwarfReg = 32;
+ break;
+ default:
+ noway_assert(!"unexpected REG_NUM");
+ }
+
+ return dwarfReg;
+}
+
+void Compiler::createCfiCode(FuncInfoDsc* func, UCHAR codeOffset, UCHAR cfiOpcode, USHORT dwarfReg, INT offset)
+{
+ CFI_CODE cfiEntry(codeOffset, cfiOpcode, dwarfReg, offset);
+ func->cfiCodes->push_back(cfiEntry);
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindGetCurrentOffset: Calculate the current byte offset of the
+// prolog being generated.
+//
+// Arguments:
+// func - The main function or funclet of interest.
+//
+// Return Value:
+// The byte offset of the prolog currently being generated.
+//
+UNATIVE_OFFSET Compiler::unwindGetCurrentOffset(FuncInfoDsc* func)
+{
+ assert(compGeneratingProlog);
+ UNATIVE_OFFSET offset;
+ if (func->funKind == FUNC_ROOT)
+ {
+ offset = genEmitter->emitGetPrologOffsetEstimate();
+ }
+ else
+ {
+ assert(func->startLoc != nullptr);
+ offset = func->startLoc->GetFuncletPrologOffset(genEmitter);
+ }
+
+ return offset;
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindBegProlog: Initialize the unwind info data structures.
+// Called at the beginning of main function or funclet prolog generation.
+//
+void Compiler::unwindBegProlog()
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindBegPrologCFI();
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindBegPrologWindows();
+ }
+}
+
+void Compiler::unwindBegPrologWindows()
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ // There is only one prolog for a function/funclet, and it comes first. So now is
+ // a good time to initialize all the unwind data structures.
+
+ unwindGetFuncLocations(func, true, &func->startLoc, &func->endLoc);
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ unwindGetFuncLocations(func, false, &func->coldStartLoc, &func->coldEndLoc);
+ }
+
+ func->unwindCodeSlot = sizeof(func->unwindCodes);
+ func->unwindHeader.Version = 1;
+ func->unwindHeader.Flags = 0;
+ func->unwindHeader.CountOfUnwindCodes = 0;
+ func->unwindHeader.FrameRegister = 0;
+ func->unwindHeader.FrameOffset = 0;
+}
+
+#ifdef UNIX_AMD64_ABI
+template <typename T>
+inline static T* allocate_any(jitstd::allocator<void>& alloc, size_t count = 5)
+{
+ return jitstd::allocator<T>(alloc).allocate(count);
+}
+typedef jitstd::vector<CFI_CODE> CFICodeVector;
+
+void Compiler::unwindBegPrologCFI()
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ // There is only one prolog for a function/funclet, and it comes first. So now is
+ // a good time to initialize all the unwind data structures.
+
+ unwindGetFuncLocations(func, true, &func->startLoc, &func->endLoc);
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ unwindGetFuncLocations(func, false, &func->coldStartLoc, &func->coldEndLoc);
+ }
+
+ jitstd::allocator<void> allocator(getAllocator());
+
+ func->cfiCodes = new (allocate_any<CFICodeVector>(allocator), jitstd::placement_t()) CFICodeVector(allocator);
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindEndProlog: Called at the end of main function or funclet
+// prolog generation to indicate there is no more unwind information for this prolog.
+//
+void Compiler::unwindEndProlog()
+{
+ assert(compGeneratingProlog);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindBegEpilog: Called at the beginning of main function or funclet
+// epilog generation.
+//
+void Compiler::unwindBegEpilog()
+{
+ assert(compGeneratingEpilog);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEndEpilog: Called at the end of main function or funclet
+// epilog generation.
+//
+void Compiler::unwindEndEpilog()
+{
+ assert(compGeneratingEpilog);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindPush: Record a push/save of a register.
+//
+// Arguments:
+// reg - The register being pushed/saved.
+//
+void Compiler::unwindPush(regNumber reg)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindPushCFI(reg);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindPushWindows(reg);
+ }
+}
+
+void Compiler::unwindPushWindows(regNumber reg)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ assert(func->unwindCodeSlot > sizeof(UNWIND_CODE));
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ code->CodeOffset = (BYTE)cbProlog;
+
+ if ((RBM_CALLEE_SAVED & genRegMask(reg))
+#if ETW_EBP_FRAMED
+ // When ETW_EBP_FRAMED is defined, REG_FPBASE (RBP) is excluded from the
+ // callee-saved register list. Make sure the register still gets PUSH unwind
+ // info in this case, since it is pushed as a frame register.
+ || (reg == REG_FPBASE)
+#endif // ETW_EBP_FRAMED
+ )
+ {
+ code->UnwindOp = UWOP_PUSH_NONVOL;
+ code->OpInfo = (BYTE)reg;
+ }
+ else
+ {
+ // Push of a volatile register is just a small stack allocation
+ code->UnwindOp = UWOP_ALLOC_SMALL;
+ code->OpInfo = 0;
+ }
+}
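+
+// Illustrative example (hypothetical prolog, values not taken from any particular method):
+//
+//   unwindPush(REG_RBP);   // callee-saved -> UWOP_PUSH_NONVOL, OpInfo = RBP
+//   unwindPush(REG_RAX);   // volatile     -> UWOP_ALLOC_SMALL, OpInfo = 0 (an 8-byte alloc)
+//
+// The unwinder can then undo both operations by adjusting RSP and restoring RBP.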
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindPushCFI(regNumber reg)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+
+ createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, 8);
+ if ((RBM_CALLEE_SAVED & genRegMask(reg))
+#if ETW_EBP_FRAMED
+ // When ETW_EBP_FRAMED is defined, REG_FPBASE (RBP) is excluded from the
+ // callee-saved register list. Make sure the register still gets PUSH unwind
+ // info in this case, since it is pushed as a frame register.
+ || (reg == REG_FPBASE)
+#endif // ETW_EBP_FRAMED
+ )
+ {
+ createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg));
+ }
+}
+#endif // UNIX_AMD64_ABI
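+
+// For example (illustrative): on the Unix ABI, recording "push rbp" produces two CFI entries at
+// the same code offset: CFI_ADJUST_CFA_OFFSET by 8 (the push grows the frame), followed by
+// CFI_REL_OFFSET for RBP's DWARF register number (6), noting where the saved value lives
+// relative to the CFA.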
+
+//------------------------------------------------------------------------
+// Compiler::unwindAllocStack: Record a stack frame allocation (sub sp, X).
+//
+// Arguments:
+// size - The size of the stack frame allocation (the amount subtracted from the stack pointer).
+//
+void Compiler::unwindAllocStack(unsigned size)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindAllocStackCFI(size);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindAllocStackWindows(size);
+ }
+}
+
+void Compiler::unwindAllocStackWindows(unsigned size)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ assert(size % 8 == 0); // Stack size is *always* 8 byte aligned
+ UNWIND_CODE* code;
+ if (size <= 128)
+ {
+ assert(func->unwindCodeSlot > sizeof(UNWIND_CODE));
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = UWOP_ALLOC_SMALL;
+ code->OpInfo = (size - 8) / 8;
+ }
+ else if (size <= 0x7FFF8)
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(USHORT)));
+ USHORT* codedSize = (USHORT*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(USHORT)];
+ *codedSize = (USHORT)(size / 8);
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = UWOP_ALLOC_LARGE;
+ code->OpInfo = 0;
+ }
+ else
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(ULONG)));
+ ULONG* codedSize = (ULONG*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(ULONG)];
+ *codedSize = size;
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = UWOP_ALLOC_LARGE;
+ code->OpInfo = 1;
+ }
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ code->CodeOffset = (BYTE)cbProlog;
+}
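+
+// Worked examples of the encoding above (illustrative values):
+//   unwindAllocStack(0x28)    -> UWOP_ALLOC_SMALL, OpInfo = (0x28 - 8) / 8 = 4
+//   unwindAllocStack(0x1000)  -> UWOP_ALLOC_LARGE, OpInfo = 0, followed by a USHORT 0x1000 / 8 = 0x200
+//   unwindAllocStack(0x80000) -> UWOP_ALLOC_LARGE, OpInfo = 1, followed by a ULONG 0x80000 (unscaled)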
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindAllocStackCFI(unsigned size)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, size);
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindSetFrameReg: Record a frame register.
+//
+// Arguments:
+// reg - The register being set as the frame register.
+// offset - The offset from the current stack pointer that the frame pointer will point at.
+//
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindSetFrameRegCFI(reg, offset);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindSetFrameRegWindows(reg, offset);
+ }
+}
+
+void Compiler::unwindSetFrameRegWindows(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+
+ func->unwindHeader.FrameRegister = (BYTE)reg;
+
+#ifdef PLATFORM_UNIX
+ if (offset > 240)
+ {
+ // On Unix only, we have a CLR-only extension to the AMD64 unwind codes: UWOP_SET_FPREG_LARGE.
+ // It has a 32-bit offset (scaled). You must set UNWIND_INFO.FrameOffset to 15. The 32-bit
+ // offset follows in 2 UNWIND_CODE fields.
+
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(ULONG)));
+ ULONG* codedSize = (ULONG*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(ULONG)];
+ assert(offset % 16 == 0);
+ *codedSize = offset / 16;
+
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->CodeOffset = (BYTE)cbProlog;
+ code->OpInfo = 0;
+ code->UnwindOp = UWOP_SET_FPREG_LARGE;
+ func->unwindHeader.FrameOffset = 15;
+ }
+ else
+#endif // PLATFORM_UNIX
+ {
+ assert(func->unwindCodeSlot > sizeof(UNWIND_CODE));
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->CodeOffset = (BYTE)cbProlog;
+ code->OpInfo = 0;
+ code->UnwindOp = UWOP_SET_FPREG;
+ assert(offset <= 240);
+ assert(offset % 16 == 0);
+ func->unwindHeader.FrameOffset = offset / 16;
+ }
+}
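+
+// Worked examples of the encoding above (illustrative values):
+//   unwindSetFrameReg(REG_RBP, 0x20)  -> UWOP_SET_FPREG, FrameRegister = RBP, FrameOffset = 0x20 / 16 = 2
+//   unwindSetFrameReg(REG_RBP, 0x200) -> (Unix only) UWOP_SET_FPREG_LARGE, FrameOffset = 15,
+//                                        followed by a ULONG scaled offset 0x200 / 16 = 0x20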
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindSetFrameRegCFI(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+
+ createCfiCode(func, cbProlog, CFI_DEF_CFA_REGISTER, mapRegNumToDwarfReg(reg));
+ if (offset != 0)
+ {
+ createCfiCode(func, cbProlog, CFI_ADJUST_CFA_OFFSET, DWARF_REG_ILLEGAL, offset);
+ }
+}
+#endif // UNIX_AMD64_ABI
+
+//------------------------------------------------------------------------
+// Compiler::unwindSaveReg: Record a register save.
+//
+// Arguments:
+// reg - The register being saved.
+// offset - The offset from the current stack pointer where the register is being saved.
+//
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindSaveRegCFI(reg, offset);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindSaveRegWindows(reg, offset);
+ }
+}
+
+void Compiler::unwindSaveRegWindows(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Can't call this after unwindReserve
+ if (RBM_CALLEE_SAVED & genRegMask(reg))
+ {
+ UNWIND_CODE* code;
+ if (offset < 0x80000)
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(USHORT)));
+ USHORT* codedSize = (USHORT*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(USHORT)];
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+
+ // Per the AMD64 ABI, if an entire xmm register is saved, the offset needs to be scaled by 16.
+ if (genIsValidFloatReg(reg))
+ {
+ *codedSize = (USHORT)(offset / 16);
+ code->UnwindOp = UWOP_SAVE_XMM128;
+ }
+ else
+ {
+ *codedSize = (USHORT)(offset / 8);
+ code->UnwindOp = UWOP_SAVE_NONVOL;
+ }
+ }
+ else
+ {
+ assert(func->unwindCodeSlot > (sizeof(UNWIND_CODE) + sizeof(ULONG)));
+ ULONG* codedSize = (ULONG*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(ULONG)];
+ *codedSize = offset;
+ code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot -= sizeof(UNWIND_CODE)];
+ code->UnwindOp = (genIsValidFloatReg(reg)) ? UWOP_SAVE_XMM128_FAR : UWOP_SAVE_NONVOL_FAR;
+ }
+ code->OpInfo = (BYTE)reg;
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ code->CodeOffset = (BYTE)cbProlog;
+ }
+}
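+
+// Worked examples of the encoding above (illustrative values):
+//   unwindSaveReg(REG_RSI, 0x40)    -> UWOP_SAVE_NONVOL,     scaled USHORT offset 0x40 / 8 = 8
+//   unwindSaveReg(REG_XMM6, 0x40)   -> UWOP_SAVE_XMM128,     scaled USHORT offset 0x40 / 16 = 4
+//   unwindSaveReg(REG_RSI, 0x80000) -> UWOP_SAVE_NONVOL_FAR, unscaled ULONG offset 0x80000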
+
+#ifdef UNIX_AMD64_ABI
+void Compiler::unwindSaveRegCFI(regNumber reg, unsigned offset)
+{
+ assert(compGeneratingProlog);
+
+ if (RBM_CALLEE_SAVED & genRegMask(reg))
+ {
+ FuncInfoDsc* func = funCurrentFunc();
+
+ unsigned int cbProlog = unwindGetCurrentOffset(func);
+ noway_assert((BYTE)cbProlog == cbProlog);
+ createCfiCode(func, cbProlog, CFI_REL_OFFSET, mapRegNumToDwarfReg(reg), offset);
+ }
+}
+#endif // UNIX_AMD64_ABI
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// DumpUnwindInfo: Dump the unwind data.
+//
+// Arguments:
+// isHotCode - true if this unwind data is for the hot section, false otherwise.
+// startOffset - byte offset of the code start that this unwind data represents.
+// endOffset - byte offset of the code end that this unwind data represents.
+// pHeader - pointer to the unwind data blob.
+//
+void DumpUnwindInfo(bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const UNWIND_INFO* const pHeader)
+{
+ printf("Unwind Info%s:\n", isHotCode ? "" : " COLD");
+ printf(" >> Start offset : 0x%06x (not in unwind data)\n", dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x (not in unwind data)\n", dspOffset(endOffset));
+
+ if (pHeader == nullptr)
+ {
+ // Cold AMD64 code doesn't have unwind info; the VM creates chained unwind info.
+ assert(!isHotCode);
+ return;
+ }
+
+ printf(" Version : %u\n", pHeader->Version);
+ printf(" Flags : 0x%02x", pHeader->Flags);
+ if (pHeader->Flags)
+ {
+ const UCHAR flags = pHeader->Flags;
+ printf(" (");
+ if (flags & UNW_FLAG_EHANDLER)
+ {
+ printf(" UNW_FLAG_EHANDLER");
+ }
+ if (flags & UNW_FLAG_UHANDLER)
+ {
+ printf(" UNW_FLAG_UHANDLER");
+ }
+ if (flags & UNW_FLAG_CHAININFO)
+ {
+ printf(" UNW_FLAG_CHAININFO");
+ }
+ printf(")");
+ }
+ printf("\n");
+ printf(" SizeOfProlog : 0x%02X\n", pHeader->SizeOfProlog);
+ printf(" CountOfUnwindCodes: %u\n", pHeader->CountOfUnwindCodes);
+ printf(" FrameRegister : %s (%u)\n",
+ (pHeader->FrameRegister == 0) ? "none" : getRegName(pHeader->FrameRegister),
+ pHeader->FrameRegister); // RAX (0) is not allowed as a frame register
+ if (pHeader->FrameRegister == 0)
+ {
+ printf(" FrameOffset : N/A (no FrameRegister) (Value=%u)\n", pHeader->FrameOffset);
+ }
+ else
+ {
+ printf(" FrameOffset : %u * 16 = 0x%02X\n", pHeader->FrameOffset, pHeader->FrameOffset * 16);
+ }
+ printf(" UnwindCodes :\n");
+
+ for (unsigned i = 0; i < pHeader->CountOfUnwindCodes; i++)
+ {
+ unsigned offset;
+ const UNWIND_CODE* const pCode = &(pHeader->UnwindCode[i]);
+ switch (pCode->UnwindOp)
+ {
+ case UWOP_PUSH_NONVOL:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_PUSH_NONVOL (%u) OpInfo: %s (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, getRegName(pCode->OpInfo), pCode->OpInfo);
+ break;
+
+ case UWOP_ALLOC_LARGE:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_ALLOC_LARGE (%u) OpInfo: %u - ", pCode->CodeOffset,
+ pCode->UnwindOp, pCode->OpInfo);
+ if (pCode->OpInfo == 0)
+ {
+ i++;
+ printf("Scaled small \n Size: %u * 8 = %u = 0x%05X\n", pHeader->UnwindCode[i].FrameOffset,
+ pHeader->UnwindCode[i].FrameOffset * 8, pHeader->UnwindCode[i].FrameOffset * 8);
+ }
+ else if (pCode->OpInfo == 1)
+ {
+ i++;
+ printf("Unscaled large\n Size: %u = 0x%08X\n\n", *(ULONG*)&(pHeader->UnwindCode[i]),
+ *(ULONG*)&(pHeader->UnwindCode[i]));
+ i++;
+ }
+ else
+ {
+ printf("Unknown\n");
+ }
+ break;
+
+ case UWOP_ALLOC_SMALL:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_ALLOC_SMALL (%u) OpInfo: %u * 8 + 8 = %u = 0x%02X\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo, pCode->OpInfo * 8 + 8, pCode->OpInfo * 8 + 8);
+ break;
+
+ case UWOP_SET_FPREG:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SET_FPREG (%u) OpInfo: Unused (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo); // This should be zero
+ break;
+
+#ifdef PLATFORM_UNIX
+
+ case UWOP_SET_FPREG_LARGE:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SET_FPREG_LARGE (%u) OpInfo: Unused (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo); // This should be zero
+ i++;
+ offset = *(ULONG*)&(pHeader->UnwindCode[i]);
+ i++;
+ printf(" Scaled Offset: %u * 16 = %u = 0x%08X\n", offset, offset * 16, offset * 16);
+ if ((offset & 0xF0000000) != 0)
+ {
+ printf(" Illegal unscaled offset: too large\n");
+ }
+ break;
+
+#endif // PLATFORM_UNIX
+
+ case UWOP_SAVE_NONVOL:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_NONVOL (%u) OpInfo: %s (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, getRegName(pCode->OpInfo), pCode->OpInfo);
+ i++;
+ printf(" Scaled Small Offset: %u * 8 = %u = 0x%05X\n", pHeader->UnwindCode[i].FrameOffset,
+ pHeader->UnwindCode[i].FrameOffset * 8, pHeader->UnwindCode[i].FrameOffset * 8);
+ break;
+
+ case UWOP_SAVE_NONVOL_FAR:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_NONVOL_FAR (%u) OpInfo: %s (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, getRegName(pCode->OpInfo), pCode->OpInfo);
+ i++;
+ printf(" Unscaled Large Offset: 0x%08X\n\n", *(ULONG*)&(pHeader->UnwindCode[i]));
+ i++;
+ break;
+
+ case UWOP_SAVE_XMM128:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_XMM128 (%u) OpInfo: XMM%u (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo, pCode->OpInfo);
+ i++;
+ printf(" Scaled Small Offset: %u * 16 = %u = 0x%05X\n", pHeader->UnwindCode[i].FrameOffset,
+ pHeader->UnwindCode[i].FrameOffset * 16, pHeader->UnwindCode[i].FrameOffset * 16);
+ break;
+
+ case UWOP_SAVE_XMM128_FAR:
+ printf(" CodeOffset: 0x%02X UnwindOp: UWOP_SAVE_XMM128_FAR (%u) OpInfo: XMM%u (%u)\n",
+ pCode->CodeOffset, pCode->UnwindOp, pCode->OpInfo, pCode->OpInfo);
+ i++;
+ printf(" Unscaled Large Offset: 0x%08X\n\n", *(ULONG*)&(pHeader->UnwindCode[i]));
+ i++;
+ break;
+
+ case UWOP_EPILOG:
+ case UWOP_SPARE_CODE:
+ case UWOP_PUSH_MACHFRAME:
+ default:
+ printf(" Unrecognized UNWIND_CODE: 0x%04X\n", *(USHORT*)pCode);
+ break;
+ }
+ }
+}
+
+#ifdef UNIX_AMD64_ABI
+//------------------------------------------------------------------------
+// DumpCfiInfo: Dump the Cfi data.
+//
+// Arguments:
+// isHotCode - true if this cfi data is for the hot section, false otherwise.
+// startOffset - byte offset of the code start that this cfi data represents.
+// endOffset - byte offset of the code end that this cfi data represents.
+// cfiCodeBytes - size of the cfi data blob, in bytes.
+// pCfiCode - pointer to the cfi data blob.
+//
+void DumpCfiInfo(bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ DWORD cfiCodeBytes,
+ const CFI_CODE* const pCfiCode)
+{
+ printf("Cfi Info%s:\n", isHotCode ? "" : " COLD");
+ printf(" >> Start offset : 0x%06x \n", dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x \n", dspOffset(endOffset));
+
+ for (int i = 0; i < cfiCodeBytes / sizeof(CFI_CODE); i++)
+ {
+ const CFI_CODE* const pCode = &(pCfiCode[i]);
+
+ UCHAR codeOffset = pCode->CodeOffset;
+ SHORT dwarfReg = pCode->DwarfReg;
+ INT offset = pCode->Offset;
+
+ switch (pCode->CfiOpCode)
+ {
+ case CFI_REL_OFFSET:
+ printf(" CodeOffset: 0x%02X Op: RelOffset DwarfReg:0x%x Offset:0x%X\n", codeOffset, dwarfReg,
+ offset);
+ break;
+ case CFI_DEF_CFA_REGISTER:
+ assert(offset == 0);
+ printf(" CodeOffset: 0x%02X Op: DefCfaRegister DwarfReg:0x%X\n", codeOffset, dwarfReg);
+ break;
+ case CFI_ADJUST_CFA_OFFSET:
+ assert(dwarfReg == DWARF_REG_ILLEGAL);
+ printf(" CodeOffset: 0x%02X Op: AdjustCfaOffset Offset:0x%X\n", codeOffset, offset);
+ break;
+ default:
+ printf(" Unrecognized CFI_CODE: 0x%IX\n", *(UINT64*)pCode);
+ break;
+ }
+ }
+}
+#endif // UNIX_AMD64_ABI
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserve: Ask the VM to reserve space for the unwind information
+// for the function and all its funclets. Called once, just before asking the VM
+// for memory and emitting the generated code. Calls unwindReserveFunc() to handle
+// the main function and each of the funclets, in turn.
+//
+void Compiler::unwindReserve()
+{
+ assert(!compGeneratingProlog);
+ assert(!compGeneratingEpilog);
+
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindReserveFunc(funGetFunc(funcIdx));
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserveFunc: Reserve the unwind information from the VM for a
+// given main function or funclet.
+//
+// Arguments:
+// func - The main function or funclet to reserve unwind info for.
+//
+void Compiler::unwindReserveFunc(FuncInfoDsc* func)
+{
+ unwindReserveFuncHelper(func, true);
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ unwindReserveFuncHelper(func, false);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserveFuncHelper: Reserve the unwind information from the VM for a
+// given main function or funclet, for either the hot or the cold section.
+//
+// Arguments:
+// func - The main function or funclet to reserve unwind info for.
+// isHotCode - 'true' to reserve the hot section, 'false' to reserve the cold section.
+//
+void Compiler::unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode)
+{
+ DWORD unwindCodeBytes = 0;
+ if (isHotCode)
+ {
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ unwindCodeBytes = func->cfiCodes->size() * sizeof(CFI_CODE);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ assert(func->unwindHeader.Version == 1); // Can't call this before unwindBegProlog
+ assert(func->unwindHeader.CountOfUnwindCodes == 0); // Only call this once per prolog
+
+ // Set the size of the prolog to be the last encoded action
+ if (func->unwindCodeSlot < sizeof(func->unwindCodes))
+ {
+ UNWIND_CODE* code = (UNWIND_CODE*)&func->unwindCodes[func->unwindCodeSlot];
+ func->unwindHeader.SizeOfProlog = code->CodeOffset;
+ }
+ else
+ {
+ func->unwindHeader.SizeOfProlog = 0;
+ }
+ func->unwindHeader.CountOfUnwindCodes =
+ (BYTE)((sizeof(func->unwindCodes) - func->unwindCodeSlot) / sizeof(UNWIND_CODE));
+
+ // Prepend the unwindHeader onto the unwind codes
+ assert(func->unwindCodeSlot >= offsetof(UNWIND_INFO, UnwindCode));
+
+ func->unwindCodeSlot -= offsetof(UNWIND_INFO, UnwindCode);
+ UNWIND_INFO* pHeader = (UNWIND_INFO*)&func->unwindCodes[func->unwindCodeSlot];
+ memcpy(pHeader, &func->unwindHeader, offsetof(UNWIND_INFO, UnwindCode));
+
+ unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot;
+ }
+ }
+
+ BOOL isFunclet = (func->funKind != FUNC_ROOT);
+ BOOL isColdCode = isHotCode ? FALSE : TRUE;
+
+ eeReserveUnwindInfo(isFunclet, isColdCode, unwindCodeBytes);
+}
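+
+// Illustrative sketch of func->unwindCodes after the header is prepended above. The codes were
+// filled in from the end of the array backwards, so the blob handed to the VM is the tail of
+// the array:
+//
+//   [ unused ... | UNWIND_INFO header (up to UnwindCode) | UNWIND_CODEs, last-emitted first ]
+//                ^ func->unwindCodeSlot
+//
+// unwindCodeBytes is the length of that tail.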
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmit: Report all the unwind information to the VM.
+//
+// Arguments:
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+//
+void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
+{
+ assert(!compGeneratingProlog);
+ assert(!compGeneratingEpilog);
+
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmitFuncHelper: Report the unwind information to the VM for a
+// given main function or funclet, for either the hot or cold section.
+//
+// Arguments:
+// func - The main function or funclet to report unwind info for.
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+// Ignored if 'isHotCode' is true.
+// isHotCode - 'true' to report the hot section, 'false' to report the cold section.
+//
+void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode)
+{
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ DWORD unwindCodeBytes = 0;
+ BYTE* pUnwindBlock = nullptr;
+
+ if (isHotCode)
+ {
+ if (func->startLoc == nullptr)
+ {
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = func->startLoc->CodeOffset(genEmitter);
+ }
+
+ if (func->endLoc == nullptr)
+ {
+ endOffset = info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = func->endLoc->CodeOffset(genEmitter);
+ }
+
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ int size = func->cfiCodes->size();
+ if (size > 0)
+ {
+ unwindCodeBytes = size * sizeof(CFI_CODE);
+ pUnwindBlock = (BYTE*)&(*func->cfiCodes)[0];
+ }
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot;
+
+#ifdef DEBUG
+ UNWIND_INFO* pUnwindInfo = (UNWIND_INFO*)(&func->unwindCodes[func->unwindCodeSlot]);
+ DWORD unwindCodeBytesSpecified =
+ offsetof(UNWIND_INFO, UnwindCode) +
+ pUnwindInfo->CountOfUnwindCodes * sizeof(UNWIND_CODE); // This is what the unwind codes themselves say;
+ // it better match what we tell the VM.
+ assert(unwindCodeBytes == unwindCodeBytesSpecified);
+#endif // DEBUG
+
+ pUnwindBlock = &func->unwindCodes[func->unwindCodeSlot];
+ }
+ }
+ else
+ {
+ assert(fgFirstColdBlock != nullptr);
+ assert(func->funKind == FUNC_ROOT); // No splitting of funclets.
+
+ if (func->coldStartLoc == nullptr)
+ {
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = func->coldStartLoc->CodeOffset(genEmitter);
+ }
+
+ if (func->coldEndLoc == nullptr)
+ {
+ endOffset = info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = func->coldEndLoc->CodeOffset(genEmitter);
+ }
+ }
+
+#ifdef DEBUG
+ if (opts.dspUnwind)
+ {
+#ifdef UNIX_AMD64_ABI
+ if (generateCFIUnwindCodes())
+ {
+ DumpCfiInfo(isHotCode, startOffset, endOffset, unwindCodeBytes, (const CFI_CODE* const)pUnwindBlock);
+ }
+ else
+#endif // UNIX_AMD64_ABI
+ {
+ DumpUnwindInfo(isHotCode, startOffset, endOffset, (const UNWIND_INFO* const)pUnwindBlock);
+ }
+ }
+#endif // DEBUG
+
+ // Adjust for cold or hot code:
+ // 1. The VM doesn't want the cold code pointer unless this is cold code.
+ // 2. The startOffset and endOffset need to be from the base of the hot section for hot code
+ // and from the base of the cold section for cold code
+
+ if (isHotCode)
+ {
+ assert(endOffset <= info.compTotalHotCodeSize);
+ pColdCode = nullptr;
+ }
+ else
+ {
+ assert(startOffset >= info.compTotalHotCodeSize);
+ startOffset -= info.compTotalHotCodeSize;
+ endOffset -= info.compTotalHotCodeSize;
+ }
+
+ eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindCodeBytes, pUnwindBlock,
+ (CorJitFuncKind)func->funKind);
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmitFunc: Report the unwind information to the VM for a
+// given main function or funclet. Reports the hot section, then the cold
+// section if necessary.
+//
+// Arguments:
+// func - The main function or funclet to report unwind info for.
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+//
+void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode)
+{
+ // Verify that the JIT enum is in sync with the JIT-EE interface enum
+ static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT);
+ static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
+ static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);
+
+ unwindEmitFuncHelper(func, pHotCode, pColdCode, true);
+
+ if (pColdCode != nullptr)
+ {
+ unwindEmitFuncHelper(func, pHotCode, pColdCode, false);
+ }
+}
+
+#endif // _TARGET_AMD64_
diff --git a/src/jit/unwindarm.cpp b/src/jit/unwindarm.cpp
new file mode 100644
index 0000000000..b537bef4a3
--- /dev/null
+++ b/src/jit/unwindarm.cpp
@@ -0,0 +1,2320 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifdef _TARGET_ARMARCH_
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind APIs XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+void Compiler::unwindBegProlog()
+{
+ assert(compGeneratingProlog);
+
+ FuncInfoDsc* func = funCurrentFunc();
+
+ // There is only one prolog for a function/funclet, and it comes first. So now is
+ // a good time to initialize all the unwind data structures.
+
+ emitLocation* startLoc;
+ emitLocation* endLoc;
+ unwindGetFuncLocations(func, true, &startLoc, &endLoc);
+
+ func->uwi.InitUnwindInfo(this, startLoc, endLoc);
+ func->uwi.CaptureLocation();
+
+ func->uwiCold = NULL; // No cold data yet
+}
+
+void Compiler::unwindEndProlog()
+{
+ assert(compGeneratingProlog);
+}
+
+void Compiler::unwindBegEpilog()
+{
+ assert(compGeneratingEpilog);
+ funCurrentFunc()->uwi.AddEpilog();
+}
+
+void Compiler::unwindEndEpilog()
+{
+ assert(compGeneratingEpilog);
+}
+
+#if defined(_TARGET_ARM_)
+
+void Compiler::unwindPushPopMaskInt(regMaskTP maskInt, bool useOpsize16)
+{
+ // floating point registers cannot be specified in 'maskInt'
+ assert((maskInt & RBM_ALLFLOAT) == 0);
+
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ if (useOpsize16)
+ {
+ // The 16-bit opcode can only encode R0-R7 and LR
+ assert((maskInt & ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_LR)) == 0);
+
+ bool shortFormat = false;
+ BYTE val = 0;
+
+ if ((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0)
+ {
+ regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7);
+ regMaskTP valMask = RBM_R4;
+ while (val < 4)
+ {
+ if (matchMask == valMask)
+ {
+ shortFormat = true;
+ break;
+ }
+
+ valMask <<= 1;
+ valMask |= RBM_R4;
+
+ val++;
+ }
+ }
+
+ if (shortFormat)
+ {
+ // D0-D7 : pop {r4-rX,lr} (X=4-7) (opsize 16)
+ pu->AddCode(0xD0 | ((maskInt >> 12) & 0x4) | val);
+ }
+ else
+ {
+ // EC-ED : pop {r0-r7,lr} (opsize 16)
+ pu->AddCode(0xEC | ((maskInt >> 14) & 0x1), (BYTE)maskInt);
+ }
+ }
+ else
+ {
+ assert((maskInt &
+ ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
+ RBM_R11 | RBM_R12 | RBM_LR)) == 0);
+
+ bool shortFormat = false;
+ BYTE val = 0;
+
+ if (((maskInt & (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3)) == 0) &&
+ ((maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)) == (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8)))
+ {
+ regMaskTP matchMask = maskInt & (RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 | RBM_R11);
+ regMaskTP valMask = RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8;
+ while (val < 4)
+ {
+ if (matchMask == valMask)
+ {
+ shortFormat = true;
+ break;
+ }
+
+ valMask <<= 1;
+ valMask |= RBM_R4;
+
+ val++;
+ }
+ }
+
+ if (shortFormat)
+ {
+ // D8-DF : pop {r4-rX,lr} (X=8-11) (opsize 32)
+ pu->AddCode(0xD8 | ((maskInt >> 12) & 0x4) | val);
+ }
+ else
+ {
+ // 80-BF : pop {r0-r12,lr} (opsize 32)
+ pu->AddCode(0x80 | ((maskInt >> 8) & 0x1F) | ((maskInt >> 9) & 0x20), (BYTE)maskInt);
+ }
+ }
+}
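+
+// Worked example of the short format above (illustrative; assumes the usual ARM register mask
+// layout where RBM_LR is bit 14): a "pop {r4-r7, lr}" mask matches valMask after three shifts,
+// so val = 3, LR contributes bit 2, and the single unwind byte is 0xD0 | 0x4 | 3 = 0xD7.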
+
+void Compiler::unwindPushPopMaskFloat(regMaskTP maskFloat)
+{
+ // Only floating point registers can be specified in 'maskFloat'
+ assert((maskFloat & ~RBM_ALLFLOAT) == 0);
+
+ // If the maskFloat is zero there is no unwind code to emit
+ //
+ if (maskFloat == RBM_NONE)
+ {
+ return;
+ }
+
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ BYTE val = 0;
+ regMaskTP valMask = (RBM_F16 | RBM_F17);
+
+ while (maskFloat != valMask)
+ {
+ valMask <<= 2;
+ valMask |= (RBM_F16 | RBM_F17);
+
+ val++;
+
+ if (val == 8)
+ {
+ noway_assert(!"Illegal maskFloat");
+ }
+ }
+
+ // E0-E7 : vpop {d8-dX} (X=8-15) (opsize 32)
+ assert(0 <= val && val <= 7);
+ pu->AddCode(0xE0 | val);
+}
+
+void Compiler::unwindPushMaskInt(regMaskTP maskInt)
+{
+ // Only r0-r12 and lr are supported
+ assert((maskInt &
+ ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
+ RBM_R11 | RBM_R12 | RBM_LR)) == 0);
+
+ bool useOpsize16 = ((maskInt & (RBM_LOW_REGS | RBM_LR)) == maskInt); // Can PUSH use the 16-bit encoding?
+ unwindPushPopMaskInt(maskInt, useOpsize16);
+}
+
+void Compiler::unwindPushMaskFloat(regMaskTP maskFloat)
+{
+ // Only floating point registers should be in maskFloat
+ assert((maskFloat & RBM_ALLFLOAT) == maskFloat);
+ unwindPushPopMaskFloat(maskFloat);
+}
+
+void Compiler::unwindPopMaskInt(regMaskTP maskInt)
+{
+ // Only r0-r12 and lr and pc are supported (pc is mapped to lr when encoding)
+ assert((maskInt &
+ ~(RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5 | RBM_R6 | RBM_R7 | RBM_R8 | RBM_R9 | RBM_R10 |
+ RBM_R11 | RBM_R12 | RBM_LR | RBM_PC)) == 0);
+
+ bool useOpsize16 = ((maskInt & (RBM_LOW_REGS | RBM_PC)) == maskInt); // Can POP use the 16-bit encoding?
+
+ // If we are popping PC, then we'll return from the function. In this case, we assume
+ // the first thing the prolog did was push LR, so give the unwind codes in terms of
+ // the LR that was pushed. Note that the epilog unwind codes are meant to reverse
+ // the effect of the prolog. For "pop {pc}", the prolog had "push {lr}", so we need
+ // an epilog code to model the reverse of that.
+ if (maskInt & RBM_PC)
+ {
+ maskInt = (maskInt & ~RBM_PC) | RBM_LR;
+ }
+ unwindPushPopMaskInt(maskInt, useOpsize16);
+}
+
+void Compiler::unwindPopMaskFloat(regMaskTP maskFloat)
+{
+ // Only floating point registers should be in maskFloat
+ assert((maskFloat & RBM_ALLFLOAT) == maskFloat);
+ unwindPushPopMaskFloat(maskFloat);
+}
+
+void Compiler::unwindAllocStack(unsigned size)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ assert(size % 4 == 0);
+ size /= 4;
+
+ if (size <= 0x7F)
+ {
+ // 00-7F : add sp, sp, #X*4 (opsize 16)
+ pu->AddCode((BYTE)size);
+ }
+ else if (size <= 0x3FF)
+ {
+ // E8-EB : addw sp, sp, #X*4 (opsize 32)
+ pu->AddCode(0xE8 | (BYTE)(size >> 8), (BYTE)size);
+ }
+ else if (size <= 0xFFFF)
+ {
+ // F7 : add sp, sp, #X*4 (opsize 16)
+ // F9 : add sp, sp, #X*4 (opsize 32)
+ //
+ // For large stack size, the most significant bits
+ // are stored first (and next to the opCode (F9)) per the unwind spec.
+ unsigned instrSizeInBytes = pu->GetInstructionSize();
+ BYTE b1 = (instrSizeInBytes == 2) ? 0xF7 : 0xF9;
+ pu->AddCode(b1,
+ (BYTE)(size >> 8), // msb
+ (BYTE)size); // lsb
+ }
+ else
+ {
+ // F8 : add sp, sp, #X*4 (opsize 16)
+ // FA : add sp, sp, #X*4 (opsize 32)
+ //
+ // For large stack size, the most significant bits
+ // are stored first (and next to the opCode (FA)) per the unwind spec.
+ unsigned instrSizeInBytes = pu->GetInstructionSize();
+ BYTE b1 = (instrSizeInBytes == 2) ? 0xF8 : 0xFA;
+ pu->AddCode(b1, (BYTE)(size >> 16), (BYTE)(size >> 8), (BYTE)size);
+ }
+}
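+
+// Worked examples of the encoding above (illustrative values; sizes are encoded in words):
+//   unwindAllocStack(64)     -> 64 / 4 = 0x10              -> single byte 0x10
+//   unwindAllocStack(0x1000) -> 0x1000 / 4 = 0x400 > 0x3FF -> F7/F9 form with size bytes 0x04, 0x00
+// The F7 vs. F9 (and F8 vs. FA) choice records whether the matching "sub sp" instruction was
+// 16-bit or 32-bit, which the unwinder uses when unwinding from within a partially executed
+// prolog or epilog.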
+
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // ARM unwind info cannot encode an offset from the frame register
+ assert(offset == 0);
+ assert(0 <= reg && reg <= 15);
+
+ // C0-CF : mov sp, rX (opsize 16)
+ pu->AddCode((BYTE)(0xC0 + reg));
+}
+
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+ unreached();
+}
+
+void Compiler::unwindBranch16()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // TODO-CQ: we need to handle changing the exit code from 0xFF to 0xFD. Currently, this wastes an
+ // extra, automatically added 0xFF at the end.
+ pu->AddCode(0xFD);
+}
+
+void Compiler::unwindNop(unsigned codeSizeInBytes) // codeSizeInBytes is 2 or 4 bytes for Thumb2 instruction
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("unwindNop: adding NOP for %d byte instruction\n", codeSizeInBytes);
+ }
+#endif
+
+ INDEBUG(pu->uwiAddingNOP = true);
+
+ if (codeSizeInBytes == 2)
+ {
+ // FB : nop (opsize 16)
+ pu->AddCode(0xFB);
+ }
+ else
+ {
+ noway_assert(codeSizeInBytes == 4);
+
+ // FC : nop (opsize 32)
+ pu->AddCode(0xFC);
+ }
+
+ INDEBUG(pu->uwiAddingNOP = false);
+}
+
+#endif // defined(_TARGET_ARM_)
+
+// The instructions between the last captured "current state" and the current instruction
+// are in the prolog but have no effect for unwinding. Emit the appropriate NOP unwind codes
+// for them.
+void Compiler::unwindPadding()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+ genEmitter->emitUnwindNopPadding(pu->GetCurrentEmitterLocation(), this);
+}
+
+// Ask the VM to reserve space for the unwind information for the function and
+// all its funclets.
+void Compiler::unwindReserve()
+{
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindReserveFunc(funGetFunc(funcIdx));
+ }
+}
+
+void Compiler::unwindReserveFunc(FuncInfoDsc* func)
+{
+ BOOL isFunclet = (func->funKind == FUNC_ROOT) ? FALSE : TRUE;
+ bool funcHasColdSection = false;
+
+ // If there is cold code, split the unwind data between the hot section and the
+ // cold section. This needs to be done before we split into fragments, as each
+ // of the hot and cold sections can have multiple fragments.
+
+ if (fgFirstColdBlock != NULL)
+ {
+ assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH
+
+ emitLocation* startLoc;
+ emitLocation* endLoc;
+ unwindGetFuncLocations(func, false, &startLoc, &endLoc);
+
+ func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo();
+ func->uwiCold->InitUnwindInfo(this, startLoc, endLoc);
+ func->uwiCold->HotColdSplitCodes(&func->uwi);
+
+ funcHasColdSection = true;
+ }
+
+ // First we need to split the function or funclet into fragments that are no larger
+ // than 512K, so the fragment size will fit in the unwind data "Function Length" field.
+ // The ARM Exception Data specification "Function Fragments" section describes this.
+ func->uwi.Split();
+
+ func->uwi.Reserve(isFunclet, true);
+
+ // After the hot section, split and reserve the cold section
+
+ if (funcHasColdSection)
+ {
+ assert(func->uwiCold != NULL);
+
+ func->uwiCold->Split();
+ func->uwiCold->Reserve(isFunclet, false);
+ }
+}
+
+// unwindEmit: Report all the unwind information to the VM.
+// Arguments:
+// pHotCode: Pointer to the beginning of the memory with the function and funclet hot code
+// pColdCode: Pointer to the beginning of the memory with the function and funclet cold code.
+
+void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
+{
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode);
+ }
+}
+
+void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode)
+{
+ // Verify that the JIT enum is in sync with the JIT-EE interface enum
+ static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT);
+ static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
+ static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);
+
+ func->uwi.Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, true);
+
+ if (func->uwiCold != NULL)
+ {
+ func->uwiCold->Allocate((CorJitFuncKind)func->funKind, pHotCode, pColdCode, false);
+ }
+}
+
+#if defined(_TARGET_ARM_)
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Debug helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// Return the opcode size of an instruction, in bytes, given the first byte of
+// its corresponding unwind code.
+
+unsigned GetOpcodeSizeFromUnwindHeader(BYTE b1)
+{
+ static BYTE s_UnwindOpsize[256] = {
+ // array of opsizes, in bytes (as specified in the ARM unwind specification)
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 00-0F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 10-1F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 20-2F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 30-3F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 40-4F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 50-5F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 60-6F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 70-7F
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 80-8F
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // 90-9F
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // A0-AF
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, // B0-BF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0-CF
+ 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4, // D0-DF
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 4, // E0-EF
+ 0, 0, 0, 0, 0, 4, 4, 2, 2, 4, 4, 2, 4, 2, 4, 0 // F0-FF
+ };
+
+ BYTE opsize = s_UnwindOpsize[b1];
+ assert(opsize == 2 ||
+ opsize == 4); // We shouldn't get a code with no opsize (the 0xFF end code is handled specially)
+ return opsize;
+}
+
+// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes
+
+unsigned GetUnwindSizeFromUnwindHeader(BYTE b1)
+{
+ static BYTE s_UnwindSize[256] = {
+ // array of unwind sizes, in bytes (as specified in the ARM unwind specification)
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40-4F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 80-8F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 90-9F
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // A0-AF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // B0-BF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // C0-CF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // D0-DF
+ 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, // E0-EF
+ 1, 1, 1, 1, 1, 2, 2, 3, 4, 3, 4, 1, 1, 1, 1, 1 // F0-FF
+ };
+
+ unsigned size = s_UnwindSize[b1];
+ assert(1 <= size && size <= 4);
+ return size;
+}
+
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Support Classes XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindCodesBase
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef DEBUG
+
+// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes.
+// The 0xFD and 0xFE "end + NOP" codes need to be handled differently between
+// the prolog and epilog. They count as pure "end" codes in a prolog, but they
+// count as 16 and 32 bit NOPs (respectively), as well as an "end", in an epilog.
+unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog)
+{
+ BYTE* pCodesStart = GetCodes();
+ BYTE* pCodes = pCodesStart;
+ unsigned size = 0;
+ for (;;)
+ {
+ BYTE b1 = *pCodes;
+ if (b1 >= 0xFD)
+ {
+ // 0xFD, 0xFE, 0xFF are "end" codes
+
+ if (!isProlog && (b1 == 0xFD || b1 == 0xFE))
+ {
+ // Count the special "end + NOP" code size in the epilog
+ size += GetOpcodeSizeFromUnwindHeader(b1);
+ }
+
+ break; // We hit an "end" code; we're done
+ }
+ size += GetOpcodeSizeFromUnwindHeader(b1);
+ pCodes += GetUnwindSizeFromUnwindHeader(b1);
+ assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed
+ }
+ return size;
+}
+
+#endif // DEBUG
+
+#endif // defined(_TARGET_ARM_)
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindPrologCodes
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// We're going to use the prolog codes memory to store the final unwind data.
+// Ensure we have enough memory to store everything. If 'epilogBytes' > 0, then
+// move the prolog codes so there are 'epilogBytes' bytes after the prolog codes.
+// Set the header pointer for future use, adding the header bytes (this pointer
+// is updated when a header byte is added), and remember the index that points
+// to the beginning of the header.
+
+void UnwindPrologCodes::SetFinalSize(int headerBytes, int epilogBytes)
+{
+#ifdef DEBUG
+ // We're done adding codes. Check that we didn't accidentally create a bigger prolog.
+ unsigned codeSize = GetCodeSizeFromUnwindCodes(true);
+ assert(codeSize <= MAX_PROLOG_SIZE_BYTES);
+#endif // DEBUG
+
+ int prologBytes = Size();
+
+ EnsureSize(headerBytes + prologBytes + epilogBytes + 3); // 3 = padding bytes for alignment
+
+ upcUnwindBlockSlot = upcCodeSlot - headerBytes - epilogBytes; // Index of the first byte of the unwind header
+
+ assert(upcMemSize == upcUnwindBlockSlot + headerBytes + prologBytes + epilogBytes + 3);
+
+ upcHeaderSlot = upcUnwindBlockSlot - 1; // upcHeaderSlot is always incremented before storing
+ assert(upcHeaderSlot >= -1);
+
+ if (epilogBytes > 0)
+ {
+ // The prolog codes that are already at the end of the array need to get moved to the middle,
+ // with space for the non-matching epilog codes to follow.
+
+ memmove_s(&upcMem[upcUnwindBlockSlot + headerBytes], upcMemSize - (upcUnwindBlockSlot + headerBytes),
+ &upcMem[upcCodeSlot], prologBytes);
+
+ // Note that the three UWC_END padding bytes still exist at the end of the array.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ // Zero out the epilog codes memory, to ensure we've copied the right bytes. Don't zero the padding bytes.
+ memset(&upcMem[upcUnwindBlockSlot + headerBytes + prologBytes], 0, epilogBytes);
+#endif // DEBUG
+
+ upcEpilogSlot =
+ upcUnwindBlockSlot + headerBytes + prologBytes; // upcEpilogSlot points to the next epilog location to fill
+
+ // Update upcCodeSlot to point at the new beginning of the prolog codes
+ upcCodeSlot = upcUnwindBlockSlot + headerBytes;
+ }
+}
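+
+// Illustrative sketch of the single upcMem buffer after SetFinalSize():
+//
+//   [ unused | header (headerBytes) | prolog codes (prologBytes) | epilog codes (epilogBytes) | 3 'end' pad bytes ]
+//            ^ upcUnwindBlockSlot   ^ upcCodeSlot                ^ upcEpilogSlot
+//
+// GetFinalInfo() later returns everything starting at upcUnwindBlockSlot, rounded up to a
+// multiple of 4 bytes using the padding at the end.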
+
+// Add a header word. Header words are added starting at the beginning, in order: first to last.
+// This is in contrast to the prolog unwind codes, which are added in reverse order.
+void UnwindPrologCodes::AddHeaderWord(DWORD d)
+{
+ assert(-1 <= upcHeaderSlot);
+ assert(upcHeaderSlot + 4 < upcCodeSlot); // Don't collide with the unwind codes that are already there!
+
+ // Store it byte-by-byte in little-endian format. We've already ensured there is enough space
+ // in SetFinalSize().
+ upcMem[++upcHeaderSlot] = (BYTE)d;
+ upcMem[++upcHeaderSlot] = (BYTE)(d >> 8);
+ upcMem[++upcHeaderSlot] = (BYTE)(d >> 16);
+ upcMem[++upcHeaderSlot] = (BYTE)(d >> 24);
+}
+
+// AppendEpilog: copy the epilog bytes to the next epilog bytes slot
+void UnwindPrologCodes::AppendEpilog(UnwindEpilogInfo* pEpi)
+{
+ assert(upcEpilogSlot != -1);
+
+ int epiSize = pEpi->Size();
+ memcpy_s(&upcMem[upcEpilogSlot], upcMemSize - upcEpilogSlot - 3, pEpi->GetCodes(),
+ epiSize); // -3 to avoid writing to the alignment padding
+ assert(pEpi->GetStartIndex() ==
+ upcEpilogSlot - upcCodeSlot); // Make sure we copied it where we expected to copy it.
+
+ upcEpilogSlot += epiSize;
+ assert(upcEpilogSlot <= upcMemSize - 3);
+}
+
+// GetFinalInfo: return a pointer to the final unwind info to hand to the VM, and the size of this info in bytes
+void UnwindPrologCodes::GetFinalInfo(/* OUT */ BYTE** ppUnwindBlock, /* OUT */ ULONG* pUnwindBlockSize)
+{
+ assert(upcHeaderSlot + 1 == upcCodeSlot); // We better have filled in the header before asking for the final data!
+
+ *ppUnwindBlock = &upcMem[upcUnwindBlockSlot];
+
+ // We put 4 'end' codes at the end for padding, so we can ensure we have an
+ // unwind block that is a multiple of 4 bytes in size. Subtract off three 'end'
+ // codes (leave one), and then align the size up to a multiple of 4.
+ *pUnwindBlockSize = AlignUp((UINT)(upcMemSize - upcUnwindBlockSlot - 3), sizeof(DWORD));
+}
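+
+// Worked example (hypothetical sizes): if the unwind block occupies 8 header bytes, 5 code bytes and
+// 3 padding bytes, then upcMemSize - upcUnwindBlockSlot == 16 and the reported size is
+// AlignUp(16 - 3, 4) == 16; the extra bytes included by the align-up are the 'end' padding codes
+// already present at the end of the array.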
+
+// Do the argument unwind codes match our unwind codes?
+// If they don't match, return -1. If they do, return the offset into
+// our codes at which they match. Note that this means that the
+// argument codes can match a subset of our codes. The subset needs to be at
+// the end, for the "end" code to match.
+//
+// This is similar to UnwindEpilogInfo::Match().
+//
+#if defined(_TARGET_ARM_)
+// Note that if we wanted to handle 0xFD and 0xFE codes, by converting
+// an existing 0xFF code to one of those, we might do that here.
+#endif // defined(_TARGET_ARM_)
+
+int UnwindPrologCodes::Match(UnwindEpilogInfo* pEpi)
+{
+ if (Size() < pEpi->Size())
+ {
+ return -1;
+ }
+
+ int matchIndex = Size() - pEpi->Size();
+
+ if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size()))
+ {
+ return matchIndex;
+ }
+
+ return -1;
+}
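+
+// For example (placeholder byte values): if our codes are { A, B, C, END } and the epilog's codes are
+// { C, END }, then matchIndex == 4 - 2 == 2 and the memcmp over the trailing two bytes succeeds, so
+// the epilog can reuse our codes starting at offset 2.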
+
+// Copy the prolog codes from another prolog. The only time this is legal is
+// if we are at the initial state and no prolog codes have been added.
+// This is used to create the 'phantom' prolog for non-first fragments.
+
+void UnwindPrologCodes::CopyFrom(UnwindPrologCodes* pCopyFrom)
+{
+ assert(uwiComp == pCopyFrom->uwiComp);
+ assert(upcMem == upcMemLocal);
+ assert(upcMemSize == UPC_LOCAL_COUNT);
+ assert(upcHeaderSlot == -1);
+ assert(upcEpilogSlot == -1);
+
+ // Copy the codes
+ EnsureSize(pCopyFrom->upcMemSize);
+ assert(upcMemSize == pCopyFrom->upcMemSize);
+ memcpy_s(upcMem, upcMemSize, pCopyFrom->upcMem, pCopyFrom->upcMemSize);
+
+ // Copy the other data
+ upcCodeSlot = pCopyFrom->upcCodeSlot;
+ upcHeaderSlot = pCopyFrom->upcHeaderSlot;
+ upcEpilogSlot = pCopyFrom->upcEpilogSlot;
+ upcUnwindBlockSlot = pCopyFrom->upcUnwindBlockSlot;
+}
+
+void UnwindPrologCodes::EnsureSize(int requiredSize)
+{
+ if (requiredSize > upcMemSize)
+ {
+ // Reallocate, and copy everything to a new array.
+
+ // Choose the next power of two size. This may or may not be the best choice.
+ noway_assert((requiredSize & 0xC0000000) == 0); // too big!
+ int newSize;
+ for (newSize = upcMemSize << 1; newSize < requiredSize; newSize <<= 1)
+ {
+ // do nothing
+ }
+
+ BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize];
+ memcpy_s(newUnwindCodes + newSize - upcMemSize, upcMemSize, upcMem,
+ upcMemSize); // copy the existing data to the end
+#ifdef DEBUG
+ // Clear the old unwind codes; nobody should be looking at them
+ memset(upcMem, 0xFF, upcMemSize);
+#endif // DEBUG
+ upcMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator
+ upcCodeSlot += newSize - upcMemSize;
+ upcMemSize = newSize;
+ }
+}
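+
+// For example (hypothetical sizes): if upcMemSize is 24 and requiredSize is 100, newSize doubles
+// through 48 and 96 to 192. The existing 24 bytes are copied to the *end* of the new array (the
+// prolog codes are built up backwards from the end), and upcCodeSlot is shifted by 192 - 24 = 168.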
+
+#ifdef DEBUG
+void UnwindPrologCodes::Dump(int indent)
+{
+ printf("%*sUnwindPrologCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s &upcMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&upcMemLocal[0]));
+ printf("%*s upcMem: 0x%08p\n", indent, "", dspPtr(upcMem));
+ printf("%*s upcMemSize: %d\n", indent, "", upcMemSize);
+ printf("%*s upcCodeSlot: %d\n", indent, "", upcCodeSlot);
+ printf("%*s upcHeaderSlot: %d\n", indent, "", upcHeaderSlot);
+ printf("%*s upcEpilogSlot: %d\n", indent, "", upcEpilogSlot);
+ printf("%*s upcUnwindBlockSlot: %d\n", indent, "", upcUnwindBlockSlot);
+
+ if (upcMemSize > 0)
+ {
+ printf("%*s codes:", indent, "");
+ for (int i = 0; i < upcMemSize; i++)
+ {
+ printf(" %02x", upcMem[i]);
+ if (i == upcCodeSlot)
+ printf(" <-C");
+ else if (i == upcHeaderSlot)
+ printf(" <-H");
+ else if (i == upcEpilogSlot)
+ printf(" <-E");
+ else if (i == upcUnwindBlockSlot)
+ printf(" <-U");
+ }
+ printf("\n");
+ }
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindEpilogCodes
+//
+///////////////////////////////////////////////////////////////////////////////
+
+void UnwindEpilogCodes::EnsureSize(int requiredSize)
+{
+ if (requiredSize > uecMemSize)
+ {
+ // Reallocate, and copy everything to a new array.
+
+ // Choose the next power of two size. This may or may not be the best choice.
+ noway_assert((requiredSize & 0xC0000000) == 0); // too big!
+ int newSize;
+ for (newSize = uecMemSize << 1; newSize < requiredSize; newSize <<= 1)
+ {
+ // do nothing
+ }
+
+ BYTE* newUnwindCodes = new (uwiComp, CMK_UnwindInfo) BYTE[newSize];
+ memcpy_s(newUnwindCodes, newSize, uecMem, uecMemSize);
+#ifdef DEBUG
+ // Clear the old unwind codes; nobody should be looking at them
+ memset(uecMem, 0xFF, uecMemSize);
+#endif // DEBUG
+ uecMem = newUnwindCodes; // we don't free anything that used to be there since we have a no-release allocator
+ // uecCodeSlot stays the same
+ uecMemSize = newSize;
+ }
+}
+
+#ifdef DEBUG
+void UnwindEpilogCodes::Dump(int indent)
+{
+ printf("%*sUnwindEpilogCodes @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s &uecMemLocal[0]: 0x%08p\n", indent, "", dspPtr(&uecMemLocal[0]));
+ printf("%*s uecMem: 0x%08p\n", indent, "", dspPtr(uecMem));
+ printf("%*s uecMemSize: %d\n", indent, "", uecMemSize);
+ printf("%*s uecCodeSlot: %d\n", indent, "", uecCodeSlot);
+ printf("%*s uecFinalized: %s\n", indent, "", dspBool(uecFinalized));
+
+ if (uecMemSize > 0)
+ {
+ printf("%*s codes:", indent, "");
+ for (int i = 0; i < uecMemSize; i++)
+ {
+ printf(" %02x", uecMem[i]);
+ if (i == uecCodeSlot)
+ printf(" <-C"); // Indicate the current pointer
+ }
+ printf("\n");
+ }
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindEpilogInfo
+//
+///////////////////////////////////////////////////////////////////////////////
+
+// Do the current unwind codes match those of the argument epilog?
+// If they don't match, return -1. If they do, return the offset into
+// our codes at which the argument codes match. Note that this means that
+// the argument codes can match a subset of our codes. The subset needs to be at
+// the end, for the "end" code to match.
+//
+// Note that if we wanted to handle 0xFD and 0xFE codes, by converting
+// an existing 0xFF code to one of those, we might do that here.
+
+int UnwindEpilogInfo::Match(UnwindEpilogInfo* pEpi)
+{
+ if (Matches())
+ {
+ // We are already matched to someone else, and won't provide codes to the final layout
+ return -1;
+ }
+
+ if (Size() < pEpi->Size())
+ {
+ return -1;
+ }
+
+ int matchIndex = Size() - pEpi->Size();
+
+ if (0 == memcmp(GetCodes() + matchIndex, pEpi->GetCodes(), pEpi->Size()))
+ {
+ return matchIndex;
+ }
+
+ return -1;
+}
+
+void UnwindEpilogInfo::CaptureEmitLocation()
+{
+ noway_assert(epiEmitLocation == NULL); // This function is only called once per epilog
+ epiEmitLocation = new (uwiComp, CMK_UnwindInfo) emitLocation();
+ epiEmitLocation->CaptureLocation(uwiComp->genEmitter);
+}
+
+void UnwindEpilogInfo::FinalizeOffset()
+{
+ epiStartOffset = epiEmitLocation->CodeOffset(uwiComp->genEmitter);
+}
+
+#ifdef DEBUG
+void UnwindEpilogInfo::Dump(int indent)
+{
+ printf("%*sUnwindEpilogInfo @0x%08p, size:%d:\n", indent, "", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s epiNext: 0x%08p\n", indent, "", dspPtr(epiNext));
+ printf("%*s epiEmitLocation: 0x%08p\n", indent, "", dspPtr(epiEmitLocation));
+ printf("%*s epiStartOffset: 0x%x\n", indent, "", epiStartOffset);
+ printf("%*s epiMatches: %s\n", indent, "", dspBool(epiMatches));
+ printf("%*s epiStartIndex: %d\n", indent, "", epiStartIndex);
+
+ epiCodes.Dump(indent + 2);
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindFragmentInfo
+//
+///////////////////////////////////////////////////////////////////////////////
+
+UnwindFragmentInfo::UnwindFragmentInfo(Compiler* comp, emitLocation* emitLoc, bool hasPhantomProlog)
+ : UnwindBase(comp)
+ , ufiNext(NULL)
+ , ufiEmitLoc(emitLoc)
+ , ufiHasPhantomProlog(hasPhantomProlog)
+ , ufiPrologCodes(comp)
+ , ufiEpilogFirst(comp)
+ , ufiEpilogList(NULL)
+ , ufiEpilogLast(NULL)
+ , ufiCurCodes(&ufiPrologCodes)
+ , ufiSize(0)
+ , ufiStartOffset(UFI_ILLEGAL_OFFSET)
+{
+#ifdef DEBUG
+ ufiNum = 1;
+ ufiInProlog = true;
+ ufiInitialized = UFI_INITIALIZED_PATTERN;
+#endif // DEBUG
+}
+
+void UnwindFragmentInfo::FinalizeOffset()
+{
+ if (ufiEmitLoc == NULL)
+ {
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ ufiStartOffset = 0;
+ }
+ else
+ {
+ ufiStartOffset = ufiEmitLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ pEpi->FinalizeOffset();
+ }
+}
+
+void UnwindFragmentInfo::AddEpilog()
+{
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+
+#ifdef DEBUG
+ if (ufiInProlog)
+ {
+ assert(ufiEpilogList == NULL);
+ ufiInProlog = false;
+ }
+ else
+ {
+ assert(ufiEpilogList != NULL);
+ }
+#endif // DEBUG
+
+ // Either allocate a new epilog object, or, for the first one, use the
+ // preallocated one that is a member of the UnwindFragmentInfo class.
+
+ UnwindEpilogInfo* newepi;
+
+ if (ufiEpilogList == NULL)
+ {
+ // Use the epilog that's in the class already. Be sure to initialize it!
+ newepi = ufiEpilogList = &ufiEpilogFirst;
+ }
+ else
+ {
+ newepi = new (uwiComp, CMK_UnwindInfo) UnwindEpilogInfo(uwiComp);
+ }
+
+ // Put the new epilog at the end of the epilog list
+
+ if (ufiEpilogLast != NULL)
+ {
+ ufiEpilogLast->epiNext = newepi;
+ }
+
+ ufiEpilogLast = newepi;
+
+ // What is the starting code offset of the epilog? Store an emitter location
+ // so we can ask the emitter later, after codegen.
+
+ newepi->CaptureEmitLocation();
+
+ // Put subsequent unwind codes in this new epilog
+
+ ufiCurCodes = &newepi->epiCodes;
+}
+
+// Copy the prolog codes from the 'pCopyFrom' fragment. These prolog codes will
+// become 'phantom' prolog codes in this fragment. Note that this fragment should
+// not have any prolog codes currently; it is at the initial state.
+
+void UnwindFragmentInfo::CopyPrologCodes(UnwindFragmentInfo* pCopyFrom)
+{
+ ufiPrologCodes.CopyFrom(&pCopyFrom->ufiPrologCodes);
+#ifdef _TARGET_ARM64_
+ ufiPrologCodes.AddCode(UWC_END_C);
+#endif
+}
+
+// Split the epilog codes that currently exist in 'pSplitFrom'. The ones that represent
+// epilogs that start at or after the location represented by 'emitLoc' are removed
+// from 'pSplitFrom' and moved to this fragment. Note that this fragment should not have
+// any epilog codes currently; it is at the initial state.
+
+void UnwindFragmentInfo::SplitEpilogCodes(emitLocation* emitLoc, UnwindFragmentInfo* pSplitFrom)
+{
+ UnwindEpilogInfo* pEpiPrev;
+ UnwindEpilogInfo* pEpi;
+
+ UNATIVE_OFFSET splitOffset = emitLoc->CodeOffset(uwiComp->genEmitter);
+
+ for (pEpiPrev = NULL, pEpi = pSplitFrom->ufiEpilogList; pEpi != NULL; pEpiPrev = pEpi, pEpi = pEpi->epiNext)
+ {
+ pEpi->FinalizeOffset(); // Get the offset of the epilog from the emitter so we can compare it
+ if (pEpi->GetStartOffset() >= splitOffset)
+ {
+ // This epilog and all following epilogs, which must be in order of increasing offsets,
+ // get moved to this fragment.
+
+ // Splice in the epilogs to this fragment. Set the head of the epilog
+ // list to this epilog.
+ ufiEpilogList = pEpi; // In this case, don't use 'ufiEpilogFirst'
+ ufiEpilogLast = pSplitFrom->ufiEpilogLast;
+
+ // Splice out the tail of the list from the 'pSplitFrom' epilog list
+ pSplitFrom->ufiEpilogLast = pEpiPrev;
+ if (pSplitFrom->ufiEpilogLast == NULL)
+ {
+ pSplitFrom->ufiEpilogList = NULL;
+ }
+ else
+ {
+ pSplitFrom->ufiEpilogLast->epiNext = NULL;
+ }
+
+ // No more codes should be added once we start splitting
+ pSplitFrom->ufiCurCodes = NULL;
+ ufiCurCodes = NULL;
+
+ break;
+ }
+ }
+}
+
+// Is this epilog at the end of an unwind fragment? Ask the emitter.
+// Note that we need to know this before all code offsets are finalized,
+// so we can determine whether we can omit an epilog scope word for a
+// single matching epilog.
+
+bool UnwindFragmentInfo::IsAtFragmentEnd(UnwindEpilogInfo* pEpi)
+{
+ return uwiComp->genEmitter->emitIsFuncEnd(pEpi->epiEmitLocation, (ufiNext == NULL) ? NULL : ufiNext->ufiEmitLoc);
+}
+
+// Merge the unwind codes as much as possible.
+// This function is called before all offsets are final.
+// Also, compute the size of the final unwind block. Store this
+// and some other data for later, when we actually emit the
+// unwind block.
+
+void UnwindFragmentInfo::MergeCodes()
+{
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+
+ unsigned epilogCount = 0;
+ unsigned epilogCodeBytes = 0; // The total number of unwind code bytes used by epilogs that don't match the
+ // prolog codes
+ unsigned epilogIndex = ufiPrologCodes.Size(); // The "Epilog Start Index" for the next non-matching epilog codes
+ UnwindEpilogInfo* pEpi;
+
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ ++epilogCount;
+
+ pEpi->FinalizeCodes();
+
+ // Does this epilog match the prolog?
+ // NOTE: for the purpose of matching, we don't handle the 0xFD and 0xFE end codes that allow slightly unequal
+ // prolog and epilog codes.
+
+ int matchIndex;
+
+ matchIndex = ufiPrologCodes.Match(pEpi);
+ if (matchIndex != -1)
+ {
+ pEpi->SetMatches();
+ pEpi->SetStartIndex(matchIndex); // Prolog codes start at zero, so matchIndex is exactly the start index
+ }
+ else
+ {
+ // The epilog codes don't match the prolog codes. Do they match any of the epilogs
+ // we've seen so far?
+
+ bool matched = false;
+ for (UnwindEpilogInfo* pEpi2 = ufiEpilogList; pEpi2 != pEpi; pEpi2 = pEpi2->epiNext)
+ {
+ matchIndex = pEpi2->Match(pEpi);
+ if (matchIndex != -1)
+ {
+ // Use the same epilog index as the one we matched, as it has already been set.
+ pEpi->SetMatches();
+ pEpi->SetStartIndex(pEpi2->GetStartIndex() + matchIndex); // We might match somewhere inside pEpi2's
+ // codes, in which case matchIndex > 0
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ {
+ pEpi->SetStartIndex(epilogIndex); // We'll copy these codes to the next available location
+ epilogCodeBytes += pEpi->Size();
+ epilogIndex += pEpi->Size();
+ }
+ }
+ }
+
+ DWORD codeBytes = ufiPrologCodes.Size() + epilogCodeBytes;
+ codeBytes = AlignUp(codeBytes, sizeof(DWORD));
+
+ DWORD codeWords =
+ codeBytes / sizeof(DWORD); // This is how many words we need to store all the unwind codes in the unwind block
+
+ // Do we need the 2nd header word for "Extended Code Words" or "Extended Epilog Count"?
+
+ bool needExtendedCodeWordsEpilogCount =
+ (codeWords > UW_MAX_CODE_WORDS_COUNT) || (epilogCount > UW_MAX_EPILOG_COUNT);
+
+ // How many epilog scope words do we need?
+
+ bool setEBit = false; // do we need to set the E bit?
+ unsigned epilogScopes = epilogCount; // Note that this could be zero if we have no epilogs!
+
+ if (epilogCount == 1)
+ {
+ assert(ufiEpilogList != NULL);
+ assert(ufiEpilogList->epiNext == NULL);
+
+ if (ufiEpilogList->Matches() && (ufiEpilogList->GetStartIndex() == 0) && // The match is with the prolog
+ !needExtendedCodeWordsEpilogCount && IsAtFragmentEnd(ufiEpilogList))
+ {
+ epilogScopes = 0; // Don't need any epilog scope words
+ setEBit = true;
+ }
+ }
+
+ DWORD headerBytes = (1 // Always need first header DWORD
+ + (needExtendedCodeWordsEpilogCount ? 1 : 0) // Do we need the 2nd DWORD for Extended Code
+ // Words or Extended Epilog Count?
+ + epilogScopes // One DWORD per epilog scope, for EBit = 0
+ ) *
+ sizeof(DWORD); // convert it to bytes
+
+ DWORD finalSize = headerBytes + codeBytes; // Size of actual unwind codes, aligned up to 4-byte words,
+ // including end padding if necessary
+
+ // Construct the final unwind information.
+
+ // We re-use the memory for the prolog unwind codes to construct the full unwind data. If all the epilogs
+ // match the prolog, this is easy: we just prepend the header. If there are epilog codes that don't match
+ // the prolog, we still use the prolog codes memory, but it's a little more complicated, since the
+ // unwind info is ordered as: (a) header, (b) prolog codes, (c) non-matching epilog codes. And, the prolog
+ // codes array is filled in from end-to-beginning. So, we compute the size of memory we need, ensure we
+ // have that much memory, and then copy the prolog codes to the right place, appending the non-matching
+ // epilog codes and prepending the header.
+
+ ufiPrologCodes.SetFinalSize(headerBytes, epilogCodeBytes);
+
+ if (epilogCodeBytes != 0)
+ {
+ // We need to copy the epilog code bytes to their final memory location
+
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ if (!pEpi->Matches())
+ {
+ ufiPrologCodes.AppendEpilog(pEpi);
+ }
+ }
+ }
+
+ // Save some data for later
+
+ ufiSize = finalSize;
+ ufiSetEBit = setEBit;
+ ufiNeedExtendedCodeWordsEpilogCount = needExtendedCodeWordsEpilogCount;
+ ufiCodeWords = codeWords;
+ ufiEpilogScopes = epilogScopes;
+}
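+
+// Worked example (hypothetical sizes, assuming the UW_MAX_* limits are not exceeded): a prolog with
+// 6 code bytes and a single epilog whose 4 code bytes do not match gives epilogCodeBytes = 4,
+// codeBytes = AlignUp(6 + 4, 4) = 12, codeWords = 3, epilogScopes = 1, setEBit = false,
+// headerBytes = (1 + 0 + 1) * 4 = 8, and finalSize = 8 + 12 = 20.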
+
+// Finalize: Prepare the unwind information for the VM. Compute and prepend the unwind header.
+
+void UnwindFragmentInfo::Finalize(UNATIVE_OFFSET functionLength)
+{
+ assert(ufiInitialized == UFI_INITIALIZED_PATTERN);
+
+#ifdef DEBUG
+ if (0 && uwiComp->verbose)
+ {
+ printf("*************** Before fragment #%d finalize\n", ufiNum);
+ Dump();
+ }
+#endif
+
+// Compute the header
+
+#if defined(_TARGET_ARM_)
+ noway_assert((functionLength & 1) == 0);
+ DWORD headerFunctionLength = functionLength / 2;
+#elif defined(_TARGET_ARM64_)
+ noway_assert((functionLength & 3) == 0);
+ DWORD headerFunctionLength = functionLength / 4;
+#endif // _TARGET_ARM64_
+
+ DWORD headerVers = 0; // Version of the unwind info is zero. No other version number is currently defined.
+ DWORD headerXBit = 0; // We never generate "exception data", but the VM might add some.
+ DWORD headerEBit;
+#if defined(_TARGET_ARM_)
+ DWORD headerFBit = ufiHasPhantomProlog ? 1 : 0; // Is this data a fragment in the sense of the unwind data
+ // specification? That is, do the prolog codes represent a real
+ // prolog or not?
+#endif // defined(_TARGET_ARM_)
+ DWORD headerEpilogCount; // This depends on how we set headerEBit.
+ DWORD headerCodeWords;
+ DWORD headerExtendedEpilogCount = 0; // This depends on how we set headerEBit.
+ DWORD headerExtendedCodeWords = 0;
+
+ if (ufiSetEBit)
+ {
+ headerEBit = 1;
+ headerEpilogCount = ufiEpilogList->GetStartIndex(); // probably zero -- the start of the prolog codes!
+ headerCodeWords = ufiCodeWords;
+ }
+ else
+ {
+ headerEBit = 0;
+
+ if (ufiNeedExtendedCodeWordsEpilogCount)
+ {
+ headerEpilogCount = 0;
+ headerCodeWords = 0;
+ headerExtendedEpilogCount = ufiEpilogScopes;
+ headerExtendedCodeWords = ufiCodeWords;
+ }
+ else
+ {
+ headerEpilogCount = ufiEpilogScopes;
+ headerCodeWords = ufiCodeWords;
+ }
+ }
+
+ // Start writing the header
+
+ noway_assert(headerFunctionLength <=
+ 0x3FFFFU); // We create fragments to prevent this from firing, so if it hits, we have an internal error
+
+ if ((headerEpilogCount > UW_MAX_EPILOG_COUNT) || (headerCodeWords > UW_MAX_CODE_WORDS_COUNT))
+ {
+ IMPL_LIMITATION("unwind data too large");
+ }
+
+#if defined(_TARGET_ARM_)
+ DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) |
+ (headerFBit << 22) | (headerEpilogCount << 23) | (headerCodeWords << 28);
+#elif defined(_TARGET_ARM64_)
+ DWORD header = headerFunctionLength | (headerVers << 18) | (headerXBit << 20) | (headerEBit << 21) |
+ (headerEpilogCount << 22) | (headerCodeWords << 27);
+#endif // defined(_TARGET_ARM64_)
+
+ ufiPrologCodes.AddHeaderWord(header);
+
+ // Construct the second header word, if needed
+
+ if (ufiNeedExtendedCodeWordsEpilogCount)
+ {
+ noway_assert(headerEBit == 0);
+ noway_assert(headerEpilogCount == 0);
+ noway_assert(headerCodeWords == 0);
+ noway_assert((headerExtendedEpilogCount > UW_MAX_EPILOG_COUNT) ||
+ (headerExtendedCodeWords > UW_MAX_CODE_WORDS_COUNT));
+
+ if ((headerExtendedEpilogCount > UW_MAX_EXTENDED_EPILOG_COUNT) ||
+ (headerExtendedCodeWords > UW_MAX_EXTENDED_CODE_WORDS_COUNT))
+ {
+ IMPL_LIMITATION("unwind data too large");
+ }
+
+ DWORD header2 = headerExtendedEpilogCount | (headerExtendedCodeWords << 16);
+
+ ufiPrologCodes.AddHeaderWord(header2);
+ }
+
+ // Construct the epilog scope words, if needed
+
+ if (!ufiSetEBit)
+ {
+ for (UnwindEpilogInfo* pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+#if defined(_TARGET_ARM_)
+ DWORD headerCondition = 0xE; // The epilog is unconditional. We don't have epilogs under the IT instruction.
+#endif // defined(_TARGET_ARM_)
+
+ // The epilog must strictly follow the prolog. The prolog is in the first fragment of
+ // the hot section. If this epilog is at the start of a fragment, it can't be the
+ // first fragment in the hot section. We actually don't know if we're processing
+ // the hot or cold section (or a funclet), so we can't distinguish these cases. Thus,
+ // we just assert that the epilog starts within the fragment.
+ assert(pEpi->GetStartOffset() >= GetStartOffset());
+
+ // We report the offset of an epilog as the offset from the beginning of the function/funclet fragment,
+ // NOT the offset from the beginning of the main function.
+ DWORD headerEpilogStartOffset = pEpi->GetStartOffset() - GetStartOffset();
+
+#if defined(_TARGET_ARM_)
+ noway_assert((headerEpilogStartOffset & 1) == 0);
+ headerEpilogStartOffset /= 2; // The unwind data stores the actual offset divided by 2 (since the low bit of
+ // the actual offset is always zero)
+#elif defined(_TARGET_ARM64_)
+ noway_assert((headerEpilogStartOffset & 3) == 0);
+ headerEpilogStartOffset /= 4; // The unwind data stores the actual offset divided by 4 (since the low 2 bits
+ // of the actual offset are always zero)
+#endif // defined(_TARGET_ARM64_)
+
+ DWORD headerEpilogStartIndex = pEpi->GetStartIndex();
+
+ if ((headerEpilogStartOffset > UW_MAX_EPILOG_START_OFFSET) ||
+ (headerEpilogStartIndex > UW_MAX_EPILOG_START_INDEX))
+ {
+ IMPL_LIMITATION("unwind data too large");
+ }
+
+#if defined(_TARGET_ARM_)
+ DWORD epilogScopeWord = headerEpilogStartOffset | (headerCondition << 20) | (headerEpilogStartIndex << 24);
+#elif defined(_TARGET_ARM64_)
+ DWORD epilogScopeWord = headerEpilogStartOffset | (headerEpilogStartIndex << 22);
+#endif // defined(_TARGET_ARM64_)
+
+ ufiPrologCodes.AddHeaderWord(epilogScopeWord);
+ }
+ }
+
+ // The unwind code words are already here, following the header, so we're done!
+}
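+
+// Illustrative header encoding (ARM, hypothetical values): for a 0x200-byte fragment with the E bit
+// set (a single epilog matching the prolog at start index 0) and 2 code words, headerFunctionLength
+// = 0x100 and header = 0x100 | (1 << 21) | (0 << 23) | (2 << 28) = 0x20200100.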
+
+void UnwindFragmentInfo::Reserve(BOOL isFunclet, bool isHotCode)
+{
+ assert(isHotCode || !isFunclet); // TODO-CQ: support hot/cold splitting in functions with EH
+
+ MergeCodes();
+
+ BOOL isColdCode = isHotCode ? FALSE : TRUE;
+
+ ULONG unwindSize = Size();
+
+#ifdef DEBUG
+ if (uwiComp->verbose)
+ {
+ if (ufiNum != 1)
+ printf("reserveUnwindInfo: fragment #%d:\n", ufiNum);
+ }
+#endif
+
+ uwiComp->eeReserveUnwindInfo(isFunclet, isColdCode, unwindSize);
+}
+
+// Allocate the unwind info for a fragment with the VM.
+// Arguments:
+// funKind: funclet kind
+// pHotCode: hot section code buffer
+// pColdCode: cold section code buffer
+// funcEndOffset: offset of the end of this function/funclet. Used if this fragment is the last one for a
+// function/funclet.
+// isHotCode: are we allocating the unwind info for the hot code section?
+
+void UnwindFragmentInfo::Allocate(
+ CorJitFuncKind funKind, void* pHotCode, void* pColdCode, UNATIVE_OFFSET funcEndOffset, bool isHotCode)
+{
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ UNATIVE_OFFSET codeSize;
+
+ // We don't support hot/cold splitting with EH, so if there is cold code, this
+ // better not be a funclet!
+ // TODO-CQ: support funclets in cold code
+
+ noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT);
+
+ // Compute the final size, and start and end offsets of the fragment
+
+ startOffset = GetStartOffset();
+
+ if (ufiNext == NULL)
+ {
+ // This is the last fragment, so the fragment extends to the end of the function/funclet.
+ assert(funcEndOffset != 0);
+ endOffset = funcEndOffset;
+ }
+ else
+ {
+ // The fragment length is all the code between the beginning of this fragment
+ // and the beginning of the next fragment. Note that all fragments have had their
+ // offsets computed before any fragment is allocated.
+ endOffset = ufiNext->GetStartOffset();
+ }
+
+ assert(endOffset > startOffset);
+ codeSize = endOffset - startOffset;
+
+ // Finalize the fragment unwind block to hand to the VM
+
+ Finalize(codeSize);
+
+ // Get the final unwind information and hand it to the VM
+
+ ULONG unwindBlockSize;
+ BYTE* pUnwindBlock;
+
+ GetFinalInfo(&pUnwindBlock, &unwindBlockSize);
+
+#ifdef DEBUG
+ if (uwiComp->opts.dspUnwind)
+ {
+ DumpUnwindInfo(uwiComp, isHotCode, startOffset, endOffset, pUnwindBlock, unwindBlockSize);
+ }
+#endif // DEBUG
+
+ // Adjust for cold or hot code:
+ // 1. The VM doesn't want the cold code pointer unless this is cold code.
+ // 2. The startOffset and endOffset need to be from the base of the hot section for hot code
+ // and from the base of the cold section for cold code
+
+ if (isHotCode)
+ {
+ assert(endOffset <= uwiComp->info.compTotalHotCodeSize);
+ pColdCode = NULL;
+ }
+ else
+ {
+ assert(startOffset >= uwiComp->info.compTotalHotCodeSize);
+ startOffset -= uwiComp->info.compTotalHotCodeSize;
+ endOffset -= uwiComp->info.compTotalHotCodeSize;
+ }
+
+#ifdef DEBUG
+ if (uwiComp->verbose)
+ {
+ if (ufiNum != 1)
+ printf("unwindEmit: fragment #%d:\n", ufiNum);
+ }
+#endif // DEBUG
+
+ uwiComp->eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, unwindBlockSize, pUnwindBlock,
+ funKind);
+}
+
+#ifdef DEBUG
+void UnwindFragmentInfo::Dump(int indent)
+{
+ unsigned count;
+ UnwindEpilogInfo* pEpi;
+
+ count = 0;
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ ++count;
+ }
+
+ printf("%*sUnwindFragmentInfo #%d, @0x%08p, size:%d:\n", indent, "", ufiNum, dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s ufiNext: 0x%08p\n", indent, "", dspPtr(ufiNext));
+ printf("%*s ufiEmitLoc: 0x%08p\n", indent, "", dspPtr(ufiEmitLoc));
+ printf("%*s ufiHasPhantomProlog: %s\n", indent, "", dspBool(ufiHasPhantomProlog));
+ printf("%*s %d epilog%s\n", indent, "", count, (count != 1) ? "s" : "");
+ printf("%*s ufiEpilogList: 0x%08p\n", indent, "", dspPtr(ufiEpilogList));
+ printf("%*s ufiEpilogLast: 0x%08p\n", indent, "", dspPtr(ufiEpilogLast));
+ printf("%*s ufiCurCodes: 0x%08p\n", indent, "", dspPtr(ufiCurCodes));
+ printf("%*s ufiSize: %u\n", indent, "", ufiSize);
+ printf("%*s ufiSetEBit: %s\n", indent, "", dspBool(ufiSetEBit));
+ printf("%*s ufiNeedExtendedCodeWordsEpilogCount: %s\n", indent, "", dspBool(ufiNeedExtendedCodeWordsEpilogCount));
+ printf("%*s ufiCodeWords: %u\n", indent, "", ufiCodeWords);
+ printf("%*s ufiEpilogScopes: %u\n", indent, "", ufiEpilogScopes);
+ printf("%*s ufiStartOffset: 0x%x\n", indent, "", ufiStartOffset);
+ printf("%*s ufiInProlog: %s\n", indent, "", dspBool(ufiInProlog));
+ printf("%*s ufiInitialized: 0x%08x\n", indent, "", ufiInitialized);
+
+ ufiPrologCodes.Dump(indent + 2);
+
+ for (pEpi = ufiEpilogList; pEpi != NULL; pEpi = pEpi->epiNext)
+ {
+ pEpi->Dump(indent + 2);
+ }
+}
+#endif // DEBUG
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindInfo
+//
+///////////////////////////////////////////////////////////////////////////////
+
+void UnwindInfo::InitUnwindInfo(Compiler* comp, emitLocation* startLoc, emitLocation* endLoc)
+{
+ uwiComp = comp;
+
+ // The first fragment is a member of UnwindInfo, so it doesn't need to be allocated.
+ // However, its constructor needs to be explicitly called, since the constructor for
+ // UnwindInfo is not called.
+
+ uwiFragmentFirst.UnwindFragmentInfo::UnwindFragmentInfo(comp, startLoc, false);
+
+ uwiFragmentLast = &uwiFragmentFirst;
+
+ uwiEndLoc = endLoc;
+
+ // Allocate an emitter location object. It is initialized to something
+ // invalid: it has a null 'ig' that needs to get set before it can be used.
+ // Note that when we create an UnwindInfo for the cold section, this never
+ // gets initialized with anything useful, since we never add unwind codes
+ // to the cold section; we simply distribute the existing (previously added) codes.
+ uwiCurLoc = new (uwiComp, CMK_UnwindInfo) emitLocation();
+
+#ifdef DEBUG
+ uwiInitialized = UWI_INITIALIZED_PATTERN;
+ uwiAddingNOP = false;
+#endif // DEBUG
+}
+
+// Split the unwind codes in 'puwi' into those that are in the hot section (leave them in 'puwi')
+// and those that are in the cold section (move them to 'this'). There is exactly one fragment
+// in each UnwindInfo; the fragments haven't been split for size, yet.
+
+void UnwindInfo::HotColdSplitCodes(UnwindInfo* puwi)
+{
+ // Ensure that there is exactly a single fragment in both the hot and the cold sections
+ assert(&uwiFragmentFirst == uwiFragmentLast);
+ assert(&puwi->uwiFragmentFirst == puwi->uwiFragmentLast);
+ assert(uwiFragmentLast->ufiNext == NULL);
+ assert(puwi->uwiFragmentLast->ufiNext == NULL);
+
+ // The real prolog is in the hot section, so this, cold, section has a phantom prolog
+ uwiFragmentLast->ufiHasPhantomProlog = true;
+ uwiFragmentLast->CopyPrologCodes(puwi->uwiFragmentLast);
+
+ // Now split the epilog codes
+ uwiFragmentLast->SplitEpilogCodes(uwiFragmentLast->ufiEmitLoc, puwi->uwiFragmentLast);
+}
+
+// Split the function or funclet into fragments that are no larger than 512K,
+// so the fragment size will fit in the unwind data "Function Length" field.
+// The ARM Exception Data specification "Function Fragments" section describes this.
+// We split the function so that each fragment is no larger than 512K bytes, or the
+// value of COMPlus_JitSplitFunctionSize, if defined (and smaller). We must determine
+// how to split the function/funclet before we issue the instructions, so we can
+// reserve the unwind space with the VM. The instructions issued may shrink (but not
+// expand!) during issuing (although this is extremely rare in any case, and may not
+// actually occur on ARM), so we don't finalize actual sizes or offsets.
+//
+// ARM64 has very similar limitations, except functions can be up to 1MB. TODO-ARM64-Bug?: make sure this works!
+//
+// We don't split any prolog or epilog. Ideally, we might not split an instruction,
+// although that doesn't matter because the unwind at any point would still be
+// well-defined.
+
+void UnwindInfo::Split()
+{
+ UNATIVE_OFFSET maxFragmentSize; // The maximum size of a code fragment in bytes
+
+ maxFragmentSize = UW_MAX_FRAGMENT_SIZE_BYTES;
+
+#ifdef DEBUG
+ // Consider COMPlus_JitSplitFunctionSize
+ unsigned splitFunctionSize = (unsigned)JitConfig.JitSplitFunctionSize();
+
+ if (splitFunctionSize != 0)
+ if (splitFunctionSize < maxFragmentSize)
+ maxFragmentSize = splitFunctionSize;
+#endif // DEBUG
+
+ // Now, there should be exactly one fragment.
+
+ assert(uwiFragmentLast != NULL);
+ assert(uwiFragmentLast == &uwiFragmentFirst);
+ assert(uwiFragmentLast->ufiNext == NULL);
+
+ // Find the code size of this function/funclet.
+
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+ UNATIVE_OFFSET codeSize;
+
+ if (uwiFragmentLast->ufiEmitLoc == NULL)
+ {
+ // NULL emit location means the beginning of the code. This is to handle the first fragment prolog.
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = uwiFragmentLast->ufiEmitLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ if (uwiEndLoc == NULL)
+ {
+ // Note that compTotalHotCodeSize and compTotalColdCodeSize are computed before issuing instructions
+ // from the emitter instruction group offsets, and will be accurate unless the issued code shrinks.
+ // compNativeCodeSize is precise, but is only set after instructions are issued, which is too late
+ // for us, since we need to decide how many fragments we need before the code memory is allocated
+ // (which is before instruction issuing).
+ UNATIVE_OFFSET estimatedTotalCodeSize =
+ uwiComp->info.compTotalHotCodeSize + uwiComp->info.compTotalColdCodeSize;
+ assert(estimatedTotalCodeSize != 0);
+ endOffset = estimatedTotalCodeSize;
+ }
+ else
+ {
+ endOffset = uwiEndLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ assert(endOffset > startOffset); // there better be at least 1 byte of code
+ codeSize = endOffset - startOffset;
+
+ // Now that we know the code size for this section (main function hot or cold, or funclet),
+ // figure out how many fragments we're going to need.
+
+ UNATIVE_OFFSET numberOfFragments = (codeSize + maxFragmentSize - 1) / maxFragmentSize; // round up
+ assert(numberOfFragments > 0);
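+
+ // For illustration: with the default 512K (0x80000) maximum described above, a hypothetical
+ // 1,200,000-byte section yields numberOfFragments = (1200000 + 524287) / 524288 = 3.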
+
+ if (numberOfFragments == 1)
+ {
+ // No need to split; we're done
+ return;
+ }
+
+ // Now, we're going to commit to splitting the function into "numberOfFragments" fragments,
+ // for the purpose of unwind information. We need to do the actual splits so we can figure out
+ // the size of each piece of unwind data for the call to reserveUnwindInfo(). We won't know
+ // the actual offsets of the splits since we haven't issued the instructions yet, so store
+ // an emitter location instead of an offset, and "finalize" the offset in the unwindEmit() phase,
+ // like we do for the function length and epilog offsets.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (uwiComp->verbose)
+ {
+ printf("Split unwind info into %d fragments (function/funclet size: %d, maximum fragment size: %d)\n",
+ numberOfFragments, codeSize, maxFragmentSize);
+ }
+#endif // DEBUG
+
+ // Call the emitter to do the split, and call us back for every split point it chooses.
+ uwiComp->genEmitter->emitSplit(uwiFragmentLast->ufiEmitLoc, uwiEndLoc, maxFragmentSize, (void*)this,
+ EmitSplitCallback);
+
+#ifdef DEBUG
+ // Did the emitter split the function/funclet into as many fragments as we asked for?
+ // It might be fewer if the COMPlus_JitSplitFunctionSize was used, but it better not
+ // be fewer if we're splitting into 512K blocks!
+
+ unsigned fragCount = 0;
+ for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ ++fragCount;
+ }
+ if (fragCount < numberOfFragments)
+ {
+ if (uwiComp->verbose)
+ {
+ printf("WARNING: asked the emitter for %d fragments, but only got %d\n", numberOfFragments, fragCount);
+ }
+
+ // If this fires, then we split into fewer fragments than we asked for, and we are using
+ // the default, unwind-data-defined 512K maximum fragment size. We won't be able to fit
+ // this fragment into the unwind data! If you set COMPlus_JitSplitFunctionSize to something
+ // small, we might not be able to split into as many fragments as asked for, because we
+ // can't split prologs or epilogs.
+ assert(maxFragmentSize != UW_MAX_FRAGMENT_SIZE_BYTES);
+ }
+#endif // DEBUG
+}
+
+/*static*/ void UnwindInfo::EmitSplitCallback(void* context, emitLocation* emitLoc)
+{
+ UnwindInfo* puwi = (UnwindInfo*)context;
+ puwi->AddFragment(emitLoc);
+}
+
+// Reserve space for the unwind info for all fragments
+
+void UnwindInfo::Reserve(BOOL isFunclet, bool isHotCode)
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(isHotCode || !isFunclet);
+
+ for (UnwindFragmentInfo* pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->Reserve(isFunclet, isHotCode);
+ }
+}
+
+// Allocate and populate VM unwind info for all fragments
+
+void UnwindInfo::Allocate(CorJitFuncKind funKind, void* pHotCode, void* pColdCode, bool isHotCode)
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+
+ UnwindFragmentInfo* pFrag;
+
+ // First, finalize all the offsets (the location of the beginning of fragments, and epilogs),
+ // so a fragment can use the finalized offset of the subsequent fragment to determine its code size.
+
+ UNATIVE_OFFSET endOffset;
+
+ if (uwiEndLoc == NULL)
+ {
+ assert(uwiComp->info.compNativeCodeSize != 0);
+ endOffset = uwiComp->info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = uwiEndLoc->CodeOffset(uwiComp->genEmitter);
+ }
+
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->FinalizeOffset();
+ }
+
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->Allocate(funKind, pHotCode, pColdCode, endOffset, isHotCode);
+ }
+}
+
+void UnwindInfo::AddEpilog()
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+ uwiFragmentLast->AddEpilog();
+ CaptureLocation();
+}
+
+#if defined(_TARGET_ARM_)
+
+unsigned UnwindInfo::GetInstructionSize()
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ return uwiComp->genEmitter->emitGetInstructionSize(uwiCurLoc);
+}
+
+#endif // defined(_TARGET_ARM_)
+
+void UnwindInfo::CaptureLocation()
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiCurLoc != NULL);
+ uwiCurLoc->CaptureLocation(uwiComp->genEmitter);
+}
+
+void UnwindInfo::AddFragment(emitLocation* emitLoc)
+{
+ assert(uwiInitialized == UWI_INITIALIZED_PATTERN);
+ assert(uwiFragmentLast != NULL);
+
+ UnwindFragmentInfo* newFrag = new (uwiComp, CMK_UnwindInfo) UnwindFragmentInfo(uwiComp, emitLoc, true);
+
+#ifdef DEBUG
+ newFrag->ufiNum = uwiFragmentLast->ufiNum + 1;
+#endif // DEBUG
+
+ newFrag->CopyPrologCodes(&uwiFragmentFirst);
+ newFrag->SplitEpilogCodes(emitLoc, uwiFragmentLast);
+
+ // Link the new fragment in at the end of the fragment list
+ uwiFragmentLast->ufiNext = newFrag;
+ uwiFragmentLast = newFrag;
+}
+
+#ifdef DEBUG
+
+#if defined(_TARGET_ARM_)
+
+// Given the first byte of the unwind code, check that its opsize matches
+// the last instruction added in the emitter.
+void UnwindInfo::CheckOpsize(BYTE b1)
+{
+ // Adding NOP padding goes through the same path, but doesn't update the location to indicate
+ // the correct location of the instruction for which we are adding a NOP, so just skip the
+ // assert. Should be ok, because the emitter is telling us the size of the instruction for
+ // which we are adding the NOP.
+ if (uwiAddingNOP)
+ return;
+
+ unsigned opsizeInBytes = GetOpcodeSizeFromUnwindHeader(b1);
+ unsigned instrSizeInBytes = GetInstructionSize();
+ assert(opsizeInBytes == instrSizeInBytes);
+}
+
+#endif // defined(_TARGET_ARM_)
+
+void UnwindInfo::Dump(bool isHotCode, int indent)
+{
+ unsigned count;
+ UnwindFragmentInfo* pFrag;
+
+ count = 0;
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ ++count;
+ }
+
+ printf("%*sUnwindInfo %s@0x%08p, size:%d:\n", indent, "", isHotCode ? "" : "COLD ", dspPtr(this), sizeof(*this));
+ printf("%*s uwiComp: 0x%08p\n", indent, "", dspPtr(uwiComp));
+ printf("%*s %d fragment%s\n", indent, "", count, (count != 1) ? "s" : "");
+ printf("%*s uwiFragmentLast: 0x%08p\n", indent, "", dspPtr(uwiFragmentLast));
+ printf("%*s uwiEndLoc: 0x%08p\n", indent, "", dspPtr(uwiEndLoc));
+ printf("%*s uwiInitialized: 0x%08x\n", indent, "", uwiInitialized);
+
+ for (pFrag = &uwiFragmentFirst; pFrag != NULL; pFrag = pFrag->ufiNext)
+ {
+ pFrag->Dump(indent + 2);
+ }
+}
+
+#endif // DEBUG
+
+#if defined(_TARGET_ARM_)
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debug dumpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// start is 0-based index from LSB, length is number of bits
+DWORD ExtractBits(DWORD dw, DWORD start, DWORD length)
+{
+ return (dw >> start) & ((1 << length) - 1);
+}
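+
+// For example, ExtractBits(0x12345678, 8, 8) == 0x56 and ExtractBits(0x12345678, 28, 4) == 0x1.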
+
+// Dump an integer register set. 'x' is an array of bits where bit 0 = r0, bit 1 = r1, etc.
+// The highest register considered is r12.
+// If 'lr' is non-zero, the "lr" register is emitted last.
+// Returns the number of characters printed.
+DWORD DumpIntRegSet(DWORD x, DWORD lr)
+{
+ assert(x != 0 || lr != 0); // we must have one
+ assert((x & 0xE000) == 0); // don't handle r13 (sp), r14 (lr), r15 (pc) in 'x'
+ DWORD printed = 0;
+
+ printf("{");
+ ++printed;
+ bool first = true;
+ DWORD bitMask = 1;
+ for (DWORD bitNum = 0; bitNum < 12; bitNum++)
+ {
+ if (x & bitMask)
+ {
+ if (!first)
+ {
+ printf(",");
+ ++printed;
+ }
+ printf("r%u", bitNum);
+ printed += (bitNum < 10) ? 2 : 3;
+ first = false;
+ }
+ bitMask <<= 1;
+ }
+ if (lr)
+ {
+ if (!first)
+ {
+ printf(",");
+ ++printed;
+ }
+ printf("lr");
+ printed += 2;
+ }
+ printf("}");
+ ++printed;
+
+ return printed;
+}
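+
+// For example, DumpIntRegSet(0x000F, 1) prints "{r0,r1,r2,r3,lr}" and returns 16.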
+
+// Dump a register set range from register 'start' to register 'end'.
+// rtype should be "r" or "d" to indicate register type.
+// If 'lr' is non-zero, the "lr" register is emitted last. (Note that
+// 'lr' should be zero for rtype == "d".)
+// Returns the number of characters printed.
+DWORD DumpRegSetRange(const char* const rtype, DWORD start, DWORD end, DWORD lr)
+{
+ assert(start <= end);
+ DWORD printed = 0;
+ DWORD rtypeLen = (DWORD)strlen(rtype);
+
+ printf("{");
+ ++printed;
+ bool first = true;
+ for (DWORD reg = start; reg <= end; reg++)
+ {
+ if (!first)
+ {
+ printf(",");
+ ++printed;
+ }
+ printf("%s%u", rtype, reg);
+ printed += rtypeLen + ((reg < 10) ? 1 : 2);
+ first = false;
+ }
+ if (lr)
+ {
+ assert(!first); // If 'lr' is set, it can't be first, since we require a non-empty range
+ printf(",lr");
+ printed += 3;
+ }
+ printf("}");
+ ++printed;
+
+ return printed;
+}
+
+// Dump the opsize.
+// Returns the number of characters printed.
+DWORD DumpOpsize(DWORD padding, DWORD opsize)
+{
+ if (padding > 100) // underflow?
+ padding = 4;
+ DWORD printed = padding;
+ for (; padding > 0; padding--)
+ printf(" ");
+ printf("; opsize %d\n", opsize);
+ return printed + 11; // assumes opsize is always 2 digits
+}
+
+// Dump the unwind data.
+// Arguments:
+// isHotCode: true if this unwind data is for the hot section
+// startOffset: byte offset of the code start that this unwind data represents
+// endOffset: byte offset of the code end that this unwind data represents
+// pHeader: pointer to the unwind data blob
+// unwindBlockSize: size in bytes of the unwind data blob
+
+void DumpUnwindInfo(Compiler* comp,
+ bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const BYTE* const pHeader,
+ ULONG unwindBlockSize)
+{
+ printf("Unwind Info%s:\n", isHotCode ? "" : " COLD");
+
+ // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
+ // to provide padding, and round down to get a multiple of 4 bytes in size.
+ DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
+ DWORD dw;
+
+ dw = *pdw++;
+
+ DWORD codeWords = ExtractBits(dw, 28, 4);
+ DWORD epilogCount = ExtractBits(dw, 23, 5);
+ DWORD FBit = ExtractBits(dw, 22, 1);
+ DWORD EBit = ExtractBits(dw, 21, 1);
+ DWORD XBit = ExtractBits(dw, 20, 1);
+ DWORD Vers = ExtractBits(dw, 18, 2);
+ DWORD functionLength = ExtractBits(dw, 0, 18);
+
+ printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset));
+ printf(" Code Words : %u\n", codeWords);
+ printf(" Epilog Count : %u\n", epilogCount);
+ printf(" F bit : %u\n", FBit);
+ printf(" E bit : %u\n", EBit);
+ printf(" X bit : %u\n", XBit);
+ printf(" Vers : %u\n", Vers);
+ printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength,
+ functionLength * 2, functionLength * 2);
+
+ assert(functionLength * 2 == endOffset - startOffset);
+
+ if (codeWords == 0 && epilogCount == 0)
+ {
+ // We have an extension word specifying a larger number of Code Words or Epilog Counts
+ // than can be specified in the header word.
+
+ dw = *pdw++;
+
+ codeWords = ExtractBits(dw, 16, 8);
+ epilogCount = ExtractBits(dw, 0, 16);
+ assert((dw & 0xF0000000) == 0); // reserved field should be zero
+
+ printf(" ---- Extension word ----\n");
+ printf(" Extended Code Words : %u\n", codeWords);
+ printf(" Extended Epilog Count : %u\n", epilogCount);
+ }
+
+ bool epilogStartAt[256] = {}; // One byte per possible epilog start index; initialized to false
+
+ if (EBit == 0)
+ {
+ // We have an array of epilog scopes
+
+ printf(" ---- Epilog scopes ----\n");
+ if (epilogCount == 0)
+ {
+ printf(" No epilogs\n");
+ }
+ else
+ {
+ for (DWORD scope = 0; scope < epilogCount; scope++)
+ {
+ dw = *pdw++;
+
+ DWORD epilogStartOffset = ExtractBits(dw, 0, 18);
+ DWORD res = ExtractBits(dw, 18, 2);
+ DWORD condition = ExtractBits(dw, 20, 4);
+ DWORD epilogStartIndex = ExtractBits(dw, 24, 8);
+
+ // Note that epilogStartOffset for a funclet is the offset from the beginning
+ // of the current funclet, not the offset from the beginning of the main function.
+ // To help find it when looking through JitDump output, also show the offset from
+ // the beginning of the main function.
+ DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 2 + startOffset;
+
+ assert(res == 0);
+
+ printf(" ---- Scope %d\n", scope);
+ printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main "
+ "function begin = %u (0x%06x)\n",
+ comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset),
+ comp->dspOffset(epilogStartOffset * 2), comp->dspOffset(epilogStartOffset * 2),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin));
+ printf(" Condition : %u (0x%x)%s\n", condition, condition,
+ (condition == 0xE) ? " (always)" : "");
+ printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex);
+
+ epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes
+ }
+ }
+ }
+ else
+ {
+ printf(" --- One epilog, unwind codes at %u\n", epilogCount);
+ assert(epilogCount < sizeof(epilogStartAt) / sizeof(epilogStartAt[0]));
+ epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset
+ }
+
+ if (FBit)
+ {
+ printf(" ---- Note: 'F' bit is set. Prolog codes are for a 'phantom' prolog.\n");
+ }
+
+ // Dump the unwind codes
+
+ printf(" ---- Unwind codes ----\n");
+
+ DWORD countOfUnwindCodes = codeWords * 4;
+ PBYTE pUnwindCode = (PBYTE)pdw;
+ BYTE b1, b2, b3, b4;
+ DWORD x, y;
+ DWORD opsize;
+ DWORD opCol = 52;
+ DWORD printed;
+ for (DWORD i = 0; i < countOfUnwindCodes; i++)
+ {
+ // Does this byte start an epilog sequence? If so, note that fact.
+ if (epilogStartAt[i])
+ {
+ printf(" ---- Epilog start at index %u ----\n", i);
+ }
+
+ b1 = *pUnwindCode++;
+
+ if ((b1 & 0x80) == 0)
+ {
+ // 00-7F : add sp, sp, #X*4 (opsize 16)
+ x = b1 & 0x7F;
+ printf(" %02X add sp, sp, #%-8d", b1, x * 4);
+ DumpOpsize(opCol - 37, 16);
+ }
+ else if ((b1 & 0xC0) == 0x80)
+ {
+ // 80-BF : pop {r0-r12,lr} (X = bitmask) (opsize 32)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD LBit = ExtractBits(b1, 5, 1);
+ x = ((DWORD)(b1 & 0x1F) << 8) | (DWORD)b2;
+
+ printf(" %02X %02X pop ", b1, b2);
+ printed = 20;
+ printed += DumpIntRegSet(x, LBit);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if ((b1 & 0xF0) == 0xC0)
+ {
+ // C0-CF : mov sp, rX (X=0-15) (opsize 16)
+ x = b1 & 0xF;
+ printf(" %02X mov sp, r%u", b1, x);
+ printed = 25 + ((x >= 10) ? 2 : 1);
+ DumpOpsize(opCol - printed, 16);
+ }
+ else if ((b1 & 0xF8) == 0xD0)
+ {
+ // D0-D7 : pop {r4-rX,lr} (X=4-7) (opsize 16)
+ x = b1 & 0x3;
+ DWORD LBit = b1 & 0x4;
+ printf(" %02X pop ", b1);
+ printed = 20;
+ printed += DumpRegSetRange("r", 4, x + 4, LBit);
+ DumpOpsize(opCol - printed, 16);
+ }
+ else if ((b1 & 0xF8) == 0xD8)
+ {
+ // D8-DF : pop {r4-rX,lr} (X=8-11) (opsize 32)
+ x = b1 & 0x3;
+ DWORD LBit = b1 & 0x4;
+ printf(" %02X pop ", b1);
+ printed = 20;
+ printed += DumpRegSetRange("r", 4, x + 8, LBit);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if ((b1 & 0xF8) == 0xE0)
+ {
+ // E0-E7 : vpop {d8-dX} (X=8-15) (opsize 32)
+ x = b1 & 0x7;
+ printf(" %02X vpop ", b1);
+ printed = 21;
+ printed += DumpRegSetRange("d", 8, x + 8, 0);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if ((b1 & 0xFC) == 0xE8)
+ {
+ // E8-EB : addw sp, sp, #X*4 (opsize 32)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 8) | (DWORD)b2;
+
+ printf(" %02X %02X addw sp, sp, #%-8u", b1, b2, x * 4);
+ DumpOpsize(opCol - 38, 32);
+ }
+ else if ((b1 & 0xFE) == 0xEC)
+ {
+ // EC-ED : pop {r0-r7,lr} (X = bitmask) (opsize 16)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD LBit = ExtractBits(b1, 0, 1);
+ x = (DWORD)b2;
+
+ printf(" %02X %02X pop ", b1, b2);
+ printed = 20;
+ printed += DumpIntRegSet(x, LBit);
+ DumpOpsize(opCol - printed, 16);
+ }
+ else if (b1 == 0xEE)
+ {
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ if ((b2 & 0xF0) == 0)
+ {
+ // EE/0x (opsize 16)
+ x = b2 & 0xF;
+ printf(" %02X %02X Microsoft-specific (x = %02X)", b1, b2, x);
+ DumpOpsize(4, 16);
+ }
+ else
+ {
+ // EE/xy (opsize 16)
+ x = ExtractBits(b2, 4, 4);
+ y = ExtractBits(b2, 0, 4);
+ printf(" %02X %02X Available (x = %02X, y = %02X)", b1, b2, x, y);
+ DumpOpsize(4, 16);
+ }
+ }
+ else if (b1 == 0xEF)
+ {
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ if ((b2 & 0xF0) == 0)
+ {
+ // EF/0x : ldr lr, [sp], #X*4 (opsize 32)
+ x = b2 & 0xF;
+ printf(" %02X %02X ldr lr, [sp], #%-8u", b1, b2, x * 4);
+ DumpOpsize(opCol - 39, 32);
+ }
+ else
+ {
+ // EF/xy (opsize 32)
+ x = ExtractBits(b2, 4, 4);
+ y = ExtractBits(b2, 0, 4);
+ printf(" %02X %02X Available (x = %02X, y = %02X)", b1, b2, x, y);
+ DumpOpsize(4, 32);
+ }
+ }
+ else if ((0xF0 <= b1) && (b1 <= 0xF4))
+ {
+ // F0-F4
+ x = b1 & 0x7;
+ printf(" %02X Available (x = %02X)\n", b1, x);
+ }
+ else if (b1 == 0xF5)
+ {
+ // F5 : vpop {dS-dE} (opsize 32)
+
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD s = ExtractBits(b2, 4, 4);
+ DWORD e = ExtractBits(b2, 0, 4);
+
+ printf(" %02X %02X vpop ", b1, b2);
+ printed = 21;
+ printed += DumpRegSetRange("d", s, e, 0);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if (b1 == 0xF6)
+ {
+ // F6 : vpop {d(S+16)-d(E+16)} (opsize 32)
+
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ DWORD s = ExtractBits(b2, 4, 4);
+ DWORD e = ExtractBits(b2, 0, 4);
+
+ printf(" %02X %02X vpop ", b1, b2);
+ printed = 21;
+ printed += DumpRegSetRange("d", s + 16, e + 16, 0);
+ DumpOpsize(opCol - printed, 32);
+ }
+ else if (b1 == 0xF7 || b1 == 0xF9)
+ {
+ // F7, F9 : add sp, sp, #X*4
+ // 0xF7 has opsize 16, 0xF9 has opsize 32
+
+ assert(i + 2 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ b3 = *pUnwindCode++;
+ i += 2;
+
+ x = ((DWORD)b2 << 8) | (DWORD)b3;
+
+ opsize = (b1 == 0xF7) ? 16 : 32;
+
+ printf(" %02X %02X %02X add sp, sp, #%-8u", b1, b2, b3, x * 4, opsize);
+ DumpOpsize(opCol - 37, opsize);
+ }
+ else if (b1 == 0xF8 || b1 == 0xFA)
+ {
+ // F8, FA : add sp, sp, #X*4
+ // 0xF8 has opsize 16, 0xFA has opsize 32
+
+ assert(i + 3 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ b3 = *pUnwindCode++;
+ b4 = *pUnwindCode++;
+ i += 3;
+
+ x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4;
+
+ opsize = (b1 == 0xF8) ? 16 : 32;
+
+ printf(" %02X %02X %02X %02X add sp, sp, #%-8u", b1, b2, b3, b4, x * 4, opsize);
+ DumpOpsize(opCol - 37, opsize);
+ }
+ else if (b1 == 0xFB || b1 == 0xFC)
+ {
+ // FB, FC : nop
+ // 0xFB has opsize 16, 0xFC has opsize 32
+
+ opsize = (b1 == 0xFB) ? 16 : 32;
+
+ printf(" %02X nop", b1, opsize);
+ DumpOpsize(opCol - 19, opsize);
+ }
+ else if (b1 == 0xFD || b1 == 0xFE)
+ {
+ // FD, FE : end + nop
+ // 0xFD has opsize 16, 0xFE has opsize 32
+
+ opsize = (b1 == 0xFD) ? 16 : 32;
+
+ printf(" %02X end + nop", b1, opsize);
+ DumpOpsize(opCol - 25, opsize);
+ }
+ else if (b1 == 0xFF)
+ {
+ // FF : end
+
+ printf(" %02X end\n", b1);
+ }
+ else
+ {
+ assert(!"Internal error decoding unwind codes");
+ }
+ }
+
+ pdw += codeWords;
+ assert((PBYTE)pdw == pUnwindCode);
+ assert((PBYTE)pdw == pHeader + unwindBlockSize);
+
+ assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA
+
+ printf("\n");
+}
+
+#endif // DEBUG
+
+#endif // defined(_TARGET_ARM_)
+
+#endif // _TARGET_ARMARCH_
diff --git a/src/jit/unwindarm64.cpp b/src/jit/unwindarm64.cpp
new file mode 100644
index 0000000000..21e2a36b2a
--- /dev/null
+++ b/src/jit/unwindarm64.cpp
@@ -0,0 +1,802 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#if defined(_TARGET_ARM64_)
+
+void Compiler::unwindPush(regNumber reg)
+{
+ unreached(); // use one of the unwindSaveReg* functions instead.
+}
+
+void Compiler::unwindAllocStack(unsigned size)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ assert(size % 16 == 0);
+ unsigned x = size / 16;
+
+ if (x <= 0x1F)
+ {
+ // alloc_s: 000xxxxx: allocate small stack with size < 512 (2^5 * 16)
+
+ pu->AddCode((BYTE)x);
+ }
+ else if (x <= 0x7FF)
+ {
+ // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 32K (2^11 * 16)
+
+ pu->AddCode(0xC0 | (BYTE)(x >> 8), (BYTE)x);
+ }
+ else
+ {
+ // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16)
+ //
+ // For large stack size, the most significant bits
+ // are stored first (and next to the opCode) per the unwind spec.
+
+ pu->AddCode(0xE0, (BYTE)(x >> 16), (BYTE)(x >> 8), (BYTE)x);
+ }
+}
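+
+// Illustrative encodings (derived from the cases above):
+// unwindAllocStack(64) -> x = 4 -> alloc_s byte 0x04
+// unwindAllocStack(1024) -> x = 0x40 -> alloc_m bytes 0xC0 0x40
+// unwindAllocStack(65536) -> x = 0x1000 -> alloc_l bytes 0xE0 0x00 0x10 0x00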
+
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ if (offset == 0)
+ {
+ assert(reg == REG_FP);
+
+ // set_fp: 11100001 : set up r29 : with : mov r29, sp
+ pu->AddCode(0xE1);
+ }
+ else
+ {
+ // add_fp: 11100010 | xxxxxxxx : set up r29 with : add r29, sp, #x * 8
+
+ assert(reg == REG_FP);
+ assert((offset % 8) == 0);
+
+ unsigned x = offset / 8;
+ assert(x <= 0xFF);
+
+ pu->AddCode(0xE2, (BYTE)x);
+ }
+}
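+
+// Illustrative encodings: unwindSetFrameReg(REG_FP, 0) emits set_fp (0xE1);
+// unwindSetFrameReg(REG_FP, 16) emits add_fp with x = 2, i.e. bytes 0xE2 0x02.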
+
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+ unreached();
+}
+
+void Compiler::unwindNop()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("unwindNop: adding NOP\n");
+ }
+#endif
+
+ INDEBUG(pu->uwiAddingNOP = true);
+
+ // nop: 11100011: no unwind operation is required.
+ pu->AddCode(0xE3);
+
+ INDEBUG(pu->uwiAddingNOP = false);
+}
+
+// unwindSaveRegPair: save a register pair to the stack at the specified byte offset (which must be positive,
+// a multiple of 8 from 0 to 504). Note that for ARM64 unwind codes, reg2 must be exactly one register higher than reg1,
+// except for the case of a pair including LR, in which case reg1 must be either FP or R19/R21/R23/R25/R27 (note that it
+// can't be even, such as R20, because that would mean R19 was saved by itself rather than as the <R19,R20> pair,
+// which is what we should do).
+void Compiler::unwindSaveRegPair(regNumber reg1, regNumber reg2, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // stp reg1, reg2, [sp, #offset]
+
+ // offset for store pair in prolog must be positive and a multiple of 8.
+ assert(0 <= offset && offset <= 504);
+ assert((offset % 8) == 0);
+
+ int z = offset / 8;
+ assert(0 <= z && z <= 0x3F);
+
+ if (reg1 == REG_FP)
+ {
+ // save_fplr: 01zzzzzz: save <r29,lr> pair at [sp+#Z*8], offset <= 504
+
+ assert(reg2 == REG_LR);
+
+ pu->AddCode(0x40 | (BYTE)z);
+ }
+ else if (reg2 == REG_LR)
+ {
+ // save_lrpair: 1101011x | xxzzzzzz: save pair <r19 + 2 * #X, lr> at [sp + #Z * 8], offset <= 504
+
+ assert(REG_R19 <= reg1 && // first legal pair: R19, LR
+ reg1 <= REG_R27); // last legal pair: R27, LR
+
+ BYTE x = (BYTE)(reg1 - REG_R19);
+ assert((x % 2) == 0); // only legal reg1: R19, R21, R23, R25, R27
+ x /= 2;
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xD6 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else if (emitter::isGeneralRegister(reg1))
+ {
+ // save_regp: 110010xx | xxzzzzzz: save r(19 + #X) pair at [sp + #Z * 8], offset <= 504
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_R19 <= reg1 && // first legal pair: R19, R20
+ reg1 <= REG_R27); // last legal pair: R27, R28 (FP is never saved without LR)
+
+ BYTE x = (BYTE)(reg1 - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xC8 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else
+ {
+ // save_fregp: 1101100x | xxzzzzzz : save pair d(8 + #X) at [sp + #Z * 8], offset <= 504
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_V8 <= reg1 && // first legal pair: V8, V9
+ reg1 <= REG_V14); // last legal pair: V14, V15
+
+ BYTE x = (BYTE)(reg1 - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xD8 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+}
+
+// unwindSaveRegPairPreindexed: save a register pair to the stack at the specified byte offset (which must be negative,
+// a multiple of 8 from -512 to -8). Note that for ARM64 unwind codes, reg2 must be exactly one register higher than
+// reg1.
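+// For example, unwindSaveRegPairPreindexed(REG_FP, REG_LR, -16) emits the single save_fplr_x byte 0x81 (Z = 1),
+// and unwindSaveRegPairPreindexed(REG_R19, REG_R20, -32) emits the single save_r19r20_x byte 0x24 (Z = 4).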
+void Compiler::unwindSaveRegPairPreindexed(regNumber reg1, regNumber reg2, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // stp reg1, reg2, [sp, #offset]!
+
+ // pre-indexed offset in prolog must be negative and a multiple of 8.
+ assert(offset < 0);
+ assert((offset % 8) == 0);
+
+ if (reg1 == REG_FP)
+ {
+ // save_fplr_x: 10zzzzzz: save <r29,lr> pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -512
+
+ assert(-512 <= offset);
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x3F);
+
+ assert(reg2 == REG_LR);
+
+ pu->AddCode(0x80 | (BYTE)z);
+ }
+ else if ((reg1 == REG_R19) &&
+ (-256 <= offset)) // If the offset is between -512 and -256, we use the save_regp_x unwind code.
+ {
+ // save_r19r20_x: 001zzzzz: save <r19,r20> pair at [sp-#Z*8]!, pre-indexed offset >= -248
+ // NOTE: I'm not sure why we allow Z==0 here; seems useless, and the calculation of offset is different from the
+ // other cases.
+
+ int z = (-offset) / 8;
+ assert(0 <= z && z <= 0x1F);
+
+ assert(reg2 == REG_R20);
+
+ pu->AddCode(0x20 | (BYTE)z);
+ }
+ else if (emitter::isGeneralRegister(reg1))
+ {
+ // save_regp_x: 110011xx | xxzzzzzz: save pair r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -512
+
+ assert(-512 <= offset);
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x3F);
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_R19 <= reg1 && // first legal pair: R19, R20
+ reg1 <= REG_R27); // last legal pair: R27, R28 (FP is never saved without LR)
+
+ BYTE x = (BYTE)(reg1 - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xCC | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else
+ {
+ // save_fregp_x: 1101101x | xxzzzzzz : save pair d(8 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -512
+
+ assert(-512 <= offset);
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x3F);
+
+ assert(REG_NEXT(reg1) == reg2);
+ assert(REG_V8 <= reg1 && // first legal pair: V8, V9
+ reg1 <= REG_V14); // last legal pair: V14, V15
+
+ BYTE x = (BYTE)(reg1 - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xDA | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+}
+
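+// unwindSaveReg: save a single register to the stack at the specified byte offset (non-negative, a multiple of 8,
+// and no larger than 504). For example, unwindSaveReg(REG_R19, 8) emits the two-byte save_reg sequence 0xD0 0x01
+// (X = 0, Z = 1).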
+void Compiler::unwindSaveReg(regNumber reg, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // str reg, [sp, #offset]
+
+    // offset for store in prolog must be non-negative, a multiple of 8, and no larger than 504.
+ assert(0 <= offset && offset <= 504);
+ assert((offset % 8) == 0);
+
+ int z = offset / 8;
+ assert(0 <= z && z <= 0x3F);
+
+ if (emitter::isGeneralRegister(reg))
+ {
+ // save_reg: 110100xx | xxzzzzzz: save reg r(19 + #X) at [sp + #Z * 8], offset <= 504
+
+ assert(REG_R19 <= reg && // first legal register: R19
+ reg <= REG_LR); // last legal register: LR
+
+ BYTE x = (BYTE)(reg - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xD0 | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+ else
+ {
+ // save_freg: 1101110x | xxzzzzzz : save reg d(8 + #X) at [sp + #Z * 8], offset <= 504
+
+ assert(REG_V8 <= reg && // first legal register: V8
+ reg <= REG_V15); // last legal register: V15
+
+ BYTE x = (BYTE)(reg - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xDC | (BYTE)(x >> 2), (BYTE)(x << 6) | (BYTE)z);
+ }
+}
+
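+// unwindSaveRegPreindexed: save a single register with a pre-indexed store at the specified byte offset (negative,
+// a multiple of 8, and no smaller than -256). For example, unwindSaveRegPreindexed(REG_R19, -16) emits the two-byte
+// save_reg_x sequence 0xD4 0x01 (X = 0, Z = 1).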
+void Compiler::unwindSaveRegPreindexed(regNumber reg, int offset)
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // str reg, [sp, #offset]!
+
+ // pre-indexed offset in prolog must be negative and a multiple of 8.
+ assert(-256 <= offset && offset < 0);
+ assert((offset % 8) == 0);
+
+ int z = (-offset) / 8 - 1;
+ assert(0 <= z && z <= 0x1F);
+
+ if (emitter::isGeneralRegister(reg))
+ {
+ // save_reg_x: 1101010x | xxxzzzzz: save reg r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -256
+
+ assert(REG_R19 <= reg && // first legal register: R19
+ reg <= REG_LR); // last legal register: LR
+
+ BYTE x = (BYTE)(reg - REG_R19);
+ assert(0 <= x && x <= 0xF);
+
+ pu->AddCode(0xD4 | (BYTE)(x >> 3), (BYTE)(x << 5) | (BYTE)z);
+ }
+ else
+ {
+        // save_freg_x: 11011110 | xxxzzzzz : save reg d(8 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -256
+
+ assert(REG_V8 <= reg && // first legal register: V8
+ reg <= REG_V15); // last legal register: V15
+
+ BYTE x = (BYTE)(reg - REG_V8);
+ assert(0 <= x && x <= 0x7);
+
+ pu->AddCode(0xDE, (BYTE)(x << 5) | (BYTE)z);
+ }
+}
+
+void Compiler::unwindSaveNext()
+{
+ UnwindInfo* pu = &funCurrentFunc()->uwi;
+
+ // We're saving the next register pair. The caller is responsible for ensuring this is correct!
+
+    // save_next: 11100110 : save next non-volatile Int or FP register pair.
+ pu->AddCode(0xE6);
+}
+
+void Compiler::unwindReturn(regNumber reg)
+{
+ // Nothing to do; we will always have at least one trailing "end" opcode in our padding.
+}
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Debug helpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// Return the size of the unwind code (from 1 to 4 bytes), given the first byte of the unwind bytes
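+// For example, 0xE1 (set_fp) is a 1-byte code, 0xC8 (save_regp) is a 2-byte code, and 0xE0 (alloc_l) is a 4-byte code.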
+
+unsigned GetUnwindSizeFromUnwindHeader(BYTE b1)
+{
+ static BYTE s_UnwindSize[256] = {
+        // array of unwind sizes, in bytes (as specified in the ARM64 unwind specification)
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 00-0F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 10-1F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 20-2F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 30-3F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 40-4F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 50-5F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 60-6F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 70-7F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 80-8F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 90-9F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // A0-AF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // B0-BF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0-CF
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, // D0-DF
+ 4, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // E0-EF
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // F0-FF
+ };
+
+ unsigned size = s_UnwindSize[b1];
+ assert(1 <= size && size <= 4);
+ return size;
+}
+
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Unwind Info Support Classes XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// UnwindCodesBase
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifdef DEBUG
+
+// Walk the prolog codes and calculate the size of the prolog or epilog, in bytes.
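+// For example, the code sequence 0x42 (save_fplr), 0x81 (save_fplr_x), 0xE4 (end) describes two 4-byte prolog
+// instructions, so the size returned for it would be 8 (assuming IsEndCode() treats 0xE4 as an end code).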
+unsigned UnwindCodesBase::GetCodeSizeFromUnwindCodes(bool isProlog)
+{
+ BYTE* pCodesStart = GetCodes();
+ BYTE* pCodes = pCodesStart;
+ unsigned size = 0;
+ for (;;)
+ {
+ BYTE b1 = *pCodes;
+ if (IsEndCode(b1))
+ {
+ break; // We hit an "end" code; we're done
+ }
+ size += 4; // All codes represent 4 byte instructions.
+ pCodes += GetUnwindSizeFromUnwindHeader(b1);
+ assert(pCodes - pCodesStart < 256); // 255 is the absolute maximum number of code bytes allowed
+ }
+ return size;
+}
+
+#endif // DEBUG
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Debug dumpers XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifdef DEBUG
+
+// start is 0-based index from LSB, length is number of bits
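+// For example, ExtractBits(0x12345678, 8, 8) returns 0x56, and ExtractBits(dw, 0, 18) extracts an 18-bit field
+// such as the function length in the unwind header.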
+DWORD ExtractBits(DWORD dw, DWORD start, DWORD length)
+{
+ return (dw >> start) & ((1 << length) - 1);
+}
+
+// Dump the unwind data.
+// Arguments:
+// isHotCode: true if this unwind data is for the hot section
+// startOffset: byte offset of the code start that this unwind data represents
+// endOffset: byte offset of the code end that this unwind data represents
+// pHeader: pointer to the unwind data blob
+// unwindBlockSize: size in bytes of the unwind data blob
+
+void DumpUnwindInfo(Compiler* comp,
+ bool isHotCode,
+ UNATIVE_OFFSET startOffset,
+ UNATIVE_OFFSET endOffset,
+ const BYTE* const pHeader,
+ ULONG unwindBlockSize)
+{
+ printf("Unwind Info%s:\n", isHotCode ? "" : " COLD");
+
+ // pHeader is not guaranteed to be aligned. We put four 0xFF end codes at the end
+ // to provide padding, and round down to get a multiple of 4 bytes in size.
+ DWORD UNALIGNED* pdw = (DWORD UNALIGNED*)pHeader;
+ DWORD dw;
+
+ dw = *pdw++;
+
+ DWORD codeWords = ExtractBits(dw, 27, 5);
+ DWORD epilogCount = ExtractBits(dw, 22, 5);
+ DWORD EBit = ExtractBits(dw, 21, 1);
+ DWORD XBit = ExtractBits(dw, 20, 1);
+ DWORD Vers = ExtractBits(dw, 18, 2);
+ DWORD functionLength = ExtractBits(dw, 0, 18);
+
+ printf(" >> Start offset : 0x%06x (not in unwind data)\n", comp->dspOffset(startOffset));
+ printf(" >> End offset : 0x%06x (not in unwind data)\n", comp->dspOffset(endOffset));
+ printf(" Code Words : %u\n", codeWords);
+ printf(" Epilog Count : %u\n", epilogCount);
+ printf(" E bit : %u\n", EBit);
+ printf(" X bit : %u\n", XBit);
+ printf(" Vers : %u\n", Vers);
+ printf(" Function Length : %u (0x%05x) Actual length = %u (0x%06x)\n", functionLength, functionLength,
+ functionLength * 4, functionLength * 4);
+
+ assert(functionLength * 4 == endOffset - startOffset);
+
+ if (codeWords == 0 && epilogCount == 0)
+ {
+ // We have an extension word specifying a larger number of Code Words or Epilog Counts
+ // than can be specified in the header word.
+
+ dw = *pdw++;
+
+ codeWords = ExtractBits(dw, 16, 8);
+ epilogCount = ExtractBits(dw, 0, 16);
+ assert((dw & 0xF0000000) == 0); // reserved field should be zero
+
+ printf(" ---- Extension word ----\n");
+ printf(" Extended Code Words : %u\n", codeWords);
+ printf(" Extended Epilog Count : %u\n", epilogCount);
+ }
+
+ bool epilogStartAt[1024] = {}; // One byte per possible epilog start index; initialized to false
+
+ if (EBit == 0)
+ {
+ // We have an array of epilog scopes
+
+ printf(" ---- Epilog scopes ----\n");
+ if (epilogCount == 0)
+ {
+ printf(" No epilogs\n");
+ }
+ else
+ {
+ for (DWORD scope = 0; scope < epilogCount; scope++)
+ {
+ dw = *pdw++;
+
+ DWORD epilogStartOffset = ExtractBits(dw, 0, 18);
+ DWORD res = ExtractBits(dw, 18, 4);
+ DWORD epilogStartIndex = ExtractBits(dw, 22, 10);
+
+ // Note that epilogStartOffset for a funclet is the offset from the beginning
+ // of the current funclet, not the offset from the beginning of the main function.
+ // To help find it when looking through JitDump output, also show the offset from
+ // the beginning of the main function.
+ DWORD epilogStartOffsetFromMainFunctionBegin = epilogStartOffset * 4 + startOffset;
+
+ assert(res == 0);
+
+ printf(" ---- Scope %d\n", scope);
+ printf(" Epilog Start Offset : %u (0x%05x) Actual offset = %u (0x%06x) Offset from main "
+ "function begin = %u (0x%06x)\n",
+ comp->dspOffset(epilogStartOffset), comp->dspOffset(epilogStartOffset),
+ comp->dspOffset(epilogStartOffset * 4), comp->dspOffset(epilogStartOffset * 4),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin),
+ comp->dspOffset(epilogStartOffsetFromMainFunctionBegin));
+ printf(" Epilog Start Index : %u (0x%02x)\n", epilogStartIndex, epilogStartIndex);
+
+ epilogStartAt[epilogStartIndex] = true; // an epilog starts at this offset in the unwind codes
+ }
+ }
+ }
+ else
+ {
+ printf(" --- One epilog, unwind codes at %u\n", epilogCount);
+ assert(epilogCount < ArrLen(epilogStartAt));
+ epilogStartAt[epilogCount] = true; // the one and only epilog starts its unwind codes at this offset
+ }
+
+ // Dump the unwind codes
+
+ printf(" ---- Unwind codes ----\n");
+
+ DWORD countOfUnwindCodes = codeWords * 4;
+ PBYTE pUnwindCode = (PBYTE)pdw;
+ BYTE b1, b2, b3, b4;
+ DWORD x, z;
+ for (DWORD i = 0; i < countOfUnwindCodes; i++)
+ {
+ // Does this byte start an epilog sequence? If so, note that fact.
+ if (epilogStartAt[i])
+ {
+ printf(" ---- Epilog start at index %u ----\n", i);
+ }
+
+ b1 = *pUnwindCode++;
+
+ if ((b1 & 0xE0) == 0)
+ {
+            // alloc_s: 000xxxxx: allocate small stack with size < 512 (2^5 * 16)
+ x = b1 & 0x1F;
+ printf(" %02X alloc_s #%u (0x%02X); sub sp, sp, #%u (0x%03X)\n", b1, x, x, x * 16, x * 16);
+ }
+ else if ((b1 & 0xE0) == 0x20)
+ {
+ // save_r19r20_x: 001zzzzz: save <r19,r20> pair at [sp-#Z*8]!, pre-indexed offset >= -248
+ z = b1 & 0x1F;
+ printf(" %02X save_r19r20_x #%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, z, z,
+ getRegName(REG_R19), getRegName(REG_R20), z * 8);
+ }
+ else if ((b1 & 0xC0) == 0x40)
+ {
+ // save_fplr: 01zzzzzz: save <r29,lr> pair at [sp+#Z*8], offset <= 504
+ z = b1 & 0x3F;
+ printf(" %02X save_fplr #%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, z, z, getRegName(REG_FP),
+ getRegName(REG_LR), z * 8);
+ }
+ else if ((b1 & 0xC0) == 0x80)
+ {
+ // save_fplr_x: 10zzzzzz: save <r29,lr> pair at [sp-(#Z+1)*8]!, pre-indexed offset >= -512
+ z = b1 & 0x3F;
+ printf(" %02X save_fplr_x #%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, z, z,
+ getRegName(REG_FP), getRegName(REG_LR), (z + 1) * 8);
+ }
+ else if ((b1 & 0xF8) == 0xC0)
+ {
+            // alloc_m: 11000xxx | xxxxxxxx: allocate large stack with size < 32K (2^11 * 16)
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x7) << 8) | (DWORD)b2;
+
+ printf(" %02X %02X alloc_m #%u (0x%03X); sub sp, sp, #%u (0x%04X)\n", b1, b2, x, x, x * 16,
+ x * 16);
+ }
+ else if ((b1 & 0xFC) == 0xC8)
+ {
+ // save_regp: 110010xx | xxzzzzzz: save r(19 + #X) pair at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_regp X#%u Z#%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), getRegName(REG_R19 + x + 1), z * 8);
+ }
+ else if ((b1 & 0xFC) == 0xCC)
+ {
+ // save_regp_x: 110011xx | xxzzzzzz: save pair r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >=
+ // -512
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_regp_x X#%u Z#%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), getRegName(REG_R19 + x + 1), (z + 1) * 8);
+ }
+ else if ((b1 & 0xFC) == 0xD0)
+ {
+ // save_reg: 110100xx | xxzzzzzz: save reg r(19 + #X) at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x3) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_reg X#%u Z#%u (0x%02X); str %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), z * 8);
+ }
+ else if ((b1 & 0xFE) == 0xD4)
+ {
+ // save_reg_x: 1101010x | xxxzzzzz: save reg r(19 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >= -256
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 3) | (DWORD)(b2 >> 5);
+ z = (DWORD)(b2 & 0x1F);
+
+ printf(" %02X %02X save_reg_x X#%u Z#%u (0x%02X); str %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + x), (z + 1) * 8);
+ }
+ else if ((b1 & 0xFE) == 0xD6)
+ {
+ // save_lrpair: 1101011x | xxzzzzzz: save pair <r19 + 2 * #X, lr> at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_lrpair X#%u Z#%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_R19 + 2 * x), getRegName(REG_LR), z * 8);
+ }
+ else if ((b1 & 0xFE) == 0xD8)
+ {
+ // save_fregp: 1101100x | xxzzzzzz : save pair d(8 + #X) at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_fregp X#%u Z#%u (0x%02X); stp %s, %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), getRegName(REG_V8 + x + 1, true), z * 8);
+ }
+ else if ((b1 & 0xFE) == 0xDA)
+ {
+ // save_fregp_x: 1101101x | xxzzzzzz : save pair d(8 + #X), at [sp - (#Z + 1) * 8]!, pre-indexed offset >=
+ // -512
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_fregp_x X#%u Z#%u (0x%02X); stp %s, %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), getRegName(REG_V8 + x + 1, true), (z + 1) * 8);
+ }
+ else if ((b1 & 0xFE) == 0xDC)
+ {
+ // save_freg: 1101110x | xxzzzzzz : save reg d(8 + #X) at [sp + #Z * 8], offset <= 504
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = ((DWORD)(b1 & 0x1) << 2) | (DWORD)(b2 >> 6);
+ z = (DWORD)(b2 & 0x3F);
+
+ printf(" %02X %02X save_freg X#%u Z#%u (0x%02X); str %s, [sp, #%u]\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), z * 8);
+ }
+ else if (b1 == 0xDE)
+ {
+            // save_freg_x: 11011110 | xxxzzzzz : save reg d(8 + #X) at [sp - (#Z + 1) * 8]!, pre-indexed offset >=
+ // -256
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = (DWORD)(b2 >> 5);
+ z = (DWORD)(b2 & 0x1F);
+
+ printf(" %02X %02X save_freg_x X#%u Z#%u (0x%02X); str %s, [sp, #-%u]!\n", b1, b2, x, z, z,
+ getRegName(REG_V8 + x, true), (z + 1) * 8);
+ }
+ else if (b1 == 0xE0)
+ {
+ // alloc_l: 11100000 | xxxxxxxx | xxxxxxxx | xxxxxxxx : allocate large stack with size < 256M (2^24 * 16)
+ assert(i + 3 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ b3 = *pUnwindCode++;
+ b4 = *pUnwindCode++;
+ i += 3;
+
+ x = ((DWORD)b2 << 16) | ((DWORD)b3 << 8) | (DWORD)b4;
+
+ printf(" %02X %02X %02X %02X alloc_l %u (0x%06X); sub sp, sp, #%u (%06X)\n", b1, b2, b3, b4, x, x,
+ x * 16, x * 16);
+ }
+ else if (b1 == 0xE1)
+ {
+ // set_fp: 11100001 : set up r29 : with : mov r29, sp
+
+ printf(" %02X set_fp; mov %s, sp\n", b1, getRegName(REG_FP));
+ }
+ else if (b1 == 0xE2)
+ {
+ // add_fp: 11100010 | xxxxxxxx : set up r29 with : add r29, sp, #x * 8
+ assert(i + 1 < countOfUnwindCodes);
+ b2 = *pUnwindCode++;
+ i++;
+
+ x = (DWORD)b2;
+
+ printf(" %02X %02X add_fp %u (0x%02X); add %s, sp, #%u\n", b1, b2, x, x, getRegName(REG_FP),
+ x * 8);
+ }
+ else if (b1 == 0xE3)
+ {
+ // nop: 11100011: no unwind operation is required.
+
+ printf(" %02X nop\n", b1);
+ }
+ else if (b1 == 0xE4)
+ {
+ // end: 11100100 : end of unwind code
+
+ printf(" %02X end\n", b1);
+ }
+ else if (b1 == 0xE5)
+ {
+ // end_c: 11100101 : end of unwind code in current chained scope.
+
+ printf(" %02X end_c\n", b1);
+ }
+ else if (b1 == 0xE6)
+ {
+            // save_next: 11100110 : save next non-volatile Int or FP register pair.
+
+ printf(" %02X save_next\n", b1);
+ }
+ else
+ {
+ // Unknown / reserved unwind code
+ assert(!"Internal error decoding unwind codes");
+ }
+ }
+
+ pdw += codeWords;
+ assert((PBYTE)pdw == pUnwindCode);
+ assert((PBYTE)pdw == pHeader + unwindBlockSize);
+
+ assert(XBit == 0); // We don't handle the case where exception data is present, such as the Exception Handler RVA
+
+ printf("\n");
+}
+
+#endif // DEBUG
+
+#endif // _TARGET_ARM64_
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp
new file mode 100644
index 0000000000..9934416412
--- /dev/null
+++ b/src/jit/utils.cpp
@@ -0,0 +1,1767 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Utils.cpp XX
+XX XX
+XX Has miscellaneous utility functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "opcode.h"
+
+/*****************************************************************************/
+// Define the string platform name based on compilation #ifdefs. This is the
+// same code for all platforms, hence it is here instead of in the targetXXX.cpp
+// files.
+
+#ifdef PLATFORM_UNIX
+// Should we distinguish Mac? Can we?
+// Should we distinguish flavors of Unix? Can we?
+const char* Target::g_tgtPlatformName = "Unix";
+#else // !PLATFORM_UNIX
+const char* Target::g_tgtPlatformName = "Windows";
+#endif // !PLATFORM_UNIX
+
+/*****************************************************************************/
+
+#define DECLARE_DATA
+
+// clang-format off
+extern
+const signed char opcodeSizes[] =
+{
+ #define InlineNone_size 0
+ #define ShortInlineVar_size 1
+ #define InlineVar_size 2
+ #define ShortInlineI_size 1
+ #define InlineI_size 4
+ #define InlineI8_size 8
+ #define ShortInlineR_size 4
+ #define InlineR_size 8
+ #define ShortInlineBrTarget_size 1
+ #define InlineBrTarget_size 4
+ #define InlineMethod_size 4
+ #define InlineField_size 4
+ #define InlineType_size 4
+ #define InlineString_size 4
+ #define InlineSig_size 4
+ #define InlineRVA_size 4
+ #define InlineTok_size 4
+ #define InlineSwitch_size 0 // for now
+ #define InlinePhi_size 0 // for now
+ #define InlineVarTok_size 0 // remove
+
+ #define OPDEF(name,string,pop,push,oprType,opcType,l,s1,s2,ctrl) oprType ## _size ,
+ #include "opcode.def"
+ #undef OPDEF
+
+ #undef InlineNone_size
+ #undef ShortInlineVar_size
+ #undef InlineVar_size
+ #undef ShortInlineI_size
+ #undef InlineI_size
+ #undef InlineI8_size
+ #undef ShortInlineR_size
+ #undef InlineR_size
+ #undef ShortInlineBrTarget_size
+ #undef InlineBrTarget_size
+ #undef InlineMethod_size
+ #undef InlineField_size
+ #undef InlineType_size
+ #undef InlineString_size
+ #undef InlineSig_size
+ #undef InlineRVA_size
+ #undef InlineTok_size
+ #undef InlineSwitch_size
+    #undef InlinePhi_size
+    #undef InlineVarTok_size
+};
+// clang-format on
+
+const BYTE varTypeClassification[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) tf,
+#include "typelist.h"
+#undef DEF_TP
+};
+
+/*****************************************************************************/
+/*****************************************************************************/
+#ifdef DEBUG
+extern const char* const opcodeNames[] = {
+#define OPDEF(name, string, pop, push, oprType, opcType, l, s1, s2, ctrl) string,
+#include "opcode.def"
+#undef OPDEF
+};
+
+extern const BYTE opcodeArgKinds[] = {
+#define OPDEF(name, string, pop, push, oprType, opcType, l, s1, s2, ctrl) (BYTE) oprType,
+#include "opcode.def"
+#undef OPDEF
+};
+#endif
+
+/*****************************************************************************/
+
+const char* varTypeName(var_types vt)
+{
+ static const char* const varTypeNames[] = {
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) nm,
+#include "typelist.h"
+#undef DEF_TP
+ };
+
+ assert((unsigned)vt < sizeof(varTypeNames) / sizeof(varTypeNames[0]));
+
+ return varTypeNames[vt];
+}
+
+#if defined(DEBUG) || defined(LATE_DISASM)
+/*****************************************************************************
+ *
+ * Return the name of the given register.
+ */
+
+const char* getRegName(regNumber reg, bool isFloat)
+{
+ // Special-case REG_NA; it's not in the regNames array, but we might want to print it.
+ if (reg == REG_NA)
+ {
+ return "NA";
+ }
+#if defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "register.h"
+ };
+
+ static const char* const floatRegNames[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "registerxmm.h"
+ };
+ if (isFloat)
+ {
+ assert(reg < ArrLen(floatRegNames));
+ return floatRegNames[reg];
+ }
+ else
+ {
+ assert(reg < ArrLen(regNames));
+ return regNames[reg];
+ }
+#elif defined(_TARGET_ARM64_)
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, xname, wname) xname,
+#include "register.h"
+ };
+ assert(reg < ArrLen(regNames));
+ return regNames[reg];
+#else
+ static const char* const regNames[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "register.h"
+ };
+ assert(reg < ArrLen(regNames));
+ return regNames[reg];
+#endif
+}
+
+const char* getRegName(unsigned reg,
+                       bool isFloat) // this is for gcencode.cpp and disasm.cpp that don't use the regNumber type
+{
+ return getRegName((regNumber)reg, isFloat);
+}
+#endif // defined(DEBUG) || defined(LATE_DISASM)
+
+#if defined(DEBUG)
+
+const char* getRegNameFloat(regNumber reg, var_types type)
+{
+#ifdef _TARGET_ARM_
+ assert(genIsValidFloatReg(reg));
+ if (type == TYP_FLOAT)
+ return getRegName(reg);
+ else
+ {
+ const char* regName;
+
+ switch (reg)
+ {
+ default:
+ assert(!"Bad double register");
+ regName = "d??";
+ break;
+ case REG_F0:
+ regName = "d0";
+ break;
+ case REG_F2:
+ regName = "d2";
+ break;
+ case REG_F4:
+ regName = "d4";
+ break;
+ case REG_F6:
+ regName = "d6";
+ break;
+ case REG_F8:
+ regName = "d8";
+ break;
+ case REG_F10:
+ regName = "d10";
+ break;
+ case REG_F12:
+ regName = "d12";
+ break;
+ case REG_F14:
+ regName = "d14";
+ break;
+ case REG_F16:
+ regName = "d16";
+ break;
+ case REG_F18:
+ regName = "d18";
+ break;
+ case REG_F20:
+ regName = "d20";
+ break;
+ case REG_F22:
+ regName = "d22";
+ break;
+ case REG_F24:
+ regName = "d24";
+ break;
+ case REG_F26:
+ regName = "d26";
+ break;
+ case REG_F28:
+ regName = "d28";
+ break;
+ case REG_F30:
+ regName = "d30";
+ break;
+ }
+ return regName;
+ }
+
+#elif defined(_TARGET_X86_) && defined(LEGACY_BACKEND)
+
+ static const char* regNamesFloat[] = {
+#define REGDEF(name, rnum, mask, sname) sname,
+#include "registerxmm.h"
+ };
+ assert((unsigned)reg < ArrLen(regNamesFloat));
+
+ return regNamesFloat[reg];
+
+#elif defined(_TARGET_ARM64_)
+
+ static const char* regNamesFloat[] = {
+#define REGDEF(name, rnum, mask, xname, wname) xname,
+#include "register.h"
+ };
+ assert((unsigned)reg < ArrLen(regNamesFloat));
+
+ return regNamesFloat[reg];
+
+#else
+ static const char* regNamesFloat[] = {
+#define REGDEF(name, rnum, mask, sname) "x" sname,
+#include "register.h"
+ };
+#ifdef FEATURE_AVX_SUPPORT
+ static const char* regNamesYMM[] = {
+#define REGDEF(name, rnum, mask, sname) "y" sname,
+#include "register.h"
+ };
+#endif // FEATURE_AVX_SUPPORT
+ assert((unsigned)reg < ArrLen(regNamesFloat));
+
+#ifdef FEATURE_AVX_SUPPORT
+ if (type == TYP_SIMD32)
+ {
+ return regNamesYMM[reg];
+ }
+#endif // FEATURE_AVX_SUPPORT
+
+ return regNamesFloat[reg];
+#endif
+}
+
+/*****************************************************************************
+ *
+ * Displays a register set.
+ * TODO-ARM64-Cleanup: don't allow ip0, ip1 as part of a range.
+ */
+
+void dspRegMask(regMaskTP regMask, size_t minSiz)
+{
+ const char* sep = "";
+
+ printf("[");
+
+ bool inRegRange = false;
+ regNumber regPrev = REG_NA;
+ regNumber regHead = REG_NA; // When we start a range, remember the first register of the range, so we don't use
+ // range notation if the range contains just a single register.
+ for (regNumber regNum = REG_INT_FIRST; regNum <= REG_INT_LAST; regNum = REG_NEXT(regNum))
+ {
+ regMaskTP regBit = genRegMask(regNum);
+
+ if ((regMask & regBit) != 0)
+ {
+ // We have a register to display. It gets displayed now if:
+ // 1. This is the first register to display of a new range of registers (possibly because
+ // no register has ever been displayed).
+ // 2. This is the last register of an acceptable range (either the last integer register,
+ // or the last of a range that is displayed with range notation).
+ if (!inRegRange)
+ {
+ // It's the first register of a potential range.
+ const char* nam = getRegName(regNum);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+
+ // By default, we're not starting a potential register range.
+ sep = " ";
+
+ // What kind of separator should we use for this range (if it is indeed going to be a range)?
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#if defined(_TARGET_AMD64_)
+ // For AMD64, create ranges for int registers R8 through R15, but not the "old" registers.
+ if (regNum >= REG_R8)
+ {
+ regHead = regNum;
+ inRegRange = true;
+ sep = "-";
+ }
+#elif defined(_TARGET_ARM64_)
+ // R17 and R28 can't be the start of a range, since the range would include TEB or FP
+ if ((regNum < REG_R17) || ((REG_R19 <= regNum) && (regNum < REG_R28)))
+ {
+ regHead = regNum;
+ inRegRange = true;
+ sep = "-";
+ }
+#elif defined(_TARGET_ARM_)
+ if (regNum < REG_R12)
+ {
+ regHead = regNum;
+ inRegRange = true;
+ sep = "-";
+ }
+#elif defined(_TARGET_X86_)
+// No register ranges
+#else // _TARGET_*
+#error Unsupported or unset target architecture
+#endif // _TARGET_*
+ }
+
+#if defined(_TARGET_ARM64_)
+ // We've already printed a register. Is this the end of a range?
+ else if ((regNum == REG_INT_LAST) || (regNum == REG_R17) // last register before TEB
+ || (regNum == REG_R28)) // last register before FP
+#else // _TARGET_ARM64_
+ // We've already printed a register. Is this the end of a range?
+ else if (regNum == REG_INT_LAST)
+#endif // _TARGET_ARM64_
+ {
+ const char* nam = getRegName(regNum);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+ inRegRange = false; // No longer in the middle of a register range
+ regHead = REG_NA;
+ sep = " ";
+ }
+ }
+ else // ((regMask & regBit) == 0)
+ {
+ if (inRegRange)
+ {
+ assert(regHead != REG_NA);
+ if (regPrev != regHead)
+ {
+ // Close out the previous range, if it included more than one register.
+ const char* nam = getRegName(regPrev);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+ }
+ sep = " ";
+ inRegRange = false;
+ regHead = REG_NA;
+ }
+ }
+
+ if (regBit > regMask)
+ {
+ break;
+ }
+
+ regPrev = regNum;
+ }
+
+#if CPU_HAS_BYTE_REGS
+ if (regMask & RBM_BYTE_REG_FLAG)
+ {
+ const char* nam = "BYTE";
+ printf("%s%s", sep, nam);
+ minSiz -= (strlen(sep) + strlen(nam));
+ }
+#endif
+
+#if !FEATURE_STACK_FP_X87
+ if (strlen(sep) > 0)
+ {
+ // We've already printed something.
+ sep = " ";
+ }
+ inRegRange = false;
+ regPrev = REG_NA;
+ regHead = REG_NA;
+ for (regNumber regNum = REG_FP_FIRST; regNum <= REG_FP_LAST; regNum = REG_NEXT(regNum))
+ {
+ regMaskTP regBit = genRegMask(regNum);
+
+ if (regMask & regBit)
+ {
+ if (!inRegRange || (regNum == REG_FP_LAST))
+ {
+ const char* nam = getRegName(regNum);
+ printf("%s%s", sep, nam);
+ minSiz -= strlen(sep) + strlen(nam);
+ sep = "-";
+ regHead = regNum;
+ }
+ inRegRange = true;
+ }
+ else
+ {
+ if (inRegRange)
+ {
+ if (regPrev != regHead)
+ {
+ const char* nam = getRegName(regPrev);
+ printf("%s%s", sep, nam);
+ minSiz -= (strlen(sep) + strlen(nam));
+ }
+ sep = " ";
+ }
+ inRegRange = false;
+ }
+
+ if (regBit > regMask)
+ {
+ break;
+ }
+
+ regPrev = regNum;
+ }
+#endif
+
+ printf("]");
+
+ while ((int)minSiz > 0)
+ {
+ printf(" ");
+ minSiz--;
+ }
+}
+
+//------------------------------------------------------------------------
+// dumpILBytes: Helper for dumpSingleInstr() to dump hex bytes of an IL stream,
+// aligning up to a minimum alignment width.
+//
+// Arguments:
+// codeAddr - Pointer to IL byte stream to display.
+// codeSize - Number of bytes of IL byte stream to display.
+// alignSize - Pad out to this many characters, if fewer than this were written.
+//
+void dumpILBytes(const BYTE* const codeAddr,
+ unsigned codeSize,
+ unsigned alignSize) // number of characters to write, for alignment
+{
+ for (IL_OFFSET offs = 0; offs < codeSize; ++offs)
+ {
+ printf(" %02x", *(codeAddr + offs));
+ }
+
+ unsigned charsWritten = 3 * codeSize;
+ for (unsigned i = charsWritten; i < alignSize; i++)
+ {
+ printf(" ");
+ }
+}
+
+//------------------------------------------------------------------------
+// dumpSingleInstr: Display a single IL instruction.
+//
+// Arguments:
+// codeAddr - Base pointer to a stream of IL instructions.
+// offs - Offset from codeAddr of the IL instruction to display.
+// prefix - Optional string to prefix the IL instruction with (if nullptr, no prefix is output).
+//
+// Return Value:
+// Size of the displayed IL instruction in the instruction stream, in bytes. (Add this to 'offs' to
+// get to the next instruction.)
+//
+unsigned dumpSingleInstr(const BYTE* const codeAddr, IL_OFFSET offs, const char* prefix)
+{
+ const BYTE* opcodePtr = codeAddr + offs;
+ const BYTE* startOpcodePtr = opcodePtr;
+ const unsigned ALIGN_WIDTH = 3 * 6; // assume 3 characters * (1 byte opcode + 4 bytes data + 1 prefix byte) for
+ // most things
+
+ if (prefix != nullptr)
+ {
+ printf("%s", prefix);
+ }
+
+ OPCODE opcode = (OPCODE)getU1LittleEndian(opcodePtr);
+ opcodePtr += sizeof(__int8);
+
+DECODE_OPCODE:
+
+ if (opcode >= CEE_COUNT)
+ {
+ printf("\nIllegal opcode: %02X\n", (int)opcode);
+ return (IL_OFFSET)(opcodePtr - startOpcodePtr);
+ }
+
+ /* Get the size of additional parameters */
+
+ size_t sz = opcodeSizes[opcode];
+ unsigned argKind = opcodeArgKinds[opcode];
+
+ /* See what kind of an opcode we have, then */
+
+ switch (opcode)
+ {
+ case CEE_PREFIX1:
+ opcode = OPCODE(getU1LittleEndian(opcodePtr) + 256);
+ opcodePtr += sizeof(__int8);
+ goto DECODE_OPCODE;
+
+ default:
+ {
+ __int64 iOp;
+ double dOp;
+ int jOp;
+ DWORD jOp2;
+
+ switch (argKind)
+ {
+ case InlineNone:
+ dumpILBytes(startOpcodePtr, (unsigned)(opcodePtr - startOpcodePtr), ALIGN_WIDTH);
+ printf(" %-12s", opcodeNames[opcode]);
+ break;
+
+ case ShortInlineVar:
+ iOp = getU1LittleEndian(opcodePtr);
+ goto INT_OP;
+ case ShortInlineI:
+ iOp = getI1LittleEndian(opcodePtr);
+ goto INT_OP;
+ case InlineVar:
+ iOp = getU2LittleEndian(opcodePtr);
+ goto INT_OP;
+ case InlineTok:
+ case InlineMethod:
+ case InlineField:
+ case InlineType:
+ case InlineString:
+ case InlineSig:
+ case InlineI:
+ iOp = getI4LittleEndian(opcodePtr);
+ goto INT_OP;
+ case InlineI8:
+ iOp = getU4LittleEndian(opcodePtr);
+ iOp |= (__int64)getU4LittleEndian(opcodePtr + 4) << 32;
+ goto INT_OP;
+
+ INT_OP:
+ dumpILBytes(startOpcodePtr, (unsigned)((opcodePtr - startOpcodePtr) + sz), ALIGN_WIDTH);
+ printf(" %-12s 0x%X", opcodeNames[opcode], iOp);
+ break;
+
+ case ShortInlineR:
+ dOp = getR4LittleEndian(opcodePtr);
+ goto FLT_OP;
+ case InlineR:
+ dOp = getR8LittleEndian(opcodePtr);
+ goto FLT_OP;
+
+ FLT_OP:
+ dumpILBytes(startOpcodePtr, (unsigned)((opcodePtr - startOpcodePtr) + sz), ALIGN_WIDTH);
+ printf(" %-12s %f", opcodeNames[opcode], dOp);
+ break;
+
+ case ShortInlineBrTarget:
+ jOp = getI1LittleEndian(opcodePtr);
+ goto JMP_OP;
+ case InlineBrTarget:
+ jOp = getI4LittleEndian(opcodePtr);
+ goto JMP_OP;
+
+ JMP_OP:
+ dumpILBytes(startOpcodePtr, (unsigned)((opcodePtr - startOpcodePtr) + sz), ALIGN_WIDTH);
+ printf(" %-12s %d (IL_%04x)", opcodeNames[opcode], jOp, (int)(opcodePtr + sz - codeAddr) + jOp);
+ break;
+
+ case InlineSwitch:
+ jOp2 = getU4LittleEndian(opcodePtr);
+ opcodePtr += 4;
+ opcodePtr += jOp2 * 4; // Jump over the table
+ dumpILBytes(startOpcodePtr, (unsigned)(opcodePtr - startOpcodePtr), ALIGN_WIDTH);
+ printf(" %-12s", opcodeNames[opcode]);
+ break;
+
+ case InlinePhi:
+ jOp2 = getU1LittleEndian(opcodePtr);
+ opcodePtr += 1;
+ opcodePtr += jOp2 * 2; // Jump over the table
+ dumpILBytes(startOpcodePtr, (unsigned)(opcodePtr - startOpcodePtr), ALIGN_WIDTH);
+ printf(" %-12s", opcodeNames[opcode]);
+ break;
+
+ default:
+ assert(!"Bad argKind");
+ }
+
+ opcodePtr += sz;
+ break;
+ }
+ }
+
+ printf("\n");
+ return (IL_OFFSET)(opcodePtr - startOpcodePtr);
+}
+
+//------------------------------------------------------------------------
+// dumpILRange: Display a range of IL instructions from an IL instruction stream.
+//
+// Arguments:
+// codeAddr - Pointer to IL byte stream to display.
+// codeSize - Number of bytes of IL byte stream to display.
+//
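+// For example (illustrative), dumping the two-byte IL stream { 0x16, 0x2A } (ldc.i4.0; ret) produces output along
+// the lines of:
+//    IL_0000   16                ldc.i4.0
+//    IL_0001   2a                ret
+//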
+void dumpILRange(const BYTE* const codeAddr, unsigned codeSize) // in bytes
+{
+ for (IL_OFFSET offs = 0; offs < codeSize;)
+ {
+ char prefix[100];
+ sprintf(prefix, "IL_%04x ", offs);
+ unsigned codeBytesDumped = dumpSingleInstr(codeAddr, offs, prefix);
+ offs += codeBytesDumped;
+ }
+}
+
+/*****************************************************************************
+ *
+ *  Return a string for a variable set (which may be a 32-bit or 64-bit number).
+ *  The result rotates between two static buffers, so at most two results can
+ *  be in use at once.
+ */
+
+const char* genES2str(EXPSET_TP set)
+{
+ const int bufSize = 17;
+ static char num1[bufSize];
+
+ static char num2[bufSize];
+
+ static char* nump = num1;
+
+ char* temp = nump;
+
+ nump = (nump == num1) ? num2 : num1;
+
+#if EXPSET_SZ == 32
+ sprintf_s(temp, bufSize, "%08X", set);
+#else
+ sprintf_s(temp, bufSize, "%08X%08X", (int)(set >> 32), (int)set);
+#endif
+
+ return temp;
+}
+
+const char* refCntWtd2str(unsigned refCntWtd)
+{
+ const int bufSize = 17;
+ static char num1[bufSize];
+
+ static char num2[bufSize];
+
+ static char* nump = num1;
+
+ char* temp = nump;
+
+ nump = (nump == num1) ? num2 : num1;
+
+ unsigned valueInt = refCntWtd / BB_UNITY_WEIGHT;
+ unsigned valueFrac = refCntWtd % BB_UNITY_WEIGHT;
+
+ if (valueFrac == 0)
+ {
+ sprintf_s(temp, bufSize, "%2u ", valueInt);
+ }
+ else
+ {
+ sprintf_s(temp, bufSize, "%2u.%1u", valueInt, (valueFrac * 10 / BB_UNITY_WEIGHT));
+ }
+
+ return temp;
+}
+
+#endif // DEBUG
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+//------------------------------------------------------------------------
+// Contains: check if the range includes a particular method
+//
+// Arguments:
+// info -- jit interface pointer
+// method -- method handle for the method of interest
+
+bool ConfigMethodRange::Contains(ICorJitInfo* info, CORINFO_METHOD_HANDLE method)
+{
+ _ASSERT(m_inited == 1);
+
+ // No ranges specified means all methods included.
+ if (m_lastRange == 0)
+ {
+ return true;
+ }
+
+ // Check the hash. Note we can't use the cached hash here since
+ // we may not be asking about the method currently being jitted.
+ const unsigned hash = info->getMethodHash(method);
+
+ for (unsigned i = 0; i < m_lastRange; i++)
+ {
+ if ((m_ranges[i].m_low <= hash) && (hash <= m_ranges[i].m_high))
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+//------------------------------------------------------------------------
+// InitRanges: parse the range string and set up the range info
+//
+// Arguments:
+// rangeStr -- string to parse (may be nullptr)
+//    capacity -- number of ranges to allocate in the range array
+//
+// Notes:
+// Does some internal error checking; clients can use Error()
+// to determine if the range string couldn't be fully parsed
+// because of bad characters or too many entries, or had values
+// that were too large to represent.
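+//
+//    For example (illustrative), the range string L"10-20 35" yields two ranges, [10,20] and [35,35]; a method
+//    whose hash is 15 or 35 is then included by Contains().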
+
+void ConfigMethodRange::InitRanges(const wchar_t* rangeStr, unsigned capacity)
+{
+ // Make sure that the memory was zero initialized
+ assert(m_inited == 0 || m_inited == 1);
+ assert(m_entries == 0);
+ assert(m_ranges == nullptr);
+ assert(m_lastRange == 0);
+
+ // Flag any crazy-looking requests
+ assert(capacity < 100000);
+
+ if (rangeStr == nullptr)
+ {
+ m_inited = 1;
+ return;
+ }
+
+ // Allocate some persistent memory
+ ICorJitHost* jitHost = JitHost::getJitHost();
+ m_ranges = (Range*)jitHost->allocateMemory(capacity * sizeof(Range));
+ m_entries = capacity;
+
+ const wchar_t* p = rangeStr;
+ unsigned lastRange = 0;
+ bool setHighPart = false;
+
+ while ((*p != 0) && (lastRange < m_entries))
+ {
+ while (*p == L' ')
+ {
+ p++;
+ }
+
+ int i = 0;
+
+ while (L'0' <= *p && *p <= L'9')
+ {
+ int j = 10 * i + ((*p++) - L'0');
+
+ // Check for overflow
+            if ((m_badChar == 0) && (j <= i))
+ {
+ m_badChar = (p - rangeStr) + 1;
+ }
+
+ i = j;
+ }
+
+ // Was this the high part of a low-high pair?
+ if (setHighPart)
+ {
+ // Yep, set it and move to the next range
+ m_ranges[lastRange].m_high = i;
+
+ // Sanity check that range is proper
+            if ((m_badChar == 0) && (m_ranges[lastRange].m_high < m_ranges[lastRange].m_low))
+ {
+ m_badChar = (p - rangeStr) + 1;
+ }
+
+ lastRange++;
+ setHighPart = false;
+ continue;
+ }
+
+ // Must have been looking for the low part of a range
+ m_ranges[lastRange].m_low = i;
+
+ while (*p == L' ')
+ {
+ p++;
+ }
+
+ // Was that the low part of a low-high pair?
+ if (*p == L'-')
+ {
+ // Yep, skip the dash and set high part next time around.
+ p++;
+ setHighPart = true;
+ continue;
+ }
+
+ // Else we have a point range, so set high = low
+ m_ranges[lastRange].m_high = i;
+ lastRange++;
+ }
+
+    // If we didn't parse the full range string, note the index of the
+    // first bad char.
+    if ((m_badChar == 0) && (*p != 0))
+ {
+ m_badChar = (p - rangeStr) + 1;
+ }
+
+ // Finish off any remaining open range
+ if (setHighPart)
+ {
+ m_ranges[lastRange].m_high = UINT_MAX;
+ lastRange++;
+ }
+
+ assert(lastRange <= m_entries);
+ m_lastRange = lastRange;
+ m_inited = 1;
+}
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
+
+#if CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ * Histogram class.
+ */
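+// For example (illustrative), with a size table of { 10, 100, 1000, 0 }, record(50) increments the "11 .. 100"
+// bucket and record(5000) increments the "> 1000" overflow bucket that dump() prints last.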
+
+Histogram::Histogram(IAllocator* allocator, const unsigned* const sizeTable)
+ : m_allocator(allocator), m_sizeTable(sizeTable), m_counts(nullptr)
+{
+ unsigned sizeCount = 0;
+ do
+ {
+ sizeCount++;
+ } while ((sizeTable[sizeCount] != 0) && (sizeCount < 1000));
+
+ m_sizeCount = sizeCount;
+}
+
+Histogram::~Histogram()
+{
+ m_allocator->Free(m_counts);
+}
+
+// We need to lazy allocate the histogram data so static `Histogram` variables don't try to
+// call the host memory allocator in the loader lock, which doesn't work.
+void Histogram::ensureAllocated()
+{
+ if (m_counts == nullptr)
+ {
+ m_counts = new (m_allocator) unsigned[m_sizeCount + 1];
+ memset(m_counts, 0, (m_sizeCount + 1) * sizeof(*m_counts));
+ }
+}
+
+void Histogram::dump(FILE* output)
+{
+ ensureAllocated();
+
+ unsigned t = 0;
+ for (unsigned i = 0; i < m_sizeCount; i++)
+ {
+ t += m_counts[i];
+ }
+
+ for (unsigned c = 0, i = 0; i <= m_sizeCount; i++)
+ {
+ if (i == m_sizeCount)
+ {
+ if (m_counts[i] == 0)
+ {
+ break;
+ }
+
+ fprintf(output, " > %7u", m_sizeTable[i - 1]);
+ }
+ else
+ {
+ if (i == 0)
+ {
+ fprintf(output, " <= ");
+ }
+ else
+ {
+ fprintf(output, "%7u .. ", m_sizeTable[i - 1] + 1);
+ }
+
+ fprintf(output, "%7u", m_sizeTable[i]);
+ }
+
+ c += m_counts[i];
+
+ fprintf(output, " ===> %7u count (%3u%% of total)\n", m_counts[i], (int)(100.0 * c / t));
+ }
+}
+
+void Histogram::record(unsigned size)
+{
+ ensureAllocated();
+
+ unsigned i;
+ for (i = 0; i < m_sizeCount; i++)
+ {
+ if (m_sizeTable[i] >= size)
+ {
+ break;
+ }
+ }
+
+ m_counts[i]++;
+}
+
+#endif // CALL_ARG_STATS || COUNT_BASIC_BLOCKS || COUNT_LOOPS || EMITTER_STATS || MEASURE_NODE_SIZE
+
+/*****************************************************************************
+ * Fixed bit vector class
+ */
+
+// bitChunkSize() - Returns number of bits in a bitVect chunk
+inline UINT FixedBitVect::bitChunkSize()
+{
+ return sizeof(UINT) * 8;
+}
+
+// bitNumToBit() - Returns a bit mask of the given bit number
+inline UINT FixedBitVect::bitNumToBit(UINT bitNum)
+{
+ assert(bitNum < bitChunkSize());
+ assert(bitChunkSize() <= sizeof(int) * 8);
+
+ return 1 << bitNum;
+}
+
+// bitVectInit() - Initializes a bit vector of a given size
+FixedBitVect* FixedBitVect::bitVectInit(UINT size, Compiler* comp)
+{
+ UINT bitVectMemSize, numberOfChunks;
+ FixedBitVect* bv;
+
+ assert(size != 0);
+
+ numberOfChunks = (size - 1) / bitChunkSize() + 1;
+ bitVectMemSize = numberOfChunks * (bitChunkSize() / 8); // size in bytes
+
+ assert(bitVectMemSize * bitChunkSize() >= size);
+
+ bv = (FixedBitVect*)comp->compGetMemA(sizeof(FixedBitVect) + bitVectMemSize, CMK_FixedBitVect);
+ memset(bv->bitVect, 0, bitVectMemSize);
+
+ bv->bitVectSize = size;
+
+ return bv;
+}
+
+// bitVectSet() - Sets the given bit
+void FixedBitVect::bitVectSet(UINT bitNum)
+{
+ UINT index;
+
+ assert(bitNum <= bitVectSize);
+
+ index = bitNum / bitChunkSize();
+ bitNum -= index * bitChunkSize();
+
+ bitVect[index] |= bitNumToBit(bitNum);
+}
+
+// bitVectTest() - Tests the given bit
+bool FixedBitVect::bitVectTest(UINT bitNum)
+{
+ UINT index;
+
+ assert(bitNum <= bitVectSize);
+
+ index = bitNum / bitChunkSize();
+ bitNum -= index * bitChunkSize();
+
+ return (bitVect[index] & bitNumToBit(bitNum)) != 0;
+}
+
+// bitVectOr() - Or in the given bit vector
+void FixedBitVect::bitVectOr(FixedBitVect* bv)
+{
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+
+ assert(bitVectSize == bv->bitVectSize);
+
+    // OR each chunk
+ for (UINT i = 0; i < bitChunkCnt; i++)
+ {
+ bitVect[i] |= bv->bitVect[i];
+ }
+}
+
+// bitVectAnd() - And with passed in bit vector
+void FixedBitVect::bitVectAnd(FixedBitVect& bv)
+{
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+
+ assert(bitVectSize == bv.bitVectSize);
+
+    // AND each chunk
+ for (UINT i = 0; i < bitChunkCnt; i++)
+ {
+ bitVect[i] &= bv.bitVect[i];
+ }
+}
+
+// bitVectGetFirst() - Find the first bit on and return bit num,
+// Return -1 if no bits found.
+UINT FixedBitVect::bitVectGetFirst()
+{
+ return bitVectGetNext((UINT)-1);
+}
+
+// bitVectGetNext() - Find the next bit on given previous position and return bit num.
+// Return -1 if no bits found.
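+// For example (illustrative), in a 64-bit vector with only bits 3 and 40 set, bitVectGetFirst() returns 3 and
+// bitVectGetNext(3) returns 40.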
+UINT FixedBitVect::bitVectGetNext(UINT bitNumPrev)
+{
+ UINT bitNum = (UINT)-1;
+ UINT index;
+ UINT bitMask;
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+ UINT i;
+
+ if (bitNumPrev == (UINT)-1)
+ {
+ index = 0;
+ bitMask = (UINT)-1;
+ }
+ else
+ {
+ UINT bit;
+
+ index = bitNumPrev / bitChunkSize();
+ bitNumPrev -= index * bitChunkSize();
+ bit = bitNumToBit(bitNumPrev);
+ bitMask = ~(bit | (bit - 1));
+ }
+
+ // Find first bit
+ for (i = index; i < bitChunkCnt; i++)
+ {
+ UINT bitChunk = bitVect[i] & bitMask;
+
+ if (bitChunk != 0)
+ {
+ BitScanForward((ULONG*)&bitNum, bitChunk);
+ break;
+ }
+
+ bitMask = 0xFFFFFFFF;
+ }
+
+ // Empty bit vector?
+ if (bitNum == (UINT)-1)
+ {
+ return (UINT)-1;
+ }
+
+ bitNum += i * bitChunkSize();
+
+ assert(bitNum <= bitVectSize);
+
+ return bitNum;
+}
+
+// bitVectGetNextAndClear() - Find the first bit on, clear it and return it.
+// Return -1 if no bits found.
+UINT FixedBitVect::bitVectGetNextAndClear()
+{
+ UINT bitNum = (UINT)-1;
+ UINT bitChunkCnt = (bitVectSize - 1) / bitChunkSize() + 1;
+ UINT i;
+
+ // Find first bit
+ for (i = 0; i < bitChunkCnt; i++)
+ {
+ if (bitVect[i] != 0)
+ {
+ BitScanForward((ULONG*)&bitNum, bitVect[i]);
+ break;
+ }
+ }
+
+ // Empty bit vector?
+ if (bitNum == (UINT)-1)
+ {
+ return (UINT)-1;
+ }
+
+ // Clear the bit in the right chunk
+ bitVect[i] &= ~bitNumToBit(bitNum);
+
+ bitNum += i * bitChunkSize();
+
+ assert(bitNum <= bitVectSize);
+
+ return bitNum;
+}
+
+int SimpleSprintf_s(__in_ecount(cbBufSize - (pWriteStart - pBufStart)) char* pWriteStart,
+ __in_ecount(cbBufSize) char* pBufStart,
+ size_t cbBufSize,
+ __in_z const char* fmt,
+ ...)
+{
+ assert(fmt);
+ assert(pBufStart);
+ assert(pWriteStart);
+ assert((size_t)pBufStart <= (size_t)pWriteStart);
+ int ret;
+
+ // compute the space left in the buffer.
+ if ((pBufStart + cbBufSize) < pWriteStart)
+ {
+ NO_WAY("pWriteStart is past end of buffer");
+ }
+ size_t cbSpaceLeft = (size_t)((pBufStart + cbBufSize) - pWriteStart);
+ va_list args;
+ va_start(args, fmt);
+ ret = vsprintf_s(pWriteStart, cbSpaceLeft, const_cast<char*>(fmt), args);
+ va_end(args);
+ if (ret < 0)
+ {
+ NO_WAY("vsprintf_s failed.");
+ }
+ return ret;
+}
+
+#ifdef DEBUG
+
+void hexDump(FILE* dmpf, const char* name, BYTE* addr, size_t size)
+{
+ if (!size)
+ {
+ return;
+ }
+
+ assert(addr);
+
+ fprintf(dmpf, "Hex dump of %s:\n", name);
+
+ for (unsigned i = 0; i < size; i++)
+ {
+ if ((i % 16) == 0)
+ {
+ fprintf(dmpf, "\n %04X: ", i);
+ }
+
+ fprintf(dmpf, "%02X ", *addr++);
+ }
+
+ fprintf(dmpf, "\n\n");
+}
+
+#endif // DEBUG
+
+void HelperCallProperties::init()
+{
+ for (CorInfoHelpFunc helper = CORINFO_HELP_UNDEF; // initialize helper
+ (helper < CORINFO_HELP_COUNT); // test helper for loop exit
+ helper = CorInfoHelpFunc(int(helper) + 1)) // update helper to next
+ {
+        // Generally you want to initialize these to their most typical/safest result
+ //
+ bool isPure = false; // true if the result only depends upon input args and not any global state
+ bool noThrow = false; // true if the helper will never throw
+ bool nonNullReturn = false; // true if the result will never be null or zero
+ bool isAllocator = false; // true if the result is usually a newly created heap item, or may throw OutOfMemory
+ bool mutatesHeap = false; // true if any previous heap objects [are|can be] modified
+ bool mayRunCctor = false; // true if the helper call may cause a static constructor to be run.
+ bool mayFinalize = false; // true if the helper call allocates an object that may need to run a finalizer
+
+ switch (helper)
+ {
+ // Arithmetic helpers that cannot throw
+ case CORINFO_HELP_LLSH:
+ case CORINFO_HELP_LRSH:
+ case CORINFO_HELP_LRSZ:
+ case CORINFO_HELP_LMUL:
+ case CORINFO_HELP_LNG2DBL:
+ case CORINFO_HELP_ULNG2DBL:
+ case CORINFO_HELP_DBL2INT:
+ case CORINFO_HELP_DBL2LNG:
+ case CORINFO_HELP_DBL2UINT:
+ case CORINFO_HELP_DBL2ULNG:
+ case CORINFO_HELP_FLTREM:
+ case CORINFO_HELP_DBLREM:
+ case CORINFO_HELP_FLTROUND:
+ case CORINFO_HELP_DBLROUND:
+
+ isPure = true;
+ noThrow = true;
+ break;
+
+ // Arithmetic helpers that *can* throw.
+
+ // This (or these) are not pure, in that they have "VM side effects"...but they don't mutate the heap.
+ case CORINFO_HELP_ENDCATCH:
+ break;
+
+ // Arithmetic helpers that may throw
+ case CORINFO_HELP_LMOD: // Mods throw div-by zero, and signed mods have problems with the smallest integer
+ // mod -1,
+ case CORINFO_HELP_MOD: // which is not representable as a positive integer.
+ case CORINFO_HELP_UMOD:
+ case CORINFO_HELP_ULMOD:
+
+ case CORINFO_HELP_UDIV: // Divs throw divide-by-zero.
+ case CORINFO_HELP_DIV:
+ case CORINFO_HELP_LDIV:
+ case CORINFO_HELP_ULDIV:
+
+ case CORINFO_HELP_LMUL_OVF:
+ case CORINFO_HELP_ULMUL_OVF:
+ case CORINFO_HELP_DBL2INT_OVF:
+ case CORINFO_HELP_DBL2LNG_OVF:
+ case CORINFO_HELP_DBL2UINT_OVF:
+ case CORINFO_HELP_DBL2ULNG_OVF:
+
+ isPure = true;
+ break;
+
+ // Heap Allocation helpers, these all never return null
+ case CORINFO_HELP_NEWSFAST:
+ case CORINFO_HELP_NEWSFAST_ALIGN8:
+
+ isAllocator = true;
+ nonNullReturn = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_NEW_CROSSCONTEXT:
+ case CORINFO_HELP_NEWFAST:
+ case CORINFO_HELP_READYTORUN_NEW:
+
+ mayFinalize = true; // These may run a finalizer
+ isAllocator = true;
+ nonNullReturn = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ // These allocation helpers do some checks on the size (and lower bound) inputs,
+ // and can throw exceptions other than OOM.
+ case CORINFO_HELP_NEWARR_1_VC:
+ case CORINFO_HELP_NEWARR_1_ALIGN8:
+
+ isAllocator = true;
+ nonNullReturn = true;
+ break;
+
+ // These allocation helpers do some checks on the size (and lower bound) inputs,
+ // and can throw exceptions other than OOM.
+ case CORINFO_HELP_NEW_MDARR:
+ case CORINFO_HELP_NEWARR_1_DIRECT:
+ case CORINFO_HELP_NEWARR_1_OBJ:
+ case CORINFO_HELP_READYTORUN_NEWARR_1:
+
+ mayFinalize = true; // These may run a finalizer
+ isAllocator = true;
+ nonNullReturn = true;
+ break;
+
+ // Heap Allocation helpers that are also pure
+ case CORINFO_HELP_STRCNS:
+
+ isPure = true;
+ isAllocator = true;
+ nonNullReturn = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_BOX:
+ nonNullReturn = true;
+ isAllocator = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_BOX_NULLABLE:
+ // Box Nullable is not a 'pure' function
+ // It has a Byref argument that it reads the contents of.
+ //
+ // So two calls to Box Nullable that pass the same address (with the same Value Number)
+ // will produce different results when the contents of the memory pointed to by the Byref changes
+ //
+ isAllocator = true;
+ noThrow = true; // only can throw OutOfMemory
+ break;
+
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD:
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS:
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG:
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG:
+ // logging helpers are not technically pure but can be optimized away
+ isPure = true;
+ noThrow = true;
+ nonNullReturn = true;
+ break;
+
+ // type casting helpers
+ case CORINFO_HELP_ISINSTANCEOFINTERFACE:
+ case CORINFO_HELP_ISINSTANCEOFARRAY:
+ case CORINFO_HELP_ISINSTANCEOFCLASS:
+ case CORINFO_HELP_ISINSTANCEOFANY:
+ case CORINFO_HELP_READYTORUN_ISINSTANCEOF:
+
+ isPure = true;
+ noThrow = true; // These return null for a failing cast
+ break;
+
+ // type casting helpers that throw
+ case CORINFO_HELP_CHKCASTINTERFACE:
+ case CORINFO_HELP_CHKCASTARRAY:
+ case CORINFO_HELP_CHKCASTCLASS:
+ case CORINFO_HELP_CHKCASTANY:
+ case CORINFO_HELP_CHKCASTCLASS_SPECIAL:
+ case CORINFO_HELP_READYTORUN_CHKCAST:
+
+ // These throw for a failing cast
+ // But if given a null input arg will return null
+ isPure = true;
+ break;
+
+ // helpers returning addresses, these can also throw
+ case CORINFO_HELP_UNBOX:
+ case CORINFO_HELP_GETREFANY:
+ case CORINFO_HELP_LDELEMA_REF:
+
+ isPure = true;
+ break;
+
+ // helpers that return internal handle
+ // TODO-ARM64-Bug?: Can these throw or not?
+ case CORINFO_HELP_GETCLASSFROMMETHODPARAM:
+ case CORINFO_HELP_GETSYNCFROMCLASSHANDLE:
+
+ isPure = true;
+ break;
+
+ // Helpers that load the base address for static variables.
+ // We divide these between those that may and may not invoke
+ // static class constructors.
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE:
+ case CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS:
+ case CORINFO_HELP_GETSTATICFIELDADDR_CONTEXT:
+ case CORINFO_HELP_GETSTATICFIELDADDR_TLS:
+ case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
+ case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
+ case CORINFO_HELP_READYTORUN_STATIC_BASE:
+
+ // These may invoke static class constructors
+ // These can throw InvalidProgram exception if the class can not be constructed
+ //
+ isPure = true;
+ nonNullReturn = true;
+ mayRunCctor = true;
+ break;
+
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR:
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR:
+
+ // These do not invoke static class constructors
+ //
+ isPure = true;
+ noThrow = true;
+ nonNullReturn = true;
+ break;
+
+ // GC Write barrier support
+ // TODO-ARM64-Bug?: Can these throw or not?
+ case CORINFO_HELP_ASSIGN_REF:
+ case CORINFO_HELP_CHECKED_ASSIGN_REF:
+ case CORINFO_HELP_ASSIGN_REF_ENSURE_NONHEAP:
+ case CORINFO_HELP_ASSIGN_BYREF:
+ case CORINFO_HELP_ASSIGN_STRUCT:
+
+ mutatesHeap = true;
+ break;
+
+ // Accessing fields (write)
+ case CORINFO_HELP_SETFIELD32:
+ case CORINFO_HELP_SETFIELD64:
+ case CORINFO_HELP_SETFIELDOBJ:
+ case CORINFO_HELP_SETFIELDSTRUCT:
+ case CORINFO_HELP_SETFIELDFLOAT:
+ case CORINFO_HELP_SETFIELDDOUBLE:
+ case CORINFO_HELP_ARRADDR_ST:
+
+ mutatesHeap = true;
+ break;
+
+ // These helper calls always throw an exception
+ case CORINFO_HELP_OVERFLOW:
+ case CORINFO_HELP_VERIFICATION:
+ case CORINFO_HELP_RNGCHKFAIL:
+ case CORINFO_HELP_THROWDIVZERO:
+#if COR_JIT_EE_VERSION > 460
+ case CORINFO_HELP_THROWNULLREF:
+#endif // COR_JIT_EE_VERSION
+ case CORINFO_HELP_THROW:
+ case CORINFO_HELP_RETHROW:
+
+ break;
+
+ // These helper calls may throw an exception
+ case CORINFO_HELP_METHOD_ACCESS_CHECK:
+ case CORINFO_HELP_FIELD_ACCESS_CHECK:
+ case CORINFO_HELP_CLASS_ACCESS_CHECK:
+ case CORINFO_HELP_DELEGATE_SECURITY_CHECK:
+
+ break;
+
+ // This is a debugging aid; it simply returns a constant address.
+ case CORINFO_HELP_LOOP_CLONE_CHOICE_ADDR:
+ isPure = true;
+ noThrow = true;
+ break;
+
+ // Not sure how to handle optimization involving the rest of these helpers
+ default:
+
+ // The most pessimistic results are returned for these helpers
+ mutatesHeap = true;
+ break;
+ }
+
+ m_isPure[helper] = isPure;
+ m_noThrow[helper] = noThrow;
+ m_nonNullReturn[helper] = nonNullReturn;
+ m_isAllocator[helper] = isAllocator;
+ m_mutatesHeap[helper] = mutatesHeap;
+ m_mayRunCctor[helper] = mayRunCctor;
+ m_mayFinalize[helper] = mayFinalize;
+ }
+}
+
+//=============================================================================
+// AssemblyNamesList2
+//=============================================================================
+// The string should be of the form
+// MyAssembly
+// MyAssembly;mscorlib;System
+// MyAssembly;mscorlib System
+
+AssemblyNamesList2::AssemblyNamesList2(const wchar_t* list, IAllocator* alloc) : m_alloc(alloc)
+{
+ assert(m_alloc != nullptr);
+
+ WCHAR prevChar = '?'; // dummy
+ LPWSTR nameStart = nullptr; // start of the name currently being processed. nullptr if no current name
+ AssemblyName** ppPrevLink = &m_pNames;
+
+ for (LPWSTR listWalk = const_cast<LPWSTR>(list); prevChar != '\0'; prevChar = *listWalk, listWalk++)
+ {
+ WCHAR curChar = *listWalk;
+
+ if (iswspace(curChar) || curChar == W(';') || curChar == W('\0'))
+ {
+ //
+ // Found a separator: white-space, ';', or the terminating null
+ //
+
+ if (nameStart)
+ {
+ // Found the end of the current name; add a new assembly name to the list.
+
+ AssemblyName* newName = new (m_alloc) AssemblyName();
+
+ // Null out the current character so we can do zero-terminated string work; we'll restore it later.
+ *listWalk = W('\0');
+
+ // How much space do we need?
+ int convertedNameLenBytes =
+ WszWideCharToMultiByte(CP_UTF8, 0, nameStart, -1, nullptr, 0, nullptr, nullptr);
+ newName->m_assemblyName = new (m_alloc) char[convertedNameLenBytes]; // convertedNameLenBytes includes
+ // the trailing null character
+ if (WszWideCharToMultiByte(CP_UTF8, 0, nameStart, -1, newName->m_assemblyName, convertedNameLenBytes,
+ nullptr, nullptr) != 0)
+ {
+ *ppPrevLink = newName;
+ ppPrevLink = &newName->m_next;
+ }
+ else
+ {
+ // Failed to convert the string. Ignore this string (and leak the memory).
+ }
+
+ nameStart = nullptr;
+
+ // Restore the current character.
+ *listWalk = curChar;
+ }
+ }
+ else if (!nameStart)
+ {
+ //
+ // Found the start of a new name
+ //
+
+ nameStart = listWalk;
+ }
+ }
+
+ assert(nameStart == nullptr); // cannot be in the middle of a name
+ *ppPrevLink = nullptr; // Terminate the last element of the list.
+}
+
+AssemblyNamesList2::~AssemblyNamesList2()
+{
+ for (AssemblyName* pName = m_pNames; pName != nullptr; /**/)
+ {
+ AssemblyName* cur = pName;
+ pName = pName->m_next;
+
+ m_alloc->Free(cur->m_assemblyName);
+ m_alloc->Free(cur);
+ }
+}
+
+bool AssemblyNamesList2::IsInList(const char* assemblyName)
+{
+ for (AssemblyName* pName = m_pNames; pName != nullptr; pName = pName->m_next)
+ {
+ if (_stricmp(pName->m_assemblyName, assemblyName) == 0)
+ {
+ return true;
+ }
+ }
+
+ return false;
+}
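A minimal usage sketch for the list format described above; the IAllocator instance named 'alloc' is assumed to be supplied by the surrounding JIT code, and W() is the wide-string literal macro already used in this file.

    AssemblyNamesList2 exclusions(W("MyAssembly;mscorlib System"), alloc);
    assert(!exclusions.IsEmpty());
    if (exclusions.IsInList("mscorlib")) // UTF-8 name, compared case-insensitively
    {
        // "mscorlib" was one of the three parsed names
    }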
+
+#ifdef FEATURE_JIT_METHOD_PERF
+CycleCount::CycleCount() : cps(CycleTimer::CyclesPerSecond())
+{
+}
+
+bool CycleCount::GetCycles(unsigned __int64* time)
+{
+ return CycleTimer::GetThreadCyclesS(time);
+}
+
+bool CycleCount::Start()
+{
+ return GetCycles(&beginCycles);
+}
+
+double CycleCount::ElapsedTime()
+{
+ unsigned __int64 nowCycles;
+ (void)GetCycles(&nowCycles);
+ return ((double)(nowCycles - beginCycles) / cps) * 1000.0;
+}
+
+bool PerfCounter::Start()
+{
+ bool result = QueryPerformanceFrequency(&beg) != 0;
+ if (!result)
+ {
+ return result;
+ }
+ freq = (double)beg.QuadPart / 1000.0;
+ (void)QueryPerformanceCounter(&beg);
+ return result;
+}
+
+// Return elapsed time from Start() in millis.
+double PerfCounter::ElapsedTime()
+{
+ LARGE_INTEGER li;
+ (void)QueryPerformanceCounter(&li);
+ return (double)(li.QuadPart - beg.QuadPart) / freq;
+}
+
+#endif
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Return the number of digits in a number of the given base (default base 10).
+ * Used when outputting strings.
+ */
+unsigned CountDigits(unsigned num, unsigned base /* = 10 */)
+{
+ assert(2 <= base && base <= 16); // sanity check
+ unsigned count = 1;
+ while (num >= base)
+ {
+ num /= base;
+ ++count;
+ }
+ return count;
+}
+
+#endif // DEBUG
+
+double FloatingPointUtils::convertUInt64ToDouble(unsigned __int64 uIntVal)
+{
+ __int64 s64 = uIntVal;
+ double d;
+ if (s64 < 0)
+ {
+#if defined(_TARGET_XARCH_)
+ // RyuJIT codegen and clang (or gcc) may produce different results for casting uint64 to
+ // double, and the clang result is more accurate. For example,
+ // 1) (double)0x84595161401484A0UL --> 43e08b2a2c280290 (RyuJIT codegen or VC++)
+ // 2) (double)0x84595161401484A0UL --> 43e08b2a2c280291 (clang or gcc)
+ // If the folding optimization below is implemented by simple casting of (double)uint64_val
+ // and it is compiled by clang, casting result can be inconsistent, depending on whether
+ // the folding optimization is triggered or the codegen generates instructions for casting.
+ // The current solution is to force the same math as the codegen does, so that casting
+ // result is always consistent.
+
+ // d = (double)(int64_t)uint64 + 0x1p64
+ uint64_t adjHex = 0x43F0000000000000UL;
+ d = (double)s64 + *(double*)&adjHex;
+#else
+ d = (double)uIntVal;
+#endif
+ }
+ else
+ {
+ d = (double)uIntVal;
+ }
+ return d;
+}
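As a standalone illustration of the adjustment above (not part of the JIT sources): 0x43F0000000000000 is the IEEE-754 bit pattern of 2^64, so adding it to the double conversion of the negative signed reinterpretation recovers the unsigned value.

    #include <cstdint>
    #include <cstring>

    double UInt64ToDoubleViaAdjust(uint64_t u)
    {
        int64_t  s       = static_cast<int64_t>(u); // negative when the top bit of 'u' is set
        uint64_t adjBits = 0x43F0000000000000ULL;   // bit pattern of 2^64 as a double
        double   adj;
        std::memcpy(&adj, &adjBits, sizeof(adj));   // memcpy avoids the type-punning pointer cast
        return (s < 0) ? (static_cast<double>(s) + adj) : static_cast<double>(s);
    }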
+
+float FloatingPointUtils::convertUInt64ToFloat(unsigned __int64 u64)
+{
+ double d = convertUInt64ToDouble(u64);
+ return (float)d;
+}
+
+unsigned __int64 FloatingPointUtils::convertDoubleToUInt64(double d)
+{
+ unsigned __int64 u64;
+ if (d >= 0.0)
+ {
+ // Work around a C++ issue where it doesn't properly convert large positive doubles
+ const double two63 = 2147483648.0 * 4294967296.0;
+ if (d < two63)
+ {
+ u64 = UINT64(d);
+ }
+ else
+ {
+ // subtract 0x8000000000000000, do the convert then add it back again
+ u64 = INT64(d - two63) + I64(0x8000000000000000);
+ }
+ return u64;
+ }
+
+#ifdef _TARGET_XARCH_
+
+ // While the Ecma spec does not specifically call this out,
+ // the case of conversion from negative double to unsigned integer is
+ // effectively an overflow and therefore the result is unspecified.
+ // With MSVC for x86/x64, such a conversion results in the bit-equivalent
+ // unsigned value of the conversion to integer. Other compilers convert
+ // negative doubles to zero when the target is unsigned.
+ // To make the behavior consistent across OSes on TARGET_XARCH,
+ // this double cast is needed to conform to MSVC behavior.
+
+ u64 = UINT64(INT64(d));
+#else
+ u64 = UINT64(d);
+#endif // _TARGET_XARCH_
+
+ return u64;
+}
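A worked example of the large-value path above, with the intermediate values spelled out (the input constant is chosen purely for illustration).

    double   d = 13835058055282163712.0;                       // 1.5 * 2^63, too large for INT64
    uint64_t u = FloatingPointUtils::convertDoubleToUInt64(d); // 0xC000000000000000
    // Taken path: INT64(d - 2^63) == 2^62, then adding back 0x8000000000000000 gives 0xC000000000000000.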
+
+// Rounds a double-precision floating-point value to the nearest integer,
+// and rounds midpoint values to the nearest even number.
+// Note this should align with classlib in floatdouble.cpp
+// Specializing for x86 using an x87 instruction is optional since
+// this outcome is identical across targets.
+double FloatingPointUtils::round(double x)
+{
+ // If the number has no fractional part do nothing
+ // This shortcut is necessary to work around precision loss in borderline cases on some platforms
+ if (x == ((double)((__int64)x)))
+ {
+ return x;
+ }
+
+ // Otherwise, add 0.5 and take the floor. For a midpoint value (one equally close
+ // to two integers) the adjustment below ensures we return the even neighbor.
+
+ double tempVal = (x + 0.5);
+ double flrTempVal = floor(tempVal);
+
+ if ((flrTempVal == tempVal) && (fmod(tempVal, 2.0) != 0))
+ {
+ flrTempVal -= 1.0;
+ }
+
+ return _copysign(flrTempVal, x);
+}
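A few sample inputs and outputs for the routine above, assuming it is linked in as-is; midpoints go to the nearest even integer, everything else rounds normally.

    assert(FloatingPointUtils::round(2.3) == 2.0);
    assert(FloatingPointUtils::round(0.5) == 0.0);   // midpoint -> even
    assert(FloatingPointUtils::round(1.5) == 2.0);   // midpoint -> even
    assert(FloatingPointUtils::round(2.5) == 2.0);   // midpoint -> even
    assert(FloatingPointUtils::round(-2.5) == -2.0); // midpoint -> even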
diff --git a/src/jit/utils.h b/src/jit/utils.h
new file mode 100644
index 0000000000..1cd35903dd
--- /dev/null
+++ b/src/jit/utils.h
@@ -0,0 +1,710 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Utils.h XX
+XX XX
+XX Has miscellaneous utility functions XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#ifndef _UTILS_H_
+#define _UTILS_H_
+
+#include "iallocator.h"
+#include "cycletimer.h"
+
+// Needed for unreached()
+#include "error.h"
+
+#ifdef _TARGET_64BIT_
+#define BitScanForwardPtr BitScanForward64
+#else
+#define BitScanForwardPtr BitScanForward
+#endif
+
+template <typename T, int size>
+unsigned ArrLen(T (&)[size])
+{
+ return size;
+}
+
+// return true if arg is a power of 2
+template <typename T>
+inline bool isPow2(T i)
+{
+ return (i > 0 && ((i - 1) & i) == 0);
+}
+
+// Adapter for iterators to a type that is compatible with C++11
+// range-based for loops.
+template <typename TIterator>
+class IteratorPair
+{
+ TIterator m_begin;
+ TIterator m_end;
+
+public:
+ IteratorPair(TIterator begin, TIterator end) : m_begin(begin), m_end(end)
+ {
+ }
+
+ inline TIterator begin()
+ {
+ return m_begin;
+ }
+
+ inline TIterator end()
+ {
+ return m_end;
+ }
+};
+
+template <typename TIterator>
+inline IteratorPair<TIterator> MakeIteratorPair(TIterator begin, TIterator end)
+{
+ return IteratorPair<TIterator>(begin, end);
+}
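A usage sketch for the adapter; a std::vector is assumed here purely for illustration (with <vector> included), since any iterator pair works.

    std::vector<int> nums = {1, 2, 3, 4};
    for (int n : MakeIteratorPair(nums.begin() + 1, nums.end()))
    {
        // visits 2, 3, 4
    }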
+
+// Recursive template definition to calculate the base-2 logarithm
+// of a constant value.
+template <unsigned val, unsigned acc = 0>
+struct ConstLog2
+{
+ enum
+ {
+ value = ConstLog2<val / 2, acc + 1>::value
+ };
+};
+
+template <unsigned acc>
+struct ConstLog2<0, acc>
+{
+ enum
+ {
+ value = acc
+ };
+};
+
+template <unsigned acc>
+struct ConstLog2<1, acc>
+{
+ enum
+ {
+ value = acc
+ };
+};
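A couple of compile-time checks that follow directly from the recursion above.

    static_assert(ConstLog2<1>::value == 0, "log2(1) == 0");
    static_assert(ConstLog2<8>::value == 3, "log2(8) == 3");
    static_assert(ConstLog2<64>::value == 6, "log2(64) == 6");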
+
+inline const char* dspBool(bool b)
+{
+ return (b) ? "true" : "false";
+}
+
+#ifdef FEATURE_CORECLR
+#ifdef _CRT_ABS_DEFINED
+// we don't have the full standard library
+inline int64_t abs(int64_t t)
+{
+ return t > 0 ? t : -t;
+}
+#endif
+#endif // FEATURE_CORECLR
+
+template <typename T>
+int signum(T val)
+{
+ if (val < T(0))
+ {
+ return -1;
+ }
+ else if (val > T(0))
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+class JitSimplerHashBehavior
+{
+public:
+ static const unsigned s_growth_factor_numerator = 3;
+ static const unsigned s_growth_factor_denominator = 2;
+
+ static const unsigned s_density_factor_numerator = 3;
+ static const unsigned s_density_factor_denominator = 4;
+
+ static const unsigned s_minimum_allocation = 7;
+
+ inline static void DECLSPEC_NORETURN NoMemory()
+ {
+ NOMEM();
+ }
+};
+
+#if defined(DEBUG) || defined(INLINE_DATA)
+
+// ConfigMethodRange describes a set of methods, specified via their
+// hash codes. This can be used for binary search and/or specifying an
+// explicit method set.
+//
+// Note method hash codes are not necessarily unique. For instance
+// many IL stubs may have the same hash.
+//
+// If range string is null or just whitespace, range includes all
+// methods.
+//
+// Parses values as decimal numbers.
+//
+// Examples:
+//
+// [string with just spaces] : all methods
+// 12345678 : a single method
+// 12345678-23456789 : a range of methods
+// 99998888 12345678-23456789 : a range of methods plus a single method
+
+class ConfigMethodRange
+{
+
+public:
+ // Default capacity
+ enum
+ {
+ DEFAULT_CAPACITY = 50
+ };
+
+ // Does the range include this method's hash?
+ bool Contains(class ICorJitInfo* info, CORINFO_METHOD_HANDLE method);
+
+ // Ensure the range string has been parsed.
+ void EnsureInit(const wchar_t* rangeStr, unsigned capacity = DEFAULT_CAPACITY)
+ {
+ // Make sure that the memory was zero initialized
+ assert(m_inited == 0 || m_inited == 1);
+
+ if (!m_inited)
+ {
+ InitRanges(rangeStr, capacity);
+ assert(m_inited == 1);
+ }
+ }
+
+ // Error checks
+ bool Error() const
+ {
+ return m_badChar != 0;
+ }
+ size_t BadCharIndex() const
+ {
+ return m_badChar - 1;
+ }
+
+private:
+ struct Range
+ {
+ unsigned m_low;
+ unsigned m_high;
+ };
+
+ void InitRanges(const wchar_t* rangeStr, unsigned capacity);
+
+ unsigned m_entries; // number of entries in the range array
+ unsigned m_lastRange; // count of low-high pairs
+ unsigned m_inited; // 1 if range string has been parsed
+ size_t m_badChar; // index + 1 of any bad character in range string
+ Range* m_ranges; // ranges of functions to include
+};
+
+#endif // defined(DEBUG) || defined(INLINE_DATA)
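A hypothetical usage sketch of ConfigMethodRange; 'jitInfo' and 'methodHnd' are placeholder names for an ICorJitInfo pointer and a method handle available at the call site, and the range string is one of the example forms listed above.

    static ConfigMethodRange s_range; // relies on zero-initialized static storage (see EnsureInit)
    s_range.EnsureInit(W("99998888 12345678-23456789"));
    if (!s_range.Error() && s_range.Contains(jitInfo, methodHnd))
    {
        // this method's hash falls inside the configured range
    }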
+
+class Compiler;
+
+/*****************************************************************************
+ * Fixed bit vector class
+ */
+class FixedBitVect
+{
+private:
+ UINT bitVectSize;
+ UINT bitVect[];
+
+ // bitChunkSize() - Returns number of bits in a bitVect chunk
+ static UINT bitChunkSize();
+
+ // bitNumToBit() - Returns a bit mask of the given bit number
+ static UINT bitNumToBit(UINT bitNum);
+
+public:
+ // bitVectInit() - Initializes a bit vector of a given size
+ static FixedBitVect* bitVectInit(UINT size, Compiler* comp);
+
+ // bitVectSet() - Sets the given bit
+ void bitVectSet(UINT bitNum);
+
+ // bitVectTest() - Tests the given bit
+ bool bitVectTest(UINT bitNum);
+
+ // bitVectOr() - Or in the given bit vector
+ void bitVectOr(FixedBitVect* bv);
+
+ // bitVectAnd() - And with passed in bit vector
+ void bitVectAnd(FixedBitVect& bv);
+
+ // bitVectGetFirst() - Find the first bit on and return the bit num.
+ // Return -1 if no bits found.
+ UINT bitVectGetFirst();
+
+ // bitVectGetNext() - Find the next bit on given previous bit and return bit num.
+ // Return -1 if no bits found.
+ UINT bitVectGetNext(UINT bitNumPrev);
+
+ // bitVectGetNextAndClear() - Find the first bit on, clear it and return it.
+ // Return -1 if no bits found.
+ UINT bitVectGetNextAndClear();
+};
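A usage sketch for the bit-vector interface; the Compiler* named 'comp' is assumed to be available at the call site.

    FixedBitVect* bv = FixedBitVect::bitVectInit(64, comp);
    bv->bitVectSet(3);
    bv->bitVectSet(17);
    for (UINT bit = bv->bitVectGetFirst(); bit != (UINT)-1; bit = bv->bitVectGetNext(bit))
    {
        // visits bit 3, then bit 17
    }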
+
+/******************************************************************************
+ * A specialized version of sprintf_s to simplify conversion to SecureCRT
+ *
+ * pWriteStart -> A pointer to the first byte to which data is written.
+ * pBufStart -> the start of the buffer into which the data is written. If
+ * composing a complex string with multiple calls to sprintf, this
+ * should not change.
+ * cbBufSize -> The size of the overall buffer (i.e. the size of the buffer
+ * pointed to by pBufStart). For subsequent calls, this does not
+ * change.
+ * fmt -> The format string
+ * ... -> Arguments.
+ *
+ * returns -> number of bytes successfully written, not including the null
+ * terminator. Calls NO_WAY on error.
+ */
+int SimpleSprintf_s(__in_ecount(cbBufSize - (pWriteStart - pBufStart)) char* pWriteStart,
+ __in_ecount(cbBufSize) char* pBufStart,
+ size_t cbBufSize,
+ __in_z const char* fmt,
+ ...);
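A sketch of the multi-call composition pattern the parameter comments describe; 'regName' and 'offs' are placeholder values.

    char buf[128];
    int  written = SimpleSprintf_s(buf, buf, sizeof(buf), "reg=%s ", regName);
    written += SimpleSprintf_s(buf + written, buf, sizeof(buf), "offs=%d", offs);
    // 'buf' and sizeof(buf) stay fixed across calls; only the write pointer advances.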
+
+#ifdef DEBUG
+void hexDump(FILE* dmpf, const char* name, BYTE* addr, size_t size);
+#endif // DEBUG
+
+/******************************************************************************
+ * ScopedSetVariable: A simple class to set and restore a variable within a scope.
+ * For example, it can be used to set a 'bool' flag to 'true' at the beginning of a
+ * function and automatically back to 'false' either at the end of the function, or at
+ * any other return location. The variable should not be changed during the scope:
+ * the destructor asserts that the value at destruction time is the same one we set.
+ * Usage: ScopedSetVariable<bool> _unused_name(&variable, true);
+ */
+template <typename T>
+class ScopedSetVariable
+{
+public:
+ ScopedSetVariable(T* pVariable, T value) : m_pVariable(pVariable)
+ {
+ m_oldValue = *m_pVariable;
+ *m_pVariable = value;
+ INDEBUG(m_value = value;)
+ }
+
+ ~ScopedSetVariable()
+ {
+ assert(*m_pVariable == m_value); // Assert that the value didn't change between ctor and dtor
+ *m_pVariable = m_oldValue;
+ }
+
+private:
+#ifdef DEBUG
+ T m_value; // The value we set the variable to (used for assert).
+#endif // DEBUG
+ T m_oldValue; // The old value, to restore the variable to.
+ T* m_pVariable; // Address of the variable to change
+};
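A usage sketch matching the comment above; 'fgInPhase' and 'Phase' are illustrative names, not actual compiler members.

    bool fgInPhase = false;

    void Phase()
    {
        ScopedSetVariable<bool> markPhase(&fgInPhase, true);
        // ... any return path restores the previous value in the destructor ...
    }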
+
+/******************************************************************************
+ * PhasedVar: A class to represent a variable that has phases, in particular,
+ * a write phase where the variable is computed, and a read phase where the
+ * variable is used. Once the variable has been read, it can no longer be changed.
+ * Reading the variable essentially commits everyone to using that value forever,
+ * and it is assumed that subsequent changes to the variable would invalidate
+ * whatever assumptions were made by the previous readers, leading to bad generated code.
+ * These assumptions are asserted in DEBUG builds.
+ * The phase ordering is clean for AMD64, but not for x86/ARM. So don't do the phase
+ * ordering asserts for those platforms.
+ */
+template <typename T>
+class PhasedVar
+{
+public:
+ PhasedVar()
+#ifdef DEBUG
+ : m_initialized(false), m_writePhase(true)
+#endif // DEBUG
+ {
+ }
+
+ PhasedVar(T value)
+ : m_value(value)
+#ifdef DEBUG
+ , m_initialized(true)
+ , m_writePhase(true)
+#endif // DEBUG
+ {
+ }
+
+ ~PhasedVar()
+ {
+#ifdef DEBUG
+ m_initialized = false;
+ m_writePhase = true;
+#endif // DEBUG
+ }
+
+ // Read the value. Change to the read phase.
+ // Marked 'const' because we don't change the encapsulated value, even though
+ // we do change the write phase, which is only for debugging asserts.
+
+ operator T() const
+ {
+#ifdef DEBUG
+ assert(m_initialized);
+ (const_cast<PhasedVar*>(this))->m_writePhase = false;
+#endif // DEBUG
+ return m_value;
+ }
+
+ // Functions/operators to write the value. Must be in the write phase.
+
+ PhasedVar& operator=(const T& value)
+ {
+#ifdef DEBUG
+#ifndef LEGACY_BACKEND
+ assert(m_writePhase);
+#endif // !LEGACY_BACKEND
+ m_initialized = true;
+#endif // DEBUG
+ m_value = value;
+ return *this;
+ }
+
+ PhasedVar& operator&=(const T& value)
+ {
+#ifdef DEBUG
+#ifndef LEGACY_BACKEND
+ assert(m_writePhase);
+#endif // !LEGACY_BACKEND
+ m_initialized = true;
+#endif // DEBUG
+ m_value &= value;
+ return *this;
+ }
+
+ // Note: if you need more <op>= functions, you can define them here, like operator&=
+
+ // Assign a value, but don't assert if we're not in the write phase, and
+ // don't change the phase (if we're actually in the read phase, we'll stay
+ // in the read phase). This is a dangerous function, and overrides the main
+ // benefit of this class. Use it wisely!
+ void OverrideAssign(const T& value)
+ {
+#ifdef DEBUG
+ m_initialized = true;
+#endif // DEBUG
+ m_value = value;
+ }
+
+ // We've decided that this variable can go back to write phase, even if it has been
+ // written. This can be used, for example, for variables set and read during frame
+ // layout calculation, as long as it is before final layout, such that anything
+ // being calculated is just an estimate anyway. Obviously, it must be used carefully,
+ // since it overrides the main benefit of this class.
+ void ResetWritePhase()
+ {
+#ifdef DEBUG
+ m_writePhase = true;
+#endif // DEBUG
+ }
+
+private:
+ // Don't allow a copy constructor. (This could be allowed, but only add it once it is actually needed.)
+
+ PhasedVar(const PhasedVar& o)
+ {
+ unreached();
+ }
+
+ T m_value;
+#ifdef DEBUG
+ bool m_initialized; // true once the variable has been initialized, that is, written once.
+ bool m_writePhase; // true if we are in the (initial) "write" phase. Once the value is read, this changes to false,
+ // and can't be changed back.
+#endif // DEBUG
+};
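A sketch of the intended write-then-read discipline; 'frameSize' is an illustrative variable, not an actual compiler member.

    PhasedVar<int> frameSize;
    frameSize = 64;     // write phase: assignments are allowed
    frameSize &= ~0xF;  // still in the write phase
    int sz = frameSize; // first read flips to the read phase (tracked in DEBUG)
    // A subsequent "frameSize = 128;" would assert in DEBUG on non-legacy backends,
    // unless ResetWritePhase() or OverrideAssign() is used deliberately.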
+
+class HelperCallProperties
+{
+private:
+ bool m_isPure[CORINFO_HELP_COUNT];
+ bool m_noThrow[CORINFO_HELP_COUNT];
+ bool m_nonNullReturn[CORINFO_HELP_COUNT];
+ bool m_isAllocator[CORINFO_HELP_COUNT];
+ bool m_mutatesHeap[CORINFO_HELP_COUNT];
+ bool m_mayRunCctor[CORINFO_HELP_COUNT];
+ bool m_mayFinalize[CORINFO_HELP_COUNT];
+
+ void init();
+
+public:
+ HelperCallProperties()
+ {
+ init();
+ }
+
+ bool IsPure(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_isPure[helperId];
+ }
+
+ bool NoThrow(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_noThrow[helperId];
+ }
+
+ bool NonNullReturn(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_nonNullReturn[helperId];
+ }
+
+ bool IsAllocator(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_isAllocator[helperId];
+ }
+
+ bool MutatesHeap(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_mutatesHeap[helperId];
+ }
+
+ bool MayRunCctor(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_mayRunCctor[helperId];
+ }
+
+ bool MayFinalize(CorInfoHelpFunc helperId)
+ {
+ assert(helperId > CORINFO_HELP_UNDEF);
+ assert(helperId < CORINFO_HELP_COUNT);
+ return m_mayFinalize[helperId];
+ }
+};
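A sketch of how the tables populated by init() are meant to be consulted, using a helper that appears in the switch above.

    HelperCallProperties helperProps;
    if (helperProps.IsPure(CORINFO_HELP_ISINSTANCEOFCLASS) &&
        helperProps.NoThrow(CORINFO_HELP_ISINSTANCEOFCLASS))
    {
        // the helper call can be treated like an ordinary side-effect-free expression
    }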
+
+//*****************************************************************************
+// AssemblyNamesList2: Parses and stores a list of Assembly names, and provides
+// a function for determining whether a given assembly name is part of the list.
+//
+// This is a clone of the AssemblyNamesList class that exists in the VM's utilcode,
+// modified to use the JIT's memory allocator and throw on out of memory behavior.
+// It is named AssemblyNamesList2 to avoid a name conflict with the VM version.
+// It might be preferable to adapt the VM's code to be more flexible (for example,
+// by using an IAllocator), but the string handling code there is heavily macroized,
+// and for the small usage we have of this class, investing in genericizing the VM
+// implementation didn't seem worth it.
+//*****************************************************************************
+
+class AssemblyNamesList2
+{
+ struct AssemblyName
+ {
+ char* m_assemblyName;
+ AssemblyName* m_next;
+ };
+
+ AssemblyName* m_pNames; // List of names
+ IAllocator* m_alloc; // IAllocator to use in this class
+
+public:
+ // Take a Unicode string list of assembly names, parse it, and store it.
+ AssemblyNamesList2(const wchar_t* list, __in IAllocator* alloc);
+
+ ~AssemblyNamesList2();
+
+ // Return 'true' if 'assemblyName' (in UTF-8 format) is in the stored list of assembly names.
+ bool IsInList(const char* assemblyName);
+
+ // Return 'true' if the assembly name list is empty.
+ bool IsEmpty()
+ {
+ return m_pNames == nullptr;
+ }
+};
+
+#ifdef FEATURE_JIT_METHOD_PERF
+// When Start() is called the current cycle count is noted, and when ElapsedTime()
+// is called we know how much time was spent, in msecs.
+//
+class CycleCount
+{
+private:
+ double cps; // cycles per second
+ unsigned __int64 beginCycles; // cycles at stop watch construction
+public:
+ CycleCount();
+
+ // Kick off the counter; if called again, the latest cycle count becomes the new starting point.
+ // If the method returns false, any other query yields unpredictable results.
+ bool Start();
+
+ // Return time elapsed in msecs, if Start returned true.
+ double ElapsedTime();
+
+private:
+ // Return true if successful.
+ bool GetCycles(unsigned __int64* time);
+};
+
+// Uses win API QueryPerformanceCounter/QueryPerformanceFrequency.
+class PerfCounter
+{
+ LARGE_INTEGER beg;
+ double freq;
+
+public:
+ // If the method returns false, any other query yields unpredictable results.
+ bool Start();
+
+ // Return time elapsed from start in millis, if Start returned true.
+ double ElapsedTime();
+};
+
+#endif // FEATURE_JIT_METHOD_PERF
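Both timers follow the same start/elapsed pattern; a minimal sketch with CycleCount (the PerfCounter usage is identical).

    CycleCount cc;
    if (cc.Start())
    {
        // ... work being measured ...
        double ms = cc.ElapsedTime(); // milliseconds of thread cycle time since Start()
    }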
+
+#ifdef DEBUG
+
+/*****************************************************************************
+ * Return the number of digits in a number of the given base (default base 10).
+ * Used when outputting strings.
+ */
+unsigned CountDigits(unsigned num, unsigned base = 10);
+
+#endif // DEBUG
+
+// Utility class for lists.
+template <typename T>
+struct ListNode
+{
+ T data;
+ ListNode<T>* next;
+
+ // Create the class without using constructors.
+ static ListNode<T>* Create(T value, IAllocator* alloc)
+ {
+ ListNode<T>* node = new (alloc) ListNode<T>;
+ node->data = value;
+ node->next = nullptr;
+ return node;
+ }
+};
+
+/*****************************************************************************
+* Floating point utility class
+*/
+class FloatingPointUtils
+{
+public:
+ static double convertUInt64ToDouble(unsigned __int64 u64);
+
+ static float convertUInt64ToFloat(unsigned __int64 u64);
+
+ static unsigned __int64 convertDoubleToUInt64(double d);
+
+ static double round(double x);
+};
+
+// The CLR requires that critical section locks be initialized via its ClrCreateCriticalSection API...but
+// that can't be called until the CLR is initialized. If we have static data that we'd like to protect by a
+// lock, and we have a statically allocated lock to protect that data, there's an issue in how to initialize
+// that lock. We could insert an initialize call in the startup path, but one might prefer to keep the code
+// more local. For such situations, CritSecObject solves the initialization problem, via a level of
+// indirection. A pointer to the lock is initially null, and when we query for the lock pointer via "Val()".
+// If the lock has not yet been allocated, this allocates one (here a leaf lock), and uses a
+// CompareAndExchange-based lazy-initialization to update the field. If this fails, the allocated lock is
+// destroyed. This will work as long as the first locking attempt occurs after enough CLR initialization has
+// happened to make ClrCreateCriticalSection calls legal.
+
+class CritSecObject
+{
+public:
+ CritSecObject()
+ {
+ m_pCs = nullptr;
+ }
+
+ CRITSEC_COOKIE Val()
+ {
+ if (m_pCs == nullptr)
+ {
+ // CompareExchange-based lazy init.
+ CRITSEC_COOKIE newCs = ClrCreateCriticalSection(CrstLeafLock, CRST_DEFAULT);
+ CRITSEC_COOKIE observed = InterlockedCompareExchangeT(&m_pCs, newCs, NULL);
+ if (observed != nullptr)
+ {
+ ClrDeleteCriticalSection(newCs);
+ }
+ }
+ return m_pCs;
+ }
+
+private:
+ // CRITSEC_COOKIE is an opaque pointer type.
+ CRITSEC_COOKIE m_pCs;
+
+ // No copying or assignment allowed.
+ CritSecObject(const CritSecObject&) = delete;
+ CritSecObject& operator=(const CritSecObject&) = delete;
+};
+
+// Stack-based holder for a critical section lock.
+// Ensures lock is released.
+
+class CritSecHolder
+{
+public:
+ CritSecHolder(CritSecObject& critSec) : m_CritSec(critSec)
+ {
+ ClrEnterCriticalSection(m_CritSec.Val());
+ }
+
+ ~CritSecHolder()
+ {
+ ClrLeaveCriticalSection(m_CritSec.Val());
+ }
+
+private:
+ CritSecObject& m_CritSec;
+
+ // No copying or assignment allowed.
+ CritSecHolder(const CritSecHolder&) = delete;
+ CritSecHolder& operator=(const CritSecHolder&) = delete;
+};
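A sketch of the lazily created lock guarding static data; 's_lock', 's_counter', and 'BumpCounter' are illustrative names.

    static CritSecObject s_lock;
    static int           s_counter;

    void BumpCounter()
    {
        CritSecHolder holder(s_lock); // enters the critical section; leaves in the destructor
        s_counter++;
    }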
+
+#endif // _UTILS_H_
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
new file mode 100644
index 0000000000..5bc96ed4a9
--- /dev/null
+++ b/src/jit/valuenum.cpp
@@ -0,0 +1,7518 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX ValueNum XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "valuenum.h"
+#include "ssaconfig.h"
+
+VNFunc GetVNFuncForOper(genTreeOps oper, bool isUnsigned)
+{
+ if (!isUnsigned || (oper == GT_EQ) || (oper == GT_NE))
+ {
+ return VNFunc(oper);
+ }
+ switch (oper)
+ {
+ case GT_LT:
+ return VNF_LT_UN;
+ case GT_LE:
+ return VNF_LE_UN;
+ case GT_GE:
+ return VNF_GE_UN;
+ case GT_GT:
+ return VNF_GT_UN;
+ case GT_ADD:
+ return VNF_ADD_UN;
+ case GT_SUB:
+ return VNF_SUB_UN;
+ case GT_MUL:
+ return VNF_MUL_UN;
+ case GT_DIV:
+ return VNF_DIV_UN;
+ case GT_MOD:
+ return VNF_MOD_UN;
+
+ case GT_NOP:
+ case GT_COMMA:
+ return VNFunc(oper);
+ default:
+ unreached();
+ }
+}
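A few concrete mappings implied by the function above, for reference.

    // GetVNFuncForOper(GT_LT,  true)  -> VNF_LT_UN
    // GetVNFuncForOper(GT_EQ,  true)  -> VNFunc(GT_EQ)   (equality ignores signedness)
    // GetVNFuncForOper(GT_ADD, false) -> VNFunc(GT_ADD)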
+
+ValueNumStore::ValueNumStore(Compiler* comp, IAllocator* alloc)
+ : m_pComp(comp)
+ , m_alloc(alloc)
+ ,
+#ifdef DEBUG
+ m_numMapSels(0)
+ ,
+#endif
+ m_nextChunkBase(0)
+ , m_fixedPointMapSels(alloc, 8)
+ , m_chunks(alloc, 8)
+ , m_intCnsMap(nullptr)
+ , m_longCnsMap(nullptr)
+ , m_handleMap(nullptr)
+ , m_floatCnsMap(nullptr)
+ , m_doubleCnsMap(nullptr)
+ , m_byrefCnsMap(nullptr)
+ , m_VNFunc0Map(nullptr)
+ , m_VNFunc1Map(nullptr)
+ , m_VNFunc2Map(nullptr)
+ , m_VNFunc3Map(nullptr)
+ , m_VNFunc4Map(nullptr)
+ , m_uPtrToLocNotAFieldCount(1)
+{
+ // We have no current allocation chunks.
+ for (unsigned i = 0; i < TYP_COUNT; i++)
+ {
+ for (unsigned j = CEA_None; j <= CEA_Count + MAX_LOOP_NUM; j++)
+ {
+ m_curAllocChunk[i][j] = NoChunk;
+ }
+ }
+
+ for (unsigned i = 0; i < SmallIntConstNum; i++)
+ {
+ m_VNsForSmallIntConsts[i] = NoVN;
+ }
+ // We will reserve chunk 0 to hold some special constants, like the constant NULL, the "exception" value, and the
+ // "zero map."
+ Chunk* specialConstChunk = new (m_alloc) Chunk(m_alloc, &m_nextChunkBase, TYP_REF, CEA_Const, MAX_LOOP_NUM);
+ specialConstChunk->m_numUsed +=
+ SRC_NumSpecialRefConsts; // Implicitly allocate 0 ==> NULL, and 1 ==> Exception, 2 ==> ZeroMap.
+ ChunkNum cn = m_chunks.Push(specialConstChunk);
+ assert(cn == 0);
+
+ m_mapSelectBudget = JitConfig.JitVNMapSelBudget();
+}
+
+// static.
+template <typename T>
+T ValueNumStore::EvalOp(VNFunc vnf, T v0)
+{
+ genTreeOps oper = genTreeOps(vnf);
+
+ // Here we handle those unary ops that are the same for integral and floating-point types.
+ switch (oper)
+ {
+ case GT_NEG:
+ return -v0;
+ default:
+ // Must be int-specific
+ return EvalOpIntegral(vnf, v0);
+ }
+}
+
+template <typename T>
+T ValueNumStore::EvalOpIntegral(VNFunc vnf, T v0)
+{
+ genTreeOps oper = genTreeOps(vnf);
+
+ // Here we handle unary ops that are the same for all integral types.
+ switch (oper)
+ {
+ case GT_NOT:
+ return ~v0;
+ default:
+ unreached();
+ }
+}
+
+// static
+template <typename T>
+T ValueNumStore::EvalOp(VNFunc vnf, T v0, T v1, ValueNum* pExcSet)
+{
+ if (vnf < VNF_Boundary)
+ {
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for integral and floating-point types.
+ switch (oper)
+ {
+ case GT_ADD:
+ return v0 + v1;
+ case GT_SUB:
+ return v0 - v1;
+ case GT_MUL:
+ return v0 * v1;
+ case GT_DIV:
+ if (IsIntZero(v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ return (T)0;
+ }
+ if (IsOverflowIntDiv(v0, v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_ArithmeticExc));
+ return (T)0;
+ }
+ else
+ {
+ return v0 / v1;
+ }
+
+ default:
+ // Must be int-specific
+ return EvalOpIntegral(vnf, v0, v1, pExcSet);
+ }
+ }
+ else // must be a VNF_ function
+ {
+ typedef typename jitstd::make_unsigned<T>::type UT;
+ switch (vnf)
+ {
+ case VNF_GT_UN:
+ return T(UT(v0) > UT(v1));
+ case VNF_GE_UN:
+ return T(UT(v0) >= UT(v1));
+ case VNF_LT_UN:
+ return T(UT(v0) < UT(v1));
+ case VNF_LE_UN:
+ return T(UT(v0) <= UT(v1));
+ case VNF_ADD_UN:
+ return T(UT(v0) + UT(v1));
+ case VNF_SUB_UN:
+ return T(UT(v0) - UT(v1));
+ case VNF_MUL_UN:
+ return T(UT(v0) * UT(v1));
+ case VNF_DIV_UN:
+ if (IsIntZero(v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ return (T)0;
+ }
+ else
+ {
+ return T(UT(v0) / UT(v1));
+ }
+ default:
+ // Must be int-specific
+ return EvalOpIntegral(vnf, v0, v1, pExcSet);
+ }
+ }
+}
+
+// Specialization for double for floating-point operations, which don't involve unsigned variants.
+template <>
+double ValueNumStore::EvalOp<double>(VNFunc vnf, double v0, double v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for floating-point types.
+ switch (oper)
+ {
+ case GT_ADD:
+ return v0 + v1;
+ case GT_SUB:
+ return v0 - v1;
+ case GT_MUL:
+ return v0 * v1;
+ case GT_DIV:
+ return v0 / v1;
+ case GT_MOD:
+ return fmod(v0, v1);
+
+ default:
+ unreached();
+ }
+}
+
+template <typename T>
+int ValueNumStore::EvalComparison(VNFunc vnf, T v0, T v1)
+{
+ if (vnf < VNF_Boundary)
+ {
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for floating-point types.
+ switch (oper)
+ {
+ case GT_EQ:
+ return v0 == v1;
+ case GT_NE:
+ return v0 != v1;
+ case GT_GT:
+ return v0 > v1;
+ case GT_GE:
+ return v0 >= v1;
+ case GT_LT:
+ return v0 < v1;
+ case GT_LE:
+ return v0 <= v1;
+ default:
+ unreached();
+ }
+ }
+ else // must be a VNF_ function
+ {
+ switch (vnf)
+ {
+ case VNF_GT_UN:
+ return unsigned(v0) > unsigned(v1);
+ case VNF_GE_UN:
+ return unsigned(v0) >= unsigned(v1);
+ case VNF_LT_UN:
+ return unsigned(v0) < unsigned(v1);
+ case VNF_LE_UN:
+ return unsigned(v0) <= unsigned(v1);
+ default:
+ unreached();
+ }
+ }
+}
+
+/* static */
+template <typename T>
+int ValueNumStore::EvalOrderedComparisonFloat(VNFunc vnf, T v0, T v1)
+{
+ // !! NOTE !!
+ //
+ // All comparisons below are ordered comparisons.
+ //
+ // We should guard this function from unordered comparisons
+ // identified by the GTF_RELOP_NAN_UN flag. Either the flag
+ // should be bubbled (similar to GTF_UNSIGNED for ints)
+ // to this point or we should bail much earlier if any of
+ // the operands are NaN.
+ //
+ genTreeOps oper = genTreeOps(vnf);
+ // Here we handle those that are the same for floating-point types.
+ switch (oper)
+ {
+ case GT_EQ:
+ return v0 == v1;
+ case GT_NE:
+ return v0 != v1;
+ case GT_GT:
+ return v0 > v1;
+ case GT_GE:
+ return v0 >= v1;
+ case GT_LT:
+ return v0 < v1;
+ case GT_LE:
+ return v0 <= v1;
+ default:
+ unreached();
+ }
+}
+
+template <>
+int ValueNumStore::EvalComparison<double>(VNFunc vnf, double v0, double v1)
+{
+ return EvalOrderedComparisonFloat(vnf, v0, v1);
+}
+
+template <>
+int ValueNumStore::EvalComparison<float>(VNFunc vnf, float v0, float v1)
+{
+ return EvalOrderedComparisonFloat(vnf, v0, v1);
+}
+
+template <typename T>
+T ValueNumStore::EvalOpIntegral(VNFunc vnf, T v0, T v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ switch (oper)
+ {
+ case GT_EQ:
+ return v0 == v1;
+ case GT_NE:
+ return v0 != v1;
+ case GT_GT:
+ return v0 > v1;
+ case GT_GE:
+ return v0 >= v1;
+ case GT_LT:
+ return v0 < v1;
+ case GT_LE:
+ return v0 <= v1;
+ case GT_OR:
+ return v0 | v1;
+ case GT_XOR:
+ return v0 ^ v1;
+ case GT_AND:
+ return v0 & v1;
+ case GT_LSH:
+ return v0 << v1;
+ case GT_RSH:
+ return v0 >> v1;
+ case GT_RSZ:
+ if (sizeof(T) == 8)
+ {
+ return UINT64(v0) >> v1;
+ }
+ else
+ {
+ return UINT32(v0) >> v1;
+ }
+ case GT_ROL:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << v1) | (UINT64(v0) >> (64 - v1));
+ }
+ else
+ {
+ return (v0 << v1) | (UINT32(v0) >> (32 - v1));
+ }
+
+ case GT_ROR:
+ if (sizeof(T) == 8)
+ {
+ return (v0 << (64 - v1)) | (UINT64(v0) >> v1);
+ }
+ else
+ {
+ return (v0 << (32 - v1)) | (UINT32(v0) >> v1);
+ }
+
+ case GT_DIV:
+ case GT_MOD:
+ if (v1 == 0)
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ }
+ else if (IsOverflowIntDiv(v0, v1))
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_ArithmeticExc));
+ return 0;
+ }
+ else // We are not dividing by Zero, so we can calculate the exact result.
+ {
+ // Perform the appropriate operation.
+ if (oper == GT_DIV)
+ {
+ return v0 / v1;
+ }
+ else // Must be GT_MOD
+ {
+ return v0 % v1;
+ }
+ }
+
+ case GT_UDIV:
+ case GT_UMOD:
+ if (v1 == 0)
+ {
+ *pExcSet = VNExcSetSingleton(VNForFunc(TYP_REF, VNF_DivideByZeroExc));
+ return 0;
+ }
+ else // We are not dividing by Zero, so we can calculate the exact result.
+ {
+ typedef typename jitstd::make_unsigned<T>::type UT;
+ // We need to force the source operands of the divide or mod operation
+ // to be considered unsigned.
+ //
+ if (oper == GT_UDIV)
+ {
+ // This returns unsigned(v0) / unsigned(v1) for both sizes of integers
+ return T(UT(v0) / UT(v1));
+ }
+ else // Must be GT_UMOD
+ {
+ // This returns unsigned(v0) % unsigned(v1) for both sizes of integers
+ return T(UT(v0) % UT(v1));
+ }
+ }
+ default:
+ unreached(); // NYI?
+ }
+}
+
+ValueNum ValueNumStore::VNExcSetSingleton(ValueNum x)
+{
+ ValueNum res = VNForFunc(TYP_REF, VNF_ExcSetCons, x, VNForEmptyExcSet());
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" " STR_VN "%x = singleton exc set", res);
+ vnDump(m_pComp, x);
+ printf("\n");
+ }
+#endif
+ return res;
+}
+
+ValueNumPair ValueNumStore::VNPExcSetSingleton(ValueNumPair xp)
+{
+ return ValueNumPair(VNExcSetSingleton(xp.GetLiberal()), VNExcSetSingleton(xp.GetConservative()));
+}
+
+ValueNum ValueNumStore::VNExcSetUnion(ValueNum xs0, ValueNum xs1 DEBUGARG(bool topLevel))
+{
+ if (xs0 == VNForEmptyExcSet())
+ {
+ return xs1;
+ }
+ else if (xs1 == VNForEmptyExcSet())
+ {
+ return xs0;
+ }
+ else
+ {
+ VNFuncApp funcXs0;
+ bool b0 = GetVNFunc(xs0, &funcXs0);
+ assert(b0 && funcXs0.m_func == VNF_ExcSetCons); // Precondition: xs0 is an exception set.
+ VNFuncApp funcXs1;
+ bool b1 = GetVNFunc(xs1, &funcXs1);
+ assert(b1 && funcXs1.m_func == VNF_ExcSetCons); // Precondition: xs1 is an exception set.
+ ValueNum res = NoVN;
+ if (funcXs0.m_args[0] < funcXs1.m_args[0])
+ {
+ res = VNForFunc(TYP_REF, VNF_ExcSetCons, funcXs0.m_args[0],
+ VNExcSetUnion(funcXs0.m_args[1], xs1 DEBUGARG(false)));
+ }
+ else if (funcXs0.m_args[0] == funcXs1.m_args[0])
+ {
+ // Equal elements; only add one to the result.
+ res = VNExcSetUnion(funcXs0.m_args[1], xs1);
+ }
+ else
+ {
+ assert(funcXs0.m_args[0] > funcXs1.m_args[0]);
+ res = VNForFunc(TYP_REF, VNF_ExcSetCons, funcXs1.m_args[0],
+ VNExcSetUnion(xs0, funcXs1.m_args[1] DEBUGARG(false)));
+ }
+
+ return res;
+ }
+}
+
+ValueNumPair ValueNumStore::VNPExcSetUnion(ValueNumPair xs0vnp, ValueNumPair xs1vnp)
+{
+ return ValueNumPair(VNExcSetUnion(xs0vnp.GetLiberal(), xs1vnp.GetLiberal()),
+ VNExcSetUnion(xs0vnp.GetConservative(), xs1vnp.GetConservative()));
+}
+
+void ValueNumStore::VNUnpackExc(ValueNum vnWx, ValueNum* pvn, ValueNum* pvnx)
+{
+ assert(vnWx != NoVN);
+ VNFuncApp funcApp;
+ if (GetVNFunc(vnWx, &funcApp) && funcApp.m_func == VNF_ValWithExc)
+ {
+ *pvn = funcApp.m_args[0];
+ *pvnx = funcApp.m_args[1];
+ }
+ else
+ {
+ *pvn = vnWx;
+ }
+}
+
+void ValueNumStore::VNPUnpackExc(ValueNumPair vnWx, ValueNumPair* pvn, ValueNumPair* pvnx)
+{
+ VNUnpackExc(vnWx.GetLiberal(), pvn->GetLiberalAddr(), pvnx->GetLiberalAddr());
+ VNUnpackExc(vnWx.GetConservative(), pvn->GetConservativeAddr(), pvnx->GetConservativeAddr());
+}
+
+ValueNum ValueNumStore::VNNormVal(ValueNum vn)
+{
+ VNFuncApp funcApp;
+ if (GetVNFunc(vn, &funcApp) && funcApp.m_func == VNF_ValWithExc)
+ {
+ return funcApp.m_args[0];
+ }
+ else
+ {
+ return vn;
+ }
+}
+
+ValueNumPair ValueNumStore::VNPNormVal(ValueNumPair vnp)
+{
+ return ValueNumPair(VNNormVal(vnp.GetLiberal()), VNNormVal(vnp.GetConservative()));
+}
+
+ValueNum ValueNumStore::VNExcVal(ValueNum vn)
+{
+ VNFuncApp funcApp;
+ if (GetVNFunc(vn, &funcApp) && funcApp.m_func == VNF_ValWithExc)
+ {
+ return funcApp.m_args[1];
+ }
+ else
+ {
+ return VNForEmptyExcSet();
+ }
+}
+
+ValueNumPair ValueNumStore::VNPExcVal(ValueNumPair vnp)
+{
+ return ValueNumPair(VNExcVal(vnp.GetLiberal()), VNExcVal(vnp.GetConservative()));
+}
+
+// If vn "excSet" is not "VNForEmptyExcSet()", return "VNF_ValWithExc(vn, excSet)". Otherwise,
+// just return "vn".
+ValueNum ValueNumStore::VNWithExc(ValueNum vn, ValueNum excSet)
+{
+ if (excSet == VNForEmptyExcSet())
+ {
+ return vn;
+ }
+ else
+ {
+ ValueNum vnNorm;
+ ValueNum vnX = VNForEmptyExcSet();
+ VNUnpackExc(vn, &vnNorm, &vnX);
+ return VNForFunc(TypeOfVN(vnNorm), VNF_ValWithExc, vnNorm, VNExcSetUnion(vnX, excSet));
+ }
+}
+
+ValueNumPair ValueNumStore::VNPWithExc(ValueNumPair vnp, ValueNumPair excSetVNP)
+{
+ return ValueNumPair(VNWithExc(vnp.GetLiberal(), excSetVNP.GetLiberal()),
+ VNWithExc(vnp.GetConservative(), excSetVNP.GetConservative()));
+}
+
+bool ValueNumStore::IsKnownNonNull(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ VNFuncApp funcAttr;
+ return GetVNFunc(vn, &funcAttr) && (s_vnfOpAttribs[funcAttr.m_func] & VNFOA_KnownNonNull) != 0;
+}
+
+bool ValueNumStore::IsSharedStatic(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ VNFuncApp funcAttr;
+ return GetVNFunc(vn, &funcAttr) && (s_vnfOpAttribs[funcAttr.m_func] & VNFOA_SharedStatic) != 0;
+}
+
+ValueNumStore::Chunk::Chunk(
+ IAllocator* alloc, ValueNum* pNextBaseVN, var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum)
+ : m_defs(nullptr), m_numUsed(0), m_baseVN(*pNextBaseVN), m_typ(typ), m_attribs(attribs), m_loopNum(loopNum)
+{
+ // Allocate "m_defs" here, according to the typ/attribs pair.
+ switch (attribs)
+ {
+ case CEA_None:
+ break; // Nothing to do.
+ case CEA_Const:
+ switch (typ)
+ {
+ case TYP_INT:
+ m_defs = new (alloc) Alloc<TYP_INT>::Type[ChunkSize];
+ break;
+ case TYP_FLOAT:
+ m_defs = new (alloc) Alloc<TYP_FLOAT>::Type[ChunkSize];
+ break;
+ case TYP_LONG:
+ m_defs = new (alloc) Alloc<TYP_LONG>::Type[ChunkSize];
+ break;
+ case TYP_DOUBLE:
+ m_defs = new (alloc) Alloc<TYP_DOUBLE>::Type[ChunkSize];
+ break;
+ case TYP_BYREF:
+ m_defs = new (alloc) Alloc<TYP_BYREF>::Type[ChunkSize];
+ break;
+ case TYP_REF:
+ // We allocate space for a single REF constant, NULL, so we can access these values uniformly.
+ // Since this value is always the same, we represent it as a static.
+ m_defs = &s_specialRefConsts[0];
+ break; // Nothing to do.
+ default:
+ assert(false); // Should not reach here.
+ }
+ break;
+
+ case CEA_Handle:
+ m_defs = new (alloc) VNHandle[ChunkSize];
+ break;
+
+ case CEA_Func0:
+ m_defs = new (alloc) VNFunc[ChunkSize];
+ break;
+
+ case CEA_Func1:
+ m_defs = new (alloc) VNDefFunc1Arg[ChunkSize];
+ break;
+ case CEA_Func2:
+ m_defs = new (alloc) VNDefFunc2Arg[ChunkSize];
+ break;
+ case CEA_Func3:
+ m_defs = new (alloc) VNDefFunc3Arg[ChunkSize];
+ break;
+ case CEA_Func4:
+ m_defs = new (alloc) VNDefFunc4Arg[ChunkSize];
+ break;
+ default:
+ unreached();
+ }
+ *pNextBaseVN += ChunkSize;
+}
+
+ValueNumStore::Chunk* ValueNumStore::GetAllocChunk(var_types typ,
+ ChunkExtraAttribs attribs,
+ BasicBlock::loopNumber loopNum)
+{
+ Chunk* res;
+ unsigned index;
+ if (loopNum == MAX_LOOP_NUM)
+ {
+ // Loop nest is unknown/irrelevant for this VN.
+ index = attribs;
+ }
+ else
+ {
+ // Loop nest is interesting. Since we know this is only true for unique VNs, we know attribs will
+ // be CEA_None and can just index based on loop number.
+ noway_assert(attribs == CEA_None);
+ // Map NOT_IN_LOOP -> MAX_LOOP_NUM to make the index range contiguous [0..MAX_LOOP_NUM]
+ index = CEA_Count + (loopNum == BasicBlock::NOT_IN_LOOP ? MAX_LOOP_NUM : loopNum);
+ }
+ ChunkNum cn = m_curAllocChunk[typ][index];
+ if (cn != NoChunk)
+ {
+ res = m_chunks.Get(cn);
+ if (res->m_numUsed < ChunkSize)
+ {
+ return res;
+ }
+ }
+ // Otherwise, must allocate a new one.
+ res = new (m_alloc) Chunk(m_alloc, &m_nextChunkBase, typ, attribs, loopNum);
+ cn = m_chunks.Push(res);
+ m_curAllocChunk[typ][index] = cn;
+ return res;
+}
+
+ValueNum ValueNumStore::VNForIntCon(INT32 cnsVal)
+{
+ if (IsSmallIntConst(cnsVal))
+ {
+ unsigned ind = cnsVal - SmallIntConstMin;
+ ValueNum vn = m_VNsForSmallIntConsts[ind];
+ if (vn != NoVN)
+ {
+ return vn;
+ }
+ vn = GetVNForIntCon(cnsVal);
+ m_VNsForSmallIntConsts[ind] = vn;
+ return vn;
+ }
+ else
+ {
+ return GetVNForIntCon(cnsVal);
+ }
+}
+
+ValueNum ValueNumStore::VNForLongCon(INT64 cnsVal)
+{
+ ValueNum res;
+ if (GetLongCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_LONG, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<INT64*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetLongCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFloatCon(float cnsVal)
+{
+ ValueNum res;
+ if (GetFloatCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_FLOAT, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<float*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetFloatCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForDoubleCon(double cnsVal)
+{
+ ValueNum res;
+ if (GetDoubleCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_DOUBLE, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<double*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetDoubleCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForByrefCon(INT64 cnsVal)
+{
+ ValueNum res;
+ if (GetByrefCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_BYREF, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<INT64*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetByrefCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForCastOper(var_types castToType, bool srcIsUnsigned /*=false*/)
+{
+ assert(castToType != TYP_STRUCT);
+ INT32 cnsVal = INT32(castToType) << INT32(VCA_BitCount);
+ assert((cnsVal & INT32(VCA_ReservedBits)) == 0);
+
+ if (srcIsUnsigned)
+ {
+ // We record the srcIsUnsigned by or-ing a 0x01
+ cnsVal |= INT32(VCA_UnsignedSrc);
+ }
+ ValueNum result = VNForIntCon(cnsVal);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForCastOper(%s%s) is " STR_VN "%x\n", varTypeName(castToType),
+ srcIsUnsigned ? ", unsignedSrc" : "", result);
+ }
+#endif
+
+ return result;
+}
+
+ValueNum ValueNumStore::VNForHandle(ssize_t cnsVal, unsigned handleFlags)
+{
+ assert((handleFlags & ~GTF_ICON_HDL_MASK) == 0);
+
+ ValueNum res;
+ VNHandle handle;
+ VNHandle::Initialize(&handle, cnsVal, handleFlags);
+ if (GetHandleMap()->Lookup(handle, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_I_IMPL, CEA_Handle);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNHandle*>(c->m_defs)[offsetWithinChunk] = handle;
+ GetHandleMap()->Set(handle, res);
+ return res;
+ }
+}
+
+// Returns the value number for zero of the given "typ".
+// It has an unreached() for a "typ" that has no zero value, such as TYP_BYREF.
+ValueNum ValueNumStore::VNZeroForType(var_types typ)
+{
+ switch (typ)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_INT:
+ case TYP_UINT:
+ return VNForIntCon(0);
+ case TYP_LONG:
+ case TYP_ULONG:
+ return VNForLongCon(0);
+ case TYP_FLOAT:
+#if FEATURE_X87_DOUBLES
+ return VNForDoubleCon(0.0);
+#else
+ return VNForFloatCon(0.0f);
+#endif
+ case TYP_DOUBLE:
+ return VNForDoubleCon(0.0);
+ case TYP_REF:
+ case TYP_ARRAY:
+ return VNForNull();
+ case TYP_STRUCT:
+#ifdef FEATURE_SIMD
+ // TODO-CQ: Improve value numbering for SIMD types.
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif // FEATURE_SIMD
+ return VNForZeroMap(); // Recursion!
+
+ // These should be unreached.
+ default:
+ unreached(); // Should handle all types.
+ }
+}
+
+// Returns the value number for one of the given "typ".
+// It returns NoVN for a "typ" that has no one value, such as TYP_REF.
+ValueNum ValueNumStore::VNOneForType(var_types typ)
+{
+ switch (typ)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_INT:
+ case TYP_UINT:
+ return VNForIntCon(1);
+ case TYP_LONG:
+ case TYP_ULONG:
+ return VNForLongCon(1);
+ case TYP_FLOAT:
+ return VNForFloatCon(1.0f);
+ case TYP_DOUBLE:
+ return VNForDoubleCon(1.0);
+
+ default:
+ return NoVN;
+ }
+}
+
+class Object* ValueNumStore::s_specialRefConsts[] = {nullptr, nullptr, nullptr};
+
+// Nullary operators (i.e., symbolic constants).
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func)
+{
+ assert(VNFuncArity(func) == 0);
+
+ ValueNum res;
+
+ if (GetVNFunc0Map()->Lookup(func, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(typ, CEA_Func0);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNFunc*>(c->m_defs)[offsetWithinChunk] = func;
+ GetVNFunc0Map()->Set(func, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN)
+{
+ assert(arg0VN == VNNormVal(arg0VN)); // Arguments don't carry exceptions.
+
+ ValueNum res;
+ VNDefFunc1Arg fstruct(func, arg0VN);
+
+ // Do constant-folding.
+ if (CanEvalForConstantArgs(func) && IsVNConstant(arg0VN))
+ {
+ return EvalFuncForConstantArgs(typ, func, arg0VN);
+ }
+
+ if (GetVNFunc1Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ // Otherwise, create a new VN for this application.
+ Chunk* c = GetAllocChunk(typ, CEA_Func1);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc1Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc1Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(arg0VN != NoVN && arg1VN != NoVN);
+ assert(arg0VN == VNNormVal(arg0VN)); // Arguments carry no exceptions.
+ assert(arg1VN == VNNormVal(arg1VN)); // Arguments carry no exceptions.
+ assert(VNFuncArity(func) == 2);
+ assert(func != VNF_MapSelect); // Precondition: use the special function VNForMapSelect defined for that.
+
+ ValueNum res;
+
+ // Do constant-folding.
+ if (CanEvalForConstantArgs(func) && IsVNConstant(arg0VN) && IsVNConstant(arg1VN))
+ {
+ bool canFold = true; // Normally we will be able to fold this 'func'
+
+ // Special case for VNF_Cast of constant handles
+ // Don't allow eval/fold of a GT_CAST(non-I_IMPL, Handle)
+ //
+ if ((func == VNF_Cast) && (typ != TYP_I_IMPL) && IsVNHandle(arg0VN))
+ {
+ canFold = false;
+ }
+
+ // It is possible for us to have mismatched types (see Bug 750863)
+ // We don't try to fold a binary operation when one of the constant operands
+ // is a floating-point constant and the other is not.
+ //
+ bool arg0IsFloating = varTypeIsFloating(TypeOfVN(arg0VN));
+ bool arg1IsFloating = varTypeIsFloating(TypeOfVN(arg1VN));
+ if (arg0IsFloating != arg1IsFloating)
+ {
+ canFold = false;
+ }
+
+ // NaNs are unordered with respect to other floats. While an ordered
+ // comparison would return false, an unordered comparison
+ // will return true if any operands are a NaN. We only perform
+ // ordered NaN comparison in EvalComparison.
+ if ((arg0IsFloating && _isnan(GetConstantDouble(arg0VN))) ||
+ (arg1IsFloating && _isnan(GetConstantDouble(arg1VN))))
+ {
+ canFold = false;
+ }
+
+ if (canFold)
+ {
+ return EvalFuncForConstantArgs(typ, func, arg0VN, arg1VN);
+ }
+ }
+ // We canonicalize commutative operations.
+ // (Perhaps should eventually handle associative/commutative [AC] ops -- but that gets complicated...)
+ if (VNFuncIsCommutative(func))
+ {
+ // Order arg0 arg1 by numerical VN value.
+ if (arg0VN > arg1VN)
+ {
+ jitstd::swap(arg0VN, arg1VN);
+ }
+ }
+ VNDefFunc2Arg fstruct(func, arg0VN, arg1VN);
+ if (GetVNFunc2Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ // We have ways of evaluating some binary functions.
+ if (func < VNF_Boundary)
+ {
+ if (typ != TYP_BYREF) // We don't want/need to optimize a zero byref
+ {
+ genTreeOps oper = genTreeOps(func);
+ ValueNum ZeroVN, OneVN; // We may need to create one of these in the switch below.
+ switch (oper)
+ {
+ case GT_ADD:
+ // This identity does not apply for floating point (when x == -0.0)
+ if (!varTypeIsFloating(typ))
+ {
+ // (x + 0) == (0 + x) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return arg1VN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ }
+ break;
+
+ case GT_SUB:
+ // (x - 0) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ break;
+
+ case GT_MUL:
+ // (x * 1) == (1 * x) => x
+ OneVN = VNOneForType(typ);
+ if (OneVN != NoVN)
+ {
+ if (arg0VN == OneVN)
+ {
+ return arg1VN;
+ }
+ else if (arg1VN == OneVN)
+ {
+ return arg0VN;
+ }
+ }
+
+ if (!varTypeIsFloating(typ))
+ {
+ // (x * 0) == (0 * x) => 0 (unless x is NaN, which we must assume a fp value may be)
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ }
+ break;
+
+ case GT_DIV:
+ case GT_UDIV:
+ // (x / 1) => x
+ OneVN = VNOneForType(typ);
+ if (OneVN != NoVN)
+ {
+ if (arg1VN == OneVN)
+ {
+ return arg0VN;
+ }
+ }
+ break;
+
+ case GT_OR:
+ case GT_XOR:
+ // (x | 0) == (0 | x) => x
+ // (x ^ 0) == (0 ^ x) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return arg1VN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ break;
+
+ case GT_AND:
+ // (x & 0) == (0 & x) => 0
+ ZeroVN = VNZeroForType(typ);
+ if (arg0VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ else if (arg1VN == ZeroVN)
+ {
+ return ZeroVN;
+ }
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+ // (x << 0) => x
+ // (x >> 0) => x
+ // (x rol 0) => x
+ // (x ror 0) => x
+ ZeroVN = VNZeroForType(typ);
+ if (arg1VN == ZeroVN)
+ {
+ return arg0VN;
+ }
+ break;
+
+ case GT_EQ:
+ // (x == x) => true (unless x is NaN)
+ if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN))
+ {
+ return VNOneForType(typ);
+ }
+ if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) ||
+ (arg1VN == VNForNull() && IsKnownNonNull(arg0VN)))
+ {
+ return VNZeroForType(typ);
+ }
+ break;
+ case GT_NE:
+ // (x != x) => false (unless x is NaN)
+ if (!varTypeIsFloating(TypeOfVN(arg0VN)) && (arg0VN != NoVN) && (arg0VN == arg1VN))
+ {
+ return VNZeroForType(typ);
+ }
+ if ((arg0VN == VNForNull() && IsKnownNonNull(arg1VN)) ||
+ (arg1VN == VNForNull() && IsKnownNonNull(arg0VN)))
+ {
+ return VNOneForType(typ);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+ }
+ else // must be a VNF_ function
+ {
+ if (func == VNF_CastClass)
+ {
+ // In terms of values, a castclass always returns its second argument, the object being cast.
+ // The IL operation may also throw an exception
+ return VNWithExc(arg1VN, VNExcSetSingleton(VNForFunc(TYP_REF, VNF_InvalidCastExc, arg1VN, arg0VN)));
+ }
+ }
+
+ // Otherwise, assign a new VN for the function application.
+ Chunk* c = GetAllocChunk(typ, CEA_Func2);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc2Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+//------------------------------------------------------------------------------
+// VNForMapStore : Evaluate VNF_MapStore with the given arguments.
+//
+//
+// Arguments:
+// typ - Value type
+// arg0VN - Map value number
+// arg1VN - Index value number
+// arg2VN - New value for map[index]
+//
+// Return Value:
+// Value number for the result of the evaluation.
+
+ValueNum ValueNumStore::VNForMapStore(var_types typ, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN)
+{
+ ValueNum result = VNForFunc(typ, VNF_MapStore, arg0VN, arg1VN, arg2VN);
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForMapStore(" STR_VN "%x, " STR_VN "%x, " STR_VN "%x):%s returns ", arg0VN, arg1VN, arg2VN,
+ varTypeName(typ));
+ m_pComp->vnPrint(result, 1);
+ printf("\n");
+ }
+#endif
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// VNForMapSelect : Evaluate VNF_MapSelect with the given arguments.
+//
+// Arguments:
+// vnk - Value number kind
+// typ - Value type
+// arg0VN - Map value number
+// arg1VN - Index value number
+//
+// Return Value:
+// Value number for the result of the evaluation.
+//
+// Notes:
+// This requires a "ValueNumKind" because it will attempt, given "select(phi(m1, ..., mk), ind)", to evaluate
+// "select(m1, ind)", ..., "select(mk, ind)" to see if they agree. It needs to know which kind of value number
+// (liberal/conservative) to read from the SSA def referenced in the phi argument.
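+//    For example, "select(store(m, i, v), i)" evaluates to "v", and when "i" and "j" are
+//    distinct constants, "select(store(m, i, v), j)" reduces to "select(m, j)" (axioms
+//    AX1 and AX2 in VNForMapSelectWork below).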
+
+ValueNum ValueNumStore::VNForMapSelect(ValueNumKind vnk, var_types typ, ValueNum arg0VN, ValueNum arg1VN)
+{
+ unsigned budget = m_mapSelectBudget;
+ bool usedRecursiveVN = false;
+ ValueNum result = VNForMapSelectWork(vnk, typ, arg0VN, arg1VN, &budget, &usedRecursiveVN);
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForMapSelect(" STR_VN "%x, " STR_VN "%x):%s returns ", arg0VN, arg1VN, varTypeName(typ));
+ m_pComp->vnPrint(result, 1);
+ printf("\n");
+ }
+#endif
+ return result;
+}
+
+//------------------------------------------------------------------------------
+// VNForMapSelectWork : A method that does the work for VNForMapSelect and may call itself recursively.
+//
+// Arguments:
+// vnk - Value number kind
+// typ - Value type
+// arg0VN - Zeroth argument
+// arg1VN - First argument
+// pBudget - Remaining budget for the outer evaluation
+// pUsedRecursiveVN - Out-parameter that is set to true iff RecursiveVN was returned from this method
+// or from a method called during one of recursive invocations.
+//
+// Return Value:
+// Value number for the result of the evaluation.
+//
+// Notes:
+// This requires a "ValueNumKind" because it will attempt, given "select(phi(m1, ..., mk), ind)", to evaluate
+// "select(m1, ind)", ..., "select(mk, ind)" to see if they agree. It needs to know which kind of value number
+// (liberal/conservative) to read from the SSA def referenced in the phi argument.
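+//    "*pBudget" bounds the total number of MapSelect applications explored across the whole
+//    (possibly recursive) evaluation; once it is exhausted, a new opaque VNForExpr value
+//    number is produced, memoized, and returned instead.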
+
+ValueNum ValueNumStore::VNForMapSelectWork(
+ ValueNumKind vnk, var_types typ, ValueNum arg0VN, ValueNum arg1VN, unsigned* pBudget, bool* pUsedRecursiveVN)
+{
+TailCall:
+ // This label allows us to directly implement a tail call by setting up the arguments, and doing a goto to here.
+ assert(arg0VN != NoVN && arg1VN != NoVN);
+ assert(arg0VN == VNNormVal(arg0VN)); // Arguments carry no exceptions.
+ assert(arg1VN == VNNormVal(arg1VN)); // Arguments carry no exceptions.
+
+ *pUsedRecursiveVN = false;
+
+#ifdef DEBUG
+ // Provide a mechanism for writing tests that ensure we don't call this ridiculously often.
+ m_numMapSels++;
+#if 1
+// This printing is sometimes useful in debugging.
+// if ((m_numMapSels % 1000) == 0) printf("%d VNF_MapSelect applications.\n", m_numMapSels);
+#endif
+ unsigned selLim = JitConfig.JitVNMapSelLimit();
+ assert(selLim == 0 || m_numMapSels < selLim);
+#endif
+ ValueNum res;
+
+ VNDefFunc2Arg fstruct(VNF_MapSelect, arg0VN, arg1VN);
+ if (GetVNFunc2Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+
+ // Give up if we've run out of budget.
+ if (--(*pBudget) == 0)
+ {
+ // We have to use 'nullptr' for the basic block here, because subsequent expressions
+ // in different blocks may find this result in the VNFunc2Map -- other expressions in
+ // the IR may "evaluate" to this same VNForExpr, so it is not "unique" in the sense
+ // that permits the BasicBlock attribution.
+ res = VNForExpr(nullptr, typ);
+ GetVNFunc2Map()->Set(fstruct, res);
+ return res;
+ }
+
+ // If it's recursive, stop the recursion.
+ if (SelectIsBeingEvaluatedRecursively(arg0VN, arg1VN))
+ {
+ *pUsedRecursiveVN = true;
+ return RecursiveVN;
+ }
+
+ if (arg0VN == VNForZeroMap())
+ {
+ return VNZeroForType(typ);
+ }
+ else if (IsVNFunc(arg0VN))
+ {
+ VNFuncApp funcApp;
+ GetVNFunc(arg0VN, &funcApp);
+ if (funcApp.m_func == VNF_MapStore)
+ {
+ // select(store(m, i, v), i) == v
+ if (funcApp.m_args[1] == arg1VN)
+ {
+#if FEATURE_VN_TRACE_APPLY_SELECTORS
+ JITDUMP(" AX1: select([" STR_VN "%x]store(" STR_VN "%x, " STR_VN "%x, " STR_VN "%x), " STR_VN
+ "%x) ==> " STR_VN "%x.\n",
+ funcApp.m_args[0], arg0VN, funcApp.m_args[1], funcApp.m_args[2], arg1VN, funcApp.m_args[2]);
+#endif
+ return funcApp.m_args[2];
+ }
+ // i # j ==> select(store(m, i, v), j) == select(m, j)
+ // Currently the only source of distinctions is when both indices are constants.
+ else if (IsVNConstant(arg1VN) && IsVNConstant(funcApp.m_args[1]))
+ {
+ assert(funcApp.m_args[1] != arg1VN); // we already checked this above.
+#if FEATURE_VN_TRACE_APPLY_SELECTORS
+ JITDUMP(" AX2: " STR_VN "%x != " STR_VN "%x ==> select([" STR_VN "%x]store(" STR_VN
+ "%x, " STR_VN "%x, " STR_VN "%x), " STR_VN "%x) ==> select(" STR_VN "%x, " STR_VN "%x).\n",
+ arg1VN, funcApp.m_args[1], arg0VN, funcApp.m_args[0], funcApp.m_args[1], funcApp.m_args[2],
+ arg1VN, funcApp.m_args[0], arg1VN);
+#endif
+ // This is the equivalent of the recursive tail call:
+ // return VNForMapSelect(vnk, typ, funcApp.m_args[0], arg1VN);
+ // Make sure we capture any exceptions from the "i" and "v" of the store...
+ arg0VN = funcApp.m_args[0];
+ goto TailCall;
+ }
+ }
+ else if (funcApp.m_func == VNF_PhiDef || funcApp.m_func == VNF_PhiHeapDef)
+ {
+ unsigned lclNum = BAD_VAR_NUM;
+ bool isHeap = false;
+ VNFuncApp phiFuncApp;
+ bool defArgIsFunc = false;
+ if (funcApp.m_func == VNF_PhiDef)
+ {
+ lclNum = unsigned(funcApp.m_args[0]);
+ defArgIsFunc = GetVNFunc(funcApp.m_args[2], &phiFuncApp);
+ }
+ else
+ {
+ assert(funcApp.m_func == VNF_PhiHeapDef);
+ isHeap = true;
+ defArgIsFunc = GetVNFunc(funcApp.m_args[1], &phiFuncApp);
+ }
+ if (defArgIsFunc && phiFuncApp.m_func == VNF_Phi)
+ {
+ // select(phi(m1, m2), x): if select(m1, x) == select(m2, x), return that, else new fresh.
+ // Get the first argument of the phi.
+
+ // We need to be careful about breaking infinite recursion. Record the outer select.
+ m_fixedPointMapSels.Push(VNDefFunc2Arg(VNF_MapSelect, arg0VN, arg1VN));
+
+ assert(IsVNConstant(phiFuncApp.m_args[0]));
+ unsigned phiArgSsaNum = ConstantValue<unsigned>(phiFuncApp.m_args[0]);
+ ValueNum phiArgVN;
+ if (isHeap)
+ {
+ phiArgVN = m_pComp->GetHeapPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ else
+ {
+ phiArgVN = m_pComp->lvaTable[lclNum].GetPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ if (phiArgVN != ValueNumStore::NoVN)
+ {
+ bool allSame = true;
+ ValueNum argRest = phiFuncApp.m_args[1];
+ ValueNum sameSelResult =
+ VNForMapSelectWork(vnk, typ, phiArgVN, arg1VN, pBudget, pUsedRecursiveVN);
+ while (allSame && argRest != ValueNumStore::NoVN)
+ {
+ ValueNum cur = argRest;
+ VNFuncApp phiArgFuncApp;
+ if (GetVNFunc(argRest, &phiArgFuncApp) && phiArgFuncApp.m_func == VNF_Phi)
+ {
+ cur = phiArgFuncApp.m_args[0];
+ argRest = phiArgFuncApp.m_args[1];
+ }
+ else
+ {
+ argRest = ValueNumStore::NoVN; // Cause the loop to terminate.
+ }
+ assert(IsVNConstant(cur));
+ phiArgSsaNum = ConstantValue<unsigned>(cur);
+ if (isHeap)
+ {
+ phiArgVN = m_pComp->GetHeapPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ else
+ {
+ phiArgVN = m_pComp->lvaTable[lclNum].GetPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ }
+ if (phiArgVN == ValueNumStore::NoVN)
+ {
+ allSame = false;
+ }
+ else
+ {
+ bool usedRecursiveVN = false;
+ ValueNum curResult =
+ VNForMapSelectWork(vnk, typ, phiArgVN, arg1VN, pBudget, &usedRecursiveVN);
+ *pUsedRecursiveVN |= usedRecursiveVN;
+ if (sameSelResult == ValueNumStore::RecursiveVN)
+ {
+ sameSelResult = curResult;
+ }
+ if (curResult != ValueNumStore::RecursiveVN && curResult != sameSelResult)
+ {
+ allSame = false;
+ }
+ }
+ }
+ if (allSame && sameSelResult != ValueNumStore::RecursiveVN)
+ {
+ // Make sure we're popping what we pushed.
+ assert(FixedPointMapSelsTopHasValue(arg0VN, arg1VN));
+ m_fixedPointMapSels.Pop();
+
+ // To avoid exponential searches, we make sure that this result is memo-ized.
+ // The result is always valid for memoization if we didn't rely on RecursiveVN to get it.
+ // If RecursiveVN was used, we are processing a loop and we can't memo-ize this intermediate
+ // result if, e.g., this block is in a multi-entry loop.
+ if (!*pUsedRecursiveVN)
+ {
+ GetVNFunc2Map()->Set(fstruct, sameSelResult);
+ }
+
+ return sameSelResult;
+ }
+ // Otherwise, fall through to creating the select(phi(m1, m2), x) function application.
+ }
+ // Make sure we're popping what we pushed.
+ assert(FixedPointMapSelsTopHasValue(arg0VN, arg1VN));
+ m_fixedPointMapSels.Pop();
+ }
+ }
+ }
+
+ // Otherwise, assign a new VN for the function application.
+ Chunk* c = GetAllocChunk(typ, CEA_Func2);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc2Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, ValueNum arg0VN)
+{
+ assert(CanEvalForConstantArgs(func));
+ assert(IsVNConstant(arg0VN));
+ switch (TypeOfVN(arg0VN))
+ {
+ case TYP_INT:
+ {
+ int resVal = EvalOp(func, ConstantValue<int>(arg0VN));
+ // Unary op on a handle results in a handle.
+ return IsVNHandle(arg0VN) ? VNForHandle(ssize_t(resVal), GetHandleFlags(arg0VN)) : VNForIntCon(resVal);
+ }
+ case TYP_LONG:
+ {
+ INT64 resVal = EvalOp(func, ConstantValue<INT64>(arg0VN));
+ // Unary op on a handle results in a handle.
+ return IsVNHandle(arg0VN) ? VNForHandle(ssize_t(resVal), GetHandleFlags(arg0VN)) : VNForLongCon(resVal);
+ }
+ case TYP_FLOAT:
+ return VNForFloatCon(EvalOp(func, ConstantValue<float>(arg0VN)));
+ case TYP_DOUBLE:
+ return VNForDoubleCon(EvalOp(func, ConstantValue<double>(arg0VN)));
+ case TYP_REF:
+ // If arg0 has a possible exception, it wouldn't have been constant.
+ assert(!VNHasExc(arg0VN));
+ // Otherwise...
+ assert(arg0VN == VNForNull()); // Only other REF constant.
+ assert(func == VNFunc(GT_ARR_LENGTH)); // Only function we can apply to a REF constant!
+ return VNWithExc(VNForVoid(), VNExcSetSingleton(VNForFunc(TYP_REF, VNF_NullPtrExc, VNForNull())));
+ default:
+ unreached();
+ }
+}
+
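+// Returns true iff a MapSelect of ("map", "ind") is already on the stack of selects
+// currently being evaluated (m_fixedPointMapSels); VNForMapSelectWork uses this to detect
+// recursion and return RecursiveVN rather than looping forever.
+//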
+bool ValueNumStore::SelectIsBeingEvaluatedRecursively(ValueNum map, ValueNum ind)
+{
+ for (unsigned i = 0; i < m_fixedPointMapSels.Size(); i++)
+ {
+ VNDefFunc2Arg& elem = m_fixedPointMapSels.GetRef(i);
+ assert(elem.m_func == VNF_MapSelect);
+ if (elem.m_arg0 == map && elem.m_arg1 == ind)
+ {
+ return true;
+ }
+ }
+ return false;
+}
+
+#ifdef DEBUG
+bool ValueNumStore::FixedPointMapSelsTopHasValue(ValueNum map, ValueNum index)
+{
+ if (m_fixedPointMapSels.Size() == 0)
+ {
+ return false;
+ }
+ VNDefFunc2Arg& top = m_fixedPointMapSels.TopRef();
+ return top.m_func == VNF_MapSelect && top.m_arg0 == map && top.m_arg1 == index;
+}
+#endif
+
+// Given an integer constant value number return its value as an int.
+//
+int ValueNumStore::GetConstantInt32(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ var_types argVNtyp = TypeOfVN(argVN);
+
+ int result = 0;
+
+ switch (argVNtyp)
+ {
+ case TYP_INT:
+ result = ConstantValue<int>(argVN);
+ break;
+#ifndef _TARGET_64BIT_
+ case TYP_REF:
+ case TYP_BYREF:
+ result = (int)ConstantValue<size_t>(argVN);
+ break;
+#endif
+ default:
+ unreached();
+ }
+ return result;
+}
+
+// Given an integer constant value number return its value as an INT64.
+//
+INT64 ValueNumStore::GetConstantInt64(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ var_types argVNtyp = TypeOfVN(argVN);
+
+ INT64 result = 0;
+
+ switch (argVNtyp)
+ {
+ case TYP_INT:
+ result = (INT64)ConstantValue<int>(argVN);
+ break;
+ case TYP_LONG:
+ result = ConstantValue<INT64>(argVN);
+ break;
+ case TYP_REF:
+ case TYP_BYREF:
+ result = (INT64)ConstantValue<size_t>(argVN);
+ break;
+ default:
+ unreached();
+ }
+ return result;
+}
+
+// Given a float or a double constant value number return its value as a double.
+//
+double ValueNumStore::GetConstantDouble(ValueNum argVN)
+{
+ assert(IsVNConstant(argVN));
+ var_types argVNtyp = TypeOfVN(argVN);
+
+ double result = 0;
+
+ switch (argVNtyp)
+ {
+ case TYP_FLOAT:
+ result = (double)ConstantValue<float>(argVN);
+ break;
+ case TYP_DOUBLE:
+ result = ConstantValue<double>(argVN);
+ break;
+ default:
+ unreached();
+ }
+ return result;
+}
+
+// Compute the proper value number when the VNFunc has all constant arguments
+// This essentially performs constant folding at value numbering time
+//
+ValueNum ValueNumStore::EvalFuncForConstantArgs(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(CanEvalForConstantArgs(func));
+ assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN));
+ assert(!VNHasExc(arg0VN) && !VNHasExc(arg1VN)); // Otherwise, would not be constant.
+
+ // if our func is the VNF_Cast operation we handle it first
+ if (func == VNF_Cast)
+ {
+ return EvalCastForConstantArgs(typ, func, arg0VN, arg1VN);
+ }
+
+    if (typ == TYP_BYREF)
+    {
+        // We don't want to fold expressions that produce TYP_BYREF.
+        return NoVN;
+    }
+
+ var_types arg0VNtyp = TypeOfVN(arg0VN);
+ var_types arg1VNtyp = TypeOfVN(arg1VN);
+
+ // When both arguments are floating point types
+ // We defer to the EvalFuncForConstantFPArgs()
+ if (varTypeIsFloating(arg0VNtyp) && varTypeIsFloating(arg1VNtyp))
+ {
+ return EvalFuncForConstantFPArgs(typ, func, arg0VN, arg1VN);
+ }
+
+ // after this we shouldn't have to deal with floating point types for arg0VN or arg1VN
+ assert(!varTypeIsFloating(arg0VNtyp));
+ assert(!varTypeIsFloating(arg1VNtyp));
+
+ // Stack-normalize the result type.
+ if (varTypeIsSmall(typ))
+ {
+ typ = TYP_INT;
+ }
+
+ ValueNum result; // left uninitialized, we are required to initialize it on all paths below.
+ ValueNum excSet = VNForEmptyExcSet();
+
+ // Are both args of the same type?
+ if (arg0VNtyp == arg1VNtyp)
+ {
+ if (arg0VNtyp == TYP_INT)
+ {
+ int arg0Val = ConstantValue<int>(arg0VN);
+ int arg1Val = ConstantValue<int>(arg1VN);
+
+ assert(typ == TYP_INT);
+ int resultVal = EvalOp(func, arg0Val, arg1Val, &excSet);
+ // Bin op on a handle results in a handle.
+ ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN;
+ ValueNum resultVN = (handleVN != NoVN)
+ ? VNForHandle(ssize_t(resultVal), GetHandleFlags(handleVN)) // Use VN for Handle
+ : VNForIntCon(resultVal);
+ result = VNWithExc(resultVN, excSet);
+ }
+ else if (arg0VNtyp == TYP_LONG)
+ {
+ INT64 arg0Val = ConstantValue<INT64>(arg0VN);
+ INT64 arg1Val = ConstantValue<INT64>(arg1VN);
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(typ == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else
+ {
+ assert(typ == TYP_LONG);
+ INT64 resultVal = EvalOp(func, arg0Val, arg1Val, &excSet);
+ ValueNum handleVN = IsVNHandle(arg0VN) ? arg0VN : IsVNHandle(arg1VN) ? arg1VN : NoVN;
+ ValueNum resultVN = (handleVN != NoVN)
+ ? VNForHandle(ssize_t(resultVal), GetHandleFlags(handleVN)) // Use VN for Handle
+ : VNForLongCon(resultVal);
+ result = VNWithExc(resultVN, excSet);
+ }
+ }
+ else // both args are TYP_REF or both args are TYP_BYREF
+ {
+ INT64 arg0Val = ConstantValue<size_t>(arg0VN); // We represent ref/byref constants as size_t's.
+ INT64 arg1Val = ConstantValue<size_t>(arg1VN); // Also we consider null to be zero.
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(typ == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else if (typ == TYP_INT) // We could see GT_OR of a constant ByRef and Null
+ {
+ int resultVal = (int)EvalOp(func, arg0Val, arg1Val, &excSet);
+ result = VNWithExc(VNForIntCon(resultVal), excSet);
+ }
+ else // We could see GT_OR of a constant ByRef and Null
+ {
+ assert((typ == TYP_BYREF) || (typ == TYP_LONG));
+ INT64 resultVal = EvalOp(func, arg0Val, arg1Val, &excSet);
+ result = VNWithExc(VNForByrefCon(resultVal), excSet);
+ }
+ }
+ }
+ else // We have args of different types
+ {
+ // We represent ref/byref constants as size_t's.
+ // Also we consider null to be zero.
+ //
+ INT64 arg0Val = GetConstantInt64(arg0VN);
+ INT64 arg1Val = GetConstantInt64(arg1VN);
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(typ == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else if (typ == TYP_INT) // We could see GT_OR of an int and constant ByRef or Null
+ {
+ int resultVal = (int)EvalOp(func, arg0Val, arg1Val, &excSet);
+ result = VNWithExc(VNForIntCon(resultVal), excSet);
+ }
+ else
+ {
+ assert(typ != TYP_INT);
+ ValueNum resultValx = VNForEmptyExcSet();
+ INT64 resultVal = EvalOp(func, arg0Val, arg1Val, &resultValx);
+
+ // check for the Exception case
+ if (resultValx != VNForEmptyExcSet())
+ {
+ result = VNWithExc(VNForVoid(), resultValx);
+ }
+ else
+ {
+ switch (typ)
+ {
+ case TYP_BYREF:
+ result = VNForByrefCon(resultVal);
+ break;
+ case TYP_LONG:
+ result = VNForLongCon(resultVal);
+ break;
+ case TYP_REF:
+ assert(resultVal == 0); // Only valid REF constant
+ result = VNForNull();
+ break;
+ default:
+ unreached();
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+// Compute the proper value number when the VNFunc has all constant floating-point arguments
+// This essentially must perform constant folding at value numbering time
+//
+ValueNum ValueNumStore::EvalFuncForConstantFPArgs(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(CanEvalForConstantArgs(func));
+ assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN));
+
+ // We expect both argument types to be floating point types
+ var_types arg0VNtyp = TypeOfVN(arg0VN);
+ var_types arg1VNtyp = TypeOfVN(arg1VN);
+
+ assert(varTypeIsFloating(arg0VNtyp));
+ assert(varTypeIsFloating(arg1VNtyp));
+
+ double arg0Val = GetConstantDouble(arg0VN);
+ double arg1Val = GetConstantDouble(arg1VN);
+
+ ValueNum result; // left uninitialized, we are required to initialize it on all paths below.
+
+ if (VNFuncIsComparison(func))
+ {
+ assert(genActualType(typ) == TYP_INT);
+ result = VNForIntCon(EvalComparison(func, arg0Val, arg1Val));
+ }
+ else
+ {
+ assert(varTypeIsFloating(typ)); // We must be computing a floating point result
+
+ // We always compute the result using a double
+ ValueNum exception = VNForEmptyExcSet();
+ double doubleResultVal = EvalOp(func, arg0Val, arg1Val, &exception);
+ assert(exception == VNForEmptyExcSet()); // Floating point ops don't throw.
+
+ if (typ == TYP_FLOAT)
+ {
+ float floatResultVal = float(doubleResultVal);
+ result = VNForFloatCon(floatResultVal);
+ }
+ else
+ {
+ assert(typ == TYP_DOUBLE);
+ result = VNForDoubleCon(doubleResultVal);
+ }
+ }
+
+ return result;
+}
+
+// Compute the proper value number for a VNF_Cast with constant arguments
+// This essentially must perform constant folding at value numbering time
+//
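+// For example, casting the TYP_INT constant 300 to TYP_UBYTE yields the TYP_INT constant 44
+// (the low 8 bits of 300), matching the run-time behavior of GT_CAST.
+//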
+ValueNum ValueNumStore::EvalCastForConstantArgs(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(func == VNF_Cast);
+ assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN));
+
+ // Stack-normalize the result type.
+ if (varTypeIsSmall(typ))
+ {
+ typ = TYP_INT;
+ }
+
+ var_types arg0VNtyp = TypeOfVN(arg0VN);
+ var_types arg1VNtyp = TypeOfVN(arg1VN);
+
+ // arg1VN is really the gtCastType that we are casting to
+ assert(arg1VNtyp == TYP_INT);
+ int arg1Val = ConstantValue<int>(arg1VN);
+ assert(arg1Val >= 0);
+
+ if (IsVNHandle(arg0VN))
+ {
+ // We don't allow handles to be cast to random var_types.
+ assert(typ == TYP_I_IMPL);
+ }
+
+ // We previously encoded the castToType operation using vnForCastOper()
+ //
+ bool srcIsUnsigned = ((arg1Val & INT32(VCA_UnsignedSrc)) != 0);
+ var_types castToType = var_types(arg1Val >> INT32(VCA_BitCount));
+
+ var_types castFromType = arg0VNtyp;
+
+ switch (castFromType) // GT_CAST source type
+ {
+#ifndef _TARGET_64BIT_
+ case TYP_REF:
+#endif
+ case TYP_INT:
+ {
+ int arg0Val = GetConstantInt32(arg0VN);
+
+ switch (castToType)
+ {
+ case TYP_BYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT8(arg0Val));
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT8(arg0Val));
+ case TYP_SHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT16(arg0Val));
+ case TYP_CHAR:
+ case TYP_USHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT16(arg0Val));
+ case TYP_INT:
+ case TYP_UINT:
+ assert(typ == TYP_INT);
+ return arg0VN;
+ case TYP_LONG:
+ case TYP_ULONG:
+ assert(!IsVNHandle(arg0VN));
+#ifdef _TARGET_64BIT_
+ if (typ == TYP_LONG)
+ {
+ if (srcIsUnsigned)
+ {
+ return VNForLongCon(INT64(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForLongCon(INT64(arg0Val));
+ }
+ }
+ else
+ {
+ assert(typ == TYP_BYREF);
+ if (srcIsUnsigned)
+ {
+ return VNForByrefCon(INT64(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForByrefCon(INT64(arg0Val));
+ }
+ }
+#else // TARGET_32BIT
+ if (srcIsUnsigned)
+ return VNForLongCon(INT64(unsigned(arg0Val)));
+ else
+ return VNForLongCon(INT64(arg0Val));
+#endif
+ case TYP_FLOAT:
+ assert(typ == TYP_FLOAT);
+ if (srcIsUnsigned)
+ {
+ return VNForFloatCon(float(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForFloatCon(float(arg0Val));
+ }
+ case TYP_DOUBLE:
+ assert(typ == TYP_DOUBLE);
+ if (srcIsUnsigned)
+ {
+ return VNForDoubleCon(double(unsigned(arg0Val)));
+ }
+ else
+ {
+ return VNForDoubleCon(double(arg0Val));
+ }
+ default:
+ unreached();
+ }
+ break;
+ }
+ {
+#ifdef _TARGET_64BIT_
+ case TYP_REF:
+#endif
+ case TYP_LONG:
+ INT64 arg0Val = GetConstantInt64(arg0VN);
+
+ switch (castToType)
+ {
+ case TYP_BYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT8(arg0Val));
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT8(arg0Val));
+ case TYP_SHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT16(arg0Val));
+ case TYP_CHAR:
+ case TYP_USHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT16(arg0Val));
+ case TYP_INT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT32(arg0Val));
+ case TYP_UINT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT32(arg0Val));
+ case TYP_LONG:
+ case TYP_ULONG:
+ assert(typ == TYP_LONG);
+ return arg0VN;
+ case TYP_FLOAT:
+ assert(typ == TYP_FLOAT);
+ if (srcIsUnsigned)
+ {
+ return VNForFloatCon(FloatingPointUtils::convertUInt64ToFloat(UINT64(arg0Val)));
+ }
+ else
+ {
+ return VNForFloatCon(float(arg0Val));
+ }
+ case TYP_DOUBLE:
+ assert(typ == TYP_DOUBLE);
+ if (srcIsUnsigned)
+ {
+ return VNForDoubleCon(FloatingPointUtils::convertUInt64ToDouble(UINT64(arg0Val)));
+ }
+ else
+ {
+ return VNForDoubleCon(double(arg0Val));
+ }
+ default:
+ unreached();
+ }
+ }
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ {
+ double arg0Val = GetConstantDouble(arg0VN);
+
+ switch (castToType)
+ {
+ case TYP_BYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT8(arg0Val));
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT8(arg0Val));
+ case TYP_SHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT16(arg0Val));
+ case TYP_CHAR:
+ case TYP_USHORT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT16(arg0Val));
+ case TYP_INT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(INT32(arg0Val));
+ case TYP_UINT:
+ assert(typ == TYP_INT);
+ return VNForIntCon(UINT32(arg0Val));
+ case TYP_LONG:
+ assert(typ == TYP_LONG);
+ return VNForLongCon(INT64(arg0Val));
+ case TYP_ULONG:
+ assert(typ == TYP_LONG);
+ return VNForLongCon(UINT64(arg0Val));
+ case TYP_FLOAT:
+ assert(typ == TYP_FLOAT);
+ return VNForFloatCon(float(arg0Val));
+ case TYP_DOUBLE:
+ assert(typ == TYP_DOUBLE);
+ return VNForDoubleCon(arg0Val);
+ default:
+ unreached();
+ }
+ }
+ default:
+ unreached();
+ }
+}
+
+bool ValueNumStore::CanEvalForConstantArgs(VNFunc vnf)
+{
+ if (vnf < VNF_Boundary)
+ {
+ // We'll refine this as we get counterexamples. But to
+ // a first approximation, VNFuncs that are genTreeOps should
+ // be things we can evaluate.
+ genTreeOps oper = genTreeOps(vnf);
+ // Some exceptions...
+ switch (oper)
+ {
+ case GT_MKREFANY: // We can't evaluate these.
+ case GT_RETFILT:
+ case GT_LIST:
+ case GT_ARR_LENGTH:
+ return false;
+ case GT_MULHI:
+ // should be rare, not worth the complexity and risk of getting it wrong
+ return false;
+ default:
+ return true;
+ }
+ }
+ else
+ {
+ // some VNF_ that we can evaluate
+ switch (vnf)
+ {
+ case VNF_Cast: // We can evaluate these.
+ return true;
+ case VNF_ObjGetType:
+ return false;
+ default:
+ return false;
+ }
+ }
+}
+
+unsigned ValueNumStore::VNFuncArity(VNFunc vnf)
+{
+ // Read the bit field out of the table...
+ return (s_vnfOpAttribs[vnf] & VNFOA_ArityMask) >> VNFOA_ArityShift;
+}
+
+template <>
+bool ValueNumStore::IsOverflowIntDiv(int v0, int v1)
+{
+ return (v1 == -1) && (v0 == INT32_MIN);
+}
+template <>
+bool ValueNumStore::IsOverflowIntDiv(INT64 v0, INT64 v1)
+{
+ return (v1 == -1) && (v0 == INT64_MIN);
+}
+template <typename T>
+bool ValueNumStore::IsOverflowIntDiv(T v0, T v1)
+{
+ return false;
+}
+
+template <>
+bool ValueNumStore::IsIntZero(int v)
+{
+ return v == 0;
+}
+template <>
+bool ValueNumStore::IsIntZero(unsigned v)
+{
+ return v == 0;
+}
+template <>
+bool ValueNumStore::IsIntZero(INT64 v)
+{
+ return v == 0;
+}
+template <>
+bool ValueNumStore::IsIntZero(UINT64 v)
+{
+ return v == 0;
+}
+template <typename T>
+bool ValueNumStore::IsIntZero(T v)
+{
+ return false;
+}
+
+template <>
+float ValueNumStore::EvalOpIntegral<float>(VNFunc vnf, float v0)
+{
+ assert(!"EvalOpIntegral<float>");
+ return 0.0f;
+}
+
+template <>
+double ValueNumStore::EvalOpIntegral<double>(VNFunc vnf, double v0)
+{
+ assert(!"EvalOpIntegral<double>");
+ return 0.0;
+}
+
+template <>
+float ValueNumStore::EvalOpIntegral<float>(VNFunc vnf, float v0, float v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ switch (oper)
+ {
+ case GT_MOD:
+ return fmodf(v0, v1);
+ default:
+ // For any other values of 'oper', we will assert and return 0.0f
+ break;
+ }
+ assert(!"EvalOpIntegral<float> with pExcSet");
+ return 0.0f;
+}
+
+template <>
+double ValueNumStore::EvalOpIntegral<double>(VNFunc vnf, double v0, double v1, ValueNum* pExcSet)
+{
+ genTreeOps oper = genTreeOps(vnf);
+ switch (oper)
+ {
+ case GT_MOD:
+ return fmod(v0, v1);
+ default:
+ // For any other value of 'oper', we will assert and return 0.0
+ break;
+ }
+ assert(!"EvalOpIntegral<double> with pExcSet");
+ return 0.0;
+}
+
+ValueNum ValueNumStore::VNForFunc(var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN)
+{
+ assert(arg0VN != NoVN);
+ assert(arg1VN != NoVN);
+ assert(arg2VN != NoVN);
+ assert(VNFuncArity(func) == 3);
+
+ // Function arguments carry no exceptions.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ if (func != VNF_PhiDef)
+ {
+ // For a phi definition first and second argument are "plain" local/ssa numbers.
+ // (I don't know if having such non-VN arguments to a VN function is a good idea -- if we wanted to declare
+ // ValueNum to be "short" it would be a problem, for example. But we'll leave it for now, with these explicit
+ // exceptions.)
+ assert(arg0VN == VNNormVal(arg0VN));
+ assert(arg1VN == VNNormVal(arg1VN));
+ }
+ assert(arg2VN == VNNormVal(arg2VN));
+
+#endif
+ assert(VNFuncArity(func) == 3);
+
+ ValueNum res;
+ VNDefFunc3Arg fstruct(func, arg0VN, arg1VN, arg2VN);
+ if (GetVNFunc3Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(typ, CEA_Func3);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc3Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc3Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+ValueNum ValueNumStore::VNForFunc(
+ var_types typ, VNFunc func, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN, ValueNum arg3VN)
+{
+ assert(arg0VN != NoVN && arg1VN != NoVN && arg2VN != NoVN && arg3VN != NoVN);
+ // Function arguments carry no exceptions.
+ assert(arg0VN == VNNormVal(arg0VN));
+ assert(arg1VN == VNNormVal(arg1VN));
+ assert(arg2VN == VNNormVal(arg2VN));
+ assert(arg3VN == VNNormVal(arg3VN));
+ assert(VNFuncArity(func) == 4);
+
+ ValueNum res;
+ VNDefFunc4Arg fstruct(func, arg0VN, arg1VN, arg2VN, arg3VN);
+ if (GetVNFunc4Map()->Lookup(fstruct, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(typ, CEA_Func4);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<VNDefFunc4Arg*>(c->m_defs)[offsetWithinChunk] = fstruct;
+ GetVNFunc4Map()->Set(fstruct, res);
+ return res;
+ }
+}
+
+//------------------------------------------------------------------------
+// VNForExpr: Produce an opaque value number, one that compares equal only to itself
+//    and is distinct from all other value numbers.
+//
+// Arguments:
+// block - BasicBlock where the expression that produces this value occurs.
+// May be nullptr to force conservative "could be anywhere" interpretation.
+// typ - Type of the expression in the IR
+//
+// Return Value:
+// A new value number distinct from any previously generated, that compares as equal
+// to itself, but not any other value number, and is annotated with the given
+// type and block.
+
+ValueNum ValueNumStore::VNForExpr(BasicBlock* block, var_types typ)
+{
+ BasicBlock::loopNumber loopNum;
+ if (block == nullptr)
+ {
+ loopNum = MAX_LOOP_NUM;
+ }
+ else
+ {
+ loopNum = block->bbNatLoopNum;
+ }
+
+ // We always allocate a new, unique VN in this call.
+ // The 'typ' is used to partition the allocation of VNs into different chunks.
+ Chunk* c = GetAllocChunk(typ, CEA_None, loopNum);
+ unsigned offsetWithinChunk = c->AllocVN();
+ ValueNum result = c->m_baseVN + offsetWithinChunk;
+ return result;
+}
+
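+// VNApplySelectors: Starting from the value number "map", apply each field handle in
+// "fieldSeq" as a MapSelect selector (pseudo-fields are skipped), yielding the value number
+// read through that field path. For a non-empty sequence, "*wbFinalStructSize" (if non-null)
+// is set to the size of the final field's struct type, or to 0 if that field is not a struct.
+//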
+ValueNum ValueNumStore::VNApplySelectors(ValueNumKind vnk,
+ ValueNum map,
+ FieldSeqNode* fieldSeq,
+ size_t* wbFinalStructSize)
+{
+ if (fieldSeq == nullptr)
+ {
+ return map;
+ }
+ else
+ {
+ assert(fieldSeq != FieldSeqStore::NotAField());
+
+ // Skip any "FirstElem" pseudo-fields or any "ConstantIndex" pseudo-fields
+ if (fieldSeq->IsPseudoField())
+ {
+ return VNApplySelectors(vnk, map, fieldSeq->m_next, wbFinalStructSize);
+ }
+
+ // Otherwise, is a real field handle.
+ CORINFO_FIELD_HANDLE fldHnd = fieldSeq->m_fieldHnd;
+ CORINFO_CLASS_HANDLE structHnd = NO_CLASS_HANDLE;
+ ValueNum fldHndVN = VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
+ noway_assert(fldHnd != nullptr);
+ CorInfoType fieldCit = m_pComp->info.compCompHnd->getFieldType(fldHnd, &structHnd);
+ var_types fieldType = JITtype2varType(fieldCit);
+
+ size_t structSize = 0;
+ if (varTypeIsStruct(fieldType))
+ {
+ structSize = m_pComp->info.compCompHnd->getClassSize(structHnd);
+            // We do not normalize the types of field accesses during importation unless
+            // they are used in a call, return, or assignment.
+ if ((fieldType == TYP_STRUCT) && (structSize <= m_pComp->largestEnregisterableStructSize()))
+ {
+ fieldType = m_pComp->impNormStructType(structHnd);
+ }
+ }
+ if (wbFinalStructSize != nullptr)
+ {
+ *wbFinalStructSize = structSize;
+ }
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNApplySelectors:\n");
+ const char* modName;
+ const char* fldName = m_pComp->eeGetFieldName(fldHnd, &modName);
+ printf(" VNForHandle(Fseq[%s]) is " STR_VN "%x, fieldType is %s", fldName, fldHndVN,
+ varTypeName(fieldType));
+ if (varTypeIsStruct(fieldType))
+ {
+ printf(", size = %d", structSize);
+ }
+ printf("\n");
+ }
+#endif
+
+ if (fieldSeq->m_next != nullptr)
+ {
+ ValueNum newMap = VNForMapSelect(vnk, fieldType, map, fldHndVN);
+ return VNApplySelectors(vnk, newMap, fieldSeq->m_next, wbFinalStructSize);
+ }
+ else // end of fieldSeq
+ {
+ return VNForMapSelect(vnk, fieldType, map, fldHndVN);
+ }
+ }
+}
+
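+// VNApplySelectorsTypeCheck: Reconcile the type of the value read ("elem") with the type of
+// the indirection ("indType"). Matching or otherwise compatible reads (including a TYP_REF
+// read of a boxed struct) return "elem" unchanged; reads beyond the end of "elem" and
+// struct-typed reads get a new opaque value number; other mismatches insert a VNF_Cast to
+// "indType".
+//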
+ValueNum ValueNumStore::VNApplySelectorsTypeCheck(ValueNum elem, var_types indType, size_t elemStructSize)
+{
+ var_types elemTyp = TypeOfVN(elem);
+
+ // Check if the elemTyp is matching/compatible
+
+ if (indType != elemTyp)
+ {
+ bool isConstant = IsVNConstant(elem);
+ if (isConstant && (elemTyp == genActualType(indType)))
+ {
+ // (i.e. We recorded a constant of TYP_INT for a TYP_BYTE field)
+ }
+ else
+ {
+ // We are trying to read from an 'elem' of type 'elemType' using 'indType' read
+
+ size_t elemTypSize = (elemTyp == TYP_STRUCT) ? elemStructSize : genTypeSize(elemTyp);
+ size_t indTypeSize = genTypeSize(indType);
+
+ if ((indType == TYP_REF) && (varTypeIsStruct(elemTyp)))
+ {
+ // indType is TYP_REF and elemTyp is TYP_STRUCT
+ //
+ // We have a pointer to a static that is a Boxed Struct
+ //
+ return elem;
+ }
+ else if (indTypeSize > elemTypSize)
+ {
+                // Reading beyond the end of 'elem'
+
+ // return a new unique value number
+ elem = VNForExpr(nullptr, indType);
+ JITDUMP(" *** Mismatched types in VNApplySelectorsTypeCheck (reading beyond the end)\n");
+ }
+ else if (varTypeIsStruct(indType))
+ {
+ // indType is TYP_STRUCT
+
+ // return a new unique value number
+ elem = VNForExpr(nullptr, indType);
+ JITDUMP(" *** Mismatched types in VNApplySelectorsTypeCheck (indType is TYP_STRUCT)\n");
+ }
+ else
+ {
+ // We are trying to read an 'elem' of type 'elemType' using 'indType' read
+
+ // insert a cast of elem to 'indType'
+ elem = VNForCast(elem, indType, elemTyp);
+ }
+ }
+ }
+ return elem;
+}
+
+ValueNum ValueNumStore::VNApplySelectorsAssignTypeCoerce(ValueNum elem, var_types indType, BasicBlock* block)
+{
+ var_types elemTyp = TypeOfVN(elem);
+
+ // Check if the elemTyp is matching/compatible
+
+ if (indType != elemTyp)
+ {
+ bool isConstant = IsVNConstant(elem);
+ if (isConstant && (elemTyp == genActualType(indType)))
+ {
+ // (i.e. We recorded a constant of TYP_INT for a TYP_BYTE field)
+ }
+ else
+ {
+ // We are trying to write an 'elem' of type 'elemType' using 'indType' store
+
+ if (varTypeIsStruct(indType))
+ {
+ // return a new unique value number
+ elem = VNForExpr(block, indType);
+ JITDUMP(" *** Mismatched types in VNApplySelectorsAssignTypeCoerce (indType is TYP_STRUCT)\n");
+ }
+ else
+ {
+ // We are trying to write an 'elem' of type 'elemType' using 'indType' store
+
+ // insert a cast of elem to 'indType'
+ elem = VNForCast(elem, indType, elemTyp);
+ }
+ }
+ }
+ return elem;
+}
+
+//------------------------------------------------------------------------
+// VNApplySelectorsAssign: Compute the value number corresponding to "map" but with
+//    the element at "fieldSeq" updated to the value "elem"; this is the new heap
+// value for an assignment of value "elem" into the heap at location "fieldSeq"
+// that occurs in block "block" and has type "indType".
+//
+// Arguments:
+// vnk - Identifies whether to recurse to Conservative or Liberal value numbers
+// when recursing through phis
+// map - Value number for the field map before the assignment
+// elem - Value number for the value being stored (to the given field)
+// indType - Type of the indirection storing the value to the field
+// block - Block where the assignment occurs
+//
+// Return Value:
+//    The value number corresponding to the heap after the assignment.
+
+ValueNum ValueNumStore::VNApplySelectorsAssign(
+ ValueNumKind vnk, ValueNum map, FieldSeqNode* fieldSeq, ValueNum elem, var_types indType, BasicBlock* block)
+{
+ if (fieldSeq == nullptr)
+ {
+ return VNApplySelectorsAssignTypeCoerce(elem, indType, block);
+ }
+ else
+ {
+ assert(fieldSeq != FieldSeqStore::NotAField());
+
+ // Skip any "FirstElem" pseudo-fields or any "ConstantIndex" pseudo-fields
+ // These will occur, at least, in struct static expressions, for method table offsets.
+ if (fieldSeq->IsPseudoField())
+ {
+ return VNApplySelectorsAssign(vnk, map, fieldSeq->m_next, elem, indType, block);
+ }
+
+ // Otherwise, fldHnd is a real field handle.
+ CORINFO_FIELD_HANDLE fldHnd = fieldSeq->m_fieldHnd;
+ CORINFO_CLASS_HANDLE structType = nullptr;
+ noway_assert(fldHnd != nullptr);
+ CorInfoType fieldCit = m_pComp->info.compCompHnd->getFieldType(fldHnd, &structType);
+ var_types fieldType = JITtype2varType(fieldCit);
+
+ ValueNum fieldHndVN = VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" fieldHnd " STR_VN "%x is ", fieldHndVN);
+ vnDump(m_pComp, fieldHndVN);
+ printf("\n");
+
+ ValueNum seqNextVN = VNForFieldSeq(fieldSeq->m_next);
+ ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, fieldHndVN, seqNextVN);
+
+ printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
+ vnDump(m_pComp, fieldSeqVN);
+ printf("\n");
+ }
+#endif
+
+ ValueNum elemAfter;
+ if (fieldSeq->m_next)
+ {
+ ValueNum fseqMap = VNForMapSelect(vnk, fieldType, map, fieldHndVN);
+ elemAfter = VNApplySelectorsAssign(vnk, fseqMap, fieldSeq->m_next, elem, indType, block);
+ }
+ else
+ {
+ elemAfter = VNApplySelectorsAssignTypeCoerce(elem, indType, block);
+ }
+
+ ValueNum newMap = VNForMapStore(fieldType, map, fieldHndVN, elemAfter);
+ return newMap;
+ }
+}
+
+ValueNumPair ValueNumStore::VNPairApplySelectors(ValueNumPair map, FieldSeqNode* fieldSeq, var_types indType)
+{
+ size_t structSize = 0;
+ ValueNum liberalVN = VNApplySelectors(VNK_Liberal, map.GetLiberal(), fieldSeq, &structSize);
+ liberalVN = VNApplySelectorsTypeCheck(liberalVN, indType, structSize);
+
+ structSize = 0;
+ ValueNum conservVN = VNApplySelectors(VNK_Conservative, map.GetConservative(), fieldSeq, &structSize);
+ conservVN = VNApplySelectorsTypeCheck(conservVN, indType, structSize);
+
+ return ValueNumPair(liberalVN, conservVN);
+}
+
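+// VNForFieldSeq: Map a field sequence to a value number: a null sequence maps to VNForNull(),
+// NotAField maps to VNForNotAField(), and anything else maps to the function application
+// VNF_FieldSeq(fieldHndVN, VNForFieldSeq(fieldSeq->m_next)). FieldSeqVNToFieldSeq below is
+// the inverse mapping.
+//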
+ValueNum ValueNumStore::VNForFieldSeq(FieldSeqNode* fieldSeq)
+{
+ if (fieldSeq == nullptr)
+ {
+ return VNForNull();
+ }
+ else if (fieldSeq == FieldSeqStore::NotAField())
+ {
+ return VNForNotAField();
+ }
+ else
+ {
+ ssize_t fieldHndVal = ssize_t(fieldSeq->m_fieldHnd);
+ ValueNum fieldHndVN = VNForHandle(fieldHndVal, GTF_ICON_FIELD_HDL);
+ ValueNum seqNextVN = VNForFieldSeq(fieldSeq->m_next);
+ ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, fieldHndVN, seqNextVN);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" fieldHnd " STR_VN "%x is ", fieldHndVN);
+ vnDump(m_pComp, fieldHndVN);
+ printf("\n");
+
+ printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
+ vnDump(m_pComp, fieldSeqVN);
+ printf("\n");
+ }
+#endif
+
+ return fieldSeqVN;
+ }
+}
+
+FieldSeqNode* ValueNumStore::FieldSeqVNToFieldSeq(ValueNum vn)
+{
+ if (vn == VNForNull())
+ {
+ return nullptr;
+ }
+ else if (vn == VNForNotAField())
+ {
+ return FieldSeqStore::NotAField();
+ }
+ else
+ {
+ assert(IsVNFunc(vn));
+ VNFuncApp funcApp;
+ GetVNFunc(vn, &funcApp);
+ assert(funcApp.m_func == VNF_FieldSeq);
+ ssize_t fieldHndVal = ConstantValue<ssize_t>(funcApp.m_args[0]);
+ FieldSeqNode* head =
+ m_pComp->GetFieldSeqStore()->CreateSingleton(reinterpret_cast<CORINFO_FIELD_HANDLE>(fieldHndVal));
+ FieldSeqNode* tail = FieldSeqVNToFieldSeq(funcApp.m_args[1]);
+ return m_pComp->GetFieldSeqStore()->Append(head, tail);
+ }
+}
+
+ValueNum ValueNumStore::FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2)
+{
+ if (fsVN1 == VNForNull())
+ {
+ return fsVN2;
+ }
+ else if (fsVN1 == VNForNotAField() || fsVN2 == VNForNotAField())
+ {
+ return VNForNotAField();
+ }
+ else
+ {
+ assert(IsVNFunc(fsVN1));
+ VNFuncApp funcApp1;
+ GetVNFunc(fsVN1, &funcApp1);
+ assert(funcApp1.m_func == VNF_FieldSeq);
+ ValueNum tailRes = FieldSeqVNAppend(funcApp1.m_args[1], fsVN2);
+ ValueNum fieldSeqVN = VNForFunc(TYP_REF, VNF_FieldSeq, funcApp1.m_args[0], tailRes);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" fieldSeq " STR_VN "%x is ", fieldSeqVN);
+ vnDump(m_pComp, fieldSeqVN);
+ printf("\n");
+ }
+#endif
+
+ return fieldSeqVN;
+ }
+}
+
+ValueNum ValueNumStore::VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN)
+{
+ if (fieldSeqVN == VNForNotAField())
+ {
+        // To distinguish two different NotAField accesses, append a unique value.
+ return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(++m_uPtrToLocNotAFieldCount));
+ }
+ return VNForFunc(typ, VNF_PtrToLoc, lclVarVN, fieldSeqVN, VNForIntCon(0));
+}
+
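+// ExtendPtrVN: If "opA" is value-numbered as a PtrToLoc, PtrToStatic, or PtrToArrElem
+// function, return a value number of the same kind whose field sequence is extended with the
+// field sequence carried by the constant offset "opB" (or by "fldSeq" in the overload below);
+// otherwise return NoVN. Any exception set attached to "opA" is preserved in the result.
+//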
+ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, GenTreePtr opB)
+{
+ if (opB->OperGet() == GT_CNS_INT)
+ {
+ FieldSeqNode* fldSeq = opB->gtIntCon.gtFieldSeq;
+ if ((fldSeq != nullptr) && (fldSeq != FieldSeqStore::NotAField()))
+ {
+ return ExtendPtrVN(opA, opB->gtIntCon.gtFieldSeq);
+ }
+ }
+ return NoVN;
+}
+
+ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq)
+{
+ ValueNum res = NoVN;
+ assert(fldSeq != FieldSeqStore::NotAField());
+
+ ValueNum opAvnWx = opA->gtVNPair.GetLiberal();
+ assert(VNIsValid(opAvnWx));
+ ValueNum opAvn;
+ ValueNum opAvnx = VNForEmptyExcSet();
+ VNUnpackExc(opAvnWx, &opAvn, &opAvnx);
+ assert(VNIsValid(opAvn) && VNIsValid(opAvnx));
+
+ VNFuncApp funcApp;
+ if (!GetVNFunc(opAvn, &funcApp))
+ {
+ return res;
+ }
+
+ if (funcApp.m_func == VNF_PtrToLoc)
+ {
+#ifdef DEBUG
+ // For PtrToLoc, lib == cons.
+ VNFuncApp consFuncApp;
+ assert(GetVNFunc(VNNormVal(opA->GetVN(VNK_Conservative)), &consFuncApp) && consFuncApp.Equals(funcApp));
+#endif
+ ValueNum fldSeqVN = VNForFieldSeq(fldSeq);
+ res = VNForPtrToLoc(TYP_BYREF, funcApp.m_args[0], FieldSeqVNAppend(funcApp.m_args[1], fldSeqVN));
+ }
+ else if (funcApp.m_func == VNF_PtrToStatic)
+ {
+ ValueNum fldSeqVN = VNForFieldSeq(fldSeq);
+ res = VNForFunc(TYP_BYREF, VNF_PtrToStatic, FieldSeqVNAppend(funcApp.m_args[0], fldSeqVN));
+ }
+ else if (funcApp.m_func == VNF_PtrToArrElem)
+ {
+ ValueNum fldSeqVN = VNForFieldSeq(fldSeq);
+ res = VNForFunc(TYP_BYREF, VNF_PtrToArrElem, funcApp.m_args[0], funcApp.m_args[1], funcApp.m_args[2],
+ FieldSeqVNAppend(funcApp.m_args[3], fldSeqVN));
+ }
+ if (res != NoVN)
+ {
+ res = VNWithExc(res, opAvnx);
+ }
+ return res;
+}
+
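+// fgValueNumberArrIndexAssign: Update the heap value number for a store to "arr[inx]"
+// (possibly through the field sequence "fldSeq"). The heap is modeled as a map from
+// array-element-type handles to maps from array references to maps from indices to element
+// values; the store rewrites that nested map chain, or invalidates the element-type map with
+// a fresh opaque value when the access cannot be reasoned about, and installs the result as
+// the new fgCurHeapVN.
+//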
+void Compiler::fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ FieldSeqNode* fldSeq,
+ ValueNum rhsVN,
+ var_types indType)
+{
+ bool invalidateArray = false;
+ ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
+ var_types arrElemType = DecodeElemType(elemTypeEq);
+ ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurHeapVN, elemTypeEqVN);
+ ValueNum hAtArrTypeAtArr = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, hAtArrType, arrVN);
+ ValueNum hAtArrTypeAtArrAtInx = vnStore->VNForMapSelect(VNK_Liberal, arrElemType, hAtArrTypeAtArr, inxVN);
+
+ ValueNum newValAtInx = ValueNumStore::NoVN;
+ ValueNum newValAtArr = ValueNumStore::NoVN;
+ ValueNum newValAtArrType = ValueNumStore::NoVN;
+
+ if (fldSeq == FieldSeqStore::NotAField())
+ {
+ // This doesn't represent a proper array access
+ JITDUMP(" *** NotAField sequence encountered in fgValueNumberArrIndexAssign\n");
+
+ // Store a new unique value for newValAtArrType
+ newValAtArrType = vnStore->VNForExpr(compCurBB, TYP_REF);
+ invalidateArray = true;
+ }
+ else
+ {
+ // Note that this does the right thing if "fldSeq" is null -- returns last "rhs" argument.
+ // This is the value that should be stored at "arr[inx]".
+ newValAtInx =
+ vnStore->VNApplySelectorsAssign(VNK_Liberal, hAtArrTypeAtArrAtInx, fldSeq, rhsVN, indType, compCurBB);
+
+        var_types arrElemFldType = arrElemType; // Uses arrElemType unless we have a non-null fldSeq
+ if (vnStore->IsVNFunc(newValAtInx))
+ {
+ VNFuncApp funcApp;
+ vnStore->GetVNFunc(newValAtInx, &funcApp);
+ if (funcApp.m_func == VNF_MapStore)
+ {
+ arrElemFldType = vnStore->TypeOfVN(newValAtInx);
+ }
+ }
+
+ if (indType != arrElemFldType)
+ {
+ // Mismatched types: Store between different types (indType into array of arrElemFldType)
+ //
+
+ JITDUMP(" *** Mismatched types in fgValueNumberArrIndexAssign\n");
+
+ // Store a new unique value for newValAtArrType
+ newValAtArrType = vnStore->VNForExpr(compCurBB, TYP_REF);
+ invalidateArray = true;
+ }
+ }
+
+ if (!invalidateArray)
+ {
+ newValAtArr = vnStore->VNForMapStore(indType, hAtArrTypeAtArr, inxVN, newValAtInx);
+ newValAtArrType = vnStore->VNForMapStore(TYP_REF, hAtArrType, arrVN, newValAtArr);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" hAtArrType " STR_VN "%x is MapSelect(curHeap(" STR_VN "%x), ", hAtArrType, fgCurHeapVN);
+
+ if (arrElemType == TYP_STRUCT)
+ {
+ printf("%s[]).\n", eeGetClassName(elemTypeEq));
+ }
+ else
+ {
+ printf("%s[]).\n", varTypeName(arrElemType));
+ }
+ printf(" hAtArrTypeAtArr " STR_VN "%x is MapSelect(hAtArrType(" STR_VN "%x), arr=" STR_VN "%x)\n",
+ hAtArrTypeAtArr, hAtArrType, arrVN);
+ printf(" hAtArrTypeAtArrAtInx " STR_VN "%x is MapSelect(hAtArrTypeAtArr(" STR_VN "%x), inx=" STR_VN "%x):%s\n",
+ hAtArrTypeAtArrAtInx, hAtArrTypeAtArr, inxVN, varTypeName(arrElemType));
+
+ if (!invalidateArray)
+ {
+ printf(" newValAtInd " STR_VN "%x is ", newValAtInx);
+ vnStore->vnDump(this, newValAtInx);
+ printf("\n");
+
+ printf(" newValAtArr " STR_VN "%x is ", newValAtArr);
+ vnStore->vnDump(this, newValAtArr);
+ printf("\n");
+ }
+
+ printf(" newValAtArrType " STR_VN "%x is ", newValAtArrType);
+ vnStore->vnDump(this, newValAtArrType);
+ printf("\n");
+
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+
+ // bbHeapDef must be set to true for any block that Mutates the global Heap
+ assert(compCurBB->bbHeapDef);
+
+ fgCurHeapVN = vnStore->VNForMapStore(TYP_REF, fgCurHeapVN, elemTypeEqVN, newValAtArrType);
+}
+
+ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree, VNFuncApp* pFuncApp, ValueNum addrXvn)
+{
+ assert(vnStore->IsVNHandle(pFuncApp->m_args[0]));
+ CORINFO_CLASS_HANDLE arrElemTypeEQ = CORINFO_CLASS_HANDLE(vnStore->ConstantValue<ssize_t>(pFuncApp->m_args[0]));
+ ValueNum arrVN = pFuncApp->m_args[1];
+ ValueNum inxVN = pFuncApp->m_args[2];
+ FieldSeqNode* fldSeq = vnStore->FieldSeqVNToFieldSeq(pFuncApp->m_args[3]);
+ return fgValueNumberArrIndexVal(tree, arrElemTypeEQ, arrVN, inxVN, addrXvn, fldSeq);
+}
+
+ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree,
+ CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ ValueNum excVN,
+ FieldSeqNode* fldSeq)
+{
+ assert(tree == nullptr || tree->OperIsIndir());
+
+ // The VN inputs are required to be non-exceptional values.
+ assert(arrVN == vnStore->VNNormVal(arrVN));
+ assert(inxVN == vnStore->VNNormVal(inxVN));
+
+ var_types elemTyp = DecodeElemType(elemTypeEq);
+ var_types indType = (tree == nullptr) ? elemTyp : tree->TypeGet();
+ ValueNum selectedElem;
+
+ if (fldSeq == FieldSeqStore::NotAField())
+ {
+ // This doesn't represent a proper array access
+ JITDUMP(" *** NotAField sequence encountered in fgValueNumberArrIndexVal\n");
+
+ // a new unique value number
+ selectedElem = vnStore->VNForExpr(compCurBB, elemTyp);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" IND of PtrToArrElem is unique VN " STR_VN "%x.\n", selectedElem);
+ }
+#endif // DEBUG
+
+ if (tree != nullptr)
+ {
+ tree->gtVNPair.SetBoth(selectedElem);
+ }
+ }
+ else
+ {
+ ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
+ ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurHeapVN, elemTypeEqVN);
+ ValueNum hAtArrTypeAtArr = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, hAtArrType, arrVN);
+ ValueNum wholeElem = vnStore->VNForMapSelect(VNK_Liberal, elemTyp, hAtArrTypeAtArr, inxVN);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" hAtArrType " STR_VN "%x is MapSelect(curHeap(" STR_VN "%x), ", hAtArrType, fgCurHeapVN);
+ if (elemTyp == TYP_STRUCT)
+ {
+ printf("%s[]).\n", eeGetClassName(elemTypeEq));
+ }
+ else
+ {
+ printf("%s[]).\n", varTypeName(elemTyp));
+ }
+
+ printf(" hAtArrTypeAtArr " STR_VN "%x is MapSelect(hAtArrType(" STR_VN "%x), arr=" STR_VN "%x).\n",
+ hAtArrTypeAtArr, hAtArrType, arrVN);
+
+ printf(" wholeElem " STR_VN "%x is MapSelect(hAtArrTypeAtArr(" STR_VN "%x), ind=" STR_VN "%x).\n",
+ wholeElem, hAtArrTypeAtArr, inxVN);
+ }
+#endif // DEBUG
+
+ selectedElem = wholeElem;
+ size_t elemStructSize = 0;
+ if (fldSeq)
+ {
+ selectedElem = vnStore->VNApplySelectors(VNK_Liberal, wholeElem, fldSeq, &elemStructSize);
+ elemTyp = vnStore->TypeOfVN(selectedElem);
+ }
+ selectedElem = vnStore->VNApplySelectorsTypeCheck(selectedElem, indType, elemStructSize);
+ selectedElem = vnStore->VNWithExc(selectedElem, excVN);
+
+#ifdef DEBUG
+ if (verbose && (selectedElem != wholeElem))
+ {
+ printf(" selectedElem is " STR_VN "%x after applying selectors.\n", selectedElem);
+ }
+#endif // DEBUG
+
+ if (tree != nullptr)
+ {
+ tree->gtVNPair.SetLiberal(selectedElem);
+ // TODO-CQ: what to do here about exceptions? We don't have the array and ind conservative
+ // values, so we don't have their exceptions. Maybe we should.
+ tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ }
+
+ return selectedElem;
+}
+
+var_types ValueNumStore::TypeOfVN(ValueNum vn)
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ return c->m_typ;
+}
+
+//------------------------------------------------------------------------
+// LoopOfVN: If the given value number is an opaque one associated with a particular
+//    expression in the IR, return the loop number where the expression occurs; otherwise,
+//    return MAX_LOOP_NUM.
+//
+// Arguments:
+// vn - Value number to query
+//
+// Return Value:
+//    The corresponding block's bbNatLoopNum, which may be BasicBlock::NOT_IN_LOOP.
+// Returns MAX_LOOP_NUM if this VN is not an opaque value number associated with
+// a particular expression/location in the IR.
+
+BasicBlock::loopNumber ValueNumStore::LoopOfVN(ValueNum vn)
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ return c->m_loopNum;
+}
+
+bool ValueNumStore::IsVNConstant(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ if (c->m_attribs == CEA_Const)
+ {
+ return vn != VNForVoid(); // Void is not a "real" constant -- in the sense that it represents no value.
+ }
+ else
+ {
+ return c->m_attribs == CEA_Handle;
+ }
+}
+
+bool ValueNumStore::IsVNInt32Constant(ValueNum vn)
+{
+ if (!IsVNConstant(vn))
+ {
+ return false;
+ }
+
+ return TypeOfVN(vn) == TYP_INT;
+}
+
+unsigned ValueNumStore::GetHandleFlags(ValueNum vn)
+{
+ assert(IsVNHandle(vn));
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ unsigned offset = ChunkOffset(vn);
+ VNHandle* handle = &reinterpret_cast<VNHandle*>(c->m_defs)[offset];
+ return handle->m_flags;
+}
+
+bool ValueNumStore::IsVNHandle(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ return c->m_attribs == CEA_Handle;
+}
+
+bool ValueNumStore::IsVNConstantBound(ValueNum vn)
+{
+ // Do we have "var < 100"?
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+ if (!GetVNFunc(vn, &funcAttr))
+ {
+ return false;
+ }
+ if (funcAttr.m_func != (VNFunc)GT_LE && funcAttr.m_func != (VNFunc)GT_GE && funcAttr.m_func != (VNFunc)GT_LT &&
+ funcAttr.m_func != (VNFunc)GT_GT)
+ {
+ return false;
+ }
+
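+    // A constant bound requires exactly one of the two operands to be an int32 constant
+    // (hence the inequality of the two checks below).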
+ return IsVNInt32Constant(funcAttr.m_args[0]) != IsVNInt32Constant(funcAttr.m_args[1]);
+}
+
+void ValueNumStore::GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info)
+{
+ assert(IsVNConstantBound(vn));
+ assert(info);
+
+ // Do we have var < 100?
+ VNFuncApp funcAttr;
+ GetVNFunc(vn, &funcAttr);
+
+ bool isOp1Const = IsVNInt32Constant(funcAttr.m_args[1]);
+
+ if (isOp1Const)
+ {
+ info->cmpOper = funcAttr.m_func;
+ info->cmpOpVN = funcAttr.m_args[0];
+ info->constVal = GetConstantInt32(funcAttr.m_args[1]);
+ }
+ else
+ {
+ info->cmpOper = GenTree::SwapRelop((genTreeOps)funcAttr.m_func);
+ info->cmpOpVN = funcAttr.m_args[1];
+ info->constVal = GetConstantInt32(funcAttr.m_args[0]);
+ }
+}
+
+bool ValueNumStore::IsVNArrLenBound(ValueNum vn)
+{
+ // Do we have "var < a.len"?
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+ if (!GetVNFunc(vn, &funcAttr))
+ {
+ return false;
+ }
+ if (funcAttr.m_func != (VNFunc)GT_LE && funcAttr.m_func != (VNFunc)GT_GE && funcAttr.m_func != (VNFunc)GT_LT &&
+ funcAttr.m_func != (VNFunc)GT_GT)
+ {
+ return false;
+ }
+ if (!IsVNArrLen(funcAttr.m_args[0]) && !IsVNArrLen(funcAttr.m_args[1]))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void ValueNumStore::GetArrLenBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info)
+{
+ assert(IsVNArrLenBound(vn));
+
+ // Do we have var < a.len?
+ VNFuncApp funcAttr;
+ GetVNFunc(vn, &funcAttr);
+
+ bool isOp1ArrLen = IsVNArrLen(funcAttr.m_args[1]);
+ if (isOp1ArrLen)
+ {
+ info->cmpOper = funcAttr.m_func;
+ info->cmpOp = funcAttr.m_args[0];
+ info->vnArray = GetArrForLenVn(funcAttr.m_args[1]);
+ }
+ else
+ {
+ info->cmpOper = GenTree::SwapRelop((genTreeOps)funcAttr.m_func);
+ info->cmpOp = funcAttr.m_args[1];
+ info->vnArray = GetArrForLenVn(funcAttr.m_args[0]);
+ }
+}
+
+bool ValueNumStore::IsVNArrLenArith(ValueNum vn)
+{
+ // Do we have "a.len +or- var"
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+
+ return GetVNFunc(vn, &funcAttr) && // vn is a func.
+ (funcAttr.m_func == (VNFunc)GT_ADD || funcAttr.m_func == (VNFunc)GT_SUB) && // the func is +/-
+ (IsVNArrLen(funcAttr.m_args[0]) || IsVNArrLen(funcAttr.m_args[1])); // either op1 or op2 is a.len
+}
+
+void ValueNumStore::GetArrLenArithInfo(ValueNum vn, ArrLenArithBoundInfo* info)
+{
+ // Do we have a.len +/- var?
+ assert(IsVNArrLenArith(vn));
+ VNFuncApp funcArith;
+ GetVNFunc(vn, &funcArith);
+
+ bool isOp1ArrLen = IsVNArrLen(funcArith.m_args[1]);
+ if (isOp1ArrLen)
+ {
+ info->arrOper = funcArith.m_func;
+ info->arrOp = funcArith.m_args[0];
+ info->vnArray = GetArrForLenVn(funcArith.m_args[1]);
+ }
+ else
+ {
+ info->arrOper = funcArith.m_func;
+ info->arrOp = funcArith.m_args[1];
+ info->vnArray = GetArrForLenVn(funcArith.m_args[0]);
+ }
+}
+
+bool ValueNumStore::IsVNArrLenArithBound(ValueNum vn)
+{
+ // Do we have: "var < a.len - var"
+ if (vn == NoVN)
+ {
+ return false;
+ }
+
+ VNFuncApp funcAttr;
+ if (!GetVNFunc(vn, &funcAttr))
+ {
+ return false;
+ }
+
+ // Suitable comparator.
+ if (funcAttr.m_func != (VNFunc)GT_LE && funcAttr.m_func != (VNFunc)GT_GE && funcAttr.m_func != (VNFunc)GT_LT &&
+ funcAttr.m_func != (VNFunc)GT_GT)
+ {
+ return false;
+ }
+
+    // Either op0 or op1 is arr len arithmetic.
+ if (!IsVNArrLenArith(funcAttr.m_args[0]) && !IsVNArrLenArith(funcAttr.m_args[1]))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+void ValueNumStore::GetArrLenArithBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info)
+{
+ assert(IsVNArrLenArithBound(vn));
+
+ VNFuncApp funcAttr;
+ GetVNFunc(vn, &funcAttr);
+
+    // Check whether op0 or op1 is arr len arithmetic.
+ bool isOp1ArrLenArith = IsVNArrLenArith(funcAttr.m_args[1]);
+ if (isOp1ArrLenArith)
+ {
+ info->cmpOper = funcAttr.m_func;
+ info->cmpOp = funcAttr.m_args[0];
+ GetArrLenArithInfo(funcAttr.m_args[1], info);
+ }
+ else
+ {
+ info->cmpOper = GenTree::SwapRelop((genTreeOps)funcAttr.m_func);
+ info->cmpOp = funcAttr.m_args[1];
+ GetArrLenArithInfo(funcAttr.m_args[0], info);
+ }
+}
+
+ValueNum ValueNumStore::GetArrForLenVn(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return NoVN;
+ }
+
+ VNFuncApp funcAttr;
+ if (GetVNFunc(vn, &funcAttr) && funcAttr.m_func == (VNFunc)GT_ARR_LENGTH)
+ {
+ return funcAttr.m_args[0];
+ }
+ return NoVN;
+}
+
+bool ValueNumStore::IsVNNewArr(ValueNum vn, VNFuncApp* funcApp)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ bool result = false;
+ if (GetVNFunc(vn, funcApp))
+ {
+ result = (funcApp->m_func == VNF_JitNewArr) || (funcApp->m_func == VNF_JitReadyToRunNewArr);
+ }
+ return result;
+}
+
+int ValueNumStore::GetNewArrSize(ValueNum vn)
+{
+ VNFuncApp funcApp;
+ if (IsVNNewArr(vn, &funcApp))
+ {
+ ValueNum arg1VN = funcApp.m_args[1];
+ if (IsVNConstant(arg1VN) && TypeOfVN(arg1VN) == TYP_INT)
+ {
+ return ConstantValue<int>(arg1VN);
+ }
+ }
+ return 0;
+}
+
+bool ValueNumStore::IsVNArrLen(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ VNFuncApp funcAttr;
+ return (GetVNFunc(vn, &funcAttr) && funcAttr.m_func == (VNFunc)GT_ARR_LENGTH);
+}
+
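+// EvalMathFuncUnary: Value-number a unary math intrinsic. A target-implemented intrinsic
+// applied to a constant argument is constant-folded here (the computation is done in double
+// precision and then narrowed to the result type); all other cases produce a VNF_* function
+// application over the argument.
+//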
+ValueNum ValueNumStore::EvalMathFuncUnary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN)
+{
+ assert(arg0VN == VNNormVal(arg0VN));
+ if (IsVNConstant(arg0VN) && Compiler::IsTargetIntrinsic(gtMathFN))
+ {
+        // If the math intrinsic is not implemented by target-specific instructions (i.e., it
+        // is implemented via a user call), then don't do constant folding on it. This
+        // minimizes precision loss.
+ // I *may* need separate tracks for the double/float -- if the intrinsic funcs have overloads for these.
+ double arg0Val = GetConstantDouble(arg0VN);
+
+ double res = 0.0;
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ res = sin(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ res = cos(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ res = sqrt(arg0Val);
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ res = fabs(arg0Val); // The result and params are doubles.
+ break;
+ case CORINFO_INTRINSIC_Round:
+ res = FloatingPointUtils::round(arg0Val);
+ break;
+ default:
+ unreached(); // the above are the only math intrinsics at the time of this writing.
+ }
+ if (typ == TYP_DOUBLE)
+ {
+ return VNForDoubleCon(res);
+ }
+ else if (typ == TYP_FLOAT)
+ {
+ return VNForFloatCon(float(res));
+ }
+ else
+ {
+ assert(typ == TYP_INT);
+ assert(gtMathFN == CORINFO_INTRINSIC_Round);
+
+ return VNForIntCon(int(res));
+ }
+ }
+ else
+ {
+ assert(typ == TYP_DOUBLE || typ == TYP_FLOAT || (typ == TYP_INT && gtMathFN == CORINFO_INTRINSIC_Round));
+
+ VNFunc vnf = VNF_Boundary;
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ vnf = VNF_Sin;
+ break;
+ case CORINFO_INTRINSIC_Cos:
+ vnf = VNF_Cos;
+ break;
+ case CORINFO_INTRINSIC_Sqrt:
+ vnf = VNF_Sqrt;
+ break;
+ case CORINFO_INTRINSIC_Abs:
+ vnf = VNF_Abs;
+ break;
+ case CORINFO_INTRINSIC_Round:
+ if (typ == TYP_DOUBLE)
+ {
+ vnf = VNF_RoundDouble;
+ }
+ else if (typ == TYP_FLOAT)
+ {
+ vnf = VNF_RoundFloat;
+ }
+ else if (typ == TYP_INT)
+ {
+ vnf = VNF_RoundInt;
+ }
+ else
+ {
+ noway_assert(!"Invalid INTRINSIC_Round");
+ }
+ break;
+ case CORINFO_INTRINSIC_Cosh:
+ vnf = VNF_Cosh;
+ break;
+ case CORINFO_INTRINSIC_Sinh:
+ vnf = VNF_Sinh;
+ break;
+ case CORINFO_INTRINSIC_Tan:
+ vnf = VNF_Tan;
+ break;
+ case CORINFO_INTRINSIC_Tanh:
+ vnf = VNF_Tanh;
+ break;
+ case CORINFO_INTRINSIC_Asin:
+ vnf = VNF_Asin;
+ break;
+ case CORINFO_INTRINSIC_Acos:
+ vnf = VNF_Acos;
+ break;
+ case CORINFO_INTRINSIC_Atan:
+ vnf = VNF_Atan;
+ break;
+ case CORINFO_INTRINSIC_Log10:
+ vnf = VNF_Log10;
+ break;
+ case CORINFO_INTRINSIC_Exp:
+ vnf = VNF_Exp;
+ break;
+ case CORINFO_INTRINSIC_Ceiling:
+ vnf = VNF_Ceiling;
+ break;
+ case CORINFO_INTRINSIC_Floor:
+ vnf = VNF_Floor;
+ break;
+ default:
+ unreached(); // the above are the only math intrinsics at the time of this writing.
+ }
+
+ return VNForFunc(typ, vnf, arg0VN);
+ }
+}
+
+ValueNum ValueNumStore::EvalMathFuncBinary(var_types typ, CorInfoIntrinsics gtMathFN, ValueNum arg0VN, ValueNum arg1VN)
+{
+ assert(varTypeIsFloating(typ));
+ assert(arg0VN == VNNormVal(arg0VN));
+ assert(arg1VN == VNNormVal(arg1VN));
+
+ VNFunc vnf = VNF_Boundary;
+
+ // Currently, none of the binary math intrinsics are implemented by target-specific instructions.
+ // To minimize precision loss, do not do constant folding on them.
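+ // For example, even Pow($c1, $c2) with two constant arguments is kept symbolically as
+ // VNForFunc(typ, VNF_Pow, arg0VN, arg1VN) rather than being evaluated at jit time.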
+
+ switch (gtMathFN)
+ {
+ case CORINFO_INTRINSIC_Atan2:
+ vnf = VNF_Atan2;
+ break;
+
+ case CORINFO_INTRINSIC_Pow:
+ vnf = VNF_Pow;
+ break;
+
+ default:
+ unreached(); // the above are the only binary math intrinsics at the time of this writing.
+ }
+
+ return VNForFunc(typ, vnf, arg0VN, arg1VN);
+}
+
+bool ValueNumStore::IsVNFunc(ValueNum vn)
+{
+ if (vn == NoVN)
+ {
+ return false;
+ }
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ switch (c->m_attribs)
+ {
+ case CEA_Func0:
+ case CEA_Func1:
+ case CEA_Func2:
+ case CEA_Func3:
+ case CEA_Func4:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool ValueNumStore::GetVNFunc(ValueNum vn, VNFuncApp* funcApp)
+{
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ unsigned offset = ChunkOffset(vn);
+ assert(offset < c->m_numUsed);
+ switch (c->m_attribs)
+ {
+ case CEA_Func4:
+ {
+ VNDefFunc4Arg* farg4 = &reinterpret_cast<VNDefFunc4Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg4->m_func;
+ funcApp->m_arity = 4;
+ funcApp->m_args[0] = farg4->m_arg0;
+ funcApp->m_args[1] = farg4->m_arg1;
+ funcApp->m_args[2] = farg4->m_arg2;
+ funcApp->m_args[3] = farg4->m_arg3;
+ }
+ return true;
+ case CEA_Func3:
+ {
+ VNDefFunc3Arg* farg3 = &reinterpret_cast<VNDefFunc3Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg3->m_func;
+ funcApp->m_arity = 3;
+ funcApp->m_args[0] = farg3->m_arg0;
+ funcApp->m_args[1] = farg3->m_arg1;
+ funcApp->m_args[2] = farg3->m_arg2;
+ }
+ return true;
+ case CEA_Func2:
+ {
+ VNDefFunc2Arg* farg2 = &reinterpret_cast<VNDefFunc2Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg2->m_func;
+ funcApp->m_arity = 2;
+ funcApp->m_args[0] = farg2->m_arg0;
+ funcApp->m_args[1] = farg2->m_arg1;
+ }
+ return true;
+ case CEA_Func1:
+ {
+ VNDefFunc1Arg* farg1 = &reinterpret_cast<VNDefFunc1Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg1->m_func;
+ funcApp->m_arity = 1;
+ funcApp->m_args[0] = farg1->m_arg0;
+ }
+ return true;
+ case CEA_Func0:
+ {
+ VNDefFunc0Arg* farg0 = &reinterpret_cast<VNDefFunc0Arg*>(c->m_defs)[offset];
+ funcApp->m_func = farg0->m_func;
+ funcApp->m_arity = 0;
+ }
+ return true;
+ default:
+ return false;
+ }
+}
+
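+// Given the VN of an address (TYP_REF or TYP_BYREF), strip away any ADD/SUB offset arithmetic to
+// recover the VN of the underlying object reference (or constant byref); e.g., for an address
+// computed as "obj + 8 + index" this returns the VN of "obj".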
+ValueNum ValueNumStore::VNForRefInAddr(ValueNum vn)
+{
+ var_types vnType = TypeOfVN(vn);
+ if (vnType == TYP_REF)
+ {
+ return vn;
+ }
+ // Otherwise...
+ assert(vnType == TYP_BYREF);
+ VNFuncApp funcApp;
+ if (GetVNFunc(vn, &funcApp))
+ {
+ assert(funcApp.m_arity == 2 && (funcApp.m_func == VNFunc(GT_ADD) || funcApp.m_func == VNFunc(GT_SUB)));
+ var_types vnArg0Type = TypeOfVN(funcApp.m_args[0]);
+ if (vnArg0Type == TYP_REF || vnArg0Type == TYP_BYREF)
+ {
+ return VNForRefInAddr(funcApp.m_args[0]);
+ }
+ else
+ {
+ assert(funcApp.m_func == VNFunc(GT_ADD) &&
+ (TypeOfVN(funcApp.m_args[1]) == TYP_REF || TypeOfVN(funcApp.m_args[1]) == TYP_BYREF));
+ return VNForRefInAddr(funcApp.m_args[1]);
+ }
+ }
+ else
+ {
+ assert(IsVNConstant(vn));
+ return vn;
+ }
+}
+
+bool ValueNumStore::VNIsValid(ValueNum vn)
+{
+ ChunkNum cn = GetChunkNum(vn);
+ if (cn >= m_chunks.Size())
+ {
+ return false;
+ }
+ // Otherwise...
+ Chunk* c = m_chunks.GetNoExpand(cn);
+ return ChunkOffset(vn) < c->m_numUsed;
+}
+
+#ifdef DEBUG
+
+void ValueNumStore::vnDump(Compiler* comp, ValueNum vn, bool isPtr)
+{
+ printf(" {");
+ if (vn == NoVN)
+ {
+ printf("NoVN");
+ }
+ else if (IsVNHandle(vn))
+ {
+ ssize_t val = ConstantValue<ssize_t>(vn);
+ printf("Hnd const: 0x%p", dspPtr(val));
+ }
+ else if (IsVNConstant(vn))
+ {
+ var_types vnt = TypeOfVN(vn);
+ switch (vnt)
+ {
+ case TYP_BOOL:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_USHORT:
+ case TYP_INT:
+ case TYP_UINT:
+ {
+ int val = ConstantValue<int>(vn);
+ if (isPtr)
+ {
+ printf("PtrCns[%p]", dspPtr(val));
+ }
+ else
+ {
+ printf("IntCns");
+ if ((val > -1000) && (val < 1000))
+ {
+ printf(" %ld", val);
+ }
+ else
+ {
+ printf(" 0x%X", val);
+ }
+ }
+ }
+ break;
+ case TYP_LONG:
+ case TYP_ULONG:
+ {
+ INT64 val = ConstantValue<INT64>(vn);
+ if (isPtr)
+ {
+ printf("LngPtrCns: 0x%p", dspPtr(val));
+ }
+ else
+ {
+ printf("LngCns: ");
+ if ((val > -1000) && (val < 1000))
+ {
+ printf(" %ld", val);
+ }
+ else if ((val & 0xFFFFFFFF00000000LL) == 0)
+ {
+ printf(" 0x%X", val);
+ }
+ else
+ {
+ printf(" 0x%llx", val);
+ }
+ }
+ }
+ break;
+ case TYP_FLOAT:
+ printf("FltCns[%f]", ConstantValue<float>(vn));
+ break;
+ case TYP_DOUBLE:
+ printf("DblCns[%f]", ConstantValue<double>(vn));
+ break;
+ case TYP_REF:
+ case TYP_ARRAY:
+ if (vn == VNForNull())
+ {
+ printf("null");
+ }
+ else if (vn == VNForVoid())
+ {
+ printf("void");
+ }
+ else
+ {
+ assert(vn == VNForZeroMap());
+ printf("zeroMap");
+ }
+ break;
+ case TYP_BYREF:
+ printf("byrefVal");
+ break;
+ case TYP_STRUCT:
+#ifdef FEATURE_SIMD
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+ case TYP_SIMD32:
+#endif // FEATURE_SIMD
+ printf("structVal");
+ break;
+
+ // These should be unreached.
+ default:
+ unreached();
+ }
+ }
+ else if (IsVNArrLenBound(vn))
+ {
+ ArrLenArithBoundInfo info;
+ GetArrLenBoundInfo(vn, &info);
+ info.dump(this);
+ }
+ else if (IsVNArrLenArithBound(vn))
+ {
+ ArrLenArithBoundInfo info;
+ GetArrLenArithBoundInfo(vn, &info);
+ info.dump(this);
+ }
+ else if (IsVNFunc(vn))
+ {
+ VNFuncApp funcApp;
+ GetVNFunc(vn, &funcApp);
+ // A few special cases...
+ switch (funcApp.m_func)
+ {
+ case VNF_FieldSeq:
+ vnDumpFieldSeq(comp, &funcApp, true);
+ break;
+ case VNF_MapSelect:
+ vnDumpMapSelect(comp, &funcApp);
+ break;
+ case VNF_MapStore:
+ vnDumpMapStore(comp, &funcApp);
+ break;
+ default:
+ printf("%s(", VNFuncName(funcApp.m_func));
+ for (unsigned i = 0; i < funcApp.m_arity; i++)
+ {
+ if (i > 0)
+ {
+ printf(", ");
+ }
+
+ printf(STR_VN "%x", funcApp.m_args[i]);
+
+#if FEATURE_VN_DUMP_FUNC_ARGS
+ printf("=");
+ vnDump(comp, funcApp.m_args[i]);
+#endif
+ }
+ printf(")");
+ }
+ }
+ else
+ {
+ // Otherwise, just a VN with no structure; print just the VN.
+ printf("%x", vn);
+ }
+ printf("}");
+}
+
+void ValueNumStore::vnDumpFieldSeq(Compiler* comp, VNFuncApp* fieldSeq, bool isHead)
+{
+ assert(fieldSeq->m_func == VNF_FieldSeq); // Precondition.
+ // First arg is the field handle VN.
+ assert(IsVNConstant(fieldSeq->m_args[0]) && TypeOfVN(fieldSeq->m_args[0]) == TYP_I_IMPL);
+ ssize_t fieldHndVal = ConstantValue<ssize_t>(fieldSeq->m_args[0]);
+ bool hasTail = (fieldSeq->m_args[1] != VNForNull());
+
+ if (isHead && hasTail)
+ {
+ printf("(");
+ }
+
+ CORINFO_FIELD_HANDLE fldHnd = CORINFO_FIELD_HANDLE(fieldHndVal);
+ if (fldHnd == FieldSeqStore::FirstElemPseudoField)
+ {
+ printf("#FirstElem");
+ }
+ else if (fldHnd == FieldSeqStore::ConstantIndexPseudoField)
+ {
+ printf("#ConstantIndex");
+ }
+ else
+ {
+ const char* modName;
+ const char* fldName = m_pComp->eeGetFieldName(fldHnd, &modName);
+ printf("%s", fldName);
+ }
+
+ if (hasTail)
+ {
+ printf(", ");
+ assert(IsVNFunc(fieldSeq->m_args[1]));
+ VNFuncApp tail;
+ GetVNFunc(fieldSeq->m_args[1], &tail);
+ vnDumpFieldSeq(comp, &tail, false);
+ }
+
+ if (isHead && hasTail)
+ {
+ printf(")");
+ }
+}
+
+void ValueNumStore::vnDumpMapSelect(Compiler* comp, VNFuncApp* mapSelect)
+{
+ assert(mapSelect->m_func == VNF_MapSelect); // Precondition.
+
+ ValueNum mapVN = mapSelect->m_args[0]; // First arg is the map id
+ ValueNum indexVN = mapSelect->m_args[1]; // Second arg is the index
+
+ comp->vnPrint(mapVN, 0);
+ printf("[");
+ comp->vnPrint(indexVN, 0);
+ printf("]");
+}
+
+void ValueNumStore::vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore)
+{
+ assert(mapStore->m_func == VNF_MapStore); // Precondition.
+
+ ValueNum mapVN = mapStore->m_args[0]; // First arg is the map id
+ ValueNum indexVN = mapStore->m_args[1]; // Second arg is the index
+ ValueNum newValVN = mapStore->m_args[2]; // Third arg is the new value
+
+ comp->vnPrint(mapVN, 0);
+ printf("[");
+ comp->vnPrint(indexVN, 0);
+ printf(" := ");
+ comp->vnPrint(newValVN, 0);
+ printf("]");
+}
+#endif // DEBUG
+
+// Static fields, methods.
+static UINT8 vnfOpAttribs[VNF_COUNT];
+static genTreeOps genTreeOpsIllegalAsVNFunc[] = {GT_IND, // When we do heap memory.
+ GT_NULLCHECK, GT_QMARK, GT_COLON, GT_LOCKADD, GT_XADD, GT_XCHG,
+ GT_CMPXCHG, GT_LCLHEAP, GT_BOX,
+
+ // These need special semantics:
+ GT_COMMA, // == second argument (but with exception(s) from first).
+ GT_ADDR, GT_ARR_BOUNDS_CHECK,
+ GT_OBJ, // May reference heap memory.
+ GT_BLK, // May reference heap memory.
+
+ // These control-flow operations need no values.
+ GT_JTRUE, GT_RETURN, GT_SWITCH, GT_RETFILT, GT_CKFINITE};
+
+UINT8* ValueNumStore::s_vnfOpAttribs = nullptr;
+
+void ValueNumStore::InitValueNumStoreStatics()
+{
+ // Make sure we've gotten constants right...
+ assert(unsigned(VNFOA_Arity) == (1 << VNFOA_ArityShift));
+ assert(unsigned(VNFOA_AfterArity) == (unsigned(VNFOA_Arity) << VNFOA_ArityBits));
+
+ s_vnfOpAttribs = &vnfOpAttribs[0];
+ for (unsigned i = 0; i < GT_COUNT; i++)
+ {
+ genTreeOps gtOper = static_cast<genTreeOps>(i);
+ unsigned arity = 0;
+ if (GenTree::OperIsUnary(gtOper))
+ {
+ arity = 1;
+ }
+ else if (GenTree::OperIsBinary(gtOper))
+ {
+ arity = 2;
+ }
+ // Since GT_ARR_BOUNDS_CHECK is not currently GTK_BINOP
+ else if (gtOper == GT_ARR_BOUNDS_CHECK)
+ {
+ arity = 2;
+ }
+ vnfOpAttribs[i] |= (arity << VNFOA_ArityShift);
+
+ if (GenTree::OperIsCommutative(gtOper))
+ {
+ vnfOpAttribs[i] |= VNFOA_Commutative;
+ }
+ }
+
+ // I so wish this wasn't the best way to do this...
+
+ int vnfNum = VNF_Boundary + 1; // The macro definition below will update this after using it.
+
+#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) \
+ if (commute) \
+ vnfOpAttribs[vnfNum] |= VNFOA_Commutative; \
+ if (knownNonNull) \
+ vnfOpAttribs[vnfNum] |= VNFOA_KnownNonNull; \
+ if (sharedStatic) \
+ vnfOpAttribs[vnfNum] |= VNFOA_SharedStatic; \
+ vnfOpAttribs[vnfNum] |= (arity << VNFOA_ArityShift); \
+ vnfNum++;
+
+#include "valuenumfuncs.h"
+#undef ValueNumFuncDef
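+ // As an illustration of the macro expansion above (the real entries live in valuenumfuncs.h), a
+ // hypothetical entry ValueNumFuncDef(Sin, 1, false, false, false) would record an arity of 1 for
+ // VNF_Sin and leave the commutative/known-non-null/shared-static flags clear.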
+
+ unsigned n = sizeof(genTreeOpsIllegalAsVNFunc) / sizeof(genTreeOps);
+ for (unsigned i = 0; i < n; i++)
+ {
+ vnfOpAttribs[genTreeOpsIllegalAsVNFunc[i]] |= VNFOA_IllegalGenTreeOp;
+ }
+}
+
+#ifdef DEBUG
+// Define the name array.
+#define ValueNumFuncDef(vnf, arity, commute, knownNonNull, sharedStatic) #vnf,
+
+const char* ValueNumStore::VNFuncNameArr[] = {
+#include "valuenumfuncs.h"
+#undef ValueNumFuncDef
+};
+
+// static
+const char* ValueNumStore::VNFuncName(VNFunc vnf)
+{
+ if (vnf < VNF_Boundary)
+ {
+ return GenTree::NodeName(genTreeOps(vnf));
+ }
+ else
+ {
+ return VNFuncNameArr[vnf - (VNF_Boundary + 1)];
+ }
+}
+
+static const char* s_reservedNameArr[] = {
+ "$VN.Recursive", // -2 RecursiveVN
+ "$VN.No", // -1 NoVN
+ "$VN.Null", // 0 VNForNull()
+ "$VN.ZeroMap", // 1 VNForZeroMap()
+ "$VN.NotAField", // 2 VNForNotAField()
+ "$VN.ReadOnlyHeap", // 3 VNForROH()
+ "$VN.Void", // 4 VNForVoid()
+ "$VN.EmptyExcSet" // 5 VNForEmptyExcSet()
+};
+
+// Returns the string name of "vn" when it is a reserved value number, nullptr otherwise
+// static
+const char* ValueNumStore::reservedName(ValueNum vn)
+{
+ int val = vn - ValueNumStore::RecursiveVN; // Add two, making 'RecursiveVN' equal to zero
+ int max = ValueNumStore::SRC_NumSpecialRefConsts - ValueNumStore::RecursiveVN;
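+ // E.g., vn == NoVN (-1) yields val == 1, selecting "$VN.No"; vn == VNForNull() (0) yields val == 2,
+ // selecting "$VN.Null".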
+
+ if ((val >= 0) && (val < max))
+ {
+ return s_reservedNameArr[val];
+ }
+ return nullptr;
+}
+
+#endif // DEBUG
+
+// Returns true if "vn" is a reserved value number
+
+// static
+bool ValueNumStore::isReservedVN(ValueNum vn)
+{
+ int val = vn - ValueNumStore::RecursiveVN; // Adding two, making 'RecursiveVN' equal to zero
+ int max = ValueNumStore::SRC_NumSpecialRefConsts - ValueNumStore::RecursiveVN;
+
+ if ((val >= 0) && (val < max))
+ {
+ return true;
+ }
+ return false;
+}
+
+#ifdef DEBUG
+void ValueNumStore::RunTests(Compiler* comp)
+{
+ VNFunc VNF_Add = GenTreeOpToVNFunc(GT_ADD);
+
+ ValueNumStore* vns = new (comp->getAllocatorDebugOnly()) ValueNumStore(comp, comp->getAllocatorDebugOnly());
+ ValueNum vnNull = VNForNull();
+ assert(vnNull == VNForNull());
+
+ ValueNum vnFor1 = vns->VNForIntCon(1);
+ assert(vnFor1 == vns->VNForIntCon(1));
+ assert(vns->TypeOfVN(vnFor1) == TYP_INT);
+ assert(vns->IsVNConstant(vnFor1));
+ assert(vns->ConstantValue<int>(vnFor1) == 1);
+
+ ValueNum vnFor100 = vns->VNForIntCon(100);
+ assert(vnFor100 == vns->VNForIntCon(100));
+ assert(vnFor100 != vnFor1);
+ assert(vns->TypeOfVN(vnFor100) == TYP_INT);
+ assert(vns->IsVNConstant(vnFor100));
+ assert(vns->ConstantValue<int>(vnFor100) == 100);
+
+ ValueNum vnFor1F = vns->VNForFloatCon(1.0f);
+ assert(vnFor1F == vns->VNForFloatCon(1.0f));
+ assert(vnFor1F != vnFor1 && vnFor1F != vnFor100);
+ assert(vns->TypeOfVN(vnFor1F) == TYP_FLOAT);
+ assert(vns->IsVNConstant(vnFor1F));
+ assert(vns->ConstantValue<float>(vnFor1F) == 1.0f);
+
+ ValueNum vnFor1D = vns->VNForDoubleCon(1.0);
+ assert(vnFor1D == vns->VNForDoubleCon(1.0));
+ assert(vnFor1D != vnFor1F && vnFor1D != vnFor1 && vnFor1D != vnFor100);
+ assert(vns->TypeOfVN(vnFor1D) == TYP_DOUBLE);
+ assert(vns->IsVNConstant(vnFor1D));
+ assert(vns->ConstantValue<double>(vnFor1D) == 1.0);
+
+ ValueNum vnRandom1 = vns->VNForExpr(nullptr, TYP_INT);
+ ValueNum vnForFunc2a = vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnRandom1);
+ assert(vnForFunc2a == vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnRandom1));
+ assert(vnForFunc2a != vnFor1D && vnForFunc2a != vnFor1F && vnForFunc2a != vnFor1 && vnForFunc2a != vnRandom1);
+ assert(vns->TypeOfVN(vnForFunc2a) == TYP_INT);
+ assert(!vns->IsVNConstant(vnForFunc2a));
+ assert(vns->IsVNFunc(vnForFunc2a));
+ VNFuncApp fa2a;
+ bool b = vns->GetVNFunc(vnForFunc2a, &fa2a);
+ assert(b);
+ assert(fa2a.m_func == VNF_Add && fa2a.m_arity == 2 && fa2a.m_args[0] == vnFor1 && fa2a.m_args[1] == vnRandom1);
+
+ ValueNum vnForFunc2b = vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnFor100);
+ assert(vnForFunc2b == vns->VNForFunc(TYP_INT, VNF_Add, vnFor1, vnFor100));
+ assert(vnForFunc2b != vnFor1D && vnForFunc2b != vnFor1F && vnForFunc2b != vnFor1 && vnForFunc2b != vnFor100);
+ assert(vns->TypeOfVN(vnForFunc2b) == TYP_INT);
+ assert(vns->IsVNConstant(vnForFunc2b));
+ assert(vns->ConstantValue<int>(vnForFunc2b) == 101);
+
+ // printf("Did ValueNumStore::RunTests.\n");
+}
+#endif // DEBUG
+
+typedef ExpandArrayStack<BasicBlock*> BlockStack;
+
+// This represents the "to do" state of the value number computation.
+struct ValueNumberState
+{
+ // These two stacks collectively represent the set of blocks that are candidates for
+ // processing, because at least one predecessor has been processed. Blocks on "m_toDoAllPredsDone"
+ // have had *all* predecessors processed, and thus are candidates for some extra optimizations.
+ // Blocks on "m_toDoNotAllPredsDone" have at least one predecessor that has not been processed.
+ // A block initially on "m_toDoNotAllPredsDone" may be moved to "m_toDoAllPredsDone" when its last
+ // unprocessed predecessor is processed, thus maintaining the invariants.
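+ // As a minimal illustration: in a diamond A -> {B, C} -> D, finishing A puts B and C on
+ // "m_toDoAllPredsDone"; D first lands on "m_toDoNotAllPredsDone" when one of B/C finishes, and is
+ // added to "m_toDoAllPredsDone" once both have.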
+ BlockStack m_toDoAllPredsDone;
+ BlockStack m_toDoNotAllPredsDone;
+
+ Compiler* m_comp;
+
+ // TBD: This should really be a bitset...
+ // For now:
+ // first bit indicates completed,
+ // second bit indicates that it's been pushed on all-done stack,
+ // third bit indicates that it's been pushed on not-all-done stack.
+ BYTE* m_visited;
+
+ enum BlockVisitBits
+ {
+ BVB_complete = 0x1,
+ BVB_onAllDone = 0x2,
+ BVB_onNotAllDone = 0x4,
+ };
+
+ bool GetVisitBit(unsigned bbNum, BlockVisitBits bvb)
+ {
+ return (m_visited[bbNum] & bvb) != 0;
+ }
+ void SetVisitBit(unsigned bbNum, BlockVisitBits bvb)
+ {
+ m_visited[bbNum] |= bvb;
+ }
+
+ ValueNumberState(Compiler* comp)
+ : m_toDoAllPredsDone(comp->getAllocator(), /*minSize*/ 4)
+ , m_toDoNotAllPredsDone(comp->getAllocator(), /*minSize*/ 4)
+ , m_comp(comp)
+ , m_visited(new (comp, CMK_ValueNumber) BYTE[comp->fgBBNumMax + 1]())
+ {
+ }
+
+ BasicBlock* ChooseFromNotAllPredsDone()
+ {
+ assert(m_toDoAllPredsDone.Size() == 0);
+ // If we have no blocks with all preds done, then (ideally, if all cycles have been captured by loops)
+ // we must have at least one block within a loop. We want to do the loops first. Doing a loop entry block
+ // should break the cycle, making the rest of the body of the loop (unless there's a nested loop) doable by the
+ // all-preds-done rule. If several loop entry blocks are available, at least one should have all non-loop preds
+ // done -- we choose that.
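+ // For example, with a single natural loop whose entry E has one predecessor outside the loop and
+ // one back edge from inside it, E sits here with the back-edge pred unprocessed; once the outside
+ // pred is complete, E is returned, which breaks the cycle for the rest of the loop body.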
+ for (unsigned i = 0; i < m_toDoNotAllPredsDone.Size(); i++)
+ {
+ BasicBlock* cand = m_toDoNotAllPredsDone.Get(i);
+
+ // Skip any already-completed blocks (a block may have all its preds finished, get added to the
+ // all-preds-done todo set, and get processed there). Do this by moving the last one down, to
+ // keep the array compact.
+ while (GetVisitBit(cand->bbNum, BVB_complete))
+ {
+ if (i + 1 < m_toDoNotAllPredsDone.Size())
+ {
+ cand = m_toDoNotAllPredsDone.Pop();
+ m_toDoNotAllPredsDone.Set(i, cand);
+ }
+ else
+ {
+ // "cand" is the last element; delete it.
+ (void)m_toDoNotAllPredsDone.Pop();
+ break;
+ }
+ }
+ // We may have run out of non-complete candidates above. If so, we're done.
+ if (i == m_toDoNotAllPredsDone.Size())
+ {
+ break;
+ }
+
+ // See if "cand" is a loop entry.
+ unsigned lnum;
+ if (m_comp->optBlockIsLoopEntry(cand, &lnum))
+ {
+ // "lnum" is the innermost loop of which "cand" is the entry; find the outermost.
+ unsigned lnumPar = m_comp->optLoopTable[lnum].lpParent;
+ while (lnumPar != BasicBlock::NOT_IN_LOOP)
+ {
+ if (m_comp->optLoopTable[lnumPar].lpEntry == cand)
+ {
+ lnum = lnumPar;
+ }
+ else
+ {
+ break;
+ }
+ lnumPar = m_comp->optLoopTable[lnumPar].lpParent;
+ }
+
+ bool allNonLoopPredsDone = true;
+ for (flowList* pred = m_comp->BlockPredsWithEH(cand); pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!m_comp->optLoopTable[lnum].lpContains(predBlock))
+ {
+ if (!GetVisitBit(predBlock->bbNum, BVB_complete))
+ {
+ allNonLoopPredsDone = false;
+ }
+ }
+ }
+ if (allNonLoopPredsDone)
+ {
+ return cand;
+ }
+ }
+ }
+
+ // If we didn't find a loop entry block with all non-loop preds done above, then return a random member (if
+ // there is one).
+ if (m_toDoNotAllPredsDone.Size() == 0)
+ {
+ return nullptr;
+ }
+ else
+ {
+ return m_toDoNotAllPredsDone.Pop();
+ }
+ }
+
+// Debugging output that is too detailed for a normal JIT dump...
+#define DEBUG_VN_VISIT 0
+
+ // Record that "blk" has been visited, and add any unvisited successors of "blk" to the appropriate todo set.
+ void FinishVisit(BasicBlock* blk)
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP("finish(BB%02u).\n", blk->bbNum);
+#endif // DEBUG_VN_VISIT
+
+ SetVisitBit(blk->bbNum, BVB_complete);
+
+ AllSuccessorIter succsEnd = blk->GetAllSuccs(m_comp).end();
+ for (AllSuccessorIter succs = blk->GetAllSuccs(m_comp).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Succ(BB%02u).\n", succ->bbNum);
+#endif // DEBUG_VN_VISIT
+
+ if (GetVisitBit(succ->bbNum, BVB_complete))
+ {
+ continue;
+ }
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Not yet completed.\n");
+#endif // DEBUG_VN_VISIT
+
+ bool allPredsVisited = true;
+ for (flowList* pred = m_comp->BlockPredsWithEH(succ); pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!GetVisitBit(predBlock->bbNum, BVB_complete))
+ {
+ allPredsVisited = false;
+ break;
+ }
+ }
+
+ if (allPredsVisited)
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" All preds complete, adding to allDone.\n");
+#endif // DEBUG_VN_VISIT
+
+ assert(!GetVisitBit(succ->bbNum, BVB_onAllDone)); // Only the completion of the last
+ // predecessor should add "succ" to this set.
+ m_toDoAllPredsDone.Push(succ);
+ SetVisitBit(succ->bbNum, BVB_onAllDone);
+ }
+ else
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Not all preds complete Adding to notallDone, if necessary...\n");
+#endif // DEBUG_VN_VISIT
+
+ if (!GetVisitBit(succ->bbNum, BVB_onNotAllDone))
+ {
+#ifdef DEBUG_VN_VISIT
+ JITDUMP(" Was necessary.\n");
+#endif // DEBUG_VN_VISIT
+ m_toDoNotAllPredsDone.Push(succ);
+ SetVisitBit(succ->bbNum, BVB_onNotAllDone);
+ }
+ }
+ }
+ }
+
+ bool ToDoExists()
+ {
+ return m_toDoAllPredsDone.Size() > 0 || m_toDoNotAllPredsDone.Size() > 0;
+ }
+};
+
+void Compiler::fgValueNumber()
+{
+#ifdef DEBUG
+ // This could be a JITDUMP, but some people find it convenient to set a breakpoint on the printf.
+ if (verbose)
+ {
+ printf("\n*************** In fgValueNumber()\n");
+ }
+#endif
+
+ // If we skipped SSA, skip VN as well.
+ if (fgSsaPassesCompleted == 0)
+ {
+ return;
+ }
+
+ // Allocate the value number store.
+ assert(fgVNPassesCompleted > 0 || vnStore == nullptr);
+ if (fgVNPassesCompleted == 0)
+ {
+ CompAllocator* allocator = new (this, CMK_ValueNumber) CompAllocator(this, CMK_ValueNumber);
+ vnStore = new (this, CMK_ValueNumber) ValueNumStore(this, allocator);
+ }
+ else
+ {
+ ValueNumPair noVnp;
+ // Make sure the heap SSA names have no value numbers.
+ for (unsigned i = 0; i < lvHeapNumSsaNames; i++)
+ {
+ lvHeapPerSsaData.GetRef(i).m_vnPair = noVnp;
+ }
+ for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
+ {
+ // Now iterate over the block's statements, and their trees.
+ for (GenTreePtr stmts = blk->FirstNonPhiDef(); stmts != nullptr; stmts = stmts->gtNext)
+ {
+ assert(stmts->IsStatement());
+ for (GenTreePtr tree = stmts->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ tree->gtVNPair.SetBoth(ValueNumStore::NoVN);
+ }
+ }
+ }
+ }
+
+ // Compute the side effects of loops.
+ optComputeLoopSideEffects();
+
+ // At the block level, we will use a modified worklist algorithm. We will have two
+ // "todo" sets of unvisited blocks. Blocks (other than the entry block) are put in a
+ // todo set only when some predecessor has been visited, so all blocks have at least one
+ // predecessor visited. The distinction between the two sets is whether *all* predecessors have
+ // already been visited. We visit such blocks preferentially if they exist, since phi definitions
+ // in such blocks will have all arguments defined, enabling a simplification in the case that all
+ // arguments to the phi have the same VN. If no such blocks exist, we pick a block with at least
+ // one unvisited predecessor. In this case, we assign a new VN for phi definitions.
+
+ // Start by giving incoming arguments value numbers.
+ // Also give must-init vars a zero of their type.
+ for (unsigned i = 0; i < lvaCount; i++)
+ {
+ LclVarDsc* varDsc = &lvaTable[i];
+ if (varDsc->lvIsParam)
+ {
+ // We assume that code equivalent to this variable initialization loop
+ // has been performed when doing SSA naming, so that all the variables we give
+ // initial VNs to here have been given initial SSA definitions there.
+ // SSA numbers always start from FIRST_SSA_NUM, and we give the value number to SSA name FIRST_SSA_NUM.
+ // We use the VNF_InitVal(i) from here so we know that this value is loop-invariant
+ // in all loops.
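+ // For example (a sketch): the parameter with lclNum 1 gets VNF_InitVal($c1), where $c1 is the VN of
+ // the constant 1, so all uses of its initial SSA definition share that same VN.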
+ ValueNum initVal = vnStore->VNForFunc(varDsc->TypeGet(), VNF_InitVal, vnStore->VNForIntCon(i));
+ LclSsaVarDsc* ssaDef = varDsc->GetPerSsaData(SsaConfig::FIRST_SSA_NUM);
+ ssaDef->m_vnPair.SetBoth(initVal);
+ ssaDef->m_defLoc.m_blk = fgFirstBB;
+ }
+ else if (info.compInitMem || varDsc->lvMustInit ||
+ (varDsc->lvTracked && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex)))
+ {
+ // The last clause covers the use-before-def variables (the ones that are live-in to the first block):
+ // these are variables that are read before being initialized (at least on some control flow paths).
+ // If they are not must-init, then they get VNF_InitVal(i), as with the param case.
+
+ bool isZeroed = (info.compInitMem || varDsc->lvMustInit);
+ ValueNum initVal = ValueNumStore::NoVN; // We must assign a new value to initVal
+ var_types typ = varDsc->TypeGet();
+
+ switch (typ)
+ {
+ case TYP_LCLBLK: // The outgoing args area for arm and x64
+ case TYP_BLK: // A blob of memory
+ // TYP_BLK is used for the EHSlots LclVar on x86 (aka shadowSPslotsVar)
+ // and for the lvaInlinedPInvokeFrameVar on x64, arm and x86
+ // The stack memory associated with these LclVars is not zero-initialized,
+ // thus we set 'initVal' to a new, unique VN.
+ //
+ initVal = vnStore->VNForExpr(fgFirstBB);
+ break;
+
+ case TYP_BYREF:
+ if (isZeroed)
+ {
+ // LclVars of TYP_BYREF can be zero-inited.
+ initVal = vnStore->VNForByrefCon(0);
+ }
+ else
+ {
+ // Here we have uninitialized TYP_BYREF
+ initVal = vnStore->VNForFunc(typ, VNF_InitVal, vnStore->VNForIntCon(i));
+ }
+ break;
+
+ default:
+ if (isZeroed)
+ {
+ // By default we will zero init these LclVars
+ initVal = vnStore->VNZeroForType(typ);
+ }
+ else
+ {
+ initVal = vnStore->VNForFunc(typ, VNF_InitVal, vnStore->VNForIntCon(i));
+ }
+ break;
+ }
+#ifdef _TARGET_X86_
+ bool isVarargParam = (i == lvaVarargsBaseOfStkArgs || i == lvaVarargsHandleArg);
+ if (isVarargParam)
+ initVal = vnStore->VNForExpr(fgFirstBB); // a new, unique VN.
+#endif
+ assert(initVal != ValueNumStore::NoVN);
+
+ LclSsaVarDsc* ssaDef = varDsc->GetPerSsaData(SsaConfig::FIRST_SSA_NUM);
+ ssaDef->m_vnPair.SetBoth(initVal);
+ ssaDef->m_defLoc.m_blk = fgFirstBB;
+ }
+ }
+ // Give "Heap" an initial value number (about which we know nothing).
+ ValueNum heapInitVal = vnStore->VNForFunc(TYP_REF, VNF_InitVal, vnStore->VNForIntCon(-1)); // Use -1 for the heap.
+ GetHeapPerSsaData(SsaConfig::FIRST_SSA_NUM)->m_vnPair.SetBoth(heapInitVal);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Heap Initial Value in BB01 is: " STR_VN "%x\n", heapInitVal);
+ }
+#endif // DEBUG
+
+ ValueNumberState vs(this);
+
+ // Push the first block. This has no preds.
+ vs.m_toDoAllPredsDone.Push(fgFirstBB);
+
+ while (vs.ToDoExists())
+ {
+ while (vs.m_toDoAllPredsDone.Size() > 0)
+ {
+ BasicBlock* toDo = vs.m_toDoAllPredsDone.Pop();
+ fgValueNumberBlock(toDo, /*newVNsForPhis*/ false);
+ // Record that we've visited "toDo", and add successors to the right sets.
+ vs.FinishVisit(toDo);
+ }
+ // OK, we've run out of blocks whose predecessors are done. Pick one whose predecessors are not all done,
+ // process that. This may make more "all-done" blocks, so we'll go around the outer loop again --
+ // note that this is an "if", not a "while" loop.
+ if (vs.m_toDoNotAllPredsDone.Size() > 0)
+ {
+ BasicBlock* toDo = vs.ChooseFromNotAllPredsDone();
+ if (toDo == nullptr)
+ {
+ continue; // We may have run out, because of completed blocks on the not-all-preds done list.
+ }
+
+ fgValueNumberBlock(toDo, /*newVNsForPhis*/ true);
+ // Record that we've visited "toDo", and add successors to the right sets.
+ vs.FinishVisit(toDo);
+ }
+ }
+
+#ifdef DEBUG
+ JitTestCheckVN();
+#endif // DEBUG
+
+ fgVNPassesCompleted++;
+}
+
+void Compiler::fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis)
+{
+ compCurBB = blk;
+
+#ifdef DEBUG
+ compCurStmtNum = blk->bbStmtNum - 1; // Set compCurStmtNum
+#endif
+
+ unsigned outerLoopNum = BasicBlock::NOT_IN_LOOP;
+
+ // First: visit phis. If "newVNsForPhis", give them new VNs. If not,
+ // first check to see if all phi args have the same value.
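+ // E.g., if every phi arg carries $100 for both the liberal and conservative VNs, the phi def simply
+ // gets $100; otherwise it gets a VNF_PhiDef(lclNum, ssaNum, <phi args>) function VN (see below).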
+ GenTreePtr firstNonPhi = blk->FirstNonPhiDef();
+ for (GenTreePtr phiDefs = blk->bbTreeList; phiDefs != firstNonPhi; phiDefs = phiDefs->gtNext)
+ {
+ // TODO-Cleanup: It has been proposed that we should have an IsPhiDef predicate. We would use it
+ // in Block::FirstNonPhiDef as well.
+ GenTreePtr phiDef = phiDefs->gtStmt.gtStmtExpr;
+ assert(phiDef->OperGet() == GT_ASG);
+ GenTreeLclVarCommon* newSsaVar = phiDef->gtOp.gtOp1->AsLclVarCommon();
+
+ ValueNumPair phiAppVNP;
+ ValueNumPair sameVNPair;
+
+ GenTreePtr phiFunc = phiDef->gtOp.gtOp2;
+
+ // At this point a GT_PHI node should never have a nullptr for gtOp1
+ // and the gtOp1 should always be a GT_LIST node.
+ GenTreePtr phiOp1 = phiFunc->gtOp.gtOp1;
+ noway_assert(phiOp1 != nullptr);
+ noway_assert(phiOp1->OperGet() == GT_LIST);
+
+ GenTreeArgList* phiArgs = phiFunc->gtOp.gtOp1->AsArgList();
+
+ // A GT_PHI node should have more than one argument.
+ noway_assert(phiArgs->Rest() != nullptr);
+
+ GenTreeLclVarCommon* phiArg = phiArgs->Current()->AsLclVarCommon();
+ phiArgs = phiArgs->Rest();
+
+ phiAppVNP.SetBoth(vnStore->VNForIntCon(phiArg->gtSsaNum));
+ bool allSameLib = true;
+ bool allSameCons = true;
+ sameVNPair = lvaTable[phiArg->gtLclNum].GetPerSsaData(phiArg->gtSsaNum)->m_vnPair;
+ if (!sameVNPair.BothDefined())
+ {
+ allSameLib = false;
+ allSameCons = false;
+ }
+ while (phiArgs != nullptr)
+ {
+ phiArg = phiArgs->Current()->AsLclVarCommon();
+ // Set the VN of the phi arg.
+ phiArg->gtVNPair = lvaTable[phiArg->gtLclNum].GetPerSsaData(phiArg->gtSsaNum)->m_vnPair;
+ if (phiArg->gtVNPair.BothDefined())
+ {
+ if (phiArg->gtVNPair.GetLiberal() != sameVNPair.GetLiberal())
+ {
+ allSameLib = false;
+ }
+ if (phiArg->gtVNPair.GetConservative() != sameVNPair.GetConservative())
+ {
+ allSameCons = false;
+ }
+ }
+ else
+ {
+ allSameLib = false;
+ allSameCons = false;
+ }
+ ValueNumPair phiArgSsaVNP;
+ phiArgSsaVNP.SetBoth(vnStore->VNForIntCon(phiArg->gtSsaNum));
+ phiAppVNP = vnStore->VNPairForFunc(newSsaVar->TypeGet(), VNF_Phi, phiArgSsaVNP, phiAppVNP);
+ phiArgs = phiArgs->Rest();
+ }
+
+ ValueNumPair newVNPair;
+ if (allSameLib)
+ {
+ newVNPair.SetLiberal(sameVNPair.GetLiberal());
+ }
+ else
+ {
+ newVNPair.SetLiberal(phiAppVNP.GetLiberal());
+ }
+ if (allSameCons)
+ {
+ newVNPair.SetConservative(sameVNPair.GetConservative());
+ }
+ else
+ {
+ newVNPair.SetConservative(phiAppVNP.GetConservative());
+ }
+
+ LclSsaVarDsc* newSsaVarDsc = lvaTable[newSsaVar->gtLclNum].GetPerSsaData(newSsaVar->GetSsaNum());
+ // If all the args of the phi had the same values (liberal and conservative), then there wasn't really
+ // a reason to have the phi -- just pass on that value.
+ if (allSameLib && allSameCons)
+ {
+ newSsaVarDsc->m_vnPair = newVNPair;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("In SSA definition, incoming phi args all same, set VN of local %d/%d to ",
+ newSsaVar->GetLclNum(), newSsaVar->GetSsaNum());
+ vnpPrint(newVNPair, 1);
+ printf(".\n");
+ }
+#endif // DEBUG
+ }
+ else
+ {
+ // They were not the same; we need to create a phi definition.
+ ValueNumPair lclNumVNP;
+ lclNumVNP.SetBoth(ValueNum(newSsaVar->GetLclNum()));
+ ValueNumPair ssaNumVNP;
+ ssaNumVNP.SetBoth(ValueNum(newSsaVar->GetSsaNum()));
+ ValueNumPair vnPhiDef =
+ vnStore->VNPairForFunc(newSsaVar->TypeGet(), VNF_PhiDef, lclNumVNP, ssaNumVNP, phiAppVNP);
+ newSsaVarDsc->m_vnPair = vnPhiDef;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("SSA definition: set VN of local %d/%d to ", newSsaVar->GetLclNum(), newSsaVar->GetSsaNum());
+ vnpPrint(vnPhiDef, 1);
+ printf(".\n");
+ }
+#endif // DEBUG
+ }
+ }
+
+ // Now do the same for "Heap".
+ // Is there a phi for this block?
+ if (blk->bbHeapSsaPhiFunc == nullptr)
+ {
+ fgCurHeapVN = GetHeapPerSsaData(blk->bbHeapSsaNumIn)->m_vnPair.GetLiberal();
+ assert(fgCurHeapVN != ValueNumStore::NoVN);
+ }
+ else
+ {
+ unsigned loopNum;
+ ValueNum newHeapVN;
+ if (optBlockIsLoopEntry(blk, &loopNum))
+ {
+ newHeapVN = fgHeapVNForLoopSideEffects(blk, loopNum);
+ }
+ else
+ {
+ // Are all the VN's the same?
+ BasicBlock::HeapPhiArg* phiArgs = blk->bbHeapSsaPhiFunc;
+ assert(phiArgs != BasicBlock::EmptyHeapPhiDef);
+ // There should be > 1 args to a phi.
+ assert(phiArgs->m_nextArg != nullptr);
+ ValueNum phiAppVN = vnStore->VNForIntCon(phiArgs->GetSsaNum());
+ JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiAppVN, phiArgs->GetSsaNum());
+ bool allSame = true;
+ ValueNum sameVN = GetHeapPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
+ if (sameVN == ValueNumStore::NoVN)
+ {
+ allSame = false;
+ }
+ phiArgs = phiArgs->m_nextArg;
+ while (phiArgs != nullptr)
+ {
+ ValueNum phiArgVN = GetHeapPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
+ if (phiArgVN == ValueNumStore::NoVN || phiArgVN != sameVN)
+ {
+ allSame = false;
+ }
+#ifdef DEBUG
+ ValueNum oldPhiAppVN = phiAppVN;
+#endif
+ unsigned phiArgSSANum = phiArgs->GetSsaNum();
+ ValueNum phiArgSSANumVN = vnStore->VNForIntCon(phiArgSSANum);
+ JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiArgSSANumVN, phiArgSSANum);
+ phiAppVN = vnStore->VNForFunc(TYP_REF, VNF_Phi, phiArgSSANumVN, phiAppVN);
+ JITDUMP(" Building phi application: $%x = phi($%x, $%x).\n", phiAppVN, phiArgSSANumVN, oldPhiAppVN);
+ phiArgs = phiArgs->m_nextArg;
+ }
+ if (allSame)
+ {
+ newHeapVN = sameVN;
+ }
+ else
+ {
+ newHeapVN =
+ vnStore->VNForFunc(TYP_REF, VNF_PhiHeapDef, vnStore->VNForHandle(ssize_t(blk), 0), phiAppVN);
+ }
+ }
+ GetHeapPerSsaData(blk->bbHeapSsaNumIn)->m_vnPair.SetLiberal(newHeapVN);
+ fgCurHeapVN = newHeapVN;
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("The SSA definition for heap (#%d) at start of BB%02u is ", blk->bbHeapSsaNumIn, blk->bbNum);
+ vnPrint(fgCurHeapVN, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Now iterate over the remaining statements, and their trees.
+ for (GenTreePtr stmt = firstNonPhi; stmt != nullptr; stmt = stmt->gtNext)
+ {
+ assert(stmt->IsStatement());
+
+#ifdef DEBUG
+ compCurStmtNum++;
+ if (verbose)
+ {
+ printf("\n***** BB%02u, stmt %d (before)\n", blk->bbNum, compCurStmtNum);
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ }
+#endif
+
+ for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
+ {
+ fgValueNumberTree(tree);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n***** BB%02u, stmt %d (after)\n", blk->bbNum, compCurStmtNum);
+ gtDispTree(stmt->gtStmt.gtStmtExpr);
+ printf("\n");
+ if (stmt->gtNext)
+ {
+ printf("---------\n");
+ }
+ }
+#endif
+ }
+
+ if (blk->bbHeapSsaNumOut != blk->bbHeapSsaNumIn)
+ {
+ GetHeapPerSsaData(blk->bbHeapSsaNumOut)->m_vnPair.SetLiberal(fgCurHeapVN);
+ }
+
+ compCurBB = nullptr;
+}
+
+ValueNum Compiler::fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned innermostLoopNum)
+{
+ // "loopNum" is the innermost loop for which "blk" is the entry; find the outermost one.
+ assert(innermostLoopNum != BasicBlock::NOT_IN_LOOP);
+ unsigned loopsInNest = innermostLoopNum;
+ unsigned loopNum = innermostLoopNum;
+ while (loopsInNest != BasicBlock::NOT_IN_LOOP)
+ {
+ if (optLoopTable[loopsInNest].lpEntry != entryBlock)
+ {
+ break;
+ }
+ loopNum = loopsInNest;
+ loopsInNest = optLoopTable[loopsInNest].lpParent;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Computing heap state for block BB%02u, entry block for loops %d to %d:\n", entryBlock->bbNum,
+ innermostLoopNum, loopNum);
+ }
+#endif // DEBUG
+
+ // If this loop has heap havoc effects, just use a new, unique VN.
+ if (optLoopTable[loopNum].lpLoopHasHeapHavoc)
+ {
+ ValueNum res = vnStore->VNForExpr(entryBlock, TYP_REF);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Loop %d has heap havoc effect; heap state is new fresh $%x.\n", loopNum, res);
+ }
+#endif // DEBUG
+ return res;
+ }
+
+ // Otherwise, find the predecessors of the entry block that are not in the loop.
+ // If there is only one such, use its heap value as the "base." If more than one,
+ // use a new unique heap VN.
+ BasicBlock* nonLoopPred = nullptr;
+ bool multipleNonLoopPreds = false;
+ for (flowList* pred = BlockPredsWithEH(entryBlock); pred != nullptr; pred = pred->flNext)
+ {
+ BasicBlock* predBlock = pred->flBlock;
+ if (!optLoopTable[loopNum].lpContains(predBlock))
+ {
+ if (nonLoopPred == nullptr)
+ {
+ nonLoopPred = predBlock;
+ }
+ else
+ {
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Entry block has >1 non-loop preds: (at least) BB%02u and BB%02u.\n", nonLoopPred->bbNum,
+ predBlock->bbNum);
+ }
+#endif // DEBUG
+ multipleNonLoopPreds = true;
+ break;
+ }
+ }
+ }
+ if (multipleNonLoopPreds)
+ {
+ ValueNum res = vnStore->VNForExpr(entryBlock, TYP_REF);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Therefore, heap state is new, fresh $%x.\n", res);
+ }
+#endif // DEBUG
+ return res;
+ }
+ // Otherwise, there is a single non-loop pred.
+ assert(nonLoopPred != nullptr);
+ // What is its heap post-state?
+ ValueNum newHeapVN = GetHeapPerSsaData(nonLoopPred->bbHeapSsaNumOut)->m_vnPair.GetLiberal();
+ assert(newHeapVN !=
+ ValueNumStore::NoVN); // We must have processed the single non-loop pred before reaching the loop entry.
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Init heap state is $%x, with new, fresh VN at:\n", newHeapVN);
+ }
+#endif // DEBUG
+ // Modify "base" by setting all the modified fields/field maps/array maps to unknown values.
+ // First the fields/field maps.
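+ // E.g., if the loop writes static field F, the base heap VN H becomes MapStore(H, VN(F-handle), <fresh VN>),
+ // so later reads of F inside the loop select an unknown (fresh) value, while unmodified fields still
+ // select their pre-loop values out of H.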
+
+ Compiler::LoopDsc::FieldHandleSet* fieldsMod = optLoopTable[loopNum].lpFieldsModified;
+ if (fieldsMod != nullptr)
+ {
+ for (Compiler::LoopDsc::FieldHandleSet::KeyIterator ki = fieldsMod->Begin(); !ki.Equal(fieldsMod->End()); ++ki)
+ {
+ CORINFO_FIELD_HANDLE fldHnd = ki.Get();
+ ValueNum fldHndVN = vnStore->VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ const char* modName;
+ const char* fldName = eeGetFieldName(fldHnd, &modName);
+ printf(" VNForHandle(Fseq[%s]) is " STR_VN "%x\n", fldName, fldHndVN);
+
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+
+ newHeapVN = vnStore->VNForMapStore(TYP_REF, newHeapVN, fldHndVN, vnStore->VNForExpr(entryBlock, TYP_REF));
+ }
+ }
+ // Now do the array maps.
+ Compiler::LoopDsc::ClassHandleSet* elemTypesMod = optLoopTable[loopNum].lpArrayElemTypesModified;
+ if (elemTypesMod != nullptr)
+ {
+ for (Compiler::LoopDsc::ClassHandleSet::KeyIterator ki = elemTypesMod->Begin(); !ki.Equal(elemTypesMod->End());
+ ++ki)
+ {
+ CORINFO_CLASS_HANDLE elemClsHnd = ki.Get();
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ var_types elemTyp = DecodeElemType(elemClsHnd);
+ if (varTypeIsStruct(elemTyp))
+ {
+ printf(" Array map %s[]\n", eeGetClassName(elemClsHnd));
+ }
+ else
+ {
+ printf(" Array map %s[]\n", varTypeName(elemTyp));
+ }
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+
+ ValueNum elemTypeVN = vnStore->VNForHandle(ssize_t(elemClsHnd), GTF_ICON_CLASS_HDL);
+ ValueNum uniqueVN = vnStore->VNForExpr(entryBlock, TYP_REF);
+ newHeapVN = vnStore->VNForMapStore(TYP_REF, newHeapVN, elemTypeVN, uniqueVN);
+ }
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Final heap state is $%x.\n", newHeapVN);
+ }
+#endif // DEBUG
+ return newHeapVN;
+}
+
+void Compiler::fgMutateHeap(GenTreePtr tree DEBUGARG(const char* msg))
+{
+ // bbHeapDef must be set to true for any block that mutates the global heap.
+ assert(compCurBB->bbHeapDef);
+
+ fgCurHeapVN = vnStore->VNForExpr(compCurBB, TYP_REF);
+
+ // If we're tracking the heap SSA # caused by this node, record it.
+ fgValueNumberRecordHeapSsa(tree);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" fgCurHeapVN assigned by %s at ", msg);
+ Compiler::printTreeID(tree);
+ printf(" to new unique VN: " STR_VN "%x.\n", fgCurHeapVN);
+ }
+#endif // DEBUG
+}
+
+void Compiler::fgValueNumberRecordHeapSsa(GenTreePtr tree)
+{
+ unsigned ssaNum;
+ if (GetHeapSsaMap()->Lookup(tree, &ssaNum))
+ {
+ GetHeapPerSsaData(ssaNum)->m_vnPair.SetLiberal(fgCurHeapVN);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Node ");
+ Compiler::printTreeID(tree);
+ printf(" sets heap SSA # %d to VN $%x: ", ssaNum, fgCurHeapVN);
+ vnStore->vnDump(this, fgCurHeapVN);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+}
+
+// The input 'tree' is a leaf node that is a constant
+// Assign the proper value number to the tree
+void Compiler::fgValueNumberTreeConst(GenTreePtr tree)
+{
+ genTreeOps oper = tree->OperGet();
+ var_types typ = tree->TypeGet();
+ assert(GenTree::OperIsConst(oper));
+
+ switch (typ)
+ {
+ case TYP_LONG:
+ case TYP_ULONG:
+ case TYP_INT:
+ case TYP_UINT:
+ case TYP_CHAR:
+ case TYP_SHORT:
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ case TYP_BOOL:
+ if (tree->IsCnsIntOrI() && tree->IsIconHandle())
+ {
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForHandle(ssize_t(tree->gtIntConCommon.IconValue()), tree->GetIconHandleFlag()));
+ }
+ else if ((typ == TYP_LONG) || (typ == TYP_ULONG))
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForLongCon(INT64(tree->gtIntConCommon.LngValue())));
+ }
+ else
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForIntCon(int(tree->gtIntConCommon.IconValue())));
+ }
+ break;
+
+ case TYP_FLOAT:
+ tree->gtVNPair.SetBoth(vnStore->VNForFloatCon((float)tree->gtDblCon.gtDconVal));
+ break;
+ case TYP_DOUBLE:
+ tree->gtVNPair.SetBoth(vnStore->VNForDoubleCon(tree->gtDblCon.gtDconVal));
+ break;
+ case TYP_REF:
+ // Null is the only constant. (Except maybe for String?)
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForNull());
+ break;
+
+ case TYP_BYREF:
+ if (tree->gtIntConCommon.IconValue() == 0)
+ {
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForNull());
+ }
+ else
+ {
+ assert(tree->IsCnsIntOrI());
+
+ if (tree->IsIconHandle())
+ {
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForHandle(ssize_t(tree->gtIntConCommon.IconValue()), tree->GetIconHandleFlag()));
+ }
+ else
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForByrefCon(tree->gtIntConCommon.IconValue()));
+ }
+ }
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// fgValueNumberBlockAssignment: Perform value numbering for block assignments.
+//
+// Arguments:
+// tree - the block assignment to be value numbered.
+// evalAsgLhsInd - true iff we should value number the LHS of the assignment.
+//
+// Return Value:
+// None.
+//
+// Assumptions:
+// 'tree' must be a block assignment (GT_INITBLK, GT_COPYBLK, GT_COPYOBJ).
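+//
+// Notes:
+// For example, an init block that stores zero over the whole of an SSA-tracked local gives the
+// local's new SSA def the zero VN for its type (the "initVal == 0" case below); other cases get a
+// new, unique VN.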
+
+void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
+{
+ GenTree* lhs = tree->gtGetOp1();
+ GenTree* rhs = tree->gtGetOp2();
+#ifdef DEBUG
+ // Sometimes we query the heap ssa map, and need a dummy location for the ignored result.
+ unsigned heapSsaNum;
+#endif
+
+ if (tree->OperIsInitBlkOp())
+ {
+ GenTreeLclVarCommon* lclVarTree;
+ bool isEntire;
+
+ if (tree->DefinesLocal(this, &lclVarTree, &isEntire))
+ {
+ assert(lclVarTree->gtFlags & GTF_VAR_DEF);
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ unsigned lclNum = lclVarTree->GetLclNum();
+
+ // Ignore vars that we excluded from SSA (for example, because they're address-exposed). They don't have
+ // SSA names in which to store VN's on defs. We'll yield unique VN's when we read from them.
+ if (!fgExcludeFromSsa(lclNum))
+ {
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
+
+ ValueNum initBlkVN = ValueNumStore::NoVN;
+ GenTreePtr initConst = rhs;
+ if (isEntire && initConst->OperGet() == GT_CNS_INT)
+ {
+ unsigned initVal = 0xFF & (unsigned)initConst->AsIntConCommon()->IconValue();
+ if (initVal == 0)
+ {
+ initBlkVN = vnStore->VNZeroForType(lclVarTree->TypeGet());
+ }
+ }
+ ValueNum lclVarVN = (initBlkVN != ValueNumStore::NoVN)
+ ? initBlkVN
+ : vnStore->VNForExpr(compCurBB, var_types(lvaTable[lclNum].lvType));
+
+ lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair.SetBoth(lclVarVN);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("N%03u ", tree->gtSeqNum);
+ Compiler::printTreeID(tree);
+ printf(" ");
+ gtDispNodeName(tree);
+ printf(" V%02u/%d => ", lclNum, lclDefSsaNum);
+ vnPrint(lclVarVN, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ // Initblocks are of type void. Give them the void "value" -- they may occur in argument lists, which we
+ // want to be able to give VNs to.
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
+ }
+ else
+ {
+ // For now, arbitrary side effect on Heap.
+ // TODO-CQ: Why not be complete, and get this case right?
+ fgMutateHeap(tree DEBUGARG("INITBLK - non local"));
+ }
+ }
+ else
+ {
+ assert(tree->OperIsCopyBlkOp());
+ // TODO-Cleanup: We should factor things so that we uniformly rely on "PtrTo" VN's, and
+ // the heap cases can be shared with assignments.
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ // Note that we don't care about exceptions here, since we're only using the values
+ // to perform an assignment (which happens after any exceptions are raised...)
+
+ if (tree->DefinesLocal(this, &lclVarTree, &isEntire))
+ {
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ unsigned lhsLclNum = lclVarTree->GetLclNum();
+ FieldSeqNode* lhsFldSeq = nullptr;
+ // If it's excluded from SSA, don't need to do anything.
+ if (!fgExcludeFromSsa(lhsLclNum))
+ {
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
+
+ if (lhs->IsLocalExpr(this, &lclVarTree, &lhsFldSeq) ||
+ (lhs->OperIsBlk() && (lhs->AsBlk()->gtBlkSize == lvaLclSize(lhsLclNum))))
+ {
+ noway_assert(lclVarTree->gtLclNum == lhsLclNum);
+ }
+ else
+ {
+ GenTree* lhsAddr;
+ if (lhs->OperIsBlk())
+ {
+ lhsAddr = lhs->AsBlk()->Addr();
+ }
+ else
+ {
+ assert(lhs->OperGet() == GT_IND);
+ lhsAddr = lhs->gtOp.gtOp1;
+ }
+ // For addr-of-local expressions, lib/cons shouldn't matter.
+ assert(lhsAddr->gtVNPair.BothEqual());
+ ValueNum lhsAddrVN = lhsAddr->GetVN(VNK_Liberal);
+
+ // Unpack the PtrToLoc value number of the address.
+ assert(vnStore->IsVNFunc(lhsAddrVN));
+ VNFuncApp lhsAddrFuncApp;
+ vnStore->GetVNFunc(lhsAddrVN, &lhsAddrFuncApp);
+ assert(lhsAddrFuncApp.m_func == VNF_PtrToLoc);
+ assert(vnStore->IsVNConstant(lhsAddrFuncApp.m_args[0]) &&
+ vnStore->ConstantValue<unsigned>(lhsAddrFuncApp.m_args[0]) == lhsLclNum);
+ lhsFldSeq = vnStore->FieldSeqVNToFieldSeq(lhsAddrFuncApp.m_args[1]);
+ }
+
+ // Now we need to get the proper RHS.
+ GenTreeLclVarCommon* rhsLclVarTree = nullptr;
+ LclVarDsc* rhsVarDsc = nullptr;
+ FieldSeqNode* rhsFldSeq = nullptr;
+ ValueNumPair rhsVNPair;
+ bool isNewUniq = false;
+ if (!rhs->OperIsIndir())
+ {
+ if (rhs->IsLocalExpr(this, &rhsLclVarTree, &rhsFldSeq))
+ {
+ unsigned rhsLclNum = rhsLclVarTree->GetLclNum();
+ rhsVarDsc = &lvaTable[rhsLclNum];
+ if (fgExcludeFromSsa(rhsLclNum) || rhsFldSeq == FieldSeqStore::NotAField())
+ {
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, rhsLclVarTree->TypeGet()));
+ isNewUniq = true;
+ }
+ else
+ {
+ rhsVNPair = lvaTable[rhsLclVarTree->GetLclNum()]
+ .GetPerSsaData(rhsLclVarTree->GetSsaNum())
+ ->m_vnPair;
+ var_types indType = rhsLclVarTree->TypeGet();
+
+ rhsVNPair = vnStore->VNPairApplySelectors(rhsVNPair, rhsFldSeq, indType);
+ }
+ }
+ else
+ {
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, rhs->TypeGet()));
+ isNewUniq = true;
+ }
+ }
+ else
+ {
+ GenTreePtr srcAddr = rhs->AsIndir()->Addr();
+ VNFuncApp srcAddrFuncApp;
+ if (srcAddr->IsLocalAddrExpr(this, &rhsLclVarTree, &rhsFldSeq))
+ {
+ unsigned rhsLclNum = rhsLclVarTree->GetLclNum();
+ rhsVarDsc = &lvaTable[rhsLclNum];
+ if (fgExcludeFromSsa(rhsLclNum) || rhsFldSeq == FieldSeqStore::NotAField())
+ {
+ isNewUniq = true;
+ }
+ else
+ {
+ rhsVNPair = lvaTable[rhsLclVarTree->GetLclNum()]
+ .GetPerSsaData(rhsLclVarTree->GetSsaNum())
+ ->m_vnPair;
+ var_types indType = rhsLclVarTree->TypeGet();
+
+ rhsVNPair = vnStore->VNPairApplySelectors(rhsVNPair, rhsFldSeq, indType);
+ }
+ }
+ else if (vnStore->GetVNFunc(vnStore->VNNormVal(srcAddr->gtVNPair.GetLiberal()), &srcAddrFuncApp))
+ {
+ if (srcAddrFuncApp.m_func == VNF_PtrToStatic)
+ {
+ var_types indType = lclVarTree->TypeGet();
+ ValueNum fieldSeqVN = srcAddrFuncApp.m_args[0];
+
+ FieldSeqNode* zeroOffsetFldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(srcAddr, &zeroOffsetFldSeq))
+ {
+ fieldSeqVN =
+ vnStore->FieldSeqVNAppend(fieldSeqVN, vnStore->VNForFieldSeq(zeroOffsetFldSeq));
+ }
+
+ FieldSeqNode* fldSeqForStaticVar = vnStore->FieldSeqVNToFieldSeq(fieldSeqVN);
+
+ if (fldSeqForStaticVar != FieldSeqStore::NotAField())
+ {
+ // We model statics as indices into the heap variable.
+ ValueNum selectedStaticVar;
+ size_t structSize = 0;
+ selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN,
+ fldSeqForStaticVar, &structSize);
+ selectedStaticVar =
+ vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize);
+
+ rhsVNPair.SetLiberal(selectedStaticVar);
+ rhsVNPair.SetConservative(vnStore->VNForExpr(compCurBB, indType));
+ }
+ else
+ {
+ JITDUMP(" *** Missing field sequence info for Src/RHS of COPYBLK\n");
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, indType)); // a new unique value number
+ }
+ }
+ else if (srcAddrFuncApp.m_func == VNF_PtrToArrElem)
+ {
+ ValueNum elemLib =
+ fgValueNumberArrIndexVal(nullptr, &srcAddrFuncApp, vnStore->VNForEmptyExcSet());
+ rhsVNPair.SetLiberal(elemLib);
+ rhsVNPair.SetConservative(vnStore->VNForExpr(compCurBB, lclVarTree->TypeGet()));
+ }
+ else
+ {
+ isNewUniq = true;
+ }
+ }
+ else
+ {
+ isNewUniq = true;
+ }
+ }
+
+ if (lhsFldSeq == FieldSeqStore::NotAField())
+ {
+ // We don't have proper field sequence information for the lhs
+ //
+ JITDUMP(" *** Missing field sequence info for Dst/LHS of COPYBLK\n");
+ isNewUniq = true;
+ }
+ else if (lhsFldSeq != nullptr && isEntire)
+ {
+ // This can occur for structs with one field, itself of a struct type.
+ // We won't promote these.
+ // TODO-Cleanup: decide what exactly to do about this.
+ // Always treat them as maps, making them use/def, or reconstitute the
+ // map view here?
+ isNewUniq = true;
+ }
+ else if (!isNewUniq)
+ {
+ ValueNumPair oldLhsVNPair = lvaTable[lhsLclNum].GetPerSsaData(lclVarTree->GetSsaNum())->m_vnPair;
+ rhsVNPair = vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lhsFldSeq, rhsVNPair,
+ lclVarTree->TypeGet(), compCurBB);
+ }
+
+ if (isNewUniq)
+ {
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lclVarTree->TypeGet()));
+ }
+
+ lvaTable[lhsLclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = vnStore->VNPNormVal(rhsVNPair);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigned VN to local var V%02u/%d: ", lhsLclNum, lclDefSsaNum);
+ if (isNewUniq)
+ {
+ printf("new uniq ");
+ }
+ vnpPrint(rhsVNPair, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ }
+ else
+ {
+ // For now, arbitrary side effect on Heap.
+ // TODO-CQ: Why not be complete, and get this case right?
+ fgMutateHeap(tree DEBUGARG("COPYBLK - non local"));
+ }
+ // Copyblocks are of type void. Give them the void "value" -- they may occur in argument lists, which we want
+ // to be able to give VNs to.
+ tree->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
+ }
+}
+
+void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
+{
+ genTreeOps oper = tree->OperGet();
+
+#ifdef FEATURE_SIMD
+ // TODO-CQ: For now TYP_SIMD values are not handled by value numbering to be amenable for CSE'ing.
+ if (oper == GT_SIMD)
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_UNKNOWN));
+ return;
+ }
+#endif
+
+ var_types typ = tree->TypeGet();
+ if (GenTree::OperIsConst(oper))
+ {
+ // If this is a struct assignment, with a constant rhs, it is an initBlk, and it is not
+ // really useful to value number the constant.
+ if (!varTypeIsStruct(tree))
+ {
+ fgValueNumberTreeConst(tree);
+ }
+ }
+ else if (GenTree::OperIsLeaf(oper))
+ {
+ switch (oper)
+ {
+ case GT_LCL_VAR:
+ case GT_REG_VAR:
+ {
+ GenTreeLclVarCommon* lcl = tree->AsLclVarCommon();
+ unsigned lclNum = lcl->gtLclNum;
+
+ if ((lcl->gtFlags & GTF_VAR_DEF) == 0 ||
+ (lcl->gtFlags & GTF_VAR_USEASG)) // If it is a "pure" def, it will be handled as part of the assignment.
+ {
+ LclVarDsc* varDsc = &lvaTable[lcl->gtLclNum];
+ if (varDsc->lvPromoted && varDsc->lvFieldCnt == 1)
+ {
+ // If the promoted var has only one field var, treat like a use of the field var.
+ lclNum = varDsc->lvFieldLclStart;
+ }
+
+ // Initialize to the undefined value, so we know whether we hit any of the cases here.
+ lcl->gtVNPair = ValueNumPair();
+
+ if (lcl->gtSsaNum == SsaConfig::RESERVED_SSA_NUM)
+ {
+ // Not an SSA variable. Assign each occurrence a new, unique, VN.
+ lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ }
+ else
+ {
+ var_types varType = varDsc->TypeGet();
+ ValueNumPair wholeLclVarVNP = varDsc->GetPerSsaData(lcl->gtSsaNum)->m_vnPair;
+
+ // Check for mismatched LclVar size
+ //
+ unsigned typSize = genTypeSize(genActualType(typ));
+ unsigned varSize = genTypeSize(genActualType(varType));
+
+ if (typSize == varSize)
+ {
+ lcl->gtVNPair = wholeLclVarVNP;
+ }
+ else // mismatched LclVar definition and LclVar use size
+ {
+ if (typSize < varSize)
+ {
+ // the indirection is reading less than the whole LclVar
+ // create a new VN that represent the partial value
+ //
+ ValueNumPair partialLclVarVNP = vnStore->VNPairForCast(wholeLclVarVNP, typ, varType);
+ lcl->gtVNPair = partialLclVarVNP;
+ }
+ else
+ {
+ assert(typSize > varSize);
+ // the indirection is reading beyond the end of the LclVar
+ //
+ lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, typ)); // return a new unique value
+ // number
+ }
+ }
+ }
+ // Temporary, to make progress.
+ // TODO-CQ: This should become an assert again...
+ if (lcl->gtVNPair.GetLiberal() == ValueNumStore::NoVN)
+ {
+ assert(lcl->gtVNPair.GetConservative() == ValueNumStore::NoVN);
+
+ // We don't want to fabricate arbitrary value numbers to things we can't reason about.
+ // So far, we know about two of these cases:
+ // Case 1) We have a local var who has never been defined but it's seen as a use.
+ // This is the case of storeIndir(addr(lclvar)) = expr. In this case since we only
+ // take the address of the variable, this doesn't mean it's a use nor we have to
+ // initialize it, so in this very rare case, we fabricate a value number.
+ // Case 2) Local variables that represent structs which are assigned using CpBlk.
+ GenTree* nextNode = lcl->gtNext;
+ assert((nextNode->gtOper == GT_ADDR && nextNode->gtOp.gtOp1 == lcl) ||
+ varTypeIsStruct(lcl->TypeGet()));
+ lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ }
+ assert(lcl->gtVNPair.BothDefined());
+ }
+
+ // TODO-Review: For the short term, we have a workaround for copyblk/initblk. Those that use
+ // addrSpillTemp will have a statement like "addrSpillTemp = addr(local)." If we previously decided
+ // that this block operation defines the local, we will have labeled the "local" node as a DEF
+ // (or USEDEF). This flag propagates to the "local" on the RHS. So we'll assume that this is correct,
+ // and treat it as a def (to a new, unique VN).
+ else if ((lcl->gtFlags & GTF_VAR_DEF) != 0)
+ {
+ LclVarDsc* varDsc = &lvaTable[lcl->gtLclNum];
+ if (lcl->gtSsaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ lvaTable[lclNum]
+ .GetPerSsaData(lcl->gtSsaNum)
+ ->m_vnPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ }
+ lcl->gtVNPair = ValueNumPair(); // Avoid confusion -- we don't set the VN of a lcl being defined.
+ }
+ }
+ break;
+
+ case GT_FTN_ADDR:
+ // Use the value of the function pointer (actually, a method handle.)
+ tree->gtVNPair.SetBoth(
+ vnStore->VNForHandle(ssize_t(tree->gtFptrVal.gtFptrMethod), GTF_ICON_METHOD_HDL));
+ break;
+
+ // This group passes through a value from a child node.
+ case GT_RET_EXPR:
+ tree->SetVNsFromNode(tree->gtRetExpr.gtInlineCandidate);
+ break;
+
+ case GT_LCL_FLD:
+ {
+ GenTreeLclFld* lclFld = tree->AsLclFld();
+ assert(fgExcludeFromSsa(lclFld->GetLclNum()) || lclFld->gtFieldSeq != nullptr);
+ // If this is a (full) def, then the variable will be labeled with the new SSA number,
+ // which will not have a value. We skip; it will be handled by one of the assignment-like
+ // forms (assignment, or initBlk or copyBlk).
+ if (((lclFld->gtFlags & GTF_VAR_DEF) == 0) || (lclFld->gtFlags & GTF_VAR_USEASG))
+ {
+ unsigned lclNum = lclFld->GetLclNum();
+ unsigned ssaNum = lclFld->GetSsaNum();
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if (ssaNum == SsaConfig::UNINIT_SSA_NUM)
+ {
+ if (varDsc->GetPerSsaData(ssaNum)->m_vnPair.GetLiberal() == ValueNumStore::NoVN)
+ {
+ ValueNum vnForLcl = vnStore->VNForExpr(compCurBB, lclFld->TypeGet());
+ varDsc->GetPerSsaData(ssaNum)->m_vnPair = ValueNumPair(vnForLcl, vnForLcl);
+ }
+ }
+
+ var_types indType = tree->TypeGet();
+ if (lclFld->gtFieldSeq == FieldSeqStore::NotAField() || fgExcludeFromSsa(lclFld->GetLclNum()))
+ {
+ // This doesn't represent a proper field access or it's a struct
+ // with overlapping fields that is hard to reason about; return a new unique VN.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, indType));
+ }
+ else
+ {
+ ValueNumPair lclVNPair = varDsc->GetPerSsaData(ssaNum)->m_vnPair;
+ tree->gtVNPair = vnStore->VNPairApplySelectors(lclVNPair, lclFld->gtFieldSeq, indType);
+ }
+ }
+ }
+ break;
+
+ // The ones below here all get a new unique VN -- but for various reasons, explained after each.
+ case GT_CATCH_ARG:
+ // We know nothing about the value of a caught expression.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ case GT_CLS_VAR:
+ // Skip GT_CLS_VAR nodes that are the LHS of an assignment. (We labeled these earlier.)
+ // We will "evaluate" this as part of the assignment. (Unless we're explicitly told by
+ // the caller to evaluate anyway -- perhaps the assignment is an "op=" assignment.)
+ //
+ if (((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0) || evalAsgLhsInd)
+ {
+ bool isVolatile = (tree->gtFlags & GTF_FLD_VOLATILE) != 0;
+
+ if (isVolatile)
+ {
+ // For Volatile indirection, first mutate the global heap
+ fgMutateHeap(tree DEBUGARG("GTF_FLD_VOLATILE - read"));
+ }
+
+ // We just mutate the heap if isVolatile is true, and then do the read as normal.
+ //
+ // This allows:
+ // 1: read s;
+ // 2: volatile read s;
+ // 3: read s;
+ //
+ // We should never assume that the values read by 1 and 2 are the same (because the heap was mutated
+ // in between them)... but we *should* be able to prove that the values read in 2 and 3 are the
+ // same.
+ //
+
+ ValueNumPair clsVarVNPair;
+
+ // If the static field handle is for a struct type field, then the value of the static
+ // is a "ref" to the boxed struct -- treat it as the address of the static (we assume that a
+ // first element offset will be added to get to the actual struct...)
+ GenTreeClsVar* clsVar = tree->AsClsVar();
+ FieldSeqNode* fldSeq = clsVar->gtFieldSeq;
+ assert(fldSeq != nullptr); // We need to have one.
+ ValueNum selectedStaticVar = ValueNumStore::NoVN;
+ if (gtIsStaticFieldPtrToBoxedStruct(clsVar->TypeGet(), fldSeq->m_fieldHnd))
+ {
+ clsVarVNPair.SetBoth(
+ vnStore->VNForFunc(TYP_BYREF, VNF_PtrToStatic, vnStore->VNForFieldSeq(fldSeq)));
+ }
+ else
+ {
+ // This is a reference to heap memory.
+ // We model statics as indices into the heap variable.
+
+ FieldSeqNode* fldSeqForStaticVar =
+ GetFieldSeqStore()->CreateSingleton(tree->gtClsVar.gtClsVarHnd);
+ size_t structSize = 0;
+ selectedStaticVar =
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize);
+ selectedStaticVar =
+ vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, tree->TypeGet(), structSize);
+
+ clsVarVNPair.SetLiberal(selectedStaticVar);
+ // The conservative interpretation always gets a new, unique VN.
+ clsVarVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+
+ // The ValueNum returned must represent the full-sized IL-Stack value.
+ // If we need to widen this value then we need to introduce a VNF_Cast here to represent
+ // the widened value. This is necessary since the CSE package can replace all occurrences
+ // of a given ValueNum with a LclVar that is a full-sized IL-Stack value.
+ //
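+ // For example (illustrative only): a read of a TYP_SHORT static is numbered as a cast of the stored
+ // value, so if CSE later substitutes a full-width (INT) local for this ValueNum, the substituted
+ // value still agrees with the full-sized IL-Stack value described above.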
+ if (varTypeIsSmall(tree->TypeGet()))
+ {
+ var_types castToType = tree->TypeGet();
+ clsVarVNPair = vnStore->VNPairForCast(clsVarVNPair, castToType, castToType);
+ }
+ tree->gtVNPair = clsVarVNPair;
+ }
+ break;
+
+ case GT_MEMORYBARRIER: // Leaf
+ // For MEMORYBARRIER add an arbitrary side effect on Heap.
+ fgMutateHeap(tree DEBUGARG("MEMORYBARRIER"));
+ break;
+
+ // These do not represent values.
+ case GT_NO_OP:
+ case GT_JMP: // Control flow
+ case GT_LABEL: // Control flow
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN: // Control flow
+#endif
+ case GT_ARGPLACE:
+ // This node is a stand-in for an argument whose value will be computed later. (Perhaps it's
+ // a register argument, and we don't want to preclude use of the register in arg evaluation yet.)
+ // We give this a "fake" value number now; if the call in which it occurs cares about the
+ // value (e.g., it's a helper call whose result is a function of argument values) we'll reset
+ // this later, when the later args have been assigned VNs.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ case GT_PHI_ARG:
+ // This one is special because we should never process it in this method: it should
+ // always be taken care of, when needed, during pre-processing of a block's phi definitions.
+ assert(false);
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else if (GenTree::OperIsSimple(oper))
+ {
+#ifdef DEBUG
+ // Sometimes we query the heap ssa map, and need a dummy location for the ignored result.
+ unsigned heapSsaNum;
+#endif
+
+ if (GenTree::OperIsAssignment(oper) && !varTypeIsStruct(tree))
+ {
+
+ GenTreePtr lhs = tree->gtOp.gtOp1;
+ GenTreePtr rhs = tree->gtOp.gtOp2;
+
+ ValueNumPair rhsVNPair;
+ if (oper == GT_ASG)
+ {
+ rhsVNPair = rhs->gtVNPair;
+ }
+ else // Must be an "op="
+ {
+ // If the LHS is an IND, we didn't evaluate it when we visited it previously.
+ // But we didn't know that the parent was an op=. We do now, so go back and evaluate it.
+ // (We actually check if the effective val is the IND. We will have evaluated any non-last
+ // args of an LHS comma already -- including their heap effects.)
+ GenTreePtr lhsVal = lhs->gtEffectiveVal(/*commaOnly*/ true);
+ if (lhsVal->OperIsIndir() || (lhsVal->OperGet() == GT_CLS_VAR))
+ {
+ fgValueNumberTree(lhsVal, /*evalAsgLhsInd*/ true);
+ }
+ // Now we can make this assertion:
+ assert(lhsVal->gtVNPair.BothDefined());
+ genTreeOps op = GenTree::OpAsgToOper(oper);
+ if (GenTree::OperIsBinary(op))
+ {
+ ValueNumPair lhsNormVNP;
+ ValueNumPair lhsExcVNP;
+ lhsExcVNP.SetBoth(ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(lhsVal->gtVNPair, &lhsNormVNP, &lhsExcVNP);
+ assert(rhs->gtVNPair.BothDefined());
+ ValueNumPair rhsNormVNP;
+ ValueNumPair rhsExcVNP;
+ rhsExcVNP.SetBoth(ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(rhs->gtVNPair, &rhsNormVNP, &rhsExcVNP);
+ rhsVNPair = vnStore->VNPWithExc(vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(op, (tree->gtFlags &
+ GTF_UNSIGNED) != 0),
+ lhsNormVNP, rhsNormVNP),
+ vnStore->VNPExcSetUnion(lhsExcVNP, rhsExcVNP));
+ }
+ else
+ {
+ // As of now, GT_CHS ==> GT_NEG is the only pattern fitting this.
+ assert(GenTree::OperIsUnary(op));
+ ValueNumPair lhsNormVNP;
+ ValueNumPair lhsExcVNP;
+ lhsExcVNP.SetBoth(ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(lhsVal->gtVNPair, &lhsNormVNP, &lhsExcVNP);
+ rhsVNPair = vnStore->VNPWithExc(vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(op, (tree->gtFlags &
+ GTF_UNSIGNED) != 0),
+ lhsNormVNP),
+ lhsExcVNP);
+ }
+ }
+ if (tree->TypeGet() != TYP_VOID)
+ {
+ // Assignment operators, as expressions, return the value of the RHS.
+ tree->gtVNPair = rhsVNPair;
+ }
+
+ // Now that we've labeled the assignment as a whole, we don't care about exceptions.
+ rhsVNPair = vnStore->VNPNormVal(rhsVNPair);
+
+ // If the types of the rhs and lhs are different then we
+ // may want to change the ValueNumber assigned to the lhs.
+ //
+ if (rhs->TypeGet() != lhs->TypeGet())
+ {
+ if (rhs->TypeGet() == TYP_REF)
+ {
+ // If we have an unsafe IL assignment of a TYP_REF to a non-ref (typically a TYP_BYREF)
+ // then don't propagate this ValueNumber to the lhs, instead create a new unique VN
+ //
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lhs->TypeGet()));
+ }
+ }
+
+ // We have to handle the case where the LHS is a comma. In that case, we don't evaluate the comma,
+ // so we give it VNForVoid, and we're really interested in the effective value.
+ GenTreePtr lhsCommaIter = lhs;
+ while (lhsCommaIter->OperGet() == GT_COMMA)
+ {
+ lhsCommaIter->gtVNPair.SetBoth(vnStore->VNForVoid());
+ lhsCommaIter = lhsCommaIter->gtOp.gtOp2;
+ }
+ lhs = lhs->gtEffectiveVal();
+
+ // Now, record the new VN for an assignment (performing the indicated "state update").
+ // It's safe to use gtEffectiveVal here, because the non-last elements of a comma list on the
+ // LHS will come before the assignment in evaluation order.
+ switch (lhs->OperGet())
+ {
+ case GT_LCL_VAR:
+ case GT_REG_VAR:
+ {
+ GenTreeLclVarCommon* lcl = lhs->AsLclVarCommon();
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lcl);
+
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ if (lclDefSsaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ assert(rhsVNPair.GetLiberal() != ValueNumStore::NoVN);
+
+ lhs->gtVNPair = rhsVNPair;
+ lvaTable[lcl->gtLclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = rhsVNPair;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("N%03u ", lhs->gtSeqNum);
+ Compiler::printTreeID(lhs);
+ printf(" ");
+ gtDispNodeName(lhs);
+ gtDispLeaf(lhs, nullptr);
+ printf(" => ");
+ vnpPrint(lhs->gtVNPair, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+#ifdef DEBUG
+ else
+ {
+ if (verbose)
+ {
+ JITDUMP("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigns to local var V%02u; excluded from SSA, so value not tracked.\n",
+ lcl->GetLclNum());
+ }
+ }
+#endif // DEBUG
+ }
+ break;
+ case GT_LCL_FLD:
+ {
+ GenTreeLclFld* lclFld = lhs->AsLclFld();
+ unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclFld);
+
+ // Should not have been recorded as updating the heap.
+ assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+
+ if (lclDefSsaNum != SsaConfig::RESERVED_SSA_NUM)
+ {
+ ValueNumPair newLhsVNPair;
+ // Is this a full definition?
+ if ((lclFld->gtFlags & GTF_VAR_USEASG) == 0)
+ {
+ assert(!lclFld->IsPartialLclFld(this));
+ assert(rhsVNPair.GetLiberal() != ValueNumStore::NoVN);
+ newLhsVNPair = rhsVNPair;
+ }
+ else
+ {
+ // We should never have a null field sequence here.
+ assert(lclFld->gtFieldSeq != nullptr);
+ if (lclFld->gtFieldSeq == FieldSeqStore::NotAField())
+ {
+ // We don't know what field this represents. Assign a new VN to the whole variable
+ // (since we may be writing to an unknown portion of it.)
+ newLhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lvaGetActualType(lclFld->gtLclNum)));
+ }
+ else
+ {
+ // We do know the field sequence.
+ // The "lclFld" node will be labeled with the SSA number of its "use" identity
+ // (we looked in a side table above for its "def" identity). Look up that value.
+ ValueNumPair oldLhsVNPair =
+ lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclFld->GetSsaNum())->m_vnPair;
+ newLhsVNPair =
+ vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, lclFld->gtFieldSeq,
+ rhsVNPair, // Pre-value.
+ lvaGetActualType(lclFld->gtLclNum), compCurBB);
+ }
+ }
+ lvaTable[lclFld->GetLclNum()].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair;
+ lhs->gtVNPair = newLhsVNPair;
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (lhs->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ printf("N%03u ", lhs->gtSeqNum);
+ Compiler::printTreeID(lhs);
+ printf(" ");
+ gtDispNodeName(lhs);
+ gtDispLeaf(lhs, nullptr);
+ printf(" => ");
+ vnpPrint(lhs->gtVNPair, 1);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+ }
+ }
+ break;
+
+ case GT_PHI_ARG:
+ assert(false); // Phi arg cannot be LHS.
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_IND:
+ {
+ bool isVolatile = (lhs->gtFlags & GTF_IND_VOLATILE) != 0;
+
+ if (isVolatile)
+ {
+ // For Volatile store indirection, first mutate the global heap
+ fgMutateHeap(lhs DEBUGARG("GTF_IND_VOLATILE - store"));
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lhs->TypeGet()));
+ }
+
+ GenTreePtr arg = lhs->gtOp.gtOp1;
+
+ // Indicates whether the argument of the IND is the address of a local.
+ bool wasLocal = false;
+
+ lhs->gtVNPair = rhsVNPair;
+
+ VNFuncApp funcApp;
+ ValueNum argVN = arg->gtVNPair.GetLiberal();
+
+ bool argIsVNFunc = vnStore->GetVNFunc(vnStore->VNNormVal(argVN), &funcApp);
+
+ // Is this an assignment to a (field of, perhaps) a local?
+ // If it is a PtrToLoc, lib and cons VNs will be the same.
+ if (argIsVNFunc)
+ {
+ IndirectAssignmentAnnotation* pIndirAnnot =
+ nullptr; // This will be used if "tree" is an "indirect assignment",
+ // explained below.
+ if (funcApp.m_func == VNF_PtrToLoc)
+ {
+ assert(arg->gtVNPair.BothEqual()); // If it's a PtrToLoc, lib/cons shouldn't differ.
+ assert(vnStore->IsVNConstant(funcApp.m_args[0]));
+ unsigned lclNum = vnStore->ConstantValue<unsigned>(funcApp.m_args[0]);
+
+ wasLocal = true;
+
+ if (!fgExcludeFromSsa(lclNum))
+ {
+ FieldSeqNode* fieldSeq = vnStore->FieldSeqVNToFieldSeq(funcApp.m_args[1]);
+
+ // Either "arg" is the address of (part of) a local itself, or the assignment is an
+ // "indirect assignment", where an outer comma expression assigned the address of a
+ // local to a temp, and that temp is our lhs, and we recorded this in a table when we
+ // made the indirect assignment...or else we have a "rogue" PtrToLoc, one that should
+ // have made the local in question address-exposed. Assert on that.
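+ // Illustrative shape of the "indirect assignment" case (an assumption for exposition only):
+ // "tmp = addr(local.f), ..., ind(tmp) = rhs" -- the side table consulted below (GetIndirAssignMap)
+ // records which local and SSA def the store through "tmp" actually updates.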
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isEntire = false;
+ unsigned lclDefSsaNum = SsaConfig::RESERVED_SSA_NUM;
+ ValueNumPair newLhsVNPair;
+
+ if (arg->DefinesLocalAddr(this, genTypeSize(lhs->TypeGet()), &lclVarTree, &isEntire))
+ {
+ // The local #'s should agree.
+ assert(lclNum == lclVarTree->GetLclNum());
+
+ if (fieldSeq == FieldSeqStore::NotAField())
+ {
+ // We don't know where we're storing, so give the local a new, unique VN.
+ // Do this by considering it an "entire" assignment, with an unknown RHS.
+ isEntire = true;
+ rhsVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lclVarTree->TypeGet()));
+ }
+
+ if (isEntire)
+ {
+ newLhsVNPair = rhsVNPair;
+ lclDefSsaNum = lclVarTree->GetSsaNum();
+ }
+ else
+ {
+ // Don't use the lclVarTree's VN: if it's a local field, it will
+ // already be dereferenced by its field sequence.
+ ValueNumPair oldLhsVNPair = lvaTable[lclVarTree->GetLclNum()]
+ .GetPerSsaData(lclVarTree->GetSsaNum())
+ ->m_vnPair;
+ lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
+ newLhsVNPair =
+ vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, fieldSeq, rhsVNPair,
+ lhs->TypeGet(), compCurBB);
+ }
+ lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair;
+ }
+ else if (m_indirAssignMap != nullptr && GetIndirAssignMap()->Lookup(tree, &pIndirAnnot))
+ {
+ // The local #'s should agree.
+ assert(lclNum == pIndirAnnot->m_lclNum);
+ assert(pIndirAnnot->m_defSsaNum != SsaConfig::RESERVED_SSA_NUM);
+ lclDefSsaNum = pIndirAnnot->m_defSsaNum;
+ // Does this assignment write the entire width of the local?
+ if (genTypeSize(lhs->TypeGet()) == genTypeSize(var_types(lvaTable[lclNum].lvType)))
+ {
+ assert(pIndirAnnot->m_useSsaNum == SsaConfig::RESERVED_SSA_NUM);
+ assert(pIndirAnnot->m_isEntire);
+ newLhsVNPair = rhsVNPair;
+ }
+ else
+ {
+ assert(pIndirAnnot->m_useSsaNum != SsaConfig::RESERVED_SSA_NUM);
+ assert(!pIndirAnnot->m_isEntire);
+ assert(pIndirAnnot->m_fieldSeq == fieldSeq);
+ ValueNumPair oldLhsVNPair =
+ lvaTable[lclNum].GetPerSsaData(pIndirAnnot->m_useSsaNum)->m_vnPair;
+ newLhsVNPair =
+ vnStore->VNPairApplySelectorsAssign(oldLhsVNPair, fieldSeq, rhsVNPair,
+ lhs->TypeGet(), compCurBB);
+ }
+ lvaTable[lclNum].GetPerSsaData(lclDefSsaNum)->m_vnPair = newLhsVNPair;
+ }
+ else
+ {
+ unreached(); // "Rogue" PtrToLoc, as discussed above.
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigned VN to local var V%02u/%d: VN ", lclNum, lclDefSsaNum);
+ vnpPrint(newLhsVNPair, 1);
+ printf("\n");
+ }
+#endif // DEBUG
+ }
+ }
+ }
+
+ // Was the argument of the GT_IND the address of a local, handled above?
+ if (!wasLocal)
+ {
+ GenTreePtr obj = nullptr;
+ GenTreePtr staticOffset = nullptr;
+ FieldSeqNode* fldSeq = nullptr;
+
+ // Is the LHS an array index expression?
+ if (argIsVNFunc && funcApp.m_func == VNF_PtrToArrElem)
+ {
+ CORINFO_CLASS_HANDLE elemTypeEq =
+ CORINFO_CLASS_HANDLE(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
+ ValueNum arrVN = funcApp.m_args[1];
+ ValueNum inxVN = funcApp.m_args[2];
+ FieldSeqNode* fldSeq = vnStore->FieldSeqVNToFieldSeq(funcApp.m_args[3]);
+
+ // Does the child of the GT_IND 'arg' have an associated zero-offset field sequence?
+ FieldSeqNode* addrFieldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(arg, &addrFieldSeq))
+ {
+ fldSeq = GetFieldSeqStore()->Append(addrFieldSeq, fldSeq);
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Tree ");
+ Compiler::printTreeID(tree);
+ printf(" assigns to an array element:\n");
+ }
+#endif // DEBUG
+
+ fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq, rhsVNPair.GetLiberal(),
+ lhs->TypeGet());
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ // It may be that we haven't parsed it yet. Try.
+ else if (lhs->gtFlags & GTF_IND_ARR_INDEX)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(lhs, &arrInfo);
+ assert(b);
+ ValueNum arrVN = ValueNumStore::NoVN;
+ ValueNum inxVN = ValueNumStore::NoVN;
+ FieldSeqNode* fldSeq = nullptr;
+
+ // Try to parse it.
+ GenTreePtr arr = nullptr;
+ arg->ParseArrayAddress(this, &arrInfo, &arr, &inxVN, &fldSeq);
+ if (arr == nullptr)
+ {
+ fgMutateHeap(tree DEBUGARG("assignment to unparseable array expression"));
+ return;
+ }
+ // Otherwise, parsing succeeded.
+
+ // Need to form H[arrType][arr][ind][fldSeq] = rhsVNPair.GetLiberal()
+
+ // Get the element type equivalence class representative.
+ CORINFO_CLASS_HANDLE elemTypeEq =
+ EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ arrVN = arr->gtVNPair.GetLiberal();
+
+ FieldSeqNode* zeroOffsetFldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(arg, &zeroOffsetFldSeq))
+ {
+ fldSeq = GetFieldSeqStore()->Append(fldSeq, zeroOffsetFldSeq);
+ }
+
+ fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq, rhsVNPair.GetLiberal(),
+ lhs->TypeGet());
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ else if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq))
+ {
+ if (fldSeq == FieldSeqStore::NotAField())
+ {
+ fgMutateHeap(tree DEBUGARG("NotAField"));
+ }
+ else
+ {
+ assert(fldSeq != nullptr);
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE fldCls = info.compCompHnd->getFieldClass(fldSeq->m_fieldHnd);
+ if (obj != nullptr)
+ {
+ // Make sure that the class containing it is not a value class (as we are expecting
+ // an instance field)
+ assert((info.compCompHnd->getClassAttribs(fldCls) & CORINFO_FLG_VALUECLASS) == 0);
+ assert(staticOffset == nullptr);
+ }
+#endif // DEBUG
+ // Get the first (instance or static) field from field seq. Heap[field] will yield the
+ // "field map".
+ if (fldSeq->IsFirstElemFieldSeq())
+ {
+ fldSeq = fldSeq->m_next;
+ assert(fldSeq != nullptr);
+ }
+
+ // Get a field sequence for just the first field in the sequence
+ //
+ FieldSeqNode* firstFieldOnly = GetFieldSeqStore()->CreateSingleton(fldSeq->m_fieldHnd);
+
+ // The final field in the sequence will need to match the 'indType'
+ var_types indType = lhs->TypeGet();
+ ValueNum fldMapVN = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, firstFieldOnly);
+
+ // The type of the field is "struct" if there are more fields in the sequence,
+ // otherwise it is the type returned from VNApplySelectors above.
+ var_types firstFieldType = vnStore->TypeOfVN(fldMapVN);
+
+ ValueNum storeVal =
+ rhsVNPair.GetLiberal(); // The value number from the rhs of the assignment
+ ValueNum newFldMapVN = ValueNumStore::NoVN;
+
+ // when (obj != nullptr) we have an instance field, otherwise a static field
+ // when (staticOffset != nullptr) it represents an offset into a static or the call to
+ // Shared Static Base
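+ // For example (illustrative only): "o.f = v" reaches here with obj != nullptr, while a store to a
+ // field of a shared static struct reaches here with staticOffset != nullptr.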
+ if ((obj != nullptr) || (staticOffset != nullptr))
+ {
+ ValueNum valAtAddr = fldMapVN;
+ ValueNum normVal = ValueNumStore::NoVN;
+
+ if (obj != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at obj'
+ normVal = vnStore->VNNormVal(obj->GetVN(VNK_Liberal));
+ valAtAddr =
+ vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, normVal);
+ }
+ else // (staticOffset != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at staticOffset'
+ normVal = vnStore->VNNormVal(staticOffset->GetVN(VNK_Liberal));
+ valAtAddr =
+ vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, normVal);
+ }
+ // Now get rid of any remaining struct field dereferences. (if they exist)
+ if (fldSeq->m_next)
+ {
+ storeVal =
+ vnStore->VNApplySelectorsAssign(VNK_Liberal, valAtAddr, fldSeq->m_next,
+ storeVal, indType, compCurBB);
+ }
+
+ // From which we can construct the new ValueNumber for 'fldMap at normVal'
+ newFldMapVN = vnStore->VNForMapStore(vnStore->TypeOfVN(fldMapVN), fldMapVN, normVal,
+ storeVal);
+ }
+ else
+ {
+ // plain static field
+
+ // Now get rid of any remaining struct field dereferences. (if they exist)
+ if (fldSeq->m_next)
+ {
+ storeVal =
+ vnStore->VNApplySelectorsAssign(VNK_Liberal, fldMapVN, fldSeq->m_next,
+ storeVal, indType, compCurBB);
+ }
+
+ newFldMapVN = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, fldSeq,
+ storeVal, indType, compCurBB);
+ }
+
+ // It is not strictly necessary to set the lhs value number,
+ // but the dumps read better with it set to the 'storeVal' that we just computed
+ lhs->gtVNPair.SetBoth(storeVal);
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+ // bbHeapDef must be set to true for any block that Mutates the global Heap
+ assert(compCurBB->bbHeapDef);
+
+ // Update the field map for firstField in Heap to this new value.
+ fgCurHeapVN = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, firstFieldOnly,
+ newFldMapVN, indType, compCurBB);
+
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ }
+ else
+ {
+ GenTreeLclVarCommon* dummyLclVarTree = nullptr;
+ if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ // If it doesn't define a local, then it might update the heap.
+ fgMutateHeap(tree DEBUGARG("assign-of-IND"));
+ }
+ }
+ }
+
+ // We don't actually evaluate an IND on the LHS, so give it the Void value.
+ tree->gtVNPair.SetBoth(vnStore->VNForVoid());
+ }
+ break;
+
+ case GT_CLS_VAR:
+ {
+ bool isVolatile = (lhs->gtFlags & GTF_FLD_VOLATILE) != 0;
+
+ if (isVolatile)
+ {
+ // For Volatile store indirection, first mutate the global heap
+ fgMutateHeap(lhs DEBUGARG("GTF_CLS_VAR - store")); // always change fgCurHeapVN
+ }
+
+ // We model statics as indices into the heap variable.
+ FieldSeqNode* fldSeqForStaticVar = GetFieldSeqStore()->CreateSingleton(lhs->gtClsVar.gtClsVarHnd);
+ assert(fldSeqForStaticVar != FieldSeqStore::NotAField());
+
+ ValueNum storeVal = rhsVNPair.GetLiberal(); // The value number from the rhs of the assignment
+ storeVal = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, storeVal,
+ lhs->TypeGet(), compCurBB);
+
+ // It is not strictly necessary to set the lhs value number,
+ // but the dumps read better with it set to the 'storeVal' that we just computed
+ lhs->gtVNPair.SetBoth(storeVal);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" fgCurHeapVN assigned:\n");
+ }
+#endif // DEBUG
+ // bbHeapDef must be set to true for any block that Mutates the global Heap
+ assert(compCurBB->bbHeapDef);
+
+ // Update the field map for the fgCurHeapVN
+ fgCurHeapVN = storeVal;
+ fgValueNumberRecordHeapSsa(tree);
+ }
+ break;
+
+ default:
+ assert(!"Unknown node for lhs of assignment!");
+
+ // For Unknown stores, mutate the global heap
+ fgMutateHeap(lhs DEBUGARG("Unkwown Assignment - store")); // always change fgCurHeapVN
+ break;
+ }
+ }
+ // Other kinds of assignment: initblk and copyblk.
+ else if (oper == GT_ASG && varTypeIsStruct(tree))
+ {
+ fgValueNumberBlockAssignment(tree, evalAsgLhsInd);
+ }
+ else if (oper == GT_ADDR)
+ {
+ // We have special representations for byrefs to lvalues.
+ GenTreePtr arg = tree->gtOp.gtOp1;
+ if (arg->OperIsLocal())
+ {
+ FieldSeqNode* fieldSeq = nullptr;
+ ValueNum newVN = ValueNumStore::NoVN;
+ if (fgExcludeFromSsa(arg->gtLclVarCommon.GetLclNum()))
+ {
+ newVN = vnStore->VNForExpr(compCurBB, TYP_BYREF);
+ }
+ else if (arg->OperGet() == GT_LCL_FLD)
+ {
+ fieldSeq = arg->AsLclFld()->gtFieldSeq;
+ if (fieldSeq == nullptr)
+ {
+ // Local field with unknown field seq -- not a precise pointer.
+ newVN = vnStore->VNForExpr(compCurBB, TYP_BYREF);
+ }
+ }
+ if (newVN == ValueNumStore::NoVN)
+ {
+ assert(arg->gtLclVarCommon.GetSsaNum() != ValueNumStore::NoVN);
+ newVN = vnStore->VNForPtrToLoc(TYP_BYREF, vnStore->VNForIntCon(arg->gtLclVarCommon.GetLclNum()),
+ vnStore->VNForFieldSeq(fieldSeq));
+ }
+ tree->gtVNPair.SetBoth(newVN);
+ }
+ else if ((arg->gtOper == GT_IND) || arg->OperIsBlk())
+ {
+ // Usually the ADDR and IND just cancel out...
+ // except when this GT_ADDR has a valid zero-offset field sequence
+ //
+ FieldSeqNode* zeroOffsetFieldSeq = nullptr;
+ if (GetZeroOffsetFieldMap()->Lookup(tree, &zeroOffsetFieldSeq) &&
+ (zeroOffsetFieldSeq != FieldSeqStore::NotAField()))
+ {
+ ValueNum addrExtended = vnStore->ExtendPtrVN(arg->gtOp.gtOp1, zeroOffsetFieldSeq);
+ if (addrExtended != ValueNumStore::NoVN)
+ {
+ tree->gtVNPair.SetBoth(addrExtended); // We don't care about lib/cons differences for addresses.
+ }
+ else
+ {
+ // ExtendPtrVN returned a failure result
+ // So give this address a new unique value
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_BYREF));
+ }
+ }
+ else
+ {
+ // They just cancel, so fetch the ValueNumber from the op1 of the GT_IND node.
+ //
+ GenTree* addr = arg->AsIndir()->Addr();
+ tree->gtVNPair = addr->gtVNPair;
+
+ // For the CSE phase mark the address as GTF_DONT_CSE
+ // because it will end up with the same value number as tree (the GT_ADDR).
+ addr->gtFlags |= GTF_DONT_CSE;
+ }
+ }
+ else
+ {
+ // May be more cases to do here! But we'll punt for now.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, TYP_BYREF));
+ }
+ }
+ else if ((oper == GT_IND) || GenTree::OperIsBlk(oper))
+ {
+ // So far, we handle cases in which the address is a ptr-to-local, or if it's
+ // a pointer to an object field.
+ GenTreePtr addr = tree->AsIndir()->Addr();
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ FieldSeqNode* fldSeq1 = nullptr;
+ FieldSeqNode* fldSeq2 = nullptr;
+ GenTreePtr obj = nullptr;
+ GenTreePtr staticOffset = nullptr;
+ bool isVolatile = (tree->gtFlags & GTF_IND_VOLATILE) != 0;
+
+ // See if the addr has any exceptional part.
+ ValueNumPair addrNvnp;
+ ValueNumPair addrXvnp = ValueNumPair(ValueNumStore::VNForEmptyExcSet(), ValueNumStore::VNForEmptyExcSet());
+ vnStore->VNPUnpackExc(addr->gtVNPair, &addrNvnp, &addrXvnp);
+
+ // Is the dereference immutable? If so, model it as referencing the read-only heap.
+ if (tree->gtFlags & GTF_IND_INVARIANT)
+ {
+ assert(!isVolatile); // We don't expect both volatile and invariant
+ tree->gtVNPair =
+ ValueNumPair(vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, ValueNumStore::VNForROH(),
+ addrNvnp.GetLiberal()),
+ vnStore->VNForMapSelect(VNK_Conservative, TYP_REF, ValueNumStore::VNForROH(),
+ addrNvnp.GetConservative()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else if (isVolatile)
+ {
+ // For Volatile indirection, mutate the global heap
+ fgMutateHeap(tree DEBUGARG("GTF_IND_VOLATILE - read"));
+
+ // The value read by the GT_IND can immediately change
+ ValueNum newUniq = vnStore->VNForExpr(compCurBB, tree->TypeGet());
+ tree->gtVNPair = vnStore->VNPWithExc(ValueNumPair(newUniq, newUniq), addrXvnp);
+ }
+ // We always want to evaluate the LHS when the GT_IND node is marked with GTF_IND_ARR_INDEX
+ // as this will relabel the GT_IND child correctly using the VNF_PtrToArrElem
+ else if ((tree->gtFlags & GTF_IND_ARR_INDEX) != 0)
+ {
+ ArrayInfo arrInfo;
+ bool b = GetArrayInfoMap()->Lookup(tree, &arrInfo);
+ assert(b);
+
+ ValueNum inxVN = ValueNumStore::NoVN;
+ FieldSeqNode* fldSeq = nullptr;
+
+ // GenTreePtr addr = tree->gtOp.gtOp1;
+ ValueNum addrVN = addrNvnp.GetLiberal();
+
+ // Try to parse it.
+ GenTreePtr arr = nullptr;
+ addr->ParseArrayAddress(this, &arrInfo, &arr, &inxVN, &fldSeq);
+ if (arr == nullptr)
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ return;
+ }
+ assert(fldSeq != FieldSeqStore::NotAField());
+
+ // Otherwise...
+ // Need to form H[arrType][arr][ind][fldSeq]
+ // Get the array element type equivalence class rep.
+ CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
+ ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
+
+ // We take the "VNNormVal"s here, because if either has exceptional outcomes, they will be captured
+ // as part of the value of the composite "addr" operation...
+ ValueNum arrVN = vnStore->VNNormVal(arr->gtVNPair.GetLiberal());
+ inxVN = vnStore->VNNormVal(inxVN);
+
+ // Additionally, relabel the address with a PtrToArrElem value number.
+ ValueNum fldSeqVN = vnStore->VNForFieldSeq(fldSeq);
+ ValueNum elemAddr =
+ vnStore->VNForFunc(TYP_BYREF, VNF_PtrToArrElem, elemTypeEqVN, arrVN, inxVN, fldSeqVN);
+
+ // The aggregate "addr" VN should have had all the exceptions bubble up...
+ elemAddr = vnStore->VNWithExc(elemAddr, addrXvnp.GetLiberal());
+ addr->gtVNPair.SetBoth(elemAddr);
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" Relabeled IND_ARR_INDEX address node ");
+ Compiler::printTreeID(addr);
+ printf(" with l:" STR_VN "%x: ", elemAddr);
+ vnStore->vnDump(this, elemAddr);
+ printf("\n");
+ if (vnStore->VNNormVal(elemAddr) != elemAddr)
+ {
+ printf(" [" STR_VN "%x is: ", vnStore->VNNormVal(elemAddr));
+ vnStore->vnDump(this, vnStore->VNNormVal(elemAddr));
+ printf("]\n");
+ }
+ }
+#endif // DEBUG
+ // We now need to retrieve the value number for the array element value
+ // and give this value number to the GT_IND node 'tree'
+ // We do this whenever we have an rvalue, or for the LHS when we have an "op=",
+ // but we don't do it for a normal LHS assignment into an array element.
+ //
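+ // For example (illustrative only): "x = a[i]" and "a[i] |= x" both read the element value here,
+ // while a plain "a[i] = x" leaves the element update to the enclosing assignment's handling.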
+ if (evalAsgLhsInd || ((tree->gtFlags & GTF_IND_ASG_LHS) == 0))
+ {
+ fgValueNumberArrIndexVal(tree, elemTypeEq, arrVN, inxVN, addrXvnp.GetLiberal(), fldSeq);
+ }
+ }
+
+ // In general we skip GT_IND nodes that are the LHS of an assignment. (We labeled these earlier.)
+ // We will "evaluate" this as part of the assignment. (Unless we're explicitly told by
+ // the caller to evaluate anyway -- perhaps the assignment is an "op=" assignment.)
+ else if (((tree->gtFlags & GTF_IND_ASG_LHS) == 0) || evalAsgLhsInd)
+ {
+ FieldSeqNode* localFldSeq = nullptr;
+ VNFuncApp funcApp;
+
+ // Is it a local or a heap address?
+ if (addr->IsLocalAddrExpr(this, &lclVarTree, &localFldSeq) &&
+ !fgExcludeFromSsa(lclVarTree->GetLclNum()))
+ {
+ unsigned lclNum = lclVarTree->GetLclNum();
+ unsigned ssaNum = lclVarTree->GetSsaNum();
+ LclVarDsc* varDsc = &lvaTable[lclNum];
+
+ if ((localFldSeq == FieldSeqStore::NotAField()) || (localFldSeq == nullptr))
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ else
+ {
+ var_types indType = tree->TypeGet();
+ ValueNumPair lclVNPair = varDsc->GetPerSsaData(ssaNum)->m_vnPair;
+ tree->gtVNPair = vnStore->VNPairApplySelectors(lclVNPair, localFldSeq, indType);
+ }
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else if (vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) && funcApp.m_func == VNF_PtrToStatic)
+ {
+ var_types indType = tree->TypeGet();
+ ValueNum fieldSeqVN = funcApp.m_args[0];
+
+ FieldSeqNode* fldSeqForStaticVar = vnStore->FieldSeqVNToFieldSeq(fieldSeqVN);
+
+ if (fldSeqForStaticVar != FieldSeqStore::NotAField())
+ {
+ ValueNum selectedStaticVar;
+ // We model statics as indices into the heap variable.
+ size_t structSize = 0;
+ selectedStaticVar =
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize);
+ selectedStaticVar = vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize);
+
+ tree->gtVNPair.SetLiberal(selectedStaticVar);
+ tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, indType));
+ }
+ else
+ {
+ JITDUMP(" *** Missing field sequence info for VNF_PtrToStatic value GT_IND\n");
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, indType)); // a new unique value number
+ }
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else if (!varTypeIsStruct(tree) && vnStore->GetVNFunc(addrNvnp.GetLiberal(), &funcApp) &&
+ (funcApp.m_func == VNF_PtrToArrElem))
+ {
+ // TODO-1stClassStructs: The above condition need not exclude struct types, but it is
+ // excluded for now to minimize diffs.
+ fgValueNumberArrIndexVal(tree, &funcApp, addrXvnp.GetLiberal());
+ }
+ else if (!varTypeIsStruct(tree) && addr->IsFieldAddr(this, &obj, &staticOffset, &fldSeq2))
+ {
+ // TODO-1stClassStructs: The above condition need not exclude struct types, but it is
+ // excluded for now to minimize diffs.
+ if (fldSeq2 == FieldSeqStore::NotAField())
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ else if (fldSeq2 != nullptr)
+ {
+ // Get the first (instance or static) field from field seq. Heap[field] will yield the "field
+ // map".
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef DEBUG
+ CORINFO_CLASS_HANDLE fldCls = info.compCompHnd->getFieldClass(fldSeq2->m_fieldHnd);
+ if (obj != nullptr)
+ {
+ // Make sure that the class containing it is not a value class (as we are expecting an
+ // instance field)
+ assert((info.compCompHnd->getClassAttribs(fldCls) & CORINFO_FLG_VALUECLASS) == 0);
+ assert(staticOffset == nullptr);
+ }
+#endif // DEBUG
+ // Get a field sequence for just the first field in the sequence
+ //
+ FieldSeqNode* firstFieldOnly = GetFieldSeqStore()->CreateSingleton(fldSeq2->m_fieldHnd);
+ size_t structSize = 0;
+ ValueNum fldMapVN =
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, firstFieldOnly, &structSize);
+
+ // The final field in the sequence will need to match the 'indType'
+ var_types indType = tree->TypeGet();
+
+ // The type of the field is "struct" if there are more fields in the sequence,
+ // otherwise it is the type returned from VNApplySelectors above.
+ var_types firstFieldType = vnStore->TypeOfVN(fldMapVN);
+
+ ValueNum valAtAddr = fldMapVN;
+ if (obj != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at obj'
+ ValueNum objNormVal = vnStore->VNNormVal(obj->GetVN(VNK_Liberal));
+ valAtAddr = vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, objNormVal);
+ }
+ else if (staticOffset != nullptr)
+ {
+ // construct the ValueNumber for 'fldMap at staticOffset'
+ ValueNum offsetNormVal = vnStore->VNNormVal(staticOffset->GetVN(VNK_Liberal));
+ valAtAddr = vnStore->VNForMapSelect(VNK_Liberal, firstFieldType, fldMapVN, offsetNormVal);
+ }
+
+ // Now get rid of any remaining struct field dereferences.
+ if (fldSeq2->m_next)
+ {
+ valAtAddr = vnStore->VNApplySelectors(VNK_Liberal, valAtAddr, fldSeq2->m_next, &structSize);
+ }
+ valAtAddr = vnStore->VNApplySelectorsTypeCheck(valAtAddr, indType, structSize);
+
+ tree->gtVNPair.SetLiberal(valAtAddr);
+
+ // The conservative value is a new, unique VN.
+ tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ else
+ {
+ // Occasionally we do an explicit null test on a REF, so we just dereference it with no
+ // field sequence. The result is probably unused.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ }
+ else // We don't know where the address points.
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
+ }
+ }
+ }
+ else if (tree->OperGet() == GT_CAST)
+ {
+ fgValueNumberCastTree(tree);
+ }
+ else if (tree->OperGet() == GT_INTRINSIC)
+ {
+ fgValueNumberIntrinsic(tree);
+ }
+ else if (ValueNumStore::VNFuncIsLegal(GetVNFuncForOper(oper, (tree->gtFlags & GTF_UNSIGNED) != 0)))
+ {
+ if (GenTree::OperIsUnary(oper))
+ {
+ if (tree->gtOp.gtOp1 != nullptr)
+ {
+ if (tree->OperGet() == GT_NOP)
+ {
+ // Pass through arg vn.
+ tree->gtVNPair = tree->gtOp.gtOp1->gtVNPair;
+ }
+ else
+ {
+ ValueNumPair op1VNP;
+ ValueNumPair op1VNPx = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(tree->gtOp.gtOp1->gtVNPair, &op1VNP, &op1VNPx);
+ tree->gtVNPair =
+ vnStore->VNPWithExc(vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(oper, (tree->gtFlags &
+ GTF_UNSIGNED) != 0),
+ op1VNP),
+ op1VNPx);
+ }
+ }
+ else // Is actually nullary.
+ {
+ // Mostly we'll leave these without a value number, assuming we'll detect these as VN failures
+ // if they actually need to have values; the exception is NOPs, which can sometimes have
+ // meaning.
+ if (tree->OperGet() == GT_NOP)
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ }
+ }
+ else
+ {
+ assert(!GenTree::OperIsAssignment(oper)); // We handled assignments earlier.
+ assert(GenTree::OperIsBinary(oper));
+ // Standard binary operator.
+ ValueNumPair op2VNPair;
+ if (tree->gtOp.gtOp2 == nullptr)
+ {
+ op2VNPair.SetBoth(ValueNumStore::VNForNull());
+ }
+ else
+ {
+ op2VNPair = tree->gtOp.gtOp2->gtVNPair;
+ }
+ // A special case: if we add a field offset constant to a PtrToXXX, we get back a new PtrToXXX.
+ ValueNum newVN = ValueNumStore::NoVN;
+
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(tree->gtOp.gtOp1->gtVNPair, &op1vnp, &op1Xvnp);
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(op2VNPair, &op2vnp, &op2Xvnp);
+ ValueNumPair excSet = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp);
+
+ if (oper == GT_ADD)
+ {
+ newVN = vnStore->ExtendPtrVN(tree->gtOp.gtOp1, tree->gtOp.gtOp2);
+ if (newVN == ValueNumStore::NoVN)
+ {
+ newVN = vnStore->ExtendPtrVN(tree->gtOp.gtOp2, tree->gtOp.gtOp1);
+ }
+ }
+ if (newVN != ValueNumStore::NoVN)
+ {
+ newVN = vnStore->VNWithExc(newVN, excSet.GetLiberal());
+ // We don't care about differences between liberal and conservative for pointer values.
+ tree->gtVNPair.SetBoth(newVN);
+ }
+ else
+ {
+
+ ValueNumPair normalRes =
+ vnStore->VNPairForFunc(tree->TypeGet(),
+ GetVNFuncForOper(oper, (tree->gtFlags & GTF_UNSIGNED) != 0), op1vnp,
+ op2vnp);
+ // Overflow-checking operations add an overflow exception
+ if (tree->gtOverflowEx())
+ {
+ ValueNum overflowExcSet =
+ vnStore->VNExcSetSingleton(vnStore->VNForFunc(TYP_REF, VNF_OverflowExc));
+ excSet = vnStore->VNPExcSetUnion(excSet, ValueNumPair(overflowExcSet, overflowExcSet));
+ }
+ tree->gtVNPair = vnStore->VNPWithExc(normalRes, excSet);
+ }
+ }
+ }
+ else // ValueNumStore::VNFuncIsLegal returns false
+ {
+ // Some genTreeOps aren't legal VNFuncs, so they get special handling.
+ switch (oper)
+ {
+ case GT_COMMA:
+ {
+ ValueNumPair op1vnp;
+ ValueNumPair op1Xvnp = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(tree->gtOp.gtOp1->gtVNPair, &op1vnp, &op1Xvnp);
+ ValueNumPair op2vnp;
+ ValueNumPair op2Xvnp = ValueNumStore::VNPForEmptyExcSet();
+
+ GenTree* op2 = tree->gtGetOp2();
+ if (op2->OperIsIndir() && ((op2->gtFlags & GTF_IND_ASG_LHS) != 0))
+ {
+ // If op2 represents the lhs of an assignment then we give a VNForVoid for the lhs
+ op2vnp = ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid());
+ }
+ else if ((op2->OperGet() == GT_CLS_VAR) && (op2->gtFlags & GTF_CLS_VAR_ASG_LHS))
+ {
+ // If op2 represents the lhs of an assignment then we give a VNForVoid for the lhs
+ op2vnp = ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid());
+ }
+ else
+ {
+ vnStore->VNPUnpackExc(op2->gtVNPair, &op2vnp, &op2Xvnp);
+ }
+
+ tree->gtVNPair = vnStore->VNPWithExc(op2vnp, vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp));
+ }
+ break;
+
+ case GT_NULLCHECK:
+ // Explicit null check.
+ tree->gtVNPair =
+ vnStore->VNPWithExc(ValueNumPair(ValueNumStore::VNForVoid(), ValueNumStore::VNForVoid()),
+ vnStore->VNPExcSetSingleton(
+ vnStore->VNPairForFunc(TYP_REF, VNF_NullPtrExc,
+ tree->gtOp.gtOp1->gtVNPair)));
+ break;
+
+ case GT_BLK:
+ case GT_OBJ:
+ case GT_IND:
+ if (tree->gtFlags & GTF_IND_ARR_LEN)
+ {
+ // It's an array length. The argument is the sum of an array ref with some integer values...
+ ValueNum arrRefLib = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetLiberal());
+ ValueNum arrRefCons = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetConservative());
+
+ assert(vnStore->TypeOfVN(arrRefLib) == TYP_REF || vnStore->TypeOfVN(arrRefLib) == TYP_BYREF);
+ if (vnStore->IsVNConstant(arrRefLib))
+ {
+ // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
+ tree->gtVNPair.SetLiberal(
+ vnStore->VNWithExc(ValueNumStore::VNForVoid(),
+ vnStore->VNExcSetSingleton(
+ vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefLib))));
+ }
+ else
+ {
+ tree->gtVNPair.SetLiberal(vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefLib));
+ }
+ assert(vnStore->TypeOfVN(arrRefCons) == TYP_REF || vnStore->TypeOfVN(arrRefCons) == TYP_BYREF);
+ if (vnStore->IsVNConstant(arrRefCons))
+ {
+ // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
+ tree->gtVNPair.SetConservative(
+ vnStore->VNWithExc(ValueNumStore::VNForVoid(),
+ vnStore->VNExcSetSingleton(
+ vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefCons))));
+ }
+ else
+ {
+ tree->gtVNPair.SetConservative(
+ vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefCons));
+ }
+ }
+ else
+ {
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ break;
+
+ case GT_LOCKADD: // Binop
+ case GT_XADD: // Binop
+ case GT_XCHG: // Binop
+ // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap.
+ fgMutateHeap(tree DEBUGARG("Interlocked intrinsic"));
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ case GT_JTRUE:
+ case GT_LIST:
+ // These nodes never need to have a ValueNumber
+ tree->gtVNPair.SetBoth(ValueNumStore::NoVN);
+ break;
+
+ default:
+ // The default action is to give the node a new, unique VN.
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+ }
+ }
+ }
+ else
+ {
+ assert(GenTree::OperIsSpecial(oper));
+
+ // TBD: We must handle these individually. For now:
+ switch (oper)
+ {
+ case GT_CALL:
+ fgValueNumberCall(tree->AsCall());
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ // A bounds check node has no value, but may throw exceptions.
+ ValueNumPair excSet = vnStore->VNPExcSetSingleton(
+ vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc,
+ vnStore->VNPNormVal(tree->AsBoundsChk()->gtArrLen->gtVNPair),
+ vnStore->VNPNormVal(tree->AsBoundsChk()->gtIndex->gtVNPair)));
+ excSet = vnStore->VNPExcSetUnion(excSet, vnStore->VNPExcVal(tree->AsBoundsChk()->gtArrLen->gtVNPair));
+ excSet = vnStore->VNPExcSetUnion(excSet, vnStore->VNPExcVal(tree->AsBoundsChk()->gtIndex->gtVNPair));
+
+ tree->gtVNPair = vnStore->VNPWithExc(vnStore->VNPForVoid(), excSet);
+ }
+ break;
+
+ case GT_CMPXCHG: // Specialop
+ // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap.
+ fgMutateHeap(tree DEBUGARG("Interlocked intrinsic"));
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ break;
+
+ default:
+ tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ }
+ }
+#ifdef DEBUG
+ if (verbose)
+ {
+ if (tree->gtVNPair.GetLiberal() != ValueNumStore::NoVN)
+ {
+ printf("N%03u ", tree->gtSeqNum);
+ printTreeID(tree);
+ printf(" ");
+ gtDispNodeName(tree);
+ if (tree->OperIsLeaf() || tree->OperIsLocalStore()) // local stores used to be leaves
+ {
+ gtDispLeaf(tree, nullptr);
+ }
+ printf(" => ");
+ vnpPrint(tree->gtVNPair, 1);
+ printf("\n");
+ }
+ }
+#endif // DEBUG
+}
+
+void Compiler::fgValueNumberIntrinsic(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_INTRINSIC);
+ GenTreeIntrinsic* intrinsic = tree->AsIntrinsic();
+ ValueNumPair arg0VNP, arg1VNP;
+ ValueNumPair arg0VNPx = ValueNumStore::VNPForEmptyExcSet();
+ ValueNumPair arg1VNPx = ValueNumStore::VNPForEmptyExcSet();
+
+ vnStore->VNPUnpackExc(intrinsic->gtOp.gtOp1->gtVNPair, &arg0VNP, &arg0VNPx);
+
+ if (intrinsic->gtOp.gtOp2 != nullptr)
+ {
+ vnStore->VNPUnpackExc(intrinsic->gtOp.gtOp2->gtVNPair, &arg1VNP, &arg1VNPx);
+ }
+
+ switch (intrinsic->gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Sqrt:
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Round:
+ case CORINFO_INTRINSIC_Cosh:
+ case CORINFO_INTRINSIC_Sinh:
+ case CORINFO_INTRINSIC_Tan:
+ case CORINFO_INTRINSIC_Tanh:
+ case CORINFO_INTRINSIC_Asin:
+ case CORINFO_INTRINSIC_Acos:
+ case CORINFO_INTRINSIC_Atan:
+ case CORINFO_INTRINSIC_Atan2:
+ case CORINFO_INTRINSIC_Log10:
+ case CORINFO_INTRINSIC_Pow:
+ case CORINFO_INTRINSIC_Exp:
+ case CORINFO_INTRINSIC_Ceiling:
+ case CORINFO_INTRINSIC_Floor:
+
+ // GT_INTRINSIC is currently a subtype of binary operators, but most of
+ // the math intrinsics are actually unary operations.
+
+ if (intrinsic->gtOp.gtOp2 == nullptr)
+ {
+ intrinsic->gtVNPair =
+ vnStore->VNPWithExc(vnStore->EvalMathFuncUnary(tree->TypeGet(), intrinsic->gtIntrinsicId, arg0VNP),
+ arg0VNPx);
+ }
+ else
+ {
+ ValueNumPair newVNP =
+ vnStore->EvalMathFuncBinary(tree->TypeGet(), intrinsic->gtIntrinsicId, arg0VNP, arg1VNP);
+ ValueNumPair excSet = vnStore->VNPExcSetUnion(arg0VNPx, arg1VNPx);
+ intrinsic->gtVNPair = vnStore->VNPWithExc(newVNP, excSet);
+ }
+
+ break;
+
+ case CORINFO_INTRINSIC_Object_GetType:
+ intrinsic->gtVNPair =
+ vnStore->VNPWithExc(vnStore->VNPairForFunc(intrinsic->TypeGet(), VNF_ObjGetType, arg0VNP), arg0VNPx);
+ break;
+
+ default:
+ unreached();
+ }
+}
+
+void Compiler::fgValueNumberCastTree(GenTreePtr tree)
+{
+ assert(tree->OperGet() == GT_CAST);
+
+ ValueNumPair srcVNPair = tree->gtOp.gtOp1->gtVNPair;
+ var_types castToType = tree->CastToType();
+ var_types castFromType = tree->CastFromType();
+ bool srcIsUnsigned = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+ bool hasOverflowCheck = tree->gtOverflowEx();
+
+ assert(genActualType(castToType) == tree->TypeGet()); // Ensure that the resultType is correct
+
+ tree->gtVNPair = vnStore->VNPairForCast(srcVNPair, castToType, castFromType, srcIsUnsigned, hasOverflowCheck);
+}
+
+// Compute the normal ValueNumber for a cast operation with no exceptions
+ValueNum ValueNumStore::VNForCast(ValueNum srcVN,
+ var_types castToType,
+ var_types castFromType,
+ bool srcIsUnsigned /* = false */)
+{
+ // The resulting type after performing the cast is always widened to a supported IL stack size
+ var_types resultType = genActualType(castToType);
+
+ // When we're considering the actual value returned by a non-checking cast, whether or not the source is
+ // unsigned does *not* matter for non-widening casts. That is, if we cast an int or a uint to short,
+ // we just extract the first two bytes from the source bit pattern, not worrying about the interpretation.
+ // The same is true in casting between signed/unsigned types of the same width. Only when we're doing
+ // a widening cast do we care about whether the source was unsigned, so we know whether to sign or zero extend it.
+ //
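+ // For example (illustrative only): casting either an int or a uint source down to short keeps just
+ // the low 16 bits, so both produce the same VN; only a widening cast (say, to long) must record
+ // whether the source was unsigned so the correct sign/zero extension is modeled.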
+ bool srcIsUnsignedNorm = srcIsUnsigned;
+ if (genTypeSize(castToType) <= genTypeSize(castFromType))
+ {
+ srcIsUnsignedNorm = false;
+ }
+
+ ValueNum castTypeVN = VNForCastOper(castToType, srcIsUnsigned);
+ ValueNum resultVN = VNForFunc(resultType, VNF_Cast, srcVN, castTypeVN);
+
+#ifdef DEBUG
+ if (m_pComp->verbose)
+ {
+ printf(" VNForCast(" STR_VN "%x, " STR_VN "%x) returns ", srcVN, castTypeVN);
+ m_pComp->vnPrint(resultVN, 1);
+ printf("\n");
+ }
+#endif
+
+ return resultVN;
+}
+
+// Compute the ValueNumberPair for a cast operation
+ValueNumPair ValueNumStore::VNPairForCast(ValueNumPair srcVNPair,
+ var_types castToType,
+ var_types castFromType,
+ bool srcIsUnsigned, /* = false */
+ bool hasOverflowCheck) /* = false */
+{
+ // The resulting type after performing the cast is always widened to a supported IL stack size
+ var_types resultType = genActualType(castToType);
+
+ ValueNumPair castArgVNP;
+ ValueNumPair castArgxVNP = ValueNumStore::VNPForEmptyExcSet();
+ VNPUnpackExc(srcVNPair, &castArgVNP, &castArgxVNP);
+
+ // When we're considering the actual value returned by a non-checking cast (or a checking cast that succeeds),
+ // whether or not the source is unsigned does *not* matter for non-widening casts.
+ // That is, if we cast an int or a uint to short, we just extract the first two bytes from the source
+ // bit pattern, not worrying about the interpretation. The same is true in casting between signed/unsigned
+ // types of the same width. Only when we're doing a widening cast do we care about whether the source
+ // was unsigned, so we know whether to sign or zero extend it.
+ //
+ // Important: Casts to floating point cannot be optimized in this fashion. (bug 946768)
+ //
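+ // For example (illustrative only): for a source value with its sign bit set, (double)(int)x and
+ // (double)(unsigned int)x differ, so the unsigned flag must be preserved for casts to floating point.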
+ bool srcIsUnsignedNorm = srcIsUnsigned;
+ if (genTypeSize(castToType) <= genTypeSize(castFromType) && !varTypeIsFloating(castToType))
+ {
+ srcIsUnsignedNorm = false;
+ }
+
+ ValueNum castTypeVN = VNForCastOper(castToType, srcIsUnsignedNorm);
+ ValueNumPair castTypeVNPair(castTypeVN, castTypeVN);
+ ValueNumPair castNormRes = VNPairForFunc(resultType, VNF_Cast, castArgVNP, castTypeVNPair);
+
+ ValueNumPair resultVNP = VNPWithExc(castNormRes, castArgxVNP);
+
+ // If we have a check for overflow, add the exception information.
+ if (hasOverflowCheck)
+ {
+ // For overflow checking, we always need to know whether the source is unsigned.
+ castTypeVNPair.SetBoth(VNForCastOper(castToType, srcIsUnsigned));
+ ValueNumPair excSet =
+ VNPExcSetSingleton(VNPairForFunc(TYP_REF, VNF_ConvOverflowExc, castArgVNP, castTypeVNPair));
+ excSet = VNPExcSetUnion(excSet, castArgxVNP);
+ resultVNP = VNPWithExc(castNormRes, excSet);
+ }
+
+ return resultVNP;
+}
+
+void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueNumPair vnpExc)
+{
+ unsigned nArgs = ValueNumStore::VNFuncArity(vnf);
+ assert(vnf != VNF_Boundary);
+ GenTreeArgList* args = call->gtCallArgs;
+ bool generateUniqueVN = false;
+ bool useEntryPointAddrAsArg0 = false;
+
+ switch (vnf)
+ {
+ case VNF_JitNew:
+ {
+ generateUniqueVN = true;
+ vnpExc = ValueNumStore::VNPForEmptyExcSet();
+ }
+ break;
+
+ case VNF_JitNewArr:
+ {
+ generateUniqueVN = true;
+ ValueNumPair vnp1 = vnStore->VNPNormVal(args->Rest()->Current()->gtVNPair);
+
+ // The New Array helper may throw an overflow exception
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_NewArrOverflowExc, vnp1));
+ }
+ break;
+
+ case VNF_BoxNullable:
+ {
+ // Generate a unique VN so that VNForFunc generates a unique value number for box nullable.
+ // Alternatively instead of using vnpUniq below in VNPairForFunc(...),
+ // we could use the value number of what the byref arg0 points to.
+ //
+ // But retrieving the value number of what the byref arg0 points to is quite a bit more work
+ // and doing so only very rarely allows for an additional optimization.
+ generateUniqueVN = true;
+ }
+ break;
+
+ case VNF_JitReadyToRunNew:
+ {
+ generateUniqueVN = true;
+ vnpExc = ValueNumStore::VNPForEmptyExcSet();
+ useEntryPointAddrAsArg0 = true;
+ }
+ break;
+
+ case VNF_JitReadyToRunNewArr:
+ {
+ generateUniqueVN = true;
+ ValueNumPair vnp1 = vnStore->VNPNormVal(args->Current()->gtVNPair);
+
+ // The New Array helper may throw an overflow exception
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_NewArrOverflowExc, vnp1));
+ useEntryPointAddrAsArg0 = true;
+ }
+ break;
+
+ case VNF_ReadyToRunStaticBase:
+ case VNF_ReadyToRunIsInstanceOf:
+ case VNF_ReadyToRunCastClass:
+ {
+ useEntryPointAddrAsArg0 = true;
+ }
+ break;
+
+ default:
+ {
+ assert(s_helperCallProperties.IsPure(eeGetHelperNum(call->gtCallMethHnd)));
+ }
+ break;
+ }
+
+ if (generateUniqueVN)
+ {
+ nArgs--;
+ }
+
+ ValueNumPair vnpUniq;
+ if (generateUniqueVN)
+ {
+ // Generate a unique VN so that VNForFunc generates a unique value number.
+ vnpUniq.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet()));
+ }
+
+ if (nArgs == 0)
+ {
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair.SetBoth(vnStore->VNForFunc(call->TypeGet(), vnf));
+ }
+ }
+ else
+ {
+ // Has at least one argument.
+ ValueNumPair vnp0;
+ ValueNumPair vnp0x = ValueNumStore::VNPForEmptyExcSet();
+#ifdef FEATURE_READYTORUN_COMPILER
+ if (useEntryPointAddrAsArg0)
+ {
+ ValueNum callAddrVN = vnStore->VNForPtrSizeIntCon((ssize_t)call->gtCall.gtEntryPoint.addr);
+ vnp0 = ValueNumPair(callAddrVN, callAddrVN);
+ }
+ else
+#endif
+ {
+ assert(!useEntryPointAddrAsArg0);
+ ValueNumPair vnp0wx = args->Current()->gtVNPair;
+ vnStore->VNPUnpackExc(vnp0wx, &vnp0, &vnp0x);
+
+ // Also include the argument's exceptions in the exception set
+ vnpExc = vnStore->VNPExcSetUnion(vnpExc, vnp0x);
+
+ args = args->Rest();
+ }
+ if (nArgs == 1)
+ {
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0);
+ }
+ }
+ else
+ {
+ // Has at least two arguments.
+ ValueNumPair vnp1wx = args->Current()->gtVNPair;
+ ValueNumPair vnp1;
+ ValueNumPair vnp1x = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(vnp1wx, &vnp1, &vnp1x);
+ vnpExc = vnStore->VNPExcSetUnion(vnpExc, vnp1x);
+
+ args = args->Rest();
+ if (nArgs == 2)
+ {
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1);
+ }
+ }
+ else
+ {
+ ValueNumPair vnp2wx = args->Current()->gtVNPair;
+ ValueNumPair vnp2;
+ ValueNumPair vnp2x = ValueNumStore::VNPForEmptyExcSet();
+ vnStore->VNPUnpackExc(vnp2wx, &vnp2, &vnp2x);
+ vnpExc = vnStore->VNPExcSetUnion(vnpExc, vnp2x);
+
+ args = args->Rest();
+ assert(nArgs == 3); // Our current maximum.
+ assert(args == nullptr);
+ if (generateUniqueVN)
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1, vnp2, vnpUniq);
+ }
+ else
+ {
+ call->gtVNPair = vnStore->VNPairForFunc(call->TypeGet(), vnf, vnp0, vnp1, vnp2);
+ }
+ }
+ }
+ // Add the accumulated exceptions.
+ call->gtVNPair = vnStore->VNPWithExc(call->gtVNPair, vnpExc);
+ }
+}
+
+void Compiler::fgValueNumberCall(GenTreeCall* call)
+{
+ // First: do value numbering of any argument placeholder nodes in the argument list
+ // (by transferring from the VN of the late arg that they are standing in for...)
+ unsigned i = 0;
+ GenTreeArgList* args = call->gtCallArgs;
+ bool updatedArgPlace = false;
+ while (args != nullptr)
+ {
+ GenTreePtr arg = args->Current();
+ if (arg->OperGet() == GT_ARGPLACE)
+ {
+ // Find the corresponding late arg.
+ GenTreePtr lateArg = nullptr;
+ for (unsigned j = 0; j < call->fgArgInfo->ArgCount(); j++)
+ {
+ if (call->fgArgInfo->ArgTable()[j]->argNum == i)
+ {
+ lateArg = call->fgArgInfo->ArgTable()[j]->node;
+ break;
+ }
+ }
+ assert(lateArg != nullptr);
+ assert(lateArg->gtVNPair.BothDefined());
+ arg->gtVNPair = lateArg->gtVNPair;
+ updatedArgPlace = true;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("VN of ARGPLACE tree ");
+ Compiler::printTreeID(arg);
+ printf(" updated to ");
+ vnpPrint(arg->gtVNPair, 1);
+ printf("\n");
+ }
+#endif
+ }
+ i++;
+ args = args->Rest();
+ }
+ if (updatedArgPlace)
+ {
+ // Now we have to update the VN's of the argument list nodes, since that will be used in determining
+ // loop-invariance.
+ fgUpdateArgListVNs(call->gtCallArgs);
+ }
+
+ if (call->gtCallType == CT_HELPER)
+ {
+ bool modHeap = fgValueNumberHelperCall(call);
+
+ if (modHeap)
+ {
+ // For now, arbitrary side effect on Heap.
+ fgMutateHeap(call DEBUGARG("HELPER - modifies heap"));
+ }
+ }
+ else
+ {
+ if (call->TypeGet() == TYP_VOID)
+ {
+ call->gtVNPair.SetBoth(ValueNumStore::VNForVoid());
+ }
+ else
+ {
+ call->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet()));
+ }
+
+ // For now, arbitrary side effect on Heap.
+ fgMutateHeap(call DEBUGARG("CALL"));
+ }
+}
+
+void Compiler::fgUpdateArgListVNs(GenTreeArgList* args)
+{
+ if (args == nullptr)
+ {
+ return;
+ }
+ // Otherwise...
+ fgUpdateArgListVNs(args->Rest());
+ fgValueNumberTree(args);
+}
+
+VNFunc Compiler::fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc)
+{
+ assert(s_helperCallProperties.IsPure(helpFunc) || s_helperCallProperties.IsAllocator(helpFunc));
+
+ VNFunc vnf = VNF_Boundary; // An illegal value...
+ switch (helpFunc)
+ {
+ // These translate to other function symbols:
+ case CORINFO_HELP_DIV:
+ vnf = VNFunc(GT_DIV);
+ break;
+ case CORINFO_HELP_MOD:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_UDIV:
+ vnf = VNFunc(GT_UDIV);
+ break;
+ case CORINFO_HELP_UMOD:
+ vnf = VNFunc(GT_UMOD);
+ break;
+ case CORINFO_HELP_LLSH:
+ vnf = VNFunc(GT_LSH);
+ break;
+ case CORINFO_HELP_LRSH:
+ vnf = VNFunc(GT_RSH);
+ break;
+ case CORINFO_HELP_LRSZ:
+ vnf = VNFunc(GT_RSZ);
+ break;
+ case CORINFO_HELP_LMUL:
+ case CORINFO_HELP_LMUL_OVF:
+ vnf = VNFunc(GT_MUL);
+ break;
+ case CORINFO_HELP_ULMUL_OVF:
+ vnf = VNFunc(GT_MUL);
+ break; // Is this the right thing?
+ case CORINFO_HELP_LDIV:
+ vnf = VNFunc(GT_DIV);
+ break;
+ case CORINFO_HELP_LMOD:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_ULDIV:
+ vnf = VNFunc(GT_DIV);
+ break; // Is this the right thing?
+ case CORINFO_HELP_ULMOD:
+ vnf = VNFunc(GT_MOD);
+ break; // Is this the right thing?
+
+ case CORINFO_HELP_LNG2DBL:
+ vnf = VNF_Lng2Dbl;
+ break;
+ case CORINFO_HELP_ULNG2DBL:
+ vnf = VNF_ULng2Dbl;
+ break;
+ case CORINFO_HELP_DBL2INT:
+ vnf = VNF_Dbl2Int;
+ break;
+ case CORINFO_HELP_DBL2INT_OVF:
+ vnf = VNF_Dbl2Int;
+ break;
+ case CORINFO_HELP_DBL2LNG:
+ vnf = VNF_Dbl2Lng;
+ break;
+ case CORINFO_HELP_DBL2LNG_OVF:
+ vnf = VNF_Dbl2Lng;
+ break;
+ case CORINFO_HELP_DBL2UINT:
+ vnf = VNF_Dbl2UInt;
+ break;
+ case CORINFO_HELP_DBL2UINT_OVF:
+ vnf = VNF_Dbl2UInt;
+ break;
+ case CORINFO_HELP_DBL2ULNG:
+ vnf = VNF_Dbl2ULng;
+ break;
+ case CORINFO_HELP_DBL2ULNG_OVF:
+ vnf = VNF_Dbl2ULng;
+ break;
+ case CORINFO_HELP_FLTREM:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_DBLREM:
+ vnf = VNFunc(GT_MOD);
+ break;
+ case CORINFO_HELP_FLTROUND:
+ vnf = VNF_FltRound;
+ break; // Is this the right thing?
+ case CORINFO_HELP_DBLROUND:
+ vnf = VNF_DblRound;
+ break; // Is this the right thing?
+
+ // These allocation operations probably require some augmentation -- perhaps allocSiteId,
+ // something about array length...
+ case CORINFO_HELP_NEW_CROSSCONTEXT:
+ case CORINFO_HELP_NEWFAST:
+ case CORINFO_HELP_NEWSFAST:
+ case CORINFO_HELP_NEWSFAST_ALIGN8:
+ vnf = VNF_JitNew;
+ break;
+
+ case CORINFO_HELP_READYTORUN_NEW:
+ vnf = VNF_JitReadyToRunNew;
+ break;
+
+ case CORINFO_HELP_NEWARR_1_DIRECT:
+ case CORINFO_HELP_NEWARR_1_OBJ:
+ case CORINFO_HELP_NEWARR_1_VC:
+ case CORINFO_HELP_NEWARR_1_ALIGN8:
+ vnf = VNF_JitNewArr;
+ break;
+
+ case CORINFO_HELP_READYTORUN_NEWARR_1:
+ vnf = VNF_JitReadyToRunNewArr;
+ break;
+
+ case CORINFO_HELP_GETGENERICS_GCSTATIC_BASE:
+ vnf = VNF_GetgenericsGcstaticBase;
+ break;
+ case CORINFO_HELP_GETGENERICS_NONGCSTATIC_BASE:
+ vnf = VNF_GetgenericsNongcstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE:
+ vnf = VNF_GetsharedGcstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE:
+ vnf = VNF_GetsharedNongcstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedGcstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedNongcstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_READYTORUN_STATIC_BASE:
+ vnf = VNF_ReadyToRunStaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedGcstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedNongcstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_CLASSINIT_SHARED_DYNAMICCLASS:
+ vnf = VNF_ClassinitSharedDynamicclass;
+ break;
+ case CORINFO_HELP_GETGENERICS_GCTHREADSTATIC_BASE:
+ vnf = VNF_GetgenericsGcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETGENERICS_NONGCTHREADSTATIC_BASE:
+ vnf = VNF_GetgenericsNongcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE:
+ vnf = VNF_GetsharedGcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE:
+ vnf = VNF_GetsharedNongcthreadstaticBase;
+ break;
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedGcthreadstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_NOCTOR:
+ vnf = VNF_GetsharedNongcthreadstaticBaseNoctor;
+ break;
+ case CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedGcthreadstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_GETSHARED_NONGCTHREADSTATIC_BASE_DYNAMICCLASS:
+ vnf = VNF_GetsharedNongcthreadstaticBaseDynamicclass;
+ break;
+ case CORINFO_HELP_GETSTATICFIELDADDR_CONTEXT:
+ vnf = VNF_GetStaticAddrContext;
+ break;
+ case CORINFO_HELP_GETSTATICFIELDADDR_TLS:
+ vnf = VNF_GetStaticAddrTLS;
+ break;
+
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD:
+ case CORINFO_HELP_RUNTIMEHANDLE_METHOD_LOG:
+ vnf = VNF_RuntimeHandleMethod;
+ break;
+
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS:
+ case CORINFO_HELP_RUNTIMEHANDLE_CLASS_LOG:
+ vnf = VNF_RuntimeHandleClass;
+ break;
+
+ case CORINFO_HELP_STRCNS:
+ vnf = VNF_StrCns;
+ break;
+
+ case CORINFO_HELP_CHKCASTCLASS:
+ case CORINFO_HELP_CHKCASTCLASS_SPECIAL:
+ case CORINFO_HELP_CHKCASTARRAY:
+ case CORINFO_HELP_CHKCASTINTERFACE:
+ case CORINFO_HELP_CHKCASTANY:
+ vnf = VNF_CastClass;
+ break;
+
+ case CORINFO_HELP_READYTORUN_CHKCAST:
+ vnf = VNF_ReadyToRunCastClass;
+ break;
+
+ case CORINFO_HELP_ISINSTANCEOFCLASS:
+ case CORINFO_HELP_ISINSTANCEOFINTERFACE:
+ case CORINFO_HELP_ISINSTANCEOFARRAY:
+ case CORINFO_HELP_ISINSTANCEOFANY:
+ vnf = VNF_IsInstanceOf;
+ break;
+
+ case CORINFO_HELP_READYTORUN_ISINSTANCEOF:
+ vnf = VNF_ReadyToRunIsInstanceOf;
+ break;
+
+ case CORINFO_HELP_LDELEMA_REF:
+ vnf = VNF_LdElemA;
+ break;
+
+ case CORINFO_HELP_UNBOX:
+ vnf = VNF_Unbox;
+ break;
+
+ // A constant within any method.
+ case CORINFO_HELP_GETCURRENTMANAGEDTHREADID:
+ vnf = VNF_ManagedThreadId;
+ break;
+
+ case CORINFO_HELP_GETREFANY:
+ // TODO-CQ: This should really be interpreted as just a struct field reference, in terms of values.
+ vnf = VNF_GetRefanyVal;
+ break;
+
+ case CORINFO_HELP_GETCLASSFROMMETHODPARAM:
+ vnf = VNF_GetClassFromMethodParam;
+ break;
+
+ case CORINFO_HELP_GETSYNCFROMCLASSHANDLE:
+ vnf = VNF_GetSyncFromClassHandle;
+ break;
+
+ case CORINFO_HELP_LOOP_CLONE_CHOICE_ADDR:
+ vnf = VNF_LoopCloneChoiceAddr;
+ break;
+
+ case CORINFO_HELP_BOX_NULLABLE:
+ vnf = VNF_BoxNullable;
+ break;
+
+ default:
+ unreached();
+ }
+
+ assert(vnf != VNF_Boundary);
+ return vnf;
+}
+
+bool Compiler::fgValueNumberHelperCall(GenTreeCall* call)
+{
+ CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
+ bool pure = s_helperCallProperties.IsPure(helpFunc);
+ bool isAlloc = s_helperCallProperties.IsAllocator(helpFunc);
+ bool modHeap = s_helperCallProperties.MutatesHeap(helpFunc);
+ bool mayRunCctor = s_helperCallProperties.MayRunCctor(helpFunc);
+ bool noThrow = s_helperCallProperties.NoThrow(helpFunc);
+
+ ValueNumPair vnpExc = ValueNumStore::VNPForEmptyExcSet();
+
+ // If the JIT helper can throw an exception, make sure that we fill in
+ // vnpExc with a value number that represents the exception(s) that can be thrown.
+ if (!noThrow)
+ {
+ // If the helper is known to throw only one particular exception,
+ // we can set vnpExc to that exception; otherwise we conservatively
+ // model the JIT helper as possibly throwing multiple different exceptions.
+ //
+ switch (helpFunc)
+ {
+ case CORINFO_HELP_OVERFLOW:
+ // This helper always throws the VNF_OverflowExc exception
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_OverflowExc));
+ break;
+
+ default:
+ // Set up vnpExc to indicate that multiple different exceptions
+ // could be generated by this helper.
+ vnpExc = vnStore->VNPExcSetSingleton(vnStore->VNPairForFunc(TYP_REF, VNF_HelperMultipleExc));
+ }
+ }
+
+ ValueNumPair vnpNorm;
+
+ if (call->TypeGet() == TYP_VOID)
+ {
+ vnpNorm = ValueNumStore::VNPForVoid();
+ }
+ else
+ {
+ // TODO-CQ: this is a list of helpers we're going to treat as non-pure,
+ // because they raise complications. Eventually, we need to handle those complications...
+ bool needsFurtherWork = false;
+ switch (helpFunc)
+ {
+ case CORINFO_HELP_NEW_MDARR:
+ // This is a varargs helper. We need to represent the array shape in the VN world somehow.
+ needsFurtherWork = true;
+ break;
+ default:
+ break;
+ }
+
+ if (!needsFurtherWork && (pure || isAlloc))
+ {
+ VNFunc vnf = fgValueNumberHelperMethVNFunc(helpFunc);
+
+ if (mayRunCctor)
+ {
+ if ((call->gtFlags & GTF_CALL_HOISTABLE) == 0)
+ {
+ modHeap = true;
+ }
+ }
+
+ fgValueNumberHelperCallFunc(call, vnf, vnpExc);
+ return modHeap;
+ }
+ else
+ {
+ vnpNorm.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet()));
+ }
+ }
+
+ call->gtVNPair = vnStore->VNPWithExc(vnpNorm, vnpExc);
+ return modHeap;
+}
+
+#ifdef DEBUG
+// This method asserts that the specified SSA name constraints are satisfied.
+// Until we figure out otherwise, all VN's are assumed to be liberal.
+// TODO-Cleanup: new JitTestLabels for lib vs cons vs both VN classes?
+void Compiler::JitTestCheckVN()
+{
+ typedef SimplerHashTable<ssize_t, SmallPrimitiveKeyFuncs<ssize_t>, ValueNum, JitSimplerHashBehavior> LabelToVNMap;
+ typedef SimplerHashTable<ValueNum, SmallPrimitiveKeyFuncs<ValueNum>, ssize_t, JitSimplerHashBehavior> VNToLabelMap;
+
+ // If we have no test data, early out.
+ if (m_nodeTestData == nullptr)
+ {
+ return;
+ }
+
+ NodeToTestDataMap* testData = GetNodeTestData();
+
+ // First we have to know which nodes in the tree are reachable.
+ typedef SimplerHashTable<GenTreePtr, PtrKeyFuncs<GenTree>, int, JitSimplerHashBehavior> NodeToIntMap;
+ NodeToIntMap* reachable = FindReachableNodesInNodeTestData();
+
+ LabelToVNMap* labelToVN = new (getAllocatorDebugOnly()) LabelToVNMap(getAllocatorDebugOnly());
+ VNToLabelMap* vnToLabel = new (getAllocatorDebugOnly()) VNToLabelMap(getAllocatorDebugOnly());
+
+ if (verbose)
+ {
+ printf("\nJit Testing: Value numbering.\n");
+ }
+ for (NodeToTestDataMap::KeyIterator ki = testData->Begin(); !ki.Equal(testData->End()); ++ki)
+ {
+ TestLabelAndNum tlAndN;
+ GenTreePtr node = ki.Get();
+ ValueNum nodeVN = node->GetVN(VNK_Liberal);
+
+ bool b = testData->Lookup(node, &tlAndN);
+ assert(b);
+ if (tlAndN.m_tl == TL_VN || tlAndN.m_tl == TL_VNNorm)
+ {
+ int dummy;
+ if (!reachable->Lookup(node, &dummy))
+ {
+ printf("Node ");
+ Compiler::printTreeID(node);
+ printf(" had a test constraint declared, but has become unreachable at the time the constraint is "
+ "tested.\n"
+ "(This is probably as a result of some optimization -- \n"
+ "you may need to modify the test case to defeat this opt.)\n");
+ assert(false);
+ }
+
+ if (verbose)
+ {
+ printf(" Node ");
+ Compiler::printTreeID(node);
+ printf(" -- VN class %d.\n", tlAndN.m_num);
+ }
+
+ if (tlAndN.m_tl == TL_VNNorm)
+ {
+ nodeVN = vnStore->VNNormVal(nodeVN);
+ }
+
+ ValueNum vn;
+ if (labelToVN->Lookup(tlAndN.m_num, &vn))
+ {
+ if (verbose)
+ {
+ printf(" Already in hash tables.\n");
+ }
+ // The mappings must be one-to-one: if the label has a mapping, then the VN must, as well.
+ ssize_t num2;
+ bool b = vnToLabel->Lookup(vn, &num2);
+ // And the mappings must be the same.
+ if (tlAndN.m_num != num2)
+ {
+ printf("Node: ");
+ Compiler::printTreeID(node);
+ printf(", with value number " STR_VN "%x, was declared in VN class %d,\n", nodeVN, tlAndN.m_num);
+ printf("but this value number " STR_VN
+ "%x has already been associated with a different SSA name class: %d.\n",
+ vn, num2);
+ assert(false);
+ }
+ // And the current node must be of the specified SSA family.
+ if (nodeVN != vn)
+ {
+ printf("Node: ");
+ Compiler::printTreeID(node);
+ printf(", " STR_VN "%x was declared in SSA name class %d,\n", nodeVN, tlAndN.m_num);
+ printf("but that name class was previously bound to a different value number: " STR_VN "%x.\n", vn);
+ assert(false);
+ }
+ }
+ else
+ {
+ ssize_t num;
+ // The mappings must be one-to-one: if the label has no mapping, then the VN may not have one, either.
+ if (vnToLabel->Lookup(nodeVN, &num))
+ {
+ printf("Node: ");
+ Compiler::printTreeID(node);
+ printf(", " STR_VN "%x was declared in value number class %d,\n", nodeVN, tlAndN.m_num);
+ printf(
+ "but this value number has already been associated with a different value number class: %d.\n",
+ num);
+ assert(false);
+ }
+ // Add to both mappings.
+ labelToVN->Set(tlAndN.m_num, nodeVN);
+ vnToLabel->Set(nodeVN, tlAndN.m_num);
+ if (verbose)
+ {
+ printf(" added to hash tables.\n");
+ }
+ }
+ }
+ }
+}
+
+void Compiler::vnpPrint(ValueNumPair vnp, unsigned level)
+{
+ if (vnp.BothEqual())
+ {
+ vnPrint(vnp.GetLiberal(), level);
+ }
+ else
+ {
+ printf("<l:");
+ vnPrint(vnp.GetLiberal(), level);
+ printf(", c:");
+ vnPrint(vnp.GetConservative(), level);
+ printf(">");
+ }
+}
+
+void Compiler::vnPrint(ValueNum vn, unsigned level)
+{
+
+ if (ValueNumStore::isReservedVN(vn))
+ {
+ printf(ValueNumStore::reservedName(vn));
+ }
+ else
+ {
+ printf(STR_VN "%x", vn);
+ if (level > 0)
+ {
+ vnStore->vnDump(this, vn);
+ }
+ }
+}
+
+#endif // DEBUG
+
+// Methods of ValueNumPair.
+ValueNumPair::ValueNumPair() : m_liberal(ValueNumStore::NoVN), m_conservative(ValueNumStore::NoVN)
+{
+}
+
+bool ValueNumPair::BothDefined() const
+{
+ return (m_liberal != ValueNumStore::NoVN) && (m_conservative != ValueNumStore::NoVN);
+}
diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h
new file mode 100644
index 0000000000..17dacfbb54
--- /dev/null
+++ b/src/jit/valuenum.h
@@ -0,0 +1,1378 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Defines the class "ValueNumStore", which maintains value numbers for a compilation.
+
+// Recall that "value numbering" assigns an integer value number to each expression. The "value
+// number property" is that two expressions with the same value number will evaluate to the same value
+// at runtime. Expressions with different value numbers may or may not be equivalent. This property
+// of value numbers has obvious applications in redundancy-elimination optimizations.
+//
+// Since value numbers give us a way of talking about the (immutable) values to which expressions
+// evaluate, they provide a good "handle" to use for attributing properties to values. For example,
+// we might note that some value number represents some particular integer constant -- which has obvious
+// application to constant propagation. Or that we know the exact type of some object reference,
+// which might be used in devirtualization.
+//
+// Finally, we will also use value numbers to express control-flow-dependent assertions. Some test may
+// imply that after the test, something new is known about a value: that an object reference is non-null
+// after a dereference (since control flow continued because no exception was thrown); that an integer value
+// is restricted to some subrange after a comparison test; etc.
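+//
+// As a small illustrative example (not part of the definitions above): in an expression such as
+// "(a + b) * (a + b)", both occurrences of "a + b" would typically receive the same value number,
+// which is what lets a redundancy-elimination phase compute the sum once; "a + b" and "a - b"
+// would in general receive different value numbers.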
+
+/*****************************************************************************/
+#ifndef _VALUENUM_H_
+#define _VALUENUM_H_
+/*****************************************************************************/
+
+#include "vartype.h"
+// For "GT_COUNT"
+#include "gentree.h"
+// Defines the type ValueNum.
+#include "valuenumtype.h"
+
+// A "ValueNumStore" represents the "universe" of value numbers used in a single
+// compilation.
+
+// All members of the enumeration genTreeOps are also members of VNFunc.
+// (Though some of these may be labeled "illegal").
+enum VNFunc
+{
+ // Implicitly, elements of genTreeOps here.
+ VNF_Boundary = GT_COUNT,
+#define ValueNumFuncDef(nm, arity, commute, knownNonNull, sharedStatic) VNF_##nm,
+#include "valuenumfuncs.h"
+ VNF_COUNT
+};
+
+// Given an "oper" and its associated flags, transform the oper into a
+// more accurate oper that can be used in evaluation. For example, (GT_ADD, unsigned)
+// transforms to GT_ADD_UN.
+VNFunc GetVNFuncForOper(genTreeOps oper, bool isUnsigned);
+
+// An instance of this struct represents an application of the function symbol
+// "m_func" to the first "m_arity" (<= 4) argument values in "m_args."
+struct VNFuncApp
+{
+ VNFunc m_func;
+ unsigned m_arity;
+ ValueNum m_args[4];
+
+ bool Equals(const VNFuncApp& funcApp)
+ {
+ if (m_func != funcApp.m_func)
+ {
+ return false;
+ }
+ if (m_arity != funcApp.m_arity)
+ {
+ return false;
+ }
+ for (unsigned i = 0; i < m_arity; i++)
+ {
+ if (m_args[i] != funcApp.m_args[i])
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+};
+
+// A unique prefix character to use when dumping a tree's gtVN in the tree dumps
+// We use this together with string concatenation to put this in printf format strings
+// static const char* const VN_DumpPrefix = "$";
+#define STR_VN "$"
+
+class ValueNumStore
+{
+
+public:
+ // We will reserve "max unsigned" to represent "not a value number", for maps that might start uninitialized.
+ static const ValueNum NoVN = UINT32_MAX;
+ // A second special value, used to indicate that a function evaluation would cause infinite recursion.
+ static const ValueNum RecursiveVN = UINT32_MAX - 1;
+
+ // ==================================================================================================
+ // VNMap - map from something to ValueNum, where something is typically a constant value or a VNFunc
+ // This class has two purposes - to abstract the implementation and to validate the ValueNums
+ // being stored or retrieved.
+ template <class fromType, class keyfuncs = LargePrimitiveKeyFuncs<fromType>>
+ class VNMap : public SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>
+ {
+ public:
+ VNMap(IAllocator* alloc) : SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>(alloc)
+ {
+ }
+ ~VNMap()
+ {
+ ~VNMap<fromType, keyfuncs>::SimplerHashTable();
+ }
+
+ bool Set(fromType k, ValueNum val)
+ {
+ assert(val != RecursiveVN);
+ return SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>::Set(k, val);
+ }
+ bool Lookup(fromType k, ValueNum* pVal = nullptr) const
+ {
+ bool result = SimplerHashTable<fromType, keyfuncs, ValueNum, JitSimplerHashBehavior>::Lookup(k, pVal);
+ assert(!result || *pVal != RecursiveVN);
+ return result;
+ }
+ };
+
+private:
+ Compiler* m_pComp;
+
+ // For allocations. (Other things?)
+ IAllocator* m_alloc;
+
+ // TODO-Cleanup: should transform "attribs" into a struct with bit fields. That would be simpler...
+
+ enum VNFOpAttrib
+ {
+ VNFOA_IllegalGenTreeOp = 0x1, // corresponds to a genTreeOps value that is not a legal VN func.
+ VNFOA_Commutative = 0x2, // 1 iff the function is commutative.
+ VNFOA_Arity = 0x4, // Bits 2..4 encode the arity.
+ VNFOA_AfterArity = 0x20, // Makes it clear what value the next flag(s) after Arity should have.
+ VNFOA_KnownNonNull = 0x20, // 1 iff the result is known to be non-null.
+ VNFOA_SharedStatic = 0x40, // 1 iff this VNF represents one of the shared static jit helpers
+ };
+
+ static const unsigned VNFOA_ArityShift = 2;
+ static const unsigned VNFOA_ArityBits = 3;
+ static const unsigned VNFOA_MaxArity = (1 << VNFOA_ArityBits) - 1; // Max arity we can represent.
+ static const unsigned VNFOA_ArityMask = VNFOA_AfterArity - VNFOA_Arity;
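+ // Illustrative sketch: with the constants above, the arity encoded in an attribute byte can be
+ // recovered as ((attribs & VNFOA_ArityMask) >> VNFOA_ArityShift); see VNFuncArity below.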
+
+ // These enum constants are used to encode the cast operation in the lowest bits by VNForCastOper
+ enum VNFCastAttrib
+ {
+ VCA_UnsignedSrc = 0x01,
+
+ VCA_BitCount = 1, // the number of reserved bits
+ VCA_ReservedBits = 0x01, // i.e. (VCA_UnsignedSrc)
+ };
+
+ // An array of length GT_COUNT, mapping genTreeOp values to their VNFOpAttrib.
+ static UINT8* s_vnfOpAttribs;
+
+ // Returns "true" iff gtOper is a legal value number function.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static bool GenTreeOpIsLegalVNFunc(genTreeOps gtOper);
+
+ // Returns "true" iff "vnf" is a commutative (and thus binary) operator.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static bool VNFuncIsCommutative(VNFunc vnf);
+
+ // Returns "true" iff "vnf" is a comparison (and thus binary) operator.
+ static bool VNFuncIsComparison(VNFunc vnf);
+
+ // Returns "true" iff "vnf" can be evaluated for constant arguments.
+ static bool CanEvalForConstantArgs(VNFunc vnf);
+
+ // return vnf(v0)
+ template <typename T>
+ static T EvalOp(VNFunc vnf, T v0);
+
+ // If vnf(v0, v1) would raise an exception, sets *pExcSet to the singleton set containing the exception, and
+ // returns (T)0. Otherwise, returns vnf(v0, v1).
+ template <typename T>
+ T EvalOp(VNFunc vnf, T v0, T v1, ValueNum* pExcSet);
+
+ template <typename T>
+ static int EvalComparison(VNFunc vnf, T v0, T v1);
+ template <typename T>
+ static int EvalOrderedComparisonFloat(VNFunc vnf, T v0, T v1);
+ // return vnf(v0) or vnf(v0, v1) (which must, of course, be unary/binary ops, respectively).
+ // Should only be instantiated for integral types.
+ template <typename T>
+ static T EvalOpIntegral(VNFunc vnf, T v0);
+ template <typename T>
+ T EvalOpIntegral(VNFunc vnf, T v0, T v1, ValueNum* pExcSet);
+
+ // Should only instantiate (in a non-trivial way) for "int" and "INT64". Returns true iff dividing "v0" by "v1"
+ // would produce integer overflow (an ArithmeticException -- *not* division by zero, which is separate.)
+ template <typename T>
+ static bool IsOverflowIntDiv(T v0, T v1);
+
+ // Should only instantiate (in a non-trivial way) for integral types (signed/unsigned int32/int64).
+ // Returns true iff v is the zero of the appropriate type.
+ template <typename T>
+ static bool IsIntZero(T v);
+
+ // Given a constant value number, return its value.
+ int GetConstantInt32(ValueNum argVN);
+ INT64 GetConstantInt64(ValueNum argVN);
+ double GetConstantDouble(ValueNum argVN);
+
+ // Assumes that all the ValueNum arguments of each of these functions have been shown to represent constants.
+ // Assumes that "vnf" is an operator of the appropriate arity (unary for the first, binary for the second).
+ // Assumes that "CanEvalForConstantArgs(vnf)" is true.
+ // Returns the result of evaluating the function with those constant arguments.
+ ValueNum EvalFuncForConstantArgs(var_types typ, VNFunc vnf, ValueNum vn0);
+ ValueNum EvalFuncForConstantArgs(var_types typ, VNFunc vnf, ValueNum vn0, ValueNum vn1);
+ ValueNum EvalFuncForConstantFPArgs(var_types typ, VNFunc vnf, ValueNum vn0, ValueNum vn1);
+ ValueNum EvalCastForConstantArgs(var_types typ, VNFunc vnf, ValueNum vn0, ValueNum vn1);
+
+#ifdef DEBUG
+ // This helps test some performance pathologies related to "evaluation" of VNF_MapSelect terms,
+ // especially relating to the heap. We count the number of applications of such terms we consider,
+ // and if this exceeds a limit, indicated by a COMPlus_ variable, we assert.
+ unsigned m_numMapSels;
+#endif
+
+ // This is the maximum number of MapSelect terms that can be "considered" as part of evaluation of a top-level
+ // MapSelect application.
+ unsigned m_mapSelectBudget;
+
+public:
+ // Initializes any static variables of ValueNumStore.
+ static void InitValueNumStoreStatics();
+
+ // Initialize an empty ValueNumStore.
+ ValueNumStore(Compiler* comp, IAllocator* allocator);
+
+ // Returns "true" iff "vnf" (which may have been created by a cast from an integral value) represents
+ // a legal value number function.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static bool VNFuncIsLegal(VNFunc vnf)
+ {
+ return unsigned(vnf) > VNF_Boundary || GenTreeOpIsLegalVNFunc(static_cast<genTreeOps>(vnf));
+ }
+
+ // Returns the arity of "vnf".
+ static unsigned VNFuncArity(VNFunc vnf);
+
+ // Requires "gtOper" to be a genTreeOps legally representing a VNFunc, and returns that
+ // VNFunc.
+ // (Requires InitValueNumStoreStatics to have been run.)
+ static VNFunc GenTreeOpToVNFunc(genTreeOps gtOper)
+ {
+ assert(GenTreeOpIsLegalVNFunc(gtOper));
+ return static_cast<VNFunc>(gtOper);
+ }
+
+#ifdef DEBUG
+ static void RunTests(Compiler* comp);
+#endif // DEBUG
+
+ // This block of methods gets value numbers for constants of primitive types.
+
+ ValueNum VNForIntCon(INT32 cnsVal);
+ ValueNum VNForLongCon(INT64 cnsVal);
+ ValueNum VNForFloatCon(float cnsVal);
+ ValueNum VNForDoubleCon(double cnsVal);
+ ValueNum VNForByrefCon(INT64 byrefVal);
+
+#ifdef _TARGET_64BIT_
+ ValueNum VNForPtrSizeIntCon(INT64 cnsVal)
+ {
+ return VNForLongCon(cnsVal);
+ }
+#else
+ ValueNum VNForPtrSizeIntCon(INT32 cnsVal)
+ {
+ return VNForIntCon(cnsVal);
+ }
+#endif
+
+ ValueNum VNForCastOper(var_types castToType, bool srcIsUnsigned = false);
+
+ // We keep handle values in a separate pool, so we don't confuse a handle with an int constant
+ // that happens to be the same...
+ ValueNum VNForHandle(ssize_t cnsVal, unsigned iconFlags);
+
+ // And the single constant for an object reference type.
+ static ValueNum VNForNull()
+ {
+ // We reserve Chunk 0 for "special" VNs. SRC_Null (== 0) is the VN of "null".
+ return ValueNum(SRC_Null);
+ }
+
+ // The zero map is the map that returns a zero "for the appropriate type" when indexed at any index.
+ static ValueNum VNForZeroMap()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_ZeroMap (== 1) be the zero map.
+ return ValueNum(SRC_ZeroMap);
+ }
+
+ // The value number for the special "NotAField" field sequence.
+ static ValueNum VNForNotAField()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_NotAField (== 2) be the "not a field seq".
+ return ValueNum(SRC_NotAField);
+ }
+
+ // The ROH map is the map for the "read-only heap". We assume that this is never mutated, and always
+ // has the same value number.
+ static ValueNum VNForROH()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_ReadOnlyHeap (== 3) be the read-only heap.
+ return ValueNum(SRC_ReadOnlyHeap);
+ }
+
+ // A special value number for "void" -- sometimes a type-void thing is an argument to a
+ // GT_LIST, and we want the args to be non-NoVN.
+ static ValueNum VNForVoid()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_Void (== 4) be the value for "void".
+ return ValueNum(SRC_Void);
+ }
+ static ValueNumPair VNPForVoid()
+ {
+ return ValueNumPair(VNForVoid(), VNForVoid());
+ }
+
+ // A special value number for the empty set of exceptions.
+ static ValueNum VNForEmptyExcSet()
+ {
+ // We reserve Chunk 0 for "special" VNs. Let SRC_EmptyExcSet (== 5) be the value for the empty set of
+ // exceptions.
+ return ValueNum(SRC_EmptyExcSet);
+ }
+ static ValueNumPair VNPForEmptyExcSet()
+ {
+ return ValueNumPair(VNForEmptyExcSet(), VNForEmptyExcSet());
+ }
+
+ // Returns the value number for zero of the given "typ".
+ // It has an unreached() for a "typ" that has no zero value, such as TYP_BYREF.
+ ValueNum VNZeroForType(var_types typ);
+
+ // Returns the value number for one of the given "typ".
+ // It returns NoVN for a "typ" that has no one value, such as TYP_REF.
+ ValueNum VNOneForType(var_types typ);
+
+ // Return the value number representing the singleton exception set containing the exception value "x".
+ ValueNum VNExcSetSingleton(ValueNum x);
+ ValueNumPair VNPExcSetSingleton(ValueNumPair x);
+
+ // Returns the VN representing the union of the two exception sets "xs0" and "xs1".
+ // These must be VNForEmptyExcSet() or applications of VNF_ExcSetCons, obeying
+ // the ascending order invariant (which is preserved in the result.)
+ ValueNum VNExcSetUnion(ValueNum xs0, ValueNum xs1 DEBUGARG(bool topLevel = true));
+
+ ValueNumPair VNPExcSetUnion(ValueNumPair xs0vnp, ValueNumPair xs1vnp);
+
+ // Returns "true" iff "vn" is an application of "VNF_ValWithExc".
+ bool VNHasExc(ValueNum vn)
+ {
+ VNFuncApp funcApp;
+ return GetVNFunc(vn, &funcApp) && funcApp.m_func == VNF_ValWithExc;
+ }
+
+ // Requires that "vn" is *not* a "VNF_ValWithExc" application.
+ // If "excSet" is not "VNForEmptyExcSet()", return "VNF_ValWithExc(vn, excSet)". Otherwise,
+ // just return "vn".
+ ValueNum VNWithExc(ValueNum vn, ValueNum excSet);
+
+ ValueNumPair VNPWithExc(ValueNumPair vnp, ValueNumPair excSetVNP);
+
+ // If "vnWx" is a "VNF_ValWithExc(normal, excSet)" application, sets "*pvn" to "normal", and
+ // "*pvnx" to "excSet". Otherwise, just sets "*pvn" to "normal".
+ void VNUnpackExc(ValueNum vnWx, ValueNum* pvn, ValueNum* pvnx);
+
+ void VNPUnpackExc(ValueNumPair vnWx, ValueNumPair* pvn, ValueNumPair* pvnx);
+
+ // If "vn" is a "VNF_ValWithExc(norm, excSet)" value, returns the "norm" argument; otherwise,
+ // just returns "vn".
+ ValueNum VNNormVal(ValueNum vn);
+ ValueNumPair VNPNormVal(ValueNumPair vn);
+
+ // If "vn" is a "VNF_ValWithExc(norm, excSet)" value, returns the "excSet" argument; otherwise,
+ // just returns "EmptyExcSet()".
+ ValueNum VNExcVal(ValueNum vn);
+ ValueNumPair VNPExcVal(ValueNumPair vn);
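+
+ // Informal sketch of how these compose (illustrative only): if "v" is "VNF_ValWithExc(n, e)",
+ // then VNNormVal(v) yields "n", VNExcVal(v) yields "e", and VNWithExc(n, e) should yield "v"
+ // again; for a VN that carries no exception set, VNNormVal is the identity and VNExcVal
+ // yields VNForEmptyExcSet().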
+
+ // True "iff" vn is a value known to be non-null. (For example, the result of an allocation...)
+ bool IsKnownNonNull(ValueNum vn);
+
+ // True "iff" vn is a value returned by a call to a shared static helper.
+ bool IsSharedStatic(ValueNum vn);
+
+ // VN's for functions of other values.
+ // Four overloads, for arities 0, 1, 2, and 3. If we need other arities, we'll consider it.
+ ValueNum VNForFunc(var_types typ, VNFunc func);
+ ValueNum VNForFunc(var_types typ, VNFunc func, ValueNum opVNwx);
+ // This must not be used for VNF_MapSelect applications; instead use VNForMapSelect, below.
+ ValueNum VNForFunc(var_types typ, VNFunc func, ValueNum op1VNwx, ValueNum op2VNwx);
+ ValueNum VNForFunc(var_types typ, VNFunc func, ValueNum op1VNwx, ValueNum op2VNwx, ValueNum op3VNwx);
+
+ // The following four-operand VNForFunc is used only for VNF_PtrToArrElem(elemTypeEqVN, arrVN, inxVN, fldSeqVN).
+ ValueNum VNForFunc(
+ var_types typ, VNFunc func, ValueNum op1VNwx, ValueNum op2VNwx, ValueNum op3VNwx, ValueNum op4VNwx);
+
+ // This requires a "ValueNumKind" because it will attempt, given "select(phi(m1, ..., mk), ind)", to evaluate
+ // "select(m1, ind)", ..., "select(mk, ind)" to see if they agree. It needs to know which kind of value number
+ // (liberal/conservative) to read from the SSA def referenced in the phi argument.
+ ValueNum VNForMapSelect(ValueNumKind vnk, var_types typ, ValueNum op1VN, ValueNum op2VN);
+
+ // A method that does the work for VNForMapSelect and may call itself recursively.
+ ValueNum VNForMapSelectWork(
+ ValueNumKind vnk, var_types typ, ValueNum op1VN, ValueNum op2VN, unsigned* pBudget, bool* pUsedRecursiveVN);
+
+ // A specialized version of VNForFunc that is used for VNF_MapStore and provides some logging when verbose is set
+ ValueNum VNForMapStore(var_types typ, ValueNum arg0VN, ValueNum arg1VN, ValueNum arg2VN);
+
+ // These functions parallel the ones above, except that they take liberal/conservative VN pairs
+ // as arguments, and return such a pair (the pair of the function applied to the liberal args, and
+ // the function applied to the conservative args).
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func)
+ {
+ ValueNumPair res;
+ res.SetBoth(VNForFunc(typ, func));
+ return res;
+ }
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func, ValueNumPair opVN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, opVN.GetLiberal()), VNForFunc(typ, func, opVN.GetConservative()));
+ }
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func, ValueNumPair op1VN, ValueNumPair op2VN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, op1VN.GetLiberal(), op2VN.GetLiberal()),
+ VNForFunc(typ, func, op1VN.GetConservative(), op2VN.GetConservative()));
+ }
+ ValueNumPair VNPairForFunc(var_types typ, VNFunc func, ValueNumPair op1VN, ValueNumPair op2VN, ValueNumPair op3VN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, op1VN.GetLiberal(), op2VN.GetLiberal(), op3VN.GetLiberal()),
+ VNForFunc(typ, func, op1VN.GetConservative(), op2VN.GetConservative(),
+ op3VN.GetConservative()));
+ }
+ ValueNumPair VNPairForFunc(
+ var_types typ, VNFunc func, ValueNumPair op1VN, ValueNumPair op2VN, ValueNumPair op3VN, ValueNumPair op4VN)
+ {
+ return ValueNumPair(VNForFunc(typ, func, op1VN.GetLiberal(), op2VN.GetLiberal(), op3VN.GetLiberal(),
+ op4VN.GetLiberal()),
+ VNForFunc(typ, func, op1VN.GetConservative(), op2VN.GetConservative(),
+ op3VN.GetConservative(), op4VN.GetConservative()));
+ }
+
+ // Get a new, unique value number for an expression that we're not equating to some function,
+ // which is the value of a tree in the given block.
+ ValueNum VNForExpr(BasicBlock* block, var_types typ = TYP_UNKNOWN);
+
+// This controls extra tracing of the "evaluation" of "VNF_MapSelect" functions.
+#define FEATURE_VN_TRACE_APPLY_SELECTORS 1
+
+ // Return the value number corresponding to constructing "MapSelect(map, f0)", where "f0" is the
+ // (value number of) the first field in "fieldSeq". (The type of this application will be the type of "f0".)
+ // If there are no remaining fields in "fieldSeq", return that value number; otherwise, return VNApplySelectors
+ // applied to that value number and the remainder of "fieldSeq". When the 'fieldSeq' specifies a TYP_STRUCT
+ // then the size of the struct is returned by 'wbFinalStructSize' (when it is non-null)
+ ValueNum VNApplySelectors(ValueNumKind vnk,
+ ValueNum map,
+ FieldSeqNode* fieldSeq,
+ size_t* wbFinalStructSize = nullptr);
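+
+ // Informal sketch (illustrative, not the exact implementation): for a field sequence [F0, F1],
+ // the result is essentially VNApplySelectors(vnk, VNForMapSelect(vnk, typeOf(F0), map, VN(F0)), [F1]),
+ // i.e. one MapSelect application per field, applied left to right.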
+
+ // Used after VNApplySelectors has determined that "selectedVN" is contained in a Map using VNForMapSelect
+ // It determines whether the 'selectedVN' is of an appropriate type to be read using an indirection of 'indType'.
+ // If it is of an appropriate type, then 'selectedVN' is returned; otherwise it may insert a cast to indType
+ // or return a unique value number for an incompatible indType.
+ ValueNum VNApplySelectorsTypeCheck(ValueNum selectedVN, var_types indType, size_t structSize);
+
+ // Assumes that "map" represents a map that is addressable by the fields in "fieldSeq", to get
+ // to a value of the type of "rhs". Returns an expression for the RHS of an assignment, in the given "block",
+ // to a location containing value "map" that will change the field addressed by "fieldSeq" to "rhs", leaving
+ // all other indices in "map" the same.
+ ValueNum VNApplySelectorsAssign(
+ ValueNumKind vnk, ValueNum map, FieldSeqNode* fieldSeq, ValueNum rhs, var_types indType, BasicBlock* block);
+
+ // Used after VNApplySelectorsAssign has determined that "elem" is to be written into a Map using VNForMapStore.
+ // It determines whether the 'elem' is of an appropriate type to be written using an indirection of 'indType'.
+ // It may insert a cast to indType or return a unique value number for an incompatible indType.
+ ValueNum VNApplySelectorsAssignTypeCoerce(ValueNum elem, var_types indType, BasicBlock* block);
+
+ ValueNumPair VNPairApplySelectors(ValueNumPair map, FieldSeqNode* fieldSeq, var_types indType);
+
+ ValueNumPair VNPairApplySelectorsAssign(ValueNumPair map,
+ FieldSeqNode* fieldSeq,
+ ValueNumPair rhs,
+ var_types indType,
+ BasicBlock* block)
+ {
+ return ValueNumPair(VNApplySelectorsAssign(VNK_Liberal, map.GetLiberal(), fieldSeq, rhs.GetLiberal(), indType, block),
+ VNApplySelectorsAssign(VNK_Conservative, map.GetConservative(), fieldSeq,
+ rhs.GetConservative(), indType, block));
+ }
+
+ // Compute the normal ValueNumber for a cast with no exceptions
+ ValueNum VNForCast(ValueNum srcVN, var_types castToType, var_types castFromType, bool srcIsUnsigned = false);
+
+ // Compute the ValueNumberPair for a cast
+ ValueNumPair VNPairForCast(ValueNumPair srcVNPair,
+ var_types castToType,
+ var_types castFromType,
+ bool srcIsUnsigned = false,
+ bool hasOverflowCheck = false);
+
+ // PtrToLoc values need to express a field sequence as one of their arguments. VN for null represents
+ // empty sequence, otherwise, "FieldSeq(VN(FieldHandle), restOfSeq)".
+ ValueNum VNForFieldSeq(FieldSeqNode* fieldSeq);
+
+ // Requires that "vn" represents a field sequence, that is, is the result of a call to VNForFieldSeq.
+ // Returns the FieldSequence it represents.
+ FieldSeqNode* FieldSeqVNToFieldSeq(ValueNum vn);
+
+ // Both arguments must represent field sequences; returns the value number representing the
+ // concatenation "fsVN1 || fsVN2".
+ ValueNum FieldSeqVNAppend(ValueNum fsVN1, ValueNum fsVN2);
+
+ // Requires "lclVarVN" be a value number for a GT_LCL_VAR pointer tree.
+ // Requires "fieldSeqVN" be a field sequence value number.
+ // Requires "typ" to be a TYP_REF/TYP_BYREF used for VNF_PtrToLoc.
+ // When "fieldSeqVN" is VNForNotAField, a unique VN is generated using m_uPtrToLocNotAFieldCount.
+ ValueNum VNForPtrToLoc(var_types typ, ValueNum lclVarVN, ValueNum fieldSeqVN);
+
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, and "opB" is an integer
+ // with a "fieldSeq", returns the VN for the pointer form extended with the field sequence; or else NoVN.
+ ValueNum ExtendPtrVN(GenTreePtr opA, GenTreePtr opB);
+ // If "opA" has a PtrToLoc, PtrToArrElem, or PtrToStatic application as its value numbers, returns the VN for the
+ // pointer form extended with "fieldSeq"; or else NoVN.
+ ValueNum ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fieldSeq);
+
+ // Queries on value numbers.
+ // All queries taking value numbers require that those value numbers are valid, that is, that
+ // they have been returned by previous "VNFor..." operations. They can assert false if this is
+ // not true.
+
+ // Returns TYP_UNKNOWN if the given value number has not been given a type.
+ var_types TypeOfVN(ValueNum vn);
+
+ // Returns MAX_LOOP_NUM if the given value number's loop nest is unknown or ill-defined.
+ BasicBlock::loopNumber LoopOfVN(ValueNum vn);
+
+ // Returns true iff the VN represents a (non-handle) constant.
+ bool IsVNConstant(ValueNum vn);
+
+ // Returns true iff the VN represents an integral constant.
+ bool IsVNInt32Constant(ValueNum vn);
+
+ struct ArrLenArithBoundInfo
+ {
+ // (vnArr.len - 1) > vnOp
+ // (vnArr.len arrOper arrOp) cmpOper cmpOp
+ ValueNum vnArray;
+ unsigned arrOper;
+ ValueNum arrOp;
+ unsigned cmpOper;
+ ValueNum cmpOp;
+ ArrLenArithBoundInfo() : vnArray(NoVN), arrOper(GT_NONE), arrOp(NoVN), cmpOper(GT_NONE), cmpOp(NoVN)
+ {
+ }
+#ifdef DEBUG
+ void dump(ValueNumStore* vnStore)
+ {
+ vnStore->vnDump(vnStore->m_pComp, cmpOp);
+ printf(" ");
+ printf(vnStore->VNFuncName((VNFunc)cmpOper));
+ printf(" ");
+ vnStore->vnDump(vnStore->m_pComp, vnArray);
+ if (arrOper != GT_NONE)
+ {
+ printf(vnStore->VNFuncName((VNFunc)arrOper));
+ vnStore->vnDump(vnStore->m_pComp, arrOp);
+ }
+ }
+#endif
+ };
+
+ struct ConstantBoundInfo
+ {
+ // 100 > vnOp
+ int constVal;
+ unsigned cmpOper;
+ ValueNum cmpOpVN;
+
+ ConstantBoundInfo() : constVal(0), cmpOper(GT_NONE), cmpOpVN(NoVN)
+ {
+ }
+
+#ifdef DEBUG
+ void dump(ValueNumStore* vnStore)
+ {
+ vnStore->vnDump(vnStore->m_pComp, cmpOpVN);
+ printf(" ");
+ printf(vnStore->VNFuncName((VNFunc)cmpOper));
+ printf(" ");
+ printf("%d", constVal);
+ }
+#endif
+ };
+
+ // Check if "vn" is "new [] (type handle, size)"
+ bool IsVNNewArr(ValueNum vn, VNFuncApp* funcApp);
+
+ // If "vn" IsVNNewArr, return the array size, or a value <= 0 if the size cannot be determined.
+ int GetNewArrSize(ValueNum vn);
+
+ // Check if "vn" is "a.len"
+ bool IsVNArrLen(ValueNum vn);
+
+ // If "vn" is VN(a.len) then return VN(a); NoVN if VN(a) can't be determined.
+ ValueNum GetArrForLenVn(ValueNum vn);
+
+ // Returns true for any relop other than == and != where one operand is a 32-bit integer constant.
+ bool IsVNConstantBound(ValueNum vn);
+
+ // If "vn" is constant bound, then populate the "info" fields for constVal, cmpOp, cmpOper.
+ void GetConstantBoundInfo(ValueNum vn, ConstantBoundInfo* info);
+
+ // If "vn" is of the form "var < a.len" or "a.len <= var" return true.
+ bool IsVNArrLenBound(ValueNum vn);
+
+ // If "vn" is arr len bound, then populate the "info" fields for the arrVn, cmpOp, cmpOper.
+ void GetArrLenBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info);
+
+ // If "vn" is of the form "a.len +/- var" return true.
+ bool IsVNArrLenArith(ValueNum vn);
+
+ // If "vn" is arr len arith, then populate the "info" fields for arrOper, arrVn, arrOp.
+ void GetArrLenArithInfo(ValueNum vn, ArrLenArithBoundInfo* info);
+
+ // If "vn" is of the form "var < a.len +/- k" return true.
+ bool IsVNArrLenArithBound(ValueNum vn);
+
+ // If "vn" is arr len arith bound, then populate the "info" fields for cmpOp, cmpOper.
+ void GetArrLenArithBoundInfo(ValueNum vn, ArrLenArithBoundInfo* info);
+
+ // Returns the flags on the current handle. GTF_ICON_SCOPE_HDL for example.
+ unsigned GetHandleFlags(ValueNum vn);
+
+ // Returns true iff the VN represents a handle constant.
+ bool IsVNHandle(ValueNum vn);
+
+ // Convert a vartype_t to the value number's storage type for that vartype_t.
+ // For example, ValueNums of type TYP_LONG are stored in a map of INT64 variables.
+ // Lang is the language (C++) type for the corresponding vartype_t.
+ template <int N>
+ struct VarTypConv
+ {
+ };
+
+private:
+ struct Chunk;
+
+ template <typename T>
+ static T CoerceTypRefToT(Chunk* c, unsigned offset);
+
+ // Get the actual value and coerce the actual type c->m_typ to the wanted type T.
+ template <typename T>
+ FORCEINLINE T SafeGetConstantValue(Chunk* c, unsigned offset);
+
+ template <typename T>
+ T ConstantValueInternal(ValueNum vn DEBUGARG(bool coerce))
+ {
+ Chunk* c = m_chunks.GetNoExpand(GetChunkNum(vn));
+ assert(c->m_attribs == CEA_Const || c->m_attribs == CEA_Handle);
+
+ unsigned offset = ChunkOffset(vn);
+
+ switch (c->m_typ)
+ {
+ case TYP_REF:
+ assert(0 <= offset && offset <= 1); // Null or exception.
+ __fallthrough;
+
+ case TYP_BYREF:
+#ifndef PLATFORM_UNIX
+ assert(&typeid(T) == &typeid(size_t)); // We represent ref/byref constants as size_t's.
+#endif // PLATFORM_UNIX
+ __fallthrough;
+
+ case TYP_INT:
+ case TYP_LONG:
+ case TYP_FLOAT:
+ case TYP_DOUBLE:
+ if (c->m_attribs == CEA_Handle)
+ {
+ C_ASSERT(offsetof(VNHandle, m_cnsVal) == 0);
+ return (T) reinterpret_cast<VNHandle*>(c->m_defs)[offset].m_cnsVal;
+ }
+#ifdef DEBUG
+ if (!coerce)
+ {
+ T val1 = reinterpret_cast<T*>(c->m_defs)[offset];
+ T val2 = SafeGetConstantValue<T>(c, offset);
+
+ // Detect if there is a mismatch between the VN storage type and explicitly
+ // passed-in type T.
+ bool mismatch = false;
+ if (varTypeIsFloating(c->m_typ))
+ {
+ mismatch = (memcmp(&val1, &val2, sizeof(val1)) != 0);
+ }
+ else
+ {
+ mismatch = (val1 != val2);
+ }
+
+ if (mismatch)
+ {
+ assert(
+ !"Called ConstantValue<T>(vn), but type(T) != type(vn); Use CoercedConstantValue instead.");
+ }
+ }
+#endif
+ return SafeGetConstantValue<T>(c, offset);
+
+ default:
+ assert(false); // We do not record constants of this typ.
+ return (T)0;
+ }
+ }
+
+public:
+ // Requires that "vn" is a constant, and that its type is compatible with the explicitly passed
+ // type "T". Also, note that "T" has to have an accurate storage size of the TypeOfVN(vn).
+ template <typename T>
+ T ConstantValue(ValueNum vn)
+ {
+ return ConstantValueInternal<T>(vn DEBUGARG(false));
+ }
+
+ // Requires that "vn" is a constant, and that its type can be coerced to the explicitly passed
+ // type "T".
+ template <typename T>
+ T CoercedConstantValue(ValueNum vn)
+ {
+ return ConstantValueInternal<T>(vn DEBUGARG(true));
+ }
+
+ // Given a value number "vn", go through the list of VNs that are handles
+ // to find whether it is present; if so, return "true", else "false."
+ bool IsHandle(ValueNum vn);
+
+ // Requires "mthFunc" to be an intrinsic math function (one of the allowable values for the "gtMath" field
+ // of a GenTreeMath node). For unary ops, return the value number for the application of this function to
+ // "arg0VN". For binary ops, return the value number for the application of this function to "arg0VN" and
+ // "arg1VN".
+
+ ValueNum EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN);
+
+ ValueNum EvalMathFuncBinary(var_types typ, CorInfoIntrinsics mthFunc, ValueNum arg0VN, ValueNum arg1VN);
+
+ ValueNumPair EvalMathFuncUnary(var_types typ, CorInfoIntrinsics mthFunc, ValueNumPair arg0VNP)
+ {
+ return ValueNumPair(EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetLiberal()),
+ EvalMathFuncUnary(typ, mthFunc, arg0VNP.GetConservative()));
+ }
+
+ ValueNumPair EvalMathFuncBinary(var_types typ,
+ CorInfoIntrinsics mthFunc,
+ ValueNumPair arg0VNP,
+ ValueNumPair arg1VNP)
+ {
+ return ValueNumPair(EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetLiberal(), arg1VNP.GetLiberal()),
+ EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetConservative(), arg1VNP.GetConservative()));
+ }
+
+ // Returns "true" iff "vn" represents a function application.
+ bool IsVNFunc(ValueNum vn);
+
+ // If "vn" represents a function application, returns "true" and set "*funcApp" to
+ // the function application it represents; otherwise, return "false."
+ bool GetVNFunc(ValueNum vn, VNFuncApp* funcApp);
+
+ // Requires that "vn" represents a "heap address" -- the sum of a "TYP_REF" value and some integer
+ // value. Returns the TYP_REF value.
+ ValueNum VNForRefInAddr(ValueNum vn);
+
+ // Returns "true" iff "vn" is a valid value number -- one that has been previously returned.
+ bool VNIsValid(ValueNum vn);
+
+#ifdef DEBUG
+// This controls whether we recursively call vnDump on function arguments.
+#define FEATURE_VN_DUMP_FUNC_ARGS 0
+
+ // Prints, to standard out, a representation of "vn".
+ void vnDump(Compiler* comp, ValueNum vn, bool isPtr = false);
+
+ // Requires "fieldSeq" to be a field sequence VNFuncApp.
+ // Prints a representation (comma-separated list of field names) on standard out.
+ void vnDumpFieldSeq(Compiler* comp, VNFuncApp* fieldSeq, bool isHead);
+
+ // Requires "mapSelect" to be a map select VNFuncApp.
+ // Prints a representation of a MapSelect operation on standard out.
+ void vnDumpMapSelect(Compiler* comp, VNFuncApp* mapSelect);
+
+ // Requires "mapStore" to be a map store VNFuncApp.
+ // Prints a representation of a MapStore operation on standard out.
+ void vnDumpMapStore(Compiler* comp, VNFuncApp* mapStore);
+
+ // Returns the string name of "vnf".
+ static const char* VNFuncName(VNFunc vnf);
+ // Used in the implementation of the above.
+ static const char* VNFuncNameArr[];
+
+ // Returns the string name of "vn" when it is a reserved value number, nullptr otherwise
+ static const char* reservedName(ValueNum vn);
+
+#endif // DEBUG
+
+ // Returns true if "vn" is a reserved value number
+ static bool isReservedVN(ValueNum);
+
+#define VALUENUM_SUPPORT_MERGE 0
+#if VALUENUM_SUPPORT_MERGE
+ // If we're going to support the Merge operation, and do it right, we really need to use an entire
+ // egraph data structure, so that we can do congruence closure, and discover congruences implied
+ // by the eq-class merge.
+
+ // It may be that we provisionally give two expressions distinct value numbers, then later discover
+ // that the values of the expressions are provably equal. We allow the two value numbers to be
+ // "merged" -- after the merge, they represent the same abstract value.
+ void MergeVNs(ValueNum vn1, ValueNum vn2);
+#endif
+
+private:
+ // We will allocate value numbers in "chunks". Each chunk will have the same type and "constness".
+ static const unsigned LogChunkSize = 6;
+ static const unsigned ChunkSize = 1 << LogChunkSize;
+ static const unsigned ChunkOffsetMask = ChunkSize - 1;
+
+ // A "ChunkNum" is a zero-based index naming a chunk in the Store, or else the special "NoChunk" value.
+ typedef UINT32 ChunkNum;
+ static const ChunkNum NoChunk = UINT32_MAX;
+
+ // Returns the ChunkNum of the Chunk that holds "vn" (which is required to be a valid
+ // value number, i.e., one returned by some VN-producing method of this class).
+ static ChunkNum GetChunkNum(ValueNum vn)
+ {
+ return vn >> LogChunkSize;
+ }
+
+ // Returns the offset of the given "vn" within its chunk.
+ static unsigned ChunkOffset(ValueNum vn)
+ {
+ return vn & ChunkOffsetMask;
+ }
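+
+ // Worked example (illustrative only): with LogChunkSize == 6, ChunkSize == 64, so value number
+ // 138 (0x8A) lives in chunk 138 >> 6 == 2, at offset 138 & 63 == 10 within that chunk.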
+
+ // The base VN of the next chunk to be allocated. Should always be a multiple of ChunkSize.
+ ValueNum m_nextChunkBase;
+
+ DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE)
+ {
+ CEA_None, // No extra attributes.
+ CEA_Const, // This chunk contains constant values.
+ CEA_Handle, // This chunk contains handle constants.
+ CEA_Func0, // Represents functions of arity 0.
+ CEA_Func1, // ...arity 1.
+ CEA_Func2, // ...arity 2.
+ CEA_Func3, // ...arity 3.
+ CEA_Func4, // ...arity 4.
+ CEA_Count
+ }
+ END_DECLARE_TYPED_ENUM(ChunkExtraAttribs, BYTE);
+
+ // A "Chunk" holds "ChunkSize" value numbers, starting at "m_baseVN". All of these share the same
+ // "m_typ" and "m_attribs". These properties determine the interpretation of "m_defs", as discussed below.
+ struct Chunk
+ {
+ // If "m_defs" is non-null, it is an array of size ChunkSize, whose element type is determined by the other
+ // members. The "m_numUsed" field indicates the number of elements of "m_defs" that are already consumed (the
+ // next one to allocate).
+ void* m_defs;
+ unsigned m_numUsed;
+
+ // The value number of the first VN in the chunk.
+ ValueNum m_baseVN;
+
+ // The common attributes of this chunk.
+ var_types m_typ;
+ ChunkExtraAttribs m_attribs;
+ BasicBlock::loopNumber m_loopNum;
+
+ // Initialize a chunk, starting at "*baseVN", for the given "typ", "attribs", and "loopNum" (using "alloc" for allocations).
+ // (Increments "*baseVN" by ChunkSize.)
+ Chunk(IAllocator* alloc, ValueNum* baseVN, var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum);
+
+ // Requires that "m_numUsed < ChunkSize." Returns the offset of the allocated VN within the chunk; the
+ // actual VN is this added to the "m_baseVN" of the chunk.
+ unsigned AllocVN()
+ {
+ assert(m_numUsed < ChunkSize);
+ return m_numUsed++;
+ }
+
+ template <int N>
+ struct Alloc
+ {
+ typedef typename ValueNumStore::VarTypConv<N>::Type Type;
+ };
+ };
+
+ struct VNHandle : public KeyFuncsDefEquals<VNHandle>
+ {
+ ssize_t m_cnsVal;
+ unsigned m_flags;
+ // Don't define a constructor, so that the default copy constructor can be used for hashtable rehash.
+ static void Initialize(VNHandle* handle, ssize_t m_cnsVal, unsigned m_flags)
+ {
+ handle->m_cnsVal = m_cnsVal;
+ handle->m_flags = m_flags;
+ }
+ bool operator==(const VNHandle& y) const
+ {
+ return m_cnsVal == y.m_cnsVal && m_flags == y.m_flags;
+ }
+ static unsigned GetHashCode(const VNHandle& val)
+ {
+ return static_cast<unsigned>(val.m_cnsVal);
+ }
+ };
+
+ struct VNDefFunc0Arg
+ {
+ VNFunc m_func;
+ VNDefFunc0Arg(VNFunc func) : m_func(func)
+ {
+ }
+
+ VNDefFunc0Arg() : m_func(VNF_COUNT)
+ {
+ }
+
+ bool operator==(const VNDefFunc0Arg& y) const
+ {
+ return m_func == y.m_func;
+ }
+ };
+
+ struct VNDefFunc1Arg : public VNDefFunc0Arg
+ {
+ ValueNum m_arg0;
+ VNDefFunc1Arg(VNFunc func, ValueNum arg0) : VNDefFunc0Arg(func), m_arg0(arg0)
+ {
+ }
+
+ VNDefFunc1Arg() : VNDefFunc0Arg(), m_arg0(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc1Arg& y) const
+ {
+ return VNDefFunc0Arg::operator==(y) && m_arg0 == y.m_arg0;
+ }
+ };
+
+ struct VNDefFunc2Arg : public VNDefFunc1Arg
+ {
+ ValueNum m_arg1;
+ VNDefFunc2Arg(VNFunc func, ValueNum arg0, ValueNum arg1) : VNDefFunc1Arg(func, arg0), m_arg1(arg1)
+ {
+ }
+
+ VNDefFunc2Arg() : m_arg1(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc2Arg& y) const
+ {
+ return VNDefFunc1Arg::operator==(y) && m_arg1 == y.m_arg1;
+ }
+ };
+
+ struct VNDefFunc3Arg : public VNDefFunc2Arg
+ {
+ ValueNum m_arg2;
+ VNDefFunc3Arg(VNFunc func, ValueNum arg0, ValueNum arg1, ValueNum arg2)
+ : VNDefFunc2Arg(func, arg0, arg1), m_arg2(arg2)
+ {
+ }
+ VNDefFunc3Arg() : m_arg2(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc3Arg& y) const
+ {
+ return VNDefFunc2Arg::operator==(y) && m_arg2 == y.m_arg2;
+ }
+ };
+
+ struct VNDefFunc4Arg : public VNDefFunc3Arg
+ {
+ ValueNum m_arg3;
+ VNDefFunc4Arg(VNFunc func, ValueNum arg0, ValueNum arg1, ValueNum arg2, ValueNum arg3)
+ : VNDefFunc3Arg(func, arg0, arg1, arg2), m_arg3(arg3)
+ {
+ }
+ VNDefFunc4Arg() : m_arg3(ValueNumStore::NoVN)
+ {
+ }
+
+ bool operator==(const VNDefFunc4Arg& y) const
+ {
+ return VNDefFunc3Arg::operator==(y) && m_arg3 == y.m_arg3;
+ }
+ };
+
+ // When we evaluate "select(m, i)", if "m" is the value of a phi definition, we look at
+ // all the values of the phi args, and see if doing the "select" on each of them yields identical
+ // results. If so, that is the result of the entire "select" form. We have to be careful, however,
+ // because phis may be recursive in the presence of loop structures -- the VN for the phi may be (or be
+ // part of the definition of) the VN's of some of the arguments. But there will be at least one
+ // argument that does *not* depend on the outer phi VN -- after all, we had to get into the loop somehow.
+ // So we have to be careful about breaking infinite recursion. We can ignore "recursive" results -- if all the
+ // non-recursive results are the same, the recursion indicates that the loop structure didn't alter the result.
+ // This stack represents the set of outer phis such that select(phi, ind) is being evaluated.
+ ExpandArrayStack<VNDefFunc2Arg> m_fixedPointMapSels;
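+
+ // Illustrative example of the situation described above: for a loop-carried heap phi,
+ // evaluating "select(phi(mInit, mLoop), ind)" may find that "select(mLoop, ind)" leads back to
+ // the phi itself; that branch is treated as recursive (see RecursiveVN) and ignored, and if
+ // "select(mInit, ind)" is the only non-recursive result, it becomes the value of the whole select.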
+
+#ifdef DEBUG
+ // Returns "true" iff "m_fixedPointMapSels" is non-empty, and its top element is
+ // "select(map, index)".
+ bool FixedPointMapSelsTopHasValue(ValueNum map, ValueNum index);
+#endif
+
+ // Returns true if "sel(map, ind)" is a member of "m_fixedPointMapSels".
+ bool SelectIsBeingEvaluatedRecursively(ValueNum map, ValueNum ind);
+
+ // This is a map from "chunk number" to the attributes of the chunk.
+ ExpandArrayStack<Chunk*> m_chunks;
+
+ // These entries indicate the current allocation chunk, if any, for each valid combination of <var_types,
+ // ChunkExtraAttribute, loopNumber>. Valid combinations require attribs==CEA_None or loopNum==MAX_LOOP_NUM.
+ // If the value is NoChunk, it indicates that there is no current allocation chunk for that pair, otherwise
+ // it is the index in "m_chunks" of a chunk with the given attributes, in which the next allocation should
+ // be attempted.
+ ChunkNum m_curAllocChunk[TYP_COUNT][CEA_Count + MAX_LOOP_NUM + 1];
+
+ // Returns a (pointer to a) chunk in which a new value number may be allocated.
+ Chunk* GetAllocChunk(var_types typ, ChunkExtraAttribs attribs, BasicBlock::loopNumber loopNum = MAX_LOOP_NUM);
+
+ // First, we need mechanisms for mapping from constants to value numbers.
+ // For small integers, we'll use an array.
+ static const int SmallIntConstMin = -1;
+ static const int SmallIntConstMax = 10;
+ static const unsigned SmallIntConstNum = SmallIntConstMax - SmallIntConstMin + 1;
+ static bool IsSmallIntConst(int i)
+ {
+ return SmallIntConstMin <= i && i <= SmallIntConstMax;
+ }
+ ValueNum m_VNsForSmallIntConsts[SmallIntConstNum];
+
+ struct ValueNumList
+ {
+ ValueNum vn;
+ ValueNumList* next;
+ ValueNumList(const ValueNum& v, ValueNumList* n = nullptr) : vn(v), next(n)
+ {
+ }
+ };
+
+ // Keeps track of value numbers that are integer constants and also handles (GTF_ICON_HDL_MASK).
+ ValueNumList* m_intConHandles;
+
+ typedef VNMap<INT32> IntToValueNumMap;
+ IntToValueNumMap* m_intCnsMap;
+ IntToValueNumMap* GetIntCnsMap()
+ {
+ if (m_intCnsMap == nullptr)
+ {
+ m_intCnsMap = new (m_alloc) IntToValueNumMap(m_alloc);
+ }
+ return m_intCnsMap;
+ }
+
+ ValueNum GetVNForIntCon(INT32 cnsVal)
+ {
+ ValueNum res;
+ if (GetIntCnsMap()->Lookup(cnsVal, &res))
+ {
+ return res;
+ }
+ else
+ {
+ Chunk* c = GetAllocChunk(TYP_INT, CEA_Const);
+ unsigned offsetWithinChunk = c->AllocVN();
+ res = c->m_baseVN + offsetWithinChunk;
+ reinterpret_cast<INT32*>(c->m_defs)[offsetWithinChunk] = cnsVal;
+ GetIntCnsMap()->Set(cnsVal, res);
+ return res;
+ }
+ }
+
+ typedef VNMap<INT64> LongToValueNumMap;
+ LongToValueNumMap* m_longCnsMap;
+ LongToValueNumMap* GetLongCnsMap()
+ {
+ if (m_longCnsMap == nullptr)
+ {
+ m_longCnsMap = new (m_alloc) LongToValueNumMap(m_alloc);
+ }
+ return m_longCnsMap;
+ }
+
+ typedef VNMap<VNHandle, VNHandle> HandleToValueNumMap;
+ HandleToValueNumMap* m_handleMap;
+ HandleToValueNumMap* GetHandleMap()
+ {
+ if (m_handleMap == nullptr)
+ {
+ m_handleMap = new (m_alloc) HandleToValueNumMap(m_alloc);
+ }
+ return m_handleMap;
+ }
+
+ struct LargePrimitiveKeyFuncsFloat : public LargePrimitiveKeyFuncs<float>
+ {
+ static bool Equals(float x, float y)
+ {
+ return *(unsigned*)&x == *(unsigned*)&y;
+ }
+ };
+
+ typedef VNMap<float, LargePrimitiveKeyFuncsFloat> FloatToValueNumMap;
+ FloatToValueNumMap* m_floatCnsMap;
+ FloatToValueNumMap* GetFloatCnsMap()
+ {
+ if (m_floatCnsMap == nullptr)
+ {
+ m_floatCnsMap = new (m_alloc) FloatToValueNumMap(m_alloc);
+ }
+ return m_floatCnsMap;
+ }
+
+ // In the JIT we need to distinguish -0.0 and 0.0 for optimizations.
+ struct LargePrimitiveKeyFuncsDouble : public LargePrimitiveKeyFuncs<double>
+ {
+ static bool Equals(double x, double y)
+ {
+ return *(__int64*)&x == *(__int64*)&y;
+ }
+ };
+
+ typedef VNMap<double, LargePrimitiveKeyFuncsDouble> DoubleToValueNumMap;
+ DoubleToValueNumMap* m_doubleCnsMap;
+ DoubleToValueNumMap* GetDoubleCnsMap()
+ {
+ if (m_doubleCnsMap == nullptr)
+ {
+ m_doubleCnsMap = new (m_alloc) DoubleToValueNumMap(m_alloc);
+ }
+ return m_doubleCnsMap;
+ }
+
+ LongToValueNumMap* m_byrefCnsMap;
+ LongToValueNumMap* GetByrefCnsMap()
+ {
+ if (m_byrefCnsMap == nullptr)
+ {
+ m_byrefCnsMap = new (m_alloc) LongToValueNumMap(m_alloc);
+ }
+ return m_byrefCnsMap;
+ }
+
+ struct VNDefFunc0ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc1Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc1Arg val)
+ {
+ return (val.m_func << 24) + val.m_arg0;
+ }
+ };
+ typedef VNMap<VNFunc> VNFunc0ToValueNumMap;
+ VNFunc0ToValueNumMap* m_VNFunc0Map;
+ VNFunc0ToValueNumMap* GetVNFunc0Map()
+ {
+ if (m_VNFunc0Map == nullptr)
+ {
+ m_VNFunc0Map = new (m_alloc) VNFunc0ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc0Map;
+ }
+
+ struct VNDefFunc1ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc1Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc1Arg val)
+ {
+ return (val.m_func << 24) + val.m_arg0;
+ }
+ };
+ typedef VNMap<VNDefFunc1Arg, VNDefFunc1ArgKeyFuncs> VNFunc1ToValueNumMap;
+ VNFunc1ToValueNumMap* m_VNFunc1Map;
+ VNFunc1ToValueNumMap* GetVNFunc1Map()
+ {
+ if (m_VNFunc1Map == nullptr)
+ {
+ m_VNFunc1Map = new (m_alloc) VNFunc1ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc1Map;
+ }
+
+ struct VNDefFunc2ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc2Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc2Arg val)
+ {
+ return (val.m_func << 24) + (val.m_arg0 << 8) + val.m_arg1;
+ }
+ };
+ typedef VNMap<VNDefFunc2Arg, VNDefFunc2ArgKeyFuncs> VNFunc2ToValueNumMap;
+ VNFunc2ToValueNumMap* m_VNFunc2Map;
+ VNFunc2ToValueNumMap* GetVNFunc2Map()
+ {
+ if (m_VNFunc2Map == nullptr)
+ {
+ m_VNFunc2Map = new (m_alloc) VNFunc2ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc2Map;
+ }
+
+ struct VNDefFunc3ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc3Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc3Arg val)
+ {
+ return (val.m_func << 24) + (val.m_arg0 << 16) + (val.m_arg1 << 8) + val.m_arg2;
+ }
+ };
+ typedef VNMap<VNDefFunc3Arg, VNDefFunc3ArgKeyFuncs> VNFunc3ToValueNumMap;
+ VNFunc3ToValueNumMap* m_VNFunc3Map;
+ VNFunc3ToValueNumMap* GetVNFunc3Map()
+ {
+ if (m_VNFunc3Map == nullptr)
+ {
+ m_VNFunc3Map = new (m_alloc) VNFunc3ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc3Map;
+ }
+
+ struct VNDefFunc4ArgKeyFuncs : public KeyFuncsDefEquals<VNDefFunc4Arg>
+ {
+ static unsigned GetHashCode(VNDefFunc4Arg val)
+ {
+ return (val.m_func << 24) + (val.m_arg0 << 16) + (val.m_arg1 << 8) + val.m_arg2 + (val.m_arg3 << 12);
+ }
+ };
+ typedef VNMap<VNDefFunc4Arg, VNDefFunc4ArgKeyFuncs> VNFunc4ToValueNumMap;
+ VNFunc4ToValueNumMap* m_VNFunc4Map;
+ VNFunc4ToValueNumMap* GetVNFunc4Map()
+ {
+ if (m_VNFunc4Map == nullptr)
+ {
+ m_VNFunc4Map = new (m_alloc) VNFunc4ToValueNumMap(m_alloc);
+ }
+ return m_VNFunc4Map;
+ }
+
+ enum SpecialRefConsts
+ {
+ SRC_Null,
+ SRC_ZeroMap,
+ SRC_NotAField,
+ SRC_ReadOnlyHeap,
+ SRC_Void,
+ SRC_EmptyExcSet,
+
+ SRC_NumSpecialRefConsts
+ };
+
+ // Counter that tracks the unique "not a field" sequences that have been assigned to
+ // PtrToLoc value numbers because the pointer was added to an offset that was not a field.
+ unsigned m_uPtrToLocNotAFieldCount;
+
+ // The "values" of special ref consts will be all be "null" -- their differing meanings will
+ // be carried by the distinct value numbers.
+ static class Object* s_specialRefConsts[SRC_NumSpecialRefConsts];
+ static class Object* s_nullConst;
+};
+
+template <>
+struct ValueNumStore::VarTypConv<TYP_INT>
+{
+ typedef INT32 Type;
+ typedef int Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_FLOAT>
+{
+ typedef INT32 Type;
+ typedef float Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_LONG>
+{
+ typedef INT64 Type;
+ typedef INT64 Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_DOUBLE>
+{
+ typedef INT64 Type;
+ typedef double Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_BYREF>
+{
+ typedef INT64 Type;
+ typedef void* Lang;
+};
+template <>
+struct ValueNumStore::VarTypConv<TYP_REF>
+{
+ typedef class Object* Type;
+ typedef class Object* Lang;
+};
+
+// Get the constant value stored in chunk "c" at "offset", coercing from the chunk's actual type (c->m_typ) to the requested type T.
+template <typename T>
+FORCEINLINE T ValueNumStore::SafeGetConstantValue(Chunk* c, unsigned offset)
+{
+ switch (c->m_typ)
+ {
+ case TYP_REF:
+ return CoerceTypRefToT<T>(c, offset);
+ case TYP_BYREF:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_BYREF>::Type*>(c->m_defs)[offset]);
+ case TYP_INT:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_INT>::Type*>(c->m_defs)[offset]);
+ case TYP_LONG:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_LONG>::Type*>(c->m_defs)[offset]);
+ case TYP_FLOAT:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_FLOAT>::Lang*>(c->m_defs)[offset]);
+ case TYP_DOUBLE:
+ return static_cast<T>(reinterpret_cast<VarTypConv<TYP_DOUBLE>::Lang*>(c->m_defs)[offset]);
+ default:
+ assert(false);
+ return (T)0;
+ }
+}
+
+// Inline functions.
+
+// static
+inline bool ValueNumStore::GenTreeOpIsLegalVNFunc(genTreeOps gtOper)
+{
+ return (s_vnfOpAttribs[gtOper] & VNFOA_IllegalGenTreeOp) == 0;
+}
+
+// static
+inline bool ValueNumStore::VNFuncIsCommutative(VNFunc vnf)
+{
+ return (s_vnfOpAttribs[vnf] & VNFOA_Commutative) != 0;
+}
+
+inline bool ValueNumStore::VNFuncIsComparison(VNFunc vnf)
+{
+ if (vnf >= VNF_Boundary)
+ {
+ return false;
+ }
+ genTreeOps gtOp = genTreeOps(vnf);
+ return GenTree::OperIsCompare(gtOp) != 0;
+}
+
+template <>
+inline size_t ValueNumStore::CoerceTypRefToT(Chunk* c, unsigned offset)
+{
+ return reinterpret_cast<size_t>(reinterpret_cast<VarTypConv<TYP_REF>::Type*>(c->m_defs)[offset]);
+}
+
+template <typename T>
+inline T ValueNumStore::CoerceTypRefToT(Chunk* c, unsigned offset)
+{
+ noway_assert(sizeof(T) >= sizeof(VarTypConv<TYP_REF>::Type));
+ unreached();
+}
+
+/*****************************************************************************/
+#endif // _VALUENUM_H_
+/*****************************************************************************/
diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h
new file mode 100644
index 0000000000..064a33707b
--- /dev/null
+++ b/src/jit/valuenumfuncs.h
@@ -0,0 +1,141 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Defines the functions understood by the value-numbering system.
+// ValueNumFuncDef(<name of function>, <arity (0-4)>, <is-commutative (for arity = 2)>, <non-null (for gc functions)>,
+// <is-shared-static>)
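
Each definition below is consumed through the usual X-macro pattern: a client defines
ValueNumFuncDef to expand to whatever it needs, includes this file, and the #undef at the bottom
resets the macro for the next client. A sketch of the enum-building client (the exact layout used
by valuenum.h is an assumption here, inferred from the VNF_ names it references, such as
VNF_Boundary and VNF_Cast; GT_COUNT comes from gentree.h):

    enum VNFunc
    {
        // genTreeOps values double as VNFuncs below VNF_Boundary; the entries in this file
        // extend the range past it.
        VNF_Boundary = GT_COUNT,
    #define ValueNumFuncDef(nm, arity, commute, knownNonNull, sharedStatic) VNF_##nm,
    #include "valuenumfuncs.h"
        VNF_COUNT
    };

A second inclusion with a different ValueNumFuncDef definition can build parallel tables, e.g. of
arities or commutativity flags.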
+
+// clang-format off
+ValueNumFuncDef(MapStore, 3, false, false, false)
+ValueNumFuncDef(MapSelect, 2, false, false, false)
+
+ValueNumFuncDef(FieldSeq, 2, false, false, false) // Sequence (VN of null == empty) of (VN's of) field handles.
+ValueNumFuncDef(ZeroMap, 0, false, false, false) // The "ZeroMap": indexing at any index yields "zero of the desired type".
+
+ValueNumFuncDef(PtrToLoc, 3, false, false, false) // Pointer (byref) to a local variable. Args: VN's of: 0: var num, 1: FieldSeq, 2: Unique value for this PtrToLoc.
+ValueNumFuncDef(PtrToArrElem, 4, false, false, false) // Pointer (byref) to an array element. Args: 0: array elem type eq class var_types value, VN's of: 1: array, 2: index, 3: FieldSeq.
+ValueNumFuncDef(PtrToStatic, 1, false, false, false) // Pointer (byref) to a static variable (or possibly a field thereof, if the static variable is a struct). Args: 0: FieldSeq, first element
+ // of which is the static var.
+ValueNumFuncDef(Phi, 2, false, false, false) // A phi function. Only occurs as arg of PhiDef or PhiHeapDef. Arguments are SSA numbers of var being defined.
+ValueNumFuncDef(PhiDef, 3, false, false, false) // Args: 0: local var # (or -1 for Heap), 1: SSA #, 2: VN of definition.
+// Wouldn't need this if I'd made Heap a regular local variable...
+ValueNumFuncDef(PhiHeapDef, 2, false, false, false) // Args: 0: VN for basic block pointer, 1: VN of definition
+ValueNumFuncDef(InitVal, 1, false, false, false) // An input arg, or init val of a local. Args: 0: a constant VN.
+
+
+ValueNumFuncDef(Cast, 2, false, false, false) // VNF_Cast: Cast operation changes the representation's size and unsigned-ness.
+ // Args: 0: Source for the cast operation.
+ // 1: Constant integer representing the cast operation.
+ // Use VNForCastOper() to construct.
+
+ValueNumFuncDef(CastClass, 2, false, false, false) // Args: 0: Handle of class being cast to, 1: object being cast.
+ValueNumFuncDef(IsInstanceOf, 2, false, false, false) // Args: 0: Handle of class being queried, 1: object being queried.
+ValueNumFuncDef(ReadyToRunCastClass, 2, false, false, false) // Args: 0: Helper stub address, 1: object being cast.
+ValueNumFuncDef(ReadyToRunIsInstanceOf, 2, false, false, false) // Args: 0: Helper stub address, 1: object being queried.
+
+ValueNumFuncDef(LdElemA, 3, false, false, false) // Args: 0: array value; 1: index value; 2: type handle of element.
+
+ValueNumFuncDef(GetRefanyVal, 2, false, false, false) // Args: 0: type handle; 1: typedref value. Returns the value (asserting that the type is right).
+
+ValueNumFuncDef(GetClassFromMethodParam, 1, false, true, false) // Args: 0: method generic argument.
+ValueNumFuncDef(GetSyncFromClassHandle, 1, false, true, false) // Args: 0: class handle.
+ValueNumFuncDef(LoopCloneChoiceAddr, 0, false, true, false)
+
+// How we represent values of expressions with exceptional side effects:
+ValueNumFuncDef(ValWithExc, 2, false, false, false) // Args: 0: value number from normal execution; 1: VN for set of possible exceptions.
+
+ValueNumFuncDef(ExcSetCons, 2, false, false, false) // Args: 0: exception; 1: exception set (including EmptyExcSet). Invariant: "car"s are always in ascending order.
+
+// Various exception values.
+ValueNumFuncDef(NullPtrExc, 1, false, false, false) // Null pointer exception.
+ValueNumFuncDef(ArithmeticExc, 0, false, false, false) // E.g., for signed ints, MinInt / -1.
+ValueNumFuncDef(OverflowExc, 0, false, false, false) // Integer overflow.
+ValueNumFuncDef(ConvOverflowExc, 2, false, false, false) // Integer overflow produced by conversion. Args: 0: input value; 1: var_types of target type
+ // (shifted left one bit; low bit encodes whether the source is unsigned.)
+ValueNumFuncDef(DivideByZeroExc, 0, false, false, false) // Division by zero.
+ValueNumFuncDef(IndexOutOfRangeExc, 2, false, false, false) // Args: 0: array length; 1: index. The exception raised if this bounds check fails.
+ValueNumFuncDef(InvalidCastExc, 2, false, false, false) // Args: 0: ref value being cast; 1: handle of type being cast to. Represents the exception thrown if the cast fails.
+ValueNumFuncDef(NewArrOverflowExc, 1, false, false, false) // Raises an integer overflow exception when Arg 0 is negative
+ValueNumFuncDef(HelperMultipleExc, 0, false, false, false) // Represents one or more different exceptions that may be thrown by a JitHelper
+
+ValueNumFuncDef(Lng2Dbl, 1, false, false, false)
+ValueNumFuncDef(ULng2Dbl, 1, false, false, false)
+ValueNumFuncDef(Dbl2Int, 1, false, false, false)
+ValueNumFuncDef(Dbl2UInt, 1, false, false, false)
+ValueNumFuncDef(Dbl2Lng, 1, false, false, false)
+ValueNumFuncDef(Dbl2ULng, 1, false, false, false)
+ValueNumFuncDef(FltRound, 1, false, false, false)
+ValueNumFuncDef(DblRound, 1, false, false, false)
+
+ValueNumFuncDef(Sin, 1, false, false, false)
+ValueNumFuncDef(Cos, 1, false, false, false)
+ValueNumFuncDef(Sqrt, 1, false, false, false)
+ValueNumFuncDef(Abs, 1, false, false, false)
+ValueNumFuncDef(RoundDouble, 1, false, false, false)
+ValueNumFuncDef(RoundFloat, 1, false, false, false)
+ValueNumFuncDef(RoundInt, 1, false, false, false)
+ValueNumFuncDef(Cosh, 1, false, false, false)
+ValueNumFuncDef(Sinh, 1, false, false, false)
+ValueNumFuncDef(Tan, 1, false, false, false)
+ValueNumFuncDef(Tanh, 1, false, false, false)
+ValueNumFuncDef(Asin, 1, false, false, false)
+ValueNumFuncDef(Acos, 1, false, false, false)
+ValueNumFuncDef(Atan, 1, false, false, false)
+ValueNumFuncDef(Atan2, 2, false, false, false)
+ValueNumFuncDef(Log10, 1, false, false, false)
+ValueNumFuncDef(Pow, 2, false, false, false)
+ValueNumFuncDef(Exp, 1, false, false, false)
+ValueNumFuncDef(Ceiling, 1, false, false, false)
+ValueNumFuncDef(Floor, 1, false, false, false)
+
+ValueNumFuncDef(ManagedThreadId, 0, false, false, false)
+
+ValueNumFuncDef(ObjGetType, 1, false, false, false)
+ValueNumFuncDef(GetgenericsGcstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetgenericsNongcstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetsharedGcstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedGcstaticBaseNoctor, 1, false, true, true)
+ValueNumFuncDef(GetsharedNongcstaticBaseNoctor, 1, false, true, true)
+ValueNumFuncDef(ReadyToRunStaticBase, 1, false, true, true)
+ValueNumFuncDef(GetsharedGcstaticBaseDynamicclass, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcstaticBaseDynamicclass, 2, false, true, true)
+ValueNumFuncDef(GetgenericsGcthreadstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetgenericsNongcthreadstaticBase, 1, false, true, true)
+ValueNumFuncDef(GetsharedGcthreadstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcthreadstaticBase, 2, false, true, true)
+ValueNumFuncDef(GetsharedGcthreadstaticBaseNoctor, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcthreadstaticBaseNoctor, 2, false, true, true)
+ValueNumFuncDef(GetsharedGcthreadstaticBaseDynamicclass, 2, false, true, true)
+ValueNumFuncDef(GetsharedNongcthreadstaticBaseDynamicclass, 2, false, true, true)
+
+ValueNumFuncDef(ClassinitSharedDynamicclass, 2, false, false, false)
+ValueNumFuncDef(RuntimeHandleMethod, 2, false, true, false)
+ValueNumFuncDef(RuntimeHandleClass, 2, false, true, false)
+
+ValueNumFuncDef(GetStaticAddrContext, 1, false, true, false)
+ValueNumFuncDef(GetStaticAddrTLS, 1, false, true, false)
+
+ValueNumFuncDef(JitNew, 2, false, true, false)
+ValueNumFuncDef(JitNewArr, 3, false, true, false)
+ValueNumFuncDef(JitReadyToRunNew, 2, false, true, false)
+ValueNumFuncDef(JitReadyToRunNewArr, 3, false, true, false)
+ValueNumFuncDef(BoxNullable, 3, false, false, false)
+
+ValueNumFuncDef(LT_UN, 2, false, false, false)
+ValueNumFuncDef(LE_UN, 2, false, false, false)
+ValueNumFuncDef(GE_UN, 2, false, false, false)
+ValueNumFuncDef(GT_UN, 2, false, false, false)
+ValueNumFuncDef(ADD_UN, 2, true, false, false)
+ValueNumFuncDef(SUB_UN, 2, false, false, false)
+ValueNumFuncDef(MUL_UN, 2, true, false, false)
+ValueNumFuncDef(DIV_UN, 2, false, false, false)
+ValueNumFuncDef(MOD_UN, 2, false, false, false)
+
+ValueNumFuncDef(StrCns, 2, false, true, false)
+
+ValueNumFuncDef(Unbox, 2, false, true, false)
+// clang-format on
+
+#undef ValueNumFuncDef
diff --git a/src/jit/valuenumtype.h b/src/jit/valuenumtype.h
new file mode 100644
index 0000000000..f898d87532
--- /dev/null
+++ b/src/jit/valuenumtype.h
@@ -0,0 +1,101 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+// Defines the type "ValueNum".
+
+// This file exists only to break an include-file cycle -- this had been in ValueNum.h, but that
+// file wanted to include gentree.h to get GT_COUNT, and gentree.h wanted to include ValueNum.h to
+// get the ValueNum type.
+
+/*****************************************************************************/
+#ifndef _VALUENUMTYPE_H_
+#define _VALUENUMTYPE_H_
+/*****************************************************************************/
+
+// We will represent ValueNum's as unsigned integers.
+typedef UINT32 ValueNum;
+
+// There are two "kinds" of value numbers, which differ in their modeling of the actions of other threads.
+// "Liberal" value numbers assume that the other threads change contents of heap locations only at
+// synchronization points. Liberal VNs are appropriate, for example, in identifying CSE opportunities.
+// "Conservative" value numbers assume that the contents of heap locations change arbitrarily between
+// every two accesses. Conservative VNs are appropriate, for example, in assertion prop, where an observation
+// of a property of the value in some storage location is used to perform an optimization downstream on
+// an operation involving the contents of that storage location. If other threads may modify the storage
+// location between the two accesses, the observed property may no longer hold -- and conservative VNs make
+// it clear that the values need not be the same.
+//
+enum ValueNumKind
+{
+ VNK_Liberal,
+ VNK_Conservative
+};
+
+struct ValueNumPair
+{
+private:
+ ValueNum m_liberal;
+ ValueNum m_conservative;
+
+public:
+ ValueNum GetLiberal() const
+ {
+ return m_liberal;
+ }
+ void SetLiberal(ValueNum vn)
+ {
+ m_liberal = vn;
+ }
+ ValueNum GetConservative() const
+ {
+ return m_conservative;
+ }
+ void SetConservative(ValueNum vn)
+ {
+ m_conservative = vn;
+ }
+
+ ValueNum* GetLiberalAddr()
+ {
+ return &m_liberal;
+ }
+ ValueNum* GetConservativeAddr()
+ {
+ return &m_conservative;
+ }
+
+ ValueNum Get(ValueNumKind vnk)
+ {
+ return vnk == VNK_Liberal ? m_liberal : m_conservative;
+ }
+
+ void SetBoth(ValueNum vn)
+ {
+ m_liberal = vn;
+ m_conservative = vn;
+ }
+
+ void operator=(const ValueNumPair& vn2)
+ {
+ m_liberal = vn2.m_liberal;
+ m_conservative = vn2.m_conservative;
+ }
+
+ // Initializes both elements to "NoVN". Defined in ValueNum.cpp.
+ ValueNumPair();
+
+ ValueNumPair(ValueNum lib, ValueNum cons) : m_liberal(lib), m_conservative(cons)
+ {
+ }
+
+ // True iff neither element is "NoVN". Defined in ValueNum.cpp.
+ bool BothDefined() const;
+
+ bool BothEqual() const
+ {
+ return m_liberal == m_conservative;
+ }
+};
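
Both numbers travel together through the compiler in a ValueNumPair, and each optimization reads
the kind whose assumptions it can tolerate. A hypothetical helper showing the choice (the function
name is illustrative, not part of this header):

    ValueNum VNForOptimization(const ValueNumPair& vnp, bool heapMayChangeBetweenUses)
    {
        // CSE-style reasoning can use the liberal number; reasoning that becomes unsound if
        // another thread writes the heap between the two uses must take the conservative one.
        return heapMayChangeBetweenUses ? vnp.GetConservative() : vnp.GetLiberal();
    }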
+
+#endif // _VALUENUMTYPE_H_
diff --git a/src/jit/varset.h b/src/jit/varset.h
new file mode 100644
index 0000000000..6a2c37ed40
--- /dev/null
+++ b/src/jit/varset.h
@@ -0,0 +1,211 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This include file determines how VARSET_TP is implemented.
+//
+#ifndef _VARSET_INCLUDED_
+#define _VARSET_INCLUDED_ 1
+
+// A VARSET_TP is a set of (small) integers representing local variables.
+// We implement varsets using the BitSet abstraction, which supports
+// several different implementations.
+//
+// The set of tracked variables may change during a compilation, and variables may be
+// re-sorted, so the tracked variable index of a variable is decidedly *not* stable. The
+// bitset abstraction supports labeling of bitsets with "epochs", and supports a
+// debugging mode in which live bitsets must have the current epoch. To use this feature,
+// divide a compilation up into epochs, during which tracked variable indices are
+// stable.
+
+// Some implementations of BitSet may use a level of indirection. Therefore, we
+// must be careful about assignment and initialization. We often want to
+// reason about VARSET_TP as immutable values, and just copying the contents would
+// introduce sharing in the indirect case, which is usually not what's desired. On
+// the other hand, there are many cases in which the RHS value has just been
+// created functionally, and the initialization/assignment is obviously its last
+// use. In these cases, allocating a new indirect representation for the lhs (if
+// it does not already have one) would be unnecessary and wasteful. Thus, for both
+// initialization and assignment, we have normal versions, which do make copies to
+// prevent sharing and definitely preserve value semantics, and "NOCOPY" versions,
+// which do not. Obviously, the latter should be used with care.
+
+#include "bitset.h"
+#include "compilerbitsettraits.h"
+
+const unsigned UInt64Bits = sizeof(UINT64) * 8;
+
+// This #define chooses the BitSet representation used for VARSET.
+// The choices are defined in "bitset.h"; they currently include
+// BSUInt64, BSShortLong, and BSUInt64Class.
+#define VARSET_REP BSShortLong
+
+#if VARSET_REP == BSUInt64
+
+#include "bitsetasuint64.h"
+
+typedef BitSetOps</*BitSetType*/ UINT64,
+ /*Brand*/ VARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ TrackedVarBitSetTraits>
+ VarSetOpsRaw;
+
+typedef UINT64 VARSET_TP;
+
+const unsigned lclMAX_TRACKED = UInt64Bits;
+
+#define VARSET_REP_IS_CLASS 0
+
+#elif VARSET_REP == BSShortLong
+
+#include "bitsetasshortlong.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ VARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ TrackedVarBitSetTraits>
+ VarSetOpsRaw;
+
+typedef BitSetShortLongRep VARSET_TP;
+
+// Tested various sizes for max tracked locals. The largest value for which no throughput regression
+// could be measured was 512. Going to 1024 showed the first throughput regressions.
+// We anticipate the larger size will be needed to support better inlining.
+// There were a number of failures when 512 was used for legacy, so we just retain the 128 value
+// for the legacy backend.
+
+#if !defined(LEGACY_BACKEND)
+const unsigned lclMAX_TRACKED = 512;
+#else
+const unsigned lclMAX_TRACKED = 128;
+#endif
+
+#define VARSET_REP_IS_CLASS 0
+
+#elif VARSET_REP == BSUInt64Class
+
+#include "bitsetasuint64inclass.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetUint64<Compiler*, TrackedVarBitSetTraits>,
+ /*Brand*/ VARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ TrackedVarBitSetTraits>
+ VarSetOpsRaw;
+
+typedef BitSetUint64<Compiler*, TrackedVarBitSetTraits> VARSET_TP;
+
+const unsigned lclMAX_TRACKED = UInt64Bits;
+
+#define VARSET_REP_IS_CLASS 1
+
+#else
+
+#error "Unrecognized BitSet implemention for VarSet."
+
+#endif
+
+// These types should be used as the types for VARSET_TP arguments and return values, respectively.
+typedef VarSetOpsRaw::ValArgType VARSET_VALARG_TP;
+typedef VarSetOpsRaw::RetValType VARSET_VALRET_TP;
+
+#define VARSET_COUNTOPS 0
+#if VARSET_COUNTOPS
+typedef BitSetOpsWithCounter<VARSET_TP,
+ VARSET_REP,
+ Compiler*,
+ TrackedVarBitSetTraits,
+ VARSET_VALARG_TP,
+ VARSET_VALRET_TP,
+ VarSetOpsRaw::Iter>
+ VarSetOps;
+#else
+typedef VarSetOpsRaw VarSetOps;
+#endif
+
+#define ALLVARSET_REP BSUInt64
+
+#if ALLVARSET_REP == BSUInt64
+
+#include "bitsetasuint64.h"
+
+typedef BitSetOps</*BitSetType*/ UINT64,
+ /*Brand*/ ALLVARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ AllVarBitSetTraits>
+ AllVarSetOps;
+
+typedef UINT64 ALLVARSET_TP;
+
+const unsigned lclMAX_ALLSET_TRACKED = UInt64Bits;
+
+#define ALLVARSET_REP_IS_CLASS 0
+
+#elif ALLVARSET_REP == BSShortLong
+
+#include "bitsetasshortlong.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetShortLongRep,
+ /*Brand*/ ALLVARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ AllVarBitSetTraits>
+ AllVarSetOps;
+
+typedef BitSetShortLongRep ALLVARSET_TP;
+
+const unsigned lclMAX_ALLSET_TRACKED = lclMAX_TRACKED;
+
+#define ALLVARSET_REP_IS_CLASS 0
+
+#elif ALLVARSET_REP == BSUInt64Class
+
+#include "bitsetasuint64inclass.h"
+
+typedef BitSetOps</*BitSetType*/ BitSetUint64<Compiler*, AllVarBitSetTraits>,
+ /*Brand*/ ALLVARSET_REP,
+ /*Env*/ Compiler*,
+ /*BitSetTraits*/ AllVarBitSetTraits>
+ AllVarSetOps;
+
+typedef BitSetUint64<Compiler*, AllVarBitSetTraits> ALLVARSET_TP;
+
+const unsigned lclMAX_ALLSET_TRACKED = UInt64Bits;
+
+#define ALLVARSET_REP_IS_CLASS 1
+
+#else
+#error "Unrecognized BitSet implemention for AllVarSet."
+#endif
+
+// These types should be used as the types for VARSET_TP arguments and return values, respectively.
+typedef AllVarSetOps::ValArgType ALLVARSET_VALARG_TP;
+typedef AllVarSetOps::RetValType ALLVARSET_VALRET_TP;
+
+// Initialize "varName" to "initVal." Copies contents, not references; if "varName" is uninitialized, allocates a var
+// set for it (using "comp" for any necessary allocation), and copies the contents of "initVal" into it.
+#define VARSET_INIT(comp, varName, initVal) varName(VarSetOps::MakeCopy(comp, initVal))
+#define ALLVARSET_INIT(comp, varName, initVal) varName(AllVarSetOps::MakeCopy(comp, initVal))
+
+// Initializes "varName" to "initVal", without copying: if "initVal" is an indirect representation, copies its
+// pointer into "varName".
+#if defined(DEBUG) && VARSET_REP_IS_CLASS
+#define VARSET_INIT_NOCOPY(varName, initVal) varName(initVal, 0)
+#else
+#define VARSET_INIT_NOCOPY(varName, initVal) varName(initVal)
+#endif
+
+#if defined(DEBUG) && ALLVARSET_REP_IS_CLASS
+#define ALLVARSET_INIT_NOCOPY(varName, initVal) varName(initVal, 0)
+#else
+#define ALLVARSET_INIT_NOCOPY(varName, initVal) varName(initVal)
+#endif
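
A hedged sketch of how the two forms might be used inside a Compiler member ("liveSet" and
"trackedIndex" are made-up locals; MakeCopy and AddElemD are the bitset.h operations the macros
and their callers rely on):

    // "this" is the Compiler*, which serves as the allocation environment.
    VARSET_TP VARSET_INIT(this, liveCopy, liveSet);    // deep copy: safe to mutate independently
    VARSET_TP VARSET_INIT_NOCOPY(liveView, liveSet);   // may share the representation: treat as read-only
    VarSetOps::AddElemD(this, liveCopy, trackedIndex); // destructive add touches only the copy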
+
+// The iterator pattern.
+
+// Use this to initialize an iterator "iterName" to iterate over a VARSET_TP "vs".
+// "varIndex" will be an unsigned variable to which we assign the elements of "vs".
+#define VARSET_ITER_INIT(comp, iterName, vs, varIndex) \
+ unsigned varIndex = 0; \
+ VarSetOps::Iter iterName(comp, vs)
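
A sketch of the iteration idiom this macro sets up ("liveSet" is a made-up local, and the NextElem
call shape is an assumption about the Iter type provided by bitset.h in this era):

    VARSET_ITER_INIT(this, iter, liveSet, varIndex);
    while (iter.NextElem(this, &varIndex))
    {
        // "varIndex" is a tracked-variable index (0 .. lclMAX_TRACKED-1), not a lvaTable index.
    }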
+
+#endif // _VARSET_INCLUDED_
diff --git a/src/jit/vartype.h b/src/jit/vartype.h
new file mode 100644
index 0000000000..550aeb9c5b
--- /dev/null
+++ b/src/jit/vartype.h
@@ -0,0 +1,285 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*****************************************************************************/
+#ifndef _VARTYPE_H_
+#define _VARTYPE_H_
+/*****************************************************************************/
+#include "error.h"
+
+enum var_types_classification
+{
+ VTF_ANY = 0x0000,
+ VTF_INT = 0x0001,
+ VTF_UNS = 0x0002, // type is unsigned
+ VTF_FLT = 0x0004,
+ VTF_GCR = 0x0008, // type is GC ref
+ VTF_BYR = 0x0010, // type is Byref
+ VTF_I = 0x0020, // is machine sized
+ VTF_S = 0x0040, // is a struct type
+};
+
+DECLARE_TYPED_ENUM(var_types, BYTE)
+{
+#define DEF_TP(tn, nm, jitType, verType, sz, sze, asze, st, al, tf, howUsed) TYP_##tn,
+#include "typelist.h"
+#undef DEF_TP
+
+ TYP_COUNT,
+
+ TYP_lastIntrins = TYP_DOUBLE
+}
+END_DECLARE_TYPED_ENUM(var_types, BYTE)
+
+/*****************************************************************************
+ * C-style pointers are implemented as TYP_INT or TYP_LONG depending on the
+ * platform
+ */
+
+#ifdef _TARGET_64BIT_
+#define TYP_I_IMPL TYP_LONG
+#define TYP_U_IMPL TYP_ULONG
+#define TYPE_REF_IIM TYPE_REF_LNG
+#else
+#define TYP_I_IMPL TYP_INT
+#define TYP_U_IMPL TYP_UINT
+#define TYPE_REF_IIM TYPE_REF_INT
+#ifdef _PREFAST_
+// We silence this in the 32-bit build because for portability, we like to have asserts like this:
+// assert(op2->gtType == TYP_INT || op2->gtType == TYP_I_IMPL);
+// This is obviously redundant for 32-bit builds, but we don't want to have ifdefs and different
+// asserts just for 64-bit builds, so for now just silence the assert
+#pragma warning(disable : 6287) // warning 6287: the left and right sub-expressions are identical
+#endif //_PREFAST_
+#endif
+
+/*****************************************************************************/
+
+const extern BYTE varTypeClassification[TYP_COUNT];
+
+// make any class with a TypeGet member also have a function TypeGet() that does the same thing
+template <class T>
+inline var_types TypeGet(T* t)
+{
+ return t->TypeGet();
+}
+
+// make a TypeGet function which is the identity function for var_types
+// the point of this and the preceding template is that you can now write template functions
+// that work on var_types as well as on any object that exposes a TypeGet method,
+// such as all of the varTypeIs* functions below
+inline var_types TypeGet(var_types v)
+{
+ return v;
+}
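
Because of these two overloads, a helper can be written once and applied either to a plain
var_types value or to any node type that exposes TypeGet(). A hypothetical example (this helper is
not part of the header):

    template <class T>
    inline bool varTypeIsGcRef(T vt) // illustrative only
    {
        return TypeGet(vt) == TYP_REF;
    }

    // varTypeIsGcRef(TYP_REF)      -> resolves through the identity overload above
    // varTypeIsGcRef(treeNodePtr)  -> resolves through the pointer overload, calling treeNodePtr->TypeGet()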
+
+#ifdef FEATURE_SIMD
+template <class T>
+inline bool varTypeIsSIMD(T vt)
+{
+ switch (TypeGet(vt))
+ {
+ case TYP_SIMD8:
+ case TYP_SIMD12:
+ case TYP_SIMD16:
+#ifdef FEATURE_AVX_SUPPORT
+ case TYP_SIMD32:
+#endif // FEATURE_AVX_SUPPORT
+ return true;
+ default:
+ return false;
+ }
+}
+#else // FEATURE_SIMD
+
+// Always return false if FEATURE_SIMD is not enabled
+template <class T>
+inline bool varTypeIsSIMD(T vt)
+{
+ return false;
+}
+#endif // !FEATURE_SIMD
+
+template <class T>
+inline bool varTypeIsIntegral(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsIntegralOrI(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsUnsigned(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_UNS)) != 0);
+}
+
+// If "vt" is an unsigned integral type, returns the corresponding signed integral type, otherwise
+// return "vt".
+inline var_types varTypeUnsignedToSigned(var_types vt)
+{
+ if (varTypeIsUnsigned(vt))
+ {
+ switch (vt)
+ {
+ case TYP_BOOL:
+ case TYP_UBYTE:
+ return TYP_BYTE;
+ case TYP_USHORT:
+ case TYP_CHAR:
+ return TYP_SHORT;
+ case TYP_UINT:
+ return TYP_INT;
+ case TYP_ULONG:
+ return TYP_LONG;
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+ return vt;
+ }
+}
+
+template <class T>
+inline bool varTypeIsFloating(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_FLT)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsArithmetic(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_FLT)) != 0);
+}
+
+template <class T>
+inline unsigned varTypeGCtype(T vt)
+{
+ return (unsigned)(varTypeClassification[TypeGet(vt)] & (VTF_GCR | VTF_BYR));
+}
+
+template <class T>
+inline bool varTypeIsGC(T vt)
+{
+ return (varTypeGCtype(vt) != 0);
+}
+
+template <class T>
+inline bool varTypeIsI(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & VTF_I) != 0);
+}
+
+template <class T>
+inline bool varTypeCanReg(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I | VTF_FLT)) != 0);
+}
+
+template <class T>
+inline bool varTypeIsByte(T vt)
+{
+ return (TypeGet(vt) >= TYP_BOOL) && (TypeGet(vt) <= TYP_UBYTE);
+}
+
+template <class T>
+inline bool varTypeIsShort(T vt)
+{
+ return (TypeGet(vt) >= TYP_CHAR) && (TypeGet(vt) <= TYP_USHORT);
+}
+
+template <class T>
+inline bool varTypeIsSmall(T vt)
+{
+ return (TypeGet(vt) >= TYP_BOOL) && (TypeGet(vt) <= TYP_USHORT);
+}
+
+template <class T>
+inline bool varTypeIsSmallInt(T vt)
+{
+ return (TypeGet(vt) >= TYP_BYTE) && (TypeGet(vt) <= TYP_USHORT);
+}
+
+template <class T>
+inline bool varTypeIsIntOrI(T vt)
+{
+ return ((TypeGet(vt) == TYP_INT)
+#ifdef _TARGET_64BIT_
+ || (TypeGet(vt) == TYP_I_IMPL)
+#endif // _TARGET_64BIT_
+ );
+}
+
+template <class T>
+inline bool genActualTypeIsIntOrI(T vt)
+{
+ return ((TypeGet(vt) >= TYP_BOOL) && (TypeGet(vt) <= TYP_U_IMPL));
+}
+
+template <class T>
+inline bool varTypeIsLong(T vt)
+{
+ return (TypeGet(vt) >= TYP_LONG) && (TypeGet(vt) <= TYP_ULONG);
+}
+
+template <class T>
+inline bool varTypeIsMultiReg(T vt)
+{
+#ifdef _TARGET_64BIT_
+ return false;
+#else
+ return (TypeGet(vt) == TYP_LONG);
+#endif
+}
+
+template <class T>
+inline bool varTypeIsSingleReg(T vt)
+{
+ return !varTypeIsMultiReg(vt);
+}
+
+template <class T>
+inline bool varTypeIsComposite(T vt)
+{
+ return (!varTypeIsArithmetic(TypeGet(vt)) && TypeGet(vt) != TYP_VOID);
+}
+
+// Is this type promotable?
+// In general only structs are promotable.
+// However, a SIMD type, e.g. TYP_SIMD8, may be handled as either a struct, OR a
+// fully-promoted register type.
+// On 32-bit systems longs are split into an upper and lower half, and they are
+// handled as if they are structs with two integer fields.
+
+template <class T>
+inline bool varTypeIsPromotable(T vt)
+{
+ return (varTypeIsStruct(vt) || (TypeGet(vt) == TYP_BLK)
+#if !defined(_TARGET_64BIT_)
+ || varTypeIsLong(vt)
+#endif // !defined(_TARGET_64BIT_)
+ );
+}
+
+template <class T>
+inline bool varTypeIsStruct(T vt)
+{
+ return ((varTypeClassification[TypeGet(vt)] & VTF_S) != 0);
+}
+
+template <class T>
+inline bool varTypeIsEnregisterableStruct(T vt)
+{
+ return (TypeGet(vt) != TYP_STRUCT);
+}
+
+/*****************************************************************************/
+#endif // _VARTYPE_H_
+/*****************************************************************************/
diff --git a/src/jit/x86_instrs.h b/src/jit/x86_instrs.h
new file mode 100644
index 0000000000..1c3489d3b4
--- /dev/null
+++ b/src/jit/x86_instrs.h
@@ -0,0 +1,10 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+//
+// This is a temporary file which defines the x86 instructions that
+// are currently still referenced when building the Arm Jit compiler
+//
+
+INS_lea,